diff mbox

[21/21] block: add spice block device backend

Message ID 1384777531-14635-22-git-send-email-marcandre.lureau@gmail.com
State New
Headers show

Commit Message

Marc-André Lureau Nov. 18, 2013, 12:25 p.m. UTC
From: Marc-André Lureau <marcandre.lureau@redhat.com>

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
---
 block/Makefile.objs |   1 +
 block/spicebd.c     | 536 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 537 insertions(+)
 create mode 100644 block/spicebd.c

Comments

Marc-André Lureau Nov. 20, 2013, 11 a.m. UTC | #1
/note to self:
- add failing case where nbd channel isn't connected before and after
migration (do not wait for nbd init or disconnection)

On Mon, Nov 18, 2013 at 1:25 PM, Marc-André Lureau
<marcandre.lureau@gmail.com> wrote:
> From: Marc-André Lureau <marcandre.lureau@redhat.com>
>
> Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
> ---
>  block/Makefile.objs |   1 +
>  block/spicebd.c     | 536 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 537 insertions(+)
>  create mode 100644 block/spicebd.c
>
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index 4e8c91e..f49b7c3 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -16,6 +16,7 @@ block-obj-$(CONFIG_CURL) += curl.o
>  block-obj-$(CONFIG_RBD) += rbd.o
>  block-obj-$(CONFIG_GLUSTERFS) += gluster.o
>  block-obj-$(CONFIG_LIBSSH2) += ssh.o
> +common-obj-$(CONFIG_SPICE) += spicebd.o
>  endif
>
>  common-obj-y += stream.o
> diff --git a/block/spicebd.c b/block/spicebd.c
> new file mode 100644
> index 0000000..6b23b61
> --- /dev/null
> +++ b/block/spicebd.c
> @@ -0,0 +1,536 @@
> +/*
> + * Spice block backend for QEMU.
> + *
> + * Copyright (C) 2013 Red Hat, Inc.
> + * Author: Marc-André Lureau <marcandre.lureau@redhat.com>
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <stdarg.h>
> +#include <spice/protocol.h>
> +
> +#include "nbd-client.h"
> +#include "ui/qemu-spice.h"
> +#include "block/block_int.h"
> +#include "qemu/sockets.h"
> +#include "qemu/uri.h"
> +#include "qapi/qmp/qint.h"
> +#include "sysemu/sysemu.h"
> +#include "sysemu/char.h"
> +#include "qmp-commands.h"
> +#include "sysemu/blockdev.h"
> +#include "migration/migration.h"
> +
> +#ifndef DEBUG_SPICE
> +#define DEBUG_SPICE   0
> +#endif
> +
> +#define SOCKET_CHR 0
> +#define SOCKET_NBD 1
> +
> +#define DPRINTF(fmt, ...)                               \
> +    do {                                                \
> +        if (DEBUG_SPICE) {                              \
> +            fprintf(stderr, "spicebd: %-15s " fmt "\n", \
> +                    __func__, ##__VA_ARGS__);           \
> +        }                                               \
> +    } while (0)
> +
> +typedef struct Buffer {
> +    uint8_t data[4096];
> +    uint8_t *p;
> +    char left;
> +} Buffer;
> +
> +typedef struct BDRVSpiceState {
> +    BlockDriverState *bs;
> +    QEMUBH *bh;
> +    NbdClientSession client;
> +
> +    /* our spicechr-fd pipe */
> +    int sv[2];
> +    Buffer readb;
> +    Buffer writeb;
> +
> +    int aio_count;
> +    CharDriverState *chr;
> +    guint chr_watch;
> +
> +    Coroutine *coroutine;
> +    bool need_read;
> +    bool need_write;
> +    bool opened;
> +    bool inmigrate;
> +} BDRVSpiceState;
> +
> +static void nbd_read_handler(void *opaque);
> +static void update_chr_handlers(BDRVSpiceState *s);
> +
> +static int parse_uri(const char *filename, QDict *options, Error **errp)
> +{
> +    URI *uri = NULL;
> +
> +    uri = uri_parse(filename);
> +    if (!uri) {
> +        return -EINVAL;
> +    }
> +
> +    if (strcmp(uri->scheme, "spicebd") != 0) {
> +        error_setg(errp, "URI scheme must be 'spicebd'");
> +        goto err;
> +    }
> +
> +    uri_free(uri);
> +    return 0;
> +
> + err:
> +    if (uri) {
> +        uri_free(uri);
> +    }
> +    return -EINVAL;
> +}
> +
> +static void spice_parse_filename(const char *filename, QDict *options,
> +                                 Error **errp)
> +{
> +    parse_uri(filename, options, errp);
> +}
> +
> +static void co_restart(void *opaque)
> +{
> +    BDRVSpiceState *s = opaque;
> +
> +    qemu_coroutine_enter(s->coroutine, NULL);
> +}
> +
> +static void close_socketpair(BDRVSpiceState *s)
> +{
> +    if (!s->opened) {
> +        return;
> +    }
> +
> +    DPRINTF("");
> +    nbd_client_session_close(&s->client);
> +
> +    if (s->sv[SOCKET_NBD] >= 0) {
> +        qemu_aio_set_fd_handler(s->sv[SOCKET_NBD], NULL, NULL, NULL);
> +        closesocket(s->sv[SOCKET_NBD]);
> +        s->sv[SOCKET_NBD] = -1;
> +    }
> +
> +    if (s->sv[SOCKET_CHR] >= 0) {
> +        qemu_aio_set_fd_handler(s->sv[SOCKET_CHR], NULL, NULL, NULL);
> +        closesocket(s->sv[SOCKET_CHR]);
> +        s->sv[SOCKET_CHR] = -1;
> +    }
> +
> +    if (s->inmigrate) {
> +        vm_start_release();
> +        s->inmigrate = false;
> +    }
> +
> +    s->opened = FALSE;
> +    if (s->coroutine && s->coroutine != qemu_coroutine_self()) {
> +        co_restart(s);
> +    }
> +}
> +
> +static int chardev_can_read(void *opaque)
> +{
> +    BDRVSpiceState *s = opaque;
> +    int retval = 0;
> +
> +    GPollFD pfd = {
> +        .fd = s->sv[SOCKET_CHR],
> +        .events = G_IO_OUT
> +    };
> +    g_poll(&pfd, 1, 0);
> +    if (pfd.revents & G_IO_OUT) {
> +        retval = s->writeb.left == 0 ? sizeof(s->writeb.data) : 0;
> +    }
> +
> +    return retval;
> +}
> +
> +static void chardev_read(void *opaque, const uint8_t *buf, int size)
> +{
> +    BDRVSpiceState *s = opaque;
> +    int written;
> +
> +    DPRINTF("reply from client %d", size);
> +    written = write(s->sv[SOCKET_CHR], buf, size);
> +    if (written == -1) {
> +        if (errno != EAGAIN) {
> +            close_socketpair(s);
> +            return;
> +        } else {
> +            written = 0;
> +        }
> +    }
> +
> +    if (s->writeb.left == 0) {
> +        size -= written;
> +        assert(size <= sizeof(s->writeb.data));
> +        memcpy(s->writeb.data, buf, size);
> +        s->writeb.p = s->writeb.data;
> +        s->writeb.left = size;
> +    } else {
> +        s->writeb.left -= written;
> +        s->writeb.p += written;
> +    }
> +
> +    s->need_write = s->writeb.left > 0;
> +    update_chr_handlers(s);
> +}
> +
> +static void change_blockdev_cb(void *opaque)
> +{
> +    BDRVSpiceState *s = opaque;
> +    BlockDriverState *bs = s->bs;
> +    Error *err = NULL;
> +    int bdrv_flags;
> +    QDict *options = NULL;
> +    char device_name[32];
> +
> +    qemu_bh_delete(s->bh);
> +    s->bh = NULL;
> +
> +    memcpy(device_name, bs->child_device_name, sizeof(device_name));
> +    DPRINTF("set bs %p to NULL", bs);
> +    bs->opaque = NULL;
> +
> +    bdrv_flags = bdrv_is_read_only(bs) ? 0 : BDRV_O_RDWR;
> +    if (bdrv_is_snapshot(bs)) {
> +        bdrv_flags |= BDRV_O_SNAPSHOT;
> +        options = qdict_new();
> +        qdict_put(options, "snapshot.size", qint_from_int(s->client.size));
> +    }
> +
> +    qmp_eject(device_name, true, true, &err);
> +    if (error_is_set(&err)) {
> +        fprintf(stderr, "spicebd: %s\n", error_get_pretty(err));
> +        error_free(err);
> +        QDECREF(options);
> +        free(s);
> +    }
> +
> +    qmp_change_blockdev_int(device_name, "spicebd:", NULL,
> +                            options, s, &err);
> +    if (error_is_set(&err)) {
> +        fprintf(stderr, "spicebd: %s\n", error_get_pretty(err));
> +        error_free(err);
> +    }
> +
> +    if (s->inmigrate) {
> +        vm_start_release();
> +        s->inmigrate = false;
> +    }
> +}
> +
> +static void coroutine_fn co_init(void *opaque)
> +{
> +    BDRVSpiceState *s = opaque;
> +    BlockDriverState *bs = s->bs;
> +
> +    DPRINTF("temporary coroutine for session initialization %p, "
> +            "device: %s", s, bs->child_device_name);
> +
> +    qemu_set_nonblock(s->sv[SOCKET_NBD]);
> +    /* After session_init, the fd_handler is managed by nbd-client */
> +    qemu_aio_set_fd_handler(s->sv[SOCKET_NBD], co_restart, NULL, s);
> +    if (nbd_client_session_init(&s->client, bs,
> +                                s->sv[SOCKET_NBD], NULL) < 0) {
> +        close_socketpair(s);
> +    } else {
> +        assert(s->bh == NULL);
> +        /* NOTE: need to defer out of current AIO call, to avoid non-blocking
> +           drain_all when ejecting? */
> +        s->bh = qemu_bh_new(change_blockdev_cb, s);
> +        qemu_bh_schedule(s->bh);
> +    }
> +
> +    s->coroutine = NULL;
> +    DPRINTF("end of temporary coroutine");
> +}
> +
> +static void spice_init(BDRVSpiceState *s)
> +{
> +    if (s->opened) {
> +        return;
> +    }
> +
> +    s->opened = TRUE;
> +
> +    /* TODO: teach nbd to use a iostream api instead of a socket */
> +    if (socketpair(PF_UNIX, SOCK_STREAM, 0, s->sv) == -1) {
> +        fprintf(stderr, "failed to create socketpair\n");
> +        return;
> +    }
> +
> +    s->need_read = TRUE;
> +    update_chr_handlers(s);
> +
> +    if (runstate_check(RUN_STATE_INMIGRATE)) {
> +        vm_start_hold();
> +        s->inmigrate = true;
> +    }
> +
> +    /* tell server we are ready */
> +    qemu_chr_fe_event(s->chr, SPICE_PORT_EVENT_OPENED);
> +    s->coroutine = qemu_coroutine_create(co_init);
> +    qemu_coroutine_enter(s->coroutine, s);
> +}
> +
> +static void chardev_event(void *opaque, int event)
> +{
> +    BDRVSpiceState *s = opaque;
> +
> +    switch (event) {
> +    case CHR_EVENT_CLOSED:
> +        DPRINTF("chardev close");
> +        close_socketpair(s);
> +        break;
> +    case CHR_EVENT_BREAK:
> +        DPRINTF("chardev break");
> +        if (s->coroutine) {
> +            DPRINTF("already waiting for incoming session");
> +            qemu_chr_fe_event(s->chr, SPICE_PORT_EVENT_OPENED);
> +            return;
> +        }
> +        close_socketpair(s);
> +        /* fall-through */
> +    case CHR_EVENT_OPENED:
> +        DPRINTF("chardev opened");
> +        spice_init(s);
> +        break;
> +    default:
> +        DPRINTF("unhandled chardev event %d", event);
> +    }
> +}
> +
> +static gboolean write_to_chr(GIOChannel *chan, GIOCondition cond,
> +                             void *opaque)
> +{
> +    BDRVSpiceState *s = opaque;
> +    int r;
> +
> +    r = qemu_chr_fe_write(s->chr, s->readb.p, s->readb.left);
> +    DPRINTF("write_to_chr %d/%d", r, s->readb.left);
> +    if (r <= 0) {
> +        close_socketpair(s);
> +        return FALSE;
> +    }
> +
> +    s->readb.p += r;
> +    s->readb.left -= r;
> +
> +    if (s->readb.left > 0) {
> +        if (!s->chr_watch) {
> +            s->chr_watch = qemu_chr_fe_add_watch(s->chr, G_IO_OUT,
> +                                                 write_to_chr, s);
> +        }
> +        return TRUE;
> +    } else {
> +        s->need_read = TRUE;
> +        update_chr_handlers(s);
> +    }
> +
> +    return FALSE;
> +}
> +
> +static void nbd_read_handler(void *opaque)
> +{
> +    BDRVSpiceState *s = opaque;
> +
> +    DPRINTF("read from nbd");
> +
> +    if (s->readb.left > 0) {
> +        abort();
> +    }
> +
> +    do {
> +        s->readb.left = recv(s->sv[SOCKET_CHR], s->readb.data,
> +                             sizeof(s->readb.data), MSG_DONTWAIT);
> +    } while (s->readb.left == -1 && errno == EAGAIN);
> +
> +    if (s->readb.left <= 0) {
> +        close_socketpair(s);
> +        return;
> +    }
> +
> +    s->need_read = FALSE;
> +    update_chr_handlers(s);
> +
> +    s->readb.p = s->readb.data;
> +    write_to_chr(NULL, 0, s);
> +}
> +
> +static void nbd_write_handler(void *opaque)
> +{
> +    BDRVSpiceState *s = opaque;
> +
> +    DPRINTF("resuming chardev_read left: %d", s->writeb.left);
> +
> +    chardev_read(s, s->writeb.data, s->writeb.left);
> +}
> +
> +static void update_chr_handlers(BDRVSpiceState *s)
> +{
> +    qemu_aio_set_fd_handler(s->sv[SOCKET_CHR],
> +                            s->need_read ? nbd_read_handler : NULL,
> +                            s->need_write ? nbd_write_handler : NULL,
> +                            s);
> +}
> +
> +static int spice_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
> +                           Error **errp)
> +{
> +    BDRVSpiceState *s = bs->opaque;
> +    int ret = -1;
> +
> +    s->bs = bs;
> +
> +    if (s->opened) {
> +        DPRINTF("re-open spicebd");
> +        s->client.bs = bs;
> +        return 0;
> +    }
> +
> +    DPRINTF("open %p  device=%s flags=0x%x", s,
> +            bs->child_device_name, bdrv_flags);
> +    if (strlen(bs->child_device_name) == 0) {
> +        fprintf(stderr, "spicebd: missing associated child device\n");
> +        return -1;
> +    }
> +
> +    if (bdrv_flags & BDRV_O_RDWR) {
> +        fprintf(stderr, "spicebd: only read-only supported\n");
> +        return -1;
> +    }
> +
> +    s->chr = qemu_chr_open_spice_vmc("nbd", true);
> +    if (!s->chr) {
> +        goto err;
> +    }
> +
> +    qemu_chr_add_handlers(s->chr, chardev_can_read,
> +                          chardev_read, chardev_event, s);
> +    spice_init(s);
> +
> +    return 0;
> +
> + err:
> +    return ret;
> +}
> +
> +static void spice_close(BlockDriverState *bs)
> +{
> +    BDRVSpiceState *s = bs->opaque;
> +
> +    DPRINTF("spice close %p\n", s);
> +
> +    if (s == NULL) {
> +        /* changing bd */
> +        return;
> +    }
> +
> +    s->bs = NULL;
> +
> +    close_socketpair(s);
> +    assert(!s->coroutine); /* after close_socketpair */
> +
> +    if (s->chr) {
> +        s->chr->chr_close(s->chr);
> +        g_free(s->chr);
> +        s->chr = NULL;
> +    }
> +}
> +
> +static coroutine_fn int spice_co_readv(BlockDriverState *bs,
> +                                       int64_t sector_num,
> +                                       int nb_sectors, QEMUIOVector *qiov)
> +{
> +    BDRVSpiceState *s = bs->opaque;
> +
> +    return nbd_client_session_co_readv(&s->client, sector_num,
> +                                       nb_sectors, qiov);
> +}
> +
> +static coroutine_fn int spice_co_writev(BlockDriverState *bs,
> +                                        int64_t sector_num,
> +                                        int nb_sectors, QEMUIOVector *qiov)
> +{
> +    BDRVSpiceState *s = bs->opaque;
> +
> +    return nbd_client_session_co_writev(&s->client, sector_num,
> +                                        nb_sectors, qiov);
> +}
> +
> +static coroutine_fn int spice_co_flush(BlockDriverState *bs)
> +{
> +    BDRVSpiceState *s = bs->opaque;
> +
> +    if (s == NULL) {
> +        /* changing bd */
> +        return -1;
> +    }
> +
> +    return nbd_client_session_co_flush(&s->client);
> +}
> +
> +static coroutine_fn int spice_co_discard(BlockDriverState *bs,
> +    int64_t sector_num, int nb_sectors)
> +{
> +    BDRVSpiceState *s = bs->opaque;
> +
> +    return nbd_client_session_co_discard(&s->client, sector_num, nb_sectors);
> +}
> +
> +static coroutine_fn int64_t spice_getlength(BlockDriverState *bs)
> +{
> +    BDRVSpiceState *s = bs->opaque;
> +
> +    DPRINTF("length=%" PRIi64, s->client.size);
> +
> +    return s->client.size;
> +}
> +
> +static BlockDriver bdrv_spice = {
> +    .format_name                  = "spicebd",
> +    .protocol_name                = "spicebd",
> +    .instance_size                = sizeof(BDRVSpiceState),
> +    .bdrv_parse_filename          = spice_parse_filename,
> +    .bdrv_file_open               = spice_file_open,
> +    .bdrv_close                   = spice_close,
> +    .bdrv_co_readv                = spice_co_readv,
> +    .bdrv_co_writev               = spice_co_writev,
> +    .bdrv_getlength               = spice_getlength,
> +    .bdrv_co_flush_to_os          = spice_co_flush,
> +    .bdrv_co_discard              = spice_co_discard,
> +};
> +
> +static void bdrv_spice_init(void)
> +{
> +    bdrv_register(&bdrv_spice);
> +}
> +
> +block_init(bdrv_spice_init);
> --
> 1.8.3.1
>
diff mbox

Patch

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 4e8c91e..f49b7c3 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -16,6 +16,7 @@  block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
+common-obj-$(CONFIG_SPICE) += spicebd.o
 endif
 
 common-obj-y += stream.o
diff --git a/block/spicebd.c b/block/spicebd.c
new file mode 100644
index 0000000..6b23b61
--- /dev/null
+++ b/block/spicebd.c
@@ -0,0 +1,536 @@ 
+/*
+ * Spice block backend for QEMU.
+ *
+ * Copyright (C) 2013 Red Hat, Inc.
+ * Author: Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <spice/protocol.h>
+
+#include "nbd-client.h"
+#include "ui/qemu-spice.h"
+#include "block/block_int.h"
+#include "qemu/sockets.h"
+#include "qemu/uri.h"
+#include "qapi/qmp/qint.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/char.h"
+#include "qmp-commands.h"
+#include "sysemu/blockdev.h"
+#include "migration/migration.h"
+
+#ifndef DEBUG_SPICE
+#define DEBUG_SPICE   0
+#endif
+
+#define SOCKET_CHR 0
+#define SOCKET_NBD 1
+
+#define DPRINTF(fmt, ...)                               \
+    do {                                                \
+        if (DEBUG_SPICE) {                              \
+            fprintf(stderr, "spicebd: %-15s " fmt "\n", \
+                    __func__, ##__VA_ARGS__);           \
+        }                                               \
+    } while (0)
+
+typedef struct Buffer {
+    uint8_t data[4096];
+    uint8_t *p;
+    char left;
+} Buffer;
+
+typedef struct BDRVSpiceState {
+    BlockDriverState *bs;
+    QEMUBH *bh;
+    NbdClientSession client;
+
+    /* our spicechr-fd pipe */
+    int sv[2];
+    Buffer readb;
+    Buffer writeb;
+
+    int aio_count;
+    CharDriverState *chr;
+    guint chr_watch;
+
+    Coroutine *coroutine;
+    bool need_read;
+    bool need_write;
+    bool opened;
+    bool inmigrate;
+} BDRVSpiceState;
+
+static void nbd_read_handler(void *opaque);
+static void update_chr_handlers(BDRVSpiceState *s);
+
+static int parse_uri(const char *filename, QDict *options, Error **errp)
+{
+    URI *uri = NULL;
+
+    uri = uri_parse(filename);
+    if (!uri) {
+        return -EINVAL;
+    }
+
+    if (strcmp(uri->scheme, "spicebd") != 0) {
+        error_setg(errp, "URI scheme must be 'spicebd'");
+        goto err;
+    }
+
+    uri_free(uri);
+    return 0;
+
+ err:
+    if (uri) {
+        uri_free(uri);
+    }
+    return -EINVAL;
+}
+
+static void spice_parse_filename(const char *filename, QDict *options,
+                                 Error **errp)
+{
+    parse_uri(filename, options, errp);
+}
+
+static void co_restart(void *opaque)
+{
+    BDRVSpiceState *s = opaque;
+
+    qemu_coroutine_enter(s->coroutine, NULL);
+}
+
+static void close_socketpair(BDRVSpiceState *s)
+{
+    if (!s->opened) {
+        return;
+    }
+
+    DPRINTF("");
+    nbd_client_session_close(&s->client);
+
+    if (s->sv[SOCKET_NBD] >= 0) {
+        qemu_aio_set_fd_handler(s->sv[SOCKET_NBD], NULL, NULL, NULL);
+        closesocket(s->sv[SOCKET_NBD]);
+        s->sv[SOCKET_NBD] = -1;
+    }
+
+    if (s->sv[SOCKET_CHR] >= 0) {
+        qemu_aio_set_fd_handler(s->sv[SOCKET_CHR], NULL, NULL, NULL);
+        closesocket(s->sv[SOCKET_CHR]);
+        s->sv[SOCKET_CHR] = -1;
+    }
+
+    if (s->inmigrate) {
+        vm_start_release();
+        s->inmigrate = false;
+    }
+
+    s->opened = FALSE;
+    if (s->coroutine && s->coroutine != qemu_coroutine_self()) {
+        co_restart(s);
+    }
+}
+
+static int chardev_can_read(void *opaque)
+{
+    BDRVSpiceState *s = opaque;
+    int retval = 0;
+
+    GPollFD pfd = {
+        .fd = s->sv[SOCKET_CHR],
+        .events = G_IO_OUT
+    };
+    g_poll(&pfd, 1, 0);
+    if (pfd.revents & G_IO_OUT) {
+        retval = s->writeb.left == 0 ? sizeof(s->writeb.data) : 0;
+    }
+
+    return retval;
+}
+
+static void chardev_read(void *opaque, const uint8_t *buf, int size)
+{
+    BDRVSpiceState *s = opaque;
+    int written;
+
+    DPRINTF("reply from client %d", size);
+    written = write(s->sv[SOCKET_CHR], buf, size);
+    if (written == -1) {
+        if (errno != EAGAIN) {
+            close_socketpair(s);
+            return;
+        } else {
+            written = 0;
+        }
+    }
+
+    if (s->writeb.left == 0) {
+        size -= written;
+        assert(size <= sizeof(s->writeb.data));
+        memcpy(s->writeb.data, buf, size);
+        s->writeb.p = s->writeb.data;
+        s->writeb.left = size;
+    } else {
+        s->writeb.left -= written;
+        s->writeb.p += written;
+    }
+
+    s->need_write = s->writeb.left > 0;
+    update_chr_handlers(s);
+}
+
+static void change_blockdev_cb(void *opaque)
+{
+    BDRVSpiceState *s = opaque;
+    BlockDriverState *bs = s->bs;
+    Error *err = NULL;
+    int bdrv_flags;
+    QDict *options = NULL;
+    char device_name[32];
+
+    qemu_bh_delete(s->bh);
+    s->bh = NULL;
+
+    memcpy(device_name, bs->child_device_name, sizeof(device_name));
+    DPRINTF("set bs %p to NULL", bs);
+    bs->opaque = NULL;
+
+    bdrv_flags = bdrv_is_read_only(bs) ? 0 : BDRV_O_RDWR;
+    if (bdrv_is_snapshot(bs)) {
+        bdrv_flags |= BDRV_O_SNAPSHOT;
+        options = qdict_new();
+        qdict_put(options, "snapshot.size", qint_from_int(s->client.size));
+    }
+
+    qmp_eject(device_name, true, true, &err);
+    if (error_is_set(&err)) {
+        fprintf(stderr, "spicebd: %s\n", error_get_pretty(err));
+        error_free(err);
+        QDECREF(options);
+        free(s);
+    }
+
+    qmp_change_blockdev_int(device_name, "spicebd:", NULL,
+                            options, s, &err);
+    if (error_is_set(&err)) {
+        fprintf(stderr, "spicebd: %s\n", error_get_pretty(err));
+        error_free(err);
+    }
+
+    if (s->inmigrate) {
+        vm_start_release();
+        s->inmigrate = false;
+    }
+}
+
+static void coroutine_fn co_init(void *opaque)
+{
+    BDRVSpiceState *s = opaque;
+    BlockDriverState *bs = s->bs;
+
+    DPRINTF("temporary coroutine for session initialization %p, "
+            "device: %s", s, bs->child_device_name);
+
+    qemu_set_nonblock(s->sv[SOCKET_NBD]);
+    /* After session_init, the fd_handler is managed by nbd-client */
+    qemu_aio_set_fd_handler(s->sv[SOCKET_NBD], co_restart, NULL, s);
+    if (nbd_client_session_init(&s->client, bs,
+                                s->sv[SOCKET_NBD], NULL) < 0) {
+        close_socketpair(s);
+    } else {
+        assert(s->bh == NULL);
+        /* NOTE: need to defer out of current AIO call, to avoid non-blocking
+           drain_all when ejecting? */
+        s->bh = qemu_bh_new(change_blockdev_cb, s);
+        qemu_bh_schedule(s->bh);
+    }
+
+    s->coroutine = NULL;
+    DPRINTF("end of temporary coroutine");
+}
+
+static void spice_init(BDRVSpiceState *s)
+{
+    if (s->opened) {
+        return;
+    }
+
+    s->opened = TRUE;
+
+    /* TODO: teach nbd to use a iostream api instead of a socket */
+    if (socketpair(PF_UNIX, SOCK_STREAM, 0, s->sv) == -1) {
+        fprintf(stderr, "failed to create socketpair\n");
+        return;
+    }
+
+    s->need_read = TRUE;
+    update_chr_handlers(s);
+
+    if (runstate_check(RUN_STATE_INMIGRATE)) {
+        vm_start_hold();
+        s->inmigrate = true;
+    }
+
+    /* tell server we are ready */
+    qemu_chr_fe_event(s->chr, SPICE_PORT_EVENT_OPENED);
+    s->coroutine = qemu_coroutine_create(co_init);
+    qemu_coroutine_enter(s->coroutine, s);
+}
+
+static void chardev_event(void *opaque, int event)
+{
+    BDRVSpiceState *s = opaque;
+
+    switch (event) {
+    case CHR_EVENT_CLOSED:
+        DPRINTF("chardev close");
+        close_socketpair(s);
+        break;
+    case CHR_EVENT_BREAK:
+        DPRINTF("chardev break");
+        if (s->coroutine) {
+            DPRINTF("already waiting for incoming session");
+            qemu_chr_fe_event(s->chr, SPICE_PORT_EVENT_OPENED);
+            return;
+        }
+        close_socketpair(s);
+        /* fall-through */
+    case CHR_EVENT_OPENED:
+        DPRINTF("chardev opened");
+        spice_init(s);
+        break;
+    default:
+        DPRINTF("unhandled chardev event %d", event);
+    }
+}
+
+static gboolean write_to_chr(GIOChannel *chan, GIOCondition cond,
+                             void *opaque)
+{
+    BDRVSpiceState *s = opaque;
+    int r;
+
+    r = qemu_chr_fe_write(s->chr, s->readb.p, s->readb.left);
+    DPRINTF("write_to_chr %d/%d", r, s->readb.left);
+    if (r <= 0) {
+        close_socketpair(s);
+        return FALSE;
+    }
+
+    s->readb.p += r;
+    s->readb.left -= r;
+
+    if (s->readb.left > 0) {
+        if (!s->chr_watch) {
+            s->chr_watch = qemu_chr_fe_add_watch(s->chr, G_IO_OUT,
+                                                 write_to_chr, s);
+        }
+        return TRUE;
+    } else {
+        s->need_read = TRUE;
+        update_chr_handlers(s);
+    }
+
+    return FALSE;
+}
+
+static void nbd_read_handler(void *opaque)
+{
+    BDRVSpiceState *s = opaque;
+
+    DPRINTF("read from nbd");
+
+    if (s->readb.left > 0) {
+        abort();
+    }
+
+    do {
+        s->readb.left = recv(s->sv[SOCKET_CHR], s->readb.data,
+                             sizeof(s->readb.data), MSG_DONTWAIT);
+    } while (s->readb.left == -1 && errno == EAGAIN);
+
+    if (s->readb.left <= 0) {
+        close_socketpair(s);
+        return;
+    }
+
+    s->need_read = FALSE;
+    update_chr_handlers(s);
+
+    s->readb.p = s->readb.data;
+    write_to_chr(NULL, 0, s);
+}
+
+static void nbd_write_handler(void *opaque)
+{
+    BDRVSpiceState *s = opaque;
+
+    DPRINTF("resuming chardev_read left: %d", s->writeb.left);
+
+    chardev_read(s, s->writeb.data, s->writeb.left);
+}
+
+static void update_chr_handlers(BDRVSpiceState *s)
+{
+    qemu_aio_set_fd_handler(s->sv[SOCKET_CHR],
+                            s->need_read ? nbd_read_handler : NULL,
+                            s->need_write ? nbd_write_handler : NULL,
+                            s);
+}
+
+static int spice_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
+                           Error **errp)
+{
+    BDRVSpiceState *s = bs->opaque;
+    int ret = -1;
+
+    s->bs = bs;
+
+    if (s->opened) {
+        DPRINTF("re-open spicebd");
+        s->client.bs = bs;
+        return 0;
+    }
+
+    DPRINTF("open %p  device=%s flags=0x%x", s,
+            bs->child_device_name, bdrv_flags);
+    if (strlen(bs->child_device_name) == 0) {
+        fprintf(stderr, "spicebd: missing associated child device\n");
+        return -1;
+    }
+
+    if (bdrv_flags & BDRV_O_RDWR) {
+        fprintf(stderr, "spicebd: only read-only supported\n");
+        return -1;
+    }
+
+    s->chr = qemu_chr_open_spice_vmc("nbd", true);
+    if (!s->chr) {
+        goto err;
+    }
+
+    qemu_chr_add_handlers(s->chr, chardev_can_read,
+                          chardev_read, chardev_event, s);
+    spice_init(s);
+
+    return 0;
+
+ err:
+    return ret;
+}
+
+static void spice_close(BlockDriverState *bs)
+{
+    BDRVSpiceState *s = bs->opaque;
+
+    DPRINTF("spice close %p\n", s);
+
+    if (s == NULL) {
+        /* changing bd */
+        return;
+    }
+
+    s->bs = NULL;
+
+    close_socketpair(s);
+    assert(!s->coroutine); /* after close_socketpair */
+
+    if (s->chr) {
+        s->chr->chr_close(s->chr);
+        g_free(s->chr);
+        s->chr = NULL;
+    }
+}
+
+static coroutine_fn int spice_co_readv(BlockDriverState *bs,
+                                       int64_t sector_num,
+                                       int nb_sectors, QEMUIOVector *qiov)
+{
+    BDRVSpiceState *s = bs->opaque;
+
+    return nbd_client_session_co_readv(&s->client, sector_num,
+                                       nb_sectors, qiov);
+}
+
+static coroutine_fn int spice_co_writev(BlockDriverState *bs,
+                                        int64_t sector_num,
+                                        int nb_sectors, QEMUIOVector *qiov)
+{
+    BDRVSpiceState *s = bs->opaque;
+
+    return nbd_client_session_co_writev(&s->client, sector_num,
+                                        nb_sectors, qiov);
+}
+
+static coroutine_fn int spice_co_flush(BlockDriverState *bs)
+{
+    BDRVSpiceState *s = bs->opaque;
+
+    if (s == NULL) {
+        /* changing bd */
+        return -1;
+    }
+
+    return nbd_client_session_co_flush(&s->client);
+}
+
+static coroutine_fn int spice_co_discard(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors)
+{
+    BDRVSpiceState *s = bs->opaque;
+
+    return nbd_client_session_co_discard(&s->client, sector_num, nb_sectors);
+}
+
+static coroutine_fn int64_t spice_getlength(BlockDriverState *bs)
+{
+    BDRVSpiceState *s = bs->opaque;
+
+    DPRINTF("length=%" PRIi64, s->client.size);
+
+    return s->client.size;
+}
+
+static BlockDriver bdrv_spice = {
+    .format_name                  = "spicebd",
+    .protocol_name                = "spicebd",
+    .instance_size                = sizeof(BDRVSpiceState),
+    .bdrv_parse_filename          = spice_parse_filename,
+    .bdrv_file_open               = spice_file_open,
+    .bdrv_close                   = spice_close,
+    .bdrv_co_readv                = spice_co_readv,
+    .bdrv_co_writev               = spice_co_writev,
+    .bdrv_getlength               = spice_getlength,
+    .bdrv_co_flush_to_os          = spice_co_flush,
+    .bdrv_co_discard              = spice_co_discard,
+};
+
+static void bdrv_spice_init(void)
+{
+    bdrv_register(&bdrv_spice);
+}
+
+block_init(bdrv_spice_init);