diff mbox

[PATCHv7,1/5] block: add native support for NFS

Message ID 1390985425-13165-2-git-send-email-pl@kamp.de
State New
Headers show

Commit Message

Peter Lieven Jan. 29, 2014, 8:50 a.m. UTC
This patch adds native support for accessing images on NFS
shares without the requirement to actually mount the entire
NFS share on the host.

NFS Images can simply be specified by an url of the form:
nfs://<host>/<export>/<filename>[?param=value[&param2=value2[&...]]]

For example:
qemu-img create -f qcow2 nfs://10.0.0.1/qemu-images/test.qcow2

You need LibNFS from Ronnie Sahlberg available at:
   git://github.com/sahlberg/libnfs.git
for this to work.

During configure it is automatically probed for libnfs and support
is enabled on-the-fly. You can forbid or enforce libnfs support
with --disable-libnfs or --enable-libnfs respectively.

Due to NFS restrictions you might need to execute your binaries
as root, allow them to open priviledged ports (<1024) or specify
insecure option on the NFS server.

For additional information on ROOT vs. non-ROOT operation and URL
format + parameters see:
   https://raw.github.com/sahlberg/libnfs/master/README

Supported by qemu are the uid, gid and tcp-syncnt URL parameters.

LibNFS currently support NFS version 3 only.

Signed-off-by: Peter Lieven <pl@kamp.de>
---
 MAINTAINERS         |    5 +
 block/Makefile.objs |    1 +
 block/nfs.c         |  440 +++++++++++++++++++++++++++++++++++++++++++++++++++
 configure           |   26 +++
 qapi-schema.json    |    1 +
 5 files changed, 473 insertions(+)
 create mode 100644 block/nfs.c

Comments

Benoît Canet Jan. 29, 2014, 4:19 p.m. UTC | #1
Le Wednesday 29 Jan 2014 à 09:50:21 (+0100), Peter Lieven a écrit :
> This patch adds native support for accessing images on NFS
> shares without the requirement to actually mount the entire
> NFS share on the host.
> 
> NFS Images can simply be specified by an url of the form:
> nfs://<host>/<export>/<filename>[?param=value[&param2=value2[&...]]]
> 
> For example:
> qemu-img create -f qcow2 nfs://10.0.0.1/qemu-images/test.qcow2
> 
> You need LibNFS from Ronnie Sahlberg available at:
>    git://github.com/sahlberg/libnfs.git
> for this to work.
> 
> During configure it is automatically probed for libnfs and support
> is enabled on-the-fly. You can forbid or enforce libnfs support
> with --disable-libnfs or --enable-libnfs respectively.
> 
> Due to NFS restrictions you might need to execute your binaries
> as root, allow them to open priviledged ports (<1024) or specify
> insecure option on the NFS server.
> 
> For additional information on ROOT vs. non-ROOT operation and URL
> format + parameters see:
>    https://raw.github.com/sahlberg/libnfs/master/README
> 
> Supported by qemu are the uid, gid and tcp-syncnt URL parameters.
> 
> LibNFS currently support NFS version 3 only.
> 
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
>  MAINTAINERS         |    5 +
>  block/Makefile.objs |    1 +
>  block/nfs.c         |  440 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  configure           |   26 +++
>  qapi-schema.json    |    1 +
>  5 files changed, 473 insertions(+)
>  create mode 100644 block/nfs.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index fb53242..f8411f9 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -936,6 +936,11 @@ M: Peter Lieven <pl@kamp.de>
>  S: Supported
>  F: block/iscsi.c
>  
> +NFS
> +M: Peter Lieven <pl@kamp.de>
> +S: Maintained
> +F: block/nfs.c
> +
>  SSH
>  M: Richard W.M. Jones <rjones@redhat.com>
>  S: Supported
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index 4e8c91e..e254a21 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -12,6 +12,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>  ifeq ($(CONFIG_POSIX),y)
>  block-obj-y += nbd.o nbd-client.o sheepdog.o
>  block-obj-$(CONFIG_LIBISCSI) += iscsi.o
> +block-obj-$(CONFIG_LIBNFS) += nfs.o
>  block-obj-$(CONFIG_CURL) += curl.o
>  block-obj-$(CONFIG_RBD) += rbd.o
>  block-obj-$(CONFIG_GLUSTERFS) += gluster.o
> diff --git a/block/nfs.c b/block/nfs.c
> new file mode 100644
> index 0000000..2bbf7a2
> --- /dev/null
> +++ b/block/nfs.c
> @@ -0,0 +1,440 @@
> +/*
> + * QEMU Block driver for native access to files on NFS shares
> + *
> + * Copyright (c) 2014 Peter Lieven <pl@kamp.de>
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "config-host.h"
> +
> +#include <poll.h>
> +#include "qemu-common.h"
> +#include "qemu/config-file.h"
> +#include "qemu/error-report.h"
> +#include "block/block_int.h"
> +#include "trace.h"
> +#include "qemu/iov.h"
> +#include "qemu/uri.h"
> +#include "sysemu/sysemu.h"
> +#include <nfsc/libnfs.h>
> +
> +typedef struct NFSClient {
> +    struct nfs_context *context;
> +    struct nfsfh *fh;
> +    int events;
> +    bool has_zero_init;
> +} NFSClient;
> +
> +typedef struct NFSRPC {
> +    int status;
> +    int complete;
> +    QEMUIOVector *iov;
> +    struct stat *st;
> +    Coroutine *co;
> +    QEMUBH *bh;
> +} NFSRPC;
> +
> +static void nfs_process_read(void *arg);
> +static void nfs_process_write(void *arg);
> +
> +static void nfs_set_events(NFSClient *client)
> +{
> +    int ev = nfs_which_events(client->context);
> +    if (ev != client->events) {
> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context),
> +                      (ev & POLLIN) ? nfs_process_read : NULL,
> +                      (ev & POLLOUT) ? nfs_process_write : NULL,
> +                      client);
> +
> +    }
> +    client->events = ev;
> +}
> +
> +static void nfs_process_read(void *arg)
> +{
> +    NFSClient *client = arg;
> +    nfs_service(client->context, POLLIN);
> +    nfs_set_events(client);
> +}
> +
> +static void nfs_process_write(void *arg)
> +{
> +    NFSClient *client = arg;
> +    nfs_service(client->context, POLLOUT);
> +    nfs_set_events(client);
> +}
> +
> +static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
> +{
> +    *task = (NFSRPC) {
> +        .co         = qemu_coroutine_self(),
> +    };
> +}
> +
> +static void nfs_co_generic_bh_cb(void *opaque)
> +{
> +    NFSRPC *task = opaque;
> +    qemu_bh_delete(task->bh);
> +    qemu_coroutine_enter(task->co, NULL);
> +}
> +
> +static void
> +nfs_co_generic_cb(int status, struct nfs_context *nfs, void *data,
> +                  void *private_data)
> +{
> +    NFSRPC *task = private_data;
> +    task->complete = 1;
> +    task->status = status;
> +    if (task->status > 0 && task->iov) {
> +        if (task->status <= task->iov->size) {
It feel very odd to compare something named status with something named size.

> +            qemu_iovec_from_buf(task->iov, 0, data, task->status);
> +        } else {
> +            task->status = -EIO;
> +        }
> +    }
> +    if (task->status == 0 && task->st) {
> +        memcpy(task->st, data, sizeof(struct stat));
> +    }
> +    if (task->co) {
> +        task->bh = qemu_bh_new(nfs_co_generic_bh_cb, task);
> +        qemu_bh_schedule(task->bh);
> +    }
> +}
> +
> +static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
> +                                     int64_t sector_num, int nb_sectors,
> +                                     QEMUIOVector *iov)
> +{
> +    NFSClient *client = bs->opaque;
> +    NFSRPC task;
> +
> +    nfs_co_init_task(client, &task);
> +    task.iov = iov;
> +
> +    if (nfs_pread_async(client->context, client->fh,
> +                        sector_num * BDRV_SECTOR_SIZE,
> +                        nb_sectors * BDRV_SECTOR_SIZE,
> +                        nfs_co_generic_cb, &task) != 0) {
> +        return -ENOMEM;
> +    }
> +
> +    while (!task.complete) {
> +        nfs_set_events(client);
> +        qemu_coroutine_yield();
> +    }
> +
> +    if (task.status < 0) {
> +        return task.status;
> +    }
> +
> +    /* zero pad short reads */
> +    if (task.status < iov->size) {
> +        qemu_iovec_memset(iov, task.status, 0, iov->size - task.status);
> +    }
> +
> +    return 0;
> +}
> +
> +static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
> +                                        int64_t sector_num, int nb_sectors,
> +                                        QEMUIOVector *iov)
> +{
> +    NFSClient *client = bs->opaque;
> +    NFSRPC task;
> +    char *buf = NULL;
> +
> +    nfs_co_init_task(client, &task);
> +
> +    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
> +    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
> +
> +    if (nfs_pwrite_async(client->context, client->fh,
> +                         sector_num * BDRV_SECTOR_SIZE,
> +                         nb_sectors * BDRV_SECTOR_SIZE,
> +                         buf, nfs_co_generic_cb, &task) != 0) {
> +        g_free(buf);
> +        return -ENOMEM;
> +    }
> +
> +    while (!task.complete) {
> +        nfs_set_events(client);
> +        qemu_coroutine_yield();
> +    }
> +
> +    g_free(buf);
> +
> +    if (task.status != nb_sectors * BDRV_SECTOR_SIZE) {
> +        return task.status < 0 ? task.status : -EIO;
> +    }
> +
> +    return 0;
> +}
> +
> +static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
> +{
> +    NFSClient *client = bs->opaque;
> +    NFSRPC task;
> +
> +    nfs_co_init_task(client, &task);
> +
> +    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
> +                        &task) != 0) {
> +        return -ENOMEM;
> +    }
> +
> +    while (!task.complete) {
> +        nfs_set_events(client);
> +        qemu_coroutine_yield();
> +    }
> +
> +    return task.status;
> +}
> +
> +/* TODO Convert to fine grained options */
> +static QemuOptsList runtime_opts = {
> +    .name = "nfs",
> +    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
> +    .desc = {
> +        {
> +            .name = "filename",
> +            .type = QEMU_OPT_STRING,
> +            .help = "URL to the NFS file",
> +        },
> +        { /* end of list */ }
> +    },
> +};
> +
> +static void nfs_client_close(NFSClient *client)
> +{
> +    if (client->context) {
> +        if (client->fh) {
> +            nfs_close(client->context, client->fh);
> +        }
> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
> +        nfs_destroy_context(client->context);
> +    }
> +    memset(client, 0, sizeof(NFSClient));
> +}
> +
> +static void nfs_file_close(BlockDriverState *bs)
> +{
> +    NFSClient *client = bs->opaque;
> +    nfs_client_close(client);
> +}
> +
> +static int64_t nfs_.cilclient_open(NFSClient *client, const char *filename,
> +                               int flags, Error **errp)
> +{
> +    int ret = -EINVAL, i;
> +    struct stat st;
> +    URI *uri;
> +    QueryParams *qp = NULL;
> +    char *file = NULL, *strp = NULL;
> +
> +    uri = uri_parse(filename);
> +    if (!uri) {
> +        error_setg(errp, "Invalid URL specified");
> +        goto fail;
Should this be goto out since we don't have done nfs_init_context yet ?
> +    }
> +    strp = strrchr(uri->path, '/');
> +    if (strp == NULL) {
> +        error_setg(errp, "Invalid URL specified");
> +        goto fail;
goto out ?
> +    }
> +    file = g_strdup(strp);
> +    *strp = 0;
> +
> +    client->context = nfs_init_context();
> +    if (client->context == NULL) {
> +        error_setg(errp, "Failed to init NFS context");
> +        goto fail;
> +    }
> +
> +    qp = query_params_parse(uri->query);
> +    for (i = 0; i < qp->n; i++) {
> +        if (!qp->p[i].value) {
> +            error_setg(errp, "Value for NFS parameter expected: %s",
> +                       qp->p[i].name);
> +            goto fail;
> +        }
> +        if (!strncmp(qp->p[i].name, "uid", 3)) {
> +            nfs_set_uid(client->context, atoi(qp->p[i].value));
> +        } else if (!strncmp(qp->p[i].name, "gid", 3)) {
> +            nfs_set_gid(client->context, atoi(qp->p[i].value));
> +        } else if (!strncmp(qp->p[i].name, "tcp-syncnt", 10)) {
> +            nfs_set_tcp_syncnt(client->context, atoi(qp->p[i].value));
> +        } else {
> +            error_setg(errp, "Unknown NFS parameter name: %s",
> +                       qp->p[i].name);
> +            goto fail;
> +        }
> +    }
> +
> +    ret = nfs_mount(client->context, uri->server, uri->path);
> +    if (ret < 0) {
> +        error_setg(errp, "Failed to mount nfs share: %s",
> +                   nfs_get_error(client->context));
> +        goto fail;
> +    }
> +
> +    if (flags & O_CREAT) {
> +        ret = nfs_creat(client->context, file, 0600, &client->fh);
> +        if (ret < 0) {
> +            error_setg(errp, "Failed to create file: %s",
> +                       nfs_get_error(client->context));
> +            goto fail;
> +        }
> +    } else {
> +        ret = nfs_open(client->context, file, flags, &client->fh);
> +        if (ret < 0) {
> +            error_setg(errp, "Failed to open file : %s",
> +                       nfs_get_error(client->context));
> +            goto fail;
> +        }
> +    }
> +
> +    ret = nfs_fstat(client->context, client->fh, &st);
> +    if (ret < 0) {
> +        error_setg(errp, "Failed to fstat file: %s",
> +                   nfs_get_error(client->context));
> +        goto fail;
> +    }
> +
> +    ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
> +    client->has_zero_init = S_ISREG(st.st_mode);
> +    goto out;
> +fail:
> +    nfs_client_close(client);
> +out:
> +    if (qp) {
> +        query_params_free(qp);
> +    }
> +    uri_free(uri);
> +    g_free(file);
> +    return ret;
> +}
> +
> +static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
> +                         Error **errp) {
> +    NFSClient *client = bs->opaque;
> +    int64_t ret;
> +    QemuOpts *opts;
> +    Error *local_err = NULL;
> +
> +    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
> +    qemu_opts_absorb_qdict(opts, options, &local_err);
> +    if (error_is_set(&local_err)) {
> +        qerror_report_err(local_err);
I have seen more usage of error_propagate(errp, local_err); in QEMU code.
Maybe I am missing the point.

> +        error_free(local_err);
> +        return -EINVAL;
> +    }
> +    ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
> +                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
> +                          errp);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +    bs->total_sectors = ret;
> +    return 0;
> +}
> +
> +static int nfs_file_create(const char *url, QEMUOptionParameter *options,
> +                         Error **errp)
> +{
> +    int ret = 0;
> +    int64_t total_size = 0;
> +    NFSClient *client = g_malloc0(sizeof(NFSClient));
> +
> +    /* Read out options */
> +    while (options && options->name) {
> +        if (!strcmp(options->name, "size")) {
> +            total_size = options->value.n;
> +        }
> +        options++;
> +    }
> +
> +    ret = nfs_client_open(client, url, O_CREAT, errp);
> +    if (ret < 0) {
> +        goto out;
> +    }
> +    ret = nfs_ftruncate(client->context, client->fh, total_size);
> +out:
Should the out: label be after nfs_client_close(client); since the code will
jump to it on nfs_client_open failure ?

> +    nfs_client_close(client);
> +    g_free(client);
> +    return ret;
> +}
> +
> +static int nfs_has_zero_init(BlockDriverState *bs)
> +{
> +    NFSClient *client = bs->opaque;
> +    return client->has_zero_init;
> +}
> +
> +static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
> +{
> +    NFSClient *client = bs->opaque;
> +    NFSRPC task = {0};
> +    struct stat st;
> +
> +    task.st = &st;
> +    if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
> +                        &task) != 0) {
> +        return -ENOMEM;
> +    }
> +
> +    while (!task.complete) {
> +        nfs_set_events(client);
> +        qemu_aio_wait();
> +    }
> +
> +    return (task.status < 0 ? task.status : st.st_blocks * st.st_blksize);
> +}
> +
> +static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
> +{
> +    NFSClient *client = bs->opaque;
> +    return nfs_ftruncate(client->context, client->fh, offset);
> +}
> +
> +static BlockDriver bdrv_nfs = {
> +    .format_name     = "nfs",
> +    .protocol_name   = "nfs",
> +
> +    .instance_size   = sizeof(NFSClient),
> +    .bdrv_needs_filename = true,
> +    .bdrv_has_zero_init = nfs_has_zero_init,
> +    .bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
> +    .bdrv_truncate = nfs_file_truncate,
> +
> +    .bdrv_file_open  = nfs_file_open,
> +    .bdrv_close      = nfs_file_close,
> +    .bdrv_create     = nfs_file_create,
> +
> +    .bdrv_co_readv         = nfs_co_readv,
> +    .bdrv_co_writev        = nfs_co_writev,
> +    .bdrv_co_flush_to_disk = nfs_co_flush,
> +};
> +
> +static void nfs_block_init(void)
> +{
> +    bdrv_register(&bdrv_nfs);
> +}
> +
> +block_init(nfs_block_init);
> diff --git a/configure b/configure
> index b472694..9ca0c63 100755
> --- a/configure
> +++ b/configure
> @@ -251,6 +251,7 @@ vss_win32_sdk=""
>  win_sdk="no"
>  want_tools="yes"
>  libiscsi=""
> +libnfs=""
>  coroutine=""
>  coroutine_pool=""
>  seccomp=""
> @@ -840,6 +841,10 @@ for opt do
>    ;;
>    --enable-libiscsi) libiscsi="yes"
>    ;;
> +  --disable-libnfs) libnfs="no"
> +  ;;
> +  --enable-libnfs) libnfs="yes"
> +  ;;
>    --enable-profiler) profiler="yes"
>    ;;
>    --disable-cocoa) cocoa="no"
> @@ -1229,6 +1234,8 @@ Advanced options (experts only):
>    --enable-rbd             enable building the rados block device (rbd)
>    --disable-libiscsi       disable iscsi support
>    --enable-libiscsi        enable iscsi support
> +  --disable-libnfs         disable nfs support
> +  --enable-libnfs          enable nfs support
>    --disable-smartcard-nss  disable smartcard nss support
>    --enable-smartcard-nss   enable smartcard nss support
>    --disable-libusb         disable libusb (for usb passthrough)
> @@ -3600,6 +3607,20 @@ elif test "$debug" = "no" ; then
>    CFLAGS="-O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 $CFLAGS"
>  fi
>  
> +##########################################
> +# Do we have libnfs
> +if test "$libnfs" != "no" ; then
> +  if $pkg_config --atleast-version=1.8.91 libnfs; then
> +    libnfs="yes"
> +    libnfs_libs=$($pkg_config --libs libnfs)
> +    LIBS="$LIBS $libnfs_libs"
> +  else
> +    if test "$libnfs" = "yes" ; then
> +      feature_not_found "libnfs"
> +    fi
> +    libnfs="no"
> +  fi
> +fi
>  
>  # Disable zero malloc errors for official releases unless explicitly told to
>  # enable/disable
> @@ -3829,6 +3850,7 @@ echo "libiscsi support  $libiscsi (1.4.0)"
>  else
>  echo "libiscsi support  $libiscsi"
>  fi
> +echo "libnfs support    $libnfs"
>  echo "build guest agent $guest_agent"
>  echo "QGA VSS support   $guest_agent_with_vss"
>  echo "seccomp support   $seccomp"
> @@ -4165,6 +4187,10 @@ if test "$libiscsi" = "yes" ; then
>    fi
>  fi
>  
> +if test "$libnfs" = "yes" ; then
> +  echo "CONFIG_LIBNFS=y" >> $config_host_mak
> +fi
> +
>  if test "$seccomp" = "yes"; then
>    echo "CONFIG_SECCOMP=y" >> $config_host_mak
>  fi
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 05ced9d..7cfb5e5 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -4371,6 +4371,7 @@
>  # TODO gluster: Wait for structured options
>  # TODO iscsi: Wait for structured options
>  # TODO nbd: Should take InetSocketAddress for 'host'?
> +# TODO nfs: Wait for structured options
>  # TODO rbd: Wait for structured options
>  # TODO sheepdog: Wait for structured options
>  # TODO ssh: Should take InetSocketAddress for 'host'?
> -- 
> 1.7.9.5
> 
>
Peter Lieven Jan. 29, 2014, 4:38 p.m. UTC | #2
On 29.01.2014 17:19, Benoît Canet wrote:
> Le Wednesday 29 Jan 2014 à 09:50:21 (+0100), Peter Lieven a écrit :
>> This patch adds native support for accessing images on NFS
>> shares without the requirement to actually mount the entire
>> NFS share on the host.
>>
>> NFS Images can simply be specified by an url of the form:
>> nfs://<host>/<export>/<filename>[?param=value[&param2=value2[&...]]]
>>
>> For example:
>> qemu-img create -f qcow2 nfs://10.0.0.1/qemu-images/test.qcow2
>>
>> You need LibNFS from Ronnie Sahlberg available at:
>>     git://github.com/sahlberg/libnfs.git
>> for this to work.
>>
>> During configure it is automatically probed for libnfs and support
>> is enabled on-the-fly. You can forbid or enforce libnfs support
>> with --disable-libnfs or --enable-libnfs respectively.
>>
>> Due to NFS restrictions you might need to execute your binaries
>> as root, allow them to open priviledged ports (<1024) or specify
>> insecure option on the NFS server.
>>
>> For additional information on ROOT vs. non-ROOT operation and URL
>> format + parameters see:
>>     https://raw.github.com/sahlberg/libnfs/master/README
>>
>> Supported by qemu are the uid, gid and tcp-syncnt URL parameters.
>>
>> LibNFS currently support NFS version 3 only.
>>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>>   MAINTAINERS         |    5 +
>>   block/Makefile.objs |    1 +
>>   block/nfs.c         |  440 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>   configure           |   26 +++
>>   qapi-schema.json    |    1 +
>>   5 files changed, 473 insertions(+)
>>   create mode 100644 block/nfs.c
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index fb53242..f8411f9 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -936,6 +936,11 @@ M: Peter Lieven <pl@kamp.de>
>>   S: Supported
>>   F: block/iscsi.c
>>   
>> +NFS
>> +M: Peter Lieven <pl@kamp.de>
>> +S: Maintained
>> +F: block/nfs.c
>> +
>>   SSH
>>   M: Richard W.M. Jones <rjones@redhat.com>
>>   S: Supported
>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>> index 4e8c91e..e254a21 100644
>> --- a/block/Makefile.objs
>> +++ b/block/Makefile.objs
>> @@ -12,6 +12,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>>   ifeq ($(CONFIG_POSIX),y)
>>   block-obj-y += nbd.o nbd-client.o sheepdog.o
>>   block-obj-$(CONFIG_LIBISCSI) += iscsi.o
>> +block-obj-$(CONFIG_LIBNFS) += nfs.o
>>   block-obj-$(CONFIG_CURL) += curl.o
>>   block-obj-$(CONFIG_RBD) += rbd.o
>>   block-obj-$(CONFIG_GLUSTERFS) += gluster.o
>> diff --git a/block/nfs.c b/block/nfs.c
>> new file mode 100644
>> index 0000000..2bbf7a2
>> --- /dev/null
>> +++ b/block/nfs.c
>> @@ -0,0 +1,440 @@
>> +/*
>> + * QEMU Block driver for native access to files on NFS shares
>> + *
>> + * Copyright (c) 2014 Peter Lieven <pl@kamp.de>
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a copy
>> + * of this software and associated documentation files (the "Software"), to deal
>> + * in the Software without restriction, including without limitation the rights
>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
>> + * copies of the Software, and to permit persons to whom the Software is
>> + * furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>> + * THE SOFTWARE.
>> + */
>> +
>> +#include "config-host.h"
>> +
>> +#include <poll.h>
>> +#include "qemu-common.h"
>> +#include "qemu/config-file.h"
>> +#include "qemu/error-report.h"
>> +#include "block/block_int.h"
>> +#include "trace.h"
>> +#include "qemu/iov.h"
>> +#include "qemu/uri.h"
>> +#include "sysemu/sysemu.h"
>> +#include <nfsc/libnfs.h>
>> +
>> +typedef struct NFSClient {
>> +    struct nfs_context *context;
>> +    struct nfsfh *fh;
>> +    int events;
>> +    bool has_zero_init;
>> +} NFSClient;
>> +
>> +typedef struct NFSRPC {
>> +    int status;
>> +    int complete;
>> +    QEMUIOVector *iov;
>> +    struct stat *st;
>> +    Coroutine *co;
>> +    QEMUBH *bh;
>> +} NFSRPC;
>> +
>> +static void nfs_process_read(void *arg);
>> +static void nfs_process_write(void *arg);
>> +
>> +static void nfs_set_events(NFSClient *client)
>> +{
>> +    int ev = nfs_which_events(client->context);
>> +    if (ev != client->events) {
>> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context),
>> +                      (ev & POLLIN) ? nfs_process_read : NULL,
>> +                      (ev & POLLOUT) ? nfs_process_write : NULL,
>> +                      client);
>> +
>> +    }
>> +    client->events = ev;
>> +}
>> +
>> +static void nfs_process_read(void *arg)
>> +{
>> +    NFSClient *client = arg;
>> +    nfs_service(client->context, POLLIN);
>> +    nfs_set_events(client);
>> +}
>> +
>> +static void nfs_process_write(void *arg)
>> +{
>> +    NFSClient *client = arg;
>> +    nfs_service(client->context, POLLOUT);
>> +    nfs_set_events(client);
>> +}
>> +
>> +static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
>> +{
>> +    *task = (NFSRPC) {
>> +        .co         = qemu_coroutine_self(),
>> +    };
>> +}
>> +
>> +static void nfs_co_generic_bh_cb(void *opaque)
>> +{
>> +    NFSRPC *task = opaque;
>> +    qemu_bh_delete(task->bh);
>> +    qemu_coroutine_enter(task->co, NULL);
>> +}
>> +
>> +static void
>> +nfs_co_generic_cb(int status, struct nfs_context *nfs, void *data,
>> +                  void *private_data)
>> +{
>> +    NFSRPC *task = private_data;
>> +    task->complete = 1;
>> +    task->status = status;
>> +    if (task->status > 0 && task->iov) {
>> +        if (task->status <= task->iov->size) {
> It feel very odd to compare something named status with something named size.
>
>> +            qemu_iovec_from_buf(task->iov, 0, data, task->status);
>> +        } else {
>> +            task->status = -EIO;
>> +        }
>> +    }
>> +    if (task->status == 0 && task->st) {
>> +        memcpy(task->st, data, sizeof(struct stat));
>> +    }
>> +    if (task->co) {
>> +        task->bh = qemu_bh_new(nfs_co_generic_bh_cb, task);
>> +        qemu_bh_schedule(task->bh);
>> +    }
>> +}
>> +
>> +static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
>> +                                     int64_t sector_num, int nb_sectors,
>> +                                     QEMUIOVector *iov)
>> +{
>> +    NFSClient *client = bs->opaque;
>> +    NFSRPC task;
>> +
>> +    nfs_co_init_task(client, &task);
>> +    task.iov = iov;
>> +
>> +    if (nfs_pread_async(client->context, client->fh,
>> +                        sector_num * BDRV_SECTOR_SIZE,
>> +                        nb_sectors * BDRV_SECTOR_SIZE,
>> +                        nfs_co_generic_cb, &task) != 0) {
>> +        return -ENOMEM;
>> +    }
>> +
>> +    while (!task.complete) {
>> +        nfs_set_events(client);
>> +        qemu_coroutine_yield();
>> +    }
>> +
>> +    if (task.status < 0) {
>> +        return task.status;
>> +    }
>> +
>> +    /* zero pad short reads */
>> +    if (task.status < iov->size) {
>> +        qemu_iovec_memset(iov, task.status, 0, iov->size - task.status);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
>> +                                        int64_t sector_num, int nb_sectors,
>> +                                        QEMUIOVector *iov)
>> +{
>> +    NFSClient *client = bs->opaque;
>> +    NFSRPC task;
>> +    char *buf = NULL;
>> +
>> +    nfs_co_init_task(client, &task);
>> +
>> +    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
>> +    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
>> +
>> +    if (nfs_pwrite_async(client->context, client->fh,
>> +                         sector_num * BDRV_SECTOR_SIZE,
>> +                         nb_sectors * BDRV_SECTOR_SIZE,
>> +                         buf, nfs_co_generic_cb, &task) != 0) {
>> +        g_free(buf);
>> +        return -ENOMEM;
>> +    }
>> +
>> +    while (!task.complete) {
>> +        nfs_set_events(client);
>> +        qemu_coroutine_yield();
>> +    }
>> +
>> +    g_free(buf);
>> +
>> +    if (task.status != nb_sectors * BDRV_SECTOR_SIZE) {
>> +        return task.status < 0 ? task.status : -EIO;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
>> +{
>> +    NFSClient *client = bs->opaque;
>> +    NFSRPC task;
>> +
>> +    nfs_co_init_task(client, &task);
>> +
>> +    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
>> +                        &task) != 0) {
>> +        return -ENOMEM;
>> +    }
>> +
>> +    while (!task.complete) {
>> +        nfs_set_events(client);
>> +        qemu_coroutine_yield();
>> +    }
>> +
>> +    return task.status;
>> +}
>> +
>> +/* TODO Convert to fine grained options */
>> +static QemuOptsList runtime_opts = {
>> +    .name = "nfs",
>> +    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
>> +    .desc = {
>> +        {
>> +            .name = "filename",
>> +            .type = QEMU_OPT_STRING,
>> +            .help = "URL to the NFS file",
>> +        },
>> +        { /* end of list */ }
>> +    },
>> +};
>> +
>> +static void nfs_client_close(NFSClient *client)
>> +{
>> +    if (client->context) {
>> +        if (client->fh) {
>> +            nfs_close(client->context, client->fh);
>> +        }
>> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
>> +        nfs_destroy_context(client->context);
>> +    }
>> +    memset(client, 0, sizeof(NFSClient));
>> +}
>> +
>> +static void nfs_file_close(BlockDriverState *bs)
>> +{
>> +    NFSClient *client = bs->opaque;
>> +    nfs_client_close(client);
>> +}
>> +
>> +static int64_t nfs_.cilclient_open(NFSClient *client, const char *filename,
>> +                               int flags, Error **errp)
>> +{
>> +    int ret = -EINVAL, i;
>> +    struct stat st;
>> +    URI *uri;
>> +    QueryParams *qp = NULL;
>> +    char *file = NULL, *strp = NULL;
>> +
>> +    uri = uri_parse(filename);
>> +    if (!uri) {
>> +        error_setg(errp, "Invalid URL specified");
>> +        goto fail;
> Should this be goto out since we don't have done nfs_init_context yet ?
>> +    }
>> +    strp = strrchr(uri->path, '/');
>> +    if (strp == NULL) {
>> +        error_setg(errp, "Invalid URL specified");
>> +        goto fail;
> goto out ?
>> +    }
>> +    file = g_strdup(strp);
>> +    *strp = 0;
>> +
>> +    client->context = nfs_init_context();
>> +    if (client->context == NULL) {
>> +        error_setg(errp, "Failed to init NFS context");
>> +        goto fail;
>> +    }
>> +
>> +    qp = query_params_parse(uri->query);
>> +    for (i = 0; i < qp->n; i++) {
>> +        if (!qp->p[i].value) {
>> +            error_setg(errp, "Value for NFS parameter expected: %s",
>> +                       qp->p[i].name);
>> +            goto fail;
>> +        }
>> +        if (!strncmp(qp->p[i].name, "uid", 3)) {
>> +            nfs_set_uid(client->context, atoi(qp->p[i].value));
>> +        } else if (!strncmp(qp->p[i].name, "gid", 3)) {
>> +            nfs_set_gid(client->context, atoi(qp->p[i].value));
>> +        } else if (!strncmp(qp->p[i].name, "tcp-syncnt", 10)) {
>> +            nfs_set_tcp_syncnt(client->context, atoi(qp->p[i].value));
>> +        } else {
>> +            error_setg(errp, "Unknown NFS parameter name: %s",
>> +                       qp->p[i].name);
>> +            goto fail;
>> +        }
>> +    }
>> +
>> +    ret = nfs_mount(client->context, uri->server, uri->path);
>> +    if (ret < 0) {
>> +        error_setg(errp, "Failed to mount nfs share: %s",
>> +                   nfs_get_error(client->context));
>> +        goto fail;
>> +    }
>> +
>> +    if (flags & O_CREAT) {
>> +        ret = nfs_creat(client->context, file, 0600, &client->fh);
>> +        if (ret < 0) {
>> +            error_setg(errp, "Failed to create file: %s",
>> +                       nfs_get_error(client->context));
>> +            goto fail;
>> +        }
>> +    } else {
>> +        ret = nfs_open(client->context, file, flags, &client->fh);
>> +        if (ret < 0) {
>> +            error_setg(errp, "Failed to open file : %s",
>> +                       nfs_get_error(client->context));
>> +            goto fail;
>> +        }
>> +    }
>> +
>> +    ret = nfs_fstat(client->context, client->fh, &st);
>> +    if (ret < 0) {
>> +        error_setg(errp, "Failed to fstat file: %s",
>> +                   nfs_get_error(client->context));
>> +        goto fail;
>> +    }
>> +
>> +    ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
>> +    client->has_zero_init = S_ISREG(st.st_mode);
>> +    goto out;
>> +fail:
>> +    nfs_client_close(client);
>> +out:
>> +    if (qp) {
>> +        query_params_free(qp);
>> +    }
>> +    uri_free(uri);
>> +    g_free(file);
>> +    return ret;
>> +}
>> +
>> +static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
>> +                         Error **errp) {
>> +    NFSClient *client = bs->opaque;
>> +    int64_t ret;
>> +    QemuOpts *opts;
>> +    Error *local_err = NULL;
>> +
>> +    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>> +    if (error_is_set(&local_err)) {
>> +        qerror_report_err(local_err);
> I have seen more usage of error_propagate(errp, local_err); in QEMU code.
> Maybe I am missing the point.
>
>> +        error_free(local_err);
>> +        return -EINVAL;
>> +    }
>> +    ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
>> +                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
>> +                          errp);
>> +    if (ret < 0) {
>> +        return ret;
>> +    }
>> +    bs->total_sectors = ret;
>> +    return 0;
>> +}
>> +
>> +static int nfs_file_create(const char *url, QEMUOptionParameter *options,
>> +                         Error **errp)
>> +{
>> +    int ret = 0;
>> +    int64_t total_size = 0;
>> +    NFSClient *client = g_malloc0(sizeof(NFSClient));
>> +
>> +    /* Read out options */
>> +    while (options && options->name) {
>> +        if (!strcmp(options->name, "size")) {
>> +            total_size = options->value.n;
>> +        }
>> +        options++;
>> +    }
>> +
>> +    ret = nfs_client_open(client, url, O_CREAT, errp);
>> +    if (ret < 0) {
>> +        goto out;
>> +    }
>> +    ret = nfs_ftruncate(client->context, client->fh, total_size);
>> +out:
> Should the out: label be after nfs_client_close(client); since the code will
> jump to it on nfs_client_open failure ?
You are right, but it doesn't hurt since you can call nfs_client_close multiple
times.

Peter
>
>> +    nfs_client_close(client);
>> +    g_free(client);
>> +    return ret;
>> +}
>> +
>> +static int nfs_has_zero_init(BlockDriverState *bs)
>> +{
>> +    NFSClient *client = bs->opaque;
>> +    return client->has_zero_init;
>> +}
>> +
>> +static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
>> +{
>> +    NFSClient *client = bs->opaque;
>> +    NFSRPC task = {0};
>> +    struct stat st;
>> +
>> +    task.st = &st;
>> +    if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
>> +                        &task) != 0) {
>> +        return -ENOMEM;
>> +    }
>> +
>> +    while (!task.complete) {
>> +        nfs_set_events(client);
>> +        qemu_aio_wait();
>> +    }
>> +
>> +    return (task.status < 0 ? task.status : st.st_blocks * st.st_blksize);
>> +}
>> +
>> +static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
>> +{
>> +    NFSClient *client = bs->opaque;
>> +    return nfs_ftruncate(client->context, client->fh, offset);
>> +}
>> +
>> +static BlockDriver bdrv_nfs = {
>> +    .format_name     = "nfs",
>> +    .protocol_name   = "nfs",
>> +
>> +    .instance_size   = sizeof(NFSClient),
>> +    .bdrv_needs_filename = true,
>> +    .bdrv_has_zero_init = nfs_has_zero_init,
>> +    .bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
>> +    .bdrv_truncate = nfs_file_truncate,
>> +
>> +    .bdrv_file_open  = nfs_file_open,
>> +    .bdrv_close      = nfs_file_close,
>> +    .bdrv_create     = nfs_file_create,
>> +
>> +    .bdrv_co_readv         = nfs_co_readv,
>> +    .bdrv_co_writev        = nfs_co_writev,
>> +    .bdrv_co_flush_to_disk = nfs_co_flush,
>> +};
>> +
>> +static void nfs_block_init(void)
>> +{
>> +    bdrv_register(&bdrv_nfs);
>> +}
>> +
>> +block_init(nfs_block_init);
>> diff --git a/configure b/configure
>> index b472694..9ca0c63 100755
>> --- a/configure
>> +++ b/configure
>> @@ -251,6 +251,7 @@ vss_win32_sdk=""
>>   win_sdk="no"
>>   want_tools="yes"
>>   libiscsi=""
>> +libnfs=""
>>   coroutine=""
>>   coroutine_pool=""
>>   seccomp=""
>> @@ -840,6 +841,10 @@ for opt do
>>     ;;
>>     --enable-libiscsi) libiscsi="yes"
>>     ;;
>> +  --disable-libnfs) libnfs="no"
>> +  ;;
>> +  --enable-libnfs) libnfs="yes"
>> +  ;;
>>     --enable-profiler) profiler="yes"
>>     ;;
>>     --disable-cocoa) cocoa="no"
>> @@ -1229,6 +1234,8 @@ Advanced options (experts only):
>>     --enable-rbd             enable building the rados block device (rbd)
>>     --disable-libiscsi       disable iscsi support
>>     --enable-libiscsi        enable iscsi support
>> +  --disable-libnfs         disable nfs support
>> +  --enable-libnfs          enable nfs support
>>     --disable-smartcard-nss  disable smartcard nss support
>>     --enable-smartcard-nss   enable smartcard nss support
>>     --disable-libusb         disable libusb (for usb passthrough)
>> @@ -3600,6 +3607,20 @@ elif test "$debug" = "no" ; then
>>     CFLAGS="-O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 $CFLAGS"
>>   fi
>>   
>> +##########################################
>> +# Do we have libnfs
>> +if test "$libnfs" != "no" ; then
>> +  if $pkg_config --atleast-version=1.8.91 libnfs; then
>> +    libnfs="yes"
>> +    libnfs_libs=$($pkg_config --libs libnfs)
>> +    LIBS="$LIBS $libnfs_libs"
>> +  else
>> +    if test "$libnfs" = "yes" ; then
>> +      feature_not_found "libnfs"
>> +    fi
>> +    libnfs="no"
>> +  fi
>> +fi
>>   
>>   # Disable zero malloc errors for official releases unless explicitly told to
>>   # enable/disable
>> @@ -3829,6 +3850,7 @@ echo "libiscsi support  $libiscsi (1.4.0)"
>>   else
>>   echo "libiscsi support  $libiscsi"
>>   fi
>> +echo "libnfs support    $libnfs"
>>   echo "build guest agent $guest_agent"
>>   echo "QGA VSS support   $guest_agent_with_vss"
>>   echo "seccomp support   $seccomp"
>> @@ -4165,6 +4187,10 @@ if test "$libiscsi" = "yes" ; then
>>     fi
>>   fi
>>   
>> +if test "$libnfs" = "yes" ; then
>> +  echo "CONFIG_LIBNFS=y" >> $config_host_mak
>> +fi
>> +
>>   if test "$seccomp" = "yes"; then
>>     echo "CONFIG_SECCOMP=y" >> $config_host_mak
>>   fi
>> diff --git a/qapi-schema.json b/qapi-schema.json
>> index 05ced9d..7cfb5e5 100644
>> --- a/qapi-schema.json
>> +++ b/qapi-schema.json
>> @@ -4371,6 +4371,7 @@
>>   # TODO gluster: Wait for structured options
>>   # TODO iscsi: Wait for structured options
>>   # TODO nbd: Should take InetSocketAddress for 'host'?
>> +# TODO nfs: Wait for structured options
>>   # TODO rbd: Wait for structured options
>>   # TODO sheepdog: Wait for structured options
>>   # TODO ssh: Should take InetSocketAddress for 'host'?
>> -- 
>> 1.7.9.5
>>
>>
Stefan Hajnoczi Jan. 30, 2014, 9:05 a.m. UTC | #3
On Wed, Jan 29, 2014 at 05:38:29PM +0100, Peter Lieven wrote:
> On 29.01.2014 17:19, Benoît Canet wrote:
> >Le Wednesday 29 Jan 2014 à 09:50:21 (+0100), Peter Lieven a écrit :
> >>This patch adds native support for accessing images on NFS
> >>shares without the requirement to actually mount the entire
> >>NFS share on the host.
> >>
> >>NFS Images can simply be specified by an url of the form:
> >>nfs://<host>/<export>/<filename>[?param=value[&param2=value2[&...]]]
> >>
> >>For example:
> >>qemu-img create -f qcow2 nfs://10.0.0.1/qemu-images/test.qcow2
> >>
> >>You need LibNFS from Ronnie Sahlberg available at:
> >>    git://github.com/sahlberg/libnfs.git
> >>for this to work.
> >>
> >>During configure it is automatically probed for libnfs and support
> >>is enabled on-the-fly. You can forbid or enforce libnfs support
> >>with --disable-libnfs or --enable-libnfs respectively.
> >>
> >>Due to NFS restrictions you might need to execute your binaries
> >>as root, allow them to open priviledged ports (<1024) or specify
> >>insecure option on the NFS server.
> >>
> >>For additional information on ROOT vs. non-ROOT operation and URL
> >>format + parameters see:
> >>    https://raw.github.com/sahlberg/libnfs/master/README
> >>
> >>Supported by qemu are the uid, gid and tcp-syncnt URL parameters.
> >>
> >>LibNFS currently support NFS version 3 only.
> >>
> >>Signed-off-by: Peter Lieven <pl@kamp.de>
> >>---
> >>  MAINTAINERS         |    5 +
> >>  block/Makefile.objs |    1 +
> >>  block/nfs.c         |  440 +++++++++++++++++++++++++++++++++++++++++++++++++++
> >>  configure           |   26 +++
> >>  qapi-schema.json    |    1 +
> >>  5 files changed, 473 insertions(+)
> >>  create mode 100644 block/nfs.c
> >>
> >>diff --git a/MAINTAINERS b/MAINTAINERS
> >>index fb53242..f8411f9 100644
> >>--- a/MAINTAINERS
> >>+++ b/MAINTAINERS
> >>@@ -936,6 +936,11 @@ M: Peter Lieven <pl@kamp.de>
> >>  S: Supported
> >>  F: block/iscsi.c
> >>+NFS
> >>+M: Peter Lieven <pl@kamp.de>
> >>+S: Maintained
> >>+F: block/nfs.c
> >>+
> >>  SSH
> >>  M: Richard W.M. Jones <rjones@redhat.com>
> >>  S: Supported
> >>diff --git a/block/Makefile.objs b/block/Makefile.objs
> >>index 4e8c91e..e254a21 100644
> >>--- a/block/Makefile.objs
> >>+++ b/block/Makefile.objs
> >>@@ -12,6 +12,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
> >>  ifeq ($(CONFIG_POSIX),y)
> >>  block-obj-y += nbd.o nbd-client.o sheepdog.o
> >>  block-obj-$(CONFIG_LIBISCSI) += iscsi.o
> >>+block-obj-$(CONFIG_LIBNFS) += nfs.o
> >>  block-obj-$(CONFIG_CURL) += curl.o
> >>  block-obj-$(CONFIG_RBD) += rbd.o
> >>  block-obj-$(CONFIG_GLUSTERFS) += gluster.o
> >>diff --git a/block/nfs.c b/block/nfs.c
> >>new file mode 100644
> >>index 0000000..2bbf7a2
> >>--- /dev/null
> >>+++ b/block/nfs.c
> >>@@ -0,0 +1,440 @@
> >>+/*
> >>+ * QEMU Block driver for native access to files on NFS shares
> >>+ *
> >>+ * Copyright (c) 2014 Peter Lieven <pl@kamp.de>
> >>+ *
> >>+ * Permission is hereby granted, free of charge, to any person obtaining a copy
> >>+ * of this software and associated documentation files (the "Software"), to deal
> >>+ * in the Software without restriction, including without limitation the rights
> >>+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> >>+ * copies of the Software, and to permit persons to whom the Software is
> >>+ * furnished to do so, subject to the following conditions:
> >>+ *
> >>+ * The above copyright notice and this permission notice shall be included in
> >>+ * all copies or substantial portions of the Software.
> >>+ *
> >>+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> >>+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> >>+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> >>+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> >>+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> >>+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> >>+ * THE SOFTWARE.
> >>+ */
> >>+
> >>+#include "config-host.h"
> >>+
> >>+#include <poll.h>
> >>+#include "qemu-common.h"
> >>+#include "qemu/config-file.h"
> >>+#include "qemu/error-report.h"
> >>+#include "block/block_int.h"
> >>+#include "trace.h"
> >>+#include "qemu/iov.h"
> >>+#include "qemu/uri.h"
> >>+#include "sysemu/sysemu.h"
> >>+#include <nfsc/libnfs.h>
> >>+
> >>+typedef struct NFSClient {
> >>+    struct nfs_context *context;
> >>+    struct nfsfh *fh;
> >>+    int events;
> >>+    bool has_zero_init;
> >>+} NFSClient;
> >>+
> >>+typedef struct NFSRPC {
> >>+    int status;
> >>+    int complete;
> >>+    QEMUIOVector *iov;
> >>+    struct stat *st;
> >>+    Coroutine *co;
> >>+    QEMUBH *bh;
> >>+} NFSRPC;
> >>+
> >>+static void nfs_process_read(void *arg);
> >>+static void nfs_process_write(void *arg);
> >>+
> >>+static void nfs_set_events(NFSClient *client)
> >>+{
> >>+    int ev = nfs_which_events(client->context);
> >>+    if (ev != client->events) {
> >>+        qemu_aio_set_fd_handler(nfs_get_fd(client->context),
> >>+                      (ev & POLLIN) ? nfs_process_read : NULL,
> >>+                      (ev & POLLOUT) ? nfs_process_write : NULL,
> >>+                      client);
> >>+
> >>+    }
> >>+    client->events = ev;
> >>+}
> >>+
> >>+static void nfs_process_read(void *arg)
> >>+{
> >>+    NFSClient *client = arg;
> >>+    nfs_service(client->context, POLLIN);
> >>+    nfs_set_events(client);
> >>+}
> >>+
> >>+static void nfs_process_write(void *arg)
> >>+{
> >>+    NFSClient *client = arg;
> >>+    nfs_service(client->context, POLLOUT);
> >>+    nfs_set_events(client);
> >>+}
> >>+
> >>+static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
> >>+{
> >>+    *task = (NFSRPC) {
> >>+        .co         = qemu_coroutine_self(),
> >>+    };
> >>+}
> >>+
> >>+static void nfs_co_generic_bh_cb(void *opaque)
> >>+{
> >>+    NFSRPC *task = opaque;
> >>+    qemu_bh_delete(task->bh);
> >>+    qemu_coroutine_enter(task->co, NULL);
> >>+}
> >>+
> >>+static void
> >>+nfs_co_generic_cb(int status, struct nfs_context *nfs, void *data,
> >>+                  void *private_data)
> >>+{
> >>+    NFSRPC *task = private_data;
> >>+    task->complete = 1;
> >>+    task->status = status;
> >>+    if (task->status > 0 && task->iov) {
> >>+        if (task->status <= task->iov->size) {
> >It feel very odd to compare something named status with something named size.
> >
> >>+            qemu_iovec_from_buf(task->iov, 0, data, task->status);
> >>+        } else {
> >>+            task->status = -EIO;
> >>+        }
> >>+    }
> >>+    if (task->status == 0 && task->st) {
> >>+        memcpy(task->st, data, sizeof(struct stat));
> >>+    }
> >>+    if (task->co) {
> >>+        task->bh = qemu_bh_new(nfs_co_generic_bh_cb, task);
> >>+        qemu_bh_schedule(task->bh);
> >>+    }
> >>+}
> >>+
> >>+static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
> >>+                                     int64_t sector_num, int nb_sectors,
> >>+                                     QEMUIOVector *iov)
> >>+{
> >>+    NFSClient *client = bs->opaque;
> >>+    NFSRPC task;
> >>+
> >>+    nfs_co_init_task(client, &task);
> >>+    task.iov = iov;
> >>+
> >>+    if (nfs_pread_async(client->context, client->fh,
> >>+                        sector_num * BDRV_SECTOR_SIZE,
> >>+                        nb_sectors * BDRV_SECTOR_SIZE,
> >>+                        nfs_co_generic_cb, &task) != 0) {
> >>+        return -ENOMEM;
> >>+    }
> >>+
> >>+    while (!task.complete) {
> >>+        nfs_set_events(client);
> >>+        qemu_coroutine_yield();
> >>+    }
> >>+
> >>+    if (task.status < 0) {
> >>+        return task.status;
> >>+    }
> >>+
> >>+    /* zero pad short reads */
> >>+    if (task.status < iov->size) {
> >>+        qemu_iovec_memset(iov, task.status, 0, iov->size - task.status);
> >>+    }
> >>+
> >>+    return 0;
> >>+}
> >>+
> >>+static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
> >>+                                        int64_t sector_num, int nb_sectors,
> >>+                                        QEMUIOVector *iov)
> >>+{
> >>+    NFSClient *client = bs->opaque;
> >>+    NFSRPC task;
> >>+    char *buf = NULL;
> >>+
> >>+    nfs_co_init_task(client, &task);
> >>+
> >>+    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
> >>+    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
> >>+
> >>+    if (nfs_pwrite_async(client->context, client->fh,
> >>+                         sector_num * BDRV_SECTOR_SIZE,
> >>+                         nb_sectors * BDRV_SECTOR_SIZE,
> >>+                         buf, nfs_co_generic_cb, &task) != 0) {
> >>+        g_free(buf);
> >>+        return -ENOMEM;
> >>+    }
> >>+
> >>+    while (!task.complete) {
> >>+        nfs_set_events(client);
> >>+        qemu_coroutine_yield();
> >>+    }
> >>+
> >>+    g_free(buf);
> >>+
> >>+    if (task.status != nb_sectors * BDRV_SECTOR_SIZE) {
> >>+        return task.status < 0 ? task.status : -EIO;
> >>+    }
> >>+
> >>+    return 0;
> >>+}
> >>+
> >>+static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
> >>+{
> >>+    NFSClient *client = bs->opaque;
> >>+    NFSRPC task;
> >>+
> >>+    nfs_co_init_task(client, &task);
> >>+
> >>+    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
> >>+                        &task) != 0) {
> >>+        return -ENOMEM;
> >>+    }
> >>+
> >>+    while (!task.complete) {
> >>+        nfs_set_events(client);
> >>+        qemu_coroutine_yield();
> >>+    }
> >>+
> >>+    return task.status;
> >>+}
> >>+
> >>+/* TODO Convert to fine grained options */
> >>+static QemuOptsList runtime_opts = {
> >>+    .name = "nfs",
> >>+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
> >>+    .desc = {
> >>+        {
> >>+            .name = "filename",
> >>+            .type = QEMU_OPT_STRING,
> >>+            .help = "URL to the NFS file",
> >>+        },
> >>+        { /* end of list */ }
> >>+    },
> >>+};
> >>+
> >>+static void nfs_client_close(NFSClient *client)
> >>+{
> >>+    if (client->context) {
> >>+        if (client->fh) {
> >>+            nfs_close(client->context, client->fh);
> >>+        }
> >>+        qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
> >>+        nfs_destroy_context(client->context);
> >>+    }
> >>+    memset(client, 0, sizeof(NFSClient));
> >>+}
> >>+
> >>+static void nfs_file_close(BlockDriverState *bs)
> >>+{
> >>+    NFSClient *client = bs->opaque;
> >>+    nfs_client_close(client);
> >>+}
> >>+
> >>+static int64_t nfs_.cilclient_open(NFSClient *client, const char *filename,
> >>+                               int flags, Error **errp)
> >>+{
> >>+    int ret = -EINVAL, i;
> >>+    struct stat st;
> >>+    URI *uri;
> >>+    QueryParams *qp = NULL;
> >>+    char *file = NULL, *strp = NULL;
> >>+
> >>+    uri = uri_parse(filename);
> >>+    if (!uri) {
> >>+        error_setg(errp, "Invalid URL specified");
> >>+        goto fail;
> >Should this be goto out since we don't have done nfs_init_context yet ?
> >>+    }
> >>+    strp = strrchr(uri->path, '/');
> >>+    if (strp == NULL) {
> >>+        error_setg(errp, "Invalid URL specified");
> >>+        goto fail;
> >goto out ?
> >>+    }
> >>+    file = g_strdup(strp);
> >>+    *strp = 0;
> >>+
> >>+    client->context = nfs_init_context();
> >>+    if (client->context == NULL) {
> >>+        error_setg(errp, "Failed to init NFS context");
> >>+        goto fail;
> >>+    }
> >>+
> >>+    qp = query_params_parse(uri->query);
> >>+    for (i = 0; i < qp->n; i++) {
> >>+        if (!qp->p[i].value) {
> >>+            error_setg(errp, "Value for NFS parameter expected: %s",
> >>+                       qp->p[i].name);
> >>+            goto fail;
> >>+        }
> >>+        if (!strncmp(qp->p[i].name, "uid", 3)) {
> >>+            nfs_set_uid(client->context, atoi(qp->p[i].value));
> >>+        } else if (!strncmp(qp->p[i].name, "gid", 3)) {
> >>+            nfs_set_gid(client->context, atoi(qp->p[i].value));
> >>+        } else if (!strncmp(qp->p[i].name, "tcp-syncnt", 10)) {
> >>+            nfs_set_tcp_syncnt(client->context, atoi(qp->p[i].value));
> >>+        } else {
> >>+            error_setg(errp, "Unknown NFS parameter name: %s",
> >>+                       qp->p[i].name);
> >>+            goto fail;
> >>+        }
> >>+    }
> >>+
> >>+    ret = nfs_mount(client->context, uri->server, uri->path);
> >>+    if (ret < 0) {
> >>+        error_setg(errp, "Failed to mount nfs share: %s",
> >>+                   nfs_get_error(client->context));
> >>+        goto fail;
> >>+    }
> >>+
> >>+    if (flags & O_CREAT) {
> >>+        ret = nfs_creat(client->context, file, 0600, &client->fh);
> >>+        if (ret < 0) {
> >>+            error_setg(errp, "Failed to create file: %s",
> >>+                       nfs_get_error(client->context));
> >>+            goto fail;
> >>+        }
> >>+    } else {
> >>+        ret = nfs_open(client->context, file, flags, &client->fh);
> >>+        if (ret < 0) {
> >>+            error_setg(errp, "Failed to open file : %s",
> >>+                       nfs_get_error(client->context));
> >>+            goto fail;
> >>+        }
> >>+    }
> >>+
> >>+    ret = nfs_fstat(client->context, client->fh, &st);
> >>+    if (ret < 0) {
> >>+        error_setg(errp, "Failed to fstat file: %s",
> >>+                   nfs_get_error(client->context));
> >>+        goto fail;
> >>+    }
> >>+
> >>+    ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
> >>+    client->has_zero_init = S_ISREG(st.st_mode);
> >>+    goto out;
> >>+fail:
> >>+    nfs_client_close(client);
> >>+out:
> >>+    if (qp) {
> >>+        query_params_free(qp);
> >>+    }
> >>+    uri_free(uri);
> >>+    g_free(file);
> >>+    return ret;
> >>+}
> >>+
> >>+static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
> >>+                         Error **errp) {
> >>+    NFSClient *client = bs->opaque;
> >>+    int64_t ret;
> >>+    QemuOpts *opts;
> >>+    Error *local_err = NULL;
> >>+
> >>+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
> >>+    qemu_opts_absorb_qdict(opts, options, &local_err);
> >>+    if (error_is_set(&local_err)) {
> >>+        qerror_report_err(local_err);
> >I have seen more usage of error_propagate(errp, local_err); in QEMU code.
> >Maybe I am missing the point.
> >
> >>+        error_free(local_err);
> >>+        return -EINVAL;
> >>+    }
> >>+    ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
> >>+                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
> >>+                          errp);
> >>+    if (ret < 0) {
> >>+        return ret;
> >>+    }
> >>+    bs->total_sectors = ret;
> >>+    return 0;
> >>+}
> >>+
> >>+static int nfs_file_create(const char *url, QEMUOptionParameter *options,
> >>+                         Error **errp)
> >>+{
> >>+    int ret = 0;
> >>+    int64_t total_size = 0;
> >>+    NFSClient *client = g_malloc0(sizeof(NFSClient));
> >>+
> >>+    /* Read out options */
> >>+    while (options && options->name) {
> >>+        if (!strcmp(options->name, "size")) {
> >>+            total_size = options->value.n;
> >>+        }
> >>+        options++;
> >>+    }
> >>+
> >>+    ret = nfs_client_open(client, url, O_CREAT, errp);
> >>+    if (ret < 0) {
> >>+        goto out;
> >>+    }
> >>+    ret = nfs_ftruncate(client->context, client->fh, total_size);
> >>+out:
> >Should the out: label be after nfs_client_close(client); since the code will
> >jump to it on nfs_client_open failure ?
> You are right, but it doesn't hurt since you can call nfs_client_close multiple
> times.

Will you send another revision addressing the other comments?

Stefan
Peter Lieven Jan. 30, 2014, 9:12 a.m. UTC | #4
Am 30.01.2014 um 10:05 schrieb Stefan Hajnoczi <stefanha@gmail.com>:

> On Wed, Jan 29, 2014 at 05:38:29PM +0100, Peter Lieven wrote:
>> On 29.01.2014 17:19, Benoît Canet wrote:
>>> Le Wednesday 29 Jan 2014 à 09:50:21 (+0100), Peter Lieven a écrit :
>>>> This patch adds native support for accessing images on NFS
>>>> shares without the requirement to actually mount the entire
>>>> NFS share on the host.
>>>> 
>>>> NFS Images can simply be specified by an url of the form:
>>>> nfs://<host>/<export>/<filename>[?param=value[&param2=value2[&...]]]
>>>> 
>>>> For example:
>>>> qemu-img create -f qcow2 nfs://10.0.0.1/qemu-images/test.qcow2
>>>> 
>>>> You need LibNFS from Ronnie Sahlberg available at:
>>>>   git://github.com/sahlberg/libnfs.git
>>>> for this to work.
>>>> 
>>>> During configure it is automatically probed for libnfs and support
>>>> is enabled on-the-fly. You can forbid or enforce libnfs support
>>>> with --disable-libnfs or --enable-libnfs respectively.
>>>> 
>>>> Due to NFS restrictions you might need to execute your binaries
>>>> as root, allow them to open priviledged ports (<1024) or specify
>>>> insecure option on the NFS server.
>>>> 
>>>> For additional information on ROOT vs. non-ROOT operation and URL
>>>> format + parameters see:
>>>>   https://raw.github.com/sahlberg/libnfs/master/README
>>>> 
>>>> Supported by qemu are the uid, gid and tcp-syncnt URL parameters.
>>>> 
>>>> LibNFS currently support NFS version 3 only.
>>>> 
>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>> ---
>>>> MAINTAINERS         |    5 +
>>>> block/Makefile.objs |    1 +
>>>> block/nfs.c         |  440 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>>> configure           |   26 +++
>>>> qapi-schema.json    |    1 +
>>>> 5 files changed, 473 insertions(+)
>>>> create mode 100644 block/nfs.c
>>>> 
>>>> diff --git a/MAINTAINERS b/MAINTAINERS
>>>> index fb53242..f8411f9 100644
>>>> --- a/MAINTAINERS
>>>> +++ b/MAINTAINERS
>>>> @@ -936,6 +936,11 @@ M: Peter Lieven <pl@kamp.de>
>>>> S: Supported
>>>> F: block/iscsi.c
>>>> +NFS
>>>> +M: Peter Lieven <pl@kamp.de>
>>>> +S: Maintained
>>>> +F: block/nfs.c
>>>> +
>>>> SSH
>>>> M: Richard W.M. Jones <rjones@redhat.com>
>>>> S: Supported
>>>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>>>> index 4e8c91e..e254a21 100644
>>>> --- a/block/Makefile.objs
>>>> +++ b/block/Makefile.objs
>>>> @@ -12,6 +12,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>>>> ifeq ($(CONFIG_POSIX),y)
>>>> block-obj-y += nbd.o nbd-client.o sheepdog.o
>>>> block-obj-$(CONFIG_LIBISCSI) += iscsi.o
>>>> +block-obj-$(CONFIG_LIBNFS) += nfs.o
>>>> block-obj-$(CONFIG_CURL) += curl.o
>>>> block-obj-$(CONFIG_RBD) += rbd.o
>>>> block-obj-$(CONFIG_GLUSTERFS) += gluster.o
>>>> diff --git a/block/nfs.c b/block/nfs.c
>>>> new file mode 100644
>>>> index 0000000..2bbf7a2
>>>> --- /dev/null
>>>> +++ b/block/nfs.c
>>>> @@ -0,0 +1,440 @@
>>>> +/*
>>>> + * QEMU Block driver for native access to files on NFS shares
>>>> + *
>>>> + * Copyright (c) 2014 Peter Lieven <pl@kamp.de>
>>>> + *
>>>> + * Permission is hereby granted, free of charge, to any person obtaining a copy
>>>> + * of this software and associated documentation files (the "Software"), to deal
>>>> + * in the Software without restriction, including without limitation the rights
>>>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
>>>> + * copies of the Software, and to permit persons to whom the Software is
>>>> + * furnished to do so, subject to the following conditions:
>>>> + *
>>>> + * The above copyright notice and this permission notice shall be included in
>>>> + * all copies or substantial portions of the Software.
>>>> + *
>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>>>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
>>>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>>>> + * THE SOFTWARE.
>>>> + */
>>>> +
>>>> +#include "config-host.h"
>>>> +
>>>> +#include <poll.h>
>>>> +#include "qemu-common.h"
>>>> +#include "qemu/config-file.h"
>>>> +#include "qemu/error-report.h"
>>>> +#include "block/block_int.h"
>>>> +#include "trace.h"
>>>> +#include "qemu/iov.h"
>>>> +#include "qemu/uri.h"
>>>> +#include "sysemu/sysemu.h"
>>>> +#include <nfsc/libnfs.h>
>>>> +
>>>> +typedef struct NFSClient {
>>>> +    struct nfs_context *context;
>>>> +    struct nfsfh *fh;
>>>> +    int events;
>>>> +    bool has_zero_init;
>>>> +} NFSClient;
>>>> +
>>>> +typedef struct NFSRPC {
>>>> +    int status;
>>>> +    int complete;
>>>> +    QEMUIOVector *iov;
>>>> +    struct stat *st;
>>>> +    Coroutine *co;
>>>> +    QEMUBH *bh;
>>>> +} NFSRPC;
>>>> +
>>>> +static void nfs_process_read(void *arg);
>>>> +static void nfs_process_write(void *arg);
>>>> +
>>>> +static void nfs_set_events(NFSClient *client)
>>>> +{
>>>> +    int ev = nfs_which_events(client->context);
>>>> +    if (ev != client->events) {
>>>> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context),
>>>> +                      (ev & POLLIN) ? nfs_process_read : NULL,
>>>> +                      (ev & POLLOUT) ? nfs_process_write : NULL,
>>>> +                      client);
>>>> +
>>>> +    }
>>>> +    client->events = ev;
>>>> +}
>>>> +
>>>> +static void nfs_process_read(void *arg)
>>>> +{
>>>> +    NFSClient *client = arg;
>>>> +    nfs_service(client->context, POLLIN);
>>>> +    nfs_set_events(client);
>>>> +}
>>>> +
>>>> +static void nfs_process_write(void *arg)
>>>> +{
>>>> +    NFSClient *client = arg;
>>>> +    nfs_service(client->context, POLLOUT);
>>>> +    nfs_set_events(client);
>>>> +}
>>>> +
>>>> +static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
>>>> +{
>>>> +    *task = (NFSRPC) {
>>>> +        .co         = qemu_coroutine_self(),
>>>> +    };
>>>> +}
>>>> +
>>>> +static void nfs_co_generic_bh_cb(void *opaque)
>>>> +{
>>>> +    NFSRPC *task = opaque;
>>>> +    qemu_bh_delete(task->bh);
>>>> +    qemu_coroutine_enter(task->co, NULL);
>>>> +}
>>>> +
>>>> +static void
>>>> +nfs_co_generic_cb(int status, struct nfs_context *nfs, void *data,
>>>> +                  void *private_data)
>>>> +{
>>>> +    NFSRPC *task = private_data;
>>>> +    task->complete = 1;
>>>> +    task->status = status;
>>>> +    if (task->status > 0 && task->iov) {
>>>> +        if (task->status <= task->iov->size) {
>>> It feel very odd to compare something named status with something named size.
>>> 
>>>> +            qemu_iovec_from_buf(task->iov, 0, data, task->status);
>>>> +        } else {
>>>> +            task->status = -EIO;
>>>> +        }
>>>> +    }
>>>> +    if (task->status == 0 && task->st) {
>>>> +        memcpy(task->st, data, sizeof(struct stat));
>>>> +    }
>>>> +    if (task->co) {
>>>> +        task->bh = qemu_bh_new(nfs_co_generic_bh_cb, task);
>>>> +        qemu_bh_schedule(task->bh);
>>>> +    }
>>>> +}
>>>> +
>>>> +static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
>>>> +                                     int64_t sector_num, int nb_sectors,
>>>> +                                     QEMUIOVector *iov)
>>>> +{
>>>> +    NFSClient *client = bs->opaque;
>>>> +    NFSRPC task;
>>>> +
>>>> +    nfs_co_init_task(client, &task);
>>>> +    task.iov = iov;
>>>> +
>>>> +    if (nfs_pread_async(client->context, client->fh,
>>>> +                        sector_num * BDRV_SECTOR_SIZE,
>>>> +                        nb_sectors * BDRV_SECTOR_SIZE,
>>>> +                        nfs_co_generic_cb, &task) != 0) {
>>>> +        return -ENOMEM;
>>>> +    }
>>>> +
>>>> +    while (!task.complete) {
>>>> +        nfs_set_events(client);
>>>> +        qemu_coroutine_yield();
>>>> +    }
>>>> +
>>>> +    if (task.status < 0) {
>>>> +        return task.status;
>>>> +    }
>>>> +
>>>> +    /* zero pad short reads */
>>>> +    if (task.status < iov->size) {
>>>> +        qemu_iovec_memset(iov, task.status, 0, iov->size - task.status);
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
>>>> +                                        int64_t sector_num, int nb_sectors,
>>>> +                                        QEMUIOVector *iov)
>>>> +{
>>>> +    NFSClient *client = bs->opaque;
>>>> +    NFSRPC task;
>>>> +    char *buf = NULL;
>>>> +
>>>> +    nfs_co_init_task(client, &task);
>>>> +
>>>> +    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
>>>> +    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
>>>> +
>>>> +    if (nfs_pwrite_async(client->context, client->fh,
>>>> +                         sector_num * BDRV_SECTOR_SIZE,
>>>> +                         nb_sectors * BDRV_SECTOR_SIZE,
>>>> +                         buf, nfs_co_generic_cb, &task) != 0) {
>>>> +        g_free(buf);
>>>> +        return -ENOMEM;
>>>> +    }
>>>> +
>>>> +    while (!task.complete) {
>>>> +        nfs_set_events(client);
>>>> +        qemu_coroutine_yield();
>>>> +    }
>>>> +
>>>> +    g_free(buf);
>>>> +
>>>> +    if (task.status != nb_sectors * BDRV_SECTOR_SIZE) {
>>>> +        return task.status < 0 ? task.status : -EIO;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
>>>> +{
>>>> +    NFSClient *client = bs->opaque;
>>>> +    NFSRPC task;
>>>> +
>>>> +    nfs_co_init_task(client, &task);
>>>> +
>>>> +    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
>>>> +                        &task) != 0) {
>>>> +        return -ENOMEM;
>>>> +    }
>>>> +
>>>> +    while (!task.complete) {
>>>> +        nfs_set_events(client);
>>>> +        qemu_coroutine_yield();
>>>> +    }
>>>> +
>>>> +    return task.status;
>>>> +}
>>>> +
>>>> +/* TODO Convert to fine grained options */
>>>> +static QemuOptsList runtime_opts = {
>>>> +    .name = "nfs",
>>>> +    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
>>>> +    .desc = {
>>>> +        {
>>>> +            .name = "filename",
>>>> +            .type = QEMU_OPT_STRING,
>>>> +            .help = "URL to the NFS file",
>>>> +        },
>>>> +        { /* end of list */ }
>>>> +    },
>>>> +};
>>>> +
>>>> +static void nfs_client_close(NFSClient *client)
>>>> +{
>>>> +    if (client->context) {
>>>> +        if (client->fh) {
>>>> +            nfs_close(client->context, client->fh);
>>>> +        }
>>>> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
>>>> +        nfs_destroy_context(client->context);
>>>> +    }
>>>> +    memset(client, 0, sizeof(NFSClient));
>>>> +}
>>>> +
>>>> +static void nfs_file_close(BlockDriverState *bs)
>>>> +{
>>>> +    NFSClient *client = bs->opaque;
>>>> +    nfs_client_close(client);
>>>> +}
>>>> +
>>>> +static int64_t nfs_.cilclient_open(NFSClient *client, const char *filename,
>>>> +                               int flags, Error **errp)
>>>> +{
>>>> +    int ret = -EINVAL, i;
>>>> +    struct stat st;
>>>> +    URI *uri;
>>>> +    QueryParams *qp = NULL;
>>>> +    char *file = NULL, *strp = NULL;
>>>> +
>>>> +    uri = uri_parse(filename);
>>>> +    if (!uri) {
>>>> +        error_setg(errp, "Invalid URL specified");
>>>> +        goto fail;
>>> Should this be goto out since we don't have done nfs_init_context yet ?
>>>> +    }
>>>> +    strp = strrchr(uri->path, '/');
>>>> +    if (strp == NULL) {
>>>> +        error_setg(errp, "Invalid URL specified");
>>>> +        goto fail;
>>> goto out ?
>>>> +    }
>>>> +    file = g_strdup(strp);
>>>> +    *strp = 0;
>>>> +
>>>> +    client->context = nfs_init_context();
>>>> +    if (client->context == NULL) {
>>>> +        error_setg(errp, "Failed to init NFS context");
>>>> +        goto fail;
>>>> +    }
>>>> +
>>>> +    qp = query_params_parse(uri->query);
>>>> +    for (i = 0; i < qp->n; i++) {
>>>> +        if (!qp->p[i].value) {
>>>> +            error_setg(errp, "Value for NFS parameter expected: %s",
>>>> +                       qp->p[i].name);
>>>> +            goto fail;
>>>> +        }
>>>> +        if (!strncmp(qp->p[i].name, "uid", 3)) {
>>>> +            nfs_set_uid(client->context, atoi(qp->p[i].value));
>>>> +        } else if (!strncmp(qp->p[i].name, "gid", 3)) {
>>>> +            nfs_set_gid(client->context, atoi(qp->p[i].value));
>>>> +        } else if (!strncmp(qp->p[i].name, "tcp-syncnt", 10)) {
>>>> +            nfs_set_tcp_syncnt(client->context, atoi(qp->p[i].value));
>>>> +        } else {
>>>> +            error_setg(errp, "Unknown NFS parameter name: %s",
>>>> +                       qp->p[i].name);
>>>> +            goto fail;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    ret = nfs_mount(client->context, uri->server, uri->path);
>>>> +    if (ret < 0) {
>>>> +        error_setg(errp, "Failed to mount nfs share: %s",
>>>> +                   nfs_get_error(client->context));
>>>> +        goto fail;
>>>> +    }
>>>> +
>>>> +    if (flags & O_CREAT) {
>>>> +        ret = nfs_creat(client->context, file, 0600, &client->fh);
>>>> +        if (ret < 0) {
>>>> +            error_setg(errp, "Failed to create file: %s",
>>>> +                       nfs_get_error(client->context));
>>>> +            goto fail;
>>>> +        }
>>>> +    } else {
>>>> +        ret = nfs_open(client->context, file, flags, &client->fh);
>>>> +        if (ret < 0) {
>>>> +            error_setg(errp, "Failed to open file : %s",
>>>> +                       nfs_get_error(client->context));
>>>> +            goto fail;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    ret = nfs_fstat(client->context, client->fh, &st);
>>>> +    if (ret < 0) {
>>>> +        error_setg(errp, "Failed to fstat file: %s",
>>>> +                   nfs_get_error(client->context));
>>>> +        goto fail;
>>>> +    }
>>>> +
>>>> +    ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
>>>> +    client->has_zero_init = S_ISREG(st.st_mode);
>>>> +    goto out;
>>>> +fail:
>>>> +    nfs_client_close(client);
>>>> +out:
>>>> +    if (qp) {
>>>> +        query_params_free(qp);
>>>> +    }
>>>> +    uri_free(uri);
>>>> +    g_free(file);
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
>>>> +                         Error **errp) {
>>>> +    NFSClient *client = bs->opaque;
>>>> +    int64_t ret;
>>>> +    QemuOpts *opts;
>>>> +    Error *local_err = NULL;
>>>> +
>>>> +    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
>>>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>>>> +    if (error_is_set(&local_err)) {
>>>> +        qerror_report_err(local_err);
>>> I have seen more usage of error_propagate(errp, local_err); in QEMU code.
>>> Maybe I am missing the point.
>>> 
>>>> +        error_free(local_err);
>>>> +        return -EINVAL;
>>>> +    }
>>>> +    ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
>>>> +                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
>>>> +                          errp);
>>>> +    if (ret < 0) {
>>>> +        return ret;
>>>> +    }
>>>> +    bs->total_sectors = ret;
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int nfs_file_create(const char *url, QEMUOptionParameter *options,
>>>> +                         Error **errp)
>>>> +{
>>>> +    int ret = 0;
>>>> +    int64_t total_size = 0;
>>>> +    NFSClient *client = g_malloc0(sizeof(NFSClient));
>>>> +
>>>> +    /* Read out options */
>>>> +    while (options && options->name) {
>>>> +        if (!strcmp(options->name, "size")) {
>>>> +            total_size = options->value.n;
>>>> +        }
>>>> +        options++;
>>>> +    }
>>>> +
>>>> +    ret = nfs_client_open(client, url, O_CREAT, errp);
>>>> +    if (ret < 0) {
>>>> +        goto out;
>>>> +    }
>>>> +    ret = nfs_ftruncate(client->context, client->fh, total_size);
>>>> +out:
>>> Should the out: label be after nfs_client_close(client); since the code will
>>> jump to it on nfs_client_open failure ?
>> You are right, but it doesn't hurt since you can call nfs_client_close multiple
>> times.
> 
> Will you send another revision addressing the other comments?

Sorry, which other comments are you referring to?

Have you been able to test the patch without the bad commit?

BR,
Peter
Stefan Hajnoczi Jan. 30, 2014, 2:22 p.m. UTC | #5
On Wed, Jan 29, 2014 at 05:19:59PM +0100, Benoît Canet wrote:
> Le Wednesday 29 Jan 2014 à 09:50:21 (+0100), Peter Lieven a écrit :

Hi Peter,
If I read your reply to Benoit correctly, you only addressed the
questions about nfs_client_close().  Here are Benoits other comments and
my thoughts on them:

> > +static void
> > +nfs_co_generic_cb(int status, struct nfs_context *nfs, void *data,
> > +                  void *private_data)
> > +{
> > +    NFSRPC *task = private_data;
> > +    task->complete = 1;
> > +    task->status = status;
> > +    if (task->status > 0 && task->iov) {
> > +        if (task->status <= task->iov->size) {
> It feel very odd to compare something named status with something named size.

Maybe the argument name isn't ideal.  Something like 'ret' or 'result'
would be more commonplace.

Not critical but would be nice to change.

> > +static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
> > +                         Error **errp) {
> > +    NFSClient *client = bs->opaque;
> > +    int64_t ret;
> > +    QemuOpts *opts;
> > +    Error *local_err = NULL;
> > +
> > +    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
> > +    qemu_opts_absorb_qdict(opts, options, &local_err);
> > +    if (error_is_set(&local_err)) {
> > +        qerror_report_err(local_err);
> I have seen more usage of error_propagate(errp, local_err); in QEMU code.
> Maybe I am missing the point.

Yes, I think you are right.  The Error should be propagated to the
caller.  It's not clear to me whether we can ever get an error from
qemu_opts_absorb_qdict() in this call site though.
Stefan Hajnoczi Jan. 30, 2014, 2:23 p.m. UTC | #6
On Thu, Jan 30, 2014 at 10:12:35AM +0100, Peter Lieven wrote:
> 
> Am 30.01.2014 um 10:05 schrieb Stefan Hajnoczi <stefanha@gmail.com>:
> 
> > On Wed, Jan 29, 2014 at 05:38:29PM +0100, Peter Lieven wrote:
> >> On 29.01.2014 17:19, Benoît Canet wrote:
> >>> Le Wednesday 29 Jan 2014 à 09:50:21 (+0100), Peter Lieven a écrit :
> >>>> This patch adds native support for accessing images on NFS
> >>>> shares without the requirement to actually mount the entire
> >>>> NFS share on the host.
> >>>> 
> >>>> NFS Images can simply be specified by an url of the form:
> >>>> nfs://<host>/<export>/<filename>[?param=value[&param2=value2[&...]]]
> >>>> 
> >>>> For example:
> >>>> qemu-img create -f qcow2 nfs://10.0.0.1/qemu-images/test.qcow2
> >>>> 
> >>>> You need LibNFS from Ronnie Sahlberg available at:
> >>>>   git://github.com/sahlberg/libnfs.git
> >>>> for this to work.
> >>>> 
> >>>> During configure it is automatically probed for libnfs and support
> >>>> is enabled on-the-fly. You can forbid or enforce libnfs support
> >>>> with --disable-libnfs or --enable-libnfs respectively.
> >>>> 
> >>>> Due to NFS restrictions you might need to execute your binaries
> >>>> as root, allow them to open priviledged ports (<1024) or specify
> >>>> insecure option on the NFS server.
> >>>> 
> >>>> For additional information on ROOT vs. non-ROOT operation and URL
> >>>> format + parameters see:
> >>>>   https://raw.github.com/sahlberg/libnfs/master/README
> >>>> 
> >>>> Supported by qemu are the uid, gid and tcp-syncnt URL parameters.
> >>>> 
> >>>> LibNFS currently support NFS version 3 only.
> >>>> 
> >>>> Signed-off-by: Peter Lieven <pl@kamp.de>
> >>>> ---
> >>>> MAINTAINERS         |    5 +
> >>>> block/Makefile.objs |    1 +
> >>>> block/nfs.c         |  440 +++++++++++++++++++++++++++++++++++++++++++++++++++
> >>>> configure           |   26 +++
> >>>> qapi-schema.json    |    1 +
> >>>> 5 files changed, 473 insertions(+)
> >>>> create mode 100644 block/nfs.c
> >>>> 
> >>>> diff --git a/MAINTAINERS b/MAINTAINERS
> >>>> index fb53242..f8411f9 100644
> >>>> --- a/MAINTAINERS
> >>>> +++ b/MAINTAINERS
> >>>> @@ -936,6 +936,11 @@ M: Peter Lieven <pl@kamp.de>
> >>>> S: Supported
> >>>> F: block/iscsi.c
> >>>> +NFS
> >>>> +M: Peter Lieven <pl@kamp.de>
> >>>> +S: Maintained
> >>>> +F: block/nfs.c
> >>>> +
> >>>> SSH
> >>>> M: Richard W.M. Jones <rjones@redhat.com>
> >>>> S: Supported
> >>>> diff --git a/block/Makefile.objs b/block/Makefile.objs
> >>>> index 4e8c91e..e254a21 100644
> >>>> --- a/block/Makefile.objs
> >>>> +++ b/block/Makefile.objs
> >>>> @@ -12,6 +12,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
> >>>> ifeq ($(CONFIG_POSIX),y)
> >>>> block-obj-y += nbd.o nbd-client.o sheepdog.o
> >>>> block-obj-$(CONFIG_LIBISCSI) += iscsi.o
> >>>> +block-obj-$(CONFIG_LIBNFS) += nfs.o
> >>>> block-obj-$(CONFIG_CURL) += curl.o
> >>>> block-obj-$(CONFIG_RBD) += rbd.o
> >>>> block-obj-$(CONFIG_GLUSTERFS) += gluster.o
> >>>> diff --git a/block/nfs.c b/block/nfs.c
> >>>> new file mode 100644
> >>>> index 0000000..2bbf7a2
> >>>> --- /dev/null
> >>>> +++ b/block/nfs.c
> >>>> @@ -0,0 +1,440 @@
> >>>> +/*
> >>>> + * QEMU Block driver for native access to files on NFS shares
> >>>> + *
> >>>> + * Copyright (c) 2014 Peter Lieven <pl@kamp.de>
> >>>> + *
> >>>> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> >>>> + * of this software and associated documentation files (the "Software"), to deal
> >>>> + * in the Software without restriction, including without limitation the rights
> >>>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> >>>> + * copies of the Software, and to permit persons to whom the Software is
> >>>> + * furnished to do so, subject to the following conditions:
> >>>> + *
> >>>> + * The above copyright notice and this permission notice shall be included in
> >>>> + * all copies or substantial portions of the Software.
> >>>> + *
> >>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> >>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> >>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> >>>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> >>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> >>>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> >>>> + * THE SOFTWARE.
> >>>> + */
> >>>> +
> >>>> +#include "config-host.h"
> >>>> +
> >>>> +#include <poll.h>
> >>>> +#include "qemu-common.h"
> >>>> +#include "qemu/config-file.h"
> >>>> +#include "qemu/error-report.h"
> >>>> +#include "block/block_int.h"
> >>>> +#include "trace.h"
> >>>> +#include "qemu/iov.h"
> >>>> +#include "qemu/uri.h"
> >>>> +#include "sysemu/sysemu.h"
> >>>> +#include <nfsc/libnfs.h>
> >>>> +
> >>>> +typedef struct NFSClient {
> >>>> +    struct nfs_context *context;
> >>>> +    struct nfsfh *fh;
> >>>> +    int events;
> >>>> +    bool has_zero_init;
> >>>> +} NFSClient;
> >>>> +
> >>>> +typedef struct NFSRPC {
> >>>> +    int status;
> >>>> +    int complete;
> >>>> +    QEMUIOVector *iov;
> >>>> +    struct stat *st;
> >>>> +    Coroutine *co;
> >>>> +    QEMUBH *bh;
> >>>> +} NFSRPC;
> >>>> +
> >>>> +static void nfs_process_read(void *arg);
> >>>> +static void nfs_process_write(void *arg);
> >>>> +
> >>>> +static void nfs_set_events(NFSClient *client)
> >>>> +{
> >>>> +    int ev = nfs_which_events(client->context);
> >>>> +    if (ev != client->events) {
> >>>> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context),
> >>>> +                      (ev & POLLIN) ? nfs_process_read : NULL,
> >>>> +                      (ev & POLLOUT) ? nfs_process_write : NULL,
> >>>> +                      client);
> >>>> +
> >>>> +    }
> >>>> +    client->events = ev;
> >>>> +}
> >>>> +
> >>>> +static void nfs_process_read(void *arg)
> >>>> +{
> >>>> +    NFSClient *client = arg;
> >>>> +    nfs_service(client->context, POLLIN);
> >>>> +    nfs_set_events(client);
> >>>> +}
> >>>> +
> >>>> +static void nfs_process_write(void *arg)
> >>>> +{
> >>>> +    NFSClient *client = arg;
> >>>> +    nfs_service(client->context, POLLOUT);
> >>>> +    nfs_set_events(client);
> >>>> +}
> >>>> +
> >>>> +static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
> >>>> +{
> >>>> +    *task = (NFSRPC) {
> >>>> +        .co         = qemu_coroutine_self(),
> >>>> +    };
> >>>> +}
> >>>> +
> >>>> +static void nfs_co_generic_bh_cb(void *opaque)
> >>>> +{
> >>>> +    NFSRPC *task = opaque;
> >>>> +    qemu_bh_delete(task->bh);
> >>>> +    qemu_coroutine_enter(task->co, NULL);
> >>>> +}
> >>>> +
> >>>> +static void
> >>>> +nfs_co_generic_cb(int status, struct nfs_context *nfs, void *data,
> >>>> +                  void *private_data)
> >>>> +{
> >>>> +    NFSRPC *task = private_data;
> >>>> +    task->complete = 1;
> >>>> +    task->status = status;
> >>>> +    if (task->status > 0 && task->iov) {
> >>>> +        if (task->status <= task->iov->size) {
> >>> It feel very odd to compare something named status with something named size.
> >>> 
> >>>> +            qemu_iovec_from_buf(task->iov, 0, data, task->status);
> >>>> +        } else {
> >>>> +            task->status = -EIO;
> >>>> +        }
> >>>> +    }
> >>>> +    if (task->status == 0 && task->st) {
> >>>> +        memcpy(task->st, data, sizeof(struct stat));
> >>>> +    }
> >>>> +    if (task->co) {
> >>>> +        task->bh = qemu_bh_new(nfs_co_generic_bh_cb, task);
> >>>> +        qemu_bh_schedule(task->bh);
> >>>> +    }
> >>>> +}
> >>>> +
> >>>> +static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
> >>>> +                                     int64_t sector_num, int nb_sectors,
> >>>> +                                     QEMUIOVector *iov)
> >>>> +{
> >>>> +    NFSClient *client = bs->opaque;
> >>>> +    NFSRPC task;
> >>>> +
> >>>> +    nfs_co_init_task(client, &task);
> >>>> +    task.iov = iov;
> >>>> +
> >>>> +    if (nfs_pread_async(client->context, client->fh,
> >>>> +                        sector_num * BDRV_SECTOR_SIZE,
> >>>> +                        nb_sectors * BDRV_SECTOR_SIZE,
> >>>> +                        nfs_co_generic_cb, &task) != 0) {
> >>>> +        return -ENOMEM;
> >>>> +    }
> >>>> +
> >>>> +    while (!task.complete) {
> >>>> +        nfs_set_events(client);
> >>>> +        qemu_coroutine_yield();
> >>>> +    }
> >>>> +
> >>>> +    if (task.status < 0) {
> >>>> +        return task.status;
> >>>> +    }
> >>>> +
> >>>> +    /* zero pad short reads */
> >>>> +    if (task.status < iov->size) {
> >>>> +        qemu_iovec_memset(iov, task.status, 0, iov->size - task.status);
> >>>> +    }
> >>>> +
> >>>> +    return 0;
> >>>> +}
> >>>> +
> >>>> +static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
> >>>> +                                        int64_t sector_num, int nb_sectors,
> >>>> +                                        QEMUIOVector *iov)
> >>>> +{
> >>>> +    NFSClient *client = bs->opaque;
> >>>> +    NFSRPC task;
> >>>> +    char *buf = NULL;
> >>>> +
> >>>> +    nfs_co_init_task(client, &task);
> >>>> +
> >>>> +    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
> >>>> +    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
> >>>> +
> >>>> +    if (nfs_pwrite_async(client->context, client->fh,
> >>>> +                         sector_num * BDRV_SECTOR_SIZE,
> >>>> +                         nb_sectors * BDRV_SECTOR_SIZE,
> >>>> +                         buf, nfs_co_generic_cb, &task) != 0) {
> >>>> +        g_free(buf);
> >>>> +        return -ENOMEM;
> >>>> +    }
> >>>> +
> >>>> +    while (!task.complete) {
> >>>> +        nfs_set_events(client);
> >>>> +        qemu_coroutine_yield();
> >>>> +    }
> >>>> +
> >>>> +    g_free(buf);
> >>>> +
> >>>> +    if (task.status != nb_sectors * BDRV_SECTOR_SIZE) {
> >>>> +        return task.status < 0 ? task.status : -EIO;
> >>>> +    }
> >>>> +
> >>>> +    return 0;
> >>>> +}
> >>>> +
> >>>> +static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
> >>>> +{
> >>>> +    NFSClient *client = bs->opaque;
> >>>> +    NFSRPC task;
> >>>> +
> >>>> +    nfs_co_init_task(client, &task);
> >>>> +
> >>>> +    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
> >>>> +                        &task) != 0) {
> >>>> +        return -ENOMEM;
> >>>> +    }
> >>>> +
> >>>> +    while (!task.complete) {
> >>>> +        nfs_set_events(client);
> >>>> +        qemu_coroutine_yield();
> >>>> +    }
> >>>> +
> >>>> +    return task.status;
> >>>> +}
> >>>> +
> >>>> +/* TODO Convert to fine grained options */
> >>>> +static QemuOptsList runtime_opts = {
> >>>> +    .name = "nfs",
> >>>> +    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
> >>>> +    .desc = {
> >>>> +        {
> >>>> +            .name = "filename",
> >>>> +            .type = QEMU_OPT_STRING,
> >>>> +            .help = "URL to the NFS file",
> >>>> +        },
> >>>> +        { /* end of list */ }
> >>>> +    },
> >>>> +};
> >>>> +
> >>>> +static void nfs_client_close(NFSClient *client)
> >>>> +{
> >>>> +    if (client->context) {
> >>>> +        if (client->fh) {
> >>>> +            nfs_close(client->context, client->fh);
> >>>> +        }
> >>>> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
> >>>> +        nfs_destroy_context(client->context);
> >>>> +    }
> >>>> +    memset(client, 0, sizeof(NFSClient));
> >>>> +}
> >>>> +
> >>>> +static void nfs_file_close(BlockDriverState *bs)
> >>>> +{
> >>>> +    NFSClient *client = bs->opaque;
> >>>> +    nfs_client_close(client);
> >>>> +}
> >>>> +
> >>>> +static int64_t nfs_.cilclient_open(NFSClient *client, const char *filename,
> >>>> +                               int flags, Error **errp)
> >>>> +{
> >>>> +    int ret = -EINVAL, i;
> >>>> +    struct stat st;
> >>>> +    URI *uri;
> >>>> +    QueryParams *qp = NULL;
> >>>> +    char *file = NULL, *strp = NULL;
> >>>> +
> >>>> +    uri = uri_parse(filename);
> >>>> +    if (!uri) {
> >>>> +        error_setg(errp, "Invalid URL specified");
> >>>> +        goto fail;
> >>> Should this be goto out since we don't have done nfs_init_context yet ?
> >>>> +    }
> >>>> +    strp = strrchr(uri->path, '/');
> >>>> +    if (strp == NULL) {
> >>>> +        error_setg(errp, "Invalid URL specified");
> >>>> +        goto fail;
> >>> goto out ?
> >>>> +    }
> >>>> +    file = g_strdup(strp);
> >>>> +    *strp = 0;
> >>>> +
> >>>> +    client->context = nfs_init_context();
> >>>> +    if (client->context == NULL) {
> >>>> +        error_setg(errp, "Failed to init NFS context");
> >>>> +        goto fail;
> >>>> +    }
> >>>> +
> >>>> +    qp = query_params_parse(uri->query);
> >>>> +    for (i = 0; i < qp->n; i++) {
> >>>> +        if (!qp->p[i].value) {
> >>>> +            error_setg(errp, "Value for NFS parameter expected: %s",
> >>>> +                       qp->p[i].name);
> >>>> +            goto fail;
> >>>> +        }
> >>>> +        if (!strncmp(qp->p[i].name, "uid", 3)) {
> >>>> +            nfs_set_uid(client->context, atoi(qp->p[i].value));
> >>>> +        } else if (!strncmp(qp->p[i].name, "gid", 3)) {
> >>>> +            nfs_set_gid(client->context, atoi(qp->p[i].value));
> >>>> +        } else if (!strncmp(qp->p[i].name, "tcp-syncnt", 10)) {
> >>>> +            nfs_set_tcp_syncnt(client->context, atoi(qp->p[i].value));
> >>>> +        } else {
> >>>> +            error_setg(errp, "Unknown NFS parameter name: %s",
> >>>> +                       qp->p[i].name);
> >>>> +            goto fail;
> >>>> +        }
> >>>> +    }
> >>>> +
> >>>> +    ret = nfs_mount(client->context, uri->server, uri->path);
> >>>> +    if (ret < 0) {
> >>>> +        error_setg(errp, "Failed to mount nfs share: %s",
> >>>> +                   nfs_get_error(client->context));
> >>>> +        goto fail;
> >>>> +    }
> >>>> +
> >>>> +    if (flags & O_CREAT) {
> >>>> +        ret = nfs_creat(client->context, file, 0600, &client->fh);
> >>>> +        if (ret < 0) {
> >>>> +            error_setg(errp, "Failed to create file: %s",
> >>>> +                       nfs_get_error(client->context));
> >>>> +            goto fail;
> >>>> +        }
> >>>> +    } else {
> >>>> +        ret = nfs_open(client->context, file, flags, &client->fh);
> >>>> +        if (ret < 0) {
> >>>> +            error_setg(errp, "Failed to open file : %s",
> >>>> +                       nfs_get_error(client->context));
> >>>> +            goto fail;
> >>>> +        }
> >>>> +    }
> >>>> +
> >>>> +    ret = nfs_fstat(client->context, client->fh, &st);
> >>>> +    if (ret < 0) {
> >>>> +        error_setg(errp, "Failed to fstat file: %s",
> >>>> +                   nfs_get_error(client->context));
> >>>> +        goto fail;
> >>>> +    }
> >>>> +
> >>>> +    ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
> >>>> +    client->has_zero_init = S_ISREG(st.st_mode);
> >>>> +    goto out;
> >>>> +fail:
> >>>> +    nfs_client_close(client);
> >>>> +out:
> >>>> +    if (qp) {
> >>>> +        query_params_free(qp);
> >>>> +    }
> >>>> +    uri_free(uri);
> >>>> +    g_free(file);
> >>>> +    return ret;
> >>>> +}
> >>>> +
> >>>> +static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
> >>>> +                         Error **errp) {
> >>>> +    NFSClient *client = bs->opaque;
> >>>> +    int64_t ret;
> >>>> +    QemuOpts *opts;
> >>>> +    Error *local_err = NULL;
> >>>> +
> >>>> +    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
> >>>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
> >>>> +    if (error_is_set(&local_err)) {
> >>>> +        qerror_report_err(local_err);
> >>> I have seen more usage of error_propagate(errp, local_err); in QEMU code.
> >>> Maybe I am missing the point.
> >>> 
> >>>> +        error_free(local_err);
> >>>> +        return -EINVAL;
> >>>> +    }
> >>>> +    ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
> >>>> +                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
> >>>> +                          errp);
> >>>> +    if (ret < 0) {
> >>>> +        return ret;
> >>>> +    }
> >>>> +    bs->total_sectors = ret;
> >>>> +    return 0;
> >>>> +}
> >>>> +
> >>>> +static int nfs_file_create(const char *url, QEMUOptionParameter *options,
> >>>> +                         Error **errp)
> >>>> +{
> >>>> +    int ret = 0;
> >>>> +    int64_t total_size = 0;
> >>>> +    NFSClient *client = g_malloc0(sizeof(NFSClient));
> >>>> +
> >>>> +    /* Read out options */
> >>>> +    while (options && options->name) {
> >>>> +        if (!strcmp(options->name, "size")) {
> >>>> +            total_size = options->value.n;
> >>>> +        }
> >>>> +        options++;
> >>>> +    }
> >>>> +
> >>>> +    ret = nfs_client_open(client, url, O_CREAT, errp);
> >>>> +    if (ret < 0) {
> >>>> +        goto out;
> >>>> +    }
> >>>> +    ret = nfs_ftruncate(client->context, client->fh, total_size);
> >>>> +out:
> >>> Should the out: label be after nfs_client_close(client); since the code will
> >>> jump to it on nfs_client_open failure ?
> >> You are right, but it doesn't hurt since you can call nfs_client_close multiple
> >> times.
> > 
> > Will you send another revision addressing the other comments?
> 
> Sorry, which other comments are you referring to?

I replied to Benoit's email hilighting his questions which were not
addressed.

> Have you been able to test the patch without the bad commit?

Waiting for a final version to test.

Stefan
Peter Lieven Jan. 30, 2014, 9:33 p.m. UTC | #7
Am 30.01.2014 um 15:23 schrieb Stefan Hajnoczi <stefanha@gmail.com>:

> On Thu, Jan 30, 2014 at 10:12:35AM +0100, Peter Lieven wrote:
>> 
>> Am 30.01.2014 um 10:05 schrieb Stefan Hajnoczi <stefanha@gmail.com>:
>> 
>>> On Wed, Jan 29, 2014 at 05:38:29PM +0100, Peter Lieven wrote:
>>>> On 29.01.2014 17:19, Benoît Canet wrote:
>>>>> Le Wednesday 29 Jan 2014 à 09:50:21 (+0100), Peter Lieven a écrit :
>>>>>> This patch adds native support for accessing images on NFS
>>>>>> shares without the requirement to actually mount the entire
>>>>>> NFS share on the host.
>>>>>> 
>>>>>> NFS Images can simply be specified by an url of the form:
>>>>>> nfs://<host>/<export>/<filename>[?param=value[&param2=value2[&...]]]
>>>>>> 
>>>>>> For example:
>>>>>> qemu-img create -f qcow2 nfs://10.0.0.1/qemu-images/test.qcow2
>>>>>> 
>>>>>> You need LibNFS from Ronnie Sahlberg available at:
>>>>>>  git://github.com/sahlberg/libnfs.git
>>>>>> for this to work.
>>>>>> 
>>>>>> During configure it is automatically probed for libnfs and support
>>>>>> is enabled on-the-fly. You can forbid or enforce libnfs support
>>>>>> with --disable-libnfs or --enable-libnfs respectively.
>>>>>> 
>>>>>> Due to NFS restrictions you might need to execute your binaries
>>>>>> as root, allow them to open priviledged ports (<1024) or specify
>>>>>> insecure option on the NFS server.
>>>>>> 
>>>>>> For additional information on ROOT vs. non-ROOT operation and URL
>>>>>> format + parameters see:
>>>>>>  https://raw.github.com/sahlberg/libnfs/master/README
>>>>>> 
>>>>>> Supported by qemu are the uid, gid and tcp-syncnt URL parameters.
>>>>>> 
>>>>>> LibNFS currently support NFS version 3 only.
>>>>>> 
>>>>>> Signed-off-by: Peter Lieven <pl@kamp.de>
>>>>>> ---
>>>>>> MAINTAINERS         |    5 +
>>>>>> block/Makefile.objs |    1 +
>>>>>> block/nfs.c         |  440 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>> configure           |   26 +++
>>>>>> qapi-schema.json    |    1 +
>>>>>> 5 files changed, 473 insertions(+)
>>>>>> create mode 100644 block/nfs.c
>>>>>> 
>>>>>> diff --git a/MAINTAINERS b/MAINTAINERS
>>>>>> index fb53242..f8411f9 100644
>>>>>> --- a/MAINTAINERS
>>>>>> +++ b/MAINTAINERS
>>>>>> @@ -936,6 +936,11 @@ M: Peter Lieven <pl@kamp.de>
>>>>>> S: Supported
>>>>>> F: block/iscsi.c
>>>>>> +NFS
>>>>>> +M: Peter Lieven <pl@kamp.de>
>>>>>> +S: Maintained
>>>>>> +F: block/nfs.c
>>>>>> +
>>>>>> SSH
>>>>>> M: Richard W.M. Jones <rjones@redhat.com>
>>>>>> S: Supported
>>>>>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>>>>>> index 4e8c91e..e254a21 100644
>>>>>> --- a/block/Makefile.objs
>>>>>> +++ b/block/Makefile.objs
>>>>>> @@ -12,6 +12,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>>>>>> ifeq ($(CONFIG_POSIX),y)
>>>>>> block-obj-y += nbd.o nbd-client.o sheepdog.o
>>>>>> block-obj-$(CONFIG_LIBISCSI) += iscsi.o
>>>>>> +block-obj-$(CONFIG_LIBNFS) += nfs.o
>>>>>> block-obj-$(CONFIG_CURL) += curl.o
>>>>>> block-obj-$(CONFIG_RBD) += rbd.o
>>>>>> block-obj-$(CONFIG_GLUSTERFS) += gluster.o
>>>>>> diff --git a/block/nfs.c b/block/nfs.c
>>>>>> new file mode 100644
>>>>>> index 0000000..2bbf7a2
>>>>>> --- /dev/null
>>>>>> +++ b/block/nfs.c
>>>>>> @@ -0,0 +1,440 @@
>>>>>> +/*
>>>>>> + * QEMU Block driver for native access to files on NFS shares
>>>>>> + *
>>>>>> + * Copyright (c) 2014 Peter Lieven <pl@kamp.de>
>>>>>> + *
>>>>>> + * Permission is hereby granted, free of charge, to any person obtaining a copy
>>>>>> + * of this software and associated documentation files (the "Software"), to deal
>>>>>> + * in the Software without restriction, including without limitation the rights
>>>>>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
>>>>>> + * copies of the Software, and to permit persons to whom the Software is
>>>>>> + * furnished to do so, subject to the following conditions:
>>>>>> + *
>>>>>> + * The above copyright notice and this permission notice shall be included in
>>>>>> + * all copies or substantial portions of the Software.
>>>>>> + *
>>>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>>>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>>>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>>>>>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>>>>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
>>>>>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>>>>>> + * THE SOFTWARE.
>>>>>> + */
>>>>>> +
>>>>>> +#include "config-host.h"
>>>>>> +
>>>>>> +#include <poll.h>
>>>>>> +#include "qemu-common.h"
>>>>>> +#include "qemu/config-file.h"
>>>>>> +#include "qemu/error-report.h"
>>>>>> +#include "block/block_int.h"
>>>>>> +#include "trace.h"
>>>>>> +#include "qemu/iov.h"
>>>>>> +#include "qemu/uri.h"
>>>>>> +#include "sysemu/sysemu.h"
>>>>>> +#include <nfsc/libnfs.h>
>>>>>> +
>>>>>> +typedef struct NFSClient {
>>>>>> +    struct nfs_context *context;
>>>>>> +    struct nfsfh *fh;
>>>>>> +    int events;
>>>>>> +    bool has_zero_init;
>>>>>> +} NFSClient;
>>>>>> +
>>>>>> +typedef struct NFSRPC {
>>>>>> +    int status;
>>>>>> +    int complete;
>>>>>> +    QEMUIOVector *iov;
>>>>>> +    struct stat *st;
>>>>>> +    Coroutine *co;
>>>>>> +    QEMUBH *bh;
>>>>>> +} NFSRPC;
>>>>>> +
>>>>>> +static void nfs_process_read(void *arg);
>>>>>> +static void nfs_process_write(void *arg);
>>>>>> +
>>>>>> +static void nfs_set_events(NFSClient *client)
>>>>>> +{
>>>>>> +    int ev = nfs_which_events(client->context);
>>>>>> +    if (ev != client->events) {
>>>>>> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context),
>>>>>> +                      (ev & POLLIN) ? nfs_process_read : NULL,
>>>>>> +                      (ev & POLLOUT) ? nfs_process_write : NULL,
>>>>>> +                      client);
>>>>>> +
>>>>>> +    }
>>>>>> +    client->events = ev;
>>>>>> +}
>>>>>> +
>>>>>> +static void nfs_process_read(void *arg)
>>>>>> +{
>>>>>> +    NFSClient *client = arg;
>>>>>> +    nfs_service(client->context, POLLIN);
>>>>>> +    nfs_set_events(client);
>>>>>> +}
>>>>>> +
>>>>>> +static void nfs_process_write(void *arg)
>>>>>> +{
>>>>>> +    NFSClient *client = arg;
>>>>>> +    nfs_service(client->context, POLLOUT);
>>>>>> +    nfs_set_events(client);
>>>>>> +}
>>>>>> +
>>>>>> +static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
>>>>>> +{
>>>>>> +    *task = (NFSRPC) {
>>>>>> +        .co         = qemu_coroutine_self(),
>>>>>> +    };
>>>>>> +}
>>>>>> +
>>>>>> +static void nfs_co_generic_bh_cb(void *opaque)
>>>>>> +{
>>>>>> +    NFSRPC *task = opaque;
>>>>>> +    qemu_bh_delete(task->bh);
>>>>>> +    qemu_coroutine_enter(task->co, NULL);
>>>>>> +}
>>>>>> +
>>>>>> +static void
>>>>>> +nfs_co_generic_cb(int status, struct nfs_context *nfs, void *data,
>>>>>> +                  void *private_data)
>>>>>> +{
>>>>>> +    NFSRPC *task = private_data;
>>>>>> +    task->complete = 1;
>>>>>> +    task->status = status;
>>>>>> +    if (task->status > 0 && task->iov) {
>>>>>> +        if (task->status <= task->iov->size) {
>>>>> It feel very odd to compare something named status with something named size.
>>>>> 
>>>>>> +            qemu_iovec_from_buf(task->iov, 0, data, task->status);
>>>>>> +        } else {
>>>>>> +            task->status = -EIO;
>>>>>> +        }
>>>>>> +    }
>>>>>> +    if (task->status == 0 && task->st) {
>>>>>> +        memcpy(task->st, data, sizeof(struct stat));
>>>>>> +    }
>>>>>> +    if (task->co) {
>>>>>> +        task->bh = qemu_bh_new(nfs_co_generic_bh_cb, task);
>>>>>> +        qemu_bh_schedule(task->bh);
>>>>>> +    }
>>>>>> +}
>>>>>> +
>>>>>> +static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
>>>>>> +                                     int64_t sector_num, int nb_sectors,
>>>>>> +                                     QEMUIOVector *iov)
>>>>>> +{
>>>>>> +    NFSClient *client = bs->opaque;
>>>>>> +    NFSRPC task;
>>>>>> +
>>>>>> +    nfs_co_init_task(client, &task);
>>>>>> +    task.iov = iov;
>>>>>> +
>>>>>> +    if (nfs_pread_async(client->context, client->fh,
>>>>>> +                        sector_num * BDRV_SECTOR_SIZE,
>>>>>> +                        nb_sectors * BDRV_SECTOR_SIZE,
>>>>>> +                        nfs_co_generic_cb, &task) != 0) {
>>>>>> +        return -ENOMEM;
>>>>>> +    }
>>>>>> +
>>>>>> +    while (!task.complete) {
>>>>>> +        nfs_set_events(client);
>>>>>> +        qemu_coroutine_yield();
>>>>>> +    }
>>>>>> +
>>>>>> +    if (task.status < 0) {
>>>>>> +        return task.status;
>>>>>> +    }
>>>>>> +
>>>>>> +    /* zero pad short reads */
>>>>>> +    if (task.status < iov->size) {
>>>>>> +        qemu_iovec_memset(iov, task.status, 0, iov->size - task.status);
>>>>>> +    }
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
>>>>>> +                                        int64_t sector_num, int nb_sectors,
>>>>>> +                                        QEMUIOVector *iov)
>>>>>> +{
>>>>>> +    NFSClient *client = bs->opaque;
>>>>>> +    NFSRPC task;
>>>>>> +    char *buf = NULL;
>>>>>> +
>>>>>> +    nfs_co_init_task(client, &task);
>>>>>> +
>>>>>> +    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
>>>>>> +    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
>>>>>> +
>>>>>> +    if (nfs_pwrite_async(client->context, client->fh,
>>>>>> +                         sector_num * BDRV_SECTOR_SIZE,
>>>>>> +                         nb_sectors * BDRV_SECTOR_SIZE,
>>>>>> +                         buf, nfs_co_generic_cb, &task) != 0) {
>>>>>> +        g_free(buf);
>>>>>> +        return -ENOMEM;
>>>>>> +    }
>>>>>> +
>>>>>> +    while (!task.complete) {
>>>>>> +        nfs_set_events(client);
>>>>>> +        qemu_coroutine_yield();
>>>>>> +    }
>>>>>> +
>>>>>> +    g_free(buf);
>>>>>> +
>>>>>> +    if (task.status != nb_sectors * BDRV_SECTOR_SIZE) {
>>>>>> +        return task.status < 0 ? task.status : -EIO;
>>>>>> +    }
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
>>>>>> +{
>>>>>> +    NFSClient *client = bs->opaque;
>>>>>> +    NFSRPC task;
>>>>>> +
>>>>>> +    nfs_co_init_task(client, &task);
>>>>>> +
>>>>>> +    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
>>>>>> +                        &task) != 0) {
>>>>>> +        return -ENOMEM;
>>>>>> +    }
>>>>>> +
>>>>>> +    while (!task.complete) {
>>>>>> +        nfs_set_events(client);
>>>>>> +        qemu_coroutine_yield();
>>>>>> +    }
>>>>>> +
>>>>>> +    return task.status;
>>>>>> +}
>>>>>> +
>>>>>> +/* TODO Convert to fine grained options */
>>>>>> +static QemuOptsList runtime_opts = {
>>>>>> +    .name = "nfs",
>>>>>> +    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
>>>>>> +    .desc = {
>>>>>> +        {
>>>>>> +            .name = "filename",
>>>>>> +            .type = QEMU_OPT_STRING,
>>>>>> +            .help = "URL to the NFS file",
>>>>>> +        },
>>>>>> +        { /* end of list */ }
>>>>>> +    },
>>>>>> +};
>>>>>> +
>>>>>> +static void nfs_client_close(NFSClient *client)
>>>>>> +{
>>>>>> +    if (client->context) {
>>>>>> +        if (client->fh) {
>>>>>> +            nfs_close(client->context, client->fh);
>>>>>> +        }
>>>>>> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
>>>>>> +        nfs_destroy_context(client->context);
>>>>>> +    }
>>>>>> +    memset(client, 0, sizeof(NFSClient));
>>>>>> +}
>>>>>> +
>>>>>> +static void nfs_file_close(BlockDriverState *bs)
>>>>>> +{
>>>>>> +    NFSClient *client = bs->opaque;
>>>>>> +    nfs_client_close(client);
>>>>>> +}
>>>>>> +
>>>>>> +static int64_t nfs_.cilclient_open(NFSClient *client, const char *filename,
>>>>>> +                               int flags, Error **errp)
>>>>>> +{
>>>>>> +    int ret = -EINVAL, i;
>>>>>> +    struct stat st;
>>>>>> +    URI *uri;
>>>>>> +    QueryParams *qp = NULL;
>>>>>> +    char *file = NULL, *strp = NULL;
>>>>>> +
>>>>>> +    uri = uri_parse(filename);
>>>>>> +    if (!uri) {
>>>>>> +        error_setg(errp, "Invalid URL specified");
>>>>>> +        goto fail;
>>>>> Should this be goto out since we don't have done nfs_init_context yet ?
>>>>>> +    }
>>>>>> +    strp = strrchr(uri->path, '/');
>>>>>> +    if (strp == NULL) {
>>>>>> +        error_setg(errp, "Invalid URL specified");
>>>>>> +        goto fail;
>>>>> goto out ?
>>>>>> +    }
>>>>>> +    file = g_strdup(strp);
>>>>>> +    *strp = 0;
>>>>>> +
>>>>>> +    client->context = nfs_init_context();
>>>>>> +    if (client->context == NULL) {
>>>>>> +        error_setg(errp, "Failed to init NFS context");
>>>>>> +        goto fail;
>>>>>> +    }
>>>>>> +
>>>>>> +    qp = query_params_parse(uri->query);
>>>>>> +    for (i = 0; i < qp->n; i++) {
>>>>>> +        if (!qp->p[i].value) {
>>>>>> +            error_setg(errp, "Value for NFS parameter expected: %s",
>>>>>> +                       qp->p[i].name);
>>>>>> +            goto fail;
>>>>>> +        }
>>>>>> +        if (!strncmp(qp->p[i].name, "uid", 3)) {
>>>>>> +            nfs_set_uid(client->context, atoi(qp->p[i].value));
>>>>>> +        } else if (!strncmp(qp->p[i].name, "gid", 3)) {
>>>>>> +            nfs_set_gid(client->context, atoi(qp->p[i].value));
>>>>>> +        } else if (!strncmp(qp->p[i].name, "tcp-syncnt", 10)) {
>>>>>> +            nfs_set_tcp_syncnt(client->context, atoi(qp->p[i].value));
>>>>>> +        } else {
>>>>>> +            error_setg(errp, "Unknown NFS parameter name: %s",
>>>>>> +                       qp->p[i].name);
>>>>>> +            goto fail;
>>>>>> +        }
>>>>>> +    }
>>>>>> +
>>>>>> +    ret = nfs_mount(client->context, uri->server, uri->path);
>>>>>> +    if (ret < 0) {
>>>>>> +        error_setg(errp, "Failed to mount nfs share: %s",
>>>>>> +                   nfs_get_error(client->context));
>>>>>> +        goto fail;
>>>>>> +    }
>>>>>> +
>>>>>> +    if (flags & O_CREAT) {
>>>>>> +        ret = nfs_creat(client->context, file, 0600, &client->fh);
>>>>>> +        if (ret < 0) {
>>>>>> +            error_setg(errp, "Failed to create file: %s",
>>>>>> +                       nfs_get_error(client->context));
>>>>>> +            goto fail;
>>>>>> +        }
>>>>>> +    } else {
>>>>>> +        ret = nfs_open(client->context, file, flags, &client->fh);
>>>>>> +        if (ret < 0) {
>>>>>> +            error_setg(errp, "Failed to open file : %s",
>>>>>> +                       nfs_get_error(client->context));
>>>>>> +            goto fail;
>>>>>> +        }
>>>>>> +    }
>>>>>> +
>>>>>> +    ret = nfs_fstat(client->context, client->fh, &st);
>>>>>> +    if (ret < 0) {
>>>>>> +        error_setg(errp, "Failed to fstat file: %s",
>>>>>> +                   nfs_get_error(client->context));
>>>>>> +        goto fail;
>>>>>> +    }
>>>>>> +
>>>>>> +    ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
>>>>>> +    client->has_zero_init = S_ISREG(st.st_mode);
>>>>>> +    goto out;
>>>>>> +fail:
>>>>>> +    nfs_client_close(client);
>>>>>> +out:
>>>>>> +    if (qp) {
>>>>>> +        query_params_free(qp);
>>>>>> +    }
>>>>>> +    uri_free(uri);
>>>>>> +    g_free(file);
>>>>>> +    return ret;
>>>>>> +}
>>>>>> +
>>>>>> +static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
>>>>>> +                         Error **errp) {
>>>>>> +    NFSClient *client = bs->opaque;
>>>>>> +    int64_t ret;
>>>>>> +    QemuOpts *opts;
>>>>>> +    Error *local_err = NULL;
>>>>>> +
>>>>>> +    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
>>>>>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>>>>>> +    if (error_is_set(&local_err)) {
>>>>>> +        qerror_report_err(local_err);
>>>>> I have seen more usage of error_propagate(errp, local_err); in QEMU code.
>>>>> Maybe I am missing the point.
>>>>> 
>>>>>> +        error_free(local_err);
>>>>>> +        return -EINVAL;
>>>>>> +    }
>>>>>> +    ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
>>>>>> +                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
>>>>>> +                          errp);
>>>>>> +    if (ret < 0) {
>>>>>> +        return ret;
>>>>>> +    }
>>>>>> +    bs->total_sectors = ret;
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static int nfs_file_create(const char *url, QEMUOptionParameter *options,
>>>>>> +                         Error **errp)
>>>>>> +{
>>>>>> +    int ret = 0;
>>>>>> +    int64_t total_size = 0;
>>>>>> +    NFSClient *client = g_malloc0(sizeof(NFSClient));
>>>>>> +
>>>>>> +    /* Read out options */
>>>>>> +    while (options && options->name) {
>>>>>> +        if (!strcmp(options->name, "size")) {
>>>>>> +            total_size = options->value.n;
>>>>>> +        }
>>>>>> +        options++;
>>>>>> +    }
>>>>>> +
>>>>>> +    ret = nfs_client_open(client, url, O_CREAT, errp);
>>>>>> +    if (ret < 0) {
>>>>>> +        goto out;
>>>>>> +    }
>>>>>> +    ret = nfs_ftruncate(client->context, client->fh, total_size);
>>>>>> +out:
>>>>> Should the out: label be after nfs_client_close(client); since the code will
>>>>> jump to it on nfs_client_open failure ?
>>>> You are right, but it doesn't hurt since you can call nfs_client_close multiple
>>>> times.
>>> 
>>> Will you send another revision addressing the other comments?
>> 
>> Sorry, which other comments are you referring to?
> 
> I replied to Benoit's email hilighting his questions which were not
> addressed.

Sorry, I missed the other comments.

> 
>> Have you been able to test the patch without the bad commit?
> 
> Waiting for a final version to test.

I cannot do that, unfortunately.

Ronnie, can you revert the bad commit and leave it for the 2.0 development.
It would be good if you bump the version to 1.9.2 so I can check
for that in the configure script

Peter
Peter Lieven Jan. 30, 2014, 9:35 p.m. UTC | #8
Am 30.01.2014 um 15:22 schrieb Stefan Hajnoczi <stefanha@gmail.com>:

> On Wed, Jan 29, 2014 at 05:19:59PM +0100, Benoît Canet wrote:
>> Le Wednesday 29 Jan 2014 à 09:50:21 (+0100), Peter Lieven a écrit :
> 
> Hi Peter,
> If I read your reply to Benoit correctly, you only addressed the
> questions about nfs_client_close().  Here are Benoits other comments and
> my thoughts on them:
> 
>>> +static void
>>> +nfs_co_generic_cb(int status, struct nfs_context *nfs, void *data,
>>> +                  void *private_data)
>>> +{
>>> +    NFSRPC *task = private_data;
>>> +    task->complete = 1;
>>> +    task->status = status;
>>> +    if (task->status > 0 && task->iov) {
>>> +        if (task->status <= task->iov->size) {
>> It feel very odd to compare something named status with something named size.
> 
> Maybe the argument name isn't ideal.  Something like 'ret' or 'result'
> would be more commonplace.

I took the "status" from libnfs examples. I can change it in the qemu code if
you like.

> 
> Not critical but would be nice to change.
> 
>>> +static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
>>> +                         Error **errp) {
>>> +    NFSClient *client = bs->opaque;
>>> +    int64_t ret;
>>> +    QemuOpts *opts;
>>> +    Error *local_err = NULL;
>>> +
>>> +    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
>>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>>> +    if (error_is_set(&local_err)) {
>>> +        qerror_report_err(local_err);
>> I have seen more usage of error_propagate(errp, local_err); in QEMU code.
>> Maybe I am missing the point.
> 
> Yes, I think you are right.  The Error should be propagated to the
> caller.  It's not clear to me whether we can ever get an error from
> qemu_opts_absorb_qdict() in this call site though.

Is there any action I should take here? If yes, can you advise what
to do please.

Peter
Stefan Hajnoczi Jan. 31, 2014, 8:57 a.m. UTC | #9
On Thu, Jan 30, 2014 at 10:35 PM, Peter Lieven <pl@kamp.de> wrote:
>
> Am 30.01.2014 um 15:22 schrieb Stefan Hajnoczi <stefanha@gmail.com>:
>
>> On Wed, Jan 29, 2014 at 05:19:59PM +0100, Benoît Canet wrote:
>>> Le Wednesday 29 Jan 2014 à 09:50:21 (+0100), Peter Lieven a écrit :
>>>> +static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
>>>> +                         Error **errp) {
>>>> +    NFSClient *client = bs->opaque;
>>>> +    int64_t ret;
>>>> +    QemuOpts *opts;
>>>> +    Error *local_err = NULL;
>>>> +
>>>> +    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
>>>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>>>> +    if (error_is_set(&local_err)) {
>>>> +        qerror_report_err(local_err);
>>> I have seen more usage of error_propagate(errp, local_err); in QEMU code.
>>> Maybe I am missing the point.
>>
>> Yes, I think you are right.  The Error should be propagated to the
>> caller.  It's not clear to me whether we can ever get an error from
>> qemu_opts_absorb_qdict() in this call site though.
>
> Is there any action I should take here? If yes, can you advise what
> to do please.

The issue is that nfs_file_open() takes an Error **errp argument.
This means the function should report detailed errors using the Error
object.

The patch prints and then discards the local_error instead of
propagating it to the caller's errp.

We should just propagate the error instead of printing it:
if (error_is_set(&local_err)) {
    error_propagate(errp, local_err);
    goto ...;
}

Stefan
Peter Lieven Jan. 31, 2014, 9:11 a.m. UTC | #10
On 31.01.2014 09:57, Stefan Hajnoczi wrote:
> On Thu, Jan 30, 2014 at 10:35 PM, Peter Lieven <pl@kamp.de> wrote:
>> Am 30.01.2014 um 15:22 schrieb Stefan Hajnoczi <stefanha@gmail.com>:
>>
>>> On Wed, Jan 29, 2014 at 05:19:59PM +0100, Benoît Canet wrote:
>>>> Le Wednesday 29 Jan 2014 à 09:50:21 (+0100), Peter Lieven a écrit :
>>>>> +static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
>>>>> +                         Error **errp) {
>>>>> +    NFSClient *client = bs->opaque;
>>>>> +    int64_t ret;
>>>>> +    QemuOpts *opts;
>>>>> +    Error *local_err = NULL;
>>>>> +
>>>>> +    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
>>>>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>>>>> +    if (error_is_set(&local_err)) {
>>>>> +        qerror_report_err(local_err);
>>>> I have seen more usage of error_propagate(errp, local_err); in QEMU code.
>>>> Maybe I am missing the point.
>>> Yes, I think you are right.  The Error should be propagated to the
>>> caller.  It's not clear to me whether we can ever get an error from
>>> qemu_opts_absorb_qdict() in this call site though.
>> Is there any action I should take here? If yes, can you advise what
>> to do please.
> The issue is that nfs_file_open() takes an Error **errp argument.
> This means the function should report detailed errors using the Error
> object.
>
> The patch prints and then discards the local_error instead of
> propagating it to the caller's errp.
>
> We should just propagate the error instead of printing it:
> if (error_is_set(&local_err)) {
>      error_propagate(errp, local_err);
>      goto ...;

Ok, you are just referring to this part in nfs_file_open:

     if (error_is_set(&local_err)) {
         qerror_report_err(local_err);
         error_free(local_err);
         return -EINVAL;
     }

which I would change to:

     if (error_is_set(&local_err)) {
         error_propagate(errp, local_err);
         return -EINVAL;
     }

The use of error_setg in nfs_client_open is ok?

Peter
Stefan Hajnoczi Jan. 31, 2014, 10:46 a.m. UTC | #11
On Fri, Jan 31, 2014 at 10:11 AM, Peter Lieven <pl@kamp.de> wrote:
> On 31.01.2014 09:57, Stefan Hajnoczi wrote:
>>
>> On Thu, Jan 30, 2014 at 10:35 PM, Peter Lieven <pl@kamp.de> wrote:
>>>
>>> Am 30.01.2014 um 15:22 schrieb Stefan Hajnoczi <stefanha@gmail.com>:
>>>
>>>> On Wed, Jan 29, 2014 at 05:19:59PM +0100, Benoīt Canet wrote:
>>>>>
>>>>> Le Wednesday 29 Jan 2014 ą 09:50:21 (+0100), Peter Lieven a écrit :
>>>>>>
>>>>>> +static int nfs_file_open(BlockDriverState *bs, QDict *options, int
>>>>>> flags,
>>>>>> +                         Error **errp) {
>>>>>> +    NFSClient *client = bs->opaque;
>>>>>> +    int64_t ret;
>>>>>> +    QemuOpts *opts;
>>>>>> +    Error *local_err = NULL;
>>>>>> +
>>>>>> +    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
>>>>>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>>>>>> +    if (error_is_set(&local_err)) {
>>>>>> +        qerror_report_err(local_err);
>>>>>
>>>>> I have seen more usage of error_propagate(errp, local_err); in QEMU
>>>>> code.
>>>>> Maybe I am missing the point.
>>>>
>>>> Yes, I think you are right.  The Error should be propagated to the
>>>> caller.  It's not clear to me whether we can ever get an error from
>>>> qemu_opts_absorb_qdict() in this call site though.
>>>
>>> Is there any action I should take here? If yes, can you advise what
>>> to do please.
>>
>> The issue is that nfs_file_open() takes an Error **errp argument.
>> This means the function should report detailed errors using the Error
>> object.
>>
>> The patch prints and then discards the local_error instead of
>> propagating it to the caller's errp.
>>
>> We should just propagate the error instead of printing it:
>> if (error_is_set(&local_err)) {
>>      error_propagate(errp, local_err);
>>      goto ...;
>
>
> Ok, you are just referring to this part in nfs_file_open:
>
>     if (error_is_set(&local_err)) {
>         qerror_report_err(local_err);
>         error_free(local_err);
>         return -EINVAL;
>     }
>
> which I would change to:
>
>     if (error_is_set(&local_err)) {
>         error_propagate(errp, local_err);
>         return -EINVAL;
>     }

Yes.

> The use of error_setg in nfs_client_open is ok?

Yes, it's fine.

The Error API is not 100% obvious when you first see it, but once you
learn the conventions it's pretty usable:

Functions take an Error **errp argument.  This argument is optional,
so a caller that doesn't care about detailed error messages may pass
NULL.  This has implications...

error_setg(errp, fmt, ...) handles errp == NULL internally so you can
call it unconditionally.

The tricky thing is that error_is_set() only works if errp is non-NULL
(otherwise error_setg() skips creating an Error object).  So it means
you cannot rely on your errp argument and often functions will declare
Error *local_err = NULL, so they can pass &local_err to child
functions.  Finally, error_propagate(errp, local_err) is used to pass
out the Error object to our caller.

Hope this summary makes the Error API clear.

Stefan
Peter Lieven Jan. 31, 2014, 11:16 a.m. UTC | #12
On 31.01.2014 11:46, Stefan Hajnoczi wrote:
> On Fri, Jan 31, 2014 at 10:11 AM, Peter Lieven <pl@kamp.de> wrote:
>> On 31.01.2014 09:57, Stefan Hajnoczi wrote:
>>> On Thu, Jan 30, 2014 at 10:35 PM, Peter Lieven <pl@kamp.de> wrote:
>>>> Am 30.01.2014 um 15:22 schrieb Stefan Hajnoczi <stefanha@gmail.com>:
>>>>
>>>>> On Wed, Jan 29, 2014 at 05:19:59PM +0100, Benoīt Canet wrote:
>>>>>> Le Wednesday 29 Jan 2014 ą 09:50:21 (+0100), Peter Lieven a écrit :
>>>>>>> +static int nfs_file_open(BlockDriverState *bs, QDict *options, int
>>>>>>> flags,
>>>>>>> +                         Error **errp) {
>>>>>>> +    NFSClient *client = bs->opaque;
>>>>>>> +    int64_t ret;
>>>>>>> +    QemuOpts *opts;
>>>>>>> +    Error *local_err = NULL;
>>>>>>> +
>>>>>>> +    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
>>>>>>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>>>>>>> +    if (error_is_set(&local_err)) {
>>>>>>> +        qerror_report_err(local_err);
>>>>>> I have seen more usage of error_propagate(errp, local_err); in QEMU
>>>>>> code.
>>>>>> Maybe I am missing the point.
>>>>> Yes, I think you are right.  The Error should be propagated to the
>>>>> caller.  It's not clear to me whether we can ever get an error from
>>>>> qemu_opts_absorb_qdict() in this call site though.
>>>> Is there any action I should take here? If yes, can you advise what
>>>> to do please.
>>> The issue is that nfs_file_open() takes an Error **errp argument.
>>> This means the function should report detailed errors using the Error
>>> object.
>>>
>>> The patch prints and then discards the local_error instead of
>>> propagating it to the caller's errp.
>>>
>>> We should just propagate the error instead of printing it:
>>> if (error_is_set(&local_err)) {
>>>       error_propagate(errp, local_err);
>>>       goto ...;
>>
>> Ok, you are just referring to this part in nfs_file_open:
>>
>>      if (error_is_set(&local_err)) {
>>          qerror_report_err(local_err);
>>          error_free(local_err);
>>          return -EINVAL;
>>      }
>>
>> which I would change to:
>>
>>      if (error_is_set(&local_err)) {
>>          error_propagate(errp, local_err);
>>          return -EINVAL;
>>      }
> Yes.
>
>> The use of error_setg in nfs_client_open is ok?
> Yes, it's fine.
>
> The Error API is not 100% obvious when you first see it, but once you
> learn the conventions it's pretty usable:
>
> Functions take an Error **errp argument.  This argument is optional,
> so a caller that doesn't care about detailed error messages may pass
> NULL.  This has implications...
>
> error_setg(errp, fmt, ...) handles errp == NULL internally so you can
> call it unconditionally.
>
> The tricky thing is that error_is_set() only works if errp is non-NULL
> (otherwise error_setg() skips creating an Error object).  So it means
> you cannot rely on your errp argument and often functions will declare
> Error *local_err = NULL, so they can pass &local_err to child
> functions.  Finally, error_propagate(errp, local_err) is used to pass
> out the Error object to our caller.
>
> Hope this summary makes the Error API clear.
>
> Stefan
Thanks for the explanation. I will sent you an updated series shortly.

Peter
Peter Lieven Jan. 31, 2014, 11:36 a.m. UTC | #13
On 29.01.2014 17:19, Benoît Canet wrote:
> Le Wednesday 29 Jan 2014 à 09:50:21 (+0100), Peter Lieven a écrit :
>> This patch adds native support for accessing images on NFS
>> shares without the requirement to actually mount the entire
>> NFS share on the host.
>>
>> NFS Images can simply be specified by an url of the form:
>> nfs://<host>/<export>/<filename>[?param=value[&param2=value2[&...]]]
>>
>> For example:
>> qemu-img create -f qcow2 nfs://10.0.0.1/qemu-images/test.qcow2
>>
>> You need LibNFS from Ronnie Sahlberg available at:
>>     git://github.com/sahlberg/libnfs.git
>> for this to work.
>>
>> During configure it is automatically probed for libnfs and support
>> is enabled on-the-fly. You can forbid or enforce libnfs support
>> with --disable-libnfs or --enable-libnfs respectively.
>>
>> Due to NFS restrictions you might need to execute your binaries
>> as root, allow them to open priviledged ports (<1024) or specify
>> insecure option on the NFS server.
>>
>> For additional information on ROOT vs. non-ROOT operation and URL
>> format + parameters see:
>>     https://raw.github.com/sahlberg/libnfs/master/README
>>
>> Supported by qemu are the uid, gid and tcp-syncnt URL parameters.
>>
>> LibNFS currently support NFS version 3 only.
>>
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>>   MAINTAINERS         |    5 +
>>   block/Makefile.objs |    1 +
>>   block/nfs.c         |  440 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>   configure           |   26 +++
>>   qapi-schema.json    |    1 +
>>   5 files changed, 473 insertions(+)
>>   create mode 100644 block/nfs.c
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index fb53242..f8411f9 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -936,6 +936,11 @@ M: Peter Lieven <pl@kamp.de>
>>   S: Supported
>>   F: block/iscsi.c
>>   
>> +NFS
>> +M: Peter Lieven <pl@kamp.de>
>> +S: Maintained
>> +F: block/nfs.c
>> +
>>   SSH
>>   M: Richard W.M. Jones <rjones@redhat.com>
>>   S: Supported
>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>> index 4e8c91e..e254a21 100644
>> --- a/block/Makefile.objs
>> +++ b/block/Makefile.objs
>> @@ -12,6 +12,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>>   ifeq ($(CONFIG_POSIX),y)
>>   block-obj-y += nbd.o nbd-client.o sheepdog.o
>>   block-obj-$(CONFIG_LIBISCSI) += iscsi.o
>> +block-obj-$(CONFIG_LIBNFS) += nfs.o
>>   block-obj-$(CONFIG_CURL) += curl.o
>>   block-obj-$(CONFIG_RBD) += rbd.o
>>   block-obj-$(CONFIG_GLUSTERFS) += gluster.o
>> diff --git a/block/nfs.c b/block/nfs.c
>> new file mode 100644
>> index 0000000..2bbf7a2
>> --- /dev/null
>> +++ b/block/nfs.c
>> @@ -0,0 +1,440 @@
>> +/*
>> + * QEMU Block driver for native access to files on NFS shares
>> + *
>> + * Copyright (c) 2014 Peter Lieven <pl@kamp.de>
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a copy
>> + * of this software and associated documentation files (the "Software"), to deal
>> + * in the Software without restriction, including without limitation the rights
>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
>> + * copies of the Software, and to permit persons to whom the Software is
>> + * furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>> + * THE SOFTWARE.
>> + */
>> +
>> +#include "config-host.h"
>> +
>> +#include <poll.h>
>> +#include "qemu-common.h"
>> +#include "qemu/config-file.h"
>> +#include "qemu/error-report.h"
>> +#include "block/block_int.h"
>> +#include "trace.h"
>> +#include "qemu/iov.h"
>> +#include "qemu/uri.h"
>> +#include "sysemu/sysemu.h"
>> +#include <nfsc/libnfs.h>
>> +
>> +typedef struct NFSClient {
>> +    struct nfs_context *context;
>> +    struct nfsfh *fh;
>> +    int events;
>> +    bool has_zero_init;
>> +} NFSClient;
>> +
>> +typedef struct NFSRPC {
>> +    int status;
>> +    int complete;
>> +    QEMUIOVector *iov;
>> +    struct stat *st;
>> +    Coroutine *co;
>> +    QEMUBH *bh;
>> +} NFSRPC;
>> +
>> +static void nfs_process_read(void *arg);
>> +static void nfs_process_write(void *arg);
>> +
>> +static void nfs_set_events(NFSClient *client)
>> +{
>> +    int ev = nfs_which_events(client->context);
>> +    if (ev != client->events) {
>> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context),
>> +                      (ev & POLLIN) ? nfs_process_read : NULL,
>> +                      (ev & POLLOUT) ? nfs_process_write : NULL,
>> +                      client);
>> +
>> +    }
>> +    client->events = ev;
>> +}
>> +
>> +static void nfs_process_read(void *arg)
>> +{
>> +    NFSClient *client = arg;
>> +    nfs_service(client->context, POLLIN);
>> +    nfs_set_events(client);
>> +}
>> +
>> +static void nfs_process_write(void *arg)
>> +{
>> +    NFSClient *client = arg;
>> +    nfs_service(client->context, POLLOUT);
>> +    nfs_set_events(client);
>> +}
>> +
>> +static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
>> +{
>> +    *task = (NFSRPC) {
>> +        .co         = qemu_coroutine_self(),
>> +    };
>> +}
>> +
>> +static void nfs_co_generic_bh_cb(void *opaque)
>> +{
>> +    NFSRPC *task = opaque;
>> +    qemu_bh_delete(task->bh);
>> +    qemu_coroutine_enter(task->co, NULL);
>> +}
>> +
>> +static void
>> +nfs_co_generic_cb(int status, struct nfs_context *nfs, void *data,
>> +                  void *private_data)
>> +{
>> +    NFSRPC *task = private_data;
>> +    task->complete = 1;
>> +    task->status = status;
>> +    if (task->status > 0 && task->iov) {
>> +        if (task->status <= task->iov->size) {
> It feel very odd to compare something named status with something named size.
>
>> +            qemu_iovec_from_buf(task->iov, 0, data, task->status);
>> +        } else {
>> +            task->status = -EIO;
>> +        }
>> +    }
>> +    if (task->status == 0 && task->st) {
>> +        memcpy(task->st, data, sizeof(struct stat));
>> +    }
>> +    if (task->co) {
>> +        task->bh = qemu_bh_new(nfs_co_generic_bh_cb, task);
>> +        qemu_bh_schedule(task->bh);
>> +    }
>> +}
>> +
>> +static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
>> +                                     int64_t sector_num, int nb_sectors,
>> +                                     QEMUIOVector *iov)
>> +{
>> +    NFSClient *client = bs->opaque;
>> +    NFSRPC task;
>> +
>> +    nfs_co_init_task(client, &task);
>> +    task.iov = iov;
>> +
>> +    if (nfs_pread_async(client->context, client->fh,
>> +                        sector_num * BDRV_SECTOR_SIZE,
>> +                        nb_sectors * BDRV_SECTOR_SIZE,
>> +                        nfs_co_generic_cb, &task) != 0) {
>> +        return -ENOMEM;
>> +    }
>> +
>> +    while (!task.complete) {
>> +        nfs_set_events(client);
>> +        qemu_coroutine_yield();
>> +    }
>> +
>> +    if (task.status < 0) {
>> +        return task.status;
>> +    }
>> +
>> +    /* zero pad short reads */
>> +    if (task.status < iov->size) {
>> +        qemu_iovec_memset(iov, task.status, 0, iov->size - task.status);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
>> +                                        int64_t sector_num, int nb_sectors,
>> +                                        QEMUIOVector *iov)
>> +{
>> +    NFSClient *client = bs->opaque;
>> +    NFSRPC task;
>> +    char *buf = NULL;
>> +
>> +    nfs_co_init_task(client, &task);
>> +
>> +    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
>> +    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
>> +
>> +    if (nfs_pwrite_async(client->context, client->fh,
>> +                         sector_num * BDRV_SECTOR_SIZE,
>> +                         nb_sectors * BDRV_SECTOR_SIZE,
>> +                         buf, nfs_co_generic_cb, &task) != 0) {
>> +        g_free(buf);
>> +        return -ENOMEM;
>> +    }
>> +
>> +    while (!task.complete) {
>> +        nfs_set_events(client);
>> +        qemu_coroutine_yield();
>> +    }
>> +
>> +    g_free(buf);
>> +
>> +    if (task.status != nb_sectors * BDRV_SECTOR_SIZE) {
>> +        return task.status < 0 ? task.status : -EIO;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
>> +{
>> +    NFSClient *client = bs->opaque;
>> +    NFSRPC task;
>> +
>> +    nfs_co_init_task(client, &task);
>> +
>> +    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
>> +                        &task) != 0) {
>> +        return -ENOMEM;
>> +    }
>> +
>> +    while (!task.complete) {
>> +        nfs_set_events(client);
>> +        qemu_coroutine_yield();
>> +    }
>> +
>> +    return task.status;
>> +}
>> +
>> +/* TODO Convert to fine grained options */
>> +static QemuOptsList runtime_opts = {
>> +    .name = "nfs",
>> +    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
>> +    .desc = {
>> +        {
>> +            .name = "filename",
>> +            .type = QEMU_OPT_STRING,
>> +            .help = "URL to the NFS file",
>> +        },
>> +        { /* end of list */ }
>> +    },
>> +};
>> +
>> +static void nfs_client_close(NFSClient *client)
>> +{
>> +    if (client->context) {
>> +        if (client->fh) {
>> +            nfs_close(client->context, client->fh);
>> +        }
>> +        qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
>> +        nfs_destroy_context(client->context);
>> +    }
>> +    memset(client, 0, sizeof(NFSClient));
>> +}
>> +
>> +static void nfs_file_close(BlockDriverState *bs)
>> +{
>> +    NFSClient *client = bs->opaque;
>> +    nfs_client_close(client);
>> +}
>> +
>> +static int64_t nfs_.cilclient_open(NFSClient *client, const char *filename,
>> +                               int flags, Error **errp)
>> +{
>> +    int ret = -EINVAL, i;
>> +    struct stat st;
>> +    URI *uri;
>> +    QueryParams *qp = NULL;
>> +    char *file = NULL, *strp = NULL;
>> +
>> +    uri = uri_parse(filename);
>> +    if (!uri) {
>> +        error_setg(errp, "Invalid URL specified");
>> +        goto fail;
> Should this be goto out since we don't have done nfs_init_context yet ?
it doesn't matter nfs_client_close can be called with client->context == NULL.

Sorry I entirely missed your other comments.

Peter
diff mbox

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index fb53242..f8411f9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -936,6 +936,11 @@  M: Peter Lieven <pl@kamp.de>
 S: Supported
 F: block/iscsi.c
 
+NFS
+M: Peter Lieven <pl@kamp.de>
+S: Maintained
+F: block/nfs.c
+
 SSH
 M: Richard W.M. Jones <rjones@redhat.com>
 S: Supported
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 4e8c91e..e254a21 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -12,6 +12,7 @@  block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 ifeq ($(CONFIG_POSIX),y)
 block-obj-y += nbd.o nbd-client.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
+block-obj-$(CONFIG_LIBNFS) += nfs.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
diff --git a/block/nfs.c b/block/nfs.c
new file mode 100644
index 0000000..2bbf7a2
--- /dev/null
+++ b/block/nfs.c
@@ -0,0 +1,440 @@ 
+/*
+ * QEMU Block driver for native access to files on NFS shares
+ *
+ * Copyright (c) 2014 Peter Lieven <pl@kamp.de>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "config-host.h"
+
+#include <poll.h>
+#include "qemu-common.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+#include "block/block_int.h"
+#include "trace.h"
+#include "qemu/iov.h"
+#include "qemu/uri.h"
+#include "sysemu/sysemu.h"
+#include <nfsc/libnfs.h>
+
+typedef struct NFSClient {
+    struct nfs_context *context;
+    struct nfsfh *fh;
+    int events;
+    bool has_zero_init;
+} NFSClient;
+
+typedef struct NFSRPC {
+    int status;
+    int complete;
+    QEMUIOVector *iov;
+    struct stat *st;
+    Coroutine *co;
+    QEMUBH *bh;
+} NFSRPC;
+
+static void nfs_process_read(void *arg);
+static void nfs_process_write(void *arg);
+
+static void nfs_set_events(NFSClient *client)
+{
+    int ev = nfs_which_events(client->context);
+    if (ev != client->events) {
+        qemu_aio_set_fd_handler(nfs_get_fd(client->context),
+                      (ev & POLLIN) ? nfs_process_read : NULL,
+                      (ev & POLLOUT) ? nfs_process_write : NULL,
+                      client);
+
+    }
+    client->events = ev;
+}
+
+static void nfs_process_read(void *arg)
+{
+    NFSClient *client = arg;
+    nfs_service(client->context, POLLIN);
+    nfs_set_events(client);
+}
+
+static void nfs_process_write(void *arg)
+{
+    NFSClient *client = arg;
+    nfs_service(client->context, POLLOUT);
+    nfs_set_events(client);
+}
+
+static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
+{
+    *task = (NFSRPC) {
+        .co         = qemu_coroutine_self(),
+    };
+}
+
+static void nfs_co_generic_bh_cb(void *opaque)
+{
+    NFSRPC *task = opaque;
+    qemu_bh_delete(task->bh);
+    qemu_coroutine_enter(task->co, NULL);
+}
+
+static void
+nfs_co_generic_cb(int status, struct nfs_context *nfs, void *data,
+                  void *private_data)
+{
+    NFSRPC *task = private_data;
+    task->complete = 1;
+    task->status = status;
+    if (task->status > 0 && task->iov) {
+        if (task->status <= task->iov->size) {
+            qemu_iovec_from_buf(task->iov, 0, data, task->status);
+        } else {
+            task->status = -EIO;
+        }
+    }
+    if (task->status == 0 && task->st) {
+        memcpy(task->st, data, sizeof(struct stat));
+    }
+    if (task->co) {
+        task->bh = qemu_bh_new(nfs_co_generic_bh_cb, task);
+        qemu_bh_schedule(task->bh);
+    }
+}
+
+static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
+                                     int64_t sector_num, int nb_sectors,
+                                     QEMUIOVector *iov)
+{
+    NFSClient *client = bs->opaque;
+    NFSRPC task;
+
+    nfs_co_init_task(client, &task);
+    task.iov = iov;
+
+    if (nfs_pread_async(client->context, client->fh,
+                        sector_num * BDRV_SECTOR_SIZE,
+                        nb_sectors * BDRV_SECTOR_SIZE,
+                        nfs_co_generic_cb, &task) != 0) {
+        return -ENOMEM;
+    }
+
+    while (!task.complete) {
+        nfs_set_events(client);
+        qemu_coroutine_yield();
+    }
+
+    if (task.status < 0) {
+        return task.status;
+    }
+
+    /* zero pad short reads */
+    if (task.status < iov->size) {
+        qemu_iovec_memset(iov, task.status, 0, iov->size - task.status);
+    }
+
+    return 0;
+}
+
+static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
+                                        int64_t sector_num, int nb_sectors,
+                                        QEMUIOVector *iov)
+{
+    NFSClient *client = bs->opaque;
+    NFSRPC task;
+    char *buf = NULL;
+
+    nfs_co_init_task(client, &task);
+
+    buf = g_malloc(nb_sectors * BDRV_SECTOR_SIZE);
+    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
+
+    if (nfs_pwrite_async(client->context, client->fh,
+                         sector_num * BDRV_SECTOR_SIZE,
+                         nb_sectors * BDRV_SECTOR_SIZE,
+                         buf, nfs_co_generic_cb, &task) != 0) {
+        g_free(buf);
+        return -ENOMEM;
+    }
+
+    while (!task.complete) {
+        nfs_set_events(client);
+        qemu_coroutine_yield();
+    }
+
+    g_free(buf);
+
+    if (task.status != nb_sectors * BDRV_SECTOR_SIZE) {
+        return task.status < 0 ? task.status : -EIO;
+    }
+
+    return 0;
+}
+
+static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
+{
+    NFSClient *client = bs->opaque;
+    NFSRPC task;
+
+    nfs_co_init_task(client, &task);
+
+    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
+                        &task) != 0) {
+        return -ENOMEM;
+    }
+
+    while (!task.complete) {
+        nfs_set_events(client);
+        qemu_coroutine_yield();
+    }
+
+    return task.status;
+}
+
+/* TODO Convert to fine grained options */
+static QemuOptsList runtime_opts = {
+    .name = "nfs",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = "filename",
+            .type = QEMU_OPT_STRING,
+            .help = "URL to the NFS file",
+        },
+        { /* end of list */ }
+    },
+};
+
+static void nfs_client_close(NFSClient *client)
+{
+    if (client->context) {
+        if (client->fh) {
+            nfs_close(client->context, client->fh);
+        }
+        qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
+        nfs_destroy_context(client->context);
+    }
+    memset(client, 0, sizeof(NFSClient));
+}
+
+static void nfs_file_close(BlockDriverState *bs)
+{
+    NFSClient *client = bs->opaque;
+    nfs_client_close(client);
+}
+
+static int64_t nfs_client_open(NFSClient *client, const char *filename,
+                               int flags, Error **errp)
+{
+    int ret = -EINVAL, i;
+    struct stat st;
+    URI *uri;
+    QueryParams *qp = NULL;
+    char *file = NULL, *strp = NULL;
+
+    uri = uri_parse(filename);
+    if (!uri) {
+        error_setg(errp, "Invalid URL specified");
+        goto fail;
+    }
+    strp = strrchr(uri->path, '/');
+    if (strp == NULL) {
+        error_setg(errp, "Invalid URL specified");
+        goto fail;
+    }
+    file = g_strdup(strp);
+    *strp = 0;
+
+    client->context = nfs_init_context();
+    if (client->context == NULL) {
+        error_setg(errp, "Failed to init NFS context");
+        goto fail;
+    }
+
+    qp = query_params_parse(uri->query);
+    for (i = 0; i < qp->n; i++) {
+        if (!qp->p[i].value) {
+            error_setg(errp, "Value for NFS parameter expected: %s",
+                       qp->p[i].name);
+            goto fail;
+        }
+        if (!strncmp(qp->p[i].name, "uid", 3)) {
+            nfs_set_uid(client->context, atoi(qp->p[i].value));
+        } else if (!strncmp(qp->p[i].name, "gid", 3)) {
+            nfs_set_gid(client->context, atoi(qp->p[i].value));
+        } else if (!strncmp(qp->p[i].name, "tcp-syncnt", 10)) {
+            nfs_set_tcp_syncnt(client->context, atoi(qp->p[i].value));
+        } else {
+            error_setg(errp, "Unknown NFS parameter name: %s",
+                       qp->p[i].name);
+            goto fail;
+        }
+    }
+
+    ret = nfs_mount(client->context, uri->server, uri->path);
+    if (ret < 0) {
+        error_setg(errp, "Failed to mount nfs share: %s",
+                   nfs_get_error(client->context));
+        goto fail;
+    }
+
+    if (flags & O_CREAT) {
+        ret = nfs_creat(client->context, file, 0600, &client->fh);
+        if (ret < 0) {
+            error_setg(errp, "Failed to create file: %s",
+                       nfs_get_error(client->context));
+            goto fail;
+        }
+    } else {
+        ret = nfs_open(client->context, file, flags, &client->fh);
+        if (ret < 0) {
+            error_setg(errp, "Failed to open file : %s",
+                       nfs_get_error(client->context));
+            goto fail;
+        }
+    }
+
+    ret = nfs_fstat(client->context, client->fh, &st);
+    if (ret < 0) {
+        error_setg(errp, "Failed to fstat file: %s",
+                   nfs_get_error(client->context));
+        goto fail;
+    }
+
+    ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
+    client->has_zero_init = S_ISREG(st.st_mode);
+    goto out;
+fail:
+    nfs_client_close(client);
+out:
+    if (qp) {
+        query_params_free(qp);
+    }
+    uri_free(uri);
+    g_free(file);
+    return ret;
+}
+
+static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
+                         Error **errp) {
+    NFSClient *client = bs->opaque;
+    int64_t ret;
+    QemuOpts *opts;
+    Error *local_err = NULL;
+
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (error_is_set(&local_err)) {
+        qerror_report_err(local_err);
+        error_free(local_err);
+        return -EINVAL;
+    }
+    ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
+                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
+                          errp);
+    if (ret < 0) {
+        return ret;
+    }
+    bs->total_sectors = ret;
+    return 0;
+}
+
+static int nfs_file_create(const char *url, QEMUOptionParameter *options,
+                         Error **errp)
+{
+    int ret = 0;
+    int64_t total_size = 0;
+    NFSClient *client = g_malloc0(sizeof(NFSClient));
+
+    /* Read out options */
+    while (options && options->name) {
+        if (!strcmp(options->name, "size")) {
+            total_size = options->value.n;
+        }
+        options++;
+    }
+
+    ret = nfs_client_open(client, url, O_CREAT, errp);
+    if (ret < 0) {
+        goto out;
+    }
+    ret = nfs_ftruncate(client->context, client->fh, total_size);
+out:
+    nfs_client_close(client);
+    g_free(client);
+    return ret;
+}
+
+static int nfs_has_zero_init(BlockDriverState *bs)
+{
+    NFSClient *client = bs->opaque;
+    return client->has_zero_init;
+}
+
+static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
+{
+    NFSClient *client = bs->opaque;
+    NFSRPC task = {0};
+    struct stat st;
+
+    task.st = &st;
+    if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
+                        &task) != 0) {
+        return -ENOMEM;
+    }
+
+    while (!task.complete) {
+        nfs_set_events(client);
+        qemu_aio_wait();
+    }
+
+    return (task.status < 0 ? task.status : st.st_blocks * st.st_blksize);
+}
+
+static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
+{
+    NFSClient *client = bs->opaque;
+    return nfs_ftruncate(client->context, client->fh, offset);
+}
+
+static BlockDriver bdrv_nfs = {
+    .format_name     = "nfs",
+    .protocol_name   = "nfs",
+
+    .instance_size   = sizeof(NFSClient),
+    .bdrv_needs_filename = true,
+    .bdrv_has_zero_init = nfs_has_zero_init,
+    .bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
+    .bdrv_truncate = nfs_file_truncate,
+
+    .bdrv_file_open  = nfs_file_open,
+    .bdrv_close      = nfs_file_close,
+    .bdrv_create     = nfs_file_create,
+
+    .bdrv_co_readv         = nfs_co_readv,
+    .bdrv_co_writev        = nfs_co_writev,
+    .bdrv_co_flush_to_disk = nfs_co_flush,
+};
+
+static void nfs_block_init(void)
+{
+    bdrv_register(&bdrv_nfs);
+}
+
+block_init(nfs_block_init);
diff --git a/configure b/configure
index b472694..9ca0c63 100755
--- a/configure
+++ b/configure
@@ -251,6 +251,7 @@  vss_win32_sdk=""
 win_sdk="no"
 want_tools="yes"
 libiscsi=""
+libnfs=""
 coroutine=""
 coroutine_pool=""
 seccomp=""
@@ -840,6 +841,10 @@  for opt do
   ;;
   --enable-libiscsi) libiscsi="yes"
   ;;
+  --disable-libnfs) libnfs="no"
+  ;;
+  --enable-libnfs) libnfs="yes"
+  ;;
   --enable-profiler) profiler="yes"
   ;;
   --disable-cocoa) cocoa="no"
@@ -1229,6 +1234,8 @@  Advanced options (experts only):
   --enable-rbd             enable building the rados block device (rbd)
   --disable-libiscsi       disable iscsi support
   --enable-libiscsi        enable iscsi support
+  --disable-libnfs         disable nfs support
+  --enable-libnfs          enable nfs support
   --disable-smartcard-nss  disable smartcard nss support
   --enable-smartcard-nss   enable smartcard nss support
   --disable-libusb         disable libusb (for usb passthrough)
@@ -3600,6 +3607,20 @@  elif test "$debug" = "no" ; then
   CFLAGS="-O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 $CFLAGS"
 fi
 
+##########################################
+# Do we have libnfs
+if test "$libnfs" != "no" ; then
+  if $pkg_config --atleast-version=1.8.91 libnfs; then
+    libnfs="yes"
+    libnfs_libs=$($pkg_config --libs libnfs)
+    LIBS="$LIBS $libnfs_libs"
+  else
+    if test "$libnfs" = "yes" ; then
+      feature_not_found "libnfs"
+    fi
+    libnfs="no"
+  fi
+fi
 
 # Disable zero malloc errors for official releases unless explicitly told to
 # enable/disable
@@ -3829,6 +3850,7 @@  echo "libiscsi support  $libiscsi (1.4.0)"
 else
 echo "libiscsi support  $libiscsi"
 fi
+echo "libnfs support    $libnfs"
 echo "build guest agent $guest_agent"
 echo "QGA VSS support   $guest_agent_with_vss"
 echo "seccomp support   $seccomp"
@@ -4165,6 +4187,10 @@  if test "$libiscsi" = "yes" ; then
   fi
 fi
 
+if test "$libnfs" = "yes" ; then
+  echo "CONFIG_LIBNFS=y" >> $config_host_mak
+fi
+
 if test "$seccomp" = "yes"; then
   echo "CONFIG_SECCOMP=y" >> $config_host_mak
 fi
diff --git a/qapi-schema.json b/qapi-schema.json
index 05ced9d..7cfb5e5 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4371,6 +4371,7 @@ 
 # TODO gluster: Wait for structured options
 # TODO iscsi: Wait for structured options
 # TODO nbd: Should take InetSocketAddress for 'host'?
+# TODO nfs: Wait for structured options
 # TODO rbd: Wait for structured options
 # TODO sheepdog: Wait for structured options
 # TODO ssh: Should take InetSocketAddress for 'host'?