@@ -171,8 +171,8 @@ struct vhost_net *vhost_net_init(VhostNetOptions
*options)
net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
}
- r = vhost_dev_init(&net->dev, options->opaque,
- options->backend_type, options->busyloop_timeout);
+ r = vhost_dev_init(&net->dev, options->opaque, options->backend_type,
+ options->busyloop_timeout, options->vhostlog);
if (r < 0) {
goto fail;
}
@@ -248,7 +248,7 @@ static void vhost_scsi_realize(DeviceState *dev, Error
**errp)
s->dev.backend_features = 0;
ret = vhost_dev_init(&s->dev, (void *)(uintptr_t)vhostfd,
- VHOST_BACKEND_TYPE_KERNEL, 0);
+ VHOST_BACKEND_TYPE_KERNEL, 0, NULL);
if (ret < 0) {
error_setg(errp, "vhost-scsi: vhost initialization failed: %s",
strerror(-ret));
@@ -342,7 +342,7 @@ static void vhost_vsock_device_realize(DeviceState
*dev, Error **errp)
vsock->vhost_dev.nvqs = ARRAY_SIZE(vsock->vhost_vqs);
vsock->vhost_dev.vqs = vsock->vhost_vqs;
ret = vhost_dev_init(&vsock->vhost_dev, (void *)(uintptr_t)vhostfd,
- VHOST_BACKEND_TYPE_KERNEL, 0);
+ VHOST_BACKEND_TYPE_KERNEL, 0, NULL);
if (ret < 0) {
error_setg_errno(errp, -ret, "vhost-vsock: vhost_dev_init failed");
goto err_virtio;
@@ -20,7 +20,7 @@
#include "qemu/atomic.h"
#include "qemu/range.h"
#include "qemu/error-report.h"
-#include "qemu/memfd.h"
+#include "qemu/mmap-file.h"
#include <linux/vhost.h>
#include "exec/address-spaces.h"
#include "hw/virtio/virtio-bus.h"
@@ -326,7 +326,7 @@ static uint64_t vhost_get_log_size(struct vhost_dev
*dev)
return log_size;
}
-static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
+static struct vhost_log *vhost_log_alloc(char *path, uint64_t size, bool
share)
{
struct vhost_log *log;
uint64_t logsize = size * sizeof(*(log->log));
@@ -334,9 +334,7 @@ static struct vhost_log *vhost_log_alloc(uint64_t size,
bool share)
log = g_new0(struct vhost_log, 1);
if (share) {
- log->log = qemu_memfd_alloc("vhost-log", logsize,
- F_SEAL_GROW | F_SEAL_SHRINK |
F_SEAL_SEAL,
- &fd);
+ log->log = qemu_mmap_alloc(path, logsize, &fd);
memset(log->log, 0, logsize);
} else {
log->log = g_malloc0(logsize);
@@ -349,12 +347,12 @@ static struct vhost_log *vhost_log_alloc(uint64_t
size, bool share)
return log;
}
-static struct vhost_log *vhost_log_get(uint64_t size, bool share)
+static struct vhost_log *vhost_log_get(char *path, uint64_t size, bool
share)
{
struct vhost_log *log = share ? vhost_log_shm : vhost_log;
if (!log || log->size != size) {
- log = vhost_log_alloc(size, share);
+ log = vhost_log_alloc(path, size, share);
if (share) {
vhost_log_shm = log;
} else {
@@ -388,8 +386,7 @@ static void vhost_log_put(struct vhost_dev *dev, bool
sync)
g_free(log->log);
vhost_log = NULL;
} else if (vhost_log_shm == log) {
- qemu_memfd_free(log->log, log->size * sizeof(*(log->log)),
- log->fd);
+ qemu_mmap_free(log->log, log->size * sizeof(*(log->log)),
log->fd);
vhost_log_shm = NULL;
}
@@ -405,9 +402,12 @@ static bool vhost_dev_log_is_shared(struct vhost_dev
*dev)
static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t
size)
{
- struct vhost_log *log = vhost_log_get(size,
vhost_dev_log_is_shared(dev));
- uint64_t log_base = (uintptr_t)log->log;
int r;
+ struct vhost_log *log;
+ uint64_t log_base;
+
+ log = vhost_log_get(dev->log_filename, size,
vhost_dev_log_is_shared(dev));
+ log_base = (uintptr_t)log->log;
/* inform backend of log switching, this must be done before
releasing the current log, to ensure no logging is lost */
@@ -1049,7 +1049,8 @@ static void vhost_virtqueue_cleanup(struct
vhost_virtqueue *vq)
}
int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
- VhostBackendType backend_type, uint32_t
busyloop_timeout)
+ VhostBackendType backend_type,
+ uint32_t busyloop_timeout, char *vhostlog)
{
uint64_t features;
int i, r, n_initialized_vqs = 0;
@@ -1118,11 +1119,18 @@ int vhost_dev_init(struct vhost_dev *hdev, void
*opaque,
.priority = 10
};
+ hdev->log = NULL;
+ hdev->log_size = 0;
+ hdev->log_enabled = false;
+ hdev->log_filename = vhostlog ? g_strdup(vhostlog) : NULL;
+ g_free(vhostlog);
+
if (hdev->migration_blocker == NULL) {
if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
error_setg(&hdev->migration_blocker,
"Migration disabled: vhost lacks VHOST_F_LOG_ALL
feature.");
- } else if (!qemu_memfd_check()) {
+ } else if (vhost_dev_log_is_shared(hdev) &&
+ !qemu_mmap_check(hdev->log_filename)) {
error_setg(&hdev->migration_blocker,
"Migration disabled: failed to allocate shared
memory");
}
@@ -1135,9 +1143,6 @@ int vhost_dev_init(struct vhost_dev *hdev, void
*opaque,
hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
hdev->n_mem_sections = 0;
hdev->mem_sections = NULL;
- hdev->log = NULL;
- hdev->log_size = 0;
- hdev->log_enabled = false;
hdev->started = false;
hdev->memory_changed = false;
memory_listener_register(&hdev->memory_listener,
&address_space_memory);
@@ -1175,6 +1180,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev)
if (hdev->vhost_ops) {
hdev->vhost_ops->vhost_backend_cleanup(hdev);
}
+ g_free(hdev->log_filename);
assert(!hdev->log);
memset(hdev, 0, sizeof(struct vhost_dev));
@@ -1335,7 +1341,8 @@ int vhost_dev_start(struct vhost_dev *hdev,
VirtIODevice *vdev)
uint64_t log_base;
hdev->log_size = vhost_get_log_size(hdev);
- hdev->log = vhost_log_get(hdev->log_size,
+ hdev->log = vhost_log_get(hdev->log_filename,
+ hdev->log_size,
vhost_dev_log_is_shared(hdev));
log_base = (uintptr_t)hdev->log->log;
r = hdev->vhost_ops->vhost_set_log_base(hdev,
@@ -52,6 +52,7 @@ struct vhost_dev {
uint64_t max_queues;
bool started;
bool log_enabled;
+ char *log_filename;
uint64_t log_size;
Error *migration_blocker;
bool memory_changed;
@@ -65,7 +66,8 @@ struct vhost_dev {
int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
VhostBackendType backend_type,
- uint32_t busyloop_timeout);
+ uint32_t busyloop_timeout,
+ char *vhostlog);
void vhost_dev_cleanup(struct vhost_dev *hdev);
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
@@ -12,6 +12,7 @@ typedef struct VhostNetOptions {
NetClientState *net_backend;
uint32_t busyloop_timeout;
void *opaque;
+ char *vhostlog;
} VhostNetOptions;
uint64_t vhost_net_get_max_queues(VHostNetState *net);
new file mode 100644
@@ -0,0 +1,10 @@
+#ifndef QEMU_MMAP_FILE_H
+#define QEMU_MMAP_FILE_H
+
+#include "qemu-common.h"
+
+void *qemu_mmap_alloc(const char *path, size_t size, int *fd);
+void qemu_mmap_free(void *ptr, size_t size, int fd);
+bool qemu_mmap_check(const char *path);
+
+#endif
@@ -699,6 +699,12 @@ static void net_init_tap_one(const NetdevTapOptions
*tap, NetClientState *peer,
}
options.opaque = (void *)(uintptr_t)vhostfd;
+ if (tap->has_vhostlog) {
+ options.vhostlog = g_strdup(tap->vhostlog);
+ } else {
+ options.vhostlog = NULL;
+ }
+
s->vhost_net = vhost_net_init(&options);
if (!s->vhost_net) {
error_setg(errp,
@@ -2640,6 +2640,8 @@
#
# @vhostforce: #optional vhost on for non-MSIX virtio guests
#
+# @vhostlog: #optional file or directory for vhost backend log
+#
# @queues: #optional number of queues to be created for multiqueue capable
tap
#
# @poll-us: #optional maximum number of microseconds that could
@@ -2662,6 +2664,7 @@
'*vhostfd': 'str',
'*vhostfds': 'str',
'*vhostforce': 'bool',
+ '*vhostlog': 'str',
'*queues': 'uint32',
'*poll-us': 'uint32'} }
@@ -1599,7 +1599,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
#else
"-netdev
tap,id=str[,fd=h][,fds=x:y:...:z][,ifname=name][,script=file][,downscript=dfile]\n"
"
[,br=bridge][,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off]\n"
- "
[,vhostfd=h][,vhostfds=x:y:...:z][,vhostforce=on|off][,queues=n]\n"
+ "
[,vhostfd=h][,vhostfds=x:y:...:z][,vhostforce=on|off][,vhostlog=file|dir][,queues=n]\n"
" [,poll-us=n]\n"
" configure a host TAP network backend with ID 'str'\n"
" connected to a bridge (default="
DEFAULT_BRIDGE_INTERFACE ")\n"
@@ -1618,6 +1618,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
" use vhost=on to enable experimental in kernel
accelerator\n"
" (only has effect for virtio guests which use
MSIX)\n"
" use vhostforce=on to force vhost on for non-MSIX
virtio guests\n"
+ " use 'vhostlog=file|dir' file or directory for vhost
backend log\n"
" use 'vhostfd=h' to connect to an already opened vhost
net device\n"
" use 'vhostfds=x:y:...:z to connect to multiple
already opened vhost net devices\n"
" use 'queues=n' to specify the number of queues to be
created for multiqueue TAP\n"
@@ -3,6 +3,7 @@ util-obj-y += bufferiszero.o
util-obj-$(CONFIG_POSIX) += compatfd.o
util-obj-$(CONFIG_POSIX) += event_notifier-posix.o
util-obj-$(CONFIG_POSIX) += mmap-alloc.o
+util-obj-$(CONFIG_POSIX) += mmap-file.o
util-obj-$(CONFIG_POSIX) += oslib-posix.o
util-obj-$(CONFIG_POSIX) += qemu-openpty.o
util-obj-$(CONFIG_POSIX) += qemu-thread-posix.o
new file mode 100644
@@ -0,0 +1,153 @@
+/*
+ * Support for file backed by mmaped host memory.
+ *
+ * Authors:
+ * Rafael David Tinoco <rafael.tinoco@canonical.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/mmap-file.h"
+
+static char *qemu_mmap_rand_name(void)
+{
+ char *name;
+ GRand *rsufix;
+ guint32 sufix;
+
+ rsufix = g_rand_new();
+ sufix = g_rand_int(rsufix);
+ g_free(rsufix);
+ name = g_strdup_printf("mmap-%u", sufix);
+
+ return name;
+}
+
+static inline void qemu_mmap_rand_name_free(char *str)
+{
+ g_free(str);
+}
+
+static bool qemu_mmap_is(const char *path, mode_t what)
+{
+ struct stat s;
+
+ memset(&s, 0, sizeof(struct stat));
+ if (stat(path, &s)) {
+ perror("stat");
+ goto negative;
+ }
+
+ if ((s.st_mode & S_IFMT) == what) {
+ return true;
+ }
+
+negative:
+ return false;
+}
+
+static inline bool qemu_mmap_is_file(const char *path)
+{
+ return qemu_mmap_is(path, S_IFREG);
+}
+
+static inline bool qemu_mmap_is_dir(const char *path)
+{
+ return qemu_mmap_is(path, S_IFDIR);
+}
+
+static void *qemu_mmap_alloc_file(const char *filepath, size_t size, int
*fd)
+{
+ void *ptr;
+ int mfd = -1;
+
+ *fd = -1;
+
+ mfd = open(filepath, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
+ if (mfd == -1) {
+ perror("open");
+ return NULL;
+ }
+
+ unlink(filepath);
+
+ if (ftruncate(mfd, size) == -1) {
+ perror("ftruncate");
+ close(mfd);
+ return NULL;
+ }
+
+ ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0);
+ if (ptr == MAP_FAILED) {
+ perror("mmap");
+ close(mfd);
+ return NULL;
+ }
+
+ *fd = mfd;
+ return ptr;
+}
+
+static void *qemu_mmap_alloc_dir(const char *dirpath, size_t size, int *fd)
+{
+ void *ptr;
+ char *file, *rand, *tmp, *dir2use = NULL;
+
+ if (dirpath && !qemu_mmap_is_dir(dirpath)) {
+ return NULL;
+ }
+
+ tmp = (char *) g_get_tmp_dir();
+ dir2use = dirpath ? (char *) dirpath : tmp;
+ rand = qemu_mmap_rand_name();
+ file = g_strdup_printf("%s/%s", dir2use, rand);
+ ptr = qemu_mmap_alloc_file(file, size, fd);
+ g_free(tmp);
+ qemu_mmap_rand_name_free(rand);
+
+ return ptr;
+}
+
+/*
+ * "path" can be:
+ *
+ * filename = full path for the file to back mmap
+ * dir path = full dir path where to create random file for mmap
+ * null = will use <tmpdir> to create random file for mmap
+ */
+void *qemu_mmap_alloc(const char *path, size_t size, int *fd)
+{
+ if (!path || qemu_mmap_is_dir(path)) {
+ return qemu_mmap_alloc_dir(path, size, fd);
+ }
+
+ return qemu_mmap_alloc_file(path, size, fd);
+}
+
+void qemu_mmap_free(void *ptr, size_t size, int fd)
+{
+ if (ptr) {
+ munmap(ptr, size);
+ }
+
+ if (fd != -1) {
+ close(fd);
+ }
+}
+
+bool qemu_mmap_check(const char *path)
+{
+ void *ptr;
+ int fd = -1;
+ bool r = true;
+
+ ptr = qemu_mmap_alloc(path, 4096, &fd);
+ if (!ptr) {
+ r = false;
+ }
+ qemu_mmap_free(ptr, 4096, fd);
+
+ return r == true ? true : false;
Hi On Sat, Oct 22, 2016 at 10:01 AM Rafael David Tinoco < rafael.tinoco@canonical.com> wrote: Commit 31190ed7 added a migration blocker in vhost_dev_init() to check if memfd would succeed. It is better if this blocker first checks if vhost backend requires shared log. This will avoid a situation where a blocker is added inappropriately (e.g. shared log allocation fails when vhost backend doesn't support it). Could you make this a seperate patch? Commit: 35f9b6e added a fallback mechanism for systems not supporting memfd_create syscall (started being supported since 3.17). Backporting memfd_create might not be accepted for distros relying on older kernels. Nowadays there is no way for security driver to discover memfd filename to be created: <tmpdir>/memfd-XXXXXX. Also, because vhost log file descriptors can be passed to other processes, after discussion, we thought it is best to back mmap by using files that can be placed into a specific directory: this commit creates "vhostlog" argv parameter for such purpose. This will allow security drivers to operate on those files appropriately. Argv examples: -netdev tap,id=net0,vhost=on -netdev tap,id=net0,vhost=on,vhostlog=/tmp/guest.log -netdev tap,id=net0,vhost=on,vhostlog=/tmp Could it be only a filename? This would simplify testing. For vhost backends supporting shared logs, if vhostlog is non-existent, or a directory, random files are going to be created in the specified directory (or, for non-existent, in tmpdir). If vhostlog is specified, the filepath is always used when allocating vhost log files. Regarding testing, you add utility code mmap-file, could you make this a seperate commit, with unit tests? thanks Signed-off-by: Rafael David Tinoco <rafael.tinoco@canonical.com> --- hw/net/vhost_net.c | 4 +- hw/scsi/vhost-scsi.c | 2 +- hw/virtio/vhost-vsock.c | 2 +- hw/virtio/vhost.c | 41 +++++++------ include/hw/virtio/vhost.h | 4 +- include/net/vhost_net.h | 1 + include/qemu/mmap-file.h | 10 +++ net/tap.c | 6 ++ qapi-schema.json | 3 + qemu-options.hx | 3 +- util/Makefile.objs | 1 + util/mmap-file.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 207 insertions(+), 23 deletions(-) create mode 100644 include/qemu/mmap-file.h create mode 100644 util/mmap-file.c +} -- 2.9.3