@@ -20,6 +20,7 @@
#include "qom/object_interfaces.h"
#include "qemu/mmap-alloc.h"
#include "qemu/madvise.h"
+#include "hw/qdev-core.h"
#ifdef CONFIG_NUMA
#include <numaif.h>
@@ -237,7 +238,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
uint64_t sz = memory_region_size(&backend->mr);
if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
- backend->prealloc_context, errp)) {
+ backend->prealloc_context, false, errp)) {
return;
}
backend->prealloc = true;
@@ -323,6 +324,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
void *ptr;
uint64_t sz;
+ bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED);
if (!bc->alloc) {
return;
@@ -402,7 +404,8 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
if (backend->prealloc && !qemu_prealloc_mem(memory_region_get_fd(&backend->mr),
ptr, sz,
backend->prealloc_threads,
- backend->prealloc_context, errp)) {
+ backend->prealloc_context,
+ async, errp)) {
return;
}
}
@@ -605,7 +605,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
int fd = memory_region_get_fd(&vmem->memdev->mr);
Error *local_err = NULL;
- if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) {
+ if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) {
static bool warned;
/*
@@ -1248,7 +1248,7 @@ static int virtio_mem_prealloc_range_cb(VirtIOMEM *vmem, void *arg,
int fd = memory_region_get_fd(&vmem->memdev->mr);
Error *local_err = NULL;
- if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) {
+ if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) {
error_report_err(local_err);
return -ENOMEM;
}
@@ -1083,6 +1083,11 @@ typedef enum MachineInitPhase {
*/
PHASE_ACCEL_CREATED,
+ /*
+ * Late backend objects have been created and initialized.
+ */
+ PHASE_LATE_BACKENDS_CREATED,
+
/*
* machine_class->init has been called, thus creating any embedded
* devices and validating machine properties. Devices created at
@@ -680,6 +680,8 @@ typedef struct ThreadContext ThreadContext;
* @area: start address of the are to preallocate
* @sz: the size of the area to preallocate
* @max_threads: maximum number of threads to use
+ * @tc: thread context used to create the prealloc threads, NULL if not in use
+ * @async: request asynchronous preallocation; silently ignored without @tc
* @errp: returns an error if this function fails
*
* Preallocate memory (populate/prefault page tables writable) for the virtual
@@ -687,10 +689,24 @@ typedef struct ThreadContext ThreadContext;
* each page in the area was faulted in writable at least once, for example,
* after allocating file blocks for mapped files.
*
+ * When setting @async, allocation might be performed asynchronously.
+ * qemu_finish_async_prealloc_mem() must be called to finish any asynchronous
+ * preallocation.
+ *
* Return: true on success, else false setting @errp with error.
*/
bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
- ThreadContext *tc, Error **errp);
+ ThreadContext *tc, bool async, Error **errp);
+
+/**
+ * qemu_finish_async_prealloc_mem:
+ * @errp: returns an error if this function fails
+ *
+ * Finish all outstanding asynchronous memory preallocation.
+ *
+ * Return: true on success, else false setting @errp with error.
+ */
+bool qemu_finish_async_prealloc_mem(Error **errp);
/**
* qemu_get_pid_name:
@@ -2013,6 +2013,14 @@ static void qemu_create_late_backends(void)
object_option_foreach_add(object_create_late);
+ /*
+ * Wait for any outstanding memory prealloc from created memory
+ * backends to complete.
+ */
+ if (!qemu_finish_async_prealloc_mem(&error_fatal)) {
+ exit(1);
+ }
+
if (tpm_init() < 0) {
exit(1);
}
@@ -3699,6 +3707,7 @@ void qemu_init(int argc, char **argv)
* over memory-backend-file objects).
*/
qemu_create_late_backends();
+ phase_advance(PHASE_LATE_BACKENDS_CREATED);
/*
* Note: creates a QOM object, must run only after global and
@@ -42,6 +42,7 @@
#include "qemu/cutils.h"
#include "qemu/units.h"
#include "qemu/thread-context.h"
+#include "qemu/main-loop.h"
#ifdef CONFIG_LINUX
#include <sys/syscall.h>
@@ -63,11 +64,15 @@
struct MemsetThread;
+static QLIST_HEAD(, MemsetContext) memset_contexts =
+ QLIST_HEAD_INITIALIZER(memset_contexts);
+
typedef struct MemsetContext {
bool all_threads_created;
bool any_thread_failed;
struct MemsetThread *threads;
int num_threads;
+ QLIST_ENTRY(MemsetContext) next;
} MemsetContext;
struct MemsetThread {
@@ -412,19 +417,44 @@ static inline int get_memset_num_threads(size_t hpagesize, size_t numpages,
return ret;
}
+static int wait_and_free_mem_prealloc_context(MemsetContext *context)
+{
+ int i, ret = 0, tmp;
+
+ for (i = 0; i < context->num_threads; i++) {
+ tmp = (uintptr_t)qemu_thread_join(&context->threads[i].pgthread);
+
+ if (tmp) {
+ ret = tmp;
+ }
+ }
+ g_free(context->threads);
+ g_free(context);
+ return ret;
+}
+
static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
- int max_threads, ThreadContext *tc,
+ int max_threads, ThreadContext *tc, bool async,
bool use_madv_populate_write)
{
static gsize initialized = 0;
- MemsetContext context = {
- .num_threads = get_memset_num_threads(hpagesize, numpages, max_threads),
- };
+ MemsetContext *context = g_malloc0(sizeof(MemsetContext));
size_t numpages_per_thread, leftover;
void *(*touch_fn)(void *);
- int ret = 0, i = 0;
+ int ret, i = 0;
char *addr = area;
+ /*
+ * Asynchronous preallocation is only allowed when using MADV_POPULATE_WRITE
+ * and prealloc context for thread placement.
+ */
+ if (!use_madv_populate_write || !tc) {
+ async = false;
+ }
+
+ context->num_threads =
+ get_memset_num_threads(hpagesize, numpages, max_threads);
+
if (g_once_init_enter(&initialized)) {
qemu_mutex_init(&page_mutex);
qemu_cond_init(&page_cond);
@@ -432,8 +462,16 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
}
if (use_madv_populate_write) {
- /* Avoid creating a single thread for MADV_POPULATE_WRITE */
- if (context.num_threads == 1) {
+ /*
+ * Avoid creating a single thread for MADV_POPULATE_WRITE when
+ * preallocating synchronously.
+ */
+ if (context->num_threads == 1 && !async) {
+ /*
+ * No thread will use the context on this path; free it before
+ * qemu_madvise() so no return leaks it and errno stays intact.
+ */
+ g_free(context);
if (qemu_madvise(area, hpagesize * numpages,
QEMU_MADV_POPULATE_WRITE)) {
return -errno;
@@ -445,50 +478,86 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
touch_fn = do_touch_pages;
}
- context.threads = g_new0(MemsetThread, context.num_threads);
- numpages_per_thread = numpages / context.num_threads;
- leftover = numpages % context.num_threads;
- for (i = 0; i < context.num_threads; i++) {
- context.threads[i].addr = addr;
- context.threads[i].numpages = numpages_per_thread + (i < leftover);
- context.threads[i].hpagesize = hpagesize;
- context.threads[i].context = &context;
+ context->threads = g_new0(MemsetThread, context->num_threads);
+ numpages_per_thread = numpages / context->num_threads;
+ leftover = numpages % context->num_threads;
+ for (i = 0; i < context->num_threads; i++) {
+ context->threads[i].addr = addr;
+ context->threads[i].numpages = numpages_per_thread + (i < leftover);
+ context->threads[i].hpagesize = hpagesize;
+ context->threads[i].context = context;
if (tc) {
- thread_context_create_thread(tc, &context.threads[i].pgthread,
+ thread_context_create_thread(tc, &context->threads[i].pgthread,
"touch_pages",
- touch_fn, &context.threads[i],
+ touch_fn, &context->threads[i],
QEMU_THREAD_JOINABLE);
} else {
- qemu_thread_create(&context.threads[i].pgthread, "touch_pages",
- touch_fn, &context.threads[i],
+ qemu_thread_create(&context->threads[i].pgthread, "touch_pages",
+ touch_fn, &context->threads[i],
QEMU_THREAD_JOINABLE);
}
- addr += context.threads[i].numpages * hpagesize;
+ addr += context->threads[i].numpages * hpagesize;
+ }
+
+ if (async) {
+ /*
+ * async requests currently require the BQL. Add it to the list and kick
+ * preallocation off during qemu_finish_async_prealloc_mem().
+ */
+ assert(bql_locked());
+ QLIST_INSERT_HEAD(&memset_contexts, context, next);
+ return 0;
}
if (!use_madv_populate_write) {
- sigbus_memset_context = &context;
+ sigbus_memset_context = context;
}
qemu_mutex_lock(&page_mutex);
- context.all_threads_created = true;
+ context->all_threads_created = true;
qemu_cond_broadcast(&page_cond);
qemu_mutex_unlock(&page_mutex);
- for (i = 0; i < context.num_threads; i++) {
- int tmp = (uintptr_t)qemu_thread_join(&context.threads[i].pgthread);
+ ret = wait_and_free_mem_prealloc_context(context);
+ if (!use_madv_populate_write) {
+ sigbus_memset_context = NULL;
+ }
+ return ret;
+}
+
+bool qemu_finish_async_prealloc_mem(Error **errp)
+{
+ int ret = 0, tmp;
+ MemsetContext *context, *next_context;
+
+ /* Waiting for preallocation requires the BQL. */
+ assert(bql_locked());
+ if (QLIST_EMPTY(&memset_contexts)) {
+ return true;
+ }
+
+ qemu_mutex_lock(&page_mutex);
+ QLIST_FOREACH(context, &memset_contexts, next) {
+ context->all_threads_created = true;
+ }
+ qemu_cond_broadcast(&page_cond);
+ qemu_mutex_unlock(&page_mutex);
+
+ QLIST_FOREACH_SAFE(context, &memset_contexts, next, next_context) {
+ QLIST_REMOVE(context, next);
+ tmp = wait_and_free_mem_prealloc_context(context);
if (tmp) {
ret = tmp;
}
}
- if (!use_madv_populate_write) {
- sigbus_memset_context = NULL;
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "qemu_prealloc_mem: preallocating memory failed");
+ return false;
}
- g_free(context.threads);
-
- return ret;
+ return true;
}
static bool madv_populate_write_possible(char *area, size_t pagesize)
@@ -498,7 +567,7 @@ static bool madv_populate_write_possible(char *area, size_t pagesize)
}
bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
- ThreadContext *tc, Error **errp)
+ ThreadContext *tc, bool async, Error **errp)
{
static gsize initialized;
int ret;
@@ -540,7 +609,7 @@ bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
}
/* touch pages simultaneously */
- ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc,
+ ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc, async,
use_madv_populate_write);
if (ret) {
error_setg_errno(errp, -ret,
@@ -265,7 +265,7 @@ int getpagesize(void)
}
bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
- ThreadContext *tc, Error **errp)
+ ThreadContext *tc, bool async, Error **errp)
{
int i;
size_t pagesize = qemu_real_host_page_size();
@@ -278,6 +278,12 @@ bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
return true;
}
+bool qemu_finish_async_prealloc_mem(Error **errp)
+{
+ /* async prealloc not supported, there is nothing to finish */
+ return true;
+}
+
char *qemu_get_pid_name(pid_t pid)
{
/* XXX Implement me */