@@ -269,7 +269,8 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
void *ptr = memory_region_get_ram_ptr(&backend->mr);
uint64_t sz = memory_region_size(&backend->mr);
- os_mem_prealloc(fd, ptr, sz, backend->prealloc_threads, &local_err);
+ os_mem_prealloc(fd, ptr, sz, backend->prealloc_threads, backend->policy,
+ backend->host_nodes, MAX_NODES + 1, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
@@ -415,7 +416,8 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
*/
if (backend->prealloc) {
os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
- backend->prealloc_threads, &local_err);
+ backend->prealloc_threads, backend->policy,
+ backend->host_nodes, MAX_NODES + 1, &local_err);
if (local_err) {
goto out;
}
@@ -688,7 +688,8 @@ unsigned long qemu_getauxval(unsigned long type);
void qemu_set_tty_echo(int fd, bool echo);
void os_mem_prealloc(int fd, char *area, size_t sz, int smp_cpus,
- Error **errp);
+ int policy, unsigned long *node_bitmap,
+ unsigned long max_node, Error **errp);
/**
* qemu_get_pid_name:
@@ -87,3 +87,5 @@ if have_block
if_false: files('filemonitor-stub.c'))
util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c'))
endif
+
+util_ss.add(when: 'CONFIG_NUMA', if_true: numa)
@@ -38,11 +38,13 @@
#include "qemu/sockets.h"
#include "qemu/thread.h"
#include <libgen.h>
+#include "qemu/bitmap.h"
#include "qemu/cutils.h"
#include "qemu/compiler.h"
#ifdef CONFIG_LINUX
#include <sys/syscall.h>
+#include <numaif.h>
#endif
#ifdef __FreeBSD__
@@ -79,6 +81,9 @@ struct MemsetThread {
size_t hpagesize;
QemuThread pgthread;
sigjmp_buf env;
+ int policy;
+ unsigned long *node_bitmap;
+ unsigned long max_node;
};
typedef struct MemsetThread MemsetThread;
@@ -464,6 +469,18 @@ static void *do_touch_pages(void *arg)
}
qemu_mutex_unlock(&page_mutex);
+#ifdef CONFIG_NUMA
+ if (memset_args->max_node &&
+ !bitmap_empty(memset_args->node_bitmap, memset_args->max_node)) {
+ long ret = set_mempolicy(memset_args->policy, memset_args->node_bitmap,
+ memset_args->max_node);
+ if (ret < 0) {
+ memset_thread_failed = true;
+ return NULL;
+ }
+ }
+#endif
+
/* unblock SIGBUS */
sigemptyset(&set);
sigaddset(&set, SIGBUS);
@@ -510,7 +527,8 @@ static inline int get_memset_num_threads(int smp_cpus)
}
static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
- int smp_cpus)
+ int smp_cpus, int policy,
+ unsigned long *node_bitmap, unsigned long max_node)
{
static gsize initialized = 0;
size_t numpages_per_thread, leftover;
@@ -533,6 +551,9 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
memset_thread[i].addr = addr;
memset_thread[i].numpages = numpages_per_thread + (i < leftover);
memset_thread[i].hpagesize = hpagesize;
+ memset_thread[i].policy = policy;
+ memset_thread[i].node_bitmap = node_bitmap;
+ memset_thread[i].max_node = max_node;
qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
do_touch_pages, &memset_thread[i],
QEMU_THREAD_JOINABLE);
@@ -554,7 +575,8 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
}
void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
- Error **errp)
+ int policy, unsigned long *node_bitmap,
+ unsigned long max_node, Error **errp)
{
int ret;
struct sigaction act, oldact;
@@ -573,7 +595,8 @@ void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
}
/* touch pages simultaneously */
- if (touch_all_pages(area, hpagesize, numpages, smp_cpus)) {
+ if (touch_all_pages(area, hpagesize, numpages, smp_cpus, policy,
+ node_bitmap, max_node)) {
error_setg(errp, "os_mem_prealloc: Insufficient free host memory "
"pages available to allocate guest RAM");
}
@@ -371,7 +371,8 @@ int getpagesize(void)
}
void os_mem_prealloc(int fd, char *area, size_t memory, int smp_cpus,
- Error **errp)
+ int policy, unsigned long *node_bitmap,
+ unsigned long max_node, Error **errp)
{
int i;
size_t pagesize = qemu_real_host_page_size;
This is needed for cases where we want to make sure that a shared memory region gets allocated from a specific NUMA node. This is impossible to do with mbind(2) because it ignores the policy for memory mapped with MAP_SHARED. We work around this by calling set_mempolicy(2) from the prealloc threads instead, so that the pages those threads touch during preallocation are allocated according to the requested policy. Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru> --- backends/hostmem.c | 6 ++++-- include/qemu/osdep.h | 3 ++- util/meson.build | 2 ++ util/oslib-posix.c | 29 ++++++++++++++++++++++++++--- util/oslib-win32.c | 3 ++- 5 files changed, 36 insertions(+), 7 deletions(-)