@@ -10,11 +10,20 @@
* See the COPYING file in the top-level directory.
*/
#include "sysemu/hostmem.h"
-#include "sysemu/sysemu.h"
#include "qapi/visitor.h"
+#include "qapi-types.h"
+#include "qapi-visit.h"
#include "qapi/qmp/qerror.h"
#include "qom/object_interfaces.h"
+#ifdef CONFIG_NUMA
+#include <numaif.h>
+/*
+ * backend->policy (a HostMemPolicy) is handed unchanged to mbind() as its
+ * mode argument, so every QAPI enum value has to be numerically equal to
+ * the matching MPOL_* constant from <numaif.h>.  Break the build if the
+ * two ever diverge.
+ */
+QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
+QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
+QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
+QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
+#endif
+
static void
host_memory_backend_get_size(Object *obj, Visitor *v, void *opaque,
const char *name, Error **errp)
@@ -49,6 +58,84 @@ host_memory_backend_set_size(Object *obj, Visitor *v, void *opaque,
backend->size = value;
}
+/*
+ * Property getter for "host-nodes".
+ *
+ * Converts the backend's host_nodes bitmap into a uint16List holding the
+ * index of every set bit, in ascending order, and passes that list to the
+ * visitor @v under @name.  Visitor failures are reported through @errp.
+ *
+ * An empty bitmap is reported as an empty list rather than by skipping the
+ * visit, so the getter always emits a value.
+ */
+static void
+get_host_nodes(Object *obj, Visitor *v, void *opaque, const char *name,
+               Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+    uint16List *host_nodes = NULL;
+    uint16List **node = &host_nodes;
+    unsigned long value;
+
+    /*
+     * find_first_bit()/find_next_bit() return MAX_NODES once no further bit
+     * is set, which terminates the loop.  @node always points at the link
+     * that the next element has to be stored in.
+     */
+    for (value = find_first_bit(backend->host_nodes, MAX_NODES);
+         value < MAX_NODES;
+         value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1)) {
+        *node = g_malloc0(sizeof(**node));
+        (*node)->value = value;
+        node = &(*node)->next;
+    }
+
+    visit_type_uint16List(v, &host_nodes, name, errp);
+
+    /* The list was allocated above; release it now that it was visited. */
+    qapi_free_uint16List(host_nodes);
+}
+
+/*
+ * Property setter for "host-nodes".
+ *
+ * Reads a list of host NUMA node indexes from the visitor @v and sets the
+ * corresponding bits in the backend's host_nodes bitmap.  Nodes are added
+ * to the existing mask; bits that are already set are never cleared.
+ *
+ * Errors reported through @errp:
+ *  - any failure of the visitor while reading the list;
+ *  - a node index that is not below MAX_NODES (the bitmap is left untouched
+ *    in that case);
+ *  - when built without CONFIG_NUMA, every call fails.
+ */
+static void
+set_host_nodes(Object *obj, Visitor *v, void *opaque, const char *name,
+               Error **errp)
+{
+#ifdef CONFIG_NUMA
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+    Error *local_err = NULL;
+    uint16List *host_nodes = NULL;
+    uint16List *l;
+
+    visit_type_uint16List(v, &host_nodes, name, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        goto out;
+    }
+
+    /*
+     * Validate the whole list before touching the bitmap: the values are
+     * 16-bit, so they can exceed the MAX_NODES bits the bitmap provides,
+     * and a rejected request must not leave a half-applied mask behind.
+     */
+    for (l = host_nodes; l; l = l->next) {
+        if (l->value >= MAX_NODES) {
+            error_setg(errp, "Invalid host-nodes value: %d", l->value);
+            goto out;
+        }
+    }
+
+    for (l = host_nodes; l; l = l->next) {
+        bitmap_set(backend->host_nodes, l->value, 1);
+    }
+
+out:
+    /* Also releases a partially built list after a visitor error. */
+    qapi_free_uint16List(host_nodes);
+#else
+    error_setg(errp, "NUMA node binding are not supported by this QEMU");
+#endif
+}
+
+/*
+ * Property getter for "policy".
+ *
+ * Reports the backend's current HostMemPolicy to the visitor @v under
+ * @name, using HostMemPolicy_lookup as the enum's string table.  Visitor
+ * failures are reported through @errp.
+ */
+static void
+get_policy(Object *obj, Visitor *v, void *opaque, const char *name,
+           Error **errp)
+{
+    /* Copy the HostMemPolicy field into the int handed to the visitor. */
+    int current = MEMORY_BACKEND(obj)->policy;
+
+    visit_type_enum(v, &current, HostMemPolicy_lookup, NULL, name, errp);
+}
+
+/*
+ * Property setter for "policy".
+ *
+ * Reads a HostMemPolicy value from the visitor @v (string table
+ * HostMemPolicy_lookup) and stores it in the backend.
+ *
+ * Errors reported through @errp, in which case the stored policy is left
+ * unchanged:
+ *  - any failure of the visitor while reading the value;
+ *  - when built without CONFIG_NUMA, any policy other than
+ *    HOST_MEM_POLICY_DEFAULT.
+ */
+static void
+set_policy(Object *obj, Visitor *v, void *opaque, const char *name,
+           Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+    Error *local_err = NULL;
+    int policy = HOST_MEM_POLICY_DEFAULT;
+
+    /*
+     * Use a local Error so a failed visit is detected even when the caller
+     * passed errp == NULL; otherwise an unset value would be stored.
+     */
+    visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+#ifndef CONFIG_NUMA
+    if (policy != HOST_MEM_POLICY_DEFAULT) {
+        error_setg(errp, "NUMA policies are not supported by this QEMU");
+        return;
+    }
+#endif
+
+    backend->policy = policy;
+}
+
static bool host_memory_backend_get_merge(Object *obj, Error **errp)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
@@ -159,6 +246,12 @@ static void host_memory_backend_initfn(Object *obj)
object_property_add(obj, "size", "int",
host_memory_backend_get_size,
host_memory_backend_set_size, NULL, NULL, NULL);
+ object_property_add(obj, "host-nodes", "int",
+ get_host_nodes,
+ set_host_nodes, NULL, NULL, NULL);
+ object_property_add(obj, "policy", "str",
+ get_policy,
+ set_policy, NULL, NULL, NULL);
}
static void host_memory_backend_finalize(Object *obj)
@@ -200,6 +293,36 @@ host_memory_backend_memory_init(UserCreatable *uc, Error **errp)
if (!backend->dump) {
qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
}
+#ifdef CONFIG_NUMA
+ unsigned long maxnode = find_last_bit(backend->host_nodes, MAX_NODES);
+
+ /* check for invalid host-nodes and policies and give more verbose
+ * error messages than mbind(). */
+ if (maxnode != MAX_NODES && backend->policy == MPOL_DEFAULT) {
+ error_setg(errp, "host-nodes must be empty for policy default,"
+ " or you should explicitly specify a policy other"
+ " than default");
+ return;
+ } else if (maxnode == MAX_NODES && backend->policy != MPOL_DEFAULT) {
+ error_setg(errp, "host-nodes must be set for policy %s",
+ HostMemPolicy_lookup[backend->policy]);
+ return;
+ }
+
+ /* This is a workaround for a long standing bug in Linux'
+ * mbind implementation, which cuts off the last specified
+ * node.
+ */
+ if (mbind(ptr, sz, backend->policy, backend->host_nodes, maxnode + 2, 0)) {
+ error_setg_errno(errp, errno,
+ "cannot bind memory to host NUMA nodes");
+ return;
+ }
+#endif
+ /* Preallocate memory after the NUMA policy has been instantiated.
+ * This is necessary to guarantee memory is allocated with
+ * specified NUMA policy in place.
+ */
if (backend->prealloc) {
os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz);
}
@@ -12,8 +12,10 @@
#ifndef QEMU_HOSTMEM_H
#define QEMU_HOSTMEM_H
+#include "sysemu/sysemu.h" /* for MAX_NODES */
#include "qom/object.h"
#include "exec/memory.h"
+#include "qemu/bitmap.h"
#define TYPE_MEMORY_BACKEND "memory"
#define MEMORY_BACKEND(obj) \
@@ -52,6 +54,8 @@ struct HostMemoryBackend {
uint64_t size;
bool merge, dump;
bool prealloc, force_prealloc;
+ DECLARE_BITMAP(host_nodes, MAX_NODES);
+ HostMemPolicy policy;
MemoryRegion mr;
};
@@ -4737,3 +4737,23 @@
'*cpus': ['uint16'],
'*mem': 'size',
'*memdev': 'str' }}
+
+##
+# @HostMemPolicy
+#
+# Host memory policy types
+#
+# Each value must stay numerically equal to the matching Linux MPOL_*
+# constant (MPOL_DEFAULT, MPOL_PREFERRED, MPOL_BIND, MPOL_INTERLEAVE); the
+# host memory backend asserts this at build time when CONFIG_NUMA is
+# defined.
+#
+# @default: restore default policy, remove any nondefault policy
+#
+# @preferred: set the preferred host nodes for allocation
+#
+# @bind: a strict policy that restricts memory allocation to the
+# host nodes specified
+#
+# @interleave: memory allocations are interleaved across the set
+# of host nodes specified
+#
+# Since 2.1
+##
+{ 'enum': 'HostMemPolicy',
+ 'data': [ 'default', 'preferred', 'bind', 'interleave' ] }