@@ -460,6 +460,20 @@ static void machine_set_mem_merge(Object *obj, bool value, Error **errp)
ms->mem_merge = value;
}
+static int machine_get_anon_alloc(Object *obj, Error **errp)
+{
+ MachineState *ms = MACHINE(obj);
+
+ return ms->anon_alloc;
+}
+
+static void machine_set_anon_alloc(Object *obj, int value, Error **errp)
+{
+ MachineState *ms = MACHINE(obj);
+
+ ms->anon_alloc = value;
+}
+
static bool machine_get_usb(Object *obj, Error **errp)
{
MachineState *ms = MACHINE(obj);
@@ -1078,6 +1092,11 @@ static void machine_class_init(ObjectClass *oc, void *data)
object_class_property_set_description(oc, "mem-merge",
"Enable/disable memory merge support");
+ object_class_property_add_enum(oc, "anon-alloc", "AnonAllocOption",
+ &AnonAllocOption_lookup,
+ machine_get_anon_alloc,
+ machine_set_anon_alloc);
+
object_class_property_add_bool(oc, "usb",
machine_get_usb, machine_set_usb);
object_class_property_set_description(oc, "usb",
@@ -393,6 +393,7 @@ struct MachineState {
bool enable_graphics;
ConfidentialGuestSupport *cgs;
HostMemoryBackend *memdev;
+ AnonAllocOption anon_alloc;
/*
* convenience alias to ram_memdev_id backend memory region
* or to numa container memory region
@@ -1898,3 +1898,17 @@
{ 'command': 'x-query-interrupt-controllers',
'returns': 'HumanReadableText',
'features': [ 'unstable' ]}
+
+##
+# @AnonAllocOption:
+#
+# An enumeration of the options for allocating anonymous guest memory.
+#
+# @mmap: allocate using mmap MAP_ANON
+#
+# @memfd: allocate using memfd_create
+#
+# Since: 9.2
+##
+{ 'enum': 'AnonAllocOption',
+ 'data': [ 'mmap', 'memfd' ] }
@@ -38,6 +38,7 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
" nvdimm=on|off controls NVDIMM support (default=off)\n"
" memory-encryption=@var{} memory encryption object to use (default=none)\n"
" hmat=on|off controls ACPI HMAT support (default=off)\n"
+ " anon-alloc=mmap|memfd allocate anonymous guest RAM using mmap MAP_ANON or memfd_create (default: mmap)\n"
" memory-backend='backend-id' specifies explicitly provided backend for main RAM (default=none)\n"
" cxl-fmw.0.targets.0=firsttarget,cxl-fmw.0.targets.1=secondtarget,cxl-fmw.0.size=size[,cxl-fmw.0.interleave-granularity=granularity]\n",
QEMU_ARCH_ALL)
@@ -101,6 +102,16 @@ SRST
Enables or disables ACPI Heterogeneous Memory Attribute Table
(HMAT) support. The default is off.
+ ``anon-alloc=mmap|memfd``
+ Allocate anonymous guest RAM using mmap MAP_ANON (the default)
+ or memfd_create. This option applies to memory allocated as a
+ side effect of creating various devices. It does not apply to
+ memory-backend-objects, whether explicitly specified on the
+ command line, or implicitly created by the -m command line
+ option.
+
+ Some migration modes require anon-alloc=memfd.
+
``memory-backend='id'``
An alternative to legacy ``-mem-path`` and ``mem-prealloc`` options.
Allows to use a memory backend as main RAM.
@@ -47,6 +47,7 @@
#include "qemu/qemu-print.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
+#include "qemu/memfd.h"
#include "exec/memory.h"
#include "exec/ioport.h"
#include "sysemu/dma.h"
@@ -69,6 +70,8 @@
#include "qemu/pmem.h"
+#include "qapi/qapi-types-migration.h"
+#include "migration/options.h"
#include "migration/vmstate.h"
#include "qemu/range.h"
@@ -1849,6 +1852,35 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
qemu_mutex_unlock_ramlist();
return;
}
+
+ } else if (current_machine->anon_alloc == ANON_ALLOC_OPTION_MEMFD &&
+ !object_dynamic_cast(new_block->mr->parent_obj.parent,
+ TYPE_MEMORY_BACKEND)) {
+ size_t max_length = new_block->max_length;
+ MemoryRegion *mr = new_block->mr;
+ const char *name = memory_region_name(mr);
+
+ new_block->mr->align = QEMU_VMALLOC_ALIGN;
+ new_block->flags |= RAM_SHARED;
+
+ if (new_block->fd == -1) {
+ new_block->fd = qemu_memfd_create(name, max_length + mr->align,
+ 0, 0, 0, errp);
+ }
+
+ if (new_block->fd >= 0) {
+ int mfd = new_block->fd;
+ qemu_set_cloexec(mfd);
+ new_block->host = file_ram_alloc(new_block, max_length, mfd,
+ false, 0, errp);
+ }
+ if (!new_block->host) {
+ qemu_mutex_unlock_ramlist();
+ return;
+ }
+ memory_try_enable_merging(new_block->host, new_block->max_length);
+ free_on_error = true;
+
} else {
new_block->host = qemu_anon_ram_alloc(new_block->max_length,
&new_block->mr->align,
@@ -1932,6 +1964,9 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
ram_block_notify_add(new_block->host, new_block->used_length,
new_block->max_length);
}
+ trace_ram_block_add(memory_region_name(new_block->mr), new_block->flags,
+ new_block->fd, new_block->used_length,
+ new_block->max_length);
return;
out_free:
@@ -47,3 +47,6 @@ dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"P
# cpu-throttle.c
cpu_throttle_set(int new_throttle_pct) "set guest CPU throttled by %d%%"
+
+#physmem.c
+ram_block_add(const char *name, uint32_t flags, int fd, size_t used_length, size_t max_length) "%s, flags %u, fd %d, len %lu, maxlen %lu"
Allocate anonymous memory using mmap MAP_ANON or memfd_create depending on the value of the anon-alloc machine property. This option applies to memory allocated as a side effect of creating various devices. It does not apply to memory-backend-objects, whether explicitly specified on the command line, or implicitly created by the -m command line option. The memfd option is intended to support new migration modes, in which the memory region can be transferred in place to a new QEMU process, by sending the memfd file descriptor to the process. Memory contents are preserved, and if the mode also transfers device descriptors, then pages that are locked in memory for DMA remain locked. This behavior is a pre-requisite for supporting vfio, vdpa, and iommufd devices with the new modes. Signed-off-by: Steve Sistare <steven.sistare@oracle.com> --- hw/core/machine.c | 19 +++++++++++++++++++ include/hw/boards.h | 1 + qapi/machine.json | 14 ++++++++++++++ qemu-options.hx | 11 +++++++++++ system/physmem.c | 35 +++++++++++++++++++++++++++++++++++ system/trace-events | 3 +++ 6 files changed, 83 insertions(+)