@@ -2358,6 +2358,27 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
return ms->possible_cpus;
}
+/*
+ * Decide whether @nvdimm may be plugged into the virt machine @ms.
+ *
+ * Returns true when the device is acceptable. Otherwise sets @errp and
+ * returns false: either NVDIMM support is not enabled on the machine, or
+ * the device asks for the 'writeback' sync-dax mode, which is rejected here.
+ */
+static bool virt_nvdimm_validate(const MachineState *ms, NVDIMMDevice *nvdimm,
+                                 Error **errp)
+{
+    NvdimmSyncModes mode;
+
+    if (!ms->nvdimms_state->is_enabled) {
+        error_setg(errp, "nvdimm is not enabled: add 'nvdimm=on' to '-M'");
+        return false;
+    }
+
+    mode = object_property_get_enum(OBJECT(nvdimm), NVDIMM_SYNC_DAX_PROP,
+                                    "NvdimmSyncModes", &error_abort);
+    if (mode != NVDIMM_SYNC_MODES_WRITEBACK) {
+        /* Every mode other than 'writeback' is accepted. */
+        return true;
+    }
+
+    error_setg(errp, "NVDIMM device " NVDIMM_SYNC_DAX_PROP
+               "=%s mode unsupported", NvdimmSyncModes_str(mode));
+    return false;
+}
+
static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
@@ -2376,9 +2397,10 @@ static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
return;
}
- if (is_nvdimm && !ms->nvdimms_state->is_enabled) {
- error_setg(errp, "nvdimm is not enabled: add 'nvdimm=on' to '-M'");
- return;
+ if (is_nvdimm) {
+ if (!virt_nvdimm_validate(ms, NVDIMM(dev), errp)) {
+ return;
+ }
}
pc_dimm_pre_plug(PC_DIMM(dev), MACHINE(hotplug_dev), NULL, errp);
@@ -1211,6 +1211,27 @@ void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs)
g_free(i8259);
}
+/*
+ * Validate an NVDIMM that is about to be plugged into a PC machine.
+ *
+ * Sets @errp and returns false when NVDIMM support is not enabled on the
+ * machine, or when the device's sync-dax property is 'writeback', which
+ * this machine rejects as unsupported. Returns true otherwise.
+ */
+static bool pc_nvdimm_validate(const MachineState *ms, NVDIMMDevice *nvdimm,
+                               Error **errp)
+{
+    NvdimmSyncModes sync;
+
+    if (!ms->nvdimms_state->is_enabled) {
+        /*
+         * Keep the exact wording pc_memory_pre_plug() emitted before this
+         * check moved into a helper, so the user-visible message does not
+         * change as a side effect of the refactoring.
+         */
+        error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'");
+        return false;
+    }
+
+    sync = object_property_get_enum(OBJECT(nvdimm), NVDIMM_SYNC_DAX_PROP,
+                                    "NvdimmSyncModes", &error_abort);
+    if (sync == NVDIMM_SYNC_MODES_WRITEBACK) {
+        error_setg(errp, "NVDIMM device " NVDIMM_SYNC_DAX_PROP
+                   "=%s mode unsupported", NvdimmSyncModes_str(sync));
+        return false;
+    }
+
+    return true;
+}
+
static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
@@ -1233,9 +1254,10 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
return;
}
- if (is_nvdimm && !ms->nvdimms_state->is_enabled) {
- error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'");
- return;
+ if (is_nvdimm) {
+ if (!pc_nvdimm_validate(ms, NVDIMM(dev), errp)) {
+ return;
+ }
}
hotplug_handler_pre_plug(x86ms->acpi_dev, dev, &local_err);
@@ -96,6 +96,19 @@ static void nvdimm_set_uuid(Object *obj, Visitor *v, const char *name,
g_free(value);
}
+/* Enum property getter: report the sync-dax mode stored in the NVDIMM. */
+static int nvdimm_get_sync_mode(Object *obj, Error **errp G_GNUC_UNUSED)
+{
+    return NVDIMM(obj)->sync_dax;
+}
+
+/* Enum property setter: store @mode as the NVDIMM's sync-dax mode. */
+static void nvdimm_set_sync_mode(Object *obj, int mode, Error **errp)
+{
+    NVDIMM(obj)->sync_dax = mode;
+}
static void nvdimm_init(Object *obj)
{
@@ -105,6 +118,13 @@ static void nvdimm_init(Object *obj)
object_property_add(obj, NVDIMM_UUID_PROP, "QemuUUID", nvdimm_get_uuid,
nvdimm_set_uuid, NULL, NULL);
+
+    /* Register the sync-dax enum property together with its help text. */
+    object_property_add_enum(obj, NVDIMM_SYNC_DAX_PROP, "NvdimmSyncModes",
+                             &NvdimmSyncModes_lookup, nvdimm_get_sync_mode,
+                             nvdimm_set_sync_mode);
+    object_property_set_description(obj, NVDIMM_SYNC_DAX_PROP,
+                                    "Set the Synchronous DAX mode");
+
}
static void nvdimm_finalize(Object *obj)
@@ -119,6 +139,9 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
PCDIMMDevice *dimm = PC_DIMM(nvdimm);
uint64_t align, pmem_size, size;
MemoryRegion *mr;
+ HostMemoryBackend *hostmem;
+ bool is_file_backed;
+ bool __attribute__((unused)) is_pmem = false;
g_assert(!nvdimm->nvdimm_mr);
@@ -135,9 +158,8 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
nvdimm->label_data = memory_region_get_ram_ptr(mr) + pmem_size;
pmem_size = QEMU_ALIGN_DOWN(pmem_size, align);
+ hostmem = dimm->hostmem;
if (size <= nvdimm->label_size || !pmem_size) {
- HostMemoryBackend *hostmem = dimm->hostmem;
-
error_setg(errp, "the size of memdev %s (0x%" PRIx64 ") is too "
"small to contain nvdimm label (0x%" PRIx64 ") and "
"aligned PMEM (0x%" PRIx64 ")",
@@ -147,14 +169,36 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
}
if (!nvdimm->unarmed && memory_region_is_rom(mr)) {
- HostMemoryBackend *hostmem = dimm->hostmem;
-
error_setg(errp, "'unarmed' property must be off since memdev %s "
"is read-only",
object_get_canonical_path_component(OBJECT(hostmem)));
return;
}
+    /* A memdev whose memory region exposes an fd is treated as file backed. */
+    is_file_backed = (memory_region_get_fd(mr) > 0);
+    if (nvdimm->sync_dax == NVDIMM_SYNC_MODES_WRITEBACK && !is_file_backed) {
+        error_setg(errp, NVDIMM_SYNC_DAX_PROP"='%s' mode requires the "
+                   "memdev %s to be file backed",
+                   NvdimmSyncModes_str(nvdimm->sync_dax),
+                   object_get_canonical_path_component(OBJECT(hostmem)));
+        return;
+    }
+
+#ifdef CONFIG_LIBPMEM
+    if (is_file_backed) {
+        /*
+         * An fd-backed memdev is not guaranteed to have a "pmem" property
+         * (NOTE(review): memory-backend-memfd looks like such a backend -
+         * confirm). Pass a NULL errp so that a failed lookup simply reads
+         * as "not pmem" instead of aborting via &error_abort.
+         */
+        is_pmem = object_property_get_bool(OBJECT(hostmem), "pmem", NULL);
+    }
+
+    if (nvdimm->sync_dax == NVDIMM_SYNC_MODES_DIRECT && !is_pmem) {
+        error_setg(errp, "NVDIMM device "NVDIMM_SYNC_DAX_PROP"=%s mode requires"
+                   " the memory backend device to be synchronous DAX capable. "
+                   "Indicate it so with pmem=yes for the corresponding "
+                   "memory-backend-file.",
+                   NvdimmSyncModes_str(nvdimm->sync_dax));
+        /* Bail out like the checks above: do not continue with errp set. */
+        return;
+    }
+#endif
+
nvdimm->nvdimm_mr = g_new(MemoryRegion, 1);
memory_region_init_alias(nvdimm->nvdimm_mr, OBJECT(dimm),
"nvdimm-memory", mr, 0, pmem_size);
@@ -4616,6 +4616,11 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
static void spapr_machine_6_0_class_options(MachineClass *mc)
{
/* Defaults for the latest behaviour inherited from the base class */
+ static GlobalProperty compat[] = {
+ { "nvdimm", "sync-dax", "writeback" },
+ };
+ /*
+ * Append the entry above to the machine class's compat properties, so
+ * "nvdimm" devices get sync-dax=writeback as their value on this machine
+ * type unless something added later overrides it.
+ */
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
}
DEFINE_SPAPR_MACHINE(6_0, "6.0", true);
@@ -4625,8 +4630,13 @@ DEFINE_SPAPR_MACHINE(6_0, "6.0", true);
*/
static void spapr_machine_5_2_class_options(MachineClass *mc)
{
+ static GlobalProperty compat[] = {
+ { "nvdimm", "sync-dax", "unsafe" },
+ };
+ /*
+ * The 5.2 machine type uses sync-dax=unsafe for "nvdimm" devices. The
+ * entry is appended after the 6.0 options and hw_compat_5_2 below.
+ * NOTE(review): presumably later compat entries take precedence over
+ * the 6.0 'writeback' entry - confirm against compat_props handling.
+ */
spapr_machine_6_0_class_options(mc);
compat_props_add(mc->compat_props, hw_compat_5_2, hw_compat_5_2_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
}
DEFINE_SPAPR_MACHINE(5_2, "5.2", false);
@@ -50,6 +50,10 @@ bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
{
const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
const MachineState *ms = MACHINE(hotplug_dev);
+ PCDIMMDevice __attribute__((unused)) *dimm = PC_DIMM(nvdimm);
+ MemoryRegion __attribute__((unused)) *mr;
+ bool __attribute__((unused)) is_pmem = false;
+ NvdimmSyncModes __attribute__((unused)) sync;
g_autofree char *uuidstr = NULL;
QemuUUID uuid;
int ret;
@@ -77,6 +81,24 @@ bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
return false;
}
+#ifdef CONFIG_LIBPMEM
+ sync = object_property_get_enum(OBJECT(nvdimm), NVDIMM_SYNC_DAX_PROP,
+ "NvdimmSyncModes", &error_abort);
+
+ mr = host_memory_backend_get_memory(dimm->hostmem);
+ if (memory_region_get_fd(mr) > 0) { /* memory-backend-file */
+ HostMemoryBackend *backend = MEMORY_BACKEND(dimm->hostmem);
+ is_pmem = object_property_get_bool(OBJECT(backend), "pmem",
+ &error_abort);
+ }
+
+ if (sync == NVDIMM_SYNC_MODES_WRITEBACK && is_pmem) {
+ warn_report("The NVDIMM backing device being Synchronous DAX capable, "
+ NVDIMM_SYNC_DAX_PROP"='%s' is unnecessary as the backend "
+ "ensures the safety already.", NvdimmSyncModes_str(sync));
+ }
+#endif
+
uuidstr = object_property_get_str(OBJECT(nvdimm), NVDIMM_UUID_PROP,
&error_abort);
ret = qemu_uuid_parse(uuidstr, &uuid);
@@ -124,6 +146,9 @@ static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
uint64_t lsize = nvdimm->label_size;
uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
NULL);
+ NvdimmSyncModes sync_dax = object_property_get_enum(OBJECT(nvdimm),
+ NVDIMM_SYNC_DAX_PROP,
+ "NvdimmSyncModes", &error_abort);
drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot);
g_assert(drc);
@@ -158,6 +183,11 @@ static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
"operating-system")));
_FDT(fdt_setprop(fdt, child_offset, "ibm,cache-flush-required", NULL, 0));
+ if (sync_dax == NVDIMM_SYNC_MODES_WRITEBACK) {
+ _FDT(fdt_setprop(fdt, child_offset, "ibm,hcall-flush-required",
+ NULL, 0));
+ }
+
return child_offset;
}
@@ -566,6 +596,8 @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
uint64_t continue_token = args[1];
SpaprDrc *drc = spapr_drc_by_index(drc_index);
PCDIMMDevice *dimm;
+ NVDIMMDevice *nvdimm;
+ NvdimmSyncModes sync_dax;
HostMemoryBackend *backend = NULL;
SpaprNVDIMMDeviceFlushState *state;
ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
@@ -575,6 +607,13 @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
return H_PARAMETER;
}
+ nvdimm = NVDIMM(drc->dev);
+ sync_dax = object_property_get_enum(OBJECT(nvdimm), NVDIMM_SYNC_DAX_PROP,
+ "NvdimmSyncModes", &error_abort);
+ if (sync_dax != NVDIMM_SYNC_MODES_WRITEBACK) {
+ return H_UNSUPPORTED;
+ }
+
if (continue_token != 0) {
goto get_status;
}
@@ -28,6 +28,7 @@
#include "qemu/uuid.h"
#include "hw/acpi/aml-build.h"
#include "qom/object.h"
+#include "qapi/qapi-types-machine.h"
#define NVDIMM_DEBUG 0
#define nvdimm_debug(fmt, ...) \
@@ -51,6 +52,7 @@ OBJECT_DECLARE_TYPE(NVDIMMDevice, NVDIMMClass, NVDIMM)
#define NVDIMM_LABEL_SIZE_PROP "label-size"
#define NVDIMM_UUID_PROP "uuid"
#define NVDIMM_UNARMED_PROP "unarmed"
+#define NVDIMM_SYNC_DAX_PROP "sync-dax"
struct NVDIMMDevice {
/* private */
@@ -85,6 +87,15 @@ struct NVDIMMDevice {
*/
bool unarmed;
+    /*
+     * 'writeback' tells the guest to make explicit flush requests to the
+     * hypervisor. With 'direct', the device is assumed to be synchronous
+     * DAX capable and no explicit flush is required. 'unsafe' indicates
+     * that flush semantics are unimplemented and data persistence is not
+     * guaranteed in power failure scenarios.
+     */
+ NvdimmSyncModes sync_dax;
+
/*
* The PPC64 - spapr requires each nvdimm device have a uuid.
*/
@@ -332,6 +332,7 @@ struct SpaprMachineState {
#define H_P7 -60
#define H_P8 -61
#define H_P9 -62
+#define H_UNSUPPORTED -67
#define H_OVERLAP -68
#define H_UNSUPPORTED_FLAG -256
#define H_MULTI_THREADS_ACTIVE -9005
@@ -197,3 +197,23 @@
{ 'enum': 'GrabToggleKeys',
'data': [ 'ctrl-ctrl', 'alt-alt', 'shift-shift','meta-meta', 'scrolllock',
'ctrl-scrolllock' ] }
+
+##
+# @NvdimmSyncModes:
+#
+# Indicates the flush mode used to ensure persistence of NVDIMM data
+# in case of power failures.
+#
+# @unsafe: Indicates that the data on the backend device may not be
+#          consistent in power failure scenarios.
+# @direct: Indicates that the backend device supports synchronous DAX
+#          and no explicit flush requests from the guest are required.
+# @writeback: To be used when the backend device doesn't support
+#             synchronous DAX. The hypervisor issues flushes to the disk
+#             when requested by the guest.
+#
+# Since: 6.0
+##
+{ 'enum': 'NvdimmSyncModes',
+ 'data': [ 'unsafe', 'writeback',
+ { 'name': 'direct', 'if': 'defined(CONFIG_LIBPMEM)' } ] }
The patch adds the 'sync-dax' property to the nvdimm device. When sync-dax is 'direct', it indicates that the backend is synchronous DAX capable and no explicit flush requests are required. When the mode is set to 'writeback', it indicates that the backend is not synchronous DAX capable and explicit flush requests to the hypervisor are required. On PPC, where the flush requests from the guest can be honoured by QEMU, the 'writeback' mode is supported and set as the default. The device tree property "ibm,hcall-flush-required" is added to the nvdimm node, which makes the guest issue H_SCM_FLUSH hcalls to request flushes explicitly. This is the default behaviour when the sync-dax property is not set for the nvdimm device. For old pSeries machines, the default is 'unsafe'. For non-PPC platforms, the mode is set to 'unsafe' as the default. Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com> --- hw/arm/virt.c | 28 +++++++++++++++++++++++-- hw/i386/pc.c | 28 +++++++++++++++++++++++-- hw/mem/nvdimm.c | 52 +++++++++++++++++++++++++++++++++++++++++++---- hw/ppc/spapr.c | 10 +++++++++ hw/ppc/spapr_nvdimm.c | 39 +++++++++++++++++++++++++++++++++++ include/hw/mem/nvdimm.h | 11 ++++++++++ include/hw/ppc/spapr.h | 1 + qapi/common.json | 20 ++++++++++++++++++ 8 files changed, 179 insertions(+), 10 deletions(-)