@@ -85,13 +85,93 @@ unsigned int memory_devices_get_reserved_memslots(void)
return get_reserved_memslots(current_machine);
}
+/*
+ * Report whether the current machine has device memory on which the
+ * memslot_auto_decision_active counter is non-zero. Machines without
+ * device memory always yield false.
+ */
+bool memory_devices_memslot_auto_decision_active(void)
+{
+    const DeviceMemoryState *dms = current_machine->device_memory;
+
+    return dms ? dms->memslot_auto_decision_active : false;
+}
+
+/*
+ * Compute the upper limit of memslots that a memory device, which decides
+ * automatically how many memslots to use, may use for its memory region.
+ *
+ * @ms: the machine; ms->device_memory is dereferenced unconditionally
+ * @mr: the memory region of the memory device that is about to get plugged
+ *
+ * Returns the limit, which is always >= 1.
+ */
+static unsigned int memory_device_memslot_decision_limit(MachineState *ms,
+                                                         MemoryRegion *mr)
+{
+    const unsigned int reserved = get_reserved_memslots(ms);
+    const uint64_t size = memory_region_size(mr);
+    unsigned int max = vhost_get_max_memslots();
+    unsigned int free = vhost_get_free_memslots();
+    uint64_t available_space;
+    unsigned int memslots;
+
+    if (kvm_enabled()) {
+        max = MIN(max, kvm_get_max_memslots());
+        free = MIN(free, kvm_get_free_memslots());
+    }
+
+    /*
+     * If we have fewer overall memslots than what we consider reasonable,
+     * just keep it to a minimum.
+     */
+    if (max < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS) {
+        return 1;
+    }
+
+    /*
+     * Consider our soft-limit across all memory devices. We don't really
+     * expect to exceed this limit in reasonable configurations.
+     */
+    if (MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT <=
+        ms->device_memory->required_memslots) {
+        return 1;
+    }
+    memslots = MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT -
+               ms->device_memory->required_memslots;
+
+    /*
+     * Consider the actually still free memslots. This is only relevant if
+     * other memslot consumers would consume *significantly* more memslots than
+     * what we prepared for (> 253). Unlikely, but let's just handle it
+     * cleanly.
+     *
+     * Check before subtracting, so the unsigned subtraction cannot wrap.
+     */
+    if (unlikely(free <= reserved)) {
+        return 1;
+    }
+    memslots = MIN(memslots, free - reserved);
+
+    /* We cannot have any other memory devices? So give all to this device. */
+    if (size == ms->maxram_size - ms->ram_size) {
+        return memslots;
+    }
+
+    available_space = ms->maxram_size - ms->ram_size -
+                      ms->device_memory->used_region_size;
+
+    /*
+     * The device would consume all of (or more than) the space that is still
+     * available: never hand out more than the limit computed above, and
+     * avoid dividing by zero when no space is left at all.
+     */
+    if (size >= available_space) {
+        return memslots;
+    }
+
+    /*
+     * Simple heuristic: equally distribute the memslots over the space
+     * still available for memory devices.
+     */
+    memslots = (double)memslots * size / available_space;
+    return memslots < 1 ? 1 : memslots;
+}
+
static void memory_device_check_addable(MachineState *ms, MemoryDeviceState *md,
MemoryRegion *mr, Error **errp)
{
+ const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
const uint64_t used_region_size = ms->device_memory->used_region_size;
const uint64_t size = memory_region_size(mr);
- const unsigned int required_memslots = memory_device_get_memslots(md);
const unsigned int reserved_memslots = get_reserved_memslots(ms);
+ unsigned int required_memslots, memslot_limit;
+
+ /*
+ * Instruct the device to decide how many memslots to use, if applicable,
+ * before we query the number of required memslots the first time.
+ */
+ if (mdc->decide_memslots) {
+ memslot_limit = memory_device_memslot_decision_limit(ms, mr);
+ mdc->decide_memslots(md, memslot_limit);
+ }
+ required_memslots = memory_device_get_memslots(md);
/* we will need memory slots for kvm and vhost */
if (kvm_enabled() &&
@@ -300,6 +380,7 @@ out:
void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
{
const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+ const unsigned int memslots = memory_device_get_memslots(md);
const uint64_t addr = mdc->get_addr(md);
MemoryRegion *mr;
@@ -311,7 +392,11 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
g_assert(ms->device_memory);
ms->device_memory->used_region_size += memory_region_size(mr);
- ms->device_memory->required_memslots += memory_device_get_memslots(md);
+ ms->device_memory->required_memslots += memslots;
+ if (mdc->decide_memslots && memslots > 1) {
+ ms->device_memory->memslot_auto_decision_active++;
+ }
+
memory_region_add_subregion(&ms->device_memory->mr,
addr - ms->device_memory->base, mr);
trace_memory_device_plug(DEVICE(md)->id ? DEVICE(md)->id : "", addr);
@@ -320,6 +405,7 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
{
const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+ const unsigned int memslots = memory_device_get_memslots(md);
MemoryRegion *mr;
/*
@@ -330,8 +416,12 @@ void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
g_assert(ms->device_memory);
memory_region_del_subregion(&ms->device_memory->mr, mr);
+
+ if (mdc->decide_memslots && memslots > 1) {
+ ms->device_memory->memslot_auto_decision_active--;
+ }
ms->device_memory->used_region_size -= memory_region_size(mr);
- ms->device_memory->required_memslots -= memory_device_get_memslots(md);
+ ms->device_memory->required_memslots -= memslots;
trace_memory_device_unplug(DEVICE(md)->id ? DEVICE(md)->id : "",
mdc->get_addr(md));
}
@@ -1462,6 +1462,19 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
goto fail;
}
+    /*
+     * Refuse a backend whose memslot limit is below
+     * MEMORY_DEVICES_SAFE_MAX_MEMSLOTS while some plugged memory device
+     * auto-decided to use more than one memslot.
+     */
+    limit = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
+    if (limit < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS &&
+        memory_devices_memslot_auto_decision_active()) {
+        error_setg(errp, "some memory device (like virtio-mem)"
+            " decided how many memory slots to use based on the overall"
+            " number of memory slots; this vhost backend would further"
+            " restrict the overall number of memory slots");
+        error_append_hint(errp, "Try plugging this vhost backend before"
+            " plugging such memory devices.\n");
+        r = -EINVAL;
+        goto fail;
+    }
+
for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) {
r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i);
if (r < 0) {
@@ -1548,7 +1561,6 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
* but we'll need additional information about the reservations.
*/
reserved = memory_devices_get_reserved_memslots();
- limit = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
if (used + reserved > limit) {
error_setg(errp, "vhost backend memory slots limit (%d) is less"
" than current number of used (%d) and reserved (%d)"
@@ -304,6 +304,9 @@ struct MachineClass {
* @used_region_size: the part of @mr already used by memory devices
* @required_memslots: the number of memslots required by memory devices
* @used_memslots: the number of memslots currently used by memory devices
+ * @memslot_auto_decision_active: the number of plugged memory devices that
+ *                                automatically decided to use more than
+ *                                one memslot
*/
typedef struct DeviceMemoryState {
hwaddr base;
@@ -314,6 +317,7 @@ typedef struct DeviceMemoryState {
uint64_t used_region_size;
unsigned int required_memslots;
unsigned int used_memslots;
+ unsigned int memslot_auto_decision_active;
} DeviceMemoryState;
/**
@@ -14,6 +14,7 @@
#define MEMORY_DEVICE_H
#include "hw/qdev-core.h"
+#include "qemu/typedefs.h"
#include "qapi/qapi-types-machine.h"
#include "qom/object.h"
@@ -99,6 +100,15 @@ struct MemoryDeviceClass {
*/
MemoryRegion *(*get_memory_region)(MemoryDeviceState *md, Error **errp);
+ /*
+ * Optional: Instruct the memory device to decide how many memory slots
+ * it requires, not exceeding the given limit.
+ *
+ * Called exactly once when pre-plugging the memory device, before
+ * querying the number of memslots using @get_memslots the first time.
+ */
+ void (*decide_memslots)(MemoryDeviceState *md, unsigned int limit);
+
/*
* Optional for memory devices that require only a single memslot,
* required for all other memory devices: Return the number of memslots
@@ -129,9 +139,31 @@ struct MemoryDeviceClass {
MemoryDeviceInfo *info);
};
+/*
+ * Traditionally, KVM/vhost in many setups supported 509 memslots, whereby
+ * 253 memslots were "reserved" for boot memory and other devices (such
+ * as PCI BARs, which can get mapped dynamically) and 256 memslots were
+ * dedicated for DIMMs. These magic numbers worked reliably in the past.
+ *
+ * Further, using many memslots can negatively affect performance, so setting
+ * the soft-limit of memslots used by memory devices to the traditional
+ * DIMM limit of 256 sounds reasonable.
+ *
+ * If we have fewer than 509 memslots, we will instruct memory devices that
+ * support automatically deciding how many memslots to use to only use a single
+ * one.
+ *
+ * Hotplugging vhost devices with at least 509 memslots is not expected to
+ * cause problems, not even when devices automatically decided how many memslots
+ * to use.
+ */
+#define MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT 256
+#define MEMORY_DEVICES_SAFE_MAX_MEMSLOTS 509
+
MemoryDeviceInfoList *qmp_memory_device_list(void);
uint64_t get_plugged_memory_size(void);
unsigned int memory_devices_get_reserved_memslots(void);
+bool memory_devices_memslot_auto_decision_active(void);
void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
const uint64_t *legacy_align, Error **errp);
void memory_device_plug(MemoryDeviceState *md, MachineState *ms);
@@ -15,3 +15,8 @@ unsigned int memory_devices_get_reserved_memslots(void)
{
return 0;
}
+
+/*
+ * This variant unconditionally reports that no memory device
+ * automatically decided how many memslots to use.
+ */
+bool memory_devices_memslot_auto_decision_active(void)
+{
+    return false;
+}