summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--hw/mem/memory-device.c96
-rw-r--r--hw/virtio/vhost.c14
-rw-r--r--include/hw/boards.h4
-rw-r--r--include/hw/mem/memory-device.h32
-rw-r--r--stubs/memory_device.c5
5 files changed, 147 insertions, 4 deletions
diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
index 1b14ba5661..ae38f48f16 100644
--- a/hw/mem/memory-device.c
+++ b/hw/mem/memory-device.c
@@ -85,13 +85,93 @@ unsigned int memory_devices_get_reserved_memslots(void)
     return get_reserved_memslots(current_machine);
 }
 
+bool memory_devices_memslot_auto_decision_active(void)
+{
+    if (!current_machine->device_memory) {
+        return false;
+    }
+
+    return current_machine->device_memory->memslot_auto_decision_active;
+}
+
+static unsigned int memory_device_memslot_decision_limit(MachineState *ms,
+                                                         MemoryRegion *mr)
+{
+    const unsigned int reserved = get_reserved_memslots(ms);
+    const uint64_t size = memory_region_size(mr);
+    unsigned int max = vhost_get_max_memslots();
+    unsigned int free = vhost_get_free_memslots();
+    uint64_t available_space;
+    unsigned int memslots;
+
+    if (kvm_enabled()) {
+        max = MIN(max, kvm_get_max_memslots());
+        free = MIN(free, kvm_get_free_memslots());
+    }
+
+    /*
+     * If we only have less overall memslots than what we consider reasonable,
+     * just keep it to a minimum.
+     */
+    if (max < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS) {
+        return 1;
+    }
+
+    /*
+     * Consider our soft-limit across all memory devices. We don't really
+     * expect to exceed this limit in reasonable configurations.
+     */
+    if (MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT <=
+        ms->device_memory->required_memslots) {
+        return 1;
+    }
+    memslots = MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT -
+               ms->device_memory->required_memslots;
+
+    /*
+     * Consider the actually still free memslots. This is only relevant if
+     * other memslot consumers would consume *significantly* more memslots than
+     * what we prepared for (> 253). Unlikely, but let's just handle it
+     * cleanly.
+     */
+    memslots = MIN(memslots, free - reserved);
+    if (memslots < 1 || unlikely(free < reserved)) {
+        return 1;
+    }
+
+    /* We cannot have any other memory devices? So give all to this device. */
+    if (size == ms->maxram_size - ms->ram_size) {
+        return memslots;
+    }
+
+    /*
+     * Simple heuristic: equally distribute the memslots over the space
+     * still available for memory devices.
+     */
+    available_space = ms->maxram_size - ms->ram_size -
+                      ms->device_memory->used_region_size;
+    memslots = (double)memslots * size / available_space;
+    return memslots < 1 ? 1 : memslots;
+}
+
 static void memory_device_check_addable(MachineState *ms, MemoryDeviceState *md,
                                         MemoryRegion *mr, Error **errp)
 {
+    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
     const uint64_t used_region_size = ms->device_memory->used_region_size;
     const uint64_t size = memory_region_size(mr);
-    const unsigned int required_memslots = memory_device_get_memslots(md);
     const unsigned int reserved_memslots = get_reserved_memslots(ms);
+    unsigned int required_memslots, memslot_limit;
+
+    /*
+     * Instruct the device to decide how many memslots to use, if applicable,
+     * before we query the number of required memslots the first time.
+     */
+    if (mdc->decide_memslots) {
+        memslot_limit = memory_device_memslot_decision_limit(ms, mr);
+        mdc->decide_memslots(md, memslot_limit);
+    }
+    required_memslots = memory_device_get_memslots(md);
 
     /* we will need memory slots for kvm and vhost */
     if (kvm_enabled() &&
@@ -300,6 +380,7 @@ out:
 void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
 {
     const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+    const unsigned int memslots = memory_device_get_memslots(md);
     const uint64_t addr = mdc->get_addr(md);
     MemoryRegion *mr;
 
@@ -311,7 +392,11 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
     g_assert(ms->device_memory);
 
     ms->device_memory->used_region_size += memory_region_size(mr);
-    ms->device_memory->required_memslots += memory_device_get_memslots(md);
+    ms->device_memory->required_memslots += memslots;
+    if (mdc->decide_memslots && memslots > 1) {
+        ms->device_memory->memslot_auto_decision_active++;
+    }
+
     memory_region_add_subregion(&ms->device_memory->mr,
                                 addr - ms->device_memory->base, mr);
     trace_memory_device_plug(DEVICE(md)->id ? DEVICE(md)->id : "", addr);
@@ -320,6 +405,7 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
 void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
 {
     const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+    const unsigned int memslots = memory_device_get_memslots(md);
     MemoryRegion *mr;
 
     /*
@@ -330,8 +416,12 @@ void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
     g_assert(ms->device_memory);
 
     memory_region_del_subregion(&ms->device_memory->mr, mr);
+
+    if (mdc->decide_memslots && memslots > 1) {
+        ms->device_memory->memslot_auto_decision_active--;
+    }
     ms->device_memory->used_region_size -= memory_region_size(mr);
-    ms->device_memory->required_memslots -= memory_device_get_memslots(md);
+    ms->device_memory->required_memslots -= memslots;
     trace_memory_device_unplug(DEVICE(md)->id ? DEVICE(md)->id : "",
                                mdc->get_addr(md));
 }
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 2bdb4fb231..a546a585da 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1462,6 +1462,19 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
         goto fail;
     }
 
+    limit = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
+    if (limit < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS &&
+        memory_devices_memslot_auto_decision_active()) {
+        error_setg(errp, "some memory device (like virtio-mem)"
+            " decided how many memory slots to use based on the overall"
+            " number of memory slots; this vhost backend would further"
+            " restricts the overall number of memory slots");
+        error_append_hint(errp, "Try plugging this vhost backend before"
+            " plugging such memory devices.\n");
+        r = -EINVAL;
+        goto fail;
+    }
+
     for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) {
         r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i);
         if (r < 0) {
@@ -1548,7 +1561,6 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
      * but we'll need additional information about the reservations.
      */
     reserved = memory_devices_get_reserved_memslots();
-    limit = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
     if (used + reserved > limit) {
         error_setg(errp, "vhost backend memory slots limit (%d) is less"
                    " than current number of used (%d) and reserved (%d)"
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 01bddb258f..43a56dc51e 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -304,6 +304,9 @@ struct MachineClass {
  * @used_region_size: the part of @mr already used by memory devices
  * @required_memslots: the number of memslots required by memory devices
  * @used_memslots: the number of memslots currently used by memory devices
+ * @memslot_auto_decision_active: whether any plugged memory device
+ *                                automatically decided to use more than
+ *                                one memslot
  */
 typedef struct DeviceMemoryState {
     hwaddr base;
@@ -314,6 +317,7 @@ typedef struct DeviceMemoryState {
     uint64_t used_region_size;
     unsigned int required_memslots;
     unsigned int used_memslots;
+    unsigned int memslot_auto_decision_active;
 } DeviceMemoryState;
 
 /**
diff --git a/include/hw/mem/memory-device.h b/include/hw/mem/memory-device.h
index c7b624da6a..3354d6c166 100644
--- a/include/hw/mem/memory-device.h
+++ b/include/hw/mem/memory-device.h
@@ -14,6 +14,7 @@
 #define MEMORY_DEVICE_H
 
 #include "hw/qdev-core.h"
+#include "qemu/typedefs.h"
 #include "qapi/qapi-types-machine.h"
 #include "qom/object.h"
 
@@ -100,6 +101,15 @@ struct MemoryDeviceClass {
     MemoryRegion *(*get_memory_region)(MemoryDeviceState *md, Error **errp);
 
     /*
+     * Optional: Instruct the memory device to decide how many memory slots
+     * it requires, not exceeding the given limit.
+     *
+     * Called exactly once when pre-plugging the memory device, before
+     * querying the number of memslots using @get_memslots the first time.
+     */
+    void (*decide_memslots)(MemoryDeviceState *md, unsigned int limit);
+
+    /*
      * Optional for memory devices that require only a single memslot,
      * required for all other memory devices: Return the number of memslots
      * (distinct RAM memory regions in the device memory region) that are
@@ -129,9 +139,31 @@ struct MemoryDeviceClass {
                              MemoryDeviceInfo *info);
 };
 
+/*
+ * Traditionally, KVM/vhost in many setups supported 509 memslots, whereby
+ * 253 memslots were "reserved" for boot memory and other devices (such
+ * as PCI BARs, which can get mapped dynamically) and 256 memslots were
+ * dedicated for DIMMs. These magic numbers worked reliably in the past.
+ *
+ * Further, using many memslots can negatively affect performance, so setting
+ * the soft-limit of memslots used by memory devices to the traditional
+ * DIMM limit of 256 sounds reasonable.
+ *
+ * If we have less than 509 memslots, we will instruct memory devices that
+ * support automatically deciding how many memslots to use to only use a single
+ * one.
+ *
+ * Hotplugging vhost devices with at least 509 memslots is not expected to
+ * cause problems, not even when devices automatically decided how many memslots
+ * to use.
+ */
+#define MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT 256
+#define MEMORY_DEVICES_SAFE_MAX_MEMSLOTS 509
+
 MemoryDeviceInfoList *qmp_memory_device_list(void);
 uint64_t get_plugged_memory_size(void);
 unsigned int memory_devices_get_reserved_memslots(void);
+bool memory_devices_memslot_auto_decision_active(void);
 void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
                             const uint64_t *legacy_align, Error **errp);
 void memory_device_plug(MemoryDeviceState *md, MachineState *ms);
diff --git a/stubs/memory_device.c b/stubs/memory_device.c
index 318a5d4187..15fd93ff67 100644
--- a/stubs/memory_device.c
+++ b/stubs/memory_device.c
@@ -15,3 +15,8 @@ unsigned int memory_devices_get_reserved_memslots(void)
 {
     return 0;
 }
+
+bool memory_devices_memslot_auto_decision_active(void)
+{
+    return false;
+}