summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS1
-rw-r--r--backends/hostmem-file.c61
-rw-r--r--docs/devel/multi-process.rst5
-rw-r--r--docs/system/index.rst1
-rw-r--r--docs/system/vm-templating.rst125
-rw-r--r--hw/acpi/nvdimm.c11
-rw-r--r--hw/core/machine.c11
-rw-r--r--hw/mem/nvdimm.c10
-rw-r--r--hw/ppc/spapr_nvdimm.c3
-rw-r--r--include/exec/memory.h15
-rw-r--r--include/exec/ram_addr.h8
-rw-r--r--include/hw/mem/nvdimm.h6
-rw-r--r--qapi/qom.json17
-rw-r--r--qemu-options.hx16
-rw-r--r--softmmu/memory.c20
-rw-r--r--softmmu/physmem.c93
16 files changed, 354 insertions, 49 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index d5773b1cbd..355b1960ce 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2961,6 +2961,7 @@ M: Igor Mammedov <imammedo@redhat.com>
 S: Maintained
 F: backends/hostmem*.c
 F: include/sysemu/hostmem.h
+F: docs/system/vm-templating.rst
 T: git https://gitlab.com/ehabkost/qemu.git machine-next
 
 Cryptodev Backends
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index b4335a80e6..361d4a8103 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -18,6 +18,8 @@
 #include "sysemu/hostmem.h"
 #include "qom/object_interfaces.h"
 #include "qom/object.h"
+#include "qapi/visitor.h"
+#include "qapi/qapi-visit-common.h"
 
 OBJECT_DECLARE_SIMPLE_TYPE(HostMemoryBackendFile, MEMORY_BACKEND_FILE)
 
@@ -31,6 +33,7 @@ struct HostMemoryBackendFile {
     bool discard_data;
     bool is_pmem;
     bool readonly;
+    OnOffAuto rom;
 };
 
 static void
@@ -53,15 +56,39 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
         return;
     }
 
+    switch (fb->rom) {
+    case ON_OFF_AUTO_AUTO:
+        /* Traditionally, opening the file readonly always resulted in ROM. */
+        fb->rom = fb->readonly ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
+        break;
+    case ON_OFF_AUTO_ON:
+        if (!fb->readonly) {
+            error_setg(errp, "property 'rom' = 'on' is not supported with"
+                       " 'readonly' = 'off'");
+            return;
+        }
+        break;
+    case ON_OFF_AUTO_OFF:
+        if (fb->readonly && backend->share) {
+            error_setg(errp, "property 'rom' = 'off' is incompatible with"
+                       " 'readonly' = 'on' and 'share' = 'on'");
+            return;
+        }
+        break;
+    default:
+        assert(false);
+    }
+
     name = host_memory_backend_get_name(backend);
     ram_flags = backend->share ? RAM_SHARED : 0;
+    ram_flags |= fb->readonly ? RAM_READONLY_FD : 0;
+    ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0;
     ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
     ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
     ram_flags |= RAM_NAMED_FILE;
     memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,
                                      backend->size, fb->align, ram_flags,
-                                     fb->mem_path, fb->offset, fb->readonly,
-                                     errp);
+                                     fb->mem_path, fb->offset, errp);
     g_free(name);
 #endif
 }
@@ -201,6 +228,32 @@ static void file_memory_backend_set_readonly(Object *obj, bool value,
     fb->readonly = value;
 }
 
+static void file_memory_backend_get_rom(Object *obj, Visitor *v,
+                                        const char *name, void *opaque,
+                                        Error **errp)
+{
+    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(obj);
+    OnOffAuto rom = fb->rom;
+
+    visit_type_OnOffAuto(v, name, &rom, errp);
+}
+
+static void file_memory_backend_set_rom(Object *obj, Visitor *v,
+                                        const char *name, void *opaque,
+                                        Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(obj);
+
+    if (host_memory_backend_mr_inited(backend)) {
+        error_setg(errp, "cannot change property '%s' of %s.", name,
+                   object_get_typename(obj));
+        return;
+    }
+
+    visit_type_OnOffAuto(v, name, &fb->rom, errp);
+}
+
 static void file_backend_unparent(Object *obj)
 {
     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
@@ -243,6 +296,10 @@ file_backend_class_init(ObjectClass *oc, void *data)
     object_class_property_add_bool(oc, "readonly",
         file_memory_backend_get_readonly,
         file_memory_backend_set_readonly);
+    object_class_property_add(oc, "rom", "OnOffAuto",
+        file_memory_backend_get_rom, file_memory_backend_set_rom, NULL, NULL);
+    object_class_property_set_description(oc, "rom",
+        "Whether to create Read Only Memory (ROM)");
 }
 
 static void file_backend_instance_finalize(Object *o)
diff --git a/docs/devel/multi-process.rst b/docs/devel/multi-process.rst
index e4801751f2..4ef539c0b0 100644
--- a/docs/devel/multi-process.rst
+++ b/docs/devel/multi-process.rst
@@ -409,8 +409,9 @@ the initial messages sent to the emulation process is a guest memory
 table. Each entry in this table consists of a file descriptor and size
 that the emulation process can ``mmap()`` to directly access guest
 memory, similar to ``vhost_user_set_mem_table()``. Note guest memory
-must be backed by file descriptors, such as when QEMU is given the
-*-mem-path* command line option.
+must be backed by shared file-backed memory, for example, using
+*-object memory-backend-file,share=on* and setting that memory backend
+as RAM for the machine.
 
 IOMMU operations
 ^^^^^^^^^^^^^^^^
diff --git a/docs/system/index.rst b/docs/system/index.rst
index 45bf1f19e7..c21065e519 100644
--- a/docs/system/index.rst
+++ b/docs/system/index.rst
@@ -38,3 +38,4 @@ or Hypervisor.Framework.
    security
    multi-process
    confidential-guest-support
+   vm-templating
diff --git a/docs/system/vm-templating.rst b/docs/system/vm-templating.rst
new file mode 100644
index 0000000000..28905a1eeb
--- /dev/null
+++ b/docs/system/vm-templating.rst
@@ -0,0 +1,125 @@
+QEMU VM templating
+==================
+
+This document explains how to use VM templating in QEMU.
+
+For now, the focus is on VM memory aspects, and not about how to save and
+restore other VM state (i.e., migrate-to-file with ``x-ignore-shared``).
+
+Overview
+--------
+
+With VM templating, a single template VM serves as the starting point for
+new VMs. This allows for fast and efficient replication of VMs, resulting
+in fast startup times and reduced memory consumption.
+
+Conceptually, the VM state is frozen, to then be used as a basis for new
+VMs. The Copy-On-Write mechanism in the operating systems makes sure that
+new VMs are able to read template VM memory; however, any modifications
+stay private and don't modify the original template VM or any other
+created VM.
+
+!!! Security Alert !!!
+----------------------
+
+When effectively cloning VMs by VM templating, hardware identifiers
+(such as UUIDs and NIC MAC addresses), and similar data in the guest OS
+(such as machine IDs, SSH keys, certificates) that are supposed to be
+*unique* are no longer unique, which can be a security concern.
+
+Please be aware of these implications and how to mitigate them for your
+use case, which might involve vmgenid, hot(un)plug of NIC, etc..
+
+Memory configuration
+--------------------
+
+In order to create the template VM, we have to make sure that VM memory
+ends up in a file, from where it can be reused for the new VMs:
+
+Supply VM RAM via memory-backend-file, with ``share=on`` (modifications go
+to the file) and ``readonly=off`` (open the file writable). Note that
+``readonly=off`` is implicit.
+
+In the following command-line example, a 2GB VM is created, whereby VM RAM
+is to be stored in the ``template`` file.
+
+.. parsed-literal::
+
+    |qemu_system| [...] -m 2g \\
+        -object memory-backend-file,id=pc.ram,mem-path=template,size=2g,share=on,... \\
+        -machine q35,memory-backend=pc.ram
+
+If multiple memory backends are used (vNUMA, DIMMs), configure all
+memory backends accordingly.
+
+Once the VM is in the desired state, stop the VM and save other VM state,
+leaving the current state of VM RAM reside in the file.
+
+In order to have a new VM be based on a template VM, we have to
+configure VM RAM to be based on a template VM RAM file; however, the VM
+should not be able to modify file content.
+
+Supply VM RAM via memory-backend-file, with ``share=off`` (modifications
+stay private), ``readonly=on`` (open the file readonly) and ``rom=off``
+(don't make the memory readonly for the VM). Note that ``share=off`` is
+implicit and that other VM state has to be restored separately.
+
+In the following command-line example, a 2GB VM is created based on the
+existing 2GB file ``template``.
+
+.. parsed-literal::
+
+    |qemu_system| [...] -m 2g \\
+        -object memory-backend-file,id=pc.ram,mem-path=template,size=2g,readonly=on,rom=off,... \\
+        -machine q35,memory-backend=pc.ram
+
+If multiple memory backends are used (vNUMA, DIMMs), configure all
+memory backends accordingly.
+
+Note that ``-mem-path`` cannot be used for VM templating when creating the
+template VM or when starting new VMs based on a template VM.
+
+Incompatible features
+---------------------
+
+Some features are incompatible with VM templating, as the underlying file
+cannot be modified to discard VM RAM, or to actually share memory with
+another process.
+
+vhost-user and multi-process QEMU
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+vhost-user and multi-process QEMU are incompatible with VM templating.
+These technologies rely on shared memory, however, the template VMs
+don't actually share memory (``share=off``), even though they are
+file-based.
+
+virtio-balloon
+~~~~~~~~~~~~~~
+
+virtio-balloon inflation and "free page reporting" cannot discard VM RAM
+and will repeatedly report errors. While virtio-balloon can be used
+for template VMs (e.g., report VM RAM stats), "free page reporting"
+should be disabled and the balloon should not be inflated.
+
+virtio-mem
+~~~~~~~~~~
+
+virtio-mem cannot discard VM RAM that is managed by the virtio-mem
+device. virtio-mem will fail early when realizing the device. To use
+VM templating with virtio-mem, either hotplug virtio-mem devices to the
+new VM, or don't supply any memory to the template VM using virtio-mem
+(requested-size=0), not using a template VM file as memory backend for the
+virtio-mem device.
+
+VM migration
+~~~~~~~~~~~~
+
+For VM migration, "x-release-ram" similarly relies on discarding of VM
+RAM on the migration source to free up migrated RAM, and will
+repeatedly report errors.
+
+Postcopy live migration fails discarding VM RAM on the migration
+destination early and refuses to activate postcopy live migration. Note
+that postcopy live migration usually only works on selected filesystems
+(shmem/tmpfs, hugetlbfs) either way.
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index a3b25a92f3..3cbd41629d 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -670,7 +670,8 @@ static void nvdimm_dsm_label_size(NVDIMMDevice *nvdimm, hwaddr dsm_mem_addr)
 }
 
 static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm,
-                                           uint32_t offset, uint32_t length)
+                                           uint32_t offset, uint32_t length,
+                                           bool is_write)
 {
     uint32_t ret = NVDIMM_DSM_RET_STATUS_INVALID;
 
@@ -690,6 +691,10 @@ static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm,
         return ret;
     }
 
+    if (is_write && nvdimm->readonly) {
+        return NVDIMM_DSM_RET_STATUS_UNSUPPORT;
+    }
+
     return NVDIMM_DSM_RET_STATUS_SUCCESS;
 }
 
@@ -713,7 +718,7 @@ static void nvdimm_dsm_get_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
                                  get_label_data->length);
 
     status = nvdimm_rw_label_data_check(nvdimm, get_label_data->offset,
-                                        get_label_data->length);
+                                        get_label_data->length, false);
     if (status != NVDIMM_DSM_RET_STATUS_SUCCESS) {
         nvdimm_dsm_no_payload(status, dsm_mem_addr);
         return;
@@ -752,7 +757,7 @@ static void nvdimm_dsm_set_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
                                   set_label_data->length);
 
     status = nvdimm_rw_label_data_check(nvdimm, set_label_data->offset,
-                                        set_label_data->length);
+                                        set_label_data->length, true);
     if (status != NVDIMM_DSM_RET_STATUS_SUCCESS) {
         nvdimm_dsm_no_payload(status, dsm_mem_addr);
         return;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 230aab819c..59208e611e 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -1359,6 +1359,7 @@ out:
 
 void machine_run_board_init(MachineState *machine, const char *mem_path, Error **errp)
 {
+    ERRP_GUARD();
     MachineClass *machine_class = MACHINE_GET_CLASS(machine);
     ObjectClass *oc = object_class_by_name(machine->cpu_type);
     CPUClass *cc;
@@ -1387,9 +1388,13 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error *
                numa_uses_legacy_mem()) {
         if (object_property_find(object_get_objects_root(),
                                  machine_class->default_ram_id)) {
-            error_setg(errp, "object name '%s' is reserved for the default"
-                " RAM backend, it can't be used for any other purposes."
-                " Change the object's 'id' to something else",
+            error_setg(errp, "object's id '%s' is reserved for the default"
+                " RAM backend, it can't be used for any other purposes",
+                machine_class->default_ram_id);
+            error_append_hint(errp,
+                "Change the object's 'id' to something else or disable"
+                " automatic creation of the default RAM backend by setting"
+                " 'memory-backend=%s' with '-machine'.\n",
                 machine_class->default_ram_id);
             return;
         }
diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index 31080c22c9..1631a7d13f 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -154,6 +154,9 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
                    object_get_canonical_path_component(OBJECT(hostmem)));
         return;
     }
+    if (memory_region_is_rom(mr)) {
+        nvdimm->readonly = true;
+    }
 
     nvdimm->nvdimm_mr = g_new(MemoryRegion, 1);
     memory_region_init_alias(nvdimm->nvdimm_mr, OBJECT(dimm),
@@ -207,15 +210,16 @@ static void nvdimm_unrealize(PCDIMMDevice *dimm)
  * label read/write functions.
  */
 static void nvdimm_validate_rw_label_data(NVDIMMDevice *nvdimm, uint64_t size,
-                                        uint64_t offset)
+                                        uint64_t offset, bool is_write)
 {
     assert((nvdimm->label_size >= size + offset) && (offset + size > offset));
+    assert(!is_write || !nvdimm->readonly);
 }
 
 static void nvdimm_read_label_data(NVDIMMDevice *nvdimm, void *buf,
                                    uint64_t size, uint64_t offset)
 {
-    nvdimm_validate_rw_label_data(nvdimm, size, offset);
+    nvdimm_validate_rw_label_data(nvdimm, size, offset, false);
 
     memcpy(buf, nvdimm->label_data + offset, size);
 }
@@ -229,7 +233,7 @@ static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf,
                                             "pmem", NULL);
     uint64_t backend_offset;
 
-    nvdimm_validate_rw_label_data(nvdimm, size, offset);
+    nvdimm_validate_rw_label_data(nvdimm, size, offset, true);
 
     if (!is_pmem) {
         memcpy(nvdimm->label_data + offset, buf, size);
diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
index a8688243a6..60d6d0acc0 100644
--- a/hw/ppc/spapr_nvdimm.c
+++ b/hw/ppc/spapr_nvdimm.c
@@ -320,7 +320,8 @@ static target_ulong h_scm_write_metadata(PowerPCCPU *cpu,
 
     nvdimm = NVDIMM(drc->dev);
     if ((offset + len < offset) ||
-        (nvdimm->label_size < len + offset)) {
+        (nvdimm->label_size < len + offset) ||
+        nvdimm->readonly) {
         return H_P2;
     }
 
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 68284428f8..ef23d65afc 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -235,6 +235,12 @@ typedef struct IOMMUTLBEvent {
 /* RAM is an mmap-ed named file */
 #define RAM_NAMED_FILE (1 << 9)
 
+/* RAM is mmap-ed read-only */
+#define RAM_READONLY (1 << 10)
+
+/* RAM FD is opened read-only */
+#define RAM_READONLY_FD (1 << 11)
+
 static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
                                        IOMMUNotifierFlag flags,
                                        hwaddr start, hwaddr end,
@@ -1089,6 +1095,7 @@ struct AddressSpace {
     struct FlatView *current_map;
 
     int ioeventfd_nb;
+    int ioeventfd_notifiers;
     struct MemoryRegionIoeventfd *ioeventfds;
     QTAILQ_HEAD(, MemoryListener) listeners;
     QTAILQ_ENTRY(AddressSpace) address_spaces_link;
@@ -1331,10 +1338,10 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
  * @align: alignment of the region base address; if 0, the default alignment
  *         (getpagesize()) will be used.
  * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
- *             RAM_NORESERVE,
+ *             RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
+ *             RAM_READONLY_FD
  * @path: the path in which to allocate the RAM.
  * @offset: offset within the file referenced by path
- * @readonly: true to open @path for reading, false for read/write.
  * @errp: pointer to Error*, to store an error if it happens.
  *
  * Note that this function does not do anything to cause the data in the
@@ -1348,7 +1355,6 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
                                       uint32_t ram_flags,
                                       const char *path,
                                       ram_addr_t offset,
-                                      bool readonly,
                                       Error **errp);
 
 /**
@@ -1360,7 +1366,8 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
  * @name: the name of the region.
  * @size: size of the region.
  * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
- *             RAM_NORESERVE, RAM_PROTECTED.
+ *             RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
+ *             RAM_READONLY_FD
  * @fd: the fd to mmap.
  * @offset: offset within the file referenced by fd
  * @errp: pointer to Error*, to store an error if it happens.
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 9f2e3893f5..90676093f5 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -108,10 +108,10 @@ long qemu_maxrampagesize(void);
  *  @size: the size in bytes of the ram block
  *  @mr: the memory region where the ram block is
  *  @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
- *              RAM_NORESERVE.
+ *              RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
+ *              RAM_READONLY_FD
  *  @mem_path or @fd: specify the backing file or device
  *  @offset: Offset into target file
- *  @readonly: true to open @path for reading, false for read/write.
  *  @errp: pointer to Error*, to store an error if it happens
  *
  * Return:
@@ -120,10 +120,10 @@ long qemu_maxrampagesize(void);
  */
 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                    uint32_t ram_flags, const char *mem_path,
-                                   off_t offset, bool readonly, Error **errp);
+                                   off_t offset, Error **errp);
 RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                  uint32_t ram_flags, int fd, off_t offset,
-                                 bool readonly, Error **errp);
+                                 Error **errp);
 
 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr, Error **errp);
diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index acf887c83d..d3b763453a 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -78,6 +78,12 @@ struct NVDIMMDevice {
     bool unarmed;
 
     /*
+     * Whether our DIMM is backed by ROM, and even label data cannot be
+     * written. If set, implies that "unarmed" is also set.
+     */
+    bool readonly;
+
+    /*
      * The PPC64 - spapr requires each nvdimm device have a uuid.
      */
     QemuUUID uuid;
diff --git a/qapi/qom.json b/qapi/qom.json
index fa3e88c8e6..c53ef978ff 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -668,6 +668,20 @@
 # @readonly: if true, the backing file is opened read-only; if false,
 #     it is opened read-write.  (default: false)
 #
+# @rom: whether to create Read Only Memory (ROM) that cannot be modified
+#       by the VM.  Any write attempts to such ROM will be denied.  Most
+#       use cases want writable RAM instead of ROM.  However, selected use
+#       cases, like R/O NVDIMMs, can benefit from ROM.  If set to 'on',
+#       create ROM; if set to 'off', create writable RAM;  if set to
+#       'auto', the value of the @readonly property is used.  This
+#       property is primarily helpful when we want to have proper RAM in
+#       configurations that would traditionally create ROM before this
+#       property was introduced: VM templating, where we want to open a
+#       file readonly (@readonly set to true) and mark the memory to be
+#       private for QEMU (@share set to false).  For this use case, we need
+#       writable RAM instead of ROM, and want to set this property to 'off'.
+#       (default: auto, since 8.2)
+#
 # Since: 2.1
 ##
 { 'struct': 'MemoryBackendFileProperties',
@@ -677,7 +691,8 @@
             '*discard-data': 'bool',
             'mem-path': 'str',
             '*pmem': { 'type': 'bool', 'if': 'CONFIG_LIBPMEM' },
-            '*readonly': 'bool' } }
+            '*readonly': 'bool',
+            '*rom': 'OnOffAuto' } }
 
 ##
 # @MemoryBackendMemfdProperties:
diff --git a/qemu-options.hx b/qemu-options.hx
index 2bcf7e4e97..bcd77255cb 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -5063,7 +5063,7 @@ SRST
     they are specified. Note that the 'id' property must be set. These
     objects are placed in the '/objects' path.
 
-    ``-object memory-backend-file,id=id,size=size,mem-path=dir,share=on|off,discard-data=on|off,merge=on|off,dump=on|off,prealloc=on|off,host-nodes=host-nodes,policy=default|preferred|bind|interleave,align=align,offset=offset,readonly=on|off``
+    ``-object memory-backend-file,id=id,size=size,mem-path=dir,share=on|off,discard-data=on|off,merge=on|off,dump=on|off,prealloc=on|off,host-nodes=host-nodes,policy=default|preferred|bind|interleave,align=align,offset=offset,readonly=on|off,rom=on|off|auto``
         Creates a memory file backend object, which can be used to back
         the guest RAM with huge pages.
 
@@ -5153,6 +5153,20 @@ SRST
         The ``readonly`` option specifies whether the backing file is opened
         read-only or read-write (default).
 
+        The ``rom`` option specifies whether to create Read Only Memory
+        (ROM) that cannot be modified by the VM. Any write attempts to such
+        ROM will be denied. Most use cases want proper RAM instead of ROM.
+        However, selected use cases, like R/O NVDIMMs, can benefit from
+        ROM. If set to ``on``, create ROM; if set to ``off``, create
+        writable RAM; if set to ``auto`` (default), the value of the
+        ``readonly`` option is used. This option is primarily helpful when
+        we want to have writable RAM in configurations that would
+        traditionally create ROM before the ``rom`` option was introduced:
+        VM templating, where we want to open a file readonly
+        (``readonly=on``) and mark the memory to be private for QEMU
+        (``share=off``). For this use case, we need writable RAM instead
+        of ROM, and want to also set ``rom=off``.
+
     ``-object memory-backend-ram,id=id,merge=on|off,dump=on|off,share=on|off,prealloc=on|off,size=size,host-nodes=host-nodes,policy=default|preferred|bind|interleave``
         Creates a memory backend object, which can be used to back the
         guest RAM. Memory backend objects offer more control than the
diff --git a/softmmu/memory.c b/softmmu/memory.c
index 7d9494ce70..c0383a163d 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -842,6 +842,10 @@ static void address_space_update_ioeventfds(AddressSpace *as)
     AddrRange tmp;
     unsigned i;
 
+    if (!as->ioeventfd_notifiers) {
+        return;
+    }
+
     /*
      * It is likely that the number of ioeventfds hasn't changed much, so use
      * the previous size as the starting value, with some headroom to avoid
@@ -1620,18 +1624,17 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
                                       uint32_t ram_flags,
                                       const char *path,
                                       ram_addr_t offset,
-                                      bool readonly,
                                       Error **errp)
 {
     Error *err = NULL;
     memory_region_init(mr, owner, name, size);
     mr->ram = true;
-    mr->readonly = readonly;
+    mr->readonly = !!(ram_flags & RAM_READONLY);
     mr->terminates = true;
     mr->destructor = memory_region_destructor_ram;
     mr->align = align;
     mr->ram_block = qemu_ram_alloc_from_file(size, mr, ram_flags, path,
-                                             offset, readonly, &err);
+                                             offset, &err);
     if (err) {
         mr->size = int128_zero();
         object_unparent(OBJECT(mr));
@@ -1651,10 +1654,11 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
     Error *err = NULL;
     memory_region_init(mr, owner, name, size);
     mr->ram = true;
+    mr->readonly = !!(ram_flags & RAM_READONLY);
     mr->terminates = true;
     mr->destructor = memory_region_destructor_ram;
     mr->ram_block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset,
-                                           false, &err);
+                                           &err);
     if (err) {
         mr->size = int128_zero();
         object_unparent(OBJECT(mr));
@@ -3075,6 +3079,10 @@ void memory_listener_register(MemoryListener *listener, AddressSpace *as)
     }
 
     listener_add_address_space(listener, as);
+
+    if (listener->eventfd_add || listener->eventfd_del) {
+        as->ioeventfd_notifiers++;
+    }
 }
 
 void memory_listener_unregister(MemoryListener *listener)
@@ -3083,6 +3091,10 @@ void memory_listener_unregister(MemoryListener *listener)
         return;
     }
 
+    if (listener->eventfd_add || listener->eventfd_del) {
+        listener->address_space->ioeventfd_notifiers--;
+    }
+
     listener_del_address_space(listener, listener->address_space);
     QTAILQ_REMOVE(&memory_listeners, listener, link);
     QTAILQ_REMOVE(&listener->address_space->listeners, listener, link_as);
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 18277ddd67..4f6ca653b3 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -1288,8 +1288,7 @@ static int64_t get_file_align(int fd)
 static int file_ram_open(const char *path,
                          const char *region_name,
                          bool readonly,
-                         bool *created,
-                         Error **errp)
+                         bool *created)
 {
     char *filename;
     char *sanitized_name;
@@ -1300,10 +1299,33 @@ static int file_ram_open(const char *path,
     for (;;) {
         fd = open(path, readonly ? O_RDONLY : O_RDWR);
         if (fd >= 0) {
+            /*
+             * open(O_RDONLY) won't fail with EISDIR. Check manually if we
+             * opened a directory and fail similarly to how we fail ENOENT
+             * in readonly mode. Note that mkstemp() would imply O_RDWR.
+             */
+            if (readonly) {
+                struct stat file_stat;
+
+                if (fstat(fd, &file_stat)) {
+                    close(fd);
+                    if (errno == EINTR) {
+                        continue;
+                    }
+                    return -errno;
+                } else if (S_ISDIR(file_stat.st_mode)) {
+                    close(fd);
+                    return -EISDIR;
+                }
+            }
             /* @path names an existing file, use it */
             break;
         }
         if (errno == ENOENT) {
+            if (readonly) {
+                /* Refuse to create new, readonly files. */
+                return -ENOENT;
+            }
             /* @path names a file that doesn't exist, create it */
             fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
             if (fd >= 0) {
@@ -1333,10 +1355,7 @@ static int file_ram_open(const char *path,
             g_free(filename);
         }
         if (errno != EEXIST && errno != EINTR) {
-            error_setg_errno(errp, errno,
-                             "can't open backing store %s for guest RAM",
-                             path);
-            return -1;
+            return -errno;
         }
         /*
          * Try again on EINTR and EEXIST.  The latter happens when
@@ -1350,7 +1369,6 @@ static int file_ram_open(const char *path,
 static void *file_ram_alloc(RAMBlock *block,
                             ram_addr_t memory,
                             int fd,
-                            bool readonly,
                             bool truncate,
                             off_t offset,
                             Error **errp)
@@ -1408,7 +1426,7 @@ static void *file_ram_alloc(RAMBlock *block,
         perror("ftruncate");
     }
 
-    qemu_map_flags = readonly ? QEMU_MAP_READONLY : 0;
+    qemu_map_flags = (block->flags & RAM_READONLY) ? QEMU_MAP_READONLY : 0;
     qemu_map_flags |= (block->flags & RAM_SHARED) ? QEMU_MAP_SHARED : 0;
     qemu_map_flags |= (block->flags & RAM_PMEM) ? QEMU_MAP_SYNC : 0;
     qemu_map_flags |= (block->flags & RAM_NORESERVE) ? QEMU_MAP_NORESERVE : 0;
@@ -1876,7 +1894,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
 #ifdef CONFIG_POSIX
 RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                  uint32_t ram_flags, int fd, off_t offset,
-                                 bool readonly, Error **errp)
+                                 Error **errp)
 {
     RAMBlock *new_block;
     Error *local_err = NULL;
@@ -1884,7 +1902,8 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
 
     /* Just support these ram flags by now. */
     assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
-                          RAM_PROTECTED | RAM_NAMED_FILE)) == 0);
+                          RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
+                          RAM_READONLY_FD)) == 0);
 
     if (xen_enabled()) {
         error_setg(errp, "-mem-path not supported with Xen");
@@ -1919,8 +1938,8 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
     new_block->used_length = size;
     new_block->max_length = size;
     new_block->flags = ram_flags;
-    new_block->host = file_ram_alloc(new_block, size, fd, readonly,
-                                     !file_size, offset, errp);
+    new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
+                                     errp);
     if (!new_block->host) {
         g_free(new_block);
         return NULL;
@@ -1939,20 +1958,40 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
 
 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                    uint32_t ram_flags, const char *mem_path,
-                                   off_t offset, bool readonly, Error **errp)
+                                   off_t offset, Error **errp)
 {
     int fd;
     bool created;
     RAMBlock *block;
 
-    fd = file_ram_open(mem_path, memory_region_name(mr), readonly, &created,
-                       errp);
+    fd = file_ram_open(mem_path, memory_region_name(mr),
+                       !!(ram_flags & RAM_READONLY_FD), &created);
     if (fd < 0) {
+        error_setg_errno(errp, -fd, "can't open backing store %s for guest RAM",
+                         mem_path);
+        if (!(ram_flags & RAM_READONLY_FD) && !(ram_flags & RAM_SHARED) &&
+            fd == -EACCES) {
+            /*
+             * If we can open the file R/O (note: will never create a new file)
+             * and we are dealing with a private mapping, there are still ways
+             * to consume such files and get RAM instead of ROM.
+             */
+            fd = file_ram_open(mem_path, memory_region_name(mr), true,
+                               &created);
+            if (fd < 0) {
+                return NULL;
+            }
+            assert(!created);
+            close(fd);
+            error_append_hint(errp, "Consider opening the backing store"
+                " read-only but still creating writable RAM using"
+                " '-object memory-backend-file,readonly=on,rom=off...'"
+                " (see \"VM templating\" documentation)\n");
+        }
         return NULL;
     }
 
-    block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, readonly,
-                                   errp);
+    block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, offset, errp);
     if (!block) {
         if (created) {
             unlink(mem_path);
@@ -2070,6 +2109,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
     ram_addr_t offset;
     int flags;
     void *area, *vaddr;
+    int prot;
 
     RAMBLOCK_FOREACH(block) {
         offset = addr - block->offset;
@@ -2084,13 +2124,14 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
                 flags |= block->flags & RAM_SHARED ?
                          MAP_SHARED : MAP_PRIVATE;
                 flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
+                prot = PROT_READ;
+                prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
                 if (block->fd >= 0) {
-                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
-                                flags, block->fd, offset + block->fd_offset);
+                    area = mmap(vaddr, length, prot, flags, block->fd,
+                                offset + block->fd_offset);
                 } else {
                     flags |= MAP_ANONYMOUS;
-                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
-                                flags, -1, 0);
+                    area = mmap(vaddr, length, prot, flags, -1, 0);
                 }
                 if (area != vaddr) {
                     error_report("Could not remap addr: "
@@ -3481,6 +3522,16 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
              */
 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
             /*
+             * fallocate() will fail with readonly files. Let's print a
+             * proper error message.
+             */
+            if (rb->flags & RAM_READONLY_FD) {
+                error_report("ram_block_discard_range: Discarding RAM"
+                             " with readonly files is not supported");
+                goto err;
+
+            }
+            /*
              * We'll discard data from the actual file, even though we only
              * have a MAP_PRIVATE mapping, possibly messing with other
              * MAP_PRIVATE/MAP_SHARED mappings. There is no easy way to