 backends/hostmem-file.c                   |  44
 configure                                 |  29
 docs/nvdimm.txt                           |  22
 exec.c                                    |  38
 hw/mem/nvdimm.c                           |   9
 hw/sparc/sun4m.c                          |  12
 hw/sparc64/sun4u.c                        |   4
 include/exec/cpu_ldst.h                   |  23
 include/exec/cpu_ldst_useronly_template.h |  12
 include/exec/memory.h                     |  31
 include/exec/ram_addr.h                   |  28
 include/qemu/pmem.h                       |  36
 linux-user/syscall.c                      | 134
 memory.c                                  |   8
 migration/ram.c                           |  17
 numa.c                                    |   2
 qemu-options.hx                           |   7
 scripts/qemu-binfmt-conf.sh               |   6
 target/sh4/translate.c                    |   1
 19 files changed, 398 insertions(+), 65 deletions(-)
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index 134b08d63a..2476dcb435 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -12,6 +12,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu-common.h" +#include "qemu/error-report.h" #include "sysemu/hostmem.h" #include "sysemu/sysemu.h" #include "qom/object_interfaces.h" @@ -31,9 +32,10 @@ typedef struct HostMemoryBackendFile HostMemoryBackendFile; struct HostMemoryBackendFile { HostMemoryBackend parent_obj; - bool discard_data; char *mem_path; uint64_t align; + bool discard_data; + bool is_pmem; }; static void @@ -58,7 +60,9 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) path = object_get_canonical_path(OBJECT(backend)); memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), path, - backend->size, fb->align, backend->share, + backend->size, fb->align, + (backend->share ? RAM_SHARED : 0) | + (fb->is_pmem ? RAM_PMEM : 0), fb->mem_path, errp); g_free(path); } @@ -130,6 +134,39 @@ static void file_memory_backend_set_align(Object *o, Visitor *v, error_propagate(errp, local_err); } +static bool file_memory_backend_get_pmem(Object *o, Error **errp) +{ + return MEMORY_BACKEND_FILE(o)->is_pmem; +} + +static void file_memory_backend_set_pmem(Object *o, bool value, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(o); + HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); + + if (host_memory_backend_mr_inited(backend)) { + error_setg(errp, "cannot change property 'pmem' of %s '%s'", + object_get_typename(o), + object_get_canonical_path_component(o)); + return; + } + +#ifndef CONFIG_LIBPMEM + if (value) { + Error *local_err = NULL; + error_setg(&local_err, + "Lack of libpmem support while setting the 'pmem=on'" + " of %s '%s'. 
We can't ensure data persistence.", + object_get_typename(o), + object_get_canonical_path_component(o)); + error_propagate(errp, local_err); + return; + } +#endif + + fb->is_pmem = value; +} + static void file_backend_unparent(Object *obj) { HostMemoryBackend *backend = MEMORY_BACKEND(obj); @@ -161,6 +198,9 @@ file_backend_class_init(ObjectClass *oc, void *data) file_memory_backend_get_align, file_memory_backend_set_align, NULL, NULL, &error_abort); + object_class_property_add_bool(oc, "pmem", + file_memory_backend_get_pmem, file_memory_backend_set_pmem, + &error_abort); } static void file_backend_instance_finalize(Object *o) diff --git a/configure b/configure index 7d9a63636c..e7bddc04b0 100755 --- a/configure +++ b/configure @@ -476,6 +476,7 @@ vxhs="" libxml2="" docker="no" debug_mutex="no" +libpmem="" # cross compilers defaults, can be overridden with --cross-cc-ARCH cross_cc_aarch64="aarch64-linux-gnu-gcc" @@ -1440,6 +1441,10 @@ for opt do ;; --disable-debug-mutex) debug_mutex=no ;; + --enable-libpmem) libpmem=yes + ;; + --disable-libpmem) libpmem=no + ;; *) echo "ERROR: unknown option $opt" echo "Try '$0 --help' for more information" @@ -1716,6 +1721,7 @@ disabled with --disable-FEATURE, default is enabled if available: vhost-user vhost-user support capstone capstone disassembler support debug-mutex mutex debugging support + libpmem libpmem support NOTE: The object files are built at the place where configure is launched EOF @@ -5594,6 +5600,24 @@ if has "docker"; then fi ########################################## +# check for libpmem + +if test "$libpmem" != "no"; then + if $pkg_config --exists "libpmem"; then + libpmem="yes" + libpmem_libs=$($pkg_config --libs libpmem) + libpmem_cflags=$($pkg_config --cflags libpmem) + libs_softmmu="$libs_softmmu $libpmem_libs" + QEMU_CFLAGS="$QEMU_CFLAGS $libpmem_cflags" + else + if test "$libpmem" = "yes" ; then + feature_not_found "libpmem" "Install nvml or pmdk" + fi + libpmem="no" + fi +fi + +########################################## # End of CC checks # After here, no more $cc or $ld runs @@ -6059,6 +6083,7 @@ echo "replication support $replication" echo "VxHS block device $vxhs" echo "capstone $capstone" echo "docker $docker" +echo "libpmem support $libpmem" if test "$sdl_too_old" = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -6816,6 +6841,10 @@ if test "$vxhs" = "yes" ; then echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak fi +if test "$libpmem" = "yes" ; then + echo "CONFIG_LIBPMEM=y" >> $config_host_mak +fi + if test "$tcg_interpreter" = "yes"; then QEMU_INCLUDES="-iquote \$(SRC_PATH)/tcg/tci $QEMU_INCLUDES" elif test "$ARCH" = "sparc64" ; then diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt index 24b443b655..5f158a6170 100644 --- a/docs/nvdimm.txt +++ b/docs/nvdimm.txt @@ -173,3 +173,25 @@ There are currently two valid values for this option: the NVDIMMs in the event of power loss. This implies that the platform also supports flushing dirty data through the memory controller on power loss. + +If the vNVDIMM backend is in host persistent memory that can be accessed in +SNIA NVM Programming Model [1] (e.g., Intel NVDIMM), it's suggested to set +the 'pmem' option of memory-backend-file to 'on'. When 'pmem' is 'on' and QEMU +is built with libpmem [2] support (configured with --enable-libpmem), QEMU +will take necessary operations to guarantee the persistence of its own writes +to the vNVDIMM backend(e.g., in vNVDIMM label emulation and live migration). 
+If 'pmem' is 'on' while there is no libpmem support, qemu will exit and report +a "lack of libpmem support" message to ensure the persistence is available. +For example, if we want to ensure the persistence for some backend file, +use the QEMU command line: + + -object memory-backend-file,id=nv_mem,mem-path=/XXX/yyy,size=4G,pmem=on + +References +---------- + +[1] NVM Programming Model (NPM) + Version 1.2 + https://www.snia.org/sites/default/files/technical_work/final/NVMProgrammingModel_v1.2.pdf +[2] Persistent Memory Development Kit (PMDK), formerly known as NVML project, home page: + http://pmem.io/pmdk/ diff --git a/exec.c b/exec.c index e7be0761c2..6826c8337d 100644 --- a/exec.c +++ b/exec.c @@ -87,26 +87,6 @@ AddressSpace address_space_memory; MemoryRegion io_mem_rom, io_mem_notdirty; static MemoryRegion io_mem_unassigned; - -/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ -#define RAM_PREALLOC (1 << 0) - -/* RAM is mmap-ed with MAP_SHARED */ -#define RAM_SHARED (1 << 1) - -/* Only a portion of RAM (used_length) is actually used, and migrated. - * This used_length size can change across reboots. - */ -#define RAM_RESIZEABLE (1 << 2) - -/* UFFDIO_ZEROPAGE is available on this RAMBlock to atomically - * zero the page and wake waiting processes. - * (Set during postcopy) - */ -#define RAM_UF_ZEROPAGE (1 << 3) - -/* RAM can be migrated */ -#define RAM_MIGRATABLE (1 << 4) #endif #ifdef TARGET_PAGE_BITS_VARY @@ -2252,13 +2232,16 @@ static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared) #ifdef __linux__ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, - bool share, int fd, + uint32_t ram_flags, int fd, Error **errp) { RAMBlock *new_block; Error *local_err = NULL; int64_t file_size; + /* Just support these ram flags by now. */ + assert((ram_flags & ~(RAM_SHARED | RAM_PMEM)) == 0); + if (xen_enabled()) { error_setg(errp, "-mem-path not supported with Xen"); return NULL; @@ -2294,14 +2277,14 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, new_block->mr = mr; new_block->used_length = size; new_block->max_length = size; - new_block->flags = share ? 
RAM_SHARED : 0; + new_block->flags = ram_flags; new_block->host = file_ram_alloc(new_block, size, fd, !file_size, errp); if (!new_block->host) { g_free(new_block); return NULL; } - ram_block_add(new_block, &local_err, share); + ram_block_add(new_block, &local_err, ram_flags & RAM_SHARED); if (local_err) { g_free(new_block); error_propagate(errp, local_err); @@ -2313,7 +2296,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, - bool share, const char *mem_path, + uint32_t ram_flags, const char *mem_path, Error **errp) { int fd; @@ -2325,7 +2308,7 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, return NULL; } - block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp); + block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, errp); if (!block) { if (created) { unlink(mem_path); @@ -4086,6 +4069,11 @@ err: return ret; } +bool ramblock_is_pmem(RAMBlock *rb) +{ + return rb->flags & RAM_PMEM; +} + #endif void page_size_init(void) diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c index 021d1c3997..1c6674c4ed 100644 --- a/hw/mem/nvdimm.c +++ b/hw/mem/nvdimm.c @@ -23,6 +23,7 @@ */ #include "qemu/osdep.h" +#include "qemu/pmem.h" #include "qapi/error.h" #include "qapi/visitor.h" #include "hw/mem/nvdimm.h" @@ -164,11 +165,17 @@ static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf, { MemoryRegion *mr; PCDIMMDevice *dimm = PC_DIMM(nvdimm); + bool is_pmem = object_property_get_bool(OBJECT(dimm->hostmem), + "pmem", NULL); uint64_t backend_offset; nvdimm_validate_rw_label_data(nvdimm, size, offset); - memcpy(nvdimm->label_data + offset, buf, size); + if (!is_pmem) { + memcpy(nvdimm->label_data + offset, buf, size); + } else { + pmem_memcpy_persist(nvdimm->label_data + offset, buf, size); + } mr = host_memory_backend_get_memory(dimm->hostmem); backend_offset = memory_region_size(mr) - nvdimm->label_size + offset; diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c index d981de1841..3c29b68e67 100644 --- a/hw/sparc/sun4m.c +++ b/hw/sparc/sun4m.c @@ -1035,7 +1035,17 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, ecc_init(hwdef->ecc_base, slavio_irq[28], hwdef->ecc_version); - fw_cfg = fw_cfg_init_mem(CFG_ADDR, CFG_ADDR + 2); + dev = qdev_create(NULL, TYPE_FW_CFG_MEM); + fw_cfg = FW_CFG(dev); + qdev_prop_set_uint32(dev, "data_width", 1); + qdev_prop_set_bit(dev, "dma_enabled", false); + object_property_add_child(OBJECT(qdev_get_machine()), TYPE_FW_CFG, + OBJECT(fw_cfg), NULL); + qdev_init_nofail(dev); + s = SYS_BUS_DEVICE(dev); + sysbus_mmio_map(s, 0, CFG_ADDR); + sysbus_mmio_map(s, 1, CFG_ADDR + 2); + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c index 74b748497e..d16843b30e 100644 --- a/hw/sparc64/sun4u.c +++ b/hw/sparc64/sun4u.c @@ -139,7 +139,7 @@ static uint64_t sun4u_load_kernel(const char *kernel_filename, unsigned int i; long kernel_size; uint8_t *ptr; - uint64_t kernel_top; + uint64_t kernel_top = 0; linux_boot = (kernel_filename != NULL); @@ -172,7 +172,7 @@ static uint64_t sun4u_load_kernel(const char *kernel_filename, } /* load initrd above kernel */ *initrd_size = 0; - if (initrd_filename) { + if (initrd_filename && kernel_top) { *initrd_addr = TARGET_PAGE_ALIGN(kernel_top); *initrd_size = load_image_targphys(initrd_filename, diff --git a/include/exec/cpu_ldst.h 
b/include/exec/cpu_ldst.h index 0f2cb717b1..41ed0526e2 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -48,8 +48,19 @@ #define CPU_LDST_H #if defined(CONFIG_USER_ONLY) +/* sparc32plus has 64bit long but 32bit space address + * this can make bad result with g2h() and h2g() + */ +#if TARGET_VIRT_ADDR_SPACE_BITS <= 32 +typedef uint32_t abi_ptr; +#define TARGET_ABI_FMT_ptr "%x" +#else +typedef uint64_t abi_ptr; +#define TARGET_ABI_FMT_ptr "%"PRIx64 +#endif + /* All direct uses of g2h and h2g need to go away for usermode softmmu. */ -#define g2h(x) ((void *)((unsigned long)(target_ulong)(x) + guest_base)) +#define g2h(x) ((void *)((unsigned long)(abi_ptr)(x) + guest_base)) #define guest_addr_valid(x) ((x) <= GUEST_ADDR_MAX) #define h2g_valid(x) guest_addr_valid((unsigned long)(x) - guest_base) @@ -61,7 +72,7 @@ static inline int guest_range_valid(unsigned long start, unsigned long len) #define h2g_nocheck(x) ({ \ unsigned long __ret = (unsigned long)(x) - guest_base; \ - (abi_ulong)__ret; \ + (abi_ptr)__ret; \ }) #define h2g(x) ({ \ @@ -69,7 +80,9 @@ static inline int guest_range_valid(unsigned long start, unsigned long len) assert(h2g_valid(x)); \ h2g_nocheck(x); \ }) - +#else +typedef target_ulong abi_ptr; +#define TARGET_ABI_FMT_ptr TARGET_ABI_FMT_lx #endif #if defined(CONFIG_USER_ONLY) @@ -397,7 +410,7 @@ extern __thread uintptr_t helper_retaddr; * This is the equivalent of the initial fast-path code used by * TCG backends for guest load and store accesses. */ -static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr, +static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr, int access_type, int mmu_idx) { #if defined(CONFIG_USER_ONLY) @@ -405,7 +418,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr, #else int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index]; - target_ulong tlb_addr; + abi_ptr tlb_addr; uintptr_t haddr; switch (access_type) { diff --git a/include/exec/cpu_ldst_useronly_template.h b/include/exec/cpu_ldst_useronly_template.h index e30e58ed4a..0fd6019af0 100644 --- a/include/exec/cpu_ldst_useronly_template.h +++ b/include/exec/cpu_ldst_useronly_template.h @@ -62,7 +62,7 @@ #endif static inline RES_TYPE -glue(glue(cpu_ld, USUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr) +glue(glue(cpu_ld, USUFFIX), MEMSUFFIX)(CPUArchState *env, abi_ptr ptr) { #if !defined(CODE_ACCESS) trace_guest_mem_before_exec( @@ -74,7 +74,7 @@ glue(glue(cpu_ld, USUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr) static inline RES_TYPE glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, - target_ulong ptr, + abi_ptr ptr, uintptr_t retaddr) { RES_TYPE ret; @@ -86,7 +86,7 @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, #if DATA_SIZE <= 2 static inline int -glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr) +glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(CPUArchState *env, abi_ptr ptr) { #if !defined(CODE_ACCESS) trace_guest_mem_before_exec( @@ -98,7 +98,7 @@ glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr) static inline int glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, - target_ulong ptr, + abi_ptr ptr, uintptr_t retaddr) { int ret; @@ -111,7 +111,7 @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, #ifndef CODE_ACCESS static inline void -glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr, 
+glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(CPUArchState *env, abi_ptr ptr, RES_TYPE v) { #if !defined(CODE_ACCESS) @@ -124,7 +124,7 @@ glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr, static inline void glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, - target_ulong ptr, + abi_ptr ptr, RES_TYPE v, uintptr_t retaddr) { diff --git a/include/exec/memory.h b/include/exec/memory.h index 6863656182..eb4f2fb249 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -103,6 +103,29 @@ struct IOMMUNotifier { }; typedef struct IOMMUNotifier IOMMUNotifier; +/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ +#define RAM_PREALLOC (1 << 0) + +/* RAM is mmap-ed with MAP_SHARED */ +#define RAM_SHARED (1 << 1) + +/* Only a portion of RAM (used_length) is actually used, and migrated. + * This used_length size can change across reboots. + */ +#define RAM_RESIZEABLE (1 << 2) + +/* UFFDIO_ZEROPAGE is available on this RAMBlock to atomically + * zero the page and wake waiting processes. + * (Set during postcopy) + */ +#define RAM_UF_ZEROPAGE (1 << 3) + +/* RAM can be migrated */ +#define RAM_MIGRATABLE (1 << 4) + +/* RAM is a persistent kind memory */ +#define RAM_PMEM (1 << 5) + static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, IOMMUNotifierFlag flags, hwaddr start, hwaddr end, @@ -611,6 +634,7 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, void *host), Error **errp); #ifdef __linux__ + /** * memory_region_init_ram_from_file: Initialize RAM memory region with a * mmap-ed backend. @@ -622,7 +646,10 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, * @size: size of the region. * @align: alignment of the region base address; if 0, the default alignment * (getpagesize()) will be used. - * @share: %true if memory must be mmaped with the MAP_SHARED flag + * @ram_flags: Memory region features: + * - RAM_SHARED: memory must be mmaped with the MAP_SHARED flag + * - RAM_PMEM: the memory is persistent memory + * Other bits are ignored now. * @path: the path in which to allocate the RAM. * @errp: pointer to Error*, to store an error if it happens. * @@ -634,7 +661,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, const char *name, uint64_t size, uint64_t align, - bool share, + uint32_t ram_flags, const char *path, Error **errp); diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index cf4ce06248..3abb639056 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -70,13 +70,37 @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, return host_addr_offset >> TARGET_PAGE_BITS; } +bool ramblock_is_pmem(RAMBlock *rb); + long qemu_getrampagesize(void); + +/** + * qemu_ram_alloc_from_file, + * qemu_ram_alloc_from_fd: Allocate a ram block from the specified backing + * file or device + * + * Parameters: + * @size: the size in bytes of the ram block + * @mr: the memory region where the ram block is + * @ram_flags: specify the properties of the ram block, which can be one + * or bit-or of following values + * - RAM_SHARED: mmap the backing file or device with MAP_SHARED + * - RAM_PMEM: the backend @mem_path or @fd is persistent memory + * Other bits are ignored. + * @mem_path or @fd: specify the backing file or device + * @errp: pointer to Error*, to store an error if it happens + * + * Return: + * On success, return a pointer to the ram block. + * On failure, return NULL. 
+ */ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, - bool share, const char *mem_path, + uint32_t ram_flags, const char *mem_path, Error **errp); RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, - bool share, int fd, + uint32_t ram_flags, int fd, Error **errp); + RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, MemoryRegion *mr, Error **errp); RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr, diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h new file mode 100644 index 0000000000..dfb6d0da62 --- /dev/null +++ b/include/qemu/pmem.h @@ -0,0 +1,36 @@ +/* + * QEMU header file for libpmem. + * + * Copyright (c) 2018 Intel Corporation. + * + * Author: Haozhong Zhang <address@hidden> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_PMEM_H +#define QEMU_PMEM_H + +#ifdef CONFIG_LIBPMEM +#include <libpmem.h> +#else /* !CONFIG_LIBPMEM */ + +static inline void * +pmem_memcpy_persist(void *pmemdest, const void *src, size_t len) +{ + /* If 'pmem' option is 'on', we should always have libpmem support, + or qemu will report a error and exit, never come here. */ + g_assert_not_reached(); + return NULL; +} + +static inline void +pmem_persist(const void *addr, size_t len) +{ + g_assert_not_reached(); +} + +#endif /* CONFIG_LIBPMEM */ + +#endif /* !QEMU_PMEM_H */ diff --git a/linux-user/syscall.c b/linux-user/syscall.c index bb42a225eb..202aa777ad 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -502,6 +502,20 @@ enum { }; enum { + QEMU_IFLA_TUN_UNSPEC, + QEMU_IFLA_TUN_OWNER, + QEMU_IFLA_TUN_GROUP, + QEMU_IFLA_TUN_TYPE, + QEMU_IFLA_TUN_PI, + QEMU_IFLA_TUN_VNET_HDR, + QEMU_IFLA_TUN_PERSIST, + QEMU_IFLA_TUN_MULTI_QUEUE, + QEMU_IFLA_TUN_NUM_QUEUES, + QEMU_IFLA_TUN_NUM_DISABLED_QUEUES, + QEMU___IFLA_TUN_MAX, +}; + +enum { QEMU_IFLA_INFO_UNSPEC, QEMU_IFLA_INFO_KIND, QEMU_IFLA_INFO_DATA, @@ -539,6 +553,40 @@ enum { QEMU___IFLA_XDP_MAX, }; +enum { + QEMU_RTA_UNSPEC, + QEMU_RTA_DST, + QEMU_RTA_SRC, + QEMU_RTA_IIF, + QEMU_RTA_OIF, + QEMU_RTA_GATEWAY, + QEMU_RTA_PRIORITY, + QEMU_RTA_PREFSRC, + QEMU_RTA_METRICS, + QEMU_RTA_MULTIPATH, + QEMU_RTA_PROTOINFO, /* no longer used */ + QEMU_RTA_FLOW, + QEMU_RTA_CACHEINFO, + QEMU_RTA_SESSION, /* no longer used */ + QEMU_RTA_MP_ALGO, /* no longer used */ + QEMU_RTA_TABLE, + QEMU_RTA_MARK, + QEMU_RTA_MFC_STATS, + QEMU_RTA_VIA, + QEMU_RTA_NEWDST, + QEMU_RTA_PREF, + QEMU_RTA_ENCAP_TYPE, + QEMU_RTA_ENCAP, + QEMU_RTA_EXPIRES, + QEMU_RTA_PAD, + QEMU_RTA_UID, + QEMU_RTA_TTL_PROPAGATE, + QEMU_RTA_IP_PROTO, + QEMU_RTA_SPORT, + QEMU_RTA_DPORT, + QEMU___RTA_MAX +}; + typedef abi_long (*TargetFdDataFunc)(void *, size_t); typedef abi_long (*TargetFdAddrFunc)(void *, abi_ulong, socklen_t); typedef struct TargetFdTrans { @@ -2315,6 +2363,34 @@ static abi_long host_to_target_slave_data_bridge_nlattr(struct nlattr *nlattr, return 0; } +static abi_long host_to_target_data_tun_nlattr(struct nlattr *nlattr, + void *context) +{ + uint32_t *u32; + + switch (nlattr->nla_type) { + /* uint8_t */ + case QEMU_IFLA_TUN_TYPE: + case QEMU_IFLA_TUN_PI: + case QEMU_IFLA_TUN_VNET_HDR: + case QEMU_IFLA_TUN_PERSIST: + case QEMU_IFLA_TUN_MULTI_QUEUE: + break; + /* uint32_t */ + case QEMU_IFLA_TUN_NUM_QUEUES: + case QEMU_IFLA_TUN_NUM_DISABLED_QUEUES: + case QEMU_IFLA_TUN_OWNER: + case QEMU_IFLA_TUN_GROUP: + u32 = NLA_DATA(nlattr); + *u32 = tswap32(*u32); + break; + default: + gemu_log("Unknown QEMU_IFLA_TUN type 
%d\n", nlattr->nla_type); + break; + } + return 0; +} + struct linkinfo_context { int len; char *name; @@ -2349,6 +2425,12 @@ static abi_long host_to_target_data_linkinfo_nlattr(struct nlattr *nlattr, nlattr->nla_len, NULL, host_to_target_data_bridge_nlattr); + } else if (strncmp(li_context->name, "tun", + li_context->len) == 0) { + return host_to_target_for_each_nlattr(NLA_DATA(nlattr), + nlattr->nla_len, + NULL, + host_to_target_data_tun_nlattr); } else { gemu_log("Unknown QEMU_IFLA_INFO_KIND %s\n", li_context->name); } @@ -2659,19 +2741,38 @@ static abi_long host_to_target_data_addr_rtattr(struct rtattr *rtattr) static abi_long host_to_target_data_route_rtattr(struct rtattr *rtattr) { uint32_t *u32; + struct rta_cacheinfo *ci; + switch (rtattr->rta_type) { /* binary: depends on family type */ - case RTA_GATEWAY: - case RTA_DST: - case RTA_PREFSRC: + case QEMU_RTA_GATEWAY: + case QEMU_RTA_DST: + case QEMU_RTA_PREFSRC: + break; + /* u8 */ + case QEMU_RTA_PREF: break; /* u32 */ - case RTA_PRIORITY: - case RTA_TABLE: - case RTA_OIF: + case QEMU_RTA_PRIORITY: + case QEMU_RTA_TABLE: + case QEMU_RTA_OIF: u32 = RTA_DATA(rtattr); *u32 = tswap32(*u32); break; + /* struct rta_cacheinfo */ + case QEMU_RTA_CACHEINFO: + ci = RTA_DATA(rtattr); + ci->rta_clntref = tswap32(ci->rta_clntref); + ci->rta_lastuse = tswap32(ci->rta_lastuse); + ci->rta_expires = tswap32(ci->rta_expires); + ci->rta_error = tswap32(ci->rta_error); + ci->rta_used = tswap32(ci->rta_used); +#if defined(RTNETLINK_HAVE_PEERINFO) + ci->rta_id = tswap32(ci->rta_id); + ci->rta_ts = tswap32(ci->rta_ts); + ci->rta_tsage = tswap32(ci->rta_tsage); +#endif + break; default: gemu_log("Unknown host RTA type: %d\n", rtattr->rta_type); break; @@ -2808,13 +2909,13 @@ static abi_long target_to_host_data_route_rtattr(struct rtattr *rtattr) uint32_t *u32; switch (rtattr->rta_type) { /* binary: depends on family type */ - case RTA_DST: - case RTA_SRC: - case RTA_GATEWAY: + case QEMU_RTA_DST: + case QEMU_RTA_SRC: + case QEMU_RTA_GATEWAY: break; /* u32 */ - case RTA_PRIORITY: - case RTA_OIF: + case QEMU_RTA_PRIORITY: + case QEMU_RTA_OIF: u32 = RTA_DATA(rtattr); *u32 = tswap32(*u32); break; @@ -3892,7 +3993,7 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp, len = ret; if (fd_trans_host_to_target_data(fd)) { ret = fd_trans_host_to_target_data(fd)(msg.msg_iov->iov_base, - len); + MIN(msg.msg_iov->iov_len, len)); } else { ret = host_to_target_cmsg(msgp, &msg); } @@ -4169,7 +4270,12 @@ static abi_long do_recvfrom(int fd, abi_ulong msg, size_t len, int flags, } if (!is_error(ret)) { if (fd_trans_host_to_target_data(fd)) { - ret = fd_trans_host_to_target_data(fd)(host_msg, ret); + abi_long trans; + trans = fd_trans_host_to_target_data(fd)(host_msg, MIN(ret, len)); + if (is_error(trans)) { + ret = trans; + goto fail; + } } if (target_addr) { host_to_target_sockaddr(target_addr, addr, addrlen); @@ -7644,7 +7750,7 @@ static int open_self_maps(void *cpu_env, int fd) if (h2g(min) == ts->info->stack_limit) { pstrcpy(path, sizeof(path), " [stack]"); } - dprintf(fd, TARGET_ABI_FMT_lx "-" TARGET_ABI_FMT_lx + dprintf(fd, TARGET_ABI_FMT_ptr "-" TARGET_ABI_FMT_ptr " %c%c%c%c %08" PRIx64 " %02x:%02x %d %s%s\n", h2g(min), h2g(max - 1) + 1, flag_r, flag_w, flag_x, flag_p, offset, dev_maj, dev_min, inode, diff --git a/memory.c b/memory.c index 8b44672c13..9b73892768 100644 --- a/memory.c +++ b/memory.c @@ -1551,7 +1551,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, const char *name, uint64_t size, uint64_t align, - bool share, + 
uint32_t ram_flags, const char *path, Error **errp) { @@ -1560,7 +1560,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, mr->terminates = true; mr->destructor = memory_region_destructor_ram; mr->align = align; - mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, errp); + mr->ram_block = qemu_ram_alloc_from_file(size, mr, ram_flags, path, errp); mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; } @@ -1576,7 +1576,9 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr, mr->ram = true; mr->terminates = true; mr->destructor = memory_region_destructor_ram; - mr->ram_block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp); + mr->ram_block = qemu_ram_alloc_from_fd(size, mr, + share ? RAM_SHARED : 0, + fd, errp); mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; } #endif diff --git a/migration/ram.c b/migration/ram.c index 24dea2730c..fa79d0a5b9 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -33,6 +33,7 @@ #include "qemu/bitops.h" #include "qemu/bitmap.h" #include "qemu/main-loop.h" +#include "qemu/pmem.h" #include "xbzrle.h" #include "ram.h" #include "migration.h" @@ -3547,6 +3548,13 @@ static int ram_load_setup(QEMUFile *f, void *opaque) static int ram_load_cleanup(void *opaque) { RAMBlock *rb; + + RAMBLOCK_FOREACH_MIGRATABLE(rb) { + if (ramblock_is_pmem(rb)) { + pmem_persist(rb->host, rb->used_length); + } + } + xbzrle_load_cleanup(); compress_threads_load_cleanup(); @@ -3906,6 +3914,15 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) static bool ram_has_postcopy(void *opaque) { + RAMBlock *rb; + RAMBLOCK_FOREACH_MIGRATABLE(rb) { + if (ramblock_is_pmem(rb)) { + info_report("Block: %s, host: %p is a nvdimm memory, postcopy" + "is not supported now!", rb->idstr, rb->host); + return false; + } + } + return migrate_postcopy_ram(); } diff --git a/numa.c b/numa.c index 5f6367b989..81542d4ebb 100644 --- a/numa.c +++ b/numa.c @@ -479,7 +479,7 @@ static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner, if (mem_path) { #ifdef __linux__ Error *err = NULL; - memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, false, + memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, 0, mem_path, &err); if (err) { error_report_err(err); diff --git a/qemu-options.hx b/qemu-options.hx index 4efdedfdbb..5515dfaba5 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4048,6 +4048,13 @@ requires an alignment different than the default one used by QEMU, eg the device DAX /dev/dax0.0 requires 2M alignment rather than 4K. In such cases, users can specify the required alignment via this option. +The @option{pmem} option specifies whether the backing file specified +by @option{mem-path} is in host persistent memory that can be accessed +using the SNIA NVM programming model (e.g. Intel NVDIMM). +If @option{pmem} is set to 'on', QEMU will take necessary operations to +guarantee the persistence of its own writes to @option{mem-path} +(e.g. in vNVDIMM label emulation and live migration). + @item -object memory-backend-ram,id=@var{id},merge=@var{on|off},dump=@var{on|off},share=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave} Creates a memory backend object, which can be used to back the guest RAM. 
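For illustration only, not part of the diff: the persistence guarantee documented for 'pmem=on' above ultimately comes from libpmem's flush primitives, which hw/mem/nvdimm.c and migration/ram.c now reach through pmem_memcpy_persist() and pmem_persist(). A minimal standalone sketch of those calls follows; the path /mnt/pmem0/demo and the buffer contents are made up for the example, and it builds with 'cc pmem-demo.c -lpmem':

    /*
     * Standalone sketch of the libpmem primitives this series relies on.
     * Not QEMU code; /mnt/pmem0/demo is a made-up DAX file path.
     */
    #include <libpmem.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        size_t mapped_len;
        int is_pmem;
        char buf[] = "label bytes that must survive power loss";

        /* Map (and create, if needed) a file on a DAX-capable filesystem. */
        char *dest = pmem_map_file("/mnt/pmem0/demo", 4096, PMEM_FILE_CREATE,
                                   0600, &mapped_len, &is_pmem);
        if (!dest) {
            perror("pmem_map_file");
            return 1;
        }

        if (is_pmem) {
            /* Copy plus CPU cache flush; durable once the call returns.
             * This is what nvdimm_write_label_data() now uses when the
             * backend has pmem=on. */
            pmem_memcpy_persist(dest, buf, sizeof(buf));
            /* Flushing an already-written range, as ram_load_cleanup()
             * does after an incoming migration: */
            pmem_persist(dest, sizeof(buf));
        } else {
            /* Non-pmem mapping: plain memcpy plus msync-based flushing. */
            memcpy(dest, buf, sizeof(buf));
            pmem_msync(dest, sizeof(buf));
        }

        pmem_unmap(dest, mapped_len);
        return 0;
    }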
diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh index b0dc8a714a..b5a16742a1 100755 --- a/scripts/qemu-binfmt-conf.sh +++ b/scripts/qemu-binfmt-conf.sh @@ -4,7 +4,7 @@ qemu_target_list="i386 i486 alpha arm armeb sparc32plus ppc ppc64 ppc64le m68k \ mips mipsel mipsn32 mipsn32el mips64 mips64el \ sh4 sh4eb s390x aarch64 aarch64_be hppa riscv32 riscv64 xtensa xtensaeb \ -microblaze microblazeel or1k" +microblaze microblazeel or1k x86_64" i386_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x03\x00' i386_mask='\xff\xff\xff\xff\xff\xfe\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' @@ -14,6 +14,10 @@ i486_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x06\ i486_mask='\xff\xff\xff\xff\xff\xfe\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' i486_family=i386 +x86_64_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x3e\x00' +x86_64_mask='\xff\xff\xff\xff\xff\xfe\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' +x86_64_family=i386 + alpha_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x26\x90' alpha_mask='\xff\xff\xff\xff\xff\xfe\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff' alpha_family=alpha diff --git a/target/sh4/translate.c b/target/sh4/translate.c index 1b9a201d6d..ab254b0e8d 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -293,6 +293,7 @@ static void gen_conditional_jump(DisasContext *ctx, target_ulong dest, disallow it in use_goto_tb, but it handles exit + singlestep. */ gen_goto_tb(ctx, 0, dest); gen_set_label(l1); + ctx->base.is_jmp = DISAS_NEXT; return; } |
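A closing note on the API change that threads through backends/hostmem-file.c, exec.c and memory.c above: the former 'bool share' argument becomes a 'uint32_t ram_flags' bit mask so that RAM_SHARED and RAM_PMEM can be passed together. The self-contained sketch below shows the encoding; the flag values mirror include/exec/memory.h in this diff, while make_ram_flags() is an illustrative helper, not a QEMU function:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define RAM_SHARED (1 << 1)   /* mmap the backing file with MAP_SHARED */
    #define RAM_PMEM   (1 << 5)   /* backing file is persistent memory */

    static uint32_t make_ram_flags(bool share, bool is_pmem)
    {
        return (share ? RAM_SHARED : 0) | (is_pmem ? RAM_PMEM : 0);
    }

    int main(void)
    {
        uint32_t ram_flags = make_ram_flags(true, true);

        /* qemu_ram_alloc_from_fd() asserts exactly this: only the two
         * supported bits may be set by callers for now. */
        assert((ram_flags & ~(RAM_SHARED | RAM_PMEM)) == 0);

        printf("shared=%d pmem=%d\n",
               !!(ram_flags & RAM_SHARED), !!(ram_flags & RAM_PMEM));
        return 0;
    }

Passing a mask rather than a bool is what lets file_backend_memory_alloc() forward both the share and pmem backend properties through a single argument.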