summary refs log tree commit diff stats
path: root/exec.c
diff options
context:
space:
mode:
authorMarcelo Tosatti <mtosatti@redhat.com>2010-03-01 20:25:08 -0300
committerMarcelo Tosatti <mtosatti@redhat.com>2010-03-04 00:28:47 -0300
commitc902760fb25f9c490af01e8f6bccaa8dd71cc224 (patch)
treeebf40c2fba4b138eee6721d011f9f82040a6e565 /exec.c
parent60e4c6317b8773d987729401aeca9d8c6b61b05f (diff)
downloadfocaccia-qemu-c902760fb25f9c490af01e8f6bccaa8dd71cc224.tar.gz
focaccia-qemu-c902760fb25f9c490af01e8f6bccaa8dd71cc224.zip
Add option to use file backed guest memory
Port qemu-kvm's -mem-path and -mem-prealloc options. These are useful
for backing guest memory with huge pages via hugetlbfs.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
CC: john cooper <john.cooper@redhat.com>
Diffstat (limited to 'exec.c')
-rw-r--r--exec.c115
1 files changed, 110 insertions, 5 deletions
diff --git a/exec.c b/exec.c
index 6a3c912b7f..f41518e8a3 100644
--- a/exec.c
+++ b/exec.c
@@ -2529,6 +2529,99 @@ void qemu_flush_coalesced_mmio_buffer(void)
         kvm_flush_coalesced_mmio_buffer();
 }
 
+#if defined(__linux__) && !defined(TARGET_S390X)
+
+#include <sys/vfs.h>
+
+#define HUGETLBFS_MAGIC       0x958458f6
+
+static long gethugepagesize(const char *path)
+{
+    struct statfs fs;
+    int ret;
+
+    do {
+	    ret = statfs(path, &fs);
+    } while (ret != 0 && errno == EINTR);
+
+    if (ret != 0) {
+	    perror("statfs");
+	    return 0;
+    }
+
+    if (fs.f_type != HUGETLBFS_MAGIC)
+	    fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
+
+    return fs.f_bsize;
+}
+
+static void *file_ram_alloc(ram_addr_t memory, const char *path)
+{
+    char *filename;
+    void *area;
+    int fd;
+#ifdef MAP_POPULATE
+    int flags;
+#endif
+    unsigned long hpagesize;
+
+    hpagesize = gethugepagesize(path);
+    if (!hpagesize) {
+	return NULL;
+    }
+
+    if (memory < hpagesize) {
+        return NULL;
+    }
+
+    if (kvm_enabled() && !kvm_has_sync_mmu()) {
+        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
+        return NULL;
+    }
+
+    if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
+	return NULL;
+    }
+
+    fd = mkstemp(filename);
+    if (fd < 0) {
+	perror("mkstemp");
+	free(filename);
+	return NULL;
+    }
+    unlink(filename);
+    free(filename);
+
+    memory = (memory+hpagesize-1) & ~(hpagesize-1);
+
+    /*
+     * ftruncate is not supported by hugetlbfs in older
+     * hosts, so don't bother bailing out on errors.
+     * If anything goes wrong with it under other filesystems,
+     * mmap will fail.
+     */
+    if (ftruncate(fd, memory))
+	perror("ftruncate");
+
+#ifdef MAP_POPULATE
+    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
+     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
+     * to sidestep this quirk.
+     */
+    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
+    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
+#else
+    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+#endif
+    if (area == MAP_FAILED) {
+	perror("file_ram_alloc: can't mmap RAM pages");
+	close(fd);
+	return (NULL);
+    }
+    return area;
+}
+#endif
+
 ram_addr_t qemu_ram_alloc(ram_addr_t size)
 {
     RAMBlock *new_block;
@@ -2536,16 +2629,28 @@ ram_addr_t qemu_ram_alloc(ram_addr_t size)
     size = TARGET_PAGE_ALIGN(size);
     new_block = qemu_malloc(sizeof(*new_block));
 
+    if (mem_path) {
+#if defined (__linux__) && !defined(TARGET_S390X)
+        new_block->host = file_ram_alloc(size, mem_path);
+        if (!new_block->host)
+            exit(1);
+#else
+        fprintf(stderr, "-mem-path option unsupported\n");
+        exit(1);
+#endif
+    } else {
 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
-    /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
-    new_block->host = mmap((void*)0x1000000, size, PROT_EXEC|PROT_READ|PROT_WRITE,
-                           MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+        /* XXX S390 KVM requires the topmost vma of the RAM to be < 256GB */
+        new_block->host = mmap((void*)0x1000000, size,
+                                PROT_EXEC|PROT_READ|PROT_WRITE,
+                                MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 #else
-    new_block->host = qemu_vmalloc(size);
+        new_block->host = qemu_vmalloc(size);
 #endif
 #ifdef MADV_MERGEABLE
-    madvise(new_block->host, size, MADV_MERGEABLE);
+        madvise(new_block->host, size, MADV_MERGEABLE);
 #endif
+    }
     new_block->offset = last_ram_offset;
     new_block->length = size;