From d5b93ddfefe63d5869a8eb97ea3474867d3b105b Mon Sep 17 00:00:00 2001 From: Anthony PERARD Date: Fri, 25 Feb 2011 16:20:34 +0000 Subject: xen: Support new libxc calls from xen unstable. This patch updates the libxenctrl calls in Qemu to use the new interface, otherwise Qemu wouldn't be able to build against new versions of the library. We check libxenctrl version in configure, from Xen 3.3.0 to Xen unstable. Signed-off-by: Anthony PERARD Signed-off-by: Stefano Stabellini Acked-by: Alexander Graf Signed-off-by: Alexander Graf --- hw/xen_common.h | 95 ++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 81 insertions(+), 14 deletions(-) (limited to 'hw/xen_common.h') diff --git a/hw/xen_common.h b/hw/xen_common.h index 8a55b44f0b..47b8afea02 100644 --- a/hw/xen_common.h +++ b/hw/xen_common.h @@ -1,6 +1,8 @@ #ifndef QEMU_HW_XEN_COMMON_H #define QEMU_HW_XEN_COMMON_H 1 +#include "config-host.h" + #include #include @@ -13,22 +15,87 @@ #include "qemu-queue.h" /* - * tweaks needed to build with different xen versions - * 0x00030205 -> 3.1.0 - * 0x00030207 -> 3.2.0 - * 0x00030208 -> unstable + * We don't support Xen prior to 3.3.0. */ -#include -#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00030205 -# define evtchn_port_or_error_t int -#endif -#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00030207 -# define xc_map_foreign_pages xc_map_foreign_batch + +/* Xen before 4.0 */ +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 400 +static inline void *xc_map_foreign_bulk(int xc_handle, uint32_t dom, int prot, + xen_pfn_t *arr, int *err, + unsigned int num) +{ + return xc_map_foreign_batch(xc_handle, dom, prot, arr, num); +} #endif -#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00030208 -# define xen_mb() mb() -# define xen_rmb() rmb() -# define xen_wmb() wmb() + + +/* Xen before 4.1 */ +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 410 + +typedef int XenXC; +typedef int XenEvtchn; +typedef int XenGnttab; + +# define XC_INTERFACE_FMT "%i" +# define XC_HANDLER_INITIAL_VALUE -1 + +static inline XenEvtchn xen_xc_evtchn_open(void *logger, + unsigned int open_flags) +{ + return xc_evtchn_open(); +} + +static inline XenGnttab xen_xc_gnttab_open(void *logger, + unsigned int open_flags) +{ + return xc_gnttab_open(); +} + +static inline XenXC xen_xc_interface_open(void *logger, void *dombuild_logger, + unsigned int open_flags) +{ + return xc_interface_open(); +} + +static inline int xc_fd(int xen_xc) +{ + return xen_xc; +} + + +/* Xen 4.1 */ +#else + +typedef xc_interface *XenXC; +typedef xc_evtchn *XenEvtchn; +typedef xc_gnttab *XenGnttab; + +# define XC_INTERFACE_FMT "%p" +# define XC_HANDLER_INITIAL_VALUE NULL + +static inline XenEvtchn xen_xc_evtchn_open(void *logger, + unsigned int open_flags) +{ + return xc_evtchn_open(logger, open_flags); +} + +static inline XenGnttab xen_xc_gnttab_open(void *logger, + unsigned int open_flags) +{ + return xc_gnttab_open(logger, open_flags); +} + +static inline XenXC xen_xc_interface_open(void *logger, void *dombuild_logger, + unsigned int open_flags) +{ + return xc_interface_open(logger, dombuild_logger, open_flags); +} + +/* FIXME There is now way to have the xen fd */ +static inline int xc_fd(xc_interface *xen_xc) +{ + return -1; +} #endif #endif /* QEMU_HW_XEN_COMMON_H */ -- cgit 1.4.1 From 432d268c0552fd30c8be564f7ea2504a2b546101 Mon Sep 17 00:00:00 2001 From: Jun Nakajima Date: Tue, 31 Aug 2010 16:41:25 +0100 Subject: xen: Introduce the Xen mapcache On IA32 host or IA32 PAE host, at present, generally, we can't create an HVM guest with more than 2G memory, because generally 
it's almost impossible for Qemu to find a large enough and consecutive virtual address space to map an HVM guest's whole physical address space. The attached patch fixes this issue using dynamic mapping based on little blocks of memory. Each call to qemu_get_ram_ptr makes a call to qemu_map_cache with the lock option, so mapcache will not unmap these ram_ptr. Blocks that do not belong to the RAM, but usually to a device ROM or to a framebuffer, are handled in a separate function. So the whole RAMBlock can be map. Signed-off-by: Jun Nakajima Signed-off-by: Anthony PERARD Signed-off-by: Stefano Stabellini Signed-off-by: Alexander Graf --- Makefile.target | 3 + configure | 3 + exec.c | 48 +++++++- hw/xen.h | 13 ++ hw/xen_common.h | 9 ++ trace-events | 10 ++ xen-all.c | 66 ++++++++++ xen-mapcache-stub.c | 44 +++++++ xen-mapcache.c | 349 ++++++++++++++++++++++++++++++++++++++++++++++++++++ xen-mapcache.h | 37 ++++++ xen-stub.c | 4 + 11 files changed, 582 insertions(+), 4 deletions(-) create mode 100644 xen-mapcache-stub.c create mode 100644 xen-mapcache.c create mode 100644 xen-mapcache.h (limited to 'hw/xen_common.h') diff --git a/Makefile.target b/Makefile.target index fd84d467da..2e281a4588 100644 --- a/Makefile.target +++ b/Makefile.target @@ -214,8 +214,11 @@ else CONFIG_NO_XEN = y endif # xen support +CONFIG_NO_XEN_MAPCACHE = $(if $(subst n,,$(CONFIG_XEN_MAPCACHE)),n,y) obj-i386-$(CONFIG_XEN) += xen-all.o obj-$(CONFIG_NO_XEN) += xen-stub.o +obj-i386-$(CONFIG_XEN_MAPCACHE) += xen-mapcache.o +obj-$(CONFIG_NO_XEN_MAPCACHE) += xen-mapcache-stub.o # Inter-VM PCI shared memory CONFIG_IVSHMEM = diff --git a/configure b/configure index 5df84d24eb..6fc2bddc12 100755 --- a/configure +++ b/configure @@ -3299,6 +3299,9 @@ case "$target_arch2" in i386|x86_64) if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then echo "CONFIG_XEN=y" >> $config_target_mak + if test "$cpu" = "i386" -o "$cpu" = "x86_64"; then + echo "CONFIG_XEN_MAPCACHE=y" >> $config_target_mak + fi fi esac case "$target_arch2" in diff --git a/exec.c b/exec.c index 308a86dcc0..8c81cffaa7 100644 --- a/exec.c +++ b/exec.c @@ -32,6 +32,7 @@ #include "hw/qdev.h" #include "osdep.h" #include "kvm.h" +#include "hw/xen.h" #include "qemu-timer.h" #if defined(CONFIG_USER_ONLY) #include @@ -51,6 +52,8 @@ #include #endif #endif +#else /* !CONFIG_USER_ONLY */ +#include "xen-mapcache.h" #endif //#define DEBUG_TB_INVALIDATE @@ -2889,6 +2892,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, } } + new_block->offset = find_ram_offset(size); if (host) { new_block->host = host; new_block->flags |= RAM_PREALLOC_MASK; @@ -2911,13 +2915,15 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); #else - new_block->host = qemu_vmalloc(size); + if (xen_mapcache_enabled()) { + xen_ram_alloc(new_block->offset, size); + } else { + new_block->host = qemu_vmalloc(size); + } #endif qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE); } } - - new_block->offset = find_ram_offset(size); new_block->length = size; QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); @@ -2962,7 +2968,11 @@ void qemu_ram_free(ram_addr_t addr) #if defined(TARGET_S390X) && defined(CONFIG_KVM) munmap(block->host, block->length); #else - qemu_vfree(block->host); + if (xen_mapcache_enabled()) { + qemu_invalidate_entry(block->host); + } else { + qemu_vfree(block->host); + } #endif } qemu_free(block); @@ -3051,6 +3061,16 @@ void *qemu_get_ram_ptr(ram_addr_t addr) 
QLIST_REMOVE(block, next); QLIST_INSERT_HEAD(&ram_list.blocks, block, next); } + if (xen_mapcache_enabled()) { + /* We need to check if the requested address is in the RAM + * because we don't want to map the entire memory in QEMU. + */ + if (block->offset == 0) { + return qemu_map_cache(addr, 0, 1); + } else if (block->host == NULL) { + block->host = xen_map_block(block->offset, block->length); + } + } return block->host + (addr - block->offset); } } @@ -3070,6 +3090,16 @@ void *qemu_safe_ram_ptr(ram_addr_t addr) QLIST_FOREACH(block, &ram_list.blocks, next) { if (addr - block->offset < block->length) { + if (xen_mapcache_enabled()) { + /* We need to check if the requested address is in the RAM + * because we don't want to map the entire memory in QEMU. + */ + if (block->offset == 0) { + return qemu_map_cache(addr, 0, 1); + } else if (block->host == NULL) { + block->host = xen_map_block(block->offset, block->length); + } + } return block->host + (addr - block->offset); } } @@ -3086,11 +3116,21 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) uint8_t *host = ptr; QLIST_FOREACH(block, &ram_list.blocks, next) { + /* This case append when the block is not mapped. */ + if (block->host == NULL) { + continue; + } if (host - block->host < block->length) { *ram_addr = block->offset + (host - block->host); return 0; } } + + if (xen_mapcache_enabled()) { + *ram_addr = qemu_ram_addr_from_mapcache(ptr); + return 0; + } + return -1; } diff --git a/hw/xen.h b/hw/xen.h index 9f00c0bfed..6245b38153 100644 --- a/hw/xen.h +++ b/hw/xen.h @@ -31,6 +31,15 @@ static inline int xen_enabled(void) #endif } +static inline int xen_mapcache_enabled(void) +{ +#ifdef CONFIG_XEN_MAPCACHE + return xen_enabled(); +#else + return 0; +#endif +} + int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num); void xen_piix3_set_irq(void *opaque, int irq_num, int level); void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len); @@ -41,6 +50,10 @@ int xen_init(void); int xen_hvm_init(void); void xen_vcpu_init(void); +#if defined(NEED_CPU_H) && !defined(CONFIG_USER_ONLY) +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size); +#endif + #if defined(CONFIG_XEN) && CONFIG_XEN_CTRL_INTERFACE_VERSION < 400 # define HVM_MAX_VCPUS 32 #endif diff --git a/hw/xen_common.h b/hw/xen_common.h index 47b8afea02..dd3e896e94 100644 --- a/hw/xen_common.h +++ b/hw/xen_common.h @@ -63,6 +63,15 @@ static inline int xc_fd(int xen_xc) } +static inline int xc_domain_populate_physmap_exact + (XenXC xc_handle, uint32_t domid, unsigned long nr_extents, + unsigned int extent_order, unsigned int mem_flags, xen_pfn_t *extent_start) +{ + return xc_domain_memory_populate_physmap + (xc_handle, domid, nr_extents, extent_order, mem_flags, extent_start); +} + + /* Xen 4.1 */ #else diff --git a/trace-events b/trace-events index 4f965e2ebd..d703347d40 100644 --- a/trace-events +++ b/trace-events @@ -361,3 +361,13 @@ disable milkymist_uart_pulse_irq_tx(void) "Pulse IRQ TX" # hw/milkymist-vgafb.c disable milkymist_vgafb_memory_read(uint32_t addr, uint32_t value) "addr %08x value %08x" disable milkymist_vgafb_memory_write(uint32_t addr, uint32_t value) "addr %08x value %08x" + +# xen-all.c +disable xen_ram_alloc(unsigned long ram_addr, unsigned long size) "requested: %#lx, size %#lx" + +# xen-mapcache.c +disable qemu_map_cache(uint64_t phys_addr) "want %#"PRIx64"" +disable qemu_remap_bucket(uint64_t index) "index %#"PRIx64"" +disable qemu_map_cache_return(void* ptr) "%p" +disable xen_map_block(uint64_t phys_addr, uint64_t size) 
"%#"PRIx64", size %#"PRIx64"" +disable xen_unmap_block(void* addr, unsigned long size) "%p, size %#lx" diff --git a/xen-all.c b/xen-all.c index bb809eff16..cb01ab9aad 100644 --- a/xen-all.c +++ b/xen-all.c @@ -10,6 +10,9 @@ #include "hw/xen_common.h" #include "hw/xen_backend.h" +#include "xen-mapcache.h" +#include "trace.h" + /* Xen specific function for piix pci */ int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) @@ -52,6 +55,65 @@ qemu_irq *xen_interrupt_controller_init(void) return qemu_allocate_irqs(xen_set_irq, NULL, 16); } +/* Memory Ops */ + +static void xen_ram_init(ram_addr_t ram_size) +{ + RAMBlock *new_block; + ram_addr_t below_4g_mem_size, above_4g_mem_size = 0; + + new_block = qemu_mallocz(sizeof (*new_block)); + pstrcpy(new_block->idstr, sizeof (new_block->idstr), "xen.ram"); + new_block->host = NULL; + new_block->offset = 0; + new_block->length = ram_size; + + QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); + + ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty, + new_block->length >> TARGET_PAGE_BITS); + memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS), + 0xff, new_block->length >> TARGET_PAGE_BITS); + + if (ram_size >= 0xe0000000 ) { + above_4g_mem_size = ram_size - 0xe0000000; + below_4g_mem_size = 0xe0000000; + } else { + below_4g_mem_size = ram_size; + } + + cpu_register_physical_memory(0, below_4g_mem_size, new_block->offset); +#if TARGET_PHYS_ADDR_BITS > 32 + if (above_4g_mem_size > 0) { + cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size, + new_block->offset + below_4g_mem_size); + } +#endif +} + +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size) +{ + unsigned long nr_pfn; + xen_pfn_t *pfn_list; + int i; + + trace_xen_ram_alloc(ram_addr, size); + + nr_pfn = size >> TARGET_PAGE_BITS; + pfn_list = qemu_malloc(sizeof (*pfn_list) * nr_pfn); + + for (i = 0; i < nr_pfn; i++) { + pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i; + } + + if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) { + hw_error("xen: failed to populate ram at %lx", ram_addr); + } + + qemu_free(pfn_list); +} + + /* VCPU Operations, MMIO, IO ring ... */ static void xen_reset_vcpu(void *opaque) @@ -86,5 +148,9 @@ int xen_init(void) int xen_hvm_init(void) { + /* Init RAM management */ + qemu_map_cache_init(); + xen_ram_init(ram_size); + return 0; } diff --git a/xen-mapcache-stub.c b/xen-mapcache-stub.c new file mode 100644 index 0000000000..7c14b3d141 --- /dev/null +++ b/xen-mapcache-stub.c @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2011 Citrix Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "config.h" + +#include "exec-all.h" +#include "qemu-common.h" +#include "cpu-common.h" +#include "xen-mapcache.h" + +void qemu_map_cache_init(void) +{ +} + +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock) +{ + return qemu_get_ram_ptr(phys_addr); +} + +void qemu_map_cache_unlock(void *buffer) +{ +} + +ram_addr_t qemu_ram_addr_from_mapcache(void *ptr) +{ + return -1; +} + +void qemu_invalidate_map_cache(void) +{ +} + +void qemu_invalidate_entry(uint8_t *buffer) +{ +} +uint8_t *xen_map_block(target_phys_addr_t phys_addr, target_phys_addr_t size) +{ + return NULL; +} diff --git a/xen-mapcache.c b/xen-mapcache.c new file mode 100644 index 0000000000..a539358fb7 --- /dev/null +++ b/xen-mapcache.c @@ -0,0 +1,349 @@ +/* + * Copyright (C) 2011 Citrix Ltd. 
+ * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "config.h" + +#include + +#include "hw/xen_backend.h" +#include "blockdev.h" + +#include +#include + +#include "xen-mapcache.h" +#include "trace.h" + + +//#define MAPCACHE_DEBUG + +#ifdef MAPCACHE_DEBUG +# define DPRINTF(fmt, ...) do { \ + fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \ +} while (0) +#else +# define DPRINTF(fmt, ...) do { } while (0) +#endif + +#if defined(__i386__) +# define MCACHE_BUCKET_SHIFT 16 +#elif defined(__x86_64__) +# define MCACHE_BUCKET_SHIFT 20 +#endif +#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT) + +#define BITS_PER_LONG (sizeof(long) * 8) +#define BITS_TO_LONGS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG) +#define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)] + +typedef struct MapCacheEntry { + target_phys_addr_t paddr_index; + uint8_t *vaddr_base; + DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT); + uint8_t lock; + struct MapCacheEntry *next; +} MapCacheEntry; + +typedef struct MapCacheRev { + uint8_t *vaddr_req; + target_phys_addr_t paddr_index; + QTAILQ_ENTRY(MapCacheRev) next; +} MapCacheRev; + +typedef struct MapCache { + MapCacheEntry *entry; + unsigned long nr_buckets; + QTAILQ_HEAD(map_cache_head, MapCacheRev) locked_entries; + + /* For most cases (>99.9%), the page address is the same. */ + target_phys_addr_t last_address_index; + uint8_t *last_address_vaddr; + unsigned long max_mcache_size; + unsigned int mcache_bucket_shift; +} MapCache; + +static MapCache *mapcache; + +static inline int test_bit(unsigned int bit, const unsigned long *map) +{ + return !!((map)[(bit) / BITS_PER_LONG] & (1UL << ((bit) % BITS_PER_LONG))); +} + +void qemu_map_cache_init(void) +{ + unsigned long size; + struct rlimit rlimit_as; + + mapcache = qemu_mallocz(sizeof (MapCache)); + + QTAILQ_INIT(&mapcache->locked_entries); + mapcache->last_address_index = -1; + + getrlimit(RLIMIT_AS, &rlimit_as); + rlimit_as.rlim_cur = rlimit_as.rlim_max; + setrlimit(RLIMIT_AS, &rlimit_as); + mapcache->max_mcache_size = rlimit_as.rlim_max; + + mapcache->nr_buckets = + (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) + + (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >> + (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)); + + size = mapcache->nr_buckets * sizeof (MapCacheEntry); + size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1); + DPRINTF("qemu_map_cache_init, nr_buckets = %lx size %lu\n", mapcache->nr_buckets, size); + mapcache->entry = qemu_mallocz(size); +} + +static void qemu_remap_bucket(MapCacheEntry *entry, + target_phys_addr_t size, + target_phys_addr_t address_index) +{ + uint8_t *vaddr_base; + xen_pfn_t *pfns; + int *err; + unsigned int i, j; + target_phys_addr_t nb_pfn = size >> XC_PAGE_SHIFT; + + trace_qemu_remap_bucket(address_index); + + pfns = qemu_mallocz(nb_pfn * sizeof (xen_pfn_t)); + err = qemu_mallocz(nb_pfn * sizeof (int)); + + if (entry->vaddr_base != NULL) { + if (munmap(entry->vaddr_base, size) != 0) { + perror("unmap fails"); + exit(-1); + } + } + + for (i = 0; i < nb_pfn; i++) { + pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i; + } + + vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE, + pfns, err, nb_pfn); + if (vaddr_base == NULL) { + perror("xc_map_foreign_bulk"); + exit(-1); + } + + entry->vaddr_base = vaddr_base; + entry->paddr_index = address_index; + + for (i = 0; i < nb_pfn; i += BITS_PER_LONG) { + 
unsigned long word = 0; + if ((i + BITS_PER_LONG) > nb_pfn) { + j = nb_pfn % BITS_PER_LONG; + } else { + j = BITS_PER_LONG; + } + while (j > 0) { + word = (word << 1) | !err[i + --j]; + } + entry->valid_mapping[i / BITS_PER_LONG] = word; + } + + qemu_free(pfns); + qemu_free(err); +} + +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock) +{ + MapCacheEntry *entry, *pentry = NULL; + target_phys_addr_t address_index = phys_addr >> MCACHE_BUCKET_SHIFT; + target_phys_addr_t address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1); + + trace_qemu_map_cache(phys_addr); + + if (address_index == mapcache->last_address_index && !lock) { + trace_qemu_map_cache_return(mapcache->last_address_vaddr + address_offset); + return mapcache->last_address_vaddr + address_offset; + } + + entry = &mapcache->entry[address_index % mapcache->nr_buckets]; + + while (entry && entry->lock && entry->paddr_index != address_index && entry->vaddr_base) { + pentry = entry; + entry = entry->next; + } + if (!entry) { + entry = qemu_mallocz(sizeof (MapCacheEntry)); + pentry->next = entry; + qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index); + } else if (!entry->lock) { + if (!entry->vaddr_base || entry->paddr_index != address_index || + !test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) { + qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index); + } + } + + if (!test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) { + mapcache->last_address_index = -1; + trace_qemu_map_cache_return(NULL); + return NULL; + } + + mapcache->last_address_index = address_index; + mapcache->last_address_vaddr = entry->vaddr_base; + if (lock) { + MapCacheRev *reventry = qemu_mallocz(sizeof(MapCacheRev)); + entry->lock++; + reventry->vaddr_req = mapcache->last_address_vaddr + address_offset; + reventry->paddr_index = mapcache->last_address_index; + QTAILQ_INSERT_HEAD(&mapcache->locked_entries, reventry, next); + } + + trace_qemu_map_cache_return(mapcache->last_address_vaddr + address_offset); + return mapcache->last_address_vaddr + address_offset; +} + +ram_addr_t qemu_ram_addr_from_mapcache(void *ptr) +{ + MapCacheRev *reventry; + target_phys_addr_t paddr_index; + int found = 0; + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + if (reventry->vaddr_req == ptr) { + paddr_index = reventry->paddr_index; + found = 1; + break; + } + } + if (!found) { + fprintf(stderr, "qemu_ram_addr_from_mapcache, could not find %p\n", ptr); + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + DPRINTF(" "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index, + reventry->vaddr_req); + } + abort(); + return 0; + } + + return paddr_index << MCACHE_BUCKET_SHIFT; +} + +void qemu_invalidate_entry(uint8_t *buffer) +{ + MapCacheEntry *entry = NULL, *pentry = NULL; + MapCacheRev *reventry; + target_phys_addr_t paddr_index; + int found = 0; + + if (mapcache->last_address_vaddr == buffer) { + mapcache->last_address_index = -1; + } + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + if (reventry->vaddr_req == buffer) { + paddr_index = reventry->paddr_index; + found = 1; + break; + } + } + if (!found) { + DPRINTF("qemu_invalidate_entry, could not find %p\n", buffer); + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + DPRINTF(" "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); + } + return; + } + QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next); + qemu_free(reventry); + + entry = 
&mapcache->entry[paddr_index % mapcache->nr_buckets]; + while (entry && entry->paddr_index != paddr_index) { + pentry = entry; + entry = entry->next; + } + if (!entry) { + DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer); + return; + } + entry->lock--; + if (entry->lock > 0 || pentry == NULL) { + return; + } + + pentry->next = entry->next; + if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) { + perror("unmap fails"); + exit(-1); + } + qemu_free(entry); +} + +void qemu_invalidate_map_cache(void) +{ + unsigned long i; + MapCacheRev *reventry; + + /* Flush pending AIO before destroying the mapcache */ + qemu_aio_flush(); + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + DPRINTF("There should be no locked mappings at this time, " + "but "TARGET_FMT_plx" -> %p is present\n", + reventry->paddr_index, reventry->vaddr_req); + } + + mapcache_lock(); + + for (i = 0; i < mapcache->nr_buckets; i++) { + MapCacheEntry *entry = &mapcache->entry[i]; + + if (entry->vaddr_base == NULL) { + continue; + } + + if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) { + perror("unmap fails"); + exit(-1); + } + + entry->paddr_index = 0; + entry->vaddr_base = NULL; + } + + mapcache->last_address_index = -1; + mapcache->last_address_vaddr = NULL; + + mapcache_unlock(); +} + +uint8_t *xen_map_block(target_phys_addr_t phys_addr, target_phys_addr_t size) +{ + uint8_t *vaddr_base; + xen_pfn_t *pfns; + int *err; + unsigned int i; + target_phys_addr_t nb_pfn = size >> XC_PAGE_SHIFT; + + trace_xen_map_block(phys_addr, size); + phys_addr >>= XC_PAGE_SHIFT; + + pfns = qemu_mallocz(nb_pfn * sizeof (xen_pfn_t)); + err = qemu_mallocz(nb_pfn * sizeof (int)); + + for (i = 0; i < nb_pfn; i++) { + pfns[i] = phys_addr + i; + } + + vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE, + pfns, err, nb_pfn); + if (vaddr_base == NULL) { + perror("xc_map_foreign_bulk"); + exit(-1); + } + + qemu_free(pfns); + qemu_free(err); + + return vaddr_base; +} diff --git a/xen-mapcache.h b/xen-mapcache.h new file mode 100644 index 0000000000..339444c94e --- /dev/null +++ b/xen-mapcache.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2011 Citrix Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ * + */ + +#ifndef XEN_MAPCACHE_H +#define XEN_MAPCACHE_H + +#include +#include "trace.h" + +void qemu_map_cache_init(void); +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock); +void qemu_map_cache_unlock(void *phys_addr); +ram_addr_t qemu_ram_addr_from_mapcache(void *ptr); +void qemu_invalidate_entry(uint8_t *buffer); +void qemu_invalidate_map_cache(void); + +uint8_t *xen_map_block(target_phys_addr_t phys_addr, target_phys_addr_t size); + +static inline void xen_unmap_block(void *addr, ram_addr_t size) +{ + trace_xen_unmap_block(addr, size); + + if (munmap(addr, size) != 0) { + hw_error("xen_unmap_block: %s", strerror(errno)); + } +} + + +#define mapcache_lock() ((void)0) +#define mapcache_unlock() ((void)0) + +#endif /* !XEN_MAPCACHE_H */ diff --git a/xen-stub.c b/xen-stub.c index 3a8449c148..8d2fa54b4b 100644 --- a/xen-stub.c +++ b/xen-stub.c @@ -22,6 +22,10 @@ void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len) { } +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size) +{ +} + qemu_irq *xen_interrupt_controller_init(void) { return NULL; -- cgit 1.4.1 From 9ce94e7c8a997472d79879e687d5ceaa14eca944 Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Mon, 6 Sep 2010 20:07:14 +0100 Subject: xen: Initialize event channels and io rings Open and bind event channels; map ioreq and buffered ioreq rings. Signed-off-by: Arun Sharma Signed-off-by: Anthony PERARD Signed-off-by: Stefano Stabellini Acked-by: Alexander Graf Signed-off-by: Alexander Graf --- hw/xen_common.h | 2 + xen-all.c | 417 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 419 insertions(+) (limited to 'hw/xen_common.h') diff --git a/hw/xen_common.h b/hw/xen_common.h index dd3e896e94..a1958a0af1 100644 --- a/hw/xen_common.h +++ b/hw/xen_common.h @@ -107,4 +107,6 @@ static inline int xc_fd(xc_interface *xen_xc) } #endif +void destroy_hvm_domain(void); + #endif /* QEMU_HW_XEN_COMMON_H */ diff --git a/xen-all.c b/xen-all.c index cb01ab9aad..e849a38788 100644 --- a/xen-all.c +++ b/xen-all.c @@ -6,6 +6,8 @@ * */ +#include + #include "hw/pci.h" #include "hw/xen_common.h" #include "hw/xen_backend.h" @@ -13,6 +15,58 @@ #include "xen-mapcache.h" #include "trace.h" +#include +#include + +//#define DEBUG_XEN + +#ifdef DEBUG_XEN +#define DPRINTF(fmt, ...) \ + do { fprintf(stderr, "xen: " fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) 
\ + do { } while (0) +#endif + +/* Compatibility with older version */ +#if __XEN_LATEST_INTERFACE_VERSION__ < 0x0003020a +static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i) +{ + return shared_page->vcpu_iodata[i].vp_eport; +} +static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu) +{ + return &shared_page->vcpu_iodata[vcpu].vp_ioreq; +} +# define FMT_ioreq_size PRIx64 +#else +static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i) +{ + return shared_page->vcpu_ioreq[i].vp_eport; +} +static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu) +{ + return &shared_page->vcpu_ioreq[vcpu]; +} +# define FMT_ioreq_size "u" +#endif + +#define BUFFER_IO_MAX_DELAY 100 + +typedef struct XenIOState { + shared_iopage_t *shared_page; + buffered_iopage_t *buffered_io_page; + QEMUTimer *buffered_io_timer; + /* the evtchn port for polling the notification, */ + evtchn_port_t *ioreq_local_port; + /* the evtchn fd for polling */ + XenEvtchn xce_handle; + /* which vcpu we are serving */ + int send_vcpu; + + Notifier exit; +} XenIOState; + /* Xen specific function for piix pci */ int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) @@ -133,8 +187,304 @@ void xen_vcpu_init(void) } } +/* get the ioreq packets from share mem */ +static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu) +{ + ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu); + + if (req->state != STATE_IOREQ_READY) { + DPRINTF("I/O request not ready: " + "%x, ptr: %x, port: %"PRIx64", " + "data: %"PRIx64", count: %" FMT_ioreq_size ", size: %" FMT_ioreq_size "\n", + req->state, req->data_is_ptr, req->addr, + req->data, req->count, req->size); + return NULL; + } + + xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */ + + req->state = STATE_IOREQ_INPROCESS; + return req; +} + +/* use poll to get the port notification */ +/* ioreq_vec--out,the */ +/* retval--the number of ioreq packet */ +static ioreq_t *cpu_get_ioreq(XenIOState *state) +{ + int i; + evtchn_port_t port; + + port = xc_evtchn_pending(state->xce_handle); + if (port != -1) { + for (i = 0; i < smp_cpus; i++) { + if (state->ioreq_local_port[i] == port) { + break; + } + } + + if (i == smp_cpus) { + hw_error("Fatal error while trying to get io event!\n"); + } + + /* unmask the wanted port again */ + xc_evtchn_unmask(state->xce_handle, port); + + /* get the io packet from shared memory */ + state->send_vcpu = i; + return cpu_get_ioreq_from_shared_memory(state, i); + } + + /* read error or read nothing */ + return NULL; +} + +static uint32_t do_inp(pio_addr_t addr, unsigned long size) +{ + switch (size) { + case 1: + return cpu_inb(addr); + case 2: + return cpu_inw(addr); + case 4: + return cpu_inl(addr); + default: + hw_error("inp: bad size: %04"FMT_pioaddr" %lx", addr, size); + } +} + +static void do_outp(pio_addr_t addr, + unsigned long size, uint32_t val) +{ + switch (size) { + case 1: + return cpu_outb(addr, val); + case 2: + return cpu_outw(addr, val); + case 4: + return cpu_outl(addr, val); + default: + hw_error("outp: bad size: %04"FMT_pioaddr" %lx", addr, size); + } +} + +static void cpu_ioreq_pio(ioreq_t *req) +{ + int i, sign; + + sign = req->df ? 
-1 : 1; + + if (req->dir == IOREQ_READ) { + if (!req->data_is_ptr) { + req->data = do_inp(req->addr, req->size); + } else { + uint32_t tmp; + + for (i = 0; i < req->count; i++) { + tmp = do_inp(req->addr, req->size); + cpu_physical_memory_write(req->data + (sign * i * req->size), + (uint8_t *) &tmp, req->size); + } + } + } else if (req->dir == IOREQ_WRITE) { + if (!req->data_is_ptr) { + do_outp(req->addr, req->size, req->data); + } else { + for (i = 0; i < req->count; i++) { + uint32_t tmp = 0; + + cpu_physical_memory_read(req->data + (sign * i * req->size), + (uint8_t*) &tmp, req->size); + do_outp(req->addr, req->size, tmp); + } + } + } +} + +static void cpu_ioreq_move(ioreq_t *req) +{ + int i, sign; + + sign = req->df ? -1 : 1; + + if (!req->data_is_ptr) { + if (req->dir == IOREQ_READ) { + for (i = 0; i < req->count; i++) { + cpu_physical_memory_read(req->addr + (sign * i * req->size), + (uint8_t *) &req->data, req->size); + } + } else if (req->dir == IOREQ_WRITE) { + for (i = 0; i < req->count; i++) { + cpu_physical_memory_write(req->addr + (sign * i * req->size), + (uint8_t *) &req->data, req->size); + } + } + } else { + target_ulong tmp; + + if (req->dir == IOREQ_READ) { + for (i = 0; i < req->count; i++) { + cpu_physical_memory_read(req->addr + (sign * i * req->size), + (uint8_t*) &tmp, req->size); + cpu_physical_memory_write(req->data + (sign * i * req->size), + (uint8_t*) &tmp, req->size); + } + } else if (req->dir == IOREQ_WRITE) { + for (i = 0; i < req->count; i++) { + cpu_physical_memory_read(req->data + (sign * i * req->size), + (uint8_t*) &tmp, req->size); + cpu_physical_memory_write(req->addr + (sign * i * req->size), + (uint8_t*) &tmp, req->size); + } + } + } +} + +static void handle_ioreq(ioreq_t *req) +{ + if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) && + (req->size < sizeof (target_ulong))) { + req->data &= ((target_ulong) 1 << (8 * req->size)) - 1; + } + + switch (req->type) { + case IOREQ_TYPE_PIO: + cpu_ioreq_pio(req); + break; + case IOREQ_TYPE_COPY: + cpu_ioreq_move(req); + break; + case IOREQ_TYPE_TIMEOFFSET: + break; + case IOREQ_TYPE_INVALIDATE: + qemu_invalidate_map_cache(); + break; + default: + hw_error("Invalid ioreq type 0x%x\n", req->type); + } +} + +static void handle_buffered_iopage(XenIOState *state) +{ + buf_ioreq_t *buf_req = NULL; + ioreq_t req; + int qw; + + if (!state->buffered_io_page) { + return; + } + + while (state->buffered_io_page->read_pointer != state->buffered_io_page->write_pointer) { + buf_req = &state->buffered_io_page->buf_ioreq[ + state->buffered_io_page->read_pointer % IOREQ_BUFFER_SLOT_NUM]; + req.size = 1UL << buf_req->size; + req.count = 1; + req.addr = buf_req->addr; + req.data = buf_req->data; + req.state = STATE_IOREQ_READY; + req.dir = buf_req->dir; + req.df = 1; + req.type = buf_req->type; + req.data_is_ptr = 0; + qw = (req.size == 8); + if (qw) { + buf_req = &state->buffered_io_page->buf_ioreq[ + (state->buffered_io_page->read_pointer + 1) % IOREQ_BUFFER_SLOT_NUM]; + req.data |= ((uint64_t)buf_req->data) << 32; + } + + handle_ioreq(&req); + + xen_mb(); + state->buffered_io_page->read_pointer += qw ? 
2 : 1; + } +} + +static void handle_buffered_io(void *opaque) +{ + XenIOState *state = opaque; + + handle_buffered_iopage(state); + qemu_mod_timer(state->buffered_io_timer, + BUFFER_IO_MAX_DELAY + qemu_get_clock_ms(rt_clock)); +} + +static void cpu_handle_ioreq(void *opaque) +{ + XenIOState *state = opaque; + ioreq_t *req = cpu_get_ioreq(state); + + handle_buffered_iopage(state); + if (req) { + handle_ioreq(req); + + if (req->state != STATE_IOREQ_INPROCESS) { + fprintf(stderr, "Badness in I/O request ... not in service?!: " + "%x, ptr: %x, port: %"PRIx64", " + "data: %"PRIx64", count: %" FMT_ioreq_size ", size: %" FMT_ioreq_size "\n", + req->state, req->data_is_ptr, req->addr, + req->data, req->count, req->size); + destroy_hvm_domain(); + return; + } + + xen_wmb(); /* Update ioreq contents /then/ update state. */ + + /* + * We do this before we send the response so that the tools + * have the opportunity to pick up on the reset before the + * guest resumes and does a hlt with interrupts disabled which + * causes Xen to powerdown the domain. + */ + if (vm_running) { + if (qemu_shutdown_requested_get()) { + destroy_hvm_domain(); + } + if (qemu_reset_requested_get()) { + qemu_system_reset(); + } + } + + req->state = STATE_IORESP_READY; + xc_evtchn_notify(state->xce_handle, state->ioreq_local_port[state->send_vcpu]); + } +} + +static void xen_main_loop_prepare(XenIOState *state) +{ + int evtchn_fd = -1; + + if (state->xce_handle != XC_HANDLER_INITIAL_VALUE) { + evtchn_fd = xc_evtchn_fd(state->xce_handle); + } + + state->buffered_io_timer = qemu_new_timer_ms(rt_clock, handle_buffered_io, + state); + qemu_mod_timer(state->buffered_io_timer, qemu_get_clock_ms(rt_clock)); + + if (evtchn_fd != -1) { + qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state); + } +} + + /* Initialise Xen */ +static void xen_vm_change_state_handler(void *opaque, int running, int reason) +{ + XenIOState *state = opaque; + if (running) { + xen_main_loop_prepare(state); + } +} + +static void xen_exit_notifier(Notifier *n) +{ + XenIOState *state = container_of(n, XenIOState, exit); + + xc_evtchn_close(state->xce_handle); +} + int xen_init(void) { xen_xc = xen_xc_interface_open(0, 0, 0); @@ -148,9 +498,76 @@ int xen_init(void) int xen_hvm_init(void) { + int i, rc; + unsigned long ioreq_pfn; + XenIOState *state; + + state = qemu_mallocz(sizeof (XenIOState)); + + state->xce_handle = xen_xc_evtchn_open(NULL, 0); + if (state->xce_handle == XC_HANDLER_INITIAL_VALUE) { + perror("xen: event channel open"); + return -errno; + } + + state->exit.notify = xen_exit_notifier; + qemu_add_exit_notifier(&state->exit); + + xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_IOREQ_PFN, &ioreq_pfn); + DPRINTF("shared page at pfn %lx\n", ioreq_pfn); + state->shared_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE, + PROT_READ|PROT_WRITE, ioreq_pfn); + if (state->shared_page == NULL) { + hw_error("map shared IO page returned error %d handle=" XC_INTERFACE_FMT, + errno, xen_xc); + } + + xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_BUFIOREQ_PFN, &ioreq_pfn); + DPRINTF("buffered io page at pfn %lx\n", ioreq_pfn); + state->buffered_io_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE, + PROT_READ|PROT_WRITE, ioreq_pfn); + if (state->buffered_io_page == NULL) { + hw_error("map buffered IO page returned error %d", errno); + } + + state->ioreq_local_port = qemu_mallocz(smp_cpus * sizeof (evtchn_port_t)); + + /* FIXME: how about if we overflow the page here? 
*/ + for (i = 0; i < smp_cpus; i++) { + rc = xc_evtchn_bind_interdomain(state->xce_handle, xen_domid, + xen_vcpu_eport(state->shared_page, i)); + if (rc == -1) { + fprintf(stderr, "bind interdomain ioctl error %d\n", errno); + return -1; + } + state->ioreq_local_port[i] = rc; + } + /* Init RAM management */ qemu_map_cache_init(); xen_ram_init(ram_size); + qemu_add_vm_change_state_handler(xen_vm_change_state_handler, state); + return 0; } + +void destroy_hvm_domain(void) +{ + XenXC xc_handle; + int sts; + + xc_handle = xen_xc_interface_open(0, 0, 0); + if (xc_handle == XC_HANDLER_INITIAL_VALUE) { + fprintf(stderr, "Cannot acquire xenctrl handle\n"); + } else { + sts = xc_domain_shutdown(xc_handle, xen_domid, SHUTDOWN_poweroff); + if (sts != 0) { + fprintf(stderr, "? xc_domain_shutdown failed to issue poweroff, " + "sts %d, %s\n", sts, strerror(errno)); + } else { + fprintf(stderr, "Issued domain %d poweroff\n", xen_domid); + } + xc_interface_close(xc_handle); + } +} -- cgit 1.4.1
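
The mapcache introduced by the second patch above works on fixed-size buckets of guest-physical address space (1 << MCACHE_BUCKET_SHIFT bytes each: 64 KiB on i386 hosts, 1 MiB on x86_64 hosts) and records, per bucket, which Xen pages xc_map_foreign_bulk() actually managed to map. The standalone sketch below reproduces just that bucket arithmetic and valid_mapping bitmap construction outside of QEMU; the constants are the x86_64 values from the patch, while the example address and err[] contents are made-up illustrative data, not taken from the patches.

/* Illustrative only: mirrors the bucket arithmetic and valid_mapping bitmap
 * construction of xen-mapcache.c.  Example inputs are invented. */
#include <stdio.h>
#include <stdint.h>

#define XC_PAGE_SHIFT       12                       /* 4 KiB Xen pages */
#define MCACHE_BUCKET_SHIFT 20                       /* 1 MiB buckets on x86_64 hosts */
#define MCACHE_BUCKET_SIZE  (1UL << MCACHE_BUCKET_SHIFT)

int main(void)
{
    uint64_t phys_addr = 0x12345678;                 /* example guest-physical address */

    /* qemu_map_cache() splits the address into a bucket index and an offset
     * inside the bucket; test_bit() is later called on the page index. */
    uint64_t address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
    uint64_t address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);
    unsigned int page_in_bucket = address_offset >> XC_PAGE_SHIFT;
    printf("bucket %#llx, offset %#llx, page %u\n",
           (unsigned long long)address_index,
           (unsigned long long)address_offset, page_in_bucket);

    /* qemu_remap_bucket() folds xc_map_foreign_bulk()'s per-page err[] array
     * into a bitmap word: bit i is set when page i mapped successfully. */
    int err[8] = { 0, 0, 1, 0, 0, 0, 0, 0 };         /* pretend page 2 failed */
    unsigned int nb_pfn = 8;                         /* fits in one long here */
    unsigned long word = 0;
    unsigned int j = nb_pfn;
    while (j > 0) {
        word = (word << 1) | !err[--j];
    }
    printf("valid_mapping word: %#lx\n", word);      /* 0xfb: bit 2 clear */
    return 0;
}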
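
Similarly, handle_buffered_iopage() in the last patch reassembles an 8-byte buffered request from two consecutive ring slots and then advances the read pointer by two instead of one. The sketch below shows only that reassembly step; struct fake_buf_ioreq is a hypothetical reduced stand-in for Xen's buf_ioreq_t and the ring size here is an assumption, so the real layout and IOREQ_BUFFER_SLOT_NUM value from Xen's ioreq.h still apply.

/* Illustrative only: simplified stand-in for the qw (quad-word) reassembly
 * performed by handle_buffered_iopage(). */
#include <stdio.h>
#include <stdint.h>

#define IOREQ_BUFFER_SLOT_NUM 511   /* assumed ring size; the real constant
                                       comes from Xen's ioreq.h */

struct fake_buf_ioreq {             /* hypothetical reduced buf_ioreq_t */
    uint8_t  size;                  /* log2 of the access size */
    uint32_t data;                  /* low 32 bits of the value */
};

int main(void)
{
    struct fake_buf_ioreq ring[IOREQ_BUFFER_SLOT_NUM] = { 0 };
    uint32_t read_pointer = 510;    /* wrap-around case: slots 510 and 0 pair up */

    ring[510].size = 3;             /* 1UL << 3 == 8 bytes, so this is a qw request */
    ring[510].data = 0xdeadbeef;    /* low half */
    ring[0].data   = 0x01234567;    /* high half sits in the next slot */

    struct fake_buf_ioreq *req = &ring[read_pointer % IOREQ_BUFFER_SLOT_NUM];
    uint64_t data = req->data;
    int qw = ((1UL << req->size) == 8);
    if (qw) {
        struct fake_buf_ioreq *next =
            &ring[(read_pointer + 1) % IOREQ_BUFFER_SLOT_NUM];
        data |= (uint64_t)next->data << 32;
    }

    printf("reassembled value: %#llx\n", (unsigned long long)data);

    /* the consumer then advances the pointer by 2 for qw, 1 otherwise */
    read_pointer += qw ? 2 : 1;
    printf("new read_pointer: %u\n", read_pointer);
    return 0;
}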