57 files changed, 1065 insertions, 868 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index e3e34fb4b1..7603ea2d44 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -667,6 +667,7 @@ F: hw/block/cdrom.c F: hw/block/hd-geometry.c F: tests/ide-test.c F: tests/ahci-test.c +F: tests/libqos/ahci* T: git git://github.com/jnsnow/qemu.git ide Floppy @@ -675,6 +676,7 @@ L: qemu-block@nongnu.org S: Supported F: hw/block/fdc.c F: include/hw/block/fdc.h +F: tests/fdc-test.c T: git git://github.com/jnsnow/qemu.git ide OMAP diff --git a/block.c b/block.c index 6268e37afb..1f90b4773f 100644 --- a/block.c +++ b/block.c @@ -1907,6 +1907,12 @@ void bdrv_close(BlockDriverState *bs) if (bs->job) { block_job_cancel_sync(bs->job); } + + /* Disable I/O limits and drain all pending throttled requests */ + if (bs->io_limits_enabled) { + bdrv_io_limits_disable(bs); + } + bdrv_drain(bs); /* complete I/O */ bdrv_flush(bs); bdrv_drain(bs); /* in case flush left pending I/O */ @@ -1958,11 +1964,6 @@ void bdrv_close(BlockDriverState *bs) blk_dev_change_media_cb(bs->blk, false); } - /*throttling disk I/O limits*/ - if (bs->io_limits_enabled) { - bdrv_io_limits_disable(bs); - } - QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { g_free(ban); } diff --git a/block/mirror.c b/block/mirror.c index a2589261f5..87928aba84 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -455,6 +455,8 @@ static void coroutine_fn mirror_run(void *opaque) if (!s->is_none_mode) { /* First part, loop on the sectors and initialize the dirty bitmap. */ BlockDriverState *base = s->base; + bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(s->target); + for (sector_num = 0; sector_num < end; ) { /* Just to make sure we are not exceeding int limit. */ int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS, @@ -477,7 +479,7 @@ static void coroutine_fn mirror_run(void *opaque) } assert(n > 0); - if (ret == 1) { + if (ret == 1 || mark_all_dirty) { bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n); } sector_num += n; diff --git a/block/raw-posix.c b/block/raw-posix.c index 30df8adf7f..86f8562b5f 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1648,7 +1648,7 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp) goto out; } - fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, + fd = qemu_open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, 0644); if (fd < 0) { result = -errno; diff --git a/block/raw-win32.c b/block/raw-win32.c index 68f2338acc..b562c94dad 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -119,9 +119,9 @@ static int aio_worker(void *arg) case QEMU_AIO_WRITE: count = handle_aiocb_rw(aiocb); if (count == aiocb->aio_nbytes) { - count = 0; + ret = 0; } else { - count = -EINVAL; + ret = -EINVAL; } break; case QEMU_AIO_FLUSH: diff --git a/cpus.c b/cpus.c index dddd056454..d44c0eda89 100644 --- a/cpus.c +++ b/cpus.c @@ -69,6 +69,14 @@ static CPUState *next_cpu; int64_t max_delay; int64_t max_advance; +/* vcpu throttling controls */ +static QEMUTimer *throttle_timer; +static unsigned int throttle_percentage; + +#define CPU_THROTTLE_PCT_MIN 1 +#define CPU_THROTTLE_PCT_MAX 99 +#define CPU_THROTTLE_TIMESLICE_NS 10000000 + bool cpu_is_stopped(CPUState *cpu) { return cpu->stopped || !runstate_is_running(); @@ -505,10 +513,80 @@ static const VMStateDescription vmstate_timers = { } }; +static void cpu_throttle_thread(void *opaque) +{ + CPUState *cpu = opaque; + double pct; + double throttle_ratio; + long sleeptime_ns; + + if (!cpu_throttle_get_percentage()) { + return; + } + + pct = (double)cpu_throttle_get_percentage()/100; + 
throttle_ratio = pct / (1 - pct); + sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS); + + qemu_mutex_unlock_iothread(); + atomic_set(&cpu->throttle_thread_scheduled, 0); + g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */ + qemu_mutex_lock_iothread(); +} + +static void cpu_throttle_timer_tick(void *opaque) +{ + CPUState *cpu; + double pct; + + /* Stop the timer if needed */ + if (!cpu_throttle_get_percentage()) { + return; + } + CPU_FOREACH(cpu) { + if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) { + async_run_on_cpu(cpu, cpu_throttle_thread, cpu); + } + } + + pct = (double)cpu_throttle_get_percentage()/100; + timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) + + CPU_THROTTLE_TIMESLICE_NS / (1-pct)); +} + +void cpu_throttle_set(int new_throttle_pct) +{ + /* Ensure throttle percentage is within valid range */ + new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX); + new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN); + + atomic_set(&throttle_percentage, new_throttle_pct); + + timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) + + CPU_THROTTLE_TIMESLICE_NS); +} + +void cpu_throttle_stop(void) +{ + atomic_set(&throttle_percentage, 0); +} + +bool cpu_throttle_active(void) +{ + return (cpu_throttle_get_percentage() != 0); +} + +int cpu_throttle_get_percentage(void) +{ + return atomic_read(&throttle_percentage); +} + void cpu_ticks_init(void) { seqlock_init(&timers_state.vm_clock_seqlock, NULL); vmstate_register(NULL, 0, &vmstate_timers, &timers_state); + throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, + cpu_throttle_timer_tick, NULL); } void configure_icount(QemuOpts *opts, Error **errp) diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index 9393cf0ac9..43c96d1c91 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -44,7 +44,6 @@ CONFIG_LPC_ICH9=y CONFIG_PCI_Q35=y CONFIG_APIC=y CONFIG_IOAPIC=y -CONFIG_ICC_BUS=y CONFIG_PVPANIC=y CONFIG_MEM_HOTPLUG=y CONFIG_XIO3130=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index 28e2099187..dfb80954d4 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -44,7 +44,6 @@ CONFIG_LPC_ICH9=y CONFIG_PCI_Q35=y CONFIG_APIC=y CONFIG_IOAPIC=y -CONFIG_ICC_BUS=y CONFIG_PVPANIC=y CONFIG_MEM_HOTPLUG=y CONFIG_XIO3130=y diff --git a/disas/cris.c b/disas/cris.c index 1b76a09dbf..4482a4113e 100644 --- a/disas/cris.c +++ b/disas/cris.c @@ -2492,7 +2492,7 @@ print_with_operands (const struct cris_opcode *opcodep, = spec_reg_info ((insn >> 12) & 15, disdata->distype); if (sregp->name == NULL) - /* Should have been caught as a non-match eariler. */ + /* Should have been caught as a non-match earlier. */ *tp++ = '?'; else { diff --git a/exec.c b/exec.c index 47ada31040..7d90a52252 100644 --- a/exec.c +++ b/exec.c @@ -84,6 +84,9 @@ static MemoryRegion io_mem_unassigned; */ #define RAM_RESIZEABLE (1 << 2) +/* An extra page is mapped on top of this RAM. 
+ */ +#define RAM_EXTRA (1 << 3) #endif struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); @@ -1185,10 +1188,13 @@ static void *file_ram_alloc(RAMBlock *block, char *filename; char *sanitized_name; char *c; + void *ptr; void *area = NULL; int fd; uint64_t hpagesize; + uint64_t total; Error *local_err = NULL; + size_t offset; hpagesize = gethugepagesize(path, &local_err); if (local_err) { @@ -1232,6 +1238,7 @@ static void *file_ram_alloc(RAMBlock *block, g_free(filename); memory = ROUND_UP(memory, hpagesize); + total = memory + hpagesize; /* * ftruncate is not supported by hugetlbfs in older @@ -1243,16 +1250,40 @@ static void *file_ram_alloc(RAMBlock *block, perror("ftruncate"); } - area = mmap(0, memory, PROT_READ | PROT_WRITE, - (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE), + ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + if (ptr == MAP_FAILED) { + error_setg_errno(errp, errno, + "unable to allocate memory range for hugepages"); + close(fd); + goto error; + } + + offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr; + + area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE, + (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) | + MAP_FIXED, fd, 0); if (area == MAP_FAILED) { error_setg_errno(errp, errno, "unable to map backing store for hugepages"); + munmap(ptr, total); close(fd); goto error; } + if (offset > 0) { + munmap(ptr, offset); + } + ptr += offset; + total -= offset; + + if (total > memory + getpagesize()) { + munmap(ptr + memory + getpagesize(), + total - memory - getpagesize()); + } + if (mem_prealloc) { os_mem_prealloc(fd, area, memory); } @@ -1570,6 +1601,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, new_block->used_length = size; new_block->max_length = size; new_block->flags = share ? 
RAM_SHARED : 0; + new_block->flags |= RAM_EXTRA; new_block->host = file_ram_alloc(new_block, size, mem_path, errp); if (!new_block->host) { @@ -1671,7 +1703,11 @@ static void reclaim_ramblock(RAMBlock *block) xen_invalidate_map_cache_entry(block->host); #ifndef _WIN32 } else if (block->fd >= 0) { - munmap(block->host, block->max_length); + if (block->flags & RAM_EXTRA) { + munmap(block->host, block->max_length + getpagesize()); + } else { + munmap(block->host, block->max_length); + } close(block->fd); #endif } else { diff --git a/hmp.c b/hmp.c index 3f807b75f1..5048eeeb2d 100644 --- a/hmp.c +++ b/hmp.c @@ -232,6 +232,11 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->xbzrle_cache->overflow); } + if (info->has_x_cpu_throttle_percentage) { + monitor_printf(mon, "cpu throttle percentage: %" PRIu64 "\n", + info->x_cpu_throttle_percentage); + } + qapi_free_MigrationInfo(info); qapi_free_MigrationCapabilityStatusList(caps); } @@ -272,6 +277,12 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) monitor_printf(mon, " %s: %" PRId64, MigrationParameter_lookup[MIGRATION_PARAMETER_DECOMPRESS_THREADS], params->decompress_threads); + monitor_printf(mon, " %s: %" PRId64, + MigrationParameter_lookup[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL], + params->x_cpu_throttle_initial); + monitor_printf(mon, " %s: %" PRId64, + MigrationParameter_lookup[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT], + params->x_cpu_throttle_increment); monitor_printf(mon, "\n"); } @@ -1221,6 +1232,8 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) bool has_compress_level = false; bool has_compress_threads = false; bool has_decompress_threads = false; + bool has_x_cpu_throttle_initial = false; + bool has_x_cpu_throttle_increment = false; int i; for (i = 0; i < MIGRATION_PARAMETER_MAX; i++) { @@ -1235,10 +1248,18 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) case MIGRATION_PARAMETER_DECOMPRESS_THREADS: has_decompress_threads = true; break; + case MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL: + has_x_cpu_throttle_initial = true; + break; + case MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT: + has_x_cpu_throttle_increment = true; + break; } qmp_migrate_set_parameters(has_compress_level, value, has_compress_threads, value, has_decompress_threads, value, + has_x_cpu_throttle_initial, value, + has_x_cpu_throttle_increment, value, &err); break; } diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs index 6381238cc5..0954a1872f 100644 --- a/hw/cpu/Makefile.objs +++ b/hw/cpu/Makefile.objs @@ -2,5 +2,4 @@ obj-$(CONFIG_ARM11MPCORE) += arm11mpcore.o obj-$(CONFIG_REALVIEW) += realview_mpcore.o obj-$(CONFIG_A9MPCORE) += a9mpcore.o obj-$(CONFIG_A15MPCORE) += a15mpcore.o -obj-$(CONFIG_ICC_BUS) += icc_bus.o diff --git a/hw/cpu/icc_bus.c b/hw/cpu/icc_bus.c deleted file mode 100644 index 6646ea2b34..0000000000 --- a/hw/cpu/icc_bus.c +++ /dev/null @@ -1,118 +0,0 @@ -/* icc_bus.c - * emulate x86 ICC (Interrupt Controller Communications) bus - * - * Copyright (c) 2013 Red Hat, Inc - * - * Authors: - * Igor Mammedov <imammedo@redhat.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/> - */ -#include "hw/cpu/icc_bus.h" -#include "hw/sysbus.h" - -/* icc-bridge implementation */ - -static const TypeInfo icc_bus_info = { - .name = TYPE_ICC_BUS, - .parent = TYPE_BUS, - .instance_size = sizeof(ICCBus), -}; - - -/* icc-device implementation */ - -static void icc_device_realize(DeviceState *dev, Error **errp) -{ - ICCDeviceClass *idc = ICC_DEVICE_GET_CLASS(dev); - - /* convert to QOM */ - if (idc->realize) { - idc->realize(dev, errp); - } - -} - -static void icc_device_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - - dc->realize = icc_device_realize; - dc->bus_type = TYPE_ICC_BUS; -} - -static const TypeInfo icc_device_info = { - .name = TYPE_ICC_DEVICE, - .parent = TYPE_DEVICE, - .abstract = true, - .instance_size = sizeof(ICCDevice), - .class_size = sizeof(ICCDeviceClass), - .class_init = icc_device_class_init, -}; - - -/* icc-bridge implementation */ - -typedef struct ICCBridgeState { - /*< private >*/ - SysBusDevice parent_obj; - /*< public >*/ - - ICCBus icc_bus; - MemoryRegion apic_container; -} ICCBridgeState; - -#define ICC_BRIDGE(obj) OBJECT_CHECK(ICCBridgeState, (obj), TYPE_ICC_BRIDGE) - -static void icc_bridge_init(Object *obj) -{ - ICCBridgeState *s = ICC_BRIDGE(obj); - SysBusDevice *sb = SYS_BUS_DEVICE(obj); - - qbus_create_inplace(&s->icc_bus, sizeof(s->icc_bus), TYPE_ICC_BUS, - DEVICE(s), "icc"); - - /* Do not change order of registering regions, - * APIC must be first registered region, board maps it by 0 index - */ - memory_region_init(&s->apic_container, obj, "icc-apic-container", - APIC_SPACE_SIZE); - sysbus_init_mmio(sb, &s->apic_container); - s->icc_bus.apic_address_space = &s->apic_container; -} - -static void icc_bridge_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - - set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); -} - -static const TypeInfo icc_bridge_info = { - .name = TYPE_ICC_BRIDGE, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_init = icc_bridge_init, - .instance_size = sizeof(ICCBridgeState), - .class_init = icc_bridge_class_init, -}; - - -static void icc_bus_register_types(void) -{ - type_register_static(&icc_bus_info); - type_register_static(&icc_device_info); - type_register_static(&icc_bridge_info); -} - -type_init(icc_bus_register_types) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 461c128d23..9275297adc 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -59,7 +59,6 @@ #include "qemu/error-report.h" #include "hw/acpi/acpi.h" #include "hw/acpi/cpu_hotplug.h" -#include "hw/cpu/icc_bus.h" #include "hw/boards.h" #include "hw/pci/pci_host.h" #include "acpi-build.h" @@ -1052,23 +1051,16 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level) } static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id, - DeviceState *icc_bridge, Error **errp) + Error **errp) { X86CPU *cpu = NULL; Error *local_err = NULL; - if (icc_bridge == NULL) { - error_setg(&local_err, "Invalid icc-bridge value"); - goto out; - } - cpu = cpu_x86_create(cpu_model, &local_err); if (local_err != NULL) { goto out; } - qdev_set_parent_bus(DEVICE(cpu), qdev_get_child_bus(icc_bridge, "icc")); - 
object_property_set_int(OBJECT(cpu), apic_id, "apic-id", &local_err); object_property_set_bool(OBJECT(cpu), true, "realized", &local_err); @@ -1085,7 +1077,6 @@ static const char *current_cpu_model; void pc_hot_add_cpu(const int64_t id, Error **errp) { - DeviceState *icc_bridge; X86CPU *cpu; int64_t apic_id = x86_cpu_apic_id_from_index(id); Error *local_err = NULL; @@ -1114,9 +1105,7 @@ void pc_hot_add_cpu(const int64_t id, Error **errp) return; } - icc_bridge = DEVICE(object_resolve_path_type("icc-bridge", - TYPE_ICC_BRIDGE, NULL)); - cpu = pc_new_cpu(current_cpu_model, apic_id, icc_bridge, &local_err); + cpu = pc_new_cpu(current_cpu_model, apic_id, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -1124,7 +1113,7 @@ void pc_hot_add_cpu(const int64_t id, Error **errp) object_unref(OBJECT(cpu)); } -void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge) +void pc_cpus_init(const char *cpu_model) { int i; X86CPU *cpu = NULL; @@ -1150,7 +1139,7 @@ void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge) for (i = 0; i < smp_cpus; i++) { cpu = pc_new_cpu(cpu_model, x86_cpu_apic_id_from_index(i), - icc_bridge, &error); + &error); if (error) { error_report_err(error); exit(1); @@ -1158,13 +1147,6 @@ void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge) object_unref(OBJECT(cpu)); } - /* map APIC MMIO area if CPU has APIC */ - if (cpu && cpu->apic_state) { - /* XXX: what if the base changes? */ - sysbus_mmio_map_overlap(SYS_BUS_DEVICE(icc_bridge), 0, - APIC_DEFAULT_ADDRESS, 0x1000); - } - /* tell smbios about cpuid version and features */ smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); } @@ -1629,6 +1611,7 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev, HotplugHandlerClass *hhc; Error *local_err = NULL; PCMachineState *pcms = PC_MACHINE(hotplug_dev); + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); PCDIMMDevice *dimm = PC_DIMM(dev); PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); MemoryRegion *mr = ddc->get_memory_region(dimm); @@ -1644,7 +1627,8 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev, goto out; } - pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, &local_err); + pc_dimm_memory_plug(dev, &pcms->hotplug_memory, mr, align, + pcmc->inter_dimm_gap, &local_err); if (local_err) { goto out; } @@ -1931,12 +1915,31 @@ static void pc_machine_initfn(Object *obj) NULL, &error_abort); } +static void pc_machine_reset(void) +{ + CPUState *cs; + X86CPU *cpu; + + qemu_devices_reset(); + + /* Reset APIC after devices have been reset to cancel + * any changes that qemu_devices_reset() might have done. 
+ */ + CPU_FOREACH(cs) { + cpu = X86_CPU(cs); + + if (cpu->apic_state) { + device_reset(cpu->apic_state); + } + } +} + static unsigned pc_cpu_index_to_socket_id(unsigned cpu_index) { - unsigned pkg_id, core_id, smt_id; + X86CPUTopoInfo topo; x86_topo_ids_from_idx(smp_cores, smp_threads, cpu_index, - &pkg_id, &core_id, &smt_id); - return pkg_id; + &topo); + return topo.pkg_id; } static void pc_machine_class_init(ObjectClass *oc, void *data) @@ -1945,12 +1948,14 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) PCMachineClass *pcmc = PC_MACHINE_CLASS(oc); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); + pcmc->inter_dimm_gap = true; pcmc->get_hotplug_handler = mc->get_hotplug_handler; mc->get_hotplug_handler = pc_get_hotpug_handler; mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id; mc->default_boot_order = "cad"; mc->hot_add_cpu = pc_hot_add_cpu; mc->max_cpus = 255; + mc->reset = pc_machine_reset; hc->plug = pc_machine_device_plug_cb; hc->unplug_request = pc_machine_device_unplug_request_cb; hc->unplug = pc_machine_device_unplug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 3ffb05f93e..ae7bbebd0f 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -39,7 +39,6 @@ #include "hw/kvm/clock.h" #include "sysemu/sysemu.h" #include "hw/sysbus.h" -#include "hw/cpu/icc_bus.h" #include "sysemu/arch_init.h" #include "sysemu/block-backend.h" #include "hw/i2c/smbus.h" @@ -98,7 +97,6 @@ static void pc_init1(MachineState *machine, MemoryRegion *ram_memory; MemoryRegion *pci_memory; MemoryRegion *rom_memory; - DeviceState *icc_bridge; PcGuestInfo *guest_info; ram_addr_t lowmem; @@ -141,11 +139,7 @@ static void pc_init1(MachineState *machine, exit(1); } - icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE); - object_property_add_child(qdev_get_machine(), "icc-bridge", - OBJECT(icc_bridge), NULL); - - pc_cpus_init(machine->cpu_model, icc_bridge); + pc_cpus_init(machine->cpu_model); if (kvm_enabled() && kvmclock_enabled) { kvmclock_create(); @@ -226,7 +220,6 @@ static void pc_init1(MachineState *machine, if (pci_enabled) { ioapic_init_gsi(gsi_state, "i440fx"); } - qdev_init_nofail(icc_bridge); pc_register_ferr_irq(gsi[13]); @@ -301,6 +294,13 @@ static void pc_init1(MachineState *machine, } } +/* Looking for a pc_compat_2_4() function? It doesn't exist. + * pc_compat_*() functions that run on machine-init time and + * change global QEMU state are deprecated. Please don't create + * one, and implement any pc-*-2.4 (and newer) compat code in + * HW_COMPAT_*, PC_COMPAT_*, or * pc_*_machine_options(). 
+ */ + static void pc_compat_2_3(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); @@ -325,7 +325,7 @@ static void pc_compat_2_1(MachineState *machine) pc_compat_2_2(machine); smbios_uuid_encoded = false; - x86_cpu_compat_kvm_no_autodisable(FEAT_8000_0001_ECX, CPUID_EXT3_SVM); + x86_cpu_change_kvm_default("svm", NULL); pcms->enforce_aligned_dimm = false; } @@ -361,7 +361,7 @@ static void pc_compat_1_7(MachineState *machine) gigabyte_align = false; option_rom_has_mr = true; legacy_acpi_table_size = 6414; - x86_cpu_compat_kvm_no_autoenable(FEAT_1_ECX, CPUID_EXT_X2APIC); + x86_cpu_change_kvm_default("x2apic", NULL); } static void pc_compat_1_6(MachineState *machine) @@ -391,7 +391,7 @@ static void pc_compat_1_3(MachineState *machine) static void pc_compat_1_2(MachineState *machine) { pc_compat_1_3(machine); - x86_cpu_compat_kvm_no_autoenable(FEAT_KVM, 1 << KVM_FEATURE_PV_EOI); + x86_cpu_change_kvm_default("kvm-pv-eoi", NULL); } /* PC compat function for pc-0.10 to pc-0.13 */ @@ -414,7 +414,7 @@ static void pc_init_isa(MachineState *machine) if (!machine->cpu_model) { machine->cpu_model = "486"; } - x86_cpu_compat_kvm_no_autoenable(FEAT_KVM, 1 << KVM_FEATURE_PV_EOI); + x86_cpu_change_kvm_default("kvm-pv-eoi", NULL); enable_compat_apic_id_mode(); pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, TYPE_I440FX_PCI_DEVICE); } @@ -482,6 +482,7 @@ static void pc_i440fx_2_4_machine_options(MachineClass *m) m->alias = NULL; m->is_default = 0; pcmc->broken_reserved_end = true; + pcmc->inter_dimm_gap = false; SET_MACHINE_COMPAT(m, PC_COMPAT_2_4); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 1b7d3b644e..19e66702e0 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -43,7 +43,6 @@ #include "hw/ide/pci.h" #include "hw/ide/ahci.h" #include "hw/usb.h" -#include "hw/cpu/icc_bus.h" #include "qemu/error-report.h" #include "migration/migration.h" @@ -83,7 +82,6 @@ static void pc_q35_init(MachineState *machine) int i; ICH9LPCState *ich9_lpc; PCIDevice *ahci; - DeviceState *icc_bridge; PcGuestInfo *guest_info; ram_addr_t lowmem; DriveInfo *hd[MAX_SATA_PORTS]; @@ -130,11 +128,7 @@ static void pc_q35_init(MachineState *machine) exit(1); } - icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE); - object_property_add_child(qdev_get_machine(), "icc-bridge", - OBJECT(icc_bridge), NULL); - - pc_cpus_init(machine->cpu_model, icc_bridge); + pc_cpus_init(machine->cpu_model); pc_acpi_init("q35-acpi-dsdt.aml"); kvmclock_create(); @@ -236,7 +230,6 @@ static void pc_q35_init(MachineState *machine) if (pci_enabled) { ioapic_init_gsi(gsi_state, "q35"); } - qdev_init_nofail(icc_bridge); pc_register_ferr_irq(gsi[13]); @@ -284,6 +277,13 @@ static void pc_q35_init(MachineState *machine) } } +/* Looking for a pc_compat_2_4() function? It doesn't exist. + * pc_compat_*() functions that run on machine-init time and + * change global QEMU state are deprecated. Please don't create + * one, and implement any pc-*-2.4 (and newer) compat code in + * HW_COMPAT_*, PC_COMPAT_*, or * pc_*_machine_options(). 
+ */ + static void pc_compat_2_3(MachineState *machine) { PCMachineState *pcms = PC_MACHINE(machine); @@ -309,7 +309,7 @@ static void pc_compat_2_1(MachineState *machine) pc_compat_2_2(machine); pcms->enforce_aligned_dimm = false; smbios_uuid_encoded = false; - x86_cpu_compat_kvm_no_autodisable(FEAT_8000_0001_ECX, CPUID_EXT3_SVM); + x86_cpu_change_kvm_default("svm", NULL); } static void pc_compat_2_0(MachineState *machine) @@ -326,7 +326,7 @@ static void pc_compat_1_7(MachineState *machine) smbios_defaults = false; gigabyte_align = false; option_rom_has_mr = true; - x86_cpu_compat_kvm_no_autoenable(FEAT_1_ECX, CPUID_EXT_X2APIC); + x86_cpu_change_kvm_default("x2apic", NULL); } static void pc_compat_1_6(MachineState *machine) @@ -385,6 +385,7 @@ static void pc_q35_2_4_machine_options(MachineClass *m) pc_q35_2_5_machine_options(m); m->alias = NULL; pcmc->broken_reserved_end = true; + pcmc->inter_dimm_gap = false; SET_MACHINE_COMPAT(m, PC_COMPAT_2_4); } diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index 0032b97c5f..ad959c4e77 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -296,7 +296,6 @@ static void apic_common_realize(DeviceState *dev, Error **errp) APICCommonClass *info; static DeviceState *vapic; static int apic_no; - static bool mmio_registered; if (apic_no >= MAX_APICS) { error_setg(errp, "%s initialization failed.", @@ -307,11 +306,6 @@ static void apic_common_realize(DeviceState *dev, Error **errp) info = APIC_COMMON_GET_CLASS(s); info->realize(dev, errp); - if (!mmio_registered) { - ICCBus *b = ICC_BUS(qdev_get_parent_bus(dev)); - memory_region_add_subregion(b->apic_address_space, 0, &s->io_memory); - mmio_registered = true; - } /* Note: We need at least 1M to map the VAPIC option ROM */ if (!vapic && s->vapic_control & VAPIC_ENABLE_MASK && @@ -425,13 +419,12 @@ static Property apic_properties_common[] = { static void apic_common_class_init(ObjectClass *klass, void *data) { - ICCDeviceClass *idc = ICC_DEVICE_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); dc->vmsd = &vmstate_apic_common; dc->reset = apic_reset_common; dc->props = apic_properties_common; - idc->realize = apic_common_realize; + dc->realize = apic_common_realize; /* * Reason: APIC and CPU need to be wired up by * x86_cpu_apic_create() @@ -441,7 +434,7 @@ static void apic_common_class_init(ObjectClass *klass, void *data) static const TypeInfo apic_common_type = { .name = TYPE_APIC_COMMON, - .parent = TYPE_ICC_DEVICE, + .parent = TYPE_DEVICE, .instance_size = sizeof(APICCommonState), .class_size = sizeof(APICCommonClass), .class_init = apic_common_class_init, diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index bb04862de8..6cc6ac30e7 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -32,7 +32,8 @@ typedef struct pc_dimms_capacity { } pc_dimms_capacity; void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, - MemoryRegion *mr, uint64_t align, Error **errp) + MemoryRegion *mr, uint64_t align, bool gap, + Error **errp) { int slot; MachineState *machine = MACHINE(qdev_get_machine()); @@ -48,7 +49,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, addr = pc_dimm_get_free_addr(hpms->base, memory_region_size(&hpms->mr), - !addr ? NULL : &addr, align, + !addr ? 
NULL : &addr, align, gap, memory_region_size(mr), &local_err); if (local_err) { goto out; @@ -287,8 +288,8 @@ static int pc_dimm_built_list(Object *obj, void *opaque) uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, uint64_t address_space_size, - uint64_t *hint, uint64_t align, uint64_t size, - Error **errp) + uint64_t *hint, uint64_t align, bool gap, + uint64_t size, Error **errp) { GSList *list = NULL, *item; uint64_t new_addr, ret = 0; @@ -333,13 +334,15 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, goto out; } - if (ranges_overlap(dimm->addr, dimm_size, new_addr, size)) { + if (ranges_overlap(dimm->addr, dimm_size, new_addr, + size + (gap ? 1 : 0))) { if (hint) { DeviceState *d = DEVICE(dimm); error_setg(errp, "address range conflicts with '%s'", d->id); goto out; } - new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size, align); + new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size + (gap ? 1 : 0), + align); } } ret = new_addr; diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index d388c5571d..a877614e3e 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1094,13 +1094,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t * must have consumed the complete packet. * Otherwise, drop it. */ if (!n->mergeable_rx_bufs && offset < size) { -#if 0 - error_report("virtio-net truncated non-mergeable packet: " - "i %zd mergeable %d offset %zd, size %zd, " - "guest hdr len %zd, host hdr len %zd", - i, n->mergeable_rx_bufs, - offset, size, n->guest_hdr_len, n->host_hdr_len); -#endif + virtqueue_discard(q->rx_vq, &elem, total); return size; } diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index a9b5f2a669..d1b0e53668 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2096,7 +2096,7 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out; } - pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err); + pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, false, &local_err); if (local_err) { goto out; } diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index eda8205d58..6703806f83 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1491,12 +1491,17 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) pci_set_long(cfg_mask->pci_cfg_data, ~0x0); } - if (proxy->nvectors && - msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors, - proxy->msix_bar)) { - error_report("unable to init msix vectors to %" PRIu32, - proxy->nvectors); - proxy->nvectors = 0; + if (proxy->nvectors) { + int err = msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors, + proxy->msix_bar); + if (err) { + /* Notice when a system that supports MSIx can't initialize it. 
*/ + if (err != -ENOTSUP) { + error_report("unable to init msix vectors to %" PRIu32, + proxy->nvectors); + } + proxy->nvectors = 0; + } } proxy->pci_dev.config_write = virtio_write_config; diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 7504f8b33a..d0bc72e0e4 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -244,14 +244,12 @@ int virtio_queue_empty(VirtQueue *vq) return vring_avail_idx(vq) == vq->last_avail_idx; } -void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, - unsigned int len, unsigned int idx) +static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len) { unsigned int offset; int i; - trace_virtqueue_fill(vq, elem, len, idx); - offset = 0; for (i = 0; i < elem->in_num; i++) { size_t size = MIN(len - offset, elem->in_sg[i].iov_len); @@ -267,6 +265,21 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, cpu_physical_memory_unmap(elem->out_sg[i].iov_base, elem->out_sg[i].iov_len, 0, elem->out_sg[i].iov_len); +} + +void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len) +{ + vq->last_avail_idx--; + virtqueue_unmap_sg(vq, elem, len); +} + +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len, unsigned int idx) +{ + trace_virtqueue_fill(vq, elem, len, idx); + + virtqueue_unmap_sg(vq, elem, len); idx = (idx + vring_used_idx(vq)) % vq->vring.num; diff --git a/include/hw/cpu/icc_bus.h b/include/hw/cpu/icc_bus.h deleted file mode 100644 index 98a979fa1c..0000000000 --- a/include/hw/cpu/icc_bus.h +++ /dev/null @@ -1,82 +0,0 @@ -/* icc_bus.h - * emulate x86 ICC (Interrupt Controller Communications) bus - * - * Copyright (c) 2013 Red Hat, Inc - * - * Authors: - * Igor Mammedov <imammedo@redhat.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/> - */ -#ifndef ICC_BUS_H -#define ICC_BUS_H - -#include "exec/memory.h" -#include "hw/qdev-core.h" - -#define TYPE_ICC_BUS "icc-bus" - -#ifndef CONFIG_USER_ONLY - -/** - * ICCBus: - * - * ICC bus - */ -typedef struct ICCBus { - /*< private >*/ - BusState parent_obj; - /*< public >*/ - - MemoryRegion *apic_address_space; -} ICCBus; - -#define ICC_BUS(obj) OBJECT_CHECK(ICCBus, (obj), TYPE_ICC_BUS) - -/** - * ICCDevice: - * - * ICC device - */ -typedef struct ICCDevice { - /*< private >*/ - DeviceState qdev; - /*< public >*/ -} ICCDevice; - -/** - * ICCDeviceClass: - * @init: Initialization callback for derived classes. 
- * - * ICC device class - */ -typedef struct ICCDeviceClass { - /*< private >*/ - DeviceClass parent_class; - /*< public >*/ - - DeviceRealize realize; -} ICCDeviceClass; - -#define TYPE_ICC_DEVICE "icc-device" -#define ICC_DEVICE(obj) OBJECT_CHECK(ICCDevice, (obj), TYPE_ICC_DEVICE) -#define ICC_DEVICE_CLASS(klass) \ - OBJECT_CLASS_CHECK(ICCDeviceClass, (klass), TYPE_ICC_DEVICE) -#define ICC_DEVICE_GET_CLASS(obj) \ - OBJECT_GET_CLASS(ICCDeviceClass, (obj), TYPE_ICC_DEVICE) - -#define TYPE_ICC_BRIDGE "icc-bridge" - -#endif /* CONFIG_USER_ONLY */ -#endif diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h index 7813396e49..74fe935e8e 100644 --- a/include/hw/i386/apic_internal.h +++ b/include/hw/i386/apic_internal.h @@ -22,7 +22,6 @@ #include "cpu.h" #include "exec/memory.h" -#include "hw/cpu/icc_bus.h" #include "qemu/timer.h" /* APIC Local Vector Table */ @@ -135,7 +134,7 @@ typedef struct APICCommonState APICCommonState; typedef struct APICCommonClass { - ICCDeviceClass parent_class; + DeviceClass parent_class; DeviceRealize realize; void (*set_base)(APICCommonState *s, uint64_t val); @@ -150,7 +149,9 @@ typedef struct APICCommonClass } APICCommonClass; struct APICCommonState { - ICCDevice busdev; + /*< private >*/ + DeviceState parent_obj; + /*< public >*/ MemoryRegion io_memory; X86CPU *cpu; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index ab5413f561..0503485cd0 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -60,6 +60,7 @@ struct PCMachineClass { /*< public >*/ bool broken_reserved_end; + bool inter_dimm_gap; HotplugHandler *(*get_hotplug_handler)(MachineState *machine, DeviceState *dev); }; @@ -167,7 +168,7 @@ bool pc_machine_is_smm_enabled(PCMachineState *pcms); void pc_register_ferr_irq(qemu_irq irq); void pc_acpi_smi_interrupt(void *opaque, int irq, int level); -void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge); +void pc_cpus_init(const char *cpu_model); void pc_hot_add_cpu(const int64_t id, Error **errp); void pc_acpi_init(const char *default_dsdt); @@ -297,7 +298,27 @@ int e820_get_num_entries(void); bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); #define PC_COMPAT_2_4 \ - HW_COMPAT_2_4 + HW_COMPAT_2_4 \ + {\ + .driver = "Haswell-" TYPE_X86_CPU,\ + .property = "abm",\ + .value = "off",\ + },\ + {\ + .driver = "Haswell-noTSX-" TYPE_X86_CPU,\ + .property = "abm",\ + .value = "off",\ + },\ + {\ + .driver = "Broadwell-" TYPE_X86_CPU,\ + .property = "abm",\ + .value = "off",\ + },\ + {\ + .driver = "Broadwell-noTSX-" TYPE_X86_CPU,\ + .property = "abm",\ + .value = "off",\ + }, #define PC_COMPAT_2_3 \ PC_COMPAT_2_4 \ diff --git a/include/hw/i386/topology.h b/include/hw/i386/topology.h index 9c6f3a937a..148cc1bbc8 100644 --- a/include/hw/i386/topology.h +++ b/include/hw/i386/topology.h @@ -47,6 +47,12 @@ */ typedef uint32_t apic_id_t; +typedef struct X86CPUTopoInfo { + unsigned pkg_id; + unsigned core_id; + unsigned smt_id; +} X86CPUTopoInfo; + /* Return the bit width needed for 'count' IDs */ static unsigned apicid_bitwidth_for_count(unsigned count) @@ -92,13 +98,11 @@ static inline unsigned apicid_pkg_offset(unsigned nr_cores, unsigned nr_threads) */ static inline apic_id_t apicid_from_topo_ids(unsigned nr_cores, unsigned nr_threads, - unsigned pkg_id, - unsigned core_id, - unsigned smt_id) + const X86CPUTopoInfo *topo) { - return (pkg_id << apicid_pkg_offset(nr_cores, nr_threads)) | - (core_id << apicid_core_offset(nr_cores, nr_threads)) | - smt_id; + return (topo->pkg_id << 
apicid_pkg_offset(nr_cores, nr_threads)) | + (topo->core_id << apicid_core_offset(nr_cores, nr_threads)) | + topo->smt_id; } /* Calculate thread/core/package IDs for a specific topology, @@ -107,14 +111,12 @@ static inline apic_id_t apicid_from_topo_ids(unsigned nr_cores, static inline void x86_topo_ids_from_idx(unsigned nr_cores, unsigned nr_threads, unsigned cpu_index, - unsigned *pkg_id, - unsigned *core_id, - unsigned *smt_id) + X86CPUTopoInfo *topo) { unsigned core_index = cpu_index / nr_threads; - *smt_id = cpu_index % nr_threads; - *core_id = core_index % nr_cores; - *pkg_id = core_index / nr_cores; + topo->smt_id = cpu_index % nr_threads; + topo->core_id = core_index % nr_cores; + topo->pkg_id = core_index / nr_cores; } /* Make APIC ID for the CPU 'cpu_index' @@ -125,10 +127,9 @@ static inline apic_id_t x86_apicid_from_cpu_idx(unsigned nr_cores, unsigned nr_threads, unsigned cpu_index) { - unsigned pkg_id, core_id, smt_id; - x86_topo_ids_from_idx(nr_cores, nr_threads, cpu_index, - &pkg_id, &core_id, &smt_id); - return apicid_from_topo_ids(nr_cores, nr_threads, pkg_id, core_id, smt_id); + X86CPUTopoInfo topo; + x86_topo_ids_from_idx(nr_cores, nr_threads, cpu_index, &topo); + return apicid_from_topo_ids(nr_cores, nr_threads, &topo); } #endif /* HW_I386_TOPOLOGY_H */ diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h index d83bf30ea9..c1ee7b0408 100644 --- a/include/hw/mem/pc-dimm.h +++ b/include/hw/mem/pc-dimm.h @@ -83,15 +83,16 @@ typedef struct MemoryHotplugState { uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, uint64_t address_space_size, - uint64_t *hint, uint64_t align, uint64_t size, - Error **errp); + uint64_t *hint, uint64_t align, bool gap, + uint64_t size, Error **errp); int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp); int qmp_pc_dimm_device_list(Object *obj, void *opaque); uint64_t pc_existing_dimms_capacity(Error **errp); void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms, - MemoryRegion *mr, uint64_t align, Error **errp); + MemoryRegion *mr, uint64_t align, bool gap, + Error **errp); void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms, MemoryRegion *mr); #endif diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 6201ee8ce0..9d09115fab 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -146,6 +146,8 @@ void virtio_del_queue(VirtIODevice *vdev, int n); void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len); void virtqueue_flush(VirtQueue *vq, unsigned int count); +void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len); void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx); diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 302673dbad..9405554a2b 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -321,6 +321,11 @@ struct CPUState { uint32_t can_do_io; int32_t exception_index; /* used by m68k TCG */ + /* Used to keep track of an outstanding cpu throttle thread for migration + * autoconverge + */ + bool throttle_thread_scheduled; + /* Note that this is accessed at the start of every TB via a negative offset from AREG0. Leave this field at the end so as to make the (absolute value) offset as small as possible. This reduces code @@ -565,6 +570,43 @@ CPUState *qemu_get_cpu(int index); */ bool cpu_exists(int64_t id); +/** + * cpu_throttle_set: + * @new_throttle_pct: Percent of sleep time. Valid range is 1 to 99. 
+ * + * Throttles all vcpus by forcing them to sleep for the given percentage of + * time. A throttle_percentage of 25 corresponds to a 75% duty cycle roughly. + * (example: 10ms sleep for every 30ms awake). + * + * cpu_throttle_set can be called as needed to adjust new_throttle_pct. + * Once the throttling starts, it will remain in effect until cpu_throttle_stop + * is called. + */ +void cpu_throttle_set(int new_throttle_pct); + +/** + * cpu_throttle_stop: + * + * Stops the vcpu throttling started by cpu_throttle_set. + */ +void cpu_throttle_stop(void); + +/** + * cpu_throttle_active: + * + * Returns: %true if the vcpus are currently being throttled, %false otherwise. + */ +bool cpu_throttle_active(void); + +/** + * cpu_throttle_get_percentage: + * + * Returns the vcpu throttle percentage. See cpu_throttle_set for details. + * + * Returns: The throttle percentage in range 1 to 99. + */ +int cpu_throttle_get_percentage(void); + #ifndef CONFIG_USER_ONLY typedef void (*CPUInterruptHandler)(CPUState *, int); diff --git a/linux-user/elfload.c b/linux-user/elfload.c index a7ff58c8bb..fdae6a6cd1 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1373,66 +1373,69 @@ static bool elf_check_ehdr(struct elfhdr *ehdr) * to be put directly into the top of new user memory. * */ -static abi_ulong copy_elf_strings(int argc,char ** argv, void **page, - abi_ulong p) +static abi_ulong copy_elf_strings(int argc, char **argv, char *scratch, + abi_ulong p, abi_ulong stack_limit) { - char *tmp, *tmp1, *pag = NULL; - int len, offset = 0; + char *tmp; + int len, offset; + abi_ulong top = p; if (!p) { return 0; /* bullet-proofing */ } + + offset = ((p - 1) % TARGET_PAGE_SIZE) + 1; + while (argc-- > 0) { tmp = argv[argc]; if (!tmp) { fprintf(stderr, "VFS: argc is wrong"); exit(-1); } - tmp1 = tmp; - while (*tmp++); - len = tmp - tmp1; - if (p < len) { /* this shouldn't happen - 128kB */ + len = strlen(tmp) + 1; + tmp += len; + + if (len > (p - stack_limit)) { return 0; } while (len) { - --p; --tmp; --len; - if (--offset < 0) { - offset = p % TARGET_PAGE_SIZE; - pag = (char *)page[p/TARGET_PAGE_SIZE]; - if (!pag) { - pag = g_try_malloc0(TARGET_PAGE_SIZE); - page[p/TARGET_PAGE_SIZE] = pag; - if (!pag) - return 0; - } - } - if (len == 0 || offset == 0) { - *(pag + offset) = *tmp; - } - else { - int bytes_to_copy = (len > offset) ? offset : len; - tmp -= bytes_to_copy; - p -= bytes_to_copy; - offset -= bytes_to_copy; - len -= bytes_to_copy; - memcpy_fromfs(pag + offset, tmp, bytes_to_copy + 1); + int bytes_to_copy = (len > offset) ? offset : len; + tmp -= bytes_to_copy; + p -= bytes_to_copy; + offset -= bytes_to_copy; + len -= bytes_to_copy; + + memcpy_fromfs(scratch + offset, tmp, bytes_to_copy); + + if (offset == 0) { + memcpy_to_target(p, scratch, top - p); + top = p; + offset = TARGET_PAGE_SIZE; } } } + if (offset) { + memcpy_to_target(p, scratch + offset, top - p); + } + return p; } -static abi_ulong setup_arg_pages(abi_ulong p, struct linux_binprm *bprm, +/* Older linux kernels provide up to MAX_ARG_PAGES (default: 32) of + * argument/environment space. Newer kernels (>2.6.33) allow more, + * dependent on stack size, but guarantee at least 32 pages for + * backwards compatibility. + */ +#define STACK_LOWER_LIMIT (32 * TARGET_PAGE_SIZE) + +static abi_ulong setup_arg_pages(struct linux_binprm *bprm, struct image_info *info) { - abi_ulong stack_base, size, error, guard; - int i; + abi_ulong size, error, guard; - /* Create enough stack to hold everything. 
If we don't use - it for args, we'll use it for something else. */ size = guest_stack_size; - if (size < MAX_ARG_PAGES*TARGET_PAGE_SIZE) { - size = MAX_ARG_PAGES*TARGET_PAGE_SIZE; + if (size < STACK_LOWER_LIMIT) { + size = STACK_LOWER_LIMIT; } guard = TARGET_PAGE_SIZE; if (guard < qemu_real_host_page_size) { @@ -1450,19 +1453,8 @@ static abi_ulong setup_arg_pages(abi_ulong p, struct linux_binprm *bprm, target_mprotect(error, guard, PROT_NONE); info->stack_limit = error + guard; - stack_base = info->stack_limit + size - MAX_ARG_PAGES*TARGET_PAGE_SIZE; - p += stack_base; - - for (i = 0 ; i < MAX_ARG_PAGES ; i++) { - if (bprm->page[i]) { - info->rss++; - /* FIXME - check return value of memcpy_to_target() for failure */ - memcpy_to_target(stack_base, bprm->page[i], TARGET_PAGE_SIZE); - g_free(bprm->page[i]); - } - stack_base += TARGET_PAGE_SIZE; - } - return p; + + return info->stack_limit + size - sizeof(void *); } /* Map and zero the bss. We need to explicitly zero any fractional pages @@ -2204,10 +2196,9 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) struct image_info interp_info; struct elfhdr elf_ex; char *elf_interpreter = NULL; + char *scratch; info->start_mmap = (abi_ulong)ELF_START_MMAP; - info->mmap = 0; - info->rss = 0; load_elf_image(bprm->filename, bprm->fd, info, &elf_interpreter, bprm->buf); @@ -2217,18 +2208,24 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) when we load the interpreter. */ elf_ex = *(struct elfhdr *)bprm->buf; - bprm->p = copy_elf_strings(1, &bprm->filename, bprm->page, bprm->p); - bprm->p = copy_elf_strings(bprm->envc,bprm->envp,bprm->page,bprm->p); - bprm->p = copy_elf_strings(bprm->argc,bprm->argv,bprm->page,bprm->p); + /* Do this so that we can load the interpreter, if need be. We will + change some of these later */ + bprm->p = setup_arg_pages(bprm, info); + + scratch = g_new0(char, TARGET_PAGE_SIZE); + bprm->p = copy_elf_strings(1, &bprm->filename, scratch, + bprm->p, info->stack_limit); + bprm->p = copy_elf_strings(bprm->envc, bprm->envp, scratch, + bprm->p, info->stack_limit); + bprm->p = copy_elf_strings(bprm->argc, bprm->argv, scratch, + bprm->p, info->stack_limit); + g_free(scratch); + if (!bprm->p) { fprintf(stderr, "%s: %s\n", bprm->filename, strerror(E2BIG)); exit(-1); } - /* Do this so that we can load the interpreter, if need be. 
We will - change some of these later */ - bprm->p = setup_arg_pages(bprm->p, bprm, info); - if (elf_interpreter) { load_elf_interp(elf_interpreter, &interp_info, bprm->buf); diff --git a/linux-user/flatload.c b/linux-user/flatload.c index 566a7a87a3..ceacb9844a 100644 --- a/linux-user/flatload.c +++ b/linux-user/flatload.c @@ -707,7 +707,7 @@ static int load_flat_shared_library(int id, struct lib_info *libs) int load_flt_binary(struct linux_binprm *bprm, struct image_info *info) { struct lib_info libinfo[MAX_SHARED_LIBS]; - abi_ulong p = bprm->p; + abi_ulong p; abi_ulong stack_len; abi_ulong start_addr; abi_ulong sp; diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c index 506e837ae1..dbaf0ec586 100644 --- a/linux-user/linuxload.c +++ b/linux-user/linuxload.c @@ -135,10 +135,7 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, struct linux_binprm *bprm) { int retval; - int i; - bprm->p = TARGET_PAGE_SIZE*MAX_ARG_PAGES-sizeof(unsigned int); - memset(bprm->page, 0, sizeof(bprm->page)); bprm->fd = fdexec; bprm->filename = (char *)filename; bprm->argc = count(argv); @@ -172,9 +169,5 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, return retval; } - /* Something went wrong, return the inode and free the argument pages*/ - for (i=0 ; i<MAX_ARG_PAGES ; i++) { - g_free(bprm->page[i]); - } return(retval); } diff --git a/linux-user/main.c b/linux-user/main.c index 25cf8755ee..6599a41404 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -63,7 +63,7 @@ unsigned long reserved_va = 0xf7000000; unsigned long reserved_va; #endif -static void usage(void); +static void usage(int exitcode); static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX; const char *qemu_uname_release; @@ -1414,7 +1414,7 @@ void cpu_loop (CPUSPARCState *env) default: printf ("Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); - exit (1); + exit(EXIT_FAILURE); } process_pending_signals (env); } @@ -2662,7 +2662,7 @@ void cpu_loop(CPUOpenRISCState *env) switch (trapnr) { case EXCP_RESET: qemu_log("\nReset request, exit, pc is %#x\n", env->pc); - exit(1); + exit(EXIT_FAILURE); break; case EXCP_BUSERR: qemu_log("\nBus error, exit, pc is %#x\n", env->pc); @@ -2726,7 +2726,7 @@ void cpu_loop(CPUOpenRISCState *env) if (gdbsig) { gdb_handlesig(cs, gdbsig); if (gdbsig != TARGET_SIGTRAP) { - exit(1); + exit(EXIT_FAILURE); } } @@ -2791,7 +2791,7 @@ void cpu_loop(CPUSH4State *env) default: printf ("Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); - exit (1); + exit(EXIT_FAILURE); } process_pending_signals (env); } @@ -2852,7 +2852,7 @@ void cpu_loop(CPUCRISState *env) default: printf ("Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); - exit (1); + exit(EXIT_FAILURE); } process_pending_signals (env); } @@ -2933,7 +2933,7 @@ void cpu_loop(CPUMBState *env) printf ("Unhandled hw-exception: 0x%x\n", env->sregs[SR_ESR] & ESR_EC_MASK); cpu_dump_state(cs, stderr, fprintf, 0); - exit (1); + exit(EXIT_FAILURE); break; } break; @@ -2954,7 +2954,7 @@ void cpu_loop(CPUMBState *env) default: printf ("Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); - exit (1); + exit(EXIT_FAILURE); } process_pending_signals (env); } @@ -3123,17 +3123,17 @@ void cpu_loop(CPUAlphaState *env) switch (trapnr) { case EXCP_RESET: fprintf(stderr, "Reset requested. Exit\n"); - exit(1); + exit(EXIT_FAILURE); break; case EXCP_MCHK: fprintf(stderr, "Machine check exception. 
Exit\n"); - exit(1); + exit(EXIT_FAILURE); break; case EXCP_SMP_INTERRUPT: case EXCP_CLK_INTERRUPT: case EXCP_DEV_INTERRUPT: fprintf(stderr, "External interrupt. Exit\n"); - exit(1); + exit(EXIT_FAILURE); break; case EXCP_MMFAULT: env->lock_addr = -1; @@ -3283,7 +3283,7 @@ void cpu_loop(CPUAlphaState *env) default: printf ("Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); - exit (1); + exit(EXIT_FAILURE); } process_pending_signals (env); } @@ -3387,7 +3387,7 @@ void cpu_loop(CPUS390XState *env) default: fprintf(stderr, "Unhandled program exception: %#x\n", n); cpu_dump_state(cs, stderr, fprintf, 0); - exit(1); + exit(EXIT_FAILURE); } break; @@ -3404,7 +3404,7 @@ void cpu_loop(CPUS390XState *env) default: fprintf(stderr, "Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); - exit(1); + exit(EXIT_FAILURE); } process_pending_signals (env); } @@ -3700,7 +3700,7 @@ CPUArchState *cpu_copy(CPUArchState *env) static void handle_arg_help(const char *arg) { - usage(); + usage(EXIT_SUCCESS); } static void handle_arg_log(const char *arg) @@ -3710,7 +3710,7 @@ static void handle_arg_log(const char *arg) mask = qemu_str_to_log_mask(arg); if (!mask) { qemu_print_log_usage(stdout); - exit(1); + exit(EXIT_FAILURE); } qemu_set_log(mask); } @@ -3726,7 +3726,7 @@ static void handle_arg_set_env(const char *arg) r = p = strdup(arg); while ((token = strsep(&p, ",")) != NULL) { if (envlist_setenv(envlist, token) != 0) { - usage(); + usage(EXIT_FAILURE); } } free(r); @@ -3738,7 +3738,7 @@ static void handle_arg_unset_env(const char *arg) r = p = strdup(arg); while ((token = strsep(&p, ",")) != NULL) { if (envlist_unsetenv(envlist, token) != 0) { - usage(); + usage(EXIT_FAILURE); } } free(r); @@ -3754,7 +3754,7 @@ static void handle_arg_stack_size(const char *arg) char *p; guest_stack_size = strtoul(arg, &p, 0); if (guest_stack_size == 0) { - usage(); + usage(EXIT_FAILURE); } if (*p == 'M') { @@ -3775,7 +3775,7 @@ static void handle_arg_pagesize(const char *arg) if (qemu_host_page_size == 0 || (qemu_host_page_size & (qemu_host_page_size - 1)) != 0) { fprintf(stderr, "page size must be a power of two\n"); - exit(1); + exit(EXIT_FAILURE); } } @@ -3785,7 +3785,7 @@ static void handle_arg_randseed(const char *arg) if (parse_uint_full(arg, &seed, 0) != 0 || seed > UINT_MAX) { fprintf(stderr, "Invalid seed number: %s\n", arg); - exit(1); + exit(EXIT_FAILURE); } srand(seed); } @@ -3808,7 +3808,7 @@ static void handle_arg_cpu(const char *arg) #if defined(cpu_list) cpu_list(stdout, &fprintf); #endif - exit(1); + exit(EXIT_FAILURE); } } @@ -3845,12 +3845,12 @@ static void handle_arg_reserved_va(const char *arg) #endif ) { fprintf(stderr, "Reserved virtual address too big\n"); - exit(1); + exit(EXIT_FAILURE); } } if (*p) { fprintf(stderr, "Unrecognised -R size suffix '%s'\n", p); - exit(1); + exit(EXIT_FAILURE); } } @@ -3868,7 +3868,7 @@ static void handle_arg_version(const char *arg) { printf("qemu-" TARGET_NAME " version " QEMU_VERSION QEMU_PKGVERSION ", Copyright (c) 2003-2008 Fabrice Bellard\n"); - exit(0); + exit(EXIT_SUCCESS); } struct qemu_argument { @@ -3883,6 +3883,8 @@ struct qemu_argument { static const struct qemu_argument arg_table[] = { {"h", "", false, handle_arg_help, "", "print this help"}, + {"help", "", false, handle_arg_help, + "", ""}, {"g", "QEMU_GDB", true, handle_arg_gdb, "port", "wait gdb connection to 'port'"}, {"L", "QEMU_LD_PREFIX", true, handle_arg_ld_prefix, @@ -3921,7 +3923,7 @@ static const struct qemu_argument arg_table[] = { {NULL, NULL, false, 
NULL, NULL, NULL} }; -static void usage(void) +static void usage(int exitcode) { const struct qemu_argument *arginfo; int maxarglen; @@ -3988,7 +3990,7 @@ static void usage(void) "Note that if you provide several changes to a single variable\n" "the last change will stay in effect.\n"); - exit(1); + exit(exitcode); } static int parse_args(int argc, char **argv) @@ -4022,12 +4024,18 @@ static int parse_args(int argc, char **argv) if (!strcmp(r, "-")) { break; } + /* Treat --foo the same as -foo. */ + if (r[0] == '-') { + r++; + } for (arginfo = arg_table; arginfo->handle_opt != NULL; arginfo++) { if (!strcmp(r, arginfo->argv)) { if (arginfo->has_arg) { if (optind >= argc) { - usage(); + (void) fprintf(stderr, + "qemu: missing argument for option '%s'\n", r); + exit(EXIT_FAILURE); } arginfo->handle_opt(argv[optind]); optind++; @@ -4040,12 +4048,14 @@ static int parse_args(int argc, char **argv) /* no option matched the current argv */ if (arginfo->handle_opt == NULL) { - usage(); + (void) fprintf(stderr, "qemu: unknown option '%s'\n", r); + exit(EXIT_FAILURE); } } if (optind >= argc) { - usage(); + (void) fprintf(stderr, "qemu: no user program specified\n"); + exit(EXIT_FAILURE); } filename = argv[optind]; @@ -4074,7 +4084,7 @@ int main(int argc, char **argv, char **envp) if ((envlist = envlist_create()) == NULL) { (void) fprintf(stderr, "Unable to allocate envlist\n"); - exit(1); + exit(EXIT_FAILURE); } /* add current environment into the list */ @@ -4160,7 +4170,7 @@ int main(int argc, char **argv, char **envp) cpu = cpu_init(cpu_model); if (!cpu) { fprintf(stderr, "Unable to find CPU definition\n"); - exit(1); + exit(EXIT_FAILURE); } env = cpu->env_ptr; cpu_reset(cpu); @@ -4192,7 +4202,7 @@ int main(int argc, char **argv, char **envp) "space for use as guest address space (check your virtual " "memory ulimit setting or reserve less using -R option)\n", reserved_va); - exit(1); + exit(EXIT_FAILURE); } if (reserved_va) { @@ -4225,7 +4235,7 @@ int main(int argc, char **argv, char **envp) target_argv = calloc(target_argc + 1, sizeof (char *)); if (target_argv == NULL) { (void) fprintf(stderr, "Unable to allocate memory for target_argv\n"); - exit(1); + exit(EXIT_FAILURE); } /* @@ -4254,7 +4264,7 @@ int main(int argc, char **argv, char **envp) execfd = open(filename, O_RDONLY); if (execfd < 0) { printf("Error while loading %s: %s\n", filename, strerror(errno)); - _exit(1); + _exit(EXIT_FAILURE); } } @@ -4262,7 +4272,7 @@ int main(int argc, char **argv, char **envp) info, &bprm); if (ret != 0) { printf("Error while loading %s: %s\n", filename, strerror(-ret)); - _exit(1); + _exit(EXIT_FAILURE); } for (wrk = target_environ; *wrk; wrk++) { @@ -4308,7 +4318,7 @@ int main(int argc, char **argv, char **envp) /* enable 64 bit mode if possible */ if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) { fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n"); - exit(1); + exit(EXIT_FAILURE); } env->cr[4] |= CR4_PAE_MASK; env->efer |= MSR_EFER_LMA | MSR_EFER_LME; @@ -4418,7 +4428,7 @@ int main(int argc, char **argv, char **envp) if (!(arm_feature(env, ARM_FEATURE_AARCH64))) { fprintf(stderr, "The selected ARM CPU does not support 64 bit mode\n"); - exit(1); + exit(EXIT_FAILURE); } for (i = 0; i < 31; i++) { @@ -4630,7 +4640,7 @@ int main(int argc, char **argv, char **envp) if (gdbserver_start(gdbstub_port) < 0) { fprintf(stderr, "qemu: could not open gdbserver on port %d\n", gdbstub_port); - exit(1); + exit(EXIT_FAILURE); } gdb_handlesig(cpu, 0); } diff --git a/linux-user/mmap.c 
b/linux-user/mmap.c index b2126c76fa..5606bcd164 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -514,10 +514,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, goto fail; if (!(prot & PROT_WRITE)) { ret = target_mprotect(start, len, prot); - if (ret != 0) { - start = ret; - goto the_end; - } + assert(ret == 0); } goto the_end; } diff --git a/linux-user/qemu.h b/linux-user/qemu.h index e8606b290c..bd90cc3799 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -36,8 +36,6 @@ struct image_info { abi_ulong start_brk; abi_ulong brk; abi_ulong start_mmap; - abi_ulong mmap; - abi_ulong rss; abi_ulong start_stack; abi_ulong stack_limit; abi_ulong entry; @@ -145,12 +143,6 @@ extern const char *qemu_uname_release; extern unsigned long mmap_min_addr; /* ??? See if we can avoid exposing so much of the loader internals. */ -/* - * MAX_ARG_PAGES defines the number of pages allocated for arguments - * and envelope for the new program. 32 should suffice, this gives - * a maximum env+arg of 128kB w/4KB pages! - */ -#define MAX_ARG_PAGES 33 /* Read a good amount of data initially, to hopefully get all the program headers loaded. */ @@ -162,7 +154,6 @@ extern unsigned long mmap_min_addr; */ struct linux_binprm { char buf[BPRM_BUF_SIZE] __attribute__((aligned)); - void *page[MAX_ARG_PAGES]; abi_ulong p; int fd; int e_uid, e_gid; diff --git a/linux-user/signal.c b/linux-user/signal.c index 502efd9fc4..ac82baa0f0 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -3900,12 +3900,6 @@ static inline abi_ulong get_sigframe(struct target_sigaction *ka, return sp; } -static void setup_frame(int sig, struct target_sigaction *ka, - target_sigset_t *set, CPUOpenRISCState *env) -{ - qemu_log("Not implement.\n"); -} - static void setup_rt_frame(int sig, struct target_sigaction *ka, target_siginfo_t *info, target_sigset_t *set, CPUOpenRISCState *env) @@ -5662,7 +5656,8 @@ void process_pending_signals(CPUArchState *cpu_env) } #endif /* prepare the stack frame of the virtual CPU */ -#if defined(TARGET_ABI_MIPSN32) || defined(TARGET_ABI_MIPSN64) +#if defined(TARGET_ABI_MIPSN32) || defined(TARGET_ABI_MIPSN64) \ + || defined(TARGET_OPENRISC) /* These targets do not have traditional signals. */ setup_rt_frame(sig, sa, &q->info, &target_old_set, cpu_env); #else diff --git a/linux-user/syscall.c b/linux-user/syscall.c index d1d3eb2d78..98b5766d4a 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -457,6 +457,7 @@ static uint16_t target_to_host_errno_table[ERRNO_TABLE_SIZE] = { * minus the errnos that are not actually generic to all archs. 
*/ static uint16_t host_to_target_errno_table[ERRNO_TABLE_SIZE] = { + [EAGAIN] = TARGET_EAGAIN, [EIDRM] = TARGET_EIDRM, [ECHRNG] = TARGET_ECHRNG, [EL2NSYNC] = TARGET_EL2NSYNC, @@ -1181,7 +1182,7 @@ static inline abi_long target_to_host_cmsg(struct msghdr *msgh, struct cmsghdr *cmsg = CMSG_FIRSTHDR(msgh); abi_long msg_controllen; abi_ulong target_cmsg_addr; - struct target_cmsghdr *target_cmsg; + struct target_cmsghdr *target_cmsg, *target_cmsg_start; socklen_t space = 0; msg_controllen = tswapal(target_msgh->msg_controllen); @@ -1189,6 +1190,7 @@ static inline abi_long target_to_host_cmsg(struct msghdr *msgh, goto the_end; target_cmsg_addr = tswapal(target_msgh->msg_control); target_cmsg = lock_user(VERIFY_READ, target_cmsg_addr, msg_controllen, 1); + target_cmsg_start = target_cmsg; if (!target_cmsg) return -TARGET_EFAULT; @@ -1247,7 +1249,8 @@ static inline abi_long target_to_host_cmsg(struct msghdr *msgh, } cmsg = CMSG_NXTHDR(msgh, cmsg); - target_cmsg = TARGET_CMSG_NXTHDR(target_msgh, target_cmsg); + target_cmsg = TARGET_CMSG_NXTHDR(target_msgh, target_cmsg, + target_cmsg_start); } unlock_user(target_cmsg, target_cmsg_addr, 0); the_end: @@ -1261,7 +1264,7 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, struct cmsghdr *cmsg = CMSG_FIRSTHDR(msgh); abi_long msg_controllen; abi_ulong target_cmsg_addr; - struct target_cmsghdr *target_cmsg; + struct target_cmsghdr *target_cmsg, *target_cmsg_start; socklen_t space = 0; msg_controllen = tswapal(target_msgh->msg_controllen); @@ -1269,6 +1272,7 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, goto the_end; target_cmsg_addr = tswapal(target_msgh->msg_control); target_cmsg = lock_user(VERIFY_WRITE, target_cmsg_addr, msg_controllen, 0); + target_cmsg_start = target_cmsg; if (!target_cmsg) return -TARGET_EFAULT; @@ -1382,14 +1386,15 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, } target_cmsg->cmsg_len = tswapal(tgt_len); - tgt_space = TARGET_CMSG_SPACE(tgt_len); + tgt_space = TARGET_CMSG_SPACE(len); if (msg_controllen < tgt_space) { tgt_space = msg_controllen; } msg_controllen -= tgt_space; space += tgt_space; cmsg = CMSG_NXTHDR(msgh, cmsg); - target_cmsg = TARGET_CMSG_NXTHDR(target_msgh, target_cmsg); + target_cmsg = TARGET_CMSG_NXTHDR(target_msgh, target_cmsg, + target_cmsg_start); } unlock_user(target_cmsg, target_cmsg_addr, space); the_end: @@ -4622,8 +4627,9 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, pthread_mutex_unlock(&clone_lock); } else { /* if no CLONE_VM, we consider it is a fork */ - if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0) - return -EINVAL; + if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0) { + return -TARGET_EINVAL; + } fork_start(); ret = fork(); if (ret == 0) { @@ -5246,6 +5252,94 @@ static int do_futex(target_ulong uaddr, int op, int val, target_ulong timeout, return -TARGET_ENOSYS; } } +#if defined(TARGET_NR_name_to_handle_at) && defined(CONFIG_OPEN_BY_HANDLE) +static abi_long do_name_to_handle_at(abi_long dirfd, abi_long pathname, + abi_long handle, abi_long mount_id, + abi_long flags) +{ + struct file_handle *target_fh; + struct file_handle *fh; + int mid = 0; + abi_long ret; + char *name; + unsigned int size, total_size; + + if (get_user_s32(size, handle)) { + return -TARGET_EFAULT; + } + + name = lock_user_string(pathname); + if (!name) { + return -TARGET_EFAULT; + } + + total_size = sizeof(struct file_handle) + size; + target_fh = lock_user(VERIFY_WRITE, handle, total_size, 0); + 
if (!target_fh) { + unlock_user(name, pathname, 0); + return -TARGET_EFAULT; + } + + fh = g_malloc0(total_size); + fh->handle_bytes = size; + + ret = get_errno(name_to_handle_at(dirfd, path(name), fh, &mid, flags)); + unlock_user(name, pathname, 0); + + /* man name_to_handle_at(2): + * Other than the use of the handle_bytes field, the caller should treat + * the file_handle structure as an opaque data type + */ + + memcpy(target_fh, fh, total_size); + target_fh->handle_bytes = tswap32(fh->handle_bytes); + target_fh->handle_type = tswap32(fh->handle_type); + g_free(fh); + unlock_user(target_fh, handle, total_size); + + if (put_user_s32(mid, mount_id)) { + return -TARGET_EFAULT; + } + + return ret; + +} +#endif + +#if defined(TARGET_NR_open_by_handle_at) && defined(CONFIG_OPEN_BY_HANDLE) +static abi_long do_open_by_handle_at(abi_long mount_fd, abi_long handle, + abi_long flags) +{ + struct file_handle *target_fh; + struct file_handle *fh; + unsigned int size, total_size; + abi_long ret; + + if (get_user_s32(size, handle)) { + return -TARGET_EFAULT; + } + + total_size = sizeof(struct file_handle) + size; + target_fh = lock_user(VERIFY_READ, handle, total_size, 1); + if (!target_fh) { + return -TARGET_EFAULT; + } + + fh = g_malloc0(total_size); + memcpy(fh, target_fh, total_size); + fh->handle_bytes = size; + fh->handle_type = tswap32(target_fh->handle_type); + + ret = get_errno(open_by_handle_at(mount_fd, fh, + target_to_host_bitmask(flags, fcntl_flags_tbl))); + + g_free(fh); + + unlock_user(target_fh, handle, total_size); + + return ret; +} +#endif /* Map host to target signal numbers for the wait family of syscalls. Assume all other status bits are the same. */ @@ -5658,6 +5752,16 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, arg4)); unlock_user(p, arg2, 0); break; +#if defined(TARGET_NR_name_to_handle_at) && defined(CONFIG_OPEN_BY_HANDLE) + case TARGET_NR_name_to_handle_at: + ret = do_name_to_handle_at(arg1, arg2, arg3, arg4, arg5); + break; +#endif +#if defined(TARGET_NR_open_by_handle_at) && defined(CONFIG_OPEN_BY_HANDLE) + case TARGET_NR_open_by_handle_at: + ret = do_open_by_handle_at(arg1, arg2, arg3); + break; +#endif case TARGET_NR_close: ret = get_errno(close(arg1)); break; @@ -5808,12 +5912,6 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } *q = NULL; - /* This case will not be caught by the host's execve() if its - page size is bigger than the target's. 
*/ - if (total_size > MAX_ARG_PAGES * TARGET_PAGE_SIZE) { - ret = -TARGET_E2BIG; - goto execve_end; - } if (!(p = lock_user_string(arg1))) goto execve_efault; ret = get_errno(execve(p, argp, envp)); diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index cdc8db421c..7ca33a6f62 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -235,7 +235,8 @@ struct target_cmsghdr { }; #define TARGET_CMSG_DATA(cmsg) ((unsigned char *) ((struct target_cmsghdr *) (cmsg) + 1)) -#define TARGET_CMSG_NXTHDR(mhdr, cmsg) __target_cmsg_nxthdr (mhdr, cmsg) +#define TARGET_CMSG_NXTHDR(mhdr, cmsg, cmsg_start) \ + __target_cmsg_nxthdr(mhdr, cmsg, cmsg_start) #define TARGET_CMSG_ALIGN(len) (((len) + sizeof (abi_long) - 1) \ & (size_t) ~(sizeof (abi_long) - 1)) #define TARGET_CMSG_SPACE(len) (TARGET_CMSG_ALIGN (len) \ @@ -243,17 +244,20 @@ struct target_cmsghdr { #define TARGET_CMSG_LEN(len) (TARGET_CMSG_ALIGN (sizeof (struct target_cmsghdr)) + (len)) static __inline__ struct target_cmsghdr * -__target_cmsg_nxthdr (struct target_msghdr *__mhdr, struct target_cmsghdr *__cmsg) +__target_cmsg_nxthdr(struct target_msghdr *__mhdr, + struct target_cmsghdr *__cmsg, + struct target_cmsghdr *__cmsg_start) { struct target_cmsghdr *__ptr; __ptr = (struct target_cmsghdr *)((unsigned char *) __cmsg + TARGET_CMSG_ALIGN (tswapal(__cmsg->cmsg_len))); - if ((unsigned long)((char *)(__ptr+1) - (char *)(size_t)tswapal(__mhdr->msg_control)) - > tswapal(__mhdr->msg_controllen)) + if ((unsigned long)((char *)(__ptr+1) - (char *)__cmsg_start) + > tswapal(__mhdr->msg_controllen)) { /* No more entries. */ return (struct target_cmsghdr *)0; - return __cmsg; + } + return __ptr; } struct target_mmsghdr { diff --git a/migration/migration.c b/migration/migration.c index e48dd13720..b7de9b7b3f 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -29,8 +29,9 @@ #include "trace.h" #include "qapi/util.h" #include "qapi-event.h" +#include "qom/cpu.h" -#define MAX_THROTTLE (32 << 20) /* Migration speed throttling */ +#define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */ /* Amount of time to allocate to each "chunk" of bandwidth-throttled * data. */ @@ -44,6 +45,9 @@ #define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 /*0: means nocompress, 1: best speed, ... 
9: best compress ratio */ #define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 +/* Define default autoconverge cpu throttle migration parameters */ +#define DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL 20 +#define DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT 10 /* Migration XBZRLE default cache size */ #define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024) @@ -71,6 +75,10 @@ MigrationState *migrate_get_current(void) DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT, .parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] = DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT, + .parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] = + DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL, + .parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] = + DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT, }; return ¤t_migration; @@ -372,6 +380,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS]; params->decompress_threads = s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS]; + params->x_cpu_throttle_initial = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL]; + params->x_cpu_throttle_increment = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT]; return params; } @@ -435,6 +447,11 @@ MigrationInfo *qmp_query_migrate(Error **errp) info->disk->total = blk_mig_bytes_total(); } + if (cpu_throttle_active()) { + info->has_x_cpu_throttle_percentage = true; + info->x_cpu_throttle_percentage = cpu_throttle_get_percentage(); + } + get_xbzrle_cache_stats(info); break; case MIGRATION_STATUS_COMPLETED: @@ -494,7 +511,11 @@ void qmp_migrate_set_parameters(bool has_compress_level, bool has_compress_threads, int64_t compress_threads, bool has_decompress_threads, - int64_t decompress_threads, Error **errp) + int64_t decompress_threads, + bool has_x_cpu_throttle_initial, + int64_t x_cpu_throttle_initial, + bool has_x_cpu_throttle_increment, + int64_t x_cpu_throttle_increment, Error **errp) { MigrationState *s = migrate_get_current(); @@ -517,6 +538,18 @@ void qmp_migrate_set_parameters(bool has_compress_level, "is invalid, it should be in the range of 1 to 255"); return; } + if (has_x_cpu_throttle_initial && + (x_cpu_throttle_initial < 1 || x_cpu_throttle_initial > 99)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "x_cpu_throttle_initial", + "an integer in the range of 1 to 99"); + } + if (has_x_cpu_throttle_increment && + (x_cpu_throttle_increment < 1 || x_cpu_throttle_increment > 99)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "x_cpu_throttle_increment", + "an integer in the range of 1 to 99"); + } if (has_compress_level) { s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level; @@ -528,6 +561,15 @@ void qmp_migrate_set_parameters(bool has_compress_level, s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] = decompress_threads; } + if (has_x_cpu_throttle_initial) { + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] = + x_cpu_throttle_initial; + } + + if (has_x_cpu_throttle_increment) { + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] = + x_cpu_throttle_increment; + } } /* shared migration helpers */ @@ -643,6 +685,10 @@ static MigrationState *migrate_init(const MigrationParams *params) s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS]; int decompress_thread_count = s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS]; + int x_cpu_throttle_initial = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL]; + int x_cpu_throttle_increment = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT]; memcpy(enabled_capabilities, 
s->enabled_capabilities, sizeof(enabled_capabilities)); @@ -658,6 +704,10 @@ static MigrationState *migrate_init(const MigrationParams *params) compress_thread_count; s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] = decompress_thread_count; + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] = + x_cpu_throttle_initial; + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] = + x_cpu_throttle_increment; s->bandwidth_limit = bandwidth_limit; migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); @@ -1026,6 +1076,9 @@ static void *migration_thread(void *opaque) } } + /* If we enabled cpu throttling for auto-converge, turn it off. */ + cpu_throttle_stop(); + qemu_mutex_lock_iothread(); if (s->state == MIGRATION_STATUS_COMPLETED) { int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); diff --git a/migration/ram.c b/migration/ram.c index 5187637d45..2d1d0b99e4 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -47,9 +47,7 @@ do { } while (0) #endif -static bool mig_throttle_on; static int dirty_rate_high_cnt; -static void check_guest_throttling(void); static uint64_t bitmap_sync_count; @@ -407,6 +405,29 @@ static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset) return size; } +/* Reduce amount of guest cpu execution to hopefully slow down memory writes. + * If guest dirty memory rate is reduced below the rate at which we can + * transfer pages to the destination then we should be able to complete + * migration. Some workloads dirty memory way too fast and will not effectively + * converge, even with auto-converge. + */ +static void mig_throttle_guest_down(void) +{ + MigrationState *s = migrate_get_current(); + uint64_t pct_initial = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL]; + uint64_t pct_increment = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT]; + + /* We have not started throttling yet. Let's start it. */ + if (!cpu_throttle_active()) { + cpu_throttle_set(pct_initial); + } else { + /* Throttling already on, just increase the rate */ + cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment); + } +} + /* Update the xbzrle cache to reflect a page that's been sent as all 0. * The important thing is that a stale (not-yet-0'd) page be replaced * by the new data. @@ -599,21 +620,21 @@ static void migration_bitmap_sync(void) /* The following detection logic can be refined later. For now: Check to see if the dirtied bytes is 50% more than the approx. amount of bytes that just got transferred since the last time we - were in this routine. 
If that happens twice, start or increase + throttling */ bytes_xfer_now = ram_bytes_transferred(); + if (s->dirty_pages_rate && (num_dirty_pages_period * TARGET_PAGE_SIZE > (bytes_xfer_now - bytes_xfer_prev)/2) && - (dirty_rate_high_cnt++ > 4)) { + (dirty_rate_high_cnt++ >= 2)) { trace_migration_throttle(); - mig_throttle_on = true; dirty_rate_high_cnt = 0; + mig_throttle_guest_down(); } bytes_xfer_prev = bytes_xfer_now; - } else { - mig_throttle_on = false; } + if (migrate_use_xbzrle()) { if (iterations_prev != acct_info.iterations) { acct_info.xbzrle_cache_miss_rate = @@ -1146,7 +1167,6 @@ static int ram_save_setup(QEMUFile *f, void *opaque) RAMBlock *block; int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */ - mig_throttle_on = false; dirty_rate_high_cnt = 0; bitmap_sync_count = 0; migration_bitmap_sync_init(); @@ -1251,7 +1271,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) } pages_sent += pages; acct_info.iterations++; - check_guest_throttling(); + /* we want to check in the 1st loop, just in case it was the 1st time and we had to sync the dirty bitmap. qemu_get_clock_ns() is a bit expensive, so we only check each some @@ -1664,52 +1684,3 @@ void ram_mig_init(void) qemu_mutex_init(&XBZRLE.lock); register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL); } -/* Stub function that's gets run on the vcpu when its brought out of the - VM to run inside qemu via async_run_on_cpu()*/ - -static void mig_sleep_cpu(void *opq) -{ - qemu_mutex_unlock_iothread(); - g_usleep(30*1000); - qemu_mutex_lock_iothread(); -} - -/* To reduce the dirty rate explicitly disallow the VCPUs from spending - much time in the VM. The migration thread will try to catchup. - Workload will experience a performance drop. -*/ -static void mig_throttle_guest_down(void) -{ - CPUState *cpu; - - qemu_mutex_lock_iothread(); - CPU_FOREACH(cpu) { - async_run_on_cpu(cpu, mig_sleep_cpu, NULL); - } - qemu_mutex_unlock_iothread(); -} - -static void check_guest_throttling(void) -{ - static int64_t t0; - int64_t t1; - - if (!mig_throttle_on) { - return; - } - - if (!t0) { - t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - return; - } - - t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - - /* If it has been more than 40 ms since the last time the guest - * was throttled then do it again. - */ - if (40 < (t1-t0)/1000000) { - mig_throttle_guest_down(); - t0 = t1; - } -} diff --git a/qapi-schema.json b/qapi-schema.json index 582a817215..8b0520c2d0 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -480,6 +480,10 @@ # may be expensive, but do not actually occur during the iterative # migration rounds themselves. (since 1.6) # +# @x-cpu-throttle-percentage: #optional percentage of time guest cpus are being +# throttled during auto-converge. This is only present when auto-converge +# has started throttling guest cpus. (Since 2.5) +# # Since: 0.14.0 ## { 'struct': 'MigrationInfo', @@ -489,7 +493,8 @@ '*total-time': 'int', '*expected-downtime': 'int', '*downtime': 'int', - '*setup-time': 'int'} } + '*setup-time': 'int', + '*x-cpu-throttle-percentage': 'int'} } ## # @query-migrate @@ -596,10 +601,18 @@ # compression, so set the decompress-threads to the number about 1/4 # of compress-threads is adequate. # +# @x-cpu-throttle-initial: Initial percentage of time guest cpus are throttled +# when migration auto-converge is activated. The +# default value is 20. 
(Since 2.5) +# +# @x-cpu-throttle-increment: throttle percentage increase each time +# auto-converge detects that migration is not making +# progress. The default value is 10. (Since 2.5) # Since: 2.4 ## { 'enum': 'MigrationParameter', - 'data': ['compress-level', 'compress-threads', 'decompress-threads'] } + 'data': ['compress-level', 'compress-threads', 'decompress-threads', + 'x-cpu-throttle-initial', 'x-cpu-throttle-increment'] } # # @migrate-set-parameters @@ -612,12 +625,21 @@ # # @decompress-threads: decompression thread count # +# @x-cpu-throttle-initial: Initial percentage of time guest cpus are throttled +# when migration auto-converge is activated. The +# default value is 20. (Since 2.5) +# +# @x-cpu-throttle-increment: throttle percentage increase each time +# auto-converge detects that migration is not making +# progress. The default value is 10. (Since 2.5) # Since: 2.4 ## { 'command': 'migrate-set-parameters', 'data': { '*compress-level': 'int', '*compress-threads': 'int', - '*decompress-threads': 'int'} } + '*decompress-threads': 'int', + '*x-cpu-throttle-initial': 'int', + '*x-cpu-throttle-increment': 'int'} } # # @MigrationParameters @@ -628,12 +650,22 @@ # # @decompress-threads: decompression thread count # +# @x-cpu-throttle-initial: Initial percentage of time guest cpus are throttled +# when migration auto-converge is activated. The +# default value is 20. (Since 2.5) +# +# @x-cpu-throttle-increment: throttle percentage increase each time +# auto-converge detects that migration is not making +# progress. The default value is 10. (Since 2.5) +# # Since: 2.4 ## { 'struct': 'MigrationParameters', 'data': { 'compress-level': 'int', 'compress-threads': 'int', - 'decompress-threads': 'int'} } + 'decompress-threads': 'int', + 'x-cpu-throttle-initial': 'int', + 'x-cpu-throttle-increment': 'int'} } ## # @query-migrate-parameters # diff --git a/target-i386/Makefile.objs b/target-i386/Makefile.objs index 3da413e8bd..437d9975b9 100644 --- a/target-i386/Makefile.objs +++ b/target-i386/Makefile.objs @@ -1,4 +1,4 @@ -obj-y += translate.o helper.o cpu.o +obj-y += translate.o helper.o cpu.o bpt_helper.o obj-y += excp_helper.o fpu_helper.o cc_helper.o int_helper.o svm_helper.o obj-y += smm_helper.o misc_helper.o mem_helper.o seg_helper.o obj-y += gdbstub.o diff --git a/target-i386/bpt_helper.c b/target-i386/bpt_helper.c new file mode 100644 index 0000000000..c071c24782 --- /dev/null +++ b/target-i386/bpt_helper.c @@ -0,0 +1,182 @@ +/* + * i386 breakpoint helpers + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "cpu.h" +#include "exec/helper-proto.h" + + +void hw_breakpoint_insert(CPUX86State *env, int index) +{ + CPUState *cs = CPU(x86_env_get_cpu(env)); + int type = 0, err = 0; + + switch (hw_breakpoint_type(env->dr[7], index)) { + case DR7_TYPE_BP_INST: + if (hw_breakpoint_enabled(env->dr[7], index)) { + err = cpu_breakpoint_insert(cs, env->dr[index], BP_CPU, + &env->cpu_breakpoint[index]); + } + break; + case DR7_TYPE_DATA_WR: + type = BP_CPU | BP_MEM_WRITE; + break; + case DR7_TYPE_IO_RW: + /* No support for I/O watchpoints yet */ + break; + case DR7_TYPE_DATA_RW: + type = BP_CPU | BP_MEM_ACCESS; + break; + } + + if (type != 0) { + err = cpu_watchpoint_insert(cs, env->dr[index], + hw_breakpoint_len(env->dr[7], index), + type, &env->cpu_watchpoint[index]); + } + + if (err) { + env->cpu_breakpoint[index] = NULL; + } +} + +void hw_breakpoint_remove(CPUX86State *env, int index) +{ + CPUState *cs; + + if (!env->cpu_breakpoint[index]) { + return; + } + cs = CPU(x86_env_get_cpu(env)); + switch (hw_breakpoint_type(env->dr[7], index)) { + case DR7_TYPE_BP_INST: + if (hw_breakpoint_enabled(env->dr[7], index)) { + cpu_breakpoint_remove_by_ref(cs, env->cpu_breakpoint[index]); + } + break; + case DR7_TYPE_DATA_WR: + case DR7_TYPE_DATA_RW: + cpu_watchpoint_remove_by_ref(cs, env->cpu_watchpoint[index]); + break; + case DR7_TYPE_IO_RW: + /* No support for I/O watchpoints yet */ + break; + } +} + +static bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update) +{ + target_ulong dr6; + int reg; + bool hit_enabled = false; + + dr6 = env->dr[6] & ~0xf; + for (reg = 0; reg < DR7_MAX_BP; reg++) { + bool bp_match = false; + bool wp_match = false; + + switch (hw_breakpoint_type(env->dr[7], reg)) { + case DR7_TYPE_BP_INST: + if (env->dr[reg] == env->eip) { + bp_match = true; + } + break; + case DR7_TYPE_DATA_WR: + case DR7_TYPE_DATA_RW: + if (env->cpu_watchpoint[reg] && + env->cpu_watchpoint[reg]->flags & BP_WATCHPOINT_HIT) { + wp_match = true; + } + break; + case DR7_TYPE_IO_RW: + break; + } + if (bp_match || wp_match) { + dr6 |= 1 << reg; + if (hw_breakpoint_enabled(env->dr[7], reg)) { + hit_enabled = true; + } + } + } + + if (hit_enabled || force_dr6_update) { + env->dr[6] = dr6; + } + + return hit_enabled; +} + +void breakpoint_handler(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + CPUBreakpoint *bp; + + if (cs->watchpoint_hit) { + if (cs->watchpoint_hit->flags & BP_CPU) { + cs->watchpoint_hit = NULL; + if (check_hw_breakpoints(env, false)) { + raise_exception(env, EXCP01_DB); + } else { + cpu_resume_from_signal(cs, NULL); + } + } + } else { + QTAILQ_FOREACH(bp, &cs->breakpoints, entry) { + if (bp->pc == env->eip) { + if (bp->flags & BP_CPU) { + check_hw_breakpoints(env, true); + raise_exception(env, EXCP01_DB); + } + break; + } + } + } +} + +void helper_single_step(CPUX86State *env) +{ +#ifndef CONFIG_USER_ONLY + check_hw_breakpoints(env, true); + env->dr[6] |= DR6_BS; +#endif + raise_exception(env, EXCP01_DB); +} + +void helper_movl_drN_T0(CPUX86State *env, int reg, target_ulong t0) +{ +#ifndef CONFIG_USER_ONLY + int i; + + if (reg < 4) { + hw_breakpoint_remove(env, reg); + env->dr[reg] = t0; + hw_breakpoint_insert(env, reg); + } else if (reg == 7) { + for (i = 0; i < DR7_MAX_BP; i++) { + hw_breakpoint_remove(env, i); + } + env->dr[7] = t0; + for (i = 0; i < DR7_MAX_BP; i++) { + hw_breakpoint_insert(env, i); + } + } else { + env->dr[reg] = t0; + } +#endif +} diff --git a/target-i386/cpu.c b/target-i386/cpu.c index bd411b9d8d..c793812cc2 100644 --- 
a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -43,7 +43,6 @@ #include "sysemu/sysemu.h" #include "hw/qdev-properties.h" -#include "hw/cpu/icc_bus.h" #ifndef CONFIG_USER_ONLY #include "exec/address-spaces.h" #include "hw/xen/xen.h" @@ -478,38 +477,6 @@ const char *get_register_name_32(unsigned int reg) return x86_reg_info_32[reg].name; } -/* KVM-specific features that are automatically added to all CPU models - * when KVM is enabled. - */ -static uint32_t kvm_default_features[FEATURE_WORDS] = { - [FEAT_KVM] = (1 << KVM_FEATURE_CLOCKSOURCE) | - (1 << KVM_FEATURE_NOP_IO_DELAY) | - (1 << KVM_FEATURE_CLOCKSOURCE2) | - (1 << KVM_FEATURE_ASYNC_PF) | - (1 << KVM_FEATURE_STEAL_TIME) | - (1 << KVM_FEATURE_PV_EOI) | - (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT), - [FEAT_1_ECX] = CPUID_EXT_X2APIC, -}; - -/* Features that are not added by default to any CPU model when KVM is enabled. - */ -static uint32_t kvm_default_unset_features[FEATURE_WORDS] = { - [FEAT_1_EDX] = CPUID_ACPI, - [FEAT_1_ECX] = CPUID_EXT_MONITOR, - [FEAT_8000_0001_ECX] = CPUID_EXT3_SVM, -}; - -void x86_cpu_compat_kvm_no_autoenable(FeatureWord w, uint32_t features) -{ - kvm_default_features[w] &= ~features; -} - -void x86_cpu_compat_kvm_no_autodisable(FeatureWord w, uint32_t features) -{ - kvm_default_unset_features[w] &= ~features; -} - /* * Returns the set of feature flags that are supported and migratable by * QEMU, for a given FeatureWord. @@ -1113,7 +1080,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_LAHF_LM, + CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM, .features[FEAT_7_0_EBX] = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | @@ -1148,7 +1115,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_LAHF_LM, + CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM, .features[FEAT_7_0_EBX] = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | @@ -1185,7 +1152,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, + CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, .features[FEAT_7_0_EBX] = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | @@ -1223,7 +1190,7 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, + CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, .features[FEAT_7_0_EBX] = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | @@ -1392,6 +1359,43 @@ static X86CPUDefinition builtin_x86_defs[] = { }, }; +typedef struct PropValue { + const char *prop, *value; +} PropValue; + +/* KVM-specific features that are automatically added/removed + * from all CPU models when KVM is enabled. 
+ */ +static PropValue kvm_default_props[] = { + { "kvmclock", "on" }, + { "kvm-nopiodelay", "on" }, + { "kvm-asyncpf", "on" }, + { "kvm-steal-time", "on" }, + { "kvm-pv-eoi", "on" }, + { "kvmclock-stable-bit", "on" }, + { "x2apic", "on" }, + { "acpi", "off" }, + { "monitor", "off" }, + { "svm", "off" }, + { NULL, NULL }, +}; + +void x86_cpu_change_kvm_default(const char *prop, const char *value) +{ + PropValue *pv; + for (pv = kvm_default_props; pv->prop; pv++) { + if (!strcmp(pv->prop, prop)) { + pv->value = value; + break; + } + } + + /* It is valid to call this function only for properties that + * are already present in the kvm_default_props table. + */ + assert(pv->prop); +} + static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, bool migratable_only); @@ -2061,6 +2065,18 @@ static int x86_cpu_filter_features(X86CPU *cpu) return rv; } +static void x86_cpu_apply_props(X86CPU *cpu, PropValue *props) +{ + PropValue *pv; + for (pv = props; pv->prop; pv++) { + if (!pv->value) { + continue; + } + object_property_parse(OBJECT(cpu), pv->value, pv->prop, + &error_abort); + } +} + /* Load data from X86CPUDefinition */ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) @@ -2084,11 +2100,7 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) /* Special cases not set in the X86CPUDefinition structs: */ if (kvm_enabled()) { - FeatureWord w; - for (w = 0; w < FEATURE_WORDS; w++) { - env->features[w] |= kvm_default_features[w]; - env->features[w] &= ~kvm_default_unset_features[w]; - } + x86_cpu_apply_props(cpu, kvm_default_props); } env->features[FEAT_1_ECX] |= CPUID_EXT_HYPERVISOR; @@ -2723,7 +2735,6 @@ static void mce_init(X86CPU *cpu) #ifndef CONFIG_USER_ONLY static void x86_cpu_apic_create(X86CPU *cpu, Error **errp) { - DeviceState *dev = DEVICE(cpu); APICCommonState *apic; const char *apic_type = "apic"; @@ -2733,11 +2744,7 @@ static void x86_cpu_apic_create(X86CPU *cpu, Error **errp) apic_type = "xen-apic"; } - cpu->apic_state = qdev_try_create(qdev_get_parent_bus(dev), apic_type); - if (cpu->apic_state == NULL) { - error_setg(errp, "APIC device '%s' could not be created", apic_type); - return; - } + cpu->apic_state = DEVICE(object_new(apic_type)); object_property_add_child(OBJECT(cpu), "apic", OBJECT(cpu->apic_state), NULL); @@ -2745,15 +2752,30 @@ static void x86_cpu_apic_create(X86CPU *cpu, Error **errp) /* TODO: convert to link<> */ apic = APIC_COMMON(cpu->apic_state); apic->cpu = cpu; + apic->apicbase = APIC_DEFAULT_ADDRESS | MSR_IA32_APICBASE_ENABLE; } static void x86_cpu_apic_realize(X86CPU *cpu, Error **errp) { + APICCommonState *apic; + static bool apic_mmio_map_once; + if (cpu->apic_state == NULL) { return; } object_property_set_bool(OBJECT(cpu->apic_state), true, "realized", errp); + + /* Map APIC MMIO area */ + apic = APIC_COMMON(cpu->apic_state); + if (!apic_mmio_map_once) { + memory_region_add_subregion_overlap(get_system_memory(), + apic->apicbase & + MSR_IA32_APICBASE_BASE, + &apic->io_memory, + 0x1000); + apic_mmio_map_once = true; + } } static void x86_cpu_machine_done(Notifier *n, void *unused) @@ -3133,7 +3155,6 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) xcc->parent_realize = dc->realize; dc->realize = x86_cpu_realizefn; - dc->bus_type = TYPE_ICC_BUS; dc->props = x86_cpu_properties; xcc->parent_reset = cc->reset; diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 034fab6f39..8926780e85 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -833,6 +833,7 @@ typedef struct 
CPUX86State { BNDReg bnd_regs[4]; BNDCSReg bndcs_regs; uint64_t msr_bndcfgs; + uint64_t efer; /* Beginning of state preserved by INIT (dummy marker). */ struct {} start_init_save; @@ -865,7 +866,6 @@ typedef struct CPUX86State { uint32_t sysenter_cs; target_ulong sysenter_esp; target_ulong sysenter_eip; - uint64_t efer; uint64_t star; uint64_t vm_hsave; @@ -1154,7 +1154,6 @@ static inline int hw_breakpoint_len(unsigned long dr7, int index) void hw_breakpoint_insert(CPUX86State *env, int index); void hw_breakpoint_remove(CPUX86State *env, int index); -bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update); void breakpoint_handler(CPUState *cs); /* will be suppressed */ @@ -1341,8 +1340,15 @@ void cpu_smm_update(X86CPU *cpu); void cpu_report_tpr_access(CPUX86State *env, TPRAccess access); -void x86_cpu_compat_kvm_no_autoenable(FeatureWord w, uint32_t features); -void x86_cpu_compat_kvm_no_autodisable(FeatureWord w, uint32_t features); +/* Change the value of a KVM-specific default + * + * If value is NULL, no default will be set and the original + * value from the CPU model table will be kept. + * + * It is valid to call this function only for properties that + * are already present in the kvm_default_props table. + */ +void x86_cpu_change_kvm_default(const char *prop, const char *value); /* Return name of 32-bit register, from a R_* constant */ diff --git a/target-i386/helper.c b/target-i386/helper.c index 9364d96f96..d18be95c3f 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -1096,134 +1096,6 @@ out: return pte | page_offset; } -void hw_breakpoint_insert(CPUX86State *env, int index) -{ - CPUState *cs = CPU(x86_env_get_cpu(env)); - int type = 0, err = 0; - - switch (hw_breakpoint_type(env->dr[7], index)) { - case DR7_TYPE_BP_INST: - if (hw_breakpoint_enabled(env->dr[7], index)) { - err = cpu_breakpoint_insert(cs, env->dr[index], BP_CPU, - &env->cpu_breakpoint[index]); - } - break; - case DR7_TYPE_DATA_WR: - type = BP_CPU | BP_MEM_WRITE; - break; - case DR7_TYPE_IO_RW: - /* No support for I/O watchpoints yet */ - break; - case DR7_TYPE_DATA_RW: - type = BP_CPU | BP_MEM_ACCESS; - break; - } - - if (type != 0) { - err = cpu_watchpoint_insert(cs, env->dr[index], - hw_breakpoint_len(env->dr[7], index), - type, &env->cpu_watchpoint[index]); - } - - if (err) { - env->cpu_breakpoint[index] = NULL; - } -} - -void hw_breakpoint_remove(CPUX86State *env, int index) -{ - CPUState *cs; - - if (!env->cpu_breakpoint[index]) { - return; - } - cs = CPU(x86_env_get_cpu(env)); - switch (hw_breakpoint_type(env->dr[7], index)) { - case DR7_TYPE_BP_INST: - if (hw_breakpoint_enabled(env->dr[7], index)) { - cpu_breakpoint_remove_by_ref(cs, env->cpu_breakpoint[index]); - } - break; - case DR7_TYPE_DATA_WR: - case DR7_TYPE_DATA_RW: - cpu_watchpoint_remove_by_ref(cs, env->cpu_watchpoint[index]); - break; - case DR7_TYPE_IO_RW: - /* No support for I/O watchpoints yet */ - break; - } -} - -bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update) -{ - target_ulong dr6; - int reg; - bool hit_enabled = false; - - dr6 = env->dr[6] & ~0xf; - for (reg = 0; reg < DR7_MAX_BP; reg++) { - bool bp_match = false; - bool wp_match = false; - - switch (hw_breakpoint_type(env->dr[7], reg)) { - case DR7_TYPE_BP_INST: - if (env->dr[reg] == env->eip) { - bp_match = true; - } - break; - case DR7_TYPE_DATA_WR: - case DR7_TYPE_DATA_RW: - if (env->cpu_watchpoint[reg] && - env->cpu_watchpoint[reg]->flags & BP_WATCHPOINT_HIT) { - wp_match = true; - } - break; - case DR7_TYPE_IO_RW: - break; - } - if 
(bp_match || wp_match) { - dr6 |= 1 << reg; - if (hw_breakpoint_enabled(env->dr[7], reg)) { - hit_enabled = true; - } - } - } - - if (hit_enabled || force_dr6_update) { - env->dr[6] = dr6; - } - - return hit_enabled; -} - -void breakpoint_handler(CPUState *cs) -{ - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - CPUBreakpoint *bp; - - if (cs->watchpoint_hit) { - if (cs->watchpoint_hit->flags & BP_CPU) { - cs->watchpoint_hit = NULL; - if (check_hw_breakpoints(env, false)) { - raise_exception(env, EXCP01_DB); - } else { - cpu_resume_from_signal(cs, NULL); - } - } - } else { - QTAILQ_FOREACH(bp, &cs->breakpoints, entry) { - if (bp->pc == env->eip) { - if (bp->flags & BP_CPU) { - check_hw_breakpoints(env, true); - raise_exception(env, EXCP01_DB); - } - break; - } - } - } -} - typedef struct MCEInjectionParams { Monitor *mon; X86CPU *cpu; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 7b0ba179cc..80d1a7e01e 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -67,6 +67,7 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { static bool has_msr_star; static bool has_msr_hsave_pa; +static bool has_msr_tsc_aux; static bool has_msr_tsc_adjust; static bool has_msr_tsc_deadline; static bool has_msr_feature_control; @@ -825,6 +826,10 @@ static int kvm_get_supported_msrs(KVMState *s) has_msr_hsave_pa = true; continue; } + if (kvm_msr_list->indices[i] == MSR_TSC_AUX) { + has_msr_tsc_aux = true; + continue; + } if (kvm_msr_list->indices[i] == MSR_TSC_ADJUST) { has_msr_tsc_adjust = true; continue; @@ -1299,6 +1304,9 @@ static int kvm_put_msrs(X86CPU *cpu, int level) if (has_msr_hsave_pa) { kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave); } + if (has_msr_tsc_aux) { + kvm_msr_entry_set(&msrs[n++], MSR_TSC_AUX, env->tsc_aux); + } if (has_msr_tsc_adjust) { kvm_msr_entry_set(&msrs[n++], MSR_TSC_ADJUST, env->tsc_adjust); } @@ -1671,6 +1679,9 @@ static int kvm_get_msrs(X86CPU *cpu) if (has_msr_hsave_pa) { msrs[n++].index = MSR_VM_HSAVE_PA; } + if (has_msr_tsc_aux) { + msrs[n++].index = MSR_TSC_AUX; + } if (has_msr_tsc_adjust) { msrs[n++].index = MSR_TSC_ADJUST; } @@ -1820,6 +1831,9 @@ static int kvm_get_msrs(X86CPU *cpu) case MSR_IA32_TSC: env->tsc = msrs[i].data; break; + case MSR_TSC_AUX: + env->tsc_aux = msrs[i].data; + break; case MSR_TSC_ADJUST: env->tsc_adjust = msrs[i].data; break; diff --git a/target-i386/misc_helper.c b/target-i386/misc_helper.c index 6bfc7dd24e..13bd4f5eec 100644 --- a/target-i386/misc_helper.c +++ b/target-i386/misc_helper.c @@ -95,15 +95,6 @@ void helper_into(CPUX86State *env, int next_eip_addend) } } -void helper_single_step(CPUX86State *env) -{ -#ifndef CONFIG_USER_ONLY - check_hw_breakpoints(env, true); - env->dr[6] |= DR6_BS; -#endif - raise_exception(env, EXCP01_DB); -} - void helper_cpuid(CPUX86State *env) { uint32_t eax, ebx, ecx, edx; @@ -127,10 +118,6 @@ target_ulong helper_read_crN(CPUX86State *env, int reg) void helper_write_crN(CPUX86State *env, int reg, target_ulong t0) { } - -void helper_movl_drN_T0(CPUX86State *env, int reg, target_ulong t0) -{ -} #else target_ulong helper_read_crN(CPUX86State *env, int reg) { @@ -176,27 +163,6 @@ void helper_write_crN(CPUX86State *env, int reg, target_ulong t0) break; } } - -void helper_movl_drN_T0(CPUX86State *env, int reg, target_ulong t0) -{ - int i; - - if (reg < 4) { - hw_breakpoint_remove(env, reg); - env->dr[reg] = t0; - hw_breakpoint_insert(env, reg); - } else if (reg == 7) { - for (i = 0; i < DR7_MAX_BP; i++) { - hw_breakpoint_remove(env, i); - } - env->dr[7] = t0; - for (i 
= 0; i < DR7_MAX_BP; i++) { - hw_breakpoint_insert(env, i); - } - } else { - env->dr[reg] = t0; - } -} #endif void helper_lmsw(CPUX86State *env, target_ulong t0) diff --git a/target-microblaze/cpu.c b/target-microblaze/cpu.c index 9ac509af3e..cbd84a22f7 100644 --- a/target-microblaze/cpu.c +++ b/target-microblaze/cpu.c @@ -107,6 +107,8 @@ static void mb_cpu_reset(CPUState *s) /* Disable stack protector. */ env->shr = ~0; + env->sregs[SR_PC] = cpu->cfg.base_vectors; + #if defined(CONFIG_USER_ONLY) /* start in user mode with interrupts enabled. */ env->sregs[SR_MSR] = MSR_EE | MSR_IE | MSR_VM | MSR_UM; @@ -183,8 +185,6 @@ static void mb_cpu_realizefn(DeviceState *dev, Error **errp) env->pvr.regs[10] = 0x0c000000; /* Default to spartan 3a dsp family. */ env->pvr.regs[11] = PVR11_USE_MMU | (16 << 17); - env->sregs[SR_PC] = cpu->cfg.base_vectors; - mcc->parent_realize(dev, errp); } diff --git a/tests/Makefile b/tests/Makefile index 4063639a59..e6474ba31b 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -188,7 +188,9 @@ gcov-files-i386-y += hw/usb/hcd-xhci.c check-qtest-i386-y += tests/pc-cpu-test$(EXESUF) check-qtest-i386-y += tests/q35-test$(EXESUF) gcov-files-i386-y += hw/pci-host/q35.c +ifeq ($(CONFIG_VHOST_NET),y) check-qtest-i386-$(CONFIG_LINUX) += tests/vhost-user-test$(EXESUF) +endif check-qtest-x86_64-y = $(check-qtest-i386-y) gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y)) diff --git a/tests/ide-test.c b/tests/ide-test.c index 559473812c..b6e9e1a232 100644 --- a/tests/ide-test.c +++ b/tests/ide-test.c @@ -633,7 +633,7 @@ static void send_scsi_cdb_read10(uint64_t lba, int nblocks) /* Send Packet */ for (i = 0; i < sizeof(Read10CDB)/2; i++) { - outw(IDE_BASE + reg_data, ((uint16_t *)&pkt)[i]); + outw(IDE_BASE + reg_data, cpu_to_le16(((uint16_t *)&pkt)[i])); } } @@ -733,7 +733,7 @@ static void cdrom_pio_impl(int nblocks) size_t offset = i * (limit / 2); size_t rem = (rxsize / 2) - offset; for (j = 0; j < MIN((limit / 2), rem); j++) { - rx[offset + j] = inw(IDE_BASE + reg_data); + rx[offset + j] = le16_to_cpu(inw(IDE_BASE + reg_data)); } ide_wait_intr(IDE_PRIMARY_IRQ); } diff --git a/tests/libqos/ahci.c b/tests/libqos/ahci.c index cf66b3e32c..adb2665c6d 100644 --- a/tests/libqos/ahci.c +++ b/tests/libqos/ahci.c @@ -742,7 +742,7 @@ AHCICommand *ahci_command_create(uint8_t command_name) g_assert(!(props->lba28 && props->lba48)); g_assert(!(props->read && props->write)); g_assert(!props->size || props->data); - g_assert(!props->ncq || (props->ncq && props->lba48)); + g_assert(!props->ncq || props->lba48); /* Defaults and book-keeping */ cmd->props = props; diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out index 0425ae05e8..a2b6703956 100644 --- a/tests/qemu-iotests/049.out +++ b/tests/qemu-iotests/049.out @@ -118,6 +118,7 @@ qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes. qemu-img create -f qcow2 -o size=foobar TEST_DIR/t.qcow2 qemu-img: Parameter 'size' expects a size +You may use k, M, G or T suffixes for kilobytes, megabytes, gigabytes and terabytes. qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' == Check correct interpretation of suffixes for cluster size == diff --git a/tests/qemu-iotests/128 b/tests/qemu-iotests/128 index e2a0f2f890..3d8107d2a3 100755 --- a/tests/qemu-iotests/128 +++ b/tests/qemu-iotests/128 @@ -31,6 +31,11 @@ status=1 # failure is the default! 
devname="eiodev$$" sudo="" +_sudo_qemu_io_wrapper() +{ + (exec $sudo "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@") +} + _setup_eiodev() { # This test should either be run as root or with passwordless sudo @@ -76,7 +81,9 @@ TEST_IMG="/dev/mapper/$devname" echo echo "== reading from error device ==" # Opening image should succeed but the read operation should fail -$sudo $QEMU_IO --format "$IMGFMT" --nocache -c "read 0 65536" "$TEST_IMG" | _filter_qemu_io +_sudo_qemu_io_wrapper --format "$IMGFMT" --nocache \ + -c "read 0 65536" "$TEST_IMG" \ + | _filter_qemu_io # success, all done echo "*** done" diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index e301db79b9..56df5cc552 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -8,7 +8,6 @@ * */ -#define QEMU_GLIB_COMPAT_H #include <glib.h> #include "libqtest.h" @@ -30,12 +29,6 @@ #define HAVE_MONOTONIC_TIME #endif -#if GLIB_CHECK_VERSION(2, 32, 0) -#define HAVE_MUTEX_INIT -#define HAVE_COND_INIT -#define HAVE_THREAD_NEW -#endif - #define QEMU_CMD_ACCEL " -machine accel=tcg" #define QEMU_CMD_MEM " -m 512 -object memory-backend-file,id=mem,size=512M,"\ "mem-path=%s,share=on -numa node,memdev=mem" @@ -53,6 +46,8 @@ #define VHOST_MEMORY_MAX_NREGIONS 8 +#define VHOST_USER_F_PROTOCOL_FEATURES 30 + typedef enum VhostUserRequest { VHOST_USER_NONE = 0, VHOST_USER_GET_FEATURES = 1, @@ -69,6 +64,8 @@ typedef enum VhostUserRequest { VHOST_USER_SET_VRING_KICK = 12, VHOST_USER_SET_VRING_CALL = 13, VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, VHOST_USER_MAX } VhostUserRequest; @@ -113,93 +110,21 @@ static VhostUserMsg m __attribute__ ((unused)); int fds_num = 0, fds[VHOST_MEMORY_MAX_NREGIONS]; static VhostUserMemory memory; -static GMutex *data_mutex; -static GCond *data_cond; - -static gint64 _get_time(void) -{ -#ifdef HAVE_MONOTONIC_TIME - return g_get_monotonic_time(); -#else - GTimeVal time; - g_get_current_time(&time); - - return time.tv_sec * G_TIME_SPAN_SECOND + time.tv_usec; -#endif -} - -static GMutex *_mutex_new(void) -{ - GMutex *mutex; - -#ifdef HAVE_MUTEX_INIT - mutex = g_new(GMutex, 1); - g_mutex_init(mutex); -#else - mutex = g_mutex_new(); -#endif - - return mutex; -} - -static void _mutex_free(GMutex *mutex) -{ -#ifdef HAVE_MUTEX_INIT - g_mutex_clear(mutex); - g_free(mutex); -#else - g_mutex_free(mutex); -#endif -} - -static GCond *_cond_new(void) -{ - GCond *cond; - -#ifdef HAVE_COND_INIT - cond = g_new(GCond, 1); - g_cond_init(cond); -#else - cond = g_cond_new(); -#endif - - return cond; -} +static CompatGMutex data_mutex; +static CompatGCond data_cond; -static gboolean _cond_wait_until(GCond *cond, GMutex *mutex, gint64 end_time) +#if !GLIB_CHECK_VERSION(2, 32, 0) +static gboolean g_cond_wait_until(CompatGCond cond, CompatGMutex mutex, + gint64 end_time) { gboolean ret = FALSE; -#ifdef HAVE_COND_INIT - ret = g_cond_wait_until(cond, mutex, end_time); -#else + end_time -= g_get_monotonic_time(); GTimeVal time = { end_time / G_TIME_SPAN_SECOND, end_time % G_TIME_SPAN_SECOND }; ret = g_cond_timed_wait(cond, mutex, &time); -#endif return ret; } - -static void _cond_free(GCond *cond) -{ -#ifdef HAVE_COND_INIT - g_cond_clear(cond); - g_free(cond); -#else - g_cond_free(cond); -#endif -} - -static GThread *_thread_new(const gchar *name, GThreadFunc func, gpointer data) -{ - GThread *thread = NULL; - GError *error = NULL; -#ifdef HAVE_THREAD_NEW - thread = g_thread_try_new(name, func, data, &error); -#else - thread = g_thread_create(func, data, TRUE, &error); 
#endif - return thread; -} static void read_guest_mem(void) { @@ -208,11 +133,11 @@ static void read_guest_mem(void) int i, j; size_t size; - g_mutex_lock(data_mutex); + g_mutex_lock(&data_mutex); - end_time = _get_time() + 5 * G_TIME_SPAN_SECOND; + end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND; while (!fds_num) { - if (!_cond_wait_until(data_cond, data_mutex, end_time)) { + if (!g_cond_wait_until(&data_cond, &data_mutex, end_time)) { /* timeout has passed */ g_assert(fds_num); break; @@ -252,7 +177,7 @@ static void read_guest_mem(void) } g_assert_cmpint(1, ==, 1); - g_mutex_unlock(data_mutex); + g_mutex_unlock(&data_mutex); } static void *thread_function(void *data) @@ -280,7 +205,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) return; } - g_mutex_lock(data_mutex); + g_mutex_lock(&data_mutex); memcpy(p, buf, VHOST_USER_HDR_SIZE); if (msg.size) { @@ -293,6 +218,20 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) /* send back features to qemu */ msg.flags |= VHOST_USER_REPLY_MASK; msg.size = sizeof(m.u64); + msg.u64 = 0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES; + p = (uint8_t *) &msg; + qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size); + break; + + case VHOST_USER_SET_FEATURES: + g_assert_cmpint(msg.u64 & (0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES), + !=, 0ULL); + break; + + case VHOST_USER_GET_PROTOCOL_FEATURES: + /* send back features to qemu */ + msg.flags |= VHOST_USER_REPLY_MASK; + msg.size = sizeof(m.u64); msg.u64 = 0; p = (uint8_t *) &msg; qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size); @@ -313,7 +252,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) fds_num = qemu_chr_fe_get_msgfds(chr, fds, sizeof(fds) / sizeof(int)); /* signal the test that it can continue */ - g_cond_signal(data_cond); + g_cond_signal(&data_cond); break; case VHOST_USER_SET_VRING_KICK: @@ -330,20 +269,14 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) default: break; } - g_mutex_unlock(data_mutex); + g_mutex_unlock(&data_mutex); } -static const char *init_hugepagefs(void) +static const char *init_hugepagefs(const char *path) { - const char *path; struct statfs fs; int ret; - path = getenv("QTEST_HUGETLBFS_PATH"); - if (!path) { - path = "/hugetlbfs"; - } - if (access(path, R_OK | W_OK | X_OK)) { g_test_message("access on path (%s): %s\n", path, strerror(errno)); return NULL; @@ -370,22 +303,34 @@ int main(int argc, char **argv) { QTestState *s = NULL; CharDriverState *chr = NULL; - const char *hugefs = 0; + const char *hugefs; char *socket_path = 0; char *qemu_cmd = 0; char *chr_path = 0; int ret; + char template[] = "/tmp/vhost-test-XXXXXX"; + const char *tmpfs; + const char *root; g_test_init(&argc, &argv, NULL); module_call_init(MODULE_INIT_QOM); - hugefs = init_hugepagefs(); - if (!hugefs) { - return 0; + tmpfs = mkdtemp(template); + if (!tmpfs) { + g_test_message("mkdtemp on path (%s): %s\n", template, strerror(errno)); + } + g_assert(tmpfs); + + hugefs = getenv("QTEST_HUGETLBFS_PATH"); + if (hugefs) { + root = init_hugepagefs(hugefs); + g_assert(root); + } else { + root = tmpfs; } - socket_path = g_strdup_printf("/tmp/vhost-%d.sock", getpid()); + socket_path = g_strdup_printf("%s/vhost.sock", tmpfs); /* create char dev and add read handlers */ qemu_add_opts(&qemu_chardev_opts); @@ -395,11 +340,11 @@ int main(int argc, char **argv) qemu_chr_add_handlers(chr, chr_can_read, chr_read, NULL, chr); /* run the main loop thread so the chardev may operate */ - data_mutex = _mutex_new(); - data_cond 
= _cond_new(); - _thread_new(NULL, thread_function, NULL); + g_mutex_init(&data_mutex); + g_cond_init(&data_cond); + g_thread_new(NULL, thread_function, NULL); - qemu_cmd = g_strdup_printf(QEMU_CMD, hugefs, socket_path); + qemu_cmd = g_strdup_printf(QEMU_CMD, root, socket_path); s = qtest_start(qemu_cmd); g_free(qemu_cmd); @@ -414,8 +359,13 @@ int main(int argc, char **argv) /* cleanup */ unlink(socket_path); g_free(socket_path); - _cond_free(data_cond); - _mutex_free(data_mutex); + + ret = rmdir(tmpfs); + if (ret != 0) { + g_test_message("unable to rmdir: path (%s): %s\n", + tmpfs, strerror(errno)); + } + g_assert_cmpint(ret, ==, 0); return ret; } diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 3ae4987b6b..a0fcdc2ede 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -128,10 +128,10 @@ void *qemu_memalign(size_t alignment, size_t size) void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) { size_t align = QEMU_VMALLOC_ALIGN; - size_t total = size + align - getpagesize(); - void *ptr = mmap(0, total, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + size_t total = size + align; + void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; + void *ptr1; if (ptr == MAP_FAILED) { return NULL; @@ -140,14 +140,22 @@ void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) if (alignment) { *alignment = align; } + + ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (ptr1 == MAP_FAILED) { + munmap(ptr, total); + return NULL; + } + ptr += offset; total -= offset; if (offset > 0) { munmap(ptr - offset, offset); } - if (total > size) { - munmap(ptr + size, total - size); + if (total > size + getpagesize()) { + munmap(ptr + size + getpagesize(), total - size - getpagesize()); } trace_qemu_anon_ram_alloc(size, ptr); @@ -164,7 +172,7 @@ void qemu_anon_ram_free(void *ptr, size_t size) { trace_qemu_anon_ram_free(ptr, size); if (ptr) { - munmap(ptr, size); + munmap(ptr, size + getpagesize()); } } diff --git a/vl.c b/vl.c index e211f6aa36..f2bd8d20fb 100644 --- a/vl.c +++ b/vl.c @@ -580,6 +580,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_INMIGRATE, RUN_STATE_SUSPENDED }, { RUN_STATE_INMIGRATE, RUN_STATE_WATCHDOG }, { RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED }, + { RUN_STATE_INMIGRATE, RUN_STATE_FINISH_MIGRATE }, { RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED }, { RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE }, @@ -1222,7 +1223,13 @@ static void smp_parse(QemuOpts *opts) exit(1); } - max_cpus = qemu_opt_get_number(opts, "maxcpus", 0); + max_cpus = qemu_opt_get_number(opts, "maxcpus", cpus); + if (sockets * cores * threads > max_cpus) { + fprintf(stderr, "cpu topology: error: " + "sockets (%u) * cores (%u) * threads (%u) > maxcpus (%u)\n", + sockets, cores, threads, max_cpus); + exit(1); + } smp_cpus = cpus; smp_cores = cores > 0 ? cores : 1; |