diff options
| -rw-r--r-- | MAINTAINERS | 1 | ||||
| -rw-r--r-- | docs/devel/kconfig.rst | 2 | ||||
| -rw-r--r-- | docs/interop/vhost-user.rst | 17 | ||||
| -rw-r--r-- | hw/arm/boot.c | 37 | ||||
| -rw-r--r-- | hw/dma/pl330.c | 17 | ||||
| -rw-r--r-- | hw/i386/acpi-build.c | 17 | ||||
| -rw-r--r-- | hw/input/stellaris_input.c | 10 | ||||
| -rw-r--r-- | hw/intc/ioapic.c | 8 | ||||
| -rw-r--r-- | hw/scsi/virtio-scsi.c | 7 | ||||
| -rw-r--r-- | hw/tpm/tpm_crb.c | 4 | ||||
| -rw-r--r-- | hw/tpm/tpm_emulator.c | 60 | ||||
| -rw-r--r-- | hw/tpm/tpm_int.h | 13 | ||||
| -rw-r--r-- | hw/tpm/tpm_tis.c | 4 | ||||
| -rw-r--r-- | hw/virtio/virtio-balloon.c | 115 | ||||
| -rw-r--r-- | include/hw/virtio/virtio-balloon.h | 3 | ||||
| -rw-r--r-- | include/migration/vmstate.h | 30 | ||||
| -rw-r--r-- | linux-user/hppa/signal.c | 3 | ||||
| -rw-r--r-- | linux-user/main.c | 5 | ||||
| -rw-r--r-- | linux-user/qemu.h | 2 | ||||
| -rw-r--r-- | linux-user/signal-common.h | 1 | ||||
| -rw-r--r-- | linux-user/signal.c | 35 | ||||
| -rw-r--r-- | migration/ram.c | 66 | ||||
| -rw-r--r-- | target/i386/kvm.c | 10 |
23 files changed, 321 insertions, 146 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index cc9636b43a..d6de200453 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1468,7 +1468,6 @@ F: include/hw/vfio/ vfio-ccw M: Cornelia Huck <cohuck@redhat.com> M: Eric Farman <farman@linux.ibm.com> -M: Farhan Ali <alifm@linux.ibm.com> S: Supported F: hw/vfio/ccw.c F: hw/s390x/s390-ccw.c diff --git a/docs/devel/kconfig.rst b/docs/devel/kconfig.rst index d6f8eb0977..b7bca44704 100644 --- a/docs/devel/kconfig.rst +++ b/docs/devel/kconfig.rst @@ -267,7 +267,7 @@ the default configuration by uncommenting lines in the first group, or commenting out lines in the second group. It is also possible to run QEMU's configure script with the -``--with-default-devices`` option. When this is done, everything defaults +``--without-default-devices`` option. When this is done, everything defaults to ``n`` unless it is ``select``ed or explicitly switched on in the ``.mak`` files. In other words, ``default`` and ``imply`` directives are disabled. When QEMU is built with this option, the user will probably diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst index 5750668aba..7827b710aa 100644 --- a/docs/interop/vhost-user.rst +++ b/docs/interop/vhost-user.rst @@ -324,6 +324,15 @@ must support changing some configuration aspects on the fly. Multiple queue support ---------------------- +Many devices have a fixed number of virtqueues. In this case the master +already knows the number of available virtqueues without communicating with the +slave. + +Some devices do not have a fixed number of virtqueues. Instead the maximum +number of virtqueues is chosen by the slave. The number can depend on host +resource availability or slave implementation details. Such devices are called +multiple queue devices. + Multiple queue support allows the slave to advertise the maximum number of queues. This is treated as a protocol extension, hence the slave has to implement protocol features first. The multiple queues feature is supported @@ -339,6 +348,14 @@ queue in the sent message to identify a specified queue. The master enables queues by sending message ``VHOST_USER_SET_VRING_ENABLE``. vhost-user-net has historically automatically enabled the first queue pair. +Slaves should always implement the ``VHOST_USER_PROTOCOL_F_MQ`` protocol +feature, even for devices with a fixed number of virtqueues, since it is simple +to implement and offers a degree of introspection. + +Masters must not rely on the ``VHOST_USER_PROTOCOL_F_MQ`` protocol feature for +devices with a fixed number of virtqueues. Only true multiqueue devices +require this protocol feature. + Migration --------- diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 1fb24fbef2..c2b89b3bb9 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -986,7 +986,9 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, int kernel_size; int initrd_size; int is_linux = 0; - uint64_t elf_entry, elf_low_addr, elf_high_addr; + uint64_t elf_entry; + /* Addresses of first byte used and first byte not used by the image */ + uint64_t image_low_addr = 0, image_high_addr = 0; int elf_machine; hwaddr entry; static const ARMInsnFixup *primary_loader; @@ -1014,24 +1016,24 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, info->nb_cpus = 1; /* Assume that raw images are linux kernels, and ELF images are not. */ - kernel_size = arm_load_elf(info, &elf_entry, &elf_low_addr, - &elf_high_addr, elf_machine, as); + kernel_size = arm_load_elf(info, &elf_entry, &image_low_addr, + &image_high_addr, elf_machine, as); if (kernel_size > 0 && have_dtb(info)) { /* * If there is still some room left at the base of RAM, try and put * the DTB there like we do for images loaded with -bios or -pflash. */ - if (elf_low_addr > info->loader_start - || elf_high_addr < info->loader_start) { + if (image_low_addr > info->loader_start + || image_high_addr < info->loader_start) { /* - * Set elf_low_addr as address limit for arm_load_dtb if it may be + * Set image_low_addr as address limit for arm_load_dtb if it may be * pointing into RAM, otherwise pass '0' (no limit) */ - if (elf_low_addr < info->loader_start) { - elf_low_addr = 0; + if (image_low_addr < info->loader_start) { + image_low_addr = 0; } info->dtb_start = info->loader_start; - info->dtb_limit = elf_low_addr; + info->dtb_limit = image_low_addr; } } entry = elf_entry; @@ -1039,17 +1041,29 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, uint64_t loadaddr = info->loader_start + KERNEL_NOLOAD_ADDR; kernel_size = load_uimage_as(info->kernel_filename, &entry, &loadaddr, &is_linux, NULL, NULL, as); + if (kernel_size >= 0) { + image_low_addr = loadaddr; + image_high_addr = image_low_addr + kernel_size; + } } if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) && kernel_size < 0) { kernel_size = load_aarch64_image(info->kernel_filename, info->loader_start, &entry, as); is_linux = 1; + if (kernel_size >= 0) { + image_low_addr = entry; + image_high_addr = image_low_addr + kernel_size; + } } else if (kernel_size < 0) { /* 32-bit ARM */ entry = info->loader_start + KERNEL_LOAD_ADDR; kernel_size = load_image_targphys_as(info->kernel_filename, entry, ram_end - KERNEL_LOAD_ADDR, as); is_linux = 1; + if (kernel_size >= 0) { + image_low_addr = entry; + image_high_addr = image_low_addr + kernel_size; + } } if (kernel_size < 0) { error_report("could not load kernel '%s'", info->kernel_filename); @@ -1081,7 +1095,10 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu, * we might still make a bad choice here. */ info->initrd_start = info->loader_start + - MAX(MIN(info->ram_size / 2, 128 * 1024 * 1024), kernel_size); + MIN(info->ram_size / 2, 128 * 1024 * 1024); + if (image_high_addr) { + info->initrd_start = MAX(info->initrd_start, image_high_addr); + } info->initrd_start = TARGET_PAGE_ALIGN(info->initrd_start); if (is_linux) { diff --git a/hw/dma/pl330.c b/hw/dma/pl330.c index 58df965a46..a56a3e7771 100644 --- a/hw/dma/pl330.c +++ b/hw/dma/pl330.c @@ -218,11 +218,12 @@ typedef struct PL330Queue { static const VMStateDescription vmstate_pl330_queue = { .name = "pl330_queue", - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, .fields = (VMStateField[]) { - VMSTATE_STRUCT_VARRAY_UINT32(queue, PL330Queue, queue_size, 1, - vmstate_pl330_queue_entry, PL330QueueEntry), + VMSTATE_STRUCT_VARRAY_POINTER_UINT32(queue, PL330Queue, queue_size, + vmstate_pl330_queue_entry, + PL330QueueEntry), VMSTATE_END_OF_LIST() } }; @@ -278,12 +279,12 @@ struct PL330State { static const VMStateDescription vmstate_pl330 = { .name = "pl330", - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, .fields = (VMStateField[]) { VMSTATE_STRUCT(manager, PL330State, 0, vmstate_pl330_chan, PL330Chan), - VMSTATE_STRUCT_VARRAY_UINT32(chan, PL330State, num_chnls, 0, - vmstate_pl330_chan, PL330Chan), + VMSTATE_STRUCT_VARRAY_POINTER_UINT32(chan, PL330State, num_chnls, + vmstate_pl330_chan, PL330Chan), VMSTATE_VBUFFER_UINT32(lo_seqn, PL330State, 1, NULL, num_chnls), VMSTATE_VBUFFER_UINT32(hi_seqn, PL330State, 1, NULL, num_chnls), VMSTATE_STRUCT(fifo, PL330State, 0, vmstate_pl330_fifo, PL330Fifo), diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index d281ffa89e..f3fdfefcd5 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -755,10 +755,16 @@ static void crs_range_set_free(CrsRangeSet *range_set) static gint crs_range_compare(gconstpointer a, gconstpointer b) { - CrsRangeEntry *entry_a = *(CrsRangeEntry **)a; - CrsRangeEntry *entry_b = *(CrsRangeEntry **)b; + CrsRangeEntry *entry_a = *(CrsRangeEntry **)a; + CrsRangeEntry *entry_b = *(CrsRangeEntry **)b; - return (int64_t)entry_a->base - (int64_t)entry_b->base; + if (entry_a->base < entry_b->base) { + return -1; + } else if (entry_a->base > entry_b->base) { + return 1; + } else { + return 0; + } } /* @@ -1908,10 +1914,13 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, scope = aml_scope("\\_SB"); dev = aml_device("PC%.02X", bus_num); aml_append(dev, aml_name_decl("_UID", aml_int(bus_num))); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); if (pci_bus_is_express(bus)) { + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); + aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); aml_append(dev, build_q35_osc_method()); + } else { + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); } if (numa_node != NUMA_NODE_UNASSIGNED) { diff --git a/hw/input/stellaris_input.c b/hw/input/stellaris_input.c index 20c87d86f4..3a666d61d4 100644 --- a/hw/input/stellaris_input.c +++ b/hw/input/stellaris_input.c @@ -60,12 +60,14 @@ static const VMStateDescription vmstate_stellaris_button = { static const VMStateDescription vmstate_stellaris_gamepad = { .name = "stellaris_gamepad", - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, .fields = (VMStateField[]) { VMSTATE_INT32(extension, gamepad_state), - VMSTATE_STRUCT_VARRAY_INT32(buttons, gamepad_state, num_buttons, 0, - vmstate_stellaris_button, gamepad_button), + VMSTATE_STRUCT_VARRAY_POINTER_INT32(buttons, gamepad_state, + num_buttons, + vmstate_stellaris_button, + gamepad_button), VMSTATE_END_OF_LIST() } }; diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c index c408749876..e99c37cceb 100644 --- a/hw/intc/ioapic.c +++ b/hw/intc/ioapic.c @@ -197,9 +197,11 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s) MSIMessage msg; struct ioapic_entry_info info; ioapic_entry_parse(s->ioredtbl[i], &info); - msg.address = info.addr; - msg.data = info.data; - kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL); + if (!info.masked) { + msg.address = info.addr; + msg.data = info.data; + kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL); + } } kvm_irqchip_commit_routes(kvm_state); } diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index d0bdbff090..8b9e5e2b49 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -832,6 +832,7 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev); VirtIOSCSI *s = VIRTIO_SCSI(vdev); SCSIDevice *sd = SCSI_DEVICE(dev); + AioContext *ctx = s->ctx ?: qemu_get_aio_context(); if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { virtio_scsi_acquire(s); @@ -841,14 +842,16 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, virtio_scsi_release(s); } + aio_disable_external(ctx); + qdev_simple_device_unplug_cb(hotplug_dev, dev, errp); + aio_enable_external(ctx); + if (s->ctx) { virtio_scsi_acquire(s); /* If other users keep the BlockBackend in the iothread, that's ok */ blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL); virtio_scsi_release(s); } - - qdev_simple_device_unplug_cb(hotplug_dev, dev, errp); } static struct SCSIBusInfo virtio_scsi_scsi_info = { diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c index 5e2db9e0c4..db0e3e7c67 100644 --- a/hw/tpm/tpm_crb.c +++ b/hw/tpm/tpm_crb.c @@ -273,7 +273,9 @@ static void tpm_crb_reset(void *dev) s->be_buffer_size = MIN(tpm_backend_get_buffer_size(s->tpmbe), CRB_CTRL_CMD_SIZE); - tpm_backend_startup_tpm(s->tpmbe, s->be_buffer_size); + if (tpm_backend_startup_tpm(s->tpmbe, s->be_buffer_size) < 0) { + exit(1); + } } static void tpm_crb_realize(DeviceState *dev, Error **errp) diff --git a/hw/tpm/tpm_emulator.c b/hw/tpm/tpm_emulator.c index 1288cbcb8d..fc0b512f4f 100644 --- a/hw/tpm/tpm_emulator.c +++ b/hw/tpm/tpm_emulator.c @@ -82,6 +82,40 @@ typedef struct TPMEmulator { TPMBlobBuffers state_blobs; } TPMEmulator; +struct tpm_error { + uint32_t tpm_result; + const char *string; +}; + +static const struct tpm_error tpm_errors[] = { + /* TPM 1.2 error codes */ + { TPM_BAD_PARAMETER , "a parameter is bad" }, + { TPM_FAIL , "operation failed" }, + { TPM_KEYNOTFOUND , "key could not be found" }, + { TPM_BAD_PARAM_SIZE , "bad parameter size"}, + { TPM_ENCRYPT_ERROR , "encryption error" }, + { TPM_DECRYPT_ERROR , "decryption error" }, + { TPM_BAD_KEY_PROPERTY, "bad key property" }, + { TPM_BAD_MODE , "bad (encryption) mode" }, + { TPM_BAD_VERSION , "bad version identifier" }, + { TPM_BAD_LOCALITY , "bad locality" }, + /* TPM 2 error codes */ + { TPM_RC_FAILURE , "operation failed" }, + { TPM_RC_LOCALITY , "bad locality" }, + { TPM_RC_INSUFFICIENT, "insufficient amount of data" }, +}; + +static const char *tpm_emulator_strerror(uint32_t tpm_result) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(tpm_errors); i++) { + if (tpm_errors[i].tpm_result == tpm_result) { + return tpm_errors[i].string; + } + } + return ""; +} static int tpm_emulator_ctrlcmd(TPMEmulator *tpm, unsigned long cmd, void *msg, size_t msg_len_in, size_t msg_len_out) @@ -264,7 +298,8 @@ static int tpm_emulator_stop_tpm(TPMBackend *tb) res = be32_to_cpu(res); if (res) { - error_report("tpm-emulator: TPM result for CMD_STOP: 0x%x", res); + error_report("tpm-emulator: TPM result for CMD_STOP: 0x%x %s", res, + tpm_emulator_strerror(res)); return -1; } @@ -293,8 +328,9 @@ static int tpm_emulator_set_buffer_size(TPMBackend *tb, psbs.u.resp.tpm_result = be32_to_cpu(psbs.u.resp.tpm_result); if (psbs.u.resp.tpm_result != 0) { - error_report("tpm-emulator: TPM result for set buffer size : 0x%x", - psbs.u.resp.tpm_result); + error_report("tpm-emulator: TPM result for set buffer size : 0x%x %s", + psbs.u.resp.tpm_result, + tpm_emulator_strerror(psbs.u.resp.tpm_result)); return -1; } @@ -339,7 +375,8 @@ static int tpm_emulator_startup_tpm_resume(TPMBackend *tb, size_t buffersize, res = be32_to_cpu(init.u.resp.tpm_result); if (res) { - error_report("tpm-emulator: TPM result for CMD_INIT: 0x%x", res); + error_report("tpm-emulator: TPM result for CMD_INIT: 0x%x %s", res, + tpm_emulator_strerror(res)); goto err_exit; } return 0; @@ -399,8 +436,9 @@ static int tpm_emulator_reset_tpm_established_flag(TPMBackend *tb, res = be32_to_cpu(reset_est.u.resp.tpm_result); if (res) { - error_report("tpm-emulator: TPM result for rest establixhed flag: 0x%x", - res); + error_report( + "tpm-emulator: TPM result for rest established flag: 0x%x %s", + res, tpm_emulator_strerror(res)); return -1; } @@ -638,7 +676,8 @@ static int tpm_emulator_get_state_blob(TPMEmulator *tpm_emu, res = be32_to_cpu(pgs.u.resp.tpm_result); if (res != 0 && (res & 0x800) == 0) { error_report("tpm-emulator: Getting the stateblob (type %d) failed " - "with a TPM error 0x%x", type, res); + "with a TPM error 0x%x %s", type, res, + tpm_emulator_strerror(res)); return -1; } @@ -758,7 +797,8 @@ static int tpm_emulator_set_state_blob(TPMEmulator *tpm_emu, tpm_result = be32_to_cpu(pss.u.resp.tpm_result); if (tpm_result != 0) { error_report("tpm-emulator: Setting the stateblob (type %d) failed " - "with a TPM error 0x%x", type, tpm_result); + "with a TPM error 0x%x %s", type, tpm_result, + tpm_emulator_strerror(tpm_result)); return -1; } @@ -888,8 +928,8 @@ static void tpm_emulator_shutdown(TPMEmulator *tpm_emu) error_report("tpm-emulator: Could not cleanly shutdown the TPM: %s", strerror(errno)); } else if (res != 0) { - error_report("tpm-emulator: TPM result for sutdown: 0x%x", - be32_to_cpu(res)); + error_report("tpm-emulator: TPM result for shutdown: 0x%x %s", + be32_to_cpu(res), tpm_emulator_strerror(be32_to_cpu(res))); } } diff --git a/hw/tpm/tpm_int.h b/hw/tpm/tpm_int.h index a4c77fbd7e..3fb28a9d6c 100644 --- a/hw/tpm/tpm_int.h +++ b/hw/tpm/tpm_int.h @@ -39,7 +39,16 @@ struct tpm_resp_hdr { #define TPM_TAG_RSP_AUTH1_COMMAND 0xc5 #define TPM_TAG_RSP_AUTH2_COMMAND 0xc6 +#define TPM_BAD_PARAMETER 3 #define TPM_FAIL 9 +#define TPM_KEYNOTFOUND 13 +#define TPM_BAD_PARAM_SIZE 25 +#define TPM_ENCRYPT_ERROR 32 +#define TPM_DECRYPT_ERROR 33 +#define TPM_BAD_KEY_PROPERTY 40 +#define TPM_BAD_MODE 44 +#define TPM_BAD_VERSION 46 +#define TPM_BAD_LOCALITY 61 #define TPM_ORD_ContinueSelfTest 0x53 #define TPM_ORD_GetTicks 0xf1 @@ -59,4 +68,8 @@ struct tpm_resp_hdr { #define TPM2_PT_MAX_COMMAND_SIZE 0x11e +#define TPM_RC_INSUFFICIENT 0x9a +#define TPM_RC_FAILURE 0x101 +#define TPM_RC_LOCALITY 0x907 + #endif /* TPM_TPM_INT_H */ diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c index 174618ac30..d6b3212890 100644 --- a/hw/tpm/tpm_tis.c +++ b/hw/tpm/tpm_tis.c @@ -910,7 +910,9 @@ static void tpm_tis_reset(DeviceState *dev) s->rw_offset = 0; } - tpm_backend_startup_tpm(s->be_driver, s->be_buffer_size); + if (tpm_backend_startup_tpm(s->be_driver, s->be_buffer_size) < 0) { + exit(1); + } } /* persistent state handling */ diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index e85d1c0d5c..25de154307 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -34,31 +34,53 @@ #define BALLOON_PAGE_SIZE (1 << VIRTIO_BALLOON_PFN_SHIFT) -struct PartiallyBalloonedPage { - RAMBlock *rb; - ram_addr_t base; - unsigned long bitmap[]; -}; +typedef struct PartiallyBalloonedPage { + ram_addr_t base_gpa; + unsigned long *bitmap; +} PartiallyBalloonedPage; + +static void virtio_balloon_pbp_free(PartiallyBalloonedPage *pbp) +{ + if (!pbp->bitmap) { + return; + } + g_free(pbp->bitmap); + pbp->bitmap = NULL; +} + +static void virtio_balloon_pbp_alloc(PartiallyBalloonedPage *pbp, + ram_addr_t base_gpa, + long subpages) +{ + pbp->base_gpa = base_gpa; + pbp->bitmap = bitmap_new(subpages); +} + +static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp, + ram_addr_t base_gpa) +{ + return pbp->base_gpa == base_gpa; +} static void balloon_inflate_page(VirtIOBalloon *balloon, - MemoryRegion *mr, hwaddr offset) + MemoryRegion *mr, hwaddr mr_offset, + PartiallyBalloonedPage *pbp) { - void *addr = memory_region_get_ram_ptr(mr) + offset; + void *addr = memory_region_get_ram_ptr(mr) + mr_offset; + ram_addr_t rb_offset, rb_aligned_offset, base_gpa; RAMBlock *rb; size_t rb_page_size; int subpages; - ram_addr_t ram_offset, host_page_base; /* XXX is there a better way to get to the RAMBlock than via a * host address? */ - rb = qemu_ram_block_from_host(addr, false, &ram_offset); + rb = qemu_ram_block_from_host(addr, false, &rb_offset); rb_page_size = qemu_ram_pagesize(rb); - host_page_base = ram_offset & ~(rb_page_size - 1); if (rb_page_size == BALLOON_PAGE_SIZE) { /* Easy case */ - ram_block_discard_range(rb, ram_offset, rb_page_size); + ram_block_discard_range(rb, rb_offset, rb_page_size); /* We ignore errors from ram_block_discard_range(), because it * has already reported them, and failing to discard a balloon * page is not fatal */ @@ -74,81 +96,51 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, warn_report_once( "Balloon used with backing page size > 4kiB, this may not be reliable"); + rb_aligned_offset = QEMU_ALIGN_DOWN(rb_offset, rb_page_size); subpages = rb_page_size / BALLOON_PAGE_SIZE; + base_gpa = memory_region_get_ram_addr(mr) + mr_offset - + (rb_offset - rb_aligned_offset); - if (balloon->pbp - && (rb != balloon->pbp->rb - || host_page_base != balloon->pbp->base)) { + if (pbp->bitmap && !virtio_balloon_pbp_matches(pbp, base_gpa)) { /* We've partially ballooned part of a host page, but now * we're trying to balloon part of a different one. Too hard, * give up on the old partial page */ - g_free(balloon->pbp); - balloon->pbp = NULL; + virtio_balloon_pbp_free(pbp); } - if (!balloon->pbp) { - /* Starting on a new host page */ - size_t bitlen = BITS_TO_LONGS(subpages) * sizeof(unsigned long); - balloon->pbp = g_malloc0(sizeof(PartiallyBalloonedPage) + bitlen); - balloon->pbp->rb = rb; - balloon->pbp->base = host_page_base; + if (!pbp->bitmap) { + virtio_balloon_pbp_alloc(pbp, base_gpa, subpages); } - bitmap_set(balloon->pbp->bitmap, - (ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, - subpages); + set_bit((rb_offset - rb_aligned_offset) / BALLOON_PAGE_SIZE, + pbp->bitmap); - if (bitmap_full(balloon->pbp->bitmap, subpages)) { + if (bitmap_full(pbp->bitmap, subpages)) { /* We've accumulated a full host page, we can actually discard * it now */ - ram_block_discard_range(rb, balloon->pbp->base, rb_page_size); + ram_block_discard_range(rb, rb_aligned_offset, rb_page_size); /* We ignore errors from ram_block_discard_range(), because it * has already reported them, and failing to discard a balloon * page is not fatal */ - - g_free(balloon->pbp); - balloon->pbp = NULL; + virtio_balloon_pbp_free(pbp); } } static void balloon_deflate_page(VirtIOBalloon *balloon, - MemoryRegion *mr, hwaddr offset) + MemoryRegion *mr, hwaddr mr_offset) { - void *addr = memory_region_get_ram_ptr(mr) + offset; + void *addr = memory_region_get_ram_ptr(mr) + mr_offset; + ram_addr_t rb_offset; RAMBlock *rb; size_t rb_page_size; - ram_addr_t ram_offset, host_page_base; void *host_addr; int ret; /* XXX is there a better way to get to the RAMBlock than via a * host address? */ - rb = qemu_ram_block_from_host(addr, false, &ram_offset); + rb = qemu_ram_block_from_host(addr, false, &rb_offset); rb_page_size = qemu_ram_pagesize(rb); - host_page_base = ram_offset & ~(rb_page_size - 1); - - if (balloon->pbp - && rb == balloon->pbp->rb - && host_page_base == balloon->pbp->base) { - int subpages = rb_page_size / BALLOON_PAGE_SIZE; - - /* - * This means the guest has asked to discard some of the 4kiB - * subpages of a host page, but then changed its mind and - * asked to keep them after all. It's exceedingly unlikely - * for a guest to do this in practice, but handle it anyway, - * since getting it wrong could mean discarding memory the - * guest is still using. */ - bitmap_clear(balloon->pbp->bitmap, - (ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, - subpages); - - if (bitmap_empty(balloon->pbp->bitmap, subpages)) { - g_free(balloon->pbp); - balloon->pbp = NULL; - } - } host_addr = (void *)((uintptr_t)addr & ~(rb_page_size - 1)); @@ -335,16 +327,18 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) MemoryRegionSection section; for (;;) { + PartiallyBalloonedPage pbp = {}; size_t offset = 0; uint32_t pfn; + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); if (!elem) { - return; + break; } while (iov_to_buf(elem->out_sg, elem->out_num, offset, &pfn, 4) == 4) { + unsigned int p = virtio_ldl_p(vdev, &pfn); hwaddr pa; - int p = virtio_ldl_p(vdev, &pfn); pa = (hwaddr) p << VIRTIO_BALLOON_PFN_SHIFT; offset += 4; @@ -368,7 +362,7 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) if (!qemu_balloon_is_inhibited()) { if (vq == s->ivq) { balloon_inflate_page(s, section.mr, - section.offset_within_region); + section.offset_within_region, &pbp); } else if (vq == s->dvq) { balloon_deflate_page(s, section.mr, section.offset_within_region); } else { @@ -381,6 +375,7 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) virtqueue_push(vq, elem, offset); virtio_notify(vdev, vq); g_free(elem); + virtio_balloon_pbp_free(&pbp); } } diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h index 5a99293a45..d1c968d237 100644 --- a/include/hw/virtio/virtio-balloon.h +++ b/include/hw/virtio/virtio-balloon.h @@ -33,8 +33,6 @@ typedef struct virtio_balloon_stat_modern { uint64_t val; } VirtIOBalloonStatModern; -typedef struct PartiallyBalloonedPage PartiallyBalloonedPage; - enum virtio_balloon_free_page_report_status { FREE_PAGE_REPORT_S_STOP = 0, FREE_PAGE_REPORT_S_REQUESTED = 1, @@ -70,7 +68,6 @@ typedef struct VirtIOBalloon { int64_t stats_last_update; int64_t stats_poll_interval; uint32_t host_features; - PartiallyBalloonedPage *pbp; bool qemu_4_0_config_size; } VirtIOBalloon; diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index ca68584eba..c2bfa7a7f0 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -227,8 +227,22 @@ extern const VMStateInfo vmstate_info_bitmap; extern const VMStateInfo vmstate_info_qtailq; #define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0) +/* + * Check that type t2 is an array of type t1 of size n, + * e.g. if t1 is 'foo' and n is 32 then t2 must be 'foo[32]' + */ #define type_check_array(t1,t2,n) ((t1(*)[n])0 - (t2*)0) #define type_check_pointer(t1,t2) ((t1**)0 - (t2*)0) +/* + * type of element 0 of the specified (array) field of the type. + * Note that if the field is a pointer then this will return the + * pointed-to type rather than complaining. + */ +#define typeof_elt_of_field(type, field) typeof(((type *)0)->field[0]) +/* Check that field f in struct type t2 is an array of t1, of any size */ +#define type_check_varray(t1, t2, f) \ + (type_check(t1, typeof_elt_of_field(t2, f)) \ + + QEMU_BUILD_BUG_ON_ZERO(!QEMU_IS_ARRAY(((t2 *)0)->f))) #define vmstate_offset_value(_state, _field, _type) \ (offsetof(_state, _field) + \ @@ -253,6 +267,10 @@ extern const VMStateInfo vmstate_info_qtailq; vmstate_offset_array(_state, _field, uint8_t, \ sizeof(typeof_field(_state, _field))) +#define vmstate_offset_varray(_state, _field, _type) \ + (offsetof(_state, _field) + \ + type_check_varray(_type, _state, _field)) + /* In the macros below, if there is a _version, that means the macro's * field will be processed only if the version being received is >= * the _version specified. In general, if you add a new field, you @@ -347,7 +365,7 @@ extern const VMStateInfo vmstate_info_qtailq; .info = &(_info), \ .size = sizeof(_type), \ .flags = VMS_VARRAY_UINT32|VMS_MULTIPLY_ELEMENTS, \ - .offset = offsetof(_state, _field), \ + .offset = vmstate_offset_varray(_state, _field, _type), \ } #define VMSTATE_ARRAY_TEST(_field, _state, _num, _test, _info, _type) {\ @@ -376,7 +394,7 @@ extern const VMStateInfo vmstate_info_qtailq; .info = &(_info), \ .size = sizeof(_type), \ .flags = VMS_VARRAY_INT32, \ - .offset = offsetof(_state, _field), \ + .offset = vmstate_offset_varray(_state, _field, _type), \ } #define VMSTATE_VARRAY_INT32(_field, _state, _field_num, _version, _info, _type) {\ @@ -416,7 +434,7 @@ extern const VMStateInfo vmstate_info_qtailq; .info = &(_info), \ .size = sizeof(_type), \ .flags = VMS_VARRAY_UINT16, \ - .offset = offsetof(_state, _field), \ + .offset = vmstate_offset_varray(_state, _field, _type), \ } #define VMSTATE_VSTRUCT_TEST(_field, _state, _test, _version, _vmsd, _type, _struct_version) { \ @@ -520,7 +538,7 @@ extern const VMStateInfo vmstate_info_qtailq; .vmsd = &(_vmsd), \ .size = sizeof(_type), \ .flags = VMS_STRUCT|VMS_VARRAY_UINT8, \ - .offset = offsetof(_state, _field), \ + .offset = vmstate_offset_varray(_state, _field, _type), \ } /* a variable length array (i.e. _type *_field) but we know the @@ -573,7 +591,7 @@ extern const VMStateInfo vmstate_info_qtailq; .vmsd = &(_vmsd), \ .size = sizeof(_type), \ .flags = VMS_STRUCT|VMS_VARRAY_INT32, \ - .offset = offsetof(_state, _field), \ + .offset = vmstate_offset_varray(_state, _field, _type), \ } #define VMSTATE_STRUCT_VARRAY_UINT32(_field, _state, _field_num, _version, _vmsd, _type) { \ @@ -583,7 +601,7 @@ extern const VMStateInfo vmstate_info_qtailq; .vmsd = &(_vmsd), \ .size = sizeof(_type), \ .flags = VMS_STRUCT|VMS_VARRAY_UINT32, \ - .offset = offsetof(_state, _field), \ + .offset = vmstate_offset_varray(_state, _field, _type), \ } #define VMSTATE_STRUCT_VARRAY_ALLOC(_field, _state, _field_num, _version, _vmsd, _type) {\ diff --git a/linux-user/hppa/signal.c b/linux-user/hppa/signal.c index b6927ee673..d1a58feeb3 100644 --- a/linux-user/hppa/signal.c +++ b/linux-user/hppa/signal.c @@ -111,10 +111,11 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, abi_ulong frame_addr, sp, haddr; struct target_rt_sigframe *frame; int i; + TaskState *ts = (TaskState *)thread_cpu->opaque; sp = get_sp_from_cpustate(env); if ((ka->sa_flags & TARGET_SA_ONSTACK) && !sas_ss_flags(sp)) { - sp = (target_sigaltstack_used.ss_sp + 0x7f) & ~0x3f; + sp = (ts->sigaltstack_used.ss_sp + 0x7f) & ~0x3f; } frame_addr = QEMU_ALIGN_UP(sp, 64); sp = frame_addr + PARISC_RT_SIGFRAME_SIZE32; diff --git a/linux-user/main.c b/linux-user/main.c index a59ae9439d..8ffc525195 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -180,6 +180,11 @@ void stop_all_tasks(void) void init_task_state(TaskState *ts) { ts->used = 1; + ts->sigaltstack_used = (struct target_sigaltstack) { + .ss_sp = 0, + .ss_size = 0, + .ss_flags = TARGET_SS_DISABLE, + }; } CPUArchState *cpu_copy(CPUArchState *env) diff --git a/linux-user/qemu.h b/linux-user/qemu.h index 4258e4162d..aac0334627 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -151,6 +151,8 @@ typedef struct TaskState { */ int signal_pending; + /* This thread's sigaltstack, if it has one */ + struct target_sigaltstack sigaltstack_used; } __attribute__((aligned(16))) TaskState; extern char *exec_path; diff --git a/linux-user/signal-common.h b/linux-user/signal-common.h index 51030a9306..1df1068552 100644 --- a/linux-user/signal-common.h +++ b/linux-user/signal-common.h @@ -19,7 +19,6 @@ #ifndef SIGNAL_COMMON_H #define SIGNAL_COMMON_H -extern struct target_sigaltstack target_sigaltstack_used; int on_sig_stack(unsigned long sp); int sas_ss_flags(unsigned long sp); diff --git a/linux-user/signal.c b/linux-user/signal.c index 5cd237834d..5ca6d62b15 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -25,12 +25,6 @@ #include "trace.h" #include "signal-common.h" -struct target_sigaltstack target_sigaltstack_used = { - .ss_sp = 0, - .ss_size = 0, - .ss_flags = TARGET_SS_DISABLE, -}; - static struct target_sigaction sigact_table[TARGET_NSIG]; static void host_signal_handler(int host_signum, siginfo_t *info, @@ -251,13 +245,17 @@ void set_sigmask(const sigset_t *set) int on_sig_stack(unsigned long sp) { - return (sp - target_sigaltstack_used.ss_sp - < target_sigaltstack_used.ss_size); + TaskState *ts = (TaskState *)thread_cpu->opaque; + + return (sp - ts->sigaltstack_used.ss_sp + < ts->sigaltstack_used.ss_size); } int sas_ss_flags(unsigned long sp) { - return (target_sigaltstack_used.ss_size == 0 ? SS_DISABLE + TaskState *ts = (TaskState *)thread_cpu->opaque; + + return (ts->sigaltstack_used.ss_size == 0 ? SS_DISABLE : on_sig_stack(sp) ? SS_ONSTACK : 0); } @@ -266,17 +264,21 @@ abi_ulong target_sigsp(abi_ulong sp, struct target_sigaction *ka) /* * This is the X/Open sanctioned signal stack switching. */ + TaskState *ts = (TaskState *)thread_cpu->opaque; + if ((ka->sa_flags & TARGET_SA_ONSTACK) && !sas_ss_flags(sp)) { - return target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size; + return ts->sigaltstack_used.ss_sp + ts->sigaltstack_used.ss_size; } return sp; } void target_save_altstack(target_stack_t *uss, CPUArchState *env) { - __put_user(target_sigaltstack_used.ss_sp, &uss->ss_sp); + TaskState *ts = (TaskState *)thread_cpu->opaque; + + __put_user(ts->sigaltstack_used.ss_sp, &uss->ss_sp); __put_user(sas_ss_flags(get_sp_from_cpustate(env)), &uss->ss_flags); - __put_user(target_sigaltstack_used.ss_size, &uss->ss_size); + __put_user(ts->sigaltstack_used.ss_size, &uss->ss_size); } /* siginfo conversion */ @@ -708,12 +710,13 @@ abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong sp) { int ret; struct target_sigaltstack oss; + TaskState *ts = (TaskState *)thread_cpu->opaque; /* XXX: test errors */ if(uoss_addr) { - __put_user(target_sigaltstack_used.ss_sp, &oss.ss_sp); - __put_user(target_sigaltstack_used.ss_size, &oss.ss_size); + __put_user(ts->sigaltstack_used.ss_sp, &oss.ss_sp); + __put_user(ts->sigaltstack_used.ss_size, &oss.ss_size); __put_user(sas_ss_flags(sp), &oss.ss_flags); } @@ -760,8 +763,8 @@ abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong sp) } } - target_sigaltstack_used.ss_sp = ss.ss_sp; - target_sigaltstack_used.ss_size = ss.ss_size; + ts->sigaltstack_used.ss_sp = ss.ss_sp; + ts->sigaltstack_used.ss_size = ss.ss_size; } if (uoss_addr) { diff --git a/migration/ram.c b/migration/ram.c index 2b0774c2bf..889148dd84 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -677,6 +677,8 @@ typedef struct { QemuMutex mutex; /* is this channel thread running */ bool running; + /* should this thread finish */ + bool quit; /* array of pages to receive */ MultiFDPages_t *pages; /* packet allocated len */ @@ -920,7 +922,7 @@ struct { * false. */ -static void multifd_send_pages(void) +static int multifd_send_pages(void) { int i; static int next_channel; @@ -933,6 +935,11 @@ static void multifd_send_pages(void) p = &multifd_send_state->params[i]; qemu_mutex_lock(&p->mutex); + if (p->quit) { + error_report("%s: channel %d has already quit!", __func__, i); + qemu_mutex_unlock(&p->mutex); + return -1; + } if (!p->pending_job) { p->pending_job++; next_channel = (i + 1) % migrate_multifd_channels(); @@ -951,9 +958,11 @@ static void multifd_send_pages(void) ram_counters.transferred += transferred;; qemu_mutex_unlock(&p->mutex); qemu_sem_post(&p->sem); + + return 1; } -static void multifd_queue_page(RAMBlock *block, ram_addr_t offset) +static int multifd_queue_page(RAMBlock *block, ram_addr_t offset) { MultiFDPages_t *pages = multifd_send_state->pages; @@ -968,15 +977,19 @@ static void multifd_queue_page(RAMBlock *block, ram_addr_t offset) pages->used++; if (pages->used < pages->allocated) { - return; + return 1; } } - multifd_send_pages(); + if (multifd_send_pages() < 0) { + return -1; + } if (pages->block != block) { - multifd_queue_page(block, offset); + return multifd_queue_page(block, offset); } + + return 1; } static void multifd_send_terminate_threads(Error *err) @@ -1049,7 +1062,10 @@ static void multifd_send_sync_main(void) return; } if (multifd_send_state->pages->used) { - multifd_send_pages(); + if (multifd_send_pages() < 0) { + error_report("%s: multifd_send_pages fail", __func__); + return; + } } for (i = 0; i < migrate_multifd_channels(); i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; @@ -1058,6 +1074,12 @@ static void multifd_send_sync_main(void) qemu_mutex_lock(&p->mutex); + if (p->quit) { + error_report("%s: channel %d has already quit", __func__, i); + qemu_mutex_unlock(&p->mutex); + return; + } + p->packet_num = multifd_send_state->packet_num++; p->flags |= MULTIFD_FLAG_SYNC; p->pending_job++; @@ -1077,7 +1099,8 @@ static void *multifd_send_thread(void *opaque) { MultiFDSendParams *p = opaque; Error *local_err = NULL; - int ret; + int ret = 0; + uint32_t flags = 0; trace_multifd_send_thread_start(p->id); rcu_register_thread(); @@ -1095,7 +1118,7 @@ static void *multifd_send_thread(void *opaque) if (p->pending_job) { uint32_t used = p->pages->used; uint64_t packet_num = p->packet_num; - uint32_t flags = p->flags; + flags = p->flags; p->next_packet_size = used * qemu_target_page_size(); multifd_send_fill_packet(p); @@ -1144,6 +1167,17 @@ out: multifd_send_terminate_threads(local_err); } + /* + * Error happen, I will exit, but I can't just leave, tell + * who pay attention to me. + */ + if (ret != 0) { + if (flags & MULTIFD_FLAG_SYNC) { + qemu_sem_post(&multifd_send_state->sem_sync); + } + qemu_sem_post(&multifd_send_state->channels_ready); + } + qemu_mutex_lock(&p->mutex); p->running = false; qemu_mutex_unlock(&p->mutex); @@ -1234,6 +1268,7 @@ static void multifd_recv_terminate_threads(Error *err) MultiFDRecvParams *p = &multifd_recv_state->params[i]; qemu_mutex_lock(&p->mutex); + p->quit = true; /* We could arrive here for two reasons: - normal quit, i.e. everything went fine, just finished - error quit: We close the channels so the channel threads @@ -1256,6 +1291,12 @@ int multifd_load_cleanup(Error **errp) MultiFDRecvParams *p = &multifd_recv_state->params[i]; if (p->running) { + p->quit = true; + /* + * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code, + * however try to wakeup it without harm in cleanup phase. + */ + qemu_sem_post(&p->sem_sync); qemu_thread_join(&p->thread); } object_unref(OBJECT(p->c)); @@ -1319,6 +1360,10 @@ static void *multifd_recv_thread(void *opaque) uint32_t used; uint32_t flags; + if (p->quit) { + break; + } + ret = qio_channel_read_all_eof(p->c, (void *)p->packet, p->packet_len, &local_err); if (ret == 0) { /* EOF */ @@ -1390,6 +1435,7 @@ int multifd_load_setup(void) qemu_mutex_init(&p->mutex); qemu_sem_init(&p->sem_sync, 0); + p->quit = false; p->id = i; p->pages = multifd_pages_init(page_count); p->packet_len = sizeof(MultiFDPacket_t) @@ -2033,7 +2079,9 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) static int ram_save_multifd_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) { - multifd_queue_page(block, offset); + if (multifd_queue_page(block, offset) < 0) { + return -1; + } ram_counters.normal++; return 1; diff --git a/target/i386/kvm.c b/target/i386/kvm.c index ada89d27cc..dbbb13772a 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -3563,12 +3563,12 @@ int kvm_arch_put_registers(CPUState *cpu, int level) assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); - ret = kvm_put_nested_state(x86_cpu); - if (ret < 0) { - return ret; - } - if (level >= KVM_PUT_RESET_STATE) { + ret = kvm_put_nested_state(x86_cpu); + if (ret < 0) { + return ret; + } + ret = kvm_put_msr_feature_control(x86_cpu); if (ret < 0) { return ret; |