34 files changed, 878 insertions, 589 deletions
diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst index e3af79bb8c..b33d7c28dc 100644 --- a/docs/system/arm/emulation.rst +++ b/docs/system/arm/emulation.rst @@ -26,6 +26,7 @@ the following architecture extensions: - FEAT_DoubleFault (Double Fault Extension) - FEAT_E0PD (Preventing EL0 access to halves of address maps) - FEAT_ETS (Enhanced Translation Synchronization) +- FEAT_EVT (Enhanced Virtualization Traps) - FEAT_FCMA (Floating-point complex number instructions) - FEAT_FHM (Floating-point half-precision multiplication instructions) - FEAT_FP16 (Half-precision floating-point data processing) diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst index 20442ea2c1..1cab33f02e 100644 --- a/docs/system/arm/virt.rst +++ b/docs/system/arm/virt.rst @@ -54,6 +54,7 @@ Supported guest CPU types: - ``cortex-a15`` (32-bit; the default) - ``cortex-a35`` (64-bit) - ``cortex-a53`` (64-bit) +- ``cortex-a55`` (64-bit) - ``cortex-a57`` (64-bit) - ``cortex-a72`` (64-bit) - ``cortex-a76`` (64-bit) @@ -94,6 +95,23 @@ highmem address space above 32 bits. The default is ``on`` for machine types later than ``virt-2.12``. +compact-highmem + Set ``on``/``off`` to enable/disable the compact layout for high memory regions. + The default is ``on`` for machine types later than ``virt-7.2``. + +highmem-redists + Set ``on``/``off`` to enable/disable the high memory region for GICv3 or + GICv4 redistributor. The default is ``on``. Setting this to ``off`` will + limit the maximum number of CPUs when GICv3 or GICv4 is used. + +highmem-ecam + Set ``on``/``off`` to enable/disable the high memory region for PCI ECAM. + The default is ``on`` for machine types later than ``virt-3.0``. + +highmem-mmio + Set ``on``/``off`` to enable/disable the high memory region for PCI MMIO. + The default is ``on``. + gic-version Specify the version of the Generic Interrupt Controller (GIC) to provide. 
Valid values are: diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 15c2bf1867..3d7d11f782 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -656,15 +656,17 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, } if (binfo->initrd_size) { - rc = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start", - binfo->initrd_start); + rc = qemu_fdt_setprop_sized_cells(fdt, "/chosen", "linux,initrd-start", + acells, binfo->initrd_start); if (rc < 0) { fprintf(stderr, "couldn't set /chosen/linux,initrd-start\n"); goto fail; } - rc = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end", - binfo->initrd_start + binfo->initrd_size); + rc = qemu_fdt_setprop_sized_cells(fdt, "/chosen", "linux,initrd-end", + acells, + binfo->initrd_start + + binfo->initrd_size); if (rc < 0) { fprintf(stderr, "couldn't set /chosen/linux,initrd-end\n"); goto fail; diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c index e09b9c13b7..220838525d 100644 --- a/hw/arm/smmu-common.c +++ b/hw/arm/smmu-common.c @@ -526,9 +526,9 @@ static void smmu_base_realize(DeviceState *dev, Error **errp) } } -static void smmu_base_reset(DeviceState *dev) +static void smmu_base_reset_hold(Object *obj) { - SMMUState *s = ARM_SMMU(dev); + SMMUState *s = ARM_SMMU(obj); g_hash_table_remove_all(s->configs); g_hash_table_remove_all(s->iotlb); @@ -543,12 +543,13 @@ static Property smmu_dev_properties[] = { static void smmu_base_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); SMMUBaseClass *sbc = ARM_SMMU_CLASS(klass); device_class_set_props(dc, smmu_dev_properties); device_class_set_parent_realize(dc, smmu_base_realize, &sbc->parent_realize); - dc->reset = smmu_base_reset; + rc->phases.hold = smmu_base_reset_hold; } static const TypeInfo smmu_base_info = { diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c index daa80e9c7b..955b89c8d5 100644 --- a/hw/arm/smmuv3.c +++ b/hw/arm/smmuv3.c @@ -1431,12 +1431,14 @@ static void smmu_init_irq(SMMUv3State *s, SysBusDevice *dev) } } -static void smmu_reset(DeviceState *dev) +static void smmu_reset_hold(Object *obj) { - SMMUv3State *s = ARM_SMMUV3(dev); + SMMUv3State *s = ARM_SMMUV3(obj); SMMUv3Class *c = ARM_SMMUV3_GET_CLASS(s); - c->parent_reset(dev); + if (c->parent_phases.hold) { + c->parent_phases.hold(obj); + } smmuv3_init_regs(s); } @@ -1520,10 +1522,12 @@ static void smmuv3_instance_init(Object *obj) static void smmuv3_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); SMMUv3Class *c = ARM_SMMUV3_CLASS(klass); dc->vmsd = &vmstate_smmuv3; - device_class_set_parent_reset(dc, smmu_reset, &c->parent_reset); + resettable_class_set_parent_phases(rc, NULL, smmu_reset_hold, NULL, + &c->parent_phases); c->parent_realize = dc->realize; dc->realize = smmu_realize; } diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 02d627a5ab..04eb6c201d 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -174,6 +174,12 @@ static const MemMapEntry base_memmap[] = { * Note the extended_memmap is sized so that it eventually also includes the * base_memmap entries (VIRT_HIGH_GIC_REDIST2 index is greater than the last * index of base_memmap). + * + * The memory map for these Highmem IO Regions can be in legacy or compact + * layout, depending on 'compact-highmem' property. With legacy layout, the + * PA space for one specific region is always reserved, even if the region + * has been disabled or doesn't fit into the PA space. 
However, the PA space + * for the region won't be reserved in these circumstances with compact layout. */ static MemMapEntry extended_memmap[] = { /* Additional 64 MB redist region (can contain up to 512 redistributors) */ @@ -201,6 +207,7 @@ static const char *valid_cpus[] = { ARM_CPU_TYPE_NAME("cortex-a15"), ARM_CPU_TYPE_NAME("cortex-a35"), ARM_CPU_TYPE_NAME("cortex-a53"), + ARM_CPU_TYPE_NAME("cortex-a55"), ARM_CPU_TYPE_NAME("cortex-a57"), ARM_CPU_TYPE_NAME("cortex-a72"), ARM_CPU_TYPE_NAME("cortex-a76"), @@ -1608,9 +1615,11 @@ static void *machvirt_dtb(const struct arm_boot_info *binfo, int *fdt_size) static void virt_build_smbios(VirtMachineState *vms) { MachineClass *mc = MACHINE_GET_CLASS(vms); + MachineState *ms = MACHINE(vms); VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); uint8_t *smbios_tables, *smbios_anchor; size_t smbios_tables_len, smbios_anchor_len; + struct smbios_phys_mem_area mem_array; const char *product = "QEMU Virtual Machine"; if (kvm_enabled()) { @@ -1621,7 +1630,11 @@ static void virt_build_smbios(VirtMachineState *vms) vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, true, SMBIOS_ENTRY_POINT_TYPE_64); - smbios_get_tables(MACHINE(vms), NULL, 0, + /* build the array of physical mem area from base_memmap */ + mem_array.address = vms->memmap[VIRT_MEM].base; + mem_array.length = ms->ram_size; + + smbios_get_tables(ms, &mem_array, 1, &smbios_tables, &smbios_tables_len, &smbios_anchor, &smbios_anchor_len, &error_fatal); @@ -1690,6 +1703,58 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) return arm_cpu_mp_affinity(idx, clustersz); } +static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms, + int index) +{ + bool *enabled_array[] = { + &vms->highmem_redists, + &vms->highmem_ecam, + &vms->highmem_mmio, + }; + + assert(ARRAY_SIZE(extended_memmap) - VIRT_LOWMEMMAP_LAST == + ARRAY_SIZE(enabled_array)); + assert(index - VIRT_LOWMEMMAP_LAST < ARRAY_SIZE(enabled_array)); + + return enabled_array[index - VIRT_LOWMEMMAP_LAST]; +} + +static void virt_set_high_memmap(VirtMachineState *vms, + hwaddr base, int pa_bits) +{ + hwaddr region_base, region_size; + bool *region_enabled, fits; + int i; + + for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { + region_enabled = virt_get_high_memmap_enabled(vms, i); + region_base = ROUND_UP(base, extended_memmap[i].size); + region_size = extended_memmap[i].size; + + vms->memmap[i].base = region_base; + vms->memmap[i].size = region_size; + + /* + * Check each device to see if it fits in the PA space, + * moving highest_gpa as we go. For compatibility, move + * highest_gpa for disabled fitting devices as well, if + * the compact layout has been disabled. + * + * For each device that doesn't fit, disable it. 
+ */ + fits = (region_base + region_size) <= BIT_ULL(pa_bits); + *region_enabled &= fits; + if (vms->highmem_compact && !*region_enabled) { + continue; + } + + base = region_base + region_size; + if (fits) { + vms->highest_gpa = base - 1; + } + } +} + static void virt_set_memmap(VirtMachineState *vms, int pa_bits) { MachineState *ms = MACHINE(vms); @@ -1745,39 +1810,7 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits) /* We know for sure that at least the memory fits in the PA space */ vms->highest_gpa = memtop - 1; - for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) { - hwaddr size = extended_memmap[i].size; - bool fits; - - base = ROUND_UP(base, size); - vms->memmap[i].base = base; - vms->memmap[i].size = size; - - /* - * Check each device to see if they fit in the PA space, - * moving highest_gpa as we go. - * - * For each device that doesn't fit, disable it. - */ - fits = (base + size) <= BIT_ULL(pa_bits); - if (fits) { - vms->highest_gpa = base + size - 1; - } - - switch (i) { - case VIRT_HIGH_GIC_REDIST2: - vms->highmem_redists &= fits; - break; - case VIRT_HIGH_PCIE_ECAM: - vms->highmem_ecam &= fits; - break; - case VIRT_HIGH_PCIE_MMIO: - vms->highmem_mmio &= fits; - break; - } - - base += size; - } + virt_set_high_memmap(vms, base, pa_bits); if (device_memory_size > 0) { ms->device_memory = g_malloc0(sizeof(*ms->device_memory)); @@ -2070,14 +2103,20 @@ static void machvirt_init(MachineState *machine) if (vms->gic_version == VIRT_GIC_VERSION_2) { virt_max_cpus = GIC_NCPU; } else { - virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST) + - virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2); + virt_max_cpus = virt_redist_capacity(vms, VIRT_GIC_REDIST); + if (vms->highmem_redists) { + virt_max_cpus += virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2); + } } if (max_cpus > virt_max_cpus) { error_report("Number of SMP CPUs requested (%d) exceeds max CPUs " "supported by machine 'mach-virt' (%d)", max_cpus, virt_max_cpus); + if (vms->gic_version != VIRT_GIC_VERSION_2 && !vms->highmem_redists) { + error_printf("Try 'highmem-redists=on' for more CPUs\n"); + } + exit(1); } @@ -2332,6 +2371,63 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) vms->highmem = value; } +static bool virt_get_compact_highmem(Object *obj, Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + return vms->highmem_compact; +} + +static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + vms->highmem_compact = value; +} + +static bool virt_get_highmem_redists(Object *obj, Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + return vms->highmem_redists; +} + +static void virt_set_highmem_redists(Object *obj, bool value, Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + vms->highmem_redists = value; +} + +static bool virt_get_highmem_ecam(Object *obj, Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + return vms->highmem_ecam; +} + +static void virt_set_highmem_ecam(Object *obj, bool value, Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + vms->highmem_ecam = value; +} + +static bool virt_get_highmem_mmio(Object *obj, Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + return vms->highmem_mmio; +} + +static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + vms->highmem_mmio = value; +} + + static bool virt_get_its(Object *obj, Error **errp) { 
VirtMachineState *vms = VIRT_MACHINE(obj); @@ -2946,6 +3042,35 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) "Set on/off to enable/disable using " "physical address space above 32 bits"); + object_class_property_add_bool(oc, "compact-highmem", + virt_get_compact_highmem, + virt_set_compact_highmem); + object_class_property_set_description(oc, "compact-highmem", + "Set on/off to enable/disable compact " + "layout for high memory regions"); + + object_class_property_add_bool(oc, "highmem-redists", + virt_get_highmem_redists, + virt_set_highmem_redists); + object_class_property_set_description(oc, "highmem-redists", + "Set on/off to enable/disable high " + "memory region for GICv3 or GICv4 " + "redistributor"); + + object_class_property_add_bool(oc, "highmem-ecam", + virt_get_highmem_ecam, + virt_set_highmem_ecam); + object_class_property_set_description(oc, "highmem-ecam", + "Set on/off to enable/disable high " + "memory region for PCI ECAM"); + + object_class_property_add_bool(oc, "highmem-mmio", + virt_get_highmem_mmio, + virt_set_highmem_mmio); + object_class_property_set_description(oc, "highmem-mmio", + "Set on/off to enable/disable high " + "memory region for PCI MMIO"); + object_class_property_add_str(oc, "gic-version", virt_get_gic_version, virt_set_gic_version); object_class_property_set_description(oc, "gic-version", @@ -3030,6 +3155,7 @@ static void virt_instance_init(Object *obj) /* High memory is enabled by default */ vms->highmem = true; + vms->highmem_compact = !vmc->no_highmem_compact; vms->gic_version = VIRT_GIC_VERSION_NOSEL; vms->highmem_ecam = !vmc->no_highmem_ecam; @@ -3099,8 +3225,12 @@ DEFINE_VIRT_MACHINE_AS_LATEST(7, 2) static void virt_machine_7_1_options(MachineClass *mc) { + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + virt_machine_7_2_options(mc); compat_props_add(mc->compat_props, hw_compat_7_1, hw_compat_7_1_len); + /* Compact layout for high memory regions was introduced with 7.2 */ + vmc->no_highmem_compact = true; } DEFINE_VIRT_MACHINE(7, 1) diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c index 7b44d5625b..a379cea395 100644 --- a/hw/intc/arm_gic_common.c +++ b/hw/intc/arm_gic_common.c @@ -261,9 +261,9 @@ static inline void arm_gic_common_reset_irq_state(GICState *s, int first_cpu, } } -static void arm_gic_common_reset(DeviceState *dev) +static void arm_gic_common_reset_hold(Object *obj) { - GICState *s = ARM_GIC_COMMON(dev); + GICState *s = ARM_GIC_COMMON(obj); int i, j; int resetprio; @@ -364,9 +364,10 @@ static Property arm_gic_common_properties[] = { static void arm_gic_common_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); ARMLinuxBootIfClass *albifc = ARM_LINUX_BOOT_IF_CLASS(klass); - dc->reset = arm_gic_common_reset; + rc->phases.hold = arm_gic_common_reset_hold; dc->realize = arm_gic_common_realize; device_class_set_props(dc, arm_gic_common_properties); dc->vmsd = &vmstate_gic; diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c index 7d2a13273a..1d588946bc 100644 --- a/hw/intc/arm_gic_kvm.c +++ b/hw/intc/arm_gic_kvm.c @@ -38,7 +38,7 @@ DECLARE_OBJ_CHECKERS(GICState, KVMARMGICClass, struct KVMARMGICClass { ARMGICCommonClass parent_class; DeviceRealize parent_realize; - void (*parent_reset)(DeviceState *dev); + ResettablePhases parent_phases; }; void kvm_arm_gic_set_irq(uint32_t num_irq, int irq, int level) @@ -473,12 +473,14 @@ static void kvm_arm_gic_get(GICState *s) } } -static void 
kvm_arm_gic_reset(DeviceState *dev) +static void kvm_arm_gic_reset_hold(Object *obj) { - GICState *s = ARM_GIC_COMMON(dev); + GICState *s = ARM_GIC_COMMON(obj); KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s); - kgc->parent_reset(dev); + if (kgc->parent_phases.hold) { + kgc->parent_phases.hold(obj); + } if (kvm_arm_gic_can_save_restore(s)) { kvm_arm_gic_put(s); @@ -593,6 +595,7 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp) static void kvm_arm_gic_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); ARMGICCommonClass *agcc = ARM_GIC_COMMON_CLASS(klass); KVMARMGICClass *kgc = KVM_ARM_GIC_CLASS(klass); @@ -600,7 +603,8 @@ static void kvm_arm_gic_class_init(ObjectClass *klass, void *data) agcc->post_load = kvm_arm_gic_put; device_class_set_parent_realize(dc, kvm_arm_gic_realize, &kgc->parent_realize); - device_class_set_parent_reset(dc, kvm_arm_gic_reset, &kgc->parent_reset); + resettable_class_set_parent_phases(rc, NULL, kvm_arm_gic_reset_hold, NULL, + &kgc->parent_phases); } static const TypeInfo kvm_arm_gic_info = { diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c index 351843db4a..642a8243ed 100644 --- a/hw/intc/arm_gicv3_common.c +++ b/hw/intc/arm_gicv3_common.c @@ -450,9 +450,9 @@ static void arm_gicv3_finalize(Object *obj) g_free(s->redist_region_count); } -static void arm_gicv3_common_reset(DeviceState *dev) +static void arm_gicv3_common_reset_hold(Object *obj) { - GICv3State *s = ARM_GICV3_COMMON(dev); + GICv3State *s = ARM_GICV3_COMMON(obj); int i; for (i = 0; i < s->num_cpu; i++) { @@ -578,9 +578,10 @@ static Property arm_gicv3_common_properties[] = { static void arm_gicv3_common_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); ARMLinuxBootIfClass *albifc = ARM_LINUX_BOOT_IF_CLASS(klass); - dc->reset = arm_gicv3_common_reset; + rc->phases.hold = arm_gicv3_common_reset_hold; dc->realize = arm_gicv3_common_realize; device_class_set_props(dc, arm_gicv3_common_properties); dc->vmsd = &vmstate_gicv3; diff --git a/hw/intc/arm_gicv3_dist.c b/hw/intc/arm_gicv3_dist.c index eea0368118..d599fefcbc 100644 --- a/hw/intc/arm_gicv3_dist.c +++ b/hw/intc/arm_gicv3_dist.c @@ -390,9 +390,9 @@ static bool gicd_readl(GICv3State *s, hwaddr offset, * MBIS == 0 (message-based SPIs not supported) * SecurityExtn == 1 if security extns supported * CPUNumber == 0 since for us ARE is always 1 - * ITLinesNumber == (num external irqs / 32) - 1 + * ITLinesNumber == (((max SPI IntID + 1) / 32) - 1) */ - int itlinesnumber = ((s->num_irq - GIC_INTERNAL) / 32) - 1; + int itlinesnumber = (s->num_irq / 32) - 1; /* * SecurityExtn must be RAZ if GICD_CTLR.DS == 1, and * "security extensions not supported" always implies DS == 1, diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c index 2ff21ed6bb..57c79da5c5 100644 --- a/hw/intc/arm_gicv3_its.c +++ b/hw/intc/arm_gicv3_its.c @@ -27,7 +27,7 @@ DECLARE_OBJ_CHECKERS(GICv3ITSState, GICv3ITSClass, struct GICv3ITSClass { GICv3ITSCommonClass parent_class; - void (*parent_reset)(DeviceState *dev); + ResettablePhases parent_phases; }; /* @@ -1953,12 +1953,14 @@ static void gicv3_arm_its_realize(DeviceState *dev, Error **errp) } } -static void gicv3_its_reset(DeviceState *dev) +static void gicv3_its_reset_hold(Object *obj) { - GICv3ITSState *s = ARM_GICV3_ITS_COMMON(dev); + GICv3ITSState *s = ARM_GICV3_ITS_COMMON(obj); GICv3ITSClass *c = ARM_GICV3_ITS_GET_CLASS(s); - 
c->parent_reset(dev); + if (c->parent_phases.hold) { + c->parent_phases.hold(obj); + } /* Quiescent bit reset to 1 */ s->ctlr = FIELD_DP32(s->ctlr, GITS_CTLR, QUIESCENT, 1); @@ -2012,12 +2014,14 @@ static Property gicv3_its_props[] = { static void gicv3_its_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); GICv3ITSClass *ic = ARM_GICV3_ITS_CLASS(klass); GICv3ITSCommonClass *icc = ARM_GICV3_ITS_COMMON_CLASS(klass); dc->realize = gicv3_arm_its_realize; device_class_set_props(dc, gicv3_its_props); - device_class_set_parent_reset(dc, gicv3_its_reset, &ic->parent_reset); + resettable_class_set_parent_phases(rc, NULL, gicv3_its_reset_hold, NULL, + &ic->parent_phases); icc->post_load = gicv3_its_post_load; } diff --git a/hw/intc/arm_gicv3_its_common.c b/hw/intc/arm_gicv3_its_common.c index 90b85f1e25..d7532a7a89 100644 --- a/hw/intc/arm_gicv3_its_common.c +++ b/hw/intc/arm_gicv3_its_common.c @@ -122,9 +122,9 @@ void gicv3_its_init_mmio(GICv3ITSState *s, const MemoryRegionOps *ops, msi_nonbroken = true; } -static void gicv3_its_common_reset(DeviceState *dev) +static void gicv3_its_common_reset_hold(Object *obj) { - GICv3ITSState *s = ARM_GICV3_ITS_COMMON(dev); + GICv3ITSState *s = ARM_GICV3_ITS_COMMON(obj); s->ctlr = 0; s->cbaser = 0; @@ -137,8 +137,9 @@ static void gicv3_its_common_reset(DeviceState *dev) static void gicv3_its_common_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); - dc->reset = gicv3_its_common_reset; + rc->phases.hold = gicv3_its_common_reset_hold; dc->vmsd = &vmstate_its; } diff --git a/hw/intc/arm_gicv3_its_kvm.c b/hw/intc/arm_gicv3_its_kvm.c index 529c7bd494..7eda9fb86e 100644 --- a/hw/intc/arm_gicv3_its_kvm.c +++ b/hw/intc/arm_gicv3_its_kvm.c @@ -37,7 +37,7 @@ DECLARE_OBJ_CHECKERS(GICv3ITSState, KVMARMITSClass, struct KVMARMITSClass { GICv3ITSCommonClass parent_class; - void (*parent_reset)(DeviceState *dev); + ResettablePhases parent_phases; }; @@ -197,13 +197,15 @@ static void kvm_arm_its_post_load(GICv3ITSState *s) GITS_CTLR, &s->ctlr, true, &error_abort); } -static void kvm_arm_its_reset(DeviceState *dev) +static void kvm_arm_its_reset_hold(Object *obj) { - GICv3ITSState *s = ARM_GICV3_ITS_COMMON(dev); + GICv3ITSState *s = ARM_GICV3_ITS_COMMON(obj); KVMARMITSClass *c = KVM_ARM_ITS_GET_CLASS(s); int i; - c->parent_reset(dev); + if (c->parent_phases.hold) { + c->parent_phases.hold(obj); + } if (kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, KVM_DEV_ARM_ITS_CTRL_RESET)) { @@ -241,12 +243,14 @@ static Property kvm_arm_its_props[] = { static void kvm_arm_its_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); GICv3ITSCommonClass *icc = ARM_GICV3_ITS_COMMON_CLASS(klass); KVMARMITSClass *ic = KVM_ARM_ITS_CLASS(klass); dc->realize = kvm_arm_its_realize; device_class_set_props(dc, kvm_arm_its_props); - device_class_set_parent_reset(dc, kvm_arm_its_reset, &ic->parent_reset); + resettable_class_set_parent_phases(rc, NULL, kvm_arm_its_reset_hold, NULL, + &ic->parent_phases); icc->send_msi = kvm_its_send_msi; icc->pre_save = kvm_arm_its_pre_save; icc->post_load = kvm_arm_its_post_load; diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c index 3ca643ecba..72ad916d3d 100644 --- a/hw/intc/arm_gicv3_kvm.c +++ b/hw/intc/arm_gicv3_kvm.c @@ -77,7 +77,7 @@ DECLARE_OBJ_CHECKERS(GICv3State, KVMARMGICv3Class, struct 
KVMARMGICv3Class { ARMGICv3CommonClass parent_class; DeviceRealize parent_realize; - void (*parent_reset)(DeviceState *dev); + ResettablePhases parent_phases; }; static void kvm_arm_gicv3_set_irq(void *opaque, int irq, int level) @@ -703,14 +703,16 @@ static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri) c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS]; } -static void kvm_arm_gicv3_reset(DeviceState *dev) +static void kvm_arm_gicv3_reset_hold(Object *obj) { - GICv3State *s = ARM_GICV3_COMMON(dev); + GICv3State *s = ARM_GICV3_COMMON(obj); KVMARMGICv3Class *kgc = KVM_ARM_GICV3_GET_CLASS(s); DPRINTF("Reset\n"); - kgc->parent_reset(dev); + if (kgc->parent_phases.hold) { + kgc->parent_phases.hold(obj); + } if (s->migration_blocker) { DPRINTF("Cannot put kernel gic state, no kernel interface\n"); @@ -890,6 +892,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); ARMGICv3CommonClass *agcc = ARM_GICV3_COMMON_CLASS(klass); KVMARMGICv3Class *kgc = KVM_ARM_GICV3_CLASS(klass); @@ -897,7 +900,8 @@ static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data) agcc->post_load = kvm_arm_gicv3_put; device_class_set_parent_realize(dc, kvm_arm_gicv3_realize, &kgc->parent_realize); - device_class_set_parent_reset(dc, kvm_arm_gicv3_reset, &kgc->parent_reset); + resettable_class_set_parent_phases(rc, NULL, kvm_arm_gicv3_reset_hold, NULL, + &kgc->parent_phases); } static const TypeInfo kvm_arm_gicv3_info = { diff --git a/hw/misc/imx6_src.c b/hw/misc/imx6_src.c index 7b0e968804..a9c64d06eb 100644 --- a/hw/misc/imx6_src.c +++ b/hw/misc/imx6_src.c @@ -15,7 +15,7 @@ #include "qemu/log.h" #include "qemu/main-loop.h" #include "qemu/module.h" -#include "arm-powerctl.h" +#include "target/arm/arm-powerctl.h" #include "hw/core/cpu.h" #ifndef DEBUG_IMX6_SRC diff --git a/hw/misc/iotkit-sysctl.c b/hw/misc/iotkit-sysctl.c index 7147e2f84e..e664215ee6 100644 --- a/hw/misc/iotkit-sysctl.c +++ b/hw/misc/iotkit-sysctl.c @@ -30,7 +30,6 @@ #include "hw/qdev-properties.h" #include "hw/arm/armsse-version.h" #include "target/arm/arm-powerctl.h" -#include "target/arm/cpu.h" REG32(SECDBGSTAT, 0x0) REG32(SECDBGSET, 0x4) diff --git a/hw/misc/meson.build b/hw/misc/meson.build index 95268eddc0..ed0598dc9e 100644 --- a/hw/misc/meson.build +++ b/hw/misc/meson.build @@ -51,6 +51,7 @@ softmmu_ss.add(when: 'CONFIG_IMX', if_true: files( 'imx25_ccm.c', 'imx31_ccm.c', 'imx6_ccm.c', + 'imx6_src.c', 'imx6ul_ccm.c', 'imx7_ccm.c', 'imx7_gpr.c', @@ -84,8 +85,8 @@ softmmu_ss.add(when: 'CONFIG_RASPI', if_true: files( )) softmmu_ss.add(when: 'CONFIG_SLAVIO', if_true: files('slavio_misc.c')) softmmu_ss.add(when: 'CONFIG_ZYNQ', if_true: files('zynq_slcr.c')) -specific_ss.add(when: 'CONFIG_XLNX_ZYNQMP_ARM', if_true: files('xlnx-zynqmp-crf.c')) -specific_ss.add(when: 'CONFIG_XLNX_ZYNQMP_ARM', if_true: files('xlnx-zynqmp-apu-ctrl.c')) +softmmu_ss.add(when: 'CONFIG_XLNX_ZYNQMP_ARM', if_true: files('xlnx-zynqmp-crf.c')) +softmmu_ss.add(when: 'CONFIG_XLNX_ZYNQMP_ARM', if_true: files('xlnx-zynqmp-apu-ctrl.c')) specific_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files('xlnx-versal-crl.c')) softmmu_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files( 'xlnx-versal-xramc.c', @@ -101,6 +102,7 @@ softmmu_ss.add(when: 'CONFIG_TZ_MPC', if_true: files('tz-mpc.c')) softmmu_ss.add(when: 'CONFIG_TZ_MSC', if_true: files('tz-msc.c')) softmmu_ss.add(when: 'CONFIG_TZ_PPC', 
if_true: files('tz-ppc.c')) softmmu_ss.add(when: 'CONFIG_IOTKIT_SECCTL', if_true: files('iotkit-secctl.c')) +softmmu_ss.add(when: 'CONFIG_IOTKIT_SYSCTL', if_true: files('iotkit-sysctl.c')) softmmu_ss.add(when: 'CONFIG_IOTKIT_SYSINFO', if_true: files('iotkit-sysinfo.c')) softmmu_ss.add(when: 'CONFIG_ARMSSE_CPU_PWRCTRL', if_true: files('armsse-cpu-pwrctrl.c')) softmmu_ss.add(when: 'CONFIG_ARMSSE_CPUID', if_true: files('armsse-cpuid.c')) @@ -126,15 +128,12 @@ softmmu_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_ahb_apb_pnp.c')) specific_ss.add(when: 'CONFIG_AVR_POWER', if_true: files('avr_power.c')) -specific_ss.add(when: 'CONFIG_IMX', if_true: files('imx6_src.c')) -specific_ss.add(when: 'CONFIG_IOTKIT_SYSCTL', if_true: files('iotkit-sysctl.c')) - specific_ss.add(when: 'CONFIG_MAC_VIA', if_true: files('mac_via.c')) specific_ss.add(when: 'CONFIG_MIPS_CPS', if_true: files('mips_cmgcr.c', 'mips_cpc.c')) specific_ss.add(when: 'CONFIG_MIPS_ITU', if_true: files('mips_itu.c')) -specific_ss.add(when: 'CONFIG_SBSA_REF', if_true: files('sbsa_ec.c')) +softmmu_ss.add(when: 'CONFIG_SBSA_REF', if_true: files('sbsa_ec.c')) # HPPA devices softmmu_ss.add(when: 'CONFIG_LASI', if_true: files('lasi.c')) diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h index c641e60735..f1921fdf9e 100644 --- a/include/hw/arm/smmuv3.h +++ b/include/hw/arm/smmuv3.h @@ -77,7 +77,7 @@ struct SMMUv3Class { /*< public >*/ DeviceRealize parent_realize; - DeviceReset parent_reset; + ResettablePhases parent_phases; }; #define TYPE_ARM_SMMUV3 "arm-smmuv3" diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index 6ec479ca2b..c7dd59d7f1 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -125,6 +125,7 @@ struct VirtMachineClass { bool no_pmu; bool claim_edge_triggered_timers; bool smbios_old_sys_ver; + bool no_highmem_compact; bool no_highmem_ecam; bool no_ged; /* Machines < 4.2 have no support for ACPI GED device */ bool kvm_no_adjvtime; @@ -144,6 +145,7 @@ struct VirtMachineState { PFlashCFI01 *flash[2]; bool secure; bool highmem; + bool highmem_compact; bool highmem_ecam; bool highmem_mmio; bool highmem_redists; diff --git a/include/hw/misc/xlnx-zynqmp-apu-ctrl.h b/include/hw/misc/xlnx-zynqmp-apu-ctrl.h index b8ca9434af..c3bf3c1583 100644 --- a/include/hw/misc/xlnx-zynqmp-apu-ctrl.h +++ b/include/hw/misc/xlnx-zynqmp-apu-ctrl.h @@ -13,7 +13,7 @@ #include "hw/sysbus.h" #include "hw/register.h" -#include "target/arm/cpu.h" +#include "target/arm/cpu-qom.h" #define TYPE_XLNX_ZYNQMP_APU_CTRL "xlnx.apu-ctrl" OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPAPUCtrl, XLNX_ZYNQMP_APU_CTRL) diff --git a/migration/migration.c b/migration/migration.c index 78a0b010d4..52b5d39244 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1045,13 +1045,13 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) size_t page_size = qemu_target_page_size(); info->ram = g_malloc0(sizeof(*info->ram)); - info->ram->transferred = ram_counters.transferred; + info->ram->transferred = stat64_get(&ram_atomic_counters.transferred); info->ram->total = ram_bytes_total(); - info->ram->duplicate = ram_counters.duplicate; + info->ram->duplicate = stat64_get(&ram_atomic_counters.duplicate); /* legacy value. 
It is not used anymore */ info->ram->skipped = 0; - info->ram->normal = ram_counters.normal; - info->ram->normal_bytes = ram_counters.normal * page_size; + info->ram->normal = stat64_get(&ram_atomic_counters.normal); + info->ram->normal_bytes = info->ram->normal * page_size; info->ram->mbps = s->mbps; info->ram->dirty_sync_count = ram_counters.dirty_sync_count; info->ram->dirty_sync_missed_zero_copy = @@ -1062,7 +1062,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) info->ram->pages_per_second = s->pages_per_second; info->ram->precopy_bytes = ram_counters.precopy_bytes; info->ram->downtime_bytes = ram_counters.downtime_bytes; - info->ram->postcopy_bytes = ram_counters.postcopy_bytes; + info->ram->postcopy_bytes = stat64_get(&ram_atomic_counters.postcopy_bytes); if (migrate_use_xbzrle()) { info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); @@ -2840,8 +2840,11 @@ static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value) return 0; } -/* Release ms->rp_state.from_dst_file in a safe way */ -static void migration_release_from_dst_file(MigrationState *ms) +/* + * Release ms->rp_state.from_dst_file (and postcopy_qemufile_src if + * existed) in a safe way. + */ +static void migration_release_dst_files(MigrationState *ms) { QEMUFile *file; @@ -2854,6 +2857,18 @@ static void migration_release_from_dst_file(MigrationState *ms) ms->rp_state.from_dst_file = NULL; } + /* + * Do the same to postcopy fast path socket too if there is. No + * locking needed because this qemufile should only be managed by + * return path thread. + */ + if (ms->postcopy_qemufile_src) { + migration_ioc_unregister_yank_from_file(ms->postcopy_qemufile_src); + qemu_file_shutdown(ms->postcopy_qemufile_src); + qemu_fclose(ms->postcopy_qemufile_src); + ms->postcopy_qemufile_src = NULL; + } + qemu_fclose(file); } @@ -2998,7 +3013,7 @@ out: * Maybe there is something we can do: it looks like a * network down issue, and we pause for a recovery. */ - migration_release_from_dst_file(ms); + migration_release_dst_files(ms); rp = NULL; if (postcopy_pause_return_path_thread(ms)) { /* @@ -3016,7 +3031,7 @@ out: } trace_source_return_path_thread_end(); - migration_release_from_dst_file(ms); + migration_release_dst_files(ms); rcu_unregister_thread(); return NULL; } @@ -3539,18 +3554,6 @@ static MigThrError postcopy_pause(MigrationState *s) qemu_file_shutdown(file); qemu_fclose(file); - /* - * Do the same to postcopy fast path socket too if there is. No - * locking needed because no racer as long as we do this before setting - * status to paused. 
- */ - if (s->postcopy_qemufile_src) { - migration_ioc_unregister_yank_from_file(s->postcopy_qemufile_src); - qemu_file_shutdown(s->postcopy_qemufile_src); - qemu_fclose(s->postcopy_qemufile_src); - s->postcopy_qemufile_src = NULL; - } - migrate_set_state(&s->state, s->state, MIGRATION_STATUS_POSTCOPY_PAUSED); @@ -4391,8 +4394,6 @@ static Property migration_properties[] = { DEFINE_PROP_SIZE("announce-step", MigrationState, parameters.announce_step, DEFAULT_MIGRATE_ANNOUNCE_STEP), - DEFINE_PROP_BOOL("x-postcopy-preempt-break-huge", MigrationState, - postcopy_preempt_break_huge, true), DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds), DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname), DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz), diff --git a/migration/migration.h b/migration/migration.h index cdad8aceaa..ae4ffd3454 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -340,13 +340,6 @@ struct MigrationState { bool send_configuration; /* Whether we send section footer during migration */ bool send_section_footer; - /* - * Whether we allow break sending huge pages when postcopy preempt is - * enabled. When disabled, we won't interrupt precopy within sending a - * host huge page, which is the old behavior of vanilla postcopy. - * NOTE: this parameter is ignored if postcopy preempt is not enabled. - */ - bool postcopy_preempt_break_huge; /* Needed by postcopy-pause state */ QemuSemaphore postcopy_pause_sem; diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c index 18213a9513..37770248e1 100644 --- a/migration/multifd-zlib.c +++ b/migration/multifd-zlib.c @@ -116,7 +116,6 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) { struct zlib_data *z = p->data; - size_t page_size = qemu_target_page_size(); z_stream *zs = &z->zs; uint32_t out_size = 0; int ret; @@ -135,8 +134,8 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) * with compression. zlib does not guarantee that this is safe, * therefore copy the page before calling deflate(). 
*/ - memcpy(z->buf, p->pages->block->host + p->normal[i], page_size); - zs->avail_in = page_size; + memcpy(z->buf, p->pages->block->host + p->normal[i], p->page_size); + zs->avail_in = p->page_size; zs->next_in = z->buf; zs->avail_out = available; @@ -242,12 +241,11 @@ static void zlib_recv_cleanup(MultiFDRecvParams *p) static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) { struct zlib_data *z = p->data; - size_t page_size = qemu_target_page_size(); z_stream *zs = &z->zs; uint32_t in_size = p->next_packet_size; /* we measure the change of total_out */ uint32_t out_size = zs->total_out; - uint32_t expected_size = p->normal_num * page_size; + uint32_t expected_size = p->normal_num * p->page_size; uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; int ret; int i; @@ -274,7 +272,7 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) flush = Z_SYNC_FLUSH; } - zs->avail_out = page_size; + zs->avail_out = p->page_size; zs->next_out = p->host + p->normal[i]; /* @@ -288,8 +286,8 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) do { ret = inflate(zs, flush); } while (ret == Z_OK && zs->avail_in - && (zs->total_out - start) < page_size); - if (ret == Z_OK && (zs->total_out - start) < page_size) { + && (zs->total_out - start) < p->page_size); + if (ret == Z_OK && (zs->total_out - start) < p->page_size) { error_setg(errp, "multifd %u: inflate generated too few output", p->id); return -1; diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c index d788d309f2..f4a8e1ed1f 100644 --- a/migration/multifd-zstd.c +++ b/migration/multifd-zstd.c @@ -113,7 +113,6 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) { struct zstd_data *z = p->data; - size_t page_size = qemu_target_page_size(); int ret; uint32_t i; @@ -128,7 +127,7 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) flush = ZSTD_e_flush; } z->in.src = p->pages->block->host + p->normal[i]; - z->in.size = page_size; + z->in.size = p->page_size; z->in.pos = 0; /* @@ -241,8 +240,7 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) { uint32_t in_size = p->next_packet_size; uint32_t out_size = 0; - size_t page_size = qemu_target_page_size(); - uint32_t expected_size = p->normal_num * page_size; + uint32_t expected_size = p->normal_num * p->page_size; uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; struct zstd_data *z = p->data; int ret; @@ -265,7 +263,7 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) for (i = 0; i < p->normal_num; i++) { z->out.dst = p->host + p->normal[i]; - z->out.size = page_size; + z->out.size = p->page_size; z->out.pos = 0; /* @@ -279,8 +277,8 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) do { ret = ZSTD_decompressStream(z->zds, &z->out, &z->in); } while (ret > 0 && (z->in.size - z->in.pos > 0) - && (z->out.pos < page_size)); - if (ret > 0 && (z->out.pos < page_size)) { + && (z->out.pos < p->page_size)); + if (ret > 0 && (z->out.pos < p->page_size)) { error_setg(errp, "multifd %u: decompressStream buffer too small", p->id); return -1; diff --git a/migration/multifd.c b/migration/multifd.c index 509bbbe3bf..000ca4d4ec 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -87,15 +87,14 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) { MultiFDPages_t *pages = p->pages; - size_t page_size = qemu_target_page_size(); for (int 
i = 0; i < p->normal_num; i++) { p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i]; - p->iov[p->iovs_num].iov_len = page_size; + p->iov[p->iovs_num].iov_len = p->page_size; p->iovs_num++; } - p->next_packet_size = p->normal_num * page_size; + p->next_packet_size = p->normal_num * p->page_size; p->flags |= MULTIFD_FLAG_NOCOMP; return 0; } @@ -139,7 +138,6 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p) static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp) { uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; - size_t page_size = qemu_target_page_size(); if (flags != MULTIFD_FLAG_NOCOMP) { error_setg(errp, "multifd %u: flags received %x flags expected %x", @@ -148,7 +146,7 @@ static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp) } for (int i = 0; i < p->normal_num; i++) { p->iov[i].iov_base = p->host + p->normal[i]; - p->iov[i].iov_len = page_size; + p->iov[i].iov_len = p->page_size; } return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp); } @@ -281,8 +279,6 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) { MultiFDPacket_t *packet = p->packet; - size_t page_size = qemu_target_page_size(); - uint32_t page_count = MULTIFD_PACKET_SIZE / page_size; RAMBlock *block; int i; @@ -309,10 +305,10 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) * If we received a packet that is 100 times bigger than expected * just stop migration. It is a magic number. */ - if (packet->pages_alloc > page_count) { + if (packet->pages_alloc > p->page_count) { error_setg(errp, "multifd: received packet " "with size %u and expected a size of %u", - packet->pages_alloc, page_count) ; + packet->pages_alloc, p->page_count) ; return -1; } @@ -344,7 +340,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) for (i = 0; i < p->normal_num; i++) { uint64_t offset = be64_to_cpu(packet->offset[i]); - if (offset > (block->used_length - page_size)) { + if (offset > (block->used_length - p->page_size)) { error_setg(errp, "multifd: offset too long %" PRIu64 " (max " RAM_ADDR_FMT ")", offset, block->used_length); @@ -433,11 +429,10 @@ static int multifd_send_pages(QEMUFile *f) p->packet_num = multifd_send_state->packet_num++; multifd_send_state->pages = p->pages; p->pages = pages; - transferred = ((uint64_t) pages->num) * qemu_target_page_size() - + p->packet_len; + transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; qemu_file_acct_rate_limit(f, transferred); ram_counters.multifd_bytes += transferred; - ram_counters.transferred += transferred; + stat64_add(&ram_atomic_counters.transferred, transferred); qemu_mutex_unlock(&p->mutex); qemu_sem_post(&p->sem); @@ -629,7 +624,7 @@ int multifd_send_sync_main(QEMUFile *f) p->pending_job++; qemu_file_acct_rate_limit(f, p->packet_len); ram_counters.multifd_bytes += p->packet_len; - ram_counters.transferred += p->packet_len; + stat64_add(&ram_atomic_counters.transferred, p->packet_len); qemu_mutex_unlock(&p->mutex); qemu_sem_post(&p->sem); @@ -947,6 +942,8 @@ int multifd_save_setup(Error **errp) /* We need one extra place for the packet header */ p->iov = g_new0(struct iovec, page_count + 1); p->normal = g_new0(ram_addr_t, page_count); + p->page_size = qemu_target_page_size(); + p->page_count = page_count; if (migrate_use_zero_copy_send()) { p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; @@ -1194,6 +1191,8 @@ int multifd_load_setup(Error **errp) p->name = 
g_strdup_printf("multifdrecv_%d", i); p->iov = g_new0(struct iovec, page_count); p->normal = g_new0(ram_addr_t, page_count); + p->page_count = page_count; + p->page_size = qemu_target_page_size(); } for (i = 0; i < thread_count; i++) { diff --git a/migration/multifd.h b/migration/multifd.h index 519f498643..e2802a9ce2 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -80,6 +80,10 @@ typedef struct { bool registered_yank; /* packet allocated len */ uint32_t packet_len; + /* guest page size */ + uint32_t page_size; + /* number of pages in a full packet */ + uint32_t page_count; /* multifd flags for sending ram */ int write_flags; @@ -143,6 +147,10 @@ typedef struct { QIOChannel *c; /* packet allocated len */ uint32_t packet_len; + /* guest page size */ + uint32_t page_size; + /* number of pages in a full packet */ + uint32_t page_count; /* syncs main thread and channels */ QemuSemaphore sem_sync; diff --git a/migration/ram.c b/migration/ram.c index 1338e47665..334309f1c6 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -85,6 +85,26 @@ XBZRLECacheStats xbzrle_counters; +/* used by the search for pages to send */ +struct PageSearchStatus { + /* The migration channel used for a specific host page */ + QEMUFile *pss_channel; + /* Last block from where we have sent data */ + RAMBlock *last_sent_block; + /* Current block being searched */ + RAMBlock *block; + /* Current page to search from */ + unsigned long page; + /* Set once we wrap around */ + bool complete_round; + /* Whether we're sending a host page */ + bool host_page_sending; + /* The start/end of current host page. Invalid if host_page_sending==false */ + unsigned long host_page_start; + unsigned long host_page_end; +}; +typedef struct PageSearchStatus PageSearchStatus; + /* struct contains XBZRLE cache and a static page used by the compression */ static struct { @@ -162,6 +182,11 @@ out: return ret; } +static bool postcopy_preempt_active(void) +{ + return migrate_postcopy_preempt() && migration_in_postcopy(); +} + bool ramblock_is_ignored(RAMBlock *block) { return !qemu_ram_is_migratable(block) || @@ -296,30 +321,17 @@ struct RAMSrcPageRequest { QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req; }; -typedef struct { - /* - * Cached ramblock/offset values if preempted. They're only meaningful if - * preempted==true below. - */ - RAMBlock *ram_block; - unsigned long ram_page; - /* - * Whether a postcopy preemption just happened. Will be reset after - * precopy recovered to background migration. - */ - bool preempted; -} PostcopyPreemptState; - /* State of RAM for migration */ struct RAMState { - /* QEMUFile used for this migration */ - QEMUFile *f; + /* + * PageSearchStatus structures for the channels when send pages. + * Protected by the bitmap_mutex. 
+ */ + PageSearchStatus pss[RAM_CHANNEL_MAX]; /* UFFD file descriptor, used in 'write-tracking' migration */ int uffdio_fd; /* Last block that we have visited searching for dirty pages */ RAMBlock *last_seen_block; - /* Last block from where we have sent data */ - RAMBlock *last_sent_block; /* Last dirty target page we have sent */ ram_addr_t last_page; /* last ram version we have seen */ @@ -357,21 +369,18 @@ struct RAMState { uint64_t target_page_count; /* number of dirty bits in the bitmap */ uint64_t migration_dirty_pages; - /* Protects modification of the bitmap and migration dirty pages */ + /* + * Protects: + * - dirty/clear bitmap + * - migration_dirty_pages + * - pss structures + */ QemuMutex bitmap_mutex; /* The RAMBlock used in the last src_page_requests */ RAMBlock *last_req_rb; /* Queue of outstanding page requests from the destination */ QemuMutex src_page_req_mutex; QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests; - - /* Postcopy preemption informations */ - PostcopyPreemptState postcopy_preempt_state; - /* - * Current channel we're using on src VM. Only valid if postcopy-preempt - * is enabled. - */ - unsigned int postcopy_channel; }; typedef struct RAMState RAMState; @@ -379,11 +388,6 @@ static RAMState *ram_state; static NotifierWithReturnList precopy_notifier_list; -static void postcopy_preempt_reset(RAMState *rs) -{ - memset(&rs->postcopy_preempt_state, 0, sizeof(PostcopyPreemptState)); -} - /* Whether postcopy has queued requests? */ static bool postcopy_has_request(RAMState *rs) { @@ -420,18 +424,25 @@ uint64_t ram_bytes_remaining(void) 0; } +/* + * NOTE: not all stats in ram_counters are used in reality. See comments + * for struct MigrationAtomicStats. The ultimate result of ram migration + * counters will be a merged version with both ram_counters and the atomic + * fields in ram_atomic_counters. + */ MigrationStats ram_counters; +MigrationAtomicStats ram_atomic_counters; -static void ram_transferred_add(uint64_t bytes) +void ram_transferred_add(uint64_t bytes) { if (runstate_is_running()) { ram_counters.precopy_bytes += bytes; } else if (migration_in_postcopy()) { - ram_counters.postcopy_bytes += bytes; + stat64_add(&ram_atomic_counters.postcopy_bytes, bytes); } else { ram_counters.downtime_bytes += bytes; } - ram_counters.transferred += bytes; + stat64_add(&ram_atomic_counters.transferred, bytes); } void dirty_sync_missed_zero_copy(void) @@ -439,39 +450,6 @@ void dirty_sync_missed_zero_copy(void) ram_counters.dirty_sync_missed_zero_copy++; } -/* used by the search for pages to send */ -struct PageSearchStatus { - /* Current block being searched */ - RAMBlock *block; - /* Current page to search from */ - unsigned long page; - /* Set once we wrap around */ - bool complete_round; - /* - * [POSTCOPY-ONLY] Whether current page is explicitly requested by - * postcopy. When set, the request is "urgent" because the dest QEMU - * threads are waiting for us. - */ - bool postcopy_requested; - /* - * [POSTCOPY-ONLY] The target channel to use to send current page. - * - * Note: This may _not_ match with the value in postcopy_requested - * above. Let's imagine the case where the postcopy request is exactly - * the page that we're sending in progress during precopy. In this case - * we'll have postcopy_requested set to true but the target channel - * will be the precopy channel (so that we don't split brain on that - * specific page since the precopy channel already contains partial of - * that page data). 
- * - * Besides that specific use case, postcopy_target_channel should - * always be equal to postcopy_requested, because by default we send - * postcopy pages via postcopy preempt channel. - */ - bool postcopy_target_channel; -}; -typedef struct PageSearchStatus PageSearchStatus; - CompressionStats compression_counters; struct CompressParam { @@ -517,11 +495,28 @@ static QemuThread *decompress_threads; static QemuMutex decomp_done_lock; static QemuCond decomp_done_cond; +static int ram_save_host_page_urgent(PageSearchStatus *pss); + static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, ram_addr_t offset, uint8_t *source_buf); -static void postcopy_preempt_restore(RAMState *rs, PageSearchStatus *pss, - bool postcopy_requested); +/* NOTE: page is the PFN not real ram_addr_t. */ +static void pss_init(PageSearchStatus *pss, RAMBlock *rb, ram_addr_t page) +{ + pss->block = rb; + pss->page = page; + pss->complete_round = false; +} + +/* + * Check whether two PSSs are actively sending the same page. Return true + * if it is, false otherwise. + */ +static bool pss_overlap(PageSearchStatus *pss1, PageSearchStatus *pss2) +{ + return pss1->host_page_sending && pss2->host_page_sending && + (pss1->host_page_start == pss2->host_page_start); +} static void *do_data_compress(void *opaque) { @@ -647,28 +642,30 @@ exit: * * Returns the number of bytes written * - * @f: QEMUFile where to send the data + * @pss: current PSS channel status * @block: block that contains the page we want to send * @offset: offset inside the block for the page * in the lower bits, it contains flags */ -static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block, +static size_t save_page_header(PageSearchStatus *pss, RAMBlock *block, ram_addr_t offset) { size_t size, len; + bool same_block = (block == pss->last_sent_block); + QEMUFile *f = pss->pss_channel; - if (block == rs->last_sent_block) { + if (same_block) { offset |= RAM_SAVE_FLAG_CONTINUE; } qemu_put_be64(f, offset); size = 8; - if (!(offset & RAM_SAVE_FLAG_CONTINUE)) { + if (!same_block) { len = strlen(block->idstr); qemu_put_byte(f, len); qemu_put_buffer(f, (uint8_t *)block->idstr, len); size += 1 + len; - rs->last_sent_block = block; + pss->last_sent_block = block; } return size; } @@ -719,7 +716,7 @@ void mig_throttle_counter_reset(void) rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); rs->num_dirty_pages_period = 0; - rs->bytes_xfer_prev = ram_counters.transferred; + rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred); } /** @@ -736,10 +733,6 @@ void mig_throttle_counter_reset(void) */ static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) { - if (!rs->xbzrle_enabled) { - return; - } - /* We don't care if this fails to allocate a new cache page * as long as it updated an old one */ cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page, @@ -756,17 +749,19 @@ static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) * -1 means that xbzrle would be longer than normal * * @rs: current RAM state + * @pss: current PSS channel * @current_data: pointer to the address of the page contents * @current_addr: addr of the page * @block: block that contains the page we want to send * @offset: offset inside the block for the page */ -static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, - ram_addr_t current_addr, RAMBlock *block, - ram_addr_t offset) +static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss, + uint8_t **current_data, ram_addr_t 
current_addr, + RAMBlock *block, ram_addr_t offset) { int encoded_len = 0, bytes_xbzrle; uint8_t *prev_cached_page; + QEMUFile *file = pss->pss_channel; if (!cache_is_cached(XBZRLE.cache, current_addr, ram_counters.dirty_sync_count)) { @@ -831,11 +826,11 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, } /* Send XBZRLE based compressed page */ - bytes_xbzrle = save_page_header(rs, rs->f, block, + bytes_xbzrle = save_page_header(pss, block, offset | RAM_SAVE_FLAG_XBZRLE); - qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE); - qemu_put_be16(rs->f, encoded_len); - qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len); + qemu_put_byte(file, ENCODING_FLAG_XBZRLE); + qemu_put_be16(file, encoded_len); + qemu_put_buffer(file, XBZRLE.encoded_buf, encoded_len); bytes_xbzrle += encoded_len + 1 + 2; /* * Like compressed_size (please see update_compress_thread_counts), @@ -849,26 +844,38 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, } /** - * migration_bitmap_find_dirty: find the next dirty page from start + * pss_find_next_dirty: find the next dirty page of current ramblock * - * Returns the page offset within memory region of the start of a dirty page + * This function updates pss->page to point to the next dirty page index + * within the ramblock to migrate, or the end of ramblock when nothing + * found. Note that when pss->host_page_sending==true it means we're + * during sending a host page, so we won't look for dirty page that is + * outside the host page boundary. * - * @rs: current RAM state - * @rb: RAMBlock where to search for dirty pages - * @start: page where we start the search + * @pss: the current page search status */ -static inline -unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, - unsigned long start) +static void pss_find_next_dirty(PageSearchStatus *pss) { + RAMBlock *rb = pss->block; unsigned long size = rb->used_length >> TARGET_PAGE_BITS; unsigned long *bitmap = rb->bmap; if (ramblock_is_ignored(rb)) { - return size; + /* Points directly to the end, so we know no dirty page */ + pss->page = size; + return; + } + + /* + * If during sending a host page, only look for dirty pages within the + * current host page being send. 
+ */ + if (pss->host_page_sending) { + assert(pss->host_page_end); + size = MIN(size, pss->host_page_end); } - return find_next_bit(bitmap, size, start); + pss->page = find_next_bit(bitmap, size, pss->page); } static void migration_clear_memory_region_dirty_bitmap(RAMBlock *rb, @@ -1083,8 +1090,9 @@ uint64_t ram_pagesize_summary(void) uint64_t ram_get_total_transferred_pages(void) { - return ram_counters.normal + ram_counters.duplicate + - compression_counters.pages + xbzrle_counters.pages; + return stat64_get(&ram_atomic_counters.normal) + + stat64_get(&ram_atomic_counters.duplicate) + + compression_counters.pages + xbzrle_counters.pages; } static void migration_update_rates(RAMState *rs, int64_t end_time) @@ -1143,8 +1151,8 @@ static void migration_trigger_throttle(RAMState *rs) { MigrationState *s = migrate_get_current(); uint64_t threshold = s->parameters.throttle_trigger_threshold; - - uint64_t bytes_xfer_period = ram_counters.transferred - rs->bytes_xfer_prev; + uint64_t bytes_xfer_period = + stat64_get(&ram_atomic_counters.transferred) - rs->bytes_xfer_prev; uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE; uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100; @@ -1207,7 +1215,7 @@ static void migration_bitmap_sync(RAMState *rs) /* reset period counters */ rs->time_last_bitmap_sync = end_time; rs->num_dirty_pages_period = 0; - rs->bytes_xfer_prev = ram_counters.transferred; + rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred); } if (migrate_use_events()) { qapi_event_send_migration_pass(ram_counters.dirty_sync_count); @@ -1234,7 +1242,7 @@ static void migration_bitmap_sync_precopy(RAMState *rs) } } -static void ram_release_page(const char *rbname, uint64_t offset) +void ram_release_page(const char *rbname, uint64_t offset) { if (!migrate_release_ram() || !migration_in_postcopy()) { return; @@ -1249,19 +1257,19 @@ static void ram_release_page(const char *rbname, uint64_t offset) * Returns the size of data written to the file, 0 means the page is not * a zero page * - * @rs: current RAM state - * @file: the file where the data is saved + * @pss: current PSS channel * @block: block that contains the page we want to send * @offset: offset inside the block for the page */ -static int save_zero_page_to_file(RAMState *rs, QEMUFile *file, +static int save_zero_page_to_file(PageSearchStatus *pss, RAMBlock *block, ram_addr_t offset) { uint8_t *p = block->host + offset; + QEMUFile *file = pss->pss_channel; int len = 0; if (buffer_is_zero(p, TARGET_PAGE_SIZE)) { - len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO); + len += save_page_header(pss, block, offset | RAM_SAVE_FLAG_ZERO); qemu_put_byte(file, 0); len += 1; ram_release_page(block->idstr, offset); @@ -1274,16 +1282,17 @@ static int save_zero_page_to_file(RAMState *rs, QEMUFile *file, * * Returns the number of pages written. 
* - * @rs: current RAM state + * @pss: current PSS channel * @block: block that contains the page we want to send * @offset: offset inside the block for the page */ -static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) +static int save_zero_page(PageSearchStatus *pss, RAMBlock *block, + ram_addr_t offset) { - int len = save_zero_page_to_file(rs, rs->f, block, offset); + int len = save_zero_page_to_file(pss, block, offset); if (len) { - ram_counters.duplicate++; + stat64_add(&ram_atomic_counters.duplicate, 1); ram_transferred_add(len); return 1; } @@ -1297,15 +1306,15 @@ static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) * * Return true if the pages has been saved, otherwise false is returned. */ -static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, - int *pages) +static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, + ram_addr_t offset, int *pages) { uint64_t bytes_xmit = 0; int ret; *pages = -1; - ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE, - &bytes_xmit); + ret = ram_control_save_page(pss->pss_channel, block->offset, offset, + TARGET_PAGE_SIZE, &bytes_xmit); if (ret == RAM_SAVE_CONTROL_NOT_SUPP) { return false; } @@ -1320,9 +1329,9 @@ static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, } if (bytes_xmit > 0) { - ram_counters.normal++; + stat64_add(&ram_atomic_counters.normal, 1); } else if (bytes_xmit == 0) { - ram_counters.duplicate++; + stat64_add(&ram_atomic_counters.duplicate, 1); } return true; @@ -1333,26 +1342,28 @@ static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, * * Returns the number of pages written. * - * @rs: current RAM state + * @pss: current PSS channel * @block: block that contains the page we want to send * @offset: offset inside the block for the page * @buf: the page to be sent * @async: send to page asyncly */ -static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, - uint8_t *buf, bool async) +static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, + ram_addr_t offset, uint8_t *buf, bool async) { - ram_transferred_add(save_page_header(rs, rs->f, block, + QEMUFile *file = pss->pss_channel; + + ram_transferred_add(save_page_header(pss, block, offset | RAM_SAVE_FLAG_PAGE)); if (async) { - qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, + qemu_put_buffer_async(file, buf, TARGET_PAGE_SIZE, migrate_release_ram() && migration_in_postcopy()); } else { - qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE); + qemu_put_buffer(file, buf, TARGET_PAGE_SIZE); } ram_transferred_add(TARGET_PAGE_SIZE); - ram_counters.normal++; + stat64_add(&ram_atomic_counters.normal, 1); return 1; } @@ -1382,8 +1393,8 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss) XBZRLE_cache_lock(); if (rs->xbzrle_enabled && !migration_in_postcopy()) { - pages = save_xbzrle_page(rs, &p, current_addr, block, - offset); + pages = save_xbzrle_page(rs, pss, &p, current_addr, + block, offset); if (!rs->last_stage) { /* Can't send this cached data async, since the cache page * might get updated before it gets to the wire @@ -1394,7 +1405,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss) /* XBZRLE overflow or normal page */ if (pages == -1) { - pages = save_normal_page(rs, block, offset, p, send_async); + pages = save_normal_page(pss, block, offset, p, send_async); } XBZRLE_cache_unlock(); @@ -1402,13 +1413,13 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss) 
return pages; } -static int ram_save_multifd_page(RAMState *rs, RAMBlock *block, +static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block, ram_addr_t offset) { - if (multifd_queue_page(rs->f, block, offset) < 0) { + if (multifd_queue_page(file, block, offset) < 0) { return -1; } - ram_counters.normal++; + stat64_add(&ram_atomic_counters.normal, 1); return 1; } @@ -1417,14 +1428,15 @@ static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, ram_addr_t offset, uint8_t *source_buf) { RAMState *rs = ram_state; + PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY]; uint8_t *p = block->host + offset; int ret; - if (save_zero_page_to_file(rs, f, block, offset)) { + if (save_zero_page_to_file(pss, block, offset)) { return true; } - save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE); + save_page_header(pss, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE); /* * copy it to a internal buffer to avoid it being modified by VM @@ -1446,7 +1458,7 @@ update_compress_thread_counts(const CompressParam *param, int bytes_xmit) ram_transferred_add(bytes_xmit); if (param->zero_page) { - ram_counters.duplicate++; + stat64_add(&ram_atomic_counters.duplicate, 1); return; } @@ -1459,6 +1471,7 @@ static bool save_page_use_compression(RAMState *rs); static void flush_compressed_data(RAMState *rs) { + MigrationState *ms = migrate_get_current(); int idx, len, thread_count; if (!save_page_use_compression(rs)) { @@ -1477,7 +1490,7 @@ static void flush_compressed_data(RAMState *rs) for (idx = 0; idx < thread_count; idx++) { qemu_mutex_lock(&comp_param[idx].mutex); if (!comp_param[idx].quit) { - len = qemu_put_qemu_file(rs->f, comp_param[idx].file); + len = qemu_put_qemu_file(ms->to_dst_file, comp_param[idx].file); /* * it's safe to fetch zero_page without holding comp_done_lock * as there is no further request submitted to the thread, @@ -1496,11 +1509,11 @@ static inline void set_compress_params(CompressParam *param, RAMBlock *block, param->offset = offset; } -static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block, - ram_addr_t offset) +static int compress_page_with_multi_thread(RAMBlock *block, ram_addr_t offset) { int idx, thread_count, bytes_xmit = -1, pages = -1; bool wait = migrate_compress_wait_thread(); + MigrationState *ms = migrate_get_current(); thread_count = migrate_compress_threads(); qemu_mutex_lock(&comp_done_lock); @@ -1508,7 +1521,8 @@ retry: for (idx = 0; idx < thread_count; idx++) { if (comp_param[idx].done) { comp_param[idx].done = false; - bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file); + bytes_xmit = qemu_put_qemu_file(ms->to_dst_file, + comp_param[idx].file); qemu_mutex_lock(&comp_param[idx].mutex); set_compress_params(&comp_param[idx], block, offset); qemu_cond_signal(&comp_param[idx].cond); @@ -1544,14 +1558,9 @@ retry: */ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again) { - /* - * This is not a postcopy requested page, mark it "not urgent", and use - * precopy channel to send it. 
- */ - pss->postcopy_requested = false; - pss->postcopy_target_channel = RAM_CHANNEL_PRECOPY; + /* Update pss->page for the next dirty bit in ramblock */ + pss_find_next_dirty(pss); - pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page); if (pss->complete_round && pss->block == rs->last_seen_block && pss->page >= rs->last_page) { /* @@ -1696,7 +1705,7 @@ static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss, uint64_t run_length = (pss->page - start_page) << TARGET_PAGE_BITS; /* Flush async buffers before un-protect. */ - qemu_fflush(rs->f); + qemu_fflush(pss->pss_channel); /* Un-protect memory range. */ res = uffd_change_protection(rs->uffdio_fd, page_address, run_length, false, false); @@ -1999,55 +2008,6 @@ void ram_write_tracking_stop(void) } #endif /* defined(__linux__) */ -/* - * Check whether two addr/offset of the ramblock falls onto the same host huge - * page. Returns true if so, false otherwise. - */ -static bool offset_on_same_huge_page(RAMBlock *rb, uint64_t addr1, - uint64_t addr2) -{ - size_t page_size = qemu_ram_pagesize(rb); - - addr1 = ROUND_DOWN(addr1, page_size); - addr2 = ROUND_DOWN(addr2, page_size); - - return addr1 == addr2; -} - -/* - * Whether a previous preempted precopy huge page contains current requested - * page? Returns true if so, false otherwise. - * - * This should really happen very rarely, because it means when we were sending - * during background migration for postcopy we're sending exactly the page that - * some vcpu got faulted on on dest node. When it happens, we probably don't - * need to do much but drop the request, because we know right after we restore - * the precopy stream it'll be serviced. It'll slightly affect the order of - * postcopy requests to be serviced (e.g. it'll be the same as we move current - * request to the end of the queue) but it shouldn't be a big deal. The most - * imporant thing is we can _never_ try to send a partial-sent huge page on the - * POSTCOPY channel again, otherwise that huge page will got "split brain" on - * two channels (PRECOPY, POSTCOPY). - */ -static bool postcopy_preempted_contains(RAMState *rs, RAMBlock *block, - ram_addr_t offset) -{ - PostcopyPreemptState *state = &rs->postcopy_preempt_state; - - /* No preemption at all? */ - if (!state->preempted) { - return false; - } - - /* Not even the same ramblock? */ - if (state->ram_block != block) { - return false; - } - - return offset_on_same_huge_page(block, offset, - state->ram_page << TARGET_PAGE_BITS); -} - /** * get_queued_page: unqueue a page from the postcopy requests * @@ -2087,20 +2047,7 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) } while (block && !dirty); - if (block) { - /* See comment above postcopy_preempted_contains() */ - if (postcopy_preempted_contains(rs, block, offset)) { - trace_postcopy_preempt_hit(block->idstr, offset); - /* - * If what we preempted previously was exactly what we're - * requesting right now, restore the preempted precopy - * immediately, boosting its priority as it's requested by - * postcopy. - */ - postcopy_preempt_restore(rs, pss, true); - return true; - } - } else { + if (!block) { /* * Poll write faults too if background snapshot is enabled; that's * when we have vcpus got blocked by the write protected pages. @@ -2122,9 +2069,6 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) * really rare. 
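The helper that replaces the bookkeeping deleted above is pss_overlap(), called from ram_save_host_page_urgent() further down in this patch: instead of remembering a preempted huge page and restoring it later, the urgent sender simply refuses to touch a host page that the precopy channel is currently in the middle of sending. pss_overlap() itself is defined outside this excerpt; a plausible minimal form, using only the PageSearchStatus fields this patch introduces (host_page_sending, host_page_start), might look like:

/* Hypothetical sketch: the real helper lives in a part of ram.c not shown here. */
static bool pss_overlap(PageSearchStatus *pss1, PageSearchStatus *pss2)
{
    return pss1->host_page_sending && pss2->host_page_sending &&
           pss1->block == pss2->block &&
           pss1->host_page_start == pss2->host_page_start;
}

Either way, the effect is the same guarantee the deleted comment asked for: a partially-sent huge page is never retransmitted on the postcopy channel, so the two channels cannot "split brain" on one huge page.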
*/ pss->complete_round = false; - /* Mark it an urgent request, meanwhile using POSTCOPY channel */ - pss->postcopy_requested = true; - pss->postcopy_target_channel = RAM_CHANNEL_POSTCOPY; } return !!block; @@ -2202,6 +2146,56 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) return -1; } + /* + * When with postcopy preempt, we send back the page directly in the + * rp-return thread. + */ + if (postcopy_preempt_active()) { + ram_addr_t page_start = start >> TARGET_PAGE_BITS; + size_t page_size = qemu_ram_pagesize(ramblock); + PageSearchStatus *pss = &ram_state->pss[RAM_CHANNEL_POSTCOPY]; + int ret = 0; + + qemu_mutex_lock(&rs->bitmap_mutex); + + pss_init(pss, ramblock, page_start); + /* + * Always use the preempt channel, and make sure it's there. It's + * safe to access without lock, because when rp-thread is running + * we should be the only one who operates on the qemufile + */ + pss->pss_channel = migrate_get_current()->postcopy_qemufile_src; + assert(pss->pss_channel); + + /* + * It must be either one or multiple of host page size. Just + * assert; if something wrong we're mostly split brain anyway. + */ + assert(len % page_size == 0); + while (len) { + if (ram_save_host_page_urgent(pss)) { + error_report("%s: ram_save_host_page_urgent() failed: " + "ramblock=%s, start_addr=0x"RAM_ADDR_FMT, + __func__, ramblock->idstr, start); + ret = -1; + break; + } + /* + * NOTE: after ram_save_host_page_urgent() succeeded, pss->page + * will automatically be moved and point to the next host page + * we're going to send, so no need to update here. + * + * Normally QEMU never sends >1 host page in requests, so + * logically we don't even need that as the loop should only + * run once, but just to be consistent. + */ + len -= page_size; + }; + qemu_mutex_unlock(&rs->bitmap_mutex); + + return ret; + } + struct RAMSrcPageRequest *new_entry = g_new0(struct RAMSrcPageRequest, 1); new_entry->rb = ramblock; @@ -2240,7 +2234,8 @@ static bool save_page_use_compression(RAMState *rs) * has been properly handled by compression, otherwise needs other * paths to handle it */ -static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) +static bool save_compress_page(RAMState *rs, PageSearchStatus *pss, + RAMBlock *block, ram_addr_t offset) { if (!save_page_use_compression(rs)) { return false; @@ -2256,12 +2251,12 @@ static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) * We post the fist page as normal page as compression will take * much CPU resource. 
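For reference, postcopy_preempt_active() gates this direct-send path but is not defined in this excerpt. Judging from the conditions it replaces elsewhere in the patch (the deleted postcopy_preempt_reset_channel() below checks migrate_postcopy_preempt() && migration_in_postcopy()), a minimal sketch would be:

/* Hypothetical reconstruction, based on the checks it replaces in this patch. */
static bool postcopy_preempt_active(void)
{
    return migrate_postcopy_preempt() && migration_in_postcopy();
}

So the rp-return thread only short-circuits the request queue once postcopy is actually running with the preempt capability enabled; otherwise requests still go through ram_save_queue_pages()'s normal queueing path.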
*/ - if (block != rs->last_sent_block) { + if (block != pss->last_sent_block) { flush_compressed_data(rs); return false; } - if (compress_page_with_multi_thread(rs, block, offset) > 0) { + if (compress_page_with_multi_thread(block, offset) > 0) { return true; } @@ -2283,20 +2278,20 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss) ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; int res; - if (control_save_page(rs, block, offset, &res)) { + if (control_save_page(pss, block, offset, &res)) { return res; } - if (save_compress_page(rs, block, offset)) { + if (save_compress_page(rs, pss, block, offset)) { return 1; } - res = save_zero_page(rs, block, offset); + res = save_zero_page(pss, block, offset); if (res > 0) { /* Must let xbzrle know, otherwise a previous (now 0'd) cached * page would be stale */ - if (!save_page_use_compression(rs)) { + if (rs->xbzrle_enabled) { XBZRLE_cache_lock(); xbzrle_cache_zero_page(rs, block->offset + offset); XBZRLE_cache_unlock(); @@ -2311,133 +2306,97 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss) * still see partially copied pages which is data corruption. */ if (migrate_use_multifd() && !migration_in_postcopy()) { - return ram_save_multifd_page(rs, block, offset); + return ram_save_multifd_page(pss->pss_channel, block, offset); } return ram_save_page(rs, pss); } -static bool postcopy_needs_preempt(RAMState *rs, PageSearchStatus *pss) +/* Should be called before sending a host page */ +static void pss_host_page_prepare(PageSearchStatus *pss) { - MigrationState *ms = migrate_get_current(); - - /* Not enabled eager preempt? Then never do that. */ - if (!migrate_postcopy_preempt()) { - return false; - } + /* How many guest pages are there in one host page? */ + size_t guest_pfns = qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS; - /* If the user explicitly disabled breaking of huge page, skip */ - if (!ms->postcopy_preempt_break_huge) { - return false; - } + pss->host_page_sending = true; + pss->host_page_start = ROUND_DOWN(pss->page, guest_pfns); + pss->host_page_end = ROUND_UP(pss->page + 1, guest_pfns); +} - /* If the ramblock we're sending is a small page? Never bother. */ - if (qemu_ram_pagesize(pss->block) == TARGET_PAGE_SIZE) { - return false; - } +/* + * Whether the page pointed by PSS is within the host page being sent. + * Must be called after a previous pss_host_page_prepare(). + */ +static bool pss_within_range(PageSearchStatus *pss) +{ + ram_addr_t ram_addr; - /* Not in postcopy at all? */ - if (!migration_in_postcopy()) { - return false; - } + assert(pss->host_page_sending); - /* - * If we're already handling a postcopy request, don't preempt as this page - * has got the same high priority. - */ - if (pss->postcopy_requested) { + /* Over host-page boundary? */ + if (pss->page >= pss->host_page_end) { return false; } - /* If there's postcopy requests, then check it up! */ - return postcopy_has_request(rs); -} - -/* Returns true if we preempted precopy, false otherwise */ -static void postcopy_do_preempt(RAMState *rs, PageSearchStatus *pss) -{ - PostcopyPreemptState *p_state = &rs->postcopy_preempt_state; - - trace_postcopy_preempt_triggered(pss->block->idstr, pss->page); + ram_addr = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; - /* - * Time to preempt precopy. Cache current PSS into preempt state, so that - * after handling the postcopy pages we can recover to it. 
We need to do - * so because the dest VM will have partial of the precopy huge page kept - * over in its tmp huge page caches; better move on with it when we can. - */ - p_state->ram_block = pss->block; - p_state->ram_page = pss->page; - p_state->preempted = true; + return offset_in_ramblock(pss->block, ram_addr); } -/* Whether we're preempted by a postcopy request during sending a huge page */ -static bool postcopy_preempt_triggered(RAMState *rs) +static void pss_host_page_finish(PageSearchStatus *pss) { - return rs->postcopy_preempt_state.preempted; + pss->host_page_sending = false; + /* This is not needed, but just to reset it */ + pss->host_page_start = pss->host_page_end = 0; } -static void postcopy_preempt_restore(RAMState *rs, PageSearchStatus *pss, - bool postcopy_requested) +/* + * Send an urgent host page specified by `pss'. Need to be called with + * bitmap_mutex held. + * + * Returns 0 if save host page succeeded, false otherwise. + */ +static int ram_save_host_page_urgent(PageSearchStatus *pss) { - PostcopyPreemptState *state = &rs->postcopy_preempt_state; - - assert(state->preempted); + bool page_dirty, sent = false; + RAMState *rs = ram_state; + int ret = 0; - pss->block = state->ram_block; - pss->page = state->ram_page; + trace_postcopy_preempt_send_host_page(pss->block->idstr, pss->page); + pss_host_page_prepare(pss); - /* Whether this is a postcopy request? */ - pss->postcopy_requested = postcopy_requested; /* - * When restoring a preempted page, the old data resides in PRECOPY - * slow channel, even if postcopy_requested is set. So always use - * PRECOPY channel here. + * If precopy is sending the same page, let it be done in precopy, or + * we could send the same page in two channels and none of them will + * receive the whole page. */ - pss->postcopy_target_channel = RAM_CHANNEL_PRECOPY; - - trace_postcopy_preempt_restored(pss->block->idstr, pss->page); - - /* Reset preempt state, most importantly, set preempted==false */ - postcopy_preempt_reset(rs); -} - -static void postcopy_preempt_choose_channel(RAMState *rs, PageSearchStatus *pss) -{ - MigrationState *s = migrate_get_current(); - unsigned int channel = pss->postcopy_target_channel; - QEMUFile *next; - - if (channel != rs->postcopy_channel) { - if (channel == RAM_CHANNEL_PRECOPY) { - next = s->to_dst_file; - } else { - next = s->postcopy_qemufile_src; - } - /* Update and cache the current channel */ - rs->f = next; - rs->postcopy_channel = channel; - - /* - * If channel switched, reset last_sent_block since the old sent block - * may not be on the same channel. - */ - rs->last_sent_block = NULL; - - trace_postcopy_preempt_switch_channel(channel); + if (pss_overlap(pss, &ram_state->pss[RAM_CHANNEL_PRECOPY])) { + trace_postcopy_preempt_hit(pss->block->idstr, + pss->page << TARGET_PAGE_BITS); + return 0; } - trace_postcopy_preempt_send_host_page(pss->block->idstr, pss->page); -} - -/* We need to make sure rs->f always points to the default channel elsewhere */ -static void postcopy_preempt_reset_channel(RAMState *rs) -{ - if (migrate_postcopy_preempt() && migration_in_postcopy()) { - rs->postcopy_channel = RAM_CHANNEL_PRECOPY; - rs->f = migrate_get_current()->to_dst_file; - trace_postcopy_preempt_reset_channel(); + do { + page_dirty = migration_bitmap_clear_dirty(rs, pss->block, pss->page); + + if (page_dirty) { + /* Be strict to return code; it must be 1, or what else? 
*/ + if (ram_save_target_page(rs, pss) != 1) { + error_report_once("%s: ram_save_target_page failed", __func__); + ret = -1; + goto out; + } + sent = true; + } + pss_find_next_dirty(pss); + } while (pss_within_range(pss)); +out: + pss_host_page_finish(pss); + /* For urgent requests, flush immediately if sent */ + if (sent) { + qemu_fflush(pss->pss_channel); } + return ret; } /** @@ -2448,9 +2407,14 @@ static void postcopy_preempt_reset_channel(RAMState *rs) * a host page in which case the remainder of the hostpage is sent. * Only dirty target pages are sent. Note that the host page size may * be a huge page for this block. + * * The saving stops at the boundary of the used_length of the block * if the RAMBlock isn't a multiple of the host page size. * + * The caller must be with ram_state.bitmap_mutex held to call this + * function. Note that this function can temporarily release the lock, but + * when the function is returned it'll make sure the lock is still held. + * * Returns the number of pages written or negative on error * * @rs: current RAM state @@ -2458,11 +2422,10 @@ static void postcopy_preempt_reset_channel(RAMState *rs) */ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss) { + bool page_dirty, preempt_active = postcopy_preempt_active(); int tmppages, pages = 0; size_t pagesize_bits = qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS; - unsigned long hostpage_boundary = - QEMU_ALIGN_UP(pss->page + 1, pagesize_bits); unsigned long start_page = pss->page; int res; @@ -2471,51 +2434,49 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss) return 0; } - if (migrate_postcopy_preempt() && migration_in_postcopy()) { - postcopy_preempt_choose_channel(rs, pss); - } + /* Update host page boundary information */ + pss_host_page_prepare(pss); do { - if (postcopy_needs_preempt(rs, pss)) { - postcopy_do_preempt(rs, pss); - break; - } + page_dirty = migration_bitmap_clear_dirty(rs, pss->block, pss->page); /* Check the pages is dirty and if it is send it */ - if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { - tmppages = ram_save_target_page(rs, pss); - if (tmppages < 0) { - return tmppages; - } - - pages += tmppages; + if (page_dirty) { /* - * Allow rate limiting to happen in the middle of huge pages if - * something is sent in the current iteration. + * Properly yield the lock only in postcopy preempt mode + * because both migration thread and rp-return thread can + * operate on the bitmaps. */ - if (pagesize_bits > 1 && tmppages > 0) { - migration_rate_limit(); + if (preempt_active) { + qemu_mutex_unlock(&rs->bitmap_mutex); + } + tmppages = ram_save_target_page(rs, pss); + if (tmppages >= 0) { + pages += tmppages; + /* + * Allow rate limiting to happen in the middle of huge pages if + * something is sent in the current iteration. 
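Both ram_save_host_page_urgent() above and the reworked ram_save_host_page() loop here stay inside the window set up by pss_host_page_prepare(): host_page_start/host_page_end are the current page rounded down/up to the host page size expressed in guest pages. A standalone sketch of that arithmetic, with local ROUND_DOWN/ROUND_UP macros and example sizes (not QEMU's definitions):

#include <stdio.h>

#define ROUND_DOWN(n, d) ((n) / (d) * (d))
#define ROUND_UP(n, d)   ROUND_DOWN((n) + (d) - 1, (d))

int main(void)
{
    unsigned long target_page_bits = 12;                 /* 4 KiB target pages */
    unsigned long host_page_size = 2 * 1024 * 1024;      /* e.g. 2 MiB hugetlbfs backing */
    unsigned long guest_pfns = host_page_size >> target_page_bits;   /* 512 */
    unsigned long page = 1000;                            /* current dirty target page */

    unsigned long host_page_start = ROUND_DOWN(page, guest_pfns);    /* 512 */
    unsigned long host_page_end = ROUND_UP(page + 1, guest_pfns);    /* 1024 */

    printf("host page window: [%lu, %lu)\n", host_page_start, host_page_end);
    return 0;
}

With a 4 KiB-page RAMBlock guest_pfns is 1, so the window collapses to a single page and both loops degenerate to the old one-page-at-a-time behaviour.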
+ */ + if (pagesize_bits > 1 && tmppages > 0) { + migration_rate_limit(); + } } + if (preempt_active) { + qemu_mutex_lock(&rs->bitmap_mutex); + } + } else { + tmppages = 0; } - pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page); - } while ((pss->page < hostpage_boundary) && - offset_in_ramblock(pss->block, - ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)); - /* The offset we leave with is the min boundary of host page and block */ - pss->page = MIN(pss->page, hostpage_boundary); - /* - * When with postcopy preempt mode, flush the data as soon as possible for - * postcopy requests, because we've already sent a whole huge page, so the - * dst node should already have enough resource to atomically filling in - * the current missing page. - * - * More importantly, when using separate postcopy channel, we must do - * explicit flush or it won't flush until the buffer is full. - */ - if (migrate_postcopy_preempt() && pss->postcopy_requested) { - qemu_fflush(rs->f); - } + if (tmppages < 0) { + pss_host_page_finish(pss); + return tmppages; + } + + pss_find_next_dirty(pss); + } while (pss_within_range(pss)); + + pss_host_page_finish(pss); res = ram_save_release_protection(rs, pss, start_page); return (res < 0 ? res : pages); @@ -2536,7 +2497,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss) */ static int ram_find_and_save_block(RAMState *rs) { - PageSearchStatus pss; + PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY]; int pages = 0; bool again, found; @@ -2557,35 +2518,24 @@ static int ram_find_and_save_block(RAMState *rs) rs->last_page = 0; } - pss.block = rs->last_seen_block; - pss.page = rs->last_page; - pss.complete_round = false; + pss_init(pss, rs->last_seen_block, rs->last_page); do { again = true; - found = get_queued_page(rs, &pss); + found = get_queued_page(rs, pss); if (!found) { - /* - * Recover previous precopy ramblock/offset if postcopy has - * preempted precopy. Otherwise find the next dirty bit. 
- */ - if (postcopy_preempt_triggered(rs)) { - postcopy_preempt_restore(rs, &pss, false); - found = true; - } else { - /* priority queue empty, so just search for something dirty */ - found = find_dirty_block(rs, &pss, &again); - } + /* priority queue empty, so just search for something dirty */ + found = find_dirty_block(rs, pss, &again); } if (found) { - pages = ram_save_host_page(rs, &pss); + pages = ram_save_host_page(rs, pss); } } while (!pages && again); - rs->last_seen_block = pss.block; - rs->last_page = pss.page; + rs->last_seen_block = pss->block; + rs->last_page = pss->page; return pages; } @@ -2595,9 +2545,9 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) uint64_t pages = size / TARGET_PAGE_SIZE; if (zero) { - ram_counters.duplicate += pages; + stat64_add(&ram_atomic_counters.duplicate, pages); } else { - ram_counters.normal += pages; + stat64_add(&ram_atomic_counters.normal, pages); ram_transferred_add(size); qemu_file_credit_transfer(f, size); } @@ -2699,13 +2649,16 @@ static void ram_save_cleanup(void *opaque) static void ram_state_reset(RAMState *rs) { + int i; + + for (i = 0; i < RAM_CHANNEL_MAX; i++) { + rs->pss[i].last_sent_block = NULL; + } + rs->last_seen_block = NULL; - rs->last_sent_block = NULL; rs->last_page = 0; rs->last_version = ram_list.version; rs->xbzrle_enabled = false; - postcopy_preempt_reset(rs); - rs->postcopy_channel = RAM_CHANNEL_PRECOPY; } #define MAX_WAIT 50 /* ms, half buffered_file limit */ @@ -2894,8 +2847,8 @@ void ram_postcopy_send_discard_bitmap(MigrationState *ms) migration_bitmap_sync(rs); /* Easiest way to make sure we don't resume in the middle of a host-page */ + rs->pss[RAM_CHANNEL_PRECOPY].last_sent_block = NULL; rs->last_seen_block = NULL; - rs->last_sent_block = NULL; rs->last_page = 0; postcopy_each_ram_send_discard(ms); @@ -3132,7 +3085,7 @@ static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out) ram_state_reset(rs); /* Update RAMState cache of output QEMUFile */ - rs->f = out; + rs->pss[RAM_CHANNEL_PRECOPY].pss_channel = out; trace_ram_state_resume_prepare(pages); } @@ -3223,7 +3176,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) return -1; } } - (*rsp)->f = f; + (*rsp)->pss[RAM_CHANNEL_PRECOPY].pss_channel = f; WITH_RCU_READ_LOCK_GUARD() { qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE); @@ -3349,8 +3302,6 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) } qemu_mutex_unlock(&rs->bitmap_mutex); - postcopy_preempt_reset_channel(rs); - /* * Must occur before EOS (or any QEMUFile operation) * because of RDMA protocol. 
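pss_init(), used by both ram_save_queue_pages() and ram_find_and_save_block() above, is defined outside this excerpt; it replaces the three explicit assignments the old code made on the stack-local pss, so a minimal form is presumably:

/* Hypothetical sketch matching the assignments it replaces above. */
static void pss_init(PageSearchStatus *pss, RAMBlock *rb, unsigned long page)
{
    pss->block = rb;
    pss->page = page;
    pss->complete_round = false;
}

The bigger structural change is that PageSearchStatus is no longer a stack temporary: RAMState now holds one per channel (rs->pss[RAM_CHANNEL_PRECOPY] for the migration thread, rs->pss[RAM_CHANNEL_POSTCOPY] for the rp-return thread), and each carries its own pss_channel QEMUFile and last_sent_block. That is why ram_state_reset() and ram_postcopy_send_discard_bitmap() above clear last_sent_block per channel rather than once globally.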
@@ -3360,7 +3311,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) out: if (ret >= 0 && migration_is_setup_or_active(migrate_get_current()->state)) { - ret = multifd_send_sync_main(rs->f); + ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); if (ret < 0) { return ret; } @@ -3406,6 +3357,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) /* try transferring iterative blocks of memory */ /* flush all remaining blocks regardless of rate limiting */ + qemu_mutex_lock(&rs->bitmap_mutex); while (true) { int pages; @@ -3419,6 +3371,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) break; } } + qemu_mutex_unlock(&rs->bitmap_mutex); flush_compressed_data(rs); ram_control_after_iterate(f, RAM_CONTROL_FINISH); @@ -3428,9 +3381,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) return ret; } - postcopy_preempt_reset_channel(rs); - - ret = multifd_send_sync_main(rs->f); + ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); if (ret < 0) { return ret; } diff --git a/migration/ram.h b/migration/ram.h index c7af65ac74..81cbb0947c 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -32,7 +32,27 @@ #include "qapi/qapi-types-migration.h" #include "exec/cpu-common.h" #include "io/channel.h" +#include "qemu/stats64.h" +/* + * These are the migration statistic counters that need to be updated using + * atomic ops (can be accessed by more than one thread). Here since we + * cannot modify MigrationStats directly to use Stat64 as it was defined in + * the QAPI scheme, we define an internal structure to hold them, and we + * propagate the real values when QMP queries happen. + * + * IOW, the corresponding fields within ram_counters on these specific + * fields will be always zero and not being used at all; they're just + * placeholders to make it QAPI-compatible. 
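A sketch of how the two halves of this split fit together, under the design stated in the comment: hot-path updaters use stat64_add() from whichever thread touched the page, and only a QMP query folds the atomic values back into the QAPI-visible MigrationStats. The folding helper below is hypothetical (its name and location are not part of this patch); the Stat64 accessors and struct fields are the ones the patch itself uses:

#include "qemu/stats64.h"
#include "qapi/qapi-types-migration.h"

/* Hypothetical helper: how a query handler might propagate the atomic
 * counters into the QAPI struct at QMP query time. */
static void ram_counters_fill_for_query(MigrationStats *stats)
{
    stats->transferred    = stat64_get(&ram_atomic_counters.transferred);
    stats->duplicate      = stat64_get(&ram_atomic_counters.duplicate);
    stats->normal         = stat64_get(&ram_atomic_counters.normal);
    stats->postcopy_bytes = stat64_get(&ram_atomic_counters.postcopy_bytes);
}

This is what lets multiple senders (migration thread, rp-return thread, multifd and compression paths) bump normal/duplicate/transferred without taking a lock.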
+ */ +typedef struct { + Stat64 transferred; + Stat64 duplicate; + Stat64 normal; + Stat64 postcopy_bytes; +} MigrationAtomicStats; + +extern MigrationAtomicStats ram_atomic_counters; extern MigrationStats ram_counters; extern XBZRLECacheStats xbzrle_counters; extern CompressionStats compression_counters; @@ -65,6 +85,9 @@ int ram_load_postcopy(QEMUFile *f, int channel); void ram_handle_compressed(void *host, uint8_t ch, uint64_t size); +void ram_transferred_add(uint64_t bytes); +void ram_release_page(const char *rbname, uint64_t offset); + int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr); bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset); void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr); diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 38d066c294..0f55004d7e 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -528,7 +528,7 @@ static void arm_cpu_reset(DeviceState *dev) arm_rebuild_hflags(env); } -#ifndef CONFIG_USER_ONLY +#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, unsigned int target_el, @@ -725,7 +725,8 @@ static bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request) cc->tcg_ops->do_interrupt(cs); return true; } -#endif /* !CONFIG_USER_ONLY */ + +#endif /* CONFIG_TCG && !CONFIG_USER_ONLY */ void arm_cpu_update_virq(ARMCPU *cpu) { diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 9aeed3c848..2b4bd20f9d 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -3757,6 +3757,16 @@ static inline bool isar_feature_aa32_tts2uxn(const ARMISARegisters *id) return FIELD_EX32(id->id_mmfr4, ID_MMFR4, XNX) != 0; } +static inline bool isar_feature_aa32_half_evt(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_mmfr4, ID_MMFR4, EVT) >= 1; +} + +static inline bool isar_feature_aa32_evt(const ARMISARegisters *id) +{ + return FIELD_EX32(id->id_mmfr4, ID_MMFR4, EVT) >= 2; +} + static inline bool isar_feature_aa32_dit(const ARMISARegisters *id) { return FIELD_EX32(id->id_pfr0, ID_PFR0, DIT) != 0; @@ -4029,6 +4039,16 @@ static inline bool isar_feature_aa64_ids(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, IDS) != 0; } +static inline bool isar_feature_aa64_half_evt(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, EVT) >= 1; +} + +static inline bool isar_feature_aa64_evt(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, EVT) >= 2; +} + static inline bool isar_feature_aa64_bti(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0; @@ -4313,6 +4333,16 @@ static inline bool isar_feature_any_ras(const ARMISARegisters *id) return isar_feature_aa64_ras(id) || isar_feature_aa32_ras(id); } +static inline bool isar_feature_any_half_evt(const ARMISARegisters *id) +{ + return isar_feature_aa64_half_evt(id) || isar_feature_aa32_half_evt(id); +} + +static inline bool isar_feature_any_evt(const ARMISARegisters *id) +{ + return isar_feature_aa64_evt(id) || isar_feature_aa32_evt(id); +} + /* * Forward to the above feature tests given an ARMCPU pointer. 
*/ diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 3d74f134f5..2cf2ca4ce5 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -792,6 +792,74 @@ static void aarch64_a53_initfn(Object *obj) define_cortex_a72_a57_a53_cp_reginfo(cpu); } +static void aarch64_a55_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a55"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + + /* Ordered by B2.4 AArch64 registers by functional group */ + cpu->clidr = 0x82000023; + cpu->ctr = 0x84448004; /* L1Ip = VIPT */ + cpu->dcz_blocksize = 4; /* 64 bytes */ + cpu->isar.id_aa64dfr0 = 0x0000000010305408ull; + cpu->isar.id_aa64isar0 = 0x0000100010211120ull; + cpu->isar.id_aa64isar1 = 0x0000000000100001ull; + cpu->isar.id_aa64mmfr0 = 0x0000000000101122ull; + cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; + cpu->isar.id_aa64mmfr2 = 0x0000000000001011ull; + cpu->isar.id_aa64pfr0 = 0x0000000010112222ull; + cpu->isar.id_aa64pfr1 = 0x0000000000000010ull; + cpu->id_afr0 = 0x00000000; + cpu->isar.id_dfr0 = 0x04010088; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00011142; + cpu->isar.id_isar5 = 0x01011121; + cpu->isar.id_isar6 = 0x00000010; + cpu->isar.id_mmfr0 = 0x10201105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02122211; + cpu->isar.id_mmfr4 = 0x00021110; + cpu->isar.id_pfr0 = 0x10010131; + cpu->isar.id_pfr1 = 0x00011011; + cpu->isar.id_pfr2 = 0x00000011; + cpu->midr = 0x412FD050; /* r2p0 */ + cpu->revidr = 0; + + /* From B2.23 CCSIDR_EL1 */ + cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x200fe01a; /* 32KB L1 icache */ + cpu->ccsidr[2] = 0x703fe07a; /* 512KB L2 cache */ + + /* From B2.96 SCTLR_EL3 */ + cpu->reset_sctlr = 0x30c50838; + + /* From B4.45 ICH_VTR_EL2 */ + cpu->gic_num_lrs = 4; + cpu->gic_vpribits = 5; + cpu->gic_vprebits = 5; + cpu->gic_pribits = 5; + + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x13211111; + cpu->isar.mvfr2 = 0x00000043; + + /* From D5.4 AArch64 PMU register summary */ + cpu->isar.reset_pmcr_el0 = 0x410b3000; +} + static void aarch64_a72_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); @@ -1186,6 +1254,7 @@ static void aarch64_max_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR2, FWB, 1); /* FEAT_S2FWB */ t = FIELD_DP64(t, ID_AA64MMFR2, TTL, 1); /* FEAT_TTL */ t = FIELD_DP64(t, ID_AA64MMFR2, BBM, 2); /* FEAT_BBM at level 2 */ + t = FIELD_DP64(t, ID_AA64MMFR2, EVT, 2); /* FEAT_EVT */ t = FIELD_DP64(t, ID_AA64MMFR2, E0PD, 1); /* FEAT_E0PD */ cpu->isar.id_aa64mmfr2 = t; @@ -1243,6 +1312,7 @@ static const ARMCPUInfo aarch64_cpus[] = { { .name = "cortex-a35", .initfn = aarch64_a35_initfn }, { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, + { .name = "cortex-a55", .initfn = aarch64_a55_initfn }, { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, { .name = "cortex-a76", .initfn = aarch64_a76_initfn }, { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c index 9a2cef7d05..568cbcfc52 100644 --- 
a/target/arm/cpu_tcg.c +++ b/target/arm/cpu_tcg.c @@ -65,6 +65,7 @@ void aa32_max_features(ARMCPU *cpu) t = FIELD_DP32(t, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */ t = FIELD_DP32(t, ID_MMFR4, CNP, 1); /* FEAT_TTCNP */ t = FIELD_DP32(t, ID_MMFR4, XNX, 1); /* FEAT_XNX */ + t = FIELD_DP32(t, ID_MMFR4, EVT, 2); /* FEAT_EVT */ cpu->isar.id_mmfr4 = t; t = cpu->isar.id_mmfr5; diff --git a/target/arm/helper.c b/target/arm/helper.c index d8c8223ec3..bac2ea62c4 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -362,6 +362,30 @@ static CPAccessResult access_ttlb(CPUARMState *env, const ARMCPRegInfo *ri, return CP_ACCESS_OK; } +/* Check for traps from EL1 due to HCR_EL2.TTLB or TTLBIS. */ +static CPAccessResult access_ttlbis(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 1 && + (arm_hcr_el2_eff(env) & (HCR_TTLB | HCR_TTLBIS))) { + return CP_ACCESS_TRAP_EL2; + } + return CP_ACCESS_OK; +} + +#ifdef TARGET_AARCH64 +/* Check for traps from EL1 due to HCR_EL2.TTLB or TTLBOS. */ +static CPAccessResult access_ttlbos(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if (arm_current_el(env) == 1 && + (arm_hcr_el2_eff(env) & (HCR_TTLB | HCR_TTLBOS))) { + return CP_ACCESS_TRAP_EL2; + } + return CP_ACCESS_OK; +} +#endif + static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { ARMCPU *cpu = env_archcpu(env); @@ -1871,11 +1895,12 @@ static void scr_reset(CPUARMState *env, const ARMCPRegInfo *ri) scr_write(env, ri, 0); } -static CPAccessResult access_aa64_tid2(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) +static CPAccessResult access_tid4(CPUARMState *env, + const ARMCPRegInfo *ri, + bool isread) { - if (arm_current_el(env) == 1 && (arm_hcr_el2_eff(env) & HCR_TID2)) { + if (arm_current_el(env) == 1 && + (arm_hcr_el2_eff(env) & (HCR_TID2 | HCR_TID4))) { return CP_ACCESS_TRAP_EL2; } @@ -2106,12 +2131,12 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0, .access = PL1_R, - .accessfn = access_aa64_tid2, + .accessfn = access_tid4, .readfn = ccsidr_read, .type = ARM_CP_NO_RAW }, { .name = "CSSELR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 2, .opc2 = 0, .access = PL1_RW, - .accessfn = access_aa64_tid2, + .accessfn = access_tid4, .writefn = csselr_write, .resetvalue = 0, .bank_fieldoffsets = { offsetof(CPUARMState, cp15.csselr_s), offsetof(CPUARMState, cp15.csselr_ns) } }, @@ -2206,16 +2231,16 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { static const ARMCPRegInfo v7mp_cp_reginfo[] = { /* 32 bit TLB invalidates, Inner Shareable */ { .name = "TLBIALLIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, .writefn = tlbiall_is_write }, { .name = "TLBIMVAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, .writefn = tlbimva_is_write }, { .name = "TLBIASIDIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, .writefn = tlbiasid_is_write }, { .name = "TLBIMVAAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .type 
= ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, .writefn = tlbimvaa_is_write }, }; @@ -4249,9 +4274,7 @@ static CPAccessResult aa64_cacheop_poc_access(CPUARMState *env, return CP_ACCESS_OK; } -static CPAccessResult aa64_cacheop_pou_access(CPUARMState *env, - const ARMCPRegInfo *ri, - bool isread) +static CPAccessResult do_cacheop_pou_access(CPUARMState *env, uint64_t hcrflags) { /* Cache invalidate/clean to Point of Unification... */ switch (arm_current_el(env)) { @@ -4262,8 +4285,8 @@ static CPAccessResult aa64_cacheop_pou_access(CPUARMState *env, } /* fall through */ case 1: - /* ... EL1 must trap to EL2 if HCR_EL2.TPU is set. */ - if (arm_hcr_el2_eff(env) & HCR_TPU) { + /* ... EL1 must trap to EL2 if relevant HCR_EL2 flags are set. */ + if (arm_hcr_el2_eff(env) & hcrflags) { return CP_ACCESS_TRAP_EL2; } break; @@ -4271,6 +4294,18 @@ static CPAccessResult aa64_cacheop_pou_access(CPUARMState *env, return CP_ACCESS_OK; } +static CPAccessResult access_ticab(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + return do_cacheop_pou_access(env, HCR_TICAB | HCR_TPU); +} + +static CPAccessResult access_tocu(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + return do_cacheop_pou_access(env, HCR_TOCU | HCR_TPU); +} + /* See: D4.7.2 TLB maintenance requirements and the TLB maintenance instructions * Page D4-1736 (DDI0487A.b) */ @@ -4911,15 +4946,15 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { { .name = "IC_IALLUIS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0, .access = PL1_W, .type = ARM_CP_NOP, - .accessfn = aa64_cacheop_pou_access }, + .accessfn = access_ticab }, { .name = "IC_IALLU", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 0, .access = PL1_W, .type = ARM_CP_NOP, - .accessfn = aa64_cacheop_pou_access }, + .accessfn = access_tocu }, { .name = "IC_IVAU", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 5, .opc2 = 1, .access = PL0_W, .type = ARM_CP_NOP, - .accessfn = aa64_cacheop_pou_access }, + .accessfn = access_tocu }, { .name = "DC_IVAC", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 1, .access = PL1_W, .accessfn = aa64_cacheop_poc_access, @@ -4937,7 +4972,7 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { { .name = "DC_CVAU", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 11, .opc2 = 1, .access = PL0_W, .type = ARM_CP_NOP, - .accessfn = aa64_cacheop_pou_access }, + .accessfn = access_tocu }, { .name = "DC_CIVAC", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 1, .access = PL0_W, .type = ARM_CP_NOP, @@ -4948,27 +4983,27 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { /* TLBI operations */ { .name = "TLBI_VMALLE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_vmalle1is_write }, { .name = "TLBI_VAE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_vae1is_write }, { .name = "TLBI_ASIDE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, 
.writefn = tlbi_aa64_vmalle1is_write }, { .name = "TLBI_VAAE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_vae1is_write }, { .name = "TLBI_VALE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_vae1is_write }, { .name = "TLBI_VAALE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_vae1is_write }, { .name = "TLBI_VMALLE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0, @@ -5078,10 +5113,10 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { #endif /* TLB invalidate last level of translation table walk */ { .name = "TLBIMVALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, .writefn = tlbimva_is_write }, { .name = "TLBIMVAALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7, - .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, + .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlbis, .writefn = tlbimvaa_is_write }, { .name = "TLBIMVAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5, .type = ARM_CP_NO_RAW, .access = PL1_W, .accessfn = access_ttlb, @@ -5114,13 +5149,13 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { .writefn = tlbiipas2is_hyp_write }, /* 32 bit cache operations */ { .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0, - .type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_pou_access }, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_ticab }, { .name = "BPIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 6, .type = ARM_CP_NOP, .access = PL1_W }, { .name = "ICIALLU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 0, - .type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_pou_access }, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tocu }, { .name = "ICIMVAU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 1, - .type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_pou_access }, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tocu }, { .name = "BPIALL", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 6, .type = ARM_CP_NOP, .access = PL1_W }, { .name = "BPIMVA", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 7, @@ -5134,7 +5169,7 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { { .name = "DCCSW", .cp = 15, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2, .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, { .name = "DCCMVAU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 11, .opc2 = 1, - .type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_pou_access }, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tocu }, { .name = "DCCIMVAC", .cp = 15, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 1, .type = ARM_CP_NOP, .access = PL1_W, .accessfn = aa64_cacheop_poc_access }, { .name = "DCCISW", .cp = 15, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2, @@ -5267,6 +5302,12 @@ static void do_hcr_write(CPUARMState *env, 
uint64_t value, uint64_t valid_mask) } } + if (cpu_isar_feature(any_evt, cpu)) { + valid_mask |= HCR_TTLBIS | HCR_TTLBOS | HCR_TICAB | HCR_TOCU | HCR_TID4; + } else if (cpu_isar_feature(any_half_evt, cpu)) { + valid_mask |= HCR_TICAB | HCR_TOCU | HCR_TID4; + } + /* Clear RES0 bits. */ value &= valid_mask; @@ -6720,35 +6761,35 @@ static const ARMCPRegInfo pauth_reginfo[] = { static const ARMCPRegInfo tlbirange_reginfo[] = { { .name = "TLBI_RVAE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 1, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_rvae1is_write }, { .name = "TLBI_RVAAE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 3, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_rvae1is_write }, { .name = "TLBI_RVALE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 5, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_rvae1is_write }, { .name = "TLBI_RVAALE1IS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 2, .opc2 = 7, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbis, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_rvae1is_write }, { .name = "TLBI_RVAE1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_rvae1is_write }, { .name = "TLBI_RVAAE1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 3, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_rvae1is_write }, { .name = "TLBI_RVALE1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 5, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_rvae1is_write }, { .name = "TLBI_RVAALE1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 7, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_rvae1is_write }, { .name = "TLBI_RVAE1", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1, @@ -6835,27 +6876,27 @@ static const ARMCPRegInfo tlbirange_reginfo[] = { static const ARMCPRegInfo tlbios_reginfo[] = { { .name = "TLBI_VMALLE1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 0, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_vmalle1is_write }, { .name = "TLBI_VAE1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 1, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_vae1is_write }, { .name = "TLBI_ASIDE1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn 
= 8, .crm = 1, .opc2 = 2, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_vmalle1is_write }, { .name = "TLBI_VAAE1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 3, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_vae1is_write }, { .name = "TLBI_VALE1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 5, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_vae1is_write }, { .name = "TLBI_VAALE1OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 1, .opc2 = 7, - .access = PL1_W, .accessfn = access_ttlb, .type = ARM_CP_NO_RAW, + .access = PL1_W, .accessfn = access_ttlbos, .type = ARM_CP_NO_RAW, .writefn = tlbi_aa64_vae1is_write }, { .name = "TLBI_ALLE2OS", .state = ARM_CP_STATE_AA64, .opc0 = 1, .opc1 = 4, .crn = 8, .crm = 1, .opc2 = 0, @@ -7241,7 +7282,7 @@ static const ARMCPRegInfo ccsidr2_reginfo[] = { { .name = "CCSIDR2", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 2, .access = PL1_R, - .accessfn = access_aa64_tid2, + .accessfn = access_tid4, .readfn = ccsidr2_read, .type = ARM_CP_NO_RAW }, }; @@ -7541,7 +7582,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) .name = "CLIDR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST, - .accessfn = access_aa64_tid2, + .accessfn = access_tid4, .resetvalue = cpu->clidr }; define_one_arm_cp_reg(cpu, &clidr); diff --git a/target/arm/kvm-consts.h b/target/arm/kvm-consts.h index faacf96fdc..09967ec5e6 100644 --- a/target/arm/kvm-consts.h +++ b/target/arm/kvm-consts.h @@ -14,16 +14,16 @@ #ifndef ARM_KVM_CONSTS_H #define ARM_KVM_CONSTS_H +#ifdef NEED_CPU_H #ifdef CONFIG_KVM #include <linux/kvm.h> #include <linux/psci.h> - #define MISMATCH_CHECK(X, Y) QEMU_BUILD_BUG_ON(X != Y) +#endif +#endif -#else - +#ifndef MISMATCH_CHECK #define MISMATCH_CHECK(X, Y) QEMU_BUILD_BUG_ON(0) - #endif #define CP_REG_SIZE_SHIFT 52 |
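Summarising the FEAT_EVT plumbing spread across the target/arm hunks above: the new ID-register tests treat EVT level 1 as the "half" feature and level 2 as the full feature, the TLB/cache/ID access functions are split so each register checks exactly the trap bits FEAT_EVT assigns to it, and do_hcr_write() only makes the new HCR_EL2 trap bits writable when the CPU implements the feature. A compact standalone restatement of that mapping (the bit positions below are placeholders, not the architectural encodings; the real HCR_* values live in target/arm/cpu.h):

#include <stdint.h>

#define HCR_TICAB  (1ULL << 0)   /* placeholder value */
#define HCR_TOCU   (1ULL << 1)   /* placeholder value */
#define HCR_TID4   (1ULL << 2)   /* placeholder value */
#define HCR_TTLBIS (1ULL << 3)   /* placeholder value */
#define HCR_TTLBOS (1ULL << 4)   /* placeholder value */

/* EVT >= 1 ("half EVT"): the extra cache-maintenance and ID-register traps.
 * EVT >= 2 (full FEAT_EVT): additionally the Inner/Outer Shareable TLB traps. */
static uint64_t evt_writable_hcr_bits(unsigned evt_level)
{
    uint64_t mask = 0;

    if (evt_level >= 1) {
        mask |= HCR_TICAB | HCR_TOCU | HCR_TID4;
    }
    if (evt_level >= 2) {
        mask |= HCR_TTLBIS | HCR_TTLBOS;
    }
    return mask;
}

This mirrors the do_hcr_write() hunk: any_half_evt (EVT >= 1) adds TICAB/TOCU/TID4 to valid_mask, and any_evt (EVT >= 2) adds TTLBIS/TTLBOS on top of those.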