diff options
Diffstat (limited to 'hw')
88 files changed, 2677 insertions, 478 deletions
diff --git a/hw/alpha/dp264.c b/hw/alpha/dp264.c index 0347eb897c..9dfb835013 100644 --- a/hw/alpha/dp264.c +++ b/hw/alpha/dp264.c @@ -63,6 +63,7 @@ static void clipper_init(MachineState *machine) char *palcode_filename; uint64_t palcode_entry, palcode_low, palcode_high; uint64_t kernel_entry, kernel_low, kernel_high; + unsigned int smp_cpus = machine->smp.cpus; /* Create up to 4 cpus. */ memset(cpus, 0, sizeof(cpus)); diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c index 8b6d304247..843b708247 100644 --- a/hw/arm/aspeed.c +++ b/hw/arm/aspeed.c @@ -187,7 +187,7 @@ static void aspeed_board_init(MachineState *machine, &error_abort); object_property_set_int(OBJECT(&bmc->soc), cfg->num_cs, "num-cs", &error_abort); - object_property_set_int(OBJECT(&bmc->soc), smp_cpus, "num-cpus", + object_property_set_int(OBJECT(&bmc->soc), machine->smp.cpus, "num-cpus", &error_abort); if (machine->kernel_filename) { /* diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c index 7129517378..de45833097 100644 --- a/hw/arm/fsl-imx6.c +++ b/hw/arm/fsl-imx6.c @@ -22,6 +22,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "hw/arm/fsl-imx6.h" +#include "hw/boards.h" #include "sysemu/sysemu.h" #include "chardev/char.h" #include "qemu/error-report.h" @@ -33,11 +34,12 @@ static void fsl_imx6_init(Object *obj) { + MachineState *ms = MACHINE(qdev_get_machine()); FslIMX6State *s = FSL_IMX6(obj); char name[NAME_SIZE]; int i; - for (i = 0; i < MIN(smp_cpus, FSL_IMX6_NUM_CPUS); i++) { + for (i = 0; i < MIN(ms->smp.cpus, FSL_IMX6_NUM_CPUS); i++) { snprintf(name, NAME_SIZE, "cpu%d", i); object_initialize_child(obj, name, &s->cpu[i], sizeof(s->cpu[i]), "cortex-a9-" TYPE_ARM_CPU, &error_abort, NULL); @@ -93,9 +95,11 @@ static void fsl_imx6_init(Object *obj) static void fsl_imx6_realize(DeviceState *dev, Error **errp) { + MachineState *ms = MACHINE(qdev_get_machine()); FslIMX6State *s = FSL_IMX6(dev); uint16_t i; Error *err = NULL; + unsigned int smp_cpus = ms->smp.cpus; if (smp_cpus > FSL_IMX6_NUM_CPUS) { error_setg(errp, "%s: Only %d CPUs are supported (%d requested)", diff --git a/hw/arm/fsl-imx6ul.c b/hw/arm/fsl-imx6ul.c index 05505bac56..f860165438 100644 --- a/hw/arm/fsl-imx6ul.c +++ b/hw/arm/fsl-imx6ul.c @@ -20,6 +20,7 @@ #include "qapi/error.h" #include "hw/arm/fsl-imx6ul.h" #include "hw/misc/unimp.h" +#include "hw/boards.h" #include "sysemu/sysemu.h" #include "qemu/error-report.h" #include "qemu/module.h" @@ -28,11 +29,12 @@ static void fsl_imx6ul_init(Object *obj) { + MachineState *ms = MACHINE(qdev_get_machine()); FslIMX6ULState *s = FSL_IMX6UL(obj); char name[NAME_SIZE]; int i; - for (i = 0; i < MIN(smp_cpus, FSL_IMX6UL_NUM_CPUS); i++) { + for (i = 0; i < MIN(ms->smp.cpus, FSL_IMX6UL_NUM_CPUS); i++) { snprintf(name, NAME_SIZE, "cpu%d", i); object_initialize_child(obj, name, &s->cpu[i], sizeof(s->cpu[i]), "cortex-a7-" TYPE_ARM_CPU, &error_abort, NULL); @@ -156,10 +158,12 @@ static void fsl_imx6ul_init(Object *obj) static void fsl_imx6ul_realize(DeviceState *dev, Error **errp) { + MachineState *ms = MACHINE(qdev_get_machine()); FslIMX6ULState *s = FSL_IMX6UL(dev); int i; qemu_irq irq; char name[NAME_SIZE]; + unsigned int smp_cpus = ms->smp.cpus; if (smp_cpus > FSL_IMX6UL_NUM_CPUS) { error_setg(errp, "%s: Only %d CPUs are supported (%d requested)", diff --git a/hw/arm/fsl-imx7.c b/hw/arm/fsl-imx7.c index 2eddf3f25c..119b281a50 100644 --- a/hw/arm/fsl-imx7.c +++ b/hw/arm/fsl-imx7.c @@ -22,6 +22,7 @@ #include "qapi/error.h" #include "hw/arm/fsl-imx7.h" #include "hw/misc/unimp.h" +#include "hw/boards.h" #include "sysemu/sysemu.h" #include "qemu/error-report.h" #include "qemu/module.h" @@ -30,12 +31,12 @@ static void fsl_imx7_init(Object *obj) { + MachineState *ms = MACHINE(qdev_get_machine()); FslIMX7State *s = FSL_IMX7(obj); char name[NAME_SIZE]; int i; - - for (i = 0; i < MIN(smp_cpus, FSL_IMX7_NUM_CPUS); i++) { + for (i = 0; i < MIN(ms->smp.cpus, FSL_IMX7_NUM_CPUS); i++) { snprintf(name, NAME_SIZE, "cpu%d", i); object_initialize_child(obj, name, &s->cpu[i], sizeof(s->cpu[i]), ARM_CPU_TYPE_NAME("cortex-a7"), &error_abort, @@ -155,11 +156,13 @@ static void fsl_imx7_init(Object *obj) static void fsl_imx7_realize(DeviceState *dev, Error **errp) { + MachineState *ms = MACHINE(qdev_get_machine()); FslIMX7State *s = FSL_IMX7(dev); Object *o; int i; qemu_irq irq; char name[NAME_SIZE]; + unsigned int smp_cpus = ms->smp.cpus; if (smp_cpus > FSL_IMX7_NUM_CPUS) { error_setg(errp, "%s: Only %d CPUs are supported (%d requested)", diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c index 72ca78108a..def0f1ce6a 100644 --- a/hw/arm/highbank.c +++ b/hw/arm/highbank.c @@ -241,6 +241,7 @@ static void calxeda_init(MachineState *machine, enum cxmachines machine_id) SysBusDevice *busdev; qemu_irq pic[128]; int n; + unsigned int smp_cpus = machine->smp.cpus; qemu_irq cpu_irq[4]; qemu_irq cpu_fiq[4]; qemu_irq cpu_virq[4]; diff --git a/hw/arm/mcimx6ul-evk.c b/hw/arm/mcimx6ul-evk.c index 31511059e4..bbffb11c2a 100644 --- a/hw/arm/mcimx6ul-evk.c +++ b/hw/arm/mcimx6ul-evk.c @@ -42,7 +42,7 @@ static void mcimx6ul_evk_init(MachineState *machine) .kernel_filename = machine->kernel_filename, .kernel_cmdline = machine->kernel_cmdline, .initrd_filename = machine->initrd_filename, - .nb_cpus = smp_cpus, + .nb_cpus = machine->smp.cpus, }; object_initialize_child(OBJECT(machine), "soc", &s->soc, sizeof(s->soc), diff --git a/hw/arm/mcimx7d-sabre.c b/hw/arm/mcimx7d-sabre.c index d6b190d85d..72eab03a0c 100644 --- a/hw/arm/mcimx7d-sabre.c +++ b/hw/arm/mcimx7d-sabre.c @@ -45,7 +45,7 @@ static void mcimx7d_sabre_init(MachineState *machine) .kernel_filename = machine->kernel_filename, .kernel_cmdline = machine->kernel_cmdline, .initrd_filename = machine->initrd_filename, - .nb_cpus = smp_cpus, + .nb_cpus = machine->smp.cpus, }; object_initialize(&s->soc, sizeof(s->soc), TYPE_FSL_IMX7); diff --git a/hw/arm/raspi.c b/hw/arm/raspi.c index cb23330940..5b2620acb4 100644 --- a/hw/arm/raspi.c +++ b/hw/arm/raspi.c @@ -116,7 +116,7 @@ static void setup_boot(MachineState *machine, int version, size_t ram_size) binfo.board_id = raspi_boardid[version]; binfo.ram_size = ram_size; - binfo.nb_cpus = smp_cpus; + binfo.nb_cpus = machine->smp.cpus; if (version <= 2) { /* The rpi1 and 2 require some custom setup code to run in Secure @@ -194,7 +194,7 @@ static void raspi_init(MachineState *machine, int version) /* Setup the SOC */ object_property_add_const_link(OBJECT(&s->soc), "ram", OBJECT(&s->ram), &error_abort); - object_property_set_int(OBJECT(&s->soc), smp_cpus, "enabled-cpus", + object_property_set_int(OBJECT(&s->soc), machine->smp.cpus, "enabled-cpus", &error_abort); int board_rev = version == 3 ? 0xa02082 : 0xa21041; object_property_set_int(OBJECT(&s->soc), board_rev, "board-rev", diff --git a/hw/arm/realview.c b/hw/arm/realview.c index 12d6e93a35..7c56c8d2ed 100644 --- a/hw/arm/realview.c +++ b/hw/arm/realview.c @@ -69,6 +69,7 @@ static void realview_init(MachineState *machine, NICInfo *nd; I2CBus *i2c; int n; + unsigned int smp_cpus = machine->smp.cpus; int done_nic = 0; qemu_irq cpu_irq[4]; int is_mpcore = 0; diff --git a/hw/arm/sabrelite.c b/hw/arm/sabrelite.c index 97230ac827..934f4c9261 100644 --- a/hw/arm/sabrelite.c +++ b/hw/arm/sabrelite.c @@ -105,7 +105,7 @@ static void sabrelite_init(MachineState *machine) sabrelite_binfo.kernel_filename = machine->kernel_filename; sabrelite_binfo.kernel_cmdline = machine->kernel_cmdline; sabrelite_binfo.initrd_filename = machine->initrd_filename; - sabrelite_binfo.nb_cpus = smp_cpus; + sabrelite_binfo.nb_cpus = machine->smp.cpus; sabrelite_binfo.secure_boot = true; sabrelite_binfo.write_secondary_boot = sabrelite_write_secondary; sabrelite_binfo.secondary_cpu_reset_hook = sabrelite_reset_secondary; diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c index ee53f0ff60..9c67d5c6f9 100644 --- a/hw/arm/sbsa-ref.c +++ b/hw/arm/sbsa-ref.c @@ -254,8 +254,6 @@ static void sbsa_flash_map(SBSAMachineState *sms, * sysmem is the system memory space. secure_sysmem is the secure view * of the system, and the first flash device should be made visible only * there. The second flash device is visible to both secure and nonsecure. - * If sysmem == secure_sysmem this means there is no separate Secure - * address space and both flash devices are generally visible. */ hwaddr flashsize = sbsa_ref_memmap[SBSA_FLASH].size / 2; hwaddr flashbase = sbsa_ref_memmap[SBSA_FLASH].base; @@ -328,6 +326,7 @@ static void create_secure_ram(SBSAMachineState *sms, static void create_gic(SBSAMachineState *sms, qemu_irq *pic) { + unsigned int smp_cpus = MACHINE(sms)->smp.cpus; DeviceState *gicdev; SysBusDevice *gicbusdev; const char *gictype; @@ -585,10 +584,12 @@ static void *sbsa_ref_dtb(const struct arm_boot_info *binfo, int *fdt_size) static void sbsa_ref_init(MachineState *machine) { + unsigned int smp_cpus = machine->smp.cpus; + unsigned int max_cpus = machine->smp.max_cpus; SBSAMachineState *sms = SBSA_MACHINE(machine); MachineClass *mc = MACHINE_GET_CLASS(machine); MemoryRegion *sysmem = get_system_memory(); - MemoryRegion *secure_sysmem = NULL; + MemoryRegion *secure_sysmem = g_new(MemoryRegion, 1); MemoryRegion *ram = g_new(MemoryRegion, 1); bool firmware_loaded; const CPUArchIdList *possible_cpus; @@ -612,13 +613,11 @@ static void sbsa_ref_init(MachineState *machine) * containing the system memory at low priority; any secure-only * devices go in at higher priority and take precedence. */ - secure_sysmem = g_new(MemoryRegion, 1); memory_region_init(secure_sysmem, OBJECT(machine), "secure-memory", UINT64_MAX); memory_region_add_subregion_overlap(secure_sysmem, 0, sysmem, -1); - firmware_loaded = sbsa_firmware_init(sms, sysmem, - secure_sysmem ?: sysmem); + firmware_loaded = sbsa_firmware_init(sms, sysmem, secure_sysmem); if (machine->kernel_filename && firmware_loaded) { error_report("sbsa-ref: No fw_cfg device on this machine, " @@ -727,6 +726,7 @@ static uint64_t sbsa_ref_cpu_mp_affinity(SBSAMachineState *sms, int idx) static const CPUArchIdList *sbsa_ref_possible_cpu_arch_ids(MachineState *ms) { + unsigned int max_cpus = ms->smp.max_cpus; SBSAMachineState *sms = SBSA_MACHINE(ms); int n; diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c index 2b3b0c2334..5d932c27c0 100644 --- a/hw/arm/vexpress.c +++ b/hw/arm/vexpress.c @@ -203,12 +203,14 @@ struct VEDBoardInfo { DBoardInitFn *init; }; -static void init_cpus(const char *cpu_type, const char *privdev, - hwaddr periphbase, qemu_irq *pic, bool secure, bool virt) +static void init_cpus(MachineState *ms, const char *cpu_type, + const char *privdev, hwaddr periphbase, + qemu_irq *pic, bool secure, bool virt) { DeviceState *dev; SysBusDevice *busdev; int n; + unsigned int smp_cpus = ms->smp.cpus; /* Create the actual CPUs */ for (n = 0; n < smp_cpus; n++) { @@ -269,6 +271,7 @@ static void a9_daughterboard_init(const VexpressMachineState *vms, const char *cpu_type, qemu_irq *pic) { + MachineState *machine = MACHINE(vms); MemoryRegion *sysmem = get_system_memory(); MemoryRegion *ram = g_new(MemoryRegion, 1); MemoryRegion *lowram = g_new(MemoryRegion, 1); @@ -295,7 +298,7 @@ static void a9_daughterboard_init(const VexpressMachineState *vms, memory_region_add_subregion(sysmem, 0x60000000, ram); /* 0x1e000000 A9MPCore (SCU) private memory region */ - init_cpus(cpu_type, TYPE_A9MPCORE_PRIV, 0x1e000000, pic, + init_cpus(machine, cpu_type, TYPE_A9MPCORE_PRIV, 0x1e000000, pic, vms->secure, vms->virt); /* Daughterboard peripherals : 0x10020000 .. 0x20000000 */ @@ -355,6 +358,7 @@ static void a15_daughterboard_init(const VexpressMachineState *vms, const char *cpu_type, qemu_irq *pic) { + MachineState *machine = MACHINE(vms); MemoryRegion *sysmem = get_system_memory(); MemoryRegion *ram = g_new(MemoryRegion, 1); MemoryRegion *sram = g_new(MemoryRegion, 1); @@ -377,8 +381,8 @@ static void a15_daughterboard_init(const VexpressMachineState *vms, memory_region_add_subregion(sysmem, 0x80000000, ram); /* 0x2c000000 A15MPCore private memory region (GIC) */ - init_cpus(cpu_type, TYPE_A15MPCORE_PRIV, 0x2c000000, pic, vms->secure, - vms->virt); + init_cpus(machine, cpu_type, TYPE_A15MPCORE_PRIV, + 0x2c000000, pic, vms->secure, vms->virt); /* A15 daughterboard peripherals: */ @@ -706,7 +710,7 @@ static void vexpress_common_init(MachineState *machine) daughterboard->bootinfo.kernel_filename = machine->kernel_filename; daughterboard->bootinfo.kernel_cmdline = machine->kernel_cmdline; daughterboard->bootinfo.initrd_filename = machine->initrd_filename; - daughterboard->bootinfo.nb_cpus = smp_cpus; + daughterboard->bootinfo.nb_cpus = machine->smp.cpus; daughterboard->bootinfo.board_id = VEXPRESS_BOARD_ID; daughterboard->bootinfo.loader_start = daughterboard->loader_start; daughterboard->bootinfo.smp_loader_start = map[VE_SRAM]; diff --git a/hw/arm/virt.c b/hw/arm/virt.c index ed009fa447..0b5138cb22 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -559,11 +559,13 @@ static void create_v2m(VirtMachineState *vms, qemu_irq *pic) static void create_gic(VirtMachineState *vms, qemu_irq *pic) { + MachineState *ms = MACHINE(vms); /* We create a standalone GIC */ DeviceState *gicdev; SysBusDevice *gicbusdev; const char *gictype; int type = vms->gic_version, i; + unsigned int smp_cpus = ms->smp.cpus; uint32_t nb_redist_regions = 0; gictype = (type == 3) ? gicv3_class_name() : gic_class_name(); @@ -1039,13 +1041,14 @@ static bool virt_firmware_init(VirtMachineState *vms, static FWCfgState *create_fw_cfg(const VirtMachineState *vms, AddressSpace *as) { + MachineState *ms = MACHINE(vms); hwaddr base = vms->memmap[VIRT_FW_CFG].base; hwaddr size = vms->memmap[VIRT_FW_CFG].size; FWCfgState *fw_cfg; char *nodename; fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16, as); - fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)ms->smp.cpus); nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base); qemu_fdt_add_subnode(vms->fdt, nodename); @@ -1345,7 +1348,7 @@ static void virt_build_smbios(VirtMachineState *vms) vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, true, SMBIOS_ENTRY_POINT_30); - smbios_get_tables(NULL, 0, &smbios_tables, &smbios_tables_len, + smbios_get_tables(MACHINE(vms), NULL, 0, &smbios_tables, &smbios_tables_len, &smbios_anchor, &smbios_anchor_len); if (smbios_anchor) { @@ -1478,6 +1481,8 @@ static void machvirt_init(MachineState *machine) MemoryRegion *ram = g_new(MemoryRegion, 1); bool firmware_loaded; bool aarch64 = true; + unsigned int smp_cpus = machine->smp.cpus; + unsigned int max_cpus = machine->smp.max_cpus; /* * In accelerated mode, the memory map is computed earlier in kvm_type() @@ -1845,6 +1850,7 @@ static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx) static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) { int n; + unsigned int max_cpus = ms->smp.max_cpus; VirtMachineState *vms = VIRT_MACHINE(ms); if (ms->possible_cpus) { @@ -1946,6 +1952,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) assert(!mc->get_hotplug_handler); mc->get_hotplug_handler = virt_machine_get_hotplug_handler; hc->plug = virt_machine_device_plug_cb; + mc->numa_mem_supported = true; } static void virt_instance_init(Object *obj) diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c index a1ca9b5adf..a60830d37a 100644 --- a/hw/arm/xlnx-zynqmp.c +++ b/hw/arm/xlnx-zynqmp.c @@ -21,6 +21,7 @@ #include "cpu.h" #include "hw/arm/xlnx-zynqmp.h" #include "hw/intc/arm_gic_common.h" +#include "hw/boards.h" #include "exec/address-spaces.h" #include "sysemu/kvm.h" #include "kvm_arm.h" @@ -171,12 +172,13 @@ static inline int arm_gic_ppi_index(int cpu_nr, int ppi_index) return GIC_NUM_SPI_INTR + cpu_nr * GIC_INTERNAL + ppi_index; } -static void xlnx_zynqmp_create_rpu(XlnxZynqMPState *s, const char *boot_cpu, - Error **errp) +static void xlnx_zynqmp_create_rpu(MachineState *ms, XlnxZynqMPState *s, + const char *boot_cpu, Error **errp) { Error *err = NULL; int i; - int num_rpus = MIN(smp_cpus - XLNX_ZYNQMP_NUM_APU_CPUS, XLNX_ZYNQMP_NUM_RPU_CPUS); + int num_rpus = MIN(ms->smp.cpus - XLNX_ZYNQMP_NUM_APU_CPUS, + XLNX_ZYNQMP_NUM_RPU_CPUS); if (num_rpus <= 0) { /* Don't create rpu-cluster object if there's nothing to put in it */ @@ -221,9 +223,10 @@ static void xlnx_zynqmp_create_rpu(XlnxZynqMPState *s, const char *boot_cpu, static void xlnx_zynqmp_init(Object *obj) { + MachineState *ms = MACHINE(qdev_get_machine()); XlnxZynqMPState *s = XLNX_ZYNQMP(obj); int i; - int num_apus = MIN(smp_cpus, XLNX_ZYNQMP_NUM_APU_CPUS); + int num_apus = MIN(ms->smp.cpus, XLNX_ZYNQMP_NUM_APU_CPUS); object_initialize_child(obj, "apu-cluster", &s->apu_cluster, sizeof(s->apu_cluster), TYPE_CPU_CLUSTER, @@ -290,11 +293,12 @@ static void xlnx_zynqmp_init(Object *obj) static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp) { + MachineState *ms = MACHINE(qdev_get_machine()); XlnxZynqMPState *s = XLNX_ZYNQMP(dev); MemoryRegion *system_memory = get_system_memory(); uint8_t i; uint64_t ram_size; - int num_apus = MIN(smp_cpus, XLNX_ZYNQMP_NUM_APU_CPUS); + int num_apus = MIN(ms->smp.cpus, XLNX_ZYNQMP_NUM_APU_CPUS); const char *boot_cpu = s->boot_cpu ? s->boot_cpu : "apu-cpu[0]"; ram_addr_t ddr_low_size, ddr_high_size; qemu_irq gic_spi[GIC_NUM_SPI_INTR]; @@ -456,7 +460,7 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp) "RPUs just use -smp 6."); } - xlnx_zynqmp_create_rpu(s, boot_cpu, &err); + xlnx_zynqmp_create_rpu(ms, s, boot_cpu, &err); if (err) { error_propagate(errp, err); return; diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 9cb61336a6..85bc4017e7 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -191,7 +191,7 @@ static void vhost_user_blk_stop(VirtIODevice *vdev) static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserBlk *s = VHOST_USER_BLK(vdev); - bool should_start = vdev->started; + bool should_start = virtio_device_started(vdev, status); int ret; if (!vdev->vm_running) { @@ -317,7 +317,7 @@ static int vhost_user_blk_connect(DeviceState *dev) } /* restore vhost state */ - if (vdev->started) { + if (virtio_device_started(vdev, vdev->status)) { ret = vhost_user_blk_start(vdev); if (ret < 0) { error_report("vhost-user-blk: vhost start failed: %s", diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c index 8f224ef81d..69d73196e2 100644 --- a/hw/block/xen-block.c +++ b/hw/block/xen-block.c @@ -11,7 +11,7 @@ #include "qemu/option.h" #include "qapi/error.h" #include "qapi/qapi-commands-block-core.h" -#include "qapi/qapi-commands-misc.h" +#include "qapi/qapi-commands-qom.h" #include "qapi/qapi-visit-block-core.h" #include "qapi/qobject-input-visitor.h" #include "qapi/visitor.h" diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs index a799c83815..f8481d959f 100644 --- a/hw/core/Makefile.objs +++ b/hw/core/Makefile.objs @@ -7,6 +7,7 @@ common-obj-$(CONFIG_SOFTMMU) += fw-path-provider.o common-obj-y += irq.o common-obj-y += hotplug.o common-obj-$(CONFIG_SOFTMMU) += nmi.o +common-obj-$(CONFIG_SOFTMMU) += vm-change-state-handler.o common-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o common-obj-$(CONFIG_XILINX_AXI) += stream.o @@ -22,3 +23,7 @@ common-obj-$(CONFIG_SOFTMMU) += split-irq.o common-obj-$(CONFIG_PLATFORM_BUS) += platform-bus.o common-obj-$(CONFIG_SOFTMMU) += generic-loader.o common-obj-$(CONFIG_SOFTMMU) += null-machine.o + +obj-$(CONFIG_SOFTMMU) += machine-qmp-cmds.o +obj-$(CONFIG_SOFTMMU) += numa.o +common-obj-$(CONFIG_SOFTMMU) += machine-hmp-cmds.o diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c new file mode 100644 index 0000000000..1f66bda346 --- /dev/null +++ b/hw/core/machine-hmp-cmds.c @@ -0,0 +1,167 @@ +/* + * HMP commands related to machines and CPUs + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "qemu/osdep.h" +#include "monitor/hmp.h" +#include "monitor/monitor.h" +#include "qapi/error.h" +#include "qapi/qapi-builtin-visit.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/qmp/qdict.h" +#include "qapi/string-output-visitor.h" +#include "qemu/error-report.h" +#include "sysemu/numa.h" + +void hmp_info_cpus(Monitor *mon, const QDict *qdict) +{ + CpuInfoFastList *cpu_list, *cpu; + + cpu_list = qmp_query_cpus_fast(NULL); + + for (cpu = cpu_list; cpu; cpu = cpu->next) { + int active = ' '; + + if (cpu->value->cpu_index == monitor_get_cpu_index()) { + active = '*'; + } + + monitor_printf(mon, "%c CPU #%" PRId64 ":", active, + cpu->value->cpu_index); + monitor_printf(mon, " thread_id=%" PRId64 "\n", cpu->value->thread_id); + } + + qapi_free_CpuInfoFastList(cpu_list); +} + +void hmp_cpu_add(Monitor *mon, const QDict *qdict) +{ + int cpuid; + Error *err = NULL; + + error_report("cpu_add is deprecated, please use device_add instead"); + + cpuid = qdict_get_int(qdict, "id"); + qmp_cpu_add(cpuid, &err); + hmp_handle_error(mon, &err); +} + +void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + HotpluggableCPUList *l = qmp_query_hotpluggable_cpus(&err); + HotpluggableCPUList *saved = l; + CpuInstanceProperties *c; + + if (err != NULL) { + hmp_handle_error(mon, &err); + return; + } + + monitor_printf(mon, "Hotpluggable CPUs:\n"); + while (l) { + monitor_printf(mon, " type: \"%s\"\n", l->value->type); + monitor_printf(mon, " vcpus_count: \"%" PRIu64 "\"\n", + l->value->vcpus_count); + if (l->value->has_qom_path) { + monitor_printf(mon, " qom_path: \"%s\"\n", l->value->qom_path); + } + + c = l->value->props; + monitor_printf(mon, " CPUInstance Properties:\n"); + if (c->has_node_id) { + monitor_printf(mon, " node-id: \"%" PRIu64 "\"\n", c->node_id); + } + if (c->has_socket_id) { + monitor_printf(mon, " socket-id: \"%" PRIu64 "\"\n", c->socket_id); + } + if (c->has_die_id) { + monitor_printf(mon, " die-id: \"%" PRIu64 "\"\n", c->die_id); + } + if (c->has_core_id) { + monitor_printf(mon, " core-id: \"%" PRIu64 "\"\n", c->core_id); + } + if (c->has_thread_id) { + monitor_printf(mon, " thread-id: \"%" PRIu64 "\"\n", c->thread_id); + } + + l = l->next; + } + + qapi_free_HotpluggableCPUList(saved); +} + +void hmp_info_memdev(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + MemdevList *memdev_list = qmp_query_memdev(&err); + MemdevList *m = memdev_list; + Visitor *v; + char *str; + + while (m) { + v = string_output_visitor_new(false, &str); + visit_type_uint16List(v, NULL, &m->value->host_nodes, NULL); + monitor_printf(mon, "memory backend: %s\n", m->value->id); + monitor_printf(mon, " size: %" PRId64 "\n", m->value->size); + monitor_printf(mon, " merge: %s\n", + m->value->merge ? "true" : "false"); + monitor_printf(mon, " dump: %s\n", + m->value->dump ? "true" : "false"); + monitor_printf(mon, " prealloc: %s\n", + m->value->prealloc ? "true" : "false"); + monitor_printf(mon, " policy: %s\n", + HostMemPolicy_str(m->value->policy)); + visit_complete(v, &str); + monitor_printf(mon, " host nodes: %s\n", str); + + g_free(str); + visit_free(v); + m = m->next; + } + + monitor_printf(mon, "\n"); + + qapi_free_MemdevList(memdev_list); + hmp_handle_error(mon, &err); +} + +void hmp_info_numa(Monitor *mon, const QDict *qdict) +{ + int i; + NumaNodeMem *node_mem; + CpuInfoList *cpu_list, *cpu; + + cpu_list = qmp_query_cpus(&error_abort); + node_mem = g_new0(NumaNodeMem, nb_numa_nodes); + + query_numa_node_mem(node_mem); + monitor_printf(mon, "%d nodes\n", nb_numa_nodes); + for (i = 0; i < nb_numa_nodes; i++) { + monitor_printf(mon, "node %d cpus:", i); + for (cpu = cpu_list; cpu; cpu = cpu->next) { + if (cpu->value->has_props && cpu->value->props->has_node_id && + cpu->value->props->node_id == i) { + monitor_printf(mon, " %" PRIi64, cpu->value->CPU); + } + } + monitor_printf(mon, "\n"); + monitor_printf(mon, "node %d size: %" PRId64 " MB\n", i, + node_mem[i].node_mem >> 20); + monitor_printf(mon, "node %d plugged: %" PRId64 " MB\n", i, + node_mem[i].node_plugged_mem >> 20); + } + qapi_free_CpuInfoList(cpu_list); + g_free(node_mem); +} diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c new file mode 100644 index 0000000000..5bd95b8ab0 --- /dev/null +++ b/hw/core/machine-qmp-cmds.c @@ -0,0 +1,330 @@ +/* + * QMP commands related to machines and CPUs + * + * Copyright (C) 2014 Red Hat Inc + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "hw/boards.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/qmp/qerror.h" +#include "sysemu/hostmem.h" +#include "sysemu/hw_accel.h" +#include "sysemu/numa.h" +#include "sysemu/sysemu.h" + +CpuInfoList *qmp_query_cpus(Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(ms); + CpuInfoList *head = NULL, *cur_item = NULL; + CPUState *cpu; + + CPU_FOREACH(cpu) { + CpuInfoList *info; +#if defined(TARGET_I386) + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; +#elif defined(TARGET_PPC) + PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu); + CPUPPCState *env = &ppc_cpu->env; +#elif defined(TARGET_SPARC) + SPARCCPU *sparc_cpu = SPARC_CPU(cpu); + CPUSPARCState *env = &sparc_cpu->env; +#elif defined(TARGET_RISCV) + RISCVCPU *riscv_cpu = RISCV_CPU(cpu); + CPURISCVState *env = &riscv_cpu->env; +#elif defined(TARGET_MIPS) + MIPSCPU *mips_cpu = MIPS_CPU(cpu); + CPUMIPSState *env = &mips_cpu->env; +#elif defined(TARGET_TRICORE) + TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu); + CPUTriCoreState *env = &tricore_cpu->env; +#elif defined(TARGET_S390X) + S390CPU *s390_cpu = S390_CPU(cpu); + CPUS390XState *env = &s390_cpu->env; +#endif + + cpu_synchronize_state(cpu); + + info = g_malloc0(sizeof(*info)); + info->value = g_malloc0(sizeof(*info->value)); + info->value->CPU = cpu->cpu_index; + info->value->current = (cpu == first_cpu); + info->value->halted = cpu->halted; + info->value->qom_path = object_get_canonical_path(OBJECT(cpu)); + info->value->thread_id = cpu->thread_id; +#if defined(TARGET_I386) + info->value->arch = CPU_INFO_ARCH_X86; + info->value->u.x86.pc = env->eip + env->segs[R_CS].base; +#elif defined(TARGET_PPC) + info->value->arch = CPU_INFO_ARCH_PPC; + info->value->u.ppc.nip = env->nip; +#elif defined(TARGET_SPARC) + info->value->arch = CPU_INFO_ARCH_SPARC; + info->value->u.q_sparc.pc = env->pc; + info->value->u.q_sparc.npc = env->npc; +#elif defined(TARGET_MIPS) + info->value->arch = CPU_INFO_ARCH_MIPS; + info->value->u.q_mips.PC = env->active_tc.PC; +#elif defined(TARGET_TRICORE) + info->value->arch = CPU_INFO_ARCH_TRICORE; + info->value->u.tricore.PC = env->PC; +#elif defined(TARGET_S390X) + info->value->arch = CPU_INFO_ARCH_S390; + info->value->u.s390.cpu_state = env->cpu_state; +#elif defined(TARGET_RISCV) + info->value->arch = CPU_INFO_ARCH_RISCV; + info->value->u.riscv.pc = env->pc; +#else + info->value->arch = CPU_INFO_ARCH_OTHER; +#endif + info->value->has_props = !!mc->cpu_index_to_instance_props; + if (info->value->has_props) { + CpuInstanceProperties *props; + props = g_malloc0(sizeof(*props)); + *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index); + info->value->props = props; + } + + /* XXX: waiting for the qapi to support GSList */ + if (!cur_item) { + head = cur_item = info; + } else { + cur_item->next = info; + cur_item = info; + } + } + + return head; +} + +static CpuInfoArch sysemu_target_to_cpuinfo_arch(SysEmuTarget target) +{ + /* + * The @SysEmuTarget -> @CpuInfoArch mapping below is based on the + * TARGET_ARCH -> TARGET_BASE_ARCH mapping in the "configure" script. + */ + switch (target) { + case SYS_EMU_TARGET_I386: + case SYS_EMU_TARGET_X86_64: + return CPU_INFO_ARCH_X86; + + case SYS_EMU_TARGET_PPC: + case SYS_EMU_TARGET_PPC64: + return CPU_INFO_ARCH_PPC; + + case SYS_EMU_TARGET_SPARC: + case SYS_EMU_TARGET_SPARC64: + return CPU_INFO_ARCH_SPARC; + + case SYS_EMU_TARGET_MIPS: + case SYS_EMU_TARGET_MIPSEL: + case SYS_EMU_TARGET_MIPS64: + case SYS_EMU_TARGET_MIPS64EL: + return CPU_INFO_ARCH_MIPS; + + case SYS_EMU_TARGET_TRICORE: + return CPU_INFO_ARCH_TRICORE; + + case SYS_EMU_TARGET_S390X: + return CPU_INFO_ARCH_S390; + + case SYS_EMU_TARGET_RISCV32: + case SYS_EMU_TARGET_RISCV64: + return CPU_INFO_ARCH_RISCV; + + default: + return CPU_INFO_ARCH_OTHER; + } +} + +static void cpustate_to_cpuinfo_s390(CpuInfoS390 *info, const CPUState *cpu) +{ +#ifdef TARGET_S390X + S390CPU *s390_cpu = S390_CPU(cpu); + CPUS390XState *env = &s390_cpu->env; + + info->cpu_state = env->cpu_state; +#else + abort(); +#endif +} + +/* + * fast means: we NEVER interrupt vCPU threads to retrieve + * information from KVM. + */ +CpuInfoFastList *qmp_query_cpus_fast(Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(ms); + CpuInfoFastList *head = NULL, *cur_item = NULL; + SysEmuTarget target = qapi_enum_parse(&SysEmuTarget_lookup, TARGET_NAME, + -1, &error_abort); + CPUState *cpu; + + CPU_FOREACH(cpu) { + CpuInfoFastList *info = g_malloc0(sizeof(*info)); + info->value = g_malloc0(sizeof(*info->value)); + + info->value->cpu_index = cpu->cpu_index; + info->value->qom_path = object_get_canonical_path(OBJECT(cpu)); + info->value->thread_id = cpu->thread_id; + + info->value->has_props = !!mc->cpu_index_to_instance_props; + if (info->value->has_props) { + CpuInstanceProperties *props; + props = g_malloc0(sizeof(*props)); + *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index); + info->value->props = props; + } + + info->value->arch = sysemu_target_to_cpuinfo_arch(target); + info->value->target = target; + if (target == SYS_EMU_TARGET_S390X) { + cpustate_to_cpuinfo_s390(&info->value->u.s390x, cpu); + } + + if (!cur_item) { + head = cur_item = info; + } else { + cur_item->next = info; + cur_item = info; + } + } + + return head; +} + +MachineInfoList *qmp_query_machines(Error **errp) +{ + GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); + MachineInfoList *mach_list = NULL; + + for (el = machines; el; el = el->next) { + MachineClass *mc = el->data; + MachineInfoList *entry; + MachineInfo *info; + + info = g_malloc0(sizeof(*info)); + if (mc->is_default) { + info->has_is_default = true; + info->is_default = true; + } + + if (mc->alias) { + info->has_alias = true; + info->alias = g_strdup(mc->alias); + } + + info->name = g_strdup(mc->name); + info->cpu_max = !mc->max_cpus ? 1 : mc->max_cpus; + info->hotpluggable_cpus = mc->has_hotpluggable_cpus; + info->numa_mem_supported = mc->numa_mem_supported; + info->deprecated = !!mc->deprecation_reason; + + entry = g_malloc0(sizeof(*entry)); + entry->value = info; + entry->next = mach_list; + mach_list = entry; + } + + g_slist_free(machines); + return mach_list; +} + +CurrentMachineParams *qmp_query_current_machine(Error **errp) +{ + CurrentMachineParams *params = g_malloc0(sizeof(*params)); + params->wakeup_suspend_support = qemu_wakeup_suspend_enabled(); + + return params; +} + +HotpluggableCPUList *qmp_query_hotpluggable_cpus(Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(ms); + + if (!mc->has_hotpluggable_cpus) { + error_setg(errp, QERR_FEATURE_DISABLED, "query-hotpluggable-cpus"); + return NULL; + } + + return machine_query_hotpluggable_cpus(ms); +} + +void qmp_cpu_add(int64_t id, Error **errp) +{ + MachineClass *mc; + + mc = MACHINE_GET_CLASS(current_machine); + if (mc->hot_add_cpu) { + mc->hot_add_cpu(current_machine, id, errp); + } else { + error_setg(errp, "Not supported"); + } +} + +void qmp_set_numa_node(NumaOptions *cmd, Error **errp) +{ + if (!runstate_check(RUN_STATE_PRECONFIG)) { + error_setg(errp, "The command is permitted only in '%s' state", + RunState_str(RUN_STATE_PRECONFIG)); + return; + } + + set_numa_options(MACHINE(qdev_get_machine()), cmd, errp); +} + +static int query_memdev(Object *obj, void *opaque) +{ + MemdevList **list = opaque; + MemdevList *m = NULL; + + if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { + m = g_malloc0(sizeof(*m)); + + m->value = g_malloc0(sizeof(*m->value)); + + m->value->id = object_get_canonical_path_component(obj); + m->value->has_id = !!m->value->id; + + m->value->size = object_property_get_uint(obj, "size", + &error_abort); + m->value->merge = object_property_get_bool(obj, "merge", + &error_abort); + m->value->dump = object_property_get_bool(obj, "dump", + &error_abort); + m->value->prealloc = object_property_get_bool(obj, + "prealloc", + &error_abort); + m->value->policy = object_property_get_enum(obj, + "policy", + "HostMemPolicy", + &error_abort); + object_property_get_uint16List(obj, "host-nodes", + &m->value->host_nodes, + &error_abort); + + m->next = *list; + *list = m; + } + + return 0; +} + +MemdevList *qmp_query_memdev(Error **errp) +{ + Object *obj = object_get_objects_root(); + MemdevList *list = NULL; + + object_child_foreach(obj, query_memdev, &list); + return list; +} diff --git a/hw/core/machine.c b/hw/core/machine.c index ea5a01aa49..2be19ec0cd 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -11,6 +11,9 @@ */ #include "qemu/osdep.h" +#include "qemu/option.h" +#include "qapi/qmp/qerror.h" +#include "sysemu/replay.h" #include "qemu/units.h" #include "hw/boards.h" #include "qapi/error.h" @@ -30,6 +33,7 @@ GlobalProperty hw_compat_4_0[] = { { "bochs-display", "edid", "false" }, { "virtio-vga", "edid", "false" }, { "virtio-gpu-pci", "edid", "false" }, + { "virtio-device", "use-started", "false" }, }; const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0); @@ -682,6 +686,11 @@ void machine_set_cpu_numa_node(MachineState *machine, return; } + if (props->has_die_id && !slot->props.has_die_id) { + error_setg(errp, "die-id is not supported"); + return; + } + /* skip slots with explicit mismatch */ if (props->has_thread_id && props->thread_id != slot->props.thread_id) { continue; @@ -691,6 +700,10 @@ void machine_set_cpu_numa_node(MachineState *machine, continue; } + if (props->has_die_id && props->die_id != slot->props.die_id) { + continue; + } + if (props->has_socket_id && props->socket_id != slot->props.socket_id) { continue; } @@ -716,6 +729,78 @@ void machine_set_cpu_numa_node(MachineState *machine, } } +static void smp_parse(MachineState *ms, QemuOpts *opts) +{ + if (opts) { + unsigned cpus = qemu_opt_get_number(opts, "cpus", 0); + unsigned sockets = qemu_opt_get_number(opts, "sockets", 0); + unsigned cores = qemu_opt_get_number(opts, "cores", 0); + unsigned threads = qemu_opt_get_number(opts, "threads", 0); + + /* compute missing values, prefer sockets over cores over threads */ + if (cpus == 0 || sockets == 0) { + cores = cores > 0 ? cores : 1; + threads = threads > 0 ? threads : 1; + if (cpus == 0) { + sockets = sockets > 0 ? sockets : 1; + cpus = cores * threads * sockets; + } else { + ms->smp.max_cpus = + qemu_opt_get_number(opts, "maxcpus", cpus); + sockets = ms->smp.max_cpus / (cores * threads); + } + } else if (cores == 0) { + threads = threads > 0 ? threads : 1; + cores = cpus / (sockets * threads); + cores = cores > 0 ? cores : 1; + } else if (threads == 0) { + threads = cpus / (cores * sockets); + threads = threads > 0 ? threads : 1; + } else if (sockets * cores * threads < cpus) { + error_report("cpu topology: " + "sockets (%u) * cores (%u) * threads (%u) < " + "smp_cpus (%u)", + sockets, cores, threads, cpus); + exit(1); + } + + ms->smp.max_cpus = + qemu_opt_get_number(opts, "maxcpus", cpus); + + if (ms->smp.max_cpus < cpus) { + error_report("maxcpus must be equal to or greater than smp"); + exit(1); + } + + if (sockets * cores * threads > ms->smp.max_cpus) { + error_report("cpu topology: " + "sockets (%u) * cores (%u) * threads (%u) > " + "maxcpus (%u)", + sockets, cores, threads, + ms->smp.max_cpus); + exit(1); + } + + if (sockets * cores * threads != ms->smp.max_cpus) { + warn_report("Invalid CPU topology deprecated: " + "sockets (%u) * cores (%u) * threads (%u) " + "!= maxcpus (%u)", + sockets, cores, threads, + ms->smp.max_cpus); + } + + ms->smp.cpus = cpus; + ms->smp.cores = cores; + ms->smp.threads = threads; + } + + if (ms->smp.cpus > 1) { + Error *blocker = NULL; + error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp"); + replay_add_blocker(blocker); + } +} + static void machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -723,6 +808,7 @@ static void machine_class_init(ObjectClass *oc, void *data) /* Default 128 MB as guest ram size */ mc->default_ram_size = 128 * MiB; mc->rom_file_has_mr = true; + mc->smp_parse = smp_parse; /* numa node memory size aligned on 8MB by default. * On Linux, each node's border has to be 8MB aligned @@ -948,6 +1034,9 @@ static char *cpu_slot_to_string(const CPUArchId *cpu) if (cpu->props.has_socket_id) { g_string_append_printf(s, "socket-id: %"PRId64, cpu->props.socket_id); } + if (cpu->props.has_die_id) { + g_string_append_printf(s, "die-id: %"PRId64, cpu->props.die_id); + } if (cpu->props.has_core_id) { if (s->len) { g_string_append_printf(s, ", "); diff --git a/hw/core/numa.c b/hw/core/numa.c new file mode 100644 index 0000000000..a11431483c --- /dev/null +++ b/hw/core/numa.c @@ -0,0 +1,613 @@ +/* + * NUMA parameter parsing routines + * + * Copyright (c) 2014 Fujitsu Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "sysemu/numa.h" +#include "exec/cpu-common.h" +#include "exec/ramlist.h" +#include "qemu/bitmap.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qapi/opts-visitor.h" +#include "qapi/qapi-visit-machine.h" +#include "sysemu/qtest.h" +#include "hw/mem/pc-dimm.h" +#include "hw/mem/memory-device.h" +#include "qemu/option.h" +#include "qemu/config-file.h" +#include "qemu/cutils.h" + +QemuOptsList qemu_numa_opts = { + .name = "numa", + .implied_opt_name = "type", + .head = QTAILQ_HEAD_INITIALIZER(qemu_numa_opts.head), + .desc = { { 0 } } /* validated with OptsVisitor */ +}; + +static int have_memdevs; +static int have_mem; +static int max_numa_nodeid; /* Highest specified NUMA node ID, plus one. + * For all nodes, nodeid < max_numa_nodeid + */ +int nb_numa_nodes; +bool have_numa_distance; +NodeInfo numa_info[MAX_NODES]; + + +static void parse_numa_node(MachineState *ms, NumaNodeOptions *node, + Error **errp) +{ + Error *err = NULL; + uint16_t nodenr; + uint16List *cpus = NULL; + MachineClass *mc = MACHINE_GET_CLASS(ms); + unsigned int max_cpus = ms->smp.max_cpus; + + if (node->has_nodeid) { + nodenr = node->nodeid; + } else { + nodenr = nb_numa_nodes; + } + + if (nodenr >= MAX_NODES) { + error_setg(errp, "Max number of NUMA nodes reached: %" + PRIu16 "", nodenr); + return; + } + + if (numa_info[nodenr].present) { + error_setg(errp, "Duplicate NUMA nodeid: %" PRIu16, nodenr); + return; + } + + if (!mc->cpu_index_to_instance_props || !mc->get_default_cpu_node_id) { + error_setg(errp, "NUMA is not supported by this machine-type"); + return; + } + for (cpus = node->cpus; cpus; cpus = cpus->next) { + CpuInstanceProperties props; + if (cpus->value >= max_cpus) { + error_setg(errp, + "CPU index (%" PRIu16 ")" + " should be smaller than maxcpus (%d)", + cpus->value, max_cpus); + return; + } + props = mc->cpu_index_to_instance_props(ms, cpus->value); + props.node_id = nodenr; + props.has_node_id = true; + machine_set_cpu_numa_node(ms, &props, &err); + if (err) { + error_propagate(errp, err); + return; + } + } + + have_memdevs = have_memdevs ? : node->has_memdev; + have_mem = have_mem ? : node->has_mem; + if ((node->has_mem && have_memdevs) || (node->has_memdev && have_mem)) { + error_setg(errp, "numa configuration should use either mem= or memdev=," + "mixing both is not allowed"); + return; + } + + if (node->has_mem) { + numa_info[nodenr].node_mem = node->mem; + if (!qtest_enabled()) { + warn_report("Parameter -numa node,mem is deprecated," + " use -numa node,memdev instead"); + } + } + if (node->has_memdev) { + Object *o; + o = object_resolve_path_type(node->memdev, TYPE_MEMORY_BACKEND, NULL); + if (!o) { + error_setg(errp, "memdev=%s is ambiguous", node->memdev); + return; + } + + object_ref(o); + numa_info[nodenr].node_mem = object_property_get_uint(o, "size", NULL); + numa_info[nodenr].node_memdev = MEMORY_BACKEND(o); + } + numa_info[nodenr].present = true; + max_numa_nodeid = MAX(max_numa_nodeid, nodenr + 1); + nb_numa_nodes++; +} + +static void parse_numa_distance(NumaDistOptions *dist, Error **errp) +{ + uint16_t src = dist->src; + uint16_t dst = dist->dst; + uint8_t val = dist->val; + + if (src >= MAX_NODES || dst >= MAX_NODES) { + error_setg(errp, "Parameter '%s' expects an integer between 0 and %d", + src >= MAX_NODES ? "src" : "dst", MAX_NODES - 1); + return; + } + + if (!numa_info[src].present || !numa_info[dst].present) { + error_setg(errp, "Source/Destination NUMA node is missing. " + "Please use '-numa node' option to declare it first."); + return; + } + + if (val < NUMA_DISTANCE_MIN) { + error_setg(errp, "NUMA distance (%" PRIu8 ") is invalid, " + "it shouldn't be less than %d.", + val, NUMA_DISTANCE_MIN); + return; + } + + if (src == dst && val != NUMA_DISTANCE_MIN) { + error_setg(errp, "Local distance of node %d should be %d.", + src, NUMA_DISTANCE_MIN); + return; + } + + numa_info[src].distance[dst] = val; + have_numa_distance = true; +} + +void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp) +{ + Error *err = NULL; + + switch (object->type) { + case NUMA_OPTIONS_TYPE_NODE: + parse_numa_node(ms, &object->u.node, &err); + if (err) { + goto end; + } + break; + case NUMA_OPTIONS_TYPE_DIST: + parse_numa_distance(&object->u.dist, &err); + if (err) { + goto end; + } + break; + case NUMA_OPTIONS_TYPE_CPU: + if (!object->u.cpu.has_node_id) { + error_setg(&err, "Missing mandatory node-id property"); + goto end; + } + if (!numa_info[object->u.cpu.node_id].present) { + error_setg(&err, "Invalid node-id=%" PRId64 ", NUMA node must be " + "defined with -numa node,nodeid=ID before it's used with " + "-numa cpu,node-id=ID", object->u.cpu.node_id); + goto end; + } + + machine_set_cpu_numa_node(ms, qapi_NumaCpuOptions_base(&object->u.cpu), + &err); + break; + default: + abort(); + } + +end: + error_propagate(errp, err); +} + +static int parse_numa(void *opaque, QemuOpts *opts, Error **errp) +{ + NumaOptions *object = NULL; + MachineState *ms = MACHINE(opaque); + Error *err = NULL; + Visitor *v = opts_visitor_new(opts); + + visit_type_NumaOptions(v, NULL, &object, &err); + visit_free(v); + if (err) { + goto end; + } + + /* Fix up legacy suffix-less format */ + if ((object->type == NUMA_OPTIONS_TYPE_NODE) && object->u.node.has_mem) { + const char *mem_str = qemu_opt_get(opts, "mem"); + qemu_strtosz_MiB(mem_str, NULL, &object->u.node.mem); + } + + set_numa_options(ms, object, &err); + +end: + qapi_free_NumaOptions(object); + if (err) { + error_propagate(errp, err); + return -1; + } + + return 0; +} + +/* If all node pair distances are symmetric, then only distances + * in one direction are enough. If there is even one asymmetric + * pair, though, then all distances must be provided. The + * distance from a node to itself is always NUMA_DISTANCE_MIN, + * so providing it is never necessary. + */ +static void validate_numa_distance(void) +{ + int src, dst; + bool is_asymmetrical = false; + + for (src = 0; src < nb_numa_nodes; src++) { + for (dst = src; dst < nb_numa_nodes; dst++) { + if (numa_info[src].distance[dst] == 0 && + numa_info[dst].distance[src] == 0) { + if (src != dst) { + error_report("The distance between node %d and %d is " + "missing, at least one distance value " + "between each nodes should be provided.", + src, dst); + exit(EXIT_FAILURE); + } + } + + if (numa_info[src].distance[dst] != 0 && + numa_info[dst].distance[src] != 0 && + numa_info[src].distance[dst] != + numa_info[dst].distance[src]) { + is_asymmetrical = true; + } + } + } + + if (is_asymmetrical) { + for (src = 0; src < nb_numa_nodes; src++) { + for (dst = 0; dst < nb_numa_nodes; dst++) { + if (src != dst && numa_info[src].distance[dst] == 0) { + error_report("At least one asymmetrical pair of " + "distances is given, please provide distances " + "for both directions of all node pairs."); + exit(EXIT_FAILURE); + } + } + } + } +} + +static void complete_init_numa_distance(void) +{ + int src, dst; + + /* Fixup NUMA distance by symmetric policy because if it is an + * asymmetric distance table, it should be a complete table and + * there would not be any missing distance except local node, which + * is verified by validate_numa_distance above. + */ + for (src = 0; src < nb_numa_nodes; src++) { + for (dst = 0; dst < nb_numa_nodes; dst++) { + if (numa_info[src].distance[dst] == 0) { + if (src == dst) { + numa_info[src].distance[dst] = NUMA_DISTANCE_MIN; + } else { + numa_info[src].distance[dst] = numa_info[dst].distance[src]; + } + } + } + } +} + +void numa_legacy_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, + int nb_nodes, ram_addr_t size) +{ + int i; + uint64_t usedmem = 0; + + /* Align each node according to the alignment + * requirements of the machine class + */ + + for (i = 0; i < nb_nodes - 1; i++) { + nodes[i].node_mem = (size / nb_nodes) & + ~((1 << mc->numa_mem_align_shift) - 1); + usedmem += nodes[i].node_mem; + } + nodes[i].node_mem = size - usedmem; +} + +void numa_default_auto_assign_ram(MachineClass *mc, NodeInfo *nodes, + int nb_nodes, ram_addr_t size) +{ + int i; + uint64_t usedmem = 0, node_mem; + uint64_t granularity = size / nb_nodes; + uint64_t propagate = 0; + + for (i = 0; i < nb_nodes - 1; i++) { + node_mem = (granularity + propagate) & + ~((1 << mc->numa_mem_align_shift) - 1); + propagate = granularity + propagate - node_mem; + nodes[i].node_mem = node_mem; + usedmem += node_mem; + } + nodes[i].node_mem = size - usedmem; +} + +void numa_complete_configuration(MachineState *ms) +{ + int i; + MachineClass *mc = MACHINE_GET_CLASS(ms); + + /* + * If memory hotplug is enabled (slots > 0) but without '-numa' + * options explicitly on CLI, guestes will break. + * + * Windows: won't enable memory hotplug without SRAT table at all + * + * Linux: if QEMU is started with initial memory all below 4Gb + * and no SRAT table present, guest kernel will use nommu DMA ops, + * which breaks 32bit hw drivers when memory is hotplugged and + * guest tries to use it with that drivers. + * + * Enable NUMA implicitly by adding a new NUMA node automatically. + */ + if (ms->ram_slots > 0 && nb_numa_nodes == 0 && + mc->auto_enable_numa_with_memhp) { + NumaNodeOptions node = { }; + parse_numa_node(ms, &node, &error_abort); + } + + assert(max_numa_nodeid <= MAX_NODES); + + /* No support for sparse NUMA node IDs yet: */ + for (i = max_numa_nodeid - 1; i >= 0; i--) { + /* Report large node IDs first, to make mistakes easier to spot */ + if (!numa_info[i].present) { + error_report("numa: Node ID missing: %d", i); + exit(1); + } + } + + /* This must be always true if all nodes are present: */ + assert(nb_numa_nodes == max_numa_nodeid); + + if (nb_numa_nodes > 0) { + uint64_t numa_total; + + if (nb_numa_nodes > MAX_NODES) { + nb_numa_nodes = MAX_NODES; + } + + /* If no memory size is given for any node, assume the default case + * and distribute the available memory equally across all nodes + */ + for (i = 0; i < nb_numa_nodes; i++) { + if (numa_info[i].node_mem != 0) { + break; + } + } + if (i == nb_numa_nodes) { + assert(mc->numa_auto_assign_ram); + mc->numa_auto_assign_ram(mc, numa_info, nb_numa_nodes, ram_size); + if (!qtest_enabled()) { + warn_report("Default splitting of RAM between nodes is deprecated," + " Use '-numa node,memdev' to explictly define RAM" + " allocation per node"); + } + } + + numa_total = 0; + for (i = 0; i < nb_numa_nodes; i++) { + numa_total += numa_info[i].node_mem; + } + if (numa_total != ram_size) { + error_report("total memory for NUMA nodes (0x%" PRIx64 ")" + " should equal RAM size (0x" RAM_ADDR_FMT ")", + numa_total, ram_size); + exit(1); + } + + /* QEMU needs at least all unique node pair distances to build + * the whole NUMA distance table. QEMU treats the distance table + * as symmetric by default, i.e. distance A->B == distance B->A. + * Thus, QEMU is able to complete the distance table + * initialization even though only distance A->B is provided and + * distance B->A is not. QEMU knows the distance of a node to + * itself is always 10, so A->A distances may be omitted. When + * the distances of two nodes of a pair differ, i.e. distance + * A->B != distance B->A, then that means the distance table is + * asymmetric. In this case, the distances for both directions + * of all node pairs are required. + */ + if (have_numa_distance) { + /* Validate enough NUMA distance information was provided. */ + validate_numa_distance(); + + /* Validation succeeded, now fill in any missing distances. */ + complete_init_numa_distance(); + } + } +} + +void parse_numa_opts(MachineState *ms) +{ + qemu_opts_foreach(qemu_find_opts("numa"), parse_numa, ms, &error_fatal); +} + +void numa_cpu_pre_plug(const CPUArchId *slot, DeviceState *dev, Error **errp) +{ + int node_id = object_property_get_int(OBJECT(dev), "node-id", &error_abort); + + if (node_id == CPU_UNSET_NUMA_NODE_ID) { + /* due to bug in libvirt, it doesn't pass node-id from props on + * device_add as expected, so we have to fix it up here */ + if (slot->props.has_node_id) { + object_property_set_int(OBJECT(dev), slot->props.node_id, + "node-id", errp); + } + } else if (node_id != slot->props.node_id) { + error_setg(errp, "invalid node-id, must be %"PRId64, + slot->props.node_id); + } +} + +static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner, + const char *name, + uint64_t ram_size) +{ + if (mem_path) { +#ifdef __linux__ + Error *err = NULL; + memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, 0, + mem_path, &err); + if (err) { + error_report_err(err); + if (mem_prealloc) { + exit(1); + } + warn_report("falling back to regular RAM allocation"); + error_printf("This is deprecated. Make sure that -mem-path " + " specified path has sufficient resources to allocate" + " -m specified RAM amount"); + /* Legacy behavior: if allocation failed, fall back to + * regular RAM allocation. + */ + mem_path = NULL; + memory_region_init_ram_nomigrate(mr, owner, name, ram_size, &error_fatal); + } +#else + fprintf(stderr, "-mem-path not supported on this host\n"); + exit(1); +#endif + } else { + memory_region_init_ram_nomigrate(mr, owner, name, ram_size, &error_fatal); + } + vmstate_register_ram_global(mr); +} + +void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner, + const char *name, + uint64_t ram_size) +{ + uint64_t addr = 0; + int i; + + if (nb_numa_nodes == 0 || !have_memdevs) { + allocate_system_memory_nonnuma(mr, owner, name, ram_size); + return; + } + + memory_region_init(mr, owner, name, ram_size); + for (i = 0; i < nb_numa_nodes; i++) { + uint64_t size = numa_info[i].node_mem; + HostMemoryBackend *backend = numa_info[i].node_memdev; + if (!backend) { + continue; + } + MemoryRegion *seg = host_memory_backend_get_memory(backend); + + if (memory_region_is_mapped(seg)) { + char *path = object_get_canonical_path_component(OBJECT(backend)); + error_report("memory backend %s is used multiple times. Each " + "-numa option must use a different memdev value.", + path); + g_free(path); + exit(1); + } + + host_memory_backend_set_mapped(backend, true); + memory_region_add_subregion(mr, addr, seg); + vmstate_register_ram_global(seg); + addr += size; + } +} + +static void numa_stat_memory_devices(NumaNodeMem node_mem[]) +{ + MemoryDeviceInfoList *info_list = qmp_memory_device_list(); + MemoryDeviceInfoList *info; + PCDIMMDeviceInfo *pcdimm_info; + VirtioPMEMDeviceInfo *vpi; + + for (info = info_list; info; info = info->next) { + MemoryDeviceInfo *value = info->value; + + if (value) { + switch (value->type) { + case MEMORY_DEVICE_INFO_KIND_DIMM: + case MEMORY_DEVICE_INFO_KIND_NVDIMM: + pcdimm_info = value->type == MEMORY_DEVICE_INFO_KIND_DIMM ? + value->u.dimm.data : value->u.nvdimm.data; + node_mem[pcdimm_info->node].node_mem += pcdimm_info->size; + node_mem[pcdimm_info->node].node_plugged_mem += + pcdimm_info->size; + break; + case MEMORY_DEVICE_INFO_KIND_VIRTIO_PMEM: + vpi = value->u.virtio_pmem.data; + /* TODO: once we support numa, assign to right node */ + node_mem[0].node_mem += vpi->size; + node_mem[0].node_plugged_mem += vpi->size; + break; + default: + g_assert_not_reached(); + } + } + } + qapi_free_MemoryDeviceInfoList(info_list); +} + +void query_numa_node_mem(NumaNodeMem node_mem[]) +{ + int i; + + if (nb_numa_nodes <= 0) { + return; + } + + numa_stat_memory_devices(node_mem); + for (i = 0; i < nb_numa_nodes; i++) { + node_mem[i].node_mem += numa_info[i].node_mem; + } +} + +void ram_block_notifier_add(RAMBlockNotifier *n) +{ + QLIST_INSERT_HEAD(&ram_list.ramblock_notifiers, n, next); +} + +void ram_block_notifier_remove(RAMBlockNotifier *n) +{ + QLIST_REMOVE(n, next); +} + +void ram_block_notify_add(void *host, size_t size) +{ + RAMBlockNotifier *notifier; + + QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) { + notifier->ram_block_added(notifier, host, size); + } +} + +void ram_block_notify_remove(void *host, size_t size) +{ + RAMBlockNotifier *notifier; + + QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) { + notifier->ram_block_removed(notifier, host, size); + } +} diff --git a/hw/core/qdev.c b/hw/core/qdev.c index f9b6efe509..94ebc0a4a1 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -29,7 +29,7 @@ #include "hw/qdev.h" #include "sysemu/sysemu.h" #include "qapi/error.h" -#include "qapi/qapi-events-misc.h" +#include "qapi/qapi-events-qdev.h" #include "qapi/qmp/qerror.h" #include "qapi/visitor.h" #include "qemu/error-report.h" diff --git a/hw/core/vm-change-state-handler.c b/hw/core/vm-change-state-handler.c new file mode 100644 index 0000000000..f814813bdd --- /dev/null +++ b/hw/core/vm-change-state-handler.c @@ -0,0 +1,61 @@ +/* + * qdev vm change state handlers + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "hw/qdev.h" + +static int qdev_get_dev_tree_depth(DeviceState *dev) +{ + int depth; + + for (depth = 0; dev; depth++) { + BusState *bus = dev->parent_bus; + + if (!bus) { + break; + } + + dev = bus->parent; + } + + return depth; +} + +/** + * qdev_add_vm_change_state_handler: + * @dev: the device that owns this handler + * @cb: the callback function to be invoked + * @opaque: user data passed to the callback function + * + * This function works like qemu_add_vm_change_state_handler() except callbacks + * are invoked in qdev tree depth order. Ordering is desirable when callbacks + * of children depend on their parent's callback having completed first. + * + * For example, when qdev_add_vm_change_state_handler() is used, a host + * controller's callback is invoked before the children on its bus when the VM + * starts running. The order is reversed when the VM stops running. + * + * Returns: an entry to be freed with qemu_del_vm_change_state_handler() + */ +VMChangeStateEntry *qdev_add_vm_change_state_handler(DeviceState *dev, + VMChangeStateHandler *cb, + void *opaque) +{ + int depth = qdev_get_dev_tree_depth(dev); + + return qemu_add_vm_change_state_handler_prio(cb, opaque, depth); +} diff --git a/hw/cpu/core.c b/hw/cpu/core.c index e57c73f0ce..9874c5c870 100644 --- a/hw/cpu/core.c +++ b/hw/cpu/core.c @@ -13,6 +13,7 @@ #include "qemu/module.h" #include "qapi/error.h" #include "sysemu/cpus.h" +#include "hw/boards.h" static void core_prop_get_core_id(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) @@ -71,13 +72,14 @@ static void core_prop_set_nr_threads(Object *obj, Visitor *v, const char *name, static void cpu_core_instance_init(Object *obj) { + MachineState *ms = MACHINE(qdev_get_machine()); CPUCore *core = CPU_CORE(obj); object_property_add(obj, "core-id", "int", core_prop_get_core_id, core_prop_set_core_id, NULL, NULL, NULL); object_property_add(obj, "nr-threads", "int", core_prop_get_nr_threads, core_prop_set_nr_threads, NULL, NULL, NULL); - core->nr_threads = smp_threads; + core->nr_threads = ms->smp.threads; } static void cpu_core_class_init(ObjectClass *oc, void *data) diff --git a/hw/display/ati.c b/hw/display/ati.c index 0cb1173848..a747c4cc98 100644 --- a/hw/display/ati.c +++ b/hw/display/ati.c @@ -89,6 +89,7 @@ static void ati_vga_switch_mode(ATIVGAState *s) DPRINTF("Switching to %dx%d %d %d @ %x\n", h, v, stride, bpp, offs); vbe_ioport_write_index(&s->vga, 0, VBE_DISPI_INDEX_ENABLE); vbe_ioport_write_data(&s->vga, 0, VBE_DISPI_DISABLED); + s->vga.big_endian_fb = false; /* reset VBE regs then set up mode */ s->vga.vbe_regs[VBE_DISPI_INDEX_XRES] = h; s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] = v; @@ -538,7 +539,7 @@ static void ati_mm_write(void *opaque, hwaddr addr, break; case GPIO_DVI_DDC: if (s->dev_id != PCI_DEVICE_ID_ATI_RAGE128_PF) { - s->regs.gpio_dvi_ddc = ati_i2c(s->bbi2c, data, 0); + s->regs.gpio_dvi_ddc = ati_i2c(&s->bbi2c, data, 0); } break; case GPIO_MONID ... GPIO_MONID + 3: @@ -554,7 +555,7 @@ static void ati_mm_write(void *opaque, hwaddr addr, */ if ((s->regs.gpio_monid & BIT(25)) && addr <= GPIO_MONID + 2 && addr + size > GPIO_MONID + 2) { - s->regs.gpio_monid = ati_i2c(s->bbi2c, s->regs.gpio_monid, 1); + s->regs.gpio_monid = ati_i2c(&s->bbi2c, s->regs.gpio_monid, 1); } } break; @@ -688,7 +689,7 @@ static void ati_mm_write(void *opaque, hwaddr addr, break; case SRC_PITCH_OFFSET: if (s->dev_id == PCI_DEVICE_ID_ATI_RAGE128_PF) { - s->regs.src_offset = (data & 0x1fffff) << 4; + s->regs.src_offset = (data & 0x1fffff) << 5; s->regs.src_pitch = (data & 0x7fe00000) >> 21; s->regs.src_tile = data >> 31; } else { @@ -699,7 +700,7 @@ static void ati_mm_write(void *opaque, hwaddr addr, break; case DST_PITCH_OFFSET: if (s->dev_id == PCI_DEVICE_ID_ATI_RAGE128_PF) { - s->regs.dst_offset = (data & 0x1fffff) << 4; + s->regs.dst_offset = (data & 0x1fffff) << 5; s->regs.dst_pitch = (data & 0x7fe00000) >> 21; s->regs.dst_tile = data >> 31; } else { @@ -856,7 +857,7 @@ static void ati_vga_realize(PCIDevice *dev, Error **errp) /* ddc, edid */ I2CBus *i2cbus = i2c_init_bus(DEVICE(s), "ati-vga.ddc"); - s->bbi2c = bitbang_i2c_init(i2cbus); + bitbang_i2c_init(&s->bbi2c, i2cbus); I2CSlave *i2cddc = I2C_SLAVE(qdev_create(BUS(i2cbus), TYPE_I2CDDC)); i2c_set_slave_address(i2cddc, 0x50); @@ -885,7 +886,6 @@ static void ati_vga_exit(PCIDevice *dev) ATIVGAState *s = ATI_VGA(dev); graphic_console_close(s->vga.con); - g_free(s->bbi2c); } static Property ati_vga_properties[] = { diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c index 2dbf53f039..42e82311eb 100644 --- a/hw/display/ati_2d.c +++ b/hw/display/ati_2d.c @@ -42,6 +42,8 @@ static int ati_bpp_from_datatype(ATIVGAState *s) } } +#define DEFAULT_CNTL (s->regs.dp_gui_master_cntl & GMC_DST_PITCH_OFFSET_CNTL) + void ati_2d_blt(ATIVGAState *s) { /* FIXME it is probably more complex than this and may need to be */ @@ -51,60 +53,88 @@ void ati_2d_blt(ATIVGAState *s) s->vga.vbe_start_addr, surface_data(ds), surface_stride(ds), surface_bits_per_pixel(ds), (s->regs.dp_mix & GMC_ROP3_MASK) >> 16); - DPRINTF("%d %d %d, %d %d %d, (%d,%d) -> (%d,%d) %dx%d\n", + int dst_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ? + s->regs.dst_x : s->regs.dst_x + 1 - s->regs.dst_width); + int dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? + s->regs.dst_y : s->regs.dst_y + 1 - s->regs.dst_height); + int bpp = ati_bpp_from_datatype(s); + int dst_stride = DEFAULT_CNTL ? s->regs.dst_pitch : s->regs.default_pitch; + uint8_t *dst_bits = s->vga.vram_ptr + (DEFAULT_CNTL ? + s->regs.dst_offset : s->regs.default_offset); + + if (s->dev_id == PCI_DEVICE_ID_ATI_RAGE128_PF) { + dst_bits += s->regs.crtc_offset & 0x07ffffff; + dst_stride *= bpp; + } + uint8_t *end = s->vga.vram_ptr + s->vga.vram_size; + if (dst_bits >= end || dst_bits + dst_x + (dst_y + s->regs.dst_height) * + dst_stride >= end) { + qemu_log_mask(LOG_UNIMP, "blt outside vram not implemented\n"); + return; + } + DPRINTF("%d %d %d, %d %d %d, (%d,%d) -> (%d,%d) %dx%d %c %c\n", s->regs.src_offset, s->regs.dst_offset, s->regs.default_offset, s->regs.src_pitch, s->regs.dst_pitch, s->regs.default_pitch, s->regs.src_x, s->regs.src_y, s->regs.dst_x, s->regs.dst_y, - s->regs.dst_width, s->regs.dst_height); + s->regs.dst_width, s->regs.dst_height, + (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ? '>' : '<'), + (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? 'v' : '^')); switch (s->regs.dp_mix & GMC_ROP3_MASK) { case ROP3_SRCCOPY: { - uint8_t *src_bits, *dst_bits, *end; - int src_stride, dst_stride, bpp = ati_bpp_from_datatype(s); - src_bits = s->vga.vram_ptr + - (s->regs.dp_gui_master_cntl & GMC_SRC_PITCH_OFFSET_CNTL ? - s->regs.src_offset : s->regs.default_offset); - dst_bits = s->vga.vram_ptr + - (s->regs.dp_gui_master_cntl & GMC_DST_PITCH_OFFSET_CNTL ? - s->regs.dst_offset : s->regs.default_offset); - src_stride = (s->regs.dp_gui_master_cntl & GMC_SRC_PITCH_OFFSET_CNTL ? - s->regs.src_pitch : s->regs.default_pitch); - dst_stride = (s->regs.dp_gui_master_cntl & GMC_DST_PITCH_OFFSET_CNTL ? - s->regs.dst_pitch : s->regs.default_pitch); + int src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ? + s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width); + int src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? + s->regs.src_y : s->regs.src_y + 1 - s->regs.dst_height); + int src_stride = DEFAULT_CNTL ? + s->regs.src_pitch : s->regs.default_pitch; + uint8_t *src_bits = s->vga.vram_ptr + (DEFAULT_CNTL ? + s->regs.src_offset : s->regs.default_offset); if (s->dev_id == PCI_DEVICE_ID_ATI_RAGE128_PF) { src_bits += s->regs.crtc_offset & 0x07ffffff; - dst_bits += s->regs.crtc_offset & 0x07ffffff; src_stride *= bpp; - dst_stride *= bpp; } + if (src_bits >= end || src_bits + src_x + + (src_y + s->regs.dst_height) * src_stride >= end) { + qemu_log_mask(LOG_UNIMP, "blt outside vram not implemented\n"); + return; + } + src_stride /= sizeof(uint32_t); dst_stride /= sizeof(uint32_t); - DPRINTF("pixman_blt(%p, %p, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d)\n", src_bits, dst_bits, src_stride, dst_stride, bpp, bpp, - s->regs.src_x, s->regs.src_y, s->regs.dst_x, s->regs.dst_y, + src_x, src_y, dst_x, dst_y, s->regs.dst_width, s->regs.dst_height); - end = s->vga.vram_ptr + s->vga.vram_size; - if (src_bits >= end || dst_bits >= end || - src_bits + s->regs.src_x + (s->regs.src_y + s->regs.dst_height) * - src_stride * sizeof(uint32_t) >= end || - dst_bits + s->regs.dst_x + (s->regs.dst_y + s->regs.dst_height) * - dst_stride * sizeof(uint32_t) >= end) { - qemu_log_mask(LOG_UNIMP, "blt outside vram not implemented\n"); - return; + if (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT && + s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM) { + pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits, + src_stride, dst_stride, bpp, bpp, + src_x, src_y, dst_x, dst_y, + s->regs.dst_width, s->regs.dst_height); + } else { + /* FIXME: We only really need a temporary if src and dst overlap */ + int llb = s->regs.dst_width * (bpp / 8); + int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t)); + uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) * + s->regs.dst_height); + pixman_blt((uint32_t *)src_bits, tmp, + src_stride, tmp_stride, bpp, bpp, + src_x, src_y, 0, 0, + s->regs.dst_width, s->regs.dst_height); + pixman_blt(tmp, (uint32_t *)dst_bits, + tmp_stride, dst_stride, bpp, bpp, + 0, 0, dst_x, dst_y, + s->regs.dst_width, s->regs.dst_height); + g_free(tmp); } - pixman_blt((uint32_t *)src_bits, (uint32_t *)dst_bits, - src_stride, dst_stride, bpp, bpp, - s->regs.src_x, s->regs.src_y, - s->regs.dst_x, s->regs.dst_y, - s->regs.dst_width, s->regs.dst_height); if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr && dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr + s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) { memory_region_set_dirty(&s->vga.vram, s->vga.vbe_start_addr + s->regs.dst_offset + - s->regs.dst_y * surface_stride(ds), + dst_y * surface_stride(ds), s->regs.dst_height * surface_stride(ds)); } s->regs.dst_x += s->regs.dst_width; @@ -115,57 +145,38 @@ void ati_2d_blt(ATIVGAState *s) case ROP3_BLACKNESS: case ROP3_WHITENESS: { - uint8_t *dst_bits, *end; - int dst_stride, bpp = ati_bpp_from_datatype(s); uint32_t filler = 0; - dst_bits = s->vga.vram_ptr + - (s->regs.dp_gui_master_cntl & GMC_DST_PITCH_OFFSET_CNTL ? - s->regs.dst_offset : s->regs.default_offset); - dst_stride = (s->regs.dp_gui_master_cntl & GMC_DST_PITCH_OFFSET_CNTL ? - s->regs.dst_pitch : s->regs.default_pitch); - - if (s->dev_id == PCI_DEVICE_ID_ATI_RAGE128_PF) { - dst_bits += s->regs.crtc_offset & 0x07ffffff; - dst_stride *= bpp; - } - dst_stride /= sizeof(uint32_t); switch (s->regs.dp_mix & GMC_ROP3_MASK) { case ROP3_PATCOPY: - filler = bswap32(s->regs.dp_brush_frgd_clr); + filler = s->regs.dp_brush_frgd_clr; break; case ROP3_BLACKNESS: - filler = rgb_to_pixel32(s->vga.palette[0], s->vga.palette[1], - s->vga.palette[2]) << 8 | 0xff; + filler = 0xffUL << 24 | rgb_to_pixel32(s->vga.palette[0], + s->vga.palette[1], s->vga.palette[2]); break; case ROP3_WHITENESS: - filler = rgb_to_pixel32(s->vga.palette[3], s->vga.palette[4], - s->vga.palette[5]) << 8 | 0xff; + filler = 0xffUL << 24 | rgb_to_pixel32(s->vga.palette[3], + s->vga.palette[4], s->vga.palette[5]); break; } + dst_stride /= sizeof(uint32_t); DPRINTF("pixman_fill(%p, %d, %d, %d, %d, %d, %d, %x)\n", dst_bits, dst_stride, bpp, s->regs.dst_x, s->regs.dst_y, s->regs.dst_width, s->regs.dst_height, filler); - end = s->vga.vram_ptr + s->vga.vram_size; - if (dst_bits >= end || - dst_bits + s->regs.dst_x + (s->regs.dst_y + s->regs.dst_height) * - dst_stride * sizeof(uint32_t) >= end) { - qemu_log_mask(LOG_UNIMP, "blt outside vram not implemented\n"); - return; - } pixman_fill((uint32_t *)dst_bits, dst_stride, bpp, - s->regs.dst_x, s->regs.dst_y, - s->regs.dst_width, s->regs.dst_height, - filler); + s->regs.dst_x, s->regs.dst_y, + s->regs.dst_width, s->regs.dst_height, + filler); if (dst_bits >= s->vga.vram_ptr + s->vga.vbe_start_addr && dst_bits < s->vga.vram_ptr + s->vga.vbe_start_addr + s->vga.vbe_regs[VBE_DISPI_INDEX_YRES] * s->vga.vbe_line_offset) { memory_region_set_dirty(&s->vga.vram, s->vga.vbe_start_addr + s->regs.dst_offset + - s->regs.dst_y * surface_stride(ds), + dst_y * surface_stride(ds), s->regs.dst_height * surface_stride(ds)); } s->regs.dst_y += s->regs.dst_height; diff --git a/hw/display/ati_int.h b/hw/display/ati_int.h index 9b67d0022a..31a1927b3e 100644 --- a/hw/display/ati_int.h +++ b/hw/display/ati_int.h @@ -88,7 +88,7 @@ typedef struct ATIVGAState { uint16_t cursor_size; uint32_t cursor_offset; QEMUCursor *cursor; - bitbang_i2c_interface *bbi2c; + bitbang_i2c_interface bbi2c; MemoryRegion io; MemoryRegion mm; ATIVGARegs regs; diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 2b0f66b1d6..25d9e327fc 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -1002,6 +1002,11 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size, resource_id = qemu_get_be32(f); while (resource_id != 0) { + res = virtio_gpu_find_resource(g, resource_id); + if (res) { + return -EINVAL; + } + res = g_new0(struct virtio_gpu_simple_resource, 1); res->resource_id = resource_id; res->width = qemu_get_be32(f); @@ -1048,9 +1053,9 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size, if (res->iov[i].iov_base) { dma_memory_unmap(VIRTIO_DEVICE(g)->dma_as, res->iov[i].iov_base, - res->iov[i].iov_len, + len, DMA_DIRECTION_TO_DEVICE, - res->iov[i].iov_len); + 0); } /* ...and the mappings for previous loop iterations */ res->iov_cnt = i; diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c index d1b1d3caa4..662838d83b 100644 --- a/hw/hppa/machine.c +++ b/hw/hppa/machine.c @@ -72,6 +72,7 @@ static void machine_hppa_init(MachineState *machine) MemoryRegion *ram_region; MemoryRegion *cpu_region; long i; + unsigned int smp_cpus = machine->smp.cpus; ram_size = machine->ram_size; @@ -240,8 +241,9 @@ static void machine_hppa_init(MachineState *machine) cpu[0]->env.gr[21] = smp_cpus; } -static void hppa_machine_reset(void) +static void hppa_machine_reset(MachineState *ms) { + unsigned int smp_cpus = ms->smp.cpus; int i; qemu_devices_reset(); diff --git a/hw/i2c/bitbang_i2c.c b/hw/i2c/bitbang_i2c.c index 3cb0509b02..60c7a9be0b 100644 --- a/hw/i2c/bitbang_i2c.c +++ b/hw/i2c/bitbang_i2c.c @@ -25,39 +25,6 @@ do { printf("bitbang_i2c: " fmt , ## __VA_ARGS__); } while (0) #define DPRINTF(fmt, ...) do {} while(0) #endif -typedef enum bitbang_i2c_state { - STOPPED = 0, - SENDING_BIT7, - SENDING_BIT6, - SENDING_BIT5, - SENDING_BIT4, - SENDING_BIT3, - SENDING_BIT2, - SENDING_BIT1, - SENDING_BIT0, - WAITING_FOR_ACK, - RECEIVING_BIT7, - RECEIVING_BIT6, - RECEIVING_BIT5, - RECEIVING_BIT4, - RECEIVING_BIT3, - RECEIVING_BIT2, - RECEIVING_BIT1, - RECEIVING_BIT0, - SENDING_ACK, - SENT_NACK -} bitbang_i2c_state; - -struct bitbang_i2c_interface { - I2CBus *bus; - bitbang_i2c_state state; - int last_data; - int last_clock; - int device_out; - uint8_t buffer; - int current_addr; -}; - static void bitbang_i2c_enter_stop(bitbang_i2c_interface *i2c) { DPRINTF("STOP\n"); @@ -184,18 +151,12 @@ int bitbang_i2c_set(bitbang_i2c_interface *i2c, int line, int level) abort(); } -bitbang_i2c_interface *bitbang_i2c_init(I2CBus *bus) +void bitbang_i2c_init(bitbang_i2c_interface *s, I2CBus *bus) { - bitbang_i2c_interface *s; - - s = g_malloc0(sizeof(bitbang_i2c_interface)); - s->bus = bus; s->last_data = 1; s->last_clock = 1; s->device_out = 1; - - return s; } /* GPIO interface. */ @@ -207,7 +168,7 @@ typedef struct GPIOI2CState { SysBusDevice parent_obj; MemoryRegion dummy_iomem; - bitbang_i2c_interface *bitbang; + bitbang_i2c_interface bitbang; int last_level; qemu_irq out; } GPIOI2CState; @@ -216,7 +177,7 @@ static void bitbang_i2c_gpio_set(void *opaque, int irq, int level) { GPIOI2CState *s = opaque; - level = bitbang_i2c_set(s->bitbang, irq, level); + level = bitbang_i2c_set(&s->bitbang, irq, level); if (level != s->last_level) { s->last_level = level; qemu_set_irq(s->out, level); @@ -234,7 +195,7 @@ static void gpio_i2c_init(Object *obj) sysbus_init_mmio(sbd, &s->dummy_iomem); bus = i2c_init_bus(dev, "i2c"); - s->bitbang = bitbang_i2c_init(bus); + bitbang_i2c_init(&s->bitbang, bus); qdev_init_gpio_in(dev, bitbang_i2c_gpio_set, 2); qdev_init_gpio_out(dev, &s->out, 1); diff --git a/hw/i2c/ppc4xx_i2c.c b/hw/i2c/ppc4xx_i2c.c index 5fb4f86c38..462729db4e 100644 --- a/hw/i2c/ppc4xx_i2c.c +++ b/hw/i2c/ppc4xx_i2c.c @@ -311,9 +311,9 @@ static void ppc4xx_i2c_writeb(void *opaque, hwaddr addr, uint64_t value, case IIC_DIRECTCNTL: i2c->directcntl = value & (IIC_DIRECTCNTL_SDAC & IIC_DIRECTCNTL_SCLC); i2c->directcntl |= (value & IIC_DIRECTCNTL_SCLC ? 1 : 0); - bitbang_i2c_set(i2c->bitbang, BITBANG_I2C_SCL, + bitbang_i2c_set(&i2c->bitbang, BITBANG_I2C_SCL, i2c->directcntl & IIC_DIRECTCNTL_MSCL); - i2c->directcntl |= bitbang_i2c_set(i2c->bitbang, BITBANG_I2C_SDA, + i2c->directcntl |= bitbang_i2c_set(&i2c->bitbang, BITBANG_I2C_SDA, (value & IIC_DIRECTCNTL_SDAC) != 0) << 1; break; default: @@ -347,7 +347,7 @@ static void ppc4xx_i2c_init(Object *o) sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); sysbus_init_irq(SYS_BUS_DEVICE(s), &s->irq); s->bus = i2c_init_bus(DEVICE(s), "i2c"); - s->bitbang = bitbang_i2c_init(s->bus); + bitbang_i2c_init(&s->bitbang, s->bus); } static void ppc4xx_i2c_class_init(ObjectClass *klass, void *data) diff --git a/hw/i2c/versatile_i2c.c b/hw/i2c/versatile_i2c.c index 24b6e36b6d..1ac2a6f59a 100644 --- a/hw/i2c/versatile_i2c.c +++ b/hw/i2c/versatile_i2c.c @@ -35,7 +35,7 @@ typedef struct VersatileI2CState { SysBusDevice parent_obj; MemoryRegion iomem; - bitbang_i2c_interface *bitbang; + bitbang_i2c_interface bitbang; int out; int in; } VersatileI2CState; @@ -70,8 +70,8 @@ static void versatile_i2c_write(void *opaque, hwaddr offset, qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%x\n", __func__, (int)offset); } - bitbang_i2c_set(s->bitbang, BITBANG_I2C_SCL, (s->out & 1) != 0); - s->in = bitbang_i2c_set(s->bitbang, BITBANG_I2C_SDA, (s->out & 2) != 0); + bitbang_i2c_set(&s->bitbang, BITBANG_I2C_SCL, (s->out & 1) != 0); + s->in = bitbang_i2c_set(&s->bitbang, BITBANG_I2C_SDA, (s->out & 2) != 0); } static const MemoryRegionOps versatile_i2c_ops = { @@ -88,7 +88,7 @@ static void versatile_i2c_init(Object *obj) I2CBus *bus; bus = i2c_init_bus(dev, "i2c"); - s->bitbang = bitbang_i2c_init(bus); + bitbang_i2c_init(&s->bitbang, bus); memory_region_init_io(&s->iomem, obj, &versatile_i2c_ops, s, "versatile_i2c", 0x1000); sysbus_init_mmio(sbd, &s->iomem); diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index 9817888216..4ddf2a9c55 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -30,6 +30,7 @@ config PC # For ACPI builder: select SERIAL_ISA select ACPI_VMGENID + select VIRTIO_PMEM_SUPPORTED config PC_PCI bool diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 31a1c1e3ad..d281ffa89e 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -43,6 +43,7 @@ #include "sysemu/tpm.h" #include "hw/acpi/tpm.h" #include "hw/acpi/vmgenid.h" +#include "hw/boards.h" #include "sysemu/tpm_backend.h" #include "hw/timer/mc146818rtc_regs.h" #include "hw/mem/memory-device.h" @@ -123,7 +124,8 @@ typedef struct FwCfgTPMConfig { static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg); -static void init_common_fadt_data(Object *o, AcpiFadtData *data) +static void init_common_fadt_data(MachineState *ms, Object *o, + AcpiFadtData *data) { uint32_t io = object_property_get_uint(o, ACPI_PM_PROP_PM_IO_BASE, NULL); AmlAddressSpace as = AML_AS_SYSTEM_IO; @@ -139,7 +141,8 @@ static void init_common_fadt_data(Object *o, AcpiFadtData *data) * CPUs for more than 8 CPUs, "Clustered Logical" mode has to be * used */ - ((max_cpus > 8) ? (1 << ACPI_FADT_F_FORCE_APIC_CLUSTER_MODEL) : 0), + ((ms->smp.max_cpus > 8) ? + (1 << ACPI_FADT_F_FORCE_APIC_CLUSTER_MODEL) : 0), .int_model = 1 /* Multiple APIC */, .rtc_century = RTC_CENTURY, .plvl2_lat = 0xfff /* C2 state not supported */, @@ -173,7 +176,7 @@ static Object *object_resolve_type_unambiguous(const char *typename) return o; } -static void acpi_get_pm_info(AcpiPmInfo *pm) +static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) { Object *piix = object_resolve_type_unambiguous(TYPE_PIIX4_PM); Object *lpc = object_resolve_type_unambiguous(TYPE_ICH9_LPC_DEVICE); @@ -184,7 +187,7 @@ static void acpi_get_pm_info(AcpiPmInfo *pm) pm->pcihp_io_len = 0; assert(obj); - init_common_fadt_data(obj, &pm->fadt); + init_common_fadt_data(machine, obj, &pm->fadt); if (piix) { /* w2k requires FADT(rev1) or it won't boot, keep PC compatible */ pm->fadt.rev = 1; @@ -2612,7 +2615,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL }; Object *vmgenid_dev; - acpi_get_pm_info(&pm); + acpi_get_pm_info(machine, &pm); acpi_get_misc_info(&misc); acpi_get_pci_holes(&pci_hole, &pci_hole64); acpi_get_slic_oem(&slic_oem); diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 44b1231157..de86f53b4e 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3363,11 +3363,28 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) return vtd_dev_as; } +static uint64_t get_naturally_aligned_size(uint64_t start, + uint64_t size, int gaw) +{ + uint64_t max_mask = 1ULL << gaw; + uint64_t alignment = start ? start & -start : max_mask; + + alignment = MIN(alignment, max_mask); + size = MIN(size, max_mask); + + if (alignment <= size) { + /* Increase the alignment of start */ + return alignment; + } else { + /* Find the largest page mask from size */ + return 1ULL << (63 - clz64(size)); + } +} + /* Unmap the whole range in the notifier's scope. */ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) { - IOMMUTLBEntry entry; - hwaddr size; + hwaddr size, remain; hwaddr start = n->start; hwaddr end = n->end; IntelIOMMUState *s = as->iommu_state; @@ -3379,48 +3396,46 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) * VT-d spec), otherwise we need to consider overflow of 64 bits. */ - if (end > VTD_ADDRESS_SIZE(s->aw_bits)) { + if (end > VTD_ADDRESS_SIZE(s->aw_bits) - 1) { /* * Don't need to unmap regions that is bigger than the whole * VT-d supported address space size */ - end = VTD_ADDRESS_SIZE(s->aw_bits); + end = VTD_ADDRESS_SIZE(s->aw_bits) - 1; } assert(start <= end); - size = end - start; + size = remain = end - start + 1; - if (ctpop64(size) != 1) { - /* - * This size cannot format a correct mask. Let's enlarge it to - * suite the minimum available mask. - */ - int n = 64 - clz64(size); - if (n > s->aw_bits) { - /* should not happen, but in case it happens, limit it */ - n = s->aw_bits; - } - size = 1ULL << n; + while (remain >= VTD_PAGE_SIZE) { + IOMMUTLBEntry entry; + uint64_t mask = get_naturally_aligned_size(start, remain, s->aw_bits); + + assert(mask); + + entry.iova = start; + entry.addr_mask = mask - 1; + entry.target_as = &address_space_memory; + entry.perm = IOMMU_NONE; + /* This field is meaningless for unmap */ + entry.translated_addr = 0; + + memory_region_notify_one(n, &entry); + + start += mask; + remain -= mask; } - entry.target_as = &address_space_memory; - /* Adjust iova for the size */ - entry.iova = n->start & ~(size - 1); - /* This field is meaningless for unmap */ - entry.translated_addr = 0; - entry.perm = IOMMU_NONE; - entry.addr_mask = size - 1; + assert(!remain); trace_vtd_as_unmap_whole(pci_bus_num(as->bus), VTD_PCI_SLOT(as->devfn), VTD_PCI_FUNC(as->devfn), - entry.iova, size); + n->start, size); - map.iova = entry.iova; - map.size = entry.addr_mask; + map.iova = n->start; + map.size = size; iova_tree_remove(as->iova_tree, &map); - - memory_region_notify_one(n, &entry); } static void vtd_address_space_unmap_all(IntelIOMMUState *s) diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c index ca8df462b6..9c2ab4aac5 100644 --- a/hw/i386/kvmvapic.c +++ b/hw/i386/kvmvapic.c @@ -18,6 +18,7 @@ #include "sysemu/kvm.h" #include "hw/i386/apic_internal.h" #include "hw/sysbus.h" +#include "hw/boards.h" #include "tcg/tcg.h" #define VAPIC_IO_PORT 0x7e @@ -442,11 +443,12 @@ static void do_patch_instruction(CPUState *cs, run_on_cpu_data data) static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip) { + MachineState *ms = MACHINE(qdev_get_machine()); CPUState *cs = CPU(cpu); VAPICHandlers *handlers; PatchInfo *info; - if (smp_cpus == 1) { + if (ms->smp.cpus == 1) { handlers = &s->rom_state.up; } else { handlers = &s->rom_state.mp; @@ -747,6 +749,7 @@ static void do_vapic_enable(CPUState *cs, run_on_cpu_data data) static void kvmvapic_vm_state_change(void *opaque, int running, RunState state) { + MachineState *ms = MACHINE(qdev_get_machine()); VAPICROMState *s = opaque; uint8_t *zero; @@ -755,7 +758,7 @@ static void kvmvapic_vm_state_change(void *opaque, int running, } if (s->state == VAPIC_ACTIVE) { - if (smp_cpus == 1) { + if (ms->smp.cpus == 1) { run_on_cpu(first_cpu, do_vapic_enable, RUN_ON_CPU_HOST_PTR(s)); } else { zero = g_malloc0(s->rom_state.vapic_size); diff --git a/hw/i386/pc.c b/hw/i386/pc.c index e96360b47a..c33ce47578 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -79,6 +79,10 @@ #include "hw/i386/intel_iommu.h" #include "hw/net/ne2000-isa.h" #include "standard-headers/asm-x86/bootparam.h" +#include "hw/virtio/virtio-pmem-pci.h" +#include "hw/mem/memory-device.h" +#include "sysemu/replay.h" +#include "qapi/qmp/qerror.h" /* debug PC/ISA interrupts */ //#define DEBUG_IRQ @@ -913,14 +917,6 @@ bool e820_get_entry(int idx, uint32_t type, uint64_t *address, uint64_t *length) return false; } -/* Enables contiguous-apic-ID mode, for compatibility */ -static bool compat_apic_id_mode; - -void enable_compat_apic_id_mode(void) -{ - compat_apic_id_mode = true; -} - /* Calculates initial APIC ID for a specific CPU index * * Currently we need to be able to calculate the APIC ID from the CPU index @@ -928,13 +924,17 @@ void enable_compat_apic_id_mode(void) * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of * all CPUs up to max_cpus. */ -static uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index) +static uint32_t x86_cpu_apic_id_from_index(PCMachineState *pcms, + unsigned int cpu_index) { + MachineState *ms = MACHINE(pcms); + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); uint32_t correct_id; static bool warned; - correct_id = x86_apicid_from_cpu_idx(smp_cores, smp_threads, cpu_index); - if (compat_apic_id_mode) { + correct_id = x86_apicid_from_cpu_idx(pcms->smp_dies, ms->smp.cores, + ms->smp.threads, cpu_index); + if (pcmc->compat_apic_id_mode) { if (cpu_index != correct_id && !warned && !qtest_enabled()) { error_report("APIC IDs set in compatibility mode, " "CPU topology won't match the configuration"); @@ -958,7 +958,7 @@ static void pc_build_smbios(PCMachineState *pcms) /* tell smbios about cpuid version and features */ smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); - smbios_tables = smbios_get_table_legacy(&smbios_tables_len); + smbios_tables = smbios_get_table_legacy(ms, &smbios_tables_len); if (smbios_tables) { fw_cfg_add_bytes(pcms->fw_cfg, FW_CFG_SMBIOS_ENTRIES, smbios_tables, smbios_tables_len); @@ -975,7 +975,7 @@ static void pc_build_smbios(PCMachineState *pcms) array_count++; } } - smbios_get_tables(mem_array, array_count, + smbios_get_tables(ms, mem_array, array_count, &smbios_tables, &smbios_tables_len, &smbios_anchor, &smbios_anchor_len); g_free(mem_array); @@ -1516,12 +1516,16 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level) } } -static void pc_new_cpu(const char *typename, int64_t apic_id, Error **errp) +static void pc_new_cpu(PCMachineState *pcms, int64_t apic_id, Error **errp) { Object *cpu = NULL; Error *local_err = NULL; + CPUX86State *env = NULL; + + cpu = object_new(MACHINE(pcms)->cpu_type); - cpu = object_new(typename); + env = &X86_CPU(cpu)->env; + env->nr_dies = pcms->smp_dies; object_property_set_uint(cpu, apic_id, "apic-id", &local_err); object_property_set_bool(cpu, true, "realized", &local_err); @@ -1530,10 +1534,90 @@ static void pc_new_cpu(const char *typename, int64_t apic_id, Error **errp) error_propagate(errp, local_err); } -void pc_hot_add_cpu(const int64_t id, Error **errp) +/* + * This function is very similar to smp_parse() + * in hw/core/machine.c but includes CPU die support. + */ +void pc_smp_parse(MachineState *ms, QemuOpts *opts) +{ + PCMachineState *pcms = PC_MACHINE(ms); + + if (opts) { + unsigned cpus = qemu_opt_get_number(opts, "cpus", 0); + unsigned sockets = qemu_opt_get_number(opts, "sockets", 0); + unsigned dies = qemu_opt_get_number(opts, "dies", 1); + unsigned cores = qemu_opt_get_number(opts, "cores", 0); + unsigned threads = qemu_opt_get_number(opts, "threads", 0); + + /* compute missing values, prefer sockets over cores over threads */ + if (cpus == 0 || sockets == 0) { + cores = cores > 0 ? cores : 1; + threads = threads > 0 ? threads : 1; + if (cpus == 0) { + sockets = sockets > 0 ? sockets : 1; + cpus = cores * threads * dies * sockets; + } else { + ms->smp.max_cpus = + qemu_opt_get_number(opts, "maxcpus", cpus); + sockets = ms->smp.max_cpus / (cores * threads * dies); + } + } else if (cores == 0) { + threads = threads > 0 ? threads : 1; + cores = cpus / (sockets * dies * threads); + cores = cores > 0 ? cores : 1; + } else if (threads == 0) { + threads = cpus / (cores * dies * sockets); + threads = threads > 0 ? threads : 1; + } else if (sockets * dies * cores * threads < cpus) { + error_report("cpu topology: " + "sockets (%u) * dies (%u) * cores (%u) * threads (%u) < " + "smp_cpus (%u)", + sockets, dies, cores, threads, cpus); + exit(1); + } + + ms->smp.max_cpus = + qemu_opt_get_number(opts, "maxcpus", cpus); + + if (ms->smp.max_cpus < cpus) { + error_report("maxcpus must be equal to or greater than smp"); + exit(1); + } + + if (sockets * dies * cores * threads > ms->smp.max_cpus) { + error_report("cpu topology: " + "sockets (%u) * dies (%u) * cores (%u) * threads (%u) > " + "maxcpus (%u)", + sockets, dies, cores, threads, + ms->smp.max_cpus); + exit(1); + } + + if (sockets * dies * cores * threads != ms->smp.max_cpus) { + warn_report("Invalid CPU topology deprecated: " + "sockets (%u) * dies (%u) * cores (%u) * threads (%u) " + "!= maxcpus (%u)", + sockets, dies, cores, threads, + ms->smp.max_cpus); + } + + ms->smp.cpus = cpus; + ms->smp.cores = cores; + ms->smp.threads = threads; + pcms->smp_dies = dies; + } + + if (ms->smp.cpus > 1) { + Error *blocker = NULL; + error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "smp"); + replay_add_blocker(blocker); + } +} + +void pc_hot_add_cpu(MachineState *ms, const int64_t id, Error **errp) { - MachineState *ms = MACHINE(qdev_get_machine()); - int64_t apic_id = x86_cpu_apic_id_from_index(id); + PCMachineState *pcms = PC_MACHINE(ms); + int64_t apic_id = x86_cpu_apic_id_from_index(pcms, id); Error *local_err = NULL; if (id < 0) { @@ -1548,7 +1632,7 @@ void pc_hot_add_cpu(const int64_t id, Error **errp) return; } - pc_new_cpu(ms->cpu_type, apic_id, &local_err); + pc_new_cpu(PC_MACHINE(ms), apic_id, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -1561,6 +1645,9 @@ void pc_cpus_init(PCMachineState *pcms) const CPUArchIdList *possible_cpus; MachineState *ms = MACHINE(pcms); MachineClass *mc = MACHINE_GET_CLASS(pcms); + PCMachineClass *pcmc = PC_MACHINE_CLASS(mc); + + x86_cpu_set_default_version(pcmc->default_cpu_version); /* Calculates the limit to CPU APIC ID values * @@ -1569,11 +1656,11 @@ void pc_cpus_init(PCMachineState *pcms) * * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). */ - pcms->apic_id_limit = x86_cpu_apic_id_from_index(max_cpus - 1) + 1; + pcms->apic_id_limit = x86_cpu_apic_id_from_index(pcms, + ms->smp.max_cpus - 1) + 1; possible_cpus = mc->possible_cpu_arch_ids(ms); - for (i = 0; i < smp_cpus; i++) { - pc_new_cpu(possible_cpus->cpus[i].type, possible_cpus->cpus[i].arch_id, - &error_fatal); + for (i = 0; i < ms->smp.cpus; i++) { + pc_new_cpu(pcms, possible_cpus->cpus[i].arch_id, &error_fatal); } } @@ -2293,8 +2380,11 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, CPUArchId *cpu_slot; X86CPUTopoInfo topo; X86CPU *cpu = X86_CPU(dev); + CPUX86State *env = &cpu->env; MachineState *ms = MACHINE(hotplug_dev); PCMachineState *pcms = PC_MACHINE(hotplug_dev); + unsigned int smp_cores = ms->smp.cores; + unsigned int smp_threads = ms->smp.threads; if(!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", @@ -2302,9 +2392,15 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, return; } - /* if APIC ID is not set, set it based on socket/core/thread properties */ + env->nr_dies = pcms->smp_dies; + + /* + * If APIC ID is not set, + * set it based on socket/die/core/thread properties. + */ if (cpu->apic_id == UNASSIGNED_APIC_ID) { - int max_socket = (max_cpus - 1) / smp_threads / smp_cores; + int max_socket = (ms->smp.max_cpus - 1) / + smp_threads / smp_cores / pcms->smp_dies; if (cpu->socket_id < 0) { error_setg(errp, "CPU socket-id is not set"); @@ -2313,6 +2409,10 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", cpu->socket_id, max_socket); return; + } else if (cpu->die_id > pcms->smp_dies - 1) { + error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", + cpu->die_id, max_socket); + return; } if (cpu->core_id < 0) { error_setg(errp, "CPU core-id is not set"); @@ -2332,20 +2432,24 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, } topo.pkg_id = cpu->socket_id; + topo.die_id = cpu->die_id; topo.core_id = cpu->core_id; topo.smt_id = cpu->thread_id; - cpu->apic_id = apicid_from_topo_ids(smp_cores, smp_threads, &topo); + cpu->apic_id = apicid_from_topo_ids(pcms->smp_dies, smp_cores, + smp_threads, &topo); } cpu_slot = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, &idx); if (!cpu_slot) { MachineState *ms = MACHINE(pcms); - x86_topo_ids_from_apicid(cpu->apic_id, smp_cores, smp_threads, &topo); - error_setg(errp, "Invalid CPU [socket: %u, core: %u, thread: %u] with" - " APIC ID %" PRIu32 ", valid index range 0:%d", - topo.pkg_id, topo.core_id, topo.smt_id, cpu->apic_id, - ms->possible_cpus->len - 1); + x86_topo_ids_from_apicid(cpu->apic_id, pcms->smp_dies, + smp_cores, smp_threads, &topo); + error_setg(errp, + "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" + " APIC ID %" PRIu32 ", valid index range 0:%d", + topo.pkg_id, topo.die_id, topo.core_id, topo.smt_id, + cpu->apic_id, ms->possible_cpus->len - 1); return; } @@ -2361,7 +2465,8 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() * once -smp refactoring is complete and there will be CPU private * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ - x86_topo_ids_from_apicid(cpu->apic_id, smp_cores, smp_threads, &topo); + x86_topo_ids_from_apicid(cpu->apic_id, pcms->smp_dies, + smp_cores, smp_threads, &topo); if (cpu->socket_id != -1 && cpu->socket_id != topo.pkg_id) { error_setg(errp, "property socket-id: %u doesn't match set apic-id:" " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, topo.pkg_id); @@ -2369,6 +2474,13 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, } cpu->socket_id = topo.pkg_id; + if (cpu->die_id != -1 && cpu->die_id != topo.die_id) { + error_setg(errp, "property die-id: %u doesn't match set apic-id:" + " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo.die_id); + return; + } + cpu->die_id = topo.die_id; + if (cpu->core_id != -1 && cpu->core_id != topo.core_id) { error_setg(errp, "property core-id: %u doesn't match set apic-id:" " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, topo.core_id); @@ -2395,6 +2507,65 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, numa_cpu_pre_plug(cpu_slot, dev, errp); } +static void pc_virtio_pmem_pci_pre_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev); + Error *local_err = NULL; + + if (!hotplug_dev2) { + /* + * Without a bus hotplug handler, we cannot control the plug/unplug + * order. This should never be the case on x86, however better add + * a safety net. + */ + error_setg(errp, "virtio-pmem-pci not supported on this bus."); + return; + } + /* + * First, see if we can plug this memory device at all. If that + * succeeds, branch of to the actual hotplug handler. + */ + memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), NULL, + &local_err); + if (!local_err) { + hotplug_handler_pre_plug(hotplug_dev2, dev, &local_err); + } + error_propagate(errp, local_err); +} + +static void pc_virtio_pmem_pci_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev); + Error *local_err = NULL; + + /* + * Plug the memory device first and then branch off to the actual + * hotplug handler. If that one fails, we can easily undo the memory + * device bits. + */ + memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev)); + hotplug_handler_plug(hotplug_dev2, dev, &local_err); + if (local_err) { + memory_device_unplug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev)); + } + error_propagate(errp, local_err); +} + +static void pc_virtio_pmem_pci_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + /* We don't support virtio pmem hot unplug */ + error_setg(errp, "virtio pmem device unplug not supported."); +} + +static void pc_virtio_pmem_pci_unplug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + /* We don't support virtio pmem hot unplug */ +} + static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -2402,6 +2573,8 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, pc_memory_pre_plug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { pc_cpu_pre_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) { + pc_virtio_pmem_pci_pre_plug(hotplug_dev, dev, errp); } } @@ -2412,6 +2585,8 @@ static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev, pc_memory_plug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { pc_cpu_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) { + pc_virtio_pmem_pci_plug(hotplug_dev, dev, errp); } } @@ -2422,6 +2597,8 @@ static void pc_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, pc_memory_unplug_request(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { pc_cpu_unplug_request_cb(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) { + pc_virtio_pmem_pci_unplug_request(hotplug_dev, dev, errp); } else { error_setg(errp, "acpi: device unplug request for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -2435,6 +2612,8 @@ static void pc_machine_device_unplug_cb(HotplugHandler *hotplug_dev, pc_memory_unplug(hotplug_dev, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { pc_cpu_unplug_cb(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) { + pc_virtio_pmem_pci_unplug(hotplug_dev, dev, errp); } else { error_setg(errp, "acpi: device unplug for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -2445,7 +2624,8 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine, DeviceState *dev) { if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || - object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + object_dynamic_cast(OBJECT(dev), TYPE_CPU) || + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI)) { return HOTPLUG_HANDLER(machine); } @@ -2458,7 +2638,11 @@ pc_machine_get_device_memory_region_size(Object *obj, Visitor *v, Error **errp) { MachineState *ms = MACHINE(obj); - int64_t value = memory_region_size(&ms->device_memory->mr); + int64_t value = 0; + + if (ms->device_memory) { + value = memory_region_size(&ms->device_memory->mr); + } visit_type_int(v, name, &value, errp); } @@ -2615,11 +2799,12 @@ static void pc_machine_initfn(Object *obj) pcms->smbus_enabled = true; pcms->sata_enabled = true; pcms->pit_enabled = true; + pcms->smp_dies = 1; pc_system_flash_create(pcms); } -static void pc_machine_reset(void) +static void pc_machine_reset(MachineState *machine) { CPUState *cs; X86CPU *cpu; @@ -2651,16 +2836,20 @@ pc_cpu_index_to_props(MachineState *ms, unsigned cpu_index) static int64_t pc_get_default_cpu_node_id(const MachineState *ms, int idx) { X86CPUTopoInfo topo; + PCMachineState *pcms = PC_MACHINE(ms); assert(idx < ms->possible_cpus->len); x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id, - smp_cores, smp_threads, &topo); + pcms->smp_dies, ms->smp.cores, + ms->smp.threads, &topo); return topo.pkg_id % nb_numa_nodes; } static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms) { + PCMachineState *pcms = PC_MACHINE(ms); int i; + unsigned int max_cpus = ms->smp.max_cpus; if (ms->possible_cpus) { /* @@ -2679,11 +2868,14 @@ static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms) ms->possible_cpus->cpus[i].type = ms->cpu_type; ms->possible_cpus->cpus[i].vcpus_count = 1; - ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(i); + ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(pcms, i); x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id, - smp_cores, smp_threads, &topo); + pcms->smp_dies, ms->smp.cores, + ms->smp.threads, &topo); ms->possible_cpus->cpus[i].props.has_socket_id = true; ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id; + ms->possible_cpus->cpus[i].props.has_die_id = true; + ms->possible_cpus->cpus[i].props.die_id = topo.die_id; ms->possible_cpus->cpus[i].props.has_core_id = true; ms->possible_cpus->cpus[i].props.core_id = topo.core_id; ms->possible_cpus->cpus[i].props.has_thread_id = true; @@ -2739,6 +2931,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; mc->hot_add_cpu = pc_hot_add_cpu; + mc->smp_parse = pc_smp_parse; mc->block_default_type = IF_IDE; mc->max_cpus = 255; mc->reset = pc_machine_reset; @@ -2749,6 +2942,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) nc->nmi_monitor_handler = x86_nmi; mc->default_cpu_type = TARGET_DEFAULT_CPU_TYPE; mc->nvdimm_supported = true; + mc->numa_mem_supported = true; object_class_property_add(oc, PC_MACHINE_DEVMEM_REGION_SIZE, "int", pc_machine_get_device_memory_region_size, NULL, diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index c07c4a5b38..c2280c72ef 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -23,6 +23,7 @@ */ #include "qemu/osdep.h" +#include "config-devices.h" #include "qemu/units.h" #include "hw/hw.h" @@ -61,9 +62,11 @@ #define MAX_IDE_BUS 2 +#ifdef CONFIG_IDE_ISA static const int ide_iobase[MAX_IDE_BUS] = { 0x1f0, 0x170 }; static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 }; static const int ide_irq[MAX_IDE_BUS] = { 14, 15 }; +#endif /* PC hardware initialisation */ static void pc_init1(MachineState *machine, @@ -254,7 +257,10 @@ static void pc_init1(MachineState *machine, } idebus[0] = qdev_get_child_bus(&dev->qdev, "ide.0"); idebus[1] = qdev_get_child_bus(&dev->qdev, "ide.1"); - } else { + pc_cmos_init(pcms, idebus[0], idebus[1], rtc_state); + } +#ifdef CONFIG_IDE_ISA +else { for(i = 0; i < MAX_IDE_BUS; i++) { ISADevice *dev; char busname[] = "ide.0"; @@ -268,9 +274,9 @@ static void pc_init1(MachineState *machine, busname[4] = '0' + i; idebus[i] = qdev_get_child_bus(DEVICE(dev), busname); } + pc_cmos_init(pcms, idebus[0], idebus[1], rtc_state); } - - pc_cmos_init(pcms, idebus[0], idebus[1], rtc_state); +#endif if (pcmc->pci_enabled && machine_usb(machine)) { pci_create_simple(pci_bus, piix3_devfn + 2, "piix3-usb-uhci"); @@ -358,7 +364,6 @@ static void pc_compat_1_4_fn(MachineState *machine) static void pc_compat_1_3(MachineState *machine) { pc_compat_1_4_fn(machine); - enable_compat_apic_id_mode(); } /* PC compat function for pc-0.14 to pc-1.2 */ @@ -430,9 +435,11 @@ static void pc_i440fx_machine_options(MachineClass *m) static void pc_i440fx_4_1_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_machine_options(m); m->alias = "pc"; m->is_default = 1; + pcmc->default_cpu_version = 1; } DEFINE_I440FX_MACHINE(v4_1, "pc-i440fx-4.1", NULL, @@ -440,9 +447,11 @@ DEFINE_I440FX_MACHINE(v4_1, "pc-i440fx-4.1", NULL, static void pc_i440fx_4_0_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_4_1_machine_options(m); m->alias = NULL; m->is_default = 0; + pcmc->default_cpu_version = CPU_VERSION_LEGACY; compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len); compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len); } @@ -708,6 +717,7 @@ DEFINE_I440FX_MACHINE(v1_4, "pc-i440fx-1.4", pc_compat_1_4_fn, static void pc_i440fx_1_3_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); static GlobalProperty compat[] = { PC_CPU_MODEL_IDS("1.3.0") { "usb-tablet", "usb_version", "1" }, @@ -718,6 +728,7 @@ static void pc_i440fx_1_3_machine_options(MachineClass *m) pc_i440fx_1_4_machine_options(m); m->hw_version = "1.3.0"; + pcmc->compat_apic_id_mode = true; compat_props_add(m->compat_props, compat, G_N_ELEMENTS(compat)); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 57232aed6b..397e1fdd2f 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -367,8 +367,10 @@ static void pc_q35_machine_options(MachineClass *m) static void pc_q35_4_1_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_machine_options(m); m->alias = "q35"; + pcmc->default_cpu_version = 1; } DEFINE_Q35_MACHINE(v4_1, "pc-q35-4.1", NULL, @@ -376,8 +378,10 @@ DEFINE_Q35_MACHINE(v4_1, "pc-q35-4.1", NULL, static void pc_q35_4_0_1_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_4_1_machine_options(m); m->alias = NULL; + pcmc->default_cpu_version = CPU_VERSION_LEGACY; /* * This is the default machine for the 4.0-stable branch. It is basically * a 4.0 that doesn't use split irqchip by default. It MUST hence apply the diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index 469f1260a4..e8e79e0917 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -756,6 +756,8 @@ static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu) /* retval--the number of ioreq packet */ static ioreq_t *cpu_get_ioreq(XenIOState *state) { + MachineState *ms = MACHINE(qdev_get_machine()); + unsigned int max_cpus = ms->smp.max_cpus; int i; evtchn_port_t port; @@ -1383,6 +1385,8 @@ static int xen_map_ioreq_server(XenIOState *state) void xen_hvm_init(PCMachineState *pcms, MemoryRegion **ram_memory) { + MachineState *ms = MACHINE(pcms); + unsigned int max_cpus = ms->smp.max_cpus; int i, rc; xen_pfn_t ioreq_pfn; XenIOState *state; diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c index b8ede30b3c..9f8f0d3ff5 100644 --- a/hw/intc/armv7m_nvic.c +++ b/hw/intc/armv7m_nvic.c @@ -812,15 +812,45 @@ void armv7m_nvic_get_pending_irq_info(void *opaque, int armv7m_nvic_complete_irq(void *opaque, int irq, bool secure) { NVICState *s = (NVICState *)opaque; - VecInfo *vec; + VecInfo *vec = NULL; int ret; assert(irq > ARMV7M_EXCP_RESET && irq < s->num_irq); - if (secure && exc_is_banked(irq)) { - vec = &s->sec_vectors[irq]; - } else { - vec = &s->vectors[irq]; + /* + * For negative priorities, v8M will forcibly deactivate the appropriate + * NMI or HardFault regardless of what interrupt we're being asked to + * deactivate (compare the DeActivate() pseudocode). This is a guard + * against software returning from NMI or HardFault with a corrupted + * IPSR and leaving the CPU in a negative-priority state. + * v7M does not do this, but simply deactivates the requested interrupt. + */ + if (arm_feature(&s->cpu->env, ARM_FEATURE_V8)) { + switch (armv7m_nvic_raw_execution_priority(s)) { + case -1: + if (s->cpu->env.v7m.aircr & R_V7M_AIRCR_BFHFNMINS_MASK) { + vec = &s->vectors[ARMV7M_EXCP_HARD]; + } else { + vec = &s->sec_vectors[ARMV7M_EXCP_HARD]; + } + break; + case -2: + vec = &s->vectors[ARMV7M_EXCP_NMI]; + break; + case -3: + vec = &s->sec_vectors[ARMV7M_EXCP_HARD]; + break; + default: + break; + } + } + + if (!vec) { + if (secure && exc_is_banked(irq)) { + vec = &s->sec_vectors[irq]; + } else { + vec = &s->vectors[irq]; + } } trace_nvic_complete_irq(irq, secure); @@ -830,7 +860,19 @@ int armv7m_nvic_complete_irq(void *opaque, int irq, bool secure) return -1; } - ret = nvic_rettobase(s); + /* + * If this is a configurable exception and it is currently + * targeting the opposite security state from the one we're trying + * to complete it for, this counts as an illegal exception return. + * We still need to deactivate whatever vector the logic above has + * selected, though, as it might not be the same as the one for the + * requested exception number. + */ + if (!exc_is_banked(irq) && exc_targets_secure(s, irq) != secure) { + ret = -1; + } else { + ret = nvic_rettobase(s); + } vec->active = 0; if (vec->level) { diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c index 7074489fdf..c408749876 100644 --- a/hw/intc/ioapic.c +++ b/hw/intc/ioapic.c @@ -245,8 +245,8 @@ void ioapic_eoi_broadcast(int vector) s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR; if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) { - ++s->irq_eoi[vector]; - if (s->irq_eoi[vector] >= SUCCESSIVE_IRQ_MAX_COUNT) { + ++s->irq_eoi[n]; + if (s->irq_eoi[n] >= SUCCESSIVE_IRQ_MAX_COUNT) { /* * Real hardware does not deliver the interrupt immediately * during eoi broadcast, and this lets a buggy guest make @@ -254,16 +254,16 @@ void ioapic_eoi_broadcast(int vector) * level-triggered interrupt. Emulate this behavior if we * detect an interrupt storm. */ - s->irq_eoi[vector] = 0; + s->irq_eoi[n] = 0; timer_mod_anticipate(s->delayed_ioapic_service_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + NANOSECONDS_PER_SECOND / 100); - trace_ioapic_eoi_delayed_reassert(vector); + trace_ioapic_eoi_delayed_reassert(n); } else { ioapic_service(s); } } else { - s->irq_eoi[vector] = 0; + s->irq_eoi[n] = 0; } } } @@ -380,6 +380,7 @@ ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val, /* restore RO bits */ s->ioredtbl[index] &= IOAPIC_RW_BITS; s->ioredtbl[index] |= ro_bits; + s->irq_eoi[index] = 0; ioapic_fix_edge_remote_irr(&s->ioredtbl[index]); ioapic_service(s); } diff --git a/hw/mips/Kconfig b/hw/mips/Kconfig index cdc07e59b6..62aa01b29e 100644 --- a/hw/mips/Kconfig +++ b/hw/mips/Kconfig @@ -1,14 +1,44 @@ config R4K bool + select ISA_BUS + select SERIAL_ISA + select I8259 + select I8254 + select MC146818RTC + imply VGA_ISA + imply NE2000_ISA + select IDE_ISA + select PCKBD + select PFLASH_CFI01 config MALTA bool config MIPSSIM bool + select ISA_BUS + select SERIAL_ISA + select MIPSNET config JAZZ bool + select ISA_BUS + select RC4030 + select I8259 + select I8254 + select I8257 + select PCSPK + select VGA_ISA_MM + select G364FB + select DP8393X + select ESP + select FDC + select MC146818RTC + select PCKBD + select SERIAL + select PARALLEL + select DS1225Y + select JAZZ_LED config FULONG bool diff --git a/hw/mips/boston.c b/hw/mips/boston.c index 749582e5aa..9eeccbea9a 100644 --- a/hw/mips/boston.c +++ b/hw/mips/boston.c @@ -458,7 +458,7 @@ static void boston_mach_init(MachineState *machine) sizeof(s->cps), TYPE_MIPS_CPS); object_property_set_str(OBJECT(&s->cps), machine->cpu_type, "cpu-type", &err); - object_property_set_int(OBJECT(&s->cps), smp_cpus, "num-vp", &err); + object_property_set_int(OBJECT(&s->cps), machine->smp.cpus, "num-vp", &err); object_property_set_bool(OBJECT(&s->cps), true, "realized", &err); if (err != NULL) { diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c index 37ec89b07e..20e019bf66 100644 --- a/hw/mips/mips_malta.c +++ b/hw/mips/mips_malta.c @@ -1095,6 +1095,8 @@ static int64_t load_kernel (void) static void malta_mips_config(MIPSCPU *cpu) { + MachineState *ms = MACHINE(qdev_get_machine()); + unsigned int smp_cpus = ms->smp.cpus; CPUMIPSState *env = &cpu->env; CPUState *cs = CPU(cpu); @@ -1124,15 +1126,15 @@ static void main_cpu_reset(void *opaque) } } -static void create_cpu_without_cps(const char *cpu_type, +static void create_cpu_without_cps(MachineState *ms, qemu_irq *cbus_irq, qemu_irq *i8259_irq) { CPUMIPSState *env; MIPSCPU *cpu; int i; - for (i = 0; i < smp_cpus; i++) { - cpu = MIPS_CPU(cpu_create(cpu_type)); + for (i = 0; i < ms->smp.cpus; i++) { + cpu = MIPS_CPU(cpu_create(ms->cpu_type)); /* Init internal devices */ cpu_mips_irq_init_cpu(cpu); @@ -1146,15 +1148,15 @@ static void create_cpu_without_cps(const char *cpu_type, *cbus_irq = env->irq[4]; } -static void create_cps(MaltaState *s, const char *cpu_type, +static void create_cps(MachineState *ms, MaltaState *s, qemu_irq *cbus_irq, qemu_irq *i8259_irq) { Error *err = NULL; sysbus_init_child_obj(OBJECT(s), "cps", OBJECT(&s->cps), sizeof(s->cps), TYPE_MIPS_CPS); - object_property_set_str(OBJECT(&s->cps), cpu_type, "cpu-type", &err); - object_property_set_int(OBJECT(&s->cps), smp_cpus, "num-vp", &err); + object_property_set_str(OBJECT(&s->cps), ms->cpu_type, "cpu-type", &err); + object_property_set_int(OBJECT(&s->cps), ms->smp.cpus, "num-vp", &err); object_property_set_bool(OBJECT(&s->cps), true, "realized", &err); if (err != NULL) { error_report("%s", error_get_pretty(err)); @@ -1167,13 +1169,13 @@ static void create_cps(MaltaState *s, const char *cpu_type, *cbus_irq = NULL; } -static void mips_create_cpu(MaltaState *s, const char *cpu_type, +static void mips_create_cpu(MachineState *ms, MaltaState *s, qemu_irq *cbus_irq, qemu_irq *i8259_irq) { - if ((smp_cpus > 1) && cpu_supports_cps_smp(cpu_type)) { - create_cps(s, cpu_type, cbus_irq, i8259_irq); + if ((ms->smp.cpus > 1) && cpu_supports_cps_smp(ms->cpu_type)) { + create_cps(ms, s, cbus_irq, i8259_irq); } else { - create_cpu_without_cps(cpu_type, cbus_irq, i8259_irq); + create_cpu_without_cps(ms, cbus_irq, i8259_irq); } } @@ -1217,7 +1219,7 @@ void mips_malta_init(MachineState *machine) qdev_init_nofail(dev); /* create CPU */ - mips_create_cpu(s, machine->cpu_type, &cbus_irq, &i8259_irq); + mips_create_cpu(machine, s, &cbus_irq, &i8259_irq); /* allocate RAM */ if (ram_size > 2 * GiB) { diff --git a/hw/net/sunhme.c b/hw/net/sunhme.c index 1ebaee3c82..8b8603e696 100644 --- a/hw/net/sunhme.c +++ b/hw/net/sunhme.c @@ -44,6 +44,7 @@ #define HME_SEBI_STAT 0x100 #define HME_SEBI_STAT_LINUXBUG 0x108 #define HME_SEB_STAT_RXTOHOST 0x10000 +#define HME_SEB_STAT_NORXD 0x20000 #define HME_SEB_STAT_MIFIRQ 0x800000 #define HME_SEB_STAT_HOSTTOTX 0x1000000 #define HME_SEB_STAT_TXALL 0x2000000 @@ -209,6 +210,8 @@ static void sunhme_update_irq(SunHMEState *s) } level = (seb ? 1 : 0); + trace_sunhme_update_irq(mifmask, mif, sebmask, seb, level); + pci_set_irq(d, level); } @@ -371,10 +374,20 @@ static void sunhme_mac_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { SunHMEState *s = SUNHME(opaque); + uint64_t oldval = s->macregs[addr >> 2]; trace_sunhme_mac_write(addr, val); s->macregs[addr >> 2] = val; + + switch (addr) { + case HME_MACI_RXCFG: + if (!(oldval & HME_MAC_RXCFG_ENABLE) && + (val & HME_MAC_RXCFG_ENABLE)) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } + break; + } } static uint64_t sunhme_mac_read(void *opaque, hwaddr addr, @@ -647,7 +660,7 @@ static int sunhme_can_receive(NetClientState *nc) { SunHMEState *s = qemu_get_nic_opaque(nc); - return s->macregs[HME_MAC_RXCFG_ENABLE >> 2] & HME_MAC_RXCFG_ENABLE; + return s->macregs[HME_MACI_RXCFG >> 2] & HME_MAC_RXCFG_ENABLE; } static void sunhme_link_status_changed(NetClientState *nc) @@ -716,7 +729,7 @@ static ssize_t sunhme_receive(NetClientState *nc, const uint8_t *buf, /* Do nothing if MAC RX disabled */ if (!(s->macregs[HME_MACI_RXCFG >> 2] & HME_MAC_RXCFG_ENABLE)) { - return -1; + return 0; } trace_sunhme_rx_filter_destmac(buf[0], buf[1], buf[2], @@ -745,14 +758,14 @@ static ssize_t sunhme_receive(NetClientState *nc, const uint8_t *buf, /* Didn't match hash filter */ trace_sunhme_rx_filter_hash_nomatch(); trace_sunhme_rx_filter_reject(); - return 0; + return -1; } else { trace_sunhme_rx_filter_hash_match(); } } else { /* Not for us */ trace_sunhme_rx_filter_reject(); - return 0; + return -1; } } else { trace_sunhme_rx_filter_promisc_match(); @@ -775,6 +788,14 @@ static ssize_t sunhme_receive(NetClientState *nc, const uint8_t *buf, pci_dma_read(d, rb + cr * HME_DESC_SIZE, &status, 4); pci_dma_read(d, rb + cr * HME_DESC_SIZE + 4, &buffer, 4); + /* If we don't own the current descriptor then indicate overflow error */ + if (!(status & HME_XD_OWN)) { + s->sebregs[HME_SEBI_STAT >> 2] |= HME_SEB_STAT_NORXD; + sunhme_update_irq(s); + trace_sunhme_rx_norxd(); + return -1; + } + rxoffset = (s->erxregs[HME_ERXI_CFG >> 2] & HME_ERX_CFG_BYTEOFFSET) >> HME_ERX_CFG_BYTEOFFSET_SHIFT; diff --git a/hw/net/trace-events b/hw/net/trace-events index 3cd9e122df..58665655cc 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -359,6 +359,8 @@ sunhme_rx_filter_reject(void) "rejecting incoming frame" sunhme_rx_filter_accept(void) "accepting incoming frame" sunhme_rx_desc(uint32_t addr, int offset, uint32_t status, int len, int cr, int nr) "addr 0x%"PRIx32"(+0x%x) status 0x%"PRIx32 " len %d (ring %d/%d)" sunhme_rx_xsum_calc(uint16_t xsum) "calculated incoming xsum as 0x%x" +sunhme_rx_norxd(void) "no free rx descriptors available" +sunhme_update_irq(uint32_t mifmask, uint32_t mif, uint32_t sebmask, uint32_t seb, int level) "mifmask: 0x%x mif: 0x%x sebmask: 0x%x seb: 0x%x level: %d" # virtio-net.c virtio_net_announce_notify(void) "" diff --git a/hw/openrisc/openrisc_sim.c b/hw/openrisc/openrisc_sim.c index 87b9feaa96..b85f0df323 100644 --- a/hw/openrisc/openrisc_sim.c +++ b/hw/openrisc/openrisc_sim.c @@ -130,6 +130,7 @@ static void openrisc_sim_init(MachineState *machine) qemu_irq *cpu_irqs[2]; qemu_irq serial_irq; int n; + unsigned int smp_cpus = machine->smp.cpus; for (n = 0; n < smp_cpus; n++) { cpu = OPENRISC_CPU(cpu_create(machine->cpu_type)); diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c index 92f253c924..09019ca05d 100644 --- a/hw/pci-bridge/pcie_root_port.c +++ b/hw/pci-bridge/pcie_root_port.c @@ -31,10 +31,13 @@ static void rp_write_config(PCIDevice *d, uint32_t address, { uint32_t root_cmd = pci_get_long(d->config + d->exp.aer_cap + PCI_ERR_ROOT_COMMAND); + uint16_t slt_ctl, slt_sta; + + pcie_cap_slot_get(d, &slt_ctl, &slt_sta); pci_bridge_write_config(d, address, val, len); rp_aer_vector_update(d); - pcie_cap_slot_write_config(d, address, val, len); + pcie_cap_slot_write_config(d, slt_ctl, slt_sta, address, val, len); pcie_aer_write_config(d, address, val, len); pcie_aer_root_write_config(d, address, val, len, root_cmd); } diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c index 264e37d6a6..899b0fd6c9 100644 --- a/hw/pci-bridge/xio3130_downstream.c +++ b/hw/pci-bridge/xio3130_downstream.c @@ -41,9 +41,12 @@ static void xio3130_downstream_write_config(PCIDevice *d, uint32_t address, uint32_t val, int len) { + uint16_t slt_ctl, slt_sta; + + pcie_cap_slot_get(d, &slt_sta, &slt_ctl); pci_bridge_write_config(d, address, val, len); pcie_cap_flr_write_config(d, address, val, len); - pcie_cap_slot_write_config(d, address, val, len); + pcie_cap_slot_write_config(d, slt_ctl, slt_sta, address, val, len); pcie_aer_write_config(d, address, val, len); } diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 88c30ff74c..a6beb567bd 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -383,7 +383,7 @@ static void pcie_cap_slot_event(PCIDevice *dev, PCIExpressHotPlugEvent event) { /* Minor optimization: if nothing changed - no event is needed. */ if (pci_word_test_and_set_mask(dev->config + dev->exp.exp_cap + - PCI_EXP_SLTSTA, event)) { + PCI_EXP_SLTSTA, event) == event) { return; } hotplug_event_notify(dev); @@ -594,7 +594,16 @@ void pcie_cap_slot_reset(PCIDevice *dev) hotplug_event_update_event_status(dev); } +void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, uint16_t *slt_sta) +{ + uint32_t pos = dev->exp.exp_cap; + uint8_t *exp_cap = dev->config + pos; + *slt_ctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); + *slt_sta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); +} + void pcie_cap_slot_write_config(PCIDevice *dev, + uint16_t old_slt_ctl, uint16_t old_slt_sta, uint32_t addr, uint32_t val, int len) { uint32_t pos = dev->exp.exp_cap; @@ -602,6 +611,25 @@ void pcie_cap_slot_write_config(PCIDevice *dev, uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) { + /* + * Guests tend to clears all bits during init. + * If they clear bits that weren't set this is racy and will lose events: + * not a big problem for manual button presses, but a problem for us. + * As a work-around, detect this and revert status to what it was + * before the write. + * + * Note: in theory this can be detected as a duplicate button press + * which cancels the previous press. Does not seem to happen in + * practice as guests seem to only have this bug during init. + */ +#define PCIE_SLOT_EVENTS (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD | \ + PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_PDC | \ + PCI_EXP_SLTSTA_CC) + + if (val & ~old_slt_sta & PCIE_SLOT_EVENTS) { + sltsta = (sltsta & ~PCIE_SLOT_EVENTS) | (old_slt_sta & PCIE_SLOT_EVENTS); + pci_set_word(exp_cap + PCI_EXP_SLTSTA, sltsta); + } hotplug_event_clear(dev); } @@ -619,11 +647,17 @@ void pcie_cap_slot_write_config(PCIDevice *dev, } /* - * If the slot is polulated, power indicator is off and power + * If the slot is populated, power indicator is off and power * controller is off, it is safe to detach the devices. + * + * Note: don't detach if condition was already true: + * this is a work around for guests that overwrite + * control of powered off slots before powering them on. */ if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) && - ((val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF)) { + (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF && + (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) || + (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) { PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); pci_for_each_device(sec_bus, pci_bus_num(sec_bus), pcie_unplug_device, NULL); diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index bfda1266af..a3eac7f057 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -308,6 +308,7 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms, bool dry_run) { MachineState *machine = MACHINE(pms); + unsigned int smp_cpus = machine->smp.cpus; const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms); CPUPPCState *env = first_cpu->env_ptr; int ret = -1; @@ -735,6 +736,7 @@ static DeviceState *ppce500_init_mpic_qemu(PPCE500MachineState *pms, SysBusDevice *s; int i, j, k; MachineState *machine = MACHINE(pms); + unsigned int smp_cpus = machine->smp.cpus; const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms); dev = qdev_create(NULL, TYPE_OPENPIC); @@ -847,6 +849,7 @@ void ppce500_init(MachineState *machine) struct boot_info *boot_info; int dt_size; int i; + unsigned int smp_cpus = machine->smp.cpus; /* irq num for pin INTA, INTB, INTC and INTD is 1, 2, 3 and * 4 respectively */ unsigned int pci_irq_nrs[PCI_NUM_PINS] = {1, 2, 3, 4}; diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index c8d3245524..09bc6068f3 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -136,6 +136,7 @@ static void ppc_core99_init(MachineState *machine) DeviceState *dev, *pic_dev; hwaddr nvram_addr = 0xFFF04000; uint64_t tbfreq; + unsigned int smp_cpus = machine->smp.cpus; linux_boot = (kernel_filename != NULL); @@ -463,7 +464,7 @@ static void ppc_core99_init(MachineState *machine) sysbus_mmio_map(s, 1, CFG_ADDR + 2); fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); - fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)machine->smp.max_cpus); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, machine_arch); fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, kernel_base); diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index da751addc4..9ffde5b6f7 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -101,6 +101,7 @@ static void ppc_heathrow_init(MachineState *machine) DeviceState *dev, *pic_dev; BusState *adb_bus; int bios_size; + unsigned int smp_cpus = machine->smp.cpus; uint16_t ppc_boot_device; DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS]; void *fw_cfg; @@ -324,7 +325,7 @@ static void ppc_heathrow_init(MachineState *machine) sysbus_mmio_map(s, 1, CFG_ADDR + 2); fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); - fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)machine->smp.max_cpus); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, ARCH_HEATHROW); fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, kernel_base); diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index b87e01e5b9..bd4531c822 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -529,9 +529,8 @@ static void pnv_powerdown_notify(Notifier *n, void *opaque) } } -static void pnv_reset(void) +static void pnv_reset(MachineState *machine) { - MachineState *machine = MACHINE(qdev_get_machine()); PnvMachineState *pnv = PNV_MACHINE(machine); void *fdt; Object *obj; @@ -689,7 +688,8 @@ static void pnv_init(MachineState *machine) object_property_add_child(OBJECT(pnv), chip_name, chip, &error_fatal); object_property_set_int(chip, PNV_CHIP_HWID(i), "chip-id", &error_fatal); - object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal); + object_property_set_int(chip, machine->smp.cores, + "nr-cores", &error_fatal); object_property_set_bool(chip, true, "realized", &error_fatal); } g_free(chip_typename); @@ -1150,6 +1150,7 @@ static void pnv_chip_core_sanitize(PnvChip *chip, Error **errp) static void pnv_chip_core_realize(PnvChip *chip, Error **errp) { + MachineState *ms = MACHINE(qdev_get_machine()); Error *error = NULL; PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); const char *typename = pnv_chip_core_typename(chip); @@ -1183,7 +1184,7 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp) snprintf(core_name, sizeof(core_name), "core[%d]", core_hwid); object_initialize_child(OBJECT(chip), core_name, pnv_core, typesize, typename, &error_fatal, NULL); - object_property_set_int(OBJECT(pnv_core), smp_threads, "nr-threads", + object_property_set_int(OBJECT(pnv_core), ms->smp.threads, "nr-threads", &error_fatal); object_property_set_int(OBJECT(pnv_core), core_hwid, CPU_CORE_PROP_CORE_ID, &error_fatal); diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index a248ce480d..ab3c1df1fc 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -428,7 +428,7 @@ static void ppc_prep_init(MachineState *machine) linux_boot = (kernel_filename != NULL); /* init CPUs */ - for (i = 0; i < smp_cpus; i++) { + for (i = 0; i < machine->smp.cpus; i++) { cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); env = &cpu->env; @@ -770,7 +770,7 @@ static void ibm_40p_init(MachineState *machine) boot_device = machine->boot_order[0]; } - fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)machine->smp.max_cpus); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)machine->ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, ARCH_PREP); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index b502fcac2e..821f0d4a49 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -106,6 +106,9 @@ */ static int spapr_vcpu_id(SpaprMachineState *spapr, int cpu_index) { + MachineState *ms = MACHINE(spapr); + unsigned int smp_threads = ms->smp.threads; + assert(spapr->vsmt); return (cpu_index / smp_threads) * spapr->vsmt + cpu_index % smp_threads; @@ -153,8 +156,10 @@ static void pre_2_10_vmstate_unregister_dummy_icp(int i) int spapr_max_server_number(SpaprMachineState *spapr) { + MachineState *ms = MACHINE(spapr); + assert(spapr->vsmt); - return DIV_ROUND_UP(max_cpus * spapr->vsmt, smp_threads); + return DIV_ROUND_UP(ms->smp.max_cpus * spapr->vsmt, ms->smp.threads); } static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu, @@ -287,6 +292,7 @@ static void spapr_populate_pa_features(SpaprMachineState *spapr, static int spapr_fixup_cpu_dt(void *fdt, SpaprMachineState *spapr) { + MachineState *ms = MACHINE(spapr); int ret = 0, offset, cpus_offset; CPUState *cs; char cpu_model[32]; @@ -296,7 +302,7 @@ static int spapr_fixup_cpu_dt(void *fdt, SpaprMachineState *spapr) PowerPCCPU *cpu = POWERPC_CPU(cs); DeviceClass *dc = DEVICE_GET_CLASS(cs); int index = spapr_get_vcpu_id(cpu); - int compat_smt = MIN(smp_threads, ppc_compat_max_vthreads(cpu)); + int compat_smt = MIN(ms->smp.threads, ppc_compat_max_vthreads(cpu)); if (!spapr_is_thread0_in_vcore(spapr, cpu)) { continue; @@ -442,6 +448,7 @@ static int spapr_populate_memory(SpaprMachineState *spapr, void *fdt) static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, SpaprMachineState *spapr) { + MachineState *ms = MACHINE(spapr); PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *env = &cpu->env; PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); @@ -453,7 +460,8 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000; uint32_t page_sizes_prop[64]; size_t page_sizes_prop_size; - uint32_t vcpus_per_socket = smp_threads * smp_cores; + unsigned int smp_threads = ms->smp.threads; + uint32_t vcpus_per_socket = smp_threads * ms->smp.cores; uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)}; int compat_smt = MIN(smp_threads, ppc_compat_max_vthreads(cpu)); SpaprDrc *drc; @@ -1026,6 +1034,7 @@ int spapr_h_cas_compose_response(SpaprMachineState *spapr, static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) { + MachineState *ms = MACHINE(spapr); int rtas; GString *hypertas = g_string_sized_new(256); GString *qemu_hypertas = g_string_sized_new(256); @@ -1036,7 +1045,7 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) cpu_to_be32(max_device_addr >> 32), cpu_to_be32(max_device_addr & 0xffffffff), 0, cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE), - cpu_to_be32(max_cpus / smp_threads), + cpu_to_be32(ms->smp.max_cpus / ms->smp.threads), }; uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0); uint32_t maxdomains[] = { @@ -1691,9 +1700,8 @@ static int spapr_reset_drcs(Object *child, void *opaque) return 0; } -static void spapr_machine_reset(void) +static void spapr_machine_reset(MachineState *machine) { - MachineState *machine = MACHINE(qdev_get_machine()); SpaprMachineState *spapr = SPAPR_MACHINE(machine); PowerPCCPU *first_ppc_cpu; uint32_t rtas_limit; @@ -2544,7 +2552,7 @@ static void spapr_validate_node_memory(MachineState *machine, Error **errp) /* find cpu slot in machine->possible_cpus by core_id */ static CPUArchId *spapr_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) { - int index = id / smp_threads; + int index = id / ms->smp.threads; if (index >= ms->possible_cpus->len) { return NULL; @@ -2557,10 +2565,12 @@ static CPUArchId *spapr_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) static void spapr_set_vsmt_mode(SpaprMachineState *spapr, Error **errp) { + MachineState *ms = MACHINE(spapr); Error *local_err = NULL; bool vsmt_user = !!spapr->vsmt; int kvm_smt = kvmppc_smt_threads(); int ret; + unsigned int smp_threads = ms->smp.threads; if (!kvm_enabled() && (smp_threads > 1)) { error_setg(&local_err, "TCG cannot support more than 1 thread/core " @@ -2634,6 +2644,9 @@ static void spapr_init_cpus(SpaprMachineState *spapr) SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); const char *type = spapr_get_cpu_core_type(machine->cpu_type); const CPUArchIdList *possible_cpus; + unsigned int smp_cpus = machine->smp.cpus; + unsigned int smp_threads = machine->smp.threads; + unsigned int max_cpus = machine->smp.max_cpus; int boot_cores_nr = smp_cpus / smp_threads; int i; @@ -3860,6 +3873,7 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, const char *type = object_get_typename(OBJECT(dev)); CPUArchId *core_slot; int index; + unsigned int smp_threads = machine->smp.threads; if (dev->hotplugged && !mc->has_hotpluggable_cpus) { error_setg(&local_err, "CPU hotplug not supported for this machine"); @@ -4125,14 +4139,16 @@ spapr_cpu_index_to_props(MachineState *machine, unsigned cpu_index) static int64_t spapr_get_default_cpu_node_id(const MachineState *ms, int idx) { - return idx / smp_cores % nb_numa_nodes; + return idx / ms->smp.cores % nb_numa_nodes; } static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine) { int i; + unsigned int smp_threads = machine->smp.threads; + unsigned int smp_cpus = machine->smp.cpus; const char *core_type; - int spapr_max_cores = max_cpus / smp_threads; + int spapr_max_cores = machine->smp.max_cpus / smp_threads; MachineClass *mc = MACHINE_GET_CLASS(machine); if (!mc->has_hotpluggable_cpus) { @@ -4255,6 +4271,7 @@ int spapr_get_vcpu_id(PowerPCCPU *cpu) void spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp) { SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + MachineState *ms = MACHINE(spapr); int vcpu_id; vcpu_id = spapr_vcpu_id(spapr, cpu_index); @@ -4263,7 +4280,7 @@ void spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp) error_setg(errp, "Can't create CPU with id %d in KVM", vcpu_id); error_append_hint(errp, "Adjust the number of cpus to %d " "or try to raise the number of threads per core\n", - vcpu_id * smp_threads / spapr->vsmt); + vcpu_id * ms->smp.threads / spapr->vsmt); return; } @@ -4350,6 +4367,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) * in which LMBs are represented and hot-added */ mc->numa_mem_align_shift = 28; + mc->numa_mem_supported = true; smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF; smc->default_caps.caps[SPAPR_CAP_VSX] = SPAPR_CAP_ON; diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 5bc1a93271..a618a2ac0f 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -235,6 +235,8 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, target_ulong args, uint32_t nret, target_ulong rets) { + MachineState *ms = MACHINE(qdev_get_machine()); + unsigned int max_cpus = ms->smp.max_cpus; target_ulong parameter = rtas_ld(args, 0); target_ulong buffer = rtas_ld(args, 1); target_ulong length = rtas_ld(args, 2); @@ -248,7 +250,7 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, "MaxPlatProcs=%d", max_cpus, current_machine->ram_size / MiB, - smp_cpus, + ms->smp.cpus, max_cpus); ret = sysparm_st(buffer, length, param_val, strlen(param_val) + 1); g_free(param_val); diff --git a/hw/ppc/spapr_rtc.c b/hw/ppc/spapr_rtc.c index af1ef30a53..6cf0113b34 100644 --- a/hw/ppc/spapr_rtc.c +++ b/hw/ppc/spapr_rtc.c @@ -32,7 +32,7 @@ #include "sysemu/sysemu.h" #include "hw/ppc/spapr.h" #include "qapi/error.h" -#include "qapi/qapi-events-target.h" +#include "qapi/qapi-events-misc-target.h" #include "qemu/cutils.h" #include "qemu/module.h" diff --git a/hw/riscv/Makefile.objs b/hw/riscv/Makefile.objs index a65027304a..eb9d4f9ffc 100644 --- a/hw/riscv/Makefile.objs +++ b/hw/riscv/Makefile.objs @@ -1,3 +1,4 @@ +obj-y += boot.o obj-$(CONFIG_SPIKE) += riscv_htif.o obj-$(CONFIG_HART) += riscv_hart.o obj-$(CONFIG_SIFIVE_E) += sifive_e.o diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c new file mode 100644 index 0000000000..ff023f42d0 --- /dev/null +++ b/hw/riscv/boot.c @@ -0,0 +1,105 @@ +/* + * QEMU RISC-V Boot Helper + * + * Copyright (c) 2017 SiFive, Inc. + * Copyright (c) 2019 Alistair Francis <alistair.francis@wdc.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/error-report.h" +#include "exec/cpu-defs.h" +#include "hw/loader.h" +#include "hw/riscv/boot.h" +#include "hw/boards.h" +#include "elf.h" + +#if defined(TARGET_RISCV32) +# define KERNEL_BOOT_ADDRESS 0x80400000 +#else +# define KERNEL_BOOT_ADDRESS 0x80200000 +#endif + +target_ulong riscv_load_firmware(const char *firmware_filename, + hwaddr firmware_load_addr) +{ + uint64_t firmware_entry, firmware_start, firmware_end; + + if (load_elf(firmware_filename, NULL, NULL, NULL, &firmware_entry, + &firmware_start, &firmware_end, 0, EM_RISCV, 1, 0) > 0) { + return firmware_entry; + } + + if (load_image_targphys_as(firmware_filename, firmware_load_addr, + ram_size, NULL) > 0) { + return firmware_load_addr; + } + + error_report("could not load firmware '%s'", firmware_filename); + exit(1); +} + +target_ulong riscv_load_kernel(const char *kernel_filename) +{ + uint64_t kernel_entry, kernel_high; + + if (load_elf(kernel_filename, NULL, NULL, NULL, + &kernel_entry, NULL, &kernel_high, 0, EM_RISCV, 1, 0) > 0) { + return kernel_entry; + } + + if (load_uimage_as(kernel_filename, &kernel_entry, NULL, NULL, + NULL, NULL, NULL) > 0) { + return kernel_entry; + } + + if (load_image_targphys_as(kernel_filename, KERNEL_BOOT_ADDRESS, + ram_size, NULL) > 0) { + return KERNEL_BOOT_ADDRESS; + } + + error_report("could not load kernel '%s'", kernel_filename); + exit(1); +} + +hwaddr riscv_load_initrd(const char *filename, uint64_t mem_size, + uint64_t kernel_entry, hwaddr *start) +{ + int size; + + /* + * We want to put the initrd far enough into RAM that when the + * kernel is uncompressed it will not clobber the initrd. However + * on boards without much RAM we must ensure that we still leave + * enough room for a decent sized initrd, and on boards with large + * amounts of RAM we must avoid the initrd being so far up in RAM + * that it is outside lowmem and inaccessible to the kernel. + * So for boards with less than 256MB of RAM we put the initrd + * halfway into RAM, and for boards with 256MB of RAM or more we put + * the initrd at 128MB. + */ + *start = kernel_entry + MIN(mem_size / 2, 128 * MiB); + + size = load_ramdisk(filename, *start, mem_size - *start); + if (size == -1) { + size = load_image_targphys(filename, *start, mem_size - *start); + if (size == -1) { + error_report("could not load ramdisk '%s'", filename); + exit(1); + } + } + + return *start + size; +} diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c index 80ac56fa7d..2a499d8ed2 100644 --- a/hw/riscv/sifive_e.c +++ b/hw/riscv/sifive_e.c @@ -44,10 +44,10 @@ #include "hw/riscv/sifive_prci.h" #include "hw/riscv/sifive_uart.h" #include "hw/riscv/sifive_e.h" +#include "hw/riscv/boot.h" #include "chardev/char.h" #include "sysemu/arch_init.h" #include "exec/address-spaces.h" -#include "elf.h" static const struct MemmapEntry { hwaddr base; @@ -74,19 +74,6 @@ static const struct MemmapEntry { [SIFIVE_E_DTIM] = { 0x80000000, 0x4000 } }; -static target_ulong load_kernel(const char *kernel_filename) -{ - uint64_t kernel_entry, kernel_high; - - if (load_elf(kernel_filename, NULL, NULL, NULL, - &kernel_entry, NULL, &kernel_high, - 0, EM_RISCV, 1, 0) < 0) { - error_report("could not load kernel '%s'", kernel_filename); - exit(1); - } - return kernel_entry; -} - static void sifive_mmio_emulate(MemoryRegion *parent, const char *name, uintptr_t offset, uintptr_t length) { @@ -131,12 +118,13 @@ static void riscv_sifive_e_init(MachineState *machine) memmap[SIFIVE_E_MROM].base, &address_space_memory); if (machine->kernel_filename) { - load_kernel(machine->kernel_filename); + riscv_load_kernel(machine->kernel_filename); } } static void riscv_sifive_e_soc_init(Object *obj) { + MachineState *ms = MACHINE(qdev_get_machine()); SiFiveESoCState *s = RISCV_E_SOC(obj); object_initialize_child(obj, "cpus", &s->cpus, @@ -144,7 +132,7 @@ static void riscv_sifive_e_soc_init(Object *obj) &error_abort, NULL); object_property_set_str(OBJECT(&s->cpus), SIFIVE_E_CPU, "cpu-type", &error_abort); - object_property_set_int(OBJECT(&s->cpus), smp_cpus, "num-harts", + object_property_set_int(OBJECT(&s->cpus), ms->smp.cpus, "num-harts", &error_abort); sysbus_init_child_obj(obj, "riscv.sifive.e.gpio0", &s->gpio, sizeof(s->gpio), @@ -153,22 +141,21 @@ static void riscv_sifive_e_soc_init(Object *obj) static void riscv_sifive_e_soc_realize(DeviceState *dev, Error **errp) { + MachineState *ms = MACHINE(qdev_get_machine()); const struct MemmapEntry *memmap = sifive_e_memmap; Error *err = NULL; SiFiveESoCState *s = RISCV_E_SOC(dev); MemoryRegion *sys_mem = get_system_memory(); - MemoryRegion *xip_mem = g_new(MemoryRegion, 1); - MemoryRegion *mask_rom = g_new(MemoryRegion, 1); object_property_set_bool(OBJECT(&s->cpus), true, "realized", &error_abort); /* Mask ROM */ - memory_region_init_rom(mask_rom, NULL, "riscv.sifive.e.mrom", + memory_region_init_rom(&s->mask_rom, NULL, "riscv.sifive.e.mrom", memmap[SIFIVE_E_MROM].size, &error_fatal); memory_region_add_subregion(sys_mem, - memmap[SIFIVE_E_MROM].base, mask_rom); + memmap[SIFIVE_E_MROM].base, &s->mask_rom); /* MMIO */ s->plic = sifive_plic_create(memmap[SIFIVE_E_PLIC].base, @@ -183,7 +170,7 @@ static void riscv_sifive_e_soc_realize(DeviceState *dev, Error **errp) SIFIVE_E_PLIC_CONTEXT_STRIDE, memmap[SIFIVE_E_PLIC].size); sifive_clint_create(memmap[SIFIVE_E_CLINT].base, - memmap[SIFIVE_E_CLINT].size, smp_cpus, + memmap[SIFIVE_E_CLINT].size, ms->smp.cpus, SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE); sifive_mmio_emulate(sys_mem, "riscv.sifive.e.aon", memmap[SIFIVE_E_AON].base, memmap[SIFIVE_E_AON].size); @@ -228,10 +215,11 @@ static void riscv_sifive_e_soc_realize(DeviceState *dev, Error **errp) memmap[SIFIVE_E_PWM2].base, memmap[SIFIVE_E_PWM2].size); /* Flash memory */ - memory_region_init_ram(xip_mem, NULL, "riscv.sifive.e.xip", + memory_region_init_ram(&s->xip_mem, NULL, "riscv.sifive.e.xip", memmap[SIFIVE_E_XIP].size, &error_fatal); - memory_region_set_readonly(xip_mem, true); - memory_region_add_subregion(sys_mem, memmap[SIFIVE_E_XIP].base, xip_mem); + memory_region_set_readonly(&s->xip_mem, true); + memory_region_add_subregion(sys_mem, memmap[SIFIVE_E_XIP].base, + &s->xip_mem); } static void riscv_sifive_e_machine_init(MachineClass *mc) diff --git a/hw/riscv/sifive_plic.c b/hw/riscv/sifive_plic.c index 70a4413599..0950e89e15 100644 --- a/hw/riscv/sifive_plic.c +++ b/hw/riscv/sifive_plic.c @@ -24,6 +24,7 @@ #include "qemu/error-report.h" #include "hw/sysbus.h" #include "hw/pci/msi.h" +#include "hw/boards.h" #include "target/riscv/cpu.h" #include "sysemu/sysemu.h" #include "hw/riscv/sifive_plic.h" @@ -439,6 +440,8 @@ static void sifive_plic_irq_request(void *opaque, int irq, int level) static void sifive_plic_realize(DeviceState *dev, Error **errp) { + MachineState *ms = MACHINE(qdev_get_machine()); + unsigned int smp_cpus = ms->smp.cpus; SiFivePLICState *plic = SIFIVE_PLIC(dev); int i; diff --git a/hw/riscv/sifive_prci.c b/hw/riscv/sifive_prci.c index fa136b5a9f..f406682c91 100644 --- a/hw/riscv/sifive_prci.c +++ b/hw/riscv/sifive_prci.c @@ -24,15 +24,18 @@ #include "target/riscv/cpu.h" #include "hw/riscv/sifive_prci.h" -/* currently implements enough to mock freedom-e-sdk BSP clock programming */ - static uint64_t sifive_prci_read(void *opaque, hwaddr addr, unsigned int size) { - if (addr == 0 /* PRCI_HFROSCCFG */) { - return 1 << 31; /* ROSC_RDY */ - } - if (addr == 8 /* PRCI_PLLCFG */) { - return 1 << 31; /* PLL_LOCK */ + SiFivePRCIState *s = opaque; + switch (addr) { + case SIFIVE_PRCI_HFROSCCFG: + return s->hfrosccfg; + case SIFIVE_PRCI_HFXOSCCFG: + return s->hfxosccfg; + case SIFIVE_PRCI_PLLCFG: + return s->pllcfg; + case SIFIVE_PRCI_PLLOUTDIV: + return s->plloutdiv; } hw_error("%s: read: addr=0x%x\n", __func__, (int)addr); return 0; @@ -41,7 +44,30 @@ static uint64_t sifive_prci_read(void *opaque, hwaddr addr, unsigned int size) static void sifive_prci_write(void *opaque, hwaddr addr, uint64_t val64, unsigned int size) { - /* discard writes */ + SiFivePRCIState *s = opaque; + switch (addr) { + case SIFIVE_PRCI_HFROSCCFG: + s->hfrosccfg = (uint32_t) val64; + /* OSC stays ready */ + s->hfrosccfg |= SIFIVE_PRCI_HFROSCCFG_RDY; + break; + case SIFIVE_PRCI_HFXOSCCFG: + s->hfxosccfg = (uint32_t) val64; + /* OSC stays ready */ + s->hfxosccfg |= SIFIVE_PRCI_HFXOSCCFG_RDY; + break; + case SIFIVE_PRCI_PLLCFG: + s->pllcfg = (uint32_t) val64; + /* PLL stays locked */ + s->pllcfg |= SIFIVE_PRCI_PLLCFG_LOCK; + break; + case SIFIVE_PRCI_PLLOUTDIV: + s->plloutdiv = (uint32_t) val64; + break; + default: + hw_error("%s: bad write: addr=0x%x v=0x%x\n", + __func__, (int)addr, (int)val64); + } } static const MemoryRegionOps sifive_prci_ops = { @@ -61,6 +87,13 @@ static void sifive_prci_init(Object *obj) memory_region_init_io(&s->mmio, obj, &sifive_prci_ops, s, TYPE_SIFIVE_PRCI, 0x8000); sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); + + s->hfrosccfg = (SIFIVE_PRCI_HFROSCCFG_RDY | SIFIVE_PRCI_HFROSCCFG_EN); + s->hfxosccfg = (SIFIVE_PRCI_HFROSCCFG_RDY | SIFIVE_PRCI_HFROSCCFG_EN); + s->pllcfg = (SIFIVE_PRCI_PLLCFG_REFSEL | SIFIVE_PRCI_PLLCFG_BYPASS | + SIFIVE_PRCI_PLLCFG_LOCK); + s->plloutdiv = SIFIVE_PRCI_PLLOUTDIV_DIV1; + } static const TypeInfo sifive_prci_info = { diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c index 5ecc47cea3..ca53a9290d 100644 --- a/hw/riscv/sifive_u.c +++ b/hw/riscv/sifive_u.c @@ -41,11 +41,11 @@ #include "hw/riscv/sifive_uart.h" #include "hw/riscv/sifive_prci.h" #include "hw/riscv/sifive_u.h" +#include "hw/riscv/boot.h" #include "chardev/char.h" #include "sysemu/arch_init.h" #include "sysemu/device_tree.h" #include "exec/address-spaces.h" -#include "elf.h" #include <libfdt.h> @@ -65,19 +65,6 @@ static const struct MemmapEntry { #define GEM_REVISION 0x10070109 -static target_ulong load_kernel(const char *kernel_filename) -{ - uint64_t kernel_entry, kernel_high; - - if (load_elf(kernel_filename, NULL, NULL, NULL, - &kernel_entry, NULL, &kernel_high, - 0, EM_RISCV, 1, 0) < 0) { - error_report("could not load kernel '%s'", kernel_filename); - exit(1); - } - return kernel_entry; -} - static void create_fdt(SiFiveUState *s, const struct MemmapEntry *memmap, uint64_t mem_size, const char *cmdline) { @@ -86,7 +73,7 @@ static void create_fdt(SiFiveUState *s, const struct MemmapEntry *memmap, uint32_t *cells; char *nodename; char ethclk_names[] = "pclk\0hclk\0tx_clk"; - uint32_t plic_phandle, ethclk_phandle; + uint32_t plic_phandle, ethclk_phandle, phandle = 1; fdt = s->fdt = create_device_tree(&s->fdt_size); if (!fdt) { @@ -121,6 +108,7 @@ static void create_fdt(SiFiveUState *s, const struct MemmapEntry *memmap, qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 0x1); for (cpu = s->soc.cpus.num_harts - 1; cpu >= 0; cpu--) { + int cpu_phandle = phandle++; nodename = g_strdup_printf("/cpus/cpu@%d", cpu); char *intc = g_strdup_printf("/cpus/cpu@%d/interrupt-controller", cpu); char *isa = riscv_isa_string(&s->soc.cpus.harts[cpu]); @@ -134,8 +122,8 @@ static void create_fdt(SiFiveUState *s, const struct MemmapEntry *memmap, qemu_fdt_setprop_cell(fdt, nodename, "reg", cpu); qemu_fdt_setprop_string(fdt, nodename, "device_type", "cpu"); qemu_fdt_add_subnode(fdt, intc); - qemu_fdt_setprop_cell(fdt, intc, "phandle", 1); - qemu_fdt_setprop_cell(fdt, intc, "linux,phandle", 1); + qemu_fdt_setprop_cell(fdt, intc, "phandle", cpu_phandle); + qemu_fdt_setprop_cell(fdt, intc, "linux,phandle", cpu_phandle); qemu_fdt_setprop_string(fdt, intc, "compatible", "riscv,cpu-intc"); qemu_fdt_setprop(fdt, intc, "interrupt-controller", NULL, 0); qemu_fdt_setprop_cell(fdt, intc, "#interrupt-cells", 1); @@ -167,6 +155,7 @@ static void create_fdt(SiFiveUState *s, const struct MemmapEntry *memmap, g_free(cells); g_free(nodename); + plic_phandle = phandle++; cells = g_new0(uint32_t, s->soc.cpus.num_harts * 4); for (cpu = 0; cpu < s->soc.cpus.num_harts; cpu++) { nodename = @@ -192,20 +181,21 @@ static void create_fdt(SiFiveUState *s, const struct MemmapEntry *memmap, qemu_fdt_setprop_string(fdt, nodename, "reg-names", "control"); qemu_fdt_setprop_cell(fdt, nodename, "riscv,max-priority", 7); qemu_fdt_setprop_cell(fdt, nodename, "riscv,ndev", 0x35); - qemu_fdt_setprop_cells(fdt, nodename, "phandle", 2); - qemu_fdt_setprop_cells(fdt, nodename, "linux,phandle", 2); + qemu_fdt_setprop_cells(fdt, nodename, "phandle", plic_phandle); + qemu_fdt_setprop_cells(fdt, nodename, "linux,phandle", plic_phandle); plic_phandle = qemu_fdt_get_phandle(fdt, nodename); g_free(cells); g_free(nodename); + ethclk_phandle = phandle++; nodename = g_strdup_printf("/soc/ethclk"); qemu_fdt_add_subnode(fdt, nodename); qemu_fdt_setprop_string(fdt, nodename, "compatible", "fixed-clock"); qemu_fdt_setprop_cell(fdt, nodename, "#clock-cells", 0x0); qemu_fdt_setprop_cell(fdt, nodename, "clock-frequency", SIFIVE_U_GEM_CLOCK_FREQ); - qemu_fdt_setprop_cell(fdt, nodename, "phandle", 3); - qemu_fdt_setprop_cell(fdt, nodename, "linux,phandle", 3); + qemu_fdt_setprop_cell(fdt, nodename, "phandle", ethclk_phandle); + qemu_fdt_setprop_cell(fdt, nodename, "linux,phandle", ethclk_phandle); ethclk_phandle = qemu_fdt_get_phandle(fdt, nodename); g_free(nodename); @@ -279,8 +269,12 @@ static void riscv_sifive_u_init(MachineState *machine) /* create device tree */ create_fdt(s, memmap, machine->ram_size, machine->kernel_cmdline); + if (machine->firmware) { + riscv_load_firmware(machine->firmware, memmap[SIFIVE_U_DRAM].base); + } + if (machine->kernel_filename) { - load_kernel(machine->kernel_filename); + riscv_load_kernel(machine->kernel_filename); } /* reset vector */ @@ -321,13 +315,14 @@ static void riscv_sifive_u_init(MachineState *machine) static void riscv_sifive_u_soc_init(Object *obj) { + MachineState *ms = MACHINE(qdev_get_machine()); SiFiveUSoCState *s = RISCV_U_SOC(obj); object_initialize_child(obj, "cpus", &s->cpus, sizeof(s->cpus), TYPE_RISCV_HART_ARRAY, &error_abort, NULL); object_property_set_str(OBJECT(&s->cpus), SIFIVE_U_CPU, "cpu-type", &error_abort); - object_property_set_int(OBJECT(&s->cpus), smp_cpus, "num-harts", + object_property_set_int(OBJECT(&s->cpus), ms->smp.cpus, "num-harts", &error_abort); sysbus_init_child_obj(obj, "gem", &s->gem, sizeof(s->gem), @@ -336,11 +331,14 @@ static void riscv_sifive_u_soc_init(Object *obj) static void riscv_sifive_u_soc_realize(DeviceState *dev, Error **errp) { + MachineState *ms = MACHINE(qdev_get_machine()); SiFiveUSoCState *s = RISCV_U_SOC(dev); const struct MemmapEntry *memmap = sifive_u_memmap; MemoryRegion *system_memory = get_system_memory(); MemoryRegion *mask_rom = g_new(MemoryRegion, 1); qemu_irq plic_gpios[SIFIVE_U_PLIC_NUM_SOURCES]; + char *plic_hart_config; + size_t plic_hart_config_len; int i; Error *err = NULL; NICInfo *nd = &nd_table[0]; @@ -354,9 +352,22 @@ static void riscv_sifive_u_soc_realize(DeviceState *dev, Error **errp) memory_region_add_subregion(system_memory, memmap[SIFIVE_U_MROM].base, mask_rom); + /* create PLIC hart topology configuration string */ + plic_hart_config_len = (strlen(SIFIVE_U_PLIC_HART_CONFIG) + 1) * + ms->smp.cpus; + plic_hart_config = g_malloc0(plic_hart_config_len); + for (i = 0; i < ms->smp.cpus; i++) { + if (i != 0) { + strncat(plic_hart_config, ",", plic_hart_config_len); + } + strncat(plic_hart_config, SIFIVE_U_PLIC_HART_CONFIG, + plic_hart_config_len); + plic_hart_config_len -= (strlen(SIFIVE_U_PLIC_HART_CONFIG) + 1); + } + /* MMIO */ s->plic = sifive_plic_create(memmap[SIFIVE_U_PLIC].base, - (char *)SIFIVE_U_PLIC_HART_CONFIG, + plic_hart_config, SIFIVE_U_PLIC_NUM_SOURCES, SIFIVE_U_PLIC_NUM_PRIORITIES, SIFIVE_U_PLIC_PRIORITY_BASE, @@ -371,7 +382,7 @@ static void riscv_sifive_u_soc_realize(DeviceState *dev, Error **errp) sifive_uart_create(system_memory, memmap[SIFIVE_U_UART1].base, serial_hd(1), qdev_get_gpio_in(DEVICE(s->plic), SIFIVE_U_UART1_IRQ)); sifive_clint_create(memmap[SIFIVE_U_CLINT].base, - memmap[SIFIVE_U_CLINT].size, smp_cpus, + memmap[SIFIVE_U_CLINT].size, ms->smp.cpus, SIFIVE_SIP_BASE, SIFIVE_TIMECMP_BASE, SIFIVE_TIME_BASE); for (i = 0; i < SIFIVE_U_PLIC_NUM_SOURCES; i++) { diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c index 5b33d4be3b..2991b341a2 100644 --- a/hw/riscv/spike.c +++ b/hw/riscv/spike.c @@ -36,12 +36,12 @@ #include "hw/riscv/riscv_hart.h" #include "hw/riscv/sifive_clint.h" #include "hw/riscv/spike.h" +#include "hw/riscv/boot.h" #include "chardev/char.h" #include "sysemu/arch_init.h" #include "sysemu/device_tree.h" #include "sysemu/qtest.h" #include "exec/address-spaces.h" -#include "elf.h" #include <libfdt.h> @@ -54,19 +54,6 @@ static const struct MemmapEntry { [SPIKE_DRAM] = { 0x80000000, 0x0 }, }; -static target_ulong load_kernel(const char *kernel_filename) -{ - uint64_t kernel_entry, kernel_high; - - if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL, - &kernel_entry, NULL, &kernel_high, 0, EM_RISCV, 1, 0, - NULL, true, htif_symbol_callback) < 0) { - error_report("could not load kernel '%s'", kernel_filename); - exit(1); - } - return kernel_entry; -} - static void create_fdt(SpikeState *s, const struct MemmapEntry *memmap, uint64_t mem_size, const char *cmdline) { @@ -172,6 +159,7 @@ static void spike_board_init(MachineState *machine) MemoryRegion *main_mem = g_new(MemoryRegion, 1); MemoryRegion *mask_rom = g_new(MemoryRegion, 1); int i; + unsigned int smp_cpus = machine->smp.cpus; /* Initialize SOC */ object_initialize_child(OBJECT(machine), "soc", &s->soc, sizeof(s->soc), @@ -199,7 +187,7 @@ static void spike_board_init(MachineState *machine) mask_rom); if (machine->kernel_filename) { - load_kernel(machine->kernel_filename); + riscv_load_kernel(machine->kernel_filename); } /* reset vector */ @@ -254,6 +242,7 @@ static void spike_v1_10_0_board_init(MachineState *machine) MemoryRegion *main_mem = g_new(MemoryRegion, 1); MemoryRegion *mask_rom = g_new(MemoryRegion, 1); int i; + unsigned int smp_cpus = machine->smp.cpus; if (!qtest_enabled()) { info_report("The Spike v1.10.0 machine has been deprecated. " @@ -287,7 +276,7 @@ static void spike_v1_10_0_board_init(MachineState *machine) mask_rom); if (machine->kernel_filename) { - load_kernel(machine->kernel_filename); + riscv_load_kernel(machine->kernel_filename); } /* reset vector */ @@ -342,6 +331,7 @@ static void spike_v1_09_1_board_init(MachineState *machine) MemoryRegion *main_mem = g_new(MemoryRegion, 1); MemoryRegion *mask_rom = g_new(MemoryRegion, 1); int i; + unsigned int smp_cpus = machine->smp.cpus; if (!qtest_enabled()) { info_report("The Spike v1.09.1 machine has been deprecated. " @@ -372,7 +362,7 @@ static void spike_v1_09_1_board_init(MachineState *machine) mask_rom); if (machine->kernel_filename) { - load_kernel(machine->kernel_filename); + riscv_load_kernel(machine->kernel_filename); } /* reset vector */ diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index 84d94d0c42..ecdc77d728 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -34,13 +34,13 @@ #include "hw/riscv/sifive_clint.h" #include "hw/riscv/sifive_test.h" #include "hw/riscv/virt.h" +#include "hw/riscv/boot.h" #include "chardev/char.h" #include "sysemu/arch_init.h" #include "sysemu/device_tree.h" #include "exec/address-spaces.h" #include "hw/pci/pci.h" #include "hw/pci-host/gpex.h" -#include "elf.h" #include <libfdt.h> @@ -61,47 +61,6 @@ static const struct MemmapEntry { [VIRT_PCIE_ECAM] = { 0x30000000, 0x10000000 }, }; -static target_ulong load_kernel(const char *kernel_filename) -{ - uint64_t kernel_entry, kernel_high; - - if (load_elf(kernel_filename, NULL, NULL, NULL, - &kernel_entry, NULL, &kernel_high, - 0, EM_RISCV, 1, 0) < 0) { - error_report("could not load kernel '%s'", kernel_filename); - exit(1); - } - return kernel_entry; -} - -static hwaddr load_initrd(const char *filename, uint64_t mem_size, - uint64_t kernel_entry, hwaddr *start) -{ - int size; - - /* We want to put the initrd far enough into RAM that when the - * kernel is uncompressed it will not clobber the initrd. However - * on boards without much RAM we must ensure that we still leave - * enough room for a decent sized initrd, and on boards with large - * amounts of RAM we must avoid the initrd being so far up in RAM - * that it is outside lowmem and inaccessible to the kernel. - * So for boards with less than 256MB of RAM we put the initrd - * halfway into RAM, and for boards with 256MB of RAM or more we put - * the initrd at 128MB. - */ - *start = kernel_entry + MIN(mem_size / 2, 128 * MiB); - - size = load_ramdisk(filename, *start, mem_size - *start); - if (size == -1) { - size = load_image_targphys(filename, *start, mem_size - *start); - if (size == -1) { - error_report("could not load ramdisk '%s'", filename); - exit(1); - } - } - return *start + size; -} - static void create_pcie_irq_map(void *fdt, char *nodename, uint32_t plic_phandle) { @@ -191,6 +150,7 @@ static void *create_fdt(RISCVVirtState *s, const struct MemmapEntry *memmap, for (cpu = s->soc.num_harts - 1; cpu >= 0; cpu--) { int cpu_phandle = phandle++; + int intc_phandle; nodename = g_strdup_printf("/cpus/cpu@%d", cpu); char *intc = g_strdup_printf("/cpus/cpu@%d/interrupt-controller", cpu); char *isa = riscv_isa_string(&s->soc.harts[cpu]); @@ -203,9 +163,12 @@ static void *create_fdt(RISCVVirtState *s, const struct MemmapEntry *memmap, qemu_fdt_setprop_string(fdt, nodename, "status", "okay"); qemu_fdt_setprop_cell(fdt, nodename, "reg", cpu); qemu_fdt_setprop_string(fdt, nodename, "device_type", "cpu"); + qemu_fdt_setprop_cell(fdt, nodename, "phandle", cpu_phandle); + qemu_fdt_setprop_cell(fdt, nodename, "linux,phandle", cpu_phandle); + intc_phandle = phandle++; qemu_fdt_add_subnode(fdt, intc); - qemu_fdt_setprop_cell(fdt, intc, "phandle", cpu_phandle); - qemu_fdt_setprop_cell(fdt, intc, "linux,phandle", cpu_phandle); + qemu_fdt_setprop_cell(fdt, intc, "phandle", intc_phandle); + qemu_fdt_setprop_cell(fdt, intc, "linux,phandle", intc_phandle); qemu_fdt_setprop_string(fdt, intc, "compatible", "riscv,cpu-intc"); qemu_fdt_setprop(fdt, intc, "interrupt-controller", NULL, 0); qemu_fdt_setprop_cell(fdt, intc, "#interrupt-cells", 1); @@ -214,6 +177,20 @@ static void *create_fdt(RISCVVirtState *s, const struct MemmapEntry *memmap, g_free(nodename); } + /* Add cpu-topology node */ + qemu_fdt_add_subnode(fdt, "/cpus/cpu-map"); + qemu_fdt_add_subnode(fdt, "/cpus/cpu-map/cluster0"); + for (cpu = s->soc.num_harts - 1; cpu >= 0; cpu--) { + char *core_nodename = g_strdup_printf("/cpus/cpu-map/cluster0/core%d", + cpu); + char *cpu_nodename = g_strdup_printf("/cpus/cpu@%d", cpu); + uint32_t intc_phandle = qemu_fdt_get_phandle(fdt, cpu_nodename); + qemu_fdt_add_subnode(fdt, core_nodename); + qemu_fdt_setprop_cell(fdt, core_nodename, "cpu", intc_phandle); + g_free(core_nodename); + g_free(cpu_nodename); + } + cells = g_new0(uint32_t, s->soc.num_harts * 4); for (cpu = 0; cpu < s->soc.num_harts; cpu++) { nodename = @@ -298,7 +275,7 @@ static void *create_fdt(RISCVVirtState *s, const struct MemmapEntry *memmap, qemu_fdt_setprop_string(fdt, nodename, "device_type", "pci"); qemu_fdt_setprop_cell(fdt, nodename, "linux,pci-domain", 0); qemu_fdt_setprop_cells(fdt, nodename, "bus-range", 0, - memmap[VIRT_PCIE_ECAM].base / + memmap[VIRT_PCIE_ECAM].size / PCIE_MMCFG_SIZE_MIN - 1); qemu_fdt_setprop(fdt, nodename, "dma-coherent", NULL, 0); qemu_fdt_setprop_cells(fdt, nodename, "reg", 0, memmap[VIRT_PCIE_ECAM].base, @@ -394,6 +371,7 @@ static void riscv_virt_board_init(MachineState *machine) char *plic_hart_config; size_t plic_hart_config_len; int i; + unsigned int smp_cpus = machine->smp.cpus; void *fdt; /* Initialize SOC */ @@ -421,14 +399,18 @@ static void riscv_virt_board_init(MachineState *machine) memory_region_add_subregion(system_memory, memmap[VIRT_MROM].base, mask_rom); + if (machine->firmware) { + riscv_load_firmware(machine->firmware, memmap[VIRT_DRAM].base); + } + if (machine->kernel_filename) { - uint64_t kernel_entry = load_kernel(machine->kernel_filename); + uint64_t kernel_entry = riscv_load_kernel(machine->kernel_filename); if (machine->initrd_filename) { hwaddr start; - hwaddr end = load_initrd(machine->initrd_filename, - machine->ram_size, kernel_entry, - &start); + hwaddr end = riscv_load_initrd(machine->initrd_filename, + machine->ram_size, kernel_entry, + &start); qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start", start); qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end", diff --git a/hw/s390x/s390-skeys.c b/hw/s390x/s390-skeys.c index daac936698..e5bd92c0c7 100644 --- a/hw/s390x/s390-skeys.c +++ b/hw/s390x/s390-skeys.c @@ -14,7 +14,7 @@ #include "hw/boards.h" #include "hw/s390x/storage-keys.h" #include "qapi/error.h" -#include "qapi/qapi-commands-target.h" +#include "qapi/qapi-commands-misc-target.h" #include "qapi/qmp/qdict.h" #include "qemu/error-report.h" #include "sysemu/kvm.h" diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 87b2039f1b..5b6a9a4e55 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -84,7 +84,7 @@ static void s390_init_cpus(MachineState *machine) /* initialize possible_cpus */ mc->possible_cpu_arch_ids(machine); - for (i = 0; i < smp_cpus; i++) { + for (i = 0; i < machine->smp.cpus; i++) { s390x_new_cpu(machine->cpu_type, i, &error_fatal); } } @@ -339,7 +339,7 @@ static inline void s390_do_cpu_ipl(CPUState *cs, run_on_cpu_data arg) s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu); } -static void s390_machine_reset(void) +static void s390_machine_reset(MachineState *machine) { enum s390_reset reset_type; CPUState *cs, *t; @@ -411,6 +411,7 @@ static CpuInstanceProperties s390_cpu_index_to_props(MachineState *ms, static const CPUArchIdList *s390_possible_cpu_arch_ids(MachineState *ms) { int i; + unsigned int max_cpus = ms->smp.max_cpus; if (ms->possible_cpus) { g_assert(ms->possible_cpus && ms->possible_cpus->len == max_cpus); @@ -440,9 +441,9 @@ static HotplugHandler *s390_get_hotplug_handler(MachineState *machine, return NULL; } -static void s390_hot_add_cpu(const int64_t id, Error **errp) +static void s390_hot_add_cpu(MachineState *machine, + const int64_t id, Error **errp) { - MachineState *machine = MACHINE(qdev_get_machine()); ObjectClass *oc; g_assert(machine->possible_cpus->cpus[0].cpu); diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c index 4510a800cb..fac7c3bb6c 100644 --- a/hw/s390x/sclp.c +++ b/hw/s390x/sclp.c @@ -64,7 +64,7 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) prepare_cpu_entries(sclp, read_info->entries, &cpu_count); read_info->entries_cpu = cpu_to_be16(cpu_count); read_info->offset_cpu = cpu_to_be16(offsetof(ReadInfo, entries)); - read_info->highest_cpu = cpu_to_be16(max_cpus - 1); + read_info->highest_cpu = cpu_to_be16(machine->smp.max_cpus - 1); read_info->ibc_val = cpu_to_be32(s390_get_ibc_val()); diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index 196136a307..fdc3a0e4e0 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -207,8 +207,8 @@ static void scsi_qdev_realize(DeviceState *qdev, Error **errp) error_propagate(errp, local_err); return; } - dev->vmsentry = qemu_add_vm_change_state_handler(scsi_dma_restart_cb, - dev); + dev->vmsentry = qdev_add_vm_change_state_handler(DEVICE(dev), + scsi_dma_restart_cb, dev); } static void scsi_qdev_unrealize(DeviceState *qdev, Error **errp) diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index a36448fc8d..7bcd67b098 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -27,6 +27,7 @@ #include "sysemu/cpus.h" #include "hw/firmware/smbios.h" #include "hw/loader.h" +#include "hw/boards.h" #include "exec/cpu-common.h" #include "smbios_build.h" @@ -341,9 +342,10 @@ static void smbios_register_config(void) opts_init(smbios_register_config); -static void smbios_validate_table(void) +static void smbios_validate_table(MachineState *ms) { - uint32_t expect_t4_count = smbios_legacy ? smp_cpus : smbios_smp_sockets; + uint32_t expect_t4_count = smbios_legacy ? + ms->smp.cpus : smbios_smp_sockets; if (smbios_type4_count && smbios_type4_count != expect_t4_count) { error_report("Expected %d SMBIOS Type 4 tables, got %d instead", @@ -428,7 +430,7 @@ static void smbios_build_type_1_fields(void) } } -uint8_t *smbios_get_table_legacy(size_t *length) +uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length) { if (!smbios_legacy) { *length = 0; @@ -438,7 +440,7 @@ uint8_t *smbios_get_table_legacy(size_t *length) if (!smbios_immutable) { smbios_build_type_0_fields(); smbios_build_type_1_fields(); - smbios_validate_table(); + smbios_validate_table(ms); smbios_immutable = true; } *length = smbios_entries_len; @@ -570,7 +572,7 @@ static void smbios_build_type_3_table(void) SMBIOS_BUILD_TABLE_POST; } -static void smbios_build_type_4_table(unsigned instance) +static void smbios_build_type_4_table(MachineState *ms, unsigned instance) { char sock_str[128]; @@ -597,8 +599,8 @@ static void smbios_build_type_4_table(unsigned instance) SMBIOS_TABLE_SET_STR(4, serial_number_str, type4.serial); SMBIOS_TABLE_SET_STR(4, asset_tag_number_str, type4.asset); SMBIOS_TABLE_SET_STR(4, part_number_str, type4.part); - t->core_count = t->core_enabled = smp_cores; - t->thread_count = smp_threads; + t->core_count = t->core_enabled = ms->smp.cores; + t->thread_count = ms->smp.threads; t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */ t->processor_family2 = cpu_to_le16(0x01); /* Other */ @@ -839,7 +841,8 @@ static void smbios_entry_point_setup(void) } } -void smbios_get_tables(const struct smbios_phys_mem_area *mem_array, +void smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, const unsigned int mem_array_size, uint8_t **tables, size_t *tables_len, uint8_t **anchor, size_t *anchor_len) @@ -858,11 +861,12 @@ void smbios_get_tables(const struct smbios_phys_mem_area *mem_array, smbios_build_type_2_table(); smbios_build_type_3_table(); - smbios_smp_sockets = DIV_ROUND_UP(smp_cpus, smp_cores * smp_threads); + smbios_smp_sockets = DIV_ROUND_UP(ms->smp.cpus, + ms->smp.cores * ms->smp.threads); assert(smbios_smp_sockets >= 1); for (i = 0; i < smbios_smp_sockets; i++) { - smbios_build_type_4_table(i); + smbios_build_type_4_table(ms, i); } smbios_build_type_11_table(); @@ -888,7 +892,7 @@ void smbios_get_tables(const struct smbios_phys_mem_area *mem_array, smbios_build_type_38_table(); smbios_build_type_127_table(); - smbios_validate_table(); + smbios_validate_table(ms); smbios_entry_point_setup(); smbios_immutable = true; } diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c index 7e4f61fc3e..b2342f2a89 100644 --- a/hw/sparc/sun4m.c +++ b/hw/sparc/sun4m.c @@ -871,6 +871,8 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, FWCfgState *fw_cfg; DeviceState *dev; SysBusDevice *s; + unsigned int smp_cpus = machine->smp.cpus; + unsigned int max_cpus = machine->smp.max_cpus; /* init CPUs */ for(i = 0; i < smp_cpus; i++) { @@ -1406,6 +1408,7 @@ static void ss5_class_init(ObjectClass *oc, void *data) mc->is_default = 1; mc->default_boot_order = "c"; mc->default_cpu_type = SPARC_CPU_TYPE_NAME("Fujitsu-MB86904"); + mc->default_display = "tcx"; } static const TypeInfo ss5_type = { @@ -1424,6 +1427,7 @@ static void ss10_class_init(ObjectClass *oc, void *data) mc->max_cpus = 4; mc->default_boot_order = "c"; mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-SuperSparc-II"); + mc->default_display = "tcx"; } static const TypeInfo ss10_type = { @@ -1442,6 +1446,7 @@ static void ss600mp_class_init(ObjectClass *oc, void *data) mc->max_cpus = 4; mc->default_boot_order = "c"; mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-SuperSparc-II"); + mc->default_display = "tcx"; } static const TypeInfo ss600mp_type = { @@ -1460,6 +1465,7 @@ static void ss20_class_init(ObjectClass *oc, void *data) mc->max_cpus = 4; mc->default_boot_order = "c"; mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-SuperSparc-II"); + mc->default_display = "tcx"; } static const TypeInfo ss20_type = { @@ -1477,6 +1483,7 @@ static void voyager_class_init(ObjectClass *oc, void *data) mc->block_default_type = IF_SCSI; mc->default_boot_order = "c"; mc->default_cpu_type = SPARC_CPU_TYPE_NAME("Fujitsu-MB86904"); + mc->default_display = "tcx"; } static const TypeInfo voyager_type = { @@ -1494,6 +1501,7 @@ static void ss_lx_class_init(ObjectClass *oc, void *data) mc->block_default_type = IF_SCSI; mc->default_boot_order = "c"; mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-MicroSparc-I"); + mc->default_display = "tcx"; } static const TypeInfo ss_lx_type = { @@ -1511,6 +1519,7 @@ static void ss4_class_init(ObjectClass *oc, void *data) mc->block_default_type = IF_SCSI; mc->default_boot_order = "c"; mc->default_cpu_type = SPARC_CPU_TYPE_NAME("Fujitsu-MB86904"); + mc->default_display = "tcx"; } static const TypeInfo ss4_type = { @@ -1528,6 +1537,7 @@ static void scls_class_init(ObjectClass *oc, void *data) mc->block_default_type = IF_SCSI; mc->default_boot_order = "c"; mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-MicroSparc-I"); + mc->default_display = "tcx"; } static const TypeInfo scls_type = { @@ -1545,6 +1555,7 @@ static void sbook_class_init(ObjectClass *oc, void *data) mc->block_default_type = IF_SCSI; mc->default_boot_order = "c"; mc->default_cpu_type = SPARC_CPU_TYPE_NAME("TI-MicroSparc-I"); + mc->default_display = "tcx"; } static const TypeInfo sbook_type = { diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c index 4230b17b87..5d87be811d 100644 --- a/hw/sparc64/sun4u.c +++ b/hw/sparc64/sun4u.c @@ -697,8 +697,8 @@ static void sun4uv_init(MemoryRegion *address_space_mem, &FW_CFG_IO(dev)->comb_iomem); fw_cfg = FW_CFG(dev); - fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); - fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)machine->smp.cpus); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)machine->smp.max_cpus); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id); fw_cfg_add_i64(fw_cfg, FW_CFG_KERNEL_ADDR, kernel_entry); diff --git a/hw/timer/armv7m_systick.c b/hw/timer/armv7m_systick.c index a17317ce2f..94640743b5 100644 --- a/hw/timer/armv7m_systick.c +++ b/hw/timer/armv7m_systick.c @@ -75,11 +75,17 @@ static void systick_timer_tick(void *opaque) } } -static uint64_t systick_read(void *opaque, hwaddr addr, unsigned size) +static MemTxResult systick_read(void *opaque, hwaddr addr, uint64_t *data, + unsigned size, MemTxAttrs attrs) { SysTickState *s = opaque; uint32_t val; + if (attrs.user) { + /* Generate BusFault for unprivileged accesses */ + return MEMTX_ERROR; + } + switch (addr) { case 0x0: /* SysTick Control and Status. */ val = s->control; @@ -121,14 +127,21 @@ static uint64_t systick_read(void *opaque, hwaddr addr, unsigned size) } trace_systick_read(addr, val, size); - return val; + *data = val; + return MEMTX_OK; } -static void systick_write(void *opaque, hwaddr addr, - uint64_t value, unsigned size) +static MemTxResult systick_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size, + MemTxAttrs attrs) { SysTickState *s = opaque; + if (attrs.user) { + /* Generate BusFault for unprivileged accesses */ + return MEMTX_ERROR; + } + trace_systick_write(addr, value, size); switch (addr) { @@ -172,11 +185,12 @@ static void systick_write(void *opaque, hwaddr addr, qemu_log_mask(LOG_GUEST_ERROR, "SysTick: Bad write offset 0x%" HWADDR_PRIx "\n", addr); } + return MEMTX_OK; } static const MemoryRegionOps systick_ops = { - .read = systick_read, - .write = systick_write, + .read_with_attrs = systick_read, + .write_with_attrs = systick_write, .endianness = DEVICE_NATIVE_ENDIAN, .valid.min_access_size = 4, .valid.max_access_size = 4, diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c index 0d79e000d2..ce4550b6f2 100644 --- a/hw/timer/mc146818rtc.c +++ b/hw/timer/mc146818rtc.c @@ -33,8 +33,8 @@ #include "sysemu/replay.h" #include "hw/timer/mc146818rtc.h" #include "qapi/error.h" -#include "qapi/qapi-commands-target.h" -#include "qapi/qapi-events-target.h" +#include "qapi/qapi-commands-misc-target.h" +#include "qapi/qapi-events-misc-target.h" #include "qapi/visitor.h" #include "exec/address-spaces.h" diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index ce3fe96efe..d7a4e1875c 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -551,9 +551,12 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) */ if (vector->virq >= 0) { int32_t fd = event_notifier_get_fd(&vector->interrupt); + Error *err = NULL; - vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, nr, - VFIO_IRQ_SET_ACTION_TRIGGER, fd, NULL); + if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, nr, + VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) { + error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); + } } } diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index e0452de4ba..3724ff8bac 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -29,3 +29,13 @@ config VIRTIO_CRYPTO bool default y depends on VIRTIO + +config VIRTIO_PMEM_SUPPORTED + bool + +config VIRTIO_PMEM + bool + default y + depends on VIRTIO + depends on VIRTIO_PMEM_SUPPORTED + select MEM_DEVICE diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs index 5570ea8df8..964ce78607 100644 --- a/hw/virtio/Makefile.objs +++ b/hw/virtio/Makefile.objs @@ -12,6 +12,8 @@ common-obj-$(CONFIG_VIRTIO_MMIO) += virtio-mmio.o obj-$(CONFIG_VIRTIO_BALLOON) += virtio-balloon.o obj-$(CONFIG_VIRTIO_CRYPTO) += virtio-crypto.o obj-$(call land,$(CONFIG_VIRTIO_CRYPTO),$(CONFIG_VIRTIO_PCI)) += virtio-crypto-pci.o +obj-$(CONFIG_VIRTIO_PMEM) += virtio-pmem.o +common-obj-$(call land,$(CONFIG_VIRTIO_PMEM),$(CONFIG_VIRTIO_PCI)) += virtio-pmem-pci.o obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock.o ifeq ($(CONFIG_VIRTIO_PCI),y) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index e6d5467e54..ce928f2429 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1913,13 +1913,6 @@ static void virtio_pci_generic_class_init(ObjectClass *klass, void *data) dc->props = virtio_pci_generic_properties; } -/* Used when the generic type and the base type is the same */ -static void virtio_pci_generic_base_class_init(ObjectClass *klass, void *data) -{ - virtio_pci_base_class_init(klass, data); - virtio_pci_generic_class_init(klass, NULL); -} - static void virtio_pci_transitional_instance_init(Object *obj) { VirtIOPCIProxy *proxy = VIRTIO_PCI(obj); @@ -1938,15 +1931,15 @@ static void virtio_pci_non_transitional_instance_init(Object *obj) void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) { + char *base_name = NULL; TypeInfo base_type_info = { .name = t->base_name, .parent = t->parent ? t->parent : TYPE_VIRTIO_PCI, .instance_size = t->instance_size, .instance_init = t->instance_init, .class_size = t->class_size, - .class_init = virtio_pci_base_class_init, - .class_data = (void *)t, .abstract = true, + .interfaces = t->interfaces, }; TypeInfo generic_type_info = { .name = t->generic_name, @@ -1961,13 +1954,20 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) if (!base_type_info.name) { /* No base type -> register a single generic device type */ - base_type_info.name = t->generic_name; - base_type_info.class_init = virtio_pci_generic_base_class_init; - base_type_info.interfaces = generic_type_info.interfaces; - base_type_info.abstract = false; - generic_type_info.name = NULL; + /* use intermediate %s-base-type to add generic device props */ + base_name = g_strdup_printf("%s-base-type", t->generic_name); + base_type_info.name = base_name; + base_type_info.class_init = virtio_pci_generic_class_init; + + generic_type_info.parent = base_name; + generic_type_info.class_init = virtio_pci_base_class_init; + generic_type_info.class_data = (void *)t; + assert(!t->non_transitional_name); assert(!t->transitional_name); + } else { + base_type_info.class_init = virtio_pci_base_class_init; + base_type_info.class_data = (void *)t; } type_register(&base_type_info); @@ -2005,6 +2005,7 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) }; type_register(&transitional_type_info); } + g_free(base_name); } /* virtio-pci-bus */ diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index bfea2892a5..619d9098c1 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -252,6 +252,7 @@ typedef struct VirtioPCIDeviceTypeInfo { size_t class_size; void (*instance_init)(Object *obj); void (*class_init)(ObjectClass *klass, void *data); + InterfaceInfo *interfaces; } VirtioPCIDeviceTypeInfo; /* Register virtio-pci type(s). @t must be static. */ diff --git a/hw/virtio/virtio-pmem-pci.c b/hw/virtio/virtio-pmem-pci.c new file mode 100644 index 0000000000..8b2d0dbccc --- /dev/null +++ b/hw/virtio/virtio-pmem-pci.c @@ -0,0 +1,131 @@ +/* + * Virtio PMEM PCI device + * + * Copyright (C) 2018-2019 Red Hat, Inc. + * + * Authors: + * Pankaj Gupta <pagupta@redhat.com> + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" + +#include "virtio-pmem-pci.h" +#include "hw/mem/memory-device.h" +#include "qapi/error.h" + +static void virtio_pmem_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VirtIOPMEMPCI *pmem_pci = VIRTIO_PMEM_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&pmem_pci->vdev); + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); + object_property_set_bool(OBJECT(vdev), true, "realized", errp); +} + +static void virtio_pmem_pci_set_addr(MemoryDeviceState *md, uint64_t addr, + Error **errp) +{ + object_property_set_uint(OBJECT(md), addr, VIRTIO_PMEM_ADDR_PROP, errp); +} + +static uint64_t virtio_pmem_pci_get_addr(const MemoryDeviceState *md) +{ + return object_property_get_uint(OBJECT(md), VIRTIO_PMEM_ADDR_PROP, + &error_abort); +} + +static MemoryRegion *virtio_pmem_pci_get_memory_region(MemoryDeviceState *md, + Error **errp) +{ + VirtIOPMEMPCI *pci_pmem = VIRTIO_PMEM_PCI(md); + VirtIOPMEM *pmem = VIRTIO_PMEM(&pci_pmem->vdev); + VirtIOPMEMClass *vpc = VIRTIO_PMEM_GET_CLASS(pmem); + + return vpc->get_memory_region(pmem, errp); +} + +static uint64_t virtio_pmem_pci_get_plugged_size(const MemoryDeviceState *md, + Error **errp) +{ + VirtIOPMEMPCI *pci_pmem = VIRTIO_PMEM_PCI(md); + VirtIOPMEM *pmem = VIRTIO_PMEM(&pci_pmem->vdev); + VirtIOPMEMClass *vpc = VIRTIO_PMEM_GET_CLASS(pmem); + MemoryRegion *mr = vpc->get_memory_region(pmem, errp); + + /* the plugged size corresponds to the region size */ + return mr ? 0 : memory_region_size(mr); +} + +static void virtio_pmem_pci_fill_device_info(const MemoryDeviceState *md, + MemoryDeviceInfo *info) +{ + VirtioPMEMDeviceInfo *vi = g_new0(VirtioPMEMDeviceInfo, 1); + VirtIOPMEMPCI *pci_pmem = VIRTIO_PMEM_PCI(md); + VirtIOPMEM *pmem = VIRTIO_PMEM(&pci_pmem->vdev); + VirtIOPMEMClass *vpc = VIRTIO_PMEM_GET_CLASS(pmem); + DeviceState *dev = DEVICE(md); + + if (dev->id) { + vi->has_id = true; + vi->id = g_strdup(dev->id); + } + + /* let the real device handle everything else */ + vpc->fill_device_info(pmem, vi); + + info->u.virtio_pmem.data = vi; + info->type = MEMORY_DEVICE_INFO_KIND_VIRTIO_PMEM; +} + +static void virtio_pmem_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(klass); + + k->realize = virtio_pmem_pci_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_PMEM; + pcidev_k->revision = VIRTIO_PCI_ABI_VERSION; + pcidev_k->class_id = PCI_CLASS_OTHERS; + + mdc->get_addr = virtio_pmem_pci_get_addr; + mdc->set_addr = virtio_pmem_pci_set_addr; + mdc->get_plugged_size = virtio_pmem_pci_get_plugged_size; + mdc->get_memory_region = virtio_pmem_pci_get_memory_region; + mdc->fill_device_info = virtio_pmem_pci_fill_device_info; +} + +static void virtio_pmem_pci_instance_init(Object *obj) +{ + VirtIOPMEMPCI *dev = VIRTIO_PMEM_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_PMEM); +} + +static const VirtioPCIDeviceTypeInfo virtio_pmem_pci_info = { + .base_name = TYPE_VIRTIO_PMEM_PCI, + .generic_name = "virtio-pmem-pci", + .transitional_name = "virtio-pmem-pci-transitional", + .non_transitional_name = "virtio-pmem-pci-non-transitional", + .instance_size = sizeof(VirtIOPMEMPCI), + .instance_init = virtio_pmem_pci_instance_init, + .class_init = virtio_pmem_pci_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_MEMORY_DEVICE }, + { } + }, +}; + +static void virtio_pmem_pci_register_types(void) +{ + virtio_pci_types_register(&virtio_pmem_pci_info); +} +type_init(virtio_pmem_pci_register_types) diff --git a/hw/virtio/virtio-pmem-pci.h b/hw/virtio/virtio-pmem-pci.h new file mode 100644 index 0000000000..616abef093 --- /dev/null +++ b/hw/virtio/virtio-pmem-pci.h @@ -0,0 +1,34 @@ +/* + * Virtio PMEM PCI device + * + * Copyright (C) 2018-2019 Red Hat, Inc. + * + * Authors: + * Pankaj Gupta <pagupta@redhat.com> + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_VIRTIO_PMEM_PCI_H +#define QEMU_VIRTIO_PMEM_PCI_H + +#include "hw/virtio/virtio-pci.h" +#include "hw/virtio/virtio-pmem.h" + +typedef struct VirtIOPMEMPCI VirtIOPMEMPCI; + +/* + * virtio-pmem-pci: This extends VirtioPCIProxy. + */ +#define TYPE_VIRTIO_PMEM_PCI "virtio-pmem-pci-base" +#define VIRTIO_PMEM_PCI(obj) \ + OBJECT_CHECK(VirtIOPMEMPCI, (obj), TYPE_VIRTIO_PMEM_PCI) + +struct VirtIOPMEMPCI { + VirtIOPCIProxy parent_obj; + VirtIOPMEM vdev; +}; + +#endif /* QEMU_VIRTIO_PMEM_PCI_H */ diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c new file mode 100644 index 0000000000..adbfb603ab --- /dev/null +++ b/hw/virtio/virtio-pmem.c @@ -0,0 +1,189 @@ +/* + * Virtio PMEM device + * + * Copyright (C) 2018-2019 Red Hat, Inc. + * + * Authors: + * Pankaj Gupta <pagupta@redhat.com> + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "hw/virtio/virtio-pmem.h" +#include "hw/virtio/virtio-access.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_pmem.h" +#include "block/aio.h" +#include "block/thread-pool.h" + +typedef struct VirtIODeviceRequest { + VirtQueueElement elem; + int fd; + VirtIOPMEM *pmem; + VirtIODevice *vdev; + struct virtio_pmem_req req; + struct virtio_pmem_resp resp; +} VirtIODeviceRequest; + +static int worker_cb(void *opaque) +{ + VirtIODeviceRequest *req_data = opaque; + int err = 0; + + /* flush raw backing image */ + err = fsync(req_data->fd); + if (err != 0) { + err = 1; + } + + virtio_stw_p(req_data->vdev, &req_data->resp.ret, err); + + return 0; +} + +static void done_cb(void *opaque, int ret) +{ + VirtIODeviceRequest *req_data = opaque; + int len = iov_from_buf(req_data->elem.in_sg, req_data->elem.in_num, 0, + &req_data->resp, sizeof(struct virtio_pmem_resp)); + + /* Callbacks are serialized, so no need to use atomic ops. */ + virtqueue_push(req_data->pmem->rq_vq, &req_data->elem, len); + virtio_notify((VirtIODevice *)req_data->pmem, req_data->pmem->rq_vq); + g_free(req_data); +} + +static void virtio_pmem_flush(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIODeviceRequest *req_data; + VirtIOPMEM *pmem = VIRTIO_PMEM(vdev); + HostMemoryBackend *backend = MEMORY_BACKEND(pmem->memdev); + ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context()); + + req_data = virtqueue_pop(vq, sizeof(VirtIODeviceRequest)); + if (!req_data) { + virtio_error(vdev, "virtio-pmem missing request data"); + return; + } + + if (req_data->elem.out_num < 1 || req_data->elem.in_num < 1) { + virtio_error(vdev, "virtio-pmem request not proper"); + g_free(req_data); + return; + } + req_data->fd = memory_region_get_fd(&backend->mr); + req_data->pmem = pmem; + req_data->vdev = vdev; + thread_pool_submit_aio(pool, worker_cb, req_data, done_cb, req_data); +} + +static void virtio_pmem_get_config(VirtIODevice *vdev, uint8_t *config) +{ + VirtIOPMEM *pmem = VIRTIO_PMEM(vdev); + struct virtio_pmem_config *pmemcfg = (struct virtio_pmem_config *) config; + + virtio_stq_p(vdev, &pmemcfg->start, pmem->start); + virtio_stq_p(vdev, &pmemcfg->size, memory_region_size(&pmem->memdev->mr)); +} + +static uint64_t virtio_pmem_get_features(VirtIODevice *vdev, uint64_t features, + Error **errp) +{ + return features; +} + +static void virtio_pmem_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOPMEM *pmem = VIRTIO_PMEM(dev); + + if (!pmem->memdev) { + error_setg(errp, "virtio-pmem memdev not set"); + return; + } + + if (host_memory_backend_is_mapped(pmem->memdev)) { + char *path = object_get_canonical_path_component(OBJECT(pmem->memdev)); + error_setg(errp, "can't use already busy memdev: %s", path); + g_free(path); + return; + } + + host_memory_backend_set_mapped(pmem->memdev, true); + virtio_init(vdev, TYPE_VIRTIO_PMEM, VIRTIO_ID_PMEM, + sizeof(struct virtio_pmem_config)); + pmem->rq_vq = virtio_add_queue(vdev, 128, virtio_pmem_flush); +} + +static void virtio_pmem_unrealize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOPMEM *pmem = VIRTIO_PMEM(dev); + + host_memory_backend_set_mapped(pmem->memdev, false); + virtio_cleanup(vdev); +} + +static void virtio_pmem_fill_device_info(const VirtIOPMEM *pmem, + VirtioPMEMDeviceInfo *vi) +{ + vi->memaddr = pmem->start; + vi->size = pmem->memdev ? memory_region_size(&pmem->memdev->mr) : 0; + vi->memdev = object_get_canonical_path(OBJECT(pmem->memdev)); +} + +static MemoryRegion *virtio_pmem_get_memory_region(VirtIOPMEM *pmem, + Error **errp) +{ + if (!pmem->memdev) { + error_setg(errp, "'%s' property must be set", VIRTIO_PMEM_MEMDEV_PROP); + return NULL; + } + + return &pmem->memdev->mr; +} + +static Property virtio_pmem_properties[] = { + DEFINE_PROP_UINT64(VIRTIO_PMEM_ADDR_PROP, VirtIOPMEM, start, 0), + DEFINE_PROP_LINK(VIRTIO_PMEM_MEMDEV_PROP, VirtIOPMEM, memdev, + TYPE_MEMORY_BACKEND, HostMemoryBackend *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_pmem_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + VirtIOPMEMClass *vpc = VIRTIO_PMEM_CLASS(klass); + + dc->props = virtio_pmem_properties; + + vdc->realize = virtio_pmem_realize; + vdc->unrealize = virtio_pmem_unrealize; + vdc->get_config = virtio_pmem_get_config; + vdc->get_features = virtio_pmem_get_features; + + vpc->fill_device_info = virtio_pmem_fill_device_info; + vpc->get_memory_region = virtio_pmem_get_memory_region; +} + +static TypeInfo virtio_pmem_info = { + .name = TYPE_VIRTIO_PMEM, + .parent = TYPE_VIRTIO_DEVICE, + .class_size = sizeof(VirtIOPMEMClass), + .class_init = virtio_pmem_class_init, + .instance_size = sizeof(VirtIOPMEM), +}; + +static void virtio_register_types(void) +{ + type_register_static(&virtio_pmem_info); +} + +type_init(virtio_register_types) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index e1e90fcfd6..a94ea18a9c 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1162,9 +1162,10 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) } } } - vdev->started = val & VIRTIO_CONFIG_S_DRIVER_OK; - if (unlikely(vdev->start_on_kick && vdev->started)) { - vdev->start_on_kick = false; + + if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) != + (val & VIRTIO_CONFIG_S_DRIVER_OK)) { + virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK); } if (k->set_status) { @@ -1214,8 +1215,7 @@ void virtio_reset(void *opaque) k->reset(vdev); } - vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && - !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); + vdev->start_on_kick = false; vdev->started = false; vdev->broken = false; vdev->guest_features = 0; @@ -1536,8 +1536,7 @@ static bool virtio_queue_notify_aio_vq(VirtQueue *vq) ret = vq->handle_aio_output(vdev, vq); if (unlikely(vdev->start_on_kick)) { - vdev->started = true; - vdev->start_on_kick = false; + virtio_set_started(vdev, true); } } @@ -1557,8 +1556,7 @@ static void virtio_queue_notify_vq(VirtQueue *vq) vq->handle_output(vdev, vq); if (unlikely(vdev->start_on_kick)) { - vdev->started = true; - vdev->start_on_kick = false; + virtio_set_started(vdev, true); } } } @@ -1576,11 +1574,10 @@ void virtio_queue_notify(VirtIODevice *vdev, int n) event_notifier_set(&vq->host_notifier); } else if (vq->handle_output) { vq->handle_output(vdev, vq); - } - if (unlikely(vdev->start_on_kick)) { - vdev->started = true; - vdev->start_on_kick = false; + if (unlikely(vdev->start_on_kick)) { + virtio_set_started(vdev, true); + } } } @@ -2069,14 +2066,21 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val) return -EINVAL; } ret = virtio_set_features_nocheck(vdev, val); - if (!ret && virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { - /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */ - int i; - for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { - if (vdev->vq[i].vring.num != 0) { - virtio_init_region_cache(vdev, i); + if (!ret) { + if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { + /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */ + int i; + for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { + if (vdev->vq[i].vring.num != 0) { + virtio_init_region_cache(vdev, i); + } } } + + if (!virtio_device_started(vdev, vdev->status) && + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { + vdev->start_on_kick = true; + } } return ret; } @@ -2228,6 +2232,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) } } + if (!virtio_device_started(vdev, vdev->status) && + !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { + vdev->start_on_kick = true; + } + rcu_read_lock(); for (i = 0; i < num; i++) { if (vdev->vq[i].vring.desc) { @@ -2291,7 +2300,7 @@ static void virtio_vmstate_change(void *opaque, int running, RunState state) VirtIODevice *vdev = opaque; BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - bool backend_run = running && vdev->started; + bool backend_run = running && virtio_device_started(vdev, vdev->status); vdev->vm_running = running; if (backend_run) { @@ -2330,8 +2339,7 @@ void virtio_init(VirtIODevice *vdev, const char *name, g_malloc0(sizeof(*vdev->vector_queues) * nvectors); } - vdev->start_on_kick = (virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1) && - !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); + vdev->start_on_kick = false; vdev->started = false; vdev->device_id = device_id; vdev->status = 0; @@ -2354,8 +2362,8 @@ void virtio_init(VirtIODevice *vdev, const char *name, } else { vdev->config = NULL; } - vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, - vdev); + vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev), + virtio_vmstate_change, vdev); vdev->device_endian = virtio_default_endian(); vdev->use_guest_notifier_mask = true; } @@ -2669,6 +2677,7 @@ static void virtio_device_instance_finalize(Object *obj) static Property virtio_properties[] = { DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features), + DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/xtensa/sim.c b/hw/xtensa/sim.c index b6922c39d5..09165b6f4d 100644 --- a/hw/xtensa/sim.c +++ b/hw/xtensa/sim.c @@ -59,7 +59,7 @@ static void xtensa_sim_init(MachineState *machine) const char *kernel_filename = machine->kernel_filename; int n; - for (n = 0; n < smp_cpus; n++) { + for (n = 0; n < machine->smp.cpus; n++) { cpu = XTENSA_CPU(cpu_create(machine->cpu_type)); env = &cpu->env; diff --git a/hw/xtensa/xtfpga.c b/hw/xtensa/xtfpga.c index e05ef75a75..f7f3e11e93 100644 --- a/hw/xtensa/xtfpga.c +++ b/hw/xtensa/xtfpga.c @@ -238,6 +238,7 @@ static void xtfpga_init(const XtfpgaBoardDesc *board, MachineState *machine) const unsigned system_io_size = 224 * MiB; uint32_t freq = 10000000; int n; + unsigned int smp_cpus = machine->smp.cpus; if (smp_cpus > 1) { mx_pic = xtensa_mx_pic_init(31); |