diff options
130 files changed, 4316 insertions, 778 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 1b2c5b9e76..6769c19d03 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -886,7 +886,7 @@ S: Odd Fixes F: hw/arm/raspi.c F: hw/arm/raspi_platform.h F: hw/*/bcm283* -F: include/hw/arm/raspi* +F: include/hw/arm/rasp* F: include/hw/*/bcm283* F: docs/system/arm/raspi.rst @@ -1174,7 +1174,7 @@ F: hw/*/etraxfs_*.c HP-PARISC Machines ------------------ -HP B160L +HP B160L, HP C3700 M: Richard Henderson <richard.henderson@linaro.org> R: Helge Deller <deller@gmx.de> S: Odd Fixes @@ -1183,12 +1183,15 @@ F: hw/hppa/ F: hw/input/lasips2.c F: hw/net/*i82596* F: hw/misc/lasi.c +F: hw/pci-host/astro.c F: hw/pci-host/dino.c F: include/hw/input/lasips2.h F: include/hw/misc/lasi.h F: include/hw/net/lasi_82596.h +F: include/hw/pci-host/astro.h F: include/hw/pci-host/dino.h F: pc-bios/hppa-firmware.img +F: roms/seabios-hppa/ LoongArch Machines ------------------ @@ -1711,6 +1714,16 @@ F: hw/s390x/event-facility.c F: hw/s390x/sclp*.c L: qemu-s390x@nongnu.org +S390 CPU topology +M: Nina Schoetterl-Glausch <nsg@linux.ibm.com> +S: Supported +F: include/hw/s390x/cpu-topology.h +F: hw/s390x/cpu-topology.c +F: target/s390x/kvm/stsi-topology.c +F: docs/devel/s390-cpu-topology.rst +F: docs/system/s390x/cpu-topology.rst +F: tests/avocado/s390_topology.py + X86 Machines ------------ PC @@ -1794,6 +1807,7 @@ F: hw/core/null-machine.c F: hw/core/numa.c F: hw/cpu/cluster.c F: qapi/machine.json +F: qapi/machine-common.json F: qapi/machine-target.json F: include/hw/boards.h F: include/hw/core/cpu.h diff --git a/backends/tpm/tpm_emulator.c b/backends/tpm/tpm_emulator.c index 402a2d6312..bf1a90f5d7 100644 --- a/backends/tpm/tpm_emulator.c +++ b/backends/tpm/tpm_emulator.c @@ -534,11 +534,8 @@ static int tpm_emulator_block_migration(TPMEmulator *tpm_emu) error_setg(&tpm_emu->migration_blocker, "Migration disabled: TPM emulator does not support " "migration"); - if (migrate_add_blocker(tpm_emu->migration_blocker, &err) < 0) { + if (migrate_add_blocker(&tpm_emu->migration_blocker, &err) < 0) { error_report_err(err); - error_free(tpm_emu->migration_blocker); - tpm_emu->migration_blocker = NULL; - return -1; } } @@ -1016,10 +1013,7 @@ static void tpm_emulator_inst_finalize(Object *obj) qapi_free_TPMEmulatorOptions(tpm_emu->options); - if (tpm_emu->migration_blocker) { - migrate_del_blocker(tpm_emu->migration_blocker); - error_free(tpm_emu->migration_blocker); - } + migrate_del_blocker(&tpm_emu->migration_blocker); tpm_sized_buffer_reset(&state_blobs->volatil); tpm_sized_buffer_reset(&state_blobs->permanent); diff --git a/block/parallels.c b/block/parallels.c index 6b46623241..1d695ce7fb 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -1369,9 +1369,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, bdrv_get_device_or_node_name(bs)); bdrv_graph_rdunlock_main_loop(); - ret = migrate_add_blocker(s->migration_blocker, errp); + ret = migrate_add_blocker(&s->migration_blocker, errp); if (ret < 0) { - error_setg(errp, "Migration blocker error"); goto fail; } qemu_co_mutex_init(&s->lock); @@ -1406,7 +1405,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); if (ret < 0) { error_setg_errno(errp, -ret, "Could not repair corrupted image"); - migrate_del_blocker(s->migration_blocker); + migrate_del_blocker(&s->migration_blocker); goto fail; } } @@ -1423,7 +1422,6 @@ fail: */ parallels_free_used_bitmap(bs); - error_free(s->migration_blocker); g_free(s->bat_dirty_bmap); qemu_vfree(s->header); return ret; @@ -1448,8 +1446,7 @@ static void parallels_close(BlockDriverState *bs) g_free(s->bat_dirty_bmap); qemu_vfree(s->header); - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); + migrate_del_blocker(&s->migration_blocker); } static bool parallels_is_support_dirty_bitmaps(BlockDriverState *bs) diff --git a/block/qcow.c b/block/qcow.c index 38a16253b8..fdd4c83948 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -307,9 +307,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, bdrv_get_device_or_node_name(bs)); bdrv_graph_rdunlock_main_loop(); - ret = migrate_add_blocker(s->migration_blocker, errp); + ret = migrate_add_blocker(&s->migration_blocker, errp); if (ret < 0) { - error_free(s->migration_blocker); goto fail; } @@ -802,8 +801,7 @@ static void qcow_close(BlockDriverState *bs) g_free(s->cluster_cache); g_free(s->cluster_data); - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); + migrate_del_blocker(&s->migration_blocker); } static int coroutine_fn GRAPH_UNLOCKED diff --git a/block/vdi.c b/block/vdi.c index 3ed43b6f35..fd7e365383 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -498,9 +498,8 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, bdrv_get_device_or_node_name(bs)); bdrv_graph_rdunlock_main_loop(); - ret = migrate_add_blocker(s->migration_blocker, errp); + ret = migrate_add_blocker(&s->migration_blocker, errp); if (ret < 0) { - error_free(s->migration_blocker); goto fail_free_bmap; } @@ -988,8 +987,7 @@ static void vdi_close(BlockDriverState *bs) qemu_vfree(s->bmap); - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); + migrate_del_blocker(&s->migration_blocker); } static int vdi_has_zero_init(BlockDriverState *bs) diff --git a/block/vhdx.c b/block/vhdx.c index 73cb214fb4..e37f8c0926 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -985,8 +985,7 @@ static void vhdx_close(BlockDriverState *bs) s->bat = NULL; qemu_vfree(s->parent_entries); s->parent_entries = NULL; - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); + migrate_del_blocker(&s->migration_blocker); qemu_vfree(s->log.hdr); s->log.hdr = NULL; vhdx_region_unregister_all(s); @@ -1097,9 +1096,8 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, error_setg(&s->migration_blocker, "The vhdx format used by node '%s' " "does not support live migration", bdrv_get_device_or_node_name(bs)); - ret = migrate_add_blocker(s->migration_blocker, errp); + ret = migrate_add_blocker(&s->migration_blocker, errp); if (ret < 0) { - error_free(s->migration_blocker); goto fail; } diff --git a/block/vmdk.c b/block/vmdk.c index 8a3b152798..1335d39e16 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1386,9 +1386,8 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, error_setg(&s->migration_blocker, "The vmdk format used by node '%s' " "does not support live migration", bdrv_get_device_or_node_name(bs)); - ret = migrate_add_blocker(s->migration_blocker, errp); + ret = migrate_add_blocker(&s->migration_blocker, errp); if (ret < 0) { - error_free(s->migration_blocker); goto fail; } @@ -2867,8 +2866,7 @@ static void vmdk_close(BlockDriverState *bs) vmdk_free_extents(bs); g_free(s->create_type); - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); + migrate_del_blocker(&s->migration_blocker); } static int64_t coroutine_fn GRAPH_RDLOCK diff --git a/block/vpc.c b/block/vpc.c index 945847fe4a..c30cf8689a 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -452,9 +452,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, bdrv_get_device_or_node_name(bs)); bdrv_graph_rdunlock_main_loop(); - ret = migrate_add_blocker(s->migration_blocker, errp); + ret = migrate_add_blocker(&s->migration_blocker, errp); if (ret < 0) { - error_free(s->migration_blocker); goto fail; } @@ -1190,8 +1189,7 @@ static void vpc_close(BlockDriverState *bs) g_free(s->pageentry_u8); #endif - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); + migrate_del_blocker(&s->migration_blocker); } static QemuOptsList vpc_create_opts = { diff --git a/block/vvfat.c b/block/vvfat.c index b0415798c0..266e036dcd 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1268,9 +1268,8 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, "The vvfat (rw) format used by node '%s' " "does not support live migration", bdrv_get_device_or_node_name(bs)); - ret = migrate_add_blocker(s->migration_blocker, errp); + ret = migrate_add_blocker(&s->migration_blocker, errp); if (ret < 0) { - error_free(s->migration_blocker); goto fail; } } @@ -3239,8 +3238,7 @@ static void vvfat_close(BlockDriverState *bs) g_free(s->cluster_buffer); if (s->qcow) { - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); + migrate_del_blocker(&s->migration_blocker); } } diff --git a/contrib/elf2dmp/addrspace.c b/contrib/elf2dmp/addrspace.c index 64b5d680ad..6f608a517b 100644 --- a/contrib/elf2dmp/addrspace.c +++ b/contrib/elf2dmp/addrspace.c @@ -72,10 +72,7 @@ int pa_space_create(struct pa_space *ps, QEMU_Elf *qemu_elf) } } - ps->block = malloc(sizeof(*ps->block) * ps->block_nr); - if (!ps->block) { - return 1; - } + ps->block = g_new(struct pa_block, ps->block_nr); for (i = 0; i < phdr_nr; i++) { if (phdr[i].p_type == PT_LOAD) { @@ -97,7 +94,7 @@ int pa_space_create(struct pa_space *ps, QEMU_Elf *qemu_elf) void pa_space_destroy(struct pa_space *ps) { ps->block_nr = 0; - free(ps->block); + g_free(ps->block); } void va_space_set_dtb(struct va_space *vs, uint64_t dtb) diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c index 5db163bdbe..cbc38a7c10 100644 --- a/contrib/elf2dmp/main.c +++ b/contrib/elf2dmp/main.c @@ -120,14 +120,11 @@ static KDDEBUGGER_DATA64 *get_kdbg(uint64_t KernBase, struct pdb_reader *pdb, } } - kdbg = malloc(kdbg_hdr.Size); - if (!kdbg) { - return NULL; - } + kdbg = g_malloc(kdbg_hdr.Size); if (va_space_rw(vs, KdDebuggerDataBlock, kdbg, kdbg_hdr.Size, 0)) { eprintf("Failed to extract entire KDBG\n"); - free(kdbg); + g_free(kdbg); return NULL; } @@ -478,7 +475,7 @@ static bool pe_check_pdb_name(uint64_t base, void *start_addr, } if (memcmp(&rsds->Signature, sign_rsds, sizeof(sign_rsds))) { - eprintf("CodeView signature is \'%.4s\', \'%s\' expected\n", + eprintf("CodeView signature is \'%.4s\', \'%.4s\' expected\n", rsds->Signature, sign_rsds); return false; } @@ -643,7 +640,7 @@ int main(int argc, char *argv[]) } out_kdbg: - free(kdbg); + g_free(kdbg); out_pdb: pdb_exit(&pdb); out_pdb_file: diff --git a/contrib/elf2dmp/pdb.c b/contrib/elf2dmp/pdb.c index 6ca5086f02..40991f5f4c 100644 --- a/contrib/elf2dmp/pdb.c +++ b/contrib/elf2dmp/pdb.c @@ -25,6 +25,10 @@ static uint32_t pdb_get_file_size(const struct pdb_reader *r, unsigned idx) { + if (idx >= r->ds.toc->num_files) { + return 0; + } + return r->ds.toc->file_size[idx]; } @@ -90,18 +94,18 @@ uint64_t pdb_resolve(uint64_t img_base, struct pdb_reader *r, const char *name) static void pdb_reader_ds_exit(struct pdb_reader *r) { - free(r->ds.toc); + g_free(r->ds.toc); } static void pdb_exit_symbols(struct pdb_reader *r) { - free(r->modimage); - free(r->symbols); + g_free(r->modimage); + g_free(r->symbols); } static void pdb_exit_segments(struct pdb_reader *r) { - free(r->segs); + g_free(r->segs); } static void *pdb_ds_read(const PDB_DS_HEADER *header, @@ -116,10 +120,7 @@ static void *pdb_ds_read(const PDB_DS_HEADER *header, nBlocks = (size + header->block_size - 1) / header->block_size; - buffer = malloc(nBlocks * header->block_size); - if (!buffer) { - return NULL; - } + buffer = g_malloc(nBlocks * header->block_size); for (i = 0; i < nBlocks; i++) { memcpy(buffer + i * header->block_size, (const char *)header + @@ -159,16 +160,17 @@ static void *pdb_ds_read_file(struct pdb_reader* r, uint32_t file_number) static int pdb_init_segments(struct pdb_reader *r) { - char *segs; unsigned stream_idx = r->segments; - segs = pdb_ds_read_file(r, stream_idx); - if (!segs) { + r->segs = pdb_ds_read_file(r, stream_idx); + if (!r->segs) { return 1; } - r->segs = segs; r->segs_size = pdb_get_file_size(r, stream_idx); + if (!r->segs_size) { + return 1; + } return 0; } @@ -201,7 +203,7 @@ static int pdb_init_symbols(struct pdb_reader *r) return 0; out_symbols: - free(symbols); + g_free(symbols); return err; } @@ -258,7 +260,7 @@ static int pdb_reader_init(struct pdb_reader *r, void *data) out_sym: pdb_exit_symbols(r); out_root: - free(r->ds.root); + g_free(r->ds.root); out_ds: pdb_reader_ds_exit(r); @@ -269,7 +271,7 @@ static void pdb_reader_exit(struct pdb_reader *r) { pdb_exit_segments(r); pdb_exit_symbols(r); - free(r->ds.root); + g_free(r->ds.root); pdb_reader_ds_exit(r); } diff --git a/contrib/elf2dmp/qemu_elf.c b/contrib/elf2dmp/qemu_elf.c index de6ad744c6..055e6f8792 100644 --- a/contrib/elf2dmp/qemu_elf.c +++ b/contrib/elf2dmp/qemu_elf.c @@ -94,10 +94,7 @@ static int init_states(QEMU_Elf *qe) printf("%zu CPU states has been found\n", cpu_nr); - qe->state = malloc(sizeof(*qe->state) * cpu_nr); - if (!qe->state) { - return 1; - } + qe->state = g_new(QEMUCPUState*, cpu_nr); cpu_nr = 0; @@ -115,7 +112,7 @@ static int init_states(QEMU_Elf *qe) static void exit_states(QEMU_Elf *qe) { - free(qe->state); + g_free(qe->state); } static bool check_ehdr(QEMU_Elf *qe) diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst index e1a93df263..6f81df92bc 100644 --- a/docs/devel/index-internals.rst +++ b/docs/devel/index-internals.rst @@ -14,6 +14,7 @@ Details about QEMU's various subsystems including how to add features to them. migration multi-process reset + s390-cpu-topology s390-dasd-ipl tracing vfio-migration diff --git a/docs/devel/s390-cpu-topology.rst b/docs/devel/s390-cpu-topology.rst new file mode 100644 index 0000000000..9eab28d5e5 --- /dev/null +++ b/docs/devel/s390-cpu-topology.rst @@ -0,0 +1,170 @@ +QAPI interface for S390 CPU topology +==================================== + +The following sections will explain the QAPI interface for S390 CPU topology +with the help of exemplary output. +For this, let's assume that QEMU has been started with the following +command, defining 4 CPUs, where CPU[0] is defined by the -smp argument and will +have default values: + +.. code-block:: bash + + qemu-system-s390x \ + -enable-kvm \ + -cpu z14,ctop=on \ + -smp 1,drawers=3,books=3,sockets=2,cores=2,maxcpus=36 \ + -device z14-s390x-cpu,core-id=19,entitlement=high \ + -device z14-s390x-cpu,core-id=11,entitlement=low \ + -device z14-s390x-cpu,core-id=112,entitlement=high \ + ... + +Additions to query-cpus-fast +---------------------------- + +The command query-cpus-fast allows querying the topology tree and +modifiers for all configured vCPUs. + +.. code-block:: QMP + + { "execute": "query-cpus-fast" } + { + "return": [ + { + "dedicated": false, + "thread-id": 536993, + "props": { + "core-id": 0, + "socket-id": 0, + "drawer-id": 0, + "book-id": 0 + }, + "cpu-state": "operating", + "entitlement": "medium", + "qom-path": "/machine/unattached/device[0]", + "cpu-index": 0, + "target": "s390x" + }, + { + "dedicated": false, + "thread-id": 537003, + "props": { + "core-id": 19, + "socket-id": 1, + "drawer-id": 0, + "book-id": 2 + }, + "cpu-state": "operating", + "entitlement": "high", + "qom-path": "/machine/peripheral-anon/device[0]", + "cpu-index": 19, + "target": "s390x" + }, + { + "dedicated": false, + "thread-id": 537004, + "props": { + "core-id": 11, + "socket-id": 1, + "drawer-id": 0, + "book-id": 1 + }, + "cpu-state": "operating", + "entitlement": "low", + "qom-path": "/machine/peripheral-anon/device[1]", + "cpu-index": 11, + "target": "s390x" + }, + { + "dedicated": true, + "thread-id": 537005, + "props": { + "core-id": 112, + "socket-id": 0, + "drawer-id": 3, + "book-id": 2 + }, + "cpu-state": "operating", + "entitlement": "high", + "qom-path": "/machine/peripheral-anon/device[2]", + "cpu-index": 112, + "target": "s390x" + } + ] + } + + +QAPI command: set-cpu-topology +------------------------------ + +The command set-cpu-topology allows modifying the topology tree +or the topology modifiers of a vCPU in the configuration. + +.. code-block:: QMP + + { "execute": "set-cpu-topology", + "arguments": { + "core-id": 11, + "socket-id": 0, + "book-id": 0, + "drawer-id": 0, + "entitlement": "low", + "dedicated": false + } + } + {"return": {}} + +The core-id parameter is the only mandatory parameter and every +unspecified parameter keeps its previous value. + +QAPI event CPU_POLARIZATION_CHANGE +---------------------------------- + +When a guest requests a modification of the polarization, +QEMU sends a CPU_POLARIZATION_CHANGE event. + +When requesting the change, the guest only specifies horizontal or +vertical polarization. +It is the job of the entity administrating QEMU to set the dedication and fine +grained vertical entitlement in response to this event. + +Note that a vertical polarized dedicated vCPU can only have a high +entitlement, giving 6 possibilities for vCPU polarization: + +- Horizontal +- Horizontal dedicated +- Vertical low +- Vertical medium +- Vertical high +- Vertical high dedicated + +Example of the event received when the guest issues the CPU instruction +Perform Topology Function PTF(0) to request an horizontal polarization: + +.. code-block:: QMP + + { + "timestamp": { + "seconds": 1687870305, + "microseconds": 566299 + }, + "event": "CPU_POLARIZATION_CHANGE", + "data": { + "polarization": "horizontal" + } + } + +QAPI query command: query-s390x-cpu-polarization +------------------------------------------------ + +The query command query-s390x-cpu-polarization returns the current +CPU polarization of the machine. +In this case the guest previously issued a PTF(1) to request vertical polarization: + +.. code-block:: QMP + + { "execute": "query-s390x-cpu-polarization" } + { + "return": { + "polarization": "vertical" + } + } diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst index 965cbf84c5..47fd648035 100644 --- a/docs/system/arm/emulation.rst +++ b/docs/system/arm/emulation.rst @@ -46,6 +46,7 @@ the following architecture extensions: - FEAT_HCX (Support for the HCRX_EL2 register) - FEAT_HPDS (Hierarchical permission disables) - FEAT_HPDS2 (Translation table page-based hardware attributes) +- FEAT_HPMN0 (Setting of MDCR_EL2.HPMN to zero) - FEAT_I8MM (AArch64 Int8 matrix multiplication instructions) - FEAT_IDST (ID space trap handling) - FEAT_IESB (Implicit error synchronization event) diff --git a/docs/system/s390x/cpu-topology.rst b/docs/system/s390x/cpu-topology.rst new file mode 100644 index 0000000000..5133fdc362 --- /dev/null +++ b/docs/system/s390x/cpu-topology.rst @@ -0,0 +1,244 @@ +.. _cpu-topology-s390x: + +CPU topology on s390x +===================== + +Since QEMU 8.2, CPU topology on s390x provides up to 3 levels of +topology containers: drawers, books and sockets. They define a +tree-shaped hierarchy. + +The socket container has one or more CPU entries. +Each of these CPU entries consists of a bitmap and three CPU attributes: + +- CPU type +- entitlement +- dedication + +Each bit set in the bitmap correspond to a core-id of a vCPU with matching +attributes. + +This documentation provides general information on S390 CPU topology, +how to enable it and explains the new CPU attributes. +For information on how to modify the S390 CPU topology and how to +monitor polarization changes, see ``docs/devel/s390-cpu-topology.rst``. + +Prerequisites +------------- + +To use the CPU topology, you need to run with KVM on a s390x host that +uses the Linux kernel v6.0 or newer (which provide the so-called +``KVM_CAP_S390_CPU_TOPOLOGY`` capability that allows QEMU to signal the +CPU topology facility via the so-called STFLE bit 11 to the VM). + +Enabling CPU topology +--------------------- + +Currently, CPU topology is only enabled in the host model by default. + +Enabling CPU topology in a CPU model is done by setting the CPU flag +``ctop`` to ``on`` as in: + +.. code-block:: bash + + -cpu gen16b,ctop=on + +Having the topology disabled by default allows migration between +old and new QEMU without adding new flags. + +Default topology usage +---------------------- + +The CPU topology can be specified on the QEMU command line +with the ``-smp`` or the ``-device`` QEMU command arguments. + +Note also that since 7.2 threads are no longer supported in the topology +and the ``-smp`` command line argument accepts only ``threads=1``. + +If none of the containers attributes (drawers, books, sockets) are +specified for the ``-smp`` flag, the number of these containers +is 1. + +Thus the following two options will result in the same topology: + +.. code-block:: bash + + -smp cpus=5,drawer=1,books=1,sockets=8,cores=4,maxcpus=32 + +and + +.. code-block:: bash + + -smp cpus=5,sockets=8,cores=4,maxcpus=32 + +When a CPU is defined by the ``-smp`` command argument, its position +inside the topology is calculated by adding the CPUs to the topology +based on the core-id starting with core-0 at position 0 of socket-0, +book-0, drawer-0 and filling all CPUs of socket-0 before filling socket-1 +of book-0 and so on up to the last socket of the last book of the last +drawer. + +When a CPU is defined by the ``-device`` command argument, the +tree topology attributes must all be defined or all not defined. + +.. code-block:: bash + + -device gen16b-s390x-cpu,drawer-id=1,book-id=1,socket-id=2,core-id=1 + +or + +.. code-block:: bash + + -device gen16b-s390x-cpu,core-id=1,dedicated=true + +If none of the tree attributes (drawer, book, sockets), are specified +for the ``-device`` argument, like for all CPUs defined with the ``-smp`` +command argument the topology tree attributes will be set by simply +adding the CPUs to the topology based on the core-id. + +QEMU will not try to resolve collisions and will report an error if the +CPU topology defined explicitly or implicitly on a ``-device`` +argument collides with the definition of a CPU implicitly defined +on the ``-smp`` argument. + +When the topology modifier attributes are not defined for the +``-device`` command argument they takes following default values: + +- dedicated: ``false`` +- entitlement: ``medium`` + + +Hot plug +++++++++ + +New CPUs can be plugged using the device_add hmp command as in: + +.. code-block:: bash + + (qemu) device_add gen16b-s390x-cpu,core-id=9 + +The placement of the CPU is derived from the core-id as described above. + +The topology can of course also be fully defined: + +.. code-block:: bash + + (qemu) device_add gen16b-s390x-cpu,drawer-id=1,book-id=1,socket-id=2,core-id=1 + + +Examples +++++++++ + +In the following machine we define 8 sockets with 4 cores each. + +.. code-block:: bash + + $ qemu-system-s390x -m 2G \ + -cpu gen16b,ctop=on \ + -smp cpus=5,sockets=8,cores=4,maxcpus=32 \ + -device host-s390x-cpu,core-id=14 \ + +A new CPUs can be plugged using the device_add hmp command as before: + +.. code-block:: bash + + (qemu) device_add gen16b-s390x-cpu,core-id=9 + +The core-id defines the placement of the core in the topology by +starting with core 0 in socket 0 up to maxcpus. + +In the example above: + +* There are 5 CPUs provided to the guest with the ``-smp`` command line + They will take the core-ids 0,1,2,3,4 + As we have 4 cores in a socket, we have 4 CPUs provided + to the guest in socket 0, with core-ids 0,1,2,3. + The last CPU, with core-id 4, will be on socket 1. + +* the core with ID 14 provided by the ``-device`` command line will + be placed in socket 3, with core-id 14 + +* the core with ID 9 provided by the ``device_add`` qmp command will + be placed in socket 2, with core-id 9 + + +Polarization, entitlement and dedication +---------------------------------------- + +Polarization +++++++++++++ + +The polarization affects how the CPUs of a shared host are utilized/distributed +among guests. +The guest determines the polarization by using the PTF instruction. + +Polarization defines two models of CPU provisioning: horizontal +and vertical. + +The horizontal polarization is the default model on boot and after +subsystem reset. When horizontal polarization is in effect all vCPUs should +have about equal resource provisioning. + +In the vertical polarization model vCPUs are unequal, but overall more resources +might be available. +The guest can make use of the vCPU entitlement information provided by the host +to optimize kernel thread scheduling. + +A subsystem reset puts all vCPU of the configuration into the +horizontal polarization. + +Entitlement ++++++++++++ + +The vertical polarization specifies that the guest's vCPU can get +different real CPU provisioning: + +- a vCPU with vertical high entitlement specifies that this + vCPU gets 100% of the real CPU provisioning. + +- a vCPU with vertical medium entitlement specifies that this + vCPU shares the real CPU with other vCPUs. + +- a vCPU with vertical low entitlement specifies that this + vCPU only gets real CPU provisioning when no other vCPUs needs it. + +In the case a vCPU with vertical high entitlement does not use +the real CPU, the unused "slack" can be dispatched to other vCPU +with medium or low entitlement. + +A vCPU can be "dedicated" in which case the vCPU is fully dedicated to a single +real CPU. + +The dedicated bit is an indication of affinity of a vCPU for a real CPU +while the entitlement indicates the sharing or exclusivity of use. + +Defining the topology on the command line +----------------------------------------- + +The topology can entirely be defined using -device cpu statements, +with the exception of CPU 0 which must be defined with the -smp +argument. + +For example, here we set the position of the cores 1,2,3 to +drawer 1, book 1, socket 2 and cores 0,9 and 14 to drawer 0, +book 0, socket 0 without defining entitlement or dedication. +Core 4 will be set on its default position on socket 1 +(since we have 4 core per socket) and we define it as dedicated and +with vertical high entitlement. + +.. code-block:: bash + + $ qemu-system-s390x -m 2G \ + -cpu gen16b,ctop=on \ + -smp cpus=1,sockets=8,cores=4,maxcpus=32 \ + \ + -device gen16b-s390x-cpu,drawer-id=1,book-id=1,socket-id=2,core-id=1 \ + -device gen16b-s390x-cpu,drawer-id=1,book-id=1,socket-id=2,core-id=2 \ + -device gen16b-s390x-cpu,drawer-id=1,book-id=1,socket-id=2,core-id=3 \ + \ + -device gen16b-s390x-cpu,drawer-id=0,book-id=0,socket-id=0,core-id=9 \ + -device gen16b-s390x-cpu,drawer-id=0,book-id=0,socket-id=0,core-id=14 \ + \ + -device gen16b-s390x-cpu,core-id=4,dedicated=on,entitlement=high + +The entitlement defined for the CPU 4 will only be used after the guest +successfully enables vertical polarization by using the PTF instruction. diff --git a/docs/system/target-s390x.rst b/docs/system/target-s390x.rst index f6f11433c7..94c981e732 100644 --- a/docs/system/target-s390x.rst +++ b/docs/system/target-s390x.rst @@ -34,3 +34,4 @@ Architectural features .. toctree:: s390x/bootdevices s390x/protvirt + s390x/cpu-topology diff --git a/dump/dump.c b/dump/dump.c index d3578ddc62..d355ada62e 100644 --- a/dump/dump.c +++ b/dump/dump.c @@ -111,7 +111,7 @@ static int dump_cleanup(DumpState *s) qemu_mutex_unlock_iothread(); } } - migrate_del_blocker(dump_migration_blocker); + migrate_del_blocker(&dump_migration_blocker); return 0; } @@ -2158,7 +2158,7 @@ void qmp_dump_guest_memory(bool paging, const char *file, * Allows even for -only-migratable, but forbid migration during the * process of dump guest memory. */ - if (migrate_add_blocker_internal(dump_migration_blocker, errp)) { + if (migrate_add_blocker_internal(&dump_migration_blocker, errp)) { /* Remember to release the fd before passing it over to dump state */ close(fd); return; diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 323f042e65..af636cfb2d 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -406,11 +406,7 @@ static int coroutine_fn put_fid(V9fsPDU *pdu, V9fsFidState *fidp) * delete the migration blocker. Ideally, this * should be hooked to transport close notification */ - if (pdu->s->migration_blocker) { - migrate_del_blocker(pdu->s->migration_blocker); - error_free(pdu->s->migration_blocker); - pdu->s->migration_blocker = NULL; - } + migrate_del_blocker(&pdu->s->migration_blocker); } return free_fid(pdu, fidp); } @@ -1505,10 +1501,8 @@ static void coroutine_fn v9fs_attach(void *opaque) error_setg(&s->migration_blocker, "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'", s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag); - err = migrate_add_blocker(s->migration_blocker, NULL); + err = migrate_add_blocker(&s->migration_blocker, NULL); if (err < 0) { - error_free(s->migration_blocker); - s->migration_blocker = NULL; clunk_fid(s, fid); goto out; } diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 24fa169060..84ea6a807a 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -722,84 +722,35 @@ static void do_cpu_reset(void *opaque) cpu_set_pc(cs, entry); } else { - /* If we are booting Linux then we need to check whether we are - * booting into secure or non-secure state and adjust the state - * accordingly. Out of reset, ARM is defined to be in secure state - * (SCR.NS = 0), we change that here if non-secure boot has been - * requested. + /* + * If we are booting Linux then we might need to do so at: + * - AArch64 NS EL2 or NS EL1 + * - AArch32 Secure SVC (EL3) + * - AArch32 NS Hyp (EL2) + * - AArch32 NS SVC (EL1) + * Configure the CPU in the way boot firmware would do to + * drop us down to the appropriate level. */ - if (arm_feature(env, ARM_FEATURE_EL3)) { - /* AArch64 is defined to come out of reset into EL3 if enabled. - * If we are booting Linux then we need to adjust our EL as - * Linux expects us to be in EL2 or EL1. AArch32 resets into - * SVC, which Linux expects, so no privilege/exception level to - * adjust. - */ - if (env->aarch64) { - env->cp15.scr_el3 |= SCR_RW; - if (arm_feature(env, ARM_FEATURE_EL2)) { - env->cp15.hcr_el2 |= HCR_RW; - env->pstate = PSTATE_MODE_EL2h; - } else { - env->pstate = PSTATE_MODE_EL1h; - } - if (cpu_isar_feature(aa64_pauth, cpu)) { - env->cp15.scr_el3 |= SCR_API | SCR_APK; - } - if (cpu_isar_feature(aa64_mte, cpu)) { - env->cp15.scr_el3 |= SCR_ATA; - } - if (cpu_isar_feature(aa64_sve, cpu)) { - env->cp15.cptr_el[3] |= R_CPTR_EL3_EZ_MASK; - env->vfp.zcr_el[3] = 0xf; - } - if (cpu_isar_feature(aa64_sme, cpu)) { - env->cp15.cptr_el[3] |= R_CPTR_EL3_ESM_MASK; - env->cp15.scr_el3 |= SCR_ENTP2; - env->vfp.smcr_el[3] = 0xf; - } - if (cpu_isar_feature(aa64_hcx, cpu)) { - env->cp15.scr_el3 |= SCR_HXEN; - } - if (cpu_isar_feature(aa64_fgt, cpu)) { - env->cp15.scr_el3 |= SCR_FGTEN; - } + int target_el = arm_feature(env, ARM_FEATURE_EL2) ? 2 : 1; - /* AArch64 kernels never boot in secure mode */ - assert(!info->secure_boot); - /* This hook is only supported for AArch32 currently: - * bootloader_aarch64[] will not call the hook, and - * the code above has already dropped us into EL2 or EL1. - */ - assert(!info->secure_board_setup); - } - - if (arm_feature(env, ARM_FEATURE_EL2)) { - /* If we have EL2 then Linux expects the HVC insn to work */ - env->cp15.scr_el3 |= SCR_HCE; - } - - /* Set to non-secure if not a secure boot */ - if (!info->secure_boot && - (cs != first_cpu || !info->secure_board_setup)) { - /* Linux expects non-secure state */ - env->cp15.scr_el3 |= SCR_NS; - /* Set NSACR.{CP11,CP10} so NS can access the FPU */ - env->cp15.nsacr |= 3 << 10; - } - } - - if (!env->aarch64 && !info->secure_boot && - arm_feature(env, ARM_FEATURE_EL2)) { + if (env->aarch64) { /* - * This is an AArch32 boot not to Secure state, and - * we have Hyp mode available, so boot the kernel into - * Hyp mode. This is not how the CPU comes out of reset, - * so we need to manually put it there. + * AArch64 kernels never boot in secure mode, and we don't + * support the secure_board_setup hook for AArch64. */ - cpsr_write(env, ARM_CPU_MODE_HYP, CPSR_M, CPSRWriteRaw); + assert(!info->secure_boot); + assert(!info->secure_board_setup); + } else { + if (arm_feature(env, ARM_FEATURE_EL3) && + (info->secure_boot || + (info->secure_board_setup && cs == first_cpu))) { + /* Start this CPU in Secure SVC */ + target_el = 3; + } } + arm_emulate_firmware_reset(cs, target_el); + if (cs == first_cpu) { AddressSpace *as = arm_boot_address_space(cpu, info); diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c index 3c7dfcd6dc..e8a82618f0 100644 --- a/hw/arm/sbsa-ref.c +++ b/hw/arm/sbsa-ref.c @@ -2,6 +2,7 @@ * ARM SBSA Reference Platform emulation * * Copyright (c) 2018 Linaro Limited + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. * Written by Hongbo Zhang <hongbo.zhang@linaro.org> * * This program is free software; you can redistribute it and/or modify it @@ -30,6 +31,7 @@ #include "exec/hwaddr.h" #include "kvm_arm.h" #include "hw/arm/boot.h" +#include "hw/arm/bsa.h" #include "hw/arm/fdt.h" #include "hw/arm/smmuv3.h" #include "hw/block/flash.h" @@ -55,14 +57,6 @@ #define NUM_SMMU_IRQS 4 #define NUM_SATA_PORTS 6 -#define VIRTUAL_PMU_IRQ 7 -#define ARCH_GIC_MAINT_IRQ 9 -#define ARCH_TIMER_VIRT_IRQ 11 -#define ARCH_TIMER_S_EL1_IRQ 13 -#define ARCH_TIMER_NS_EL1_IRQ 14 -#define ARCH_TIMER_NS_EL2_IRQ 10 -#define ARCH_TIMER_NS_EL2_VIRT_IRQ 12 - enum { SBSA_FLASH, SBSA_MEM, @@ -479,7 +473,7 @@ static void create_gic(SBSAMachineState *sms, MemoryRegion *mem) */ for (i = 0; i < smp_cpus; i++) { DeviceState *cpudev = DEVICE(qemu_get_cpu(i)); - int ppibase = NUM_IRQS + i * GIC_INTERNAL + GIC_NR_SGIS; + int intidbase = NUM_IRQS + i * GIC_INTERNAL; int irq; /* * Mapping from the output timer irq lines from the CPU to the @@ -496,14 +490,17 @@ static void create_gic(SBSAMachineState *sms, MemoryRegion *mem) for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { qdev_connect_gpio_out(cpudev, irq, qdev_get_gpio_in(sms->gic, - ppibase + timer_irq[irq])); + intidbase + timer_irq[irq])); } qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", 0, - qdev_get_gpio_in(sms->gic, ppibase + qdev_get_gpio_in(sms->gic, + intidbase + ARCH_GIC_MAINT_IRQ)); + qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, - qdev_get_gpio_in(sms->gic, ppibase + qdev_get_gpio_in(sms->gic, + intidbase + VIRTUAL_PMU_IRQ)); sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h index 648c2e37a2..6076025ad6 100644 --- a/hw/arm/smmuv3-internal.h +++ b/hw/arm/smmuv3-internal.h @@ -38,33 +38,71 @@ REG32(IDR0, 0x0) FIELD(IDR0, S1P, 1 , 1) FIELD(IDR0, TTF, 2 , 2) FIELD(IDR0, COHACC, 4 , 1) + FIELD(IDR0, BTM, 5 , 1) + FIELD(IDR0, HTTU, 6 , 2) + FIELD(IDR0, DORMHINT, 8 , 1) + FIELD(IDR0, HYP, 9 , 1) + FIELD(IDR0, ATS, 10, 1) + FIELD(IDR0, NS1ATS, 11, 1) FIELD(IDR0, ASID16, 12, 1) + FIELD(IDR0, MSI, 13, 1) + FIELD(IDR0, SEV, 14, 1) + FIELD(IDR0, ATOS, 15, 1) + FIELD(IDR0, PRI, 16, 1) + FIELD(IDR0, VMW, 17, 1) FIELD(IDR0, VMID16, 18, 1) + FIELD(IDR0, CD2L, 19, 1) + FIELD(IDR0, VATOS, 20, 1) FIELD(IDR0, TTENDIAN, 21, 2) + FIELD(IDR0, ATSRECERR, 23, 1) FIELD(IDR0, STALL_MODEL, 24, 2) FIELD(IDR0, TERM_MODEL, 26, 1) FIELD(IDR0, STLEVEL, 27, 2) + FIELD(IDR0, RME_IMPL, 30, 1) REG32(IDR1, 0x4) FIELD(IDR1, SIDSIZE, 0 , 6) + FIELD(IDR1, SSIDSIZE, 6 , 5) + FIELD(IDR1, PRIQS, 11, 5) FIELD(IDR1, EVENTQS, 16, 5) FIELD(IDR1, CMDQS, 21, 5) + FIELD(IDR1, ATTR_PERMS_OVR, 26, 1) + FIELD(IDR1, ATTR_TYPES_OVR, 27, 1) + FIELD(IDR1, REL, 28, 1) + FIELD(IDR1, QUEUES_PRESET, 29, 1) + FIELD(IDR1, TABLES_PRESET, 30, 1) + FIELD(IDR1, ECMDQ, 31, 1) #define SMMU_IDR1_SIDSIZE 16 #define SMMU_CMDQS 19 #define SMMU_EVENTQS 19 REG32(IDR2, 0x8) + FIELD(IDR2, BA_VATOS, 0, 10) + REG32(IDR3, 0xc) FIELD(IDR3, HAD, 2, 1); + FIELD(IDR3, PBHA, 3, 1); + FIELD(IDR3, XNX, 4, 1); + FIELD(IDR3, PPS, 5, 1); + FIELD(IDR3, MPAM, 7, 1); + FIELD(IDR3, FWB, 8, 1); + FIELD(IDR3, STT, 9, 1); FIELD(IDR3, RIL, 10, 1); FIELD(IDR3, BBML, 11, 2); + FIELD(IDR3, E0PD, 13, 1); + FIELD(IDR3, PTWNNC, 14, 1); + FIELD(IDR3, DPT, 15, 1); + REG32(IDR4, 0x10) + REG32(IDR5, 0x14) FIELD(IDR5, OAS, 0, 3); FIELD(IDR5, GRAN4K, 4, 1); FIELD(IDR5, GRAN16K, 5, 1); FIELD(IDR5, GRAN64K, 6, 1); + FIELD(IDR5, VAX, 10, 2); + FIELD(IDR5, STALL_MAX, 16, 16); #define SMMU_IDR5_OAS 4 diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c index 6f2b2bd45f..c3871ae067 100644 --- a/hw/arm/smmuv3.c +++ b/hw/arm/smmuv3.c @@ -278,15 +278,19 @@ static void smmuv3_init_regs(SMMUv3State *s) s->idr[1] = FIELD_DP32(s->idr[1], IDR1, EVENTQS, SMMU_EVENTQS); s->idr[1] = FIELD_DP32(s->idr[1], IDR1, CMDQS, SMMU_CMDQS); - s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1); s->idr[3] = FIELD_DP32(s->idr[3], IDR3, HAD, 1); + if (FIELD_EX32(s->idr[0], IDR0, S2P)) { + /* XNX is a stage-2-specific feature */ + s->idr[3] = FIELD_DP32(s->idr[3], IDR3, XNX, 1); + } + s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1); s->idr[3] = FIELD_DP32(s->idr[3], IDR3, BBML, 2); + s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, SMMU_IDR5_OAS); /* 44 bits */ /* 4K, 16K and 64K granule support */ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN4K, 1); s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN16K, 1); s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN64K, 1); - s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, SMMU_IDR5_OAS); /* 44 bits */ s->cmdq.base = deposit64(s->cmdq.base, 0, 5, SMMU_CMDQS); s->cmdq.prod = 0; diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 6b674231c2..9ce136cd88 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -601,21 +601,21 @@ build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) * The interrupt values are the same with the device tree when adding 16 */ /* Secure EL1 timer GSIV */ - build_append_int_noprefix(table_data, ARCH_TIMER_S_EL1_IRQ + 16, 4); + build_append_int_noprefix(table_data, ARCH_TIMER_S_EL1_IRQ, 4); /* Secure EL1 timer Flags */ build_append_int_noprefix(table_data, irqflags, 4); /* Non-Secure EL1 timer GSIV */ - build_append_int_noprefix(table_data, ARCH_TIMER_NS_EL1_IRQ + 16, 4); + build_append_int_noprefix(table_data, ARCH_TIMER_NS_EL1_IRQ, 4); /* Non-Secure EL1 timer Flags */ build_append_int_noprefix(table_data, irqflags | 1UL << 2, /* Always-on Capability */ 4); /* Virtual timer GSIV */ - build_append_int_noprefix(table_data, ARCH_TIMER_VIRT_IRQ + 16, 4); + build_append_int_noprefix(table_data, ARCH_TIMER_VIRT_IRQ, 4); /* Virtual Timer Flags */ build_append_int_noprefix(table_data, irqflags, 4); /* Non-Secure EL2 timer GSIV */ - build_append_int_noprefix(table_data, ARCH_TIMER_NS_EL2_IRQ + 16, 4); + build_append_int_noprefix(table_data, ARCH_TIMER_NS_EL2_IRQ, 4); /* Non-Secure EL2 timer Flags */ build_append_int_noprefix(table_data, irqflags, 4); /* CntReadBase Physical address */ @@ -729,9 +729,9 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) for (i = 0; i < MACHINE(vms)->smp.cpus; i++) { ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i)); uint64_t physical_base_address = 0, gich = 0, gicv = 0; - uint32_t vgic_interrupt = vms->virt ? PPI(ARCH_GIC_MAINT_IRQ) : 0; + uint32_t vgic_interrupt = vms->virt ? ARCH_GIC_MAINT_IRQ : 0; uint32_t pmu_interrupt = arm_feature(&armcpu->env, ARM_FEATURE_PMU) ? - PPI(VIRTUAL_PMU_IRQ) : 0; + VIRTUAL_PMU_IRQ : 0; if (vms->gic_version == VIRT_GIC_VERSION_2) { physical_base_address = memmap[VIRT_GIC_CPU].base; diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 5b08a98f07..529f1c089c 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -366,10 +366,14 @@ static void fdt_add_timer_nodes(const VirtMachineState *vms) } qemu_fdt_setprop(ms->fdt, "/timer", "always-on", NULL, 0); qemu_fdt_setprop_cells(ms->fdt, "/timer", "interrupts", - GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_S_EL1_IRQ, irqflags, - GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL1_IRQ, irqflags, - GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_VIRT_IRQ, irqflags, - GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL2_IRQ, irqflags); + GIC_FDT_IRQ_TYPE_PPI, + INTID_TO_PPI(ARCH_TIMER_S_EL1_IRQ), irqflags, + GIC_FDT_IRQ_TYPE_PPI, + INTID_TO_PPI(ARCH_TIMER_NS_EL1_IRQ), irqflags, + GIC_FDT_IRQ_TYPE_PPI, + INTID_TO_PPI(ARCH_TIMER_VIRT_IRQ), irqflags, + GIC_FDT_IRQ_TYPE_PPI, + INTID_TO_PPI(ARCH_TIMER_NS_EL2_IRQ), irqflags); } static void fdt_add_cpu_nodes(const VirtMachineState *vms) @@ -799,7 +803,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) */ for (i = 0; i < smp_cpus; i++) { DeviceState *cpudev = DEVICE(qemu_get_cpu(i)); - int ppibase = NUM_IRQS + i * GIC_INTERNAL + GIC_NR_SGIS; + int intidbase = NUM_IRQS + i * GIC_INTERNAL; /* Mapping from the output timer irq lines from the CPU to the * GIC PPI inputs we use for the virt board. */ @@ -813,22 +817,22 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) for (unsigned irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { qdev_connect_gpio_out(cpudev, irq, qdev_get_gpio_in(vms->gic, - ppibase + timer_irq[irq])); + intidbase + timer_irq[irq])); } if (vms->gic_version != VIRT_GIC_VERSION_2) { qemu_irq irq = qdev_get_gpio_in(vms->gic, - ppibase + ARCH_GIC_MAINT_IRQ); + intidbase + ARCH_GIC_MAINT_IRQ); qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", 0, irq); } else if (vms->virt) { qemu_irq irq = qdev_get_gpio_in(vms->gic, - ppibase + ARCH_GIC_MAINT_IRQ); + intidbase + ARCH_GIC_MAINT_IRQ); sysbus_connect_irq(gicbusdev, i + 4 * smp_cpus, irq); } qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, - qdev_get_gpio_in(vms->gic, ppibase + qdev_get_gpio_in(vms->gic, intidbase + VIRTUAL_PMU_IRQ)); sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); @@ -1988,7 +1992,7 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) if (pmu) { assert(arm_feature(&ARM_CPU(cpu)->env, ARM_FEATURE_PMU)); if (kvm_irqchip_in_kernel()) { - kvm_arm_pmu_set_irq(cpu, PPI(VIRTUAL_PMU_IRQ)); + kvm_arm_pmu_set_irq(cpu, VIRTUAL_PMU_IRQ); } kvm_arm_pmu_init(cpu); } diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c index c3e55ef9e9..9a4b59c6f2 100644 --- a/hw/core/machine-hmp-cmds.c +++ b/hw/core/machine-hmp-cmds.c @@ -71,6 +71,12 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict) if (c->has_node_id) { monitor_printf(mon, " node-id: \"%" PRIu64 "\"\n", c->node_id); } + if (c->has_drawer_id) { + monitor_printf(mon, " drawer-id: \"%" PRIu64 "\"\n", c->drawer_id); + } + if (c->has_book_id) { + monitor_printf(mon, " book-id: \"%" PRIu64 "\"\n", c->book_id); + } if (c->has_socket_id) { monitor_printf(mon, " socket-id: \"%" PRIu64 "\"\n", c->socket_id); } diff --git a/hw/core/machine-smp.c b/hw/core/machine-smp.c index 0f4d9b6f7a..25019c91ee 100644 --- a/hw/core/machine-smp.c +++ b/hw/core/machine-smp.c @@ -33,6 +33,14 @@ static char *cpu_hierarchy_to_string(MachineState *ms) MachineClass *mc = MACHINE_GET_CLASS(ms); GString *s = g_string_new(NULL); + if (mc->smp_props.drawers_supported) { + g_string_append_printf(s, "drawers (%u) * ", ms->smp.drawers); + } + + if (mc->smp_props.books_supported) { + g_string_append_printf(s, "books (%u) * ", ms->smp.books); + } + g_string_append_printf(s, "sockets (%u)", ms->smp.sockets); if (mc->smp_props.dies_supported) { @@ -75,6 +83,8 @@ void machine_parse_smp_config(MachineState *ms, { MachineClass *mc = MACHINE_GET_CLASS(ms); unsigned cpus = config->has_cpus ? config->cpus : 0; + unsigned drawers = config->has_drawers ? config->drawers : 0; + unsigned books = config->has_books ? config->books : 0; unsigned sockets = config->has_sockets ? config->sockets : 0; unsigned dies = config->has_dies ? config->dies : 0; unsigned clusters = config->has_clusters ? config->clusters : 0; @@ -87,6 +97,8 @@ void machine_parse_smp_config(MachineState *ms, * explicit configuration like "cpus=0" is not allowed. */ if ((config->has_cpus && config->cpus == 0) || + (config->has_drawers && config->drawers == 0) || + (config->has_books && config->books == 0) || (config->has_sockets && config->sockets == 0) || (config->has_dies && config->dies == 0) || (config->has_clusters && config->clusters == 0) || @@ -113,6 +125,19 @@ void machine_parse_smp_config(MachineState *ms, dies = dies > 0 ? dies : 1; clusters = clusters > 0 ? clusters : 1; + if (!mc->smp_props.books_supported && books > 1) { + error_setg(errp, "books not supported by this machine's CPU topology"); + return; + } + books = books > 0 ? books : 1; + + if (!mc->smp_props.drawers_supported && drawers > 1) { + error_setg(errp, + "drawers not supported by this machine's CPU topology"); + return; + } + drawers = drawers > 0 ? drawers : 1; + /* compute missing values based on the provided ones */ if (cpus == 0 && maxcpus == 0) { sockets = sockets > 0 ? sockets : 1; @@ -126,33 +151,41 @@ void machine_parse_smp_config(MachineState *ms, if (sockets == 0) { cores = cores > 0 ? cores : 1; threads = threads > 0 ? threads : 1; - sockets = maxcpus / (dies * clusters * cores * threads); + sockets = maxcpus / + (drawers * books * dies * clusters * cores * threads); } else if (cores == 0) { threads = threads > 0 ? threads : 1; - cores = maxcpus / (sockets * dies * clusters * threads); + cores = maxcpus / + (drawers * books * sockets * dies * clusters * threads); } } else { /* prefer cores over sockets since 6.2 */ if (cores == 0) { sockets = sockets > 0 ? sockets : 1; threads = threads > 0 ? threads : 1; - cores = maxcpus / (sockets * dies * clusters * threads); + cores = maxcpus / + (drawers * books * sockets * dies * clusters * threads); } else if (sockets == 0) { threads = threads > 0 ? threads : 1; - sockets = maxcpus / (dies * clusters * cores * threads); + sockets = maxcpus / + (drawers * books * dies * clusters * cores * threads); } } /* try to calculate omitted threads at last */ if (threads == 0) { - threads = maxcpus / (sockets * dies * clusters * cores); + threads = maxcpus / + (drawers * books * sockets * dies * clusters * cores); } } - maxcpus = maxcpus > 0 ? maxcpus : sockets * dies * clusters * cores * threads; + maxcpus = maxcpus > 0 ? maxcpus : drawers * books * sockets * dies * + clusters * cores * threads; cpus = cpus > 0 ? cpus : maxcpus; ms->smp.cpus = cpus; + ms->smp.drawers = drawers; + ms->smp.books = books; ms->smp.sockets = sockets; ms->smp.dies = dies; ms->smp.clusters = clusters; @@ -163,7 +196,8 @@ void machine_parse_smp_config(MachineState *ms, mc->smp_props.has_clusters = config->has_clusters; /* sanity-check of the computed topology */ - if (sockets * dies * clusters * cores * threads != maxcpus) { + if (drawers * books * sockets * dies * clusters * cores * threads != + maxcpus) { g_autofree char *topo_msg = cpu_hierarchy_to_string(ms); error_setg(errp, "Invalid CPU topology: " "product of the hierarchy must match maxcpus: " diff --git a/hw/core/machine.c b/hw/core/machine.c index 05aef2cf9f..50edaab737 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -863,6 +863,8 @@ static void machine_get_smp(Object *obj, Visitor *v, const char *name, MachineState *ms = MACHINE(obj); SMPConfiguration *config = &(SMPConfiguration){ .has_cpus = true, .cpus = ms->smp.cpus, + .has_drawers = true, .drawers = ms->smp.drawers, + .has_books = true, .books = ms->smp.books, .has_sockets = true, .sockets = ms->smp.sockets, .has_dies = true, .dies = ms->smp.dies, .has_clusters = true, .clusters = ms->smp.clusters, @@ -1137,6 +1139,8 @@ static void machine_initfn(Object *obj) /* default to mc->default_cpus */ ms->smp.cpus = mc->default_cpus; ms->smp.max_cpus = mc->default_cpus; + ms->smp.drawers = 1; + ms->smp.books = 1; ms->smp.sockets = 1; ms->smp.dies = 1; ms->smp.clusters = 1; diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 688340610e..7c6dfab128 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -1139,3 +1139,16 @@ const PropertyInfo qdev_prop_uuid = { .set = set_uuid, .set_default_value = set_default_uuid_auto, }; + +/* --- s390 cpu entitlement policy --- */ + +QEMU_BUILD_BUG_ON(sizeof(CpuS390Entitlement) != sizeof(int)); + +const PropertyInfo qdev_prop_cpus390entitlement = { + .name = "CpuS390Entitlement", + .description = "low/medium (default)/high", + .enum_table = &CpuS390Entitlement_lookup, + .get = qdev_propinfo_get_enum, + .set = qdev_propinfo_set_enum, + .set_default_value = qdev_propinfo_set_default_value_enum, +}; diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index 50c5373b65..37af256219 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -184,8 +184,7 @@ virtio_gpu_base_device_realize(DeviceState *qdev, if (virtio_gpu_virgl_enabled(g->conf)) { error_setg(&g->migration_blocker, "virgl is not yet migratable"); - if (migrate_add_blocker(g->migration_blocker, errp) < 0) { - error_free(g->migration_blocker); + if (migrate_add_blocker(&g->migration_blocker, errp) < 0) { return false; } } @@ -253,10 +252,7 @@ virtio_gpu_base_device_unrealize(DeviceState *qdev) { VirtIOGPUBase *g = VIRTIO_GPU_BASE(qdev); - if (g->migration_blocker) { - migrate_del_blocker(g->migration_blocker); - error_free(g->migration_blocker); - } + migrate_del_blocker(&g->migration_blocker); } static void diff --git a/hw/hppa/Kconfig b/hw/hppa/Kconfig index 5dd8b5b21e..ff8528aaa8 100644 --- a/hw/hppa/Kconfig +++ b/hw/hppa/Kconfig @@ -3,6 +3,7 @@ config HPPA_B160L imply PCI_DEVICES imply E1000_PCI imply VIRTIO_VGA + select ASTRO select DINO select LASI select SERIAL diff --git a/hw/hppa/hppa_hardware.h b/hw/hppa/hppa_hardware.h index a5ac3dd0fd..a9be7bb851 100644 --- a/hw/hppa/hppa_hardware.h +++ b/hw/hppa/hppa_hardware.h @@ -18,7 +18,6 @@ #define LASI_UART_HPA 0xffd05000 #define LASI_SCSI_HPA 0xffd06000 #define LASI_LAN_HPA 0xffd07000 -#define LASI_RTC_HPA 0xffd09000 #define LASI_LPT_HPA 0xffd02000 #define LASI_AUDIO_HPA 0xffd04000 #define LASI_PS2KBD_HPA 0xffd08000 diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c index cf28cb9586..67d4d1b5e0 100644 --- a/hw/hppa/machine.c +++ b/hw/hppa/machine.c @@ -1,6 +1,8 @@ /* * QEMU HPPA hardware system emulator. - * Copyright 2018 Helge Deller <deller@gmx.de> + * (C) Copyright 2018-2023 Helge Deller <deller@gmx.de> + * + * This work is licensed under the GNU GPL license version 2 or later. */ #include "qemu/osdep.h" @@ -20,7 +22,10 @@ #include "hw/input/lasips2.h" #include "hw/net/lasi_82596.h" #include "hw/nmi.h" +#include "hw/usb.h" #include "hw/pci/pci.h" +#include "hw/pci/pci_device.h" +#include "hw/pci-host/astro.h" #include "hw/pci-host/dino.h" #include "hw/misc/lasi.h" #include "hppa_hardware.h" @@ -29,12 +34,13 @@ #include "net/net.h" #include "qemu/log.h" -#define MIN_SEABIOS_HPPA_VERSION 6 /* require at least this fw version */ +#define MIN_SEABIOS_HPPA_VERSION 10 /* require at least this fw version */ #define HPA_POWER_BUTTON (FIRMWARE_END - 0x10) #define enable_lasi_lan() 0 +static DeviceState *lasi_dev; static void hppa_powerdown_req(Notifier *n, void *opaque) { @@ -95,14 +101,69 @@ static ISABus *hppa_isa_bus(void) isa_bus = isa_bus_new(NULL, get_system_memory(), isa_region, &error_abort); - isa_irqs = i8259_init(isa_bus, - /* qemu_allocate_irq(dino_set_isa_irq, s, 0)); */ - NULL); + isa_irqs = i8259_init(isa_bus, NULL); isa_bus_register_input_irqs(isa_bus, isa_irqs); return isa_bus; } +/* + * Helper functions to emulate RTC clock and DebugOutputPort + */ +static time_t rtc_ref; + +static uint64_t io_cpu_read(void *opaque, hwaddr addr, unsigned size) +{ + uint64_t val = 0; + + switch (addr) { + case 0: /* RTC clock */ + val = time(NULL); + val += rtc_ref; + break; + case 8: /* DebugOutputPort */ + return 0xe9; /* readback */ + } + return val; +} + +static void io_cpu_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + unsigned char ch; + Chardev *debugout; + + switch (addr) { + case 0: /* RTC clock */ + rtc_ref = val - time(NULL); + break; + case 8: /* DebugOutputPort */ + ch = val; + debugout = serial_hd(0); + if (debugout) { + qemu_chr_fe_write_all(debugout->be, &ch, 1); + } else { + fprintf(stderr, "%c", ch); + } + break; + } +} + +static const MemoryRegionOps hppa_io_helper_ops = { + .read = io_cpu_read, + .write = io_cpu_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + }, +}; + + static uint64_t cpu_hppa_to_phys(void *opaque, uint64_t addr) { addr &= (0x10000000 - 1); @@ -118,11 +179,13 @@ static void fw_cfg_boot_set(void *opaque, const char *boot_device, fw_cfg_modify_i16(opaque, FW_CFG_BOOT_DEVICE, boot_device[0]); } -static FWCfgState *create_fw_cfg(MachineState *ms) +static FWCfgState *create_fw_cfg(MachineState *ms, PCIBus *pci_bus) { FWCfgState *fw_cfg; uint64_t val; const char qemu_version[] = QEMU_VERSION; + MachineClass *mc = MACHINE_GET_CLASS(ms); + int len; fw_cfg = fw_cfg_init_mem(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4); fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, ms->smp.cpus); @@ -137,8 +200,24 @@ static FWCfgState *create_fw_cfg(MachineState *ms) fw_cfg_add_file(fw_cfg, "/etc/cpu/tlb_entries", g_memdup(&val, sizeof(val)), sizeof(val)); + val = cpu_to_le64(HPPA_BTLB_ENTRIES); + fw_cfg_add_file(fw_cfg, "/etc/cpu/btlb_entries", + g_memdup(&val, sizeof(val)), sizeof(val)); + + len = strlen(mc->name) + 1; + fw_cfg_add_file(fw_cfg, "/etc/hppa/machine", + g_memdup(mc->name, len), len); + val = cpu_to_le64(HPA_POWER_BUTTON); - fw_cfg_add_file(fw_cfg, "/etc/power-button-addr", + fw_cfg_add_file(fw_cfg, "/etc/hppa/power-button-addr", + g_memdup(&val, sizeof(val)), sizeof(val)); + + val = cpu_to_le64(CPU_HPA + 16); + fw_cfg_add_file(fw_cfg, "/etc/hppa/rtc-addr", + g_memdup(&val, sizeof(val)), sizeof(val)); + + val = cpu_to_le64(CPU_HPA + 24); + fw_cfg_add_file(fw_cfg, "/etc/hppa/DebugOutputPort", g_memdup(&val, sizeof(val)), sizeof(val)); fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, ms->boot_config.order[0]); @@ -148,6 +227,8 @@ static FWCfgState *create_fw_cfg(MachineState *ms) g_memdup(qemu_version, sizeof(qemu_version)), sizeof(qemu_version)); + fw_cfg_add_extra_pci_roots(pci_bus, fw_cfg); + return fw_cfg; } @@ -173,29 +254,20 @@ static DinoState *dino_init(MemoryRegion *addr_space) return DINO_PCI_HOST_BRIDGE(dev); } -static void machine_hppa_init(MachineState *machine) +/* + * Step 1: Create CPUs and Memory + */ +static void machine_HP_common_init_cpus(MachineState *machine) { - const char *kernel_filename = machine->kernel_filename; - const char *kernel_cmdline = machine->kernel_cmdline; - const char *initrd_filename = machine->initrd_filename; - MachineClass *mc = MACHINE_GET_CLASS(machine); - DeviceState *dev, *dino_dev, *lasi_dev; - PCIBus *pci_bus; - ISABus *isa_bus; - char *firmware_filename; - uint64_t firmware_low, firmware_high; - long size; - uint64_t kernel_entry = 0, kernel_low, kernel_high; MemoryRegion *addr_space = get_system_memory(); - MemoryRegion *rom_region; MemoryRegion *cpu_region; long i; unsigned int smp_cpus = machine->smp.cpus; - SysBusDevice *s; + char *name; /* Create CPUs. */ for (i = 0; i < smp_cpus; i++) { - char *name = g_strdup_printf("cpu%ld-io-eir", i); + name = g_strdup_printf("cpu%ld-io-eir", i); cpu[i] = HPPA_CPU(cpu_create(machine->cpu_type)); cpu_region = g_new(MemoryRegion, 1); @@ -206,51 +278,40 @@ static void machine_hppa_init(MachineState *machine) g_free(name); } + /* RTC and DebugOutputPort on CPU #0 */ + cpu_region = g_new(MemoryRegion, 1); + memory_region_init_io(cpu_region, OBJECT(cpu[0]), &hppa_io_helper_ops, + cpu[0], "cpu0-io-rtc", 2 * sizeof(uint64_t)); + memory_region_add_subregion(addr_space, CPU_HPA + 16, cpu_region); + /* Main memory region. */ if (machine->ram_size > 3 * GiB) { error_report("RAM size is currently restricted to 3GB"); exit(EXIT_FAILURE); } memory_region_add_subregion_overlap(addr_space, 0, machine->ram, -1); +} - - /* Init Lasi chip */ - lasi_dev = DEVICE(lasi_init()); - memory_region_add_subregion(addr_space, LASI_HPA, - sysbus_mmio_get_region( - SYS_BUS_DEVICE(lasi_dev), 0)); - - /* Init Dino (PCI host bus chip). */ - dino_dev = DEVICE(dino_init(addr_space)); - memory_region_add_subregion(addr_space, DINO_HPA, - sysbus_mmio_get_region( - SYS_BUS_DEVICE(dino_dev), 0)); - pci_bus = PCI_BUS(qdev_get_child_bus(dino_dev, "pci")); - assert(pci_bus); - - /* Create ISA bus. */ - isa_bus = hppa_isa_bus(); - assert(isa_bus); - - /* Realtime clock, used by firmware for PDC_TOD call. */ - mc146818_rtc_init(isa_bus, 2000, NULL); - - /* Serial ports: Lasi and Dino use a 7.272727 MHz clock. */ - serial_mm_init(addr_space, LASI_UART_HPA + 0x800, 0, - qdev_get_gpio_in(lasi_dev, LASI_IRQ_UART_HPA), 7272727 / 16, - serial_hd(0), DEVICE_BIG_ENDIAN); - - serial_mm_init(addr_space, DINO_UART_HPA + 0x800, 0, - qdev_get_gpio_in(dino_dev, DINO_IRQ_RS232INT), 7272727 / 16, - serial_hd(1), DEVICE_BIG_ENDIAN); - - /* Parallel port */ - parallel_mm_init(addr_space, LASI_LPT_HPA + 0x800, 0, - qdev_get_gpio_in(lasi_dev, LASI_IRQ_LAN_HPA), - parallel_hds[0]); - - /* fw_cfg configuration interface */ - create_fw_cfg(machine); +/* + * Last creation step: Add SCSI discs, NICs, graphics & load firmware + */ +static void machine_HP_common_init_tail(MachineState *machine, PCIBus *pci_bus) +{ + const char *kernel_filename = machine->kernel_filename; + const char *kernel_cmdline = machine->kernel_cmdline; + const char *initrd_filename = machine->initrd_filename; + MachineClass *mc = MACHINE_GET_CLASS(machine); + DeviceState *dev; + PCIDevice *pci_dev; + char *firmware_filename; + uint64_t firmware_low, firmware_high; + long size; + uint64_t kernel_entry = 0, kernel_low, kernel_high; + MemoryRegion *addr_space = get_system_memory(); + MemoryRegion *rom_region; + long i; + unsigned int smp_cpus = machine->smp.cpus; + SysBusDevice *s; /* SCSI disk setup. */ dev = DEVICE(pci_create_simple(pci_bus, -1, "lsi53c895a")); @@ -278,21 +339,42 @@ static void machine_hppa_init(MachineState *machine) } } - /* PS/2 Keyboard/Mouse */ - dev = qdev_new(TYPE_LASIPS2); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, - qdev_get_gpio_in(lasi_dev, LASI_IRQ_PS2KBD_HPA)); - memory_region_add_subregion(addr_space, LASI_PS2KBD_HPA, - sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), - 0)); - memory_region_add_subregion(addr_space, LASI_PS2KBD_HPA + 0x100, - sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), - 1)); + /* BMC board: HP Powerbar SP2 Diva (with console only) */ + pci_dev = pci_new(-1, "pci-serial"); + if (!lasi_dev) { + /* bind default keyboard/serial to Diva card */ + qdev_prop_set_chr(DEVICE(pci_dev), "chardev", serial_hd(0)); + } + qdev_prop_set_uint8(DEVICE(pci_dev), "prog_if", 0); + pci_realize_and_unref(pci_dev, pci_bus, &error_fatal); + pci_config_set_vendor_id(pci_dev->config, PCI_VENDOR_ID_HP); + pci_config_set_device_id(pci_dev->config, 0x1048); + pci_set_word(&pci_dev->config[PCI_SUBSYSTEM_VENDOR_ID], PCI_VENDOR_ID_HP); + pci_set_word(&pci_dev->config[PCI_SUBSYSTEM_ID], 0x1227); /* Powerbar */ + + /* create a second serial PCI card when running Astro */ + if (!lasi_dev) { + pci_dev = pci_new(-1, "pci-serial-4x"); + qdev_prop_set_chr(DEVICE(pci_dev), "chardev1", serial_hd(1)); + qdev_prop_set_chr(DEVICE(pci_dev), "chardev2", serial_hd(2)); + qdev_prop_set_chr(DEVICE(pci_dev), "chardev3", serial_hd(3)); + qdev_prop_set_chr(DEVICE(pci_dev), "chardev4", serial_hd(4)); + pci_realize_and_unref(pci_dev, pci_bus, &error_fatal); + } + + /* create USB OHCI controller for USB keyboard & mouse on Astro machines */ + if (!lasi_dev && machine->enable_graphics) { + pci_create_simple(pci_bus, -1, "pci-ohci"); + usb_create_simple(usb_bus_find(-1), "usb-kbd"); + usb_create_simple(usb_bus_find(-1), "usb-mouse"); + } /* register power switch emulation */ qemu_register_powerdown_notifier(&hppa_system_powerdown_notifier); + /* fw_cfg configuration interface */ + create_fw_cfg(machine, pci_bus); + /* Load firmware. Given that this is not "real" firmware, but one explicitly written for the emulation, we might as well load it directly from an ELF image. */ @@ -410,6 +492,103 @@ static void machine_hppa_init(MachineState *machine) cpu[0]->env.gr[19] = FW_CFG_IO_BASE; } +/* + * Create HP B160L workstation + */ +static void machine_HP_B160L_init(MachineState *machine) +{ + DeviceState *dev, *dino_dev; + MemoryRegion *addr_space = get_system_memory(); + ISABus *isa_bus; + PCIBus *pci_bus; + + /* Create CPUs and RAM. */ + machine_HP_common_init_cpus(machine); + + /* Init Lasi chip */ + lasi_dev = DEVICE(lasi_init()); + memory_region_add_subregion(addr_space, LASI_HPA, + sysbus_mmio_get_region( + SYS_BUS_DEVICE(lasi_dev), 0)); + + /* Init Dino (PCI host bus chip). */ + dino_dev = DEVICE(dino_init(addr_space)); + memory_region_add_subregion(addr_space, DINO_HPA, + sysbus_mmio_get_region( + SYS_BUS_DEVICE(dino_dev), 0)); + pci_bus = PCI_BUS(qdev_get_child_bus(dino_dev, "pci")); + assert(pci_bus); + + /* Create ISA bus, needed for PS/2 kbd/mouse port emulation */ + isa_bus = hppa_isa_bus(); + assert(isa_bus); + + /* Serial ports: Lasi and Dino use a 7.272727 MHz clock. */ + serial_mm_init(addr_space, LASI_UART_HPA + 0x800, 0, + qdev_get_gpio_in(lasi_dev, LASI_IRQ_UART_HPA), 7272727 / 16, + serial_hd(0), DEVICE_BIG_ENDIAN); + + serial_mm_init(addr_space, DINO_UART_HPA + 0x800, 0, + qdev_get_gpio_in(dino_dev, DINO_IRQ_RS232INT), 7272727 / 16, + serial_hd(1), DEVICE_BIG_ENDIAN); + + /* Parallel port */ + parallel_mm_init(addr_space, LASI_LPT_HPA + 0x800, 0, + qdev_get_gpio_in(lasi_dev, LASI_IRQ_LAN_HPA), + parallel_hds[0]); + + /* PS/2 Keyboard/Mouse */ + dev = qdev_new(TYPE_LASIPS2); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, + qdev_get_gpio_in(lasi_dev, LASI_IRQ_PS2KBD_HPA)); + memory_region_add_subregion(addr_space, LASI_PS2KBD_HPA, + sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), + 0)); + memory_region_add_subregion(addr_space, LASI_PS2KBD_HPA + 0x100, + sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), + 1)); + + /* Add SCSI discs, NICs, graphics & load firmware */ + machine_HP_common_init_tail(machine, pci_bus); +} + +static AstroState *astro_init(void) +{ + DeviceState *dev; + + dev = qdev_new(TYPE_ASTRO_CHIP); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + + return ASTRO_CHIP(dev); +} + +/* + * Create HP C3700 workstation + */ +static void machine_HP_C3700_init(MachineState *machine) +{ + PCIBus *pci_bus; + AstroState *astro; + DeviceState *astro_dev; + MemoryRegion *addr_space = get_system_memory(); + + /* Create CPUs and RAM. */ + machine_HP_common_init_cpus(machine); + + /* Init Astro and the Elroys (PCI host bus chips). */ + astro = astro_init(); + astro_dev = DEVICE(astro); + memory_region_add_subregion(addr_space, ASTRO_HPA, + sysbus_mmio_get_region( + SYS_BUS_DEVICE(astro_dev), 0)); + pci_bus = PCI_BUS(qdev_get_child_bus(DEVICE(astro->elroy[0]), "pci")); + assert(pci_bus); + + /* Add SCSI discs, NICs, graphics & load firmware */ + machine_HP_common_init_tail(machine, pci_bus); +} + static void hppa_machine_reset(MachineState *ms, ShutdownCause reason) { unsigned int smp_cpus = ms->smp.cpus; @@ -458,14 +637,14 @@ static void hppa_nmi(NMIState *n, int cpu_index, Error **errp) } } -static void hppa_machine_init_class_init(ObjectClass *oc, void *data) +static void HP_B160L_machine_init_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); NMIClass *nc = NMI_CLASS(oc); - mc->desc = "HPPA B160L machine"; + mc->desc = "HP B160L workstation"; mc->default_cpu_type = TYPE_HPPA_CPU; - mc->init = machine_hppa_init; + mc->init = machine_HP_B160L_init; mc->reset = hppa_machine_reset; mc->block_default_type = IF_SCSI; mc->max_cpus = HPPA_MAX_CPUS; @@ -479,10 +658,41 @@ static void hppa_machine_init_class_init(ObjectClass *oc, void *data) nc->nmi_monitor_handler = hppa_nmi; } -static const TypeInfo hppa_machine_init_typeinfo = { - .name = MACHINE_TYPE_NAME("hppa"), +static const TypeInfo HP_B160L_machine_init_typeinfo = { + .name = MACHINE_TYPE_NAME("B160L"), + .parent = TYPE_MACHINE, + .class_init = HP_B160L_machine_init_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_NMI }, + { } + }, +}; + +static void HP_C3700_machine_init_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + NMIClass *nc = NMI_CLASS(oc); + + mc->desc = "HP C3700 workstation"; + mc->default_cpu_type = TYPE_HPPA_CPU; + mc->init = machine_HP_C3700_init; + mc->reset = hppa_machine_reset; + mc->block_default_type = IF_SCSI; + mc->max_cpus = HPPA_MAX_CPUS; + mc->default_cpus = 1; + mc->is_default = false; + mc->default_ram_size = 1024 * MiB; + mc->default_boot_order = "cd"; + mc->default_ram_id = "ram"; + mc->default_nic = "tulip"; + + nc->nmi_monitor_handler = hppa_nmi; +} + +static const TypeInfo HP_C3700_machine_init_typeinfo = { + .name = MACHINE_TYPE_NAME("C3700"), .parent = TYPE_MACHINE, - .class_init = hppa_machine_init_class_init, + .class_init = HP_C3700_machine_init_class_init, .interfaces = (InterfaceInfo[]) { { TYPE_NMI }, { } @@ -491,7 +701,8 @@ static const TypeInfo hppa_machine_init_typeinfo = { static void hppa_machine_init_register_types(void) { - type_register_static(&hppa_machine_init_typeinfo); + type_register_static(&HP_B160L_machine_init_typeinfo); + type_register_static(&HP_C3700_machine_init_typeinfo); } type_init(hppa_machine_init_register_types) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index bb3854d1d0..f7ee638bec 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1283,7 +1283,9 @@ void pc_basic_device_init(struct PCMachineState *pcms, /* connect PIT to output control line of the HPET */ qdev_connect_gpio_out(hpet, 0, qdev_get_gpio_in(DEVICE(pit), 0)); } - pcspk_init(pcms->pcspk, isa_bus, pit); + object_property_set_link(OBJECT(pcms->pcspk), "pit", + OBJECT(pit), &error_fatal); + isa_realize_and_unref(pcms->pcspk, isa_bus, &error_fatal); } /* Super I/O */ diff --git a/hw/input/lasips2.c b/hw/input/lasips2.c index ea7c07a2ba..6075121b72 100644 --- a/hw/input/lasips2.c +++ b/hw/input/lasips2.c @@ -351,6 +351,11 @@ static void lasips2_port_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + /* + * The PS/2 mouse port is integreal part of LASI and can not be + * created by users without LASI. + */ + dc->user_creatable = false; dc->realize = lasips2_port_realize; } @@ -397,6 +402,11 @@ static void lasips2_kbd_port_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); LASIPS2PortDeviceClass *lpdc = LASIPS2_PORT_CLASS(klass); + /* + * The PS/2 keyboard port is integreal part of LASI and can not be + * created by users without LASI. + */ + dc->user_creatable = false; device_class_set_parent_realize(dc, lasips2_kbd_port_realize, &lpdc->parent_realize); } diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c index 1d588946bc..e0d9e512a3 100644 --- a/hw/intc/arm_gic_kvm.c +++ b/hw/intc/arm_gic_kvm.c @@ -516,8 +516,7 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp) if (!kvm_arm_gic_can_save_restore(s)) { error_setg(&s->migration_blocker, "This operating system kernel does " "not support vGICv2 migration"); - if (migrate_add_blocker(s->migration_blocker, errp) < 0) { - error_free(s->migration_blocker); + if (migrate_add_blocker(&s->migration_blocker, errp) < 0) { return; } } diff --git a/hw/intc/arm_gicv3_its_kvm.c b/hw/intc/arm_gicv3_its_kvm.c index 7eda9fb86e..61c1cc7bdb 100644 --- a/hw/intc/arm_gicv3_its_kvm.c +++ b/hw/intc/arm_gicv3_its_kvm.c @@ -114,8 +114,7 @@ static void kvm_arm_its_realize(DeviceState *dev, Error **errp) GITS_CTLR)) { error_setg(&s->migration_blocker, "This operating system kernel " "does not support vITS migration"); - if (migrate_add_blocker(s->migration_blocker, errp) < 0) { - error_free(s->migration_blocker); + if (migrate_add_blocker(&s->migration_blocker, errp) < 0) { return; } } else { diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c index 72ad916d3d..77eb37e131 100644 --- a/hw/intc/arm_gicv3_kvm.c +++ b/hw/intc/arm_gicv3_kvm.c @@ -878,8 +878,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) GICD_CTLR)) { error_setg(&s->migration_blocker, "This operating system kernel does " "not support vGICv3 migration"); - if (migrate_add_blocker(s->migration_blocker, errp) < 0) { - error_free(s->migration_blocker); + if (migrate_add_blocker(&s->migration_blocker, errp) < 0) { return; } } diff --git a/hw/isa/i82378.c b/hw/isa/i82378.c index 63e0857208..79ffbb52a0 100644 --- a/hw/isa/i82378.c +++ b/hw/isa/i82378.c @@ -67,6 +67,7 @@ static void i82378_realize(PCIDevice *pci, Error **errp) uint8_t *pci_conf; ISABus *isabus; ISADevice *pit; + ISADevice *pcspk; pci_conf = pci->config; pci_set_word(pci_conf + PCI_COMMAND, @@ -102,7 +103,9 @@ static void i82378_realize(PCIDevice *pci, Error **errp) pit = i8254_pit_init(isabus, 0x40, 0, NULL); /* speaker */ - pcspk_init(isa_new(TYPE_PC_SPEAKER), isabus, pit); + pcspk = isa_new(TYPE_PC_SPEAKER); + object_property_set_link(OBJECT(pcspk), "pit", OBJECT(pit), &error_fatal); + isa_realize_and_unref(pcspk, isabus, &error_fatal); /* 2 82C37 (dma) */ isa_create_simple(isabus, "i82374"); diff --git a/hw/mips/jazz.c b/hw/mips/jazz.c index 86dfe05ea8..d33a76ad4d 100644 --- a/hw/mips/jazz.c +++ b/hw/mips/jazz.c @@ -176,6 +176,7 @@ static void mips_jazz_init(MachineState *machine, SysBusDevice *sysbus; ISABus *isa_bus; ISADevice *pit; + ISADevice *pcspk; DriveInfo *fds[MAX_FD]; MemoryRegion *bios = g_new(MemoryRegion, 1); MemoryRegion *bios2 = g_new(MemoryRegion, 1); @@ -278,7 +279,9 @@ static void mips_jazz_init(MachineState *machine, isa_bus_register_input_irqs(isa_bus, i8259); i8257_dma_init(isa_bus, 0); pit = i8254_pit_init(isa_bus, 0x40, 0, NULL); - pcspk_init(isa_new(TYPE_PC_SPEAKER), isa_bus, pit); + pcspk = isa_new(TYPE_PC_SPEAKER); + object_property_set_link(OBJECT(pcspk), "pit", OBJECT(pit), &error_fatal); + isa_realize_and_unref(pcspk, isa_bus, &error_fatal); /* Video card */ switch (jazz_model) { diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c index 4ed9faa54a..ff55a4e2cd 100644 --- a/hw/misc/bcm2835_property.c +++ b/hw/misc/bcm2835_property.c @@ -12,7 +12,7 @@ #include "migration/vmstate.h" #include "hw/irq.h" #include "hw/misc/bcm2835_mbox_defs.h" -#include "hw/misc/raspberrypi-fw-defs.h" +#include "hw/arm/raspberrypi-fw-defs.h" #include "sysemu/dma.h" #include "qemu/log.h" #include "qemu/module.h" diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index d66d912172..0447888029 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -903,8 +903,7 @@ static void ivshmem_common_realize(PCIDevice *dev, Error **errp) if (!ivshmem_is_master(s)) { error_setg(&s->migration_blocker, "Migration is disabled when using feature 'peer mode' in device 'ivshmem'"); - if (migrate_add_blocker(s->migration_blocker, errp) < 0) { - error_free(s->migration_blocker); + if (migrate_add_blocker(&s->migration_blocker, errp) < 0) { return; } } @@ -922,10 +921,7 @@ static void ivshmem_exit(PCIDevice *dev) IVShmemState *s = IVSHMEM_COMMON(dev); int i; - if (s->migration_blocker) { - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); - } + migrate_del_blocker(&s->migration_blocker); if (memory_region_is_mapped(s->ivshmem_bar2)) { if (!s->hostmem) { diff --git a/hw/net/tulip.c b/hw/net/tulip.c index 915e5fb595..11d866e431 100644 --- a/hw/net/tulip.c +++ b/hw/net/tulip.c @@ -1020,7 +1020,7 @@ static void tulip_class_init(ObjectClass *klass, void *data) k->exit = pci_tulip_exit; k->vendor_id = PCI_VENDOR_ID_DEC; k->device_id = PCI_DEVICE_ID_DEC_21143; - k->subsystem_vendor_id = 0x103c; + k->subsystem_vendor_id = PCI_VENDOR_ID_HP; k->subsystem_id = 0x104f; k->class_id = PCI_CLASS_NETWORK_ETHERNET; dc->vmsd = &vmstate_pci_tulip; diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 29e33ea5ed..b85c7946a7 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -3624,8 +3624,8 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) n->primary_listener.hide_device = failover_hide_primary_device; qatomic_set(&n->failover_primary_hidden, true); device_listener_register(&n->primary_listener); - n->migration_state.notify = virtio_net_migration_state_notifier; - add_migration_state_change_notifier(&n->migration_state); + migration_add_notifier(&n->migration_state, + virtio_net_migration_state_notifier); n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); } @@ -3788,7 +3788,7 @@ static void virtio_net_device_unrealize(DeviceState *dev) if (n->failover) { qobject_unref(n->primary_opts); device_listener_unregister(&n->primary_listener); - remove_migration_state_change_notifier(&n->migration_state); + migration_remove_notifier(&n->migration_state); } else { assert(n->primary_opts == NULL); } diff --git a/hw/nvram/xlnx-bbram.c b/hw/nvram/xlnx-bbram.c index c6b484cc85..e18e7770e1 100644 --- a/hw/nvram/xlnx-bbram.c +++ b/hw/nvram/xlnx-bbram.c @@ -2,6 +2,7 @@ * QEMU model of the Xilinx BBRAM Battery Backed RAM * * Copyright (c) 2014-2021 Xilinx Inc. + * Copyright (c) 2023 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -416,9 +417,9 @@ static RegisterAccessInfo bbram_ctrl_regs_info[] = { } }; -static void bbram_ctrl_reset(DeviceState *dev) +static void bbram_ctrl_reset_hold(Object *obj) { - XlnxBBRam *s = XLNX_BBRAM(dev); + XlnxBBRam *s = XLNX_BBRAM(obj); unsigned int i; for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) { @@ -522,8 +523,9 @@ static Property bbram_ctrl_props[] = { static void bbram_ctrl_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); - dc->reset = bbram_ctrl_reset; + rc->phases.hold = bbram_ctrl_reset_hold; dc->realize = bbram_ctrl_realize; dc->vmsd = &vmstate_bbram_ctrl; device_class_set_props(dc, bbram_ctrl_props); diff --git a/hw/nvram/xlnx-versal-efuse-ctrl.c b/hw/nvram/xlnx-versal-efuse-ctrl.c index b35ba65ab5..beb5661c35 100644 --- a/hw/nvram/xlnx-versal-efuse-ctrl.c +++ b/hw/nvram/xlnx-versal-efuse-ctrl.c @@ -2,6 +2,7 @@ * QEMU model of the Versal eFuse controller * * Copyright (c) 2020 Xilinx Inc. + * Copyright (c) 2023 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -657,9 +658,9 @@ static void efuse_ctrl_register_reset(RegisterInfo *reg) register_reset(reg); } -static void efuse_ctrl_reset(DeviceState *dev) +static void efuse_ctrl_reset_hold(Object *obj) { - XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(dev); + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(obj); unsigned int i; for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) { @@ -749,8 +750,9 @@ static Property efuse_ctrl_props[] = { static void efuse_ctrl_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); - dc->reset = efuse_ctrl_reset; + rc->phases.hold = efuse_ctrl_reset_hold; dc->realize = efuse_ctrl_realize; dc->vmsd = &vmstate_efuse_ctrl; device_class_set_props(dc, efuse_ctrl_props); diff --git a/hw/nvram/xlnx-zynqmp-efuse.c b/hw/nvram/xlnx-zynqmp-efuse.c index 228ba0bbfa..3db5f98ec1 100644 --- a/hw/nvram/xlnx-zynqmp-efuse.c +++ b/hw/nvram/xlnx-zynqmp-efuse.c @@ -2,6 +2,7 @@ * QEMU model of the ZynqMP eFuse * * Copyright (c) 2015 Xilinx Inc. + * Copyright (c) 2023 Advanced Micro Devices, Inc. * * Written by Edgar E. Iglesias <edgari@xilinx.com> * @@ -769,9 +770,9 @@ static void zynqmp_efuse_register_reset(RegisterInfo *reg) register_reset(reg); } -static void zynqmp_efuse_reset(DeviceState *dev) +static void zynqmp_efuse_reset_hold(Object *obj) { - XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(dev); + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(obj); unsigned int i; for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) { @@ -837,8 +838,9 @@ static Property zynqmp_efuse_props[] = { static void zynqmp_efuse_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); - dc->reset = zynqmp_efuse_reset; + rc->phases.hold = zynqmp_efuse_reset_hold; dc->realize = zynqmp_efuse_realize; dc->vmsd = &vmstate_efuse; device_class_set_props(dc, zynqmp_efuse_props); diff --git a/hw/pci-host/Kconfig b/hw/pci-host/Kconfig index a07070eddf..54a609d2ca 100644 --- a/hw/pci-host/Kconfig +++ b/hw/pci-host/Kconfig @@ -82,6 +82,10 @@ config DINO bool select PCI +config ASTRO + bool + select PCI + config GT64120 bool select PCI diff --git a/hw/pci-host/astro.c b/hw/pci-host/astro.c new file mode 100644 index 0000000000..4b2d7caf2d --- /dev/null +++ b/hw/pci-host/astro.c @@ -0,0 +1,885 @@ +/* + * HP-PARISC Astro/Pluto/Ike/REO system bus adapter (SBA) + * with Elroy PCI bus (LBA) adapter emulation + * Found in C3000 and similar machines + * + * (C) 2023 by Helge Deller <deller@gmx.de> + * + * This work is licensed under the GNU GPL license version 2 or later. + * + * Chip documentation is available at: + * https://parisc.wiki.kernel.org/index.php/Technical_Documentation + * + * TODO: + * - All user-added devices are currently attached to the first + * Elroy (PCI bus) only for now. To fix this additional work in + * SeaBIOS and this driver is needed. See "user_creatable" flag below. + * - GMMIO (Greater than 4 GB MMIO) register + */ + +#define TYPE_ASTRO_IOMMU_MEMORY_REGION "astro-iommu-memory-region" + +#include "qemu/osdep.h" +#include "qemu/module.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "hw/irq.h" +#include "hw/pci/pci_device.h" +#include "hw/pci/pci_bus.h" +#include "hw/qdev-properties.h" +#include "hw/pci-host/astro.h" +#include "hw/hppa/hppa_hardware.h" +#include "migration/vmstate.h" +#include "trace.h" +#include "qom/object.h" + +/* + * Helper functions + */ + +static uint64_t mask_32bit_val(hwaddr addr, unsigned size, uint64_t val) +{ + if (size == 8) { + return val; + } + if (addr & 4) { + val >>= 32; + } else { + val = (uint32_t) val; + } + return val; +} + +static void put_val_in_int64(uint64_t *p, hwaddr addr, unsigned size, + uint64_t val) +{ + if (size == 8) { + *p = val; + } else if (size == 4) { + if (addr & 4) { + *p = ((*p << 32) >> 32) | (val << 32); + } else { + *p = ((*p >> 32) << 32) | (uint32_t) val; + } + } +} + +static void put_val_in_arrary(uint64_t *array, hwaddr start_addr, + hwaddr addr, unsigned size, uint64_t val) +{ + int index; + + index = (addr - start_addr) / 8; + put_val_in_int64(&array[index], addr, size, val); +} + + +/* + * The Elroy PCI host bridge. We have at least 4 of those under Astro. + */ + +static MemTxResult elroy_chip_read_with_attrs(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + MemTxResult ret = MEMTX_OK; + ElroyState *s = opaque; + uint64_t val = -1; + int index; + + switch ((addr >> 3) << 3) { + case 0x0008: + val = 0x6000005; /* func_class */ + break; + case 0x0058: + /* + * Scratch register, but firmware initializes it with the + * PCI BUS number and Linux/HP-UX uses it then. + */ + val = s->pci_bus_num; + /* Upper byte holds the end of this bus number */ + val |= s->pci_bus_num << 8; + break; + case 0x0080: + val = s->arb_mask; /* set ARB mask */ + break; + case 0x0108: + val = s->status_control; + break; + case 0x200 ... 0x250 - 1: /* LMMIO, GMMIO, WLMMIO, WGMMIO, ... */ + index = (addr - 0x200) / 8; + val = s->mmio_base[index]; + break; + case 0x0680: + val = s->error_config; + break; + case 0x0688: + val = 0; /* ERROR_STATUS */ + break; + case 0x0800: /* IOSAPIC_REG_SELECT */ + val = s->iosapic_reg_select; + break; + case 0x0808: + val = UINT64_MAX; /* XXX: tbc. */ + g_assert_not_reached(); + break; + case 0x0810: /* IOSAPIC_REG_WINDOW */ + switch (s->iosapic_reg_select) { + case 0x01: /* IOSAPIC_REG_VERSION */ + val = (32 << 16) | 1; /* upper 16bit holds max entries */ + break; + default: + if (s->iosapic_reg_select < ARRAY_SIZE(s->iosapic_reg)) { + val = s->iosapic_reg[s->iosapic_reg_select]; + } else { + trace_iosapic_reg_read(s->iosapic_reg_select, size, val); + g_assert_not_reached(); + } + } + trace_iosapic_reg_read(s->iosapic_reg_select, size, val); + break; + default: + trace_elroy_read(addr, size, val); + g_assert_not_reached(); + } + trace_elroy_read(addr, size, val); + + /* for 32-bit accesses mask return value */ + val = mask_32bit_val(addr, size, val); + + trace_astro_chip_read(addr, size, val); + *data = val; + return ret; +} + + +static MemTxResult elroy_chip_write_with_attrs(void *opaque, hwaddr addr, + uint64_t val, unsigned size, + MemTxAttrs attrs) +{ + ElroyState *s = opaque; + int i; + + trace_elroy_write(addr, size, val); + + switch ((addr >> 3) << 3) { + case 0x080: + put_val_in_int64(&s->arb_mask, addr, size, val); + break; + case 0x0108: + put_val_in_int64(&s->status_control, addr, size, val); + break; + case 0x200 ... 0x250 - 1: /* LMMIO, GMMIO, WLMMIO, WGMMIO, ... */ + put_val_in_arrary(s->mmio_base, 0x200, addr, size, val); + break; + case 0x0680: + put_val_in_int64(&s->error_config, addr, size, val); + break; + case 0x0800: /* IOSAPIC_REG_SELECT */ + s->iosapic_reg_select = val; + break; + case 0x0810: /* IOSAPIC_REG_WINDOW */ + trace_iosapic_reg_write(s->iosapic_reg_select, size, val); + if (s->iosapic_reg_select < ARRAY_SIZE(s->iosapic_reg)) { + s->iosapic_reg[s->iosapic_reg_select] = val; + } else { + g_assert_not_reached(); + } + break; + case 0x0840: /* IOSAPIC_REG_EOI */ + val = le64_to_cpu(val); + val &= 63; + for (i = 0; i < ELROY_IRQS; i++) { + if ((s->iosapic_reg[0x10 + 2 * i] & 63) == val) { + s->ilr &= ~(1ull << i); + } + } + break; + default: + g_assert_not_reached(); + } + return MEMTX_OK; +} + +static const MemoryRegionOps elroy_chip_ops = { + .read_with_attrs = elroy_chip_read_with_attrs, + .write_with_attrs = elroy_chip_write_with_attrs, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 8, + }, +}; + + +/* Unlike pci_config_data_le_ops, no check of high bit set in config_reg. */ + +static uint64_t elroy_config_data_read(void *opaque, hwaddr addr, unsigned len) +{ + uint64_t val; + + PCIHostState *s = opaque; + val = pci_data_read(s->bus, s->config_reg | (addr & 3), len); + trace_elroy_pci_config_data_read(s->config_reg | (addr & 3), len, val); + return val; +} + +static void elroy_config_data_write(void *opaque, hwaddr addr, + uint64_t val, unsigned len) +{ + PCIHostState *s = opaque; + pci_data_write(s->bus, s->config_reg | (addr & 3), val, len); + trace_elroy_pci_config_data_write(s->config_reg | (addr & 3), len, val); +} + +static const MemoryRegionOps elroy_config_data_ops = { + .read = elroy_config_data_read, + .write = elroy_config_data_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static uint64_t elroy_config_addr_read(void *opaque, hwaddr addr, unsigned len) +{ + ElroyState *s = opaque; + return s->config_reg_elroy; +} + +static void elroy_config_addr_write(void *opaque, hwaddr addr, + uint64_t val, unsigned len) +{ + PCIHostState *s = opaque; + ElroyState *es = opaque; + es->config_reg_elroy = val; /* keep a copy of original value */ + s->config_reg = val; +} + +static const MemoryRegionOps elroy_config_addr_ops = { + .read = elroy_config_addr_read, + .write = elroy_config_addr_write, + .valid.min_access_size = 4, + .valid.max_access_size = 8, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + + +/* + * A subroutine of astro_translate_iommu that builds an IOMMUTLBEntry using the + * given translated address and mask. + */ +static bool make_iommu_tlbe(hwaddr addr, hwaddr taddr, hwaddr mask, + IOMMUTLBEntry *ret) +{ + hwaddr tce_mask = ~((1ull << 12) - 1); + ret->target_as = &address_space_memory; + ret->iova = addr & tce_mask; + ret->translated_addr = taddr & tce_mask; + ret->addr_mask = ~tce_mask; + ret->perm = IOMMU_RW; + return true; +} + +/* Handle PCI-to-system address translation. */ +static IOMMUTLBEntry astro_translate_iommu(IOMMUMemoryRegion *iommu, + hwaddr addr, + IOMMUAccessFlags flag, + int iommu_idx) +{ + AstroState *s = container_of(iommu, AstroState, iommu); + IOMMUTLBEntry ret = { + .target_as = &address_space_memory, + .iova = addr, + .translated_addr = 0, + .addr_mask = ~(hwaddr)0, + .perm = IOMMU_NONE, + }; + hwaddr pdir_ptr, index, a, ibase; + hwaddr addr_mask = 0xfff; /* 4k translation */ + uint64_t entry; + +#define IOVP_SHIFT 12 /* equals PAGE_SHIFT */ +#define PDIR_INDEX(iovp) ((iovp) >> IOVP_SHIFT) +#define IOVP_MASK PAGE_MASK +#define SBA_PDIR_VALID_BIT 0x8000000000000000ULL + + /* "range enable" flag cleared? */ + if ((s->tlb_ibase & 1) == 0) { + make_iommu_tlbe(addr, addr, addr_mask, &ret); + return ret; + } + + a = addr; + ibase = s->tlb_ibase & ~1ULL; + if ((a & s->tlb_imask) != ibase) { + /* do not translate this one! */ + make_iommu_tlbe(addr, addr, addr_mask, &ret); + return ret; + } + index = PDIR_INDEX(a); + pdir_ptr = s->tlb_pdir_base + index * sizeof(entry); + entry = ldq_le_phys(&address_space_memory, pdir_ptr); + if (!(entry & SBA_PDIR_VALID_BIT)) { /* I/O PDIR entry valid ? */ + g_assert_not_reached(); + goto failure; + } + entry &= ~SBA_PDIR_VALID_BIT; + entry >>= IOVP_SHIFT; + entry <<= 12; + entry |= addr & 0xfff; + make_iommu_tlbe(addr, entry, addr_mask, &ret); + goto success; + + failure: + ret = (IOMMUTLBEntry) { .perm = IOMMU_NONE }; + success: + return ret; +} + +static AddressSpace *elroy_pcihost_set_iommu(PCIBus *bus, void *opaque, + int devfn) +{ + ElroyState *s = opaque; + return &s->astro->iommu_as; +} + +/* + * Encoding in IOSAPIC: + * base_addr == 0xfffa0000, we want to get 0xa0ff0000. + * eid 0x0ff00000 -> 0x00ff0000 + * id 0x000ff000 -> 0xff000000 + */ +#define SWIZZLE_HPA(a) \ + ((((a) & 0x0ff00000) >> 4) | (((a) & 0x000ff000) << 12)) +#define UNSWIZZLE_HPA(a) \ + (((((a) << 4) & 0x0ff00000) | (((a) >> 12) & 0x000ff000) | 0xf0000000)) + +/* bits in the "low" I/O Sapic IRdT entry */ +#define IOSAPIC_IRDT_DISABLE 0x10000 /* if bit is set, mask this irq */ +#define IOSAPIC_IRDT_PO_LOW 0x02000 +#define IOSAPIC_IRDT_LEVEL_TRIG 0x08000 +#define IOSAPIC_IRDT_MODE_LPRI 0x00100 + +#define CPU_IRQ_OFFSET 2 + +static void elroy_set_irq(void *opaque, int irq, int level) +{ + ElroyState *s = opaque; + uint32_t bit; + uint32_t old_ilr = s->ilr; + hwaddr cpu_hpa; + uint32_t val; + + val = s->iosapic_reg[0x10 + 2 * irq]; + cpu_hpa = s->iosapic_reg[0x11 + 2 * irq]; + /* low nibble of val has value to write into CPU irq reg */ + bit = 1u << (val & (ELROY_IRQS - 1)); + cpu_hpa = UNSWIZZLE_HPA(cpu_hpa); + + if (level && (!(val & IOSAPIC_IRDT_DISABLE)) && cpu_hpa) { + uint32_t ena = bit & ~old_ilr; + s->ilr = old_ilr | bit; + if (ena != 0) { + stl_be_phys(&address_space_memory, cpu_hpa, val & 63); + } + } else { + s->ilr = old_ilr & ~bit; + } +} + +static int elroy_pci_map_irq(PCIDevice *d, int irq_num) +{ + int slot = PCI_SLOT(d->devfn); + + assert(irq_num >= 0 && irq_num < ELROY_IRQS); + return slot & (ELROY_IRQS - 1); +} + +static void elroy_reset(DeviceState *dev) +{ + ElroyState *s = ELROY_PCI_HOST_BRIDGE(dev); + int irq; + + /* + * Make sure to disable interrupts at reboot, otherwise the Linux kernel + * serial8250_config_port() in drivers/tty/serial/8250/8250_port.c + * will hang during autoconfig(). + */ + s->ilr = 0; + for (irq = 0; irq < ELROY_IRQS; irq++) { + s->iosapic_reg[0x10 + 2 * irq] = IOSAPIC_IRDT_PO_LOW | + IOSAPIC_IRDT_LEVEL_TRIG | (irq + CPU_IRQ_OFFSET) | + IOSAPIC_IRDT_DISABLE; + s->iosapic_reg[0x11 + 2 * irq] = SWIZZLE_HPA(CPU_HPA); + } +} + +static void elroy_pcihost_init(Object *obj) +{ + ElroyState *s = ELROY_PCI_HOST_BRIDGE(obj); + PCIHostState *phb = PCI_HOST_BRIDGE(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + /* Elroy config access from CPU. */ + memory_region_init_io(&s->this_mem, OBJECT(s), &elroy_chip_ops, + s, "elroy", 0x2000); + + /* Elroy PCI config. */ + memory_region_init_io(&phb->conf_mem, OBJECT(phb), + &elroy_config_addr_ops, DEVICE(s), + "pci-conf-idx", 8); + memory_region_init_io(&phb->data_mem, OBJECT(phb), + &elroy_config_data_ops, DEVICE(s), + "pci-conf-data", 8); + memory_region_add_subregion(&s->this_mem, 0x40, + &phb->conf_mem); + memory_region_add_subregion(&s->this_mem, 0x48, + &phb->data_mem); + + /* Elroy PCI bus memory. */ + memory_region_init(&s->pci_mmio, OBJECT(s), "pci-mmio", UINT64_MAX); + memory_region_init_io(&s->pci_io, OBJECT(s), &unassigned_io_ops, obj, + "pci-isa-mmio", + ((uint32_t) IOS_DIST_BASE_SIZE) / ROPES_PER_IOC); + + phb->bus = pci_register_root_bus(DEVICE(s), "pci", + elroy_set_irq, elroy_pci_map_irq, s, + &s->pci_mmio, &s->pci_io, + PCI_DEVFN(0, 0), ELROY_IRQS, TYPE_PCI_BUS); + + sysbus_init_mmio(sbd, &s->this_mem); + + qdev_init_gpio_in(DEVICE(obj), elroy_set_irq, ELROY_IRQS); +} + +static Property elroy_pcihost_properties[] = { + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription vmstate_elroy = { + .name = "Elroy", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT64(hpa, ElroyState), + VMSTATE_UINT32(pci_bus_num, ElroyState), + VMSTATE_UINT64(config_address, ElroyState), + VMSTATE_UINT64(config_reg_elroy, ElroyState), + VMSTATE_UINT64(status_control, ElroyState), + VMSTATE_UINT64(arb_mask, ElroyState), + VMSTATE_UINT64_ARRAY(mmio_base, ElroyState, (0x0250 - 0x200) / 8), + VMSTATE_UINT64(error_config, ElroyState), + VMSTATE_UINT32(iosapic_reg_select, ElroyState), + VMSTATE_UINT64_ARRAY(iosapic_reg, ElroyState, 0x20), + VMSTATE_UINT32(ilr, ElroyState), + VMSTATE_END_OF_LIST() + } +}; + +static void elroy_pcihost_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = elroy_reset; + device_class_set_props(dc, elroy_pcihost_properties); + dc->vmsd = &vmstate_elroy; + dc->user_creatable = false; +} + +static const TypeInfo elroy_pcihost_info = { + .name = TYPE_ELROY_PCI_HOST_BRIDGE, + .parent = TYPE_PCI_HOST_BRIDGE, + .instance_init = elroy_pcihost_init, + .instance_size = sizeof(ElroyState), + .class_init = elroy_pcihost_class_init, +}; + +static void elroy_register_types(void) +{ + type_register_static(&elroy_pcihost_info); +} + +type_init(elroy_register_types) + + +static ElroyState *elroy_init(int num) +{ + DeviceState *dev; + + dev = qdev_new(TYPE_ELROY_PCI_HOST_BRIDGE); + dev->id = g_strdup_printf("elroy%d", num); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + + return ELROY_PCI_HOST_BRIDGE(dev); +} + +/* + * Astro Runway chip. + */ + +static MemTxResult astro_chip_read_with_attrs(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + AstroState *s = opaque; + MemTxResult ret = MEMTX_OK; + uint64_t val = -1; + int index; + + switch ((addr >> 3) << 3) { + /* R2I registers */ + case 0x0000: /* ID */ + val = (0x01 << 3) | 0x01ULL; + break; + case 0x0008: /* IOC_CTRL */ + val = s->ioc_ctrl; + break; + case 0x0010: /* TOC_CLIENT_ID */ + break; + case 0x0030: /* HP-UX 10.20 and 11.11 reads it. No idea. */ + val = -1; + break; + case 0x0300 ... 0x03d8: /* LMMIO_DIRECT0_BASE... */ + index = (addr - 0x300) / 8; + val = s->ioc_ranges[index]; + break; + case 0x10200: + val = 0; + break; + case 0x10220: + case 0x10230: /* HP-UX 11.11 reads it. No idea. */ + val = -1; + break; + case 0x22108: /* IOC STATUS_CONTROL */ + val = s->ioc_status_ctrl; + break; + case 0x20200 ... 0x20240 - 1: /* IOC Rope0_Control ... */ + index = (addr - 0x20200) / 8; + val = s->ioc_rope_control[index]; + break; + case 0x20040: /* IOC Rope config */ + val = s->ioc_rope_config; + break; + case 0x20050: /* IOC Rope debug */ + val = 0; + break; + case 0x20108: /* IOC STATUS_CONTROL */ + val = s->ioc_status_control; + break; + case 0x20310: /* IOC_PCOM */ + val = s->tlb_pcom; + /* TODO: flush iommu */ + break; + case 0x20400: + val = s->ioc_flush_control; + break; + /* empty placeholders for non-existent elroys */ +#define EMPTY_PORT(x) case x: case x+8: val = 0; break; \ + case x+40: case x+48: val = UINT64_MAX; break; + EMPTY_PORT(0x30000) + EMPTY_PORT(0x32000) + EMPTY_PORT(0x34000) + EMPTY_PORT(0x36000) + EMPTY_PORT(0x38000) + EMPTY_PORT(0x3a000) + EMPTY_PORT(0x3c000) + EMPTY_PORT(0x3e000) +#undef EMPTY_PORT + + default: + trace_astro_chip_read(addr, size, val); + g_assert_not_reached(); + } + + /* for 32-bit accesses mask return value */ + val = mask_32bit_val(addr, size, val); + + trace_astro_chip_read(addr, size, val); + *data = val; + return ret; +} + +static MemTxResult astro_chip_write_with_attrs(void *opaque, hwaddr addr, + uint64_t val, unsigned size, + MemTxAttrs attrs) +{ + AstroState *s = opaque; + + trace_astro_chip_write(addr, size, val); + + switch ((addr >> 3) << 3) { + case 0x0000: /* ID */ + break; + case 0x0008: /* IOC_CTRL */ + val &= 0x0ffffff; + put_val_in_int64(&s->ioc_ctrl, addr, size, val); + break; + case 0x0010: /* TOC_CLIENT_ID */ + break; + case 0x0030: /* HP-UX 10.20 and 11.11 reads it. No idea. */ + break; + case 0x0300 ... 0x03d8 - 1: /* LMMIO_DIRECT0_BASE... */ + put_val_in_arrary(s->ioc_ranges, 0x300, addr, size, val); + break; + case 0x10200: + case 0x10220: + case 0x10230: /* HP-UX 11.11 reads it. No idea. */ + break; + case 0x22108: /* IOC STATUS_CONTROL */ + put_val_in_int64(&s->ioc_status_ctrl, addr, size, val); + break; + case 0x20200 ... 0x20240 - 1: /* IOC Rope0_Control ... */ + put_val_in_arrary(s->ioc_rope_control, 0x20200, addr, size, val); + break; + case 0x20040: /* IOC Rope config */ + put_val_in_int64(&s->ioc_rope_config, addr, size, val); + break; + case 0x20300: + put_val_in_int64(&s->tlb_ibase, addr, size, val); + break; + case 0x20308: + put_val_in_int64(&s->tlb_imask, addr, size, val); + break; + case 0x20310: + put_val_in_int64(&s->tlb_pcom, addr, size, val); + /* TODO: flush iommu */ + break; + case 0x20318: + put_val_in_int64(&s->tlb_tcnfg, addr, size, val); + break; + case 0x20320: + put_val_in_int64(&s->tlb_pdir_base, addr, size, val); + break; + /* + * empty placeholders for non-existent elroys, e.g. + * func_class, pci config & data + */ +#define EMPTY_PORT(x) case x: case x+8: case x+0x40: case x+0x48: + EMPTY_PORT(0x30000) + EMPTY_PORT(0x32000) + EMPTY_PORT(0x34000) + EMPTY_PORT(0x36000) + EMPTY_PORT(0x38000) + EMPTY_PORT(0x3a000) + EMPTY_PORT(0x3c000) + EMPTY_PORT(0x3e000) + break; +#undef EMPTY_PORT + + default: + /* Controlled by astro_chip_mem_valid above. */ + trace_astro_chip_write(addr, size, val); + g_assert_not_reached(); + } + return MEMTX_OK; +} + +static const MemoryRegionOps astro_chip_ops = { + .read_with_attrs = astro_chip_read_with_attrs, + .write_with_attrs = astro_chip_write_with_attrs, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 8, + }, +}; + +static const VMStateDescription vmstate_astro = { + .name = "Astro", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT64(ioc_ctrl, AstroState), + VMSTATE_UINT64(ioc_status_ctrl, AstroState), + VMSTATE_UINT64_ARRAY(ioc_ranges, AstroState, (0x03d8 - 0x300) / 8), + VMSTATE_UINT64(ioc_rope_config, AstroState), + VMSTATE_UINT64(ioc_status_control, AstroState), + VMSTATE_UINT64(ioc_flush_control, AstroState), + VMSTATE_UINT64_ARRAY(ioc_rope_control, AstroState, 8), + VMSTATE_UINT64(tlb_ibase, AstroState), + VMSTATE_UINT64(tlb_imask, AstroState), + VMSTATE_UINT64(tlb_pcom, AstroState), + VMSTATE_UINT64(tlb_tcnfg, AstroState), + VMSTATE_UINT64(tlb_pdir_base, AstroState), + VMSTATE_END_OF_LIST() + } +}; + +static void astro_reset(DeviceState *dev) +{ + AstroState *s = ASTRO_CHIP(dev); + int i; + + s->ioc_ctrl = 0x29cf; + s->ioc_rope_config = 0xc5f; + s->ioc_flush_control = 0xb03; + s->ioc_status_control = 0; + memset(&s->ioc_rope_control, 0, sizeof(s->ioc_rope_control)); + + /* + * The SBA BASE/MASK registers control CPU -> IO routing. + * The LBA BASE/MASK registers control IO -> System routing (in Elroy) + */ + memset(&s->ioc_ranges, 0, sizeof(s->ioc_ranges)); + s->ioc_ranges[(0x360 - 0x300) / 8] = LMMIO_DIST_BASE_ADDR | 0x01; /* LMMIO_DIST_BASE (SBA) */ + s->ioc_ranges[(0x368 - 0x300) / 8] = 0xfc000000; /* LMMIO_DIST_MASK */ + s->ioc_ranges[(0x370 - 0x300) / 8] = 0; /* LMMIO_DIST_ROUTE */ + s->ioc_ranges[(0x390 - 0x300) / 8] = IOS_DIST_BASE_ADDR | 0x01; /* IOS_DIST_BASE */ + s->ioc_ranges[(0x398 - 0x300) / 8] = 0xffffff0000; /* IOS_DIST_MASK */ + s->ioc_ranges[(0x3a0 - 0x300) / 8] = 0x3400000000000000ULL; /* IOS_DIST_ROUTE */ + s->ioc_ranges[(0x3c0 - 0x300) / 8] = 0xfffee00000; /* IOS_DIRECT_BASE */ + s->ioc_ranges[(0x3c8 - 0x300) / 8] = 0xffffff0000; /* IOS_DIRECT_MASK */ + s->ioc_ranges[(0x3d0 - 0x300) / 8] = 0x0; /* IOS_DIRECT_ROUTE */ + + s->tlb_ibase = 0; + s->tlb_imask = 0; + s->tlb_pcom = 0; + s->tlb_tcnfg = 0; + s->tlb_pdir_base = 0; + + for (i = 0; i < ELROY_NUM; i++) { + elroy_reset(DEVICE(s->elroy[i])); + } +} + +static void astro_init(Object *obj) +{ +} + +static void astro_realize(DeviceState *obj, Error **errp) +{ + AstroState *s = ASTRO_CHIP(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + int i; + + memory_region_init_io(&s->this_mem, OBJECT(s), &astro_chip_ops, + s, "astro", 0x40000); + sysbus_init_mmio(sbd, &s->this_mem); + + /* Host memory as seen from Elroys PCI side, via the IOMMU. */ + memory_region_init_iommu(&s->iommu, sizeof(s->iommu), + TYPE_ASTRO_IOMMU_MEMORY_REGION, OBJECT(s), + "iommu-astro", UINT64_MAX); + address_space_init(&s->iommu_as, MEMORY_REGION(&s->iommu), + "bm-pci"); + + /* Create Elroys (PCI host bus chips). */ + for (i = 0; i < ELROY_NUM; i++) { + static const int elroy_hpa_offsets[ELROY_NUM] = { + 0x30000, 0x32000, 0x38000, 0x3c000 }; + static const char elroy_rope_nr[ELROY_NUM] = { + 0, 1, 4, 6 }; /* busnum path, e.g. [10:6] */ + int addr_offset; + ElroyState *elroy; + hwaddr map_addr; + uint64_t map_size; + int rope; + + addr_offset = elroy_hpa_offsets[i]; + rope = elroy_rope_nr[i]; + + elroy = elroy_init(i); + s->elroy[i] = elroy; + elroy->hpa = ASTRO_HPA + addr_offset; + elroy->pci_bus_num = i; + elroy->astro = s; + + /* + * NOTE: we only allow PCI devices on first Elroy for now. + * SeaBIOS will not find devices on the other busses. + */ + if (i > 0) { + qbus_mark_full(&PCI_HOST_BRIDGE(elroy)->bus->qbus); + } + + /* map elroy config addresses into Astro space */ + memory_region_add_subregion(&s->this_mem, addr_offset, + &elroy->this_mem); + + /* LMMIO */ + elroy->mmio_base[(0x0200 - 0x200) / 8] = 0xf0000001; + elroy->mmio_base[(0x0208 - 0x200) / 8] = 0xf8000000; + /* GMMIO */ + elroy->mmio_base[(0x0210 - 0x200) / 8] = 0x000000f800000001; + elroy->mmio_base[(0x0218 - 0x200) / 8] = 0x000000ff80000000; + /* WLMMIO */ + elroy->mmio_base[(0x0220 - 0x200) / 8] = 0xf0000001; + elroy->mmio_base[(0x0228 - 0x200) / 8] = 0xf0000000; + /* WGMMIO */ + elroy->mmio_base[(0x0230 - 0x200) / 8] = 0x000000f800000001; + elroy->mmio_base[(0x0238 - 0x200) / 8] = 0x000000fc00000000; + /* IOS_BASE */ + map_size = IOS_DIST_BASE_SIZE / ROPES_PER_IOC; + elroy->mmio_base[(0x0240 - 0x200) / 8] = rope * map_size | 0x01; + elroy->mmio_base[(0x0248 - 0x200) / 8] = 0x0000e000; + + /* map elroys mmio */ + map_size = LMMIO_DIST_BASE_SIZE / ROPES_PER_IOC; + map_addr = (uint32_t) (LMMIO_DIST_BASE_ADDR + rope * map_size); + memory_region_init_alias(&elroy->pci_mmio_alias, OBJECT(elroy), + "pci-mmio-alias", + &elroy->pci_mmio, map_addr, map_size); + memory_region_add_subregion(get_system_memory(), map_addr, + &elroy->pci_mmio_alias); + + map_size = IOS_DIST_BASE_SIZE / ROPES_PER_IOC; + map_addr = (uint32_t) (IOS_DIST_BASE_ADDR + rope * map_size); + memory_region_add_subregion(get_system_memory(), map_addr, + &elroy->pci_io); + + /* Host memory as seen from the PCI side, via the IOMMU. */ + pci_setup_iommu(PCI_HOST_BRIDGE(elroy)->bus, elroy_pcihost_set_iommu, + elroy); + } +} + +static void astro_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = astro_reset; + dc->vmsd = &vmstate_astro; + dc->realize = astro_realize; + /* + * astro with elroys are hard part of the newer PA2.0 machines and can not + * be created without that hardware + */ + dc->user_creatable = false; +} + +static const TypeInfo astro_chip_info = { + .name = TYPE_ASTRO_CHIP, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_init = astro_init, + .instance_size = sizeof(AstroState), + .class_init = astro_class_init, +}; + +static void astro_iommu_memory_region_class_init(ObjectClass *klass, + void *data) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); + + imrc->translate = astro_translate_iommu; +} + +static const TypeInfo astro_iommu_memory_region_info = { + .parent = TYPE_IOMMU_MEMORY_REGION, + .name = TYPE_ASTRO_IOMMU_MEMORY_REGION, + .class_init = astro_iommu_memory_region_class_init, +}; + + +static void astro_register_types(void) +{ + type_register_static(&astro_chip_info); + type_register_static(&astro_iommu_memory_region_info); +} + +type_init(astro_register_types) diff --git a/hw/pci-host/meson.build b/hw/pci-host/meson.build index 64eada76fe..f891f026cb 100644 --- a/hw/pci-host/meson.build +++ b/hw/pci-host/meson.build @@ -27,6 +27,7 @@ pci_ss.add(when: 'CONFIG_MV64361', if_true: files('mv64361.c')) pci_ss.add(when: 'CONFIG_VERSATILE_PCI', if_true: files('versatile.c')) # HPPA devices +pci_ss.add(when: 'CONFIG_ASTRO', if_true: files('astro.c')) pci_ss.add(when: 'CONFIG_DINO', if_true: files('dino.c')) system_ss.add_all(when: 'CONFIG_PCI', if_true: pci_ss) diff --git a/hw/pci-host/trace-events b/hw/pci-host/trace-events index 9d216bb89f..b2f47e6335 100644 --- a/hw/pci-host/trace-events +++ b/hw/pci-host/trace-events @@ -46,3 +46,14 @@ pnv_phb4_xive_notify_abt(uint64_t notif_port, uint64_t data) "notif=@0x%"PRIx64" dino_chip_mem_valid(uint64_t addr, uint32_t val) "access to addr 0x%"PRIx64" is %d" dino_chip_read(uint64_t addr, uint32_t val) "addr 0x%"PRIx64" val 0x%08x" dino_chip_write(uint64_t addr, uint32_t val) "addr 0x%"PRIx64" val 0x%08x" + +# astro.c +astro_chip_mem_valid(uint64_t addr, uint32_t val) "access to addr 0x%"PRIx64" is %d" +astro_chip_read(uint64_t addr, int size, uint64_t val) "addr 0x%"PRIx64" size %d val 0x%"PRIx64 +astro_chip_write(uint64_t addr, int size, uint64_t val) "addr 0x%"PRIx64" size %d val 0x%"PRIx64 +elroy_read(uint64_t addr, int size, uint64_t val) "addr 0x%"PRIx64" size %d val 0x%"PRIx64 +elroy_write(uint64_t addr, int size, uint64_t val) "addr 0x%"PRIx64" size %d val 0x%"PRIx64 +elroy_pci_config_data_read(uint64_t addr, int size, uint64_t val) "addr 0x%"PRIx64" size %d val 0x%"PRIx64 +elroy_pci_config_data_write(uint64_t addr, int size, uint64_t val) "addr 0x%"PRIx64" size %d val 0x%"PRIx64 +iosapic_reg_write(uint64_t reg_select, int size, uint64_t val) "reg_select 0x%"PRIx64" size %d val 0x%"PRIx64 +iosapic_reg_read(uint64_t reg_select, int size, uint64_t val) "reg_select 0x%"PRIx64" size %d val 0x%"PRIx64 diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c index cc44d5e339..d28ed3ba73 100644 --- a/hw/ppc/pef.c +++ b/hw/ppc/pef.c @@ -63,7 +63,7 @@ static int kvmppc_svm_init(ConfidentialGuestSupport *cgs, Error **errp) /* add migration blocker */ error_setg(&pef_mig_blocker, "PEF: Migration is not implemented"); /* NB: This can fail if --only-migratable is used */ - migrate_add_blocker(pef_mig_blocker, &error_fatal); + migrate_add_blocker(&pef_mig_blocker, &error_fatal); cgs->ready = true; diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index cb840676d3..b25093be28 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1761,7 +1761,7 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) /* Signal all vCPUs waiting on this condition */ qemu_cond_broadcast(&spapr->fwnmi_machine_check_interlock_cond); - migrate_del_blocker(spapr->fwnmi_migration_blocker); + migrate_del_blocker(&spapr->fwnmi_migration_blocker); } static void spapr_create_nvram(SpaprMachineState *spapr) @@ -2937,13 +2937,6 @@ static void spapr_machine_init(MachineState *machine) spapr_create_lmb_dr_connectors(spapr); } - if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI) == SPAPR_CAP_ON) { - /* Create the error string for live migration blocker */ - error_setg(&spapr->fwnmi_migration_blocker, - "A machine check is being handled during migration. The handler" - "may run and log hardware error on the destination"); - } - if (mc->nvdimm_supported) { spapr_create_nvdimm_dr_connectors(spapr); } diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 4508e40814..deb4641505 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -920,7 +920,11 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) * fails when running with -only-migrate. A proper interface to * delay migration completion for a bit could avoid that. */ - ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, NULL); + error_setg(&spapr->fwnmi_migration_blocker, + "A machine check is being handled during migration. The handler" + "may run and log hardware error on the destination"); + + ret = migrate_add_blocker(&spapr->fwnmi_migration_blocker, NULL); if (ret == -EBUSY) { warn_report("Received a fwnmi while migration was in progress"); } diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 7df21581c2..26c384b261 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -496,7 +496,7 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu, spapr->fwnmi_machine_check_interlock = -1; qemu_cond_signal(&spapr->fwnmi_machine_check_interlock_cond); rtas_st(rets, 0, RTAS_OUT_SUCCESS); - migrate_del_blocker(spapr->fwnmi_migration_blocker); + migrate_del_blocker(&spapr->fwnmi_migration_blocker); } static struct rtas_call { diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c index 2052d721e5..fbc85a8d36 100644 --- a/hw/remote/proxy.c +++ b/hw/remote/proxy.c @@ -107,8 +107,7 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp) error_setg(&dev->migration_blocker, "%s does not support migration", TYPE_PCI_PROXY_DEV); - if (migrate_add_blocker(dev->migration_blocker, errp) < 0) { - error_free(dev->migration_blocker); + if (migrate_add_blocker(&dev->migration_blocker, errp) < 0) { object_unref(dev->ioc); return; } @@ -134,9 +133,7 @@ static void pci_proxy_dev_exit(PCIDevice *pdev) qio_channel_close(dev->ioc, NULL); } - migrate_del_blocker(dev->migration_blocker); - - error_free(dev->migration_blocker); + migrate_del_blocker(&dev->migration_blocker); proxy_memory_listener_deconfigure(&dev->proxy_listener); diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c new file mode 100644 index 0000000000..f16bdf65fa --- /dev/null +++ b/hw/s390x/cpu-topology.c @@ -0,0 +1,469 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * CPU Topology + * + * Copyright IBM Corp. 2022, 2023 + * Author(s): Pierre Morel <pmorel@linux.ibm.com> + * + * S390 topology handling can be divided in two parts: + * + * - The first part in this file is taking care of all common functions + * used by KVM and TCG to create and modify the topology. + * + * - The second part, building the topology information data for the + * guest with CPU and KVM specificity will be implemented inside + * the target/s390/kvm sub tree. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "hw/qdev-properties.h" +#include "hw/boards.h" +#include "target/s390x/cpu.h" +#include "hw/s390x/s390-virtio-ccw.h" +#include "hw/s390x/cpu-topology.h" +#include "qapi/qapi-commands-machine-target.h" +#include "qapi/qapi-events-machine-target.h" + +/* + * s390_topology is used to keep the topology information. + * .cores_per_socket: tracks information on the count of cores + * per socket. + * .polarization: tracks machine polarization. + */ +S390Topology s390_topology = { + /* will be initialized after the CPU model is realized */ + .cores_per_socket = NULL, + .polarization = S390_CPU_POLARIZATION_HORIZONTAL, +}; + +/** + * s390_socket_nb: + * @cpu: s390x CPU + * + * Returns the socket number used inside the cores_per_socket array + * for a topology tree entry + */ +static int s390_socket_nb_from_ids(int drawer_id, int book_id, int socket_id) +{ + return (drawer_id * current_machine->smp.books + book_id) * + current_machine->smp.sockets + socket_id; +} + +/** + * s390_socket_nb: + * @cpu: s390x CPU + * + * Returns the socket number used inside the cores_per_socket array + * for a cpu. + */ +static int s390_socket_nb(S390CPU *cpu) +{ + return s390_socket_nb_from_ids(cpu->env.drawer_id, cpu->env.book_id, + cpu->env.socket_id); +} + +/** + * s390_has_topology: + * + * Return: true if the topology is supported by the machine. + */ +bool s390_has_topology(void) +{ + return s390_has_feat(S390_FEAT_CONFIGURATION_TOPOLOGY); +} + +/** + * s390_topology_init: + * @ms: the machine state where the machine topology is defined + * + * Keep track of the machine topology. + * + * Allocate an array to keep the count of cores per socket. + * The index of the array starts at socket 0 from book 0 and + * drawer 0 up to the maximum allowed by the machine topology. + */ +static void s390_topology_init(MachineState *ms) +{ + CpuTopology *smp = &ms->smp; + + s390_topology.cores_per_socket = g_new0(uint8_t, smp->sockets * + smp->books * smp->drawers); +} + +/* + * s390_handle_ptf: + * + * @register 1: contains the function code + * + * Function codes 0 (horizontal) and 1 (vertical) define the CPU + * polarization requested by the guest. + * + * Function code 2 is handling topology changes and is interpreted + * by the SIE. + */ +void s390_handle_ptf(S390CPU *cpu, uint8_t r1, uintptr_t ra) +{ + CpuS390Polarization polarization; + CPUS390XState *env = &cpu->env; + uint64_t reg = env->regs[r1]; + int fc = reg & S390_TOPO_FC_MASK; + + if (!s390_has_feat(S390_FEAT_CONFIGURATION_TOPOLOGY)) { + s390_program_interrupt(env, PGM_OPERATION, ra); + return; + } + + if (env->psw.mask & PSW_MASK_PSTATE) { + s390_program_interrupt(env, PGM_PRIVILEGED, ra); + return; + } + + if (reg & ~S390_TOPO_FC_MASK) { + s390_program_interrupt(env, PGM_SPECIFICATION, ra); + return; + } + + polarization = S390_CPU_POLARIZATION_VERTICAL; + switch (fc) { + case 0: + polarization = S390_CPU_POLARIZATION_HORIZONTAL; + /* fallthrough */ + case 1: + if (s390_topology.polarization == polarization) { + env->regs[r1] |= S390_PTF_REASON_DONE; + setcc(cpu, 2); + } else { + s390_topology.polarization = polarization; + s390_cpu_topology_set_changed(true); + qapi_event_send_cpu_polarization_change(polarization); + setcc(cpu, 0); + } + break; + default: + /* Note that fc == 2 is interpreted by the SIE */ + s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } +} + +/** + * s390_topology_reset: + * + * Generic reset for CPU topology, calls s390_topology_reset() + * to reset the kernel Modified Topology Change Record. + */ +void s390_topology_reset(void) +{ + s390_cpu_topology_set_changed(false); + s390_topology.polarization = S390_CPU_POLARIZATION_HORIZONTAL; +} + +/** + * s390_topology_cpu_default: + * @cpu: pointer to a S390CPU + * @errp: Error pointer + * + * Setup the default topology if no attributes are already set. + * Passing a CPU with some, but not all, attributes set is considered + * an error. + * + * The function calculates the (drawer_id, book_id, socket_id) + * topology by filling the cores starting from the first socket + * (0, 0, 0) up to the last (smp->drawers, smp->books, smp->sockets). + * + * CPU type and dedication have defaults values set in the + * s390x_cpu_properties, entitlement must be adjust depending on the + * dedication. + * + * Returns false if it is impossible to setup a default topology + * true otherwise. + */ +static bool s390_topology_cpu_default(S390CPU *cpu, Error **errp) +{ + CpuTopology *smp = ¤t_machine->smp; + CPUS390XState *env = &cpu->env; + + /* All geometry topology attributes must be set or all unset */ + if ((env->socket_id < 0 || env->book_id < 0 || env->drawer_id < 0) && + (env->socket_id >= 0 || env->book_id >= 0 || env->drawer_id >= 0)) { + error_setg(errp, + "Please define all or none of the topology geometry attributes"); + return false; + } + + /* If one value is unset all are unset -> calculate defaults */ + if (env->socket_id < 0) { + env->socket_id = s390_std_socket(env->core_id, smp); + env->book_id = s390_std_book(env->core_id, smp); + env->drawer_id = s390_std_drawer(env->core_id, smp); + } + + /* + * When the user specifies the entitlement as 'auto' on the command line, + * QEMU will set the entitlement as: + * Medium when the CPU is not dedicated. + * High when dedicated is true. + */ + if (env->entitlement == S390_CPU_ENTITLEMENT_AUTO) { + if (env->dedicated) { + env->entitlement = S390_CPU_ENTITLEMENT_HIGH; + } else { + env->entitlement = S390_CPU_ENTITLEMENT_MEDIUM; + } + } + return true; +} + +/** + * s390_topology_check: + * @socket_id: socket to check + * @book_id: book to check + * @drawer_id: drawer to check + * @entitlement: entitlement to check + * @dedicated: dedication to check + * @errp: Error pointer + * + * The function checks if the topology + * attributes fits inside the system topology. + * + * Returns false if the specified topology does not match with + * the machine topology. + */ +static bool s390_topology_check(uint16_t socket_id, uint16_t book_id, + uint16_t drawer_id, uint16_t entitlement, + bool dedicated, Error **errp) +{ + CpuTopology *smp = ¤t_machine->smp; + + if (socket_id >= smp->sockets) { + error_setg(errp, "Unavailable socket: %d", socket_id); + return false; + } + if (book_id >= smp->books) { + error_setg(errp, "Unavailable book: %d", book_id); + return false; + } + if (drawer_id >= smp->drawers) { + error_setg(errp, "Unavailable drawer: %d", drawer_id); + return false; + } + if (entitlement >= S390_CPU_ENTITLEMENT__MAX) { + error_setg(errp, "Unknown entitlement: %d", entitlement); + return false; + } + if (dedicated && (entitlement == S390_CPU_ENTITLEMENT_LOW || + entitlement == S390_CPU_ENTITLEMENT_MEDIUM)) { + error_setg(errp, "A dedicated CPU implies high entitlement"); + return false; + } + return true; +} + +/** + * s390_topology_need_report + * @cpu: Current cpu + * @drawer_id: future drawer ID + * @book_id: future book ID + * @socket_id: future socket ID + * @entitlement: future entitlement + * @dedicated: future dedicated + * + * A modified topology change report is needed if the topology + * tree or the topology attributes change. + */ +static bool s390_topology_need_report(S390CPU *cpu, int drawer_id, + int book_id, int socket_id, + uint16_t entitlement, bool dedicated) +{ + return cpu->env.drawer_id != drawer_id || + cpu->env.book_id != book_id || + cpu->env.socket_id != socket_id || + cpu->env.entitlement != entitlement || + cpu->env.dedicated != dedicated; +} + +/** + * s390_update_cpu_props: + * @ms: the machine state + * @cpu: the CPU for which to update the properties from the environment. + * + */ +static void s390_update_cpu_props(MachineState *ms, S390CPU *cpu) +{ + CpuInstanceProperties *props; + + props = &ms->possible_cpus->cpus[cpu->env.core_id].props; + + props->socket_id = cpu->env.socket_id; + props->book_id = cpu->env.book_id; + props->drawer_id = cpu->env.drawer_id; +} + +/** + * s390_topology_setup_cpu: + * @ms: MachineState used to initialize the topology structure on + * first call. + * @cpu: the new S390CPU to insert in the topology structure + * @errp: the error pointer + * + * Called from CPU hotplug to check and setup the CPU attributes + * before the CPU is inserted in the topology. + * There is no need to update the MTCR explicitly here because it + * will be updated by KVM on creation of the new CPU. + */ +void s390_topology_setup_cpu(MachineState *ms, S390CPU *cpu, Error **errp) +{ + int entry; + + /* + * We do not want to initialize the topology if the CPU model + * does not support topology, consequently, we have to wait for + * the first CPU to be realized, which realizes the CPU model + * to initialize the topology structures. + * + * s390_topology_setup_cpu() is called from the CPU hotplug. + */ + if (!s390_topology.cores_per_socket) { + s390_topology_init(ms); + } + + if (!s390_topology_cpu_default(cpu, errp)) { + return; + } + + if (!s390_topology_check(cpu->env.socket_id, cpu->env.book_id, + cpu->env.drawer_id, cpu->env.entitlement, + cpu->env.dedicated, errp)) { + return; + } + + /* Do we still have space in the socket */ + entry = s390_socket_nb(cpu); + if (s390_topology.cores_per_socket[entry] >= ms->smp.cores) { + error_setg(errp, "No more space on this socket"); + return; + } + + /* Update the count of cores in sockets */ + s390_topology.cores_per_socket[entry] += 1; + + /* topology tree is reflected in props */ + s390_update_cpu_props(ms, cpu); +} + +static void s390_change_topology(uint16_t core_id, + bool has_socket_id, uint16_t socket_id, + bool has_book_id, uint16_t book_id, + bool has_drawer_id, uint16_t drawer_id, + bool has_entitlement, + CpuS390Entitlement entitlement, + bool has_dedicated, bool dedicated, + Error **errp) +{ + MachineState *ms = current_machine; + int old_socket_entry; + int new_socket_entry; + bool report_needed; + S390CPU *cpu; + + cpu = s390_cpu_addr2state(core_id); + if (!cpu) { + error_setg(errp, "Core-id %d does not exist!", core_id); + return; + } + + /* Get attributes not provided from cpu and verify the new topology */ + if (!has_socket_id) { + socket_id = cpu->env.socket_id; + } + if (!has_book_id) { + book_id = cpu->env.book_id; + } + if (!has_drawer_id) { + drawer_id = cpu->env.drawer_id; + } + if (!has_dedicated) { + dedicated = cpu->env.dedicated; + } + + /* + * When the user specifies the entitlement as 'auto' on the command line, + * QEMU will set the entitlement as: + * Medium when the CPU is not dedicated. + * High when dedicated is true. + */ + if (!has_entitlement || entitlement == S390_CPU_ENTITLEMENT_AUTO) { + if (dedicated) { + entitlement = S390_CPU_ENTITLEMENT_HIGH; + } else { + entitlement = S390_CPU_ENTITLEMENT_MEDIUM; + } + } + + if (!s390_topology_check(socket_id, book_id, drawer_id, + entitlement, dedicated, errp)) { + return; + } + + /* Check for space on new socket */ + old_socket_entry = s390_socket_nb(cpu); + new_socket_entry = s390_socket_nb_from_ids(drawer_id, book_id, socket_id); + + if (new_socket_entry != old_socket_entry) { + if (s390_topology.cores_per_socket[new_socket_entry] >= + ms->smp.cores) { + error_setg(errp, "No more space on this socket"); + return; + } + /* Update the count of cores in sockets */ + s390_topology.cores_per_socket[new_socket_entry] += 1; + s390_topology.cores_per_socket[old_socket_entry] -= 1; + } + + /* Check if we will need to report the modified topology */ + report_needed = s390_topology_need_report(cpu, drawer_id, book_id, + socket_id, entitlement, + dedicated); + + /* All checks done, report new topology into the vCPU */ + cpu->env.drawer_id = drawer_id; + cpu->env.book_id = book_id; + cpu->env.socket_id = socket_id; + cpu->env.dedicated = dedicated; + cpu->env.entitlement = entitlement; + + /* topology tree is reflected in props */ + s390_update_cpu_props(ms, cpu); + + /* Advertise the topology change */ + if (report_needed) { + s390_cpu_topology_set_changed(true); + } +} + +void qmp_set_cpu_topology(uint16_t core, + bool has_socket, uint16_t socket, + bool has_book, uint16_t book, + bool has_drawer, uint16_t drawer, + bool has_entitlement, CpuS390Entitlement entitlement, + bool has_dedicated, bool dedicated, + Error **errp) +{ + if (!s390_has_topology()) { + error_setg(errp, "This machine doesn't support topology"); + return; + } + + s390_change_topology(core, has_socket, socket, has_book, book, + has_drawer, drawer, has_entitlement, entitlement, + has_dedicated, dedicated, errp); +} + +CpuPolarizationInfo *qmp_query_s390x_cpu_polarization(Error **errp) +{ + CpuPolarizationInfo *info = g_new0(CpuPolarizationInfo, 1); + + info->polarization = s390_topology.polarization; + return info; +} diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build index 6fd096813a..482fd13420 100644 --- a/hw/s390x/meson.build +++ b/hw/s390x/meson.build @@ -23,6 +23,7 @@ s390x_ss.add(when: 'CONFIG_KVM', if_true: files( 's390-skeys-kvm.c', 's390-stattrib-kvm.c', 's390-pci-kvm.c', + 'cpu-topology.c', )) s390x_ss.add(when: 'CONFIG_TCG', if_true: files( 'tod-tcg.c', diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 2d75f2131f..7262725d2e 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -45,6 +45,7 @@ #include "target/s390x/kvm/pv.h" #include "migration/blocker.h" #include "qapi/visitor.h" +#include "hw/s390x/cpu-topology.h" static Error *pv_mig_blocker; @@ -123,6 +124,9 @@ static void subsystem_reset(void) device_cold_reset(dev); } } + if (s390_has_topology()) { + s390_topology_reset(); + } } static int virtio_ccw_hcall_notify(const uint64_t *args) @@ -309,10 +313,18 @@ static void s390_cpu_plug(HotplugHandler *hotplug_dev, { MachineState *ms = MACHINE(hotplug_dev); S390CPU *cpu = S390_CPU(dev); + ERRP_GUARD(); g_assert(!ms->possible_cpus->cpus[cpu->env.core_id].cpu); ms->possible_cpus->cpus[cpu->env.core_id].cpu = OBJECT(dev); + if (s390_has_topology()) { + s390_topology_setup_cpu(ms, cpu, errp); + if (*errp) { + return; + } + } + if (dev->hotplugged) { raise_irq_cpu_hotplug(); } @@ -332,8 +344,7 @@ static void s390_machine_unprotect(S390CcwMachineState *ms) s390_pv_vm_disable(); } ms->pv = false; - migrate_del_blocker(pv_mig_blocker); - error_free_or_abort(&pv_mig_blocker); + migrate_del_blocker(&pv_mig_blocker); ram_block_discard_disable(false); } @@ -356,11 +367,10 @@ static int s390_machine_protect(S390CcwMachineState *ms) error_setg(&pv_mig_blocker, "protected VMs are currently not migratable."); - rc = migrate_add_blocker(pv_mig_blocker, &local_err); + rc = migrate_add_blocker(&pv_mig_blocker, &local_err); if (rc) { ram_block_discard_disable(false); error_report_err(local_err); - error_free_or_abort(&pv_mig_blocker); return rc; } @@ -368,8 +378,7 @@ static int s390_machine_protect(S390CcwMachineState *ms) rc = s390_pv_vm_enable(); if (rc) { ram_block_discard_disable(false); - migrate_del_blocker(pv_mig_blocker); - error_free_or_abort(&pv_mig_blocker); + migrate_del_blocker(&pv_mig_blocker); return rc; } @@ -562,11 +571,20 @@ static const CPUArchIdList *s390_possible_cpu_arch_ids(MachineState *ms) sizeof(CPUArchId) * max_cpus); ms->possible_cpus->len = max_cpus; for (i = 0; i < ms->possible_cpus->len; i++) { + CpuInstanceProperties *props = &ms->possible_cpus->cpus[i].props; + ms->possible_cpus->cpus[i].type = ms->cpu_type; ms->possible_cpus->cpus[i].vcpus_count = 1; ms->possible_cpus->cpus[i].arch_id = i; - ms->possible_cpus->cpus[i].props.has_core_id = true; - ms->possible_cpus->cpus[i].props.core_id = i; + + props->has_core_id = true; + props->core_id = i; + props->has_socket_id = true; + props->socket_id = s390_std_socket(i, &ms->smp); + props->has_book_id = true; + props->book_id = s390_std_book(i, &ms->smp); + props->has_drawer_id = true; + props->drawer_id = s390_std_drawer(i, &ms->smp); } return ms->possible_cpus; @@ -744,6 +762,8 @@ static void ccw_machine_class_init(ObjectClass *oc, void *data) mc->no_sdcard = 1; mc->max_cpus = S390_MAX_CPUS; mc->has_hotpluggable_cpus = true; + mc->smp_props.books_supported = true; + mc->smp_props.drawers_supported = true; assert(!mc->get_hotplug_handler); mc->get_hotplug_handler = s390_get_hotplug_handler; mc->cpu_index_to_instance_props = s390_cpu_index_to_props; @@ -853,6 +873,8 @@ static void ccw_machine_8_1_class_options(MachineClass *mc) { ccw_machine_8_2_class_options(mc); compat_props_add(mc->compat_props, hw_compat_8_1, hw_compat_8_1_len); + mc->smp_props.drawers_supported = false; + mc->smp_props.books_supported = false; } DEFINE_CCW_MACHINE(8_1, "8.1", false); diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c index eff74479f4..d339cbb7e4 100644 --- a/hw/s390x/sclp.c +++ b/hw/s390x/sclp.c @@ -20,6 +20,7 @@ #include "hw/s390x/event-facility.h" #include "hw/s390x/s390-pci-bus.h" #include "hw/s390x/ipl.h" +#include "hw/s390x/cpu-topology.h" static inline SCLPDevice *get_sclp_device(void) { @@ -123,6 +124,10 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) return; } + if (s390_has_topology()) { + read_info->stsi_parm = SCLP_READ_SCP_INFO_MNEST; + } + /* CPU information */ prepare_cpu_entries(machine, entries_start, &cpu_count); read_info->entries_cpu = cpu_to_be16(cpu_count); diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 443f67daa4..14e23cc5b4 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -208,7 +208,7 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) "When external environment supports it (Orchestrator migrates " "target SCSI device state or use shared storage over network), " "set 'migratable' property to true to enable migration."); - if (migrate_add_blocker(vsc->migration_blocker, errp) < 0) { + if (migrate_add_blocker(&vsc->migration_blocker, errp) < 0) { goto free_virtio; } } @@ -241,10 +241,9 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) free_vqs: g_free(vqs); if (!vsc->migratable) { - migrate_del_blocker(vsc->migration_blocker); + migrate_del_blocker(&vsc->migration_blocker); } free_virtio: - error_free(vsc->migration_blocker); virtio_scsi_common_unrealize(dev); close_fd: if (vhostfd >= 0) { @@ -260,8 +259,7 @@ static void vhost_scsi_unrealize(DeviceState *dev) struct vhost_virtqueue *vqs = vsc->dev.vqs; if (!vsc->migratable) { - migrate_del_blocker(vsc->migration_blocker); - error_free(vsc->migration_blocker); + migrate_del_blocker(&vsc->migration_blocker); } /* This will stop vhost backend. */ diff --git a/hw/timer/npcm7xx_timer.c b/hw/timer/npcm7xx_timer.c index 32f5e021f8..a8bd93aeb2 100644 --- a/hw/timer/npcm7xx_timer.c +++ b/hw/timer/npcm7xx_timer.c @@ -138,6 +138,9 @@ static int64_t npcm7xx_timer_count_to_ns(NPCM7xxTimer *t, uint32_t count) /* Convert a time interval in nanoseconds to a timer cycle count. */ static uint32_t npcm7xx_timer_ns_to_count(NPCM7xxTimer *t, int64_t ns) { + if (ns < 0) { + return 0; + } return clock_ns_to_ticks(t->ctrl->clock, ns) / npcm7xx_tcsr_prescaler(t->tcsr); } diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 5ff5acf1d8..d806057b40 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -129,11 +129,7 @@ int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp) error_setg(&multiple_devices_migration_blocker, "Multiple VFIO devices migration is supported only if all of " "them support P2P migration"); - ret = migrate_add_blocker(multiple_devices_migration_blocker, errp); - if (ret < 0) { - error_free(multiple_devices_migration_blocker); - multiple_devices_migration_blocker = NULL; - } + ret = migrate_add_blocker(&multiple_devices_migration_blocker, errp); return ret; } @@ -145,9 +141,7 @@ void vfio_unblock_multiple_devices_migration(void) return; } - migrate_del_blocker(multiple_devices_migration_blocker); - error_free(multiple_devices_migration_blocker); - multiple_devices_migration_blocker = NULL; + migrate_del_blocker(&multiple_devices_migration_blocker); } bool vfio_viommu_preset(VFIODevice *vbasedev) diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index da43dcd2fe..28d422b39f 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -872,8 +872,8 @@ static int vfio_migration_init(VFIODevice *vbasedev) NULL; migration->vm_state = qdev_add_vm_change_state_handler_full( vbasedev->dev, vfio_vmstate_change, prepare_cb, vbasedev); - migration->migration_state.notify = vfio_migration_state_notifier; - add_migration_state_change_notifier(&migration->migration_state); + migration_add_notifier(&migration->migration_state, + vfio_migration_state_notifier); return 0; } @@ -882,7 +882,7 @@ static void vfio_migration_deinit(VFIODevice *vbasedev) { VFIOMigration *migration = vbasedev->migration; - remove_migration_state_change_notifier(&migration->migration_state); + migration_remove_notifier(&migration->migration_state); qemu_del_vm_change_state_handler(migration->vm_state); unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); vfio_migration_free(vbasedev); @@ -891,8 +891,6 @@ static void vfio_migration_deinit(VFIODevice *vbasedev) static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) { - int ret; - if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { error_propagate(errp, err); return -EINVAL; @@ -901,13 +899,7 @@ static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) vbasedev->migration_blocker = error_copy(err); error_free(err); - ret = migrate_add_blocker(vbasedev->migration_blocker, errp); - if (ret < 0) { - error_free(vbasedev->migration_blocker); - vbasedev->migration_blocker = NULL; - } - - return ret; + return migrate_add_blocker(&vbasedev->migration_blocker, errp); } /* ---------------------------------------------------------------------- */ @@ -994,9 +986,5 @@ void vfio_migration_exit(VFIODevice *vbasedev) vfio_migration_deinit(vbasedev); } - if (vbasedev->migration_blocker) { - migrate_del_blocker(vbasedev->migration_blocker); - error_free(vbasedev->migration_blocker); - vbasedev->migration_blocker = NULL; - } + migrate_del_blocker(&vbasedev->migration_blocker); } diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 9f37206ba0..d737671028 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1527,9 +1527,8 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, } if (hdev->migration_blocker != NULL) { - r = migrate_add_blocker(hdev->migration_blocker, errp); + r = migrate_add_blocker(&hdev->migration_blocker, errp); if (r < 0) { - error_free(hdev->migration_blocker); goto fail_busyloop; } } @@ -1597,10 +1596,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) memory_listener_unregister(&hdev->memory_listener); QLIST_REMOVE(hdev, entry); } - if (hdev->migration_blocker) { - migrate_del_blocker(hdev->migration_blocker); - error_free(hdev->migration_blocker); - } + migrate_del_blocker(&hdev->migration_blocker); g_free(hdev->mem); g_free(hdev->mem_sections); if (hdev->vhost_ops) { diff --git a/include/hw/arm/bsa.h b/include/hw/arm/bsa.h new file mode 100644 index 0000000000..8eaab603c0 --- /dev/null +++ b/include/hw/arm/bsa.h @@ -0,0 +1,35 @@ +/* + * Common definitions for Arm Base System Architecture (BSA) platforms. + * + * Copyright (c) 2015 Linaro Limited + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + * + */ + +#ifndef QEMU_ARM_BSA_H +#define QEMU_ARM_BSA_H + +/* These are architectural INTID values */ +#define VIRTUAL_PMU_IRQ 23 +#define ARCH_GIC_MAINT_IRQ 25 +#define ARCH_TIMER_NS_EL2_IRQ 26 +#define ARCH_TIMER_VIRT_IRQ 27 +#define ARCH_TIMER_NS_EL2_VIRT_IRQ 28 +#define ARCH_TIMER_S_EL1_IRQ 29 +#define ARCH_TIMER_NS_EL1_IRQ 30 + +#define INTID_TO_PPI(irq) ((irq) - 16) + +#endif /* QEMU_ARM_BSA_H */ diff --git a/include/hw/arm/exynos4210.h b/include/hw/arm/exynos4210.h index 68db19f0cb..d33fe38586 100644 --- a/include/hw/arm/exynos4210.h +++ b/include/hw/arm/exynos4210.h @@ -30,7 +30,7 @@ #include "hw/intc/exynos4210_gic.h" #include "hw/intc/exynos4210_combiner.h" #include "hw/core/split-irq.h" -#include "target/arm/cpu-qom.h" +#include "hw/arm/boot.h" #include "qom/object.h" #define EXYNOS4210_NCPUS 2 diff --git a/include/hw/misc/raspberrypi-fw-defs.h b/include/hw/arm/raspberrypi-fw-defs.h index 4551fe7450..4551fe7450 100644 --- a/include/hw/misc/raspberrypi-fw-defs.h +++ b/include/hw/arm/raspberrypi-fw-defs.h diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index e1ddbea96b..f69239850e 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -34,6 +34,7 @@ #include "qemu/notify.h" #include "hw/boards.h" #include "hw/arm/boot.h" +#include "hw/arm/bsa.h" #include "hw/block/flash.h" #include "sysemu/kvm.h" #include "hw/intc/arm_gicv3_common.h" @@ -43,17 +44,6 @@ #define NUM_VIRTIO_TRANSPORTS 32 #define NUM_SMMU_IRQS 4 -#define ARCH_GIC_MAINT_IRQ 9 - -#define ARCH_TIMER_VIRT_IRQ 11 -#define ARCH_TIMER_S_EL1_IRQ 13 -#define ARCH_TIMER_NS_EL1_IRQ 14 -#define ARCH_TIMER_NS_EL2_IRQ 10 - -#define VIRTUAL_PMU_IRQ 7 - -#define PPI(irq) ((irq) + 16) - /* See Linux kernel arch/arm64/include/asm/pvclock-abi.h */ #define PVTIME_SIZE_PER_CPU 64 diff --git a/include/hw/audio/pcspk.h b/include/hw/audio/pcspk.h index 9506179587..6be75a6b86 100644 --- a/include/hw/audio/pcspk.h +++ b/include/hw/audio/pcspk.h @@ -25,16 +25,6 @@ #ifndef HW_PCSPK_H #define HW_PCSPK_H -#include "hw/isa/isa.h" -#include "hw/qdev-properties.h" -#include "qapi/error.h" - #define TYPE_PC_SPEAKER "isa-pcspk" -static inline void pcspk_init(ISADevice *isadev, ISABus *bus, ISADevice *pit) -{ - object_property_set_link(OBJECT(isadev), "pit", OBJECT(pit), NULL); - isa_realize_and_unref(isadev, bus, &error_fatal); -} - #endif /* HW_PCSPK_H */ diff --git a/include/hw/boards.h b/include/hw/boards.h index 43a56dc51e..a735999298 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -135,12 +135,16 @@ typedef struct { * @clusters_supported - whether clusters are supported by the machine * @has_clusters - whether clusters are explicitly specified in the user * provided SMP configuration + * @books_supported - whether books are supported by the machine + * @drawers_supported - whether drawers are supported by the machine */ typedef struct { bool prefer_sockets; bool dies_supported; bool clusters_supported; bool has_clusters; + bool books_supported; + bool drawers_supported; } SMPCompatProps; /** @@ -323,7 +327,9 @@ typedef struct DeviceMemoryState { /** * CpuTopology: * @cpus: the number of present logical processors on the machine - * @sockets: the number of sockets on the machine + * @drawers: the number of drawers on the machine + * @books: the number of books in one drawer + * @sockets: the number of sockets in one book * @dies: the number of dies in one socket * @clusters: the number of clusters in one die * @cores: the number of cores in one cluster @@ -332,6 +338,8 @@ typedef struct DeviceMemoryState { */ typedef struct CpuTopology { unsigned int cpus; + unsigned int drawers; + unsigned int books; unsigned int sockets; unsigned int dies; unsigned int clusters; diff --git a/include/hw/nvram/xlnx-bbram.h b/include/hw/nvram/xlnx-bbram.h index 87d59ef3c0..6fc13f8cc1 100644 --- a/include/hw/nvram/xlnx-bbram.h +++ b/include/hw/nvram/xlnx-bbram.h @@ -34,7 +34,7 @@ #define RMAX_XLNX_BBRAM ((0x4c / 4) + 1) -#define TYPE_XLNX_BBRAM "xlnx,bbram-ctrl" +#define TYPE_XLNX_BBRAM "xlnx.bbram-ctrl" OBJECT_DECLARE_SIMPLE_TYPE(XlnxBBRam, XLNX_BBRAM); struct XlnxBBRam { diff --git a/include/hw/pci-host/astro.h b/include/hw/pci-host/astro.h new file mode 100644 index 0000000000..f63fd220f3 --- /dev/null +++ b/include/hw/pci-host/astro.h @@ -0,0 +1,92 @@ +/* + * HP-PARISC Astro Bus connector with Elroy PCI host bridges + */ + +#ifndef ASTRO_H +#define ASTRO_H + +#include "hw/pci/pci_host.h" + +#define ASTRO_HPA 0xfed00000 + +#define ROPES_PER_IOC 8 /* per Ike half or Pluto/Astro */ + +#define TYPE_ASTRO_CHIP "astro-chip" +OBJECT_DECLARE_SIMPLE_TYPE(AstroState, ASTRO_CHIP) + +#define TYPE_ELROY_PCI_HOST_BRIDGE "elroy-pcihost" +OBJECT_DECLARE_SIMPLE_TYPE(ElroyState, ELROY_PCI_HOST_BRIDGE) + +#define ELROY_NUM 4 /* # of Elroys */ +#define ELROY_IRQS 8 /* IOSAPIC IRQs */ + +/* ASTRO Memory and I/O regions */ +#define LMMIO_DIST_BASE_ADDR 0xf4000000ULL +#define LMMIO_DIST_BASE_SIZE 0x4000000ULL + +#define IOS_DIST_BASE_ADDR 0xfffee00000ULL +#define IOS_DIST_BASE_SIZE 0x10000ULL + +struct AstroState; + +struct ElroyState { + PCIHostState parent_obj; + + /* parent Astro device */ + struct AstroState *astro; + + /* HPA of this Elroy */ + hwaddr hpa; + + /* PCI bus number (Elroy number) */ + unsigned int pci_bus_num; + + uint64_t config_address; + uint64_t config_reg_elroy; + + uint64_t status_control; + uint64_t arb_mask; + uint64_t mmio_base[(0x0250 - 0x200) / 8]; + uint64_t error_config; + + uint32_t iosapic_reg_select; + uint64_t iosapic_reg[0x20]; + + uint32_t ilr; + + MemoryRegion this_mem; + + MemoryRegion pci_mmio; + MemoryRegion pci_mmio_alias; + MemoryRegion pci_hole; + MemoryRegion pci_io; +}; + +struct AstroState { + PCIHostState parent_obj; + + uint64_t ioc_ctrl; + uint64_t ioc_status_ctrl; + uint64_t ioc_ranges[(0x03d8 - 0x300) / 8]; + uint64_t ioc_rope_config; + uint64_t ioc_status_control; + uint64_t ioc_flush_control; + uint64_t ioc_rope_control[8]; + uint64_t tlb_ibase; + uint64_t tlb_imask; + uint64_t tlb_pcom; + uint64_t tlb_tcnfg; + uint64_t tlb_pdir_base; + + struct ElroyState *elroy[ELROY_NUM]; + + MemoryRegion this_mem; + + MemoryRegion pci_mmio; + MemoryRegion pci_io; + + IOMMUMemoryRegion iommu; + AddressSpace iommu_as; +}; + +#endif diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index 85469b9b53..f1a53fea8d 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -179,6 +179,8 @@ #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_SCSI 0x2020 +#define PCI_VENDOR_ID_HP 0x103c + #define PCI_VENDOR_ID_TI 0x104c #define PCI_VENDOR_ID_MOTOROLA 0x1057 diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h index 0ac327ae60..e4f8a13afc 100644 --- a/include/hw/qdev-properties-system.h +++ b/include/hw/qdev-properties-system.h @@ -22,6 +22,7 @@ extern const PropertyInfo qdev_prop_audiodev; extern const PropertyInfo qdev_prop_off_auto_pcibar; extern const PropertyInfo qdev_prop_pcie_link_speed; extern const PropertyInfo qdev_prop_pcie_link_width; +extern const PropertyInfo qdev_prop_cpus390entitlement; #define DEFINE_PROP_PCI_DEVFN(_n, _s, _f, _d) \ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pci_devfn, int32_t) @@ -73,5 +74,8 @@ extern const PropertyInfo qdev_prop_pcie_link_width; #define DEFINE_PROP_UUID_NODEFAULT(_name, _state, _field) \ DEFINE_PROP(_name, _state, _field, qdev_prop_uuid, QemuUUID) +#define DEFINE_PROP_CPUS390ENTITLEMENT(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_cpus390entitlement, \ + CpuS390Entitlement) #endif diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h new file mode 100644 index 0000000000..c064f427e9 --- /dev/null +++ b/include/hw/s390x/cpu-topology.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * CPU Topology + * + * Copyright IBM Corp. 2022, 2023 + * Author(s): Pierre Morel <pmorel@linux.ibm.com> + * + */ +#ifndef HW_S390X_CPU_TOPOLOGY_H +#define HW_S390X_CPU_TOPOLOGY_H + +#ifndef CONFIG_USER_ONLY + +#include "qemu/queue.h" +#include "hw/boards.h" +#include "qapi/qapi-types-machine-target.h" + +#define S390_TOPOLOGY_CPU_IFL 0x03 + +typedef struct S390TopologyId { + uint8_t sentinel; + uint8_t drawer; + uint8_t book; + uint8_t socket; + uint8_t type; + uint8_t vertical:1; + uint8_t entitlement:2; + uint8_t dedicated; + uint8_t origin; +} S390TopologyId; + +typedef struct S390TopologyEntry { + QTAILQ_ENTRY(S390TopologyEntry) next; + S390TopologyId id; + uint64_t mask; +} S390TopologyEntry; + +typedef struct S390Topology { + uint8_t *cores_per_socket; + CpuS390Polarization polarization; +} S390Topology; + +typedef QTAILQ_HEAD(, S390TopologyEntry) S390TopologyList; + +#ifdef CONFIG_KVM +bool s390_has_topology(void); +void s390_topology_setup_cpu(MachineState *ms, S390CPU *cpu, Error **errp); +void s390_topology_reset(void); +#else +static inline bool s390_has_topology(void) +{ + return false; +} +static inline void s390_topology_setup_cpu(MachineState *ms, + S390CPU *cpu, + Error **errp) {} +static inline void s390_topology_reset(void) +{ + /* Unreachable, CPU topology not implemented for TCG */ + assert(false); +} +#endif + +extern S390Topology s390_topology; + +static inline int s390_std_socket(int n, CpuTopology *smp) +{ + return (n / smp->cores) % smp->sockets; +} + +static inline int s390_std_book(int n, CpuTopology *smp) +{ + return (n / (smp->cores * smp->sockets)) % smp->books; +} + +static inline int s390_std_drawer(int n, CpuTopology *smp) +{ + return (n / (smp->cores * smp->sockets * smp->books)) % smp->drawers; +} + +#endif /* CONFIG_USER_ONLY */ + +#endif diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h index 9bba21a916..c1d46e78af 100644 --- a/include/hw/s390x/s390-virtio-ccw.h +++ b/include/hw/s390x/s390-virtio-ccw.h @@ -30,6 +30,12 @@ struct S390CcwMachineState { uint8_t loadparm[8]; }; +#define S390_PTF_REASON_NONE (0x00 << 8) +#define S390_PTF_REASON_DONE (0x01 << 8) +#define S390_PTF_REASON_BUSY (0x02 << 8) +#define S390_TOPO_FC_MASK 0xffUL +void s390_handle_ptf(S390CPU *cpu, uint8_t r1, uintptr_t ra); + struct S390CcwMachineClass { /*< private >*/ MachineClass parent_class; diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h index cf1f2efae2..9aef6d9370 100644 --- a/include/hw/s390x/sclp.h +++ b/include/hw/s390x/sclp.h @@ -112,11 +112,13 @@ typedef struct CPUEntry { } QEMU_PACKED CPUEntry; #define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET 128 +#define SCLP_READ_SCP_INFO_MNEST 4 typedef struct ReadInfo { SCCBHeader h; uint16_t rnmax; uint8_t rnsize; - uint8_t _reserved1[16 - 11]; /* 11-15 */ + uint8_t _reserved1[15 - 11]; /* 11-14 */ + uint8_t stsi_parm; /* 15-15 */ uint16_t entries_cpu; /* 16-17 */ uint16_t offset_cpu; /* 18-19 */ uint8_t _reserved2[24 - 20]; /* 20-23 */ diff --git a/include/migration/blocker.h b/include/migration/blocker.h index 9cebe2ba06..b048f301b4 100644 --- a/include/migration/blocker.h +++ b/include/migration/blocker.h @@ -17,19 +17,23 @@ /** * @migrate_add_blocker - prevent migration from proceeding * - * @reason - an error to be returned whenever migration is attempted + * @reasonp - address of an error to be returned whenever migration is attempted * * @errp - [out] The reason (if any) we cannot block migration right now. * * @returns - 0 on success, -EBUSY/-EACCES on failure, with errp set. + * + * *@reasonp is freed and set to NULL if failure is returned. + * On success, the caller must not free @reasonp, except by + * calling migrate_del_blocker. */ -int migrate_add_blocker(Error *reason, Error **errp); +int migrate_add_blocker(Error **reasonp, Error **errp); /** * @migrate_add_blocker_internal - prevent migration from proceeding without * only-migrate implications * - * @reason - an error to be returned whenever migration is attempted + * @reasonp - address of an error to be returned whenever migration is attempted * * @errp - [out] The reason (if any) we cannot block migration right now. * @@ -38,14 +42,20 @@ int migrate_add_blocker(Error *reason, Error **errp); * Some of the migration blockers can be temporary (e.g., for a few seconds), * so it shouldn't need to conflict with "-only-migratable". For those cases, * we can call this function rather than @migrate_add_blocker(). + * + * *@reasonp is freed and set to NULL if failure is returned. + * On success, the caller must not free @reasonp, except by + * calling migrate_del_blocker. */ -int migrate_add_blocker_internal(Error *reason, Error **errp); +int migrate_add_blocker_internal(Error **reasonp, Error **errp); /** - * @migrate_del_blocker - remove a blocking error from migration + * @migrate_del_blocker - remove a blocking error from migration and free it. + * + * @reasonp - address of the error blocking migration * - * @reason - the error blocking migration + * This function frees *@reasonp and sets it to NULL. */ -void migrate_del_blocker(Error *reason); +void migrate_del_blocker(Error **reasonp); #endif diff --git a/include/migration/misc.h b/include/migration/misc.h index 7dcc0b5c2c..673ac490fb 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -60,8 +60,10 @@ void migration_object_init(void); void migration_shutdown(void); bool migration_is_idle(void); bool migration_is_active(MigrationState *); -void add_migration_state_change_notifier(Notifier *notify); -void remove_migration_state_change_notifier(Notifier *notify); +void migration_add_notifier(Notifier *notify, + void (*func)(Notifier *notifier, void *data)); +void migration_remove_notifier(Notifier *notify); +void migration_call_notifiers(MigrationState *s); bool migration_in_setup(MigrationState *); bool migration_has_finished(MigrationState *); bool migration_has_failed(MigrationState *); diff --git a/migration/migration.c b/migration/migration.c index 05c0b801ba..67547eb6a1 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1207,7 +1207,7 @@ static void migrate_fd_cleanup(MigrationState *s) /* It is used on info migrate. We can't free it */ error_report_err(error_copy(s->error)); } - notifier_list_notify(&migration_state_notifiers, s); + migration_call_notifiers(s); block_cleanup_parameters(); yank_unregister_instance(MIGRATION_YANK_INSTANCE); } @@ -1311,14 +1311,24 @@ static void migrate_fd_cancel(MigrationState *s) } } -void add_migration_state_change_notifier(Notifier *notify) +void migration_add_notifier(Notifier *notify, + void (*func)(Notifier *notifier, void *data)) { + notify->notify = func; notifier_list_add(&migration_state_notifiers, notify); } -void remove_migration_state_change_notifier(Notifier *notify) +void migration_remove_notifier(Notifier *notify) { - notifier_remove(notify); + if (notify->notify) { + notifier_remove(notify); + notify->notify = NULL; + } +} + +void migration_call_notifiers(MigrationState *s) +{ + notifier_list_notify(&migration_state_notifiers, s); } bool migration_in_setup(MigrationState *s) @@ -1465,35 +1475,41 @@ int migrate_init(MigrationState *s, Error **errp) return 0; } -int migrate_add_blocker_internal(Error *reason, Error **errp) +int migrate_add_blocker_internal(Error **reasonp, Error **errp) { /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */ if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) { - error_propagate_prepend(errp, error_copy(reason), + error_propagate_prepend(errp, *reasonp, "disallowing migration blocker " "(migration/snapshot in progress) for: "); + *reasonp = NULL; return -EBUSY; } - migration_blockers = g_slist_prepend(migration_blockers, reason); + migration_blockers = g_slist_prepend(migration_blockers, *reasonp); return 0; } -int migrate_add_blocker(Error *reason, Error **errp) +int migrate_add_blocker(Error **reasonp, Error **errp) { if (only_migratable) { - error_propagate_prepend(errp, error_copy(reason), + error_propagate_prepend(errp, *reasonp, "disallowing migration blocker " "(--only-migratable) for: "); + *reasonp = NULL; return -EACCES; } - return migrate_add_blocker_internal(reason, errp); + return migrate_add_blocker_internal(reasonp, errp); } -void migrate_del_blocker(Error *reason) +void migrate_del_blocker(Error **reasonp) { - migration_blockers = g_slist_remove(migration_blockers, reason); + if (*reasonp) { + migration_blockers = g_slist_remove(migration_blockers, *reasonp); + error_free(*reasonp); + *reasonp = NULL; + } } void qmp_migrate_incoming(const char *uri, Error **errp) @@ -2227,7 +2243,7 @@ static int postcopy_start(MigrationState *ms, Error **errp) * spice needs to trigger a transition now */ ms->postcopy_after_devices = true; - notifier_list_notify(&migration_state_notifiers, ms); + migration_call_notifiers(ms); ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop; @@ -3307,7 +3323,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) rate_limit = migrate_max_bandwidth(); /* Notify before starting migration thread */ - notifier_list_notify(&migration_state_notifiers, s); + migration_call_notifiers(s); } migration_rate_set(rate_limit); diff --git a/migration/multifd.c b/migration/multifd.c index 1fe53d3b98..e2a45c667a 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -743,9 +743,6 @@ static void *multifd_send_thread(void *opaque) if (flags & MULTIFD_FLAG_SYNC) { qemu_sem_post(&p->sem_sync); } - } else if (p->quit) { - qemu_mutex_unlock(&p->mutex); - break; } else { qemu_mutex_unlock(&p->mutex); /* sometimes there are spurious wakeups */ diff --git a/migration/ram.c b/migration/ram.c index 16c30a9d7a..92769902bb 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3873,6 +3873,7 @@ static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length) ret = qemu_ram_resize(block, length, &local_err); if (local_err) { error_report_err(local_err); + return ret; } } /* For postcopy we need to check hugepage sizes match */ @@ -3883,7 +3884,7 @@ static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length) error_report("Mismatched RAM page size %s " "(local) %zd != %" PRId64, block->idstr, block->page_size, remote_page_size); - ret = -EINVAL; + return -EINVAL; } } if (migrate_ignore_shared()) { @@ -3893,7 +3894,7 @@ static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length) error_report("Mismatched GPAs for block %s " "%" PRId64 "!= %" PRId64, block->idstr, (uint64_t)addr, (uint64_t)block->mr->addr); - ret = -EINVAL; + return -EINVAL; } } ret = rdma_block_notification_handle(f, block->idstr); diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 939c984d5b..0f2e6fc58e 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -339,7 +339,8 @@ static void vhost_vdpa_net_data_start_first(VhostVDPAState *s) { struct vhost_vdpa *v = &s->vhost_vdpa; - add_migration_state_change_notifier(&s->migration_state); + migration_add_notifier(&s->migration_state, + vdpa_net_migration_state_notifier); if (v->shadow_vqs_enabled) { v->iova_tree = vhost_iova_tree_new(v->iova_range.first, v->iova_range.last); @@ -399,7 +400,7 @@ static void vhost_vdpa_net_client_stop(NetClientState *nc) assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); if (s->vhost_vdpa.index == 0) { - remove_migration_state_change_notifier(&s->migration_state); + migration_remove_notifier(&s->migration_state); } dev = s->vhost_vdpa.dev; @@ -1456,7 +1457,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->vhost_vdpa.device_fd = vdpa_device_fd; s->vhost_vdpa.index = queue_pair_index; s->always_svq = svq; - s->migration_state.notify = vdpa_net_migration_state_notifier; + s->migration_state.notify = NULL; s->vhost_vdpa.shadow_vqs_enabled = svq; s->vhost_vdpa.iova_range = iova_range; s->vhost_vdpa.shadow_data = svq; diff --git a/pc-bios/hppa-firmware.img b/pc-bios/hppa-firmware.img index c7196143b1..e976c0cc93 100644 --- a/pc-bios/hppa-firmware.img +++ b/pc-bios/hppa-firmware.img Binary files differdiff --git a/qapi/compat.json b/qapi/compat.json index f4c19837eb..42034d9368 100644 --- a/qapi/compat.json +++ b/qapi/compat.json @@ -43,8 +43,8 @@ # This is intended for testing users of the management interfaces. # # Limitation: covers only syntactic aspects of QMP, i.e. stuff tagged -# with feature 'deprecated'. We may want to extend it to cover -# semantic aspects and CLI. +# with feature 'deprecated' or 'unstable'. We may want to extend it +# to cover semantic aspects and CLI. # # Limitation: deprecated-output policy @hide is not implemented for # enumeration values. They behave the same as with policy @accept. diff --git a/qapi/machine-common.json b/qapi/machine-common.json new file mode 100644 index 0000000000..fa6bd71d12 --- /dev/null +++ b/qapi/machine-common.json @@ -0,0 +1,21 @@ +# -*- Mode: Python -*- +# vim: filetype=python +# +# This work is licensed under the terms of the GNU GPL, version 2 or later. +# See the COPYING file in the top-level directory. + +## +# = Machines S390 data types +## + +## +# @CpuS390Entitlement: +# +# An enumeration of CPU entitlements that can be assumed by a virtual +# S390 CPU +# +# Since: 8.2 +## +{ 'enum': 'CpuS390Entitlement', + 'prefix': 'S390_CPU_ENTITLEMENT', + 'data': [ 'auto', 'low', 'medium', 'high' ] } diff --git a/qapi/machine-target.json b/qapi/machine-target.json index f0a6b72414..4e55adbe00 100644 --- a/qapi/machine-target.json +++ b/qapi/machine-target.json @@ -4,6 +4,8 @@ # This work is licensed under the terms of the GNU GPL, version 2 or later. # See the COPYING file in the top-level directory. +{ 'include': 'machine-common.json' } + ## # @CpuModelInfo: # @@ -361,3 +363,122 @@ 'TARGET_MIPS', 'TARGET_LOONGARCH64', 'TARGET_RISCV' ] } } + +## +# @CpuS390Polarization: +# +# An enumeration of CPU polarization that can be assumed by a virtual +# S390 CPU +# +# Since: 8.2 +## +{ 'enum': 'CpuS390Polarization', + 'prefix': 'S390_CPU_POLARIZATION', + 'data': [ 'horizontal', 'vertical' ], + 'if': 'TARGET_S390X' +} + +## +# @set-cpu-topology: +# +# Modify the topology by moving the CPU inside the topology tree, +# or by changing a modifier attribute of a CPU. +# Absent values will not be modified. +# +# @core-id: the vCPU ID to be moved +# +# @socket-id: destination socket to move the vCPU to +# +# @book-id: destination book to move the vCPU to +# +# @drawer-id: destination drawer to move the vCPU to +# +# @entitlement: entitlement to set +# +# @dedicated: whether the provisioning of real to virtual CPU is dedicated +# +# Features: +# +# @unstable: This command is experimental. +# +# Returns: Nothing on success. +# +# Since: 8.2 +## +{ 'command': 'set-cpu-topology', + 'data': { + 'core-id': 'uint16', + '*socket-id': 'uint16', + '*book-id': 'uint16', + '*drawer-id': 'uint16', + '*entitlement': 'CpuS390Entitlement', + '*dedicated': 'bool' + }, + 'features': [ 'unstable' ], + 'if': { 'all': [ 'TARGET_S390X' , 'CONFIG_KVM' ] } +} + +## +# @CPU_POLARIZATION_CHANGE: +# +# Emitted when the guest asks to change the polarization. +# +# The guest can tell the host (via the PTF instruction) whether the +# CPUs should be provisioned using horizontal or vertical polarization. +# +# On horizontal polarization the host is expected to provision all vCPUs +# equally. +# +# On vertical polarization the host can provision each vCPU differently. +# The guest will get information on the details of the provisioning +# the next time it uses the STSI(15) instruction. +# +# @polarization: polarization specified by the guest +# +# Features: +# +# @unstable: This event is experimental. +# +# Since: 8.2 +# +# Example: +# +# <- { "event": "CPU_POLARIZATION_CHANGE", +# "data": { "polarization": "horizontal" }, +# "timestamp": { "seconds": 1401385907, "microseconds": 422329 } } +## +{ 'event': 'CPU_POLARIZATION_CHANGE', + 'data': { 'polarization': 'CpuS390Polarization' }, + 'features': [ 'unstable' ], + 'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] } +} + +## +# @CpuPolarizationInfo: +# +# The result of a CPU polarization query. +# +# @polarization: the CPU polarization +# +# Since: 8.2 +## +{ 'struct': 'CpuPolarizationInfo', + 'data': { 'polarization': 'CpuS390Polarization' }, + 'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] } +} + +## +# @query-s390x-cpu-polarization: +# +# Features: +# +# @unstable: This command is experimental. +# +# Returns: the machine's CPU polarization +# +# Since: 8.2 +## +{ 'command': 'query-s390x-cpu-polarization', 'returns': 'CpuPolarizationInfo', + 'features': [ 'unstable' ], + 'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] } +} diff --git a/qapi/machine.json b/qapi/machine.json index a08b6576ca..6c9d2f6dcf 100644 --- a/qapi/machine.json +++ b/qapi/machine.json @@ -9,6 +9,7 @@ ## { 'include': 'common.json' } +{ 'include': 'machine-common.json' } ## # @SysEmuTarget: @@ -56,9 +57,16 @@ # # @cpu-state: the virtual CPU's state # +# @dedicated: the virtual CPU's dedication (since 8.2) +# +# @entitlement: the virtual CPU's entitlement (since 8.2) +# # Since: 2.12 ## -{ 'struct': 'CpuInfoS390', 'data': { 'cpu-state': 'CpuS390State' } } +{ 'struct': 'CpuInfoS390', + 'data': { 'cpu-state': 'CpuS390State', + '*dedicated': 'bool', + '*entitlement': 'CpuS390Entitlement' } } ## # @CpuInfoFast: @@ -71,8 +79,7 @@ # # @thread-id: ID of the underlying host thread # -# @props: properties describing to which node/socket/core/thread -# virtual CPU belongs to, provided if supported by board +# @props: properties associated with a virtual CPU, e.g. the socket id # # @target: the QEMU system emulation target, which determines which # additional fields will be listed (since 3.0) @@ -899,29 +906,46 @@ # should be passed by management with device_add command when a CPU is # being hotplugged. # +# Which members are optional and which mandatory depends on the +# architecture and board. +# +# For s390x see :ref:`cpu-topology-s390x`. +# +# The ids other than the node-id specify the position of the CPU +# within the CPU topology (as defined by the machine property "smp", +# thus see also type @SMPConfiguration) +# # @node-id: NUMA node ID the CPU belongs to # -# @socket-id: socket number within node/board the CPU belongs to +# @drawer-id: drawer number within CPU topology the CPU belongs to +# (since 8.2) # -# @die-id: die number within socket the CPU belongs to (since 4.1) +# @book-id: book number within parent container the CPU belongs to +# (since 8.2) # -# @cluster-id: cluster number within die the CPU belongs to (since -# 7.1) +# @socket-id: socket number within parent container the CPU belongs to # -# @core-id: core number within cluster the CPU belongs to +# @die-id: die number within the parent container the CPU belongs to +# (since 4.1) # -# @thread-id: thread number within core the CPU belongs to +# @cluster-id: cluster number within the parent container the CPU +# belongs to (since 7.1) # -# Note: currently there are 6 properties that could be present but -# management should be prepared to pass through other properties -# with device_add command to allow for future interface extension. -# This also requires the filed names to be kept in sync with the -# properties passed to -device/device_add. +# @core-id: core number within the parent container the CPU +# belongs to +# +# @thread-id: thread number within the core the CPU belongs to +# +# Note: management should be prepared to pass through additional +# properties with device_add. # # Since: 2.7 ## { 'struct': 'CpuInstanceProperties', + # Keep these in sync with the properties device_add accepts 'data': { '*node-id': 'int', + '*drawer-id': 'int', + '*book-id': 'int', '*socket-id': 'int', '*die-id': 'int', '*cluster-id': 'int', @@ -1478,26 +1502,43 @@ # Schema for CPU topology configuration. A missing value lets QEMU # figure out a suitable value based on the ones that are provided. # -# @cpus: number of virtual CPUs in the virtual machine -# -# @sockets: number of sockets in the CPU topology +# The members other than @cpus and @maxcpus define a topology of +# containers. # -# @dies: number of dies per socket in the CPU topology +# The ordering from highest/coarsest to lowest/finest is: +# @drawers, @books, @sockets, @dies, @clusters, @cores, @threads. # -# @clusters: number of clusters per die in the CPU topology (since -# 7.0) +# Different architectures support different subsets of topology +# containers. # -# @cores: number of cores per cluster in the CPU topology +# For example, s390x does not have clusters and dies, and the socket +# is the parent container of cores. # -# @threads: number of threads per core in the CPU topology +# @cpus: number of virtual CPUs in the virtual machine # # @maxcpus: maximum number of hotpluggable virtual CPUs in the virtual # machine # +# @drawers: number of drawers in the CPU topology (since 8.2) +# +# @books: number of books in the CPU topology (since 8.2) +# +# @sockets: number of sockets per parent container +# +# @dies: number of dies per parent container +# +# @clusters: number of clusters per parent container (since 7.0) +# +# @cores: number of cores per parent container +# +# @threads: number of threads per core +# # Since: 6.1 ## { 'struct': 'SMPConfiguration', 'data': { '*cpus': 'int', + '*drawers': 'int', + '*books': 'int', '*sockets': 'int', '*dies': 'int', '*clusters': 'int', diff --git a/qapi/meson.build b/qapi/meson.build index 60a668b343..f81a37565c 100644 --- a/qapi/meson.build +++ b/qapi/meson.build @@ -36,6 +36,7 @@ qapi_all_modules = [ 'error', 'introspect', 'job', + 'machine-common', 'machine', 'machine-target', 'migration', diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json index 6594afba31..c01ec335e6 100644 --- a/qapi/qapi-schema.json +++ b/qapi/qapi-schema.json @@ -66,6 +66,7 @@ { 'include': 'introspect.json' } { 'include': 'qom.json' } { 'include': 'qdev.json' } +{ 'include': 'machine-common.json' } { 'include': 'machine.json' } { 'include': 'machine-target.json' } { 'include': 'replay.json' } diff --git a/qemu-options.hx b/qemu-options.hx index 54a7e94970..e26230bac5 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -272,11 +272,14 @@ SRST ERST DEF("smp", HAS_ARG, QEMU_OPTION_smp, - "-smp [[cpus=]n][,maxcpus=maxcpus][,sockets=sockets][,dies=dies][,clusters=clusters][,cores=cores][,threads=threads]\n" + "-smp [[cpus=]n][,maxcpus=maxcpus][,drawers=drawers][,books=books][,sockets=sockets]\n" + " [,dies=dies][,clusters=clusters][,cores=cores][,threads=threads]\n" " set the number of initial CPUs to 'n' [default=1]\n" " maxcpus= maximum number of total CPUs, including\n" " offline CPUs for hotplug, etc\n" - " sockets= number of sockets on the machine board\n" + " drawers= number of drawers on the machine board\n" + " books= number of books in one drawer\n" + " sockets= number of sockets in one book\n" " dies= number of dies in one socket\n" " clusters= number of clusters in one die\n" " cores= number of cores in one cluster\n" diff --git a/roms/seabios-hppa b/roms/seabios-hppa -Subproject 763e3b73499db5fef94087bd310bfc8ccbcf785 +Subproject fd5b6cf82369a1e53d68302fb6ede2b9e2afccd diff --git a/scripts/qapi/gen.py b/scripts/qapi/gen.py index bf5716b5f3..5412716617 100644 --- a/scripts/qapi/gen.py +++ b/scripts/qapi/gen.py @@ -13,8 +13,8 @@ from contextlib import contextmanager import os -import sys import re +import sys from typing import ( Dict, Iterator, diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py index 22e7bcc4b1..bf31018aef 100644 --- a/scripts/qapi/parser.py +++ b/scripts/qapi/parser.py @@ -22,6 +22,7 @@ from typing import ( Dict, List, Mapping, + Match, Optional, Set, Union, @@ -563,11 +564,11 @@ class QAPIDoc: self._switch_section(QAPIDoc.NullSection(self._parser)) @staticmethod - def _match_at_name_colon(string: str): + def _match_at_name_colon(string: str) -> Optional[Match[str]]: return re.match(r'@([^:]*): *', string) @staticmethod - def _match_section_tag(string: str): + def _match_section_tag(string: str) -> Optional[Match[str]]: return re.match(r'(Returns|Since|Notes?|Examples?|TODO): *', string) def _append_body_line(self, line: str) -> None: diff --git a/scripts/qapi/schema.py b/scripts/qapi/schema.py index 231ebf61ba..d739e558e9 100644 --- a/scripts/qapi/schema.py +++ b/scripts/qapi/schema.py @@ -73,6 +73,11 @@ class QAPISchemaEntity: self.features = features or [] self._checked = False + def __repr__(self): + if self.name is None: + return "<%s at 0x%x>" % (type(self).__name__, id(self)) + return "<%s:%s at 0x%x>" % type(self).__name__, self.name, id(self) + def c_name(self): return c_name(self.name) diff --git a/stubs/migr-blocker.c b/stubs/migr-blocker.c index 5676a2f93c..17a5dbf87b 100644 --- a/stubs/migr-blocker.c +++ b/stubs/migr-blocker.c @@ -1,11 +1,11 @@ #include "qemu/osdep.h" #include "migration/blocker.h" -int migrate_add_blocker(Error *reason, Error **errp) +int migrate_add_blocker(Error **reasonp, Error **errp) { return 0; } -void migrate_del_blocker(Error *reason) +void migrate_del_blocker(Error **reasonp) { } diff --git a/system/vl.c b/system/vl.c index 3100ac01ed..92d29bf521 100644 --- a/system/vl.c +++ b/system/vl.c @@ -727,6 +727,12 @@ static QemuOptsList qemu_smp_opts = { .name = "cpus", .type = QEMU_OPT_NUMBER, }, { + .name = "drawers", + .type = QEMU_OPT_NUMBER, + }, { + .name = "books", + .type = QEMU_OPT_NUMBER, + }, { .name = "sockets", .type = QEMU_OPT_NUMBER, }, { diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c index 326a03153d..c078849403 100644 --- a/target/arm/arm-powerctl.c +++ b/target/arm/arm-powerctl.c @@ -65,60 +65,9 @@ static void arm_set_cpu_on_async_work(CPUState *target_cpu_state, /* Initialize the cpu we are turning on */ cpu_reset(target_cpu_state); + arm_emulate_firmware_reset(target_cpu_state, info->target_el); target_cpu_state->halted = 0; - if (info->target_aa64) { - if ((info->target_el < 3) && arm_feature(&target_cpu->env, - ARM_FEATURE_EL3)) { - /* - * As target mode is AArch64, we need to set lower - * exception level (the requested level 2) to AArch64 - */ - target_cpu->env.cp15.scr_el3 |= SCR_RW; - } - - if ((info->target_el < 2) && arm_feature(&target_cpu->env, - ARM_FEATURE_EL2)) { - /* - * As target mode is AArch64, we need to set lower - * exception level (the requested level 1) to AArch64 - */ - target_cpu->env.cp15.hcr_el2 |= HCR_RW; - } - - target_cpu->env.pstate = aarch64_pstate_mode(info->target_el, true); - } else { - /* We are requested to boot in AArch32 mode */ - static const uint32_t mode_for_el[] = { 0, - ARM_CPU_MODE_SVC, - ARM_CPU_MODE_HYP, - ARM_CPU_MODE_SVC }; - - cpsr_write(&target_cpu->env, mode_for_el[info->target_el], CPSR_M, - CPSRWriteRaw); - } - - if (info->target_el == 3) { - /* Processor is in secure mode */ - target_cpu->env.cp15.scr_el3 &= ~SCR_NS; - } else { - /* Processor is not in secure mode */ - target_cpu->env.cp15.scr_el3 |= SCR_NS; - - /* Set NSACR.{CP11,CP10} so NS can access the FPU */ - target_cpu->env.cp15.nsacr |= 3 << 10; - - /* - * If QEMU is providing the equivalent of EL3 firmware, then we need - * to make sure a CPU targeting EL2 comes out of reset with a - * functional HVC insn. - */ - if (arm_feature(&target_cpu->env, ARM_FEATURE_EL3) - && info->target_el == 2) { - target_cpu->env.cp15.scr_el3 |= SCR_HCE; - } - } - /* We check if the started CPU is now at the correct level */ assert(info->target_el == arm_current_el(&target_cpu->env)); diff --git a/target/arm/common-semi-target.h b/target/arm/common-semi-target.h index 19438ed8cd..da51f2d7f5 100644 --- a/target/arm/common-semi-target.h +++ b/target/arm/common-semi-target.h @@ -10,9 +10,7 @@ #ifndef TARGET_ARM_COMMON_SEMI_TARGET_H #define TARGET_ARM_COMMON_SEMI_TARGET_H -#ifndef CONFIG_USER_ONLY -#include "hw/arm/boot.h" -#endif +#include "target/arm/cpu-qom.h" static inline target_ulong common_semi_arg(CPUState *cs, int argno) { diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h index 514c22ced9..d06c08a734 100644 --- a/target/arm/cpu-qom.h +++ b/target/arm/cpu-qom.h @@ -23,8 +23,6 @@ #include "hw/core/cpu.h" #include "qom/object.h" -struct arm_boot_info; - #define TYPE_ARM_CPU "arm-cpu" OBJECT_DECLARE_CPU_TYPE(ARMCPU, ARMCPUClass, ARM_CPU) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 6c6c551573..aa4e006f21 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -553,6 +553,101 @@ static void arm_cpu_reset_hold(Object *obj) } } +void arm_emulate_firmware_reset(CPUState *cpustate, int target_el) +{ + ARMCPU *cpu = ARM_CPU(cpustate); + CPUARMState *env = &cpu->env; + bool have_el3 = arm_feature(env, ARM_FEATURE_EL3); + bool have_el2 = arm_feature(env, ARM_FEATURE_EL2); + + /* + * Check we have the EL we're aiming for. If that is the + * highest implemented EL, then cpu_reset has already done + * all the work. + */ + switch (target_el) { + case 3: + assert(have_el3); + return; + case 2: + assert(have_el2); + if (!have_el3) { + return; + } + break; + case 1: + if (!have_el3 && !have_el2) { + return; + } + break; + default: + g_assert_not_reached(); + } + + if (have_el3) { + /* + * Set the EL3 state so code can run at EL2. This should match + * the requirements set by Linux in its booting spec. + */ + if (env->aarch64) { + env->cp15.scr_el3 |= SCR_RW; + if (cpu_isar_feature(aa64_pauth, cpu)) { + env->cp15.scr_el3 |= SCR_API | SCR_APK; + } + if (cpu_isar_feature(aa64_mte, cpu)) { + env->cp15.scr_el3 |= SCR_ATA; + } + if (cpu_isar_feature(aa64_sve, cpu)) { + env->cp15.cptr_el[3] |= R_CPTR_EL3_EZ_MASK; + env->vfp.zcr_el[3] = 0xf; + } + if (cpu_isar_feature(aa64_sme, cpu)) { + env->cp15.cptr_el[3] |= R_CPTR_EL3_ESM_MASK; + env->cp15.scr_el3 |= SCR_ENTP2; + env->vfp.smcr_el[3] = 0xf; + } + if (cpu_isar_feature(aa64_hcx, cpu)) { + env->cp15.scr_el3 |= SCR_HXEN; + } + if (cpu_isar_feature(aa64_fgt, cpu)) { + env->cp15.scr_el3 |= SCR_FGTEN; + } + } + + if (target_el == 2) { + /* If the guest is at EL2 then Linux expects the HVC insn to work */ + env->cp15.scr_el3 |= SCR_HCE; + } + + /* Put CPU into non-secure state */ + env->cp15.scr_el3 |= SCR_NS; + /* Set NSACR.{CP11,CP10} so NS can access the FPU */ + env->cp15.nsacr |= 3 << 10; + } + + if (have_el2 && target_el < 2) { + /* Set EL2 state so code can run at EL1. */ + if (env->aarch64) { + env->cp15.hcr_el2 |= HCR_RW; + } + } + + /* Set the CPU to the desired state */ + if (env->aarch64) { + env->pstate = aarch64_pstate_mode(target_el, true); + } else { + static const uint32_t mode_for_el[] = { + 0, + ARM_CPU_MODE_SVC, + ARM_CPU_MODE_HYP, + ARM_CPU_MODE_SVC, + }; + + cpsr_write(env, mode_for_el[target_el], CPSR_M, CPSRWriteRaw); + } +} + + #if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, diff --git a/target/arm/cpu.h b/target/arm/cpu.h index a9edfb8353..76d4cef9e3 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -1149,6 +1149,28 @@ int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, int cpuid, DumpState *s); +/** + * arm_emulate_firmware_reset: Emulate firmware CPU reset handling + * @cpu: CPU (which must have been freshly reset) + * @target_el: exception level to put the CPU into + * @secure: whether to put the CPU in secure state + * + * When QEMU is directly running a guest kernel at a lower level than + * EL3 it implicitly emulates some aspects of the guest firmware. + * This includes that on reset we need to configure the parts of the + * CPU corresponding to EL3 so that the real guest code can run at its + * lower exception level. This function does that post-reset CPU setup, + * for when we do direct boot of a guest kernel, and for when we + * emulate PSCI and similar firmware interfaces starting a CPU at a + * lower exception level. + * + * @target_el must be an EL implemented by the CPU between 1 and 3. + * We do not support dropping into a Secure EL other than 3. + * + * It is the responsibility of the caller to call arm_rebuild_hflags(). + */ +void arm_emulate_firmware_reset(CPUState *cpustate, int target_el); + #ifdef TARGET_AARCH64 int aarch64_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); diff --git a/target/arm/helper.c b/target/arm/helper.c index 74fbb6e1d7..b29edb26af 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -1283,7 +1283,7 @@ static bool pmevcntr_is_64_bit(CPUARMState *env, int counter) bool hlp = env->cp15.mdcr_el2 & MDCR_HLP; int hpmn = env->cp15.mdcr_el2 & MDCR_HPMN; - if (hpmn != 0 && counter >= hpmn) { + if (counter >= hpmn) { return hlp; } } @@ -2475,22 +2475,7 @@ static CPAccessResult gt_counter_access(CPUARMState *env, int timeridx, if (!extract32(env->cp15.c14_cntkctl, timeridx, 1)) { return CP_ACCESS_TRAP; } - - /* If HCR_EL2.<E2H,TGE> == '10': check CNTHCTL_EL2.EL1PCTEN. */ - if (hcr & HCR_E2H) { - if (timeridx == GTIMER_PHYS && - !extract32(env->cp15.cnthctl_el2, 10, 1)) { - return CP_ACCESS_TRAP_EL2; - } - } else { - /* If HCR_EL2.<E2H> == 0: check CNTHCTL_EL2.EL1PCEN. */ - if (has_el2 && timeridx == GTIMER_PHYS && - !extract32(env->cp15.cnthctl_el2, 1, 1)) { - return CP_ACCESS_TRAP_EL2; - } - } - break; - + /* fall through */ case 1: /* Check CNTHCTL_EL2.EL1PCTEN, which changes location based on E2H. */ if (has_el2 && timeridx == GTIMER_PHYS && diff --git a/target/arm/kvm.c b/target/arm/kvm.c index b66b936a95..7903e2ddde 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -553,24 +553,19 @@ bool write_kvmstate_to_list(ARMCPU *cpu) bool ok = true; for (i = 0; i < cpu->cpreg_array_len; i++) { - struct kvm_one_reg r; uint64_t regidx = cpu->cpreg_indexes[i]; uint32_t v32; int ret; - r.id = regidx; - switch (regidx & KVM_REG_SIZE_MASK) { case KVM_REG_SIZE_U32: - r.addr = (uintptr_t)&v32; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r); + ret = kvm_get_one_reg(cs, regidx, &v32); if (!ret) { cpu->cpreg_values[i] = v32; } break; case KVM_REG_SIZE_U64: - r.addr = (uintptr_t)(cpu->cpreg_values + i); - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r); + ret = kvm_get_one_reg(cs, regidx, cpu->cpreg_values + i); break; default: g_assert_not_reached(); @@ -589,7 +584,6 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) bool ok = true; for (i = 0; i < cpu->cpreg_array_len; i++) { - struct kvm_one_reg r; uint64_t regidx = cpu->cpreg_indexes[i]; uint32_t v32; int ret; @@ -598,19 +592,17 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) continue; } - r.id = regidx; switch (regidx & KVM_REG_SIZE_MASK) { case KVM_REG_SIZE_U32: v32 = cpu->cpreg_values[i]; - r.addr = (uintptr_t)&v32; + ret = kvm_set_one_reg(cs, regidx, &v32); break; case KVM_REG_SIZE_U64: - r.addr = (uintptr_t)(cpu->cpreg_values + i); + ret = kvm_set_one_reg(cs, regidx, cpu->cpreg_values + i); break; default: g_assert_not_reached(); } - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r); if (ret) { /* We might fail for "unknown register" and also for * "you tried to set a register which is constant with @@ -709,17 +701,13 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) void kvm_arm_get_virtual_time(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); - struct kvm_one_reg reg = { - .id = KVM_REG_ARM_TIMER_CNT, - .addr = (uintptr_t)&cpu->kvm_vtime, - }; int ret; if (cpu->kvm_vtime_dirty) { return; } - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, KVM_REG_ARM_TIMER_CNT, &cpu->kvm_vtime); if (ret) { error_report("Failed to get KVM_REG_ARM_TIMER_CNT"); abort(); @@ -731,17 +719,13 @@ void kvm_arm_get_virtual_time(CPUState *cs) void kvm_arm_put_virtual_time(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); - struct kvm_one_reg reg = { - .id = KVM_REG_ARM_TIMER_CNT, - .addr = (uintptr_t)&cpu->kvm_vtime, - }; int ret; if (!cpu->kvm_vtime_dirty) { return; } - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, KVM_REG_ARM_TIMER_CNT, &cpu->kvm_vtime); if (ret) { error_report("Failed to set KVM_REG_ARM_TIMER_CNT"); abort(); diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index 5e95c496bb..4bb68646e4 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -30,7 +30,6 @@ #include "internals.h" #include "hw/acpi/acpi.h" #include "hw/acpi/ghes.h" -#include "hw/arm/virt.h" static bool have_guest_debug; @@ -540,14 +539,10 @@ static int kvm_arm_sve_set_vls(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = { cpu->sve_vq.map }; - struct kvm_one_reg reg = { - .id = KVM_REG_ARM64_SVE_VLS, - .addr = (uint64_t)&vls[0], - }; assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX); - return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + return kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_VLS, &vls[0]); } #define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5 @@ -726,19 +721,17 @@ static void kvm_inject_arm_sea(CPUState *c) static int kvm_arch_put_fpsimd(CPUState *cs) { CPUARMState *env = &ARM_CPU(cs)->env; - struct kvm_one_reg reg; int i, ret; for (i = 0; i < 32; i++) { uint64_t *q = aa64_vfp_qreg(env, i); #if HOST_BIG_ENDIAN uint64_t fp_val[2] = { q[1], q[0] }; - reg.addr = (uintptr_t)fp_val; + ret = kvm_set_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]), + fp_val); #else - reg.addr = (uintptr_t)q; + ret = kvm_set_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]), q); #endif - reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]); - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); if (ret) { return ret; } @@ -759,14 +752,11 @@ static int kvm_arch_put_sve(CPUState *cs) CPUARMState *env = &cpu->env; uint64_t tmp[ARM_MAX_VQ * 2]; uint64_t *r; - struct kvm_one_reg reg; int n, ret; for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) { r = sve_bswap64(tmp, &env->vfp.zregs[n].d[0], cpu->sve_max_vq * 2); - reg.addr = (uintptr_t)r; - reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0); - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_ZREG(n, 0), r); if (ret) { return ret; } @@ -775,9 +765,7 @@ static int kvm_arch_put_sve(CPUState *cs) for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) { r = sve_bswap64(tmp, r = &env->vfp.pregs[n].p[0], DIV_ROUND_UP(cpu->sve_max_vq * 2, 8)); - reg.addr = (uintptr_t)r; - reg.id = KVM_REG_ARM64_SVE_PREG(n, 0); - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_PREG(n, 0), r); if (ret) { return ret; } @@ -785,9 +773,7 @@ static int kvm_arch_put_sve(CPUState *cs) r = sve_bswap64(tmp, &env->vfp.pregs[FFR_PRED_NUM].p[0], DIV_ROUND_UP(cpu->sve_max_vq * 2, 8)); - reg.addr = (uintptr_t)r; - reg.id = KVM_REG_ARM64_SVE_FFR(0); - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_FFR(0), r); if (ret) { return ret; } @@ -797,7 +783,6 @@ static int kvm_arch_put_sve(CPUState *cs) int kvm_arch_put_registers(CPUState *cs, int level) { - struct kvm_one_reg reg; uint64_t val; uint32_t fpr; int i, ret; @@ -814,9 +799,8 @@ int kvm_arch_put_registers(CPUState *cs, int level) } for (i = 0; i < 31; i++) { - reg.id = AARCH64_CORE_REG(regs.regs[i]); - reg.addr = (uintptr_t) &env->xregs[i]; - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]), + &env->xregs[i]); if (ret) { return ret; } @@ -827,16 +811,12 @@ int kvm_arch_put_registers(CPUState *cs, int level) */ aarch64_save_sp(env, 1); - reg.id = AARCH64_CORE_REG(regs.sp); - reg.addr = (uintptr_t) &env->sp_el[0]; - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.sp), &env->sp_el[0]); if (ret) { return ret; } - reg.id = AARCH64_CORE_REG(sp_el1); - reg.addr = (uintptr_t) &env->sp_el[1]; - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(sp_el1), &env->sp_el[1]); if (ret) { return ret; } @@ -847,23 +827,17 @@ int kvm_arch_put_registers(CPUState *cs, int level) } else { val = cpsr_read(env); } - reg.id = AARCH64_CORE_REG(regs.pstate); - reg.addr = (uintptr_t) &val; - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.pstate), &val); if (ret) { return ret; } - reg.id = AARCH64_CORE_REG(regs.pc); - reg.addr = (uintptr_t) &env->pc; - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.pc), &env->pc); if (ret) { return ret; } - reg.id = AARCH64_CORE_REG(elr_el1); - reg.addr = (uintptr_t) &env->elr_el[1]; - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(elr_el1), &env->elr_el[1]); if (ret) { return ret; } @@ -882,9 +856,8 @@ int kvm_arch_put_registers(CPUState *cs, int level) /* KVM 0-4 map to QEMU banks 1-5 */ for (i = 0; i < KVM_NR_SPSR; i++) { - reg.id = AARCH64_CORE_REG(spsr[i]); - reg.addr = (uintptr_t) &env->banked_spsr[i + 1]; - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(spsr[i]), + &env->banked_spsr[i + 1]); if (ret) { return ret; } @@ -899,18 +872,14 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } - reg.addr = (uintptr_t)(&fpr); fpr = vfp_get_fpsr(env); - reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr); - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpsr), &fpr); if (ret) { return ret; } - reg.addr = (uintptr_t)(&fpr); fpr = vfp_get_fpcr(env); - reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr); - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpcr), &fpr); if (ret) { return ret; } @@ -939,14 +908,11 @@ int kvm_arch_put_registers(CPUState *cs, int level) static int kvm_arch_get_fpsimd(CPUState *cs) { CPUARMState *env = &ARM_CPU(cs)->env; - struct kvm_one_reg reg; int i, ret; for (i = 0; i < 32; i++) { uint64_t *q = aa64_vfp_qreg(env, i); - reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]); - reg.addr = (uintptr_t)q; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]), q); if (ret) { return ret; } else { @@ -970,15 +936,12 @@ static int kvm_arch_get_sve(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; - struct kvm_one_reg reg; uint64_t *r; int n, ret; for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) { r = &env->vfp.zregs[n].d[0]; - reg.addr = (uintptr_t)r; - reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0); - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, KVM_REG_ARM64_SVE_ZREG(n, 0), r); if (ret) { return ret; } @@ -987,9 +950,7 @@ static int kvm_arch_get_sve(CPUState *cs) for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) { r = &env->vfp.pregs[n].p[0]; - reg.addr = (uintptr_t)r; - reg.id = KVM_REG_ARM64_SVE_PREG(n, 0); - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, KVM_REG_ARM64_SVE_PREG(n, 0), r); if (ret) { return ret; } @@ -997,9 +958,7 @@ static int kvm_arch_get_sve(CPUState *cs) } r = &env->vfp.pregs[FFR_PRED_NUM].p[0]; - reg.addr = (uintptr_t)r; - reg.id = KVM_REG_ARM64_SVE_FFR(0); - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, KVM_REG_ARM64_SVE_FFR(0), r); if (ret) { return ret; } @@ -1010,7 +969,6 @@ static int kvm_arch_get_sve(CPUState *cs) int kvm_arch_get_registers(CPUState *cs) { - struct kvm_one_reg reg; uint64_t val; unsigned int el; uint32_t fpr; @@ -1020,31 +978,24 @@ int kvm_arch_get_registers(CPUState *cs) CPUARMState *env = &cpu->env; for (i = 0; i < 31; i++) { - reg.id = AARCH64_CORE_REG(regs.regs[i]); - reg.addr = (uintptr_t) &env->xregs[i]; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]), + &env->xregs[i]); if (ret) { return ret; } } - reg.id = AARCH64_CORE_REG(regs.sp); - reg.addr = (uintptr_t) &env->sp_el[0]; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.sp), &env->sp_el[0]); if (ret) { return ret; } - reg.id = AARCH64_CORE_REG(sp_el1); - reg.addr = (uintptr_t) &env->sp_el[1]; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(sp_el1), &env->sp_el[1]); if (ret) { return ret; } - reg.id = AARCH64_CORE_REG(regs.pstate); - reg.addr = (uintptr_t) &val; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.pstate), &val); if (ret) { return ret; } @@ -1061,9 +1012,7 @@ int kvm_arch_get_registers(CPUState *cs) */ aarch64_restore_sp(env, 1); - reg.id = AARCH64_CORE_REG(regs.pc); - reg.addr = (uintptr_t) &env->pc; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.pc), &env->pc); if (ret) { return ret; } @@ -1077,9 +1026,7 @@ int kvm_arch_get_registers(CPUState *cs) aarch64_sync_64_to_32(env); } - reg.id = AARCH64_CORE_REG(elr_el1); - reg.addr = (uintptr_t) &env->elr_el[1]; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(elr_el1), &env->elr_el[1]); if (ret) { return ret; } @@ -1089,9 +1036,8 @@ int kvm_arch_get_registers(CPUState *cs) * KVM SPSRs 0-4 map to QEMU banks 1-5 */ for (i = 0; i < KVM_NR_SPSR; i++) { - reg.id = AARCH64_CORE_REG(spsr[i]); - reg.addr = (uintptr_t) &env->banked_spsr[i + 1]; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(spsr[i]), + &env->banked_spsr[i + 1]); if (ret) { return ret; } @@ -1112,17 +1058,13 @@ int kvm_arch_get_registers(CPUState *cs) return ret; } - reg.addr = (uintptr_t)(&fpr); - reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr); - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpsr), &fpr); if (ret) { return ret; } vfp_set_fpsr(env, fpr); - reg.addr = (uintptr_t)(&fpr); - reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr); - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpcr), &fpr); if (ret) { return ret; } diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c index 1f918ff537..0d5d8e307d 100644 --- a/target/arm/tcg/cpu32.c +++ b/target/arm/tcg/cpu32.c @@ -89,6 +89,10 @@ void aa32_max_features(ARMCPU *cpu) t = FIELD_DP32(t, ID_DFR0, COPSDBG, 9); /* FEAT_Debugv8p4 */ t = FIELD_DP32(t, ID_DFR0, PERFMON, 6); /* FEAT_PMUv3p5 */ cpu->isar.id_dfr0 = t; + + t = cpu->isar.id_dfr1; + t = FIELD_DP32(t, ID_DFR1, HPMN0, 1); /* FEAT_HPMN0 */ + cpu->isar.id_dfr1 = t; } /* CPU models. These are not needed for the AArch64 linux-user build. */ diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c index 68928e5127..d978aa5f7a 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -1109,6 +1109,7 @@ void aarch64_max_tcg_initfn(Object *obj) t = cpu->isar.id_aa64dfr0; t = FIELD_DP64(t, ID_AA64DFR0, DEBUGVER, 9); /* FEAT_Debugv8p4 */ t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 6); /* FEAT_PMUv3p5 */ + t = FIELD_DP64(t, ID_AA64DFR0, HPMN0, 1); /* FEAT_HPMN0 */ cpu->isar.id_aa64dfr0 = t; t = cpu->isar.id_aa64smfr0; diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c index 48927fbb8c..b3660173d1 100644 --- a/target/arm/tcg/translate.c +++ b/target/arm/tcg/translate.c @@ -7882,7 +7882,7 @@ static void op_addr_block_post(DisasContext *s, arg_ldst_block *a, } } -static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n) +static bool op_stm(DisasContext *s, arg_ldst_block *a) { int i, j, n, list, mem_idx; bool user = a->u; @@ -7899,7 +7899,14 @@ static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n) list = a->list; n = ctpop16(list); - if (n < min_n || a->rn == 15) { + /* + * This is UNPREDICTABLE for n < 1 in all encodings, and we choose + * to UNDEF. In the T32 STM encoding n == 1 is also UNPREDICTABLE, + * but hardware treats it like the A32 version and implements the + * single-register-store, and some in-the-wild (buggy) software + * assumes that, so we don't UNDEF on that case. + */ + if (n < 1 || a->rn == 15) { unallocated_encoding(s); return true; } @@ -7935,8 +7942,7 @@ static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n) static bool trans_STM(DisasContext *s, arg_ldst_block *a) { - /* BitCount(list) < 1 is UNPREDICTABLE */ - return op_stm(s, a, 1); + return op_stm(s, a); } static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a) @@ -7946,11 +7952,10 @@ static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a) unallocated_encoding(s); return true; } - /* BitCount(list) < 2 is UNPREDICTABLE */ - return op_stm(s, a, 2); + return op_stm(s, a); } -static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n) +static bool do_ldm(DisasContext *s, arg_ldst_block *a) { int i, j, n, list, mem_idx; bool loaded_base; @@ -7979,7 +7984,14 @@ static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n) list = a->list; n = ctpop16(list); - if (n < min_n || a->rn == 15) { + /* + * This is UNPREDICTABLE for n < 1 in all encodings, and we choose + * to UNDEF. In the T32 LDM encoding n == 1 is also UNPREDICTABLE, + * but hardware treats it like the A32 version and implements the + * single-register-load, and some in-the-wild (buggy) software + * assumes that, so we don't UNDEF on that case. + */ + if (n < 1 || a->rn == 15) { unallocated_encoding(s); return true; } @@ -8045,8 +8057,7 @@ static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a) unallocated_encoding(s); return true; } - /* BitCount(list) < 1 is UNPREDICTABLE */ - return do_ldm(s, a, 1); + return do_ldm(s, a); } static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a) @@ -8056,16 +8067,14 @@ static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a) unallocated_encoding(s); return true; } - /* BitCount(list) < 2 is UNPREDICTABLE */ - return do_ldm(s, a, 2); + return do_ldm(s, a); } static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a) { /* Writeback is conditional on the base register not being loaded. */ a->w = !(a->list & (1 << a->rn)); - /* BitCount(list) < 1 is UNPREDICTABLE */ - return do_ldm(s, a, 1); + return do_ldm(s, a); } static bool trans_CLRM(DisasContext *s, arg_CLRM *a) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index ab72bcdfad..e7c054cc16 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -1603,7 +1603,7 @@ static int hyperv_init_vcpu(X86CPU *cpu) error_setg(&hv_passthrough_mig_blocker, "'hv-passthrough' CPU flag prevents migration, use explicit" " set of hv-* flags instead"); - ret = migrate_add_blocker(hv_passthrough_mig_blocker, &local_err); + ret = migrate_add_blocker(&hv_passthrough_mig_blocker, &local_err); if (ret < 0) { error_report_err(local_err); return ret; @@ -1617,7 +1617,7 @@ static int hyperv_init_vcpu(X86CPU *cpu) " use explicit 'hv-no-nonarch-coresharing=on' instead (but" " make sure SMT is disabled and/or that vCPUs are properly" " pinned)"); - ret = migrate_add_blocker(hv_no_nonarch_cs_mig_blocker, &local_err); + ret = migrate_add_blocker(&hv_no_nonarch_cs_mig_blocker, &local_err); if (ret < 0) { error_report_err(local_err); return ret; @@ -2213,7 +2213,7 @@ int kvm_arch_init_vcpu(CPUState *cs) error_setg(&invtsc_mig_blocker, "State blocked by non-migratable CPU device" " (invtsc flag)"); - r = migrate_add_blocker(invtsc_mig_blocker, &local_err); + r = migrate_add_blocker(&invtsc_mig_blocker, &local_err); if (r < 0) { error_report_err(local_err); return r; @@ -2271,7 +2271,7 @@ int kvm_arch_init_vcpu(CPUState *cs) return 0; fail: - migrate_del_blocker(invtsc_mig_blocker); + migrate_del_blocker(&invtsc_mig_blocker); return r; } diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c index fb769868f2..7d752bc5e0 100644 --- a/target/i386/nvmm/nvmm-all.c +++ b/target/i386/nvmm/nvmm-all.c @@ -929,9 +929,8 @@ nvmm_init_vcpu(CPUState *cpu) error_setg(&nvmm_migration_blocker, "NVMM: Migration not supported"); - if (migrate_add_blocker(nvmm_migration_blocker, &local_error) < 0) { + if (migrate_add_blocker(&nvmm_migration_blocker, &local_error) < 0) { error_report_err(local_error); - error_free(nvmm_migration_blocker); return -EINVAL; } } diff --git a/target/i386/sev.c b/target/i386/sev.c index fe2144c038..9a71246682 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -891,7 +891,7 @@ sev_launch_finish(SevGuestState *sev) /* add migration blocker */ error_setg(&sev_mig_blocker, "SEV: Migration is not implemented"); - migrate_add_blocker(sev_mig_blocker, &error_fatal); + migrate_add_blocker(&sev_mig_blocker, &error_fatal); } static void diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c index df3aba2642..d29ba916a0 100644 --- a/target/i386/whpx/whpx-all.c +++ b/target/i386/whpx/whpx-all.c @@ -2160,9 +2160,8 @@ int whpx_init_vcpu(CPUState *cpu) "State blocked due to non-migratable CPUID feature support," "dirty memory tracking support, and XSAVE/XRSTOR support"); - if (migrate_add_blocker(whpx_migration_blocker, &local_error) < 0) { + if (migrate_add_blocker(&whpx_migration_blocker, &local_error) < 0) { error_report_err(local_error); - error_free(whpx_migration_blocker); ret = -EINVAL; goto error; } diff --git a/target/s390x/cpu-sysemu.c b/target/s390x/cpu-sysemu.c index 8112561e5e..1cd30c1d84 100644 --- a/target/s390x/cpu-sysemu.c +++ b/target/s390x/cpu-sysemu.c @@ -307,3 +307,16 @@ void s390_do_cpu_set_diag318(CPUState *cs, run_on_cpu_data arg) kvm_s390_set_diag318(cs, arg.host_ulong); } } + +void s390_cpu_topology_set_changed(bool changed) +{ + int ret; + + if (kvm_enabled()) { + ret = kvm_s390_topology_set_mtcr(changed); + if (ret) { + error_report("Failed to set Modified Topology Change Report: %s", + strerror(-ret)); + } + } +} diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c index 6093ab0a12..6acfa1c91b 100644 --- a/target/s390x/cpu.c +++ b/target/s390x/cpu.c @@ -31,12 +31,14 @@ #include "qapi/qapi-types-machine.h" #include "sysemu/hw_accel.h" #include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" #include "fpu/softfloat-helpers.h" #include "disas/capstone.h" #include "sysemu/tcg.h" #ifndef CONFIG_USER_ONLY #include "sysemu/reset.h" #endif +#include "hw/s390x/cpu-topology.h" #define CR0_RESET 0xE0UL #define CR14_RESET 0xC2000000UL; @@ -145,6 +147,14 @@ static void s390_query_cpu_fast(CPUState *cpu, CpuInfoFast *value) S390CPU *s390_cpu = S390_CPU(cpu); value->u.s390x.cpu_state = s390_cpu->env.cpu_state; +#if !defined(CONFIG_USER_ONLY) + if (s390_has_topology()) { + value->u.s390x.has_dedicated = true; + value->u.s390x.dedicated = s390_cpu->env.dedicated; + value->u.s390x.has_entitlement = true; + value->u.s390x.entitlement = s390_cpu->env.entitlement; + } +#endif } /* S390CPUClass::reset() */ @@ -290,6 +300,12 @@ static const gchar *s390_gdb_arch_name(CPUState *cs) static Property s390x_cpu_properties[] = { #if !defined(CONFIG_USER_ONLY) DEFINE_PROP_UINT32("core-id", S390CPU, env.core_id, 0), + DEFINE_PROP_INT32("socket-id", S390CPU, env.socket_id, -1), + DEFINE_PROP_INT32("book-id", S390CPU, env.book_id, -1), + DEFINE_PROP_INT32("drawer-id", S390CPU, env.drawer_id, -1), + DEFINE_PROP_BOOL("dedicated", S390CPU, env.dedicated, false), + DEFINE_PROP_CPUS390ENTITLEMENT("entitlement", S390CPU, env.entitlement, + S390_CPU_ENTITLEMENT_AUTO), #endif DEFINE_PROP_END_OF_LIST() }; diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index 7bea7075e1..40c5cedd0e 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -30,6 +30,7 @@ #include "exec/cpu-defs.h" #include "qemu/cpu-float.h" #include "tcg/tcg_s390x.h" +#include "qapi/qapi-types-machine-common.h" #define ELF_MACHINE_UNAME "S390X" @@ -132,6 +133,11 @@ struct CPUArchState { #if !defined(CONFIG_USER_ONLY) uint32_t core_id; /* PoP "CPU address", same as cpu_index */ + int32_t socket_id; + int32_t book_id; + int32_t drawer_id; + bool dedicated; + CpuS390Entitlement entitlement; /* Used only for vertical polarization */ uint64_t cpuid; #endif @@ -564,6 +570,29 @@ typedef struct SysIB_322 { } SysIB_322; QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096); +/* + * Topology Magnitude fields (MAG) indicates the maximum number of + * topology list entries (TLE) at the corresponding nesting level. + */ +#define S390_TOPOLOGY_MAG 6 +#define S390_TOPOLOGY_MAG6 0 +#define S390_TOPOLOGY_MAG5 1 +#define S390_TOPOLOGY_MAG4 2 +#define S390_TOPOLOGY_MAG3 3 +#define S390_TOPOLOGY_MAG2 4 +#define S390_TOPOLOGY_MAG1 5 +/* Configuration topology */ +typedef struct SysIB_151x { + uint8_t reserved0[2]; + uint16_t length; + uint8_t mag[S390_TOPOLOGY_MAG]; + uint8_t reserved1; + uint8_t mnest; + uint32_t reserved2; + char tle[]; +} SysIB_151x; +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16); + typedef union SysIB { SysIB_111 sysib_111; SysIB_121 sysib_121; @@ -571,9 +600,62 @@ typedef union SysIB { SysIB_221 sysib_221; SysIB_222 sysib_222; SysIB_322 sysib_322; + SysIB_151x sysib_151x; } SysIB; QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096); +/* + * CPU Topology List provided by STSI with fc=15 provides a list + * of two different Topology List Entries (TLE) types to specify + * the topology hierarchy. + * + * - Container Topology List Entry + * Defines a container to contain other Topology List Entries + * of any type, nested containers or CPU. + * - CPU Topology List Entry + * Specifies the CPUs position, type, entitlement and polarization + * of the CPUs contained in the last container TLE. + * + * There can be theoretically up to five levels of containers, QEMU + * uses only three levels, the drawer's, book's and socket's level. + * + * A container with a nesting level (NL) greater than 1 can only + * contain another container of nesting level NL-1. + * + * A container of nesting level 1 (socket), contains as many CPU TLE + * as needed to describe the position and qualities of all CPUs inside + * the container. + * The qualities of a CPU are polarization, entitlement and type. + * + * The CPU TLE defines the position of the CPUs of identical qualities + * using a 64bits mask which first bit has its offset defined by + * the CPU address origin field of the CPU TLE like in: + * CPU address = origin * 64 + bit position within the mask + */ +/* Container type Topology List Entry */ +typedef struct SYSIBContainerListEntry { + uint8_t nl; + uint8_t reserved[6]; + uint8_t id; +} SYSIBContainerListEntry; +QEMU_BUILD_BUG_ON(sizeof(SYSIBContainerListEntry) != 8); + +/* CPU type Topology List Entry */ +typedef struct SysIBCPUListEntry { + uint8_t nl; + uint8_t reserved0[3]; +#define SYSIB_TLE_POLARITY_MASK 0x03 +#define SYSIB_TLE_DEDICATED 0x04 + uint8_t flags; + uint8_t type; + uint16_t origin; + uint64_t mask; +} SysIBCPUListEntry; +QEMU_BUILD_BUG_ON(sizeof(SysIBCPUListEntry) != 16); + +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, uint64_t addr, uint8_t ar, uintptr_t ra); +void s390_cpu_topology_set_changed(bool changed); + /* MMU defines */ #define ASCE_ORIGIN (~0xfffULL) /* segment table origin */ #define ASCE_SUBSPACE 0x200 /* subspace group control */ diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c index 98f14c09c2..4dead48650 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -255,6 +255,7 @@ bool s390_has_feat(S390Feat feat) case S390_FEAT_SIE_CMMA: case S390_FEAT_SIE_PFMFI: case S390_FEAT_SIE_IBS: + case S390_FEAT_CONFIGURATION_TOPOLOGY: return false; break; default: diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index bc5c56a305..0f0e784b2a 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -86,6 +86,7 @@ #define PRIV_B9_EQBS 0x9c #define PRIV_B9_CLP 0xa0 +#define PRIV_B9_PTF 0xa2 #define PRIV_B9_PCISTG 0xd0 #define PRIV_B9_PCILG 0xd2 #define PRIV_B9_RPCIT 0xd3 @@ -138,7 +139,6 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { KVM_CAP_LAST_INFO }; -static int cap_sync_regs; static int cap_async_pf; static int cap_mem_op; static int cap_mem_op_extension; @@ -337,21 +337,28 @@ int kvm_arch_get_default_type(MachineState *ms) int kvm_arch_init(MachineState *ms, KVMState *s) { + int required_caps[] = { + KVM_CAP_DEVICE_CTRL, + KVM_CAP_SYNC_REGS, + }; + + for (int i = 0; i < ARRAY_SIZE(required_caps); i++) { + if (!kvm_check_extension(s, required_caps[i])) { + error_report("KVM is missing capability #%d - " + "please use kernel 3.15 or newer", required_caps[i]); + return -1; + } + } + object_class_foreach(ccw_machine_class_foreach, TYPE_S390_CCW_MACHINE, false, NULL); - if (!kvm_check_extension(kvm_state, KVM_CAP_DEVICE_CTRL)) { - error_report("KVM is missing capability KVM_CAP_DEVICE_CTRL - " - "please use kernel 3.15 or newer"); - return -1; - } if (!kvm_check_extension(s, KVM_CAP_S390_COW)) { error_report("KVM is missing capability KVM_CAP_S390_COW - " "unsupported environment"); return -1; } - cap_sync_regs = kvm_check_extension(s, KVM_CAP_SYNC_REGS); cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF); cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); cap_mem_op_extension = kvm_check_extension(s, KVM_CAP_S390_MEM_OP_EXTENSION); @@ -365,6 +372,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) kvm_vm_enable_cap(s, KVM_CAP_S390_USER_SIGP, 0); kvm_vm_enable_cap(s, KVM_CAP_S390_VECTOR_REGISTERS, 0); kvm_vm_enable_cap(s, KVM_CAP_S390_USER_STSI, 0); + kvm_vm_enable_cap(s, KVM_CAP_S390_CPU_TOPOLOGY, 0); if (ri_allowed()) { if (kvm_vm_enable_cap(s, KVM_CAP_S390_RI, 0) == 0) { cap_ri = 1; @@ -458,37 +466,28 @@ void kvm_s390_reset_vcpu_normal(S390CPU *cpu) static int can_sync_regs(CPUState *cs, int regs) { - return cap_sync_regs && (cs->kvm_run->kvm_valid_regs & regs) == regs; + return (cs->kvm_run->kvm_valid_regs & regs) == regs; } +#define KVM_SYNC_REQUIRED_REGS (KVM_SYNC_GPRS | KVM_SYNC_ACRS | \ + KVM_SYNC_CRS | KVM_SYNC_PREFIX) + int kvm_arch_put_registers(CPUState *cs, int level) { S390CPU *cpu = S390_CPU(cs); CPUS390XState *env = &cpu->env; - struct kvm_sregs sregs; - struct kvm_regs regs; struct kvm_fpu fpu = {}; int r; int i; + g_assert(can_sync_regs(cs, KVM_SYNC_REQUIRED_REGS)); + /* always save the PSW and the GPRS*/ cs->kvm_run->psw_addr = env->psw.addr; cs->kvm_run->psw_mask = env->psw.mask; - if (can_sync_regs(cs, KVM_SYNC_GPRS)) { - for (i = 0; i < 16; i++) { - cs->kvm_run->s.regs.gprs[i] = env->regs[i]; - cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_GPRS; - } - } else { - for (i = 0; i < 16; i++) { - regs.gprs[i] = env->regs[i]; - } - r = kvm_vcpu_ioctl(cs, KVM_SET_REGS, ®s); - if (r < 0) { - return r; - } - } + memcpy(cs->kvm_run->s.regs.gprs, env->regs, sizeof(cs->kvm_run->s.regs.gprs)); + cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_GPRS; if (can_sync_regs(cs, KVM_SYNC_VRS)) { for (i = 0; i < 32; i++) { @@ -521,6 +520,15 @@ int kvm_arch_put_registers(CPUState *cs, int level) return 0; } + /* + * Access registers, control registers and the prefix - these are + * always available via kvm_sync_regs in the kernels that we support + */ + memcpy(cs->kvm_run->s.regs.acrs, env->aregs, sizeof(cs->kvm_run->s.regs.acrs)); + memcpy(cs->kvm_run->s.regs.crs, env->cregs, sizeof(cs->kvm_run->s.regs.crs)); + cs->kvm_run->s.regs.prefix = env->psa; + cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_ACRS | KVM_SYNC_CRS | KVM_SYNC_PREFIX; + if (can_sync_regs(cs, KVM_SYNC_ARCH0)) { cs->kvm_run->s.regs.cputm = env->cputm; cs->kvm_run->s.regs.ckc = env->ckc; @@ -567,25 +575,6 @@ int kvm_arch_put_registers(CPUState *cs, int level) } } - /* access registers and control registers*/ - if (can_sync_regs(cs, KVM_SYNC_ACRS | KVM_SYNC_CRS)) { - for (i = 0; i < 16; i++) { - cs->kvm_run->s.regs.acrs[i] = env->aregs[i]; - cs->kvm_run->s.regs.crs[i] = env->cregs[i]; - } - cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_ACRS; - cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_CRS; - } else { - for (i = 0; i < 16; i++) { - sregs.acrs[i] = env->aregs[i]; - sregs.crs[i] = env->cregs[i]; - } - r = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs); - if (r < 0) { - return r; - } - } - if (can_sync_regs(cs, KVM_SYNC_GSCB)) { memcpy(cs->kvm_run->s.regs.gscb, env->gscb, 32); cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_GSCB; @@ -607,13 +596,6 @@ int kvm_arch_put_registers(CPUState *cs, int level) cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_DIAG318; } - /* Finally the prefix */ - if (can_sync_regs(cs, KVM_SYNC_PREFIX)) { - cs->kvm_run->s.regs.prefix = env->psa; - cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_PREFIX; - } else { - /* prefix is only supported via sync regs */ - } return 0; } @@ -621,8 +603,6 @@ int kvm_arch_get_registers(CPUState *cs) { S390CPU *cpu = S390_CPU(cs); CPUS390XState *env = &cpu->env; - struct kvm_sregs sregs; - struct kvm_regs regs; struct kvm_fpu fpu; int i, r; @@ -630,37 +610,14 @@ int kvm_arch_get_registers(CPUState *cs) env->psw.addr = cs->kvm_run->psw_addr; env->psw.mask = cs->kvm_run->psw_mask; - /* the GPRS */ - if (can_sync_regs(cs, KVM_SYNC_GPRS)) { - for (i = 0; i < 16; i++) { - env->regs[i] = cs->kvm_run->s.regs.gprs[i]; - } - } else { - r = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); - if (r < 0) { - return r; - } - for (i = 0; i < 16; i++) { - env->regs[i] = regs.gprs[i]; - } - } + /* the GPRS, ACRS and CRS */ + g_assert(can_sync_regs(cs, KVM_SYNC_REQUIRED_REGS)); + memcpy(env->regs, cs->kvm_run->s.regs.gprs, sizeof(env->regs)); + memcpy(env->aregs, cs->kvm_run->s.regs.acrs, sizeof(env->aregs)); + memcpy(env->cregs, cs->kvm_run->s.regs.crs, sizeof(env->cregs)); - /* The ACRS and CRS */ - if (can_sync_regs(cs, KVM_SYNC_ACRS | KVM_SYNC_CRS)) { - for (i = 0; i < 16; i++) { - env->aregs[i] = cs->kvm_run->s.regs.acrs[i]; - env->cregs[i] = cs->kvm_run->s.regs.crs[i]; - } - } else { - r = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs); - if (r < 0) { - return r; - } - for (i = 0; i < 16; i++) { - env->aregs[i] = sregs.acrs[i]; - env->cregs[i] = sregs.crs[i]; - } - } + /* The prefix */ + env->psa = cs->kvm_run->s.regs.prefix; /* Floating point and vector registers */ if (can_sync_regs(cs, KVM_SYNC_VRS)) { @@ -685,11 +642,6 @@ int kvm_arch_get_registers(CPUState *cs) env->fpc = fpu.fpc; } - /* The prefix */ - if (can_sync_regs(cs, KVM_SYNC_PREFIX)) { - env->psa = cs->kvm_run->s.regs.prefix; - } - if (can_sync_regs(cs, KVM_SYNC_ARCH0)) { env->cputm = cs->kvm_run->s.regs.cputm; env->ckc = cs->kvm_run->s.regs.ckc; @@ -1457,6 +1409,13 @@ static int kvm_mpcifc_service_call(S390CPU *cpu, struct kvm_run *run) } } +static void kvm_handle_ptf(S390CPU *cpu, struct kvm_run *run) +{ + uint8_t r1 = (run->s390_sieic.ipb >> 20) & 0x0f; + + s390_handle_ptf(cpu, r1, RA_IGNORED); +} + static int handle_b9(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) { int r = 0; @@ -1474,6 +1433,9 @@ static int handle_b9(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) case PRIV_B9_RPCIT: r = kvm_rpcit_service_call(cpu, run); break; + case PRIV_B9_PTF: + kvm_handle_ptf(cpu, run); + break; case PRIV_B9_EQBS: /* just inject exception */ r = -1; @@ -1911,9 +1873,12 @@ static int handle_stsi(S390CPU *cpu) if (run->s390_stsi.sel1 != 2 || run->s390_stsi.sel2 != 2) { return 0; } - /* Only sysib 3.2.2 needs post-handling for now. */ insert_stsi_3_2_2(cpu, run->s390_stsi.addr, run->s390_stsi.ar); return 0; + case 15: + insert_stsi_15_1_x(cpu, run->s390_stsi.sel2, run->s390_stsi.addr, + run->s390_stsi.ar, RA_IGNORED); + return 0; default: return 0; } @@ -2495,6 +2460,14 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) set_bit(S390_FEAT_UNPACK, model->features); } + /* + * If we have kernel support for CPU Topology indicate the + * configuration-topology facility. + */ + if (kvm_check_extension(kvm_state, KVM_CAP_S390_CPU_TOPOLOGY)) { + set_bit(S390_FEAT_CONFIGURATION_TOPOLOGY, model->features); + } + /* We emulate a zPCI bus and AEN, therefore we don't need HW support */ set_bit(S390_FEAT_ZPCI, model->features); set_bit(S390_FEAT_ADAPTER_EVENT_NOTIFICATION, model->features); @@ -2661,6 +2634,23 @@ int kvm_s390_get_zpci_op(void) return cap_zpci_op; } +int kvm_s390_topology_set_mtcr(uint64_t attr) +{ + struct kvm_device_attr attribute = { + .group = KVM_S390_VM_CPU_TOPOLOGY, + .attr = attr, + }; + + if (!s390_has_feat(S390_FEAT_CONFIGURATION_TOPOLOGY)) { + return 0; + } + if (!kvm_vm_check_attr(kvm_state, KVM_S390_VM_CPU_TOPOLOGY, attr)) { + return -ENOTSUP; + } + + return kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, &attribute); +} + void kvm_arch_accel_class_init(ObjectClass *oc) { } diff --git a/target/s390x/kvm/kvm_s390x.h b/target/s390x/kvm/kvm_s390x.h index f9785564d0..649dae5948 100644 --- a/target/s390x/kvm/kvm_s390x.h +++ b/target/s390x/kvm/kvm_s390x.h @@ -47,5 +47,6 @@ void kvm_s390_crypto_reset(void); void kvm_s390_restart_interrupt(S390CPU *cpu); void kvm_s390_stop_interrupt(S390CPU *cpu); void kvm_s390_set_diag318(CPUState *cs, uint64_t diag318_info); +int kvm_s390_topology_set_mtcr(uint64_t attr); #endif /* KVM_S390X_H */ diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build index d6aca590ae..588a9aa737 100644 --- a/target/s390x/kvm/meson.build +++ b/target/s390x/kvm/meson.build @@ -1,7 +1,8 @@ s390x_ss.add(when: 'CONFIG_KVM', if_true: files( 'pv.c', - 'kvm.c' + 'kvm.c', + 'stsi-topology.c' ), if_false: files( 'stubs.c' )) diff --git a/target/s390x/kvm/stsi-topology.c b/target/s390x/kvm/stsi-topology.c new file mode 100644 index 0000000000..efd2aa71f1 --- /dev/null +++ b/target/s390x/kvm/stsi-topology.c @@ -0,0 +1,334 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * QEMU S390x CPU Topology + * + * Copyright IBM Corp. 2022, 2023 + * Author(s): Pierre Morel <pmorel@linux.ibm.com> + * + */ +#include "qemu/osdep.h" +#include "cpu.h" +#include "hw/s390x/sclp.h" +#include "hw/s390x/cpu-topology.h" + +QEMU_BUILD_BUG_ON(S390_CPU_ENTITLEMENT_LOW != 1); +QEMU_BUILD_BUG_ON(S390_CPU_ENTITLEMENT_MEDIUM != 2); +QEMU_BUILD_BUG_ON(S390_CPU_ENTITLEMENT_HIGH != 3); + +/** + * fill_container: + * @p: The address of the container TLE to fill + * @level: The level of nesting for this container + * @id: The container receives a unique ID inside its own container + * + * Returns the next free TLE entry. + */ +static char *fill_container(char *p, int level, int id) +{ + SYSIBContainerListEntry *tle = (SYSIBContainerListEntry *)p; + + tle->nl = level; + tle->id = id; + return p + sizeof(*tle); +} + +/** + * fill_tle_cpu: + * @p: The address of the CPU TLE to fill + * @entry: a pointer to the S390TopologyEntry defining this + * CPU container. + * + * Returns the next free TLE entry. + */ +static char *fill_tle_cpu(char *p, S390TopologyEntry *entry) +{ + SysIBCPUListEntry *tle = (SysIBCPUListEntry *)p; + S390TopologyId topology_id = entry->id; + + tle->nl = 0; + tle->flags = 0; + if (topology_id.vertical) { + tle->flags |= topology_id.entitlement; + } + if (topology_id.dedicated) { + tle->flags |= SYSIB_TLE_DEDICATED; + } + tle->type = topology_id.type; + tle->origin = cpu_to_be16(topology_id.origin * 64); + tle->mask = cpu_to_be64(entry->mask); + return p + sizeof(*tle); +} + +/* + * Macro to check that the size of data after increment + * will not get bigger than the size of the SysIB. + */ +#define SYSIB_GUARD(data, x) do { \ + data += x; \ + if (data > sizeof(SysIB)) { \ + return 0; \ + } \ + } while (0) + +/** + * stsi_topology_fill_sysib: + * @p: A pointer to the position of the first TLE + * @level: The nested level wanted by the guest + * + * Fill the SYSIB with the topology information as described in + * the PoP, nesting containers as appropriate, with the maximum + * nesting limited by @level. + * + * Return value: + * On success: the size of the SysIB_15x after being filled with TLE. + * On error: 0 in the case we would overrun the end of the SysIB. + */ +static int stsi_topology_fill_sysib(S390TopologyList *topology_list, + char *p, int level) +{ + S390TopologyEntry *entry; + int last_drawer = -1; + int last_book = -1; + int last_socket = -1; + int drawer_id = 0; + int book_id = 0; + int socket_id = 0; + int n = sizeof(SysIB_151x); + + QTAILQ_FOREACH(entry, topology_list, next) { + bool drawer_change = last_drawer != entry->id.drawer; + bool book_change = drawer_change || last_book != entry->id.book; + bool socket_change = book_change || last_socket != entry->id.socket; + + if (level > 3 && drawer_change) { + SYSIB_GUARD(n, sizeof(SYSIBContainerListEntry)); + p = fill_container(p, 3, drawer_id++); + book_id = 0; + } + if (level > 2 && book_change) { + SYSIB_GUARD(n, sizeof(SYSIBContainerListEntry)); + p = fill_container(p, 2, book_id++); + socket_id = 0; + } + if (socket_change) { + SYSIB_GUARD(n, sizeof(SYSIBContainerListEntry)); + p = fill_container(p, 1, socket_id++); + } + + SYSIB_GUARD(n, sizeof(SysIBCPUListEntry)); + p = fill_tle_cpu(p, entry); + last_drawer = entry->id.drawer; + last_book = entry->id.book; + last_socket = entry->id.socket; + } + + return n; +} + +/** + * setup_stsi: + * @topology_list: ordered list of groups of CPUs with same properties + * @sysib: pointer to a SysIB to be filled with SysIB_151x data + * @level: Nested level specified by the guest + * + * Setup the SYSIB for STSI 15.1, the header as well as the description + * of the topology. + */ +static int setup_stsi(S390TopologyList *topology_list, SysIB_151x *sysib, + int level) +{ + sysib->mnest = level; + switch (level) { + case 4: + sysib->mag[S390_TOPOLOGY_MAG4] = current_machine->smp.drawers; + sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.books; + sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets; + sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores; + break; + case 3: + sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.drawers * + current_machine->smp.books; + sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets; + sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores; + break; + case 2: + sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.drawers * + current_machine->smp.books * + current_machine->smp.sockets; + sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores; + break; + } + + return stsi_topology_fill_sysib(topology_list, sysib->tle, level); +} + +/** + * s390_topology_add_cpu_to_entry: + * @entry: Topology entry to setup + * @cpu: the S390CPU to add + * + * Set the core bit inside the topology mask. + */ +static void s390_topology_add_cpu_to_entry(S390TopologyEntry *entry, + S390CPU *cpu) +{ + set_bit(63 - (cpu->env.core_id % 64), &entry->mask); +} + +/** + * s390_topology_from_cpu: + * @cpu: S390CPU to calculate the topology id + * + * Initialize the topology id from the CPU environment. + */ +static S390TopologyId s390_topology_from_cpu(S390CPU *cpu) +{ + S390TopologyId topology_id = { + .drawer = cpu->env.drawer_id, + .book = cpu->env.book_id, + .socket = cpu->env.socket_id, + .type = S390_TOPOLOGY_CPU_IFL, + .vertical = s390_topology.polarization == S390_CPU_POLARIZATION_VERTICAL, + .entitlement = cpu->env.entitlement, + .dedicated = cpu->env.dedicated, + .origin = cpu->env.core_id / 64, + }; + + return topology_id; +} + +/** + * s390_topology_id_cmp: + * @l: first S390TopologyId + * @r: second S390TopologyId + * + * Compare two topology ids according to the sorting order specified by the PoP. + * + * Returns a negative number if the first id is less than, 0 if it is equal to + * and positive if it is larger than the second id. + */ +static int s390_topology_id_cmp(const S390TopologyId *l, + const S390TopologyId *r) +{ + /* + * lexical order, compare less significant values only if more significant + * ones are equal + */ + return l->sentinel - r->sentinel ?: + l->drawer - r->drawer ?: + l->book - r->book ?: + l->socket - r->socket ?: + l->type - r->type ?: + /* logic is inverted for the next three */ + r->vertical - l->vertical ?: + r->entitlement - l->entitlement ?: + r->dedicated - l->dedicated ?: + l->origin - r->origin; +} + +static bool s390_topology_id_eq(const S390TopologyId *l, + const S390TopologyId *r) +{ + return !s390_topology_id_cmp(l, r); +} + +static bool s390_topology_id_lt(const S390TopologyId *l, + const S390TopologyId *r) +{ + return s390_topology_id_cmp(l, r) < 0; +} + +/** + * s390_topology_fill_list_sorted: + * @topology_list: list to fill + * + * Create S390TopologyEntrys as appropriate from all CPUs and fill the + * topology_list with the entries according to the order specified by the PoP. + */ +static void s390_topology_fill_list_sorted(S390TopologyList *topology_list) +{ + CPUState *cs; + S390TopologyEntry sentinel = { .id.sentinel = 1 }; + + QTAILQ_INIT(topology_list); + + QTAILQ_INSERT_HEAD(topology_list, &sentinel, next); + + CPU_FOREACH(cs) { + S390TopologyId id = s390_topology_from_cpu(S390_CPU(cs)); + S390TopologyEntry *entry = NULL, *tmp; + + QTAILQ_FOREACH(tmp, topology_list, next) { + if (s390_topology_id_eq(&id, &tmp->id)) { + entry = tmp; + break; + } else if (s390_topology_id_lt(&id, &tmp->id)) { + entry = g_malloc0(sizeof(*entry)); + entry->id = id; + QTAILQ_INSERT_BEFORE(tmp, entry, next); + break; + } + } + assert(entry); + s390_topology_add_cpu_to_entry(entry, S390_CPU(cs)); + } + + QTAILQ_REMOVE(topology_list, &sentinel, next); +} + +/** + * s390_topology_empty_list: + * + * Clear all entries in the S390Topology list. + */ +static void s390_topology_empty_list(S390TopologyList *topology_list) +{ + S390TopologyEntry *entry = NULL; + S390TopologyEntry *tmp = NULL; + + QTAILQ_FOREACH_SAFE(entry, topology_list, next, tmp) { + QTAILQ_REMOVE(topology_list, entry, next); + g_free(entry); + } +} + +/** + * insert_stsi_15_1_x: + * @cpu: the CPU doing the call for which we set CC + * @sel2: the selector 2, containing the nested level + * @addr: Guest logical address of the guest SysIB + * @ar: the access register number + * @ra: the return address + * + * Emulate STSI 15.1.x, that is, perform all necessary checks and + * fill the SYSIB. + * In case the topology description is too long to fit into the SYSIB, + * set CC=3 and abort without writing the SYSIB. + */ +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, uint64_t addr, uint8_t ar, uintptr_t ra) +{ + S390TopologyList topology_list; + SysIB sysib = {0}; + int length; + + if (!s390_has_topology() || sel2 < 2 || sel2 > SCLP_READ_SCP_INFO_MNEST) { + setcc(cpu, 3); + return; + } + + s390_topology_fill_list_sorted(&topology_list); + length = setup_stsi(&topology_list, &sysib.sysib_151x, sel2); + s390_topology_empty_list(&topology_list); + + if (!length) { + setcc(cpu, 3); + return; + } + + sysib.sysib_151x.length = cpu_to_be16(length); + if (!s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, length)) { + setcc(cpu, 0); + } else { + s390_cpu_virt_mem_handle_exc(cpu, ra); + } +} diff --git a/tests/avocado/s390_topology.py b/tests/avocado/s390_topology.py new file mode 100644 index 0000000000..9154ac8776 --- /dev/null +++ b/tests/avocado/s390_topology.py @@ -0,0 +1,439 @@ +# Functional test that boots a Linux kernel and checks the console +# +# Copyright IBM Corp. 2023 +# +# Author: +# Pierre Morel <pmorel@linux.ibm.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. + +import os +import shutil +import time + +from avocado_qemu import QemuSystemTest +from avocado_qemu import exec_command +from avocado_qemu import exec_command_and_wait_for_pattern +from avocado_qemu import interrupt_interactive_console_until_pattern +from avocado_qemu import wait_for_console_pattern +from avocado.utils import process +from avocado.utils import archive + + +class S390CPUTopology(QemuSystemTest): + """ + S390x CPU topology consists of 4 topology layers, from bottom to top, + the cores, sockets, books and drawers and 2 modifiers attributes, + the entitlement and the dedication. + See: docs/system/s390x/cpu-topology.rst. + + S390x CPU topology is setup in different ways: + - implicitly from the '-smp' argument by completing each topology + level one after the other beginning with drawer 0, book 0 and + socket 0. + - explicitly from the '-device' argument on the QEMU command line + - explicitly by hotplug of a new CPU using QMP or HMP + - it is modified by using QMP 'set-cpu-topology' + + The S390x modifier attribute entitlement depends on the machine + polarization, which can be horizontal or vertical. + The polarization is changed on a request from the guest. + """ + timeout = 90 + event_timeout = 10 + + KERNEL_COMMON_COMMAND_LINE = ('printk.time=0 ' + 'root=/dev/ram ' + 'selinux=0 ' + 'rdinit=/bin/sh') + + def wait_until_booted(self): + wait_for_console_pattern(self, 'no job control', + failure_message='Kernel panic - not syncing', + vm=None) + + def check_topology(self, c, s, b, d, e, t): + res = self.vm.qmp('query-cpus-fast') + cpus = res['return'] + for cpu in cpus: + core = cpu['props']['core-id'] + socket = cpu['props']['socket-id'] + book = cpu['props']['book-id'] + drawer = cpu['props']['drawer-id'] + entitlement = cpu.get('entitlement') + dedicated = cpu.get('dedicated') + if core == c: + self.assertEqual(drawer, d) + self.assertEqual(book, b) + self.assertEqual(socket, s) + self.assertEqual(entitlement, e) + self.assertEqual(dedicated, t) + + def kernel_init(self): + """ + We need a VM that supports CPU topology, + currently this only the case when using KVM, not TCG. + We need a kernel supporting the CPU topology. + We need a minimal root filesystem with a shell. + """ + self.require_accelerator("kvm") + kernel_url = ('https://archives.fedoraproject.org/pub/archive' + '/fedora-secondary/releases/35/Server/s390x/os' + '/images/kernel.img') + kernel_hash = '0d1aaaf303f07cf0160c8c48e56fe638' + kernel_path = self.fetch_asset(kernel_url, algorithm='md5', + asset_hash=kernel_hash) + + initrd_url = ('https://archives.fedoraproject.org/pub/archive' + '/fedora-secondary/releases/35/Server/s390x/os' + '/images/initrd.img') + initrd_hash = 'a122057d95725ac030e2ec51df46e172' + initrd_path_xz = self.fetch_asset(initrd_url, algorithm='md5', + asset_hash=initrd_hash) + initrd_path = os.path.join(self.workdir, 'initrd-raw.img') + archive.lzma_uncompress(initrd_path_xz, initrd_path) + + self.vm.set_console() + kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + self.vm.add_args('-nographic', + '-enable-kvm', + '-cpu', 'max,ctop=on', + '-m', '512', + '-kernel', kernel_path, + '-initrd', initrd_path, + '-append', kernel_command_line) + + def system_init(self): + self.log.info("System init") + exec_command_and_wait_for_pattern(self, + """ mount proc -t proc /proc; + mount sys -t sysfs /sys; + cat /sys/devices/system/cpu/dispatching """, + '0') + + def test_single(self): + """ + This test checks the simplest topology with a single CPU. + + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.launch() + self.wait_until_booted() + self.check_topology(0, 0, 0, 0, 'medium', False) + + def test_default(self): + """ + This test checks the implicit topology. + + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.add_args('-smp', + '13,drawers=2,books=2,sockets=3,cores=2,maxcpus=24') + self.vm.launch() + self.wait_until_booted() + self.check_topology(0, 0, 0, 0, 'medium', False) + self.check_topology(1, 0, 0, 0, 'medium', False) + self.check_topology(2, 1, 0, 0, 'medium', False) + self.check_topology(3, 1, 0, 0, 'medium', False) + self.check_topology(4, 2, 0, 0, 'medium', False) + self.check_topology(5, 2, 0, 0, 'medium', False) + self.check_topology(6, 0, 1, 0, 'medium', False) + self.check_topology(7, 0, 1, 0, 'medium', False) + self.check_topology(8, 1, 1, 0, 'medium', False) + self.check_topology(9, 1, 1, 0, 'medium', False) + self.check_topology(10, 2, 1, 0, 'medium', False) + self.check_topology(11, 2, 1, 0, 'medium', False) + self.check_topology(12, 0, 0, 1, 'medium', False) + + def test_move(self): + """ + This test checks the topology modification by moving a CPU + to another socket: CPU 0 is moved from socket 0 to socket 2. + + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.add_args('-smp', + '1,drawers=2,books=2,sockets=3,cores=2,maxcpus=24') + self.vm.launch() + self.wait_until_booted() + + self.check_topology(0, 0, 0, 0, 'medium', False) + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'socket-id': 2, 'entitlement': 'low'}) + self.assertEqual(res['return'], {}) + self.check_topology(0, 2, 0, 0, 'low', False) + + def test_dash_device(self): + """ + This test verifies that a CPU defined with the '-device' + command line option finds its right place inside the topology. + + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.add_args('-smp', + '1,drawers=2,books=2,sockets=3,cores=2,maxcpus=24') + self.vm.add_args('-device', 'max-s390x-cpu,core-id=10') + self.vm.add_args('-device', + 'max-s390x-cpu,' + 'core-id=1,socket-id=0,book-id=1,drawer-id=1,entitlement=low') + self.vm.add_args('-device', + 'max-s390x-cpu,' + 'core-id=2,socket-id=0,book-id=1,drawer-id=1,entitlement=medium') + self.vm.add_args('-device', + 'max-s390x-cpu,' + 'core-id=3,socket-id=1,book-id=1,drawer-id=1,entitlement=high') + self.vm.add_args('-device', + 'max-s390x-cpu,' + 'core-id=4,socket-id=1,book-id=1,drawer-id=1') + self.vm.add_args('-device', + 'max-s390x-cpu,' + 'core-id=5,socket-id=2,book-id=1,drawer-id=1,dedicated=true') + + self.vm.launch() + self.wait_until_booted() + + self.check_topology(10, 2, 1, 0, 'medium', False) + self.check_topology(1, 0, 1, 1, 'low', False) + self.check_topology(2, 0, 1, 1, 'medium', False) + self.check_topology(3, 1, 1, 1, 'high', False) + self.check_topology(4, 1, 1, 1, 'medium', False) + self.check_topology(5, 2, 1, 1, 'high', True) + + + def guest_set_dispatching(self, dispatching): + exec_command(self, + f'echo {dispatching} > /sys/devices/system/cpu/dispatching') + self.vm.event_wait('CPU_POLARIZATION_CHANGE', self.event_timeout) + exec_command_and_wait_for_pattern(self, + 'cat /sys/devices/system/cpu/dispatching', dispatching) + + + def test_polarization(self): + """ + This test verifies that QEMU modifies the entitlement change after + several guest polarization change requests. + + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.launch() + self.wait_until_booted() + + self.system_init() + res = self.vm.qmp('query-s390x-cpu-polarization') + self.assertEqual(res['return']['polarization'], 'horizontal') + self.check_topology(0, 0, 0, 0, 'medium', False) + + self.guest_set_dispatching('1'); + res = self.vm.qmp('query-s390x-cpu-polarization') + self.assertEqual(res['return']['polarization'], 'vertical') + self.check_topology(0, 0, 0, 0, 'medium', False) + + self.guest_set_dispatching('0'); + res = self.vm.qmp('query-s390x-cpu-polarization') + self.assertEqual(res['return']['polarization'], 'horizontal') + self.check_topology(0, 0, 0, 0, 'medium', False) + + + def check_polarization(self, polarization): + #We need to wait for the change to have been propagated to the kernel + exec_command_and_wait_for_pattern(self, + "\n".join([ + "timeout 1 sh -c 'while true", + 'do', + ' syspath="/sys/devices/system/cpu/cpu0/polarization"', + ' polarization="$(cat "$syspath")" || exit', + f' if [ "$polarization" = "{polarization}" ]; then', + ' exit 0', + ' fi', + ' sleep 0.01', + #searched for strings mustn't show up in command, '' to obfuscate + "done' && echo succ''ess || echo fail''ure", + ]), + "success", "failure") + + + def test_entitlement(self): + """ + This test verifies that QEMU modifies the entitlement + after a guest request and that the guest sees the change. + + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.launch() + self.wait_until_booted() + + self.system_init() + + self.check_polarization('horizontal') + self.check_topology(0, 0, 0, 0, 'medium', False) + + self.guest_set_dispatching('1') + self.check_polarization('vertical:medium') + self.check_topology(0, 0, 0, 0, 'medium', False) + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'low'}) + self.assertEqual(res['return'], {}) + self.check_polarization('vertical:low') + self.check_topology(0, 0, 0, 0, 'low', False) + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'medium'}) + self.assertEqual(res['return'], {}) + self.check_polarization('vertical:medium') + self.check_topology(0, 0, 0, 0, 'medium', False) + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'high'}) + self.assertEqual(res['return'], {}) + self.check_polarization('vertical:high') + self.check_topology(0, 0, 0, 0, 'high', False) + + self.guest_set_dispatching('0'); + self.check_polarization("horizontal") + self.check_topology(0, 0, 0, 0, 'high', False) + + + def test_dedicated(self): + """ + This test verifies that QEMU adjusts the entitlement correctly when a + CPU is made dedicated. + QEMU retains the entitlement value when horizontal polarization is in effect. + For the guest, the field shows the effective value of the entitlement. + + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.launch() + self.wait_until_booted() + + self.system_init() + + self.check_polarization("horizontal") + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'dedicated': True}) + self.assertEqual(res['return'], {}) + self.check_topology(0, 0, 0, 0, 'high', True) + self.check_polarization("horizontal") + + self.guest_set_dispatching('1'); + self.check_topology(0, 0, 0, 0, 'high', True) + self.check_polarization("vertical:high") + + self.guest_set_dispatching('0'); + self.check_topology(0, 0, 0, 0, 'high', True) + self.check_polarization("horizontal") + + + def test_socket_full(self): + """ + This test verifies that QEMU does not accept to overload a socket. + The socket-id 0 on book-id 0 already contains CPUs 0 and 1 and can + not accept any new CPU while socket-id 0 on book-id 1 is free. + + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.add_args('-smp', + '3,drawers=2,books=2,sockets=3,cores=2,maxcpus=24') + self.vm.launch() + self.wait_until_booted() + + self.system_init() + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 2, 'socket-id': 0, 'book-id': 0}) + self.assertEqual(res['error']['class'], 'GenericError') + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 2, 'socket-id': 0, 'book-id': 1}) + self.assertEqual(res['return'], {}) + + def test_dedicated_error(self): + """ + This test verifies that QEMU refuses to lower the entitlement + of a dedicated CPU + + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.launch() + self.wait_until_booted() + + self.system_init() + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'dedicated': True}) + self.assertEqual(res['return'], {}) + + self.check_topology(0, 0, 0, 0, 'high', True) + + self.guest_set_dispatching('1'); + + self.check_topology(0, 0, 0, 0, 'high', True) + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'low', 'dedicated': True}) + self.assertEqual(res['error']['class'], 'GenericError') + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'low'}) + self.assertEqual(res['error']['class'], 'GenericError') + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'medium', 'dedicated': True}) + self.assertEqual(res['error']['class'], 'GenericError') + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'medium'}) + self.assertEqual(res['error']['class'], 'GenericError') + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'low', 'dedicated': False}) + self.assertEqual(res['return'], {}) + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'medium', 'dedicated': False}) + self.assertEqual(res['return'], {}) + + def test_move_error(self): + """ + This test verifies that QEMU refuses to move a CPU to an + nonexistent location + + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.launch() + self.wait_until_booted() + + self.system_init() + + res = self.vm.qmp('set-cpu-topology', {'core-id': 0, 'drawer-id': 1}) + self.assertEqual(res['error']['class'], 'GenericError') + + res = self.vm.qmp('set-cpu-topology', {'core-id': 0, 'book-id': 1}) + self.assertEqual(res['error']['class'], 'GenericError') + + res = self.vm.qmp('set-cpu-topology', {'core-id': 0, 'socket-id': 1}) + self.assertEqual(res['error']['class'], 'GenericError') + + self.check_topology(0, 0, 0, 0, 'medium', False) diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c index dc7a55634c..f33a210861 100644 --- a/tests/qtest/libqtest.c +++ b/tests/qtest/libqtest.c @@ -91,6 +91,7 @@ struct QTestState static GHookList abrt_hooks; static void (*sighandler_old)(int); +static bool silence_spawn_log; static int qtest_query_target_endianness(QTestState *s); @@ -336,10 +337,17 @@ void qtest_remove_abrt_handler(void *data) } } -static const char *qtest_qemu_binary(void) +static const char *qtest_qemu_binary(const char *var) { const char *qemu_bin; + if (var) { + qemu_bin = getenv(var); + if (qemu_bin) { + return qemu_bin; + } + } + qemu_bin = getenv("QTEST_QEMU_BINARY"); if (!qemu_bin) { fprintf(stderr, "Environment variable QTEST_QEMU_BINARY required\n"); @@ -381,7 +389,8 @@ static pid_t qtest_create_process(char *cmd) } #endif /* _WIN32 */ -static QTestState *G_GNUC_PRINTF(1, 2) qtest_spawn_qemu(const char *fmt, ...) +static QTestState *G_GNUC_PRINTF(2, 3) qtest_spawn_qemu(const char *qemu_bin, + const char *fmt, ...) { va_list ap; QTestState *s = g_new0(QTestState, 1); @@ -391,14 +400,15 @@ static QTestState *G_GNUC_PRINTF(1, 2) qtest_spawn_qemu(const char *fmt, ...) g_autoptr(GString) command = g_string_new(""); va_start(ap, fmt); - g_string_append_printf(command, CMD_EXEC "%s %s", - qtest_qemu_binary(), tracearg); + g_string_append_printf(command, CMD_EXEC "%s %s", qemu_bin, tracearg); g_string_append_vprintf(command, fmt, ap); va_end(ap); qtest_add_abrt_handler(kill_qemu_hook_func, s); - g_test_message("starting QEMU: %s", command->str); + if (!silence_spawn_log) { + g_test_message("starting QEMU: %s", command->str); + } #ifndef _WIN32 s->qemu_pid = fork(); @@ -431,7 +441,8 @@ static QTestState *G_GNUC_PRINTF(1, 2) qtest_spawn_qemu(const char *fmt, ...) return s; } -QTestState *qtest_init_without_qmp_handshake(const char *extra_args) +static QTestState *qtest_init_internal(const char *qemu_bin, + const char *extra_args) { QTestState *s; int sock, qmpsock, i; @@ -456,7 +467,8 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args) sock = init_socket(socket_path); qmpsock = init_socket(qmp_socket_path); - s = qtest_spawn_qemu("-qtest unix:%s " + s = qtest_spawn_qemu(qemu_bin, + "-qtest unix:%s " "-qtest-log %s " "-chardev socket,path=%s,id=char0 " "-mon chardev=char0,mode=control " @@ -509,9 +521,14 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args) return s; } -QTestState *qtest_init(const char *extra_args) +QTestState *qtest_init_without_qmp_handshake(const char *extra_args) { - QTestState *s = qtest_init_without_qmp_handshake(extra_args); + return qtest_init_internal(qtest_qemu_binary(NULL), extra_args); +} + +QTestState *qtest_init_with_env(const char *var, const char *extra_args) +{ + QTestState *s = qtest_init_internal(qtest_qemu_binary(var), extra_args); QDict *greeting; /* Read the QMP greeting and then do the handshake */ @@ -522,6 +539,11 @@ QTestState *qtest_init(const char *extra_args) return s; } +QTestState *qtest_init(const char *extra_args) +{ + return qtest_init_with_env(NULL, extra_args); +} + QTestState *qtest_vinitf(const char *fmt, va_list ap) { char *args = g_strdup_vprintf(fmt, ap); @@ -905,7 +927,7 @@ char *qtest_hmp(QTestState *s, const char *fmt, ...) const char *qtest_get_arch(void) { - const char *qemu = qtest_qemu_binary(); + const char *qemu = qtest_qemu_binary(NULL); const char *end = strrchr(qemu, '-'); if (!end) { @@ -1449,13 +1471,26 @@ struct MachInfo { char *alias; }; +static void qtest_free_machine_list(struct MachInfo *machines) +{ + if (machines) { + for (int i = 0; machines[i].name != NULL; i++) { + g_free(machines[i].name); + g_free(machines[i].alias); + } + + g_free(machines); + } +} + /* * Returns an array with pointers to the available machine names. * The terminating entry has the name set to NULL. */ -static struct MachInfo *qtest_get_machines(void) +static struct MachInfo *qtest_get_machines(const char *var) { static struct MachInfo *machines; + static char *qemu_var; QDict *response, *minfo; QList *list; const QListEntry *p; @@ -1464,11 +1499,21 @@ static struct MachInfo *qtest_get_machines(void) QTestState *qts; int idx; + if (g_strcmp0(qemu_var, var)) { + qemu_var = g_strdup(var); + + /* new qemu, clear the cache */ + qtest_free_machine_list(machines); + machines = NULL; + } + if (machines) { return machines; } - qts = qtest_init("-machine none"); + silence_spawn_log = !g_test_verbose(); + + qts = qtest_init_with_env(qemu_var, "-machine none"); response = qtest_qmp(qts, "{ 'execute': 'query-machines' }"); g_assert(response); list = qdict_get_qlist(response, "return"); @@ -1499,6 +1544,8 @@ static struct MachInfo *qtest_get_machines(void) qtest_quit(qts); qobject_unref(response); + silence_spawn_log = false; + memset(&machines[idx], 0, sizeof(struct MachInfo)); /* Terminating entry */ return machines; } @@ -1509,7 +1556,7 @@ void qtest_cb_for_every_machine(void (*cb)(const char *machine), struct MachInfo *machines; int i; - machines = qtest_get_machines(); + machines = qtest_get_machines(NULL); for (i = 0; machines[i].name != NULL; i++) { /* Ignore machines that cannot be used for qtests */ @@ -1525,12 +1572,28 @@ void qtest_cb_for_every_machine(void (*cb)(const char *machine), } } -bool qtest_has_machine(const char *machine) +char *qtest_resolve_machine_alias(const char *var, const char *alias) { struct MachInfo *machines; int i; - machines = qtest_get_machines(); + machines = qtest_get_machines(var); + + for (i = 0; machines[i].name != NULL; i++) { + if (machines[i].alias && g_str_equal(alias, machines[i].alias)) { + return g_strdup(machines[i].name); + } + } + + return NULL; +} + +bool qtest_has_machine_with_env(const char *var, const char *machine) +{ + struct MachInfo *machines; + int i; + + machines = qtest_get_machines(var); for (i = 0; machines[i].name != NULL; i++) { if (g_str_equal(machine, machines[i].name) || @@ -1542,6 +1605,11 @@ bool qtest_has_machine(const char *machine) return false; } +bool qtest_has_machine(const char *machine) +{ + return qtest_has_machine_with_env(NULL, machine); +} + bool qtest_has_device(const char *device) { static QList *list; diff --git a/tests/qtest/libqtest.h b/tests/qtest/libqtest.h index 5fe3d13466..6e3d3525bf 100644 --- a/tests/qtest/libqtest.h +++ b/tests/qtest/libqtest.h @@ -56,6 +56,19 @@ QTestState *qtest_vinitf(const char *fmt, va_list ap) G_GNUC_PRINTF(1, 0); QTestState *qtest_init(const char *extra_args); /** + * qtest_init_with_env: + * @var: Environment variable from where to take the QEMU binary + * @extra_args: Other arguments to pass to QEMU. CAUTION: these + * arguments are subject to word splitting and shell evaluation. + * + * Like qtest_init(), but use a different environment variable for the + * QEMU binary. + * + * Returns: #QTestState instance. + */ +QTestState *qtest_init_with_env(const char *var, const char *extra_args); + +/** * qtest_init_without_qmp_handshake: * @extra_args: other arguments to pass to QEMU. CAUTION: these * arguments are subject to word splitting and shell evaluation. @@ -910,6 +923,16 @@ void qtest_cb_for_every_machine(void (*cb)(const char *machine), bool skip_old_versioned); /** + * qtest_resolve_machine_alias: + * @var: Environment variable from where to take the QEMU binary + * @alias: The alias to resolve + * + * Returns: the machine type corresponding to the alias if any, + * otherwise NULL. + */ +char *qtest_resolve_machine_alias(const char *var, const char *alias); + +/** * qtest_has_machine: * @machine: The machine to look for * @@ -918,6 +941,15 @@ void qtest_cb_for_every_machine(void (*cb)(const char *machine), bool qtest_has_machine(const char *machine); /** + * qtest_has_machine_with_env: + * @var: Environment variable from where to take the QEMU binary + * @machine: The machine to look for + * + * Returns: true if the machine is available in the specified binary. + */ +bool qtest_has_machine_with_env(const char *var, const char *machine); + +/** * qtest_has_device: * @device: The device to look for * diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c index 0c185db450..24fb7b3525 100644 --- a/tests/qtest/migration-helpers.c +++ b/tests/qtest/migration-helpers.c @@ -11,6 +11,7 @@ */ #include "qemu/osdep.h" +#include "qemu/ctype.h" #include "qapi/qmp/qjson.h" #include "migration-helpers.h" @@ -240,3 +241,54 @@ void wait_for_migration_fail(QTestState *from, bool allow_active) g_assert(qdict_get_bool(rsp_return, "running")); qobject_unref(rsp_return); } + +char *find_common_machine_version(const char *mtype, const char *var1, + const char *var2) +{ + g_autofree char *type1 = qtest_resolve_machine_alias(var1, mtype); + g_autofree char *type2 = qtest_resolve_machine_alias(var2, mtype); + + g_assert(type1 && type2); + + if (g_str_equal(type1, type2)) { + /* either can be used */ + return g_strdup(type1); + } + + if (qtest_has_machine_with_env(var2, type1)) { + return g_strdup(type1); + } + + if (qtest_has_machine_with_env(var1, type2)) { + return g_strdup(type2); + } + + g_test_message("No common machine version for machine type '%s' between " + "binaries %s and %s", mtype, getenv(var1), getenv(var2)); + g_assert_not_reached(); +} + +char *resolve_machine_version(const char *alias, const char *var1, + const char *var2) +{ + const char *mname = g_getenv("QTEST_QEMU_MACHINE_TYPE"); + g_autofree char *machine_name = NULL; + + if (mname) { + const char *dash = strrchr(mname, '-'); + const char *dot = strrchr(mname, '.'); + + machine_name = g_strdup(mname); + + if (dash && dot) { + assert(qtest_has_machine(machine_name)); + return g_steal_pointer(&machine_name); + } + /* else: probably an alias, let it be resolved below */ + } else { + /* use the hardcoded alias */ + machine_name = g_strdup(alias); + } + + return find_common_machine_version(machine_name, var1, var2); +} diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h index 4f51d0f8bc..e31dc85cc7 100644 --- a/tests/qtest/migration-helpers.h +++ b/tests/qtest/migration-helpers.h @@ -43,4 +43,8 @@ void wait_for_migration_complete(QTestState *who); void wait_for_migration_fail(QTestState *from, bool allow_active); +char *find_common_machine_version(const char *mtype, const char *var1, + const char *var2); +char *resolve_machine_version(const char *alias, const char *var1, + const char *var2); #endif /* MIGRATION_HELPERS_H */ diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index e1c110537b..bc70a14642 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -71,6 +71,8 @@ static bool got_dst_resume; #define QEMU_VM_FILE_MAGIC 0x5145564d #define FILE_TEST_FILENAME "migfile" #define FILE_TEST_OFFSET 0x1000 +#define QEMU_ENV_SRC "QTEST_QEMU_BINARY_SRC" +#define QEMU_ENV_DST "QTEST_QEMU_BINARY_DST" #if defined(__linux__) #include <sys/syscall.h> @@ -743,6 +745,8 @@ static int test_migrate_start(QTestState **from, QTestState **to, const char *kvm_opts = NULL; const char *arch = qtest_get_arch(); const char *memory_size; + const char *machine_alias, *machine_opts = ""; + g_autofree char *machine = NULL; if (args->use_shmem) { if (!g_file_test("/dev/shm", G_FILE_TEST_IS_DIR)) { @@ -755,11 +759,20 @@ static int test_migrate_start(QTestState **from, QTestState **to, got_dst_resume = false; if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { memory_size = "150M"; - arch_opts = g_strdup_printf("-drive file=%s,format=raw", bootpath); + + if (g_str_equal(arch, "i386")) { + machine_alias = "pc"; + } else { + machine_alias = "q35"; + } + arch_opts = g_strdup_printf( + "-drive if=none,id=d0,file=%s,format=raw " + "-device ide-hd,drive=d0,secs=1,cyls=1,heads=1", bootpath); start_address = X86_TEST_MEM_START; end_address = X86_TEST_MEM_END; } else if (g_str_equal(arch, "s390x")) { memory_size = "128M"; + machine_alias = "s390-ccw-virtio"; arch_opts = g_strdup_printf("-bios %s", bootpath); start_address = S390_TEST_MEM_START; end_address = S390_TEST_MEM_END; @@ -771,11 +784,14 @@ static int test_migrate_start(QTestState **from, QTestState **to, "'nvramrc=hex .\" _\" begin %x %x " "do i c@ 1 + i c! 1000 +loop .\" B\" 0 " "until'", end_address, start_address); - arch_opts = g_strdup("-nodefaults -machine vsmt=8"); + machine_alias = "pseries"; + machine_opts = "vsmt=8"; + arch_opts = g_strdup("-nodefaults"); } else if (strcmp(arch, "aarch64") == 0) { memory_size = "150M"; - arch_opts = g_strdup_printf("-machine virt,gic-version=max -cpu max " - "-kernel %s", bootpath); + machine_alias = "virt"; + machine_opts = "gic-version=max"; + arch_opts = g_strdup_printf("-cpu max -kernel %s", bootpath); start_address = ARM_TEST_MEM_START; end_address = ARM_TEST_MEM_END; } else { @@ -809,12 +825,19 @@ static int test_migrate_start(QTestState **from, QTestState **to, kvm_opts = ",dirty-ring-size=4096"; } + machine = resolve_machine_version(machine_alias, QEMU_ENV_SRC, + QEMU_ENV_DST); + + g_test_message("Using machine type: %s", machine); + cmd_source = g_strdup_printf("-accel kvm%s -accel tcg " + "-machine %s,%s " "-name source,debug-threads=on " "-m %s " "-serial file:%s/src_serial " "%s %s %s %s %s", kvm_opts ? kvm_opts : "", + machine, machine_opts, memory_size, tmpfs, arch_opts ? arch_opts : "", arch_source ? arch_source : "", @@ -822,26 +845,28 @@ static int test_migrate_start(QTestState **from, QTestState **to, args->opts_source ? args->opts_source : "", ignore_stderr); if (!args->only_target) { - *from = qtest_init(cmd_source); + *from = qtest_init_with_env(QEMU_ENV_SRC, cmd_source); qtest_qmp_set_event_callback(*from, migrate_watch_for_stop, &got_src_stop); } cmd_target = g_strdup_printf("-accel kvm%s -accel tcg " + "-machine %s,%s " "-name target,debug-threads=on " "-m %s " "-serial file:%s/dest_serial " "-incoming %s " "%s %s %s %s %s", kvm_opts ? kvm_opts : "", + machine, machine_opts, memory_size, tmpfs, uri, arch_opts ? arch_opts : "", arch_target ? arch_target : "", shmem_opts ? shmem_opts : "", args->opts_target ? args->opts_target : "", ignore_stderr); - *to = qtest_init(cmd_target); + *to = qtest_init_with_env(QEMU_ENV_DST, cmd_target); qtest_qmp_set_event_callback(*to, migrate_watch_for_resume, &got_dst_resume); @@ -2972,10 +2997,23 @@ int main(int argc, char **argv) bool has_uffd; const char *arch; g_autoptr(GError) err = NULL; + const char *qemu_src = getenv(QEMU_ENV_SRC); + const char *qemu_dst = getenv(QEMU_ENV_DST); int ret; g_test_init(&argc, &argv, NULL); + /* + * The default QTEST_QEMU_BINARY must always be provided because + * that is what helpers use to query the accel type and + * architecture. + */ + if (qemu_src && qemu_dst) { + g_test_message("Only one of %s, %s is allowed", + QEMU_ENV_SRC, QEMU_ENV_DST); + exit(1); + } + has_kvm = qtest_has_accel("kvm"); has_tcg = qtest_has_accel("tcg"); @@ -3034,7 +3072,9 @@ int main(int argc, char **argv) qtest_add_func("/migration/bad_dest", test_baddest); #ifndef _WIN32 - qtest_add_func("/migration/analyze-script", test_analyze_script); + if (!g_str_equal(arch, "s390x")) { + qtest_add_func("/migration/analyze-script", test_analyze_script); + } #endif qtest_add_func("/migration/precopy/unix/plain", test_precopy_unix_plain); qtest_add_func("/migration/precopy/unix/xbzrle", test_precopy_unix_xbzrle); diff --git a/ui/spice-core.c b/ui/spice-core.c index 52a59386d7..db21db2c94 100644 --- a/ui/spice-core.c +++ b/ui/spice-core.c @@ -821,8 +821,7 @@ static void qemu_spice_init(void) }; using_spice = 1; - migration_state.notify = migration_state_notifier; - add_migration_state_change_notifier(&migration_state); + migration_add_notifier(&migration_state, migration_state_notifier); spice_migrate.base.sif = &migrate_interface.base; qemu_spice.add_interface(&spice_migrate.base); diff --git a/ui/vdagent.c b/ui/vdagent.c index 706d6d97bd..64d7ab245a 100644 --- a/ui/vdagent.c +++ b/ui/vdagent.c @@ -671,7 +671,7 @@ static void vdagent_chr_open(Chardev *chr, return; #endif - if (migrate_add_blocker(vd->migration_blocker, errp) != 0) { + if (migrate_add_blocker(&vd->migration_blocker, errp) != 0) { return; } @@ -924,13 +924,12 @@ static void vdagent_chr_fini(Object *obj) { VDAgentChardev *vd = QEMU_VDAGENT_CHARDEV(obj); - migrate_del_blocker(vd->migration_blocker); + migrate_del_blocker(&vd->migration_blocker); vdagent_disconnect(vd); if (vd->mouse_hs) { qemu_input_handler_unregister(vd->mouse_hs); } buffer_free(&vd->outbuf); - error_free(vd->migration_blocker); } static const TypeInfo vdagent_chr_type_info = { |