260 files changed, 7918 insertions, 3608 deletions
diff --git a/.mailmap b/.mailmap index d214959288..94f19a0ac9 100644 --- a/.mailmap +++ b/.mailmap @@ -81,6 +81,9 @@ Huacai Chen <chenhuacai@kernel.org> <chenhuacai@loongson.cn> James Hogan <jhogan@kernel.org> <james.hogan@imgtec.com> Leif Lindholm <quic_llindhol@quicinc.com> <leif.lindholm@linaro.org> Leif Lindholm <quic_llindhol@quicinc.com> <leif@nuviainc.com> +Luc Michel <luc@lmichel.fr> <luc.michel@git.antfield.fr> +Luc Michel <luc@lmichel.fr> <luc.michel@greensocs.com> +Luc Michel <luc@lmichel.fr> <lmichel@kalray.eu> Radoslaw Biernacki <rad@semihalf.com> <radoslaw.biernacki@linaro.org> Paul Brook <paul@nowt.org> <paul@codesourcery.com> Paul Burton <paulburton@kernel.org> <paul.burton@mips.com> diff --git a/MAINTAINERS b/MAINTAINERS index 2f435102ec..d36aa44661 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -245,6 +245,7 @@ M: Richard Henderson <richard.henderson@linaro.org> S: Maintained F: target/hppa/ F: disas/hppa.c +F: tests/tcg/hppa/ LoongArch TCG CPUs M: Song Gao <gaosong@loongson.cn> @@ -258,6 +259,7 @@ M: Laurent Vivier <laurent@vivier.eu> S: Maintained F: target/m68k/ F: disas/m68k.c +F: tests/tcg/m68k/ MicroBlaze TCG CPUs M: Edgar E. Iglesias <edgar.iglesias@gmail.com> @@ -284,7 +286,9 @@ R: Marek Vasut <marex@denx.de> S: Orphan F: target/nios2/ F: hw/nios2/ +F: hw/intc/nios2_vic.c F: disas/nios2.c +F: include/hw/intc/nios2_vic.h F: configs/devices/nios2-softmmu/default.mak F: tests/docker/dockerfiles/debian-nios2-cross.d/build-toolchain.sh F: tests/tcg/nios2/ @@ -295,6 +299,7 @@ S: Odd Fixes F: docs/system/openrisc/cpu-features.rst F: target/openrisc/ F: hw/openrisc/ +F: include/hw/openrisc/ F: tests/tcg/openrisc/ PowerPC TCG CPUs @@ -307,6 +312,12 @@ F: target/ppc/ F: hw/ppc/ppc.c F: hw/ppc/ppc_booke.c F: include/hw/ppc/ppc.h +F: hw/ppc/meson.build +F: hw/ppc/trace* +F: configs/devices/ppc* +F: docs/system/ppc/embedded.rst +F: docs/system/target-ppc.rst +F: tests/tcg/ppc*/* RISC-V TCG CPUs M: Palmer Dabbelt <palmer@dabbelt.com> @@ -325,6 +336,7 @@ F: hw/intc/riscv* F: include/hw/riscv/ F: linux-user/host/riscv32/ F: linux-user/host/riscv64/ +F: tests/tcg/riscv64/ RISC-V XThead* extensions M: Christoph Muellner <christoph.muellner@vrull.eu> @@ -366,6 +378,7 @@ F: target/sh4/ F: hw/sh4/ F: disas/sh4.c F: include/hw/sh4/ +F: tests/tcg/sh4/ SPARC TCG CPUs M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> @@ -376,6 +389,7 @@ F: hw/sparc/ F: hw/sparc64/ F: include/hw/sparc/sparc64.h F: disas/sparc.c +F: tests/tcg/sparc64/ X86 TCG CPUs M: Paolo Bonzini <pbonzini@redhat.com> @@ -885,7 +899,7 @@ S: Odd Fixes F: hw/arm/raspi.c F: hw/arm/raspi_platform.h F: hw/*/bcm283* -F: include/hw/arm/raspi* +F: include/hw/arm/rasp* F: include/hw/*/bcm283* F: docs/system/arm/raspi.rst @@ -1173,19 +1187,24 @@ F: hw/*/etraxfs_*.c HP-PARISC Machines ------------------ -HP B160L +HP B160L, HP C3700 M: Richard Henderson <richard.henderson@linaro.org> R: Helge Deller <deller@gmx.de> S: Odd Fixes F: configs/devices/hppa-softmmu/default.mak F: hw/hppa/ +F: hw/input/lasips2.c F: hw/net/*i82596* F: hw/misc/lasi.c +F: hw/pci-host/astro.c F: hw/pci-host/dino.c +F: include/hw/input/lasips2.h F: include/hw/misc/lasi.h F: include/hw/net/lasi_82596.h +F: include/hw/pci-host/astro.h F: include/hw/pci-host/dino.h F: pc-bios/hppa-firmware.img +F: roms/seabios-hppa/ LoongArch Machines ------------------ @@ -1322,10 +1341,7 @@ M: Philippe Mathieu-Daudé <philmd@linaro.org> R: Jiaxun Yang <jiaxun.yang@flygoat.com> S: Odd Fixes F: hw/mips/fuloong2e.c -F: hw/isa/vt82c686.c F: hw/pci-host/bonito.c -F: 
hw/usb/vt82c686-uhci-pci.c -F: include/hw/isa/vt82c686.h F: include/hw/pci-host/bonito.h F: tests/avocado/machine_mips_fuloong2e.py @@ -1337,6 +1353,7 @@ F: hw/intc/loongson_liointc.c F: hw/mips/loongson3_bootp.c F: hw/mips/loongson3_bootp.h F: hw/mips/loongson3_virt.c +F: include/hw/intc/loongson_liointc.h F: tests/avocado/machine_mips_loongson3v.py Boston @@ -1354,6 +1371,7 @@ or1k-sim M: Jia Liu <proljc@gmail.com> S: Maintained F: docs/system/openrisc/or1k-sim.rst +F: hw/intc/ompic.c F: hw/openrisc/openrisc_sim.c PowerPC Machines @@ -1361,7 +1379,8 @@ PowerPC Machines 405 (ref405ep) L: qemu-ppc@nongnu.org S: Orphan -F: hw/ppc/ppc405_boards.c +F: hw/ppc/ppc405* +F: tests/avocado/ppc_405.py Bamboo L: qemu-ppc@nongnu.org @@ -1373,6 +1392,7 @@ e500 L: qemu-ppc@nongnu.org S: Orphan F: hw/ppc/e500* +F: hw/ppc/ppce500_spin.c F: hw/gpio/mpc8xxx.c F: hw/i2c/mpc_i2c.c F: hw/net/fsl_etsec/ @@ -1380,8 +1400,9 @@ F: hw/pci-host/ppce500.c F: include/hw/ppc/ppc_e500.h F: include/hw/pci-host/ppce500.h F: pc-bios/u-boot.e500 -F: hw/intc/openpic_kvm.h +F: hw/intc/openpic_kvm.c F: include/hw/ppc/openpic_kvm.h +F: docs/system/ppc/ppce500.rst mpc8544ds L: qemu-ppc@nongnu.org @@ -1401,6 +1422,7 @@ F: hw/pci-bridge/dec.[hc] F: hw/misc/macio/ F: hw/misc/mos6522.c F: hw/nvram/mac_nvram.c +F: hw/ppc/fw_cfg.c F: hw/input/adb* F: include/hw/misc/macio/ F: include/hw/misc/mos6522.h @@ -1454,6 +1476,10 @@ F: hw/*/spapr* F: include/hw/*/spapr* F: hw/*/xics* F: include/hw/*/xics* +F: include/hw/ppc/fdt.h +F: hw/ppc/fdt.c +F: include/hw/ppc/pef.h +F: hw/ppc/pef.c F: pc-bios/slof.bin F: docs/system/ppc/pseries.rst F: docs/specs/ppc-spapr-* @@ -1491,6 +1517,7 @@ M: BALATON Zoltan <balaton@eik.bme.hu> L: qemu-ppc@nongnu.org S: Maintained F: hw/ppc/sam460ex.c +F: hw/ppc/ppc440_uc.c F: hw/ppc/ppc440_pcix.c F: hw/display/sm501* F: hw/ide/sii3112.c @@ -1710,6 +1737,16 @@ F: hw/s390x/event-facility.c F: hw/s390x/sclp*.c L: qemu-s390x@nongnu.org +S390 CPU topology +M: Nina Schoetterl-Glausch <nsg@linux.ibm.com> +S: Supported +F: include/hw/s390x/cpu-topology.h +F: hw/s390x/cpu-topology.c +F: target/s390x/kvm/stsi-topology.c +F: docs/devel/s390-cpu-topology.rst +F: docs/system/s390x/cpu-topology.rst +F: tests/avocado/s390_topology.py + X86 Machines ------------ PC @@ -1794,6 +1831,7 @@ F: hw/core/null-machine.c F: hw/core/numa.c F: hw/cpu/cluster.c F: qapi/machine.json +F: qapi/machine-common.json F: qapi/machine-target.json F: include/hw/boards.h F: include/hw/core/cpu.h @@ -1979,7 +2017,9 @@ F: docs/specs/acpi_hest_ghes.rst ppc4xx L: qemu-ppc@nongnu.org S: Orphan -F: hw/ppc/ppc4*.c +F: hw/ppc/ppc4xx*.c +F: hw/ppc/ppc440_uc.c +F: hw/ppc/ppc440.h F: hw/i2c/ppc4xx_i2c.c F: include/hw/ppc/ppc4xx.h F: include/hw/i2c/ppc4xx_i2c.h @@ -2482,6 +2522,15 @@ S: Maintained F: hw/isa/piix.c F: include/hw/southbridge/piix.h +VIA South Bridges (VT82C686B, VT8231) +M: BALATON Zoltan <balaton@eik.bme.hu> +M: Philippe Mathieu-Daudé <philmd@linaro.org> +R: Jiaxun Yang <jiaxun.yang@flygoat.com> +S: Maintained +F: hw/isa/vt82c686.c +F: hw/usb/vt82c686-uhci-pci.c +F: include/hw/isa/vt82c686.h + Firmware configuration (fw_cfg) M: Philippe Mathieu-Daudé <philmd@linaro.org> R: Gerd Hoffmann <kraxel@redhat.com> @@ -2575,7 +2624,7 @@ M: Halil Pasic <pasic@linux.ibm.com> M: Christian Borntraeger <borntraeger@linux.ibm.com> S: Supported F: hw/s390x/storage-keys.h -F: hw/390x/s390-skeys*.c +F: hw/s390x/s390-skeys*.c L: qemu-s390x@nongnu.org S390 storage attribute device @@ -2583,7 +2632,7 @@ M: Halil Pasic <pasic@linux.ibm.com> M: Christian Borntraeger 
<borntraeger@linux.ibm.com> S: Supported F: hw/s390x/storage-attributes.h -F: hw/s390/s390-stattrib*.c +F: hw/s390x/s390-stattrib*.c L: qemu-s390x@nongnu.org S390 floating interrupt controller @@ -2928,7 +2977,7 @@ F: include/qemu/main-loop.h F: include/sysemu/runstate.h F: include/sysemu/runstate-action.h F: util/main-loop.c -F: util/qemu-timer.c +F: util/qemu-timer*.c F: system/vl.c F: system/main.c F: system/cpus.c @@ -3913,6 +3962,7 @@ M: Jason Wang <jasowang@redhat.com> R: Andrew Melnychenko <andrew@daynix.com> R: Yuri Benditovich <yuri.benditovich@daynix.com> S: Maintained +F: docs/devel/ebpf_rss.rst F: ebpf/* F: tools/ebpf/* diff --git a/backends/tpm/tpm_emulator.c b/backends/tpm/tpm_emulator.c index 402a2d6312..bf1a90f5d7 100644 --- a/backends/tpm/tpm_emulator.c +++ b/backends/tpm/tpm_emulator.c @@ -534,11 +534,8 @@ static int tpm_emulator_block_migration(TPMEmulator *tpm_emu) error_setg(&tpm_emu->migration_blocker, "Migration disabled: TPM emulator does not support " "migration"); - if (migrate_add_blocker(tpm_emu->migration_blocker, &err) < 0) { + if (migrate_add_blocker(&tpm_emu->migration_blocker, &err) < 0) { error_report_err(err); - error_free(tpm_emu->migration_blocker); - tpm_emu->migration_blocker = NULL; - return -1; } } @@ -1016,10 +1013,7 @@ static void tpm_emulator_inst_finalize(Object *obj) qapi_free_TPMEmulatorOptions(tpm_emu->options); - if (tpm_emu->migration_blocker) { - migrate_del_blocker(tpm_emu->migration_blocker); - error_free(tpm_emu->migration_blocker); - } + migrate_del_blocker(&tpm_emu->migration_blocker); tpm_sized_buffer_reset(&state_blobs->volatil); tpm_sized_buffer_reset(&state_blobs->permanent); diff --git a/block/parallels.c b/block/parallels.c index 6b46623241..1d695ce7fb 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -1369,9 +1369,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, bdrv_get_device_or_node_name(bs)); bdrv_graph_rdunlock_main_loop(); - ret = migrate_add_blocker(s->migration_blocker, errp); + ret = migrate_add_blocker(&s->migration_blocker, errp); if (ret < 0) { - error_setg(errp, "Migration blocker error"); goto fail; } qemu_co_mutex_init(&s->lock); @@ -1406,7 +1405,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); if (ret < 0) { error_setg_errno(errp, -ret, "Could not repair corrupted image"); - migrate_del_blocker(s->migration_blocker); + migrate_del_blocker(&s->migration_blocker); goto fail; } } @@ -1423,7 +1422,6 @@ fail: */ parallels_free_used_bitmap(bs); - error_free(s->migration_blocker); g_free(s->bat_dirty_bmap); qemu_vfree(s->header); return ret; @@ -1448,8 +1446,7 @@ static void parallels_close(BlockDriverState *bs) g_free(s->bat_dirty_bmap); qemu_vfree(s->header); - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); + migrate_del_blocker(&s->migration_blocker); } static bool parallels_is_support_dirty_bitmaps(BlockDriverState *bs) diff --git a/block/qcow.c b/block/qcow.c index 38a16253b8..fdd4c83948 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -307,9 +307,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, bdrv_get_device_or_node_name(bs)); bdrv_graph_rdunlock_main_loop(); - ret = migrate_add_blocker(s->migration_blocker, errp); + ret = migrate_add_blocker(&s->migration_blocker, errp); if (ret < 0) { - error_free(s->migration_blocker); goto fail; } @@ -802,8 +801,7 @@ static void qcow_close(BlockDriverState *bs) g_free(s->cluster_cache); 
g_free(s->cluster_data); - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); + migrate_del_blocker(&s->migration_blocker); } static int coroutine_fn GRAPH_UNLOCKED diff --git a/block/vdi.c b/block/vdi.c index 3ed43b6f35..fd7e365383 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -498,9 +498,8 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, bdrv_get_device_or_node_name(bs)); bdrv_graph_rdunlock_main_loop(); - ret = migrate_add_blocker(s->migration_blocker, errp); + ret = migrate_add_blocker(&s->migration_blocker, errp); if (ret < 0) { - error_free(s->migration_blocker); goto fail_free_bmap; } @@ -988,8 +987,7 @@ static void vdi_close(BlockDriverState *bs) qemu_vfree(s->bmap); - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); + migrate_del_blocker(&s->migration_blocker); } static int vdi_has_zero_init(BlockDriverState *bs) diff --git a/block/vhdx.c b/block/vhdx.c index 73cb214fb4..e37f8c0926 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -985,8 +985,7 @@ static void vhdx_close(BlockDriverState *bs) s->bat = NULL; qemu_vfree(s->parent_entries); s->parent_entries = NULL; - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); + migrate_del_blocker(&s->migration_blocker); qemu_vfree(s->log.hdr); s->log.hdr = NULL; vhdx_region_unregister_all(s); @@ -1097,9 +1096,8 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, error_setg(&s->migration_blocker, "The vhdx format used by node '%s' " "does not support live migration", bdrv_get_device_or_node_name(bs)); - ret = migrate_add_blocker(s->migration_blocker, errp); + ret = migrate_add_blocker(&s->migration_blocker, errp); if (ret < 0) { - error_free(s->migration_blocker); goto fail; } diff --git a/block/vmdk.c b/block/vmdk.c index 8a3b152798..1335d39e16 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1386,9 +1386,8 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, error_setg(&s->migration_blocker, "The vmdk format used by node '%s' " "does not support live migration", bdrv_get_device_or_node_name(bs)); - ret = migrate_add_blocker(s->migration_blocker, errp); + ret = migrate_add_blocker(&s->migration_blocker, errp); if (ret < 0) { - error_free(s->migration_blocker); goto fail; } @@ -2867,8 +2866,7 @@ static void vmdk_close(BlockDriverState *bs) vmdk_free_extents(bs); g_free(s->create_type); - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); + migrate_del_blocker(&s->migration_blocker); } static int64_t coroutine_fn GRAPH_RDLOCK diff --git a/block/vpc.c b/block/vpc.c index 945847fe4a..c30cf8689a 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -452,9 +452,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, bdrv_get_device_or_node_name(bs)); bdrv_graph_rdunlock_main_loop(); - ret = migrate_add_blocker(s->migration_blocker, errp); + ret = migrate_add_blocker(&s->migration_blocker, errp); if (ret < 0) { - error_free(s->migration_blocker); goto fail; } @@ -1190,8 +1189,7 @@ static void vpc_close(BlockDriverState *bs) g_free(s->pageentry_u8); #endif - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); + migrate_del_blocker(&s->migration_blocker); } static QemuOptsList vpc_create_opts = { diff --git a/block/vvfat.c b/block/vvfat.c index b0415798c0..266e036dcd 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1268,9 +1268,8 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, "The vvfat (rw) format used by node 
'%s' " "does not support live migration", bdrv_get_device_or_node_name(bs)); - ret = migrate_add_blocker(s->migration_blocker, errp); + ret = migrate_add_blocker(&s->migration_blocker, errp); if (ret < 0) { - error_free(s->migration_blocker); goto fail; } } @@ -3239,8 +3238,7 @@ static void vvfat_close(BlockDriverState *bs) g_free(s->cluster_buffer); if (s->qcow) { - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); + migrate_del_blocker(&s->migration_blocker); } } diff --git a/chardev/msmouse.c b/chardev/msmouse.c index ab8fe981d6..a774c397b4 100644 --- a/chardev/msmouse.c +++ b/chardev/msmouse.c @@ -171,7 +171,7 @@ static int msmouse_chr_write(struct Chardev *s, const uint8_t *buf, int len) return len; } -static QemuInputHandler msmouse_handler = { +static const QemuInputHandler msmouse_handler = { .name = "QEMU Microsoft Mouse", .mask = INPUT_EVENT_MASK_BTN | INPUT_EVENT_MASK_REL, .event = msmouse_input_event, diff --git a/chardev/wctablet.c b/chardev/wctablet.c index 43bdf6b608..f4008bf35b 100644 --- a/chardev/wctablet.c +++ b/chardev/wctablet.c @@ -178,7 +178,7 @@ static void wctablet_input_sync(DeviceState *dev) } } -static QemuInputHandler wctablet_handler = { +static const QemuInputHandler wctablet_handler = { .name = "QEMU Wacom Pen Tablet", .mask = INPUT_EVENT_MASK_BTN | INPUT_EVENT_MASK_ABS, .event = wctablet_input_event, diff --git a/configs/meson/windows.txt b/configs/meson/windows.txt new file mode 100644 index 0000000000..55b192e71b --- /dev/null +++ b/configs/meson/windows.txt @@ -0,0 +1,9 @@ +# target-specific defaults, can still be overridden on +# the command line + +[built-in options] +bindir = '' +prefix = '/qemu' + +[project options] +qemu_suffix = '' diff --git a/configure b/configure index 96d0dd5ffc..b4ea78c77d 100755 --- a/configure +++ b/configure @@ -94,7 +94,7 @@ quote_sh() { printf "%s" "$1" | sed "s,','\\\\'',g; s,.*,'&'," } -print_error() { +error_exit() { (echo echo "ERROR: $1" while test -n "$2"; do @@ -102,10 +102,6 @@ print_error() { shift done echo) >&2 -} - -error_exit() { - print_error "$@" exit 1 } @@ -248,11 +244,8 @@ done default_cflags='-O2 -g' git_submodules_action="update" -git="git" docs="auto" EXESUF="" -prefix="/usr/local" -qemu_suffix="qemu" system="yes" linux_user="" bsd_user="" @@ -261,12 +254,10 @@ subdirs="" ninja="" python= download="enabled" -bindir="bin" skip_meson=no use_containers="yes" gdb_bin=$(command -v "gdb-multiarch" || command -v "gdb") gdb_arches="" -werror="" # Don't accept a target_list environment variable. unset target_list @@ -322,7 +313,6 @@ objcopy="${OBJCOPY-${cross_prefix}objcopy}" ld="${LD-${cross_prefix}ld}" ranlib="${RANLIB-${cross_prefix}ranlib}" nm="${NM-${cross_prefix}nm}" -smbd="$SMBD" strip="${STRIP-${cross_prefix}strip}" widl="${WIDL-${cross_prefix}widl}" windres="${WINDRES-${cross_prefix}windres}" @@ -373,18 +363,6 @@ else targetos=bogus fi -# OS specific - -case $targetos in -windows) - plugins="no" - pie="no" -;; -haiku) - pie="no" -;; -esac - if test ! 
-z "$cpu" ; then # command line argument : @@ -600,9 +578,6 @@ done if test "$targetos" = "windows" ; then EXESUF=".exe" - prefix="/qemu" - bindir="" - qemu_suffix="" fi meson_option_build_array() { @@ -625,7 +600,10 @@ meson_option_build_array() { meson_options= meson_option_add() { - meson_options="$meson_options $(quote_sh "$1")" + local arg + for arg; do + meson_options="$meson_options $(quote_sh "$arg")" + done } meson_option_parse() { meson_options="$meson_options $(_meson_option_parse "$@")" @@ -636,6 +614,14 @@ meson_option_parse() { fi } +meson_add_machine_file() { + if test "$cross_compile" = "yes"; then + meson_option_add --cross-file "$1" + else + meson_option_add --native-file "$1" + fi +} + for opt do optarg=$(expr "x$opt" : 'x[^=]*=\(.*\)') case "$opt" in @@ -643,8 +629,6 @@ for opt do ;; --version|-V) exec cat "$source_path/VERSION" ;; - --prefix=*) prefix="$optarg" - ;; --cross-prefix=*) ;; --cc=*) @@ -665,8 +649,6 @@ for opt do ;; --ninja=*) ninja="$optarg" ;; - --smbd=*) smbd="$optarg" - ;; --extra-cflags=*) ;; --extra-cxxflags=*) @@ -715,10 +697,6 @@ for opt do ;; --static) static="yes" ;; - --bindir=*) bindir="$optarg" - ;; - --with-suffix=*) qemu_suffix="$optarg" - ;; --host=*|--build=*|\ --disable-dependency-tracking|\ --sbindir=*|--sharedstatedir=*|\ @@ -738,7 +716,6 @@ for opt do default_cflags='-O0 -g' ;; --disable-tcg) tcg="disabled" - plugins="no" ;; --enable-tcg) tcg="enabled" ;; @@ -763,25 +740,15 @@ for opt do ;; --disable-pie) pie="no" ;; - --enable-werror) werror="yes" + --enable-cfi) cfi=true ;; - --disable-werror) werror="no" - ;; - --enable-cfi) - cfi="true"; - meson_option_add -Db_lto=true - ;; - --disable-cfi) cfi="false" + --disable-cfi) cfi=false ;; --disable-download) download="disabled"; git_submodules_action=validate; ;; --enable-download) download="enabled"; git_submodules_action=update; ;; - --enable-plugins) if test "$targetos" = "windows"; then - error_exit "TCG plugins not currently supported on Windows platforms" - else - plugins="yes" - fi + --enable-plugins) plugins="yes" ;; --disable-plugins) plugins="no" ;; @@ -807,11 +774,6 @@ then git_submodules_action="validate" fi -# test for any invalid configuration combinations -if test "$plugins" = "yes" -a "$tcg" = "disabled"; then - error_exit "Can't enable plugins on non-TCG builds" -fi - if ! 
test -f "$source_path/subprojects/keycodemapdb/README" \ && test "$download" = disabled then @@ -887,7 +849,6 @@ Options: [defaults in brackets after descriptions] Standard options: --help print this message - --prefix=PREFIX install in PREFIX [$prefix] --target-list=LIST set target list (default: build all) $(echo Available targets: $default_target_list | \ fold -s -w 53 | sed -e 's/^/ /') @@ -910,20 +871,14 @@ Advanced options (experts only): --cross-prefix-ARCH=PREFIX cross compiler prefix when building ARCH guest test cases --python=PYTHON use specified python [$python] --ninja=NINJA use specified ninja [$ninja] - --smbd=SMBD use specified smbd [$smbd] --static enable static build [$static] - --bindir=PATH install binaries in PATH - --with-suffix=SUFFIX suffix for QEMU data inside datadir/libdir/sysconfdir/docdir [$qemu_suffix] --without-default-features default all --enable-* options to "disabled" --without-default-devices do not include any device that is not needed to start the emulator (only use if you are including desired devices in configs/devices/) --with-devices-ARCH=NAME override default configs/devices --enable-debug enable common debug build options - --disable-werror disable compilation abort on warning --cpu=CPU Build for host CPU [$cpu] - --enable-plugins - enable plugins via shared library loading --disable-containers don't use containers for cross-building --container-engine=TYPE which container engine to use [$container_engine] --gdb=GDB-path gdb to use for gdbstub tests [$gdb_bin] @@ -1046,17 +1001,6 @@ if test -z "$ninja"; then fi fi -# Consult white-list to determine whether to enable werror -# by default. Only enable by default for git builds -if test -z "$werror" ; then - if test -e "$source_path/.git" && \ - { test "$targetos" = linux || test "$targetos" = "windows"; }; then - werror="yes" - else - werror="no" - fi -fi - if test "$targetos" = "bogus"; then # Now that we know that we're not printing the help and that # the compiler works (so the results of the check_defines we used @@ -1065,15 +1009,27 @@ if test "$targetos" = "bogus"; then error_exit "Unrecognized host OS (uname -s reports '$(uname -s)')" fi +# test for any invalid configuration combinations +if test "$targetos" = "windows"; then + if test "$plugins" = "yes"; then + error_exit "TCG plugins not currently supported on Windows platforms" + fi + plugins="no" +fi +if test "$tcg" = "disabled" ; then + if test "$plugins" = "yes"; then + error_exit "Can't enable plugins on non-TCG builds" + fi + plugins="no" +fi if test "$static" = "yes" ; then if test "$plugins" = "yes"; then error_exit "static and plugins are mutually incompatible" - else - plugins="no" fi + plugins="no" fi -test "$plugins" = "" && plugins=yes -if test "$plugins" = "yes"; then +if test "$plugins" != "no"; then + plugins=yes subdirs="$subdirs contrib/plugins" fi @@ -1088,19 +1044,23 @@ static THREAD int tls_var; int main(void) { return tls_var; } EOF -if test "$static" = "yes"; then - if test "$pie" != "no" && compile_prog "-Werror -fPIE -DPIE" "-static-pie"; then - pie="yes" - elif test "$pie" = "yes"; then - error_exit "-static-pie not available due to missing toolchain support" +if test "$targetos" = windows || test "$targetos" = haiku; then + if test "$pie" = "yes"; then + error_exit "PIE not available due to missing OS support" + fi + pie=no +fi + +if test "$pie" != "no"; then + if test "$static" = "yes"; then + pie_ldflags=-static-pie else - pie="no" + pie_ldflags=-pie fi -elif test "$pie" != "no"; then - if compile_prog "-Werror 
-fPIE -DPIE" "-pie"; then + if compile_prog "-Werror -fPIE -DPIE" "$pie_ldflags"; then pie="yes" elif test "$pie" = "yes"; then - error_exit "PIE not available due to missing toolchain support" + error_exit "-static-pie not available due to missing toolchain support" else echo "Disabling PIE due to missing toolchain support" pie="no" @@ -1139,6 +1099,18 @@ if test "$tcg" = "auto"; then fi fi +######################################### +# gdb test + +if test -n "$gdb_bin"; then + gdb_version=$($gdb_bin --version | head -n 1) + if version_ge ${gdb_version##* } 9.1; then + gdb_arches=$($python "$source_path/scripts/probe-gdb-support.py" $gdb_bin) + else + gdb_bin="" + fi +fi + ########################################## # big/little endian test cat > $TMPC << EOF @@ -1166,34 +1138,6 @@ EOF fi fi -######################################## -# check if ccache is interfering with -# semantic analysis of macros - -unset CCACHE_CPP2 -ccache_cpp2=no -cat > $TMPC << EOF -static const int Z = 1; -#define fn() ({ Z; }) -#define TAUT(X) ((X) == Z) -#define PAREN(X, Y) (X == Y) -#define ID(X) (X) -int main(void) -{ - int x = 0, y = 0; - x = ID(x); - x = fn(); - fn(); - if (PAREN(x, y)) return 0; - if (TAUT(Z)) return 0; - return 0; -} -EOF - -if ! compile_object "-Werror"; then - ccache_cpp2=yes -fi - ########################################## # functions to probe cross compilers @@ -1337,10 +1281,6 @@ probe_target_compiler() { container_cross_prefix=aarch64-linux-gnu- container_cross_cc=${container_cross_prefix}gcc ;; - alpha) - container_image=debian-alpha-cross - container_cross_prefix=alpha-linux-gnu- - ;; arm) # We don't have any bigendian build tools so we only use this for ARM container_image=debian-armhf-cross @@ -1355,10 +1295,6 @@ probe_target_compiler() { container_cross_prefix=hexagon-unknown-linux-musl- container_cross_cc=${container_cross_prefix}clang ;; - hppa) - container_image=debian-hppa-cross - container_cross_prefix=hppa-linux-gnu- - ;; i386) container_image=fedora-i386-cross container_cross_prefix= @@ -1367,10 +1303,6 @@ probe_target_compiler() { container_image=debian-loongarch-cross container_cross_prefix=loongarch64-unknown-linux-gnu- ;; - m68k) - container_image=debian-m68k-cross - container_cross_prefix=m68k-linux-gnu- - ;; microblaze) container_image=debian-microblaze-cross container_cross_prefix=microblaze-linux-musl- @@ -1383,14 +1315,6 @@ probe_target_compiler() { container_image=debian-mips64-cross container_cross_prefix=mips64-linux-gnuabi64- ;; - mipsel) - container_image=debian-mipsel-cross - container_cross_prefix=mipsel-linux-gnu- - ;; - mips) - container_image=debian-mips-cross - container_cross_prefix=mips-linux-gnu- - ;; nios2) container_image=debian-nios2-cross container_cross_prefix=nios2-linux-gnu- @@ -1405,22 +1329,6 @@ probe_target_compiler() { container_cross_prefix=powerpc${target_arch#ppc}-linux-gnu- container_cross_cc=${container_cross_prefix}gcc-10 ;; - riscv64) - container_image=debian-riscv64-test-cross - container_cross_prefix=riscv64-linux-gnu- - ;; - s390x) - container_image=debian-s390x-cross - container_cross_prefix=s390x-linux-gnu- - ;; - sh4) - container_image=debian-sh4-cross - container_cross_prefix=sh4-linux-gnu- - ;; - sparc64) - container_image=debian-sparc64-cross - container_cross_prefix=sparc64-linux-gnu- - ;; tricore) container_image=debian-tricore-cross container_cross_prefix=tricore- @@ -1435,6 +1343,11 @@ probe_target_compiler() { # default to the dc232b cpu container_cross_prefix=/opt/2020.07/xtensa-dc232b-elf/bin/xtensa-dc232b-elf- ;; + 
*) + # Debian and GNU architecture names usually match + container_image=debian-$target_arch-cross + container_cross_prefix=$target_arch-linux-gnu- + ;; esac : ${container_cross_cc:=${container_cross_prefix}gcc} : ${container_cross_ar:=${container_cross_prefix}ar} @@ -1679,25 +1592,9 @@ echo >> $config_host_mak echo all: >> $config_host_mak -if test "$targetos" = "windows"; then - echo "QEMU_GA_MANUFACTURER=${QEMU_GA_MANUFACTURER-QEMU}" >> $config_host_mak - echo "QEMU_GA_DISTRO=${QEMU_GA_DISTRO-Linux}" >> $config_host_mak - echo "QEMU_GA_VERSION=${QEMU_GA_VERSION-$(cat "$source_path"/VERSION)}" >> $config_host_mak -fi - echo "SRC_PATH=$source_path" >> $config_host_mak echo "TARGET_DIRS=$target_list" >> $config_host_mak - -if test -n "$gdb_bin"; then - gdb_version=$($gdb_bin --version | head -n 1) - if version_ge ${gdb_version##* } 9.1; then - echo "HAVE_GDB_BIN=$gdb_bin" >> $config_host_mak - gdb_arches=$($python "$source_path/scripts/probe-gdb-support.py" $gdb_bin) - else - gdb_bin="" - fi -fi - +echo "GDB=$gdb_bin" >> $config_host_mak if test "$container" != no; then echo "RUNC=$runc" >> $config_host_mak fi @@ -1726,10 +1623,6 @@ if test "$default_targets" = "yes"; then echo "CONFIG_DEFAULT_TARGETS=y" >> $config_host_mak fi -if test "$ccache_cpp2" = "yes"; then - echo "export CCACHE_CPP2=y" >> $config_host_mak -fi - # contrib/plugins configuration echo "# Automatically generated by configure - do not modify" > contrib/plugins/$config_host_mak echo "SRC_PATH=$source_path/contrib/plugins" >> contrib/plugins/$config_host_mak @@ -1746,14 +1639,6 @@ mkdir -p tests/tcg echo "# Automatically generated by configure - do not modify" > $config_host_mak echo "SRC_PATH=$source_path" >> $config_host_mak -# versioned checked in the main config_host.mak above -if test -n "$gdb_bin"; then - echo "HAVE_GDB_BIN=$gdb_bin" >> $config_host_mak -fi -if test "$plugins" = "yes" ; then - echo "CONFIG_PLUGIN=y" >> $config_host_mak -fi - tcg_tests_targets= for target in $target_list; do arch=${target%%-*} @@ -1786,7 +1671,7 @@ for target in $target_list; do # will GDB work with these binaries? 
if test "${gdb_arches#*$arch}" != "$gdb_arches"; then - echo "HOST_GDB_SUPPORTS_ARCH=y" >> "$config_target_mak" + echo "GDB=$gdb_bin" >> $config_target_mak fi echo "run-tcg-tests-$target: $qemu\$(EXESUF)" >> Makefile.prereqs @@ -1821,6 +1706,29 @@ if test "$skip_meson" = no; then test -n "$objcc" && echo "objc_args = [$(meson_quote $OBJCFLAGS $EXTRA_OBJCFLAGS)]" >> $cross echo "c_link_args = [$(meson_quote $CFLAGS $LDFLAGS $EXTRA_CFLAGS $EXTRA_LDFLAGS)]" >> $cross echo "cpp_link_args = [$(meson_quote $CXXFLAGS $LDFLAGS $EXTRA_CXXFLAGS $EXTRA_LDFLAGS)]" >> $cross + + # Only enable by default for git builds and on select OSes + echo "# environment defaults, can still be overridden on " >> $cross + echo "# the command line" >> $cross + if test -e "$source_path/.git" && \ + { test "$targetos" = linux || test "$targetos" = "windows"; }; then + echo 'werror = true' >> $cross + fi + echo "[project options]" >> $cross + if test "$SMBD" != ''; then + echo "smbd = $(meson_quote "$SMBD")" >> $cross + fi + if test "${QEMU_GA_MANUFACTURER}" != ''; then + echo "qemu_ga_manufacturer = $(meson_quote "${QEMU_GA_MANUFACTURER}")" >> $cross + fi + if test "${QEMU_GA_DISTRO}" != ''; then + echo "qemu_ga_distro = $(meson_quote "${QEMU_GA_DISTRO}")" >> $cross + fi + if test "${QEMU_GA_VERSION}" != ''; then + echo "qemu_ga_version = $(meson_quote "${QEMU_GA_VERSION}")" >> $cross + fi + + echo >> $cross echo "[binaries]" >> $cross echo "c = [$(meson_quote $cc $CPU_CFLAGS)]" >> $cross test -n "$cxx" && echo "cpp = [$(meson_quote $cxx $CPU_CFLAGS)]" >> $cross @@ -1828,6 +1736,7 @@ if test "$skip_meson" = no; then echo "ar = [$(meson_quote $ar)]" >> $cross echo "nm = [$(meson_quote $nm)]" >> $cross echo "pkgconfig = [$(meson_quote $pkg_config)]" >> $cross + echo "pkg-config = [$(meson_quote $pkg_config)]" >> $cross echo "ranlib = [$(meson_quote $ranlib)]" >> $cross if has $sdl2_config; then echo "sdl2-config = [$(meson_quote $sdl2_config)]" >> $cross @@ -1853,39 +1762,36 @@ if test "$skip_meson" = no; then else echo "endian = 'little'" >> $cross fi - cross_arg="--cross-file config-meson.cross" native="config-meson.native.new" echo "# Automatically generated by configure - do not modify" > $native echo "[binaries]" >> $native echo "c = [$(meson_quote $host_cc)]" >> $native mv $native config-meson.native - cross_arg="$cross_arg --native-file config-meson.native" - else - cross_arg="--native-file config-meson.cross" + meson_option_add --native-file + meson_option_add config-meson.native fi mv $cross config-meson.cross + meson_add_machine_file config-meson.cross + if test -f "$source_path/configs/meson/$targetos.txt"; then + meson_add_machine_file $source_path/configs/meson/$targetos.txt + fi rm -rf meson-private meson-info meson-logs - # Built-in options test "$download" = "disabled" && meson_option_add "--wrap-mode=nodownload" - test "$bindir" != "bin" && meson_option_add "-Dbindir=$bindir" test "$default_feature" = no && meson_option_add -Dauto_features=disabled test "$static" = yes && meson_option_add -Dprefer_static=true test "$pie" = no && meson_option_add -Db_pie=false - test "$werror" = yes && meson_option_add -Dwerror=true # QEMU options - test "$cfi" != false && meson_option_add "-Dcfi=$cfi" + test "$cfi" != false && meson_option_add "-Dcfi=$cfi" "-Db_lto=$cfi" test "$docs" != auto && meson_option_add "-Ddocs=$docs" test -n "${LIB_FUZZING_ENGINE+xxx}" && meson_option_add "-Dfuzzing_engine=$LIB_FUZZING_ENGINE" test "$plugins" = yes && meson_option_add "-Dplugins=true" - test "$qemu_suffix" != qemu && 
meson_option_add "-Dqemu_suffix=$qemu_suffix" - test "$smbd" != '' && meson_option_add "-Dsmbd=$smbd" test "$tcg" != enabled && meson_option_add "-Dtcg=$tcg" run_meson() { - NINJA=$ninja $meson setup --prefix "$prefix" "$@" $cross_arg "$PWD" "$source_path" + NINJA=$ninja $meson setup "$@" "$PWD" "$source_path" } eval run_meson $meson_options if test "$?" -ne 0 ; then diff --git a/contrib/elf2dmp/addrspace.c b/contrib/elf2dmp/addrspace.c index 64b5d680ad..6f608a517b 100644 --- a/contrib/elf2dmp/addrspace.c +++ b/contrib/elf2dmp/addrspace.c @@ -72,10 +72,7 @@ int pa_space_create(struct pa_space *ps, QEMU_Elf *qemu_elf) } } - ps->block = malloc(sizeof(*ps->block) * ps->block_nr); - if (!ps->block) { - return 1; - } + ps->block = g_new(struct pa_block, ps->block_nr); for (i = 0; i < phdr_nr; i++) { if (phdr[i].p_type == PT_LOAD) { @@ -97,7 +94,7 @@ int pa_space_create(struct pa_space *ps, QEMU_Elf *qemu_elf) void pa_space_destroy(struct pa_space *ps) { ps->block_nr = 0; - free(ps->block); + g_free(ps->block); } void va_space_set_dtb(struct va_space *vs, uint64_t dtb) diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c index 5db163bdbe..cbc38a7c10 100644 --- a/contrib/elf2dmp/main.c +++ b/contrib/elf2dmp/main.c @@ -120,14 +120,11 @@ static KDDEBUGGER_DATA64 *get_kdbg(uint64_t KernBase, struct pdb_reader *pdb, } } - kdbg = malloc(kdbg_hdr.Size); - if (!kdbg) { - return NULL; - } + kdbg = g_malloc(kdbg_hdr.Size); if (va_space_rw(vs, KdDebuggerDataBlock, kdbg, kdbg_hdr.Size, 0)) { eprintf("Failed to extract entire KDBG\n"); - free(kdbg); + g_free(kdbg); return NULL; } @@ -478,7 +475,7 @@ static bool pe_check_pdb_name(uint64_t base, void *start_addr, } if (memcmp(&rsds->Signature, sign_rsds, sizeof(sign_rsds))) { - eprintf("CodeView signature is \'%.4s\', \'%s\' expected\n", + eprintf("CodeView signature is \'%.4s\', \'%.4s\' expected\n", rsds->Signature, sign_rsds); return false; } @@ -643,7 +640,7 @@ int main(int argc, char *argv[]) } out_kdbg: - free(kdbg); + g_free(kdbg); out_pdb: pdb_exit(&pdb); out_pdb_file: diff --git a/contrib/elf2dmp/pdb.c b/contrib/elf2dmp/pdb.c index 6ca5086f02..40991f5f4c 100644 --- a/contrib/elf2dmp/pdb.c +++ b/contrib/elf2dmp/pdb.c @@ -25,6 +25,10 @@ static uint32_t pdb_get_file_size(const struct pdb_reader *r, unsigned idx) { + if (idx >= r->ds.toc->num_files) { + return 0; + } + return r->ds.toc->file_size[idx]; } @@ -90,18 +94,18 @@ uint64_t pdb_resolve(uint64_t img_base, struct pdb_reader *r, const char *name) static void pdb_reader_ds_exit(struct pdb_reader *r) { - free(r->ds.toc); + g_free(r->ds.toc); } static void pdb_exit_symbols(struct pdb_reader *r) { - free(r->modimage); - free(r->symbols); + g_free(r->modimage); + g_free(r->symbols); } static void pdb_exit_segments(struct pdb_reader *r) { - free(r->segs); + g_free(r->segs); } static void *pdb_ds_read(const PDB_DS_HEADER *header, @@ -116,10 +120,7 @@ static void *pdb_ds_read(const PDB_DS_HEADER *header, nBlocks = (size + header->block_size - 1) / header->block_size; - buffer = malloc(nBlocks * header->block_size); - if (!buffer) { - return NULL; - } + buffer = g_malloc(nBlocks * header->block_size); for (i = 0; i < nBlocks; i++) { memcpy(buffer + i * header->block_size, (const char *)header + @@ -159,16 +160,17 @@ static void *pdb_ds_read_file(struct pdb_reader* r, uint32_t file_number) static int pdb_init_segments(struct pdb_reader *r) { - char *segs; unsigned stream_idx = r->segments; - segs = pdb_ds_read_file(r, stream_idx); - if (!segs) { + r->segs = pdb_ds_read_file(r, stream_idx); + if 
(!r->segs) { return 1; } - r->segs = segs; r->segs_size = pdb_get_file_size(r, stream_idx); + if (!r->segs_size) { + return 1; + } return 0; } @@ -201,7 +203,7 @@ static int pdb_init_symbols(struct pdb_reader *r) return 0; out_symbols: - free(symbols); + g_free(symbols); return err; } @@ -258,7 +260,7 @@ static int pdb_reader_init(struct pdb_reader *r, void *data) out_sym: pdb_exit_symbols(r); out_root: - free(r->ds.root); + g_free(r->ds.root); out_ds: pdb_reader_ds_exit(r); @@ -269,7 +271,7 @@ static void pdb_reader_exit(struct pdb_reader *r) { pdb_exit_segments(r); pdb_exit_symbols(r); - free(r->ds.root); + g_free(r->ds.root); pdb_reader_ds_exit(r); } diff --git a/contrib/elf2dmp/qemu_elf.c b/contrib/elf2dmp/qemu_elf.c index de6ad744c6..055e6f8792 100644 --- a/contrib/elf2dmp/qemu_elf.c +++ b/contrib/elf2dmp/qemu_elf.c @@ -94,10 +94,7 @@ static int init_states(QEMU_Elf *qe) printf("%zu CPU states has been found\n", cpu_nr); - qe->state = malloc(sizeof(*qe->state) * cpu_nr); - if (!qe->state) { - return 1; - } + qe->state = g_new(QEMUCPUState*, cpu_nr); cpu_nr = 0; @@ -115,7 +112,7 @@ static int init_states(QEMU_Elf *qe) static void exit_states(QEMU_Elf *qe) { - free(qe->state); + g_free(qe->state); } static bool check_ehdr(QEMU_Elf *qe) diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst index e1a93df263..6f81df92bc 100644 --- a/docs/devel/index-internals.rst +++ b/docs/devel/index-internals.rst @@ -14,6 +14,7 @@ Details about QEMU's various subsystems including how to add features to them. migration multi-process reset + s390-cpu-topology s390-dasd-ipl tracing vfio-migration diff --git a/docs/devel/s390-cpu-topology.rst b/docs/devel/s390-cpu-topology.rst new file mode 100644 index 0000000000..9eab28d5e5 --- /dev/null +++ b/docs/devel/s390-cpu-topology.rst @@ -0,0 +1,170 @@ +QAPI interface for S390 CPU topology +==================================== + +The following sections will explain the QAPI interface for S390 CPU topology +with the help of exemplary output. +For this, let's assume that QEMU has been started with the following +command, defining 4 CPUs, where CPU[0] is defined by the -smp argument and will +have default values: + +.. code-block:: bash + + qemu-system-s390x \ + -enable-kvm \ + -cpu z14,ctop=on \ + -smp 1,drawers=3,books=3,sockets=2,cores=2,maxcpus=36 \ + -device z14-s390x-cpu,core-id=19,entitlement=high \ + -device z14-s390x-cpu,core-id=11,entitlement=low \ + -device z14-s390x-cpu,core-id=112,entitlement=high \ + ... + +Additions to query-cpus-fast +---------------------------- + +The command query-cpus-fast allows querying the topology tree and +modifiers for all configured vCPUs. + +.. 
code-block:: QMP + + { "execute": "query-cpus-fast" } + { + "return": [ + { + "dedicated": false, + "thread-id": 536993, + "props": { + "core-id": 0, + "socket-id": 0, + "drawer-id": 0, + "book-id": 0 + }, + "cpu-state": "operating", + "entitlement": "medium", + "qom-path": "/machine/unattached/device[0]", + "cpu-index": 0, + "target": "s390x" + }, + { + "dedicated": false, + "thread-id": 537003, + "props": { + "core-id": 19, + "socket-id": 1, + "drawer-id": 0, + "book-id": 2 + }, + "cpu-state": "operating", + "entitlement": "high", + "qom-path": "/machine/peripheral-anon/device[0]", + "cpu-index": 19, + "target": "s390x" + }, + { + "dedicated": false, + "thread-id": 537004, + "props": { + "core-id": 11, + "socket-id": 1, + "drawer-id": 0, + "book-id": 1 + }, + "cpu-state": "operating", + "entitlement": "low", + "qom-path": "/machine/peripheral-anon/device[1]", + "cpu-index": 11, + "target": "s390x" + }, + { + "dedicated": true, + "thread-id": 537005, + "props": { + "core-id": 112, + "socket-id": 0, + "drawer-id": 3, + "book-id": 2 + }, + "cpu-state": "operating", + "entitlement": "high", + "qom-path": "/machine/peripheral-anon/device[2]", + "cpu-index": 112, + "target": "s390x" + } + ] + } + + +QAPI command: set-cpu-topology +------------------------------ + +The command set-cpu-topology allows modifying the topology tree +or the topology modifiers of a vCPU in the configuration. + +.. code-block:: QMP + + { "execute": "set-cpu-topology", + "arguments": { + "core-id": 11, + "socket-id": 0, + "book-id": 0, + "drawer-id": 0, + "entitlement": "low", + "dedicated": false + } + } + {"return": {}} + +The core-id parameter is the only mandatory parameter and every +unspecified parameter keeps its previous value. + +QAPI event CPU_POLARIZATION_CHANGE +---------------------------------- + +When a guest requests a modification of the polarization, +QEMU sends a CPU_POLARIZATION_CHANGE event. + +When requesting the change, the guest only specifies horizontal or +vertical polarization. +It is the job of the entity administrating QEMU to set the dedication and fine +grained vertical entitlement in response to this event. + +Note that a vertical polarized dedicated vCPU can only have a high +entitlement, giving 6 possibilities for vCPU polarization: + +- Horizontal +- Horizontal dedicated +- Vertical low +- Vertical medium +- Vertical high +- Vertical high dedicated + +Example of the event received when the guest issues the CPU instruction +Perform Topology Function PTF(0) to request an horizontal polarization: + +.. code-block:: QMP + + { + "timestamp": { + "seconds": 1687870305, + "microseconds": 566299 + }, + "event": "CPU_POLARIZATION_CHANGE", + "data": { + "polarization": "horizontal" + } + } + +QAPI query command: query-s390x-cpu-polarization +------------------------------------------------ + +The query command query-s390x-cpu-polarization returns the current +CPU polarization of the machine. +In this case the guest previously issued a PTF(1) to request vertical polarization: + +.. 
code-block:: QMP + + { "execute": "query-s390x-cpu-polarization" } + { + "return": { + "polarization": "vertical" + } + } diff --git a/docs/sphinx/hxtool.py b/docs/sphinx/hxtool.py index fb0649a3d5..9f6b9d87dc 100644 --- a/docs/sphinx/hxtool.py +++ b/docs/sphinx/hxtool.py @@ -49,7 +49,7 @@ def serror(file, lnum, errtext): def parse_directive(line): """Return first word of line, if any""" - return re.split('\W', line)[0] + return re.split(r'\W', line)[0] def parse_defheading(file, lnum, line): """Handle a DEFHEADING directive""" diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst index 965cbf84c5..47fd648035 100644 --- a/docs/system/arm/emulation.rst +++ b/docs/system/arm/emulation.rst @@ -46,6 +46,7 @@ the following architecture extensions: - FEAT_HCX (Support for the HCRX_EL2 register) - FEAT_HPDS (Hierarchical permission disables) - FEAT_HPDS2 (Translation table page-based hardware attributes) +- FEAT_HPMN0 (Setting of MDCR_EL2.HPMN to zero) - FEAT_I8MM (AArch64 Int8 matrix multiplication instructions) - FEAT_IDST (ID space trap handling) - FEAT_IESB (Implicit error synchronization event) diff --git a/docs/system/s390x/cpu-topology.rst b/docs/system/s390x/cpu-topology.rst new file mode 100644 index 0000000000..5133fdc362 --- /dev/null +++ b/docs/system/s390x/cpu-topology.rst @@ -0,0 +1,244 @@ +.. _cpu-topology-s390x: + +CPU topology on s390x +===================== + +Since QEMU 8.2, CPU topology on s390x provides up to 3 levels of +topology containers: drawers, books and sockets. They define a +tree-shaped hierarchy. + +The socket container has one or more CPU entries. +Each of these CPU entries consists of a bitmap and three CPU attributes: + +- CPU type +- entitlement +- dedication + +Each bit set in the bitmap correspond to a core-id of a vCPU with matching +attributes. + +This documentation provides general information on S390 CPU topology, +how to enable it and explains the new CPU attributes. +For information on how to modify the S390 CPU topology and how to +monitor polarization changes, see ``docs/devel/s390-cpu-topology.rst``. + +Prerequisites +------------- + +To use the CPU topology, you need to run with KVM on a s390x host that +uses the Linux kernel v6.0 or newer (which provide the so-called +``KVM_CAP_S390_CPU_TOPOLOGY`` capability that allows QEMU to signal the +CPU topology facility via the so-called STFLE bit 11 to the VM). + +Enabling CPU topology +--------------------- + +Currently, CPU topology is only enabled in the host model by default. + +Enabling CPU topology in a CPU model is done by setting the CPU flag +``ctop`` to ``on`` as in: + +.. code-block:: bash + + -cpu gen16b,ctop=on + +Having the topology disabled by default allows migration between +old and new QEMU without adding new flags. + +Default topology usage +---------------------- + +The CPU topology can be specified on the QEMU command line +with the ``-smp`` or the ``-device`` QEMU command arguments. + +Note also that since 7.2 threads are no longer supported in the topology +and the ``-smp`` command line argument accepts only ``threads=1``. + +If none of the containers attributes (drawers, books, sockets) are +specified for the ``-smp`` flag, the number of these containers +is 1. + +Thus the following two options will result in the same topology: + +.. code-block:: bash + + -smp cpus=5,drawer=1,books=1,sockets=8,cores=4,maxcpus=32 + +and + +.. 
code-block:: bash + + -smp cpus=5,sockets=8,cores=4,maxcpus=32 + +When a CPU is defined by the ``-smp`` command argument, its position +inside the topology is calculated by adding the CPUs to the topology +based on the core-id starting with core-0 at position 0 of socket-0, +book-0, drawer-0 and filling all CPUs of socket-0 before filling socket-1 +of book-0 and so on up to the last socket of the last book of the last +drawer. + +When a CPU is defined by the ``-device`` command argument, the +tree topology attributes must all be defined or all not defined. + +.. code-block:: bash + + -device gen16b-s390x-cpu,drawer-id=1,book-id=1,socket-id=2,core-id=1 + +or + +.. code-block:: bash + + -device gen16b-s390x-cpu,core-id=1,dedicated=true + +If none of the tree attributes (drawer, book, sockets), are specified +for the ``-device`` argument, like for all CPUs defined with the ``-smp`` +command argument the topology tree attributes will be set by simply +adding the CPUs to the topology based on the core-id. + +QEMU will not try to resolve collisions and will report an error if the +CPU topology defined explicitly or implicitly on a ``-device`` +argument collides with the definition of a CPU implicitly defined +on the ``-smp`` argument. + +When the topology modifier attributes are not defined for the +``-device`` command argument they takes following default values: + +- dedicated: ``false`` +- entitlement: ``medium`` + + +Hot plug +++++++++ + +New CPUs can be plugged using the device_add hmp command as in: + +.. code-block:: bash + + (qemu) device_add gen16b-s390x-cpu,core-id=9 + +The placement of the CPU is derived from the core-id as described above. + +The topology can of course also be fully defined: + +.. code-block:: bash + + (qemu) device_add gen16b-s390x-cpu,drawer-id=1,book-id=1,socket-id=2,core-id=1 + + +Examples +++++++++ + +In the following machine we define 8 sockets with 4 cores each. + +.. code-block:: bash + + $ qemu-system-s390x -m 2G \ + -cpu gen16b,ctop=on \ + -smp cpus=5,sockets=8,cores=4,maxcpus=32 \ + -device host-s390x-cpu,core-id=14 \ + +A new CPUs can be plugged using the device_add hmp command as before: + +.. code-block:: bash + + (qemu) device_add gen16b-s390x-cpu,core-id=9 + +The core-id defines the placement of the core in the topology by +starting with core 0 in socket 0 up to maxcpus. + +In the example above: + +* There are 5 CPUs provided to the guest with the ``-smp`` command line + They will take the core-ids 0,1,2,3,4 + As we have 4 cores in a socket, we have 4 CPUs provided + to the guest in socket 0, with core-ids 0,1,2,3. + The last CPU, with core-id 4, will be on socket 1. + +* the core with ID 14 provided by the ``-device`` command line will + be placed in socket 3, with core-id 14 + +* the core with ID 9 provided by the ``device_add`` qmp command will + be placed in socket 2, with core-id 9 + + +Polarization, entitlement and dedication +---------------------------------------- + +Polarization +++++++++++++ + +The polarization affects how the CPUs of a shared host are utilized/distributed +among guests. +The guest determines the polarization by using the PTF instruction. + +Polarization defines two models of CPU provisioning: horizontal +and vertical. + +The horizontal polarization is the default model on boot and after +subsystem reset. When horizontal polarization is in effect all vCPUs should +have about equal resource provisioning. + +In the vertical polarization model vCPUs are unequal, but overall more resources +might be available. 
+The guest can make use of the vCPU entitlement information provided by the host +to optimize kernel thread scheduling. + +A subsystem reset puts all vCPU of the configuration into the +horizontal polarization. + +Entitlement ++++++++++++ + +The vertical polarization specifies that the guest's vCPU can get +different real CPU provisioning: + +- a vCPU with vertical high entitlement specifies that this + vCPU gets 100% of the real CPU provisioning. + +- a vCPU with vertical medium entitlement specifies that this + vCPU shares the real CPU with other vCPUs. + +- a vCPU with vertical low entitlement specifies that this + vCPU only gets real CPU provisioning when no other vCPUs needs it. + +In the case a vCPU with vertical high entitlement does not use +the real CPU, the unused "slack" can be dispatched to other vCPU +with medium or low entitlement. + +A vCPU can be "dedicated" in which case the vCPU is fully dedicated to a single +real CPU. + +The dedicated bit is an indication of affinity of a vCPU for a real CPU +while the entitlement indicates the sharing or exclusivity of use. + +Defining the topology on the command line +----------------------------------------- + +The topology can entirely be defined using -device cpu statements, +with the exception of CPU 0 which must be defined with the -smp +argument. + +For example, here we set the position of the cores 1,2,3 to +drawer 1, book 1, socket 2 and cores 0,9 and 14 to drawer 0, +book 0, socket 0 without defining entitlement or dedication. +Core 4 will be set on its default position on socket 1 +(since we have 4 core per socket) and we define it as dedicated and +with vertical high entitlement. + +.. code-block:: bash + + $ qemu-system-s390x -m 2G \ + -cpu gen16b,ctop=on \ + -smp cpus=1,sockets=8,cores=4,maxcpus=32 \ + \ + -device gen16b-s390x-cpu,drawer-id=1,book-id=1,socket-id=2,core-id=1 \ + -device gen16b-s390x-cpu,drawer-id=1,book-id=1,socket-id=2,core-id=2 \ + -device gen16b-s390x-cpu,drawer-id=1,book-id=1,socket-id=2,core-id=3 \ + \ + -device gen16b-s390x-cpu,drawer-id=0,book-id=0,socket-id=0,core-id=9 \ + -device gen16b-s390x-cpu,drawer-id=0,book-id=0,socket-id=0,core-id=14 \ + \ + -device gen16b-s390x-cpu,core-id=4,dedicated=on,entitlement=high + +The entitlement defined for the CPU 4 will only be used after the guest +successfully enables vertical polarization by using the PTF instruction. diff --git a/docs/system/target-s390x.rst b/docs/system/target-s390x.rst index f6f11433c7..94c981e732 100644 --- a/docs/system/target-s390x.rst +++ b/docs/system/target-s390x.rst @@ -34,3 +34,4 @@ Architectural features .. toctree:: s390x/bootdevices s390x/protvirt + s390x/cpu-topology diff --git a/dump/dump.c b/dump/dump.c index d3578ddc62..d355ada62e 100644 --- a/dump/dump.c +++ b/dump/dump.c @@ -111,7 +111,7 @@ static int dump_cleanup(DumpState *s) qemu_mutex_unlock_iothread(); } } - migrate_del_blocker(dump_migration_blocker); + migrate_del_blocker(&dump_migration_blocker); return 0; } @@ -2158,7 +2158,7 @@ void qmp_dump_guest_memory(bool paging, const char *file, * Allows even for -only-migratable, but forbid migration during the * process of dump guest memory. 
*/ - if (migrate_add_blocker_internal(dump_migration_blocker, errp)) { + if (migrate_add_blocker_internal(&dump_migration_blocker, errp)) { /* Remember to release the fd before passing it over to dump state */ close(fd); return; diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 323f042e65..af636cfb2d 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -406,11 +406,7 @@ static int coroutine_fn put_fid(V9fsPDU *pdu, V9fsFidState *fidp) * delete the migration blocker. Ideally, this * should be hooked to transport close notification */ - if (pdu->s->migration_blocker) { - migrate_del_blocker(pdu->s->migration_blocker); - error_free(pdu->s->migration_blocker); - pdu->s->migration_blocker = NULL; - } + migrate_del_blocker(&pdu->s->migration_blocker); } return free_fid(pdu, fidp); } @@ -1505,10 +1501,8 @@ static void coroutine_fn v9fs_attach(void *opaque) error_setg(&s->migration_blocker, "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'", s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag); - err = migrate_add_blocker(s->migration_blocker, NULL); + err = migrate_add_blocker(&s->migration_blocker, NULL); if (err < 0) { - error_free(s->migration_blocker); - s->migration_blocker = NULL; clunk_fid(s, fid); goto out; } diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index cdd6f775a1..4f75c873e2 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -496,8 +496,7 @@ static const MemoryRegionOps acpi_pcihp_io_ops = { }; void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus, - MemoryRegion *address_space_io, - uint16_t io_base) + MemoryRegion *io, uint16_t io_base) { s->io_len = ACPI_PCIHP_SIZE; s->io_base = io_base; @@ -506,7 +505,7 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus, memory_region_init_io(&s->io, owner, &acpi_pcihp_io_ops, s, "acpi-pci-hotplug", s->io_len); - memory_region_add_subregion(address_space_io, s->io_base, &s->io); + memory_region_add_subregion(io, s->io_base, &s->io); object_property_add_uint16_ptr(owner, ACPI_PCIHP_IO_BASE_PROP, &s->io_base, OBJ_PROP_FLAG_READ); diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 24fa169060..84ea6a807a 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -722,84 +722,35 @@ static void do_cpu_reset(void *opaque) cpu_set_pc(cs, entry); } else { - /* If we are booting Linux then we need to check whether we are - * booting into secure or non-secure state and adjust the state - * accordingly. Out of reset, ARM is defined to be in secure state - * (SCR.NS = 0), we change that here if non-secure boot has been - * requested. + /* + * If we are booting Linux then we might need to do so at: + * - AArch64 NS EL2 or NS EL1 + * - AArch32 Secure SVC (EL3) + * - AArch32 NS Hyp (EL2) + * - AArch32 NS SVC (EL1) + * Configure the CPU in the way boot firmware would do to + * drop us down to the appropriate level. */ - if (arm_feature(env, ARM_FEATURE_EL3)) { - /* AArch64 is defined to come out of reset into EL3 if enabled. - * If we are booting Linux then we need to adjust our EL as - * Linux expects us to be in EL2 or EL1. AArch32 resets into - * SVC, which Linux expects, so no privilege/exception level to - * adjust. 
- */ - if (env->aarch64) { - env->cp15.scr_el3 |= SCR_RW; - if (arm_feature(env, ARM_FEATURE_EL2)) { - env->cp15.hcr_el2 |= HCR_RW; - env->pstate = PSTATE_MODE_EL2h; - } else { - env->pstate = PSTATE_MODE_EL1h; - } - if (cpu_isar_feature(aa64_pauth, cpu)) { - env->cp15.scr_el3 |= SCR_API | SCR_APK; - } - if (cpu_isar_feature(aa64_mte, cpu)) { - env->cp15.scr_el3 |= SCR_ATA; - } - if (cpu_isar_feature(aa64_sve, cpu)) { - env->cp15.cptr_el[3] |= R_CPTR_EL3_EZ_MASK; - env->vfp.zcr_el[3] = 0xf; - } - if (cpu_isar_feature(aa64_sme, cpu)) { - env->cp15.cptr_el[3] |= R_CPTR_EL3_ESM_MASK; - env->cp15.scr_el3 |= SCR_ENTP2; - env->vfp.smcr_el[3] = 0xf; - } - if (cpu_isar_feature(aa64_hcx, cpu)) { - env->cp15.scr_el3 |= SCR_HXEN; - } - if (cpu_isar_feature(aa64_fgt, cpu)) { - env->cp15.scr_el3 |= SCR_FGTEN; - } + int target_el = arm_feature(env, ARM_FEATURE_EL2) ? 2 : 1; - /* AArch64 kernels never boot in secure mode */ - assert(!info->secure_boot); - /* This hook is only supported for AArch32 currently: - * bootloader_aarch64[] will not call the hook, and - * the code above has already dropped us into EL2 or EL1. - */ - assert(!info->secure_board_setup); - } - - if (arm_feature(env, ARM_FEATURE_EL2)) { - /* If we have EL2 then Linux expects the HVC insn to work */ - env->cp15.scr_el3 |= SCR_HCE; - } - - /* Set to non-secure if not a secure boot */ - if (!info->secure_boot && - (cs != first_cpu || !info->secure_board_setup)) { - /* Linux expects non-secure state */ - env->cp15.scr_el3 |= SCR_NS; - /* Set NSACR.{CP11,CP10} so NS can access the FPU */ - env->cp15.nsacr |= 3 << 10; - } - } - - if (!env->aarch64 && !info->secure_boot && - arm_feature(env, ARM_FEATURE_EL2)) { + if (env->aarch64) { /* - * This is an AArch32 boot not to Secure state, and - * we have Hyp mode available, so boot the kernel into - * Hyp mode. This is not how the CPU comes out of reset, - * so we need to manually put it there. + * AArch64 kernels never boot in secure mode, and we don't + * support the secure_board_setup hook for AArch64. */ - cpsr_write(env, ARM_CPU_MODE_HYP, CPSR_M, CPSRWriteRaw); + assert(!info->secure_boot); + assert(!info->secure_board_setup); + } else { + if (arm_feature(env, ARM_FEATURE_EL3) && + (info->secure_boot || + (info->secure_board_setup && cs == first_cpu))) { + /* Start this CPU in Secure SVC */ + target_el = 3; + } } + arm_emulate_firmware_reset(cs, target_el); + if (cs == first_cpu) { AddressSpace *as = arm_boot_address_space(cpu, info); diff --git a/hw/arm/meson.build b/hw/arm/meson.build index 11eb9112f8..a6feaf1af9 100644 --- a/hw/arm/meson.build +++ b/hw/arm/meson.build @@ -64,7 +64,6 @@ arm_ss.add(when: 'CONFIG_ARM_SMMUV3', if_true: files('smmuv3.c')) arm_ss.add(when: 'CONFIG_FSL_IMX6UL', if_true: files('fsl-imx6ul.c', 'mcimx6ul-evk.c')) arm_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_soc.c')) arm_ss.add(when: 'CONFIG_XEN', if_true: files('xen_arm.c')) -arm_ss.add_all(xen_ss) system_ss.add(when: 'CONFIG_ARM_SMMUV3', if_true: files('smmu-common.c')) system_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4_boards.c')) diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c index 3c7dfcd6dc..e8a82618f0 100644 --- a/hw/arm/sbsa-ref.c +++ b/hw/arm/sbsa-ref.c @@ -2,6 +2,7 @@ * ARM SBSA Reference Platform emulation * * Copyright (c) 2018 Linaro Limited + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. 
* Written by Hongbo Zhang <hongbo.zhang@linaro.org> * * This program is free software; you can redistribute it and/or modify it @@ -30,6 +31,7 @@ #include "exec/hwaddr.h" #include "kvm_arm.h" #include "hw/arm/boot.h" +#include "hw/arm/bsa.h" #include "hw/arm/fdt.h" #include "hw/arm/smmuv3.h" #include "hw/block/flash.h" @@ -55,14 +57,6 @@ #define NUM_SMMU_IRQS 4 #define NUM_SATA_PORTS 6 -#define VIRTUAL_PMU_IRQ 7 -#define ARCH_GIC_MAINT_IRQ 9 -#define ARCH_TIMER_VIRT_IRQ 11 -#define ARCH_TIMER_S_EL1_IRQ 13 -#define ARCH_TIMER_NS_EL1_IRQ 14 -#define ARCH_TIMER_NS_EL2_IRQ 10 -#define ARCH_TIMER_NS_EL2_VIRT_IRQ 12 - enum { SBSA_FLASH, SBSA_MEM, @@ -479,7 +473,7 @@ static void create_gic(SBSAMachineState *sms, MemoryRegion *mem) */ for (i = 0; i < smp_cpus; i++) { DeviceState *cpudev = DEVICE(qemu_get_cpu(i)); - int ppibase = NUM_IRQS + i * GIC_INTERNAL + GIC_NR_SGIS; + int intidbase = NUM_IRQS + i * GIC_INTERNAL; int irq; /* * Mapping from the output timer irq lines from the CPU to the @@ -496,14 +490,17 @@ static void create_gic(SBSAMachineState *sms, MemoryRegion *mem) for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { qdev_connect_gpio_out(cpudev, irq, qdev_get_gpio_in(sms->gic, - ppibase + timer_irq[irq])); + intidbase + timer_irq[irq])); } qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", 0, - qdev_get_gpio_in(sms->gic, ppibase + qdev_get_gpio_in(sms->gic, + intidbase + ARCH_GIC_MAINT_IRQ)); + qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, - qdev_get_gpio_in(sms->gic, ppibase + qdev_get_gpio_in(sms->gic, + intidbase + VIRTUAL_PMU_IRQ)); sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h index 648c2e37a2..6076025ad6 100644 --- a/hw/arm/smmuv3-internal.h +++ b/hw/arm/smmuv3-internal.h @@ -38,33 +38,71 @@ REG32(IDR0, 0x0) FIELD(IDR0, S1P, 1 , 1) FIELD(IDR0, TTF, 2 , 2) FIELD(IDR0, COHACC, 4 , 1) + FIELD(IDR0, BTM, 5 , 1) + FIELD(IDR0, HTTU, 6 , 2) + FIELD(IDR0, DORMHINT, 8 , 1) + FIELD(IDR0, HYP, 9 , 1) + FIELD(IDR0, ATS, 10, 1) + FIELD(IDR0, NS1ATS, 11, 1) FIELD(IDR0, ASID16, 12, 1) + FIELD(IDR0, MSI, 13, 1) + FIELD(IDR0, SEV, 14, 1) + FIELD(IDR0, ATOS, 15, 1) + FIELD(IDR0, PRI, 16, 1) + FIELD(IDR0, VMW, 17, 1) FIELD(IDR0, VMID16, 18, 1) + FIELD(IDR0, CD2L, 19, 1) + FIELD(IDR0, VATOS, 20, 1) FIELD(IDR0, TTENDIAN, 21, 2) + FIELD(IDR0, ATSRECERR, 23, 1) FIELD(IDR0, STALL_MODEL, 24, 2) FIELD(IDR0, TERM_MODEL, 26, 1) FIELD(IDR0, STLEVEL, 27, 2) + FIELD(IDR0, RME_IMPL, 30, 1) REG32(IDR1, 0x4) FIELD(IDR1, SIDSIZE, 0 , 6) + FIELD(IDR1, SSIDSIZE, 6 , 5) + FIELD(IDR1, PRIQS, 11, 5) FIELD(IDR1, EVENTQS, 16, 5) FIELD(IDR1, CMDQS, 21, 5) + FIELD(IDR1, ATTR_PERMS_OVR, 26, 1) + FIELD(IDR1, ATTR_TYPES_OVR, 27, 1) + FIELD(IDR1, REL, 28, 1) + FIELD(IDR1, QUEUES_PRESET, 29, 1) + FIELD(IDR1, TABLES_PRESET, 30, 1) + FIELD(IDR1, ECMDQ, 31, 1) #define SMMU_IDR1_SIDSIZE 16 #define SMMU_CMDQS 19 #define SMMU_EVENTQS 19 REG32(IDR2, 0x8) + FIELD(IDR2, BA_VATOS, 0, 10) + REG32(IDR3, 0xc) FIELD(IDR3, HAD, 2, 1); + FIELD(IDR3, PBHA, 3, 1); + FIELD(IDR3, XNX, 4, 1); + FIELD(IDR3, PPS, 5, 1); + FIELD(IDR3, MPAM, 7, 1); + FIELD(IDR3, FWB, 8, 1); + FIELD(IDR3, STT, 9, 1); FIELD(IDR3, RIL, 10, 1); FIELD(IDR3, BBML, 11, 2); + FIELD(IDR3, E0PD, 13, 1); + FIELD(IDR3, PTWNNC, 14, 1); + FIELD(IDR3, DPT, 15, 1); + REG32(IDR4, 0x10) + REG32(IDR5, 0x14) FIELD(IDR5, OAS, 0, 3); FIELD(IDR5, GRAN4K, 4, 1); FIELD(IDR5, GRAN16K, 5, 1); FIELD(IDR5, GRAN64K, 6, 1); + FIELD(IDR5, VAX, 10, 2); + FIELD(IDR5, STALL_MAX, 16, 16); 
#define SMMU_IDR5_OAS 4 diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c index 6f2b2bd45f..c3871ae067 100644 --- a/hw/arm/smmuv3.c +++ b/hw/arm/smmuv3.c @@ -278,15 +278,19 @@ static void smmuv3_init_regs(SMMUv3State *s) s->idr[1] = FIELD_DP32(s->idr[1], IDR1, EVENTQS, SMMU_EVENTQS); s->idr[1] = FIELD_DP32(s->idr[1], IDR1, CMDQS, SMMU_CMDQS); - s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1); s->idr[3] = FIELD_DP32(s->idr[3], IDR3, HAD, 1); + if (FIELD_EX32(s->idr[0], IDR0, S2P)) { + /* XNX is a stage-2-specific feature */ + s->idr[3] = FIELD_DP32(s->idr[3], IDR3, XNX, 1); + } + s->idr[3] = FIELD_DP32(s->idr[3], IDR3, RIL, 1); s->idr[3] = FIELD_DP32(s->idr[3], IDR3, BBML, 2); + s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, SMMU_IDR5_OAS); /* 44 bits */ /* 4K, 16K and 64K granule support */ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN4K, 1); s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN16K, 1); s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN64K, 1); - s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, SMMU_IDR5_OAS); /* 44 bits */ s->cmdq.base = deposit64(s->cmdq.base, 0, 5, SMMU_CMDQS); s->cmdq.prod = 0; diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 6b674231c2..9ce136cd88 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -601,21 +601,21 @@ build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) * The interrupt values are the same with the device tree when adding 16 */ /* Secure EL1 timer GSIV */ - build_append_int_noprefix(table_data, ARCH_TIMER_S_EL1_IRQ + 16, 4); + build_append_int_noprefix(table_data, ARCH_TIMER_S_EL1_IRQ, 4); /* Secure EL1 timer Flags */ build_append_int_noprefix(table_data, irqflags, 4); /* Non-Secure EL1 timer GSIV */ - build_append_int_noprefix(table_data, ARCH_TIMER_NS_EL1_IRQ + 16, 4); + build_append_int_noprefix(table_data, ARCH_TIMER_NS_EL1_IRQ, 4); /* Non-Secure EL1 timer Flags */ build_append_int_noprefix(table_data, irqflags | 1UL << 2, /* Always-on Capability */ 4); /* Virtual timer GSIV */ - build_append_int_noprefix(table_data, ARCH_TIMER_VIRT_IRQ + 16, 4); + build_append_int_noprefix(table_data, ARCH_TIMER_VIRT_IRQ, 4); /* Virtual Timer Flags */ build_append_int_noprefix(table_data, irqflags, 4); /* Non-Secure EL2 timer GSIV */ - build_append_int_noprefix(table_data, ARCH_TIMER_NS_EL2_IRQ + 16, 4); + build_append_int_noprefix(table_data, ARCH_TIMER_NS_EL2_IRQ, 4); /* Non-Secure EL2 timer Flags */ build_append_int_noprefix(table_data, irqflags, 4); /* CntReadBase Physical address */ @@ -729,9 +729,9 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) for (i = 0; i < MACHINE(vms)->smp.cpus; i++) { ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i)); uint64_t physical_base_address = 0, gich = 0, gicv = 0; - uint32_t vgic_interrupt = vms->virt ? PPI(ARCH_GIC_MAINT_IRQ) : 0; + uint32_t vgic_interrupt = vms->virt ? ARCH_GIC_MAINT_IRQ : 0; uint32_t pmu_interrupt = arm_feature(&armcpu->env, ARM_FEATURE_PMU) ? 
- PPI(VIRTUAL_PMU_IRQ) : 0; + VIRTUAL_PMU_IRQ : 0; if (vms->gic_version == VIRT_GIC_VERSION_2) { physical_base_address = memmap[VIRT_GIC_CPU].base; diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 15e74249f9..529f1c089c 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -366,10 +366,14 @@ static void fdt_add_timer_nodes(const VirtMachineState *vms) } qemu_fdt_setprop(ms->fdt, "/timer", "always-on", NULL, 0); qemu_fdt_setprop_cells(ms->fdt, "/timer", "interrupts", - GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_S_EL1_IRQ, irqflags, - GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL1_IRQ, irqflags, - GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_VIRT_IRQ, irqflags, - GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL2_IRQ, irqflags); + GIC_FDT_IRQ_TYPE_PPI, + INTID_TO_PPI(ARCH_TIMER_S_EL1_IRQ), irqflags, + GIC_FDT_IRQ_TYPE_PPI, + INTID_TO_PPI(ARCH_TIMER_NS_EL1_IRQ), irqflags, + GIC_FDT_IRQ_TYPE_PPI, + INTID_TO_PPI(ARCH_TIMER_VIRT_IRQ), irqflags, + GIC_FDT_IRQ_TYPE_PPI, + INTID_TO_PPI(ARCH_TIMER_NS_EL2_IRQ), irqflags); } static void fdt_add_cpu_nodes(const VirtMachineState *vms) @@ -647,13 +651,12 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) dev = qdev_new(TYPE_ACPI_GED); qdev_prop_set_uint32(dev, "ged-event", event); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_ACPI_GED].base); sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, vms->memmap[VIRT_PCDIMM_ACPI].base); sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(vms->gic, irq)); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - return dev; } @@ -691,10 +694,10 @@ static void create_v2m(VirtMachineState *vms) DeviceState *dev; dev = qdev_new("arm-gicv2m"); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_GIC_V2M].base); qdev_prop_set_uint32(dev, "base-spi", irq); qdev_prop_set_uint32(dev, "num-spi", NUM_GICV2M_SPIS); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_GIC_V2M].base); for (i = 0; i < NUM_GICV2M_SPIS; i++) { sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, @@ -800,7 +803,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) */ for (i = 0; i < smp_cpus; i++) { DeviceState *cpudev = DEVICE(qemu_get_cpu(i)); - int ppibase = NUM_IRQS + i * GIC_INTERNAL + GIC_NR_SGIS; + int intidbase = NUM_IRQS + i * GIC_INTERNAL; /* Mapping from the output timer irq lines from the CPU to the * GIC PPI inputs we use for the virt board. 
*/ @@ -814,22 +817,22 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) for (unsigned irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { qdev_connect_gpio_out(cpudev, irq, qdev_get_gpio_in(vms->gic, - ppibase + timer_irq[irq])); + intidbase + timer_irq[irq])); } if (vms->gic_version != VIRT_GIC_VERSION_2) { qemu_irq irq = qdev_get_gpio_in(vms->gic, - ppibase + ARCH_GIC_MAINT_IRQ); + intidbase + ARCH_GIC_MAINT_IRQ); qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", 0, irq); } else if (vms->virt) { qemu_irq irq = qdev_get_gpio_in(vms->gic, - ppibase + ARCH_GIC_MAINT_IRQ); + intidbase + ARCH_GIC_MAINT_IRQ); sysbus_connect_irq(gicbusdev, i + 4 * smp_cpus, irq); } qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, - qdev_get_gpio_in(vms->gic, ppibase + qdev_get_gpio_in(vms->gic, intidbase + VIRTUAL_PMU_IRQ)); sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); @@ -1989,7 +1992,7 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) if (pmu) { assert(arm_feature(&ARM_CPU(cpu)->env, ARM_FEATURE_PMU)); if (kvm_irqchip_in_kernel()) { - kvm_arm_pmu_set_irq(cpu, PPI(VIRTUAL_PMU_IRQ)); + kvm_arm_pmu_set_irq(cpu, VIRTUAL_PMU_IRQ); } kvm_arm_pmu_init(cpu); } diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 95c758200d..818b833108 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -403,7 +403,7 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) static int vhost_user_blk_realize_connect(VHostUserBlk *s, Error **errp) { - DeviceState *dev = &s->parent_obj.parent_obj; + DeviceState *dev = DEVICE(s); int ret; s->connected = false; @@ -421,7 +421,7 @@ static int vhost_user_blk_realize_connect(VHostUserBlk *s, Error **errp) assert(s->connected); ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, - s->parent_obj.config_len, errp); + VIRTIO_DEVICE(s)->config_len, errp); if (ret < 0) { qemu_chr_fe_disconnect(&s->chardev); vhost_dev_cleanup(&s->dev); diff --git a/hw/char/escc.c b/hw/char/escc.c index 4be66053c1..48b30ee760 100644 --- a/hw/char/escc.c +++ b/hw/char/escc.c @@ -845,7 +845,7 @@ static void sunkbd_handle_event(DeviceState *dev, QemuConsole *src, put_queue(s, keycode); } -static QemuInputHandler sunkbd_handler = { +static const QemuInputHandler sunkbd_handler = { .name = "sun keyboard", .mask = INPUT_EVENT_MASK_KEY, .event = sunkbd_handle_event, diff --git a/hw/core/cpu-sysemu.c b/hw/core/cpu-sysemu.c index 5eaf2e79e6..d0d6a910f9 100644 --- a/hw/core/cpu-sysemu.c +++ b/hw/core/cpu-sysemu.c @@ -34,17 +34,17 @@ bool cpu_paging_enabled(const CPUState *cpu) return false; } -void cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, +bool cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, Error **errp) { CPUClass *cc = CPU_GET_CLASS(cpu); if (cc->sysemu_ops->get_memory_mapping) { - cc->sysemu_ops->get_memory_mapping(cpu, list, errp); - return; + return cc->sysemu_ops->get_memory_mapping(cpu, list, errp); } error_setg(errp, "Obtaining memory mappings is unsupported on this CPU."); + return false; } hwaddr cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr, diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c index c3e55ef9e9..9a4b59c6f2 100644 --- a/hw/core/machine-hmp-cmds.c +++ b/hw/core/machine-hmp-cmds.c @@ -71,6 +71,12 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict) if (c->has_node_id) { monitor_printf(mon, " node-id: \"%" PRIu64 "\"\n", c->node_id); } + if (c->has_drawer_id) { + 
monitor_printf(mon, " drawer-id: \"%" PRIu64 "\"\n", c->drawer_id); + } + if (c->has_book_id) { + monitor_printf(mon, " book-id: \"%" PRIu64 "\"\n", c->book_id); + } if (c->has_socket_id) { monitor_printf(mon, " socket-id: \"%" PRIu64 "\"\n", c->socket_id); } diff --git a/hw/core/machine-smp.c b/hw/core/machine-smp.c index 0f4d9b6f7a..25019c91ee 100644 --- a/hw/core/machine-smp.c +++ b/hw/core/machine-smp.c @@ -33,6 +33,14 @@ static char *cpu_hierarchy_to_string(MachineState *ms) MachineClass *mc = MACHINE_GET_CLASS(ms); GString *s = g_string_new(NULL); + if (mc->smp_props.drawers_supported) { + g_string_append_printf(s, "drawers (%u) * ", ms->smp.drawers); + } + + if (mc->smp_props.books_supported) { + g_string_append_printf(s, "books (%u) * ", ms->smp.books); + } + g_string_append_printf(s, "sockets (%u)", ms->smp.sockets); if (mc->smp_props.dies_supported) { @@ -75,6 +83,8 @@ void machine_parse_smp_config(MachineState *ms, { MachineClass *mc = MACHINE_GET_CLASS(ms); unsigned cpus = config->has_cpus ? config->cpus : 0; + unsigned drawers = config->has_drawers ? config->drawers : 0; + unsigned books = config->has_books ? config->books : 0; unsigned sockets = config->has_sockets ? config->sockets : 0; unsigned dies = config->has_dies ? config->dies : 0; unsigned clusters = config->has_clusters ? config->clusters : 0; @@ -87,6 +97,8 @@ void machine_parse_smp_config(MachineState *ms, * explicit configuration like "cpus=0" is not allowed. */ if ((config->has_cpus && config->cpus == 0) || + (config->has_drawers && config->drawers == 0) || + (config->has_books && config->books == 0) || (config->has_sockets && config->sockets == 0) || (config->has_dies && config->dies == 0) || (config->has_clusters && config->clusters == 0) || @@ -113,6 +125,19 @@ void machine_parse_smp_config(MachineState *ms, dies = dies > 0 ? dies : 1; clusters = clusters > 0 ? clusters : 1; + if (!mc->smp_props.books_supported && books > 1) { + error_setg(errp, "books not supported by this machine's CPU topology"); + return; + } + books = books > 0 ? books : 1; + + if (!mc->smp_props.drawers_supported && drawers > 1) { + error_setg(errp, + "drawers not supported by this machine's CPU topology"); + return; + } + drawers = drawers > 0 ? drawers : 1; + /* compute missing values based on the provided ones */ if (cpus == 0 && maxcpus == 0) { sockets = sockets > 0 ? sockets : 1; @@ -126,33 +151,41 @@ void machine_parse_smp_config(MachineState *ms, if (sockets == 0) { cores = cores > 0 ? cores : 1; threads = threads > 0 ? threads : 1; - sockets = maxcpus / (dies * clusters * cores * threads); + sockets = maxcpus / + (drawers * books * dies * clusters * cores * threads); } else if (cores == 0) { threads = threads > 0 ? threads : 1; - cores = maxcpus / (sockets * dies * clusters * threads); + cores = maxcpus / + (drawers * books * sockets * dies * clusters * threads); } } else { /* prefer cores over sockets since 6.2 */ if (cores == 0) { sockets = sockets > 0 ? sockets : 1; threads = threads > 0 ? threads : 1; - cores = maxcpus / (sockets * dies * clusters * threads); + cores = maxcpus / + (drawers * books * sockets * dies * clusters * threads); } else if (sockets == 0) { threads = threads > 0 ? 
threads : 1; - sockets = maxcpus / (dies * clusters * cores * threads); + sockets = maxcpus / + (drawers * books * dies * clusters * cores * threads); } } /* try to calculate omitted threads at last */ if (threads == 0) { - threads = maxcpus / (sockets * dies * clusters * cores); + threads = maxcpus / + (drawers * books * sockets * dies * clusters * cores); } } - maxcpus = maxcpus > 0 ? maxcpus : sockets * dies * clusters * cores * threads; + maxcpus = maxcpus > 0 ? maxcpus : drawers * books * sockets * dies * + clusters * cores * threads; cpus = cpus > 0 ? cpus : maxcpus; ms->smp.cpus = cpus; + ms->smp.drawers = drawers; + ms->smp.books = books; ms->smp.sockets = sockets; ms->smp.dies = dies; ms->smp.clusters = clusters; @@ -163,7 +196,8 @@ void machine_parse_smp_config(MachineState *ms, mc->smp_props.has_clusters = config->has_clusters; /* sanity-check of the computed topology */ - if (sockets * dies * clusters * cores * threads != maxcpus) { + if (drawers * books * sockets * dies * clusters * cores * threads != + maxcpus) { g_autofree char *topo_msg = cpu_hierarchy_to_string(ms); error_setg(errp, "Invalid CPU topology: " "product of the hierarchy must match maxcpus: " diff --git a/hw/core/machine.c b/hw/core/machine.c index cfd1edfe20..50edaab737 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -34,6 +34,8 @@ GlobalProperty hw_compat_8_1[] = { { TYPE_PCI_BRIDGE, "x-pci-express-writeable-slt-bug", "true" }, + { "ramfb", "x-migrate", "off" }, + { "vfio-pci-nohotplug", "x-ramfb-migrate", "off" } }; const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1); @@ -861,6 +863,8 @@ static void machine_get_smp(Object *obj, Visitor *v, const char *name, MachineState *ms = MACHINE(obj); SMPConfiguration *config = &(SMPConfiguration){ .has_cpus = true, .cpus = ms->smp.cpus, + .has_drawers = true, .drawers = ms->smp.drawers, + .has_books = true, .books = ms->smp.books, .has_sockets = true, .sockets = ms->smp.sockets, .has_dies = true, .dies = ms->smp.dies, .has_clusters = true, .clusters = ms->smp.clusters, @@ -1135,6 +1139,8 @@ static void machine_initfn(Object *obj) /* default to mc->default_cpus */ ms->smp.cpus = mc->default_cpus; ms->smp.max_cpus = mc->default_cpus; + ms->smp.drawers = 1; + ms->smp.books = 1; ms->smp.sockets = 1; ms->smp.dies = 1; ms->smp.clusters = 1; diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 688340610e..7c6dfab128 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -1139,3 +1139,16 @@ const PropertyInfo qdev_prop_uuid = { .set = set_uuid, .set_default_value = set_default_uuid_auto, }; + +/* --- s390 cpu entitlement policy --- */ + +QEMU_BUILD_BUG_ON(sizeof(CpuS390Entitlement) != sizeof(int)); + +const PropertyInfo qdev_prop_cpus390entitlement = { + .name = "CpuS390Entitlement", + .description = "low/medium (default)/high", + .enum_table = &CpuS390Entitlement_lookup, + .get = qdev_propinfo_get_enum, + .set = qdev_propinfo_set_enum, + .set_default_value = qdev_propinfo_set_default_value_enum, +}; diff --git a/hw/display/ramfb-standalone.c b/hw/display/ramfb-standalone.c index 8c0094397f..a96e7ebcd9 100644 --- a/hw/display/ramfb-standalone.c +++ b/hw/display/ramfb-standalone.c @@ -1,4 +1,5 @@ #include "qemu/osdep.h" +#include "migration/vmstate.h" #include "qapi/error.h" #include "qemu/module.h" #include "hw/loader.h" @@ -15,6 +16,7 @@ struct RAMFBStandaloneState { SysBusDevice parent_obj; QemuConsole *con; RAMFBState *state; + bool migrate; }; static void display_update_wrapper(void *dev) 
@@ -40,14 +42,39 @@ static void ramfb_realizefn(DeviceState *dev, Error **errp) ramfb->state = ramfb_setup(errp); } +static bool migrate_needed(void *opaque) +{ + RAMFBStandaloneState *ramfb = RAMFB(opaque); + + return ramfb->migrate; +} + +static const VMStateDescription ramfb_dev_vmstate = { + .name = "ramfb-dev", + .version_id = 1, + .minimum_version_id = 1, + .needed = migrate_needed, + .fields = (VMStateField[]) { + VMSTATE_STRUCT_POINTER(state, RAMFBStandaloneState, ramfb_vmstate, RAMFBState), + VMSTATE_END_OF_LIST() + } +}; + +static Property ramfb_properties[] = { + DEFINE_PROP_BOOL("x-migrate", RAMFBStandaloneState, migrate, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void ramfb_class_initfn(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories); + dc->vmsd = &ramfb_dev_vmstate; dc->realize = ramfb_realizefn; dc->desc = "ram framebuffer standalone device"; dc->user_creatable = true; + device_class_set_props(dc, ramfb_properties); } static const TypeInfo ramfb_info = { diff --git a/hw/display/ramfb.c b/hw/display/ramfb.c index c2b002d534..477ef7272a 100644 --- a/hw/display/ramfb.c +++ b/hw/display/ramfb.c @@ -28,6 +28,8 @@ struct QEMU_PACKED RAMFBCfg { uint32_t stride; }; +typedef struct RAMFBCfg RAMFBCfg; + struct RAMFBState { DisplaySurface *ds; uint32_t width, height; @@ -116,6 +118,23 @@ void ramfb_display_update(QemuConsole *con, RAMFBState *s) dpy_gfx_update_full(con); } +static int ramfb_post_load(void *opaque, int version_id) +{ + ramfb_fw_cfg_write(opaque, 0, 0); + return 0; +} + +const VMStateDescription ramfb_vmstate = { + .name = "ramfb", + .version_id = 1, + .minimum_version_id = 1, + .post_load = ramfb_post_load, + .fields = (VMStateField[]) { + VMSTATE_BUFFER_UNSAFE(cfg, RAMFBState, 0, sizeof(RAMFBCfg)), + VMSTATE_END_OF_LIST() + } +}; + RAMFBState *ramfb_setup(Error **errp) { FWCfgState *fw_cfg = fw_cfg_find(); diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index 50c5373b65..37af256219 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -184,8 +184,7 @@ virtio_gpu_base_device_realize(DeviceState *qdev, if (virtio_gpu_virgl_enabled(g->conf)) { error_setg(&g->migration_blocker, "virgl is not yet migratable"); - if (migrate_add_blocker(g->migration_blocker, errp) < 0) { - error_free(g->migration_blocker); + if (migrate_add_blocker(&g->migration_blocker, errp) < 0) { return false; } } @@ -253,10 +252,7 @@ virtio_gpu_base_device_unrealize(DeviceState *qdev) { VirtIOGPUBase *g = VIRTIO_GPU_BASE(qdev); - if (g->migration_blocker) { - migrate_del_blocker(g->migration_blocker); - error_free(g->migration_blocker); - } + migrate_del_blocker(&g->migration_blocker); } static void diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 6efd15b6ae..4265316cbb 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -1128,7 +1128,7 @@ static void virtio_gpu_ctrl_bh(void *opaque) VirtIOGPU *g = opaque; VirtIOGPUClass *vgc = VIRTIO_GPU_GET_CLASS(g); - vgc->handle_ctrl(&g->parent_obj.parent_obj, g->ctrl_vq); + vgc->handle_ctrl(VIRTIO_DEVICE(g), g->ctrl_vq); } static void virtio_gpu_handle_cursor(VirtIODevice *vdev, VirtQueue *vq) diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c index 0074a9b6f8..b2130a0d70 100644 --- a/hw/display/xenfb.c +++ b/hw/display/xenfb.c @@ -321,20 +321,20 @@ static void xenfb_mouse_sync(DeviceState *dev) xenfb->wheel = 0; } -static QemuInputHandler xenfb_keyboard = { +static const QemuInputHandler 
xenfb_keyboard = { .name = "Xen PV Keyboard", .mask = INPUT_EVENT_MASK_KEY, .event = xenfb_key_event, }; -static QemuInputHandler xenfb_abs_mouse = { +static const QemuInputHandler xenfb_abs_mouse = { .name = "Xen PV Mouse", .mask = INPUT_EVENT_MASK_BTN | INPUT_EVENT_MASK_ABS, .event = xenfb_mouse_event, .sync = xenfb_mouse_sync, }; -static QemuInputHandler xenfb_rel_mouse = { +static const QemuInputHandler xenfb_rel_mouse = { .name = "Xen PV Mouse", .mask = INPUT_EVENT_MASK_BTN | INPUT_EVENT_MASK_REL, .event = xenfb_mouse_event, diff --git a/hw/dma/xilinx_axidma.c b/hw/dma/xilinx_axidma.c index 12c90267df..0ae056ed06 100644 --- a/hw/dma/xilinx_axidma.c +++ b/hw/dma/xilinx_axidma.c @@ -577,10 +577,6 @@ static void xilinx_axidma_init(Object *obj) object_initialize_child(OBJECT(s), "axistream-control-connected-target", &s->rx_control_dev, TYPE_XILINX_AXI_DMA_CONTROL_STREAM); - object_property_add_link(obj, "dma", TYPE_MEMORY_REGION, - (Object **)&s->dma_mr, - qdev_prop_allow_set_link_before_realize, - OBJ_PROP_LINK_STRONG); sysbus_init_irq(sbd, &s->streams[0].irq); sysbus_init_irq(sbd, &s->streams[1].irq); @@ -596,6 +592,8 @@ static Property axidma_properties[] = { tx_data_dev, TYPE_STREAM_SINK, StreamSink *), DEFINE_PROP_LINK("axistream-control-connected", XilinxAXIDMA, tx_control_dev, TYPE_STREAM_SINK, StreamSink *), + DEFINE_PROP_LINK("dma", XilinxAXIDMA, dma_mr, + TYPE_MEMORY_REGION, MemoryRegion *), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/dma/xlnx-zdma.c b/hw/dma/xlnx-zdma.c index 4eb7f66e9f..84c0083013 100644 --- a/hw/dma/xlnx-zdma.c +++ b/hw/dma/xlnx-zdma.c @@ -795,11 +795,6 @@ static void zdma_init(Object *obj) TYPE_XLNX_ZDMA, ZDMA_R_MAX * 4); sysbus_init_mmio(sbd, &s->iomem); sysbus_init_irq(sbd, &s->irq_zdma_ch_imr); - - object_property_add_link(obj, "dma", TYPE_MEMORY_REGION, - (Object **)&s->dma_mr, - qdev_prop_allow_set_link_before_realize, - OBJ_PROP_LINK_STRONG); } static const VMStateDescription vmstate_zdma = { @@ -817,6 +812,8 @@ static const VMStateDescription vmstate_zdma = { static Property zdma_props[] = { DEFINE_PROP_UINT32("bus-width", XlnxZDMA, cfg.bus_width, 64), + DEFINE_PROP_LINK("dma", XlnxZDMA, dma_mr, + TYPE_MEMORY_REGION, MemoryRegion *), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/dma/xlnx_csu_dma.c b/hw/dma/xlnx_csu_dma.c index 88002698a1..e89089821a 100644 --- a/hw/dma/xlnx_csu_dma.c +++ b/hw/dma/xlnx_csu_dma.c @@ -702,6 +702,10 @@ static Property xlnx_csu_dma_properties[] = { * which channel the device is connected to. 
*/ DEFINE_PROP_BOOL("is-dst", XlnxCSUDMA, is_dst, true), + DEFINE_PROP_LINK("stream-connected-dma", XlnxCSUDMA, tx_dev, + TYPE_STREAM_SINK, StreamSink *), + DEFINE_PROP_LINK("dma", XlnxCSUDMA, dma_mr, + TYPE_MEMORY_REGION, MemoryRegion *), DEFINE_PROP_END_OF_LIST(), }; @@ -728,15 +732,6 @@ static void xlnx_csu_dma_init(Object *obj) memory_region_init(&s->iomem, obj, TYPE_XLNX_CSU_DMA, XLNX_CSU_DMA_R_MAX * 4); - - object_property_add_link(obj, "stream-connected-dma", TYPE_STREAM_SINK, - (Object **)&s->tx_dev, - qdev_prop_allow_set_link_before_realize, - OBJ_PROP_LINK_STRONG); - object_property_add_link(obj, "dma", TYPE_MEMORY_REGION, - (Object **)&s->dma_mr, - qdev_prop_allow_set_link_before_realize, - OBJ_PROP_LINK_STRONG); } static const TypeInfo xlnx_csu_dma_info = { diff --git a/hw/hppa/Kconfig b/hw/hppa/Kconfig index 5dd8b5b21e..ff8528aaa8 100644 --- a/hw/hppa/Kconfig +++ b/hw/hppa/Kconfig @@ -3,6 +3,7 @@ config HPPA_B160L imply PCI_DEVICES imply E1000_PCI imply VIRTIO_VGA + select ASTRO select DINO select LASI select SERIAL diff --git a/hw/hppa/hppa_hardware.h b/hw/hppa/hppa_hardware.h index a5ac3dd0fd..a9be7bb851 100644 --- a/hw/hppa/hppa_hardware.h +++ b/hw/hppa/hppa_hardware.h @@ -18,7 +18,6 @@ #define LASI_UART_HPA 0xffd05000 #define LASI_SCSI_HPA 0xffd06000 #define LASI_LAN_HPA 0xffd07000 -#define LASI_RTC_HPA 0xffd09000 #define LASI_LPT_HPA 0xffd02000 #define LASI_AUDIO_HPA 0xffd04000 #define LASI_PS2KBD_HPA 0xffd08000 diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c index cf28cb9586..67d4d1b5e0 100644 --- a/hw/hppa/machine.c +++ b/hw/hppa/machine.c @@ -1,6 +1,8 @@ /* * QEMU HPPA hardware system emulator. - * Copyright 2018 Helge Deller <deller@gmx.de> + * (C) Copyright 2018-2023 Helge Deller <deller@gmx.de> + * + * This work is licensed under the GNU GPL license version 2 or later. 
*/ #include "qemu/osdep.h" @@ -20,7 +22,10 @@ #include "hw/input/lasips2.h" #include "hw/net/lasi_82596.h" #include "hw/nmi.h" +#include "hw/usb.h" #include "hw/pci/pci.h" +#include "hw/pci/pci_device.h" +#include "hw/pci-host/astro.h" #include "hw/pci-host/dino.h" #include "hw/misc/lasi.h" #include "hppa_hardware.h" @@ -29,12 +34,13 @@ #include "net/net.h" #include "qemu/log.h" -#define MIN_SEABIOS_HPPA_VERSION 6 /* require at least this fw version */ +#define MIN_SEABIOS_HPPA_VERSION 10 /* require at least this fw version */ #define HPA_POWER_BUTTON (FIRMWARE_END - 0x10) #define enable_lasi_lan() 0 +static DeviceState *lasi_dev; static void hppa_powerdown_req(Notifier *n, void *opaque) { @@ -95,14 +101,69 @@ static ISABus *hppa_isa_bus(void) isa_bus = isa_bus_new(NULL, get_system_memory(), isa_region, &error_abort); - isa_irqs = i8259_init(isa_bus, - /* qemu_allocate_irq(dino_set_isa_irq, s, 0)); */ - NULL); + isa_irqs = i8259_init(isa_bus, NULL); isa_bus_register_input_irqs(isa_bus, isa_irqs); return isa_bus; } +/* + * Helper functions to emulate RTC clock and DebugOutputPort + */ +static time_t rtc_ref; + +static uint64_t io_cpu_read(void *opaque, hwaddr addr, unsigned size) +{ + uint64_t val = 0; + + switch (addr) { + case 0: /* RTC clock */ + val = time(NULL); + val += rtc_ref; + break; + case 8: /* DebugOutputPort */ + return 0xe9; /* readback */ + } + return val; +} + +static void io_cpu_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + unsigned char ch; + Chardev *debugout; + + switch (addr) { + case 0: /* RTC clock */ + rtc_ref = val - time(NULL); + break; + case 8: /* DebugOutputPort */ + ch = val; + debugout = serial_hd(0); + if (debugout) { + qemu_chr_fe_write_all(debugout->be, &ch, 1); + } else { + fprintf(stderr, "%c", ch); + } + break; + } +} + +static const MemoryRegionOps hppa_io_helper_ops = { + .read = io_cpu_read, + .write = io_cpu_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + }, +}; + + static uint64_t cpu_hppa_to_phys(void *opaque, uint64_t addr) { addr &= (0x10000000 - 1); @@ -118,11 +179,13 @@ static void fw_cfg_boot_set(void *opaque, const char *boot_device, fw_cfg_modify_i16(opaque, FW_CFG_BOOT_DEVICE, boot_device[0]); } -static FWCfgState *create_fw_cfg(MachineState *ms) +static FWCfgState *create_fw_cfg(MachineState *ms, PCIBus *pci_bus) { FWCfgState *fw_cfg; uint64_t val; const char qemu_version[] = QEMU_VERSION; + MachineClass *mc = MACHINE_GET_CLASS(ms); + int len; fw_cfg = fw_cfg_init_mem(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4); fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, ms->smp.cpus); @@ -137,8 +200,24 @@ static FWCfgState *create_fw_cfg(MachineState *ms) fw_cfg_add_file(fw_cfg, "/etc/cpu/tlb_entries", g_memdup(&val, sizeof(val)), sizeof(val)); + val = cpu_to_le64(HPPA_BTLB_ENTRIES); + fw_cfg_add_file(fw_cfg, "/etc/cpu/btlb_entries", + g_memdup(&val, sizeof(val)), sizeof(val)); + + len = strlen(mc->name) + 1; + fw_cfg_add_file(fw_cfg, "/etc/hppa/machine", + g_memdup(mc->name, len), len); + val = cpu_to_le64(HPA_POWER_BUTTON); - fw_cfg_add_file(fw_cfg, "/etc/power-button-addr", + fw_cfg_add_file(fw_cfg, "/etc/hppa/power-button-addr", + g_memdup(&val, sizeof(val)), sizeof(val)); + + val = cpu_to_le64(CPU_HPA + 16); + fw_cfg_add_file(fw_cfg, "/etc/hppa/rtc-addr", + g_memdup(&val, sizeof(val)), sizeof(val)); + + val = cpu_to_le64(CPU_HPA + 24); + fw_cfg_add_file(fw_cfg, "/etc/hppa/DebugOutputPort", g_memdup(&val, sizeof(val)), 
sizeof(val)); fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, ms->boot_config.order[0]); @@ -148,6 +227,8 @@ static FWCfgState *create_fw_cfg(MachineState *ms) g_memdup(qemu_version, sizeof(qemu_version)), sizeof(qemu_version)); + fw_cfg_add_extra_pci_roots(pci_bus, fw_cfg); + return fw_cfg; } @@ -173,29 +254,20 @@ static DinoState *dino_init(MemoryRegion *addr_space) return DINO_PCI_HOST_BRIDGE(dev); } -static void machine_hppa_init(MachineState *machine) +/* + * Step 1: Create CPUs and Memory + */ +static void machine_HP_common_init_cpus(MachineState *machine) { - const char *kernel_filename = machine->kernel_filename; - const char *kernel_cmdline = machine->kernel_cmdline; - const char *initrd_filename = machine->initrd_filename; - MachineClass *mc = MACHINE_GET_CLASS(machine); - DeviceState *dev, *dino_dev, *lasi_dev; - PCIBus *pci_bus; - ISABus *isa_bus; - char *firmware_filename; - uint64_t firmware_low, firmware_high; - long size; - uint64_t kernel_entry = 0, kernel_low, kernel_high; MemoryRegion *addr_space = get_system_memory(); - MemoryRegion *rom_region; MemoryRegion *cpu_region; long i; unsigned int smp_cpus = machine->smp.cpus; - SysBusDevice *s; + char *name; /* Create CPUs. */ for (i = 0; i < smp_cpus; i++) { - char *name = g_strdup_printf("cpu%ld-io-eir", i); + name = g_strdup_printf("cpu%ld-io-eir", i); cpu[i] = HPPA_CPU(cpu_create(machine->cpu_type)); cpu_region = g_new(MemoryRegion, 1); @@ -206,51 +278,40 @@ static void machine_hppa_init(MachineState *machine) g_free(name); } + /* RTC and DebugOutputPort on CPU #0 */ + cpu_region = g_new(MemoryRegion, 1); + memory_region_init_io(cpu_region, OBJECT(cpu[0]), &hppa_io_helper_ops, + cpu[0], "cpu0-io-rtc", 2 * sizeof(uint64_t)); + memory_region_add_subregion(addr_space, CPU_HPA + 16, cpu_region); + /* Main memory region. */ if (machine->ram_size > 3 * GiB) { error_report("RAM size is currently restricted to 3GB"); exit(EXIT_FAILURE); } memory_region_add_subregion_overlap(addr_space, 0, machine->ram, -1); +} - - /* Init Lasi chip */ - lasi_dev = DEVICE(lasi_init()); - memory_region_add_subregion(addr_space, LASI_HPA, - sysbus_mmio_get_region( - SYS_BUS_DEVICE(lasi_dev), 0)); - - /* Init Dino (PCI host bus chip). */ - dino_dev = DEVICE(dino_init(addr_space)); - memory_region_add_subregion(addr_space, DINO_HPA, - sysbus_mmio_get_region( - SYS_BUS_DEVICE(dino_dev), 0)); - pci_bus = PCI_BUS(qdev_get_child_bus(dino_dev, "pci")); - assert(pci_bus); - - /* Create ISA bus. */ - isa_bus = hppa_isa_bus(); - assert(isa_bus); - - /* Realtime clock, used by firmware for PDC_TOD call. */ - mc146818_rtc_init(isa_bus, 2000, NULL); - - /* Serial ports: Lasi and Dino use a 7.272727 MHz clock. 
*/ - serial_mm_init(addr_space, LASI_UART_HPA + 0x800, 0, - qdev_get_gpio_in(lasi_dev, LASI_IRQ_UART_HPA), 7272727 / 16, - serial_hd(0), DEVICE_BIG_ENDIAN); - - serial_mm_init(addr_space, DINO_UART_HPA + 0x800, 0, - qdev_get_gpio_in(dino_dev, DINO_IRQ_RS232INT), 7272727 / 16, - serial_hd(1), DEVICE_BIG_ENDIAN); - - /* Parallel port */ - parallel_mm_init(addr_space, LASI_LPT_HPA + 0x800, 0, - qdev_get_gpio_in(lasi_dev, LASI_IRQ_LAN_HPA), - parallel_hds[0]); - - /* fw_cfg configuration interface */ - create_fw_cfg(machine); +/* + * Last creation step: Add SCSI discs, NICs, graphics & load firmware + */ +static void machine_HP_common_init_tail(MachineState *machine, PCIBus *pci_bus) +{ + const char *kernel_filename = machine->kernel_filename; + const char *kernel_cmdline = machine->kernel_cmdline; + const char *initrd_filename = machine->initrd_filename; + MachineClass *mc = MACHINE_GET_CLASS(machine); + DeviceState *dev; + PCIDevice *pci_dev; + char *firmware_filename; + uint64_t firmware_low, firmware_high; + long size; + uint64_t kernel_entry = 0, kernel_low, kernel_high; + MemoryRegion *addr_space = get_system_memory(); + MemoryRegion *rom_region; + long i; + unsigned int smp_cpus = machine->smp.cpus; + SysBusDevice *s; /* SCSI disk setup. */ dev = DEVICE(pci_create_simple(pci_bus, -1, "lsi53c895a")); @@ -278,21 +339,42 @@ static void machine_hppa_init(MachineState *machine) } } - /* PS/2 Keyboard/Mouse */ - dev = qdev_new(TYPE_LASIPS2); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, - qdev_get_gpio_in(lasi_dev, LASI_IRQ_PS2KBD_HPA)); - memory_region_add_subregion(addr_space, LASI_PS2KBD_HPA, - sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), - 0)); - memory_region_add_subregion(addr_space, LASI_PS2KBD_HPA + 0x100, - sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), - 1)); + /* BMC board: HP Powerbar SP2 Diva (with console only) */ + pci_dev = pci_new(-1, "pci-serial"); + if (!lasi_dev) { + /* bind default keyboard/serial to Diva card */ + qdev_prop_set_chr(DEVICE(pci_dev), "chardev", serial_hd(0)); + } + qdev_prop_set_uint8(DEVICE(pci_dev), "prog_if", 0); + pci_realize_and_unref(pci_dev, pci_bus, &error_fatal); + pci_config_set_vendor_id(pci_dev->config, PCI_VENDOR_ID_HP); + pci_config_set_device_id(pci_dev->config, 0x1048); + pci_set_word(&pci_dev->config[PCI_SUBSYSTEM_VENDOR_ID], PCI_VENDOR_ID_HP); + pci_set_word(&pci_dev->config[PCI_SUBSYSTEM_ID], 0x1227); /* Powerbar */ + + /* create a second serial PCI card when running Astro */ + if (!lasi_dev) { + pci_dev = pci_new(-1, "pci-serial-4x"); + qdev_prop_set_chr(DEVICE(pci_dev), "chardev1", serial_hd(1)); + qdev_prop_set_chr(DEVICE(pci_dev), "chardev2", serial_hd(2)); + qdev_prop_set_chr(DEVICE(pci_dev), "chardev3", serial_hd(3)); + qdev_prop_set_chr(DEVICE(pci_dev), "chardev4", serial_hd(4)); + pci_realize_and_unref(pci_dev, pci_bus, &error_fatal); + } + + /* create USB OHCI controller for USB keyboard & mouse on Astro machines */ + if (!lasi_dev && machine->enable_graphics) { + pci_create_simple(pci_bus, -1, "pci-ohci"); + usb_create_simple(usb_bus_find(-1), "usb-kbd"); + usb_create_simple(usb_bus_find(-1), "usb-mouse"); + } /* register power switch emulation */ qemu_register_powerdown_notifier(&hppa_system_powerdown_notifier); + /* fw_cfg configuration interface */ + create_fw_cfg(machine, pci_bus); + /* Load firmware. Given that this is not "real" firmware, but one explicitly written for the emulation, we might as well load it directly from an ELF image. 
*/ @@ -410,6 +492,103 @@ static void machine_hppa_init(MachineState *machine) cpu[0]->env.gr[19] = FW_CFG_IO_BASE; } +/* + * Create HP B160L workstation + */ +static void machine_HP_B160L_init(MachineState *machine) +{ + DeviceState *dev, *dino_dev; + MemoryRegion *addr_space = get_system_memory(); + ISABus *isa_bus; + PCIBus *pci_bus; + + /* Create CPUs and RAM. */ + machine_HP_common_init_cpus(machine); + + /* Init Lasi chip */ + lasi_dev = DEVICE(lasi_init()); + memory_region_add_subregion(addr_space, LASI_HPA, + sysbus_mmio_get_region( + SYS_BUS_DEVICE(lasi_dev), 0)); + + /* Init Dino (PCI host bus chip). */ + dino_dev = DEVICE(dino_init(addr_space)); + memory_region_add_subregion(addr_space, DINO_HPA, + sysbus_mmio_get_region( + SYS_BUS_DEVICE(dino_dev), 0)); + pci_bus = PCI_BUS(qdev_get_child_bus(dino_dev, "pci")); + assert(pci_bus); + + /* Create ISA bus, needed for PS/2 kbd/mouse port emulation */ + isa_bus = hppa_isa_bus(); + assert(isa_bus); + + /* Serial ports: Lasi and Dino use a 7.272727 MHz clock. */ + serial_mm_init(addr_space, LASI_UART_HPA + 0x800, 0, + qdev_get_gpio_in(lasi_dev, LASI_IRQ_UART_HPA), 7272727 / 16, + serial_hd(0), DEVICE_BIG_ENDIAN); + + serial_mm_init(addr_space, DINO_UART_HPA + 0x800, 0, + qdev_get_gpio_in(dino_dev, DINO_IRQ_RS232INT), 7272727 / 16, + serial_hd(1), DEVICE_BIG_ENDIAN); + + /* Parallel port */ + parallel_mm_init(addr_space, LASI_LPT_HPA + 0x800, 0, + qdev_get_gpio_in(lasi_dev, LASI_IRQ_LAN_HPA), + parallel_hds[0]); + + /* PS/2 Keyboard/Mouse */ + dev = qdev_new(TYPE_LASIPS2); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, + qdev_get_gpio_in(lasi_dev, LASI_IRQ_PS2KBD_HPA)); + memory_region_add_subregion(addr_space, LASI_PS2KBD_HPA, + sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), + 0)); + memory_region_add_subregion(addr_space, LASI_PS2KBD_HPA + 0x100, + sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), + 1)); + + /* Add SCSI discs, NICs, graphics & load firmware */ + machine_HP_common_init_tail(machine, pci_bus); +} + +static AstroState *astro_init(void) +{ + DeviceState *dev; + + dev = qdev_new(TYPE_ASTRO_CHIP); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + + return ASTRO_CHIP(dev); +} + +/* + * Create HP C3700 workstation + */ +static void machine_HP_C3700_init(MachineState *machine) +{ + PCIBus *pci_bus; + AstroState *astro; + DeviceState *astro_dev; + MemoryRegion *addr_space = get_system_memory(); + + /* Create CPUs and RAM. */ + machine_HP_common_init_cpus(machine); + + /* Init Astro and the Elroys (PCI host bus chips). 
*/ + astro = astro_init(); + astro_dev = DEVICE(astro); + memory_region_add_subregion(addr_space, ASTRO_HPA, + sysbus_mmio_get_region( + SYS_BUS_DEVICE(astro_dev), 0)); + pci_bus = PCI_BUS(qdev_get_child_bus(DEVICE(astro->elroy[0]), "pci")); + assert(pci_bus); + + /* Add SCSI discs, NICs, graphics & load firmware */ + machine_HP_common_init_tail(machine, pci_bus); +} + static void hppa_machine_reset(MachineState *ms, ShutdownCause reason) { unsigned int smp_cpus = ms->smp.cpus; @@ -458,14 +637,14 @@ static void hppa_nmi(NMIState *n, int cpu_index, Error **errp) } } -static void hppa_machine_init_class_init(ObjectClass *oc, void *data) +static void HP_B160L_machine_init_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); NMIClass *nc = NMI_CLASS(oc); - mc->desc = "HPPA B160L machine"; + mc->desc = "HP B160L workstation"; mc->default_cpu_type = TYPE_HPPA_CPU; - mc->init = machine_hppa_init; + mc->init = machine_HP_B160L_init; mc->reset = hppa_machine_reset; mc->block_default_type = IF_SCSI; mc->max_cpus = HPPA_MAX_CPUS; @@ -479,10 +658,41 @@ static void hppa_machine_init_class_init(ObjectClass *oc, void *data) nc->nmi_monitor_handler = hppa_nmi; } -static const TypeInfo hppa_machine_init_typeinfo = { - .name = MACHINE_TYPE_NAME("hppa"), +static const TypeInfo HP_B160L_machine_init_typeinfo = { + .name = MACHINE_TYPE_NAME("B160L"), + .parent = TYPE_MACHINE, + .class_init = HP_B160L_machine_init_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_NMI }, + { } + }, +}; + +static void HP_C3700_machine_init_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + NMIClass *nc = NMI_CLASS(oc); + + mc->desc = "HP C3700 workstation"; + mc->default_cpu_type = TYPE_HPPA_CPU; + mc->init = machine_HP_C3700_init; + mc->reset = hppa_machine_reset; + mc->block_default_type = IF_SCSI; + mc->max_cpus = HPPA_MAX_CPUS; + mc->default_cpus = 1; + mc->is_default = false; + mc->default_ram_size = 1024 * MiB; + mc->default_boot_order = "cd"; + mc->default_ram_id = "ram"; + mc->default_nic = "tulip"; + + nc->nmi_monitor_handler = hppa_nmi; +} + +static const TypeInfo HP_C3700_machine_init_typeinfo = { + .name = MACHINE_TYPE_NAME("C3700"), .parent = TYPE_MACHINE, - .class_init = hppa_machine_init_class_init, + .class_init = HP_C3700_machine_init_class_init, .interfaces = (InterfaceInfo[]) { { TYPE_NMI }, { } @@ -491,7 +701,8 @@ static const TypeInfo hppa_machine_init_typeinfo = { static void hppa_machine_init_register_types(void) { - type_register_static(&hppa_machine_init_typeinfo); + type_register_static(&HP_B160L_machine_init_typeinfo); + type_register_static(&HP_C3700_machine_init_typeinfo); } type_init(hppa_machine_init_register_types) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 8d0f2f99dd..7965415b47 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1579,9 +1579,8 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) /* set up MMIO */ memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio", AMDVI_MMIO_SIZE); - - sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio); - sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR); + memory_region_add_subregion(get_system_memory(), AMDVI_BASE_ADDR, + &s->mmio); pci_setup_iommu(bus, amdvi_host_dma_iommu, s); amdvi_init(s); } diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 30a108a42b..1c6c18622f 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -4188,6 +4188,8 @@ static void vtd_realize(DeviceState *dev, Error **errp) 
qemu_mutex_init(&s->iommu_lock); memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s, "intel_iommu", DMAR_REG_SIZE); + memory_region_add_subregion(get_system_memory(), + Q35_HOST_BRIDGE_IOMMU_ADDR, &s->csrmem); /* Create the shared memory regions by all devices */ memory_region_init(&s->mr_nodmar, OBJECT(s), "vtd-nodmar", @@ -4202,15 +4204,12 @@ static void vtd_realize(DeviceState *dev, Error **errp) memory_region_add_subregion_overlap(&s->mr_nodmar, VTD_INTERRUPT_ADDR_FIRST, &s->mr_ir, 1); - - sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem); /* No corresponding destroy */ s->iotlb = g_hash_table_new_full(vtd_iotlb_hash, vtd_iotlb_equal, g_free, g_free); s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal, g_free, g_free); vtd_init(s); - sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR); pci_setup_iommu(bus, vtd_host_dma_iommu, dev); /* Pseudo address space under root PCI bus. */ x86ms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC); diff --git a/hw/i386/meson.build b/hw/i386/meson.build index ff879069c9..369c6bf823 100644 --- a/hw/i386/meson.build +++ b/hw/i386/meson.build @@ -32,6 +32,5 @@ subdir('kvm') subdir('xen') i386_ss.add_all(xenpv_ss) -i386_ss.add_all(xen_ss) hw_arch += {'i386': i386_ss} diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index b9c93039e2..ca55aecc3b 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -206,12 +206,12 @@ static void microvm_devices_init(MicrovmMachineState *mms) if (x86_machine_is_acpi_enabled(x86ms)) { DeviceState *dev = qdev_new(TYPE_ACPI_GED); qdev_prop_set_uint32(dev, "ged-event", ACPI_GED_PWR_DOWN_EVT); + sysbus_realize(SYS_BUS_DEVICE(dev), &error_fatal); sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, GED_MMIO_BASE); /* sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, GED_MMIO_BASE_MEMHP); */ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, GED_MMIO_BASE_REGS); sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, x86ms->gsi[GED_MMIO_IRQ]); - sysbus_realize(SYS_BUS_DEVICE(dev), &error_fatal); x86ms->acpi_dev = HOTPLUG_HANDLER(dev); } diff --git a/hw/i386/pc.c b/hw/i386/pc.c index dbaefa7617..11fed78d17 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1281,7 +1281,9 @@ void pc_basic_device_init(struct PCMachineState *pcms, /* connect PIT to output control line of the HPET */ qdev_connect_gpio_out(hpet, 0, qdev_get_gpio_in(DEVICE(pit), 0)); } - pcspk_init(pcms->pcspk, isa_bus, pit); + object_property_set_link(OBJECT(pcms->pcspk), "pit", + OBJECT(pit), &error_fatal); + isa_realize_and_unref(pcms->pcspk, isa_bus, &error_fatal); } /* Super I/O */ diff --git a/hw/input/adb-kbd.c b/hw/input/adb-kbd.c index a9088c910c..e21edf9acd 100644 --- a/hw/input/adb-kbd.c +++ b/hw/input/adb-kbd.c @@ -355,7 +355,7 @@ static void adb_kbd_reset(DeviceState *dev) s->count = 0; } -static QemuInputHandler adb_keyboard_handler = { +static const QemuInputHandler adb_keyboard_handler = { .name = "QEMU ADB Keyboard", .mask = INPUT_EVENT_MASK_KEY, .event = adb_keyboard_event, diff --git a/hw/input/hid.c b/hw/input/hid.c index a9c7dd1ce1..b8e85374ca 100644 --- a/hw/input/hid.c +++ b/hw/input/hid.c @@ -510,20 +510,20 @@ void hid_free(HIDState *hs) hid_del_idle_timer(hs); } -static QemuInputHandler hid_keyboard_handler = { +static const QemuInputHandler hid_keyboard_handler = { .name = "QEMU HID Keyboard", .mask = INPUT_EVENT_MASK_KEY, .event = hid_keyboard_event, }; -static QemuInputHandler hid_mouse_handler = { +static const QemuInputHandler hid_mouse_handler = { .name = "QEMU HID Mouse", .mask = INPUT_EVENT_MASK_BTN | INPUT_EVENT_MASK_REL, 
.event = hid_pointer_event, .sync = hid_pointer_sync, }; -static QemuInputHandler hid_tablet_handler = { +static const QemuInputHandler hid_tablet_handler = { .name = "QEMU HID Tablet", .mask = INPUT_EVENT_MASK_BTN | INPUT_EVENT_MASK_ABS, .event = hid_pointer_event, diff --git a/hw/input/lasips2.c b/hw/input/lasips2.c index ea7c07a2ba..6075121b72 100644 --- a/hw/input/lasips2.c +++ b/hw/input/lasips2.c @@ -351,6 +351,11 @@ static void lasips2_port_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + /* + * The PS/2 mouse port is an integral part of LASI and cannot be + * created by users without LASI. + */ + dc->user_creatable = false; dc->realize = lasips2_port_realize; } @@ -397,6 +402,11 @@ static void lasips2_kbd_port_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); LASIPS2PortDeviceClass *lpdc = LASIPS2_PORT_CLASS(klass); + /* + * The PS/2 keyboard port is an integral part of LASI and cannot be + * created by users without LASI. + */ + dc->user_creatable = false; device_class_set_parent_realize(dc, lasips2_kbd_port_realize, &lpdc->parent_realize); } diff --git a/hw/input/ps2.c b/hw/input/ps2.c index 45af76a837..c8fd23cf36 100644 --- a/hw/input/ps2.c +++ b/hw/input/ps2.c @@ -1231,7 +1231,7 @@ static const VMStateDescription vmstate_ps2_mouse = { } }; -static QemuInputHandler ps2_keyboard_handler = { +static const QemuInputHandler ps2_keyboard_handler = { .name = "QEMU PS/2 Keyboard", .mask = INPUT_EVENT_MASK_KEY, .event = ps2_keyboard_event, @@ -1242,7 +1242,7 @@ static void ps2_kbd_realize(DeviceState *dev, Error **errp) qemu_input_handler_register(dev, &ps2_keyboard_handler); } -static QemuInputHandler ps2_mouse_handler = { +static const QemuInputHandler ps2_mouse_handler = { .name = "QEMU PS/2 Mouse", .mask = INPUT_EVENT_MASK_BTN | INPUT_EVENT_MASK_REL, .event = ps2_mouse_event, diff --git a/hw/input/virtio-input-hid.c b/hw/input/virtio-input-hid.c index 7053ad72d4..45e4d4c75d 100644 --- a/hw/input/virtio-input-hid.c +++ b/hw/input/virtio-input-hid.c @@ -265,7 +265,7 @@ static const TypeInfo virtio_input_hid_info = { /* ----------------------------------------------------------------- */ -static QemuInputHandler virtio_keyboard_handler = { +static const QemuInputHandler virtio_keyboard_handler = { .name = VIRTIO_ID_NAME_KEYBOARD, .mask = INPUT_EVENT_MASK_KEY, .event = virtio_input_handle_event, @@ -322,7 +322,7 @@ static const TypeInfo virtio_keyboard_info = { /* ----------------------------------------------------------------- */ -static QemuInputHandler virtio_mouse_handler = { +static const QemuInputHandler virtio_mouse_handler = { .name = VIRTIO_ID_NAME_MOUSE, .mask = INPUT_EVENT_MASK_BTN | INPUT_EVENT_MASK_REL, .event = virtio_input_handle_event, @@ -416,7 +416,7 @@ static const TypeInfo virtio_mouse_info = { /* ----------------------------------------------------------------- */ -static QemuInputHandler virtio_tablet_handler = { +static const QemuInputHandler virtio_tablet_handler = { .name = VIRTIO_ID_NAME_TABLET, .mask = INPUT_EVENT_MASK_BTN | INPUT_EVENT_MASK_ABS, .event = virtio_input_handle_event, @@ -541,7 +541,7 @@ static const TypeInfo virtio_tablet_info = { /* ----------------------------------------------------------------- */ -static QemuInputHandler virtio_multitouch_handler = { +static const QemuInputHandler virtio_multitouch_handler = { .name = VIRTIO_ID_NAME_MULTITOUCH, .mask = INPUT_EVENT_MASK_BTN | INPUT_EVENT_MASK_MTT, .event = virtio_input_handle_event, diff --git a/hw/intc/apic_common.c 
b/hw/intc/apic_common.c index 68ad30e2f5..bccb4241c2 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -257,6 +257,7 @@ static const VMStateDescription vmstate_apic_common; static void apic_common_realize(DeviceState *dev, Error **errp) { + ERRP_GUARD(); APICCommonState *s = APIC_COMMON(dev); APICCommonClass *info; static DeviceState *vapic; @@ -267,6 +268,9 @@ static void apic_common_realize(DeviceState *dev, Error **errp) info = APIC_COMMON_GET_CLASS(s); info->realize(dev, errp); + if (*errp) { + return; + } /* Note: We need at least 1M to map the VAPIC option ROM */ if (!vapic && s->vapic_control & VAPIC_ENABLE_MASK && diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c index 1d588946bc..e0d9e512a3 100644 --- a/hw/intc/arm_gic_kvm.c +++ b/hw/intc/arm_gic_kvm.c @@ -516,8 +516,7 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp) if (!kvm_arm_gic_can_save_restore(s)) { error_setg(&s->migration_blocker, "This operating system kernel does " "not support vGICv2 migration"); - if (migrate_add_blocker(s->migration_blocker, errp) < 0) { - error_free(s->migration_blocker); + if (migrate_add_blocker(&s->migration_blocker, errp) < 0) { return; } } diff --git a/hw/intc/arm_gicv3_its_kvm.c b/hw/intc/arm_gicv3_its_kvm.c index 7eda9fb86e..61c1cc7bdb 100644 --- a/hw/intc/arm_gicv3_its_kvm.c +++ b/hw/intc/arm_gicv3_its_kvm.c @@ -114,8 +114,7 @@ static void kvm_arm_its_realize(DeviceState *dev, Error **errp) GITS_CTLR)) { error_setg(&s->migration_blocker, "This operating system kernel " "does not support vITS migration"); - if (migrate_add_blocker(s->migration_blocker, errp) < 0) { - error_free(s->migration_blocker); + if (migrate_add_blocker(&s->migration_blocker, errp) < 0) { return; } } else { diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c index 72ad916d3d..77eb37e131 100644 --- a/hw/intc/arm_gicv3_kvm.c +++ b/hw/intc/arm_gicv3_kvm.c @@ -878,8 +878,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) GICD_CTLR)) { error_setg(&s->migration_blocker, "This operating system kernel does " "not support vGICv3 migration"); - if (migrate_add_blocker(s->migration_blocker, errp) < 0) { - error_free(s->migration_blocker); + if (migrate_add_blocker(&s->migration_blocker, errp) < 0) { return; } } diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c index 7f701d414b..199c261b07 100644 --- a/hw/intc/spapr_xive.c +++ b/hw/intc/spapr_xive.c @@ -316,7 +316,6 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp) if (!qdev_realize(DEVICE(xsrc), NULL, errp)) { return; } - sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xsrc->esb_mmio); /* * Initialize the END ESB source @@ -328,7 +327,6 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp) if (!qdev_realize(DEVICE(end_xsrc), NULL, errp)) { return; } - sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio); /* Set the mapping address of the END ESB pages after the source ESBs */ xive->end_base = xive->vc_base + xive_source_esb_len(xsrc); @@ -347,15 +345,17 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp) /* TIMA initialization */ memory_region_init_io(&xive->tm_mmio, OBJECT(xive), &spapr_xive_tm_ops, xive, "xive.tima", 4ull << TM_SHIFT); - sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio); /* * Map all regions. 
These will be enabled or disabled at reset and * can also be overridden by KVM memory regions if active */ - sysbus_mmio_map(SYS_BUS_DEVICE(xive), 0, xive->vc_base); - sysbus_mmio_map(SYS_BUS_DEVICE(xive), 1, xive->end_base); - sysbus_mmio_map(SYS_BUS_DEVICE(xive), 2, xive->tm_base); + memory_region_add_subregion(get_system_memory(), xive->vc_base, + &xsrc->esb_mmio); + memory_region_add_subregion(get_system_memory(), xive->end_base, + &end_xsrc->esb_mmio); + memory_region_add_subregion(get_system_memory(), xive->tm_base, + &xive->tm_mmio); } static int spapr_xive_get_eas(XiveRouter *xrtr, uint8_t eas_blk, diff --git a/hw/isa/i82378.c b/hw/isa/i82378.c index 63e0857208..79ffbb52a0 100644 --- a/hw/isa/i82378.c +++ b/hw/isa/i82378.c @@ -67,6 +67,7 @@ static void i82378_realize(PCIDevice *pci, Error **errp) uint8_t *pci_conf; ISABus *isabus; ISADevice *pit; + ISADevice *pcspk; pci_conf = pci->config; pci_set_word(pci_conf + PCI_COMMAND, @@ -102,7 +103,9 @@ static void i82378_realize(PCIDevice *pci, Error **errp) pit = i8254_pit_init(isabus, 0x40, 0, NULL); /* speaker */ - pcspk_init(isa_new(TYPE_PC_SPEAKER), isabus, pit); + pcspk = isa_new(TYPE_PC_SPEAKER); + object_property_set_link(OBJECT(pcspk), "pit", OBJECT(pit), &error_fatal); + isa_realize_and_unref(pcspk, isabus, &error_fatal); /* 2 82C37 (dma) */ isa_create_simple(isabus, "i82374"); diff --git a/hw/isa/isa-bus.c b/hw/isa/isa-bus.c index a289eccfb1..f1e0f14007 100644 --- a/hw/isa/isa-bus.c +++ b/hw/isa/isa-bus.c @@ -52,18 +52,25 @@ static const TypeInfo isa_bus_info = { ISABus *isa_bus_new(DeviceState *dev, MemoryRegion* address_space, MemoryRegion *address_space_io, Error **errp) { + DeviceState *bridge = NULL; + if (isabus) { error_setg(errp, "Can't create a second ISA bus"); return NULL; } if (!dev) { - dev = qdev_new("isabus-bridge"); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + bridge = qdev_new("isabus-bridge"); + dev = bridge; } isabus = ISA_BUS(qbus_new(TYPE_ISA_BUS, dev, NULL)); isabus->address_space = address_space; isabus->address_space_io = address_space_io; + + if (bridge) { + sysbus_realize_and_unref(SYS_BUS_DEVICE(bridge), &error_fatal); + } + return isabus; } diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c index 2952fe452e..4b7dc67a2d 100644 --- a/hw/loongarch/virt.c +++ b/hw/loongarch/virt.c @@ -412,6 +412,7 @@ static DeviceState *create_acpi_ged(DeviceState *pch_pic, LoongArchMachineState } dev = qdev_new(TYPE_ACPI_GED); qdev_prop_set_uint32(dev, "ged-event", event); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); /* ged event */ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, VIRT_GED_EVT_ADDR); @@ -422,7 +423,6 @@ static DeviceState *create_acpi_ged(DeviceState *pch_pic, LoongArchMachineState sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(pch_pic, VIRT_SCI_IRQ - VIRT_GSI_BASE)); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); return dev; } diff --git a/hw/mips/cps.c b/hw/mips/cps.c index 2b5269ebf1..b6612c1762 100644 --- a/hw/mips/cps.c +++ b/hw/mips/cps.c @@ -24,7 +24,6 @@ #include "hw/mips/mips.h" #include "hw/qdev-clock.h" #include "hw/qdev-properties.h" -#include "hw/mips/cpudevs.h" #include "sysemu/kvm.h" #include "sysemu/reset.h" diff --git a/hw/mips/fuloong2e.c b/hw/mips/fuloong2e.c index c6109633fe..97b2c8ed8e 100644 --- a/hw/mips/fuloong2e.c +++ b/hw/mips/fuloong2e.c @@ -30,7 +30,6 @@ #include "hw/block/flash.h" #include "hw/mips/mips.h" #include "hw/mips/bootloader.h" -#include "hw/mips/cpudevs.h" #include "hw/pci/pci.h" #include "hw/loader.h" 
#include "hw/ide/pci.h" diff --git a/hw/mips/jazz.c b/hw/mips/jazz.c index c32d2b0b0a..d33a76ad4d 100644 --- a/hw/mips/jazz.c +++ b/hw/mips/jazz.c @@ -26,7 +26,6 @@ #include "qemu/datadir.h" #include "hw/clock.h" #include "hw/mips/mips.h" -#include "hw/mips/cpudevs.h" #include "hw/intc/i8259.h" #include "hw/dma/i8257.h" #include "hw/char/serial.h" @@ -177,6 +176,7 @@ static void mips_jazz_init(MachineState *machine, SysBusDevice *sysbus; ISABus *isa_bus; ISADevice *pit; + ISADevice *pcspk; DriveInfo *fds[MAX_FD]; MemoryRegion *bios = g_new(MemoryRegion, 1); MemoryRegion *bios2 = g_new(MemoryRegion, 1); @@ -279,7 +279,9 @@ static void mips_jazz_init(MachineState *machine, isa_bus_register_input_irqs(isa_bus, i8259); i8257_dma_init(isa_bus, 0); pit = i8254_pit_init(isa_bus, 0x40, 0, NULL); - pcspk_init(isa_new(TYPE_PC_SPEAKER), isa_bus, pit); + pcspk = isa_new(TYPE_PC_SPEAKER); + object_property_set_link(OBJECT(pcspk), "pit", OBJECT(pit), &error_fatal); + isa_realize_and_unref(pcspk, isa_bus, &error_fatal); /* Video card */ switch (jazz_model) { diff --git a/hw/mips/loongson3_virt.c b/hw/mips/loongson3_virt.c index b74b358874..33eae01eca 100644 --- a/hw/mips/loongson3_virt.c +++ b/hw/mips/loongson3_virt.c @@ -32,7 +32,6 @@ #include "hw/char/serial.h" #include "hw/intc/loongson_liointc.h" #include "hw/mips/mips.h" -#include "hw/mips/cpudevs.h" #include "hw/mips/fw_cfg.h" #include "hw/mips/loongson3_bootp.h" #include "hw/misc/unimp.h" diff --git a/hw/mips/malta.c b/hw/mips/malta.c index 155f3c1cc8..049de46a9e 100644 --- a/hw/mips/malta.c +++ b/hw/mips/malta.c @@ -37,7 +37,6 @@ #include "hw/block/flash.h" #include "hw/mips/mips.h" #include "hw/mips/bootloader.h" -#include "hw/mips/cpudevs.h" #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" #include "qemu/log.h" @@ -206,7 +205,7 @@ static eeprom24c0x_t spd_eeprom = { static void generate_eeprom_spd(uint8_t *eeprom, ram_addr_t ram_size) { - enum { SDR = 0x4, DDR2 = 0x8 } type; + enum sdram_type type; uint8_t *spd = spd_eeprom.contents; uint8_t nbanks = 0; uint16_t density = 0; diff --git a/hw/mips/mips_int.c b/hw/mips/mips_int.c index 73437cd90f..6c32e466a3 100644 --- a/hw/mips/mips_int.c +++ b/hw/mips/mips_int.c @@ -23,7 +23,6 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" #include "hw/irq.h" -#include "hw/mips/cpudevs.h" #include "sysemu/kvm.h" #include "kvm_mips.h" diff --git a/hw/mips/mipssim.c b/hw/mips/mipssim.c index 2f951f7fc6..4f743f37eb 100644 --- a/hw/mips/mipssim.c +++ b/hw/mips/mipssim.c @@ -30,7 +30,6 @@ #include "qemu/datadir.h" #include "hw/clock.h" #include "hw/mips/mips.h" -#include "hw/mips/cpudevs.h" #include "hw/char/serial.h" #include "hw/isa/isa.h" #include "net/net.h" diff --git a/hw/misc/allwinner-r40-dramc.c b/hw/misc/allwinner-r40-dramc.c index 6944f84455..3d81ddb2e1 100644 --- a/hw/misc/allwinner-r40-dramc.c +++ b/hw/misc/allwinner-r40-dramc.c @@ -421,19 +421,23 @@ static void allwinner_r40_dramc_realize(DeviceState *dev, Error **errp) exit(1); } - /* detect_cells */ - sysbus_mmio_map_overlap(SYS_BUS_DEVICE(s), 3, s->ram_addr, 10); + /* R40 support max 2G memory but we only support up to 1G now. */ + memory_region_init_io(&s->detect_cells, OBJECT(s), + &allwinner_r40_detect_ops, s, + "DRAMCELLS", 1 * GiB); + memory_region_add_subregion_overlap(get_system_memory(), s->ram_addr, + &s->detect_cells, 10); memory_region_set_enabled(&s->detect_cells, false); /* * We only support DRAM size up to 1G now, so prepare a high memory page - * after 1G for dualrank detect. index = 4 + * after 1G for dualrank detect. 
*/ memory_region_init_io(&s->dram_high, OBJECT(s), &allwinner_r40_dualrank_detect_ops, s, "DRAMHIGH", KiB); - sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->dram_high); - sysbus_mmio_map(SYS_BUS_DEVICE(s), 4, s->ram_addr + GiB); + memory_region_add_subregion(get_system_memory(), s->ram_addr + GiB, + &s->dram_high); } static void allwinner_r40_dramc_init(Object *obj) @@ -458,12 +462,6 @@ static void allwinner_r40_dramc_init(Object *obj) &allwinner_r40_dramphy_ops, s, "DRAMPHY", 4 * KiB); sysbus_init_mmio(sbd, &s->dramphy_iomem); - - /* R40 support max 2G memory but we only support up to 1G now. index 3 */ - memory_region_init_io(&s->detect_cells, OBJECT(s), - &allwinner_r40_detect_ops, s, - "DRAMCELLS", 1 * GiB); - sysbus_init_mmio(sbd, &s->detect_cells); } static Property allwinner_r40_dramc_properties[] = { diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c index 4ed9faa54a..ff55a4e2cd 100644 --- a/hw/misc/bcm2835_property.c +++ b/hw/misc/bcm2835_property.c @@ -12,7 +12,7 @@ #include "migration/vmstate.h" #include "hw/irq.h" #include "hw/misc/bcm2835_mbox_defs.h" -#include "hw/misc/raspberrypi-fw-defs.h" +#include "hw/arm/raspberrypi-fw-defs.h" #include "sysemu/dma.h" #include "qemu/log.h" #include "qemu/module.h" diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index d66d912172..0447888029 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -903,8 +903,7 @@ static void ivshmem_common_realize(PCIDevice *dev, Error **errp) if (!ivshmem_is_master(s)) { error_setg(&s->migration_blocker, "Migration is disabled when using feature 'peer mode' in device 'ivshmem'"); - if (migrate_add_blocker(s->migration_blocker, errp) < 0) { - error_free(s->migration_blocker); + if (migrate_add_blocker(&s->migration_blocker, errp) < 0) { return; } } @@ -922,10 +921,7 @@ static void ivshmem_exit(PCIDevice *dev) IVShmemState *s = IVSHMEM_COMMON(dev); int i; - if (s->migration_blocker) { - migrate_del_blocker(s->migration_blocker); - error_free(s->migration_blocker); - } + migrate_del_blocker(&s->migration_blocker); if (memory_region_is_mapped(s->ivshmem_bar2)) { if (!s->hostmem) { diff --git a/hw/misc/mips_itu.c b/hw/misc/mips_itu.c index 0eda302db4..5a83ccc4e8 100644 --- a/hw/misc/mips_itu.c +++ b/hw/misc/mips_itu.c @@ -532,7 +532,7 @@ static void mips_itu_realize(DeviceState *dev, Error **errp) return; } - env = &s->cpu0->env; + env = &MIPS_CPU(s->cpu0)->env; if (env->saarp) { s->saar = env->CP0_SAAR; } @@ -563,7 +563,7 @@ static Property mips_itu_properties[] = { ITC_FIFO_NUM_MAX), DEFINE_PROP_UINT32("num-semaphores", MIPSITUState, num_semaphores, ITC_SEMAPH_NUM_MAX), - DEFINE_PROP_LINK("cpu[0]", MIPSITUState, cpu0, TYPE_MIPS_CPU, MIPSCPU *), + DEFINE_PROP_LINK("cpu[0]", MIPSITUState, cpu0, TYPE_MIPS_CPU, ArchCPU *), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index f445d8bb5e..37e209cda6 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -1654,11 +1654,6 @@ static void gem_init(Object *obj) "enet", sizeof(s->regs)); sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iomem); - - object_property_add_link(obj, "dma", TYPE_MEMORY_REGION, - (Object **)&s->dma_mr, - qdev_prop_allow_set_link_before_realize, - OBJ_PROP_LINK_STRONG); } static const VMStateDescription vmstate_cadence_gem = { @@ -1691,6 +1686,8 @@ static Property gem_properties[] = { num_type2_screeners, 4), DEFINE_PROP_UINT16("jumbo-max-len", CadenceGEMState, jumbo_max_len, 10240), + DEFINE_PROP_LINK("dma", CadenceGEMState, dma_mr, + TYPE_MEMORY_REGION, MemoryRegion *), 
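The "dma" link above replaces the handcrafted object_property_add_link() call removed from gem_init(): the same strong link is now declared as an ordinary qdev property. Board code would set it like any other link property, for example (pattern only, the gem and dma_mr names are assumed):

    object_property_set_link(OBJECT(gem), "dma", OBJECT(dma_mr), &error_fatal);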
DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/net/tulip.c b/hw/net/tulip.c index 915e5fb595..11d866e431 100644 --- a/hw/net/tulip.c +++ b/hw/net/tulip.c @@ -1020,7 +1020,7 @@ static void tulip_class_init(ObjectClass *klass, void *data) k->exit = pci_tulip_exit; k->vendor_id = PCI_VENDOR_ID_DEC; k->device_id = PCI_DEVICE_ID_DEC_21143; - k->subsystem_vendor_id = 0x103c; + k->subsystem_vendor_id = PCI_VENDOR_ID_HP; k->subsystem_id = 0x104f; k->class_id = PCI_CLASS_NETWORK_ETHERNET; dc->vmsd = &vmstate_pci_tulip; diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 29e33ea5ed..b85c7946a7 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -3624,8 +3624,8 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) n->primary_listener.hide_device = failover_hide_primary_device; qatomic_set(&n->failover_primary_hidden, true); device_listener_register(&n->primary_listener); - n->migration_state.notify = virtio_net_migration_state_notifier; - add_migration_state_change_notifier(&n->migration_state); + migration_add_notifier(&n->migration_state, + virtio_net_migration_state_notifier); n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY); } @@ -3788,7 +3788,7 @@ static void virtio_net_device_unrealize(DeviceState *dev) if (n->failover) { qobject_unref(n->primary_opts); device_listener_unregister(&n->primary_listener); - remove_migration_state_change_notifier(&n->migration_state); + migration_remove_notifier(&n->migration_state); } else { assert(n->primary_opts == NULL); } diff --git a/hw/nvram/xlnx-bbram.c b/hw/nvram/xlnx-bbram.c index c6b484cc85..e18e7770e1 100644 --- a/hw/nvram/xlnx-bbram.c +++ b/hw/nvram/xlnx-bbram.c @@ -2,6 +2,7 @@ * QEMU model of the Xilinx BBRAM Battery Backed RAM * * Copyright (c) 2014-2021 Xilinx Inc. + * Copyright (c) 2023 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -416,9 +417,9 @@ static RegisterAccessInfo bbram_ctrl_regs_info[] = { } }; -static void bbram_ctrl_reset(DeviceState *dev) +static void bbram_ctrl_reset_hold(Object *obj) { - XlnxBBRam *s = XLNX_BBRAM(dev); + XlnxBBRam *s = XLNX_BBRAM(obj); unsigned int i; for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) { @@ -522,8 +523,9 @@ static Property bbram_ctrl_props[] = { static void bbram_ctrl_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); - dc->reset = bbram_ctrl_reset; + rc->phases.hold = bbram_ctrl_reset_hold; dc->realize = bbram_ctrl_realize; dc->vmsd = &vmstate_bbram_ctrl; device_class_set_props(dc, bbram_ctrl_props); diff --git a/hw/nvram/xlnx-versal-efuse-ctrl.c b/hw/nvram/xlnx-versal-efuse-ctrl.c index b35ba65ab5..beb5661c35 100644 --- a/hw/nvram/xlnx-versal-efuse-ctrl.c +++ b/hw/nvram/xlnx-versal-efuse-ctrl.c @@ -2,6 +2,7 @@ * QEMU model of the Versal eFuse controller * * Copyright (c) 2020 Xilinx Inc. + * Copyright (c) 2023 Advanced Micro Devices, Inc. 
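The ivshmem hunk above, like the pef, spapr and remote/proxy hunks later in this series, switches to the by-reference migration-blocker API: the Error pointer is passed by address and, as the removed error_free() calls suggest, the migration core now owns and releases it. A minimal sketch of the new usage, with a hypothetical device state s and the usual errp from realize:

    error_setg(&s->migration_blocker, "this device does not support migration");
    if (migrate_add_blocker(&s->migration_blocker, errp) < 0) {
        /* no explicit error_free() any more */
        return;
    }
    /* ... and on unrealize/teardown: */
    migrate_del_blocker(&s->migration_blocker);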
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -657,9 +658,9 @@ static void efuse_ctrl_register_reset(RegisterInfo *reg) register_reset(reg); } -static void efuse_ctrl_reset(DeviceState *dev) +static void efuse_ctrl_reset_hold(Object *obj) { - XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(dev); + XlnxVersalEFuseCtrl *s = XLNX_VERSAL_EFUSE_CTRL(obj); unsigned int i; for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) { @@ -749,8 +750,9 @@ static Property efuse_ctrl_props[] = { static void efuse_ctrl_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); - dc->reset = efuse_ctrl_reset; + rc->phases.hold = efuse_ctrl_reset_hold; dc->realize = efuse_ctrl_realize; dc->vmsd = &vmstate_efuse_ctrl; device_class_set_props(dc, efuse_ctrl_props); diff --git a/hw/nvram/xlnx-zynqmp-efuse.c b/hw/nvram/xlnx-zynqmp-efuse.c index 228ba0bbfa..3db5f98ec1 100644 --- a/hw/nvram/xlnx-zynqmp-efuse.c +++ b/hw/nvram/xlnx-zynqmp-efuse.c @@ -2,6 +2,7 @@ * QEMU model of the ZynqMP eFuse * * Copyright (c) 2015 Xilinx Inc. + * Copyright (c) 2023 Advanced Micro Devices, Inc. * * Written by Edgar E. Iglesias <edgari@xilinx.com> * @@ -769,9 +770,9 @@ static void zynqmp_efuse_register_reset(RegisterInfo *reg) register_reset(reg); } -static void zynqmp_efuse_reset(DeviceState *dev) +static void zynqmp_efuse_reset_hold(Object *obj) { - XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(dev); + XlnxZynqMPEFuse *s = XLNX_ZYNQMP_EFUSE(obj); unsigned int i; for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) { @@ -837,8 +838,9 @@ static Property zynqmp_efuse_props[] = { static void zynqmp_efuse_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); - dc->reset = zynqmp_efuse_reset; + rc->phases.hold = zynqmp_efuse_reset_hold; dc->realize = zynqmp_efuse_realize; dc->vmsd = &vmstate_efuse; device_class_set_props(dc, zynqmp_efuse_props); diff --git a/hw/pci-host/Kconfig b/hw/pci-host/Kconfig index a07070eddf..54a609d2ca 100644 --- a/hw/pci-host/Kconfig +++ b/hw/pci-host/Kconfig @@ -82,6 +82,10 @@ config DINO bool select PCI +config ASTRO + bool + select PCI + config GT64120 bool select PCI diff --git a/hw/pci-host/astro.c b/hw/pci-host/astro.c new file mode 100644 index 0000000000..4b2d7caf2d --- /dev/null +++ b/hw/pci-host/astro.c @@ -0,0 +1,885 @@ +/* + * HP-PARISC Astro/Pluto/Ike/REO system bus adapter (SBA) + * with Elroy PCI bus (LBA) adapter emulation + * Found in C3000 and similar machines + * + * (C) 2023 by Helge Deller <deller@gmx.de> + * + * This work is licensed under the GNU GPL license version 2 or later. + * + * Chip documentation is available at: + * https://parisc.wiki.kernel.org/index.php/Technical_Documentation + * + * TODO: + * - All user-added devices are currently attached to the first + * Elroy (PCI bus) only for now. To fix this additional work in + * SeaBIOS and this driver is needed. See "user_creatable" flag below. 
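The xlnx-bbram, Versal eFuse and ZynqMP eFuse changes above all make the same conversion from the legacy DeviceClass reset hook to the three-phase Resettable interface. A minimal sketch of the pattern, using a hypothetical FooState/FOO_DEVICE device (headers and the rest of the class init omitted):

    static void foo_reset_hold(Object *obj)
    {
        FooState *s = FOO_DEVICE(obj);   /* hypothetical QOM type and cast */

        /* reinitialize register state here, as the old dc->reset handler did */
    }

    static void foo_class_init(ObjectClass *klass, void *data)
    {
        ResettableClass *rc = RESETTABLE_CLASS(klass);

        rc->phases.hold = foo_reset_hold;    /* was: dc->reset = foo_reset; */
    }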
+ * - GMMIO (Greater than 4 GB MMIO) register + */ + +#define TYPE_ASTRO_IOMMU_MEMORY_REGION "astro-iommu-memory-region" + +#include "qemu/osdep.h" +#include "qemu/module.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "hw/irq.h" +#include "hw/pci/pci_device.h" +#include "hw/pci/pci_bus.h" +#include "hw/qdev-properties.h" +#include "hw/pci-host/astro.h" +#include "hw/hppa/hppa_hardware.h" +#include "migration/vmstate.h" +#include "trace.h" +#include "qom/object.h" + +/* + * Helper functions + */ + +static uint64_t mask_32bit_val(hwaddr addr, unsigned size, uint64_t val) +{ + if (size == 8) { + return val; + } + if (addr & 4) { + val >>= 32; + } else { + val = (uint32_t) val; + } + return val; +} + +static void put_val_in_int64(uint64_t *p, hwaddr addr, unsigned size, + uint64_t val) +{ + if (size == 8) { + *p = val; + } else if (size == 4) { + if (addr & 4) { + *p = ((*p << 32) >> 32) | (val << 32); + } else { + *p = ((*p >> 32) << 32) | (uint32_t) val; + } + } +} + +static void put_val_in_arrary(uint64_t *array, hwaddr start_addr, + hwaddr addr, unsigned size, uint64_t val) +{ + int index; + + index = (addr - start_addr) / 8; + put_val_in_int64(&array[index], addr, size, val); +} + + +/* + * The Elroy PCI host bridge. We have at least 4 of those under Astro. + */ + +static MemTxResult elroy_chip_read_with_attrs(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + MemTxResult ret = MEMTX_OK; + ElroyState *s = opaque; + uint64_t val = -1; + int index; + + switch ((addr >> 3) << 3) { + case 0x0008: + val = 0x6000005; /* func_class */ + break; + case 0x0058: + /* + * Scratch register, but firmware initializes it with the + * PCI BUS number and Linux/HP-UX uses it then. + */ + val = s->pci_bus_num; + /* Upper byte holds the end of this bus number */ + val |= s->pci_bus_num << 8; + break; + case 0x0080: + val = s->arb_mask; /* set ARB mask */ + break; + case 0x0108: + val = s->status_control; + break; + case 0x200 ... 0x250 - 1: /* LMMIO, GMMIO, WLMMIO, WGMMIO, ... */ + index = (addr - 0x200) / 8; + val = s->mmio_base[index]; + break; + case 0x0680: + val = s->error_config; + break; + case 0x0688: + val = 0; /* ERROR_STATUS */ + break; + case 0x0800: /* IOSAPIC_REG_SELECT */ + val = s->iosapic_reg_select; + break; + case 0x0808: + val = UINT64_MAX; /* XXX: tbc. */ + g_assert_not_reached(); + break; + case 0x0810: /* IOSAPIC_REG_WINDOW */ + switch (s->iosapic_reg_select) { + case 0x01: /* IOSAPIC_REG_VERSION */ + val = (32 << 16) | 1; /* upper 16bit holds max entries */ + break; + default: + if (s->iosapic_reg_select < ARRAY_SIZE(s->iosapic_reg)) { + val = s->iosapic_reg[s->iosapic_reg_select]; + } else { + trace_iosapic_reg_read(s->iosapic_reg_select, size, val); + g_assert_not_reached(); + } + } + trace_iosapic_reg_read(s->iosapic_reg_select, size, val); + break; + default: + trace_elroy_read(addr, size, val); + g_assert_not_reached(); + } + trace_elroy_read(addr, size, val); + + /* for 32-bit accesses mask return value */ + val = mask_32bit_val(addr, size, val); + + trace_astro_chip_read(addr, size, val); + *data = val; + return ret; +} + + +static MemTxResult elroy_chip_write_with_attrs(void *opaque, hwaddr addr, + uint64_t val, unsigned size, + MemTxAttrs attrs) +{ + ElroyState *s = opaque; + int i; + + trace_elroy_write(addr, size, val); + + switch ((addr >> 3) << 3) { + case 0x080: + put_val_in_int64(&s->arb_mask, addr, size, val); + break; + case 0x0108: + put_val_in_int64(&s->status_control, addr, size, val); + break; + case 0x200 ... 
0x250 - 1: /* LMMIO, GMMIO, WLMMIO, WGMMIO, ... */ + put_val_in_arrary(s->mmio_base, 0x200, addr, size, val); + break; + case 0x0680: + put_val_in_int64(&s->error_config, addr, size, val); + break; + case 0x0800: /* IOSAPIC_REG_SELECT */ + s->iosapic_reg_select = val; + break; + case 0x0810: /* IOSAPIC_REG_WINDOW */ + trace_iosapic_reg_write(s->iosapic_reg_select, size, val); + if (s->iosapic_reg_select < ARRAY_SIZE(s->iosapic_reg)) { + s->iosapic_reg[s->iosapic_reg_select] = val; + } else { + g_assert_not_reached(); + } + break; + case 0x0840: /* IOSAPIC_REG_EOI */ + val = le64_to_cpu(val); + val &= 63; + for (i = 0; i < ELROY_IRQS; i++) { + if ((s->iosapic_reg[0x10 + 2 * i] & 63) == val) { + s->ilr &= ~(1ull << i); + } + } + break; + default: + g_assert_not_reached(); + } + return MEMTX_OK; +} + +static const MemoryRegionOps elroy_chip_ops = { + .read_with_attrs = elroy_chip_read_with_attrs, + .write_with_attrs = elroy_chip_write_with_attrs, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 8, + }, +}; + + +/* Unlike pci_config_data_le_ops, no check of high bit set in config_reg. */ + +static uint64_t elroy_config_data_read(void *opaque, hwaddr addr, unsigned len) +{ + uint64_t val; + + PCIHostState *s = opaque; + val = pci_data_read(s->bus, s->config_reg | (addr & 3), len); + trace_elroy_pci_config_data_read(s->config_reg | (addr & 3), len, val); + return val; +} + +static void elroy_config_data_write(void *opaque, hwaddr addr, + uint64_t val, unsigned len) +{ + PCIHostState *s = opaque; + pci_data_write(s->bus, s->config_reg | (addr & 3), val, len); + trace_elroy_pci_config_data_write(s->config_reg | (addr & 3), len, val); +} + +static const MemoryRegionOps elroy_config_data_ops = { + .read = elroy_config_data_read, + .write = elroy_config_data_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static uint64_t elroy_config_addr_read(void *opaque, hwaddr addr, unsigned len) +{ + ElroyState *s = opaque; + return s->config_reg_elroy; +} + +static void elroy_config_addr_write(void *opaque, hwaddr addr, + uint64_t val, unsigned len) +{ + PCIHostState *s = opaque; + ElroyState *es = opaque; + es->config_reg_elroy = val; /* keep a copy of original value */ + s->config_reg = val; +} + +static const MemoryRegionOps elroy_config_addr_ops = { + .read = elroy_config_addr_read, + .write = elroy_config_addr_write, + .valid.min_access_size = 4, + .valid.max_access_size = 8, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + + +/* + * A subroutine of astro_translate_iommu that builds an IOMMUTLBEntry using the + * given translated address and mask. + */ +static bool make_iommu_tlbe(hwaddr addr, hwaddr taddr, hwaddr mask, + IOMMUTLBEntry *ret) +{ + hwaddr tce_mask = ~((1ull << 12) - 1); + ret->target_as = &address_space_memory; + ret->iova = addr & tce_mask; + ret->translated_addr = taddr & tce_mask; + ret->addr_mask = ~tce_mask; + ret->perm = IOMMU_RW; + return true; +} + +/* Handle PCI-to-system address translation. 
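 *
 * Schematically, mirroring the walk in astro_translate_iommu() below
 * (single-level I/O page directory in guest RAM, 4 KiB I/O pages):
 *
 *   index = iova >> 12                                  (PDIR_INDEX)
 *   entry = ldq_le_phys(tlb_pdir_base + index * 8)
 *   phys  = (((entry & ~SBA_PDIR_VALID_BIT) >> 12) << 12) | (iova & 0xfff)
 *
 * Entries without SBA_PDIR_VALID_BIT set are rejected. When the range-enable
 * bit in tlb_ibase is clear, or the address does not match tlb_imask/ibase,
 * the access is passed through untranslated.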
*/ +static IOMMUTLBEntry astro_translate_iommu(IOMMUMemoryRegion *iommu, + hwaddr addr, + IOMMUAccessFlags flag, + int iommu_idx) +{ + AstroState *s = container_of(iommu, AstroState, iommu); + IOMMUTLBEntry ret = { + .target_as = &address_space_memory, + .iova = addr, + .translated_addr = 0, + .addr_mask = ~(hwaddr)0, + .perm = IOMMU_NONE, + }; + hwaddr pdir_ptr, index, a, ibase; + hwaddr addr_mask = 0xfff; /* 4k translation */ + uint64_t entry; + +#define IOVP_SHIFT 12 /* equals PAGE_SHIFT */ +#define PDIR_INDEX(iovp) ((iovp) >> IOVP_SHIFT) +#define IOVP_MASK PAGE_MASK +#define SBA_PDIR_VALID_BIT 0x8000000000000000ULL + + /* "range enable" flag cleared? */ + if ((s->tlb_ibase & 1) == 0) { + make_iommu_tlbe(addr, addr, addr_mask, &ret); + return ret; + } + + a = addr; + ibase = s->tlb_ibase & ~1ULL; + if ((a & s->tlb_imask) != ibase) { + /* do not translate this one! */ + make_iommu_tlbe(addr, addr, addr_mask, &ret); + return ret; + } + index = PDIR_INDEX(a); + pdir_ptr = s->tlb_pdir_base + index * sizeof(entry); + entry = ldq_le_phys(&address_space_memory, pdir_ptr); + if (!(entry & SBA_PDIR_VALID_BIT)) { /* I/O PDIR entry valid ? */ + g_assert_not_reached(); + goto failure; + } + entry &= ~SBA_PDIR_VALID_BIT; + entry >>= IOVP_SHIFT; + entry <<= 12; + entry |= addr & 0xfff; + make_iommu_tlbe(addr, entry, addr_mask, &ret); + goto success; + + failure: + ret = (IOMMUTLBEntry) { .perm = IOMMU_NONE }; + success: + return ret; +} + +static AddressSpace *elroy_pcihost_set_iommu(PCIBus *bus, void *opaque, + int devfn) +{ + ElroyState *s = opaque; + return &s->astro->iommu_as; +} + +/* + * Encoding in IOSAPIC: + * base_addr == 0xfffa0000, we want to get 0xa0ff0000. + * eid 0x0ff00000 -> 0x00ff0000 + * id 0x000ff000 -> 0xff000000 + */ +#define SWIZZLE_HPA(a) \ + ((((a) & 0x0ff00000) >> 4) | (((a) & 0x000ff000) << 12)) +#define UNSWIZZLE_HPA(a) \ + (((((a) << 4) & 0x0ff00000) | (((a) >> 12) & 0x000ff000) | 0xf0000000)) + +/* bits in the "low" I/O Sapic IRdT entry */ +#define IOSAPIC_IRDT_DISABLE 0x10000 /* if bit is set, mask this irq */ +#define IOSAPIC_IRDT_PO_LOW 0x02000 +#define IOSAPIC_IRDT_LEVEL_TRIG 0x08000 +#define IOSAPIC_IRDT_MODE_LPRI 0x00100 + +#define CPU_IRQ_OFFSET 2 + +static void elroy_set_irq(void *opaque, int irq, int level) +{ + ElroyState *s = opaque; + uint32_t bit; + uint32_t old_ilr = s->ilr; + hwaddr cpu_hpa; + uint32_t val; + + val = s->iosapic_reg[0x10 + 2 * irq]; + cpu_hpa = s->iosapic_reg[0x11 + 2 * irq]; + /* low nibble of val has value to write into CPU irq reg */ + bit = 1u << (val & (ELROY_IRQS - 1)); + cpu_hpa = UNSWIZZLE_HPA(cpu_hpa); + + if (level && (!(val & IOSAPIC_IRDT_DISABLE)) && cpu_hpa) { + uint32_t ena = bit & ~old_ilr; + s->ilr = old_ilr | bit; + if (ena != 0) { + stl_be_phys(&address_space_memory, cpu_hpa, val & 63); + } + } else { + s->ilr = old_ilr & ~bit; + } +} + +static int elroy_pci_map_irq(PCIDevice *d, int irq_num) +{ + int slot = PCI_SLOT(d->devfn); + + assert(irq_num >= 0 && irq_num < ELROY_IRQS); + return slot & (ELROY_IRQS - 1); +} + +static void elroy_reset(DeviceState *dev) +{ + ElroyState *s = ELROY_PCI_HOST_BRIDGE(dev); + int irq; + + /* + * Make sure to disable interrupts at reboot, otherwise the Linux kernel + * serial8250_config_port() in drivers/tty/serial/8250/8250_port.c + * will hang during autoconfig(). 
+ */ + s->ilr = 0; + for (irq = 0; irq < ELROY_IRQS; irq++) { + s->iosapic_reg[0x10 + 2 * irq] = IOSAPIC_IRDT_PO_LOW | + IOSAPIC_IRDT_LEVEL_TRIG | (irq + CPU_IRQ_OFFSET) | + IOSAPIC_IRDT_DISABLE; + s->iosapic_reg[0x11 + 2 * irq] = SWIZZLE_HPA(CPU_HPA); + } +} + +static void elroy_pcihost_init(Object *obj) +{ + ElroyState *s = ELROY_PCI_HOST_BRIDGE(obj); + PCIHostState *phb = PCI_HOST_BRIDGE(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + /* Elroy config access from CPU. */ + memory_region_init_io(&s->this_mem, OBJECT(s), &elroy_chip_ops, + s, "elroy", 0x2000); + + /* Elroy PCI config. */ + memory_region_init_io(&phb->conf_mem, OBJECT(phb), + &elroy_config_addr_ops, DEVICE(s), + "pci-conf-idx", 8); + memory_region_init_io(&phb->data_mem, OBJECT(phb), + &elroy_config_data_ops, DEVICE(s), + "pci-conf-data", 8); + memory_region_add_subregion(&s->this_mem, 0x40, + &phb->conf_mem); + memory_region_add_subregion(&s->this_mem, 0x48, + &phb->data_mem); + + /* Elroy PCI bus memory. */ + memory_region_init(&s->pci_mmio, OBJECT(s), "pci-mmio", UINT64_MAX); + memory_region_init_io(&s->pci_io, OBJECT(s), &unassigned_io_ops, obj, + "pci-isa-mmio", + ((uint32_t) IOS_DIST_BASE_SIZE) / ROPES_PER_IOC); + + phb->bus = pci_register_root_bus(DEVICE(s), "pci", + elroy_set_irq, elroy_pci_map_irq, s, + &s->pci_mmio, &s->pci_io, + PCI_DEVFN(0, 0), ELROY_IRQS, TYPE_PCI_BUS); + + sysbus_init_mmio(sbd, &s->this_mem); + + qdev_init_gpio_in(DEVICE(obj), elroy_set_irq, ELROY_IRQS); +} + +static Property elroy_pcihost_properties[] = { + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription vmstate_elroy = { + .name = "Elroy", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT64(hpa, ElroyState), + VMSTATE_UINT32(pci_bus_num, ElroyState), + VMSTATE_UINT64(config_address, ElroyState), + VMSTATE_UINT64(config_reg_elroy, ElroyState), + VMSTATE_UINT64(status_control, ElroyState), + VMSTATE_UINT64(arb_mask, ElroyState), + VMSTATE_UINT64_ARRAY(mmio_base, ElroyState, (0x0250 - 0x200) / 8), + VMSTATE_UINT64(error_config, ElroyState), + VMSTATE_UINT32(iosapic_reg_select, ElroyState), + VMSTATE_UINT64_ARRAY(iosapic_reg, ElroyState, 0x20), + VMSTATE_UINT32(ilr, ElroyState), + VMSTATE_END_OF_LIST() + } +}; + +static void elroy_pcihost_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = elroy_reset; + device_class_set_props(dc, elroy_pcihost_properties); + dc->vmsd = &vmstate_elroy; + dc->user_creatable = false; +} + +static const TypeInfo elroy_pcihost_info = { + .name = TYPE_ELROY_PCI_HOST_BRIDGE, + .parent = TYPE_PCI_HOST_BRIDGE, + .instance_init = elroy_pcihost_init, + .instance_size = sizeof(ElroyState), + .class_init = elroy_pcihost_class_init, +}; + +static void elroy_register_types(void) +{ + type_register_static(&elroy_pcihost_info); +} + +type_init(elroy_register_types) + + +static ElroyState *elroy_init(int num) +{ + DeviceState *dev; + + dev = qdev_new(TYPE_ELROY_PCI_HOST_BRIDGE); + dev->id = g_strdup_printf("elroy%d", num); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + + return ELROY_PCI_HOST_BRIDGE(dev); +} + +/* + * Astro Runway chip. 
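 *
 * The SBA modelled below routes CPU accesses to the Elroy LBAs created in
 * astro_realize() (each one a TYPE_ELROY_PCI_HOST_BRIDGE with its own PCI
 * root bus and the config address/data windows at offsets 0x40/0x48 of its
 * register block, see elroy_pcihost_init() above) and provides the shared
 * IOMMU address space that their PCI devices use for DMA.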
+ */ + +static MemTxResult astro_chip_read_with_attrs(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + AstroState *s = opaque; + MemTxResult ret = MEMTX_OK; + uint64_t val = -1; + int index; + + switch ((addr >> 3) << 3) { + /* R2I registers */ + case 0x0000: /* ID */ + val = (0x01 << 3) | 0x01ULL; + break; + case 0x0008: /* IOC_CTRL */ + val = s->ioc_ctrl; + break; + case 0x0010: /* TOC_CLIENT_ID */ + break; + case 0x0030: /* HP-UX 10.20 and 11.11 reads it. No idea. */ + val = -1; + break; + case 0x0300 ... 0x03d8: /* LMMIO_DIRECT0_BASE... */ + index = (addr - 0x300) / 8; + val = s->ioc_ranges[index]; + break; + case 0x10200: + val = 0; + break; + case 0x10220: + case 0x10230: /* HP-UX 11.11 reads it. No idea. */ + val = -1; + break; + case 0x22108: /* IOC STATUS_CONTROL */ + val = s->ioc_status_ctrl; + break; + case 0x20200 ... 0x20240 - 1: /* IOC Rope0_Control ... */ + index = (addr - 0x20200) / 8; + val = s->ioc_rope_control[index]; + break; + case 0x20040: /* IOC Rope config */ + val = s->ioc_rope_config; + break; + case 0x20050: /* IOC Rope debug */ + val = 0; + break; + case 0x20108: /* IOC STATUS_CONTROL */ + val = s->ioc_status_control; + break; + case 0x20310: /* IOC_PCOM */ + val = s->tlb_pcom; + /* TODO: flush iommu */ + break; + case 0x20400: + val = s->ioc_flush_control; + break; + /* empty placeholders for non-existent elroys */ +#define EMPTY_PORT(x) case x: case x+8: val = 0; break; \ + case x+40: case x+48: val = UINT64_MAX; break; + EMPTY_PORT(0x30000) + EMPTY_PORT(0x32000) + EMPTY_PORT(0x34000) + EMPTY_PORT(0x36000) + EMPTY_PORT(0x38000) + EMPTY_PORT(0x3a000) + EMPTY_PORT(0x3c000) + EMPTY_PORT(0x3e000) +#undef EMPTY_PORT + + default: + trace_astro_chip_read(addr, size, val); + g_assert_not_reached(); + } + + /* for 32-bit accesses mask return value */ + val = mask_32bit_val(addr, size, val); + + trace_astro_chip_read(addr, size, val); + *data = val; + return ret; +} + +static MemTxResult astro_chip_write_with_attrs(void *opaque, hwaddr addr, + uint64_t val, unsigned size, + MemTxAttrs attrs) +{ + AstroState *s = opaque; + + trace_astro_chip_write(addr, size, val); + + switch ((addr >> 3) << 3) { + case 0x0000: /* ID */ + break; + case 0x0008: /* IOC_CTRL */ + val &= 0x0ffffff; + put_val_in_int64(&s->ioc_ctrl, addr, size, val); + break; + case 0x0010: /* TOC_CLIENT_ID */ + break; + case 0x0030: /* HP-UX 10.20 and 11.11 reads it. No idea. */ + break; + case 0x0300 ... 0x03d8 - 1: /* LMMIO_DIRECT0_BASE... */ + put_val_in_arrary(s->ioc_ranges, 0x300, addr, size, val); + break; + case 0x10200: + case 0x10220: + case 0x10230: /* HP-UX 11.11 reads it. No idea. */ + break; + case 0x22108: /* IOC STATUS_CONTROL */ + put_val_in_int64(&s->ioc_status_ctrl, addr, size, val); + break; + case 0x20200 ... 0x20240 - 1: /* IOC Rope0_Control ... */ + put_val_in_arrary(s->ioc_rope_control, 0x20200, addr, size, val); + break; + case 0x20040: /* IOC Rope config */ + put_val_in_int64(&s->ioc_rope_config, addr, size, val); + break; + case 0x20300: + put_val_in_int64(&s->tlb_ibase, addr, size, val); + break; + case 0x20308: + put_val_in_int64(&s->tlb_imask, addr, size, val); + break; + case 0x20310: + put_val_in_int64(&s->tlb_pcom, addr, size, val); + /* TODO: flush iommu */ + break; + case 0x20318: + put_val_in_int64(&s->tlb_tcnfg, addr, size, val); + break; + case 0x20320: + put_val_in_int64(&s->tlb_pdir_base, addr, size, val); + break; + /* + * empty placeholders for non-existent elroys, e.g. 
+ * func_class, pci config & data + */ +#define EMPTY_PORT(x) case x: case x+8: case x+0x40: case x+0x48: + EMPTY_PORT(0x30000) + EMPTY_PORT(0x32000) + EMPTY_PORT(0x34000) + EMPTY_PORT(0x36000) + EMPTY_PORT(0x38000) + EMPTY_PORT(0x3a000) + EMPTY_PORT(0x3c000) + EMPTY_PORT(0x3e000) + break; +#undef EMPTY_PORT + + default: + /* Controlled by astro_chip_mem_valid above. */ + trace_astro_chip_write(addr, size, val); + g_assert_not_reached(); + } + return MEMTX_OK; +} + +static const MemoryRegionOps astro_chip_ops = { + .read_with_attrs = astro_chip_read_with_attrs, + .write_with_attrs = astro_chip_write_with_attrs, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 8, + }, +}; + +static const VMStateDescription vmstate_astro = { + .name = "Astro", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT64(ioc_ctrl, AstroState), + VMSTATE_UINT64(ioc_status_ctrl, AstroState), + VMSTATE_UINT64_ARRAY(ioc_ranges, AstroState, (0x03d8 - 0x300) / 8), + VMSTATE_UINT64(ioc_rope_config, AstroState), + VMSTATE_UINT64(ioc_status_control, AstroState), + VMSTATE_UINT64(ioc_flush_control, AstroState), + VMSTATE_UINT64_ARRAY(ioc_rope_control, AstroState, 8), + VMSTATE_UINT64(tlb_ibase, AstroState), + VMSTATE_UINT64(tlb_imask, AstroState), + VMSTATE_UINT64(tlb_pcom, AstroState), + VMSTATE_UINT64(tlb_tcnfg, AstroState), + VMSTATE_UINT64(tlb_pdir_base, AstroState), + VMSTATE_END_OF_LIST() + } +}; + +static void astro_reset(DeviceState *dev) +{ + AstroState *s = ASTRO_CHIP(dev); + int i; + + s->ioc_ctrl = 0x29cf; + s->ioc_rope_config = 0xc5f; + s->ioc_flush_control = 0xb03; + s->ioc_status_control = 0; + memset(&s->ioc_rope_control, 0, sizeof(s->ioc_rope_control)); + + /* + * The SBA BASE/MASK registers control CPU -> IO routing. + * The LBA BASE/MASK registers control IO -> System routing (in Elroy) + */ + memset(&s->ioc_ranges, 0, sizeof(s->ioc_ranges)); + s->ioc_ranges[(0x360 - 0x300) / 8] = LMMIO_DIST_BASE_ADDR | 0x01; /* LMMIO_DIST_BASE (SBA) */ + s->ioc_ranges[(0x368 - 0x300) / 8] = 0xfc000000; /* LMMIO_DIST_MASK */ + s->ioc_ranges[(0x370 - 0x300) / 8] = 0; /* LMMIO_DIST_ROUTE */ + s->ioc_ranges[(0x390 - 0x300) / 8] = IOS_DIST_BASE_ADDR | 0x01; /* IOS_DIST_BASE */ + s->ioc_ranges[(0x398 - 0x300) / 8] = 0xffffff0000; /* IOS_DIST_MASK */ + s->ioc_ranges[(0x3a0 - 0x300) / 8] = 0x3400000000000000ULL; /* IOS_DIST_ROUTE */ + s->ioc_ranges[(0x3c0 - 0x300) / 8] = 0xfffee00000; /* IOS_DIRECT_BASE */ + s->ioc_ranges[(0x3c8 - 0x300) / 8] = 0xffffff0000; /* IOS_DIRECT_MASK */ + s->ioc_ranges[(0x3d0 - 0x300) / 8] = 0x0; /* IOS_DIRECT_ROUTE */ + + s->tlb_ibase = 0; + s->tlb_imask = 0; + s->tlb_pcom = 0; + s->tlb_tcnfg = 0; + s->tlb_pdir_base = 0; + + for (i = 0; i < ELROY_NUM; i++) { + elroy_reset(DEVICE(s->elroy[i])); + } +} + +static void astro_init(Object *obj) +{ +} + +static void astro_realize(DeviceState *obj, Error **errp) +{ + AstroState *s = ASTRO_CHIP(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + int i; + + memory_region_init_io(&s->this_mem, OBJECT(s), &astro_chip_ops, + s, "astro", 0x40000); + sysbus_init_mmio(sbd, &s->this_mem); + + /* Host memory as seen from Elroys PCI side, via the IOMMU. */ + memory_region_init_iommu(&s->iommu, sizeof(s->iommu), + TYPE_ASTRO_IOMMU_MEMORY_REGION, OBJECT(s), + "iommu-astro", UINT64_MAX); + address_space_init(&s->iommu_as, MEMORY_REGION(&s->iommu), + "bm-pci"); + + /* Create Elroys (PCI host bus chips). 
*/ + for (i = 0; i < ELROY_NUM; i++) { + static const int elroy_hpa_offsets[ELROY_NUM] = { + 0x30000, 0x32000, 0x38000, 0x3c000 }; + static const char elroy_rope_nr[ELROY_NUM] = { + 0, 1, 4, 6 }; /* busnum path, e.g. [10:6] */ + int addr_offset; + ElroyState *elroy; + hwaddr map_addr; + uint64_t map_size; + int rope; + + addr_offset = elroy_hpa_offsets[i]; + rope = elroy_rope_nr[i]; + + elroy = elroy_init(i); + s->elroy[i] = elroy; + elroy->hpa = ASTRO_HPA + addr_offset; + elroy->pci_bus_num = i; + elroy->astro = s; + + /* + * NOTE: we only allow PCI devices on first Elroy for now. + * SeaBIOS will not find devices on the other busses. + */ + if (i > 0) { + qbus_mark_full(&PCI_HOST_BRIDGE(elroy)->bus->qbus); + } + + /* map elroy config addresses into Astro space */ + memory_region_add_subregion(&s->this_mem, addr_offset, + &elroy->this_mem); + + /* LMMIO */ + elroy->mmio_base[(0x0200 - 0x200) / 8] = 0xf0000001; + elroy->mmio_base[(0x0208 - 0x200) / 8] = 0xf8000000; + /* GMMIO */ + elroy->mmio_base[(0x0210 - 0x200) / 8] = 0x000000f800000001; + elroy->mmio_base[(0x0218 - 0x200) / 8] = 0x000000ff80000000; + /* WLMMIO */ + elroy->mmio_base[(0x0220 - 0x200) / 8] = 0xf0000001; + elroy->mmio_base[(0x0228 - 0x200) / 8] = 0xf0000000; + /* WGMMIO */ + elroy->mmio_base[(0x0230 - 0x200) / 8] = 0x000000f800000001; + elroy->mmio_base[(0x0238 - 0x200) / 8] = 0x000000fc00000000; + /* IOS_BASE */ + map_size = IOS_DIST_BASE_SIZE / ROPES_PER_IOC; + elroy->mmio_base[(0x0240 - 0x200) / 8] = rope * map_size | 0x01; + elroy->mmio_base[(0x0248 - 0x200) / 8] = 0x0000e000; + + /* map elroys mmio */ + map_size = LMMIO_DIST_BASE_SIZE / ROPES_PER_IOC; + map_addr = (uint32_t) (LMMIO_DIST_BASE_ADDR + rope * map_size); + memory_region_init_alias(&elroy->pci_mmio_alias, OBJECT(elroy), + "pci-mmio-alias", + &elroy->pci_mmio, map_addr, map_size); + memory_region_add_subregion(get_system_memory(), map_addr, + &elroy->pci_mmio_alias); + + map_size = IOS_DIST_BASE_SIZE / ROPES_PER_IOC; + map_addr = (uint32_t) (IOS_DIST_BASE_ADDR + rope * map_size); + memory_region_add_subregion(get_system_memory(), map_addr, + &elroy->pci_io); + + /* Host memory as seen from the PCI side, via the IOMMU. 
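         * The pci_setup_iommu() call below routes all DMA from this Elroy's
         * devices through the shared Astro IOMMU ("bm-pci") initialized
         * earlier in this function, while the outbound windows set up above
         * place each rope's MMIO alias at LMMIO_DIST_BASE_ADDR +
         * rope * (LMMIO_DIST_BASE_SIZE / ROPES_PER_IOC) and its I/O-port
         * region at IOS_DIST_BASE_ADDR + rope * (IOS_DIST_BASE_SIZE /
         * ROPES_PER_IOC).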
*/ + pci_setup_iommu(PCI_HOST_BRIDGE(elroy)->bus, elroy_pcihost_set_iommu, + elroy); + } +} + +static void astro_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = astro_reset; + dc->vmsd = &vmstate_astro; + dc->realize = astro_realize; + /* + * astro with elroys are hard part of the newer PA2.0 machines and can not + * be created without that hardware + */ + dc->user_creatable = false; +} + +static const TypeInfo astro_chip_info = { + .name = TYPE_ASTRO_CHIP, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_init = astro_init, + .instance_size = sizeof(AstroState), + .class_init = astro_class_init, +}; + +static void astro_iommu_memory_region_class_init(ObjectClass *klass, + void *data) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); + + imrc->translate = astro_translate_iommu; +} + +static const TypeInfo astro_iommu_memory_region_info = { + .parent = TYPE_IOMMU_MEMORY_REGION, + .name = TYPE_ASTRO_IOMMU_MEMORY_REGION, + .class_init = astro_iommu_memory_region_class_init, +}; + + +static void astro_register_types(void) +{ + type_register_static(&astro_chip_info); + type_register_static(&astro_iommu_memory_region_info); +} + +type_init(astro_register_types) diff --git a/hw/pci-host/bonito.c b/hw/pci-host/bonito.c index ee6cb85e97..bab661f3ce 100644 --- a/hw/pci-host/bonito.c +++ b/hw/pci-host/bonito.c @@ -654,7 +654,7 @@ static void bonito_host_realize(DeviceState *dev, Error **errp) static void bonito_pci_realize(PCIDevice *dev, Error **errp) { PCIBonitoState *s = PCI_BONITO(dev); - SysBusDevice *sysbus = SYS_BUS_DEVICE(s->pcihost); + MemoryRegion *host_mem = get_system_memory(); PCIHostState *phb = PCI_HOST_BRIDGE(s->pcihost); BonitoState *bs = s->pcihost; MemoryRegion *pcimem_alias = g_new(MemoryRegion, 1); @@ -668,48 +668,45 @@ static void bonito_pci_realize(PCIDevice *dev, Error **errp) /* set the north bridge register mapping */ memory_region_init_io(&s->iomem, OBJECT(s), &bonito_ops, s, "north-bridge-register", BONITO_INTERNAL_REG_SIZE); - sysbus_init_mmio(sysbus, &s->iomem); - sysbus_mmio_map(sysbus, 0, BONITO_INTERNAL_REG_BASE); + memory_region_add_subregion(host_mem, BONITO_INTERNAL_REG_BASE, &s->iomem); /* set the north bridge pci configure mapping */ memory_region_init_io(&phb->conf_mem, OBJECT(s), &bonito_pciconf_ops, s, "north-bridge-pci-config", BONITO_PCICONFIG_SIZE); - sysbus_init_mmio(sysbus, &phb->conf_mem); - sysbus_mmio_map(sysbus, 1, BONITO_PCICONFIG_BASE); + memory_region_add_subregion(host_mem, BONITO_PCICONFIG_BASE, + &phb->conf_mem); /* set the south bridge pci configure mapping */ memory_region_init_io(&phb->data_mem, OBJECT(s), &bonito_spciconf_ops, s, "south-bridge-pci-config", BONITO_SPCICONFIG_SIZE); - sysbus_init_mmio(sysbus, &phb->data_mem); - sysbus_mmio_map(sysbus, 2, BONITO_SPCICONFIG_BASE); + memory_region_add_subregion(host_mem, BONITO_SPCICONFIG_BASE, + &phb->data_mem); create_unimplemented_device("bonito", BONITO_REG_BASE, BONITO_REG_SIZE); memory_region_init_io(&s->iomem_ldma, OBJECT(s), &bonito_ldma_ops, s, "ldma", 0x100); - sysbus_init_mmio(sysbus, &s->iomem_ldma); - sysbus_mmio_map(sysbus, 3, 0x1fe00200); + memory_region_add_subregion(host_mem, 0x1fe00200, &s->iomem_ldma); /* PCI copier */ memory_region_init_io(&s->iomem_cop, OBJECT(s), &bonito_cop_ops, s, "cop", 0x100); - sysbus_init_mmio(sysbus, &s->iomem_cop); - sysbus_mmio_map(sysbus, 4, 0x1fe00300); + memory_region_add_subregion(host_mem, 0x1fe00300, &s->iomem_cop); create_unimplemented_device("ROMCS", BONITO_FLASH_BASE, 60 
* MiB); /* Map PCI IO Space 0x1fd0 0000 - 0x1fd1 0000 */ memory_region_init_alias(&s->bonito_pciio, OBJECT(s), "isa_mmio", get_system_io(), 0, BONITO_PCIIO_SIZE); - sysbus_init_mmio(sysbus, &s->bonito_pciio); - sysbus_mmio_map(sysbus, 5, BONITO_PCIIO_BASE); + memory_region_add_subregion(host_mem, BONITO_PCIIO_BASE, + &s->bonito_pciio); /* add pci local io mapping */ memory_region_init_alias(&s->bonito_localio, OBJECT(s), "IOCS[0]", get_system_io(), 0, 256 * KiB); - sysbus_init_mmio(sysbus, &s->bonito_localio); - sysbus_mmio_map(sysbus, 6, BONITO_DEV_BASE); + memory_region_add_subregion(host_mem, BONITO_DEV_BASE, + &s->bonito_localio); create_unimplemented_device("IOCS[1]", BONITO_DEV_BASE + 1 * 256 * KiB, 256 * KiB); create_unimplemented_device("IOCS[2]", BONITO_DEV_BASE + 2 * 256 * KiB, @@ -719,8 +716,7 @@ static void bonito_pci_realize(PCIDevice *dev, Error **errp) memory_region_init_alias(pcimem_alias, NULL, "pci.mem.alias", &bs->pci_mem, 0, BONITO_PCIHI_SIZE); - memory_region_add_subregion(get_system_memory(), - BONITO_PCIHI_BASE, pcimem_alias); + memory_region_add_subregion(host_mem, BONITO_PCIHI_BASE, pcimem_alias); create_unimplemented_device("PCI_2", (hwaddr)BONITO_PCIHI_BASE + BONITO_PCIHI_SIZE, 2 * GiB); diff --git a/hw/pci-host/meson.build b/hw/pci-host/meson.build index 64eada76fe..f891f026cb 100644 --- a/hw/pci-host/meson.build +++ b/hw/pci-host/meson.build @@ -27,6 +27,7 @@ pci_ss.add(when: 'CONFIG_MV64361', if_true: files('mv64361.c')) pci_ss.add(when: 'CONFIG_VERSATILE_PCI', if_true: files('versatile.c')) # HPPA devices +pci_ss.add(when: 'CONFIG_ASTRO', if_true: files('astro.c')) pci_ss.add(when: 'CONFIG_DINO', if_true: files('dino.c')) system_ss.add_all(when: 'CONFIG_PCI', if_true: pci_ss) diff --git a/hw/pci-host/sh_pci.c b/hw/pci-host/sh_pci.c index 77e7bbc65f..4edebced5e 100644 --- a/hw/pci-host/sh_pci.c +++ b/hw/pci-host/sh_pci.c @@ -40,7 +40,7 @@ struct SHPCIState { PCIHostState parent_obj; PCIDevice *dev; - qemu_irq irq[4]; + qemu_irq irq[PCI_NUM_PINS]; MemoryRegion memconfig_p4; MemoryRegion memconfig_a7; MemoryRegion isa; @@ -116,7 +116,7 @@ static void sh_pci_set_irq(void *opaque, int irq_num, int level) qemu_set_irq(pic[irq_num], level); } -static void sh_pci_device_realize(DeviceState *dev, Error **errp) +static void sh_pcic_host_realize(DeviceState *dev, Error **errp) { SysBusDevice *sbd = SYS_BUS_DEVICE(dev); SHPCIState *s = SH_PCI_HOST_BRIDGE(dev); @@ -131,7 +131,8 @@ static void sh_pci_device_realize(DeviceState *dev, Error **errp) s->irq, get_system_memory(), get_system_io(), - PCI_DEVFN(0, 0), 4, TYPE_PCI_BUS); + PCI_DEVFN(0, 0), PCI_NUM_PINS, + TYPE_PCI_BUS); memory_region_init_io(&s->memconfig_p4, OBJECT(s), &sh_pci_reg_ops, s, "sh_pci", 0x224); memory_region_init_alias(&s->memconfig_a7, OBJECT(s), "sh_pci.2", @@ -145,19 +146,19 @@ static void sh_pci_device_realize(DeviceState *dev, Error **errp) s->dev = pci_create_simple(phb->bus, PCI_DEVFN(0, 0), "sh_pci_host"); } -static void sh_pci_host_realize(PCIDevice *d, Error **errp) +static void sh_pcic_pci_realize(PCIDevice *d, Error **errp) { pci_set_word(d->config + PCI_COMMAND, PCI_COMMAND_WAIT); pci_set_word(d->config + PCI_STATUS, PCI_STATUS_CAP_LIST | PCI_STATUS_FAST_BACK | PCI_STATUS_DEVSEL_MEDIUM); } -static void sh_pci_host_class_init(ObjectClass *klass, void *data) +static void sh_pcic_pci_class_init(ObjectClass *klass, void *data) { PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); - k->realize = sh_pci_host_realize; + k->realize = sh_pcic_pci_realize; k->vendor_id 
= PCI_VENDOR_ID_HITACHI; k->device_id = PCI_DEVICE_ID_HITACHI_SH7751R; /* @@ -167,35 +168,29 @@ static void sh_pci_host_class_init(ObjectClass *klass, void *data) dc->user_creatable = false; } -static const TypeInfo sh_pci_host_info = { - .name = "sh_pci_host", - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(PCIDevice), - .class_init = sh_pci_host_class_init, - .interfaces = (InterfaceInfo[]) { - { INTERFACE_CONVENTIONAL_PCI_DEVICE }, - { }, - }, -}; - -static void sh_pci_device_class_init(ObjectClass *klass, void *data) +static void sh_pcic_host_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - dc->realize = sh_pci_device_realize; + dc->realize = sh_pcic_host_realize; } -static const TypeInfo sh_pci_device_info = { - .name = TYPE_SH_PCI_HOST_BRIDGE, - .parent = TYPE_PCI_HOST_BRIDGE, - .instance_size = sizeof(SHPCIState), - .class_init = sh_pci_device_class_init, +static const TypeInfo sh_pcic_types[] = { + { + .name = TYPE_SH_PCI_HOST_BRIDGE, + .parent = TYPE_PCI_HOST_BRIDGE, + .instance_size = sizeof(SHPCIState), + .class_init = sh_pcic_host_class_init, + }, { + .name = "sh_pci_host", + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PCIDevice), + .class_init = sh_pcic_pci_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { }, + }, + }, }; -static void sh_pci_register_types(void) -{ - type_register_static(&sh_pci_device_info); - type_register_static(&sh_pci_host_info); -} - -type_init(sh_pci_register_types) +DEFINE_TYPES(sh_pcic_types) diff --git a/hw/pci-host/trace-events b/hw/pci-host/trace-events index 9d216bb89f..b2f47e6335 100644 --- a/hw/pci-host/trace-events +++ b/hw/pci-host/trace-events @@ -46,3 +46,14 @@ pnv_phb4_xive_notify_abt(uint64_t notif_port, uint64_t data) "notif=@0x%"PRIx64" dino_chip_mem_valid(uint64_t addr, uint32_t val) "access to addr 0x%"PRIx64" is %d" dino_chip_read(uint64_t addr, uint32_t val) "addr 0x%"PRIx64" val 0x%08x" dino_chip_write(uint64_t addr, uint32_t val) "addr 0x%"PRIx64" val 0x%08x" + +# astro.c +astro_chip_mem_valid(uint64_t addr, uint32_t val) "access to addr 0x%"PRIx64" is %d" +astro_chip_read(uint64_t addr, int size, uint64_t val) "addr 0x%"PRIx64" size %d val 0x%"PRIx64 +astro_chip_write(uint64_t addr, int size, uint64_t val) "addr 0x%"PRIx64" size %d val 0x%"PRIx64 +elroy_read(uint64_t addr, int size, uint64_t val) "addr 0x%"PRIx64" size %d val 0x%"PRIx64 +elroy_write(uint64_t addr, int size, uint64_t val) "addr 0x%"PRIx64" size %d val 0x%"PRIx64 +elroy_pci_config_data_read(uint64_t addr, int size, uint64_t val) "addr 0x%"PRIx64" size %d val 0x%"PRIx64 +elroy_pci_config_data_write(uint64_t addr, int size, uint64_t val) "addr 0x%"PRIx64" size %d val 0x%"PRIx64 +iosapic_reg_write(uint64_t reg_select, int size, uint64_t val) "reg_select 0x%"PRIx64" size %d val 0x%"PRIx64 +iosapic_reg_read(uint64_t reg_select, int size, uint64_t val) "reg_select 0x%"PRIx64" size %d val 0x%"PRIx64 diff --git a/hw/pci/pci.c b/hw/pci/pci.c index b0d21bf43a..7d09e1a39d 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -500,15 +500,14 @@ bool pci_bus_bypass_iommu(PCIBus *bus) } static void pci_root_bus_internal_init(PCIBus *bus, DeviceState *parent, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, + MemoryRegion *mem, MemoryRegion *io, uint8_t devfn_min) { assert(PCI_FUNC(devfn_min) == 0); bus->devfn_min = devfn_min; bus->slot_reserved_mask = 0x0; - bus->address_space_mem = address_space_mem; - bus->address_space_io = address_space_io; + bus->address_space_mem = 
mem; + bus->address_space_io = io; bus->flags |= PCI_BUS_IS_ROOT; /* host bridge */ @@ -529,25 +528,21 @@ bool pci_bus_is_express(const PCIBus *bus) void pci_root_bus_init(PCIBus *bus, size_t bus_size, DeviceState *parent, const char *name, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, + MemoryRegion *mem, MemoryRegion *io, uint8_t devfn_min, const char *typename) { qbus_init(bus, bus_size, typename, parent, name); - pci_root_bus_internal_init(bus, parent, address_space_mem, - address_space_io, devfn_min); + pci_root_bus_internal_init(bus, parent, mem, io, devfn_min); } PCIBus *pci_root_bus_new(DeviceState *parent, const char *name, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, + MemoryRegion *mem, MemoryRegion *io, uint8_t devfn_min, const char *typename) { PCIBus *bus; bus = PCI_BUS(qbus_new(typename, parent, name)); - pci_root_bus_internal_init(bus, parent, address_space_mem, - address_space_io, devfn_min); + pci_root_bus_internal_init(bus, parent, mem, io, devfn_min); return bus; } @@ -586,15 +581,13 @@ void pci_bus_irqs_cleanup(PCIBus *bus) PCIBus *pci_register_root_bus(DeviceState *parent, const char *name, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq, void *irq_opaque, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, + MemoryRegion *mem, MemoryRegion *io, uint8_t devfn_min, int nirq, const char *typename) { PCIBus *bus; - bus = pci_root_bus_new(parent, name, address_space_mem, - address_space_io, devfn_min, typename); + bus = pci_root_bus_new(parent, name, mem, io, devfn_min, typename); pci_bus_irqs(bus, set_irq, irq_opaque, nirq); pci_bus_map_irqs(bus, map_irq); return bus; diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c index cc44d5e339..d28ed3ba73 100644 --- a/hw/ppc/pef.c +++ b/hw/ppc/pef.c @@ -63,7 +63,7 @@ static int kvmppc_svm_init(ConfidentialGuestSupport *cgs, Error **errp) /* add migration blocker */ error_setg(&pef_mig_blocker, "PEF: Migration is not implemented"); /* NB: This can fail if --only-migratable is used */ - migrate_add_blocker(pef_mig_blocker, &error_fatal); + migrate_add_blocker(&pef_mig_blocker, &error_fatal); cgs->ready = true; diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index eb54f93986..c0e34fffbc 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -1217,10 +1217,9 @@ static void pnv_chip_icp_realize(Pnv8Chip *chip8, Error **errp) name = g_strdup_printf("icp-%x", chip->chip_id); memory_region_init(&chip8->icp_mmio, OBJECT(chip), name, PNV_ICP_SIZE); - sysbus_init_mmio(SYS_BUS_DEVICE(chip), &chip8->icp_mmio); g_free(name); - - sysbus_mmio_map(SYS_BUS_DEVICE(chip), 1, PNV_ICP_BASE(chip)); + memory_region_add_subregion(get_system_memory(), PNV_ICP_BASE(chip), + &chip8->icp_mmio); /* Map the ICP registers for each thread */ for (i = 0; i < chip->nr_cores; i++) { @@ -1249,12 +1248,7 @@ static void pnv_chip_power8_realize(DeviceState *dev, Error **errp) assert(chip8->xics); /* XSCOM bridge is first */ - pnv_xscom_realize(chip, PNV_XSCOM_SIZE, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV_XSCOM_BASE(chip)); + pnv_xscom_init(chip, PNV_XSCOM_SIZE, PNV_XSCOM_BASE(chip)); pcc->parent_realize(dev, &local_err); if (local_err) { @@ -1512,12 +1506,7 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp) Error *local_err = NULL; /* XSCOM bridge is first */ - pnv_xscom_realize(chip, PNV9_XSCOM_SIZE, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, 
PNV9_XSCOM_BASE(chip)); + pnv_xscom_init(chip, PNV9_XSCOM_SIZE, PNV9_XSCOM_BASE(chip)); pcc->parent_realize(dev, &local_err); if (local_err) { @@ -1727,12 +1716,7 @@ static void pnv_chip_power10_realize(DeviceState *dev, Error **errp) Error *local_err = NULL; /* XSCOM bridge is first */ - pnv_xscom_realize(chip, PNV10_XSCOM_SIZE, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV10_XSCOM_BASE(chip)); + pnv_xscom_init(chip, PNV10_XSCOM_SIZE, PNV10_XSCOM_BASE(chip)); pcc->parent_realize(dev, &local_err); if (local_err) { diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c index d820e05e40..805b1d0c87 100644 --- a/hw/ppc/pnv_xscom.c +++ b/hw/ppc/pnv_xscom.c @@ -221,15 +221,14 @@ const MemoryRegionOps pnv_xscom_ops = { .endianness = DEVICE_BIG_ENDIAN, }; -void pnv_xscom_realize(PnvChip *chip, uint64_t size, Error **errp) +void pnv_xscom_init(PnvChip *chip, uint64_t size, hwaddr addr) { - SysBusDevice *sbd = SYS_BUS_DEVICE(chip); char *name; name = g_strdup_printf("xscom-%x", chip->chip_id); memory_region_init_io(&chip->xscom_mmio, OBJECT(chip), &pnv_xscom_ops, chip, name, size); - sysbus_init_mmio(sbd, &chip->xscom_mmio); + memory_region_add_subregion(get_system_memory(), addr, &chip->xscom_mmio); memory_region_init(&chip->xscom, OBJECT(chip), name, size); address_space_init(&chip->xscom_as, &chip->xscom, name); diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c index 45f409c838..a189942de4 100644 --- a/hw/ppc/ppc440_bamboo.c +++ b/hw/ppc/ppc440_bamboo.c @@ -24,7 +24,6 @@ #include "elf.h" #include "hw/char/serial.h" #include "hw/ppc/ppc.h" -#include "ppc405.h" #include "sysemu/sysemu.h" #include "sysemu/reset.h" #include "hw/sysbus.h" diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c index 4181c843a8..7d6ca70387 100644 --- a/hw/ppc/ppc440_uc.c +++ b/hw/ppc/ppc440_uc.c @@ -73,46 +73,6 @@ typedef struct ppc4xx_l2sram_t { uint32_t isram0[11]; } ppc4xx_l2sram_t; -#ifdef MAP_L2SRAM -static void l2sram_update_mappings(ppc4xx_l2sram_t *l2sram, - uint32_t isarc, uint32_t isacntl, - uint32_t dsarc, uint32_t dsacntl) -{ - if (l2sram->isarc != isarc || - (l2sram->isacntl & 0x80000000) != (isacntl & 0x80000000)) { - if (l2sram->isacntl & 0x80000000) { - /* Unmap previously assigned memory region */ - memory_region_del_subregion(get_system_memory(), - &l2sram->isarc_ram); - } - if (isacntl & 0x80000000) { - /* Map new instruction memory region */ - memory_region_add_subregion(get_system_memory(), isarc, - &l2sram->isarc_ram); - } - } - if (l2sram->dsarc != dsarc || - (l2sram->dsacntl & 0x80000000) != (dsacntl & 0x80000000)) { - if (l2sram->dsacntl & 0x80000000) { - /* Beware not to unmap the region we just mapped */ - if (!(isacntl & 0x80000000) || l2sram->dsarc != isarc) { - /* Unmap previously assigned memory region */ - memory_region_del_subregion(get_system_memory(), - &l2sram->dsarc_ram); - } - } - if (dsacntl & 0x80000000) { - /* Beware not to remap the region we just mapped */ - if (!(isacntl & 0x80000000) || dsarc != isarc) { - /* Map new data memory region */ - memory_region_add_subregion(get_system_memory(), dsarc, - &l2sram->dsarc_ram); - } - } - } -} -#endif - static uint32_t dcr_read_l2sram(void *opaque, int dcrn) { ppc4xx_l2sram_t *l2sram = opaque; @@ -193,7 +153,6 @@ static void dcr_write_l2sram(void *opaque, int dcrn, uint32_t val) /*l2sram->isram1[dcrn - DCR_L2CACHE_BASE] = val;*/ break; } - /*l2sram_update_mappings(l2sram, isarc, isacntl, dsarc, dsacntl);*/ } static void l2sram_reset(void *opaque) 
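pnv_xscom_init() above, like the spapr_xive, allwinner-r40-dramc, pnv ICP and bonito hunks earlier in this series, drops the sysbus MMIO indirection and maps the region directly into the system address space. A minimal before/after sketch, where dev, s and BASE_ADDR stand in for the particular device and its fixed base address:

    /* before: export the region through sysbus, then map it by index */
    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mmio);
    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, BASE_ADDR);

    /* after: map it straight into system memory */
    memory_region_add_subregion(get_system_memory(), BASE_ADDR, &s->mmio);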
@@ -203,7 +162,6 @@ static void l2sram_reset(void *opaque) memset(l2sram->l2cache, 0, sizeof(l2sram->l2cache)); l2sram->l2cache[DCR_L2CACHE_STAT - DCR_L2CACHE_BASE] = 0x80000000; memset(l2sram->isram0, 0, sizeof(l2sram->isram0)); - /*l2sram_update_mappings(l2sram, isarc, isacntl, dsarc, dsacntl);*/ } void ppc4xx_l2sram_init(CPUPPCState *env) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index cb840676d3..b25093be28 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1761,7 +1761,7 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) /* Signal all vCPUs waiting on this condition */ qemu_cond_broadcast(&spapr->fwnmi_machine_check_interlock_cond); - migrate_del_blocker(spapr->fwnmi_migration_blocker); + migrate_del_blocker(&spapr->fwnmi_migration_blocker); } static void spapr_create_nvram(SpaprMachineState *spapr) @@ -2937,13 +2937,6 @@ static void spapr_machine_init(MachineState *machine) spapr_create_lmb_dr_connectors(spapr); } - if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI) == SPAPR_CAP_ON) { - /* Create the error string for live migration blocker */ - error_setg(&spapr->fwnmi_migration_blocker, - "A machine check is being handled during migration. The handler" - "may run and log hardware error on the destination"); - } - if (mc->nvdimm_supported) { spapr_create_nvdimm_dr_connectors(spapr); } diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 4508e40814..deb4641505 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -920,7 +920,11 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) * fails when running with -only-migrate. A proper interface to * delay migration completion for a bit could avoid that. */ - ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, NULL); + error_setg(&spapr->fwnmi_migration_blocker, + "A machine check is being handled during migration. 
The handler" + "may run and log hardware error on the destination"); + + ret = migrate_add_blocker(&spapr->fwnmi_migration_blocker, NULL); if (ret == -EBUSY) { warn_report("Received a fwnmi while migration was in progress"); } diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index 7df21581c2..26c384b261 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -496,7 +496,7 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu, spapr->fwnmi_machine_check_interlock = -1; qemu_cond_signal(&spapr->fwnmi_machine_check_interlock_cond); rtas_st(rets, 0, RTAS_OUT_SUCCESS); - migrate_del_blocker(spapr->fwnmi_migration_blocker); + migrate_del_blocker(&spapr->fwnmi_migration_blocker); } static struct rtas_call { diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index 9d4fec2c04..f8ef2b6fa8 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -574,13 +574,14 @@ SpaprVioBus *spapr_vio_bus_init(void) /* Create bridge device */ dev = qdev_new(TYPE_SPAPR_VIO_BRIDGE); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); /* Create bus on bridge device */ qbus = qbus_new(TYPE_SPAPR_VIO_BUS, dev, "spapr-vio"); bus = SPAPR_VIO_BUS(qbus); bus->next_reg = SPAPR_VIO_REG_BASE; + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + /* hcall-vio */ spapr_register_hypercall(H_VIO_SIGNAL, h_vio_signal); diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c index f2f81bd425..d02f330650 100644 --- a/hw/ppc/virtex_ml507.c +++ b/hw/ppc/virtex_ml507.c @@ -43,7 +43,6 @@ #include "hw/ppc/ppc.h" #include "hw/ppc/ppc4xx.h" #include "hw/qdev-properties.h" -#include "ppc405.h" #include <libfdt.h> diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c index c6ed025982..d385d18d9c 100644 --- a/hw/rdma/vmw/pvrdma_cmd.c +++ b/hw/rdma/vmw/pvrdma_cmd.c @@ -129,23 +129,27 @@ static int query_port(PVRDMADev *dev, union pvrdma_cmd_req *req, { struct pvrdma_cmd_query_port *cmd = &req->query_port; struct pvrdma_cmd_query_port_resp *resp = &rsp->query_port_resp; - struct pvrdma_port_attr attrs = {}; + struct ibv_port_attr attrs = {}; if (cmd->port_num > MAX_PORTS) { return -EINVAL; } - if (rdma_backend_query_port(&dev->backend_dev, - (struct ibv_port_attr *)&attrs)) { + if (rdma_backend_query_port(&dev->backend_dev, &attrs)) { return -ENOMEM; } memset(resp, 0, sizeof(*resp)); - resp->attrs.state = dev->func0->device_active ? attrs.state : - PVRDMA_PORT_DOWN; - resp->attrs.max_mtu = attrs.max_mtu; - resp->attrs.active_mtu = attrs.active_mtu; + /* + * The state, max_mtu and active_mtu fields are enums; the values + * for pvrdma_port_state and pvrdma_mtu match those for + * ibv_port_state and ibv_mtu, so we can cast them safely. + */ + resp->attrs.state = dev->func0->device_active ? 
+ (enum pvrdma_port_state)attrs.state : PVRDMA_PORT_DOWN; + resp->attrs.max_mtu = (enum pvrdma_mtu)attrs.max_mtu; + resp->attrs.active_mtu = (enum pvrdma_mtu)attrs.active_mtu; resp->attrs.phys_state = attrs.phys_state; resp->attrs.gid_tbl_len = MIN(MAX_PORT_GIDS, attrs.gid_tbl_len); resp->attrs.max_msg_sz = 1024; diff --git a/hw/remote/meson.build b/hw/remote/meson.build index a1e8708c73..a3aa29aaf1 100644 --- a/hw/remote/meson.build +++ b/hw/remote/meson.build @@ -7,9 +7,11 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c')) remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c')) remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iohub.c')) remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iommu.c')) -remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: files('vfio-user-obj.c')) remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: libvfio_user_dep) +remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: files('vfio-user-obj.c'), + if_false: files('vfio-user-obj-stub.c')) +remote_ss.add(when: 'CONFIG_ALL', if_true: files('vfio-user-obj-stub.c')) specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('memory.c')) specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy-memory-listener.c')) diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c index 2052d721e5..fbc85a8d36 100644 --- a/hw/remote/proxy.c +++ b/hw/remote/proxy.c @@ -107,8 +107,7 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp) error_setg(&dev->migration_blocker, "%s does not support migration", TYPE_PCI_PROXY_DEV); - if (migrate_add_blocker(dev->migration_blocker, errp) < 0) { - error_free(dev->migration_blocker); + if (migrate_add_blocker(&dev->migration_blocker, errp) < 0) { object_unref(dev->ioc); return; } @@ -134,9 +133,7 @@ static void pci_proxy_dev_exit(PCIDevice *pdev) qio_channel_close(dev->ioc, NULL); } - migrate_del_blocker(dev->migration_blocker); - - error_free(dev->migration_blocker); + migrate_del_blocker(&dev->migration_blocker); proxy_memory_listener_deconfigure(&dev->proxy_listener); diff --git a/stubs/vfio-user-obj.c b/hw/remote/vfio-user-obj-stub.c index 79100d768e..79100d768e 100644 --- a/stubs/vfio-user-obj.c +++ b/hw/remote/vfio-user-obj-stub.c diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c new file mode 100644 index 0000000000..f16bdf65fa --- /dev/null +++ b/hw/s390x/cpu-topology.c @@ -0,0 +1,469 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * CPU Topology + * + * Copyright IBM Corp. 2022, 2023 + * Author(s): Pierre Morel <pmorel@linux.ibm.com> + * + * S390 topology handling can be divided in two parts: + * + * - The first part in this file is taking care of all common functions + * used by KVM and TCG to create and modify the topology. + * + * - The second part, building the topology information data for the + * guest with CPU and KVM specificity will be implemented inside + * the target/s390/kvm sub tree. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "hw/qdev-properties.h" +#include "hw/boards.h" +#include "target/s390x/cpu.h" +#include "hw/s390x/s390-virtio-ccw.h" +#include "hw/s390x/cpu-topology.h" +#include "qapi/qapi-commands-machine-target.h" +#include "qapi/qapi-events-machine-target.h" + +/* + * s390_topology is used to keep the topology information. + * .cores_per_socket: tracks information on the count of cores + * per socket. + * .polarization: tracks machine polarization. 
+ */ +S390Topology s390_topology = { + /* will be initialized after the CPU model is realized */ + .cores_per_socket = NULL, + .polarization = S390_CPU_POLARIZATION_HORIZONTAL, +}; + +/** + * s390_socket_nb: + * @cpu: s390x CPU + * + * Returns the socket number used inside the cores_per_socket array + * for a topology tree entry + */ +static int s390_socket_nb_from_ids(int drawer_id, int book_id, int socket_id) +{ + return (drawer_id * current_machine->smp.books + book_id) * + current_machine->smp.sockets + socket_id; +} + +/** + * s390_socket_nb: + * @cpu: s390x CPU + * + * Returns the socket number used inside the cores_per_socket array + * for a cpu. + */ +static int s390_socket_nb(S390CPU *cpu) +{ + return s390_socket_nb_from_ids(cpu->env.drawer_id, cpu->env.book_id, + cpu->env.socket_id); +} + +/** + * s390_has_topology: + * + * Return: true if the topology is supported by the machine. + */ +bool s390_has_topology(void) +{ + return s390_has_feat(S390_FEAT_CONFIGURATION_TOPOLOGY); +} + +/** + * s390_topology_init: + * @ms: the machine state where the machine topology is defined + * + * Keep track of the machine topology. + * + * Allocate an array to keep the count of cores per socket. + * The index of the array starts at socket 0 from book 0 and + * drawer 0 up to the maximum allowed by the machine topology. + */ +static void s390_topology_init(MachineState *ms) +{ + CpuTopology *smp = &ms->smp; + + s390_topology.cores_per_socket = g_new0(uint8_t, smp->sockets * + smp->books * smp->drawers); +} + +/* + * s390_handle_ptf: + * + * @register 1: contains the function code + * + * Function codes 0 (horizontal) and 1 (vertical) define the CPU + * polarization requested by the guest. + * + * Function code 2 is handling topology changes and is interpreted + * by the SIE. + */ +void s390_handle_ptf(S390CPU *cpu, uint8_t r1, uintptr_t ra) +{ + CpuS390Polarization polarization; + CPUS390XState *env = &cpu->env; + uint64_t reg = env->regs[r1]; + int fc = reg & S390_TOPO_FC_MASK; + + if (!s390_has_feat(S390_FEAT_CONFIGURATION_TOPOLOGY)) { + s390_program_interrupt(env, PGM_OPERATION, ra); + return; + } + + if (env->psw.mask & PSW_MASK_PSTATE) { + s390_program_interrupt(env, PGM_PRIVILEGED, ra); + return; + } + + if (reg & ~S390_TOPO_FC_MASK) { + s390_program_interrupt(env, PGM_SPECIFICATION, ra); + return; + } + + polarization = S390_CPU_POLARIZATION_VERTICAL; + switch (fc) { + case 0: + polarization = S390_CPU_POLARIZATION_HORIZONTAL; + /* fallthrough */ + case 1: + if (s390_topology.polarization == polarization) { + env->regs[r1] |= S390_PTF_REASON_DONE; + setcc(cpu, 2); + } else { + s390_topology.polarization = polarization; + s390_cpu_topology_set_changed(true); + qapi_event_send_cpu_polarization_change(polarization); + setcc(cpu, 0); + } + break; + default: + /* Note that fc == 2 is interpreted by the SIE */ + s390_program_interrupt(env, PGM_SPECIFICATION, ra); + } +} + +/** + * s390_topology_reset: + * + * Generic reset for CPU topology, calls s390_topology_reset() + * to reset the kernel Modified Topology Change Record. + */ +void s390_topology_reset(void) +{ + s390_cpu_topology_set_changed(false); + s390_topology.polarization = S390_CPU_POLARIZATION_HORIZONTAL; +} + +/** + * s390_topology_cpu_default: + * @cpu: pointer to a S390CPU + * @errp: Error pointer + * + * Setup the default topology if no attributes are already set. + * Passing a CPU with some, but not all, attributes set is considered + * an error. 
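[Editor's aside] The cores_per_socket bookkeeping added above indexes each socket with a flattened (drawer, book, socket) coordinate, as computed by s390_socket_nb_from_ids(). A minimal standalone illustration of that arithmetic, assuming hypothetical names and values not taken from the patch:

#include <stdio.h>

/* Mirrors the index formula in s390_socket_nb_from_ids():
 * index = (drawer_id * books_per_drawer + book_id) * sockets_per_book + socket_id
 */
static int socket_index(int drawer_id, int book_id, int socket_id,
                        int books_per_drawer, int sockets_per_book)
{
    return (drawer_id * books_per_drawer + book_id) * sockets_per_book + socket_id;
}

int main(void)
{
    /* e.g. 2 drawers, 2 books per drawer, 3 sockets per book:
     * drawer 1, book 0, socket 2 -> (1 * 2 + 0) * 3 + 2 = 8
     */
    printf("%d\n", socket_index(1, 0, 2, 2, 3));
    return 0;
}

Since s390_topology_init() sizes the array to drawers * books * sockets entries, every socket in the machine gets exactly one core counter.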
+ * + * The function calculates the (drawer_id, book_id, socket_id) + * topology by filling the cores starting from the first socket + * (0, 0, 0) up to the last (smp->drawers, smp->books, smp->sockets). + * + * CPU type and dedication have defaults values set in the + * s390x_cpu_properties, entitlement must be adjust depending on the + * dedication. + * + * Returns false if it is impossible to setup a default topology + * true otherwise. + */ +static bool s390_topology_cpu_default(S390CPU *cpu, Error **errp) +{ + CpuTopology *smp = &current_machine->smp; + CPUS390XState *env = &cpu->env; + + /* All geometry topology attributes must be set or all unset */ + if ((env->socket_id < 0 || env->book_id < 0 || env->drawer_id < 0) && + (env->socket_id >= 0 || env->book_id >= 0 || env->drawer_id >= 0)) { + error_setg(errp, + "Please define all or none of the topology geometry attributes"); + return false; + } + + /* If one value is unset all are unset -> calculate defaults */ + if (env->socket_id < 0) { + env->socket_id = s390_std_socket(env->core_id, smp); + env->book_id = s390_std_book(env->core_id, smp); + env->drawer_id = s390_std_drawer(env->core_id, smp); + } + + /* + * When the user specifies the entitlement as 'auto' on the command line, + * QEMU will set the entitlement as: + * Medium when the CPU is not dedicated. + * High when dedicated is true. + */ + if (env->entitlement == S390_CPU_ENTITLEMENT_AUTO) { + if (env->dedicated) { + env->entitlement = S390_CPU_ENTITLEMENT_HIGH; + } else { + env->entitlement = S390_CPU_ENTITLEMENT_MEDIUM; + } + } + return true; +} + +/** + * s390_topology_check: + * @socket_id: socket to check + * @book_id: book to check + * @drawer_id: drawer to check + * @entitlement: entitlement to check + * @dedicated: dedication to check + * @errp: Error pointer + * + * The function checks if the topology + * attributes fits inside the system topology. + * + * Returns false if the specified topology does not match with + * the machine topology. + */ +static bool s390_topology_check(uint16_t socket_id, uint16_t book_id, + uint16_t drawer_id, uint16_t entitlement, + bool dedicated, Error **errp) +{ + CpuTopology *smp = &current_machine->smp; + + if (socket_id >= smp->sockets) { + error_setg(errp, "Unavailable socket: %d", socket_id); + return false; + } + if (book_id >= smp->books) { + error_setg(errp, "Unavailable book: %d", book_id); + return false; + } + if (drawer_id >= smp->drawers) { + error_setg(errp, "Unavailable drawer: %d", drawer_id); + return false; + } + if (entitlement >= S390_CPU_ENTITLEMENT__MAX) { + error_setg(errp, "Unknown entitlement: %d", entitlement); + return false; + } + if (dedicated && (entitlement == S390_CPU_ENTITLEMENT_LOW || + entitlement == S390_CPU_ENTITLEMENT_MEDIUM)) { + error_setg(errp, "A dedicated CPU implies high entitlement"); + return false; + } + return true; +} + +/** + * s390_topology_need_report + * @cpu: Current cpu + * @drawer_id: future drawer ID + * @book_id: future book ID + * @socket_id: future socket ID + * @entitlement: future entitlement + * @dedicated: future dedicated + * + * A modified topology change report is needed if the topology + * tree or the topology attributes change. 
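[Editor's aside] The defaults above come from the s390_std_socket()/s390_std_book()/s390_std_drawer() helpers, which are not part of this hunk. A plausible sketch of a sequential fill, given purely as an assumption for illustration (the real helpers live in the cpu-topology headers and may differ):

/* Assumed layout: core IDs fill socket 0 of book 0 of drawer 0 first,
 * then the next socket, and so on. Names and formulas are illustrative only.
 */
static int std_socket(int core_id, int cores, int sockets)
{
    return (core_id / cores) % sockets;
}

static int std_book(int core_id, int cores, int sockets, int books)
{
    return (core_id / (cores * sockets)) % books;
}

static int std_drawer(int core_id, int cores, int sockets, int books)
{
    return core_id / (cores * sockets * books);
}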
+ */ +static bool s390_topology_need_report(S390CPU *cpu, int drawer_id, + int book_id, int socket_id, + uint16_t entitlement, bool dedicated) +{ + return cpu->env.drawer_id != drawer_id || + cpu->env.book_id != book_id || + cpu->env.socket_id != socket_id || + cpu->env.entitlement != entitlement || + cpu->env.dedicated != dedicated; +} + +/** + * s390_update_cpu_props: + * @ms: the machine state + * @cpu: the CPU for which to update the properties from the environment. + * + */ +static void s390_update_cpu_props(MachineState *ms, S390CPU *cpu) +{ + CpuInstanceProperties *props; + + props = &ms->possible_cpus->cpus[cpu->env.core_id].props; + + props->socket_id = cpu->env.socket_id; + props->book_id = cpu->env.book_id; + props->drawer_id = cpu->env.drawer_id; +} + +/** + * s390_topology_setup_cpu: + * @ms: MachineState used to initialize the topology structure on + * first call. + * @cpu: the new S390CPU to insert in the topology structure + * @errp: the error pointer + * + * Called from CPU hotplug to check and setup the CPU attributes + * before the CPU is inserted in the topology. + * There is no need to update the MTCR explicitly here because it + * will be updated by KVM on creation of the new CPU. + */ +void s390_topology_setup_cpu(MachineState *ms, S390CPU *cpu, Error **errp) +{ + int entry; + + /* + * We do not want to initialize the topology if the CPU model + * does not support topology, consequently, we have to wait for + * the first CPU to be realized, which realizes the CPU model + * to initialize the topology structures. + * + * s390_topology_setup_cpu() is called from the CPU hotplug. + */ + if (!s390_topology.cores_per_socket) { + s390_topology_init(ms); + } + + if (!s390_topology_cpu_default(cpu, errp)) { + return; + } + + if (!s390_topology_check(cpu->env.socket_id, cpu->env.book_id, + cpu->env.drawer_id, cpu->env.entitlement, + cpu->env.dedicated, errp)) { + return; + } + + /* Do we still have space in the socket */ + entry = s390_socket_nb(cpu); + if (s390_topology.cores_per_socket[entry] >= ms->smp.cores) { + error_setg(errp, "No more space on this socket"); + return; + } + + /* Update the count of cores in sockets */ + s390_topology.cores_per_socket[entry] += 1; + + /* topology tree is reflected in props */ + s390_update_cpu_props(ms, cpu); +} + +static void s390_change_topology(uint16_t core_id, + bool has_socket_id, uint16_t socket_id, + bool has_book_id, uint16_t book_id, + bool has_drawer_id, uint16_t drawer_id, + bool has_entitlement, + CpuS390Entitlement entitlement, + bool has_dedicated, bool dedicated, + Error **errp) +{ + MachineState *ms = current_machine; + int old_socket_entry; + int new_socket_entry; + bool report_needed; + S390CPU *cpu; + + cpu = s390_cpu_addr2state(core_id); + if (!cpu) { + error_setg(errp, "Core-id %d does not exist!", core_id); + return; + } + + /* Get attributes not provided from cpu and verify the new topology */ + if (!has_socket_id) { + socket_id = cpu->env.socket_id; + } + if (!has_book_id) { + book_id = cpu->env.book_id; + } + if (!has_drawer_id) { + drawer_id = cpu->env.drawer_id; + } + if (!has_dedicated) { + dedicated = cpu->env.dedicated; + } + + /* + * When the user specifies the entitlement as 'auto' on the command line, + * QEMU will set the entitlement as: + * Medium when the CPU is not dedicated. + * High when dedicated is true. 
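[Editor's aside] Both the hotplug path and the topology-change path above apply the same entitlement policy: 'auto' resolves to high for dedicated CPUs and medium otherwise, and s390_topology_check() rejects a dedicated CPU with low or medium entitlement. A distilled sketch of just that policy, with hypothetical enum and helper names rather than the QEMU ones:

#include <stdbool.h>

enum ent { ENT_AUTO, ENT_LOW, ENT_MEDIUM, ENT_HIGH };

/* 'auto' picks the entitlement from the dedication flag. */
static enum ent resolve_entitlement(enum ent requested, bool dedicated)
{
    if (requested == ENT_AUTO) {
        return dedicated ? ENT_HIGH : ENT_MEDIUM;
    }
    return requested;
}

/* A dedicated CPU implies high entitlement; low/medium is rejected. */
static bool entitlement_is_valid(enum ent resolved, bool dedicated)
{
    return !(dedicated && (resolved == ENT_LOW || resolved == ENT_MEDIUM));
}

On the command line, the topology tree itself is described with the books= and drawers= -smp parameters that the s390-virtio-ccw machine class enables further down in this series; the exact -smp syntax is as documented for QEMU's -smp option.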
+ */ + if (!has_entitlement || entitlement == S390_CPU_ENTITLEMENT_AUTO) { + if (dedicated) { + entitlement = S390_CPU_ENTITLEMENT_HIGH; + } else { + entitlement = S390_CPU_ENTITLEMENT_MEDIUM; + } + } + + if (!s390_topology_check(socket_id, book_id, drawer_id, + entitlement, dedicated, errp)) { + return; + } + + /* Check for space on new socket */ + old_socket_entry = s390_socket_nb(cpu); + new_socket_entry = s390_socket_nb_from_ids(drawer_id, book_id, socket_id); + + if (new_socket_entry != old_socket_entry) { + if (s390_topology.cores_per_socket[new_socket_entry] >= + ms->smp.cores) { + error_setg(errp, "No more space on this socket"); + return; + } + /* Update the count of cores in sockets */ + s390_topology.cores_per_socket[new_socket_entry] += 1; + s390_topology.cores_per_socket[old_socket_entry] -= 1; + } + + /* Check if we will need to report the modified topology */ + report_needed = s390_topology_need_report(cpu, drawer_id, book_id, + socket_id, entitlement, + dedicated); + + /* All checks done, report new topology into the vCPU */ + cpu->env.drawer_id = drawer_id; + cpu->env.book_id = book_id; + cpu->env.socket_id = socket_id; + cpu->env.dedicated = dedicated; + cpu->env.entitlement = entitlement; + + /* topology tree is reflected in props */ + s390_update_cpu_props(ms, cpu); + + /* Advertise the topology change */ + if (report_needed) { + s390_cpu_topology_set_changed(true); + } +} + +void qmp_set_cpu_topology(uint16_t core, + bool has_socket, uint16_t socket, + bool has_book, uint16_t book, + bool has_drawer, uint16_t drawer, + bool has_entitlement, CpuS390Entitlement entitlement, + bool has_dedicated, bool dedicated, + Error **errp) +{ + if (!s390_has_topology()) { + error_setg(errp, "This machine doesn't support topology"); + return; + } + + s390_change_topology(core, has_socket, socket, has_book, book, + has_drawer, drawer, has_entitlement, entitlement, + has_dedicated, dedicated, errp); +} + +CpuPolarizationInfo *qmp_query_s390x_cpu_polarization(Error **errp) +{ + CpuPolarizationInfo *info = g_new0(CpuPolarizationInfo, 1); + + info->polarization = s390_topology.polarization; + return info; +} diff --git a/hw/s390x/css-bridge.c b/hw/s390x/css-bridge.c index 4017081d49..15d26efc95 100644 --- a/hw/s390x/css-bridge.c +++ b/hw/s390x/css-bridge.c @@ -95,7 +95,6 @@ static const TypeInfo virtual_css_bus_info = { VirtualCssBus *virtual_css_bus_init(void) { - VirtualCssBus *cbus; BusState *bus; DeviceState *dev; @@ -103,19 +102,19 @@ VirtualCssBus *virtual_css_bus_init(void) dev = qdev_new(TYPE_VIRTUAL_CSS_BRIDGE); object_property_add_child(qdev_get_machine(), TYPE_VIRTUAL_CSS_BRIDGE, OBJECT(dev)); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); /* Create bus on bridge device */ bus = qbus_new(TYPE_VIRTUAL_CSS_BUS, dev, "virtual-css"); - cbus = VIRTUAL_CSS_BUS(bus); /* Enable hotplugging */ qbus_set_hotplug_handler(bus, OBJECT(dev)); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + css_register_io_adapters(CSS_IO_ADAPTER_VIRTIO, true, false, 0, &error_abort); - return cbus; + return VIRTUAL_CSS_BUS(bus); } /***************** Virtual-css Bus Bridge Device ********************/ diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build index 6fd096813a..482fd13420 100644 --- a/hw/s390x/meson.build +++ b/hw/s390x/meson.build @@ -23,6 +23,7 @@ s390x_ss.add(when: 'CONFIG_KVM', if_true: files( 's390-skeys-kvm.c', 's390-stattrib-kvm.c', 's390-pci-kvm.c', + 'cpu-topology.c', )) s390x_ss.add(when: 'CONFIG_TCG', if_true: files( 'tod-tcg.c', diff --git 
a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 2d75f2131f..7262725d2e 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -45,6 +45,7 @@ #include "target/s390x/kvm/pv.h" #include "migration/blocker.h" #include "qapi/visitor.h" +#include "hw/s390x/cpu-topology.h" static Error *pv_mig_blocker; @@ -123,6 +124,9 @@ static void subsystem_reset(void) device_cold_reset(dev); } } + if (s390_has_topology()) { + s390_topology_reset(); + } } static int virtio_ccw_hcall_notify(const uint64_t *args) @@ -309,10 +313,18 @@ static void s390_cpu_plug(HotplugHandler *hotplug_dev, { MachineState *ms = MACHINE(hotplug_dev); S390CPU *cpu = S390_CPU(dev); + ERRP_GUARD(); g_assert(!ms->possible_cpus->cpus[cpu->env.core_id].cpu); ms->possible_cpus->cpus[cpu->env.core_id].cpu = OBJECT(dev); + if (s390_has_topology()) { + s390_topology_setup_cpu(ms, cpu, errp); + if (*errp) { + return; + } + } + if (dev->hotplugged) { raise_irq_cpu_hotplug(); } @@ -332,8 +344,7 @@ static void s390_machine_unprotect(S390CcwMachineState *ms) s390_pv_vm_disable(); } ms->pv = false; - migrate_del_blocker(pv_mig_blocker); - error_free_or_abort(&pv_mig_blocker); + migrate_del_blocker(&pv_mig_blocker); ram_block_discard_disable(false); } @@ -356,11 +367,10 @@ static int s390_machine_protect(S390CcwMachineState *ms) error_setg(&pv_mig_blocker, "protected VMs are currently not migratable."); - rc = migrate_add_blocker(pv_mig_blocker, &local_err); + rc = migrate_add_blocker(&pv_mig_blocker, &local_err); if (rc) { ram_block_discard_disable(false); error_report_err(local_err); - error_free_or_abort(&pv_mig_blocker); return rc; } @@ -368,8 +378,7 @@ static int s390_machine_protect(S390CcwMachineState *ms) rc = s390_pv_vm_enable(); if (rc) { ram_block_discard_disable(false); - migrate_del_blocker(pv_mig_blocker); - error_free_or_abort(&pv_mig_blocker); + migrate_del_blocker(&pv_mig_blocker); return rc; } @@ -562,11 +571,20 @@ static const CPUArchIdList *s390_possible_cpu_arch_ids(MachineState *ms) sizeof(CPUArchId) * max_cpus); ms->possible_cpus->len = max_cpus; for (i = 0; i < ms->possible_cpus->len; i++) { + CpuInstanceProperties *props = &ms->possible_cpus->cpus[i].props; + ms->possible_cpus->cpus[i].type = ms->cpu_type; ms->possible_cpus->cpus[i].vcpus_count = 1; ms->possible_cpus->cpus[i].arch_id = i; - ms->possible_cpus->cpus[i].props.has_core_id = true; - ms->possible_cpus->cpus[i].props.core_id = i; + + props->has_core_id = true; + props->core_id = i; + props->has_socket_id = true; + props->socket_id = s390_std_socket(i, &ms->smp); + props->has_book_id = true; + props->book_id = s390_std_book(i, &ms->smp); + props->has_drawer_id = true; + props->drawer_id = s390_std_drawer(i, &ms->smp); } return ms->possible_cpus; @@ -744,6 +762,8 @@ static void ccw_machine_class_init(ObjectClass *oc, void *data) mc->no_sdcard = 1; mc->max_cpus = S390_MAX_CPUS; mc->has_hotpluggable_cpus = true; + mc->smp_props.books_supported = true; + mc->smp_props.drawers_supported = true; assert(!mc->get_hotplug_handler); mc->get_hotplug_handler = s390_get_hotplug_handler; mc->cpu_index_to_instance_props = s390_cpu_index_to_props; @@ -853,6 +873,8 @@ static void ccw_machine_8_1_class_options(MachineClass *mc) { ccw_machine_8_2_class_options(mc); compat_props_add(mc->compat_props, hw_compat_8_1, hw_compat_8_1_len); + mc->smp_props.drawers_supported = false; + mc->smp_props.books_supported = false; } DEFINE_CCW_MACHINE(8_1, "8.1", false); diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c index eff74479f4..d339cbb7e4 100644 --- 
a/hw/s390x/sclp.c +++ b/hw/s390x/sclp.c @@ -20,6 +20,7 @@ #include "hw/s390x/event-facility.h" #include "hw/s390x/s390-pci-bus.h" #include "hw/s390x/ipl.h" +#include "hw/s390x/cpu-topology.h" static inline SCLPDevice *get_sclp_device(void) { @@ -123,6 +124,10 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) return; } + if (s390_has_topology()) { + read_info->stsi_parm = SCLP_READ_SCP_INFO_MNEST; + } + /* CPU information */ prepare_cpu_entries(machine, entries_start, &cpu_count); read_info->entries_cpu = cpu_to_be16(cpu_count); diff --git a/hw/s390x/sclpquiesce.c b/hw/s390x/sclpquiesce.c index ce07b16884..a641089929 100644 --- a/hw/s390x/sclpquiesce.c +++ b/hw/s390x/sclpquiesce.c @@ -78,12 +78,10 @@ static const VMStateDescription vmstate_sclpquiesce = { } }; -typedef struct QuiesceNotifier QuiesceNotifier; - -static struct QuiesceNotifier { +typedef struct QuiesceNotifier { Notifier notifier; SCLPEvent *event; -} qn; +} QuiesceNotifier; static void quiesce_powerdown_req(Notifier *n, void *opaque) { @@ -97,6 +95,8 @@ static void quiesce_powerdown_req(Notifier *n, void *opaque) static int quiesce_init(SCLPEvent *event) { + static QuiesceNotifier qn; + qn.notifier.notify = quiesce_powerdown_req; qn.event = event; diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 95cadb93e7..5d9e06a9bb 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -210,7 +210,7 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) "When external environment supports it (Orchestrator migrates " "target SCSI device state or use shared storage over network), " "set 'migratable' property to true to enable migration."); - if (migrate_add_blocker(vsc->migration_blocker, errp) < 0) { + if (migrate_add_blocker(&vsc->migration_blocker, errp) < 0) { goto free_virtio; } } @@ -243,10 +243,9 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) free_vqs: g_free(vqs); if (!vsc->migratable) { - migrate_del_blocker(vsc->migration_blocker); + migrate_del_blocker(&vsc->migration_blocker); } free_virtio: - error_free(vsc->migration_blocker); virtio_scsi_common_unrealize(dev); close_fd: if (vhostfd >= 0) { @@ -262,8 +261,7 @@ static void vhost_scsi_unrealize(DeviceState *dev) struct vhost_virtqueue *vqs = vsc->dev.vqs; if (!vsc->migratable) { - migrate_del_blocker(vsc->migration_blocker); - error_free(vsc->migration_blocker); + migrate_del_blocker(&vsc->migration_blocker); } /* This will stop vhost backend. 
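[Editor's aside] The spapr, remote/proxy, s390 PV and vhost-scsi hunks above all follow the same conversion: migrate_add_blocker() and migrate_del_blocker() now take a pointer to the Error * blocker, so the migration core can free it and reset it to NULL, and callers drop their explicit error_free() calls. A condensed sketch of the resulting caller pattern, based only on how these hunks change (device and field names hypothetical):

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "migration/blocker.h"

static Error *mydev_migration_blocker;   /* hypothetical per-device blocker */

static int mydev_block_migration(Error **errp)
{
    error_setg(&mydev_migration_blocker, "mydev does not support migration");
    /* On failure the core is expected to free and clear the blocker itself. */
    return migrate_add_blocker(&mydev_migration_blocker, errp);
}

static void mydev_unblock_migration(void)
{
    /* Frees the Error and resets the pointer to NULL; no error_free() needed. */
    migrate_del_blocker(&mydev_migration_blocker);
}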
*/ diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 45b95ea070..fa53f0902c 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -761,7 +761,7 @@ static void virtio_scsi_fail_cmd_req(VirtIOSCSIReq *req) static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req) { - VirtIOSCSICommon *vs = &s->parent_obj; + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); SCSIDevice *d; int rc; diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c index 5564765a9b..40473b0db0 100644 --- a/hw/sd/sdhci.c +++ b/hw/sd/sdhci.c @@ -321,6 +321,8 @@ static void sdhci_poweron_reset(DeviceState *dev) static void sdhci_data_transfer(void *opaque); +#define BLOCK_SIZE_MASK (4 * KiB - 1) + static void sdhci_send_command(SDHCIState *s) { SDRequest request; @@ -371,7 +373,8 @@ static void sdhci_send_command(SDHCIState *s) sdhci_update_irq(s); - if (!timeout && s->blksize && (s->cmdreg & SDHC_CMD_DATA_PRESENT)) { + if (!timeout && (s->blksize & BLOCK_SIZE_MASK) && + (s->cmdreg & SDHC_CMD_DATA_PRESENT)) { s->data_count = 0; sdhci_data_transfer(s); } @@ -406,7 +409,6 @@ static void sdhci_end_transfer(SDHCIState *s) /* * Programmed i/o data transfer */ -#define BLOCK_SIZE_MASK (4 * KiB - 1) /* Fill host controller's read buffer with BLKSIZE bytes of data from card */ static void sdhci_read_block_from_card(SDHCIState *s) @@ -1154,7 +1156,8 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) s->sdmasysad = (s->sdmasysad & mask) | value; MASKED_WRITE(s->sdmasysad, mask, value); /* Writing to last byte of sdmasysad might trigger transfer */ - if (!(mask & 0xFF000000) && s->blkcnt && s->blksize && + if (!(mask & 0xFF000000) && s->blkcnt && + (s->blksize & BLOCK_SIZE_MASK) && SDHC_DMA_TYPE(s->hostctl1) == SDHC_CTRL_SDMA) { if (s->trnmod & SDHC_TRNS_MULTI) { sdhci_sdma_transfer_multi_blocks(s); @@ -1168,7 +1171,11 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) if (!TRANSFERRING_DATA(s->prnsts)) { uint16_t blksize = s->blksize; - MASKED_WRITE(s->blksize, mask, extract32(value, 0, 12)); + /* + * [14:12] SDMA Buffer Boundary + * [11:00] Transfer Block Size + */ + MASKED_WRITE(s->blksize, mask, extract32(value, 0, 15)); MASKED_WRITE(s->blkcnt, mask >> 16, value >> 16); /* Limit block size to the maximum buffer size */ diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c index d908a38f73..c871170378 100644 --- a/hw/sparc64/sun4u.c +++ b/hw/sparc64/sun4u.c @@ -360,11 +360,11 @@ static void ebus_realize(PCIDevice *pci_dev, Error **errp) pci_dev->config[0x09] = 0x00; // programming i/f pci_dev->config[0x0D] = 0x0a; // latency_timer - memory_region_init_alias(&s->bar0, OBJECT(s), "bar0", get_system_io(), - 0, 0x1000000); + memory_region_init_alias(&s->bar0, OBJECT(s), "bar0", + pci_address_space_io(pci_dev), 0, 0x1000000); pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar0); - memory_region_init_alias(&s->bar1, OBJECT(s), "bar1", get_system_io(), - 0, 0x8000); + memory_region_init_alias(&s->bar1, OBJECT(s), "bar1", + pci_address_space_io(pci_dev), 0, 0x8000); pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &s->bar1); } diff --git a/hw/timer/npcm7xx_timer.c b/hw/timer/npcm7xx_timer.c index 32f5e021f8..a8bd93aeb2 100644 --- a/hw/timer/npcm7xx_timer.c +++ b/hw/timer/npcm7xx_timer.c @@ -138,6 +138,9 @@ static int64_t npcm7xx_timer_count_to_ns(NPCM7xxTimer *t, uint32_t count) /* Convert a time interval in nanoseconds to a timer cycle count. 
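[Editor's aside] The sdhci.c hunks above widen the writable part of the Block Size register from 12 to 15 bits, because bits [14:12] carry the SDMA buffer boundary while only bits [11:0] hold the transfer block size; the "is a block size programmed?" tests therefore mask with BLOCK_SIZE_MASK now. A small sketch of the field split (helper names hypothetical):

#include <stdbool.h>
#include <stdint.h>

#define XFER_BLOCK_SIZE_MASK  0x0fffu   /* bits [11:0]: transfer block size (4 KiB - 1) */
#define SDMA_BOUNDARY_MASK    0x7000u   /* bits [14:12]: SDMA buffer boundary */

static inline uint16_t blksize_xfer_size(uint16_t blksize_reg)
{
    return blksize_reg & XFER_BLOCK_SIZE_MASK;
}

static inline bool blksize_starts_transfer(uint16_t blksize_reg)
{
    /* A non-zero boundary field alone must not look like a programmed size. */
    return blksize_xfer_size(blksize_reg) != 0;
}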
*/ static uint32_t npcm7xx_timer_ns_to_count(NPCM7xxTimer *t, int64_t ns) { + if (ns < 0) { + return 0; + } return clock_ns_to_ticks(t->ctrl->clock, ns) / npcm7xx_tcsr_prescaler(t->tcsr); } diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 6e21d1da5a..5f257bffb9 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -53,40 +53,6 @@ struct VFIODeviceOps vfio_ap_ops = { .vfio_compute_needs_reset = vfio_ap_compute_needs_reset, }; -static void vfio_ap_put_device(VFIOAPDevice *vapdev) -{ - g_free(vapdev->vdev.name); - vfio_put_base_device(&vapdev->vdev); -} - -static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp) -{ - GError *gerror = NULL; - char *symlink, *group_path; - int groupid; - - symlink = g_strdup_printf("%s/iommu_group", vapdev->vdev.sysfsdev); - group_path = g_file_read_link(symlink, &gerror); - g_free(symlink); - - if (!group_path) { - error_setg(errp, "%s: no iommu_group found for %s: %s", - TYPE_VFIO_AP_DEVICE, vapdev->vdev.sysfsdev, gerror->message); - g_error_free(gerror); - return NULL; - } - - if (sscanf(basename(group_path), "%d", &groupid) != 1) { - error_setg(errp, "vfio: failed to read %s", group_path); - g_free(group_path); - return NULL; - } - - g_free(group_path); - - return vfio_get_group(groupid, &address_space_memory, errp); -} - static void vfio_ap_req_notifier_handler(void *opaque) { VFIOAPDevice *vapdev = opaque; @@ -189,22 +155,14 @@ static void vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev, static void vfio_ap_realize(DeviceState *dev, Error **errp) { int ret; - char *mdevid; Error *err = NULL; - VFIOGroup *vfio_group; - APDevice *apdev = AP_DEVICE(dev); - VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev); + VFIODevice *vbasedev = &vapdev->vdev; - vfio_group = vfio_ap_get_group(vapdev, errp); - if (!vfio_group) { - return; - } - - vapdev->vdev.ops = &vfio_ap_ops; - vapdev->vdev.type = VFIO_DEVICE_TYPE_AP; - mdevid = basename(vapdev->vdev.sysfsdev); - vapdev->vdev.name = g_strdup_printf("%s", mdevid); - vapdev->vdev.dev = dev; + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); + vbasedev->ops = &vfio_ap_ops; + vbasedev->type = VFIO_DEVICE_TYPE_AP; + vbasedev->dev = dev; /* * vfio-ap devices operate in a way compatible with discarding of @@ -214,9 +172,10 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) */ vapdev->vdev.ram_block_discard_allowed = true; - ret = vfio_get_device(vfio_group, mdevid, &vapdev->vdev, errp); + ret = vfio_attach_device(vbasedev->name, vbasedev, + &address_space_memory, errp); if (ret) { - goto out_get_dev_err; + goto error; } vfio_ap_register_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX, &err); @@ -230,20 +189,18 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) return; -out_get_dev_err: - vfio_ap_put_device(vapdev); - vfio_put_group(vfio_group); +error: + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name); + g_free(vbasedev->name); } static void vfio_ap_unrealize(DeviceState *dev) { - APDevice *apdev = AP_DEVICE(dev); - VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); - VFIOGroup *group = vapdev->vdev.group; + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev); vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX); - vfio_ap_put_device(vapdev); - vfio_put_group(group); + vfio_detach_device(&vapdev->vdev); + g_free(vapdev->vdev.name); } static Property vfio_ap_properties[] = { @@ -254,8 +211,7 @@ static Property vfio_ap_properties[] = { static void vfio_ap_reset(DeviceState *dev) { int ret; - APDevice *apdev = AP_DEVICE(dev); - VFIOAPDevice *vapdev = 
VFIO_AP_DEVICE(apdev); + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev); ret = ioctl(vapdev->vdev.fd, VFIO_DEVICE_RESET); if (ret) { diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index 1e2fce83b0..6623ae237b 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -572,88 +572,14 @@ static void vfio_ccw_put_region(VFIOCCWDevice *vcdev) g_free(vcdev->io_region); } -static void vfio_ccw_put_device(VFIOCCWDevice *vcdev) -{ - g_free(vcdev->vdev.name); - vfio_put_base_device(&vcdev->vdev); -} - -static void vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev, - Error **errp) -{ - S390CCWDevice *cdev = S390_CCW_DEVICE(vcdev); - char *name = g_strdup_printf("%x.%x.%04x", cdev->hostid.cssid, - cdev->hostid.ssid, - cdev->hostid.devid); - VFIODevice *vbasedev; - - QLIST_FOREACH(vbasedev, &group->device_list, next) { - if (strcmp(vbasedev->name, name) == 0) { - error_setg(errp, "vfio: subchannel %s has already been attached", - name); - goto out_err; - } - } - - /* - * All vfio-ccw devices are believed to operate in a way compatible with - * discarding of memory in RAM blocks, ie. pages pinned in the host are - * in the current working set of the guest driver and therefore never - * overlap e.g., with pages available to the guest balloon driver. This - * needs to be set before vfio_get_device() for vfio common to handle - * ram_block_discard_disable(). - */ - vcdev->vdev.ram_block_discard_allowed = true; - - if (vfio_get_device(group, cdev->mdevid, &vcdev->vdev, errp)) { - goto out_err; - } - - vcdev->vdev.ops = &vfio_ccw_ops; - vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW; - vcdev->vdev.name = name; - vcdev->vdev.dev = DEVICE(vcdev); - - return; - -out_err: - g_free(name); -} - -static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, Error **errp) -{ - char *tmp, group_path[PATH_MAX]; - ssize_t len; - int groupid; - - tmp = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/%s/iommu_group", - cdev->hostid.cssid, cdev->hostid.ssid, - cdev->hostid.devid, cdev->mdevid); - len = readlink(tmp, group_path, sizeof(group_path)); - g_free(tmp); - - if (len <= 0 || len >= sizeof(group_path)) { - error_setg(errp, "vfio: no iommu_group found"); - return NULL; - } - - group_path[len] = 0; - - if (sscanf(basename(group_path), "%d", &groupid) != 1) { - error_setg(errp, "vfio: failed to read %s", group_path); - return NULL; - } - - return vfio_get_group(groupid, &address_space_memory, errp); -} - static void vfio_ccw_realize(DeviceState *dev, Error **errp) { - VFIOGroup *group; S390CCWDevice *cdev = S390_CCW_DEVICE(dev); VFIOCCWDevice *vcdev = VFIO_CCW(cdev); S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); + VFIODevice *vbasedev = &vcdev->vdev; Error *err = NULL; + int ret; /* Call the class init function for subchannel. */ if (cdc->realize) { @@ -663,14 +589,27 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) } } - group = vfio_ccw_get_group(cdev, &err); - if (!group) { - goto out_group_err; - } + vbasedev->ops = &vfio_ccw_ops; + vbasedev->type = VFIO_DEVICE_TYPE_CCW; + vbasedev->name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid, + vcdev->cdev.hostid.ssid, + vcdev->cdev.hostid.devid); + vbasedev->dev = dev; - vfio_ccw_get_device(group, vcdev, &err); - if (err) { - goto out_device_err; + /* + * All vfio-ccw devices are believed to operate in a way compatible with + * discarding of memory in RAM blocks, ie. pages pinned in the host are + * in the current working set of the guest driver and therefore never + * overlap e.g., with pages available to the guest balloon driver. 
This + * needs to be set before vfio_get_device() for vfio common to handle + * ram_block_discard_disable(). + */ + vbasedev->ram_block_discard_allowed = true; + + ret = vfio_attach_device(cdev->mdevid, vbasedev, + &address_space_memory, errp); + if (ret) { + goto out_attach_dev_err; } vfio_ccw_get_region(vcdev, &err); @@ -708,10 +647,9 @@ out_irq_notifier_err: out_io_notifier_err: vfio_ccw_put_region(vcdev); out_region_err: - vfio_ccw_put_device(vcdev); -out_device_err: - vfio_put_group(group); -out_group_err: + vfio_detach_device(vbasedev); +out_attach_dev_err: + g_free(vbasedev->name); if (cdc->unrealize) { cdc->unrealize(cdev); } @@ -724,14 +662,13 @@ static void vfio_ccw_unrealize(DeviceState *dev) S390CCWDevice *cdev = S390_CCW_DEVICE(dev); VFIOCCWDevice *vcdev = VFIO_CCW(cdev); S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev); - VFIOGroup *group = vcdev->vdev.group; vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_REQ_IRQ_INDEX); vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX); vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX); vfio_ccw_put_region(vcdev); - vfio_ccw_put_device(vcdev); - vfio_put_group(group); + vfio_detach_device(&vcdev->vdev); + g_free(vcdev->vdev.name); if (cdc->unrealize) { cdc->unrealize(cdev); diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 134649226d..d806057b40 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -46,8 +46,8 @@ #include "migration/qemu-file.h" #include "sysemu/tpm.h" -VFIOGroupList vfio_group_list = - QLIST_HEAD_INITIALIZER(vfio_group_list); +VFIODeviceList vfio_device_list = + QLIST_HEAD_INITIALIZER(vfio_device_list); static QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces = QLIST_HEAD_INITIALIZER(vfio_address_spaces); @@ -59,304 +59,24 @@ static QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces = * initialized, this file descriptor is only released on QEMU exit and * we'll re-use it should another vfio device be attached before then. 
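[Editor's aside] The vfio-ap and vfio-ccw hunks above replace the hand-rolled iommu_group lookup (readlink on the sysfs iommu_group link, vfio_get_group(), vfio_get_device()) with the new vfio_attach_device()/vfio_detach_device() pair. Condensed from the new ap.c realize path shown above (not standalone code, error handling trimmed), a mediated-device realize now looks roughly like this:

static void vfio_mdev_realize_sketch(DeviceState *dev, Error **errp)
{
    VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev);
    VFIODevice *vbasedev = &vapdev->vdev;

    vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
    vbasedev->ops = &vfio_ap_ops;
    vbasedev->type = VFIO_DEVICE_TYPE_AP;
    vbasedev->dev = dev;
    vbasedev->ram_block_discard_allowed = true;

    if (vfio_attach_device(vbasedev->name, vbasedev,
                           &address_space_memory, errp)) {
        error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
        g_free(vbasedev->name);
        return;
    }
    /* ... register IRQ notifiers here; unrealize calls vfio_detach_device() */
}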
*/ -static int vfio_kvm_device_fd = -1; +int vfio_kvm_device_fd = -1; #endif /* - * Common VFIO interrupt disable - */ -void vfio_disable_irqindex(VFIODevice *vbasedev, int index) -{ - struct vfio_irq_set irq_set = { - .argsz = sizeof(irq_set), - .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER, - .index = index, - .start = 0, - .count = 0, - }; - - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); -} - -void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) -{ - struct vfio_irq_set irq_set = { - .argsz = sizeof(irq_set), - .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK, - .index = index, - .start = 0, - .count = 1, - }; - - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); -} - -void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) -{ - struct vfio_irq_set irq_set = { - .argsz = sizeof(irq_set), - .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK, - .index = index, - .start = 0, - .count = 1, - }; - - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); -} - -static inline const char *action_to_str(int action) -{ - switch (action) { - case VFIO_IRQ_SET_ACTION_MASK: - return "MASK"; - case VFIO_IRQ_SET_ACTION_UNMASK: - return "UNMASK"; - case VFIO_IRQ_SET_ACTION_TRIGGER: - return "TRIGGER"; - default: - return "UNKNOWN ACTION"; - } -} - -static const char *index_to_str(VFIODevice *vbasedev, int index) -{ - if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { - return NULL; - } - - switch (index) { - case VFIO_PCI_INTX_IRQ_INDEX: - return "INTX"; - case VFIO_PCI_MSI_IRQ_INDEX: - return "MSI"; - case VFIO_PCI_MSIX_IRQ_INDEX: - return "MSIX"; - case VFIO_PCI_ERR_IRQ_INDEX: - return "ERR"; - case VFIO_PCI_REQ_IRQ_INDEX: - return "REQ"; - default: - return NULL; - } -} - -static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state) -{ - switch (container->iommu_type) { - case VFIO_TYPE1v2_IOMMU: - case VFIO_TYPE1_IOMMU: - /* - * We support coordinated discarding of RAM via the RamDiscardManager. - */ - return ram_block_uncoordinated_discard_disable(state); - default: - /* - * VFIO_SPAPR_TCE_IOMMU most probably works just fine with - * RamDiscardManager, however, it is completely untested. - * - * VFIO_SPAPR_TCE_v2_IOMMU with "DMA memory preregistering" does - * completely the opposite of managing mapping/pinning dynamically as - * required by RamDiscardManager. We would have to special-case sections - * with a RamDiscardManager. - */ - return ram_block_discard_disable(state); - } -} - -int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex, - int action, int fd, Error **errp) -{ - struct vfio_irq_set *irq_set; - int argsz, ret = 0; - const char *name; - int32_t *pfd; - - argsz = sizeof(*irq_set) + sizeof(*pfd); - - irq_set = g_malloc0(argsz); - irq_set->argsz = argsz; - irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | action; - irq_set->index = index; - irq_set->start = subindex; - irq_set->count = 1; - pfd = (int32_t *)&irq_set->data; - *pfd = fd; - - if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { - ret = -errno; - } - g_free(irq_set); - - if (!ret) { - return 0; - } - - error_setg_errno(errp, -ret, "VFIO_DEVICE_SET_IRQS failure"); - - name = index_to_str(vbasedev, index); - if (name) { - error_prepend(errp, "%s-%d: ", name, subindex); - } else { - error_prepend(errp, "index %d-%d: ", index, subindex); - } - error_prepend(errp, - "Failed to %s %s eventfd signaling for interrupt ", - fd < 0 ? 
"tear down" : "set up", action_to_str(action)); - return ret; -} - -/* - * IO Port/MMIO - Beware of the endians, VFIO is always little endian - */ -void vfio_region_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - VFIORegion *region = opaque; - VFIODevice *vbasedev = region->vbasedev; - union { - uint8_t byte; - uint16_t word; - uint32_t dword; - uint64_t qword; - } buf; - - switch (size) { - case 1: - buf.byte = data; - break; - case 2: - buf.word = cpu_to_le16(data); - break; - case 4: - buf.dword = cpu_to_le32(data); - break; - case 8: - buf.qword = cpu_to_le64(data); - break; - default: - hw_error("vfio: unsupported write size, %u bytes", size); - break; - } - - if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { - error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64 - ",%d) failed: %m", - __func__, vbasedev->name, region->nr, - addr, data, size); - } - - trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size); - - /* - * A read or write to a BAR always signals an INTx EOI. This will - * do nothing if not pending (including not in INTx mode). We assume - * that a BAR access is in response to an interrupt and that BAR - * accesses will service the interrupt. Unfortunately, we don't know - * which access will service the interrupt, so we're potentially - * getting quite a few host interrupts per guest interrupt. - */ - vbasedev->ops->vfio_eoi(vbasedev); -} - -uint64_t vfio_region_read(void *opaque, - hwaddr addr, unsigned size) -{ - VFIORegion *region = opaque; - VFIODevice *vbasedev = region->vbasedev; - union { - uint8_t byte; - uint16_t word; - uint32_t dword; - uint64_t qword; - } buf; - uint64_t data = 0; - - if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { - error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m", - __func__, vbasedev->name, region->nr, - addr, size); - return (uint64_t)-1; - } - switch (size) { - case 1: - data = buf.byte; - break; - case 2: - data = le16_to_cpu(buf.word); - break; - case 4: - data = le32_to_cpu(buf.dword); - break; - case 8: - data = le64_to_cpu(buf.qword); - break; - default: - hw_error("vfio: unsupported read size, %u bytes", size); - break; - } - - trace_vfio_region_read(vbasedev->name, region->nr, addr, size, data); - - /* Same as write above */ - vbasedev->ops->vfio_eoi(vbasedev); - - return data; -} - -const MemoryRegionOps vfio_region_ops = { - .read = vfio_region_read, - .write = vfio_region_write, - .endianness = DEVICE_LITTLE_ENDIAN, - .valid = { - .min_access_size = 1, - .max_access_size = 8, - }, - .impl = { - .min_access_size = 1, - .max_access_size = 8, - }, -}; - -/* * Device state interfaces */ -typedef struct { - unsigned long *bitmap; - hwaddr size; - hwaddr pages; -} VFIOBitmap; - -static int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size) -{ - vbmap->pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size(); - vbmap->size = ROUND_UP(vbmap->pages, sizeof(__u64) * BITS_PER_BYTE) / - BITS_PER_BYTE; - vbmap->bitmap = g_try_malloc0(vbmap->size); - if (!vbmap->bitmap) { - return -ENOMEM; - } - - return 0; -} - -static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, - uint64_t size, ram_addr_t ram_addr); - bool vfio_mig_active(void) { - VFIOGroup *group; VFIODevice *vbasedev; - if (QLIST_EMPTY(&vfio_group_list)) { + if (QLIST_EMPTY(&vfio_device_list)) { return false; } - QLIST_FOREACH(group, &vfio_group_list, next) { - QLIST_FOREACH(vbasedev, &group->device_list, next) { - if (vbasedev->migration_blocker) { - return 
false; - } + QLIST_FOREACH(vbasedev, &vfio_device_list, next) { + if (vbasedev->migration_blocker) { + return false; } } return true; @@ -371,19 +91,16 @@ static Error *multiple_devices_migration_blocker; */ static bool vfio_multiple_devices_migration_is_supported(void) { - VFIOGroup *group; VFIODevice *vbasedev; unsigned int device_num = 0; bool all_support_p2p = true; - QLIST_FOREACH(group, &vfio_group_list, next) { - QLIST_FOREACH(vbasedev, &group->device_list, next) { - if (vbasedev->migration) { - device_num++; + QLIST_FOREACH(vbasedev, &vfio_device_list, next) { + if (vbasedev->migration) { + device_num++; - if (!(vbasedev->migration->mig_flags & VFIO_MIGRATION_P2P)) { - all_support_p2p = false; - } + if (!(vbasedev->migration->mig_flags & VFIO_MIGRATION_P2P)) { + all_support_p2p = false; } } } @@ -412,11 +129,7 @@ int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp) error_setg(&multiple_devices_migration_blocker, "Multiple VFIO devices migration is supported only if all of " "them support P2P migration"); - ret = migrate_add_blocker(multiple_devices_migration_blocker, errp); - if (ret < 0) { - error_free(multiple_devices_migration_blocker); - multiple_devices_migration_blocker = NULL; - } + ret = migrate_add_blocker(&multiple_devices_migration_blocker, errp); return ret; } @@ -428,14 +141,12 @@ void vfio_unblock_multiple_devices_migration(void) return; } - migrate_del_blocker(multiple_devices_migration_blocker); - error_free(multiple_devices_migration_blocker); - multiple_devices_migration_blocker = NULL; + migrate_del_blocker(&multiple_devices_migration_blocker); } bool vfio_viommu_preset(VFIODevice *vbasedev) { - return vbasedev->group->container->space->as != &address_space_memory; + return vbasedev->container->space->as != &address_space_memory; } static void vfio_set_migration_error(int err) @@ -469,7 +180,6 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev) static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) { - VFIOGroup *group; VFIODevice *vbasedev; MigrationState *ms = migrate_get_current(); @@ -478,34 +188,29 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) return false; } - QLIST_FOREACH(group, &container->group_list, container_next) { - QLIST_FOREACH(vbasedev, &group->device_list, next) { - VFIOMigration *migration = vbasedev->migration; + QLIST_FOREACH(vbasedev, &container->device_list, container_next) { + VFIOMigration *migration = vbasedev->migration; - if (!migration) { - return false; - } + if (!migration) { + return false; + } - if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF && - (vfio_device_state_is_running(vbasedev) || - vfio_device_state_is_precopy(vbasedev))) { - return false; - } + if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF && + (vfio_device_state_is_running(vbasedev) || + vfio_device_state_is_precopy(vbasedev))) { + return false; } } return true; } -static bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) +bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) { - VFIOGroup *group; VFIODevice *vbasedev; - QLIST_FOREACH(group, &container->group_list, container_next) { - QLIST_FOREACH(vbasedev, &group->device_list, next) { - if (!vbasedev->dirty_pages_supported) { - return false; - } + QLIST_FOREACH(vbasedev, &container->device_list, container_next) { + if (!vbasedev->dirty_pages_supported) { + return false; } } @@ -516,178 +221,33 @@ static bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) * Check 
if all VFIO devices are running and migration is active, which is * essentially equivalent to the migration being in pre-copy phase. */ -static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) +bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) { - VFIOGroup *group; VFIODevice *vbasedev; if (!migration_is_active(migrate_get_current())) { return false; } - QLIST_FOREACH(group, &container->group_list, container_next) { - QLIST_FOREACH(vbasedev, &group->device_list, next) { - VFIOMigration *migration = vbasedev->migration; - - if (!migration) { - return false; - } + QLIST_FOREACH(vbasedev, &container->device_list, container_next) { + VFIOMigration *migration = vbasedev->migration; - if (vfio_device_state_is_running(vbasedev) || - vfio_device_state_is_precopy(vbasedev)) { - continue; - } else { - return false; - } + if (!migration) { + return false; } - } - return true; -} - -static int vfio_dma_unmap_bitmap(VFIOContainer *container, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) -{ - struct vfio_iommu_type1_dma_unmap *unmap; - struct vfio_bitmap *bitmap; - VFIOBitmap vbmap; - int ret; - - ret = vfio_bitmap_alloc(&vbmap, size); - if (ret) { - return ret; - } - - unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap)); - - unmap->argsz = sizeof(*unmap) + sizeof(*bitmap); - unmap->iova = iova; - unmap->size = size; - unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP; - bitmap = (struct vfio_bitmap *)&unmap->data; - - /* - * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of - * qemu_real_host_page_size to mark those dirty. Hence set bitmap_pgsize - * to qemu_real_host_page_size. - */ - bitmap->pgsize = qemu_real_host_page_size(); - bitmap->size = vbmap.size; - bitmap->data = (__u64 *)vbmap.bitmap; - - if (vbmap.size > container->max_dirty_bitmap_size) { - error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size); - ret = -E2BIG; - goto unmap_exit; - } - - ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap); - if (!ret) { - cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, - iotlb->translated_addr, vbmap.pages); - } else { - error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m"); - } -unmap_exit: - g_free(unmap); - g_free(vbmap.bitmap); - - return ret; -} - -/* - * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 - */ -static int vfio_dma_unmap(VFIOContainer *container, - hwaddr iova, ram_addr_t size, - IOMMUTLBEntry *iotlb) -{ - struct vfio_iommu_type1_dma_unmap unmap = { - .argsz = sizeof(unmap), - .flags = 0, - .iova = iova, - .size = size, - }; - bool need_dirty_sync = false; - int ret; - - if (iotlb && vfio_devices_all_running_and_mig_active(container)) { - if (!vfio_devices_all_device_dirty_tracking(container) && - container->dirty_pages_supported) { - return vfio_dma_unmap_bitmap(container, iova, size, iotlb); - } - - need_dirty_sync = true; - } - - while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { - /* - * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c - * v4.15) where an overflow in its wrap-around check prevents us from - * unmapping the last page of the address space. Test for the error - * condition and re-try the unmap excluding the last page. The - * expectation is that we've never mapped the last page anyway and this - * unmap request comes via vIOMMU support which also makes it unlikely - * that this page is used. This bug was introduced well after type1 v2 - * support was introduced, so we shouldn't need to test for v1. 
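[Editor's aside] The comment above describes a kernel range-check bug: when an unmap ends exactly at the top of the 64-bit address space, iova + size wraps around to 0 and the ioctl fails with EINVAL, so the code retries with the last IOMMU page excluded. A tiny standalone illustration of the wrap condition (not QEMU code):

#include <stdbool.h>
#include <stdint.h>

/* True when the range ends exactly at 2^64, i.e. iova + size overflows to 0. */
static bool unmap_range_wraps(uint64_t iova, uint64_t size)
{
    /* Example: iova = 0xfffffffffffff000, size = 0x1000 wraps to 0. */
    return size != 0 && iova + size == 0;
}

In the hunk, the retry simply shrinks the request by one IOMMU page, 1ULL << ctz64(container->pgsizes), before issuing the unmap again.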
A fix - * is queued for kernel v5.0 so this workaround can be removed once - * affected kernels are sufficiently deprecated. - */ - if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) && - container->iommu_type == VFIO_TYPE1v2_IOMMU) { - trace_vfio_dma_unmap_overflow_workaround(); - unmap.size -= 1ULL << ctz64(container->pgsizes); + if (vfio_device_state_is_running(vbasedev) || + vfio_device_state_is_precopy(vbasedev)) { continue; + } else { + return false; } - error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno)); - return -errno; - } - - if (need_dirty_sync) { - ret = vfio_get_dirty_bitmap(container, iova, size, - iotlb->translated_addr); - if (ret) { - return ret; - } - } - - return 0; -} - -static int vfio_dma_map(VFIOContainer *container, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly) -{ - struct vfio_iommu_type1_dma_map map = { - .argsz = sizeof(map), - .flags = VFIO_DMA_MAP_FLAG_READ, - .vaddr = (__u64)(uintptr_t)vaddr, - .iova = iova, - .size = size, - }; - - if (!readonly) { - map.flags |= VFIO_DMA_MAP_FLAG_WRITE; } - - /* - * Try the mapping, if it fails with EBUSY, unmap the region and try - * again. This shouldn't be necessary, but we sometimes see it in - * the VGA ROM space. - */ - if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || - (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 && - ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { - return 0; - } - - error_report("VFIO_MAP_DMA failed: %s", strerror(errno)); - return -errno; + return true; } -static void vfio_host_win_add(VFIOContainer *container, - hwaddr min_iova, hwaddr max_iova, - uint64_t iova_pgsizes) +void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, + hwaddr max_iova, uint64_t iova_pgsizes) { VFIOHostDMAWindow *hostwin; @@ -708,8 +268,8 @@ static void vfio_host_win_add(VFIOContainer *container, QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next); } -static int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova, - hwaddr max_iova) +int vfio_host_win_del(VFIOContainer *container, + hwaddr min_iova, hwaddr max_iova) { VFIOHostDMAWindow *hostwin; @@ -1084,62 +644,8 @@ static void vfio_listener_region_add(MemoryListener *listener, return; } - if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { - hwaddr pgsize = 0; - - /* For now intersections are not allowed, we may relax this later */ - QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { - if (ranges_overlap(hostwin->min_iova, - hostwin->max_iova - hostwin->min_iova + 1, - section->offset_within_address_space, - int128_get64(section->size))) { - error_setg(&err, - "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing" - "host DMA window [0x%"PRIx64",0x%"PRIx64"]", - section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1, - hostwin->min_iova, hostwin->max_iova); - goto fail; - } - } - - ret = vfio_spapr_create_window(container, section, &pgsize); - if (ret) { - error_setg_errno(&err, -ret, "Failed to create SPAPR window"); - goto fail; - } - - vfio_host_win_add(container, section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1, pgsize); -#ifdef CONFIG_KVM - if (kvm_enabled()) { - VFIOGroup *group; - IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); - struct kvm_vfio_spapr_tce param; - struct kvm_device_attr attr = { - .group = KVM_DEV_VFIO_GROUP, - .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE, - .addr = (uint64_t)(unsigned 
long)¶m, - }; - - if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD, - ¶m.tablefd)) { - QLIST_FOREACH(group, &container->group_list, container_next) { - param.groupfd = group->fd; - if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { - error_report("vfio: failed to setup fd %d " - "for a group with fd %d: %s", - param.tablefd, param.groupfd, - strerror(errno)); - return; - } - trace_vfio_spapr_group_attach(param.groupfd, param.tablefd); - } - } - } -#endif + if (vfio_container_add_section_window(container, section, &err)) { + goto fail; } hostwin = vfio_find_hostwin(container, iova, end); @@ -1251,7 +757,7 @@ static void vfio_listener_region_add(MemoryListener *listener, fail: if (memory_region_is_ram_device(section->mr)) { - error_report("failed to vfio_dma_map. pci p2p may not work"); + error_reportf_err(err, "PCI p2p may not work: "); return; } /* @@ -1356,44 +862,7 @@ static void vfio_listener_region_del(MemoryListener *listener, memory_region_unref(section->mr); - if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { - vfio_spapr_remove_window(container, - section->offset_within_address_space); - if (vfio_host_win_del(container, - section->offset_within_address_space, - section->offset_within_address_space + - int128_get64(section->size) - 1) < 0) { - hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx, - __func__, section->offset_within_address_space); - } - } -} - -static int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) -{ - int ret; - struct vfio_iommu_type1_dirty_bitmap dirty = { - .argsz = sizeof(dirty), - }; - - if (!container->dirty_pages_supported) { - return 0; - } - - if (start) { - dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; - } else { - dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP; - } - - ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); - if (ret) { - ret = -errno; - error_report("Failed to set dirty tracking flag 0x%x errno: %d", - dirty.flags, errno); - } - - return ret; + vfio_container_del_section_window(container, section); } typedef struct VFIODirtyRanges { @@ -1416,20 +885,17 @@ static bool vfio_section_is_vfio_pci(MemoryRegionSection *section, { VFIOPCIDevice *pcidev; VFIODevice *vbasedev; - VFIOGroup *group; Object *owner; owner = memory_region_owner(section->mr); - QLIST_FOREACH(group, &container->group_list, container_next) { - QLIST_FOREACH(vbasedev, &group->device_list, next) { - if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { - continue; - } - pcidev = container_of(vbasedev, VFIOPCIDevice, vbasedev); - if (OBJECT(pcidev) == owner) { - return true; - } + QLIST_FOREACH(vbasedev, &container->device_list, container_next) { + if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { + continue; + } + pcidev = container_of(vbasedev, VFIOPCIDevice, vbasedev); + if (OBJECT(pcidev) == owner) { + return true; } } @@ -1525,24 +991,21 @@ static void vfio_devices_dma_logging_stop(VFIOContainer *container) sizeof(uint64_t))] = {}; struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; VFIODevice *vbasedev; - VFIOGroup *group; feature->argsz = sizeof(buf); feature->flags = VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP; - QLIST_FOREACH(group, &container->group_list, container_next) { - QLIST_FOREACH(vbasedev, &group->device_list, next) { - if (!vbasedev->dirty_tracking) { - continue; - } + QLIST_FOREACH(vbasedev, &container->device_list, container_next) { + if (!vbasedev->dirty_tracking) { + continue; + } - if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { - 
warn_report("%s: Failed to stop DMA logging, err %d (%s)", - vbasedev->name, -errno, strerror(errno)); - } - vbasedev->dirty_tracking = false; + if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { + warn_report("%s: Failed to stop DMA logging, err %d (%s)", + vbasedev->name, -errno, strerror(errno)); } + vbasedev->dirty_tracking = false; } } @@ -1625,7 +1088,6 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container) struct vfio_device_feature *feature; VFIODirtyRanges ranges; VFIODevice *vbasedev; - VFIOGroup *group; int ret = 0; vfio_dirty_tracking_init(container, &ranges); @@ -1635,21 +1097,19 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container) return -errno; } - QLIST_FOREACH(group, &container->group_list, container_next) { - QLIST_FOREACH(vbasedev, &group->device_list, next) { - if (vbasedev->dirty_tracking) { - continue; - } + QLIST_FOREACH(vbasedev, &container->device_list, container_next) { + if (vbasedev->dirty_tracking) { + continue; + } - ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature); - if (ret) { - ret = -errno; - error_report("%s: Failed to start DMA logging, err %d (%s)", - vbasedev->name, ret, strerror(errno)); - goto out; - } - vbasedev->dirty_tracking = true; + ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature); + if (ret) { + ret = -errno; + error_report("%s: Failed to start DMA logging, err %d (%s)", + vbasedev->name, ret, strerror(errno)); + goto out; } + vbasedev->dirty_tracking = true; } out: @@ -1724,71 +1184,31 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, return 0; } -static int vfio_devices_query_dirty_bitmap(VFIOContainer *container, - VFIOBitmap *vbmap, hwaddr iova, - hwaddr size) +int vfio_devices_query_dirty_bitmap(VFIOContainer *container, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size) { VFIODevice *vbasedev; - VFIOGroup *group; int ret; - QLIST_FOREACH(group, &container->group_list, container_next) { - QLIST_FOREACH(vbasedev, &group->device_list, next) { - ret = vfio_device_dma_logging_report(vbasedev, iova, size, - vbmap->bitmap); - if (ret) { - error_report("%s: Failed to get DMA logging report, iova: " - "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx - ", err: %d (%s)", - vbasedev->name, iova, size, ret, strerror(-ret)); + QLIST_FOREACH(vbasedev, &container->device_list, container_next) { + ret = vfio_device_dma_logging_report(vbasedev, iova, size, + vbmap->bitmap); + if (ret) { + error_report("%s: Failed to get DMA logging report, iova: " + "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx + ", err: %d (%s)", + vbasedev->name, iova, size, ret, strerror(-ret)); - return ret; - } + return ret; } } return 0; } -static int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, - hwaddr iova, hwaddr size) -{ - struct vfio_iommu_type1_dirty_bitmap *dbitmap; - struct vfio_iommu_type1_dirty_bitmap_get *range; - int ret; - - dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); - - dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); - dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; - range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data; - range->iova = iova; - range->size = size; - - /* - * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of - * qemu_real_host_page_size to mark those dirty. Hence set bitmap's pgsize - * to qemu_real_host_page_size. 
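[Editor's aside] The vfio_bitmap_alloc()/vfio_query_dirty_bitmap() code removed from common.c above sizes its dirty bitmap at one bit per host page, rounded up to whole 64-bit words, with the page size pinned to qemu_real_host_page_size() as the comment explains. A self-contained version of that arithmetic, assuming 4 KiB host pages for the example:

#include <stdint.h>
#include <stdio.h>

#define HOST_PAGE_SIZE 4096ULL   /* assumed host page size for the example */

/* One bit per host page, rounded up to whole 64-bit (8-byte) words. */
static uint64_t dirty_bitmap_bytes(uint64_t range_bytes)
{
    uint64_t pages = (range_bytes + HOST_PAGE_SIZE - 1) / HOST_PAGE_SIZE;
    uint64_t words = (pages + 63) / 64;
    return words * 8;
}

int main(void)
{
    /* 1 GiB -> 262144 pages -> 32768 bytes (32 KiB) of bitmap */
    printf("%llu\n", (unsigned long long)dirty_bitmap_bytes(1ULL << 30));
    return 0;
}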
- */ - range->bitmap.pgsize = qemu_real_host_page_size(); - range->bitmap.size = vbmap->size; - range->bitmap.data = (__u64 *)vbmap->bitmap; - - ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); - if (ret) { - ret = -errno; - error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64 - " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova, - (uint64_t)range->size, errno); - } - - g_free(dbitmap); - - return ret; -} - -static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, - uint64_t size, ram_addr_t ram_addr) +int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + uint64_t size, ram_addr_t ram_addr) { bool all_device_dirty_tracking = vfio_devices_all_device_dirty_tracking(container); @@ -1977,7 +1397,7 @@ static void vfio_listener_log_sync(MemoryListener *listener, } } -static const MemoryListener vfio_memory_listener = { +const MemoryListener vfio_memory_listener = { .name = "vfio", .region_add = vfio_listener_region_add, .region_del = vfio_listener_region_del, @@ -1986,338 +1406,34 @@ static const MemoryListener vfio_memory_listener = { .log_sync = vfio_listener_log_sync, }; -static void vfio_listener_release(VFIOContainer *container) -{ - memory_listener_unregister(&container->listener); - if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { - memory_listener_unregister(&container->prereg_listener); - } -} - -static struct vfio_info_cap_header * -vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id) -{ - struct vfio_info_cap_header *hdr; - - for (hdr = ptr + cap_offset; hdr != ptr; hdr = ptr + hdr->next) { - if (hdr->id == id) { - return hdr; - } - } - - return NULL; -} - -struct vfio_info_cap_header * -vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) -{ - if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) { - return NULL; - } - - return vfio_get_cap((void *)info, info->cap_offset, id); -} - -static struct vfio_info_cap_header * -vfio_get_iommu_type1_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) -{ - if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { - return NULL; - } - - return vfio_get_cap((void *)info, info->cap_offset, id); -} - -struct vfio_info_cap_header * -vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id) -{ - if (!(info->flags & VFIO_DEVICE_FLAGS_CAPS)) { - return NULL; - } - - return vfio_get_cap((void *)info, info->cap_offset, id); -} - -bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, - unsigned int *avail) -{ - struct vfio_info_cap_header *hdr; - struct vfio_iommu_type1_info_dma_avail *cap; - - /* If the capability cannot be found, assume no DMA limiting */ - hdr = vfio_get_iommu_type1_info_cap(info, - VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL); - if (hdr == NULL) { - return false; - } - - if (avail != NULL) { - cap = (void *) hdr; - *avail = cap->avail; - } - - return true; -} - -static int vfio_setup_region_sparse_mmaps(VFIORegion *region, - struct vfio_region_info *info) -{ - struct vfio_info_cap_header *hdr; - struct vfio_region_info_cap_sparse_mmap *sparse; - int i, j; - - hdr = vfio_get_region_info_cap(info, VFIO_REGION_INFO_CAP_SPARSE_MMAP); - if (!hdr) { - return -ENODEV; - } - - sparse = container_of(hdr, struct vfio_region_info_cap_sparse_mmap, header); - - trace_vfio_region_sparse_mmap_header(region->vbasedev->name, - region->nr, sparse->nr_areas); - - region->mmaps = g_new0(VFIOMmap, sparse->nr_areas); - - for (i = 0, j = 0; i < sparse->nr_areas; i++) { - if (sparse->areas[i].size) { - trace_vfio_region_sparse_mmap_entry(i, sparse->areas[i].offset, - 
sparse->areas[i].offset + - sparse->areas[i].size - 1); - region->mmaps[j].offset = sparse->areas[i].offset; - region->mmaps[j].size = sparse->areas[i].size; - j++; - } - } - - region->nr_mmaps = j; - region->mmaps = g_realloc(region->mmaps, j * sizeof(VFIOMmap)); - - return 0; -} - -int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, - int index, const char *name) -{ - struct vfio_region_info *info; - int ret; - - ret = vfio_get_region_info(vbasedev, index, &info); - if (ret) { - return ret; - } - - region->vbasedev = vbasedev; - region->flags = info->flags; - region->size = info->size; - region->fd_offset = info->offset; - region->nr = index; - - if (region->size) { - region->mem = g_new0(MemoryRegion, 1); - memory_region_init_io(region->mem, obj, &vfio_region_ops, - region, name, region->size); - - if (!vbasedev->no_mmap && - region->flags & VFIO_REGION_INFO_FLAG_MMAP) { - - ret = vfio_setup_region_sparse_mmaps(region, info); - - if (ret) { - region->nr_mmaps = 1; - region->mmaps = g_new0(VFIOMmap, region->nr_mmaps); - region->mmaps[0].offset = 0; - region->mmaps[0].size = region->size; - } - } - } - - g_free(info); - - trace_vfio_region_setup(vbasedev->name, index, name, - region->flags, region->fd_offset, region->size); - return 0; -} - -static void vfio_subregion_unmap(VFIORegion *region, int index) -{ - trace_vfio_region_unmap(memory_region_name(&region->mmaps[index].mem), - region->mmaps[index].offset, - region->mmaps[index].offset + - region->mmaps[index].size - 1); - memory_region_del_subregion(region->mem, &region->mmaps[index].mem); - munmap(region->mmaps[index].mmap, region->mmaps[index].size); - object_unparent(OBJECT(&region->mmaps[index].mem)); - region->mmaps[index].mmap = NULL; -} - -int vfio_region_mmap(VFIORegion *region) -{ - int i, prot = 0; - char *name; - - if (!region->mem) { - return 0; - } - - prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0; - prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ?
PROT_WRITE : 0; - - for (i = 0; i < region->nr_mmaps; i++) { - region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot, - MAP_SHARED, region->vbasedev->fd, - region->fd_offset + - region->mmaps[i].offset); - if (region->mmaps[i].mmap == MAP_FAILED) { - int ret = -errno; - - trace_vfio_region_mmap_fault(memory_region_name(region->mem), i, - region->fd_offset + - region->mmaps[i].offset, - region->fd_offset + - region->mmaps[i].offset + - region->mmaps[i].size - 1, ret); - - region->mmaps[i].mmap = NULL; - - for (i--; i >= 0; i--) { - vfio_subregion_unmap(region, i); - } - - return ret; - } - - name = g_strdup_printf("%s mmaps[%d]", - memory_region_name(region->mem), i); - memory_region_init_ram_device_ptr(&region->mmaps[i].mem, - memory_region_owner(region->mem), - name, region->mmaps[i].size, - region->mmaps[i].mmap); - g_free(name); - memory_region_add_subregion(region->mem, region->mmaps[i].offset, - &region->mmaps[i].mem); - - trace_vfio_region_mmap(memory_region_name(&region->mmaps[i].mem), - region->mmaps[i].offset, - region->mmaps[i].offset + - region->mmaps[i].size - 1); - } - - return 0; -} - -void vfio_region_unmap(VFIORegion *region) -{ - int i; - - if (!region->mem) { - return; - } - - for (i = 0; i < region->nr_mmaps; i++) { - if (region->mmaps[i].mmap) { - vfio_subregion_unmap(region, i); - } - } -} - -void vfio_region_exit(VFIORegion *region) -{ - int i; - - if (!region->mem) { - return; - } - - for (i = 0; i < region->nr_mmaps; i++) { - if (region->mmaps[i].mmap) { - memory_region_del_subregion(region->mem, &region->mmaps[i].mem); - } - } - - trace_vfio_region_exit(region->vbasedev->name, region->nr); -} - -void vfio_region_finalize(VFIORegion *region) -{ - int i; - - if (!region->mem) { - return; - } - - for (i = 0; i < region->nr_mmaps; i++) { - if (region->mmaps[i].mmap) { - munmap(region->mmaps[i].mmap, region->mmaps[i].size); - object_unparent(OBJECT(&region->mmaps[i].mem)); - } - } - - object_unparent(OBJECT(region->mem)); - - g_free(region->mem); - g_free(region->mmaps); - - trace_vfio_region_finalize(region->vbasedev->name, region->nr); - - region->mem = NULL; - region->mmaps = NULL; - region->nr_mmaps = 0; - region->size = 0; - region->flags = 0; - region->nr = 0; -} - -void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled) -{ - int i; - - if (!region->mem) { - return; - } - - for (i = 0; i < region->nr_mmaps; i++) { - if (region->mmaps[i].mmap) { - memory_region_set_enabled(&region->mmaps[i].mem, enabled); - } - } - - trace_vfio_region_mmaps_set_enabled(memory_region_name(region->mem), - enabled); -} - void vfio_reset_handler(void *opaque) { - VFIOGroup *group; VFIODevice *vbasedev; - QLIST_FOREACH(group, &vfio_group_list, next) { - QLIST_FOREACH(vbasedev, &group->device_list, next) { - if (vbasedev->dev->realized) { - vbasedev->ops->vfio_compute_needs_reset(vbasedev); - } + QLIST_FOREACH(vbasedev, &vfio_device_list, next) { + if (vbasedev->dev->realized) { + vbasedev->ops->vfio_compute_needs_reset(vbasedev); } } - QLIST_FOREACH(group, &vfio_group_list, next) { - QLIST_FOREACH(vbasedev, &group->device_list, next) { - if (vbasedev->dev->realized && vbasedev->needs_reset) { - vbasedev->ops->vfio_hot_reset_multi(vbasedev); - } + QLIST_FOREACH(vbasedev, &vfio_device_list, next) { + if (vbasedev->dev->realized && vbasedev->needs_reset) { + vbasedev->ops->vfio_hot_reset_multi(vbasedev); } } } -static void vfio_kvm_device_add_group(VFIOGroup *group) +int vfio_kvm_device_add_fd(int fd, Error **errp) { #ifdef CONFIG_KVM struct kvm_device_attr attr = { - .group =
KVM_DEV_VFIO_GROUP, - .attr = KVM_DEV_VFIO_GROUP_ADD, - .addr = (uint64_t)(unsigned long)&group->fd, + .group = KVM_DEV_VFIO_FILE, + .attr = KVM_DEV_VFIO_FILE_ADD, + .addr = (uint64_t)(unsigned long)&fd, }; if (!kvm_enabled()) { - return; + return 0; } if (vfio_kvm_device_fd < 0) { @@ -2326,41 +1442,46 @@ static void vfio_kvm_device_add_group(VFIOGroup *group) }; if (kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &cd)) { - error_report("Failed to create KVM VFIO device: %m"); - return; + error_setg_errno(errp, errno, "Failed to create KVM VFIO device"); + return -errno; } vfio_kvm_device_fd = cd.fd; } if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { - error_report("Failed to add group %d to KVM VFIO device: %m", - group->groupid); + error_setg_errno(errp, errno, "Failed to add fd %d to KVM VFIO device", + fd); + return -errno; } #endif + return 0; } -static void vfio_kvm_device_del_group(VFIOGroup *group) +int vfio_kvm_device_del_fd(int fd, Error **errp) { #ifdef CONFIG_KVM struct kvm_device_attr attr = { - .group = KVM_DEV_VFIO_GROUP, - .attr = KVM_DEV_VFIO_GROUP_DEL, - .addr = (uint64_t)(unsigned long)&group->fd, + .group = KVM_DEV_VFIO_FILE, + .attr = KVM_DEV_VFIO_FILE_DEL, + .addr = (uint64_t)(unsigned long)&fd, }; if (vfio_kvm_device_fd < 0) { - return; + error_setg(errp, "KVM VFIO device isn't created yet"); + return -EINVAL; } if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { - error_report("Failed to remove group %d from KVM VFIO device: %m", - group->groupid); + error_setg_errno(errp, errno, + "Failed to remove fd %d from KVM VFIO device", fd); + return -errno; } #endif + return 0; } -static VFIOAddressSpace *vfio_get_address_space(AddressSpace *as) +VFIOAddressSpace *vfio_get_address_space(AddressSpace *as) { VFIOAddressSpace *space; @@ -2375,516 +1496,22 @@ static VFIOAddressSpace *vfio_get_address_space(AddressSpace *as) space->as = as; QLIST_INIT(&space->containers); + if (QLIST_EMPTY(&vfio_address_spaces)) { + qemu_register_reset(vfio_reset_handler, NULL); + } + QLIST_INSERT_HEAD(&vfio_address_spaces, space, list); return space; } -static void vfio_put_address_space(VFIOAddressSpace *space) +void vfio_put_address_space(VFIOAddressSpace *space) { if (QLIST_EMPTY(&space->containers)) { QLIST_REMOVE(space, list); g_free(space); } -} - -/* - * vfio_get_iommu_type - selects the richest iommu_type (v2 first) - */ -static int vfio_get_iommu_type(VFIOContainer *container, - Error **errp) -{ - int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU, - VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU }; - int i; - - for (i = 0; i < ARRAY_SIZE(iommu_types); i++) { - if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) { - return iommu_types[i]; - } - } - error_setg(errp, "No available IOMMU models"); - return -EINVAL; -} - -static int vfio_init_container(VFIOContainer *container, int group_fd, - Error **errp) -{ - int iommu_type, ret; - - iommu_type = vfio_get_iommu_type(container, errp); - if (iommu_type < 0) { - return iommu_type; - } - - ret = ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container->fd); - if (ret) { - error_setg_errno(errp, errno, "Failed to set group container"); - return -errno; - } - - while (ioctl(container->fd, VFIO_SET_IOMMU, iommu_type)) { - if (iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { - /* - * On sPAPR, despite the IOMMU subdriver always advertises v1 and - * v2, the running platform may not support v2 and there is no - * way to guess it until an IOMMU group gets added to the container. 
- * So in case it fails with v2, try v1 as a fallback. - */ - iommu_type = VFIO_SPAPR_TCE_IOMMU; - continue; - } - error_setg_errno(errp, errno, "Failed to set iommu for container"); - return -errno; - } - - container->iommu_type = iommu_type; - return 0; -} - -static int vfio_get_iommu_info(VFIOContainer *container, - struct vfio_iommu_type1_info **info) -{ - - size_t argsz = sizeof(struct vfio_iommu_type1_info); - - *info = g_new0(struct vfio_iommu_type1_info, 1); -again: - (*info)->argsz = argsz; - - if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) { - g_free(*info); - *info = NULL; - return -errno; - } - - if (((*info)->argsz > argsz)) { - argsz = (*info)->argsz; - *info = g_realloc(*info, argsz); - goto again; - } - - return 0; -} - -static struct vfio_info_cap_header * -vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) -{ - struct vfio_info_cap_header *hdr; - void *ptr = info; - - if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { - return NULL; - } - - for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { - if (hdr->id == id) { - return hdr; - } - } - - return NULL; -} - -static void vfio_get_iommu_info_migration(VFIOContainer *container, - struct vfio_iommu_type1_info *info) -{ - struct vfio_info_cap_header *hdr; - struct vfio_iommu_type1_info_cap_migration *cap_mig; - - hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); - if (!hdr) { - return; - } - - cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration, - header); - - /* - * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of - * qemu_real_host_page_size to mark those dirty. - */ - if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { - container->dirty_pages_supported = true; - container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; - container->dirty_pgsizes = cap_mig->pgsize_bitmap; - } -} - -static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, - Error **errp) -{ - VFIOContainer *container; - int ret, fd; - VFIOAddressSpace *space; - - space = vfio_get_address_space(as); - - /* - * VFIO is currently incompatible with discarding of RAM insofar as the - * madvise to purge (zap) the page from QEMU's address space does not - * interact with the memory API and therefore leaves stale virtual to - * physical mappings in the IOMMU if the page was previously pinned. We - * therefore set discarding broken for each group added to a container, - * whether the container is used individually or shared. This provides - * us with options to allow devices within a group to opt-in and allow - * discarding, so long as it is done consistently for a group (for instance - * if the device is an mdev device where it is known that the host vendor - * driver will never pin pages outside of the working set of the guest - * driver, which would thus not be discarding candidates). - * - * The first opportunity to induce pinning occurs here where we attempt to - * attach the group to existing containers within the AddressSpace. If any - * pages are already zapped from the virtual address space, such as from - * previous discards, new pinning will cause valid mappings to be - * re-established. Likewise, when the overall MemoryListener for a new - * container is registered, a replay of mappings within the AddressSpace - * will occur, re-establishing any previously zapped pages as well. 
- * - * Especially virtio-balloon is currently only prevented from discarding - * new memory, it will not yet set ram_block_discard_set_required() and - * therefore, neither stops us here or deals with the sudden memory - * consumption of inflated memory. - * - * We do support discarding of memory coordinated via the RamDiscardManager - * with some IOMMU types. vfio_ram_block_discard_disable() handles the - * details once we know which type of IOMMU we are using. - */ - - QLIST_FOREACH(container, &space->containers, next) { - if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { - ret = vfio_ram_block_discard_disable(container, true); - if (ret) { - error_setg_errno(errp, -ret, - "Cannot set discarding of RAM broken"); - if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, - &container->fd)) { - error_report("vfio: error disconnecting group %d from" - " container", group->groupid); - } - return ret; - } - group->container = container; - QLIST_INSERT_HEAD(&container->group_list, group, container_next); - vfio_kvm_device_add_group(group); - return 0; - } - } - - fd = qemu_open_old("/dev/vfio/vfio", O_RDWR); - if (fd < 0) { - error_setg_errno(errp, errno, "failed to open /dev/vfio/vfio"); - ret = -errno; - goto put_space_exit; - } - - ret = ioctl(fd, VFIO_GET_API_VERSION); - if (ret != VFIO_API_VERSION) { - error_setg(errp, "supported vfio version: %d, " - "reported version: %d", VFIO_API_VERSION, ret); - ret = -EINVAL; - goto close_fd_exit; - } - - container = g_malloc0(sizeof(*container)); - container->space = space; - container->fd = fd; - container->error = NULL; - container->dirty_pages_supported = false; - container->dma_max_mappings = 0; - QLIST_INIT(&container->giommu_list); - QLIST_INIT(&container->hostwin_list); - QLIST_INIT(&container->vrdl_list); - - ret = vfio_init_container(container, group->fd, errp); - if (ret) { - goto free_container_exit; - } - - ret = vfio_ram_block_discard_disable(container, true); - if (ret) { - error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); - goto free_container_exit; - } - - switch (container->iommu_type) { - case VFIO_TYPE1v2_IOMMU: - case VFIO_TYPE1_IOMMU: - { - struct vfio_iommu_type1_info *info; - - ret = vfio_get_iommu_info(container, &info); - if (ret) { - error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info"); - goto enable_discards_exit; - } - - if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { - container->pgsizes = info->iova_pgsizes; - } else { - container->pgsizes = qemu_real_host_page_size(); - } - - if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) { - container->dma_max_mappings = 65535; - } - vfio_get_iommu_info_migration(container, info); - g_free(info); - - /* - * FIXME: We should parse VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE - * information to get the actual window extent rather than assume - * a 64-bit IOVA address space. - */ - vfio_host_win_add(container, 0, (hwaddr)-1, container->pgsizes); - - break; - } - case VFIO_SPAPR_TCE_v2_IOMMU: - case VFIO_SPAPR_TCE_IOMMU: - { - struct vfio_iommu_spapr_tce_info info; - bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; - - /* - * The host kernel code implementing VFIO_IOMMU_DISABLE is called - * when container fd is closed so we do not call it explicitly - * in this file. 
- */ - if (!v2) { - ret = ioctl(fd, VFIO_IOMMU_ENABLE); - if (ret) { - error_setg_errno(errp, errno, "failed to enable container"); - ret = -errno; - goto enable_discards_exit; - } - } else { - container->prereg_listener = vfio_prereg_listener; - - memory_listener_register(&container->prereg_listener, - &address_space_memory); - if (container->error) { - memory_listener_unregister(&container->prereg_listener); - ret = -1; - error_propagate_prepend(errp, container->error, - "RAM memory listener initialization failed: "); - goto enable_discards_exit; - } - } - - info.argsz = sizeof(info); - ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); - if (ret) { - error_setg_errno(errp, errno, - "VFIO_IOMMU_SPAPR_TCE_GET_INFO failed"); - ret = -errno; - if (v2) { - memory_listener_unregister(&container->prereg_listener); - } - goto enable_discards_exit; - } - - if (v2) { - container->pgsizes = info.ddw.pgsizes; - /* - * There is a default window in just created container. - * To make region_add/del simpler, we better remove this - * window now and let those iommu_listener callbacks - * create/remove them when needed. - */ - ret = vfio_spapr_remove_window(container, info.dma32_window_start); - if (ret) { - error_setg_errno(errp, -ret, - "failed to remove existing window"); - goto enable_discards_exit; - } - } else { - /* The default table uses 4K pages */ - container->pgsizes = 0x1000; - vfio_host_win_add(container, info.dma32_window_start, - info.dma32_window_start + - info.dma32_window_size - 1, - 0x1000); - } - } - } - - vfio_kvm_device_add_group(group); - - QLIST_INIT(&container->group_list); - QLIST_INSERT_HEAD(&space->containers, container, next); - - group->container = container; - QLIST_INSERT_HEAD(&container->group_list, group, container_next); - - container->listener = vfio_memory_listener; - - memory_listener_register(&container->listener, container->space->as); - - if (container->error) { - ret = -1; - error_propagate_prepend(errp, container->error, - "memory listener initialization failed: "); - goto listener_release_exit; - } - - container->initialized = true; - - return 0; -listener_release_exit: - QLIST_REMOVE(group, container_next); - QLIST_REMOVE(container, next); - vfio_kvm_device_del_group(group); - vfio_listener_release(container); - -enable_discards_exit: - vfio_ram_block_discard_disable(container, false); - -free_container_exit: - g_free(container); - -close_fd_exit: - close(fd); - -put_space_exit: - vfio_put_address_space(space); - - return ret; -} - -static void vfio_disconnect_container(VFIOGroup *group) -{ - VFIOContainer *container = group->container; - - QLIST_REMOVE(group, container_next); - group->container = NULL; - - /* - * Explicitly release the listener first before unset container, - * since unset may destroy the backend container if it's the last - * group. 
- */ - if (QLIST_EMPTY(&container->group_list)) { - vfio_listener_release(container); - } - - if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { - error_report("vfio: error disconnecting group %d from container", - group->groupid); - } - - if (QLIST_EMPTY(&container->group_list)) { - VFIOAddressSpace *space = container->space; - VFIOGuestIOMMU *giommu, *tmp; - VFIOHostDMAWindow *hostwin, *next; - - QLIST_REMOVE(container, next); - - QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) { - memory_region_unregister_iommu_notifier( - MEMORY_REGION(giommu->iommu_mr), &giommu->n); - QLIST_REMOVE(giommu, giommu_next); - g_free(giommu); - } - - QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, - next) { - QLIST_REMOVE(hostwin, hostwin_next); - g_free(hostwin); - } - - trace_vfio_disconnect_container(container->fd); - close(container->fd); - g_free(container); - - vfio_put_address_space(space); - } -} - -VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp) -{ - VFIOGroup *group; - char path[32]; - struct vfio_group_status status = { .argsz = sizeof(status) }; - - QLIST_FOREACH(group, &vfio_group_list, next) { - if (group->groupid == groupid) { - /* Found it. Now is it already in the right context? */ - if (group->container->space->as == as) { - return group; - } else { - error_setg(errp, "group %d used in multiple address spaces", - group->groupid); - return NULL; - } - } - } - - group = g_malloc0(sizeof(*group)); - - snprintf(path, sizeof(path), "/dev/vfio/%d", groupid); - group->fd = qemu_open_old(path, O_RDWR); - if (group->fd < 0) { - error_setg_errno(errp, errno, "failed to open %s", path); - goto free_group_exit; - } - - if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) { - error_setg_errno(errp, errno, "failed to get group %d status", groupid); - goto close_fd_exit; - } - - if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) { - error_setg(errp, "group %d is not viable", groupid); - error_append_hint(errp, - "Please ensure all devices within the iommu_group " - "are bound to their vfio bus driver.\n"); - goto close_fd_exit; - } - - group->groupid = groupid; - QLIST_INIT(&group->device_list); - - if (vfio_connect_container(group, as, errp)) { - error_prepend(errp, "failed to setup container for group %d: ", - groupid); - goto close_fd_exit; - } - - if (QLIST_EMPTY(&vfio_group_list)) { - qemu_register_reset(vfio_reset_handler, NULL); - } - - QLIST_INSERT_HEAD(&vfio_group_list, group, next); - - return group; - -close_fd_exit: - close(group->fd); - -free_group_exit: - g_free(group); - - return NULL; -} - -void vfio_put_group(VFIOGroup *group) -{ - if (!group || !QLIST_EMPTY(&group->device_list)) { - return; - } - - if (!group->ram_block_discard_allowed) { - vfio_ram_block_discard_disable(group->container, false); - } - vfio_kvm_device_del_group(group); - vfio_disconnect_container(group); - QLIST_REMOVE(group, next); - trace_vfio_put_group(group->fd); - close(group->fd); - g_free(group); - - if (QLIST_EMPTY(&vfio_group_list)) { + if (QLIST_EMPTY(&vfio_address_spaces)) { qemu_unregister_reset(vfio_reset_handler, NULL); } } @@ -2912,245 +1539,3 @@ retry: return info; } - -int vfio_get_device(VFIOGroup *group, const char *name, - VFIODevice *vbasedev, Error **errp) -{ - g_autofree struct vfio_device_info *info = NULL; - int fd; - - fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); - if (fd < 0) { - error_setg_errno(errp, errno, "error getting device from group %d", - group->groupid); - error_append_hint(errp, - "Verify all 
devices in group %d are bound to vfio-<bus> " - "or pci-stub and not already in use\n", group->groupid); - return fd; - } - - info = vfio_get_device_info(fd); - if (!info) { - error_setg_errno(errp, errno, "error getting device info"); - close(fd); - return -1; - } - - /* - * Set discarding of RAM as not broken for this group if the driver knows - * the device operates compatibly with discarding. Setting must be - * consistent per group, but since compatibility is really only possible - * with mdev currently, we expect singleton groups. - */ - if (vbasedev->ram_block_discard_allowed != - group->ram_block_discard_allowed) { - if (!QLIST_EMPTY(&group->device_list)) { - error_setg(errp, "Inconsistent setting of support for discarding " - "RAM (e.g., balloon) within group"); - close(fd); - return -1; - } - - if (!group->ram_block_discard_allowed) { - group->ram_block_discard_allowed = true; - vfio_ram_block_discard_disable(group->container, false); - } - } - - vbasedev->fd = fd; - vbasedev->group = group; - QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); - - vbasedev->num_irqs = info->num_irqs; - vbasedev->num_regions = info->num_regions; - vbasedev->flags = info->flags; - - trace_vfio_get_device(name, info->flags, info->num_regions, info->num_irqs); - - vbasedev->reset_works = !!(info->flags & VFIO_DEVICE_FLAGS_RESET); - - return 0; -} - -void vfio_put_base_device(VFIODevice *vbasedev) -{ - if (!vbasedev->group) { - return; - } - QLIST_REMOVE(vbasedev, next); - vbasedev->group = NULL; - trace_vfio_put_base_device(vbasedev->fd); - close(vbasedev->fd); -} - -int vfio_get_region_info(VFIODevice *vbasedev, int index, - struct vfio_region_info **info) -{ - size_t argsz = sizeof(struct vfio_region_info); - - *info = g_malloc0(argsz); - - (*info)->index = index; -retry: - (*info)->argsz = argsz; - - if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) { - g_free(*info); - *info = NULL; - return -errno; - } - - if ((*info)->argsz > argsz) { - argsz = (*info)->argsz; - *info = g_realloc(*info, argsz); - - goto retry; - } - - return 0; -} - -int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type, - uint32_t subtype, struct vfio_region_info **info) -{ - int i; - - for (i = 0; i < vbasedev->num_regions; i++) { - struct vfio_info_cap_header *hdr; - struct vfio_region_info_cap_type *cap_type; - - if (vfio_get_region_info(vbasedev, i, info)) { - continue; - } - - hdr = vfio_get_region_info_cap(*info, VFIO_REGION_INFO_CAP_TYPE); - if (!hdr) { - g_free(*info); - continue; - } - - cap_type = container_of(hdr, struct vfio_region_info_cap_type, header); - - trace_vfio_get_dev_region(vbasedev->name, i, - cap_type->type, cap_type->subtype); - - if (cap_type->type == type && cap_type->subtype == subtype) { - return 0; - } - - g_free(*info); - } - - *info = NULL; - return -ENODEV; -} - -bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) -{ - struct vfio_region_info *info = NULL; - bool ret = false; - - if (!vfio_get_region_info(vbasedev, region, &info)) { - if (vfio_get_region_info_cap(info, cap_type)) { - ret = true; - } - g_free(info); - } - - return ret; -} - -/* - * Interfaces for IBM EEH (Enhanced Error Handling) - */ -static bool vfio_eeh_container_ok(VFIOContainer *container) -{ - /* - * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO - * implementation is broken if there are multiple groups in a - * container. 
The hardware works in units of Partitionable - * Endpoints (== IOMMU groups) and the EEH operations naively - * iterate across all groups in the container, without any logic - * to make sure the groups have their state synchronized. For - * certain operations (ENABLE) that might be ok, until an error - * occurs, but for others (GET_STATE) it's clearly broken. - */ - - /* - * XXX Once fixed kernels exist, test for them here - */ - - if (QLIST_EMPTY(&container->group_list)) { - return false; - } - - if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) { - return false; - } - - return true; -} - -static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) -{ - struct vfio_eeh_pe_op pe_op = { - .argsz = sizeof(pe_op), - .op = op, - }; - int ret; - - if (!vfio_eeh_container_ok(container)) { - error_report("vfio/eeh: EEH_PE_OP 0x%x: " - "kernel requires a container with exactly one group", op); - return -EPERM; - } - - ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op); - if (ret < 0) { - error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op); - return -errno; - } - - return ret; -} - -static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) -{ - VFIOAddressSpace *space = vfio_get_address_space(as); - VFIOContainer *container = NULL; - - if (QLIST_EMPTY(&space->containers)) { - /* No containers to act on */ - goto out; - } - - container = QLIST_FIRST(&space->containers); - - if (QLIST_NEXT(container, next)) { - /* We don't yet have logic to synchronize EEH state across - * multiple containers */ - container = NULL; - goto out; - } - -out: - vfio_put_address_space(space); - return container; -} - -bool vfio_eeh_as_ok(AddressSpace *as) -{ - VFIOContainer *container = vfio_eeh_as_container(as); - - return (container != NULL) && vfio_eeh_container_ok(container); -} - -int vfio_eeh_as_op(AddressSpace *as, uint32_t op) -{ - VFIOContainer *container = vfio_eeh_as_container(as); - - if (!container) { - return -ENODEV; - } - return vfio_eeh_container_op(container, op); -} diff --git a/hw/vfio/container.c b/hw/vfio/container.c new file mode 100644 index 0000000000..adc467210f --- /dev/null +++ b/hw/vfio/container.c @@ -0,0 +1,1161 @@ +/* + * generic functions used by VFIO devices + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Alex Williamson <alex.williamson@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Based on qemu-kvm device-assignment: + * Adapted for KVM by Qumranet. 
+ * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) + * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) + * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com) + */ + +#include "qemu/osdep.h" +#include <sys/ioctl.h> +#ifdef CONFIG_KVM +#include <linux/kvm.h> +#endif +#include <linux/vfio.h> + +#include "hw/vfio/vfio-common.h" +#include "hw/vfio/vfio.h" +#include "exec/address-spaces.h" +#include "exec/memory.h" +#include "exec/ram_addr.h" +#include "hw/hw.h" +#include "qemu/error-report.h" +#include "qemu/range.h" +#include "sysemu/kvm.h" +#include "sysemu/reset.h" +#include "trace.h" +#include "qapi/error.h" +#include "migration/migration.h" + +VFIOGroupList vfio_group_list = + QLIST_HEAD_INITIALIZER(vfio_group_list); + +static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state) +{ + switch (container->iommu_type) { + case VFIO_TYPE1v2_IOMMU: + case VFIO_TYPE1_IOMMU: + /* + * We support coordinated discarding of RAM via the RamDiscardManager. + */ + return ram_block_uncoordinated_discard_disable(state); + default: + /* + * VFIO_SPAPR_TCE_IOMMU most probably works just fine with + * RamDiscardManager, however, it is completely untested. + * + * VFIO_SPAPR_TCE_v2_IOMMU with "DMA memory preregistering" does + * completely the opposite of managing mapping/pinning dynamically as + * required by RamDiscardManager. We would have to special-case sections + * with a RamDiscardManager. + */ + return ram_block_discard_disable(state); + } +} + +static int vfio_dma_unmap_bitmap(VFIOContainer *container, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb) +{ + struct vfio_iommu_type1_dma_unmap *unmap; + struct vfio_bitmap *bitmap; + VFIOBitmap vbmap; + int ret; + + ret = vfio_bitmap_alloc(&vbmap, size); + if (ret) { + return ret; + } + + unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap)); + + unmap->argsz = sizeof(*unmap) + sizeof(*bitmap); + unmap->iova = iova; + unmap->size = size; + unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP; + bitmap = (struct vfio_bitmap *)&unmap->data; + + /* + * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of + * qemu_real_host_page_size to mark those dirty. Hence set bitmap_pgsize + * to qemu_real_host_page_size. 
+ */ + bitmap->pgsize = qemu_real_host_page_size(); + bitmap->size = vbmap.size; + bitmap->data = (__u64 *)vbmap.bitmap; + + if (vbmap.size > container->max_dirty_bitmap_size) { + error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size); + ret = -E2BIG; + goto unmap_exit; + } + + ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap); + if (!ret) { + cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, + iotlb->translated_addr, vbmap.pages); + } else { + error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m"); + } + +unmap_exit: + g_free(unmap); + g_free(vbmap.bitmap); + + return ret; +} + +/* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, + ram_addr_t size, IOMMUTLBEntry *iotlb) +{ + struct vfio_iommu_type1_dma_unmap unmap = { + .argsz = sizeof(unmap), + .flags = 0, + .iova = iova, + .size = size, + }; + bool need_dirty_sync = false; + int ret; + + if (iotlb && vfio_devices_all_running_and_mig_active(container)) { + if (!vfio_devices_all_device_dirty_tracking(container) && + container->dirty_pages_supported) { + return vfio_dma_unmap_bitmap(container, iova, size, iotlb); + } + + need_dirty_sync = true; + } + + while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { + /* + * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c + * v4.15) where an overflow in its wrap-around check prevents us from + * unmapping the last page of the address space. Test for the error + * condition and re-try the unmap excluding the last page. The + * expectation is that we've never mapped the last page anyway and this + * unmap request comes via vIOMMU support which also makes it unlikely + * that this page is used. This bug was introduced well after type1 v2 + * support was introduced, so we shouldn't need to test for v1. A fix + * is queued for kernel v5.0 so this workaround can be removed once + * affected kernels are sufficiently deprecated. + */ + if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) && + container->iommu_type == VFIO_TYPE1v2_IOMMU) { + trace_vfio_dma_unmap_overflow_workaround(); + unmap.size -= 1ULL << ctz64(container->pgsizes); + continue; + } + error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno)); + return -errno; + } + + if (need_dirty_sync) { + ret = vfio_get_dirty_bitmap(container, iova, size, + iotlb->translated_addr); + if (ret) { + return ret; + } + } + + return 0; +} + +int vfio_dma_map(VFIOContainer *container, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly) +{ + struct vfio_iommu_type1_dma_map map = { + .argsz = sizeof(map), + .flags = VFIO_DMA_MAP_FLAG_READ, + .vaddr = (__u64)(uintptr_t)vaddr, + .iova = iova, + .size = size, + }; + + if (!readonly) { + map.flags |= VFIO_DMA_MAP_FLAG_WRITE; + } + + /* + * Try the mapping, if it fails with EBUSY, unmap the region and try + * again. This shouldn't be necessary, but we sometimes see it in + * the VGA ROM space. 
+ */ + if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || + (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 && + ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { + return 0; + } + + error_report("VFIO_MAP_DMA failed: %s", strerror(errno)); + return -errno; +} + +int vfio_container_add_section_window(VFIOContainer *container, + MemoryRegionSection *section, + Error **errp) +{ + VFIOHostDMAWindow *hostwin; + hwaddr pgsize = 0; + int ret; + + if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { + return 0; + } + + /* For now intersections are not allowed, we may relax this later */ + QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { + if (ranges_overlap(hostwin->min_iova, + hostwin->max_iova - hostwin->min_iova + 1, + section->offset_within_address_space, + int128_get64(section->size))) { + error_setg(errp, + "region [0x%"PRIx64",0x%"PRIx64"] overlaps with existing" + "host DMA window [0x%"PRIx64",0x%"PRIx64"]", + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1, + hostwin->min_iova, hostwin->max_iova); + return -EINVAL; + } + } + + ret = vfio_spapr_create_window(container, section, &pgsize); + if (ret) { + error_setg_errno(errp, -ret, "Failed to create SPAPR window"); + return ret; + } + + vfio_host_win_add(container, section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1, pgsize); +#ifdef CONFIG_KVM + if (kvm_enabled()) { + VFIOGroup *group; + IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); + struct kvm_vfio_spapr_tce param; + struct kvm_device_attr attr = { + .group = KVM_DEV_VFIO_GROUP, + .attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE, + .addr = (uint64_t)(unsigned long)&param, + }; + + if (!memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_SPAPR_TCE_FD, + &param.tablefd)) { + QLIST_FOREACH(group, &container->group_list, container_next) { + param.groupfd = group->fd; + if (ioctl(vfio_kvm_device_fd, KVM_SET_DEVICE_ATTR, &attr)) { + error_setg_errno(errp, errno, + "vfio: failed GROUP_SET_SPAPR_TCE for " + "KVM VFIO device %d and group fd %d", + param.tablefd, param.groupfd); + return -errno; + } + trace_vfio_spapr_group_attach(param.groupfd, param.tablefd); + } + } + } +#endif + return 0; +} + +void vfio_container_del_section_window(VFIOContainer *container, + MemoryRegionSection *section) +{ + if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { + return; + } + + vfio_spapr_remove_window(container, + section->offset_within_address_space); + if (vfio_host_win_del(container, + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1) < 0) { + hw_error("%s: Cannot delete missing window at %"HWADDR_PRIx, + __func__, section->offset_within_address_space); + } +} + +int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) +{ + int ret; + struct vfio_iommu_type1_dirty_bitmap dirty = { + .argsz = sizeof(dirty), + }; + + if (!container->dirty_pages_supported) { + return 0; + } + + if (start) { + dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; + } else { + dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP; + } + + ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); + if (ret) { + ret = -errno; + error_report("Failed to set dirty tracking flag 0x%x errno: %d", + dirty.flags, errno); + } + + return ret; +} + +int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, + hwaddr iova, hwaddr size) +{ + struct vfio_iommu_type1_dirty_bitmap
*dbitmap; + struct vfio_iommu_type1_dirty_bitmap_get *range; + int ret; + + dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); + + dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); + dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; + range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data; + range->iova = iova; + range->size = size; + + /* + * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of + * qemu_real_host_page_size to mark those dirty. Hence set bitmap's pgsize + * to qemu_real_host_page_size. + */ + range->bitmap.pgsize = qemu_real_host_page_size(); + range->bitmap.size = vbmap->size; + range->bitmap.data = (__u64 *)vbmap->bitmap; + + ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); + if (ret) { + ret = -errno; + error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64 + " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova, + (uint64_t)range->size, errno); + } + + g_free(dbitmap); + + return ret; +} + +static void vfio_listener_release(VFIOContainer *container) +{ + memory_listener_unregister(&container->listener); + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { + memory_listener_unregister(&container->prereg_listener); + } +} + +static struct vfio_info_cap_header * +vfio_get_iommu_type1_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) +{ + if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { + return NULL; + } + + return vfio_get_cap((void *)info, info->cap_offset, id); +} + +bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, + unsigned int *avail) +{ + struct vfio_info_cap_header *hdr; + struct vfio_iommu_type1_info_dma_avail *cap; + + /* If the capability cannot be found, assume no DMA limiting */ + hdr = vfio_get_iommu_type1_info_cap(info, + VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL); + if (hdr == NULL) { + return false; + } + + if (avail != NULL) { + cap = (void *) hdr; + *avail = cap->avail; + } + + return true; +} + +static void vfio_kvm_device_add_group(VFIOGroup *group) +{ + Error *err = NULL; + + if (vfio_kvm_device_add_fd(group->fd, &err)) { + error_reportf_err(err, "group ID %d: ", group->groupid); + } +} + +static void vfio_kvm_device_del_group(VFIOGroup *group) +{ + Error *err = NULL; + + if (vfio_kvm_device_del_fd(group->fd, &err)) { + error_reportf_err(err, "group ID %d: ", group->groupid); + } +} + +/* + * vfio_get_iommu_type - selects the richest iommu_type (v2 first) + */ +static int vfio_get_iommu_type(VFIOContainer *container, + Error **errp) +{ + int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU, + VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU }; + int i; + + for (i = 0; i < ARRAY_SIZE(iommu_types); i++) { + if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) { + return iommu_types[i]; + } + } + error_setg(errp, "No available IOMMU models"); + return -EINVAL; +} + +static int vfio_init_container(VFIOContainer *container, int group_fd, + Error **errp) +{ + int iommu_type, ret; + + iommu_type = vfio_get_iommu_type(container, errp); + if (iommu_type < 0) { + return iommu_type; + } + + ret = ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container->fd); + if (ret) { + error_setg_errno(errp, errno, "Failed to set group container"); + return -errno; + } + + while (ioctl(container->fd, VFIO_SET_IOMMU, iommu_type)) { + if (iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { + /* + * On sPAPR, despite the IOMMU subdriver always advertises v1 and + * v2, the running platform may not support v2 and there is no + * way to guess it until an IOMMU group gets added to the container. 
+ * So in case it fails with v2, try v1 as a fallback. + */ + iommu_type = VFIO_SPAPR_TCE_IOMMU; + continue; + } + error_setg_errno(errp, errno, "Failed to set iommu for container"); + return -errno; + } + + container->iommu_type = iommu_type; + return 0; +} + +static int vfio_get_iommu_info(VFIOContainer *container, + struct vfio_iommu_type1_info **info) +{ + + size_t argsz = sizeof(struct vfio_iommu_type1_info); + + *info = g_new0(struct vfio_iommu_type1_info, 1); +again: + (*info)->argsz = argsz; + + if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) { + g_free(*info); + *info = NULL; + return -errno; + } + + if (((*info)->argsz > argsz)) { + argsz = (*info)->argsz; + *info = g_realloc(*info, argsz); + goto again; + } + + return 0; +} + +static struct vfio_info_cap_header * +vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) +{ + struct vfio_info_cap_header *hdr; + void *ptr = info; + + if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { + return NULL; + } + + for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { + if (hdr->id == id) { + return hdr; + } + } + + return NULL; +} + +static void vfio_get_iommu_info_migration(VFIOContainer *container, + struct vfio_iommu_type1_info *info) +{ + struct vfio_info_cap_header *hdr; + struct vfio_iommu_type1_info_cap_migration *cap_mig; + + hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); + if (!hdr) { + return; + } + + cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration, + header); + + /* + * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of + * qemu_real_host_page_size to mark those dirty. + */ + if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { + container->dirty_pages_supported = true; + container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; + container->dirty_pgsizes = cap_mig->pgsize_bitmap; + } +} + +static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + Error **errp) +{ + VFIOContainer *container; + int ret, fd; + VFIOAddressSpace *space; + + space = vfio_get_address_space(as); + + /* + * VFIO is currently incompatible with discarding of RAM insofar as the + * madvise to purge (zap) the page from QEMU's address space does not + * interact with the memory API and therefore leaves stale virtual to + * physical mappings in the IOMMU if the page was previously pinned. We + * therefore set discarding broken for each group added to a container, + * whether the container is used individually or shared. This provides + * us with options to allow devices within a group to opt-in and allow + * discarding, so long as it is done consistently for a group (for instance + * if the device is an mdev device where it is known that the host vendor + * driver will never pin pages outside of the working set of the guest + * driver, which would thus not be discarding candidates). + * + * The first opportunity to induce pinning occurs here where we attempt to + * attach the group to existing containers within the AddressSpace. If any + * pages are already zapped from the virtual address space, such as from + * previous discards, new pinning will cause valid mappings to be + * re-established. Likewise, when the overall MemoryListener for a new + * container is registered, a replay of mappings within the AddressSpace + * will occur, re-establishing any previously zapped pages as well. 
+ * + * Especially virtio-balloon is currently only prevented from discarding + * new memory, it will not yet set ram_block_discard_set_required() and + * therefore, neither stops us here or deals with the sudden memory + * consumption of inflated memory. + * + * We do support discarding of memory coordinated via the RamDiscardManager + * with some IOMMU types. vfio_ram_block_discard_disable() handles the + * details once we know which type of IOMMU we are using. + */ + + QLIST_FOREACH(container, &space->containers, next) { + if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { + ret = vfio_ram_block_discard_disable(container, true); + if (ret) { + error_setg_errno(errp, -ret, + "Cannot set discarding of RAM broken"); + if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, + &container->fd)) { + error_report("vfio: error disconnecting group %d from" + " container", group->groupid); + } + return ret; + } + group->container = container; + QLIST_INSERT_HEAD(&container->group_list, group, container_next); + vfio_kvm_device_add_group(group); + return 0; + } + } + + fd = qemu_open_old("/dev/vfio/vfio", O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "failed to open /dev/vfio/vfio"); + ret = -errno; + goto put_space_exit; + } + + ret = ioctl(fd, VFIO_GET_API_VERSION); + if (ret != VFIO_API_VERSION) { + error_setg(errp, "supported vfio version: %d, " + "reported version: %d", VFIO_API_VERSION, ret); + ret = -EINVAL; + goto close_fd_exit; + } + + container = g_malloc0(sizeof(*container)); + container->space = space; + container->fd = fd; + container->error = NULL; + container->dirty_pages_supported = false; + container->dma_max_mappings = 0; + QLIST_INIT(&container->giommu_list); + QLIST_INIT(&container->hostwin_list); + QLIST_INIT(&container->vrdl_list); + + ret = vfio_init_container(container, group->fd, errp); + if (ret) { + goto free_container_exit; + } + + ret = vfio_ram_block_discard_disable(container, true); + if (ret) { + error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); + goto free_container_exit; + } + + switch (container->iommu_type) { + case VFIO_TYPE1v2_IOMMU: + case VFIO_TYPE1_IOMMU: + { + struct vfio_iommu_type1_info *info; + + ret = vfio_get_iommu_info(container, &info); + if (ret) { + error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info"); + goto enable_discards_exit; + } + + if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { + container->pgsizes = info->iova_pgsizes; + } else { + container->pgsizes = qemu_real_host_page_size(); + } + + if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) { + container->dma_max_mappings = 65535; + } + vfio_get_iommu_info_migration(container, info); + g_free(info); + + /* + * FIXME: We should parse VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE + * information to get the actual window extent rather than assume + * a 64-bit IOVA address space. + */ + vfio_host_win_add(container, 0, (hwaddr)-1, container->pgsizes); + + break; + } + case VFIO_SPAPR_TCE_v2_IOMMU: + case VFIO_SPAPR_TCE_IOMMU: + { + struct vfio_iommu_spapr_tce_info info; + bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; + + /* + * The host kernel code implementing VFIO_IOMMU_DISABLE is called + * when container fd is closed so we do not call it explicitly + * in this file. 
+ */ + if (!v2) { + ret = ioctl(fd, VFIO_IOMMU_ENABLE); + if (ret) { + error_setg_errno(errp, errno, "failed to enable container"); + ret = -errno; + goto enable_discards_exit; + } + } else { + container->prereg_listener = vfio_prereg_listener; + + memory_listener_register(&container->prereg_listener, + &address_space_memory); + if (container->error) { + memory_listener_unregister(&container->prereg_listener); + ret = -1; + error_propagate_prepend(errp, container->error, + "RAM memory listener initialization failed: "); + goto enable_discards_exit; + } + } + + info.argsz = sizeof(info); + ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); + if (ret) { + error_setg_errno(errp, errno, + "VFIO_IOMMU_SPAPR_TCE_GET_INFO failed"); + ret = -errno; + if (v2) { + memory_listener_unregister(&container->prereg_listener); + } + goto enable_discards_exit; + } + + if (v2) { + container->pgsizes = info.ddw.pgsizes; + /* + * There is a default window in just created container. + * To make region_add/del simpler, we better remove this + * window now and let those iommu_listener callbacks + * create/remove them when needed. + */ + ret = vfio_spapr_remove_window(container, info.dma32_window_start); + if (ret) { + error_setg_errno(errp, -ret, + "failed to remove existing window"); + goto enable_discards_exit; + } + } else { + /* The default table uses 4K pages */ + container->pgsizes = 0x1000; + vfio_host_win_add(container, info.dma32_window_start, + info.dma32_window_start + + info.dma32_window_size - 1, + 0x1000); + } + } + } + + vfio_kvm_device_add_group(group); + + QLIST_INIT(&container->group_list); + QLIST_INSERT_HEAD(&space->containers, container, next); + + group->container = container; + QLIST_INSERT_HEAD(&container->group_list, group, container_next); + + container->listener = vfio_memory_listener; + + memory_listener_register(&container->listener, container->space->as); + + if (container->error) { + ret = -1; + error_propagate_prepend(errp, container->error, + "memory listener initialization failed: "); + goto listener_release_exit; + } + + container->initialized = true; + + return 0; +listener_release_exit: + QLIST_REMOVE(group, container_next); + QLIST_REMOVE(container, next); + vfio_kvm_device_del_group(group); + vfio_listener_release(container); + +enable_discards_exit: + vfio_ram_block_discard_disable(container, false); + +free_container_exit: + g_free(container); + +close_fd_exit: + close(fd); + +put_space_exit: + vfio_put_address_space(space); + + return ret; +} + +static void vfio_disconnect_container(VFIOGroup *group) +{ + VFIOContainer *container = group->container; + + QLIST_REMOVE(group, container_next); + group->container = NULL; + + /* + * Explicitly release the listener first before unset container, + * since unset may destroy the backend container if it's the last + * group. 
+ */ + if (QLIST_EMPTY(&container->group_list)) { + vfio_listener_release(container); + } + + if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) { + error_report("vfio: error disconnecting group %d from container", + group->groupid); + } + + if (QLIST_EMPTY(&container->group_list)) { + VFIOAddressSpace *space = container->space; + VFIOGuestIOMMU *giommu, *tmp; + VFIOHostDMAWindow *hostwin, *next; + + QLIST_REMOVE(container, next); + + QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) { + memory_region_unregister_iommu_notifier( + MEMORY_REGION(giommu->iommu_mr), &giommu->n); + QLIST_REMOVE(giommu, giommu_next); + g_free(giommu); + } + + QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, + next) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); + } + + trace_vfio_disconnect_container(container->fd); + close(container->fd); + g_free(container); + + vfio_put_address_space(space); + } +} + +static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp) +{ + VFIOGroup *group; + char path[32]; + struct vfio_group_status status = { .argsz = sizeof(status) }; + + QLIST_FOREACH(group, &vfio_group_list, next) { + if (group->groupid == groupid) { + /* Found it. Now is it already in the right context? */ + if (group->container->space->as == as) { + return group; + } else { + error_setg(errp, "group %d used in multiple address spaces", + group->groupid); + return NULL; + } + } + } + + group = g_malloc0(sizeof(*group)); + + snprintf(path, sizeof(path), "/dev/vfio/%d", groupid); + group->fd = qemu_open_old(path, O_RDWR); + if (group->fd < 0) { + error_setg_errno(errp, errno, "failed to open %s", path); + goto free_group_exit; + } + + if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) { + error_setg_errno(errp, errno, "failed to get group %d status", groupid); + goto close_fd_exit; + } + + if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) { + error_setg(errp, "group %d is not viable", groupid); + error_append_hint(errp, + "Please ensure all devices within the iommu_group " + "are bound to their vfio bus driver.\n"); + goto close_fd_exit; + } + + group->groupid = groupid; + QLIST_INIT(&group->device_list); + + if (vfio_connect_container(group, as, errp)) { + error_prepend(errp, "failed to setup container for group %d: ", + groupid); + goto close_fd_exit; + } + + QLIST_INSERT_HEAD(&vfio_group_list, group, next); + + return group; + +close_fd_exit: + close(group->fd); + +free_group_exit: + g_free(group); + + return NULL; +} + +static void vfio_put_group(VFIOGroup *group) +{ + if (!group || !QLIST_EMPTY(&group->device_list)) { + return; + } + + if (!group->ram_block_discard_allowed) { + vfio_ram_block_discard_disable(group->container, false); + } + vfio_kvm_device_del_group(group); + vfio_disconnect_container(group); + QLIST_REMOVE(group, next); + trace_vfio_put_group(group->fd); + close(group->fd); + g_free(group); +} + +static int vfio_get_device(VFIOGroup *group, const char *name, + VFIODevice *vbasedev, Error **errp) +{ + g_autofree struct vfio_device_info *info = NULL; + int fd; + + fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); + if (fd < 0) { + error_setg_errno(errp, errno, "error getting device from group %d", + group->groupid); + error_append_hint(errp, + "Verify all devices in group %d are bound to vfio-<bus> " + "or pci-stub and not already in use\n", group->groupid); + return fd; + } + + info = vfio_get_device_info(fd); + if (!info) { + error_setg_errno(errp, errno, "error getting device info"); + close(fd); + 
return -1; + } + + /* + * Set discarding of RAM as not broken for this group if the driver knows + * the device operates compatibly with discarding. Setting must be + * consistent per group, but since compatibility is really only possible + * with mdev currently, we expect singleton groups. + */ + if (vbasedev->ram_block_discard_allowed != + group->ram_block_discard_allowed) { + if (!QLIST_EMPTY(&group->device_list)) { + error_setg(errp, "Inconsistent setting of support for discarding " + "RAM (e.g., balloon) within group"); + close(fd); + return -1; + } + + if (!group->ram_block_discard_allowed) { + group->ram_block_discard_allowed = true; + vfio_ram_block_discard_disable(group->container, false); + } + } + + vbasedev->fd = fd; + vbasedev->group = group; + QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); + + vbasedev->num_irqs = info->num_irqs; + vbasedev->num_regions = info->num_regions; + vbasedev->flags = info->flags; + + trace_vfio_get_device(name, info->flags, info->num_regions, info->num_irqs); + + vbasedev->reset_works = !!(info->flags & VFIO_DEVICE_FLAGS_RESET); + + return 0; +} + +static void vfio_put_base_device(VFIODevice *vbasedev) +{ + if (!vbasedev->group) { + return; + } + QLIST_REMOVE(vbasedev, next); + vbasedev->group = NULL; + trace_vfio_put_base_device(vbasedev->fd); + close(vbasedev->fd); +} + +/* + * Interfaces for IBM EEH (Enhanced Error Handling) + */ +static bool vfio_eeh_container_ok(VFIOContainer *container) +{ + /* + * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO + * implementation is broken if there are multiple groups in a + * container. The hardware works in units of Partitionable + * Endpoints (== IOMMU groups) and the EEH operations naively + * iterate across all groups in the container, without any logic + * to make sure the groups have their state synchronized. For + * certain operations (ENABLE) that might be ok, until an error + * occurs, but for others (GET_STATE) it's clearly broken. 
+ */ + + /* + * XXX Once fixed kernels exist, test for them here + */ + + if (QLIST_EMPTY(&container->group_list)) { + return false; + } + + if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) { + return false; + } + + return true; +} + +static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) +{ + struct vfio_eeh_pe_op pe_op = { + .argsz = sizeof(pe_op), + .op = op, + }; + int ret; + + if (!vfio_eeh_container_ok(container)) { + error_report("vfio/eeh: EEH_PE_OP 0x%x: " + "kernel requires a container with exactly one group", op); + return -EPERM; + } + + ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op); + if (ret < 0) { + error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op); + return -errno; + } + + return ret; +} + +static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) +{ + VFIOAddressSpace *space = vfio_get_address_space(as); + VFIOContainer *container = NULL; + + if (QLIST_EMPTY(&space->containers)) { + /* No containers to act on */ + goto out; + } + + container = QLIST_FIRST(&space->containers); + + if (QLIST_NEXT(container, next)) { + /* + * We don't yet have logic to synchronize EEH state across + * multiple containers + */ + container = NULL; + goto out; + } + +out: + vfio_put_address_space(space); + return container; +} + +bool vfio_eeh_as_ok(AddressSpace *as) +{ + VFIOContainer *container = vfio_eeh_as_container(as); + + return (container != NULL) && vfio_eeh_container_ok(container); +} + +int vfio_eeh_as_op(AddressSpace *as, uint32_t op) +{ + VFIOContainer *container = vfio_eeh_as_container(as); + + if (!container) { + return -ENODEV; + } + return vfio_eeh_container_op(container, op); +} + +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) +{ + char *tmp, group_path[PATH_MAX], *group_name; + int ret, groupid; + ssize_t len; + + tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev); + len = readlink(tmp, group_path, sizeof(group_path)); + g_free(tmp); + + if (len <= 0 || len >= sizeof(group_path)) { + ret = len < 0 ? 
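/*
 * Illustrative aside, not part of the patch: a minimal sketch of how a
 * machine-level caller might drive the EEH helpers above, assuming an
 * AddressSpace obtained elsewhere and the usual VFIO_EEH_PE_ENABLE op from
 * <linux/vfio.h>.
 */
static int example_eeh_enable(AddressSpace *as)
{
    if (!vfio_eeh_as_ok(as)) {
        /* no container, or more than one group in it: EEH is unusable */
        return -ENODEV;
    }
    return vfio_eeh_as_op(as, VFIO_EEH_PE_ENABLE);
}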
-errno : -ENAMETOOLONG; + error_setg_errno(errp, -ret, "no iommu_group found"); + return ret; + } + + group_path[len] = 0; + + group_name = basename(group_path); + if (sscanf(group_name, "%d", &groupid) != 1) { + error_setg_errno(errp, errno, "failed to read %s", group_path); + return -errno; + } + return groupid; +} + +/* + * vfio_attach_device: attach a device to a security context + * @name and @vbasedev->name are likely to be different depending + * on the type of the device, hence the need for passing @name + */ +int vfio_attach_device(char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp) +{ + int groupid = vfio_device_groupid(vbasedev, errp); + VFIODevice *vbasedev_iter; + VFIOGroup *group; + VFIOContainer *container; + int ret; + + if (groupid < 0) { + return groupid; + } + + trace_vfio_attach_device(vbasedev->name, groupid); + + group = vfio_get_group(groupid, as, errp); + if (!group) { + return -ENOENT; + } + + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { + if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) { + error_setg(errp, "device is already attached"); + vfio_put_group(group); + return -EBUSY; + } + } + ret = vfio_get_device(group, name, vbasedev, errp); + if (ret) { + vfio_put_group(group); + return ret; + } + + container = group->container; + vbasedev->container = container; + QLIST_INSERT_HEAD(&container->device_list, vbasedev, container_next); + QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); + + return ret; +} + +void vfio_detach_device(VFIODevice *vbasedev) +{ + VFIOGroup *group = vbasedev->group; + + if (!vbasedev->container) { + return; + } + + QLIST_REMOVE(vbasedev, global_next); + QLIST_REMOVE(vbasedev, container_next); + vbasedev->container = NULL; + trace_vfio_detach_device(vbasedev->name, group->groupid); + vfio_put_base_device(vbasedev); + vfio_put_group(group); +} diff --git a/hw/vfio/display.c b/hw/vfio/display.c index 837d9e6a30..7a10fa8604 100644 --- a/hw/vfio/display.c +++ b/hw/vfio/display.c @@ -544,3 +544,24 @@ void vfio_display_finalize(VFIOPCIDevice *vdev) vfio_display_edid_exit(vdev->dpy); g_free(vdev->dpy); } + +static bool migrate_needed(void *opaque) +{ + VFIODisplay *dpy = opaque; + bool ramfb_exists = dpy->ramfb != NULL; + + /* see vfio_display_migration_needed() */ + assert(ramfb_exists); + return ramfb_exists; +} + +const VMStateDescription vfio_display_vmstate = { + .name = "VFIODisplay", + .version_id = 1, + .minimum_version_id = 1, + .needed = migrate_needed, + .fields = (VMStateField[]) { + VMSTATE_STRUCT_POINTER(ramfb, VFIODisplay, ramfb_vmstate, RAMFBState), + VMSTATE_END_OF_LIST(), + } +}; diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c new file mode 100644 index 0000000000..7e5da21b31 --- /dev/null +++ b/hw/vfio/helpers.c @@ -0,0 +1,612 @@ +/* + * low level and IOMMU backend agnostic helpers used by VFIO devices, + * related to regions, interrupts, capabilities + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Alex Williamson <alex.williamson@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Based on qemu-kvm device-assignment: + * Adapted for KVM by Qumranet. 
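/*
 * Illustrative aside, not part of the patch: vfio_attach_device() and
 * vfio_detach_device() above are the entry points a device front end now
 * uses instead of open-coding group lookup.  A minimal sketch, assuming the
 * caller has already filled in vbasedev->sysfsdev and vbasedev->name and
 * chosen an AddressSpace.
 */
static int example_attach(VFIODevice *vbasedev, AddressSpace *as, Error **errp)
{
    /* pairs with vfio_detach_device(vbasedev) at unrealize time */
    return vfio_attach_device(vbasedev->name, vbasedev, as, errp);
}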
+ * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) + * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) + * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com) + */ + +#include "qemu/osdep.h" +#include <sys/ioctl.h> + +#include "hw/vfio/vfio-common.h" +#include "hw/vfio/vfio.h" +#include "hw/hw.h" +#include "trace.h" +#include "qapi/error.h" +#include "qemu/error-report.h" + +/* + * Common VFIO interrupt disable + */ +void vfio_disable_irqindex(VFIODevice *vbasedev, int index) +{ + struct vfio_irq_set irq_set = { + .argsz = sizeof(irq_set), + .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER, + .index = index, + .start = 0, + .count = 0, + }; + + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); +} + +void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index) +{ + struct vfio_irq_set irq_set = { + .argsz = sizeof(irq_set), + .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK, + .index = index, + .start = 0, + .count = 1, + }; + + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); +} + +void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index) +{ + struct vfio_irq_set irq_set = { + .argsz = sizeof(irq_set), + .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK, + .index = index, + .start = 0, + .count = 1, + }; + + ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set); +} + +static inline const char *action_to_str(int action) +{ + switch (action) { + case VFIO_IRQ_SET_ACTION_MASK: + return "MASK"; + case VFIO_IRQ_SET_ACTION_UNMASK: + return "UNMASK"; + case VFIO_IRQ_SET_ACTION_TRIGGER: + return "TRIGGER"; + default: + return "UNKNOWN ACTION"; + } +} + +static const char *index_to_str(VFIODevice *vbasedev, int index) +{ + if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { + return NULL; + } + + switch (index) { + case VFIO_PCI_INTX_IRQ_INDEX: + return "INTX"; + case VFIO_PCI_MSI_IRQ_INDEX: + return "MSI"; + case VFIO_PCI_MSIX_IRQ_INDEX: + return "MSIX"; + case VFIO_PCI_ERR_IRQ_INDEX: + return "ERR"; + case VFIO_PCI_REQ_IRQ_INDEX: + return "REQ"; + default: + return NULL; + } +} + +int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex, + int action, int fd, Error **errp) +{ + struct vfio_irq_set *irq_set; + int argsz, ret = 0; + const char *name; + int32_t *pfd; + + argsz = sizeof(*irq_set) + sizeof(*pfd); + + irq_set = g_malloc0(argsz); + irq_set->argsz = argsz; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | action; + irq_set->index = index; + irq_set->start = subindex; + irq_set->count = 1; + pfd = (int32_t *)&irq_set->data; + *pfd = fd; + + if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) { + ret = -errno; + } + g_free(irq_set); + + if (!ret) { + return 0; + } + + error_setg_errno(errp, -ret, "VFIO_DEVICE_SET_IRQS failure"); + + name = index_to_str(vbasedev, index); + if (name) { + error_prepend(errp, "%s-%d: ", name, subindex); + } else { + error_prepend(errp, "index %d-%d: ", index, subindex); + } + error_prepend(errp, + "Failed to %s %s eventfd signaling for interrupt ", + fd < 0 ? 
"tear down" : "set up", action_to_str(action)); + return ret; +} + +/* + * IO Port/MMIO - Beware of the endians, VFIO is always little endian + */ +void vfio_region_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + VFIORegion *region = opaque; + VFIODevice *vbasedev = region->vbasedev; + union { + uint8_t byte; + uint16_t word; + uint32_t dword; + uint64_t qword; + } buf; + + switch (size) { + case 1: + buf.byte = data; + break; + case 2: + buf.word = cpu_to_le16(data); + break; + case 4: + buf.dword = cpu_to_le32(data); + break; + case 8: + buf.qword = cpu_to_le64(data); + break; + default: + hw_error("vfio: unsupported write size, %u bytes", size); + break; + } + + if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { + error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64 + ",%d) failed: %m", + __func__, vbasedev->name, region->nr, + addr, data, size); + } + + trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size); + + /* + * A read or write to a BAR always signals an INTx EOI. This will + * do nothing if not pending (including not in INTx mode). We assume + * that a BAR access is in response to an interrupt and that BAR + * accesses will service the interrupt. Unfortunately, we don't know + * which access will service the interrupt, so we're potentially + * getting quite a few host interrupts per guest interrupt. + */ + vbasedev->ops->vfio_eoi(vbasedev); +} + +uint64_t vfio_region_read(void *opaque, + hwaddr addr, unsigned size) +{ + VFIORegion *region = opaque; + VFIODevice *vbasedev = region->vbasedev; + union { + uint8_t byte; + uint16_t word; + uint32_t dword; + uint64_t qword; + } buf; + uint64_t data = 0; + + if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) { + error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m", + __func__, vbasedev->name, region->nr, + addr, size); + return (uint64_t)-1; + } + switch (size) { + case 1: + data = buf.byte; + break; + case 2: + data = le16_to_cpu(buf.word); + break; + case 4: + data = le32_to_cpu(buf.dword); + break; + case 8: + data = le64_to_cpu(buf.qword); + break; + default: + hw_error("vfio: unsupported read size, %u bytes", size); + break; + } + + trace_vfio_region_read(vbasedev->name, region->nr, addr, size, data); + + /* Same as write above */ + vbasedev->ops->vfio_eoi(vbasedev); + + return data; +} + +const MemoryRegionOps vfio_region_ops = { + .read = vfio_region_read, + .write = vfio_region_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + }, +}; + +int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size) +{ + vbmap->pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size(); + vbmap->size = ROUND_UP(vbmap->pages, sizeof(__u64) * BITS_PER_BYTE) / + BITS_PER_BYTE; + vbmap->bitmap = g_try_malloc0(vbmap->size); + if (!vbmap->bitmap) { + return -ENOMEM; + } + + return 0; +} + +struct vfio_info_cap_header * +vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id) +{ + struct vfio_info_cap_header *hdr; + + for (hdr = ptr + cap_offset; hdr != ptr; hdr = ptr + hdr->next) { + if (hdr->id == id) { + return hdr; + } + } + + return NULL; +} + +struct vfio_info_cap_header * +vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) +{ + if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) { + return NULL; + } + + return vfio_get_cap((void *)info, info->cap_offset, id); +} + +struct vfio_info_cap_header * 
+vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id) +{ + if (!(info->flags & VFIO_DEVICE_FLAGS_CAPS)) { + return NULL; + } + + return vfio_get_cap((void *)info, info->cap_offset, id); +} + +static int vfio_setup_region_sparse_mmaps(VFIORegion *region, + struct vfio_region_info *info) +{ + struct vfio_info_cap_header *hdr; + struct vfio_region_info_cap_sparse_mmap *sparse; + int i, j; + + hdr = vfio_get_region_info_cap(info, VFIO_REGION_INFO_CAP_SPARSE_MMAP); + if (!hdr) { + return -ENODEV; + } + + sparse = container_of(hdr, struct vfio_region_info_cap_sparse_mmap, header); + + trace_vfio_region_sparse_mmap_header(region->vbasedev->name, + region->nr, sparse->nr_areas); + + region->mmaps = g_new0(VFIOMmap, sparse->nr_areas); + + for (i = 0, j = 0; i < sparse->nr_areas; i++) { + if (sparse->areas[i].size) { + trace_vfio_region_sparse_mmap_entry(i, sparse->areas[i].offset, + sparse->areas[i].offset + + sparse->areas[i].size - 1); + region->mmaps[j].offset = sparse->areas[i].offset; + region->mmaps[j].size = sparse->areas[i].size; + j++; + } + } + + region->nr_mmaps = j; + region->mmaps = g_realloc(region->mmaps, j * sizeof(VFIOMmap)); + + return 0; +} + +int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, + int index, const char *name) +{ + struct vfio_region_info *info; + int ret; + + ret = vfio_get_region_info(vbasedev, index, &info); + if (ret) { + return ret; + } + + region->vbasedev = vbasedev; + region->flags = info->flags; + region->size = info->size; + region->fd_offset = info->offset; + region->nr = index; + + if (region->size) { + region->mem = g_new0(MemoryRegion, 1); + memory_region_init_io(region->mem, obj, &vfio_region_ops, + region, name, region->size); + + if (!vbasedev->no_mmap && + region->flags & VFIO_REGION_INFO_FLAG_MMAP) { + + ret = vfio_setup_region_sparse_mmaps(region, info); + + if (ret) { + region->nr_mmaps = 1; + region->mmaps = g_new0(VFIOMmap, region->nr_mmaps); + region->mmaps[0].offset = 0; + region->mmaps[0].size = region->size; + } + } + } + + g_free(info); + + trace_vfio_region_setup(vbasedev->name, index, name, + region->flags, region->fd_offset, region->size); + return 0; +} + +static void vfio_subregion_unmap(VFIORegion *region, int index) +{ + trace_vfio_region_unmap(memory_region_name(&region->mmaps[index].mem), + region->mmaps[index].offset, + region->mmaps[index].offset + + region->mmaps[index].size - 1); + memory_region_del_subregion(region->mem, &region->mmaps[index].mem); + munmap(region->mmaps[index].mmap, region->mmaps[index].size); + object_unparent(OBJECT(&region->mmaps[index].mem)); + region->mmaps[index].mmap = NULL; +} + +int vfio_region_mmap(VFIORegion *region) +{ + int i, prot = 0; + char *name; + + if (!region->mem) { + return 0; + } + + prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0; + prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ?
PROT_WRITE : 0; + + for (i = 0; i < region->nr_mmaps; i++) { + region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot, + MAP_SHARED, region->vbasedev->fd, + region->fd_offset + + region->mmaps[i].offset); + if (region->mmaps[i].mmap == MAP_FAILED) { + int ret = -errno; + + trace_vfio_region_mmap_fault(memory_region_name(region->mem), i, + region->fd_offset + + region->mmaps[i].offset, + region->fd_offset + + region->mmaps[i].offset + + region->mmaps[i].size - 1, ret); + + region->mmaps[i].mmap = NULL; + + for (i--; i >= 0; i--) { + vfio_subregion_unmap(region, i); + } + + return ret; + } + + name = g_strdup_printf("%s mmaps[%d]", + memory_region_name(region->mem), i); + memory_region_init_ram_device_ptr(&region->mmaps[i].mem, + memory_region_owner(region->mem), + name, region->mmaps[i].size, + region->mmaps[i].mmap); + g_free(name); + memory_region_add_subregion(region->mem, region->mmaps[i].offset, + &region->mmaps[i].mem); + + trace_vfio_region_mmap(memory_region_name(&region->mmaps[i].mem), + region->mmaps[i].offset, + region->mmaps[i].offset + + region->mmaps[i].size - 1); + } + + return 0; +} + +void vfio_region_unmap(VFIORegion *region) +{ + int i; + + if (!region->mem) { + return; + } + + for (i = 0; i < region->nr_mmaps; i++) { + if (region->mmaps[i].mmap) { + vfio_subregion_unmap(region, i); + } + } +} + +void vfio_region_exit(VFIORegion *region) +{ + int i; + + if (!region->mem) { + return; + } + + for (i = 0; i < region->nr_mmaps; i++) { + if (region->mmaps[i].mmap) { + memory_region_del_subregion(region->mem, &region->mmaps[i].mem); + } + } + + trace_vfio_region_exit(region->vbasedev->name, region->nr); +} + +void vfio_region_finalize(VFIORegion *region) +{ + int i; + + if (!region->mem) { + return; + } + + for (i = 0; i < region->nr_mmaps; i++) { + if (region->mmaps[i].mmap) { + munmap(region->mmaps[i].mmap, region->mmaps[i].size); + object_unparent(OBJECT(&region->mmaps[i].mem)); + } + } + + object_unparent(OBJECT(region->mem)); + + g_free(region->mem); + g_free(region->mmaps); + + trace_vfio_region_finalize(region->vbasedev->name, region->nr); + + region->mem = NULL; + region->mmaps = NULL; + region->nr_mmaps = 0; + region->size = 0; + region->flags = 0; + region->nr = 0; +} + +void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled) +{ + int i; + + if (!region->mem) { + return; + } + + for (i = 0; i < region->nr_mmaps; i++) { + if (region->mmaps[i].mmap) { + memory_region_set_enabled(&region->mmaps[i].mem, enabled); + } + } + + trace_vfio_region_mmaps_set_enabled(memory_region_name(region->mem), + enabled); +} + +int vfio_get_region_info(VFIODevice *vbasedev, int index, + struct vfio_region_info **info) +{ + size_t argsz = sizeof(struct vfio_region_info); + + *info = g_malloc0(argsz); + + (*info)->index = index; +retry: + (*info)->argsz = argsz; + + if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) { + g_free(*info); + *info = NULL; + return -errno; + } + + if ((*info)->argsz > argsz) { + argsz = (*info)->argsz; + *info = g_realloc(*info, argsz); + + goto retry; + } + + return 0; +} + +int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type, + uint32_t subtype, struct vfio_region_info **info) +{ + int i; + + for (i = 0; i < vbasedev->num_regions; i++) { + struct vfio_info_cap_header *hdr; + struct vfio_region_info_cap_type *cap_type; + + if (vfio_get_region_info(vbasedev, i, info)) { + continue; + } + + hdr = vfio_get_region_info_cap(*info, VFIO_REGION_INFO_CAP_TYPE); + if (!hdr) { + g_free(*info); + continue; + } + + cap_type = container_of(hdr,
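/*
 * Illustrative aside, not part of the patch: the usual region life cycle a
 * device front end drives with the helpers above.  The names and the error
 * handling are assumptions for the example; callers commonly treat a
 * vfio_region_mmap() failure as non-fatal because vfio_region_ops still
 * services accesses through pread()/pwrite().
 */
static int example_region_init(Object *obj, VFIODevice *vbasedev,
                               VFIORegion *region, int index, Error **errp)
{
    int ret = vfio_region_setup(obj, vbasedev, region, index, "example-region");

    if (ret) {
        error_setg_errno(errp, -ret, "failed to set up region %d", index);
        return ret;
    }
    if (vfio_region_mmap(region)) {
        /* non-fatal: fall back to the slow read/write path */
    }
    return 0;
}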
struct vfio_region_info_cap_type, header); + + trace_vfio_get_dev_region(vbasedev->name, i, + cap_type->type, cap_type->subtype); + + if (cap_type->type == type && cap_type->subtype == subtype) { + return 0; + } + + g_free(*info); + } + + *info = NULL; + return -ENODEV; +} + +bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) +{ + struct vfio_region_info *info = NULL; + bool ret = false; + + if (!vfio_get_region_info(vbasedev, region, &info)) { + if (vfio_get_region_info_cap(info, cap_type)) { + ret = true; + } + g_free(info); + } + + return ret; +} diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build index da9af297a0..2a6912c940 100644 --- a/hw/vfio/meson.build +++ b/hw/vfio/meson.build @@ -1,6 +1,8 @@ vfio_ss = ss.source_set() vfio_ss.add(files( + 'helpers.c', 'common.c', + 'container.c', 'spapr.c', 'migration.c', )) diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index da43dcd2fe..28d422b39f 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -872,8 +872,8 @@ static int vfio_migration_init(VFIODevice *vbasedev) NULL; migration->vm_state = qdev_add_vm_change_state_handler_full( vbasedev->dev, vfio_vmstate_change, prepare_cb, vbasedev); - migration->migration_state.notify = vfio_migration_state_notifier; - add_migration_state_change_notifier(&migration->migration_state); + migration_add_notifier(&migration->migration_state, + vfio_migration_state_notifier); return 0; } @@ -882,7 +882,7 @@ static void vfio_migration_deinit(VFIODevice *vbasedev) { VFIOMigration *migration = vbasedev->migration; - remove_migration_state_change_notifier(&migration->migration_state); + migration_remove_notifier(&migration->migration_state); qemu_del_vm_change_state_handler(migration->vm_state); unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); vfio_migration_free(vbasedev); @@ -891,8 +891,6 @@ static void vfio_migration_deinit(VFIODevice *vbasedev) static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) { - int ret; - if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { error_propagate(errp, err); return -EINVAL; @@ -901,13 +899,7 @@ static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) vbasedev->migration_blocker = error_copy(err); error_free(err); - ret = migrate_add_blocker(vbasedev->migration_blocker, errp); - if (ret < 0) { - error_free(vbasedev->migration_blocker); - vbasedev->migration_blocker = NULL; - } - - return ret; + return migrate_add_blocker(&vbasedev->migration_blocker, errp); } /* ---------------------------------------------------------------------- */ @@ -994,9 +986,5 @@ void vfio_migration_exit(VFIODevice *vbasedev) vfio_migration_deinit(vbasedev); } - if (vbasedev->migration_blocker) { - migrate_del_blocker(vbasedev->migration_blocker); - error_free(vbasedev->migration_blocker); - vbasedev->migration_blocker = NULL; - } + migrate_del_blocker(&vbasedev->migration_blocker); } diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 898296fd54..b27011cee7 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2675,6 +2675,33 @@ static bool vfio_msix_present(void *opaque, int version_id) return msix_present(pdev); } +static bool vfio_display_migration_needed(void *opaque) +{ + VFIOPCIDevice *vdev = opaque; + + /* + * We need to migrate the VFIODisplay object if ramfb *migration* was + * explicitly requested (in which case we enforced both ramfb=on and + * display=on), or ramfb migration was left at the default "auto" + * setting, and *ramfb* was explicitly requested (in which case we + * enforced 
display=on). + */ + return vdev->ramfb_migrate == ON_OFF_AUTO_ON || + (vdev->ramfb_migrate == ON_OFF_AUTO_AUTO && vdev->enable_ramfb); +} + +const VMStateDescription vmstate_vfio_display = { + .name = "VFIOPCIDevice/VFIODisplay", + .version_id = 1, + .minimum_version_id = 1, + .needed = vfio_display_migration_needed, + .fields = (VMStateField[]){ + VMSTATE_STRUCT_POINTER(dpy, VFIOPCIDevice, vfio_display_vmstate, + VFIODisplay), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_vfio_pci_config = { .name = "VFIOPCIDevice", .version_id = 1, @@ -2683,6 +2710,10 @@ const VMStateDescription vmstate_vfio_pci_config = { VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice), VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, vfio_msix_present), VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &vmstate_vfio_display, + NULL } }; @@ -2895,10 +2926,10 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) static void vfio_pci_put_device(VFIOPCIDevice *vdev) { + vfio_detach_device(&vdev->vbasedev); + g_free(vdev->vbasedev.name); g_free(vdev->msix); - - vfio_put_base_device(&vdev->vbasedev); } static void vfio_err_notifier_handler(void *opaque) @@ -3045,13 +3076,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) { VFIOPCIDevice *vdev = VFIO_PCI(pdev); VFIODevice *vbasedev = &vdev->vbasedev; - VFIODevice *vbasedev_iter; - VFIOGroup *group; - char *tmp, *subsys, group_path[PATH_MAX], *group_name; + char *tmp, *subsys; Error *err = NULL; - ssize_t len; struct stat st; - int groupid; int i, ret; bool is_mdev; char uuid[UUID_FMT_LEN]; @@ -3082,39 +3109,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) vbasedev->type = VFIO_DEVICE_TYPE_PCI; vbasedev->dev = DEVICE(vdev); - tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev); - len = readlink(tmp, group_path, sizeof(group_path)); - g_free(tmp); - - if (len <= 0 || len >= sizeof(group_path)) { - error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG, - "no iommu_group found"); - goto error; - } - - group_path[len] = 0; - - group_name = basename(group_path); - if (sscanf(group_name, "%d", &groupid) != 1) { - error_setg_errno(errp, errno, "failed to read %s", group_path); - goto error; - } - - trace_vfio_realize(vbasedev->name, groupid); - - group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), errp); - if (!group) { - goto error; - } - - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { - if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) { - error_setg(errp, "device is already attached"); - vfio_put_group(group); - goto error; - } - } - /* * Mediated devices *might* operate compatibly with discarding of RAM, but * we cannot know for certain, it depends on whether the mdev vendor driver @@ -3132,7 +3126,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) if (vbasedev->ram_block_discard_allowed && !is_mdev) { error_setg(errp, "x-balloon-allowed only potentially compatible " "with mdev devices"); - vfio_put_group(group); goto error; } @@ -3143,10 +3136,10 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) name = g_strdup(vbasedev->name); } - ret = vfio_get_device(group, name, vbasedev, errp); + ret = vfio_attach_device(name, vbasedev, + pci_device_iommu_address_space(pdev), errp); g_free(name); if (ret) { - vfio_put_group(group); goto error; } @@ -3338,6 +3331,20 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) } } + if (vdev->ramfb_migrate == ON_OFF_AUTO_ON && !vdev->enable_ramfb) { + warn_report("x-ramfb-migrate=on but ramfb=off. 
" + "Forcing x-ramfb-migrate to off."); + vdev->ramfb_migrate = ON_OFF_AUTO_OFF; + } + if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) { + if (vdev->ramfb_migrate == ON_OFF_AUTO_AUTO) { + vdev->ramfb_migrate = ON_OFF_AUTO_OFF; + } else if (vdev->ramfb_migrate == ON_OFF_AUTO_ON) { + error_setg(errp, "x-ramfb-migrate requires enable-migration"); + goto out_deregister; + } + } + if (!pdev->failover_pair_id) { if (!vfio_migration_realize(vbasedev, errp)) { goto out_deregister; @@ -3371,7 +3378,6 @@ error: static void vfio_instance_finalize(Object *obj) { VFIOPCIDevice *vdev = VFIO_PCI(obj); - VFIOGroup *group = vdev->vbasedev.group; vfio_display_finalize(vdev); vfio_bars_finalize(vdev); @@ -3385,7 +3391,6 @@ static void vfio_instance_finalize(Object *obj) * g_free(vdev->igd_opregion); */ vfio_pci_put_device(vdev); - vfio_put_group(group); } static void vfio_exitfn(PCIDevice *pdev) @@ -3551,6 +3556,8 @@ static const TypeInfo vfio_pci_dev_info = { static Property vfio_pci_dev_nohotplug_properties[] = { DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false), + DEFINE_PROP_ON_OFF_AUTO("x-ramfb-migrate", VFIOPCIDevice, ramfb_migrate, + ON_OFF_AUTO_AUTO), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 0d89eb761e..fba8737ab2 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -174,6 +174,7 @@ struct VFIOPCIDevice { bool no_kvm_ioeventfd; bool no_vfio_ioeventfd; bool enable_ramfb; + OnOffAuto ramfb_migrate; bool defer_kvm_irq_routing; bool clear_parent_atomics_on_exit; VFIODisplay *dpy; @@ -227,4 +228,6 @@ void vfio_display_reset(VFIOPCIDevice *vdev); int vfio_display_probe(VFIOPCIDevice *vdev, Error **errp); void vfio_display_finalize(VFIOPCIDevice *vdev); +extern const VMStateDescription vfio_display_vmstate; + #endif /* HW_VFIO_VFIO_PCI_H */ diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index 5af73f9287..8e3d4ac458 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -529,12 +529,7 @@ static VFIODeviceOps vfio_platform_ops = { */ static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp) { - VFIOGroup *group; - VFIODevice *vbasedev_iter; - char *tmp, group_path[PATH_MAX], *group_name; - ssize_t len; struct stat st; - int groupid; int ret; /* @sysfsdev takes precedence over @host */ @@ -557,47 +552,15 @@ static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp) return -errno; } - tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev); - len = readlink(tmp, group_path, sizeof(group_path)); - g_free(tmp); - - if (len < 0 || len >= sizeof(group_path)) { - ret = len < 0 ? 
-errno : -ENAMETOOLONG; - error_setg_errno(errp, -ret, "no iommu_group found"); - return ret; - } - - group_path[len] = 0; - - group_name = basename(group_path); - if (sscanf(group_name, "%d", &groupid) != 1) { - error_setg_errno(errp, errno, "failed to read %s", group_path); - return -errno; - } - - trace_vfio_platform_base_device_init(vbasedev->name, groupid); - - group = vfio_get_group(groupid, &address_space_memory, errp); - if (!group) { - return -ENOENT; - } - - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { - if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) { - error_setg(errp, "device is already attached"); - vfio_put_group(group); - return -EBUSY; - } - } - ret = vfio_get_device(group, vbasedev->name, vbasedev, errp); + ret = vfio_attach_device(vbasedev->name, vbasedev, + &address_space_memory, errp); if (ret) { - vfio_put_group(group); return ret; } ret = vfio_populate_device(vbasedev, errp); if (ret) { - vfio_put_group(group); + vfio_detach_device(vbasedev); } return ret; diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 0ba3c5a0e2..0eb2387cf2 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -37,7 +37,8 @@ vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s" vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" vfio_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s" -vfio_realize(const char *name, int group_id) " (%s) group %d" +vfio_attach_device(const char *name, int group_id) " (%s) group %d" +vfio_detach_device(const char *name, int group_id) " (%s) group %d" vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d" vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x" vfio_pci_reset(const char *name) " (%s)" @@ -120,7 +121,6 @@ vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 # platform.c -vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d" vfio_platform_realize(char *name, char *compat) "vfio device %s, compat = %s" vfio_platform_eoi(int pin, int fd) "EOI IRQ pin %d (fd=%d)" vfio_platform_intp_mmap_enable(int pin) "IRQ #%d still active, stay in slow path" diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 92a6933f66..aa7b272452 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1527,9 +1527,8 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, } if (hdev->migration_blocker != NULL) { - r = migrate_add_blocker(hdev->migration_blocker, errp); + r = migrate_add_blocker(&hdev->migration_blocker, errp); if (r < 0) { - error_free(hdev->migration_blocker); goto fail_busyloop; } } @@ -1597,10 +1596,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) memory_listener_unregister(&hdev->memory_listener); QLIST_REMOVE(hdev, entry); } - if (hdev->migration_blocker) { - migrate_del_blocker(hdev->migration_blocker); - error_free(hdev->migration_blocker); - } + migrate_del_blocker(&hdev->migration_blocker); g_free(hdev->mem); g_free(hdev->mem_sections); if (hdev->vhost_ops) { diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c index c3512c2dae..cc24812d2e 100644 --- a/hw/virtio/virtio-pmem.c +++ b/hw/virtio/virtio-pmem.c @@ -147,10 +147,7 @@ 
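/*
 * Illustrative aside, not part of the patch: the reworked blocker API shown
 * in the vhost.c hunk above takes the address of the Error pointer so it
 * can free and clear it on failure, and migrate_del_blocker() later frees
 * it on removal.  "ExampleState" and its "blocker" field are assumptions
 * for the example.
 */
typedef struct ExampleState { Error *blocker; } ExampleState;

static int example_block_migration(ExampleState *s, Error **errp)
{
    error_setg(&s->blocker, "example device does not support migration");
    /* on failure s->blocker has already been freed and set to NULL */
    return migrate_add_blocker(&s->blocker, errp);
}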
static void virtio_pmem_fill_device_info(const VirtIOPMEM *pmem, static MemoryRegion *virtio_pmem_get_memory_region(VirtIOPMEM *pmem, Error **errp) { - if (!pmem->memdev) { - error_setg(errp, "'%s' property must be set", VIRTIO_PMEM_MEMDEV_PROP); - return NULL; - } + assert(pmem->memdev); return &pmem->memdev->mr; } diff --git a/hw/xen/meson.build b/hw/xen/meson.build index 277f9f292b..d887fa9ba4 100644 --- a/hw/xen/meson.build +++ b/hw/xen/meson.build @@ -12,6 +12,10 @@ system_ss.add(when: ['CONFIG_XEN', xen], if_true: files( )) xen_specific_ss = ss.source_set() +xen_specific_ss.add(files( + 'xen-mapcache.c', + 'xen-hvm-common.c', +)) if have_xen_pci_passthrough xen_specific_ss.add(files( 'xen-host-pci-device.c', @@ -26,10 +30,3 @@ else endif specific_ss.add_all(when: ['CONFIG_XEN', xen], if_true: xen_specific_ss) - -xen_ss = ss.source_set() - -xen_ss.add(when: 'CONFIG_XEN', if_true: files( - 'xen-mapcache.c', - 'xen-hvm-common.c', -)) diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h index ef59810c17..ac21a95913 100644 --- a/include/hw/acpi/pcihp.h +++ b/include/hw/acpi/pcihp.h @@ -56,7 +56,7 @@ typedef struct AcpiPciHpState { } AcpiPciHpState; void acpi_pcihp_init(Object *owner, AcpiPciHpState *, PCIBus *root, - MemoryRegion *address_space_io, uint16_t io_base); + MemoryRegion *io, uint16_t io_base); bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus); void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, diff --git a/include/hw/arm/bsa.h b/include/hw/arm/bsa.h new file mode 100644 index 0000000000..8eaab603c0 --- /dev/null +++ b/include/hw/arm/bsa.h @@ -0,0 +1,35 @@ +/* + * Common definitions for Arm Base System Architecture (BSA) platforms. + * + * Copyright (c) 2015 Linaro Limited + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + */ + +#ifndef QEMU_ARM_BSA_H +#define QEMU_ARM_BSA_H + +/* These are architectural INTID values */ +#define VIRTUAL_PMU_IRQ 23 +#define ARCH_GIC_MAINT_IRQ 25 +#define ARCH_TIMER_NS_EL2_IRQ 26 +#define ARCH_TIMER_VIRT_IRQ 27 +#define ARCH_TIMER_NS_EL2_VIRT_IRQ 28 +#define ARCH_TIMER_S_EL1_IRQ 29 +#define ARCH_TIMER_NS_EL1_IRQ 30 + +#define INTID_TO_PPI(irq) ((irq) - 16) + +#endif /* QEMU_ARM_BSA_H */ diff --git a/include/hw/arm/exynos4210.h b/include/hw/arm/exynos4210.h index 68db19f0cb..d33fe38586 100644 --- a/include/hw/arm/exynos4210.h +++ b/include/hw/arm/exynos4210.h @@ -30,7 +30,7 @@ #include "hw/intc/exynos4210_gic.h" #include "hw/intc/exynos4210_combiner.h" #include "hw/core/split-irq.h" -#include "target/arm/cpu-qom.h" +#include "hw/arm/boot.h" #include "qom/object.h" #define EXYNOS4210_NCPUS 2 diff --git a/include/hw/misc/raspberrypi-fw-defs.h b/include/hw/arm/raspberrypi-fw-defs.h index 4551fe7450..4551fe7450 100644 --- a/include/hw/misc/raspberrypi-fw-defs.h +++ b/include/hw/arm/raspberrypi-fw-defs.h diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index e1ddbea96b..f69239850e 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -34,6 +34,7 @@ #include "qemu/notify.h" #include "hw/boards.h" #include "hw/arm/boot.h" +#include "hw/arm/bsa.h" #include "hw/block/flash.h" #include "sysemu/kvm.h" #include "hw/intc/arm_gicv3_common.h" @@ -43,17 +44,6 @@ #define NUM_VIRTIO_TRANSPORTS 32 #define NUM_SMMU_IRQS 4 -#define ARCH_GIC_MAINT_IRQ 9 - -#define ARCH_TIMER_VIRT_IRQ 11 -#define ARCH_TIMER_S_EL1_IRQ 13 -#define ARCH_TIMER_NS_EL1_IRQ 14 -#define ARCH_TIMER_NS_EL2_IRQ 10 - -#define VIRTUAL_PMU_IRQ 7 - -#define PPI(irq) ((irq) + 16) - /* See Linux kernel arch/arm64/include/asm/pvclock-abi.h */ #define PVTIME_SIZE_PER_CPU 64 diff --git a/include/hw/audio/pcspk.h b/include/hw/audio/pcspk.h index 9506179587..6be75a6b86 100644 --- a/include/hw/audio/pcspk.h +++ b/include/hw/audio/pcspk.h @@ -25,16 +25,6 @@ #ifndef HW_PCSPK_H #define HW_PCSPK_H -#include "hw/isa/isa.h" -#include "hw/qdev-properties.h" -#include "qapi/error.h" - #define TYPE_PC_SPEAKER "isa-pcspk" -static inline void pcspk_init(ISADevice *isadev, ISABus *bus, ISADevice *pit) -{ - object_property_set_link(OBJECT(isadev), "pit", OBJECT(pit), NULL); - isa_realize_and_unref(isadev, bus, &error_fatal); -} - #endif /* HW_PCSPK_H */ diff --git a/include/hw/boards.h b/include/hw/boards.h index 43a56dc51e..a735999298 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -135,12 +135,16 @@ typedef struct { * @clusters_supported - whether clusters are supported by the machine * @has_clusters - whether clusters are explicitly specified in the user * provided SMP configuration + * @books_supported - whether books are supported by the machine + * @drawers_supported - whether drawers are supported by the machine */ typedef struct { bool prefer_sockets; bool dies_supported; bool clusters_supported; bool has_clusters; + bool books_supported; + bool drawers_supported; } SMPCompatProps; /** @@ -323,7 +327,9 @@ typedef struct DeviceMemoryState { /** * CpuTopology: * @cpus: the number of present logical processors on the machine - * @sockets: the number of sockets on the machine + * @drawers: the number of drawers on the machine + * @books: the number of books in one drawer + * @sockets: the number of sockets in one book * @dies: the number of dies in one socket * @clusters: the number of clusters in one die * @cores: the number of cores in one cluster @@ -332,6 +338,8 @@ typedef struct DeviceMemoryState { */ 
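/*
 * Illustrative aside, not part of the patch: the interrupt numbers in the
 * new include/hw/arm/bsa.h above are GIC INTIDs, and PPIs occupy INTIDs
 * 16..31, hence INTID_TO_PPI() subtracting 16.  For instance:
 */
QEMU_BUILD_BUG_ON(INTID_TO_PPI(ARCH_TIMER_VIRT_IRQ) != 11); /* 27 - 16 */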
typedef struct CpuTopology { unsigned int cpus; + unsigned int drawers; + unsigned int books; unsigned int sockets; unsigned int dies; unsigned int clusters; diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 3968369554..18593db5b2 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -618,8 +618,10 @@ bool cpu_paging_enabled(const CPUState *cpu); * @cpu: The CPU whose memory mappings are to be obtained. * @list: Where to write the memory mappings to. * @errp: Pointer for reporting an #Error. + * + * Returns: %true on success, %false otherwise. */ -void cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, +bool cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, Error **errp); #if !defined(CONFIG_USER_ONLY) diff --git a/include/hw/core/sysemu-cpu-ops.h b/include/hw/core/sysemu-cpu-ops.h index ee169b872c..24d003fe04 100644 --- a/include/hw/core/sysemu-cpu-ops.h +++ b/include/hw/core/sysemu-cpu-ops.h @@ -19,7 +19,7 @@ typedef struct SysemuCPUOps { /** * @get_memory_mapping: Callback for obtaining the memory mappings. */ - void (*get_memory_mapping)(CPUState *cpu, MemoryMappingList *list, + bool (*get_memory_mapping)(CPUState *cpu, MemoryMappingList *list, Error **errp); /** * @get_paging_enabled: Callback for inquiring whether paging is enabled. diff --git a/include/hw/display/ramfb.h b/include/hw/display/ramfb.h index b33a2c467b..a7e0019144 100644 --- a/include/hw/display/ramfb.h +++ b/include/hw/display/ramfb.h @@ -1,11 +1,15 @@ #ifndef RAMFB_H #define RAMFB_H +#include "migration/vmstate.h" + /* ramfb.c */ typedef struct RAMFBState RAMFBState; void ramfb_display_update(QemuConsole *con, RAMFBState *s); RAMFBState *ramfb_setup(Error **errp); +extern const VMStateDescription ramfb_vmstate; + /* ramfb-standalone.c */ #define TYPE_RAMFB_DEVICE "ramfb" diff --git a/include/hw/mips/cpudevs.h b/include/hw/mips/cpudevs.h deleted file mode 100644 index f7c9728fa9..0000000000 --- a/include/hw/mips/cpudevs.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef HW_MIPS_CPUDEVS_H -#define HW_MIPS_CPUDEVS_H - -#include "target/mips/cpu-qom.h" - -/* Definitions for MIPS CPU internal devices. */ - -/* mips_int.c */ -void cpu_mips_irq_init_cpu(MIPSCPU *cpu); - -/* mips_timer.c */ -void cpu_mips_clock_init(MIPSCPU *cpu); - -#endif diff --git a/include/hw/misc/mips_itu.h b/include/hw/misc/mips_itu.h index 35218b2d14..5caed6cc36 100644 --- a/include/hw/misc/mips_itu.h +++ b/include/hw/misc/mips_itu.h @@ -73,10 +73,12 @@ struct MIPSITUState { /* SAAR */ uint64_t *saar; - MIPSCPU *cpu0; + ArchCPU *cpu0; }; /* Get ITC Configuration Tag memory region. 
*/ MemoryRegion *mips_itu_get_tag_region(MIPSITUState *itu); +void itc_reconfigure(struct MIPSITUState *tag); + #endif /* MIPS_ITU_H */ diff --git a/include/hw/nvram/xlnx-bbram.h b/include/hw/nvram/xlnx-bbram.h index 87d59ef3c0..6fc13f8cc1 100644 --- a/include/hw/nvram/xlnx-bbram.h +++ b/include/hw/nvram/xlnx-bbram.h @@ -34,7 +34,7 @@ #define RMAX_XLNX_BBRAM ((0x4c / 4) + 1) -#define TYPE_XLNX_BBRAM "xlnx,bbram-ctrl" +#define TYPE_XLNX_BBRAM "xlnx.bbram-ctrl" OBJECT_DECLARE_SIMPLE_TYPE(XlnxBBRam, XLNX_BBRAM); struct XlnxBBRam { diff --git a/include/hw/pci-host/astro.h b/include/hw/pci-host/astro.h new file mode 100644 index 0000000000..f63fd220f3 --- /dev/null +++ b/include/hw/pci-host/astro.h @@ -0,0 +1,92 @@ +/* + * HP-PARISC Astro Bus connector with Elroy PCI host bridges + */ + +#ifndef ASTRO_H +#define ASTRO_H + +#include "hw/pci/pci_host.h" + +#define ASTRO_HPA 0xfed00000 + +#define ROPES_PER_IOC 8 /* per Ike half or Pluto/Astro */ + +#define TYPE_ASTRO_CHIP "astro-chip" +OBJECT_DECLARE_SIMPLE_TYPE(AstroState, ASTRO_CHIP) + +#define TYPE_ELROY_PCI_HOST_BRIDGE "elroy-pcihost" +OBJECT_DECLARE_SIMPLE_TYPE(ElroyState, ELROY_PCI_HOST_BRIDGE) + +#define ELROY_NUM 4 /* # of Elroys */ +#define ELROY_IRQS 8 /* IOSAPIC IRQs */ + +/* ASTRO Memory and I/O regions */ +#define LMMIO_DIST_BASE_ADDR 0xf4000000ULL +#define LMMIO_DIST_BASE_SIZE 0x4000000ULL + +#define IOS_DIST_BASE_ADDR 0xfffee00000ULL +#define IOS_DIST_BASE_SIZE 0x10000ULL + +struct AstroState; + +struct ElroyState { + PCIHostState parent_obj; + + /* parent Astro device */ + struct AstroState *astro; + + /* HPA of this Elroy */ + hwaddr hpa; + + /* PCI bus number (Elroy number) */ + unsigned int pci_bus_num; + + uint64_t config_address; + uint64_t config_reg_elroy; + + uint64_t status_control; + uint64_t arb_mask; + uint64_t mmio_base[(0x0250 - 0x200) / 8]; + uint64_t error_config; + + uint32_t iosapic_reg_select; + uint64_t iosapic_reg[0x20]; + + uint32_t ilr; + + MemoryRegion this_mem; + + MemoryRegion pci_mmio; + MemoryRegion pci_mmio_alias; + MemoryRegion pci_hole; + MemoryRegion pci_io; +}; + +struct AstroState { + PCIHostState parent_obj; + + uint64_t ioc_ctrl; + uint64_t ioc_status_ctrl; + uint64_t ioc_ranges[(0x03d8 - 0x300) / 8]; + uint64_t ioc_rope_config; + uint64_t ioc_status_control; + uint64_t ioc_flush_control; + uint64_t ioc_rope_control[8]; + uint64_t tlb_ibase; + uint64_t tlb_imask; + uint64_t tlb_pcom; + uint64_t tlb_tcnfg; + uint64_t tlb_pdir_base; + + struct ElroyState *elroy[ELROY_NUM]; + + MemoryRegion this_mem; + + MemoryRegion pci_mmio; + MemoryRegion pci_io; + + IOMMUMemoryRegion iommu; + AddressSpace iommu_as; +}; + +#endif diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index b70a0b95ff..ea5aff118b 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -279,12 +279,10 @@ bool pci_bus_is_express(const PCIBus *bus); void pci_root_bus_init(PCIBus *bus, size_t bus_size, DeviceState *parent, const char *name, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, + MemoryRegion *mem, MemoryRegion *io, uint8_t devfn_min, const char *typename); PCIBus *pci_root_bus_new(DeviceState *parent, const char *name, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, + MemoryRegion *mem, MemoryRegion *io, uint8_t devfn_min, const char *typename); void pci_root_bus_cleanup(PCIBus *bus); void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, @@ -304,8 +302,7 @@ int pci_swizzle_map_irq_fn(PCIDevice *pci_dev, int pin); PCIBus *pci_register_root_bus(DeviceState *parent, const 
char *name, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq, void *irq_opaque, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, + MemoryRegion *mem, MemoryRegion *io, uint8_t devfn_min, int nirq, const char *typename); void pci_unregister_root_bus(PCIBus *bus); diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index 85469b9b53..f1a53fea8d 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -179,6 +179,8 @@ #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_SCSI 0x2020 +#define PCI_VENDOR_ID_HP 0x103c + #define PCI_VENDOR_ID_TI 0x104c #define PCI_VENDOR_ID_MOTOROLA 0x1057 diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h index 9bc6463547..35b19610f7 100644 --- a/include/hw/ppc/pnv_xscom.h +++ b/include/hw/ppc/pnv_xscom.h @@ -170,7 +170,7 @@ struct PnvXScomInterfaceClass { #define PNV10_XSCOM_PEC_PCI_BASE 0x8010800 /* index goes upwards ... */ #define PNV10_XSCOM_PEC_PCI_SIZE 0x200 -void pnv_xscom_realize(PnvChip *chip, uint64_t size, Error **errp); +void pnv_xscom_init(PnvChip *chip, uint64_t size, hwaddr addr); int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset, uint64_t xscom_base, uint64_t xscom_size, const char *compat, int compat_size); diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h index 0ac327ae60..e4f8a13afc 100644 --- a/include/hw/qdev-properties-system.h +++ b/include/hw/qdev-properties-system.h @@ -22,6 +22,7 @@ extern const PropertyInfo qdev_prop_audiodev; extern const PropertyInfo qdev_prop_off_auto_pcibar; extern const PropertyInfo qdev_prop_pcie_link_speed; extern const PropertyInfo qdev_prop_pcie_link_width; +extern const PropertyInfo qdev_prop_cpus390entitlement; #define DEFINE_PROP_PCI_DEVFN(_n, _s, _f, _d) \ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pci_devfn, int32_t) @@ -73,5 +74,8 @@ extern const PropertyInfo qdev_prop_pcie_link_width; #define DEFINE_PROP_UUID_NODEFAULT(_name, _state, _field) \ DEFINE_PROP(_name, _state, _field, qdev_prop_uuid, QemuUUID) +#define DEFINE_PROP_CPUS390ENTITLEMENT(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_cpus390entitlement, \ + CpuS390Entitlement) #endif diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h new file mode 100644 index 0000000000..c064f427e9 --- /dev/null +++ b/include/hw/s390x/cpu-topology.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * CPU Topology + * + * Copyright IBM Corp. 
2022, 2023 + * Author(s): Pierre Morel <pmorel@linux.ibm.com> + * + */ +#ifndef HW_S390X_CPU_TOPOLOGY_H +#define HW_S390X_CPU_TOPOLOGY_H + +#ifndef CONFIG_USER_ONLY + +#include "qemu/queue.h" +#include "hw/boards.h" +#include "qapi/qapi-types-machine-target.h" + +#define S390_TOPOLOGY_CPU_IFL 0x03 + +typedef struct S390TopologyId { + uint8_t sentinel; + uint8_t drawer; + uint8_t book; + uint8_t socket; + uint8_t type; + uint8_t vertical:1; + uint8_t entitlement:2; + uint8_t dedicated; + uint8_t origin; +} S390TopologyId; + +typedef struct S390TopologyEntry { + QTAILQ_ENTRY(S390TopologyEntry) next; + S390TopologyId id; + uint64_t mask; +} S390TopologyEntry; + +typedef struct S390Topology { + uint8_t *cores_per_socket; + CpuS390Polarization polarization; +} S390Topology; + +typedef QTAILQ_HEAD(, S390TopologyEntry) S390TopologyList; + +#ifdef CONFIG_KVM +bool s390_has_topology(void); +void s390_topology_setup_cpu(MachineState *ms, S390CPU *cpu, Error **errp); +void s390_topology_reset(void); +#else +static inline bool s390_has_topology(void) +{ + return false; +} +static inline void s390_topology_setup_cpu(MachineState *ms, + S390CPU *cpu, + Error **errp) {} +static inline void s390_topology_reset(void) +{ + /* Unreachable, CPU topology not implemented for TCG */ + assert(false); +} +#endif + +extern S390Topology s390_topology; + +static inline int s390_std_socket(int n, CpuTopology *smp) +{ + return (n / smp->cores) % smp->sockets; +} + +static inline int s390_std_book(int n, CpuTopology *smp) +{ + return (n / (smp->cores * smp->sockets)) % smp->books; +} + +static inline int s390_std_drawer(int n, CpuTopology *smp) +{ + return (n / (smp->cores * smp->sockets * smp->books)) % smp->drawers; +} + +#endif /* CONFIG_USER_ONLY */ + +#endif diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h index 9bba21a916..c1d46e78af 100644 --- a/include/hw/s390x/s390-virtio-ccw.h +++ b/include/hw/s390x/s390-virtio-ccw.h @@ -30,6 +30,12 @@ struct S390CcwMachineState { uint8_t loadparm[8]; }; +#define S390_PTF_REASON_NONE (0x00 << 8) +#define S390_PTF_REASON_DONE (0x01 << 8) +#define S390_PTF_REASON_BUSY (0x02 << 8) +#define S390_TOPO_FC_MASK 0xffUL +void s390_handle_ptf(S390CPU *cpu, uint8_t r1, uintptr_t ra); + struct S390CcwMachineClass { /*< private >*/ MachineClass parent_class; diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h index cf1f2efae2..9aef6d9370 100644 --- a/include/hw/s390x/sclp.h +++ b/include/hw/s390x/sclp.h @@ -112,11 +112,13 @@ typedef struct CPUEntry { } QEMU_PACKED CPUEntry; #define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET 128 +#define SCLP_READ_SCP_INFO_MNEST 4 typedef struct ReadInfo { SCCBHeader h; uint16_t rnmax; uint8_t rnsize; - uint8_t _reserved1[16 - 11]; /* 11-15 */ + uint8_t _reserved1[15 - 11]; /* 11-14 */ + uint8_t stsi_parm; /* 15-15 */ uint16_t entries_cpu; /* 16-17 */ uint16_t offset_cpu; /* 18-19 */ uint8_t _reserved2[24 - 20]; /* 20-23 */ diff --git a/include/hw/s390x/vfio-ccw.h b/include/hw/s390x/vfio-ccw.h index 63a909eb7e..4209d27657 100644 --- a/include/hw/s390x/vfio-ccw.h +++ b/include/hw/s390x/vfio-ccw.h @@ -22,6 +22,4 @@ #define TYPE_VFIO_CCW "vfio-ccw" OBJECT_DECLARE_SIMPLE_TYPE(VFIOCCWDevice, VFIO_CCW) -#define TYPE_VFIO_CCW "vfio-ccw" - #endif diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index e9b8954595..7780b9073a 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -98,6 +98,7 @@ typedef struct VFIOContainer { QLIST_HEAD(, VFIOGroup) group_list; QLIST_HEAD(, 
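/*
 * Illustrative aside, not part of the patch: a worked example for the
 * s390_std_*() helpers in the new cpu-topology.h above.  Assuming
 * smp->cores = 4, smp->sockets = 2, smp->books = 2 and smp->drawers = 2,
 * core 13 lands in socket 1 of book 1 of drawer 0.
 */
static void example_s390_topology_check(CpuTopology *smp)
{
    assert(s390_std_socket(13, smp) == 1);  /* (13 / 4) % 2 */
    assert(s390_std_book(13, smp) == 1);    /* (13 / (4 * 2)) % 2 */
    assert(s390_std_drawer(13, smp) == 0);  /* (13 / (4 * 2 * 2)) % 2 */
}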
VFIORamDiscardListener) vrdl_list; QLIST_ENTRY(VFIOContainer) next; + QLIST_HEAD(, VFIODevice) device_list; } VFIOContainer; typedef struct VFIOGuestIOMMU { @@ -129,7 +130,10 @@ typedef struct VFIODeviceOps VFIODeviceOps; typedef struct VFIODevice { QLIST_ENTRY(VFIODevice) next; + QLIST_ENTRY(VFIODevice) container_next; + QLIST_ENTRY(VFIODevice) global_next; struct VFIOGroup *group; + VFIOContainer *container; char *sysfsdev; char *name; DeviceState *dev; @@ -196,7 +200,36 @@ typedef struct VFIODisplay { } dmabuf; } VFIODisplay; -void vfio_put_base_device(VFIODevice *vbasedev); +typedef struct { + unsigned long *bitmap; + hwaddr size; + hwaddr pages; +} VFIOBitmap; + +void vfio_host_win_add(VFIOContainer *container, + hwaddr min_iova, hwaddr max_iova, + uint64_t iova_pgsizes); +int vfio_host_win_del(VFIOContainer *container, hwaddr min_iova, + hwaddr max_iova); +VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); +void vfio_put_address_space(VFIOAddressSpace *space); +bool vfio_devices_all_running_and_saving(VFIOContainer *container); + +/* container->fd */ +int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, + ram_addr_t size, IOMMUTLBEntry *iotlb); +int vfio_dma_map(VFIOContainer *container, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly); +int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start); +int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, + hwaddr iova, hwaddr size); + +int vfio_container_add_section_window(VFIOContainer *container, + MemoryRegionSection *section, + Error **errp); +void vfio_container_del_section_window(VFIOContainer *container, + MemoryRegionSection *section); + void vfio_disable_irqindex(VFIODevice *vbasedev, int index); void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index); void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index); @@ -214,15 +247,22 @@ void vfio_region_unmap(VFIORegion *region); void vfio_region_exit(VFIORegion *region); void vfio_region_finalize(VFIORegion *region); void vfio_reset_handler(void *opaque); -VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp); -void vfio_put_group(VFIOGroup *group); struct vfio_device_info *vfio_get_device_info(int fd); -int vfio_get_device(VFIOGroup *group, const char *name, - VFIODevice *vbasedev, Error **errp); +int vfio_attach_device(char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp); +void vfio_detach_device(VFIODevice *vbasedev); + +int vfio_kvm_device_add_fd(int fd, Error **errp); +int vfio_kvm_device_del_fd(int fd, Error **errp); extern const MemoryRegionOps vfio_region_ops; typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; +typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; extern VFIOGroupList vfio_group_list; +extern VFIODeviceList vfio_device_list; + +extern const MemoryListener vfio_memory_listener; +extern int vfio_kvm_device_fd; bool vfio_mig_active(void); int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp); @@ -245,6 +285,8 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, unsigned int *avail); struct vfio_info_cap_header * vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id); +struct vfio_info_cap_header * +vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id); #endif extern const MemoryListener vfio_prereg_listener; @@ -257,4 +299,12 @@ int vfio_spapr_remove_window(VFIOContainer *container, bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); void vfio_migration_exit(VFIODevice 
*vbasedev); +int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size); +bool vfio_devices_all_running_and_mig_active(VFIOContainer *container); +bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container); +int vfio_devices_query_dirty_bitmap(VFIOContainer *container, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size); +int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + uint64_t size, ram_addr_t ram_addr); #endif /* HW_VFIO_VFIO_COMMON_H */ diff --git a/include/hw/virtio/virtio-input.h b/include/hw/virtio/virtio-input.h index 08f1591424..a6c9703644 100644 --- a/include/hw/virtio/virtio-input.h +++ b/include/hw/virtio/virtio-input.h @@ -84,7 +84,7 @@ struct VirtIOInputHID { VirtIOInput parent_obj; char *display; uint32_t head; - QemuInputHandler *handler; + const QemuInputHandler *handler; QemuInputHandlerState *hs; int ledstate; bool wheel_axis; diff --git a/include/migration/blocker.h b/include/migration/blocker.h index 9cebe2ba06..b048f301b4 100644 --- a/include/migration/blocker.h +++ b/include/migration/blocker.h @@ -17,19 +17,23 @@ /** * @migrate_add_blocker - prevent migration from proceeding * - * @reason - an error to be returned whenever migration is attempted + * @reasonp - address of an error to be returned whenever migration is attempted * * @errp - [out] The reason (if any) we cannot block migration right now. * * @returns - 0 on success, -EBUSY/-EACCES on failure, with errp set. + * + * *@reasonp is freed and set to NULL if failure is returned. + * On success, the caller must not free @reasonp, except by + * calling migrate_del_blocker. */ -int migrate_add_blocker(Error *reason, Error **errp); +int migrate_add_blocker(Error **reasonp, Error **errp); /** * @migrate_add_blocker_internal - prevent migration from proceeding without * only-migrate implications * - * @reason - an error to be returned whenever migration is attempted + * @reasonp - address of an error to be returned whenever migration is attempted * * @errp - [out] The reason (if any) we cannot block migration right now. * @@ -38,14 +42,20 @@ int migrate_add_blocker(Error *reason, Error **errp); * Some of the migration blockers can be temporary (e.g., for a few seconds), * so it shouldn't need to conflict with "-only-migratable". For those cases, * we can call this function rather than @migrate_add_blocker(). + * + * *@reasonp is freed and set to NULL if failure is returned. + * On success, the caller must not free @reasonp, except by + * calling migrate_del_blocker. */ -int migrate_add_blocker_internal(Error *reason, Error **errp); +int migrate_add_blocker_internal(Error **reasonp, Error **errp); /** - * @migrate_del_blocker - remove a blocking error from migration + * @migrate_del_blocker - remove a blocking error from migration and free it. + * + * @reasonp - address of the error blocking migration * - * @reason - the error blocking migration + * This function frees *@reasonp and sets it to NULL. 
*/ -void migrate_del_blocker(Error *reason); +void migrate_del_blocker(Error **reasonp); #endif diff --git a/include/migration/misc.h b/include/migration/misc.h index 7dcc0b5c2c..673ac490fb 100644 --- a/include/migration/misc.h +++ b/include/migration/misc.h @@ -60,8 +60,10 @@ void migration_object_init(void); void migration_shutdown(void); bool migration_is_idle(void); bool migration_is_active(MigrationState *); -void add_migration_state_change_notifier(Notifier *notify); -void remove_migration_state_change_notifier(Notifier *notify); +void migration_add_notifier(Notifier *notify, + void (*func)(Notifier *notifier, void *data)); +void migration_remove_notifier(Notifier *notify); +void migration_call_notifiers(MigrationState *s); bool migration_in_setup(MigrationState *); bool migration_has_finished(MigrationState *); bool migration_has_failed(MigrationState *); diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 1109482a00..c797f0d457 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -212,4 +212,19 @@ # define QEMU_USED #endif +/* + * Ugly CPP trick that is like "defined FOO", but also works in C + * code. Useful to replace #ifdef with "if" statements; assumes + * the symbol was defined with Meson's "config.set()", so it is empty + * if defined. + */ +#define IS_ENABLED(x) IS_EMPTY(x) + +#define IS_EMPTY_JUNK_ junk, +#define IS_EMPTY(value) IS_EMPTY_(IS_EMPTY_JUNK_##value) + +/* Expands to either SECOND_ARG(junk, 1, 0) or SECOND_ARG(IS_EMPTY_JUNK_CONFIG_FOO 1, 0) */ +#define SECOND_ARG(first, second, ...) second +#define IS_EMPTY_(junk_maybecomma) SECOND_ARG(junk_maybecomma 1, 0) + #endif /* COMPILER_H */ diff --git a/include/sysemu/memory_mapping.h b/include/sysemu/memory_mapping.h index 3bbeb1bcb4..021e0a6230 100644 --- a/include/sysemu/memory_mapping.h +++ b/include/sysemu/memory_mapping.h @@ -71,7 +71,7 @@ void guest_phys_blocks_free(GuestPhysBlockList *list); void guest_phys_blocks_init(GuestPhysBlockList *list); void guest_phys_blocks_append(GuestPhysBlockList *list); -void qemu_get_guest_memory_mapping(MemoryMappingList *list, +bool qemu_get_guest_memory_mapping(MemoryMappingList *list, const GuestPhysBlockList *guest_phys_blocks, Error **errp); diff --git a/include/ui/input.h b/include/ui/input.h index 24d8e4579e..8f9aac562e 100644 --- a/include/ui/input.h +++ b/include/ui/input.h @@ -30,7 +30,7 @@ struct QemuInputHandler { }; QemuInputHandlerState *qemu_input_handler_register(DeviceState *dev, - QemuInputHandler *handler); + const QemuInputHandler *handler); void qemu_input_handler_activate(QemuInputHandlerState *s); void qemu_input_handler_deactivate(QemuInputHandlerState *s); void qemu_input_handler_unregister(QemuInputHandlerState *s); diff --git a/linux-headers/linux/iommufd.h b/linux-headers/linux/iommufd.h new file mode 100644 index 0000000000..218bf7ac98 --- /dev/null +++ b/linux-headers/linux/iommufd.h @@ -0,0 +1,444 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. + */ +#ifndef _IOMMUFD_H +#define _IOMMUFD_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +#define IOMMUFD_TYPE (';') + +/** + * DOC: General ioctl format + * + * The ioctl interface follows a general format to allow for extensibility. Each + * ioctl is passed in a structure pointer as the argument providing the size of + * the structure in the first u32. The kernel checks that any structure space + * beyond what it understands is 0. 
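Stepping back to the IS_ENABLED() helper added to include/qemu/compiler.h above: it lets a Meson-defined switch be tested with an ordinary C expression instead of #ifdef, as long as the symbol was produced by config.set() and is therefore empty when enabled. A hedged sketch using CONFIG_COROUTINE_POOL, which this same series converts from config_host_data.set10() to set(), making the symbol empty when enabled:

#include "qemu/osdep.h"

static bool use_coroutine_pool(void)
{
    /*
     * Folds to 1 when CONFIG_COROUTINE_POOL is defined (empty) and to 0
     * when it is absent, so the code it guards stays visible to the
     * compiler instead of being hidden behind an #ifdef.
     */
    return IS_ENABLED(CONFIG_COROUTINE_POOL);
}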
This allows userspace to use the backward + * compatible portion while consistently using the newer, larger, structures. + * + * ioctls use a standard meaning for common errnos: + * + * - ENOTTY: The IOCTL number itself is not supported at all + * - E2BIG: The IOCTL number is supported, but the provided structure has + * non-zero in a part the kernel does not understand. + * - EOPNOTSUPP: The IOCTL number is supported, and the structure is + * understood, however a known field has a value the kernel does not + * understand or support. + * - EINVAL: Everything about the IOCTL was understood, but a field is not + * correct. + * - ENOENT: An ID or IOVA provided does not exist. + * - ENOMEM: Out of memory. + * - EOVERFLOW: Mathematics overflowed. + * + * As well as additional errnos, within specific ioctls. + */ +enum { + IOMMUFD_CMD_BASE = 0x80, + IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE, + IOMMUFD_CMD_IOAS_ALLOC, + IOMMUFD_CMD_IOAS_ALLOW_IOVAS, + IOMMUFD_CMD_IOAS_COPY, + IOMMUFD_CMD_IOAS_IOVA_RANGES, + IOMMUFD_CMD_IOAS_MAP, + IOMMUFD_CMD_IOAS_UNMAP, + IOMMUFD_CMD_OPTION, + IOMMUFD_CMD_VFIO_IOAS, + IOMMUFD_CMD_HWPT_ALLOC, + IOMMUFD_CMD_GET_HW_INFO, +}; + +/** + * struct iommu_destroy - ioctl(IOMMU_DESTROY) + * @size: sizeof(struct iommu_destroy) + * @id: iommufd object ID to destroy. Can be any destroyable object type. + * + * Destroy any object held within iommufd. + */ +struct iommu_destroy { + __u32 size; + __u32 id; +}; +#define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY) + +/** + * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC) + * @size: sizeof(struct iommu_ioas_alloc) + * @flags: Must be 0 + * @out_ioas_id: Output IOAS ID for the allocated object + * + * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA) + * to memory mapping. + */ +struct iommu_ioas_alloc { + __u32 size; + __u32 flags; + __u32 out_ioas_id; +}; +#define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC) + +/** + * struct iommu_iova_range - ioctl(IOMMU_IOVA_RANGE) + * @start: First IOVA + * @last: Inclusive last IOVA + * + * An interval in IOVA space. + */ +struct iommu_iova_range { + __aligned_u64 start; + __aligned_u64 last; +}; + +/** + * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES) + * @size: sizeof(struct iommu_ioas_iova_ranges) + * @ioas_id: IOAS ID to read ranges from + * @num_iovas: Input/Output total number of ranges in the IOAS + * @__reserved: Must be 0 + * @allowed_iovas: Pointer to the output array of struct iommu_iova_range + * @out_iova_alignment: Minimum alignment required for mapping IOVA + * + * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges + * is not allowed. num_iovas will be set to the total number of iovas and + * the allowed_iovas[] will be filled in as space permits. + * + * The allowed ranges are dependent on the HW path the DMA operation takes, and + * can change during the lifetime of the IOAS. A fresh empty IOAS will have a + * full range, and each attached device will narrow the ranges based on that + * device's HW restrictions. Detaching a device can widen the ranges. Userspace + * should query ranges after every attach/detach to know what IOVAs are valid + * for mapping. + * + * On input num_iovas is the length of the allowed_iovas array. On output it is + * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set + * num_iovas to the required value if num_iovas is too small. In this case the + * caller should allocate a larger output array and re-issue the ioctl. 
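As a concrete illustration of the probe-then-retry pattern documented here, a userspace sketch might look like the following, assuming an open /dev/iommu descriptor and a previously allocated IOAS (the helper name is made up and error handling is kept minimal):

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/iommufd.h>

static int query_iova_ranges(int fd, uint32_t ioas_id,
                             struct iommu_iova_range **out, uint32_t *count)
{
    struct iommu_ioas_iova_ranges cmd = {
        .size = sizeof(cmd),
        .ioas_id = ioas_id,
        .num_iovas = 0,         /* first pass only learns the required count */
    };
    struct iommu_iova_range *ranges;

    if (ioctl(fd, IOMMU_IOAS_IOVA_RANGES, &cmd) && errno != EMSGSIZE) {
        return -errno;
    }
    if (cmd.num_iovas == 0) {
        *out = NULL;
        *count = 0;
        return 0;
    }
    ranges = calloc(cmd.num_iovas, sizeof(*ranges));
    if (!ranges) {
        return -ENOMEM;
    }
    cmd.allowed_iovas = (uintptr_t)ranges;
    if (ioctl(fd, IOMMU_IOAS_IOVA_RANGES, &cmd)) {
        free(ranges);
        return -errno;
    }
    *out = ranges;
    *count = cmd.num_iovas;
    return 0;
}

Because the ranges can change on every device attach or detach, callers are expected to re-run this query rather than cache its result.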
+ * + * out_iova_alignment returns the minimum IOVA alignment that can be given + * to IOMMU_IOAS_MAP/COPY. IOVA's must satisfy:: + * + * starting_iova % out_iova_alignment == 0 + * (starting_iova + length) % out_iova_alignment == 0 + * + * out_iova_alignment can be 1 indicating any IOVA is allowed. It cannot + * be higher than the system PAGE_SIZE. + */ +struct iommu_ioas_iova_ranges { + __u32 size; + __u32 ioas_id; + __u32 num_iovas; + __u32 __reserved; + __aligned_u64 allowed_iovas; + __aligned_u64 out_iova_alignment; +}; +#define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES) + +/** + * struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS) + * @size: sizeof(struct iommu_ioas_allow_iovas) + * @ioas_id: IOAS ID to allow IOVAs from + * @num_iovas: Input/Output total number of ranges in the IOAS + * @__reserved: Must be 0 + * @allowed_iovas: Pointer to array of struct iommu_iova_range + * + * Ensure a range of IOVAs are always available for allocation. If this call + * succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges + * that are narrower than the ranges provided here. This call will fail if + * IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges. + * + * When an IOAS is first created the IOVA_RANGES will be maximally sized, and as + * devices are attached the IOVA will narrow based on the device restrictions. + * When an allowed range is specified any narrowing will be refused, ie device + * attachment can fail if the device requires limiting within the allowed range. + * + * Automatic IOVA allocation is also impacted by this call. MAP will only + * allocate within the allowed IOVAs if they are present. + * + * This call replaces the entire allowed list with the given list. + */ +struct iommu_ioas_allow_iovas { + __u32 size; + __u32 ioas_id; + __u32 num_iovas; + __u32 __reserved; + __aligned_u64 allowed_iovas; +}; +#define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS) + +/** + * enum iommufd_ioas_map_flags - Flags for map and copy + * @IOMMU_IOAS_MAP_FIXED_IOVA: If clear the kernel will compute an appropriate + * IOVA to place the mapping at + * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping + * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping + */ +enum iommufd_ioas_map_flags { + IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0, + IOMMU_IOAS_MAP_WRITEABLE = 1 << 1, + IOMMU_IOAS_MAP_READABLE = 1 << 2, +}; + +/** + * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP) + * @size: sizeof(struct iommu_ioas_map) + * @flags: Combination of enum iommufd_ioas_map_flags + * @ioas_id: IOAS ID to change the mapping of + * @__reserved: Must be 0 + * @user_va: Userspace pointer to start mapping from + * @length: Number of bytes to map + * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set + * then this must be provided as input. + * + * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the + * mapping will be established at iova, otherwise a suitable location based on + * the reserved and allowed lists will be automatically selected and returned in + * iova. + * + * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently + * be unused, existing IOVA cannot be replaced. 
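With the flags above, placing an existing user buffer at a caller-chosen IOVA is a single ioctl on the structure defined next; a sketch, with fd, ioas_id, buf, len and iova assumed to be supplied by the caller:

#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/iommufd.h>

static int ioas_map_fixed(int fd, uint32_t ioas_id, void *buf,
                          uint64_t len, uint64_t iova)
{
    struct iommu_ioas_map map = {
        .size = sizeof(map),
        .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_READABLE |
                 IOMMU_IOAS_MAP_WRITEABLE,
        .ioas_id = ioas_id,
        .user_va = (uintptr_t)buf,
        .length = len,
        .iova = iova,       /* must be currently unused, as noted above */
    };

    return ioctl(fd, IOMMU_IOAS_MAP, &map) ? -errno : 0;
}

Dropping IOMMU_IOAS_MAP_FIXED_IOVA instead lets the kernel pick the IOVA, which is then returned in map.iova.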
+ */ +struct iommu_ioas_map { + __u32 size; + __u32 flags; + __u32 ioas_id; + __u32 __reserved; + __aligned_u64 user_va; + __aligned_u64 length; + __aligned_u64 iova; +}; +#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP) + +/** + * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY) + * @size: sizeof(struct iommu_ioas_copy) + * @flags: Combination of enum iommufd_ioas_map_flags + * @dst_ioas_id: IOAS ID to change the mapping of + * @src_ioas_id: IOAS ID to copy from + * @length: Number of bytes to copy and map + * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is + * set then this must be provided as input. + * @src_iova: IOVA to start the copy + * + * Copy an already existing mapping from src_ioas_id and establish it in + * dst_ioas_id. The src iova/length must exactly match a range used with + * IOMMU_IOAS_MAP. + * + * This may be used to efficiently clone a subset of an IOAS to another, or as a + * kind of 'cache' to speed up mapping. Copy has an efficiency advantage over + * establishing equivalent new mappings, as internal resources are shared, and + * the kernel will pin the user memory only once. + */ +struct iommu_ioas_copy { + __u32 size; + __u32 flags; + __u32 dst_ioas_id; + __u32 src_ioas_id; + __aligned_u64 length; + __aligned_u64 dst_iova; + __aligned_u64 src_iova; +}; +#define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY) + +/** + * struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP) + * @size: sizeof(struct iommu_ioas_unmap) + * @ioas_id: IOAS ID to change the mapping of + * @iova: IOVA to start the unmapping at + * @length: Number of bytes to unmap, and return back the bytes unmapped + * + * Unmap an IOVA range. The iova/length must be a superset of a previously + * mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or + * truncating ranges is not allowed. The values 0 to U64_MAX will unmap + * everything. + */ +struct iommu_ioas_unmap { + __u32 size; + __u32 ioas_id; + __aligned_u64 iova; + __aligned_u64 length; +}; +#define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP) + +/** + * enum iommufd_option - ioctl(IOMMU_OPTION_RLIMIT_MODE) and + * ioctl(IOMMU_OPTION_HUGE_PAGES) + * @IOMMU_OPTION_RLIMIT_MODE: + * Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege + * to invoke this. Value 0 (default) is user based accouting, 1 uses process + * based accounting. Global option, object_id must be 0 + * @IOMMU_OPTION_HUGE_PAGES: + * Value 1 (default) allows contiguous pages to be combined when generating + * iommu mappings. Value 0 disables combining, everything is mapped to + * PAGE_SIZE. This can be useful for benchmarking. This is a per-IOAS + * option, the object_id must be the IOAS ID. + */ +enum iommufd_option { + IOMMU_OPTION_RLIMIT_MODE = 0, + IOMMU_OPTION_HUGE_PAGES = 1, +}; + +/** + * enum iommufd_option_ops - ioctl(IOMMU_OPTION_OP_SET) and + * ioctl(IOMMU_OPTION_OP_GET) + * @IOMMU_OPTION_OP_SET: Set the option's value + * @IOMMU_OPTION_OP_GET: Get the option's value + */ +enum iommufd_option_ops { + IOMMU_OPTION_OP_SET = 0, + IOMMU_OPTION_OP_GET = 1, +}; + +/** + * struct iommu_option - iommu option multiplexer + * @size: sizeof(struct iommu_option) + * @option_id: One of enum iommufd_option + * @op: One of enum iommufd_option_ops + * @__reserved: Must be 0 + * @object_id: ID of the object if required + * @val64: Option value to set or value returned on get + * + * Change a simple option value. This multiplexor allows controlling options + * on objects. 
IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET + * will return the current value. + */ +struct iommu_option { + __u32 size; + __u32 option_id; + __u16 op; + __u16 __reserved; + __u32 object_id; + __aligned_u64 val64; +}; +#define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION) + +/** + * enum iommufd_vfio_ioas_op - IOMMU_VFIO_IOAS_* ioctls + * @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS + * @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS + * @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility + */ +enum iommufd_vfio_ioas_op { + IOMMU_VFIO_IOAS_GET = 0, + IOMMU_VFIO_IOAS_SET = 1, + IOMMU_VFIO_IOAS_CLEAR = 2, +}; + +/** + * struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS) + * @size: sizeof(struct iommu_vfio_ioas) + * @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set + * For IOMMU_VFIO_IOAS_GET will output the IOAS ID + * @op: One of enum iommufd_vfio_ioas_op + * @__reserved: Must be 0 + * + * The VFIO compatibility support uses a single ioas because VFIO APIs do not + * support the ID field. Set or Get the IOAS that VFIO compatibility will use. + * When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the + * compatibility ioas, either by taking what is already set, or auto creating + * one. From then on VFIO will continue to use that ioas and is not effected by + * this ioctl. SET or CLEAR does not destroy any auto-created IOAS. + */ +struct iommu_vfio_ioas { + __u32 size; + __u32 ioas_id; + __u16 op; + __u16 __reserved; +}; +#define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS) + +/** + * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC) + * @size: sizeof(struct iommu_hwpt_alloc) + * @flags: Must be 0 + * @dev_id: The device to allocate this HWPT for + * @pt_id: The IOAS to connect this HWPT to + * @out_hwpt_id: The ID of the new HWPT + * @__reserved: Must be 0 + * + * Explicitly allocate a hardware page table object. This is the same object + * type that is returned by iommufd_device_attach() and represents the + * underlying iommu driver's iommu_domain kernel object. + * + * A HWPT will be created with the IOVA mappings from the given IOAS. + */ +struct iommu_hwpt_alloc { + __u32 size; + __u32 flags; + __u32 dev_id; + __u32 pt_id; + __u32 out_hwpt_id; + __u32 __reserved; +}; +#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) + +/** + * struct iommu_hw_info_vtd - Intel VT-d hardware information + * + * @flags: Must be 0 + * @__reserved: Must be 0 + * + * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec + * section 11.4.2 Capability Register. + * @ecap_reg: Value of Intel VT-d capability register defined in VT-d spec + * section 11.4.3 Extended Capability Register. + * + * User needs to understand the Intel VT-d specification to decode the + * register value. + */ +struct iommu_hw_info_vtd { + __u32 flags; + __u32 __reserved; + __aligned_u64 cap_reg; + __aligned_u64 ecap_reg; +}; + +/** + * enum iommu_hw_info_type - IOMMU Hardware Info Types + * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware + * info + * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type + */ +enum iommu_hw_info_type { + IOMMU_HW_INFO_TYPE_NONE, + IOMMU_HW_INFO_TYPE_INTEL_VTD, +}; + +/** + * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO) + * @size: sizeof(struct iommu_hw_info) + * @flags: Must be 0 + * @dev_id: The device bound to the iommufd + * @data_len: Input the length of a user buffer in bytes. 
Output the length of + * data that kernel supports + * @data_uptr: User pointer to a user-space buffer used by the kernel to fill + * the iommu type specific hardware information data + * @out_data_type: Output the iommu hardware info type as defined in the enum + * iommu_hw_info_type. + * @__reserved: Must be 0 + * + * Query an iommu type specific hardware information data from an iommu behind + * a given device that has been bound to iommufd. This hardware info data will + * be used to sync capabilities between the virtual iommu and the physical + * iommu, e.g. a nested translation setup needs to check the hardware info, so + * a guest stage-1 page table can be compatible with the physical iommu. + * + * To capture an iommu type specific hardware information data, @data_uptr and + * its length @data_len must be provided. Trailing bytes will be zeroed if the + * user buffer is larger than the data that kernel has. Otherwise, kernel only + * fills the buffer using the given length in @data_len. If the ioctl succeeds, + * @data_len will be updated to the length that kernel actually supports, + * @out_data_type will be filled to decode the data filled in the buffer + * pointed by @data_uptr. Input @data_len == zero is allowed. + */ +struct iommu_hw_info { + __u32 size; + __u32 flags; + __u32 dev_id; + __u32 data_len; + __aligned_u64 data_uptr; + __u32 out_data_type; + __u32 __reserved; +}; +#define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO) +#endif diff --git a/linux-user/elfload.c b/linux-user/elfload.c index f21e2e0c3d..2e3809f03c 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1237,6 +1237,14 @@ static uint32_t get_elf_hwcap(void) hwcaps |= HWCAP_LOONGARCH_LAM; } + if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LSX)) { + hwcaps |= HWCAP_LOONGARCH_LSX; + } + + if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LASX)) { + hwcaps |= HWCAP_LOONGARCH_LASX; + } + return hwcaps; } @@ -2362,31 +2370,58 @@ static abi_ulong setup_arg_pages(struct linux_binprm *bprm, * Map and zero the bss. We need to explicitly zero any fractional pages * after the data section (i.e. bss). Return false on mapping failure. */ -static bool zero_bss(abi_ulong start_bss, abi_ulong end_bss, int prot) +static bool zero_bss(abi_ulong start_bss, abi_ulong end_bss, + int prot, Error **errp) { abi_ulong align_bss; + /* We only expect writable bss; the code segment shouldn't need this. */ + if (!(prot & PROT_WRITE)) { + error_setg(errp, "PT_LOAD with non-writable bss"); + return false; + } + align_bss = TARGET_PAGE_ALIGN(start_bss); end_bss = TARGET_PAGE_ALIGN(end_bss); if (start_bss < align_bss) { int flags = page_get_flags(start_bss); - if (!(flags & PAGE_VALID)) { - /* Map the start of the bss. */ + if (!(flags & PAGE_BITS)) { + /* + * The whole address space of the executable was reserved + * at the start, therefore all pages will be VALID. + * But assuming there are no PROT_NONE PT_LOAD segments, + * a PROT_NONE page means no data all bss, and we can + * simply extend the new anon mapping back to the start + * of the page of bss. + */ align_bss -= TARGET_PAGE_SIZE; - } else if (flags & PAGE_WRITE) { - /* The page is already mapped writable. */ - memset(g2h_untagged(start_bss), 0, align_bss - start_bss); } else { - /* Read-only zeros? */ - g_assert_not_reached(); + /* + * The start of the bss shares a page with something. + * The only thing that we expect is the data section, + * which would already be marked writable. + * Overlapping the RX code segment seems malformed. 
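Returning once more to the iommufd definitions above, IOMMU_GET_HW_INFO follows the same size-checked convention; a sketch that probes for the Intel VT-d capability registers (fd and dev_id are assumed, and a short buffer is truncated while a long one is zero-padded, as the comment describes):

#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/iommufd.h>

static int probe_vtd_caps(int fd, uint32_t dev_id,
                          uint64_t *cap, uint64_t *ecap)
{
    struct iommu_hw_info_vtd vtd = {};
    struct iommu_hw_info info = {
        .size = sizeof(info),
        .dev_id = dev_id,
        .data_len = sizeof(vtd),
        .data_uptr = (uintptr_t)&vtd,
    };

    if (ioctl(fd, IOMMU_GET_HW_INFO, &info)) {
        return -errno;
    }
    if (info.out_data_type != IOMMU_HW_INFO_TYPE_INTEL_VTD) {
        return -ENODEV;     /* no VT-d specific data reported */
    }
    *cap = vtd.cap_reg;
    *ecap = vtd.ecap_reg;
    return 0;
}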
+ */ + if (!(flags & PAGE_WRITE)) { + error_setg(errp, "PT_LOAD with bss overlapping " + "non-writable page"); + return false; + } + + /* The page is already mapped and writable. */ + memset(g2h_untagged(start_bss), 0, align_bss - start_bss); } } - return align_bss >= end_bss || - target_mmap(align_bss, end_bss - align_bss, prot, - MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) != -1; + if (align_bss < end_bss && + target_mmap(align_bss, end_bss - align_bss, prot, + MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == -1) { + error_setg_errno(errp, errno, "Error mapping bss"); + return false; + } + return true; } #if defined(TARGET_ARM) @@ -3410,8 +3445,8 @@ static void load_elf_image(const char *image_name, int image_fd, /* If the load segment requests extra zeros (e.g. bss), map it. */ if (vaddr_ef < vaddr_em && - !zero_bss(vaddr_ef, vaddr_em, elf_prot)) { - goto exit_mmap; + !zero_bss(vaddr_ef, vaddr_em, elf_prot, &err)) { + goto exit_errmsg; } /* Find the full program boundaries. */ diff --git a/linux-user/mips/cpu_loop.c b/linux-user/mips/cpu_loop.c index 8735e58bad..990b03e727 100644 --- a/linux-user/mips/cpu_loop.c +++ b/linux-user/mips/cpu_loop.c @@ -180,7 +180,9 @@ done_syscall: } force_sig_fault(TARGET_SIGFPE, si_code, env->active_tc.PC); break; - + case EXCP_OVERFLOW: + force_sig_fault(TARGET_SIGFPE, TARGET_FPE_INTOVF, env->active_tc.PC); + break; /* The code below was inspired by the MIPS Linux kernel trap * handling code in arch/mips/kernel/traps.c. */ diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 8ccaab7859..7b44b9ff49 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -778,7 +778,7 @@ fail: return -1; } -static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) +static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) { abi_ulong real_start; abi_ulong real_last; @@ -807,7 +807,7 @@ static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) prot |= page_get_flags(a + 1); } if (prot != 0) { - return; + return 0; } } else { for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) { @@ -825,7 +825,7 @@ static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) } if (real_last < real_start) { - return; + return 0; } } @@ -836,32 +836,36 @@ static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) void *ptr = mmap(host_start, real_len, PROT_NONE, MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0); - assert(ptr == host_start); - } else { - int ret = munmap(host_start, real_len); - assert(ret == 0); + return ptr == host_start ? 
0 : -1; } + return munmap(host_start, real_len); } int target_munmap(abi_ulong start, abi_ulong len) { + int ret; + trace_target_munmap(start, len); if (start & ~TARGET_PAGE_MASK) { - return -TARGET_EINVAL; + errno = EINVAL; + return -1; } len = TARGET_PAGE_ALIGN(len); if (len == 0 || !guest_range_valid_untagged(start, len)) { - return -TARGET_EINVAL; + errno = EINVAL; + return -1; } mmap_lock(); - mmap_reserve_or_unmap(start, len); - page_set_flags(start, start + len - 1, 0); - shm_region_rm_complete(start, start + len - 1); + ret = mmap_reserve_or_unmap(start, len); + if (likely(ret == 0)) { + page_set_flags(start, start + len - 1, 0); + shm_region_rm_complete(start, start + len - 1); + } mmap_unlock(); - return 0; + return ret; } abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c index c4ba962708..c16c2c2d57 100644 --- a/linux-user/sh4/signal.c +++ b/linux-user/sh4/signal.c @@ -104,6 +104,14 @@ static void unwind_gusa(CPUSH4State *regs) /* Reset the SP to the saved version in R1. */ regs->gregs[15] = regs->gregs[1]; + } else if (regs->gregs[15] >= -128u && regs->pc == regs->gregs[0]) { + /* If we are on the last instruction of a gUSA region, we must reset + the SP, otherwise we would be pushing the signal context to + invalid memory. */ + regs->gregs[15] = regs->gregs[1]; + } else if (regs->flags & TB_FLAG_DELAY_SLOT) { + /* If we are in a delay slot, push the previous instruction. */ + regs->pc -= 2; } } diff --git a/linux-user/signal.c b/linux-user/signal.c index a67ab47d30..3b8efec89f 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -32,6 +32,7 @@ #include "signal-common.h" #include "host-signal.h" #include "user/safe-syscall.h" +#include "tcg/tcg.h" static struct target_sigaction sigact_table[TARGET_NSIG]; @@ -43,9 +44,8 @@ abi_ulong default_sigreturn; abi_ulong default_rt_sigreturn; /* - * System includes define _NSIG as SIGRTMAX + 1, - * but qemu (like the kernel) defines TARGET_NSIG as TARGET_SIGRTMAX - * and the first signal is SIGHUP defined as 1 + * System includes define _NSIG as SIGRTMAX + 1, but qemu (like the kernel) + * defines TARGET_NSIG as TARGET_SIGRTMAX and the first signal is 1. * Signal number 0 is reserved for use as kill(pid, 0), to test whether * a process exists without sending it a signal. */ @@ -56,7 +56,6 @@ static uint8_t host_to_target_signal_table[_NSIG] = { #define MAKE_SIG_ENTRY(sig) [sig] = TARGET_##sig, MAKE_SIGNAL_LIST #undef MAKE_SIG_ENTRY - /* next signals stay the same */ }; static uint8_t target_to_host_signal_table[TARGET_NSIG + 1]; @@ -64,18 +63,24 @@ static uint8_t target_to_host_signal_table[TARGET_NSIG + 1]; /* valid sig is between 1 and _NSIG - 1 */ int host_to_target_signal(int sig) { - if (sig < 1 || sig >= _NSIG) { + if (sig < 1) { return sig; } + if (sig >= _NSIG) { + return TARGET_NSIG + 1; + } return host_to_target_signal_table[sig]; } /* valid sig is between 1 and TARGET_NSIG */ int target_to_host_signal(int sig) { - if (sig < 1 || sig > TARGET_NSIG) { + if (sig < 1) { return sig; } + if (sig > TARGET_NSIG) { + return _NSIG; + } return target_to_host_signal_table[sig]; } @@ -487,26 +492,6 @@ void target_to_host_siginfo(siginfo_t *info, const target_siginfo_t *tinfo) info->si_value.sival_ptr = (void *)(long)sival_ptr; } -static int fatal_signal (int sig) -{ - switch (sig) { - case TARGET_SIGCHLD: - case TARGET_SIGURG: - case TARGET_SIGWINCH: - /* Ignored by default. 
*/ - return 0; - case TARGET_SIGCONT: - case TARGET_SIGSTOP: - case TARGET_SIGTSTP: - case TARGET_SIGTTIN: - case TARGET_SIGTTOU: - /* Job control signals. */ - return 0; - default: - return 1; - } -} - /* returns 1 if given signal should dump core if not handled */ static int core_dump_signal(int sig) { @@ -526,57 +511,69 @@ static int core_dump_signal(int sig) static void signal_table_init(void) { - int host_sig, target_sig, count; + int hsig, tsig, count; /* * Signals are supported starting from TARGET_SIGRTMIN and going up - * until we run out of host realtime signals. - * glibc at least uses only the lower 2 rt signals and probably - * nobody's using the upper ones. - * it's why SIGRTMIN (34) is generally greater than __SIGRTMIN (32) - * To fix this properly we need to do manual signal delivery multiplexed - * over a single host signal. + * until we run out of host realtime signals. Glibc uses the lower 2 + * RT signals and (hopefully) nobody uses the upper ones. + * This is why SIGRTMIN (34) is generally greater than __SIGRTMIN (32). + * To fix this properly we would need to do manual signal delivery + * multiplexed over a single host signal. * Attempts for configure "missing" signals via sigaction will be * silently ignored. + * + * Remap the target SIGABRT, so that we can distinguish host abort + * from guest abort. When the guest registers a signal handler or + * calls raise(SIGABRT), the host will raise SIG_RTn. If the guest + * arrives at dump_core_and_abort(), we will map back to host SIGABRT + * so that the parent (native or emulated) sees the correct signal. + * Finally, also map host to guest SIGABRT so that the emulated + * parent sees the correct mapping from wait status. */ - for (host_sig = SIGRTMIN; host_sig <= SIGRTMAX; host_sig++) { - target_sig = host_sig - SIGRTMIN + TARGET_SIGRTMIN; - if (target_sig <= TARGET_NSIG) { - host_to_target_signal_table[host_sig] = target_sig; + + hsig = SIGRTMIN; + host_to_target_signal_table[SIGABRT] = 0; + host_to_target_signal_table[hsig++] = TARGET_SIGABRT; + + for (; hsig <= SIGRTMAX; hsig++) { + tsig = hsig - SIGRTMIN + TARGET_SIGRTMIN; + if (tsig <= TARGET_NSIG) { + host_to_target_signal_table[hsig] = tsig; } } - /* generate signal conversion tables */ - for (target_sig = 1; target_sig <= TARGET_NSIG; target_sig++) { - target_to_host_signal_table[target_sig] = _NSIG; /* poison */ - } - for (host_sig = 1; host_sig < _NSIG; host_sig++) { - if (host_to_target_signal_table[host_sig] == 0) { - host_to_target_signal_table[host_sig] = host_sig; - } - target_sig = host_to_target_signal_table[host_sig]; - if (target_sig <= TARGET_NSIG) { - target_to_host_signal_table[target_sig] = host_sig; + /* Invert the mapping that has already been assigned. */ + for (hsig = 1; hsig < _NSIG; hsig++) { + tsig = host_to_target_signal_table[hsig]; + if (tsig) { + assert(target_to_host_signal_table[tsig] == 0); + target_to_host_signal_table[tsig] = hsig; } } - if (trace_event_get_state_backends(TRACE_SIGNAL_TABLE_INIT)) { - for (target_sig = 1, count = 0; target_sig <= TARGET_NSIG; target_sig++) { - if (target_to_host_signal_table[target_sig] == _NSIG) { - count++; - } + host_to_target_signal_table[SIGABRT] = TARGET_SIGABRT; + + /* Map everything else out-of-bounds. 
*/ + for (hsig = 1; hsig < _NSIG; hsig++) { + if (host_to_target_signal_table[hsig] == 0) { + host_to_target_signal_table[hsig] = TARGET_NSIG + 1; } - trace_signal_table_init(count); } + for (count = 0, tsig = 1; tsig <= TARGET_NSIG; tsig++) { + if (target_to_host_signal_table[tsig] == 0) { + target_to_host_signal_table[tsig] = _NSIG; + count++; + } + } + + trace_signal_table_init(count); } void signal_init(void) { TaskState *ts = (TaskState *)thread_cpu->opaque; - struct sigaction act; - struct sigaction oact; - int i; - int host_sig; + struct sigaction act, oact; /* initialize signal conversion tables */ signal_table_init(); @@ -587,22 +584,36 @@ void signal_init(void) sigfillset(&act.sa_mask); act.sa_flags = SA_SIGINFO; act.sa_sigaction = host_signal_handler; - for(i = 1; i <= TARGET_NSIG; i++) { - host_sig = target_to_host_signal(i); - sigaction(host_sig, NULL, &oact); - if (oact.sa_sigaction == (void *)SIG_IGN) { - sigact_table[i - 1]._sa_handler = TARGET_SIG_IGN; - } else if (oact.sa_sigaction == (void *)SIG_DFL) { - sigact_table[i - 1]._sa_handler = TARGET_SIG_DFL; + + /* + * A parent process may configure ignored signals, but all other + * signals are default. For any target signals that have no host + * mapping, set to ignore. For all core_dump_signal, install our + * host signal handler so that we may invoke dump_core_and_abort. + * This includes SIGSEGV and SIGBUS, which are also need our signal + * handler for paging and exceptions. + */ + for (int tsig = 1; tsig <= TARGET_NSIG; tsig++) { + int hsig = target_to_host_signal(tsig); + abi_ptr thand = TARGET_SIG_IGN; + + if (hsig >= _NSIG) { + continue; } - /* If there's already a handler installed then something has - gone horribly wrong, so don't even try to handle that case. */ - /* Install some handlers for our own use. We need at least - SIGSEGV and SIGBUS, to detect exceptions. We can not just - trap all signals because it affects syscall interrupt - behavior. But do trap all default-fatal signals. */ - if (fatal_signal (i)) - sigaction(host_sig, &act, NULL); + + /* As we force remap SIGABRT, cannot probe and install in one step. */ + if (tsig == TARGET_SIGABRT) { + sigaction(SIGABRT, NULL, &oact); + sigaction(hsig, &act, NULL); + } else { + struct sigaction *iact = core_dump_signal(tsig) ? &act : NULL; + sigaction(hsig, iact, &oact); + } + + if (oact.sa_sigaction != (void *)SIG_IGN) { + thand = TARGET_SIG_DFL; + } + sigact_table[tsig - 1]._sa_handler = thand; } } @@ -690,14 +701,45 @@ void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr, /* abort execution with signal */ static G_NORETURN +void die_with_signal(int host_sig) +{ + struct sigaction act = { + .sa_handler = SIG_DFL, + }; + + /* + * The proper exit code for dying from an uncaught signal is -<signal>. + * The kernel doesn't allow exit() or _exit() to pass a negative value. + * To get the proper exit code we need to actually die from an uncaught + * signal. Here the default signal handler is installed, we send + * the signal and we wait for it to arrive. + */ + sigfillset(&act.sa_mask); + sigaction(host_sig, &act, NULL); + + kill(getpid(), host_sig); + + /* Make sure the signal isn't masked (reusing the mask inside of act). 
*/ + sigdelset(&act.sa_mask, host_sig); + sigsuspend(&act.sa_mask); + + /* unreachable */ + _exit(EXIT_FAILURE); +} + +static G_NORETURN void dump_core_and_abort(CPUArchState *env, int target_sig) { CPUState *cpu = env_cpu(env); TaskState *ts = (TaskState *)cpu->opaque; int host_sig, core_dumped = 0; - struct sigaction act; - host_sig = target_to_host_signal(target_sig); + /* On exit, undo the remapping of SIGABRT. */ + if (target_sig == TARGET_SIGABRT) { + host_sig = SIGABRT; + } else { + host_sig = target_to_host_signal(target_sig); + } trace_user_dump_core_and_abort(env, target_sig, host_sig); gdb_signalled(env, target_sig); @@ -719,29 +761,7 @@ void dump_core_and_abort(CPUArchState *env, int target_sig) } preexit_cleanup(env, 128 + target_sig); - - /* The proper exit code for dying from an uncaught signal is - * -<signal>. The kernel doesn't allow exit() or _exit() to pass - * a negative value. To get the proper exit code we need to - * actually die from an uncaught signal. Here the default signal - * handler is installed, we send ourself a signal and we wait for - * it to arrive. */ - sigfillset(&act.sa_mask); - act.sa_handler = SIG_DFL; - act.sa_flags = 0; - sigaction(host_sig, &act, NULL); - - /* For some reason raise(host_sig) doesn't send the signal when - * statically linked on x86-64. */ - kill(getpid(), host_sig); - - /* Make sure the signal isn't masked (just reuse the mask inside - of act) */ - sigdelset(&act.sa_mask, host_sig); - sigsuspend(&act.sa_mask); - - /* unreachable */ - abort(); + die_with_signal(host_sig); } /* queue a signal so that it will be send to the virtual CPU as soon @@ -775,6 +795,161 @@ static inline void rewind_if_in_safe_syscall(void *puc) } } +static G_NORETURN +void die_from_signal(siginfo_t *info) +{ + char sigbuf[4], codebuf[12]; + const char *sig, *code = NULL; + + switch (info->si_signo) { + case SIGSEGV: + sig = "SEGV"; + switch (info->si_code) { + case SEGV_MAPERR: + code = "MAPERR"; + break; + case SEGV_ACCERR: + code = "ACCERR"; + break; + } + break; + case SIGBUS: + sig = "BUS"; + switch (info->si_code) { + case BUS_ADRALN: + code = "ADRALN"; + break; + case BUS_ADRERR: + code = "ADRERR"; + break; + } + break; + case SIGILL: + sig = "ILL"; + switch (info->si_code) { + case ILL_ILLOPC: + code = "ILLOPC"; + break; + case ILL_ILLOPN: + code = "ILLOPN"; + break; + case ILL_ILLADR: + code = "ILLADR"; + break; + case ILL_PRVOPC: + code = "PRVOPC"; + break; + case ILL_PRVREG: + code = "PRVREG"; + break; + case ILL_COPROC: + code = "COPROC"; + break; + } + break; + case SIGFPE: + sig = "FPE"; + switch (info->si_code) { + case FPE_INTDIV: + code = "INTDIV"; + break; + case FPE_INTOVF: + code = "INTOVF"; + break; + } + break; + case SIGTRAP: + sig = "TRAP"; + break; + default: + snprintf(sigbuf, sizeof(sigbuf), "%d", info->si_signo); + sig = sigbuf; + break; + } + if (code == NULL) { + snprintf(codebuf, sizeof(sigbuf), "%d", info->si_code); + code = codebuf; + } + + error_report("QEMU internal SIG%s {code=%s, addr=%p}", + sig, code, info->si_addr); + die_with_signal(info->si_signo); +} + +static void host_sigsegv_handler(CPUState *cpu, siginfo_t *info, + host_sigcontext *uc) +{ + uintptr_t host_addr = (uintptr_t)info->si_addr; + /* + * Convert forcefully to guest address space: addresses outside + * reserved_va are still valid to report via SEGV_MAPERR. 
+ */ + bool is_valid = h2g_valid(host_addr); + abi_ptr guest_addr = h2g_nocheck(host_addr); + uintptr_t pc = host_signal_pc(uc); + bool is_write = host_signal_write(info, uc); + MMUAccessType access_type = adjust_signal_pc(&pc, is_write); + bool maperr; + + /* If this was a write to a TB protected page, restart. */ + if (is_write + && is_valid + && info->si_code == SEGV_ACCERR + && handle_sigsegv_accerr_write(cpu, host_signal_mask(uc), + pc, guest_addr)) { + return; + } + + /* + * If the access was not on behalf of the guest, within the executable + * mapping of the generated code buffer, then it is a host bug. + */ + if (access_type != MMU_INST_FETCH + && !in_code_gen_buffer((void *)(pc - tcg_splitwx_diff))) { + die_from_signal(info); + } + + maperr = true; + if (is_valid && info->si_code == SEGV_ACCERR) { + /* + * With reserved_va, the whole address space is PROT_NONE, + * which means that we may get ACCERR when we want MAPERR. + */ + if (page_get_flags(guest_addr) & PAGE_VALID) { + maperr = false; + } else { + info->si_code = SEGV_MAPERR; + } + } + + sigprocmask(SIG_SETMASK, host_signal_mask(uc), NULL); + cpu_loop_exit_sigsegv(cpu, guest_addr, access_type, maperr, pc); +} + +static void host_sigbus_handler(CPUState *cpu, siginfo_t *info, + host_sigcontext *uc) +{ + uintptr_t pc = host_signal_pc(uc); + bool is_write = host_signal_write(info, uc); + MMUAccessType access_type = adjust_signal_pc(&pc, is_write); + + /* + * If the access was not on behalf of the guest, within the executable + * mapping of the generated code buffer, then it is a host bug. + */ + if (!in_code_gen_buffer((void *)(pc - tcg_splitwx_diff))) { + die_from_signal(info); + } + + if (info->si_code == BUS_ADRALN) { + uintptr_t host_addr = (uintptr_t)info->si_addr; + abi_ptr guest_addr = h2g_nocheck(host_addr); + + sigprocmask(SIG_SETMASK, host_signal_mask(uc), NULL); + cpu_loop_exit_sigbus(cpu, guest_addr, access_type, pc); + } +} + static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) { CPUState *cpu = thread_cpu; @@ -786,61 +961,28 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) int guest_sig; uintptr_t pc = 0; bool sync_sig = false; - void *sigmask = host_signal_mask(uc); + void *sigmask; /* * Non-spoofed SIGSEGV and SIGBUS are synchronous, and need special - * handling wrt signal blocking and unwinding. + * handling wrt signal blocking and unwinding. Non-spoofed SIGILL, + * SIGFPE, SIGTRAP are always host bugs. */ - if ((host_sig == SIGSEGV || host_sig == SIGBUS) && info->si_code > 0) { - MMUAccessType access_type; - uintptr_t host_addr; - abi_ptr guest_addr; - bool is_write; - - host_addr = (uintptr_t)info->si_addr; - - /* - * Convert forcefully to guest address space: addresses outside - * reserved_va are still valid to report via SEGV_MAPERR. - */ - guest_addr = h2g_nocheck(host_addr); - - pc = host_signal_pc(uc); - is_write = host_signal_write(info, uc); - access_type = adjust_signal_pc(&pc, is_write); - - if (host_sig == SIGSEGV) { - bool maperr = true; - - if (info->si_code == SEGV_ACCERR && h2g_valid(host_addr)) { - /* If this was a write to a TB protected page, restart. */ - if (is_write && - handle_sigsegv_accerr_write(cpu, sigmask, pc, guest_addr)) { - return; - } - - /* - * With reserved_va, the whole address space is PROT_NONE, - * which means that we may get ACCERR when we want MAPERR. 
- */ - if (page_get_flags(guest_addr) & PAGE_VALID) { - maperr = false; - } else { - info->si_code = SEGV_MAPERR; - } - } - - sigprocmask(SIG_SETMASK, sigmask, NULL); - cpu_loop_exit_sigsegv(cpu, guest_addr, access_type, maperr, pc); - } else { - sigprocmask(SIG_SETMASK, sigmask, NULL); - if (info->si_code == BUS_ADRALN) { - cpu_loop_exit_sigbus(cpu, guest_addr, access_type, pc); - } + if (info->si_code > 0) { + switch (host_sig) { + case SIGSEGV: + /* Only returns on handle_sigsegv_accerr_write success. */ + host_sigsegv_handler(cpu, info, uc); + return; + case SIGBUS: + host_sigbus_handler(cpu, info, uc); + sync_sig = true; + break; + case SIGILL: + case SIGFPE: + case SIGTRAP: + die_from_signal(info); } - - sync_sig = true; } /* get target signal number */ @@ -881,6 +1023,7 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) * would write 0xff bytes off the end of the structure and trash * data on the struct. */ + sigmask = host_signal_mask(uc); memset(sigmask, 0xff, SIGSET_T_SIZE); sigdelset(sigmask, SIGSEGV); sigdelset(sigmask, SIGBUS); @@ -936,7 +1079,6 @@ int do_sigaction(int sig, const struct target_sigaction *act, struct target_sigaction *oact, abi_ulong ka_restorer) { struct target_sigaction *k; - struct sigaction act1; int host_sig; int ret = 0; @@ -996,22 +1138,27 @@ int do_sigaction(int sig, const struct target_sigaction *act, return 0; } if (host_sig != SIGSEGV && host_sig != SIGBUS) { + struct sigaction act1; + sigfillset(&act1.sa_mask); act1.sa_flags = SA_SIGINFO; - if (k->sa_flags & TARGET_SA_RESTART) - act1.sa_flags |= SA_RESTART; - /* NOTE: it is important to update the host kernel signal - ignore state to avoid getting unexpected interrupted - syscalls */ if (k->_sa_handler == TARGET_SIG_IGN) { + /* + * It is important to update the host kernel signal ignore + * state to avoid getting unexpected interrupted syscalls. 
+ */ act1.sa_sigaction = (void *)SIG_IGN; } else if (k->_sa_handler == TARGET_SIG_DFL) { - if (fatal_signal (sig)) + if (core_dump_signal(sig)) { act1.sa_sigaction = host_signal_handler; - else + } else { act1.sa_sigaction = (void *)SIG_DFL; + } } else { act1.sa_sigaction = host_signal_handler; + if (k->sa_flags & TARGET_SA_RESTART) { + act1.sa_flags |= SA_RESTART; + } } ret = sigaction(host_sig, &act1, NULL); } diff --git a/meson.build b/meson.build index cbee764817..dcef8b1e79 100644 --- a/meson.build +++ b/meson.build @@ -2117,6 +2117,7 @@ config_host_data.set('CONFIG_OPENGL', opengl.found()) config_host_data.set('CONFIG_PLUGIN', get_option('plugins')) config_host_data.set('CONFIG_RBD', rbd.found()) config_host_data.set('CONFIG_RDMA', rdma.found()) +config_host_data.set('CONFIG_RELOCATABLE', get_option('relocatable')) config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack')) config_host_data.set('CONFIG_SDL', sdl.found()) config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found()) @@ -2201,7 +2202,7 @@ if get_option('debug_stack_usage') and have_coroutine_pool message('Disabling coroutine pool to measure stack usage') have_coroutine_pool = false endif -config_host_data.set10('CONFIG_COROUTINE_POOL', have_coroutine_pool) +config_host_data.set('CONFIG_COROUTINE_POOL', have_coroutine_pool) config_host_data.set('CONFIG_DEBUG_GRAPH_LOCK', get_option('debug_graph_lock')) config_host_data.set('CONFIG_DEBUG_MUTEX', get_option('debug_mutex')) config_host_data.set('CONFIG_DEBUG_STACK_USAGE', get_option('debug_stack_usage')) @@ -4027,8 +4028,13 @@ summary(summary_info, bool_yn: true, section: 'Directories') summary_info = {} summary_info += {'python': '@0@ (version: @1@)'.format(python.full_path(), python.language_version())} summary_info += {'sphinx-build': sphinx_build} -if config_host.has_key('HAVE_GDB_BIN') - summary_info += {'gdb': config_host['HAVE_GDB_BIN']} + +# FIXME: the [binaries] section of machine files, which can be probed +# with find_program(), would be great for passing gdb and genisoimage +# paths from configure to Meson. However, there seems to be no way to +# hide a program (for example if gdb is too old). +if config_host.has_key('GDB') + summary_info += {'gdb': config_host['GDB']} endif summary_info += {'iasl': iasl} summary_info += {'genisoimage': config_host['GENISOIMAGE']} @@ -4061,6 +4067,7 @@ if 'simple' in get_option('trace_backends') endif summary_info += {'D-Bus display': dbus_display} summary_info += {'QOM debugging': get_option('qom_cast_debug')} +summary_info += {'Relocatable install': get_option('relocatable')} summary_info += {'vhost-kernel support': have_vhost_kernel} summary_info += {'vhost-net support': have_vhost_net} summary_info += {'vhost-user support': have_vhost_user} @@ -4081,8 +4088,10 @@ if 'cpp' in all_languages else summary_info += {'C++ compiler': false} endif -if targetos == 'darwin' +if 'objc' in all_languages summary_info += {'Objective-C compiler': ' '.join(meson.get_compiler('objc').cmd_array())} +else + summary_info += {'Objective-C compiler': false} endif option_cflags = (get_option('debug') ? 
['-g'] : []) if get_option('optimization') != 'plain' @@ -4092,7 +4101,7 @@ summary_info += {'CFLAGS': ' '.join(get_option('c_args') + option_cfl if 'cpp' in all_languages summary_info += {'CXXFLAGS': ' '.join(get_option('cpp_args') + option_cflags)} endif -if targetos == 'darwin' +if 'objc' in all_languages summary_info += {'OBJCFLAGS': ' '.join(get_option('objc_args') + option_cflags)} endif link_args = get_option('c_link_args') @@ -4364,3 +4373,21 @@ if host_arch == 'unknown' or not supported_oses.contains(targetos) message('If you want to help supporting QEMU on this platform, please') message('contact the developers at qemu-devel@nongnu.org.') endif + +actually_reloc = get_option('relocatable') +# check if get_relocated_path() is actually able to relocate paths +if get_option('relocatable') and \ + not (get_option('prefix') / get_option('bindir')).startswith(get_option('prefix') / '') + message() + warning('bindir not included within prefix, the installation will not be relocatable.') + actually_reloc = false +endif +if not actually_reloc and (targetos == 'windows' or get_option('relocatable')) + if targetos == 'windows' + message() + warning('Windows installs should usually be relocatable.') + endif + message() + message('QEMU will have to be installed under ' + get_option('prefix') + '.') + message('Use --disable-relocatable to remove this warning.') +endif diff --git a/meson_options.txt b/meson_options.txt index e49309dd78..3c7398f3c6 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -101,6 +101,8 @@ option('cfi_debug', type: 'boolean', value: false, description: 'Verbose errors in case of CFI violation') option('multiprocess', type: 'feature', value: 'auto', description: 'Out of process device emulation support') +option('relocatable', type : 'boolean', value : 'true', + description: 'toggle relocatable install') option('vfio_user_server', type: 'feature', value: 'disabled', description: 'vfio-user server support') option('dbus_display', type: 'feature', value: 'auto', @@ -353,5 +355,12 @@ option('qom_cast_debug', type: 'boolean', value: true, option('slirp_smbd', type : 'feature', value : 'auto', description: 'use smbd (at path --smbd=*) in slirp networking') +option('qemu_ga_manufacturer', type: 'string', value: 'QEMU', + description: '"manufacturer" name for qemu-ga registry entries') +option('qemu_ga_distro', type: 'string', value: 'Linux', + description: 'second path element in qemu-ga registry entries') +option('qemu_ga_version', type: 'string', value: '', + description: 'version number for qemu-ga installer') + option('hexagon_idef_parser', type : 'boolean', value : true, description: 'use idef-parser to automatically generate TCG code for the Hexagon frontend') diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c index d206700a43..a82597f18e 100644 --- a/migration/migration-hmp-cmds.c +++ b/migration/migration-hmp-cmds.c @@ -30,6 +30,7 @@ #include "sysemu/runstate.h" #include "ui/qemu-spice.h" #include "sysemu/sysemu.h" +#include "options.h" #include "migration.h" static void migration_global_dump(Monitor *mon) @@ -696,7 +697,6 @@ void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict) typedef struct HMPMigrationStatus { QEMUTimer *timer; Monitor *mon; - bool is_block_migration; } HMPMigrationStatus; static void hmp_migrate_status_cb(void *opaque) @@ -722,7 +722,7 @@ static void hmp_migrate_status_cb(void *opaque) timer_mod(status->timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000); } else { - if (status->is_block_migration) { + 
if (migrate_block()) { monitor_printf(status->mon, "\n"); } if (info->error_desc) { @@ -762,7 +762,6 @@ void hmp_migrate(Monitor *mon, const QDict *qdict) status = g_malloc0(sizeof(*status)); status->mon = mon; - status->is_block_migration = blk || inc; status->timer = timer_new_ms(QEMU_CLOCK_REALTIME, hmp_migrate_status_cb, status); timer_mod(status->timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME)); diff --git a/migration/migration.c b/migration/migration.c index 6ba5e145ac..67547eb6a1 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -447,6 +447,18 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp) socket_start_incoming_migration(p ? p : uri, errp); #ifdef CONFIG_RDMA } else if (strstart(uri, "rdma:", &p)) { + if (migrate_compress()) { + error_setg(errp, "RDMA and compression can't be used together"); + return; + } + if (migrate_xbzrle()) { + error_setg(errp, "RDMA and XBZRLE can't be used together"); + return; + } + if (migrate_multifd()) { + error_setg(errp, "RDMA and multifd can't be used together"); + return; + } rdma_start_incoming_migration(p, errp); #endif } else if (strstart(uri, "exec:", &p)) { @@ -962,16 +974,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) info->xbzrle_cache->overflow = xbzrle_counters.overflow; } - if (migrate_compress()) { - info->compression = g_malloc0(sizeof(*info->compression)); - info->compression->pages = compression_counters.pages; - info->compression->busy = compression_counters.busy; - info->compression->busy_rate = compression_counters.busy_rate; - info->compression->compressed_size = - compression_counters.compressed_size; - info->compression->compression_rate = - compression_counters.compression_rate; - } + populate_compress(info); if (cpu_throttle_active()) { info->has_cpu_throttle_percentage = true; @@ -1204,7 +1207,7 @@ static void migrate_fd_cleanup(MigrationState *s) /* It is used on info migrate. We can't free it */ error_report_err(error_copy(s->error)); } - notifier_list_notify(&migration_state_notifiers, s); + migration_call_notifiers(s); block_cleanup_parameters(); yank_unregister_instance(MIGRATION_YANK_INSTANCE); } @@ -1308,14 +1311,24 @@ static void migrate_fd_cancel(MigrationState *s) } } -void add_migration_state_change_notifier(Notifier *notify) +void migration_add_notifier(Notifier *notify, + void (*func)(Notifier *notifier, void *data)) { + notify->notify = func; notifier_list_add(&migration_state_notifiers, notify); } -void remove_migration_state_change_notifier(Notifier *notify) +void migration_remove_notifier(Notifier *notify) { - notifier_remove(notify); + if (notify->notify) { + notifier_remove(notify); + notify->notify = NULL; + } +} + +void migration_call_notifiers(MigrationState *s) +{ + notifier_list_notify(&migration_state_notifiers, s); } bool migration_in_setup(MigrationState *s) @@ -1454,45 +1467,49 @@ int migrate_init(MigrationState *s, Error **errp) s->switchover_acked = false; s->rdma_migration = false; /* - * set mig_stats compression_counters memory to zero for a - * new migration + * set mig_stats memory to zero for a new migration */ memset(&mig_stats, 0, sizeof(mig_stats)); - memset(&compression_counters, 0, sizeof(compression_counters)); migration_reset_vfio_bytes_transferred(); return 0; } -int migrate_add_blocker_internal(Error *reason, Error **errp) +int migrate_add_blocker_internal(Error **reasonp, Error **errp) { /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. 
*/ if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) { - error_propagate_prepend(errp, error_copy(reason), + error_propagate_prepend(errp, *reasonp, "disallowing migration blocker " "(migration/snapshot in progress) for: "); + *reasonp = NULL; return -EBUSY; } - migration_blockers = g_slist_prepend(migration_blockers, reason); + migration_blockers = g_slist_prepend(migration_blockers, *reasonp); return 0; } -int migrate_add_blocker(Error *reason, Error **errp) +int migrate_add_blocker(Error **reasonp, Error **errp) { if (only_migratable) { - error_propagate_prepend(errp, error_copy(reason), + error_propagate_prepend(errp, *reasonp, "disallowing migration blocker " "(--only-migratable) for: "); + *reasonp = NULL; return -EACCES; } - return migrate_add_blocker_internal(reason, errp); + return migrate_add_blocker_internal(reasonp, errp); } -void migrate_del_blocker(Error *reason) +void migrate_del_blocker(Error **reasonp) { - migration_blockers = g_slist_remove(migration_blockers, reason); + if (*reasonp) { + migration_blockers = g_slist_remove(migration_blockers, *reasonp); + error_free(*reasonp); + *reasonp = NULL; + } } void qmp_migrate_incoming(const char *uri, Error **errp) @@ -2226,7 +2243,7 @@ static int postcopy_start(MigrationState *ms, Error **errp) * spice needs to trigger a transition now */ ms->postcopy_after_devices = true; - notifier_list_notify(&migration_state_notifiers, ms); + migration_call_notifiers(ms); ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop; @@ -3306,7 +3323,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) rate_limit = migrate_max_bandwidth(); /* Notify before starting migration thread */ - notifier_list_notify(&migration_state_notifiers, s); + migration_call_notifiers(s); } migration_rate_set(rate_limit); diff --git a/migration/multifd.c b/migration/multifd.c index 1fe53d3b98..e2a45c667a 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -743,9 +743,6 @@ static void *multifd_send_thread(void *opaque) if (flags & MULTIFD_FLAG_SYNC) { qemu_sem_post(&p->sem_sync); } - } else if (p->quit) { - qemu_mutex_unlock(&p->mutex); - break; } else { qemu_mutex_unlock(&p->mutex); /* sometimes there are spurious wakeups */ diff --git a/migration/ram-compress.c b/migration/ram-compress.c index 06254d8c69..d037dfe6cf 100644 --- a/migration/ram-compress.c +++ b/migration/ram-compress.c @@ -32,11 +32,14 @@ #include "ram-compress.h" #include "qemu/error-report.h" +#include "qemu/stats64.h" #include "migration.h" #include "options.h" #include "io/channel-null.h" #include "exec/target_page.h" #include "exec/ramblock.h" +#include "ram.h" +#include "migration-stats.h" CompressionStats compression_counters; @@ -227,27 +230,25 @@ static inline void compress_reset_result(CompressParam *param) void flush_compressed_data(int (send_queued_data(CompressParam *))) { - int idx, thread_count; - - thread_count = migrate_compress_threads(); + int thread_count = migrate_compress_threads(); qemu_mutex_lock(&comp_done_lock); - for (idx = 0; idx < thread_count; idx++) { - while (!comp_param[idx].done) { + for (int i = 0; i < thread_count; i++) { + while (!comp_param[i].done) { qemu_cond_wait(&comp_done_cond, &comp_done_lock); } } qemu_mutex_unlock(&comp_done_lock); - for (idx = 0; idx < thread_count; idx++) { - qemu_mutex_lock(&comp_param[idx].mutex); - if (!comp_param[idx].quit) { - CompressParam *param = &comp_param[idx]; + for (int i = 0; i < thread_count; i++) { + qemu_mutex_lock(&comp_param[i].mutex); + if (!comp_param[i].quit) { + 
CompressParam *param = &comp_param[i]; send_queued_data(param); assert(qemu_file_buffer_empty(param->file)); compress_reset_result(param); } - qemu_mutex_unlock(&comp_param[idx].mutex); + qemu_mutex_unlock(&comp_param[i].mutex); } } @@ -262,15 +263,15 @@ static inline void set_compress_params(CompressParam *param, RAMBlock *block, int compress_page_with_multi_thread(RAMBlock *block, ram_addr_t offset, int (send_queued_data(CompressParam *))) { - int idx, thread_count, pages = -1; + int thread_count, pages = -1; bool wait = migrate_compress_wait_thread(); thread_count = migrate_compress_threads(); qemu_mutex_lock(&comp_done_lock); retry: - for (idx = 0; idx < thread_count; idx++) { - if (comp_param[idx].done) { - CompressParam *param = &comp_param[idx]; + for (int i = 0; i < thread_count; i++) { + if (comp_param[i].done) { + CompressParam *param = &comp_param[i]; qemu_mutex_lock(¶m->mutex); param->done = false; send_queued_data(param); @@ -364,16 +365,14 @@ static void *do_data_decompress(void *opaque) int wait_for_decompress_done(void) { - int idx, thread_count; - if (!migrate_compress()) { return 0; } - thread_count = migrate_decompress_threads(); + int thread_count = migrate_decompress_threads(); qemu_mutex_lock(&decomp_done_lock); - for (idx = 0; idx < thread_count; idx++) { - while (!decomp_param[idx].done) { + for (int i = 0; i < thread_count; i++) { + while (!decomp_param[i].done) { qemu_cond_wait(&decomp_done_cond, &decomp_done_lock); } } @@ -430,6 +429,11 @@ int compress_threads_load_setup(QEMUFile *f) return 0; } + /* + * set compression_counters memory to zero for a new migration + */ + memset(&compression_counters, 0, sizeof(compression_counters)); + thread_count = migrate_decompress_threads(); decompress_threads = g_new0(QemuThread, thread_count); decomp_param = g_new0(DecompressParam, thread_count); @@ -459,27 +463,54 @@ exit: void decompress_data_with_multi_threads(QEMUFile *f, void *host, int len) { - int idx, thread_count; - - thread_count = migrate_decompress_threads(); + int thread_count = migrate_decompress_threads(); QEMU_LOCK_GUARD(&decomp_done_lock); while (true) { - for (idx = 0; idx < thread_count; idx++) { - if (decomp_param[idx].done) { - decomp_param[idx].done = false; - qemu_mutex_lock(&decomp_param[idx].mutex); - qemu_get_buffer(f, decomp_param[idx].compbuf, len); - decomp_param[idx].des = host; - decomp_param[idx].len = len; - qemu_cond_signal(&decomp_param[idx].cond); - qemu_mutex_unlock(&decomp_param[idx].mutex); - break; + for (int i = 0; i < thread_count; i++) { + if (decomp_param[i].done) { + decomp_param[i].done = false; + qemu_mutex_lock(&decomp_param[i].mutex); + qemu_get_buffer(f, decomp_param[i].compbuf, len); + decomp_param[i].des = host; + decomp_param[i].len = len; + qemu_cond_signal(&decomp_param[i].cond); + qemu_mutex_unlock(&decomp_param[i].mutex); + return; } } - if (idx < thread_count) { - break; - } else { - qemu_cond_wait(&decomp_done_cond, &decomp_done_lock); - } + qemu_cond_wait(&decomp_done_cond, &decomp_done_lock); + } +} + +void populate_compress(MigrationInfo *info) +{ + if (!migrate_compress()) { + return; + } + info->compression = g_malloc0(sizeof(*info->compression)); + info->compression->pages = compression_counters.pages; + info->compression->busy = compression_counters.busy; + info->compression->busy_rate = compression_counters.busy_rate; + info->compression->compressed_size = compression_counters.compressed_size; + info->compression->compression_rate = compression_counters.compression_rate; +} + +uint64_t 
ram_compressed_pages(void) +{ + return compression_counters.pages; +} + +void update_compress_thread_counts(const CompressParam *param, int bytes_xmit) +{ + ram_transferred_add(bytes_xmit); + + if (param->result == RES_ZEROPAGE) { + stat64_add(&mig_stats.zero_pages, 1); + return; } + + /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */ + compression_counters.compressed_size += bytes_xmit - 8; + compression_counters.pages++; } + diff --git a/migration/ram-compress.h b/migration/ram-compress.h index 6f7fe2f472..e55d3b50bd 100644 --- a/migration/ram-compress.h +++ b/migration/ram-compress.h @@ -30,6 +30,7 @@ #define QEMU_MIGRATION_COMPRESS_H #include "qemu-file.h" +#include "qapi/qapi-types-migration.h" enum CompressResult { RES_NONE = 0, @@ -67,4 +68,8 @@ void compress_threads_load_cleanup(void); int compress_threads_load_setup(QEMUFile *f); void decompress_data_with_multi_threads(QEMUFile *f, void *host, int len); +void populate_compress(MigrationInfo *info); +uint64_t ram_compressed_pages(void); +void update_compress_thread_counts(const CompressParam *param, int bytes_xmit); + #endif diff --git a/migration/ram.c b/migration/ram.c index c844151ee9..92769902bb 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -939,7 +939,7 @@ uint64_t ram_get_total_transferred_pages(void) { return stat64_get(&mig_stats.normal_pages) + stat64_get(&mig_stats.zero_pages) + - compression_counters.pages + xbzrle_counters.pages; + ram_compressed_pages() + xbzrle_counters.pages; } static void migration_update_rates(RAMState *rs, int64_t end_time) @@ -1144,13 +1144,12 @@ void ram_release_page(const char *rbname, uint64_t offset) * * @rs: current RAM state * @pss: current PSS channel - * @block: block that contains the page we want to send * @offset: offset inside the block for the page */ -static int save_zero_page(RAMState *rs, PageSearchStatus *pss, RAMBlock *block, +static int save_zero_page(RAMState *rs, PageSearchStatus *pss, ram_addr_t offset) { - uint8_t *p = block->host + offset; + uint8_t *p = pss->block->host + offset; QEMUFile *file = pss->pss_channel; int len = 0; @@ -1158,10 +1157,10 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss, RAMBlock *block, return 0; } - len += save_page_header(pss, file, block, offset | RAM_SAVE_FLAG_ZERO); + len += save_page_header(pss, file, pss->block, offset | RAM_SAVE_FLAG_ZERO); qemu_put_byte(file, 0); len += 1; - ram_release_page(block->idstr, offset); + ram_release_page(pss->block->idstr, offset); stat64_add(&mig_stats.zero_pages, 1); ram_transferred_add(len); @@ -1172,7 +1171,7 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss, RAMBlock *block, */ if (rs->xbzrle_started) { XBZRLE_cache_lock(); - xbzrle_cache_zero_page(block->offset + offset); + xbzrle_cache_zero_page(pss->block->offset + offset); XBZRLE_cache_unlock(); } @@ -1186,12 +1185,12 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss, RAMBlock *block, * * Return true if the pages has been saved, otherwise false is returned. 
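[Editor's note] With the ram-compress.c/ram-compress.h additions above, migration core code reaches the compression statistics through accessors rather than by touching the compression_counters global directly. A small sketch under that assumption; the demo_* callers are made up (the real users are ram_get_total_transferred_pages() in ram.c and, presumably, the query-migrate info population):

    #include "qemu/osdep.h"
    #include "ram-compress.h"

    /* Mirrors ram_get_total_transferred_pages(); demo_* names are illustrative. */
    static uint64_t demo_total_pages(uint64_t normal, uint64_t zero,
                                     uint64_t xbzrle)
    {
        return normal + zero + xbzrle + ram_compressed_pages();
    }

    static void demo_fill_info(MigrationInfo *info)
    {
        /* Only allocates info->compression when compression is enabled. */
        populate_compress(info);
    }
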
*/ -static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, +static bool control_save_page(PageSearchStatus *pss, ram_addr_t offset, int *pages) { int ret; - ret = rdma_control_save_page(pss->pss_channel, block->offset, offset, + ret = rdma_control_save_page(pss->pss_channel, pss->block->offset, offset, TARGET_PAGE_SIZE); if (ret == RAM_SAVE_CONTROL_NOT_SUPP) { return false; @@ -1292,21 +1291,6 @@ static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block, return 1; } -static void -update_compress_thread_counts(const CompressParam *param, int bytes_xmit) -{ - ram_transferred_add(bytes_xmit); - - if (param->result == RES_ZEROPAGE) { - stat64_add(&mig_stats.zero_pages, 1); - return; - } - - /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */ - compression_counters.compressed_size += bytes_xmit - 8; - compression_counters.pages++; -} - static bool save_page_use_compression(RAMState *rs); static int send_queued_data(CompressParam *param) @@ -2082,7 +2066,7 @@ static bool save_page_use_compression(RAMState *rs) * paths to handle it */ static bool save_compress_page(RAMState *rs, PageSearchStatus *pss, - RAMBlock *block, ram_addr_t offset) + ram_addr_t offset) { if (!save_page_use_compression(rs)) { return false; @@ -2098,12 +2082,13 @@ static bool save_compress_page(RAMState *rs, PageSearchStatus *pss, * We post the fist page as normal page as compression will take * much CPU resource. */ - if (block != pss->last_sent_block) { + if (pss->block != pss->last_sent_block) { ram_flush_compressed_data(rs); return false; } - if (compress_page_with_multi_thread(block, offset, send_queued_data) > 0) { + if (compress_page_with_multi_thread(pss->block, offset, + send_queued_data) > 0) { return true; } @@ -2125,15 +2110,15 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; int res; - if (control_save_page(pss, block, offset, &res)) { + if (control_save_page(pss, offset, &res)) { return res; } - if (save_compress_page(rs, pss, block, offset)) { + if (save_compress_page(rs, pss, offset)) { return 1; } - if (save_zero_page(rs, pss, block, offset)) { + if (save_zero_page(rs, pss, offset)) { return 1; } @@ -3888,6 +3873,7 @@ static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length) ret = qemu_ram_resize(block, length, &local_err); if (local_err) { error_report_err(local_err); + return ret; } } /* For postcopy we need to check hugepage sizes match */ @@ -3898,7 +3884,7 @@ static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length) error_report("Mismatched RAM page size %s " "(local) %zd != %" PRId64, block->idstr, block->page_size, remote_page_size); - ret = -EINVAL; + return -EINVAL; } } if (migrate_ignore_shared()) { @@ -3908,7 +3894,7 @@ static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length) error_report("Mismatched GPAs for block %s " "%" PRId64 "!= %" PRId64, block->idstr, (uint64_t)addr, (uint64_t)block->mr->addr); - ret = -EINVAL; + return -EINVAL; } } ret = rdma_block_notification_handle(f, block->idstr); diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 4b7c3b81b8..7a226c93bc 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -339,7 +339,8 @@ static void vhost_vdpa_net_data_start_first(VhostVDPAState *s) { struct vhost_vdpa *v = &s->vhost_vdpa; - add_migration_state_change_notifier(&s->migration_state); + migration_add_notifier(&s->migration_state, + vdpa_net_migration_state_notifier); if (v->shadow_vqs_enabled) { v->iova_tree = 
vhost_iova_tree_new(v->iova_range.first, v->iova_range.last); @@ -399,7 +400,7 @@ static void vhost_vdpa_net_client_stop(NetClientState *nc) assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); if (s->vhost_vdpa.index == 0) { - remove_migration_state_change_notifier(&s->migration_state); + migration_remove_notifier(&s->migration_state); } dev = s->vhost_vdpa.dev; @@ -1566,7 +1567,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->vhost_vdpa.device_fd = vdpa_device_fd; s->vhost_vdpa.index = queue_pair_index; s->always_svq = svq; - s->migration_state.notify = vdpa_net_migration_state_notifier; + s->migration_state.notify = NULL; s->vhost_vdpa.shadow_vqs_enabled = svq; s->vhost_vdpa.iova_range = iova_range; s->vhost_vdpa.shadow_data = svq; diff --git a/pc-bios/hppa-firmware.img b/pc-bios/hppa-firmware.img index c7196143b1..e976c0cc93 100644 --- a/pc-bios/hppa-firmware.img +++ b/pc-bios/hppa-firmware.img Binary files differdiff --git a/qapi/compat.json b/qapi/compat.json index f4c19837eb..42034d9368 100644 --- a/qapi/compat.json +++ b/qapi/compat.json @@ -43,8 +43,8 @@ # This is intended for testing users of the management interfaces. # # Limitation: covers only syntactic aspects of QMP, i.e. stuff tagged -# with feature 'deprecated'. We may want to extend it to cover -# semantic aspects and CLI. +# with feature 'deprecated' or 'unstable'. We may want to extend it +# to cover semantic aspects and CLI. # # Limitation: deprecated-output policy @hide is not implemented for # enumeration values. They behave the same as with policy @accept. diff --git a/qapi/machine-common.json b/qapi/machine-common.json new file mode 100644 index 0000000000..fa6bd71d12 --- /dev/null +++ b/qapi/machine-common.json @@ -0,0 +1,21 @@ +# -*- Mode: Python -*- +# vim: filetype=python +# +# This work is licensed under the terms of the GNU GPL, version 2 or later. +# See the COPYING file in the top-level directory. + +## +# = Machines S390 data types +## + +## +# @CpuS390Entitlement: +# +# An enumeration of CPU entitlements that can be assumed by a virtual +# S390 CPU +# +# Since: 8.2 +## +{ 'enum': 'CpuS390Entitlement', + 'prefix': 'S390_CPU_ENTITLEMENT', + 'data': [ 'auto', 'low', 'medium', 'high' ] } diff --git a/qapi/machine-target.json b/qapi/machine-target.json index f0a6b72414..4e55adbe00 100644 --- a/qapi/machine-target.json +++ b/qapi/machine-target.json @@ -4,6 +4,8 @@ # This work is licensed under the terms of the GNU GPL, version 2 or later. # See the COPYING file in the top-level directory. +{ 'include': 'machine-common.json' } + ## # @CpuModelInfo: # @@ -361,3 +363,122 @@ 'TARGET_MIPS', 'TARGET_LOONGARCH64', 'TARGET_RISCV' ] } } + +## +# @CpuS390Polarization: +# +# An enumeration of CPU polarization that can be assumed by a virtual +# S390 CPU +# +# Since: 8.2 +## +{ 'enum': 'CpuS390Polarization', + 'prefix': 'S390_CPU_POLARIZATION', + 'data': [ 'horizontal', 'vertical' ], + 'if': 'TARGET_S390X' +} + +## +# @set-cpu-topology: +# +# Modify the topology by moving the CPU inside the topology tree, +# or by changing a modifier attribute of a CPU. +# Absent values will not be modified. +# +# @core-id: the vCPU ID to be moved +# +# @socket-id: destination socket to move the vCPU to +# +# @book-id: destination book to move the vCPU to +# +# @drawer-id: destination drawer to move the vCPU to +# +# @entitlement: entitlement to set +# +# @dedicated: whether the provisioning of real to virtual CPU is dedicated +# +# Features: +# +# @unstable: This command is experimental. 
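[Editor's note] The net/vhost-vdpa.c hunks above switch to the new migration_add_notifier()/migration_remove_notifier() helpers, which take the callback at registration time instead of relying on a pre-initialised .notify member. A minimal sketch of that pattern, using a hypothetical MyState consumer:

    #include "qemu/osdep.h"
    #include "qemu/notify.h"
    #include "migration/misc.h"

    typedef struct MyState {        /* hypothetical consumer */
        Notifier migration_state;   /* .notify left unset at init */
    } MyState;

    static void mystate_migration_notify(Notifier *notifier, void *data)
    {
        MyState *s = container_of(notifier, MyState, migration_state);
        /* @data is the MigrationState; react to setup/failure here. */
        (void)s;
    }

    static void mystate_start(MyState *s)
    {
        migration_add_notifier(&s->migration_state, mystate_migration_notify);
    }

    static void mystate_stop(MyState *s)
    {
        migration_remove_notifier(&s->migration_state);
    }
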
+# +# Returns: Nothing on success. +# +# Since: 8.2 +## +{ 'command': 'set-cpu-topology', + 'data': { + 'core-id': 'uint16', + '*socket-id': 'uint16', + '*book-id': 'uint16', + '*drawer-id': 'uint16', + '*entitlement': 'CpuS390Entitlement', + '*dedicated': 'bool' + }, + 'features': [ 'unstable' ], + 'if': { 'all': [ 'TARGET_S390X' , 'CONFIG_KVM' ] } +} + +## +# @CPU_POLARIZATION_CHANGE: +# +# Emitted when the guest asks to change the polarization. +# +# The guest can tell the host (via the PTF instruction) whether the +# CPUs should be provisioned using horizontal or vertical polarization. +# +# On horizontal polarization the host is expected to provision all vCPUs +# equally. +# +# On vertical polarization the host can provision each vCPU differently. +# The guest will get information on the details of the provisioning +# the next time it uses the STSI(15) instruction. +# +# @polarization: polarization specified by the guest +# +# Features: +# +# @unstable: This event is experimental. +# +# Since: 8.2 +# +# Example: +# +# <- { "event": "CPU_POLARIZATION_CHANGE", +# "data": { "polarization": "horizontal" }, +# "timestamp": { "seconds": 1401385907, "microseconds": 422329 } } +## +{ 'event': 'CPU_POLARIZATION_CHANGE', + 'data': { 'polarization': 'CpuS390Polarization' }, + 'features': [ 'unstable' ], + 'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] } +} + +## +# @CpuPolarizationInfo: +# +# The result of a CPU polarization query. +# +# @polarization: the CPU polarization +# +# Since: 8.2 +## +{ 'struct': 'CpuPolarizationInfo', + 'data': { 'polarization': 'CpuS390Polarization' }, + 'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] } +} + +## +# @query-s390x-cpu-polarization: +# +# Features: +# +# @unstable: This command is experimental. +# +# Returns: the machine's CPU polarization +# +# Since: 8.2 +## +{ 'command': 'query-s390x-cpu-polarization', 'returns': 'CpuPolarizationInfo', + 'features': [ 'unstable' ], + 'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] } +} diff --git a/qapi/machine.json b/qapi/machine.json index a08b6576ca..6c9d2f6dcf 100644 --- a/qapi/machine.json +++ b/qapi/machine.json @@ -9,6 +9,7 @@ ## { 'include': 'common.json' } +{ 'include': 'machine-common.json' } ## # @SysEmuTarget: @@ -56,9 +57,16 @@ # # @cpu-state: the virtual CPU's state # +# @dedicated: the virtual CPU's dedication (since 8.2) +# +# @entitlement: the virtual CPU's entitlement (since 8.2) +# # Since: 2.12 ## -{ 'struct': 'CpuInfoS390', 'data': { 'cpu-state': 'CpuS390State' } } +{ 'struct': 'CpuInfoS390', + 'data': { 'cpu-state': 'CpuS390State', + '*dedicated': 'bool', + '*entitlement': 'CpuS390Entitlement' } } ## # @CpuInfoFast: @@ -71,8 +79,7 @@ # # @thread-id: ID of the underlying host thread # -# @props: properties describing to which node/socket/core/thread -# virtual CPU belongs to, provided if supported by board +# @props: properties associated with a virtual CPU, e.g. the socket id # # @target: the QEMU system emulation target, which determines which # additional fields will be listed (since 3.0) @@ -899,29 +906,46 @@ # should be passed by management with device_add command when a CPU is # being hotplugged. # +# Which members are optional and which mandatory depends on the +# architecture and board. +# +# For s390x see :ref:`cpu-topology-s390x`. 
+# +# The ids other than the node-id specify the position of the CPU +# within the CPU topology (as defined by the machine property "smp", +# thus see also type @SMPConfiguration) +# # @node-id: NUMA node ID the CPU belongs to # -# @socket-id: socket number within node/board the CPU belongs to +# @drawer-id: drawer number within CPU topology the CPU belongs to +# (since 8.2) # -# @die-id: die number within socket the CPU belongs to (since 4.1) +# @book-id: book number within parent container the CPU belongs to +# (since 8.2) # -# @cluster-id: cluster number within die the CPU belongs to (since -# 7.1) +# @socket-id: socket number within parent container the CPU belongs to # -# @core-id: core number within cluster the CPU belongs to +# @die-id: die number within the parent container the CPU belongs to +# (since 4.1) # -# @thread-id: thread number within core the CPU belongs to +# @cluster-id: cluster number within the parent container the CPU +# belongs to (since 7.1) # -# Note: currently there are 6 properties that could be present but -# management should be prepared to pass through other properties -# with device_add command to allow for future interface extension. -# This also requires the filed names to be kept in sync with the -# properties passed to -device/device_add. +# @core-id: core number within the parent container the CPU +# belongs to +# +# @thread-id: thread number within the core the CPU belongs to +# +# Note: management should be prepared to pass through additional +# properties with device_add. # # Since: 2.7 ## { 'struct': 'CpuInstanceProperties', + # Keep these in sync with the properties device_add accepts 'data': { '*node-id': 'int', + '*drawer-id': 'int', + '*book-id': 'int', '*socket-id': 'int', '*die-id': 'int', '*cluster-id': 'int', @@ -1478,26 +1502,43 @@ # Schema for CPU topology configuration. A missing value lets QEMU # figure out a suitable value based on the ones that are provided. # -# @cpus: number of virtual CPUs in the virtual machine -# -# @sockets: number of sockets in the CPU topology +# The members other than @cpus and @maxcpus define a topology of +# containers. # -# @dies: number of dies per socket in the CPU topology +# The ordering from highest/coarsest to lowest/finest is: +# @drawers, @books, @sockets, @dies, @clusters, @cores, @threads. # -# @clusters: number of clusters per die in the CPU topology (since -# 7.0) +# Different architectures support different subsets of topology +# containers. # -# @cores: number of cores per cluster in the CPU topology +# For example, s390x does not have clusters and dies, and the socket +# is the parent container of cores. 
# -# @threads: number of threads per core in the CPU topology +# @cpus: number of virtual CPUs in the virtual machine # # @maxcpus: maximum number of hotpluggable virtual CPUs in the virtual # machine # +# @drawers: number of drawers in the CPU topology (since 8.2) +# +# @books: number of books in the CPU topology (since 8.2) +# +# @sockets: number of sockets per parent container +# +# @dies: number of dies per parent container +# +# @clusters: number of clusters per parent container (since 7.0) +# +# @cores: number of cores per parent container +# +# @threads: number of threads per core +# # Since: 6.1 ## { 'struct': 'SMPConfiguration', 'data': { '*cpus': 'int', + '*drawers': 'int', + '*books': 'int', '*sockets': 'int', '*dies': 'int', '*clusters': 'int', diff --git a/qapi/meson.build b/qapi/meson.build index 60a668b343..f81a37565c 100644 --- a/qapi/meson.build +++ b/qapi/meson.build @@ -36,6 +36,7 @@ qapi_all_modules = [ 'error', 'introspect', 'job', + 'machine-common', 'machine', 'machine-target', 'migration', diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json index 6594afba31..c01ec335e6 100644 --- a/qapi/qapi-schema.json +++ b/qapi/qapi-schema.json @@ -66,6 +66,7 @@ { 'include': 'introspect.json' } { 'include': 'qom.json' } { 'include': 'qdev.json' } +{ 'include': 'machine-common.json' } { 'include': 'machine.json' } { 'include': 'machine-target.json' } { 'include': 'replay.json' } diff --git a/qemu-options.hx b/qemu-options.hx index 54a7e94970..e26230bac5 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -272,11 +272,14 @@ SRST ERST DEF("smp", HAS_ARG, QEMU_OPTION_smp, - "-smp [[cpus=]n][,maxcpus=maxcpus][,sockets=sockets][,dies=dies][,clusters=clusters][,cores=cores][,threads=threads]\n" + "-smp [[cpus=]n][,maxcpus=maxcpus][,drawers=drawers][,books=books][,sockets=sockets]\n" + " [,dies=dies][,clusters=clusters][,cores=cores][,threads=threads]\n" " set the number of initial CPUs to 'n' [default=1]\n" " maxcpus= maximum number of total CPUs, including\n" " offline CPUs for hotplug, etc\n" - " sockets= number of sockets on the machine board\n" + " drawers= number of drawers on the machine board\n" + " books= number of books in one drawer\n" + " sockets= number of sockets in one book\n" " dies= number of dies in one socket\n" " clusters= number of clusters in one die\n" " cores= number of cores in one cluster\n" diff --git a/qga/meson.build b/qga/meson.build index 59cae0cc6e..940a51d55d 100644 --- a/qga/meson.build +++ b/qga/meson.build @@ -145,6 +145,9 @@ if targetos == 'windows' else libpcre = 'libpcre2' endif + qga_msi_version = get_option('qemu_ga_version') == '' \ + ? 
project.version() \ + : get_option('qemu_ga_version') qga_msi = custom_target('QGA MSI', input: files('installer/qemu-ga.wxs'), output: 'qemu-ga-@0@.msi'.format(host_arch), @@ -155,9 +158,9 @@ if targetos == 'windows' qemu_ga_msi_vss, '-D', 'BUILD_DIR=' + meson.project_build_root(), '-D', 'BIN_DIR=' + glib_pc.get_variable('bindir'), - '-D', 'QEMU_GA_VERSION=' + config_host['QEMU_GA_VERSION'], - '-D', 'QEMU_GA_MANUFACTURER=' + config_host['QEMU_GA_MANUFACTURER'], - '-D', 'QEMU_GA_DISTRO=' + config_host['QEMU_GA_DISTRO'], + '-D', 'QEMU_GA_VERSION=' + qga_msi_version, + '-D', 'QEMU_GA_MANUFACTURER=' + get_option('qemu_ga_manufacturer'), + '-D', 'QEMU_GA_DISTRO=' + get_option('qemu_ga_distro'), '-D', 'LIBPCRE=' + libpcre, ]) all_qga += [qga_msi] diff --git a/roms/seabios-hppa b/roms/seabios-hppa -Subproject 763e3b73499db5fef94087bd310bfc8ccbcf785 +Subproject fd5b6cf82369a1e53d68302fb6ede2b9e2afccd diff --git a/scripts/feature_to_c.py b/scripts/feature_to_c.py index bcbcb83beb..bcbcb83beb 100755..100644 --- a/scripts/feature_to_c.py +++ b/scripts/feature_to_c.py diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 02fa828100..00a0870b26 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -907,6 +907,7 @@ sub get_subsystem_name { if (length($subsystem) > 20) { $subsystem = substr($subsystem, 0, 17); $subsystem =~ s/\s*$//; + $subsystem =~ s/[()]//g; $subsystem = $subsystem . "..."; } return $subsystem; diff --git a/scripts/meson-buildoptions.py b/scripts/meson-buildoptions.py index 8d2e526132..4814a8ff61 100644 --- a/scripts/meson-buildoptions.py +++ b/scripts/meson-buildoptions.py @@ -25,13 +25,15 @@ import textwrap import shlex import sys +# Options with nonstandard names (e.g. --with/--without) or OS-dependent +# defaults. Try not to add any. SKIP_OPTIONS = { "default_devices", "fuzzing_engine", - "qemu_suffix", - "smbd", } +# Options whose name doesn't match the option for backwards compatibility +# reasons, because Meson gives them a funny name, or both OPTION_NAMES = { "b_coverage": "gcov", "b_lto": "lto", @@ -40,13 +42,25 @@ OPTION_NAMES = { "malloc": "enable-malloc", "pkgversion": "with-pkgversion", "qemu_firmwarepath": "firmwarepath", + "qemu_suffix": "with-suffix", "trace_backends": "enable-trace-backends", "trace_file": "with-trace-file", } +# Options that configure autodetects, even though meson defines them as boolean +AUTO_OPTIONS = { + "plugins", + "werror", +} + +# Builtin options that should be definable via configure. Some of the others +# we really do not want (e.g. c_args is defined via the native file, not +# via -D, because it's a mix of CFLAGS and --extra-cflags); for specific +# cases "../configure -D" can be used as an escape hatch. 
BUILTIN_OPTIONS = { "b_coverage", "b_lto", + "bindir", "datadir", "debug", "includedir", @@ -55,8 +69,10 @@ BUILTIN_OPTIONS = { "localedir", "localstatedir", "mandir", + "prefix", "strip", "sysconfdir", + "werror", } LINE_WIDTH = 76 @@ -168,6 +184,7 @@ def cli_metavar(opt): def print_help(options): print("meson_options_help() {") + feature_opts = [] for opt in sorted(options, key=cli_help_key): key = cli_help_key(opt) # The first section includes options that have an arguments, @@ -176,7 +193,7 @@ def print_help(options): metavar = cli_metavar(opt) left = f"--{key}={metavar}" help_line(left, opt, 27, True) - elif opt["type"] == "boolean": + elif opt["type"] == "boolean" and opt["name"] not in AUTO_OPTIONS: left = f"--{key}" help_line(left, opt, 27, False) elif allow_arg(opt): @@ -185,16 +202,17 @@ def print_help(options): else: left = f"--{key}=CHOICE" help_line(left, opt, 27, True) + else: + feature_opts.append(opt) sh_print() sh_print("Optional features, enabled with --enable-FEATURE and") sh_print("disabled with --disable-FEATURE, default is enabled if available") sh_print("(unless built with --without-default-features):") sh_print() - for opt in options: - key = opt["name"].replace("_", "-") - if opt["type"] != "boolean" and not allow_arg(opt): - help_line(key, opt, 18, False) + for opt in sorted(feature_opts, key=cli_option): + key = cli_option(opt) + help_line(key, opt, 18, False) print("}") diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index a28ccbcaf6..7ca4b77eae 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -3,6 +3,7 @@ meson_options_help() { printf "%s\n" ' --audio-drv-list=CHOICES Set audio driver list [default] (choices: alsa/co' printf "%s\n" ' reaudio/default/dsound/jack/oss/pa/pipewire/sdl/s' printf "%s\n" ' ndio)' + printf "%s\n" ' --bindir=VALUE Executable directory [bin]' printf "%s\n" ' --block-drv-ro-whitelist=VALUE' printf "%s\n" ' set block driver read-only whitelist (by default' printf "%s\n" ' affects only QEMU, not tools like qemu-img)' @@ -17,6 +18,7 @@ meson_options_help() { printf "%s\n" ' code for the Hexagon frontend' printf "%s\n" ' --disable-install-blobs install provided firmware blobs' printf "%s\n" ' --disable-qom-cast-debug cast debugging support' + printf "%s\n" ' --disable-relocatable toggle relocatable install' printf "%s\n" ' --docdir=VALUE Base directory for documentation installation' printf "%s\n" ' (can be empty) [share/doc]' printf "%s\n" ' --enable-block-drv-whitelist-in-tools' @@ -39,7 +41,6 @@ meson_options_help() { printf "%s\n" ' jemalloc/system/tcmalloc)' printf "%s\n" ' --enable-module-upgrades try to load modules from alternate paths for' printf "%s\n" ' upgrades' - printf "%s\n" ' --enable-plugins TCG plugins via shared library loading' printf "%s\n" ' --enable-rng-none dummy RNG, avoid using /dev/(u)random and' printf "%s\n" ' getrandom()' printf "%s\n" ' --enable-safe-stack SafeStack Stack Smash Protection (requires' @@ -62,6 +63,14 @@ meson_options_help() { printf "%s\n" ' --localedir=VALUE Locale data directory [share/locale]' printf "%s\n" ' --localstatedir=VALUE Localstate data directory [/var/local]' printf "%s\n" ' --mandir=VALUE Manual page directory [share/man]' + printf "%s\n" ' --prefix=VALUE Installation prefix [/usr/local]' + printf "%s\n" ' --qemu-ga-distro=VALUE second path element in qemu-ga registry entries' + printf "%s\n" ' [Linux]' + printf "%s\n" ' --qemu-ga-manufacturer=VALUE' + printf "%s\n" ' "manufacturer" name for qemu-ga registry entries' + 
printf "%s\n" ' [QEMU]' + printf "%s\n" ' --qemu-ga-version=VALUE version number for qemu-ga installer' + printf "%s\n" ' --smbd=VALUE Path to smbd for slirp networking' printf "%s\n" ' --sysconfdir=VALUE Sysconf data directory [etc]' printf "%s\n" ' --tls-priority=VALUE Default TLS protocol/cipher priority string' printf "%s\n" ' [NORMAL]' @@ -69,6 +78,8 @@ meson_options_help() { printf "%s\n" ' auto/sigaltstack/ucontext/windows)' printf "%s\n" ' --with-pkgversion=VALUE use specified string as sub-version of the' printf "%s\n" ' package' + printf "%s\n" ' --with-suffix=VALUE Suffix for QEMU data/modules/config directories' + printf "%s\n" ' (can be empty) [qemu]' printf "%s\n" ' --with-trace-file=VALUE Trace file prefix for simple backend [trace]' printf "%s\n" '' printf "%s\n" 'Optional features, enabled with --enable-FEATURE and' @@ -148,6 +159,7 @@ meson_options_help() { printf "%s\n" ' pa PulseAudio sound support' printf "%s\n" ' parallels parallels image format support' printf "%s\n" ' pipewire PipeWire sound support' + printf "%s\n" ' plugins TCG plugins via shared library loading' printf "%s\n" ' png PNG support with libpng' printf "%s\n" ' pvrdma Enable PVRDMA support' printf "%s\n" ' qcow1 qcow1 image format support' @@ -201,6 +213,7 @@ meson_options_help() { printf "%s\n" ' vpc vpc image format support' printf "%s\n" ' vte vte support for the gtk UI' printf "%s\n" ' vvfat vvfat image format support' + printf "%s\n" ' werror Treat warnings as errors' printf "%s\n" ' whpx WHPX acceleration support' printf "%s\n" ' xen Xen backend support' printf "%s\n" ' xen-pci-passthrough' @@ -229,6 +242,7 @@ _meson_option_parse() { --disable-gcov) printf "%s" -Db_coverage=false ;; --enable-lto) printf "%s" -Db_lto=true ;; --disable-lto) printf "%s" -Db_lto=false ;; + --bindir=*) quote_sh "-Dbindir=$2" ;; --enable-blkio) printf "%s" -Dblkio=enabled ;; --disable-blkio) printf "%s" -Dblkio=disabled ;; --block-drv-ro-whitelist=*) quote_sh "-Dblock_drv_ro_whitelist=$2" ;; @@ -407,6 +421,7 @@ _meson_option_parse() { --disable-plugins) printf "%s" -Dplugins=false ;; --enable-png) printf "%s" -Dpng=enabled ;; --disable-png) printf "%s" -Dpng=disabled ;; + --prefix=*) quote_sh "-Dprefix=$2" ;; --enable-pvrdma) printf "%s" -Dpvrdma=enabled ;; --disable-pvrdma) printf "%s" -Dpvrdma=disabled ;; --enable-qcow1) printf "%s" -Dqcow1=enabled ;; @@ -414,6 +429,10 @@ _meson_option_parse() { --enable-qed) printf "%s" -Dqed=enabled ;; --disable-qed) printf "%s" -Dqed=disabled ;; --firmwarepath=*) quote_sh "-Dqemu_firmwarepath=$(meson_option_build_array $2)" ;; + --qemu-ga-distro=*) quote_sh "-Dqemu_ga_distro=$2" ;; + --qemu-ga-manufacturer=*) quote_sh "-Dqemu_ga_manufacturer=$2" ;; + --qemu-ga-version=*) quote_sh "-Dqemu_ga_version=$2" ;; + --with-suffix=*) quote_sh "-Dqemu_suffix=$2" ;; --enable-qga-vss) printf "%s" -Dqga_vss=enabled ;; --disable-qga-vss) printf "%s" -Dqga_vss=disabled ;; --enable-qom-cast-debug) printf "%s" -Dqom_cast_debug=true ;; @@ -422,6 +441,8 @@ _meson_option_parse() { --disable-rbd) printf "%s" -Drbd=disabled ;; --enable-rdma) printf "%s" -Drdma=enabled ;; --disable-rdma) printf "%s" -Drdma=disabled ;; + --enable-relocatable) printf "%s" -Drelocatable=true ;; + --disable-relocatable) printf "%s" -Drelocatable=false ;; --enable-replication) printf "%s" -Dreplication=enabled ;; --disable-replication) printf "%s" -Dreplication=disabled ;; --enable-rng-none) printf "%s" -Drng_none=true ;; @@ -446,6 +467,7 @@ _meson_option_parse() { --disable-slirp-smbd) printf "%s" -Dslirp_smbd=disabled ;; 
--enable-smartcard) printf "%s" -Dsmartcard=enabled ;; --disable-smartcard) printf "%s" -Dsmartcard=disabled ;; + --smbd=*) quote_sh "-Dsmbd=$2" ;; --enable-snappy) printf "%s" -Dsnappy=enabled ;; --disable-snappy) printf "%s" -Dsnappy=disabled ;; --enable-sndio) printf "%s" -Dsndio=enabled ;; @@ -522,6 +544,8 @@ _meson_option_parse() { --disable-vte) printf "%s" -Dvte=disabled ;; --enable-vvfat) printf "%s" -Dvvfat=enabled ;; --disable-vvfat) printf "%s" -Dvvfat=disabled ;; + --enable-werror) printf "%s" -Dwerror=true ;; + --disable-werror) printf "%s" -Dwerror=false ;; --enable-whpx) printf "%s" -Dwhpx=enabled ;; --disable-whpx) printf "%s" -Dwhpx=disabled ;; --enable-xen) printf "%s" -Dxen=enabled ;; diff --git a/scripts/qapi/gen.py b/scripts/qapi/gen.py index bf5716b5f3..5412716617 100644 --- a/scripts/qapi/gen.py +++ b/scripts/qapi/gen.py @@ -13,8 +13,8 @@ from contextlib import contextmanager import os -import sys import re +import sys from typing import ( Dict, Iterator, diff --git a/scripts/qapi/parser.py b/scripts/qapi/parser.py index 22e7bcc4b1..bf31018aef 100644 --- a/scripts/qapi/parser.py +++ b/scripts/qapi/parser.py @@ -22,6 +22,7 @@ from typing import ( Dict, List, Mapping, + Match, Optional, Set, Union, @@ -563,11 +564,11 @@ class QAPIDoc: self._switch_section(QAPIDoc.NullSection(self._parser)) @staticmethod - def _match_at_name_colon(string: str): + def _match_at_name_colon(string: str) -> Optional[Match[str]]: return re.match(r'@([^:]*): *', string) @staticmethod - def _match_section_tag(string: str): + def _match_section_tag(string: str) -> Optional[Match[str]]: return re.match(r'(Returns|Since|Notes?|Examples?|TODO): *', string) def _append_body_line(self, line: str) -> None: diff --git a/scripts/qapi/schema.py b/scripts/qapi/schema.py index 231ebf61ba..d739e558e9 100644 --- a/scripts/qapi/schema.py +++ b/scripts/qapi/schema.py @@ -73,6 +73,11 @@ class QAPISchemaEntity: self.features = features or [] self._checked = False + def __repr__(self): + if self.name is None: + return "<%s at 0x%x>" % (type(self).__name__, id(self)) + return "<%s:%s at 0x%x>" % type(self).__name__, self.name, id(self) + def c_name(self): return c_name(self.name) diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py index 33cf85e2b0..b29594d75e 100644 --- a/scripts/tracetool/__init__.py +++ b/scripts/tracetool/__init__.py @@ -210,12 +210,12 @@ class Event(object): """ - _CRE = re.compile("((?P<props>[\w\s]+)\s+)?" - "(?P<name>\w+)" - "\((?P<args>[^)]*)\)" - "\s*" - "(?:(?:(?P<fmt_trans>\".+),)?\s*(?P<fmt>\".+))?" - "\s*") + _CRE = re.compile(r"((?P<props>[\w\s]+)\s+)?" + r"(?P<name>\w+)" + r"\((?P<args>[^)]*)\)" + r"\s*" + r"(?:(?:(?P<fmt_trans>\".+),)?\s*(?P<fmt>\".+))?" + r"\s*") _VALID_PROPS = set(["disable", "vcpu"]) @@ -326,7 +326,7 @@ class Event(object): fmt) # Star matching on PRI is dangerous as one might have multiple # arguments with that format, hence the non-greedy version of it. - _FMT = re.compile("(%[\d\.]*\w+|%.*?PRI\S+)") + _FMT = re.compile(r"(%[\d\.]*\w+|%.*?PRI\S+)") def formats(self): """List conversion specifiers in the argument print format string.""" diff --git a/scripts/tracetool/format/log_stap.py b/scripts/tracetool/format/log_stap.py index 0b6549d534..b49afababd 100644 --- a/scripts/tracetool/format/log_stap.py +++ b/scripts/tracetool/format/log_stap.py @@ -83,7 +83,7 @@ def c_fmt_to_stap(fmt): # and "%ll" is not valid at all. Similarly the size_t # based "%z" size qualifier is not valid. We just # strip all size qualifiers for sanity. 
- fmt = re.sub("%(\d*)(l+|z)(x|u|d)", "%\\1\\3", "".join(bits)) + fmt = re.sub(r"%(\d*)(l+|z)(x|u|d)", r"%\1\3", "".join(bits)) return fmt def generate(events, backend, group): diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 35a64bb501..34295c0fe5 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -161,7 +161,8 @@ done rm -rf "$output/linux-headers/linux" mkdir -p "$output/linux-headers/linux" for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \ - psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h vduse.h; do + psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \ + vduse.h iommufd.h; do cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" done diff --git a/stubs/meson.build b/stubs/meson.build index cde44972bf..0bf25e6ca5 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -65,4 +65,3 @@ else stub_ss.add(files('qdev.c')) endif stub_ss.add(files('semihost-all.c')) -stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false: files('vfio-user-obj.c')) diff --git a/stubs/migr-blocker.c b/stubs/migr-blocker.c index 5676a2f93c..17a5dbf87b 100644 --- a/stubs/migr-blocker.c +++ b/stubs/migr-blocker.c @@ -1,11 +1,11 @@ #include "qemu/osdep.h" #include "migration/blocker.h" -int migrate_add_blocker(Error *reason, Error **errp) +int migrate_add_blocker(Error **reasonp, Error **errp) { return 0; } -void migrate_del_blocker(Error *reason) +void migrate_del_blocker(Error **reasonp) { } diff --git a/stubs/ramfb.c b/stubs/ramfb.c index 48143f3354..cf64733b10 100644 --- a/stubs/ramfb.c +++ b/stubs/ramfb.c @@ -2,6 +2,8 @@ #include "qapi/error.h" #include "hw/display/ramfb.h" +const VMStateDescription ramfb_vmstate = {}; + void ramfb_display_update(QemuConsole *con, RAMFBState *s) { } diff --git a/system/memory_mapping.c b/system/memory_mapping.c index d7f1d096e0..6f884c5b90 100644 --- a/system/memory_mapping.c +++ b/system/memory_mapping.c @@ -291,7 +291,7 @@ void guest_phys_blocks_append(GuestPhysBlockList *list) memory_listener_unregister(&g.listener); } -static CPUState *find_paging_enabled_cpu(CPUState *start_cpu) +static CPUState *find_paging_enabled_cpu(void) { CPUState *cpu; @@ -304,26 +304,24 @@ static CPUState *find_paging_enabled_cpu(CPUState *start_cpu) return NULL; } -void qemu_get_guest_memory_mapping(MemoryMappingList *list, +bool qemu_get_guest_memory_mapping(MemoryMappingList *list, const GuestPhysBlockList *guest_phys_blocks, Error **errp) { + ERRP_GUARD(); CPUState *cpu, *first_paging_enabled_cpu; GuestPhysBlock *block; ram_addr_t offset, length; - first_paging_enabled_cpu = find_paging_enabled_cpu(first_cpu); + first_paging_enabled_cpu = find_paging_enabled_cpu(); if (first_paging_enabled_cpu) { for (cpu = first_paging_enabled_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) { - Error *err = NULL; - cpu_get_memory_mapping(cpu, list, &err); - if (err) { - error_propagate(errp, err); - return; + if (!cpu_get_memory_mapping(cpu, list, errp)) { + return false; } } - return; + return true; } /* @@ -335,6 +333,7 @@ void qemu_get_guest_memory_mapping(MemoryMappingList *list, length = block->target_end - block->target_start; create_new_memory_mapping(list, offset, offset, length); } + return true; } void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list, diff --git a/system/vl.c b/system/vl.c index 3100ac01ed..92d29bf521 100644 --- a/system/vl.c +++ b/system/vl.c @@ -727,6 +727,12 @@ static QemuOptsList qemu_smp_opts = { .name = "cpus", .type = QEMU_OPT_NUMBER, }, { + 
.name = "drawers", + .type = QEMU_OPT_NUMBER, + }, { + .name = "books", + .type = QEMU_OPT_NUMBER, + }, { .name = "sockets", .type = QEMU_OPT_NUMBER, }, { diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c index 326a03153d..c078849403 100644 --- a/target/arm/arm-powerctl.c +++ b/target/arm/arm-powerctl.c @@ -65,60 +65,9 @@ static void arm_set_cpu_on_async_work(CPUState *target_cpu_state, /* Initialize the cpu we are turning on */ cpu_reset(target_cpu_state); + arm_emulate_firmware_reset(target_cpu_state, info->target_el); target_cpu_state->halted = 0; - if (info->target_aa64) { - if ((info->target_el < 3) && arm_feature(&target_cpu->env, - ARM_FEATURE_EL3)) { - /* - * As target mode is AArch64, we need to set lower - * exception level (the requested level 2) to AArch64 - */ - target_cpu->env.cp15.scr_el3 |= SCR_RW; - } - - if ((info->target_el < 2) && arm_feature(&target_cpu->env, - ARM_FEATURE_EL2)) { - /* - * As target mode is AArch64, we need to set lower - * exception level (the requested level 1) to AArch64 - */ - target_cpu->env.cp15.hcr_el2 |= HCR_RW; - } - - target_cpu->env.pstate = aarch64_pstate_mode(info->target_el, true); - } else { - /* We are requested to boot in AArch32 mode */ - static const uint32_t mode_for_el[] = { 0, - ARM_CPU_MODE_SVC, - ARM_CPU_MODE_HYP, - ARM_CPU_MODE_SVC }; - - cpsr_write(&target_cpu->env, mode_for_el[info->target_el], CPSR_M, - CPSRWriteRaw); - } - - if (info->target_el == 3) { - /* Processor is in secure mode */ - target_cpu->env.cp15.scr_el3 &= ~SCR_NS; - } else { - /* Processor is not in secure mode */ - target_cpu->env.cp15.scr_el3 |= SCR_NS; - - /* Set NSACR.{CP11,CP10} so NS can access the FPU */ - target_cpu->env.cp15.nsacr |= 3 << 10; - - /* - * If QEMU is providing the equivalent of EL3 firmware, then we need - * to make sure a CPU targeting EL2 comes out of reset with a - * functional HVC insn. - */ - if (arm_feature(&target_cpu->env, ARM_FEATURE_EL3) - && info->target_el == 2) { - target_cpu->env.cp15.scr_el3 |= SCR_HCE; - } - } - /* We check if the started CPU is now at the correct level */ assert(info->target_el == arm_current_el(&target_cpu->env)); diff --git a/target/arm/common-semi-target.h b/target/arm/common-semi-target.h index 19438ed8cd..da51f2d7f5 100644 --- a/target/arm/common-semi-target.h +++ b/target/arm/common-semi-target.h @@ -10,9 +10,7 @@ #ifndef TARGET_ARM_COMMON_SEMI_TARGET_H #define TARGET_ARM_COMMON_SEMI_TARGET_H -#ifndef CONFIG_USER_ONLY -#include "hw/arm/boot.h" -#endif +#include "target/arm/cpu-qom.h" static inline target_ulong common_semi_arg(CPUState *cs, int argno) { diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h index 514c22ced9..d06c08a734 100644 --- a/target/arm/cpu-qom.h +++ b/target/arm/cpu-qom.h @@ -23,8 +23,6 @@ #include "hw/core/cpu.h" #include "qom/object.h" -struct arm_boot_info; - #define TYPE_ARM_CPU "arm-cpu" OBJECT_DECLARE_CPU_TYPE(ARMCPU, ARMCPUClass, ARM_CPU) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 6c6c551573..aa4e006f21 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -553,6 +553,101 @@ static void arm_cpu_reset_hold(Object *obj) } } +void arm_emulate_firmware_reset(CPUState *cpustate, int target_el) +{ + ARMCPU *cpu = ARM_CPU(cpustate); + CPUARMState *env = &cpu->env; + bool have_el3 = arm_feature(env, ARM_FEATURE_EL3); + bool have_el2 = arm_feature(env, ARM_FEATURE_EL2); + + /* + * Check we have the EL we're aiming for. If that is the + * highest implemented EL, then cpu_reset has already done + * all the work. 
+ */ + switch (target_el) { + case 3: + assert(have_el3); + return; + case 2: + assert(have_el2); + if (!have_el3) { + return; + } + break; + case 1: + if (!have_el3 && !have_el2) { + return; + } + break; + default: + g_assert_not_reached(); + } + + if (have_el3) { + /* + * Set the EL3 state so code can run at EL2. This should match + * the requirements set by Linux in its booting spec. + */ + if (env->aarch64) { + env->cp15.scr_el3 |= SCR_RW; + if (cpu_isar_feature(aa64_pauth, cpu)) { + env->cp15.scr_el3 |= SCR_API | SCR_APK; + } + if (cpu_isar_feature(aa64_mte, cpu)) { + env->cp15.scr_el3 |= SCR_ATA; + } + if (cpu_isar_feature(aa64_sve, cpu)) { + env->cp15.cptr_el[3] |= R_CPTR_EL3_EZ_MASK; + env->vfp.zcr_el[3] = 0xf; + } + if (cpu_isar_feature(aa64_sme, cpu)) { + env->cp15.cptr_el[3] |= R_CPTR_EL3_ESM_MASK; + env->cp15.scr_el3 |= SCR_ENTP2; + env->vfp.smcr_el[3] = 0xf; + } + if (cpu_isar_feature(aa64_hcx, cpu)) { + env->cp15.scr_el3 |= SCR_HXEN; + } + if (cpu_isar_feature(aa64_fgt, cpu)) { + env->cp15.scr_el3 |= SCR_FGTEN; + } + } + + if (target_el == 2) { + /* If the guest is at EL2 then Linux expects the HVC insn to work */ + env->cp15.scr_el3 |= SCR_HCE; + } + + /* Put CPU into non-secure state */ + env->cp15.scr_el3 |= SCR_NS; + /* Set NSACR.{CP11,CP10} so NS can access the FPU */ + env->cp15.nsacr |= 3 << 10; + } + + if (have_el2 && target_el < 2) { + /* Set EL2 state so code can run at EL1. */ + if (env->aarch64) { + env->cp15.hcr_el2 |= HCR_RW; + } + } + + /* Set the CPU to the desired state */ + if (env->aarch64) { + env->pstate = aarch64_pstate_mode(target_el, true); + } else { + static const uint32_t mode_for_el[] = { + 0, + ARM_CPU_MODE_SVC, + ARM_CPU_MODE_HYP, + ARM_CPU_MODE_SVC, + }; + + cpsr_write(env, mode_for_el[target_el], CPSR_M, CPSRWriteRaw); + } +} + + #if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, diff --git a/target/arm/cpu.h b/target/arm/cpu.h index a9edfb8353..76d4cef9e3 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -1149,6 +1149,28 @@ int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, int cpuid, DumpState *s); +/** + * arm_emulate_firmware_reset: Emulate firmware CPU reset handling + * @cpu: CPU (which must have been freshly reset) + * @target_el: exception level to put the CPU into + * @secure: whether to put the CPU in secure state + * + * When QEMU is directly running a guest kernel at a lower level than + * EL3 it implicitly emulates some aspects of the guest firmware. + * This includes that on reset we need to configure the parts of the + * CPU corresponding to EL3 so that the real guest code can run at its + * lower exception level. This function does that post-reset CPU setup, + * for when we do direct boot of a guest kernel, and for when we + * emulate PSCI and similar firmware interfaces starting a CPU at a + * lower exception level. + * + * @target_el must be an EL implemented by the CPU between 1 and 3. + * We do not support dropping into a Secure EL other than 3. + * + * It is the responsibility of the caller to call arm_rebuild_hflags(). 
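[Editor's note] The new arm_emulate_firmware_reset() centralises the EL3/EL2 setup that arm-powerctl.c used to open-code, as the hunks above show. A sketch of the intended calling sequence; the demo_powerup_cpu_at_el() wrapper is illustrative, not part of the series:

    #include "qemu/osdep.h"
    #include "cpu.h"    /* target/arm/cpu.h: ARMCPU, arm_emulate_firmware_reset() */

    static void demo_powerup_cpu_at_el(CPUState *cs, int target_el)
    {
        ARMCPU *cpu = ARM_CPU(cs);

        cpu_reset(cs);                             /* must be freshly reset */
        arm_emulate_firmware_reset(cs, target_el); /* firmware-level EL setup */
        arm_rebuild_hflags(&cpu->env);             /* caller's responsibility */
        cs->halted = 0;
    }

As the doc comment notes, target_el must be an EL implemented by the CPU between 1 and 3, and dropping into a Secure EL other than 3 is not supported.
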
+ */ +void arm_emulate_firmware_reset(CPUState *cpustate, int target_el); + #ifdef TARGET_AARCH64 int aarch64_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); diff --git a/target/arm/helper.c b/target/arm/helper.c index 74fbb6e1d7..b29edb26af 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -1283,7 +1283,7 @@ static bool pmevcntr_is_64_bit(CPUARMState *env, int counter) bool hlp = env->cp15.mdcr_el2 & MDCR_HLP; int hpmn = env->cp15.mdcr_el2 & MDCR_HPMN; - if (hpmn != 0 && counter >= hpmn) { + if (counter >= hpmn) { return hlp; } } @@ -2475,22 +2475,7 @@ static CPAccessResult gt_counter_access(CPUARMState *env, int timeridx, if (!extract32(env->cp15.c14_cntkctl, timeridx, 1)) { return CP_ACCESS_TRAP; } - - /* If HCR_EL2.<E2H,TGE> == '10': check CNTHCTL_EL2.EL1PCTEN. */ - if (hcr & HCR_E2H) { - if (timeridx == GTIMER_PHYS && - !extract32(env->cp15.cnthctl_el2, 10, 1)) { - return CP_ACCESS_TRAP_EL2; - } - } else { - /* If HCR_EL2.<E2H> == 0: check CNTHCTL_EL2.EL1PCEN. */ - if (has_el2 && timeridx == GTIMER_PHYS && - !extract32(env->cp15.cnthctl_el2, 1, 1)) { - return CP_ACCESS_TRAP_EL2; - } - } - break; - + /* fall through */ case 1: /* Check CNTHCTL_EL2.EL1PCTEN, which changes location based on E2H. */ if (has_el2 && timeridx == GTIMER_PHYS && diff --git a/target/arm/kvm.c b/target/arm/kvm.c index b66b936a95..7903e2ddde 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -553,24 +553,19 @@ bool write_kvmstate_to_list(ARMCPU *cpu) bool ok = true; for (i = 0; i < cpu->cpreg_array_len; i++) { - struct kvm_one_reg r; uint64_t regidx = cpu->cpreg_indexes[i]; uint32_t v32; int ret; - r.id = regidx; - switch (regidx & KVM_REG_SIZE_MASK) { case KVM_REG_SIZE_U32: - r.addr = (uintptr_t)&v32; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r); + ret = kvm_get_one_reg(cs, regidx, &v32); if (!ret) { cpu->cpreg_values[i] = v32; } break; case KVM_REG_SIZE_U64: - r.addr = (uintptr_t)(cpu->cpreg_values + i); - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r); + ret = kvm_get_one_reg(cs, regidx, cpu->cpreg_values + i); break; default: g_assert_not_reached(); @@ -589,7 +584,6 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) bool ok = true; for (i = 0; i < cpu->cpreg_array_len; i++) { - struct kvm_one_reg r; uint64_t regidx = cpu->cpreg_indexes[i]; uint32_t v32; int ret; @@ -598,19 +592,17 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) continue; } - r.id = regidx; switch (regidx & KVM_REG_SIZE_MASK) { case KVM_REG_SIZE_U32: v32 = cpu->cpreg_values[i]; - r.addr = (uintptr_t)&v32; + ret = kvm_set_one_reg(cs, regidx, &v32); break; case KVM_REG_SIZE_U64: - r.addr = (uintptr_t)(cpu->cpreg_values + i); + ret = kvm_set_one_reg(cs, regidx, cpu->cpreg_values + i); break; default: g_assert_not_reached(); } - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r); if (ret) { /* We might fail for "unknown register" and also for * "you tried to set a register which is constant with @@ -709,17 +701,13 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) void kvm_arm_get_virtual_time(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); - struct kvm_one_reg reg = { - .id = KVM_REG_ARM_TIMER_CNT, - .addr = (uintptr_t)&cpu->kvm_vtime, - }; int ret; if (cpu->kvm_vtime_dirty) { return; } - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, KVM_REG_ARM_TIMER_CNT, &cpu->kvm_vtime); if (ret) { error_report("Failed to get KVM_REG_ARM_TIMER_CNT"); abort(); @@ -731,17 +719,13 @@ void kvm_arm_get_virtual_time(CPUState *cs) 
void kvm_arm_put_virtual_time(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); - struct kvm_one_reg reg = { - .id = KVM_REG_ARM_TIMER_CNT, - .addr = (uintptr_t)&cpu->kvm_vtime, - }; int ret; if (!cpu->kvm_vtime_dirty) { return; } - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, KVM_REG_ARM_TIMER_CNT, &cpu->kvm_vtime); if (ret) { error_report("Failed to set KVM_REG_ARM_TIMER_CNT"); abort(); diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index 5e95c496bb..4bb68646e4 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -30,7 +30,6 @@ #include "internals.h" #include "hw/acpi/acpi.h" #include "hw/acpi/ghes.h" -#include "hw/arm/virt.h" static bool have_guest_debug; @@ -540,14 +539,10 @@ static int kvm_arm_sve_set_vls(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = { cpu->sve_vq.map }; - struct kvm_one_reg reg = { - .id = KVM_REG_ARM64_SVE_VLS, - .addr = (uint64_t)&vls[0], - }; assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX); - return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + return kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_VLS, &vls[0]); } #define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5 @@ -726,19 +721,17 @@ static void kvm_inject_arm_sea(CPUState *c) static int kvm_arch_put_fpsimd(CPUState *cs) { CPUARMState *env = &ARM_CPU(cs)->env; - struct kvm_one_reg reg; int i, ret; for (i = 0; i < 32; i++) { uint64_t *q = aa64_vfp_qreg(env, i); #if HOST_BIG_ENDIAN uint64_t fp_val[2] = { q[1], q[0] }; - reg.addr = (uintptr_t)fp_val; + ret = kvm_set_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]), + fp_val); #else - reg.addr = (uintptr_t)q; + ret = kvm_set_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]), q); #endif - reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]); - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); if (ret) { return ret; } @@ -759,14 +752,11 @@ static int kvm_arch_put_sve(CPUState *cs) CPUARMState *env = &cpu->env; uint64_t tmp[ARM_MAX_VQ * 2]; uint64_t *r; - struct kvm_one_reg reg; int n, ret; for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) { r = sve_bswap64(tmp, &env->vfp.zregs[n].d[0], cpu->sve_max_vq * 2); - reg.addr = (uintptr_t)r; - reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0); - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_ZREG(n, 0), r); if (ret) { return ret; } @@ -775,9 +765,7 @@ static int kvm_arch_put_sve(CPUState *cs) for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) { r = sve_bswap64(tmp, r = &env->vfp.pregs[n].p[0], DIV_ROUND_UP(cpu->sve_max_vq * 2, 8)); - reg.addr = (uintptr_t)r; - reg.id = KVM_REG_ARM64_SVE_PREG(n, 0); - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_PREG(n, 0), r); if (ret) { return ret; } @@ -785,9 +773,7 @@ static int kvm_arch_put_sve(CPUState *cs) r = sve_bswap64(tmp, &env->vfp.pregs[FFR_PRED_NUM].p[0], DIV_ROUND_UP(cpu->sve_max_vq * 2, 8)); - reg.addr = (uintptr_t)r; - reg.id = KVM_REG_ARM64_SVE_FFR(0); - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, KVM_REG_ARM64_SVE_FFR(0), r); if (ret) { return ret; } @@ -797,7 +783,6 @@ static int kvm_arch_put_sve(CPUState *cs) int kvm_arch_put_registers(CPUState *cs, int level) { - struct kvm_one_reg reg; uint64_t val; uint32_t fpr; int i, ret; @@ -814,9 +799,8 @@ int kvm_arch_put_registers(CPUState *cs, int level) } for (i = 0; i < 31; i++) { - reg.id = AARCH64_CORE_REG(regs.regs[i]); - reg.addr = (uintptr_t) &env->xregs[i]; - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]), + 
&env->xregs[i]); if (ret) { return ret; } @@ -827,16 +811,12 @@ int kvm_arch_put_registers(CPUState *cs, int level) */ aarch64_save_sp(env, 1); - reg.id = AARCH64_CORE_REG(regs.sp); - reg.addr = (uintptr_t) &env->sp_el[0]; - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.sp), &env->sp_el[0]); if (ret) { return ret; } - reg.id = AARCH64_CORE_REG(sp_el1); - reg.addr = (uintptr_t) &env->sp_el[1]; - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(sp_el1), &env->sp_el[1]); if (ret) { return ret; } @@ -847,23 +827,17 @@ int kvm_arch_put_registers(CPUState *cs, int level) } else { val = cpsr_read(env); } - reg.id = AARCH64_CORE_REG(regs.pstate); - reg.addr = (uintptr_t) &val; - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.pstate), &val); if (ret) { return ret; } - reg.id = AARCH64_CORE_REG(regs.pc); - reg.addr = (uintptr_t) &env->pc; - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.pc), &env->pc); if (ret) { return ret; } - reg.id = AARCH64_CORE_REG(elr_el1); - reg.addr = (uintptr_t) &env->elr_el[1]; - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(elr_el1), &env->elr_el[1]); if (ret) { return ret; } @@ -882,9 +856,8 @@ int kvm_arch_put_registers(CPUState *cs, int level) /* KVM 0-4 map to QEMU banks 1-5 */ for (i = 0; i < KVM_NR_SPSR; i++) { - reg.id = AARCH64_CORE_REG(spsr[i]); - reg.addr = (uintptr_t) &env->banked_spsr[i + 1]; - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(spsr[i]), + &env->banked_spsr[i + 1]); if (ret) { return ret; } @@ -899,18 +872,14 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } - reg.addr = (uintptr_t)(&fpr); fpr = vfp_get_fpsr(env); - reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr); - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpsr), &fpr); if (ret) { return ret; } - reg.addr = (uintptr_t)(&fpr); fpr = vfp_get_fpcr(env); - reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr); - ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + ret = kvm_set_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpcr), &fpr); if (ret) { return ret; } @@ -939,14 +908,11 @@ int kvm_arch_put_registers(CPUState *cs, int level) static int kvm_arch_get_fpsimd(CPUState *cs) { CPUARMState *env = &ARM_CPU(cs)->env; - struct kvm_one_reg reg; int i, ret; for (i = 0; i < 32; i++) { uint64_t *q = aa64_vfp_qreg(env, i); - reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]); - reg.addr = (uintptr_t)q; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]), q); if (ret) { return ret; } else { @@ -970,15 +936,12 @@ static int kvm_arch_get_sve(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; - struct kvm_one_reg reg; uint64_t *r; int n, ret; for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) { r = &env->vfp.zregs[n].d[0]; - reg.addr = (uintptr_t)r; - reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0); - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, KVM_REG_ARM64_SVE_ZREG(n, 0), r); if (ret) { return ret; } @@ -987,9 +950,7 @@ static int kvm_arch_get_sve(CPUState *cs) for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) { r = &env->vfp.pregs[n].p[0]; - reg.addr = (uintptr_t)r; - reg.id = KVM_REG_ARM64_SVE_PREG(n, 0); - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, 
KVM_REG_ARM64_SVE_PREG(n, 0), r); if (ret) { return ret; } @@ -997,9 +958,7 @@ static int kvm_arch_get_sve(CPUState *cs) } r = &env->vfp.pregs[FFR_PRED_NUM].p[0]; - reg.addr = (uintptr_t)r; - reg.id = KVM_REG_ARM64_SVE_FFR(0); - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, KVM_REG_ARM64_SVE_FFR(0), r); if (ret) { return ret; } @@ -1010,7 +969,6 @@ static int kvm_arch_get_sve(CPUState *cs) int kvm_arch_get_registers(CPUState *cs) { - struct kvm_one_reg reg; uint64_t val; unsigned int el; uint32_t fpr; @@ -1020,31 +978,24 @@ int kvm_arch_get_registers(CPUState *cs) CPUARMState *env = &cpu->env; for (i = 0; i < 31; i++) { - reg.id = AARCH64_CORE_REG(regs.regs[i]); - reg.addr = (uintptr_t) &env->xregs[i]; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]), + &env->xregs[i]); if (ret) { return ret; } } - reg.id = AARCH64_CORE_REG(regs.sp); - reg.addr = (uintptr_t) &env->sp_el[0]; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.sp), &env->sp_el[0]); if (ret) { return ret; } - reg.id = AARCH64_CORE_REG(sp_el1); - reg.addr = (uintptr_t) &env->sp_el[1]; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(sp_el1), &env->sp_el[1]); if (ret) { return ret; } - reg.id = AARCH64_CORE_REG(regs.pstate); - reg.addr = (uintptr_t) &val; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.pstate), &val); if (ret) { return ret; } @@ -1061,9 +1012,7 @@ int kvm_arch_get_registers(CPUState *cs) */ aarch64_restore_sp(env, 1); - reg.id = AARCH64_CORE_REG(regs.pc); - reg.addr = (uintptr_t) &env->pc; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.pc), &env->pc); if (ret) { return ret; } @@ -1077,9 +1026,7 @@ int kvm_arch_get_registers(CPUState *cs) aarch64_sync_64_to_32(env); } - reg.id = AARCH64_CORE_REG(elr_el1); - reg.addr = (uintptr_t) &env->elr_el[1]; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(elr_el1), &env->elr_el[1]); if (ret) { return ret; } @@ -1089,9 +1036,8 @@ int kvm_arch_get_registers(CPUState *cs) * KVM SPSRs 0-4 map to QEMU banks 1-5 */ for (i = 0; i < KVM_NR_SPSR; i++) { - reg.id = AARCH64_CORE_REG(spsr[i]); - reg.addr = (uintptr_t) &env->banked_spsr[i + 1]; - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(spsr[i]), + &env->banked_spsr[i + 1]); if (ret) { return ret; } @@ -1112,17 +1058,13 @@ int kvm_arch_get_registers(CPUState *cs) return ret; } - reg.addr = (uintptr_t)(&fpr); - reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr); - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpsr), &fpr); if (ret) { return ret; } vfp_set_fpsr(env, fpr); - reg.addr = (uintptr_t)(&fpr); - reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr); - ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + ret = kvm_get_one_reg(cs, AARCH64_SIMD_CTRL_REG(fp_regs.fpcr), &fpr); if (ret) { return ret; } diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c index 1f918ff537..0d5d8e307d 100644 --- a/target/arm/tcg/cpu32.c +++ b/target/arm/tcg/cpu32.c @@ -89,6 +89,10 @@ void aa32_max_features(ARMCPU *cpu) t = FIELD_DP32(t, ID_DFR0, COPSDBG, 9); /* FEAT_Debugv8p4 */ t = FIELD_DP32(t, ID_DFR0, PERFMON, 6); /* FEAT_PMUv3p5 */ cpu->isar.id_dfr0 = t; + + t = cpu->isar.id_dfr1; + t = FIELD_DP32(t, ID_DFR1, HPMN0, 1); /* FEAT_HPMN0 */ + 
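[Editor's note] The target/arm/kvm.c and kvm64.c hunks above replace the open-coded struct kvm_one_reg plus kvm_vcpu_ioctl(KVM_GET/SET_ONE_REG) pattern with the generic kvm_get_one_reg()/kvm_set_one_reg() helpers. A minimal sketch; demo_sync_vtime() is an illustrative caller, not part of the series:

    #include "qemu/osdep.h"
    #include <linux/kvm.h>
    #include "sysemu/kvm.h"

    static int demo_sync_vtime(CPUState *cs, uint64_t *vtime, bool to_kvm)
    {
        /* The id selects the register; the pointer is the value buffer. */
        return to_kvm ? kvm_set_one_reg(cs, KVM_REG_ARM_TIMER_CNT, vtime)
                      : kvm_get_one_reg(cs, KVM_REG_ARM_TIMER_CNT, vtime);
    }
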
cpu->isar.id_dfr1 = t; } /* CPU models. These are not needed for the AArch64 linux-user build. */ diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c index 68928e5127..d978aa5f7a 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -1109,6 +1109,7 @@ void aarch64_max_tcg_initfn(Object *obj) t = cpu->isar.id_aa64dfr0; t = FIELD_DP64(t, ID_AA64DFR0, DEBUGVER, 9); /* FEAT_Debugv8p4 */ t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 6); /* FEAT_PMUv3p5 */ + t = FIELD_DP64(t, ID_AA64DFR0, HPMN0, 1); /* FEAT_HPMN0 */ cpu->isar.id_aa64dfr0 = t; t = cpu->isar.id_aa64smfr0; diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c index 48927fbb8c..b3660173d1 100644 --- a/target/arm/tcg/translate.c +++ b/target/arm/tcg/translate.c @@ -7882,7 +7882,7 @@ static void op_addr_block_post(DisasContext *s, arg_ldst_block *a, } } -static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n) +static bool op_stm(DisasContext *s, arg_ldst_block *a) { int i, j, n, list, mem_idx; bool user = a->u; @@ -7899,7 +7899,14 @@ static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n) list = a->list; n = ctpop16(list); - if (n < min_n || a->rn == 15) { + /* + * This is UNPREDICTABLE for n < 1 in all encodings, and we choose + * to UNDEF. In the T32 STM encoding n == 1 is also UNPREDICTABLE, + * but hardware treats it like the A32 version and implements the + * single-register-store, and some in-the-wild (buggy) software + * assumes that, so we don't UNDEF on that case. + */ + if (n < 1 || a->rn == 15) { unallocated_encoding(s); return true; } @@ -7935,8 +7942,7 @@ static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n) static bool trans_STM(DisasContext *s, arg_ldst_block *a) { - /* BitCount(list) < 1 is UNPREDICTABLE */ - return op_stm(s, a, 1); + return op_stm(s, a); } static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a) @@ -7946,11 +7952,10 @@ static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a) unallocated_encoding(s); return true; } - /* BitCount(list) < 2 is UNPREDICTABLE */ - return op_stm(s, a, 2); + return op_stm(s, a); } -static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n) +static bool do_ldm(DisasContext *s, arg_ldst_block *a) { int i, j, n, list, mem_idx; bool loaded_base; @@ -7979,7 +7984,14 @@ static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n) list = a->list; n = ctpop16(list); - if (n < min_n || a->rn == 15) { + /* + * This is UNPREDICTABLE for n < 1 in all encodings, and we choose + * to UNDEF. In the T32 LDM encoding n == 1 is also UNPREDICTABLE, + * but hardware treats it like the A32 version and implements the + * single-register-load, and some in-the-wild (buggy) software + * assumes that, so we don't UNDEF on that case. + */ + if (n < 1 || a->rn == 15) { unallocated_encoding(s); return true; } @@ -8045,8 +8057,7 @@ static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a) unallocated_encoding(s); return true; } - /* BitCount(list) < 1 is UNPREDICTABLE */ - return do_ldm(s, a, 1); + return do_ldm(s, a); } static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a) @@ -8056,16 +8067,14 @@ static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a) unallocated_encoding(s); return true; } - /* BitCount(list) < 2 is UNPREDICTABLE */ - return do_ldm(s, a, 2); + return do_ldm(s, a); } static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a) { /* Writeback is conditional on the base register not being loaded. 
*/ a->w = !(a->list & (1 << a->rn)); - /* BitCount(list) < 1 is UNPREDICTABLE */ - return do_ldm(s, a, 1); + return do_ldm(s, a); } static bool trans_CLRM(DisasContext *s, arg_CLRM *a) diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index dce1b852a7..0da65d6dd6 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -45,7 +45,7 @@ regre = re.compile(r"((?<!DUP)[MNORCPQXSGVZA])([stuvwxyzdefg]+)([.]?[LlHh]?)(\d+ immre = re.compile(r"[#]([rRsSuUm])(\d+)(?:[:](\d+))?") reg_or_immre = re.compile( r"(((?<!DUP)[MNRCOPQXSGVZA])([stuvwxyzdefg]+)" - + "([.]?[LlHh]?)(\d+S?))|([#]([rRsSuUm])(\d+)[:]?(\d+)?)" + r"([.]?[LlHh]?)(\d+S?))|([#]([rRsSuUm])(\d+)[:]?(\d+)?)" ) relimmre = re.compile(r"[#]([rR])(\d+)(?:[:](\d+))?") absimmre = re.compile(r"[#]([sSuUm])(\d+)(?:[:](\d+))?") @@ -337,7 +337,7 @@ def read_attribs_file(name): def read_overrides_file(name): - overridere = re.compile("#define fGEN_TCG_([A-Za-z0-9_]+)\(.*") + overridere = re.compile(r"#define fGEN_TCG_([A-Za-z0-9_]+)\(.*") for line in open(name, "rt").readlines(): if not overridere.match(line): continue diff --git a/target/i386/arch_memory_mapping.c b/target/i386/arch_memory_mapping.c index 271cb5e41b..d1ff659128 100644 --- a/target/i386/arch_memory_mapping.c +++ b/target/i386/arch_memory_mapping.c @@ -266,7 +266,7 @@ static void walk_pml5e(MemoryMappingList *list, AddressSpace *as, } #endif -void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list, +bool x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list, Error **errp) { X86CPU *cpu = X86_CPU(cs); @@ -275,7 +275,7 @@ void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list, if (!cpu_paging_enabled(cs)) { /* paging is disabled */ - return; + return true; } a20_mask = x86_get_a20_mask(env); @@ -310,5 +310,7 @@ void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list, pse = !!(env->cr[4] & CR4_PSE_MASK); walk_pde2(list, cs->as, pde_addr, a20_mask, pse); } + + return true; } diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 3aab05ddad..bdca901dfa 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -778,6 +778,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .cpuid = {.eax = 1, .reg = R_EDX, }, .tcg_features = TCG_FEATURES, + .no_autoenable_flags = CPUID_HT, }, [FEAT_1_ECX] = { .type = CPUID_FEATURE_WORD, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index e1875466b9..471e71dbc5 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2055,7 +2055,7 @@ int x86_cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cpu, int x86_cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu, DumpState *s); -void x86_cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, +bool x86_cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, Error **errp); void x86_cpu_dump_state(CPUState *cs, FILE *f, int flags); diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index f6c7f7e268..e7c054cc16 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -373,6 +373,8 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, if (function == 1 && reg == R_EDX) { /* KVM before 2.6.30 misreports the following features */ ret |= CPUID_MTRR | CPUID_PAT | CPUID_MCE | CPUID_MCA; + /* KVM never reports CPUID_HT but QEMU can support when vcpus > 1 */ + ret |= CPUID_HT; } else if (function == 1 && reg == R_ECX) { /* We can set the hypervisor flag, even if KVM does not return it on * GET_SUPPORTED_CPUID @@ -1601,7 +1603,7 @@ static 
int hyperv_init_vcpu(X86CPU *cpu) error_setg(&hv_passthrough_mig_blocker, "'hv-passthrough' CPU flag prevents migration, use explicit" " set of hv-* flags instead"); - ret = migrate_add_blocker(hv_passthrough_mig_blocker, &local_err); + ret = migrate_add_blocker(&hv_passthrough_mig_blocker, &local_err); if (ret < 0) { error_report_err(local_err); return ret; @@ -1615,7 +1617,7 @@ static int hyperv_init_vcpu(X86CPU *cpu) " use explicit 'hv-no-nonarch-coresharing=on' instead (but" " make sure SMT is disabled and/or that vCPUs are properly" " pinned)"); - ret = migrate_add_blocker(hv_no_nonarch_cs_mig_blocker, &local_err); + ret = migrate_add_blocker(&hv_no_nonarch_cs_mig_blocker, &local_err); if (ret < 0) { error_report_err(local_err); return ret; @@ -2211,7 +2213,7 @@ int kvm_arch_init_vcpu(CPUState *cs) error_setg(&invtsc_mig_blocker, "State blocked by non-migratable CPU device" " (invtsc flag)"); - r = migrate_add_blocker(invtsc_mig_blocker, &local_err); + r = migrate_add_blocker(&invtsc_mig_blocker, &local_err); if (r < 0) { error_report_err(local_err); return r; @@ -2269,7 +2271,7 @@ int kvm_arch_init_vcpu(CPUState *cs) return 0; fail: - migrate_del_blocker(invtsc_mig_blocker); + migrate_del_blocker(&invtsc_mig_blocker); return r; } diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c index fb769868f2..7d752bc5e0 100644 --- a/target/i386/nvmm/nvmm-all.c +++ b/target/i386/nvmm/nvmm-all.c @@ -929,9 +929,8 @@ nvmm_init_vcpu(CPUState *cpu) error_setg(&nvmm_migration_blocker, "NVMM: Migration not supported"); - if (migrate_add_blocker(nvmm_migration_blocker, &local_error) < 0) { + if (migrate_add_blocker(&nvmm_migration_blocker, &local_error) < 0) { error_report_err(local_error); - error_free(nvmm_migration_blocker); return -EINVAL; } } diff --git a/target/i386/sev.c b/target/i386/sev.c index fe2144c038..9a71246682 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -891,7 +891,7 @@ sev_launch_finish(SevGuestState *sev) /* add migration blocker */ error_setg(&sev_mig_blocker, "SEV: Migration is not implemented"); - migrate_add_blocker(sev_mig_blocker, &error_fatal); + migrate_add_blocker(&sev_mig_blocker, &error_fatal); } static void diff --git a/target/i386/svm.h b/target/i386/svm.h index f9a785489d..1bd7844730 100644 --- a/target/i386/svm.h +++ b/target/i386/svm.h @@ -132,6 +132,7 @@ /* only included in documentation, maybe wrong */ #define SVM_EXIT_MONITOR 0x08a #define SVM_EXIT_MWAIT 0x08b +#define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_ERR -1 diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index d2061ec44a..4f6f9fa7e5 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -5916,6 +5916,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) | PREFIX_REPZ | PREFIX_REPNZ))) { goto illegal_op; } + gen_svm_check_intercept(s, SVM_EXIT_XSETBV); if (!check_cpl0(s)) { break; } diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c index df3aba2642..d29ba916a0 100644 --- a/target/i386/whpx/whpx-all.c +++ b/target/i386/whpx/whpx-all.c @@ -2160,9 +2160,8 @@ int whpx_init_vcpu(CPUState *cpu) "State blocked due to non-migratable CPUID feature support," "dirty memory tracking support, and XSAVE/XRSTOR support"); - if (migrate_add_blocker(whpx_migration_blocker, &local_error) < 0) { + if (migrate_add_blocker(&whpx_migration_blocker, &local_error) < 0) { error_report_err(local_error); - error_free(whpx_migration_blocker); ret = -EINVAL; goto error; } diff --git a/target/mips/cpu.h 
b/target/mips/cpu.h index 67f8e8b988..5fddceff3a 100644 --- a/target/mips/cpu.h +++ b/target/mips/cpu.h @@ -1345,11 +1345,10 @@ uint64_t cpu_mips_phys_to_kseg1(void *opaque, uint64_t addr); #if !defined(CONFIG_USER_ONLY) -/* mips_int.c */ +/* HW declaration specific to the MIPS target */ void cpu_mips_soft_irq(CPUMIPSState *env, int irq, int level); - -/* mips_itu.c */ -void itc_reconfigure(struct MIPSITUState *tag); +void cpu_mips_irq_init_cpu(MIPSCPU *cpu); +void cpu_mips_clock_init(MIPSCPU *cpu); #endif /* !CONFIG_USER_ONLY */ diff --git a/target/mips/sysemu/cp0_timer.c b/target/mips/sysemu/cp0_timer.c index 9d2bcb0dea..62de502caa 100644 --- a/target/mips/sysemu/cp0_timer.c +++ b/target/mips/sysemu/cp0_timer.c @@ -22,7 +22,6 @@ #include "qemu/osdep.h" #include "hw/irq.h" -#include "hw/mips/cpudevs.h" #include "qemu/timer.h" #include "sysemu/kvm.h" #include "internal.h" diff --git a/target/mips/tcg/sysemu/cp0_helper.c b/target/mips/tcg/sysemu/cp0_helper.c index 5da1124589..d349548743 100644 --- a/target/mips/tcg/sysemu/cp0_helper.c +++ b/target/mips/tcg/sysemu/cp0_helper.c @@ -28,6 +28,7 @@ #include "qemu/host-utils.h" #include "exec/helper-proto.h" #include "exec/exec-all.h" +#include "hw/misc/mips_itu.h" /* SMP helpers. */ diff --git a/target/mips/tcg/sysemu/tlb_helper.c b/target/mips/tcg/sysemu/tlb_helper.c index 7dbc2e24c4..4ede904800 100644 --- a/target/mips/tcg/sysemu/tlb_helper.c +++ b/target/mips/tcg/sysemu/tlb_helper.c @@ -24,7 +24,6 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "exec/log.h" -#include "hw/mips/cpudevs.h" #include "exec/helper-proto.h" /* TLB management */ diff --git a/target/s390x/cpu-sysemu.c b/target/s390x/cpu-sysemu.c index 8112561e5e..1cd30c1d84 100644 --- a/target/s390x/cpu-sysemu.c +++ b/target/s390x/cpu-sysemu.c @@ -307,3 +307,16 @@ void s390_do_cpu_set_diag318(CPUState *cs, run_on_cpu_data arg) kvm_s390_set_diag318(cs, arg.host_ulong); } } + +void s390_cpu_topology_set_changed(bool changed) +{ + int ret; + + if (kvm_enabled()) { + ret = kvm_s390_topology_set_mtcr(changed); + if (ret) { + error_report("Failed to set Modified Topology Change Report: %s", + strerror(-ret)); + } + } +} diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c index 6093ab0a12..6acfa1c91b 100644 --- a/target/s390x/cpu.c +++ b/target/s390x/cpu.c @@ -31,12 +31,14 @@ #include "qapi/qapi-types-machine.h" #include "sysemu/hw_accel.h" #include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" #include "fpu/softfloat-helpers.h" #include "disas/capstone.h" #include "sysemu/tcg.h" #ifndef CONFIG_USER_ONLY #include "sysemu/reset.h" #endif +#include "hw/s390x/cpu-topology.h" #define CR0_RESET 0xE0UL #define CR14_RESET 0xC2000000UL; @@ -145,6 +147,14 @@ static void s390_query_cpu_fast(CPUState *cpu, CpuInfoFast *value) S390CPU *s390_cpu = S390_CPU(cpu); value->u.s390x.cpu_state = s390_cpu->env.cpu_state; +#if !defined(CONFIG_USER_ONLY) + if (s390_has_topology()) { + value->u.s390x.has_dedicated = true; + value->u.s390x.dedicated = s390_cpu->env.dedicated; + value->u.s390x.has_entitlement = true; + value->u.s390x.entitlement = s390_cpu->env.entitlement; + } +#endif } /* S390CPUClass::reset() */ @@ -290,6 +300,12 @@ static const gchar *s390_gdb_arch_name(CPUState *cs) static Property s390x_cpu_properties[] = { #if !defined(CONFIG_USER_ONLY) DEFINE_PROP_UINT32("core-id", S390CPU, env.core_id, 0), + DEFINE_PROP_INT32("socket-id", S390CPU, env.socket_id, -1), + DEFINE_PROP_INT32("book-id", S390CPU, env.book_id, -1), + DEFINE_PROP_INT32("drawer-id", S390CPU, 
env.drawer_id, -1), + DEFINE_PROP_BOOL("dedicated", S390CPU, env.dedicated, false), + DEFINE_PROP_CPUS390ENTITLEMENT("entitlement", S390CPU, env.entitlement, + S390_CPU_ENTITLEMENT_AUTO), #endif DEFINE_PROP_END_OF_LIST() }; diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index 7bea7075e1..40c5cedd0e 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -30,6 +30,7 @@ #include "exec/cpu-defs.h" #include "qemu/cpu-float.h" #include "tcg/tcg_s390x.h" +#include "qapi/qapi-types-machine-common.h" #define ELF_MACHINE_UNAME "S390X" @@ -132,6 +133,11 @@ struct CPUArchState { #if !defined(CONFIG_USER_ONLY) uint32_t core_id; /* PoP "CPU address", same as cpu_index */ + int32_t socket_id; + int32_t book_id; + int32_t drawer_id; + bool dedicated; + CpuS390Entitlement entitlement; /* Used only for vertical polarization */ uint64_t cpuid; #endif @@ -564,6 +570,29 @@ typedef struct SysIB_322 { } SysIB_322; QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096); +/* + * Topology Magnitude fields (MAG) indicates the maximum number of + * topology list entries (TLE) at the corresponding nesting level. + */ +#define S390_TOPOLOGY_MAG 6 +#define S390_TOPOLOGY_MAG6 0 +#define S390_TOPOLOGY_MAG5 1 +#define S390_TOPOLOGY_MAG4 2 +#define S390_TOPOLOGY_MAG3 3 +#define S390_TOPOLOGY_MAG2 4 +#define S390_TOPOLOGY_MAG1 5 +/* Configuration topology */ +typedef struct SysIB_151x { + uint8_t reserved0[2]; + uint16_t length; + uint8_t mag[S390_TOPOLOGY_MAG]; + uint8_t reserved1; + uint8_t mnest; + uint32_t reserved2; + char tle[]; +} SysIB_151x; +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16); + typedef union SysIB { SysIB_111 sysib_111; SysIB_121 sysib_121; @@ -571,9 +600,62 @@ typedef union SysIB { SysIB_221 sysib_221; SysIB_222 sysib_222; SysIB_322 sysib_322; + SysIB_151x sysib_151x; } SysIB; QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096); +/* + * CPU Topology List provided by STSI with fc=15 provides a list + * of two different Topology List Entries (TLE) types to specify + * the topology hierarchy. + * + * - Container Topology List Entry + * Defines a container to contain other Topology List Entries + * of any type, nested containers or CPU. + * - CPU Topology List Entry + * Specifies the CPUs position, type, entitlement and polarization + * of the CPUs contained in the last container TLE. + * + * There can be theoretically up to five levels of containers, QEMU + * uses only three levels, the drawer's, book's and socket's level. + * + * A container with a nesting level (NL) greater than 1 can only + * contain another container of nesting level NL-1. + * + * A container of nesting level 1 (socket), contains as many CPU TLE + * as needed to describe the position and qualities of all CPUs inside + * the container. + * The qualities of a CPU are polarization, entitlement and type. 
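
The MAG indexing defined a few lines up is easy to misread: MAG1 (array index 5) counts the innermost level and MAG6 (index 0) the outermost. As an illustration only, mirroring what setup_stsi() in the new stsi-topology.c further down does, the counts from an -smp line such as drawers=2,books=2,sockets=3,cores=2 would land in the header at the maximum nesting level of 4 like this:

/* Illustrative sketch; the real code is setup_stsi() below. */
static void sketch_fill_mag_level4(SysIB_151x *sysib,
                                   int drawers, int books,
                                   int sockets, int cores)
{
    sysib->mnest = 4;
    sysib->mag[S390_TOPOLOGY_MAG4] = drawers;   /* 2 */
    sysib->mag[S390_TOPOLOGY_MAG3] = books;     /* 2 */
    sysib->mag[S390_TOPOLOGY_MAG2] = sockets;   /* 3 */
    sysib->mag[S390_TOPOLOGY_MAG1] = cores;     /* 2 */
}
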
+ * + * The CPU TLE defines the position of the CPUs of identical qualities + * using a 64bits mask which first bit has its offset defined by + * the CPU address origin field of the CPU TLE like in: + * CPU address = origin * 64 + bit position within the mask + */ +/* Container type Topology List Entry */ +typedef struct SYSIBContainerListEntry { + uint8_t nl; + uint8_t reserved[6]; + uint8_t id; +} SYSIBContainerListEntry; +QEMU_BUILD_BUG_ON(sizeof(SYSIBContainerListEntry) != 8); + +/* CPU type Topology List Entry */ +typedef struct SysIBCPUListEntry { + uint8_t nl; + uint8_t reserved0[3]; +#define SYSIB_TLE_POLARITY_MASK 0x03 +#define SYSIB_TLE_DEDICATED 0x04 + uint8_t flags; + uint8_t type; + uint16_t origin; + uint64_t mask; +} SysIBCPUListEntry; +QEMU_BUILD_BUG_ON(sizeof(SysIBCPUListEntry) != 16); + +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, uint64_t addr, uint8_t ar, uintptr_t ra); +void s390_cpu_topology_set_changed(bool changed); + /* MMU defines */ #define ASCE_ORIGIN (~0xfffULL) /* segment table origin */ #define ASCE_SUBSPACE 0x200 /* subspace group control */ diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c index 98f14c09c2..4dead48650 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -255,6 +255,7 @@ bool s390_has_feat(S390Feat feat) case S390_FEAT_SIE_CMMA: case S390_FEAT_SIE_PFMFI: case S390_FEAT_SIE_IBS: + case S390_FEAT_CONFIGURATION_TOPOLOGY: return false; break; default: diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index bc5c56a305..0f0e784b2a 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -86,6 +86,7 @@ #define PRIV_B9_EQBS 0x9c #define PRIV_B9_CLP 0xa0 +#define PRIV_B9_PTF 0xa2 #define PRIV_B9_PCISTG 0xd0 #define PRIV_B9_PCILG 0xd2 #define PRIV_B9_RPCIT 0xd3 @@ -138,7 +139,6 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { KVM_CAP_LAST_INFO }; -static int cap_sync_regs; static int cap_async_pf; static int cap_mem_op; static int cap_mem_op_extension; @@ -337,21 +337,28 @@ int kvm_arch_get_default_type(MachineState *ms) int kvm_arch_init(MachineState *ms, KVMState *s) { + int required_caps[] = { + KVM_CAP_DEVICE_CTRL, + KVM_CAP_SYNC_REGS, + }; + + for (int i = 0; i < ARRAY_SIZE(required_caps); i++) { + if (!kvm_check_extension(s, required_caps[i])) { + error_report("KVM is missing capability #%d - " + "please use kernel 3.15 or newer", required_caps[i]); + return -1; + } + } + object_class_foreach(ccw_machine_class_foreach, TYPE_S390_CCW_MACHINE, false, NULL); - if (!kvm_check_extension(kvm_state, KVM_CAP_DEVICE_CTRL)) { - error_report("KVM is missing capability KVM_CAP_DEVICE_CTRL - " - "please use kernel 3.15 or newer"); - return -1; - } if (!kvm_check_extension(s, KVM_CAP_S390_COW)) { error_report("KVM is missing capability KVM_CAP_S390_COW - " "unsupported environment"); return -1; } - cap_sync_regs = kvm_check_extension(s, KVM_CAP_SYNC_REGS); cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF); cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); cap_mem_op_extension = kvm_check_extension(s, KVM_CAP_S390_MEM_OP_EXTENSION); @@ -365,6 +372,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) kvm_vm_enable_cap(s, KVM_CAP_S390_USER_SIGP, 0); kvm_vm_enable_cap(s, KVM_CAP_S390_VECTOR_REGISTERS, 0); kvm_vm_enable_cap(s, KVM_CAP_S390_USER_STSI, 0); + kvm_vm_enable_cap(s, KVM_CAP_S390_CPU_TOPOLOGY, 0); if (ri_allowed()) { if (kvm_vm_enable_cap(s, KVM_CAP_S390_RI, 0) == 0) { cap_ri = 1; @@ -458,37 +466,28 @@ void kvm_s390_reset_vcpu_normal(S390CPU *cpu) static int 
can_sync_regs(CPUState *cs, int regs) { - return cap_sync_regs && (cs->kvm_run->kvm_valid_regs & regs) == regs; + return (cs->kvm_run->kvm_valid_regs & regs) == regs; } +#define KVM_SYNC_REQUIRED_REGS (KVM_SYNC_GPRS | KVM_SYNC_ACRS | \ + KVM_SYNC_CRS | KVM_SYNC_PREFIX) + int kvm_arch_put_registers(CPUState *cs, int level) { S390CPU *cpu = S390_CPU(cs); CPUS390XState *env = &cpu->env; - struct kvm_sregs sregs; - struct kvm_regs regs; struct kvm_fpu fpu = {}; int r; int i; + g_assert(can_sync_regs(cs, KVM_SYNC_REQUIRED_REGS)); + /* always save the PSW and the GPRS*/ cs->kvm_run->psw_addr = env->psw.addr; cs->kvm_run->psw_mask = env->psw.mask; - if (can_sync_regs(cs, KVM_SYNC_GPRS)) { - for (i = 0; i < 16; i++) { - cs->kvm_run->s.regs.gprs[i] = env->regs[i]; - cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_GPRS; - } - } else { - for (i = 0; i < 16; i++) { - regs.gprs[i] = env->regs[i]; - } - r = kvm_vcpu_ioctl(cs, KVM_SET_REGS, ®s); - if (r < 0) { - return r; - } - } + memcpy(cs->kvm_run->s.regs.gprs, env->regs, sizeof(cs->kvm_run->s.regs.gprs)); + cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_GPRS; if (can_sync_regs(cs, KVM_SYNC_VRS)) { for (i = 0; i < 32; i++) { @@ -521,6 +520,15 @@ int kvm_arch_put_registers(CPUState *cs, int level) return 0; } + /* + * Access registers, control registers and the prefix - these are + * always available via kvm_sync_regs in the kernels that we support + */ + memcpy(cs->kvm_run->s.regs.acrs, env->aregs, sizeof(cs->kvm_run->s.regs.acrs)); + memcpy(cs->kvm_run->s.regs.crs, env->cregs, sizeof(cs->kvm_run->s.regs.crs)); + cs->kvm_run->s.regs.prefix = env->psa; + cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_ACRS | KVM_SYNC_CRS | KVM_SYNC_PREFIX; + if (can_sync_regs(cs, KVM_SYNC_ARCH0)) { cs->kvm_run->s.regs.cputm = env->cputm; cs->kvm_run->s.regs.ckc = env->ckc; @@ -567,25 +575,6 @@ int kvm_arch_put_registers(CPUState *cs, int level) } } - /* access registers and control registers*/ - if (can_sync_regs(cs, KVM_SYNC_ACRS | KVM_SYNC_CRS)) { - for (i = 0; i < 16; i++) { - cs->kvm_run->s.regs.acrs[i] = env->aregs[i]; - cs->kvm_run->s.regs.crs[i] = env->cregs[i]; - } - cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_ACRS; - cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_CRS; - } else { - for (i = 0; i < 16; i++) { - sregs.acrs[i] = env->aregs[i]; - sregs.crs[i] = env->cregs[i]; - } - r = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs); - if (r < 0) { - return r; - } - } - if (can_sync_regs(cs, KVM_SYNC_GSCB)) { memcpy(cs->kvm_run->s.regs.gscb, env->gscb, 32); cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_GSCB; @@ -607,13 +596,6 @@ int kvm_arch_put_registers(CPUState *cs, int level) cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_DIAG318; } - /* Finally the prefix */ - if (can_sync_regs(cs, KVM_SYNC_PREFIX)) { - cs->kvm_run->s.regs.prefix = env->psa; - cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_PREFIX; - } else { - /* prefix is only supported via sync regs */ - } return 0; } @@ -621,8 +603,6 @@ int kvm_arch_get_registers(CPUState *cs) { S390CPU *cpu = S390_CPU(cs); CPUS390XState *env = &cpu->env; - struct kvm_sregs sregs; - struct kvm_regs regs; struct kvm_fpu fpu; int i, r; @@ -630,37 +610,14 @@ int kvm_arch_get_registers(CPUState *cs) env->psw.addr = cs->kvm_run->psw_addr; env->psw.mask = cs->kvm_run->psw_mask; - /* the GPRS */ - if (can_sync_regs(cs, KVM_SYNC_GPRS)) { - for (i = 0; i < 16; i++) { - env->regs[i] = cs->kvm_run->s.regs.gprs[i]; - } - } else { - r = kvm_vcpu_ioctl(cs, KVM_GET_REGS, ®s); - if (r < 0) { - return r; - } - for (i = 0; i < 16; i++) { - env->regs[i] = regs.gprs[i]; - } - } + /* the GPRS, ACRS and 
CRS */ + g_assert(can_sync_regs(cs, KVM_SYNC_REQUIRED_REGS)); + memcpy(env->regs, cs->kvm_run->s.regs.gprs, sizeof(env->regs)); + memcpy(env->aregs, cs->kvm_run->s.regs.acrs, sizeof(env->aregs)); + memcpy(env->cregs, cs->kvm_run->s.regs.crs, sizeof(env->cregs)); - /* The ACRS and CRS */ - if (can_sync_regs(cs, KVM_SYNC_ACRS | KVM_SYNC_CRS)) { - for (i = 0; i < 16; i++) { - env->aregs[i] = cs->kvm_run->s.regs.acrs[i]; - env->cregs[i] = cs->kvm_run->s.regs.crs[i]; - } - } else { - r = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs); - if (r < 0) { - return r; - } - for (i = 0; i < 16; i++) { - env->aregs[i] = sregs.acrs[i]; - env->cregs[i] = sregs.crs[i]; - } - } + /* The prefix */ + env->psa = cs->kvm_run->s.regs.prefix; /* Floating point and vector registers */ if (can_sync_regs(cs, KVM_SYNC_VRS)) { @@ -685,11 +642,6 @@ int kvm_arch_get_registers(CPUState *cs) env->fpc = fpu.fpc; } - /* The prefix */ - if (can_sync_regs(cs, KVM_SYNC_PREFIX)) { - env->psa = cs->kvm_run->s.regs.prefix; - } - if (can_sync_regs(cs, KVM_SYNC_ARCH0)) { env->cputm = cs->kvm_run->s.regs.cputm; env->ckc = cs->kvm_run->s.regs.ckc; @@ -1457,6 +1409,13 @@ static int kvm_mpcifc_service_call(S390CPU *cpu, struct kvm_run *run) } } +static void kvm_handle_ptf(S390CPU *cpu, struct kvm_run *run) +{ + uint8_t r1 = (run->s390_sieic.ipb >> 20) & 0x0f; + + s390_handle_ptf(cpu, r1, RA_IGNORED); +} + static int handle_b9(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) { int r = 0; @@ -1474,6 +1433,9 @@ static int handle_b9(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) case PRIV_B9_RPCIT: r = kvm_rpcit_service_call(cpu, run); break; + case PRIV_B9_PTF: + kvm_handle_ptf(cpu, run); + break; case PRIV_B9_EQBS: /* just inject exception */ r = -1; @@ -1911,9 +1873,12 @@ static int handle_stsi(S390CPU *cpu) if (run->s390_stsi.sel1 != 2 || run->s390_stsi.sel2 != 2) { return 0; } - /* Only sysib 3.2.2 needs post-handling for now. */ insert_stsi_3_2_2(cpu, run->s390_stsi.addr, run->s390_stsi.ar); return 0; + case 15: + insert_stsi_15_1_x(cpu, run->s390_stsi.sel2, run->s390_stsi.addr, + run->s390_stsi.ar, RA_IGNORED); + return 0; default: return 0; } @@ -2495,6 +2460,14 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) set_bit(S390_FEAT_UNPACK, model->features); } + /* + * If we have kernel support for CPU Topology indicate the + * configuration-topology facility. 
+ */ + if (kvm_check_extension(kvm_state, KVM_CAP_S390_CPU_TOPOLOGY)) { + set_bit(S390_FEAT_CONFIGURATION_TOPOLOGY, model->features); + } + /* We emulate a zPCI bus and AEN, therefore we don't need HW support */ set_bit(S390_FEAT_ZPCI, model->features); set_bit(S390_FEAT_ADAPTER_EVENT_NOTIFICATION, model->features); @@ -2661,6 +2634,23 @@ int kvm_s390_get_zpci_op(void) return cap_zpci_op; } +int kvm_s390_topology_set_mtcr(uint64_t attr) +{ + struct kvm_device_attr attribute = { + .group = KVM_S390_VM_CPU_TOPOLOGY, + .attr = attr, + }; + + if (!s390_has_feat(S390_FEAT_CONFIGURATION_TOPOLOGY)) { + return 0; + } + if (!kvm_vm_check_attr(kvm_state, KVM_S390_VM_CPU_TOPOLOGY, attr)) { + return -ENOTSUP; + } + + return kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, &attribute); +} + void kvm_arch_accel_class_init(ObjectClass *oc) { } diff --git a/target/s390x/kvm/kvm_s390x.h b/target/s390x/kvm/kvm_s390x.h index f9785564d0..649dae5948 100644 --- a/target/s390x/kvm/kvm_s390x.h +++ b/target/s390x/kvm/kvm_s390x.h @@ -47,5 +47,6 @@ void kvm_s390_crypto_reset(void); void kvm_s390_restart_interrupt(S390CPU *cpu); void kvm_s390_stop_interrupt(S390CPU *cpu); void kvm_s390_set_diag318(CPUState *cs, uint64_t diag318_info); +int kvm_s390_topology_set_mtcr(uint64_t attr); #endif /* KVM_S390X_H */ diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build index d6aca590ae..588a9aa737 100644 --- a/target/s390x/kvm/meson.build +++ b/target/s390x/kvm/meson.build @@ -1,7 +1,8 @@ s390x_ss.add(when: 'CONFIG_KVM', if_true: files( 'pv.c', - 'kvm.c' + 'kvm.c', + 'stsi-topology.c' ), if_false: files( 'stubs.c' )) diff --git a/target/s390x/kvm/stsi-topology.c b/target/s390x/kvm/stsi-topology.c new file mode 100644 index 0000000000..efd2aa71f1 --- /dev/null +++ b/target/s390x/kvm/stsi-topology.c @@ -0,0 +1,334 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * QEMU S390x CPU Topology + * + * Copyright IBM Corp. 2022, 2023 + * Author(s): Pierre Morel <pmorel@linux.ibm.com> + * + */ +#include "qemu/osdep.h" +#include "cpu.h" +#include "hw/s390x/sclp.h" +#include "hw/s390x/cpu-topology.h" + +QEMU_BUILD_BUG_ON(S390_CPU_ENTITLEMENT_LOW != 1); +QEMU_BUILD_BUG_ON(S390_CPU_ENTITLEMENT_MEDIUM != 2); +QEMU_BUILD_BUG_ON(S390_CPU_ENTITLEMENT_HIGH != 3); + +/** + * fill_container: + * @p: The address of the container TLE to fill + * @level: The level of nesting for this container + * @id: The container receives a unique ID inside its own container + * + * Returns the next free TLE entry. + */ +static char *fill_container(char *p, int level, int id) +{ + SYSIBContainerListEntry *tle = (SYSIBContainerListEntry *)p; + + tle->nl = level; + tle->id = id; + return p + sizeof(*tle); +} + +/** + * fill_tle_cpu: + * @p: The address of the CPU TLE to fill + * @entry: a pointer to the S390TopologyEntry defining this + * CPU container. + * + * Returns the next free TLE entry. + */ +static char *fill_tle_cpu(char *p, S390TopologyEntry *entry) +{ + SysIBCPUListEntry *tle = (SysIBCPUListEntry *)p; + S390TopologyId topology_id = entry->id; + + tle->nl = 0; + tle->flags = 0; + if (topology_id.vertical) { + tle->flags |= topology_id.entitlement; + } + if (topology_id.dedicated) { + tle->flags |= SYSIB_TLE_DEDICATED; + } + tle->type = topology_id.type; + tle->origin = cpu_to_be16(topology_id.origin * 64); + tle->mask = cpu_to_be64(entry->mask); + return p + sizeof(*tle); +} + +/* + * Macro to check that the size of data after increment + * will not get bigger than the size of the SysIB. 
+ */ +#define SYSIB_GUARD(data, x) do { \ + data += x; \ + if (data > sizeof(SysIB)) { \ + return 0; \ + } \ + } while (0) + +/** + * stsi_topology_fill_sysib: + * @p: A pointer to the position of the first TLE + * @level: The nested level wanted by the guest + * + * Fill the SYSIB with the topology information as described in + * the PoP, nesting containers as appropriate, with the maximum + * nesting limited by @level. + * + * Return value: + * On success: the size of the SysIB_15x after being filled with TLE. + * On error: 0 in the case we would overrun the end of the SysIB. + */ +static int stsi_topology_fill_sysib(S390TopologyList *topology_list, + char *p, int level) +{ + S390TopologyEntry *entry; + int last_drawer = -1; + int last_book = -1; + int last_socket = -1; + int drawer_id = 0; + int book_id = 0; + int socket_id = 0; + int n = sizeof(SysIB_151x); + + QTAILQ_FOREACH(entry, topology_list, next) { + bool drawer_change = last_drawer != entry->id.drawer; + bool book_change = drawer_change || last_book != entry->id.book; + bool socket_change = book_change || last_socket != entry->id.socket; + + if (level > 3 && drawer_change) { + SYSIB_GUARD(n, sizeof(SYSIBContainerListEntry)); + p = fill_container(p, 3, drawer_id++); + book_id = 0; + } + if (level > 2 && book_change) { + SYSIB_GUARD(n, sizeof(SYSIBContainerListEntry)); + p = fill_container(p, 2, book_id++); + socket_id = 0; + } + if (socket_change) { + SYSIB_GUARD(n, sizeof(SYSIBContainerListEntry)); + p = fill_container(p, 1, socket_id++); + } + + SYSIB_GUARD(n, sizeof(SysIBCPUListEntry)); + p = fill_tle_cpu(p, entry); + last_drawer = entry->id.drawer; + last_book = entry->id.book; + last_socket = entry->id.socket; + } + + return n; +} + +/** + * setup_stsi: + * @topology_list: ordered list of groups of CPUs with same properties + * @sysib: pointer to a SysIB to be filled with SysIB_151x data + * @level: Nested level specified by the guest + * + * Setup the SYSIB for STSI 15.1, the header as well as the description + * of the topology. + */ +static int setup_stsi(S390TopologyList *topology_list, SysIB_151x *sysib, + int level) +{ + sysib->mnest = level; + switch (level) { + case 4: + sysib->mag[S390_TOPOLOGY_MAG4] = current_machine->smp.drawers; + sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.books; + sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets; + sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores; + break; + case 3: + sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.drawers * + current_machine->smp.books; + sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets; + sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores; + break; + case 2: + sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.drawers * + current_machine->smp.books * + current_machine->smp.sockets; + sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores; + break; + } + + return stsi_topology_fill_sysib(topology_list, sysib->tle, level); +} + +/** + * s390_topology_add_cpu_to_entry: + * @entry: Topology entry to setup + * @cpu: the S390CPU to add + * + * Set the core bit inside the topology mask. + */ +static void s390_topology_add_cpu_to_entry(S390TopologyEntry *entry, + S390CPU *cpu) +{ + set_bit(63 - (cpu->env.core_id % 64), &entry->mask); +} + +/** + * s390_topology_from_cpu: + * @cpu: S390CPU to calculate the topology id + * + * Initialize the topology id from the CPU environment. 
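
fill_tle_cpu() and s390_topology_add_cpu_to_entry() above encode a group of identically configured cores as a 64-aligned origin plus a big-endian bit mask whose leftmost bit is offset 0. A minimal sketch of the reverse direction, i.e. what a consumer of the SYSIB would do to recover the core IDs from one CPU TLE (the helper name is invented for illustration):

/* Hypothetical decoder for one SysIBCPUListEntry as filled in above. */
static void sketch_tle_core_ids(const SysIBCPUListEntry *tle)
{
    int base = be16_to_cpu(tle->origin);    /* already holds origin * 64 */
    uint64_t mask = be64_to_cpu(tle->mask);

    for (int bit = 0; bit < 64; bit++) {
        /* bit 0 is the most significant bit of the mask */
        if (mask & (1ULL << (63 - bit))) {
            printf("core-id %d\n", base + bit);
        }
    }
}
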
+ */ +static S390TopologyId s390_topology_from_cpu(S390CPU *cpu) +{ + S390TopologyId topology_id = { + .drawer = cpu->env.drawer_id, + .book = cpu->env.book_id, + .socket = cpu->env.socket_id, + .type = S390_TOPOLOGY_CPU_IFL, + .vertical = s390_topology.polarization == S390_CPU_POLARIZATION_VERTICAL, + .entitlement = cpu->env.entitlement, + .dedicated = cpu->env.dedicated, + .origin = cpu->env.core_id / 64, + }; + + return topology_id; +} + +/** + * s390_topology_id_cmp: + * @l: first S390TopologyId + * @r: second S390TopologyId + * + * Compare two topology ids according to the sorting order specified by the PoP. + * + * Returns a negative number if the first id is less than, 0 if it is equal to + * and positive if it is larger than the second id. + */ +static int s390_topology_id_cmp(const S390TopologyId *l, + const S390TopologyId *r) +{ + /* + * lexical order, compare less significant values only if more significant + * ones are equal + */ + return l->sentinel - r->sentinel ?: + l->drawer - r->drawer ?: + l->book - r->book ?: + l->socket - r->socket ?: + l->type - r->type ?: + /* logic is inverted for the next three */ + r->vertical - l->vertical ?: + r->entitlement - l->entitlement ?: + r->dedicated - l->dedicated ?: + l->origin - r->origin; +} + +static bool s390_topology_id_eq(const S390TopologyId *l, + const S390TopologyId *r) +{ + return !s390_topology_id_cmp(l, r); +} + +static bool s390_topology_id_lt(const S390TopologyId *l, + const S390TopologyId *r) +{ + return s390_topology_id_cmp(l, r) < 0; +} + +/** + * s390_topology_fill_list_sorted: + * @topology_list: list to fill + * + * Create S390TopologyEntrys as appropriate from all CPUs and fill the + * topology_list with the entries according to the order specified by the PoP. + */ +static void s390_topology_fill_list_sorted(S390TopologyList *topology_list) +{ + CPUState *cs; + S390TopologyEntry sentinel = { .id.sentinel = 1 }; + + QTAILQ_INIT(topology_list); + + QTAILQ_INSERT_HEAD(topology_list, &sentinel, next); + + CPU_FOREACH(cs) { + S390TopologyId id = s390_topology_from_cpu(S390_CPU(cs)); + S390TopologyEntry *entry = NULL, *tmp; + + QTAILQ_FOREACH(tmp, topology_list, next) { + if (s390_topology_id_eq(&id, &tmp->id)) { + entry = tmp; + break; + } else if (s390_topology_id_lt(&id, &tmp->id)) { + entry = g_malloc0(sizeof(*entry)); + entry->id = id; + QTAILQ_INSERT_BEFORE(tmp, entry, next); + break; + } + } + assert(entry); + s390_topology_add_cpu_to_entry(entry, S390_CPU(cs)); + } + + QTAILQ_REMOVE(topology_list, &sentinel, next); +} + +/** + * s390_topology_empty_list: + * + * Clear all entries in the S390Topology list. + */ +static void s390_topology_empty_list(S390TopologyList *topology_list) +{ + S390TopologyEntry *entry = NULL; + S390TopologyEntry *tmp = NULL; + + QTAILQ_FOREACH_SAFE(entry, topology_list, next, tmp) { + QTAILQ_REMOVE(topology_list, entry, next); + g_free(entry); + } +} + +/** + * insert_stsi_15_1_x: + * @cpu: the CPU doing the call for which we set CC + * @sel2: the selector 2, containing the nested level + * @addr: Guest logical address of the guest SysIB + * @ar: the access register number + * @ra: the return address + * + * Emulate STSI 15.1.x, that is, perform all necessary checks and + * fill the SYSIB. + * In case the topology description is too long to fit into the SYSIB, + * set CC=3 and abort without writing the SYSIB. 
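
s390_topology_id_cmp() above leans on GCC's a ?: b extension (yield a unless it is zero, otherwise b) to chain the comparisons. For readers unfamiliar with that idiom, an equivalent spelled out long-hand looks roughly like this (sketch only; the field accesses follow the usage above):

static int sketch_id_cmp(const S390TopologyId *l, const S390TopologyId *r)
{
    int d;

    if ((d = l->sentinel - r->sentinel)) { return d; }
    if ((d = l->drawer - r->drawer))     { return d; }
    if ((d = l->book - r->book))         { return d; }
    if ((d = l->socket - r->socket))     { return d; }
    if ((d = l->type - r->type))         { return d; }
    /* vertical, entitlement and dedicated sort in descending order */
    if ((d = r->vertical - l->vertical))         { return d; }
    if ((d = r->entitlement - l->entitlement))   { return d; }
    if ((d = r->dedicated - l->dedicated))       { return d; }
    return l->origin - r->origin;
}
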
+ */ +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, uint64_t addr, uint8_t ar, uintptr_t ra) +{ + S390TopologyList topology_list; + SysIB sysib = {0}; + int length; + + if (!s390_has_topology() || sel2 < 2 || sel2 > SCLP_READ_SCP_INFO_MNEST) { + setcc(cpu, 3); + return; + } + + s390_topology_fill_list_sorted(&topology_list); + length = setup_stsi(&topology_list, &sysib.sysib_151x, sel2); + s390_topology_empty_list(&topology_list); + + if (!length) { + setcc(cpu, 3); + return; + } + + sysib.sysib_151x.length = cpu_to_be16(length); + if (!s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, length)) { + setcc(cpu, 0); + } else { + s390_cpu_virt_mem_handle_exc(cpu, ra); + } +} diff --git a/tests/avocado/s390_topology.py b/tests/avocado/s390_topology.py new file mode 100644 index 0000000000..9154ac8776 --- /dev/null +++ b/tests/avocado/s390_topology.py @@ -0,0 +1,439 @@ +# Functional test that boots a Linux kernel and checks the console +# +# Copyright IBM Corp. 2023 +# +# Author: +# Pierre Morel <pmorel@linux.ibm.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. + +import os +import shutil +import time + +from avocado_qemu import QemuSystemTest +from avocado_qemu import exec_command +from avocado_qemu import exec_command_and_wait_for_pattern +from avocado_qemu import interrupt_interactive_console_until_pattern +from avocado_qemu import wait_for_console_pattern +from avocado.utils import process +from avocado.utils import archive + + +class S390CPUTopology(QemuSystemTest): + """ + S390x CPU topology consists of 4 topology layers, from bottom to top, + the cores, sockets, books and drawers and 2 modifiers attributes, + the entitlement and the dedication. + See: docs/system/s390x/cpu-topology.rst. + + S390x CPU topology is setup in different ways: + - implicitly from the '-smp' argument by completing each topology + level one after the other beginning with drawer 0, book 0 and + socket 0. + - explicitly from the '-device' argument on the QEMU command line + - explicitly by hotplug of a new CPU using QMP or HMP + - it is modified by using QMP 'set-cpu-topology' + + The S390x modifier attribute entitlement depends on the machine + polarization, which can be horizontal or vertical. + The polarization is changed on a request from the guest. + """ + timeout = 90 + event_timeout = 10 + + KERNEL_COMMON_COMMAND_LINE = ('printk.time=0 ' + 'root=/dev/ram ' + 'selinux=0 ' + 'rdinit=/bin/sh') + + def wait_until_booted(self): + wait_for_console_pattern(self, 'no job control', + failure_message='Kernel panic - not syncing', + vm=None) + + def check_topology(self, c, s, b, d, e, t): + res = self.vm.qmp('query-cpus-fast') + cpus = res['return'] + for cpu in cpus: + core = cpu['props']['core-id'] + socket = cpu['props']['socket-id'] + book = cpu['props']['book-id'] + drawer = cpu['props']['drawer-id'] + entitlement = cpu.get('entitlement') + dedicated = cpu.get('dedicated') + if core == c: + self.assertEqual(drawer, d) + self.assertEqual(book, b) + self.assertEqual(socket, s) + self.assertEqual(entitlement, e) + self.assertEqual(dedicated, t) + + def kernel_init(self): + """ + We need a VM that supports CPU topology, + currently this only the case when using KVM, not TCG. + We need a kernel supporting the CPU topology. + We need a minimal root filesystem with a shell. 
+ """ + self.require_accelerator("kvm") + kernel_url = ('https://archives.fedoraproject.org/pub/archive' + '/fedora-secondary/releases/35/Server/s390x/os' + '/images/kernel.img') + kernel_hash = '0d1aaaf303f07cf0160c8c48e56fe638' + kernel_path = self.fetch_asset(kernel_url, algorithm='md5', + asset_hash=kernel_hash) + + initrd_url = ('https://archives.fedoraproject.org/pub/archive' + '/fedora-secondary/releases/35/Server/s390x/os' + '/images/initrd.img') + initrd_hash = 'a122057d95725ac030e2ec51df46e172' + initrd_path_xz = self.fetch_asset(initrd_url, algorithm='md5', + asset_hash=initrd_hash) + initrd_path = os.path.join(self.workdir, 'initrd-raw.img') + archive.lzma_uncompress(initrd_path_xz, initrd_path) + + self.vm.set_console() + kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + self.vm.add_args('-nographic', + '-enable-kvm', + '-cpu', 'max,ctop=on', + '-m', '512', + '-kernel', kernel_path, + '-initrd', initrd_path, + '-append', kernel_command_line) + + def system_init(self): + self.log.info("System init") + exec_command_and_wait_for_pattern(self, + """ mount proc -t proc /proc; + mount sys -t sysfs /sys; + cat /sys/devices/system/cpu/dispatching """, + '0') + + def test_single(self): + """ + This test checks the simplest topology with a single CPU. + + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.launch() + self.wait_until_booted() + self.check_topology(0, 0, 0, 0, 'medium', False) + + def test_default(self): + """ + This test checks the implicit topology. + + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.add_args('-smp', + '13,drawers=2,books=2,sockets=3,cores=2,maxcpus=24') + self.vm.launch() + self.wait_until_booted() + self.check_topology(0, 0, 0, 0, 'medium', False) + self.check_topology(1, 0, 0, 0, 'medium', False) + self.check_topology(2, 1, 0, 0, 'medium', False) + self.check_topology(3, 1, 0, 0, 'medium', False) + self.check_topology(4, 2, 0, 0, 'medium', False) + self.check_topology(5, 2, 0, 0, 'medium', False) + self.check_topology(6, 0, 1, 0, 'medium', False) + self.check_topology(7, 0, 1, 0, 'medium', False) + self.check_topology(8, 1, 1, 0, 'medium', False) + self.check_topology(9, 1, 1, 0, 'medium', False) + self.check_topology(10, 2, 1, 0, 'medium', False) + self.check_topology(11, 2, 1, 0, 'medium', False) + self.check_topology(12, 0, 0, 1, 'medium', False) + + def test_move(self): + """ + This test checks the topology modification by moving a CPU + to another socket: CPU 0 is moved from socket 0 to socket 2. + + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.add_args('-smp', + '1,drawers=2,books=2,sockets=3,cores=2,maxcpus=24') + self.vm.launch() + self.wait_until_booted() + + self.check_topology(0, 0, 0, 0, 'medium', False) + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'socket-id': 2, 'entitlement': 'low'}) + self.assertEqual(res['return'], {}) + self.check_topology(0, 2, 0, 0, 'low', False) + + def test_dash_device(self): + """ + This test verifies that a CPU defined with the '-device' + command line option finds its right place inside the topology. 
+ + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.add_args('-smp', + '1,drawers=2,books=2,sockets=3,cores=2,maxcpus=24') + self.vm.add_args('-device', 'max-s390x-cpu,core-id=10') + self.vm.add_args('-device', + 'max-s390x-cpu,' + 'core-id=1,socket-id=0,book-id=1,drawer-id=1,entitlement=low') + self.vm.add_args('-device', + 'max-s390x-cpu,' + 'core-id=2,socket-id=0,book-id=1,drawer-id=1,entitlement=medium') + self.vm.add_args('-device', + 'max-s390x-cpu,' + 'core-id=3,socket-id=1,book-id=1,drawer-id=1,entitlement=high') + self.vm.add_args('-device', + 'max-s390x-cpu,' + 'core-id=4,socket-id=1,book-id=1,drawer-id=1') + self.vm.add_args('-device', + 'max-s390x-cpu,' + 'core-id=5,socket-id=2,book-id=1,drawer-id=1,dedicated=true') + + self.vm.launch() + self.wait_until_booted() + + self.check_topology(10, 2, 1, 0, 'medium', False) + self.check_topology(1, 0, 1, 1, 'low', False) + self.check_topology(2, 0, 1, 1, 'medium', False) + self.check_topology(3, 1, 1, 1, 'high', False) + self.check_topology(4, 1, 1, 1, 'medium', False) + self.check_topology(5, 2, 1, 1, 'high', True) + + + def guest_set_dispatching(self, dispatching): + exec_command(self, + f'echo {dispatching} > /sys/devices/system/cpu/dispatching') + self.vm.event_wait('CPU_POLARIZATION_CHANGE', self.event_timeout) + exec_command_and_wait_for_pattern(self, + 'cat /sys/devices/system/cpu/dispatching', dispatching) + + + def test_polarization(self): + """ + This test verifies that QEMU modifies the entitlement change after + several guest polarization change requests. + + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.launch() + self.wait_until_booted() + + self.system_init() + res = self.vm.qmp('query-s390x-cpu-polarization') + self.assertEqual(res['return']['polarization'], 'horizontal') + self.check_topology(0, 0, 0, 0, 'medium', False) + + self.guest_set_dispatching('1'); + res = self.vm.qmp('query-s390x-cpu-polarization') + self.assertEqual(res['return']['polarization'], 'vertical') + self.check_topology(0, 0, 0, 0, 'medium', False) + + self.guest_set_dispatching('0'); + res = self.vm.qmp('query-s390x-cpu-polarization') + self.assertEqual(res['return']['polarization'], 'horizontal') + self.check_topology(0, 0, 0, 0, 'medium', False) + + + def check_polarization(self, polarization): + #We need to wait for the change to have been propagated to the kernel + exec_command_and_wait_for_pattern(self, + "\n".join([ + "timeout 1 sh -c 'while true", + 'do', + ' syspath="/sys/devices/system/cpu/cpu0/polarization"', + ' polarization="$(cat "$syspath")" || exit', + f' if [ "$polarization" = "{polarization}" ]; then', + ' exit 0', + ' fi', + ' sleep 0.01', + #searched for strings mustn't show up in command, '' to obfuscate + "done' && echo succ''ess || echo fail''ure", + ]), + "success", "failure") + + + def test_entitlement(self): + """ + This test verifies that QEMU modifies the entitlement + after a guest request and that the guest sees the change. 
+ + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.launch() + self.wait_until_booted() + + self.system_init() + + self.check_polarization('horizontal') + self.check_topology(0, 0, 0, 0, 'medium', False) + + self.guest_set_dispatching('1') + self.check_polarization('vertical:medium') + self.check_topology(0, 0, 0, 0, 'medium', False) + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'low'}) + self.assertEqual(res['return'], {}) + self.check_polarization('vertical:low') + self.check_topology(0, 0, 0, 0, 'low', False) + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'medium'}) + self.assertEqual(res['return'], {}) + self.check_polarization('vertical:medium') + self.check_topology(0, 0, 0, 0, 'medium', False) + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'high'}) + self.assertEqual(res['return'], {}) + self.check_polarization('vertical:high') + self.check_topology(0, 0, 0, 0, 'high', False) + + self.guest_set_dispatching('0'); + self.check_polarization("horizontal") + self.check_topology(0, 0, 0, 0, 'high', False) + + + def test_dedicated(self): + """ + This test verifies that QEMU adjusts the entitlement correctly when a + CPU is made dedicated. + QEMU retains the entitlement value when horizontal polarization is in effect. + For the guest, the field shows the effective value of the entitlement. + + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.launch() + self.wait_until_booted() + + self.system_init() + + self.check_polarization("horizontal") + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'dedicated': True}) + self.assertEqual(res['return'], {}) + self.check_topology(0, 0, 0, 0, 'high', True) + self.check_polarization("horizontal") + + self.guest_set_dispatching('1'); + self.check_topology(0, 0, 0, 0, 'high', True) + self.check_polarization("vertical:high") + + self.guest_set_dispatching('0'); + self.check_topology(0, 0, 0, 0, 'high', True) + self.check_polarization("horizontal") + + + def test_socket_full(self): + """ + This test verifies that QEMU does not accept to overload a socket. + The socket-id 0 on book-id 0 already contains CPUs 0 and 1 and can + not accept any new CPU while socket-id 0 on book-id 1 is free. 
+ + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.add_args('-smp', + '3,drawers=2,books=2,sockets=3,cores=2,maxcpus=24') + self.vm.launch() + self.wait_until_booted() + + self.system_init() + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 2, 'socket-id': 0, 'book-id': 0}) + self.assertEqual(res['error']['class'], 'GenericError') + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 2, 'socket-id': 0, 'book-id': 1}) + self.assertEqual(res['return'], {}) + + def test_dedicated_error(self): + """ + This test verifies that QEMU refuses to lower the entitlement + of a dedicated CPU + + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.launch() + self.wait_until_booted() + + self.system_init() + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'dedicated': True}) + self.assertEqual(res['return'], {}) + + self.check_topology(0, 0, 0, 0, 'high', True) + + self.guest_set_dispatching('1'); + + self.check_topology(0, 0, 0, 0, 'high', True) + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'low', 'dedicated': True}) + self.assertEqual(res['error']['class'], 'GenericError') + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'low'}) + self.assertEqual(res['error']['class'], 'GenericError') + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'medium', 'dedicated': True}) + self.assertEqual(res['error']['class'], 'GenericError') + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'medium'}) + self.assertEqual(res['error']['class'], 'GenericError') + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'low', 'dedicated': False}) + self.assertEqual(res['return'], {}) + + res = self.vm.qmp('set-cpu-topology', + {'core-id': 0, 'entitlement': 'medium', 'dedicated': False}) + self.assertEqual(res['return'], {}) + + def test_move_error(self): + """ + This test verifies that QEMU refuses to move a CPU to an + nonexistent location + + :avocado: tags=arch:s390x + :avocado: tags=machine:s390-ccw-virtio + """ + self.kernel_init() + self.vm.launch() + self.wait_until_booted() + + self.system_init() + + res = self.vm.qmp('set-cpu-topology', {'core-id': 0, 'drawer-id': 1}) + self.assertEqual(res['error']['class'], 'GenericError') + + res = self.vm.qmp('set-cpu-topology', {'core-id': 0, 'book-id': 1}) + self.assertEqual(res['error']['class'], 'GenericError') + + res = self.vm.qmp('set-cpu-topology', {'core-id': 0, 'socket-id': 1}) + self.assertEqual(res['error']['class'], 'GenericError') + + self.check_topology(0, 0, 0, 0, 'medium', False) diff --git a/tests/avocado/virtio_check_params.py b/tests/avocado/virtio_check_params.py index f4314ef824..5fe370a179 100644 --- a/tests/avocado/virtio_check_params.py +++ b/tests/avocado/virtio_check_params.py @@ -43,7 +43,7 @@ VM_DEV_PARAMS = {'virtio-scsi-pci': ['-device', 'virtio-scsi-pci,id=scsi0'], class VirtioMaxSegSettingsCheck(QemuSystemTest): @staticmethod def make_pattern(props): - pattern_items = ['{0} = \w+'.format(prop) for prop in props] + pattern_items = [r'{0} = \w+'.format(prop) for prop in props] return '|'.join(pattern_items) def query_virtqueue(self, vm, dev_type_name): diff --git a/tests/docker/docker.py b/tests/docker/docker.py index 688ef62989..3b8a26704d 100755 --- a/tests/docker/docker.py +++ b/tests/docker/docker.py @@ -186,7 +186,7 @@ def _check_binfmt_misc(executable): (binary)) return None, True - m = 
re.search("interpreter (\S+)\n", entry) + m = re.search(r"interpreter (\S+)\n", entry) interp = m.group(1) if interp and interp != executable: print("binfmt_misc for %s does not point to %s, using %s" % diff --git a/tests/qemu-iotests/linters.py b/tests/qemu-iotests/linters.py index 65c4c4e827..9fb3fd1449 100644 --- a/tests/qemu-iotests/linters.py +++ b/tests/qemu-iotests/linters.py @@ -68,7 +68,7 @@ def run_linter( :raise CalledProcessError: If the linter process exits with failure. """ subprocess.run( - ('python3', '-m', tool, *args), + (sys.executable, '-m', tool, *args), env=env, check=True, stdout=subprocess.PIPE if suppress_output else None, diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py index 9a37ad9152..e67ebd254b 100644 --- a/tests/qemu-iotests/testenv.py +++ b/tests/qemu-iotests/testenv.py @@ -216,7 +216,7 @@ class TestEnv(ContextManager['TestEnv']): self.source_iotests = source_dir self.build_iotests = build_dir - self.build_root = os.path.join(self.build_iotests, '..', '..') + self.build_root = Path(self.build_iotests).parent.parent self.init_directories() diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c index dc7a55634c..f33a210861 100644 --- a/tests/qtest/libqtest.c +++ b/tests/qtest/libqtest.c @@ -91,6 +91,7 @@ struct QTestState static GHookList abrt_hooks; static void (*sighandler_old)(int); +static bool silence_spawn_log; static int qtest_query_target_endianness(QTestState *s); @@ -336,10 +337,17 @@ void qtest_remove_abrt_handler(void *data) } } -static const char *qtest_qemu_binary(void) +static const char *qtest_qemu_binary(const char *var) { const char *qemu_bin; + if (var) { + qemu_bin = getenv(var); + if (qemu_bin) { + return qemu_bin; + } + } + qemu_bin = getenv("QTEST_QEMU_BINARY"); if (!qemu_bin) { fprintf(stderr, "Environment variable QTEST_QEMU_BINARY required\n"); @@ -381,7 +389,8 @@ static pid_t qtest_create_process(char *cmd) } #endif /* _WIN32 */ -static QTestState *G_GNUC_PRINTF(1, 2) qtest_spawn_qemu(const char *fmt, ...) +static QTestState *G_GNUC_PRINTF(2, 3) qtest_spawn_qemu(const char *qemu_bin, + const char *fmt, ...) { va_list ap; QTestState *s = g_new0(QTestState, 1); @@ -391,14 +400,15 @@ static QTestState *G_GNUC_PRINTF(1, 2) qtest_spawn_qemu(const char *fmt, ...) g_autoptr(GString) command = g_string_new(""); va_start(ap, fmt); - g_string_append_printf(command, CMD_EXEC "%s %s", - qtest_qemu_binary(), tracearg); + g_string_append_printf(command, CMD_EXEC "%s %s", qemu_bin, tracearg); g_string_append_vprintf(command, fmt, ap); va_end(ap); qtest_add_abrt_handler(kill_qemu_hook_func, s); - g_test_message("starting QEMU: %s", command->str); + if (!silence_spawn_log) { + g_test_message("starting QEMU: %s", command->str); + } #ifndef _WIN32 s->qemu_pid = fork(); @@ -431,7 +441,8 @@ static QTestState *G_GNUC_PRINTF(1, 2) qtest_spawn_qemu(const char *fmt, ...) 
return s; } -QTestState *qtest_init_without_qmp_handshake(const char *extra_args) +static QTestState *qtest_init_internal(const char *qemu_bin, + const char *extra_args) { QTestState *s; int sock, qmpsock, i; @@ -456,7 +467,8 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args) sock = init_socket(socket_path); qmpsock = init_socket(qmp_socket_path); - s = qtest_spawn_qemu("-qtest unix:%s " + s = qtest_spawn_qemu(qemu_bin, + "-qtest unix:%s " "-qtest-log %s " "-chardev socket,path=%s,id=char0 " "-mon chardev=char0,mode=control " @@ -509,9 +521,14 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args) return s; } -QTestState *qtest_init(const char *extra_args) +QTestState *qtest_init_without_qmp_handshake(const char *extra_args) { - QTestState *s = qtest_init_without_qmp_handshake(extra_args); + return qtest_init_internal(qtest_qemu_binary(NULL), extra_args); +} + +QTestState *qtest_init_with_env(const char *var, const char *extra_args) +{ + QTestState *s = qtest_init_internal(qtest_qemu_binary(var), extra_args); QDict *greeting; /* Read the QMP greeting and then do the handshake */ @@ -522,6 +539,11 @@ QTestState *qtest_init(const char *extra_args) return s; } +QTestState *qtest_init(const char *extra_args) +{ + return qtest_init_with_env(NULL, extra_args); +} + QTestState *qtest_vinitf(const char *fmt, va_list ap) { char *args = g_strdup_vprintf(fmt, ap); @@ -905,7 +927,7 @@ char *qtest_hmp(QTestState *s, const char *fmt, ...) const char *qtest_get_arch(void) { - const char *qemu = qtest_qemu_binary(); + const char *qemu = qtest_qemu_binary(NULL); const char *end = strrchr(qemu, '-'); if (!end) { @@ -1449,13 +1471,26 @@ struct MachInfo { char *alias; }; +static void qtest_free_machine_list(struct MachInfo *machines) +{ + if (machines) { + for (int i = 0; machines[i].name != NULL; i++) { + g_free(machines[i].name); + g_free(machines[i].alias); + } + + g_free(machines); + } +} + /* * Returns an array with pointers to the available machine names. * The terminating entry has the name set to NULL. 
*/ -static struct MachInfo *qtest_get_machines(void) +static struct MachInfo *qtest_get_machines(const char *var) { static struct MachInfo *machines; + static char *qemu_var; QDict *response, *minfo; QList *list; const QListEntry *p; @@ -1464,11 +1499,21 @@ static struct MachInfo *qtest_get_machines(void) QTestState *qts; int idx; + if (g_strcmp0(qemu_var, var)) { + qemu_var = g_strdup(var); + + /* new qemu, clear the cache */ + qtest_free_machine_list(machines); + machines = NULL; + } + if (machines) { return machines; } - qts = qtest_init("-machine none"); + silence_spawn_log = !g_test_verbose(); + + qts = qtest_init_with_env(qemu_var, "-machine none"); response = qtest_qmp(qts, "{ 'execute': 'query-machines' }"); g_assert(response); list = qdict_get_qlist(response, "return"); @@ -1499,6 +1544,8 @@ static struct MachInfo *qtest_get_machines(void) qtest_quit(qts); qobject_unref(response); + silence_spawn_log = false; + memset(&machines[idx], 0, sizeof(struct MachInfo)); /* Terminating entry */ return machines; } @@ -1509,7 +1556,7 @@ void qtest_cb_for_every_machine(void (*cb)(const char *machine), struct MachInfo *machines; int i; - machines = qtest_get_machines(); + machines = qtest_get_machines(NULL); for (i = 0; machines[i].name != NULL; i++) { /* Ignore machines that cannot be used for qtests */ @@ -1525,12 +1572,28 @@ void qtest_cb_for_every_machine(void (*cb)(const char *machine), } } -bool qtest_has_machine(const char *machine) +char *qtest_resolve_machine_alias(const char *var, const char *alias) { struct MachInfo *machines; int i; - machines = qtest_get_machines(); + machines = qtest_get_machines(var); + + for (i = 0; machines[i].name != NULL; i++) { + if (machines[i].alias && g_str_equal(alias, machines[i].alias)) { + return g_strdup(machines[i].name); + } + } + + return NULL; +} + +bool qtest_has_machine_with_env(const char *var, const char *machine) +{ + struct MachInfo *machines; + int i; + + machines = qtest_get_machines(var); for (i = 0; machines[i].name != NULL; i++) { if (g_str_equal(machine, machines[i].name) || @@ -1542,6 +1605,11 @@ bool qtest_has_machine(const char *machine) return false; } +bool qtest_has_machine(const char *machine) +{ + return qtest_has_machine_with_env(NULL, machine); +} + bool qtest_has_device(const char *device) { static QList *list; diff --git a/tests/qtest/libqtest.h b/tests/qtest/libqtest.h index 5fe3d13466..6e3d3525bf 100644 --- a/tests/qtest/libqtest.h +++ b/tests/qtest/libqtest.h @@ -56,6 +56,19 @@ QTestState *qtest_vinitf(const char *fmt, va_list ap) G_GNUC_PRINTF(1, 0); QTestState *qtest_init(const char *extra_args); /** + * qtest_init_with_env: + * @var: Environment variable from where to take the QEMU binary + * @extra_args: Other arguments to pass to QEMU. CAUTION: these + * arguments are subject to word splitting and shell evaluation. + * + * Like qtest_init(), but use a different environment variable for the + * QEMU binary. + * + * Returns: #QTestState instance. + */ +QTestState *qtest_init_with_env(const char *var, const char *extra_args); + +/** * qtest_init_without_qmp_handshake: * @extra_args: other arguments to pass to QEMU. CAUTION: these * arguments are subject to word splitting and shell evaluation. 
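
The new libqtest entry points above allow one test process to drive two different QEMU binaries. A hedged usage sketch; the QTEST_QEMU_BINARY_SRC/DST variable names are the ones the migration test further down defines, and the machine name is only an example:

/* Sketch: start a source/destination pair from two environment variables. */
static void sketch_two_binaries(void)
{
    QTestState *src, *dst;

    /* Each variable falls back to QTEST_QEMU_BINARY when unset. */
    src = qtest_init_with_env("QTEST_QEMU_BINARY_SRC", "-machine none");
    dst = qtest_init_with_env("QTEST_QEMU_BINARY_DST", "-machine none");

    /* Machine availability may differ between the two binaries. */
    if (!qtest_has_machine_with_env("QTEST_QEMU_BINARY_DST", "q35")) {
        g_test_skip("destination binary lacks the q35 machine");
    }

    qtest_quit(dst);
    qtest_quit(src);
}
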
@@ -910,6 +923,16 @@ void qtest_cb_for_every_machine(void (*cb)(const char *machine), bool skip_old_versioned); /** + * qtest_resolve_machine_alias: + * @var: Environment variable from where to take the QEMU binary + * @alias: The alias to resolve + * + * Returns: the machine type corresponding to the alias if any, + * otherwise NULL. + */ +char *qtest_resolve_machine_alias(const char *var, const char *alias); + +/** * qtest_has_machine: * @machine: The machine to look for * @@ -918,6 +941,15 @@ void qtest_cb_for_every_machine(void (*cb)(const char *machine), bool qtest_has_machine(const char *machine); /** + * qtest_has_machine_with_env: + * @var: Environment variable from where to take the QEMU binary + * @machine: The machine to look for + * + * Returns: true if the machine is available in the specified binary. + */ +bool qtest_has_machine_with_env(const char *var, const char *machine); + +/** * qtest_has_device: * @device: The device to look for * diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c index 0c185db450..24fb7b3525 100644 --- a/tests/qtest/migration-helpers.c +++ b/tests/qtest/migration-helpers.c @@ -11,6 +11,7 @@ */ #include "qemu/osdep.h" +#include "qemu/ctype.h" #include "qapi/qmp/qjson.h" #include "migration-helpers.h" @@ -240,3 +241,54 @@ void wait_for_migration_fail(QTestState *from, bool allow_active) g_assert(qdict_get_bool(rsp_return, "running")); qobject_unref(rsp_return); } + +char *find_common_machine_version(const char *mtype, const char *var1, + const char *var2) +{ + g_autofree char *type1 = qtest_resolve_machine_alias(var1, mtype); + g_autofree char *type2 = qtest_resolve_machine_alias(var2, mtype); + + g_assert(type1 && type2); + + if (g_str_equal(type1, type2)) { + /* either can be used */ + return g_strdup(type1); + } + + if (qtest_has_machine_with_env(var2, type1)) { + return g_strdup(type1); + } + + if (qtest_has_machine_with_env(var1, type2)) { + return g_strdup(type2); + } + + g_test_message("No common machine version for machine type '%s' between " + "binaries %s and %s", mtype, getenv(var1), getenv(var2)); + g_assert_not_reached(); +} + +char *resolve_machine_version(const char *alias, const char *var1, + const char *var2) +{ + const char *mname = g_getenv("QTEST_QEMU_MACHINE_TYPE"); + g_autofree char *machine_name = NULL; + + if (mname) { + const char *dash = strrchr(mname, '-'); + const char *dot = strrchr(mname, '.'); + + machine_name = g_strdup(mname); + + if (dash && dot) { + assert(qtest_has_machine(machine_name)); + return g_steal_pointer(&machine_name); + } + /* else: probably an alias, let it be resolved below */ + } else { + /* use the hardcoded alias */ + machine_name = g_strdup(alias); + } + + return find_common_machine_version(machine_name, var1, var2); +} diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h index 4f51d0f8bc..e31dc85cc7 100644 --- a/tests/qtest/migration-helpers.h +++ b/tests/qtest/migration-helpers.h @@ -43,4 +43,8 @@ void wait_for_migration_complete(QTestState *who); void wait_for_migration_fail(QTestState *from, bool allow_active); +char *find_common_machine_version(const char *mtype, const char *var1, + const char *var2); +char *resolve_machine_version(const char *alias, const char *var1, + const char *var2); #endif /* MIGRATION_HELPERS_H */ diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index e1c110537b..bc70a14642 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -71,6 +71,8 @@ static bool got_dst_resume; 
#define QEMU_VM_FILE_MAGIC 0x5145564d #define FILE_TEST_FILENAME "migfile" #define FILE_TEST_OFFSET 0x1000 +#define QEMU_ENV_SRC "QTEST_QEMU_BINARY_SRC" +#define QEMU_ENV_DST "QTEST_QEMU_BINARY_DST" #if defined(__linux__) #include <sys/syscall.h> @@ -743,6 +745,8 @@ static int test_migrate_start(QTestState **from, QTestState **to, const char *kvm_opts = NULL; const char *arch = qtest_get_arch(); const char *memory_size; + const char *machine_alias, *machine_opts = ""; + g_autofree char *machine = NULL; if (args->use_shmem) { if (!g_file_test("/dev/shm", G_FILE_TEST_IS_DIR)) { @@ -755,11 +759,20 @@ static int test_migrate_start(QTestState **from, QTestState **to, got_dst_resume = false; if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { memory_size = "150M"; - arch_opts = g_strdup_printf("-drive file=%s,format=raw", bootpath); + + if (g_str_equal(arch, "i386")) { + machine_alias = "pc"; + } else { + machine_alias = "q35"; + } + arch_opts = g_strdup_printf( + "-drive if=none,id=d0,file=%s,format=raw " + "-device ide-hd,drive=d0,secs=1,cyls=1,heads=1", bootpath); start_address = X86_TEST_MEM_START; end_address = X86_TEST_MEM_END; } else if (g_str_equal(arch, "s390x")) { memory_size = "128M"; + machine_alias = "s390-ccw-virtio"; arch_opts = g_strdup_printf("-bios %s", bootpath); start_address = S390_TEST_MEM_START; end_address = S390_TEST_MEM_END; @@ -771,11 +784,14 @@ static int test_migrate_start(QTestState **from, QTestState **to, "'nvramrc=hex .\" _\" begin %x %x " "do i c@ 1 + i c! 1000 +loop .\" B\" 0 " "until'", end_address, start_address); - arch_opts = g_strdup("-nodefaults -machine vsmt=8"); + machine_alias = "pseries"; + machine_opts = "vsmt=8"; + arch_opts = g_strdup("-nodefaults"); } else if (strcmp(arch, "aarch64") == 0) { memory_size = "150M"; - arch_opts = g_strdup_printf("-machine virt,gic-version=max -cpu max " - "-kernel %s", bootpath); + machine_alias = "virt"; + machine_opts = "gic-version=max"; + arch_opts = g_strdup_printf("-cpu max -kernel %s", bootpath); start_address = ARM_TEST_MEM_START; end_address = ARM_TEST_MEM_END; } else { @@ -809,12 +825,19 @@ static int test_migrate_start(QTestState **from, QTestState **to, kvm_opts = ",dirty-ring-size=4096"; } + machine = resolve_machine_version(machine_alias, QEMU_ENV_SRC, + QEMU_ENV_DST); + + g_test_message("Using machine type: %s", machine); + cmd_source = g_strdup_printf("-accel kvm%s -accel tcg " + "-machine %s,%s " "-name source,debug-threads=on " "-m %s " "-serial file:%s/src_serial " "%s %s %s %s %s", kvm_opts ? kvm_opts : "", + machine, machine_opts, memory_size, tmpfs, arch_opts ? arch_opts : "", arch_source ? arch_source : "", @@ -822,26 +845,28 @@ static int test_migrate_start(QTestState **from, QTestState **to, args->opts_source ? args->opts_source : "", ignore_stderr); if (!args->only_target) { - *from = qtest_init(cmd_source); + *from = qtest_init_with_env(QEMU_ENV_SRC, cmd_source); qtest_qmp_set_event_callback(*from, migrate_watch_for_stop, &got_src_stop); } cmd_target = g_strdup_printf("-accel kvm%s -accel tcg " + "-machine %s,%s " "-name target,debug-threads=on " "-m %s " "-serial file:%s/dest_serial " "-incoming %s " "%s %s %s %s %s", kvm_opts ? kvm_opts : "", + machine, machine_opts, memory_size, tmpfs, uri, arch_opts ? arch_opts : "", arch_target ? arch_target : "", shmem_opts ? shmem_opts : "", args->opts_target ? 
args->opts_target : "", ignore_stderr); - *to = qtest_init(cmd_target); + *to = qtest_init_with_env(QEMU_ENV_DST, cmd_target); qtest_qmp_set_event_callback(*to, migrate_watch_for_resume, &got_dst_resume); @@ -2972,10 +2997,23 @@ int main(int argc, char **argv) bool has_uffd; const char *arch; g_autoptr(GError) err = NULL; + const char *qemu_src = getenv(QEMU_ENV_SRC); + const char *qemu_dst = getenv(QEMU_ENV_DST); int ret; g_test_init(&argc, &argv, NULL); + /* + * The default QTEST_QEMU_BINARY must always be provided because + * that is what helpers use to query the accel type and + * architecture. + */ + if (qemu_src && qemu_dst) { + g_test_message("Only one of %s, %s is allowed", + QEMU_ENV_SRC, QEMU_ENV_DST); + exit(1); + } + has_kvm = qtest_has_accel("kvm"); has_tcg = qtest_has_accel("tcg"); @@ -3034,7 +3072,9 @@ int main(int argc, char **argv) qtest_add_func("/migration/bad_dest", test_baddest); #ifndef _WIN32 - qtest_add_func("/migration/analyze-script", test_analyze_script); + if (!g_str_equal(arch, "s390x")) { + qtest_add_func("/migration/analyze-script", test_analyze_script); + } #endif qtest_add_func("/migration/precopy/unix/plain", test_precopy_unix_plain); qtest_add_func("/migration/precopy/unix/xbzrle", test_precopy_unix_xbzrle); diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target index 2efacf9a5a..62b38c792f 100644 --- a/tests/tcg/aarch64/Makefile.target +++ b/tests/tcg/aarch64/Makefile.target @@ -73,11 +73,6 @@ endif # System Registers Tests AARCH64_TESTS += sysregs -ifneq ($(CROSS_CC_HAS_SVE),) -# SVE ioctl test -AARCH64_TESTS += sve-ioctls -sve-ioctls: CFLAGS+=-march=armv8.1-a+sve - AARCH64_TESTS += test-aes test-aes: CFLAGS += -O -march=armv8-a+aes test-aes: test-aes-main.c.inc @@ -100,26 +95,29 @@ sha512-vector: sha512.c TESTS += sha512-vector ifneq ($(CROSS_CC_HAS_SVE),) +# SVE ioctl test +AARCH64_TESTS += sve-ioctls +sve-ioctls: CFLAGS+=-march=armv8.1-a+sve + sha512-sve: CFLAGS=-O3 -march=armv8.1-a+sve sha512-sve: sha512.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS) TESTS += sha512-sve -endif -ifeq ($(HOST_GDB_SUPPORTS_ARCH),y) +ifneq ($(GDB),) GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py run-gdbstub-sysregs: sysregs $(call run-test, $@, $(GDB_SCRIPT) \ - --gdb $(HAVE_GDB_BIN) \ + --gdb $(GDB) \ --qemu $(QEMU) --qargs "$(QEMU_OPTS)" \ --bin $< --test $(AARCH64_SRC)/gdbstub/test-sve.py, \ basic gdbstub SVE support) run-gdbstub-sve-ioctls: sve-ioctls $(call run-test, $@, $(GDB_SCRIPT) \ - --gdb $(HAVE_GDB_BIN) \ + --gdb $(GDB) \ --qemu $(QEMU) --qargs "$(QEMU_OPTS)" \ --bin $< --test $(AARCH64_SRC)/gdbstub/test-sve-ioctl.py, \ basic gdbstub SVE ZLEN support) diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target index 43bddeaf21..f3bfaf1a22 100644 --- a/tests/tcg/multiarch/Makefile.target +++ b/tests/tcg/multiarch/Makefile.target @@ -63,45 +63,39 @@ run-test-mmap: test-mmap run-test-mmap-%: test-mmap $(call run-test, test-mmap-$*, $(QEMU) -p $* $<, $< ($* byte pages)) -ifneq ($(HAVE_GDB_BIN),) -ifeq ($(HOST_GDB_SUPPORTS_ARCH),y) +ifneq ($(GDB),) GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py run-gdbstub-sha1: sha1 $(call run-test, $@, $(GDB_SCRIPT) \ - --gdb $(HAVE_GDB_BIN) \ + --gdb $(GDB) \ --qemu $(QEMU) --qargs "$(QEMU_OPTS)" \ --bin $< --test $(MULTIARCH_SRC)/gdbstub/sha1.py, \ basic gdbstub support) run-gdbstub-qxfer-auxv-read: sha1 $(call run-test, $@, $(GDB_SCRIPT) \ - --gdb $(HAVE_GDB_BIN) \ + --gdb $(GDB) \ --qemu $(QEMU) --qargs "$(QEMU_OPTS)" \ --bin $< --test 
$(MULTIARCH_SRC)/gdbstub/test-qxfer-auxv-read.py, \ basic gdbstub qXfer:auxv:read support) run-gdbstub-proc-mappings: sha1 $(call run-test, $@, $(GDB_SCRIPT) \ - --gdb $(HAVE_GDB_BIN) \ + --gdb $(GDB) \ --qemu $(QEMU) --qargs "$(QEMU_OPTS)" \ --bin $< --test $(MULTIARCH_SRC)/gdbstub/test-proc-mappings.py, \ proc mappings support) run-gdbstub-thread-breakpoint: testthread $(call run-test, $@, $(GDB_SCRIPT) \ - --gdb $(HAVE_GDB_BIN) \ + --gdb $(GDB) \ --qemu $(QEMU) --qargs "$(QEMU_OPTS)" \ --bin $< --test $(MULTIARCH_SRC)/gdbstub/test-thread-breakpoint.py, \ hitting a breakpoint on non-main thread) - -else -run-gdbstub-%: - $(call skip-test, "gdbstub test $*", "no guest arch support") -endif else run-gdbstub-%: - $(call skip-test, "gdbstub test $*", "need working gdb") + $(call skip-test, "gdbstub test $*", "need working gdb with $(patsubst -%,,$(TARGET_NAME)) support") endif EXTRA_RUNS += run-gdbstub-sha1 run-gdbstub-qxfer-auxv-read \ run-gdbstub-proc-mappings run-gdbstub-thread-breakpoint diff --git a/tests/tcg/multiarch/system/Makefile.softmmu-target b/tests/tcg/multiarch/system/Makefile.softmmu-target index 90810a32b2..dee4f58dea 100644 --- a/tests/tcg/multiarch/system/Makefile.softmmu-target +++ b/tests/tcg/multiarch/system/Makefile.softmmu-target @@ -14,13 +14,12 @@ VPATH+=$(MULTIARCH_SYSTEM_SRC) MULTIARCH_TEST_SRCS=$(wildcard $(MULTIARCH_SYSTEM_SRC)/*.c) MULTIARCH_TESTS = $(patsubst $(MULTIARCH_SYSTEM_SRC)/%.c, %, $(MULTIARCH_TEST_SRCS)) -ifneq ($(HAVE_GDB_BIN),) -ifeq ($(HOST_GDB_SUPPORTS_ARCH),y) +ifneq ($(GDB),) GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py run-gdbstub-memory: memory $(call run-test, $@, $(GDB_SCRIPT) \ - --gdb $(HAVE_GDB_BIN) \ + --gdb $(GDB) \ --qemu $(QEMU) \ --output $<.gdb.out \ --qargs \ @@ -29,7 +28,7 @@ run-gdbstub-memory: memory softmmu gdbstub support) run-gdbstub-interrupt: interrupt $(call run-test, $@, $(GDB_SCRIPT) \ - --gdb $(HAVE_GDB_BIN) \ + --gdb $(GDB) \ --qemu $(QEMU) \ --output $<.gdb.out \ --qargs \ @@ -38,7 +37,7 @@ run-gdbstub-interrupt: interrupt softmmu gdbstub support) run-gdbstub-untimely-packet: hello $(call run-test, $@, $(GDB_SCRIPT) \ - --gdb $(HAVE_GDB_BIN) \ + --gdb $(GDB) \ --gdb-args "-ex 'set debug remote 1'" \ --output untimely-packet.gdb.out \ --stderr untimely-packet.gdb.err \ @@ -51,11 +50,7 @@ run-gdbstub-untimely-packet: hello "GREP", file untimely-packet.gdb.err) else run-gdbstub-%: - $(call skip-test, "gdbstub test $*", "no guest arch support") -endif -else -run-gdbstub-%: - $(call skip-test, "gdbstub test $*", "need working gdb") + $(call skip-test, "gdbstub test $*", "need working gdb with $(patsubst -%,,$(TARGET_NAME)) support") endif MULTIARCH_RUNS += run-gdbstub-memory run-gdbstub-interrupt run-gdbstub-untimely-packet diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target index c650aefe5c..826f0a18e4 100644 --- a/tests/tcg/s390x/Makefile.target +++ b/tests/tcg/s390x/Makefile.target @@ -81,12 +81,12 @@ $(Z15_TESTS): CFLAGS+=-march=z15 -O2 TESTS+=$(Z15_TESTS) endif -ifeq ($(HOST_GDB_SUPPORTS_ARCH),y) +ifneq ($(GDB),) GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py run-gdbstub-signals-s390x: signals-s390x $(call run-test, $@, $(GDB_SCRIPT) \ - --gdb $(HAVE_GDB_BIN) \ + --gdb $(GDB) \ --qemu $(QEMU) --qargs "$(QEMU_OPTS)" \ --bin $< --test $(S390X_SRC)/gdbstub/test-signals-s390x.py, \ mixing signals and debugging) @@ -95,7 +95,7 @@ hello-s390x-asm: CFLAGS+=-nostdlib run-gdbstub-svc: hello-s390x-asm $(call run-test, $@, $(GDB_SCRIPT) \ - --gdb $(HAVE_GDB_BIN) \ + --gdb $(GDB) \ --qemu 
$(QEMU) --qargs "$(QEMU_OPTS)" \ --bin $< --test $(S390X_SRC)/gdbstub/test-svc.py, \ single-stepping svc) diff --git a/tests/unit/test-coroutine.c b/tests/unit/test-coroutine.c index b0d21d673a..a2563647e7 100644 --- a/tests/unit/test-coroutine.c +++ b/tests/unit/test-coroutine.c @@ -645,7 +645,7 @@ int main(int argc, char **argv) * with a sentinel value. If there is no freelist this would legitimately * crash, so skip it. */ - if (CONFIG_COROUTINE_POOL) { + if (IS_ENABLED(CONFIG_COROUTINE_POOL)) { g_test_add_func("/basic/no-dangling-access", test_no_dangling_access); } diff --git a/tests/vm/basevm.py b/tests/vm/basevm.py index 8aef4cff96..61725b8325 100644 --- a/tests/vm/basevm.py +++ b/tests/vm/basevm.py @@ -331,8 +331,8 @@ class BaseVM(object): def console_log(self, text): for line in re.split("[\r\n]", text): # filter out terminal escape sequences - line = re.sub("\x1b\[[0-9;?]*[a-zA-Z]", "", line) - line = re.sub("\x1b\([0-9;?]*[a-zA-Z]", "", line) + line = re.sub("\x1b\\[[0-9;?]*[a-zA-Z]", "", line) + line = re.sub("\x1b\\([0-9;?]*[a-zA-Z]", "", line) # replace unprintable chars line = re.sub("\x1b", "<esc>", line) line = re.sub("[\x00-\x1f]", ".", line) @@ -530,7 +530,7 @@ def get_qemu_version(qemu_path): and return the major number.""" output = subprocess.check_output([qemu_path, '--version']) version_line = output.decode("utf-8") - version_num = re.split(' |\(', version_line)[3].split('.')[0] + version_num = re.split(r' |\(', version_line)[3].split('.')[0] return int(version_num) def parse_config(config, args): diff --git a/tests/vm/netbsd b/tests/vm/netbsd index c7e3f1e735..40b27a3469 100755 --- a/tests/vm/netbsd +++ b/tests/vm/netbsd @@ -40,6 +40,9 @@ class NetBSDVM(basevm.BaseVM): "gsed", "gettext-tools", + # libs: basic + "dtc", + # libs: crypto "gnutls", @@ -67,7 +70,8 @@ class NetBSDVM(basevm.BaseVM): mkdir src build; cd src; tar -xf /dev/rld1a; cd ../build - ../src/configure --disable-opengl {configure_opts}; + ../src/configure --disable-opengl --extra-ldflags=-L/usr/pkg/lib \ + --extra-cflags=-I/usr/pkg/include {configure_opts}; gmake --output-sync -j{jobs} {target} {verbose}; """ poweroff = "/sbin/poweroff" diff --git a/ui/input-legacy.c b/ui/input-legacy.c index 46ea74e44d..210ae5eaca 100644 --- a/ui/input-legacy.c +++ b/ui/input-legacy.c @@ -127,7 +127,7 @@ static void legacy_kbd_event(DeviceState *dev, QemuConsole *src, } } -static QemuInputHandler legacy_kbd_handler = { +static const QemuInputHandler legacy_kbd_handler = { .name = "legacy-kbd", .mask = INPUT_EVENT_MASK_KEY, .event = legacy_kbd_event, diff --git a/ui/input.c b/ui/input.c index cbe8573c5c..dc745860f4 100644 --- a/ui/input.c +++ b/ui/input.c @@ -10,7 +10,7 @@ struct QemuInputHandlerState { DeviceState *dev; - QemuInputHandler *handler; + const QemuInputHandler *handler; int id; int events; QemuConsole *con; @@ -46,7 +46,7 @@ static uint32_t queue_count; static uint32_t queue_limit = 1024; QemuInputHandlerState *qemu_input_handler_register(DeviceState *dev, - QemuInputHandler *handler) + const QemuInputHandler *handler) { QemuInputHandlerState *s = g_new0(QemuInputHandlerState, 1); static int id = 1; diff --git a/ui/shader/meson.build b/ui/shader/meson.build index 592bf596b9..3137e65578 100644 --- a/ui/shader/meson.build +++ b/ui/shader/meson.build @@ -10,5 +10,6 @@ foreach e : shaders output: output, capture: true, input: files('@0@.@1@'.format(e[0], e[1])), + build_by_default: false, command: [shaderinclude, '@INPUT0@']) endforeach diff --git a/ui/spice-core.c b/ui/spice-core.c index 
52a59386d7..db21db2c94 100644 --- a/ui/spice-core.c +++ b/ui/spice-core.c @@ -821,8 +821,7 @@ static void qemu_spice_init(void) }; using_spice = 1; - migration_state.notify = migration_state_notifier; - add_migration_state_change_notifier(&migration_state); + migration_add_notifier(&migration_state, migration_state_notifier); spice_migrate.base.sif = &migrate_interface.base; qemu_spice.add_interface(&spice_migrate.base); diff --git a/ui/vdagent.c b/ui/vdagent.c index 00d36a8677..64d7ab245a 100644 --- a/ui/vdagent.c +++ b/ui/vdagent.c @@ -297,7 +297,7 @@ static void vdagent_pointer_sync(DeviceState *dev) } } -static QemuInputHandler vdagent_mouse_handler = { +static const QemuInputHandler vdagent_mouse_handler = { .name = "vdagent mouse", .mask = INPUT_EVENT_MASK_BTN | INPUT_EVENT_MASK_ABS, .event = vdagent_pointer_event, @@ -671,7 +671,7 @@ static void vdagent_chr_open(Chardev *chr, return; #endif - if (migrate_add_blocker(vd->migration_blocker, errp) != 0) { + if (migrate_add_blocker(&vd->migration_blocker, errp) != 0) { return; } @@ -924,13 +924,12 @@ static void vdagent_chr_fini(Object *obj) { VDAgentChardev *vd = QEMU_VDAGENT_CHARDEV(obj); - migrate_del_blocker(vd->migration_blocker); + migrate_del_blocker(&vd->migration_blocker); vdagent_disconnect(vd); if (vd->mouse_hs) { qemu_input_handler_unregister(vd->mouse_hs); } buffer_free(&vd->outbuf); - error_free(vd->migration_blocker); } static const TypeInfo vdagent_chr_type_info = { diff --git a/util/cutils.c b/util/cutils.c index c99d26c5e2..42364039a5 100644 --- a/util/cutils.c +++ b/util/cutils.c @@ -1163,24 +1163,30 @@ char *get_relocated_path(const char *dir) g_string_append(result, "/qemu-bundle"); if (access(result->str, R_OK) == 0) { #ifdef G_OS_WIN32 - size_t size = mbsrtowcs(NULL, &dir, 0, &(mbstate_t){0}) + 1; + const char *src = dir; + size_t size = mbsrtowcs(NULL, &src, 0, &(mbstate_t){0}) + 1; PWSTR wdir = g_new(WCHAR, size); - mbsrtowcs(wdir, &dir, size, &(mbstate_t){0}); + mbsrtowcs(wdir, &src, size, &(mbstate_t){0}); PCWSTR wdir_skipped_root; - PathCchSkipRoot(wdir, &wdir_skipped_root); + if (PathCchSkipRoot(wdir, &wdir_skipped_root) == S_OK) { + size = wcsrtombs(NULL, &wdir_skipped_root, 0, &(mbstate_t){0}); + char *cursor = result->str + result->len; + g_string_set_size(result, result->len + size); + wcsrtombs(cursor, &wdir_skipped_root, size + 1, &(mbstate_t){0}); + } else { + g_string_append(result, dir); + } - size = wcsrtombs(NULL, &wdir_skipped_root, 0, &(mbstate_t){0}); - char *cursor = result->str + result->len; - g_string_set_size(result, result->len + size); - wcsrtombs(cursor, &wdir_skipped_root, size + 1, &(mbstate_t){0}); g_free(wdir); #else g_string_append(result, dir); #endif - } else if (!starts_with_prefix(dir) || !starts_with_prefix(bindir)) { - g_string_assign(result, dir); - } else { + goto out; + } + + if (IS_ENABLED(CONFIG_RELOCATABLE) && + starts_with_prefix(dir) && starts_with_prefix(bindir)) { g_string_assign(result, exec_dir); /* Advance over common components. 
*/ @@ -1203,7 +1209,10 @@ char *get_relocated_path(const char *dir) assert(G_IS_DIR_SEPARATOR(dir[-1])); g_string_append(result, dir - 1); } + goto out; } + g_string_assign(result, dir); +out: return g_string_free(result, false); } diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c index 17a88f6505..5fd2dbaf8b 100644 --- a/util/qemu-coroutine.c +++ b/util/qemu-coroutine.c @@ -57,7 +57,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) { Coroutine *co = NULL; - if (CONFIG_COROUTINE_POOL) { + if (IS_ENABLED(CONFIG_COROUTINE_POOL)) { CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); co = QSLIST_FIRST(alloc_pool); @@ -99,7 +99,7 @@ static void coroutine_delete(Coroutine *co) { co->caller = NULL; - if (CONFIG_COROUTINE_POOL) { + if (IS_ENABLED(CONFIG_COROUTINE_POOL)) { if (release_pool_size < qatomic_read(&pool_max_size) * 2) { QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); qatomic_inc(&release_pool_size); |
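
The switch from if (CONFIG_COROUTINE_POOL) to if (IS_ENABLED(CONFIG_COROUTINE_POOL)) here, like the CONFIG_RELOCATABLE check in cutils.c above, lets a config symbol that may be left entirely undefined be used in an ordinary if, so both branches still get compiled and type-checked instead of being hidden behind #ifdef. A self-contained sketch of the underlying preprocessor idiom; the macro definition is a simplified stand-in, not QEMU's actual one, and CONFIG_EXAMPLE_FEATURE is a made-up symbol:

/*
 * Simplified stand-in for the IS_ENABLED() idiom (QEMU defines the real
 * macro in its own headers).  It evaluates to 1 when the config symbol
 * is defined as 1 and to 0 when the symbol is not defined at all.
 */
#include <stdio.h>

#define CONFIG_EXAMPLE_FEATURE 1   /* remove this line to get the 0 case */

#define PLACEHOLDER_1 0,
#define TAKE_SECOND(ignored, val, ...) val
#define IS_ENABLED_3(junk_or_zero_comma) TAKE_SECOND(junk_or_zero_comma 1, 0, 0)
#define IS_ENABLED_2(val) IS_ENABLED_3(PLACEHOLDER_##val)
#define IS_ENABLED_SKETCH(cfg) IS_ENABLED_2(cfg)   /* expand cfg first */

int main(void)
{
    /*
     * Unlike #ifdef, the "dead" branch stays visible to the compiler,
     * so it cannot silently bit-rot.
     */
    if (IS_ENABLED_SKETCH(CONFIG_EXAMPLE_FEATURE)) {
        printf("feature compiled in\n");
    } else {
        printf("feature compiled out\n");
    }
    return 0;
}
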