 125 files changed, 3341 insertions(+), 1547 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 3f449bfe58..9b28cbb9e9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -318,8 +318,11 @@ R: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
 R: Liu Zhiwei <zhiwei_liu@linux.alibaba.com>
 L: qemu-riscv@nongnu.org
 S: Supported
+F: configs/targets/riscv*
+F: docs/system/target-riscv.rst
 F: target/riscv/
 F: hw/riscv/
+F: hw/intc/riscv*
 F: include/hw/riscv/
 F: linux-user/host/riscv32/
 F: linux-user/host/riscv64/
@@ -331,6 +334,7 @@ L: qemu-riscv@nongnu.org
 S: Supported
 F: target/riscv/insn_trans/trans_xthead.c.inc
 F: target/riscv/xthead*.decode
+F: disas/riscv-xthead*
 
 RISC-V XVentanaCondOps extension
 M: Philipp Tomsich <philipp.tomsich@vrull.eu>
@@ -338,6 +342,7 @@ L: qemu-riscv@nongnu.org
 S: Maintained
 F: target/riscv/XVentanaCondOps.decode
 F: target/riscv/insn_trans/trans_xventanacondops.c.inc
+F: disas/riscv-xventana*
 
 RENESAS RX CPUs
 R: Yoshinori Sato <ysato@users.sourceforge.jp>
@@ -557,6 +562,7 @@ M: Cornelia Huck <cohuck@redhat.com>
 M: Paolo Bonzini <pbonzini@redhat.com>
 S: Maintained
 F: linux-headers/
+F: include/standard-headers/
 F: scripts/update-linux-headers.sh
 
 POSIX
@@ -939,6 +945,9 @@ R: Marcin Juszkiewicz <marcin.juszkiewicz@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/arm/sbsa-ref.c
+F: hw/misc/sbsa_ec.c
+F: hw/watchdog/sbsa_gwdt.c
+F: include/hw/watchdog/sbsa_gwdt.h
 F: docs/system/arm/sbsa.rst
 F: tests/avocado/machine_aarch64_sbsaref.py
@@ -1286,8 +1295,10 @@ M: Hervé Poussineau <hpoussin@reactos.org>
 R: Aleksandar Rikalo <aleksandar.rikalo@syrmia.com>
 S: Maintained
 F: hw/mips/jazz.c
+F: hw/display/g364fb.c
 F: hw/display/jazz_led.c
 F: hw/dma/rc4030.c
+F: hw/nvram/ds1225y.c
 
 Malta
 M: Philippe Mathieu-Daudé <philmd@linaro.org>
@@ -1525,6 +1536,7 @@ Microchip PolarFire SoC Icicle Kit
 M: Bin Meng <bin.meng@windriver.com>
 L: qemu-riscv@nongnu.org
 S: Supported
+F: docs/system/riscv/microchip-icicle-kit.rst
 F: hw/riscv/microchip_pfsoc.c
 F: hw/char/mchp_pfsoc_mmuart.c
 F: hw/misc/mchp_pfsoc_dmc.c
@@ -1540,6 +1552,7 @@ Shakti C class SoC
 M: Vijai Kumar K <vijai@behindbytes.com>
 L: qemu-riscv@nongnu.org
 S: Supported
+F: docs/system/riscv/shakti-c.rst
 F: hw/riscv/shakti_c.c
 F: hw/char/shakti_uart.c
 F: include/hw/riscv/shakti_c.h
@@ -1551,6 +1564,7 @@ M: Bin Meng <bin.meng@windriver.com>
 M: Palmer Dabbelt <palmer@dabbelt.com>
 L: qemu-riscv@nongnu.org
 S: Supported
+F: docs/system/riscv/sifive_u.rst
 F: hw/*/*sifive*.c
 F: include/hw/*/*sifive*.h
@@ -1978,6 +1992,7 @@ M: Marc-André Lureau <marcandre.lureau@redhat.com>
 R: Paolo Bonzini <pbonzini@redhat.com>
 S: Odd Fixes
 F: hw/char/
+F: include/hw/char/
 
 Network devices
 M: Jason Wang <jasowang@redhat.com>
@@ -2876,6 +2891,7 @@ F: hw/mem/pc-dimm.c
 F: include/hw/mem/memory-device.h
 F: include/hw/mem/nvdimm.h
 F: include/hw/mem/pc-dimm.h
+F: stubs/memory_device.c
 F: docs/nvdimm.txt
 
 SPICE
@@ -3407,6 +3423,12 @@ M: Viktor Prutyanov <viktor.prutyanov@phystech.edu>
 S: Maintained
 F: contrib/elf2dmp/
 
+Overall sensors
+M: Philippe Mathieu-Daudé <philmd@linaro.org>
+S: Odd Fixes
+F: hw/sensor
+F: include/hw/sensor
+
 I2C and SMBus
 M: Corey Minyard <cminyard@mvista.com>
 S: Maintained
@@ -3572,7 +3594,7 @@ M: Alistair Francis <Alistair.Francis@wdc.com>
 L: qemu-riscv@nongnu.org
 S: Maintained
 F: tcg/riscv/
-F: disas/riscv.c
+F: disas/riscv.[ch]
 
 S390 TCG target
 M: Richard Henderson <richard.henderson@linaro.org>
@@ -3908,6 +3930,7 @@ F: .github/workflows/lockdown.yml
 F: .gitlab-ci.yml
 F: .gitlab-ci.d/
 F: .travis.yml
+F: docs/devel/ci*
 F: scripts/ci/
 F: tests/docker/
 F: tests/vm/
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 72e1d1141c..3f7eafe08c 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -174,13 +174,31 @@ void kvm_resample_fd_notify(int gsi)
     }
 }
 
-int kvm_get_max_memslots(void)
+unsigned int kvm_get_max_memslots(void)
 {
     KVMState *s = KVM_STATE(current_accel());
 
     return s->nr_slots;
 }
 
+unsigned int kvm_get_free_memslots(void)
+{
+    unsigned int used_slots = 0;
+    KVMState *s = kvm_state;
+    int i;
+
+    kvm_slots_lock();
+    for (i = 0; i < s->nr_as; i++) {
+        if (!s->as[i].ml) {
+            continue;
+        }
+        used_slots = MAX(used_slots, s->as[i].ml->nr_used_slots);
+    }
+    kvm_slots_unlock();
+
+    return s->nr_slots - used_slots;
+}
+
 /* Called with KVMMemoryListener.slots_lock held */
 static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
 {
@@ -196,19 +214,6 @@ static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
     return NULL;
 }
 
-bool kvm_has_free_slot(MachineState *ms)
-{
-    KVMState *s = KVM_STATE(ms->accelerator);
-    bool result;
-    KVMMemoryListener *kml = &s->memory_listener;
-
-    kvm_slots_lock();
-    result = !!kvm_get_free_slot(kml);
-    kvm_slots_unlock();
-
-    return result;
-}
-
 /* Called with KVMMemoryListener.slots_lock held */
 static KVMSlot *kvm_alloc_slot(KVMMemoryListener *kml)
 {
@@ -1387,6 +1392,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
             }
             start_addr += slot_size;
             size -= slot_size;
+            kml->nr_used_slots--;
         } while (size);
         return;
     }
@@ -1412,6 +1418,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
         ram_start_offset += slot_size;
         ram += slot_size;
         size -= slot_size;
+        kml->nr_used_slots++;
     } while (size);
 }
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
index 235dc661bc..51f522e52e 100644
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@@ -109,9 +109,14 @@ int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
     return -ENOSYS;
 }
 
-bool kvm_has_free_slot(MachineState *ms)
+unsigned int kvm_get_max_memslots(void)
 {
-    return false;
+    return 0;
+}
+
+unsigned int kvm_get_free_memslots(void)
+{
+    return 0;
 }
 
 void kvm_init_cpu_signals(CPUState *cpu)
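The new kvm_get_free_memslots() replaces the boolean kvm_has_free_slot(): callers now learn how many slots remain, not merely whether one does. Each address space has its own memory listener, but they all draw from the same per-VM slot budget, so the used count is the maximum of nr_used_slots across address spaces. A minimal caller-side sketch, assuming a hypothetical consumer; only kvm_enabled() and kvm_get_free_memslots() are real API from this patch, the function and parameter names are illustrative:

    /*
     * Hypothetical consumer: refuse to plug a device that needs more
     * KVM memory slots than remain free.
     */
    static bool memdev_check_slots(unsigned int nr_regions, Error **errp)
    {
        if (kvm_enabled() && kvm_get_free_memslots() < nr_regions) {
            error_setg(errp, "KVM has only %u free memory slots, need %u",
                       kvm_get_free_memslots(), nr_regions);
            return false;
        }
        return true;
    }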
diff --git a/block.c b/block.c
index af04c8ac6f..f9cf05ddcf 100644
--- a/block.c
+++ b/block.c
@@ -279,8 +279,9 @@ bool bdrv_is_read_only(BlockDriverState *bs)
     return !(bs->open_flags & BDRV_O_RDWR);
 }
 
-static int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
-                                  bool ignore_allow_rdw, Error **errp)
+static int GRAPH_RDLOCK
+bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
+                       bool ignore_allow_rdw, Error **errp)
 {
     IO_CODE();
 
@@ -371,8 +372,9 @@ char *bdrv_get_full_backing_filename_from_filename(const char *backed,
  * setting @errp. In all other cases, NULL will only be returned with
  * @errp set.
  */
-static char *bdrv_make_absolute_filename(BlockDriverState *relative_to,
-                                         const char *filename, Error **errp)
+static char * GRAPH_RDLOCK
+bdrv_make_absolute_filename(BlockDriverState *relative_to,
+                            const char *filename, Error **errp)
 {
     char *dir, *full_name;
 
@@ -1192,19 +1194,19 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c)
     return g_strdup_printf("node '%s'", bdrv_get_node_name(parent));
 }
 
-static void bdrv_child_cb_drained_begin(BdrvChild *child)
+static void GRAPH_RDLOCK bdrv_child_cb_drained_begin(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
     bdrv_do_drained_begin_quiesce(bs, NULL);
 }
 
-static bool bdrv_child_cb_drained_poll(BdrvChild *child)
+static bool GRAPH_RDLOCK bdrv_child_cb_drained_poll(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
     return bdrv_drain_poll(bs, NULL, false);
 }
 
-static void bdrv_child_cb_drained_end(BdrvChild *child)
+static void GRAPH_RDLOCK bdrv_child_cb_drained_end(BdrvChild *child)
 {
     BlockDriverState *bs = child->opaque;
     bdrv_drained_end(bs);
 }
@@ -1250,7 +1252,7 @@ static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
     *child_flags &= ~BDRV_O_NATIVE_AIO;
 }
 
-static void bdrv_backing_attach(BdrvChild *c)
+static void GRAPH_WRLOCK bdrv_backing_attach(BdrvChild *c)
 {
     BlockDriverState *parent = c->opaque;
     BlockDriverState *backing_hd = c->bs;
@@ -1874,7 +1876,10 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
     }
 
     if (file != NULL) {
+        bdrv_graph_rdlock_main_loop();
         bdrv_refresh_filename(blk_bs(file));
+        bdrv_graph_rdunlock_main_loop();
+
         filename = blk_bs(file)->filename;
     } else {
         /*
@@ -1901,7 +1906,9 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
 
     if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) {
         if (!ro && bdrv_is_whitelisted(drv, true)) {
+            bdrv_graph_rdlock_main_loop();
             ret = bdrv_apply_auto_read_only(bs, NULL, NULL);
+            bdrv_graph_rdunlock_main_loop();
         } else {
             ret = -ENOTSUP;
         }
@@ -2966,6 +2973,8 @@ static void bdrv_child_free(BdrvChild *child)
 {
     assert(!child->bs);
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     assert(!child->next.le_prev); /* not in children list */
 
     g_free(child->name);
@@ -3644,7 +3653,10 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
         implicit_backing = !strcmp(bs->auto_backing_file, bs->backing_file);
     }
 
+    bdrv_graph_rdlock_main_loop();
     backing_filename = bdrv_get_full_backing_filename(bs, &local_err);
+    bdrv_graph_rdunlock_main_loop();
+
     if (local_err) {
         ret = -EINVAL;
         error_propagate(errp, local_err);
@@ -3675,7 +3687,9 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
     }
 
     if (implicit_backing) {
+        bdrv_graph_rdlock_main_loop();
         bdrv_refresh_filename(backing_hd);
+        bdrv_graph_rdunlock_main_loop();
         pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
                 backing_hd->filename);
     }
@@ -4314,8 +4328,8 @@ static int bdrv_reset_options_allowed(BlockDriverState *bs,
 /*
  * Returns true if @child can be reached recursively from @bs
  */
-static bool bdrv_recurse_has_child(BlockDriverState *bs,
-                                   BlockDriverState *child)
+static bool GRAPH_RDLOCK
+bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child)
 {
     BdrvChild *c;
 
@@ -4356,15 +4370,12 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
  *
  * To be called with bs->aio_context locked.
  */
-static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
-                                                 BlockDriverState *bs,
-                                                 QDict *options,
-                                                 const BdrvChildClass *klass,
-                                                 BdrvChildRole role,
-                                                 bool parent_is_format,
-                                                 QDict *parent_options,
-                                                 int parent_flags,
-                                                 bool keep_old_opts)
+static BlockReopenQueue * GRAPH_RDLOCK
+bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs,
+                        QDict *options, const BdrvChildClass *klass,
+                        BdrvChildRole role, bool parent_is_format,
+                        QDict *parent_options, int parent_flags,
+                        bool keep_old_opts)
 {
     assert(bs != NULL);
 
@@ -4376,6 +4387,11 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
 
     GLOBAL_STATE_CODE();
 
+    /*
+     * Strictly speaking, draining is illegal under GRAPH_RDLOCK. We know that
+     * we've been called with bdrv_graph_rdlock_main_loop(), though, so it's ok
+     * in practice.
+     */
     bdrv_drained_begin(bs);
 
     if (bs_queue == NULL) {
@@ -4517,6 +4533,7 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
                                     QDict *options, bool keep_old_opts)
 {
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false,
                                    NULL, 0, keep_old_opts);
@@ -4736,9 +4753,10 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
  * Callers must make sure that their AioContext locking is still correct after
  * this.
  */
-static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
-                                             bool is_backing, Transaction *tran,
-                                             Error **errp)
+static int GRAPH_UNLOCKED
+bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
+                                  bool is_backing, Transaction *tran,
+                                  Error **errp)
 {
     BlockDriverState *bs = reopen_state->bs;
     BlockDriverState *new_child_bs;
@@ -4748,6 +4766,7 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
     QObject *value;
     const char *str;
     AioContext *ctx, *old_ctx;
+    bool has_child;
     int ret;
 
     GLOBAL_STATE_CODE();
@@ -4767,7 +4786,13 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
         new_child_bs = bdrv_lookup_bs(NULL, str, errp);
         if (new_child_bs == NULL) {
             return -EINVAL;
-        } else if (bdrv_recurse_has_child(new_child_bs, bs)) {
+        }
+
+        bdrv_graph_rdlock_main_loop();
+        has_child = bdrv_recurse_has_child(new_child_bs, bs);
+        bdrv_graph_rdunlock_main_loop();
+
+        if (has_child) {
             error_setg(errp, "Making '%s' a %s child of '%s' would create a "
                        "cycle", str, child_name, bs->node_name);
             return -EINVAL;
@@ -4866,9 +4891,9 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
  * After calling this function, the transaction @change_child_tran may only be
  * completed while holding a writer lock for the graph.
  */
-static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
-                               BlockReopenQueue *queue,
-                               Transaction *change_child_tran, Error **errp)
+static int GRAPH_UNLOCKED
+bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
+                    Transaction *change_child_tran, Error **errp)
 {
     int ret = -1;
     int old_flags;
@@ -4930,7 +4955,10 @@ static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
      * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
      * not set, or if the BDS still has copy_on_read enabled */
     read_only = !(reopen_state->flags & BDRV_O_RDWR);
+
+    bdrv_graph_rdlock_main_loop();
     ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err);
+    bdrv_graph_rdunlock_main_loop();
     if (local_err) {
         error_propagate(errp, local_err);
         goto error;
@@ -4953,7 +4981,9 @@ static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
         if (local_err != NULL) {
             error_propagate(errp, local_err);
         } else {
+            bdrv_graph_rdlock_main_loop();
             bdrv_refresh_filename(reopen_state->bs);
+            bdrv_graph_rdunlock_main_loop();
             error_setg(errp, "failed while preparing to reopen image '%s'",
                        reopen_state->bs->filename);
         }
@@ -4962,9 +4992,11 @@ static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
     } else {
         /* It is currently mandatory to have a bdrv_reopen_prepare()
          * handler for each supported drv. */
+        bdrv_graph_rdlock_main_loop();
         error_setg(errp, "Block format '%s' used by node '%s' "
                    "does not support reopening files", drv->format_name,
                    bdrv_get_device_or_node_name(reopen_state->bs));
+        bdrv_graph_rdunlock_main_loop();
         ret = -1;
         goto error;
     }
@@ -5010,6 +5042,8 @@ static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
     if (qdict_size(reopen_state->options)) {
         const QDictEntry *entry = qdict_first(reopen_state->options);
 
+        GRAPH_RDLOCK_GUARD_MAINLOOP();
+
         do {
             QObject *new = entry->value;
             QObject *old = qdict_get(reopen_state->bs->options, entry->key);
@@ -5083,7 +5117,7 @@ error:
  * makes them final by swapping the staging BlockDriverState contents into
  * the active BlockDriverState contents.
  */
-static void bdrv_reopen_commit(BDRVReopenState *reopen_state)
+static void GRAPH_UNLOCKED bdrv_reopen_commit(BDRVReopenState *reopen_state)
 {
     BlockDriver *drv;
     BlockDriverState *bs;
@@ -5100,6 +5134,8 @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state)
         drv->bdrv_reopen_commit(reopen_state);
     }
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     /* set BDS specific flags now */
     qobject_unref(bs->explicit_options);
     qobject_unref(bs->options);
@@ -5121,9 +5157,7 @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state)
     qdict_del(bs->explicit_options, "backing");
     qdict_del(bs->options, "backing");
 
-    bdrv_graph_rdlock_main_loop();
     bdrv_refresh_limits(bs, NULL, NULL);
-    bdrv_graph_rdunlock_main_loop();
     bdrv_refresh_total_sectors(bs, bs->total_sectors);
 }
 
@@ -5131,7 +5165,7 @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state)
  * Abort the reopen, and delete and free the staged changes in
  * reopen_state
  */
-static void bdrv_reopen_abort(BDRVReopenState *reopen_state)
+static void GRAPH_UNLOCKED bdrv_reopen_abort(BDRVReopenState *reopen_state)
 {
     BlockDriver *drv;
 
@@ -5918,6 +5952,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
 
     bdrv_ref(top);
     bdrv_drained_begin(base);
+    bdrv_graph_rdlock_main_loop();
 
     if (!top->drv || !base->drv) {
         goto exit;
@@ -5942,11 +5977,9 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
         backing_file_str = base->filename;
     }
 
-    bdrv_graph_rdlock_main_loop();
     QLIST_FOREACH(c, &top->parents, next_parent) {
         updated_children = g_slist_prepend(updated_children, c);
     }
-    bdrv_graph_rdunlock_main_loop();
 
     /*
      * It seems correct to pass detach_subchain=true here, but it triggers
@@ -5992,6 +6025,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
     ret = 0;
 
 exit:
+    bdrv_graph_rdunlock_main_loop();
     bdrv_drained_end(base);
     bdrv_unref(top);
     return ret;
@@ -6282,6 +6316,7 @@ BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
     BlockDriverState *bs;
 
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     list = NULL;
     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
@@ -6667,7 +6702,8 @@ void coroutine_fn bdrv_co_debug_event(BlockDriverState *bs, BlkdebugEvent event)
     bs->drv->bdrv_co_debug_event(bs, event);
 }
 
-static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs)
+static BlockDriverState * GRAPH_RDLOCK
+bdrv_find_debug_node(BlockDriverState *bs)
 {
     GLOBAL_STATE_CODE();
     while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
@@ -6686,6 +6722,8 @@ int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
                           const char *tag)
 {
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     bs = bdrv_find_debug_node(bs);
     if (bs) {
         return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
@@ -6697,6 +6735,8 @@ int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
 {
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     bs = bdrv_find_debug_node(bs);
     if (bs) {
         return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
@@ -6708,6 +6748,8 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
 {
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
         bs = bdrv_primary_bs(bs);
     }
@@ -6722,6 +6764,8 @@ int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
 {
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
         bs = bdrv_primary_bs(bs);
     }
@@ -6750,6 +6794,7 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
     BlockDriverState *bs_below;
 
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     if (!bs || !bs->drv || !backing_file) {
         return NULL;
@@ -6961,6 +7006,7 @@ void bdrv_activate_all(Error **errp)
     BdrvNextIterator it;
 
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
@@ -6976,7 +7022,8 @@ void bdrv_activate_all(Error **errp)
     }
 }
 
-static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
+static bool GRAPH_RDLOCK
+bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
 {
     BdrvChild *parent;
     GLOBAL_STATE_CODE();
@@ -6993,14 +7040,13 @@ static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
     return false;
 }
 
-static int bdrv_inactivate_recurse(BlockDriverState *bs)
+static int GRAPH_RDLOCK bdrv_inactivate_recurse(BlockDriverState *bs)
 {
     BdrvChild *child, *parent;
     int ret;
     uint64_t cumulative_perms, cumulative_shared_perms;
 
     GLOBAL_STATE_CODE();
-    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     if (!bs->drv) {
         return -ENOMEDIUM;
@@ -7066,6 +7112,7 @@ int bdrv_inactivate_all(void)
     GSList *aio_ctxs = NULL, *ctx;
 
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         AioContext *aio_context = bdrv_get_aio_context(bs);
@@ -7205,6 +7252,7 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
 {
     BdrvOpBlocker *blocker;
     GLOBAL_STATE_CODE();
+
     assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
         blocker = QLIST_FIRST(&bs->op_blockers[op]);
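Two reader-lock idioms recur throughout the block.c changes above: the scoped guard GRAPH_RDLOCK_GUARD_MAINLOOP(), which holds the graph read lock for the rest of the function, and the explicit bdrv_graph_rdlock_main_loop()/bdrv_graph_rdunlock_main_loop() pair, used when only part of a function needs the lock. A minimal sketch of both, side by side; the locking calls are the real API from this series, while my_query_node/my_query_node_explicit are illustrative callers:

    /* Scoped form: the lock is held until the end of the function. */
    static void my_query_node(BlockDriverState *bs)
    {
        GRAPH_RDLOCK_GUARD_MAINLOOP();
        bdrv_refresh_filename(bs);
    }

    /* Explicit form: lock only around the GRAPH_RDLOCK callee. */
    static void my_query_node_explicit(BlockDriverState *bs)
    {
        bdrv_graph_rdlock_main_loop();
        bdrv_refresh_filename(bs);
        bdrv_graph_rdunlock_main_loop();
    }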
diff --git a/block/backup.c b/block/backup.c
index db3791f4d1..9a3c4bdc82 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -374,6 +374,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     assert(bs);
     assert(target);
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     /* QMP interface protects us from these cases */
     assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL);
diff --git a/block/block-backend.c b/block/block-backend.c
index efe2e7cbf8..39aac1bbce 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -780,11 +780,12 @@ BlockDriverState *blk_bs(BlockBackend *blk)
     return blk->root ? blk->root->bs : NULL;
 }
 
-static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
+static BlockBackend * GRAPH_RDLOCK bdrv_first_blk(BlockDriverState *bs)
 {
     BdrvChild *child;
 
     GLOBAL_STATE_CODE();
+    assert_bdrv_graph_readable();
 
     QLIST_FOREACH(child, &bs->parents, next_parent) {
         if (child->klass == &child_root) {
@@ -812,6 +813,8 @@ bool bdrv_is_root_node(BlockDriverState *bs)
     BdrvChild *c;
 
     GLOBAL_STATE_CODE();
+    assert_bdrv_graph_readable();
+
     QLIST_FOREACH(c, &bs->parents, next_parent) {
         if (c->klass != &child_root) {
             return false;
@@ -2259,6 +2262,7 @@ void blk_activate(BlockBackend *blk, Error **errp)
     if (qemu_in_coroutine()) {
         bdrv_co_activate(bs, errp);
     } else {
+        GRAPH_RDLOCK_GUARD_MAINLOOP();
         bdrv_activate(bs, errp);
     }
 }
@@ -2384,6 +2388,7 @@ bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
 {
     BlockDriverState *bs = blk_bs(blk);
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     if (!bs) {
         return false;
@@ -2901,6 +2906,8 @@ const BdrvChild *blk_root(BlockBackend *blk)
 int blk_make_empty(BlockBackend *blk, Error **errp)
 {
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     if (!blk_is_available(blk)) {
         error_setg(errp, "No medium inserted");
         return -ENOMEDIUM;
diff --git a/block/bochs.c b/block/bochs.c
index 66e7a58e5e..8c659fa9b9 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -106,7 +106,9 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
     int ret;
 
     /* No write support yet */
+    bdrv_graph_rdlock_main_loop();
     ret = bdrv_apply_auto_read_only(bs, NULL, errp);
+    bdrv_graph_rdunlock_main_loop();
     if (ret < 0) {
         return ret;
     }
diff --git a/block/cloop.c b/block/cloop.c
index 835a0fe3da..773d7918be 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -67,7 +67,9 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
     uint32_t offsets_size, max_compressed_block_size = 1, i;
     int ret;
 
+    bdrv_graph_rdlock_main_loop();
     ret = bdrv_apply_auto_read_only(bs, NULL, errp);
+    bdrv_graph_rdunlock_main_loop();
     if (ret < 0) {
         return ret;
     }
diff --git a/block/commit.c b/block/commit.c
index aa45beb0f0..43d1de7577 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -434,6 +434,7 @@ int bdrv_commit(BlockDriverState *bs)
     Error *local_err = NULL;
 
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     if (!drv)
         return -ENOMEDIUM;
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index aeaff3bb82..4ffabc5ca2 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -305,7 +305,7 @@ cbw_co_snapshot_block_status(BlockDriverState *bs,
         return -EACCES;
     }
 
-    ret = bdrv_block_status(child->bs, offset, cur_bytes, pnum, map, file);
+    ret = bdrv_co_block_status(child->bs, offset, cur_bytes, pnum, map, file);
     if (child == s->target) {
         /*
          * We refer to s->target only for areas that we've written to it.
diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index b4d6b7efc3..5149fcf63a 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -146,11 +146,11 @@ cor_co_preadv_part(BlockDriverState *bs, int64_t offset, int64_t bytes,
             local_flags = flags;
 
             /* In case of failure, try to copy-on-read anyway */
-            ret = bdrv_is_allocated(bs->file->bs, offset, bytes, &n);
+            ret = bdrv_co_is_allocated(bs->file->bs, offset, bytes, &n);
             if (ret <= 0) {
-                ret = bdrv_is_allocated_above(bdrv_backing_chain_next(bs->file->bs),
-                                              state->bottom_bs, true, offset,
-                                              n, &n);
+                ret = bdrv_co_is_allocated_above(bdrv_backing_chain_next(bs->file->bs),
+                                                 state->bottom_bs, true, offset,
+                                                 n, &n);
                 if (ret > 0 || ret < 0) {
                     local_flags |= BDRV_REQ_COPY_ON_READ;
                 }
diff --git a/block/crypto.c b/block/crypto.c
index c9c9a39fa3..b3f0233d53 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -828,7 +828,7 @@ block_crypto_amend_options_generic_luks(BlockDriverState *bs,
                                             errp);
 }
 
-static int
+static int GRAPH_RDLOCK
 block_crypto_amend_options_luks(BlockDriverState *bs,
                                 QemuOpts *opts,
                                 BlockDriverAmendStatusCB *status_cb,
@@ -841,8 +841,6 @@ block_crypto_amend_options_luks(BlockDriverState *bs,
     QCryptoBlockAmendOptions *amend_options = NULL;
     int ret = -EINVAL;
 
-    assume_graph_lock(); /* FIXME */
-
     assert(crypto);
     assert(crypto->block);
diff --git a/block/curl.c b/block/curl.c
index 0fc42d03d7..419f7c89ef 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -696,8 +696,10 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     const char *protocol_delimiter;
     int ret;
 
+    bdrv_graph_rdlock_main_loop();
     ret = bdrv_apply_auto_read_only(bs, "curl driver does not support writes",
                                     errp);
+    bdrv_graph_rdunlock_main_loop();
     if (ret < 0) {
         return ret;
     }
diff --git a/block/dmg.c b/block/dmg.c
index 06a0244a9c..38ee72bbe5 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -452,7 +452,9 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
     int64_t offset;
     int ret;
 
+    bdrv_graph_rdlock_main_loop();
     ret = bdrv_apply_auto_read_only(bs, NULL, errp);
+    bdrv_graph_rdunlock_main_loop();
     if (ret < 0) {
         return ret;
     }
diff --git a/block/export/export.c b/block/export/export.c
index 10316b43c5..a8f274e526 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -83,6 +83,8 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
     uint64_t perm;
     int ret;
 
+    GLOBAL_STATE_CODE();
+
     if (!id_wellformed(export->id)) {
         error_setg(errp, "Invalid block export id");
         return NULL;
@@ -145,7 +147,9 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
      * access since the export could be available before migration handover.
      * ctx was acquired in the caller.
      */
+    bdrv_graph_rdlock_main_loop();
     bdrv_activate(bs, NULL);
+    bdrv_graph_rdunlock_main_loop();
 
     perm = BLK_PERM_CONSISTENT_READ;
     if (export->writable) {
diff --git a/block/gluster.c b/block/gluster.c
index ad5fadbe79..cc74af06dc 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -863,11 +863,13 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
     if (ret == -EACCES || ret == -EROFS) {
         /* Try to degrade to read-only, but if it doesn't work, still use the
          * normal error message. */
+        bdrv_graph_rdlock_main_loop();
         if (bdrv_apply_auto_read_only(bs, NULL, NULL) == 0) {
             open_flags = (open_flags & ~O_RDWR) | O_RDONLY;
             s->fd = glfs_open(s->glfs, gconf->path, open_flags);
             ret = s->fd ? 0 : -errno;
        }
+        bdrv_graph_rdunlock_main_loop();
     }
 
     s->supports_seek_data = qemu_gluster_test_seek(s->fd);
diff --git a/block/graph-lock.c b/block/graph-lock.c
index 58a799065f..e5525ee2db 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -106,12 +106,13 @@ static uint32_t reader_count(void)
     return rd;
 }
 
-void bdrv_graph_wrlock(BlockDriverState *bs)
+void no_coroutine_fn bdrv_graph_wrlock(BlockDriverState *bs)
 {
     AioContext *ctx = NULL;
 
     GLOBAL_STATE_CODE();
     assert(!qatomic_read(&has_writer));
+    assert(!qemu_in_coroutine());
 
     /*
      * Release only non-mainloop AioContext. The mainloop often relies on the
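bdrv_graph_wrlock() is now marked no_coroutine_fn and asserts that it does not run in coroutine context: taking the writer lock has to drain and poll until all readers are gone, and a coroutine caller could itself hold a reader count it can never drop while blocked. A toy sketch of the same belt-and-suspenders pattern, with my_toplevel_update as an illustrative caller (only the lock functions, the annotation, and qemu_in_coroutine() are real):

    /*
     * Illustrative graph-modifying helper: forbidden in coroutine
     * context both statically (no_coroutine_fn) and dynamically
     * (the assertion), mirroring bdrv_graph_wrlock() itself.
     */
    static void no_coroutine_fn my_toplevel_update(BlockDriverState *bs)
    {
        assert(!qemu_in_coroutine());
        bdrv_graph_wrlock(NULL);
        /* ... modify the graph ... */
        bdrv_graph_wrunlock();
    }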
diff --git a/block/io.c b/block/io.c
index e7f9448d5a..527a1de04e 100644
--- a/block/io.c
+++ b/block/io.c
@@ -42,13 +42,18 @@
 /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
 #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
 
-static void bdrv_parent_cb_resize(BlockDriverState *bs);
+static void coroutine_fn GRAPH_RDLOCK
+bdrv_parent_cb_resize(BlockDriverState *bs);
+
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
     int64_t offset, int64_t bytes, BdrvRequestFlags flags);
 
-static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
+static void GRAPH_RDLOCK
+bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
 {
     BdrvChild *c, *next;
+    IO_OR_GS_CODE();
+    assert_bdrv_graph_readable();
 
     QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
         if (c == ignore) {
@@ -70,9 +75,12 @@ void bdrv_parent_drained_end_single(BdrvChild *c)
     }
 }
 
-static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
+static void GRAPH_RDLOCK
+bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
 {
     BdrvChild *c;
+    IO_OR_GS_CODE();
+    assert_bdrv_graph_readable();
 
     QLIST_FOREACH(c, &bs->parents, next_parent) {
         if (c == ignore) {
@@ -84,17 +92,22 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
 
 bool bdrv_parent_drained_poll_single(BdrvChild *c)
 {
+    IO_OR_GS_CODE();
+
     if (c->klass->drained_poll) {
         return c->klass->drained_poll(c);
     }
     return false;
 }
 
-static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
-                                     bool ignore_bds_parents)
+static bool GRAPH_RDLOCK
+bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
+                         bool ignore_bds_parents)
 {
     BdrvChild *c, *next;
     bool busy = false;
+    IO_OR_GS_CODE();
+    assert_bdrv_graph_readable();
 
     QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
         if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
@@ -114,6 +127,7 @@ void bdrv_parent_drained_begin_single(BdrvChild *c)
     c->quiesced_parent = true;
 
     if (c->klass->drained_begin) {
+        /* called with rdlock taken, but it doesn't really need it. */
        c->klass->drained_begin(c);
     }
 }
@@ -263,6 +277,9 @@ bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
 static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
                                       BdrvChild *ignore_parent)
 {
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     return bdrv_drain_poll(bs, ignore_parent, false);
 }
 
@@ -362,6 +379,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
     /* Stop things in parent-to-child order */
     if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
+        GRAPH_RDLOCK_GUARD_MAINLOOP();
         bdrv_parent_drained_begin(bs, parent);
         if (bs->drv && bs->drv->bdrv_drain_begin) {
             bs->drv->bdrv_drain_begin(bs);
@@ -408,12 +426,16 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
         bdrv_co_yield_to_drain(bs, false, parent, false);
         return;
     }
+
+    /* At this point, we should be always running in the main loop. */
+    GLOBAL_STATE_CODE();
     assert(bs->quiesce_counter > 0);
     GLOBAL_STATE_CODE();
 
     /* Re-enable things in child-to-parent order */
     old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
     if (old_quiesce_counter == 1) {
+        GRAPH_RDLOCK_GUARD_MAINLOOP();
         if (bs->drv && bs->drv->bdrv_drain_end) {
             bs->drv->bdrv_drain_end(bs);
         }
@@ -437,6 +459,8 @@ void bdrv_drain(BlockDriverState *bs)
 static void bdrv_drain_assert_idle(BlockDriverState *bs)
 {
     BdrvChild *child, *next;
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     assert(qatomic_read(&bs->in_flight) == 0);
     QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
@@ -450,7 +474,9 @@ static bool bdrv_drain_all_poll(void)
 {
     BlockDriverState *bs = NULL;
     bool result = false;
+
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     /* bdrv_drain_poll() can't make changes to the graph and we are holding the
      * main AioContext lock, so iterating bdrv_next_all_states() is safe. */
@@ -1223,8 +1249,8 @@ bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
             ret = 1; /* "already allocated", so nothing will be copied */
             pnum = MIN(align_bytes, max_transfer);
         } else {
-            ret = bdrv_is_allocated(bs, align_offset,
-                                    MIN(align_bytes, max_transfer), &pnum);
+            ret = bdrv_co_is_allocated(bs, align_offset,
+                                       MIN(align_bytes, max_transfer), &pnum);
             if (ret < 0) {
                 /*
                  * Safe to treat errors in querying allocation as if
@@ -1371,7 +1397,7 @@ bdrv_aligned_preadv(BdrvChild *child, BdrvTrackedRequest *req,
         /* The flag BDRV_REQ_COPY_ON_READ has reached its addressee */
         flags &= ~BDRV_REQ_COPY_ON_READ;
 
-        ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
+        ret = bdrv_co_is_allocated(bs, offset, bytes, &pnum);
         if (ret < 0) {
             goto out;
         }
@@ -2003,7 +2029,7 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes,
     }
 }
 
-static inline void coroutine_fn
+static inline void coroutine_fn GRAPH_RDLOCK
 bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, int64_t bytes,
                          BdrvTrackedRequest *req, int ret)
 {
@@ -2330,6 +2356,7 @@ int bdrv_flush_all(void)
     int result = 0;
 
     GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
 
     /*
      * bdrv queue is managed by record/replay,
@@ -2383,9 +2410,9 @@ int bdrv_flush_all(void)
  * set to the host mapping and BDS corresponding to the guest offset.
 */
 static int coroutine_fn GRAPH_RDLOCK
-bdrv_co_block_status(BlockDriverState *bs, bool want_zero,
-                     int64_t offset, int64_t bytes,
-                     int64_t *pnum, int64_t *map, BlockDriverState **file)
+bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero,
+                        int64_t offset, int64_t bytes,
+                        int64_t *pnum, int64_t *map, BlockDriverState **file)
 {
     int64_t total_size;
     int64_t n; /* bytes */
@@ -2544,8 +2571,8 @@ bdrv_co_block_status(BlockDriverState *bs, bool want_zero,
 
     if (ret & BDRV_BLOCK_RAW) {
         assert(ret & BDRV_BLOCK_OFFSET_VALID && local_file);
-        ret = bdrv_co_block_status(local_file, want_zero, local_map,
-                                   *pnum, pnum, &local_map, &local_file);
+        ret = bdrv_co_do_block_status(local_file, want_zero, local_map,
+                                      *pnum, pnum, &local_map, &local_file);
         goto out;
     }
 
@@ -2572,8 +2599,8 @@ bdrv_co_block_status(BlockDriverState *bs, bool want_zero,
         int64_t file_pnum;
         int ret2;
 
-        ret2 = bdrv_co_block_status(local_file, want_zero, local_map,
-                                    *pnum, &file_pnum, NULL, NULL);
+        ret2 = bdrv_co_do_block_status(local_file, want_zero, local_map,
+                                       *pnum, &file_pnum, NULL, NULL);
         if (ret2 >= 0) {
             /* Ignore errors.  This is just providing extra information, it
              * is useful but not necessary.
@@ -2640,7 +2667,8 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
         return 0;
     }
 
-    ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
+    ret = bdrv_co_do_block_status(bs, want_zero, offset, bytes, pnum,
+                                  map, file);
     ++*depth;
     if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
         return ret;
@@ -2656,8 +2684,8 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
     for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base;
          p = bdrv_filter_or_cow_bs(p))
     {
-        ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
-                                   file);
+        ret = bdrv_co_do_block_status(p, want_zero, offset, bytes, pnum,
+                                      map, file);
         ++*depth;
         if (ret < 0) {
             return ret;
@@ -2723,21 +2751,13 @@ int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
                                           bytes, pnum, map, file, NULL);
 }
 
-int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
-                            int64_t offset, int64_t bytes, int64_t *pnum,
-                            int64_t *map, BlockDriverState **file)
-{
-    IO_CODE();
-    return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
-                                          pnum, map, file, NULL);
-}
-
-int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
-                      int64_t *pnum, int64_t *map, BlockDriverState **file)
+int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, int64_t offset,
                                      int64_t bytes, int64_t *pnum,
+                                      int64_t *map, BlockDriverState **file)
 {
     IO_CODE();
-    return bdrv_block_status_above(bs, bdrv_filter_or_cow_bs(bs),
-                                   offset, bytes, pnum, map, file);
+    return bdrv_co_block_status_above(bs, bdrv_filter_or_cow_bs(bs),
+                                      offset, bytes, pnum, map, file);
 }
 
 /*
@@ -2784,45 +2804,6 @@ int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset,
     return !!(ret & BDRV_BLOCK_ALLOCATED);
 }
 
-int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
-                      int64_t *pnum)
-{
-    int ret;
-    int64_t dummy;
-    IO_CODE();
-
-    ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
-                                         bytes, pnum ? pnum : &dummy, NULL,
-                                         NULL, NULL);
-    if (ret < 0) {
-        return ret;
-    }
-    return !!(ret & BDRV_BLOCK_ALLOCATED);
-}
-
-/* See bdrv_is_allocated_above for documentation */
-int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
-                                            BlockDriverState *base,
-                                            bool include_base, int64_t offset,
-                                            int64_t bytes, int64_t *pnum)
-{
-    int depth;
-    int ret;
-    IO_CODE();
-
-    ret = bdrv_co_common_block_status_above(top, base, include_base, false,
-                                            offset, bytes, pnum, NULL, NULL,
-                                            &depth);
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (ret & BDRV_BLOCK_ALLOCATED) {
-        return depth;
-    }
-    return 0;
-}
-
 /*
  * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
  *
@@ -2840,18 +2821,18 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
  * words, the result is not necessarily the maximum possible range);
  * but 'pnum' will only be 0 when end of file is reached.
  */
-int bdrv_is_allocated_above(BlockDriverState *top,
-                            BlockDriverState *base,
-                            bool include_base, int64_t offset,
-                            int64_t bytes, int64_t *pnum)
+int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *bs,
+                                            BlockDriverState *base,
+                                            bool include_base, int64_t offset,
+                                            int64_t bytes, int64_t *pnum)
 {
     int depth;
     int ret;
     IO_CODE();
 
-    ret = bdrv_common_block_status_above(top, base, include_base, false,
-                                         offset, bytes, pnum, NULL, NULL,
-                                         &depth);
+    ret = bdrv_co_common_block_status_above(bs, base, include_base, false,
+                                            offset, bytes, pnum, NULL, NULL,
+                                            &depth);
     if (ret < 0) {
         return ret;
     }
@@ -3551,9 +3532,13 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
                                  bytes, read_flags, write_flags);
 }
 
-static void bdrv_parent_cb_resize(BlockDriverState *bs)
+static void coroutine_fn GRAPH_RDLOCK
+bdrv_parent_cb_resize(BlockDriverState *bs)
 {
     BdrvChild *c;
+
+    assert_bdrv_graph_readable();
+
     QLIST_FOREACH(c, &bs->parents, next_parent) {
         if (c->klass->resize) {
             c->klass->resize(c);
diff --git a/block/iscsi.c b/block/iscsi.c
index 5640c8b565..2ff14b7472 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1925,7 +1925,9 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
     /* Check the write protect flag of the LUN if we want to write */
     if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
         iscsilun->write_protected) {
+        bdrv_graph_rdlock_main_loop();
         ret = bdrv_apply_auto_read_only(bs, "LUN is write protected", errp);
+        bdrv_graph_rdunlock_main_loop();
         if (ret < 0) {
             goto out;
         }
diff --git a/block/mirror.c b/block/mirror.c
index 3cc0757a03..dcd88de2e3 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -559,9 +559,9 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
     assert(!(offset % s->granularity));
     WITH_GRAPH_RDLOCK_GUARD() {
-        ret = bdrv_block_status_above(source, NULL, offset,
-                                      nb_chunks * s->granularity,
-                                      &io_bytes, NULL, NULL);
+        ret = bdrv_co_block_status_above(source, NULL, offset,
+                                         nb_chunks * s->granularity,
+                                         &io_bytes, NULL, NULL);
     }
     if (ret < 0) {
         io_bytes = MIN(nb_chunks * s->granularity, max_io_bytes);
@@ -879,8 +879,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
         }
 
         WITH_GRAPH_RDLOCK_GUARD() {
-            ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset,
-                                          bytes, &count);
+            ret = bdrv_co_is_allocated_above(bs, s->base_overlay, true, offset,
+                                             bytes, &count);
         }
         if (ret < 0) {
             return ret;
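The io.c and mirror.c changes follow one rule: code already running in coroutine context calls the bdrv_co_* variants (bdrv_co_block_status_above, bdrv_co_is_allocated_above, ...) directly, under the graph read lock, instead of going through mixed wrappers. The mirror.c pattern reduced to a sketch; WITH_GRAPH_RDLOCK_GUARD() and bdrv_co_is_allocated_above() are the real API, scan_allocated is illustrative:

    /*
     * Illustrative coroutine caller: query allocation status with the
     * graph read lock held for the duration of the call, as mirror.c
     * now does.
     */
    static int coroutine_fn scan_allocated(BlockDriverState *bs,
                                           BlockDriverState *base,
                                           int64_t offset, int64_t bytes,
                                           int64_t *pnum)
    {
        int ret;

        WITH_GRAPH_RDLOCK_GUARD() {
            ret = bdrv_co_is_allocated_above(bs, base, true, offset,
                                             bytes, pnum);
        }
        return ret;
    }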
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index ca2599de44..7645c7e5fb 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -144,6 +144,9 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
     AioContext *aio_context;
     Error *local_err = NULL;
 
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     bs = bdrv_find_node(id);
     if (bs) {
         qmp_blockdev_del(id, &local_err);
@@ -896,6 +899,8 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
     SnapshotEntry *snapshot_entry;
     Error *err = NULL;
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     bs = bdrv_all_find_vmstate_bs(NULL, false, NULL, &err);
     if (!bs) {
         error_report_err(err);
diff --git a/block/nbd.c b/block/nbd.c
index 52ebc8b2f5..b9d4f935e0 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -275,7 +275,8 @@ static bool nbd_client_will_reconnect(BDRVNBDState *s)
  * Return failure if the server's advertised options are incompatible with the
  * client's needs.
  */
-static int nbd_handle_updated_info(BlockDriverState *bs, Error **errp)
+static int coroutine_fn GRAPH_RDLOCK
+nbd_handle_updated_info(BlockDriverState *bs, Error **errp)
 {
     BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
     int ret;
diff --git a/block/nfs.c b/block/nfs.c
index c24df49747..f737e19cd3 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -843,7 +843,7 @@ static void nfs_refresh_filename(BlockDriverState *bs)
     }
 }
 
-static char *nfs_dirname(BlockDriverState *bs, Error **errp)
+static char * GRAPH_RDLOCK nfs_dirname(BlockDriverState *bs, Error **errp)
 {
     NFSClient *client = bs->opaque;
 
diff --git a/block/parallels.c b/block/parallels.c
index d026ce9e2f..6b46623241 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -1363,9 +1363,12 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
         bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block));
 
     /* Disable migration until bdrv_activate method is added */
+    bdrv_graph_rdlock_main_loop();
     error_setg(&s->migration_blocker, "The Parallels format used by node '%s' "
                "does not support live migration",
                bdrv_get_device_or_node_name(bs));
+    bdrv_graph_rdunlock_main_loop();
+
     ret = migrate_add_blocker(s->migration_blocker, errp);
     if (ret < 0) {
         error_setg(errp, "Migration blocker error");
diff --git a/block/qapi-sysemu.c b/block/qapi-sysemu.c
index ef07151892..3f614cbc04 100644
--- a/block/qapi-sysemu.c
+++ b/block/qapi-sysemu.c
@@ -169,14 +169,16 @@ void qmp_blockdev_close_tray(const char *device,
     }
 }
 
-static void blockdev_remove_medium(const char *device, const char *id,
-                                   Error **errp)
+static void GRAPH_UNLOCKED
+blockdev_remove_medium(const char *device, const char *id, Error **errp)
 {
     BlockBackend *blk;
     BlockDriverState *bs;
     AioContext *aio_context;
     bool has_attached_device;
 
+    GLOBAL_STATE_CODE();
+
     blk = qmp_get_blk(device, id, errp);
     if (!blk) {
         return;
@@ -205,9 +207,12 @@ static void blockdev_remove_medium(const char *device, const char *id,
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
+    bdrv_graph_rdlock_main_loop();
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) {
+        bdrv_graph_rdunlock_main_loop();
         goto out;
     }
+    bdrv_graph_rdunlock_main_loop();
 
     blk_remove_bs(blk);
 
@@ -279,6 +284,8 @@ static void blockdev_insert_medium(const char *device, const char *id,
     BlockBackend *blk;
     BlockDriverState *bs;
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     blk = qmp_get_blk(device, id, errp);
     if (!blk) {
         return;
diff --git a/block/qapi.c b/block/qapi.c
index 1cbb0935ff..82a30b38fe 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -225,9 +225,8 @@ int bdrv_query_snapshot_info_list(BlockDriverState *bs,
  * Helper function for other query info functions. Store information about @bs
  * in @info, setting @errp on error.
 */
-static void bdrv_do_query_node_info(BlockDriverState *bs,
-                                    BlockNodeInfo *info,
-                                    Error **errp)
+static void GRAPH_RDLOCK
+bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp)
 {
     int64_t size;
     const char *backing_filename;
@@ -423,8 +422,8 @@ fail:
 }
 
 /* @p_info will be set only on success. */
-static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
-                            Error **errp)
+static void GRAPH_RDLOCK
+bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, Error **errp)
 {
     BlockInfo *info = g_malloc0(sizeof(*info));
     BlockDriverState *bs = blk_bs(blk);
@@ -672,6 +671,8 @@ BlockInfoList *qmp_query_block(Error **errp)
     BlockBackend *blk;
     Error *local_err = NULL;
 
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
         BlockInfoList *info;
 
diff --git a/block/qcow.c b/block/qcow.c
index d56d24ab6d..38a16253b8 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -301,9 +301,12 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     /* Disable migration when qcow images are used */
+    bdrv_graph_rdlock_main_loop();
     error_setg(&s->migration_blocker, "The qcow format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
+    bdrv_graph_rdunlock_main_loop();
+
     ret = migrate_add_blocker(s->migration_blocker, errp);
     if (ret < 0) {
         error_free(s->migration_blocker);
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index ffd5cd3b23..3058309c47 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -156,10 +156,9 @@ static int64_t get_bitmap_bytes_needed(int64_t len, uint32_t granularity)
     return DIV_ROUND_UP(num_bits, 8);
 }
 
-static int check_constraints_on_bitmap(BlockDriverState *bs,
-                                       const char *name,
-                                       uint32_t granularity,
-                                       Error **errp)
+static int GRAPH_RDLOCK
+check_constraints_on_bitmap(BlockDriverState *bs, const char *name,
+                            uint32_t granularity, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     int granularity_bits = ctz32(granularity);
@@ -204,8 +203,9 @@ static int check_constraints_on_bitmap(BlockDriverState *bs,
     return 0;
 }
 
-static void clear_bitmap_table(BlockDriverState *bs, uint64_t *bitmap_table,
-                               uint32_t bitmap_table_size)
+static void GRAPH_RDLOCK
+clear_bitmap_table(BlockDriverState *bs, uint64_t *bitmap_table,
+                   uint32_t bitmap_table_size)
 {
     BDRVQcow2State *s = bs->opaque;
     int i;
@@ -259,7 +259,8 @@ fail:
     return ret;
 }
 
-static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb)
+static int GRAPH_RDLOCK
+free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb)
 {
     int ret;
     uint64_t *bitmap_table;
@@ -730,8 +731,9 @@ out:
 * Store bitmap list to qcow2 image as a bitmap directory.
 * Everything is checked.
 */
-static int bitmap_list_store(BlockDriverState *bs, Qcow2BitmapList *bm_list,
-                             uint64_t *offset, uint64_t *size, bool in_place)
+static int GRAPH_RDLOCK
+bitmap_list_store(BlockDriverState *bs, Qcow2BitmapList *bm_list,
+                  uint64_t *offset, uint64_t *size, bool in_place)
 {
     int ret;
     uint8_t *dir;
@@ -829,8 +831,9 @@ fail:
  * Bitmap List end
  */
 
-static int update_ext_header_and_dir_in_place(BlockDriverState *bs,
-                                              Qcow2BitmapList *bm_list)
+static int GRAPH_RDLOCK
+update_ext_header_and_dir_in_place(BlockDriverState *bs,
+                                   Qcow2BitmapList *bm_list)
 {
     BDRVQcow2State *s = bs->opaque;
     int ret;
@@ -877,8 +880,8 @@ static int update_ext_header_and_dir_in_place(BlockDriverState *bs,
      */
 }
 
-static int update_ext_header_and_dir(BlockDriverState *bs,
-                                     Qcow2BitmapList *bm_list)
+static int GRAPH_RDLOCK
+update_ext_header_and_dir(BlockDriverState *bs, Qcow2BitmapList *bm_list)
 {
     BDRVQcow2State *s = bs->opaque;
     int ret;
@@ -1271,9 +1274,9 @@ out:
 /* store_bitmap_data()
  * Store bitmap to image, filling bitmap table accordingly.
  */
-static uint64_t *store_bitmap_data(BlockDriverState *bs,
-                                   BdrvDirtyBitmap *bitmap,
-                                   uint32_t *bitmap_table_size, Error **errp)
+static uint64_t * GRAPH_RDLOCK
+store_bitmap_data(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                  uint32_t *bitmap_table_size, Error **errp)
 {
     int ret;
     BDRVQcow2State *s = bs->opaque;
@@ -1370,7 +1373,8 @@ fail:
 * Store bm->dirty_bitmap to qcow2.
 * Set bm->table_offset and bm->table_size accordingly.
 */
-static int store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp)
+static int GRAPH_RDLOCK
+store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp)
 {
     int ret;
     uint64_t *tb;
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 01c67bdddc..23d9588b08 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -163,7 +163,8 @@ int qcow2_cache_destroy(Qcow2Cache *c)
     return 0;
 }
 
-static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
+static int GRAPH_RDLOCK
+qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
 {
     int ret;
 
@@ -178,7 +179,8 @@ static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
     return 0;
 }
 
-static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
+static int GRAPH_RDLOCK
+qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
 {
     BDRVQcow2State *s = bs->opaque;
     int ret = 0;
@@ -318,8 +320,9 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
     return 0;
 }
 
-static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
-                              uint64_t offset, void **table, bool read_from_disk)
+static int GRAPH_RDLOCK
+qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+                   void **table, bool read_from_disk)
 {
     BDRVQcow2State *s = bs->opaque;
     int i;
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index f4f6cd6ad0..904f00d1b3 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -207,8 +207,9 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
 * the cache is used; otherwise the L2 slice is loaded from the image
 * file.
 */
-static int l2_load(BlockDriverState *bs, uint64_t offset,
-                   uint64_t l2_offset, uint64_t **l2_slice)
+static int GRAPH_RDLOCK
+l2_load(BlockDriverState *bs, uint64_t offset,
+        uint64_t l2_offset, uint64_t **l2_slice)
 {
     BDRVQcow2State *s = bs->opaque;
     int start_of_slice = l2_entry_size(s) *
@@ -269,7 +270,7 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
  *
  */
 
-static int l2_allocate(BlockDriverState *bs, int l1_index)
+static int GRAPH_RDLOCK l2_allocate(BlockDriverState *bs, int l1_index)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t old_l2_offset;
@@ -751,9 +752,9 @@ fail:
  *
  * Returns 0 on success, -errno in failure case
  */
-static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
-                             uint64_t **new_l2_slice,
-                             int *new_l2_index)
+static int GRAPH_RDLOCK
+get_cluster_table(BlockDriverState *bs, uint64_t offset,
+                  uint64_t **new_l2_slice, int *new_l2_index)
 {
     BDRVQcow2State *s = bs->opaque;
     unsigned int l2_index;
@@ -1155,11 +1156,10 @@ void coroutine_fn qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m)
  *
  * Returns 0 on success, -errno on failure.
  */
-static int coroutine_fn calculate_l2_meta(BlockDriverState *bs,
-                                          uint64_t host_cluster_offset,
-                                          uint64_t guest_offset, unsigned bytes,
-                                          uint64_t *l2_slice, QCowL2Meta **m,
-                                          bool keep_old)
+static int coroutine_fn GRAPH_RDLOCK
+calculate_l2_meta(BlockDriverState *bs, uint64_t host_cluster_offset,
+                  uint64_t guest_offset, unsigned bytes, uint64_t *l2_slice,
+                  QCowL2Meta **m, bool keep_old)
 {
     BDRVQcow2State *s = bs->opaque;
     int sc_index, l2_index = offset_to_l2_slice_index(s, guest_offset);
@@ -1490,9 +1490,9 @@ static int coroutine_fn handle_dependencies(BlockDriverState *bs,
  *
  * -errno: in error cases
 */
-static int coroutine_fn handle_copied(BlockDriverState *bs,
-    uint64_t guest_offset, uint64_t *host_offset, uint64_t *bytes,
-    QCowL2Meta **m)
+static int coroutine_fn GRAPH_RDLOCK
+handle_copied(BlockDriverState *bs, uint64_t guest_offset,
+              uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
 {
     BDRVQcow2State *s = bs->opaque;
     int l2_index;
@@ -1600,10 +1600,9 @@ out:
 * function has been waiting for another request and the allocation must be
 * restarted, but the whole request should not be failed.
 */
-static int coroutine_fn do_alloc_cluster_offset(BlockDriverState *bs,
-                                                uint64_t guest_offset,
-                                                uint64_t *host_offset,
-                                                uint64_t *nb_clusters)
+static int coroutine_fn GRAPH_RDLOCK
+do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
+                        uint64_t *host_offset, uint64_t *nb_clusters)
 {
     BDRVQcow2State *s = bs->opaque;
 
@@ -1658,9 +1657,9 @@ static int coroutine_fn do_alloc_cluster_offset(BlockDriverState *bs,
  *
  * -errno: in error cases
 */
-static int coroutine_fn handle_alloc(BlockDriverState *bs,
-    uint64_t guest_offset, uint64_t *host_offset, uint64_t *bytes,
-    QCowL2Meta **m)
+static int coroutine_fn GRAPH_RDLOCK
+handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
+             uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
 {
     BDRVQcow2State *s = bs->opaque;
     int l2_index;
@@ -1898,9 +1897,9 @@ again:
 * all clusters in the same L2 slice) and returns the number of discarded
 * clusters.
 */
-static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
-                               uint64_t nb_clusters,
-                               enum qcow2_discard_type type, bool full_discard)
+static int GRAPH_RDLOCK
+discard_in_l2_slice(BlockDriverState *bs, uint64_t offset, uint64_t nb_clusters,
+                    enum qcow2_discard_type type, bool full_discard)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t *l2_slice;
@@ -2037,7 +2036,7 @@ fail:
 * all clusters in the same L2 slice) and returns the number of zeroed
 * clusters.
 */
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
                  uint64_t nb_clusters, int flags)
 {
@@ -2093,7 +2092,7 @@ zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
     return nb_clusters;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 zero_l2_subclusters(BlockDriverState *bs, uint64_t offset,
                     unsigned nb_subclusters)
 {
@@ -2231,11 +2230,12 @@ fail:
 * status_cb(). l1_entries contains the total number of L1 entries and
 * *visited_l1_entries counts all visited L1 entries.
 */
-static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
-                                      int l1_size, int64_t *visited_l1_entries,
-                                      int64_t l1_entries,
-                                      BlockDriverAmendStatusCB *status_cb,
-                                      void *cb_opaque)
+static int GRAPH_RDLOCK
+expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
+                           int l1_size, int64_t *visited_l1_entries,
+                           int64_t l1_entries,
+                           BlockDriverAmendStatusCB *status_cb,
+                           void *cb_opaque)
 {
     BDRVQcow2State *s = bs->opaque;
     bool is_active_l1 = (l1_table == s->l1_table);
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 996d1217d0..0266542cee 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -229,9 +229,9 @@ static void set_refcount_ro6(void *refcount_array, uint64_t index,
 }
 
 
-static int load_refcount_block(BlockDriverState *bs,
-                               int64_t refcount_block_offset,
-                               void **refcount_block)
+static int GRAPH_RDLOCK
+load_refcount_block(BlockDriverState *bs, int64_t refcount_block_offset,
+                    void **refcount_block)
 {
     BDRVQcow2State *s = bs->opaque;
 
@@ -302,8 +302,9 @@ static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a,
 *
 * Returns 0 on success or -errno in error case
 */
-static int alloc_refcount_block(BlockDriverState *bs,
-                                int64_t cluster_index, void **refcount_block)
+static int GRAPH_RDLOCK
+alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index,
+                     void **refcount_block)
 {
     BDRVQcow2State *s = bs->opaque;
     unsigned int refcount_table_index;
@@ -806,12 +807,9 @@ found:
 /* XXX: cache several refcount block clusters ? */
 /* @addend is the absolute value of the addend; if @decrease is set, @addend
  * will be subtracted from the current refcount, otherwise it will be added */
-static int update_refcount(BlockDriverState *bs,
-                           int64_t offset,
-                           int64_t length,
-                           uint64_t addend,
-                           bool decrease,
-                           enum qcow2_discard_type type)
+static int GRAPH_RDLOCK
+update_refcount(BlockDriverState *bs, int64_t offset, int64_t length,
+                uint64_t addend, bool decrease, enum qcow2_discard_type type)
 {
     BDRVQcow2State *s = bs->opaque;
     int64_t start, last, cluster_offset;
@@ -967,8 +965,8 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs,
 
 
 /* return < 0 if error */
-static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
-                                    uint64_t max)
+static int64_t GRAPH_RDLOCK
+alloc_clusters_noref(BlockDriverState *bs, uint64_t size, uint64_t max)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t i, nb_clusters, refcount;
@@ -2302,7 +2300,7 @@ calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
 * Compares the actual reference count for each cluster in the image against the
 * refcount as reported by the refcount structures on-disk.
 */
-static void coroutine_fn
+static void coroutine_fn GRAPH_RDLOCK
 compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
                   BdrvCheckMode fix, bool *rebuild,
                   int64_t *highest_cluster,
@@ -3103,20 +3101,22 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
 *
 * @allocated should be set to true if a new cluster has been allocated.
 */
-typedef int (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable,
-                               uint64_t reftable_index, uint64_t *reftable_size,
-                               void *refblock, bool refblock_empty,
-                               bool *allocated, Error **errp);
+typedef int /* GRAPH_RDLOCK_PTR */
+    (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable,
+                       uint64_t reftable_index, uint64_t *reftable_size,
+                       void *refblock, bool refblock_empty,
+                       bool *allocated, Error **errp);
 
 /**
 * This "operation" for walk_over_reftable() allocates the refblock on disk (if
 * it is not empty) and inserts its offset into the new reftable. The size of
 * this new reftable is increased as required.
 */
-static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
-                          uint64_t reftable_index, uint64_t *reftable_size,
-                          void *refblock, bool refblock_empty, bool *allocated,
-                          Error **errp)
+static int GRAPH_RDLOCK
+alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
+               uint64_t reftable_index, uint64_t *reftable_size,
+               void *refblock, bool refblock_empty, bool *allocated,
+               Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     int64_t offset;
@@ -3166,10 +3166,11 @@ static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
 * offset specified by the new reftable's entry. It does not modify the new
 * reftable or change any refcounts.
 */
-static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
-                          uint64_t reftable_index, uint64_t *reftable_size,
-                          void *refblock, bool refblock_empty, bool *allocated,
-                          Error **errp)
+static int GRAPH_RDLOCK
+flush_refblock(BlockDriverState *bs, uint64_t **reftable,
+               uint64_t reftable_index, uint64_t *reftable_size,
+               void *refblock, bool refblock_empty, bool *allocated,
+               Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     int64_t offset;
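Note the RefblockFinishOp typedef above carries its lock annotation only as a comment (/* GRAPH_RDLOCK_PTR */): the attribute apparently does not attach cleanly to a plain function typedef, so the named implementations (alloc_refblock, flush_refblock) are annotated directly instead. Where the annotation is known to work is on function-pointer declarations; a minimal sketch of that shape, assuming GRAPH_RDLOCK_PTR from block/graph-lock.h, with MyOps/finish as illustrative names:

    /*
     * Illustrative: mark a function *pointer* whose callee expects the
     * graph read lock, e.g. in a struct of callbacks.
     */
    struct MyOps {
        int GRAPH_RDLOCK_PTR (*finish)(BlockDriverState *bs, void *refblock);
    };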
*/ -static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable, - uint64_t *new_reftable_index, - uint64_t *new_reftable_size, - void *new_refblock, int new_refblock_size, - int new_refcount_bits, - RefblockFinishOp *operation, bool *allocated, - Qcow2SetRefcountFunc *new_set_refcount, - BlockDriverAmendStatusCB *status_cb, - void *cb_opaque, int index, int total, - Error **errp) +static int GRAPH_RDLOCK +walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable, + uint64_t *new_reftable_index, + uint64_t *new_reftable_size, + void *new_refblock, int new_refblock_size, + int new_refcount_bits, + RefblockFinishOp *operation, bool *allocated, + Qcow2SetRefcountFunc *new_set_refcount, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque, int index, int total, + Error **errp) { BDRVQcow2State *s = bs->opaque; uint64_t reftable_index; @@ -3545,8 +3547,8 @@ done: return ret; } -static int64_t coroutine_fn get_refblock_offset(BlockDriverState *bs, - uint64_t offset) +static int64_t coroutine_fn GRAPH_RDLOCK +get_refblock_offset(BlockDriverState *bs, uint64_t offset) { BDRVQcow2State *s = bs->opaque; uint32_t index = offset_to_reftable_index(s, offset); @@ -3565,7 +3567,7 @@ static int64_t coroutine_fn get_refblock_offset(BlockDriverState *bs, return covering_refblock_offset; } -static int coroutine_fn +static int coroutine_fn GRAPH_RDLOCK qcow2_discard_refcount_block(BlockDriverState *bs, uint64_t discard_block_offs) { BDRVQcow2State *s = bs->opaque; diff --git a/block/qcow2.c b/block/qcow2.c index 5a3c537f14..aa01d9e7b5 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -536,7 +536,7 @@ int qcow2_mark_dirty(BlockDriverState *bs) * function when there are no pending requests, it does not guard against * concurrent requests dirtying the image. */ -static int qcow2_mark_clean(BlockDriverState *bs) +static int GRAPH_RDLOCK qcow2_mark_clean(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; @@ -570,7 +570,8 @@ int qcow2_mark_corrupt(BlockDriverState *bs) * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes * before if necessary. 
*/ -static int coroutine_fn qcow2_mark_consistent(BlockDriverState *bs) +static int coroutine_fn GRAPH_RDLOCK +qcow2_mark_consistent(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; @@ -980,10 +981,9 @@ typedef struct Qcow2ReopenState { QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */ } Qcow2ReopenState; -static int qcow2_update_options_prepare(BlockDriverState *bs, - Qcow2ReopenState *r, - QDict *options, int flags, - Error **errp) +static int GRAPH_RDLOCK +qcow2_update_options_prepare(BlockDriverState *bs, Qcow2ReopenState *r, + QDict *options, int flags, Error **errp) { BDRVQcow2State *s = bs->opaque; QemuOpts *opts = NULL; @@ -1260,7 +1260,7 @@ static void qcow2_update_options_abort(BlockDriverState *bs, qapi_free_QCryptoBlockOpenOptions(r->crypto_opts); } -static int coroutine_fn +static int coroutine_fn GRAPH_RDLOCK qcow2_update_options(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -1969,13 +1969,17 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.pdiscard_alignment = s->cluster_size; } -static int qcow2_reopen_prepare(BDRVReopenState *state, - BlockReopenQueue *queue, Error **errp) +static int GRAPH_UNLOCKED +qcow2_reopen_prepare(BDRVReopenState *state,BlockReopenQueue *queue, + Error **errp) { BDRVQcow2State *s = state->bs->opaque; Qcow2ReopenState *r; int ret; + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + r = g_new0(Qcow2ReopenState, 1); state->opaque = r; @@ -2038,6 +2042,8 @@ static void qcow2_reopen_commit(BDRVReopenState *state) static void qcow2_reopen_commit_post(BDRVReopenState *state) { + GRAPH_RDLOCK_GUARD_MAINLOOP(); + if (state->flags & BDRV_O_RDWR) { Error *local_err = NULL; @@ -2731,7 +2737,7 @@ fail_nometa: return ret; } -static int qcow2_inactivate(BlockDriverState *bs) +static int GRAPH_RDLOCK qcow2_inactivate(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; int ret, result = 0; @@ -2766,7 +2772,8 @@ static int qcow2_inactivate(BlockDriverState *bs) return result; } -static void qcow2_do_close(BlockDriverState *bs, bool close_data_file) +static void coroutine_mixed_fn GRAPH_RDLOCK +qcow2_do_close(BlockDriverState *bs, bool close_data_file) { BDRVQcow2State *s = bs->opaque; qemu_vfree(s->l1_table); @@ -2793,18 +2800,24 @@ static void qcow2_do_close(BlockDriverState *bs, bool close_data_file) g_free(s->image_backing_format); if (close_data_file && has_data_file(bs)) { + GLOBAL_STATE_CODE(); + bdrv_graph_rdunlock_main_loop(); bdrv_graph_wrlock(NULL); bdrv_unref_child(bs, s->data_file); bdrv_graph_wrunlock(); s->data_file = NULL; + bdrv_graph_rdlock_main_loop(); } qcow2_refcount_close(bs); qcow2_free_snapshots(bs); } -static void qcow2_close(BlockDriverState *bs) +static void GRAPH_UNLOCKED qcow2_close(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + qcow2_do_close(bs, true); } @@ -3991,7 +4004,8 @@ finish: } -static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes) +static bool coroutine_fn GRAPH_RDLOCK +is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes) { int64_t nr; int res; @@ -4012,7 +4026,7 @@ static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes) * backing file. So, we need a loop. 
*/ do { - res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL); + res = bdrv_co_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL); offset += nr; bytes -= nr; } while (res >= 0 && (res & BDRV_BLOCK_ZERO) && nr && bytes); @@ -4076,8 +4090,8 @@ qcow2_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, return ret; } -static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, - int64_t offset, int64_t bytes) +static int coroutine_fn GRAPH_RDLOCK +qcow2_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) { int ret; BDRVQcow2State *s = bs->opaque; @@ -4822,7 +4836,7 @@ fail: return ret; } -static int make_completely_empty(BlockDriverState *bs) +static int GRAPH_RDLOCK make_completely_empty(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; Error *local_err = NULL; @@ -4973,7 +4987,7 @@ fail: return ret; } -static int qcow2_make_empty(BlockDriverState *bs) +static int GRAPH_RDLOCK qcow2_make_empty(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; uint64_t offset, end_offset; @@ -5017,7 +5031,7 @@ static int qcow2_make_empty(BlockDriverState *bs) return ret; } -static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) +static coroutine_fn GRAPH_RDLOCK int qcow2_co_flush_to_os(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; int ret; @@ -5366,7 +5380,7 @@ qcow2_co_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) return bs->drv->bdrv_co_preadv_part(bs, offset, qiov->size, qiov, 0, 0); } -static int qcow2_has_compressed_clusters(BlockDriverState *bs) +static int GRAPH_RDLOCK qcow2_has_compressed_clusters(BlockDriverState *bs) { int64_t offset = 0; int64_t bytes = bdrv_getlength(bs); @@ -5402,9 +5416,10 @@ static int qcow2_has_compressed_clusters(BlockDriverState *bs) * Downgrades an image's version. To achieve this, any incompatible features * have to be removed. */ -static int qcow2_downgrade(BlockDriverState *bs, int target_version, - BlockDriverAmendStatusCB *status_cb, void *cb_opaque, - Error **errp) +static int GRAPH_RDLOCK +qcow2_downgrade(BlockDriverState *bs, int target_version, + BlockDriverAmendStatusCB *status_cb, void *cb_opaque, + Error **errp) { BDRVQcow2State *s = bs->opaque; int current_version = s->qcow_version; @@ -5512,9 +5527,10 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version, * features of older versions, some things may have to be presented * differently. 
*/ -static int qcow2_upgrade(BlockDriverState *bs, int target_version, - BlockDriverAmendStatusCB *status_cb, void *cb_opaque, - Error **errp) +static int GRAPH_RDLOCK +qcow2_upgrade(BlockDriverState *bs, int target_version, + BlockDriverAmendStatusCB *status_cb, void *cb_opaque, + Error **errp) { BDRVQcow2State *s = bs->opaque; bool need_snapshot_update; @@ -5640,11 +5656,10 @@ static void qcow2_amend_helper_cb(BlockDriverState *bs, info->original_cb_opaque); } -static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, - BlockDriverAmendStatusCB *status_cb, - void *cb_opaque, - bool force, - Error **errp) +static int GRAPH_RDLOCK +qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb, void *cb_opaque, + bool force, Error **errp) { BDRVQcow2State *s = bs->opaque; int old_version = s->qcow_version, new_version = old_version; diff --git a/block/qcow2.h b/block/qcow2.h index f789ce3ae0..29958c512b 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -838,9 +838,10 @@ int qcow2_mark_dirty(BlockDriverState *bs); int qcow2_mark_corrupt(BlockDriverState *bs); int qcow2_update_header(BlockDriverState *bs); -void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, - int64_t size, const char *message_format, ...) - G_GNUC_PRINTF(5, 6); +void GRAPH_RDLOCK +qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, + int64_t size, const char *message_format, ...) + G_GNUC_PRINTF(5, 6); int qcow2_validate_table(BlockDriverState *bs, uint64_t offset, uint64_t entries, size_t entry_len, @@ -851,33 +852,41 @@ int qcow2_validate_table(BlockDriverState *bs, uint64_t offset, int coroutine_fn GRAPH_RDLOCK qcow2_refcount_init(BlockDriverState *bs); void qcow2_refcount_close(BlockDriverState *bs); -int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, - uint64_t *refcount); +int GRAPH_RDLOCK qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, + uint64_t *refcount); -int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index, - uint64_t addend, bool decrease, - enum qcow2_discard_type type); +int GRAPH_RDLOCK +qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index, + uint64_t addend, bool decrease, + enum qcow2_discard_type type); -int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t offset, - uint64_t additional_clusters, bool exact_size, - int new_refblock_index, - uint64_t new_refblock_offset); +int64_t GRAPH_RDLOCK +qcow2_refcount_area(BlockDriverState *bs, uint64_t offset, + uint64_t additional_clusters, bool exact_size, + int new_refblock_index, + uint64_t new_refblock_offset); -int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size); -int64_t coroutine_fn qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, - int64_t nb_clusters); -int64_t coroutine_fn GRAPH_RDLOCK qcow2_alloc_bytes(BlockDriverState *bs, int size); -void qcow2_free_clusters(BlockDriverState *bs, - int64_t offset, int64_t size, - enum qcow2_discard_type type); -void qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry, - enum qcow2_discard_type type); +int64_t GRAPH_RDLOCK +qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size); -int qcow2_update_snapshot_refcount(BlockDriverState *bs, - int64_t l1_table_offset, int l1_size, int addend); +int64_t GRAPH_RDLOCK coroutine_fn +qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, + int64_t nb_clusters); -int qcow2_flush_caches(BlockDriverState *bs); -int qcow2_write_caches(BlockDriverState *bs); 
+int64_t coroutine_fn GRAPH_RDLOCK qcow2_alloc_bytes(BlockDriverState *bs, int size); +void GRAPH_RDLOCK qcow2_free_clusters(BlockDriverState *bs, + int64_t offset, int64_t size, + enum qcow2_discard_type type); +void GRAPH_RDLOCK +qcow2_free_any_cluster(BlockDriverState *bs, uint64_t l2_entry, + enum qcow2_discard_type type); + +int GRAPH_RDLOCK +qcow2_update_snapshot_refcount(BlockDriverState *bs, int64_t l1_table_offset, + int l1_size, int addend); + +int GRAPH_RDLOCK qcow2_flush_caches(BlockDriverState *bs); +int GRAPH_RDLOCK qcow2_write_caches(BlockDriverState *bs); int coroutine_fn qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); @@ -885,39 +894,48 @@ void qcow2_process_discards(BlockDriverState *bs, int ret); int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset, int64_t size); -int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset, - int64_t size, bool data_file); +int GRAPH_RDLOCK +qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset, + int64_t size, bool data_file); + int coroutine_fn qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res, void **refcount_table, int64_t *refcount_table_size, int64_t offset, int64_t size); -int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, - BlockDriverAmendStatusCB *status_cb, - void *cb_opaque, Error **errp); +int GRAPH_RDLOCK +qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque, Error **errp); int coroutine_fn GRAPH_RDLOCK qcow2_shrink_reftable(BlockDriverState *bs); -int64_t coroutine_fn qcow2_get_last_cluster(BlockDriverState *bs, int64_t size); + +int64_t coroutine_fn GRAPH_RDLOCK +qcow2_get_last_cluster(BlockDriverState *bs, int64_t size); int coroutine_fn GRAPH_RDLOCK qcow2_detect_metadata_preallocation(BlockDriverState *bs); /* qcow2-cluster.c functions */ -int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, - bool exact_size); +int GRAPH_RDLOCK +qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, bool exact_size); int coroutine_fn GRAPH_RDLOCK qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size); -int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index); +int GRAPH_RDLOCK qcow2_write_l1_entry(BlockDriverState *bs, int l1_index); int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, uint8_t *buf, int nb_sectors, bool enc, Error **errp); -int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset, - unsigned int *bytes, uint64_t *host_offset, - QCow2SubclusterType *subcluster_type); -int coroutine_fn qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset, - unsigned int *bytes, - uint64_t *host_offset, QCowL2Meta **m); +int GRAPH_RDLOCK +qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset, + unsigned int *bytes, uint64_t *host_offset, + QCow2SubclusterType *subcluster_type); + +int coroutine_fn GRAPH_RDLOCK +qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset, + unsigned int *bytes, uint64_t *host_offset, + QCowL2Meta **m); + int coroutine_fn GRAPH_RDLOCK qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset, int compressed_size, uint64_t *host_offset); @@ -927,26 +945,33 @@ void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry, int coroutine_fn GRAPH_RDLOCK qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); -void coroutine_fn qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta 
*m); -int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, - uint64_t bytes, enum qcow2_discard_type type, - bool full_discard); +void coroutine_fn GRAPH_RDLOCK +qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m); + +int GRAPH_RDLOCK +qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + enum qcow2_discard_type type, bool full_discard); int coroutine_fn GRAPH_RDLOCK qcow2_subcluster_zeroize(BlockDriverState *bs, uint64_t offset, uint64_t bytes, int flags); -int qcow2_expand_zero_clusters(BlockDriverState *bs, - BlockDriverAmendStatusCB *status_cb, - void *cb_opaque); +int GRAPH_RDLOCK +qcow2_expand_zero_clusters(BlockDriverState *bs, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque); /* qcow2-snapshot.c functions */ -int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); -int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id); -int qcow2_snapshot_delete(BlockDriverState *bs, - const char *snapshot_id, - const char *name, - Error **errp); +int GRAPH_RDLOCK +qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); + +int GRAPH_RDLOCK +qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id); + +int GRAPH_RDLOCK +qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id, + const char *name, Error **errp); + int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab); int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_id, @@ -956,15 +981,15 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, void qcow2_free_snapshots(BlockDriverState *bs); int coroutine_fn GRAPH_RDLOCK qcow2_read_snapshots(BlockDriverState *bs, Error **errp); -int qcow2_write_snapshots(BlockDriverState *bs); +int GRAPH_RDLOCK qcow2_write_snapshots(BlockDriverState *bs); int coroutine_fn GRAPH_RDLOCK qcow2_check_read_snapshot_table(BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix); -int coroutine_fn qcow2_check_fix_snapshot_table(BlockDriverState *bs, - BdrvCheckResult *result, - BdrvCheckMode fix); +int coroutine_fn GRAPH_RDLOCK +qcow2_check_fix_snapshot_table(BlockDriverState *bs, BdrvCheckResult *result, + BdrvCheckMode fix); /* qcow2-cache.c functions */ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables, @@ -972,19 +997,23 @@ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables, int qcow2_cache_destroy(Qcow2Cache *c); void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table); -int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c); -int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c); -int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, - Qcow2Cache *dependency); +int GRAPH_RDLOCK qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c); +int GRAPH_RDLOCK qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c); +int GRAPH_RDLOCK qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, + Qcow2Cache *dependency); void qcow2_cache_depends_on_flush(Qcow2Cache *c); void qcow2_cache_clean_unused(Qcow2Cache *c); -int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c); +int GRAPH_RDLOCK qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c); + +int GRAPH_RDLOCK +qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, + void **table); + +int GRAPH_RDLOCK +qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, + void **table); -int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, - void **table); -int 
qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, - void **table); void qcow2_cache_put(Qcow2Cache *c, void **table); void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset); void qcow2_cache_discard(Qcow2Cache *c, void *table); @@ -998,18 +1027,22 @@ bool coroutine_fn GRAPH_RDLOCK qcow2_load_dirty_bitmaps(BlockDriverState *bs, bool *header_updated, Error **errp); bool qcow2_get_bitmap_info_list(BlockDriverState *bs, Qcow2BitmapInfoList **info_list, Error **errp); -int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp); +int GRAPH_RDLOCK qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp); +int GRAPH_RDLOCK qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp); int coroutine_fn qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp); -bool qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, - bool release_stored, Error **errp); -int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp); -bool coroutine_fn qcow2_co_can_store_new_dirty_bitmap(BlockDriverState *bs, - const char *name, - uint32_t granularity, - Error **errp); -int coroutine_fn qcow2_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, - const char *name, - Error **errp); + +bool GRAPH_RDLOCK +qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, bool release_stored, + Error **errp); + +bool coroutine_fn GRAPH_RDLOCK +qcow2_co_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, + uint32_t granularity, Error **errp); + +int coroutine_fn GRAPH_RDLOCK +qcow2_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name, + Error **errp); + bool qcow2_supports_persistent_dirty_bitmap(BlockDriverState *bs); uint64_t qcow2_get_persistent_dirty_bitmap_size(BlockDriverState *bs, uint32_t cluster_size); diff --git a/block/quorum.c b/block/quorum.c index 05220cab7f..d3ffc2ee33 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -206,7 +206,7 @@ static void quorum_report_bad(QuorumOpType type, uint64_t offset, end_sector - start_sector); } -static void quorum_report_failure(QuorumAIOCB *acb) +static void GRAPH_RDLOCK quorum_report_failure(QuorumAIOCB *acb) { const char *reference = bdrv_get_device_or_node_name(acb->bs); int64_t start_sector = acb->offset / BDRV_SECTOR_SIZE; @@ -219,7 +219,7 @@ static void quorum_report_failure(QuorumAIOCB *acb) static int quorum_vote_error(QuorumAIOCB *acb); -static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb) +static bool GRAPH_RDLOCK quorum_has_too_much_io_failed(QuorumAIOCB *acb) { BDRVQuorumState *s = acb->bs->opaque; diff --git a/block/raw-format.c b/block/raw-format.c index a8bdee5279..8ff03adfa4 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -505,7 +505,9 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, BDRV_REQ_ZERO_WRITE; if (bs->probed && !bdrv_is_read_only(bs)) { + bdrv_graph_rdlock_main_loop(); bdrv_refresh_filename(bs->file->bs); + bdrv_graph_rdunlock_main_loop(); fprintf(stderr, "WARNING: Image format was not specified for '%s' and probing " "guessed raw.\n" diff --git a/block/rbd.c b/block/rbd.c index 472ca05cba..84bb2fa5d7 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -1168,7 +1168,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, /* If we are using an rbd snapshot, we must be r/o, otherwise * leave as-is */ if (s->snap != NULL) { + bdrv_graph_rdlock_main_loop(); r = bdrv_apply_auto_read_only(bs, "rbd snapshots are read-only", errp); + bdrv_graph_rdunlock_main_loop(); if (r < 0) { goto failed_post_open; } 
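A note on the recurring pattern in the block-layer hunks above and below: functions that dereference graph edges (bs->file, bs->backing, child nodes) gain the GRAPH_RDLOCK marker so that clang's Thread Safety Analysis can check that every caller holds the block-graph reader lock, while main-loop entry points that do not yet hold it take it for their whole scope with GRAPH_RDLOCK_GUARD_MAINLOOP(). A minimal sketch of the idiom, not taken from the patches themselves: the my_format_* names are hypothetical, while the macros are the real ones from include/block/graph-lock.h:

    #include "qemu/osdep.h"
    #include "block/block_int.h"
    #include "block/graph-lock.h"

    /* Callee: reads the graph, so every caller must hold the reader lock. */
    static int GRAPH_RDLOCK my_format_do_work(BlockDriverState *bs)
    {
        /* Safe: the graph cannot change while the reader lock is held. */
        return bdrv_primary_bs(bs) ? 0 : -ENOMEDIUM;
    }

    /* Caller: a main-loop entry point that is not yet holding the lock. */
    static int GRAPH_UNLOCKED my_format_entry(BlockDriverState *bs)
    {
        GLOBAL_STATE_CODE();            /* main-loop (BQL) code only */
        GRAPH_RDLOCK_GUARD_MAINLOOP();  /* reader lock held until return */
        return my_format_do_work(bs);
    }

Where the reader lock has to be dropped so the writer lock can be taken, as in qcow2_do_close() and vmdk_parse_extents() in this series, the hunks instead bracket the write-locked region with explicit bdrv_graph_rdunlock_main_loop() / bdrv_graph_wrlock() / bdrv_graph_wrunlock() / bdrv_graph_rdlock_main_loop() calls.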
@@ -1208,6 +1210,8 @@ static int qemu_rbd_reopen_prepare(BDRVReopenState *state, BDRVRBDState *s = state->bs->opaque; int ret = 0; + GRAPH_RDLOCK_GUARD_MAINLOOP(); + if (s->snap && state->flags & BDRV_O_RDWR) { error_setg(errp, "Cannot change node '%s' to r/w when using RBD snapshot", diff --git a/block/replication.c b/block/replication.c index dd166d2d82..d522c7396f 100644 --- a/block/replication.c +++ b/block/replication.c @@ -276,10 +276,10 @@ replication_co_writev(BlockDriverState *bs, int64_t sector_num, while (remaining_sectors > 0) { int64_t count; - ret = bdrv_is_allocated_above(top->bs, base->bs, false, - sector_num * BDRV_SECTOR_SIZE, - remaining_sectors * BDRV_SECTOR_SIZE, - &count); + ret = bdrv_co_is_allocated_above(top->bs, base->bs, false, + sector_num * BDRV_SECTOR_SIZE, + remaining_sectors * BDRV_SECTOR_SIZE, + &count); if (ret < 0) { goto out1; } @@ -307,13 +307,16 @@ out: return ret; } -static void secondary_do_checkpoint(BlockDriverState *bs, Error **errp) +static void GRAPH_UNLOCKED +secondary_do_checkpoint(BlockDriverState *bs, Error **errp) { BDRVReplicationState *s = bs->opaque; BdrvChild *active_disk = bs->file; Error *local_err = NULL; int ret; + GRAPH_RDLOCK_GUARD_MAINLOOP(); + if (!s->backup_job) { error_setg(errp, "Backup job was cancelled unexpectedly"); return; @@ -427,7 +430,8 @@ static void backup_job_completed(void *opaque, int ret) backup_job_cleanup(bs); } -static bool check_top_bs(BlockDriverState *top_bs, BlockDriverState *bs) +static bool GRAPH_RDLOCK +check_top_bs(BlockDriverState *top_bs, BlockDriverState *bs) { BdrvChild *child; @@ -458,6 +462,8 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, Error *local_err = NULL; BackupPerf perf = { .use_copy_range = true, .max_workers = 1 }; + GLOBAL_STATE_CODE(); + aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); s = bs->opaque; @@ -504,12 +510,15 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, return; } + bdrv_graph_rdlock_main_loop(); secondary_disk = hidden_disk->bs->backing; if (!secondary_disk->bs || !bdrv_has_blk(secondary_disk->bs)) { error_setg(errp, "The secondary disk doesn't have block backend"); + bdrv_graph_rdunlock_main_loop(); aio_context_release(aio_context); return; } + bdrv_graph_rdunlock_main_loop(); /* verify the length */ active_length = bdrv_getlength(active_disk->bs); @@ -526,13 +535,16 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, /* Must be true, or the bdrv_getlength() calls would have failed */ assert(active_disk->bs->drv && hidden_disk->bs->drv); + bdrv_graph_rdlock_main_loop(); if (!active_disk->bs->drv->bdrv_make_empty || !hidden_disk->bs->drv->bdrv_make_empty) { error_setg(errp, "Active disk or hidden disk doesn't support make_empty"); aio_context_release(aio_context); + bdrv_graph_rdunlock_main_loop(); return; } + bdrv_graph_rdunlock_main_loop(); /* reopen the backing file in r/w mode */ reopen_backing_file(bs, true, &local_err); @@ -566,8 +578,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, return; } - bdrv_graph_wrunlock(); - /* start backup job now */ error_setg(&s->blocker, "Block device is in use by internal backup job"); @@ -576,6 +586,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, if (!top_bs || !bdrv_is_root_node(top_bs) || !check_top_bs(top_bs, bs)) { error_setg(errp, "No top_bs or it is invalid"); + bdrv_graph_wrunlock(); reopen_backing_file(bs, false, NULL); aio_context_release(aio_context); 
return; @@ -583,6 +594,8 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, bdrv_op_block_all(top_bs, s->blocker); bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker); + bdrv_graph_wrunlock(); + s->backup_job = backup_job_create( NULL, s->secondary_disk->bs, s->hidden_disk->bs, 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL, diff --git a/block/snapshot.c b/block/snapshot.c index b86b5b24ad..6e16eb803a 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -155,11 +155,15 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs, * back if the given BDS does not support snapshots. * Return NULL if there is no BDS to (safely) fall back to. */ -static BdrvChild *bdrv_snapshot_fallback_child(BlockDriverState *bs) +static BdrvChild * GRAPH_RDLOCK +bdrv_snapshot_fallback_child(BlockDriverState *bs) { BdrvChild *fallback = bdrv_primary_child(bs); BdrvChild *child; + GLOBAL_STATE_CODE(); + assert_bdrv_graph_readable(); + /* We allow fallback only to primary child */ if (!fallback) { return NULL; @@ -182,8 +186,10 @@ static BdrvChild *bdrv_snapshot_fallback_child(BlockDriverState *bs) return fallback; } -static BlockDriverState *bdrv_snapshot_fallback(BlockDriverState *bs) +static BlockDriverState * GRAPH_RDLOCK +bdrv_snapshot_fallback(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); return child_bs(bdrv_snapshot_fallback_child(bs)); } @@ -254,7 +260,10 @@ int bdrv_snapshot_goto(BlockDriverState *bs, return ret; } + bdrv_graph_rdlock_main_loop(); fallback = bdrv_snapshot_fallback_child(bs); + bdrv_graph_rdunlock_main_loop(); + if (fallback) { QDict *options; QDict *file_options; @@ -302,7 +311,10 @@ int bdrv_snapshot_goto(BlockDriverState *bs, * respective option (with the qdict_put_str() call above). * Assert that .bdrv_open() has attached the right BDS as primary child. 
*/ + bdrv_graph_rdlock_main_loop(); assert(bdrv_primary_bs(bs) == fallback_bs); + bdrv_graph_rdunlock_main_loop(); + bdrv_unref(fallback_bs); return ret; } @@ -374,10 +386,12 @@ int bdrv_snapshot_delete(BlockDriverState *bs, int bdrv_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_info) { + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + BlockDriver *drv = bs->drv; BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs); - GLOBAL_STATE_CODE(); if (!drv) { return -ENOMEDIUM; } @@ -418,6 +432,7 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs, BlockDriver *drv = bs->drv; GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); if (!drv) { error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs)); @@ -462,9 +477,9 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, } -static int bdrv_all_get_snapshot_devices(bool has_devices, strList *devices, - GList **all_bdrvs, - Error **errp) +static int GRAPH_RDLOCK +bdrv_all_get_snapshot_devices(bool has_devices, strList *devices, + GList **all_bdrvs, Error **errp) { g_autoptr(GList) bdrvs = NULL; @@ -496,8 +511,11 @@ static int bdrv_all_get_snapshot_devices(bool has_devices, strList *devices, } -static bool bdrv_all_snapshots_includes_bs(BlockDriverState *bs) +static bool GRAPH_RDLOCK bdrv_all_snapshots_includes_bs(BlockDriverState *bs) { + GLOBAL_STATE_CODE(); + assert_bdrv_graph_readable(); + if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { return false; } @@ -518,6 +536,7 @@ bool bdrv_all_can_snapshot(bool has_devices, strList *devices, GList *iterbdrvs; GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { return false; @@ -554,6 +573,7 @@ int bdrv_all_delete_snapshot(const char *name, GList *iterbdrvs; GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { return -1; @@ -593,10 +613,15 @@ int bdrv_all_goto_snapshot(const char *name, { g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; + int ret; GLOBAL_STATE_CODE(); - if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { + bdrv_graph_rdlock_main_loop(); + ret = bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp); + bdrv_graph_rdunlock_main_loop(); + + if (ret < 0) { return -1; } @@ -605,15 +630,22 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState *bs = iterbdrvs->data; AioContext *ctx = bdrv_get_aio_context(bs); int ret = 0; + bool all_snapshots_includes_bs; aio_context_acquire(ctx); - if (devices || bdrv_all_snapshots_includes_bs(bs)) { + bdrv_graph_rdlock_main_loop(); + all_snapshots_includes_bs = bdrv_all_snapshots_includes_bs(bs); + bdrv_graph_rdunlock_main_loop(); + + if (devices || all_snapshots_includes_bs) { ret = bdrv_snapshot_goto(bs, name, errp); } aio_context_release(ctx); if (ret < 0) { + bdrv_graph_rdlock_main_loop(); error_prepend(errp, "Could not load snapshot '%s' on '%s': ", name, bdrv_get_device_or_node_name(bs)); + bdrv_graph_rdunlock_main_loop(); return -1; } @@ -631,6 +663,7 @@ int bdrv_all_has_snapshot(const char *name, GList *iterbdrvs; GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { return -1; @@ -673,7 +706,9 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, { g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) 
{ return -1; @@ -715,6 +750,7 @@ BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs, GList *iterbdrvs; GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { return NULL; diff --git a/block/stream.c b/block/stream.c index 133cb72fb4..ddaab7dbbd 100644 --- a/block/stream.c +++ b/block/stream.c @@ -172,7 +172,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp) copy = false; WITH_GRAPH_RDLOCK_GUARD() { - ret = bdrv_is_allocated(unfiltered_bs, offset, STREAM_CHUNK, &n); + ret = bdrv_co_is_allocated(unfiltered_bs, offset, STREAM_CHUNK, &n); if (ret == 1) { /* Allocated in the top, no need to copy. */ } else if (ret >= 0) { @@ -180,9 +180,9 @@ static int coroutine_fn stream_run(Job *job, Error **errp) * Copy if allocated in the intermediate images. Limit to the * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE). */ - ret = bdrv_is_allocated_above(bdrv_cow_bs(unfiltered_bs), - s->base_overlay, true, - offset, n, &n); + ret = bdrv_co_is_allocated_above(bdrv_cow_bs(unfiltered_bs), + s->base_overlay, true, + offset, n, &n); /* Finish early if end of backing file has been reached */ if (ret == 0 && n == 0) { n = len - offset; diff --git a/block/vdi.c b/block/vdi.c index 934e1b849b..3ed43b6f35 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -492,9 +492,12 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, } /* Disable migration when vdi images are used */ + bdrv_graph_rdlock_main_loop(); error_setg(&s->migration_blocker, "The vdi format used by node '%s' " "does not support live migration", bdrv_get_device_or_node_name(bs)); + bdrv_graph_rdunlock_main_loop(); + ret = migrate_add_blocker(s->migration_blocker, errp); if (ret < 0) { error_free(s->migration_blocker); diff --git a/block/vhdx.c b/block/vhdx.c index a67edcc03e..73cb214fb4 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1001,11 +1001,15 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, uint64_t signature; Error *local_err = NULL; + GLOBAL_STATE_CODE(); + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); if (ret < 0) { return ret; } + GRAPH_RDLOCK_GUARD_MAINLOOP(); + s->bat = NULL; s->first_visible_write = true; diff --git a/block/vhdx.h b/block/vhdx.h index 455a627a46..85594a5380 100644 --- a/block/vhdx.h +++ b/block/vhdx.h @@ -410,8 +410,9 @@ uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size, bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset); -int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed, - Error **errp); +int GRAPH_RDLOCK +vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed, + Error **errp); int coroutine_fn GRAPH_RDLOCK vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, diff --git a/block/vmdk.c b/block/vmdk.c index e90649c8bf..8a3b152798 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -578,8 +578,8 @@ static int vmdk_add_extent(BlockDriverState *bs, return 0; } -static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, - Error **errp) +static int GRAPH_RDLOCK +vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, Error **errp) { int ret; size_t l1_size; @@ -641,9 +641,9 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, return ret; } -static int vmdk_open_vmfs_sparse(BlockDriverState *bs, - BdrvChild *file, - int flags, Error **errp) +static int GRAPH_RDLOCK +vmdk_open_vmfs_sparse(BlockDriverState *bs, BdrvChild *file, int flags, + Error 
**errp) { int ret; uint32_t magic; @@ -797,9 +797,9 @@ static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header, return 0; } -static int vmdk_open_se_sparse(BlockDriverState *bs, - BdrvChild *file, - int flags, Error **errp) +static int GRAPH_RDLOCK +vmdk_open_se_sparse(BlockDriverState *bs, BdrvChild *file, int flags, + Error **errp) { int ret; VMDKSESparseConstHeader const_header; @@ -913,9 +913,9 @@ static char *vmdk_read_desc(BdrvChild *file, uint64_t desc_offset, Error **errp) return buf; } -static int vmdk_open_vmdk4(BlockDriverState *bs, - BdrvChild *file, - int flags, QDict *options, Error **errp) +static int GRAPH_RDLOCK +vmdk_open_vmdk4(BlockDriverState *bs, BdrvChild *file, int flags, + QDict *options, Error **errp) { int ret; uint32_t magic; @@ -1095,8 +1095,9 @@ static int vmdk_parse_description(const char *desc, const char *opt_name, } /* Open an extent file and append to bs array */ -static int vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags, - char *buf, QDict *options, Error **errp) +static int GRAPH_RDLOCK +vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags, + char *buf, QDict *options, Error **errp) { uint32_t magic; @@ -1123,8 +1124,9 @@ static const char *next_line(const char *s) return s; } -static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, - QDict *options, Error **errp) +static int GRAPH_RDLOCK +vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, + Error **errp) { int ret; int matches; @@ -1143,6 +1145,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, char extent_opt_prefix[32]; Error *local_err = NULL; + GLOBAL_STATE_CODE(); + for (p = desc; *p; p = next_line(p)) { /* parse extent line in one of below formats: * @@ -1223,9 +1227,11 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, ret = vmdk_add_extent(bs, extent_file, true, sectors, 0, 0, 0, 0, 0, &extent, errp); if (ret < 0) { + bdrv_graph_rdunlock_main_loop(); bdrv_graph_wrlock(NULL); bdrv_unref_child(bs, extent_file); bdrv_graph_wrunlock(); + bdrv_graph_rdlock_main_loop(); goto out; } extent->flat_start_offset = flat_offset << 9; @@ -1240,26 +1246,32 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, } g_free(buf); if (ret) { + bdrv_graph_rdunlock_main_loop(); bdrv_graph_wrlock(NULL); bdrv_unref_child(bs, extent_file); bdrv_graph_wrunlock(); + bdrv_graph_rdlock_main_loop(); goto out; } extent = &s->extents[s->num_extents - 1]; } else if (!strcmp(type, "SESPARSE")) { ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp); if (ret) { + bdrv_graph_rdunlock_main_loop(); bdrv_graph_wrlock(NULL); bdrv_unref_child(bs, extent_file); bdrv_graph_wrunlock(); + bdrv_graph_rdlock_main_loop(); goto out; } extent = &s->extents[s->num_extents - 1]; } else { error_setg(errp, "Unsupported extent type '%s'", type); + bdrv_graph_rdunlock_main_loop(); bdrv_graph_wrlock(NULL); bdrv_unref_child(bs, extent_file); bdrv_graph_wrunlock(); + bdrv_graph_rdlock_main_loop(); ret = -ENOTSUP; goto out; } @@ -1283,8 +1295,9 @@ out: return ret; } -static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, - QDict *options, Error **errp) +static int GRAPH_RDLOCK +vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, QDict *options, + Error **errp) { int ret; char ct[128]; @@ -2900,7 +2913,7 @@ static int vmdk_has_zero_init(BlockDriverState *bs) return 1; } -static VmdkExtentInfo *vmdk_get_extent_info(VmdkExtent *extent) +static VmdkExtentInfo * 
GRAPH_RDLOCK vmdk_get_extent_info(VmdkExtent *extent) { VmdkExtentInfo *info = g_new0(VmdkExtentInfo, 1); @@ -2977,8 +2990,8 @@ vmdk_co_check(BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix) return ret; } -static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs, - Error **errp) +static ImageInfoSpecific * GRAPH_RDLOCK +vmdk_get_specific_info(BlockDriverState *bs, Error **errp) { int i; BDRVVmdkState *s = bs->opaque; diff --git a/block/vpc.c b/block/vpc.c index ceb87dd3d8..945847fe4a 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -446,9 +446,12 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, } /* Disable migration when VHD images are used */ + bdrv_graph_rdlock_main_loop(); error_setg(&s->migration_blocker, "The vpc format used by node '%s' " "does not support live migration", bdrv_get_device_or_node_name(bs)); + bdrv_graph_rdunlock_main_loop(); + ret = migrate_add_blocker(s->migration_blocker, errp); if (ret < 0) { error_free(s->migration_blocker); diff --git a/block/vvfat.c b/block/vvfat.c index 856b479c91..b0415798c0 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1144,6 +1144,8 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, QemuOpts *opts; int ret; + GRAPH_RDLOCK_GUARD_MAINLOOP(); + #ifdef DEBUG vvv = s; #endif @@ -1480,8 +1482,8 @@ vvfat_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sector if (s->qcow) { int64_t n; int ret; - ret = bdrv_is_allocated(s->qcow->bs, sector_num * BDRV_SECTOR_SIZE, - (nb_sectors - i) * BDRV_SECTOR_SIZE, &n); + ret = bdrv_co_is_allocated(s->qcow->bs, sector_num * BDRV_SECTOR_SIZE, + (nb_sectors - i) * BDRV_SECTOR_SIZE, &n); if (ret < 0) { return ret; } @@ -1806,10 +1808,10 @@ cluster_was_modified(BDRVVVFATState *s, uint32_t cluster_num) } for (i = 0; !was_modified && i < s->sectors_per_cluster; i++) { - was_modified = bdrv_is_allocated(s->qcow->bs, - (cluster2sector(s, cluster_num) + - i) * BDRV_SECTOR_SIZE, - BDRV_SECTOR_SIZE, NULL); + was_modified = bdrv_co_is_allocated(s->qcow->bs, + (cluster2sector(s, cluster_num) + + i) * BDRV_SECTOR_SIZE, + BDRV_SECTOR_SIZE, NULL); } /* @@ -1967,9 +1969,9 @@ get_cluster_count_for_direntry(BDRVVVFATState* s, direntry_t* direntry, const ch for (i = 0; i < s->sectors_per_cluster; i++) { int res; - res = bdrv_is_allocated(s->qcow->bs, - (offs + i) * BDRV_SECTOR_SIZE, - BDRV_SECTOR_SIZE, NULL); + res = bdrv_co_is_allocated(s->qcow->bs, + (offs + i) * BDRV_SECTOR_SIZE, + BDRV_SECTOR_SIZE, NULL); if (res < 0) { return -1; } diff --git a/blockdev.c b/blockdev.c index 325b7a3bef..a01c62596b 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1041,6 +1041,8 @@ static BlockDriverState *qmp_get_root_bs(const char *name, Error **errp) BlockDriverState *bs; AioContext *aio_context; + GRAPH_RDLOCK_GUARD_MAINLOOP(); + bs = bdrv_lookup_bs(name, name, errp); if (bs == NULL) { return NULL; @@ -1136,6 +1138,9 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, SnapshotInfo *info = NULL; int ret; + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + bs = qmp_get_root_bs(device, errp); if (!bs) { return NULL; @@ -1221,6 +1226,9 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, AioContext *aio_context; int ret1; + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + tran_add(tran, &internal_snapshot_drv, state); device = internal->device; @@ -1309,6 +1317,9 @@ static void internal_snapshot_abort(void *opaque) AioContext *aio_context; Error *local_error = NULL; + GLOBAL_STATE_CODE(); + 
GRAPH_RDLOCK_GUARD_MAINLOOP(); + if (!state->created) { return; } @@ -1654,6 +1665,8 @@ static void drive_backup_action(DriveBackup *backup, bool set_backing_hd = false; int ret; + GLOBAL_STATE_CODE(); + tran_add(tran, &drive_backup_drv, state); if (!backup->has_mode) { @@ -1683,9 +1696,12 @@ static void drive_backup_action(DriveBackup *backup, } /* Early check to avoid creating target */ + bdrv_graph_rdlock_main_loop(); if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { + bdrv_graph_rdunlock_main_loop(); goto out; } + bdrv_graph_rdunlock_main_loop(); flags = bs->open_flags | BDRV_O_RDWR; @@ -1724,7 +1740,10 @@ static void drive_backup_action(DriveBackup *backup, BlockDriverState *explicit_backing = bdrv_skip_implicit_filters(source); + bdrv_graph_rdlock_main_loop(); bdrv_refresh_filename(explicit_backing); + bdrv_graph_rdunlock_main_loop(); + bdrv_img_create(backup->target, format, explicit_backing->filename, explicit_backing->drv->format_name, NULL, @@ -2344,10 +2363,13 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, return; } + bdrv_graph_co_rdlock(); if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) { error_setg(errp, QERR_DEVICE_IN_USE, device); + bdrv_graph_co_rdunlock(); return; } + bdrv_graph_co_rdunlock(); blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp); if (!blk) { @@ -2387,6 +2409,8 @@ void qmp_block_stream(const char *job_id, const char *device, Error *local_err = NULL; int job_flags = JOB_DEFAULT; + GLOBAL_STATE_CODE(); + if (base && base_node) { error_setg(errp, "'base' and 'base-node' cannot be specified " "at the same time"); @@ -2437,7 +2461,10 @@ void qmp_block_stream(const char *job_id, const char *device, goto out; } assert(bdrv_get_aio_context(base_bs) == aio_context); + + bdrv_graph_rdlock_main_loop(); bdrv_refresh_filename(base_bs); + bdrv_graph_rdunlock_main_loop(); } if (bottom) { @@ -2466,13 +2493,16 @@ void qmp_block_stream(const char *job_id, const char *device, * Check for op blockers in the whole chain between bs and base (or bottom) */ iter_end = bottom ? 
bdrv_filter_or_cow_bs(bottom_bs) : base_bs; + bdrv_graph_rdlock_main_loop(); for (iter = bs; iter && iter != iter_end; iter = bdrv_filter_or_cow_bs(iter)) { if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_STREAM, errp)) { + bdrv_graph_rdunlock_main_loop(); goto out; } } + bdrv_graph_rdunlock_main_loop(); /* if we are streaming the entire chain, the result will have no backing * file, and specifying one is therefore an error */ @@ -2835,6 +2865,8 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(bool has_flat, XDbgBlockGraph *qmp_x_debug_query_block_graph(Error **errp) { + GRAPH_RDLOCK_GUARD_MAINLOOP(); + return bdrv_get_xdbg_block_graph(errp); } @@ -2998,9 +3030,12 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) } /* Early check to avoid creating target */ + bdrv_graph_rdlock_main_loop(); if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_MIRROR_SOURCE, errp)) { + bdrv_graph_rdunlock_main_loop(); return; } + bdrv_graph_rdunlock_main_loop(); aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); @@ -3063,7 +3098,10 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) break; case NEW_IMAGE_MODE_ABSOLUTE_PATHS: /* create new image with backing file */ + bdrv_graph_rdlock_main_loop(); bdrv_refresh_filename(explicit_backing); + bdrv_graph_rdunlock_main_loop(); + bdrv_img_create(arg->target, format, explicit_backing->filename, explicit_backing->drv->format_name, @@ -3383,9 +3421,12 @@ void qmp_change_backing_file(const char *device, /* even though we are not necessarily operating on bs, we need it to * determine if block ops are currently prohibited on the chain */ + bdrv_graph_rdlock_main_loop(); if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_CHANGE, errp)) { + bdrv_graph_rdunlock_main_loop(); goto out; } + bdrv_graph_rdunlock_main_loop(); /* final sanity check */ if (!bdrv_chain_contains(bs, image_bs)) { @@ -3509,6 +3550,7 @@ void qmp_blockdev_del(const char *node_name, Error **errp) BlockDriverState *bs; GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); bs = bdrv_find_node(node_name); if (!bs) { @@ -3636,6 +3678,8 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, AioContext *new_context; BlockDriverState *bs; + GRAPH_RDLOCK_GUARD_MAINLOOP(); + bs = bdrv_find_node(node_name); if (!bs) { error_setg(errp, "Failed to find node with node-name='%s'", node_name); diff --git a/blockjob.c b/blockjob.c index 58c5d64539..807f992b59 100644 --- a/blockjob.c +++ b/blockjob.c @@ -485,6 +485,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, BlockJob *job; int ret; GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); if (job_id == NULL && !(flags & JOB_INTERNAL)) { job_id = bdrv_get_device_name(bs); diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c b/contrib/vhost-user-gpu/vhost-user-gpu.c index aa304475a0..bb41758e34 100644 --- a/contrib/vhost-user-gpu/vhost-user-gpu.c +++ b/contrib/vhost-user-gpu/vhost-user-gpu.c @@ -834,7 +834,7 @@ vg_resource_flush(VuGpu *g, .width = width, .height = height, }; - pixman_image_t *i = + pixman_image_t *img = pixman_image_create_bits(pixman_image_get_format(res->image), msg->payload.update.width, msg->payload.update.height, @@ -842,11 +842,11 @@ vg_resource_flush(VuGpu *g, payload.update.data), width * bpp); pixman_image_composite(PIXMAN_OP_SRC, - res->image, NULL, i, + res->image, NULL, img, extents->x1, extents->y1, 0, 0, 0, 0, width, height); - pixman_image_unref(i); + pixman_image_unref(img); vg_send_msg(g, msg, -1); g_free(msg); } diff --git a/contrib/vhost-user-gpu/vugpu.h 
b/contrib/vhost-user-gpu/vugpu.h index 509b679f03..654c392fbb 100644 --- a/contrib/vhost-user-gpu/vugpu.h +++ b/contrib/vhost-user-gpu/vugpu.h @@ -164,12 +164,12 @@ struct virtio_gpu_ctrl_command { }; #define VUGPU_FILL_CMD(out) do { \ - size_t s; \ - s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0, \ + size_t vugpufillcmd_s_ = \ + iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0, \ &out, sizeof(out)); \ - if (s != sizeof(out)) { \ + if (vugpufillcmd_s_ != sizeof(out)) { \ g_critical("%s: command size incorrect %zu vs %zu", \ - __func__, s, sizeof(out)); \ + __func__, vugpufillcmd_s_, sizeof(out)); \ return; \ } \ } while (0) diff --git a/disas/riscv.c b/disas/riscv.c index 3873a69157..8e89e1d115 100644 --- a/disas/riscv.c +++ b/disas/riscv.c @@ -2116,8 +2116,8 @@ static const char *csr_name(int csrno) case 0x03ba: return "pmpaddr10"; case 0x03bb: return "pmpaddr11"; case 0x03bc: return "pmpaddr12"; - case 0x03bd: return "pmpaddr14"; - case 0x03be: return "pmpaddr13"; + case 0x03bd: return "pmpaddr13"; + case 0x03be: return "pmpaddr14"; case 0x03bf: return "pmpaddr15"; case 0x0780: return "mtohost"; case 0x0781: return "mfromhost"; diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index ffd0a8c896..2febd2d12f 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -327,6 +327,42 @@ QEMU's ``vhost`` feature, which would eliminate the high latency costs under which the 9p ``proxy`` backend currently suffers. However, as of today nobody has indicated plans for such a reimplementation, unfortunately. +RISC-V 'any' CPU type ``-cpu any`` (since 8.2) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The 'any' CPU type was introduced back in 2018 and has been around since the +initial RISC-V QEMU port. Its usage has always been unclear: users don't know +what to expect from a CPU called 'any', and in fact the CPU does not do anything +special that isn't already done by the default CPUs rv32/rv64. + +After the introduction of the 'max' CPU type, RISC-V now has good coverage +of generic CPUs: rv32 and rv64 as default CPUs and 'max' as a feature-complete +CPU for both 32- and 64-bit builds. Users are therefore discouraged from using the 'any' +CPU type starting in 8.2. + +RISC-V CPU properties which start with capital 'Z' (since 8.2) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +All RISC-V CPU properties which start with capital 'Z' are being deprecated +starting in 8.2. The reason is that they were wrongly added with capital 'Z' +in the past. CPU properties were later added with lower-case names, which +is the format we want to use from now on.
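As an illustration of the rename (example command lines only, assuming the default rv64 CPU; the exact warning text QEMU prints is not reproduced here):

    qemu-system-riscv64 -cpu rv64,Zifencei=true   # deprecated capital-Z spelling, warns starting in 8.2
    qemu-system-riscv64 -cpu rv64,zifencei=true   # lower-case replacement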
+ +Users who try to use these deprecated properties will receive a warning recommending a switch to their stable counterparts: + +- "Zifencei" should be replaced with "zifencei" +- "Zicsr" should be replaced with "zicsr" +- "Zihintntl" should be replaced with "zihintntl" +- "Zihintpause" should be replaced with "zihintpause" +- "Zawrs" should be replaced with "zawrs" +- "Zfa" should be replaced with "zfa" +- "Zfh" should be replaced with "zfh" +- "Zfhmin" should be replaced with "zfhmin" +- "Zve32f" should be replaced with "zve32f" +- "Zve64f" should be replaced with "zve64f" +- "Zve64d" should be replaced with "zve64d" + ``-device pvrdma`` and the rdma subsystem (since 8.2) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/system/i386/amd-memory-encryption.rst b/docs/system/i386/amd-memory-encryption.rst index dcf4add0e7..e9bc142bc1 100644 --- a/docs/system/i386/amd-memory-encryption.rst +++ b/docs/system/i386/amd-memory-encryption.rst @@ -183,13 +183,13 @@ References ---------- `AMD Memory Encryption whitepaper -<https://developer.amd.com/wordpress/media/2013/12/AMD_Memory_Encryption_Whitepaper_v7-Public.pdf>`_ +<https://www.amd.com/content/dam/amd/en/documents/epyc-business-docs/white-papers/memory-encryption-white-paper.pdf>`_ .. [SEVAPI] `Secure Encrypted Virtualization API <https://www.amd.com/system/files/TechDocs/55766_SEV-KM_API_Specification.pdf>`_ .. [APMVOL2] `AMD64 Architecture Programmer's Manual Volume 2: System Programming - <https://www.amd.com/system/files/TechDocs/24593.pdf>`_ + <https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24593.pdf>`_ KVM Forum slides: @@ -199,7 +199,7 @@ KVM Forum slides: <https://www.linux-kvm.org/images/9/94/Extending-Secure-Encrypted-Virtualization-with-SEV-ES-Thomas-Lendacky-AMD.pdf>`_ `AMD64 Architecture Programmer's Manual: -<http://support.amd.com/TechDocs/24593.pdf>`_ +<https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/24593.pdf>`_ * SME is section 7.10 * SEV is section 15.34 diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c index b1532118d1..1e96a71c0c 100644 --- a/gdbstub/gdbstub.c +++ b/gdbstub/gdbstub.c @@ -1324,7 +1324,7 @@ static void handle_v_kill(GArray *params, void *user_ctx) gdb_put_packet("OK"); error_report("QEMU: Terminated via GDBstub"); gdb_exit(0); - exit(0); + gdb_qemu_exit(0); } static const GdbCmdParseEntry gdb_v_commands_table[] = { @@ -1843,7 +1843,8 @@ static int gdb_handle_packet(const char *line_buf) /* Kill the target */ error_report("QEMU: Terminated via GDBstub"); gdb_exit(0); - exit(0); + gdb_qemu_exit(0); + break; case 'D': { static const GdbCmdParseEntry detach_cmd_desc = { diff --git a/gdbstub/system.c b/gdbstub/system.c index 48976873d2..783ac140b9 100644 --- a/gdbstub/system.c +++ b/gdbstub/system.c @@ -435,6 +435,12 @@ void gdb_exit(int code) qemu_chr_fe_deinit(&gdbserver_system_state.chr, true); } +void gdb_qemu_exit(int code) +{ + qemu_system_shutdown_request_with_code(SHUTDOWN_CAUSE_GUEST_SHUTDOWN, + code); +} + /* * Memory access */ diff --git a/gdbstub/user.c b/gdbstub/user.c index 7ab6e5d975..dbe1d9b887 100644 --- a/gdbstub/user.c +++ b/gdbstub/user.c @@ -113,6 +113,12 @@ void gdb_exit(int code) gdb_put_packet(buf); gdbserver_state.allow_stop_reply = false; } + +} + +void gdb_qemu_exit(int code) +{ + exit(code); } int gdb_handlesig(CPUState *cpu, int sig) diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c index 40de6b8b77..9bef60def1 100644 --- a/hw/char/riscv_htif.c +++
b/hw/char/riscv_htif.c @@ -32,6 +32,7 @@ #include "exec/address-spaces.h" #include "exec/tswap.h" #include "sysemu/dma.h" +#include "sysemu/runstate.h" #define RISCV_DEBUG_HTIF 0 #define HTIF_DEBUG(fmt, ...) \ @@ -206,7 +207,9 @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written) g_free(sig_data); } - exit(exit_code); + qemu_system_shutdown_request_with_code( + SHUTDOWN_CAUSE_GUEST_SHUTDOWN, exit_code); + return; } else { uint64_t syscall[8]; cpu_physical_memory_read(payload, syscall, sizeof(syscall)); diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c index 99aae8ccbe..c677b5cfbb 100644 --- a/hw/intc/riscv_aplic.c +++ b/hw/intc/riscv_aplic.c @@ -32,7 +32,7 @@ #include "target/riscv/cpu.h" #include "sysemu/sysemu.h" #include "sysemu/kvm.h" -#include "kvm_riscv.h" +#include "kvm/kvm_riscv.h" #include "migration/vmstate.h" #define APLIC_MAX_IDC (1UL << 14) diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c index 667d56bd29..ae38f48f16 100644 --- a/hw/mem/memory-device.c +++ b/hw/mem/memory-device.c @@ -52,19 +52,135 @@ static int memory_device_build_list(Object *obj, void *opaque) return 0; } -static void memory_device_check_addable(MachineState *ms, MemoryRegion *mr, - Error **errp) +static unsigned int memory_device_get_memslots(MemoryDeviceState *md) { + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); + + if (mdc->get_memslots) { + return mdc->get_memslots(md); + } + return 1; +} + +/* + * Memslots that are reserved by memory devices (required but still reported + * as free from KVM / vhost). + */ +static unsigned int get_reserved_memslots(MachineState *ms) +{ + if (ms->device_memory->used_memslots > + ms->device_memory->required_memslots) { + /* This is unexpected, and we warned already in the memory notifier. */ + return 0; + } + return ms->device_memory->required_memslots - + ms->device_memory->used_memslots; +} + +unsigned int memory_devices_get_reserved_memslots(void) +{ + if (!current_machine->device_memory) { + return 0; + } + return get_reserved_memslots(current_machine); +} + +bool memory_devices_memslot_auto_decision_active(void) +{ + if (!current_machine->device_memory) { + return false; + } + + return current_machine->device_memory->memslot_auto_decision_active; +} + +static unsigned int memory_device_memslot_decision_limit(MachineState *ms, + MemoryRegion *mr) +{ + const unsigned int reserved = get_reserved_memslots(ms); + const uint64_t size = memory_region_size(mr); + unsigned int max = vhost_get_max_memslots(); + unsigned int free = vhost_get_free_memslots(); + uint64_t available_space; + unsigned int memslots; + + if (kvm_enabled()) { + max = MIN(max, kvm_get_max_memslots()); + free = MIN(free, kvm_get_free_memslots()); + } + + /* + * If we only have less overall memslots than what we consider reasonable, + * just keep it to a minimum. + */ + if (max < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS) { + return 1; + } + + /* + * Consider our soft-limit across all memory devices. We don't really + * expect to exceed this limit in reasonable configurations. + */ + if (MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT <= + ms->device_memory->required_memslots) { + return 1; + } + memslots = MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT - + ms->device_memory->required_memslots; + + /* + * Consider the actually still free memslots. This is only relevant if + * other memslot consumers would consume *significantly* more memslots than + * what we prepared for (> 253). Unlikely, but let's just handle it + * cleanly. 
+ */ + memslots = MIN(memslots, free - reserved); + if (memslots < 1 || unlikely(free < reserved)) { + return 1; + } + + /* If no other memory device can fit, give all memslots to this device. */ + if (size == ms->maxram_size - ms->ram_size) { + return memslots; + } + + /* + * Simple heuristic: equally distribute the memslots over the space + * still available for memory devices. + */ + available_space = ms->maxram_size - ms->ram_size - + ms->device_memory->used_region_size; + memslots = (double)memslots * size / available_space; + return memslots < 1 ? 1 : memslots; +} + +static void memory_device_check_addable(MachineState *ms, MemoryDeviceState *md, + MemoryRegion *mr, Error **errp) +{ + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); const uint64_t used_region_size = ms->device_memory->used_region_size; const uint64_t size = memory_region_size(mr); + const unsigned int reserved_memslots = get_reserved_memslots(ms); + unsigned int required_memslots, memslot_limit; + + /* + * Instruct the device to decide how many memslots to use, if applicable, + * before we query the number of required memslots the first time. + */ + if (mdc->decide_memslots) { + memslot_limit = memory_device_memslot_decision_limit(ms, mr); + mdc->decide_memslots(md, memslot_limit); + } + required_memslots = memory_device_get_memslots(md); - /* we will need a new memory slot for kvm and vhost */ - if (kvm_enabled() && !kvm_has_free_slot(ms)) { - error_setg(errp, "hypervisor has no free memory slots left"); + /* we will need memory slots for kvm and vhost */ + if (kvm_enabled() && + kvm_get_free_memslots() < required_memslots + reserved_memslots) { + error_setg(errp, "hypervisor does not have enough free memory slots left"); return; } - if (!vhost_has_free_slot()) { - error_setg(errp, "a used vhost backend has no free memory slots left"); + if (vhost_get_free_memslots() < required_memslots + reserved_memslots) { + error_setg(errp, "a used vhost backend does not have enough free memory slots left"); return; } @@ -233,7 +349,7 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms, goto out; } - memory_device_check_addable(ms, mr, &local_err); + memory_device_check_addable(ms, md, mr, &local_err); if (local_err) { goto out; } @@ -264,6 +380,7 @@ out: void memory_device_plug(MemoryDeviceState *md, MachineState *ms) { const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); + const unsigned int memslots = memory_device_get_memslots(md); const uint64_t addr = mdc->get_addr(md); MemoryRegion *mr; @@ -275,6 +392,11 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms) g_assert(ms->device_memory); ms->device_memory->used_region_size += memory_region_size(mr); + ms->device_memory->required_memslots += memslots; + if (mdc->decide_memslots && memslots > 1) { + ms->device_memory->memslot_auto_decision_active++; + } + memory_region_add_subregion(&ms->device_memory->mr, addr - ms->device_memory->base, mr); trace_memory_device_plug(DEVICE(md)->id ?
DEVICE(md)->id : "", addr); @@ -283,6 +405,7 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms) void memory_device_unplug(MemoryDeviceState *md, MachineState *ms) { const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); + const unsigned int memslots = memory_device_get_memslots(md); MemoryRegion *mr; /* @@ -293,7 +416,12 @@ void memory_device_unplug(MemoryDeviceState *md, MachineState *ms) g_assert(ms->device_memory); memory_region_del_subregion(&ms->device_memory->mr, mr); + + if (mdc->decide_memslots && memslots > 1) { + ms->device_memory->memslot_auto_decision_active--; + } ms->device_memory->used_region_size -= memory_region_size(mr); + ms->device_memory->required_memslots -= memslots; trace_memory_device_unplug(DEVICE(md)->id ? DEVICE(md)->id : "", mdc->get_addr(md)); } @@ -313,6 +441,50 @@ uint64_t memory_device_get_region_size(const MemoryDeviceState *md, return memory_region_size(mr); } +static void memory_devices_region_mod(MemoryListener *listener, + MemoryRegionSection *mrs, bool add) +{ + DeviceMemoryState *dms = container_of(listener, DeviceMemoryState, + listener); + + if (!memory_region_is_ram(mrs->mr)) { + warn_report("Unexpected memory region mapped into the device memory region."); + return; + } + + /* + * The expectation is that each distinct RAM memory region section in + * our region for memory devices consumes exactly one memslot in KVM + * and in vhost. For vhost, this is true, except: + * * ROM memory regions don't consume a memslot. These get used very + * rarely for memory devices (R/O NVDIMMs). + * * Memslots without a fd (memory-backend-ram) don't necessarily + * consume a memslot. Such setups are quite rare and possibly bogus: + * the memory would be inaccessible to such vhost devices. + * + * So for vhost, in corner cases we might over-estimate the number of + * memslots that are currently used or that might still be reserved + * (required - used). + */ + dms->used_memslots += add ? 1 : -1; + + if (dms->used_memslots > dms->required_memslots) { + warn_report("Memory devices use more memory slots than they indicated as required."); + } +} + +static void memory_devices_region_add(MemoryListener *listener, + MemoryRegionSection *mrs) +{ + return memory_devices_region_mod(listener, mrs, true); +} + +static void memory_devices_region_del(MemoryListener *listener, + MemoryRegionSection *mrs) +{ + return memory_devices_region_mod(listener, mrs, false); +} + void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size) { g_assert(size); @@ -322,8 +494,16 @@ void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size) memory_region_init(&ms->device_memory->mr, OBJECT(ms), "device-memory", size); + address_space_init(&ms->device_memory->as, &ms->device_memory->mr, + "device-memory"); memory_region_add_subregion(get_system_memory(), ms->device_memory->base, &ms->device_memory->mr); + + /* Track the number of memslots used by memory devices.
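+ * The listener fires once for each distinct RAM section that gets + * (un)mapped in this dedicated address space; each such section is + * expected to consume one memslot in KVM and vhost.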
*/ + ms->device_memory->listener.region_add = memory_devices_region_add; + ms->device_memory->listener.region_del = memory_devices_region_del; + memory_listener_register(&ms->device_memory->listener, + &ms->device_memory->as); } static const TypeInfo memory_device_info = { diff --git a/hw/misc/sifive_test.c b/hw/misc/sifive_test.c index 56df45bfe5..ad688079c4 100644 --- a/hw/misc/sifive_test.c +++ b/hw/misc/sifive_test.c @@ -25,6 +25,7 @@ #include "qemu/module.h" #include "sysemu/runstate.h" #include "hw/misc/sifive_test.h" +#include "sysemu/sysemu.h" static uint64_t sifive_test_read(void *opaque, hwaddr addr, unsigned int size) { @@ -39,9 +40,13 @@ static void sifive_test_write(void *opaque, hwaddr addr, int code = (val64 >> 16) & 0xffff; switch (status) { case FINISHER_FAIL: - exit(code); + qemu_system_shutdown_request_with_code( + SHUTDOWN_CAUSE_GUEST_PANIC, code); + return; case FINISHER_PASS: - exit(0); + qemu_system_shutdown_request_with_code( + SHUTDOWN_CAUSE_GUEST_SHUTDOWN, code); + return; case FINISHER_RESET: qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return; diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index 5edc1d98d2..9de578c756 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -35,7 +35,7 @@ #include "hw/riscv/virt.h" #include "hw/riscv/boot.h" #include "hw/riscv/numa.h" -#include "kvm_riscv.h" +#include "kvm/kvm_riscv.h" #include "hw/intc/riscv_aclint.h" #include "hw/intc/riscv_aplic.h" #include "hw/intc/riscv_imsic.h" diff --git a/hw/virtio/vhost-stub.c b/hw/virtio/vhost-stub.c index aa858ef3fb..52d42adab2 100644 --- a/hw/virtio/vhost-stub.c +++ b/hw/virtio/vhost-stub.c @@ -2,9 +2,14 @@ #include "hw/virtio/vhost.h" #include "hw/virtio/vhost-user.h" -bool vhost_has_free_slot(void) +unsigned int vhost_get_max_memslots(void) { - return true; + return UINT_MAX; +} + +unsigned int vhost_get_free_memslots(void) +{ + return UINT_MAX; } bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 3766b415f8..68eb1f0c99 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -2327,19 +2327,6 @@ static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) return -ENOTSUP; } -static bool vhost_user_can_merge(struct vhost_dev *dev, - uint64_t start1, uint64_t size1, - uint64_t start2, uint64_t size2) -{ - ram_addr_t offset; - int mfd, rfd; - - (void)vhost_user_get_mr_data(start1, &offset, &mfd); - (void)vhost_user_get_mr_data(start2, &offset, &rfd); - - return mfd == rfd; -} - static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) { VhostUserMsg msg; @@ -2622,10 +2609,9 @@ vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id) return 0; } -static bool vhost_user_mem_section_filter(struct vhost_dev *dev, - MemoryRegionSection *section) +static bool vhost_user_no_private_memslots(struct vhost_dev *dev) { - return memory_region_get_fd(section->mr) >= 0; + return true; } static int vhost_user_get_inflight_fd(struct vhost_dev *dev, @@ -2868,6 +2854,7 @@ const VhostOps user_ops = { .vhost_backend_init = vhost_user_backend_init, .vhost_backend_cleanup = vhost_user_backend_cleanup, .vhost_backend_memslots_limit = vhost_user_memslots_limit, + .vhost_backend_no_private_memslots = vhost_user_no_private_memslots, .vhost_set_log_base = vhost_user_set_log_base, .vhost_set_mem_table = vhost_user_set_mem_table, .vhost_set_vring_addr = vhost_user_set_vring_addr, @@ -2886,7 +2873,6 @@ const VhostOps user_ops = { .vhost_set_vring_enable = 
vhost_user_set_vring_enable, .vhost_requires_shm_log = vhost_user_requires_shm_log, .vhost_migration_done = vhost_user_migration_done, - .vhost_backend_can_merge = vhost_user_can_merge, .vhost_net_set_mtu = vhost_user_net_set_mtu, .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, @@ -2894,7 +2880,6 @@ const VhostOps user_ops = { .vhost_set_config = vhost_user_set_config, .vhost_crypto_create_session = vhost_user_crypto_create_session, .vhost_crypto_close_session = vhost_user_crypto_close_session, - .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, .vhost_get_inflight_fd = vhost_user_get_inflight_fd, .vhost_set_inflight_fd = vhost_user_set_inflight_fd, .vhost_dev_start = vhost_user_dev_start, diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 89ff02a999..819b2d811a 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -1512,7 +1512,6 @@ const VhostOps vdpa_ops = { .vhost_set_config = vhost_vdpa_set_config, .vhost_requires_shm_log = NULL, .vhost_migration_done = NULL, - .vhost_backend_can_merge = NULL, .vhost_net_set_mtu = NULL, .vhost_set_iotlb_callback = NULL, .vhost_send_device_iotlb_msg = NULL, diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 9cfac40fde..9f37206ba0 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -23,6 +23,7 @@ #include "qemu/log.h" #include "standard-headers/linux/vhost_types.h" #include "hw/virtio/virtio-bus.h" +#include "hw/mem/memory-device.h" #include "migration/blocker.h" #include "migration/qemu-file-types.h" #include "sysemu/dma.h" @@ -45,20 +46,44 @@ static struct vhost_log *vhost_log; static struct vhost_log *vhost_log_shm; +/* Memslots used by backends that support private memslots (without an fd). */ static unsigned int used_memslots; + +/* Memslots used by backends that only support shared memslots (with an fd). */ +static unsigned int used_shared_memslots; + static QLIST_HEAD(, vhost_dev) vhost_devices = QLIST_HEAD_INITIALIZER(vhost_devices); -bool vhost_has_free_slot(void) +unsigned int vhost_get_max_memslots(void) +{ + unsigned int max = UINT_MAX; + struct vhost_dev *hdev; + + QLIST_FOREACH(hdev, &vhost_devices, entry) { + max = MIN(max, hdev->vhost_ops->vhost_backend_memslots_limit(hdev)); + } + return max; +} + +unsigned int vhost_get_free_memslots(void) { - unsigned int slots_limit = ~0U; + unsigned int free = UINT_MAX; struct vhost_dev *hdev; QLIST_FOREACH(hdev, &vhost_devices, entry) { unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev); - slots_limit = MIN(slots_limit, r); + unsigned int cur_free; + + if (hdev->vhost_ops->vhost_backend_no_private_memslots && + hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) { + cur_free = r - used_shared_memslots; + } else { + cur_free = r - used_memslots; + } + free = MIN(free, cur_free); } - return slots_limit > used_memslots; + return free; } static void vhost_dev_sync_region(struct vhost_dev *dev, @@ -474,8 +499,7 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev, * vhost_section: identify sections needed for vhost access * * We only care about RAM sections here (where virtqueue and guest - * internals accessed by virtio might live). If we find one we still - * allow the backend to potentially filter it out of our list. + * internals accessed by virtio might live). 
*/ static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section) { @@ -502,8 +526,16 @@ static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section) return false; } - if (dev->vhost_ops->vhost_backend_mem_section_filter && - !dev->vhost_ops->vhost_backend_mem_section_filter(dev, section)) { + /* + * Some backends (like vhost-user) can only handle memory regions + * that have an fd (can be mapped into a different process). Filter + * out the ones without an fd, if requested. + * + * TODO: we might have to limit to MAP_SHARED as well. + */ + if (memory_region_get_fd(section->mr) < 0 && + dev->vhost_ops->vhost_backend_no_private_memslots && + dev->vhost_ops->vhost_backend_no_private_memslots(dev)) { trace_vhost_reject_section(mr->name, 2); return false; } @@ -568,7 +600,14 @@ static void vhost_commit(MemoryListener *listener) dev->n_mem_sections * sizeof dev->mem->regions[0]; dev->mem = g_realloc(dev->mem, regions_size); dev->mem->nregions = dev->n_mem_sections; - used_memslots = dev->mem->nregions; + + if (dev->vhost_ops->vhost_backend_no_private_memslots && + dev->vhost_ops->vhost_backend_no_private_memslots(dev)) { + used_shared_memslots = dev->mem->nregions; + } else { + used_memslots = dev->mem->nregions; + } + for (i = 0; i < dev->n_mem_sections; i++) { struct vhost_memory_region *cur_vmr = dev->mem->regions + i; struct MemoryRegionSection *mrs = dev->mem_sections + i; @@ -668,7 +707,7 @@ static void vhost_region_add_section(struct vhost_dev *dev, mrs_size, mrs_host); } - if (dev->n_tmp_sections) { + if (dev->n_tmp_sections && !section->unmergeable) { /* Since we already have at least one section, let's see if * this extends it; since we're scanning in order, we only * have to look at the last one, and the FlatView that calls @@ -701,11 +740,7 @@ static void vhost_region_add_section(struct vhost_dev *dev, size_t offset = mrs_gpa - prev_gpa_start; if (prev_host_start + offset == mrs_host && - section->mr == prev_sec->mr && - (!dev->vhost_ops->vhost_backend_can_merge || - dev->vhost_ops->vhost_backend_can_merge(dev, - mrs_host, mrs_size, - prev_host_start, prev_size))) { + section->mr == prev_sec->mr && !prev_sec->unmergeable) { uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size); need_add = false; prev_sec->offset_within_address_space = @@ -1400,6 +1435,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, VhostBackendType backend_type, uint32_t busyloop_timeout, Error **errp) { + unsigned int used, reserved, limit; uint64_t features; int i, r, n_initialized_vqs = 0; @@ -1426,6 +1462,19 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, goto fail; } + limit = hdev->vhost_ops->vhost_backend_memslots_limit(hdev); + if (limit < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS && + memory_devices_memslot_auto_decision_active()) { + error_setg(errp, "some memory device (like virtio-mem)" + " decided how many memory slots to use based on the overall" + " number of memory slots; this vhost backend would further" + " restrict the overall number of memory slots"); + error_append_hint(errp, "Try plugging this vhost backend before" + " plugging such memory devices.\n"); + r = -EINVAL; + goto fail; + } + for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) { r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i); if (r < 0) { @@ -1495,9 +1544,27 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, memory_listener_register(&hdev->memory_listener, &address_space_memory); QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); - if (used_memslots >
hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) { - error_setg(errp, "vhost backend memory slots limit is less" - " than current number of present memory slots"); + /* + * The listener we registered properly updated the corresponding counter. + * So we can trust that these values are accurate. + */ + if (hdev->vhost_ops->vhost_backend_no_private_memslots && + hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) { + used = used_shared_memslots; + } else { + used = used_memslots; + } + /* + * We assume that all reserved memslots actually require a real memslot + * in our vhost backend. This might not be true, for example, if the + * memslot would be ROM. If ever relevant, we can optimize for that -- + * but we'll need additional information about the reservations. + */ + reserved = memory_devices_get_reserved_memslots(); + if (used + reserved > limit) { + error_setg(errp, "vhost backend memory slots limit (%d) is less" + " than the current number of used (%d) and reserved (%d)" + " memory slots for memory devices.", limit, used, reserved); r = -EINVAL; goto fail_busyloop; } diff --git a/hw/virtio/virtio-mem-pci.c b/hw/virtio/virtio-mem-pci.c index c4597e029e..1b4e9a3284 100644 --- a/hw/virtio/virtio-mem-pci.c +++ b/hw/virtio/virtio-mem-pci.c @@ -48,6 +48,25 @@ static MemoryRegion *virtio_mem_pci_get_memory_region(MemoryDeviceState *md, return vmc->get_memory_region(vmem, errp); } +static void virtio_mem_pci_decide_memslots(MemoryDeviceState *md, + unsigned int limit) +{ + VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(md); + VirtIOMEM *vmem = VIRTIO_MEM(&pci_mem->vdev); + VirtIOMEMClass *vmc = VIRTIO_MEM_GET_CLASS(vmem); + + vmc->decide_memslots(vmem, limit); +} + +static unsigned int virtio_mem_pci_get_memslots(MemoryDeviceState *md) +{ + VirtIOMEMPCI *pci_mem = VIRTIO_MEM_PCI(md); + VirtIOMEM *vmem = VIRTIO_MEM(&pci_mem->vdev); + VirtIOMEMClass *vmc = VIRTIO_MEM_GET_CLASS(vmem); + + return vmc->get_memslots(vmem); +} + static uint64_t virtio_mem_pci_get_plugged_size(const MemoryDeviceState *md, Error **errp) { @@ -150,6 +169,8 @@ static void virtio_mem_pci_class_init(ObjectClass *klass, void *data) mdc->set_addr = virtio_mem_pci_set_addr; mdc->get_plugged_size = virtio_mem_pci_get_plugged_size; mdc->get_memory_region = virtio_mem_pci_get_memory_region; + mdc->decide_memslots = virtio_mem_pci_decide_memslots; + mdc->get_memslots = virtio_mem_pci_get_memslots; mdc->fill_device_info = virtio_mem_pci_fill_device_info; mdc->get_min_alignment = virtio_mem_pci_get_min_alignment; diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index da5b09cefc..9dc3c61b5a 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -67,6 +67,13 @@ static uint32_t virtio_mem_default_thp_size(void) } /* + * The minimum memslot size depends on this setting ("sane default"), the + * device block size, and the memory backend page size. The last (or single) + * memslot might be smaller than this constant. + */ +#define VIRTIO_MEM_MIN_MEMSLOT_SIZE (1 * GiB) + +/* * We want to have a reasonable default block size such that * 1. We avoid splitting THPs when unplugging memory, which degrades * performance.
@@ -177,10 +184,10 @@ static bool virtio_mem_is_busy(void) return migration_in_incoming_postcopy() || !migration_is_idle(); } -typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg, +typedef int (*virtio_mem_range_cb)(VirtIOMEM *vmem, void *arg, uint64_t offset, uint64_t size); -static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg, +static int virtio_mem_for_each_unplugged_range(VirtIOMEM *vmem, void *arg, virtio_mem_range_cb cb) { unsigned long first_zero_bit, last_zero_bit; @@ -204,7 +211,7 @@ static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg, return ret; } -static int virtio_mem_for_each_plugged_range(const VirtIOMEM *vmem, void *arg, +static int virtio_mem_for_each_plugged_range(VirtIOMEM *vmem, void *arg, virtio_mem_range_cb cb) { unsigned long first_bit, last_bit; @@ -483,6 +490,96 @@ static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa, return true; } +static void virtio_mem_activate_memslot(VirtIOMEM *vmem, unsigned int idx) +{ + const uint64_t memslot_offset = idx * vmem->memslot_size; + + assert(vmem->memslots); + + /* + * Instead of enabling/disabling memslots, we add/remove them. This should + * make address space updates faster, because we don't have to loop over + * many disabled subregions. + */ + if (memory_region_is_mapped(&vmem->memslots[idx])) { + return; + } + memory_region_add_subregion(vmem->mr, memslot_offset, &vmem->memslots[idx]); +} + +static void virtio_mem_deactivate_memslot(VirtIOMEM *vmem, unsigned int idx) +{ + assert(vmem->memslots); + + if (!memory_region_is_mapped(&vmem->memslots[idx])) { + return; + } + memory_region_del_subregion(vmem->mr, &vmem->memslots[idx]); +} + +static void virtio_mem_activate_memslots_to_plug(VirtIOMEM *vmem, + uint64_t offset, uint64_t size) +{ + const unsigned int start_idx = offset / vmem->memslot_size; + const unsigned int end_idx = (offset + size + vmem->memslot_size - 1) / + vmem->memslot_size; + unsigned int idx; + + if (!vmem->dynamic_memslots) { + return; + } + + /* Activate all involved memslots in a single transaction. */ + memory_region_transaction_begin(); + for (idx = start_idx; idx < end_idx; idx++) { + virtio_mem_activate_memslot(vmem, idx); + } + memory_region_transaction_commit(); +} + +static void virtio_mem_deactivate_unplugged_memslots(VirtIOMEM *vmem, + uint64_t offset, + uint64_t size) +{ + const uint64_t region_size = memory_region_size(&vmem->memdev->mr); + const unsigned int start_idx = offset / vmem->memslot_size; + const unsigned int end_idx = (offset + size + vmem->memslot_size - 1) / + vmem->memslot_size; + unsigned int idx; + + if (!vmem->dynamic_memslots) { + return; + } + + /* Deactivate all memslots with unplugged blocks in a single transaction. */ + memory_region_transaction_begin(); + for (idx = start_idx; idx < end_idx; idx++) { + const uint64_t memslot_offset = idx * vmem->memslot_size; + uint64_t memslot_size = vmem->memslot_size; + + /* The size of the last memslot might be smaller. */ + if (idx == vmem->nb_memslots - 1) { + memslot_size = region_size - memslot_offset; + } + + /* + * Partially covered memslots might still have some blocks plugged and + * have to remain active if that's the case. 
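+ * + * For example (hypothetical layout): an unplug request covering only the + * first half of a 2 GiB memslot must leave that memslot active unless the + * second half turns out to be unplugged as well.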
+ */ + if (offset > memslot_offset || + offset + size < memslot_offset + memslot_size) { + const uint64_t gpa = vmem->addr + memslot_offset; + + if (!virtio_mem_is_range_unplugged(vmem, gpa, memslot_size)) { + continue; + } + } + + virtio_mem_deactivate_memslot(vmem, idx); + } + memory_region_transaction_commit(); +} + static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, uint64_t size, bool plug) { @@ -500,6 +597,8 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, } virtio_mem_notify_unplug(vmem, offset, size); virtio_mem_set_range_unplugged(vmem, start_gpa, size); + /* Deactivate completely unplugged memslots after updating the state. */ + virtio_mem_deactivate_unplugged_memslots(vmem, offset, size); return 0; } @@ -527,7 +626,20 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, } if (!ret) { + /* + * Activate before notifying and roll back in case of any errors. + * + * When activating a previously inactive memslot, memory notifiers will + * get notified about the added memory region and can register with the + * RamDiscardManager; this will traverse all plugged blocks and skip the + * blocks we are plugging here. The following notification will inform + * registered listeners about the blocks we're plugging. + */ + virtio_mem_activate_memslots_to_plug(vmem, offset, size); ret = virtio_mem_notify_plug(vmem, offset, size); + if (ret) { + virtio_mem_deactivate_unplugged_memslots(vmem, offset, size); + } } if (ret) { /* Could be preallocation or notifier-populated memory. */ @@ -620,6 +732,7 @@ static void virtio_mem_resize_usable_region(VirtIOMEM *vmem, static int virtio_mem_unplug_all(VirtIOMEM *vmem) { + const uint64_t region_size = memory_region_size(&vmem->memdev->mr); RAMBlock *rb = vmem->memdev->mr.ram_block; if (vmem->size) { @@ -634,6 +747,9 @@ static int virtio_mem_unplug_all(VirtIOMEM *vmem) bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size); vmem->size = 0; notifier_list_notify(&vmem->size_change_notifiers, &vmem->size); + + /* Deactivate all memslots after updating the state. */ + virtio_mem_deactivate_unplugged_memslots(vmem, 0, region_size); } trace_virtio_mem_unplugged_all(); @@ -790,6 +906,49 @@ static void virtio_mem_system_reset(void *opaque) virtio_mem_unplug_all(vmem); } +static void virtio_mem_prepare_mr(VirtIOMEM *vmem) +{ + const uint64_t region_size = memory_region_size(&vmem->memdev->mr); + + assert(!vmem->mr && vmem->dynamic_memslots); + vmem->mr = g_new0(MemoryRegion, 1); + memory_region_init(vmem->mr, OBJECT(vmem), "virtio-mem", + region_size); + vmem->mr->align = memory_region_get_alignment(&vmem->memdev->mr); +} + +static void virtio_mem_prepare_memslots(VirtIOMEM *vmem) +{ + const uint64_t region_size = memory_region_size(&vmem->memdev->mr); + unsigned int idx; + + g_assert(!vmem->memslots && vmem->nb_memslots && vmem->dynamic_memslots); + vmem->memslots = g_new0(MemoryRegion, vmem->nb_memslots); + + /* Initialize our memslots, but don't map them yet. */ + for (idx = 0; idx < vmem->nb_memslots; idx++) { + const uint64_t memslot_offset = idx * vmem->memslot_size; + uint64_t memslot_size = vmem->memslot_size; + char name[20]; + + /* The size of the last memslot might be smaller.
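+ * For example (hypothetical sizes), a 5 GiB device region with a 2 GiB + * memslot size ends up with memslots of 2 GiB, 2 GiB and 1 GiB.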
*/ + if (idx == vmem->nb_memslots - 1) { + memslot_size = region_size - memslot_offset; + } + + snprintf(name, sizeof(name), "memslot-%u", idx); + memory_region_init_alias(&vmem->memslots[idx], OBJECT(vmem), name, + &vmem->memdev->mr, memslot_offset, + memslot_size); + /* + * We want to be able to atomically and efficiently activate/deactivate + * individual memslots without affecting adjacent memslots in memory + * notifiers. + */ + memory_region_set_unmergeable(&vmem->memslots[idx], true); + } +} + static void virtio_mem_device_realize(DeviceState *dev, Error **errp) { MachineState *ms = MACHINE(qdev_get_machine()); @@ -861,6 +1020,14 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) vmem->unplugged_inaccessible = ON_OFF_AUTO_ON; #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */ + if (vmem->dynamic_memslots && + vmem->unplugged_inaccessible != ON_OFF_AUTO_ON) { + error_setg(errp, "'%s' property set to 'on' requires '%s' to be 'on'", + VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP, + VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP); + return; + } + /* * If the block size wasn't configured by the user, use a sane default. This * allows using hugetlbfs backends of any page size without manual @@ -930,6 +1097,25 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) virtio_init(vdev, VIRTIO_ID_MEM, sizeof(struct virtio_mem_config)); vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request); + /* + * With "dynamic-memslots=off" (old behavior) we always map the whole + * RAM memory region directly. + */ + if (vmem->dynamic_memslots) { + if (!vmem->mr) { + virtio_mem_prepare_mr(vmem); + } + if (vmem->nb_memslots <= 1) { + vmem->nb_memslots = 1; + vmem->memslot_size = memory_region_size(&vmem->memdev->mr); + } + if (!vmem->memslots) { + virtio_mem_prepare_memslots(vmem); + } + } else { + assert(!vmem->mr && !vmem->nb_memslots && !vmem->memslots); + } + host_memory_backend_set_mapped(vmem->memdev, true); vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem)); if (vmem->early_migration) { @@ -969,7 +1155,7 @@ static void virtio_mem_device_unrealize(DeviceState *dev) ram_block_coordinated_discard_require(false); } -static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg, +static int virtio_mem_discard_range_cb(VirtIOMEM *vmem, void *arg, uint64_t offset, uint64_t size) { RAMBlock *rb = vmem->memdev->mr.ram_block; @@ -984,13 +1170,32 @@ static int virtio_mem_restore_unplugged(VirtIOMEM *vmem) virtio_mem_discard_range_cb); } -static int virtio_mem_post_load(void *opaque, int version_id) +static int virtio_mem_activate_memslot_range_cb(VirtIOMEM *vmem, void *arg, + uint64_t offset, uint64_t size) +{ + virtio_mem_activate_memslots_to_plug(vmem, offset, size); + return 0; +} + +static int virtio_mem_post_load_bitmap(VirtIOMEM *vmem) { - VirtIOMEM *vmem = VIRTIO_MEM(opaque); RamDiscardListener *rdl; int ret; /* + * We restored the bitmap and updated the requested size; activate all + * memslots (so listeners register) before notifying about plugged blocks. + */ + if (vmem->dynamic_memslots) { + /* + * We don't expect to find any active memslots to deactivate at this + * point: no memory was plugged on the migration destination. + */ + virtio_mem_for_each_plugged_range(vmem, NULL, + virtio_mem_activate_memslot_range_cb); + } + + /* * We started out with all memory discarded and our memory region is mapped * into an address space. Replay, now that we updated the bitmap.
*/ @@ -1001,6 +1206,20 @@ static int virtio_mem_post_load(void *opaque, int version_id) return ret; } } + return 0; +} + +static int virtio_mem_post_load(void *opaque, int version_id) +{ + VirtIOMEM *vmem = VIRTIO_MEM(opaque); + int ret; + + if (!vmem->early_migration) { + ret = virtio_mem_post_load_bitmap(vmem); + if (ret) { + return ret; + } + } /* * If shared RAM is migrated using the file content and not using QEMU, @@ -1021,7 +1240,7 @@ static int virtio_mem_post_load(void *opaque, int version_id) return virtio_mem_restore_unplugged(vmem); } -static int virtio_mem_prealloc_range_cb(const VirtIOMEM *vmem, void *arg, +static int virtio_mem_prealloc_range_cb(VirtIOMEM *vmem, void *arg, uint64_t offset, uint64_t size) { void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset; @@ -1043,7 +1262,7 @@ static int virtio_mem_post_load_early(void *opaque, int version_id) int ret; if (!vmem->prealloc) { - return 0; + goto post_load_bitmap; } /* * don't mess with preallocation and postcopy. */ if (migrate_ram_is_ignored(rb)) { - return 0; + goto post_load_bitmap; } /* @@ -1084,7 +1303,10 @@ return -EBUSY; } } - return 0; + +post_load_bitmap: + /* Finally, update any other state to be consistent with the new bitmap. */ + return virtio_mem_post_load_bitmap(vmem); } typedef struct VirtIOMEMMigSanityChecks { @@ -1235,11 +1457,79 @@ static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp) if (!vmem->memdev) { error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP); return NULL; + } else if (vmem->dynamic_memslots) { + if (!vmem->mr) { + virtio_mem_prepare_mr(vmem); + } + return vmem->mr; } return &vmem->memdev->mr; } +static void virtio_mem_decide_memslots(VirtIOMEM *vmem, unsigned int limit) +{ + uint64_t region_size, memslot_size, min_memslot_size; + unsigned int memslots; + RAMBlock *rb; + + if (!vmem->dynamic_memslots) { + return; + } + + /* We're called exactly once, before realizing the device. */ + assert(!vmem->nb_memslots); + + /* If realizing the device will fail, just assume a single memslot. */ + if (limit <= 1 || !vmem->memdev || !vmem->memdev->mr.ram_block) { + vmem->nb_memslots = 1; + return; + } + + rb = vmem->memdev->mr.ram_block; + region_size = memory_region_size(&vmem->memdev->mr); + + /* + * Determine the default block size now, so we can derive the minimum + * memslot size. We want the minimum slot size to be at least the device + * block size. + */ + if (!vmem->block_size) { + vmem->block_size = virtio_mem_default_block_size(rb); + } + /* If realizing the device will fail, just assume a single memslot. */ + if (vmem->block_size < qemu_ram_pagesize(rb) || + !QEMU_IS_ALIGNED(region_size, vmem->block_size)) { + vmem->nb_memslots = 1; + return; + } + + /* + * All memslots except the last one have a reasonable minimum size, and + * all memslot sizes are aligned to the device block size.
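+ * + * Worked example (hypothetical values): for a 16 GiB region, a limit of + * 8 memslots and a 2 MiB device block size, region_size / limit is 2 GiB, + * which already satisfies the 1 GiB minimum, so we end up with 8 memslots + * of 2 GiB each.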
+ */ + memslot_size = QEMU_ALIGN_UP(region_size / limit, vmem->block_size); + min_memslot_size = MAX(vmem->block_size, VIRTIO_MEM_MIN_MEMSLOT_SIZE); + memslot_size = MAX(memslot_size, min_memslot_size); + + memslots = QEMU_ALIGN_UP(region_size, memslot_size) / memslot_size; + if (memslots != 1) { + vmem->memslot_size = memslot_size; + } + vmem->nb_memslots = memslots; +} + +static unsigned int virtio_mem_get_memslots(VirtIOMEM *vmem) +{ + if (!vmem->dynamic_memslots) { + /* Exactly one static RAM memory region. */ + return 1; + } + + /* We're called after being instructed to make a decision. */ + g_assert(vmem->nb_memslots); + return vmem->nb_memslots; +} + static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem, Notifier *notifier) { @@ -1377,6 +1667,21 @@ static void virtio_mem_instance_init(Object *obj) NULL, NULL); } +static void virtio_mem_instance_finalize(Object *obj) +{ + VirtIOMEM *vmem = VIRTIO_MEM(obj); + + /* + * Note: the core already dropped the references on all memory regions + * (it's passed as the owner to memory_region_init_*()) and finalized + * these objects. We can simply free the memory. + */ + g_free(vmem->memslots); + vmem->memslots = NULL; + g_free(vmem->mr); + vmem->mr = NULL; +} + static Property virtio_mem_properties[] = { DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0), DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0), @@ -1389,6 +1694,8 @@ static Property virtio_mem_properties[] = { #endif DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM, early_migration, true), + DEFINE_PROP_BOOL(VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP, VirtIOMEM, + dynamic_memslots, false), DEFINE_PROP_END_OF_LIST(), }; @@ -1556,6 +1863,8 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data) vmc->fill_device_info = virtio_mem_fill_device_info; vmc->get_memory_region = virtio_mem_get_memory_region; + vmc->decide_memslots = virtio_mem_decide_memslots; + vmc->get_memslots = virtio_mem_get_memslots; vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier; vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier; vmc->unplug_request_check = virtio_mem_unplug_request_check; @@ -1573,6 +1882,7 @@ static const TypeInfo virtio_mem_info = { .parent = TYPE_VIRTIO_DEVICE, .instance_size = sizeof(VirtIOMEM), .instance_init = virtio_mem_instance_init, + .instance_finalize = virtio_mem_instance_finalize, .class_init = virtio_mem_class_init, .class_size = sizeof(VirtIOMEMClass), .interfaces = (InterfaceInfo[]) { diff --git a/include/block/block-common.h b/include/block/block-common.h index 2d2af7230d..d7599564db 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -66,13 +66,16 @@ * function. The coroutine yields after scheduling the BH and is reentered when * the wrapped function returns. * - * A no_co_wrapper_bdrv_wrlock function is a no_co_wrapper function that - * automatically takes the graph wrlock when calling the wrapped function. + * A no_co_wrapper_bdrv_rdlock function is a no_co_wrapper function that + * automatically takes the graph rdlock when calling the wrapped function. In + * the same way, no_co_wrapper_bdrv_wrlock functions automatically take the + * graph wrlock. * * If the first parameter of the function is a BlockDriverState, BdrvChild or * BlockBackend pointer, the AioContext lock for it is taken in the wrapper.
*/ #define no_co_wrapper +#define no_co_wrapper_bdrv_rdlock #define no_co_wrapper_bdrv_wrlock #include "block/blockjob.h" diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index 6061220a6c..6bfafe781d 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -132,13 +132,13 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, Error **errp); BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, const char *backing_file); -void bdrv_refresh_filename(BlockDriverState *bs); +void GRAPH_RDLOCK bdrv_refresh_filename(BlockDriverState *bs); void GRAPH_RDLOCK bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp); int bdrv_commit(BlockDriverState *bs); -int bdrv_make_empty(BdrvChild *c, Error **errp); +int GRAPH_RDLOCK bdrv_make_empty(BdrvChild *c, Error **errp); int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, const char *backing_fmt, bool warn); void bdrv_register(BlockDriver *bdrv); @@ -160,19 +160,20 @@ void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base); */ typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset, int64_t total_work_size, void *opaque); -int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts, - BlockDriverAmendStatusCB *status_cb, void *cb_opaque, - bool force, - Error **errp); +int GRAPH_RDLOCK +bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb, void *cb_opaque, + bool force, Error **errp); /* check if a named node can be replaced when doing drive-mirror */ BlockDriverState * GRAPH_RDLOCK check_to_replace_node(BlockDriverState *parent_bs, const char *node_name, Error **errp); -int no_coroutine_fn bdrv_activate(BlockDriverState *bs, Error **errp); +int no_coroutine_fn GRAPH_RDLOCK +bdrv_activate(BlockDriverState *bs, Error **errp); -int coroutine_fn no_co_wrapper +int coroutine_fn no_co_wrapper_bdrv_rdlock bdrv_co_activate(BlockDriverState *bs, Error **errp); void bdrv_activate_all(Error **errp); @@ -191,7 +192,7 @@ int bdrv_has_zero_init_1(BlockDriverState *bs); int bdrv_has_zero_init(BlockDriverState *bs); BlockDriverState *bdrv_find_node(const char *node_name); BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp); -XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp); +XDbgBlockGraph * GRAPH_RDLOCK bdrv_get_xdbg_block_graph(Error **errp); BlockDriverState *bdrv_lookup_bs(const char *device, const char *node_name, Error **errp); @@ -208,15 +209,18 @@ typedef struct BdrvNextIterator { BlockDriverState *bs; } BdrvNextIterator; -BlockDriverState *bdrv_first(BdrvNextIterator *it); -BlockDriverState *bdrv_next(BdrvNextIterator *it); +BlockDriverState * GRAPH_RDLOCK bdrv_first(BdrvNextIterator *it); +BlockDriverState * GRAPH_RDLOCK bdrv_next(BdrvNextIterator *it); void bdrv_next_cleanup(BdrvNextIterator *it); BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs); void bdrv_iterate_format(void (*it)(void *opaque, const char *name), void *opaque, bool read_only); -char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp); -char *bdrv_dirname(BlockDriverState *bs, Error **errp); + +char * GRAPH_RDLOCK +bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp); + +char * GRAPH_RDLOCK bdrv_dirname(BlockDriverState *bs, Error **errp); void bdrv_img_create(const char *filename, const char *fmt, const char *base_filename, const char *base_fmt, @@ -242,7 +246,9 @@ 
bdrv_attach_child(BlockDriverState *parent_bs, BdrvChildRole child_role, Error **errp); -bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); +bool GRAPH_RDLOCK +bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); + void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason); void bdrv_op_block_all(BlockDriverState *bs, Error *reason); diff --git a/include/block/block-io.h b/include/block/block-io.h index f1c796a1ce..ad270b6ad2 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -127,37 +127,46 @@ int coroutine_fn GRAPH_RDLOCK bdrv_co_zone_append(BlockDriverState *bs, BdrvRequestFlags flags); bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); -int bdrv_block_status(BlockDriverState *bs, int64_t offset, - int64_t bytes, int64_t *pnum, int64_t *map, - BlockDriverState **file); + +int coroutine_fn GRAPH_RDLOCK +bdrv_co_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, BlockDriverState **file); +int co_wrapper_mixed_bdrv_rdlock +bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, BlockDriverState **file); int coroutine_fn GRAPH_RDLOCK bdrv_co_block_status_above(BlockDriverState *bs, BlockDriverState *base, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file); -int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, - int64_t offset, int64_t bytes, int64_t *pnum, - int64_t *map, BlockDriverState **file); +int co_wrapper_mixed_bdrv_rdlock +bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, + int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file); int coroutine_fn GRAPH_RDLOCK bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum); -int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, - int64_t *pnum); +int co_wrapper_mixed_bdrv_rdlock +bdrv_is_allocated(BlockDriverState *bs, int64_t offset, + int64_t bytes, int64_t *pnum); int coroutine_fn GRAPH_RDLOCK bdrv_co_is_allocated_above(BlockDriverState *top, BlockDriverState *base, bool include_base, int64_t offset, int64_t bytes, int64_t *pnum); -int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, - bool include_base, int64_t offset, int64_t bytes, - int64_t *pnum); +int co_wrapper_mixed_bdrv_rdlock +bdrv_is_allocated_above(BlockDriverState *bs, BlockDriverState *base, + bool include_base, int64_t offset, + int64_t bytes, int64_t *pnum); int coroutine_fn GRAPH_RDLOCK bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, int64_t bytes); -int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, - Error **errp); +int GRAPH_RDLOCK +bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg, + Error **errp); + bool bdrv_is_read_only(BlockDriverState *bs); bool bdrv_is_writable(BlockDriverState *bs); bool bdrv_is_sg(BlockDriverState *bs); @@ -176,8 +185,12 @@ const char *bdrv_get_format_name(BlockDriverState *bs); bool bdrv_supports_compressed_writes(BlockDriverState *bs); const char *bdrv_get_node_name(const BlockDriverState *bs); -const char *bdrv_get_device_name(const BlockDriverState *bs); -const char *bdrv_get_device_or_node_name(const BlockDriverState *bs); + +const char * GRAPH_RDLOCK +bdrv_get_device_name(const BlockDriverState *bs); + +const char * GRAPH_RDLOCK 
+bdrv_get_device_or_node_name(const BlockDriverState *bs); int coroutine_fn GRAPH_RDLOCK bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); @@ -185,8 +198,9 @@ bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); int co_wrapper_mixed_bdrv_rdlock bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); -ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs, - Error **errp); +ImageInfoSpecific * GRAPH_RDLOCK +bdrv_get_specific_info(BlockDriverState *bs, Error **errp); + BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs); void bdrv_round_to_subclusters(BlockDriverState *bs, int64_t offset, int64_t bytes, @@ -363,7 +377,7 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); * * Begin a quiesced section for the parent of @c. */ -void bdrv_parent_drained_begin_single(BdrvChild *c); +void GRAPH_RDLOCK bdrv_parent_drained_begin_single(BdrvChild *c); /** * bdrv_parent_drained_poll_single: @@ -371,14 +385,14 @@ void bdrv_parent_drained_begin_single(BdrvChild *c); * Returns true if there is any pending activity to cease before @c can be * called quiesced, false otherwise. */ -bool bdrv_parent_drained_poll_single(BdrvChild *c); +bool GRAPH_RDLOCK bdrv_parent_drained_poll_single(BdrvChild *c); /** * bdrv_parent_drained_end_single: * * End a quiesced section for the parent of @c. */ -void bdrv_parent_drained_end_single(BdrvChild *c); +void GRAPH_RDLOCK bdrv_parent_drained_end_single(BdrvChild *c); /** * bdrv_drain_poll: @@ -391,8 +405,9 @@ void bdrv_parent_drained_end_single(BdrvChild *c); * * This is part of bdrv_drained_begin. */ -bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, - bool ignore_bds_parents); +bool GRAPH_RDLOCK +bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, + bool ignore_bds_parents); /** * bdrv_drained_begin: @@ -400,6 +415,12 @@ bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, * Begin a quiesced section for exclusive access to the BDS, by disabling * external request sources including NBD server, block jobs, and device model. * + * This function can only be invoked by the main loop or a coroutine + * (regardless of the AioContext where it is running). + * If the coroutine is running in an Iothread AioContext, this function will + * just schedule a BH to run in the main loop. + * However, it cannot be directly called by an Iothread. + * * This function can be recursive. */ void bdrv_drained_begin(BlockDriverState *bs); @@ -416,6 +437,12 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent); * bdrv_drained_end: * * End a quiescent section started by bdrv_drained_begin(). + * + * This function can only be invoked by the main loop or a coroutine + * (regardless of the AioContext where it is running). + * If the coroutine is running in an Iothread AioContext, this function will + * just schedule a BH to run in the main loop. + * However, it cannot be directly called by an Iothread. */ void bdrv_drained_end(BlockDriverState *bs); diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 2ca3758cb8..b8d9d24f39 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -235,11 +235,14 @@ struct BlockDriver { Error **errp); /* For handling image reopen for split or non-split files. 
*/ - int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, - BlockReopenQueue *queue, Error **errp); - void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); - void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state); - void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); + int GRAPH_UNLOCKED_PTR (*bdrv_reopen_prepare)( + BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp); + void GRAPH_UNLOCKED_PTR (*bdrv_reopen_commit)( + BDRVReopenState *reopen_state); + void GRAPH_UNLOCKED_PTR (*bdrv_reopen_commit_post)( + BDRVReopenState *reopen_state); + void GRAPH_UNLOCKED_PTR (*bdrv_reopen_abort)( + BDRVReopenState *reopen_state); void (*bdrv_join_options)(QDict *options, QDict *old_options); int GRAPH_UNLOCKED_PTR (*bdrv_open)( @@ -256,20 +259,18 @@ struct BlockDriver { int coroutine_fn GRAPH_UNLOCKED_PTR (*bdrv_co_create_opts)( BlockDriver *drv, const char *filename, QemuOpts *opts, Error **errp); - int (*bdrv_amend_options)(BlockDriverState *bs, - QemuOpts *opts, - BlockDriverAmendStatusCB *status_cb, - void *cb_opaque, - bool force, - Error **errp); + int GRAPH_RDLOCK_PTR (*bdrv_amend_options)( + BlockDriverState *bs, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb, void *cb_opaque, + bool force, Error **errp); - int (*bdrv_make_empty)(BlockDriverState *bs); + int GRAPH_RDLOCK_PTR (*bdrv_make_empty)(BlockDriverState *bs); /* * Refreshes the bs->exact_filename field. If that is impossible, * bs->exact_filename has to be left empty. */ - void (*bdrv_refresh_filename)(BlockDriverState *bs); + void GRAPH_RDLOCK_PTR (*bdrv_refresh_filename)(BlockDriverState *bs); /* * Gathers the open options for all children into @target. @@ -292,15 +293,15 @@ struct BlockDriver { * block driver which implements it is probably doing something * shady regarding its runtime option structure. */ - void (*bdrv_gather_child_options)(BlockDriverState *bs, QDict *target, - bool backing_overridden); + void GRAPH_RDLOCK_PTR (*bdrv_gather_child_options)( + BlockDriverState *bs, QDict *target, bool backing_overridden); /* * Returns an allocated string which is the directory name of this BDS: It * will be used to make relative filenames absolute by prepending this * function's return value to them. 
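 * For example, a BDS whose image file is "/images/disk.qcow2" would
 * typically return "/images/", so that a relative backing file name like
 * "base.qcow2" resolves to "/images/base.qcow2".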
*/ - char *(*bdrv_dirname)(BlockDriverState *bs, Error **errp); + char * GRAPH_RDLOCK_PTR (*bdrv_dirname)(BlockDriverState *bs, Error **errp); /* * This informs the driver that we are no longer interested in the result @@ -313,14 +314,16 @@ struct BlockDriver { int GRAPH_RDLOCK_PTR (*bdrv_inactivate)(BlockDriverState *bs); - int (*bdrv_snapshot_create)(BlockDriverState *bs, - QEMUSnapshotInfo *sn_info); - int (*bdrv_snapshot_goto)(BlockDriverState *bs, - const char *snapshot_id); - int (*bdrv_snapshot_delete)(BlockDriverState *bs, - const char *snapshot_id, - const char *name, - Error **errp); + int GRAPH_RDLOCK_PTR (*bdrv_snapshot_create)( + BlockDriverState *bs, QEMUSnapshotInfo *sn_info); + + int GRAPH_UNLOCKED_PTR (*bdrv_snapshot_goto)( + BlockDriverState *bs, const char *snapshot_id); + + int GRAPH_RDLOCK_PTR (*bdrv_snapshot_delete)( + BlockDriverState *bs, const char *snapshot_id, const char *name, + Error **errp); + int (*bdrv_snapshot_list)(BlockDriverState *bs, QEMUSnapshotInfo **psn_info); int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs, @@ -725,8 +728,8 @@ struct BlockDriver { int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_get_info)( BlockDriverState *bs, BlockDriverInfo *bdi); - ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs, - Error **errp); + ImageInfoSpecific * GRAPH_RDLOCK_PTR (*bdrv_get_specific_info)( + BlockDriverState *bs, Error **errp); BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs); int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_save_vmstate)( @@ -963,15 +966,15 @@ struct BdrvChildClass { * Note that this can be nested. If drained_begin() was called twice, new * I/O is allowed only after drained_end() was called twice, too. */ - void (*drained_begin)(BdrvChild *child); - void (*drained_end)(BdrvChild *child); + void GRAPH_RDLOCK_PTR (*drained_begin)(BdrvChild *child); + void GRAPH_RDLOCK_PTR (*drained_end)(BdrvChild *child); /* * Returns whether the parent has pending requests for the child. This * callback is polled after .drained_begin() has been called until all * activity on the child has stopped. */ - bool (*drained_poll)(BdrvChild *child); + bool GRAPH_RDLOCK_PTR (*drained_poll)(BdrvChild *child); /* * Notifies the parent that the filename of its child has changed (e.g. @@ -1039,8 +1042,8 @@ struct BdrvChild { */ bool quiesced_parent; - QLIST_ENTRY(BdrvChild) next; - QLIST_ENTRY(BdrvChild) next_parent; + QLIST_ENTRY(BdrvChild GRAPH_RDLOCK_PTR) next; + QLIST_ENTRY(BdrvChild GRAPH_RDLOCK_PTR) next_parent; }; /* @@ -1173,11 +1176,11 @@ struct BlockDriverState { * See also comment in include/block/block.h, to learn how backing and file * are connected with BdrvChildRole. 
*/ - QLIST_HEAD(, BdrvChild) children; + QLIST_HEAD(, BdrvChild GRAPH_RDLOCK_PTR) children; BdrvChild *backing; BdrvChild *file; - QLIST_HEAD(, BdrvChild) parents; + QLIST_HEAD(, BdrvChild GRAPH_RDLOCK_PTR) parents; QDict *options; QDict *explicit_options; diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h index eb0da7232e..34eac72d7a 100644 --- a/include/block/block_int-io.h +++ b/include/block/block_int-io.h @@ -99,7 +99,7 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, */ void bdrv_wakeup(BlockDriverState *bs); -const char *bdrv_get_parent_name(const BlockDriverState *bs); +const char * GRAPH_RDLOCK bdrv_get_parent_name(const BlockDriverState *bs); bool blk_dev_has_tray(BlockBackend *blk); bool blk_dev_is_tray_open(BlockBackend *blk); @@ -133,7 +133,7 @@ bdrv_refresh_total_sectors(BlockDriverState *bs, int64_t hint); BdrvChild *bdrv_cow_child(BlockDriverState *bs); BdrvChild *bdrv_filter_child(BlockDriverState *bs); BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs); -BdrvChild *bdrv_primary_child(BlockDriverState *bs); +BdrvChild * GRAPH_RDLOCK bdrv_primary_child(BlockDriverState *bs); BlockDriverState *bdrv_skip_filters(BlockDriverState *bs); BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs); @@ -155,7 +155,8 @@ static inline BlockDriverState *bdrv_filter_or_cow_bs(BlockDriverState *bs) return child_bs(bdrv_filter_or_cow_child(bs)); } -static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs) +static inline BlockDriverState * GRAPH_RDLOCK +bdrv_primary_bs(BlockDriverState *bs) { IO_CODE(); return child_bs(bdrv_primary_child(bs)); diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h index 7e04f98ff0..6f1cd12745 100644 --- a/include/block/graph-lock.h +++ b/include/block/graph-lock.h @@ -116,7 +116,8 @@ void unregister_aiocontext(AioContext *ctx); * This function polls. Callers must not hold the lock of any AioContext other * than the current one and the one of @bs. 
*/ -void bdrv_graph_wrlock(BlockDriverState *bs) TSA_ACQUIRE(graph_lock) TSA_NO_TSA; +void no_coroutine_fn TSA_ACQUIRE(graph_lock) TSA_NO_TSA +bdrv_graph_wrlock(BlockDriverState *bs); /* * bdrv_graph_wrunlock: diff --git a/include/block/qapi.h b/include/block/qapi.h index 8663971c58..54c48de26a 100644 --- a/include/block/qapi.h +++ b/include/block/qapi.h @@ -29,18 +29,17 @@ #include "block/snapshot.h" #include "qapi/qapi-types-block-core.h" -BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, - BlockDriverState *bs, - bool flat, - Error **errp); -int bdrv_query_snapshot_info_list(BlockDriverState *bs, - SnapshotInfoList **p_list, - Error **errp); -void bdrv_query_image_info(BlockDriverState *bs, - ImageInfo **p_info, - bool flat, - bool skip_implicit_filters, - Error **errp); +BlockDeviceInfo * GRAPH_RDLOCK +bdrv_block_device_info(BlockBackend *blk, BlockDriverState *bs, + bool flat, Error **errp); + +int GRAPH_RDLOCK +bdrv_query_snapshot_info_list(BlockDriverState *bs, + SnapshotInfoList **p_list, + Error **errp); +void GRAPH_RDLOCK +bdrv_query_image_info(BlockDriverState *bs, ImageInfo **p_info, bool flat, + bool skip_implicit_filters, Error **errp); void GRAPH_RDLOCK bdrv_query_block_graph_info(BlockDriverState *bs, BlockGraphInfo **p_info, Error **errp); diff --git a/include/block/snapshot.h b/include/block/snapshot.h index 50ff924710..d49c5599d9 100644 --- a/include/block/snapshot.h +++ b/include/block/snapshot.h @@ -25,6 +25,7 @@ #ifndef SNAPSHOT_H #define SNAPSHOT_H +#include "block/graph-lock.h" #include "qapi/qapi-builtin-types.h" #define SNAPSHOT_OPT_BASE "snapshot." @@ -59,16 +60,19 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs, const char *name, QEMUSnapshotInfo *sn_info, Error **errp); -int bdrv_can_snapshot(BlockDriverState *bs); -int bdrv_snapshot_create(BlockDriverState *bs, - QEMUSnapshotInfo *sn_info); -int bdrv_snapshot_goto(BlockDriverState *bs, - const char *snapshot_id, - Error **errp); -int bdrv_snapshot_delete(BlockDriverState *bs, - const char *snapshot_id, - const char *name, - Error **errp); + +int GRAPH_RDLOCK bdrv_can_snapshot(BlockDriverState *bs); + +int GRAPH_RDLOCK +bdrv_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); + +int GRAPH_UNLOCKED +bdrv_snapshot_goto(BlockDriverState *bs, const char *snapshot_id, Error **errp); + +int GRAPH_RDLOCK +bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id, + const char *name, Error **errp); + int bdrv_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_info); int bdrv_snapshot_load_tmp(BlockDriverState *bs, diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 605b160a7e..30c376a4de 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -83,6 +83,21 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length); ram_addr_t qemu_ram_addr_from_host(void *ptr); ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr); RAMBlock *qemu_ram_block_by_name(const char *name); + +/* + * Translates a host ptr back to a RAMBlock and an offset in that RAMBlock. + * + * @ptr: The host pointer to translate. + * @round_offset: Whether to round the result offset down to a target page + * @offset: Will be set to the offset within the returned RAMBlock. + * + * Returns: RAMBlock (or NULL if not found) + * + * By the time this function returns, the returned pointer is not protected + * by RCU anymore. 
If the caller is not within an RCU critical section and + * does not hold the iothread lock, it must have other means of protecting the + * pointer, such as a reference to the memory region that owns the RAMBlock. + */ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, ram_addr_t *offset); ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host); diff --git a/include/exec/memory.h b/include/exec/memory.h index c99842d2fc..653a32ea10 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -95,6 +95,7 @@ struct ReservedRegion { * relative to the region's address space * @readonly: writes to this section are ignored * @nonvolatile: this section is non-volatile + * @unmergeable: this section should not get merged with adjacent sections */ struct MemoryRegionSection { Int128 size; @@ -104,6 +105,7 @@ struct MemoryRegionSection { hwaddr offset_within_address_space; bool readonly; bool nonvolatile; + bool unmergeable; }; typedef struct IOMMUTLBEntry IOMMUTLBEntry; @@ -599,8 +601,9 @@ typedef void (*ReplayRamDiscard)(MemoryRegionSection *section, void *opaque); * populated (consuming memory), to be used/accessed by the VM. * * A #RamDiscardManager can only be set for a RAM #MemoryRegion while the - * #MemoryRegion isn't mapped yet; it cannot change while the #MemoryRegion is - * mapped. + * #MemoryRegion isn't mapped into an address space yet (either directly + * or via an alias); it cannot change while the #MemoryRegion is + * mapped into an address space. * * The #RamDiscardManager is intended to be used by technologies that are * incompatible with discarding of RAM (e.g., VFIO, which may pin all @@ -772,6 +775,7 @@ struct MemoryRegion { bool nonvolatile; bool rom_device; bool flush_coalesced_mmio; + bool unmergeable; uint8_t dirty_log_mask; bool is_iommu; RAMBlock *ram_block; @@ -2350,6 +2354,25 @@ void memory_region_set_size(MemoryRegion *mr, uint64_t size); void memory_region_set_alias_offset(MemoryRegion *mr, hwaddr offset); +/* + * memory_region_set_unmergeable: Set a memory region unmergeable + * + * Mark a memory region unmergeable, resulting in the memory region (or + * everything contained in a memory region container) not getting merged when + * simplifying the address space and notifying memory listeners. Consequently, + * memory listeners will never get notified about ranges that are larger than + * the original memory regions. + * + * This is primarily useful when multiple aliases to a RAM memory region are + * mapped into a memory region container, and updates (e.g., enable/disable or + * map/unmap) of individual memory region aliases are not supposed to affect + * other memory regions in the same container. + * + * @mr: the #MemoryRegion to be updated + * @unmergeable: whether to mark the #MemoryRegion unmergeable + */ +void memory_region_set_unmergeable(MemoryRegion *mr, bool unmergeable); + /** * memory_region_present: checks if an address relative to a @container * translates into #MemoryRegion within @container diff --git a/include/gdbstub/syscalls.h b/include/gdbstub/syscalls.h index 243eaf8ce4..54ff7245a1 100644 --- a/include/gdbstub/syscalls.h +++ b/include/gdbstub/syscalls.h @@ -110,4 +110,13 @@ int use_gdb_syscalls(void); */ void gdb_exit(int code); +/** + * gdb_qemu_exit: ask qemu to exit + * @code: exit code reported + * + * This requests qemu to exit. This function is allowed to return as + * the exit request might be processed asynchronously by the qemu backend.
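+ * + * Unlike gdb_exit(), which tears down the gdbstub connection itself, what + * happens here depends on the build: system emulation requests a guest + * shutdown through the main loop, while user-mode emulation calls exit() + * directly.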
+ */ +void gdb_qemu_exit(int code); + #endif /* _SYSCALLS_H_ */ diff --git a/include/hw/boards.h b/include/hw/boards.h index 55a64a13fd..43a56dc51e 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -297,15 +297,27 @@ struct MachineClass { * DeviceMemoryState: * @base: address in guest physical address space where the memory * address space for memory devices starts - * @mr: address space container for memory devices + * @mr: memory region container for memory devices + * @as: address space for memory devices + * @listener: memory listener used to track used memslots in the address space * @dimm_size: the sum of plugged DIMMs' sizes * @used_region_size: the part of @mr already used by memory devices + * @required_memslots: the number of memslots required by memory devices + * @used_memslots: the number of memslots currently used by memory devices + * @memslot_auto_decision_active: whether any plugged memory device + * automatically decided to use more than + * one memslot */ typedef struct DeviceMemoryState { hwaddr base; MemoryRegion mr; + AddressSpace as; + MemoryListener listener; uint64_t dimm_size; uint64_t used_region_size; + unsigned int required_memslots; + unsigned int used_memslots; + unsigned int memslot_auto_decision_active; } DeviceMemoryState; /** diff --git a/include/hw/mem/memory-device.h b/include/hw/mem/memory-device.h index 48d2611fc5..3354d6c166 100644 --- a/include/hw/mem/memory-device.h +++ b/include/hw/mem/memory-device.h @@ -14,6 +14,7 @@ #define MEMORY_DEVICE_H #include "hw/qdev-core.h" +#include "qemu/typedefs.h" #include "qapi/qapi-types-machine.h" #include "qom/object.h" @@ -41,6 +42,17 @@ typedef struct MemoryDeviceState MemoryDeviceState; * successive memory regions are used, a covering memory region has to * be provided. Scattered memory regions are not supported for single * devices. + * + * The device memory region returned via @get_memory_region may either be a + * single RAM memory region or a memory region container with subregions + * that are RAM memory regions or aliases to RAM memory regions. Other + * memory regions or subregions are not supported. + * + * If the device memory region returned via @get_memory_region is a + * memory region container, subregions may be (un)mapped dynamically + * as long as the number of memslots returned by @get_memslots() is not + * exceeded and as long as all memory regions are of the same kind (e.g., + * all RAM or all ROM). */ struct MemoryDeviceClass { /* private */ @@ -89,6 +101,28 @@ struct MemoryDeviceClass { MemoryRegion *(*get_memory_region)(MemoryDeviceState *md, Error **errp); /* + * Optional: Instruct the memory device to decide how many memory slots + * it requires, not exceeding the given limit. + * + * Called exactly once when pre-plugging the memory device, before + * querying the number of memslots using @get_memslots the first time. + */ + void (*decide_memslots)(MemoryDeviceState *md, unsigned int limit); + + /* + * Optional for memory devices that require only a single memslot, + * required for all other memory devices: Return the number of memslots + * (distinct RAM memory regions in the device memory region) that are + * required by the device. + * + * If this function is not implemented, the assumption is "1". + * + * Called when (un)plugging the memory device, to check if the requirements + * can be satisfied, and to do proper accounting.
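To make the contract above concrete, here is a hedged sketch of a hypothetical device implementing both hooks (the type name and the preferred slot count of 8 are invented; real users of this interface, such as virtio-mem, live elsewhere in this series; MIN() is assumed from qemu/osdep.h):

    typedef struct FooMemoryDevice {
        MemoryDeviceState parent_obj;
        unsigned int nb_memslots;   /* decided once at pre-plug time */
    } FooMemoryDevice;

    static void foo_decide_memslots(MemoryDeviceState *md, unsigned int limit)
    {
        FooMemoryDevice *foo = (FooMemoryDevice *)md;

        /* Never exceed the limit handed in by the core; one memslot is
         * always a valid fallback. */
        foo->nb_memslots = MIN(limit, 8);
    }

    static unsigned int foo_get_memslots(MemoryDeviceState *md)
    {
        /* Must stay constant once decide_memslots() has been called. */
        return ((FooMemoryDevice *)md)->nb_memslots;
    }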
+ */ + unsigned int (*get_memslots)(MemoryDeviceState *md); + + /* * Optional: Return the desired minimum alignment of the device in guest * physical address space. The final alignment is computed based on this * alignment and the alignment requirements of the memory region. @@ -105,8 +139,31 @@ struct MemoryDeviceClass { MemoryDeviceInfo *info); }; +/* + * Traditionally, KVM/vhost in many setups supported 509 memslots, whereby + * 253 memslots were "reserved" for boot memory and other devices (such + * as PCI BARs, which can get mapped dynamically) and 256 memslots were + * dedicated for DIMMs. These magic numbers worked reliably in the past. + * + * Further, using many memslots can negatively affect performance, so setting + * the soft-limit of memslots used by memory devices to the traditional + * DIMM limit of 256 sounds reasonable. + * + * If we have fewer than 509 memslots, we will instruct memory devices that + * support automatically deciding how many memslots to use to only use a single + * one. + * + * Hotplugging vhost devices with at least 509 memslots is not expected to + * cause problems, not even when devices automatically decide how many memslots + * to use. + */ +#define MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT 256 +#define MEMORY_DEVICES_SAFE_MAX_MEMSLOTS 509 + MemoryDeviceInfoList *qmp_memory_device_list(void); uint64_t get_plugged_memory_size(void); +unsigned int memory_devices_get_reserved_memslots(void); +bool memory_devices_memslot_auto_decision_active(void); void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms, const uint64_t *legacy_align, Error **errp); void memory_device_plug(MemoryDeviceState *md, MachineState *ms); diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index 1860b541d8..96ccc18cd3 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -86,9 +86,6 @@ typedef int (*vhost_set_vring_enable_op)(struct vhost_dev *dev, typedef bool (*vhost_requires_shm_log_op)(struct vhost_dev *dev); typedef int (*vhost_migration_done_op)(struct vhost_dev *dev, char *mac_addr); -typedef bool (*vhost_backend_can_merge_op)(struct vhost_dev *dev, - uint64_t start1, uint64_t size1, - uint64_t start2, uint64_t size2); typedef int (*vhost_vsock_set_guest_cid_op)(struct vhost_dev *dev, uint64_t guest_cid); typedef int (*vhost_vsock_set_running_op)(struct vhost_dev *dev, int start); @@ -108,8 +105,7 @@ typedef int (*vhost_crypto_create_session_op)(struct vhost_dev *dev, typedef int (*vhost_crypto_close_session_op)(struct vhost_dev *dev, uint64_t session_id); -typedef bool (*vhost_backend_mem_section_filter_op)(struct vhost_dev *dev, - MemoryRegionSection *section); +typedef bool (*vhost_backend_no_private_memslots_op)(struct vhost_dev *dev); typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev, uint16_t queue_size, @@ -138,6 +134,7 @@ typedef struct VhostOps { vhost_backend_init vhost_backend_init; vhost_backend_cleanup vhost_backend_cleanup; vhost_backend_memslots_limit vhost_backend_memslots_limit; + vhost_backend_no_private_memslots_op vhost_backend_no_private_memslots; vhost_net_set_backend_op vhost_net_set_backend; vhost_net_set_mtu_op vhost_net_set_mtu; vhost_scsi_set_endpoint_op vhost_scsi_set_endpoint; @@ -163,7 +160,6 @@ typedef struct VhostOps { vhost_set_vring_enable_op vhost_set_vring_enable; vhost_requires_shm_log_op vhost_requires_shm_log; vhost_migration_done_op vhost_migration_done; - vhost_backend_can_merge_op vhost_backend_can_merge; vhost_vsock_set_guest_cid_op
vhost_vsock_set_guest_cid; vhost_vsock_set_running_op vhost_vsock_set_running; vhost_set_iotlb_callback_op vhost_set_iotlb_callback; @@ -172,7 +168,6 @@ typedef struct VhostOps { vhost_set_config_op vhost_set_config; vhost_crypto_create_session_op vhost_crypto_create_session; vhost_crypto_close_session_op vhost_crypto_close_session; - vhost_backend_mem_section_filter_op vhost_backend_mem_section_filter; vhost_get_inflight_fd_op vhost_get_inflight_fd; vhost_set_inflight_fd_op vhost_set_inflight_fd; vhost_dev_start_op vhost_dev_start; diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 6a173cb9fa..c7e5467693 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -315,7 +315,8 @@ uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits, */ void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits, uint64_t features); -bool vhost_has_free_slot(void); +unsigned int vhost_get_max_memslots(void); +unsigned int vhost_get_free_memslots(void); int vhost_net_set_backend(struct vhost_dev *hdev, struct vhost_vring_file *file); diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index 390c4642b8..4739fa4689 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -225,13 +225,13 @@ struct VhostUserGPU { }; #define VIRTIO_GPU_FILL_CMD(out) do { \ - size_t s; \ - s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0, \ + size_t virtiogpufillcmd_s_ = \ + iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0, \ &out, sizeof(out)); \ - if (s != sizeof(out)) { \ + if (virtiogpufillcmd_s_ != sizeof(out)) { \ qemu_log_mask(LOG_GUEST_ERROR, \ "%s: command size incorrect %zu vs %zu\n", \ - __func__, s, sizeof(out)); \ + __func__, virtiogpufillcmd_s_, sizeof(out)); \ return; \ } \ } while (0) diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h index ab0fe2b4f2..5f5b02b8f9 100644 --- a/include/hw/virtio/virtio-mem.h +++ b/include/hw/virtio/virtio-mem.h @@ -33,6 +33,7 @@ OBJECT_DECLARE_TYPE(VirtIOMEM, VirtIOMEMClass, #define VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP "unplugged-inaccessible" #define VIRTIO_MEM_EARLY_MIGRATION_PROP "x-early-migration" #define VIRTIO_MEM_PREALLOC_PROP "prealloc" +#define VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP "dynamic-memslots" struct VirtIOMEM { VirtIODevice parent_obj; @@ -44,7 +45,28 @@ struct VirtIOMEM { int32_t bitmap_size; unsigned long *bitmap; - /* assigned memory backend and memory region */ + /* + * With "dynamic-memslots=on": Device memory region in which we dynamically + * map the memslots. + */ + MemoryRegion *mr; + + /* + * With "dynamic-memslots=on": The individual memslots (aliases into the + * memory backend). + */ + MemoryRegion *memslots; + + /* With "dynamic-memslots=on": The total number of memslots. */ + uint16_t nb_memslots; + + /* + * With "dynamic-memslots=on": Size of one memslot (the size of the + * last one can differ). + */ + uint64_t memslot_size; + + /* Assigned memory backend with the RAM memory region. */ HostMemoryBackend *memdev; /* NUMA node */ @@ -82,6 +104,12 @@ struct VirtIOMEM { */ bool early_migration; + /* + * Whether we dynamically map (multiple, if possible) memslots instead of + * statically mapping the whole RAM memory region. 
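For intuition about the geometry these new fields describe, a rough sketch with invented numbers (the actual sizing policy lives in hw/virtio/virtio-mem.c and is not shown in this hunk; GiB and DIV_ROUND_UP are assumed from qemu/units.h and qemu/osdep.h):

    uint64_t region_size  = 16 * GiB;  /* size of the device memory region */
    uint64_t memslot_size = 2 * GiB;   /* chosen per-memslot granularity   */
    uint16_t nb_memslots  = DIV_ROUND_UP(region_size, memslot_size); /* 8 */

    /* Memslot i is an alias into the backend covering
     *   [i * memslot_size, MIN((i + 1) * memslot_size, region_size))
     * so only the last memslot can be smaller, when region_size is not a
     * multiple of memslot_size. */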
+ */ + bool dynamic_memslots; + /* notifiers to notify when "size" changes */ NotifierList size_change_notifiers; @@ -96,6 +124,8 @@ struct VirtIOMEMClass { /* public */ void (*fill_device_info)(const VirtIOMEM *vmen, VirtioMEMDeviceInfo *vi); MemoryRegion *(*get_memory_region)(VirtIOMEM *vmem, Error **errp); + void (*decide_memslots)(VirtIOMEM *vmem, unsigned int limit); + unsigned int (*get_memslots)(VirtIOMEM *vmem); void (*add_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier); void (*remove_size_change_notifier)(VirtIOMEM *vmem, Notifier *notifier); void (*unplug_request_check)(VirtIOMEM *vmem, Error **errp); diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h index d5f675493a..49c12b0fa9 100644 --- a/include/sysemu/block-backend-global-state.h +++ b/include/sysemu/block-backend-global-state.h @@ -59,8 +59,8 @@ BlockBackend *blk_by_public(BlockBackendPublic *public); void blk_remove_bs(BlockBackend *blk); int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp); int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp); -bool bdrv_has_blk(BlockDriverState *bs); -bool bdrv_is_root_node(BlockDriverState *bs); +bool GRAPH_RDLOCK bdrv_has_blk(BlockDriverState *bs); +bool GRAPH_RDLOCK bdrv_is_root_node(BlockDriverState *bs); int GRAPH_UNLOCKED blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, Error **errp); void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h index 0535a4c68a..b4a566cfe7 100644 --- a/include/sysemu/cpus.h +++ b/include/sysemu/cpus.h @@ -50,11 +50,4 @@ void cpu_synchronize_all_post_reset(void); void cpu_synchronize_all_post_init(void); void cpu_synchronize_all_pre_loadvm(void); -#ifndef CONFIG_USER_ONLY -/* vl.c */ -/* *-user doesn't have configurable SMP topology */ -extern int smp_cores; -extern int smp_threads; -#endif - #endif diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index ee9025f8e9..97a8a4f201 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -215,7 +215,8 @@ typedef struct KVMRouteChange { /* external API */ -bool kvm_has_free_slot(MachineState *ms); +unsigned int kvm_get_max_memslots(void); +unsigned int kvm_get_free_memslots(void); bool kvm_has_sync_mmu(void); int kvm_has_vcpu_events(void); int kvm_has_robust_singlestep(void); @@ -552,7 +553,6 @@ int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source); */ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target); struct ppc_radix_page_info *kvm_get_radix_page_info(void); -int kvm_get_max_memslots(void); /* Notify resamplefd for EOI of specific interrupts. 
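A simplified guess at how the renamed accounting helpers fit together when the core derives the limit handed to a device's decide_memslots() hook (the real policy lives in hw/mem/memory-device.c and may differ):

    static unsigned int sketch_memslot_limit(void)
    {
        /* The most constrained consumer wins... */
        unsigned int free = MIN(kvm_get_free_memslots(),
                                vhost_get_free_memslots());
        /* ...and slots already promised to other memory devices stay
         * reserved. */
        unsigned int reserved = memory_devices_get_reserved_memslots();

        return free > reserved ? free - reserved : 1;
    }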
*/ void kvm_resample_fd_notify(int gsi); diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index a5b9122cb8..075939a3c4 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -40,6 +40,7 @@ typedef struct KVMMemoryUpdate { typedef struct KVMMemoryListener { MemoryListener listener; KVMSlot *slots; + unsigned int nr_used_slots; int as_id; QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add; QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del; diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h index 08afb97695..c8c2bd8a61 100644 --- a/include/sysemu/runstate.h +++ b/include/sysemu/runstate.h @@ -68,6 +68,8 @@ void qemu_system_wakeup_request(WakeupReason reason, Error **errp); void qemu_system_wakeup_enable(WakeupReason reason, bool enabled); void qemu_register_wakeup_notifier(Notifier *notifier); void qemu_register_wakeup_support(void); +void qemu_system_shutdown_request_with_code(ShutdownCause reason, + int exit_code); void qemu_system_shutdown_request(ShutdownCause reason); void qemu_system_powerdown_request(void); void qemu_register_powerdown_notifier(Notifier *notifier); diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 25be2a692e..73a37949c2 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -101,7 +101,7 @@ bool defaults_enabled(void); void qemu_init(int argc, char **argv); int qemu_main_loop(void); -void qemu_cleanup(void); +void qemu_cleanup(int); extern QemuOptsList qemu_legacy_drive_opts; extern QemuOptsList qemu_common_drive_opts; diff --git a/migration/block.c b/migration/block.c index 5f930870a5..d115e1cfa5 100644 --- a/migration/block.c +++ b/migration/block.c @@ -388,6 +388,8 @@ static int init_blk_migration(QEMUFile *f) Error *local_err = NULL; int ret; + GRAPH_RDLOCK_GUARD_MAINLOOP(); + block_mig_state.submitted = 0; block_mig_state.read_done = 0; block_mig_state.transferred = 0; diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c index c115ef2d23..5b25ba24f7 100644 --- a/migration/migration-hmp-cmds.c +++ b/migration/migration-hmp-cmds.c @@ -794,6 +794,8 @@ static void vm_completion(ReadLineState *rs, const char *str) BlockDriverState *bs; BdrvNextIterator it; + GRAPH_RDLOCK_GUARD_MAINLOOP(); + len = strlen(str); readline_set_completion_index(rs, len); diff --git a/qemu-img.c b/qemu-img.c index 6068ab0d27..585b65640f 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -3165,7 +3165,9 @@ static int get_block_status(BlockDriverState *bs, int64_t offset, has_offset = !!(ret & BDRV_BLOCK_OFFSET_VALID); if (file && has_offset) { + bdrv_graph_rdlock_main_loop(); bdrv_refresh_filename(file); + bdrv_graph_rdunlock_main_loop(); filename = file->filename; } @@ -3470,7 +3472,10 @@ static int img_snapshot(int argc, char **argv) sn.date_sec = rt / G_USEC_PER_SEC; sn.date_nsec = (rt % G_USEC_PER_SEC) * 1000; + bdrv_graph_rdlock_main_loop(); ret = bdrv_snapshot_create(bs, &sn); + bdrv_graph_rdunlock_main_loop(); + if (ret) { error_report("Could not create snapshot '%s': %s", snapshot_name, strerror(-ret)); @@ -3486,6 +3491,7 @@ static int img_snapshot(int argc, char **argv) break; case SNAPSHOT_DELETE: + bdrv_graph_rdlock_main_loop(); ret = bdrv_snapshot_find(bs, &sn, snapshot_name); if (ret < 0) { error_report("Could not delete snapshot '%s': snapshot not " @@ -3499,6 +3505,7 @@ static int img_snapshot(int argc, char **argv) ret = 1; } } + bdrv_graph_rdunlock_main_loop(); break; } @@ -3683,7 +3690,9 @@ static int img_rebase(int argc, char **argv) qdict_put_bool(options, BDRV_OPT_FORCE_SHARE, 
true); } + bdrv_graph_rdlock_main_loop(); bdrv_refresh_filename(bs); + bdrv_graph_rdunlock_main_loop(); overlay_filename = bs->exact_filename[0] ? bs->exact_filename : bs->filename; out_real_path = @@ -4120,6 +4129,8 @@ static int print_amend_option_help(const char *format) { BlockDriver *drv; + GRAPH_RDLOCK_GUARD_MAINLOOP(); + /* Find driver and parse its options */ drv = bdrv_find_format(format); if (!drv) { @@ -4258,9 +4269,11 @@ static int img_amend(int argc, char **argv) goto out; } + bdrv_graph_rdlock_main_loop(); if (!bs->drv->bdrv_amend_options) { error_report("Format driver '%s' does not support option amendment", fmt); + bdrv_graph_rdunlock_main_loop(); ret = -1; goto out; } @@ -4280,6 +4293,7 @@ static int img_amend(int argc, char **argv) "This option is only supported for image creation\n"); } + bdrv_graph_rdunlock_main_loop(); error_report_err(err); ret = -1; goto out; @@ -4289,6 +4303,8 @@ static int img_amend(int argc, char **argv) qemu_progress_print(0.f, 0); ret = bdrv_amend_options(bs, opts, &amend_status_cb, NULL, force, &err); qemu_progress_print(100.f, 0); + bdrv_graph_rdunlock_main_loop(); + if (ret < 0) { error_report_err(err); goto out; diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 3f75d2f5a6..f5d7202a13 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -2037,6 +2037,9 @@ static int info_f(BlockBackend *blk, int argc, char **argv) char s1[64], s2[64]; int ret; + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + if (bs->drv && bs->drv->format_name) { printf("format name: %s\n", bs->drv->format_name); } diff --git a/qga/commands-win32.c b/qga/commands-win32.c index 6beae659b7..697c65507c 100644 --- a/qga/commands-win32.c +++ b/qga/commands-win32.c @@ -501,13 +501,6 @@ static GuestDiskBusType find_bus_type(STORAGE_BUS_TYPE bus) return win2qemu[(int)bus]; } -DEFINE_GUID(GUID_DEVINTERFACE_DISK, - 0x53f56307L, 0xb6bf, 0x11d0, 0x94, 0xf2, - 0x00, 0xa0, 0xc9, 0x1e, 0xfb, 0x8b); -DEFINE_GUID(GUID_DEVINTERFACE_STORAGEPORT, - 0x2accfe60L, 0xc130, 0x11d2, 0xb0, 0x82, - 0x00, 0xa0, 0xc9, 0x1e, 0xfb, 0x8b); - static void get_pci_address_for_device(GuestPCIAddress *pci, HDEVINFO dev_info) { diff --git a/qga/commands.c b/qga/commands.c index 09c683e263..ce172edd2d 100644 --- a/qga/commands.c +++ b/qga/commands.c @@ -206,15 +206,15 @@ GuestExecStatus *qmp_guest_exec_status(int64_t pid, Error **errp) #endif if (gei->out.length > 0) { ges->out_data = g_base64_encode(gei->out.data, gei->out.length); - g_free(gei->out.data); ges->has_out_truncated = gei->out.truncated; } + g_free(gei->out.data); if (gei->err.length > 0) { ges->err_data = g_base64_encode(gei->err.data, gei->err.length); - g_free(gei->err.data); ges->has_err_truncated = gei->err.truncated; } + g_free(gei->err.data); QTAILQ_REMOVE(&guest_exec_state.processes, gei, next); g_free(gei); diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json index b720dd4379..876e2a8ea8 100644 --- a/qga/qapi-schema.json +++ b/qga/qapi-schema.json @@ -1220,11 +1220,13 @@ # @signal: signal number (linux) or unhandled exception code (windows) # if the process was abnormally terminated. # -# @out-data: base64-encoded stdout of the process +# @out-data: base64-encoded stdout of the process. This field will only +# be populated after the process exits. # -# @err-data: base64-encoded stderr of the process Note: @out-data and +# @err-data: base64-encoded stderr of the process. Note: @out-data and # @err-data are present only if 'capture-output' was specified for -# 'guest-exec' +# 'guest-exec'. 
This field will only be populated after the process +# exits. # # @out-truncated: true if stdout was not fully captured due to size # limitation. diff --git a/roms/Makefile b/roms/Makefile index 6859685290..67f709ba2d 100644 --- a/roms/Makefile +++ b/roms/Makefile @@ -147,7 +147,7 @@ skiboot: cp skiboot/skiboot.lid ../pc-bios/skiboot.lid efi: - python3 edk2-build.py --config edk2-build.config \ + $(PYTHON) edk2-build.py --config edk2-build.config \ --version-override "edk2-stable202302-for-qemu" \ --release-date "03/01/2023" rm -f ../pc-bios/edk2-*.fd.bz2 diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py index 685d0b4ed4..a601c3c672 100644 --- a/scripts/block-coroutine-wrapper.py +++ b/scripts/block-coroutine-wrapper.py @@ -87,8 +87,9 @@ class FuncDecl: raise ValueError(f"Invalid no_co function name: {self.name}") if not self.create_only_co: raise ValueError(f"no_co function can't be mixed: {self.name}") - if self.graph_rdlock: - raise ValueError(f"no_co function can't be rdlock: {self.name}") + if self.graph_rdlock and self.graph_wrlock: + raise ValueError("function can't be both rdlock and wrlock: " + f"{self.name}") self.target_name = f'{subsystem}_{subname}' self.ctx = self.gen_ctx() @@ -256,7 +257,10 @@ def gen_no_co_wrapper(func: FuncDecl) -> str: graph_lock='' graph_unlock='' - if func.graph_wrlock: + if func.graph_rdlock: + graph_lock=' bdrv_graph_rdlock_main_loop();' + graph_unlock=' bdrv_graph_rdunlock_main_loop();' + elif func.graph_wrlock: graph_lock=' bdrv_graph_wrlock(NULL);' graph_unlock=' bdrv_graph_wrunlock();' diff --git a/scripts/xml-preprocess.py b/scripts/xml-preprocess.py index 57f1d28912..57f1d28912 100755..100644 --- a/scripts/xml-preprocess.py +++ b/scripts/xml-preprocess.py diff --git a/stubs/qmp_memory_device.c b/stubs/memory_device.c index e75cac62dc..15fd93ff67 100644 --- a/stubs/qmp_memory_device.c +++ b/stubs/memory_device.c @@ -10,3 +10,13 @@ uint64_t get_plugged_memory_size(void) { return (uint64_t)-1; } + +unsigned int memory_devices_get_reserved_memslots(void) +{ + return 0; +} + +bool memory_devices_memslot_auto_decision_active(void) +{ + return false; +} diff --git a/stubs/meson.build b/stubs/meson.build index ef6e39a64d..cde44972bf 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -32,7 +32,7 @@ stub_ss.add(files('monitor.c')) stub_ss.add(files('monitor-core.c')) stub_ss.add(files('physmem.c')) stub_ss.add(files('qemu-timer-notify-cb.c')) -stub_ss.add(files('qmp_memory_device.c')) +stub_ss.add(files('memory_device.c')) stub_ss.add(files('qmp-command-available.c')) stub_ss.add(files('qmp-quit.c')) stub_ss.add(files('qtest.c')) diff --git a/subprojects/libvduse/libvduse.c b/subprojects/libvduse/libvduse.c index 377959a0b4..21ffbb5b8d 100644 --- a/subprojects/libvduse/libvduse.c +++ b/subprojects/libvduse/libvduse.c @@ -1031,7 +1031,7 @@ int vduse_dev_handler(VduseDev *dev) /* The iova will be updated by iova_to_va() later, so just remove it */ vduse_iova_remove_region(dev, req.iova.start, req.iova.last); for (i = 0; i < dev->num_queues; i++) { - VduseVirtq *vq = &dev->vqs[i]; + vq = &dev->vqs[i]; if (vq->ready) { if (vduse_queue_update_vring(vq, vq->vring.desc_addr, vq->vring.avail_addr, diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c index 051a611da3..6684057370 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c @@ -631,9 +631,9 @@ static bool generate_faults(VuDev *dev) { unsigned int i; for (i = 0; i < 
dev->nregions; i++) { +#ifdef UFFDIO_REGISTER VuDevRegion *dev_region = &dev->regions[i]; int ret; -#ifdef UFFDIO_REGISTER struct uffdio_register reg_struct; /* @@ -870,10 +870,10 @@ vu_rem_mem_reg(VuDev *dev, VhostUserMsg *vmsg) { for (i = 0; i < dev->nregions; i++) { if (reg_equal(&dev->regions[i], msg_region)) { VuDevRegion *r = &dev->regions[i]; - void *m = (void *) (uintptr_t) r->mmap_addr; + void *ma = (void *) (uintptr_t) r->mmap_addr; - if (m) { - munmap(m, r->size + r->mmap_offset); + if (ma) { + munmap(ma, r->size + r->mmap_offset); } /* @@ -1005,10 +1005,10 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) for (i = 0; i < dev->nregions; i++) { VuDevRegion *r = &dev->regions[i]; - void *m = (void *) (uintptr_t) r->mmap_addr; + void *ma = (void *) (uintptr_t) r->mmap_addr; - if (m) { - munmap(m, r->size + r->mmap_offset); + if (ma) { + munmap(ma, r->size + r->mmap_offset); } } dev->nregions = memory->nregions; diff --git a/system/main.c b/system/main.c index 694388bd7f..9b91d21ea8 100644 --- a/system/main.c +++ b/system/main.c @@ -35,7 +35,7 @@ int qemu_default_main(void) int status; status = qemu_main_loop(); - qemu_cleanup(); + qemu_cleanup(status); return status; } diff --git a/system/memory.c b/system/memory.c index fa1c99f9ba..a800fbc9e5 100644 --- a/system/memory.c +++ b/system/memory.c @@ -224,6 +224,7 @@ struct FlatRange { bool romd_mode; bool readonly; bool nonvolatile; + bool unmergeable; }; #define FOR_EACH_FLAT_RANGE(var, view) \ @@ -240,6 +241,7 @@ section_from_flat_range(FlatRange *fr, FlatView *fv) .offset_within_address_space = int128_get64(fr->addr.start), .readonly = fr->readonly, .nonvolatile = fr->nonvolatile, + .unmergeable = fr->unmergeable, }; } @@ -250,7 +252,8 @@ static bool flatrange_equal(FlatRange *a, FlatRange *b) && a->offset_in_region == b->offset_in_region && a->romd_mode == b->romd_mode && a->readonly == b->readonly - && a->nonvolatile == b->nonvolatile; + && a->nonvolatile == b->nonvolatile + && a->unmergeable == b->unmergeable; } static FlatView *flatview_new(MemoryRegion *mr_root) @@ -323,7 +326,8 @@ static bool can_merge(FlatRange *r1, FlatRange *r2) && r1->dirty_log_mask == r2->dirty_log_mask && r1->romd_mode == r2->romd_mode && r1->readonly == r2->readonly - && r1->nonvolatile == r2->nonvolatile; + && r1->nonvolatile == r2->nonvolatile + && !r1->unmergeable && !r2->unmergeable; } /* Attempt to simplify a view by merging adjacent ranges */ @@ -599,7 +603,8 @@ static void render_memory_region(FlatView *view, Int128 base, AddrRange clip, bool readonly, - bool nonvolatile) + bool nonvolatile, + bool unmergeable) { MemoryRegion *subregion; unsigned i; @@ -616,6 +621,7 @@ static void render_memory_region(FlatView *view, int128_addto(&base, int128_make64(mr->addr)); readonly |= mr->readonly; nonvolatile |= mr->nonvolatile; + unmergeable |= mr->unmergeable; tmp = addrrange_make(base, mr->size); @@ -629,14 +635,14 @@ static void render_memory_region(FlatView *view, int128_subfrom(&base, int128_make64(mr->alias->addr)); int128_subfrom(&base, int128_make64(mr->alias_offset)); render_memory_region(view, mr->alias, base, clip, - readonly, nonvolatile); + readonly, nonvolatile, unmergeable); return; } /* Render subregions in priority order. 
*/ QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) { render_memory_region(view, subregion, base, clip, - readonly, nonvolatile); + readonly, nonvolatile, unmergeable); } if (!mr->terminates) { @@ -652,6 +658,7 @@ static void render_memory_region(FlatView *view, fr.romd_mode = mr->romd_mode; fr.readonly = readonly; fr.nonvolatile = nonvolatile; + fr.unmergeable = unmergeable; /* Render the region itself into any gaps left by the current view. */ for (i = 0; i < view->nr && int128_nz(remain); ++i) { @@ -753,7 +760,7 @@ static FlatView *generate_memory_topology(MemoryRegion *mr) if (mr) { render_memory_region(view, mr, int128_zero(), addrrange_make(int128_zero(), int128_2_64()), - false, false); + false, false, false); } flatview_simplify(view); @@ -2085,7 +2092,7 @@ int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr) RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr) { - if (!memory_region_is_mapped(mr) || !memory_region_is_ram(mr)) { + if (!memory_region_is_ram(mr)) { return NULL; } return mr->rdm; @@ -2094,7 +2101,7 @@ RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr) void memory_region_set_ram_discard_manager(MemoryRegion *mr, RamDiscardManager *rdm) { - g_assert(memory_region_is_ram(mr) && !memory_region_is_mapped(mr)); + g_assert(memory_region_is_ram(mr)); g_assert(!rdm || !mr->rdm); mr->rdm = rdm; } @@ -2755,6 +2762,18 @@ void memory_region_set_alias_offset(MemoryRegion *mr, hwaddr offset) memory_region_transaction_commit(); } +void memory_region_set_unmergeable(MemoryRegion *mr, bool unmergeable) +{ + if (unmergeable == mr->unmergeable) { + return; + } + + memory_region_transaction_begin(); + mr->unmergeable = unmergeable; + memory_region_update_pending |= mr->enabled; + memory_region_transaction_commit(); +} + uint64_t memory_region_get_alignment(const MemoryRegion *mr) { return mr->align; diff --git a/system/physmem.c b/system/physmem.c index edc3ed8ab9..fc2b0fee01 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -2221,23 +2221,6 @@ ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host) return res; } -/* - * Translates a host ptr back to a RAMBlock, a ram_addr and an offset - * in that RAMBlock. - * - * ptr: Host pointer to look up - * round_offset: If true round the result offset down to a page boundary - * *ram_addr: set to result ram_addr - * *offset: set to result offset within the RAMBlock - * - * Returns: RAMBlock (or NULL if not found) - * - * By the time this function returns, the returned pointer is not protected - * by RCU anymore. If the caller is not within an RCU critical section and - * does not hold the iothread lock, it must have other means of protecting the - * pointer, such as a reference to the region that includes the incoming - * ram_addr_t. 
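The intended use case from the memory.h documentation, as a hedged sketch (hypothetical helper; the owner object, backend RAM region, and slot array are assumed to exist): several aliases into one RAM region are mapped into a container and must each remain an individual FlatRange, and hence an individual memslot.

    static void sketch_map_memslots(Object *owner, MemoryRegion *ram_mr,
                                    MemoryRegion *container,
                                    MemoryRegion *slots, int nb_slots,
                                    uint64_t slot_size)
    {
        for (int i = 0; i < nb_slots; i++) {
            memory_region_init_alias(&slots[i], owner, "memslot", ram_mr,
                                     i * slot_size, slot_size);
            /* Keep each alias a distinct section even when adjacent
             * aliases would otherwise satisfy can_merge(). */
            memory_region_set_unmergeable(&slots[i], true);
            memory_region_add_subregion(container, i * slot_size, &slots[i]);
        }
    }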
- */ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, ram_addr_t *offset) { diff --git a/system/runstate.c b/system/runstate.c index 1652ed0439..ea9d6c2a32 100644 --- a/system/runstate.c +++ b/system/runstate.c @@ -385,6 +385,7 @@ void vm_state_notify(bool running, RunState state) static ShutdownCause reset_requested; static ShutdownCause shutdown_requested; +static int shutdown_exit_code = EXIT_SUCCESS; static int shutdown_signal; static pid_t shutdown_pid; static int powerdown_requested; @@ -664,6 +665,13 @@ void qemu_system_killed(int signal, pid_t pid) qemu_notify_event(); } +void qemu_system_shutdown_request_with_code(ShutdownCause reason, + int exit_code) +{ + shutdown_exit_code = exit_code; + qemu_system_shutdown_request(reason); +} + void qemu_system_shutdown_request(ShutdownCause reason) { trace_qemu_system_shutdown_request(reason); @@ -725,7 +733,9 @@ static bool main_loop_should_exit(int *status) if (shutdown_action == SHUTDOWN_ACTION_PAUSE) { vm_stop(RUN_STATE_SHUTDOWN); } else { - if (request == SHUTDOWN_CAUSE_GUEST_PANIC && + if (shutdown_exit_code != EXIT_SUCCESS) { + *status = shutdown_exit_code; + } else if (request == SHUTDOWN_CAUSE_GUEST_PANIC && panic_action == PANIC_ACTION_EXIT_FAILURE) { *status = EXIT_FAILURE; } @@ -824,9 +834,9 @@ void qemu_init_subsystems(void) } -void qemu_cleanup(void) +void qemu_cleanup(int status) { - gdb_exit(0); + gdb_exit(status); /* * cleaning up the migration object cancels any existing migration diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c index 2b92aee207..eb29a1fd4e 100644 --- a/target/i386/tcg/seg_helper.c +++ b/target/i386/tcg/seg_helper.c @@ -229,7 +229,7 @@ static void tss_load_seg(CPUX86State *env, X86Seg seg_reg, int selector, static void tss_set_busy(CPUX86State *env, int tss_selector, bool value, uintptr_t retaddr) { - target_ulong ptr = env->gdt.base + (env->tr.selector & ~7); + target_ulong ptr = env->gdt.base + (tss_selector & ~7); uint32_t e2 = cpu_ldl_kernel_ra(env, ptr + 4, retaddr); if (value) { diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h index 04af50983e..f3fbe37a2c 100644 --- a/target/riscv/cpu-qom.h +++ b/target/riscv/cpu-qom.h @@ -30,6 +30,7 @@ #define CPU_RESOLVING_TYPE TYPE_RISCV_CPU #define TYPE_RISCV_CPU_ANY RISCV_CPU_TYPE_NAME("any") +#define TYPE_RISCV_CPU_MAX RISCV_CPU_TYPE_NAME("max") #define TYPE_RISCV_CPU_BASE32 RISCV_CPU_TYPE_NAME("rv32") #define TYPE_RISCV_CPU_BASE64 RISCV_CPU_TYPE_NAME("rv64") #define TYPE_RISCV_CPU_BASE128 RISCV_CPU_TYPE_NAME("x-rv128") diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index f5572704de..ac4a6c7eec 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -23,9 +23,7 @@ #include "qemu/log.h" #include "cpu.h" #include "cpu_vendorid.h" -#include "pmu.h" #include "internals.h" -#include "time_helper.h" #include "exec/exec-all.h" #include "qapi/error.h" #include "qapi/visitor.h" @@ -35,20 +33,13 @@ #include "fpu/softfloat-helpers.h" #include "sysemu/kvm.h" #include "sysemu/tcg.h" -#include "kvm_riscv.h" +#include "kvm/kvm_riscv.h" #include "tcg/tcg.h" /* RISC-V CPU definitions */ static const char riscv_single_letter_exts[] = "IEMAFDQCPVH"; - -struct isa_ext_data { - const char *name; - int min_version; - int ext_enable_offset; -}; - -#define ISA_EXT_DATA_ENTRY(_name, _min_ver, _prop) \ - {#_name, _min_ver, offsetof(struct RISCVCPUConfig, _prop)} +const uint32_t misa_bits[] = {RVI, RVE, RVM, RVA, RVF, RVD, RVV, + RVC, RVS, RVU, RVH, RVJ, RVG, 0}; /* * From vector_helper.c @@ -61,6 +52,9 @@ struct isa_ext_data { 
#define BYTE(x) (x) #endif +#define ISA_EXT_DATA_ENTRY(_name, _min_ver, _prop) \ + {#_name, _min_ver, CPU_CFG_OFFSET(_prop)} + /* * Here are the ordering rules of extension naming defined by RISC-V * specification : @@ -81,7 +75,7 @@ struct isa_ext_data { * Single letter extensions are checked in riscv_cpu_validate_misa_priv() * instead. */ -static const struct isa_ext_data isa_edata_arr[] = { +const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zicbom, PRIV_VERSION_1_12_0, ext_icbom), ISA_EXT_DATA_ENTRY(zicboz, PRIV_VERSION_1_12_0, ext_icboz), ISA_EXT_DATA_ENTRY(zicond, PRIV_VERSION_1_12_0, ext_zicond), @@ -160,20 +154,20 @@ static const struct isa_ext_data isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(xtheadmempair, PRIV_VERSION_1_11_0, ext_xtheadmempair), ISA_EXT_DATA_ENTRY(xtheadsync, PRIV_VERSION_1_11_0, ext_xtheadsync), ISA_EXT_DATA_ENTRY(xventanacondops, PRIV_VERSION_1_12_0, ext_XVentanaCondOps), + + DEFINE_PROP_END_OF_LIST(), }; -static bool isa_ext_is_enabled(RISCVCPU *cpu, - const struct isa_ext_data *edata) +bool isa_ext_is_enabled(RISCVCPU *cpu, uint32_t ext_offset) { - bool *ext_enabled = (void *)&cpu->cfg + edata->ext_enable_offset; + bool *ext_enabled = (void *)&cpu->cfg + ext_offset; return *ext_enabled; } -static void isa_ext_update_enabled(RISCVCPU *cpu, - const struct isa_ext_data *edata, bool en) +void isa_ext_update_enabled(RISCVCPU *cpu, uint32_t ext_offset, bool en) { - bool *ext_enabled = (void *)&cpu->cfg + edata->ext_enable_offset; + bool *ext_enabled = (void *)&cpu->cfg + ext_offset; *ext_enabled = en; } @@ -258,8 +252,6 @@ static const char * const riscv_intr_names[] = { "reserved" }; -static void riscv_cpu_add_user_properties(Object *obj); - const char *riscv_cpu_get_trap_name(target_ulong cause, bool async) { if (async) { @@ -271,7 +263,7 @@ const char *riscv_cpu_get_trap_name(target_ulong cause, bool async) } } -static void set_misa(CPURISCVState *env, RISCVMXL mxl, uint32_t ext) +void riscv_cpu_set_misa(CPURISCVState *env, RISCVMXL mxl, uint32_t ext) { env->misa_mxl_max = env->misa_mxl = mxl; env->misa_ext_mask = env->misa_ext = ext; @@ -376,9 +368,9 @@ static void riscv_any_cpu_init(Object *obj) RISCVCPU *cpu = RISCV_CPU(obj); CPURISCVState *env = &cpu->env; #if defined(TARGET_RISCV32) - set_misa(env, MXL_RV32, RVI | RVM | RVA | RVF | RVD | RVC | RVU); + riscv_cpu_set_misa(env, MXL_RV32, RVI | RVM | RVA | RVF | RVD | RVC | RVU); #elif defined(TARGET_RISCV64) - set_misa(env, MXL_RV64, RVI | RVM | RVA | RVF | RVD | RVC | RVU); + riscv_cpu_set_misa(env, MXL_RV64, RVI | RVM | RVA | RVF | RVD | RVC | RVU); #endif #ifndef CONFIG_USER_ONLY @@ -396,13 +388,29 @@ static void riscv_any_cpu_init(Object *obj) cpu->cfg.pmp = true; } +static void riscv_max_cpu_init(Object *obj) +{ + RISCVCPU *cpu = RISCV_CPU(obj); + CPURISCVState *env = &cpu->env; + RISCVMXL mlx = MXL_RV64; + +#ifdef TARGET_RISCV32 + mlx = MXL_RV32; +#endif + riscv_cpu_set_misa(env, mlx, 0); + env->priv_ver = PRIV_VERSION_LATEST; +#ifndef CONFIG_USER_ONLY + set_satp_mode_max_supported(RISCV_CPU(obj), mlx == MXL_RV32 ? 
+ VM_1_10_SV32 : VM_1_10_SV57); +#endif +} + #if defined(TARGET_RISCV64) static void rv64_base_cpu_init(Object *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; /* We set this in the realise function */ - set_misa(env, MXL_RV64, 0); - riscv_cpu_add_user_properties(obj); + riscv_cpu_set_misa(env, MXL_RV64, 0); /* Set latest version of privileged specification */ env->priv_ver = PRIV_VERSION_LATEST; #ifndef CONFIG_USER_ONLY @@ -414,7 +422,8 @@ static void rv64_sifive_u_cpu_init(Object *obj) { RISCVCPU *cpu = RISCV_CPU(obj); CPURISCVState *env = &cpu->env; - set_misa(env, MXL_RV64, RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); + riscv_cpu_set_misa(env, MXL_RV64, + RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); env->priv_ver = PRIV_VERSION_1_10_0; #ifndef CONFIG_USER_ONLY set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV39); @@ -432,7 +441,7 @@ static void rv64_sifive_e_cpu_init(Object *obj) CPURISCVState *env = &RISCV_CPU(obj)->env; RISCVCPU *cpu = RISCV_CPU(obj); - set_misa(env, MXL_RV64, RVI | RVM | RVA | RVC | RVU); + riscv_cpu_set_misa(env, MXL_RV64, RVI | RVM | RVA | RVC | RVU); env->priv_ver = PRIV_VERSION_1_10_0; #ifndef CONFIG_USER_ONLY set_satp_mode_max_supported(cpu, VM_1_10_MBARE); @@ -449,7 +458,7 @@ static void rv64_thead_c906_cpu_init(Object *obj) CPURISCVState *env = &RISCV_CPU(obj)->env; RISCVCPU *cpu = RISCV_CPU(obj); - set_misa(env, MXL_RV64, RVG | RVC | RVS | RVU); + riscv_cpu_set_misa(env, MXL_RV64, RVG | RVC | RVS | RVU); env->priv_ver = PRIV_VERSION_1_11_0; cpu->cfg.ext_zfa = true; @@ -480,7 +489,7 @@ static void rv64_veyron_v1_cpu_init(Object *obj) CPURISCVState *env = &RISCV_CPU(obj)->env; RISCVCPU *cpu = RISCV_CPU(obj); - set_misa(env, MXL_RV64, RVG | RVC | RVS | RVU | RVH); + riscv_cpu_set_misa(env, MXL_RV64, RVG | RVC | RVS | RVU | RVH); env->priv_ver = PRIV_VERSION_1_12_0; /* Enable ISA extensions */ @@ -525,8 +534,7 @@ static void rv128_base_cpu_init(Object *obj) } CPURISCVState *env = &RISCV_CPU(obj)->env; /* We set this in the realise function */ - set_misa(env, MXL_RV128, 0); - riscv_cpu_add_user_properties(obj); + riscv_cpu_set_misa(env, MXL_RV128, 0); /* Set latest version of privileged specification */ env->priv_ver = PRIV_VERSION_LATEST; #ifndef CONFIG_USER_ONLY @@ -538,8 +546,7 @@ static void rv32_base_cpu_init(Object *obj) { CPURISCVState *env = &RISCV_CPU(obj)->env; /* We set this in the realise function */ - set_misa(env, MXL_RV32, 0); - riscv_cpu_add_user_properties(obj); + riscv_cpu_set_misa(env, MXL_RV32, 0); /* Set latest version of privileged specification */ env->priv_ver = PRIV_VERSION_LATEST; #ifndef CONFIG_USER_ONLY @@ -551,7 +558,8 @@ static void rv32_sifive_u_cpu_init(Object *obj) { RISCVCPU *cpu = RISCV_CPU(obj); CPURISCVState *env = &cpu->env; - set_misa(env, MXL_RV32, RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); + riscv_cpu_set_misa(env, MXL_RV32, + RVI | RVM | RVA | RVF | RVD | RVC | RVS | RVU); env->priv_ver = PRIV_VERSION_1_10_0; #ifndef CONFIG_USER_ONLY set_satp_mode_max_supported(RISCV_CPU(obj), VM_1_10_SV32); @@ -569,7 +577,7 @@ static void rv32_sifive_e_cpu_init(Object *obj) CPURISCVState *env = &RISCV_CPU(obj)->env; RISCVCPU *cpu = RISCV_CPU(obj); - set_misa(env, MXL_RV32, RVI | RVM | RVA | RVC | RVU); + riscv_cpu_set_misa(env, MXL_RV32, RVI | RVM | RVA | RVC | RVU); env->priv_ver = PRIV_VERSION_1_10_0; #ifndef CONFIG_USER_ONLY set_satp_mode_max_supported(cpu, VM_1_10_MBARE); @@ -586,7 +594,7 @@ static void rv32_ibex_cpu_init(Object *obj) CPURISCVState *env = &RISCV_CPU(obj)->env; RISCVCPU *cpu = RISCV_CPU(obj); - 
set_misa(env, MXL_RV32, RVI | RVM | RVC | RVU); + riscv_cpu_set_misa(env, MXL_RV32, RVI | RVM | RVC | RVU); env->priv_ver = PRIV_VERSION_1_11_0; #ifndef CONFIG_USER_ONLY set_satp_mode_max_supported(cpu, VM_1_10_MBARE); @@ -604,7 +612,7 @@ static void rv32_imafcu_nommu_cpu_init(Object *obj) CPURISCVState *env = &RISCV_CPU(obj)->env; RISCVCPU *cpu = RISCV_CPU(obj); - set_misa(env, MXL_RV32, RVI | RVM | RVA | RVF | RVC | RVU); + riscv_cpu_set_misa(env, MXL_RV32, RVI | RVM | RVA | RVF | RVC | RVU); env->priv_ver = PRIV_VERSION_1_10_0; #ifndef CONFIG_USER_ONLY set_satp_mode_max_supported(cpu, VM_1_10_MBARE); @@ -617,19 +625,6 @@ static void rv32_imafcu_nommu_cpu_init(Object *obj) } #endif -#if defined(CONFIG_KVM) -static void riscv_host_cpu_init(Object *obj) -{ - CPURISCVState *env = &RISCV_CPU(obj)->env; -#if defined(TARGET_RISCV32) - set_misa(env, MXL_RV32, 0); -#elif defined(TARGET_RISCV64) - set_misa(env, MXL_RV64, 0); -#endif - riscv_cpu_add_user_properties(obj); -} -#endif /* CONFIG_KVM */ - static ObjectClass *riscv_cpu_class_by_name(const char *cpu_model) { ObjectClass *oc; @@ -648,6 +643,17 @@ static ObjectClass *riscv_cpu_class_by_name(const char *cpu_model) return oc; } +char *riscv_cpu_get_name(RISCVCPU *cpu) +{ + RISCVCPUClass *rcc = RISCV_CPU_GET_CLASS(cpu); + const char *typename = object_class_get_name(OBJECT_CLASS(rcc)); + + g_assert(g_str_has_suffix(typename, RISCV_CPU_TYPE_SUFFIX)); + + return g_strndup(typename, + strlen(typename) - strlen(RISCV_CPU_TYPE_SUFFIX)); +} + static void riscv_cpu_dump_state(CPUState *cs, FILE *f, int flags) { RISCVCPU *cpu = RISCV_CPU(cs); @@ -798,24 +804,6 @@ static vaddr riscv_cpu_get_pc(CPUState *cs) return env->pc; } -static void riscv_cpu_synchronize_from_tb(CPUState *cs, - const TranslationBlock *tb) -{ - if (!(tb_cflags(tb) & CF_PCREL)) { - RISCVCPU *cpu = RISCV_CPU(cs); - CPURISCVState *env = &cpu->env; - RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); - - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); - - if (xl == MXL_RV32) { - env->pc = (int32_t) tb->pc; - } else { - env->pc = tb->pc; - } - } -} - static bool riscv_cpu_has_work(CPUState *cs) { #ifndef CONFIG_USER_ONLY @@ -831,29 +819,6 @@ static bool riscv_cpu_has_work(CPUState *cs) #endif } -static void riscv_restore_state_to_opc(CPUState *cs, - const TranslationBlock *tb, - const uint64_t *data) -{ - RISCVCPU *cpu = RISCV_CPU(cs); - CPURISCVState *env = &cpu->env; - RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); - target_ulong pc; - - if (tb_cflags(tb) & CF_PCREL) { - pc = (env->pc & TARGET_PAGE_MASK) | data[0]; - } else { - pc = data[0]; - } - - if (xl == MXL_RV32) { - env->pc = (int32_t)pc; - } else { - env->pc = pc; - } - env->bins = data[1]; -} - static void riscv_cpu_reset_hold(Object *obj) { #ifndef CONFIG_USER_ONLY @@ -956,390 +921,6 @@ static void riscv_cpu_disas_set_info(CPUState *s, disassemble_info *info) } } -static void riscv_cpu_validate_v(CPURISCVState *env, RISCVCPUConfig *cfg, - Error **errp) -{ - int vext_version = VEXT_VERSION_1_00_0; - - if (!is_power_of_2(cfg->vlen)) { - error_setg(errp, "Vector extension VLEN must be power of 2"); - return; - } - if (cfg->vlen > RV_VLEN_MAX || cfg->vlen < 128) { - error_setg(errp, - "Vector extension implementation only supports VLEN " - "in the range [128, %d]", RV_VLEN_MAX); - return; - } - if (!is_power_of_2(cfg->elen)) { - error_setg(errp, "Vector extension ELEN must be power of 2"); - return; - } - if (cfg->elen > 64 || cfg->elen < 8) { - error_setg(errp, - "Vector extension implementation only supports ELEN " - "in 
the range [8, 64]"); - return; - } - if (cfg->vext_spec) { - if (!g_strcmp0(cfg->vext_spec, "v1.0")) { - vext_version = VEXT_VERSION_1_00_0; - } else { - error_setg(errp, "Unsupported vector spec version '%s'", - cfg->vext_spec); - return; - } - } else { - qemu_log("vector version is not specified, " - "use the default value v1.0\n"); - } - env->vext_ver = vext_version; -} - -static void riscv_cpu_validate_priv_spec(RISCVCPU *cpu, Error **errp) -{ - CPURISCVState *env = &cpu->env; - int priv_version = -1; - - if (cpu->cfg.priv_spec) { - if (!g_strcmp0(cpu->cfg.priv_spec, "v1.12.0")) { - priv_version = PRIV_VERSION_1_12_0; - } else if (!g_strcmp0(cpu->cfg.priv_spec, "v1.11.0")) { - priv_version = PRIV_VERSION_1_11_0; - } else if (!g_strcmp0(cpu->cfg.priv_spec, "v1.10.0")) { - priv_version = PRIV_VERSION_1_10_0; - } else { - error_setg(errp, - "Unsupported privilege spec version '%s'", - cpu->cfg.priv_spec); - return; - } - - env->priv_ver = priv_version; - } -} - -static void riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu) -{ - CPURISCVState *env = &cpu->env; - int i; - - /* Force disable extensions if priv spec version does not match */ - for (i = 0; i < ARRAY_SIZE(isa_edata_arr); i++) { - if (isa_ext_is_enabled(cpu, &isa_edata_arr[i]) && - (env->priv_ver < isa_edata_arr[i].min_version)) { - isa_ext_update_enabled(cpu, &isa_edata_arr[i], false); -#ifndef CONFIG_USER_ONLY - warn_report("disabling %s extension for hart 0x" TARGET_FMT_lx - " because privilege spec version does not match", - isa_edata_arr[i].name, env->mhartid); -#else - warn_report("disabling %s extension because " - "privilege spec version does not match", - isa_edata_arr[i].name); -#endif - } - } -} - -static void riscv_cpu_validate_misa_mxl(RISCVCPU *cpu, Error **errp) -{ - RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(cpu); - CPUClass *cc = CPU_CLASS(mcc); - CPURISCVState *env = &cpu->env; - - /* Validate that MISA_MXL is set properly. */ - switch (env->misa_mxl_max) { -#ifdef TARGET_RISCV64 - case MXL_RV64: - case MXL_RV128: - cc->gdb_core_xml_file = "riscv-64bit-cpu.xml"; - break; -#endif - case MXL_RV32: - cc->gdb_core_xml_file = "riscv-32bit-cpu.xml"; - break; - default: - g_assert_not_reached(); - } - - if (env->misa_mxl_max != env->misa_mxl) { - error_setg(errp, "misa_mxl_max must be equal to misa_mxl"); - return; - } -} - -/* - * Check consistency between chosen extensions while setting - * cpu->cfg accordingly. 
- */ -void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) -{ - CPURISCVState *env = &cpu->env; - Error *local_err = NULL; - - /* Do some ISA extension error checking */ - if (riscv_has_ext(env, RVG) && - !(riscv_has_ext(env, RVI) && riscv_has_ext(env, RVM) && - riscv_has_ext(env, RVA) && riscv_has_ext(env, RVF) && - riscv_has_ext(env, RVD) && - cpu->cfg.ext_icsr && cpu->cfg.ext_ifencei)) { - warn_report("Setting G will also set IMAFD_Zicsr_Zifencei"); - cpu->cfg.ext_icsr = true; - cpu->cfg.ext_ifencei = true; - - env->misa_ext |= RVI | RVM | RVA | RVF | RVD; - env->misa_ext_mask |= RVI | RVM | RVA | RVF | RVD; - } - - if (riscv_has_ext(env, RVI) && riscv_has_ext(env, RVE)) { - error_setg(errp, - "I and E extensions are incompatible"); - return; - } - - if (!riscv_has_ext(env, RVI) && !riscv_has_ext(env, RVE)) { - error_setg(errp, - "Either I or E extension must be set"); - return; - } - - if (riscv_has_ext(env, RVS) && !riscv_has_ext(env, RVU)) { - error_setg(errp, - "Setting S extension without U extension is illegal"); - return; - } - - if (riscv_has_ext(env, RVH) && !riscv_has_ext(env, RVI)) { - error_setg(errp, - "H depends on an I base integer ISA with 32 x registers"); - return; - } - - if (riscv_has_ext(env, RVH) && !riscv_has_ext(env, RVS)) { - error_setg(errp, "H extension implicitly requires S-mode"); - return; - } - - if (riscv_has_ext(env, RVF) && !cpu->cfg.ext_icsr) { - error_setg(errp, "F extension requires Zicsr"); - return; - } - - if ((cpu->cfg.ext_zawrs) && !riscv_has_ext(env, RVA)) { - error_setg(errp, "Zawrs extension requires A extension"); - return; - } - - if (cpu->cfg.ext_zfa && !riscv_has_ext(env, RVF)) { - error_setg(errp, "Zfa extension requires F extension"); - return; - } - - if (cpu->cfg.ext_zfh) { - cpu->cfg.ext_zfhmin = true; - } - - if (cpu->cfg.ext_zfhmin && !riscv_has_ext(env, RVF)) { - error_setg(errp, "Zfh/Zfhmin extensions require F extension"); - return; - } - - if (cpu->cfg.ext_zfbfmin && !riscv_has_ext(env, RVF)) { - error_setg(errp, "Zfbfmin extension depends on F extension"); - return; - } - - if (riscv_has_ext(env, RVD) && !riscv_has_ext(env, RVF)) { - error_setg(errp, "D extension requires F extension"); - return; - } - - if (riscv_has_ext(env, RVV)) { - riscv_cpu_validate_v(env, &cpu->cfg, &local_err); - if (local_err != NULL) { - error_propagate(errp, local_err); - return; - } - - /* The V vector extension depends on the Zve64d extension */ - cpu->cfg.ext_zve64d = true; - } - - /* The Zve64d extension depends on the Zve64f extension */ - if (cpu->cfg.ext_zve64d) { - cpu->cfg.ext_zve64f = true; - } - - /* The Zve64f extension depends on the Zve32f extension */ - if (cpu->cfg.ext_zve64f) { - cpu->cfg.ext_zve32f = true; - } - - if (cpu->cfg.ext_zve64d && !riscv_has_ext(env, RVD)) { - error_setg(errp, "Zve64d/V extensions require D extension"); - return; - } - - if (cpu->cfg.ext_zve32f && !riscv_has_ext(env, RVF)) { - error_setg(errp, "Zve32f/Zve64f extensions require F extension"); - return; - } - - if (cpu->cfg.ext_zvfh) { - cpu->cfg.ext_zvfhmin = true; - } - - if (cpu->cfg.ext_zvfhmin && !cpu->cfg.ext_zve32f) { - error_setg(errp, "Zvfh/Zvfhmin extensions require Zve32f extension"); - return; - } - - if (cpu->cfg.ext_zvfh && !cpu->cfg.ext_zfhmin) { - error_setg(errp, "Zvfh extensions requires Zfhmin extension"); - return; - } - - if (cpu->cfg.ext_zvfbfmin && !cpu->cfg.ext_zfbfmin) { - error_setg(errp, "Zvfbfmin extension depends on Zfbfmin extension"); - return; - } - - if (cpu->cfg.ext_zvfbfmin && !cpu->cfg.ext_zve32f) { - 
error_setg(errp, "Zvfbfmin extension depends on Zve32f extension"); - return; - } - - if (cpu->cfg.ext_zvfbfwma && !cpu->cfg.ext_zvfbfmin) { - error_setg(errp, "Zvfbfwma extension depends on Zvfbfmin extension"); - return; - } - - /* Set the ISA extensions, checks should have happened above */ - if (cpu->cfg.ext_zhinx) { - cpu->cfg.ext_zhinxmin = true; - } - - if ((cpu->cfg.ext_zdinx || cpu->cfg.ext_zhinxmin) && !cpu->cfg.ext_zfinx) { - error_setg(errp, "Zdinx/Zhinx/Zhinxmin extensions require Zfinx"); - return; - } - - if (cpu->cfg.ext_zfinx) { - if (!cpu->cfg.ext_icsr) { - error_setg(errp, "Zfinx extension requires Zicsr"); - return; - } - if (riscv_has_ext(env, RVF)) { - error_setg(errp, - "Zfinx cannot be supported together with F extension"); - return; - } - } - - if (cpu->cfg.ext_zce) { - cpu->cfg.ext_zca = true; - cpu->cfg.ext_zcb = true; - cpu->cfg.ext_zcmp = true; - cpu->cfg.ext_zcmt = true; - if (riscv_has_ext(env, RVF) && env->misa_mxl_max == MXL_RV32) { - cpu->cfg.ext_zcf = true; - } - } - - /* zca, zcd and zcf has a PRIV 1.12.0 restriction */ - if (riscv_has_ext(env, RVC) && env->priv_ver >= PRIV_VERSION_1_12_0) { - cpu->cfg.ext_zca = true; - if (riscv_has_ext(env, RVF) && env->misa_mxl_max == MXL_RV32) { - cpu->cfg.ext_zcf = true; - } - if (riscv_has_ext(env, RVD)) { - cpu->cfg.ext_zcd = true; - } - } - - if (env->misa_mxl_max != MXL_RV32 && cpu->cfg.ext_zcf) { - error_setg(errp, "Zcf extension is only relevant to RV32"); - return; - } - - if (!riscv_has_ext(env, RVF) && cpu->cfg.ext_zcf) { - error_setg(errp, "Zcf extension requires F extension"); - return; - } - - if (!riscv_has_ext(env, RVD) && cpu->cfg.ext_zcd) { - error_setg(errp, "Zcd extension requires D extension"); - return; - } - - if ((cpu->cfg.ext_zcf || cpu->cfg.ext_zcd || cpu->cfg.ext_zcb || - cpu->cfg.ext_zcmp || cpu->cfg.ext_zcmt) && !cpu->cfg.ext_zca) { - error_setg(errp, "Zcf/Zcd/Zcb/Zcmp/Zcmt extensions require Zca " - "extension"); - return; - } - - if (cpu->cfg.ext_zcd && (cpu->cfg.ext_zcmp || cpu->cfg.ext_zcmt)) { - error_setg(errp, "Zcmp/Zcmt extensions are incompatible with " - "Zcd extension"); - return; - } - - if (cpu->cfg.ext_zcmt && !cpu->cfg.ext_icsr) { - error_setg(errp, "Zcmt extension requires Zicsr extension"); - return; - } - - /* - * In principle Zve*x would also suffice here, were they supported - * in qemu - */ - if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkg || cpu->cfg.ext_zvkned || - cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksed || cpu->cfg.ext_zvksh) && - !cpu->cfg.ext_zve32f) { - error_setg(errp, - "Vector crypto extensions require V or Zve* extensions"); - return; - } - - if ((cpu->cfg.ext_zvbc || cpu->cfg.ext_zvknhb) && !cpu->cfg.ext_zve64f) { - error_setg( - errp, - "Zvbc and Zvknhb extensions require V or Zve64{f,d} extensions"); - return; - } - - if (cpu->cfg.ext_zk) { - cpu->cfg.ext_zkn = true; - cpu->cfg.ext_zkr = true; - cpu->cfg.ext_zkt = true; - } - - if (cpu->cfg.ext_zkn) { - cpu->cfg.ext_zbkb = true; - cpu->cfg.ext_zbkc = true; - cpu->cfg.ext_zbkx = true; - cpu->cfg.ext_zkne = true; - cpu->cfg.ext_zknd = true; - cpu->cfg.ext_zknh = true; - } - - if (cpu->cfg.ext_zks) { - cpu->cfg.ext_zbkb = true; - cpu->cfg.ext_zbkc = true; - cpu->cfg.ext_zbkx = true; - cpu->cfg.ext_zksed = true; - cpu->cfg.ext_zksh = true; - } - - /* - * Disable isa extensions based on priv spec after we - * validated and set everything we need. 
- */ - riscv_cpu_disable_priv_spec_isa_exts(cpu); -} - #ifndef CONFIG_USER_ONLY static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp) { @@ -1428,74 +1009,6 @@ static void riscv_cpu_finalize_features(RISCVCPU *cpu, Error **errp) #endif } -static void riscv_cpu_validate_misa_priv(CPURISCVState *env, Error **errp) -{ - if (riscv_has_ext(env, RVH) && env->priv_ver < PRIV_VERSION_1_12_0) { - error_setg(errp, "H extension requires priv spec 1.12.0"); - return; - } -} - -static void riscv_cpu_realize_tcg(DeviceState *dev, Error **errp) -{ - RISCVCPU *cpu = RISCV_CPU(dev); - CPURISCVState *env = &cpu->env; - Error *local_err = NULL; - - if (object_dynamic_cast(OBJECT(dev), TYPE_RISCV_CPU_HOST)) { - error_setg(errp, "'host' CPU is not compatible with TCG acceleration"); - return; - } - - riscv_cpu_validate_misa_mxl(cpu, &local_err); - if (local_err != NULL) { - error_propagate(errp, local_err); - return; - } - - riscv_cpu_validate_priv_spec(cpu, &local_err); - if (local_err != NULL) { - error_propagate(errp, local_err); - return; - } - - riscv_cpu_validate_misa_priv(env, &local_err); - if (local_err != NULL) { - error_propagate(errp, local_err); - return; - } - - if (cpu->cfg.epmp && !cpu->cfg.pmp) { - /* - * Enhanced PMP should only be available - * on harts with PMP support - */ - error_setg(errp, "Invalid configuration: EPMP requires PMP support"); - return; - } - - riscv_cpu_validate_set_extensions(cpu, &local_err); - if (local_err != NULL) { - error_propagate(errp, local_err); - return; - } - -#ifndef CONFIG_USER_ONLY - CPU(dev)->tcg_cflags |= CF_PCREL; - - if (cpu->cfg.ext_sstc) { - riscv_timer_init(cpu); - } - - if (cpu->cfg.pmu_num) { - if (!riscv_pmu_init(cpu, cpu->cfg.pmu_num) && cpu->cfg.ext_sscofpmf) { - cpu->pmu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, - riscv_pmu_timer_cb, cpu); - } - } -#endif -} - static void riscv_cpu_realize(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); @@ -1503,20 +1016,17 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(dev); Error *local_err = NULL; + if (object_dynamic_cast(OBJECT(dev), TYPE_RISCV_CPU_ANY) != NULL) { + warn_report("The 'any' CPU is deprecated and will be " + "removed in the future."); + } + cpu_exec_realizefn(cs, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); return; } - if (tcg_enabled()) { - riscv_cpu_realize_tcg(dev, &local_err); - if (local_err != NULL) { - error_propagate(errp, local_err); - return; - } - } - riscv_cpu_finalize_features(cpu, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); @@ -1565,7 +1075,7 @@ static void cpu_riscv_set_satp(Object *obj, Visitor *v, const char *name, satp_map->init |= 1 << satp; } -static void riscv_add_satp_mode_properties(Object *obj) +void riscv_add_satp_mode_properties(Object *obj) { RISCVCPU *cpu = RISCV_CPU(obj); @@ -1647,55 +1157,22 @@ static void riscv_cpu_set_irq(void *opaque, int irq, int level) } #endif /* CONFIG_USER_ONLY */ -static void riscv_cpu_init(Object *obj) +static bool riscv_cpu_is_dynamic(Object *cpu_obj) { -#ifndef CONFIG_USER_ONLY - qdev_init_gpio_in(DEVICE(obj), riscv_cpu_set_irq, - IRQ_LOCAL_MAX + IRQ_LOCAL_GUEST_MAX); -#endif /* CONFIG_USER_ONLY */ + return object_dynamic_cast(cpu_obj, TYPE_RISCV_DYNAMIC_CPU) != NULL; } -typedef struct RISCVCPUMisaExtConfig { - const char *name; - const char *description; - target_ulong misa_bit; - bool enabled; -} RISCVCPUMisaExtConfig; - -static void cpu_set_misa_ext_cfg(Object *obj, Visitor *v, const char 
*name, - void *opaque, Error **errp) +static void riscv_cpu_post_init(Object *obj) { - const RISCVCPUMisaExtConfig *misa_ext_cfg = opaque; - target_ulong misa_bit = misa_ext_cfg->misa_bit; - RISCVCPU *cpu = RISCV_CPU(obj); - CPURISCVState *env = &cpu->env; - bool value; - - if (!visit_type_bool(v, name, &value, errp)) { - return; - } - - if (value) { - env->misa_ext |= misa_bit; - env->misa_ext_mask |= misa_bit; - } else { - env->misa_ext &= ~misa_bit; - env->misa_ext_mask &= ~misa_bit; - } + accel_cpu_instance_init(CPU(obj)); } -static void cpu_get_misa_ext_cfg(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) +static void riscv_cpu_init(Object *obj) { - const RISCVCPUMisaExtConfig *misa_ext_cfg = opaque; - target_ulong misa_bit = misa_ext_cfg->misa_bit; - RISCVCPU *cpu = RISCV_CPU(obj); - CPURISCVState *env = &cpu->env; - bool value; - - value = env->misa_ext & misa_bit; - - visit_type_bool(v, name, &value, errp); +#ifndef CONFIG_USER_ONLY + qdev_init_gpio_in(DEVICE(obj), riscv_cpu_set_irq, + IRQ_LOCAL_MAX + IRQ_LOCAL_GUEST_MAX); +#endif /* CONFIG_USER_ONLY */ } typedef struct misa_ext_info { @@ -1758,231 +1235,150 @@ const char *riscv_get_misa_ext_description(uint32_t bit) return val; } -#define MISA_CFG(_bit, _enabled) \ - {.misa_bit = _bit, .enabled = _enabled} - -static RISCVCPUMisaExtConfig misa_ext_cfgs[] = { - MISA_CFG(RVA, true), - MISA_CFG(RVC, true), - MISA_CFG(RVD, true), - MISA_CFG(RVF, true), - MISA_CFG(RVI, true), - MISA_CFG(RVE, false), - MISA_CFG(RVM, true), - MISA_CFG(RVS, true), - MISA_CFG(RVU, true), - MISA_CFG(RVH, true), - MISA_CFG(RVJ, false), - MISA_CFG(RVV, false), - MISA_CFG(RVG, false), -}; - -static void riscv_cpu_add_misa_properties(Object *cpu_obj) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(misa_ext_cfgs); i++) { - RISCVCPUMisaExtConfig *misa_cfg = &misa_ext_cfgs[i]; - int bit = misa_cfg->misa_bit; +#define MULTI_EXT_CFG_BOOL(_name, _prop, _defval) \ + {.name = _name, .offset = CPU_CFG_OFFSET(_prop), \ + .enabled = _defval} - misa_cfg->name = riscv_get_misa_ext_name(bit); - misa_cfg->description = riscv_get_misa_ext_description(bit); - - /* Check if KVM already created the property */ - if (object_property_find(cpu_obj, misa_cfg->name)) { - continue; - } - - object_property_add(cpu_obj, misa_cfg->name, "bool", - cpu_get_misa_ext_cfg, - cpu_set_misa_ext_cfg, - NULL, (void *)misa_cfg); - object_property_set_description(cpu_obj, misa_cfg->name, - misa_cfg->description); - object_property_set_bool(cpu_obj, misa_cfg->name, - misa_cfg->enabled, NULL); - } -} - -static Property riscv_cpu_extensions[] = { +const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = { /* Defaults for standard extensions */ - DEFINE_PROP_UINT8("pmu-num", RISCVCPU, cfg.pmu_num, 16), - DEFINE_PROP_BOOL("sscofpmf", RISCVCPU, cfg.ext_sscofpmf, false), - DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true), - DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true), - DEFINE_PROP_BOOL("Zihintntl", RISCVCPU, cfg.ext_zihintntl, true), - DEFINE_PROP_BOOL("Zihintpause", RISCVCPU, cfg.ext_zihintpause, true), - DEFINE_PROP_BOOL("Zawrs", RISCVCPU, cfg.ext_zawrs, true), - DEFINE_PROP_BOOL("Zfa", RISCVCPU, cfg.ext_zfa, true), - DEFINE_PROP_BOOL("Zfh", RISCVCPU, cfg.ext_zfh, false), - DEFINE_PROP_BOOL("Zfhmin", RISCVCPU, cfg.ext_zfhmin, false), - DEFINE_PROP_BOOL("Zve32f", RISCVCPU, cfg.ext_zve32f, false), - DEFINE_PROP_BOOL("Zve64f", RISCVCPU, cfg.ext_zve64f, false), - DEFINE_PROP_BOOL("Zve64d", RISCVCPU, cfg.ext_zve64d, false), - DEFINE_PROP_BOOL("mmu", RISCVCPU, cfg.mmu, 
true), - DEFINE_PROP_BOOL("pmp", RISCVCPU, cfg.pmp, true), - DEFINE_PROP_BOOL("sstc", RISCVCPU, cfg.ext_sstc, true), + MULTI_EXT_CFG_BOOL("sscofpmf", ext_sscofpmf, false), + MULTI_EXT_CFG_BOOL("zifencei", ext_ifencei, true), + MULTI_EXT_CFG_BOOL("zicsr", ext_icsr, true), + MULTI_EXT_CFG_BOOL("zihintntl", ext_zihintntl, true), + MULTI_EXT_CFG_BOOL("zihintpause", ext_zihintpause, true), + MULTI_EXT_CFG_BOOL("zawrs", ext_zawrs, true), + MULTI_EXT_CFG_BOOL("zfa", ext_zfa, true), + MULTI_EXT_CFG_BOOL("zfh", ext_zfh, false), + MULTI_EXT_CFG_BOOL("zfhmin", ext_zfhmin, false), + MULTI_EXT_CFG_BOOL("zve32f", ext_zve32f, false), + MULTI_EXT_CFG_BOOL("zve64f", ext_zve64f, false), + MULTI_EXT_CFG_BOOL("zve64d", ext_zve64d, false), + MULTI_EXT_CFG_BOOL("sstc", ext_sstc, true), + + MULTI_EXT_CFG_BOOL("smstateen", ext_smstateen, false), + MULTI_EXT_CFG_BOOL("svadu", ext_svadu, true), + MULTI_EXT_CFG_BOOL("svinval", ext_svinval, false), + MULTI_EXT_CFG_BOOL("svnapot", ext_svnapot, false), + MULTI_EXT_CFG_BOOL("svpbmt", ext_svpbmt, false), + + MULTI_EXT_CFG_BOOL("zba", ext_zba, true), + MULTI_EXT_CFG_BOOL("zbb", ext_zbb, true), + MULTI_EXT_CFG_BOOL("zbc", ext_zbc, true), + MULTI_EXT_CFG_BOOL("zbkb", ext_zbkb, false), + MULTI_EXT_CFG_BOOL("zbkc", ext_zbkc, false), + MULTI_EXT_CFG_BOOL("zbkx", ext_zbkx, false), + MULTI_EXT_CFG_BOOL("zbs", ext_zbs, true), + MULTI_EXT_CFG_BOOL("zk", ext_zk, false), + MULTI_EXT_CFG_BOOL("zkn", ext_zkn, false), + MULTI_EXT_CFG_BOOL("zknd", ext_zknd, false), + MULTI_EXT_CFG_BOOL("zkne", ext_zkne, false), + MULTI_EXT_CFG_BOOL("zknh", ext_zknh, false), + MULTI_EXT_CFG_BOOL("zkr", ext_zkr, false), + MULTI_EXT_CFG_BOOL("zks", ext_zks, false), + MULTI_EXT_CFG_BOOL("zksed", ext_zksed, false), + MULTI_EXT_CFG_BOOL("zksh", ext_zksh, false), + MULTI_EXT_CFG_BOOL("zkt", ext_zkt, false), + + MULTI_EXT_CFG_BOOL("zdinx", ext_zdinx, false), + MULTI_EXT_CFG_BOOL("zfinx", ext_zfinx, false), + MULTI_EXT_CFG_BOOL("zhinx", ext_zhinx, false), + MULTI_EXT_CFG_BOOL("zhinxmin", ext_zhinxmin, false), + + MULTI_EXT_CFG_BOOL("zicbom", ext_icbom, true), + MULTI_EXT_CFG_BOOL("zicboz", ext_icboz, true), + + MULTI_EXT_CFG_BOOL("zmmul", ext_zmmul, false), + + MULTI_EXT_CFG_BOOL("zca", ext_zca, false), + MULTI_EXT_CFG_BOOL("zcb", ext_zcb, false), + MULTI_EXT_CFG_BOOL("zcd", ext_zcd, false), + MULTI_EXT_CFG_BOOL("zce", ext_zce, false), + MULTI_EXT_CFG_BOOL("zcf", ext_zcf, false), + MULTI_EXT_CFG_BOOL("zcmp", ext_zcmp, false), + MULTI_EXT_CFG_BOOL("zcmt", ext_zcmt, false), + MULTI_EXT_CFG_BOOL("zicond", ext_zicond, false), - DEFINE_PROP_STRING("priv_spec", RISCVCPU, cfg.priv_spec), - DEFINE_PROP_STRING("vext_spec", RISCVCPU, cfg.vext_spec), - DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128), - DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64), + DEFINE_PROP_END_OF_LIST(), +}; - DEFINE_PROP_BOOL("smstateen", RISCVCPU, cfg.ext_smstateen, false), - DEFINE_PROP_BOOL("svadu", RISCVCPU, cfg.ext_svadu, true), - DEFINE_PROP_BOOL("svinval", RISCVCPU, cfg.ext_svinval, false), - DEFINE_PROP_BOOL("svnapot", RISCVCPU, cfg.ext_svnapot, false), - DEFINE_PROP_BOOL("svpbmt", RISCVCPU, cfg.ext_svpbmt, false), - - DEFINE_PROP_BOOL("zba", RISCVCPU, cfg.ext_zba, true), - DEFINE_PROP_BOOL("zbb", RISCVCPU, cfg.ext_zbb, true), - DEFINE_PROP_BOOL("zbc", RISCVCPU, cfg.ext_zbc, true), - DEFINE_PROP_BOOL("zbkb", RISCVCPU, cfg.ext_zbkb, false), - DEFINE_PROP_BOOL("zbkc", RISCVCPU, cfg.ext_zbkc, false), - DEFINE_PROP_BOOL("zbkx", RISCVCPU, cfg.ext_zbkx, false), - DEFINE_PROP_BOOL("zbs", RISCVCPU, cfg.ext_zbs, true), - 
DEFINE_PROP_BOOL("zk", RISCVCPU, cfg.ext_zk, false), - DEFINE_PROP_BOOL("zkn", RISCVCPU, cfg.ext_zkn, false), - DEFINE_PROP_BOOL("zknd", RISCVCPU, cfg.ext_zknd, false), - DEFINE_PROP_BOOL("zkne", RISCVCPU, cfg.ext_zkne, false), - DEFINE_PROP_BOOL("zknh", RISCVCPU, cfg.ext_zknh, false), - DEFINE_PROP_BOOL("zkr", RISCVCPU, cfg.ext_zkr, false), - DEFINE_PROP_BOOL("zks", RISCVCPU, cfg.ext_zks, false), - DEFINE_PROP_BOOL("zksed", RISCVCPU, cfg.ext_zksed, false), - DEFINE_PROP_BOOL("zksh", RISCVCPU, cfg.ext_zksh, false), - DEFINE_PROP_BOOL("zkt", RISCVCPU, cfg.ext_zkt, false), - - DEFINE_PROP_BOOL("zdinx", RISCVCPU, cfg.ext_zdinx, false), - DEFINE_PROP_BOOL("zfinx", RISCVCPU, cfg.ext_zfinx, false), - DEFINE_PROP_BOOL("zhinx", RISCVCPU, cfg.ext_zhinx, false), - DEFINE_PROP_BOOL("zhinxmin", RISCVCPU, cfg.ext_zhinxmin, false), - - DEFINE_PROP_BOOL("zicbom", RISCVCPU, cfg.ext_icbom, true), - DEFINE_PROP_UINT16("cbom_blocksize", RISCVCPU, cfg.cbom_blocksize, 64), - DEFINE_PROP_BOOL("zicboz", RISCVCPU, cfg.ext_icboz, true), - DEFINE_PROP_UINT16("cboz_blocksize", RISCVCPU, cfg.cboz_blocksize, 64), +const RISCVCPUMultiExtConfig riscv_cpu_vendor_exts[] = { + MULTI_EXT_CFG_BOOL("xtheadba", ext_xtheadba, false), + MULTI_EXT_CFG_BOOL("xtheadbb", ext_xtheadbb, false), + MULTI_EXT_CFG_BOOL("xtheadbs", ext_xtheadbs, false), + MULTI_EXT_CFG_BOOL("xtheadcmo", ext_xtheadcmo, false), + MULTI_EXT_CFG_BOOL("xtheadcondmov", ext_xtheadcondmov, false), + MULTI_EXT_CFG_BOOL("xtheadfmemidx", ext_xtheadfmemidx, false), + MULTI_EXT_CFG_BOOL("xtheadfmv", ext_xtheadfmv, false), + MULTI_EXT_CFG_BOOL("xtheadmac", ext_xtheadmac, false), + MULTI_EXT_CFG_BOOL("xtheadmemidx", ext_xtheadmemidx, false), + MULTI_EXT_CFG_BOOL("xtheadmempair", ext_xtheadmempair, false), + MULTI_EXT_CFG_BOOL("xtheadsync", ext_xtheadsync, false), + MULTI_EXT_CFG_BOOL("xventanacondops", ext_XVentanaCondOps, false), - DEFINE_PROP_BOOL("zmmul", RISCVCPU, cfg.ext_zmmul, false), - - DEFINE_PROP_BOOL("zca", RISCVCPU, cfg.ext_zca, false), - DEFINE_PROP_BOOL("zcb", RISCVCPU, cfg.ext_zcb, false), - DEFINE_PROP_BOOL("zcd", RISCVCPU, cfg.ext_zcd, false), - DEFINE_PROP_BOOL("zce", RISCVCPU, cfg.ext_zce, false), - DEFINE_PROP_BOOL("zcf", RISCVCPU, cfg.ext_zcf, false), - DEFINE_PROP_BOOL("zcmp", RISCVCPU, cfg.ext_zcmp, false), - DEFINE_PROP_BOOL("zcmt", RISCVCPU, cfg.ext_zcmt, false), - DEFINE_PROP_BOOL("zicond", RISCVCPU, cfg.ext_zicond, false), - - /* Vendor-specific custom extensions */ - DEFINE_PROP_BOOL("xtheadba", RISCVCPU, cfg.ext_xtheadba, false), - DEFINE_PROP_BOOL("xtheadbb", RISCVCPU, cfg.ext_xtheadbb, false), - DEFINE_PROP_BOOL("xtheadbs", RISCVCPU, cfg.ext_xtheadbs, false), - DEFINE_PROP_BOOL("xtheadcmo", RISCVCPU, cfg.ext_xtheadcmo, false), - DEFINE_PROP_BOOL("xtheadcondmov", RISCVCPU, cfg.ext_xtheadcondmov, false), - DEFINE_PROP_BOOL("xtheadfmemidx", RISCVCPU, cfg.ext_xtheadfmemidx, false), - DEFINE_PROP_BOOL("xtheadfmv", RISCVCPU, cfg.ext_xtheadfmv, false), - DEFINE_PROP_BOOL("xtheadmac", RISCVCPU, cfg.ext_xtheadmac, false), - DEFINE_PROP_BOOL("xtheadmemidx", RISCVCPU, cfg.ext_xtheadmemidx, false), - DEFINE_PROP_BOOL("xtheadmempair", RISCVCPU, cfg.ext_xtheadmempair, false), - DEFINE_PROP_BOOL("xtheadsync", RISCVCPU, cfg.ext_xtheadsync, false), - DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false), - - /* These are experimental so mark with 'x-' */ + DEFINE_PROP_END_OF_LIST(), +}; +/* These are experimental so mark with 'x-' */ +const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[] = { /* ePMP 0.9.3 */ - 
DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false), - DEFINE_PROP_BOOL("x-smaia", RISCVCPU, cfg.ext_smaia, false), - DEFINE_PROP_BOOL("x-ssaia", RISCVCPU, cfg.ext_ssaia, false), + MULTI_EXT_CFG_BOOL("x-epmp", epmp, false), + MULTI_EXT_CFG_BOOL("x-smaia", ext_smaia, false), + MULTI_EXT_CFG_BOOL("x-ssaia", ext_ssaia, false), - DEFINE_PROP_BOOL("x-zvfh", RISCVCPU, cfg.ext_zvfh, false), - DEFINE_PROP_BOOL("x-zvfhmin", RISCVCPU, cfg.ext_zvfhmin, false), + MULTI_EXT_CFG_BOOL("x-zvfh", ext_zvfh, false), + MULTI_EXT_CFG_BOOL("x-zvfhmin", ext_zvfhmin, false), - DEFINE_PROP_BOOL("x-zfbfmin", RISCVCPU, cfg.ext_zfbfmin, false), - DEFINE_PROP_BOOL("x-zvfbfmin", RISCVCPU, cfg.ext_zvfbfmin, false), - DEFINE_PROP_BOOL("x-zvfbfwma", RISCVCPU, cfg.ext_zvfbfwma, false), + MULTI_EXT_CFG_BOOL("x-zfbfmin", ext_zfbfmin, false), + MULTI_EXT_CFG_BOOL("x-zvfbfmin", ext_zvfbfmin, false), + MULTI_EXT_CFG_BOOL("x-zvfbfwma", ext_zvfbfwma, false), /* Vector cryptography extensions */ - DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false), - DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false), - DEFINE_PROP_BOOL("x-zvkg", RISCVCPU, cfg.ext_zvkg, false), - DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false), - DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false), - DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false), - DEFINE_PROP_BOOL("x-zvksed", RISCVCPU, cfg.ext_zvksed, false), - DEFINE_PROP_BOOL("x-zvksh", RISCVCPU, cfg.ext_zvksh, false), + MULTI_EXT_CFG_BOOL("x-zvbb", ext_zvbb, false), + MULTI_EXT_CFG_BOOL("x-zvbc", ext_zvbc, false), + MULTI_EXT_CFG_BOOL("x-zvkg", ext_zvkg, false), + MULTI_EXT_CFG_BOOL("x-zvkned", ext_zvkned, false), + MULTI_EXT_CFG_BOOL("x-zvknha", ext_zvknha, false), + MULTI_EXT_CFG_BOOL("x-zvknhb", ext_zvknhb, false), + MULTI_EXT_CFG_BOOL("x-zvksed", ext_zvksed, false), + MULTI_EXT_CFG_BOOL("x-zvksh", ext_zvksh, false), DEFINE_PROP_END_OF_LIST(), }; +/* Deprecated entries marked for future removal */ +const RISCVCPUMultiExtConfig riscv_cpu_deprecated_exts[] = { + MULTI_EXT_CFG_BOOL("Zifencei", ext_ifencei, true), + MULTI_EXT_CFG_BOOL("Zicsr", ext_icsr, true), + MULTI_EXT_CFG_BOOL("Zihintntl", ext_zihintntl, true), + MULTI_EXT_CFG_BOOL("Zihintpause", ext_zihintpause, true), + MULTI_EXT_CFG_BOOL("Zawrs", ext_zawrs, true), + MULTI_EXT_CFG_BOOL("Zfa", ext_zfa, true), + MULTI_EXT_CFG_BOOL("Zfh", ext_zfh, false), + MULTI_EXT_CFG_BOOL("Zfhmin", ext_zfhmin, false), + MULTI_EXT_CFG_BOOL("Zve32f", ext_zve32f, false), + MULTI_EXT_CFG_BOOL("Zve64f", ext_zve64f, false), + MULTI_EXT_CFG_BOOL("Zve64d", ext_zve64d, false), -#ifndef CONFIG_USER_ONLY -static void cpu_set_cfg_unavailable(Object *obj, Visitor *v, - const char *name, - void *opaque, Error **errp) -{ - const char *propname = opaque; - bool value; - - if (!visit_type_bool(v, name, &value, errp)) { - return; - } - - if (value) { - error_setg(errp, "extension %s is not available with KVM", - propname); - } -} -#endif + DEFINE_PROP_END_OF_LIST(), +}; -/* - * Add CPU properties with user-facing flags. - * - * This will overwrite existing env->misa_ext values with the - * defaults set via riscv_cpu_add_misa_properties(). 
- */ -static void riscv_cpu_add_user_properties(Object *obj) -{ - Property *prop; - DeviceState *dev = DEVICE(obj); +Property riscv_cpu_options[] = { + DEFINE_PROP_UINT8("pmu-num", RISCVCPU, cfg.pmu_num, 16), -#ifndef CONFIG_USER_ONLY - riscv_add_satp_mode_properties(obj); + DEFINE_PROP_BOOL("mmu", RISCVCPU, cfg.mmu, true), + DEFINE_PROP_BOOL("pmp", RISCVCPU, cfg.pmp, true), - if (kvm_enabled()) { - kvm_riscv_init_user_properties(obj); - } -#endif + DEFINE_PROP_STRING("priv_spec", RISCVCPU, cfg.priv_spec), + DEFINE_PROP_STRING("vext_spec", RISCVCPU, cfg.vext_spec), - riscv_cpu_add_misa_properties(obj); + DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128), + DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64), - for (prop = riscv_cpu_extensions; prop && prop->name; prop++) { -#ifndef CONFIG_USER_ONLY - if (kvm_enabled()) { - /* Check if KVM created the property already */ - if (object_property_find(obj, prop->name)) { - continue; - } + DEFINE_PROP_UINT16("cbom_blocksize", RISCVCPU, cfg.cbom_blocksize, 64), + DEFINE_PROP_UINT16("cboz_blocksize", RISCVCPU, cfg.cboz_blocksize, 64), - /* - * Set the default to disabled for every extension - * unknown to KVM and error out if the user attempts - * to enable any of them. - * - * We're giving a pass for non-bool properties since they're - * not related to the availability of extensions and can be - * safely ignored as is. - */ - if (prop->info == &qdev_prop_bool) { - object_property_add(obj, prop->name, "bool", - NULL, cpu_set_cfg_unavailable, - NULL, (void *)prop->name); - continue; - } - } -#endif - qdev_property_add_static(dev, prop); - } -} + DEFINE_PROP_END_OF_LIST(), +}; static Property riscv_cpu_properties[] = { DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true), @@ -2051,30 +1447,6 @@ static const struct SysemuCPUOps riscv_sysemu_ops = { }; #endif -#include "hw/core/tcg-cpu-ops.h" - -static const struct TCGCPUOps riscv_tcg_ops = { - .initialize = riscv_translate_init, - .synchronize_from_tb = riscv_cpu_synchronize_from_tb, - .restore_state_to_opc = riscv_restore_state_to_opc, - -#ifndef CONFIG_USER_ONLY - .tlb_fill = riscv_cpu_tlb_fill, - .cpu_exec_interrupt = riscv_cpu_exec_interrupt, - .do_interrupt = riscv_cpu_do_interrupt, - .do_transaction_failed = riscv_cpu_do_transaction_failed, - .do_unaligned_access = riscv_cpu_do_unaligned_access, - .debug_excp_handler = riscv_cpu_debug_excp_handler, - .debug_check_breakpoint = riscv_cpu_debug_check_breakpoint, - .debug_check_watchpoint = riscv_cpu_debug_check_watchpoint, -#endif /* !CONFIG_USER_ONLY */ -}; - -static bool riscv_cpu_is_dynamic(Object *cpu_obj) -{ - return object_dynamic_cast(cpu_obj, TYPE_RISCV_DYNAMIC_CPU) != NULL; -} - static void cpu_set_mvendorid(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { @@ -2212,7 +1584,6 @@ static void riscv_cpu_class_init(ObjectClass *c, void *data) #endif cc->gdb_arch_name = riscv_gdb_arch_name; cc->gdb_get_dynamic_xml = riscv_gdb_get_dynamic_xml; - cc->tcg_ops = &riscv_tcg_ops; object_class_property_add(c, "mvendorid", "uint32", cpu_get_mvendorid, cpu_set_mvendorid, NULL, NULL); @@ -2229,13 +1600,13 @@ static void riscv_cpu_class_init(ObjectClass *c, void *data) static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str, int max_str_len) { + const RISCVIsaExtData *edata; char *old = *isa_str; char *new = *isa_str; - int i; - for (i = 0; i < ARRAY_SIZE(isa_edata_arr); i++) { - if (isa_ext_is_enabled(cpu, &isa_edata_arr[i])) { - new = g_strconcat(old, "_", isa_edata_arr[i].name, NULL); + for (edata = isa_edata_arr; edata 
&& edata->name; edata++) { + if (isa_ext_is_enabled(cpu, edata->ext_enable_offset)) { + new = g_strconcat(old, "_", edata->name, NULL); g_free(old); old = new; } @@ -2312,6 +1683,7 @@ static const TypeInfo riscv_cpu_type_infos[] = { .instance_size = sizeof(RISCVCPU), .instance_align = __alignof(RISCVCPU), .instance_init = riscv_cpu_init, + .instance_post_init = riscv_cpu_post_init, .abstract = true, .class_size = sizeof(RISCVCPUClass), .class_init = riscv_cpu_class_init, @@ -2322,9 +1694,7 @@ static const TypeInfo riscv_cpu_type_infos[] = { .abstract = true, }, DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_ANY, riscv_any_cpu_init), -#if defined(CONFIG_KVM) - DEFINE_CPU(TYPE_RISCV_CPU_HOST, riscv_host_cpu_init), -#endif + DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_MAX, riscv_max_cpu_init), #if defined(TARGET_RISCV32) DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE32, rv32_base_cpu_init), DEFINE_CPU(TYPE_RISCV_CPU_IBEX, rv32_ibex_cpu_init), diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index ef9cf21c0c..f8ffa5ee38 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -22,6 +22,7 @@ #include "hw/core/cpu.h" #include "hw/registerfields.h" +#include "hw/qdev-properties.h" #include "exec/cpu-defs.h" #include "qemu/cpu-float.h" #include "qom/object.h" @@ -42,7 +43,7 @@ #define RV(x) ((target_ulong)1 << (x - 'A')) /* - * Consider updating misa_ext_info_arr[] and misa_ext_cfgs[] + * Update misa_bits[], misa_ext_info_arr[] and misa_ext_cfgs[] * when adding new MISA bits here. */ #define RVI RV('I') @@ -59,9 +60,12 @@ #define RVJ RV('J') #define RVG RV('G') +extern const uint32_t misa_bits[]; const char *riscv_get_misa_ext_name(uint32_t bit); const char *riscv_get_misa_ext_description(uint32_t bit); +#define CPU_CFG_OFFSET(_prop) offsetof(struct RISCVCPUConfig, _prop) + /* Privileged specification version */ enum { PRIV_VERSION_1_10_0 = 0, @@ -443,7 +447,6 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size, bool probe, uintptr_t retaddr); char *riscv_isa_string(RISCVCPU *cpu); void riscv_cpu_list(void); -void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp); #define cpu_list riscv_cpu_list #define cpu_mmu_index riscv_cpu_mmu_index @@ -705,6 +708,33 @@ enum riscv_pmu_event_idx { RISCV_PMU_EVENT_CACHE_ITLB_PREFETCH_MISS = 0x10021, }; +/* used by tcg/tcg-cpu.c*/ +void isa_ext_update_enabled(RISCVCPU *cpu, uint32_t ext_offset, bool en); +bool isa_ext_is_enabled(RISCVCPU *cpu, uint32_t ext_offset); +void riscv_cpu_set_misa(CPURISCVState *env, RISCVMXL mxl, uint32_t ext); + +typedef struct RISCVCPUMultiExtConfig { + const char *name; + uint32_t offset; + bool enabled; +} RISCVCPUMultiExtConfig; + +extern const RISCVCPUMultiExtConfig riscv_cpu_extensions[]; +extern const RISCVCPUMultiExtConfig riscv_cpu_vendor_exts[]; +extern const RISCVCPUMultiExtConfig riscv_cpu_experimental_exts[]; +extern const RISCVCPUMultiExtConfig riscv_cpu_deprecated_exts[]; +extern Property riscv_cpu_options[]; + +typedef struct isa_ext_data { + const char *name; + int min_version; + int ext_enable_offset; +} RISCVIsaExtData; +extern const RISCVIsaExtData isa_edata_arr[]; +char *riscv_cpu_get_name(RISCVCPU *cpu); + +void riscv_add_satp_mode_properties(Object *obj); + /* CSR function table */ extern riscv_csr_operations csr_ops[CSR_TABLE_SIZE]; diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index 3a02079290..8c28241c18 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -65,8 +65,7 @@ int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch) void cpu_get_tb_cpu_state(CPURISCVState 
*env, vaddr *pc, uint64_t *cs_base, uint32_t *pflags) { - CPUState *cs = env_cpu(env); - RISCVCPU *cpu = RISCV_CPU(cs); + RISCVCPU *cpu = env_archcpu(env); RISCVExtStatus fs, vs; uint32_t flags = 0; diff --git a/target/riscv/csr.c b/target/riscv/csr.c index 85a31dc420..4b4ab56c40 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -21,6 +21,7 @@ #include "qemu/log.h" #include "qemu/timer.h" #include "cpu.h" +#include "tcg/tcg-cpu.h" #include "pmu.h" #include "time_helper.h" #include "exec/exec-all.h" diff --git a/target/riscv/kvm-stub.c b/target/riscv/kvm-stub.c deleted file mode 100644 index 4e8fc31a21..0000000000 --- a/target/riscv/kvm-stub.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * QEMU KVM RISC-V specific function stubs - * - * Copyright (c) 2020 Huawei Technologies Co., Ltd - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2 or later, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. - */ -#include "qemu/osdep.h" -#include "cpu.h" -#include "kvm_riscv.h" - -void kvm_riscv_reset_vcpu(RISCVCPU *cpu) -{ - abort(); -} - -void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level) -{ - abort(); -} diff --git a/target/riscv/kvm.c b/target/riscv/kvm/kvm-cpu.c index c01cfb03f4..090d617627 100644 --- a/target/riscv/kvm.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -31,6 +31,7 @@ #include "sysemu/kvm_int.h" #include "cpu.h" #include "trace.h" +#include "hw/core/accel-cpu.h" #include "hw/pci/pci.h" #include "exec/memattrs.h" #include "exec/address-spaces.h" @@ -51,6 +52,8 @@ void riscv_kvm_aplic_request(void *opaque, int irq, int level) kvm_set_irq(kvm_state, irq, !!level); } +static bool cap_has_mp_state; + static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type, uint64_t idx) { @@ -205,10 +208,8 @@ static void kvm_riscv_update_cpu_misa_ext(RISCVCPU *cpu, CPUState *cs) } } -#define CPUCFG(_prop) offsetof(struct RISCVCPUConfig, _prop) - #define KVM_EXT_CFG(_name, _prop, _reg_id) \ - {.name = _name, .offset = CPUCFG(_prop), \ + {.name = _name, .offset = CPU_CFG_OFFSET(_prop), \ .kvm_reg_id = _reg_id} static KVMCPUConfig kvm_multi_ext_cfgs[] = { @@ -285,13 +286,13 @@ static void kvm_cpu_set_multi_ext_cfg(Object *obj, Visitor *v, static KVMCPUConfig kvm_cbom_blocksize = { .name = "cbom_blocksize", - .offset = CPUCFG(cbom_blocksize), + .offset = CPU_CFG_OFFSET(cbom_blocksize), .kvm_reg_id = KVM_REG_RISCV_CONFIG_REG(zicbom_block_size) }; static KVMCPUConfig kvm_cboz_blocksize = { .name = "cboz_blocksize", - .offset = CPUCFG(cboz_blocksize), + .offset = CPU_CFG_OFFSET(cboz_blocksize), .kvm_reg_id = KVM_REG_RISCV_CONFIG_REG(zicboz_block_size) }; @@ -345,10 +346,58 @@ static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU *cpu, CPUState *cs) } } +static void cpu_set_cfg_unavailable(Object *obj, Visitor *v, + const char *name, + void *opaque, Error **errp) +{ + const char *propname = opaque; + bool value; + + if (!visit_type_bool(v, name, &value, errp)) { + return; + } + + if (value) { + error_setg(errp, "extension %s is not available with KVM", + propname); + } +} + +static void 
riscv_cpu_add_kvm_unavail_prop(Object *obj, const char *prop_name) +{ + /* Check if KVM created the property already */ + if (object_property_find(obj, prop_name)) { + return; + } + + /* + * Set the default to disabled for every extension + * unknown to KVM and error out if the user attempts + * to enable any of them. + */ + object_property_add(obj, prop_name, "bool", + NULL, cpu_set_cfg_unavailable, + NULL, (void *)prop_name); +} + +static void riscv_cpu_add_kvm_unavail_prop_array(Object *obj, + const RISCVCPUMultiExtConfig *array) +{ + const RISCVCPUMultiExtConfig *prop; + + g_assert(array); + + for (prop = array; prop && prop->name; prop++) { + riscv_cpu_add_kvm_unavail_prop(obj, prop->name); + } +} + static void kvm_riscv_add_cpu_user_properties(Object *cpu_obj) { int i; + riscv_add_satp_mode_properties(cpu_obj); + for (i = 0; i < ARRAY_SIZE(kvm_misa_ext_cfgs); i++) { KVMCPUConfig *misa_cfg = &kvm_misa_ext_cfgs[i]; int bit = misa_cfg->offset; @@ -364,6 +413,11 @@ static void kvm_riscv_add_cpu_user_properties(Object *cpu_obj) misa_cfg->description); } + for (i = 0; misa_bits[i] != 0; i++) { + const char *ext_name = riscv_get_misa_ext_name(misa_bits[i]); + riscv_cpu_add_kvm_unavail_prop(cpu_obj, ext_name); + } + for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) { KVMCPUConfig *multi_cfg = &kvm_multi_ext_cfgs[i]; @@ -380,6 +434,10 @@ static void kvm_riscv_add_cpu_user_properties(Object *cpu_obj) object_property_add(cpu_obj, "cboz_blocksize", "uint16", NULL, kvm_cpu_set_cbomz_blksize, NULL, &kvm_cboz_blocksize); + + riscv_cpu_add_kvm_unavail_prop_array(cpu_obj, riscv_cpu_extensions); + riscv_cpu_add_kvm_unavail_prop_array(cpu_obj, riscv_cpu_vendor_exts); + riscv_cpu_add_kvm_unavail_prop_array(cpu_obj, riscv_cpu_experimental_exts); } static int kvm_riscv_get_regs_core(CPUState *cs) @@ -713,7 +771,8 @@ static void kvm_riscv_read_cbomz_blksize(RISCVCPU *cpu, KVMScratchCPU *kvmcpu, } } -static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) +static void kvm_riscv_read_multiext_legacy(RISCVCPU *cpu, + KVMScratchCPU *kvmcpu) { CPURISCVState *env = &cpu->env; uint64_t val; @@ -734,8 +793,8 @@ static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) val = false; } else { error_report("Unable to read ISA_EXT KVM register %s, " - "error %d", multi_ext_cfg->name, ret); - kvm_riscv_destroy_scratch_vcpu(kvmcpu); + "error code: %s", multi_ext_cfg->name, + strerrorname_np(errno)); exit(EXIT_FAILURE); } } else { @@ -754,7 +813,100 @@ static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) } } -void kvm_riscv_init_user_properties(Object *cpu_obj) +static int uint64_cmp(const void *a, const void *b) +{ + uint64_t val1 = *(const uint64_t *)a; + uint64_t val2 = *(const uint64_t *)b; + + if (val1 < val2) { + return -1; + } + + if (val1 > val2) { + return 1; + } + + return 0; +} + +static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) +{ + KVMCPUConfig *multi_ext_cfg; + struct kvm_one_reg reg; + struct kvm_reg_list rl_struct; + struct kvm_reg_list *reglist; + uint64_t val, reg_id, *reg_search; + int i, ret; + + rl_struct.n = 0; + ret = ioctl(kvmcpu->cpufd, KVM_GET_REG_LIST, &rl_struct); + + /* + * If KVM_GET_REG_LIST isn't supported we'll get errno 22 + * (EINVAL). Use read_legacy() in this case. + */ + if (errno == EINVAL) { + return kvm_riscv_read_multiext_legacy(cpu, kvmcpu); + } else if (errno != E2BIG) { + /* + * E2BIG is an expected error message for the API since we + * don't know the number of registers. 
The right amount will + * be written in rl_struct.n. + * + * Error out if we get any other errno. + */ + error_report("Error when accessing get-reg-list, code: %s", + strerrorname_np(errno)); + exit(EXIT_FAILURE); + } + + reglist = g_malloc(sizeof(struct kvm_reg_list) + + rl_struct.n * sizeof(uint64_t)); + reglist->n = rl_struct.n; + ret = ioctl(kvmcpu->cpufd, KVM_GET_REG_LIST, reglist); + if (ret) { + error_report("Error when reading KVM_GET_REG_LIST, code %s ", + strerrorname_np(errno)); + exit(EXIT_FAILURE); + } + + /* sort reglist to use bsearch() */ + qsort(®list->reg, reglist->n, sizeof(uint64_t), uint64_cmp); + + for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) { + multi_ext_cfg = &kvm_multi_ext_cfgs[i]; + reg_id = kvm_riscv_reg_id(&cpu->env, KVM_REG_RISCV_ISA_EXT, + multi_ext_cfg->kvm_reg_id); + reg_search = bsearch(®_id, reglist->reg, reglist->n, + sizeof(uint64_t), uint64_cmp); + if (!reg_search) { + continue; + } + + reg.id = reg_id; + reg.addr = (uint64_t)&val; + ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, ®); + if (ret != 0) { + error_report("Unable to read ISA_EXT KVM register %s, " + "error code: %s", multi_ext_cfg->name, + strerrorname_np(errno)); + exit(EXIT_FAILURE); + } + + multi_ext_cfg->supported = true; + kvm_cpu_cfg_set(cpu, multi_ext_cfg, val); + } + + if (cpu->cfg.ext_icbom) { + kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cbom_blocksize); + } + + if (cpu->cfg.ext_icboz) { + kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cboz_blocksize); + } +} + +static void riscv_init_kvm_registers(Object *cpu_obj) { RISCVCPU *cpu = RISCV_CPU(cpu_obj); KVMScratchCPU kvmcpu; @@ -763,7 +915,6 @@ void kvm_riscv_init_user_properties(Object *cpu_obj) return; } - kvm_riscv_add_cpu_user_properties(cpu_obj); kvm_riscv_init_machine_ids(cpu, &kvmcpu); kvm_riscv_init_misa_ext_mask(cpu, &kvmcpu); kvm_riscv_init_multiext_cfg(cpu, &kvmcpu); @@ -797,6 +948,24 @@ int kvm_arch_get_registers(CPUState *cs) return ret; } +int kvm_riscv_sync_mpstate_to_kvm(RISCVCPU *cpu, int state) +{ + if (cap_has_mp_state) { + struct kvm_mp_state mp_state = { + .mp_state = state + }; + + int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state); + if (ret) { + fprintf(stderr, "%s: failed to sync MP_STATE %d/%s\n", + __func__, ret, strerror(-ret)); + return -1; + } + } + + return 0; +} + int kvm_arch_put_registers(CPUState *cs, int level) { int ret = 0; @@ -816,6 +985,18 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } + if (KVM_PUT_RESET_STATE == level) { + RISCVCPU *cpu = RISCV_CPU(cs); + if (cs->cpu_index == 0) { + ret = kvm_riscv_sync_mpstate_to_kvm(cpu, KVM_MP_STATE_RUNNABLE); + } else { + ret = kvm_riscv_sync_mpstate_to_kvm(cpu, KVM_MP_STATE_STOPPED); + } + if (ret) { + return ret; + } + } + return ret; } @@ -928,6 +1109,7 @@ int kvm_arch_get_default_type(MachineState *ms) int kvm_arch_init(MachineState *ms, KVMState *s) { + cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE); return 0; } @@ -1010,14 +1192,25 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) void kvm_riscv_reset_vcpu(RISCVCPU *cpu) { CPURISCVState *env = &cpu->env; + int i; if (!kvm_enabled()) { return; } + for (i = 0; i < 32; i++) { + env->gpr[i] = 0; + } env->pc = cpu->env.kernel_addr; env->gpr[10] = kvm_arch_vcpu_id(CPU(cpu)); /* a0 */ env->gpr[11] = cpu->env.fdt_addr; /* a1 */ env->satp = 0; + env->mie = 0; + env->stvec = 0; + env->sscratch = 0; + env->sepc = 0; + env->scause = 0; + env->stval = 0; + env->mip = 0; } void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level) @@ -1229,3 
+1422,62 @@ void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift, kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled(); } + +static void kvm_cpu_instance_init(CPUState *cs) +{ + Object *obj = OBJECT(RISCV_CPU(cs)); + DeviceState *dev = DEVICE(obj); + + riscv_init_kvm_registers(obj); + + kvm_riscv_add_cpu_user_properties(obj); + + for (Property *prop = riscv_cpu_options; prop && prop->name; prop++) { + /* Check if we have a specific KVM handler for the option */ + if (object_property_find(obj, prop->name)) { + continue; + } + qdev_property_add_static(dev, prop); + } +} + +static void kvm_cpu_accel_class_init(ObjectClass *oc, void *data) +{ + AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); + + acc->cpu_instance_init = kvm_cpu_instance_init; +} + +static const TypeInfo kvm_cpu_accel_type_info = { + .name = ACCEL_CPU_NAME("kvm"), + + .parent = TYPE_ACCEL_CPU, + .class_init = kvm_cpu_accel_class_init, + .abstract = true, +}; +static void kvm_cpu_accel_register_types(void) +{ + type_register_static(&kvm_cpu_accel_type_info); +} +type_init(kvm_cpu_accel_register_types); + +static void riscv_host_cpu_init(Object *obj) +{ + CPURISCVState *env = &RISCV_CPU(obj)->env; + +#if defined(TARGET_RISCV32) + env->misa_mxl_max = env->misa_mxl = MXL_RV32; +#elif defined(TARGET_RISCV64) + env->misa_mxl_max = env->misa_mxl = MXL_RV64; +#endif +} + +static const TypeInfo riscv_kvm_cpu_type_infos[] = { + { + .name = TYPE_RISCV_CPU_HOST, + .parent = TYPE_RISCV_CPU, + .instance_init = riscv_host_cpu_init, + } +}; + +DEFINE_TYPES(riscv_kvm_cpu_type_infos) diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm/kvm_riscv.h index de8c209ebc..8329cfab82 100644 --- a/target/riscv/kvm_riscv.h +++ b/target/riscv/kvm/kvm_riscv.h @@ -19,7 +19,6 @@ #ifndef QEMU_KVM_RISCV_H #define QEMU_KVM_RISCV_H -void kvm_riscv_init_user_properties(Object *cpu_obj); void kvm_riscv_reset_vcpu(RISCVCPU *cpu); void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level); void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift, @@ -27,5 +26,6 @@ void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift, uint64_t aplic_base, uint64_t imsic_base, uint64_t guest_num); void riscv_kvm_aplic_request(void *opaque, int irq, int level); +int kvm_riscv_sync_mpstate_to_kvm(RISCVCPU *cpu, int state); #endif diff --git a/target/riscv/kvm/meson.build b/target/riscv/kvm/meson.build new file mode 100644 index 0000000000..7e92415091 --- /dev/null +++ b/target/riscv/kvm/meson.build @@ -0,0 +1 @@ +riscv_ss.add(when: 'CONFIG_KVM', if_true: files('kvm-cpu.c')) diff --git a/target/riscv/meson.build b/target/riscv/meson.build index ff60b21d04..a5e0734e7f 100644 --- a/target/riscv/meson.build +++ b/target/riscv/meson.build @@ -24,7 +24,6 @@ riscv_ss.add(files( 'zce_helper.c', 'vcrypto_helper.c' )) -riscv_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c'), if_false: files('kvm-stub.c')) riscv_system_ss = ss.source_set() riscv_system_ss.add(files( @@ -38,5 +37,8 @@ riscv_system_ss.add(files( 'riscv-qmp-cmds.c', )) +subdir('tcg') +subdir('kvm') + target_arch += {'riscv': riscv_ss} target_system_arch += {'riscv': riscv_system_ss} diff --git a/target/riscv/tcg/meson.build b/target/riscv/tcg/meson.build new file mode 100644 index 0000000000..061df3d74a --- /dev/null +++ b/target/riscv/tcg/meson.build @@ -0,0 +1,2 @@ +riscv_ss.add(when: 'CONFIG_TCG', if_true: files( + 'tcg-cpu.c')) diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c new file mode 100644 index 0000000000..a28918ab30 --- /dev/null +++ b/target/riscv/tcg/tcg-cpu.c 
@@ -0,0 +1,949 @@ +/* + * riscv TCG cpu class initialization + * + * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu + * Copyright (c) 2017-2018 SiFive, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "exec/exec-all.h" +#include "tcg-cpu.h" +#include "cpu.h" +#include "pmu.h" +#include "time_helper.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "qemu/accel.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "hw/core/accel-cpu.h" +#include "hw/core/tcg-cpu-ops.h" +#include "tcg/tcg.h" + +/* Hash that stores user set extensions */ +static GHashTable *multi_ext_user_opts; + +static bool cpu_cfg_ext_is_user_set(uint32_t ext_offset) +{ + return g_hash_table_contains(multi_ext_user_opts, + GUINT_TO_POINTER(ext_offset)); +} + +static void riscv_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) +{ + if (!(tb_cflags(tb) & CF_PCREL)) { + RISCVCPU *cpu = RISCV_CPU(cs); + CPURISCVState *env = &cpu->env; + RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); + + tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); + + if (xl == MXL_RV32) { + env->pc = (int32_t) tb->pc; + } else { + env->pc = tb->pc; + } + } +} + +static void riscv_restore_state_to_opc(CPUState *cs, + const TranslationBlock *tb, + const uint64_t *data) +{ + RISCVCPU *cpu = RISCV_CPU(cs); + CPURISCVState *env = &cpu->env; + RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); + target_ulong pc; + + if (tb_cflags(tb) & CF_PCREL) { + pc = (env->pc & TARGET_PAGE_MASK) | data[0]; + } else { + pc = data[0]; + } + + if (xl == MXL_RV32) { + env->pc = (int32_t)pc; + } else { + env->pc = pc; + } + env->bins = data[1]; +} + +static const struct TCGCPUOps riscv_tcg_ops = { + .initialize = riscv_translate_init, + .synchronize_from_tb = riscv_cpu_synchronize_from_tb, + .restore_state_to_opc = riscv_restore_state_to_opc, + +#ifndef CONFIG_USER_ONLY + .tlb_fill = riscv_cpu_tlb_fill, + .cpu_exec_interrupt = riscv_cpu_exec_interrupt, + .do_interrupt = riscv_cpu_do_interrupt, + .do_transaction_failed = riscv_cpu_do_transaction_failed, + .do_unaligned_access = riscv_cpu_do_unaligned_access, + .debug_excp_handler = riscv_cpu_debug_excp_handler, + .debug_check_breakpoint = riscv_cpu_debug_check_breakpoint, + .debug_check_watchpoint = riscv_cpu_debug_check_watchpoint, +#endif /* !CONFIG_USER_ONLY */ +}; + +static int cpu_cfg_ext_get_min_version(uint32_t ext_offset) +{ + const RISCVIsaExtData *edata; + + for (edata = isa_edata_arr; edata && edata->name; edata++) { + if (edata->ext_enable_offset != ext_offset) { + continue; + } + + return edata->min_version; + } + + g_assert_not_reached(); +} + +static void cpu_cfg_ext_auto_update(RISCVCPU *cpu, uint32_t ext_offset, + bool value) +{ + CPURISCVState *env = &cpu->env; + bool prev_val = isa_ext_is_enabled(cpu, ext_offset); + int min_version; + + if (prev_val == value) { + return; + } + + if (cpu_cfg_ext_is_user_set(ext_offset)) { + return; + } + + if 
(value && env->priv_ver != PRIV_VERSION_LATEST) { + /* Do not enable it if priv_ver is older than min_version */ + min_version = cpu_cfg_ext_get_min_version(ext_offset); + if (env->priv_ver < min_version) { + return; + } + } + + isa_ext_update_enabled(cpu, ext_offset, value); +} + +static void riscv_cpu_validate_misa_priv(CPURISCVState *env, Error **errp) +{ + if (riscv_has_ext(env, RVH) && env->priv_ver < PRIV_VERSION_1_12_0) { + error_setg(errp, "H extension requires priv spec 1.12.0"); + return; + } +} + +static void riscv_cpu_validate_misa_mxl(RISCVCPU *cpu, Error **errp) +{ + RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(cpu); + CPUClass *cc = CPU_CLASS(mcc); + CPURISCVState *env = &cpu->env; + + /* Validate that MISA_MXL is set properly. */ + switch (env->misa_mxl_max) { +#ifdef TARGET_RISCV64 + case MXL_RV64: + case MXL_RV128: + cc->gdb_core_xml_file = "riscv-64bit-cpu.xml"; + break; +#endif + case MXL_RV32: + cc->gdb_core_xml_file = "riscv-32bit-cpu.xml"; + break; + default: + g_assert_not_reached(); + } + + if (env->misa_mxl_max != env->misa_mxl) { + error_setg(errp, "misa_mxl_max must be equal to misa_mxl"); + return; + } +} + +static void riscv_cpu_validate_priv_spec(RISCVCPU *cpu, Error **errp) +{ + CPURISCVState *env = &cpu->env; + int priv_version = -1; + + if (cpu->cfg.priv_spec) { + if (!g_strcmp0(cpu->cfg.priv_spec, "v1.12.0")) { + priv_version = PRIV_VERSION_1_12_0; + } else if (!g_strcmp0(cpu->cfg.priv_spec, "v1.11.0")) { + priv_version = PRIV_VERSION_1_11_0; + } else if (!g_strcmp0(cpu->cfg.priv_spec, "v1.10.0")) { + priv_version = PRIV_VERSION_1_10_0; + } else { + error_setg(errp, + "Unsupported privilege spec version '%s'", + cpu->cfg.priv_spec); + return; + } + + env->priv_ver = priv_version; + } +} + +static void riscv_cpu_validate_v(CPURISCVState *env, RISCVCPUConfig *cfg, + Error **errp) +{ + if (!is_power_of_2(cfg->vlen)) { + error_setg(errp, "Vector extension VLEN must be power of 2"); + return; + } + + if (cfg->vlen > RV_VLEN_MAX || cfg->vlen < 128) { + error_setg(errp, + "Vector extension implementation only supports VLEN " + "in the range [128, %d]", RV_VLEN_MAX); + return; + } + + if (!is_power_of_2(cfg->elen)) { + error_setg(errp, "Vector extension ELEN must be power of 2"); + return; + } + + if (cfg->elen > 64 || cfg->elen < 8) { + error_setg(errp, + "Vector extension implementation only supports ELEN " + "in the range [8, 64]"); + return; + } + + if (cfg->vext_spec) { + if (!g_strcmp0(cfg->vext_spec, "v1.0")) { + env->vext_ver = VEXT_VERSION_1_00_0; + } else { + error_setg(errp, "Unsupported vector spec version '%s'", + cfg->vext_spec); + return; + } + } else if (env->vext_ver == 0) { + qemu_log("vector version is not specified, " + "use the default value v1.0\n"); + + env->vext_ver = VEXT_VERSION_1_00_0; + } +} + +static void riscv_cpu_disable_priv_spec_isa_exts(RISCVCPU *cpu) +{ + CPURISCVState *env = &cpu->env; + const RISCVIsaExtData *edata; + + /* Force disable extensions if priv spec version does not match */ + for (edata = isa_edata_arr; edata && edata->name; edata++) { + if (isa_ext_is_enabled(cpu, edata->ext_enable_offset) && + (env->priv_ver < edata->min_version)) { + isa_ext_update_enabled(cpu, edata->ext_enable_offset, false); +#ifndef CONFIG_USER_ONLY + warn_report("disabling %s extension for hart 0x" TARGET_FMT_lx + " because privilege spec version does not match", + edata->name, env->mhartid); +#else + warn_report("disabling %s extension because " + "privilege spec version does not match", + edata->name); +#endif + } + } +} + +/* + * Check 
consistency between chosen extensions while setting + * cpu->cfg accordingly. + */ +void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) +{ + CPURISCVState *env = &cpu->env; + Error *local_err = NULL; + + /* Do some ISA extension error checking */ + if (riscv_has_ext(env, RVG) && + !(riscv_has_ext(env, RVI) && riscv_has_ext(env, RVM) && + riscv_has_ext(env, RVA) && riscv_has_ext(env, RVF) && + riscv_has_ext(env, RVD) && + cpu->cfg.ext_icsr && cpu->cfg.ext_ifencei)) { + + if (cpu_cfg_ext_is_user_set(CPU_CFG_OFFSET(ext_icsr)) && + !cpu->cfg.ext_icsr) { + error_setg(errp, "RVG requires Zicsr but user set Zicsr to false"); + return; + } + + if (cpu_cfg_ext_is_user_set(CPU_CFG_OFFSET(ext_ifencei)) && + !cpu->cfg.ext_ifencei) { + error_setg(errp, "RVG requires Zifencei but user set " + "Zifencei to false"); + return; + } + + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_icsr), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_ifencei), true); + + env->misa_ext |= RVI | RVM | RVA | RVF | RVD; + env->misa_ext_mask |= RVI | RVM | RVA | RVF | RVD; + } + + if (riscv_has_ext(env, RVI) && riscv_has_ext(env, RVE)) { + error_setg(errp, + "I and E extensions are incompatible"); + return; + } + + if (!riscv_has_ext(env, RVI) && !riscv_has_ext(env, RVE)) { + error_setg(errp, + "Either I or E extension must be set"); + return; + } + + if (riscv_has_ext(env, RVS) && !riscv_has_ext(env, RVU)) { + error_setg(errp, + "Setting S extension without U extension is illegal"); + return; + } + + if (riscv_has_ext(env, RVH) && !riscv_has_ext(env, RVI)) { + error_setg(errp, + "H depends on an I base integer ISA with 32 x registers"); + return; + } + + if (riscv_has_ext(env, RVH) && !riscv_has_ext(env, RVS)) { + error_setg(errp, "H extension implicitly requires S-mode"); + return; + } + + if (riscv_has_ext(env, RVF) && !cpu->cfg.ext_icsr) { + error_setg(errp, "F extension requires Zicsr"); + return; + } + + if ((cpu->cfg.ext_zawrs) && !riscv_has_ext(env, RVA)) { + error_setg(errp, "Zawrs extension requires A extension"); + return; + } + + if (cpu->cfg.ext_zfa && !riscv_has_ext(env, RVF)) { + error_setg(errp, "Zfa extension requires F extension"); + return; + } + + if (cpu->cfg.ext_zfh) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zfhmin), true); + } + + if (cpu->cfg.ext_zfhmin && !riscv_has_ext(env, RVF)) { + error_setg(errp, "Zfh/Zfhmin extensions require F extension"); + return; + } + + if (cpu->cfg.ext_zfbfmin && !riscv_has_ext(env, RVF)) { + error_setg(errp, "Zfbfmin extension depends on F extension"); + return; + } + + if (riscv_has_ext(env, RVD) && !riscv_has_ext(env, RVF)) { + error_setg(errp, "D extension requires F extension"); + return; + } + + if (riscv_has_ext(env, RVV)) { + riscv_cpu_validate_v(env, &cpu->cfg, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } + + /* The V vector extension depends on the Zve64d extension */ + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve64d), true); + } + + /* The Zve64d extension depends on the Zve64f extension */ + if (cpu->cfg.ext_zve64d) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve64f), true); + } + + /* The Zve64f extension depends on the Zve32f extension */ + if (cpu->cfg.ext_zve64f) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve32f), true); + } + + if (cpu->cfg.ext_zve64d && !riscv_has_ext(env, RVD)) { + error_setg(errp, "Zve64d/V extensions require D extension"); + return; + } + + if (cpu->cfg.ext_zve32f && !riscv_has_ext(env, RVF)) { + error_setg(errp, "Zve32f/Zve64f 
extensions require F extension"); + return; + } + + if (cpu->cfg.ext_zvfh) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvfhmin), true); + } + + if (cpu->cfg.ext_zvfhmin && !cpu->cfg.ext_zve32f) { + error_setg(errp, "Zvfh/Zvfhmin extensions require Zve32f extension"); + return; + } + + if (cpu->cfg.ext_zvfh && !cpu->cfg.ext_zfhmin) { + error_setg(errp, "Zvfh extensions requires Zfhmin extension"); + return; + } + + if (cpu->cfg.ext_zvfbfmin && !cpu->cfg.ext_zfbfmin) { + error_setg(errp, "Zvfbfmin extension depends on Zfbfmin extension"); + return; + } + + if (cpu->cfg.ext_zvfbfmin && !cpu->cfg.ext_zve32f) { + error_setg(errp, "Zvfbfmin extension depends on Zve32f extension"); + return; + } + + if (cpu->cfg.ext_zvfbfwma && !cpu->cfg.ext_zvfbfmin) { + error_setg(errp, "Zvfbfwma extension depends on Zvfbfmin extension"); + return; + } + + /* Set the ISA extensions, checks should have happened above */ + if (cpu->cfg.ext_zhinx) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true); + } + + if ((cpu->cfg.ext_zdinx || cpu->cfg.ext_zhinxmin) && !cpu->cfg.ext_zfinx) { + error_setg(errp, "Zdinx/Zhinx/Zhinxmin extensions require Zfinx"); + return; + } + + if (cpu->cfg.ext_zfinx) { + if (!cpu->cfg.ext_icsr) { + error_setg(errp, "Zfinx extension requires Zicsr"); + return; + } + if (riscv_has_ext(env, RVF)) { + error_setg(errp, + "Zfinx cannot be supported together with F extension"); + return; + } + } + + if (cpu->cfg.ext_zce) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcb), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmp), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcmt), true); + if (riscv_has_ext(env, RVF) && env->misa_mxl_max == MXL_RV32) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true); + } + } + + /* zca, zcd and zcf has a PRIV 1.12.0 restriction */ + if (riscv_has_ext(env, RVC) && env->priv_ver >= PRIV_VERSION_1_12_0) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zca), true); + if (riscv_has_ext(env, RVF) && env->misa_mxl_max == MXL_RV32) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcf), true); + } + if (riscv_has_ext(env, RVD)) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zcd), true); + } + } + + if (env->misa_mxl_max != MXL_RV32 && cpu->cfg.ext_zcf) { + error_setg(errp, "Zcf extension is only relevant to RV32"); + return; + } + + if (!riscv_has_ext(env, RVF) && cpu->cfg.ext_zcf) { + error_setg(errp, "Zcf extension requires F extension"); + return; + } + + if (!riscv_has_ext(env, RVD) && cpu->cfg.ext_zcd) { + error_setg(errp, "Zcd extension requires D extension"); + return; + } + + if ((cpu->cfg.ext_zcf || cpu->cfg.ext_zcd || cpu->cfg.ext_zcb || + cpu->cfg.ext_zcmp || cpu->cfg.ext_zcmt) && !cpu->cfg.ext_zca) { + error_setg(errp, "Zcf/Zcd/Zcb/Zcmp/Zcmt extensions require Zca " + "extension"); + return; + } + + if (cpu->cfg.ext_zcd && (cpu->cfg.ext_zcmp || cpu->cfg.ext_zcmt)) { + error_setg(errp, "Zcmp/Zcmt extensions are incompatible with " + "Zcd extension"); + return; + } + + if (cpu->cfg.ext_zcmt && !cpu->cfg.ext_icsr) { + error_setg(errp, "Zcmt extension requires Zicsr extension"); + return; + } + + /* + * In principle Zve*x would also suffice here, were they supported + * in qemu + */ + if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkg || cpu->cfg.ext_zvkned || + cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksed || cpu->cfg.ext_zvksh) && + !cpu->cfg.ext_zve32f) { + error_setg(errp, + "Vector crypto extensions require V or Zve* extensions"); + return; 
+ } + + if ((cpu->cfg.ext_zvbc || cpu->cfg.ext_zvknhb) && !cpu->cfg.ext_zve64f) { + error_setg( + errp, + "Zvbc and Zvknhb extensions require V or Zve64{f,d} extensions"); + return; + } + + if (cpu->cfg.ext_zk) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zkn), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zkr), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zkt), true); + } + + if (cpu->cfg.ext_zkn) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkb), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkc), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkx), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zkne), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zknd), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zknh), true); + } + + if (cpu->cfg.ext_zks) { + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkb), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkc), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zbkx), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zksed), true); + cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zksh), true); + } + + /* + * Disable isa extensions based on priv spec after we + * validated and set everything we need. + */ + riscv_cpu_disable_priv_spec_isa_exts(cpu); +} + +static bool riscv_cpu_is_generic(Object *cpu_obj) +{ + return object_dynamic_cast(cpu_obj, TYPE_RISCV_DYNAMIC_CPU) != NULL; +} + +/* + * We'll get here via the following path: + * + * riscv_cpu_realize() + * -> cpu_exec_realizefn() + * -> tcg_cpu_realize() (via accel_cpu_common_realize()) + */ +static bool tcg_cpu_realize(CPUState *cs, Error **errp) +{ + RISCVCPU *cpu = RISCV_CPU(cs); + CPURISCVState *env = &cpu->env; + Error *local_err = NULL; + + if (object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_CPU_HOST)) { + g_autofree char *name = riscv_cpu_get_name(cpu); + error_setg(errp, "'%s' CPU is not compatible with TCG acceleration", + name); + return false; + } + + riscv_cpu_validate_misa_mxl(cpu, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return false; + } + + riscv_cpu_validate_priv_spec(cpu, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return false; + } + + riscv_cpu_validate_misa_priv(env, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return false; + } + + if (cpu->cfg.epmp && !cpu->cfg.pmp) { + /* + * Enhanced PMP should only be available + * on harts with PMP support + */ + error_setg(errp, "Invalid configuration: EPMP requires PMP support"); + return false; + } + + riscv_cpu_validate_set_extensions(cpu, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return false; + } + +#ifndef CONFIG_USER_ONLY + CPU(cs)->tcg_cflags |= CF_PCREL; + + if (cpu->cfg.ext_sstc) { + riscv_timer_init(cpu); + } + + if (cpu->cfg.pmu_num) { + if (!riscv_pmu_init(cpu, cpu->cfg.pmu_num) && cpu->cfg.ext_sscofpmf) { + cpu->pmu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + riscv_pmu_timer_cb, cpu); + } + } +#endif + + return true; +} + +typedef struct RISCVCPUMisaExtConfig { + target_ulong misa_bit; + bool enabled; +} RISCVCPUMisaExtConfig; + +static void cpu_set_misa_ext_cfg(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + const RISCVCPUMisaExtConfig *misa_ext_cfg = opaque; + target_ulong misa_bit = misa_ext_cfg->misa_bit; + RISCVCPU *cpu = RISCV_CPU(obj); + CPURISCVState *env = &cpu->env; + bool generic_cpu = riscv_cpu_is_generic(obj); + bool prev_val, 
value; + + if (!visit_type_bool(v, name, &value, errp)) { + return; + } + + prev_val = env->misa_ext & misa_bit; + + if (value == prev_val) { + return; + } + + if (value) { + if (!generic_cpu) { + g_autofree char *cpuname = riscv_cpu_get_name(cpu); + error_setg(errp, "'%s' CPU does not allow enabling extensions", + cpuname); + return; + } + + env->misa_ext |= misa_bit; + env->misa_ext_mask |= misa_bit; + } else { + env->misa_ext &= ~misa_bit; + env->misa_ext_mask &= ~misa_bit; + } +} + +static void cpu_get_misa_ext_cfg(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + const RISCVCPUMisaExtConfig *misa_ext_cfg = opaque; + target_ulong misa_bit = misa_ext_cfg->misa_bit; + RISCVCPU *cpu = RISCV_CPU(obj); + CPURISCVState *env = &cpu->env; + bool value; + + value = env->misa_ext & misa_bit; + + visit_type_bool(v, name, &value, errp); +} + +#define MISA_CFG(_bit, _enabled) \ + {.misa_bit = _bit, .enabled = _enabled} + +static const RISCVCPUMisaExtConfig misa_ext_cfgs[] = { + MISA_CFG(RVA, true), + MISA_CFG(RVC, true), + MISA_CFG(RVD, true), + MISA_CFG(RVF, true), + MISA_CFG(RVI, true), + MISA_CFG(RVE, false), + MISA_CFG(RVM, true), + MISA_CFG(RVS, true), + MISA_CFG(RVU, true), + MISA_CFG(RVH, true), + MISA_CFG(RVJ, false), + MISA_CFG(RVV, false), + MISA_CFG(RVG, false), +}; + +/* + * We do not support user choice tracking for MISA + * extensions yet because, so far, we do not silently + * change MISA bits during realize() (RVG enables MISA + * bits but the user is warned about it). + */ +static void riscv_cpu_add_misa_properties(Object *cpu_obj) +{ + bool use_def_vals = riscv_cpu_is_generic(cpu_obj); + int i; + + for (i = 0; i < ARRAY_SIZE(misa_ext_cfgs); i++) { + const RISCVCPUMisaExtConfig *misa_cfg = &misa_ext_cfgs[i]; + int bit = misa_cfg->misa_bit; + const char *name = riscv_get_misa_ext_name(bit); + const char *desc = riscv_get_misa_ext_description(bit); + + /* Check if KVM already created the property */ + if (object_property_find(cpu_obj, name)) { + continue; + } + + object_property_add(cpu_obj, name, "bool", + cpu_get_misa_ext_cfg, + cpu_set_misa_ext_cfg, + NULL, (void *)misa_cfg); + object_property_set_description(cpu_obj, name, desc); + if (use_def_vals) { + object_property_set_bool(cpu_obj, name, misa_cfg->enabled, NULL); + } + } +} + +static bool cpu_ext_is_deprecated(const char *ext_name) +{ + return isupper(ext_name[0]); +} + +/* + * String will be allocated in the heap. Caller is responsible + * for freeing it. + */ +static char *cpu_ext_to_lower(const char *ext_name) +{ + char *ret = g_malloc0(strlen(ext_name) + 1); + + strcpy(ret, ext_name); + ret[0] = tolower(ret[0]); + + return ret; +} + +static void cpu_set_multi_ext_cfg(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + const RISCVCPUMultiExtConfig *multi_ext_cfg = opaque; + RISCVCPU *cpu = RISCV_CPU(obj); + bool generic_cpu = riscv_cpu_is_generic(obj); + bool prev_val, value; + + if (!visit_type_bool(v, name, &value, errp)) { + return; + } + + if (cpu_ext_is_deprecated(multi_ext_cfg->name)) { + g_autofree char *lower = cpu_ext_to_lower(multi_ext_cfg->name); + + warn_report("CPU property '%s' is deprecated. 
Please use '%s' instead", + multi_ext_cfg->name, lower); + } + + g_hash_table_insert(multi_ext_user_opts, + GUINT_TO_POINTER(multi_ext_cfg->offset), + (gpointer)value); + + prev_val = isa_ext_is_enabled(cpu, multi_ext_cfg->offset); + + if (value == prev_val) { + return; + } + + if (value && !generic_cpu) { + g_autofree char *cpuname = riscv_cpu_get_name(cpu); + error_setg(errp, "'%s' CPU does not allow enabling extensions", + cpuname); + return; + } + + isa_ext_update_enabled(cpu, multi_ext_cfg->offset, value); +} + +static void cpu_get_multi_ext_cfg(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + const RISCVCPUMultiExtConfig *multi_ext_cfg = opaque; + bool value = isa_ext_is_enabled(RISCV_CPU(obj), multi_ext_cfg->offset); + + visit_type_bool(v, name, &value, errp); +} + +static void cpu_add_multi_ext_prop(Object *cpu_obj, + const RISCVCPUMultiExtConfig *multi_cfg) +{ + bool generic_cpu = riscv_cpu_is_generic(cpu_obj); + bool deprecated_ext = cpu_ext_is_deprecated(multi_cfg->name); + + object_property_add(cpu_obj, multi_cfg->name, "bool", + cpu_get_multi_ext_cfg, + cpu_set_multi_ext_cfg, + NULL, (void *)multi_cfg); + + if (!generic_cpu || deprecated_ext) { + return; + } + + /* + * Set def val directly instead of using + * object_property_set_bool() to save the set() + * callback hash for user inputs. + */ + isa_ext_update_enabled(RISCV_CPU(cpu_obj), multi_cfg->offset, + multi_cfg->enabled); +} + +static void riscv_cpu_add_multiext_prop_array(Object *obj, + const RISCVCPUMultiExtConfig *array) +{ + const RISCVCPUMultiExtConfig *prop; + + g_assert(array); + + for (prop = array; prop && prop->name; prop++) { + cpu_add_multi_ext_prop(obj, prop); + } +} + +/* + * Add CPU properties with user-facing flags. + * + * This will overwrite existing env->misa_ext values with the + * defaults set via riscv_cpu_add_misa_properties(). + */ +static void riscv_cpu_add_user_properties(Object *obj) +{ +#ifndef CONFIG_USER_ONLY + riscv_add_satp_mode_properties(obj); +#endif + + riscv_cpu_add_misa_properties(obj); + + riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_extensions); + riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_vendor_exts); + riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_experimental_exts); + + riscv_cpu_add_multiext_prop_array(obj, riscv_cpu_deprecated_exts); + + for (Property *prop = riscv_cpu_options; prop && prop->name; prop++) { + qdev_property_add_static(DEVICE(obj), prop); + } +} + +/* + * The 'max' type CPU will have all possible ratified + * non-vendor extensions enabled. + */ +static void riscv_init_max_cpu_extensions(Object *obj) +{ + RISCVCPU *cpu = RISCV_CPU(obj); + CPURISCVState *env = &cpu->env; + const RISCVCPUMultiExtConfig *prop; + + /* Enable RVG, RVJ and RVV that are disabled by default */ + riscv_cpu_set_misa(env, env->misa_mxl, env->misa_ext | RVG | RVJ | RVV); + + for (prop = riscv_cpu_extensions; prop && prop->name; prop++) { + isa_ext_update_enabled(cpu, prop->offset, true); + } + + /* set vector version */ + env->vext_ver = VEXT_VERSION_1_00_0; + + /* Zfinx is not compatible with F. 
Disable it */ + isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_zfinx), false); + isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_zdinx), false); + isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_zhinx), false); + isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_zhinxmin), false); + + isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_zce), false); + isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_zcmp), false); + isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_zcmt), false); + + if (env->misa_mxl != MXL_RV32) { + isa_ext_update_enabled(cpu, CPU_CFG_OFFSET(ext_zcf), false); + } +} + +static bool riscv_cpu_has_max_extensions(Object *cpu_obj) +{ + return object_dynamic_cast(cpu_obj, TYPE_RISCV_CPU_MAX) != NULL; +} + +static void tcg_cpu_instance_init(CPUState *cs) +{ + RISCVCPU *cpu = RISCV_CPU(cs); + Object *obj = OBJECT(cpu); + + multi_ext_user_opts = g_hash_table_new(NULL, g_direct_equal); + riscv_cpu_add_user_properties(obj); + + if (riscv_cpu_has_max_extensions(obj)) { + riscv_init_max_cpu_extensions(obj); + } +} + +static void tcg_cpu_init_ops(AccelCPUClass *accel_cpu, CPUClass *cc) +{ + /* + * All cpus use the same set of operations. + */ + cc->tcg_ops = &riscv_tcg_ops; +} + +static void tcg_cpu_class_init(CPUClass *cc) +{ + cc->init_accel_cpu = tcg_cpu_init_ops; +} + +static void tcg_cpu_accel_class_init(ObjectClass *oc, void *data) +{ + AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); + + acc->cpu_class_init = tcg_cpu_class_init; + acc->cpu_instance_init = tcg_cpu_instance_init; + acc->cpu_target_realize = tcg_cpu_realize; +} + +static const TypeInfo tcg_cpu_accel_type_info = { + .name = ACCEL_CPU_NAME("tcg"), + + .parent = TYPE_ACCEL_CPU, + .class_init = tcg_cpu_accel_class_init, + .abstract = true, +}; + +static void tcg_cpu_accel_register_types(void) +{ + type_register_static(&tcg_cpu_accel_type_info); +} +type_init(tcg_cpu_accel_register_types); diff --git a/target/riscv/tcg/tcg-cpu.h b/target/riscv/tcg/tcg-cpu.h new file mode 100644 index 0000000000..630184759d --- /dev/null +++ b/target/riscv/tcg/tcg-cpu.h @@ -0,0 +1,27 @@ +/* + * riscv TCG cpu class initialization + * + * Copyright (c) 2023 Ventana Micro Systems Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
diff --git a/tests/avocado/tuxrun_baselines.py b/tests/avocado/tuxrun_baselines.py
index e12250eabb..c99bea6c0b 100644
--- a/tests/avocado/tuxrun_baselines.py
+++ b/tests/avocado/tuxrun_baselines.py
@@ -501,6 +501,38 @@ class TuxRunBaselineTest(QemuSystemTest):
 
         self.common_tuxrun(csums=sums)
 
+    def test_riscv32_maxcpu(self):
+        """
+        :avocado: tags=arch:riscv32
+        :avocado: tags=machine:virt
+        :avocado: tags=cpu:max
+        :avocado: tags=tuxboot:riscv32
+        """
+        sums = { "Image" :
+                 "89599407d7334de629a40e7ad6503c73670359eb5f5ae9d686353a3d6deccbd5",
+                 "fw_jump.elf" :
+                 "f2ef28a0b77826f79d085d3e4aa686f1159b315eff9099a37046b18936676985",
+                 "rootfs.ext4.zst" :
+                 "7168d296d0283238ea73cd5a775b3dd608e55e04c7b92b76ecce31bb13108cba" }
+
+        self.common_tuxrun(csums=sums)
+
+    def test_riscv64_maxcpu(self):
+        """
+        :avocado: tags=arch:riscv64
+        :avocado: tags=machine:virt
+        :avocado: tags=cpu:max
+        :avocado: tags=tuxboot:riscv64
+        """
+        sums = { "Image" :
+                 "cd634badc65e52fb63465ec99e309c0de0369f0841b7d9486f9729e119bac25e",
+                 "fw_jump.elf" :
+                 "6e3373abcab4305fe151b564a4c71110d833c21f2c0a1753b7935459e36aedcf",
+                 "rootfs.ext4.zst" :
+                 "b18e3a3bdf27be03da0b285e84cb71bf09eca071c3a087b42884b6982ed679eb" }
+
+        self.common_tuxrun(csums=sums)
+
     def test_s390(self):
         """
         :avocado: tags=arch:s390x
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 0b603e7c57..f67e9df01c 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -1034,9 +1034,13 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque)
         blk_co_unref(blk);
     } else {
         BdrvChild *c, *next_c;
+        bdrv_graph_co_rdlock();
         QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) {
+            bdrv_graph_co_rdunlock();
             bdrv_co_unref_child(bs, c);
+            bdrv_graph_co_rdlock();
         }
+        bdrv_graph_co_rdunlock();
     }
 
     dbdd->done = true;
@@ -1168,7 +1172,7 @@ struct detach_by_parent_data {
 };
 static struct detach_by_parent_data detach_by_parent_data;
 
-static void detach_indirect_bh(void *opaque)
+static void no_coroutine_fn detach_indirect_bh(void *opaque)
 {
     struct detach_by_parent_data *data = opaque;
 
@@ -1184,18 +1188,19 @@ static void detach_indirect_bh(void *opaque)
     bdrv_graph_wrunlock();
 }
 
-static void detach_by_parent_aio_cb(void *opaque, int ret)
+static void coroutine_mixed_fn detach_by_parent_aio_cb(void *opaque, int ret)
 {
     struct detach_by_parent_data *data = &detach_by_parent_data;
 
     g_assert_cmpint(ret, ==, 0);
     if (data->by_parent_cb) {
         bdrv_inc_in_flight(data->child_b->bs);
-        detach_indirect_bh(data);
+        aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
+                                detach_indirect_bh, &detach_by_parent_data);
     }
 }
 
-static void detach_by_driver_cb_drained_begin(BdrvChild *child)
+static void GRAPH_RDLOCK detach_by_driver_cb_drained_begin(BdrvChild *child)
 {
     struct detach_by_parent_data *data = &detach_by_parent_data;
 
@@ -1232,7 +1237,7 @@ static BdrvChildClass detach_by_driver_cb_class;
  * state is messed up, but if it is only polled in the single
 * BDRV_POLL_WHILE() at the end of the drain, this should work fine.
 */
-static void test_detach_indirect(bool by_parent_cb)
+static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb)
 {
     BlockBackend *blk;
     BlockDriverState *parent_a, *parent_b, *a, *b, *c;
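[Editor's note] The test-bdrv-drain.c hunks above follow a recurring pattern: a reader of the block graph must drop its read lock before calling bdrv_co_unref_child(), which takes the writer lock internally, then reacquire it to keep iterating; QLIST_FOREACH_SAFE fetches the next pointer while the lock is still held. The pthreads sketch below shows the shape of that lock juggling; it is not QEMU code (the real graph lock is a coroutine-aware rwlock, and real callers must also cope with the list changing underneath them):

    /* Sketch only: release a read lock around a call that needs the write lock. */
    #include <pthread.h>
    #include <stdlib.h>

    static pthread_rwlock_t graph_lock = PTHREAD_RWLOCK_INITIALIZER;

    struct child {
        struct child *next;
    };

    /* Takes the write lock internally, so the caller must hold no lock. */
    static void unref_child(struct child *c)
    {
        pthread_rwlock_wrlock(&graph_lock);
        /* ... detach c from the graph ... */
        pthread_rwlock_unlock(&graph_lock);
        free(c);
    }

    static void delete_children(struct child *head)
    {
        struct child *c, *next;

        pthread_rwlock_rdlock(&graph_lock);
        for (c = head; c; c = next) {
            next = c->next;                    /* fetch next while still locked */
            pthread_rwlock_unlock(&graph_lock);
            unref_child(c);                    /* may take the write lock */
            pthread_rwlock_rdlock(&graph_lock);
        }
        pthread_rwlock_unlock(&graph_lock);
    }

    int main(void)
    {
        delete_children(NULL);   /* trivially exercises the empty case */
        return 0;
    }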
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index 9155547313..9b15d2768c 100644
--- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
@@ -383,6 +383,9 @@ static void test_sync_op_check(BdrvChild *c)
 
 static void test_sync_op_activate(BdrvChild *c)
 {
+    GLOBAL_STATE_CODE();
+    GRAPH_RDLOCK_GUARD_MAINLOOP();
+
     /* Early success: Image is not inactive */
     bdrv_activate(c->bs, NULL);
 }
@@ -468,11 +471,16 @@ static void test_sync_op(const void *opaque)
     BlockDriverState *bs;
     BdrvChild *c;
 
+    GLOBAL_STATE_CODE();
+
     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
     bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort);
     bs->total_sectors = 65536 / BDRV_SECTOR_SIZE;
     blk_insert_bs(blk, bs, &error_abort);
+
+    bdrv_graph_rdlock_main_loop();
     c = QLIST_FIRST(&bs->parents);
+    bdrv_graph_rdunlock_main_loop();
 
     blk_set_aio_context(blk, ctx, &error_abort);
     aio_context_acquire(ctx);