From 00eb93b5886519a35a06bf403e5be4c4cb3df25a Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 7 Nov 2022 19:35:55 +0300 Subject: block: Inline bdrv_detach_child() The only caller is bdrv_root_unref_child(), let's just do the logic directly in it. It simplifies further conversion of bdrv_root_unref_child() to transaction actions. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Message-Id: <20221107163558.618889-2-vsementsov@yandex-team.ru> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 46 +++++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 27 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index a18f052374..c0c1b3df91 100644 --- a/block.c +++ b/block.c @@ -3070,30 +3070,6 @@ static BdrvChild *bdrv_attach_child_noperm(BlockDriverState *parent_bs, tran, errp); } -static void bdrv_detach_child(BdrvChild *child) -{ - BlockDriverState *old_bs = child->bs; - - GLOBAL_STATE_CODE(); - bdrv_replace_child_noperm(child, NULL); - bdrv_child_free(child); - - if (old_bs) { - /* - * Update permissions for old node. We're just taking a parent away, so - * we're loosening restrictions. Errors of permission update are not - * fatal in this case, ignore them. - */ - bdrv_refresh_perms(old_bs, NULL); - - /* - * When the parent requiring a non-default AioContext is removed, the - * node moves back to the main AioContext - */ - bdrv_try_change_aio_context(old_bs, qemu_get_aio_context(), NULL, NULL); - } -} - /* * This function steals the reference to child_bs from the caller. * That reference is later dropped by bdrv_root_unref_child(). @@ -3182,12 +3158,28 @@ out: /* Callers must ensure that child->frozen is false. */ void bdrv_root_unref_child(BdrvChild *child) { - BlockDriverState *child_bs; + BlockDriverState *child_bs = child->bs; GLOBAL_STATE_CODE(); + bdrv_replace_child_noperm(child, NULL); + bdrv_child_free(child); + + if (child_bs) { + /* + * Update permissions for old node. We're just taking a parent away, so + * we're loosening restrictions. Errors of permission update are not + * fatal in this case, ignore them. + */ + bdrv_refresh_perms(child_bs, NULL); + + /* + * When the parent requiring a non-default AioContext is removed, the + * node moves back to the main AioContext + */ + bdrv_try_change_aio_context(child_bs, qemu_get_aio_context(), NULL, + NULL); + } - child_bs = child->bs; - bdrv_detach_child(child); bdrv_unref(child_bs); } -- cgit 1.4.1 From f38eaec4c3618dfc4a23e20435cefb5bf8325264 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 7 Nov 2022 19:35:56 +0300 Subject: block: drop bdrv_remove_filter_or_cow_child Drop this simple wrapper used only in one place. We have too many graph modifying functions even without it. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Message-Id: <20221107163558.618889-3-vsementsov@yandex-team.ru> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index c0c1b3df91..dc761209ac 100644 --- a/block.c +++ b/block.c @@ -93,8 +93,6 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs); static void bdrv_remove_child(BdrvChild *child, Transaction *tran); -static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, - Transaction *tran); static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, @@ -5065,17 +5063,6 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran) tran_add(tran, &bdrv_remove_child_drv, child); } -/* - * A function to remove backing-chain child of @bs if exists: cow child for - * format nodes (always .backing) and filter child for filters (may be .file or - * .backing) - */ -static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, - Transaction *tran) -{ - bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran); -} - static int bdrv_replace_node_noperm(BlockDriverState *from, BlockDriverState *to, bool auto_skip, Transaction *tran, @@ -5160,7 +5147,7 @@ static int bdrv_replace_node_common(BlockDriverState *from, } if (detach_subchain) { - bdrv_remove_filter_or_cow_child(to_cow_parent, tran); + bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran); } found = g_hash_table_new(NULL, NULL); -- cgit 1.4.1 From f1316edbfcea3b31181f22d737ac7cf80b395355 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 7 Nov 2022 19:35:57 +0300 Subject: block: bdrv_refresh_perms(): allow external tran Allow passing external Transaction pointer, stop creating extra Transaction objects. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Message-Id: <20221107163558.618889-4-vsementsov@yandex-team.ru> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index dc761209ac..f2f9178832 100644 --- a/block.c +++ b/block.c @@ -2591,15 +2591,24 @@ char *bdrv_perm_names(uint64_t perm) } -static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp) +/* @tran is allowed to be NULL. In this case no rollback is possible */ +static int bdrv_refresh_perms(BlockDriverState *bs, Transaction *tran, + Error **errp) { int ret; - Transaction *tran = tran_new(); + Transaction *local_tran = NULL; g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs); GLOBAL_STATE_CODE(); + if (!tran) { + tran = local_tran = tran_new(); + } + ret = bdrv_list_refresh_perms(list, NULL, tran, errp); - tran_finalize(tran, ret); + + if (local_tran) { + tran_finalize(local_tran, ret); + } return ret; } @@ -2615,7 +2624,7 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, bdrv_child_set_perm(c, perm, shared, tran); - ret = bdrv_refresh_perms(c->bs, &local_err); + ret = bdrv_refresh_perms(c->bs, tran, &local_err); tran_finalize(tran, ret); @@ -3099,7 +3108,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, goto out; } - ret = bdrv_refresh_perms(child_bs, errp); + ret = bdrv_refresh_perms(child_bs, tran, errp); out: tran_finalize(tran, ret); @@ -3140,7 +3149,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, goto out; } - ret = bdrv_refresh_perms(parent_bs, errp); + ret = bdrv_refresh_perms(parent_bs, tran, errp); if (ret < 0) { goto out; } @@ -3168,7 +3177,7 @@ void bdrv_root_unref_child(BdrvChild *child) * we're loosening restrictions. Errors of permission update are not * fatal in this case, ignore them. */ - bdrv_refresh_perms(child_bs, NULL); + bdrv_refresh_perms(child_bs, NULL, NULL); /* * When the parent requiring a non-default AioContext is removed, the @@ -3410,7 +3419,7 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, goto out; } - ret = bdrv_refresh_perms(bs, errp); + ret = bdrv_refresh_perms(bs, tran, errp); out: tran_finalize(tran, ret); @@ -5223,7 +5232,7 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, goto out; } - ret = bdrv_refresh_perms(bs_new, errp); + ret = bdrv_refresh_perms(bs_new, tran, errp); out: tran_finalize(tran, ret); @@ -6523,7 +6532,7 @@ int bdrv_activate(BlockDriverState *bs, Error **errp) */ if (bs->open_flags & BDRV_O_INACTIVE) { bs->open_flags &= ~BDRV_O_INACTIVE; - ret = bdrv_refresh_perms(bs, errp); + ret = bdrv_refresh_perms(bs, NULL, errp); if (ret < 0) { bs->open_flags |= BDRV_O_INACTIVE; return ret; @@ -6668,7 +6677,7 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs) * We only tried to loosen restrictions, so errors are not fatal, ignore * them. */ - bdrv_refresh_perms(bs, NULL); + bdrv_refresh_perms(bs, NULL, NULL); /* Recursively inactivate children */ QLIST_FOREACH(child, &bs->children, next) { -- cgit 1.4.1 From fb0ff4d1baf8012e7f358daf007967d65e1f545a Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 7 Nov 2022 19:35:58 +0300 Subject: block: refactor bdrv_list_refresh_perms to allow any list of nodes We are going to increase usage of collecting nodes in a list to then update, and calling bdrv_topological_dfs() each time is not convenient, and not correct as we are going to interleave graph modifying with filling the node list. So, let's switch to a function that takes any list of nodes, adds all their subtrees and do topological sort. And finally, refresh permissions. While being here, make the function public, as we'll want to use it from blockdev.c in near future. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Message-Id: <20221107163558.618889-5-vsementsov@yandex-team.ru> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 51 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 19 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index f2f9178832..385ed3cd53 100644 --- a/block.c +++ b/block.c @@ -2521,8 +2521,12 @@ static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q, return 0; } -static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, - Transaction *tran, Error **errp) +/* + * @list is a product of bdrv_topological_dfs() (may be called several times) - + * a topologically sorted subgraph. + */ +static int bdrv_do_refresh_perms(GSList *list, BlockReopenQueue *q, + Transaction *tran, Error **errp) { int ret; BlockDriverState *bs; @@ -2544,6 +2548,24 @@ static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, return 0; } +/* + * @list is any list of nodes. List is completed by all subtrees and + * topologically sorted. It's not a problem if some node occurs in the @list + * several times. + */ +static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, + Transaction *tran, Error **errp) +{ + g_autoptr(GHashTable) found = g_hash_table_new(NULL, NULL); + g_autoptr(GSList) refresh_list = NULL; + + for ( ; list; list = list->next) { + refresh_list = bdrv_topological_dfs(refresh_list, found, list->data); + } + + return bdrv_do_refresh_perms(refresh_list, q, tran, errp); +} + void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, uint64_t *shared_perm) { @@ -2604,7 +2626,7 @@ static int bdrv_refresh_perms(BlockDriverState *bs, Transaction *tran, tran = local_tran = tran_new(); } - ret = bdrv_list_refresh_perms(list, NULL, tran, errp); + ret = bdrv_do_refresh_perms(list, NULL, tran, errp); if (local_tran) { tran_finalize(local_tran, ret); @@ -4360,7 +4382,6 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) BlockReopenQueueEntry *bs_entry, *next; AioContext *ctx; Transaction *tran = tran_new(); - g_autoptr(GHashTable) found = NULL; g_autoptr(GSList) refresh_list = NULL; assert(qemu_get_current_aio_context() == qemu_get_aio_context()); @@ -4390,18 +4411,15 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) bs_entry->prepared = true; } - found = g_hash_table_new(NULL, NULL); QTAILQ_FOREACH(bs_entry, bs_queue, entry) { BDRVReopenState *state = &bs_entry->state; - refresh_list = bdrv_topological_dfs(refresh_list, found, state->bs); + refresh_list = g_slist_prepend(refresh_list, state->bs); if (state->old_backing_bs) { - refresh_list = bdrv_topological_dfs(refresh_list, found, - state->old_backing_bs); + refresh_list = g_slist_prepend(refresh_list, state->old_backing_bs); } if (state->old_file_bs) { - refresh_list = bdrv_topological_dfs(refresh_list, found, - state->old_file_bs); + refresh_list = g_slist_prepend(refresh_list, state->old_file_bs); } } @@ -5118,7 +5136,6 @@ static int bdrv_replace_node_common(BlockDriverState *from, Error **errp) { Transaction *tran = tran_new(); - g_autoptr(GHashTable) found = NULL; g_autoptr(GSList) refresh_list = NULL; BlockDriverState *to_cow_parent = NULL; int ret; @@ -5159,10 +5176,8 @@ static int bdrv_replace_node_common(BlockDriverState *from, bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran); } - found = g_hash_table_new(NULL, NULL); - - refresh_list = bdrv_topological_dfs(refresh_list, found, to); - refresh_list = bdrv_topological_dfs(refresh_list, found, from); + refresh_list = g_slist_prepend(refresh_list, to); + refresh_list = g_slist_prepend(refresh_list, from); ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp); if (ret < 0) { @@ -5247,7 +5262,6 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, { int ret; Transaction *tran = tran_new(); - g_autoptr(GHashTable) found = NULL; g_autoptr(GSList) refresh_list = NULL; BlockDriverState *old_bs = child->bs; @@ -5259,9 +5273,8 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, bdrv_replace_child_tran(child, new_bs, tran); - found = g_hash_table_new(NULL, NULL); - refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs); - refresh_list = bdrv_topological_dfs(refresh_list, found, new_bs); + refresh_list = g_slist_prepend(refresh_list, old_bs); + refresh_list = g_slist_prepend(refresh_list, new_bs); ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp); -- cgit 1.4.1 From 5e8ac21717373cbe96ef7a91e216bf5788815d63 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:40:58 +0100 Subject: block: Revert .bdrv_drained_begin/end to non-coroutine_fn Polling during bdrv_drained_end() can be problematic (and in the future, we may get cases for bdrv_drained_begin() where polling is forbidden, and we don't care about already in-flight requests, but just want to prevent new requests from arriving). The .bdrv_drained_begin/end callbacks running in a coroutine is the only reason why we have to do this polling, so make them non-coroutine callbacks again. None of the callers actually yield any more. This means that bdrv_drained_end() effectively doesn't poll any more, even if AIO_WAIT_WHILE() loops are still there (their condition is false from the beginning). This is generally not a problem, but in test-bdrv-drain, some additional explicit aio_poll() calls need to be added because the test case wants to verify the final state after BHs have executed. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Hanna Reitz Message-Id: <20221118174110.55183-4-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 4 ++-- block/io.c | 49 ++++++---------------------------------- block/qed.c | 6 ++--- block/throttle.c | 8 +++---- include/block/block_int-common.h | 10 ++++---- tests/unit/test-bdrv-drain.c | 18 ++++++++------- 6 files changed, 32 insertions(+), 63 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index 385ed3cd53..466770b9ac 100644 --- a/block.c +++ b/block.c @@ -1713,8 +1713,8 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, assert(is_power_of_2(bs->bl.request_alignment)); for (i = 0; i < bs->quiesce_counter; i++) { - if (drv->bdrv_co_drain_begin) { - drv->bdrv_co_drain_begin(bs); + if (drv->bdrv_drain_begin) { + drv->bdrv_drain_begin(bs); } } diff --git a/block/io.c b/block/io.c index b9424024f9..c2ed4b2af9 100644 --- a/block/io.c +++ b/block/io.c @@ -252,55 +252,20 @@ typedef struct { int *drained_end_counter; } BdrvCoDrainData; -static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) -{ - BdrvCoDrainData *data = opaque; - BlockDriverState *bs = data->bs; - - if (data->begin) { - bs->drv->bdrv_co_drain_begin(bs); - } else { - bs->drv->bdrv_co_drain_end(bs); - } - - /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */ - qatomic_mb_set(&data->done, true); - if (!data->begin) { - qatomic_dec(data->drained_end_counter); - } - bdrv_dec_in_flight(bs); - - g_free(data); -} - -/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ +/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, int *drained_end_counter) { - BdrvCoDrainData *data; - - if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) || - (!begin && !bs->drv->bdrv_co_drain_end)) { + if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || + (!begin && !bs->drv->bdrv_drain_end)) { return; } - data = g_new(BdrvCoDrainData, 1); - *data = (BdrvCoDrainData) { - .bs = bs, - .done = false, - .begin = begin, - .drained_end_counter = drained_end_counter, - }; - - if (!begin) { - qatomic_inc(drained_end_counter); + if (begin) { + bs->drv->bdrv_drain_begin(bs); + } else { + bs->drv->bdrv_drain_end(bs); } - - /* Make sure the driver callback completes during the polling phase for - * drain_begin. */ - bdrv_inc_in_flight(bs); - data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data); - aio_co_schedule(bdrv_get_aio_context(bs), data->co); } /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ diff --git a/block/qed.c b/block/qed.c index af2fbec0b7..e8ee332542 100644 --- a/block/qed.c +++ b/block/qed.c @@ -262,7 +262,7 @@ static bool coroutine_fn qed_plug_allocating_write_reqs(BDRVQEDState *s) assert(!s->allocating_write_reqs_plugged); if (s->allocating_acb != NULL) { /* Another allocating write came concurrently. This cannot happen - * from bdrv_qed_co_drain_begin, but it can happen when the timer runs. + * from bdrv_qed_drain_begin, but it can happen when the timer runs. */ qemu_co_mutex_unlock(&s->table_lock); return false; @@ -365,7 +365,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs, } } -static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs) +static void bdrv_qed_drain_begin(BlockDriverState *bs) { BDRVQEDState *s = bs->opaque; @@ -1661,7 +1661,7 @@ static BlockDriver bdrv_qed = { .bdrv_co_check = bdrv_qed_co_check, .bdrv_detach_aio_context = bdrv_qed_detach_aio_context, .bdrv_attach_aio_context = bdrv_qed_attach_aio_context, - .bdrv_co_drain_begin = bdrv_qed_co_drain_begin, + .bdrv_drain_begin = bdrv_qed_drain_begin, }; static void bdrv_qed_init(void) diff --git a/block/throttle.c b/block/throttle.c index 131eba3ab4..88851c84f4 100644 --- a/block/throttle.c +++ b/block/throttle.c @@ -214,7 +214,7 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state) reopen_state->opaque = NULL; } -static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) +static void throttle_drain_begin(BlockDriverState *bs) { ThrottleGroupMember *tgm = bs->opaque; if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) { @@ -222,7 +222,7 @@ static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) } } -static void coroutine_fn throttle_co_drain_end(BlockDriverState *bs) +static void throttle_drain_end(BlockDriverState *bs) { ThrottleGroupMember *tgm = bs->opaque; assert(tgm->io_limits_disabled); @@ -261,8 +261,8 @@ static BlockDriver bdrv_throttle = { .bdrv_reopen_commit = throttle_reopen_commit, .bdrv_reopen_abort = throttle_reopen_abort, - .bdrv_co_drain_begin = throttle_co_drain_begin, - .bdrv_co_drain_end = throttle_co_drain_end, + .bdrv_drain_begin = throttle_drain_begin, + .bdrv_drain_end = throttle_drain_end, .is_filter = true, .strong_runtime_opts = throttle_strong_runtime_opts, diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 31ae91e56e..40d646d1ed 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -735,17 +735,19 @@ struct BlockDriver { void (*bdrv_io_unplug)(BlockDriverState *bs); /** - * bdrv_co_drain_begin is called if implemented in the beginning of a + * bdrv_drain_begin is called if implemented in the beginning of a * drain operation to drain and stop any internal sources of requests in * the driver. - * bdrv_co_drain_end is called if implemented at the end of the drain. + * bdrv_drain_end is called if implemented at the end of the drain. * * They should be used by the driver to e.g. manage scheduled I/O * requests, or toggle an internal state. After the end of the drain new * requests will continue normally. + * + * Implementations of both functions must not call aio_poll(). */ - void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs); - void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs); + void (*bdrv_drain_begin)(BlockDriverState *bs); + void (*bdrv_drain_end)(BlockDriverState *bs); bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)( diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index 24f34e24ad..695519ee02 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -46,7 +46,7 @@ static void coroutine_fn sleep_in_drain_begin(void *opaque) bdrv_dec_in_flight(bs); } -static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) +static void bdrv_test_drain_begin(BlockDriverState *bs) { BDRVTestState *s = bs->opaque; s->drain_count++; @@ -57,7 +57,7 @@ static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) } } -static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs) +static void bdrv_test_drain_end(BlockDriverState *bs) { BDRVTestState *s = bs->opaque; s->drain_count--; @@ -111,8 +111,8 @@ static BlockDriver bdrv_test = { .bdrv_close = bdrv_test_close, .bdrv_co_preadv = bdrv_test_co_preadv, - .bdrv_co_drain_begin = bdrv_test_co_drain_begin, - .bdrv_co_drain_end = bdrv_test_co_drain_end, + .bdrv_drain_begin = bdrv_test_drain_begin, + .bdrv_drain_end = bdrv_test_drain_end, .bdrv_child_perm = bdrv_default_perms, @@ -1703,6 +1703,7 @@ static void test_blockjob_commit_by_drained_end(void) bdrv_drained_begin(bs_child); g_assert(!job_has_completed); bdrv_drained_end(bs_child); + aio_poll(qemu_get_aio_context(), false); g_assert(job_has_completed); bdrv_unref(bs_parents[0]); @@ -1858,6 +1859,7 @@ static void test_drop_intermediate_poll(void) g_assert(!job_has_completed); ret = bdrv_drop_intermediate(chain[1], chain[0], NULL); + aio_poll(qemu_get_aio_context(), false); g_assert(ret == 0); g_assert(job_has_completed); @@ -1946,7 +1948,7 @@ static void coroutine_fn bdrv_replace_test_drain_co(void *opaque) * .was_drained. * Increment .drain_count. */ -static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) +static void bdrv_replace_test_drain_begin(BlockDriverState *bs) { BDRVReplaceTestState *s = bs->opaque; @@ -1977,7 +1979,7 @@ static void coroutine_fn bdrv_replace_test_read_entry(void *opaque) * If .drain_count reaches 0 and the node has a backing file, issue a * read request. */ -static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) +static void bdrv_replace_test_drain_end(BlockDriverState *bs) { BDRVReplaceTestState *s = bs->opaque; @@ -2002,8 +2004,8 @@ static BlockDriver bdrv_replace_test = { .bdrv_close = bdrv_replace_test_close, .bdrv_co_preadv = bdrv_replace_test_co_preadv, - .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin, - .bdrv_co_drain_end = bdrv_replace_test_co_drain_end, + .bdrv_drain_begin = bdrv_replace_test_drain_begin, + .bdrv_drain_end = bdrv_replace_test_drain_end, .bdrv_child_perm = bdrv_default_perms, }; -- cgit 1.4.1 From 2f65df6e16dea2d6e7212fa675f4779d9281e26f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:40:59 +0100 Subject: block: Remove drained_end_counter drained_end_counter is unused now, nobody changes its value any more. It can be removed. In cases where we had two almost identical functions that only differed in whether the caller passes drained_end_counter, or whether they would poll for a local drained_end_counter to reach 0, these become a single function. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Emanuele Giuseppe Esposito Message-Id: <20221118174110.55183-5-kwolf@redhat.com> Reviewed-by: Hanna Reitz Signed-off-by: Kevin Wolf --- block.c | 5 +- block/block-backend.c | 4 +- block/io.c | 98 ++++++++++------------------------------ blockjob.c | 2 +- include/block/block-io.h | 24 ---------- include/block/block_int-common.h | 6 +-- 6 files changed, 30 insertions(+), 109 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index 466770b9ac..ce71545551 100644 --- a/block.c +++ b/block.c @@ -1235,11 +1235,10 @@ static bool bdrv_child_cb_drained_poll(BdrvChild *child) return bdrv_drain_poll(bs, false, NULL, false); } -static void bdrv_child_cb_drained_end(BdrvChild *child, - int *drained_end_counter) +static void bdrv_child_cb_drained_end(BdrvChild *child) { BlockDriverState *bs = child->opaque; - bdrv_drained_end_no_poll(bs, drained_end_counter); + bdrv_drained_end(bs); } static int bdrv_child_cb_inactivate(BdrvChild *child) diff --git a/block/block-backend.c b/block/block-backend.c index bf0ea3cfed..91e1d33a58 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -129,7 +129,7 @@ static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format, } static void blk_root_drained_begin(BdrvChild *child); static bool blk_root_drained_poll(BdrvChild *child); -static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter); +static void blk_root_drained_end(BdrvChild *child); static void blk_root_change_media(BdrvChild *child, bool load); static void blk_root_resize(BdrvChild *child); @@ -2556,7 +2556,7 @@ static bool blk_root_drained_poll(BdrvChild *child) return busy || !!blk->in_flight; } -static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) +static void blk_root_drained_end(BdrvChild *child) { BlockBackend *blk = child->opaque; assert(blk->quiesce_counter); diff --git a/block/io.c b/block/io.c index c2ed4b2af9..f4ca62b034 100644 --- a/block/io.c +++ b/block/io.c @@ -58,28 +58,19 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, } } -static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c, - int *drained_end_counter) +void bdrv_parent_drained_end_single(BdrvChild *c) { + IO_OR_GS_CODE(); + assert(c->parent_quiesce_counter > 0); c->parent_quiesce_counter--; if (c->klass->drained_end) { - c->klass->drained_end(c, drained_end_counter); + c->klass->drained_end(c); } } -void bdrv_parent_drained_end_single(BdrvChild *c) -{ - int drained_end_counter = 0; - AioContext *ctx = bdrv_child_get_parent_aio_context(c); - IO_OR_GS_CODE(); - bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter); - AIO_WAIT_WHILE(ctx, qatomic_read(&drained_end_counter) > 0); -} - static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, - bool ignore_bds_parents, - int *drained_end_counter) + bool ignore_bds_parents) { BdrvChild *c; @@ -87,7 +78,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { continue; } - bdrv_parent_drained_end_single_no_poll(c, drained_end_counter); + bdrv_parent_drained_end_single(c); } } @@ -249,12 +240,10 @@ typedef struct { bool poll; BdrvChild *parent; bool ignore_bds_parents; - int *drained_end_counter; } BdrvCoDrainData; /* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ -static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, - int *drained_end_counter) +static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) { if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || (!begin && !bs->drv->bdrv_drain_end)) { @@ -305,8 +294,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, BdrvChild *parent, bool ignore_bds_parents, bool poll); static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool ignore_bds_parents, - int *drained_end_counter); + BdrvChild *parent, bool ignore_bds_parents); static void bdrv_co_drain_bh_cb(void *opaque) { @@ -319,14 +307,12 @@ static void bdrv_co_drain_bh_cb(void *opaque) aio_context_acquire(ctx); bdrv_dec_in_flight(bs); if (data->begin) { - assert(!data->drained_end_counter); bdrv_do_drained_begin(bs, data->recursive, data->parent, data->ignore_bds_parents, data->poll); } else { assert(!data->poll); bdrv_do_drained_end(bs, data->recursive, data->parent, - data->ignore_bds_parents, - data->drained_end_counter); + data->ignore_bds_parents); } aio_context_release(ctx); } else { @@ -342,8 +328,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, bool begin, bool recursive, BdrvChild *parent, bool ignore_bds_parents, - bool poll, - int *drained_end_counter) + bool poll) { BdrvCoDrainData data; Coroutine *self = qemu_coroutine_self(); @@ -363,7 +348,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, .parent = parent, .ignore_bds_parents = ignore_bds_parents, .poll = poll, - .drained_end_counter = drained_end_counter, }; if (bs) { @@ -406,7 +390,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, } bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); - bdrv_drain_invoke(bs, true, NULL); + bdrv_drain_invoke(bs, true); } static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, @@ -417,7 +401,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, if (qemu_in_coroutine()) { bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, - poll, NULL); + poll); return; } @@ -461,38 +445,24 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs) /** * This function does not poll, nor must any of its recursively called - * functions. The *drained_end_counter pointee will be incremented - * once for every background operation scheduled, and decremented once - * the operation settles. Therefore, the pointer must remain valid - * until the pointee reaches 0. That implies that whoever sets up the - * pointee has to poll until it is 0. - * - * We use atomic operations to access *drained_end_counter, because - * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of - * @bs may contain nodes in different AioContexts, - * (2) bdrv_drain_all_end() uses the same counter for all nodes, - * regardless of which AioContext they are in. + * functions. */ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool ignore_bds_parents, - int *drained_end_counter) + BdrvChild *parent, bool ignore_bds_parents) { BdrvChild *child; int old_quiesce_counter; - assert(drained_end_counter != NULL); - if (qemu_in_coroutine()) { bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, - false, drained_end_counter); + false); return; } assert(bs->quiesce_counter > 0); /* Re-enable things in child-to-parent order */ - bdrv_drain_invoke(bs, false, drained_end_counter); - bdrv_parent_drained_end(bs, parent, ignore_bds_parents, - drained_end_counter); + bdrv_drain_invoke(bs, false); + bdrv_parent_drained_end(bs, parent, ignore_bds_parents); old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); if (old_quiesce_counter == 1) { @@ -503,32 +473,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, assert(!ignore_bds_parents); bs->recursive_quiesce_counter--; QLIST_FOREACH(child, &bs->children, next) { - bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents, - drained_end_counter); + bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents); } } } void bdrv_drained_end(BlockDriverState *bs) { - int drained_end_counter = 0; IO_OR_GS_CODE(); - bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter); - BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); -} - -void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter) -{ - IO_CODE(); - bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter); + bdrv_do_drained_end(bs, false, NULL, false); } void bdrv_subtree_drained_end(BlockDriverState *bs) { - int drained_end_counter = 0; IO_OR_GS_CODE(); - bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter); - BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); + bdrv_do_drained_end(bs, true, NULL, false); } void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) @@ -543,16 +502,12 @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) { - int drained_end_counter = 0; int i; IO_OR_GS_CODE(); for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { - bdrv_do_drained_end(child->bs, true, child, false, - &drained_end_counter); + bdrv_do_drained_end(child->bs, true, child, false); } - - BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0); } void bdrv_drain(BlockDriverState *bs) @@ -610,7 +565,7 @@ void bdrv_drain_all_begin(void) GLOBAL_STATE_CODE(); if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL); + bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); return; } @@ -649,22 +604,19 @@ void bdrv_drain_all_begin(void) void bdrv_drain_all_end_quiesce(BlockDriverState *bs) { - int drained_end_counter = 0; GLOBAL_STATE_CODE(); g_assert(bs->quiesce_counter > 0); g_assert(!bs->refcnt); while (bs->quiesce_counter) { - bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); + bdrv_do_drained_end(bs, false, NULL, true); } - BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); } void bdrv_drain_all_end(void) { BlockDriverState *bs = NULL; - int drained_end_counter = 0; GLOBAL_STATE_CODE(); /* @@ -680,13 +632,11 @@ void bdrv_drain_all_end(void) AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); + bdrv_do_drained_end(bs, false, NULL, true); aio_context_release(aio_context); } assert(qemu_get_current_aio_context() == qemu_get_aio_context()); - AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0); - assert(bdrv_drain_all_count > 0); bdrv_drain_all_count--; } diff --git a/blockjob.c b/blockjob.c index 3c8f3543a2..b7daf2a9f6 100644 --- a/blockjob.c +++ b/blockjob.c @@ -120,7 +120,7 @@ static bool child_job_drained_poll(BdrvChild *c) } } -static void child_job_drained_end(BdrvChild *c, int *drained_end_counter) +static void child_job_drained_end(BdrvChild *c) { BlockJob *job = c->opaque; job_resume(&job->job); diff --git a/include/block/block-io.h b/include/block/block-io.h index b099d7db45..054e964c9b 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -237,21 +237,6 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags); -/** - * bdrv_drained_end_no_poll: - * - * Same as bdrv_drained_end(), but do not poll for the subgraph to - * actually become unquiesced. Therefore, no graph changes will occur - * with this function. - * - * *drained_end_counter is incremented for every background operation - * that is scheduled, and will be decremented for every operation once - * it settles. The caller must poll until it reaches 0. The counter - * should be accessed using atomic operations only. - */ -void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter); - - /* * "I/O or GS" API functions. These functions can run without * the BQL, but only in one specific iothread/main loop. @@ -311,9 +296,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); * bdrv_parent_drained_end_single: * * End a quiesced section for the parent of @c. - * - * This polls @bs's AioContext until all scheduled sub-drained_ends - * have settled, which may result in graph changes. */ void bdrv_parent_drained_end_single(BdrvChild *c); @@ -361,12 +343,6 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); * bdrv_drained_end: * * End a quiescent section started by bdrv_drained_begin(). - * - * This polls @bs's AioContext until all scheduled sub-drained_ends - * have settled. On one hand, that may result in graph changes. On - * the other, this requires that the caller either runs in the main - * loop; or that all involved nodes (@bs and all of its parents) are - * in the caller's AioContext. */ void bdrv_drained_end(BlockDriverState *bs); diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 40d646d1ed..2b97576f6d 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -939,15 +939,11 @@ struct BdrvChildClass { * These functions must not change the graph (and therefore also must not * call aio_poll(), which could change the graph indirectly). * - * If drained_end() schedules background operations, it must atomically - * increment *drained_end_counter for each such operation and atomically - * decrement it once the operation has settled. - * * Note that this can be nested. If drained_begin() was called twice, new * I/O is allowed only after drained_end() was called twice, too. */ void (*drained_begin)(BdrvChild *child); - void (*drained_end)(BdrvChild *child, int *drained_end_counter); + void (*drained_end)(BdrvChild *child); /* * Returns whether the parent has pending requests for the child. This -- cgit 1.4.1 From 2e117866d7c96cc17e84cd2946fee1bf3292d814 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:01 +0100 Subject: block: Fix locking for bdrv_reopen_queue_child() Callers don't agree whether bdrv_reopen_queue_child() should be called with the AioContext lock held or not. Standardise on holding the lock (as done by QMP blockdev-reopen and the replication block driver) and fix bdrv_reopen() to do the same. Signed-off-by: Kevin Wolf Message-Id: <20221118174110.55183-7-kwolf@redhat.com> Reviewed-by: Hanna Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index ce71545551..3266519455 100644 --- a/block.c +++ b/block.c @@ -4174,6 +4174,8 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, * bs_queue, or the existing bs_queue being used. * * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). + * + * To be called with bs->aio_context locked. */ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs, @@ -4332,6 +4334,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, return bs_queue; } +/* To be called with bs->aio_context locked */ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, BlockDriverState *bs, QDict *options, bool keep_old_opts) @@ -4492,11 +4495,11 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, GLOBAL_STATE_CODE(); bdrv_subtree_drained_begin(bs); + queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); + if (ctx != qemu_get_aio_context()) { aio_context_release(ctx); } - - queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); ret = bdrv_reopen_multiple(queue, errp); if (ctx != qemu_get_aio_context()) { -- cgit 1.4.1 From d22933acd2f470eeef779e4d444e848f76dcfaf8 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:02 +0100 Subject: block: Drain individual nodes during reopen bdrv_reopen() and friends use subtree drains as a lazy way of covering all the nodes they touch. Turns out that this lazy way is a lot more complicated than just draining the nodes individually, even not accounting for the additional complexity in the drain mechanism itself. Simplify the code by switching to draining the individual nodes that are already managed in the BlockReopenQueue anyway. Signed-off-by: Kevin Wolf Message-Id: <20221118174110.55183-8-kwolf@redhat.com> Reviewed-by: Hanna Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block.c | 16 +++++++++------- block/replication.c | 6 ------ blockdev.c | 13 ------------- 3 files changed, 9 insertions(+), 26 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index 3266519455..dd329a16ce 100644 --- a/block.c +++ b/block.c @@ -4173,7 +4173,7 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, * returns a pointer to bs_queue, which is either the newly allocated * bs_queue, or the existing bs_queue being used. * - * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). + * bs is drained here and undrained by bdrv_reopen_queue_free(). * * To be called with bs->aio_context locked. */ @@ -4195,12 +4195,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, int flags; QemuOpts *opts; - /* Make sure that the caller remembered to use a drained section. This is - * important to avoid graph changes between the recursive queuing here and - * bdrv_reopen_multiple(). */ - assert(bs->quiesce_counter > 0); GLOBAL_STATE_CODE(); + bdrv_drained_begin(bs); + if (bs_queue == NULL) { bs_queue = g_new0(BlockReopenQueue, 1); QTAILQ_INIT(bs_queue); @@ -4351,6 +4349,12 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) if (bs_queue) { BlockReopenQueueEntry *bs_entry, *next; QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { + AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs); + + aio_context_acquire(ctx); + bdrv_drained_end(bs_entry->state.bs); + aio_context_release(ctx); + qobject_unref(bs_entry->state.explicit_options); qobject_unref(bs_entry->state.options); g_free(bs_entry); @@ -4494,7 +4498,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, GLOBAL_STATE_CODE(); - bdrv_subtree_drained_begin(bs); queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); if (ctx != qemu_get_aio_context()) { @@ -4505,7 +4508,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, if (ctx != qemu_get_aio_context()) { aio_context_acquire(ctx); } - bdrv_subtree_drained_end(bs); return ret; } diff --git a/block/replication.c b/block/replication.c index f1eed25e43..c62f48a874 100644 --- a/block/replication.c +++ b/block/replication.c @@ -374,9 +374,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs); } - bdrv_subtree_drained_begin(hidden_disk->bs); - bdrv_subtree_drained_begin(secondary_disk->bs); - if (s->orig_hidden_read_only) { QDict *opts = qdict_new(); qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); @@ -401,9 +398,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, aio_context_acquire(ctx); } } - - bdrv_subtree_drained_end(hidden_disk->bs); - bdrv_subtree_drained_end(secondary_disk->bs); } static void backup_job_cleanup(BlockDriverState *bs) diff --git a/blockdev.c b/blockdev.c index 75eef8535e..d2f80b0386 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3515,8 +3515,6 @@ fail: void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) { BlockReopenQueue *queue = NULL; - GSList *drained = NULL; - GSList *p; /* Add each one of the BDS that we want to reopen to the queue */ for (; reopen_list != NULL; reopen_list = reopen_list->next) { @@ -3553,9 +3551,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) ctx = bdrv_get_aio_context(bs); aio_context_acquire(ctx); - bdrv_subtree_drained_begin(bs); queue = bdrv_reopen_queue(queue, bs, qdict, false); - drained = g_slist_prepend(drained, bs); aio_context_release(ctx); } @@ -3566,15 +3562,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) fail: bdrv_reopen_queue_free(queue); - for (p = drained; p; p = p->next) { - BlockDriverState *bs = p->data; - AioContext *ctx = bdrv_get_aio_context(bs); - - aio_context_acquire(ctx); - bdrv_subtree_drained_end(bs); - aio_context_release(ctx); - } - g_slist_free(drained); } void qmp_blockdev_del(const char *node_name, Error **errp) -- cgit 1.4.1 From 631086deefc32690ee56efed1c5b891dec31ae37 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:03 +0100 Subject: block: Don't use subtree drains in bdrv_drop_intermediate() Instead of using a subtree drain from the top node (which also drains child nodes of base that we're not even interested in), use a normal drain for base, which automatically drains all of the parents, too. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Message-Id: <20221118174110.55183-9-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index dd329a16ce..db043346d8 100644 --- a/block.c +++ b/block.c @@ -5600,7 +5600,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, GLOBAL_STATE_CODE(); bdrv_ref(top); - bdrv_subtree_drained_begin(top); + bdrv_drained_begin(base); if (!top->drv || !base->drv) { goto exit; @@ -5673,7 +5673,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, ret = 0; exit: - bdrv_subtree_drained_end(top); + bdrv_drained_end(base); bdrv_unref(top); return ret; } -- cgit 1.4.1 From 92140b9f3f07d80e2c27edcc6e32f392be2135e6 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:04 +0100 Subject: stream: Replace subtree drain with a single node drain The subtree drain was introduced in commit b1e1af394d9 as a way to avoid graph changes between finding the base node and changing the block graph as necessary on completion of the image streaming job. The block graph could change between these two points because bdrv_set_backing_hd() first drains the parent node, which involved polling and can do anything. Subtree draining was an imperfect way to make this less likely (because with it, fewer callbacks are called during this window). Everyone agreed that it's not really the right solution, and it was only committed as a stopgap solution. This replaces the subtree drain with a solution that simply drains the parent node before we try to find the base node, and then call a version of bdrv_set_backing_hd() that doesn't drain, but just asserts that the parent node is already drained. This way, any graph changes caused by draining happen before we start looking at the graph and things stay consistent between finding the base node and changing the graph. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Message-Id: <20221118174110.55183-10-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 17 ++++++++++++++--- block/stream.c | 26 ++++++++++++++++---------- include/block/block-global-state.h | 3 +++ 3 files changed, 33 insertions(+), 13 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index db043346d8..97bfb1494f 100644 --- a/block.c +++ b/block.c @@ -3426,14 +3426,15 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs, return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); } -int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, - Error **errp) +int bdrv_set_backing_hd_drained(BlockDriverState *bs, + BlockDriverState *backing_hd, + Error **errp) { int ret; Transaction *tran = tran_new(); GLOBAL_STATE_CODE(); - bdrv_drained_begin(bs); + assert(bs->quiesce_counter > 0); ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); if (ret < 0) { @@ -3443,7 +3444,17 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, ret = bdrv_refresh_perms(bs, tran, errp); out: tran_finalize(tran, ret); + return ret; +} +int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp) +{ + int ret; + GLOBAL_STATE_CODE(); + + bdrv_drained_begin(bs); + ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp); bdrv_drained_end(bs); return ret; diff --git a/block/stream.c b/block/stream.c index 694709bd25..8744ad103f 100644 --- a/block/stream.c +++ b/block/stream.c @@ -64,13 +64,16 @@ static int stream_prepare(Job *job) bdrv_cor_filter_drop(s->cor_filter_bs); s->cor_filter_bs = NULL; - bdrv_subtree_drained_begin(s->above_base); + /* + * bdrv_set_backing_hd() requires that unfiltered_bs is drained. Drain + * already here and use bdrv_set_backing_hd_drained() instead because + * the polling during drained_begin() might change the graph, and if we do + * this only later, we may end up working with the wrong base node (or it + * might even have gone away by the time we want to use it). + */ + bdrv_drained_begin(unfiltered_bs); base = bdrv_filter_or_cow_bs(s->above_base); - if (base) { - bdrv_ref(base); - } - unfiltered_base = bdrv_skip_filters(base); if (bdrv_cow_child(unfiltered_bs)) { @@ -82,7 +85,13 @@ static int stream_prepare(Job *job) } } - bdrv_set_backing_hd(unfiltered_bs, base, &local_err); + bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err); + + /* + * This call will do I/O, so the graph can change again from here on. + * We have already completed the graph change, so we are not in danger + * of operating on the wrong node any more if this happens. + */ ret = bdrv_change_backing_file(unfiltered_bs, base_id, base_fmt, false); if (local_err) { error_report_err(local_err); @@ -92,10 +101,7 @@ static int stream_prepare(Job *job) } out: - if (base) { - bdrv_unref(base); - } - bdrv_subtree_drained_end(s->above_base); + bdrv_drained_end(unfiltered_bs); return ret; } diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index c7bd4a2088..00e0cf8aea 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -82,6 +82,9 @@ int bdrv_open_file_child(const char *filename, BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp); int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, Error **errp); +int bdrv_set_backing_hd_drained(BlockDriverState *bs, + BlockDriverState *backing_hd, + Error **errp); int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, const char *bdref_key, Error **errp); BlockDriverState *bdrv_open(const char *filename, const char *reference, -- cgit 1.4.1 From 299403aedaeb7f08d8e98aa8614b29d4e5546066 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:05 +0100 Subject: block: Remove subtree drains Subtree drains are not used any more. Remove them. After this, BdrvChildClass.attach/detach() don't poll any more. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Message-Id: <20221118174110.55183-11-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 20 +-- block/io.c | 121 ++++-------------- include/block/block-io.h | 18 +-- include/block/block_int-common.h | 1 - include/block/block_int-io.h | 12 -- tests/unit/test-bdrv-drain.c | 261 ++------------------------------------- 6 files changed, 44 insertions(+), 389 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index 97bfb1494f..7ea0b82049 100644 --- a/block.c +++ b/block.c @@ -1232,7 +1232,7 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child) static bool bdrv_child_cb_drained_poll(BdrvChild *child) { BlockDriverState *bs = child->opaque; - return bdrv_drain_poll(bs, false, NULL, false); + return bdrv_drain_poll(bs, NULL, false); } static void bdrv_child_cb_drained_end(BdrvChild *child) @@ -1482,8 +1482,6 @@ static void bdrv_child_cb_attach(BdrvChild *child) assert(!bs->file); bs->file = child; } - - bdrv_apply_subtree_drain(child, bs); } static void bdrv_child_cb_detach(BdrvChild *child) @@ -1494,8 +1492,6 @@ static void bdrv_child_cb_detach(BdrvChild *child) bdrv_backing_detach(child); } - bdrv_unapply_subtree_drain(child, bs); - assert_bdrv_graph_writable(bs); QLIST_REMOVE(child, next); if (child == bs->backing) { @@ -2882,9 +2878,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, } if (old_bs) { - /* Detach first so that the recursive drain sections coming from @child - * are already gone and we only end the drain sections that came from - * elsewhere. */ if (child->klass->detach) { child->klass->detach(child); } @@ -2899,17 +2892,14 @@ static void bdrv_replace_child_noperm(BdrvChild *child, QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); /* - * Detaching the old node may have led to the new node's - * quiesce_counter having been decreased. Not a problem, we - * just need to recognize this here and then invoke - * drained_end appropriately more often. + * Polling in bdrv_parent_drained_begin_single() may have led to the new + * node's quiesce_counter having been decreased. Not a problem, we just + * need to recognize this here and then invoke drained_end appropriately + * more often. */ assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; - /* Attach only after starting new drained sections, so that recursive - * drain sections coming from @child don't get an extra .drained_begin - * callback. */ if (child->klass->attach) { child->klass->attach(child); } diff --git a/block/io.c b/block/io.c index a25103be6f..75224480d0 100644 --- a/block/io.c +++ b/block/io.c @@ -236,17 +236,15 @@ typedef struct { BlockDriverState *bs; bool done; bool begin; - bool recursive; bool poll; BdrvChild *parent; bool ignore_bds_parents; } BdrvCoDrainData; /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ -bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, - BdrvChild *ignore_parent, bool ignore_bds_parents) +bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, + bool ignore_bds_parents) { - BdrvChild *child, *next; IO_OR_GS_CODE(); if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) { @@ -257,29 +255,19 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, return true; } - if (recursive) { - assert(!ignore_bds_parents); - QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - if (bdrv_drain_poll(child->bs, recursive, child, false)) { - return true; - } - } - } - return false; } -static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive, +static bool bdrv_drain_poll_top_level(BlockDriverState *bs, BdrvChild *ignore_parent) { - return bdrv_drain_poll(bs, recursive, ignore_parent, false); + return bdrv_drain_poll(bs, ignore_parent, false); } -static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool ignore_bds_parents, - bool poll); -static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool ignore_bds_parents); +static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, + bool ignore_bds_parents, bool poll); +static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, + bool ignore_bds_parents); static void bdrv_co_drain_bh_cb(void *opaque) { @@ -292,12 +280,11 @@ static void bdrv_co_drain_bh_cb(void *opaque) aio_context_acquire(ctx); bdrv_dec_in_flight(bs); if (data->begin) { - bdrv_do_drained_begin(bs, data->recursive, data->parent, - data->ignore_bds_parents, data->poll); + bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents, + data->poll); } else { assert(!data->poll); - bdrv_do_drained_end(bs, data->recursive, data->parent, - data->ignore_bds_parents); + bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents); } aio_context_release(ctx); } else { @@ -310,7 +297,7 @@ static void bdrv_co_drain_bh_cb(void *opaque) } static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - bool begin, bool recursive, + bool begin, BdrvChild *parent, bool ignore_bds_parents, bool poll) @@ -329,7 +316,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, .bs = bs, .done = false, .begin = begin, - .recursive = recursive, .parent = parent, .ignore_bds_parents = ignore_bds_parents, .poll = poll, @@ -380,29 +366,16 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, } } -static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool ignore_bds_parents, - bool poll) +static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, + bool ignore_bds_parents, bool poll) { - BdrvChild *child, *next; - if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, - poll); + bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll); return; } bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); - if (recursive) { - assert(!ignore_bds_parents); - bs->recursive_quiesce_counter++; - QLIST_FOREACH_SAFE(child, &bs->children, next, next) { - bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents, - false); - } - } - /* * Wait for drained requests to finish. * @@ -414,35 +387,27 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, */ if (poll) { assert(!ignore_bds_parents); - BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent)); + BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); } } void bdrv_drained_begin(BlockDriverState *bs) { IO_OR_GS_CODE(); - bdrv_do_drained_begin(bs, false, NULL, false, true); -} - -void bdrv_subtree_drained_begin(BlockDriverState *bs) -{ - IO_OR_GS_CODE(); - bdrv_do_drained_begin(bs, true, NULL, false, true); + bdrv_do_drained_begin(bs, NULL, false, true); } /** * This function does not poll, nor must any of its recursively called * functions. */ -static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, - BdrvChild *parent, bool ignore_bds_parents) +static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, + bool ignore_bds_parents) { - BdrvChild *child; int old_quiesce_counter; if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, - false); + bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false); return; } assert(bs->quiesce_counter > 0); @@ -457,46 +422,12 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, if (old_quiesce_counter == 1) { aio_enable_external(bdrv_get_aio_context(bs)); } - - if (recursive) { - assert(!ignore_bds_parents); - bs->recursive_quiesce_counter--; - QLIST_FOREACH(child, &bs->children, next) { - bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents); - } - } } void bdrv_drained_end(BlockDriverState *bs) { IO_OR_GS_CODE(); - bdrv_do_drained_end(bs, false, NULL, false); -} - -void bdrv_subtree_drained_end(BlockDriverState *bs) -{ - IO_OR_GS_CODE(); - bdrv_do_drained_end(bs, true, NULL, false); -} - -void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) -{ - int i; - IO_OR_GS_CODE(); - - for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { - bdrv_do_drained_begin(child->bs, true, child, false, true); - } -} - -void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) -{ - int i; - IO_OR_GS_CODE(); - - for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { - bdrv_do_drained_end(child->bs, true, child, false); - } + bdrv_do_drained_end(bs, NULL, false); } void bdrv_drain(BlockDriverState *bs) @@ -529,7 +460,7 @@ static bool bdrv_drain_all_poll(void) while ((bs = bdrv_next_all_states(bs))) { AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - result |= bdrv_drain_poll(bs, false, NULL, true); + result |= bdrv_drain_poll(bs, NULL, true); aio_context_release(aio_context); } @@ -554,7 +485,7 @@ void bdrv_drain_all_begin(void) GLOBAL_STATE_CODE(); if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); + bdrv_co_yield_to_drain(NULL, true, NULL, true, true); return; } @@ -579,7 +510,7 @@ void bdrv_drain_all_begin(void) AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_do_drained_begin(bs, false, NULL, true, false); + bdrv_do_drained_begin(bs, NULL, true, false); aio_context_release(aio_context); } @@ -599,7 +530,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs) g_assert(!bs->refcnt); while (bs->quiesce_counter) { - bdrv_do_drained_end(bs, false, NULL, true); + bdrv_do_drained_end(bs, NULL, true); } } @@ -621,7 +552,7 @@ void bdrv_drain_all_end(void) AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_do_drained_end(bs, false, NULL, true); + bdrv_do_drained_end(bs, NULL, true); aio_context_release(aio_context); } diff --git a/include/block/block-io.h b/include/block/block-io.h index 054e964c9b..9c36a16a1f 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -302,8 +302,7 @@ void bdrv_parent_drained_end_single(BdrvChild *c); /** * bdrv_drain_poll: * - * Poll for pending requests in @bs, its parents (except for @ignore_parent), - * and if @recursive is true its children as well (used for subtree drain). + * Poll for pending requests in @bs and its parents (except for @ignore_parent). * * If @ignore_bds_parents is true, parents that are BlockDriverStates must * ignore the drain request because they will be drained separately (used for @@ -311,8 +310,8 @@ void bdrv_parent_drained_end_single(BdrvChild *c); * * This is part of bdrv_drained_begin. */ -bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, - BdrvChild *ignore_parent, bool ignore_bds_parents); +bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, + bool ignore_bds_parents); /** * bdrv_drained_begin: @@ -333,12 +332,6 @@ void bdrv_drained_begin(BlockDriverState *bs); void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent, bool ignore_bds_parents); -/** - * Like bdrv_drained_begin, but recursively begins a quiesced section for - * exclusive access to all child nodes as well. - */ -void bdrv_subtree_drained_begin(BlockDriverState *bs); - /** * bdrv_drained_end: * @@ -346,9 +339,4 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); */ void bdrv_drained_end(BlockDriverState *bs); -/** - * End a quiescent section started by bdrv_subtree_drained_begin(). - */ -void bdrv_subtree_drained_end(BlockDriverState *bs); - #endif /* BLOCK_IO_H */ diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 2b97576f6d..791dddfd7d 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -1184,7 +1184,6 @@ struct BlockDriverState { /* Accessed with atomic ops. */ int quiesce_counter; - int recursive_quiesce_counter; unsigned int write_gen; /* Current data generation */ diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h index 4b0b3e17ef..8bc061ebb8 100644 --- a/include/block/block_int-io.h +++ b/include/block/block_int-io.h @@ -179,16 +179,4 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs, */ void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes); - -/* - * "I/O or GS" API functions. These functions can run without - * the BQL, but only in one specific iothread/main loop. - * - * See include/block/block-io.h for more information about - * the "I/O or GS" API. - */ - -void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); -void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); - #endif /* BLOCK_INT_IO_H */ diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index 695519ee02..dda08de8db 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -156,7 +156,6 @@ static void call_in_coroutine(void (*entry)(void)) enum drain_type { BDRV_DRAIN_ALL, BDRV_DRAIN, - BDRV_SUBTREE_DRAIN, DRAIN_TYPE_MAX, }; @@ -165,7 +164,6 @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) switch (drain_type) { case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; case BDRV_DRAIN: bdrv_drained_begin(bs); break; - case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break; default: g_assert_not_reached(); } } @@ -175,7 +173,6 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) switch (drain_type) { case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; case BDRV_DRAIN: bdrv_drained_end(bs); break; - case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break; default: g_assert_not_reached(); } } @@ -271,11 +268,6 @@ static void test_drv_cb_drain(void) test_drv_cb_common(BDRV_DRAIN, false); } -static void test_drv_cb_drain_subtree(void) -{ - test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); -} - static void test_drv_cb_co_drain_all(void) { call_in_coroutine(test_drv_cb_drain_all); @@ -286,11 +278,6 @@ static void test_drv_cb_co_drain(void) call_in_coroutine(test_drv_cb_drain); } -static void test_drv_cb_co_drain_subtree(void) -{ - call_in_coroutine(test_drv_cb_drain_subtree); -} - static void test_quiesce_common(enum drain_type drain_type, bool recursive) { BlockBackend *blk; @@ -332,11 +319,6 @@ static void test_quiesce_drain(void) test_quiesce_common(BDRV_DRAIN, false); } -static void test_quiesce_drain_subtree(void) -{ - test_quiesce_common(BDRV_SUBTREE_DRAIN, true); -} - static void test_quiesce_co_drain_all(void) { call_in_coroutine(test_quiesce_drain_all); @@ -347,11 +329,6 @@ static void test_quiesce_co_drain(void) call_in_coroutine(test_quiesce_drain); } -static void test_quiesce_co_drain_subtree(void) -{ - call_in_coroutine(test_quiesce_drain_subtree); -} - static void test_nested(void) { BlockBackend *blk; @@ -402,158 +379,6 @@ static void test_nested(void) blk_unref(blk); } -static void test_multiparent(void) -{ - BlockBackend *blk_a, *blk_b; - BlockDriverState *bs_a, *bs_b, *backing; - BDRVTestState *a_s, *b_s, *backing_s; - - blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); - bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, - &error_abort); - a_s = bs_a->opaque; - blk_insert_bs(blk_a, bs_a, &error_abort); - - blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); - bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, - &error_abort); - b_s = bs_b->opaque; - blk_insert_bs(blk_b, bs_b, &error_abort); - - backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); - backing_s = backing->opaque; - bdrv_set_backing_hd(bs_a, backing, &error_abort); - bdrv_set_backing_hd(bs_b, backing, &error_abort); - - g_assert_cmpint(bs_a->quiesce_counter, ==, 0); - g_assert_cmpint(bs_b->quiesce_counter, ==, 0); - g_assert_cmpint(backing->quiesce_counter, ==, 0); - g_assert_cmpint(a_s->drain_count, ==, 0); - g_assert_cmpint(b_s->drain_count, ==, 0); - g_assert_cmpint(backing_s->drain_count, ==, 0); - - do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); - - g_assert_cmpint(bs_a->quiesce_counter, ==, 1); - g_assert_cmpint(bs_b->quiesce_counter, ==, 1); - g_assert_cmpint(backing->quiesce_counter, ==, 1); - g_assert_cmpint(a_s->drain_count, ==, 1); - g_assert_cmpint(b_s->drain_count, ==, 1); - g_assert_cmpint(backing_s->drain_count, ==, 1); - - do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); - - g_assert_cmpint(bs_a->quiesce_counter, ==, 2); - g_assert_cmpint(bs_b->quiesce_counter, ==, 2); - g_assert_cmpint(backing->quiesce_counter, ==, 2); - g_assert_cmpint(a_s->drain_count, ==, 2); - g_assert_cmpint(b_s->drain_count, ==, 2); - g_assert_cmpint(backing_s->drain_count, ==, 2); - - do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); - - g_assert_cmpint(bs_a->quiesce_counter, ==, 1); - g_assert_cmpint(bs_b->quiesce_counter, ==, 1); - g_assert_cmpint(backing->quiesce_counter, ==, 1); - g_assert_cmpint(a_s->drain_count, ==, 1); - g_assert_cmpint(b_s->drain_count, ==, 1); - g_assert_cmpint(backing_s->drain_count, ==, 1); - - do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); - - g_assert_cmpint(bs_a->quiesce_counter, ==, 0); - g_assert_cmpint(bs_b->quiesce_counter, ==, 0); - g_assert_cmpint(backing->quiesce_counter, ==, 0); - g_assert_cmpint(a_s->drain_count, ==, 0); - g_assert_cmpint(b_s->drain_count, ==, 0); - g_assert_cmpint(backing_s->drain_count, ==, 0); - - bdrv_unref(backing); - bdrv_unref(bs_a); - bdrv_unref(bs_b); - blk_unref(blk_a); - blk_unref(blk_b); -} - -static void test_graph_change_drain_subtree(void) -{ - BlockBackend *blk_a, *blk_b; - BlockDriverState *bs_a, *bs_b, *backing; - BDRVTestState *a_s, *b_s, *backing_s; - - blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); - bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, - &error_abort); - a_s = bs_a->opaque; - blk_insert_bs(blk_a, bs_a, &error_abort); - - blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); - bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, - &error_abort); - b_s = bs_b->opaque; - blk_insert_bs(blk_b, bs_b, &error_abort); - - backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); - backing_s = backing->opaque; - bdrv_set_backing_hd(bs_a, backing, &error_abort); - - g_assert_cmpint(bs_a->quiesce_counter, ==, 0); - g_assert_cmpint(bs_b->quiesce_counter, ==, 0); - g_assert_cmpint(backing->quiesce_counter, ==, 0); - g_assert_cmpint(a_s->drain_count, ==, 0); - g_assert_cmpint(b_s->drain_count, ==, 0); - g_assert_cmpint(backing_s->drain_count, ==, 0); - - do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); - do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); - do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); - do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); - do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); - - bdrv_set_backing_hd(bs_b, backing, &error_abort); - g_assert_cmpint(bs_a->quiesce_counter, ==, 5); - g_assert_cmpint(bs_b->quiesce_counter, ==, 5); - g_assert_cmpint(backing->quiesce_counter, ==, 5); - g_assert_cmpint(a_s->drain_count, ==, 5); - g_assert_cmpint(b_s->drain_count, ==, 5); - g_assert_cmpint(backing_s->drain_count, ==, 5); - - bdrv_set_backing_hd(bs_b, NULL, &error_abort); - g_assert_cmpint(bs_a->quiesce_counter, ==, 3); - g_assert_cmpint(bs_b->quiesce_counter, ==, 2); - g_assert_cmpint(backing->quiesce_counter, ==, 3); - g_assert_cmpint(a_s->drain_count, ==, 3); - g_assert_cmpint(b_s->drain_count, ==, 2); - g_assert_cmpint(backing_s->drain_count, ==, 3); - - bdrv_set_backing_hd(bs_b, backing, &error_abort); - g_assert_cmpint(bs_a->quiesce_counter, ==, 5); - g_assert_cmpint(bs_b->quiesce_counter, ==, 5); - g_assert_cmpint(backing->quiesce_counter, ==, 5); - g_assert_cmpint(a_s->drain_count, ==, 5); - g_assert_cmpint(b_s->drain_count, ==, 5); - g_assert_cmpint(backing_s->drain_count, ==, 5); - - do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); - do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); - do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); - do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); - do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); - - g_assert_cmpint(bs_a->quiesce_counter, ==, 0); - g_assert_cmpint(bs_b->quiesce_counter, ==, 0); - g_assert_cmpint(backing->quiesce_counter, ==, 0); - g_assert_cmpint(a_s->drain_count, ==, 0); - g_assert_cmpint(b_s->drain_count, ==, 0); - g_assert_cmpint(backing_s->drain_count, ==, 0); - - bdrv_unref(backing); - bdrv_unref(bs_a); - bdrv_unref(bs_b); - blk_unref(blk_a); - blk_unref(blk_b); -} - static void test_graph_change_drain_all(void) { BlockBackend *blk_a, *blk_b; @@ -773,12 +598,6 @@ static void test_iothread_drain(void) test_iothread_common(BDRV_DRAIN, 1); } -static void test_iothread_drain_subtree(void) -{ - test_iothread_common(BDRV_SUBTREE_DRAIN, 0); - test_iothread_common(BDRV_SUBTREE_DRAIN, 1); -} - typedef struct TestBlockJob { BlockJob common; @@ -863,7 +682,6 @@ enum test_job_result { enum test_job_drain_node { TEST_JOB_DRAIN_SRC, TEST_JOB_DRAIN_SRC_CHILD, - TEST_JOB_DRAIN_SRC_PARENT, }; static void test_blockjob_common_drain_node(enum drain_type drain_type, @@ -901,9 +719,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, case TEST_JOB_DRAIN_SRC_CHILD: drain_bs = src_backing; break; - case TEST_JOB_DRAIN_SRC_PARENT: - drain_bs = src_overlay; - break; default: g_assert_not_reached(); } @@ -1055,10 +870,6 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, TEST_JOB_DRAIN_SRC); test_blockjob_common_drain_node(drain_type, use_iothread, result, TEST_JOB_DRAIN_SRC_CHILD); - if (drain_type == BDRV_SUBTREE_DRAIN) { - test_blockjob_common_drain_node(drain_type, use_iothread, result, - TEST_JOB_DRAIN_SRC_PARENT); - } } static void test_blockjob_drain_all(void) @@ -1071,11 +882,6 @@ static void test_blockjob_drain(void) test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS); } -static void test_blockjob_drain_subtree(void) -{ - test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS); -} - static void test_blockjob_error_drain_all(void) { test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN); @@ -1088,12 +894,6 @@ static void test_blockjob_error_drain(void) test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE); } -static void test_blockjob_error_drain_subtree(void) -{ - test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN); - test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE); -} - static void test_blockjob_iothread_drain_all(void) { test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS); @@ -1104,11 +904,6 @@ static void test_blockjob_iothread_drain(void) test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS); } -static void test_blockjob_iothread_drain_subtree(void) -{ - test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS); -} - static void test_blockjob_iothread_error_drain_all(void) { test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN); @@ -1121,12 +916,6 @@ static void test_blockjob_iothread_error_drain(void) test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE); } -static void test_blockjob_iothread_error_drain_subtree(void) -{ - test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN); - test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE); -} - typedef struct BDRVTestTopState { BdrvChild *wait_child; @@ -1273,14 +1062,6 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, bdrv_drain(child_bs); bdrv_unref(child_bs); break; - case BDRV_SUBTREE_DRAIN: - /* Would have to ref/unref bs here for !detach_instead_of_delete, but - * then the whole test becomes pointless because the graph changes - * don't occur during the drain any more. */ - assert(detach_instead_of_delete); - bdrv_subtree_drained_begin(bs); - bdrv_subtree_drained_end(bs); - break; case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); bdrv_drain_all_end(); @@ -1315,11 +1096,6 @@ static void test_detach_by_drain(void) do_test_delete_by_drain(true, BDRV_DRAIN); } -static void test_detach_by_drain_subtree(void) -{ - do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN); -} - struct detach_by_parent_data { BlockDriverState *parent_b; @@ -1452,7 +1228,10 @@ static void test_detach_indirect(bool by_parent_cb) g_assert(acb != NULL); /* Drain and check the expected result */ - bdrv_subtree_drained_begin(parent_b); + bdrv_drained_begin(parent_b); + bdrv_drained_begin(a); + bdrv_drained_begin(b); + bdrv_drained_begin(c); g_assert(detach_by_parent_data.child_c != NULL); @@ -1467,12 +1246,15 @@ static void test_detach_indirect(bool by_parent_cb) g_assert(QLIST_NEXT(child_a, next) == NULL); g_assert_cmpint(parent_a->quiesce_counter, ==, 1); - g_assert_cmpint(parent_b->quiesce_counter, ==, 1); + g_assert_cmpint(parent_b->quiesce_counter, ==, 3); g_assert_cmpint(a->quiesce_counter, ==, 1); - g_assert_cmpint(b->quiesce_counter, ==, 0); + g_assert_cmpint(b->quiesce_counter, ==, 1); g_assert_cmpint(c->quiesce_counter, ==, 1); - bdrv_subtree_drained_end(parent_b); + bdrv_drained_end(parent_b); + bdrv_drained_end(a); + bdrv_drained_end(b); + bdrv_drained_end(c); bdrv_unref(parent_b); blk_unref(blk); @@ -2202,70 +1984,47 @@ int main(int argc, char **argv) g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); - g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", - test_drv_cb_drain_subtree); g_test_add_func("/bdrv-drain/driver-cb/co/drain_all", test_drv_cb_co_drain_all); g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain); - g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree", - test_drv_cb_co_drain_subtree); - g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); - g_test_add_func("/bdrv-drain/quiesce/drain_subtree", - test_quiesce_drain_subtree); g_test_add_func("/bdrv-drain/quiesce/co/drain_all", test_quiesce_co_drain_all); g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain); - g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree", - test_quiesce_co_drain_subtree); g_test_add_func("/bdrv-drain/nested", test_nested); - g_test_add_func("/bdrv-drain/multiparent", test_multiparent); - g_test_add_func("/bdrv-drain/graph-change/drain_subtree", - test_graph_change_drain_subtree); g_test_add_func("/bdrv-drain/graph-change/drain_all", test_graph_change_drain_all); g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all); g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain); - g_test_add_func("/bdrv-drain/iothread/drain_subtree", - test_iothread_drain_subtree); g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); - g_test_add_func("/bdrv-drain/blockjob/drain_subtree", - test_blockjob_drain_subtree); g_test_add_func("/bdrv-drain/blockjob/error/drain_all", test_blockjob_error_drain_all); g_test_add_func("/bdrv-drain/blockjob/error/drain", test_blockjob_error_drain); - g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree", - test_blockjob_error_drain_subtree); g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all", test_blockjob_iothread_drain_all); g_test_add_func("/bdrv-drain/blockjob/iothread/drain", test_blockjob_iothread_drain); - g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree", - test_blockjob_iothread_drain_subtree); g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all", test_blockjob_iothread_error_drain_all); g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain", test_blockjob_iothread_error_drain); - g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree", - test_blockjob_iothread_error_drain_subtree); g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain); g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all); g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); - g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree); g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb); g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb); -- cgit 1.4.1 From 57e05be343f33f4e5899a8d8946a8596d68424a1 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:06 +0100 Subject: block: Call drain callbacks only once We only need to call both the BlockDriver's callback and the parent callbacks when going from undrained to drained or vice versa. A second drain section doesn't make a difference for the driver or the parent, they weren't supposed to send new requests before and after the second drain. One thing that gets in the way is the 'ignore_bds_parents' parameter in bdrv_do_drained_begin_quiesce() and bdrv_do_drained_end(): It means that bdrv_drain_all_begin() increases bs->quiesce_counter, but does not quiesce the parent through BdrvChildClass callbacks. If an additional drain section is started now, bs->quiesce_counter will be non-zero, but we would still need to quiesce the parent through BdrvChildClass in order to keep things consistent (and unquiesce it on the matching bdrv_drained_end(), even though the counter would not reach 0 yet as long as the bdrv_drain_all() section is still active). Instead of keeping track of this, let's just get rid of the parameter. It was introduced in commit 6cd5c9d7b2d as an optimisation so that during bdrv_drain_all(), we wouldn't recursively drain all parents up to the root for each node, resulting in quadratic complexity. As it happens, calling the callbacks only once solves the same problem, so as of this patch, we'll still have O(n) complexity and ignore_bds_parents is not needed any more. This patch only ignores the 'ignore_bds_parents' parameter. It will be removed in a separate patch. Signed-off-by: Kevin Wolf Reviewed-by: Hanna Reitz Message-Id: <20221118174110.55183-12-kwolf@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block.c | 25 +++++++------------------ block/io.c | 30 ++++++++++++++++++------------ include/block/block_int-common.h | 8 ++++---- tests/unit/test-bdrv-drain.c | 16 ++++++++++------ 4 files changed, 39 insertions(+), 40 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index 7ea0b82049..b8bab06e55 100644 --- a/block.c +++ b/block.c @@ -2855,7 +2855,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, { BlockDriverState *old_bs = child->bs; int new_bs_quiesce_counter; - int drain_saldo; assert(!child->frozen); assert(old_bs != new_bs); @@ -2865,16 +2864,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child, assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); } - new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); - drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter; - /* * If the new child node is drained but the old one was not, flush * all outstanding requests to the old child node. */ - while (drain_saldo > 0 && child->klass->drained_begin) { + new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); + if (new_bs_quiesce_counter && !child->quiesced_parent) { bdrv_parent_drained_begin_single(child, true); - drain_saldo--; } if (old_bs) { @@ -2890,16 +2886,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, if (new_bs) { assert_bdrv_graph_writable(new_bs); QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); - - /* - * Polling in bdrv_parent_drained_begin_single() may have led to the new - * node's quiesce_counter having been decreased. Not a problem, we just - * need to recognize this here and then invoke drained_end appropriately - * more often. - */ - assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); - drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; - if (child->klass->attach) { child->klass->attach(child); } @@ -2908,10 +2894,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child, /* * If the old child node was drained but the new one is not, allow * requests to come in only after the new node has been attached. + * + * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single() + * polls, which could have changed the value. */ - while (drain_saldo < 0 && child->klass->drained_end) { + new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); + if (!new_bs_quiesce_counter && child->quiesced_parent) { bdrv_parent_drained_end_single(child); - drain_saldo++; } } diff --git a/block/io.c b/block/io.c index 75224480d0..87d6f22ec4 100644 --- a/block/io.c +++ b/block/io.c @@ -62,8 +62,9 @@ void bdrv_parent_drained_end_single(BdrvChild *c) { IO_OR_GS_CODE(); - assert(c->parent_quiesce_counter > 0); - c->parent_quiesce_counter--; + assert(c->quiesced_parent); + c->quiesced_parent = false; + if (c->klass->drained_end) { c->klass->drained_end(c); } @@ -110,7 +111,10 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) { AioContext *ctx = bdrv_child_get_parent_aio_context(c); IO_OR_GS_CODE(); - c->parent_quiesce_counter++; + + assert(!c->quiesced_parent); + c->quiesced_parent = true; + if (c->klass->drained_begin) { c->klass->drained_begin(c); } @@ -358,11 +362,12 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, /* Stop things in parent-to-child order */ if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { aio_disable_external(bdrv_get_aio_context(bs)); - } - bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); - if (bs->drv && bs->drv->bdrv_drain_begin) { - bs->drv->bdrv_drain_begin(bs); + /* TODO Remove ignore_bds_parents, we don't consider it any more */ + bdrv_parent_drained_begin(bs, parent, false); + if (bs->drv && bs->drv->bdrv_drain_begin) { + bs->drv->bdrv_drain_begin(bs); + } } } @@ -413,13 +418,14 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, assert(bs->quiesce_counter > 0); /* Re-enable things in child-to-parent order */ - if (bs->drv && bs->drv->bdrv_drain_end) { - bs->drv->bdrv_drain_end(bs); - } - bdrv_parent_drained_end(bs, parent, ignore_bds_parents); - old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); if (old_quiesce_counter == 1) { + if (bs->drv && bs->drv->bdrv_drain_end) { + bs->drv->bdrv_drain_end(bs); + } + /* TODO Remove ignore_bds_parents, we don't consider it any more */ + bdrv_parent_drained_end(bs, parent, false); + aio_enable_external(bdrv_get_aio_context(bs)); } } diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 791dddfd7d..a6bc6b7fe9 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -980,13 +980,13 @@ struct BdrvChild { bool frozen; /* - * How many times the parent of this child has been drained + * True if the parent of this child has been drained by this BdrvChild * (through klass->drained_*). - * Usually, this is equal to bs->quiesce_counter (potentially - * reduced by bdrv_drain_all_count). It may differ while the + * + * It is generally true if bs->quiesce_counter > 0. It may differ while the * child is entering or leaving a drained section. */ - int parent_quiesce_counter; + bool quiesced_parent; QLIST_ENTRY(BdrvChild) next; QLIST_ENTRY(BdrvChild) next_parent; diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index dda08de8db..172bc6debc 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -296,7 +296,11 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive) do_drain_begin(drain_type, bs); - g_assert_cmpint(bs->quiesce_counter, ==, 1); + if (drain_type == BDRV_DRAIN_ALL) { + g_assert_cmpint(bs->quiesce_counter, ==, 2); + } else { + g_assert_cmpint(bs->quiesce_counter, ==, 1); + } g_assert_cmpint(backing->quiesce_counter, ==, !!recursive); do_drain_end(drain_type, bs); @@ -348,8 +352,8 @@ static void test_nested(void) for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) { for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) { - int backing_quiesce = (outer != BDRV_DRAIN) + - (inner != BDRV_DRAIN); + int backing_quiesce = (outer == BDRV_DRAIN_ALL) + + (inner == BDRV_DRAIN_ALL); g_assert_cmpint(bs->quiesce_counter, ==, 0); g_assert_cmpint(backing->quiesce_counter, ==, 0); @@ -359,10 +363,10 @@ static void test_nested(void) do_drain_begin(outer, bs); do_drain_begin(inner, bs); - g_assert_cmpint(bs->quiesce_counter, ==, 2); + g_assert_cmpint(bs->quiesce_counter, ==, 2 + !!backing_quiesce); g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce); - g_assert_cmpint(s->drain_count, ==, 2); - g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce); + g_assert_cmpint(s->drain_count, ==, 1); + g_assert_cmpint(backing_s->drain_count, ==, !!backing_quiesce); do_drain_end(inner, bs); do_drain_end(outer, bs); -- cgit 1.4.1 From a82a3bd135078d14f1bb4b5e50f51e77d3748270 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:07 +0100 Subject: block: Remove ignore_bds_parents parameter from drain_begin/end. ignore_bds_parents is now ignored during drain_begin and drain_end, so we can just remove it there. It is still a valid optimisation for drain_all in bdrv_drained_poll(), so leave it around there. Signed-off-by: Kevin Wolf Message-Id: <20221118174110.55183-13-kwolf@redhat.com> Reviewed-by: Hanna Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block.c | 2 +- block/io.c | 58 ++++++++++++++++++------------------------------ include/block/block-io.h | 3 +-- 3 files changed, 24 insertions(+), 39 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index b8bab06e55..1a2a8d9de9 100644 --- a/block.c +++ b/block.c @@ -1226,7 +1226,7 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c) static void bdrv_child_cb_drained_begin(BdrvChild *child) { BlockDriverState *bs = child->opaque; - bdrv_do_drained_begin_quiesce(bs, NULL, false); + bdrv_do_drained_begin_quiesce(bs, NULL); } static bool bdrv_child_cb_drained_poll(BdrvChild *child) diff --git a/block/io.c b/block/io.c index 87d6f22ec4..2e9503df6a 100644 --- a/block/io.c +++ b/block/io.c @@ -45,13 +45,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs); static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, BdrvRequestFlags flags); -static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, - bool ignore_bds_parents) +static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) { BdrvChild *c, *next; QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { - if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { + if (c == ignore) { continue; } bdrv_parent_drained_begin_single(c, false); @@ -70,13 +69,12 @@ void bdrv_parent_drained_end_single(BdrvChild *c) } } -static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, - bool ignore_bds_parents) +static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) { BdrvChild *c; QLIST_FOREACH(c, &bs->parents, next_parent) { - if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { + if (c == ignore) { continue; } bdrv_parent_drained_end_single(c); @@ -242,7 +240,6 @@ typedef struct { bool begin; bool poll; BdrvChild *parent; - bool ignore_bds_parents; } BdrvCoDrainData; /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ @@ -269,9 +266,8 @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs, } static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, - bool ignore_bds_parents, bool poll); -static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, - bool ignore_bds_parents); + bool poll); +static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent); static void bdrv_co_drain_bh_cb(void *opaque) { @@ -284,11 +280,10 @@ static void bdrv_co_drain_bh_cb(void *opaque) aio_context_acquire(ctx); bdrv_dec_in_flight(bs); if (data->begin) { - bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents, - data->poll); + bdrv_do_drained_begin(bs, data->parent, data->poll); } else { assert(!data->poll); - bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents); + bdrv_do_drained_end(bs, data->parent); } aio_context_release(ctx); } else { @@ -303,7 +298,6 @@ static void bdrv_co_drain_bh_cb(void *opaque) static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, bool begin, BdrvChild *parent, - bool ignore_bds_parents, bool poll) { BdrvCoDrainData data; @@ -321,7 +315,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, .done = false, .begin = begin, .parent = parent, - .ignore_bds_parents = ignore_bds_parents, .poll = poll, }; @@ -353,8 +346,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, } } -void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - BdrvChild *parent, bool ignore_bds_parents) +void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) { IO_OR_GS_CODE(); assert(!qemu_in_coroutine()); @@ -362,9 +354,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, /* Stop things in parent-to-child order */ if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { aio_disable_external(bdrv_get_aio_context(bs)); - - /* TODO Remove ignore_bds_parents, we don't consider it any more */ - bdrv_parent_drained_begin(bs, parent, false); + bdrv_parent_drained_begin(bs, parent); if (bs->drv && bs->drv->bdrv_drain_begin) { bs->drv->bdrv_drain_begin(bs); } @@ -372,14 +362,14 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, } static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, - bool ignore_bds_parents, bool poll) + bool poll) { if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll); + bdrv_co_yield_to_drain(bs, true, parent, poll); return; } - bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); + bdrv_do_drained_begin_quiesce(bs, parent); /* * Wait for drained requests to finish. @@ -391,7 +381,6 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, * nodes. */ if (poll) { - assert(!ignore_bds_parents); BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); } } @@ -399,20 +388,19 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, void bdrv_drained_begin(BlockDriverState *bs) { IO_OR_GS_CODE(); - bdrv_do_drained_begin(bs, NULL, false, true); + bdrv_do_drained_begin(bs, NULL, true); } /** * This function does not poll, nor must any of its recursively called * functions. */ -static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, - bool ignore_bds_parents) +static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) { int old_quiesce_counter; if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false); + bdrv_co_yield_to_drain(bs, false, parent, false); return; } assert(bs->quiesce_counter > 0); @@ -423,9 +411,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, if (bs->drv && bs->drv->bdrv_drain_end) { bs->drv->bdrv_drain_end(bs); } - /* TODO Remove ignore_bds_parents, we don't consider it any more */ - bdrv_parent_drained_end(bs, parent, false); - + bdrv_parent_drained_end(bs, parent); aio_enable_external(bdrv_get_aio_context(bs)); } } @@ -433,7 +419,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, void bdrv_drained_end(BlockDriverState *bs) { IO_OR_GS_CODE(); - bdrv_do_drained_end(bs, NULL, false); + bdrv_do_drained_end(bs, NULL); } void bdrv_drain(BlockDriverState *bs) @@ -491,7 +477,7 @@ void bdrv_drain_all_begin(void) GLOBAL_STATE_CODE(); if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(NULL, true, NULL, true, true); + bdrv_co_yield_to_drain(NULL, true, NULL, true); return; } @@ -516,7 +502,7 @@ void bdrv_drain_all_begin(void) AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_do_drained_begin(bs, NULL, true, false); + bdrv_do_drained_begin(bs, NULL, false); aio_context_release(aio_context); } @@ -536,7 +522,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs) g_assert(!bs->refcnt); while (bs->quiesce_counter) { - bdrv_do_drained_end(bs, NULL, true); + bdrv_do_drained_end(bs, NULL); } } @@ -558,7 +544,7 @@ void bdrv_drain_all_end(void) AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_do_drained_end(bs, NULL, true); + bdrv_do_drained_end(bs, NULL); aio_context_release(aio_context); } diff --git a/include/block/block-io.h b/include/block/block-io.h index 9c36a16a1f..8f5e75756a 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -329,8 +329,7 @@ void bdrv_drained_begin(BlockDriverState *bs); * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already * running requests to complete. */ -void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - BdrvChild *parent, bool ignore_bds_parents); +void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent); /** * bdrv_drained_end: -- cgit 1.4.1 From 23987471285a26397e3152a9244b652445fd36c4 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:09 +0100 Subject: block: Don't poll in bdrv_replace_child_noperm() In order to make sure that bdrv_replace_child_noperm() doesn't have to poll any more, get rid of the bdrv_parent_drained_begin_single() call. This is possible now because we can require that the parent is already drained through the child in question when the function is called and we don't call the parent drain callbacks more than once. The additional drain calls needed in callers cause the test case to run its code in the drain handler too early (bdrv_attach_child() drains now), so modify it to only enable the code after the test setup has completed. Signed-off-by: Kevin Wolf Message-Id: <20221118174110.55183-15-kwolf@redhat.com> Reviewed-by: Hanna Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block.c | 103 +++++++++++++++++++++++++++++++++++++------ block/io.c | 2 +- include/block/block-io.h | 8 ++++ tests/unit/test-bdrv-drain.c | 10 +++++ 4 files changed, 108 insertions(+), 15 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index 1a2a8d9de9..faaeca8472 100644 --- a/block.c +++ b/block.c @@ -2407,6 +2407,20 @@ static void bdrv_replace_child_abort(void *opaque) GLOBAL_STATE_CODE(); /* old_bs reference is transparently moved from @s to @s->child */ + if (!s->child->bs) { + /* + * The parents were undrained when removing old_bs from the child. New + * requests can't have been made, though, because the child was empty. + * + * TODO Make bdrv_replace_child_noperm() transactionable to avoid + * undraining the parent in the first place. Once this is done, having + * new_bs drained when calling bdrv_replace_child_tran() is not a + * requirement any more. + */ + bdrv_parent_drained_begin_single(s->child, false); + assert(!bdrv_parent_drained_poll_single(s->child)); + } + assert(s->child->quiesced_parent); bdrv_replace_child_noperm(s->child, s->old_bs); bdrv_unref(new_bs); } @@ -2422,12 +2436,19 @@ static TransactionActionDrv bdrv_replace_child_drv = { * * Note: real unref of old_bs is done only on commit. * + * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be + * kept drained until the transaction is completed. + * * The function doesn't update permissions, caller is responsible for this. */ static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, Transaction *tran) { BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); + + assert(child->quiesced_parent); + assert(!new_bs || new_bs->quiesce_counter); + *s = (BdrvReplaceChildState) { .child = child, .old_bs = child->bs, @@ -2850,6 +2871,14 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) return permissions[qapi_perm]; } +/* + * Replaces the node that a BdrvChild points to without updating permissions. + * + * If @new_bs is non-NULL, the parent of @child must already be drained through + * @child. + * + * This function does not poll. + */ static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs) { @@ -2857,6 +2886,28 @@ static void bdrv_replace_child_noperm(BdrvChild *child, int new_bs_quiesce_counter; assert(!child->frozen); + + /* + * If we want to change the BdrvChild to point to a drained node as its new + * child->bs, we need to make sure that its new parent is drained, too. In + * other words, either child->quiesce_parent must already be true or we must + * be able to set it and keep the parent's quiesce_counter consistent with + * that, but without polling or starting new requests (this function + * guarantees that it doesn't poll, and starting new requests would be + * against the invariants of drain sections). + * + * To keep things simple, we pick the first option (child->quiesce_parent + * must already be true). We also generalise the rule a bit to make it + * easier to verify in callers and more likely to be covered in test cases: + * The parent must be quiesced through this child even if new_bs isn't + * currently drained. + * + * The only exception is for callers that always pass new_bs == NULL. In + * this case, we obviously never need to consider the case of a drained + * new_bs, so we can keep the callers simpler by allowing them not to drain + * the parent. + */ + assert(!new_bs || child->quiesced_parent); assert(old_bs != new_bs); GLOBAL_STATE_CODE(); @@ -2864,15 +2915,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); } - /* - * If the new child node is drained but the old one was not, flush - * all outstanding requests to the old child node. - */ - new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); - if (new_bs_quiesce_counter && !child->quiesced_parent) { - bdrv_parent_drained_begin_single(child, true); - } - if (old_bs) { if (child->klass->detach) { child->klass->detach(child); @@ -2892,11 +2934,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child, } /* - * If the old child node was drained but the new one is not, allow - * requests to come in only after the new node has been attached. - * - * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single() - * polls, which could have changed the value. + * If the parent was drained through this BdrvChild previously, but new_bs + * is not drained, allow requests to come in only after the new node has + * been attached. */ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); if (!new_bs_quiesce_counter && child->quiesced_parent) { @@ -3033,6 +3073,24 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, } bdrv_ref(child_bs); + /* + * Let every new BdrvChild start with a drained parent. Inserting the child + * in the graph with bdrv_replace_child_noperm() will undrain it if + * @child_bs is not drained. + * + * The child was only just created and is not yet visible in global state + * until bdrv_replace_child_noperm() inserts it into the graph, so nobody + * could have sent requests and polling is not necessary. + * + * Note that this means that the parent isn't fully drained yet, we only + * stop new requests from coming in. This is fine, we don't care about the + * old requests here, they are not for this child. If another place enters a + * drain section for the same parent, but wants it to be fully quiesced, it + * will not run most of the the code in .drained_begin() again (which is not + * a problem, we already did this), but it will still poll until the parent + * is fully quiesced, so it will not be negatively affected either. + */ + bdrv_parent_drained_begin_single(new_child, false); bdrv_replace_child_noperm(new_child, child_bs); BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); @@ -5078,12 +5136,24 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran) } if (child->bs) { + BlockDriverState *bs = child->bs; + bdrv_drained_begin(bs); bdrv_replace_child_tran(child, NULL, tran); + bdrv_drained_end(bs); } tran_add(tran, &bdrv_remove_child_drv, child); } +static void undrain_on_clean_cb(void *opaque) +{ + bdrv_drained_end(opaque); +} + +static TransactionActionDrv undrain_on_clean = { + .clean = undrain_on_clean_cb, +}; + static int bdrv_replace_node_noperm(BlockDriverState *from, BlockDriverState *to, bool auto_skip, Transaction *tran, @@ -5093,6 +5163,11 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, GLOBAL_STATE_CODE(); + bdrv_drained_begin(from); + bdrv_drained_begin(to); + tran_add(tran, &undrain_on_clean, from); + tran_add(tran, &undrain_on_clean, to); + QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { assert(c->bs == from); if (!should_update_child(c, to)) { diff --git a/block/io.c b/block/io.c index 5e9150d92c..ae64830eac 100644 --- a/block/io.c +++ b/block/io.c @@ -81,7 +81,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) } } -static bool bdrv_parent_drained_poll_single(BdrvChild *c) +bool bdrv_parent_drained_poll_single(BdrvChild *c) { if (c->klass->drained_poll) { return c->klass->drained_poll(c); diff --git a/include/block/block-io.h b/include/block/block-io.h index 8f5e75756a..65e6d2569b 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -292,6 +292,14 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); */ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); +/** + * bdrv_parent_drained_poll_single: + * + * Returns true if there is any pending activity to cease before @c can be + * called quiesced, false otherwise. + */ +bool bdrv_parent_drained_poll_single(BdrvChild *c); + /** * bdrv_parent_drained_end_single: * diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index 172bc6debc..2686a8acee 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -1654,6 +1654,7 @@ static void test_drop_intermediate_poll(void) typedef struct BDRVReplaceTestState { + bool setup_completed; bool was_drained; bool was_undrained; bool has_read; @@ -1738,6 +1739,10 @@ static void bdrv_replace_test_drain_begin(BlockDriverState *bs) { BDRVReplaceTestState *s = bs->opaque; + if (!s->setup_completed) { + return; + } + if (!s->drain_count) { s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs); bdrv_inc_in_flight(bs); @@ -1769,6 +1774,10 @@ static void bdrv_replace_test_drain_end(BlockDriverState *bs) { BDRVReplaceTestState *s = bs->opaque; + if (!s->setup_completed) { + return; + } + g_assert(s->drain_count > 0); if (!--s->drain_count) { s->was_undrained = true; @@ -1867,6 +1876,7 @@ static void do_test_replace_child_mid_drain(int old_drain_count, bdrv_ref(old_child_bs); bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds, BDRV_CHILD_COW, &error_abort); + parent_s->setup_completed = true; for (i = 0; i < old_drain_count; i++) { bdrv_drained_begin(old_child_bs); -- cgit 1.4.1 From 606ed756c1d69cba4822be8923248d2fd714f069 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:10 +0100 Subject: block: Remove poll parameter from bdrv_parent_drained_begin_single() All callers of bdrv_parent_drained_begin_single() pass poll=false now, so we don't need the parameter any more. Signed-off-by: Kevin Wolf Message-Id: <20221118174110.55183-16-kwolf@redhat.com> Reviewed-by: Hanna Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block.c | 4 ++-- block/io.c | 8 ++------ include/block/block-io.h | 5 ++--- 3 files changed, 6 insertions(+), 11 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index faaeca8472..97073092c4 100644 --- a/block.c +++ b/block.c @@ -2417,7 +2417,7 @@ static void bdrv_replace_child_abort(void *opaque) * new_bs drained when calling bdrv_replace_child_tran() is not a * requirement any more. */ - bdrv_parent_drained_begin_single(s->child, false); + bdrv_parent_drained_begin_single(s->child); assert(!bdrv_parent_drained_poll_single(s->child)); } assert(s->child->quiesced_parent); @@ -3090,7 +3090,7 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, * a problem, we already did this), but it will still poll until the parent * is fully quiesced, so it will not be negatively affected either. */ - bdrv_parent_drained_begin_single(new_child, false); + bdrv_parent_drained_begin_single(new_child); bdrv_replace_child_noperm(new_child, child_bs); BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); diff --git a/block/io.c b/block/io.c index ae64830eac..38e57d1f67 100644 --- a/block/io.c +++ b/block/io.c @@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) if (c == ignore) { continue; } - bdrv_parent_drained_begin_single(c, false); + bdrv_parent_drained_begin_single(c); } } @@ -105,9 +105,8 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, return busy; } -void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) +void bdrv_parent_drained_begin_single(BdrvChild *c) { - AioContext *ctx = bdrv_child_get_parent_aio_context(c); IO_OR_GS_CODE(); assert(!c->quiesced_parent); @@ -116,9 +115,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) if (c->klass->drained_begin) { c->klass->drained_begin(c); } - if (poll) { - AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c)); - } } static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) diff --git a/include/block/block-io.h b/include/block/block-io.h index 65e6d2569b..92aaa7c1e9 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -287,10 +287,9 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); /** * bdrv_parent_drained_begin_single: * - * Begin a quiesced section for the parent of @c. If @poll is true, wait for - * any pending activity to cease. + * Begin a quiesced section for the parent of @c. */ -void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); +void bdrv_parent_drained_begin_single(BdrvChild *c); /** * bdrv_parent_drained_poll_single: -- cgit 1.4.1 From a212e675cde67fb783b7f8e5b31dd02e9c880f58 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:29 -0500 Subject: block: avoid duplicating filename string in bdrv_create We know that the string will stay around until the function returns, and the parameter of drv->bdrv_co_create_opts is const char*, so it must not be modified either. Suggested-by: Kevin Wolf Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-7-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index 97073092c4..9d77ec8818 100644 --- a/block.c +++ b/block.c @@ -551,7 +551,7 @@ int bdrv_create(BlockDriver *drv, const char* filename, Coroutine *co; CreateCo cco = { .drv = drv, - .filename = g_strdup(filename), + .filename = filename, .opts = opts, .ret = NOT_DONE, .err = NULL, @@ -559,8 +559,7 @@ int bdrv_create(BlockDriver *drv, const char* filename, if (!drv->bdrv_co_create_opts) { error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); - ret = -ENOTSUP; - goto out; + return -ENOTSUP; } if (qemu_in_coroutine()) { @@ -583,8 +582,6 @@ int bdrv_create(BlockDriver *drv, const char* filename, } } -out: - g_free(cco.filename); return ret; } -- cgit 1.4.1 From 84bdf21f97e670d61615d44a95d2f33b41f849b1 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:30 -0500 Subject: block: distinguish between bdrv_create running in coroutine and not Call two different functions depending on whether bdrv_create is in coroutine or not, following the same pattern as generated_co_wrapper functions. This allows to also call the coroutine function directly, without using CreateCo or relying in bdrv_create(). Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-8-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 69 ++++++++++++++++++++++++++++++++--------------------------------- 1 file changed, 34 insertions(+), 35 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index 9d77ec8818..0a1d484a27 100644 --- a/block.c +++ b/block.c @@ -526,63 +526,62 @@ typedef struct CreateCo { Error *err; } CreateCo; -static void coroutine_fn bdrv_create_co_entry(void *opaque) +static int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename, + QemuOpts *opts, Error **errp) { - Error *local_err = NULL; int ret; + GLOBAL_STATE_CODE(); + ERRP_GUARD(); + if (!drv->bdrv_co_create_opts) { + error_setg(errp, "Driver '%s' does not support image creation", + drv->format_name); + return -ENOTSUP; + } + + ret = drv->bdrv_co_create_opts(drv, filename, opts, errp); + if (ret < 0 && !*errp) { + error_setg_errno(errp, -ret, "Could not create image"); + } + + return ret; +} + +static void coroutine_fn bdrv_create_co_entry(void *opaque) +{ CreateCo *cco = opaque; - assert(cco->drv); GLOBAL_STATE_CODE(); - ret = cco->drv->bdrv_co_create_opts(cco->drv, - cco->filename, cco->opts, &local_err); - error_propagate(&cco->err, local_err); - cco->ret = ret; + cco->ret = bdrv_co_create(cco->drv, cco->filename, cco->opts, &cco->err); + aio_wait_kick(); } int bdrv_create(BlockDriver *drv, const char* filename, QemuOpts *opts, Error **errp) { - int ret; - GLOBAL_STATE_CODE(); - Coroutine *co; - CreateCo cco = { - .drv = drv, - .filename = filename, - .opts = opts, - .ret = NOT_DONE, - .err = NULL, - }; - - if (!drv->bdrv_co_create_opts) { - error_setg(errp, "Driver '%s' does not support image creation", drv->format_name); - return -ENOTSUP; - } - if (qemu_in_coroutine()) { /* Fast-path if already in coroutine context */ - bdrv_create_co_entry(&cco); + return bdrv_co_create(drv, filename, opts, errp); } else { + Coroutine *co; + CreateCo cco = { + .drv = drv, + .filename = filename, + .opts = opts, + .ret = NOT_DONE, + .err = NULL, + }; + co = qemu_coroutine_create(bdrv_create_co_entry, &cco); qemu_coroutine_enter(co); while (cco.ret == NOT_DONE) { aio_poll(qemu_get_aio_context(), true); } + error_propagate(errp, cco.err); + return cco.ret; } - - ret = cco.ret; - if (ret < 0) { - if (cco.err) { - error_propagate(errp, cco.err); - } else { - error_setg_errno(errp, -ret, "Could not create image"); - } - } - - return ret; } /** -- cgit 1.4.1 From 2475a0d0f4b0b450f25f7672c7072b4fdae6df00 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:31 -0500 Subject: block: bdrv_create_file is a coroutine_fn It is always called in coroutine_fn callbacks, therefore it can directly call bdrv_co_create(). Rename it to bdrv_co_create_file too. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-9-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 5 +++-- block/crypto.c | 2 +- block/parallels.c | 2 +- block/qcow.c | 2 +- block/qcow2.c | 4 ++-- block/qed.c | 2 +- block/raw-format.c | 2 +- block/vdi.c | 2 +- block/vhdx.c | 2 +- block/vmdk.c | 2 +- block/vpc.c | 2 +- include/block/block-global-state.h | 3 ++- 12 files changed, 16 insertions(+), 14 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index 0a1d484a27..c8ac91eb63 100644 --- a/block.c +++ b/block.c @@ -719,7 +719,8 @@ out: return ret; } -int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) +int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts, + Error **errp) { QemuOpts *protocol_opts; BlockDriver *drv; @@ -760,7 +761,7 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) goto out; } - ret = bdrv_create(drv, filename, protocol_opts, errp); + ret = bdrv_co_create(drv, filename, protocol_opts, errp); out: qemu_opts_del(protocol_opts); qobject_unref(qdict); diff --git a/block/crypto.c b/block/crypto.c index 2fb8add458..bbeb9f437c 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -703,7 +703,7 @@ static int coroutine_fn block_crypto_co_create_opts_luks(BlockDriver *drv, } /* Create protocol layer */ - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto fail; } diff --git a/block/parallels.c b/block/parallels.c index fa08c1104b..bbea2f2221 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -646,7 +646,7 @@ static int coroutine_fn parallels_co_create_opts(BlockDriver *drv, } /* Create and open the file (protocol layer) */ - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto done; } diff --git a/block/qcow.c b/block/qcow.c index 8bffc41531..5d99f00411 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -973,7 +973,7 @@ static int coroutine_fn qcow_co_create_opts(BlockDriver *drv, } /* Create and open the file (protocol layer) */ - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto fail; } diff --git a/block/qcow2.c b/block/qcow2.c index 941782a011..bafbd077b9 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -3871,7 +3871,7 @@ static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv, } /* Create and open the file (protocol layer) */ - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto finish; } @@ -3886,7 +3886,7 @@ static int coroutine_fn qcow2_co_create_opts(BlockDriver *drv, /* Create and open an external data file (protocol layer) */ val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE); if (val) { - ret = bdrv_create_file(val, opts, errp); + ret = bdrv_co_create_file(val, opts, errp); if (ret < 0) { goto finish; } diff --git a/block/qed.c b/block/qed.c index e8ee332542..faa606618e 100644 --- a/block/qed.c +++ b/block/qed.c @@ -778,7 +778,7 @@ static int coroutine_fn bdrv_qed_co_create_opts(BlockDriver *drv, } /* Create and open the file (protocol layer) */ - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto fail; } diff --git a/block/raw-format.c b/block/raw-format.c index a68014ef0b..28905b09ee 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -433,7 +433,7 @@ static int coroutine_fn raw_co_create_opts(BlockDriver *drv, QemuOpts *opts, Error **errp) { - return bdrv_create_file(filename, opts, errp); + return bdrv_co_create_file(filename, opts, errp); } static int raw_open(BlockDriverState *bs, QDict *options, int flags, diff --git a/block/vdi.c b/block/vdi.c index c0c111c4b9..479bcfe820 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -934,7 +934,7 @@ static int coroutine_fn vdi_co_create_opts(BlockDriver *drv, qdict = qemu_opts_to_qdict_filtered(opts, NULL, &vdi_create_opts, true); /* Create and open the file (protocol layer) */ - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto done; } diff --git a/block/vhdx.c b/block/vhdx.c index bad9ca691b..4c929800fe 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -2084,7 +2084,7 @@ static int coroutine_fn vhdx_co_create_opts(BlockDriver *drv, } /* Create and open the file (protocol layer) */ - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto fail; } diff --git a/block/vmdk.c b/block/vmdk.c index 8204eb9ff4..8894dac2d4 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -2294,7 +2294,7 @@ static int coroutine_fn vmdk_create_extent(const char *filename, int ret; BlockBackend *blk = NULL; - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto exit; } diff --git a/block/vpc.c b/block/vpc.c index 95841f259a..6ee95dcb96 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -1111,7 +1111,7 @@ static int coroutine_fn vpc_co_create_opts(BlockDriver *drv, } /* Create and open the file (protocol layer) */ - ret = bdrv_create_file(filename, opts, errp); + ret = bdrv_co_create_file(filename, opts, errp); if (ret < 0) { goto fail; } diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index 00e0cf8aea..387a7cbb2e 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -57,7 +57,8 @@ BlockDriver *bdrv_find_protocol(const char *filename, BlockDriver *bdrv_find_format(const char *format_name); int bdrv_create(BlockDriver *drv, const char* filename, QemuOpts *opts, Error **errp); -int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); +int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts, + Error **errp); BlockDriverState *bdrv_new(void); int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, -- cgit 1.4.1 From 741443eb4301eb130dab812c7ae7cfd71a68a679 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:36 -0500 Subject: block: convert bdrv_create to co_wrapper This function is never called in coroutine context, therefore instead of manually creating a new coroutine, delegate it to the block-coroutine-wrapper script, defining it as co_wrapper. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-14-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 41 ++------------------------------------ include/block/block-global-state.h | 8 ++++++-- 2 files changed, 8 insertions(+), 41 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index c8ac91eb63..6191ac1f44 100644 --- a/block.c +++ b/block.c @@ -526,8 +526,8 @@ typedef struct CreateCo { Error *err; } CreateCo; -static int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename, - QemuOpts *opts, Error **errp) +int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename, + QemuOpts *opts, Error **errp) { int ret; GLOBAL_STATE_CODE(); @@ -547,43 +547,6 @@ static int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename, return ret; } -static void coroutine_fn bdrv_create_co_entry(void *opaque) -{ - CreateCo *cco = opaque; - GLOBAL_STATE_CODE(); - - cco->ret = bdrv_co_create(cco->drv, cco->filename, cco->opts, &cco->err); - aio_wait_kick(); -} - -int bdrv_create(BlockDriver *drv, const char* filename, - QemuOpts *opts, Error **errp) -{ - GLOBAL_STATE_CODE(); - - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - return bdrv_co_create(drv, filename, opts, errp); - } else { - Coroutine *co; - CreateCo cco = { - .drv = drv, - .filename = filename, - .opts = opts, - .ret = NOT_DONE, - .err = NULL, - }; - - co = qemu_coroutine_create(bdrv_create_co_entry, &cco); - qemu_coroutine_enter(co); - while (cco.ret == NOT_DONE) { - aio_poll(qemu_get_aio_context(), true); - } - error_propagate(errp, cco.err); - return cco.ret; - } -} - /** * Helper function for bdrv_create_file_fallback(): Resize @blk to at * least the given @minimum_size. diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index 387a7cbb2e..1f8b54f2df 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -55,8 +55,12 @@ BlockDriver *bdrv_find_protocol(const char *filename, bool allow_protocol_prefix, Error **errp); BlockDriver *bdrv_find_format(const char *format_name); -int bdrv_create(BlockDriver *drv, const char* filename, - QemuOpts *opts, Error **errp); + +int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename, + QemuOpts *opts, Error **errp); +int co_wrapper bdrv_create(BlockDriver *drv, const char *filename, + QemuOpts *opts, Error **errp); + int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts, Error **errp); -- cgit 1.4.1 From e13550558840422f980a0a71efe52ee83f37933d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:30 +0100 Subject: block: Fix locking in external_snapshot_prepare() bdrv_img_create() polls internally (when calling bdrv_create(), which is a co_wrapper), so it can't be called while holding the lock of any AioContext except the current one without causing deadlocks. Drop the lock around the call in external_snapshot_prepare(). Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-11-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- block.c | 4 ++++ blockdev.c | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'block.c') diff --git a/block.c b/block.c index 6191ac1f44..44d59362d6 100644 --- a/block.c +++ b/block.c @@ -6924,6 +6924,10 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs) return true; } +/* + * Must not be called while holding the lock of an AioContext other than the + * current one. + */ void bdrv_img_create(const char *filename, const char *fmt, const char *base_filename, const char *base_fmt, char *options, uint64_t img_size, int flags, bool quiet, diff --git a/blockdev.c b/blockdev.c index d2f80b0386..ebf952cd21 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1507,10 +1507,14 @@ static void external_snapshot_prepare(BlkActionState *common, goto out; } bdrv_refresh_filename(state->old_bs); + + aio_context_release(aio_context); bdrv_img_create(new_image_file, format, state->old_bs->filename, state->old_bs->drv->format_name, NULL, size, flags, false, &local_err); + aio_context_acquire(aio_context); + if (local_err) { error_propagate(errp, local_err); goto out; -- cgit 1.4.1 From 293125078086027ee625b3fae23b374ad08f98c8 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Wed, 7 Dec 2022 14:18:31 +0100 Subject: block: wrlock in bdrv_replace_child_noperm Protect the main function where graph is modified. Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-12-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index 44d59362d6..df52c6b012 100644 --- a/block.c +++ b/block.c @@ -2836,8 +2836,6 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) * * If @new_bs is non-NULL, the parent of @child must already be drained through * @child. - * - * This function does not poll. */ static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs) @@ -2875,23 +2873,24 @@ static void bdrv_replace_child_noperm(BdrvChild *child, assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); } + /* TODO Pull this up into the callers to avoid polling here */ + bdrv_graph_wrlock(); if (old_bs) { if (child->klass->detach) { child->klass->detach(child); } - assert_bdrv_graph_writable(old_bs); QLIST_REMOVE(child, next_parent); } child->bs = new_bs; if (new_bs) { - assert_bdrv_graph_writable(new_bs); QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); if (child->klass->attach) { child->klass->attach(child); } } + bdrv_graph_wrunlock(); /* * If the parent was drained through this BdrvChild previously, but new_bs -- cgit 1.4.1 From 1af823923541ddfa0bfe51af5f40e9a8469e8992 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Wed, 7 Dec 2022 14:18:32 +0100 Subject: block: remove unnecessary assert_bdrv_graph_writable() We don't protect bdrv->aio_context with the graph rwlock, so these assertions are not needed Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-13-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index df52c6b012..bdffadcdaa 100644 --- a/block.c +++ b/block.c @@ -7214,7 +7214,6 @@ static void bdrv_detach_aio_context(BlockDriverState *bs) if (bs->quiesce_counter) { aio_enable_external(bs->aio_context); } - assert_bdrv_graph_writable(bs); bs->aio_context = NULL; } @@ -7228,7 +7227,6 @@ static void bdrv_attach_aio_context(BlockDriverState *bs, aio_disable_external(new_context); } - assert_bdrv_graph_writable(bs); bs->aio_context = new_context; if (bs->drv && bs->drv->bdrv_attach_aio_context) { @@ -7309,7 +7307,6 @@ static void bdrv_set_aio_context_commit(void *opaque) BlockDriverState *bs = (BlockDriverState *) state->bs; AioContext *new_context = state->new_ctx; AioContext *old_context = bdrv_get_aio_context(bs); - assert_bdrv_graph_writable(bs); /* * Take the old AioContex when detaching it from bs. -- cgit 1.4.1 From 3f35f82e04923affb3283b451b6d66880f266a5a Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Wed, 7 Dec 2022 14:18:33 +0100 Subject: block: assert that graph read and writes are performed correctly Remove the old assert_bdrv_graph_writable, and replace it with the new version using graph-lock API. See the function documentation for more information. Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-14-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 4 ++-- block/graph-lock.c | 11 +++++++++++ include/block/block_int-global-state.h | 17 ----------------- include/block/graph-lock.h | 15 +++++++++++++++ 4 files changed, 28 insertions(+), 19 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index bdffadcdaa..ff53b41af3 100644 --- a/block.c +++ b/block.c @@ -1406,7 +1406,7 @@ static void bdrv_child_cb_attach(BdrvChild *child) { BlockDriverState *bs = child->opaque; - assert_bdrv_graph_writable(bs); + assert_bdrv_graph_writable(); QLIST_INSERT_HEAD(&bs->children, child, next); if (bs->drv->is_filter || (child->role & BDRV_CHILD_FILTERED)) { /* @@ -1452,7 +1452,7 @@ static void bdrv_child_cb_detach(BdrvChild *child) bdrv_backing_detach(child); } - assert_bdrv_graph_writable(bs); + assert_bdrv_graph_writable(); QLIST_REMOVE(child, next); if (child == bs->backing) { assert(child != bs->file); diff --git a/block/graph-lock.c b/block/graph-lock.c index e033c6d9ac..c4d9d2c274 100644 --- a/block/graph-lock.c +++ b/block/graph-lock.c @@ -259,3 +259,14 @@ void bdrv_graph_rdunlock_main_loop(void) GLOBAL_STATE_CODE(); assert(!qemu_in_coroutine()); } + +void assert_bdrv_graph_readable(void) +{ + assert(qemu_in_main_thread() || reader_count()); +} + +void assert_bdrv_graph_writable(void) +{ + assert(qemu_in_main_thread()); + assert(qatomic_read(&has_writer)); +} diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h index b49f4eb35b..2f0993f6e9 100644 --- a/include/block/block_int-global-state.h +++ b/include/block/block_int-global-state.h @@ -310,21 +310,4 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs, */ void bdrv_drain_all_end_quiesce(BlockDriverState *bs); -/** - * Make sure that the function is running under both drain and BQL. - * The latter protects from concurrent writings - * from the GS API, while the former prevents concurrent reads - * from I/O. - */ -static inline void assert_bdrv_graph_writable(BlockDriverState *bs) -{ - /* - * TODO: this function is incomplete. Because the users of this - * assert lack the necessary drains, check only for BQL. - * Once the necessary drains are added, - * assert also for qatomic_read(&bs->quiesce_counter) > 0 - */ - assert(qemu_in_main_thread()); -} - #endif /* BLOCK_INT_GLOBAL_STATE_H */ diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h index b27d8a5fb1..85e8a53b73 100644 --- a/include/block/graph-lock.h +++ b/include/block/graph-lock.h @@ -135,6 +135,21 @@ void coroutine_fn bdrv_graph_co_rdunlock(void); void bdrv_graph_rdlock_main_loop(void); void bdrv_graph_rdunlock_main_loop(void); +/* + * assert_bdrv_graph_readable: + * Make sure that the reader is either the main loop, + * or there is at least a reader helding the rdlock. + * In this way an incoming writer is aware of the read and waits. + */ +void assert_bdrv_graph_readable(void); + +/* + * assert_bdrv_graph_writable: + * Make sure that the writer is the main loop and has set @has_writer, + * so that incoming readers will pause. + */ +void assert_bdrv_graph_writable(void); + typedef struct GraphLockable { } GraphLockable; /* -- cgit 1.4.1 From 303de47b2c1e20a7f326ad11976d6006f5498709 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:35 +0100 Subject: Mark assert_bdrv_graph_readable/writable() GRAPH_RD/WRLOCK Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-16-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- block.c | 4 ++-- include/block/block_int-common.h | 4 ++-- include/block/graph-lock.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index ff53b41af3..1a82fd101a 100644 --- a/block.c +++ b/block.c @@ -1402,7 +1402,7 @@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, *child_flags = flags; } -static void bdrv_child_cb_attach(BdrvChild *child) +static void GRAPH_WRLOCK bdrv_child_cb_attach(BdrvChild *child) { BlockDriverState *bs = child->opaque; @@ -1444,7 +1444,7 @@ static void bdrv_child_cb_attach(BdrvChild *child) } } -static void bdrv_child_cb_detach(BdrvChild *child) +static void GRAPH_WRLOCK bdrv_child_cb_detach(BdrvChild *child) { BlockDriverState *bs = child->opaque; diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index a6bc6b7fe9..b1f0d88307 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -898,8 +898,8 @@ struct BdrvChildClass { void (*activate)(BdrvChild *child, Error **errp); int (*inactivate)(BdrvChild *child); - void (*attach)(BdrvChild *child); - void (*detach)(BdrvChild *child); + void GRAPH_WRLOCK_PTR (*attach)(BdrvChild *child); + void GRAPH_WRLOCK_PTR (*detach)(BdrvChild *child); /* * Notifies the parent that the filename of its child has changed (e.g. diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h index 33c05b331a..4c92cd8edf 100644 --- a/include/block/graph-lock.h +++ b/include/block/graph-lock.h @@ -176,14 +176,14 @@ bdrv_graph_rdunlock_main_loop(void); * or there is at least a reader helding the rdlock. * In this way an incoming writer is aware of the read and waits. */ -void assert_bdrv_graph_readable(void); +void GRAPH_RDLOCK assert_bdrv_graph_readable(void); /* * assert_bdrv_graph_writable: * Make sure that the writer is the main loop and has set @has_writer, * so that incoming readers will pause. */ -void assert_bdrv_graph_writable(void); +void GRAPH_WRLOCK assert_bdrv_graph_writable(void); /* * Calling this function tells TSA that we know that the lock is effectively -- cgit 1.4.1 From 1b3ff9feb942c2ad0b01ac931e99ad451ab0ef39 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:38 +0100 Subject: block: GRAPH_RDLOCK for functions only called by co_wrappers The generated coroutine wrappers already take care to take the lock in the non-coroutine path, and assume that the lock is already taken in the coroutine path. The only thing we need to do for the wrapped function is adding the GRAPH_RDLOCK annotation. Doing so also allows us to mark the corresponding callbacks in BlockDriver as GRAPH_RDLOCK_PTR. Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-19-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- block.c | 2 ++ block/coroutines.h | 17 ++++++++++------- block/io.c | 2 ++ include/block/block_int-common.h | 20 +++++++++----------- 4 files changed, 23 insertions(+), 18 deletions(-) (limited to 'block.c') diff --git a/block.c b/block.c index 1a82fd101a..9c2ac757e4 100644 --- a/block.c +++ b/block.c @@ -5402,6 +5402,7 @@ int coroutine_fn bdrv_co_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { IO_CODE(); + assert_bdrv_graph_readable(); if (bs->drv == NULL) { return -ENOMEDIUM; } @@ -6617,6 +6618,7 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) IO_CODE(); assert(!(bs->open_flags & BDRV_O_INACTIVE)); + assert_bdrv_graph_readable(); if (bs->drv->bdrv_co_invalidate_cache) { bs->drv->bdrv_co_invalidate_cache(bs, &local_err); diff --git a/block/coroutines.h b/block/coroutines.h index 48e9081aa1..2a1e0b3c9d 100644 --- a/block/coroutines.h +++ b/block/coroutines.h @@ -37,9 +37,11 @@ * the I/O API. */ -int coroutine_fn bdrv_co_check(BlockDriverState *bs, - BdrvCheckResult *res, BdrvCheckMode fix); -int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp); +int coroutine_fn GRAPH_RDLOCK +bdrv_co_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); + +int coroutine_fn GRAPH_RDLOCK +bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp); int coroutine_fn bdrv_co_common_block_status_above(BlockDriverState *bs, @@ -53,10 +55,11 @@ bdrv_co_common_block_status_above(BlockDriverState *bs, BlockDriverState **file, int *depth); -int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs, - QEMUIOVector *qiov, int64_t pos); -int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs, - QEMUIOVector *qiov, int64_t pos); +int coroutine_fn GRAPH_RDLOCK +bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); + +int coroutine_fn GRAPH_RDLOCK +bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs, bool blocking, diff --git a/block/io.c b/block/io.c index d160d2e273..d87788dfbb 100644 --- a/block/io.c +++ b/block/io.c @@ -2697,6 +2697,7 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) BlockDriverState *child_bs = bdrv_primary_bs(bs); int ret; IO_CODE(); + assert_bdrv_graph_readable(); ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL); if (ret < 0) { @@ -2729,6 +2730,7 @@ bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) BlockDriverState *child_bs = bdrv_primary_bs(bs); int ret; IO_CODE(); + assert_bdrv_graph_readable(); ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL); if (ret < 0) { diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index b1f0d88307..c34c525fa6 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -641,8 +641,8 @@ struct BlockDriver { /* * Invalidate any cached meta-data. */ - void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs, - Error **errp); + void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_invalidate_cache)( + BlockDriverState *bs, Error **errp); /* * Flushes all data for all layers by calling bdrv_co_flush for underlying @@ -701,12 +701,11 @@ struct BlockDriver { Error **errp); BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs); - int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs, - QEMUIOVector *qiov, - int64_t pos); - int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs, - QEMUIOVector *qiov, - int64_t pos); + int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_save_vmstate)( + BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); + + int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_load_vmstate)( + BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); /* removable device specific */ bool (*bdrv_is_inserted)(BlockDriverState *bs); @@ -724,9 +723,8 @@ struct BlockDriver { * Returns 0 for completed check, -errno for internal errors. * The check results are stored in result. */ - int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs, - BdrvCheckResult *result, - BdrvCheckMode fix); + int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_check)( + BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix); void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event); -- cgit 1.4.1