diff options
Diffstat (limited to 'block.c')
| -rw-r--r-- | block.c | 348 |
1 files changed, 251 insertions, 97 deletions
diff --git a/block.c b/block.c index 8da89aaa62..e7f349b25c 100644 --- a/block.c +++ b/block.c @@ -91,9 +91,11 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, static bool bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child); -static void bdrv_replace_child_noperm(BdrvChild *child, - BlockDriverState *new_bs); -static void bdrv_remove_child(BdrvChild *child, Transaction *tran); +static void GRAPH_WRLOCK +bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs); + +static void GRAPH_WRLOCK +bdrv_remove_child(BdrvChild *child, Transaction *tran); static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, @@ -1699,7 +1701,9 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, open_failed: bs->drv = NULL; if (bs->file != NULL) { + bdrv_graph_wrlock(NULL); bdrv_unref_child(bs, bs->file); + bdrv_graph_wrunlock(); assert(!bs->file); } g_free(bs->opaque); @@ -2115,7 +2119,6 @@ static int bdrv_fill_options(QDict **options, const char *filename, typedef struct BlockReopenQueueEntry { bool prepared; - bool perms_checked; BDRVReopenState state; QTAILQ_ENTRY(BlockReopenQueueEntry) entry; } BlockReopenQueueEntry; @@ -2201,7 +2204,8 @@ static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp) return false; } -static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp) +static bool GRAPH_RDLOCK +bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp) { BdrvChild *a, *b; GLOBAL_STATE_CODE(); @@ -2226,11 +2230,12 @@ static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp) return false; } -static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, - BdrvChild *c, BdrvChildRole role, - BlockReopenQueue *reopen_queue, - uint64_t parent_perm, uint64_t parent_shared, - uint64_t *nperm, uint64_t *nshared) +static void GRAPH_RDLOCK +bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, + BdrvChild *c, BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t parent_perm, uint64_t parent_shared, + uint64_t *nperm, uint64_t *nshared) { assert(bs->drv && bs->drv->bdrv_child_perm); GLOBAL_STATE_CODE(); @@ -2254,8 +2259,8 @@ static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, * simplest way to satisfy this criteria: use only result of * bdrv_topological_dfs() or NULL as @list parameter. */ -static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found, - BlockDriverState *bs) +static GSList * GRAPH_RDLOCK +bdrv_topological_dfs(GSList *list, GHashTable *found, BlockDriverState *bs) { BdrvChild *child; g_autoptr(GHashTable) local_found = NULL; @@ -2318,7 +2323,7 @@ static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, tran_add(tran, &bdrv_child_set_pem_drv, s); } -static void bdrv_drv_set_perm_commit(void *opaque) +static void GRAPH_RDLOCK bdrv_drv_set_perm_commit(void *opaque) { BlockDriverState *bs = opaque; uint64_t cumulative_perms, cumulative_shared_perms; @@ -2331,7 +2336,7 @@ static void bdrv_drv_set_perm_commit(void *opaque) } } -static void bdrv_drv_set_perm_abort(void *opaque) +static void GRAPH_RDLOCK bdrv_drv_set_perm_abort(void *opaque) { BlockDriverState *bs = opaque; GLOBAL_STATE_CODE(); @@ -2346,9 +2351,13 @@ TransactionActionDrv bdrv_drv_set_perm_drv = { .commit = bdrv_drv_set_perm_commit, }; -static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, - uint64_t shared_perm, Transaction *tran, - Error **errp) +/* + * After calling this function, the transaction @tran may only be completed + * while holding a reader lock for the graph. + */ +static int GRAPH_RDLOCK +bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared_perm, + Transaction *tran, Error **errp) { GLOBAL_STATE_CODE(); if (!bs->drv) { @@ -2374,20 +2383,22 @@ typedef struct BdrvReplaceChildState { BlockDriverState *old_bs; } BdrvReplaceChildState; -static void bdrv_replace_child_commit(void *opaque) +static void GRAPH_WRLOCK bdrv_replace_child_commit(void *opaque) { BdrvReplaceChildState *s = opaque; GLOBAL_STATE_CODE(); - bdrv_unref(s->old_bs); + bdrv_schedule_unref(s->old_bs); } -static void bdrv_replace_child_abort(void *opaque) +static void GRAPH_WRLOCK bdrv_replace_child_abort(void *opaque) { BdrvReplaceChildState *s = opaque; BlockDriverState *new_bs = s->child->bs; GLOBAL_STATE_CODE(); + assert_bdrv_graph_writable(); + /* old_bs reference is transparently moved from @s to @s->child */ if (!s->child->bs) { /* @@ -2404,6 +2415,7 @@ static void bdrv_replace_child_abort(void *opaque) } assert(s->child->quiesced_parent); bdrv_replace_child_noperm(s->child, s->old_bs); + bdrv_unref(new_bs); } @@ -2421,10 +2433,14 @@ static TransactionActionDrv bdrv_replace_child_drv = { * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be * kept drained until the transaction is completed. * + * After calling this function, the transaction @tran may only be completed + * while holding a writer lock for the graph. + * * The function doesn't update permissions, caller is responsible for this. */ -static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, - Transaction *tran) +static void GRAPH_WRLOCK +bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, + Transaction *tran) { BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1); @@ -2440,6 +2456,7 @@ static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, if (new_bs) { bdrv_ref(new_bs); } + bdrv_replace_child_noperm(child, new_bs); /* old_bs reference is transparently moved from @child to @s */ } @@ -2447,9 +2464,13 @@ static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs, /* * Refresh permissions in @bs subtree. The function is intended to be called * after some graph modification that was done without permission update. + * + * After calling this function, the transaction @tran may only be completed + * while holding a reader lock for the graph. */ -static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q, - Transaction *tran, Error **errp) +static int GRAPH_RDLOCK +bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q, + Transaction *tran, Error **errp) { BlockDriver *drv = bs->drv; BdrvChild *c; @@ -2522,9 +2543,13 @@ static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q, /* * @list is a product of bdrv_topological_dfs() (may be called several times) - * a topologically sorted subgraph. + * + * After calling this function, the transaction @tran may only be completed + * while holding a reader lock for the graph. */ -static int bdrv_do_refresh_perms(GSList *list, BlockReopenQueue *q, - Transaction *tran, Error **errp) +static int GRAPH_RDLOCK +bdrv_do_refresh_perms(GSList *list, BlockReopenQueue *q, Transaction *tran, + Error **errp) { int ret; BlockDriverState *bs; @@ -2550,9 +2575,13 @@ static int bdrv_do_refresh_perms(GSList *list, BlockReopenQueue *q, * @list is any list of nodes. List is completed by all subtrees and * topologically sorted. It's not a problem if some node occurs in the @list * several times. + * + * After calling this function, the transaction @tran may only be completed + * while holding a reader lock for the graph. */ -static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, - Transaction *tran, Error **errp) +static int GRAPH_RDLOCK +bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q, Transaction *tran, + Error **errp) { g_autoptr(GHashTable) found = g_hash_table_new(NULL, NULL); g_autoptr(GSList) refresh_list = NULL; @@ -2611,9 +2640,14 @@ char *bdrv_perm_names(uint64_t perm) } -/* @tran is allowed to be NULL. In this case no rollback is possible */ -static int bdrv_refresh_perms(BlockDriverState *bs, Transaction *tran, - Error **errp) +/* + * @tran is allowed to be NULL. In this case no rollback is possible. + * + * After calling this function, the transaction @tran may only be completed + * while holding a reader lock for the graph. + */ +static int GRAPH_RDLOCK +bdrv_refresh_perms(BlockDriverState *bs, Transaction *tran, Error **errp) { int ret; Transaction *local_tran = NULL; @@ -2859,8 +2893,8 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) * If @new_bs is non-NULL, the parent of @child must already be drained through * @child and the caller must hold the AioContext lock for @new_bs. */ -static void bdrv_replace_child_noperm(BdrvChild *child, - BlockDriverState *new_bs) +static void GRAPH_WRLOCK +bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs) { BlockDriverState *old_bs = child->bs; int new_bs_quiesce_counter; @@ -2895,8 +2929,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); } - /* TODO Pull this up into the callers to avoid polling here */ - bdrv_graph_wrlock(new_bs); if (old_bs) { if (child->klass->detach) { child->klass->detach(child); @@ -2912,7 +2944,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, child->klass->attach(child); } } - bdrv_graph_wrunlock(); /* * If the parent was drained through this BdrvChild previously, but new_bs @@ -2947,12 +2978,14 @@ typedef struct BdrvAttachChildCommonState { AioContext *old_child_ctx; } BdrvAttachChildCommonState; -static void bdrv_attach_child_common_abort(void *opaque) +static void GRAPH_WRLOCK bdrv_attach_child_common_abort(void *opaque) { BdrvAttachChildCommonState *s = opaque; BlockDriverState *bs = s->child->bs; GLOBAL_STATE_CODE(); + assert_bdrv_graph_writable(); + bdrv_replace_child_noperm(s->child, NULL); if (bdrv_get_aio_context(bs) != s->old_child_ctx) { @@ -2977,7 +3010,7 @@ static void bdrv_attach_child_common_abort(void *opaque) tran_commit(tran); } - bdrv_unref(bs); + bdrv_schedule_unref(bs); bdrv_child_free(s->child); } @@ -2991,19 +3024,23 @@ static TransactionActionDrv bdrv_attach_child_common_drv = { * * Function doesn't update permissions, caller is responsible for this. * + * After calling this function, the transaction @tran may only be completed + * while holding a writer lock for the graph. + * * Returns new created child. * * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and * @child_bs can move to a different AioContext in this function. Callers must * make sure that their AioContext locking is still correct after this. */ -static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, - const char *child_name, - const BdrvChildClass *child_class, - BdrvChildRole child_role, - uint64_t perm, uint64_t shared_perm, - void *opaque, - Transaction *tran, Error **errp) +static BdrvChild * GRAPH_WRLOCK +bdrv_attach_child_common(BlockDriverState *child_bs, + const char *child_name, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + uint64_t perm, uint64_t shared_perm, + void *opaque, + Transaction *tran, Error **errp) { BdrvChild *new_child; AioContext *parent_ctx, *new_child_ctx; @@ -3106,14 +3143,18 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and * @child_bs can move to a different AioContext in this function. Callers must * make sure that their AioContext locking is still correct after this. + * + * After calling this function, the transaction @tran may only be completed + * while holding a writer lock for the graph. */ -static BdrvChild *bdrv_attach_child_noperm(BlockDriverState *parent_bs, - BlockDriverState *child_bs, - const char *child_name, - const BdrvChildClass *child_class, - BdrvChildRole child_role, - Transaction *tran, - Error **errp) +static BdrvChild * GRAPH_WRLOCK +bdrv_attach_child_noperm(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + const char *child_name, + const BdrvChildClass *child_class, + BdrvChildRole child_role, + Transaction *tran, + Error **errp) { uint64_t perm, shared_perm; @@ -3158,6 +3199,8 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, GLOBAL_STATE_CODE(); + bdrv_graph_wrlock(child_bs); + child = bdrv_attach_child_common(child_bs, child_name, child_class, child_role, perm, shared_perm, opaque, tran, errp); @@ -3170,6 +3213,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, out: tran_finalize(tran, ret); + bdrv_graph_wrunlock(); bdrv_unref(child_bs); @@ -3215,7 +3259,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, out: tran_finalize(tran, ret); - bdrv_unref(child_bs); + bdrv_schedule_unref(child_bs); return ret < 0 ? NULL : child; } @@ -3245,7 +3289,7 @@ void bdrv_root_unref_child(BdrvChild *child) NULL); } - bdrv_unref(child_bs); + bdrv_schedule_unref(child_bs); } typedef struct BdrvSetInheritsFrom { @@ -3289,8 +3333,9 @@ static void bdrv_set_inherits_from(BlockDriverState *bs, * @root that point to @root, where necessary. * @tran is allowed to be NULL. In this case no rollback is possible */ -static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child, - Transaction *tran) +static void GRAPH_WRLOCK +bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child, + Transaction *tran) { BdrvChild *c; @@ -3327,7 +3372,8 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) } -static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load) +static void GRAPH_RDLOCK +bdrv_parent_cb_change_media(BlockDriverState *bs, bool load) { BdrvChild *c; GLOBAL_STATE_CODE(); @@ -3368,16 +3414,23 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) * Sets the bs->backing or bs->file link of a BDS. A new reference is created; * callers which don't need their own reference any more must call bdrv_unref(). * + * If the respective child is already present (i.e. we're detaching a node), + * that child node must be drained. + * * Function doesn't update permissions, caller is responsible for this. * * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and * @child_bs can move to a different AioContext in this function. Callers must * make sure that their AioContext locking is still correct after this. + * + * After calling this function, the transaction @tran may only be completed + * while holding a writer lock for the graph. */ -static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, - BlockDriverState *child_bs, - bool is_backing, - Transaction *tran, Error **errp) +static int GRAPH_WRLOCK +bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + bool is_backing, + Transaction *tran, Error **errp) { bool update_inherits_from = bdrv_inherits_from_recursive(child_bs, parent_bs); @@ -3428,6 +3481,7 @@ static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, } if (child) { + assert(child->bs->quiesce_counter); bdrv_unset_inherits_from(parent_bs, child, tran); bdrv_remove_child(child, tran); } @@ -3454,9 +3508,7 @@ static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, } out: - bdrv_graph_rdlock_main_loop(); bdrv_refresh_limits(parent_bs, tran, NULL); - bdrv_graph_rdunlock_main_loop(); return 0; } @@ -3465,10 +3517,17 @@ out: * The caller must hold the AioContext lock for @backing_hd. Both @bs and * @backing_hd can move to a different AioContext in this function. Callers must * make sure that their AioContext locking is still correct after this. + * + * If a backing child is already present (i.e. we're detaching a node), that + * child node must be drained. + * + * After calling this function, the transaction @tran may only be completed + * while holding a writer lock for the graph. */ -static int bdrv_set_backing_noperm(BlockDriverState *bs, - BlockDriverState *backing_hd, - Transaction *tran, Error **errp) +static int GRAPH_WRLOCK +bdrv_set_backing_noperm(BlockDriverState *bs, + BlockDriverState *backing_hd, + Transaction *tran, Error **errp) { GLOBAL_STATE_CODE(); return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); @@ -3483,6 +3542,10 @@ int bdrv_set_backing_hd_drained(BlockDriverState *bs, GLOBAL_STATE_CODE(); assert(bs->quiesce_counter > 0); + if (bs->backing) { + assert(bs->backing->bs->quiesce_counter > 0); + } + bdrv_graph_wrlock(backing_hd); ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); if (ret < 0) { @@ -3492,18 +3555,22 @@ int bdrv_set_backing_hd_drained(BlockDriverState *bs, ret = bdrv_refresh_perms(bs, tran, errp); out: tran_finalize(tran, ret); + bdrv_graph_wrunlock(); return ret; } int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, Error **errp) { + BlockDriverState *drain_bs = bs->backing ? bs->backing->bs : bs; int ret; GLOBAL_STATE_CODE(); - bdrv_drained_begin(bs); + bdrv_ref(drain_bs); + bdrv_drained_begin(drain_bs); ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp); - bdrv_drained_end(bs); + bdrv_drained_end(drain_bs); + bdrv_unref(drain_bs); return ret; } @@ -3713,11 +3780,13 @@ BdrvChild *bdrv_open_child(const char *filename, return NULL; } + bdrv_graph_wrlock(NULL); ctx = bdrv_get_aio_context(bs); aio_context_acquire(ctx); child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, errp); aio_context_release(ctx); + bdrv_graph_wrunlock(); return child; } @@ -3902,6 +3971,9 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, GLOBAL_STATE_CODE(); assert(!qemu_in_coroutine()); + /* TODO We'll eventually have to take a writer lock in this function */ + GRAPH_RDLOCK_GUARD_MAINLOOP(); + if (reference) { bool options_non_empty = options ? qdict_size(options) : false; qobject_unref(options); @@ -4541,7 +4613,10 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) * reconfiguring the fd and that's why it does it in raw_check_perm(), not * in raw_reopen_prepare() which is called with "old" permissions. */ + bdrv_graph_rdlock_main_loop(); ret = bdrv_list_refresh_perms(refresh_list, bs_queue, tran, errp); + bdrv_graph_rdunlock_main_loop(); + if (ret < 0) { goto abort; } @@ -4562,7 +4637,9 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) aio_context_release(ctx); } + bdrv_graph_wrlock(NULL); tran_commit(tran); + bdrv_graph_wrunlock(); QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { BlockDriverState *bs = bs_entry->state.bs; @@ -4579,7 +4656,10 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) goto cleanup; abort: + bdrv_graph_wrlock(NULL); tran_abort(tran); + bdrv_graph_wrunlock(); + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { if (bs_entry->prepared) { ctx = bdrv_get_aio_context(bs_entry->state.bs); @@ -4645,6 +4725,9 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, * true and reopen_state->new_backing_bs contains a pointer to the new * backing BlockDriverState (or NULL). * + * After calling this function, the transaction @tran may only be completed + * while holding a writer lock for the graph. + * * Return 0 on success, otherwise return < 0 and set @errp. * * The caller must hold the AioContext lock of @reopen_state->bs. @@ -4729,6 +4812,11 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, reopen_state->old_file_bs = old_child_bs; } + if (old_child_bs) { + bdrv_ref(old_child_bs); + bdrv_drained_begin(old_child_bs); + } + old_ctx = bdrv_get_aio_context(bs); ctx = bdrv_get_aio_context(new_child_bs); if (old_ctx != ctx) { @@ -4736,14 +4824,23 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, aio_context_acquire(ctx); } + bdrv_graph_wrlock(new_child_bs); + ret = bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing, tran, errp); + bdrv_graph_wrunlock(); + if (old_ctx != ctx) { aio_context_release(ctx); aio_context_acquire(old_ctx); } + if (old_child_bs) { + bdrv_drained_end(old_child_bs); + bdrv_unref(old_child_bs); + } + return ret; } @@ -4764,6 +4861,9 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, * commit() for any other BDS that have been left in a prepare() state * * The caller must hold the AioContext lock of @reopen_state->bs. + * + * After calling this function, the transaction @change_child_tran may only be + * completed while holding a writer lock for the graph. */ static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, @@ -5065,9 +5165,11 @@ static void bdrv_close(BlockDriverState *bs) bs->drv = NULL; } + bdrv_graph_wrlock(NULL); QLIST_FOREACH_SAFE(child, &bs->children, next, next) { bdrv_unref_child(bs, child); } + bdrv_graph_wrunlock(); assert(!bs->backing); assert(!bs->file); @@ -5122,7 +5224,7 @@ void bdrv_close_all(void) assert(QTAILQ_EMPTY(&all_bdrv_states)); } -static bool should_update_child(BdrvChild *c, BlockDriverState *to) +static bool GRAPH_RDLOCK should_update_child(BdrvChild *c, BlockDriverState *to) { GQueue *queue; GHashTable *found; @@ -5211,45 +5313,47 @@ static TransactionActionDrv bdrv_remove_child_drv = { .commit = bdrv_remove_child_commit, }; -/* Function doesn't update permissions, caller is responsible for this. */ -static void bdrv_remove_child(BdrvChild *child, Transaction *tran) +/* + * Function doesn't update permissions, caller is responsible for this. + * + * @child->bs (if non-NULL) must be drained. + * + * After calling this function, the transaction @tran may only be completed + * while holding a writer lock for the graph. + */ +static void GRAPH_WRLOCK bdrv_remove_child(BdrvChild *child, Transaction *tran) { if (!child) { return; } if (child->bs) { - BlockDriverState *bs = child->bs; - bdrv_drained_begin(bs); + assert(child->quiesced_parent); bdrv_replace_child_tran(child, NULL, tran); - bdrv_drained_end(bs); } tran_add(tran, &bdrv_remove_child_drv, child); } -static void undrain_on_clean_cb(void *opaque) -{ - bdrv_drained_end(opaque); -} - -static TransactionActionDrv undrain_on_clean = { - .clean = undrain_on_clean_cb, -}; - -static int bdrv_replace_node_noperm(BlockDriverState *from, - BlockDriverState *to, - bool auto_skip, Transaction *tran, - Error **errp) +/* + * Both @from and @to (if non-NULL) must be drained. @to must be kept drained + * until the transaction is completed. + * + * After calling this function, the transaction @tran may only be completed + * while holding a writer lock for the graph. + */ +static int GRAPH_WRLOCK +bdrv_replace_node_noperm(BlockDriverState *from, + BlockDriverState *to, + bool auto_skip, Transaction *tran, + Error **errp) { BdrvChild *c, *next; GLOBAL_STATE_CODE(); - bdrv_drained_begin(from); - bdrv_drained_begin(to); - tran_add(tran, &undrain_on_clean, from); - tran_add(tran, &undrain_on_clean, to); + assert(from->quiesce_counter); + assert(to->quiesce_counter); QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { assert(c->bs == from); @@ -5312,6 +5416,9 @@ static int bdrv_replace_node_common(BlockDriverState *from, assert(qemu_get_current_aio_context() == qemu_get_aio_context()); assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); bdrv_drained_begin(from); + bdrv_drained_begin(to); + + bdrv_graph_wrlock(to); /* * Do the replacement without permission update. @@ -5325,6 +5432,7 @@ static int bdrv_replace_node_common(BlockDriverState *from, } if (detach_subchain) { + /* to_cow_parent is already drained because from is drained */ bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran); } @@ -5340,7 +5448,9 @@ static int bdrv_replace_node_common(BlockDriverState *from, out: tran_finalize(tran, ret); + bdrv_graph_wrunlock(); + bdrv_drained_end(to); bdrv_drained_end(from); bdrv_unref(from); @@ -5390,6 +5500,22 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, assert(!bs_new->backing); old_context = bdrv_get_aio_context(bs_top); + bdrv_drained_begin(bs_top); + + /* + * bdrv_drained_begin() requires that only the AioContext of the drained + * node is locked, and at this point it can still differ from the AioContext + * of bs_top. + */ + new_context = bdrv_get_aio_context(bs_new); + aio_context_release(old_context); + aio_context_acquire(new_context); + bdrv_drained_begin(bs_new); + aio_context_release(new_context); + aio_context_acquire(old_context); + new_context = NULL; + + bdrv_graph_wrlock(bs_top); child = bdrv_attach_child_noperm(bs_new, bs_top, "backing", &child_of_bds, bdrv_backing_role(bs_new), @@ -5400,10 +5526,9 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, } /* - * bdrv_attach_child_noperm could change the AioContext of bs_top. - * bdrv_replace_node_noperm calls bdrv_drained_begin, so let's temporarily - * hold the new AioContext, since bdrv_drained_begin calls BDRV_POLL_WHILE - * that assumes the new lock is taken. + * bdrv_attach_child_noperm could change the AioContext of bs_top and + * bs_new, but at least they are in the same AioContext now. This is the + * AioContext that we need to lock for the rest of the function. */ new_context = bdrv_get_aio_context(bs_top); @@ -5421,9 +5546,11 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, out: tran_finalize(tran, ret); - bdrv_graph_rdlock_main_loop(); bdrv_refresh_limits(bs_top, NULL, NULL); - bdrv_graph_rdunlock_main_loop(); + bdrv_graph_wrunlock(); + + bdrv_drained_end(bs_top); + bdrv_drained_end(bs_new); if (new_context && old_context != new_context) { aio_context_release(new_context); @@ -5447,6 +5574,7 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, bdrv_ref(old_bs); bdrv_drained_begin(old_bs); bdrv_drained_begin(new_bs); + bdrv_graph_wrlock(new_bs); bdrv_replace_child_tran(child, new_bs, tran); @@ -5457,6 +5585,7 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, tran_finalize(tran, ret); + bdrv_graph_wrunlock(); bdrv_drained_end(old_bs); bdrv_drained_end(new_bs); bdrv_unref(old_bs); @@ -5812,9 +5941,11 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, backing_file_str = base->filename; } + bdrv_graph_rdlock_main_loop(); QLIST_FOREACH(c, &top->parents, next_parent) { updated_children = g_slist_prepend(updated_children, c); } + bdrv_graph_rdunlock_main_loop(); /* * It seems correct to pass detach_subchain=true here, but it triggers @@ -6737,6 +6868,7 @@ int bdrv_activate(BlockDriverState *bs, Error **errp) BdrvDirtyBitmap *bm; GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); if (!bs->drv) { return -ENOMEDIUM; @@ -6867,6 +6999,7 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs) uint64_t cumulative_perms, cumulative_shared_perms; GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); if (!bs->drv) { return -ENOMEDIUM; @@ -7045,6 +7178,23 @@ void bdrv_unref(BlockDriverState *bs) } } +/* + * Release a BlockDriverState reference while holding the graph write lock. + * + * Calling bdrv_unref() directly is forbidden while holding the graph lock + * because bdrv_close() both involves polling and taking the graph lock + * internally. bdrv_schedule_unref() instead delays decreasing the refcount and + * possibly closing @bs until the graph lock is released. + */ +void bdrv_schedule_unref(BlockDriverState *bs) +{ + if (!bs) { + return; + } + aio_bh_schedule_oneshot(qemu_get_aio_context(), + (QEMUBHFunc *) bdrv_unref, bs); +} + struct BdrvOpBlocker { Error *reason; QLIST_ENTRY(BdrvOpBlocker) list; @@ -7541,17 +7691,21 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, return true; } + bdrv_graph_rdlock_main_loop(); QLIST_FOREACH(c, &bs->parents, next_parent) { if (!bdrv_parent_change_aio_context(c, ctx, visited, tran, errp)) { + bdrv_graph_rdunlock_main_loop(); return false; } } QLIST_FOREACH(c, &bs->children, next) { if (!bdrv_child_change_aio_context(c, ctx, visited, tran, errp)) { + bdrv_graph_rdunlock_main_loop(); return false; } } + bdrv_graph_rdunlock_main_loop(); state = g_new(BdrvStateSetAioContext, 1); *state = (BdrvStateSetAioContext) { |