diff options
Diffstat (limited to 'block')
| -rw-r--r-- | block/block-backend.c | 30 | ||||
| -rw-r--r-- | block/commit.c | 1 | ||||
| -rw-r--r-- | block/curl.c | 11 | ||||
| -rw-r--r-- | block/export/fuse.c | 49 | ||||
| -rw-r--r-- | block/export/vhost-user-blk-server.c | 5 | ||||
| -rw-r--r-- | block/file-posix.c | 37 | ||||
| -rw-r--r-- | block/io.c | 6 | ||||
| -rw-r--r-- | block/io_uring.c | 19 | ||||
| -rw-r--r-- | block/iscsi.c | 4 | ||||
| -rw-r--r-- | block/linux-aio.c | 16 | ||||
| -rw-r--r-- | block/mirror.c | 15 | ||||
| -rw-r--r-- | block/monitor/block-hmp-cmds.c | 2 | ||||
| -rw-r--r-- | block/nfs.c | 6 | ||||
| -rw-r--r-- | block/nvme.c | 51 | ||||
| -rw-r--r-- | block/qcow2.c | 58 | ||||
| -rw-r--r-- | block/rbd.c | 52 | ||||
| -rw-r--r-- | block/ssh.c | 4 | ||||
| -rw-r--r-- | block/vvfat.c | 37 | ||||
| -rw-r--r-- | block/win32-aio.c | 4 |
19 files changed, 280 insertions, 127 deletions
diff --git a/block/block-backend.c b/block/block-backend.c index 12ef80ea17..4ff6b4d785 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -190,6 +190,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp) { BlockBackend *blk = child->opaque; Error *local_err = NULL; + uint64_t saved_shared_perm; if (!blk->disable_perm) { return; @@ -197,12 +198,22 @@ static void blk_root_activate(BdrvChild *child, Error **errp) blk->disable_perm = false; + /* + * blk->shared_perm contains the permissions we want to share once + * migration is really completely done. For now, we need to share + * all; but we also need to retain blk->shared_perm, which is + * overwritten by a successful blk_set_perm() call. Save it and + * restore it below. + */ + saved_shared_perm = blk->shared_perm; + blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err); if (local_err) { error_propagate(errp, local_err); blk->disable_perm = true; return; } + blk->shared_perm = saved_shared_perm; if (runstate_check(RUN_STATE_INMIGRATE)) { /* Activation can happen when migration process is still active, for @@ -822,16 +833,22 @@ BlockBackend *blk_by_public(BlockBackendPublic *public) void blk_remove_bs(BlockBackend *blk) { ThrottleGroupMember *tgm = &blk->public.throttle_group_member; - BlockDriverState *bs; BdrvChild *root; notifier_list_notify(&blk->remove_bs_notifiers, blk); if (tgm->throttle_state) { - bs = blk_bs(blk); + BlockDriverState *bs = blk_bs(blk); + + /* + * Take a ref in case blk_bs() changes across bdrv_drained_begin(), for + * example, if a temporary filter node is removed by a blockjob. + */ + bdrv_ref(bs); bdrv_drained_begin(bs); throttle_group_detach_aio_context(tgm); throttle_group_attach_aio_context(tgm, qemu_get_aio_context()); bdrv_drained_end(bs); + bdrv_unref(bs); } blk_update_root_state(blk); @@ -1705,6 +1722,7 @@ void blk_drain(BlockBackend *blk) BlockDriverState *bs = blk_bs(blk); if (bs) { + bdrv_ref(bs); bdrv_drained_begin(bs); } @@ -1714,6 +1732,7 @@ void blk_drain(BlockBackend *blk) if (bs) { bdrv_drained_end(bs); + bdrv_unref(bs); } } @@ -2044,10 +2063,13 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, int ret; if (bs) { + bdrv_ref(bs); + if (update_root_node) { ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root, errp); if (ret < 0) { + bdrv_unref(bs); return ret; } } @@ -2057,6 +2079,8 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, throttle_group_attach_aio_context(tgm, new_context); bdrv_drained_end(bs); } + + bdrv_unref(bs); } blk->ctx = new_context; @@ -2326,11 +2350,13 @@ void blk_io_limits_disable(BlockBackend *blk) ThrottleGroupMember *tgm = &blk->public.throttle_group_member; assert(tgm->throttle_state); if (bs) { + bdrv_ref(bs); bdrv_drained_begin(bs); } throttle_group_unregister_tgm(tgm); if (bs) { bdrv_drained_end(bs); + bdrv_unref(bs); } } diff --git a/block/commit.c b/block/commit.c index 10cc5ff451..b1fc7b908b 100644 --- a/block/commit.c +++ b/block/commit.c @@ -370,7 +370,6 @@ void commit_start(const char *job_id, BlockDriverState *bs, s->base = blk_new(s->common.job.aio_context, base_perms, BLK_PERM_CONSISTENT_READ - | BLK_PERM_GRAPH_MOD | BLK_PERM_WRITE_UNCHANGED); ret = blk_insert_bs(s->base, base, errp); if (ret < 0) { diff --git a/block/curl.c b/block/curl.c index 4a8ae2b269..6a6cd72975 100644 --- a/block/curl.c +++ b/block/curl.c @@ -125,7 +125,7 @@ static gboolean curl_drop_socket(void *key, void *value, void *opaque) BDRVCURLState *s = socket->s; aio_set_fd_handler(s->aio_context, socket->fd, false, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, NULL); return true; } @@ -173,19 +173,20 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action, switch (action) { case CURL_POLL_IN: aio_set_fd_handler(s->aio_context, fd, false, - curl_multi_do, NULL, NULL, socket); + curl_multi_do, NULL, NULL, NULL, socket); break; case CURL_POLL_OUT: aio_set_fd_handler(s->aio_context, fd, false, - NULL, curl_multi_do, NULL, socket); + NULL, curl_multi_do, NULL, NULL, socket); break; case CURL_POLL_INOUT: aio_set_fd_handler(s->aio_context, fd, false, - curl_multi_do, curl_multi_do, NULL, socket); + curl_multi_do, curl_multi_do, + NULL, NULL, socket); break; case CURL_POLL_REMOVE: aio_set_fd_handler(s->aio_context, fd, false, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, NULL); break; } diff --git a/block/export/fuse.c b/block/export/fuse.c index 823c126d23..fdda8e3c81 100644 --- a/block/export/fuse.c +++ b/block/export/fuse.c @@ -223,7 +223,7 @@ static int setup_fuse_export(FuseExport *exp, const char *mountpoint, aio_set_fd_handler(exp->common.ctx, fuse_session_fd(exp->fuse_session), true, - read_from_fuse_export, NULL, NULL, exp); + read_from_fuse_export, NULL, NULL, NULL, exp); exp->fd_handler_set_up = true; return 0; @@ -267,7 +267,7 @@ static void fuse_export_shutdown(BlockExport *blk_exp) if (exp->fd_handler_set_up) { aio_set_fd_handler(exp->common.ctx, fuse_session_fd(exp->fuse_session), true, - NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, NULL); exp->fd_handler_set_up = false; } } @@ -625,11 +625,33 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode, return; } +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE if (mode & FALLOC_FL_KEEP_SIZE) { length = MIN(length, blk_len - offset); } +#endif /* CONFIG_FALLOCATE_PUNCH_HOLE */ - if (mode & FALLOC_FL_PUNCH_HOLE) { + if (!mode) { + /* We can only fallocate at the EOF with a truncate */ + if (offset < blk_len) { + fuse_reply_err(req, EOPNOTSUPP); + return; + } + + if (offset > blk_len) { + /* No preallocation needed here */ + ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF); + if (ret < 0) { + fuse_reply_err(req, -ret); + return; + } + } + + ret = fuse_do_truncate(exp, offset + length, true, + PREALLOC_MODE_FALLOC); + } +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE + else if (mode & FALLOC_FL_PUNCH_HOLE) { if (!(mode & FALLOC_FL_KEEP_SIZE)) { fuse_reply_err(req, EINVAL); return; @@ -643,6 +665,7 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode, length -= size; } while (ret == 0 && length > 0); } +#endif /* CONFIG_FALLOCATE_PUNCH_HOLE */ #ifdef CONFIG_FALLOCATE_ZERO_RANGE else if (mode & FALLOC_FL_ZERO_RANGE) { if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) { @@ -665,25 +688,7 @@ static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode, } while (ret == 0 && length > 0); } #endif /* CONFIG_FALLOCATE_ZERO_RANGE */ - else if (!mode) { - /* We can only fallocate at the EOF with a truncate */ - if (offset < blk_len) { - fuse_reply_err(req, EOPNOTSUPP); - return; - } - - if (offset > blk_len) { - /* No preallocation needed here */ - ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF); - if (ret < 0) { - fuse_reply_err(req, -ret); - return; - } - } - - ret = fuse_do_truncate(exp, offset + length, true, - PREALLOC_MODE_FALLOC); - } else { + else { ret = -EOPNOTSUPP; } diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index 1862563336..a129204c44 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -172,6 +172,7 @@ vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov, return VIRTIO_BLK_S_IOERR; } +/* Called with server refcount increased, must decrease before returning */ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) { VuBlkReq *req = opaque; @@ -286,10 +287,12 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque) } vu_blk_req_complete(req); + vhost_user_server_unref(server); return; err: free(req); + vhost_user_server_unref(server); } static void vu_blk_process_vq(VuDev *vu_dev, int idx) @@ -310,6 +313,8 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx) Coroutine *co = qemu_coroutine_create(vu_blk_virtio_process_req, req); + + vhost_user_server_ref(server); qemu_coroutine_enter(co); } } diff --git a/block/file-posix.c b/block/file-posix.c index b283093e5b..1f1756e192 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -106,10 +106,6 @@ #include <sys/diskslice.h> #endif -#ifdef CONFIG_XFS -#include <xfs/xfs.h> -#endif - /* OS X does not have O_DSYNC */ #ifndef O_DSYNC #ifdef O_SYNC @@ -156,9 +152,6 @@ typedef struct BDRVRawState { int perm_change_flags; BDRVReopenState *reopen_state; -#ifdef CONFIG_XFS - bool is_xfs:1; -#endif bool has_discard:1; bool has_write_zeroes:1; bool discard_zeroes:1; @@ -409,14 +402,22 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) if (probe_logical_blocksize(fd, &bs->bl.request_alignment) < 0) { bs->bl.request_alignment = 0; } -#ifdef CONFIG_XFS - if (s->is_xfs) { - struct dioattr da; - if (xfsctl(NULL, fd, XFS_IOC_DIOINFO, &da) >= 0) { - bs->bl.request_alignment = da.d_miniosz; - /* The kernel returns wrong information for d_mem */ - /* s->buf_align = da.d_mem; */ - } + +#ifdef __linux__ + /* + * The XFS ioctl definitions are shipped in extra packages that might + * not always be available. Since we just need the XFS_IOC_DIOINFO ioctl + * here, we simply use our own definition instead: + */ + struct xfs_dioattr { + uint32_t d_mem; + uint32_t d_miniosz; + uint32_t d_maxiosz; + } da; + if (ioctl(fd, _IOR('X', 30, struct xfs_dioattr), &da) >= 0) { + bs->bl.request_alignment = da.d_miniosz; + /* The kernel returns wrong information for d_mem */ + /* s->buf_align = da.d_mem; */ } #endif @@ -798,12 +799,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, #endif s->needs_alignment = raw_needs_alignment(bs); -#ifdef CONFIG_XFS - if (platform_test_xfs_fd(s->fd)) { - s->is_xfs = true; - } -#endif - bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; if (S_ISREG(st.st_mode)) { /* When extending regular files, we get zeros from the OS */ diff --git a/block/io.c b/block/io.c index bb0a254def..4e4cb556c5 100644 --- a/block/io.c +++ b/block/io.c @@ -2497,8 +2497,12 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, * non-protocol nodes, and then it is never used. However, filling * the cache requires an RCU update, so double check here to avoid * such an update if possible. + * + * Check want_zero, because we only want to update the cache when we + * have accurate information about what is zero and what is data. */ - if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && + if (want_zero && + ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && QLIST_EMPTY(&bs->children)) { /* diff --git a/block/io_uring.c b/block/io_uring.c index dfa475cc87..782afdb433 100644 --- a/block/io_uring.c +++ b/block/io_uring.c @@ -292,12 +292,14 @@ static bool qemu_luring_poll_cb(void *opaque) { LuringState *s = opaque; - if (io_uring_cq_ready(&s->ring)) { - luring_process_completions_and_submit(s); - return true; - } + return io_uring_cq_ready(&s->ring); +} + +static void qemu_luring_poll_ready(void *opaque) +{ + LuringState *s = opaque; - return false; + luring_process_completions_and_submit(s); } static void ioq_init(LuringQueue *io_q) @@ -402,8 +404,8 @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd, void luring_detach_aio_context(LuringState *s, AioContext *old_context) { - aio_set_fd_handler(old_context, s->ring.ring_fd, false, NULL, NULL, NULL, - s); + aio_set_fd_handler(old_context, s->ring.ring_fd, false, + NULL, NULL, NULL, NULL, s); qemu_bh_delete(s->completion_bh); s->aio_context = NULL; } @@ -413,7 +415,8 @@ void luring_attach_aio_context(LuringState *s, AioContext *new_context) s->aio_context = new_context; s->completion_bh = aio_bh_new(new_context, qemu_luring_completion_bh, s); aio_set_fd_handler(s->aio_context, s->ring.ring_fd, false, - qemu_luring_completion_cb, NULL, qemu_luring_poll_cb, s); + qemu_luring_completion_cb, NULL, + qemu_luring_poll_cb, qemu_luring_poll_ready, s); } LuringState *luring_init(Error **errp) diff --git a/block/iscsi.c b/block/iscsi.c index 57aa07a40d..51f2a5eeaa 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -363,7 +363,7 @@ iscsi_set_events(IscsiLun *iscsilun) false, (ev & POLLIN) ? iscsi_process_read : NULL, (ev & POLLOUT) ? iscsi_process_write : NULL, - NULL, + NULL, NULL, iscsilun); iscsilun->events = ev; } @@ -1534,7 +1534,7 @@ static void iscsi_detach_aio_context(BlockDriverState *bs) IscsiLun *iscsilun = bs->opaque; aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi), - false, NULL, NULL, NULL, NULL); + false, NULL, NULL, NULL, NULL, NULL); iscsilun->events = 0; if (iscsilun->nop_timer) { diff --git a/block/linux-aio.c b/block/linux-aio.c index f53ae72e21..4c423fcccf 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -263,12 +263,15 @@ static bool qemu_laio_poll_cb(void *opaque) LinuxAioState *s = container_of(e, LinuxAioState, e); struct io_event *events; - if (!io_getevents_peek(s->ctx, &events)) { - return false; - } + return io_getevents_peek(s->ctx, &events); +} + +static void qemu_laio_poll_ready(EventNotifier *opaque) +{ + EventNotifier *e = opaque; + LinuxAioState *s = container_of(e, LinuxAioState, e); qemu_laio_process_completions_and_submit(s); - return true; } static void ioq_init(LaioQueue *io_q) @@ -427,7 +430,7 @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context) { - aio_set_event_notifier(old_context, &s->e, false, NULL, NULL); + aio_set_event_notifier(old_context, &s->e, false, NULL, NULL, NULL); qemu_bh_delete(s->completion_bh); s->aio_context = NULL; } @@ -438,7 +441,8 @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context) s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s); aio_set_event_notifier(new_context, &s->e, false, qemu_laio_completion_cb, - qemu_laio_poll_cb); + qemu_laio_poll_cb, + qemu_laio_poll_ready); } LinuxAioState *laio_init(Error **errp) diff --git a/block/mirror.c b/block/mirror.c index 959e3dfbd6..69b2c1c697 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1139,10 +1139,7 @@ static void mirror_complete(Job *job, Error **errp) replace_aio_context = bdrv_get_aio_context(s->to_replace); aio_context_acquire(replace_aio_context); - /* TODO Translate this into permission system. Current definition of - * GRAPH_MOD would require to request it for the parents; they might - * not even be BlockDriverStates, however, so a BdrvChild can't address - * them. May need redefinition of GRAPH_MOD. */ + /* TODO Translate this into child freeze system. */ error_setg(&s->replace_blocker, "block device is in use by block-job-complete"); bdrv_op_block_all(s->to_replace, s->replace_blocker); @@ -1666,7 +1663,7 @@ static BlockJob *mirror_start_job( s = block_job_create(job_id, driver, NULL, mirror_top_bs, BLK_PERM_CONSISTENT_READ, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | - BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed, + BLK_PERM_WRITE, speed, creation_flags, cb, opaque, errp); if (!s) { goto fail; @@ -1710,9 +1707,7 @@ static BlockJob *mirror_start_job( target_perms |= BLK_PERM_RESIZE; } - target_shared_perms |= BLK_PERM_CONSISTENT_READ - | BLK_PERM_WRITE - | BLK_PERM_GRAPH_MOD; + target_shared_perms |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; } else if (bdrv_chain_contains(bs, bdrv_skip_filters(target))) { /* * We may want to allow this in the future, but it would @@ -1723,10 +1718,6 @@ static BlockJob *mirror_start_job( goto fail; } - if (backing_mode != MIRROR_LEAVE_BACKING_CHAIN) { - target_perms |= BLK_PERM_GRAPH_MOD; - } - s->target = blk_new(s->common.job.aio_context, target_perms, target_shared_perms); ret = blk_insert_bs(s->target, target, errp); diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c index 2ac4aedfff..bfb3c043a0 100644 --- a/block/monitor/block-hmp-cmds.c +++ b/block/monitor/block-hmp-cmds.c @@ -101,7 +101,7 @@ void hmp_drive_add(Monitor *mon, const QDict *qdict) return; } - opts = drive_def(optstr); + opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false); if (!opts) return; diff --git a/block/nfs.c b/block/nfs.c index 577aea1d22..444c40b458 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -197,7 +197,7 @@ static void nfs_set_events(NFSClient *client) false, (ev & POLLIN) ? nfs_process_read : NULL, (ev & POLLOUT) ? nfs_process_write : NULL, - NULL, client); + NULL, NULL, client); } client->events = ev; @@ -372,7 +372,7 @@ static void nfs_detach_aio_context(BlockDriverState *bs) NFSClient *client = bs->opaque; aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), - false, NULL, NULL, NULL, NULL); + false, NULL, NULL, NULL, NULL, NULL); client->events = 0; } @@ -390,7 +390,7 @@ static void nfs_client_close(NFSClient *client) if (client->context) { qemu_mutex_lock(&client->mutex); aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), - false, NULL, NULL, NULL, NULL); + false, NULL, NULL, NULL, NULL, NULL); qemu_mutex_unlock(&client->mutex); if (client->fh) { nfs_close(client->context, client->fh); diff --git a/block/nvme.c b/block/nvme.c index fa360b9b3c..dd20de3865 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -605,10 +605,8 @@ out: return ret; } -static bool nvme_poll_queue(NVMeQueuePair *q) +static void nvme_poll_queue(NVMeQueuePair *q) { - bool progress = false; - const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES; NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset]; @@ -619,30 +617,23 @@ static bool nvme_poll_queue(NVMeQueuePair *q) * cannot race with itself. */ if ((le16_to_cpu(cqe->status) & 0x1) == q->cq_phase) { - return false; + return; } qemu_mutex_lock(&q->lock); while (nvme_process_completion(q)) { /* Keep polling */ - progress = true; } qemu_mutex_unlock(&q->lock); - - return progress; } -static bool nvme_poll_queues(BDRVNVMeState *s) +static void nvme_poll_queues(BDRVNVMeState *s) { - bool progress = false; int i; for (i = 0; i < s->queue_count; i++) { - if (nvme_poll_queue(s->queues[i])) { - progress = true; - } + nvme_poll_queue(s->queues[i]); } - return progress; } static void nvme_handle_event(EventNotifier *n) @@ -703,8 +694,30 @@ static bool nvme_poll_cb(void *opaque) EventNotifier *e = opaque; BDRVNVMeState *s = container_of(e, BDRVNVMeState, irq_notifier[MSIX_SHARED_IRQ_IDX]); + int i; - return nvme_poll_queues(s); + for (i = 0; i < s->queue_count; i++) { + NVMeQueuePair *q = s->queues[i]; + const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES; + NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset]; + + /* + * q->lock isn't needed because nvme_process_completion() only runs in + * the event loop thread and cannot race with itself. + */ + if ((le16_to_cpu(cqe->status) & 0x1) != q->cq_phase) { + return true; + } + } + return false; +} + +static void nvme_poll_ready(EventNotifier *e) +{ + BDRVNVMeState *s = container_of(e, BDRVNVMeState, + irq_notifier[MSIX_SHARED_IRQ_IDX]); + + nvme_poll_queues(s); } static int nvme_init(BlockDriverState *bs, const char *device, int namespace, @@ -839,7 +852,8 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace, } aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier[MSIX_SHARED_IRQ_IDX], - false, nvme_handle_event, nvme_poll_cb); + false, nvme_handle_event, nvme_poll_cb, + nvme_poll_ready); if (!nvme_identify(bs, namespace, errp)) { ret = -EIO; @@ -924,7 +938,7 @@ static void nvme_close(BlockDriverState *bs) g_free(s->queues); aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier[MSIX_SHARED_IRQ_IDX], - false, NULL, NULL); + false, NULL, NULL, NULL); event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]); qemu_vfio_pci_unmap_bar(s->vfio, 0, s->bar0_wo_map, 0, sizeof(NvmeBar) + NVME_DOORBELL_SIZE); @@ -1520,7 +1534,7 @@ static void nvme_detach_aio_context(BlockDriverState *bs) aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier[MSIX_SHARED_IRQ_IDX], - false, NULL, NULL); + false, NULL, NULL, NULL); } static void nvme_attach_aio_context(BlockDriverState *bs, @@ -1530,7 +1544,8 @@ static void nvme_attach_aio_context(BlockDriverState *bs, s->aio_context = new_context; aio_set_event_notifier(new_context, &s->irq_notifier[MSIX_SHARED_IRQ_IDX], - false, nvme_handle_event, nvme_poll_cb); + false, nvme_handle_event, nvme_poll_cb, + nvme_poll_ready); for (unsigned i = 0; i < s->queue_count; i++) { NVMeQueuePair *q = s->queues[i]; diff --git a/block/qcow2.c b/block/qcow2.c index d509016756..c8115e1cba 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -5279,6 +5279,38 @@ static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, return bs->drv->bdrv_co_preadv_part(bs, offset, qiov->size, qiov, 0, 0); } +static int qcow2_has_compressed_clusters(BlockDriverState *bs) +{ + int64_t offset = 0; + int64_t bytes = bdrv_getlength(bs); + + if (bytes < 0) { + return bytes; + } + + while (bytes != 0) { + int ret; + QCow2SubclusterType type; + unsigned int cur_bytes = MIN(INT_MAX, bytes); + uint64_t host_offset; + + ret = qcow2_get_host_offset(bs, offset, &cur_bytes, &host_offset, + &type); + if (ret < 0) { + return ret; + } + + if (type == QCOW2_SUBCLUSTER_COMPRESSED) { + return 1; + } + + offset += cur_bytes; + bytes -= cur_bytes; + } + + return 0; +} + /* * Downgrades an image's version. To achieve this, any incompatible features * have to be removed. @@ -5336,9 +5368,10 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version, * the first place; if that happens nonetheless, returning -ENOTSUP is the * best thing to do anyway */ - if (s->incompatible_features) { + if (s->incompatible_features & ~QCOW2_INCOMPAT_COMPRESSION) { error_setg(errp, "Cannot downgrade an image with incompatible features " - "%#" PRIx64 " set", s->incompatible_features); + "0x%" PRIx64 " set", + s->incompatible_features & ~QCOW2_INCOMPAT_COMPRESSION); return -ENOTSUP; } @@ -5356,6 +5389,27 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version, return ret; } + if (s->incompatible_features & QCOW2_INCOMPAT_COMPRESSION) { + ret = qcow2_has_compressed_clusters(bs); + if (ret < 0) { + error_setg(errp, "Failed to check block status"); + return -EINVAL; + } + if (ret) { + error_setg(errp, "Cannot downgrade an image with zstd compression " + "type and existing compressed clusters"); + return -ENOTSUP; + } + /* + * No compressed clusters for now, so just chose default zlib + * compression. + */ + s->incompatible_features &= ~QCOW2_INCOMPAT_COMPRESSION; + s->compression_type = QCOW2_COMPRESSION_TYPE_ZLIB; + } + + assert(s->incompatible_features == 0); + s->qcow_version = target_version; ret = qcow2_update_header(bs); if (ret < 0) { diff --git a/block/rbd.c b/block/rbd.c index def96292e0..8f183eba2a 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -1279,11 +1279,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len, RBDDiffIterateReq *req = opaque; assert(req->offs + req->bytes <= offs); - /* - * we do not diff against a snapshot so we should never receive a callback - * for a hole. - */ - assert(exists); + + /* treat a hole like an unallocated area and bail out */ + if (!exists) { + return 0; + } if (!req->exists && offs > req->offs) { /* @@ -1320,6 +1320,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, int status, r; RBDDiffIterateReq req = { .offs = offset }; uint64_t features, flags; + uint64_t head = 0; assert(offset + bytes <= s->image_size); @@ -1347,7 +1348,43 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, return status; } - r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, +#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0) + /* + * librbd had a bug until early 2022 that affected all versions of ceph that + * supported fast-diff. This bug results in reporting of incorrect offsets + * if the offset parameter to rbd_diff_iterate2 is not object aligned. + * Work around this bug by rounding down the offset to object boundaries. + * This is OK because we call rbd_diff_iterate2 with whole_object = true. + * However, this workaround only works for non cloned images with default + * striping. + * + * See: https://tracker.ceph.com/issues/53784 + */ + + /* check if RBD image has non-default striping enabled */ + if (features & RBD_FEATURE_STRIPINGV2) { + return status; + } + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + /* + * check if RBD image is a clone (= has a parent). + * + * rbd_get_parent_info is deprecated from Nautilus onwards, but the + * replacement rbd_get_parent is not present in Luminous and Mimic. + */ + if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) { + return status; + } +#pragma GCC diagnostic pop + + head = req.offs & (s->object_size - 1); + req.offs -= head; + bytes += head; +#endif + + r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true, qemu_rbd_diff_iterate_cb, &req); if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) { return status; @@ -1366,7 +1403,8 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID; } - *pnum = req.bytes; + assert(req.bytes > head); + *pnum = req.bytes - head; return status; } diff --git a/block/ssh.c b/block/ssh.c index e0fbb4934b..3b5bf34031 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -990,7 +990,7 @@ static void restart_coroutine(void *opaque) AioContext *ctx = bdrv_get_aio_context(bs); trace_ssh_restart_coroutine(restart->co); - aio_set_fd_handler(ctx, s->sock, false, NULL, NULL, NULL, NULL); + aio_set_fd_handler(ctx, s->sock, false, NULL, NULL, NULL, NULL, NULL); aio_co_wake(restart->co); } @@ -1020,7 +1020,7 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs) trace_ssh_co_yield(s->sock, rd_handler, wr_handler); aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, - false, rd_handler, wr_handler, NULL, &restart); + false, rd_handler, wr_handler, NULL, NULL, &restart); qemu_coroutine_yield(); trace_ssh_co_yield_back(s->sock); } diff --git a/block/vvfat.c b/block/vvfat.c index 5dacc6cfac..b2b58d93b8 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -882,7 +882,7 @@ static int read_directory(BDRVVVFATState* s, int mapping_index) return 0; } -static inline uint32_t sector2cluster(BDRVVVFATState* s,off_t sector_num) +static inline int32_t sector2cluster(BDRVVVFATState* s,off_t sector_num) { return (sector_num - s->offset_to_root_dir) / s->sectors_per_cluster; } @@ -1230,6 +1230,7 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, dirname, cyls, heads, secs)); s->sector_count = cyls * heads * secs - s->offset_to_bootsector; + bs->total_sectors = cyls * heads * secs; if (qemu_opt_get_bool(opts, "rw", false)) { if (!bdrv_is_read_only(bs)) { @@ -1250,8 +1251,6 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, } } - bs->total_sectors = cyls * heads * secs; - if (init_directories(s, dirname, heads, secs, errp)) { ret = -EIO; goto fail; @@ -2982,6 +2981,7 @@ static int vvfat_write(BlockDriverState *bs, int64_t sector_num, { BDRVVVFATState *s = bs->opaque; int i, ret; + int first_cluster, last_cluster; DLOG(checkpoint()); @@ -3000,9 +3000,20 @@ DLOG(checkpoint()); if (sector_num < s->offset_to_fat) return -1; - for (i = sector2cluster(s, sector_num); - i <= sector2cluster(s, sector_num + nb_sectors - 1);) { - mapping_t* mapping = find_mapping_for_cluster(s, i); + /* + * Values will be negative for writes to the FAT, which is located before + * the root directory. + */ + first_cluster = sector2cluster(s, sector_num); + last_cluster = sector2cluster(s, sector_num + nb_sectors - 1); + + for (i = first_cluster; i <= last_cluster;) { + mapping_t *mapping = NULL; + + if (i >= 0) { + mapping = find_mapping_for_cluster(s, i); + } + if (mapping) { if (mapping->read_only) { fprintf(stderr, "Tried to write to write-protected file %s\n", @@ -3042,8 +3053,9 @@ DLOG(checkpoint()); } } i = mapping->end; - } else + } else { i++; + } } /* @@ -3057,10 +3069,11 @@ DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sec return ret; } - for (i = sector2cluster(s, sector_num); - i <= sector2cluster(s, sector_num + nb_sectors - 1); i++) - if (i >= 0) + for (i = first_cluster; i <= last_cluster; i++) { + if (i >= 0) { s->used_clusters[i] |= USED_ALLOCATED; + } + } DLOG(checkpoint()); /* TODO: add timeout */ @@ -3147,8 +3160,8 @@ static int enable_write_target(BlockDriverState *bs, Error **errp) } opts = qemu_opts_create(bdrv_qcow->create_opts, NULL, 0, &error_abort); - qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s->sector_count * 512, - &error_abort); + qemu_opt_set_number(opts, BLOCK_OPT_SIZE, + bs->total_sectors * BDRV_SECTOR_SIZE, &error_abort); qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, "fat:", &error_abort); ret = bdrv_create(bdrv_qcow, s->qcow_filename, opts, errp); diff --git a/block/win32-aio.c b/block/win32-aio.c index b7221a272f..c57e10c997 100644 --- a/block/win32-aio.c +++ b/block/win32-aio.c @@ -172,7 +172,7 @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile) void win32_aio_detach_aio_context(QEMUWin32AIOState *aio, AioContext *old_context) { - aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL); + aio_set_event_notifier(old_context, &aio->e, false, NULL, NULL, NULL); aio->aio_ctx = NULL; } @@ -181,7 +181,7 @@ void win32_aio_attach_aio_context(QEMUWin32AIOState *aio, { aio->aio_ctx = new_context; aio_set_event_notifier(new_context, &aio->e, false, - win32_aio_completion_cb, NULL); + win32_aio_completion_cb, NULL, NULL); } QEMUWin32AIOState *win32_aio_init(void) |