diff options
Diffstat (limited to 'block/qcow2.c')
| -rw-r--r-- | block/qcow2.c | 466 |
1 file changed, 305 insertions, 161 deletions
diff --git a/block/qcow2.c b/block/qcow2.c index 4d16393e61..7961c05783 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -41,6 +41,7 @@ #include "qapi/qobject-input-visitor.h" #include "qapi/qapi-visit-block-core.h" #include "crypto.h" +#include "block/aio_task.h" /* Differences with QCOW: @@ -1972,20 +1973,184 @@ out: return ret; } +static coroutine_fn int +qcow2_co_preadv_encrypted(BlockDriverState *bs, + uint64_t file_cluster_offset, + uint64_t offset, + uint64_t bytes, + QEMUIOVector *qiov, + uint64_t qiov_offset) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + uint8_t *buf; + + assert(bs->encrypted && s->crypto); + assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); + + /* + * For encrypted images, read everything into a temporary + * contiguous buffer on which the AES functions can work. + * Also, decryption in a separate buffer is better as it + * prevents the guest from learning information about the + * encrypted nature of the virtual disk. + */ + + buf = qemu_try_blockalign(s->data_file->bs, bytes); + if (buf == NULL) { + return -ENOMEM; + } + + BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); + ret = bdrv_co_pread(s->data_file, + file_cluster_offset + offset_into_cluster(s, offset), + bytes, buf, 0); + if (ret < 0) { + goto fail; + } + + assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); + assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); + if (qcow2_co_decrypt(bs, + file_cluster_offset + offset_into_cluster(s, offset), + offset, buf, bytes) < 0) + { + ret = -EIO; + goto fail; + } + qemu_iovec_from_buf(qiov, qiov_offset, buf, bytes); + +fail: + qemu_vfree(buf); + + return ret; +} + +typedef struct Qcow2AioTask { + AioTask task; + + BlockDriverState *bs; + QCow2ClusterType cluster_type; /* only for read */ + uint64_t file_cluster_offset; + uint64_t offset; + uint64_t bytes; + QEMUIOVector *qiov; + uint64_t qiov_offset; + QCowL2Meta *l2meta; /* only for write */ +} Qcow2AioTask; + +static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task); 
+static coroutine_fn int qcow2_add_task(BlockDriverState *bs, + AioTaskPool *pool, + AioTaskFunc func, + QCow2ClusterType cluster_type, + uint64_t file_cluster_offset, + uint64_t offset, + uint64_t bytes, + QEMUIOVector *qiov, + size_t qiov_offset, + QCowL2Meta *l2meta) +{ + Qcow2AioTask local_task; + Qcow2AioTask *task = pool ? g_new(Qcow2AioTask, 1) : &local_task; + + *task = (Qcow2AioTask) { + .task.func = func, + .bs = bs, + .cluster_type = cluster_type, + .qiov = qiov, + .file_cluster_offset = file_cluster_offset, + .offset = offset, + .bytes = bytes, + .qiov_offset = qiov_offset, + .l2meta = l2meta, + }; + + trace_qcow2_add_task(qemu_coroutine_self(), bs, pool, + func == qcow2_co_preadv_task_entry ? "read" : "write", + cluster_type, file_cluster_offset, offset, bytes, + qiov, qiov_offset); + + if (!pool) { + return func(&task->task); + } + + aio_task_pool_start_task(pool, &task->task); + + return 0; +} + +static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs, + QCow2ClusterType cluster_type, + uint64_t file_cluster_offset, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, + size_t qiov_offset) +{ + BDRVQcow2State *s = bs->opaque; + int offset_in_cluster = offset_into_cluster(s, offset); + + switch (cluster_type) { + case QCOW2_CLUSTER_ZERO_PLAIN: + case QCOW2_CLUSTER_ZERO_ALLOC: + /* Both zero types are handled in qcow2_co_preadv_part */ + g_assert_not_reached(); + + case QCOW2_CLUSTER_UNALLOCATED: + assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */ + + BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); + return bdrv_co_preadv_part(bs->backing, offset, bytes, + qiov, qiov_offset, 0); + + case QCOW2_CLUSTER_COMPRESSED: + return qcow2_co_preadv_compressed(bs, file_cluster_offset, + offset, bytes, qiov, qiov_offset); + + case QCOW2_CLUSTER_NORMAL: + if ((file_cluster_offset & 511) != 0) { + return -EIO; + } + + if (bs->encrypted) { + return qcow2_co_preadv_encrypted(bs, file_cluster_offset, + offset, bytes, qiov, qiov_offset); 
+ } + + BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); + return bdrv_co_preadv_part(s->data_file, + file_cluster_offset + offset_in_cluster, + bytes, qiov, qiov_offset, 0); + + default: + g_assert_not_reached(); + } + + g_assert_not_reached(); +} + +static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task) +{ + Qcow2AioTask *t = container_of(task, Qcow2AioTask, task); + + assert(!t->l2meta); + + return qcow2_co_preadv_task(t->bs, t->cluster_type, t->file_cluster_offset, + t->offset, t->bytes, t->qiov, t->qiov_offset); +} + static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, int flags) { BDRVQcow2State *s = bs->opaque; - int offset_in_cluster; - int ret; + int ret = 0; unsigned int cur_bytes; /* number of bytes in current iteration */ uint64_t cluster_offset = 0; - uint8_t *cluster_data = NULL; - - while (bytes != 0) { + AioTaskPool *aio = NULL; + while (bytes != 0 && aio_task_pool_status(aio) == 0) { /* prepare next request */ cur_bytes = MIN(bytes, INT_MAX); if (s->crypto) { @@ -1997,110 +2162,39 @@ static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs, ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset); qemu_co_mutex_unlock(&s->lock); if (ret < 0) { - goto fail; + goto out; } - offset_in_cluster = offset_into_cluster(s, offset); - - switch (ret) { - case QCOW2_CLUSTER_UNALLOCATED: - - if (bs->backing) { - BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); - ret = bdrv_co_preadv_part(bs->backing, offset, cur_bytes, - qiov, qiov_offset, 0); - if (ret < 0) { - goto fail; - } - } else { - /* Note: in this case, no need to wait */ - qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes); - } - break; - - case QCOW2_CLUSTER_ZERO_PLAIN: - case QCOW2_CLUSTER_ZERO_ALLOC: + if (ret == QCOW2_CLUSTER_ZERO_PLAIN || + ret == QCOW2_CLUSTER_ZERO_ALLOC || + (ret == QCOW2_CLUSTER_UNALLOCATED && !bs->backing)) + { qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes); - 
break; - - case QCOW2_CLUSTER_COMPRESSED: - ret = qcow2_co_preadv_compressed(bs, cluster_offset, - offset, cur_bytes, - qiov, qiov_offset); - if (ret < 0) { - goto fail; - } - - break; - - case QCOW2_CLUSTER_NORMAL: - if ((cluster_offset & 511) != 0) { - ret = -EIO; - goto fail; + } else { + if (!aio && cur_bytes != bytes) { + aio = aio_task_pool_new(QCOW2_MAX_WORKERS); } - - if (bs->encrypted) { - assert(s->crypto); - - /* - * For encrypted images, read everything into a temporary - * contiguous buffer on which the AES functions can work. - */ - if (!cluster_data) { - cluster_data = - qemu_try_blockalign(s->data_file->bs, - QCOW_MAX_CRYPT_CLUSTERS - * s->cluster_size); - if (cluster_data == NULL) { - ret = -ENOMEM; - goto fail; - } - } - - assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); - - BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); - ret = bdrv_co_pread(s->data_file, - cluster_offset + offset_in_cluster, - cur_bytes, cluster_data, 0); - if (ret < 0) { - goto fail; - } - - assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); - assert(QEMU_IS_ALIGNED(cur_bytes, BDRV_SECTOR_SIZE)); - if (qcow2_co_decrypt(bs, cluster_offset + offset_in_cluster, - offset, - cluster_data, cur_bytes) < 0) { - ret = -EIO; - goto fail; - } - qemu_iovec_from_buf(qiov, qiov_offset, cluster_data, cur_bytes); - } else { - BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); - ret = bdrv_co_preadv_part(s->data_file, - cluster_offset + offset_in_cluster, - cur_bytes, qiov, qiov_offset, 0); - if (ret < 0) { - goto fail; - } + ret = qcow2_add_task(bs, aio, qcow2_co_preadv_task_entry, ret, + cluster_offset, offset, cur_bytes, + qiov, qiov_offset, NULL); + if (ret < 0) { + goto out; } - break; - - default: - g_assert_not_reached(); - ret = -EIO; - goto fail; } bytes -= cur_bytes; offset += cur_bytes; qiov_offset += cur_bytes; } - ret = 0; -fail: - qemu_vfree(cluster_data); +out: + if (aio) { + aio_task_pool_wait_all(aio); + if (ret == 0) { + ret = aio_task_pool_status(aio); + } + g_free(aio); + } 
return ret; } @@ -2225,6 +2319,99 @@ static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta) return 0; } +/* + * qcow2_co_pwritev_task + * Called with s->lock unlocked + * l2meta - if not NULL, qcow2_co_pwritev_task() will consume it. Caller must + * not use it after qcow2_co_pwritev_task() returns + */ +static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs, + uint64_t file_cluster_offset, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, + uint64_t qiov_offset, + QCowL2Meta *l2meta) +{ + int ret; + BDRVQcow2State *s = bs->opaque; + void *crypt_buf = NULL; + int offset_in_cluster = offset_into_cluster(s, offset); + QEMUIOVector encrypted_qiov; + + if (bs->encrypted) { + assert(s->crypto); + assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); + crypt_buf = qemu_try_blockalign(bs->file->bs, bytes); + if (crypt_buf == NULL) { + ret = -ENOMEM; + goto out_unlocked; + } + qemu_iovec_to_buf(qiov, qiov_offset, crypt_buf, bytes); + + if (qcow2_co_encrypt(bs, file_cluster_offset + offset_in_cluster, + offset, crypt_buf, bytes) < 0) + { + ret = -EIO; + goto out_unlocked; + } + + qemu_iovec_init_buf(&encrypted_qiov, crypt_buf, bytes); + qiov = &encrypted_qiov; + qiov_offset = 0; + } + + /* Try to efficiently initialize the physical space with zeroes */ + ret = handle_alloc_space(bs, l2meta); + if (ret < 0) { + goto out_unlocked; + } + + /* + * If we need to do COW, check if it's possible to merge the + * writing of the guest data together with that of the COW regions. + * If it's not possible (or not necessary) then write the + * guest data now. 
+ */ + if (!merge_cow(offset, bytes, qiov, qiov_offset, l2meta)) { + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); + trace_qcow2_writev_data(qemu_coroutine_self(), + file_cluster_offset + offset_in_cluster); + ret = bdrv_co_pwritev_part(s->data_file, + file_cluster_offset + offset_in_cluster, + bytes, qiov, qiov_offset, 0); + if (ret < 0) { + goto out_unlocked; + } + } + + qemu_co_mutex_lock(&s->lock); + + ret = qcow2_handle_l2meta(bs, &l2meta, true); + goto out_locked; + +out_unlocked: + qemu_co_mutex_lock(&s->lock); + +out_locked: + qcow2_handle_l2meta(bs, &l2meta, false); + qemu_co_mutex_unlock(&s->lock); + + qemu_vfree(crypt_buf); + + return ret; +} + +static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task) +{ + Qcow2AioTask *t = container_of(task, Qcow2AioTask, task); + + assert(!t->cluster_type); + + return qcow2_co_pwritev_task(t->bs, t->file_cluster_offset, + t->offset, t->bytes, t->qiov, t->qiov_offset, + t->l2meta); +} + static coroutine_fn int qcow2_co_pwritev_part( BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, int flags) @@ -2234,16 +2421,12 @@ static coroutine_fn int qcow2_co_pwritev_part( int ret; unsigned int cur_bytes; /* number of sectors in current iteration */ uint64_t cluster_offset; - QEMUIOVector encrypted_qiov; - uint64_t bytes_done = 0; - uint8_t *cluster_data = NULL; QCowL2Meta *l2meta = NULL; + AioTaskPool *aio = NULL; trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes); - qemu_co_mutex_lock(&s->lock); - - while (bytes != 0) { + while (bytes != 0 && aio_task_pool_status(aio) == 0) { l2meta = NULL; @@ -2256,6 +2439,8 @@ static coroutine_fn int qcow2_co_pwritev_part( - offset_in_cluster); } + qemu_co_mutex_lock(&s->lock); + ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes, &cluster_offset, &l2meta); if (ret < 0) { @@ -2273,73 +2458,24 @@ static coroutine_fn int qcow2_co_pwritev_part( qemu_co_mutex_unlock(&s->lock); - if (bs->encrypted) { - assert(s->crypto); - if 
(!cluster_data) { - cluster_data = qemu_try_blockalign(bs->file->bs, - QCOW_MAX_CRYPT_CLUSTERS - * s->cluster_size); - if (cluster_data == NULL) { - ret = -ENOMEM; - goto out_unlocked; - } - } - - assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); - qemu_iovec_to_buf(qiov, qiov_offset + bytes_done, - cluster_data, cur_bytes); - - if (qcow2_co_encrypt(bs, cluster_offset + offset_in_cluster, offset, - cluster_data, cur_bytes) < 0) { - ret = -EIO; - goto out_unlocked; - } - - qemu_iovec_init_buf(&encrypted_qiov, cluster_data, cur_bytes); + if (!aio && cur_bytes != bytes) { + aio = aio_task_pool_new(QCOW2_MAX_WORKERS); } - - /* Try to efficiently initialize the physical space with zeroes */ - ret = handle_alloc_space(bs, l2meta); + ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_task_entry, 0, + cluster_offset, offset, cur_bytes, + qiov, qiov_offset, l2meta); + l2meta = NULL; /* l2meta is consumed by qcow2_co_pwritev_task() */ if (ret < 0) { - goto out_unlocked; - } - - /* If we need to do COW, check if it's possible to merge the - * writing of the guest data together with that of the COW regions. - * If it's not possible (or not necessary) then write the - * guest data now. */ - if (!merge_cow(offset, cur_bytes, - bs->encrypted ? &encrypted_qiov : qiov, - bs->encrypted ? 0 : qiov_offset + bytes_done, l2meta)) - { - BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); - trace_qcow2_writev_data(qemu_coroutine_self(), - cluster_offset + offset_in_cluster); - ret = bdrv_co_pwritev_part( - s->data_file, cluster_offset + offset_in_cluster, cur_bytes, - bs->encrypted ? &encrypted_qiov : qiov, - bs->encrypted ? 
0 : qiov_offset + bytes_done, 0); - if (ret < 0) { - goto out_unlocked; - } - } - - qemu_co_mutex_lock(&s->lock); - - ret = qcow2_handle_l2meta(bs, &l2meta, true); - if (ret) { - goto out_locked; + goto fail_nometa; } bytes -= cur_bytes; offset += cur_bytes; - bytes_done += cur_bytes; + qiov_offset += cur_bytes; trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes); } ret = 0; - goto out_locked; -out_unlocked: qemu_co_mutex_lock(&s->lock); out_locked: @@ -2347,7 +2483,15 @@ out_locked: qemu_co_mutex_unlock(&s->lock); - qemu_vfree(cluster_data); +fail_nometa: + if (aio) { + aio_task_pool_wait_all(aio); + if (ret == 0) { + ret = aio_task_pool_status(aio); + } + g_free(aio); + } + trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); return ret; |