diff options
Diffstat (limited to 'block/io.c')
| -rw-r--r-- | block/io.c | 306 |
1 files changed, 196 insertions, 110 deletions
diff --git a/block/io.c b/block/io.c index fb99a7151c..ebdb9d834c 100644 --- a/block/io.c +++ b/block/io.c @@ -289,15 +289,21 @@ void bdrv_drain_all(void) bool busy = true; BlockDriverState *bs; BdrvNextIterator it; + BlockJob *job = NULL; GSList *aio_ctxs = NULL, *ctx; + while ((job = block_job_next(job))) { + AioContext *aio_context = blk_get_aio_context(job->blk); + + aio_context_acquire(aio_context); + block_job_pause(job); + aio_context_release(aio_context); + } + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - if (bs->job) { - block_job_pause(bs->job); - } bdrv_parent_drained_begin(bs); bdrv_io_unplugged_begin(bs); bdrv_drain_recurse(bs); @@ -340,12 +346,18 @@ void bdrv_drain_all(void) aio_context_acquire(aio_context); bdrv_io_unplugged_end(bs); bdrv_parent_drained_end(bs); - if (bs->job) { - block_job_resume(bs->job); - } aio_context_release(aio_context); } g_slist_free(aio_ctxs); + + job = NULL; + while ((job = block_job_next(job))) { + AioContext *aio_context = blk_get_aio_context(job->blk); + + aio_context_acquire(aio_context); + block_job_resume(job); + aio_context_release(aio_context); + } } /** @@ -404,12 +416,12 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) } /** - * Round a region to cluster boundaries + * Round a region to cluster boundaries (sector-based) */ -void bdrv_round_to_clusters(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - int64_t *cluster_sector_num, - int *cluster_nb_sectors) +void bdrv_round_sectors_to_clusters(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + int64_t *cluster_sector_num, + int *cluster_nb_sectors) { BlockDriverInfo bdi; @@ -424,6 +436,26 @@ void bdrv_round_to_clusters(BlockDriverState *bs, } } +/** + * Round a region to cluster boundaries + */ +void bdrv_round_to_clusters(BlockDriverState *bs, + int64_t offset, unsigned int bytes, + int64_t *cluster_offset, + unsigned int *cluster_bytes) +{ + BlockDriverInfo bdi; + + if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) { + *cluster_offset = offset; + *cluster_bytes = bytes; + } else { + int64_t c = bdi.cluster_size; + *cluster_offset = QEMU_ALIGN_DOWN(offset, c); + *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c); + } +} + static int bdrv_get_cluster_size(BlockDriverState *bs) { BlockDriverInfo bdi; @@ -680,6 +712,18 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags) } } +int bdrv_preadv(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov) +{ + int ret; + + ret = bdrv_prwv_co(bs, offset, qiov, false, 0); + if (ret < 0) { + return ret; + } + + return qiov->size; +} + int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes) { QEMUIOVector qiov; @@ -687,19 +731,13 @@ int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes) .iov_base = (void *)buf, .iov_len = bytes, }; - int ret; if (bytes < 0) { return -EINVAL; } qemu_iovec_init_external(&qiov, &iov, 1); - ret = bdrv_prwv_co(bs, offset, &qiov, false, 0); - if (ret < 0) { - return ret; - } - - return bytes; + return bdrv_preadv(bs, offset, &qiov); } int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov) @@ -776,6 +814,8 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, int64_t sector_num; unsigned int nb_sectors; + assert(!(flags & ~BDRV_REQ_MASK)); + if (drv->bdrv_co_preadv) { return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags); } @@ -815,6 +855,8 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, unsigned int nb_sectors; int ret; + assert(!(flags & ~BDRV_REQ_MASK)); + if (drv->bdrv_co_pwritev) { ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, flags & bs->supported_write_flags); @@ -861,7 +903,7 @@ emulate_flags: } static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) + int64_t offset, unsigned int bytes, QEMUIOVector *qiov) { /* Perform I/O through a temporary buffer so that users who scribble over * their read buffer while the operation is in progress do not end up @@ -873,21 +915,20 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, BlockDriver *drv = bs->drv; struct iovec iov; QEMUIOVector bounce_qiov; - int64_t cluster_sector_num; - int cluster_nb_sectors; + int64_t cluster_offset; + unsigned int cluster_bytes; size_t skip_bytes; int ret; /* Cover entire cluster so no additional backing file I/O is required when * allocating cluster in the image file. */ - bdrv_round_to_clusters(bs, sector_num, nb_sectors, - &cluster_sector_num, &cluster_nb_sectors); + bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes); - trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, - cluster_sector_num, cluster_nb_sectors); + trace_bdrv_co_do_copy_on_readv(bs, offset, bytes, + cluster_offset, cluster_bytes); - iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE; + iov.iov_len = cluster_bytes; iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len); if (bounce_buffer == NULL) { ret = -ENOMEM; @@ -896,8 +937,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, qemu_iovec_init_external(&bounce_qiov, &iov, 1); - ret = bdrv_driver_preadv(bs, cluster_sector_num * BDRV_SECTOR_SIZE, - cluster_nb_sectors * BDRV_SECTOR_SIZE, + ret = bdrv_driver_preadv(bs, cluster_offset, cluster_bytes, &bounce_qiov, 0); if (ret < 0) { goto err; @@ -905,16 +945,12 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, if (drv->bdrv_co_pwrite_zeroes && buffer_is_zero(bounce_buffer, iov.iov_len)) { - ret = bdrv_co_do_pwrite_zeroes(bs, - cluster_sector_num * BDRV_SECTOR_SIZE, - cluster_nb_sectors * BDRV_SECTOR_SIZE, - 0); + ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, cluster_bytes, 0); } else { /* This does not change the data on the disk, it is not necessary * to flush even in cache=writethrough mode. */ - ret = bdrv_driver_pwritev(bs, cluster_sector_num * BDRV_SECTOR_SIZE, - cluster_nb_sectors * BDRV_SECTOR_SIZE, + ret = bdrv_driver_pwritev(bs, cluster_offset, cluster_bytes, &bounce_qiov, 0); } @@ -926,9 +962,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, goto err; } - skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE; - qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, - nb_sectors * BDRV_SECTOR_SIZE); + skip_bytes = offset - cluster_offset; + qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, bytes); err: qemu_vfree(bounce_buffer); @@ -944,15 +979,15 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, int64_t align, QEMUIOVector *qiov, int flags) { + int64_t total_bytes, max_bytes; int ret; - int64_t sector_num = offset >> BDRV_SECTOR_BITS; - unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS; - - assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); - assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); + assert(is_power_of_2(align)); + assert((offset & (align - 1)) == 0); + assert((bytes & (align - 1)) == 0); assert(!qiov || bytes == qiov->size); assert((bs->open_flags & BDRV_O_NO_IO) == 0); + assert(!(flags & ~BDRV_REQ_MASK)); /* Handle Copy on Read and associated serialisation */ if (flags & BDRV_REQ_COPY_ON_READ) { @@ -969,59 +1004,50 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, } if (flags & BDRV_REQ_COPY_ON_READ) { + int64_t start_sector = offset >> BDRV_SECTOR_BITS; + int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); + unsigned int nb_sectors = end_sector - start_sector; int pnum; - ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum); + ret = bdrv_is_allocated(bs, start_sector, nb_sectors, &pnum); if (ret < 0) { goto out; } if (!ret || pnum != nb_sectors) { - ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov); + ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov); goto out; } } /* Forward the request to the BlockDriver */ - if (!bs->zero_beyond_eof) { - ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0); - } else { - /* Read zeros after EOF */ - int64_t total_sectors, max_nb_sectors; - - total_sectors = bdrv_nb_sectors(bs); - if (total_sectors < 0) { - ret = total_sectors; - goto out; - } + total_bytes = bdrv_getlength(bs); + if (total_bytes < 0) { + ret = total_bytes; + goto out; + } - max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num), - align >> BDRV_SECTOR_BITS); - if (nb_sectors < max_nb_sectors) { - ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0); - } else if (max_nb_sectors > 0) { - QEMUIOVector local_qiov; + max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align); + if (bytes < max_bytes) { + ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0); + } else if (max_bytes > 0) { + QEMUIOVector local_qiov; - qemu_iovec_init(&local_qiov, qiov->niov); - qemu_iovec_concat(&local_qiov, qiov, 0, - max_nb_sectors * BDRV_SECTOR_SIZE); + qemu_iovec_init(&local_qiov, qiov->niov); + qemu_iovec_concat(&local_qiov, qiov, 0, max_bytes); - ret = bdrv_driver_preadv(bs, offset, - max_nb_sectors * BDRV_SECTOR_SIZE, - &local_qiov, 0); + ret = bdrv_driver_preadv(bs, offset, max_bytes, &local_qiov, 0); - qemu_iovec_destroy(&local_qiov); - } else { - ret = 0; - } + qemu_iovec_destroy(&local_qiov); + } else { + ret = 0; + } - /* Reading beyond end of file is supposed to produce zeroes */ - if (ret == 0 && total_sectors < sector_num + nb_sectors) { - uint64_t offset = MAX(0, total_sectors - sector_num); - uint64_t bytes = (sector_num + nb_sectors - offset) * - BDRV_SECTOR_SIZE; - qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes); - } + /* Reading beyond end of file is supposed to produce zeroes */ + if (ret == 0 && total_bytes < offset + bytes) { + uint64_t zero_offset = MAX(0, total_bytes - offset); + uint64_t zero_bytes = offset + bytes - zero_offset; + qemu_iovec_memset(qiov, zero_offset, 0, zero_bytes); } out: @@ -1038,8 +1064,7 @@ int coroutine_fn bdrv_co_preadv(BlockDriverState *bs, BlockDriver *drv = bs->drv; BdrvTrackedRequest req; - /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ - uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment); + uint64_t align = bs->request_alignment; uint8_t *head_buf = NULL; uint8_t *tail_buf = NULL; QEMUIOVector local_qiov; @@ -1235,13 +1260,12 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, bool waited; int ret; - int64_t sector_num = offset >> BDRV_SECTOR_BITS; - unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS; + int64_t start_sector = offset >> BDRV_SECTOR_BITS; + int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); - assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); - assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); assert(!qiov || bytes == qiov->size); assert((bs->open_flags & BDRV_O_NO_IO) == 0); + assert(!(flags & ~BDRV_REQ_MASK)); waited = wait_serialising_requests(req); assert(!waited || !req->serialising); @@ -1263,22 +1287,21 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, /* Do nothing, write notifier decided to fail this request */ } else if (flags & BDRV_REQ_ZERO_WRITE) { bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO); - ret = bdrv_co_do_pwrite_zeroes(bs, sector_num << BDRV_SECTOR_BITS, - nb_sectors << BDRV_SECTOR_BITS, flags); + ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags); } else { bdrv_debug_event(bs, BLKDBG_PWRITEV); ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags); } bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE); - bdrv_set_dirty(bs, sector_num, nb_sectors); + bdrv_set_dirty(bs, start_sector, end_sector - start_sector); if (bs->wr_highest_offset < offset + bytes) { bs->wr_highest_offset = offset + bytes; } if (ret >= 0) { - bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors); + bs->total_sectors = MAX(bs->total_sectors, end_sector); } return ret; @@ -1293,7 +1316,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, uint8_t *buf = NULL; QEMUIOVector local_qiov; struct iovec iov; - uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment); + uint64_t align = bs->request_alignment; unsigned int head_padding_bytes, tail_padding_bytes; int ret = 0; @@ -1380,8 +1403,7 @@ int coroutine_fn bdrv_co_pwritev(BlockDriverState *bs, BdrvRequestFlags flags) { BdrvTrackedRequest req; - /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ - uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment); + uint64_t align = bs->request_alignment; uint8_t *head_buf = NULL; uint8_t *tail_buf = NULL; QEMUIOVector local_qiov; @@ -1824,6 +1846,62 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); } +typedef struct BdrvVmstateCo { + BlockDriverState *bs; + QEMUIOVector *qiov; + int64_t pos; + bool is_read; + int ret; +} BdrvVmstateCo; + +static int coroutine_fn +bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, + bool is_read) +{ + BlockDriver *drv = bs->drv; + + if (!drv) { + return -ENOMEDIUM; + } else if (drv->bdrv_load_vmstate) { + return is_read ? drv->bdrv_load_vmstate(bs, qiov, pos) + : drv->bdrv_save_vmstate(bs, qiov, pos); + } else if (bs->file) { + return bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read); + } + + return -ENOTSUP; +} + +static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque) +{ + BdrvVmstateCo *co = opaque; + co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read); +} + +static inline int +bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, + bool is_read) +{ + if (qemu_in_coroutine()) { + return bdrv_co_rw_vmstate(bs, qiov, pos, is_read); + } else { + BdrvVmstateCo data = { + .bs = bs, + .qiov = qiov, + .pos = pos, + .is_read = is_read, + .ret = -EINPROGRESS, + }; + Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry); + + qemu_coroutine_enter(co, &data); + while (data.ret == -EINPROGRESS) { + aio_poll(bdrv_get_aio_context(bs), true); + } + return data.ret; + } +} + int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, int64_t pos, int size) { @@ -1832,37 +1910,45 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, .iov_base = (void *) buf, .iov_len = size, }; + int ret; qemu_iovec_init_external(&qiov, &iov, 1); - return bdrv_writev_vmstate(bs, &qiov, pos); + + ret = bdrv_writev_vmstate(bs, &qiov, pos); + if (ret < 0) { + return ret; + } + + return size; } int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { - BlockDriver *drv = bs->drv; + return bdrv_rw_vmstate(bs, qiov, pos, false); +} - if (!drv) { - return -ENOMEDIUM; - } else if (drv->bdrv_save_vmstate) { - return drv->bdrv_save_vmstate(bs, qiov, pos); - } else if (bs->file) { - return bdrv_writev_vmstate(bs->file->bs, qiov, pos); +int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, + int64_t pos, int size) +{ + QEMUIOVector qiov; + struct iovec iov = { + .iov_base = buf, + .iov_len = size, + }; + int ret; + + qemu_iovec_init_external(&qiov, &iov, 1); + ret = bdrv_readv_vmstate(bs, &qiov, pos); + if (ret < 0) { + return ret; } - return -ENOTSUP; + return size; } -int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, - int64_t pos, int size) +int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { - BlockDriver *drv = bs->drv; - if (!drv) - return -ENOMEDIUM; - if (drv->bdrv_load_vmstate) - return drv->bdrv_load_vmstate(bs, buf, pos, size); - if (bs->file) - return bdrv_load_vmstate(bs->file->bs, buf, pos, size); - return -ENOTSUP; + return bdrv_rw_vmstate(bs, qiov, pos, true); } /**************************************************************/ |