From fe0480d6294270ff0d6fb60e66bb725a6aad2043 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 22 Mar 2019 13:38:43 +0100 Subject: block: Add BDRV_REQ_NO_FALLBACK For qemu-img convert, we want an operation that zeroes out the whole image if this can be done efficiently, but that returns an error otherwise so we don't write explicit zeroes and immediately overwrite them with the real data, potentially doubling the amount of data to be written. Signed-off-by: Kevin Wolf Acked-by: Eric Blake --- include/block/block.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/block/block.h b/include/block/block.h index e452988b66..c7a26199aa 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -83,8 +83,13 @@ typedef enum { */ BDRV_REQ_SERIALISING = 0x80, + /* Execute the request only if the operation can be offloaded or otherwise + * be executed efficiently, but return an error instead of using a slow + * fallback. */ + BDRV_REQ_NO_FALLBACK = 0x100, + /* Mask of valid flags */ - BDRV_REQ_MASK = 0xff, + BDRV_REQ_MASK = 0x1ff, } BdrvRequestFlags; typedef struct BlockSizes { -- cgit 1.4.1 From 738301e11758171defaa5a5237d584f8226af89f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 22 Mar 2019 13:45:23 +0100 Subject: file-posix: Support BDRV_REQ_NO_FALLBACK for zero writes We know that the kernel implements a slow fallback code path for BLKZEROOUT, so if BDRV_REQ_NO_FALLBACK is given, we shouldn't call it. The other operations we call in the context of .bdrv_co_pwrite_zeroes should usually be quick, so no modification should be needed for them. If we ever notice that there are additional problematic cases, we can still make these conditional as well. Signed-off-by: Kevin Wolf Acked-by: Eric Blake --- block/file-posix.c | 24 ++++++++++++++++-------- include/block/raw-aio.h | 1 + 2 files changed, 17 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/block/file-posix.c b/block/file-posix.c index d102f3b222..db4cccbe51 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -652,7 +652,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, } #endif - bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP; + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; ret = 0; fail: if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) { @@ -1500,14 +1500,19 @@ static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb) } #ifdef BLKZEROOUT - do { - uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes }; - if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) { - return 0; - } - } while (errno == EINTR); + /* The BLKZEROOUT implementation in the kernel doesn't set + * BLKDEV_ZERO_NOFALLBACK, so we can't call this if we have to avoid slow + * fallbacks. */ + if (!(aiocb->aio_type & QEMU_AIO_NO_FALLBACK)) { + do { + uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes }; + if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) { + return 0; + } + } while (errno == EINTR); - ret = translate_err(-errno); + ret = translate_err(-errno); + } #endif if (ret == -ENOTSUP) { @@ -2659,6 +2664,9 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, if (blkdev) { acb.aio_type |= QEMU_AIO_BLKDEV; } + if (flags & BDRV_REQ_NO_FALLBACK) { + acb.aio_type |= QEMU_AIO_NO_FALLBACK; + } if (flags & BDRV_REQ_MAY_UNMAP) { acb.aio_type |= QEMU_AIO_DISCARD; diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h index 6799614e56..ba223dd1f1 100644 --- a/include/block/raw-aio.h +++ b/include/block/raw-aio.h @@ -40,6 +40,7 @@ /* AIO flags */ #define QEMU_AIO_MISALIGNED 0x1000 #define QEMU_AIO_BLKDEV 0x2000 +#define QEMU_AIO_NO_FALLBACK 0x4000 /* linux-aio.c - Linux native implementation */ -- cgit 1.4.1