diff options
Diffstat (limited to 'block/backup.c')
| -rw-r--r-- | block/backup.c | 443 |
1 files changed, 78 insertions, 365 deletions
diff --git a/block/backup.c b/block/backup.c index 763f0d7ff6..46978c1785 100644 --- a/block/backup.c +++ b/block/backup.c @@ -2,6 +2,7 @@ * QEMU backup * * Copyright (C) 2013 Proxmox Server Solutions + * Copyright (c) 2019 Virtuozzo International GmbH. * * Authors: * Dietmar Maurer (dietmar@proxmox.com) @@ -18,6 +19,7 @@ #include "block/block_int.h" #include "block/blockjob_int.h" #include "block/block_backup.h" +#include "block/block-copy.h" #include "qapi/error.h" #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" @@ -26,333 +28,68 @@ #include "qemu/bitmap.h" #include "qemu/error-report.h" -#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16) +#include "block/backup-top.h" -typedef struct CowRequest { - int64_t start_byte; - int64_t end_byte; - QLIST_ENTRY(CowRequest) list; - CoQueue wait_queue; /* coroutines blocked on this request */ -} CowRequest; +#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16) typedef struct BackupBlockJob { BlockJob common; - BlockBackend *target; + BlockDriverState *backup_top; + BlockDriverState *source_bs; BdrvDirtyBitmap *sync_bitmap; - BdrvDirtyBitmap *copy_bitmap; MirrorSyncMode sync_mode; BitmapSyncMode bitmap_mode; BlockdevOnError on_source_error; BlockdevOnError on_target_error; - CoRwlock flush_rwlock; uint64_t len; uint64_t bytes_read; int64_t cluster_size; - NotifierWithReturn before_write; - QLIST_HEAD(, CowRequest) inflight_reqs; - bool use_copy_range; - int64_t copy_range_size; - - BdrvRequestFlags write_flags; - bool initializing_bitmap; + BlockCopyState *bcs; } BackupBlockJob; static const BlockJobDriver backup_job_driver; -/* See if in-flight requests overlap and wait for them to complete */ -static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job, - int64_t start, - int64_t end) +static void backup_progress_bytes_callback(int64_t bytes, void *opaque) { - CowRequest *req; - bool retry; - - do { - retry = false; - QLIST_FOREACH(req, &job->inflight_reqs, list) { - if (end > req->start_byte && start < req->end_byte) { - qemu_co_queue_wait(&req->wait_queue, NULL); - retry = true; - break; - } - } - } while (retry); -} + BackupBlockJob *s = opaque; -/* Keep track of an in-flight request */ -static void cow_request_begin(CowRequest *req, BackupBlockJob *job, - int64_t start, int64_t end) -{ - req->start_byte = start; - req->end_byte = end; - qemu_co_queue_init(&req->wait_queue); - QLIST_INSERT_HEAD(&job->inflight_reqs, req, list); + s->bytes_read += bytes; + job_progress_update(&s->common.job, bytes); } -/* Forget about a completed request */ -static void cow_request_end(CowRequest *req) +static void backup_progress_reset_callback(void *opaque) { - QLIST_REMOVE(req, list); - qemu_co_queue_restart_all(&req->wait_queue); -} - -/* Copy range to target with a bounce buffer and return the bytes copied. If - * error occurred, return a negative error number */ -static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, - int64_t start, - int64_t end, - bool is_write_notifier, - bool *error_is_read, - void **bounce_buffer) -{ - int ret; - BlockBackend *blk = job->common.blk; - int nbytes; - int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0; - - assert(QEMU_IS_ALIGNED(start, job->cluster_size)); - bdrv_reset_dirty_bitmap(job->copy_bitmap, start, job->cluster_size); - nbytes = MIN(job->cluster_size, job->len - start); - if (!*bounce_buffer) { - *bounce_buffer = blk_blockalign(blk, job->cluster_size); - } - - ret = blk_co_pread(blk, start, nbytes, *bounce_buffer, read_flags); - if (ret < 0) { - trace_backup_do_cow_read_fail(job, start, ret); - if (error_is_read) { - *error_is_read = true; - } - goto fail; - } - - ret = blk_co_pwrite(job->target, start, nbytes, *bounce_buffer, - job->write_flags); - if (ret < 0) { - trace_backup_do_cow_write_fail(job, start, ret); - if (error_is_read) { - *error_is_read = false; - } - goto fail; - } - - return nbytes; -fail: - bdrv_set_dirty_bitmap(job->copy_bitmap, start, job->cluster_size); - return ret; - -} - -/* Copy range to target and return the bytes copied. If error occurred, return a - * negative error number. */ -static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job, - int64_t start, - int64_t end, - bool is_write_notifier) -{ - int ret; - int nr_clusters; - BlockBackend *blk = job->common.blk; - int nbytes; - int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0; - - assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size)); - assert(QEMU_IS_ALIGNED(start, job->cluster_size)); - nbytes = MIN(job->copy_range_size, end - start); - nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size); - bdrv_reset_dirty_bitmap(job->copy_bitmap, start, - job->cluster_size * nr_clusters); - ret = blk_co_copy_range(blk, start, job->target, start, nbytes, - read_flags, job->write_flags); - if (ret < 0) { - trace_backup_do_cow_copy_range_fail(job, start, ret); - bdrv_set_dirty_bitmap(job->copy_bitmap, start, - job->cluster_size * nr_clusters); - return ret; - } - - return nbytes; -} - -/* - * Check if the cluster starting at offset is allocated or not. - * return via pnum the number of contiguous clusters sharing this allocation. - */ -static int backup_is_cluster_allocated(BackupBlockJob *s, int64_t offset, - int64_t *pnum) -{ - BlockDriverState *bs = blk_bs(s->common.blk); - int64_t count, total_count = 0; - int64_t bytes = s->len - offset; - int ret; - - assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); - - while (true) { - ret = bdrv_is_allocated(bs, offset, bytes, &count); - if (ret < 0) { - return ret; - } - - total_count += count; - - if (ret || count == 0) { - /* - * ret: partial segment(s) are considered allocated. - * otherwise: unallocated tail is treated as an entire segment. - */ - *pnum = DIV_ROUND_UP(total_count, s->cluster_size); - return ret; - } - - /* Unallocated segment(s) with uncertain following segment(s) */ - if (total_count >= s->cluster_size) { - *pnum = total_count / s->cluster_size; - return 0; - } - - offset += count; - bytes -= count; - } -} - -/** - * Reset bits in copy_bitmap starting at offset if they represent unallocated - * data in the image. May reset subsequent contiguous bits. - * @return 0 when the cluster at @offset was unallocated, - * 1 otherwise, and -ret on error. - */ -static int64_t backup_bitmap_reset_unallocated(BackupBlockJob *s, - int64_t offset, int64_t *count) -{ - int ret; - int64_t clusters, bytes, estimate; - - ret = backup_is_cluster_allocated(s, offset, &clusters); - if (ret < 0) { - return ret; - } - - bytes = clusters * s->cluster_size; - - if (!ret) { - bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes); - estimate = bdrv_get_dirty_count(s->copy_bitmap); - job_progress_set_remaining(&s->common.job, estimate); - } + BackupBlockJob *s = opaque; + uint64_t estimate = bdrv_get_dirty_count(s->bcs->copy_bitmap); - *count = bytes; - return ret; + job_progress_set_remaining(&s->common.job, estimate); } static int coroutine_fn backup_do_cow(BackupBlockJob *job, int64_t offset, uint64_t bytes, - bool *error_is_read, - bool is_write_notifier) + bool *error_is_read) { - CowRequest cow_request; int ret = 0; int64_t start, end; /* bytes */ - void *bounce_buffer = NULL; - int64_t status_bytes; - - qemu_co_rwlock_rdlock(&job->flush_rwlock); start = QEMU_ALIGN_DOWN(offset, job->cluster_size); end = QEMU_ALIGN_UP(bytes + offset, job->cluster_size); trace_backup_do_cow_enter(job, start, offset, bytes); - wait_for_overlapping_requests(job, start, end); - cow_request_begin(&cow_request, job, start, end); - - while (start < end) { - int64_t dirty_end; - - if (!bdrv_dirty_bitmap_get(job->copy_bitmap, start)) { - trace_backup_do_cow_skip(job, start); - start += job->cluster_size; - continue; /* already copied */ - } - - dirty_end = bdrv_dirty_bitmap_next_zero(job->copy_bitmap, start, - (end - start)); - if (dirty_end < 0) { - dirty_end = end; - } - - if (job->initializing_bitmap) { - ret = backup_bitmap_reset_unallocated(job, start, &status_bytes); - if (ret == 0) { - trace_backup_do_cow_skip_range(job, start, status_bytes); - start += status_bytes; - continue; - } - /* Clamp to known allocated region */ - dirty_end = MIN(dirty_end, start + status_bytes); - } - - trace_backup_do_cow_process(job, start); - - if (job->use_copy_range) { - ret = backup_cow_with_offload(job, start, dirty_end, - is_write_notifier); - if (ret < 0) { - job->use_copy_range = false; - } - } - if (!job->use_copy_range) { - ret = backup_cow_with_bounce_buffer(job, start, dirty_end, - is_write_notifier, - error_is_read, &bounce_buffer); - } - if (ret < 0) { - break; - } - - /* Publish progress, guest I/O counts as progress too. Note that the - * offset field is an opaque progress value, it is not a disk offset. - */ - start += ret; - job->bytes_read += ret; - job_progress_update(&job->common.job, ret); - ret = 0; - } - - if (bounce_buffer) { - qemu_vfree(bounce_buffer); - } - - cow_request_end(&cow_request); + ret = block_copy(job->bcs, start, end - start, error_is_read); trace_backup_do_cow_return(job, offset, bytes, ret); - qemu_co_rwlock_unlock(&job->flush_rwlock); - return ret; } -static int coroutine_fn backup_before_write_notify( - NotifierWithReturn *notifier, - void *opaque) -{ - BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write); - BdrvTrackedRequest *req = opaque; - - assert(req->bs == blk_bs(job->common.blk)); - assert(QEMU_IS_ALIGNED(req->offset, BDRV_SECTOR_SIZE)); - assert(QEMU_IS_ALIGNED(req->bytes, BDRV_SECTOR_SIZE)); - - return backup_do_cow(job, req->offset, req->bytes, NULL, true); -} - static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret) { BdrvDirtyBitmap *bm; - BlockDriverState *bs = blk_bs(job->common.blk); bool sync = (((ret == 0) || (job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS)) \ && (job->bitmap_mode != BITMAP_SYNC_MODE_NEVER)); @@ -361,20 +98,20 @@ static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret) * We succeeded, or we always intended to sync the bitmap. * Delete this bitmap and install the child. */ - bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL); + bm = bdrv_dirty_bitmap_abdicate(job->source_bs, job->sync_bitmap, NULL); } else { /* * We failed, or we never intended to sync the bitmap anyway. * Merge the successor back into the parent, keeping all data. */ - bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL); + bm = bdrv_reclaim_dirty_bitmap(job->source_bs, job->sync_bitmap, NULL); } assert(bm); if (ret < 0 && job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS) { /* If we failed and synced, merge in the bits we didn't copy: */ - bdrv_dirty_bitmap_merge_internal(bm, job->copy_bitmap, + bdrv_dirty_bitmap_merge_internal(bm, job->bcs->copy_bitmap, NULL, true); } } @@ -398,16 +135,8 @@ static void backup_abort(Job *job) static void backup_clean(Job *job) { BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); - BlockDriverState *bs = blk_bs(s->common.blk); - - if (s->copy_bitmap) { - bdrv_release_dirty_bitmap(bs, s->copy_bitmap); - s->copy_bitmap = NULL; - } - assert(s->target); - blk_unref(s->target); - s->target = NULL; + bdrv_backup_top_drop(s->backup_top); } void backup_do_checkpoint(BlockJob *job, Error **errp) @@ -422,7 +151,7 @@ void backup_do_checkpoint(BlockJob *job, Error **errp) return; } - bdrv_set_dirty_bitmap(backup_job->copy_bitmap, 0, backup_job->len); + bdrv_set_dirty_bitmap(backup_job->bcs->copy_bitmap, 0, backup_job->len); } static BlockErrorAction backup_error_action(BackupBlockJob *job, @@ -445,8 +174,10 @@ static bool coroutine_fn yield_and_check(BackupBlockJob *job) return true; } - /* We need to yield even for delay_ns = 0 so that bdrv_drain_all() can - * return. Without a yield, the VM would not reboot. */ + /* + * We need to yield even for delay_ns = 0 so that bdrv_drain_all() can + * return. Without a yield, the VM would not reboot. + */ delay_ns = block_job_ratelimit_get_delay(&job->common, job->bytes_read); job->bytes_read = 0; job_sleep_ns(&job->common.job, delay_ns); @@ -465,14 +196,13 @@ static int coroutine_fn backup_loop(BackupBlockJob *job) BdrvDirtyBitmapIter *bdbi; int ret = 0; - bdbi = bdrv_dirty_iter_new(job->copy_bitmap); + bdbi = bdrv_dirty_iter_new(job->bcs->copy_bitmap); while ((offset = bdrv_dirty_iter_next(bdbi)) != -1) { do { if (yield_and_check(job)) { goto out; } - ret = backup_do_cow(job, offset, - job->cluster_size, &error_is_read, false); + ret = backup_do_cow(job, offset, job->cluster_size, &error_is_read); if (ret < 0 && backup_error_action(job, error_is_read, -ret) == BLOCK_ERROR_ACTION_REPORT) { @@ -492,7 +222,7 @@ static void backup_init_copy_bitmap(BackupBlockJob *job) uint64_t estimate; if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) { - ret = bdrv_dirty_bitmap_merge_internal(job->copy_bitmap, + ret = bdrv_dirty_bitmap_merge_internal(job->bcs->copy_bitmap, job->sync_bitmap, NULL, true); assert(ret); @@ -502,29 +232,22 @@ static void backup_init_copy_bitmap(BackupBlockJob *job) * We can't hog the coroutine to initialize this thoroughly. * Set a flag and resume work when we are able to yield safely. */ - job->initializing_bitmap = true; + job->bcs->skip_unallocated = true; } - bdrv_set_dirty_bitmap(job->copy_bitmap, 0, job->len); + bdrv_set_dirty_bitmap(job->bcs->copy_bitmap, 0, job->len); } - estimate = bdrv_get_dirty_count(job->copy_bitmap); + estimate = bdrv_get_dirty_count(job->bcs->copy_bitmap); job_progress_set_remaining(&job->common.job, estimate); } static int coroutine_fn backup_run(Job *job, Error **errp) { BackupBlockJob *s = container_of(job, BackupBlockJob, common.job); - BlockDriverState *bs = blk_bs(s->common.blk); int ret = 0; - QLIST_INIT(&s->inflight_reqs); - qemu_co_rwlock_init(&s->flush_rwlock); - backup_init_copy_bitmap(s); - s->before_write.notify = backup_before_write_notify; - bdrv_add_before_write_notifier(bs, &s->before_write); - if (s->sync_mode == MIRROR_SYNC_MODE_TOP) { int64_t offset = 0; int64_t count; @@ -535,22 +258,26 @@ static int coroutine_fn backup_run(Job *job, Error **errp) goto out; } - ret = backup_bitmap_reset_unallocated(s, offset, &count); + ret = block_copy_reset_unallocated(s->bcs, offset, &count); if (ret < 0) { goto out; } offset += count; } - s->initializing_bitmap = false; + s->bcs->skip_unallocated = false; } if (s->sync_mode == MIRROR_SYNC_MODE_NONE) { - /* All bits are set in copy_bitmap to allow any cluster to be copied. - * This does not actually require them to be copied. */ + /* + * All bits are set in copy_bitmap to allow any cluster to be copied. + * This does not actually require them to be copied. + */ while (!job_is_cancelled(job)) { - /* Yield until the job is cancelled. We just let our before_write - * notify callback service CoW requests. */ + /* + * Yield until the job is cancelled. We just let our before_write + * notify callback service CoW requests. + */ job_yield(job); } } else { @@ -558,12 +285,6 @@ static int coroutine_fn backup_run(Job *job, Error **errp) } out: - notifier_with_return_remove(&s->before_write); - - /* wait until pending backup_do_cow() calls have completed */ - qemu_co_rwlock_wrlock(&s->flush_rwlock); - qemu_co_rwlock_unlock(&s->flush_rwlock); - return ret; } @@ -621,6 +342,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap, BitmapSyncMode bitmap_mode, bool compress, + const char *filter_node_name, BlockdevOnError on_source_error, BlockdevOnError on_target_error, int creation_flags, @@ -629,9 +351,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, { int64_t len; BackupBlockJob *job = NULL; - int ret; int64_t cluster_size; - BdrvDirtyBitmap *copy_bitmap = NULL; + BdrvRequestFlags write_flags; + BlockDriverState *backup_top = NULL; + BlockCopyState *bcs = NULL; assert(bs); assert(target); @@ -696,76 +419,66 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, goto error; } - copy_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp); - if (!copy_bitmap) { + /* + * If source is in backing chain of target assume that target is going to be + * used for "image fleecing", i.e. it should represent a kind of snapshot of + * source at backup-start point in time. And target is going to be read by + * somebody (for example, used as NBD export) during backup job. + * + * In this case, we need to add BDRV_REQ_SERIALISING write flag to avoid + * intersection of backup writes and third party reads from target, + * otherwise reading from target we may occasionally read already updated by + * guest data. + * + * For more information see commit f8d59dfb40bb and test + * tests/qemu-iotests/222 + */ + write_flags = (bdrv_chain_contains(target, bs) ? BDRV_REQ_SERIALISING : 0) | + (compress ? BDRV_REQ_WRITE_COMPRESSED : 0), + + backup_top = bdrv_backup_top_append(bs, target, filter_node_name, + cluster_size, write_flags, &bcs, errp); + if (!backup_top) { goto error; } - bdrv_disable_dirty_bitmap(copy_bitmap); /* job->len is fixed, so we can't allow resize */ - job = block_job_create(job_id, &backup_job_driver, txn, bs, - BLK_PERM_CONSISTENT_READ, - BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | - BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD, + job = block_job_create(job_id, &backup_job_driver, txn, backup_top, + 0, BLK_PERM_ALL, speed, creation_flags, cb, opaque, errp); if (!job) { goto error; } - /* The target must match the source in size, so no resize here either */ - job->target = blk_new(job->common.job.aio_context, - BLK_PERM_WRITE, - BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | - BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD); - ret = blk_insert_bs(job->target, target, errp); - if (ret < 0) { - goto error; - } - blk_set_disable_request_queuing(job->target, true); - + job->backup_top = backup_top; + job->source_bs = bs; job->on_source_error = on_source_error; job->on_target_error = on_target_error; job->sync_mode = sync_mode; job->sync_bitmap = sync_bitmap; job->bitmap_mode = bitmap_mode; + job->bcs = bcs; + job->cluster_size = cluster_size; + job->len = len; - /* - * Set write flags: - * 1. Detect image-fleecing (and similar) schemes - * 2. Handle compression - */ - job->write_flags = - (bdrv_chain_contains(target, bs) ? BDRV_REQ_SERIALISING : 0) | - (compress ? BDRV_REQ_WRITE_COMPRESSED : 0); + block_copy_set_callbacks(bcs, backup_progress_bytes_callback, + backup_progress_reset_callback, job); - job->cluster_size = cluster_size; - job->copy_bitmap = copy_bitmap; - copy_bitmap = NULL; - job->use_copy_range = !compress; /* compression isn't supported for it */ - job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk), - blk_get_max_transfer(job->target)); - job->copy_range_size = MAX(job->cluster_size, - QEMU_ALIGN_UP(job->copy_range_size, - job->cluster_size)); - - /* Required permissions are already taken with target's blk_new() */ + /* Required permissions are already taken by backup-top target */ block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, &error_abort); - job->len = len; return &job->common; error: - if (copy_bitmap) { - assert(!job || !job->copy_bitmap); - bdrv_release_dirty_bitmap(bs, copy_bitmap); - } if (sync_bitmap) { bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL); } if (job) { backup_clean(&job->common.job); job_early_fail(&job->common.job); + } else if (backup_top) { + bdrv_backup_top_drop(backup_top); } return NULL; |