Diffstat (limited to 'include')
 include/block/accounting.h  |   2
 include/block/aio_task.h    |  54
 include/block/block-copy.h  |  93
 include/block/block.h       |   1
 include/block/block_int.h   |   2
 include/exec/exec-all.h     |  17
 include/exec/ram_addr.h     | 138
 include/migration/misc.h    |   1
 include/migration/vmstate.h |  40
 include/qemu/rcu.h          |  25
 10 files changed, 300 insertions(+), 73 deletions(-)
diff --git a/include/block/accounting.h b/include/block/accounting.h
index d1f67b10dd..878b4c3581 100644
--- a/include/block/accounting.h
+++ b/include/block/accounting.h
@@ -33,9 +33,11 @@ typedef struct BlockAcctTimedStats BlockAcctTimedStats;
 typedef struct BlockAcctStats BlockAcctStats;
 
 enum BlockAcctType {
+    BLOCK_ACCT_NONE = 0,
     BLOCK_ACCT_READ,
     BLOCK_ACCT_WRITE,
     BLOCK_ACCT_FLUSH,
+    BLOCK_ACCT_UNMAP,
     BLOCK_MAX_IOTYPE,
 };
 
diff --git a/include/block/aio_task.h b/include/block/aio_task.h
new file mode 100644
index 0000000000..50bc1e1817
--- /dev/null
+++ b/include/block/aio_task.h
@@ -0,0 +1,54 @@
+/*
+ * Aio tasks loops
+ *
+ * Copyright (c) 2019 Virtuozzo International GmbH.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef BLOCK_AIO_TASK_H
+#define BLOCK_AIO_TASK_H
+
+#include "qemu/coroutine.h"
+
+typedef struct AioTaskPool AioTaskPool;
+typedef struct AioTask AioTask;
+typedef int coroutine_fn (*AioTaskFunc)(AioTask *task);
+struct AioTask {
+    AioTaskPool *pool;
+    AioTaskFunc func;
+    int ret;
+};
+
+AioTaskPool *coroutine_fn aio_task_pool_new(int max_busy_tasks);
+void aio_task_pool_free(AioTaskPool *);
+
+/* error code of failed task or 0 if all is OK */
+int aio_task_pool_status(AioTaskPool *pool);
+
+bool aio_task_pool_empty(AioTaskPool *pool);
+
+/* User provides filled @task, however task->pool will be set automatically */
+void coroutine_fn aio_task_pool_start_task(AioTaskPool *pool, AioTask *task);
+
+void coroutine_fn aio_task_pool_wait_slot(AioTaskPool *pool);
+void coroutine_fn aio_task_pool_wait_one(AioTaskPool *pool);
+void coroutine_fn aio_task_pool_wait_all(AioTaskPool *pool);
+
+#endif /* BLOCK_AIO_TASK_H */
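The aio_task.h header above only declares the pool API, so for orientation here is a minimal usage sketch. It is not part of this patch: MyTask, my_task_func() and the chunk payload are invented, and it assumes (as the in-tree callers appear to) that the pool runs each task in its own coroutine, that aio_task_pool_start_task() blocks until a slot is free, and that a completed task is freed by the pool.

    /* Hypothetical user of the aio_task API; not from this patch. */
    typedef struct MyTask {
        AioTask task;       /* first member, so AioTask * casts back to MyTask * */
        int64_t chunk;      /* invented per-task payload */
    } MyTask;

    static int coroutine_fn my_task_func(AioTask *task)
    {
        MyTask *t = (MyTask *)task;
        /* ... perform one chunk of coroutine I/O for t->chunk ... */
        return 0;           /* a negative value becomes the pool status */
    }

    static int coroutine_fn process_chunks(int64_t nb_chunks)
    {
        AioTaskPool *pool = aio_task_pool_new(4);   /* <= 4 tasks in flight */
        int64_t i;
        int ret;

        for (i = 0; i < nb_chunks; i++) {
            MyTask *t = g_new0(MyTask, 1);

            t->task.func = my_task_func;
            t->chunk = i;
            /* Assumed to wait for a free slot and to free @t on completion */
            aio_task_pool_start_task(pool, &t->task);
        }

        aio_task_pool_wait_all(pool);
        ret = aio_task_pool_status(pool);   /* 0, or the first task error */
        aio_task_pool_free(pool);
        return ret;
    }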
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
new file mode 100644
index 0000000000..e2e135ff1b
--- /dev/null
+++ b/include/block/block-copy.h
@@ -0,0 +1,93 @@
+/*
+ * block_copy API
+ *
+ * Copyright (C) 2013 Proxmox Server Solutions
+ * Copyright (c) 2019 Virtuozzo International GmbH.
+ *
+ * Authors:
+ *  Dietmar Maurer (dietmar@proxmox.com)
+ *  Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef BLOCK_COPY_H
+#define BLOCK_COPY_H
+
+#include "block/block.h"
+
+typedef struct BlockCopyInFlightReq {
+    int64_t start_byte;
+    int64_t end_byte;
+    QLIST_ENTRY(BlockCopyInFlightReq) list;
+    CoQueue wait_queue; /* coroutines blocked on this request */
+} BlockCopyInFlightReq;
+
+typedef void (*ProgressBytesCallbackFunc)(int64_t bytes, void *opaque);
+typedef void (*ProgressResetCallbackFunc)(void *opaque);
+typedef struct BlockCopyState {
+    /*
+     * BdrvChild objects are not owned or managed by block-copy. They are
+     * provided by block-copy user and user is responsible for appropriate
+     * permissions on these children.
+     */
+    BdrvChild *source;
+    BdrvChild *target;
+    BdrvDirtyBitmap *copy_bitmap;
+    int64_t cluster_size;
+    bool use_copy_range;
+    int64_t copy_range_size;
+    uint64_t len;
+    QLIST_HEAD(, BlockCopyInFlightReq) inflight_reqs;
+
+    BdrvRequestFlags write_flags;
+
+    /*
+     * skip_unallocated:
+     *
+     * Used by sync=top jobs, which first scan the source node for unallocated
+     * areas and clear them in the copy_bitmap. During this process, the bitmap
+     * is thus not fully initialized: It may still have bits set for areas that
+     * are unallocated and should actually not be copied.
+     *
+     * This is indicated by skip_unallocated.
+     *
+     * In this case, block_copy() will query the source's allocation status,
+     * skip unallocated regions, clear them in the copy_bitmap, and invoke
+     * block_copy_reset_unallocated() every time it does.
+     */
+    bool skip_unallocated;
+
+    /* progress_bytes_callback: called when some copying progress is done. */
+    ProgressBytesCallbackFunc progress_bytes_callback;
+
+    /*
+     * progress_reset_callback: called when some bytes reset from copy_bitmap
+     * (see @skip_unallocated above). The callee is assumed to recalculate how
+     * many bytes remain based on the dirty bit count of copy_bitmap.
+     */
+    ProgressResetCallbackFunc progress_reset_callback;
+    void *progress_opaque;
+} BlockCopyState;
+
+BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+                                     int64_t cluster_size,
+                                     BdrvRequestFlags write_flags,
+                                     Error **errp);
+
+void block_copy_set_callbacks(
+    BlockCopyState *s,
+    ProgressBytesCallbackFunc progress_bytes_callback,
+    ProgressResetCallbackFunc progress_reset_callback,
+    void *progress_opaque);
+
+void block_copy_state_free(BlockCopyState *s);
+
+int64_t block_copy_reset_unallocated(BlockCopyState *s,
+                                     int64_t offset, int64_t *count);
+
+int coroutine_fn block_copy(BlockCopyState *s, int64_t start, uint64_t bytes,
+                            bool *error_is_read);
+
+#endif /* BLOCK_COPY_H */
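To show how the declarations above fit together, here is a hedged sketch of the expected call sequence. The callback bodies, the 64 KiB cluster size and the 1 MiB range are illustrative only; just the function names and signatures come from the header.

    static void my_progress_bytes(int64_t bytes, void *opaque)
    {
        /* e.g. advance a job progress meter by @bytes */
    }

    static void my_progress_reset(void *opaque)
    {
        /* recompute remaining work from the dirty bit count of copy_bitmap */
    }

    static int coroutine_fn copy_example(BdrvChild *source, BdrvChild *target,
                                         Error **errp)
    {
        bool error_is_read;
        int ret;
        BlockCopyState *s;

        s = block_copy_state_new(source, target, 64 * 1024 /* cluster */,
                                 0 /* write_flags */, errp);
        if (!s) {
            return -EINVAL;
        }
        block_copy_set_callbacks(s, my_progress_bytes, my_progress_reset, NULL);

        /* Copy whatever is still dirty in the first megabyte. */
        ret = block_copy(s, 0, 1024 * 1024, &error_is_read);

        block_copy_state_free(s);
        return ret;
    }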
diff --git a/include/block/block.h b/include/block/block.h
index 37c9de7446..792bb826db 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -501,6 +501,7 @@ int bdrv_get_flags(BlockDriverState *bs);
 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
                                           Error **errp);
+BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs);
 void bdrv_round_to_clusters(BlockDriverState *bs,
                             int64_t offset, int64_t bytes,
                             int64_t *cluster_offset,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 0422acdf1c..05056b308a 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -366,6 +366,7 @@ struct BlockDriver {
     int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
     ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs,
                                                  Error **errp);
+    BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs);
 
     int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
                                           QEMUIOVector *qiov,
@@ -1196,6 +1197,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                             BdrvDirtyBitmap *sync_bitmap,
                             BitmapSyncMode bitmap_mode,
                             bool compress,
+                            const char *filter_node_name,
                             BlockdevOnError on_source_error,
                             BlockdevOnError on_target_error,
                             int creation_flags,
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 49db07ba0b..04795c49bf 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -72,6 +72,23 @@ void QEMU_NORETURN cpu_loop_exit(CPUState *cpu);
 void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc);
 void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc);
 
+/**
+ * cpu_loop_exit_requested:
+ * @cpu: The CPU state to be tested
+ *
+ * Indicate if somebody asked for a return of the CPU to the main loop
+ * (e.g., via cpu_exit() or cpu_interrupt()).
+ *
+ * This is helpful for architectures that support interruptible
+ * instructions. After writing back all state to registers/memory, this
+ * call can be used to check if it makes sense to return to the main loop
+ * or to continue executing the interruptible instruction.
+ */
+static inline bool cpu_loop_exit_requested(CPUState *cpu)
+{
+    return (int32_t)atomic_read(&cpu_neg(cpu)->icount_decr.u32) < 0;
+}
+
 #if !defined(CONFIG_USER_ONLY)
 void cpu_reloading_memory_map(void);
 /**
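The doc comment describes polling cpu_loop_exit_requested() from within a long-running instruction; schematically that looks like the sketch below. All target-specific names here (helper_block_op, do_one_unit, the unit count) are made up — only cpu_loop_exit_requested(), cpu_loop_exit_restore(), env_cpu() and GETPC() are real QEMU interfaces.

    /* Sketch of a helper emulating an interruptible, restartable instruction */
    void helper_block_op(CPUArchState *env, uint32_t units)
    {
        CPUState *cs = env_cpu(env);

        while (units--) {
            do_one_unit(env);   /* hypothetical; writes all architectural
                                 * state back before the check below */
            if (units && cpu_loop_exit_requested(cs)) {
                /* PC still points at this instruction, so after the main
                 * loop services the interrupt, the instruction re-executes
                 * and continues from the saved architectural state. */
                cpu_loop_exit_restore(cs, GETPC());
            }
        }
    }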
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index e96e621de5..ad158bb247 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -193,30 +193,29 @@ static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
     end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
     page = start >> TARGET_PAGE_BITS;
 
-    rcu_read_lock();
-
-    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+    WITH_RCU_READ_LOCK_GUARD() {
+        blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+
+        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+        base = page - offset;
+        while (page < end) {
+            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
+            unsigned long num = next - base;
+            unsigned long found = find_next_bit(blocks->blocks[idx],
+                                                num, offset);
+            if (found < num) {
+                dirty = true;
+                break;
+            }
 
-    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
-    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
-    base = page - offset;
-    while (page < end) {
-        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
-        unsigned long num = next - base;
-        unsigned long found = find_next_bit(blocks->blocks[idx], num, offset);
-        if (found < num) {
-            dirty = true;
-            break;
+            page = next;
+            idx++;
+            offset = 0;
+            base += DIRTY_MEMORY_BLOCK_SIZE;
         }
-
-        page = next;
-        idx++;
-        offset = 0;
-        base += DIRTY_MEMORY_BLOCK_SIZE;
     }
 
-    rcu_read_unlock();
-
     return dirty;
 }
 
@@ -234,7 +233,7 @@ static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
     end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
     page = start >> TARGET_PAGE_BITS;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
 
     blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
 
@@ -256,8 +255,6 @@ static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
         base += DIRTY_MEMORY_BLOCK_SIZE;
     }
 
-    rcu_read_unlock();
-
     return dirty;
 }
 
@@ -309,13 +306,11 @@ static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
     idx = page / DIRTY_MEMORY_BLOCK_SIZE;
     offset = page % DIRTY_MEMORY_BLOCK_SIZE;
 
-    rcu_read_lock();
+    RCU_READ_LOCK_GUARD();
 
     blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
 
     set_bit_atomic(offset, blocks->blocks[idx]);
-
-    rcu_read_unlock();
 }
 
 static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
@@ -334,39 +329,37 @@ static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
     end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
     page = start >> TARGET_PAGE_BITS;
 
-    rcu_read_lock();
+    WITH_RCU_READ_LOCK_GUARD() {
+        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
+            blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
+        }
 
-    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
-        blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
-    }
+        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+        base = page - offset;
+        while (page < end) {
+            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
 
-    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
-    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
-    base = page - offset;
-    while (page < end) {
-        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
+            if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
+                bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
+                                  offset, next - page);
+            }
+            if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
+                bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
+                                  offset, next - page);
+            }
+            if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
+                bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
+                                  offset, next - page);
+            }
 
-        if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
-            bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
-                              offset, next - page);
+            page = next;
+            idx++;
+            offset = 0;
+            base += DIRTY_MEMORY_BLOCK_SIZE;
         }
-        if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
-            bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
-                              offset, next - page);
-        }
-        if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
-            bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
-                              offset, next - page);
-        }
-
-        page = next;
-        idx++;
-        offset = 0;
-        base += DIRTY_MEMORY_BLOCK_SIZE;
     }
 
-    rcu_read_unlock();
-
     xen_hvm_modified_memory(start, length);
 }
 
@@ -396,36 +389,35 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
         offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                           DIRTY_MEMORY_BLOCK_SIZE);
 
-        rcu_read_lock();
+        WITH_RCU_READ_LOCK_GUARD() {
+            for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
+                blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
+            }
 
-        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
-            blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
-        }
+            for (k = 0; k < nr; k++) {
+                if (bitmap[k]) {
+                    unsigned long temp = leul_to_cpu(bitmap[k]);
 
-        for (k = 0; k < nr; k++) {
-            if (bitmap[k]) {
-                unsigned long temp = leul_to_cpu(bitmap[k]);
+                    atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);
 
-                atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);
+                    if (global_dirty_log) {
+                        atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
+                                  temp);
+                    }
 
-                if (global_dirty_log) {
-                    atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
-                              temp);
+                    if (tcg_enabled()) {
+                        atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset],
+                                  temp);
+                    }
                 }
 
-                if (tcg_enabled()) {
-                    atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], temp);
+                if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
+                    offset = 0;
+                    idx++;
                 }
             }
-
-            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
-                offset = 0;
-                idx++;
-            }
         }
 
-        rcu_read_unlock();
-
         xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
     } else {
         uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;
diff --git a/include/migration/misc.h b/include/migration/misc.h
index b9d8e787af..d2762257aa 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -60,6 +60,7 @@ void migration_object_init(void);
 void migration_shutdown(void);
 void qemu_start_incoming_migration(const char *uri, Error **errp);
 bool migration_is_idle(void);
+bool migration_is_active(MigrationState *);
 void add_migration_state_change_notifier(Notifier *notify);
 void remove_migration_state_change_notifier(Notifier *notify);
 bool migration_in_setup(MigrationState *);
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 1fbfd099dd..b9ee563aa4 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -224,6 +224,7 @@ extern const VMStateInfo vmstate_info_unused_buffer;
 extern const VMStateInfo vmstate_info_tmp;
 extern const VMStateInfo vmstate_info_bitmap;
 extern const VMStateInfo vmstate_info_qtailq;
+extern const VMStateInfo vmstate_info_gtree;
 
 #define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0)
 /*
@@ -754,6 +755,45 @@ extern const VMStateInfo vmstate_info_qtailq;
     .start        = offsetof(_type, _next),                         \
 }
 
+/*
+ * For migrating a GTree whose key is a pointer to _key_type and the
+ * value, a pointer to _val_type
+ * The target tree must have been properly initialized
+ * _vmsd: Start address of the 2 element array containing the data vmsd
+ *        and the key vmsd, in that order
+ * _key_type: type of the key
+ * _val_type: type of the value
+ */
+#define VMSTATE_GTREE_V(_field, _state, _version, _vmsd,             \
+                        _key_type, _val_type)                        \
+{                                                                    \
+    .name         = (stringify(_field)),                             \
+    .version_id   = (_version),                                      \
+    .vmsd         = (_vmsd),                                         \
+    .info         = &vmstate_info_gtree,                             \
+    .start        = sizeof(_key_type),                               \
+    .size         = sizeof(_val_type),                               \
+    .offset       = offsetof(_state, _field),                        \
+}
+
+/*
+ * For migrating a GTree with direct key and the value a pointer
+ * to _val_type
+ * The target tree must have been properly initialized
+ * _vmsd: data vmsd
+ * _val_type: type of the value
+ */
+#define VMSTATE_GTREE_DIRECT_KEY_V(_field, _state, _version, _vmsd, _val_type) \
+{                                                                    \
+    .name         = (stringify(_field)),                             \
+    .version_id   = (_version),                                      \
+    .vmsd         = (_vmsd),                                         \
+    .info         = &vmstate_info_gtree,                             \
+    .start        = 0,                                               \
+    .size         = sizeof(_val_type),                               \
+    .offset       = offsetof(_state, _field),                        \
+}
+
 /* _f : field name
    _f_n : num of elements field_name
    _n : num of elements
diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index 22876d1428..9c82683e37 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -154,6 +154,31 @@ extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func);
       }),                                                            \
       (RCUCBFunc *)g_free);
 
+typedef void RCUReadAuto;
+static inline RCUReadAuto *rcu_read_auto_lock(void)
+{
+    rcu_read_lock();
+    /* Anything non-NULL causes the cleanup function to be called */
+    return (void *)(uintptr_t)0x1;
+}
+
+static inline void rcu_read_auto_unlock(RCUReadAuto *r)
+{
+    rcu_read_unlock();
+}
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(RCUReadAuto, rcu_read_auto_unlock)
+
+#define WITH_RCU_READ_LOCK_GUARD() \
+    WITH_RCU_READ_LOCK_GUARD_(_rcu_read_auto##__COUNTER__)
+
+#define WITH_RCU_READ_LOCK_GUARD_(var) \
+    for (g_autoptr(RCUReadAuto) var = rcu_read_auto_lock(); \
+         (var); rcu_read_auto_unlock(var), (var) = NULL)
+
+#define RCU_READ_LOCK_GUARD() \
+    g_autoptr(RCUReadAuto) _rcu_read_auto __attribute__((unused)) = rcu_read_auto_lock()
+
 #ifdef __cplusplus
 }
 #endif
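Two usage notes, both hedged sketches rather than code from this series. First, the VMSTATE_GTREE macros slot into an ordinary VMStateDescription; MyEntry and MyDev below are invented, and the GTree must already be allocated before an incoming migration stream is applied:

    typedef struct MyEntry {
        uint32_t data;
    } MyEntry;

    typedef struct MyDev {
        GTree *mappings;    /* direct (scalar) keys -> MyEntry * values */
    } MyDev;

    static const VMStateDescription vmstate_my_entry = {
        .name = "my-entry",
        .version_id = 1,
        .fields = (VMStateField[]) {
            VMSTATE_UINT32(data, MyEntry),
            VMSTATE_END_OF_LIST()
        }
    };

    static const VMStateDescription vmstate_my_dev = {
        .name = "my-dev",
        .version_id = 1,
        .fields = (VMStateField[]) {
            VMSTATE_GTREE_DIRECT_KEY_V(mappings, MyDev, 1,
                                       &vmstate_my_entry, MyEntry),
            VMSTATE_END_OF_LIST()
        }
    };

Second, the new RCU guards express the conversion pattern applied throughout the ram_addr.h hunks above (my_global and use() are placeholders):

    /* Before: manual pairing, easy to leak on an early return */
    rcu_read_lock();
    p = atomic_rcu_read(&my_global);
    use(p);
    rcu_read_unlock();

    /* After: lock held until the end of the enclosing scope */
    RCU_READ_LOCK_GUARD();
    p = atomic_rcu_read(&my_global);
    use(p);

    /* Or with an explicit critical section */
    WITH_RCU_READ_LOCK_GUARD() {
        p = atomic_rcu_read(&my_global);
        use(p);
    }

Both forms rest on g_autoptr()/__attribute__((cleanup)), so the read lock is released on any exit from the scope, including early returns.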