Diffstat (limited to 'include')

 -rw-r--r--  include/block/aio.h         38
 -rw-r--r--  include/block/block.h        2
 -rw-r--r--  include/block/block_int.h    3
 -rw-r--r--  include/exec/exec-all.h     14
 -rw-r--r--  include/qemu/futex.h        36
 -rw-r--r--  include/qemu/thread.h      112

 6 files changed, 173 insertions, 32 deletions
diff --git a/include/block/aio.h b/include/block/aio.h
index 4dca54d9c7..7df271d2b9 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -53,18 +53,12 @@ struct LinuxAioState;
 struct AioContext {
     GSource source;
 
-    /* Protects all fields from multi-threaded access */
+    /* Used by AioContext users to protect from multi-threaded access. */
     QemuRecMutex lock;
 
-    /* The list of registered AIO handlers */
+    /* The list of registered AIO handlers. Protected by ctx->list_lock. */
     QLIST_HEAD(, AioHandler) aio_handlers;
 
-    /* This is a simple lock used to protect the aio_handlers list.
-     * Specifically, it's used to ensure that no callbacks are removed while
-     * we're walking and dispatching callbacks.
-     */
-    int walking_handlers;
-
     /* Used to avoid unnecessary event_notifier_set calls in aio_notify;
      * accessed with atomic primitives. If this field is 0, everything
      * (file descriptors, bottom halves, timers) will be re-evaluated
@@ -90,17 +84,15 @@ struct AioContext {
      */
     uint32_t notify_me;
 
-    /* lock to protect between bh's adders and deleter */
-    QemuMutex bh_lock;
+    /* A lock to protect between QEMUBH and AioHandler adders and deleter,
+     * and to ensure that no callbacks are removed while we're walking and
+     * dispatching them.
+     */
+    QemuLockCnt list_lock;
 
     /* Anchor of the list of Bottom Halves belonging to the context */
     struct QEMUBH *first_bh;
 
-    /* A simple lock used to protect the first_bh list, and ensure that
-     * no callbacks are removed while we're walking and dispatching callbacks.
-     */
-    int walking_bh;
-
     /* Used by aio_notify.
      *
      * "notified" is used to avoid expensive event_notifier_test_and_clear
@@ -116,7 +108,9 @@ struct AioContext {
     bool notified;
     EventNotifier notifier;
 
-    /* Thread pool for performing work and receiving completion callbacks */
+    /* Thread pool for performing work and receiving completion callbacks.
+     * Has its own locking.
+     */
    struct ThreadPool *thread_pool;
 
 #ifdef CONFIG_LINUX_AIO
@@ -126,7 +120,9 @@ struct AioContext {
     struct LinuxAioState *linux_aio;
 #endif
 
-    /* TimerLists for calling timers - one per clock type */
+    /* TimerLists for calling timers - one per clock type. Has its own
+     * locking.
+     */
     QEMUTimerListGroup tlg;
 
     int external_disable_cnt;
@@ -180,9 +176,11 @@ void aio_context_unref(AioContext *ctx);
  * automatically takes care of calling aio_context_acquire and
  * aio_context_release.
  *
- * Access to timers and BHs from a thread that has not acquired AioContext
- * is possible.  Access to callbacks for now must be done while the AioContext
- * is owned by the thread (FIXME).
+ * Note that this is separate from bdrv_drained_begin/bdrv_drained_end.  A
+ * thread still has to call those to avoid being interrupted by the guest.
+ *
+ * Bottom halves, timers and callbacks can be created or removed without
+ * acquiring the AioContext.
  */
 void aio_context_acquire(AioContext *ctx);
 
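Taken together, the aio.h changes replace the two ad-hoc counters (walking_handlers, walking_bh) and the bh_lock mutex with a single QemuLockCnt. A minimal sketch of the reader side this enables, loosely modeled on the list walk in aio_dispatch but not taken verbatim from the patch; the process_handler() helper is hypothetical:

    static void walk_handlers(AioContext *ctx)
    {
        AioHandler *node;

        /* Holding the "count" side of list_lock keeps deleters from
         * freeing nodes while we walk, without a mutex on the hot path.
         */
        qemu_lockcnt_inc(&ctx->list_lock);
        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
            process_handler(node);      /* hypothetical per-node work */
        }
        qemu_lockcnt_dec(&ctx->list_lock);
    }
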
diff --git a/include/block/block.h b/include/block/block.h
index 49bb0b239a..8b0dcdaa70 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -526,8 +526,6 @@ int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
 
 void bdrv_io_plug(BlockDriverState *bs);
 void bdrv_io_unplug(BlockDriverState *bs);
-void bdrv_io_unplugged_begin(BlockDriverState *bs);
-void bdrv_io_unplugged_end(BlockDriverState *bs);
 
 /**
  * bdrv_drained_begin:
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 4e4562d444..2d92d7edfe 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -526,9 +526,8 @@ struct BlockDriverState {
     uint64_t write_threshold_offset;
     NotifierWithReturn write_threshold_notifier;
 
-    /* counters for nested bdrv_io_plug and bdrv_io_unplugged_begin */
+    /* counter for nested bdrv_io_plug */
     unsigned io_plugged;
-    unsigned io_plug_disabled;
 
     int quiesce_counter;
 };
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index a8c13cee66..bbc9478a50 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -95,15 +95,13 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr);
 /**
  * tlb_flush:
  * @cpu: CPU whose TLB should be flushed
- * @flush_global: ignored
  *
- * Flush the entire TLB for the specified CPU.
- * The flush_global flag is in theory an indicator of whether the whole
- * TLB should be flushed, or only those entries not marked global.
- * In practice QEMU does not implement any global/not global flag for
- * TLB entries, and the argument is ignored.
+ * Flush the entire TLB for the specified CPU. Most CPU architectures
+ * allow the implementation to drop entries from the TLB at any time
+ * so this is generally safe. If more selective flushing is required
+ * use one of the other functions for efficiency.
  */
-void tlb_flush(CPUState *cpu, int flush_global);
+void tlb_flush(CPUState *cpu);
 /**
  * tlb_flush_page_by_mmuidx:
  * @cpu: CPU whose TLB should be flushed
@@ -165,7 +163,7 @@ static inline void tlb_flush_page(CPUState *cpu, target_ulong addr)
 {
 }
 
-static inline void tlb_flush(CPUState *cpu, int flush_global)
+static inline void tlb_flush(CPUState *cpu)
 {
 }
 
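Since the dropped flush_global argument was always ignored, every tlb_flush caller simply loses one constant. A sketch of a hypothetical caller before and after the change:

    /* Hypothetical target reset hook; "cs" is the CPU being reset. */
    static void my_cpu_reset(CPUState *cs)
    {
        /* Before this change the call read: tlb_flush(cs, 1);
         * the flag had no effect, so nothing behavioral changes here.
         */
        tlb_flush(cs);
    }
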
diff --git a/include/qemu/futex.h b/include/qemu/futex.h
new file mode 100644
index 0000000000..bb7dc9e296
--- /dev/null
+++ b/include/qemu/futex.h
@@ -0,0 +1,36 @@
+/*
+ * Wrappers around Linux futex syscall
+ *
+ * Copyright Red Hat, Inc. 2017
+ *
+ * Author:
+ *  Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include <sys/syscall.h>
+#include <linux/futex.h>
+
+#define qemu_futex(...)              syscall(__NR_futex, __VA_ARGS__)
+
+static inline void qemu_futex_wake(void *f, int n)
+{
+    qemu_futex(f, FUTEX_WAKE, n, NULL, NULL, 0);
+}
+
+static inline void qemu_futex_wait(void *f, unsigned val)
+{
+    while (qemu_futex(f, FUTEX_WAIT, (int) val, NULL, NULL, 0)) {
+        switch (errno) {
+        case EWOULDBLOCK:
+            return;
+        case EINTR:
+            break; /* get out of switch and retry */
+        default:
+            abort();
+        }
+    }
+}
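A minimal sketch (not part of the patch) of how these wrappers can be used to build a one-shot event: qemu_futex_wait sleeps only while the futex word still holds the value the caller saw, so the wake cannot be lost. ToyEvent and its functions are hypothetical; atomic_read/atomic_set are QEMU's existing atomics from qemu/atomic.h.

    #include <errno.h>
    #include <limits.h>
    #include "qemu/atomic.h"
    #include "qemu/futex.h"

    typedef struct ToyEvent {
        unsigned value;                     /* 0 = clear, 1 = set */
    } ToyEvent;

    static void toy_event_wait(ToyEvent *ev)
    {
        while (atomic_read(&ev->value) == 0) {
            /* Sleeps only if the futex word still contains 0;
             * returns immediately (EWOULDBLOCK) if it was set already.
             */
            qemu_futex_wait(&ev->value, 0);
        }
    }

    static void toy_event_set(ToyEvent *ev)
    {
        atomic_set(&ev->value, 1);
        qemu_futex_wake(&ev->value, INT_MAX);   /* wake every waiter */
    }
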
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index e8e665f020..9910f49b3a 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -8,6 +8,7 @@ typedef struct QemuMutex QemuMutex;
 typedef struct QemuCond QemuCond;
 typedef struct QemuSemaphore QemuSemaphore;
 typedef struct QemuEvent QemuEvent;
+typedef struct QemuLockCnt QemuLockCnt;
 typedef struct QemuThread QemuThread;
 
 #ifdef _WIN32
@@ -98,4 +99,115 @@ static inline void qemu_spin_unlock(QemuSpin *spin)
     __sync_lock_release(&spin->value);
 }
 
+struct QemuLockCnt {
+#ifndef CONFIG_LINUX
+    QemuMutex mutex;
+#endif
+    unsigned count;
+};
+
+/**
+ * qemu_lockcnt_init: initialize a QemuLockcnt
+ * @lockcnt: the lockcnt to initialize
+ *
+ * Initialize lockcnt's counter to zero and prepare its mutex
+ * for usage.
+ */
+void qemu_lockcnt_init(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_destroy: destroy a QemuLockcnt
+ * @lockcnt: the lockcnt to destruct
+ *
+ * Destroy lockcnt's mutex.
+ */
+void qemu_lockcnt_destroy(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_inc: increment a QemuLockCnt's counter
+ * @lockcnt: the lockcnt to operate on
+ *
+ * If the lockcnt's count is zero, wait for critical sections
+ * to finish and increment lockcnt's count to 1.  If the count
+ * is not zero, just increment it.
+ *
+ * Because this function can wait on the mutex, it must not be
+ * called while the lockcnt's mutex is held by the current thread.
+ * For the same reason, qemu_lockcnt_inc can also contribute to
+ * AB-BA deadlocks.  This is a sample deadlock scenario:
+ *
+ *            thread 1                      thread 2
+ *            -------------------------------------------------------
+ *            qemu_lockcnt_lock(&lc1);
+ *                                          qemu_lockcnt_lock(&lc2);
+ *            qemu_lockcnt_inc(&lc2);
+ *                                          qemu_lockcnt_inc(&lc1);
+ */
+void qemu_lockcnt_inc(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_dec: decrement a QemuLockCnt's counter
+ * @lockcnt: the lockcnt to operate on
+ */
+void qemu_lockcnt_dec(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_dec_and_lock: decrement a QemuLockCnt's counter and
+ * possibly lock it.
+ * @lockcnt: the lockcnt to operate on
+ *
+ * Decrement lockcnt's count.  If the new count is zero, lock
+ * the mutex and return true.  Otherwise, return false.
+ */
+bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_dec_if_lock: possibly decrement a QemuLockCnt's counter and
+ * lock it.
+ * @lockcnt: the lockcnt to operate on
+ *
+ * If the count is 1, decrement the count to zero, lock
+ * the mutex and return true.  Otherwise, return false.
+ */
+bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_lock: lock a QemuLockCnt's mutex.
+ * @lockcnt: the lockcnt to operate on
+ *
+ * Remember that concurrent visits are not blocked unless the count is
+ * also zero.  You can use qemu_lockcnt_count to check for this inside a
+ * critical section.
+ */
+void qemu_lockcnt_lock(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_unlock: release a QemuLockCnt's mutex.
+ * @lockcnt: the lockcnt to operate on.
+ */
+void qemu_lockcnt_unlock(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_inc_and_unlock: combined unlock/increment on a QemuLockCnt.
+ * @lockcnt: the lockcnt to operate on.
+ *
+ * This is the same as
+ *
+ *     qemu_lockcnt_unlock(lockcnt);
+ *     qemu_lockcnt_inc(lockcnt);
+ *
+ * but more efficient.
+ */
+void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt);
+
+/**
+ * qemu_lockcnt_count: query a LockCnt's count.
+ * @lockcnt: the lockcnt to query.
+ *
+ * Note that the count can change at any time.  Still, while the
+ * lockcnt is locked, one can usefully check whether the count
+ * is non-zero.
+ */
+unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt);
+
 #endif
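To complement the reader walk sketched earlier, a minimal sketch (not part of the patch) of the deleter side this API is designed for: the mutex serializes writers, and qemu_lockcnt_count decides whether a node can be freed now or must wait for the last reader. Item, items_lock, items and the deleted flag are all hypothetical names.

    #include <stdbool.h>
    #include <glib.h>
    #include "qemu/queue.h"
    #include "qemu/thread.h"

    typedef struct Item {
        QLIST_ENTRY(Item) next;
        bool deleted;
    } Item;

    static QemuLockCnt items_lock;
    static QLIST_HEAD(, Item) items;

    static void delete_item(Item *item)
    {
        qemu_lockcnt_lock(&items_lock);
        if (qemu_lockcnt_count(&items_lock) == 0) {
            /* No reader holds a reference: unlink and free right away. */
            QLIST_REMOVE(item, next);
            g_free(item);
        } else {
            /* Readers may still see the node: mark it and let the last
             * reader out (e.g. via qemu_lockcnt_dec_and_lock) reclaim it.
             */
            item->deleted = true;
        }
        qemu_lockcnt_unlock(&items_lock);
    }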