Diffstat (limited to 'util')
 -rw-r--r--  util/aio-wait.c        |  2
 -rw-r--r--  util/async.c           | 34
 -rw-r--r--  util/main-loop.c       |  6
 -rw-r--r--  util/mmap-alloc.c      | 28
 -rw-r--r--  util/qemu-coroutine.c  | 10
 -rw-r--r--  util/rcu.c             | 69
 -rw-r--r--  util/stats64.c         | 11
 -rw-r--r--  util/systemd.c         |  1
 -rw-r--r--  util/thread-pool.c     | 25
 -rw-r--r--  util/trace-events      |  1
 10 files changed, 141 insertions(+), 46 deletions(-)
diff --git a/util/aio-wait.c b/util/aio-wait.c
index 98c5accd29..b5336cf5fd 100644
--- a/util/aio-wait.c
+++ b/util/aio-wait.c
@@ -82,5 +82,5 @@ void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
     assert(qemu_get_current_aio_context() == qemu_get_aio_context());
     aio_bh_schedule_oneshot(ctx, aio_wait_bh, &data);
-    AIO_WAIT_WHILE(ctx, !data.done);
+    AIO_WAIT_WHILE_UNLOCKED(NULL, !data.done);
 }
diff --git a/util/async.c b/util/async.c
index 21016a1ac7..055070ffbd 100644
--- a/util/async.c
+++ b/util/async.c
@@ -65,6 +65,7 @@ struct QEMUBH {
     void *opaque;
     QSLIST_ENTRY(QEMUBH) next;
     unsigned flags;
+    MemReentrancyGuard *reentrancy_guard;
 };

 /* Called concurrently from any thread */
@@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb,
 }

 QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
-                        const char *name)
+                        const char *name, MemReentrancyGuard *reentrancy_guard)
 {
     QEMUBH *bh;
     bh = g_new(QEMUBH, 1);
@@ -146,13 +147,30 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
         .cb = cb,
         .opaque = opaque,
         .name = name,
+        .reentrancy_guard = reentrancy_guard,
     };
     return bh;
 }

 void aio_bh_call(QEMUBH *bh)
 {
+    bool last_engaged_in_io = false;
+
+    /* Make a copy of the guard-pointer as cb may free the bh */
+    MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard;
+    if (reentrancy_guard) {
+        last_engaged_in_io = reentrancy_guard->engaged_in_io;
+        if (reentrancy_guard->engaged_in_io) {
+            trace_reentrant_aio(bh->ctx, bh->name);
+        }
+        reentrancy_guard->engaged_in_io = true;
+    }
+
     bh->cb(bh->opaque);
+
+    if (reentrancy_guard) {
+        reentrancy_guard->engaged_in_io = last_engaged_in_io;
+    }
 }

 /* Multiple occurrences of aio_bh_poll cannot be called concurrently. */
@@ -164,7 +182,21 @@ int aio_bh_poll(AioContext *ctx)

     /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */
     QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list);
+
+    /*
+     * GCC13 [-Werror=dangling-pointer=] complains that the local variable
+     * 'slice' is being stored in the global 'ctx->bh_slice_list' but the
+     * list is emptied before this function returns.
+     */
+#if !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wdangling-pointer="
+#endif
     QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
+#if !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif

     while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) {
         QEMUBH *bh;
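
Note: the point of the new MemReentrancyGuard parameter is that a bottom half can share a re-entrancy flag with the object that owns it, and aio_bh_call() now reports nested invocations through the new reentrant_aio trace event. Below is a minimal sketch of how a caller might wire this up; the MyDev struct, its field and function names, and the header locations are assumptions for illustration, not part of this patch.

    /* Illustrative sketch only -- struct, callback and header names assumed. */
    #include "qemu/osdep.h"
    #include "qemu/main-loop.h"   /* qemu_bh_new_full() */
    #include "hw/qdev-core.h"     /* MemReentrancyGuard (assumed location) */

    typedef struct MyDev {
        MemReentrancyGuard guard; /* set while one of our handlers is running */
        QEMUBH *bh;
    } MyDev;

    static void mydev_bh_cb(void *opaque)
    {
        MyDev *s = opaque;
        /*
         * If this callback is entered while s->guard.engaged_in_io is already
         * true, aio_bh_call() emits trace_reentrant_aio() before calling us.
         */
        (void)s;
    }

    static void mydev_realize(MyDev *s)
    {
        /* Passing the guard opts this BH into the re-entrancy check. */
        s->bh = qemu_bh_new_full(mydev_bh_cb, s, "mydev-bh", &s->guard);
    }
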
diff --git a/util/main-loop.c b/util/main-loop.c
index e180c85145..7022f02ef8 100644
--- a/util/main-loop.c
+++ b/util/main-loop.c
@@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking)

 /* Functions to operate on the main QEMU AioContext. */

-QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
+                         MemReentrancyGuard *reentrancy_guard)
 {
-    return aio_bh_new_full(qemu_aio_context, cb, opaque, name);
+    return aio_bh_new_full(qemu_aio_context, cb, opaque, name,
+                           reentrancy_guard);
 }

 /*
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index 5ed7d29183..ed14f9c64d 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -27,8 +27,36 @@

 #ifdef CONFIG_LINUX
 #include <sys/vfs.h>
+#include <linux/magic.h>
 #endif

+QemuFsType qemu_fd_getfs(int fd)
+{
+#ifdef CONFIG_LINUX
+    struct statfs fs;
+    int ret;
+
+    if (fd < 0) {
+        return QEMU_FS_TYPE_UNKNOWN;
+    }
+
+    do {
+        ret = fstatfs(fd, &fs);
+    } while (ret != 0 && errno == EINTR);
+
+    switch (fs.f_type) {
+    case TMPFS_MAGIC:
+        return QEMU_FS_TYPE_TMPFS;
+    case HUGETLBFS_MAGIC:
+        return QEMU_FS_TYPE_HUGETLBFS;
+    default:
+        return QEMU_FS_TYPE_UNKNOWN;
+    }
+#else
+    return QEMU_FS_TYPE_UNKNOWN;
+#endif
+}
+
 size_t qemu_fd_getpagesize(int fd)
 {
 #ifdef CONFIG_LINUX
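
Note: qemu_fd_getfs() gives callers a portable way to ask which filesystem backs a file descriptor; on non-Linux hosts it always reports QEMU_FS_TYPE_UNKNOWN. A hedged usage sketch follows, assuming the declaration lives in "qemu/mmap-alloc.h" and that the caller already has an open backing-file descriptor.

    #include "qemu/osdep.h"
    #include "qemu/mmap-alloc.h"   /* qemu_fd_getfs(), QemuFsType (assumed location) */

    /* Decide whether a memory backend's fd sits on hugetlbfs. */
    static bool backing_fd_is_hugetlbfs(int fd)
    {
        switch (qemu_fd_getfs(fd)) {
        case QEMU_FS_TYPE_HUGETLBFS:
            return true;
        case QEMU_FS_TYPE_TMPFS:
        case QEMU_FS_TYPE_UNKNOWN:
        default:
            return false;          /* non-Linux hosts always end up here */
        }
    }
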
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index 8494523692..17a88f6505 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -127,9 +127,13 @@ void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
         Coroutine *to = QSIMPLEQ_FIRST(&pending);
         CoroutineAction ret;

-        /* Cannot rely on the read barrier for to in aio_co_wake(), as there are
-         * callers outside of aio_co_wake() */
-        const char *scheduled = qatomic_mb_read(&to->scheduled);
+        /*
+         * Read to before to->scheduled; pairs with qatomic_cmpxchg in
+         * qemu_co_sleep(), aio_co_schedule() etc.
+         */
+        smp_read_barrier_depends();
+
+        const char *scheduled = qatomic_read(&to->scheduled);

         QSIMPLEQ_REMOVE_HEAD(&pending, co_queue_next);
diff --git a/util/rcu.c b/util/rcu.c
index b6d6c71cff..30a7e22026 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -83,12 +83,6 @@ static void wait_for_readers(void)
          */
         qemu_event_reset(&rcu_gp_event);

-        /* Instead of using qatomic_mb_set for index->waiting, and
-         * qatomic_mb_read for index->ctr, memory barriers are placed
-         * manually since writes to different threads are independent.
-         * qemu_event_reset has acquire semantics, so no memory barrier
-         * is needed here.
-         */
         QLIST_FOREACH(index, &registry, node) {
             qatomic_set(&index->waiting, true);
         }
@@ -96,6 +90,10 @@ static void wait_for_readers(void)
         /* Here, order the stores to index->waiting before the loads of
          * index->ctr.  Pairs with smp_mb_placeholder() in rcu_read_unlock(),
          * ensuring that the loads of index->ctr are sequentially consistent.
+         *
+         * If this is the last iteration, this barrier also prevents
+         * frees from seeping upwards, and orders the two wait phases
+         * on architectures with 32-bit longs; see synchronize_rcu().
          */
         smp_mb_global();

@@ -104,7 +102,7 @@ static void wait_for_readers(void)
                 QLIST_REMOVE(index, node);
                 QLIST_INSERT_HEAD(&qsreaders, index, node);

-                /* No need for mb_set here, worst of all we
+                /* No need for memory barriers here, worst of all we
                  * get some extra futex wakeups.
                  */
                 qatomic_set(&index->waiting, false);
@@ -149,26 +147,26 @@ void synchronize_rcu(void)

     /* Write RCU-protected pointers before reading p_rcu_reader->ctr.
      * Pairs with smp_mb_placeholder() in rcu_read_lock().
+     *
+     * Also orders write to RCU-protected pointers before
+     * write to rcu_gp_ctr.
      */
     smp_mb_global();

     QEMU_LOCK_GUARD(&rcu_registry_lock);
     if (!QLIST_EMPTY(&registry)) {
-        /* In either case, the qatomic_mb_set below blocks stores that free
-         * old RCU-protected pointers.
-         */
         if (sizeof(rcu_gp_ctr) < 8) {
             /* For architectures with 32-bit longs, a two-subphases algorithm
              * ensures we do not encounter overflow bugs.
              *
              * Switch parity: 0 -> 1, 1 -> 0.
              */
-            qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+            qatomic_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
             wait_for_readers();
-            qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+            qatomic_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
         } else {
             /* Increment current grace period. */
-            qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
+            qatomic_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
         }

         wait_for_readers();
@@ -191,8 +189,22 @@ static void enqueue(struct rcu_head *node)
     struct rcu_head **old_tail;

     node->next = NULL;
+
+    /*
+     * Make this node the tail of the list.  The node will be
+     * used by further enqueue operations, but it will not
+     * be dequeued yet...
+     */
     old_tail = qatomic_xchg(&tail, &node->next);
-    qatomic_mb_set(old_tail, node);
+
+    /*
+     * ... until it is pointed to from another item in the list.
+     * In the meantime, try_dequeue() will find a NULL next pointer
+     * and loop.
+     *
+     * Synchronizes with qatomic_load_acquire() in try_dequeue().
+     */
+    qatomic_store_release(old_tail, node);
 }

 static struct rcu_head *try_dequeue(void)
@@ -200,26 +212,31 @@ static struct rcu_head *try_dequeue(void)
     struct rcu_head *node, *next;

 retry:
-    /* Test for an empty list, which we do not expect.  Note that for
+    /* Head is only written by this thread, so no need for barriers. */
+    node = head;
+
+    /*
+     * If the head node has NULL in its next pointer, the value is
+     * wrong and we need to wait until its enqueuer finishes the update.
+     */
+    next = qatomic_load_acquire(&node->next);
+    if (!next) {
+        return NULL;
+    }
+
+    /*
+     * Test for an empty list, which we do not expect.  Note that for
      * the consumer head and tail are always consistent.  The head
      * is consistent because only the consumer reads/writes it.
      * The tail, because it is the first step in the enqueuing.
      * It is only the next pointers that might be inconsistent.
      */
-    if (head == &dummy && qatomic_mb_read(&tail) == &dummy.next) {
+    if (head == &dummy && qatomic_read(&tail) == &dummy.next) {
         abort();
     }

-    /* If the head node has NULL in its next pointer, the value is
-     * wrong and we need to wait until its enqueuer finishes the update.
-     */
-    node = head;
-    next = qatomic_mb_read(&head->next);
-    if (!next) {
-        return NULL;
-    }
-
-    /* Since we are the sole consumer, and we excluded the empty case
+    /*
+     * Since we are the sole consumer, and we excluded the empty case
      * above, the queue will always have at least two nodes: the
      * dummy node, and the one being removed.  So we do not need to update
      * the tail pointer.
diff --git a/util/stats64.c b/util/stats64.c
index 897613c949..09736014ec 100644
--- a/util/stats64.c
+++ b/util/stats64.c
@@ -57,6 +57,17 @@ uint64_t stat64_get(const Stat64 *s)
     return ((uint64_t)high << 32) | low;
 }

+void stat64_set(Stat64 *s, uint64_t val)
+{
+    while (!stat64_wrtrylock(s)) {
+        cpu_relax();
+    }
+
+    qatomic_set(&s->high, val >> 32);
+    qatomic_set(&s->low, val);
+    stat64_wrunlock(s);
+}
+
 bool stat64_add32_carry(Stat64 *s, uint32_t low, uint32_t high)
 {
     uint32_t old;
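
Note: stat64_set() rounds out the existing stat64_get()/stat64_add() API; the hunk above is the 32-bit fallback path, which takes the writer-side lock so concurrent readers never see a torn value. A small usage sketch (the counter name is illustrative and the header location is assumed):

    #include "qemu/osdep.h"
    #include "qemu/stats64.h"      /* Stat64, stat64_set(), stat64_get() (assumed location) */

    static Stat64 bytes_transferred;

    static void transfer_stats_reset(void)
    {
        /* Overwrite the counter atomically with respect to stat64_get(). */
        stat64_set(&bytes_transferred, 0);
    }
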
diff --git a/util/systemd.c b/util/systemd.c
index 5bcac9b401..ced518f771 100644
--- a/util/systemd.c
+++ b/util/systemd.c
@@ -51,6 +51,7 @@ unsigned int check_socket_activation(void)
     /* So these are not passed to any child processes we might start. */
     unsetenv("LISTEN_FDS");
     unsetenv("LISTEN_PID");
+    unsetenv("LISTEN_FDNAMES");

     /* So the file descriptors don't leak into child processes. */
     for (i = 0; i < nr_fds; ++i) {
diff --git a/util/thread-pool.c b/util/thread-pool.c
index 31113b5860..0d97888df0 100644
--- a/util/thread-pool.c
+++ b/util/thread-pool.c
@@ -48,7 +48,7 @@ struct ThreadPoolElement {
     /* Access to this list is protected by lock. */
     QTAILQ_ENTRY(ThreadPoolElement) reqs;

-    /* Access to this list is protected by the global mutex. */
+    /* This list is only written by the thread pool's mother thread. */
     QLIST_ENTRY(ThreadPoolElement) all;
 };

@@ -175,7 +175,6 @@ static void thread_pool_completion_bh(void *opaque)
     ThreadPool *pool = opaque;
     ThreadPoolElement *elem, *next;

-    aio_context_acquire(pool->ctx);
 restart:
     QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
         if (elem->state != THREAD_DONE) {
@@ -195,9 +194,7 @@ restart:
              */
             qemu_bh_schedule(pool->completion_bh);

-            aio_context_release(pool->ctx);
             elem->common.cb(elem->common.opaque, elem->ret);
-            aio_context_acquire(pool->ctx);

             /* We can safely cancel the completion_bh here regardless of someone
              * else having scheduled it meanwhile because we reenter the
@@ -211,7 +208,6 @@ restart:
             qemu_aio_unref(elem);
         }
     }
-    aio_context_release(pool->ctx);
 }

 static void thread_pool_cancel(BlockAIOCB *acb)
@@ -245,11 +241,15 @@ static const AIOCBInfo thread_pool_aiocb_info = {
     .get_aio_context    = thread_pool_get_aio_context,
 };

-BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
-                                   ThreadPoolFunc *func, void *arg,
-                                   BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
+                                   BlockCompletionFunc *cb, void *opaque)
 {
     ThreadPoolElement *req;
+    AioContext *ctx = qemu_get_current_aio_context();
+    ThreadPool *pool = aio_get_thread_pool(ctx);
+
+    /* Assert that the thread submitting work is the same running the pool */
+    assert(pool->ctx == qemu_get_current_aio_context());

     req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque);
     req->func = func;
@@ -284,19 +284,18 @@ static void thread_pool_co_cb(void *opaque, int ret)
     aio_co_wake(co->co);
 }

-int coroutine_fn thread_pool_submit_co(ThreadPool *pool, ThreadPoolFunc *func,
-                                       void *arg)
+int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg)
 {
     ThreadPoolCo tpc = { .co = qemu_coroutine_self(), .ret = -EINPROGRESS };
     assert(qemu_in_coroutine());
-    thread_pool_submit_aio(pool, func, arg, thread_pool_co_cb, &tpc);
+    thread_pool_submit_aio(func, arg, thread_pool_co_cb, &tpc);
     qemu_coroutine_yield();
     return tpc.ret;
 }

-void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg)
+void thread_pool_submit(ThreadPoolFunc *func, void *arg)
 {
-    thread_pool_submit_aio(pool, func, arg, NULL, NULL);
+    thread_pool_submit_aio(func, arg, NULL, NULL);
 }

 void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
diff --git a/util/trace-events b/util/trace-events
index 16f78d8fe5..3f7e766683 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d"
 # async.c
 aio_co_schedule(void *ctx, void *co) "ctx %p co %p"
 aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p"
+reentrant_aio(void *ctx, const char *name) "ctx %p name %s"

 # thread-pool.c
 thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
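
Note: after the thread-pool change, callers no longer pass a ThreadPool pointer; work is queued on the pool of the AioContext the submitter is running in, and the new assert enforces that. A sketch of a converted caller follows; the worker function and header locations are assumptions, not part of this patch.

    #include "qemu/osdep.h"
    #include "qemu/coroutine.h"      /* coroutine_fn */
    #include "block/thread-pool.h"   /* thread_pool_submit_co() (assumed location) */

    /* Runs in a worker thread; must not touch AioContext state. */
    static int do_blocking_work(void *opaque)
    {
        return 0;
    }

    static int coroutine_fn blocking_work_co(void *opaque)
    {
        /*
         * Previously: thread_pool_submit_co(pool, do_blocking_work, opaque);
         * the pool is now derived from qemu_get_current_aio_context().
         */
        return thread_pool_submit_co(do_blocking_work, opaque);
    }
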