From 9f02cfc84b85929947b32fe1674fbc6a429f332a Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi
Date: Tue, 2 Dec 2014 11:23:14 +0000
Subject: bitmap: add atomic set functions

Use atomic_or() for atomic bitmaps where several threads may set bits at
the same time.  This avoids the race condition between threads loading an
element, bitwise ORing, and then storing the element.

When setting all bits in a word we can avoid atomic ops and instead just
use an smp_mb() at the end.

Most bitmap users don't need atomicity so introduce new functions.

Signed-off-by: Stefan Hajnoczi
Message-Id: <1417519399-3166-2-git-send-email-stefanha@redhat.com>
[Avoid barrier in the single word case, use full barrier instead of write.
 - Paolo]
Reviewed-by: Fam Zheng
Signed-off-by: Paolo Bonzini
---
 include/qemu/bitmap.h |  2 ++
 include/qemu/bitops.h | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h
index f0273c965f..3e0a4f3e0c 100644
--- a/include/qemu/bitmap.h
+++ b/include/qemu/bitmap.h
@@ -39,6 +39,7 @@
  * bitmap_empty(src, nbits)                     Are all bits zero in *src?
  * bitmap_full(src, nbits)                      Are all bits set in *src?
  * bitmap_set(dst, pos, nbits)                  Set specified bit area
+ * bitmap_set_atomic(dst, pos, nbits)   Set specified bit area with atomic ops
  * bitmap_clear(dst, pos, nbits)                Clear specified bit area
  * bitmap_find_next_zero_area(buf, len, pos, n, mask)  Find bit free area
  */
@@ -226,6 +227,7 @@ static inline int bitmap_intersects(const unsigned long *src1,
 }
 
 void bitmap_set(unsigned long *map, long i, long len);
+void bitmap_set_atomic(unsigned long *map, long i, long len);
 void bitmap_clear(unsigned long *map, long start, long nr);
 unsigned long bitmap_find_next_zero_area(unsigned long *map,
                                          unsigned long size,
diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
index 8abdcf9077..8164225152 100644
--- a/include/qemu/bitops.h
+++ b/include/qemu/bitops.h
@@ -16,6 +16,7 @@
 #include <assert.h>
 
 #include "host-utils.h"
+#include "atomic.h"
 
 #define BITS_PER_BYTE           CHAR_BIT
 #define BITS_PER_LONG           (sizeof (unsigned long) * BITS_PER_BYTE)
@@ -38,6 +39,19 @@ static inline void set_bit(long nr, unsigned long *addr)
     *p |= mask;
 }
 
+/**
+ * set_bit_atomic - Set a bit in memory atomically
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ */
+static inline void set_bit_atomic(long nr, unsigned long *addr)
+{
+    unsigned long mask = BIT_MASK(nr);
+    unsigned long *p = addr + BIT_WORD(nr);
+
+    atomic_or(p, mask);
+}
+
 /**
  * clear_bit - Clears a bit in memory
  * @nr: Bit to clear
--
cgit 1.4.1
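
The race described in the commit message is easy to see concretely: with a
plain load/OR/store sequence, two threads can both read the same old word
and one of the two ORed results is silently lost.  Below is a minimal
stand-alone sketch of the idea, using C11 <stdatomic.h> directly rather than
QEMU's atomic_or() wrapper; the bitmap width, thread count, and the setter()
helper are illustrative, not part of the patch.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NBITS 32    /* stay within the guaranteed width of unsigned long */

    /* One atomic word acts as a tiny shared bitmap. */
    static _Atomic unsigned long bitmap;

    static void *setter(void *arg)
    {
        unsigned long base = (unsigned long)(uintptr_t)arg;
        unsigned long i;

        for (i = base; i < NBITS; i += 2) {
            /* Indivisible load-OR-store: no concurrent update can be lost. */
            atomic_fetch_or(&bitmap, 1UL << i);
        }
        return NULL;
    }

    int main(void)
    {
        pthread_t t0, t1;

        pthread_create(&t0, NULL, setter, (void *)(uintptr_t)0);
        pthread_create(&t1, NULL, setter, (void *)(uintptr_t)1);
        pthread_join(t0, NULL);
        pthread_join(t1, NULL);

        /* Always 0xffffffff; a plain "bitmap |= mask" could lose updates. */
        printf("bitmap = 0x%lx\n", atomic_load(&bitmap));
        return 0;
    }

With atomic_fetch_or() the read-modify-write is indivisible, so the program
always prints 0xffffffff; replace it with a plain |= and either thread's
updates can be overwritten.
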
From 36546e5b803f6e363906607307f27c489441fd15 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi
Date: Tue, 2 Dec 2014 11:23:15 +0000
Subject: bitmap: add atomic test and clear

The new bitmap_test_and_clear_atomic() function clears a range and
returns whether or not the bits were set.

Signed-off-by: Stefan Hajnoczi
Message-Id: <1417519399-3166-3-git-send-email-stefanha@redhat.com>
[Test before xchg; then a full barrier is needed at the end just like
 in the previous patch.  The barrier can be avoided if we did at least
 one xchg. - Paolo]
Reviewed-by: Fam Zheng
Signed-off-by: Paolo Bonzini
---
 include/qemu/bitmap.h |  2 ++
 util/bitmap.c         | 45 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h
index 3e0a4f3e0c..86dd9cd5fc 100644
--- a/include/qemu/bitmap.h
+++ b/include/qemu/bitmap.h
@@ -41,6 +41,7 @@
  * bitmap_set(dst, pos, nbits)                  Set specified bit area
  * bitmap_set_atomic(dst, pos, nbits)   Set specified bit area with atomic ops
  * bitmap_clear(dst, pos, nbits)                Clear specified bit area
+ * bitmap_test_and_clear_atomic(dst, pos, nbits)    Test and clear area
  * bitmap_find_next_zero_area(buf, len, pos, n, mask)  Find bit free area
  */
@@ -229,6 +230,7 @@ static inline int bitmap_intersects(const unsigned long *src1,
 void bitmap_set(unsigned long *map, long i, long len);
 void bitmap_set_atomic(unsigned long *map, long i, long len);
 void bitmap_clear(unsigned long *map, long start, long nr);
+bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr);
 unsigned long bitmap_find_next_zero_area(unsigned long *map,
                                          unsigned long size,
                                          unsigned long start,
diff --git a/util/bitmap.c b/util/bitmap.c
index 39994af675..300a68e38c 100644
--- a/util/bitmap.c
+++ b/util/bitmap.c
@@ -235,6 +235,51 @@ void bitmap_clear(unsigned long *map, long start, long nr)
     }
 }
 
+bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr)
+{
+    unsigned long *p = map + BIT_WORD(start);
+    const long size = start + nr;
+    int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+    unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+    unsigned long dirty = 0;
+    unsigned long old_bits;
+
+    /* First word */
+    if (nr - bits_to_clear > 0) {
+        old_bits = atomic_fetch_and(p, ~mask_to_clear);
+        dirty |= old_bits & mask_to_clear;
+        nr -= bits_to_clear;
+        bits_to_clear = BITS_PER_LONG;
+        mask_to_clear = ~0UL;
+        p++;
+    }
+
+    /* Full words */
+    if (bits_to_clear == BITS_PER_LONG) {
+        while (nr >= BITS_PER_LONG) {
+            if (*p) {
+                old_bits = atomic_xchg(p, 0);
+                dirty |= old_bits;
+            }
+            nr -= BITS_PER_LONG;
+            p++;
+        }
+    }
+
+    /* Last word */
+    if (nr) {
+        mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+        old_bits = atomic_fetch_and(p, ~mask_to_clear);
+        dirty |= old_bits & mask_to_clear;
+    } else {
+        if (!dirty) {
+            smp_mb();
+        }
+    }
+
+    return dirty != 0;
+}
+
 #define ALIGN_MASK(x,mask) (((x)+(mask))&~(mask))
 
 /**
--
cgit 1.4.1
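
The "test before xchg" note above is the heart of this patch: a plain read
decides whether the expensive atomic exchange is needed at all, and the
final smp_mb() restores full-barrier semantics on the path where every word
was already clean and no xchg ran.  Here is a stand-alone, single-word
sketch of that pattern, again in plain C11 atomics rather than QEMU's
macros (drain_word() and the bit position are made-up names for
illustration):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* A one-word stand-in for a dirty bitmap. */
    static _Atomic unsigned long dirty_word;

    /* Clear the word and report whether any bit was set.  The plain read
     * skips the costly atomic exchange when the word is already clean,
     * mirroring the "Full words" loop in bitmap_test_and_clear_atomic(). */
    static bool drain_word(void)
    {
        unsigned long old = 0;

        if (atomic_load_explicit(&dirty_word, memory_order_relaxed)) {
            old = atomic_exchange(&dirty_word, 0);
        }
        return old != 0;
    }

    int main(void)
    {
        atomic_fetch_or(&dirty_word, 1UL << 5);
        printf("first drain:  %d\n", drain_word());  /* 1: bit 5 was set */
        printf("second drain: %d\n", drain_word());  /* 0: nothing to clear */
        return 0;
    }

Note that a skipped exchange provides no ordering by itself, which is
exactly why the real function falls back to smp_mb() when it returns
without having performed a single xchg.
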
From 3bbf572345c65813f86a8fc434ea1b23beb08e16 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Wed, 3 Jun 2015 14:21:20 +0200
Subject: atomics: add explicit compiler fence in __atomic memory barriers

__atomic_thread_fence does not include a compiler barrier; in the C++11
memory model, fences take effect in combination with other atomic
operations.  GCC implements this by making __atomic_load and
__atomic_store access memory as if the pointer was volatile, and leaves
no trace whatsoever of acquire and release fences in the compiler's
intermediate representation.

In QEMU, we want memory barriers to act on all memory, but at the same
time we would like to use __atomic_thread_fence for portability reasons.
Add compiler barriers manually around the __atomic_thread_fence.

Message-Id: <1433334080-14912-1-git-send-email-pbonzini@redhat.com>
Reviewed-by: Stefan Hajnoczi
Signed-off-by: Paolo Bonzini
---
 include/qemu/atomic.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index 98e05ca875..bd2c075343 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -99,7 +99,13 @@
 
 #ifndef smp_wmb
 #ifdef __ATOMIC_RELEASE
-#define smp_wmb()   __atomic_thread_fence(__ATOMIC_RELEASE)
+/* __atomic_thread_fence does not include a compiler barrier; instead,
+ * the barrier is part of __atomic_load/__atomic_store's "volatile-like"
+ * semantics.  If smp_wmb() is a no-op, absence of the barrier means that
+ * the compiler is free to reorder stores on each side of the barrier.
+ * Add one here, and similarly in smp_rmb() and smp_read_barrier_depends().
+ */
+#define smp_wmb()   ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); barrier(); })
 #else
 #define smp_wmb()   __sync_synchronize()
 #endif
@@ -107,7 +113,7 @@
 
 #ifndef smp_rmb
 #ifdef __ATOMIC_ACQUIRE
-#define smp_rmb()   __atomic_thread_fence(__ATOMIC_ACQUIRE)
+#define smp_rmb()   ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); barrier(); })
 #else
 #define smp_rmb()   __sync_synchronize()
 #endif
@@ -115,7 +121,7 @@
 
 #ifndef smp_read_barrier_depends
 #ifdef __ATOMIC_CONSUME
-#define smp_read_barrier_depends()   __atomic_thread_fence(__ATOMIC_CONSUME)
+#define smp_read_barrier_depends()   ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); barrier(); })
 #else
 #define smp_read_barrier_depends()   barrier()
 #endif
--
cgit 1.4.1
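
Seen from outside the QEMU tree, the patched construction can be reproduced
with a self-contained sketch, assuming GCC or Clang (barrier() is written
out as the usual empty asm with a "memory" clobber, and producer(), payload,
and flag are illustrative names).  The compiler barriers on each side of
__atomic_thread_fence keep the compiler from moving memory accesses across
the fence even on architectures where the fence emits no instruction.

    #include <pthread.h>
    #include <stdio.h>

    /* Compiler-only fence, as QEMU's barrier() is defined for GCC/Clang. */
    #define barrier()   asm volatile("" ::: "memory")

    /* The patched macros: compiler barriers on both sides of the hardware
     * fence, so the compiler cannot move memory accesses across it even
     * when the fence expands to no instruction at all. */
    #define smp_wmb()   ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); barrier(); })
    #define smp_rmb()   ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); barrier(); })

    static int payload;
    static int flag;

    static void *producer(void *arg)
    {
        (void)arg;
        payload = 42;
        smp_wmb();      /* order the payload store before the flag store */
        __atomic_store_n(&flag, 1, __ATOMIC_RELAXED);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        pthread_create(&t, NULL, producer, NULL);
        while (!__atomic_load_n(&flag, __ATOMIC_RELAXED)) {
            /* spin until the flag is observed */
        }
        smp_rmb();      /* order the flag load before the payload load */
        printf("payload = %d\n", payload);
        pthread_join(t, NULL);
        return 0;
    }

On x86, for instance, __atomic_thread_fence(__ATOMIC_RELEASE) generates no
code at all, so without the surrounding barrier() calls nothing would stop
the compiler from reordering the payload and flag stores.
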