summary refs log tree commit diff stats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r-- include/block/aio-wait.h 116
-rw-r--r-- include/block/aio.h 7
-rw-r--r-- include/block/block.h 54
-rw-r--r-- include/block/block_int.h 61
4 files changed, 171 insertions, 67 deletions
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
new file mode 100644
index 0000000000..a48c744fa8
--- /dev/null
+++ b/include/block/aio-wait.h
@@ -0,0 +1,116 @@
+/*
+ * AioContext wait support
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_AIO_WAIT_H
+#define QEMU_AIO_WAIT_H
+
+#include "block/aio.h"
+
+/**
+ * AioWait:
+ *
+ * An object that facilitates synchronous waiting on a condition.  The main
+ * loop can wait on an operation running in an IOThread as follows:
+ *
+ *   AioWait *wait = ...;
+ *   AioContext *ctx = ...;
+ *   MyWork work = { .done = false };
+ *   schedule_my_work_in_iothread(ctx, &work);
+ *   AIO_WAIT_WHILE(wait, ctx, !work.done);
+ *
+ * The IOThread must call aio_wait_kick() to notify the main loop when
+ * work.done changes:
+ *
+ *   static void do_work(...)
+ *   {
+ *       ...
+ *       work.done = true;
+ *       aio_wait_kick(wait);
+ *   }
+ */
+typedef struct {
+    /* Is the main loop waiting for a kick?  Accessed with atomic ops. */
+    bool need_kick;
+} AioWait;
+
+/**
+ * AIO_WAIT_WHILE:
+ * @wait: the aio wait object
+ * @ctx: the aio context
+ * @cond: wait while this conditional expression is true
+ *
+ * Wait while a condition is true.  Use this to implement synchronous
+ * operations that require event loop activity.
+ *
+ * The caller must be sure that something calls aio_wait_kick() when the value
+ * of @cond might have changed.
+ *
+ * The caller's thread must be the IOThread that owns @ctx or the main loop
+ * thread (with @ctx acquired exactly once).  This function cannot be used to
+ * wait on conditions between two IOThreads since that could lead to deadlock;
+ * go via the main loop instead.
+ */
+#define AIO_WAIT_WHILE(wait, ctx, cond) ({                  \
+    bool waited_ = false;                                   \
+    bool busy_ = true;                                      \
+    AioWait *wait_ = (wait);                                \
+    AioContext *ctx_ = (ctx);                               \
+    if (in_aio_context_home_thread(ctx_)) {                 \
+        while ((cond) || busy_) {                           \
+            busy_ = aio_poll(ctx_, (cond));                 \
+            waited_ |= !!(cond) | busy_;                    \
+        }                                                   \
+    } else {                                                \
+        assert(qemu_get_current_aio_context() ==            \
+               qemu_get_aio_context());                     \
+        assert(!wait_->need_kick);                          \
+        /* Set wait_->need_kick before evaluating cond.  */ \
+        atomic_mb_set(&wait_->need_kick, true);             \
+        while (busy_) {                                     \
+            if ((cond)) {                                   \
+                waited_ = busy_ = true;                     \
+                aio_context_release(ctx_);                  \
+                aio_poll(qemu_get_aio_context(), true);     \
+                aio_context_acquire(ctx_);                  \
+            } else {                                        \
+                busy_ = aio_poll(ctx_, false);              \
+                waited_ |= busy_;                           \
+            }                                               \
+        }                                                   \
+        atomic_set(&wait_->need_kick, false);               \
+    }                                                       \
+    waited_; })
+
+/**
+ * aio_wait_kick:
+ * @wait: the aio wait object that should re-evaluate its condition
+ *
+ * Wake up the main thread if it is waiting on AIO_WAIT_WHILE().  During
+ * synchronous operations performed in an IOThread, the main thread lets the
+ * IOThread's event loop run, waiting for the operation to complete.  An
+ * aio_wait_kick() call will wake up the main thread.
+ */
+void aio_wait_kick(AioWait *wait);
+
+#endif /* QEMU_AIO_WAIT_H */
diff --git a/include/block/aio.h b/include/block/aio.h
index e9aeeaec94..a1d6b9e249 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -534,11 +534,14 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co);
 AioContext *qemu_get_current_aio_context(void);
 
 /**
+ * in_aio_context_home_thread:
  * @ctx: the aio context
  *
- * Return whether we are running in the I/O thread that manages @ctx.
+ * Return whether we are running in the thread that normally runs @ctx.  Note
+ * that acquiring/releasing ctx does not affect the outcome; each AioContext
+ * still only has one home thread that is responsible for running it.
  */
-static inline bool aio_context_in_iothread(AioContext *ctx)
+static inline bool in_aio_context_home_thread(AioContext *ctx)
 {
     return ctx == qemu_get_current_aio_context();
 }
diff --git a/include/block/block.h b/include/block/block.h
index fac401ba3e..8b6db952a2 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -3,6 +3,7 @@
 
 #include "block/aio.h"
 #include "qapi/qapi-types-block-core.h"
+#include "block/aio-wait.h"
 #include "qemu/iov.h"
 #include "qemu/coroutine.h"
 #include "block/accounting.h"
@@ -115,19 +116,19 @@ typedef struct HDGeometry {
  * BDRV_BLOCK_ZERO: offset reads as zero
  * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
  * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
- *                       layer (short for DATA || ZERO), set by block layer
- * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this layer
+ *                       layer rather than any backing, set by block layer
+ * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
+ *                 layer, set by block layer
  *
  * Internal flag:
  * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
  *                 that the block layer recompute the answer from the returned
  *                 BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
  *
- * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK) of
- * the return value (old interface) or the entire map parameter (new
- * interface) represent the offset in the returned BDS that is allocated for
- * the corresponding raw data.  However, whether that offset actually
- * contains data also depends on BDRV_BLOCK_DATA, as follows:
+ * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
+ * host offset within the returned BDS that is allocated for the
+ * corresponding raw guest data.  However, whether that offset
+ * actually contains data also depends on BDRV_BLOCK_DATA, as follows:
  *
  * DATA ZERO OFFSET_VALID
  *  t    t        t       sectors read as zero, returned file is zero at offset
@@ -367,41 +368,14 @@ void bdrv_drain_all_begin(void);
 void bdrv_drain_all_end(void);
 void bdrv_drain_all(void);
 
+/* Returns NULL when bs == NULL */
+AioWait *bdrv_get_aio_wait(BlockDriverState *bs);
+
 #define BDRV_POLL_WHILE(bs, cond) ({                       \
-    bool waited_ = false;                                  \
-    bool busy_ = true;                                     \
     BlockDriverState *bs_ = (bs);                          \
-    AioContext *ctx_ = bdrv_get_aio_context(bs_);          \
-    if (aio_context_in_iothread(ctx_)) {                   \
-        while ((cond) || busy_) {                          \
-            busy_ = aio_poll(ctx_, (cond));                \
-            waited_ |= !!(cond) | busy_;                   \
-        }                                                  \
-    } else {                                               \
-        assert(qemu_get_current_aio_context() ==           \
-               qemu_get_aio_context());                    \
-        /* Ask bdrv_dec_in_flight to wake up the main      \
-         * QEMU AioContext.  Extra I/O threads never take  \
-         * other I/O threads' AioContexts (see for example \
-         * block_job_defer_to_main_loop for how to do it). \
-         */                                                \
-        assert(!bs_->wakeup);                              \
-        /* Set bs->wakeup before evaluating cond.  */      \
-        atomic_mb_set(&bs_->wakeup, true);                 \
-        while (busy_) {                                    \
-            if ((cond)) {                                  \
-                waited_ = busy_ = true;                    \
-                aio_context_release(ctx_);                 \
-                aio_poll(qemu_get_aio_context(), true);    \
-                aio_context_acquire(ctx_);                 \
-            } else {                                       \
-                busy_ = aio_poll(ctx_, false);             \
-                waited_ |= busy_;                          \
-            }                                              \
-        }                                                  \
-        atomic_set(&bs_->wakeup, false);                   \
-    }                                                      \
-    waited_; })
+    AIO_WAIT_WHILE(bdrv_get_aio_wait(bs_),                 \
+                   bdrv_get_aio_context(bs_),              \
+                   cond); })
 
 int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
 int bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 5ea63f8fa8..64a5700f2b 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -26,6 +26,7 @@
 
 #include "block/accounting.h"
 #include "block/block.h"
+#include "block/aio-wait.h"
 #include "qemu/queue.h"
 #include "qemu/coroutine.h"
 #include "qemu/stats64.h"
@@ -128,7 +129,8 @@ struct BlockDriver {
     int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
                           Error **errp);
     void (*bdrv_close)(BlockDriverState *bs);
-    int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp);
+    int coroutine_fn (*bdrv_co_create_opts)(const char *filename, QemuOpts *opts,
+                                       Error **errp);
     int (*bdrv_make_empty)(BlockDriverState *bs);
 
     void (*bdrv_refresh_filename)(BlockDriverState *bs, QDict *options);
@@ -202,15 +204,22 @@ struct BlockDriver {
     /*
      * Building block for bdrv_block_status[_above] and
      * bdrv_is_allocated[_above].  The driver should answer only
-     * according to the current layer, and should not set
-     * BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW.  See block.h
-     * for the meaning of _DATA, _ZERO, and _OFFSET_VALID.  The block
-     * layer guarantees input aligned to request_alignment, as well as
-     * non-NULL pnum and file.
+     * according to the current layer, and should only need to set
+     * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID,
+     * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing
+     * layer, the result should be 0 (and not BDRV_BLOCK_ZERO).  See
+     * block.h for the overall meaning of the bits.  As a hint, the
+     * flag want_zero is true if the caller cares more about precise
+     * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for
+     * overall allocation (favor larger *pnum, perhaps by reporting
+     * _DATA instead of _ZERO).  The block layer guarantees input
+     * clamped to bdrv_getlength() and aligned to request_alignment,
+     * as well as non-NULL pnum, map, and file; in turn, the driver
+     * must return an error or set pnum to an aligned non-zero value.
      */
-    int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, int *pnum,
-        BlockDriverState **file);
+    int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
+        bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
+        int64_t *map, BlockDriverState **file);
 
     /*
      * Invalidate any cached meta-data.
@@ -709,10 +718,8 @@ struct BlockDriverState {
     unsigned int in_flight;
     unsigned int serialising_in_flight;
 
-    /* Internal to BDRV_POLL_WHILE and bdrv_wakeup.  Accessed with atomic
-     * ops.
-     */
-    bool wakeup;
+    /* Kicked to signal main loop when a request completes. */
+    AioWait wait;
 
     /* counter for nested bdrv_io_plug.
      * Accessed with atomic ops.
@@ -1031,23 +1038,27 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
                                uint64_t *nperm, uint64_t *nshared);
 
 /*
- * Default implementation for drivers to pass bdrv_co_get_block_status() to
+ * Default implementation for drivers to pass bdrv_co_block_status() to
  * their file.
  */
-int64_t coroutine_fn bdrv_co_get_block_status_from_file(BlockDriverState *bs,
-                                                        int64_t sector_num,
-                                                        int nb_sectors,
-                                                        int *pnum,
-                                                        BlockDriverState **file);
+int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
+                                                bool want_zero,
+                                                int64_t offset,
+                                                int64_t bytes,
+                                                int64_t *pnum,
+                                                int64_t *map,
+                                                BlockDriverState **file);
 /*
- * Default implementation for drivers to pass bdrv_co_get_block_status() to
+ * Default implementation for drivers to pass bdrv_co_block_status() to
  * their backing file.
  */
-int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
-                                                           int64_t sector_num,
-                                                           int nb_sectors,
-                                                           int *pnum,
-                                                           BlockDriverState **file);
+int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
+                                                   bool want_zero,
+                                                   int64_t offset,
+                                                   int64_t bytes,
+                                                   int64_t *pnum,
+                                                   int64_t *map,
+                                                   BlockDriverState **file);
 const char *bdrv_get_parent_name(const BlockDriverState *bs);
 void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
 bool blk_dev_has_removable_media(BlockBackend *blk);