From 421919d76b1755e1f9478f85cb71829ca5150ade Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Mon, 22 Jul 2019 17:22:00 +0200
Subject: block: Remove blk_pread_unthrottled()

The functionality offered by blk_pread_unthrottled() goes back to commit
498e386c584. Then, we couldn't perform I/O throttling with synchronous
requests because timers wouldn't be executed in polling loops. So the
commit automatically disabled I/O throttling as soon as a synchronous
request was issued.

However, for geometry detection during disk initialisation, we always
used (and still use) synchronous requests even if guest requests use AIO
later. Geometry detection was not wanted to disable I/O throttling, so
bdrv_pread_unthrottled() was introduced which disabled throttling only
temporarily.

All of this isn't necessary any more because we do run timers in polling
loop and even synchronous requests are now using coroutine
infrastructure internally. For this reason, commit 90c78624f already
removed the automatic disabling of I/O throttling.

It's time to get rid of the workaround for the removed code, and its
abuse of blk_root_drained_begin()/end(), as well.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 include/sysemu/block-backend.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 733c4957eb..7320b58467 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -117,8 +117,6 @@ char *blk_get_attached_dev_id(BlockBackend *blk);
 BlockBackend *blk_by_dev(void *dev);
 BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
 void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
-int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
-                          int bytes);
 int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
                                unsigned int bytes, QEMUIOVector *qiov,
                                BdrvRequestFlags flags);
-- 
cgit 1.4.1


From cf3129323f900ef5ddbccbe86e4fa801e88c566e Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Mon, 22 Jul 2019 17:46:23 +0200
Subject: block-backend: Queue requests while drained

This fixes devices like IDE that can still start new requests from I/O
handlers in the CPU thread while the block backend is drained.

The basic assumption is that in a drain section, no new requests should
be allowed through a BlockBackend (blk_drained_begin/end don't exist,
we get drain sections only on the node level). However, there are two
special cases where requests should not be queued:

1. Block jobs: We already make sure that block jobs are paused in a
   drain section, so they won't start new requests. However, if the
   drain_begin is called on the job's BlockBackend first, it can happen
   that we deadlock because the job stays busy until it reaches a pause
   point - which it can't if its requests aren't processed any more.

   The proper solution here would be to make all requests through the
   job's filter node instead of using a BlockBackend. For now, just
   disabling request queuing on the job BlockBackend is simpler.

2. In test cases where making requests through bdrv_* would be
   cumbersome because we'd need a BdrvChild. As we already got the
   functionality to disable request queuing from 1., use it in tests,
   too, for convenience.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
---
 block/backup.c                 |  1 +
 block/block-backend.c          | 53 +++++++++++++++++++++++++++++++++++++++---
 block/commit.c                 |  2 ++
 block/mirror.c                 |  1 +
 blockjob.c                     |  3 +++
 include/sysemu/block-backend.h |  1 +
 tests/test-bdrv-drain.c        |  1 +
 7 files changed, 59 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/block/backup.c b/block/backup.c
index b26c22c4b8..4743c8f0bc 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -644,6 +644,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     if (ret < 0) {
         goto error;
     }
+    blk_set_disable_request_queuing(job->target, true);
 
     job->on_source_error = on_source_error;
     job->on_target_error = on_target_error;
diff --git a/block/block-backend.c b/block/block-backend.c
index fdd6b01ecf..c13c5c83b0 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -79,6 +79,9 @@ struct BlockBackend {
     QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
 
     int quiesce_counter;
+    CoQueue queued_requests;
+    bool disable_request_queuing;
+
     VMChangeStateEntry *vmsh;
     bool force_allow_inactivate;
 
@@ -339,6 +342,7 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
 
     block_acct_init(&blk->stats);
 
+    qemu_co_queue_init(&blk->queued_requests);
     notifier_list_init(&blk->remove_bs_notifiers);
     notifier_list_init(&blk->insert_bs_notifiers);
     QLIST_INIT(&blk->aio_notifiers);
@@ -1096,6 +1100,11 @@ void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
     blk->allow_aio_context_change = allow;
 }
 
+void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
+{
+    blk->disable_request_queuing = disable;
+}
+
 static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
                                   size_t size)
 {
@@ -1127,13 +1136,24 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
     return 0;
 }
 
+static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
+{
+    if (blk->quiesce_counter && !blk->disable_request_queuing) {
+        qemu_co_queue_wait(&blk->queued_requests, NULL);
+    }
+}
+
 int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
                                unsigned int bytes, QEMUIOVector *qiov,
                                BdrvRequestFlags flags)
 {
     int ret;
-    BlockDriverState *bs = blk_bs(blk);
+    BlockDriverState *bs;
 
+    blk_wait_while_drained(blk);
+
+    /* Call blk_bs() only after waiting, the graph may have changed */
+    bs = blk_bs(blk);
     trace_blk_co_preadv(blk, bs, offset, bytes, flags);
 
     ret = blk_check_byte_request(blk, offset, bytes);
@@ -1159,8 +1179,12 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
                                 BdrvRequestFlags flags)
 {
     int ret;
-    BlockDriverState *bs = blk_bs(blk);
+    BlockDriverState *bs;
 
+    blk_wait_while_drained(blk);
+
+    /* Call blk_bs() only after waiting, the graph may have changed */
+    bs = blk_bs(blk);
     trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
 
     ret = blk_check_byte_request(blk, offset, bytes);
@@ -1349,6 +1373,12 @@ static void blk_aio_read_entry(void *opaque)
     BlkRwCo *rwco = &acb->rwco;
     QEMUIOVector *qiov = rwco->iobuf;
 
+    if (rwco->blk->quiesce_counter) {
+        blk_dec_in_flight(rwco->blk);
+        blk_wait_while_drained(rwco->blk);
+        blk_inc_in_flight(rwco->blk);
+    }
+
     assert(qiov->size == acb->bytes);
     rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
                               qiov, rwco->flags);
@@ -1361,6 +1391,12 @@ static void blk_aio_write_entry(void *opaque)
     BlkRwCo *rwco = &acb->rwco;
     QEMUIOVector *qiov = rwco->iobuf;
 
+    if (rwco->blk->quiesce_counter) {
+        blk_dec_in_flight(rwco->blk);
+        blk_wait_while_drained(rwco->blk);
+        blk_inc_in_flight(rwco->blk);
+    }
+
     assert(!qiov || qiov->size == acb->bytes);
     rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
                                qiov, rwco->flags);
@@ -1482,6 +1518,8 @@ void blk_aio_cancel_async(BlockAIOCB *acb)
 
 int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 {
+    blk_wait_while_drained(blk);
+
     if (!blk_is_available(blk)) {
         return -ENOMEDIUM;
     }
@@ -1522,7 +1560,11 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
 
 int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
 {
-    int ret = blk_check_byte_request(blk, offset, bytes);
+    int ret;
+
+    blk_wait_while_drained(blk);
+
+    ret = blk_check_byte_request(blk, offset, bytes);
     if (ret < 0) {
         return ret;
     }
@@ -1532,6 +1574,8 @@ int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
 
 int blk_co_flush(BlockBackend *blk)
 {
+    blk_wait_while_drained(blk);
+
     if (!blk_is_available(blk)) {
         return -ENOMEDIUM;
     }
@@ -2232,6 +2276,9 @@ static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
         if (blk->dev_ops && blk->dev_ops->drained_end) {
             blk->dev_ops->drained_end(blk->dev_opaque);
         }
+        while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
+            /* Resume all queued requests */
+        }
     }
 }
 
diff --git a/block/commit.c b/block/commit.c
index 2c5a6d4ebc..408ae15389 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -350,6 +350,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
     if (ret < 0) {
         goto fail;
     }
+    blk_set_disable_request_queuing(s->base, true);
     s->base_bs = base;
 
     /* Required permissions are already taken with block_job_add_bdrv() */
@@ -358,6 +359,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
     if (ret < 0) {
         goto fail;
     }
+    blk_set_disable_request_queuing(s->top, true);
 
     s->backing_file_str = g_strdup(backing_file_str);
     s->on_error = on_error;
diff --git a/block/mirror.c b/block/mirror.c
index 642d6570cc..9b36391bb9 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1636,6 +1636,7 @@ static BlockJob *mirror_start_job(
         blk_set_force_allow_inactivate(s->target);
     }
     blk_set_allow_aio_context_change(s->target, true);
+    blk_set_disable_request_queuing(s->target, true);
 
     s->replaces = g_strdup(replaces);
     s->on_source_error = on_source_error;
diff --git a/blockjob.c b/blockjob.c
index 20b7f557da..73d9f1ba2b 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -445,6 +445,9 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
 
     bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
 
+    /* Disable request queuing in the BlockBackend to avoid deadlocks on drain:
+     * The job reports that it's busy until it reaches a pause point. */
+    blk_set_disable_request_queuing(blk, true);
     blk_set_allow_aio_context_change(blk, true);
 
     /* Only set speed when necessary to avoid NotSupported error */
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 7320b58467..368d53af77 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -104,6 +104,7 @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
 
 void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
 void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow);
+void blk_set_disable_request_queuing(BlockBackend *blk, bool disable);
 void blk_iostatus_enable(BlockBackend *blk);
 bool blk_iostatus_is_enabled(const BlockBackend *blk);
 BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
index 9dffd86c47..468bbcc9a1 100644
--- a/tests/test-bdrv-drain.c
+++ b/tests/test-bdrv-drain.c
@@ -686,6 +686,7 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread)
                               &error_abort);
     s = bs->opaque;
     blk_insert_bs(blk, bs, &error_abort);
+    blk_set_disable_request_queuing(blk, true);
 
     blk_set_aio_context(blk, ctx_a, &error_abort);
     aio_context_acquire(ctx_a);
-- 
cgit 1.4.1