summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2018-03-16 13:14:07 +0000
committerPeter Maydell <peter.maydell@linaro.org>2018-03-16 13:14:07 +0000
commit475fe4576f11e9389a188bd5698ae05458c397c2 (patch)
treed3028363e5200ce114295204a072c390cf992069
parent3788c7b6e56fa34ee2a73e41706eb2a2447ba75a (diff)
parent65374c1aa6263a4e2b566d15a9fd9b2105954a1b (diff)
downloadfocaccia-qemu-475fe4576f11e9389a188bd5698ae05458c397c2.tar.gz
focaccia-qemu-475fe4576f11e9389a188bd5698ae05458c397c2.zip
Merge remote-tracking branch 'remotes/ericb/tags/pull-nbd-2018-03-13-v2' into staging
nbd patches for 2018-03-13

- Eric Blake: iotests: Fix stuck NBD process on 33
- Vladimir Sementsov-Ogievskiy: 0/5 nbd server fixing and refactoring before BLOCK_STATUS
- Eric Blake: nbd/server: Honor FUA request on NBD_CMD_TRIM
- Stefan Hajnoczi: 0/2 block: fix nbd-server-stop crash after blockdev-snapshot-sync
- Vladimir Sementsov-Ogievskiy: nbd block status base:allocation

# gpg: Signature made Tue 13 Mar 2018 20:48:37 GMT
# gpg:                using RSA key A7A16B4A2527436A
# gpg: Good signature from "Eric Blake <eblake@redhat.com>"
# gpg:                 aka "Eric Blake (Free Software Programmer) <ebb9@byu.net>"
# gpg:                 aka "[jpeg image of size 6874]"
# Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2  F3AA A7A1 6B4A 2527 436A

* remotes/ericb/tags/pull-nbd-2018-03-13-v2:
  iotests: new test 209 for NBD BLOCK_STATUS
  iotests: add file_path helper
  iotests.py: tiny refactor: move system imports up
  nbd: BLOCK_STATUS for standard get_block_status function: client part
  block/nbd-client: save first fatal error in nbd_iter_error
  nbd: BLOCK_STATUS for standard get_block_status function: server part
  nbd/server: add nbd_read_opt_name helper
  nbd/server: add nbd_opt_invalid helper
  iotests: add 208 nbd-server + blockdev-snapshot-sync test case
  block: let blk_add/remove_aio_context_notifier() tolerate BDS changes
  nbd/server: Honor FUA request on NBD_CMD_TRIM
  nbd/server: refactor nbd_trip: split out nbd_handle_request
  nbd/server: refactor nbd_trip: cmd_read and generic reply
  nbd/server: fix: check client->closing before sending reply
  nbd/server: fix sparse read
  nbd/server: move nbd_co_send_structured_error up
  iotests: Fix stuck NBD process on 33

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to '')
-rw-r--r--block/block-backend.c63
-rw-r--r--block/nbd-client.c154
-rw-r--r--block/nbd-client.h6
-rw-r--r--block/nbd.c3
-rw-r--r--block/trace-events2
-rw-r--r--include/block/nbd.h3
-rw-r--r--nbd/client.c117
-rw-r--r--nbd/server.c701
-rwxr-xr-xtests/qemu-iotests/0331
-rwxr-xr-xtests/qemu-iotests/20855
-rw-r--r--tests/qemu-iotests/208.out9
-rwxr-xr-xtests/qemu-iotests/20934
-rw-r--r--tests/qemu-iotests/209.out2
-rw-r--r--tests/qemu-iotests/group2
-rw-r--r--tests/qemu-iotests/iotests.py37
15 files changed, 1035 insertions, 154 deletions
diff --git a/block/block-backend.c b/block/block-backend.c
index f2e0a855ff..681b240b12 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -31,6 +31,13 @@
 
 static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
 
+typedef struct BlockBackendAioNotifier {
+    void (*attached_aio_context)(AioContext *new_context, void *opaque);
+    void (*detach_aio_context)(void *opaque);
+    void *opaque;
+    QLIST_ENTRY(BlockBackendAioNotifier) list;
+} BlockBackendAioNotifier;
+
 struct BlockBackend {
     char *name;
     int refcnt;
@@ -69,6 +76,7 @@ struct BlockBackend {
     bool allow_write_beyond_eof;
 
     NotifierList remove_bs_notifiers, insert_bs_notifiers;
+    QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
 
     int quiesce_counter;
     VMChangeStateEntry *vmsh;
@@ -247,6 +255,36 @@ static int blk_root_inactivate(BdrvChild *child)
     return 0;
 }
 
+static void blk_root_attach(BdrvChild *child)
+{
+    BlockBackend *blk = child->opaque;
+    BlockBackendAioNotifier *notifier;
+
+    trace_blk_root_attach(child, blk, child->bs);
+
+    QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
+        bdrv_add_aio_context_notifier(child->bs,
+                notifier->attached_aio_context,
+                notifier->detach_aio_context,
+                notifier->opaque);
+    }
+}
+
+static void blk_root_detach(BdrvChild *child)
+{
+    BlockBackend *blk = child->opaque;
+    BlockBackendAioNotifier *notifier;
+
+    trace_blk_root_detach(child, blk, child->bs);
+
+    QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
+        bdrv_remove_aio_context_notifier(child->bs,
+                notifier->attached_aio_context,
+                notifier->detach_aio_context,
+                notifier->opaque);
+    }
+}
+
 static const BdrvChildRole child_root = {
     .inherit_options    = blk_root_inherit_options,
 
@@ -260,6 +298,9 @@ static const BdrvChildRole child_root = {
 
     .activate           = blk_root_activate,
     .inactivate         = blk_root_inactivate,
+
+    .attach             = blk_root_attach,
+    .detach             = blk_root_detach,
 };
 
 /*
@@ -287,6 +328,7 @@ BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
 
     notifier_list_init(&blk->remove_bs_notifiers);
     notifier_list_init(&blk->insert_bs_notifiers);
+    QLIST_INIT(&blk->aio_notifiers);
 
     QTAILQ_INSERT_TAIL(&block_backends, blk, link);
     return blk;
@@ -364,6 +406,7 @@ static void blk_delete(BlockBackend *blk)
     }
     assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
     assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
+    assert(QLIST_EMPTY(&blk->aio_notifiers));
     QTAILQ_REMOVE(&block_backends, blk, link);
     drive_info_del(blk->legacy_dinfo);
     block_acct_cleanup(&blk->stats);
@@ -1857,8 +1900,15 @@ void blk_add_aio_context_notifier(BlockBackend *blk,
         void (*attached_aio_context)(AioContext *new_context, void *opaque),
         void (*detach_aio_context)(void *opaque), void *opaque)
 {
+    BlockBackendAioNotifier *notifier;
     BlockDriverState *bs = blk_bs(blk);
 
+    notifier = g_new(BlockBackendAioNotifier, 1);
+    notifier->attached_aio_context = attached_aio_context;
+    notifier->detach_aio_context = detach_aio_context;
+    notifier->opaque = opaque;
+    QLIST_INSERT_HEAD(&blk->aio_notifiers, notifier, list);
+
     if (bs) {
         bdrv_add_aio_context_notifier(bs, attached_aio_context,
                                       detach_aio_context, opaque);
@@ -1871,12 +1921,25 @@ void blk_remove_aio_context_notifier(BlockBackend *blk,
                                      void (*detach_aio_context)(void *),
                                      void *opaque)
 {
+    BlockBackendAioNotifier *notifier;
     BlockDriverState *bs = blk_bs(blk);
 
     if (bs) {
         bdrv_remove_aio_context_notifier(bs, attached_aio_context,
                                          detach_aio_context, opaque);
     }
+
+    QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
+        if (notifier->attached_aio_context == attached_aio_context &&
+            notifier->detach_aio_context == detach_aio_context &&
+            notifier->opaque == opaque) {
+            QLIST_REMOVE(notifier, list);
+            g_free(notifier);
+            return;
+        }
+    }
+
+    abort();
 }
 
 void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 7b68499b76..e64e346d69 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -228,6 +228,48 @@ static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk,
     return 0;
 }
 
+/* nbd_parse_blockstatus_payload
+ * support only one extent in reply and only for
+ * base:allocation context
+ */
+static int nbd_parse_blockstatus_payload(NBDClientSession *client,
+                                         NBDStructuredReplyChunk *chunk,
+                                         uint8_t *payload, uint64_t orig_length,
+                                         NBDExtent *extent, Error **errp)
+{
+    uint32_t context_id;
+
+    if (chunk->length != sizeof(context_id) + sizeof(extent)) {
+        error_setg(errp, "Protocol error: invalid payload for "
+                         "NBD_REPLY_TYPE_BLOCK_STATUS");
+        return -EINVAL;
+    }
+
+    context_id = payload_advance32(&payload);
+    if (client->info.meta_base_allocation_id != context_id) {
+        error_setg(errp, "Protocol error: unexpected context id %d for "
+                         "NBD_REPLY_TYPE_BLOCK_STATUS, when negotiated context "
+                         "id is %d", context_id,
+                         client->info.meta_base_allocation_id);
+        return -EINVAL;
+    }
+
+    extent->length = payload_advance32(&payload);
+    extent->flags = payload_advance32(&payload);
+
+    if (extent->length == 0 ||
+        (client->info.min_block && !QEMU_IS_ALIGNED(extent->length,
+                                                    client->info.min_block)) ||
+        extent->length > orig_length)
+    {
+        error_setg(errp, "Protocol error: server sent status chunk with "
+                   "invalid length");
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
 /* nbd_parse_error_payload
  * on success @errp contains message describing nbd error reply
  */
@@ -481,6 +523,7 @@ static coroutine_fn int nbd_co_receive_one_chunk(
 
 typedef struct NBDReplyChunkIter {
     int ret;
+    bool fatal;
     Error *err;
     bool done, only_structured;
 } NBDReplyChunkIter;
@@ -490,11 +533,12 @@ static void nbd_iter_error(NBDReplyChunkIter *iter, bool fatal,
 {
     assert(ret < 0);
 
-    if (fatal || iter->ret == 0) {
+    if ((fatal && !iter->fatal) || iter->ret == 0) {
         if (iter->ret != 0) {
             error_free(iter->err);
             iter->err = NULL;
         }
+        iter->fatal = fatal;
         iter->ret = ret;
         error_propagate(&iter->err, *local_err);
     } else {
@@ -640,6 +684,68 @@ static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
     return iter.ret;
 }
 
+static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
+                                            uint64_t handle, uint64_t length,
+                                            NBDExtent *extent, Error **errp)
+{
+    NBDReplyChunkIter iter;
+    NBDReply reply;
+    void *payload = NULL;
+    Error *local_err = NULL;
+    bool received = false;
+
+    assert(!extent->length);
+    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply,
+                            NULL, &reply, &payload)
+    {
+        int ret;
+        NBDStructuredReplyChunk *chunk = &reply.structured;
+
+        assert(nbd_reply_is_structured(&reply));
+
+        switch (chunk->type) {
+        case NBD_REPLY_TYPE_BLOCK_STATUS:
+            if (received) {
+                s->quit = true;
+                error_setg(&local_err, "Several BLOCK_STATUS chunks in reply");
+                nbd_iter_error(&iter, true, -EINVAL, &local_err);
+            }
+            received = true;
+
+            ret = nbd_parse_blockstatus_payload(s, &reply.structured,
+                                                payload, length, extent,
+                                                &local_err);
+            if (ret < 0) {
+                s->quit = true;
+                nbd_iter_error(&iter, true, ret, &local_err);
+            }
+            break;
+        default:
+            if (!nbd_reply_type_is_error(chunk->type)) {
+                s->quit = true;
+                error_setg(&local_err,
+                           "Unexpected reply type: %d (%s) "
+                           "for CMD_BLOCK_STATUS",
+                           chunk->type, nbd_reply_type_lookup(chunk->type));
+                nbd_iter_error(&iter, true, -EINVAL, &local_err);
+            }
+        }
+
+        g_free(payload);
+        payload = NULL;
+    }
+
+    if (!extent->length && !iter.err) {
+        error_setg(&iter.err,
+                   "Server did not reply with any status extents");
+        if (!iter.ret) {
+            iter.ret = -EIO;
+        }
+    }
+    error_propagate(errp, iter.err);
+    return iter.ret;
+}
+
 static int nbd_co_request(BlockDriverState *bs, NBDRequest *request,
                           QEMUIOVector *write_qiov)
 {
@@ -782,6 +888,51 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
     return nbd_co_request(bs, &request, NULL);
 }
 
+int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,
+                                            bool want_zero,
+                                            int64_t offset, int64_t bytes,
+                                            int64_t *pnum, int64_t *map,
+                                            BlockDriverState **file)
+{
+    int64_t ret;
+    NBDExtent extent = { 0 };
+    NBDClientSession *client = nbd_get_client_session(bs);
+    Error *local_err = NULL;
+
+    NBDRequest request = {
+        .type = NBD_CMD_BLOCK_STATUS,
+        .from = offset,
+        .len = MIN(MIN_NON_ZERO(QEMU_ALIGN_DOWN(INT_MAX,
+                                                bs->bl.request_alignment),
+                                client->info.max_block), bytes),
+        .flags = NBD_CMD_FLAG_REQ_ONE,
+    };
+
+    if (!client->info.base_allocation) {
+        *pnum = bytes;
+        return BDRV_BLOCK_DATA;
+    }
+
+    ret = nbd_co_send_request(bs, &request, NULL);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = nbd_co_receive_blockstatus_reply(client, request.handle, bytes,
+                                           &extent, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+    }
+    if (ret < 0) {
+        return ret;
+    }
+
+    assert(extent.length);
+    *pnum = extent.length;
+    return (extent.flags & NBD_STATE_HOLE ? 0 : BDRV_BLOCK_DATA) |
+           (extent.flags & NBD_STATE_ZERO ? BDRV_BLOCK_ZERO : 0);
+}
+
 void nbd_client_detach_aio_context(BlockDriverState *bs)
 {
     NBDClientSession *client = nbd_get_client_session(bs);
@@ -826,6 +977,7 @@ int nbd_client_init(BlockDriverState *bs,
 
     client->info.request_sizes = true;
     client->info.structured_reply = true;
+    client->info.base_allocation = true;
     ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
                                 tlscreds, hostname,
                                 &client->ioc, &client->info, errp);
diff --git a/block/nbd-client.h b/block/nbd-client.h
index 612c4c21a0..0ece76e5af 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -61,4 +61,10 @@ void nbd_client_detach_aio_context(BlockDriverState *bs);
 void nbd_client_attach_aio_context(BlockDriverState *bs,
                                    AioContext *new_context);
 
+int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,
+                                            bool want_zero,
+                                            int64_t offset, int64_t bytes,
+                                            int64_t *pnum, int64_t *map,
+                                            BlockDriverState **file);
+
 #endif /* NBD_CLIENT_H */
diff --git a/block/nbd.c b/block/nbd.c
index d4e4172c08..1e2b3ba2d3 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -585,6 +585,7 @@ static BlockDriver bdrv_nbd = {
     .bdrv_detach_aio_context    = nbd_detach_aio_context,
     .bdrv_attach_aio_context    = nbd_attach_aio_context,
     .bdrv_refresh_filename      = nbd_refresh_filename,
+    .bdrv_co_block_status       = nbd_client_co_block_status,
 };
 
 static BlockDriver bdrv_nbd_tcp = {
@@ -604,6 +605,7 @@ static BlockDriver bdrv_nbd_tcp = {
     .bdrv_detach_aio_context    = nbd_detach_aio_context,
     .bdrv_attach_aio_context    = nbd_attach_aio_context,
     .bdrv_refresh_filename      = nbd_refresh_filename,
+    .bdrv_co_block_status       = nbd_client_co_block_status,
 };
 
 static BlockDriver bdrv_nbd_unix = {
@@ -623,6 +625,7 @@ static BlockDriver bdrv_nbd_unix = {
     .bdrv_detach_aio_context    = nbd_detach_aio_context,
     .bdrv_attach_aio_context    = nbd_attach_aio_context,
     .bdrv_refresh_filename      = nbd_refresh_filename,
+    .bdrv_co_block_status       = nbd_client_co_block_status,
 };
 
 static void bdrv_nbd_init(void)
diff --git a/block/trace-events b/block/trace-events
index 02dd80ff0c..7493d521dc 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -7,6 +7,8 @@ bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d"
 # block/block-backend.c
 blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags 0x%x"
 blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags 0x%x"
+blk_root_attach(void *child, void *blk, void *bs) "child %p blk %p bs %p"
+blk_root_detach(void *child, void *blk, void *bs) "child %p blk %p bs %p"
 
 # block/io.c
 bdrv_co_preadv(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) "bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x"
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 2285637e67..fcdcd54502 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -260,6 +260,7 @@ struct NBDExportInfo {
     /* In-out fields, set by client before nbd_receive_negotiate() and
      * updated by server results during nbd_receive_negotiate() */
     bool structured_reply;
+    bool base_allocation; /* base:allocation context for NBD_CMD_BLOCK_STATUS */
 
     /* Set by server results during nbd_receive_negotiate() */
     uint64_t size;
@@ -267,6 +268,8 @@ struct NBDExportInfo {
     uint32_t min_block;
     uint32_t opt_block;
     uint32_t max_block;
+
+    uint32_t meta_base_allocation_id;
 };
 typedef struct NBDExportInfo NBDExportInfo;
 
diff --git a/nbd/client.c b/nbd/client.c
index dcad23a053..9b9b7f0ea2 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -595,6 +595,111 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
     return QIO_CHANNEL(tioc);
 }
 
+/* nbd_negotiate_simple_meta_context:
+ * Set one meta context. Simple means that reply must contain zero (not
+ * negotiated) or one (negotiated) contexts. More contexts would be considered
+ * as a protocol error. It's also implied that meta-data query equals queried
+ * context name, so, if server replies with something different then @context,
+ * it considered as error too.
+ * return 1 for successful negotiation, context_id is set
+ *        0 if operation is unsupported,
+ *        -1 with errp set for any other error
+ */
+static int nbd_negotiate_simple_meta_context(QIOChannel *ioc,
+                                             const char *export,
+                                             const char *context,
+                                             uint32_t *context_id,
+                                             Error **errp)
+{
+    int ret;
+    NBDOptionReply reply;
+    uint32_t received_id;
+    bool received;
+    uint32_t export_len = strlen(export);
+    uint32_t context_len = strlen(context);
+    uint32_t data_len = sizeof(export_len) + export_len +
+                        sizeof(uint32_t) + /* number of queries */
+                        sizeof(context_len) + context_len;
+    char *data = g_malloc(data_len);
+    char *p = data;
+
+    stl_be_p(p, export_len);
+    memcpy(p += sizeof(export_len), export, export_len);
+    stl_be_p(p += export_len, 1);
+    stl_be_p(p += sizeof(uint32_t), context_len);
+    memcpy(p += sizeof(context_len), context, context_len);
+
+    ret = nbd_send_option_request(ioc, NBD_OPT_SET_META_CONTEXT, data_len, data,
+                                  errp);
+    g_free(data);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (nbd_receive_option_reply(ioc, NBD_OPT_SET_META_CONTEXT, &reply,
+                                 errp) < 0)
+    {
+        return -1;
+    }
+
+    ret = nbd_handle_reply_err(ioc, &reply, errp);
+    if (ret <= 0) {
+        return ret;
+    }
+
+    if (reply.type == NBD_REP_META_CONTEXT) {
+        char *name;
+        size_t len;
+
+        if (nbd_read(ioc, &received_id, sizeof(received_id), errp) < 0) {
+            return -1;
+        }
+        be32_to_cpus(&received_id);
+
+        len = reply.length - sizeof(received_id);
+        name = g_malloc(len + 1);
+        if (nbd_read(ioc, name, len, errp) < 0) {
+            g_free(name);
+            return -1;
+        }
+        name[len] = '\0';
+        if (strcmp(context, name)) {
+            error_setg(errp, "Failed to negotiate meta context '%s', server "
+                       "answered with different context '%s'", context,
+                       name);
+            g_free(name);
+            return -1;
+        }
+        g_free(name);
+
+        received = true;
+
+        /* receive NBD_REP_ACK */
+        if (nbd_receive_option_reply(ioc, NBD_OPT_SET_META_CONTEXT, &reply,
+                                     errp) < 0)
+        {
+            return -1;
+        }
+
+        ret = nbd_handle_reply_err(ioc, &reply, errp);
+        if (ret <= 0) {
+            return ret;
+        }
+    }
+
+    if (reply.type != NBD_REP_ACK) {
+        error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x",
+                   reply.type, NBD_REP_ACK);
+        return -1;
+    }
+
+    if (received) {
+        *context_id = received_id;
+        return 1;
+    }
+
+    return 0;
+}
 
 int nbd_receive_negotiate(QIOChannel *ioc, const char *name,
                           QCryptoTLSCreds *tlscreds, const char *hostname,
@@ -606,10 +711,12 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name,
     int rc;
     bool zeroes = true;
     bool structured_reply = info->structured_reply;
+    bool base_allocation = info->base_allocation;
 
     trace_nbd_receive_negotiate(tlscreds, hostname ? hostname : "<null>");
 
     info->structured_reply = false;
+    info->base_allocation = false;
     rc = -EINVAL;
 
     if (outioc) {
@@ -700,6 +807,16 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name,
                 info->structured_reply = result == 1;
             }
 
+            if (info->structured_reply && base_allocation) {
+                result = nbd_negotiate_simple_meta_context(
+                        ioc, name, "base:allocation",
+                        &info->meta_base_allocation_id, errp);
+                if (result < 0) {
+                    goto fail;
+                }
+                info->base_allocation = result == 1;
+            }
+
             /* Try NBD_OPT_GO first - if it works, we are done (it
              * also gives us a good message if the server requires
              * TLS).  If it is not available, fall back to
diff --git a/nbd/server.c b/nbd/server.c
index e714bfe6a1..cea158913b 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -22,6 +22,8 @@
 #include "trace.h"
 #include "nbd-internal.h"
 
+#define NBD_META_ID_BASE_ALLOCATION 0
+
 static int system_errno_to_nbd_errno(int err)
 {
     switch (err) {
@@ -82,6 +84,16 @@ struct NBDExport {
 
 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
 
+/* NBDExportMetaContexts represents a list of contexts to be exported,
+ * as selected by NBD_OPT_SET_META_CONTEXT. Also used for
+ * NBD_OPT_LIST_META_CONTEXT. */
+typedef struct NBDExportMetaContexts {
+    char export_name[NBD_MAX_NAME_SIZE + 1];
+    bool valid; /* means that negotiation of the option finished without
+                   errors */
+    bool base_allocation; /* export base:allocation context (block status) */
+} NBDExportMetaContexts;
+
 struct NBDClient {
     int refcount;
     void (*close_fn)(NBDClient *client, bool negotiated);
@@ -102,6 +114,7 @@ struct NBDClient {
     bool closing;
 
     bool structured_reply;
+    NBDExportMetaContexts export_meta;
 
     uint32_t opt; /* Current option being negotiated */
     uint32_t optlen; /* remaining length of data in ioc for the option being
@@ -218,22 +231,46 @@ nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type,
 /* Drop remainder of the current option, and send a reply with the
  * given error type and message. Return -errno on read or write
  * failure; or 0 if connection is still live. */
-static int GCC_FMT_ATTR(4, 5)
-nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp,
-             const char *fmt, ...)
+static int GCC_FMT_ATTR(4, 0)
+nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp,
+              const char *fmt, va_list va)
 {
     int ret = nbd_drop(client->ioc, client->optlen, errp);
-    va_list va;
 
     client->optlen = 0;
     if (!ret) {
-        va_start(va, fmt);
         ret = nbd_negotiate_send_rep_verr(client, type, errp, fmt, va);
-        va_end(va);
     }
     return ret;
 }
 
+static int GCC_FMT_ATTR(4, 5)
+nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp,
+             const char *fmt, ...)
+{
+    int ret;
+    va_list va;
+
+    va_start(va, fmt);
+    ret = nbd_opt_vdrop(client, type, errp, fmt, va);
+    va_end(va);
+
+    return ret;
+}
+
+static int GCC_FMT_ATTR(3, 4)
+nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...)
+{
+    int ret;
+    va_list va;
+
+    va_start(va, fmt);
+    ret = nbd_opt_vdrop(client, NBD_REP_ERR_INVALID, errp, fmt, va);
+    va_end(va);
+
+    return ret;
+}
+
 /* Read size bytes from the unparsed payload of the current option.
  * Return -errno on I/O error, 0 if option was completely handled by
  * sending a reply about inconsistent lengths, or 1 on success. */
@@ -241,14 +278,70 @@ static int nbd_opt_read(NBDClient *client, void *buffer, size_t size,
                         Error **errp)
 {
     if (size > client->optlen) {
-        return nbd_opt_drop(client, NBD_REP_ERR_INVALID, errp,
-                            "Inconsistent lengths in option %s",
-                            nbd_opt_lookup(client->opt));
+        return nbd_opt_invalid(client, errp,
+                               "Inconsistent lengths in option %s",
+                               nbd_opt_lookup(client->opt));
     }
     client->optlen -= size;
     return qio_channel_read_all(client->ioc, buffer, size, errp) < 0 ? -EIO : 1;
 }
 
+/* Drop size bytes from the unparsed payload of the current option.
+ * Return -errno on I/O error, 0 if option was completely handled by
+ * sending a reply about inconsistent lengths, or 1 on success. */
+static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp)
+{
+    if (size > client->optlen) {
+        return nbd_opt_invalid(client, errp,
+                               "Inconsistent lengths in option %s",
+                               nbd_opt_lookup(client->opt));
+    }
+    client->optlen -= size;
+    return nbd_drop(client->ioc, size, errp) < 0 ? -EIO : 1;
+}
+
+/* nbd_opt_read_name
+ *
+ * Read a string with the format:
+ *   uint32_t len     (<= NBD_MAX_NAME_SIZE)
+ *   len bytes string (not 0-terminated)
+ *
+ * @name should be enough to store NBD_MAX_NAME_SIZE+1.
+ * If @length is non-null, it will be set to the actual string length.
+ *
+ * Return -errno on I/O error, 0 if option was completely handled by
+ * sending a reply about inconsistent lengths, or 1 on success.
+ */
+static int nbd_opt_read_name(NBDClient *client, char *name, uint32_t *length,
+                             Error **errp)
+{
+    int ret;
+    uint32_t len;
+
+    ret = nbd_opt_read(client, &len, sizeof(len), errp);
+    if (ret <= 0) {
+        return ret;
+    }
+    cpu_to_be32s(&len);
+
+    if (len > NBD_MAX_NAME_SIZE) {
+        return nbd_opt_invalid(client, errp,
+                               "Invalid name length: %" PRIu32, len);
+    }
+
+    ret = nbd_opt_read(client, name, len, errp);
+    if (ret <= 0) {
+        return ret;
+    }
+    name[len] = '\0';
+
+    if (length) {
+        *length = len;
+    }
+
+    return 1;
+}
+
 /* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload.
  * Return -errno on error, 0 on success. */
 static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp,
@@ -306,6 +399,12 @@ static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
     return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
 }
 
+static void nbd_check_meta_export_name(NBDClient *client)
+{
+    client->export_meta.valid &= !strcmp(client->exp->name,
+                                         client->export_meta.export_name);
+}
+
 /* Send a reply to NBD_OPT_EXPORT_NAME.
  * Return -errno on error, 0 on success. */
 static int nbd_negotiate_handle_export_name(NBDClient *client,
@@ -357,6 +456,7 @@ static int nbd_negotiate_handle_export_name(NBDClient *client,
 
     QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
     nbd_export_get(client->exp);
+    nbd_check_meta_export_name(client);
 
     return 0;
 }
@@ -398,9 +498,8 @@ static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp)
     int ret;
 
     assert(client->optlen);
-    ret = nbd_opt_drop(client, NBD_REP_ERR_INVALID, errp,
-                       "option '%s' has unexpected length",
-                       nbd_opt_lookup(client->opt));
+    ret = nbd_opt_invalid(client, errp, "option '%s' has unexpected length",
+                          nbd_opt_lookup(client->opt));
     if (fatal && !ret) {
         error_setg(errp, "option '%s' has unexpected length",
                    nbd_opt_lookup(client->opt));
@@ -432,20 +531,10 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags,
         2 bytes: N, number of requests (can be 0)
         N * 2 bytes: N requests
     */
-    rc = nbd_opt_read(client, &namelen, sizeof(namelen), errp);
-    if (rc <= 0) {
-        return rc;
-    }
-    be32_to_cpus(&namelen);
-    if (namelen >= sizeof(name)) {
-        return nbd_opt_drop(client, NBD_REP_ERR_INVALID, errp,
-                            "name too long for qemu");
-    }
-    rc = nbd_opt_read(client, name, namelen, errp);
+    rc = nbd_opt_read_name(client, name, &namelen, errp);
     if (rc <= 0) {
         return rc;
     }
-    name[namelen] = '\0';
     trace_nbd_negotiate_handle_export_name_request(name);
 
     rc = nbd_opt_read(client, &requests, sizeof(requests), errp);
@@ -561,6 +650,7 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags,
         client->exp = exp;
         QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
         nbd_export_get(client->exp);
+        nbd_check_meta_export_name(client);
         rc = 1;
     }
     return rc;
@@ -615,6 +705,189 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
     return QIO_CHANNEL(tioc);
 }
 
+/* nbd_negotiate_send_meta_context
+ *
+ * Send one chunk of reply to NBD_OPT_{LIST,SET}_META_CONTEXT
+ *
+ * For NBD_OPT_LIST_META_CONTEXT @context_id is ignored, 0 is used instead.
+ */
+static int nbd_negotiate_send_meta_context(NBDClient *client,
+                                           const char *context,
+                                           uint32_t context_id,
+                                           Error **errp)
+{
+    NBDOptionReplyMetaContext opt;
+    struct iovec iov[] = {
+        {.iov_base = &opt, .iov_len = sizeof(opt)},
+        {.iov_base = (void *)context, .iov_len = strlen(context)}
+    };
+
+    if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
+        context_id = 0;
+    }
+
+    set_be_option_rep(&opt.h, client->opt, NBD_REP_META_CONTEXT,
+                      sizeof(opt) - sizeof(opt.h) + iov[1].iov_len);
+    stl_be_p(&opt.context_id, context_id);
+
+    return qio_channel_writev_all(client->ioc, iov, 2, errp) < 0 ? -EIO : 0;
+}
+
+/* nbd_meta_base_query
+ *
+ * Handle query to 'base' namespace. For now, only base:allocation context is
+ * available in it.  'len' is the amount of text remaining to be read from
+ * the current name, after the 'base:' portion has been stripped.
+ *
+ * Return -errno on I/O error, 0 if option was completely handled by
+ * sending a reply about inconsistent lengths, or 1 on success. */
+static int nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
+                               uint32_t len, Error **errp)
+{
+    int ret;
+    char query[sizeof("allocation") - 1];
+    size_t alen = strlen("allocation");
+
+    if (len == 0) {
+        if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
+            meta->base_allocation = true;
+        }
+        return 1;
+    }
+
+    if (len != alen) {
+        return nbd_opt_skip(client, len, errp);
+    }
+
+    ret = nbd_opt_read(client, query, len, errp);
+    if (ret <= 0) {
+        return ret;
+    }
+
+    if (strncmp(query, "allocation", alen) == 0) {
+        meta->base_allocation = true;
+    }
+
+    return 1;
+}
+
+/* nbd_negotiate_meta_query
+ *
+ * Parse namespace name and call corresponding function to parse body of the
+ * query.
+ *
+ * The only supported namespace now is 'base'.
+ *
+ * The function aims not wasting time and memory to read long unknown namespace
+ * names.
+ *
+ * Return -errno on I/O error, 0 if option was completely handled by
+ * sending a reply about inconsistent lengths, or 1 on success. */
+static int nbd_negotiate_meta_query(NBDClient *client,
+                                    NBDExportMetaContexts *meta, Error **errp)
+{
+    int ret;
+    char query[sizeof("base:") - 1];
+    size_t baselen = strlen("base:");
+    uint32_t len;
+
+    ret = nbd_opt_read(client, &len, sizeof(len), errp);
+    if (ret <= 0) {
+        return ret;
+    }
+    cpu_to_be32s(&len);
+
+    /* The only supported namespace for now is 'base'. So query should start
+     * with 'base:'. Otherwise, we can ignore it and skip the remainder. */
+    if (len < baselen) {
+        return nbd_opt_skip(client, len, errp);
+    }
+
+    len -= baselen;
+    ret = nbd_opt_read(client, query, baselen, errp);
+    if (ret <= 0) {
+        return ret;
+    }
+    if (strncmp(query, "base:", baselen) != 0) {
+        return nbd_opt_skip(client, len, errp);
+    }
+
+    return nbd_meta_base_query(client, meta, len, errp);
+}
+
+/* nbd_negotiate_meta_queries
+ * Handle NBD_OPT_LIST_META_CONTEXT and NBD_OPT_SET_META_CONTEXT
+ *
+ * Return -errno on I/O error, or 0 if option was completely handled. */
+static int nbd_negotiate_meta_queries(NBDClient *client,
+                                      NBDExportMetaContexts *meta, Error **errp)
+{
+    int ret;
+    NBDExport *exp;
+    NBDExportMetaContexts local_meta;
+    uint32_t nb_queries;
+    int i;
+
+    if (!client->structured_reply) {
+        return nbd_opt_invalid(client, errp,
+                               "request option '%s' when structured reply "
+                               "is not negotiated",
+                               nbd_opt_lookup(client->opt));
+    }
+
+    if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
+        /* Only change the caller's meta on SET. */
+        meta = &local_meta;
+    }
+
+    memset(meta, 0, sizeof(*meta));
+
+    ret = nbd_opt_read_name(client, meta->export_name, NULL, errp);
+    if (ret <= 0) {
+        return ret;
+    }
+
+    exp = nbd_export_find(meta->export_name);
+    if (exp == NULL) {
+        return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp,
+                            "export '%s' not present", meta->export_name);
+    }
+
+    ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), errp);
+    if (ret <= 0) {
+        return ret;
+    }
+    cpu_to_be32s(&nb_queries);
+
+    if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) {
+        /* enable all known contexts */
+        meta->base_allocation = true;
+    } else {
+        for (i = 0; i < nb_queries; ++i) {
+            ret = nbd_negotiate_meta_query(client, meta, errp);
+            if (ret <= 0) {
+                return ret;
+            }
+        }
+    }
+
+    if (meta->base_allocation) {
+        ret = nbd_negotiate_send_meta_context(client, "base:allocation",
+                                              NBD_META_ID_BASE_ALLOCATION,
+                                              errp);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
+    if (ret == 0) {
+        meta->valid = true;
+    }
+
+    return ret;
+}
+
 /* nbd_negotiate_options
  * Process all NBD_OPT_* client option commands, during fixed newstyle
  * negotiation.
@@ -805,6 +1078,12 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
                 }
                 break;
 
+            case NBD_OPT_LIST_META_CONTEXT:
+            case NBD_OPT_SET_META_CONTEXT:
+                ret = nbd_negotiate_meta_queries(client, &client->export_meta,
+                                                 errp);
+                break;
+
             default:
                 ret = nbd_opt_drop(client, NBD_REP_ERR_UNSUP, errp,
                                    "Unsupported option %" PRIu32 " (%s)",
@@ -1342,6 +1621,34 @@ static int coroutine_fn nbd_co_send_structured_read(NBDClient *client,
     return nbd_co_send_iov(client, iov, 2, errp);
 }
 
+static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
+                                                     uint64_t handle,
+                                                     uint32_t error,
+                                                     const char *msg,
+                                                     Error **errp)
+{
+    NBDStructuredError chunk;
+    int nbd_err = system_errno_to_nbd_errno(error);
+    struct iovec iov[] = {
+        {.iov_base = &chunk, .iov_len = sizeof(chunk)},
+        {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0},
+    };
+
+    assert(nbd_err);
+    trace_nbd_co_send_structured_error(handle, nbd_err,
+                                       nbd_err_lookup(nbd_err), msg ? msg : "");
+    set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
+                 sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
+    stl_be_p(&chunk.error, nbd_err);
+    stw_be_p(&chunk.message_length, iov[1].iov_len);
+
+    return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp);
+}
+
+/* Do a sparse read and send the structured reply to the client.
+ * Returns -errno if sending fails. bdrv_block_status_above() failure is
+ * reported to the client, at which point this function succeeds.
+ */
 static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
                                                 uint64_t handle,
                                                 uint64_t offset,
@@ -1362,8 +1669,13 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
         bool final;
 
         if (status < 0) {
-            error_setg_errno(errp, -status, "unable to check for holes");
-            return status;
+            char *msg = g_strdup_printf("unable to check for holes: %s",
+                                        strerror(-status));
+
+            ret = nbd_co_send_structured_error(client, handle, -status, msg,
+                                               errp);
+            g_free(msg);
+            return ret;
         }
         assert(pnum && pnum <= size - progress);
         final = progress + pnum == size;
@@ -1401,28 +1713,77 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
     return ret;
 }
 
-static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
-                                                     uint64_t handle,
-                                                     uint32_t error,
-                                                     const char *msg,
-                                                     Error **errp)
+static int blockstatus_to_extent_be(BlockDriverState *bs, uint64_t offset,
+                                    uint64_t bytes, NBDExtent *extent)
 {
-    NBDStructuredError chunk;
-    int nbd_err = system_errno_to_nbd_errno(error);
+    uint64_t remaining_bytes = bytes;
+
+    while (remaining_bytes) {
+        uint32_t flags;
+        int64_t num;
+        int ret = bdrv_block_status_above(bs, NULL, offset, remaining_bytes,
+                                          &num, NULL, NULL);
+        if (ret < 0) {
+            return ret;
+        }
+
+        flags = (ret & BDRV_BLOCK_ALLOCATED ? 0 : NBD_STATE_HOLE) |
+                (ret & BDRV_BLOCK_ZERO      ? NBD_STATE_ZERO : 0);
+
+        if (remaining_bytes == bytes) {
+            extent->flags = flags;
+        }
+
+        if (flags != extent->flags) {
+            break;
+        }
+
+        offset += num;
+        remaining_bytes -= num;
+    }
+
+    cpu_to_be32s(&extent->flags);
+    extent->length = cpu_to_be32(bytes - remaining_bytes);
+
+    return 0;
+}
+
+/* nbd_co_send_extents
+ * @extents should be in big-endian */
+static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
+                               NBDExtent *extents, unsigned nb_extents,
+                               uint32_t context_id, Error **errp)
+{
+    NBDStructuredMeta chunk;
+
     struct iovec iov[] = {
         {.iov_base = &chunk, .iov_len = sizeof(chunk)},
-        {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0},
+        {.iov_base = extents, .iov_len = nb_extents * sizeof(extents[0])}
     };
 
-    assert(nbd_err);
-    trace_nbd_co_send_structured_error(handle, nbd_err,
-                                       nbd_err_lookup(nbd_err), msg ? msg : "");
-    set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
-                 sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
-    stl_be_p(&chunk.error, nbd_err);
-    stw_be_p(&chunk.message_length, iov[1].iov_len);
+    set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_BLOCK_STATUS,
+                 handle, sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
+    stl_be_p(&chunk.context_id, context_id);
 
-    return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp);
+    return nbd_co_send_iov(client, iov, 2, errp);
+}
+
+/* Get block status from the exported device and send it to the client */
+static int nbd_co_send_block_status(NBDClient *client, uint64_t handle,
+                                    BlockDriverState *bs, uint64_t offset,
+                                    uint64_t length, uint32_t context_id,
+                                    Error **errp)
+{
+    int ret;
+    NBDExtent extent;
+
+    ret = blockstatus_to_extent_be(bs, offset, length, &extent);
+    if (ret < 0) {
+        return nbd_co_send_structured_error(
+                client, handle, -ret, "can't get block status", errp);
+    }
+
+    return nbd_co_send_extents(client, handle, &extent, 1, context_id, errp);
 }
 
 /* nbd_co_receive_request
@@ -1502,6 +1863,8 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
         valid_flags |= NBD_CMD_FLAG_DF;
     } else if (request->type == NBD_CMD_WRITE_ZEROES) {
         valid_flags |= NBD_CMD_FLAG_NO_HOLE;
+    } else if (request->type == NBD_CMD_BLOCK_STATUS) {
+        valid_flags |= NBD_CMD_FLAG_REQ_ONE;
     }
     if (request->flags & ~valid_flags) {
         error_setg(errp, "unsupported flags for command %s (got 0x%x)",
@@ -1512,159 +1875,195 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
     return 0;
 }
 
-/* Owns a reference to the NBDClient passed as opaque.  */
-static coroutine_fn void nbd_trip(void *opaque)
+/* Send simple reply without a payload, or a structured error
+ * @error_msg is ignored if @ret >= 0
+ * Returns 0 if connection is still live, -errno on failure to talk to client
+ */
+static coroutine_fn int nbd_send_generic_reply(NBDClient *client,
+                                               uint64_t handle,
+                                               int ret,
+                                               const char *error_msg,
+                                               Error **errp)
+{
+    if (client->structured_reply && ret < 0) {
+        return nbd_co_send_structured_error(client, handle, -ret, error_msg,
+                                            errp);
+    } else {
+        return nbd_co_send_simple_reply(client, handle, ret < 0 ? -ret : 0,
+                                        NULL, 0, errp);
+    }
+}
+
+/* Handle NBD_CMD_READ request.
+ * Return -errno if sending fails. Other errors are reported directly to the
+ * client as an error reply. */
+static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request,
+                                        uint8_t *data, Error **errp)
 {
-    NBDClient *client = opaque;
-    NBDExport *exp = client->exp;
-    NBDRequestData *req;
-    NBDRequest request = { 0 };    /* GCC thinks it can be used uninitialized */
     int ret;
-    int flags;
-    int reply_data_len = 0;
-    Error *local_err = NULL;
-    char *msg = NULL;
+    NBDExport *exp = client->exp;
 
-    trace_nbd_trip();
-    if (client->closing) {
-        nbd_client_put(client);
-        return;
-    }
+    assert(request->type == NBD_CMD_READ);
 
-    req = nbd_request_get(client);
-    ret = nbd_co_receive_request(req, &request, &local_err);
-    client->recv_coroutine = NULL;
-    nbd_client_receive_next_request(client);
-    if (ret == -EIO) {
-        goto disconnect;
+    /* XXX: NBD Protocol only documents use of FUA with WRITE */
+    if (request->flags & NBD_CMD_FLAG_FUA) {
+        ret = blk_co_flush(exp->blk);
+        if (ret < 0) {
+            return nbd_send_generic_reply(client, request->handle, ret,
+                                          "flush failed", errp);
+        }
     }
 
-    if (ret < 0) {
-        goto reply;
+    if (client->structured_reply && !(request->flags & NBD_CMD_FLAG_DF) &&
+        request->len) {
+        return nbd_co_send_sparse_read(client, request->handle, request->from,
+                                       data, request->len, errp);
     }
 
-    if (client->closing) {
-        /*
-         * The client may be closed when we are blocked in
-         * nbd_co_receive_request()
-         */
-        goto done;
+    ret = blk_pread(exp->blk, request->from + exp->dev_offset, data,
+                    request->len);
+    if (ret < 0) {
+        return nbd_send_generic_reply(client, request->handle, ret,
+                                      "reading from file failed", errp);
     }
 
-    switch (request.type) {
-    case NBD_CMD_READ:
-        /* XXX: NBD Protocol only documents use of FUA with WRITE */
-        if (request.flags & NBD_CMD_FLAG_FUA) {
-            ret = blk_co_flush(exp->blk);
-            if (ret < 0) {
-                error_setg_errno(&local_err, -ret, "flush failed");
-                break;
-            }
-        }
-
-        if (client->structured_reply && !(request.flags & NBD_CMD_FLAG_DF) &&
-            request.len) {
-            ret = nbd_co_send_sparse_read(req->client, request.handle,
-                                          request.from, req->data, request.len,
-                                          &local_err);
-            if (ret < 0) {
-                goto reply;
-            }
-            goto done;
+    if (client->structured_reply) {
+        if (request->len) {
+            return nbd_co_send_structured_read(client, request->handle,
+                                               request->from, data,
+                                               request->len, true, errp);
+        } else {
+            return nbd_co_send_structured_done(client, request->handle, errp);
         }
+    } else {
+        return nbd_co_send_simple_reply(client, request->handle, 0,
+                                        data, request->len, errp);
+    }
+}
 
-        ret = blk_pread(exp->blk, request.from + exp->dev_offset,
-                        req->data, request.len);
-        if (ret < 0) {
-            error_setg_errno(&local_err, -ret, "reading from file failed");
-            break;
-        }
+/* Handle NBD request.
+ * Return -errno if sending fails. Other errors are reported directly to the
+ * client as an error reply. */
+static coroutine_fn int nbd_handle_request(NBDClient *client,
+                                           NBDRequest *request,
+                                           uint8_t *data, Error **errp)
+{
+    int ret;
+    int flags;
+    NBDExport *exp = client->exp;
+    char *msg;
 
-        reply_data_len = request.len;
+    switch (request->type) {
+    case NBD_CMD_READ:
+        return nbd_do_cmd_read(client, request, data, errp);
 
-        break;
     case NBD_CMD_WRITE:
         flags = 0;
-        if (request.flags & NBD_CMD_FLAG_FUA) {
+        if (request->flags & NBD_CMD_FLAG_FUA) {
             flags |= BDRV_REQ_FUA;
         }
-        ret = blk_pwrite(exp->blk, request.from + exp->dev_offset,
-                         req->data, request.len, flags);
-        if (ret < 0) {
-            error_setg_errno(&local_err, -ret, "writing to file failed");
-        }
+        ret = blk_pwrite(exp->blk, request->from + exp->dev_offset,
+                         data, request->len, flags);
+        return nbd_send_generic_reply(client, request->handle, ret,
+                                      "writing to file failed", errp);
 
-        break;
     case NBD_CMD_WRITE_ZEROES:
         flags = 0;
-        if (request.flags & NBD_CMD_FLAG_FUA) {
+        if (request->flags & NBD_CMD_FLAG_FUA) {
             flags |= BDRV_REQ_FUA;
         }
-        if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
+        if (!(request->flags & NBD_CMD_FLAG_NO_HOLE)) {
             flags |= BDRV_REQ_MAY_UNMAP;
         }
-        ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset,
-                                request.len, flags);
-        if (ret < 0) {
-            error_setg_errno(&local_err, -ret, "writing to file failed");
-        }
+        ret = blk_pwrite_zeroes(exp->blk, request->from + exp->dev_offset,
+                                request->len, flags);
+        return nbd_send_generic_reply(client, request->handle, ret,
+                                      "writing to file failed", errp);
 
-        break;
     case NBD_CMD_DISC:
         /* unreachable, thanks to special case in nbd_co_receive_request() */
         abort();
 
     case NBD_CMD_FLUSH:
         ret = blk_co_flush(exp->blk);
-        if (ret < 0) {
-            error_setg_errno(&local_err, -ret, "flush failed");
-        }
+        return nbd_send_generic_reply(client, request->handle, ret,
+                                      "flush failed", errp);
 
-        break;
     case NBD_CMD_TRIM:
-        ret = blk_co_pdiscard(exp->blk, request.from + exp->dev_offset,
-                              request.len);
-        if (ret < 0) {
-            error_setg_errno(&local_err, -ret, "discard failed");
+        ret = blk_co_pdiscard(exp->blk, request->from + exp->dev_offset,
+                              request->len);
+        if (ret == 0 && request->flags & NBD_CMD_FLAG_FUA) {
+            ret = blk_co_flush(exp->blk);
+        }
+        return nbd_send_generic_reply(client, request->handle, ret,
+                                      "discard failed", errp);
+
+    case NBD_CMD_BLOCK_STATUS:
+        if (client->export_meta.valid && client->export_meta.base_allocation) {
+            return nbd_co_send_block_status(client, request->handle,
+                                            blk_bs(exp->blk), request->from,
+                                            request->len,
+                                            NBD_META_ID_BASE_ALLOCATION, errp);
+        } else {
+            return nbd_send_generic_reply(client, request->handle, -EINVAL,
+                                          "CMD_BLOCK_STATUS not negotiated",
+                                          errp);
         }
 
-        break;
     default:
-        error_setg(&local_err, "invalid request type (%" PRIu32 ") received",
-                   request.type);
-        ret = -EINVAL;
+        msg = g_strdup_printf("invalid request type (%" PRIu32 ") received",
+                              request->type);
+        ret = nbd_send_generic_reply(client, request->handle, -EINVAL, msg,
+                                     errp);
+        g_free(msg);
+        return ret;
     }
+}
 
-reply:
-    if (local_err) {
-        /* If we get here, local_err was not a fatal error, and should be sent
-         * to the client. */
-        assert(ret < 0);
-        msg = g_strdup(error_get_pretty(local_err));
-        error_report_err(local_err);
-        local_err = NULL;
+/* Owns a reference to the NBDClient passed as opaque.  */
+static coroutine_fn void nbd_trip(void *opaque)
+{
+    NBDClient *client = opaque;
+    NBDRequestData *req;
+    NBDRequest request = { 0 };    /* GCC thinks it can be used uninitialized */
+    int ret;
+    Error *local_err = NULL;
+
+    trace_nbd_trip();
+    if (client->closing) {
+        nbd_client_put(client);
+        return;
     }
 
-    if (client->structured_reply &&
-        (ret < 0 || request.type == NBD_CMD_READ)) {
-        if (ret < 0) {
-            ret = nbd_co_send_structured_error(req->client, request.handle,
-                                               -ret, msg, &local_err);
-        } else if (reply_data_len) {
-            ret = nbd_co_send_structured_read(req->client, request.handle,
-                                              request.from, req->data,
-                                              reply_data_len, true,
-                                              &local_err);
-        } else {
-            ret = nbd_co_send_structured_done(req->client, request.handle,
-                                              &local_err);
-        }
+    req = nbd_request_get(client);
+    ret = nbd_co_receive_request(req, &request, &local_err);
+    client->recv_coroutine = NULL;
+
+    if (client->closing) {
+        /*
+         * The client may be closed when we are blocked in
+         * nbd_co_receive_request()
+         */
+        goto done;
+    }
+
+    nbd_client_receive_next_request(client);
+    if (ret == -EIO) {
+        goto disconnect;
+    }
+
+    if (ret < 0) {
+        /* It wans't -EIO, so, according to nbd_co_receive_request()
+         * semantics, we should return the error to the client. */
+        Error *export_err = local_err;
+
+        local_err = NULL;
+        ret = nbd_send_generic_reply(client, request.handle, -EINVAL,
+                                     error_get_pretty(export_err), &local_err);
+        error_free(export_err);
     } else {
-        ret = nbd_co_send_simple_reply(req->client, request.handle,
-                                       ret < 0 ? -ret : 0,
-                                       req->data, reply_data_len, &local_err);
+        ret = nbd_handle_request(client, &request, req->data, &local_err);
     }
-    g_free(msg);
     if (ret < 0) {
         error_prepend(&local_err, "Failed to send reply: ");
         goto disconnect;
diff --git a/tests/qemu-iotests/033 b/tests/qemu-iotests/033
index a1d8357331..ee8a1338bb 100755
--- a/tests/qemu-iotests/033
+++ b/tests/qemu-iotests/033
@@ -105,6 +105,7 @@ for align in 512 4k; do
 done
 done
 
+_cleanup_test_img
 
 # Trigger truncate that would shrink qcow2 L1 table, which is done by
 #   clearing one entry (8 bytes) with bdrv_co_pwrite_zeroes()
diff --git a/tests/qemu-iotests/208 b/tests/qemu-iotests/208
new file mode 100755
index 0000000000..4e82b96c82
--- /dev/null
+++ b/tests/qemu-iotests/208
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2018 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Creator/Owner: Stefan Hajnoczi <stefanha@redhat.com>
+#
+# Check that the runtime NBD server does not crash when stopped after
+# blockdev-snapshot-sync.
+
+import iotests
+
+with iotests.FilePath('disk.img') as disk_img_path, \
+     iotests.FilePath('disk-snapshot.img') as disk_snapshot_img_path, \
+     iotests.FilePath('nbd.sock') as nbd_sock_path, \
+     iotests.VM() as vm:
+
+    img_size = '10M'
+    iotests.qemu_img_pipe('create', '-f', iotests.imgfmt, disk_img_path, img_size)
+
+    iotests.log('Launching VM...')
+    (vm.add_drive(disk_img_path, 'node-name=drive0-node', interface='none')
+       .launch())
+
+    iotests.log('Starting NBD server...')
+    iotests.log(vm.qmp('nbd-server-start', addr={
+            "type": "unix",
+            "data": {
+                "path": nbd_sock_path,
+            }
+        }))
+
+    iotests.log('Adding NBD export...')
+    iotests.log(vm.qmp('nbd-server-add', device='drive0-node', writable=True))
+
+    iotests.log('Creating external snapshot...')
+    iotests.log(vm.qmp('blockdev-snapshot-sync',
+        node_name='drive0-node',
+        snapshot_node_name='drive0-snapshot-node',
+        snapshot_file=disk_snapshot_img_path))
+
+    iotests.log('Stopping NBD server...')
+    iotests.log(vm.qmp('nbd-server-stop'))
diff --git a/tests/qemu-iotests/208.out b/tests/qemu-iotests/208.out
new file mode 100644
index 0000000000..3687e9d0dd
--- /dev/null
+++ b/tests/qemu-iotests/208.out
@@ -0,0 +1,9 @@
+Launching VM...
+Starting NBD server...
+{u'return': {}}
+Adding NBD export...
+{u'return': {}}
+Creating external snapshot...
+{u'return': {}}
+Stopping NBD server...
+{u'return': {}}
diff --git a/tests/qemu-iotests/209 b/tests/qemu-iotests/209
new file mode 100755
index 0000000000..259e991ec6
--- /dev/null
+++ b/tests/qemu-iotests/209
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+#
+# Tests for NBD BLOCK_STATUS extension
+#
+# Copyright (c) 2018 Virtuozzo International GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import iotests
+from iotests import qemu_img_create, qemu_io, qemu_img_verbose, qemu_nbd, \
+                    file_path
+
+iotests.verify_image_format(supported_fmts=['qcow2'])
+
+disk, nbd_sock = file_path('disk', 'nbd-sock')
+nbd_uri = 'nbd+unix:///exp?socket=' + nbd_sock
+
+qemu_img_create('-f', iotests.imgfmt, disk, '1M')
+qemu_io('-f', iotests.imgfmt, '-c', 'write 0 512K', disk)
+
+qemu_nbd('-k', nbd_sock, '-x', 'exp', '-f', iotests.imgfmt, disk)
+qemu_img_verbose('map', '-f', 'raw', '--output=json', nbd_uri)
diff --git a/tests/qemu-iotests/209.out b/tests/qemu-iotests/209.out
new file mode 100644
index 0000000000..0d29724e84
--- /dev/null
+++ b/tests/qemu-iotests/209.out
@@ -0,0 +1,2 @@
+[{ "start": 0, "length": 524288, "depth": 0, "zero": false, "data": true},
+{ "start": 524288, "length": 524288, "depth": 0, "zero": true, "data": false}]
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index c401791fcd..624e1fbd4f 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -204,3 +204,5 @@
 205 rw auto quick
 206 rw auto
 207 rw auto
+208 rw auto quick
+209 rw auto quick
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 1bcc9ca57d..90cd751e2a 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -23,12 +23,14 @@ import subprocess
 import string
 import unittest
 import sys
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts'))
-import qtest
 import struct
 import json
 import signal
 import logging
+import atexit
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts'))
+import qtest
 
 
 # This will not work if arguments contain spaces but is necessary if we
@@ -249,6 +251,37 @@ class FilePath(object):
         return False
 
 
+def file_path_remover():
+    for path in reversed(file_path_remover.paths):
+        try:
+            os.remove(path)
+        except OSError:
+            pass
+
+
+def file_path(*names):
+    ''' Another way to get auto-generated filename that cleans itself up.
+
+    Use is as simple as:
+
+    img_a, img_b = file_path('a.img', 'b.img')
+    sock = file_path('socket')
+    '''
+
+    if not hasattr(file_path_remover, 'paths'):
+        file_path_remover.paths = []
+        atexit.register(file_path_remover)
+
+    paths = []
+    for name in names:
+        filename = '{0}-{1}'.format(os.getpid(), name)
+        path = os.path.join(test_dir, filename)
+        file_path_remover.paths.append(path)
+        paths.append(path)
+
+    return paths[0] if len(paths) == 1 else paths
+
+
 class VM(qtest.QEMUQtestMachine):
     '''A QEMU VM'''