summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2017-10-31 13:12:21 +0000
committerPeter Maydell <peter.maydell@linaro.org>2017-10-31 13:12:21 +0000
commit92c7ec5cd4d15c76218703f7bd3ca75bd46353b7 (patch)
tree0e17111be31efbc5ab657c4736b3e8556b4cf191
parentd2b9d71432687eef947f243a57409381151701d5 (diff)
parentf140e3000371e67ff4e00df3213e2d576d9c91be (diff)
downloadfocaccia-qemu-92c7ec5cd4d15c76218703f7bd3ca75bd46353b7.tar.gz
focaccia-qemu-92c7ec5cd4d15c76218703f7bd3ca75bd46353b7.zip
Merge remote-tracking branch 'remotes/ericb/tags/pull-nbd-2017-10-30' into staging
nbd patches for 2017-10-30

Vladimir Sementsov-Ogievskiy (some patches co-authored by Eric Blake):
00/12 nbd minimal structured read

# gpg: Signature made Mon 30 Oct 2017 20:54:54 GMT
# gpg:                using RSA key 0xA7A16B4A2527436A
# gpg: Good signature from "Eric Blake <eblake@redhat.com>"
# gpg:                 aka "Eric Blake (Free Software Programmer) <ebb9@byu.net>"
# gpg:                 aka "[jpeg image of size 6874]"
# Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2  F3AA A7A1 6B4A 2527 436A

* remotes/ericb/tags/pull-nbd-2017-10-30:
  nbd: Minimal structured read for client
  nbd: Move nbd_read() to common header
  nbd/client: prepare nbd_receive_reply for structured reply
  nbd/client: refactor nbd_receive_starttls
  nbd/server: Include human-readable message in structured errors
  nbd: Minimal structured read for server
  nbd/server: Refactor zero-length option check
  nbd/server: Simplify nbd_negotiate_options loop
  nbd/server: Report error for write to read-only export
  nbd: Expose constants and structs for structured read
  nbd: Move nbd_errno_to_system_errno() to public header
  nbd: Include error names in trace messages

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--block/nbd-client.c492
-rw-r--r--block/nbd-client.h1
-rw-r--r--include/block/nbd.h106
-rw-r--r--nbd/client.c203
-rw-r--r--nbd/common.c84
-rw-r--r--nbd/nbd-internal.h23
-rw-r--r--nbd/server.c210
-rw-r--r--nbd/trace-events15
-rw-r--r--tests/qemu-iotests/083.out15
9 files changed, 962 insertions, 187 deletions
diff --git a/block/nbd-client.c b/block/nbd-client.c
index c0683c3c83..b44d4d4a01 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -92,7 +92,9 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
         i = HANDLE_TO_INDEX(s, s->reply.handle);
         if (i >= MAX_NBD_REQUESTS ||
             !s->requests[i].coroutine ||
-            !s->requests[i].receiving) {
+            !s->requests[i].receiving ||
+            (nbd_reply_is_structured(&s->reply) && !s->info.structured_reply))
+        {
             break;
         }
 
@@ -139,6 +141,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
     assert(i < MAX_NBD_REQUESTS);
 
     s->requests[i].coroutine = qemu_coroutine_self();
+    s->requests[i].offset = request->from;
     s->requests[i].receiving = false;
 
     request->handle = INDEX_TO_HANDLE(s, i);
@@ -179,75 +182,489 @@ err:
     return rc;
 }
 
-static int nbd_co_receive_reply(NBDClientSession *s,
-                                uint64_t handle,
-                                QEMUIOVector *qiov)
+static inline uint16_t payload_advance16(uint8_t **payload)
+{
+    *payload += 2;
+    return lduw_be_p(*payload - 2);
+}
+
+static inline uint32_t payload_advance32(uint8_t **payload)
+{
+    *payload += 4;
+    return ldl_be_p(*payload - 4);
+}
+
+static inline uint64_t payload_advance64(uint8_t **payload)
+{
+    *payload += 8;
+    return ldq_be_p(*payload - 8);
+}
+
+static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk,
+                                         uint8_t *payload, uint64_t orig_offset,
+                                         QEMUIOVector *qiov, Error **errp)
+{
+    uint64_t offset;
+    uint32_t hole_size;
+
+    if (chunk->length != sizeof(offset) + sizeof(hole_size)) {
+        error_setg(errp, "Protocol error: invalid payload for "
+                         "NBD_REPLY_TYPE_OFFSET_HOLE");
+        return -EINVAL;
+    }
+
+    offset = payload_advance64(&payload);
+    hole_size = payload_advance32(&payload);
+
+    if (offset < orig_offset || hole_size > qiov->size ||
+        offset > orig_offset + qiov->size - hole_size) {
+        error_setg(errp, "Protocol error: server sent chunk exceeding requested"
+                         " region");
+        return -EINVAL;
+    }
+
+    qemu_iovec_memset(qiov, offset - orig_offset, 0, hole_size);
+
+    return 0;
+}
+
+/* nbd_parse_error_payload
+ * on success @errp contains message describing nbd error reply
+ */
+static int nbd_parse_error_payload(NBDStructuredReplyChunk *chunk,
+                                   uint8_t *payload, int *request_ret,
+                                   Error **errp)
+{
+    uint32_t error;
+    uint16_t message_size;
+
+    assert(chunk->type & (1 << 15));
+
+    if (chunk->length < sizeof(error) + sizeof(message_size)) {
+        error_setg(errp,
+                   "Protocol error: invalid payload for structured error");
+        return -EINVAL;
+    }
+
+    error = nbd_errno_to_system_errno(payload_advance32(&payload));
+    if (error == 0) {
+        error_setg(errp, "Protocol error: server sent structured error chunk"
+                         "with error = 0");
+        return -EINVAL;
+    }
+
+    *request_ret = -error;
+    message_size = payload_advance16(&payload);
+
+    if (message_size > chunk->length - sizeof(error) - sizeof(message_size)) {
+        error_setg(errp, "Protocol error: server sent structured error chunk"
+                         "with incorrect message size");
+        return -EINVAL;
+    }
+
+    /* TODO: Add a trace point to mention the server complaint */
+
+    /* TODO handle ERROR_OFFSET */
+
+    return 0;
+}
+
+static int nbd_co_receive_offset_data_payload(NBDClientSession *s,
+                                              uint64_t orig_offset,
+                                              QEMUIOVector *qiov, Error **errp)
+{
+    QEMUIOVector sub_qiov;
+    uint64_t offset;
+    size_t data_size;
+    int ret;
+    NBDStructuredReplyChunk *chunk = &s->reply.structured;
+
+    assert(nbd_reply_is_structured(&s->reply));
+
+    if (chunk->length < sizeof(offset)) {
+        error_setg(errp, "Protocol error: invalid payload for "
+                         "NBD_REPLY_TYPE_OFFSET_DATA");
+        return -EINVAL;
+    }
+
+    if (nbd_read(s->ioc, &offset, sizeof(offset), errp) < 0) {
+        return -EIO;
+    }
+    be64_to_cpus(&offset);
+
+    data_size = chunk->length - sizeof(offset);
+    if (offset < orig_offset || data_size > qiov->size ||
+        offset > orig_offset + qiov->size - data_size) {
+        error_setg(errp, "Protocol error: server sent chunk exceeding requested"
+                         " region");
+        return -EINVAL;
+    }
+
+    qemu_iovec_init(&sub_qiov, qiov->niov);
+    qemu_iovec_concat(&sub_qiov, qiov, offset - orig_offset, data_size);
+    ret = qio_channel_readv_all(s->ioc, sub_qiov.iov, sub_qiov.niov, errp);
+    qemu_iovec_destroy(&sub_qiov);
+
+    return ret < 0 ? -EIO : 0;
+}
+
+#define NBD_MAX_MALLOC_PAYLOAD 1000
+/* nbd_co_receive_structured_payload
+ */
+static coroutine_fn int nbd_co_receive_structured_payload(
+        NBDClientSession *s, void **payload, Error **errp)
+{
+    int ret;
+    uint32_t len;
+
+    assert(nbd_reply_is_structured(&s->reply));
+
+    len = s->reply.structured.length;
+
+    if (len == 0) {
+        return 0;
+    }
+
+    if (payload == NULL) {
+        error_setg(errp, "Unexpected structured payload");
+        return -EINVAL;
+    }
+
+    if (len > NBD_MAX_MALLOC_PAYLOAD) {
+        error_setg(errp, "Payload too large");
+        return -EINVAL;
+    }
+
+    *payload = g_new(char, len);
+    ret = nbd_read(s->ioc, *payload, len, errp);
+    if (ret < 0) {
+        g_free(*payload);
+        *payload = NULL;
+        return ret;
+    }
+
+    return 0;
+}
+
+/* nbd_co_do_receive_one_chunk
+ * for simple reply:
+ *   set request_ret to received reply error
+ *   if qiov is not NULL: read payload to @qiov
+ * for structured reply chunk:
+ *   if error chunk: read payload, set @request_ret, do not set @payload
+ *   else if offset_data chunk: read payload data to @qiov, do not set @payload
+ *   else: read payload to @payload
+ *
+ * If function fails, @errp contains corresponding error message, and the
+ * connection with the server is suspect.  If it returns 0, then the
+ * transaction succeeded (although @request_ret may be a negative errno
+ * corresponding to the server's error reply), and errp is unchanged.
+ */
+static coroutine_fn int nbd_co_do_receive_one_chunk(
+        NBDClientSession *s, uint64_t handle, bool only_structured,
+        int *request_ret, QEMUIOVector *qiov, void **payload, Error **errp)
 {
     int ret;
     int i = HANDLE_TO_INDEX(s, handle);
+    void *local_payload = NULL;
+    NBDStructuredReplyChunk *chunk;
+
+    if (payload) {
+        *payload = NULL;
+    }
+    *request_ret = 0;
 
     /* Wait until we're woken up by nbd_read_reply_entry.  */
     s->requests[i].receiving = true;
     qemu_coroutine_yield();
     s->requests[i].receiving = false;
     if (!s->ioc || s->quit) {
-        ret = -EIO;
-    } else {
-        assert(s->reply.handle == handle);
-        ret = -s->reply.error;
-        if (qiov && s->reply.error == 0) {
-            if (qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov,
-                                      NULL) < 0) {
-                ret = -EIO;
-                s->quit = true;
-            }
+        error_setg(errp, "Connection closed");
+        return -EIO;
+    }
+
+    assert(s->reply.handle == handle);
+
+    if (nbd_reply_is_simple(&s->reply)) {
+        if (only_structured) {
+            error_setg(errp, "Protocol error: simple reply when structured "
+                             "reply chunk was expected");
+            return -EINVAL;
         }
 
-        /* Tell the read handler to read another header.  */
-        s->reply.handle = 0;
+        *request_ret = -nbd_errno_to_system_errno(s->reply.simple.error);
+        if (*request_ret < 0 || !qiov) {
+            return 0;
+        }
+
+        return qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov,
+                                     errp) < 0 ? -EIO : 0;
+    }
+
+    /* handle structured reply chunk */
+    assert(s->info.structured_reply);
+    chunk = &s->reply.structured;
+
+    if (chunk->type == NBD_REPLY_TYPE_NONE) {
+        if (!(chunk->flags & NBD_REPLY_FLAG_DONE)) {
+            error_setg(errp, "Protocol error: NBD_REPLY_TYPE_NONE chunk without"
+                             "NBD_REPLY_FLAG_DONE flag set");
+            return -EINVAL;
+        }
+        return 0;
+    }
+
+    if (chunk->type == NBD_REPLY_TYPE_OFFSET_DATA) {
+        if (!qiov) {
+            error_setg(errp, "Unexpected NBD_REPLY_TYPE_OFFSET_DATA chunk");
+            return -EINVAL;
+        }
+
+        return nbd_co_receive_offset_data_payload(s, s->requests[i].offset,
+                                                  qiov, errp);
+    }
+
+    if (nbd_reply_type_is_error(chunk->type)) {
+        payload = &local_payload;
+    }
+
+    ret = nbd_co_receive_structured_payload(s, payload, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (nbd_reply_type_is_error(chunk->type)) {
+        ret = nbd_parse_error_payload(chunk, local_payload, request_ret, errp);
+        g_free(local_payload);
+        return ret;
     }
 
-    s->requests[i].coroutine = NULL;
+    return 0;
+}
+
+/* nbd_co_receive_one_chunk
+ * Read reply, wake up read_reply_co and set s->quit if needed.
+ * Return value is a fatal error code or normal nbd reply error code
+ */
+static coroutine_fn int nbd_co_receive_one_chunk(
+        NBDClientSession *s, uint64_t handle, bool only_structured,
+        QEMUIOVector *qiov, NBDReply *reply, void **payload, Error **errp)
+{
+    int request_ret;
+    int ret = nbd_co_do_receive_one_chunk(s, handle, only_structured,
+                                          &request_ret, qiov, payload, errp);
+
+    if (ret < 0) {
+        s->quit = true;
+    } else {
+        /* For assert at loop start in nbd_read_reply_entry */
+        if (reply) {
+            *reply = s->reply;
+        }
+        s->reply.handle = 0;
+        ret = request_ret;
+    }
 
-    /* Kick the read_reply_co to get the next reply.  */
     if (s->read_reply_co) {
         aio_co_wake(s->read_reply_co);
     }
 
+    return ret;
+}
+
+typedef struct NBDReplyChunkIter {
+    int ret;
+    Error *err;
+    bool done, only_structured;
+} NBDReplyChunkIter;
+
+static void nbd_iter_error(NBDReplyChunkIter *iter, bool fatal,
+                           int ret, Error **local_err)
+{
+    assert(ret < 0);
+
+    if (fatal || iter->ret == 0) {
+        if (iter->ret != 0) {
+            error_free(iter->err);
+            iter->err = NULL;
+        }
+        iter->ret = ret;
+        error_propagate(&iter->err, *local_err);
+    } else {
+        error_free(*local_err);
+    }
+
+    *local_err = NULL;
+}
+
+/* NBD_FOREACH_REPLY_CHUNK
+ */
+#define NBD_FOREACH_REPLY_CHUNK(s, iter, handle, structured, \
+                                qiov, reply, payload) \
+    for (iter = (NBDReplyChunkIter) { .only_structured = structured }; \
+         nbd_reply_chunk_iter_receive(s, &iter, handle, qiov, reply, payload);)
+
+/* nbd_reply_chunk_iter_receive
+ */
+static bool nbd_reply_chunk_iter_receive(NBDClientSession *s,
+                                         NBDReplyChunkIter *iter,
+                                         uint64_t handle,
+                                         QEMUIOVector *qiov, NBDReply *reply,
+                                         void **payload)
+{
+    int ret;
+    NBDReply local_reply;
+    NBDStructuredReplyChunk *chunk;
+    Error *local_err = NULL;
+    if (s->quit) {
+        error_setg(&local_err, "Connection closed");
+        nbd_iter_error(iter, true, -EIO, &local_err);
+        goto break_loop;
+    }
+
+    if (iter->done) {
+        /* Previous iteration was last. */
+        goto break_loop;
+    }
+
+    if (reply == NULL) {
+        reply = &local_reply;
+    }
+
+    ret = nbd_co_receive_one_chunk(s, handle, iter->only_structured,
+                                   qiov, reply, payload, &local_err);
+    if (ret < 0) {
+        /* If it is a fatal error s->quit is set by nbd_co_receive_one_chunk */
+        nbd_iter_error(iter, s->quit, ret, &local_err);
+    }
+
+    /* Do not execute the body of NBD_FOREACH_REPLY_CHUNK for simple reply. */
+    if (nbd_reply_is_simple(&s->reply) || s->quit) {
+        goto break_loop;
+    }
+
+    chunk = &reply->structured;
+    iter->only_structured = true;
+
+    if (chunk->type == NBD_REPLY_TYPE_NONE) {
+        /* NBD_REPLY_FLAG_DONE is already checked in nbd_co_receive_one_chunk */
+        assert(chunk->flags & NBD_REPLY_FLAG_DONE);
+        goto break_loop;
+    }
+
+    if (chunk->flags & NBD_REPLY_FLAG_DONE) {
+        /* This iteration is last. */
+        iter->done = true;
+    }
+
+    /* Execute the loop body */
+    return true;
+
+break_loop:
+    s->requests[HANDLE_TO_INDEX(s, handle)].coroutine = NULL;
+
     qemu_co_mutex_lock(&s->send_mutex);
     s->in_flight--;
     qemu_co_queue_next(&s->free_sema);
     qemu_co_mutex_unlock(&s->send_mutex);
 
-    return ret;
+    return false;
 }
 
-static int nbd_co_request(BlockDriverState *bs,
-                          NBDRequest *request,
-                          QEMUIOVector *qiov)
+static int nbd_co_receive_return_code(NBDClientSession *s, uint64_t handle,
+                                      Error **errp)
+{
+    NBDReplyChunkIter iter;
+
+    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, false, NULL, NULL, NULL) {
+        /* nbd_reply_chunk_iter_receive does all the work */
+    }
+
+    error_propagate(errp, iter.err);
+    return iter.ret;
+}
+
+static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
+                                        uint64_t offset, QEMUIOVector *qiov,
+                                        Error **errp)
+{
+    NBDReplyChunkIter iter;
+    NBDReply reply;
+    void *payload = NULL;
+    Error *local_err = NULL;
+
+    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply,
+                            qiov, &reply, &payload)
+    {
+        int ret;
+        NBDStructuredReplyChunk *chunk = &reply.structured;
+
+        assert(nbd_reply_is_structured(&reply));
+
+        switch (chunk->type) {
+        case NBD_REPLY_TYPE_OFFSET_DATA:
+            /* special cased in nbd_co_receive_one_chunk, data is already
+             * in qiov */
+            break;
+        case NBD_REPLY_TYPE_OFFSET_HOLE:
+            ret = nbd_parse_offset_hole_payload(&reply.structured, payload,
+                                                offset, qiov, &local_err);
+            if (ret < 0) {
+                s->quit = true;
+                nbd_iter_error(&iter, true, ret, &local_err);
+            }
+            break;
+        default:
+            if (!nbd_reply_type_is_error(chunk->type)) {
+                /* not allowed reply type */
+                s->quit = true;
+                error_setg(&local_err,
+                           "Unexpected reply type: %d (%s) for CMD_READ",
+                           chunk->type, nbd_reply_type_lookup(chunk->type));
+                nbd_iter_error(&iter, true, -EINVAL, &local_err);
+            }
+        }
+
+        g_free(payload);
+        payload = NULL;
+    }
+
+    error_propagate(errp, iter.err);
+    return iter.ret;
+}
+
+static int nbd_co_request(BlockDriverState *bs, NBDRequest *request,
+                          QEMUIOVector *write_qiov)
 {
-    NBDClientSession *client = nbd_get_client_session(bs);
     int ret;
+    Error *local_err = NULL;
+    NBDClientSession *client = nbd_get_client_session(bs);
 
-    if (qiov) {
-        assert(request->type == NBD_CMD_WRITE || request->type == NBD_CMD_READ);
-        assert(request->len == iov_size(qiov->iov, qiov->niov));
+    assert(request->type != NBD_CMD_READ);
+    if (write_qiov) {
+        assert(request->type == NBD_CMD_WRITE);
+        assert(request->len == iov_size(write_qiov->iov, write_qiov->niov));
     } else {
-        assert(request->type != NBD_CMD_WRITE && request->type != NBD_CMD_READ);
+        assert(request->type != NBD_CMD_WRITE);
     }
-    ret = nbd_co_send_request(bs, request,
-                              request->type == NBD_CMD_WRITE ? qiov : NULL);
+    ret = nbd_co_send_request(bs, request, write_qiov);
     if (ret < 0) {
         return ret;
     }
 
-    return nbd_co_receive_reply(client, request->handle,
-                                request->type == NBD_CMD_READ ? qiov : NULL);
+    ret = nbd_co_receive_return_code(client, request->handle, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+    }
+    return ret;
 }
 
 int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
                          uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
+    int ret;
+    Error *local_err = NULL;
+    NBDClientSession *client = nbd_get_client_session(bs);
     NBDRequest request = {
         .type = NBD_CMD_READ,
         .from = offset,
@@ -257,7 +674,17 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
     assert(bytes <= NBD_MAX_BUFFER_SIZE);
     assert(!flags);
 
-    return nbd_co_request(bs, &request, qiov);
+    ret = nbd_co_send_request(bs, &request, NULL);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = nbd_co_receive_cmdread_reply(client, request.handle, offset, qiov,
+                                       &local_err);
+    if (ret < 0) {
+        error_report_err(local_err);
+    }
+    return ret;
 }
 
 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
@@ -379,6 +806,7 @@ int nbd_client_init(BlockDriverState *bs,
     qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
 
     client->info.request_sizes = true;
+    client->info.structured_reply = true;
     ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
                                 tlscreds, hostname,
                                 &client->ioc, &client->info, errp);
diff --git a/block/nbd-client.h b/block/nbd-client.h
index b435754b82..612c4c21a0 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -19,6 +19,7 @@
 
 typedef struct {
     Coroutine *coroutine;
+    uint64_t offset;        /* original offset of the request */
     bool receiving;         /* waiting for read_reply_co? */
 } NBDClientRequest;
 
diff --git a/include/block/nbd.h b/include/block/nbd.h
index a6df5ce8b5..92d1723d7c 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -57,18 +57,48 @@ struct NBDRequest {
 };
 typedef struct NBDRequest NBDRequest;
 
-struct NBDReply {
-    uint64_t handle;
-    uint32_t error;
-};
-typedef struct NBDReply NBDReply;
-
 typedef struct NBDSimpleReply {
     uint32_t magic;  /* NBD_SIMPLE_REPLY_MAGIC */
     uint32_t error;
     uint64_t handle;
 } QEMU_PACKED NBDSimpleReply;
 
+/* Header of all structured replies */
+typedef struct NBDStructuredReplyChunk {
+    uint32_t magic;  /* NBD_STRUCTURED_REPLY_MAGIC */
+    uint16_t flags;  /* combination of NBD_REPLY_FLAG_* */
+    uint16_t type;   /* NBD_REPLY_TYPE_* */
+    uint64_t handle; /* request handle */
+    uint32_t length; /* length of payload */
+} QEMU_PACKED NBDStructuredReplyChunk;
+
+typedef union NBDReply {
+    NBDSimpleReply simple;
+    NBDStructuredReplyChunk structured;
+    struct {
+        /* @magic and @handle fields have the same offset and size both in
+         * simple reply and structured reply chunk, so let them be accessible
+         * without ".simple." or ".structured." specification
+         */
+        uint32_t magic;
+        uint32_t _skip;
+        uint64_t handle;
+    } QEMU_PACKED;
+} NBDReply;
+
+/* Header of NBD_REPLY_TYPE_OFFSET_DATA, complete NBD_REPLY_TYPE_OFFSET_HOLE */
+typedef struct NBDStructuredRead {
+    NBDStructuredReplyChunk h;
+    uint64_t offset;
+} QEMU_PACKED NBDStructuredRead;
+
+/* Header of all NBD_REPLY_TYPE_ERROR* errors */
+typedef struct NBDStructuredError {
+    NBDStructuredReplyChunk h;
+    uint32_t error;
+    uint16_t message_length;
+} QEMU_PACKED NBDStructuredError;
+
 /* Transmission (export) flags: sent from server to client during handshake,
    but describe what will happen during transmission */
 #define NBD_FLAG_HAS_FLAGS         (1 << 0) /* Flags are there */
@@ -79,6 +109,7 @@ typedef struct NBDSimpleReply {
                                                rotational media */
 #define NBD_FLAG_SEND_TRIM         (1 << 5) /* Send TRIM (discard) */
 #define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */
+#define NBD_FLAG_SEND_DF           (1 << 7) /* Send DF (Do not Fragment) */
 
 /* New-style handshake (global) flags, sent from server to client, and
    control what will happen during handshake phase. */
@@ -125,6 +156,7 @@ typedef struct NBDSimpleReply {
 /* Request flags, sent from client to server during transmission phase */
 #define NBD_CMD_FLAG_FUA        (1 << 0) /* 'force unit access' during write */
 #define NBD_CMD_FLAG_NO_HOLE    (1 << 1) /* don't punch hole on zero run */
+#define NBD_CMD_FLAG_DF         (1 << 2) /* don't fragment structured read */
 
 /* Supported request types */
 enum {
@@ -149,10 +181,49 @@ enum {
  * aren't overflowing some other buffer. */
 #define NBD_MAX_NAME_SIZE 256
 
+/* Two types of reply structures */
+#define NBD_SIMPLE_REPLY_MAGIC      0x67446698
+#define NBD_STRUCTURED_REPLY_MAGIC  0x668e33ef
+
+/* Structured reply flags */
+#define NBD_REPLY_FLAG_DONE          (1 << 0) /* This reply-chunk is last */
+
+/* Structured reply types */
+#define NBD_REPLY_ERR(value)         ((1 << 15) | (value))
+
+#define NBD_REPLY_TYPE_NONE          0
+#define NBD_REPLY_TYPE_OFFSET_DATA   1
+#define NBD_REPLY_TYPE_OFFSET_HOLE   2
+#define NBD_REPLY_TYPE_ERROR         NBD_REPLY_ERR(1)
+#define NBD_REPLY_TYPE_ERROR_OFFSET  NBD_REPLY_ERR(2)
+
+static inline bool nbd_reply_type_is_error(int type)
+{
+    return type & (1 << 15);
+}
+
+/* NBD errors are based on errno numbers, so there is a 1:1 mapping,
+ * but only a limited set of errno values is specified in the protocol.
+ * Everything else is squashed to EINVAL.
+ */
+#define NBD_SUCCESS    0
+#define NBD_EPERM      1
+#define NBD_EIO        5
+#define NBD_ENOMEM     12
+#define NBD_EINVAL     22
+#define NBD_ENOSPC     28
+#define NBD_EOVERFLOW  75
+#define NBD_ESHUTDOWN  108
+
 /* Details collected by NBD_OPT_EXPORT_NAME and NBD_OPT_GO */
 struct NBDExportInfo {
     /* Set by client before nbd_receive_negotiate() */
     bool request_sizes;
+
+    /* In-out fields, set by client before nbd_receive_negotiate() and
+     * updated by server results during nbd_receive_negotiate() */
+    bool structured_reply;
+
     /* Set by server results during nbd_receive_negotiate() */
     uint64_t size;
     uint16_t flags;
@@ -172,6 +243,7 @@ int nbd_send_request(QIOChannel *ioc, NBDRequest *request);
 int nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp);
 int nbd_client(int fd);
 int nbd_disconnect(int fd);
+int nbd_errno_to_system_errno(int err);
 
 typedef struct NBDExport NBDExport;
 typedef struct NBDClient NBDClient;
@@ -202,4 +274,26 @@ void nbd_client_put(NBDClient *client);
 void nbd_server_start(SocketAddress *addr, const char *tls_creds,
                       Error **errp);
 
+
+/* nbd_read
+ * Reads @size bytes from @ioc. Returns 0 on success.
+ */
+static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size,
+                           Error **errp)
+{
+    return qio_channel_read_all(ioc, buffer, size, errp) < 0 ? -EIO : 0;
+}
+
+static inline bool nbd_reply_is_simple(NBDReply *reply)
+{
+    return reply->magic == NBD_SIMPLE_REPLY_MAGIC;
+}
+
+static inline bool nbd_reply_is_structured(NBDReply *reply)
+{
+    return reply->magic == NBD_STRUCTURED_REPLY_MAGIC;
+}
+
+const char *nbd_reply_type_lookup(uint16_t type);
+
 #endif
diff --git a/nbd/client.c b/nbd/client.c
index cd5a2c80ac..3d680e63e1 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -22,38 +22,6 @@
 #include "trace.h"
 #include "nbd-internal.h"
 
-static int nbd_errno_to_system_errno(int err)
-{
-    int ret;
-    switch (err) {
-    case NBD_SUCCESS:
-        ret = 0;
-        break;
-    case NBD_EPERM:
-        ret = EPERM;
-        break;
-    case NBD_EIO:
-        ret = EIO;
-        break;
-    case NBD_ENOMEM:
-        ret = ENOMEM;
-        break;
-    case NBD_ENOSPC:
-        ret = ENOSPC;
-        break;
-    case NBD_ESHUTDOWN:
-        ret = ESHUTDOWN;
-        break;
-    default:
-        trace_nbd_unknown_error(err);
-        /* fallthrough */
-    case NBD_EINVAL:
-        ret = EINVAL;
-        break;
-    }
-    return ret;
-}
-
 /* Definitions for opaque data types */
 
 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
@@ -540,35 +508,61 @@ static int nbd_receive_query_exports(QIOChannel *ioc,
     }
 }
 
-static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
-                                        QCryptoTLSCreds *tlscreds,
-                                        const char *hostname, Error **errp)
+/* nbd_request_simple_option: Send an option request, and parse the reply
+ * return 1 for successful negotiation,
+ *        0 if operation is unsupported,
+ *        -1 with errp set for any other error
+ */
+static int nbd_request_simple_option(QIOChannel *ioc, int opt, Error **errp)
 {
     nbd_opt_reply reply;
-    QIOChannelTLS *tioc;
-    struct NBDTLSHandshakeData data = { 0 };
+    int error;
 
-    trace_nbd_receive_starttls_request();
-    if (nbd_send_option_request(ioc, NBD_OPT_STARTTLS, 0, NULL, errp) < 0) {
-        return NULL;
+    if (nbd_send_option_request(ioc, opt, 0, NULL, errp) < 0) {
+        return -1;
     }
 
-    trace_nbd_receive_starttls_reply();
-    if (nbd_receive_option_reply(ioc, NBD_OPT_STARTTLS, &reply, errp) < 0) {
-        return NULL;
+    if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
+        return -1;
+    }
+    error = nbd_handle_reply_err(ioc, &reply, errp);
+    if (error <= 0) {
+        return error;
     }
 
     if (reply.type != NBD_REP_ACK) {
-        error_setg(errp, "Server rejected request to start TLS %" PRIx32,
-                   reply.type);
+        error_setg(errp, "Server answered option %d (%s) with unexpected "
+                   "reply %" PRIx32 " (%s)", opt, nbd_opt_lookup(opt),
+                   reply.type, nbd_rep_lookup(reply.type));
         nbd_send_opt_abort(ioc);
-        return NULL;
+        return -1;
     }
 
     if (reply.length != 0) {
-        error_setg(errp, "Start TLS response was not zero %" PRIu32,
+        error_setg(errp, "Option %d ('%s') response length is %" PRIu32
+                   " (it should be zero)", opt, nbd_opt_lookup(opt),
                    reply.length);
         nbd_send_opt_abort(ioc);
+        return -1;
+    }
+
+    return 1;
+}
+
+static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
+                                        QCryptoTLSCreds *tlscreds,
+                                        const char *hostname, Error **errp)
+{
+    int ret;
+    QIOChannelTLS *tioc;
+    struct NBDTLSHandshakeData data = { 0 };
+
+    ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, errp);
+    if (ret <= 0) {
+        if (ret == 0) {
+            error_setg(errp, "Server don't support STARTTLS option");
+            nbd_send_opt_abort(ioc);
+        }
         return NULL;
     }
 
@@ -608,9 +602,11 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name,
     uint64_t magic;
     int rc;
     bool zeroes = true;
+    bool structured_reply = info->structured_reply;
 
     trace_nbd_receive_negotiate(tlscreds, hostname ? hostname : "<null>");
 
+    info->structured_reply = false;
     rc = -EINVAL;
 
     if (outioc) {
@@ -691,6 +687,16 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name,
         if (fixedNewStyle) {
             int result;
 
+            if (structured_reply) {
+                result = nbd_request_simple_option(ioc,
+                                                   NBD_OPT_STRUCTURED_REPLY,
+                                                   errp);
+                if (result < 0) {
+                    goto fail;
+                }
+                info->structured_reply = result == 1;
+            }
+
             /* Try NBD_OPT_GO first - if it works, we are done (it
              * also gives us a good message if the server requires
              * TLS).  If it is not available, fall back to
@@ -914,6 +920,57 @@ int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
     return nbd_write(ioc, buf, sizeof(buf), NULL);
 }
 
+/* nbd_receive_simple_reply
+ * Read simple reply except magic field (which should be already read).
+ * Payload is not read (payload is possible for CMD_READ, but here we even
+ * don't know whether it take place or not).
+ */
+static int nbd_receive_simple_reply(QIOChannel *ioc, NBDSimpleReply *reply,
+                                    Error **errp)
+{
+    int ret;
+
+    assert(reply->magic == NBD_SIMPLE_REPLY_MAGIC);
+
+    ret = nbd_read(ioc, (uint8_t *)reply + sizeof(reply->magic),
+                   sizeof(*reply) - sizeof(reply->magic), errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    be32_to_cpus(&reply->error);
+    be64_to_cpus(&reply->handle);
+
+    return 0;
+}
+
+/* nbd_receive_structured_reply_chunk
+ * Read structured reply chunk except magic field (which should be already
+ * read).
+ * Payload is not read.
+ */
+static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
+                                              NBDStructuredReplyChunk *chunk,
+                                              Error **errp)
+{
+    int ret;
+
+    assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC);
+
+    ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic),
+                   sizeof(*chunk) - sizeof(chunk->magic), errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    be16_to_cpus(&chunk->flags);
+    be16_to_cpus(&chunk->type);
+    be64_to_cpus(&chunk->handle);
+    be32_to_cpus(&chunk->length);
+
+    return 0;
+}
+
 /* nbd_receive_reply
  * Returns 1 on success
  *         0 on eof, when no data was read (errp is not set)
@@ -921,37 +978,47 @@ int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
  */
 int nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
 {
-    uint8_t buf[NBD_REPLY_SIZE];
-    uint32_t magic;
     int ret;
 
-    ret = nbd_read_eof(ioc, buf, sizeof(buf), errp);
+    ret = nbd_read_eof(ioc, &reply->magic, sizeof(reply->magic), errp);
     if (ret <= 0) {
         return ret;
     }
 
-    /* Reply
-       [ 0 ..  3]    magic   (NBD_SIMPLE_REPLY_MAGIC)
-       [ 4 ..  7]    error   (0 == no error)
-       [ 7 .. 15]    handle
-     */
-
-    magic = ldl_be_p(buf);
-    reply->error  = ldl_be_p(buf + 4);
-    reply->handle = ldq_be_p(buf + 8);
+    be32_to_cpus(&reply->magic);
 
-    reply->error = nbd_errno_to_system_errno(reply->error);
+    switch (reply->magic) {
+    case NBD_SIMPLE_REPLY_MAGIC:
+        ret = nbd_receive_simple_reply(ioc, &reply->simple, errp);
+        if (ret < 0) {
+            break;
+        }
 
-    if (reply->error == ESHUTDOWN) {
-        /* This works even on mingw which lacks a native ESHUTDOWN */
-        error_setg(errp, "server shutting down");
+        trace_nbd_receive_simple_reply(reply->simple.error,
+                                       nbd_err_lookup(reply->simple.error),
+                                       reply->handle);
+        if (reply->simple.error == NBD_ESHUTDOWN) {
+            /* This works even on mingw which lacks a native ESHUTDOWN */
+            error_setg(errp, "server shutting down");
+            return -EINVAL;
+        }
+        break;
+    case NBD_STRUCTURED_REPLY_MAGIC:
+        ret = nbd_receive_structured_reply_chunk(ioc, &reply->structured, errp);
+        if (ret < 0) {
+            break;
+        }
+        trace_nbd_receive_structured_reply_chunk(reply->structured.flags,
+                                                 reply->structured.type,
+                                                 reply->structured.handle,
+                                                 reply->structured.length);
+        break;
+    default:
+        error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", reply->magic);
         return -EINVAL;
     }
-    trace_nbd_receive_reply(magic, reply->error, reply->handle);
-
-    if (magic != NBD_SIMPLE_REPLY_MAGIC) {
-        error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
-        return -EINVAL;
+    if (ret < 0) {
+        return ret;
     }
 
     return 1;
diff --git a/nbd/common.c b/nbd/common.c
index 59a5316be9..6047d71748 100644
--- a/nbd/common.c
+++ b/nbd/common.c
@@ -18,6 +18,7 @@
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
+#include "trace.h"
 #include "nbd-internal.h"
 
 /* Discard length bytes from channel.  Return -errno on failure and 0 on
@@ -148,3 +149,86 @@ const char *nbd_cmd_lookup(uint16_t cmd)
         return "<unknown>";
     }
 }
+
+
+const char *nbd_reply_type_lookup(uint16_t type)
+{
+    switch (type) {
+    case NBD_REPLY_TYPE_NONE:
+        return "none";
+    case NBD_REPLY_TYPE_OFFSET_DATA:
+        return "data";
+    case NBD_REPLY_TYPE_OFFSET_HOLE:
+        return "hole";
+    case NBD_REPLY_TYPE_ERROR:
+        return "generic error";
+    case NBD_REPLY_TYPE_ERROR_OFFSET:
+        return "error at offset";
+    default:
+        if (type & (1 << 15)) {
+            return "<unknown error>";
+        }
+        return "<unknown>";
+    }
+}
+
+
+const char *nbd_err_lookup(int err)
+{
+    switch (err) {
+    case NBD_SUCCESS:
+        return "success";
+    case NBD_EPERM:
+        return "EPERM";
+    case NBD_EIO:
+        return "EIO";
+    case NBD_ENOMEM:
+        return "ENOMEM";
+    case NBD_EINVAL:
+        return "EINVAL";
+    case NBD_ENOSPC:
+        return "ENOSPC";
+    case NBD_EOVERFLOW:
+        return "EOVERFLOW";
+    case NBD_ESHUTDOWN:
+        return "ESHUTDOWN";
+    default:
+        return "<unknown>";
+    }
+}
+
+
+int nbd_errno_to_system_errno(int err)
+{
+    int ret;
+    switch (err) {
+    case NBD_SUCCESS:
+        ret = 0;
+        break;
+    case NBD_EPERM:
+        ret = EPERM;
+        break;
+    case NBD_EIO:
+        ret = EIO;
+        break;
+    case NBD_ENOMEM:
+        ret = ENOMEM;
+        break;
+    case NBD_ENOSPC:
+        ret = ENOSPC;
+        break;
+    case NBD_EOVERFLOW:
+        ret = EOVERFLOW;
+        break;
+    case NBD_ESHUTDOWN:
+        ret = ESHUTDOWN;
+        break;
+    default:
+        trace_nbd_unknown_error(err);
+        /* fallthrough */
+    case NBD_EINVAL:
+        ret = EINVAL;
+        break;
+    }
+    return ret;
+}
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 11a130d050..eeff78d3c9 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -47,7 +47,6 @@
 #define NBD_OLDSTYLE_NEGOTIATE_SIZE (8 + 8 + 8 + 4 + 124)
 
 #define NBD_REQUEST_MAGIC           0x25609513
-#define NBD_SIMPLE_REPLY_MAGIC      0x67446698
 #define NBD_OPTS_MAGIC              0x49484156454F5054LL
 #define NBD_CLIENT_MAGIC            0x0000420281861253LL
 #define NBD_REP_MAGIC               0x0003e889045565a9LL
@@ -64,18 +63,6 @@
 #define NBD_SET_TIMEOUT             _IO(0xab, 9)
 #define NBD_SET_FLAGS               _IO(0xab, 10)
 
-/* NBD errors are based on errno numbers, so there is a 1:1 mapping,
- * but only a limited set of errno values is specified in the protocol.
- * Everything else is squashed to EINVAL.
- */
-#define NBD_SUCCESS    0
-#define NBD_EPERM      1
-#define NBD_EIO        5
-#define NBD_ENOMEM     12
-#define NBD_EINVAL     22
-#define NBD_ENOSPC     28
-#define NBD_ESHUTDOWN  108
-
 /* nbd_read_eof
  * Tries to read @size bytes from @ioc.
  * Returns 1 on success
@@ -95,15 +82,6 @@ static inline int nbd_read_eof(QIOChannel *ioc, void *buffer, size_t size,
     return ret;
 }
 
-/* nbd_read
- * Reads @size bytes from @ioc. Returns 0 on success.
- */
-static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size,
-                           Error **errp)
-{
-    return qio_channel_read_all(ioc, buffer, size, errp) < 0 ? -EIO : 0;
-}
-
 /* nbd_write
  * Writes @size bytes to @ioc. Returns 0 on success.
  */
@@ -126,6 +104,7 @@ const char *nbd_opt_lookup(uint32_t opt);
 const char *nbd_rep_lookup(uint32_t rep);
 const char *nbd_info_lookup(uint16_t info);
 const char *nbd_cmd_lookup(uint16_t info);
+const char *nbd_err_lookup(int err);
 
 int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);
 
diff --git a/nbd/server.c b/nbd/server.c
index 3df3548d6d..70b40ed27e 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -40,6 +40,8 @@ static int system_errno_to_nbd_errno(int err)
     case EFBIG:
     case ENOSPC:
         return NBD_ENOSPC;
+    case EOVERFLOW:
+        return NBD_EOVERFLOW;
     case ESHUTDOWN:
         return NBD_ESHUTDOWN;
     case EINVAL:
@@ -98,6 +100,8 @@ struct NBDClient {
     QTAILQ_ENTRY(NBDClient) next;
     int nb_requests;
     bool closing;
+
+    bool structured_reply;
 };
 
 /* That's all folks */
@@ -251,21 +255,10 @@ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp,
 
 /* Process the NBD_OPT_LIST command, with a potential series of replies.
  * Return -errno on error, 0 on success. */
-static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length,
-                                     Error **errp)
+static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
 {
     NBDExport *exp;
 
-    if (length) {
-        if (nbd_drop(client->ioc, length, errp) < 0) {
-            return -EIO;
-        }
-        return nbd_negotiate_send_rep_err(client->ioc,
-                                          NBD_REP_ERR_INVALID, NBD_OPT_LIST,
-                                          errp,
-                                          "OPT_LIST should not have length");
-    }
-
     /* For each export, send a NBD_REP_SERVER reply. */
     QTAILQ_FOREACH(exp, &exports, next) {
         if (nbd_negotiate_send_rep_list(client->ioc, exp, errp)) {
@@ -529,7 +522,6 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint32_t length,
 /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
  * new channel for all further (now-encrypted) communication. */
 static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
-                                                 uint32_t length,
                                                  Error **errp)
 {
     QIOChannel *ioc;
@@ -538,15 +530,6 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
 
     trace_nbd_negotiate_handle_starttls();
     ioc = client->ioc;
-    if (length) {
-        if (nbd_drop(ioc, length, errp) < 0) {
-            return NULL;
-        }
-        nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
-                                   errp,
-                                   "OPT_STARTTLS should not have length");
-        return NULL;
-    }
 
     if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
                                NBD_OPT_STARTTLS, errp) < 0) {
@@ -582,6 +565,34 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
     return QIO_CHANNEL(tioc);
 }
 
+/* nbd_reject_length: Handle any unexpected payload.
+ * @fatal requests that we quit talking to the client, even if we are able
+ * to successfully send an error to the guest.
+ * Return:
+ * -errno  transmission error occurred or @fatal was requested, errp is set
+ * 0       error message successfully sent to client, errp is not set
+ */
+static int nbd_reject_length(NBDClient *client, uint32_t length,
+                             uint32_t option, bool fatal, Error **errp)
+{
+    int ret;
+
+    assert(length);
+    if (nbd_drop(client->ioc, length, errp) < 0) {
+        return -EIO;
+    }
+    ret = nbd_negotiate_send_rep_err(client->ioc, NBD_REP_ERR_INVALID,
+                                     option, errp,
+                                     "option '%s' should have zero length",
+                                     nbd_opt_lookup(option));
+    if (fatal && !ret) {
+        error_setg(errp, "option '%s' should have zero length",
+                   nbd_opt_lookup(option));
+        return -EINVAL;
+    }
+    return ret;
+}
+
 /* nbd_negotiate_options
  * Process all NBD_OPT_* client option commands, during fixed newstyle
  * negotiation.
@@ -672,10 +683,17 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
             }
             switch (option) {
             case NBD_OPT_STARTTLS:
-                tioc = nbd_negotiate_handle_starttls(client, length, errp);
+                if (length) {
+                    /* Unconditionally drop the connection if the client
+                     * can't start a TLS negotiation correctly */
+                    return nbd_reject_length(client, length, option, true,
+                                             errp);
+                }
+                tioc = nbd_negotiate_handle_starttls(client, errp);
                 if (!tioc) {
                     return -EIO;
                 }
+                ret = 0;
                 object_unref(OBJECT(client->ioc));
                 client->ioc = QIO_CHANNEL(tioc);
                 break;
@@ -696,9 +714,6 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
                                                  "Option 0x%" PRIx32
                                                  "not permitted before TLS",
                                                  option);
-                if (ret < 0) {
-                    return ret;
-                }
                 /* Let the client keep trying, unless they asked to
                  * quit. In this mode, we've already sent an error, so
                  * we can't ack the abort.  */
@@ -710,9 +725,11 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
         } else if (fixedNewstyle) {
             switch (option) {
             case NBD_OPT_LIST:
-                ret = nbd_negotiate_handle_list(client, length, errp);
-                if (ret < 0) {
-                    return ret;
+                if (length) {
+                    ret = nbd_reject_length(client, length, option, false,
+                                            errp);
+                } else {
+                    ret = nbd_negotiate_handle_list(client, errp);
                 }
                 break;
 
@@ -736,16 +753,13 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
                     assert(option == NBD_OPT_GO);
                     return 0;
                 }
-                if (ret) {
-                    return ret;
-                }
                 break;
 
             case NBD_OPT_STARTTLS:
-                if (nbd_drop(client->ioc, length, errp) < 0) {
-                    return -EIO;
-                }
-                if (client->tlscreds) {
+                if (length) {
+                    ret = nbd_reject_length(client, length, option, false,
+                                            errp);
+                } else if (client->tlscreds) {
                     ret = nbd_negotiate_send_rep_err(client->ioc,
                                                      NBD_REP_ERR_INVALID,
                                                      option, errp,
@@ -756,10 +770,24 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
                                                      option, errp,
                                                      "TLS not configured");
                 }
-                if (ret < 0) {
-                    return ret;
+                break;
+
+            case NBD_OPT_STRUCTURED_REPLY:
+                if (length) {
+                    ret = nbd_reject_length(client, length, option, false,
+                                            errp);
+                } else if (client->structured_reply) {
+                    ret = nbd_negotiate_send_rep_err(
+                        client->ioc, NBD_REP_ERR_INVALID, option, errp,
+                        "structured reply already negotiated");
+                } else {
+                    ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
+                                                 option, errp);
+                    client->structured_reply = true;
+                    myflags |= NBD_FLAG_SEND_DF;
                 }
                 break;
+
             default:
                 if (nbd_drop(client->ioc, length, errp) < 0) {
                     return -EIO;
@@ -770,9 +798,6 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
                                                  "Unsupported option 0x%"
                                                  PRIx32 " (%s)", option,
                                                  nbd_opt_lookup(option));
-                if (ret < 0) {
-                    return ret;
-                }
                 break;
             }
         } else {
@@ -792,6 +817,9 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
                 return -EINVAL;
             }
         }
+        if (ret < 0) {
+            return ret;
+        }
     }
 }
 
@@ -1227,12 +1255,68 @@ static int nbd_co_send_simple_reply(NBDClient *client,
         {.iov_base = data, .iov_len = len}
     };
 
-    trace_nbd_co_send_simple_reply(handle, nbd_err, len);
+    trace_nbd_co_send_simple_reply(handle, nbd_err, nbd_err_lookup(nbd_err),
+                                   len);
     set_be_simple_reply(&reply, nbd_err, handle);
 
     return nbd_co_send_iov(client, iov, len ? 2 : 1, errp);
 }
 
+static inline void set_be_chunk(NBDStructuredReplyChunk *chunk, uint16_t flags,
+                                uint16_t type, uint64_t handle, uint32_t length)
+{
+    stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
+    stw_be_p(&chunk->flags, flags);
+    stw_be_p(&chunk->type, type);
+    stq_be_p(&chunk->handle, handle);
+    stl_be_p(&chunk->length, length);
+}
+
+static int coroutine_fn nbd_co_send_structured_read(NBDClient *client,
+                                                    uint64_t handle,
+                                                    uint64_t offset,
+                                                    void *data,
+                                                    size_t size,
+                                                    Error **errp)
+{
+    NBDStructuredRead chunk;
+    struct iovec iov[] = {
+        {.iov_base = &chunk, .iov_len = sizeof(chunk)},
+        {.iov_base = data, .iov_len = size}
+    };
+
+    trace_nbd_co_send_structured_read(handle, offset, data, size);
+    set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_OFFSET_DATA,
+                 handle, sizeof(chunk) - sizeof(chunk.h) + size);
+    stq_be_p(&chunk.offset, offset);
+
+    return nbd_co_send_iov(client, iov, 2, errp);
+}
+
+static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
+                                                     uint64_t handle,
+                                                     uint32_t error,
+                                                     const char *msg,
+                                                     Error **errp)
+{
+    NBDStructuredError chunk;
+    int nbd_err = system_errno_to_nbd_errno(error);
+    struct iovec iov[] = {
+        {.iov_base = &chunk, .iov_len = sizeof(chunk)},
+        {.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0},
+    };
+
+    assert(nbd_err);
+    trace_nbd_co_send_structured_error(handle, nbd_err,
+                                       nbd_err_lookup(nbd_err), msg ? msg : "");
+    set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
+                 sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
+    stl_be_p(&chunk.error, nbd_err);
+    stw_be_p(&chunk.message_length, iov[1].iov_len);
+
+    return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp);
+}
+
 /* nbd_co_receive_request
  * Collect a client request. Return 0 if request looks valid, -EIO to drop
  * connection right away, and any other negative value to report an error to
@@ -1243,6 +1327,7 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
                                   Error **errp)
 {
     NBDClient *client = req->client;
+    int valid_flags;
 
     g_assert(qemu_in_coroutine());
     assert(client->recv_coroutine == qemu_coroutine_self());
@@ -1304,13 +1389,15 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
                    (uint64_t)client->exp->size);
         return request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
     }
-    if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
-        error_setg(errp, "unsupported flags (got 0x%x)", request->flags);
-        return -EINVAL;
+    valid_flags = NBD_CMD_FLAG_FUA;
+    if (request->type == NBD_CMD_READ && client->structured_reply) {
+        valid_flags |= NBD_CMD_FLAG_DF;
+    } else if (request->type == NBD_CMD_WRITE_ZEROES) {
+        valid_flags |= NBD_CMD_FLAG_NO_HOLE;
     }
-    if (request->type != NBD_CMD_WRITE_ZEROES &&
-        (request->flags & NBD_CMD_FLAG_NO_HOLE)) {
-        error_setg(errp, "unexpected flags (got 0x%x)", request->flags);
+    if (request->flags & ~valid_flags) {
+        error_setg(errp, "unsupported flags for command %s (got 0x%x)",
+                   nbd_cmd_lookup(request->type), request->flags);
         return -EINVAL;
     }
 
@@ -1328,6 +1415,7 @@ static coroutine_fn void nbd_trip(void *opaque)
     int flags;
     int reply_data_len = 0;
     Error *local_err = NULL;
+    char *msg = NULL;
 
     trace_nbd_trip();
     if (client->closing) {
@@ -1378,6 +1466,7 @@ static coroutine_fn void nbd_trip(void *opaque)
         break;
     case NBD_CMD_WRITE:
         if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
+            error_setg(&local_err, "Export is read-only");
             ret = -EROFS;
             break;
         }
@@ -1395,7 +1484,7 @@ static coroutine_fn void nbd_trip(void *opaque)
         break;
     case NBD_CMD_WRITE_ZEROES:
         if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
-            error_setg(&local_err, "Server is read-only, return error");
+            error_setg(&local_err, "Export is read-only");
             ret = -EROFS;
             break;
         }
@@ -1443,14 +1532,29 @@ reply:
     if (local_err) {
         /* If we get here, local_err was not a fatal error, and should be sent
          * to the client. */
+        assert(ret < 0);
+        msg = g_strdup(error_get_pretty(local_err));
         error_report_err(local_err);
         local_err = NULL;
     }
 
-    if (nbd_co_send_simple_reply(req->client, request.handle,
-                                 ret < 0 ? -ret : 0,
-                                 req->data, reply_data_len, &local_err) < 0)
-    {
+    if (client->structured_reply &&
+        (ret < 0 || request.type == NBD_CMD_READ)) {
+        if (ret < 0) {
+            ret = nbd_co_send_structured_error(req->client, request.handle,
+                                               -ret, msg, &local_err);
+        } else {
+            ret = nbd_co_send_structured_read(req->client, request.handle,
+                                              request.from, req->data,
+                                              reply_data_len, &local_err);
+        }
+    } else {
+        ret = nbd_co_send_simple_reply(req->client, request.handle,
+                                       ret < 0 ? -ret : 0,
+                                       req->data, reply_data_len, &local_err);
+    }
+    g_free(msg);
+    if (ret < 0) {
         error_prepend(&local_err, "Failed to send reply: ");
         goto disconnect;
     }
diff --git a/nbd/trace-events b/nbd/trace-events
index e27614f050..4a13757524 100644
--- a/nbd/trace-events
+++ b/nbd/trace-events
@@ -1,5 +1,4 @@
 # nbd/client.c
-nbd_unknown_error(int err) "Squashing unexpected error %d to EINVAL"
 nbd_send_option_request(uint32_t opt, const char *name, uint32_t len) "Sending option request %" PRIu32" (%s), len %" PRIu32
 nbd_receive_option_reply(uint32_t option, const char *optname, uint32_t type, const char *typename, uint32_t length) "Received option reply 0x%" PRIx32" (%s), type 0x%" PRIx32" (%s), len %" PRIu32
 nbd_reply_err_unsup(uint32_t option, const char *name) "server doesn't understand request 0x%" PRIx32 " (%s), attempting fallback"
@@ -9,9 +8,7 @@ nbd_opt_go_info_unknown(int info, const char *name) "Ignoring unknown info %d (%
 nbd_opt_go_info_block_size(uint32_t minimum, uint32_t preferred, uint32_t maximum) "Block sizes are 0x%" PRIx32 ", 0x%" PRIx32 ", 0x%" PRIx32
 nbd_receive_query_exports_start(const char *wantname) "Querying export list for '%s'"
 nbd_receive_query_exports_success(const char *wantname) "Found desired export name '%s'"
-nbd_receive_starttls_request(void) "Requesting TLS from server"
-nbd_receive_starttls_reply(void) "Getting TLS reply from server"
-nbd_receive_starttls_new_client(void) "TLS request approved, setting up TLS"
+nbd_receive_starttls_new_client(void) "Setting up TLS"
 nbd_receive_starttls_tls_handshake(void) "Starting TLS handshake"
 nbd_receive_negotiate(void *tlscreds, const char *hostname) "Receiving negotiation tlscreds=%p hostname=%s"
 nbd_receive_negotiate_magic(uint64_t magic) "Magic is 0x%" PRIx64
@@ -29,7 +26,11 @@ nbd_client_loop_ret(int ret, const char *error) "NBD loop returned %d: %s"
 nbd_client_clear_queue(void) "Clearing NBD queue"
 nbd_client_clear_socket(void) "Clearing NBD socket"
 nbd_send_request(uint64_t from, uint32_t len, uint64_t handle, uint16_t flags, uint16_t type, const char *name) "Sending request to server: { .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) }"
-nbd_receive_reply(uint32_t magic, int32_t error, uint64_t handle) "Got reply: { magic = 0x%" PRIx32 ", .error = % " PRId32 ", handle = %" PRIu64" }"
+nbd_receive_simple_reply(int32_t error, const char *errname, uint64_t handle) "Got simple reply: { .error = %" PRId32 " (%s), handle = %" PRIu64" }"
+nbd_receive_structured_reply_chunk(uint16_t flags, uint16_t type, uint64_t handle, uint32_t length) "Got structured reply chunk: { flags = 0x%" PRIx16 ", type = %d, handle = %" PRIu64 ", length = %" PRIu32 " }"
+
+# nbd/common.c
+nbd_unknown_error(int err) "Squashing unexpected error %d to EINVAL"
 
 # nbd/server.c
 nbd_negotiate_send_rep_len(uint32_t opt, const char *optname, uint32_t type, const char *typename, uint32_t len) "Reply opt=0x%" PRIx32 " (%s), type=0x%" PRIx32 " (%s), len=%" PRIu32
@@ -53,7 +54,9 @@ nbd_negotiate_success(void) "Negotiation succeeded"
 nbd_receive_request(uint32_t magic, uint16_t flags, uint16_t type, uint64_t from, uint32_t len) "Got request: { magic = 0x%" PRIx32 ", .flags = 0x%" PRIx16 ", .type = 0x%" PRIx16 ", from = %" PRIu64 ", len = %" PRIu32 " }"
 nbd_blk_aio_attached(const char *name, void *ctx) "Export %s: Attaching clients to AIO context %p\n"
 nbd_blk_aio_detach(const char *name, void *ctx) "Export %s: Detaching clients from AIO context %p\n"
-nbd_co_send_simple_reply(uint64_t handle, uint32_t error, int len) "Send simple reply: handle = %" PRIu64 ", error = %" PRIu32 ", len = %d"
+nbd_co_send_simple_reply(uint64_t handle, uint32_t error, const char *errname, int len) "Send simple reply: handle = %" PRIu64 ", error = %" PRIu32 " (%s), len = %d"
+nbd_co_send_structured_read(uint64_t handle, uint64_t offset, void *data, size_t size) "Send structured read data reply: handle = %" PRIu64 ", offset = %" PRIu64 ", data = %p, len = %zu"
+nbd_co_send_structured_error(uint64_t handle, int err, const char *errname, const char *msg) "Send structured error reply: handle = %" PRIu64 ", error = %d (%s), msg = '%s'"
 nbd_co_receive_request_decode_type(uint64_t handle, uint16_t type, const char *name) "Decoding type: handle = %" PRIu64 ", type = %" PRIu16 " (%s)"
 nbd_co_receive_request_payload_received(uint64_t handle, uint32_t len) "Payload received: handle = %" PRIu64 ", len = %" PRIu32
 nbd_co_receive_request_cmd_write(uint32_t len) "Reading %" PRIu32 " byte(s)"
diff --git a/tests/qemu-iotests/083.out b/tests/qemu-iotests/083.out
index 25dde519e3..be6079d27e 100644
--- a/tests/qemu-iotests/083.out
+++ b/tests/qemu-iotests/083.out
@@ -41,6 +41,7 @@ can't open device nbd+tcp://127.0.0.1:PORT/foo
 
 === Check disconnect after neg2 ===
 
+Connection closed
 read failed: Input/output error
 
 === Check disconnect 8 neg2 ===
@@ -53,32 +54,39 @@ can't open device nbd+tcp://127.0.0.1:PORT/foo
 
 === Check disconnect before request ===
 
+Connection closed
 read failed: Input/output error
 
 === Check disconnect after request ===
 
+Connection closed
 read failed: Input/output error
 
 === Check disconnect before reply ===
 
+Connection closed
 read failed: Input/output error
 
 === Check disconnect after reply ===
 
+Unexpected end-of-file before all bytes were read
 read failed: Input/output error
 
 === Check disconnect 4 reply ===
 
 Unexpected end-of-file before all bytes were read
+Connection closed
 read failed: Input/output error
 
 === Check disconnect 8 reply ===
 
 Unexpected end-of-file before all bytes were read
+Connection closed
 read failed: Input/output error
 
 === Check disconnect before data ===
 
+Unexpected end-of-file before all bytes were read
 read failed: Input/output error
 
 === Check disconnect after data ===
@@ -108,6 +116,7 @@ can't open device nbd+tcp://127.0.0.1:PORT/
 
 === Check disconnect after neg-classic ===
 
+Connection closed
 read failed: Input/output error
 
 === Check disconnect before neg1 ===
@@ -168,28 +177,34 @@ read failed: Input/output error
 
 === Check disconnect after request ===
 
+Connection closed
 read failed: Input/output error
 
 === Check disconnect before reply ===
 
+Connection closed
 read failed: Input/output error
 
 === Check disconnect after reply ===
 
+Unexpected end-of-file before all bytes were read
 read failed: Input/output error
 
 === Check disconnect 4 reply ===
 
 Unexpected end-of-file before all bytes were read
+Connection closed
 read failed: Input/output error
 
 === Check disconnect 8 reply ===
 
 Unexpected end-of-file before all bytes were read
+Connection closed
 read failed: Input/output error
 
 === Check disconnect before data ===
 
+Unexpected end-of-file before all bytes were read
 read failed: Input/output error
 
 === Check disconnect after data ===