summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2019-02-05 12:46:18 +0000
committerPeter Maydell <peter.maydell@linaro.org>2019-02-05 12:46:18 +0000
commit1c3d45df5e94042d5fb2bb31416072563ab30e49 (patch)
treeb93bbcb015f1a7e2b2cc14d26930536b0559faa1
parent9ae805637a9cfab2edc15f56fbc3219815c8118e (diff)
parentbc5a03350c220698229e7d6929dd242d5d358345 (diff)
downloadfocaccia-qemu-1c3d45df5e94042d5fb2bb31416072563ab30e49.tar.gz
focaccia-qemu-1c3d45df5e94042d5fb2bb31416072563ab30e49.zip
Merge remote-tracking branch 'remotes/ericb/tags/pull-nbd-2019-02-04' into staging
nbd patches for 2019-02-04

- deprecate 'qemu-nbd --partition'
- preparation for NBD reconnect, including better logging of read errors

# gpg: Signature made Tue 05 Feb 2019 03:50:56 GMT
# gpg:                using RSA key A7A16B4A2527436A
# gpg: Good signature from "Eric Blake <eblake@redhat.com>" [full]
# gpg:                 aka "Eric Blake (Free Software Programmer) <ebb9@byu.net>" [full]
# gpg:                 aka "[jpeg image of size 6874]" [full]
# Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2  F3AA A7A1 6B4A 2527 436A

* remotes/ericb/tags/pull-nbd-2019-02-04:
  block/nbd-client: rename read_reply_co to connection_co
  block/nbd-client: don't check ioc
  block/nbd-client: fix nbd_reply_chunk_iter_receive
  block/nbd-client: split connection from initialization
  block/nbd: move connection code from block/nbd to block/nbd-client
  block/nbd-client: split channel errors from export errors
  nbd: generalize usage of nbd_read
  qemu-nbd: Deprecate qemu-nbd --partition

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--block/nbd-client.c199
-rw-r--r--block/nbd-client.h6
-rw-r--r--block/nbd.c40
-rw-r--r--include/block/nbd.h32
-rw-r--r--nbd/client.c88
-rw-r--r--nbd/common.c2
-rw-r--r--nbd/server.c27
-rw-r--r--qemu-deprecated.texi33
-rw-r--r--qemu-nbd.c2
-rw-r--r--qemu-nbd.texi6
10 files changed, 239 insertions, 196 deletions
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 50a8dadd85..f0ad54ce21 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -53,15 +53,13 @@ static void nbd_teardown_connection(BlockDriverState *bs)
 {
     NBDClientSession *client = nbd_get_client_session(bs);
 
-    if (!client->ioc) { /* Already closed */
-        return;
-    }
+    assert(client->ioc);
 
     /* finish any pending coroutines */
     qio_channel_shutdown(client->ioc,
                          QIO_CHANNEL_SHUTDOWN_BOTH,
                          NULL);
-    BDRV_POLL_WHILE(bs, client->read_reply_co);
+    BDRV_POLL_WHILE(bs, client->connection_co);
 
     nbd_client_detach_aio_context(bs);
     object_unref(OBJECT(client->sioc));
@@ -70,7 +68,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
     client->ioc = NULL;
 }
 
-static coroutine_fn void nbd_read_reply_entry(void *opaque)
+static coroutine_fn void nbd_connection_entry(void *opaque)
 {
     NBDClientSession *s = opaque;
     uint64_t i;
@@ -102,14 +100,14 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
         }
 
         /* We're woken up again by the request itself.  Note that there
-         * is no race between yielding and reentering read_reply_co.  This
+         * is no race between yielding and reentering connection_co.  This
          * is because:
          *
          * - if the request runs on the same AioContext, it is only
          *   entered after we yield
          *
          * - if the request runs on a different AioContext, reentering
-         *   read_reply_co happens through a bottom half, which can only
+         *   connection_co happens through a bottom half, which can only
          *   run after we yield.
          */
         aio_co_wake(s->requests[i].coroutine);
@@ -118,7 +116,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
 
     s->quit = true;
     nbd_recv_coroutines_wake_all(s);
-    s->read_reply_co = NULL;
+    s->connection_co = NULL;
     aio_wait_kick();
 }
 
@@ -154,10 +152,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
         rc = -EIO;
         goto err;
     }
-    if (!s->ioc) {
-        rc = -EPIPE;
-        goto err;
-    }
+    assert(s->ioc);
 
     if (qiov) {
         qio_channel_set_cork(s->ioc, true);
@@ -338,10 +333,9 @@ static int nbd_co_receive_offset_data_payload(NBDClientSession *s,
         return -EINVAL;
     }
 
-    if (nbd_read(s->ioc, &offset, sizeof(offset), errp) < 0) {
+    if (nbd_read64(s->ioc, &offset, "OFFSET_DATA offset", errp) < 0) {
         return -EIO;
     }
-    be64_to_cpus(&offset);
 
     data_size = chunk->length - sizeof(offset);
     assert(data_size);
@@ -388,7 +382,7 @@ static coroutine_fn int nbd_co_receive_structured_payload(
     }
 
     *payload = g_new(char, len);
-    ret = nbd_read(s->ioc, *payload, len, errp);
+    ret = nbd_read(s->ioc, *payload, len, "structured payload", errp);
     if (ret < 0) {
         g_free(*payload);
         *payload = NULL;
@@ -426,14 +420,15 @@ static coroutine_fn int nbd_co_do_receive_one_chunk(
     }
     *request_ret = 0;
 
-    /* Wait until we're woken up by nbd_read_reply_entry.  */
+    /* Wait until we're woken up by nbd_connection_entry.  */
     s->requests[i].receiving = true;
     qemu_coroutine_yield();
     s->requests[i].receiving = false;
-    if (!s->ioc || s->quit) {
+    if (s->quit) {
         error_setg(errp, "Connection closed");
         return -EIO;
     }
+    assert(s->ioc);
 
     assert(s->reply.handle == handle);
 
@@ -500,30 +495,29 @@ static coroutine_fn int nbd_co_do_receive_one_chunk(
 }
 
 /* nbd_co_receive_one_chunk
- * Read reply, wake up read_reply_co and set s->quit if needed.
+ * Read reply, wake up connection_co and set s->quit if needed.
  * Return value is a fatal error code or normal nbd reply error code
  */
 static coroutine_fn int nbd_co_receive_one_chunk(
         NBDClientSession *s, uint64_t handle, bool only_structured,
-        QEMUIOVector *qiov, NBDReply *reply, void **payload, Error **errp)
+        int *request_ret, QEMUIOVector *qiov, NBDReply *reply, void **payload,
+        Error **errp)
 {
-    int request_ret;
     int ret = nbd_co_do_receive_one_chunk(s, handle, only_structured,
-                                          &request_ret, qiov, payload, errp);
+                                          request_ret, qiov, payload, errp);
 
     if (ret < 0) {
         s->quit = true;
     } else {
-        /* For assert at loop start in nbd_read_reply_entry */
+        /* For assert at loop start in nbd_connection_entry */
         if (reply) {
             *reply = s->reply;
         }
         s->reply.handle = 0;
-        ret = request_ret;
     }
 
-    if (s->read_reply_co) {
-        aio_co_wake(s->read_reply_co);
+    if (s->connection_co) {
+        aio_co_wake(s->connection_co);
     }
 
     return ret;
@@ -531,22 +525,17 @@ static coroutine_fn int nbd_co_receive_one_chunk(
 
 typedef struct NBDReplyChunkIter {
     int ret;
-    bool fatal;
+    int request_ret;
     Error *err;
     bool done, only_structured;
 } NBDReplyChunkIter;
 
-static void nbd_iter_error(NBDReplyChunkIter *iter, bool fatal,
-                           int ret, Error **local_err)
+static void nbd_iter_channel_error(NBDReplyChunkIter *iter,
+                                   int ret, Error **local_err)
 {
     assert(ret < 0);
 
-    if ((fatal && !iter->fatal) || iter->ret == 0) {
-        if (iter->ret != 0) {
-            error_free(iter->err);
-            iter->err = NULL;
-        }
-        iter->fatal = fatal;
+    if (!iter->ret) {
         iter->ret = ret;
         error_propagate(&iter->err, *local_err);
     } else {
@@ -556,6 +545,15 @@ static void nbd_iter_error(NBDReplyChunkIter *iter, bool fatal,
     *local_err = NULL;
 }
 
+static void nbd_iter_request_error(NBDReplyChunkIter *iter, int ret)
+{
+    assert(ret < 0);
+
+    if (!iter->request_ret) {
+        iter->request_ret = ret;
+    }
+}
+
 /* NBD_FOREACH_REPLY_CHUNK
  */
 #define NBD_FOREACH_REPLY_CHUNK(s, iter, handle, structured, \
@@ -571,13 +569,13 @@ static bool nbd_reply_chunk_iter_receive(NBDClientSession *s,
                                          QEMUIOVector *qiov, NBDReply *reply,
                                          void **payload)
 {
-    int ret;
+    int ret, request_ret;
     NBDReply local_reply;
     NBDStructuredReplyChunk *chunk;
     Error *local_err = NULL;
     if (s->quit) {
         error_setg(&local_err, "Connection closed");
-        nbd_iter_error(iter, true, -EIO, &local_err);
+        nbd_iter_channel_error(iter, -EIO, &local_err);
         goto break_loop;
     }
 
@@ -591,14 +589,16 @@ static bool nbd_reply_chunk_iter_receive(NBDClientSession *s,
     }
 
     ret = nbd_co_receive_one_chunk(s, handle, iter->only_structured,
-                                   qiov, reply, payload, &local_err);
+                                   &request_ret, qiov, reply, payload,
+                                   &local_err);
     if (ret < 0) {
-        /* If it is a fatal error s->quit is set by nbd_co_receive_one_chunk */
-        nbd_iter_error(iter, s->quit, ret, &local_err);
+        nbd_iter_channel_error(iter, ret, &local_err);
+    } else if (request_ret < 0) {
+        nbd_iter_request_error(iter, request_ret);
     }
 
     /* Do not execute the body of NBD_FOREACH_REPLY_CHUNK for simple reply. */
-    if (nbd_reply_is_simple(&s->reply) || s->quit) {
+    if (nbd_reply_is_simple(reply) || s->quit) {
         goto break_loop;
     }
 
@@ -631,7 +631,7 @@ break_loop:
 }
 
 static int nbd_co_receive_return_code(NBDClientSession *s, uint64_t handle,
-                                      Error **errp)
+                                      int *request_ret, Error **errp)
 {
     NBDReplyChunkIter iter;
 
@@ -640,12 +640,13 @@ static int nbd_co_receive_return_code(NBDClientSession *s, uint64_t handle,
     }
 
     error_propagate(errp, iter.err);
+    *request_ret = iter.request_ret;
     return iter.ret;
 }
 
 static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
                                         uint64_t offset, QEMUIOVector *qiov,
-                                        Error **errp)
+                                        int *request_ret, Error **errp)
 {
     NBDReplyChunkIter iter;
     NBDReply reply;
@@ -670,7 +671,7 @@ static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
                                                 offset, qiov, &local_err);
             if (ret < 0) {
                 s->quit = true;
-                nbd_iter_error(&iter, true, ret, &local_err);
+                nbd_iter_channel_error(&iter, ret, &local_err);
             }
             break;
         default:
@@ -680,7 +681,7 @@ static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
                 error_setg(&local_err,
                            "Unexpected reply type: %d (%s) for CMD_READ",
                            chunk->type, nbd_reply_type_lookup(chunk->type));
-                nbd_iter_error(&iter, true, -EINVAL, &local_err);
+                nbd_iter_channel_error(&iter, -EINVAL, &local_err);
             }
         }
 
@@ -689,12 +690,14 @@ static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
     }
 
     error_propagate(errp, iter.err);
+    *request_ret = iter.request_ret;
     return iter.ret;
 }
 
 static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
                                             uint64_t handle, uint64_t length,
-                                            NBDExtent *extent, Error **errp)
+                                            NBDExtent *extent,
+                                            int *request_ret, Error **errp)
 {
     NBDReplyChunkIter iter;
     NBDReply reply;
@@ -716,7 +719,7 @@ static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
             if (received) {
                 s->quit = true;
                 error_setg(&local_err, "Several BLOCK_STATUS chunks in reply");
-                nbd_iter_error(&iter, true, -EINVAL, &local_err);
+                nbd_iter_channel_error(&iter, -EINVAL, &local_err);
             }
             received = true;
 
@@ -725,7 +728,7 @@ static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
                                                 &local_err);
             if (ret < 0) {
                 s->quit = true;
-                nbd_iter_error(&iter, true, ret, &local_err);
+                nbd_iter_channel_error(&iter, ret, &local_err);
             }
             break;
         default:
@@ -735,7 +738,7 @@ static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
                            "Unexpected reply type: %d (%s) "
                            "for CMD_BLOCK_STATUS",
                            chunk->type, nbd_reply_type_lookup(chunk->type));
-                nbd_iter_error(&iter, true, -EINVAL, &local_err);
+                nbd_iter_channel_error(&iter, -EINVAL, &local_err);
             }
         }
 
@@ -750,14 +753,16 @@ static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
             iter.ret = -EIO;
         }
     }
+
     error_propagate(errp, iter.err);
+    *request_ret = iter.request_ret;
     return iter.ret;
 }
 
 static int nbd_co_request(BlockDriverState *bs, NBDRequest *request,
                           QEMUIOVector *write_qiov)
 {
-    int ret;
+    int ret, request_ret;
     Error *local_err = NULL;
     NBDClientSession *client = nbd_get_client_session(bs);
 
@@ -773,7 +778,8 @@ static int nbd_co_request(BlockDriverState *bs, NBDRequest *request,
         return ret;
     }
 
-    ret = nbd_co_receive_return_code(client, request->handle, &local_err);
+    ret = nbd_co_receive_return_code(client, request->handle,
+                                     &request_ret, &local_err);
     if (local_err) {
         trace_nbd_co_request_fail(request->from, request->len, request->handle,
                                   request->flags, request->type,
@@ -781,13 +787,13 @@ static int nbd_co_request(BlockDriverState *bs, NBDRequest *request,
                                   ret, error_get_pretty(local_err));
         error_free(local_err);
     }
-    return ret;
+    return ret ? ret : request_ret;
 }
 
 int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
                          uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
-    int ret;
+    int ret, request_ret;
     Error *local_err = NULL;
     NBDClientSession *client = nbd_get_client_session(bs);
     NBDRequest request = {
@@ -808,7 +814,7 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
     }
 
     ret = nbd_co_receive_cmdread_reply(client, request.handle, offset, qiov,
-                                       &local_err);
+                                       &request_ret, &local_err);
     if (local_err) {
         trace_nbd_co_request_fail(request.from, request.len, request.handle,
                                   request.flags, request.type,
@@ -816,7 +822,7 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
                                   ret, error_get_pretty(local_err));
         error_free(local_err);
     }
-    return ret;
+    return ret ? ret : request_ret;
 }
 
 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
@@ -910,7 +916,7 @@ int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,
                                             int64_t *pnum, int64_t *map,
                                             BlockDriverState **file)
 {
-    int64_t ret;
+    int ret, request_ret;
     NBDExtent extent = { 0 };
     NBDClientSession *client = nbd_get_client_session(bs);
     Error *local_err = NULL;
@@ -935,7 +941,7 @@ int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,
     }
 
     ret = nbd_co_receive_blockstatus_reply(client, request.handle, bytes,
-                                           &extent, &local_err);
+                                           &extent, &request_ret, &local_err);
     if (local_err) {
         trace_nbd_co_request_fail(request.from, request.len, request.handle,
                                   request.flags, request.type,
@@ -943,8 +949,8 @@ int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,
                                   ret, error_get_pretty(local_err));
         error_free(local_err);
     }
-    if (ret < 0) {
-        return ret;
+    if (ret < 0 || request_ret < 0) {
+        return ret ? ret : request_ret;
     }
 
     assert(extent.length);
@@ -964,7 +970,7 @@ void nbd_client_attach_aio_context(BlockDriverState *bs,
 {
     NBDClientSession *client = nbd_get_client_session(bs);
     qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc), new_context);
-    aio_co_schedule(new_context, client->read_reply_co);
+    aio_co_schedule(new_context, client->connection_co);
 }
 
 void nbd_client_close(BlockDriverState *bs)
@@ -972,26 +978,55 @@ void nbd_client_close(BlockDriverState *bs)
     NBDClientSession *client = nbd_get_client_session(bs);
     NBDRequest request = { .type = NBD_CMD_DISC };
 
-    if (client->ioc == NULL) {
-        return;
-    }
+    assert(client->ioc);
 
     nbd_send_request(client->ioc, &request);
 
     nbd_teardown_connection(bs);
 }
 
-int nbd_client_init(BlockDriverState *bs,
-                    QIOChannelSocket *sioc,
-                    const char *export,
-                    QCryptoTLSCreds *tlscreds,
-                    const char *hostname,
-                    const char *x_dirty_bitmap,
-                    Error **errp)
+static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
+                                                  Error **errp)
+{
+    QIOChannelSocket *sioc;
+    Error *local_err = NULL;
+
+    sioc = qio_channel_socket_new();
+    qio_channel_set_name(QIO_CHANNEL(sioc), "nbd-client");
+
+    qio_channel_socket_connect_sync(sioc, saddr, &local_err);
+    if (local_err) {
+        object_unref(OBJECT(sioc));
+        error_propagate(errp, local_err);
+        return NULL;
+    }
+
+    qio_channel_set_delay(QIO_CHANNEL(sioc), false);
+
+    return sioc;
+}
+
+static int nbd_client_connect(BlockDriverState *bs,
+                              SocketAddress *saddr,
+                              const char *export,
+                              QCryptoTLSCreds *tlscreds,
+                              const char *hostname,
+                              const char *x_dirty_bitmap,
+                              Error **errp)
 {
     NBDClientSession *client = nbd_get_client_session(bs);
     int ret;
 
+    /*
+     * establish TCP connection, return error if it fails
+     * TODO: Configurable retry-until-timeout behaviour.
+     */
+    QIOChannelSocket *sioc = nbd_establish_connection(saddr, errp);
+
+    if (!sioc) {
+        return -ECONNREFUSED;
+    }
+
     /* NBD handshake */
     logout("session init %s\n", export);
     qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
@@ -1007,6 +1042,7 @@ int nbd_client_init(BlockDriverState *bs,
     g_free(client->info.name);
     if (ret < 0) {
         logout("Failed to negotiate with the NBD server\n");
+        object_unref(OBJECT(sioc));
         return ret;
     }
     if (x_dirty_bitmap && !client->info.base_allocation) {
@@ -1029,10 +1065,7 @@ int nbd_client_init(BlockDriverState *bs,
         bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
     }
 
-    qemu_co_mutex_init(&client->send_mutex);
-    qemu_co_queue_init(&client->free_sema);
     client->sioc = sioc;
-    object_ref(OBJECT(client->sioc));
 
     if (!client->ioc) {
         client->ioc = QIO_CHANNEL(sioc);
@@ -1042,7 +1075,7 @@ int nbd_client_init(BlockDriverState *bs,
     /* Now that we're connected, set the socket to be non-blocking and
      * kick the reply mechanism.  */
     qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
-    client->read_reply_co = qemu_coroutine_create(nbd_read_reply_entry, client);
+    client->connection_co = qemu_coroutine_create(nbd_connection_entry, client);
     nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
 
     logout("Established connection with NBD server\n");
@@ -1058,6 +1091,26 @@ int nbd_client_init(BlockDriverState *bs,
         NBDRequest request = { .type = NBD_CMD_DISC };
 
         nbd_send_request(client->ioc ?: QIO_CHANNEL(sioc), &request);
+
+        object_unref(OBJECT(sioc));
+
         return ret;
     }
 }
+
+int nbd_client_init(BlockDriverState *bs,
+                    SocketAddress *saddr,
+                    const char *export,
+                    QCryptoTLSCreds *tlscreds,
+                    const char *hostname,
+                    const char *x_dirty_bitmap,
+                    Error **errp)
+{
+    NBDClientSession *client = nbd_get_client_session(bs);
+
+    qemu_co_mutex_init(&client->send_mutex);
+    qemu_co_queue_init(&client->free_sema);
+
+    return nbd_client_connect(bs, saddr, export, tlscreds, hostname,
+                              x_dirty_bitmap, errp);
+}
diff --git a/block/nbd-client.h b/block/nbd-client.h
index cfc90550b9..d990207a5c 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -20,7 +20,7 @@
 typedef struct {
     Coroutine *coroutine;
     uint64_t offset;        /* original offset of the request */
-    bool receiving;         /* waiting for read_reply_co? */
+    bool receiving;         /* waiting for connection_co? */
 } NBDClientRequest;
 
 typedef struct NBDClientSession {
@@ -30,7 +30,7 @@ typedef struct NBDClientSession {
 
     CoMutex send_mutex;
     CoQueue free_sema;
-    Coroutine *read_reply_co;
+    Coroutine *connection_co;
     int in_flight;
 
     NBDClientRequest requests[MAX_NBD_REQUESTS];
@@ -41,7 +41,7 @@ typedef struct NBDClientSession {
 NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
 
 int nbd_client_init(BlockDriverState *bs,
-                    QIOChannelSocket *sock,
+                    SocketAddress *saddr,
                     const char *export_name,
                     QCryptoTLSCreds *tlscreds,
                     const char *hostname,
diff --git a/block/nbd.c b/block/nbd.c
index e87699fb73..9db5eded89 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -295,30 +295,6 @@ NBDClientSession *nbd_get_client_session(BlockDriverState *bs)
     return &s->client;
 }
 
-static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
-                                                  Error **errp)
-{
-    QIOChannelSocket *sioc;
-    Error *local_err = NULL;
-
-    sioc = qio_channel_socket_new();
-    qio_channel_set_name(QIO_CHANNEL(sioc), "nbd-client");
-
-    qio_channel_socket_connect_sync(sioc,
-                                    saddr,
-                                    &local_err);
-    if (local_err) {
-        object_unref(OBJECT(sioc));
-        error_propagate(errp, local_err);
-        return NULL;
-    }
-
-    qio_channel_set_delay(QIO_CHANNEL(sioc), false);
-
-    return sioc;
-}
-
-
 static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
 {
     Object *obj;
@@ -394,7 +370,6 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
     BDRVNBDState *s = bs->opaque;
     QemuOpts *opts = NULL;
     Error *local_err = NULL;
-    QIOChannelSocket *sioc = NULL;
     QCryptoTLSCreds *tlscreds = NULL;
     const char *hostname = NULL;
     int ret = -EINVAL;
@@ -434,22 +409,11 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
         hostname = s->saddr->u.inet.host;
     }
 
-    /* establish TCP connection, return error if it fails
-     * TODO: Configurable retry-until-timeout behaviour.
-     */
-    sioc = nbd_establish_connection(s->saddr, errp);
-    if (!sioc) {
-        ret = -ECONNREFUSED;
-        goto error;
-    }
-
     /* NBD handshake */
-    ret = nbd_client_init(bs, sioc, s->export, tlscreds, hostname,
+    ret = nbd_client_init(bs, s->saddr, s->export, tlscreds, hostname,
                           qemu_opt_get(opts, "x-dirty-bitmap"), errp);
+
  error:
-    if (sioc) {
-        object_unref(OBJECT(sioc));
-    }
     if (tlscreds) {
         object_unref(OBJECT(tlscreds));
     }
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 4faf394e34..96cfb1d7d5 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -23,6 +23,7 @@
 #include "qapi/qapi-types-block.h"
 #include "io/channel-socket.h"
 #include "crypto/tlscreds.h"
+#include "qapi/error.h"
 
 /* Handshake phase structs - this struct is passed on the wire */
 
@@ -336,11 +337,38 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds,
  * Reads @size bytes from @ioc. Returns 0 on success.
  */
 static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size,
-                           Error **errp)
+                           const char *desc, Error **errp)
 {
-    return qio_channel_read_all(ioc, buffer, size, errp) < 0 ? -EIO : 0;
+    int ret = qio_channel_read_all(ioc, buffer, size, errp) < 0 ? -EIO : 0;
+
+    if (ret < 0) {
+        if (desc) {
+            error_prepend(errp, "Failed to read %s: ", desc);
+        }
+        return -1;
+    }
+
+    return 0;
+}
+
+#define DEF_NBD_READ_N(bits)                                            \
+static inline int nbd_read##bits(QIOChannel *ioc,                       \
+                                 uint##bits##_t *val,                   \
+                                 const char *desc, Error **errp)        \
+{                                                                       \
+    if (nbd_read(ioc, val, sizeof(*val), desc, errp) < 0) {             \
+        return -1;                                                      \
+    }                                                                   \
+    *val = be##bits##_to_cpu(*val);                                     \
+    return 0;                                                           \
 }
 
+DEF_NBD_READ_N(16) /* Defines nbd_read16(). */
+DEF_NBD_READ_N(32) /* Defines nbd_read32(). */
+DEF_NBD_READ_N(64) /* Defines nbd_read64(). */
+
+#undef DEF_NBD_READ_N
+
 static inline bool nbd_reply_is_simple(NBDReply *reply)
 {
     return reply->magic == NBD_SIMPLE_REPLY_MAGIC;
diff --git a/nbd/client.c b/nbd/client.c
index 8a083c2f42..10a52ad7d0 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -113,8 +113,7 @@ static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt,
                                     NBDOptionReply *reply, Error **errp)
 {
     QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
-    if (nbd_read(ioc, reply, sizeof(*reply), errp) < 0) {
-        error_prepend(errp, "failed to read option reply: ");
+    if (nbd_read(ioc, reply, sizeof(*reply), "option reply", errp) < 0) {
         nbd_send_opt_abort(ioc);
         return -1;
     }
@@ -166,8 +165,8 @@ static int nbd_handle_reply_err(QIOChannel *ioc, NBDOptionReply *reply,
             goto cleanup;
         }
         msg = g_malloc(reply->length + 1);
-        if (nbd_read(ioc, msg, reply->length, errp) < 0) {
-            error_prepend(errp, "failed to read option error %" PRIu32
+        if (nbd_read(ioc, msg, reply->length, NULL, errp) < 0) {
+            error_prepend(errp, "Failed to read option error %" PRIu32
                           " (%s) message: ",
                           reply->type, nbd_rep_lookup(reply->type));
             goto cleanup;
@@ -284,12 +283,10 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, char **description,
         nbd_send_opt_abort(ioc);
         return -1;
     }
-    if (nbd_read(ioc, &namelen, sizeof(namelen), errp) < 0) {
-        error_prepend(errp, "failed to read option name length: ");
+    if (nbd_read32(ioc, &namelen, "option name length", errp) < 0) {
         nbd_send_opt_abort(ioc);
         return -1;
     }
-    namelen = be32_to_cpu(namelen);
     len -= sizeof(namelen);
     if (len < namelen) {
         error_setg(errp, "incorrect option name length");
@@ -298,8 +295,7 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, char **description,
     }
 
     local_name = g_malloc(namelen + 1);
-    if (nbd_read(ioc, local_name, namelen, errp) < 0) {
-        error_prepend(errp, "failed to read export name: ");
+    if (nbd_read(ioc, local_name, namelen, "export name", errp) < 0) {
         nbd_send_opt_abort(ioc);
         goto out;
     }
@@ -307,8 +303,7 @@ static int nbd_receive_list(QIOChannel *ioc, char **name, char **description,
     len -= namelen;
     if (len) {
         local_desc = g_malloc(len + 1);
-        if (nbd_read(ioc, local_desc, len, errp) < 0) {
-            error_prepend(errp, "failed to read export description: ");
+        if (nbd_read(ioc, local_desc, len, "export description", errp) < 0) {
             nbd_send_opt_abort(ioc);
             goto out;
         }
@@ -410,13 +405,11 @@ static int nbd_opt_info_or_go(QIOChannel *ioc, uint32_t opt,
             nbd_send_opt_abort(ioc);
             return -1;
         }
-        if (nbd_read(ioc, &type, sizeof(type), errp) < 0) {
-            error_prepend(errp, "failed to read info type: ");
+        if (nbd_read16(ioc, &type, "info type", errp) < 0) {
             nbd_send_opt_abort(ioc);
             return -1;
         }
         len -= sizeof(type);
-        type = be16_to_cpu(type);
         switch (type) {
         case NBD_INFO_EXPORT:
             if (len != sizeof(info->size) + sizeof(info->flags)) {
@@ -425,18 +418,14 @@ static int nbd_opt_info_or_go(QIOChannel *ioc, uint32_t opt,
                 nbd_send_opt_abort(ioc);
                 return -1;
             }
-            if (nbd_read(ioc, &info->size, sizeof(info->size), errp) < 0) {
-                error_prepend(errp, "failed to read info size: ");
+            if (nbd_read64(ioc, &info->size, "info size", errp) < 0) {
                 nbd_send_opt_abort(ioc);
                 return -1;
             }
-            info->size = be64_to_cpu(info->size);
-            if (nbd_read(ioc, &info->flags, sizeof(info->flags), errp) < 0) {
-                error_prepend(errp, "failed to read info flags: ");
+            if (nbd_read16(ioc, &info->flags, "info flags", errp) < 0) {
                 nbd_send_opt_abort(ioc);
                 return -1;
             }
-            info->flags = be16_to_cpu(info->flags);
             trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
             break;
 
@@ -447,27 +436,23 @@ static int nbd_opt_info_or_go(QIOChannel *ioc, uint32_t opt,
                 nbd_send_opt_abort(ioc);
                 return -1;
             }
-            if (nbd_read(ioc, &info->min_block, sizeof(info->min_block),
-                         errp) < 0) {
-                error_prepend(errp, "failed to read info minimum block size: ");
+            if (nbd_read32(ioc, &info->min_block, "info minimum block size",
+                           errp) < 0) {
                 nbd_send_opt_abort(ioc);
                 return -1;
             }
-            info->min_block = be32_to_cpu(info->min_block);
             if (!is_power_of_2(info->min_block)) {
                 error_setg(errp, "server minimum block size %" PRIu32
                            " is not a power of two", info->min_block);
                 nbd_send_opt_abort(ioc);
                 return -1;
             }
-            if (nbd_read(ioc, &info->opt_block, sizeof(info->opt_block),
-                         errp) < 0) {
-                error_prepend(errp,
-                              "failed to read info preferred block size: ");
+            if (nbd_read32(ioc, &info->opt_block, "info preferred block size",
+                           errp) < 0)
+            {
                 nbd_send_opt_abort(ioc);
                 return -1;
             }
-            info->opt_block = be32_to_cpu(info->opt_block);
             if (!is_power_of_2(info->opt_block) ||
                 info->opt_block < info->min_block) {
                 error_setg(errp, "server preferred block size %" PRIu32
@@ -475,13 +460,12 @@ static int nbd_opt_info_or_go(QIOChannel *ioc, uint32_t opt,
                 nbd_send_opt_abort(ioc);
                 return -1;
             }
-            if (nbd_read(ioc, &info->max_block, sizeof(info->max_block),
-                         errp) < 0) {
-                error_prepend(errp, "failed to read info maximum block size: ");
+            if (nbd_read32(ioc, &info->max_block, "info maximum block size",
+                           errp) < 0)
+            {
                 nbd_send_opt_abort(ioc);
                 return -1;
             }
-            info->max_block = be32_to_cpu(info->max_block);
             if (info->max_block < info->min_block) {
                 error_setg(errp, "server maximum block size %" PRIu32
                            " is not valid", info->max_block);
@@ -731,14 +715,13 @@ static int nbd_receive_one_meta_context(QIOChannel *ioc,
         return -1;
     }
 
-    if (nbd_read(ioc, &local_id, sizeof(local_id), errp) < 0) {
+    if (nbd_read32(ioc, &local_id, "context id", errp) < 0) {
         return -1;
     }
-    local_id = be32_to_cpu(local_id);
 
     reply.length -= sizeof(local_id);
     local_name = g_malloc(reply.length + 1);
-    if (nbd_read(ioc, local_name, reply.length, errp) < 0) {
+    if (nbd_read(ioc, local_name, reply.length, "context name", errp) < 0) {
         g_free(local_name);
         return -1;
     }
@@ -896,11 +879,9 @@ static int nbd_start_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
         return -EINVAL;
     }
 
-    if (nbd_read(ioc, &magic, sizeof(magic), errp) < 0) {
-        error_prepend(errp, "Failed to read initial magic: ");
+    if (nbd_read64(ioc, &magic, "initial magic", errp) < 0) {
         return -EINVAL;
     }
-    magic = be64_to_cpu(magic);
     trace_nbd_receive_negotiate_magic(magic);
 
     if (magic != NBD_INIT_MAGIC) {
@@ -908,11 +889,9 @@ static int nbd_start_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
         return -EINVAL;
     }
 
-    if (nbd_read(ioc, &magic, sizeof(magic), errp) < 0) {
-        error_prepend(errp, "Failed to read server magic: ");
+    if (nbd_read64(ioc, &magic, "server magic", errp) < 0) {
         return -EINVAL;
     }
-    magic = be64_to_cpu(magic);
     trace_nbd_receive_negotiate_magic(magic);
 
     if (magic == NBD_OPTS_MAGIC) {
@@ -920,11 +899,9 @@ static int nbd_start_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
         uint16_t globalflags;
         bool fixedNewStyle = false;
 
-        if (nbd_read(ioc, &globalflags, sizeof(globalflags), errp) < 0) {
-            error_prepend(errp, "Failed to read server flags: ");
+        if (nbd_read16(ioc, &globalflags, "server flags", errp) < 0) {
             return -EINVAL;
         }
-        globalflags = be16_to_cpu(globalflags);
         trace_nbd_receive_negotiate_server_flags(globalflags);
         if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) {
             fixedNewStyle = true;
@@ -992,17 +969,13 @@ static int nbd_negotiate_finish_oldstyle(QIOChannel *ioc, NBDExportInfo *info,
 {
     uint32_t oldflags;
 
-    if (nbd_read(ioc, &info->size, sizeof(info->size), errp) < 0) {
-        error_prepend(errp, "Failed to read export length: ");
+    if (nbd_read64(ioc, &info->size, "export length", errp) < 0) {
         return -EINVAL;
     }
-    info->size = be64_to_cpu(info->size);
 
-    if (nbd_read(ioc, &oldflags, sizeof(oldflags), errp) < 0) {
-        error_prepend(errp, "Failed to read export flags: ");
+    if (nbd_read32(ioc, &oldflags, "export flags", errp) < 0) {
         return -EINVAL;
     }
-    oldflags = be32_to_cpu(oldflags);
     if (oldflags & ~0xffff) {
         error_setg(errp, "Unexpected export flags %0x" PRIx32, oldflags);
         return -EINVAL;
@@ -1079,17 +1052,13 @@ int nbd_receive_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
         }
 
         /* Read the response */
-        if (nbd_read(ioc, &info->size, sizeof(info->size), errp) < 0) {
-            error_prepend(errp, "Failed to read export length: ");
+        if (nbd_read64(ioc, &info->size, "export length", errp) < 0) {
             return -EINVAL;
         }
-        info->size = be64_to_cpu(info->size);
 
-        if (nbd_read(ioc, &info->flags, sizeof(info->flags), errp) < 0) {
-            error_prepend(errp, "Failed to read export flags: ");
+        if (nbd_read16(ioc, &info->flags, "export flags", errp) < 0) {
             return -EINVAL;
         }
-        info->flags = be16_to_cpu(info->flags);
         break;
     case 0: /* oldstyle, parse length and flags */
         if (*info->name) {
@@ -1379,7 +1348,7 @@ static int nbd_receive_simple_reply(QIOChannel *ioc, NBDSimpleReply *reply,
     assert(reply->magic == NBD_SIMPLE_REPLY_MAGIC);
 
     ret = nbd_read(ioc, (uint8_t *)reply + sizeof(reply->magic),
-                   sizeof(*reply) - sizeof(reply->magic), errp);
+                   sizeof(*reply) - sizeof(reply->magic), "reply", errp);
     if (ret < 0) {
         return ret;
     }
@@ -1404,7 +1373,8 @@ static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
     assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC);
 
     ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic),
-                   sizeof(*chunk) - sizeof(chunk->magic), errp);
+                   sizeof(*chunk) - sizeof(chunk->magic), "structured chunk",
+                   errp);
     if (ret < 0) {
         return ret;
     }
diff --git a/nbd/common.c b/nbd/common.c
index 41f5ed8d9f..cc8b278e54 100644
--- a/nbd/common.c
+++ b/nbd/common.c
@@ -31,7 +31,7 @@ int nbd_drop(QIOChannel *ioc, size_t size, Error **errp)
     buffer = sizeof(small) >= size ? small : g_malloc(MIN(65536, size));
     while (size > 0) {
         ssize_t count = MIN(65536, size);
-        ret = nbd_read(ioc, buffer, MIN(65536, size), errp);
+        ret = nbd_read(ioc, buffer, MIN(65536, size), NULL, errp);
 
         if (ret < 0) {
             goto cleanup;
diff --git a/nbd/server.c b/nbd/server.c
index cb0d5634fa..838c150d8c 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -438,8 +438,7 @@ static int nbd_negotiate_handle_export_name(NBDClient *client,
         error_setg(errp, "Bad length received");
         return -EINVAL;
     }
-    if (nbd_read(client->ioc, name, client->optlen, errp) < 0) {
-        error_prepend(errp, "read failed: ");
+    if (nbd_read(client->ioc, name, client->optlen, "export name", errp) < 0) {
         return -EIO;
     }
     name[client->optlen] = '\0';
@@ -1046,11 +1045,9 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
         ...           Rest of request
     */
 
-    if (nbd_read(client->ioc, &flags, sizeof(flags), errp) < 0) {
-        error_prepend(errp, "read failed: ");
+    if (nbd_read32(client->ioc, &flags, "flags", errp) < 0) {
         return -EIO;
     }
-    flags = be32_to_cpu(flags);
     trace_nbd_negotiate_options_flags(flags);
     if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
         fixedNewstyle = true;
@@ -1070,30 +1067,23 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
         uint32_t option, length;
         uint64_t magic;
 
-        if (nbd_read(client->ioc, &magic, sizeof(magic), errp) < 0) {
-            error_prepend(errp, "read failed: ");
+        if (nbd_read64(client->ioc, &magic, "opts magic", errp) < 0) {
             return -EINVAL;
         }
-        magic = be64_to_cpu(magic);
         trace_nbd_negotiate_options_check_magic(magic);
         if (magic != NBD_OPTS_MAGIC) {
             error_setg(errp, "Bad magic received");
             return -EINVAL;
         }
 
-        if (nbd_read(client->ioc, &option,
-                     sizeof(option), errp) < 0) {
-            error_prepend(errp, "read failed: ");
+        if (nbd_read32(client->ioc, &option, "option", errp) < 0) {
             return -EINVAL;
         }
-        option = be32_to_cpu(option);
         client->opt = option;
 
-        if (nbd_read(client->ioc, &length, sizeof(length), errp) < 0) {
-            error_prepend(errp, "read failed: ");
+        if (nbd_read32(client->ioc, &length, "option length", errp) < 0) {
             return -EINVAL;
         }
-        length = be32_to_cpu(length);
         assert(!client->optlen);
         client->optlen = length;
 
@@ -1306,7 +1296,7 @@ static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request,
     uint32_t magic;
     int ret;
 
-    ret = nbd_read(ioc, buf, sizeof(buf), errp);
+    ret = nbd_read(ioc, buf, sizeof(buf), "request", errp);
     if (ret < 0) {
         return ret;
     }
@@ -2111,8 +2101,9 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
         }
     }
     if (request->type == NBD_CMD_WRITE) {
-        if (nbd_read(client->ioc, req->data, request->len, errp) < 0) {
-            error_prepend(errp, "reading from socket failed: ");
+        if (nbd_read(client->ioc, req->data, request->len, "CMD_WRITE data",
+                     errp) < 0)
+        {
             return -EIO;
         }
         req->complete = true;
diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi
index 9cc20b365c..8a6174df0c 100644
--- a/qemu-deprecated.texi
+++ b/qemu-deprecated.texi
@@ -160,3 +160,36 @@ Example of legacy encoding:
 The above, converted to the current supported format:
 
 @code{json:@{"file.driver":"rbd", "file.pool":"rbd", "file.image":"name"@}}
+
+@section Related binaries
+
+@subsection qemu-nbd --partition (since 4.0.0)
+
+The ``qemu-nbd --partition $digit'' code (also spelled @option{-P})
+can only handle MBR partitions, and has never correctly handled
+logical partitions beyond partition 5.  If you know the offset and
+length of the partition (perhaps by using @code{sfdisk} within the
+guest), you can achieve the effect of exporting just that subset of
+the disk by use of the @option{--image-opts} option with a raw
+blockdev using the @code{offset} and @code{size} parameters layered on
+top of any other existing blockdev. For example, if partition 1 is
+100MiB long starting at 1MiB, the old command:
+
+@code{qemu-nbd -t -P 1 -f qcow2 file.qcow2}
+
+can be rewritten as:
+
+@code{qemu-nbd -t --image-opts driver=raw,offset=1M,size=100M,file.driver=qcow2,file.backing.driver=file,file.backing.filename=file.qcow2}
+
+Alternatively, the @code{nbdkit} project provides a more powerful
+partition filter on top of its nbd plugin, which can be used to select
+an arbitrary MBR or GPT partition on top of any other full-image NBD
+export.  Using this to rewrite the above example results in:
+
+@code{qemu-nbd -t -k /tmp/sock -f qcow2 file.qcow2 &}
+@code{nbdkit -f --filter=partition nbd socket=/tmp/sock partition=1}
+
+Note that if you are exposing the export via /dev/nbd0, it is easier
+to just export the entire image and then mount only /dev/nbd0p1 than
+it is to reinvoke @command{qemu-nbd -c /dev/nbd0} limited to just a
+subset of the image.
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 1f7b2a03f5..00c07fd27e 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -787,6 +787,8 @@ int main(int argc, char **argv)
             flags &= ~BDRV_O_RDWR;
             break;
         case 'P':
+            warn_report("The '-P' option is deprecated; use --image-opts with "
+                        "a raw device wrapper for subset exports instead");
             if (qemu_strtoi(optarg, NULL, 0, &partition) < 0 ||
                 partition < 1 || partition > 8) {
                 error_report("Invalid partition '%s'", optarg);
diff --git a/qemu-nbd.texi b/qemu-nbd.texi
index 386bece468..d0c5182814 100644
--- a/qemu-nbd.texi
+++ b/qemu-nbd.texi
@@ -56,8 +56,10 @@ auto-detecting.
 @item -r, --read-only
 Export the disk as read-only.
 @item -P, --partition=@var{num}
-Only expose MBR partition @var{num}.  Understands physical partitions
-1-4 and logical partitions 5-8.
+Deprecated: Only expose MBR partition @var{num}.  Understands physical
+partitions 1-4 and logical partition 5. New code should instead use
+@option{--image-opts} with the raw driver wrapping a subset of the
+original image.
 @item -B, --bitmap=@var{name}
 If @var{filename} has a qcow2 persistent bitmap @var{name}, expose
 that bitmap via the ``qemu:dirty-bitmap:@var{name}'' context