summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- block/nbd-client.c 4
-rw-r--r-- block/nbd.c 14
-rw-r--r-- include/block/nbd.h 6
-rw-r--r-- nbd/client.c 81
-rw-r--r-- nbd/trace-events 1
-rw-r--r-- qemu-nbd.c 2
6 files changed, 92 insertions, 16 deletions
diff --git a/block/nbd-client.c b/block/nbd-client.c
index aab1e320b2..25dd28406b 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -384,6 +384,7 @@ int nbd_client_init(BlockDriverState *bs,
     logout("session init %s\n", export);
     qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
 
+    client->info.request_sizes = true;
     ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
                                 tlscreds, hostname,
                                 &client->ioc, &client->info, errp);
@@ -398,6 +399,9 @@ int nbd_client_init(BlockDriverState *bs,
     if (client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) {
         bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
     }
+    if (client->info.min_block > bs->bl.request_alignment) {
+        bs->bl.request_alignment = client->info.min_block;
+    }
 
     qemu_co_mutex_init(&client->send_mutex);
     qemu_co_queue_init(&client->free_sema);
diff --git a/block/nbd.c b/block/nbd.c
index 4a9048c280..a50d24b50a 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -472,9 +472,17 @@ static int nbd_co_flush(BlockDriverState *bs)
 
 static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
 {
-    bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
-    bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE;
-    bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
+    NBDClientSession *s = nbd_get_client_session(bs);
+    uint32_t max = MIN_NON_ZERO(NBD_MAX_BUFFER_SIZE, s->info.max_block);
+
+    bs->bl.max_pdiscard = max; /* one shared cap for all three limits */
+    bs->bl.max_pwrite_zeroes = max;
+    bs->bl.max_transfer = max;
+
+    if (s->info.opt_block &&
+        s->info.opt_block > bs->bl.opt_transfer) {
+        bs->bl.opt_transfer = s->info.opt_block; /* only ever raised here */
+    }
 }
 
 static void nbd_close(BlockDriverState *bs)
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 4a22eca98b..9c3d0a5868 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -144,8 +144,14 @@ enum {
 
 /* Details collected by NBD_OPT_EXPORT_NAME and NBD_OPT_GO */
 struct NBDExportInfo {
+    /* Set by client before nbd_receive_negotiate() */
+    bool request_sizes; /* request NBD_INFO_BLOCK_SIZE in NBD_OPT_GO */
+    /* Set by server results during nbd_receive_negotiate() */
     uint64_t size;
     uint16_t flags;
+    uint32_t min_block; /* minimum block size; checked to be a power of 2 */
+    uint32_t opt_block; /* preferred block size; power of 2, >= min_block */
+    uint32_t max_block; /* maximum block size as sent by the server */
 };
 typedef struct NBDExportInfo NBDExportInfo;
 
diff --git a/nbd/client.c b/nbd/client.c
index 1e98ca9613..c3ee9f36b1 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -369,12 +369,17 @@ static int nbd_opt_go(QIOChannel *ioc, const char *wantname,
     info->flags = 0;
 
     trace_nbd_opt_go_start(wantname);
-    buf = g_malloc(4 + len + 2 + 1);
+    buf = g_malloc(4 + len + 2 + 2 * info->request_sizes + 1);
     stl_be_p(buf, len);
     memcpy(buf + 4, wantname, len);
-    /* No requests, live with whatever server sends */
-    stw_be_p(buf + 4 + len, 0);
-    if (nbd_send_option_request(ioc, NBD_OPT_GO, len + 6, buf, errp) < 0) {
+    /* At most one request, everything else up to server */
+    stw_be_p(buf + 4 + len, info->request_sizes);
+    if (info->request_sizes) {
+        stw_be_p(buf + 4 + len + 2, NBD_INFO_BLOCK_SIZE);
+    }
+    if (nbd_send_option_request(ioc, NBD_OPT_GO,
+                                4 + len + 2 + 2 * info->request_sizes, buf,
+                                errp) < 0) {
         return -1;
     }
 
@@ -405,8 +410,9 @@ static int nbd_opt_go(QIOChannel *ioc, const char *wantname,
             return 1;
         }
         if (reply.type != NBD_REP_INFO) {
-            error_setg(errp, "unexpected reply type %" PRIx32 ", expected %x",
-                       reply.type, NBD_REP_INFO);
+            error_setg(errp, "unexpected reply type %" PRIx32
+                       " (%s), expected %x",
+                       reply.type, nbd_rep_lookup(reply.type), NBD_REP_INFO);
             nbd_send_opt_abort(ioc);
             return -1;
         }
@@ -446,6 +452,51 @@ static int nbd_opt_go(QIOChannel *ioc, const char *wantname,
             trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
             break;
 
+        case NBD_INFO_BLOCK_SIZE: /* payload: min, preferred, max (3 x u32) */
+            if (len != sizeof(info->min_block) * 3) {
+                error_setg(errp, "remaining export info len %" PRIu32
+                           " is unexpected size", len);
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            if (nbd_read(ioc, &info->min_block, sizeof(info->min_block),
+                         errp) < 0) {
+                error_prepend(errp, "failed to read info minimum block size");
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            be32_to_cpus(&info->min_block);
+            if (!is_power_of_2(info->min_block)) {
+                error_setg(errp, "server minimum block size %" PRIu32
+                           " is not a power of two", info->min_block);
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            if (nbd_read(ioc, &info->opt_block, sizeof(info->opt_block),
+                         errp) < 0) {
+                error_prepend(errp, "failed to read info preferred block size");
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            be32_to_cpus(&info->opt_block);
+            if (!is_power_of_2(info->opt_block) ||
+                info->opt_block < info->min_block) {
+                error_setg(errp, "server preferred block size %" PRIu32
+                           " is not valid", info->opt_block);
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            if (nbd_read(ioc, &info->max_block, sizeof(info->max_block),
+                         errp) < 0) {
+                error_prepend(errp, "failed to read info maximum block size");
+                nbd_send_opt_abort(ioc);
+                return -1;
+            }
+            be32_to_cpus(&info->max_block);
+            trace_nbd_opt_go_info_block_size(info->min_block, info->opt_block,
+                                             info->max_block);
+            break;
+
         default:
             trace_nbd_opt_go_info_unknown(type, nbd_info_lookup(type));
             if (nbd_drop(ioc, len, errp) < 0) {
@@ -729,8 +780,14 @@ fail:
 int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
              Error **errp)
 {
-    unsigned long sectors = info->size / BDRV_SECTOR_SIZE;
-    if (info->size / BDRV_SECTOR_SIZE != sectors) {
+    unsigned long sector_size = MAX(BDRV_SECTOR_SIZE, info->min_block);
+    unsigned long sectors = info->size / sector_size;
+
+    /* FIXME: Once the kernel module is patched to honor block sizes,
+     * and to advertise that fact to user space, we should update the
+     * hand-off to the kernel to use any block sizes we learned. */
+    assert(!info->request_sizes);
+    if (info->size / sector_size != sectors) {
         error_setg(errp, "Export size %" PRIu64 " too large for 32-bit kernel",
                    info->size);
         return -E2BIG;
@@ -744,17 +801,17 @@ int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
         return -serrno;
     }
 
-    trace_nbd_init_set_block_size(BDRV_SECTOR_SIZE);
+    trace_nbd_init_set_block_size(sector_size);
 
-    if (ioctl(fd, NBD_SET_BLKSIZE, (unsigned long)BDRV_SECTOR_SIZE) < 0) {
+    if (ioctl(fd, NBD_SET_BLKSIZE, sector_size) < 0) {
         int serrno = errno;
         error_setg(errp, "Failed setting NBD block size");
         return -serrno;
     }
 
     trace_nbd_init_set_size(sectors);
-    if (info->size % BDRV_SECTOR_SIZE) {
-        trace_nbd_init_trailing_bytes(info->size % BDRV_SECTOR_SIZE);
+    if (info->size % sector_size) {
+        trace_nbd_init_trailing_bytes(info->size % sector_size);
     }
 
     if (ioctl(fd, NBD_SET_SIZE_BLOCKS, sectors) < 0) {
diff --git a/nbd/trace-events b/nbd/trace-events
index be3dce773e..f5024d85a1 100644
--- a/nbd/trace-events
+++ b/nbd/trace-events
@@ -6,6 +6,7 @@ nbd_reply_err_unsup(uint32_t option, const char *name) "server doesn't understan
 nbd_opt_go_start(const char *name) "Attempting NBD_OPT_GO for export '%s'"
 nbd_opt_go_success(void) "Export is good to go"
 nbd_opt_go_info_unknown(int info, const char *name) "Ignoring unknown info %d (%s)"
+nbd_opt_go_info_block_size(uint32_t minimum, uint32_t preferred, uint32_t maximum) "Block sizes are 0x%" PRIx32 ", 0x%" PRIx32 ", 0x%" PRIx32
 nbd_receive_query_exports_start(const char *wantname) "Querying export list for '%s'"
 nbd_receive_query_exports_success(const char *wantname) "Found desired export name '%s'"
 nbd_receive_starttls_request(void) "Requesting TLS from server"
diff --git a/qemu-nbd.c b/qemu-nbd.c
index c8bd47fe77..78d05bea2d 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -255,7 +255,7 @@ static void *show_parts(void *arg)
 static void *nbd_client_thread(void *arg)
 {
     char *device = arg;
-    NBDExportInfo info;
+    NBDExportInfo info = { .request_sizes = false, };
     QIOChannelSocket *sioc;
     int fd;
     int ret;