summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--block.c13
-rw-r--r--block/io.c5
-rw-r--r--block/nvme.c52
-rw-r--r--include/block/block.h8
4 files changed, 55 insertions, 23 deletions
diff --git a/block.c b/block.c
index 9c94f7f28a..cbd8da5f3b 100644
--- a/block.c
+++ b/block.c
@@ -5914,6 +5914,8 @@ static void bdrv_attach_aio_context(BlockDriverState *bs,
  * Changes the AioContext used for fd handlers, timers, and BHs by this
  * BlockDriverState and all its children and parents.
  *
+ * Must be called from the main AioContext.
+ *
  * The caller must own the AioContext lock for the old AioContext of bs, but it
  * must not own the AioContext lock for new_context (unless new_context is the
  * same as the current context of bs).
@@ -5925,9 +5927,10 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
                                  AioContext *new_context, GSList **ignore)
 {
     AioContext *old_context = bdrv_get_aio_context(bs);
-    AioContext *current_context = qemu_get_current_aio_context();
     BdrvChild *child;
 
+    g_assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+
     if (old_context == new_context) {
         return;
     }
@@ -5953,7 +5956,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
     bdrv_detach_aio_context(bs);
 
     /* Acquire the new context, if necessary */
-    if (current_context != new_context) {
+    if (qemu_get_aio_context() != new_context) {
         aio_context_acquire(new_context);
     }
 
@@ -5965,16 +5968,16 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
      * subtree that have not yet been moved to the new AioContext.
      * Release the old one so bdrv_drained_end() can poll them.
      */
-    if (current_context != old_context) {
+    if (qemu_get_aio_context() != old_context) {
         aio_context_release(old_context);
     }
 
     bdrv_drained_end(bs);
 
-    if (current_context != old_context) {
+    if (qemu_get_aio_context() != old_context) {
         aio_context_acquire(old_context);
     }
-    if (current_context != new_context) {
+    if (qemu_get_aio_context() != new_context) {
         aio_context_release(new_context);
     }
 }
diff --git a/block/io.c b/block/io.c
index b89e155d21..06305c6ea6 100644
--- a/block/io.c
+++ b/block/io.c
@@ -217,13 +217,12 @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
         bs->drv->bdrv_co_drain_end(bs);
     }
 
-    /* Set data->done before reading bs->wakeup.  */
+    /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */
     atomic_mb_set(&data->done, true);
-    bdrv_dec_in_flight(bs);
-
     if (!data->begin) {
         atomic_dec(data->drained_end_counter);
     }
+    bdrv_dec_in_flight(bs);
 
     g_free(data);
 }
diff --git a/block/nvme.c b/block/nvme.c
index 9896b7f7c6..c28755cc31 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -102,8 +102,11 @@ typedef struct {
     size_t doorbell_scale;
     bool write_cache_supported;
     EventNotifier irq_notifier;
+
     uint64_t nsze; /* Namespace size reported by identify command */
     int nsid;      /* The namespace id to read/write data. */
+    size_t blkshift;
+
     uint64_t max_transfer;
     bool plugged;
 
@@ -217,7 +220,7 @@ static NVMeQueuePair *nvme_create_queue_pair(BlockDriverState *bs,
         error_propagate(errp, local_err);
         goto fail;
     }
-    q->cq.doorbell = &s->regs->doorbells[idx * 2 * s->doorbell_scale + 1];
+    q->cq.doorbell = &s->regs->doorbells[(idx * 2 + 1) * s->doorbell_scale];
 
     return q;
 fail:
@@ -315,7 +318,7 @@ static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q)
     while (q->inflight) {
         int16_t cid;
         c = (NvmeCqe *)&q->cq.queue[q->cq.head * NVME_CQ_ENTRY_BYTES];
-        if (!c->cid || (le16_to_cpu(c->status) & 0x1) == q->cq_phase) {
+        if ((le16_to_cpu(c->status) & 0x1) == q->cq_phase) {
             break;
         }
         q->cq.head = (q->cq.head + 1) % NVME_QUEUE_SIZE;
@@ -339,10 +342,7 @@ static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q)
         qemu_mutex_unlock(&q->lock);
         req.cb(req.opaque, nvme_translate_error(c));
         qemu_mutex_lock(&q->lock);
-        c->cid = cpu_to_le16(0);
         q->inflight--;
-        /* Flip Phase Tag bit. */
-        c->status = cpu_to_le16(le16_to_cpu(c->status) ^ 0x1);
         progress = true;
     }
     if (progress) {
@@ -418,8 +418,9 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
     BDRVNVMeState *s = bs->opaque;
     NvmeIdCtrl *idctrl;
     NvmeIdNs *idns;
+    NvmeLBAF *lbaf;
     uint8_t *resp;
-    int r;
+    int r, hwsect_size;
     uint64_t iova;
     NvmeCmd cmd = {
         .opcode = NVME_ADM_CMD_IDENTIFY,
@@ -466,7 +467,22 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
     }
 
     s->nsze = le64_to_cpu(idns->nsze);
+    lbaf = &idns->lbaf[NVME_ID_NS_FLBAS_INDEX(idns->flbas)];
+
+    if (lbaf->ms) {
+        error_setg(errp, "Namespaces with metadata are not yet supported");
+        goto out;
+    }
+
+    hwsect_size = 1 << lbaf->ds;
+
+    if (hwsect_size < BDRV_SECTOR_SIZE || hwsect_size > s->page_size) {
+        error_setg(errp, "Namespace has unsupported block size (%d)",
+                hwsect_size);
+        goto out;
+    }
 
+    s->blkshift = lbaf->ds;
 out:
     qemu_vfio_dma_unmap(s->vfio, resp);
     qemu_vfree(resp);
@@ -785,8 +801,22 @@ fail:
 static int64_t nvme_getlength(BlockDriverState *bs)
 {
     BDRVNVMeState *s = bs->opaque;
+    return s->nsze << s->blkshift;
+}
 
-    return s->nsze << BDRV_SECTOR_BITS;
+static int64_t nvme_get_blocksize(BlockDriverState *bs)
+{
+    BDRVNVMeState *s = bs->opaque;
+    assert(s->blkshift >= BDRV_SECTOR_BITS);
+    return 1 << s->blkshift;
+}
+
+static int nvme_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
+{
+    int64_t blocksize = nvme_get_blocksize(bs);
+    bsz->phys = blocksize;
+    bsz->log = blocksize;
+    return 0;
 }
 
 /* Called with s->dma_map_lock */
@@ -917,13 +947,14 @@ static coroutine_fn int nvme_co_prw_aligned(BlockDriverState *bs,
     BDRVNVMeState *s = bs->opaque;
     NVMeQueuePair *ioq = s->queues[1];
     NVMeRequest *req;
-    uint32_t cdw12 = (((bytes >> BDRV_SECTOR_BITS) - 1) & 0xFFFF) |
+
+    uint32_t cdw12 = (((bytes >> s->blkshift) - 1) & 0xFFFF) |
                        (flags & BDRV_REQ_FUA ? 1 << 30 : 0);
     NvmeCmd cmd = {
         .opcode = is_write ? NVME_CMD_WRITE : NVME_CMD_READ,
         .nsid = cpu_to_le32(s->nsid),
-        .cdw10 = cpu_to_le32((offset >> BDRV_SECTOR_BITS) & 0xFFFFFFFF),
-        .cdw11 = cpu_to_le32(((offset >> BDRV_SECTOR_BITS) >> 32) & 0xFFFFFFFF),
+        .cdw10 = cpu_to_le32((offset >> s->blkshift) & 0xFFFFFFFF),
+        .cdw11 = cpu_to_le32(((offset >> s->blkshift) >> 32) & 0xFFFFFFFF),
         .cdw12 = cpu_to_le32(cdw12),
     };
     NVMeCoData data = {
@@ -1154,6 +1185,7 @@ static BlockDriver bdrv_nvme = {
     .bdrv_file_open           = nvme_file_open,
     .bdrv_close               = nvme_close,
     .bdrv_getlength           = nvme_getlength,
+    .bdrv_probe_blocksizes    = nvme_probe_blocksizes,
 
     .bdrv_co_preadv           = nvme_co_preadv,
     .bdrv_co_pwritev          = nvme_co_pwritev,
diff --git a/include/block/block.h b/include/block/block.h
index 60f00479e0..50a07c1c33 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -667,11 +667,9 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs);
  *
  * This polls @bs's AioContext until all scheduled sub-drained_ends
  * have settled.  On one hand, that may result in graph changes.  On
- * the other, this requires that all involved nodes (@bs and all of
- * its parents) are in the same AioContext, and that the caller has
- * acquired it.
- * If there are any nodes that are in different contexts from @bs,
- * these contexts must not be acquired.
+ * the other, this requires that the caller either runs in the main
+ * loop; or that all involved nodes (@bs and all of its parents) are
+ * in the caller's AioContext.
  */
 void bdrv_drained_end(BlockDriverState *bs);