summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2015-04-09 12:05:00 +0100
committerPeter Maydell <peter.maydell@linaro.org>2015-04-09 12:05:00 +0100
commita6f2cb037a82fb8679e70e175cfbc879dd829e06 (patch)
tree2474354310450cbe8857870456a83a0eb92fedc0
parentcf811fff2ae20008f00455d0ab2212a4dea0b56f (diff)
parent05b685fbabb7fdcab72cb42b27db916fd74b2265 (diff)
downloadfocaccia-qemu-a6f2cb037a82fb8679e70e175cfbc879dd829e06.tar.gz
focaccia-qemu-a6f2cb037a82fb8679e70e175cfbc879dd829e06.zip
Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Thu Apr  9 10:55:11 2015 BST using RSA key ID 81AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"

* remotes/stefanha/tags/block-pull-request:
  block/iscsi: handle zero events from iscsi_which_events
  aio: strengthen memory barriers for bottom half scheduling
  virtio-blk: correctly dirty guest memory
  qcow2: Fix header update with overridden backing file

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--async.c28
-rw-r--r--block/iscsi.c33
-rw-r--r--block/qcow2.c29
-rw-r--r--block/qcow2.h6
-rw-r--r--hw/block/dataplane/virtio-blk.c3
-rw-r--r--hw/block/virtio-blk.c13
-rw-r--r--include/hw/virtio/virtio-blk.h1
-rwxr-xr-xtests/qemu-iotests/13095
-rw-r--r--tests/qemu-iotests/130.out43
-rw-r--r--tests/qemu-iotests/group1
10 files changed, 220 insertions, 32 deletions
diff --git a/async.c b/async.c
index 2be88cc9e9..2b51e87679 100644
--- a/async.c
+++ b/async.c
@@ -72,12 +72,13 @@ int aio_bh_poll(AioContext *ctx)
         /* Make sure that fetching bh happens before accessing its members */
         smp_read_barrier_depends();
         next = bh->next;
-        if (!bh->deleted && bh->scheduled) {
-            bh->scheduled = 0;
-            /* Paired with write barrier in bh schedule to ensure reading for
-             * idle & callbacks coming after bh's scheduling.
-             */
-            smp_rmb();
+        /* The atomic_xchg is paired with the one in qemu_bh_schedule.  The
+         * implicit memory barrier ensures that the callback sees all writes
+         * done by the scheduling thread.  It also ensures that the scheduling
+         * thread sees the zero before bh->cb has run, and thus will call
+         * aio_notify again if necessary.
+         */
+        if (!bh->deleted && atomic_xchg(&bh->scheduled, 0)) {
             if (!bh->idle)
                 ret = 1;
             bh->idle = 0;
@@ -108,33 +109,28 @@ int aio_bh_poll(AioContext *ctx)
 
 void qemu_bh_schedule_idle(QEMUBH *bh)
 {
-    if (bh->scheduled)
-        return;
     bh->idle = 1;
     /* Make sure that idle & any writes needed by the callback are done
      * before the locations are read in the aio_bh_poll.
      */
-    smp_wmb();
-    bh->scheduled = 1;
+    atomic_mb_set(&bh->scheduled, 1);
 }
 
 void qemu_bh_schedule(QEMUBH *bh)
 {
     AioContext *ctx;
 
-    if (bh->scheduled)
-        return;
     ctx = bh->ctx;
     bh->idle = 0;
-    /* Make sure that:
+    /* The memory barrier implicit in atomic_xchg makes sure that:
      * 1. idle & any writes needed by the callback are done before the
      *    locations are read in the aio_bh_poll.
      * 2. ctx is loaded before scheduled is set and the callback has a chance
      *    to execute.
      */
-    smp_mb();
-    bh->scheduled = 1;
-    aio_notify(ctx);
+    if (atomic_xchg(&bh->scheduled, 1) == 0) {
+        aio_notify(ctx);
+    }
 }
 
 
diff --git a/block/iscsi.c b/block/iscsi.c
index 3e34b1f3a2..ba33290000 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -56,6 +56,7 @@ typedef struct IscsiLun {
     uint64_t num_blocks;
     int events;
     QEMUTimer *nop_timer;
+    QEMUTimer *event_timer;
     uint8_t lbpme;
     uint8_t lbprz;
     uint8_t has_write_same;
@@ -95,6 +96,7 @@ typedef struct IscsiAIOCB {
 #endif
 } IscsiAIOCB;
 
+#define EVENT_INTERVAL 250
 #define NOP_INTERVAL 5000
 #define MAX_NOP_FAILURES 3
 #define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
@@ -256,21 +258,30 @@ static void
 iscsi_set_events(IscsiLun *iscsilun)
 {
     struct iscsi_context *iscsi = iscsilun->iscsi;
-    int ev;
+    int ev = iscsi_which_events(iscsi);
 
-    /* We always register a read handler.  */
-    ev = POLLIN;
-    ev |= iscsi_which_events(iscsi);
     if (ev != iscsilun->events) {
         aio_set_fd_handler(iscsilun->aio_context,
                            iscsi_get_fd(iscsi),
-                           iscsi_process_read,
+                           (ev & POLLIN) ? iscsi_process_read : NULL,
                            (ev & POLLOUT) ? iscsi_process_write : NULL,
                            iscsilun);
+        iscsilun->events = ev;
+    }
 
+    /* newer versions of libiscsi may return zero events. In this
+     * case start a timer to ensure we are able to return to service
+     * once this situation changes. */
+    if (!ev) {
+        timer_mod(iscsilun->event_timer,
+                  qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
     }
+}
 
-    iscsilun->events = ev;
+static void iscsi_timed_set_events(void *opaque)
+{
+    IscsiLun *iscsilun = opaque;
+    iscsi_set_events(iscsilun);
 }
 
 static void
@@ -1214,6 +1225,11 @@ static void iscsi_detach_aio_context(BlockDriverState *bs)
         timer_free(iscsilun->nop_timer);
         iscsilun->nop_timer = NULL;
     }
+    if (iscsilun->event_timer) {
+        timer_del(iscsilun->event_timer);
+        timer_free(iscsilun->event_timer);
+        iscsilun->event_timer = NULL;
+    }
 }
 
 static void iscsi_attach_aio_context(BlockDriverState *bs,
@@ -1230,6 +1246,11 @@ static void iscsi_attach_aio_context(BlockDriverState *bs,
                                         iscsi_nop_timed_event, iscsilun);
     timer_mod(iscsilun->nop_timer,
               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
+
+    /* Prepare a timer for a delayed call to iscsi_set_events */
+    iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
+                                          QEMU_CLOCK_REALTIME, SCALE_MS,
+                                          iscsi_timed_set_events, iscsilun);
 }
 
 static bool iscsi_is_write_protected(IscsiLun *iscsilun)
diff --git a/block/qcow2.c b/block/qcow2.c
index 32bdf756ca..316a8db22b 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -140,6 +140,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                 return 3;
             }
             bs->backing_format[ext.len] = '\0';
+            s->image_backing_format = g_strdup(bs->backing_format);
 #ifdef DEBUG_EXT
             printf("Qcow2: Got format extension %s\n", bs->backing_format);
 #endif
@@ -884,6 +885,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
             goto fail;
         }
         bs->backing_file[len] = '\0';
+        s->image_backing_file = g_strdup(bs->backing_file);
     }
 
     /* Internal snapshots */
@@ -1457,6 +1459,9 @@ static void qcow2_close(BlockDriverState *bs)
     g_free(s->unknown_header_fields);
     cleanup_unknown_header_ext(bs);
 
+    g_free(s->image_backing_file);
+    g_free(s->image_backing_format);
+
     g_free(s->cluster_cache);
     qemu_vfree(s->cluster_data);
     qcow2_refcount_close(bs);
@@ -1622,9 +1627,10 @@ int qcow2_update_header(BlockDriverState *bs)
     }
 
     /* Backing file format header extension */
-    if (*bs->backing_format) {
+    if (s->image_backing_format) {
         ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
-                             bs->backing_format, strlen(bs->backing_format),
+                             s->image_backing_format,
+                             strlen(s->image_backing_format),
                              buflen);
         if (ret < 0) {
             goto fail;
@@ -1682,8 +1688,8 @@ int qcow2_update_header(BlockDriverState *bs)
     buflen -= ret;
 
     /* Backing file name */
-    if (*bs->backing_file) {
-        size_t backing_file_len = strlen(bs->backing_file);
+    if (s->image_backing_file) {
+        size_t backing_file_len = strlen(s->image_backing_file);
 
         if (buflen < backing_file_len) {
             ret = -ENOSPC;
@@ -1691,7 +1697,7 @@ int qcow2_update_header(BlockDriverState *bs)
         }
 
         /* Using strncpy is ok here, since buf is not NUL-terminated. */
-        strncpy(buf, bs->backing_file, buflen);
+        strncpy(buf, s->image_backing_file, buflen);
 
         header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
         header->backing_file_size   = cpu_to_be32(backing_file_len);
@@ -1712,9 +1718,17 @@ fail:
 static int qcow2_change_backing_file(BlockDriverState *bs,
     const char *backing_file, const char *backing_fmt)
 {
+    BDRVQcowState *s = bs->opaque;
+
     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
     pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
 
+    g_free(s->image_backing_file);
+    g_free(s->image_backing_format);
+
+    s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL;
+    s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL;
+
     return qcow2_update_header(bs);
 }
 
@@ -2751,8 +2765,9 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
     }
 
     if (backing_file || backing_format) {
-        ret = qcow2_change_backing_file(bs, backing_file ?: bs->backing_file,
-                                        backing_format ?: bs->backing_format);
+        ret = qcow2_change_backing_file(bs,
+                    backing_file ?: s->image_backing_file,
+                    backing_format ?: s->image_backing_format);
         if (ret < 0) {
             return ret;
         }
diff --git a/block/qcow2.h b/block/qcow2.h
index aa6d367818..422b825138 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -283,6 +283,12 @@ typedef struct BDRVQcowState {
     QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
     QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
     bool cache_discards;
+
+    /* Backing file path and format as stored in the image (this is not the
+     * effective path/format, which may be the result of a runtime option
+     * override) */
+    char *image_backing_file;
+    char *image_backing_format;
 } BDRVQcowState;
 
 struct QCowAIOCB;
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index cd41478b08..3db139b8a4 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -77,8 +77,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status)
     VirtIOBlockDataPlane *s = req->dev->dataplane;
     stb_p(&req->in->status, status);
 
-    vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem,
-               req->qiov.size + sizeof(*req->in));
+    vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->in_len);
 
     /* Suppress notification to guest by BH and its scheduled
      * flag because requests are completed as a batch after io
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 000c38d2a1..9546fd2919 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -33,6 +33,7 @@ VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
     VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
     req->dev = s;
     req->qiov.size = 0;
+    req->in_len = 0;
     req->next = NULL;
     req->mr_next = NULL;
     return req;
@@ -54,7 +55,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req,
     trace_virtio_blk_req_complete(req, status);
 
     stb_p(&req->in->status, status);
-    virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
+    virtqueue_push(s->vq, &req->elem, req->in_len);
     virtio_notify(vdev, s->vq);
 }
 
@@ -102,6 +103,14 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
         if (ret) {
             int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
             bool is_read = !(p & VIRTIO_BLK_T_OUT);
+            /* Note that memory may be dirtied on read failure.  If the
+             * virtio request is not completed here, as is the case for
+             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
+             * correctly during live migration.  While this is ugly,
+             * it is acceptable because the device is free to write to
+             * the memory until the request is completed (which will
+             * happen on the other side of the migration).
+             */
             if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
                 continue;
             }
@@ -496,6 +505,8 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
         exit(1);
     }
 
+    /* We always touch the last byte, so just see how big in_iov is.  */
+    req->in_len = iov_size(in_iov, in_num);
     req->in = (void *)in_iov[in_num - 1].iov_base
               + in_iov[in_num - 1].iov_len
               - sizeof(struct virtio_blk_inhdr);
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index b3ffcd96b8..6bf5905c52 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -67,6 +67,7 @@ typedef struct VirtIOBlockReq {
     struct virtio_blk_inhdr *in;
     struct virtio_blk_outhdr out;
     QEMUIOVector qiov;
+    size_t in_len;
     struct VirtIOBlockReq *next;
     struct VirtIOBlockReq *mr_next;
     BlockAcctCookie acct;
diff --git a/tests/qemu-iotests/130 b/tests/qemu-iotests/130
new file mode 100755
index 0000000000..bc26247e3f
--- /dev/null
+++ b/tests/qemu-iotests/130
@@ -0,0 +1,95 @@
+#!/bin/bash
+#
+# Test that temporary backing file overrides (on the command line or in
+# blockdev-add) don't replace the original path stored in the image during
+# header updates.
+#
+# Copyright (C) 2015 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=kwolf@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+    _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.qemu
+
+_supported_fmt qcow2
+_supported_proto generic
+_supported_os Linux
+
+qemu_comm_method="monitor"
+
+
+TEST_IMG="$TEST_IMG.orig" _make_test_img 64M
+TEST_IMG="$TEST_IMG.base" _make_test_img 64M
+_make_test_img 64M
+_img_info | _filter_img_info
+
+echo
+echo "=== HMP commit ==="
+echo
+# bdrv_make_empty() involves a header update for qcow2
+
+# Test that a backing file isn't written
+_launch_qemu -drive file="$TEST_IMG",backing.file.filename="$TEST_IMG.base"
+_send_qemu_cmd $QEMU_HANDLE "commit ide0-hd0" "(qemu)"
+_send_qemu_cmd $QEMU_HANDLE '' '(qemu)'
+_cleanup_qemu
+_img_info | _filter_img_info
+
+# Make sure that if there was a backing file that was just overridden on the
+# command line, that backing file is retained, with the right format
+_make_test_img -F raw -b "$TEST_IMG.orig" 64M
+_launch_qemu -drive file="$TEST_IMG",backing.file.filename="$TEST_IMG.base",backing.driver=$IMGFMT
+_send_qemu_cmd $QEMU_HANDLE "commit ide0-hd0" "(qemu)"
+_send_qemu_cmd $QEMU_HANDLE '' '(qemu)'
+_cleanup_qemu
+_img_info | _filter_img_info
+
+echo
+echo "=== Marking image dirty (lazy refcounts) ==="
+echo
+
+# Test that a backing file isn't written
+_make_test_img 64M
+$QEMU_IO -c "open -o backing.file.filename=$TEST_IMG.base,lazy-refcounts=on $TEST_IMG" -c "write 0 4k" | _filter_qemu_io
+_img_info | _filter_img_info
+
+# Make sure that if there was a backing file that was just overridden on the
+# command line, that backing file is retained, with the right format
+_make_test_img -F raw -b "$TEST_IMG.orig" 64M
+$QEMU_IO -c "open -o backing.file.filename=$TEST_IMG.base,backing.driver=$IMGFMT,lazy-refcounts=on $TEST_IMG" -c "write 0 4k" | _filter_qemu_io
+_img_info | _filter_img_info
+
+# success, all done
+echo '*** done'
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/130.out b/tests/qemu-iotests/130.out
new file mode 100644
index 0000000000..ea68b5d283
--- /dev/null
+++ b/tests/qemu-iotests/130.out
@@ -0,0 +1,43 @@
+QA output created by 130
+Formatting 'TEST_DIR/t.IMGFMT.orig', fmt=IMGFMT size=67108864
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 64M (67108864 bytes)
+
+=== HMP commit ===
+
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) ccocomcommcommicommitcommit commit icommit idcommit idecommit ide0commit ide0-commit ide0-hcommit ide0-hdcommit ide0-hd0
+(qemu) 
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 64M (67108864 bytes)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.orig' backing_fmt='raw'
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) ccocomcommcommicommitcommit commit icommit idcommit idecommit ide0commit ide0-commit ide0-hcommit ide0-hdcommit ide0-hd0
+(qemu) 
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 64M (67108864 bytes)
+backing file: TEST_DIR/t.IMGFMT.orig
+backing file format: raw
+
+=== Marking image dirty (lazy refcounts) ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 64M (67108864 bytes)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.orig' backing_fmt='raw'
+wrote 4096/4096 bytes at offset 0
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 64M (67108864 bytes)
+backing file: TEST_DIR/t.IMGFMT.orig
+backing file format: raw
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 62621278e2..bcf25786ab 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -124,3 +124,4 @@
 121 rw auto
 123 rw auto quick
 128 rw auto quick
+130 rw auto quick