summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Peter Maydell <peter.maydell@linaro.org> 2015-01-16 10:16:14 +0000
committer: Peter Maydell <peter.maydell@linaro.org> 2015-01-16 10:16:14 +0000
commit: e68cba36360a2ab5bf0576b66df4d0eb0d822f8d (patch)
tree: 933ecff5cc8e380fe38693dc0cca2d7b8e479dfc
parent: df58887b20fab8fe8a6dcca4db30cd4e4077d53a (diff)
parent: ea987c2c21d4326bb58ee28f6888fdcf8fbda067 (diff)
download: focaccia-qemu-e68cba36360a2ab5bf0576b66df4d0eb0d822f8d.tar.gz
download: focaccia-qemu-e68cba36360a2ab5bf0576b66df4d0eb0d822f8d.zip
Merge remote-tracking branch 'remotes/amit-migration/tags/mig-2.3-1' into staging
A set of patches collected over the holidays.  Mix of optimizations and
fixes.

# gpg: Signature made Fri 16 Jan 2015 07:42:00 GMT using RSA key ID 854083B6
# gpg: Good signature from "Amit Shah <amit@amitshah.net>"
# gpg:                 aka "Amit Shah <amit@kernel.org>"
# gpg:                 aka "Amit Shah <amitshah@gmx.net>"

* remotes/amit-migration/tags/mig-2.3-1:
  vmstate: type-check sub-arrays
  migration_cancel: shutdown migration socket
  Handle bi-directional communication for fd migration
  socket shutdown
  Tests: QEMUSizedBuffer/QEMUBuffer
  QEMUSizedBuffer: only free qsb that qemu_bufopen allocated
  xbzrle: rebuild the cache_is_cached function
  xbzrle: optimize XBZRLE to decrease the cache misses

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--  arch_init.c                    |  8
-rw-r--r--  docs/xbzrle.txt                |  8
-rw-r--r--  include/migration/page_cache.h | 10
-rw-r--r--  include/migration/qemu-file.h  | 10
-rw-r--r--  include/migration/vmstate.h    |  2
-rw-r--r--  include/qemu/sockets.h         |  7
-rw-r--r--  migration/fd.c                 | 24
-rw-r--r--  migration/migration.c          | 12
-rw-r--r--  migration/qemu-file-buf.c      | 10
-rw-r--r--  migration/qemu-file-unix.c     | 23
-rw-r--r--  migration/qemu-file.c          | 12
-rw-r--r--  page_cache.c                   | 43
-rw-r--r--  tests/test-vmstate.c           | 20
13 files changed, 143 insertions, 46 deletions
diff --git a/arch_init.c b/arch_init.c
index cfedbf08af..89c8fa46bb 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -346,7 +346,8 @@ static void xbzrle_cache_zero_page(ram_addr_t current_addr)
 
     /* We don't care if this fails to allocate a new cache page
      * as long as it updated an old one */
-    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE);
+    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
+                 bitmap_sync_count);
 }
 
 #define ENCODING_FLAG_XBZRLE 0x1
@@ -358,10 +359,11 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
     int encoded_len = 0, bytes_sent = -1;
     uint8_t *prev_cached_page;
 
-    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
+    if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
         acct_info.xbzrle_cache_miss++;
         if (!last_stage) {
-            if (cache_insert(XBZRLE.cache, current_addr, *current_data) == -1) {
+            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
+                             bitmap_sync_count) == -1) {
                 return -1;
             } else {
                 /* update *current_data when the page has been
diff --git a/docs/xbzrle.txt b/docs/xbzrle.txt
index cc3a26a91d..52c8511a4c 100644
--- a/docs/xbzrle.txt
+++ b/docs/xbzrle.txt
@@ -71,6 +71,14 @@ encoded buffer:
 encoded length 24
 e9 07 0f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 03 01 67 01 01 69
 
+Cache update strategy
+=====================
+Keeping the hot pages in the cache is effective for decreased cache
+misses. XBZRLE uses a counter as the age of each page. The counter will
+increase after each ram dirty bitmap sync. When a cache conflict is
+detected, XBZRLE will only evict pages in the cache that are older than
+a threshold.
+
 Usage
 ======================
 1. Verify the destination QEMU version is able to decode the new format.
diff --git a/include/migration/page_cache.h b/include/migration/page_cache.h
index 2d5ce2dd7a..10ed53274c 100644
--- a/include/migration/page_cache.h
+++ b/include/migration/page_cache.h
@@ -43,8 +43,10 @@ void cache_fini(PageCache *cache);
  *
  * @cache pointer to the PageCache struct
  * @addr: page addr
+ * @current_age: current bitmap generation
  */
-bool cache_is_cached(const PageCache *cache, uint64_t addr);
+bool cache_is_cached(const PageCache *cache, uint64_t addr,
+                     uint64_t current_age);
 
 /**
  * get_cached_data: Get the data cached for an addr
@@ -60,13 +62,15 @@ uint8_t *get_cached_data(const PageCache *cache, uint64_t addr);
  * cache_insert: insert the page into the cache. the page cache
  * will dup the data on insert. the previous value will be overwritten
  *
- * Returns -1 on error
+ * Returns -1 when the page isn't inserted into cache
  *
  * @cache pointer to the PageCache struct
  * @addr: page address
  * @pdata: pointer to the page
+ * @current_age: current bitmap generation
  */
-int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata);
+int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata,
+                 uint64_t current_age);
 
 /**
  * cache_resize: resize the page cache. In case of size reduction the extra
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index 401676bf4d..d843c0010c 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -84,6 +84,14 @@ typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
                                size_t size,
                                int *bytes_sent);
 
+/*
+ * Stop any read or write (depending on flags) on the underlying
+ * transport on the QEMUFile.
+ * Existing blocking reads/writes must be woken
+ * Returns 0 on success, -err on error
+ */
+typedef int (QEMUFileShutdownFunc)(void *opaque, bool rd, bool wr);
+
 typedef struct QEMUFileOps {
     QEMUFilePutBufferFunc *put_buffer;
     QEMUFileGetBufferFunc *get_buffer;
@@ -94,6 +102,7 @@ typedef struct QEMUFileOps {
     QEMURamHookFunc *after_ram_iterate;
     QEMURamHookFunc *hook_ram_load;
     QEMURamSaveFunc *save_page;
+    QEMUFileShutdownFunc *shut_down;
 } QEMUFileOps;
 
 struct QEMUSizedBuffer {
@@ -177,6 +186,7 @@ void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate);
 int64_t qemu_file_get_rate_limit(QEMUFile *f);
 int qemu_file_get_error(QEMUFile *f);
 void qemu_file_set_error(QEMUFile *f, int ret);
+int qemu_file_shutdown(QEMUFile *f);
 void qemu_fflush(QEMUFile *f);
 
 static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index e45fc49cb1..d712a651ca 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -189,7 +189,7 @@ extern const VMStateInfo vmstate_info_bitmap;
      type_check_2darray(_type, typeof_field(_state, _field), _n1, _n2))
 
 #define vmstate_offset_sub_array(_state, _field, _type, _start)      \
-    (offsetof(_state, _field[_start]))
+    vmstate_offset_value(_state, _field[_start], _type)
 
 #define vmstate_offset_buffer(_state, _field)                        \
     vmstate_offset_array(_state, _field, uint8_t,                    \
diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
index f47dae614a..7992ece72a 100644
--- a/include/qemu/sockets.h
+++ b/include/qemu/sockets.h
@@ -44,6 +44,13 @@ int socket_set_fast_reuse(int fd);
 int send_all(int fd, const void *buf, int len1);
 int recv_all(int fd, void *buf, int len1, bool single_read);
 
+#ifdef WIN32
+/* Windows has different names for the same constants with the same values */
+#define SHUT_RD   0
+#define SHUT_WR   1
+#define SHUT_RDWR 2
+#endif
+
 /* callback function for nonblocking connect
  * valid fd on success, negative error code on failure
  */
diff --git a/migration/fd.c b/migration/fd.c
index d2e523af74..129da9910b 100644
--- a/migration/fd.c
+++ b/migration/fd.c
@@ -31,13 +31,29 @@
     do { } while (0)
 #endif
 
+static bool fd_is_socket(int fd)
+{
+    struct stat stat;
+    int ret = fstat(fd, &stat);
+    if (ret == -1) {
+        /* When in doubt say no */
+        return false;
+    }
+    return S_ISSOCK(stat.st_mode);
+}
+
 void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp)
 {
     int fd = monitor_get_fd(cur_mon, fdname, errp);
     if (fd == -1) {
         return;
     }
-    s->file = qemu_fdopen(fd, "wb");
+
+    if (fd_is_socket(fd)) {
+        s->file = qemu_fopen_socket(fd, "wb");
+    } else {
+        s->file = qemu_fdopen(fd, "wb");
+    }
 
     migrate_fd_connect(s);
 }
@@ -58,7 +74,11 @@ void fd_start_incoming_migration(const char *infd, Error **errp)
     DPRINTF("Attempting to start an incoming migration via fd\n");
 
     fd = strtol(infd, NULL, 0);
-    f = qemu_fdopen(fd, "rb");
+    if (fd_is_socket(fd)) {
+        f = qemu_fopen_socket(fd, "rb");
+    } else {
+        f = qemu_fdopen(fd, "rb");
+    }
     if(f == NULL) {
         error_setg_errno(errp, errno, "failed to open the source descriptor");
         return;
diff --git a/migration/migration.c b/migration/migration.c
index c49a05a165..b3adbc653a 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -330,6 +330,7 @@ void migrate_fd_error(MigrationState *s)
 static void migrate_fd_cancel(MigrationState *s)
 {
     int old_state ;
+    QEMUFile *f = migrate_get_current()->file;
     trace_migrate_fd_cancel();
 
     do {
@@ -339,6 +340,17 @@ static void migrate_fd_cancel(MigrationState *s)
         }
         migrate_set_state(s, old_state, MIG_STATE_CANCELLING);
     } while (s->state != MIG_STATE_CANCELLING);
+
+    /*
+     * If we're unlucky the migration code might be stuck somewhere in a
+     * send/write while the network has failed and is waiting to timeout;
+     * if we've got shutdown(2) available then we can force it to quit.
+     * The outgoing qemu file gets closed in migrate_fd_cleanup that is
+     * called in a bh, so there is no race against this cancel.
+     */
+    if (s->state == MIG_STATE_CANCELLING && f) {
+        qemu_file_shutdown(f);
+    }
 }
 
 void add_migration_state_change_notifier(Notifier *notify)
diff --git a/migration/qemu-file-buf.c b/migration/qemu-file-buf.c
index d33dd44747..e97e0bd655 100644
--- a/migration/qemu-file-buf.c
+++ b/migration/qemu-file-buf.c
@@ -395,6 +395,7 @@ QEMUSizedBuffer *qsb_clone(const QEMUSizedBuffer *qsb)
 typedef struct QEMUBuffer {
     QEMUSizedBuffer *qsb;
     QEMUFile *file;
+    bool qsb_allocated;
 } QEMUBuffer;
 
 static int buf_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
@@ -424,7 +425,9 @@ static int buf_close(void *opaque)
 {
     QEMUBuffer *s = opaque;
 
-    qsb_free(s->qsb);
+    if (s->qsb_allocated) {
+        qsb_free(s->qsb);
+    }
 
     g_free(s);
 
@@ -463,12 +466,11 @@ QEMUFile *qemu_bufopen(const char *mode, QEMUSizedBuffer *input)
     }
 
     s = g_malloc0(sizeof(QEMUBuffer));
-    if (mode[0] == 'r') {
-        s->qsb = input;
-    }
+    s->qsb = input;
 
     if (s->qsb == NULL) {
         s->qsb = qsb_create(NULL, 0);
+        s->qsb_allocated = true;
     }
     if (!s->qsb) {
         g_free(s);
diff --git a/migration/qemu-file-unix.c b/migration/qemu-file-unix.c
index 9682396d97..bfbc0861ab 100644
--- a/migration/qemu-file-unix.c
+++ b/migration/qemu-file-unix.c
@@ -26,6 +26,7 @@
 #include "qemu/sockets.h"
 #include "block/coroutine.h"
 #include "migration/qemu-file.h"
+#include "migration/qemu-file-internal.h"
 
 typedef struct QEMUFileSocket {
     int fd;
@@ -84,6 +85,17 @@ static int socket_close(void *opaque)
     return 0;
 }
 
+static int socket_shutdown(void *opaque, bool rd, bool wr)
+{
+    QEMUFileSocket *s = opaque;
+
+    if (shutdown(s->fd, rd ? (wr ? SHUT_RDWR : SHUT_RD) : SHUT_WR)) {
+        return -errno;
+    } else {
+        return 0;
+    }
+}
+
 static ssize_t unix_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
                                   int64_t pos)
 {
@@ -192,15 +204,18 @@ QEMUFile *qemu_fdopen(int fd, const char *mode)
 }
 
 static const QEMUFileOps socket_read_ops = {
-    .get_fd =     socket_get_fd,
+    .get_fd     = socket_get_fd,
     .get_buffer = socket_get_buffer,
-    .close =      socket_close
+    .close      = socket_close,
+    .shut_down  = socket_shutdown
+
 };
 
 static const QEMUFileOps socket_write_ops = {
-    .get_fd =     socket_get_fd,
+    .get_fd        = socket_get_fd,
     .writev_buffer = socket_writev_buffer,
-    .close =      socket_close
+    .close         = socket_close,
+    .shut_down     = socket_shutdown
 };
 
 QEMUFile *qemu_fopen_socket(int fd, const char *mode)
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index a7f2a34430..edc283073a 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -30,6 +30,18 @@
 #include "migration/qemu-file-internal.h"
 #include "trace.h"
 
+/*
+ * Stop a file from being read/written - not all backing files can do this
+ * typically only sockets can.
+ */
+int qemu_file_shutdown(QEMUFile *f)
+{
+    if (!f->ops->shut_down) {
+        return -ENOSYS;
+    }
+    return f->ops->shut_down(f->opaque, true, true);
+}
+
 bool qemu_file_mode_is_not_valid(const char *mode)
 {
     if (mode == NULL ||
diff --git a/page_cache.c b/page_cache.c
index 89bb1ec3a0..cf8878d1d7 100644
--- a/page_cache.c
+++ b/page_cache.c
@@ -33,6 +33,9 @@
     do { } while (0)
 #endif
 
+/* the page in cache will not be replaced in two cycles */
+#define CACHED_PAGE_LIFETIME 2
+
 typedef struct CacheItem CacheItem;
 
 struct CacheItem {
@@ -122,18 +125,6 @@ static size_t cache_get_cache_pos(const PageCache *cache,
     return pos;
 }
 
-bool cache_is_cached(const PageCache *cache, uint64_t addr)
-{
-    size_t pos;
-
-    g_assert(cache);
-    g_assert(cache->page_cache);
-
-    pos = cache_get_cache_pos(cache, addr);
-
-    return (cache->page_cache[pos].it_addr == addr);
-}
-
 static CacheItem *cache_get_by_addr(const PageCache *cache, uint64_t addr)
 {
     size_t pos;
@@ -151,17 +142,35 @@ uint8_t *get_cached_data(const PageCache *cache, uint64_t addr)
     return cache_get_by_addr(cache, addr)->it_data;
 }
 
-int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata)
+bool cache_is_cached(const PageCache *cache, uint64_t addr,
+                     uint64_t current_age)
 {
+    CacheItem *it;
 
-    CacheItem *it = NULL;
+    it = cache_get_by_addr(cache, addr);
 
-    g_assert(cache);
-    g_assert(cache->page_cache);
+    if (it->it_addr == addr) {
+        /* update the it_age when the cache hit */
+        it->it_age = current_age;
+        return true;
+    }
+    return false;
+}
+
+int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata,
+                 uint64_t current_age)
+{
+
+    CacheItem *it;
 
     /* actual update of entry */
     it = cache_get_by_addr(cache, addr);
 
+    if (it->it_data && it->it_addr != addr &&
+        it->it_age + CACHED_PAGE_LIFETIME > current_age) {
+        /* the cache page is fresh, don't replace it */
+        return -1;
+    }
     /* allocate page */
     if (!it->it_data) {
         it->it_data = g_try_malloc(cache->page_size);
@@ -174,7 +183,7 @@ int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata)
 
     memcpy(it->it_data, pdata, cache->page_size);
 
-    it->it_age = ++cache->max_item_age;
+    it->it_age = current_age;
     it->it_addr = addr;
 
     return 0;
diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c
index 5e0fd13cc4..39b7b01734 100644
--- a/tests/test-vmstate.c
+++ b/tests/test-vmstate.c
@@ -60,16 +60,6 @@ static QEMUFile *open_test_file(bool write)
     return qemu_fdopen(fd, write ? "wb" : "rb");
 }
 
-/* Open a read-only qemu-file from an existing memory block */
-static QEMUFile *open_mem_file_read(const void *data, size_t len)
-{
-    /* The qsb gets freed by qemu_fclose */
-    QEMUSizedBuffer *qsb = qsb_create(data, len);
-    g_assert(qsb);
-
-    return qemu_bufopen("r", qsb);
-}
-
 /*
  * Check that the contents of the memory-buffered file f match
  * the given size/data.
@@ -450,7 +440,9 @@ static void test_load_noskip(void)
         QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
     };
 
-    QEMUFile *loading = open_mem_file_read(buf, sizeof(buf));
+    QEMUSizedBuffer *qsb = qsb_create(buf, sizeof(buf));
+    g_assert(qsb);
+    QEMUFile *loading = qemu_bufopen("r", qsb);
     TestStruct obj = { .skip_c_e = false };
     vmstate_load_state(loading, &vmstate_skipping, &obj, 2);
     g_assert(!qemu_file_get_error(loading));
@@ -461,6 +453,7 @@ static void test_load_noskip(void)
     g_assert_cmpint(obj.e, ==, 50);
     g_assert_cmpint(obj.f, ==, 60);
     qemu_fclose(loading);
+    qsb_free(qsb);
 }
 
 static void test_load_skip(void)
@@ -473,7 +466,9 @@ static void test_load_skip(void)
         QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
     };
 
-    QEMUFile *loading = open_mem_file_read(buf, sizeof(buf));
+    QEMUSizedBuffer *qsb = qsb_create(buf, sizeof(buf));
+    g_assert(qsb);
+    QEMUFile *loading = qemu_bufopen("r", qsb);
     TestStruct obj = { .skip_c_e = true, .c = 300, .e = 500 };
     vmstate_load_state(loading, &vmstate_skipping, &obj, 2);
     g_assert(!qemu_file_get_error(loading));
@@ -484,6 +479,7 @@ static void test_load_skip(void)
     g_assert_cmpint(obj.e, ==, 500);
     g_assert_cmpint(obj.f, ==, 60);
     qemu_fclose(loading);
+    qsb_free(qsb);
 }
 
 int main(int argc, char **argv)