summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- block/backup.c 118
-rw-r--r-- block/curl.c 24
-rw-r--r-- block/dirty-bitmap.c 5
-rw-r--r-- block/sheepdog.c 166
-rw-r--r-- blockjob.c 30
-rw-r--r-- gdbstub.c 113
-rw-r--r-- hw/core/qdev-properties-system.c 64
-rw-r--r-- hw/tpm/Makefile.objs 5
-rw-r--r-- hw/tpm/tpm_util.c 67
-rw-r--r-- hw/tpm/tpm_util.h 3
-rw-r--r-- include/block/dirty-bitmap.h 1
-rw-r--r-- include/hw/qdev-properties.h 2
-rw-r--r-- include/qemu/hbitmap.h 8
-rw-r--r-- linux-user/main.c 19
-rw-r--r-- target/sh4/cpu.h 4
-rw-r--r-- target/sh4/helper.c 1
-rw-r--r-- target/sh4/translate.c 273
-rw-r--r-- tests/test-hbitmap.c 61
-rw-r--r-- trace-events 28
-rw-r--r-- trace/ftrace.c 33
-rw-r--r-- util/hbitmap.c 39
21 files changed, 683 insertions, 381 deletions
diff --git a/block/backup.c b/block/backup.c
index 99e6bcc748..4a16a37229 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -40,11 +40,12 @@ typedef struct BackupBlockJob {
     BlockdevOnError on_target_error;
     CoRwlock flush_rwlock;
     uint64_t bytes_read;
-    unsigned long *done_bitmap;
     int64_t cluster_size;
     bool compress;
     NotifierWithReturn before_write;
     QLIST_HEAD(, CowRequest) inflight_reqs;
+
+    HBitmap *copy_bitmap;
 } BackupBlockJob;
 
 /* See if in-flight requests overlap and wait for them to complete */
@@ -109,10 +110,11 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
     cow_request_begin(&cow_request, job, start, end);
 
     for (; start < end; start += job->cluster_size) {
-        if (test_bit(start / job->cluster_size, job->done_bitmap)) {
+        if (!hbitmap_get(job->copy_bitmap, start / job->cluster_size)) {
             trace_backup_do_cow_skip(job, start);
             continue; /* already copied */
         }
+        hbitmap_reset(job->copy_bitmap, start / job->cluster_size, 1);
 
         trace_backup_do_cow_process(job, start);
 
@@ -132,6 +134,7 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
             if (error_is_read) {
                 *error_is_read = true;
             }
+            hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
             goto out;
         }
 
@@ -148,11 +151,10 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job,
             if (error_is_read) {
                 *error_is_read = false;
             }
+            hbitmap_set(job->copy_bitmap, start / job->cluster_size, 1);
             goto out;
         }
 
-        set_bit(start / job->cluster_size, job->done_bitmap);
-
         /* Publish progress, guest I/O counts as progress too.  Note that the
          * offset field is an opaque progress value, it is not a disk offset.
          */
@@ -260,7 +262,7 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
     }
 
     len = DIV_ROUND_UP(backup_job->common.len, backup_job->cluster_size);
-    bitmap_zero(backup_job->done_bitmap, len);
+    hbitmap_set(backup_job->copy_bitmap, 0, len);
 }
 
 void backup_wait_for_overlapping_requests(BlockJob *job, int64_t offset,
@@ -360,64 +362,68 @@ static bool coroutine_fn yield_and_check(BackupBlockJob *job)
 
 static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
 {
+    int ret;
     bool error_is_read;
-    int ret = 0;
-    int clusters_per_iter;
-    uint32_t granularity;
-    int64_t offset;
     int64_t cluster;
-    int64_t end;
-    int64_t last_cluster = -1;
-    BdrvDirtyBitmapIter *dbi;
+    HBitmapIter hbi;
 
-    granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
-    clusters_per_iter = MAX((granularity / job->cluster_size), 1);
-    dbi = bdrv_dirty_iter_new(job->sync_bitmap);
+    hbitmap_iter_init(&hbi, job->copy_bitmap, 0);
+    while ((cluster = hbitmap_iter_next(&hbi)) != -1) {
+        do {
+            if (yield_and_check(job)) {
+                return 0;
+            }
+            ret = backup_do_cow(job, cluster * job->cluster_size,
+                                job->cluster_size, &error_is_read, false);
+            if (ret < 0 && backup_error_action(job, error_is_read, -ret) ==
+                           BLOCK_ERROR_ACTION_REPORT)
+            {
+                return ret;
+            }
+        } while (ret < 0);
+    }
+
+    return 0;
+}
 
-    /* Find the next dirty sector(s) */
-    while ((offset = bdrv_dirty_iter_next(dbi)) >= 0) {
-        cluster = offset / job->cluster_size;
+/* init copy_bitmap from sync_bitmap */
+static void backup_incremental_init_copy_bitmap(BackupBlockJob *job)
+{
+    BdrvDirtyBitmapIter *dbi;
+    int64_t offset;
+    int64_t end = DIV_ROUND_UP(bdrv_dirty_bitmap_size(job->sync_bitmap),
+                               job->cluster_size);
 
-        /* Fake progress updates for any clusters we skipped */
-        if (cluster != last_cluster + 1) {
-            job->common.offset += ((cluster - last_cluster - 1) *
-                                   job->cluster_size);
+    dbi = bdrv_dirty_iter_new(job->sync_bitmap);
+    while ((offset = bdrv_dirty_iter_next(dbi)) != -1) {
+        int64_t cluster = offset / job->cluster_size;
+        int64_t next_cluster;
+
+        offset += bdrv_dirty_bitmap_granularity(job->sync_bitmap);
+        if (offset >= bdrv_dirty_bitmap_size(job->sync_bitmap)) {
+            hbitmap_set(job->copy_bitmap, cluster, end - cluster);
+            break;
         }
 
-        for (end = cluster + clusters_per_iter; cluster < end; cluster++) {
-            do {
-                if (yield_and_check(job)) {
-                    goto out;
-                }
-                ret = backup_do_cow(job, cluster * job->cluster_size,
-                                    job->cluster_size, &error_is_read,
-                                    false);
-                if ((ret < 0) &&
-                    backup_error_action(job, error_is_read, -ret) ==
-                    BLOCK_ERROR_ACTION_REPORT) {
-                    goto out;
-                }
-            } while (ret < 0);
+        offset = bdrv_dirty_bitmap_next_zero(job->sync_bitmap, offset);
+        if (offset == -1) {
+            hbitmap_set(job->copy_bitmap, cluster, end - cluster);
+            break;
         }
 
-        /* If the bitmap granularity is smaller than the backup granularity,
-         * we need to advance the iterator pointer to the next cluster. */
-        if (granularity < job->cluster_size) {
-            bdrv_set_dirty_iter(dbi, cluster * job->cluster_size);
+        next_cluster = DIV_ROUND_UP(offset, job->cluster_size);
+        hbitmap_set(job->copy_bitmap, cluster, next_cluster - cluster);
+        if (next_cluster >= end) {
+            break;
         }
 
-        last_cluster = cluster - 1;
+        bdrv_set_dirty_iter(dbi, next_cluster * job->cluster_size);
     }
 
-    /* Play some final catchup with the progress meter */
-    end = DIV_ROUND_UP(job->common.len, job->cluster_size);
-    if (last_cluster + 1 < end) {
-        job->common.offset += ((end - last_cluster - 1) * job->cluster_size);
-    }
+    job->common.offset = job->common.len -
+                         hbitmap_count(job->copy_bitmap) * job->cluster_size;
 
-out:
     bdrv_dirty_iter_free(dbi);
-    return ret;
 }
 
 static void coroutine_fn backup_run(void *opaque)
@@ -425,19 +431,27 @@ static void coroutine_fn backup_run(void *opaque)
     BackupBlockJob *job = opaque;
     BackupCompleteData *data;
     BlockDriverState *bs = blk_bs(job->common.blk);
-    int64_t offset;
+    int64_t offset, nb_clusters;
     int ret = 0;
 
     QLIST_INIT(&job->inflight_reqs);
     qemu_co_rwlock_init(&job->flush_rwlock);
 
-    job->done_bitmap = bitmap_new(DIV_ROUND_UP(job->common.len,
-                                               job->cluster_size));
+    nb_clusters = DIV_ROUND_UP(job->common.len, job->cluster_size);
+    job->copy_bitmap = hbitmap_alloc(nb_clusters, 0);
+    if (job->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
+        backup_incremental_init_copy_bitmap(job);
+    } else {
+        hbitmap_set(job->copy_bitmap, 0, nb_clusters);
+    }
+
 
     job->before_write.notify = backup_before_write_notify;
     bdrv_add_before_write_notifier(bs, &job->before_write);
 
     if (job->sync_mode == MIRROR_SYNC_MODE_NONE) {
+        /* All bits are set in copy_bitmap to allow any cluster to be copied.
+         * This does not actually require them to be copied. */
         while (!block_job_is_cancelled(&job->common)) {
             /* Yield until the job is cancelled.  We just let our before_write
              * notify callback service CoW requests. */
@@ -512,7 +526,7 @@ static void coroutine_fn backup_run(void *opaque)
     /* wait until pending backup_do_cow() calls have completed */
     qemu_co_rwlock_wrlock(&job->flush_rwlock);
     qemu_co_rwlock_unlock(&job->flush_rwlock);
-    g_free(job->done_bitmap);
+    hbitmap_free(job->copy_bitmap);
 
     data = g_malloc(sizeof(*data));
     data->ret = ret;
diff --git a/block/curl.c b/block/curl.c
index 2a244e2439..35cf417f59 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -89,6 +89,8 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
 
 struct BDRVCURLState;
 
+static bool libcurl_initialized;
+
 typedef struct CURLAIOCB {
     Coroutine *co;
     QEMUIOVector *qiov;
@@ -686,14 +688,23 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     double d;
     const char *secretid;
     const char *protocol_delimiter;
+    int ret;
 
-    static int inited = 0;
 
     if (flags & BDRV_O_RDWR) {
         error_setg(errp, "curl block device does not support writes");
         return -EROFS;
     }
 
+    if (!libcurl_initialized) {
+        ret = curl_global_init(CURL_GLOBAL_ALL);
+        if (ret) {
+            error_setg(errp, "libcurl initialization failed with %d", ret);
+            return -EIO;
+        }
+        libcurl_initialized = true;
+    }
+
     qemu_mutex_init(&s->mutex);
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
     qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -772,11 +783,6 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
         }
     }
 
-    if (!inited) {
-        curl_global_init(CURL_GLOBAL_ALL);
-        inited = 1;
-    }
-
     DPRINTF("CURL: Opening %s\n", file);
     QSIMPLEQ_INIT(&s->free_state_waitq);
     s->aio_context = bdrv_get_aio_context(bs);
@@ -851,6 +857,9 @@ out_noclean:
     qemu_mutex_destroy(&s->mutex);
     g_free(s->cookie);
     g_free(s->url);
+    g_free(s->username);
+    g_free(s->proxyusername);
+    g_free(s->proxypassword);
     qemu_opts_del(opts);
     return -EINVAL;
 }
@@ -949,6 +958,9 @@ static void curl_close(BlockDriverState *bs)
 
     g_free(s->cookie);
     g_free(s->url);
+    g_free(s->username);
+    g_free(s->proxyusername);
+    g_free(s->proxypassword);
 }
 
 static int64_t curl_getlength(BlockDriverState *bs)
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index bd04e991b1..7879d13ddb 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -715,3 +715,8 @@ char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp)
 {
     return hbitmap_sha256(bitmap->bitmap, errp);
 }
+
+int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap, uint64_t offset)
+{
+    return hbitmap_next_zero(bitmap->bitmap, offset);
+}
diff --git a/block/sheepdog.c b/block/sheepdog.c
index a1edb992ff..f684477328 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -400,7 +400,7 @@ typedef struct BDRVSheepdogReopenState {
     int cache_flags;
 } BDRVSheepdogReopenState;
 
-static const char * sd_strerror(int err)
+static const char *sd_strerror(int err)
 {
     int i;
 
@@ -1631,7 +1631,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
     if (!tag) {
         tag = "";
     }
-    if (tag && strlen(tag) >= SD_MAX_VDI_TAG_LEN) {
+    if (strlen(tag) >= SD_MAX_VDI_TAG_LEN) {
         error_setg(errp, "value of parameter 'tag' is too long");
         ret = -EINVAL;
         goto err_no_fd;
@@ -3077,111 +3077,111 @@ static QemuOptsList sd_create_opts = {
 };
 
 static BlockDriver bdrv_sheepdog = {
-    .format_name    = "sheepdog",
-    .protocol_name  = "sheepdog",
-    .instance_size  = sizeof(BDRVSheepdogState),
-    .bdrv_parse_filename    = sd_parse_filename,
-    .bdrv_file_open = sd_open,
-    .bdrv_reopen_prepare    = sd_reopen_prepare,
-    .bdrv_reopen_commit     = sd_reopen_commit,
-    .bdrv_reopen_abort      = sd_reopen_abort,
-    .bdrv_close     = sd_close,
-    .bdrv_create    = sd_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_getlength = sd_getlength,
+    .format_name                  = "sheepdog",
+    .protocol_name                = "sheepdog",
+    .instance_size                = sizeof(BDRVSheepdogState),
+    .bdrv_parse_filename          = sd_parse_filename,
+    .bdrv_file_open               = sd_open,
+    .bdrv_reopen_prepare          = sd_reopen_prepare,
+    .bdrv_reopen_commit           = sd_reopen_commit,
+    .bdrv_reopen_abort            = sd_reopen_abort,
+    .bdrv_close                   = sd_close,
+    .bdrv_create                  = sd_create,
+    .bdrv_has_zero_init           = bdrv_has_zero_init_1,
+    .bdrv_getlength               = sd_getlength,
     .bdrv_get_allocated_file_size = sd_get_allocated_file_size,
-    .bdrv_truncate  = sd_truncate,
+    .bdrv_truncate                = sd_truncate,
 
-    .bdrv_co_readv  = sd_co_readv,
-    .bdrv_co_writev = sd_co_writev,
-    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
-    .bdrv_co_pdiscard = sd_co_pdiscard,
-    .bdrv_co_get_block_status = sd_co_get_block_status,
+    .bdrv_co_readv                = sd_co_readv,
+    .bdrv_co_writev               = sd_co_writev,
+    .bdrv_co_flush_to_disk        = sd_co_flush_to_disk,
+    .bdrv_co_pdiscard             = sd_co_pdiscard,
+    .bdrv_co_get_block_status     = sd_co_get_block_status,
 
-    .bdrv_snapshot_create   = sd_snapshot_create,
-    .bdrv_snapshot_goto     = sd_snapshot_goto,
-    .bdrv_snapshot_delete   = sd_snapshot_delete,
-    .bdrv_snapshot_list     = sd_snapshot_list,
+    .bdrv_snapshot_create         = sd_snapshot_create,
+    .bdrv_snapshot_goto           = sd_snapshot_goto,
+    .bdrv_snapshot_delete         = sd_snapshot_delete,
+    .bdrv_snapshot_list           = sd_snapshot_list,
 
-    .bdrv_save_vmstate  = sd_save_vmstate,
-    .bdrv_load_vmstate  = sd_load_vmstate,
+    .bdrv_save_vmstate            = sd_save_vmstate,
+    .bdrv_load_vmstate            = sd_load_vmstate,
 
-    .bdrv_detach_aio_context = sd_detach_aio_context,
-    .bdrv_attach_aio_context = sd_attach_aio_context,
+    .bdrv_detach_aio_context      = sd_detach_aio_context,
+    .bdrv_attach_aio_context      = sd_attach_aio_context,
 
-    .create_opts    = &sd_create_opts,
+    .create_opts                  = &sd_create_opts,
 };
 
 static BlockDriver bdrv_sheepdog_tcp = {
-    .format_name    = "sheepdog",
-    .protocol_name  = "sheepdog+tcp",
-    .instance_size  = sizeof(BDRVSheepdogState),
-    .bdrv_parse_filename    = sd_parse_filename,
-    .bdrv_file_open = sd_open,
-    .bdrv_reopen_prepare    = sd_reopen_prepare,
-    .bdrv_reopen_commit     = sd_reopen_commit,
-    .bdrv_reopen_abort      = sd_reopen_abort,
-    .bdrv_close     = sd_close,
-    .bdrv_create    = sd_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_getlength = sd_getlength,
+    .format_name                  = "sheepdog",
+    .protocol_name                = "sheepdog+tcp",
+    .instance_size                = sizeof(BDRVSheepdogState),
+    .bdrv_parse_filename          = sd_parse_filename,
+    .bdrv_file_open               = sd_open,
+    .bdrv_reopen_prepare          = sd_reopen_prepare,
+    .bdrv_reopen_commit           = sd_reopen_commit,
+    .bdrv_reopen_abort            = sd_reopen_abort,
+    .bdrv_close                   = sd_close,
+    .bdrv_create                  = sd_create,
+    .bdrv_has_zero_init           = bdrv_has_zero_init_1,
+    .bdrv_getlength               = sd_getlength,
     .bdrv_get_allocated_file_size = sd_get_allocated_file_size,
-    .bdrv_truncate  = sd_truncate,
+    .bdrv_truncate                = sd_truncate,
 
-    .bdrv_co_readv  = sd_co_readv,
-    .bdrv_co_writev = sd_co_writev,
-    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
-    .bdrv_co_pdiscard = sd_co_pdiscard,
-    .bdrv_co_get_block_status = sd_co_get_block_status,
+    .bdrv_co_readv                = sd_co_readv,
+    .bdrv_co_writev               = sd_co_writev,
+    .bdrv_co_flush_to_disk        = sd_co_flush_to_disk,
+    .bdrv_co_pdiscard             = sd_co_pdiscard,
+    .bdrv_co_get_block_status     = sd_co_get_block_status,
 
-    .bdrv_snapshot_create   = sd_snapshot_create,
-    .bdrv_snapshot_goto     = sd_snapshot_goto,
-    .bdrv_snapshot_delete   = sd_snapshot_delete,
-    .bdrv_snapshot_list     = sd_snapshot_list,
+    .bdrv_snapshot_create         = sd_snapshot_create,
+    .bdrv_snapshot_goto           = sd_snapshot_goto,
+    .bdrv_snapshot_delete         = sd_snapshot_delete,
+    .bdrv_snapshot_list           = sd_snapshot_list,
 
-    .bdrv_save_vmstate  = sd_save_vmstate,
-    .bdrv_load_vmstate  = sd_load_vmstate,
+    .bdrv_save_vmstate            = sd_save_vmstate,
+    .bdrv_load_vmstate            = sd_load_vmstate,
 
-    .bdrv_detach_aio_context = sd_detach_aio_context,
-    .bdrv_attach_aio_context = sd_attach_aio_context,
+    .bdrv_detach_aio_context      = sd_detach_aio_context,
+    .bdrv_attach_aio_context      = sd_attach_aio_context,
 
-    .create_opts    = &sd_create_opts,
+    .create_opts                  = &sd_create_opts,
 };
 
 static BlockDriver bdrv_sheepdog_unix = {
-    .format_name    = "sheepdog",
-    .protocol_name  = "sheepdog+unix",
-    .instance_size  = sizeof(BDRVSheepdogState),
-    .bdrv_parse_filename    = sd_parse_filename,
-    .bdrv_file_open = sd_open,
-    .bdrv_reopen_prepare    = sd_reopen_prepare,
-    .bdrv_reopen_commit     = sd_reopen_commit,
-    .bdrv_reopen_abort      = sd_reopen_abort,
-    .bdrv_close     = sd_close,
-    .bdrv_create    = sd_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_getlength = sd_getlength,
+    .format_name                  = "sheepdog",
+    .protocol_name                = "sheepdog+unix",
+    .instance_size                = sizeof(BDRVSheepdogState),
+    .bdrv_parse_filename          = sd_parse_filename,
+    .bdrv_file_open               = sd_open,
+    .bdrv_reopen_prepare          = sd_reopen_prepare,
+    .bdrv_reopen_commit           = sd_reopen_commit,
+    .bdrv_reopen_abort            = sd_reopen_abort,
+    .bdrv_close                   = sd_close,
+    .bdrv_create                  = sd_create,
+    .bdrv_has_zero_init           = bdrv_has_zero_init_1,
+    .bdrv_getlength               = sd_getlength,
     .bdrv_get_allocated_file_size = sd_get_allocated_file_size,
-    .bdrv_truncate  = sd_truncate,
+    .bdrv_truncate                = sd_truncate,
 
-    .bdrv_co_readv  = sd_co_readv,
-    .bdrv_co_writev = sd_co_writev,
-    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
-    .bdrv_co_pdiscard = sd_co_pdiscard,
-    .bdrv_co_get_block_status = sd_co_get_block_status,
+    .bdrv_co_readv                = sd_co_readv,
+    .bdrv_co_writev               = sd_co_writev,
+    .bdrv_co_flush_to_disk        = sd_co_flush_to_disk,
+    .bdrv_co_pdiscard             = sd_co_pdiscard,
+    .bdrv_co_get_block_status     = sd_co_get_block_status,
 
-    .bdrv_snapshot_create   = sd_snapshot_create,
-    .bdrv_snapshot_goto     = sd_snapshot_goto,
-    .bdrv_snapshot_delete   = sd_snapshot_delete,
-    .bdrv_snapshot_list     = sd_snapshot_list,
+    .bdrv_snapshot_create         = sd_snapshot_create,
+    .bdrv_snapshot_goto           = sd_snapshot_goto,
+    .bdrv_snapshot_delete         = sd_snapshot_delete,
+    .bdrv_snapshot_list           = sd_snapshot_list,
 
-    .bdrv_save_vmstate  = sd_save_vmstate,
-    .bdrv_load_vmstate  = sd_load_vmstate,
+    .bdrv_save_vmstate            = sd_save_vmstate,
+    .bdrv_load_vmstate            = sd_load_vmstate,
 
-    .bdrv_detach_aio_context = sd_detach_aio_context,
-    .bdrv_attach_aio_context = sd_attach_aio_context,
+    .bdrv_detach_aio_context      = sd_detach_aio_context,
+    .bdrv_attach_aio_context      = sd_attach_aio_context,
 
-    .create_opts    = &sd_create_opts,
+    .create_opts                  = &sd_create_opts,
 };
 
 static void bdrv_sheepdog_init(void)
diff --git a/blockjob.c b/blockjob.c
index 715c2c2680..6173e4728c 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -59,6 +59,7 @@ static void __attribute__((__constructor__)) block_job_init(void)
 
 static void block_job_event_cancelled(BlockJob *job);
 static void block_job_event_completed(BlockJob *job, const char *msg);
+static void block_job_enter_cond(BlockJob *job, bool(*fn)(BlockJob *job));
 
 /* Transactional group of block jobs */
 struct BlockJobTxn {
@@ -480,9 +481,16 @@ static void block_job_completed_txn_success(BlockJob *job)
     }
 }
 
+/* Assumes the block_job_mutex is held */
+static bool block_job_timer_pending(BlockJob *job)
+{
+    return timer_pending(&job->sleep_timer);
+}
+
 void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
 {
     Error *local_err = NULL;
+    int64_t old_speed = job->speed;
 
     if (!job->driver->set_speed) {
         error_setg(errp, QERR_UNSUPPORTED);
@@ -495,6 +503,12 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
     }
 
     job->speed = speed;
+    if (speed <= old_speed) {
+        return;
+    }
+
+    /* kick only if a timer is pending */
+    block_job_enter_cond(job, block_job_timer_pending);
 }
 
 void block_job_complete(BlockJob *job, Error **errp)
@@ -821,7 +835,11 @@ void block_job_resume_all(void)
     }
 }
 
-void block_job_enter(BlockJob *job)
+/*
+ * Conditionally enter a block_job: when fn is non-NULL the job is entered
+ * only if fn(job) returns true; fn is called under the block_job_lock.
+ */
+static void block_job_enter_cond(BlockJob *job, bool(*fn)(BlockJob *job))
 {
     if (!block_job_started(job)) {
         return;
@@ -836,6 +854,11 @@ void block_job_enter(BlockJob *job)
         return;
     }
 
+    if (fn && !fn(job)) {
+        block_job_unlock();
+        return;
+    }
+
     assert(!job->deferred_to_main_loop);
     timer_del(&job->sleep_timer);
     job->busy = true;
@@ -843,6 +866,11 @@ void block_job_enter(BlockJob *job)
     aio_co_wake(job->co);
 }
 
+void block_job_enter(BlockJob *job)
+{
+    block_job_enter_cond(job, NULL);
+}
+
 bool block_job_is_cancelled(BlockJob *job)
 {
     return job->cancelled;
diff --git a/gdbstub.c b/gdbstub.c
index 2a94030d3b..f1d51480f7 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -21,6 +21,7 @@
 #include "qemu/error-report.h"
 #include "qemu/cutils.h"
 #include "cpu.h"
+#include "trace-root.h"
 #ifdef CONFIG_USER_ONLY
 #include "qemu.h"
 #else
@@ -287,21 +288,6 @@ static int gdb_signal_to_target (int sig)
         return -1;
 }
 
-/* #define DEBUG_GDB */
-
-#ifdef DEBUG_GDB
-# define DEBUG_GDB_GATE 1
-#else
-# define DEBUG_GDB_GATE 0
-#endif
-
-#define gdb_debug(fmt, ...) do { \
-    if (DEBUG_GDB_GATE) { \
-        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
-    } \
-} while (0)
-
-
 typedef struct GDBRegisterState {
     int base_reg;
     int num_regs;
@@ -410,10 +396,13 @@ int use_gdb_syscalls(void)
 /* Resume execution.  */
 static inline void gdb_continue(GDBState *s)
 {
+
 #ifdef CONFIG_USER_ONLY
     s->running_state = 1;
+    trace_gdbstub_op_continue();
 #else
     if (!runstate_needs_reset()) {
+        trace_gdbstub_op_continue();
         vm_start();
     }
 #endif
@@ -434,6 +423,7 @@ static int gdb_continue_partial(GDBState *s, char *newstates)
      */
     CPU_FOREACH(cpu) {
         if (newstates[cpu->cpu_index] == 's') {
+            trace_gdbstub_op_stepping(cpu->cpu_index);
             cpu_single_step(cpu, sstep_flags);
         }
     }
@@ -452,11 +442,13 @@ static int gdb_continue_partial(GDBState *s, char *newstates)
             case 1:
                 break; /* nothing to do here */
             case 's':
+                trace_gdbstub_op_stepping(cpu->cpu_index);
                 cpu_single_step(cpu, sstep_flags);
                 cpu_resume(cpu);
                 flag = 1;
                 break;
             case 'c':
+                trace_gdbstub_op_continue_cpu(cpu->cpu_index);
                 cpu_resume(cpu);
                 flag = 1;
                 break;
@@ -538,12 +530,49 @@ static void hextomem(uint8_t *mem, const char *buf, int len)
     }
 }
 
+static void hexdump(const char *buf, int len,
+                    void (*trace_fn)(size_t ofs, char const *text))
+{
+    char line_buffer[3 * 16 + 4 + 16 + 1];
+
+    size_t i;
+    for (i = 0; i < len || (i & 0xF); ++i) {
+        size_t byte_ofs = i & 15;
+
+        if (byte_ofs == 0) {
+            memset(line_buffer, ' ', 3 * 16 + 4 + 16);
+            line_buffer[3 * 16 + 4 + 16] = 0;
+        }
+
+        size_t col_group = (i >> 2) & 3;
+        size_t hex_col = byte_ofs * 3 + col_group;
+        size_t txt_col = 3 * 16 + 4 + byte_ofs;
+
+        if (i < len) {
+            char value = buf[i];
+
+            line_buffer[hex_col + 0] = tohex((value >> 4) & 0xF);
+            line_buffer[hex_col + 1] = tohex((value >> 0) & 0xF);
+            line_buffer[txt_col + 0] = (value >= ' ' && value < 127)
+                    ? value
+                    : '.';
+        }
+
+        if (byte_ofs == 0xF)
+            trace_fn(i & -16, line_buffer);
+    }
+}
+
 /* return -1 if error, 0 if OK */
-static int put_packet_binary(GDBState *s, const char *buf, int len)
+static int put_packet_binary(GDBState *s, const char *buf, int len, bool dump)
 {
     int csum, i;
     uint8_t *p;
 
+    if (dump && trace_event_get_state_backends(TRACE_GDBSTUB_IO_BINARYREPLY)) {
+        hexdump(buf, len, trace_gdbstub_io_binaryreply);
+    }
+
     for(;;) {
         p = s->last_packet;
         *(p++) = '$';
@@ -576,9 +605,9 @@ static int put_packet_binary(GDBState *s, const char *buf, int len)
 /* return -1 if error, 0 if OK */
 static int put_packet(GDBState *s, const char *buf)
 {
-    gdb_debug("reply='%s'\n", buf);
+    trace_gdbstub_io_reply(buf);
 
-    return put_packet_binary(s, buf, strlen(buf));
+    return put_packet_binary(s, buf, strlen(buf), false);
 }
 
 /* Encode data using the encoding for 'x' packets.  */
@@ -975,8 +1004,7 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf)
     uint8_t *registers;
     target_ulong addr, len;
 
-
-    gdb_debug("command='%s'\n", line_buf);
+    trace_gdbstub_io_command(line_buf);
 
     p = line_buf;
     ch = *p++;
@@ -999,7 +1027,7 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf)
         }
         s->signal = 0;
         gdb_continue(s);
-	return RS_IDLE;
+        return RS_IDLE;
     case 'C':
         s->signal = gdb_signal_to_target (strtoul(p, (char **)&p, 16));
         if (s->signal == -1)
@@ -1045,7 +1073,7 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf)
         }
         cpu_single_step(s->c_cpu, sstep_flags);
         gdb_continue(s);
-	return RS_IDLE;
+        return RS_IDLE;
     case 'F':
         {
             target_ulong ret;
@@ -1267,6 +1295,7 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf)
                 len = snprintf((char *)mem_buf, sizeof(buf) / 2,
                                "CPU#%d [%s]", cpu->cpu_index,
                                cpu->halted ? "halted " : "running");
+                trace_gdbstub_op_extra_info((char *)mem_buf);
                 memtohex(buf, mem_buf, len);
                 put_packet(s, buf);
             }
@@ -1350,7 +1379,7 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf)
                 buf[0] = 'l';
                 len = memtox(buf + 1, xml + addr, total_len - addr);
             }
-            put_packet_binary(s, buf, len + 1);
+            put_packet_binary(s, buf, len + 1, true);
             break;
         }
         if (is_query_packet(p, "Attached", ':')) {
@@ -1407,29 +1436,38 @@ static void gdb_vm_state_change(void *opaque, int running, RunState state)
                 type = "";
                 break;
             }
+            trace_gdbstub_hit_watchpoint(type, cpu_gdb_index(cpu),
+                    (target_ulong)cpu->watchpoint_hit->vaddr);
             snprintf(buf, sizeof(buf),
                      "T%02xthread:%02x;%swatch:" TARGET_FMT_lx ";",
                      GDB_SIGNAL_TRAP, cpu_gdb_index(cpu), type,
                      (target_ulong)cpu->watchpoint_hit->vaddr);
             cpu->watchpoint_hit = NULL;
             goto send_packet;
+        } else {
+            trace_gdbstub_hit_break();
         }
         tb_flush(cpu);
         ret = GDB_SIGNAL_TRAP;
         break;
     case RUN_STATE_PAUSED:
+        trace_gdbstub_hit_paused();
         ret = GDB_SIGNAL_INT;
         break;
     case RUN_STATE_SHUTDOWN:
+        trace_gdbstub_hit_shutdown();
         ret = GDB_SIGNAL_QUIT;
         break;
     case RUN_STATE_IO_ERROR:
+        trace_gdbstub_hit_io_error();
         ret = GDB_SIGNAL_IO;
         break;
     case RUN_STATE_WATCHDOG:
+        trace_gdbstub_hit_watchdog();
         ret = GDB_SIGNAL_ALRM;
         break;
     case RUN_STATE_INTERNAL_ERROR:
+        trace_gdbstub_hit_internal_error();
         ret = GDB_SIGNAL_ABRT;
         break;
     case RUN_STATE_SAVE_VM:
@@ -1439,6 +1477,7 @@ static void gdb_vm_state_change(void *opaque, int running, RunState state)
         ret = GDB_SIGNAL_XCPU;
         break;
     default:
+        trace_gdbstub_hit_unknown(state);
         ret = GDB_SIGNAL_UNKNOWN;
         break;
     }
@@ -1538,12 +1577,12 @@ static void gdb_read_byte(GDBState *s, int ch)
         /* Waiting for a response to the last packet.  If we see the start
            of a new command then abandon the previous response.  */
         if (ch == '-') {
-            gdb_debug("Got NACK, retransmitting\n");
+            trace_gdbstub_err_got_nack();
             put_buffer(s, (uint8_t *)s->last_packet, s->last_packet_len);
         } else if (ch == '+') {
-            gdb_debug("Got ACK\n");
+            trace_gdbstub_io_got_ack();
         } else {
-            gdb_debug("Got '%c' when expecting ACK/NACK\n", ch);
+            trace_gdbstub_io_got_unexpected((uint8_t)ch);
         }
 
         if (ch == '+' || ch == '$')
@@ -1566,7 +1605,7 @@ static void gdb_read_byte(GDBState *s, int ch)
                 s->line_sum = 0;
                 s->state = RS_GETLINE;
             } else {
-                gdb_debug("received garbage between packets: 0x%x\n", ch);
+                trace_gdbstub_err_garbage((uint8_t)ch);
             }
             break;
         case RS_GETLINE:
@@ -1582,7 +1621,7 @@ static void gdb_read_byte(GDBState *s, int ch)
                 /* end of command, start of checksum*/
                 s->state = RS_CHKSUM1;
             } else if (s->line_buf_index >= sizeof(s->line_buf) - 1) {
-                gdb_debug("command buffer overrun, dropping command\n");
+                trace_gdbstub_err_overrun();
                 s->state = RS_IDLE;
             } else {
                 /* unescaped command character */
@@ -1596,7 +1635,7 @@ static void gdb_read_byte(GDBState *s, int ch)
                 s->state = RS_CHKSUM1;
             } else if (s->line_buf_index >= sizeof(s->line_buf) - 1) {
                 /* command buffer overrun */
-                gdb_debug("command buffer overrun, dropping command\n");
+                trace_gdbstub_err_overrun();
                 s->state = RS_IDLE;
             } else {
                 /* parse escaped character and leave escape state */
@@ -1608,18 +1647,18 @@ static void gdb_read_byte(GDBState *s, int ch)
         case RS_GETLINE_RLE:
             if (ch < ' ') {
                 /* invalid RLE count encoding */
-                gdb_debug("got invalid RLE count: 0x%x\n", ch);
+                trace_gdbstub_err_invalid_repeat((uint8_t)ch);
                 s->state = RS_GETLINE;
             } else {
                 /* decode repeat length */
                 int repeat = (unsigned char)ch - ' ' + 3;
                 if (s->line_buf_index + repeat >= sizeof(s->line_buf) - 1) {
                     /* that many repeats would overrun the command buffer */
-                    gdb_debug("command buffer overrun, dropping command\n");
+                    trace_gdbstub_err_overrun();
                     s->state = RS_IDLE;
                 } else if (s->line_buf_index < 1) {
                     /* got a repeat but we have nothing to repeat */
-                    gdb_debug("got invalid RLE sequence\n");
+                    trace_gdbstub_err_invalid_rle();
                     s->state = RS_GETLINE;
                 } else {
                     /* repeat the last character */
@@ -1634,7 +1673,7 @@ static void gdb_read_byte(GDBState *s, int ch)
         case RS_CHKSUM1:
             /* get high hex digit of checksum */
             if (!isxdigit(ch)) {
-                gdb_debug("got invalid command checksum digit\n");
+                trace_gdbstub_err_checksum_invalid((uint8_t)ch);
                 s->state = RS_GETLINE;
                 break;
             }
@@ -1645,14 +1684,14 @@ static void gdb_read_byte(GDBState *s, int ch)
         case RS_CHKSUM2:
             /* get low hex digit of checksum */
             if (!isxdigit(ch)) {
-                gdb_debug("got invalid command checksum digit\n");
+                trace_gdbstub_err_checksum_invalid((uint8_t)ch);
                 s->state = RS_GETLINE;
                 break;
             }
             s->line_csum |= fromhex(ch);
 
             if (s->line_csum != (s->line_sum & 0xff)) {
-                gdb_debug("got command packet with incorrect checksum\n");
+                trace_gdbstub_err_checksum_incorrect(s->line_sum, s->line_csum);
                 /* send NAK reply */
                 reply = '-';
                 put_buffer(s, &reply, 1);
@@ -1686,6 +1725,8 @@ void gdb_exit(CPUArchState *env, int code)
   }
 #endif
 
+  trace_gdbstub_op_exiting((uint8_t)code);
+
   snprintf(buf, sizeof(buf), "W%02x", (uint8_t)code);
   put_packet(s, buf);
 
@@ -1944,6 +1985,8 @@ static const TypeInfo char_gdb_type_info = {
 
 int gdbserver_start(const char *device)
 {
+    trace_gdbstub_op_start(device);
+
     GDBState *s;
     char gdbstub_device_name[128];
     Chardev *chr = NULL;
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 46b3843cf8..1d3ba722fa 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -21,6 +21,7 @@
 #include "net/hub.h"
 #include "qapi/visitor.h"
 #include "chardev/char-fe.h"
+#include "sysemu/iothread.h"
 #include "sysemu/tpm_backend.h"
 
 static void get_pointer(Object *obj, Visitor *v, Property *prop,
@@ -236,69 +237,6 @@ const PropertyInfo qdev_prop_chr = {
     .release = release_chr,
 };
 
-/* --- character device --- */
-
-static void get_tpm(Object *obj, Visitor *v, const char *name, void *opaque,
-                    Error **errp)
-{
-    DeviceState *dev = DEVICE(obj);
-    TPMBackend **be = qdev_get_prop_ptr(dev, opaque);
-    char *p;
-
-    p = g_strdup(*be ? (*be)->id : "");
-    visit_type_str(v, name, &p, errp);
-    g_free(p);
-}
-
-static void set_tpm(Object *obj, Visitor *v, const char *name, void *opaque,
-                    Error **errp)
-{
-    DeviceState *dev = DEVICE(obj);
-    Error *local_err = NULL;
-    Property *prop = opaque;
-    TPMBackend *s, **be = qdev_get_prop_ptr(dev, prop);
-    char *str;
-
-    if (dev->realized) {
-        qdev_prop_set_after_realize(dev, name, errp);
-        return;
-    }
-
-    visit_type_str(v, name, &str, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
-
-    s = qemu_find_tpm_be(str);
-    if (s == NULL) {
-        error_setg(errp, "Property '%s.%s' can't find value '%s'",
-                   object_get_typename(obj), prop->name, str);
-    } else if (tpm_backend_init(s, TPM_IF(obj), errp) == 0) {
-        *be = s; /* weak reference, avoid cyclic ref */
-    }
-    g_free(str);
-}
-
-static void release_tpm(Object *obj, const char *name, void *opaque)
-{
-    DeviceState *dev = DEVICE(obj);
-    Property *prop = opaque;
-    TPMBackend **be = qdev_get_prop_ptr(dev, prop);
-
-    if (*be) {
-        tpm_backend_reset(*be);
-    }
-}
-
-const PropertyInfo qdev_prop_tpm = {
-    .name  = "str",
-    .description = "ID of a tpm to use as a backend",
-    .get   = get_tpm,
-    .set   = set_tpm,
-    .release = release_tpm,
-};
-
 /* --- netdev device --- */
 static void get_netdev(Object *obj, Visitor *v, const char *name,
                        void *opaque, Error **errp)
diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs
index 41f0b7a590..7a93b24636 100644
--- a/hw/tpm/Makefile.objs
+++ b/hw/tpm/Makefile.objs
@@ -1,3 +1,4 @@
+common-obj-y += tpm_util.o
 common-obj-$(CONFIG_TPM_TIS) += tpm_tis.o
-common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o tpm_util.o
-common-obj-$(CONFIG_TPM_EMULATOR) += tpm_emulator.o tpm_util.o
+common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o
+common-obj-$(CONFIG_TPM_EMULATOR) += tpm_emulator.o
diff --git a/hw/tpm/tpm_util.c b/hw/tpm/tpm_util.c
index a317243a7e..17cafbe6b3 100644
--- a/hw/tpm/tpm_util.c
+++ b/hw/tpm/tpm_util.c
@@ -21,9 +21,13 @@
 
 #include "qemu/osdep.h"
 #include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qapi/visitor.h"
 #include "tpm_util.h"
 #include "tpm_int.h"
 #include "exec/memory.h"
+#include "sysemu/tpm_backend.h"
+#include "hw/qdev.h"
 
 #define DEBUG_TPM 0
 
@@ -33,6 +37,69 @@
     } \
 } while (0)
 
+/* tpm backend property */
+
+static void get_tpm(Object *obj, Visitor *v, const char *name, void *opaque,
+                    Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    TPMBackend **be = qdev_get_prop_ptr(dev, opaque);
+    char *p;
+
+    p = g_strdup(*be ? (*be)->id : "");
+    visit_type_str(v, name, &p, errp);
+    g_free(p);
+}
+
+static void set_tpm(Object *obj, Visitor *v, const char *name, void *opaque,
+                    Error **errp)
+{
+    DeviceState *dev = DEVICE(obj);
+    Error *local_err = NULL;
+    Property *prop = opaque;
+    TPMBackend *s, **be = qdev_get_prop_ptr(dev, prop);
+    char *str;
+
+    if (dev->realized) {
+        qdev_prop_set_after_realize(dev, name, errp);
+        return;
+    }
+
+    visit_type_str(v, name, &str, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    s = qemu_find_tpm_be(str);
+    if (s == NULL) {
+        error_setg(errp, "Property '%s.%s' can't find value '%s'",
+                   object_get_typename(obj), prop->name, str);
+    } else if (tpm_backend_init(s, TPM_IF(obj), errp) == 0) {
+        *be = s; /* weak reference, avoid cyclic ref */
+    }
+    g_free(str);
+}
+
+static void release_tpm(Object *obj, const char *name, void *opaque)
+{
+    DeviceState *dev = DEVICE(obj);
+    Property *prop = opaque;
+    TPMBackend **be = qdev_get_prop_ptr(dev, prop);
+
+    if (*be) {
+        tpm_backend_reset(*be);
+    }
+}
+
+const PropertyInfo qdev_prop_tpm = {
+    .name  = "str",
+    .description = "ID of a tpm to use as a backend",
+    .get   = get_tpm,
+    .set   = set_tpm,
+    .release = release_tpm,
+};
+
 /*
  * Write an error message in the given output buffer.
  */
diff --git a/hw/tpm/tpm_util.h b/hw/tpm/tpm_util.h
index 1c17e3913b..2393b6bc0e 100644
--- a/hw/tpm/tpm_util.h
+++ b/hw/tpm/tpm_util.h
@@ -39,4 +39,7 @@ static inline uint32_t tpm_cmd_get_size(const void *b)
 int tpm_util_get_buffer_size(int tpm_fd, TPMVersion tpm_version,
                              size_t *buffersize);
 
+#define DEFINE_PROP_TPMBE(_n, _s, _f)                     \
+    DEFINE_PROP(_n, _s, _f, qdev_prop_tpm, TPMBackend *)
+
 #endif /* TPM_TPM_UTIL_H */
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 3579a7597c..a591c27213 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -91,5 +91,6 @@ bool bdrv_has_changed_persistent_bitmaps(BlockDriverState *bs);
 BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState *bs,
                                         BdrvDirtyBitmap *bitmap);
 char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp);
+int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap, uint64_t start);
 
 #endif
diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
index 4d24cdf8d6..60b42ac561 100644
--- a/include/hw/qdev-properties.h
+++ b/include/hw/qdev-properties.h
@@ -187,8 +187,6 @@ extern const PropertyInfo qdev_prop_link;
 
 #define DEFINE_PROP_CHR(_n, _s, _f)             \
     DEFINE_PROP(_n, _s, _f, qdev_prop_chr, CharBackend)
-#define DEFINE_PROP_TPMBE(_n, _s, _f)                     \
-    DEFINE_PROP(_n, _s, _f, qdev_prop_tpm, TPMBackend *)
 #define DEFINE_PROP_STRING(_n, _s, _f)             \
     DEFINE_PROP(_n, _s, _f, qdev_prop_string, char*)
 #define DEFINE_PROP_NETDEV(_n, _s, _f)             \
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index 81e78043d1..6b6490ecad 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -292,6 +292,14 @@ void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first);
  */
 unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi);
 
+/* hbitmap_next_zero:
+ * @hb: The HBitmap to operate on
+ * @start: The bit to start from.
+ *
+ * Find the next bit that is not dirty, searching from @start.
+ */
+int64_t hbitmap_next_zero(const HBitmap *hb, uint64_t start);
+
 /* hbitmap_create_meta:
  * Create a "meta" hbitmap to track dirtiness of the bits in this HBitmap.
  * The caller owns the created bitmap and must call hbitmap_free_meta(hb) to
diff --git a/linux-user/main.c b/linux-user/main.c
index 2fd2a143ed..71696ed33d 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -2679,6 +2679,8 @@ void cpu_loop(CPUSH4State *env)
     target_siginfo_t info;
 
     while (1) {
+        bool arch_interrupt = true;
+
         cpu_exec_start(cs);
         trapnr = cpu_exec(cs);
         cpu_exec_end(cs);
@@ -2710,13 +2712,14 @@ void cpu_loop(CPUSH4State *env)
                 int sig;
 
                 sig = gdb_handlesig(cs, TARGET_SIGTRAP);
-                if (sig)
-                  {
+                if (sig) {
                     info.si_signo = sig;
                     info.si_errno = 0;
                     info.si_code = TARGET_TRAP_BRKPT;
                     queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
-                  }
+                } else {
+                    arch_interrupt = false;
+                }
             }
             break;
 	case 0xa0:
@@ -2727,9 +2730,9 @@ void cpu_loop(CPUSH4State *env)
             info._sifields._sigfault._addr = env->tea;
             queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
 	    break;
-
         case EXCP_ATOMIC:
             cpu_exec_step_atomic(cs);
+            arch_interrupt = false;
             break;
         default:
             printf ("Unhandled trap: 0x%x\n", trapnr);
@@ -2737,6 +2740,14 @@ void cpu_loop(CPUSH4State *env)
             exit(EXIT_FAILURE);
         }
         process_pending_signals (env);
+
+        /* Most of the traps imply an exception or interrupt, which
+           implies an RTE instruction has been executed.  Which means
+           that LDST (aka LOCK_ADDR) should be cleared.  But there are
+           a few exceptions for traps internal to QEMU.  */
+        if (arch_interrupt) {
+            env->lock_addr = -1;
+        }
     }
 }
 #endif
diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index 960b46870d..a2c26e0597 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -188,7 +188,9 @@ typedef struct CPUSH4State {
     tlb_t itlb[ITLB_SIZE];	/* instruction translation table */
     tlb_t utlb[UTLB_SIZE];	/* unified translation table */
 
-    uint32_t ldst;
+    /* LDST = LOCK_ADDR != -1.  */
+    uint32_t lock_addr;
+    uint32_t lock_value;
 
     /* Fields up to this point are cleared by a CPU reset */
     struct {} end_reset_fields;
diff --git a/target/sh4/helper.c b/target/sh4/helper.c
index 28d93c2543..680b583e53 100644
--- a/target/sh4/helper.c
+++ b/target/sh4/helper.c
@@ -171,6 +171,7 @@ void superh_cpu_do_interrupt(CPUState *cs)
     env->spc = env->pc;
     env->sgr = env->gregs[15];
     env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB);
+    env->lock_addr = -1;
 
     if (env->flags & DELAY_SLOT_MASK) {
         /* Branch instruction should be executed again before delay slot. */
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 8569179883..038663cc05 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -25,28 +25,27 @@
 #include "exec/exec-all.h"
 #include "tcg-op.h"
 #include "exec/cpu_ldst.h"
-
 #include "exec/helper-proto.h"
 #include "exec/helper-gen.h"
-
+#include "exec/translator.h"
 #include "trace-tcg.h"
 #include "exec/log.h"
 
 
 typedef struct DisasContext {
-    struct TranslationBlock *tb;
-    target_ulong pc;
-    uint16_t opcode;
-    uint32_t tbflags;    /* should stay unmodified during the TB translation */
-    uint32_t envflags;   /* should stay in sync with env->flags using TCG ops */
-    int bstate;
+    DisasContextBase base;
+
+    uint32_t tbflags;  /* should stay unmodified during the TB translation */
+    uint32_t envflags; /* should stay in sync with env->flags using TCG ops */
     int memidx;
     int gbank;
     int fbank;
     uint32_t delayed_pc;
-    int singlestep_enabled;
     uint32_t features;
-    int has_movcal;
+
+    uint16_t opcode;
+
+    bool has_movcal;
 } DisasContext;
 
 #if defined(CONFIG_USER_ONLY)
@@ -55,21 +54,18 @@ typedef struct DisasContext {
 #define IS_USER(ctx) (!(ctx->tbflags & (1u << SR_MD)))
 #endif
 
-enum {
-    BS_NONE     = 0, /* We go out of the TB without reaching a branch or an
-                      * exception condition
-                      */
-    BS_STOP     = 1, /* We want to stop translation for any reason */
-    BS_BRANCH   = 2, /* We reached a branch condition     */
-    BS_EXCP     = 3, /* We reached an exception condition */
-};
+/* Target-specific values for ctx->base.is_jmp.  */
+/* We want to exit back to the cpu loop for some reason.
+   Usually this is to recognize interrupts immediately.  */
+#define DISAS_STOP    DISAS_TARGET_0
 
 /* global register indexes */
 static TCGv cpu_gregs[32];
 static TCGv cpu_sr, cpu_sr_m, cpu_sr_q, cpu_sr_t;
 static TCGv cpu_pc, cpu_ssr, cpu_spc, cpu_gbr;
 static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl;
-static TCGv cpu_pr, cpu_fpscr, cpu_fpul, cpu_ldst;
+static TCGv cpu_pr, cpu_fpscr, cpu_fpul;
+static TCGv cpu_lock_addr, cpu_lock_value;
 static TCGv cpu_fregs[32];
 
 /* internal register indexes */
@@ -147,8 +143,12 @@ void sh4_translate_init(void)
                                               offsetof(CPUSH4State,
                                                        delayed_cond),
                                               "_delayed_cond_");
-    cpu_ldst = tcg_global_mem_new_i32(cpu_env,
-				      offsetof(CPUSH4State, ldst), "_ldst_");
+    cpu_lock_addr = tcg_global_mem_new_i32(cpu_env,
+                                           offsetof(CPUSH4State, lock_addr),
+                                           "_lock_addr_");
+    cpu_lock_value = tcg_global_mem_new_i32(cpu_env,
+                                            offsetof(CPUSH4State, lock_value),
+                                            "_lock_value_");
 
     for (i = 0; i < 32; i++)
         cpu_fregs[i] = tcg_global_mem_new_i32(cpu_env,
@@ -209,7 +209,7 @@ static void gen_write_sr(TCGv src)
 static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc)
 {
     if (save_pc) {
-        tcg_gen_movi_i32(cpu_pc, ctx->pc);
+        tcg_gen_movi_i32(cpu_pc, ctx->base.pc_next);
     }
     if (ctx->delayed_pc != (uint32_t) -1) {
         tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
@@ -227,11 +227,11 @@ static inline bool use_exit_tb(DisasContext *ctx)
 static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest)
 {
     /* Use a direct jump if in same page and singlestep not enabled */
-    if (unlikely(ctx->singlestep_enabled || use_exit_tb(ctx))) {
+    if (unlikely(ctx->base.singlestep_enabled || use_exit_tb(ctx))) {
         return false;
     }
 #ifndef CONFIG_USER_ONLY
-    return (ctx->tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
+    return (ctx->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
 #else
     return true;
 #endif
@@ -242,10 +242,10 @@ static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
     if (use_goto_tb(ctx, dest)) {
         tcg_gen_goto_tb(n);
         tcg_gen_movi_i32(cpu_pc, dest);
-        tcg_gen_exit_tb((uintptr_t)ctx->tb + n);
+        tcg_gen_exit_tb((uintptr_t)ctx->base.tb + n);
     } else {
         tcg_gen_movi_i32(cpu_pc, dest);
-        if (ctx->singlestep_enabled) {
+        if (ctx->base.singlestep_enabled) {
             gen_helper_debug(cpu_env);
         } else if (use_exit_tb(ctx)) {
             tcg_gen_exit_tb(0);
@@ -253,6 +253,7 @@ static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
             tcg_gen_lookup_and_goto_ptr();
         }
     }
+    ctx->base.is_jmp = DISAS_NORETURN;
 }
 
 static void gen_jump(DisasContext * ctx)
@@ -262,13 +263,14 @@ static void gen_jump(DisasContext * ctx)
 	   delayed jump as immediate jump are conditinal jumps */
 	tcg_gen_mov_i32(cpu_pc, cpu_delayed_pc);
         tcg_gen_discard_i32(cpu_delayed_pc);
-        if (ctx->singlestep_enabled) {
+        if (ctx->base.singlestep_enabled) {
             gen_helper_debug(cpu_env);
         } else if (use_exit_tb(ctx)) {
             tcg_gen_exit_tb(0);
         } else {
             tcg_gen_lookup_and_goto_ptr();
         }
+        ctx->base.is_jmp = DISAS_NORETURN;
     } else {
 	gen_goto_tb(ctx, 0, ctx->delayed_pc);
     }
@@ -298,8 +300,8 @@ static void gen_conditional_jump(DisasContext *ctx, target_ulong dest,
     tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1);
     gen_goto_tb(ctx, 0, dest);
     gen_set_label(l1);
-    gen_goto_tb(ctx, 1, ctx->pc + 2);
-    ctx->bstate = BS_BRANCH;
+    gen_goto_tb(ctx, 1, ctx->base.pc_next + 2);
+    ctx->base.is_jmp = DISAS_NORETURN;
 }
 
 /* Delayed conditional jump (bt or bf) */
@@ -322,11 +324,12 @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
         gen_jump(ctx);
 
         gen_set_label(l1);
+        ctx->base.is_jmp = DISAS_NEXT;
         return;
     }
 
     tcg_gen_brcondi_i32(TCG_COND_NE, ds, 0, l1);
-    gen_goto_tb(ctx, 1, ctx->pc + 2);
+    gen_goto_tb(ctx, 1, ctx->base.pc_next + 2);
     gen_set_label(l1);
     gen_jump(ctx);
 }
@@ -463,7 +466,7 @@ static void _decode_opc(DisasContext * ctx)
 	tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
         ctx->envflags |= DELAY_SLOT_RTE;
 	ctx->delayed_pc = (uint32_t) - 1;
-        ctx->bstate = BS_STOP;
+        ctx->base.is_jmp = DISAS_STOP;
 	return;
     case 0x0058:		/* sets */
         tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_S));
@@ -474,23 +477,23 @@ static void _decode_opc(DisasContext * ctx)
     case 0xfbfd:		/* frchg */
         CHECK_FPSCR_PR_0
 	tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_FR);
-	ctx->bstate = BS_STOP;
+        ctx->base.is_jmp = DISAS_STOP;
 	return;
     case 0xf3fd:		/* fschg */
         CHECK_FPSCR_PR_0
         tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_SZ);
-	ctx->bstate = BS_STOP;
+        ctx->base.is_jmp = DISAS_STOP;
 	return;
     case 0xf7fd:                /* fpchg */
         CHECK_SH4A
         tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_PR);
-        ctx->bstate = BS_STOP;
+        ctx->base.is_jmp = DISAS_STOP;
         return;
     case 0x0009:		/* nop */
 	return;
     case 0x001b:		/* sleep */
 	CHECK_PRIVILEGED
-        tcg_gen_movi_i32(cpu_pc, ctx->pc + 2);
+        tcg_gen_movi_i32(cpu_pc, ctx->base.pc_next + 2);
         gen_helper_sleep(cpu_env);
 	return;
     }
@@ -517,23 +520,24 @@ static void _decode_opc(DisasContext * ctx)
         /* Detect the start of a gUSA region.  If so, update envflags
            and end the TB.  This will allow us to see the end of the
            region (stored in R0) in the next TB.  */
-        if (B11_8 == 15 && B7_0s < 0 && (tb_cflags(ctx->tb) & CF_PARALLEL)) {
+        if (B11_8 == 15 && B7_0s < 0 &&
+            (tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
             ctx->envflags = deposit32(ctx->envflags, GUSA_SHIFT, 8, B7_0s);
-            ctx->bstate = BS_STOP;
+            ctx->base.is_jmp = DISAS_STOP;
         }
 #endif
 	tcg_gen_movi_i32(REG(B11_8), B7_0s);
 	return;
     case 0x9000:		/* mov.w @(disp,PC),Rn */
 	{
-	    TCGv addr = tcg_const_i32(ctx->pc + 4 + B7_0 * 2);
+            TCGv addr = tcg_const_i32(ctx->base.pc_next + 4 + B7_0 * 2);
             tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESW);
 	    tcg_temp_free(addr);
 	}
 	return;
     case 0xd000:		/* mov.l @(disp,PC),Rn */
 	{
-	    TCGv addr = tcg_const_i32((ctx->pc + 4 + B7_0 * 4) & ~3);
+            TCGv addr = tcg_const_i32((ctx->base.pc_next + 4 + B7_0 * 4) & ~3);
             tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESL);
 	    tcg_temp_free(addr);
 	}
@@ -543,13 +547,13 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xa000:		/* bra disp */
 	CHECK_NOT_DELAY_SLOT
-	ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2;
+        ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
         ctx->envflags |= DELAY_SLOT;
 	return;
     case 0xb000:		/* bsr disp */
 	CHECK_NOT_DELAY_SLOT
-	tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
-	ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2;
+        tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
+        ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
         ctx->envflags |= DELAY_SLOT;
 	return;
     }
@@ -601,6 +605,7 @@ static void _decode_opc(DisasContext * ctx)
 	    tcg_gen_subi_i32(addr, REG(B11_8), 4);
             tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx, MO_TEUL);
 	    tcg_gen_mov_i32(REG(B11_8), addr);
+        tcg_temp_free(addr);
 	}
 	return;
     case 0x6004:		/* mov.b @Rm+,Rn */
@@ -1176,22 +1181,22 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0x8b00:		/* bf label */
 	CHECK_NOT_DELAY_SLOT
-        gen_conditional_jump(ctx, ctx->pc + 4 + B7_0s * 2, false);
+        gen_conditional_jump(ctx, ctx->base.pc_next + 4 + B7_0s * 2, false);
 	return;
     case 0x8f00:		/* bf/s label */
 	CHECK_NOT_DELAY_SLOT
         tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1);
-        ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2;
+        ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
         ctx->envflags |= DELAY_SLOT_CONDITIONAL;
 	return;
     case 0x8900:		/* bt label */
 	CHECK_NOT_DELAY_SLOT
-        gen_conditional_jump(ctx, ctx->pc + 4 + B7_0s * 2, true);
+        gen_conditional_jump(ctx, ctx->base.pc_next + 4 + B7_0s * 2, true);
 	return;
     case 0x8d00:		/* bt/s label */
 	CHECK_NOT_DELAY_SLOT
         tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t);
-        ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2;
+        ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
         ctx->envflags |= DELAY_SLOT_CONDITIONAL;
 	return;
     case 0x8800:		/* cmp/eq #imm,R0 */
@@ -1278,7 +1283,8 @@ static void _decode_opc(DisasContext * ctx)
 	}
 	return;
     case 0xc700:		/* mova @(disp,PC),R0 */
-	tcg_gen_movi_i32(REG(0), ((ctx->pc & 0xfffffffc) + 4 + B7_0 * 4) & ~3);
+        tcg_gen_movi_i32(REG(0), ((ctx->base.pc_next & 0xfffffffc) +
+                                  4 + B7_0 * 4) & ~3);
 	return;
     case 0xcb00:		/* or #imm,R0 */
 	tcg_gen_ori_i32(REG(0), REG(0), B7_0);
@@ -1304,7 +1310,7 @@ static void _decode_opc(DisasContext * ctx)
 	    imm = tcg_const_i32(B7_0);
             gen_helper_trapa(cpu_env, imm);
 	    tcg_temp_free(imm);
-            ctx->bstate = BS_EXCP;
+            ctx->base.is_jmp = DISAS_NORETURN;
 	}
 	return;
     case 0xc800:		/* tst #imm,R0 */
@@ -1372,13 +1378,13 @@ static void _decode_opc(DisasContext * ctx)
     switch (ctx->opcode & 0xf0ff) {
     case 0x0023:		/* braf Rn */
 	CHECK_NOT_DELAY_SLOT
-	tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->pc + 4);
+        tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->base.pc_next + 4);
         ctx->envflags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x0003:		/* bsrf Rn */
 	CHECK_NOT_DELAY_SLOT
-	tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
+        tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
 	tcg_gen_add_i32(cpu_delayed_pc, REG(B11_8), cpu_pr);
         ctx->envflags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
@@ -1401,7 +1407,7 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0x400b:		/* jsr @Rn */
 	CHECK_NOT_DELAY_SLOT
-	tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
+        tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
 	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
         ctx->envflags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
@@ -1413,7 +1419,7 @@ static void _decode_opc(DisasContext * ctx)
             tcg_gen_andi_i32(val, REG(B11_8), 0x700083f3);
             gen_write_sr(val);
             tcg_temp_free(val);
-            ctx->bstate = BS_STOP;
+            ctx->base.is_jmp = DISAS_STOP;
         }
 	return;
     case 0x4007:		/* ldc.l @Rm+,SR */
@@ -1425,7 +1431,7 @@ static void _decode_opc(DisasContext * ctx)
             gen_write_sr(val);
 	    tcg_temp_free(val);
 	    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
-	    ctx->bstate = BS_STOP;
+            ctx->base.is_jmp = DISAS_STOP;
 	}
 	return;
     case 0x0002:		/* stc SR,Rn */
@@ -1487,7 +1493,7 @@ static void _decode_opc(DisasContext * ctx)
     case 0x406a:		/* lds Rm,FPSCR */
 	CHECK_FPU_ENABLED
         gen_helper_ld_fpscr(cpu_env, REG(B11_8));
-	ctx->bstate = BS_STOP;
+        ctx->base.is_jmp = DISAS_STOP;
 	return;
     case 0x4066:		/* lds.l @Rm+,FPSCR */
 	CHECK_FPU_ENABLED
@@ -1497,7 +1503,7 @@ static void _decode_opc(DisasContext * ctx)
 	    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
             gen_helper_ld_fpscr(cpu_env, addr);
 	    tcg_temp_free(addr);
-	    ctx->bstate = BS_STOP;
+            ctx->base.is_jmp = DISAS_STOP;
 	}
 	return;
     case 0x006a:		/* sts FPSCR,Rn */
@@ -1524,6 +1530,7 @@ static void _decode_opc(DisasContext * ctx)
             tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TEUL);
             gen_helper_movcal(cpu_env, REG(B11_8), val);
             tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
+            tcg_temp_free(val);
         }
         ctx->has_movcal = 1;
 	return;
@@ -1547,31 +1554,64 @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0x0073:
         /* MOVCO.L
-	       LDST -> T
-               If (T == 1) R0 -> (Rn)
-               0 -> LDST
-        */
+         *     LDST -> T
+         *     If (T == 1) R0 -> (Rn)
+         *     0 -> LDST
+         *
+         * The above description doesn't work in a parallel context.
+         * Since we currently support no smp boards, this implies user-mode.
+         * But we can still support the official mechanism while user-mode
+         * is single-threaded.  */
         CHECK_SH4A
         {
-            TCGLabel *label = gen_new_label();
-            tcg_gen_mov_i32(cpu_sr_t, cpu_ldst);
-	    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ldst, 0, label);
-            tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
-	    gen_set_label(label);
-	    tcg_gen_movi_i32(cpu_ldst, 0);
-	    return;
+            TCGLabel *fail = gen_new_label();
+            TCGLabel *done = gen_new_label();
+
+            if ((tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
+                TCGv tmp;
+
+                tcg_gen_brcond_i32(TCG_COND_NE, REG(B11_8),
+                                   cpu_lock_addr, fail);
+                tmp = tcg_temp_new();
+                tcg_gen_atomic_cmpxchg_i32(tmp, REG(B11_8), cpu_lock_value,
+                                           REG(0), ctx->memidx, MO_TEUL);
+                tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, tmp, cpu_lock_value);
+                tcg_temp_free(tmp);
+            } else {
+                tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_lock_addr, -1, fail);
+                tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
+                tcg_gen_movi_i32(cpu_sr_t, 1);
+            }
+            tcg_gen_br(done);
+
+            gen_set_label(fail);
+            tcg_gen_movi_i32(cpu_sr_t, 0);
+
+            gen_set_label(done);
+            tcg_gen_movi_i32(cpu_lock_addr, -1);
         }
+        return;
     case 0x0063:
         /* MOVLI.L @Rm,R0
-               1 -> LDST
-               (Rm) -> R0
-               When interrupt/exception
-               occurred 0 -> LDST
-        */
+         *     1 -> LDST
+         *     (Rm) -> R0
+         *     When interrupt/exception
+         *     occurred 0 -> LDST
+         *
+         * In a parallel context, we must also save the loaded value
+         * for use with the cmpxchg that we'll use with movco.l.  */
         CHECK_SH4A
-        tcg_gen_movi_i32(cpu_ldst, 0);
-        tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
-        tcg_gen_movi_i32(cpu_ldst, 1);
+        if ((tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
+            TCGv tmp = tcg_temp_new();
+            tcg_gen_mov_i32(tmp, REG(B11_8));
+            tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
+            tcg_gen_mov_i32(cpu_lock_value, REG(0));
+            tcg_gen_mov_i32(cpu_lock_addr, tmp);
+            tcg_temp_free(tmp);
+        } else {
+            tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
+            tcg_gen_movi_i32(cpu_lock_addr, 0);
+        }
         return;
     case 0x0093:		/* ocbi @Rn */
 	{
@@ -1789,7 +1829,7 @@ static void _decode_opc(DisasContext * ctx)
     }
 #if 0
     fprintf(stderr, "unknown instruction 0x%04x at pc 0x%08x\n",
-	    ctx->opcode, ctx->pc);
+            ctx->opcode, ctx->base.pc_next);
     fflush(stderr);
 #endif
  do_illegal:
@@ -1801,7 +1841,7 @@ static void _decode_opc(DisasContext * ctx)
         gen_save_cpu_state(ctx, true);
         gen_helper_raise_illegal_instruction(cpu_env);
     }
-    ctx->bstate = BS_EXCP;
+    ctx->base.is_jmp = DISAS_NORETURN;
     return;
 
  do_fpu_disabled:
@@ -1811,7 +1851,7 @@ static void _decode_opc(DisasContext * ctx)
     } else {
         gen_helper_raise_fpu_disable(cpu_env);
     }
-    ctx->bstate = BS_EXCP;
+    ctx->base.is_jmp = DISAS_NORETURN;
     return;
 }
 
@@ -1837,7 +1877,6 @@ static void decode_opc(DisasContext * ctx)
         ctx->envflags &= ~GUSA_MASK;
 
         tcg_gen_movi_i32(cpu_flags, ctx->envflags);
-        ctx->bstate = BS_BRANCH;
         if (old_flags & DELAY_SLOT_CONDITIONAL) {
 	    gen_delayed_conditional_jump(ctx);
         } else {
@@ -1864,8 +1903,8 @@ static int decode_gusa(DisasContext *ctx, CPUSH4State *env, int *pmax_insns)
     int mv_src, mt_dst, st_src, st_mop;
     TCGv op_arg;
 
-    uint32_t pc = ctx->pc;
-    uint32_t pc_end = ctx->tb->cs_base;
+    uint32_t pc = ctx->base.pc_next;
+    uint32_t pc_end = ctx->base.tb->cs_base;
     int backup = sextract32(ctx->tbflags, GUSA_SHIFT, 8);
     int max_insns = (pc_end - pc) / 2;
     int i;
@@ -2189,13 +2228,13 @@ static int decode_gusa(DisasContext *ctx, CPUSH4State *env, int *pmax_insns)
     }
 
     /* If op_src is not a valid register, then op_arg was a constant.  */
-    if (op_src < 0) {
+    if (op_src < 0 && !TCGV_IS_UNUSED(op_arg)) {
         tcg_temp_free_i32(op_arg);
     }
 
     /* The entire region has been translated.  */
     ctx->envflags &= ~GUSA_MASK;
-    ctx->pc = pc_end;
+    ctx->base.pc_next = pc_end;
     return max_insns;
 
  fail:
@@ -2208,13 +2247,13 @@ static int decode_gusa(DisasContext *ctx, CPUSH4State *env, int *pmax_insns)
     ctx->envflags |= GUSA_EXCLUSIVE;
     gen_save_cpu_state(ctx, false);
     gen_helper_exclusive(cpu_env);
-    ctx->bstate = BS_EXCP;
+    ctx->base.is_jmp = DISAS_NORETURN;
 
     /* We're not executing an instruction, but we must report one for the
        purposes of accounting within the TB.  We might as well report the
-       entire region consumed via ctx->pc so that it's immediately available
-       in the disassembly dump.  */
-    ctx->pc = pc_end;
+       entire region consumed via ctx->base.pc_next so that it's immediately
+       available in the disassembly dump.  */
+    ctx->base.pc_next = pc_end;
     return 1;
 }
 #endif
@@ -2228,16 +2267,16 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb)
     int max_insns;
 
     pc_start = tb->pc;
-    ctx.pc = pc_start;
+    ctx.base.pc_next = pc_start;
     ctx.tbflags = (uint32_t)tb->flags;
     ctx.envflags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
-    ctx.bstate = BS_NONE;
+    ctx.base.is_jmp = DISAS_NEXT;
     ctx.memidx = (ctx.tbflags & (1u << SR_MD)) == 0 ? 1 : 0;
     /* We don't know if the delayed pc came from a dynamic or static branch,
        so assume it is a dynamic branch.  */
     ctx.delayed_pc = -1; /* use delayed pc from env pointer */
-    ctx.tb = tb;
-    ctx.singlestep_enabled = cs->singlestep_enabled;
+    ctx.base.tb = tb;
+    ctx.base.singlestep_enabled = cs->singlestep_enabled;
     ctx.features = env->features;
     ctx.has_movcal = (ctx.tbflags & TB_FLAG_PENDING_MOVCA);
     ctx.gbank = ((ctx.tbflags & (1 << SR_MD)) &&
@@ -2252,11 +2291,11 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb)
 
     /* Since the ISA is fixed-width, we can bound by the number
        of instructions remaining on the page.  */
-    num_insns = -(ctx.pc | TARGET_PAGE_MASK) / 2;
+    num_insns = -(ctx.base.pc_next | TARGET_PAGE_MASK) / 2;
     max_insns = MIN(max_insns, num_insns);
 
     /* Single stepping means just that.  */
-    if (ctx.singlestep_enabled || singlestep) {
+    if (ctx.base.singlestep_enabled || singlestep) {
         max_insns = 1;
     }
 
@@ -2269,22 +2308,22 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb)
     }
 #endif
 
-    while (ctx.bstate == BS_NONE
+    while (ctx.base.is_jmp == DISAS_NEXT
            && num_insns < max_insns
            && !tcg_op_buf_full()) {
-        tcg_gen_insn_start(ctx.pc, ctx.envflags);
+        tcg_gen_insn_start(ctx.base.pc_next, ctx.envflags);
         num_insns++;
 
-        if (unlikely(cpu_breakpoint_test(cs, ctx.pc, BP_ANY))) {
+        if (unlikely(cpu_breakpoint_test(cs, ctx.base.pc_next, BP_ANY))) {
             /* We have hit a breakpoint - make sure PC is up-to-date */
             gen_save_cpu_state(&ctx, true);
             gen_helper_debug(cpu_env);
-            ctx.bstate = BS_EXCP;
+            ctx.base.is_jmp = DISAS_NORETURN;
             /* The address covered by the breakpoint must be included in
                [tb->pc, tb->pc + tb->size) in order to for it to be
                properly cleared -- thus we increment the PC here so that
                the logic setting tb->size below does the right thing.  */
-            ctx.pc += 2;
+            ctx.base.pc_next += 2;
             break;
         }
 
@@ -2292,9 +2331,9 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb)
             gen_io_start();
         }
 
-        ctx.opcode = cpu_lduw_code(env, ctx.pc);
+        ctx.opcode = cpu_lduw_code(env, ctx.base.pc_next);
 	decode_opc(&ctx);
-	ctx.pc += 2;
+        ctx.base.pc_next += 2;
     }
     if (tb_cflags(tb) & CF_LAST_IO) {
         gen_io_end();
@@ -2305,30 +2344,28 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb)
         ctx.envflags &= ~GUSA_MASK;
     }
 
-    if (cs->singlestep_enabled) {
+    switch (ctx.base.is_jmp) {
+    case DISAS_STOP:
         gen_save_cpu_state(&ctx, true);
-        gen_helper_debug(cpu_env);
-    } else {
-	switch (ctx.bstate) {
-        case BS_STOP:
-            gen_save_cpu_state(&ctx, true);
+        if (ctx.base.singlestep_enabled) {
+            gen_helper_debug(cpu_env);
+        } else {
             tcg_gen_exit_tb(0);
-            break;
-        case BS_NONE:
-            gen_save_cpu_state(&ctx, false);
-            gen_goto_tb(&ctx, 0, ctx.pc);
-            break;
-        case BS_EXCP:
-            /* fall through */
-        case BS_BRANCH:
-        default:
-            break;
-	}
+        }
+        break;
+    case DISAS_NEXT:
+        gen_save_cpu_state(&ctx, false);
+        gen_goto_tb(&ctx, 0, ctx.base.pc_next);
+        break;
+    case DISAS_NORETURN:
+        break;
+    default:
+        g_assert_not_reached();
     }
 
     gen_tb_end(tb, num_insns);
 
-    tb->size = ctx.pc - pc_start;
+    tb->size = ctx.base.pc_next - pc_start;
     tb->icount = num_insns;
 
 #ifdef DEBUG_DISAS
@@ -2336,7 +2373,7 @@ void gen_intermediate_code(CPUState *cs, struct TranslationBlock *tb)
         && qemu_log_in_addr_range(pc_start)) {
         qemu_log_lock();
 	qemu_log("IN:\n");	/* , lookup_symbol(pc_start)); */
-        log_target_disas(cs, pc_start, ctx.pc - pc_start);
+        log_target_disas(cs, pc_start, ctx.base.pc_next - pc_start);
 	qemu_log("\n");
         qemu_log_unlock();
     }
diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
index af41642346..9091c639b3 100644
--- a/tests/test-hbitmap.c
+++ b/tests/test-hbitmap.c
@@ -925,6 +925,94 @@ static void test_hbitmap_iter_and_reset(TestHBitmapData *data,
     hbitmap_iter_next(&hbi);
 }
 
+/*
+ * Check hbitmap_next_zero(@start) against a bit-by-bit scan with
+ * hbitmap_get(): both must report the same offset, or -1 when the scan
+ * reaches data->size without meeting a clear bit.
+ */
+static void test_hbitmap_next_zero_check(TestHBitmapData *data, int64_t start)
+{
+    int64_t ret1, ret2;
+
+    ret1 = hbitmap_next_zero(data->hb, start);
+
+    ret2 = start;
+    while (ret2 < data->size && hbitmap_get(data->hb, ret2)) {
+        ret2++;
+    }
+    if (ret2 == data->size) {
+        ret2 = -1;
+    }
+
+    g_assert_cmpint(ret1, ==, ret2);
+}
+
+/* Run test_hbitmap_next_zero_check() on each of the @count offsets.  */
+static void test_hbitmap_next_zero_check_each(TestHBitmapData *data,
+                                              const int64_t *starts,
+                                              size_t count)
+{
+    size_t i;
+
+    for (i = 0; i < count; i++) {
+        test_hbitmap_next_zero_check(data, starts[i]);
+    }
+}
+
+/*
+ * Exercise hbitmap_next_zero() on a bitmap set up with
+ * hbitmap_test_init(data, L3, @granularity), probing a list of start
+ * offsets after each successive hbitmap_set() call.
+ */
+static void test_hbitmap_next_zero_do(TestHBitmapData *data, int granularity)
+{
+    const int64_t at_init[] = { 0, L3 - 1 };
+    const int64_t one_bit[] = { 0, L2 - 1, L2, L2 + 1 };
+    const int64_t l1_run[] = {
+        0, L2 + 1, L2 + 2, L2 + 5, L2 + L1 - 1, L2 + L1,
+    };
+    const int64_t tail_run[] = {
+        L2 * 2 - L1, L2 * 2 - 2, L2 * 2 - 1, L2 * 2, L3 - 1,
+    };
+    const int64_t full[] = { 0 };
+
+    /* State left by hbitmap_test_init().  */
+    hbitmap_test_init(data, L3, granularity);
+    test_hbitmap_next_zero_check_each(data, at_init,
+                                      sizeof(at_init) / sizeof(at_init[0]));
+
+    /* After hbitmap_set(L2, 1).  */
+    hbitmap_set(data->hb, L2, 1);
+    test_hbitmap_next_zero_check_each(data, one_bit,
+                                      sizeof(one_bit) / sizeof(one_bit[0]));
+
+    /* After additionally hbitmap_set(L2 + 5, L1).  */
+    hbitmap_set(data->hb, L2 + 5, L1);
+    test_hbitmap_next_zero_check_each(data, l1_run,
+                                      sizeof(l1_run) / sizeof(l1_run[0]));
+
+    /* After additionally hbitmap_set(L2 * 2, L3 - L2 * 2).  */
+    hbitmap_set(data->hb, L2 * 2, L3 - L2 * 2);
+    test_hbitmap_next_zero_check_each(data, tail_run,
+                                      sizeof(tail_run) / sizeof(tail_run[0]));
+
+    /* After hbitmap_set(0, L3).  */
+    hbitmap_set(data->hb, 0, L3);
+    test_hbitmap_next_zero_check_each(data, full,
+                                      sizeof(full) / sizeof(full[0]));
+}
+
+/* hbitmap_next_zero() test with granularity 0.  */
+static void test_hbitmap_next_zero_0(TestHBitmapData *data, const void *unused)
+{
+    test_hbitmap_next_zero_do(data, 0);
+}
+
+/* hbitmap_next_zero() test with granularity 4.  */
+static void test_hbitmap_next_zero_4(TestHBitmapData *data, const void *unused)
+{
+    test_hbitmap_next_zero_do(data, 4);
+}
+
 int main(int argc, char **argv)
 {
     g_test_init(&argc, &argv, NULL);
@@ -985,6 +1073,12 @@ int main(int argc, char **argv)
 
     hbitmap_test_add("/hbitmap/iter/iter_and_reset",
                      test_hbitmap_iter_and_reset);
+
+    hbitmap_test_add("/hbitmap/next_zero/next_zero_0",
+                     test_hbitmap_next_zero_0);
+    hbitmap_test_add("/hbitmap/next_zero/next_zero_4",
+                     test_hbitmap_next_zero_4);
+
     g_test_run();
 
     return 0;
diff --git a/trace-events b/trace-events
index 1d2eb5d3e4..3695959d0a 100644
--- a/trace-events
+++ b/trace-events
@@ -68,6 +68,34 @@ flatview_new(FlatView *view, MemoryRegion *root) "%p (root %p)"
 flatview_destroy(FlatView *view, MemoryRegion *root) "%p (root %p)"
 flatview_destroy_rcu(FlatView *view, MemoryRegion *root) "%p (root %p)"
 
+# gdbstub.c
+gdbstub_op_start(char const *device) "Starting gdbstub using device %s"
+gdbstub_op_exiting(uint8_t code) "notifying exit with code=0x%02x"
+gdbstub_op_continue(void) "Continuing all CPUs"
+gdbstub_op_continue_cpu(int cpu_index) "Continuing CPU %d"
+gdbstub_op_stepping(int cpu_index) "Stepping CPU %d"
+gdbstub_op_extra_info(char const *info) "Thread extra info: %s"
+gdbstub_hit_watchpoint(char const *type, int cpu_gdb_index, uint64_t vaddr) "Watchpoint hit, type=\"%s\" cpu=%d, vaddr=0x%" PRIx64 ""
+gdbstub_hit_internal_error(void) "RUN_STATE_INTERNAL_ERROR"
+gdbstub_hit_break(void) "RUN_STATE_DEBUG"
+gdbstub_hit_paused(void) "RUN_STATE_PAUSED"
+gdbstub_hit_shutdown(void) "RUN_STATE_SHUTDOWN"
+gdbstub_hit_io_error(void) "RUN_STATE_IO_ERROR"
+gdbstub_hit_watchdog(void) "RUN_STATE_WATCHDOG"
+gdbstub_hit_unknown(int state) "Unknown run state=0x%x"
+gdbstub_io_reply(char const *message) "Sent: %s"
+gdbstub_io_binaryreply(size_t ofs, char const *line) "0x%04zx: %s"
+gdbstub_io_command(char const *command) "Received: %s"
+gdbstub_io_got_ack(void) "Got ACK"
+gdbstub_io_got_unexpected(uint8_t ch) "Got 0x%02x when expecting ACK/NACK"
+gdbstub_err_got_nack(void) "Got NACK, retransmitting"
+gdbstub_err_garbage(uint8_t ch) "received garbage between packets: 0x%02x"
+gdbstub_err_overrun(void) "command buffer overrun, dropping command"
+gdbstub_err_invalid_repeat(uint8_t ch) "got invalid RLE count: 0x%02x"
+gdbstub_err_invalid_rle(void) "got invalid RLE sequence"
+gdbstub_err_checksum_invalid(uint8_t ch) "got invalid command checksum digit: 0x%02x"
+gdbstub_err_checksum_incorrect(uint8_t expected, uint8_t got) "got command packet with incorrect checksum, expected=0x%02x, received=0x%02x"
+
 ### Guest events, keep at bottom
 
 
diff --git a/trace/ftrace.c b/trace/ftrace.c
index 7de104deba..61692a8682 100644
--- a/trace/ftrace.c
+++ b/trace/ftrace.c
@@ -15,10 +15,11 @@
 
 int trace_marker_fd;
 
-static int find_debugfs(char *debugfs)
+static int find_mount(char *mount_point, const char *fstype)
 {
     char type[100];
     FILE *fp;
+    int ret = 0;
 
     fp = fopen("/proc/mounts", "r");
     if (fp == NULL) {
@@ -26,29 +27,33 @@ static int find_debugfs(char *debugfs)
     }
 
     while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
-                  debugfs, type) == 2) {
-        if (strcmp(type, "debugfs") == 0) {
+                  mount_point, type) == 2) {
+        if (strcmp(type, fstype) == 0) {
+            ret = 1;
             break;
         }
     }
     fclose(fp);
 
-    if (strcmp(type, "debugfs") != 0) {
-        return 0;
-    }
-    return 1;
+    return ret;
 }
 
 bool ftrace_init(void)
 {
-    char debugfs[PATH_MAX];
+    char mount_point[PATH_MAX];
     char path[PATH_MAX];
-    int debugfs_found;
+    int tracefs_found;
     int trace_fd = -1;
+    const char *subdir = "";
+
+    tracefs_found = find_mount(mount_point, "tracefs");
+    if (!tracefs_found) {
+        tracefs_found = find_mount(mount_point, "debugfs");
+        subdir = "/tracing";
+    }
 
-    debugfs_found = find_debugfs(debugfs);
-    if (debugfs_found) {
-        snprintf(path, PATH_MAX, "%s/tracing/tracing_on", debugfs);
+    if (tracefs_found) {
+        snprintf(path, PATH_MAX, "%s%s/tracing_on", mount_point, subdir);
         trace_fd = open(path, O_WRONLY);
         if (trace_fd < 0) {
             if (errno == EACCES) {
@@ -67,14 +72,14 @@ bool ftrace_init(void)
             }
             close(trace_fd);
         }
-        snprintf(path, PATH_MAX, "%s/tracing/trace_marker", debugfs);
+        snprintf(path, PATH_MAX, "%s%s/trace_marker", mount_point, subdir);
         trace_marker_fd = open(path, O_WRONLY);
         if (trace_marker_fd < 0) {
             perror("Could not open ftrace 'trace_marker' file");
             return false;
         }
     } else {
-        fprintf(stderr, "debugfs is not mounted\n");
+        fprintf(stderr, "tracefs is not mounted\n");
         return false;
     }
 
diff --git a/util/hbitmap.c b/util/hbitmap.c
index 2f9d0fdbd0..289778a55c 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -188,6 +188,62 @@ void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first)
     }
 }
 
+/*
+ * Return the offset of the first clear bit at or after @start, or -1 if
+ * no clear bit is found before the end of the bitmap.
+ *
+ * Only the last level, hb->levels[HBITMAP_LEVELS - 1], is scanned.  @start
+ * and the result are in the same units: bit indexes shifted left by
+ * hb->granularity.  If the clear bit found covers @start itself, @start is
+ * returned rather than the lower offset at which that bit begins.
+ *
+ * @start must satisfy (start >> hb->granularity) < hb->size.
+ */
+int64_t hbitmap_next_zero(const HBitmap *hb, uint64_t start)
+{
+    uint64_t start_bit = start >> hb->granularity;
+    size_t pos = start_bit >> BITS_PER_LEVEL;
+    unsigned long *last_lev = hb->levels[HBITMAP_LEVELS - 1];
+    uint64_t sz = hb->sizes[HBITMAP_LEVELS - 1];
+    unsigned start_bit_offset = start_bit & (BITS_PER_LONG - 1);
+    unsigned long cur;
+    int64_t res;
+
+    /* Check the range first: @pos indexes last_lev[] just below.  */
+    assert(start_bit < hb->size);
+
+    /* Pretend the bits below @start in its word are set, to skip them.  */
+    cur = last_lev[pos] | ((1UL << start_bit_offset) - 1);
+
+    if (cur == (unsigned long)-1) {
+        /* No clear bit left in this word; look for a later word with one.  */
+        do {
+            pos++;
+        } while (pos < sz && last_lev[pos] == (unsigned long)-1);
+
+        if (pos >= sz) {
+            return -1;
+        }
+
+        cur = last_lev[pos];
+    }
+
+    /* Widen before shifting so a 32-bit size_t cannot truncate the index.  */
+    res = ((uint64_t)pos << BITS_PER_LEVEL) + ctol(cur);
+    if (res >= hb->size) {
+        /* The clear bit found is at or beyond hb->size.  */
+        return -1;
+    }
+
+    res = res << hb->granularity;
+    if (res < start) {
+        assert(((start - res) >> hb->granularity) == 0);
+        return start;
+    }
+
+    return res;
+}
+
 bool hbitmap_empty(const HBitmap *hb)
 {
     return hb->count == 0;