summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--arch_init.c64
-rwxr-xr-xconfigure12
-rw-r--r--cpu-exec.c4
-rw-r--r--docs/rdma.txt24
-rw-r--r--hw/arm/musicpal.c6
-rw-r--r--hw/arm/pxa2xx.c6
-rw-r--r--hw/audio/hda-codec.c60
-rw-r--r--hw/dma/pl330.c55
-rw-r--r--hw/intc/arm_gic_kvm.c446
-rw-r--r--hw/intc/exynos4210_combiner.c2
-rw-r--r--hw/intc/gic_internal.h2
-rw-r--r--hw/misc/arm_sysctl.c4
-rw-r--r--hw/net/stellaris_enet.c3
-rw-r--r--hw/net/vhost_net.c6
-rw-r--r--hw/net/virtio-net.c16
-rw-r--r--hw/net/vmxnet3.c22
-rw-r--r--hw/timer/arm_timer.c2
-rw-r--r--include/hw/intc/arm_gic_common.h1
-rw-r--r--include/migration/page_cache.h2
-rw-r--r--include/net/net.h19
-rw-r--r--include/net/tap.h6
-rw-r--r--include/qemu/crc32c.h4
-rw-r--r--include/sysemu/kvm.h34
-rw-r--r--kvm-all.c50
-rw-r--r--linux-headers/asm-arm/kvm.h28
-rw-r--r--linux-headers/asm-arm64/kvm.h30
-rw-r--r--linux-headers/asm-powerpc/kvm.h3
-rw-r--r--linux-headers/asm-x86/hyperv.h16
-rw-r--r--linux-headers/linux/kvm.h1
-rw-r--r--migration-rdma.c2
-rw-r--r--migration.c6
-rw-r--r--net/net.c55
-rw-r--r--net/netmap.c123
-rw-r--r--net/tap-win32.c92
-rw-r--r--net/tap.c20
-rw-r--r--page_cache.c2
-rw-r--r--qapi-schema.json7
-rw-r--r--qemu-file.c9
-rw-r--r--stubs/Makefile.objs1
-rw-r--r--stubs/kvm.c7
-rw-r--r--target-arm/cpu-qom.h10
-rw-r--r--target-arm/cpu.c13
-rw-r--r--target-arm/cpu.h80
-rw-r--r--target-arm/cpu64.c1
-rw-r--r--target-arm/helper.c562
-rw-r--r--target-arm/helper.h5
-rw-r--r--target-arm/kvm.c55
-rw-r--r--target-arm/kvm_arm.h17
-rw-r--r--target-arm/op_helper.c25
-rw-r--r--target-arm/translate-a64.c39
-rw-r--r--target-arm/translate.c56
-rw-r--r--trace-events1
-rw-r--r--vmstate.c15
53 files changed, 1739 insertions, 392 deletions
diff --git a/arch_init.c b/arch_init.c
index 80574a090c..fe1727922c 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -122,7 +122,6 @@ static void check_guest_throttling(void);
 #define RAM_SAVE_FLAG_XBZRLE   0x40
 /* 0x80 is reserved in migration.h start with 0x100 next */
 
-
 static struct defconfig_file {
     const char *filename;
     /* Indicates it is an user config file (disabled by -no-user-config) */
@@ -133,6 +132,7 @@ static struct defconfig_file {
     { NULL }, /* end of list */
 };
 
+static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
 
 int qemu_read_default_config_files(bool userconfig)
 {
@@ -273,6 +273,34 @@ static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
     return size;
 }
 
+/* This is the last block that we have visited serching for dirty pages
+ */
+static RAMBlock *last_seen_block;
+/* This is the last block from where we have sent data */
+static RAMBlock *last_sent_block;
+static ram_addr_t last_offset;
+static unsigned long *migration_bitmap;
+static uint64_t migration_dirty_pages;
+static uint32_t last_version;
+static bool ram_bulk_stage;
+
+/* Update the xbzrle cache to reflect a page that's been sent as all 0.
+ * The important thing is that a stale (not-yet-0'd) page be replaced
+ * by the new data.
+ * As a bonus, if the page wasn't in the cache it gets added so that
+ * when a small write is made into the 0'd page it gets XBZRLE sent
+ */
+static void xbzrle_cache_zero_page(ram_addr_t current_addr)
+{
+    if (ram_bulk_stage || !migrate_use_xbzrle()) {
+        return;
+    }
+
+    /* We don't care if this fails to allocate a new cache page
+     * as long as it updated an old one */
+    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE);
+}
+
 #define ENCODING_FLAG_XBZRLE 0x1
 
 static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
@@ -329,18 +357,6 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
     return bytes_sent;
 }
 
-
-/* This is the last block that we have visited serching for dirty pages
- */
-static RAMBlock *last_seen_block;
-/* This is the last block from where we have sent data */
-static RAMBlock *last_sent_block;
-static ram_addr_t last_offset;
-static unsigned long *migration_bitmap;
-static uint64_t migration_dirty_pages;
-static uint32_t last_version;
-static bool ram_bulk_stage;
-
 static inline
 ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
                                                  ram_addr_t start)
@@ -512,6 +528,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
         } else {
             int ret;
             uint8_t *p;
+            bool send_async = true;
             int cont = (block == last_sent_block) ?
                 RAM_SAVE_FLAG_CONTINUE : 0;
 
@@ -522,6 +539,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
             ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_sent);
 
+            current_addr = block->offset + offset;
             if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
                 if (ret != RAM_SAVE_CONTROL_DELAYED) {
                     if (bytes_sent > 0) {
@@ -536,19 +554,35 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
                                             RAM_SAVE_FLAG_COMPRESS);
                 qemu_put_byte(f, 0);
                 bytes_sent++;
+                /* Must let xbzrle know, otherwise a previous (now 0'd) cached
+                 * page would be stale
+                 */
+                xbzrle_cache_zero_page(current_addr);
             } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
-                current_addr = block->offset + offset;
                 bytes_sent = save_xbzrle_page(f, p, current_addr, block,
                                               offset, cont, last_stage);
                 if (!last_stage) {
+                    /* We must send exactly what's in the xbzrle cache
+                     * even if the page wasn't xbzrle compressed, so that
+                     * it's right next time.
+                     */
                     p = get_cached_data(XBZRLE.cache, current_addr);
+
+                    /* Can't send this cached data async, since the cache page
+                     * might get updated before it gets to the wire
+                     */
+                    send_async = false;
                 }
             }
 
             /* XBZRLE overflow or normal page */
             if (bytes_sent == -1) {
                 bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
-                qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+                if (send_async) {
+                    qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+                } else {
+                    qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+                }
                 bytes_sent += TARGET_PAGE_SIZE;
                 acct_info.norm_pages++;
             }
diff --git a/configure b/configure
index 423f435550..8ad03ea17d 100755
--- a/configure
+++ b/configure
@@ -1657,7 +1657,7 @@ EOF
             "Make sure to have the zlib libs and headers installed."
     fi
 fi
-libs_softmmu="$libs_softmmu -lz"
+LIBS="$LIBS -lz"
 
 ##########################################
 # libseccomp check
@@ -2249,13 +2249,21 @@ EOF
 fi
 
 ##########################################
-# netmap headers probe
+# netmap support probe
+# Apart from looking for netmap headers, we make sure that the host API version
+# supports the netmap backend (>=11). The upper bound (15) is meant to simulate
+# a minor/major version number. Minor new features will be marked with values up
+# to 15, and if something happens that requires a change to the backend we will
+# move above 15, submit the backend fixes and modify this two bounds.
 if test "$netmap" != "no" ; then
   cat > $TMPC << EOF
 #include <inttypes.h>
 #include <net/if.h>
 #include <net/netmap.h>
 #include <net/netmap_user.h>
+#if (NETMAP_API < 11) || (NETMAP_API > 15)
+#error
+#endif
 int main(void) { return 0; }
 EOF
   if compile_prog "" "" ; then
diff --git a/cpu-exec.c b/cpu-exec.c
index 8943493001..1b0f617c19 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -477,7 +477,7 @@ int cpu_exec(CPUArchState *env)
                     }
 #elif defined(TARGET_ARM)
                     if (interrupt_request & CPU_INTERRUPT_FIQ
-                        && !(env->uncached_cpsr & CPSR_F)) {
+                        && !(env->daif & PSTATE_F)) {
                         env->exception_index = EXCP_FIQ;
                         cc->do_interrupt(cpu);
                         next_tb = 0;
@@ -493,7 +493,7 @@ int cpu_exec(CPUArchState *env)
                        pc contains a magic address.  */
                     if (interrupt_request & CPU_INTERRUPT_HARD
                         && ((IS_M(env) && env->regs[15] < 0xfffffff0)
-                            || !(env->uncached_cpsr & CPSR_I))) {
+                            || !(env->daif & PSTATE_I))) {
                         env->exception_index = EXCP_IRQ;
                         cc->do_interrupt(cpu);
                         next_tb = 0;
diff --git a/docs/rdma.txt b/docs/rdma.txt
index 2aca63bd72..1f5d9e9fe4 100644
--- a/docs/rdma.txt
+++ b/docs/rdma.txt
@@ -66,7 +66,7 @@ bulk-phase round of the migration and can be enabled for extremely
 high-performance RDMA hardware using the following command:
 
 QEMU Monitor Command:
-$ migrate_set_capability x-rdma-pin-all on # disabled by default
+$ migrate_set_capability rdma-pin-all on # disabled by default
 
 Performing this action will cause all 8GB to be pinned, so if that's
 not what you want, then please ignore this step altogether.
@@ -93,12 +93,12 @@ $ migrate_set_speed 40g # or whatever is the MAX of your RDMA device
 
 Next, on the destination machine, add the following to the QEMU command line:
 
-qemu ..... -incoming x-rdma:host:port
+qemu ..... -incoming rdma:host:port
 
 Finally, perform the actual migration on the source machine:
 
 QEMU Monitor Command:
-$ migrate -d x-rdma:host:port
+$ migrate -d rdma:host:port
 
 PERFORMANCE
 ===========
@@ -120,8 +120,8 @@ For example, in the same 8GB RAM example with all 8GB of memory in
 active use and the VM itself is completely idle using the same 40 gbps
 infiniband link:
 
-1. x-rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
-2. x-rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps
+1. rdma-pin-all disabled total time: approximately 7.5 seconds @ 9.5 Gbps
+2. rdma-pin-all enabled total time: approximately 4 seconds @ 26 Gbps
 
 These numbers would of course scale up to whatever size virtual machine
 you have to migrate using RDMA.
@@ -407,18 +407,14 @@ socket is broken during a non-RDMA based migration.
 
 TODO:
 =====
-1. 'migrate x-rdma:host:port' and '-incoming x-rdma' options will be
-   renamed to 'rdma' after the experimental phase of this work has
-   completed upstream.
-2. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
+1. Currently, 'ulimit -l' mlock() limits as well as cgroups swap limits
    are not compatible with infinband memory pinning and will result in
    an aborted migration (but with the source VM left unaffected).
-3. Use of the recent /proc/<pid>/pagemap would likely speed up
+2. Use of the recent /proc/<pid>/pagemap would likely speed up
    the use of KSM and ballooning while using RDMA.
-4. Also, some form of balloon-device usage tracking would also
+3. Also, some form of balloon-device usage tracking would also
    help alleviate some issues.
-5. Move UNREGISTER requests to a separate thread.
-6. Use LRU to provide more fine-grained direction of UNREGISTER
+4. Use LRU to provide more fine-grained direction of UNREGISTER
    requests for unpinning memory in an overcommitted environment.
-7. Expose UNREGISTER support to the user by way of workload-specific
+5. Expose UNREGISTER support to the user by way of workload-specific
    hints about application behavior.
diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c
index 50a3b8fe4f..cce7127598 100644
--- a/hw/arm/musicpal.c
+++ b/hw/arm/musicpal.c
@@ -92,8 +92,6 @@
 #define MP_ETH_CRDP3            0x4AC
 #define MP_ETH_CTDP0            0x4E0
 #define MP_ETH_CTDP1            0x4E4
-#define MP_ETH_CTDP2            0x4E8
-#define MP_ETH_CTDP3            0x4EC
 
 /* MII PHY access */
 #define MP_ETH_SMIR_DATA        0x0000FFFF
@@ -308,7 +306,7 @@ static uint64_t mv88w8618_eth_read(void *opaque, hwaddr offset,
     case MP_ETH_CRDP0 ... MP_ETH_CRDP3:
         return s->rx_queue[(offset - MP_ETH_CRDP0)/4];
 
-    case MP_ETH_CTDP0 ... MP_ETH_CTDP3:
+    case MP_ETH_CTDP0 ... MP_ETH_CTDP1:
         return s->tx_queue[(offset - MP_ETH_CTDP0)/4];
 
     default:
@@ -362,7 +360,7 @@ static void mv88w8618_eth_write(void *opaque, hwaddr offset,
             s->cur_rx[(offset - MP_ETH_CRDP0)/4] = value;
         break;
 
-    case MP_ETH_CTDP0 ... MP_ETH_CTDP3:
+    case MP_ETH_CTDP0 ... MP_ETH_CTDP1:
         s->tx_queue[(offset - MP_ETH_CTDP0)/4] = value;
         break;
     }
diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index 45a99c819d..904277a9da 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -272,11 +272,11 @@ static void pxa2xx_pwrmode_write(CPUARMState *env, const ARMCPRegInfo *ri,
         goto message;
 
     case 3:
-        s->cpu->env.uncached_cpsr =
-            ARM_CPU_MODE_SVC | CPSR_A | CPSR_F | CPSR_I;
+        s->cpu->env.uncached_cpsr = ARM_CPU_MODE_SVC;
+        s->cpu->env.daif = PSTATE_A | PSTATE_F | PSTATE_I;
         s->cpu->env.cp15.c1_sys = 0;
         s->cpu->env.cp15.c1_coproc = 0;
-        s->cpu->env.cp15.c2_base0 = 0;
+        s->cpu->env.cp15.ttbr0_el1 = 0;
         s->cpu->env.cp15.c3 = 0;
         s->pm_regs[PSSR >> 2] |= 0x8; /* Set STS */
         s->pm_regs[RCSR >> 2] |= 0x8; /* Set GPR */
diff --git a/hw/audio/hda-codec.c b/hw/audio/hda-codec.c
index 986f2a9c92..a67ca91ca7 100644
--- a/hw/audio/hda-codec.c
+++ b/hw/audio/hda-codec.c
@@ -157,6 +157,9 @@ struct HDAAudioStream {
     uint32_t bpos;
 };
 
+#define TYPE_HDA_AUDIO "hda-audio"
+#define HDA_AUDIO(obj) OBJECT_CHECK(HDAAudioState, (obj), TYPE_HDA_AUDIO)
+
 struct HDAAudioState {
     HDACodecDevice hda;
     const char *name;
@@ -288,7 +291,7 @@ static void hda_audio_setup(HDAAudioStream *st)
 
 static void hda_audio_command(HDACodecDevice *hda, uint32_t nid, uint32_t data)
 {
-    HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda);
+    HDAAudioState *a = HDA_AUDIO(hda);
     HDAAudioStream *st;
     const desc_node *node = NULL;
     const desc_param *param;
@@ -448,7 +451,7 @@ fail:
 
 static void hda_audio_stream(HDACodecDevice *hda, uint32_t stnr, bool running, bool output)
 {
-    HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda);
+    HDAAudioState *a = HDA_AUDIO(hda);
     int s;
 
     a->running_compat[stnr] = running;
@@ -469,7 +472,7 @@ static void hda_audio_stream(HDACodecDevice *hda, uint32_t stnr, bool running, b
 
 static int hda_audio_init(HDACodecDevice *hda, const struct desc_codec *desc)
 {
-    HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda);
+    HDAAudioState *a = HDA_AUDIO(hda);
     HDAAudioStream *st;
     const desc_node *node;
     const desc_param *param;
@@ -514,7 +517,7 @@ static int hda_audio_init(HDACodecDevice *hda, const struct desc_codec *desc)
 
 static int hda_audio_exit(HDACodecDevice *hda)
 {
-    HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda);
+    HDAAudioState *a = HDA_AUDIO(hda);
     HDAAudioStream *st;
     int i;
 
@@ -561,7 +564,7 @@ static int hda_audio_post_load(void *opaque, int version)
 
 static void hda_audio_reset(DeviceState *dev)
 {
-    HDAAudioState *a = DO_UPCAST(HDAAudioState, hda.qdev, dev);
+    HDAAudioState *a = HDA_AUDIO(dev);
     HDAAudioStream *st;
     int i;
 
@@ -613,7 +616,7 @@ static Property hda_audio_properties[] = {
 
 static int hda_audio_init_output(HDACodecDevice *hda)
 {
-    HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda);
+    HDAAudioState *a = HDA_AUDIO(hda);
 
     if (!a->mixer) {
         return hda_audio_init(hda, &output_nomixemu);
@@ -624,7 +627,7 @@ static int hda_audio_init_output(HDACodecDevice *hda)
 
 static int hda_audio_init_duplex(HDACodecDevice *hda)
 {
-    HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda);
+    HDAAudioState *a = HDA_AUDIO(hda);
 
     if (!a->mixer) {
         return hda_audio_init(hda, &duplex_nomixemu);
@@ -635,7 +638,7 @@ static int hda_audio_init_duplex(HDACodecDevice *hda)
 
 static int hda_audio_init_micro(HDACodecDevice *hda)
 {
-    HDAAudioState *a = DO_UPCAST(HDAAudioState, hda, hda);
+    HDAAudioState *a = HDA_AUDIO(hda);
 
     if (!a->mixer) {
         return hda_audio_init(hda, &micro_nomixemu);
@@ -644,25 +647,39 @@ static int hda_audio_init_micro(HDACodecDevice *hda)
     }
 }
 
-static void hda_audio_output_class_init(ObjectClass *klass, void *data)
+static void hda_audio_base_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     HDACodecDeviceClass *k = HDA_CODEC_DEVICE_CLASS(klass);
 
-    k->init = hda_audio_init_output;
     k->exit = hda_audio_exit;
     k->command = hda_audio_command;
     k->stream = hda_audio_stream;
     set_bit(DEVICE_CATEGORY_SOUND, dc->categories);
-    dc->desc = "HDA Audio Codec, output-only (line-out)";
     dc->reset = hda_audio_reset;
     dc->vmsd = &vmstate_hda_audio;
     dc->props = hda_audio_properties;
 }
 
+static const TypeInfo hda_audio_info = {
+    .name          = TYPE_HDA_AUDIO,
+    .parent        = TYPE_HDA_CODEC_DEVICE,
+    .class_init    = hda_audio_base_class_init,
+    .abstract      = true,
+};
+
+static void hda_audio_output_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    HDACodecDeviceClass *k = HDA_CODEC_DEVICE_CLASS(klass);
+
+    k->init = hda_audio_init_output;
+    dc->desc = "HDA Audio Codec, output-only (line-out)";
+}
+
 static const TypeInfo hda_audio_output_info = {
     .name          = "hda-output",
-    .parent        = TYPE_HDA_CODEC_DEVICE,
+    .parent        = TYPE_HDA_AUDIO,
     .instance_size = sizeof(HDAAudioState),
     .class_init    = hda_audio_output_class_init,
 };
@@ -673,19 +690,12 @@ static void hda_audio_duplex_class_init(ObjectClass *klass, void *data)
     HDACodecDeviceClass *k = HDA_CODEC_DEVICE_CLASS(klass);
 
     k->init = hda_audio_init_duplex;
-    k->exit = hda_audio_exit;
-    k->command = hda_audio_command;
-    k->stream = hda_audio_stream;
-    set_bit(DEVICE_CATEGORY_SOUND, dc->categories);
     dc->desc = "HDA Audio Codec, duplex (line-out, line-in)";
-    dc->reset = hda_audio_reset;
-    dc->vmsd = &vmstate_hda_audio;
-    dc->props = hda_audio_properties;
 }
 
 static const TypeInfo hda_audio_duplex_info = {
     .name          = "hda-duplex",
-    .parent        = TYPE_HDA_CODEC_DEVICE,
+    .parent        = TYPE_HDA_AUDIO,
     .instance_size = sizeof(HDAAudioState),
     .class_init    = hda_audio_duplex_class_init,
 };
@@ -696,25 +706,19 @@ static void hda_audio_micro_class_init(ObjectClass *klass, void *data)
     HDACodecDeviceClass *k = HDA_CODEC_DEVICE_CLASS(klass);
 
     k->init = hda_audio_init_micro;
-    k->exit = hda_audio_exit;
-    k->command = hda_audio_command;
-    k->stream = hda_audio_stream;
-    set_bit(DEVICE_CATEGORY_SOUND, dc->categories);
     dc->desc = "HDA Audio Codec, duplex (speaker, microphone)";
-    dc->reset = hda_audio_reset;
-    dc->vmsd = &vmstate_hda_audio;
-    dc->props = hda_audio_properties;
 }
 
 static const TypeInfo hda_audio_micro_info = {
     .name          = "hda-micro",
-    .parent        = TYPE_HDA_CODEC_DEVICE,
+    .parent        = TYPE_HDA_AUDIO,
     .instance_size = sizeof(HDAAudioState),
     .class_init    = hda_audio_micro_class_init,
 };
 
 static void hda_audio_register_types(void)
 {
+    type_register_static(&hda_audio_info);
     type_register_static(&hda_audio_output_info);
     type_register_static(&hda_audio_duplex_info);
     type_register_static(&hda_audio_micro_info);
diff --git a/hw/dma/pl330.c b/hw/dma/pl330.c
index 401399d330..608a58c47d 100644
--- a/hw/dma/pl330.c
+++ b/hw/dma/pl330.c
@@ -227,7 +227,8 @@ static const VMStateDescription vmstate_pl330_queue = {
 };
 
 struct PL330State {
-    SysBusDevice busdev;
+    SysBusDevice parent_obj;
+
     MemoryRegion iomem;
     qemu_irq irq_abort;
     qemu_irq *irq;
@@ -577,7 +578,7 @@ static inline void pl330_queue_remove_tagged(PL330Queue *s, uint8_t tag)
 
 static inline void pl330_fault(PL330Chan *ch, uint32_t flags)
 {
-    DB_PRINT("ch: %p, flags: %x\n", ch, flags);
+    DB_PRINT("ch: %p, flags: %" PRIx32 "\n", ch, flags);
     ch->fault_type |= flags;
     if (ch->state == pl330_chan_fault) {
         return;
@@ -600,10 +601,12 @@ static inline void pl330_fault(PL330Chan *ch, uint32_t flags)
  *   LEN - number of elements in ARGS array
  */
 
-static void pl330_dmaaddh(PL330Chan *ch, uint8_t opcode, uint8_t *args, int len)
+static void pl330_dmaadxh(PL330Chan *ch, uint8_t *args, bool ra, bool neg)
 {
-    uint16_t im = (((uint16_t)args[1]) << 8) | ((uint16_t)args[0]);
-    uint8_t ra = (opcode >> 1) & 1;
+    uint32_t im = (args[1] << 8) | args[0];
+    if (neg) {
+        im |= 0xffffu << 16;
+    }
 
     if (ch->is_manager) {
         pl330_fault(ch, PL330_FAULT_UNDEF_INSTR);
@@ -616,6 +619,16 @@ static void pl330_dmaaddh(PL330Chan *ch, uint8_t opcode, uint8_t *args, int len)
     }
 }
 
+static void pl330_dmaaddh(PL330Chan *ch, uint8_t opcode, uint8_t *args, int len)
+{
+    pl330_dmaadxh(ch, args, extract32(opcode, 1, 1), false);
+}
+
+static void pl330_dmaadnh(PL330Chan *ch, uint8_t opcode, uint8_t *args, int len)
+{
+    pl330_dmaadxh(ch, args, extract32(opcode, 1, 1), true);
+}
+
 static void pl330_dmaend(PL330Chan *ch, uint8_t opcode,
                          uint8_t *args, int len)
 {
@@ -723,7 +736,8 @@ static void pl330_dmald(PL330Chan *ch, uint8_t opcode, uint8_t *args, int len)
     ch->stall = pl330_queue_put_insn(&ch->parent->read_queue, ch->src,
                                     size, num, inc, 0, ch->tag);
     if (!ch->stall) {
-        DB_PRINT("channel:%d address:%08x size:%d num:%d %c\n",
+        DB_PRINT("channel:%" PRId8 " address:%08" PRIx32 " size:%" PRIx32
+                 " num:%" PRId32 " %c\n",
                  ch->tag, ch->src, size, num, inc ? 'Y' : 'N');
         ch->src += inc ? size * num - (ch->src & (size - 1)) : 0;
     }
@@ -868,9 +882,10 @@ static void pl330_dmasev(PL330Chan *ch, uint8_t opcode, uint8_t *args, int len)
     }
     if (ch->parent->inten & (1 << ev_id)) {
         ch->parent->int_status |= (1 << ev_id);
-        DB_PRINT("event interrupt raised %d\n", ev_id);
+        DB_PRINT("event interrupt raised %" PRId8 "\n", ev_id);
         qemu_irq_raise(ch->parent->irq[ev_id]);
     }
+    DB_PRINT("event raised %" PRId8 "\n", ev_id);
     ch->parent->ev_status |= (1 << ev_id);
 }
 
@@ -895,7 +910,8 @@ static void pl330_dmast(PL330Chan *ch, uint8_t opcode, uint8_t *args, int len)
     ch->stall = pl330_queue_put_insn(&ch->parent->write_queue, ch->dst,
                                     size, num, inc, 0, ch->tag);
     if (!ch->stall) {
-        DB_PRINT("channel:%d address:%08x size:%d num:%d %c\n",
+        DB_PRINT("channel:%" PRId8 " address:%08" PRIx32 " size:%" PRIx32
+                 " num:%" PRId32 " %c\n",
                  ch->tag, ch->dst, size, num, inc ? 'Y' : 'N');
         ch->dst += inc ? size * num - (ch->dst & (size - 1)) : 0;
     }
@@ -972,6 +988,7 @@ static void pl330_dmawfe(PL330Chan *ch, uint8_t opcode,
             }
         }
         ch->parent->ev_status &= ~(1 << ev_id);
+        DB_PRINT("event lowered %" PRIx8 "\n", ev_id);
     } else {
         ch->stall = 1;
     }
@@ -1037,6 +1054,7 @@ static void pl330_dmawmb(PL330Chan *ch, uint8_t opcode,
 /* NULL terminated array of the instruction descriptions. */
 static const PL330InsnDesc insn_desc[] = {
     { .opcode = 0x54, .opmask = 0xFD, .size = 3, .exec = pl330_dmaaddh, },
+    { .opcode = 0x5c, .opmask = 0xFD, .size = 3, .exec = pl330_dmaadnh, },
     { .opcode = 0x00, .opmask = 0xFF, .size = 1, .exec = pl330_dmaend, },
     { .opcode = 0x35, .opmask = 0xFF, .size = 2, .exec = pl330_dmaflushp, },
     { .opcode = 0xA0, .opmask = 0xFD, .size = 6, .exec = pl330_dmago, },
@@ -1108,7 +1126,6 @@ static int pl330_chan_exec(PL330Chan *ch)
             ch->state != pl330_chan_waiting_periph &&
             ch->state != pl330_chan_at_barrier &&
             ch->state != pl330_chan_waiting_event) {
-        DB_PRINT("%d\n", ch->state);
         return 0;
     }
     ch->stall = 0;
@@ -1155,7 +1172,7 @@ static int pl330_exec_cycle(PL330Chan *channel)
 
         dma_memory_read(&address_space_memory, q->addr, buf, len);
         if (PL330_ERR_DEBUG > 1) {
-            DB_PRINT("PL330 read from memory @%08x (size = %08x):\n",
+            DB_PRINT("PL330 read from memory @%08" PRIx32 " (size = %08x):\n",
                       q->addr, len);
             qemu_hexdump((char *)buf, stderr, "", len);
         }
@@ -1187,8 +1204,8 @@ static int pl330_exec_cycle(PL330Chan *channel)
         if (fifo_res == PL330_FIFO_OK || q->z) {
             dma_memory_write(&address_space_memory, q->addr, buf, len);
             if (PL330_ERR_DEBUG > 1) {
-                DB_PRINT("PL330 read from memory @%08x (size = %08x):\n",
-                         q->addr, len);
+                DB_PRINT("PL330 read from memory @%08" PRIx32
+                         " (size = %08x):\n", q->addr, len);
                 qemu_hexdump((char *)buf, stderr, "", len);
             }
             if (q->inc) {
@@ -1277,7 +1294,7 @@ static void pl330_debug_exec(PL330State *s)
     args[2] = (s->dbg[1] >>  8) & 0xff;
     args[3] = (s->dbg[1] >> 16) & 0xff;
     args[4] = (s->dbg[1] >> 24) & 0xff;
-    DB_PRINT("chan id: %d\n", chan_id);
+    DB_PRINT("chan id: %" PRIx8 "\n", chan_id);
     if (s->dbg[0] & 1) {
         ch = &s->chan[chan_id];
     } else {
@@ -1311,7 +1328,7 @@ static void pl330_iomem_write(void *opaque, hwaddr offset,
                               uint64_t value, unsigned size)
 {
     PL330State *s = (PL330State *) opaque;
-    uint32_t i;
+    int i;
 
     DB_PRINT("addr: %08x data: %08x\n", (unsigned)offset, (unsigned)value);
 
@@ -1467,8 +1484,8 @@ static inline uint32_t pl330_iomem_read_imp(void *opaque,
 static uint64_t pl330_iomem_read(void *opaque, hwaddr offset,
         unsigned size)
 {
-    int ret = pl330_iomem_read_imp(opaque, offset);
-    DB_PRINT("addr: %08x data: %08x\n", (unsigned)offset, ret);
+    uint32_t ret = pl330_iomem_read_imp(opaque, offset);
+    DB_PRINT("addr: %08" HWADDR_PRIx " data: %08" PRIx32 "\n", offset, ret);
     return ret;
 }
 
@@ -1554,7 +1571,7 @@ static void pl330_realize(DeviceState *dev, Error **errp)
         s->cfg[1] |= 5;
         break;
     default:
-        error_setg(errp, "Bad value for i-cache_len property: %d\n",
+        error_setg(errp, "Bad value for i-cache_len property: %" PRIx8 "\n",
                    s->i_cache_len);
         return;
     }
@@ -1589,7 +1606,7 @@ static void pl330_realize(DeviceState *dev, Error **errp)
         s->cfg[CFG_CRD] |= 0x4;
         break;
     default:
-        error_setg(errp, "Bad value for data_width property: %d\n",
+        error_setg(errp, "Bad value for data_width property: %" PRIx8 "\n",
                    s->data_width);
         return;
     }
@@ -1602,7 +1619,7 @@ static void pl330_realize(DeviceState *dev, Error **errp)
 
     pl330_queue_init(&s->read_queue, s->rd_q_dep, s);
     pl330_queue_init(&s->write_queue, s->wr_q_dep, s);
-    pl330_fifo_init(&s->fifo, s->data_buffer_dep);
+    pl330_fifo_init(&s->fifo, s->data_width / 4 * s->data_buffer_dep);
 }
 
 static Property pl330_properties[] = {
diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
index 59a3da5a6b..100b6bf3de 100644
--- a/hw/intc/arm_gic_kvm.c
+++ b/hw/intc/arm_gic_kvm.c
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2012 Linaro Limited
  * Written by Peter Maydell
+ * Save/Restore logic added by Christoffer Dall.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -23,6 +24,20 @@
 #include "kvm_arm.h"
 #include "gic_internal.h"
 
+//#define DEBUG_GIC_KVM
+
+#ifdef DEBUG_GIC_KVM
+static const int debug_gic_kvm = 1;
+#else
+static const int debug_gic_kvm = 0;
+#endif
+
+#define DPRINTF(fmt, ...) do { \
+        if (debug_gic_kvm) { \
+            printf("arm_gic: " fmt , ## __VA_ARGS__); \
+        } \
+    } while (0)
+
 #define TYPE_KVM_ARM_GIC "kvm-arm-gic"
 #define KVM_ARM_GIC(obj) \
      OBJECT_CHECK(GICState, (obj), TYPE_KVM_ARM_GIC)
@@ -72,14 +87,419 @@ static void kvm_arm_gic_set_irq(void *opaque, int irq, int level)
     kvm_set_irq(kvm_state, kvm_irq, !!level);
 }
 
+static bool kvm_arm_gic_can_save_restore(GICState *s)
+{
+    return s->dev_fd >= 0;
+}
+
+static void kvm_gic_access(GICState *s, int group, int offset,
+                                   int cpu, uint32_t *val, bool write)
+{
+    struct kvm_device_attr attr;
+    int type;
+    int err;
+
+    cpu = cpu & 0xff;
+
+    attr.flags = 0;
+    attr.group = group;
+    attr.attr = (((uint64_t)cpu << KVM_DEV_ARM_VGIC_CPUID_SHIFT) &
+                 KVM_DEV_ARM_VGIC_CPUID_MASK) |
+                (((uint64_t)offset << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) &
+                 KVM_DEV_ARM_VGIC_OFFSET_MASK);
+    attr.addr = (uintptr_t)val;
+
+    if (write) {
+        type = KVM_SET_DEVICE_ATTR;
+    } else {
+        type = KVM_GET_DEVICE_ATTR;
+    }
+
+    err = kvm_device_ioctl(s->dev_fd, type, &attr);
+    if (err < 0) {
+        fprintf(stderr, "KVM_{SET/GET}_DEVICE_ATTR failed: %s\n",
+                strerror(-err));
+        abort();
+    }
+}
+
+static void kvm_gicd_access(GICState *s, int offset, int cpu,
+                            uint32_t *val, bool write)
+{
+    kvm_gic_access(s, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+                   offset, cpu, val, write);
+}
+
+static void kvm_gicc_access(GICState *s, int offset, int cpu,
+                            uint32_t *val, bool write)
+{
+    kvm_gic_access(s, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
+                   offset, cpu, val, write);
+}
+
+#define for_each_irq_reg(_ctr, _max_irq, _field_width) \
+    for (_ctr = 0; _ctr < ((_max_irq) / (32 / (_field_width))); _ctr++)
+
+/*
+ * Translate from the in-kernel field for an IRQ value to/from the qemu
+ * representation.
+ */
+typedef void (*vgic_translate_fn)(GICState *s, int irq, int cpu,
+                                  uint32_t *field, bool to_kernel);
+
+/* synthetic translate function used for clear/set registers to completely
+ * clear a setting using a clear-register before setting the remaing bits
+ * using a set-register */
+static void translate_clear(GICState *s, int irq, int cpu,
+                            uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = ~0;
+    } else {
+        /* does not make sense: qemu model doesn't use set/clear regs */
+        abort();
+    }
+}
+
+static void translate_enabled(GICState *s, int irq, int cpu,
+                              uint32_t *field, bool to_kernel)
+{
+    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
+
+    if (to_kernel) {
+        *field = GIC_TEST_ENABLED(irq, cm);
+    } else {
+        if (*field & 1) {
+            GIC_SET_ENABLED(irq, cm);
+        }
+    }
+}
+
+static void translate_pending(GICState *s, int irq, int cpu,
+                              uint32_t *field, bool to_kernel)
+{
+    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
+
+    if (to_kernel) {
+        *field = gic_test_pending(s, irq, cm);
+    } else {
+        if (*field & 1) {
+            GIC_SET_PENDING(irq, cm);
+            /* TODO: Capture is level-line is held high in the kernel */
+        }
+    }
+}
+
+static void translate_active(GICState *s, int irq, int cpu,
+                             uint32_t *field, bool to_kernel)
+{
+    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
+
+    if (to_kernel) {
+        *field = GIC_TEST_ACTIVE(irq, cm);
+    } else {
+        if (*field & 1) {
+            GIC_SET_ACTIVE(irq, cm);
+        }
+    }
+}
+
+static void translate_trigger(GICState *s, int irq, int cpu,
+                              uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = (GIC_TEST_EDGE_TRIGGER(irq)) ? 0x2 : 0x0;
+    } else {
+        if (*field & 0x2) {
+            GIC_SET_EDGE_TRIGGER(irq);
+        }
+    }
+}
+
+static void translate_priority(GICState *s, int irq, int cpu,
+                               uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = GIC_GET_PRIORITY(irq, cpu) & 0xff;
+    } else {
+        gic_set_priority(s, cpu, irq, *field & 0xff);
+    }
+}
+
+static void translate_targets(GICState *s, int irq, int cpu,
+                              uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = s->irq_target[irq] & 0xff;
+    } else {
+        s->irq_target[irq] = *field & 0xff;
+    }
+}
+
+static void translate_sgisource(GICState *s, int irq, int cpu,
+                                uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = s->sgi_pending[irq][cpu] & 0xff;
+    } else {
+        s->sgi_pending[irq][cpu] = *field & 0xff;
+    }
+}
+
+/* Read a register group from the kernel VGIC */
+static void kvm_dist_get(GICState *s, uint32_t offset, int width,
+                         int maxirq, vgic_translate_fn translate_fn)
+{
+    uint32_t reg;
+    int i;
+    int j;
+    int irq;
+    int cpu;
+    int regsz = 32 / width; /* irqs per kernel register */
+    uint32_t field;
+
+    for_each_irq_reg(i, maxirq, width) {
+        irq = i * regsz;
+        cpu = 0;
+        while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) {
+            kvm_gicd_access(s, offset, cpu, &reg, false);
+            for (j = 0; j < regsz; j++) {
+                field = extract32(reg, j * width, width);
+                translate_fn(s, irq + j, cpu, &field, false);
+            }
+
+            cpu++;
+        }
+        offset += 4;
+    }
+}
+
+/* Write a register group to the kernel VGIC */
+static void kvm_dist_put(GICState *s, uint32_t offset, int width,
+                         int maxirq, vgic_translate_fn translate_fn)
+{
+    uint32_t reg;
+    int i;
+    int j;
+    int irq;
+    int cpu;
+    int regsz = 32 / width; /* irqs per kernel register */
+    uint32_t field;
+
+    for_each_irq_reg(i, maxirq, width) {
+        irq = i * regsz;
+        cpu = 0;
+        while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) {
+            reg = 0;
+            for (j = 0; j < regsz; j++) {
+                translate_fn(s, irq + j, cpu, &field, true);
+                reg = deposit32(reg, j * width, width, field);
+            }
+            kvm_gicd_access(s, offset, cpu, &reg, true);
+
+            cpu++;
+        }
+        offset += 4;
+    }
+}
+
 static void kvm_arm_gic_put(GICState *s)
 {
-    /* TODO: there isn't currently a kernel interface to set the GIC state */
+    uint32_t reg;
+    int i;
+    int cpu;
+    int num_cpu;
+    int num_irq;
+
+    if (!kvm_arm_gic_can_save_restore(s)) {
+            DPRINTF("Cannot put kernel gic state, no kernel interface");
+            return;
+    }
+
+    /* Note: We do the restore in a slightly different order than the save
+     * (where the order doesn't matter and is simply ordered according to the
+     * register offset values */
+
+    /*****************************************************************
+     * Distributor State
+     */
+
+    /* s->enabled -> GICD_CTLR */
+    reg = s->enabled;
+    kvm_gicd_access(s, 0x0, 0, &reg, true);
+
+    /* Sanity checking on GICD_TYPER and s->num_irq, s->num_cpu */
+    kvm_gicd_access(s, 0x4, 0, &reg, false);
+    num_irq = ((reg & 0x1f) + 1) * 32;
+    num_cpu = ((reg & 0xe0) >> 5) + 1;
+
+    if (num_irq < s->num_irq) {
+            fprintf(stderr, "Restoring %u IRQs, but kernel supports max %d\n",
+                    s->num_irq, num_irq);
+            abort();
+    } else if (num_cpu != s->num_cpu) {
+            fprintf(stderr, "Restoring %u CPU interfaces, kernel only has %d\n",
+                    s->num_cpu, num_cpu);
+            /* Did we not create the VCPUs in the kernel yet? */
+            abort();
+    }
+
+    /* TODO: Consider checking compatibility with the IIDR ? */
+
+    /* irq_state[n].enabled -> GICD_ISENABLERn */
+    kvm_dist_put(s, 0x180, 1, s->num_irq, translate_clear);
+    kvm_dist_put(s, 0x100, 1, s->num_irq, translate_enabled);
+
+    /* s->irq_target[irq] -> GICD_ITARGETSRn
+     * (restore targets before pending to ensure the pending state is set on
+     * the appropriate CPU interfaces in the kernel) */
+    kvm_dist_put(s, 0x800, 8, s->num_irq, translate_targets);
+
+    /* irq_state[n].pending + irq_state[n].level -> GICD_ISPENDRn */
+    kvm_dist_put(s, 0x280, 1, s->num_irq, translate_clear);
+    kvm_dist_put(s, 0x200, 1, s->num_irq, translate_pending);
+
+    /* irq_state[n].active -> GICD_ISACTIVERn */
+    kvm_dist_put(s, 0x380, 1, s->num_irq, translate_clear);
+    kvm_dist_put(s, 0x300, 1, s->num_irq, translate_active);
+
+    /* irq_state[n].trigger -> GICD_ICFRn */
+    kvm_dist_put(s, 0xc00, 2, s->num_irq, translate_trigger);
+
+    /* s->priorityX[irq] -> ICD_IPRIORITYRn */
+    kvm_dist_put(s, 0x400, 8, s->num_irq, translate_priority);
+
+    /* s->sgi_pending -> ICD_CPENDSGIRn */
+    kvm_dist_put(s, 0xf10, 8, GIC_NR_SGIS, translate_clear);
+    kvm_dist_put(s, 0xf20, 8, GIC_NR_SGIS, translate_sgisource);
+
+
+    /*****************************************************************
+     * CPU Interface(s) State
+     */
+
+    for (cpu = 0; cpu < s->num_cpu; cpu++) {
+        /* s->cpu_enabled[cpu] -> GICC_CTLR */
+        reg = s->cpu_enabled[cpu];
+        kvm_gicc_access(s, 0x00, cpu, &reg, true);
+
+        /* s->priority_mask[cpu] -> GICC_PMR */
+        reg = (s->priority_mask[cpu] & 0xff);
+        kvm_gicc_access(s, 0x04, cpu, &reg, true);
+
+        /* s->bpr[cpu] -> GICC_BPR */
+        reg = (s->bpr[cpu] & 0x7);
+        kvm_gicc_access(s, 0x08, cpu, &reg, true);
+
+        /* s->abpr[cpu] -> GICC_ABPR */
+        reg = (s->abpr[cpu] & 0x7);
+        kvm_gicc_access(s, 0x1c, cpu, &reg, true);
+
+        /* s->apr[n][cpu] -> GICC_APRn */
+        for (i = 0; i < 4; i++) {
+            reg = s->apr[i][cpu];
+            kvm_gicc_access(s, 0xd0 + i * 4, cpu, &reg, true);
+        }
+    }
 }
 
 static void kvm_arm_gic_get(GICState *s)
 {
-    /* TODO: there isn't currently a kernel interface to get the GIC state */
+    uint32_t reg;
+    int i;
+    int cpu;
+
+    if (!kvm_arm_gic_can_save_restore(s)) {
+            DPRINTF("Cannot get kernel gic state, no kernel interface");
+            return;
+    }
+
+    /*****************************************************************
+     * Distributor State
+     */
+
+    /* GICD_CTLR -> s->enabled */
+    kvm_gicd_access(s, 0x0, 0, &reg, false);
+    s->enabled = reg & 1;
+
+    /* Sanity checking on GICD_TYPER -> s->num_irq, s->num_cpu */
+    kvm_gicd_access(s, 0x4, 0, &reg, false);
+    s->num_irq = ((reg & 0x1f) + 1) * 32;
+    s->num_cpu = ((reg & 0xe0) >> 5) + 1;
+
+    if (s->num_irq > GIC_MAXIRQ) {
+            fprintf(stderr, "Too many IRQs reported from the kernel: %d\n",
+                    s->num_irq);
+            abort();
+    }
+
+    /* GICD_IIDR -> ? */
+    kvm_gicd_access(s, 0x8, 0, &reg, false);
+
+    /* Verify no GROUP 1 interrupts configured in the kernel */
+    for_each_irq_reg(i, s->num_irq, 1) {
+        kvm_gicd_access(s, 0x80 + (i * 4), 0, &reg, false);
+        if (reg != 0) {
+            fprintf(stderr, "Unsupported GICD_IGROUPRn value: %08x\n",
+                    reg);
+            abort();
+        }
+    }
+
+    /* Clear all the IRQ settings */
+    for (i = 0; i < s->num_irq; i++) {
+        memset(&s->irq_state[i], 0, sizeof(s->irq_state[0]));
+    }
+
+    /* GICD_ISENABLERn -> irq_state[n].enabled */
+    kvm_dist_get(s, 0x100, 1, s->num_irq, translate_enabled);
+
+    /* GICD_ISPENDRn -> irq_state[n].pending + irq_state[n].level */
+    kvm_dist_get(s, 0x200, 1, s->num_irq, translate_pending);
+
+    /* GICD_ISACTIVERn -> irq_state[n].active */
+    kvm_dist_get(s, 0x300, 1, s->num_irq, translate_active);
+
+    /* GICD_ICFRn -> irq_state[n].trigger */
+    kvm_dist_get(s, 0xc00, 2, s->num_irq, translate_trigger);
+
+    /* GICD_IPRIORITYRn -> s->priorityX[irq] */
+    kvm_dist_get(s, 0x400, 8, s->num_irq, translate_priority);
+
+    /* GICD_ITARGETSRn -> s->irq_target[irq] */
+    kvm_dist_get(s, 0x800, 8, s->num_irq, translate_targets);
+
+    /* GICD_CPENDSGIRn -> s->sgi_pending */
+    kvm_dist_get(s, 0xf10, 8, GIC_NR_SGIS, translate_sgisource);
+
+
+    /*****************************************************************
+     * CPU Interface(s) State
+     */
+
+    for (cpu = 0; cpu < s->num_cpu; cpu++) {
+        /* GICC_CTLR -> s->cpu_enabled[cpu] */
+        kvm_gicc_access(s, 0x00, cpu, &reg, false);
+        s->cpu_enabled[cpu] = (reg & 1);
+
+        /* GICC_PMR -> s->priority_mask[cpu] */
+        kvm_gicc_access(s, 0x04, cpu, &reg, false);
+        s->priority_mask[cpu] = (reg & 0xff);
+
+        /* GICC_BPR -> s->bpr[cpu] */
+        kvm_gicc_access(s, 0x08, cpu, &reg, false);
+        s->bpr[cpu] = (reg & 0x7);
+
+        /* GICC_ABPR -> s->abpr[cpu] */
+        kvm_gicc_access(s, 0x1c, cpu, &reg, false);
+        s->abpr[cpu] = (reg & 0x7);
+
+        /* GICC_APRn -> s->apr[n][cpu] */
+        for (i = 0; i < 4; i++) {
+            kvm_gicc_access(s, 0xd0 + i * 4, cpu, &reg, false);
+            s->apr[i][cpu] = reg;
+        }
+    }
 }
 
 static void kvm_arm_gic_reset(DeviceState *dev)
@@ -97,6 +517,7 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp)
     GICState *s = KVM_ARM_GIC(dev);
     SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
     KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s);
+    int ret;
 
     kgc->parent_realize(dev, errp);
     if (error_is_set(errp)) {
@@ -119,13 +540,27 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp)
     for (i = 0; i < s->num_cpu; i++) {
         sysbus_init_irq(sbd, &s->parent_irq[i]);
     }
+
+    /* Try to create the device via the device control API */
+    s->dev_fd = -1;
+    ret = kvm_create_device(kvm_state, KVM_DEV_TYPE_ARM_VGIC_V2, false);
+    if (ret >= 0) {
+        s->dev_fd = ret;
+    } else if (ret != -ENODEV && ret != -ENOTSUP) {
+        error_setg_errno(errp, -ret, "error creating in-kernel VGIC");
+        return;
+    }
+
     /* Distributor */
     memory_region_init_reservation(&s->iomem, OBJECT(s),
                                    "kvm-gic_dist", 0x1000);
     sysbus_init_mmio(sbd, &s->iomem);
     kvm_arm_register_device(&s->iomem,
                             (KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT)
-                            | KVM_VGIC_V2_ADDR_TYPE_DIST);
+                            | KVM_VGIC_V2_ADDR_TYPE_DIST,
+                            KVM_DEV_ARM_VGIC_GRP_ADDR,
+                            KVM_VGIC_V2_ADDR_TYPE_DIST,
+                            s->dev_fd);
     /* CPU interface for current core. Unlike arm_gic, we don't
      * provide the "interface for core #N" memory regions, because
      * cores with a VGIC don't have those.
@@ -135,7 +570,10 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp)
     sysbus_init_mmio(sbd, &s->cpuiomem[0]);
     kvm_arm_register_device(&s->cpuiomem[0],
                             (KVM_ARM_DEVICE_VGIC_V2 << KVM_ARM_DEVICE_ID_SHIFT)
-                            | KVM_VGIC_V2_ADDR_TYPE_CPU);
+                            | KVM_VGIC_V2_ADDR_TYPE_CPU,
+                            KVM_DEV_ARM_VGIC_GRP_ADDR,
+                            KVM_VGIC_V2_ADDR_TYPE_CPU,
+                            s->dev_fd);
 }
 
 static void kvm_arm_gic_class_init(ObjectClass *klass, void *data)
diff --git a/hw/intc/exynos4210_combiner.c b/hw/intc/exynos4210_combiner.c
index ef5e8eb22f..3287479456 100644
--- a/hw/intc/exynos4210_combiner.c
+++ b/hw/intc/exynos4210_combiner.c
@@ -418,7 +418,7 @@ static int exynos4210_combiner_init(SysBusDevice *sbd)
     qdev_init_gpio_in(dev, exynos4210_combiner_handler, IIC_NIRQ);
 
     /* Connect SysBusDev irqs to device specific irqs */
-    for (i = 0; i < IIC_NIRQ; i++) {
+    for (i = 0; i < IIC_NGRP; i++) {
         sysbus_init_irq(sbd, &s->output_irq[i]);
     }
 
diff --git a/hw/intc/gic_internal.h b/hw/intc/gic_internal.h
index 92a6f7a3ff..48a58d7890 100644
--- a/hw/intc/gic_internal.h
+++ b/hw/intc/gic_internal.h
@@ -40,7 +40,7 @@
 #define GIC_SET_MODEL(irq) s->irq_state[irq].model = true
 #define GIC_CLEAR_MODEL(irq) s->irq_state[irq].model = false
 #define GIC_TEST_MODEL(irq) s->irq_state[irq].model
-#define GIC_SET_LEVEL(irq, cm) s->irq_state[irq].level = (cm)
+#define GIC_SET_LEVEL(irq, cm) s->irq_state[irq].level |= (cm)
 #define GIC_CLEAR_LEVEL(irq, cm) s->irq_state[irq].level &= ~(cm)
 #define GIC_TEST_LEVEL(irq, cm) ((s->irq_state[irq].level & (cm)) != 0)
 #define GIC_SET_EDGE_TRIGGER(irq) s->irq_state[irq].edge_trigger = true
diff --git a/hw/misc/arm_sysctl.c b/hw/misc/arm_sysctl.c
index 0fc26d29a5..3fad6f86de 100644
--- a/hw/misc/arm_sysctl.c
+++ b/hw/misc/arm_sysctl.c
@@ -276,7 +276,7 @@ static bool vexpress_cfgctrl_read(arm_sysctl_state *s, unsigned int dcc,
         }
         break;
     case SYS_CFG_OSC:
-        if (site == SYS_CFG_SITE_MB && device < sizeof(s->mb_clock)) {
+        if (site == SYS_CFG_SITE_MB && device < ARRAY_SIZE(s->mb_clock)) {
             /* motherboard clock */
             *val = s->mb_clock[device];
             return true;
@@ -324,7 +324,7 @@ static bool vexpress_cfgctrl_write(arm_sysctl_state *s, unsigned int dcc,
 
     switch (function) {
     case SYS_CFG_OSC:
-        if (site == SYS_CFG_SITE_MB && device < sizeof(s->mb_clock)) {
+        if (site == SYS_CFG_SITE_MB && device < ARRAY_SIZE(s->mb_clock)) {
             /* motherboard clock */
             s->mb_clock[device] = val;
             return true;
diff --git a/hw/net/stellaris_enet.c b/hw/net/stellaris_enet.c
index 9dd77f7571..d04e6a46f8 100644
--- a/hw/net/stellaris_enet.c
+++ b/hw/net/stellaris_enet.c
@@ -176,7 +176,8 @@ static uint64_t stellaris_enet_read(void *opaque, hwaddr offset,
         return val;
     case 0x14: /* IA0 */
         return s->conf.macaddr.a[0] | (s->conf.macaddr.a[1] << 8)
-               | (s->conf.macaddr.a[2] << 16) | (s->conf.macaddr.a[3] << 24);
+            | (s->conf.macaddr.a[2] << 16)
+            | ((uint32_t)s->conf.macaddr.a[3] << 24);
     case 0x18: /* IA1 */
         return s->conf.macaddr.a[4] | (s->conf.macaddr.a[5] << 8);
     case 0x1c: /* THR */
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 854997d9ba..a1de2f43a0 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -106,7 +106,7 @@ struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
         goto fail;
     }
     net->nc = backend;
-    net->dev.backend_features = tap_has_vnet_hdr(backend) ? 0 :
+    net->dev.backend_features = qemu_has_vnet_hdr(backend) ? 0 :
         (1 << VHOST_NET_F_VIRTIO_NET_HDR);
     net->backend = r;
 
@@ -117,8 +117,8 @@ struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
     if (r < 0) {
         goto fail;
     }
-    if (!tap_has_vnet_hdr_len(backend,
-                              sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
+    if (!qemu_has_vnet_hdr_len(backend,
+                               sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
         net->dev.features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
     }
     if (~net->dev.features & net->dev.backend_features) {
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 36266083b2..3c0342e17a 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -325,11 +325,7 @@ static void peer_test_vnet_hdr(VirtIONet *n)
         return;
     }
 
-    if (nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
-        return;
-    }
-
-    n->has_vnet_hdr = tap_has_vnet_hdr(nc->peer);
+    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
 }
 
 static int peer_has_vnet_hdr(VirtIONet *n)
@@ -342,7 +338,7 @@ static int peer_has_ufo(VirtIONet *n)
     if (!peer_has_vnet_hdr(n))
         return 0;
 
-    n->has_ufo = tap_has_ufo(qemu_get_queue(n->nic)->peer);
+    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
 
     return n->has_ufo;
 }
@@ -361,8 +357,8 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs)
         nc = qemu_get_subqueue(n->nic, i);
 
         if (peer_has_vnet_hdr(n) &&
-            tap_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
-            tap_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
+            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
+            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
             n->host_hdr_len = n->guest_hdr_len;
         }
     }
@@ -463,7 +459,7 @@ static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
 
 static void virtio_net_apply_guest_offloads(VirtIONet *n)
 {
-    tap_set_offload(qemu_get_subqueue(n->nic, 0)->peer,
+    qemu_set_offload(qemu_get_queue(n->nic)->peer,
             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
@@ -1544,7 +1540,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
     peer_test_vnet_hdr(n);
     if (peer_has_vnet_hdr(n)) {
         for (i = 0; i < n->max_queues; i++) {
-            tap_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
+            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
         }
         n->host_hdr_len = sizeof(struct virtio_net_hdr);
     } else {
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 19687aa03c..5be807ce82 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -1290,12 +1290,12 @@ static void vmxnet3_update_features(VMXNET3State *s)
               s->lro_supported, rxcso_supported,
               s->rx_vlan_stripping);
     if (s->peer_has_vhdr) {
-        tap_set_offload(qemu_get_queue(s->nic)->peer,
-                        rxcso_supported,
-                        s->lro_supported,
-                        s->lro_supported,
-                        0,
-                        0);
+        qemu_set_offload(qemu_get_queue(s->nic)->peer,
+                         rxcso_supported,
+                         s->lro_supported,
+                         s->lro_supported,
+                         0,
+                         0);
     }
 }
 
@@ -1883,11 +1883,9 @@ static NetClientInfo net_vmxnet3_info = {
 
 static bool vmxnet3_peer_has_vnet_hdr(VMXNET3State *s)
 {
-    NetClientState *peer = qemu_get_queue(s->nic)->peer;
+    NetClientState *nc = qemu_get_queue(s->nic);
 
-    if ((NULL != peer)                              &&
-        (peer->info->type == NET_CLIENT_OPTIONS_KIND_TAP)   &&
-        tap_has_vnet_hdr(peer)) {
+    if (qemu_has_vnet_hdr(nc->peer)) {
         return true;
     }
 
@@ -1935,10 +1933,10 @@ static void vmxnet3_net_init(VMXNET3State *s)
     s->lro_supported = false;
 
     if (s->peer_has_vhdr) {
-        tap_set_vnet_hdr_len(qemu_get_queue(s->nic)->peer,
+        qemu_set_vnet_hdr_len(qemu_get_queue(s->nic)->peer,
             sizeof(struct virtio_net_hdr));
 
-        tap_using_vnet_hdr(qemu_get_queue(s->nic)->peer, 1);
+        qemu_using_vnet_hdr(qemu_get_queue(s->nic)->peer, 1);
     }
 
     qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
diff --git a/hw/timer/arm_timer.c b/hw/timer/arm_timer.c
index a47afde23a..fb0a45c889 100644
--- a/hw/timer/arm_timer.c
+++ b/hw/timer/arm_timer.c
@@ -320,6 +320,7 @@ static uint64_t icp_pit_read(void *opaque, hwaddr offset,
     n = offset >> 8;
     if (n > 2) {
         qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad timer %d\n", __func__, n);
+        return 0;
     }
 
     return arm_timer_read(s->timer[n], offset & 0xff);
@@ -334,6 +335,7 @@ static void icp_pit_write(void *opaque, hwaddr offset,
     n = offset >> 8;
     if (n > 2) {
         qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad timer %d\n", __func__, n);
+        return;
     }
 
     arm_timer_write(s->timer[n], offset & 0xff, value);
diff --git a/include/hw/intc/arm_gic_common.h b/include/hw/intc/arm_gic_common.h
index 89384c2bb4..f6887ed92b 100644
--- a/include/hw/intc/arm_gic_common.h
+++ b/include/hw/intc/arm_gic_common.h
@@ -104,6 +104,7 @@ typedef struct GICState {
     MemoryRegion cpuiomem[GIC_NCPU + 1]; /* CPU interfaces */
     uint32_t num_irq;
     uint32_t revision;
+    int dev_fd; /* kvm device fd if backed by kvm vgic support */
 } GICState;
 
 #define TYPE_ARM_GIC_COMMON "arm_gic_common"
diff --git a/include/migration/page_cache.h b/include/migration/page_cache.h
index d156f0d398..2d5ce2dd7a 100644
--- a/include/migration/page_cache.h
+++ b/include/migration/page_cache.h
@@ -66,7 +66,7 @@ uint8_t *get_cached_data(const PageCache *cache, uint64_t addr);
  * @addr: page address
  * @pdata: pointer to the page
  */
-int cache_insert(PageCache *cache, uint64_t addr, uint8_t *pdata);
+int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata);
 
 /**
  * cache_resize: resize the page cache. In case of size reduction the extra
diff --git a/include/net/net.h b/include/net/net.h
index 11e146888b..8166345a13 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -50,6 +50,12 @@ typedef void (NetCleanup) (NetClientState *);
 typedef void (LinkStatusChanged)(NetClientState *);
 typedef void (NetClientDestructor)(NetClientState *);
 typedef RxFilterInfo *(QueryRxFilter)(NetClientState *);
+typedef bool (HasUfo)(NetClientState *);
+typedef bool (HasVnetHdr)(NetClientState *);
+typedef bool (HasVnetHdrLen)(NetClientState *, int);
+typedef void (UsingVnetHdr)(NetClientState *, bool);
+typedef void (SetOffload)(NetClientState *, int, int, int, int, int);
+typedef void (SetVnetHdrLen)(NetClientState *, int);
 
 typedef struct NetClientInfo {
     NetClientOptionsKind type;
@@ -62,6 +68,12 @@ typedef struct NetClientInfo {
     LinkStatusChanged *link_status_changed;
     QueryRxFilter *query_rx_filter;
     NetPoll *poll;
+    HasUfo *has_ufo;
+    HasVnetHdr *has_vnet_hdr;
+    HasVnetHdrLen *has_vnet_hdr_len;
+    UsingVnetHdr *using_vnet_hdr;
+    SetOffload *set_offload;
+    SetVnetHdrLen *set_vnet_hdr_len;
 } NetClientInfo;
 
 struct NetClientState {
@@ -120,6 +132,13 @@ ssize_t qemu_send_packet_async(NetClientState *nc, const uint8_t *buf,
 void qemu_purge_queued_packets(NetClientState *nc);
 void qemu_flush_queued_packets(NetClientState *nc);
 void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6]);
+bool qemu_has_ufo(NetClientState *nc);
+bool qemu_has_vnet_hdr(NetClientState *nc);
+bool qemu_has_vnet_hdr_len(NetClientState *nc, int len);
+void qemu_using_vnet_hdr(NetClientState *nc, bool enable);
+void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
+                      int ecn, int ufo);
+void qemu_set_vnet_hdr_len(NetClientState *nc, int len);
 void qemu_macaddr_default_if_unset(MACAddr *macaddr);
 int qemu_show_nic_models(const char *arg, const char *const *models);
 void qemu_check_nic_model(NICInfo *nd, const char *model);
diff --git a/include/net/tap.h b/include/net/tap.h
index a994f20447..6daeb42b0f 100644
--- a/include/net/tap.h
+++ b/include/net/tap.h
@@ -29,12 +29,6 @@
 #include "qemu-common.h"
 #include "qapi-types.h"
 
-bool tap_has_ufo(NetClientState *nc);
-int tap_has_vnet_hdr(NetClientState *nc);
-int tap_has_vnet_hdr_len(NetClientState *nc, int len);
-void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr);
-void tap_set_offload(NetClientState *nc, int csum, int tso4, int tso6, int ecn, int ufo);
-void tap_set_vnet_hdr_len(NetClientState *nc, int len);
 int tap_enable(NetClientState *nc);
 int tap_disable(NetClientState *nc);
 
diff --git a/include/qemu/crc32c.h b/include/qemu/crc32c.h
index 56d1c3bfde..dafb6a1ada 100644
--- a/include/qemu/crc32c.h
+++ b/include/qemu/crc32c.h
@@ -25,8 +25,8 @@
  *
  */
 
-#ifndef QEMU_CRC32_H
-#define QEMU_CRC32_H
+#ifndef QEMU_CRC32C_H
+#define QEMU_CRC32C_H
 
 #include "qemu-common.h"
 
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 3b25f27a7c..a02d67cd5a 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -194,6 +194,28 @@ int kvm_vm_ioctl(KVMState *s, int type, ...);
 
 int kvm_vcpu_ioctl(CPUState *cpu, int type, ...);
 
+/**
+ * kvm_device_ioctl - call an ioctl on a kvm device
+ * @fd: The KVM device file descriptor as returned from KVM_CREATE_DEVICE
+ * @type: The device-ctrl ioctl number
+ *
+ * Returns: -errno on error, nonnegative on success
+ */
+int kvm_device_ioctl(int fd, int type, ...);
+
+/**
+ * kvm_create_device - create a KVM device for the device control API
+ * @KVMState: The KVMState pointer
+ * @type: The KVM device type (see Documentation/virtual/kvm/devices in the
+ *        kernel source)
+ * @test: If true, only test if device can be created, but don't actually
+ *        create the device.
+ *
+ * Returns: -errno on error, nonnegative on success: @test ? 0 : device fd;
+ */
+int kvm_create_device(KVMState *s, uint64_t type, bool test);
+
+
 /* Arch specific hooks */
 
 extern const KVMCapabilityInfo kvm_arch_required_capabilities[];
@@ -319,4 +341,16 @@ int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq);
 void kvm_pc_gsi_handler(void *opaque, int n, int level);
 void kvm_pc_setup_irq_routing(bool pci_enabled);
 void kvm_init_irq_routing(KVMState *s);
+
+/**
+ * kvm_arch_irqchip_create:
+ * @KVMState: The KVMState pointer
+ *
+ * Allow architectures to create an in-kernel irq chip themselves.
+ *
+ * Returns: < 0: error
+ *            0: irq chip was not created
+ *          > 0: irq chip was created
+ */
+int kvm_arch_irqchip_create(KVMState *s);
 #endif
diff --git a/kvm-all.c b/kvm-all.c
index 2ca9143beb..290e726cfe 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1298,10 +1298,17 @@ static int kvm_irqchip_create(KVMState *s)
         return 0;
     }
 
-    ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
+    /* First probe and see if there's a arch-specific hook to create the
+     * in-kernel irqchip for us */
+    ret = kvm_arch_irqchip_create(s);
     if (ret < 0) {
-        fprintf(stderr, "Create kernel irqchip failed\n");
         return ret;
+    } else if (ret == 0) {
+        ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
+        if (ret < 0) {
+            fprintf(stderr, "Create kernel irqchip failed\n");
+            return ret;
+        }
     }
 
     kvm_kernel_irqchip = true;
@@ -1777,6 +1784,24 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
     return ret;
 }
 
+int kvm_device_ioctl(int fd, int type, ...)
+{
+    int ret;
+    void *arg;
+    va_list ap;
+
+    va_start(ap, type);
+    arg = va_arg(ap, void *);
+    va_end(ap);
+
+    trace_kvm_device_ioctl(fd, type, arg);
+    ret = ioctl(fd, type, arg);
+    if (ret == -1) {
+        ret = -errno;
+    }
+    return ret;
+}
+
 int kvm_has_sync_mmu(void)
 {
     return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
@@ -2058,3 +2083,24 @@ int kvm_on_sigbus(int code, void *addr)
 {
     return kvm_arch_on_sigbus(code, addr);
 }
+
+int kvm_create_device(KVMState *s, uint64_t type, bool test)
+{
+    int ret;
+    struct kvm_create_device create_dev;
+
+    create_dev.type = type;
+    create_dev.fd = -1;
+    create_dev.flags = test ? KVM_CREATE_DEVICE_TEST : 0;
+
+    if (!kvm_check_extension(s, KVM_CAP_DEVICE_CTRL)) {
+        return -ENOTSUP;
+    }
+
+    ret = kvm_vm_ioctl(s, KVM_CREATE_DEVICE, &create_dev);
+    if (ret) {
+        return ret;
+    }
+
+    return test ? 0 : create_dev.fd;
+}
diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h
index c498b60c05..ef0c8785ba 100644
--- a/linux-headers/asm-arm/kvm.h
+++ b/linux-headers/asm-arm/kvm.h
@@ -119,6 +119,26 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_32_CRN_MASK		0x0000000000007800
 #define KVM_REG_ARM_32_CRN_SHIFT	11
 
+#define ARM_CP15_REG_SHIFT_MASK(x,n) \
+	(((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
+
+#define __ARM_CP15_REG(op1,crn,crm,op2) \
+	(KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \
+	ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \
+	ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \
+	ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \
+	ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2))
+
+#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32)
+
+#define __ARM_CP15_REG64(op1,crm) \
+	(__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
+#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
+
+#define KVM_REG_ARM_TIMER_CTL		ARM_CP15_REG32(0, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT		ARM_CP15_REG64(1, 14) 
+#define KVM_REG_ARM_TIMER_CVAL		ARM_CP15_REG64(3, 14) 
+
 /* Normal registers are mapped as coprocessor 16. */
 #define KVM_REG_ARM_CORE		(0x0010 << KVM_REG_ARM_COPROC_SHIFT)
 #define KVM_REG_ARM_CORE_REG(name)	(offsetof(struct kvm_regs, name) / 4)
@@ -143,6 +163,14 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_VFP_FPINST		0x1009
 #define KVM_REG_ARM_VFP_FPINST2		0x100A
 
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR	0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS	1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS	2
+#define   KVM_DEV_ARM_VGIC_CPUID_SHIFT	32
+#define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
+#define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index 5031f42639..eaf54a30be 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -55,8 +55,9 @@ struct kvm_regs {
 #define KVM_ARM_TARGET_AEM_V8		0
 #define KVM_ARM_TARGET_FOUNDATION_V8	1
 #define KVM_ARM_TARGET_CORTEX_A57	2
+#define KVM_ARM_TARGET_XGENE_POTENZA	3
 
-#define KVM_ARM_NUM_TARGETS		3
+#define KVM_ARM_NUM_TARGETS		4
 
 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
 #define KVM_ARM_DEVICE_TYPE_SHIFT	0
@@ -129,6 +130,33 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM64_SYSREG_OP2_MASK	0x0000000000000007
 #define KVM_REG_ARM64_SYSREG_OP2_SHIFT	0
 
+#define ARM64_SYS_REG_SHIFT_MASK(x,n) \
+	(((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) & \
+	KVM_REG_ARM64_SYSREG_ ## n ## _MASK)
+
+#define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \
+	(KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \
+	ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+	ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+	ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+	ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+	ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
+
+#define KVM_REG_ARM_TIMER_CTL		ARM64_SYS_REG(3, 3, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT		ARM64_SYS_REG(3, 3, 14, 3, 2)
+#define KVM_REG_ARM_TIMER_CVAL		ARM64_SYS_REG(3, 3, 14, 0, 2)
+
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR	0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS	1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS	2
+#define   KVM_DEV_ARM_VGIC_CPUID_SHIFT	32
+#define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
+#define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
 #define KVM_ARM_IRQ_TYPE_MASK		0xff
diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
index 6836ec79a8..a6665be4f3 100644
--- a/linux-headers/asm-powerpc/kvm.h
+++ b/linux-headers/asm-powerpc/kvm.h
@@ -545,6 +545,7 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_TCSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb1)
 #define KVM_REG_PPC_PID		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb2)
 #define KVM_REG_PPC_ACOP	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb3)
+#define KVM_REG_PPC_WORT	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb4)
 
 #define KVM_REG_PPC_VRSAVE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4)
 #define KVM_REG_PPC_LPCR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5)
@@ -553,6 +554,8 @@ struct kvm_get_htab_header {
 /* Architecture compatibility level */
 #define KVM_REG_PPC_ARCH_COMPAT	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb7)
 
+#define KVM_REG_PPC_DABRX	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8)
+
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
  */
diff --git a/linux-headers/asm-x86/hyperv.h b/linux-headers/asm-x86/hyperv.h
index 3b400ee9f7..462efe746d 100644
--- a/linux-headers/asm-x86/hyperv.h
+++ b/linux-headers/asm-x86/hyperv.h
@@ -28,6 +28,9 @@
 /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
 #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
 
+/* A partition's reference time stamp counter (TSC) page */
+#define HV_X64_MSR_REFERENCE_TSC		0x40000021
+
 /*
  * There is a single feature flag that signifies the presence of the MSR
  * that can be used to retrieve both the local APIC Timer frequency as
@@ -149,9 +152,6 @@
 /* MSR used to read the per-partition time reference counter */
 #define HV_X64_MSR_TIME_REF_COUNT		0x40000020
 
-/* A partition's reference time stamp counter (TSC) page */
-#define HV_X64_MSR_REFERENCE_TSC		0x40000021
-
 /* MSR used to retrieve the TSC frequency */
 #define HV_X64_MSR_TSC_FREQUENCY		0x40000022
 
@@ -201,6 +201,9 @@
 #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
 		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
 
+#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
+#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
+
 #define HV_PROCESSOR_POWER_STATE_C0		0
 #define HV_PROCESSOR_POWER_STATE_C1		1
 #define HV_PROCESSOR_POWER_STATE_C2		2
@@ -213,4 +216,11 @@
 #define HV_STATUS_INVALID_ALIGNMENT		4
 #define HV_STATUS_INSUFFICIENT_BUFFERS		19
 
+typedef struct _HV_REFERENCE_TSC_PAGE {
+	__u32 tsc_sequence;
+	__u32 res1;
+	__u64 tsc_scale;
+	__s64 tsc_offset;
+} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
+
 #endif
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 999fb135e1..77ad35cd67 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -854,6 +854,7 @@ struct kvm_device_attr {
 #define  KVM_DEV_VFIO_GROUP			1
 #define   KVM_DEV_VFIO_GROUP_ADD			1
 #define   KVM_DEV_VFIO_GROUP_DEL			2
+#define KVM_DEV_TYPE_ARM_VGIC_V2	5
 
 /*
  * ioctls for VM fds
diff --git a/migration-rdma.c b/migration-rdma.c
index f94f3b4e3a..eeb4302215 100644
--- a/migration-rdma.c
+++ b/migration-rdma.c
@@ -3412,7 +3412,7 @@ void rdma_start_outgoing_migration(void *opaque,
     }
 
     ret = qemu_rdma_source_init(rdma, &local_err,
-        s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL]);
+        s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL]);
 
     if (ret) {
         goto err;
diff --git a/migration.c b/migration.c
index 25add6f9e2..14235b280a 100644
--- a/migration.c
+++ b/migration.c
@@ -82,7 +82,7 @@ void qemu_start_incoming_migration(const char *uri, Error **errp)
     if (strstart(uri, "tcp:", &p))
         tcp_start_incoming_migration(p, errp);
 #ifdef CONFIG_RDMA
-    else if (strstart(uri, "x-rdma:", &p))
+    else if (strstart(uri, "rdma:", &p))
         rdma_start_incoming_migration(p, errp);
 #endif
 #if !defined(WIN32)
@@ -438,7 +438,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
     if (strstart(uri, "tcp:", &p)) {
         tcp_start_outgoing_migration(s, p, &local_err);
 #ifdef CONFIG_RDMA
-    } else if (strstart(uri, "x-rdma:", &p)) {
+    } else if (strstart(uri, "rdma:", &p)) {
         rdma_start_outgoing_migration(s, p, &local_err);
 #endif
 #if !defined(WIN32)
@@ -532,7 +532,7 @@ bool migrate_rdma_pin_all(void)
 
     s = migrate_get_current();
 
-    return s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL];
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL];
 }
 
 bool migrate_auto_converge(void)
diff --git a/net/net.c b/net/net.c
index 41b38830ea..e3ef1e4f1d 100644
--- a/net/net.c
+++ b/net/net.c
@@ -378,6 +378,61 @@ void qemu_foreach_nic(qemu_nic_foreach func, void *opaque)
     }
 }
 
+bool qemu_has_ufo(NetClientState *nc)
+{
+    if (!nc || !nc->info->has_ufo) {
+        return false;
+    }
+
+    return nc->info->has_ufo(nc);
+}
+
+bool qemu_has_vnet_hdr(NetClientState *nc)
+{
+    if (!nc || !nc->info->has_vnet_hdr) {
+        return false;
+    }
+
+    return nc->info->has_vnet_hdr(nc);
+}
+
+bool qemu_has_vnet_hdr_len(NetClientState *nc, int len)
+{
+    if (!nc || !nc->info->has_vnet_hdr_len) {
+        return false;
+    }
+
+    return nc->info->has_vnet_hdr_len(nc, len);
+}
+
+void qemu_using_vnet_hdr(NetClientState *nc, bool enable)
+{
+    if (!nc || !nc->info->using_vnet_hdr) {
+        return;
+    }
+
+    nc->info->using_vnet_hdr(nc, enable);
+}
+
+void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
+                          int ecn, int ufo)
+{
+    if (!nc || !nc->info->set_offload) {
+        return;
+    }
+
+    nc->info->set_offload(nc, csum, tso4, tso6, ecn, ufo);
+}
+
+void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
+{
+    if (!nc || !nc->info->set_vnet_hdr_len) {
+        return;
+    }
+
+    nc->info->set_vnet_hdr_len(nc, len);
+}
+
 int qemu_can_send_packet(NetClientState *sender)
 {
     if (!sender->peer) {
diff --git a/net/netmap.c b/net/netmap.c
index 0ccc4976b5..8213304a5b 100644
--- a/net/netmap.c
+++ b/net/netmap.c
@@ -27,10 +27,13 @@
 #include <net/if.h>
 #include <sys/mman.h>
 #include <stdint.h>
+#include <stdio.h>
+#define NETMAP_WITH_LIBS
 #include <net/netmap.h>
 #include <net/netmap_user.h>
 
 #include "net/net.h"
+#include "net/tap.h"
 #include "clients.h"
 #include "sysemu/sysemu.h"
 #include "qemu/error-report.h"
@@ -54,33 +57,9 @@ typedef struct NetmapState {
     bool                read_poll;
     bool                write_poll;
     struct iovec        iov[IOV_MAX];
+    int                 vnet_hdr_len;  /* Current virtio-net header length. */
 } NetmapState;
 
-#define D(format, ...)                                          \
-    do {                                                        \
-        struct timeval __xxts;                                  \
-        gettimeofday(&__xxts, NULL);                            \
-        printf("%03d.%06d %s [%d] " format "\n",                \
-                (int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \
-                __func__, __LINE__, ##__VA_ARGS__);         \
-    } while (0)
-
-/* Rate limited version of "D", lps indicates how many per second */
-#define RD(lps, format, ...)                                    \
-    do {                                                        \
-        static int t0, __cnt;                                   \
-        struct timeval __xxts;                                  \
-        gettimeofday(&__xxts, NULL);                            \
-        if (t0 != __xxts.tv_sec) {                              \
-            t0 = __xxts.tv_sec;                                 \
-            __cnt = 0;                                          \
-        }                                                       \
-        if (__cnt++ < lps) {                                    \
-            D(format, ##__VA_ARGS__);                           \
-        }                                                       \
-    } while (0)
-
-
 #ifndef __FreeBSD__
 #define pkt_copy bcopy
 #else
@@ -237,7 +216,7 @@ static ssize_t netmap_receive(NetClientState *nc,
         return size;
     }
 
-    if (ring->avail == 0) {
+    if (nm_ring_empty(ring)) {
         /* No available slots in the netmap TX ring. */
         netmap_write_poll(s, true);
         return 0;
@@ -250,8 +229,7 @@ static ssize_t netmap_receive(NetClientState *nc,
     ring->slot[i].len = size;
     ring->slot[i].flags = 0;
     pkt_copy(buf, dst, size);
-    ring->cur = NETMAP_RING_NEXT(ring, i);
-    ring->avail--;
+    ring->cur = ring->head = nm_ring_next(ring, i);
     ioctl(s->me.fd, NIOCTXSYNC, NULL);
 
     return size;
@@ -267,17 +245,15 @@ static ssize_t netmap_receive_iov(NetClientState *nc,
     uint8_t *dst;
     int j;
     uint32_t i;
-    uint32_t avail;
 
     if (unlikely(!ring)) {
         /* Drop the packet. */
         return iov_size(iov, iovcnt);
     }
 
-    i = ring->cur;
-    avail = ring->avail;
+    last = i = ring->cur;
 
-    if (avail < iovcnt) {
+    if (nm_ring_space(ring) < iovcnt) {
         /* Not enough netmap slots. */
         netmap_write_poll(s, true);
         return 0;
@@ -293,7 +269,7 @@ static ssize_t netmap_receive_iov(NetClientState *nc,
         while (iov_frag_size) {
             nm_frag_size = MIN(iov_frag_size, ring->nr_buf_size);
 
-            if (unlikely(avail == 0)) {
+            if (unlikely(nm_ring_empty(ring))) {
                 /* We run out of netmap slots while splitting the
                    iovec fragments. */
                 netmap_write_poll(s, true);
@@ -308,8 +284,7 @@ static ssize_t netmap_receive_iov(NetClientState *nc,
             pkt_copy(iov[j].iov_base + offset, dst, nm_frag_size);
 
             last = i;
-            i = NETMAP_RING_NEXT(ring, i);
-            avail--;
+            i = nm_ring_next(ring, i);
 
             offset += nm_frag_size;
             iov_frag_size -= nm_frag_size;
@@ -318,9 +293,8 @@ static ssize_t netmap_receive_iov(NetClientState *nc,
     /* The last slot must not have NS_MOREFRAG set. */
     ring->slot[last].flags &= ~NS_MOREFRAG;
 
-    /* Now update ring->cur and ring->avail. */
-    ring->cur = i;
-    ring->avail = avail;
+    /* Now update ring->cur and ring->head. */
+    ring->cur = ring->head = i;
 
     ioctl(s->me.fd, NIOCTXSYNC, NULL);
 
@@ -343,7 +317,7 @@ static void netmap_send(void *opaque)
 
     /* Keep sending while there are available packets into the netmap
        RX ring and the forwarding path towards the peer is open. */
-    while (ring->avail > 0 && qemu_can_send_packet(&s->nc)) {
+    while (!nm_ring_empty(ring) && qemu_can_send_packet(&s->nc)) {
         uint32_t i;
         uint32_t idx;
         bool morefrag;
@@ -358,11 +332,10 @@ static void netmap_send(void *opaque)
             s->iov[iovcnt].iov_len = ring->slot[i].len;
             iovcnt++;
 
-            ring->cur = NETMAP_RING_NEXT(ring, i);
-            ring->avail--;
-        } while (ring->avail && morefrag);
+            ring->cur = ring->head = nm_ring_next(ring, i);
+        } while (!nm_ring_empty(ring) && morefrag);
 
-        if (unlikely(!ring->avail && morefrag)) {
+        if (unlikely(nm_ring_empty(ring) && morefrag)) {
             RD(5, "[netmap_send] ran out of slots, with a pending"
                    "incomplete packet\n");
         }
@@ -394,6 +367,63 @@ static void netmap_cleanup(NetClientState *nc)
     s->me.fd = -1;
 }
 
+/* Offloading manipulation support callbacks. */
+static bool netmap_has_ufo(NetClientState *nc)
+{
+    return true;
+}
+
+static bool netmap_has_vnet_hdr(NetClientState *nc)
+{
+    return true;
+}
+
+static bool netmap_has_vnet_hdr_len(NetClientState *nc, int len)
+{
+    return len == 0 || len == sizeof(struct virtio_net_hdr) ||
+                len == sizeof(struct virtio_net_hdr_mrg_rxbuf);
+}
+
+static void netmap_using_vnet_hdr(NetClientState *nc, bool enable)
+{
+}
+
+static void netmap_set_vnet_hdr_len(NetClientState *nc, int len)
+{
+    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
+    int err;
+    struct nmreq req;
+
+    /* Issue a NETMAP_BDG_VNET_HDR command to change the virtio-net header
+     * length for the netmap adapter associated to 'me->ifname'.
+     */
+    memset(&req, 0, sizeof(req));
+    pstrcpy(req.nr_name, sizeof(req.nr_name), s->me.ifname);
+    req.nr_version = NETMAP_API;
+    req.nr_cmd = NETMAP_BDG_VNET_HDR;
+    req.nr_arg1 = len;
+    err = ioctl(s->me.fd, NIOCREGIF, &req);
+    if (err) {
+        error_report("Unable to execute NETMAP_BDG_VNET_HDR on %s: %s",
+                     s->me.ifname, strerror(errno));
+    } else {
+        /* Keep track of the current length. */
+        s->vnet_hdr_len = len;
+    }
+}
+
+static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
+                               int ecn, int ufo)
+{
+    NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
+
+    /* Setting a virtio-net header length greater than zero automatically
+     * enables the offloadings.
+     */
+    if (!s->vnet_hdr_len) {
+        netmap_set_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr));
+    }
+}
 
 /* NetClientInfo methods */
 static NetClientInfo net_netmap_info = {
@@ -403,6 +433,12 @@ static NetClientInfo net_netmap_info = {
     .receive_iov = netmap_receive_iov,
     .poll = netmap_poll,
     .cleanup = netmap_cleanup,
+    .has_ufo = netmap_has_ufo,
+    .has_vnet_hdr = netmap_has_vnet_hdr,
+    .has_vnet_hdr_len = netmap_has_vnet_hdr_len,
+    .using_vnet_hdr = netmap_using_vnet_hdr,
+    .set_offload = netmap_set_offload,
+    .set_vnet_hdr_len = netmap_set_vnet_hdr_len,
 };
 
 /* The exported init function
@@ -428,6 +464,7 @@ int net_init_netmap(const NetClientOptions *opts,
     nc = qemu_new_net_client(&net_netmap_info, peer, "netmap", name);
     s = DO_UPCAST(NetmapState, nc, nc);
     s->me = me;
+    s->vnet_hdr_len = 0;
     netmap_read_poll(s, true); /* Initially only poll for reads. */
 
     return 0;
diff --git a/net/tap-win32.c b/net/tap-win32.c
index 91e9e844a0..8aee611f7d 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -669,11 +669,60 @@ static void tap_win32_send(void *opaque)
     }
 }
 
+static bool tap_has_ufo(NetClientState *nc)
+{
+    return false;
+}
+
+static bool tap_has_vnet_hdr(NetClientState *nc)
+{
+    return false;
+}
+
+int tap_probe_vnet_hdr_len(int fd, int len)
+{
+    return 0;
+}
+
+void tap_fd_set_vnet_hdr_len(int fd, int len)
+{
+}
+
+static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
+{
+}
+
+static void tap_set_offload(NetClientState *nc, int csum, int tso4,
+                     int tso6, int ecn, int ufo)
+{
+}
+
+struct vhost_net *tap_get_vhost_net(NetClientState *nc)
+{
+    return NULL;
+}
+
+static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
+{
+    return false;
+}
+
+static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
+{
+    abort();
+}
+
 static NetClientInfo net_tap_win32_info = {
     .type = NET_CLIENT_OPTIONS_KIND_TAP,
     .size = sizeof(TAPState),
     .receive = tap_receive,
     .cleanup = tap_cleanup,
+    .has_ufo = tap_has_ufo,
+    .has_vnet_hdr = tap_has_vnet_hdr,
+    .has_vnet_hdr_len = tap_has_vnet_hdr_len,
+    .using_vnet_hdr = tap_using_vnet_hdr,
+    .set_offload = tap_set_offload,
+    .set_vnet_hdr_len = tap_set_vnet_hdr_len,
 };
 
 static int tap_win32_init(NetClientState *peer, const char *model,
@@ -722,49 +771,6 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
     return 0;
 }
 
-bool tap_has_ufo(NetClientState *nc)
-{
-    return false;
-}
-
-int tap_has_vnet_hdr(NetClientState *nc)
-{
-    return 0;
-}
-
-int tap_probe_vnet_hdr_len(int fd, int len)
-{
-    return 0;
-}
-
-void tap_fd_set_vnet_hdr_len(int fd, int len)
-{
-}
-
-void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
-{
-}
-
-void tap_set_offload(NetClientState *nc, int csum, int tso4,
-                     int tso6, int ecn, int ufo)
-{
-}
-
-struct vhost_net *tap_get_vhost_net(NetClientState *nc)
-{
-    return NULL;
-}
-
-int tap_has_vnet_hdr_len(NetClientState *nc, int len)
-{
-    return 0;
-}
-
-void tap_set_vnet_hdr_len(NetClientState *nc, int len)
-{
-    abort();
-}
-
 int tap_enable(NetClientState *nc)
 {
     abort();
diff --git a/net/tap.c b/net/tap.c
index 39c1cda3e4..2d5099b9be 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -210,7 +210,7 @@ static void tap_send(void *opaque)
     } while (size > 0 && qemu_can_send_packet(&s->nc));
 }
 
-bool tap_has_ufo(NetClientState *nc)
+static bool tap_has_ufo(NetClientState *nc)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
@@ -219,7 +219,7 @@ bool tap_has_ufo(NetClientState *nc)
     return s->has_ufo;
 }
 
-int tap_has_vnet_hdr(NetClientState *nc)
+static bool tap_has_vnet_hdr(NetClientState *nc)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
@@ -228,16 +228,16 @@ int tap_has_vnet_hdr(NetClientState *nc)
     return !!s->host_vnet_hdr_len;
 }
 
-int tap_has_vnet_hdr_len(NetClientState *nc, int len)
+static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
     assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
 
-    return tap_probe_vnet_hdr_len(s->fd, len);
+    return !!tap_probe_vnet_hdr_len(s->fd, len);
 }
 
-void tap_set_vnet_hdr_len(NetClientState *nc, int len)
+static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
@@ -249,7 +249,7 @@ void tap_set_vnet_hdr_len(NetClientState *nc, int len)
     s->host_vnet_hdr_len = len;
 }
 
-void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
+static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
 
@@ -259,7 +259,7 @@ void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
     s->using_vnet_hdr = using_vnet_hdr;
 }
 
-void tap_set_offload(NetClientState *nc, int csum, int tso4,
+static void tap_set_offload(NetClientState *nc, int csum, int tso4,
                      int tso6, int ecn, int ufo)
 {
     TAPState *s = DO_UPCAST(TAPState, nc, nc);
@@ -314,6 +314,12 @@ static NetClientInfo net_tap_info = {
     .receive_iov = tap_receive_iov,
     .poll = tap_poll,
     .cleanup = tap_cleanup,
+    .has_ufo = tap_has_ufo,
+    .has_vnet_hdr = tap_has_vnet_hdr,
+    .has_vnet_hdr_len = tap_has_vnet_hdr_len,
+    .using_vnet_hdr = tap_using_vnet_hdr,
+    .set_offload = tap_set_offload,
+    .set_vnet_hdr_len = tap_set_vnet_hdr_len,
 };
 
 static TAPState *net_tap_fd_init(NetClientState *peer,
diff --git a/page_cache.c b/page_cache.c
index 3ef6ee7ad2..b033681a93 100644
--- a/page_cache.c
+++ b/page_cache.c
@@ -150,7 +150,7 @@ uint8_t *get_cached_data(const PageCache *cache, uint64_t addr)
     return cache_get_by_addr(cache, addr)->it_data;
 }
 
-int cache_insert(PageCache *cache, uint64_t addr, uint8_t *pdata)
+int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata)
 {
 
     CacheItem *it = NULL;
diff --git a/qapi-schema.json b/qapi-schema.json
index fcb2280053..ac8ad24966 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -751,10 +751,9 @@
 #          This feature allows us to minimize migration traffic for certain work
 #          loads, by sending compressed difference of the pages
 #
-# @x-rdma-pin-all: Controls whether or not the entire VM memory footprint is
+# @rdma-pin-all: Controls whether or not the entire VM memory footprint is
 #          mlock()'d on demand or all at once. Refer to docs/rdma.txt for usage.
-#          Disabled by default. Experimental: may (or may not) be renamed after
-#          further testing is complete. (since 1.6)
+#          Disabled by default. (since 2.0)
 #
 # @zero-blocks: During storage migration encode blocks of zeroes efficiently. This
 #          essentially saves 1MB of zeroes per block on the wire. Enabling requires
@@ -768,7 +767,7 @@
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
-  'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge', 'zero-blocks'] }
+  'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks'] }
 
 ##
 # @MigrationCapabilityStatus
diff --git a/qemu-file.c b/qemu-file.c
index 9473b674ba..f074af15c3 100644
--- a/qemu-file.c
+++ b/qemu-file.c
@@ -100,7 +100,14 @@ static int stdio_put_buffer(void *opaque, const uint8_t *buf, int64_t pos,
                             int size)
 {
     QEMUFileStdio *s = opaque;
-    return fwrite(buf, 1, size, s->stdio_file);
+    int res;
+
+    res = fwrite(buf, 1, size, s->stdio_file);
+
+    if (res != size) {
+        return -EIO;	/* fake errno value */
+    }
+    return res;
 }
 
 static int stdio_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size)
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
index df92fe5b15..df3aa7a64e 100644
--- a/stubs/Makefile.objs
+++ b/stubs/Makefile.objs
@@ -27,3 +27,4 @@ stub-obj-y += vm-stop.o
 stub-obj-y += vmstate.o
 stub-obj-$(CONFIG_WIN32) += fd-register.o
 stub-obj-y += cpus.o
+stub-obj-y += kvm.o
diff --git a/stubs/kvm.c b/stubs/kvm.c
new file mode 100644
index 0000000000..e7c60b6e0c
--- /dev/null
+++ b/stubs/kvm.c
@@ -0,0 +1,7 @@
+#include "qemu-common.h"
+#include "sysemu/kvm.h"
+
+int kvm_arch_irqchip_create(KVMState *s)
+{
+    return 0;
+}
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index afbd4222c5..00234e1d3d 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -132,6 +132,16 @@ typedef struct ARMCPU {
     uint32_t id_isar3;
     uint32_t id_isar4;
     uint32_t id_isar5;
+    uint64_t id_aa64pfr0;
+    uint64_t id_aa64pfr1;
+    uint64_t id_aa64dfr0;
+    uint64_t id_aa64dfr1;
+    uint64_t id_aa64afr0;
+    uint64_t id_aa64afr1;
+    uint64_t id_aa64isar0;
+    uint64_t id_aa64isar1;
+    uint64_t id_aa64mmfr0;
+    uint64_t id_aa64mmfr1;
     uint32_t clidr;
     /* The elements of this array are the CCSIDR values for each cache,
      * in the order L1DCache, L1ICache, L2DCache, L2ICache, etc.
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 6e7ce8905e..1ce8a9bc38 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -60,7 +60,7 @@ static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque)
         return;
     }
 
-    if (ri->type & ARM_CP_64BIT) {
+    if (cpreg_field_is_64bit(ri)) {
         CPREG_FIELD64(&cpu->env, ri) = ri->resetvalue;
     } else {
         CPREG_FIELD32(&cpu->env, ri) = ri->resetvalue;
@@ -91,9 +91,10 @@ static void arm_cpu_reset(CPUState *s)
         env->aarch64 = 1;
 #if defined(CONFIG_USER_ONLY)
         env->pstate = PSTATE_MODE_EL0t;
+        /* Userspace expects access to CTL_EL0 and the cache ops */
+        env->cp15.c1_sys |= SCTLR_UCT | SCTLR_UCI;
 #else
-        env->pstate = PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F
-            | PSTATE_MODE_EL1h;
+        env->pstate = PSTATE_MODE_EL1h;
 #endif
     }
 
@@ -108,13 +109,14 @@ static void arm_cpu_reset(CPUState *s)
     }
 #else
     /* SVC mode with interrupts disabled.  */
-    env->uncached_cpsr = ARM_CPU_MODE_SVC | CPSR_A | CPSR_F | CPSR_I;
+    env->uncached_cpsr = ARM_CPU_MODE_SVC;
+    env->daif = PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F;
     /* On ARMv7-M the CPSR_I is the value of the PRIMASK register, and is
        clear at reset.  Initial SP and PC are loaded from ROM.  */
     if (IS_M(env)) {
         uint32_t pc;
         uint8_t *rom;
-        env->uncached_cpsr &= ~CPSR_I;
+        env->daif &= ~PSTATE_I;
         rom = rom_ptr(0);
         if (rom) {
             /* We should really use ldl_phys here, in case the guest
@@ -922,6 +924,7 @@ static void arm_any_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
     set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
     set_feature(&cpu->env, ARM_FEATURE_V7MP);
+    set_feature(&cpu->env, ARM_FEATURE_CRC);
 #ifdef TARGET_AARCH64
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
 #endif
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 3c8a2dba2f..49fef3fcbe 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -74,8 +74,10 @@
  */
 #ifdef HOST_WORDS_BIGENDIAN
 #define offsetoflow32(S, M) (offsetof(S, M) + sizeof(uint32_t))
+#define offsetofhigh32(S, M) offsetof(S, M)
 #else
 #define offsetoflow32(S, M) offsetof(S, M)
+#define offsetofhigh32(S, M) (offsetof(S, M) + sizeof(uint32_t))
 #endif
 
 /* Meanings of the ARMCPU object's two inbound GPIO lines */
@@ -102,7 +104,7 @@ struct arm_boot_info;
 /* CPU state for each instance of a generic timer (in cp15 c14) */
 typedef struct ARMGenericTimer {
     uint64_t cval; /* Timer CompareValue register */
-    uint32_t ctl; /* Timer Control register */
+    uint64_t ctl; /* Timer Control register */
 } ARMGenericTimer;
 
 #define GTIMER_PHYS 0
@@ -133,6 +135,7 @@ typedef struct CPUARMState {
      *  NZCV are kept in the split out env->CF/VF/NF/ZF, (which have the same
      *    semantics as for AArch32, as described in the comments on each field)
      *  nRW (also known as M[4]) is kept, inverted, in env->aarch64
+     *  DAIF (exception masks) are kept in env->daif
      *  all other bits are stored in their correct places in env->pstate
      */
     uint32_t pstate;
@@ -162,20 +165,19 @@ typedef struct CPUARMState {
     uint32_t GE; /* cpsr[19:16] */
     uint32_t thumb; /* cpsr[5]. 0 = arm mode, 1 = thumb mode. */
     uint32_t condexec_bits; /* IT bits.  cpsr[15:10,26:25].  */
+    uint32_t daif; /* exception masks, in the bits they are in in PSTATE */
 
     /* System control coprocessor (cp15) */
     struct {
         uint32_t c0_cpuid;
-        uint32_t c0_cssel; /* Cache size selection.  */
-        uint32_t c1_sys; /* System control register.  */
-        uint32_t c1_coproc; /* Coprocessor access register.  */
+        uint64_t c0_cssel; /* Cache size selection.  */
+        uint64_t c1_sys; /* System control register.  */
+        uint64_t c1_coproc; /* Coprocessor access register.  */
         uint32_t c1_xscaleauxcr; /* XScale auxiliary control register.  */
         uint32_t c1_scr; /* secure config register.  */
-        uint32_t c2_base0; /* MMU translation table base 0.  */
-        uint32_t c2_base0_hi; /* MMU translation table base 0, high 32 bits */
-        uint32_t c2_base1; /* MMU translation table base 0.  */
-        uint32_t c2_base1_hi; /* MMU translation table base 1, high 32 bits */
-        uint32_t c2_control; /* MMU translation table base control.  */
+        uint64_t ttbr0_el1; /* MMU translation table base 0. */
+        uint64_t ttbr1_el1; /* MMU translation table base 1. */
+        uint64_t c2_control; /* MMU translation table base control.  */
         uint32_t c2_mask; /* MMU translation table base selection mask.  */
         uint32_t c2_base_mask; /* MMU translation table base 0 mask. */
         uint32_t c2_data; /* MPU data cachable bits.  */
@@ -197,14 +199,15 @@ typedef struct CPUARMState {
         uint32_t c9_pmxevtyper; /* perf monitor event type */
         uint32_t c9_pmuserenr; /* perf monitor user enable */
         uint32_t c9_pminten; /* perf monitor interrupt enables */
-        uint32_t c12_vbar; /* vector base address register */
+        uint64_t mair_el1;
+        uint64_t c12_vbar; /* vector base address register */
         uint32_t c13_fcse; /* FCSE PID.  */
         uint32_t c13_context; /* Context ID.  */
         uint64_t tpidr_el0; /* User RW Thread register.  */
         uint64_t tpidrro_el0; /* User RO Thread register.  */
         uint64_t tpidr_el1; /* Privileged Thread register.  */
-        uint32_t c14_cntfrq; /* Counter Frequency register */
-        uint32_t c14_cntkctl; /* Timer Control register */
+        uint64_t c14_cntfrq; /* Counter Frequency register */
+        uint64_t c14_cntkctl; /* Timer Control register */
         ARMGenericTimer c14_timer[NUM_GTIMERS];
         uint32_t c15_cpar; /* XScale Coprocessor Access Register */
         uint32_t c15_ticonfig; /* TI925T configuration byte.  */
@@ -215,6 +218,10 @@ typedef struct CPUARMState {
         uint32_t c15_diagnostic; /* diagnostic register */
         uint32_t c15_power_diagnostic;
         uint32_t c15_power_control; /* power control */
+        uint64_t dbgbvr[16]; /* breakpoint value registers */
+        uint64_t dbgbcr[16]; /* breakpoint control registers */
+        uint64_t dbgwvr[16]; /* watchpoint value registers */
+        uint64_t dbgwcr[16]; /* watchpoint control registers */
     } cp15;
 
     struct {
@@ -401,9 +408,11 @@ int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw,
 #define CPSR_Z (1U << 30)
 #define CPSR_N (1U << 31)
 #define CPSR_NZCV (CPSR_N | CPSR_Z | CPSR_C | CPSR_V)
+#define CPSR_AIF (CPSR_A | CPSR_I | CPSR_F)
 
 #define CPSR_IT (CPSR_IT_0_1 | CPSR_IT_2_7)
-#define CACHED_CPSR_BITS (CPSR_T | CPSR_GE | CPSR_IT | CPSR_Q | CPSR_NZCV)
+#define CACHED_CPSR_BITS (CPSR_T | CPSR_AIF | CPSR_GE | CPSR_IT | CPSR_Q \
+    | CPSR_NZCV)
 /* Bits writable in user mode.  */
 #define CPSR_USER (CPSR_NZCV | CPSR_Q | CPSR_GE)
 /* Execution state bits.  MRS read as zero, MSR writes ignored.  */
@@ -426,7 +435,8 @@ int cpu_arm_handle_mmu_fault (CPUARMState *env, target_ulong address, int rw,
 #define PSTATE_Z (1U << 30)
 #define PSTATE_N (1U << 31)
 #define PSTATE_NZCV (PSTATE_N | PSTATE_Z | PSTATE_C | PSTATE_V)
-#define CACHED_PSTATE_BITS (PSTATE_NZCV)
+#define PSTATE_DAIF (PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F)
+#define CACHED_PSTATE_BITS (PSTATE_NZCV | PSTATE_DAIF)
 /* Mode values for AArch64 */
 #define PSTATE_MODE_EL3h 13
 #define PSTATE_MODE_EL3t 12
@@ -447,7 +457,7 @@ static inline uint32_t pstate_read(CPUARMState *env)
     ZF = (env->ZF == 0);
     return (env->NF & 0x80000000) | (ZF << 30)
         | (env->CF << 29) | ((env->VF & 0x80000000) >> 3)
-        | env->pstate;
+        | env->pstate | env->daif;
 }
 
 static inline void pstate_write(CPUARMState *env, uint32_t val)
@@ -456,6 +466,7 @@ static inline void pstate_write(CPUARMState *env, uint32_t val)
     env->NF = val;
     env->CF = (val >> 29) & 1;
     env->VF = (val << 3) & 0x80000000;
+    env->daif = val & PSTATE_DAIF;
     env->pstate = val & ~CACHED_PSTATE_BITS;
 }
 
@@ -615,6 +626,7 @@ enum arm_features {
     ARM_FEATURE_AARCH64, /* supports 64 bit mode */
     ARM_FEATURE_V8_AES, /* implements AES part of v8 Crypto Extensions */
     ARM_FEATURE_CBAR, /* has cp15 CBAR */
+    ARM_FEATURE_CRC, /* ARMv8 CRC instructions */
 };
 
 static inline int arm_feature(CPUARMState *env, int feature)
@@ -622,6 +634,22 @@ static inline int arm_feature(CPUARMState *env, int feature)
     return (env->features & (1ULL << feature)) != 0;
 }
 
+/* Return true if the specified exception level is running in AArch64 state. */
+static inline bool arm_el_is_aa64(CPUARMState *env, int el)
+{
+    /* We don't currently support EL2 or EL3, and this isn't valid for EL0
+     * (if we're in EL0, is_a64() is what you want, and if we're not in EL0
+     * then the state of EL0 isn't well defined.)
+     */
+    assert(el == 1);
+    /* AArch64-capable CPUs always run with EL1 in AArch64 mode. This
+     * is a QEMU-imposed simplification which we may wish to change later.
+     * If we in future support EL2 and/or EL3, then the state of lower
+     * exception levels is controlled by the HCR.RW and SCR.RW bits.
+     */
+    return arm_feature(env, ARM_FEATURE_AARCH64);
+}
+
 void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf);
 
 /* Interface between CPU and Interrupt controller.  */
@@ -731,7 +759,8 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
 #define ARM_CP_NOP (ARM_CP_SPECIAL | (1 << 8))
 #define ARM_CP_WFI (ARM_CP_SPECIAL | (2 << 8))
 #define ARM_CP_NZCV (ARM_CP_SPECIAL | (3 << 8))
-#define ARM_LAST_SPECIAL ARM_CP_NZCV
+#define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | (4 << 8))
+#define ARM_LAST_SPECIAL ARM_CP_CURRENTEL
 /* Used only as a terminator for ARMCPRegInfo lists */
 #define ARM_CP_SENTINEL 0xffff
 /* Mask of only the flag bits in a type field */
@@ -959,6 +988,14 @@ uint64_t arm_cp_read_zero(CPUARMState *env, const ARMCPRegInfo *ri);
  */
 void arm_cp_reset_ignore(CPUARMState *env, const ARMCPRegInfo *opaque);
 
+/* Return true if this reginfo struct's field in the cpu state struct
+ * is 64 bits wide.
+ */
+static inline bool cpreg_field_is_64bit(const ARMCPRegInfo *ri)
+{
+    return (ri->state == ARM_CP_STATE_AA64) || (ri->type & ARM_CP_64BIT);
+}
+
 static inline bool cp_access_ok(int current_pl,
                                 const ARMCPRegInfo *ri, int isread)
 {
@@ -1043,7 +1080,7 @@ static inline CPUARMState *cpu_init(const char *cpu_model)
 #define MMU_USER_IDX 1
 static inline int cpu_mmu_index (CPUARMState *env)
 {
-    return (env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR ? 1 : 0;
+    return arm_current_pl(env) ? 0 : 1;
 }
 
 #include "exec/cpu-all.h"
@@ -1070,7 +1107,9 @@ static inline int cpu_mmu_index (CPUARMState *env)
 #define ARM_TBFLAG_BSWAP_CODE_SHIFT 16
 #define ARM_TBFLAG_BSWAP_CODE_MASK  (1 << ARM_TBFLAG_BSWAP_CODE_SHIFT)
 
-/* Bit usage when in AArch64 state: currently no bits defined */
+/* Bit usage when in AArch64 state */
+#define ARM_TBFLAG_AA64_EL_SHIFT    0
+#define ARM_TBFLAG_AA64_EL_MASK     (0x3 << ARM_TBFLAG_AA64_EL_SHIFT)
 
 /* some convenience accessor macros */
 #define ARM_TBFLAG_AARCH64_STATE(F) \
@@ -1089,13 +1128,16 @@ static inline int cpu_mmu_index (CPUARMState *env)
     (((F) & ARM_TBFLAG_CONDEXEC_MASK) >> ARM_TBFLAG_CONDEXEC_SHIFT)
 #define ARM_TBFLAG_BSWAP_CODE(F) \
     (((F) & ARM_TBFLAG_BSWAP_CODE_MASK) >> ARM_TBFLAG_BSWAP_CODE_SHIFT)
+#define ARM_TBFLAG_AA64_EL(F) \
+    (((F) & ARM_TBFLAG_AA64_EL_MASK) >> ARM_TBFLAG_AA64_EL_SHIFT)
 
 static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
     if (is_a64(env)) {
         *pc = env->pc;
-        *flags = ARM_TBFLAG_AARCH64_STATE_MASK;
+        *flags = ARM_TBFLAG_AARCH64_STATE_MASK
+            | (arm_current_pl(env) << ARM_TBFLAG_AA64_EL_SHIFT);
     } else {
         int privmode;
         *pc = env->regs[15];
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index a639c2e476..8426bf1333 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -45,6 +45,7 @@ static void aarch64_any_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
     set_feature(&cpu->env, ARM_FEATURE_V7MP);
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+    cpu->ctr = 0x80030003; /* 32 byte I and D cacheline size, VIPT icache */
 }
 #endif
 
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 1b111b6e85..90f85f1899 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -5,6 +5,8 @@
 #include "sysemu/arch_init.h"
 #include "sysemu/sysemu.h"
 #include "qemu/bitops.h"
+#include "qemu/crc32c.h"
+#include <zlib.h> /* For crc32 */
 
 #ifndef CONFIG_USER_ONLY
 static inline int get_phys_addr(CPUARMState *env, uint32_t address,
@@ -109,7 +111,7 @@ static int aarch64_fpu_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg)
 
 static uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    if (ri->type & ARM_CP_64BIT) {
+    if (cpreg_field_is_64bit(ri)) {
         return CPREG_FIELD64(env, ri);
     } else {
         return CPREG_FIELD32(env, ri);
@@ -119,7 +121,7 @@ static uint64_t raw_read(CPUARMState *env, const ARMCPRegInfo *ri)
 static void raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
                       uint64_t value)
 {
-    if (ri->type & ARM_CP_64BIT) {
+    if (cpreg_field_is_64bit(ri)) {
         CPREG_FIELD64(env, ri) = value;
     } else {
         CPREG_FIELD32(env, ri) = value;
@@ -458,7 +460,8 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
      */
     { .name = "WFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1,
       .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, },
-    { .name = "CPACR", .cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2,
+    { .name = "CPACR", .state = ARM_CP_STATE_BOTH, .opc0 = 3,
+      .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c1_coproc),
       .resetvalue = 0, .writefn = cpacr_write },
     REGINFO_SENTINEL
@@ -533,6 +536,12 @@ static void pmintenclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 static void vbar_write(CPUARMState *env, const ARMCPRegInfo *ri,
                        uint64_t value)
 {
+    /* Note that even though the AArch64 view of this register has bits
+     * [10:0] all RES0 we can only mask the bottom 5, to comply with the
+     * architectural requirements for bits which are RES0 only in some
+     * contexts. (ARMv8 would permit us to do no masking at all, but ARMv7
+     * requires the bottom five bits to be RAZ/WI because they're UNK/SBZP.)
+     */
     env->cp15.c12_vbar = value & ~0x1Ful;
 }
 
@@ -622,16 +631,19 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
       .access = PL1_RW, .type = ARM_CP_NO_MIGRATE,
       .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
       .resetvalue = 0, .writefn = pmintenclr_write, },
-    { .name = "VBAR", .cp = 15, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0,
+    { .name = "VBAR", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .crn = 12, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .writefn = vbar_write,
       .fieldoffset = offsetof(CPUARMState, cp15.c12_vbar),
       .resetvalue = 0 },
     { .name = "SCR", .cp = 15, .crn = 1, .crm = 1, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c1_scr),
       .resetvalue = 0, },
-    { .name = "CCSIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0,
+    { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0,
       .access = PL1_R, .readfn = ccsidr_read, .type = ARM_CP_NO_MIGRATE },
-    { .name = "CSSELR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 2, .opc2 = 0,
+    { .name = "CSSELR", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 2, .opc2 = 0,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c0_cssel),
       .writefn = csselr_write, .resetvalue = 0 },
     /* Auxiliary ID register: this actually has an IMPDEF value but for now
@@ -639,6 +651,26 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
      */
     { .name = "AIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 7,
       .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+    /* MAIR can just read-as-written because we don't implement caches
+     * and so don't need to care about memory attributes.
+     */
+    { .name = "MAIR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 0,
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el1),
+      .resetvalue = 0 },
+    /* For non-long-descriptor page tables these are PRRR and NMRR;
+     * regardless they still act as reads-as-written for QEMU.
+     * The override is necessary because of the overly-broad TLB_LOCKDOWN
+     * definition.
+     */
+    { .name = "MAIR0", .state = ARM_CP_STATE_AA32, .type = ARM_CP_OVERRIDE,
+      .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 0, .access = PL1_RW,
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.mair_el1),
+      .resetfn = arm_cp_reset_ignore },
+    { .name = "MAIR1", .state = ARM_CP_STATE_AA32, .type = ARM_CP_OVERRIDE,
+      .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 1, .access = PL1_RW,
+      .fieldoffset = offsetofhigh32(CPUARMState, cp15.mair_el1),
+      .resetfn = arm_cp_reset_ignore },
     REGINFO_SENTINEL
 };
 
@@ -872,30 +904,55 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
      * Our reset value matches the fixed frequency we implement the timer at.
      */
     { .name = "CNTFRQ", .cp = 15, .crn = 14, .crm = 0, .opc1 = 0, .opc2 = 0,
-      .access = PL1_RW | PL0_R,
+      .type = ARM_CP_NO_MIGRATE,
+      .access = PL1_RW | PL0_R, .accessfn = gt_cntfrq_access,
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.c14_cntfrq),
+      .resetfn = arm_cp_reset_ignore,
+    },
+    { .name = "CNTFRQ_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 0,
+      .access = PL1_RW | PL0_R, .accessfn = gt_cntfrq_access,
       .fieldoffset = offsetof(CPUARMState, cp15.c14_cntfrq),
       .resetvalue = (1000 * 1000 * 1000) / GTIMER_SCALE,
-      .accessfn = gt_cntfrq_access,
     },
     /* overall control: mostly access permissions */
-    { .name = "CNTKCTL", .cp = 15, .crn = 14, .crm = 1, .opc1 = 0, .opc2 = 0,
+    { .name = "CNTKCTL", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 0, .crn = 14, .crm = 1, .opc2 = 0,
       .access = PL1_RW,
       .fieldoffset = offsetof(CPUARMState, cp15.c14_cntkctl),
       .resetvalue = 0,
     },
     /* per-timer control */
     { .name = "CNTP_CTL", .cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 1,
+      .type = ARM_CP_IO | ARM_CP_NO_MIGRATE, .access = PL1_RW | PL0_R,
+      .accessfn = gt_ptimer_access,
+      .fieldoffset = offsetoflow32(CPUARMState,
+                                   cp15.c14_timer[GTIMER_PHYS].ctl),
+      .resetfn = arm_cp_reset_ignore,
+      .writefn = gt_ctl_write, .raw_writefn = raw_write,
+    },
+    { .name = "CNTP_CTL_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 1,
       .type = ARM_CP_IO, .access = PL1_RW | PL0_R,
+      .accessfn = gt_ptimer_access,
       .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].ctl),
       .resetvalue = 0,
-      .accessfn = gt_ptimer_access,
       .writefn = gt_ctl_write, .raw_writefn = raw_write,
     },
     { .name = "CNTV_CTL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 1,
+      .type = ARM_CP_IO | ARM_CP_NO_MIGRATE, .access = PL1_RW | PL0_R,
+      .accessfn = gt_vtimer_access,
+      .fieldoffset = offsetoflow32(CPUARMState,
+                                   cp15.c14_timer[GTIMER_VIRT].ctl),
+      .resetfn = arm_cp_reset_ignore,
+      .writefn = gt_ctl_write, .raw_writefn = raw_write,
+    },
+    { .name = "CNTV_CTL_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 1,
       .type = ARM_CP_IO, .access = PL1_RW | PL0_R,
+      .accessfn = gt_vtimer_access,
       .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].ctl),
       .resetvalue = 0,
-      .accessfn = gt_vtimer_access,
       .writefn = gt_ctl_write, .raw_writefn = raw_write,
     },
     /* TimerValue views: a 32 bit downcounting view of the underlying state */
@@ -904,37 +961,73 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
       .accessfn = gt_ptimer_access,
       .readfn = gt_tval_read, .writefn = gt_tval_write,
     },
+    { .name = "CNTP_TVAL_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R,
+      .readfn = gt_tval_read, .writefn = gt_tval_write,
+    },
     { .name = "CNTV_TVAL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 0,
       .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R,
       .accessfn = gt_vtimer_access,
       .readfn = gt_tval_read, .writefn = gt_tval_write,
     },
+    { .name = "CNTV_TVAL_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE | ARM_CP_IO, .access = PL1_RW | PL0_R,
+      .readfn = gt_tval_read, .writefn = gt_tval_write,
+    },
     /* The counter itself */
     { .name = "CNTPCT", .cp = 15, .crm = 14, .opc1 = 0,
       .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE | ARM_CP_IO,
       .accessfn = gt_pct_access,
+      .readfn = gt_cnt_read, .resetfn = arm_cp_reset_ignore,
+    },
+    { .name = "CNTPCT_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 1,
+      .access = PL0_R, .type = ARM_CP_NO_MIGRATE | ARM_CP_IO,
+      .accessfn = gt_pct_access,
       .readfn = gt_cnt_read, .resetfn = gt_cnt_reset,
     },
     { .name = "CNTVCT", .cp = 15, .crm = 14, .opc1 = 1,
       .access = PL0_R, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE | ARM_CP_IO,
       .accessfn = gt_vct_access,
+      .readfn = gt_cnt_read, .resetfn = arm_cp_reset_ignore,
+    },
+    { .name = "CNTVCT_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 2,
+      .access = PL0_R, .type = ARM_CP_NO_MIGRATE | ARM_CP_IO,
+      .accessfn = gt_vct_access,
       .readfn = gt_cnt_read, .resetfn = gt_cnt_reset,
     },
     /* Comparison value, indicating when the timer goes off */
     { .name = "CNTP_CVAL", .cp = 15, .crm = 14, .opc1 = 2,
       .access = PL1_RW | PL0_R,
-      .type = ARM_CP_64BIT | ARM_CP_IO,
+      .type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_NO_MIGRATE,
       .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval),
-      .resetvalue = 0,
-      .accessfn = gt_ptimer_access,
+      .accessfn = gt_ptimer_access, .resetfn = arm_cp_reset_ignore,
+      .writefn = gt_cval_write, .raw_writefn = raw_write,
+    },
+    { .name = "CNTP_CVAL_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 2,
+      .access = PL1_RW | PL0_R,
+      .type = ARM_CP_IO,
+      .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval),
+      .resetvalue = 0, .accessfn = gt_vtimer_access,
       .writefn = gt_cval_write, .raw_writefn = raw_write,
     },
     { .name = "CNTV_CVAL", .cp = 15, .crm = 14, .opc1 = 3,
       .access = PL1_RW | PL0_R,
-      .type = ARM_CP_64BIT | ARM_CP_IO,
+      .type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_NO_MIGRATE,
       .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval),
-      .resetvalue = 0,
-      .accessfn = gt_vtimer_access,
+      .accessfn = gt_vtimer_access, .resetfn = arm_cp_reset_ignore,
+      .writefn = gt_cval_write, .raw_writefn = raw_write,
+    },
+    { .name = "CNTV_CVAL_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 2,
+      .access = PL1_RW | PL0_R,
+      .type = ARM_CP_IO,
+      .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval),
+      .resetvalue = 0, .accessfn = gt_vtimer_access,
       .writefn = gt_cval_write, .raw_writefn = raw_write,
     },
     REGINFO_SENTINEL
@@ -1031,8 +1124,8 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
                 env->cp15.c7_par = phys_addr & 0xfffff000;
             }
         } else {
-            env->cp15.c7_par = ((ret & (10 << 1)) >> 5) |
-                ((ret & (12 << 1)) >> 6) |
+            env->cp15.c7_par = ((ret & (1 << 10)) >> 5) |
+                ((ret & (1 << 12)) >> 6) |
                 ((ret & 0xf) << 1) | 1;
         }
         env->cp15.c7_par_hi = 0;
@@ -1193,6 +1286,26 @@ static void vmsa_ttbcr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
     env->cp15.c2_mask = 0;
 }
 
+static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                               uint64_t value)
+{
+    /* For AArch64 the A1 bit could result in a change of ASID, so TLB flush. */
+    tlb_flush(env, 1);
+    env->cp15.c2_control = value;
+}
+
+static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                            uint64_t value)
+{
+    /* 64 bit accesses to the TTBRs can change the ASID and so we
+     * must flush the TLB.
+     */
+    if (cpreg_field_is_64bit(ri)) {
+        tlb_flush(env, 1);
+    }
+    raw_write(env, ri, value);
+}
+
 static const ARMCPRegInfo vmsa_cp_reginfo[] = {
     { .name = "DFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW,
@@ -1200,16 +1313,23 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = {
     { .name = "IFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 1,
       .access = PL1_RW,
       .fieldoffset = offsetof(CPUARMState, cp15.c5_insn), .resetvalue = 0, },
-    { .name = "TTBR0", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
-      .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.c2_base0), .resetvalue = 0, },
-    { .name = "TTBR1", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 1,
-      .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.c2_base1), .resetvalue = 0, },
-    { .name = "TTBCR", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2,
-      .access = PL1_RW, .writefn = vmsa_ttbcr_write,
-      .resetfn = vmsa_ttbcr_reset, .raw_writefn = vmsa_ttbcr_raw_write,
+    { .name = "TTBR0_EL1", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el1),
+      .writefn = vmsa_ttbr_write, .resetvalue = 0 },
+    { .name = "TTBR1_EL1", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 1,
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.ttbr1_el1),
+      .writefn = vmsa_ttbr_write, .resetvalue = 0 },
+    { .name = "TCR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2,
+      .access = PL1_RW, .writefn = vmsa_tcr_el1_write,
+      .resetfn = vmsa_ttbcr_reset, .raw_writefn = raw_write,
       .fieldoffset = offsetof(CPUARMState, cp15.c2_control) },
+    { .name = "TTBCR", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 2,
+      .access = PL1_RW, .type = ARM_CP_NO_MIGRATE, .writefn = vmsa_ttbcr_write,
+      .resetfn = arm_cp_reset_ignore, .raw_writefn = vmsa_ttbcr_raw_write,
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.c2_control) },
     { .name = "DFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c6_data),
       .resetvalue = 0, },
@@ -1379,7 +1499,8 @@ static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     CPUState *cs = CPU(arm_env_get_cpu(env));
     uint32_t mpidr = cs->cpu_index;
-    /* We don't support setting cluster ID ([8..11])
+    /* We don't support setting cluster ID ([8..11]) (known as Aff1
+     * in later ARM ARM versions), or any of the higher affinity level fields,
      * so these bits always RAZ.
      */
     if (arm_feature(env, ARM_FEATURE_V7MP)) {
@@ -1394,7 +1515,8 @@ static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 }
 
 static const ARMCPRegInfo mpidr_cp_reginfo[] = {
-    { .name = "MPIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5,
+    { .name = "MPIDR", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5,
       .access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_MIGRATE },
     REGINFO_SENTINEL
 };
@@ -1417,57 +1539,15 @@ static void par64_reset(CPUARMState *env, const ARMCPRegInfo *ri)
     env->cp15.c7_par = 0;
 }
 
-static uint64_t ttbr064_read(CPUARMState *env, const ARMCPRegInfo *ri)
-{
-    return ((uint64_t)env->cp15.c2_base0_hi << 32) | env->cp15.c2_base0;
-}
-
-static void ttbr064_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                              uint64_t value)
-{
-    env->cp15.c2_base0_hi = value >> 32;
-    env->cp15.c2_base0 = value;
-}
-
-static void ttbr064_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                          uint64_t value)
-{
-    /* Writes to the 64 bit format TTBRs may change the ASID */
-    tlb_flush(env, 1);
-    ttbr064_raw_write(env, ri, value);
-}
-
-static void ttbr064_reset(CPUARMState *env, const ARMCPRegInfo *ri)
-{
-    env->cp15.c2_base0_hi = 0;
-    env->cp15.c2_base0 = 0;
-}
-
-static uint64_t ttbr164_read(CPUARMState *env, const ARMCPRegInfo *ri)
-{
-    return ((uint64_t)env->cp15.c2_base1_hi << 32) | env->cp15.c2_base1;
-}
-
-static void ttbr164_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                          uint64_t value)
-{
-    env->cp15.c2_base1_hi = value >> 32;
-    env->cp15.c2_base1 = value;
-}
-
-static void ttbr164_reset(CPUARMState *env, const ARMCPRegInfo *ri)
-{
-    env->cp15.c2_base1_hi = 0;
-    env->cp15.c2_base1 = 0;
-}
-
 static const ARMCPRegInfo lpae_cp_reginfo[] = {
     /* NOP AMAIR0/1: the override is because these clash with the rather
      * broadly specified TLB_LOCKDOWN entry in the generic cp_reginfo.
      */
-    { .name = "AMAIR0", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 0,
+    { .name = "AMAIR0", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_OVERRIDE,
       .resetvalue = 0 },
+    /* AMAIR1 is mapped to AMAIR_EL1[63:32] */
     { .name = "AMAIR1", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 1,
       .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_OVERRIDE,
       .resetvalue = 0 },
@@ -1480,12 +1560,13 @@ static const ARMCPRegInfo lpae_cp_reginfo[] = {
       .access = PL1_RW, .type = ARM_CP_64BIT,
       .readfn = par64_read, .writefn = par64_write, .resetfn = par64_reset },
     { .name = "TTBR0", .cp = 15, .crm = 2, .opc1 = 0,
-      .access = PL1_RW, .type = ARM_CP_64BIT, .readfn = ttbr064_read,
-      .writefn = ttbr064_write, .raw_writefn = ttbr064_raw_write,
-      .resetfn = ttbr064_reset },
+      .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE,
+      .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el1),
+      .writefn = vmsa_ttbr_write, .resetfn = arm_cp_reset_ignore },
     { .name = "TTBR1", .cp = 15, .crm = 2, .opc1 = 1,
-      .access = PL1_RW, .type = ARM_CP_64BIT, .readfn = ttbr164_read,
-      .writefn = ttbr164_write, .resetfn = ttbr164_reset },
+      .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE,
+      .fieldoffset = offsetof(CPUARMState, cp15.ttbr1_el1),
+      .writefn = vmsa_ttbr_write, .resetfn = arm_cp_reset_ignore },
     REGINFO_SENTINEL
 };
 
@@ -1511,6 +1592,42 @@ static void aa64_fpsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
     vfp_set_fpsr(env, value);
 }
 
+static CPAccessResult aa64_cacheop_access(CPUARMState *env,
+                                          const ARMCPRegInfo *ri)
+{
+    /* Cache invalidate/clean: NOP, but EL0 must UNDEF unless
+     * SCTLR_EL1.UCI is set.
+     */
+    if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UCI)) {
+        return CP_ACCESS_TRAP;
+    }
+    return CP_ACCESS_OK;
+}
+
+static void tlbi_aa64_va_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                               uint64_t value)
+{
+    /* Invalidate by VA (AArch64 version) */
+    uint64_t pageaddr = value << 12;
+    tlb_flush_page(env, pageaddr);
+}
+
+static void tlbi_aa64_vaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                uint64_t value)
+{
+    /* Invalidate by VA, all ASIDs (AArch64 version) */
+    uint64_t pageaddr = value << 12;
+    tlb_flush_page(env, pageaddr);
+}
+
+static void tlbi_aa64_asid_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                 uint64_t value)
+{
+    /* Invalidate by ASID (AArch64 version) */
+    int asid = extract64(value, 48, 16);
+    tlb_flush(env, asid == 0);
+}
+
 static const ARMCPRegInfo v8_cp_reginfo[] = {
     /* Minimal set of EL0-visible registers. This will need to be expanded
      * significantly for system emulation of AArch64 CPUs.
@@ -1524,13 +1641,6 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
     { .name = "FPSR", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4,
       .access = PL0_RW, .readfn = aa64_fpsr_read, .writefn = aa64_fpsr_write },
-    /* This claims a 32 byte cacheline size for icache and dcache, VIPT icache.
-     * It will eventually need to have a CPU-specified reset value.
-     */
-    { .name = "CTR_EL0", .state = ARM_CP_STATE_AA64,
-      .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 0, .crm = 0,
-      .access = PL0_R, .type = ARM_CP_CONST,
-      .resetvalue = 0x80030003 },
     /* Prohibit use of DC ZVA. OPTME: implement DC ZVA and allow its use.
      * For system mode the DZP bit here will need to be computed, not constant.
      */
@@ -1538,6 +1648,103 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
       .opc0 = 3, .opc1 = 3, .opc2 = 7, .crn = 0, .crm = 0,
       .access = PL0_R, .type = ARM_CP_CONST,
       .resetvalue = 0x10 },
+    { .name = "CURRENTEL", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .opc2 = 2, .crn = 4, .crm = 2,
+      .access = PL1_R, .type = ARM_CP_CURRENTEL },
+    /* Cache ops: all NOPs since we don't emulate caches */
+    { .name = "IC_IALLUIS", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0,
+      .access = PL1_W, .type = ARM_CP_NOP },
+    { .name = "IC_IALLU", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 0,
+      .access = PL1_W, .type = ARM_CP_NOP },
+    { .name = "IC_IVAU", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 5, .opc2 = 1,
+      .access = PL0_W, .type = ARM_CP_NOP,
+      .accessfn = aa64_cacheop_access },
+    { .name = "DC_IVAC", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 1,
+      .access = PL1_W, .type = ARM_CP_NOP },
+    { .name = "DC_ISW", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 2,
+      .access = PL1_W, .type = ARM_CP_NOP },
+    { .name = "DC_CVAC", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 1,
+      .access = PL0_W, .type = ARM_CP_NOP,
+      .accessfn = aa64_cacheop_access },
+    { .name = "DC_CSW", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2,
+      .access = PL1_W, .type = ARM_CP_NOP },
+    { .name = "DC_CVAU", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 11, .opc2 = 1,
+      .access = PL0_W, .type = ARM_CP_NOP,
+      .accessfn = aa64_cacheop_access },
+    { .name = "DC_CIVAC", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 1,
+      .access = PL0_W, .type = ARM_CP_NOP,
+      .accessfn = aa64_cacheop_access },
+    { .name = "DC_CISW", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2,
+      .access = PL1_W, .type = ARM_CP_NOP },
+    /* TLBI operations */
+    { .name = "TLBI_VMALLE1IS", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 0,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbiall_write },
+    { .name = "TLBI_VAE1IS", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 1,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_va_write },
+    { .name = "TLBI_ASIDE1IS", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 2,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_asid_write },
+    { .name = "TLBI_VAAE1IS", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 3,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_vaa_write },
+    { .name = "TLBI_VALE1IS", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 5,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_va_write },
+    { .name = "TLBI_VAALE1IS", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 7,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_vaa_write },
+    { .name = "TLBI_VMALLE1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 0,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbiall_write },
+    { .name = "TLBI_VAE1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 1,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_va_write },
+    { .name = "TLBI_ASIDE1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 2,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_asid_write },
+    { .name = "TLBI_VAAE1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 3,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_vaa_write },
+    { .name = "TLBI_VALE1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 5,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_va_write },
+    { .name = "TLBI_VAALE1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 7,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
+      .writefn = tlbi_aa64_vaa_write },
+    /* Dummy implementation of monitor debug system control register:
+     * we don't support debug.
+     */
+    { .name = "MDSCR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    /* We define a dummy WI OSLAR_EL1, because Linux writes to it. */
+    { .name = "OSLAR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 4,
+      .access = PL1_W, .type = ARM_CP_NOP },
     REGINFO_SENTINEL
 };
 
@@ -1550,6 +1757,48 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
     tlb_flush(env, 1);
 }
 
+static CPAccessResult ctr_el0_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    /* Only accessible in EL0 if SCTLR.UCT is set (and only in AArch64,
+     * but the AArch32 CTR has its own reginfo struct)
+     */
+    if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UCT)) {
+        return CP_ACCESS_TRAP;
+    }
+    return CP_ACCESS_OK;
+}
+
+static void define_aarch64_debug_regs(ARMCPU *cpu)
+{
+    /* Define breakpoint and watchpoint registers. These do nothing
+     * but read as written, for now.
+     */
+    int i;
+
+    for (i = 0; i < 16; i++) {
+        ARMCPRegInfo dbgregs[] = {
+            { .name = "DBGBVR", .state = ARM_CP_STATE_AA64,
+              .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 4,
+              .access = PL1_RW,
+              .fieldoffset = offsetof(CPUARMState, cp15.dbgbvr[i]) },
+            { .name = "DBGBCR", .state = ARM_CP_STATE_AA64,
+              .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 5,
+              .access = PL1_RW,
+              .fieldoffset = offsetof(CPUARMState, cp15.dbgbcr[i]) },
+            { .name = "DBGWVR", .state = ARM_CP_STATE_AA64,
+              .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 6,
+              .access = PL1_RW,
+              .fieldoffset = offsetof(CPUARMState, cp15.dbgwvr[i]) },
+            { .name = "DBGWCR", .state = ARM_CP_STATE_AA64,
+              .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 7,
+              .access = PL1_RW,
+              .fieldoffset = offsetof(CPUARMState, cp15.dbgwcr[i]) },
+               REGINFO_SENTINEL
+        };
+        define_arm_cp_regs(cpu, dbgregs);
+    }
+}
+
 void register_cp_regs_for_features(ARMCPU *cpu)
 {
     /* Register all the coprocessor registers based on feature bits */
@@ -1634,7 +1883,8 @@ void register_cp_regs_for_features(ARMCPU *cpu)
             .raw_writefn = raw_write,
         };
         ARMCPRegInfo clidr = {
-            .name = "CLIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 1,
+            .name = "CLIDR", .state = ARM_CP_STATE_BOTH,
+            .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 1,
             .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->clidr
         };
         define_one_arm_cp_reg(cpu, &pmcr);
@@ -1644,7 +1894,53 @@ void register_cp_regs_for_features(ARMCPU *cpu)
         define_arm_cp_regs(cpu, not_v7_cp_reginfo);
     }
     if (arm_feature(env, ARM_FEATURE_V8)) {
+        /* AArch64 ID registers, which all have impdef reset values */
+        ARMCPRegInfo v8_idregs[] = {
+            { .name = "ID_AA64PFR0_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 0,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64pfr0 },
+            { .name = "ID_AA64PFR1_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64pfr1},
+            { .name = "ID_AA64DFR0_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64dfr0 },
+            { .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64dfr1 },
+            { .name = "ID_AA64AFR0_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 4,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64afr0 },
+            { .name = "ID_AA64AFR1_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 5,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64afr1 },
+            { .name = "ID_AA64ISAR0_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64isar0 },
+            { .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64isar1 },
+            { .name = "ID_AA64MMFR0_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64mmfr0 },
+            { .name = "ID_AA64MMFR1_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 1,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->id_aa64mmfr1 },
+            REGINFO_SENTINEL
+        };
+        define_arm_cp_regs(cpu, v8_idregs);
         define_arm_cp_regs(cpu, v8_cp_reginfo);
+        define_aarch64_debug_regs(cpu);
     }
     if (arm_feature(env, ARM_FEATURE_MPU)) {
         /* These are the MPU registers prior to PMSAv6. Any new
@@ -1710,9 +2006,16 @@ void register_cp_regs_for_features(ARMCPU *cpu)
               .writefn = arm_cp_write_ignore, .raw_writefn = raw_write,
               .fieldoffset = offsetof(CPUARMState, cp15.c0_cpuid),
               .type = ARM_CP_OVERRIDE },
+            { .name = "MIDR_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .opc2 = 0, .crn = 0, .crm = 0,
+              .access = PL1_R, .resetvalue = cpu->midr, .type = ARM_CP_CONST },
             { .name = "CTR",
               .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 1,
               .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->ctr },
+            { .name = "CTR_EL0", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 0, .crm = 0,
+              .access = PL0_R, .accessfn = ctr_el0_access,
+              .type = ARM_CP_CONST, .resetvalue = cpu->ctr },
             { .name = "TCMTR",
               .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 2,
               .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
@@ -1783,7 +2086,8 @@ void register_cp_regs_for_features(ARMCPU *cpu)
     /* Generic registers whose values depend on the implementation */
     {
         ARMCPRegInfo sctlr = {
-            .name = "SCTLR", .cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0,
+            .name = "SCTLR", .state = ARM_CP_STATE_BOTH,
+            .opc0 = 3, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0,
             .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c1_sys),
             .writefn = sctlr_write, .resetvalue = cpu->reset_sctlr,
             .raw_writefn = raw_write,
@@ -1962,6 +2266,10 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r,
     if (opaque) {
         r2->opaque = opaque;
     }
+    /* reginfo passed to helpers is correct for the actual access,
+     * and is never ARM_CP_STATE_BOTH:
+     */
+    r2->state = state;
     /* Make sure reginfo passed to helpers for wildcarded regs
      * has the correct crm/opc1/opc2 for this reg, not CP_ANY:
      */
@@ -2170,7 +2478,7 @@ uint32_t cpsr_read(CPUARMState *env)
         (env->CF << 29) | ((env->VF & 0x80000000) >> 3) | (env->QF << 27)
         | (env->thumb << 5) | ((env->condexec_bits & 3) << 25)
         | ((env->condexec_bits & 0xfc) << 8)
-        | (env->GE << 16);
+        | (env->GE << 16) | env->daif;
 }
 
 void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
@@ -2197,6 +2505,9 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask)
         env->GE = (val >> 16) & 0xf;
     }
 
+    env->daif &= ~(CPSR_AIF & mask);
+    env->daif |= val & CPSR_AIF & mask;
+
     if ((env->uncached_cpsr ^ val) & mask & CPSR_M) {
         if (bad_mode_switch(env, val & CPSR_M)) {
             /* Attempt to switch to an invalid mode: this is UNPREDICTABLE.
@@ -2658,7 +2969,7 @@ void arm_cpu_do_interrupt(CPUState *cs)
     env->condexec_bits = 0;
     /* Switch to the new mode, and to the correct instruction set.  */
     env->uncached_cpsr = (env->uncached_cpsr & ~CPSR_M) | new_mode;
-    env->uncached_cpsr |= mask;
+    env->daif |= mask;
     /* this is a lie, as the was no c1_sys on V4T/V5, but who cares
      * and we should just guard the thumb mode on V4 */
     if (arm_feature(env, ARM_FEATURE_V4T)) {
@@ -2730,9 +3041,9 @@ static uint32_t get_level1_table_address(CPUARMState *env, uint32_t address)
     uint32_t table;
 
     if (address & env->cp15.c2_mask)
-        table = env->cp15.c2_base1 & 0xffffc000;
+        table = env->cp15.ttbr1_el1 & 0xffffc000;
     else
-        table = env->cp15.c2_base0 & env->cp15.c2_base_mask;
+        table = env->cp15.ttbr0_el1 & env->cp15.c2_base_mask;
 
     table |= (address >> 18) & 0x3ffc;
     return table;
@@ -2798,7 +3109,7 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type,
             break;
         case 2: /* 4k page.  */
             phys_addr = (desc & 0xfffff000) | (address & 0xfff);
-            ap = (desc >> (4 + ((address >> 13) & 6))) & 3;
+            ap = (desc >> (4 + ((address >> 9) & 6))) & 3;
             *page_size = 0x1000;
             break;
         case 3: /* 1k page.  */
@@ -3008,11 +3319,11 @@ static int get_phys_addr_lpae(CPUARMState *env, uint32_t address,
      * we will always flush the TLB any time the ASID is changed).
      */
     if (ttbr_select == 0) {
-        ttbr = ((uint64_t)env->cp15.c2_base0_hi << 32) | env->cp15.c2_base0;
+        ttbr = env->cp15.ttbr0_el1;
         epd = extract32(env->cp15.c2_control, 7, 1);
         tsz = t0sz;
     } else {
-        ttbr = ((uint64_t)env->cp15.c2_base1_hi << 32) | env->cp15.c2_base1;
+        ttbr = env->cp15.ttbr1_el1;
         epd = extract32(env->cp15.c2_control, 23, 1);
         tsz = t1sz;
     }
@@ -3331,12 +3642,12 @@ uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg)
     case 9: /* PSP */
         return env->v7m.current_sp ? env->regs[13] : env->v7m.other_sp;
     case 16: /* PRIMASK */
-        return (env->uncached_cpsr & CPSR_I) != 0;
+        return (env->daif & PSTATE_I) != 0;
     case 17: /* BASEPRI */
     case 18: /* BASEPRI_MAX */
         return env->v7m.basepri;
     case 19: /* FAULTMASK */
-        return (env->uncached_cpsr & CPSR_F) != 0;
+        return (env->daif & PSTATE_F) != 0;
     case 20: /* CONTROL */
         return env->v7m.control;
     default:
@@ -3383,10 +3694,11 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val)
             env->v7m.other_sp = val;
         break;
     case 16: /* PRIMASK */
-        if (val & 1)
-            env->uncached_cpsr |= CPSR_I;
-        else
-            env->uncached_cpsr &= ~CPSR_I;
+        if (val & 1) {
+            env->daif |= PSTATE_I;
+        } else {
+            env->daif &= ~PSTATE_I;
+        }
         break;
     case 17: /* BASEPRI */
         env->v7m.basepri = val & 0xff;
@@ -3397,10 +3709,11 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val)
             env->v7m.basepri = val;
         break;
     case 19: /* FAULTMASK */
-        if (val & 1)
-            env->uncached_cpsr |= CPSR_F;
-        else
-            env->uncached_cpsr &= ~CPSR_F;
+        if (val & 1) {
+            env->daif |= PSTATE_F;
+        } else {
+            env->daif &= ~PSTATE_F;
+        }
         break;
     case 20: /* CONTROL */
         env->v7m.control = val & 3;
@@ -4392,3 +4705,40 @@ int arm_rmode_to_sf(int rmode)
     }
     return rmode;
 }
+
+static void crc_init_buffer(uint8_t *buf, uint32_t val, uint32_t bytes)
+{
+    memset(buf, 0, 4);
+
+    if (bytes == 1) {
+        buf[0] = val & 0xff;
+    } else if (bytes == 2) {
+        buf[0] = val & 0xff;
+        buf[1] = (val >> 8) & 0xff;
+    } else {
+        buf[0] = val & 0xff;
+        buf[1] = (val >> 8) & 0xff;
+        buf[2] = (val >> 16) & 0xff;
+        buf[3] = (val >> 24) & 0xff;
+    }
+}
+
+uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes)
+{
+    uint8_t buf[4];
+
+    crc_init_buffer(buf, val, bytes);
+
+    /* zlib crc32 converts the accumulator and output to one's complement.  */
+    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
+}
+
+uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
+{
+    uint8_t buf[4];
+
+    crc_init_buffer(buf, val, bytes);
+
+    /* Linux crc32c converts the output to one's complement.  */
+    return crc32c(acc, buf, bytes) ^ 0xffffffff;
+}
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 19bd620532..276f3a9149 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -63,6 +63,8 @@ DEF_HELPER_2(get_cp_reg, i32, env, ptr)
 DEF_HELPER_3(set_cp_reg64, void, env, ptr, i64)
 DEF_HELPER_2(get_cp_reg64, i64, env, ptr)
 
+DEF_HELPER_3(msr_i_pstate, void, env, i32, i32)
+
 DEF_HELPER_2(get_r13_banked, i32, env, i32)
 DEF_HELPER_3(set_r13_banked, void, env, i32, i32)
 
@@ -497,6 +499,9 @@ DEF_HELPER_3(neon_qzip32, void, env, i32, i32)
 DEF_HELPER_4(crypto_aese, void, env, i32, i32, i32)
 DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32)
 
+DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
+
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
 #endif
diff --git a/target-arm/kvm.c b/target-arm/kvm.c
index 1d2688dda7..39202d7eea 100644
--- a/target-arm/kvm.c
+++ b/target-arm/kvm.c
@@ -165,8 +165,10 @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu)
  */
 typedef struct KVMDevice {
     struct kvm_arm_device_addr kda;
+    struct kvm_device_attr kdattr;
     MemoryRegion *mr;
     QSLIST_ENTRY(KVMDevice) entries;
+    int dev_fd;
 } KVMDevice;
 
 static QSLIST_HEAD(kvm_devices_head, KVMDevice) kvm_devices_head;
@@ -200,6 +202,29 @@ static MemoryListener devlistener = {
     .region_del = kvm_arm_devlistener_del,
 };
 
+static void kvm_arm_set_device_addr(KVMDevice *kd)
+{
+    struct kvm_device_attr *attr = &kd->kdattr;
+    int ret;
+
+    /* If the device control API is available and we have a device fd on the
+     * KVMDevice struct, let's use the newer API
+     */
+    if (kd->dev_fd >= 0) {
+        uint64_t addr = kd->kda.addr;
+        attr->addr = (uintptr_t)&addr;
+        ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
+    } else {
+        ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda);
+    }
+
+    if (ret < 0) {
+        fprintf(stderr, "Failed to set device address: %s\n",
+                strerror(-ret));
+        abort();
+    }
+}
+
 static void kvm_arm_machine_init_done(Notifier *notifier, void *data)
 {
     KVMDevice *kd, *tkd;
@@ -207,12 +232,7 @@ static void kvm_arm_machine_init_done(Notifier *notifier, void *data)
     memory_listener_unregister(&devlistener);
     QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) {
         if (kd->kda.addr != -1) {
-            if (kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR,
-                             &kd->kda) < 0) {
-                fprintf(stderr, "KVM_ARM_SET_DEVICE_ADDRESS failed: %s\n",
-                        strerror(errno));
-                abort();
-            }
+            kvm_arm_set_device_addr(kd);
         }
         memory_region_unref(kd->mr);
         g_free(kd);
@@ -223,7 +243,8 @@ static Notifier notify = {
     .notify = kvm_arm_machine_init_done,
 };
 
-void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid)
+void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
+                             uint64_t attr, int dev_fd)
 {
     KVMDevice *kd;
 
@@ -239,6 +260,10 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid)
     kd->mr = mr;
     kd->kda.id = devid;
     kd->kda.addr = -1;
+    kd->kdattr.flags = 0;
+    kd->kdattr.group = group;
+    kd->kdattr.attr = attr;
+    kd->dev_fd = dev_fd;
     QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries);
     memory_region_ref(kd->mr);
 }
@@ -389,3 +414,19 @@ void kvm_arch_remove_all_hw_breakpoints(void)
 void kvm_arch_init_irq_routing(KVMState *s)
 {
 }
+
+int kvm_arch_irqchip_create(KVMState *s)
+{
+    int ret;
+
+    /* If we can create the VGIC using the newer device control API, we
+     * let the device do this when it initializes itself, otherwise we
+     * fall back to the old API */
+
+    ret = kvm_create_device(s, KVM_DEV_TYPE_ARM_VGIC_V2, true);
+    if (ret == 0) {
+        return 1;
+    }
+
+    return 0;
+}
diff --git a/target-arm/kvm_arm.h b/target-arm/kvm_arm.h
index cd3d13ca2d..137c5671e9 100644
--- a/target-arm/kvm_arm.h
+++ b/target-arm/kvm_arm.h
@@ -18,16 +18,21 @@
  * kvm_arm_register_device:
  * @mr: memory region for this device
  * @devid: the KVM device ID
+ * @group: device control API group for setting addresses
+ * @attr: device control API address type
+ * @dev_fd: device control device file descriptor (or -1 if not supported)
  *
  * Remember the memory region @mr, and when it is mapped by the
  * machine model, tell the kernel that base address using the
- * KVM_SET_DEVICE_ADDRESS ioctl. @devid should be the ID of
- * the device as defined by KVM_SET_DEVICE_ADDRESS.
- * The machine model may map and unmap the device multiple times;
- * the kernel will only be told the final address at the point
- * where machine init is complete.
+ * KVM_ARM_SET_DEVICE_ADDRESS ioctl or the newer device control API.  @devid
+ * should be the ID of the device as defined by KVM_ARM_SET_DEVICE_ADDRESS or
+ * the arm-vgic device in the device control API.
+ * The machine model may map
+ * and unmap the device multiple times; the kernel will only be told the final
+ * address at the point where machine init is complete.
  */
-void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid);
+void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
+                             uint64_t attr, int dev_fd);
 
 /**
  * write_list_to_kvmstate:
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index eb0fccd98f..7d06d2f9a5 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -319,6 +319,31 @@ uint64_t HELPER(get_cp_reg64)(CPUARMState *env, void *rip)
     return ri->readfn(env, ri);
 }
 
+void HELPER(msr_i_pstate)(CPUARMState *env, uint32_t op, uint32_t imm)
+{
+    /* MSR_i to update PSTATE. This is OK from EL0 only if UMA is set.
+     * Note that SPSel is never OK from EL0; we rely on handle_msr_i()
+     * to catch that case at translate time.
+     */
+    if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UMA)) {
+        raise_exception(env, EXCP_UDEF);
+    }
+
+    switch (op) {
+    case 0x05: /* SPSel */
+        env->pstate = deposit32(env->pstate, 0, 1, imm);
+        break;
+    case 0x1e: /* DAIFSet */
+        env->daif |= (imm << 6) & PSTATE_DAIF;
+        break;
+    case 0x1f: /* DAIFClear */
+        env->daif &= ~((imm << 6) & PSTATE_DAIF);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 /* ??? Flag setting arithmetic is awkward because we need to do comparisons.
    The only way to do that in TCG is a conditional branch, which clobbers
    all our temporaries.  For now implement these as helper functions.  */
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 8752e7e16c..08ac6591b6 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -1080,9 +1080,11 @@ static void handle_hint(DisasContext *s, uint32_t insn,
     switch (selector) {
     case 0: /* NOP */
         return;
+    case 3: /* WFI */
+        s->is_jmp = DISAS_WFI;
+        return;
     case 1: /* YIELD */
     case 2: /* WFE */
-    case 3: /* WFI */
     case 4: /* SEV */
     case 5: /* SEVL */
         /* we treat all as NOP at least for now */
@@ -1126,7 +1128,30 @@ static void handle_sync(DisasContext *s, uint32_t insn,
 static void handle_msr_i(DisasContext *s, uint32_t insn,
                          unsigned int op1, unsigned int op2, unsigned int crm)
 {
-    unsupported_encoding(s, insn);
+    int op = op1 << 3 | op2;
+    switch (op) {
+    case 0x05: /* SPSel */
+        if (s->current_pl == 0) {
+            unallocated_encoding(s);
+            return;
+        }
+        /* fall through */
+    case 0x1e: /* DAIFSet */
+    case 0x1f: /* DAIFClear */
+    {
+        TCGv_i32 tcg_imm = tcg_const_i32(crm);
+        TCGv_i32 tcg_op = tcg_const_i32(op);
+        gen_a64_set_pc_im(s->pc - 4);
+        gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
+        tcg_temp_free_i32(tcg_imm);
+        tcg_temp_free_i32(tcg_op);
+        s->is_jmp = DISAS_UPDATE;
+        break;
+    }
+    default:
+        unallocated_encoding(s);
+        return;
+    }
 }
 
 static void gen_get_nzcv(TCGv_i64 tcg_rt)
@@ -1231,6 +1256,13 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
             gen_set_nzcv(tcg_rt);
         }
         return;
+    case ARM_CP_CURRENTEL:
+        /* Reads as current EL value from pstate, which is
+         * guaranteed to be constant by the tb flags.
+         */
+        tcg_rt = cpu_reg(s, rt);
+        tcg_gen_movi_i64(tcg_rt, s->current_pl << 2);
+        return;
     default:
         break;
     }
@@ -9006,7 +9038,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
     dc->condexec_mask = 0;
     dc->condexec_cond = 0;
 #if !defined(CONFIG_USER_ONLY)
-    dc->user = 0;
+    dc->user = (ARM_TBFLAG_AA64_EL(tb->flags) == 0);
 #endif
     dc->vfp_enabled = 0;
     dc->vec_len = 0;
@@ -9117,6 +9149,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
             /* This is a special case because we don't want to just halt the CPU
              * if trying to debug across a WFI.
              */
+            gen_a64_set_pc_im(dc->pc);
             gen_helper_wfi(cpu_env);
             break;
         }
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 6ccf0ba482..253d2a13eb 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -7561,6 +7561,36 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
             store_reg(s, 14, tmp2);
             gen_bx(s, tmp);
             break;
+        case 0x4:
+        {
+            /* crc32/crc32c */
+            uint32_t c = extract32(insn, 8, 4);
+
+            /* Check this CPU supports ARMv8 CRC instructions.
+             * op1 == 3 is UNPREDICTABLE but handle as UNDEFINED.
+             * Bits 8, 10 and 11 should be zero.
+             */
+            if (!arm_feature(env, ARM_FEATURE_CRC) || op1 == 0x3 ||
+                (c & 0xd) != 0) {
+                goto illegal_op;
+            }
+
+            rn = extract32(insn, 16, 4);
+            rd = extract32(insn, 12, 4);
+
+            tmp = load_reg(s, rn);
+            tmp2 = load_reg(s, rm);
+            tmp3 = tcg_const_i32(1 << op1);
+            if (c & 0x2) {
+                gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
+            } else {
+                gen_helper_crc32(tmp, tmp, tmp2, tmp3);
+            }
+            tcg_temp_free_i32(tmp2);
+            tcg_temp_free_i32(tmp3);
+            store_reg(s, rd, tmp);
+            break;
+        }
         case 0x5: /* saturating add/subtract */
             ARCH(5TE);
             rd = (insn >> 12) & 0xf;
@@ -9145,6 +9175,32 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
                 case 0x18: /* clz */
                     gen_helper_clz(tmp, tmp);
                     break;
+                case 0x20:
+                case 0x21:
+                case 0x22:
+                case 0x28:
+                case 0x29:
+                case 0x2a:
+                {
+                    /* crc32/crc32c */
+                    uint32_t sz = op & 0x3;
+                    uint32_t c = op & 0x8;
+
+                    if (!arm_feature(env, ARM_FEATURE_CRC)) {
+                        goto illegal_op;
+                    }
+
+                    tmp2 = load_reg(s, rm);
+                    tmp3 = tcg_const_i32(1 << sz);
+                    if (c) {
+                        gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
+                    } else {
+                        gen_helper_crc32(tmp, tmp, tmp2, tmp3);
+                    }
+                    tcg_temp_free_i32(tmp2);
+                    tcg_temp_free_i32(tmp3);
+                    break;
+                }
                 default:
                     goto illegal_op;
                 }
diff --git a/trace-events b/trace-events
index 37130638ff..580281d9ca 100644
--- a/trace-events
+++ b/trace-events
@@ -1170,6 +1170,7 @@ kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
 kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"
 kvm_vcpu_ioctl(int cpu_index, int type, void *arg) "cpu_index %d, type 0x%x, arg %p"
 kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d"
+kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
 
 # memory.c
 memory_region_ops_read(void *mr, uint64_t addr, uint64_t value, unsigned size) "mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
diff --git a/vmstate.c b/vmstate.c
index 284b080f46..d1f5eb0e6a 100644
--- a/vmstate.c
+++ b/vmstate.c
@@ -321,23 +321,24 @@ const VMStateInfo vmstate_info_int32_equal = {
     .put  = put_int32,
 };
 
-/* 32 bit int. See that the received value is the less or the same
-   than the one in the field */
+/* 32 bit int. Check that the received value is less than or equal to
+   the one in the field */
 
 static int get_int32_le(QEMUFile *f, void *pv, size_t size)
 {
-    int32_t *old = pv;
-    int32_t new;
-    qemu_get_sbe32s(f, &new);
+    int32_t *cur = pv;
+    int32_t loaded;
+    qemu_get_sbe32s(f, &loaded);
 
-    if (*old <= new) {
+    if (loaded <= *cur) {
+        *cur = loaded;
         return 0;
     }
     return -EINVAL;
 }
 
 const VMStateInfo vmstate_info_int32_le = {
-    .name = "int32 equal",
+    .name = "int32 le",
     .get  = get_int32_le,
     .put  = put_int32,
 };