author     Peter Maydell <peter.maydell@linaro.org>  2014-04-17 21:37:26 +0100
committer  Peter Maydell <peter.maydell@linaro.org>  2014-04-17 21:37:26 +0100
commit     2d03b49c3f225994c4b0b46146437d8c887d6774
tree       00663246afe656b777e3c218cfea25e7ed32ffc2
parent     c6138aabfb2a8769392d605dc1e339b3095aab6a
parent     e44a90c59697cf98e05619fbb6f77a403d347495
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20140417-1' into staging
target-arm queue:
 * AArch64 system mode support; this is all the CPU emulation code
   but not the virt board support
 * cadence_ttc match register bugfix
 * Allwinner A10 PIC, PIT and ethernet fixes
   [with update to avoid duplicate typedef]
 * zynq-slcr rewrite
 * cadence_gem bugfix
 * fix for SMLALD/SMLSLD insn in A32
 * fix for SQXTUN in A64

# gpg: Signature made Thu 17 Apr 2014 21:35:57 BST using RSA key ID 14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"

* remotes/pmaydell/tags/pull-target-arm-20140417-1: (51 commits)
  target-arm: A64: fix unallocated test of scalar SQXTUN
  arm: translate.c: Fix smlald Instruction
  net: cadence_gem: Make phy respond to broadcast
  misc: zynq_slcr: Make DB_PRINTs always compile
  misc: zynq_slcr: Convert SBD::init to object init
  misc: zynq-slcr: Rewrite
  allwinner-emac: update irq status after writes to interrupt registers
  allwinner-emac: set autonegotiation complete bit on link up
  allwinner-a10-pit: implement prescaler and source selection
  allwinner-a10-pit: use level triggered interrupts
  allwinner-a10-pit: avoid generation of spurious interrupts
  allwinner-a10-pic: fix behaviour of pending register
  allwinner-a10-pic: set vector address when an interrupt is pending
  timer: cadence_ttc: Fix match register write logic
  target-arm/gdbstub64.c: remove useless 'break' statement.
  target-arm: Dump 32-bit CPU state if 64 bit CPU is in AArch32
  target-arm: Handle the CPU being in AArch32 mode in the AArch64 set_pc
  target-arm: Make Cortex-A15 CBAR read-only
  target-arm: Implement CBAR for Cortex-A57
  target-arm: Implement Cortex-A57 implementation-defined system registers
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--  hw/arm/cubieboard.c                   |  13
-rw-r--r--  hw/intc/allwinner-a10-pic.c           |  22
-rw-r--r--  hw/misc/zynq_slcr.c                   | 697
-rw-r--r--  hw/net/allwinner_emac.c               |   6
-rw-r--r--  hw/net/cadence_gem.c                  |   4
-rw-r--r--  hw/timer/allwinner-a10-pit.c          |  66
-rw-r--r--  hw/timer/cadence_ttc.c                |   2
-rw-r--r--  include/exec/softmmu_exec.h           |  52
-rw-r--r--  include/hw/net/allwinner_emac.h       |   1
-rw-r--r--  include/hw/timer/allwinner-a10-pit.h  |  13
-rw-r--r--  linux-user/main.c                     |  56
-rw-r--r--  target-arm/cpu-qom.h                  |  10
-rw-r--r--  target-arm/cpu.c                      |  30
-rw-r--r--  target-arm/cpu.h                      |  81
-rw-r--r--  target-arm/cpu64.c                    | 115
-rw-r--r--  target-arm/gdbstub64.c                |   2
-rw-r--r--  target-arm/helper-a64.c               |  76
-rw-r--r--  target-arm/helper.c                   | 810
-rw-r--r--  target-arm/helper.h                   |   7
-rw-r--r--  target-arm/internals.h                | 267
-rw-r--r--  target-arm/kvm32.c                    |  19
-rw-r--r--  target-arm/kvm64.c                    |  71
-rw-r--r--  target-arm/machine.c                  |  13
-rw-r--r--  target-arm/op_helper.c                |  92
-rw-r--r--  target-arm/translate-a64.c            | 443
-rw-r--r--  target-arm/translate.c                | 229
-rw-r--r--  target-arm/translate.h                |  23
27 files changed, 2415 insertions(+), 805 deletions(-)
diff --git a/hw/arm/cubieboard.c b/hw/arm/cubieboard.c
index d95a7f35eb..9d158c7248 100644
--- a/hw/arm/cubieboard.c
+++ b/hw/arm/cubieboard.c
@@ -43,6 +43,19 @@ static void cubieboard_init(QEMUMachineInitArgs *args)
         exit(1);
     }
 
+    object_property_set_int(OBJECT(&s->a10->timer), 32768, "clk0-freq", &err);
+    if (err != NULL) {
+        error_report("Couldn't set clk0 frequency: %s", error_get_pretty(err));
+        exit(1);
+    }
+
+    object_property_set_int(OBJECT(&s->a10->timer), 24000000, "clk1-freq",
+                            &err);
+    if (err != NULL) {
+        error_report("Couldn't set clk1 frequency: %s", error_get_pretty(err));
+        exit(1);
+    }
+
     object_property_set_bool(OBJECT(s->a10), true, "realized", &err);
     if (err != NULL) {
         error_report("Couldn't realize Allwinner A10: %s",
diff --git a/hw/intc/allwinner-a10-pic.c b/hw/intc/allwinner-a10-pic.c
index 407d563514..0924d9855c 100644
--- a/hw/intc/allwinner-a10-pic.c
+++ b/hw/intc/allwinner-a10-pic.c
@@ -23,11 +23,20 @@
 static void aw_a10_pic_update(AwA10PICState *s)
 {
     uint8_t i;
-    int irq = 0, fiq = 0;
+    int irq = 0, fiq = 0, pending;
+
+    s->vector = 0;
 
     for (i = 0; i < AW_A10_PIC_REG_NUM; i++) {
         irq |= s->irq_pending[i] & ~s->mask[i];
         fiq |= s->select[i] & s->irq_pending[i] & ~s->mask[i];
+
+        if (!s->vector) {
+            pending = ffs(s->irq_pending[i] & ~s->mask[i]);
+            if (pending) {
+                s->vector = (i * 32 + pending - 1) * 4;
+            }
+        }
     }
 
     qemu_set_irq(s->parent_irq, !!irq);
@@ -40,6 +49,8 @@ static void aw_a10_pic_set_irq(void *opaque, int irq, int level)
 
     if (level) {
         set_bit(irq % 32, (void *)&s->irq_pending[irq / 32]);
+    } else {
+        clear_bit(irq % 32, (void *)&s->irq_pending[irq / 32]);
     }
     aw_a10_pic_update(s);
 }
@@ -84,9 +95,6 @@ static void aw_a10_pic_write(void *opaque, hwaddr offset, uint64_t value,
     uint8_t index = (offset & 0xc) / 4;
 
     switch (offset) {
-    case AW_A10_PIC_VECTOR:
-        s->vector = value & ~0x3;
-        break;
     case AW_A10_PIC_BASE_ADDR:
         s->base_addr = value & ~0x3;
     case AW_A10_PIC_PROTECT:
@@ -96,7 +104,11 @@ static void aw_a10_pic_write(void *opaque, hwaddr offset, uint64_t value,
         s->nmi = value;
         break;
     case AW_A10_PIC_IRQ_PENDING ... AW_A10_PIC_IRQ_PENDING + 8:
-        s->irq_pending[index] &= ~value;
+        /*
+         * The register is read-only; nevertheless, Linux (including
+         * the version originally shipped by Allwinner) pretends to
+         * write to the register. Just ignore it.
+         */
         break;
     case AW_A10_PIC_FIQ_PENDING ... AW_A10_PIC_FIQ_PENDING + 8:
         s->fiq_pending[index] &= ~value;
diff --git a/hw/misc/zynq_slcr.c b/hw/misc/zynq_slcr.c
index d1cc23303a..2e53a2e21f 100644
--- a/hw/misc/zynq_slcr.c
+++ b/hw/misc/zynq_slcr.c
@@ -19,102 +19,155 @@
 #include "hw/sysbus.h"
 #include "sysemu/sysemu.h"
 
-#ifdef ZYNQ_ARM_SLCR_ERR_DEBUG
+#ifndef ZYNQ_SLCR_ERR_DEBUG
+#define ZYNQ_SLCR_ERR_DEBUG 0
+#endif
+
 #define DB_PRINT(...) do { \
-    fprintf(stderr,  ": %s: ", __func__); \
-    fprintf(stderr, ## __VA_ARGS__); \
+        if (ZYNQ_SLCR_ERR_DEBUG) { \
+            fprintf(stderr,  ": %s: ", __func__); \
+            fprintf(stderr, ## __VA_ARGS__); \
+        } \
     } while (0);
-#else
-    #define DB_PRINT(...)
-#endif
 
 #define XILINX_LOCK_KEY 0x767b
 #define XILINX_UNLOCK_KEY 0xdf0d
 
 #define R_PSS_RST_CTRL_SOFT_RST 0x1
 
-typedef enum {
-  ARM_PLL_CTRL,
-  DDR_PLL_CTRL,
-  IO_PLL_CTRL,
-  PLL_STATUS,
-  ARM_PPL_CFG,
-  DDR_PLL_CFG,
-  IO_PLL_CFG,
-  PLL_BG_CTRL,
-  PLL_MAX
-} PLLValues;
-
-typedef enum {
-  ARM_CLK_CTRL,
-  DDR_CLK_CTRL,
-  DCI_CLK_CTRL,
-  APER_CLK_CTRL,
-  USB0_CLK_CTRL,
-  USB1_CLK_CTRL,
-  GEM0_RCLK_CTRL,
-  GEM1_RCLK_CTRL,
-  GEM0_CLK_CTRL,
-  GEM1_CLK_CTRL,
-  SMC_CLK_CTRL,
-  LQSPI_CLK_CTRL,
-  SDIO_CLK_CTRL,
-  UART_CLK_CTRL,
-  SPI_CLK_CTRL,
-  CAN_CLK_CTRL,
-  CAN_MIOCLK_CTRL,
-  DBG_CLK_CTRL,
-  PCAP_CLK_CTRL,
-  TOPSW_CLK_CTRL,
-  CLK_MAX
-} ClkValues;
-
-typedef enum {
-  CLK_CTRL,
-  THR_CTRL,
-  THR_CNT,
-  THR_STA,
-  FPGA_MAX
-} FPGAValues;
-
-typedef enum {
-  SYNC_CTRL,
-  SYNC_STATUS,
-  BANDGAP_TRIP,
-  CC_TEST,
-  PLL_PREDIVISOR,
-  CLK_621_TRUE,
-  PICTURE_DBG,
-  PICTURE_DBG_UCNT,
-  PICTURE_DBG_LCNT,
-  MISC_MAX
-} MiscValues;
-
-typedef enum {
-  PSS,
-  DDDR,
-  DMAC = 3,
-  USB,
-  GEM,
-  SDIO,
-  SPI,
-  CAN,
-  I2C,
-  UART,
-  GPIO,
-  LQSPI,
-  SMC,
-  OCM,
-  DEVCI,
-  FPGA,
-  A9_CPU,
-  RS_AWDT,
-  RST_REASON,
-  RST_REASON_CLR,
-  REBOOT_STATUS,
-  BOOT_MODE,
-  RESET_MAX
-} ResetValues;
+enum {
+    SCL             = 0x000 / 4,
+    LOCK,
+    UNLOCK,
+    LOCKSTA,
+
+    ARM_PLL_CTRL    = 0x100 / 4,
+    DDR_PLL_CTRL,
+    IO_PLL_CTRL,
+    PLL_STATUS,
+    ARM_PLL_CFG,
+    DDR_PLL_CFG,
+    IO_PLL_CFG,
+
+    ARM_CLK_CTRL    = 0x120 / 4,
+    DDR_CLK_CTRL,
+    DCI_CLK_CTRL,
+    APER_CLK_CTRL,
+    USB0_CLK_CTRL,
+    USB1_CLK_CTRL,
+    GEM0_RCLK_CTRL,
+    GEM1_RCLK_CTRL,
+    GEM0_CLK_CTRL,
+    GEM1_CLK_CTRL,
+    SMC_CLK_CTRL,
+    LQSPI_CLK_CTRL,
+    SDIO_CLK_CTRL,
+    UART_CLK_CTRL,
+    SPI_CLK_CTRL,
+    CAN_CLK_CTRL,
+    CAN_MIOCLK_CTRL,
+    DBG_CLK_CTRL,
+    PCAP_CLK_CTRL,
+    TOPSW_CLK_CTRL,
+
+#define FPGA_CTRL_REGS(n, start) \
+    FPGA ## n ## _CLK_CTRL = (start) / 4, \
+    FPGA ## n ## _THR_CTRL, \
+    FPGA ## n ## _THR_CNT, \
+    FPGA ## n ## _THR_STA,
+    FPGA_CTRL_REGS(0, 0x170)
+    FPGA_CTRL_REGS(1, 0x180)
+    FPGA_CTRL_REGS(2, 0x190)
+    FPGA_CTRL_REGS(3, 0x1a0)
+
+    BANDGAP_TRIP    = 0x1b8 / 4,
+    PLL_PREDIVISOR  = 0x1c0 / 4,
+    CLK_621_TRUE,
+
+    PSS_RST_CTRL    = 0x200 / 4,
+    DDR_RST_CTRL,
+    TOPSW_RESET_CTRL,
+    DMAC_RST_CTRL,
+    USB_RST_CTRL,
+    GEM_RST_CTRL,
+    SDIO_RST_CTRL,
+    SPI_RST_CTRL,
+    CAN_RST_CTRL,
+    I2C_RST_CTRL,
+    UART_RST_CTRL,
+    GPIO_RST_CTRL,
+    LQSPI_RST_CTRL,
+    SMC_RST_CTRL,
+    OCM_RST_CTRL,
+    FPGA_RST_CTRL   = 0x240 / 4,
+    A9_CPU_RST_CTRL,
+
+    RS_AWDT_CTRL    = 0x24c / 4,
+    RST_REASON,
+
+    REBOOT_STATUS   = 0x258 / 4,
+    BOOT_MODE,
+
+    APU_CTRL        = 0x300 / 4,
+    WDT_CLK_SEL,
+
+    TZ_DMA_NS       = 0x440 / 4,
+    TZ_DMA_IRQ_NS,
+    TZ_DMA_PERIPH_NS,
+
+    PSS_IDCODE      = 0x530 / 4,
+
+    DDR_URGENT      = 0x600 / 4,
+    DDR_CAL_START   = 0x60c / 4,
+    DDR_REF_START   = 0x614 / 4,
+    DDR_CMD_STA,
+    DDR_URGENT_SEL,
+    DDR_DFI_STATUS,
+
+    MIO             = 0x700 / 4,
+#define MIO_LENGTH 54
+
+    MIO_LOOPBACK    = 0x804 / 4,
+    MIO_MST_TRI0,
+    MIO_MST_TRI1,
+
+    SD0_WP_CD_SEL   = 0x830 / 4,
+    SD1_WP_CD_SEL,
+
+    LVL_SHFTR_EN    = 0x900 / 4,
+    OCM_CFG         = 0x910 / 4,
+
+    CPU_RAM         = 0xa00 / 4,
+
+    IOU             = 0xa30 / 4,
+
+    DMAC_RAM        = 0xa50 / 4,
+
+    AFI0            = 0xa60 / 4,
+    AFI1 = AFI0 + 3,
+    AFI2 = AFI1 + 3,
+    AFI3 = AFI2 + 3,
+#define AFI_LENGTH 3
+
+    OCM             = 0xa90 / 4,
+
+    DEVCI_RAM       = 0xaa0 / 4,
+
+    CSG_RAM         = 0xab0 / 4,
+
+    GPIOB_CTRL      = 0xb00 / 4,
+    GPIOB_CFG_CMOS18,
+    GPIOB_CFG_CMOS25,
+    GPIOB_CFG_CMOS33,
+    GPIOB_CFG_HSTL  = 0xb14 / 4,
+    GPIOB_DRVR_BIAS_CTRL,
+
+    DDRIOB          = 0xb40 / 4,
+#define DDRIOB_LENGTH 14
+};
+
+#define ZYNQ_SLCR_MMIO_SIZE     0x1000
+#define ZYNQ_SLCR_NUM_REGS      (ZYNQ_SLCR_MMIO_SIZE / 4)
 
 #define TYPE_ZYNQ_SLCR "xilinx,zynq_slcr"
 #define ZYNQ_SLCR(obj) OBJECT_CHECK(ZynqSLCRState, (obj), TYPE_ZYNQ_SLCR)
@@ -124,42 +177,7 @@ typedef struct ZynqSLCRState {
 
     MemoryRegion iomem;
 
-    union {
-        struct {
-            uint16_t scl;
-            uint16_t lockval;
-            uint32_t pll[PLL_MAX]; /* 0x100 - 0x11C */
-            uint32_t clk[CLK_MAX]; /* 0x120 - 0x16C */
-            uint32_t fpga[4][FPGA_MAX]; /* 0x170 - 0x1AC */
-            uint32_t misc[MISC_MAX]; /* 0x1B0 - 0x1D8 */
-            uint32_t reset[RESET_MAX]; /* 0x200 - 0x25C */
-            uint32_t apu_ctrl; /* 0x300 */
-            uint32_t wdt_clk_sel; /* 0x304 */
-            uint32_t tz_ocm[3]; /* 0x400 - 0x408 */
-            uint32_t tz_ddr; /* 0x430 */
-            uint32_t tz_dma[3]; /* 0x440 - 0x448 */
-            uint32_t tz_misc[3]; /* 0x450 - 0x458 */
-            uint32_t tz_fpga[2]; /* 0x484 - 0x488 */
-            uint32_t dbg_ctrl; /* 0x500 */
-            uint32_t pss_idcode; /* 0x530 */
-            uint32_t ddr[8]; /* 0x600 - 0x620 - 0x604-missing */
-            uint32_t mio[54]; /* 0x700 - 0x7D4 */
-            uint32_t mio_func[4]; /* 0x800 - 0x810 */
-            uint32_t sd[2]; /* 0x830 - 0x834 */
-            uint32_t lvl_shftr_en; /* 0x900 */
-            uint32_t ocm_cfg; /* 0x910 */
-            uint32_t cpu_ram[8]; /* 0xA00 - 0xA1C */
-            uint32_t iou[7]; /* 0xA30 - 0xA48 */
-            uint32_t dmac_ram; /* 0xA50 */
-            uint32_t afi[4][3]; /* 0xA60 - 0xA8C */
-            uint32_t ocm[3]; /* 0xA90 - 0xA98 */
-            uint32_t devci_ram; /* 0xAA0 */
-            uint32_t csg_ram; /* 0xAB0 */
-            uint32_t gpiob[12]; /* 0xB00 - 0xB2C */
-            uint32_t ddriob[14]; /* 0xB40 - 0xB74 */
-        };
-        uint8_t data[0x1000];
-    };
+    uint32_t regs[ZYNQ_SLCR_NUM_REGS];
 } ZynqSLCRState;
 
 static void zynq_slcr_reset(DeviceState *d)
@@ -169,177 +187,169 @@ static void zynq_slcr_reset(DeviceState *d)
 
     DB_PRINT("RESET\n");
 
-    s->lockval = 1;
+    s->regs[LOCKSTA] = 1;
     /* 0x100 - 0x11C */
-    s->pll[ARM_PLL_CTRL] = 0x0001A008;
-    s->pll[DDR_PLL_CTRL] = 0x0001A008;
-    s->pll[IO_PLL_CTRL] = 0x0001A008;
-    s->pll[PLL_STATUS] = 0x0000003F;
-    s->pll[ARM_PPL_CFG] = 0x00014000;
-    s->pll[DDR_PLL_CFG] = 0x00014000;
-    s->pll[IO_PLL_CFG] = 0x00014000;
+    s->regs[ARM_PLL_CTRL]   = 0x0001A008;
+    s->regs[DDR_PLL_CTRL]   = 0x0001A008;
+    s->regs[IO_PLL_CTRL]    = 0x0001A008;
+    s->regs[PLL_STATUS]     = 0x0000003F;
+    s->regs[ARM_PLL_CFG]    = 0x00014000;
+    s->regs[DDR_PLL_CFG]    = 0x00014000;
+    s->regs[IO_PLL_CFG]     = 0x00014000;
 
     /* 0x120 - 0x16C */
-    s->clk[ARM_CLK_CTRL] = 0x1F000400;
-    s->clk[DDR_CLK_CTRL] = 0x18400003;
-    s->clk[DCI_CLK_CTRL] = 0x01E03201;
-    s->clk[APER_CLK_CTRL] = 0x01FFCCCD;
-    s->clk[USB0_CLK_CTRL] = s->clk[USB1_CLK_CTRL] = 0x00101941;
-    s->clk[GEM0_RCLK_CTRL] = s->clk[GEM1_RCLK_CTRL] = 0x00000001;
-    s->clk[GEM0_CLK_CTRL] = s->clk[GEM1_CLK_CTRL] = 0x00003C01;
-    s->clk[SMC_CLK_CTRL] = 0x00003C01;
-    s->clk[LQSPI_CLK_CTRL] = 0x00002821;
-    s->clk[SDIO_CLK_CTRL] = 0x00001E03;
-    s->clk[UART_CLK_CTRL] = 0x00003F03;
-    s->clk[SPI_CLK_CTRL] = 0x00003F03;
-    s->clk[CAN_CLK_CTRL] = 0x00501903;
-    s->clk[DBG_CLK_CTRL] = 0x00000F03;
-    s->clk[PCAP_CLK_CTRL] = 0x00000F01;
+    s->regs[ARM_CLK_CTRL]   = 0x1F000400;
+    s->regs[DDR_CLK_CTRL]   = 0x18400003;
+    s->regs[DCI_CLK_CTRL]   = 0x01E03201;
+    s->regs[APER_CLK_CTRL]  = 0x01FFCCCD;
+    s->regs[USB0_CLK_CTRL]  = s->regs[USB1_CLK_CTRL]    = 0x00101941;
+    s->regs[GEM0_RCLK_CTRL] = s->regs[GEM1_RCLK_CTRL]   = 0x00000001;
+    s->regs[GEM0_CLK_CTRL]  = s->regs[GEM1_CLK_CTRL]    = 0x00003C01;
+    s->regs[SMC_CLK_CTRL]   = 0x00003C01;
+    s->regs[LQSPI_CLK_CTRL] = 0x00002821;
+    s->regs[SDIO_CLK_CTRL]  = 0x00001E03;
+    s->regs[UART_CLK_CTRL]  = 0x00003F03;
+    s->regs[SPI_CLK_CTRL]   = 0x00003F03;
+    s->regs[CAN_CLK_CTRL]   = 0x00501903;
+    s->regs[DBG_CLK_CTRL]   = 0x00000F03;
+    s->regs[PCAP_CLK_CTRL]  = 0x00000F01;
 
     /* 0x170 - 0x1AC */
-    s->fpga[0][CLK_CTRL] = s->fpga[1][CLK_CTRL] = s->fpga[2][CLK_CTRL] =
-            s->fpga[3][CLK_CTRL] = 0x00101800;
-    s->fpga[0][THR_STA] = s->fpga[1][THR_STA] = s->fpga[2][THR_STA] =
-            s->fpga[3][THR_STA] = 0x00010000;
+    s->regs[FPGA0_CLK_CTRL] = s->regs[FPGA1_CLK_CTRL] = s->regs[FPGA2_CLK_CTRL]
+                            = s->regs[FPGA3_CLK_CTRL] = 0x00101800;
+    s->regs[FPGA0_THR_STA] = s->regs[FPGA1_THR_STA] = s->regs[FPGA2_THR_STA]
+                           = s->regs[FPGA3_THR_STA] = 0x00010000;
 
     /* 0x1B0 - 0x1D8 */
-    s->misc[BANDGAP_TRIP] = 0x0000001F;
-    s->misc[PLL_PREDIVISOR] = 0x00000001;
-    s->misc[CLK_621_TRUE] = 0x00000001;
+    s->regs[BANDGAP_TRIP]   = 0x0000001F;
+    s->regs[PLL_PREDIVISOR] = 0x00000001;
+    s->regs[CLK_621_TRUE]   = 0x00000001;
 
     /* 0x200 - 0x25C */
-    s->reset[FPGA] = 0x01F33F0F;
-    s->reset[RST_REASON] = 0x00000040;
+    s->regs[FPGA_RST_CTRL]  = 0x01F33F0F;
+    s->regs[RST_REASON]     = 0x00000040;
+
+    s->regs[BOOT_MODE]      = 0x00000001;
 
     /* 0x700 - 0x7D4 */
     for (i = 0; i < 54; i++) {
-        s->mio[i] = 0x00001601;
+        s->regs[MIO + i] = 0x00001601;
     }
     for (i = 2; i <= 8; i++) {
-        s->mio[i] = 0x00000601;
+        s->regs[MIO + i] = 0x00000601;
     }
 
-    /* MIO_MST_TRI0, MIO_MST_TRI1 */
-    s->mio_func[2] = s->mio_func[3] = 0xFFFFFFFF;
+    s->regs[MIO_MST_TRI0] = s->regs[MIO_MST_TRI1] = 0xFFFFFFFF;
 
-    s->cpu_ram[0] = s->cpu_ram[1] = s->cpu_ram[3] =
-            s->cpu_ram[4] = s->cpu_ram[7] = 0x00010101;
-    s->cpu_ram[2] = s->cpu_ram[5] = 0x01010101;
-    s->cpu_ram[6] = 0x00000001;
+    s->regs[CPU_RAM + 0] = s->regs[CPU_RAM + 1] = s->regs[CPU_RAM + 3]
+                         = s->regs[CPU_RAM + 4] = s->regs[CPU_RAM + 7]
+                         = 0x00010101;
+    s->regs[CPU_RAM + 2] = s->regs[CPU_RAM + 5] = 0x01010101;
+    s->regs[CPU_RAM + 6] = 0x00000001;
 
-    s->iou[0] = s->iou[1] = s->iou[2] = s->iou[3] = 0x09090909;
-    s->iou[4] = s->iou[5] = 0x00090909;
-    s->iou[6] = 0x00000909;
+    s->regs[IOU + 0] = s->regs[IOU + 1] = s->regs[IOU + 2] = s->regs[IOU + 3]
+                     = 0x09090909;
+    s->regs[IOU + 4] = s->regs[IOU + 5] = 0x00090909;
+    s->regs[IOU + 6] = 0x00000909;
 
-    s->dmac_ram = 0x00000009;
+    s->regs[DMAC_RAM] = 0x00000009;
 
-    s->afi[0][0] = s->afi[0][1] = 0x09090909;
-    s->afi[1][0] = s->afi[1][1] = 0x09090909;
-    s->afi[2][0] = s->afi[2][1] = 0x09090909;
-    s->afi[3][0] = s->afi[3][1] = 0x09090909;
-    s->afi[0][2] = s->afi[1][2] = s->afi[2][2] = s->afi[3][2] = 0x00000909;
+    s->regs[AFI0 + 0] = s->regs[AFI0 + 1] = 0x09090909;
+    s->regs[AFI1 + 0] = s->regs[AFI1 + 1] = 0x09090909;
+    s->regs[AFI2 + 0] = s->regs[AFI2 + 1] = 0x09090909;
+    s->regs[AFI3 + 0] = s->regs[AFI3 + 1] = 0x09090909;
+    s->regs[AFI0 + 2] = s->regs[AFI1 + 2] = s->regs[AFI2 + 2]
+                      = s->regs[AFI3 + 2] = 0x00000909;
 
-    s->ocm[0] = 0x01010101;
-    s->ocm[1] = s->ocm[2] = 0x09090909;
+    s->regs[OCM + 0]    = 0x01010101;
+    s->regs[OCM + 1]    = s->regs[OCM + 2] = 0x09090909;
 
-    s->devci_ram = 0x00000909;
-    s->csg_ram = 0x00000001;
+    s->regs[DEVCI_RAM]  = 0x00000909;
+    s->regs[CSG_RAM]    = 0x00000001;
 
-    s->ddriob[0] = s->ddriob[1] = s->ddriob[2] = s->ddriob[3] = 0x00000e00;
-    s->ddriob[4] = s->ddriob[5] = s->ddriob[6] = 0x00000e00;
-    s->ddriob[12] = 0x00000021;
+    s->regs[DDRIOB + 0] = s->regs[DDRIOB + 1] = s->regs[DDRIOB + 2]
+                        = s->regs[DDRIOB + 3] = 0x00000e00;
+    s->regs[DDRIOB + 4] = s->regs[DDRIOB + 5] = s->regs[DDRIOB + 6]
+                        = 0x00000e00;
+    s->regs[DDRIOB + 12] = 0x00000021;
 }
 
-static inline uint32_t zynq_slcr_read_imp(void *opaque,
-    hwaddr offset)
-{
-    ZynqSLCRState *s = (ZynqSLCRState *)opaque;
 
+static bool zynq_slcr_check_offset(hwaddr offset, bool rnw)
+{
     switch (offset) {
-    case 0x0: /* SCL */
-        return s->scl;
-    case 0x4: /* LOCK */
-    case 0x8: /* UNLOCK */
-        DB_PRINT("Reading SCLR_LOCK/UNLOCK is not enabled\n");
-        return 0;
-    case 0x0C: /* LOCKSTA */
-        return s->lockval;
-    case 0x100 ... 0x11C:
-        return s->pll[(offset - 0x100) / 4];
-    case 0x120 ... 0x16C:
-        return s->clk[(offset - 0x120) / 4];
-    case 0x170 ... 0x1AC:
-        return s->fpga[0][(offset - 0x170) / 4];
-    case 0x1B0 ... 0x1D8:
-        return s->misc[(offset - 0x1B0) / 4];
-    case 0x200 ... 0x258:
-        return s->reset[(offset - 0x200) / 4];
-    case 0x25c:
-        return 1;
-    case 0x300:
-        return s->apu_ctrl;
-    case 0x304:
-        return s->wdt_clk_sel;
-    case 0x400 ... 0x408:
-        return s->tz_ocm[(offset - 0x400) / 4];
-    case 0x430:
-        return s->tz_ddr;
-    case 0x440 ... 0x448:
-        return s->tz_dma[(offset - 0x440) / 4];
-    case 0x450 ... 0x458:
-        return s->tz_misc[(offset - 0x450) / 4];
-    case 0x484 ... 0x488:
-        return s->tz_fpga[(offset - 0x484) / 4];
-    case 0x500:
-        return s->dbg_ctrl;
-    case 0x530:
-        return s->pss_idcode;
-    case 0x600 ... 0x620:
-        if (offset == 0x604) {
-            goto bad_reg;
-        }
-        return s->ddr[(offset - 0x600) / 4];
-    case 0x700 ... 0x7D4:
-        return s->mio[(offset - 0x700) / 4];
-    case 0x800 ... 0x810:
-        return s->mio_func[(offset - 0x800) / 4];
-    case 0x830 ... 0x834:
-        return s->sd[(offset - 0x830) / 4];
-    case 0x900:
-        return s->lvl_shftr_en;
-    case 0x910:
-        return s->ocm_cfg;
-    case 0xA00 ... 0xA1C:
-        return s->cpu_ram[(offset - 0xA00) / 4];
-    case 0xA30 ... 0xA48:
-        return s->iou[(offset - 0xA30) / 4];
-    case 0xA50:
-        return s->dmac_ram;
-    case 0xA60 ... 0xA8C:
-        return s->afi[0][(offset - 0xA60) / 4];
-    case 0xA90 ... 0xA98:
-        return s->ocm[(offset - 0xA90) / 4];
-    case 0xAA0:
-        return s->devci_ram;
-    case 0xAB0:
-        return s->csg_ram;
-    case 0xB00 ... 0xB2C:
-        return s->gpiob[(offset - 0xB00) / 4];
-    case 0xB40 ... 0xB74:
-        return s->ddriob[(offset - 0xB40) / 4];
+    case LOCK:
+    case UNLOCK:
+    case DDR_CAL_START:
+    case DDR_REF_START:
+        return !rnw; /* Write only */
+    case LOCKSTA:
+    case FPGA0_THR_STA:
+    case FPGA1_THR_STA:
+    case FPGA2_THR_STA:
+    case FPGA3_THR_STA:
+    case BOOT_MODE:
+    case PSS_IDCODE:
+    case DDR_CMD_STA:
+    case DDR_DFI_STATUS:
+    case PLL_STATUS:
+        return rnw;/* read only */
+    case SCL:
+    case ARM_PLL_CTRL ... IO_PLL_CTRL:
+    case ARM_PLL_CFG ... IO_PLL_CFG:
+    case ARM_CLK_CTRL ... TOPSW_CLK_CTRL:
+    case FPGA0_CLK_CTRL ... FPGA0_THR_CNT:
+    case FPGA1_CLK_CTRL ... FPGA1_THR_CNT:
+    case FPGA2_CLK_CTRL ... FPGA2_THR_CNT:
+    case FPGA3_CLK_CTRL ... FPGA3_THR_CNT:
+    case BANDGAP_TRIP:
+    case PLL_PREDIVISOR:
+    case CLK_621_TRUE:
+    case PSS_RST_CTRL ... A9_CPU_RST_CTRL:
+    case RS_AWDT_CTRL:
+    case RST_REASON:
+    case REBOOT_STATUS:
+    case APU_CTRL:
+    case WDT_CLK_SEL:
+    case TZ_DMA_NS ... TZ_DMA_PERIPH_NS:
+    case DDR_URGENT:
+    case DDR_URGENT_SEL:
+    case MIO ... MIO + MIO_LENGTH - 1:
+    case MIO_LOOPBACK ... MIO_MST_TRI1:
+    case SD0_WP_CD_SEL:
+    case SD1_WP_CD_SEL:
+    case LVL_SHFTR_EN:
+    case OCM_CFG:
+    case CPU_RAM:
+    case IOU:
+    case DMAC_RAM:
+    case AFI0 ... AFI3 + AFI_LENGTH - 1:
+    case OCM:
+    case DEVCI_RAM:
+    case CSG_RAM:
+    case GPIOB_CTRL ... GPIOB_CFG_CMOS33:
+    case GPIOB_CFG_HSTL:
+    case GPIOB_DRVR_BIAS_CTRL:
+    case DDRIOB ... DDRIOB + DDRIOB_LENGTH - 1:
+        return true;
     default:
-    bad_reg:
-        DB_PRINT("Bad register offset 0x%x\n", (int)offset);
-        return 0;
+        return false;
     }
 }
 
 static uint64_t zynq_slcr_read(void *opaque, hwaddr offset,
     unsigned size)
 {
-    uint32_t ret = zynq_slcr_read_imp(opaque, offset);
+    ZynqSLCRState *s = opaque;
+    offset /= 4;
+    uint32_t ret = s->regs[offset];
+
+    if (!zynq_slcr_check_offset(offset, true)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "zynq_slcr: Invalid read access to "
+                      " addr %" HWADDR_PRIx "\n", offset * 4);
+    }
 
-    DB_PRINT("addr: %08x data: %08x\n", (unsigned)offset, (unsigned)ret);
+    DB_PRINT("addr: %08" HWADDR_PRIx " data: %08" PRIx32 "\n", offset * 4, ret);
     return ret;
 }
 
@@ -347,148 +357,55 @@ static void zynq_slcr_write(void *opaque, hwaddr offset,
                           uint64_t val, unsigned size)
 {
     ZynqSLCRState *s = (ZynqSLCRState *)opaque;
+    offset /= 4;
 
-    DB_PRINT("offset: %08x data: %08x\n", (unsigned)offset, (unsigned)val);
+    DB_PRINT("addr: %08" HWADDR_PRIx " data: %08" PRIx64 "\n", offset * 4, val);
+
+    if (!zynq_slcr_check_offset(offset, false)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "zynq_slcr: Invalid write access to "
+                      "addr %" HWADDR_PRIx "\n", offset * 4);
+        return;
+    }
 
     switch (offset) {
-    case 0x00: /* SCL */
-        s->scl = val & 0x1;
-    return;
-    case 0x4: /* SLCR_LOCK */
+    case SCL:
+        s->regs[SCL] = val & 0x1;
+        return;
+    case LOCK:
         if ((val & 0xFFFF) == XILINX_LOCK_KEY) {
             DB_PRINT("XILINX LOCK 0xF8000000 + 0x%x <= 0x%x\n", (int)offset,
                 (unsigned)val & 0xFFFF);
-            s->lockval = 1;
+            s->regs[LOCKSTA] = 1;
         } else {
             DB_PRINT("WRONG XILINX LOCK KEY 0xF8000000 + 0x%x <= 0x%x\n",
                 (int)offset, (unsigned)val & 0xFFFF);
         }
         return;
-    case 0x8: /* SLCR_UNLOCK */
+    case UNLOCK:
         if ((val & 0xFFFF) == XILINX_UNLOCK_KEY) {
             DB_PRINT("XILINX UNLOCK 0xF8000000 + 0x%x <= 0x%x\n", (int)offset,
                 (unsigned)val & 0xFFFF);
-            s->lockval = 0;
+            s->regs[LOCKSTA] = 0;
         } else {
             DB_PRINT("WRONG XILINX UNLOCK KEY 0xF8000000 + 0x%x <= 0x%x\n",
                 (int)offset, (unsigned)val & 0xFFFF);
         }
         return;
-    case 0xc: /* LOCKSTA */
-        DB_PRINT("Writing SCLR_LOCKSTA is not enabled\n");
-        return;
     }
 
-    if (!s->lockval) {
-        switch (offset) {
-        case 0x100 ... 0x11C:
-            if (offset == 0x10C) {
-                goto bad_reg;
-            }
-            s->pll[(offset - 0x100) / 4] = val;
-            break;
-        case 0x120 ... 0x16C:
-            s->clk[(offset - 0x120) / 4] = val;
-            break;
-        case 0x170 ... 0x1AC:
-            s->fpga[0][(offset - 0x170) / 4] = val;
-            break;
-        case 0x1B0 ... 0x1D8:
-            s->misc[(offset - 0x1B0) / 4] = val;
-            break;
-        case 0x200 ... 0x25C:
-            if (offset == 0x250) {
-                goto bad_reg;
-            }
-            s->reset[(offset - 0x200) / 4] = val;
-            if (offset == 0x200 && (val & R_PSS_RST_CTRL_SOFT_RST)) {
-                qemu_system_reset_request();
-            }
-            break;
-        case 0x300:
-            s->apu_ctrl = val;
-            break;
-        case 0x304:
-            s->wdt_clk_sel = val;
-            break;
-        case 0x400 ... 0x408:
-            s->tz_ocm[(offset - 0x400) / 4] = val;
-            break;
-        case 0x430:
-            s->tz_ddr = val;
-            break;
-        case 0x440 ... 0x448:
-            s->tz_dma[(offset - 0x440) / 4] = val;
-            break;
-        case 0x450 ... 0x458:
-            s->tz_misc[(offset - 0x450) / 4] = val;
-            break;
-        case 0x484 ... 0x488:
-            s->tz_fpga[(offset - 0x484) / 4] = val;
-            break;
-        case 0x500:
-            s->dbg_ctrl = val;
-            break;
-        case 0x530:
-            s->pss_idcode = val;
-            break;
-        case 0x600 ... 0x620:
-            if (offset == 0x604) {
-                goto bad_reg;
-            }
-            s->ddr[(offset - 0x600) / 4] = val;
-            break;
-        case 0x700 ... 0x7D4:
-            s->mio[(offset - 0x700) / 4] = val;
-            break;
-        case 0x800 ... 0x810:
-            s->mio_func[(offset - 0x800) / 4] = val;
-            break;
-        case 0x830 ... 0x834:
-            s->sd[(offset - 0x830) / 4] = val;
-            break;
-        case 0x900:
-            s->lvl_shftr_en = val;
-            break;
-        case 0x910:
-            break;
-        case 0xA00 ... 0xA1C:
-            s->cpu_ram[(offset - 0xA00) / 4] = val;
-            break;
-        case 0xA30 ... 0xA48:
-            s->iou[(offset - 0xA30) / 4] = val;
-            break;
-        case 0xA50:
-            s->dmac_ram = val;
-            break;
-        case 0xA60 ... 0xA8C:
-            s->afi[0][(offset - 0xA60) / 4] = val;
-            break;
-        case 0xA90:
-            s->ocm[0] = val;
-            break;
-        case 0xAA0:
-            s->devci_ram = val;
-            break;
-        case 0xAB0:
-            s->csg_ram = val;
-            break;
-        case 0xB00 ... 0xB2C:
-            if (offset == 0xB20 || offset == 0xB2C) {
-                goto bad_reg;
-            }
-            s->gpiob[(offset - 0xB00) / 4] = val;
-            break;
-        case 0xB40 ... 0xB74:
-            s->ddriob[(offset - 0xB40) / 4] = val;
-            break;
-        default:
-        bad_reg:
-            DB_PRINT("Bad register write %x <= %08x\n", (int)offset,
-                     (unsigned)val);
-        }
+    if (!s->regs[LOCKSTA]) {
+        s->regs[offset / 4] = val;
     } else {
         DB_PRINT("SCLR registers are locked. Unlock them first\n");
+        return;
+    }
+
+    switch (offset) {
+    case PSS_RST_CTRL:
+        if (val & R_PSS_RST_CTRL_SOFT_RST) {
+            qemu_system_reset_request();
+        }
+        break;
     }
 }
 
@@ -498,23 +415,22 @@ static const MemoryRegionOps slcr_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static int zynq_slcr_init(SysBusDevice *dev)
+static void zynq_slcr_init(Object *obj)
 {
-    ZynqSLCRState *s = ZYNQ_SLCR(dev);
-
-    memory_region_init_io(&s->iomem, OBJECT(s), &slcr_ops, s, "slcr", 0x1000);
-    sysbus_init_mmio(dev, &s->iomem);
+    ZynqSLCRState *s = ZYNQ_SLCR(obj);
 
-    return 0;
+    memory_region_init_io(&s->iomem, obj, &slcr_ops, s, "slcr",
+                          ZYNQ_SLCR_MMIO_SIZE);
+    sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->iomem);
 }
 
 static const VMStateDescription vmstate_zynq_slcr = {
     .name = "zynq_slcr",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .minimum_version_id_old = 2,
     .fields      = (VMStateField[]) {
-        VMSTATE_UINT8_ARRAY(data, ZynqSLCRState, 0x1000),
+        VMSTATE_UINT32_ARRAY(regs, ZynqSLCRState, ZYNQ_SLCR_NUM_REGS),
         VMSTATE_END_OF_LIST()
     }
 };
@@ -522,9 +438,7 @@ static const VMStateDescription vmstate_zynq_slcr = {
 static void zynq_slcr_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
-    SysBusDeviceClass *sdc = SYS_BUS_DEVICE_CLASS(klass);
 
-    sdc->init = zynq_slcr_init;
     dc->vmsd = &vmstate_zynq_slcr;
     dc->reset = zynq_slcr_reset;
 }
@@ -534,6 +448,7 @@ static const TypeInfo zynq_slcr_info = {
     .name  = TYPE_ZYNQ_SLCR,
     .parent = TYPE_SYS_BUS_DEVICE,
     .instance_size  = sizeof(ZynqSLCRState),
+    .instance_init = zynq_slcr_init,
 };
 
 static void zynq_slcr_register_types(void)
diff --git a/hw/net/allwinner_emac.c b/hw/net/allwinner_emac.c
index 469f2f0ede..d780ba0fcb 100644
--- a/hw/net/allwinner_emac.c
+++ b/hw/net/allwinner_emac.c
@@ -27,11 +27,11 @@ static uint8_t padding[60];
 static void mii_set_link(RTL8201CPState *mii, bool link_ok)
 {
     if (link_ok) {
-        mii->bmsr |= MII_BMSR_LINK_ST;
+        mii->bmsr |= MII_BMSR_LINK_ST | MII_BMSR_AN_COMP;
         mii->anlpar |= MII_ANAR_TXFD | MII_ANAR_10FD | MII_ANAR_10 |
                        MII_ANAR_CSMACD;
     } else {
-        mii->bmsr &= ~MII_BMSR_LINK_ST;
+        mii->bmsr &= ~(MII_BMSR_LINK_ST | MII_BMSR_AN_COMP);
         mii->anlpar = MII_ANAR_TX;
     }
 }
@@ -391,9 +391,11 @@ static void aw_emac_write(void *opaque, hwaddr offset, uint64_t value,
         break;
     case EMAC_INT_CTL_REG:
         s->int_ctl = value;
+        aw_emac_update_irq(s);
         break;
     case EMAC_INT_STA_REG:
         s->int_sta &= ~value;
+        aw_emac_update_irq(s);
         break;
     case EMAC_MAC_MADR_REG:
         s->phy_target = value;
diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c
index 92dc2f21fa..e34b25e734 100644
--- a/hw/net/cadence_gem.c
+++ b/hw/net/cadence_gem.c
@@ -1093,7 +1093,7 @@ static uint64_t gem_read(void *opaque, hwaddr offset, unsigned size)
             uint32_t phy_addr, reg_num;
 
             phy_addr = (retval & GEM_PHYMNTNC_ADDR) >> GEM_PHYMNTNC_ADDR_SHFT;
-            if (phy_addr == BOARD_PHY_ADDRESS) {
+            if (phy_addr == BOARD_PHY_ADDRESS || phy_addr == 0) {
                 reg_num = (retval & GEM_PHYMNTNC_REG) >> GEM_PHYMNTNC_REG_SHIFT;
                 retval &= 0xFFFF0000;
                 retval |= gem_phy_read(s, reg_num);
@@ -1193,7 +1193,7 @@ static void gem_write(void *opaque, hwaddr offset, uint64_t val,
             uint32_t phy_addr, reg_num;
 
             phy_addr = (val & GEM_PHYMNTNC_ADDR) >> GEM_PHYMNTNC_ADDR_SHFT;
-            if (phy_addr == BOARD_PHY_ADDRESS) {
+            if (phy_addr == BOARD_PHY_ADDRESS || phy_addr == 0) {
                 reg_num = (val & GEM_PHYMNTNC_REG) >> GEM_PHYMNTNC_REG_SHIFT;
                 gem_phy_write(s, reg_num, val);
             }
diff --git a/hw/timer/allwinner-a10-pit.c b/hw/timer/allwinner-a10-pit.c
index b27fce8cd2..d3c02ea18f 100644
--- a/hw/timer/allwinner-a10-pit.c
+++ b/hw/timer/allwinner-a10-pit.c
@@ -19,6 +19,15 @@
 #include "sysemu/sysemu.h"
 #include "hw/timer/allwinner-a10-pit.h"
 
+static void a10_pit_update_irq(AwA10PITState *s)
+{
+    int i;
+
+    for (i = 0; i < AW_A10_PIT_TIMER_NR; i++) {
+        qemu_set_irq(s->irq[i], !!(s->irq_status & s->irq_enable & (1 << i)));
+    }
+}
+
 static uint64_t a10_pit_read(void *opaque, hwaddr offset, unsigned size)
 {
     AwA10PITState *s = AW_A10_PIT(opaque);
@@ -65,6 +74,22 @@ static uint64_t a10_pit_read(void *opaque, hwaddr offset, unsigned size)
     return 0;
 }
 
+static void a10_pit_set_freq(AwA10PITState *s, int index)
+{
+    uint32_t prescaler, source, source_freq;
+
+    prescaler = 1 << extract32(s->control[index], 4, 3);
+    source = extract32(s->control[index], 2, 2);
+    source_freq = s->clk_freq[source];
+
+    if (source_freq) {
+        ptimer_set_freq(s->timer[index], source_freq / prescaler);
+    } else {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid clock source %u\n",
+                      __func__, source);
+    }
+}
+
 static void a10_pit_write(void *opaque, hwaddr offset, uint64_t value,
                             unsigned size)
 {
@@ -74,9 +99,11 @@ static void a10_pit_write(void *opaque, hwaddr offset, uint64_t value,
     switch (offset) {
     case AW_A10_PIT_TIMER_IRQ_EN:
         s->irq_enable = value;
+        a10_pit_update_irq(s);
         break;
     case AW_A10_PIT_TIMER_IRQ_ST:
         s->irq_status &= ~value;
+        a10_pit_update_irq(s);
         break;
     case AW_A10_PIT_TIMER_BASE ... AW_A10_PIT_TIMER_BASE_END:
         index = offset & 0xf0;
@@ -85,6 +112,7 @@ static void a10_pit_write(void *opaque, hwaddr offset, uint64_t value,
         switch (offset & 0x0f) {
         case AW_A10_PIT_TIMER_CONTROL:
             s->control[index] = value;
+            a10_pit_set_freq(s, index);
             if (s->control[index] & AW_A10_PIT_TIMER_RELOAD) {
                 ptimer_set_count(s->timer[index], s->interval[index]);
             }
@@ -150,6 +178,14 @@ static const MemoryRegionOps a10_pit_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
+static Property a10_pit_properties[] = {
+    DEFINE_PROP_UINT32("clk0-freq", AwA10PITState, clk_freq[0], 0),
+    DEFINE_PROP_UINT32("clk1-freq", AwA10PITState, clk_freq[1], 0),
+    DEFINE_PROP_UINT32("clk2-freq", AwA10PITState, clk_freq[2], 0),
+    DEFINE_PROP_UINT32("clk3-freq", AwA10PITState, clk_freq[3], 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static const VMStateDescription vmstate_a10_pit = {
     .name = "a10.pit",
     .version_id = 1,
@@ -178,11 +214,14 @@ static void a10_pit_reset(DeviceState *dev)
 
     s->irq_enable = 0;
     s->irq_status = 0;
+    a10_pit_update_irq(s);
+
     for (i = 0; i < 6; i++) {
         s->control[i] = AW_A10_PIT_DEFAULT_CLOCK;
         s->interval[i] = 0;
         s->count[i] = 0;
         ptimer_stop(s->timer[i]);
+        a10_pit_set_freq(s, i);
     }
     s->watch_dog_mode = 0;
     s->watch_dog_control = 0;
@@ -193,18 +232,17 @@ static void a10_pit_reset(DeviceState *dev)
 
 static void a10_pit_timer_cb(void *opaque)
 {
-    AwA10PITState *s = AW_A10_PIT(opaque);
-    uint8_t i;
+    AwA10TimerContext *tc = opaque;
+    AwA10PITState *s = tc->container;
+    uint8_t i = tc->index;
 
-    for (i = 0; i < AW_A10_PIT_TIMER_NR; i++) {
-        if (s->control[i] & AW_A10_PIT_TIMER_EN) {
-            s->irq_status |= 1 << i;
-            if (s->control[i] & AW_A10_PIT_TIMER_MODE) {
-                ptimer_stop(s->timer[i]);
-                s->control[i] &= ~AW_A10_PIT_TIMER_EN;
-            }
-            qemu_irq_pulse(s->irq[i]);
+    if (s->control[i] & AW_A10_PIT_TIMER_EN) {
+        s->irq_status |= 1 << i;
+        if (s->control[i] & AW_A10_PIT_TIMER_MODE) {
+            ptimer_stop(s->timer[i]);
+            s->control[i] &= ~AW_A10_PIT_TIMER_EN;
         }
+        a10_pit_update_irq(s);
     }
 }
 
@@ -223,9 +261,12 @@ static void a10_pit_init(Object *obj)
     sysbus_init_mmio(sbd, &s->iomem);
 
     for (i = 0; i < AW_A10_PIT_TIMER_NR; i++) {
-        bh[i] = qemu_bh_new(a10_pit_timer_cb, s);
+        AwA10TimerContext *tc = &s->timer_context[i];
+
+        tc->container = s;
+        tc->index = i;
+        bh[i] = qemu_bh_new(a10_pit_timer_cb, tc);
         s->timer[i] = ptimer_init(bh[i]);
-        ptimer_set_freq(s->timer[i], 240000);
     }
 }
 
@@ -234,6 +275,7 @@ static void a10_pit_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->reset = a10_pit_reset;
+    dc->props = a10_pit_properties;
     dc->desc = "allwinner a10 timer";
     dc->vmsd = &vmstate_a10_pit;
 }
diff --git a/hw/timer/cadence_ttc.c b/hw/timer/cadence_ttc.c
index a279bced78..28cb328f9b 100644
--- a/hw/timer/cadence_ttc.c
+++ b/hw/timer/cadence_ttc.c
@@ -346,11 +346,13 @@ static void cadence_ttc_write(void *opaque, hwaddr offset,
     case 0x34:
     case 0x38:
         s->reg_match[0] = value & 0xffff;
+        break;
 
     case 0x3c: /* match register */
     case 0x40:
     case 0x44:
         s->reg_match[1] = value & 0xffff;
+        break;
 
     case 0x48: /* match register */
     case 0x4c:
diff --git a/include/exec/softmmu_exec.h b/include/exec/softmmu_exec.h
index 6fde154527..470db20174 100644
--- a/include/exec/softmmu_exec.h
+++ b/include/exec/softmmu_exec.h
@@ -162,3 +162,55 @@
 #define stw(p, v) stw_data(p, v)
 #define stl(p, v) stl_data(p, v)
 #define stq(p, v) stq_data(p, v)
+
+/**
+ * tlb_vaddr_to_host:
+ * @env: CPUArchState
+ * @addr: guest virtual address to look up
+ * @access_type: 0 for read, 1 for write, 2 for execute
+ * @mmu_idx: MMU index to use for lookup
+ *
+ * Look up the specified guest virtual index in the TCG softmmu TLB.
+ * If the TLB contains a host virtual address suitable for direct RAM
+ * access, then return it. Otherwise (TLB miss, TLB entry is for an
+ * I/O access, etc) return NULL.
+ *
+ * This is the equivalent of the initial fast-path code used by
+ * TCG backends for guest load and store accesses.
+ */
+static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr,
+                                      int access_type, int mmu_idx)
+{
+    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index];
+    target_ulong tlb_addr;
+    uintptr_t haddr;
+
+    switch (access_type) {
+    case 0:
+        tlb_addr = tlbentry->addr_read;
+        break;
+    case 1:
+        tlb_addr = tlbentry->addr_write;
+        break;
+    case 2:
+        tlb_addr = tlbentry->addr_code;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    if ((addr & TARGET_PAGE_MASK)
+        != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+        /* TLB entry is for a different page */
+        return NULL;
+    }
+
+    if (tlb_addr & ~TARGET_PAGE_MASK) {
+        /* IO access */
+        return NULL;
+    }
+
+    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+    return (void *)haddr;
+}
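
The tlb_vaddr_to_host() helper documented above can back fast paths such as
the DC ZVA support added elsewhere in this series.  A minimal sketch of the
intended usage pattern (the helper name and the 64-byte block size below are
illustrative, not taken from this patch):

    /* Zero a 64-byte block at a guest virtual address: use the host pointer
     * directly when the softmmu TLB already maps the page as plain RAM, and
     * fall back to byte stores through the softmmu (which may fault) otherwise.
     */
    static void example_zero_block(CPUARMState *env, uint64_t vaddr)
    {
        int mmu_idx = cpu_mmu_index(env);
        void *host = tlb_vaddr_to_host(env, vaddr, 1 /* write */, mmu_idx);

        if (host) {
            memset(host, 0, 64);               /* fast path */
        } else {
            int i;

            for (i = 0; i < 64; i++) {         /* slow path */
                cpu_stb_data(env, vaddr + i, 0);
            }
        }
    }
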
diff --git a/include/hw/net/allwinner_emac.h b/include/hw/net/allwinner_emac.h
index a5e944af05..5ae7717300 100644
--- a/include/hw/net/allwinner_emac.h
+++ b/include/hw/net/allwinner_emac.h
@@ -144,6 +144,7 @@
 #define MII_BMSR_10T_FD     (1 << 12)
 #define MII_BMSR_10T_HD     (1 << 11)
 #define MII_BMSR_MFPS       (1 << 6)
+#define MII_BMSR_AN_COMP    (1 << 5)
 #define MII_BMSR_AUTONEG    (1 << 3)
 #define MII_BMSR_LINK_ST    (1 << 2)
 
diff --git a/include/hw/timer/allwinner-a10-pit.h b/include/hw/timer/allwinner-a10-pit.h
index 15efab8b5f..770bdc03c1 100644
--- a/include/hw/timer/allwinner-a10-pit.h
+++ b/include/hw/timer/allwinner-a10-pit.h
@@ -35,13 +35,22 @@
 
 #define AW_A10_PIT_DEFAULT_CLOCK   0x4
 
-typedef struct AwA10PITState {
+typedef struct AwA10PITState AwA10PITState;
+
+typedef struct AwA10TimerContext {
+    AwA10PITState *container;
+    int index;
+} AwA10TimerContext;
+
+struct AwA10PITState {
     /*< private >*/
     SysBusDevice parent_obj;
     /*< public >*/
     qemu_irq irq[AW_A10_PIT_TIMER_NR];
     ptimer_state * timer[AW_A10_PIT_TIMER_NR];
+    AwA10TimerContext timer_context[AW_A10_PIT_TIMER_NR];
     MemoryRegion iomem;
+    uint32_t clk_freq[4];
 
     uint32_t irq_enable;
     uint32_t irq_status;
@@ -53,6 +62,6 @@ typedef struct AwA10PITState {
     uint32_t count_lo;
     uint32_t count_hi;
     uint32_t count_ctl;
-} AwA10PITState;
+};
 
 #endif
diff --git a/linux-user/main.c b/linux-user/main.c
index af924dcc9d..947358a886 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -483,17 +483,17 @@ static void arm_kernel_cmpxchg64_helper(CPUARMState *env)
     addr = env->regs[2];
 
     if (get_user_u64(oldval, env->regs[0])) {
-        env->cp15.c6_data = env->regs[0];
+        env->exception.vaddress = env->regs[0];
         goto segv;
     };
 
     if (get_user_u64(newval, env->regs[1])) {
-        env->cp15.c6_data = env->regs[1];
+        env->exception.vaddress = env->regs[1];
         goto segv;
     };
 
     if (get_user_u64(val, addr)) {
-        env->cp15.c6_data = addr;
+        env->exception.vaddress = addr;
         goto segv;
     }
 
@@ -501,7 +501,7 @@ static void arm_kernel_cmpxchg64_helper(CPUARMState *env)
         val = newval;
 
         if (put_user_u64(val, addr)) {
-            env->cp15.c6_data = addr;
+            env->exception.vaddress = addr;
             goto segv;
         };
 
@@ -523,7 +523,7 @@ segv:
     info.si_errno = 0;
     /* XXX: check env->error_code */
     info.si_code = TARGET_SEGV_MAPERR;
-    info._sifields._sigfault._addr = env->cp15.c6_data;
+    info._sifields._sigfault._addr = env->exception.vaddress;
     queue_signal(env, info.si_signo, &info);
 
     end_exclusive();
@@ -620,14 +620,14 @@ static int do_strex(CPUARMState *env)
         abort();
     }
     if (segv) {
-        env->cp15.c6_data = addr;
+        env->exception.vaddress = addr;
         goto done;
     }
     if (size == 3) {
         uint32_t valhi;
         segv = get_user_u32(valhi, addr + 4);
         if (segv) {
-            env->cp15.c6_data = addr + 4;
+            env->exception.vaddress = addr + 4;
             goto done;
         }
         val = deposit64(val, 32, 32, valhi);
@@ -650,14 +650,14 @@ static int do_strex(CPUARMState *env)
         break;
     }
     if (segv) {
-        env->cp15.c6_data = addr;
+        env->exception.vaddress = addr;
         goto done;
     }
     if (size == 3) {
         val = env->regs[(env->exclusive_info >> 12) & 0xf];
         segv = put_user_u32(val, addr + 4);
         if (segv) {
-            env->cp15.c6_data = addr + 4;
+            env->exception.vaddress = addr + 4;
             goto done;
         }
     }
@@ -832,12 +832,14 @@ void cpu_loop(CPUARMState *env)
         case EXCP_INTERRUPT:
             /* just indicate that signals should be handled asap */
             break;
+        case EXCP_STREX:
+            if (!do_strex(env)) {
+                break;
+            }
+            /* fall through for segv */
         case EXCP_PREFETCH_ABORT:
-            addr = env->cp15.c6_insn;
-            goto do_segv;
         case EXCP_DATA_ABORT:
-            addr = env->cp15.c6_data;
-        do_segv:
+            addr = env->exception.vaddress;
             {
                 info.si_signo = SIGSEGV;
                 info.si_errno = 0;
@@ -865,12 +867,6 @@ void cpu_loop(CPUARMState *env)
             if (do_kernel_trap(env))
               goto error;
             break;
-        case EXCP_STREX:
-            if (do_strex(env)) {
-                addr = env->cp15.c6_data;
-                goto do_segv;
-            }
-            break;
         default:
         error:
             fprintf(stderr, "qemu: unhandled CPU exception 0x%x - aborting\n",
@@ -933,7 +929,7 @@ static int do_strex_a64(CPUARMState *env)
         abort();
     }
     if (segv) {
-        env->cp15.c6_data = addr;
+        env->exception.vaddress = addr;
         goto error;
     }
     if (val != env->exclusive_val) {
@@ -946,7 +942,7 @@ static int do_strex_a64(CPUARMState *env)
             segv = get_user_u64(val, addr + 8);
         }
         if (segv) {
-            env->cp15.c6_data = addr + (size == 2 ? 4 : 8);
+            env->exception.vaddress = addr + (size == 2 ? 4 : 8);
             goto error;
         }
         if (val != env->exclusive_high) {
@@ -981,7 +977,7 @@ static int do_strex_a64(CPUARMState *env)
             segv = put_user_u64(val, addr + 8);
         }
         if (segv) {
-            env->cp15.c6_data = addr + (size == 2 ? 4 : 8);
+            env->exception.vaddress = addr + (size == 2 ? 4 : 8);
             goto error;
         }
     }
@@ -1037,12 +1033,14 @@ void cpu_loop(CPUARMState *env)
             info._sifields._sigfault._addr = env->pc;
             queue_signal(env, info.si_signo, &info);
             break;
+        case EXCP_STREX:
+            if (!do_strex_a64(env)) {
+                break;
+            }
+            /* fall through for segv */
         case EXCP_PREFETCH_ABORT:
-            addr = env->cp15.c6_insn;
-            goto do_segv;
         case EXCP_DATA_ABORT:
-            addr = env->cp15.c6_data;
-        do_segv:
+            addr = env->exception.vaddress;
             info.si_signo = SIGSEGV;
             info.si_errno = 0;
             /* XXX: check env->error_code */
@@ -1060,12 +1058,6 @@ void cpu_loop(CPUARMState *env)
                 queue_signal(env, info.si_signo, &info);
             }
             break;
-        case EXCP_STREX:
-            if (do_strex_a64(env)) {
-                addr = env->cp15.c6_data;
-                goto do_segv;
-            }
-            break;
         default:
             fprintf(stderr, "qemu: unhandled CPU exception 0x%x - aborting\n",
                     trapnr);
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index 00234e1d3d..edc7f262fc 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -116,6 +116,7 @@ typedef struct ARMCPU {
     uint32_t reset_fpsid;
     uint32_t mvfr0;
     uint32_t mvfr1;
+    uint32_t mvfr2;
     uint32_t ctr;
     uint32_t reset_sctlr;
     uint32_t id_pfr0;
@@ -147,9 +148,12 @@ typedef struct ARMCPU {
      * in the order L1DCache, L1ICache, L2DCache, L2ICache, etc.
      */
     uint32_t ccsidr[16];
-    uint32_t reset_cbar;
+    uint64_t reset_cbar;
     uint32_t reset_auxcr;
     bool reset_hivecs;
+    /* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */
+    uint32_t dcz_blocksize;
+    uint64_t rvbar;
 } ARMCPU;
 
 #define TYPE_AARCH64_CPU "aarch64-cpu"
@@ -196,10 +200,10 @@ void arm_gt_ptimer_cb(void *opaque);
 void arm_gt_vtimer_cb(void *opaque);
 
 #ifdef TARGET_AARCH64
-void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
-                            fprintf_function cpu_fprintf, int flags);
 int aarch64_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg);
 int aarch64_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
+
+void aarch64_cpu_do_interrupt(CPUState *cs);
 #endif
 
 #endif
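
A quick worked example of the dcz_blocksize encoding added to ARMCPU above:
a value of 4 means 2^4 = 16 words, i.e. 16 * 4 = 64 bytes cleared per DC ZVA
operation, and it is this log2-of-words value that the low 4 bits of
DCZID_EL0 report to guest software.
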
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index c32d8c4855..c0ddc3e3df 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -19,6 +19,7 @@
  */
 
 #include "cpu.h"
+#include "internals.h"
 #include "qemu-common.h"
 #include "hw/qdev-properties.h"
 #include "qapi/qmp/qerror.h"
@@ -87,6 +88,7 @@ static void arm_cpu_reset(CPUState *s)
     env->vfp.xregs[ARM_VFP_FPSID] = cpu->reset_fpsid;
     env->vfp.xregs[ARM_VFP_MVFR0] = cpu->mvfr0;
     env->vfp.xregs[ARM_VFP_MVFR1] = cpu->mvfr1;
+    env->vfp.xregs[ARM_VFP_MVFR2] = cpu->mvfr2;
 
     if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
         env->iwmmxt.cregs[ARM_IWMMXT_wCID] = 0x69051000 | 'Q';
@@ -99,8 +101,16 @@ static void arm_cpu_reset(CPUState *s)
         env->pstate = PSTATE_MODE_EL0t;
         /* Userspace expects access to CTL_EL0 and the cache ops */
         env->cp15.c1_sys |= SCTLR_UCT | SCTLR_UCI;
+        /* and to the FP/Neon instructions */
+        env->cp15.c1_coproc = deposit64(env->cp15.c1_coproc, 20, 2, 3);
 #else
         env->pstate = PSTATE_MODE_EL1h;
+        env->pc = cpu->rvbar;
+#endif
+    } else {
+#if defined(CONFIG_USER_ONLY)
+        /* Userspace expects access to cp10 and cp11 for FP/Neon */
+        env->cp15.c1_coproc = deposit64(env->cp15.c1_coproc, 20, 4, 0xf);
 #endif
     }
 
@@ -252,16 +262,20 @@ static void arm_cpu_initfn(Object *obj)
 }
 
 static Property arm_cpu_reset_cbar_property =
-            DEFINE_PROP_UINT32("reset-cbar", ARMCPU, reset_cbar, 0);
+            DEFINE_PROP_UINT64("reset-cbar", ARMCPU, reset_cbar, 0);
 
 static Property arm_cpu_reset_hivecs_property =
             DEFINE_PROP_BOOL("reset-hivecs", ARMCPU, reset_hivecs, false);
 
+static Property arm_cpu_rvbar_property =
+            DEFINE_PROP_UINT64("rvbar", ARMCPU, rvbar, 0);
+
 static void arm_cpu_post_init(Object *obj)
 {
     ARMCPU *cpu = ARM_CPU(obj);
 
-    if (arm_feature(&cpu->env, ARM_FEATURE_CBAR)) {
+    if (arm_feature(&cpu->env, ARM_FEATURE_CBAR) ||
+        arm_feature(&cpu->env, ARM_FEATURE_CBAR_RO)) {
         qdev_property_add_static(DEVICE(obj), &arm_cpu_reset_cbar_property,
                                  &error_abort);
     }
@@ -270,6 +284,11 @@ static void arm_cpu_post_init(Object *obj)
         qdev_property_add_static(DEVICE(obj), &arm_cpu_reset_hivecs_property,
                                  &error_abort);
     }
+
+    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+        qdev_property_add_static(DEVICE(obj), &arm_cpu_rvbar_property,
+                                 &error_abort);
+    }
 }
 
 static void arm_cpu_finalizefn(Object *obj)
@@ -331,6 +350,9 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         set_feature(env, ARM_FEATURE_V7MP);
         set_feature(env, ARM_FEATURE_PXN);
     }
+    if (arm_feature(env, ARM_FEATURE_CBAR_RO)) {
+        set_feature(env, ARM_FEATURE_CBAR);
+    }
 
     if (cpu->reset_hivecs) {
             cpu->reset_sctlr |= (1 << 13);
@@ -417,7 +439,7 @@ static void arm1026_initfn(Object *obj)
         ARMCPRegInfo ifar = {
             .name = "IFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 1,
             .access = PL1_RW,
-            .fieldoffset = offsetof(CPUARMState, cp15.c6_insn),
+            .fieldoffset = offsetofhigh32(CPUARMState, cp15.far_el1),
             .resetvalue = 0
         };
         define_one_arm_cp_reg(cpu, &ifar);
@@ -722,7 +744,7 @@ static void cortex_a15_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
     set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
     set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS);
-    set_feature(&cpu->env, ARM_FEATURE_CBAR);
+    set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
     set_feature(&cpu->env, ARM_FEATURE_LPAE);
     cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A15;
     cpu->midr = 0x412fc0f1;
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index bf37cd60d0..c83f2495a8 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -111,11 +111,6 @@ typedef struct ARMGenericTimer {
 #define GTIMER_VIRT 1
 #define NUM_GTIMERS 2
 
-/* Scale factor for generic timers, ie number of ns per tick.
- * This gives a 62.5MHz timer.
- */
-#define GTIMER_SCALE 16
-
 typedef struct CPUARMState {
     /* Regs for current mode.  */
     uint32_t regs[16];
@@ -148,7 +143,7 @@ typedef struct CPUARMState {
     uint32_t spsr;
 
     /* Banked registers.  */
-    uint32_t banked_spsr[6];
+    uint64_t banked_spsr[6];
     uint32_t banked_r13[6];
     uint32_t banked_r14[6];
 
@@ -165,7 +160,10 @@ typedef struct CPUARMState {
     uint32_t GE; /* cpsr[19:16] */
     uint32_t thumb; /* cpsr[5]. 0 = arm mode, 1 = thumb mode. */
     uint32_t condexec_bits; /* IT bits.  cpsr[15:10,26:25].  */
-    uint32_t daif; /* exception masks, in the bits they are in in PSTATE */
+    uint64_t daif; /* exception masks, in the bits they are in in PSTATE */
+
+    uint64_t elr_el1; /* AArch64 ELR_EL1 */
+    uint64_t sp_el[2]; /* AArch64 banked stack pointers */
 
     /* System control coprocessor (cp15) */
     struct {
@@ -184,13 +182,13 @@ typedef struct CPUARMState {
         uint32_t c2_insn; /* MPU instruction cachable bits.  */
         uint32_t c3; /* MMU domain access control register
                         MPU write buffer control.  */
-        uint32_t c5_insn; /* Fault status registers.  */
-        uint32_t c5_data;
+        uint32_t pmsav5_data_ap; /* PMSAv5 MPU data access permissions */
+        uint32_t pmsav5_insn_ap; /* PMSAv5 MPU insn access permissions */
+        uint32_t ifsr_el2; /* Fault status registers.  */
+        uint64_t esr_el1;
         uint32_t c6_region[8]; /* MPU base/size registers.  */
-        uint32_t c6_insn; /* Fault address registers.  */
-        uint32_t c6_data;
-        uint32_t c7_par;  /* Translation result. */
-        uint32_t c7_par_hi;  /* Translation result, high 32 bits */
+        uint64_t far_el1; /* Fault address registers.  */
+        uint64_t par_el1;  /* Translation result. */
         uint32_t c9_insn; /* Cache lockdown registers.  */
         uint32_t c9_data;
         uint32_t c9_pmcr; /* performance monitor control register */
@@ -202,7 +200,7 @@ typedef struct CPUARMState {
         uint64_t mair_el1;
         uint64_t c12_vbar; /* vector base address register */
         uint32_t c13_fcse; /* FCSE PID.  */
-        uint32_t c13_context; /* Context ID.  */
+        uint64_t contextidr_el1; /* Context ID.  */
         uint64_t tpidr_el0; /* User RW Thread register.  */
         uint64_t tpidrro_el0; /* User RO Thread register.  */
         uint64_t tpidr_el1; /* Privileged Thread register.  */
@@ -238,6 +236,21 @@ typedef struct CPUARMState {
         int pending_exception;
     } v7m;
 
+    /* Information associated with an exception about to be taken:
+     * code which raises an exception must set cs->exception_index and
+     * the relevant parts of this structure; the cpu_do_interrupt function
+     * will then set the guest-visible registers as part of the exception
+     * entry process.
+     */
+    struct {
+        uint32_t syndrome; /* AArch64 format syndrome register */
+        uint32_t fsr; /* AArch32 format fault status register info */
+        uint64_t vaddress; /* virtual addr associated with exception, if any */
+        /* If we implement EL2 we will also need to store information
+         * about the intermediate physical address for stage 2 faults.
+         */
+    } exception;
+
     /* Thumb-2 EE state.  */
     uint32_t teecr;
     uint32_t teehbr;
@@ -322,11 +335,7 @@ typedef struct CPUARMState {
 #include "cpu-qom.h"
 
 ARMCPU *cpu_arm_init(const char *cpu_model);
-void arm_translate_init(void);
-void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
 int cpu_arm_exec(CPUARMState *s);
-int bank_number(int mode);
-void switch_mode(CPUARMState *, int);
 uint32_t do_arm_semihosting(CPUARMState *env);
 
 static inline bool is_a64(CPUARMState *env)
@@ -425,6 +434,7 @@ int arm_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, int rw,
  * Only these are valid when in AArch64 mode; in
  * AArch32 mode SPSRs are basically CPSR-format.
  */
+#define PSTATE_SP (1U)
 #define PSTATE_M (0xFU)
 #define PSTATE_nRW (1U << 4)
 #define PSTATE_F (1U << 6)
@@ -548,17 +558,6 @@ static inline void vfp_set_fpcr(CPUARMState *env, uint32_t val)
     vfp_set_fpscr(env, new_fpscr);
 }
 
-enum arm_fprounding {
-    FPROUNDING_TIEEVEN,
-    FPROUNDING_POSINF,
-    FPROUNDING_NEGINF,
-    FPROUNDING_ZERO,
-    FPROUNDING_TIEAWAY,
-    FPROUNDING_ODD
-};
-
-int arm_rmode_to_sf(int rmode);
-
 enum arm_cpu_mode {
   ARM_CPU_MODE_USR = 0x10,
   ARM_CPU_MODE_FIQ = 0x11,
@@ -572,6 +571,7 @@ enum arm_cpu_mode {
 /* VFP system registers.  */
 #define ARM_VFP_FPSID   0
 #define ARM_VFP_FPSCR   1
+#define ARM_VFP_MVFR2   5
 #define ARM_VFP_MVFR1   6
 #define ARM_VFP_MVFR0   7
 #define ARM_VFP_FPEXC   8
@@ -630,6 +630,7 @@ enum arm_features {
     ARM_FEATURE_V8_AES, /* implements AES part of v8 Crypto Extensions */
     ARM_FEATURE_CBAR, /* has cp15 CBAR */
     ARM_FEATURE_CRC, /* ARMv8 CRC instructions */
+    ARM_FEATURE_CBAR_RO, /* has cp15 CBAR and it is read-only */
 };
 
 static inline int arm_feature(CPUARMState *env, int feature)
@@ -763,7 +764,8 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
 #define ARM_CP_WFI (ARM_CP_SPECIAL | (2 << 8))
 #define ARM_CP_NZCV (ARM_CP_SPECIAL | (3 << 8))
 #define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | (4 << 8))
-#define ARM_LAST_SPECIAL ARM_CP_CURRENTEL
+#define ARM_CP_DC_ZVA (ARM_CP_SPECIAL | (5 << 8))
+#define ARM_LAST_SPECIAL ARM_CP_DC_ZVA
 /* Used only as a terminator for ARMCPRegInfo lists */
 #define ARM_CP_SENTINEL 0xffff
 /* Mask of only the flag bits in a type field */
@@ -1109,10 +1111,14 @@ static inline int cpu_mmu_index (CPUARMState *env)
 #define ARM_TBFLAG_CONDEXEC_MASK    (0xff << ARM_TBFLAG_CONDEXEC_SHIFT)
 #define ARM_TBFLAG_BSWAP_CODE_SHIFT 16
 #define ARM_TBFLAG_BSWAP_CODE_MASK  (1 << ARM_TBFLAG_BSWAP_CODE_SHIFT)
+#define ARM_TBFLAG_CPACR_FPEN_SHIFT 17
+#define ARM_TBFLAG_CPACR_FPEN_MASK  (1 << ARM_TBFLAG_CPACR_FPEN_SHIFT)
 
 /* Bit usage when in AArch64 state */
 #define ARM_TBFLAG_AA64_EL_SHIFT    0
 #define ARM_TBFLAG_AA64_EL_MASK     (0x3 << ARM_TBFLAG_AA64_EL_SHIFT)
+#define ARM_TBFLAG_AA64_FPEN_SHIFT  2
+#define ARM_TBFLAG_AA64_FPEN_MASK   (1 << ARM_TBFLAG_AA64_FPEN_SHIFT)
 
 /* some convenience accessor macros */
 #define ARM_TBFLAG_AARCH64_STATE(F) \
@@ -1131,16 +1137,25 @@ static inline int cpu_mmu_index (CPUARMState *env)
     (((F) & ARM_TBFLAG_CONDEXEC_MASK) >> ARM_TBFLAG_CONDEXEC_SHIFT)
 #define ARM_TBFLAG_BSWAP_CODE(F) \
     (((F) & ARM_TBFLAG_BSWAP_CODE_MASK) >> ARM_TBFLAG_BSWAP_CODE_SHIFT)
+#define ARM_TBFLAG_CPACR_FPEN(F) \
+    (((F) & ARM_TBFLAG_CPACR_FPEN_MASK) >> ARM_TBFLAG_CPACR_FPEN_SHIFT)
 #define ARM_TBFLAG_AA64_EL(F) \
     (((F) & ARM_TBFLAG_AA64_EL_MASK) >> ARM_TBFLAG_AA64_EL_SHIFT)
+#define ARM_TBFLAG_AA64_FPEN(F) \
+    (((F) & ARM_TBFLAG_AA64_FPEN_MASK) >> ARM_TBFLAG_AA64_FPEN_SHIFT)
 
 static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
+    int fpen = extract32(env->cp15.c1_coproc, 20, 2);
+
     if (is_a64(env)) {
         *pc = env->pc;
         *flags = ARM_TBFLAG_AARCH64_STATE_MASK
             | (arm_current_pl(env) << ARM_TBFLAG_AA64_EL_SHIFT);
+        if (fpen == 3 || (fpen == 1 && arm_current_pl(env) != 0)) {
+            *flags |= ARM_TBFLAG_AA64_FPEN_MASK;
+        }
     } else {
         int privmode;
         *pc = env->regs[15];
@@ -1157,9 +1172,13 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
         if (privmode) {
             *flags |= ARM_TBFLAG_PRIV_MASK;
         }
-        if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) {
+        if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)
+            || arm_el_is_aa64(env, 1)) {
             *flags |= ARM_TBFLAG_VFPEN_MASK;
         }
+        if (fpen == 3 || (fpen == 1 && arm_current_pl(env) != 0)) {
+            *flags |= ARM_TBFLAG_CPACR_FPEN_MASK;
+        }
     }
 
     *cs_base = 0;
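The CPACR.FPEN handling added in both branches above can be read as a single predicate (sketch only; the helper name is not part of the patch): bits [21:20] of CPACR/CPACR_EL1 enable FP/SIMD at all exception levels when 0b11, enable it only above EL0 when 0b01, and cause traps otherwise.

    static inline bool sketch_cpacr_fpen_allows(CPUARMState *env)
    {
        int fpen = extract32(env->cp15.c1_coproc, 20, 2);

        return fpen == 3 || (fpen == 1 && arm_current_pl(env) != 0);
    }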
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index 8426bf1333..8daa622bdc 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -32,6 +32,104 @@ static inline void set_feature(CPUARMState *env, int feature)
     env->features |= 1ULL << feature;
 }
 
+#ifndef CONFIG_USER_ONLY
+static uint64_t a57_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    /* Number of processors is in [25:24]; otherwise we RAZ */
+    return (smp_cpus - 1) << 24;
+}
+#endif
+
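For concreteness (illustrative helper, not patch code): the read function packs (number of CPUs - 1) into the two-bit field at [25:24], so a 4-CPU configuration reads back as 0x03000000 and everything else is RAZ.

    static inline uint64_t sketch_expected_l2ctlr(int ncpus)
    {
        return (uint64_t)(ncpus - 1) << 24;   /* e.g. 4 CPUs -> 0x03000000 */
    }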
+static const ARMCPRegInfo cortexa57_cp_reginfo[] = {
+#ifndef CONFIG_USER_ONLY
+    { .name = "L2CTLR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 11, .crm = 0, .opc2 = 2,
+      .access = PL1_RW, .readfn = a57_l2ctlr_read,
+      .writefn = arm_cp_write_ignore },
+    { .name = "L2CTLR",
+      .cp = 15, .opc1 = 1, .crn = 9, .crm = 0, .opc2 = 2,
+      .access = PL1_RW, .readfn = a57_l2ctlr_read,
+      .writefn = arm_cp_write_ignore },
+#endif
+    { .name = "L2ECTLR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 11, .crm = 0, .opc2 = 3,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "L2ECTLR",
+      .cp = 15, .opc1 = 1, .crn = 9, .crm = 0, .opc2 = 3,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "L2ACTLR", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 0, .opc2 = 0,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUACTLR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 0,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUACTLR",
+      .cp = 15, .opc1 = 0, .crm = 15,
+      .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+    { .name = "CPUECTLR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 1,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUECTLR",
+      .cp = 15, .opc1 = 1, .crm = 15,
+      .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+    { .name = "CPUMERRSR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 2,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "CPUMERRSR",
+      .cp = 15, .opc1 = 2, .crm = 15,
+      .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+    { .name = "L2MERRSR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 2, .opc2 = 3,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "L2MERRSR",
+      .cp = 15, .opc1 = 3, .crm = 15,
+      .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+    REGINFO_SENTINEL
+};
+
+static void aarch64_a57_initfn(Object *obj)
+{
+    ARMCPU *cpu = ARM_CPU(obj);
+
+    set_feature(&cpu->env, ARM_FEATURE_V8);
+    set_feature(&cpu->env, ARM_FEATURE_VFP4);
+    set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
+    set_feature(&cpu->env, ARM_FEATURE_NEON);
+    set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
+    set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+    set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+    cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A57;
+    cpu->midr = 0x411fd070;
+    cpu->reset_fpsid = 0x41034070;
+    cpu->mvfr0 = 0x10110222;
+    cpu->mvfr1 = 0x12111111;
+    cpu->mvfr2 = 0x00000043;
+    cpu->ctr = 0x8444c004;
+    cpu->reset_sctlr = 0x00c50838;
+    cpu->id_pfr0 = 0x00000131;
+    cpu->id_pfr1 = 0x00011011;
+    cpu->id_dfr0 = 0x03010066;
+    cpu->id_afr0 = 0x00000000;
+    cpu->id_mmfr0 = 0x10101105;
+    cpu->id_mmfr1 = 0x40000000;
+    cpu->id_mmfr2 = 0x01260000;
+    cpu->id_mmfr3 = 0x02102211;
+    cpu->id_isar0 = 0x02101110;
+    cpu->id_isar1 = 0x13112111;
+    cpu->id_isar2 = 0x21232042;
+    cpu->id_isar3 = 0x01112131;
+    cpu->id_isar4 = 0x00011142;
+    cpu->id_aa64pfr0 = 0x00002222;
+    cpu->id_aa64dfr0 = 0x10305106;
+    cpu->id_aa64isar0 = 0x00010000;
+    cpu->id_aa64mmfr0 = 0x00001124;
+    cpu->clidr = 0x0a200023;
+    cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
+    cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
+    cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */
+    cpu->dcz_blocksize = 4; /* 64 bytes */
+}
+
 #ifdef CONFIG_USER_ONLY
 static void aarch64_any_initfn(Object *obj)
 {
@@ -41,11 +139,11 @@ static void aarch64_any_initfn(Object *obj)
     set_feature(&cpu->env, ARM_FEATURE_VFP4);
     set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
-    set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
     set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
     set_feature(&cpu->env, ARM_FEATURE_V7MP);
     set_feature(&cpu->env, ARM_FEATURE_AARCH64);
     cpu->ctr = 0x80030003; /* 32 byte I and D cacheline size, VIPT icache */
+    cpu->dcz_blocksize = 7; /* 512 bytes */
 }
 #endif
 
@@ -56,6 +154,7 @@ typedef struct ARMCPUInfo {
 } ARMCPUInfo;
 
 static const ARMCPUInfo aarch64_cpus[] = {
+    { .name = "cortex-a57",         .initfn = aarch64_a57_initfn },
 #ifdef CONFIG_USER_ONLY
     { .name = "any",         .initfn = aarch64_any_initfn },
 #endif
@@ -73,18 +172,22 @@ static void aarch64_cpu_finalizefn(Object *obj)
 static void aarch64_cpu_set_pc(CPUState *cs, vaddr value)
 {
     ARMCPU *cpu = ARM_CPU(cs);
-    /*
-     * TODO: this will need updating for system emulation,
-     * when the core may be in AArch32 mode.
+    /* It's OK to look at env for the current mode here, because it's
+     * never possible for an AArch64 TB to chain to an AArch32 TB.
+     * (Otherwise we would need to use synchronize_from_tb instead.)
      */
-    cpu->env.pc = value;
+    if (is_a64(&cpu->env)) {
+        cpu->env.pc = value;
+    } else {
+        cpu->env.regs[15] = value;
+    }
 }
 
 static void aarch64_cpu_class_init(ObjectClass *oc, void *data)
 {
     CPUClass *cc = CPU_CLASS(oc);
 
-    cc->dump_state = aarch64_cpu_dump_state;
+    cc->do_interrupt = aarch64_cpu_do_interrupt;
     cc->set_pc = aarch64_cpu_set_pc;
     cc->gdb_read_register = aarch64_cpu_gdb_read_register;
     cc->gdb_write_register = aarch64_cpu_gdb_write_register;
diff --git a/target-arm/gdbstub64.c b/target-arm/gdbstub64.c
index e8a82952a4..8f3b8d1778 100644
--- a/target-arm/gdbstub64.c
+++ b/target-arm/gdbstub64.c
@@ -32,10 +32,8 @@ int aarch64_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
     switch (n) {
     case 31:
         return gdb_get_reg64(mem_buf, env->xregs[31]);
-        break;
     case 32:
         return gdb_get_reg64(mem_buf, env->pc);
-        break;
     case 33:
         return gdb_get_reg32(mem_buf, pstate_read(env));
     }
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index ec0258295f..bf921ccdc0 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -23,6 +23,7 @@
 #include "qemu/host-utils.h"
 #include "sysemu/sysemu.h"
 #include "qemu/bitops.h"
+#include "internals.h"
 
 /* C2.4.7 Multiply and divide */
 /* special cases for 0 and LLONG_MIN are mandated by the standard */
@@ -436,3 +437,78 @@ float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
     set_float_exception_flags(exflags, fpst);
     return r;
 }
+
+/* Handle a CPU exception.  */
+void aarch64_cpu_do_interrupt(CPUState *cs)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+    target_ulong addr = env->cp15.c12_vbar;
+    int i;
+
+    if (arm_current_pl(env) == 0) {
+        if (env->aarch64) {
+            addr += 0x400;
+        } else {
+            addr += 0x600;
+        }
+    } else if (pstate_read(env) & PSTATE_SP) {
+        addr += 0x200;
+    }
+
+    arm_log_exception(cs->exception_index);
+    qemu_log_mask(CPU_LOG_INT, "...from EL%d\n", arm_current_pl(env));
+    if (qemu_loglevel_mask(CPU_LOG_INT)
+        && !excp_is_internal(cs->exception_index)) {
+        qemu_log_mask(CPU_LOG_INT, "...with ESR 0x%" PRIx32 "\n",
+                      env->exception.syndrome);
+    }
+
+    env->cp15.esr_el1 = env->exception.syndrome;
+    env->cp15.far_el1 = env->exception.vaddress;
+
+    switch (cs->exception_index) {
+    case EXCP_PREFETCH_ABORT:
+    case EXCP_DATA_ABORT:
+        qemu_log_mask(CPU_LOG_INT, "...with FAR 0x%" PRIx64 "\n",
+                      env->cp15.far_el1);
+        break;
+    case EXCP_BKPT:
+    case EXCP_UDEF:
+    case EXCP_SWI:
+        break;
+    case EXCP_IRQ:
+        addr += 0x80;
+        break;
+    case EXCP_FIQ:
+        addr += 0x100;
+        break;
+    default:
+        cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
+    }
+
+    if (is_a64(env)) {
+        env->banked_spsr[0] = pstate_read(env);
+        env->sp_el[arm_current_pl(env)] = env->xregs[31];
+        env->xregs[31] = env->sp_el[1];
+        env->elr_el1 = env->pc;
+    } else {
+        env->banked_spsr[0] = cpsr_read(env);
+        if (!env->thumb) {
+            env->cp15.esr_el1 |= 1 << 25;
+        }
+        env->elr_el1 = env->regs[15];
+
+        for (i = 0; i < 15; i++) {
+            env->xregs[i] = env->regs[i];
+        }
+
+        env->condexec_bits = 0;
+    }
+
+    pstate_write(env, PSTATE_DAIF | PSTATE_MODE_EL1h);
+    env->aarch64 = 1;
+
+    env->pc = addr;
+    cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
+}
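The address arithmetic above (the 0x200/0x400/0x600 adjustments plus the per-exception offsets in the switch) selects entries from the architectural EL1 vector table; written out as a layout sketch (SError entries are unused because QEMU does not generate them here):

    /*                            sync    IRQ     FIQ     SError
     * current EL, SP_EL0        0x000   0x080   0x100   0x180
     * current EL, SP_ELx        0x200   0x280   0x300   0x380
     * lower EL, AArch64         0x400   0x480   0x500   0x580
     * lower EL, AArch32         0x600   0x680   0x700   0x780
     */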
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 55077ed1b6..43c1b4f01d 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -1,4 +1,5 @@
 #include "cpu.h"
+#include "internals.h"
 #include "exec/gdbstub.h"
 #include "helper.h"
 #include "qemu/host-utils.h"
@@ -9,7 +10,9 @@
 #include <zlib.h> /* For crc32 */
 
 #ifndef CONFIG_USER_ONLY
-static inline int get_phys_addr(CPUARMState *env, uint32_t address,
+#include "exec/softmmu_exec.h"
+
+static inline int get_phys_addr(CPUARMState *env, target_ulong address,
                                 int access_type, int is_user,
                                 hwaddr *phys_ptr, int *prot,
                                 target_ulong *page_size);
@@ -301,6 +304,17 @@ void init_cpreg_list(ARMCPU *cpu)
     g_list_free(keys);
 }
 
+/* Return true if extended addresses are enabled.
+ * This is always the case if our translation regime is 64 bit,
+ * but depends on TTBCR.EAE for 32 bit.
+ */
+static inline bool extended_addresses_enabled(CPUARMState *env)
+{
+    return arm_el_is_aa64(env, 1)
+        || ((arm_feature(env, ARM_FEATURE_LPAE)
+             && (env->cp15.c2_control & (1U << 31))));
+}
+
 static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
@@ -327,14 +341,15 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
 
-    if (env->cp15.c13_context != value && !arm_feature(env, ARM_FEATURE_MPU)) {
+    if (env->cp15.contextidr_el1 != value && !arm_feature(env, ARM_FEATURE_MPU)
+        && !extended_addresses_enabled(env)) {
         /* For VMSA (when not using the LPAE long descriptor page table
          * format) this register includes the ASID, so do a TLB flush.
          * For PMSA it is purely a process ID and no action is needed.
          */
         tlb_flush(CPU(cpu), 1);
     }
-    env->cp15.c13_context = value;
+    env->cp15.contextidr_el1 = value;
 }
 
 static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -380,17 +395,26 @@ static const ARMCPRegInfo cp_reginfo[] = {
      */
     { .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "FCSEIDR", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 0,
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c13_fcse),
+      .resetvalue = 0, .writefn = fcse_write, .raw_writefn = raw_write, },
+    { .name = "CONTEXTIDR", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 0, .crn = 13, .crm = 0, .opc2 = 1,
+      .access = PL1_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.contextidr_el1),
+      .resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, },
+    REGINFO_SENTINEL
+};
+
+static const ARMCPRegInfo not_v8_cp_reginfo[] = {
+    /* NB: Some of these registers exist in v8 but with more precise
+     * definitions that don't use CP_ANY wildcards (mostly in v8_cp_reginfo[]).
+     */
     /* MMU Domain access control / MPU write buffer control */
     { .name = "DACR", .cp = 15,
       .crn = 3, .crm = CP_ANY, .opc1 = CP_ANY, .opc2 = CP_ANY,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c3),
       .resetvalue = 0, .writefn = dacr_write, .raw_writefn = raw_write, },
-    { .name = "FCSEIDR", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 0,
-      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c13_fcse),
-      .resetvalue = 0, .writefn = fcse_write, .raw_writefn = raw_write, },
-    { .name = "CONTEXTIDR", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 1,
-      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c13_context),
-      .resetvalue = 0, .writefn = contextidr_write, .raw_writefn = raw_write, },
     /* ??? This covers not just the impdef TLB lockdown registers but also
      * some v7VMSA registers relating to TEX remap, so it is overly broad.
      */
@@ -472,7 +496,8 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
     { .name = "DMB", .cp = 15, .crn = 7, .crm = 10, .opc1 = 0, .opc2 = 5,
       .access = PL0_W, .type = ARM_CP_NOP },
     { .name = "IFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 2,
-      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c6_insn),
+      .access = PL1_RW,
+      .fieldoffset = offsetofhigh32(CPUARMState, cp15.far_el1),
       .resetvalue = 0, },
     /* Watchpoint Fault Address Register: should actually only be present
      * for 1136, 1176, 11MPCore.
@@ -647,6 +672,21 @@ static void csselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
     env->cp15.c0_cssel = value & 0xf;
 }
 
+static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    CPUState *cs = ENV_GET_CPU(env);
+    uint64_t ret = 0;
+
+    if (cs->interrupt_request & CPU_INTERRUPT_HARD) {
+        ret |= CPSR_I;
+    }
+    if (cs->interrupt_request & CPU_INTERRUPT_FIQ) {
+        ret |= CPSR_F;
+    }
+    /* External aborts are not possible in QEMU so the A bit is always clear */
+    return ret;
+}
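Reusing CPSR_I and CPSR_F as the result bits works because those masks (bits 7 and 6) coincide with the I and F bits of ISR/ISR_EL1; a compile-time check along these lines (illustrative, not in the patch) would document that assumption:

    QEMU_BUILD_BUG_ON(CPSR_I != (1U << 7));
    QEMU_BUILD_BUG_ON(CPSR_F != (1U << 6));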
+
 static const ARMCPRegInfo v7_cp_reginfo[] = {
     /* DBGDRAR, DBGDSAR: always RAZ since we don't implement memory mapped
      * debug components
@@ -741,8 +781,18 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
     /* Auxiliary ID register: this actually has an IMPDEF value but for now
      * just RAZ for all cores:
      */
-    { .name = "AIDR", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 7,
+    { .name = "AIDR", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 7,
       .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+    /* Auxiliary fault status registers: these also are IMPDEF, and we
+     * choose to RAZ/WI for all cores.
+     */
+    { .name = "AFSR0_EL1", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 1, .opc2 = 0,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "AFSR1_EL1", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 1, .opc2 = 1,
+      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
     /* MAIR can just read-as-written because we don't implement caches
      * and so don't need to care about memory attributes.
      */
@@ -763,6 +813,9 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
       .cp = 15, .opc1 = 0, .crn = 10, .crm = 2, .opc2 = 1, .access = PL1_RW,
       .fieldoffset = offsetofhigh32(CPUARMState, cp15.mair_el1),
       .resetfn = arm_cp_reset_ignore },
+    { .name = "ISR_EL1", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 1, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_R, .readfn = isr_read },
     REGINFO_SENTINEL
 };
 
@@ -1139,27 +1192,17 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
 static void par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     if (arm_feature(env, ARM_FEATURE_LPAE)) {
-        env->cp15.c7_par = value;
+        env->cp15.par_el1 = value;
     } else if (arm_feature(env, ARM_FEATURE_V7)) {
-        env->cp15.c7_par = value & 0xfffff6ff;
+        env->cp15.par_el1 = value & 0xfffff6ff;
     } else {
-        env->cp15.c7_par = value & 0xfffff1ff;
+        env->cp15.par_el1 = value & 0xfffff1ff;
     }
 }
 
 #ifndef CONFIG_USER_ONLY
 /* get_phys_addr() isn't present for user-mode-only targets */
 
-/* Return true if extended addresses are enabled, ie this is an
- * LPAE implementation and we are using the long-descriptor translation
- * table format because the TTBCR EAE bit is set.
- */
-static inline bool extended_addresses_enabled(CPUARMState *env)
-{
-    return arm_feature(env, ARM_FEATURE_LPAE)
-        && (env->cp15.c2_control & (1U << 31));
-}
-
 static CPAccessResult ats_access(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     if (ri->opc2 & 4) {
@@ -1200,8 +1243,7 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
              * fault.
              */
         }
-        env->cp15.c7_par = par64;
-        env->cp15.c7_par_hi = par64 >> 32;
+        env->cp15.par_el1 = par64;
     } else {
         /* ret is a DFSR/IFSR value for the short descriptor
          * translation table format (with WnR always clear).
@@ -1211,16 +1253,15 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
             /* We do not set any attribute bits in the PAR */
             if (page_size == (1 << 24)
                 && arm_feature(env, ARM_FEATURE_V7)) {
-                env->cp15.c7_par = (phys_addr & 0xff000000) | 1 << 1;
+                env->cp15.par_el1 = (phys_addr & 0xff000000) | 1 << 1;
             } else {
-                env->cp15.c7_par = phys_addr & 0xfffff000;
+                env->cp15.par_el1 = phys_addr & 0xfffff000;
             }
         } else {
-            env->cp15.c7_par = ((ret & (1 << 10)) >> 5) |
+            env->cp15.par_el1 = ((ret & (1 << 10)) >> 5) |
                 ((ret & (1 << 12)) >> 6) |
                 ((ret & 0xf) << 1) | 1;
         }
-        env->cp15.c7_par_hi = 0;
     }
 }
 #endif
@@ -1228,7 +1269,7 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 static const ARMCPRegInfo vapa_cp_reginfo[] = {
     { .name = "PAR", .cp = 15, .crn = 7, .crm = 4, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .resetvalue = 0,
-      .fieldoffset = offsetof(CPUARMState, cp15.c7_par),
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.par_el1),
       .writefn = par_write },
 #ifndef CONFIG_USER_ONLY
     { .name = "ATS", .cp = 15, .crn = 7, .crm = 8, .opc1 = 0, .opc2 = CP_ANY,
@@ -1271,40 +1312,44 @@ static uint32_t extended_mpu_ap_bits(uint32_t val)
 static void pmsav5_data_ap_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                  uint64_t value)
 {
-    env->cp15.c5_data = extended_mpu_ap_bits(value);
+    env->cp15.pmsav5_data_ap = extended_mpu_ap_bits(value);
 }
 
 static uint64_t pmsav5_data_ap_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    return simple_mpu_ap_bits(env->cp15.c5_data);
+    return simple_mpu_ap_bits(env->cp15.pmsav5_data_ap);
 }
 
 static void pmsav5_insn_ap_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                  uint64_t value)
 {
-    env->cp15.c5_insn = extended_mpu_ap_bits(value);
+    env->cp15.pmsav5_insn_ap = extended_mpu_ap_bits(value);
 }
 
 static uint64_t pmsav5_insn_ap_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    return simple_mpu_ap_bits(env->cp15.c5_insn);
+    return simple_mpu_ap_bits(env->cp15.pmsav5_insn_ap);
 }
 
 static const ARMCPRegInfo pmsav5_cp_reginfo[] = {
     { .name = "DATA_AP", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .type = ARM_CP_NO_MIGRATE,
-      .fieldoffset = offsetof(CPUARMState, cp15.c5_data), .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.pmsav5_data_ap),
+      .resetvalue = 0,
       .readfn = pmsav5_data_ap_read, .writefn = pmsav5_data_ap_write, },
     { .name = "INSN_AP", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 1,
       .access = PL1_RW, .type = ARM_CP_NO_MIGRATE,
-      .fieldoffset = offsetof(CPUARMState, cp15.c5_insn), .resetvalue = 0,
+      .fieldoffset = offsetof(CPUARMState, cp15.pmsav5_insn_ap),
+      .resetvalue = 0,
       .readfn = pmsav5_insn_ap_read, .writefn = pmsav5_insn_ap_write, },
     { .name = "DATA_EXT_AP", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 2,
       .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.c5_data), .resetvalue = 0, },
+      .fieldoffset = offsetof(CPUARMState, cp15.pmsav5_data_ap),
+      .resetvalue = 0, },
     { .name = "INSN_EXT_AP", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 3,
       .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.c5_insn), .resetvalue = 0, },
+      .fieldoffset = offsetof(CPUARMState, cp15.pmsav5_insn_ap),
+      .resetvalue = 0, },
     { .name = "DCACHE_CFG", .cp = 15, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW,
       .fieldoffset = offsetof(CPUARMState, cp15.c2_data), .resetvalue = 0, },
@@ -1406,11 +1451,16 @@ static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 
 static const ARMCPRegInfo vmsa_cp_reginfo[] = {
     { .name = "DFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0,
-      .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.c5_data), .resetvalue = 0, },
+      .access = PL1_RW, .type = ARM_CP_NO_MIGRATE,
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.esr_el1),
+      .resetfn = arm_cp_reset_ignore, },
     { .name = "IFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 1,
       .access = PL1_RW,
-      .fieldoffset = offsetof(CPUARMState, cp15.c5_insn), .resetvalue = 0, },
+      .fieldoffset = offsetof(CPUARMState, cp15.ifsr_el2), .resetvalue = 0, },
+    { .name = "ESR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .crn = 5, .crm = 2, .opc1 = 0, .opc2 = 0,
+      .access = PL1_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.esr_el1), .resetvalue = 0, },
     { .name = "TTBR0_EL1", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el1),
@@ -1428,8 +1478,10 @@ static const ARMCPRegInfo vmsa_cp_reginfo[] = {
       .access = PL1_RW, .type = ARM_CP_NO_MIGRATE, .writefn = vmsa_ttbcr_write,
       .resetfn = arm_cp_reset_ignore, .raw_writefn = vmsa_ttbcr_raw_write,
       .fieldoffset = offsetoflow32(CPUARMState, cp15.c2_control) },
-    { .name = "DFAR", .cp = 15, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 0,
-      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c6_data),
+    /* 64-bit FAR; this entry also gives us the AArch32 DFAR */
+    { .name = "FAR_EL1", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .crn = 6, .crm = 0, .opc1 = 0, .opc2 = 0,
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.far_el1),
       .resetvalue = 0, },
     REGINFO_SENTINEL
 };
@@ -1469,7 +1521,8 @@ static void omap_cachemaint_write(CPUARMState *env, const ARMCPRegInfo *ri,
 static const ARMCPRegInfo omap_cp_reginfo[] = {
     { .name = "DFSR", .cp = 15, .crn = 5, .crm = CP_ANY,
       .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_RW, .type = ARM_CP_OVERRIDE,
-      .fieldoffset = offsetof(CPUARMState, cp15.c5_data), .resetvalue = 0, },
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.esr_el1),
+      .resetvalue = 0, },
     { .name = "", .cp = 15, .crn = 15, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .type = ARM_CP_NOP },
     { .name = "TICONFIG", .cp = 15, .crn = 15, .crm = 1, .opc1 = 0, .opc2 = 0,
@@ -1619,24 +1672,6 @@ static const ARMCPRegInfo mpidr_cp_reginfo[] = {
     REGINFO_SENTINEL
 };
 
-static uint64_t par64_read(CPUARMState *env, const ARMCPRegInfo *ri)
-{
-    return ((uint64_t)env->cp15.c7_par_hi << 32) | env->cp15.c7_par;
-}
-
-static void par64_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                        uint64_t value)
-{
-    env->cp15.c7_par_hi = value >> 32;
-    env->cp15.c7_par = value;
-}
-
-static void par64_reset(CPUARMState *env, const ARMCPRegInfo *ri)
-{
-    env->cp15.c7_par_hi = 0;
-    env->cp15.c7_par = 0;
-}
-
 static const ARMCPRegInfo lpae_cp_reginfo[] = {
     /* NOP AMAIR0/1: the override is because these clash with the rather
      * broadly specified TLB_LOCKDOWN entry in the generic cp_reginfo.
@@ -1656,7 +1691,7 @@ static const ARMCPRegInfo lpae_cp_reginfo[] = {
       .access = PL0_R, .type = ARM_CP_CONST|ARM_CP_64BIT, .resetvalue = 0 },
     { .name = "PAR", .cp = 15, .crm = 7, .opc1 = 0,
       .access = PL1_RW, .type = ARM_CP_64BIT,
-      .readfn = par64_read, .writefn = par64_write, .resetfn = par64_reset },
+      .fieldoffset = offsetof(CPUARMState, cp15.par_el1), .resetvalue = 0 },
     { .name = "TTBR0", .cp = 15, .crm = 2, .opc1 = 0,
       .access = PL1_RW, .type = ARM_CP_64BIT | ARM_CP_NO_MIGRATE,
       .fieldoffset = offsetof(CPUARMState, cp15.ttbr0_el1),
@@ -1690,6 +1725,20 @@ static void aa64_fpsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
     vfp_set_fpsr(env, value);
 }
 
+static CPAccessResult aa64_daif_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_UMA)) {
+        return CP_ACCESS_TRAP;
+    }
+    return CP_ACCESS_OK;
+}
+
+static void aa64_daif_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                            uint64_t value)
+{
+    env->daif = value & PSTATE_DAIF;
+}
+
 static CPAccessResult aa64_cacheop_access(CPUARMState *env,
                                           const ARMCPRegInfo *ri)
 {
@@ -1729,6 +1778,50 @@ static void tlbi_aa64_asid_write(CPUARMState *env, const ARMCPRegInfo *ri,
     tlb_flush(CPU(cpu), asid == 0);
 }
 
+static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    /* We don't implement EL2, so the only control on DC ZVA is the
+     * bit in the SCTLR which can prohibit access for EL0.
+     */
+    if (arm_current_pl(env) == 0 && !(env->cp15.c1_sys & SCTLR_DZE)) {
+        return CP_ACCESS_TRAP;
+    }
+    return CP_ACCESS_OK;
+}
+
+static uint64_t aa64_dczid_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    int dzp_bit = 1 << 4;
+
+    /* DZP indicates whether DC ZVA access is allowed */
+    if (aa64_zva_access(env, NULL) != CP_ACCESS_OK) {
+        dzp_bit = 0;
+    }
+    return cpu->dcz_blocksize | dzp_bit;
+}
+
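As background for the read function above (an architecture detail, not patch code): DCZID_EL0[3:0] is log2 of the DC ZVA block size in words, so the size in bytes is 4 << BS; that is why dcz_blocksize values of 4 and 7 in cpu64.c correspond to 64-byte and 512-byte blocks.

    /* Sketch: block size in bytes implied by a DCZID_EL0 value */
    static inline int sketch_dczid_block_bytes(uint64_t dczid)
    {
        return 4 << extract64(dczid, 0, 4);   /* BS=4 -> 64, BS=7 -> 512 */
    }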
+static CPAccessResult sp_el0_access(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    if (!(env->pstate & PSTATE_SP)) {
+        /* Access to SP_EL0 is undefined if it's being used as
+         * the stack pointer.
+         */
+        return CP_ACCESS_TRAP_UNCATEGORIZED;
+    }
+    return CP_ACCESS_OK;
+}
+
+static uint64_t spsel_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    return env->pstate & PSTATE_SP;
+}
+
+static void spsel_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t val)
+{
+    update_spsel(env, val);
+}
+
 static const ARMCPRegInfo v8_cp_reginfo[] = {
     /* Minimal set of EL0-visible registers. This will need to be expanded
      * significantly for system emulation of AArch64 CPUs.
@@ -1736,19 +1829,30 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
     { .name = "NZCV", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 2,
       .access = PL0_RW, .type = ARM_CP_NZCV },
+    { .name = "DAIF", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 2,
+      .type = ARM_CP_NO_MIGRATE,
+      .access = PL0_RW, .accessfn = aa64_daif_access,
+      .fieldoffset = offsetof(CPUARMState, daif),
+      .writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore },
     { .name = "FPCR", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4,
       .access = PL0_RW, .readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write },
     { .name = "FPSR", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4,
       .access = PL0_RW, .readfn = aa64_fpsr_read, .writefn = aa64_fpsr_write },
-    /* Prohibit use of DC ZVA. OPTME: implement DC ZVA and allow its use.
-     * For system mode the DZP bit here will need to be computed, not constant.
-     */
     { .name = "DCZID_EL0", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 3, .opc2 = 7, .crn = 0, .crm = 0,
-      .access = PL0_R, .type = ARM_CP_CONST,
-      .resetvalue = 0x10 },
+      .access = PL0_R, .type = ARM_CP_NO_MIGRATE,
+      .readfn = aa64_dczid_read },
+    { .name = "DC_ZVA", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 1,
+      .access = PL0_W, .type = ARM_CP_DC_ZVA,
+#ifndef CONFIG_USER_ONLY
+      /* Avoid overhead of an access check that always passes in user-mode */
+      .accessfn = aa64_zva_access,
+#endif
+    },
     { .name = "CURRENTEL", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 0, .opc2 = 2, .crn = 4, .crm = 2,
       .access = PL1_R, .type = ARM_CP_CURRENTEL },
@@ -1836,6 +1940,93 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
       .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
       .writefn = tlbi_aa64_vaa_write },
+#ifndef CONFIG_USER_ONLY
+    /* 64 bit address translation operations */
+    { .name = "AT_S1E1R", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 0,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE, .writefn = ats_write },
+    { .name = "AT_S1E1W", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 1,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE, .writefn = ats_write },
+    { .name = "AT_S1E0R", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 2,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE, .writefn = ats_write },
+    { .name = "AT_S1E0W", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 3,
+      .access = PL1_W, .type = ARM_CP_NO_MIGRATE, .writefn = ats_write },
+#endif
+    /* 32 bit TLB invalidates, Inner Shareable */
+    { .name = "TLBIALLIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write },
+    { .name = "TLBIMVAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
+    { .name = "TLBIASIDIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write },
+    { .name = "TLBIMVAAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimvaa_write },
+    { .name = "TLBIMVALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
+    { .name = "TLBIMVAALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimvaa_write },
+    /* 32 bit ITLB invalidates */
+    { .name = "ITLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write },
+    { .name = "ITLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
+    { .name = "ITLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 2,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write },
+    /* 32 bit DTLB invalidates */
+    { .name = "DTLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write },
+    { .name = "DTLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
+    { .name = "DTLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 2,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write },
+    /* 32 bit TLB invalidates */
+    { .name = "TLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write },
+    { .name = "TLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
+    { .name = "TLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write },
+    { .name = "TLBIMVAA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimvaa_write },
+    { .name = "TLBIMVAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
+    { .name = "TLBIMVAAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimvaa_write },
+    /* 32 bit cache operations */
+    { .name = "ICIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 0,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "BPIALLUIS", .cp = 15, .opc1 = 0, .crn = 7, .crm = 1, .opc2 = 6,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "ICIALLU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 0,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "ICIMVAU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 1,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "BPIALL", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 6,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "BPIMVA", .cp = 15, .opc1 = 0, .crn = 7, .crm = 5, .opc2 = 7,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "DCIMVAC", .cp = 15, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 1,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "DCISW", .cp = 15, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 2,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "DCCMVAC", .cp = 15, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 1,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "DCCSW", .cp = 15, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "DCCMVAU", .cp = 15, .opc1 = 0, .crn = 7, .crm = 11, .opc2 = 1,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "DCCIMVAC", .cp = 15, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 1,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    { .name = "DCCISW", .cp = 15, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2,
+      .type = ARM_CP_NOP, .access = PL1_W },
+    /* MMU Domain access control / MPU write buffer control */
+    { .name = "DACR", .cp = 15,
+      .opc1 = 0, .crn = 3, .crm = 0, .opc2 = 0,
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c3),
+      .resetvalue = 0, .writefn = dacr_write, .raw_writefn = raw_write, },
     /* Dummy implementation of monitor debug system control register:
      * we don't support debug.
      */
@@ -1846,6 +2037,27 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
     { .name = "OSLAR_EL1", .state = ARM_CP_STATE_AA64,
       .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 4,
       .access = PL1_W, .type = ARM_CP_NOP },
+    { .name = "ELR_EL1", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_NO_MIGRATE,
+      .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 1,
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, elr_el1) },
+    { .name = "SPSR_EL1", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_NO_MIGRATE,
+      .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 0,
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[0]) },
+    /* We rely on the access checks not allowing the guest to write to the
+     * state field when SPSel indicates that it's being used as the stack
+     * pointer.
+     */
+    { .name = "SP_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 1, .opc2 = 0,
+      .access = PL1_RW, .accessfn = sp_el0_access,
+      .type = ARM_CP_NO_MIGRATE,
+      .fieldoffset = offsetof(CPUARMState, sp_el[0]) },
+    { .name = "SPSel", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 2, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE,
+      .access = PL1_RW, .readfn = spsel_read, .writefn = spsel_write },
     REGINFO_SENTINEL
 };
 
@@ -1912,50 +2124,71 @@ void register_cp_regs_for_features(ARMCPU *cpu)
     }
 
     define_arm_cp_regs(cpu, cp_reginfo);
+    if (!arm_feature(env, ARM_FEATURE_V8)) {
+        /* Must go early as it is full of wildcards that may be
+         * overridden by later definitions.
+         */
+        define_arm_cp_regs(cpu, not_v8_cp_reginfo);
+    }
+
     if (arm_feature(env, ARM_FEATURE_V6)) {
         /* The ID registers all have impdef reset values */
         ARMCPRegInfo v6_idregs[] = {
-            { .name = "ID_PFR0", .cp = 15, .crn = 0, .crm = 1,
-              .opc1 = 0, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_PFR0", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 0,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_pfr0 },
-            { .name = "ID_PFR1", .cp = 15, .crn = 0, .crm = 1,
-              .opc1 = 0, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_PFR1", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 1,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_pfr1 },
-            { .name = "ID_DFR0", .cp = 15, .crn = 0, .crm = 1,
-              .opc1 = 0, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_DFR0", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 2,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_dfr0 },
-            { .name = "ID_AFR0", .cp = 15, .crn = 0, .crm = 1,
-              .opc1 = 0, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_AFR0", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 3,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_afr0 },
-            { .name = "ID_MMFR0", .cp = 15, .crn = 0, .crm = 1,
-              .opc1 = 0, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_MMFR0", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 4,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_mmfr0 },
-            { .name = "ID_MMFR1", .cp = 15, .crn = 0, .crm = 1,
-              .opc1 = 0, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_MMFR1", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 5,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_mmfr1 },
-            { .name = "ID_MMFR2", .cp = 15, .crn = 0, .crm = 1,
-              .opc1 = 0, .opc2 = 6, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_MMFR2", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 6,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_mmfr2 },
-            { .name = "ID_MMFR3", .cp = 15, .crn = 0, .crm = 1,
-              .opc1 = 0, .opc2 = 7, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_MMFR3", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 7,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_mmfr3 },
-            { .name = "ID_ISAR0", .cp = 15, .crn = 0, .crm = 2,
-              .opc1 = 0, .opc2 = 0, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_isar0 },
-            { .name = "ID_ISAR1", .cp = 15, .crn = 0, .crm = 2,
-              .opc1 = 0, .opc2 = 1, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_ISAR1", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 1,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_isar1 },
-            { .name = "ID_ISAR2", .cp = 15, .crn = 0, .crm = 2,
-              .opc1 = 0, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_ISAR2", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_isar2 },
-            { .name = "ID_ISAR3", .cp = 15, .crn = 0, .crm = 2,
-              .opc1 = 0, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_ISAR3", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 3,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_isar3 },
-            { .name = "ID_ISAR4", .cp = 15, .crn = 0, .crm = 2,
-              .opc1 = 0, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_ISAR4", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 4,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_isar4 },
-            { .name = "ID_ISAR5", .cp = 15, .crn = 0, .crm = 2,
-              .opc1 = 0, .opc2 = 5, .access = PL1_R, .type = ARM_CP_CONST,
+            { .name = "ID_ISAR5", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 5,
+              .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_isar5 },
             /* 6..7 are as yet unallocated and must RAZ */
             { .name = "ID_ISAR6", .cp = 15, .crn = 0, .crm = 2,
@@ -2014,7 +2247,12 @@ void register_cp_regs_for_features(ARMCPU *cpu)
             { .name = "ID_AA64DFR0_EL1", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0,
               .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_aa64dfr0 },
+              /* We mask out the PMUVer field, because we don't currently
+               * implement the PMU. Not advertising it prevents the guest
+               * from trying to use it and getting UNDEFs on registers we
+               * don't implement.
+               */
+              .resetvalue = cpu->id_aa64dfr0 & ~0xf00 },
             { .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1,
               .access = PL1_R, .type = ARM_CP_CONST,
@@ -2043,8 +2281,26 @@ void register_cp_regs_for_features(ARMCPU *cpu)
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 1,
               .access = PL1_R, .type = ARM_CP_CONST,
               .resetvalue = cpu->id_aa64mmfr1 },
+            { .name = "MVFR0_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 0,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->mvfr0 },
+            { .name = "MVFR1_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 1,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->mvfr1 },
+            { .name = "MVFR2_EL1", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2,
+              .access = PL1_R, .type = ARM_CP_CONST,
+              .resetvalue = cpu->mvfr2 },
             REGINFO_SENTINEL
         };
+        ARMCPRegInfo rvbar = {
+            .name = "RVBAR_EL1", .state = ARM_CP_STATE_AA64,
+            .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 2,
+            .type = ARM_CP_CONST, .access = PL1_R, .resetvalue = cpu->rvbar
+        };
+        define_one_arm_cp_reg(cpu, &rvbar);
         define_arm_cp_regs(cpu, v8_idregs);
         define_arm_cp_regs(cpu, v8_cp_reginfo);
         define_aarch64_debug_regs(cpu);
@@ -2098,8 +2354,9 @@ void register_cp_regs_for_features(ARMCPU *cpu)
      * be read-only (ie write causes UNDEF exception).
      */
     {
-        ARMCPRegInfo id_cp_reginfo[] = {
-            /* Note that the MIDR isn't a simple constant register because
+        ARMCPRegInfo id_pre_v8_midr_cp_reginfo[] = {
+            /* Pre-v8 MIDR space.
+             * Note that the MIDR isn't a simple constant register because
              * of the TI925 behaviour where writes to another register can
              * cause the MIDR value to change.
              *
@@ -2113,22 +2370,6 @@ void register_cp_regs_for_features(ARMCPU *cpu)
               .writefn = arm_cp_write_ignore, .raw_writefn = raw_write,
               .fieldoffset = offsetof(CPUARMState, cp15.c0_cpuid),
               .type = ARM_CP_OVERRIDE },
-            { .name = "MIDR_EL1", .state = ARM_CP_STATE_AA64,
-              .opc0 = 3, .opc1 = 0, .opc2 = 0, .crn = 0, .crm = 0,
-              .access = PL1_R, .resetvalue = cpu->midr, .type = ARM_CP_CONST },
-            { .name = "CTR",
-              .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 1,
-              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->ctr },
-            { .name = "CTR_EL0", .state = ARM_CP_STATE_AA64,
-              .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 0, .crm = 0,
-              .access = PL0_R, .accessfn = ctr_el0_access,
-              .type = ARM_CP_CONST, .resetvalue = cpu->ctr },
-            { .name = "TCMTR",
-              .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 2,
-              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
-            { .name = "TLBTR",
-              .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 3,
-              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
             /* crn = 0 op1 = 0 crm = 3..7 : currently unassigned; we RAZ. */
             { .name = "DUMMY",
               .cp = 15, .crn = 0, .crm = 3, .opc1 = 0, .opc2 = CP_ANY,
@@ -2147,6 +2388,37 @@ void register_cp_regs_for_features(ARMCPU *cpu)
               .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
             REGINFO_SENTINEL
         };
+        ARMCPRegInfo id_v8_midr_cp_reginfo[] = {
+            /* v8 MIDR -- the wildcard isn't necessary, nor is the
+             * variable-MIDR TI925 behaviour. Instead we have a single
+             * (strictly speaking IMPDEF) alias of the MIDR, REVIDR.
+             */
+            { .name = "MIDR_EL1", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 0, .opc2 = 0,
+              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->midr },
+            { .name = "REVIDR_EL1", .state = ARM_CP_STATE_BOTH,
+              .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 0, .opc2 = 6,
+              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->midr },
+            REGINFO_SENTINEL
+        };
+        ARMCPRegInfo id_cp_reginfo[] = {
+            /* These are common to v8 and pre-v8 */
+            { .name = "CTR",
+              .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 1,
+              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->ctr },
+            { .name = "CTR_EL0", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 0, .crm = 0,
+              .access = PL0_R, .accessfn = ctr_el0_access,
+              .type = ARM_CP_CONST, .resetvalue = cpu->ctr },
+            /* TCMTR and TLBTR exist in v8 but have no 64-bit versions */
+            { .name = "TCMTR",
+              .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 2,
+              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+            { .name = "TLBTR",
+              .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 3,
+              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+            REGINFO_SENTINEL
+        };
         ARMCPRegInfo crn0_wi_reginfo = {
             .name = "CRN0_WI", .cp = 15, .crn = 0, .crm = CP_ANY,
             .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_W,
@@ -2161,10 +2433,19 @@ void register_cp_regs_for_features(ARMCPU *cpu)
              * UNDEF.
              */
             define_one_arm_cp_reg(cpu, &crn0_wi_reginfo);
+            for (r = id_pre_v8_midr_cp_reginfo;
+                 r->type != ARM_CP_SENTINEL; r++) {
+                r->access = PL1_RW;
+            }
             for (r = id_cp_reginfo; r->type != ARM_CP_SENTINEL; r++) {
                 r->access = PL1_RW;
             }
         }
+        if (arm_feature(env, ARM_FEATURE_V8)) {
+            define_arm_cp_regs(cpu, id_v8_midr_cp_reginfo);
+        } else {
+            define_arm_cp_regs(cpu, id_pre_v8_midr_cp_reginfo);
+        }
         define_arm_cp_regs(cpu, id_cp_reginfo);
     }
 
@@ -2174,7 +2455,8 @@ void register_cp_regs_for_features(ARMCPU *cpu)
 
     if (arm_feature(env, ARM_FEATURE_AUXCR)) {
         ARMCPRegInfo auxcr = {
-            .name = "AUXCR", .cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 1,
+            .name = "ACTLR_EL1", .state = ARM_CP_STATE_BOTH,
+            .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 1,
             .access = PL1_RW, .type = ARM_CP_CONST,
             .resetvalue = cpu->reset_auxcr
         };
@@ -2182,12 +2464,39 @@ void register_cp_regs_for_features(ARMCPU *cpu)
     }
 
     if (arm_feature(env, ARM_FEATURE_CBAR)) {
-        ARMCPRegInfo cbar = {
-            .name = "CBAR", .cp = 15, .crn = 15, .crm = 0, .opc1 = 4, .opc2 = 0,
-            .access = PL1_R|PL3_W, .resetvalue = cpu->reset_cbar,
-            .fieldoffset = offsetof(CPUARMState, cp15.c15_config_base_address)
-        };
-        define_one_arm_cp_reg(cpu, &cbar);
+        if (arm_feature(env, ARM_FEATURE_AARCH64)) {
+            /* 32 bit view is [31:18] 0...0 [43:32]. */
+            uint32_t cbar32 = (extract64(cpu->reset_cbar, 18, 14) << 18)
+                | extract64(cpu->reset_cbar, 32, 12);
+            ARMCPRegInfo cbar_reginfo[] = {
+                { .name = "CBAR",
+                  .type = ARM_CP_CONST,
+                  .cp = 15, .crn = 15, .crm = 0, .opc1 = 4, .opc2 = 0,
+                  .access = PL1_R, .resetvalue = cpu->reset_cbar },
+                { .name = "CBAR_EL1", .state = ARM_CP_STATE_AA64,
+                  .type = ARM_CP_CONST,
+                  .opc0 = 3, .opc1 = 1, .crn = 15, .crm = 3, .opc2 = 0,
+                  .access = PL1_R, .resetvalue = cbar32 },
+                REGINFO_SENTINEL
+            };
+            /* We don't implement a r/w 64 bit CBAR currently */
+            assert(arm_feature(env, ARM_FEATURE_CBAR_RO));
+            define_arm_cp_regs(cpu, cbar_reginfo);
+        } else {
+            ARMCPRegInfo cbar = {
+                .name = "CBAR",
+                .cp = 15, .crn = 15, .crm = 0, .opc1 = 4, .opc2 = 0,
+                .access = PL1_R|PL3_W, .resetvalue = cpu->reset_cbar,
+                .fieldoffset = offsetof(CPUARMState,
+                                        cp15.c15_config_base_address)
+            };
+            if (arm_feature(env, ARM_FEATURE_CBAR_RO)) {
+                cbar.access = PL1_R;
+                cbar.fieldoffset = 0;
+                cbar.type = ARM_CP_CONST;
+            }
+            define_one_arm_cp_reg(cpu, &cbar);
+        }
     }
 
     /* Generic registers whose values depend on the implementation */
@@ -2684,12 +2993,11 @@ int arm_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int rw,
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;
 
+    env->exception.vaddress = address;
     if (rw == 2) {
         cs->exception_index = EXCP_PREFETCH_ABORT;
-        env->cp15.c6_insn = address;
     } else {
         cs->exception_index = EXCP_DATA_ABORT;
-        env->cp15.c6_data = address;
     }
     return 1;
 }
@@ -2846,37 +3154,6 @@ static void do_v7m_exception_exit(CPUARMState *env)
        pointer.  */
 }
 
-/* Exception names for debug logging; note that not all of these
- * precisely correspond to architectural exceptions.
- */
-static const char * const excnames[] = {
-    [EXCP_UDEF] = "Undefined Instruction",
-    [EXCP_SWI] = "SVC",
-    [EXCP_PREFETCH_ABORT] = "Prefetch Abort",
-    [EXCP_DATA_ABORT] = "Data Abort",
-    [EXCP_IRQ] = "IRQ",
-    [EXCP_FIQ] = "FIQ",
-    [EXCP_BKPT] = "Breakpoint",
-    [EXCP_EXCEPTION_EXIT] = "QEMU v7M exception exit",
-    [EXCP_KERNEL_TRAP] = "QEMU intercept of kernel commpage",
-    [EXCP_STREX] = "QEMU intercept of STREX",
-};
-
-static inline void arm_log_exception(int idx)
-{
-    if (qemu_loglevel_mask(CPU_LOG_INT)) {
-        const char *exc = NULL;
-
-        if (idx >= 0 && idx < ARRAY_SIZE(excnames)) {
-            exc = excnames[idx];
-        }
-        if (!exc) {
-            exc = "unknown";
-        }
-        qemu_log_mask(CPU_LOG_INT, "Taking exception %d [%s]\n", idx, exc);
-    }
-}
-
 void arm_v7m_cpu_do_interrupt(CPUState *cs)
 {
     ARMCPU *cpu = ARM_CPU(cs);
@@ -2907,6 +3184,9 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
         return;
     case EXCP_PREFETCH_ABORT:
     case EXCP_DATA_ABORT:
+        /* TODO: if we implemented the MPU registers, this is where we
+         * should set the MMFAR, etc. from exception.fsr and exception.vaddress.
+         */
         armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM);
         return;
     case EXCP_BKPT:
@@ -3021,19 +3301,26 @@ void arm_cpu_do_interrupt(CPUState *cs)
                 return;
             }
         }
-        env->cp15.c5_insn = 2;
+        env->exception.fsr = 2;
         /* Fall through to prefetch abort.  */
     case EXCP_PREFETCH_ABORT:
+        env->cp15.ifsr_el2 = env->exception.fsr;
+        env->cp15.far_el1 = deposit64(env->cp15.far_el1, 32, 32,
+                                      env->exception.vaddress);
         qemu_log_mask(CPU_LOG_INT, "...with IFSR 0x%x IFAR 0x%x\n",
-                      env->cp15.c5_insn, env->cp15.c6_insn);
+                      env->cp15.ifsr_el2, (uint32_t)env->exception.vaddress);
         new_mode = ARM_CPU_MODE_ABT;
         addr = 0x0c;
         mask = CPSR_A | CPSR_I;
         offset = 4;
         break;
     case EXCP_DATA_ABORT:
+        env->cp15.esr_el1 = env->exception.fsr;
+        env->cp15.far_el1 = deposit64(env->cp15.far_el1, 0, 32,
+                                      env->exception.vaddress);
         qemu_log_mask(CPU_LOG_INT, "...with DFSR 0x%x DFAR 0x%x\n",
-                      env->cp15.c5_data, env->cp15.c6_data);
+                      (uint32_t)env->cp15.esr_el1,
+                      (uint32_t)env->exception.vaddress);
         new_mode = ARM_CPU_MODE_ABT;
         addr = 0x10;
         mask = CPSR_A | CPSR_I;
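
A small sketch of how the shared 64-bit FAR is packed by the hunk above: DFAR lives in bits [31:0] and IFAR in bits [63:32]. The deposit64() stand-in and the sample addresses are illustrative only.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* local stand-in for QEMU's deposit64() */
static uint64_t deposit64(uint64_t dst, int start, int length, uint64_t val)
{
    uint64_t mask = (~0ULL >> (64 - length)) << start;
    return (dst & ~mask) | ((val << start) & mask);
}

int main(void)
{
    uint64_t far_el1 = 0;

    far_el1 = deposit64(far_el1, 0, 32, 0x1000);     /* data abort address */
    far_el1 = deposit64(far_el1, 32, 32, 0x2000);    /* prefetch abort address */
    printf("FAR_EL1 = 0x%016" PRIx64 "\n", far_el1); /* 0x0000200000001000 */
    return 0;
}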
@@ -3375,7 +3662,7 @@ typedef enum {
     permission_fault = 3,
 } MMUFaultType;
 
-static int get_phys_addr_lpae(CPUARMState *env, uint32_t address,
+static int get_phys_addr_lpae(CPUARMState *env, target_ulong address,
                               int access_type, int is_user,
                               hwaddr *phys_ptr, int *prot,
                               target_ulong *page_size_ptr)
@@ -3385,26 +3672,46 @@ static int get_phys_addr_lpae(CPUARMState *env, uint32_t address,
     MMUFaultType fault_type = translation_fault;
     uint32_t level = 1;
     uint32_t epd;
-    uint32_t tsz;
+    int32_t tsz;
+    uint32_t tg;
     uint64_t ttbr;
     int ttbr_select;
-    int n;
-    hwaddr descaddr;
+    hwaddr descaddr, descmask;
     uint32_t tableattrs;
     target_ulong page_size;
     uint32_t attrs;
+    int32_t granule_sz = 9;
+    int32_t va_size = 32;
+    int32_t tbi = 0;
+
+    if (arm_el_is_aa64(env, 1)) {
+        va_size = 64;
+        if (extract64(address, 55, 1)) {
+            tbi = extract64(env->cp15.c2_control, 38, 1);
+        } else {
+            tbi = extract64(env->cp15.c2_control, 37, 1);
+        }
+        tbi *= 8;
+    }
 
     /* Determine whether this address is in the region controlled by
      * TTBR0 or TTBR1 (or if it is in neither region and should fault).
      * This is a Non-secure PL0/1 stage 1 translation, so controlled by
      * TTBCR/TTBR0/TTBR1 in accordance with ARM ARM DDI0406C table B-32:
      */
-    uint32_t t0sz = extract32(env->cp15.c2_control, 0, 3);
-    uint32_t t1sz = extract32(env->cp15.c2_control, 16, 3);
-    if (t0sz && !extract32(address, 32 - t0sz, t0sz)) {
+    uint32_t t0sz = extract32(env->cp15.c2_control, 0, 6);
+    if (arm_el_is_aa64(env, 1)) {
+        t0sz = MIN(t0sz, 39);
+        t0sz = MAX(t0sz, 16);
+    }
+    uint32_t t1sz = extract32(env->cp15.c2_control, 16, 6);
+    if (arm_el_is_aa64(env, 1)) {
+        t1sz = MIN(t1sz, 39);
+        t1sz = MAX(t1sz, 16);
+    }
+    if (t0sz && !extract64(address, va_size - t0sz, t0sz - tbi)) {
         /* there is a ttbr0 region and we are in it (high bits all zero) */
         ttbr_select = 0;
-    } else if (t1sz && !extract32(~address, 32 - t1sz, t1sz)) {
+    } else if (t1sz && !extract64(~address, va_size - t1sz, t1sz - tbi)) {
         /* there is a ttbr1 region and we are in it (high bits all one) */
         ttbr_select = 1;
     } else if (!t0sz) {
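
A condensed sketch of the region-selection logic above, with illustrative t0sz/t1sz/tbi values rather than ones decoded from a real TCR_EL1:

#include <stdint.h>

static uint64_t extract64(uint64_t value, int start, int length)
{
    return (value >> start) & (~0ULL >> (64 - length));
}

/* 0 = walk from TTBR0, 1 = walk from TTBR1, -1 = translation fault */
static int ttbr_select(uint64_t address, int va_size,
                       uint32_t t0sz, uint32_t t1sz, int tbi)
{
    if (t0sz && !extract64(address, va_size - t0sz, t0sz - tbi)) {
        return 0;       /* untranslated high bits all zero */
    }
    if (t1sz && !extract64(~address, va_size - t1sz, t1sz - tbi)) {
        return 1;       /* untranslated high bits all one */
    }
    if (!t0sz) {
        return 0;       /* T0SZ == 0: TTBR0 covers the whole space */
    }
    return -1;
}

/* e.g. ttbr_select(0xffffffc000001000ULL, 64, 25, 25, 0) == 1 */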
@@ -3430,10 +3737,26 @@ static int get_phys_addr_lpae(CPUARMState *env, uint32_t address,
         ttbr = env->cp15.ttbr0_el1;
         epd = extract32(env->cp15.c2_control, 7, 1);
         tsz = t0sz;
+
+        tg = extract32(env->cp15.c2_control, 14, 2);
+        if (tg == 1) { /* 64KB pages */
+            granule_sz = 13;
+        }
+        if (tg == 2) { /* 16KB pages */
+            granule_sz = 11;
+        }
     } else {
         ttbr = env->cp15.ttbr1_el1;
         epd = extract32(env->cp15.c2_control, 23, 1);
         tsz = t1sz;
+
+        tg = extract32(env->cp15.c2_control, 30, 2);
+        if (tg == 3)  { /* 64KB pages */
+            granule_sz = 13;
+        }
+        if (tg == 1) { /* 16KB pages */
+            granule_sz = 11;
+        }
     }
 
     if (epd) {
@@ -3441,34 +3764,37 @@ static int get_phys_addr_lpae(CPUARMState *env, uint32_t address,
         goto do_fault;
     }
 
-    /* If the region is small enough we will skip straight to a 2nd level
-     * lookup. This affects the number of bits of the address used in
-     * combination with the TTBR to find the first descriptor. ('n' here
-     * matches the usage in the ARM ARM sB3.6.6, where bits [39..n] are
-     * from the TTBR, [n-1..3] from the vaddr, and [2..0] always zero).
+    /* The starting level depends on the virtual address size (which can be
+     * up to 48 bits) and on the translation granule size.
      */
-    if (tsz > 1) {
-        level = 2;
-        n = 14 - tsz;
+    if ((va_size - tsz) > (granule_sz * 4 + 3)) {
+        level = 0;
+    } else if ((va_size - tsz) > (granule_sz * 3 + 3)) {
+        level = 1;
     } else {
-        n = 5 - tsz;
+        level = 2;
     }
 
     /* Clear the vaddr bits which aren't part of the within-region address,
      * so that we don't have to special case things when calculating the
      * first descriptor address.
      */
-    address &= (0xffffffffU >> tsz);
+    if (tsz) {
+        address &= (1ULL << (va_size - tsz)) - 1;
+    }
+
+    descmask = (1ULL << (granule_sz + 3)) - 1;
 
     /* Now we can extract the actual base address from the TTBR */
-    descaddr = extract64(ttbr, 0, 40);
-    descaddr &= ~((1ULL << n) - 1);
+    descaddr = extract64(ttbr, 0, 48);
+    descaddr &= ~((1ULL << (va_size - tsz - (granule_sz * (4 - level)))) - 1);
 
     tableattrs = 0;
     for (;;) {
         uint64_t descriptor;
 
-        descaddr |= ((address >> (9 * (4 - level))) & 0xff8);
+        descaddr |= (address >> (granule_sz * (4 - level))) & descmask;
+        descaddr &= ~7ULL;
         descriptor = ldq_phys(cs->as, descaddr);
         if (!(descriptor & 1) ||
             (!(descriptor & 2) && (level == 3))) {
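
The starting-level rule above can be read as: the page offset covers granule_sz + 3 bits of the VA and each further table level resolves granule_sz bits, so the walk starts at whichever level leaves at most that many bits still to translate. A sketch with illustrative inputs:

static int start_level(int va_size, int tsz, int granule_sz)
{
    int va_bits = va_size - tsz;

    if (va_bits > granule_sz * 4 + 3) {
        return 0;
    } else if (va_bits > granule_sz * 3 + 3) {
        return 1;
    }
    return 2;
}

/* 4K granule (granule_sz = 9): a 48-bit VA starts at level 0, a 39-bit VA
 * at level 1, and a 32-bit LPAE VA also at level 1.
 */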
@@ -3491,11 +3817,16 @@ static int get_phys_addr_lpae(CPUARMState *env, uint32_t address,
          * These are basically the same thing, although the number
          * of bits we pull in from the vaddr varies.
          */
-        page_size = (1 << (39 - (9 * level)));
+        page_size = (1 << ((granule_sz * (4 - level)) + 3));
         descaddr |= (address & (page_size - 1));
         /* Extract attributes from the descriptor and merge with table attrs */
-        attrs = extract64(descriptor, 2, 10)
-            | (extract64(descriptor, 52, 12) << 10);
+        if (arm_feature(env, ARM_FEATURE_V8)) {
+            attrs = extract64(descriptor, 2, 10)
+                | (extract64(descriptor, 53, 11) << 10);
+        } else {
+            attrs = extract64(descriptor, 2, 10)
+                | (extract64(descriptor, 52, 12) << 10);
+        }
         attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */
         attrs |= extract32(tableattrs, 3, 1) << 5; /* APTable[1] => AP[2] */
         /* The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1
@@ -3569,9 +3900,9 @@ static int get_phys_addr_mpu(CPUARMState *env, uint32_t address,
 	return 2;
 
     if (access_type == 2) {
-	mask = env->cp15.c5_insn;
+        mask = env->cp15.pmsav5_insn_ap;
     } else {
-	mask = env->cp15.c5_data;
+        mask = env->cp15.pmsav5_data_ap;
     }
     mask = (mask >> (n * 4)) & 0xf;
     switch (mask) {
@@ -3629,7 +3960,7 @@ static int get_phys_addr_mpu(CPUARMState *env, uint32_t address,
  * @prot: set to the permissions for the page containing phys_ptr
  * @page_size: set to the size of the page containing phys_ptr
  */
-static inline int get_phys_addr(CPUARMState *env, uint32_t address,
+static inline int get_phys_addr(CPUARMState *env, target_ulong address,
                                 int access_type, int is_user,
                                 hwaddr *phys_ptr, int *prot,
                                 target_ulong *page_size)
@@ -3669,6 +4000,8 @@ int arm_cpu_handle_mmu_fault(CPUState *cs, vaddr address,
     target_ulong page_size;
     int prot;
     int ret, is_user;
+    uint32_t syn;
+    bool same_el = (arm_current_pl(env) != 0);
 
     is_user = mmu_idx == MMU_USER_IDX;
     ret = get_phys_addr(env, address, access_type, is_user, &phys_addr, &prot,
@@ -3676,22 +4009,31 @@ int arm_cpu_handle_mmu_fault(CPUState *cs, vaddr address,
     if (ret == 0) {
         /* Map a single [sub]page.  */
         phys_addr &= ~(hwaddr)0x3ff;
-        address &= ~(uint32_t)0x3ff;
+        address &= ~(target_ulong)0x3ff;
         tlb_set_page(cs, address, phys_addr, prot, mmu_idx, page_size);
         return 0;
     }
 
+    /* AArch64 syndrome does not have an LPAE bit */
+    syn = ret & ~(1 << 9);
+
+    /* For insn and data aborts we assume there is no instruction syndrome
+     * information; this is always true for exceptions reported to EL1.
+     */
     if (access_type == 2) {
-        env->cp15.c5_insn = ret;
-        env->cp15.c6_insn = address;
+        syn = syn_insn_abort(same_el, 0, 0, syn);
         cs->exception_index = EXCP_PREFETCH_ABORT;
     } else {
-        env->cp15.c5_data = ret;
-        if (access_type == 1 && arm_feature(env, ARM_FEATURE_V6))
-            env->cp15.c5_data |= (1 << 11);
-        env->cp15.c6_data = address;
+        syn = syn_data_abort(same_el, 0, 0, 0, access_type == 1, syn);
+        if (access_type == 1 && arm_feature(env, ARM_FEATURE_V6)) {
+            ret |= (1 << 11);
+        }
         cs->exception_index = EXCP_DATA_ABORT;
     }
+
+    env->exception.syndrome = syn;
+    env->exception.vaddress = address;
+    env->exception.fsr = ret;
     return 1;
 }
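
A minimal sketch of how the hunk above builds the data-abort syndrome, assuming the syn_data_abort() helper from the new internals.h (added further down) is in scope; the wrapper name here is hypothetical:

/* fsr is the FSR-format value returned by get_phys_addr() */
static uint32_t mmu_data_fault_syndrome(uint32_t fsr, bool is_write,
                                        bool same_el)
{
    uint32_t fsc = fsr & ~(1 << 9);   /* AArch64 syndrome has no LPAE bit */

    return syn_data_abort(same_el, 0, 0, 0, is_write, fsc);
}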
 
@@ -3842,6 +4184,88 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val)
 
 #endif
 
+void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
+{
+    /* Implement DC ZVA, which zeroes a fixed-length block of memory.
+     * Note that we do not implement the (architecturally mandated)
+     * alignment fault for attempts to use this on Device memory
+     * (which matches the usual QEMU behaviour of not implementing either
+     * alignment faults or any memory attribute handling).
+     */
+
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    uint64_t blocklen = 4 << cpu->dcz_blocksize;
+    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
+
+#ifndef CONFIG_USER_ONLY
+    {
+        /* Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than
+         * the block size so we might have to do more than one TLB lookup.
+         * We know that in fact for any v8 CPU the page size is at least 4K
+         * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
+         * 1K as an artefact of legacy v5 subpage support being present in the
+         * same QEMU executable.
+         */
+        int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
+        void *hostaddr[maxidx];
+        int try, i;
+
+        for (try = 0; try < 2; try++) {
+
+            for (i = 0; i < maxidx; i++) {
+                hostaddr[i] = tlb_vaddr_to_host(env,
+                                                vaddr + TARGET_PAGE_SIZE * i,
+                                                1, cpu_mmu_index(env));
+                if (!hostaddr[i]) {
+                    break;
+                }
+            }
+            if (i == maxidx) {
+                /* If it's all in the TLB it's fair game for just writing to;
+                 * we know we don't need to update dirty status, etc.
+                 */
+                for (i = 0; i < maxidx - 1; i++) {
+                    memset(hostaddr[i], 0, TARGET_PAGE_SIZE);
+                }
+                memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));
+                return;
+            }
+            /* OK, try a store and see if we can populate the TLB. This
+             * might cause an exception if the memory isn't writable,
+             * in which case we will longjmp out of here. For this purpose
+             * we must use the actual register value passed to us so that
+             * we get the fault address right.
+             */
+            helper_ret_stb_mmu(env, vaddr_in, 0, cpu_mmu_index(env), GETRA());
+            /* Now we can populate the other TLB entries, if any */
+            for (i = 0; i < maxidx; i++) {
+                uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
+                if (va != (vaddr_in & TARGET_PAGE_MASK)) {
+                    helper_ret_stb_mmu(env, va, 0, cpu_mmu_index(env), GETRA());
+                }
+            }
+        }
+
+        /* Slow path (probably attempt to do this to an I/O device or
+         * similar, or clearing of a block of code we have translations
+         * cached for). Just do a series of byte writes as the architecture
+         * demands. It's not worth trying to use a cpu_physical_memory_map(),
+         * memset(), unmap() sequence here because:
+         *  + we'd need to account for the blocksize being larger than a page
+         *  + the direct-RAM access case is almost always going to be dealt
+         *    with in the fastpath code above, so there's no speed benefit
+         *  + we would have to deal with the map returning NULL because the
+         *    bounce buffer was in use
+         */
+        for (i = 0; i < blocklen; i++) {
+            helper_ret_stb_mmu(env, vaddr + i, 0, cpu_mmu_index(env), GETRA());
+        }
+    }
+#else
+    memset(g2h(vaddr), 0, blocklen);
+#endif
+}
+
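
A small standalone sketch of the block geometry HELPER(dc_zva) works with: dcz_blocksize is a log2 word count (as reported in DCZID_EL0), so the byte length is 4 << dcz_blocksize and the input address is rounded down to a block boundary. The sample values are illustrative.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    unsigned dcz_blocksize = 4;                   /* 2^4 words = 64 bytes */
    uint64_t blocklen = 4 << dcz_blocksize;
    uint64_t vaddr_in = 0x1234567ULL;
    uint64_t vaddr = vaddr_in & ~(blocklen - 1);  /* 0x1234540 */

    printf("DC ZVA: zero %" PRIu64 " bytes at 0x%" PRIx64 "\n",
           blocklen, vaddr);
    return 0;
}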
 /* Note that signed overflow is undefined in C.  The following routines are
    careful to use unsigned types where modulo arithmetic is required.
    Failure to do so _will_ break on newer gcc.  */
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 366c1b3ea5..a5449e7b6f 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -48,7 +48,8 @@ DEF_HELPER_FLAGS_2(usad8, TCG_CALL_NO_RWG_SE, i32, i32, i32)
 
 DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_NO_RWG_SE,
                    i32, i32, i32, i32)
-DEF_HELPER_2(exception, void, env, i32)
+DEF_HELPER_2(exception_internal, void, env, i32)
+DEF_HELPER_3(exception_with_syndrome, void, env, i32, i32)
 DEF_HELPER_1(wfi, void, env)
 DEF_HELPER_1(wfe, void, env)
 
@@ -58,13 +59,14 @@ DEF_HELPER_1(cpsr_read, i32, env)
 DEF_HELPER_3(v7m_msr, void, env, i32, i32)
 DEF_HELPER_2(v7m_mrs, i32, env, i32)
 
-DEF_HELPER_2(access_check_cp_reg, void, env, ptr)
+DEF_HELPER_3(access_check_cp_reg, void, env, ptr, i32)
 DEF_HELPER_3(set_cp_reg, void, env, ptr, i32)
 DEF_HELPER_2(get_cp_reg, i32, env, ptr)
 DEF_HELPER_3(set_cp_reg64, void, env, ptr, i64)
 DEF_HELPER_2(get_cp_reg64, i64, env, ptr)
 
 DEF_HELPER_3(msr_i_pstate, void, env, i32, i32)
+DEF_HELPER_1(exception_return, void, env)
 
 DEF_HELPER_2(get_r13_banked, i32, env, i32)
 DEF_HELPER_3(set_r13_banked, void, env, i32, i32)
@@ -514,6 +516,7 @@ DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32)
 
 DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
+DEF_HELPER_2(dc_zva, void, env, i64)
 
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
diff --git a/target-arm/internals.h b/target-arm/internals.h
new file mode 100644
index 0000000000..d63a975a7e
--- /dev/null
+++ b/target-arm/internals.h
@@ -0,0 +1,267 @@
+/*
+ * QEMU ARM CPU -- internal functions and types
+ *
+ * Copyright (c) 2014 Linaro Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see
+ * <http://www.gnu.org/licenses/gpl-2.0.html>
+ *
+ * This header defines functions, types, etc which need to be shared
+ * between different source files within target-arm/ but which are
+ * private to it and not required by the rest of QEMU.
+ */
+
+#ifndef TARGET_ARM_INTERNALS_H
+#define TARGET_ARM_INTERNALS_H
+
+static inline bool excp_is_internal(int excp)
+{
+    /* Return true if this exception number represents a QEMU-internal
+     * exception that will not be passed to the guest.
+     */
+    return excp == EXCP_INTERRUPT
+        || excp == EXCP_HLT
+        || excp == EXCP_DEBUG
+        || excp == EXCP_HALTED
+        || excp == EXCP_EXCEPTION_EXIT
+        || excp == EXCP_KERNEL_TRAP
+        || excp == EXCP_STREX;
+}
+
+/* Exception names for debug logging; note that not all of these
+ * precisely correspond to architectural exceptions.
+ */
+static const char * const excnames[] = {
+    [EXCP_UDEF] = "Undefined Instruction",
+    [EXCP_SWI] = "SVC",
+    [EXCP_PREFETCH_ABORT] = "Prefetch Abort",
+    [EXCP_DATA_ABORT] = "Data Abort",
+    [EXCP_IRQ] = "IRQ",
+    [EXCP_FIQ] = "FIQ",
+    [EXCP_BKPT] = "Breakpoint",
+    [EXCP_EXCEPTION_EXIT] = "QEMU v7M exception exit",
+    [EXCP_KERNEL_TRAP] = "QEMU intercept of kernel commpage",
+    [EXCP_STREX] = "QEMU intercept of STREX",
+};
+
+static inline void arm_log_exception(int idx)
+{
+    if (qemu_loglevel_mask(CPU_LOG_INT)) {
+        const char *exc = NULL;
+
+        if (idx >= 0 && idx < ARRAY_SIZE(excnames)) {
+            exc = excnames[idx];
+        }
+        if (!exc) {
+            exc = "unknown";
+        }
+        qemu_log_mask(CPU_LOG_INT, "Taking exception %d [%s]\n", idx, exc);
+    }
+}
+
+/* Scale factor for generic timers, i.e. the number of ns per tick.
+ * This gives a 62.5 MHz timer.
+ */
+#define GTIMER_SCALE 16
+
+int bank_number(int mode);
+void switch_mode(CPUARMState *, int);
+void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
+void arm_translate_init(void);
+
+enum arm_fprounding {
+    FPROUNDING_TIEEVEN,
+    FPROUNDING_POSINF,
+    FPROUNDING_NEGINF,
+    FPROUNDING_ZERO,
+    FPROUNDING_TIEAWAY,
+    FPROUNDING_ODD
+};
+
+int arm_rmode_to_sf(int rmode);
+
+static inline void update_spsel(CPUARMState *env, uint32_t imm)
+{
+    /* Update PSTATE SPSel bit; this requires us to update the
+     * working stack pointer in xregs[31].
+     */
+    if (!((imm ^ env->pstate) & PSTATE_SP)) {
+        return;
+    }
+    env->pstate = deposit32(env->pstate, 0, 1, imm);
+
+    /* EL0 has no access rights to update SPSel, and this code
+     * assumes we are updating SP for EL1 while running as EL1.
+     */
+    assert(arm_current_pl(env) == 1);
+    if (env->pstate & PSTATE_SP) {
+        /* Switch from using SP_EL0 to using SP_ELx */
+        env->sp_el[0] = env->xregs[31];
+        env->xregs[31] = env->sp_el[1];
+    } else {
+        /* Switch from using SP_ELx to using SP_EL0 */
+        env->sp_el[1] = env->xregs[31];
+        env->xregs[31] = env->sp_el[0];
+    }
+}
+
+/* Valid Syndrome Register EC field values */
+enum arm_exception_class {
+    EC_UNCATEGORIZED          = 0x00,
+    EC_WFX_TRAP               = 0x01,
+    EC_CP15RTTRAP             = 0x03,
+    EC_CP15RRTTRAP            = 0x04,
+    EC_CP14RTTRAP             = 0x05,
+    EC_CP14DTTRAP             = 0x06,
+    EC_ADVSIMDFPACCESSTRAP    = 0x07,
+    EC_FPIDTRAP               = 0x08,
+    EC_CP14RRTTRAP            = 0x0c,
+    EC_ILLEGALSTATE           = 0x0e,
+    EC_AA32_SVC               = 0x11,
+    EC_AA32_HVC               = 0x12,
+    EC_AA32_SMC               = 0x13,
+    EC_AA64_SVC               = 0x15,
+    EC_AA64_HVC               = 0x16,
+    EC_AA64_SMC               = 0x17,
+    EC_SYSTEMREGISTERTRAP     = 0x18,
+    EC_INSNABORT              = 0x20,
+    EC_INSNABORT_SAME_EL      = 0x21,
+    EC_PCALIGNMENT            = 0x22,
+    EC_DATAABORT              = 0x24,
+    EC_DATAABORT_SAME_EL      = 0x25,
+    EC_SPALIGNMENT            = 0x26,
+    EC_AA32_FPTRAP            = 0x28,
+    EC_AA64_FPTRAP            = 0x2c,
+    EC_SERROR                 = 0x2f,
+    EC_BREAKPOINT             = 0x30,
+    EC_BREAKPOINT_SAME_EL     = 0x31,
+    EC_SOFTWARESTEP           = 0x32,
+    EC_SOFTWARESTEP_SAME_EL   = 0x33,
+    EC_WATCHPOINT             = 0x34,
+    EC_WATCHPOINT_SAME_EL     = 0x35,
+    EC_AA32_BKPT              = 0x38,
+    EC_VECTORCATCH            = 0x3a,
+    EC_AA64_BKPT              = 0x3c,
+};
+
+#define ARM_EL_EC_SHIFT 26
+#define ARM_EL_IL_SHIFT 25
+#define ARM_EL_IL (1 << ARM_EL_IL_SHIFT)
+
+/* Utility functions for constructing various kinds of syndrome value.
+ * Note that in general we follow the AArch64 syndrome values; in a
+ * few cases the value in HSR for exceptions taken to AArch32 Hyp
+ * mode differs slightly, so if we ever implemented Hyp mode then the
+ * syndrome value would need some massaging on exception entry.
+ * (One example of this is that AArch64 defaults to IL bit set for
+ * exceptions which don't specifically indicate information about the
+ * trapping instruction, whereas AArch32 defaults to IL bit clear.)
+ */
+static inline uint32_t syn_uncategorized(void)
+{
+    return (EC_UNCATEGORIZED << ARM_EL_EC_SHIFT) | ARM_EL_IL;
+}
+
+static inline uint32_t syn_aa64_svc(uint32_t imm16)
+{
+    return (EC_AA64_SVC << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
+}
+
+static inline uint32_t syn_aa32_svc(uint32_t imm16, bool is_thumb)
+{
+    return (EC_AA32_SVC << ARM_EL_EC_SHIFT) | (imm16 & 0xffff)
+        | (is_thumb ? 0 : ARM_EL_IL);
+}
+
+static inline uint32_t syn_aa64_bkpt(uint32_t imm16)
+{
+    return (EC_AA64_BKPT << ARM_EL_EC_SHIFT) | ARM_EL_IL | (imm16 & 0xffff);
+}
+
+static inline uint32_t syn_aa32_bkpt(uint32_t imm16, bool is_thumb)
+{
+    return (EC_AA32_BKPT << ARM_EL_EC_SHIFT) | (imm16 & 0xffff)
+        | (is_thumb ? 0 : ARM_EL_IL);
+}
+
+static inline uint32_t syn_aa64_sysregtrap(int op0, int op1, int op2,
+                                           int crn, int crm, int rt,
+                                           int isread)
+{
+    return (EC_SYSTEMREGISTERTRAP << ARM_EL_EC_SHIFT) | ARM_EL_IL
+        | (op0 << 20) | (op2 << 17) | (op1 << 14) | (crn << 10) | (rt << 5)
+        | (crm << 1) | isread;
+}
+
+static inline uint32_t syn_cp14_rt_trap(int cv, int cond, int opc1, int opc2,
+                                        int crn, int crm, int rt, int isread,
+                                        bool is_thumb)
+{
+    return (EC_CP14RTTRAP << ARM_EL_EC_SHIFT)
+        | (is_thumb ? 0 : ARM_EL_IL)
+        | (cv << 24) | (cond << 20) | (opc2 << 17) | (opc1 << 14)
+        | (crn << 10) | (rt << 5) | (crm << 1) | isread;
+}
+
+static inline uint32_t syn_cp15_rt_trap(int cv, int cond, int opc1, int opc2,
+                                        int crn, int crm, int rt, int isread,
+                                        bool is_thumb)
+{
+    return (EC_CP15RTTRAP << ARM_EL_EC_SHIFT)
+        | (is_thumb ? 0 : ARM_EL_IL)
+        | (cv << 24) | (cond << 20) | (opc2 << 17) | (opc1 << 14)
+        | (crn << 10) | (rt << 5) | (crm << 1) | isread;
+}
+
+static inline uint32_t syn_cp14_rrt_trap(int cv, int cond, int opc1, int crm,
+                                         int rt, int rt2, int isread,
+                                         bool is_thumb)
+{
+    return (EC_CP14RRTTRAP << ARM_EL_EC_SHIFT)
+        | (is_thumb ? 0 : ARM_EL_IL)
+        | (cv << 24) | (cond << 20) | (opc1 << 16)
+        | (rt2 << 10) | (rt << 5) | (crm << 1) | isread;
+}
+
+static inline uint32_t syn_cp15_rrt_trap(int cv, int cond, int opc1, int crm,
+                                         int rt, int rt2, int isread,
+                                         bool is_thumb)
+{
+    return (EC_CP15RRTTRAP << ARM_EL_EC_SHIFT)
+        | (is_thumb ? 0 : ARM_EL_IL)
+        | (cv << 24) | (cond << 20) | (opc1 << 16)
+        | (rt2 << 10) | (rt << 5) | (crm << 1) | isread;
+}
+
+static inline uint32_t syn_fp_access_trap(int cv, int cond, bool is_thumb)
+{
+    return (EC_ADVSIMDFPACCESSTRAP << ARM_EL_EC_SHIFT)
+        | (is_thumb ? 0 : ARM_EL_IL)
+        | (cv << 24) | (cond << 20);
+}
+
+static inline uint32_t syn_insn_abort(int same_el, int ea, int s1ptw, int fsc)
+{
+    return (EC_INSNABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
+        | (ea << 9) | (s1ptw << 7) | fsc;
+}
+
+static inline uint32_t syn_data_abort(int same_el, int ea, int cm, int s1ptw,
+                                      int wnr, int fsc)
+{
+    return (EC_DATAABORT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
+        | (ea << 9) | (cm << 8) | (s1ptw << 7) | (wnr << 6) | fsc;
+}
+
+#endif
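
Worked values for a few of the constructors above (EC occupies bits [31:26], IL is bit 25):

syn_uncategorized()   = (0x00 << 26) | (1 << 25)          = 0x02000000
syn_aa64_svc(0)       = (0x15 << 26) | (1 << 25) | 0x0000 = 0x56000000
syn_aa64_bkpt(0x11)   = (0x3c << 26) | (1 << 25) | 0x0011 = 0xf2000011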
diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c
index a4fde07969..a690d9935f 100644
--- a/target-arm/kvm32.c
+++ b/target-arm/kvm32.c
@@ -21,6 +21,7 @@
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
 #include "cpu.h"
+#include "internals.h"
 #include "hw/arm/arm.h"
 
 static inline void set_feature(uint64_t *features, int feature)
@@ -294,6 +295,14 @@ typedef struct Reg {
         offsetof(CPUARMState, vfp.xregs[ARM_VFP_##R])      \
     }
 
+/* Like COREREG, but handle fields which are in a uint64_t in CPUARMState. */
+#define COREREG64(KERNELNAME, QEMUFIELD)                     \
+    {                                                        \
+        KVM_REG_ARM | KVM_REG_SIZE_U32 |                     \
+        KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(KERNELNAME), \
+        offsetoflow32(CPUARMState, QEMUFIELD)                \
+    }
+
 static const Reg regs[] = {
     /* R0_usr .. R14_usr */
     COREREG(usr_regs.uregs[0], regs[0]),
@@ -314,16 +323,16 @@ static const Reg regs[] = {
     /* R13, R14, SPSR for SVC, ABT, UND, IRQ banks */
     COREREG(svc_regs[0], banked_r13[1]),
     COREREG(svc_regs[1], banked_r14[1]),
-    COREREG(svc_regs[2], banked_spsr[1]),
+    COREREG64(svc_regs[2], banked_spsr[1]),
     COREREG(abt_regs[0], banked_r13[2]),
     COREREG(abt_regs[1], banked_r14[2]),
-    COREREG(abt_regs[2], banked_spsr[2]),
+    COREREG64(abt_regs[2], banked_spsr[2]),
     COREREG(und_regs[0], banked_r13[3]),
     COREREG(und_regs[1], banked_r14[3]),
-    COREREG(und_regs[2], banked_spsr[3]),
+    COREREG64(und_regs[2], banked_spsr[3]),
     COREREG(irq_regs[0], banked_r13[4]),
     COREREG(irq_regs[1], banked_r14[4]),
-    COREREG(irq_regs[2], banked_spsr[4]),
+    COREREG64(irq_regs[2], banked_spsr[4]),
     /* R8_fiq .. R14_fiq and SPSR_fiq */
     COREREG(fiq_regs[0], fiq_regs[0]),
     COREREG(fiq_regs[1], fiq_regs[1]),
@@ -332,7 +341,7 @@ static const Reg regs[] = {
     COREREG(fiq_regs[4], fiq_regs[4]),
     COREREG(fiq_regs[5], banked_r13[5]),
     COREREG(fiq_regs[6], banked_r14[5]),
-    COREREG(fiq_regs[7], banked_spsr[5]),
+    COREREG64(fiq_regs[7], banked_spsr[5]),
     /* R15 */
     COREREG(usr_regs.uregs[15], regs[15]),
     /* VFP system registers */
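
A sketch of what COREREG64 relies on: offsetoflow32() is assumed to resolve to the byte offset of the low 32 bits of a uint64_t field, so the 32-bit KVM core register lands in the right half of the now-64-bit banked_spsr[] slot on either host endianness. The macro body and demo struct below are assumptions for illustration, not QEMU's definitions.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#ifdef HOST_WORDS_BIGENDIAN
#define offsetoflow32(S, M) (offsetof(S, M) + sizeof(uint32_t))
#else
#define offsetoflow32(S, M) offsetof(S, M)
#endif

struct demo_env {                 /* stand-in for CPUARMState */
    uint64_t banked_spsr[6];
};

int main(void)
{
    printf("low 32 bits of banked_spsr[1] live at offset %zu\n",
           offsetoflow32(struct demo_env, banked_spsr[1]));
    return 0;
}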
diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index 1b7ca90374..e115879d9a 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -121,8 +121,24 @@ int kvm_arch_put_registers(CPUState *cs, int level)
         }
     }
 
+    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
+     * QEMU side we keep the current SP in xregs[31] as well.
+     */
+    if (env->pstate & PSTATE_SP) {
+        env->sp_el[1] = env->xregs[31];
+    } else {
+        env->sp_el[0] = env->xregs[31];
+    }
+
     reg.id = AARCH64_CORE_REG(regs.sp);
-    reg.addr = (uintptr_t) &env->xregs[31];
+    reg.addr = (uintptr_t) &env->sp_el[0];
+    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+    if (ret) {
+        return ret;
+    }
+
+    reg.id = AARCH64_CORE_REG(sp_el1);
+    reg.addr = (uintptr_t) &env->sp_el[1];
     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
     if (ret) {
         return ret;
@@ -144,10 +160,23 @@ int kvm_arch_put_registers(CPUState *cs, int level)
         return ret;
     }
 
+    reg.id = AARCH64_CORE_REG(elr_el1);
+    reg.addr = (uintptr_t) &env->elr_el1;
+    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+    if (ret) {
+        return ret;
+    }
+
+    for (i = 0; i < KVM_NR_SPSR; i++) {
+        reg.id = AARCH64_CORE_REG(spsr[i]);
+        reg.addr = (uintptr_t) &env->banked_spsr[i - 1];
+        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+        if (ret) {
+            return ret;
+        }
+    }
+
     /* TODO:
-     * SP_EL1
-     * ELR_EL1
-     * SPSR[]
      * FP state
      * system registers
      */
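
The SP bookkeeping described in the comments above could be factored as below; the helper names are hypothetical and this is only a sketch of the PSTATE.SP selection the hunks implement inline:

static void save_current_sp(CPUARMState *env)
{
    if (env->pstate & PSTATE_SP) {
        env->sp_el[1] = env->xregs[31];   /* currently running on SP_EL1 */
    } else {
        env->sp_el[0] = env->xregs[31];   /* currently running on SP_EL0 */
    }
}

static void restore_current_sp(CPUARMState *env)
{
    env->xregs[31] = env->sp_el[(env->pstate & PSTATE_SP) ? 1 : 0];
}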
@@ -174,7 +203,14 @@ int kvm_arch_get_registers(CPUState *cs)
     }
 
     reg.id = AARCH64_CORE_REG(regs.sp);
-    reg.addr = (uintptr_t) &env->xregs[31];
+    reg.addr = (uintptr_t) &env->sp_el[0];
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
+    if (ret) {
+        return ret;
+    }
+
+    reg.id = AARCH64_CORE_REG(sp_el1);
+    reg.addr = (uintptr_t) &env->sp_el[1];
     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
     if (ret) {
         return ret;
@@ -188,6 +224,15 @@ int kvm_arch_get_registers(CPUState *cs)
     }
     pstate_write(env, val);
 
+    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
+     * QEMU side we keep the current SP in xregs[31] as well.
+     */
+    if (env->pstate & PSTATE_SP) {
+        env->xregs[31] = env->sp_el[1];
+    } else {
+        env->xregs[31] = env->sp_el[0];
+    }
+
     reg.id = AARCH64_CORE_REG(regs.pc);
     reg.addr = (uintptr_t) &env->pc;
     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
@@ -195,6 +240,22 @@ int kvm_arch_get_registers(CPUState *cs)
         return ret;
     }
 
+    reg.id = AARCH64_CORE_REG(elr_el1);
+    reg.addr = (uintptr_t) &env->elr_el1;
+    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
+    if (ret) {
+        return ret;
+    }
+
+    for (i = 0; i < KVM_NR_SPSR; i++) {
+        reg.id = AARCH64_CORE_REG(spsr[i]);
+        reg.addr = (uintptr_t) &env->banked_spsr[i - 1];
+        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
+        if (ret) {
+            return ret;
+        }
+    }
+
     /* TODO: other registers */
     return ret;
 }
diff --git a/target-arm/machine.c b/target-arm/machine.c
index 7ced87af58..b967223fc0 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -222,9 +222,9 @@ static int cpu_post_load(void *opaque, int version_id)
 
 const VMStateDescription vmstate_arm_cpu = {
     .name = "cpu",
-    .version_id = 14,
-    .minimum_version_id = 14,
-    .minimum_version_id_old = 14,
+    .version_id = 17,
+    .minimum_version_id = 17,
+    .minimum_version_id_old = 17,
     .pre_save = cpu_pre_save,
     .post_load = cpu_post_load,
     .fields = (VMStateField[]) {
@@ -238,11 +238,13 @@ const VMStateDescription vmstate_arm_cpu = {
             .offset = 0,
         },
         VMSTATE_UINT32(env.spsr, ARMCPU),
-        VMSTATE_UINT32_ARRAY(env.banked_spsr, ARMCPU, 6),
+        VMSTATE_UINT64_ARRAY(env.banked_spsr, ARMCPU, 6),
         VMSTATE_UINT32_ARRAY(env.banked_r13, ARMCPU, 6),
         VMSTATE_UINT32_ARRAY(env.banked_r14, ARMCPU, 6),
         VMSTATE_UINT32_ARRAY(env.usr_regs, ARMCPU, 5),
         VMSTATE_UINT32_ARRAY(env.fiq_regs, ARMCPU, 5),
+        VMSTATE_UINT64(env.elr_el1, ARMCPU),
+        VMSTATE_UINT64_ARRAY(env.sp_el, ARMCPU, 2),
         /* The length-check must come before the arrays to avoid
          * incoming data possibly overflowing the array.
          */
@@ -257,6 +259,9 @@ const VMStateDescription vmstate_arm_cpu = {
         VMSTATE_UINT64(env.exclusive_val, ARMCPU),
         VMSTATE_UINT64(env.exclusive_high, ARMCPU),
         VMSTATE_UINT64(env.features, ARMCPU),
+        VMSTATE_UINT32(env.exception.syndrome, ARMCPU),
+        VMSTATE_UINT32(env.exception.fsr, ARMCPU),
+        VMSTATE_UINT64(env.exception.vaddress, ARMCPU),
         VMSTATE_TIMER(gt_timer[GTIMER_PHYS], ARMCPU),
         VMSTATE_TIMER(gt_timer[GTIMER_VIRT], ARMCPU),
         VMSTATE_END_OF_LIST()
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 21ff58e754..57e7d9c480 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -18,6 +18,7 @@
  */
 #include "cpu.h"
 #include "helper.h"
+#include "internals.h"
 
 #define SIGNBIT (uint32_t)0x80000000
 #define SIGNBIT64 ((uint64_t)1 << 63)
@@ -243,11 +244,30 @@ void HELPER(wfe)(CPUARMState *env)
     cpu_loop_exit(cs);
 }
 
-void HELPER(exception)(CPUARMState *env, uint32_t excp)
+/* Raise an internal-to-QEMU exception. This is limited to only
+ * those EXCP values which are special cases for QEMU to interrupt
+ * execution and not to be used for exceptions which are passed to
+ * the guest (those must all have syndrome information and thus should
+ * use exception_with_syndrome).
+ */
+void HELPER(exception_internal)(CPUARMState *env, uint32_t excp)
+{
+    CPUState *cs = CPU(arm_env_get_cpu(env));
+
+    assert(excp_is_internal(excp));
+    cs->exception_index = excp;
+    cpu_loop_exit(cs);
+}
+
+/* Raise an exception with the specified syndrome register value */
+void HELPER(exception_with_syndrome)(CPUARMState *env, uint32_t excp,
+                                     uint32_t syndrome)
 {
     CPUState *cs = CPU(arm_env_get_cpu(env));
 
+    assert(!excp_is_internal(excp));
     cs->exception_index = excp;
+    env->exception.syndrome = syndrome;
     cpu_loop_exit(cs);
 }
 
@@ -293,17 +313,17 @@ void HELPER(set_user_reg)(CPUARMState *env, uint32_t regno, uint32_t val)
     }
 }
 
-void HELPER(access_check_cp_reg)(CPUARMState *env, void *rip)
+void HELPER(access_check_cp_reg)(CPUARMState *env, void *rip, uint32_t syndrome)
 {
     const ARMCPRegInfo *ri = rip;
     switch (ri->accessfn(env, ri)) {
     case CP_ACCESS_OK:
         return;
     case CP_ACCESS_TRAP:
+        env->exception.syndrome = syndrome;
+        break;
     case CP_ACCESS_TRAP_UNCATEGORIZED:
-        /* These cases will eventually need to generate different
-         * syndrome information.
-         */
+        env->exception.syndrome = syn_uncategorized();
         break;
     default:
         g_assert_not_reached();
@@ -351,7 +371,7 @@ void HELPER(msr_i_pstate)(CPUARMState *env, uint32_t op, uint32_t imm)
 
     switch (op) {
     case 0x05: /* SPSel */
-        env->pstate = deposit32(env->pstate, 0, 1, imm);
+        update_spsel(env, imm);
         break;
     case 0x1e: /* DAIFSet */
         env->daif |= (imm << 6) & PSTATE_DAIF;
@@ -364,6 +384,66 @@ void HELPER(msr_i_pstate)(CPUARMState *env, uint32_t op, uint32_t imm)
     }
 }
 
+void HELPER(exception_return)(CPUARMState *env)
+{
+    uint32_t spsr = env->banked_spsr[0];
+    int new_el, i;
+
+    if (env->pstate & PSTATE_SP) {
+        env->sp_el[1] = env->xregs[31];
+    } else {
+        env->sp_el[0] = env->xregs[31];
+    }
+
+    env->exclusive_addr = -1;
+
+    if (spsr & PSTATE_nRW) {
+        env->aarch64 = 0;
+        new_el = 0;
+        env->uncached_cpsr = 0x10;
+        cpsr_write(env, spsr, ~0);
+        for (i = 0; i < 15; i++) {
+            env->regs[i] = env->xregs[i];
+        }
+
+        env->regs[15] = env->elr_el1 & ~0x1;
+    } else {
+        new_el = extract32(spsr, 2, 2);
+        if (new_el > 1) {
+            /* Return to unimplemented EL */
+            goto illegal_return;
+        }
+        if (extract32(spsr, 1, 1)) {
+            /* Return with reserved M[1] bit set */
+            goto illegal_return;
+        }
+        if (new_el == 0 && (spsr & PSTATE_SP)) {
+            /* Return to EL0 with M[0] bit set */
+            goto illegal_return;
+        }
+        env->aarch64 = 1;
+        pstate_write(env, spsr);
+        env->xregs[31] = env->sp_el[new_el];
+        env->pc = env->elr_el1;
+    }
+
+    return;
+
+illegal_return:
+    /* Illegal return events of various kinds have architecturally
+     * mandated behaviour:
+     * restore NZCV and DAIF from SPSR_ELx
+     * set PSTATE.IL
+     * restore PC from ELR_ELx
+     * no change to exception level, execution state or stack pointer
+     */
+    env->pstate |= PSTATE_IL;
+    env->pc = env->elr_el1;
+    spsr &= PSTATE_NZCV | PSTATE_DAIF;
+    spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF);
+    pstate_write(env, spsr);
+}
+
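
A condensed sketch of the SPSR checks HELPER(exception_return) performs before committing to a return state; the function name and the -1 encoding for the illegal-return cases are illustrative:

/* Return the target EL encoded in an SPSR, or -1 for an illegal return. */
static int spsr_return_el(uint32_t spsr)
{
    int new_el = extract32(spsr, 2, 2);    /* M[3:2] */

    if (spsr & PSTATE_nRW) {
        return 0;                          /* AArch32 return */
    }
    if (new_el > 1) {
        return -1;                         /* EL2/EL3 not implemented */
    }
    if (extract32(spsr, 1, 1)) {
        return -1;                         /* reserved M[1] bit set */
    }
    if (new_el == 0 && (spsr & PSTATE_SP)) {
        return -1;                         /* EL0 must use SP_EL0 */
    }
    return new_el;
}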
 /* ??? Flag setting arithmetic is awkward because we need to do comparisons.
    The only way to do that in TCG is a conditional branch, which clobbers
    all our temporaries.  For now implement these as helper functions.  */
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 9175e48797..d86b8ffa55 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -26,6 +26,7 @@
 #include "tcg-op.h"
 #include "qemu/log.h"
 #include "translate.h"
+#include "internals.h"
 #include "qemu/host-utils.h"
 
 #include "exec/gen-icount.h"
@@ -175,18 +176,37 @@ void gen_a64_set_pc_im(uint64_t val)
     tcg_gen_movi_i64(cpu_pc, val);
 }
 
-static void gen_exception(int excp)
+static void gen_exception_internal(int excp)
 {
-    TCGv_i32 tmp = tcg_temp_new_i32();
-    tcg_gen_movi_i32(tmp, excp);
-    gen_helper_exception(cpu_env, tmp);
-    tcg_temp_free_i32(tmp);
+    TCGv_i32 tcg_excp = tcg_const_i32(excp);
+
+    assert(excp_is_internal(excp));
+    gen_helper_exception_internal(cpu_env, tcg_excp);
+    tcg_temp_free_i32(tcg_excp);
+}
+
+static void gen_exception(int excp, uint32_t syndrome)
+{
+    TCGv_i32 tcg_excp = tcg_const_i32(excp);
+    TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
+
+    gen_helper_exception_with_syndrome(cpu_env, tcg_excp, tcg_syn);
+    tcg_temp_free_i32(tcg_syn);
+    tcg_temp_free_i32(tcg_excp);
+}
+
+static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
+{
+    gen_a64_set_pc_im(s->pc - offset);
+    gen_exception_internal(excp);
+    s->is_jmp = DISAS_EXC;
 }
 
-static void gen_exception_insn(DisasContext *s, int offset, int excp)
+static void gen_exception_insn(DisasContext *s, int offset, int excp,
+                               uint32_t syndrome)
 {
     gen_a64_set_pc_im(s->pc - offset);
-    gen_exception(excp);
+    gen_exception(excp, syndrome);
     s->is_jmp = DISAS_EXC;
 }
 
@@ -218,7 +238,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
     } else {
         gen_a64_set_pc_im(dest);
         if (s->singlestep_enabled) {
-            gen_exception(EXCP_DEBUG);
+            gen_exception_internal(EXCP_DEBUG);
         }
         tcg_gen_exit_tb(0);
         s->is_jmp = DISAS_JUMP;
@@ -227,7 +247,8 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 
 static void unallocated_encoding(DisasContext *s)
 {
-    gen_exception_insn(s, 4, EXCP_UDEF);
+    /* Unallocated and reserved encodings are uncategorized */
+    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized());
 }
 
 #define unsupported_encoding(s, insn)                                    \
@@ -332,11 +353,30 @@ static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
     return v;
 }
 
+/* We should have at some point before trying to access an FP register
+ * done the necessary access check, so assert that
+ * (a) we did the check and
+ * (b) we didn't then just plough ahead anyway if it failed.
+ * Print the instruction pattern in the abort message so we can figure
+ * out what we need to fix if a user encounters this problem in the wild.
+ */
+static inline void assert_fp_access_checked(DisasContext *s)
+{
+#ifdef CONFIG_DEBUG_TCG
+    if (unlikely(!s->fp_access_checked || !s->cpacr_fpen)) {
+        fprintf(stderr, "target-arm: FP access check missing for "
+                "instruction 0x%08x\n", s->insn);
+        abort();
+    }
+#endif
+}
+
 /* Return the offset into CPUARMState of an element of specified
  * size, 'element' places in from the least significant end of
  * the FP/vector register Qn.
  */
-static inline int vec_reg_offset(int regno, int element, TCGMemOp size)
+static inline int vec_reg_offset(DisasContext *s, int regno,
+                                 int element, TCGMemOp size)
 {
     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 #ifdef HOST_WORDS_BIGENDIAN
@@ -351,6 +391,7 @@ static inline int vec_reg_offset(int regno, int element, TCGMemOp size)
 #else
     offs += element * (1 << size);
 #endif
+    assert_fp_access_checked(s);
     return offs;
 }
 
@@ -359,18 +400,20 @@ static inline int vec_reg_offset(int regno, int element, TCGMemOp size)
  * Dn, Sn, Hn or Bn).
  * (Note that this is not the same mapping as for A32; see cpu.h)
  */
-static inline int fp_reg_offset(int regno, TCGMemOp size)
+static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
 {
     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 #ifdef HOST_WORDS_BIGENDIAN
     offs += (8 - (1 << size));
 #endif
+    assert_fp_access_checked(s);
     return offs;
 }
 
 /* Offset of the high half of the 128 bit vector Qn */
-static inline int fp_reg_hi_offset(int regno)
+static inline int fp_reg_hi_offset(DisasContext *s, int regno)
 {
+    assert_fp_access_checked(s);
     return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
 }
 
@@ -384,7 +427,7 @@ static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 {
     TCGv_i64 v = tcg_temp_new_i64();
 
-    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(reg, MO_64));
+    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
     return v;
 }
 
@@ -392,7 +435,7 @@ static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 {
     TCGv_i32 v = tcg_temp_new_i32();
 
-    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(reg, MO_32));
+    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
     return v;
 }
 
@@ -400,8 +443,8 @@ static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 {
     TCGv_i64 tcg_zero = tcg_const_i64(0);
 
-    tcg_gen_st_i64(v, cpu_env, fp_reg_offset(reg, MO_64));
-    tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(reg));
+    tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
+    tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
     tcg_temp_free_i64(tcg_zero);
 }
 
@@ -672,14 +715,14 @@ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 {
     /* This writes the bottom N bits of a 128 bit wide vector to memory */
     TCGv_i64 tmp = tcg_temp_new_i64();
-    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(srcidx, MO_64));
+    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
     if (size < 4) {
         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size);
     } else {
         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ);
         tcg_gen_qemu_st64(tmp, tcg_addr, get_mem_index(s));
-        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(srcidx));
+        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
         tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
         tcg_temp_free_i64(tcg_hiaddr);
@@ -712,8 +755,8 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
         tcg_temp_free_i64(tcg_hiaddr);
     }
 
-    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(destidx, MO_64));
-    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(destidx));
+    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
+    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
 
     tcg_temp_free_i64(tmplo);
     tcg_temp_free_i64(tmphi);
@@ -735,7 +778,7 @@ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                              int element, TCGMemOp memop)
 {
-    int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE);
+    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
     switch (memop) {
     case MO_8:
         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
@@ -767,7 +810,7 @@ static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                  int element, TCGMemOp memop)
 {
-    int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE);
+    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
     switch (memop) {
     case MO_8:
         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
@@ -794,7 +837,7 @@ static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                               int element, TCGMemOp memop)
 {
-    int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE);
+    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
     switch (memop) {
     case MO_8:
         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
@@ -816,7 +859,7 @@ static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                   int destidx, int element, TCGMemOp memop)
 {
-    int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE);
+    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
     switch (memop) {
     case MO_8:
         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
@@ -869,6 +912,26 @@ static void do_vec_ld(DisasContext *s, int destidx, int element,
     tcg_temp_free_i64(tcg_tmp);
 }
 
+/* Check that FP/Neon access is enabled. If it is, return
+ * true. If not, emit code to generate an appropriate exception,
+ * and return false; the caller should not emit any code for
+ * the instruction. Note that this check must happen after all
+ * unallocated-encoding checks (otherwise the syndrome information
+ * for the resulting exception will be incorrect).
+ */
+static inline bool fp_access_check(DisasContext *s)
+{
+    assert(!s->fp_access_checked);
+    s->fp_access_checked = true;
+
+    if (s->cpacr_fpen) {
+        return true;
+    }
+
+    gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false));
+    return false;
+}
+
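
The intended call pattern for fp_access_check(), shown with a hypothetical decode function only to illustrate the ordering the comment requires: unallocated-encoding checks first, FP access check second, code emission last.

static void disas_some_fp_insn(DisasContext *s, uint32_t insn)
{
    if (extract32(insn, 29, 3) != 0) {   /* illustrative encoding check */
        unallocated_encoding(s);
        return;
    }
    if (!fp_access_check(s)) {
        return;                          /* exception already generated */
    }
    /* ... emit TCG ops for the instruction ... */
}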
 /*
  * This utility function is for doing register extension with an
  * optional shift. You will likely want to pass a temporary for the
@@ -1241,10 +1304,16 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
          * runtime; this may result in an exception.
          */
         TCGv_ptr tmpptr;
+        TCGv_i32 tcg_syn;
+        uint32_t syndrome;
+
         gen_a64_set_pc_im(s->pc - 4);
         tmpptr = tcg_const_ptr(ri);
-        gen_helper_access_check_cp_reg(cpu_env, tmpptr);
+        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
+        tcg_syn = tcg_const_i32(syndrome);
+        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn);
         tcg_temp_free_ptr(tmpptr);
+        tcg_temp_free_i32(tcg_syn);
     }
 
     /* Handle special cases first */
@@ -1266,6 +1335,11 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
         tcg_rt = cpu_reg(s, rt);
         tcg_gen_movi_i64(tcg_rt, s->current_pl << 2);
         return;
+    case ARM_CP_DC_ZVA:
+        /* Writes clear the aligned block of memory which rt points into. */
+        tcg_rt = cpu_reg(s, rt);
+        gen_helper_dc_zva(cpu_env, tcg_rt);
+        return;
     default:
         break;
     }
@@ -1366,6 +1440,7 @@ static void disas_exc(DisasContext *s, uint32_t insn)
 {
     int opc = extract32(insn, 21, 3);
     int op2_ll = extract32(insn, 0, 5);
+    int imm16 = extract32(insn, 5, 16);
 
     switch (opc) {
     case 0:
@@ -1376,7 +1451,7 @@ static void disas_exc(DisasContext *s, uint32_t insn)
             unallocated_encoding(s);
             break;
         }
-        gen_exception_insn(s, 0, EXCP_SWI);
+        gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16));
         break;
     case 1:
         if (op2_ll != 0) {
@@ -1384,7 +1459,7 @@ static void disas_exc(DisasContext *s, uint32_t insn)
             break;
         }
         /* BRK */
-        gen_exception_insn(s, 0, EXCP_BKPT);
+        gen_exception_insn(s, 0, EXCP_BKPT, syn_aa64_bkpt(imm16));
         break;
     case 2:
         if (op2_ll != 0) {
@@ -1437,6 +1512,9 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
         break;
     case 4: /* ERET */
+        gen_helper_exception_return(cpu_env);
+        s->is_jmp = DISAS_JUMP;
+        return;
     case 5: /* DRPS */
         if (rn != 0x1f) {
             unallocated_encoding(s);
@@ -1533,7 +1611,7 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
     tcg_gen_mov_i64(cpu_exclusive_test, addr);
     tcg_gen_movi_i32(cpu_exclusive_info,
                      size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
-    gen_exception_insn(s, 4, EXCP_STREX);
+    gen_exception_internal_insn(s, 4, EXCP_STREX);
 }
 #else
 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
@@ -1700,6 +1778,9 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn)
             return;
         }
         size = 2 + opc;
+        if (!fp_access_check(s)) {
+            return;
+        }
     } else {
         if (opc == 3) {
             /* PRFM (literal) : prefetch */
@@ -1809,6 +1890,10 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
         break;
     }
 
+    if (is_vector && !fp_access_check(s)) {
+        return;
+    }
+
     offset <<= size;
 
     if (rn == 31) {
@@ -1902,6 +1987,9 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
             return;
         }
         is_store = ((opc & 1) == 0);
+        if (!fp_access_check(s)) {
+            return;
+        }
     } else {
         if (size == 3 && opc == 2) {
             /* PRFM - prefetch */
@@ -2022,6 +2110,9 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
             return;
         }
         is_store = !extract32(opc, 0, 1);
+        if (!fp_access_check(s)) {
+            return;
+        }
     } else {
         if (size == 3 && opc == 2) {
             /* PRFM - prefetch */
@@ -2102,6 +2193,9 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
             return;
         }
         is_store = !extract32(opc, 0, 1);
+        if (!fp_access_check(s)) {
+            return;
+        }
     } else {
         if (size == 3 && opc == 2) {
             /* PRFM - prefetch */
@@ -2244,6 +2338,10 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (rn == 31) {
         gen_check_sp_alignment(s);
     }
@@ -2370,6 +2468,10 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
         g_assert_not_reached();
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     ebytes = 1 << scale;
 
     if (rn == 31) {
@@ -3846,6 +3948,10 @@ static void disas_fp_compare(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
 }
 
@@ -3874,6 +3980,10 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (cond < 0x0e) { /* not always */
         int label_match = gen_new_label();
         label_continue = gen_new_label();
@@ -3930,6 +4040,10 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (cond < 0x0e) { /* not always */
         int label_match = gen_new_label();
         label_continue = gen_new_label();
@@ -4147,6 +4261,10 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
+
         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
         break;
     }
@@ -4156,9 +4274,17 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn)
         /* 32-to-32 and 64-to-64 ops */
         switch (type) {
         case 0:
+            if (!fp_access_check(s)) {
+                return;
+            }
+
             handle_fp_1src_single(s, opcode, rd, rn);
             break;
         case 1:
+            if (!fp_access_check(s)) {
+                return;
+            }
+
             handle_fp_1src_double(s, opcode, rd, rn);
             break;
         default:
@@ -4298,9 +4424,15 @@ static void disas_fp_2src(DisasContext *s, uint32_t insn)
 
     switch (type) {
     case 0:
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_fp_2src_single(s, opcode, rd, rn, rm);
         break;
     case 1:
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_fp_2src_double(s, opcode, rd, rn, rm);
         break;
     default:
@@ -4402,9 +4534,15 @@ static void disas_fp_3src(DisasContext *s, uint32_t insn)
 
     switch (type) {
     case 0:
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
         break;
     case 1:
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
         break;
     default:
@@ -4431,6 +4569,10 @@ static void disas_fp_imm(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     /* The imm8 encodes the sign bit, enough bits to represent
      * an exponent in the range 01....1xx to 10....0xx,
      * and the most significant 4 bits of the mantissa; see
@@ -4617,6 +4759,10 @@ static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
 }
 
@@ -4635,9 +4781,9 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
             /* 32 bit */
             TCGv_i64 tmp = tcg_temp_new_i64();
             tcg_gen_ext32u_i64(tmp, tcg_rn);
-            tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(rd, MO_64));
+            tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
             tcg_gen_movi_i64(tmp, 0);
-            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(rd));
+            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
             tcg_temp_free_i64(tmp);
             break;
         }
@@ -4645,14 +4791,14 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
         {
             /* 64 bit */
             TCGv_i64 tmp = tcg_const_i64(0);
-            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(rd, MO_64));
-            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(rd));
+            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
+            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
             tcg_temp_free_i64(tmp);
             break;
         }
         case 2:
             /* 64 bit to top half. */
-            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(rd));
+            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
             break;
         }
     } else {
@@ -4661,15 +4807,15 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
         switch (type) {
         case 0:
             /* 32 bit */
-            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(rn, MO_32));
+            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
             break;
         case 1:
             /* 64 bit */
-            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(rn, MO_64));
+            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
             break;
         case 2:
             /* 64 bits from top half */
-            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(rn));
+            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
             break;
         }
     }
@@ -4716,6 +4862,9 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
             break;
         }
 
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_fmov(s, rd, rn, type, itof);
     } else {
         /* actual FP conversions */
@@ -4726,6 +4875,9 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
             return;
         }
 
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
     }
 }
@@ -4826,6 +4978,10 @@ static void disas_simd_ext(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     tcg_resh = tcg_temp_new_i64();
     tcg_resl = tcg_temp_new_i64();
 
@@ -4896,6 +5052,10 @@ static void disas_simd_tb(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     /* This does a table lookup: for every byte element in the input
      * we index into a table formed from up to four vector registers,
      * and then the output is the result of the lookups. Our helper
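
The byte-level lookup the comment above is describing (the architectural behaviour of TBL/TBX, not the TCG helper this function calls) can be sketched as:

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch of TBL/TBX: 'table' is the concatenation of the 1-4 source
     * vectors (16 bytes each), 'indices' the per-byte lookup values,
     * 'result' the destination. TBL writes 0 for an out-of-range index;
     * TBX leaves the existing destination byte unchanged.
     */
    static void tbl_tbx_sketch(uint8_t *result, const uint8_t *indices,
                               size_t nbytes, const uint8_t *table,
                               size_t table_len, int is_tbx)
    {
        for (size_t i = 0; i < nbytes; i++) {
            uint8_t idx = indices[i];
            if (idx < table_len) {
                result[i] = table[idx];
            } else if (!is_tbx) {
                result[i] = 0;
            }
        }
    }
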
@@ -4966,6 +5126,10 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     tcg_resl = tcg_const_i64(0);
     tcg_resh = tcg_const_i64(0);
     tcg_res = tcg_temp_new_i64();
@@ -5099,6 +5263,10 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     esize = 8 << size;
     elements = (is_q ? 128 : 64) / esize;
 
@@ -5231,6 +5399,10 @@ static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     index = imm5 >> (size + 1);
 
     tmp = tcg_temp_new_i64();
@@ -5265,6 +5437,10 @@ static void handle_simd_dupes(DisasContext *s, int rd, int rn,
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     index = imm5 >> (size + 1);
 
     /* This instruction just extracts the specified element and
@@ -5297,6 +5473,11 @@ static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
         unallocated_encoding(s);
         return;
     }
+
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     for (i = 0; i < elements; i++) {
         write_vec_element(s, cpu_reg(s, rn), rd, i, size);
     }
@@ -5326,6 +5507,11 @@ static void handle_simd_inse(DisasContext *s, int rd, int rn,
         unallocated_encoding(s);
         return;
     }
+
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     dst_index = extract32(imm5, 1+size, 5);
     src_index = extract32(imm4, size, 4);
 
@@ -5358,6 +5544,10 @@ static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     idx = extract32(imm5, 1 + size, 4 - size);
     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
 }
@@ -5395,6 +5585,11 @@ static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
             return;
         }
     }
+
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     element = extract32(imm5, 1+size, 4);
 
     tcg_rd = cpu_reg(s, rd);
@@ -5487,6 +5682,10 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     /* See AdvSIMDExpandImm() in ARM ARM */
     switch (cmode_3_1) {
     case 0: /* Replicate(Zeros(24):imm8, 2) */
@@ -5561,7 +5760,7 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
     tcg_rd = new_tmp_a64(s);
 
     for (i = 0; i < 2; i++) {
-        int foffs = i ? fp_reg_hi_offset(rd) : fp_reg_offset(rd, MO_64);
+        int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
 
         if (i == 1 && !is_q) {
             /* non-quad ops clear high half of vector */
@@ -5635,6 +5834,10 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
+
         TCGV_UNUSED_PTR(fpst);
         break;
     case 0xc: /* FMAXNMP */
@@ -5647,6 +5850,10 @@ static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
+
         size = extract32(size, 0, 1) ? 3 : 2;
         fpst = get_fpstatus_ptr();
         break;
@@ -5865,6 +6072,10 @@ static void handle_scalar_simd_shri(DisasContext *s,
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     switch (opcode) {
     case 0x02: /* SSRA / USRA (accumulate) */
         accumulate = true;
@@ -5922,6 +6133,10 @@ static void handle_scalar_simd_shli(DisasContext *s, bool insert,
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     tcg_rn = read_fp_dreg(s, rn);
     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
 
@@ -5977,6 +6192,10 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (is_u_shift) {
         narrowfn = unsigned_narrow_fns[size];
     } else {
@@ -6059,6 +6278,10 @@ static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
         }
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (size == 3) {
         TCGv_i64 tcg_shift = tcg_const_i64(shift);
         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
@@ -6219,6 +6442,11 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
             return;
         }
     }
+
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     /* immh == 0 would be a failure of the decode logic */
     g_assert(immh);
 
@@ -6247,6 +6475,10 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     assert(!(is_scalar && is_q));
 
     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
@@ -6410,6 +6642,10 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (size == 2) {
         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
@@ -6794,6 +7030,10 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
             return;
         }
 
+        if (!fp_access_check(s)) {
+            return;
+        }
+
         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
         return;
     }
@@ -6826,6 +7066,10 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     tcg_rd = tcg_temp_new_i64();
 
     if (size == 3) {
@@ -7029,7 +7273,13 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
                                    int size, int rn, int rd)
 {
     bool is_double = (size == 3);
-    TCGv_ptr fpst = get_fpstatus_ptr();
+    TCGv_ptr fpst;
+
+    if (!fp_access_check(s)) {
+        return;
+    }
+
+    fpst = get_fpstatus_ptr();
 
     if (is_double) {
         TCGv_i64 tcg_op = tcg_temp_new_i64();
@@ -7436,6 +7686,9 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
 
     switch (opcode) {
     case 0x3: /* USQADD / SUQADD*/
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_2misc_satacc(s, true, u, false, size, rn, rd);
         return;
     case 0x7: /* SQABS / SQNEG */
@@ -7455,7 +7708,7 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
         }
         break;
     case 0x12: /* SQXTUN */
-        if (u) {
+        if (!u) {
             unallocated_encoding(s);
             return;
         }
@@ -7465,6 +7718,9 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
         return;
     case 0xc ... 0xf:
@@ -7487,12 +7743,18 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
         case 0x5d: /* UCVTF */
         {
             bool is_signed = (opcode == 0x1d);
+            if (!fp_access_check(s)) {
+                return;
+            }
             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
             return;
         }
         case 0x3d: /* FRECPE */
         case 0x3f: /* FRECPX */
         case 0x7d: /* FRSQRTE */
+            if (!fp_access_check(s)) {
+                return;
+            }
             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
             return;
         case 0x1a: /* FCVTNS */
@@ -7517,6 +7779,9 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
                 unallocated_encoding(s);
                 return;
             }
+            if (!fp_access_check(s)) {
+                return;
+            }
             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
             return;
         default:
@@ -7529,6 +7794,10 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (is_fcvt) {
         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
@@ -7632,6 +7901,10 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     switch (opcode) {
     case 0x02: /* SSRA / USRA (accumulate) */
         accumulate = true;
@@ -7703,6 +7976,10 @@ static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     for (i = 0; i < elements; i++) {
         read_vec_element(s, tcg_rn, rn, i, size);
         if (insert) {
@@ -7738,6 +8015,10 @@ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     /* For the LL variants the store is larger than the load,
      * so if rd == rn we would overwrite parts of our input.
      * So load everything right now and use shifts in the main loop.
@@ -7772,6 +8053,10 @@ static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     tcg_rn = tcg_temp_new_i64();
     tcg_rd = tcg_temp_new_i64();
     tcg_final = tcg_temp_new_i64();
@@ -8268,6 +8553,9 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
         break;
     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
@@ -8277,6 +8565,9 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
         break;
     case 14: /* PMULL, PMULL2 */
@@ -8289,6 +8580,9 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
                 unallocated_encoding(s);
                 return;
             }
+            if (!fp_access_check(s)) {
+                return;
+            }
             handle_pmull_64(s, is_q, rd, rn, rm);
             return;
         }
@@ -8314,6 +8608,10 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
             return;
         }
     is_widening:
+        if (!fp_access_check(s)) {
+            return;
+        }
+
         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
         break;
     default:
@@ -8332,11 +8630,15 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
     int size = extract32(insn, 22, 2);
     bool is_u = extract32(insn, 29, 1);
     bool is_q = extract32(insn, 30, 1);
-    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
-    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
-    TCGv_i64 tcg_res[2];
+    TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
     int pass;
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
+    tcg_op1 = tcg_temp_new_i64();
+    tcg_op2 = tcg_temp_new_i64();
     tcg_res[0] = tcg_temp_new_i64();
     tcg_res[1] = tcg_temp_new_i64();
 
@@ -8439,6 +8741,10 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
         TCGV_UNUSED_PTR(fpst);
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     /* These operations work on the concatenated rm:rn, with each pair of
      * adjacent elements being operated on to produce an element in the result.
      */
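
As a concrete illustration of that pairing (example values and helper name are mine; integer ADDP on four 32-bit elements):

    #include <stdint.h>

    /* rd[i] comes from adjacent pair 2i, 2i+1 of the concatenation rm:rn,
     * with rn supplying the low half. */
    static void addp_4s_sketch(const int32_t rn[4], const int32_t rm[4],
                               int32_t rd[4])
    {
        int32_t concat[8];
        for (int i = 0; i < 4; i++) {
            concat[i] = rn[i];
            concat[i + 4] = rm[i];
        }
        for (int i = 0; i < 4; i++) {
            rd[i] = concat[2 * i] + concat[2 * i + 1];
        }
    }
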
@@ -8631,6 +8937,10 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
     case 0x5f: /* FDIV */
     case 0x7a: /* FABD */
     case 0x7c: /* FCMGT */
+        if (!fp_access_check(s)) {
+            return;
+        }
+
         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
         return;
     default:
@@ -8685,6 +8995,10 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
         break;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (size == 3) {
         for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
@@ -9049,6 +9363,10 @@ static void handle_rev(DisasContext *s, int opcode, bool u,
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (size == 0) {
         /* Special case bytes, use bswap op on each group of elements */
         int groups = dsize / (8 << grp_size);
@@ -9251,6 +9569,10 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
+
         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
         return;
     case 0x4: /* CLS, CLZ */
@@ -9265,6 +9587,9 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
         return;
     case 0x13: /* SHLL, SHLL2 */
@@ -9272,6 +9597,9 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_shll(s, is_q, size, rn, rd);
         return;
     case 0xa: /* CMLT */
@@ -9293,6 +9621,9 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
             unallocated_encoding(s);
             return;
         }
+        if (!fp_access_check(s)) {
+            return;
+        }
         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
         return;
     case 0x7: /* SQABS, SQNEG */
@@ -9328,6 +9659,9 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
                 unallocated_encoding(s);
                 return;
             }
+            if (!fp_access_check(s)) {
+                return;
+            }
             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
             return;
         }
@@ -9386,6 +9720,9 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
                 unallocated_encoding(s);
                 return;
             }
+            if (!fp_access_check(s)) {
+                return;
+            }
             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
             return;
         case 0x56: /* FCVTXN, FCVTXN2 */
@@ -9398,9 +9735,15 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
             /* handle_2misc_narrow does a 2*size -> size operation, but these
              * instructions encode the source size rather than dest size.
              */
+            if (!fp_access_check(s)) {
+                return;
+            }
             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
             return;
         case 0x17: /* FCVTL, FCVTL2 */
+            if (!fp_access_check(s)) {
+                return;
+            }
             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
             return;
         case 0x18: /* FRINTN */
@@ -9445,6 +9788,10 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
         return;
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (need_fpstatus) {
         tcg_fpstatus = get_fpstatus_ptr();
     } else {
@@ -9808,6 +10155,10 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
         }
     }
 
+    if (!fp_access_check(s)) {
+        return;
+    }
+
     if (is_fp) {
         fpst = get_fpstatus_ptr();
     } else {
@@ -10239,6 +10590,8 @@ static void disas_a64_insn(CPUARMState *env, DisasContext *s)
     s->insn = insn;
     s->pc += 4;
 
+    s->fp_access_checked = false;
+
     switch (extract32(insn, 25, 4)) {
     case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
         unallocated_encoding(s);
@@ -10306,7 +10659,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
 #if !defined(CONFIG_USER_ONLY)
     dc->user = (ARM_TBFLAG_AA64_EL(tb->flags) == 0);
 #endif
-    dc->vfp_enabled = 0;
+    dc->cpacr_fpen = ARM_TBFLAG_AA64_FPEN(tb->flags);
     dc->vec_len = 0;
     dc->vec_stride = 0;
     dc->cp_regs = cpu->cp_regs;
@@ -10331,7 +10684,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
                 if (bp->pc == dc->pc) {
-                    gen_exception_insn(dc, 0, EXCP_DEBUG);
+                    gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
                     /* Advance PC so that clearing the breakpoint will
                        invalidate this TB.  */
                     dc->pc += 2;
@@ -10394,7 +10747,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
         if (dc->is_jmp != DISAS_JUMP) {
             gen_a64_set_pc_im(dc->pc);
         }
-        gen_exception(EXCP_DEBUG);
+        gen_exception_internal(EXCP_DEBUG);
     } else {
         switch (dc->is_jmp) {
         case DISAS_NEXT:
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 56e3b4bf7f..a4d920b629 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -25,6 +25,7 @@
 #include <inttypes.h>
 
 #include "cpu.h"
+#include "internals.h"
 #include "disas/disas.h"
 #include "tcg-op.h"
 #include "qemu/log.h"
@@ -182,12 +183,23 @@ static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
 /* Set NZCV flags from the high 4 bits of var.  */
 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
 
-static void gen_exception(int excp)
+static void gen_exception_internal(int excp)
 {
-    TCGv_i32 tmp = tcg_temp_new_i32();
-    tcg_gen_movi_i32(tmp, excp);
-    gen_helper_exception(cpu_env, tmp);
-    tcg_temp_free_i32(tmp);
+    TCGv_i32 tcg_excp = tcg_const_i32(excp);
+
+    assert(excp_is_internal(excp));
+    gen_helper_exception_internal(cpu_env, tcg_excp);
+    tcg_temp_free_i32(tcg_excp);
+}
+
+static void gen_exception(int excp, uint32_t syndrome)
+{
+    TCGv_i32 tcg_excp = tcg_const_i32(excp);
+    TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
+
+    gen_helper_exception_with_syndrome(cpu_env, tcg_excp, tcg_syn);
+    tcg_temp_free_i32(tcg_syn);
+    tcg_temp_free_i32(tcg_excp);
 }
 
 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
@@ -899,6 +911,33 @@ static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
     tcg_gen_movi_i32(cpu_R[15], val);
 }
 
+static inline void
+gen_set_condexec (DisasContext *s)
+{
+    if (s->condexec_mask) {
+        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
+        TCGv_i32 tmp = tcg_temp_new_i32();
+        tcg_gen_movi_i32(tmp, val);
+        store_cpu_field(tmp, condexec_bits);
+    }
+}
+
+static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
+{
+    gen_set_condexec(s);
+    gen_set_pc_im(s, s->pc - offset);
+    gen_exception_internal(excp);
+    s->is_jmp = DISAS_JUMP;
+}
+
+static void gen_exception_insn(DisasContext *s, int offset, int excp, int syn)
+{
+    gen_set_condexec(s);
+    gen_set_pc_im(s, s->pc - offset);
+    gen_exception(excp, syn);
+    s->is_jmp = DISAS_JUMP;
+}
+
 /* Force a TB lookup after an instruction that changes the CPU state.  */
 static inline void gen_lookup_tb(DisasContext *s)
 {
@@ -2913,14 +2952,25 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
     if (!arm_feature(env, ARM_FEATURE_VFP))
         return 1;
 
+    /* FIXME: this access check should not take precedence over UNDEF
+     * for invalid encodings; we will generate incorrect syndrome information
+     * for attempts to execute invalid vfp/neon encodings with FP disabled.
+     */
+    if (!s->cpacr_fpen) {
+        gen_exception_insn(s, 4, EXCP_UDEF,
+                           syn_fp_access_trap(1, 0xe, s->thumb));
+        return 0;
+    }
+
     if (!s->vfp_enabled) {
         /* VFP disabled.  Only allow fmxr/fmrx to/from some control regs.  */
         if ((insn & 0x0fe00fff) != 0x0ee00a10)
             return 1;
         rn = (insn >> 16) & 0xf;
-        if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC
-            && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0)
+        if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC && rn != ARM_VFP_MVFR2
+            && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0) {
             return 1;
+        }
     }
 
     if (extract32(insn, 28, 4) == 0xf) {
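
The syn_fp_access_trap(1, 0xe, s->thumb) value used above is the ESR_ELx syndrome for an FP/SIMD access trap. A hedged sketch of that encoding, following the ESR layout in the ARMv8 ARM (the constant and parameter names here are illustrative, not necessarily those used by the internals.h helper):

    #include <stdbool.h>
    #include <stdint.h>

    /* EC (bits 31:26) = 0x07: access to Advanced SIMD/FP trapped by
     * CPACR/HCPTR; IL (bit 25) = 1 for a 32-bit encoding; CV (bit 24) and
     * COND (bits 23:20) report the condition check. Passing cond = 0xe
     * reports "condition passed/unconditional", as the coprocessor-trap
     * comment later in this patch explains.
     */
    static inline uint32_t fp_access_trap_syndrome(int cv, int cond,
                                                   bool is_16bit)
    {
        return (0x07u << 26)
            | (is_16bit ? 0 : (1u << 25))
            | ((uint32_t)cv << 24)
            | ((uint32_t)cond << 20);
    }
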
@@ -3066,6 +3116,11 @@ static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
                                 gen_helper_vfp_get_fpscr(tmp, cpu_env);
                             }
                             break;
+                        case ARM_VFP_MVFR2:
+                            if (!arm_feature(env, ARM_FEATURE_V8)) {
+                                return 1;
+                            }
+                            /* fall through */
                         case ARM_VFP_MVFR0:
                         case ARM_VFP_MVFR1:
                             if (IS_USER(s)
@@ -3912,25 +3967,6 @@ static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
     s->is_jmp = DISAS_UPDATE;
 }
 
-static inline void
-gen_set_condexec (DisasContext *s)
-{
-    if (s->condexec_mask) {
-        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
-        TCGv_i32 tmp = tcg_temp_new_i32();
-        tcg_gen_movi_i32(tmp, val);
-        store_cpu_field(tmp, condexec_bits);
-    }
-}
-
-static void gen_exception_insn(DisasContext *s, int offset, int excp)
-{
-    gen_set_condexec(s);
-    gen_set_pc_im(s, s->pc - offset);
-    gen_exception(excp);
-    s->is_jmp = DISAS_JUMP;
-}
-
 static void gen_nop_hint(DisasContext *s, int val)
 {
     switch (val) {
@@ -4212,6 +4248,16 @@ static int disas_neon_ls_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
     TCGv_i32 tmp2;
     TCGv_i64 tmp64;
 
+    /* FIXME: this access check should not take precedence over UNDEF
+     * for invalid encodings; we will generate incorrect syndrome information
+     * for attempts to execute invalid vfp/neon encodings with FP disabled.
+     */
+    if (!s->cpacr_fpen) {
+        gen_exception_insn(s, 4, EXCP_UDEF,
+                           syn_fp_access_trap(1, 0xe, s->thumb));
+        return 0;
+    }
+
     if (!s->vfp_enabled)
       return 1;
     VFP_DREG_D(rd, insn);
@@ -4934,6 +4980,16 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
     TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
     TCGv_i64 tmp64;
 
+    /* FIXME: this access check should not take precedence over UNDEF
+     * for invalid encodings; we will generate incorrect syndrome information
+     * for attempts to execute invalid vfp/neon encodings with FP disabled.
+     */
+    if (!s->cpacr_fpen) {
+        gen_exception_insn(s, 4, EXCP_UDEF,
+                           syn_fp_access_trap(1, 0xe, s->thumb));
+        return 0;
+    }
+
     if (!s->vfp_enabled)
       return 1;
     q = (insn & (1 << 6)) != 0;
@@ -6861,10 +6917,53 @@ static int disas_coproc_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
              * runtime; this may result in an exception.
              */
             TCGv_ptr tmpptr;
+            TCGv_i32 tcg_syn;
+            uint32_t syndrome;
+
+            /* Note that since we are an implementation which takes an
+             * exception on a trapped conditional instruction only if the
+             * instruction passes its condition code check, we can take
+             * advantage of the clause in the ARM ARM that allows us to set
+             * the COND field in the instruction to 0xE in all cases.
+             * We could fish the actual condition out of the insn (ARM)
+             * or the condexec bits (Thumb) but it isn't necessary.
+             */
+            switch (cpnum) {
+            case 14:
+                if (is64) {
+                    syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
+                                                 isread, s->thumb);
+                } else {
+                    syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
+                                                rt, isread, s->thumb);
+                }
+                break;
+            case 15:
+                if (is64) {
+                    syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
+                                                 isread, s->thumb);
+                } else {
+                    syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
+                                                rt, isread, s->thumb);
+                }
+                break;
+            default:
+                /* ARMv8 defines that only coprocessors 14 and 15 exist,
+                 * so this can only happen if this is an ARMv7 or earlier CPU,
+                 * in which case the syndrome information won't actually be
+                 * guest visible.
+                 */
+                assert(!arm_feature(env, ARM_FEATURE_V8));
+                syndrome = syn_uncategorized();
+                break;
+            }
+
             gen_set_pc_im(s, s->pc);
             tmpptr = tcg_const_ptr(ri);
-            gen_helper_access_check_cp_reg(cpu_env, tmpptr);
+            tcg_syn = tcg_const_i32(syndrome);
+            gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn);
             tcg_temp_free_ptr(tmpptr);
+            tcg_temp_free_i32(tcg_syn);
         }
 
         /* Handle special cases first */
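
For the 32-bit cp15 case, the syndrome packed above looks roughly like this (field layout per the ARMv8 ARM for EC 0x03, a trapped MCR/MRC with coproc=0b1111; the names are illustrative and this is not the internals.h helper itself):

    #include <stdbool.h>
    #include <stdint.h>

    /* CV/COND as discussed in the comment above (COND forced to 0xe),
     * then Opc2, Opc1, CRn, Rt, CRm and a direction bit (1 = read/MRC).
     * IL is shown as 1, i.e. a 32-bit encoding.
     */
    static inline uint32_t cp15_rt_trap_syndrome(int cv, int cond, int opc1,
                                                 int opc2, int crn, int crm,
                                                 int rt, bool isread)
    {
        return (0x03u << 26) | (1u << 25)
            | ((uint32_t)cv << 24) | ((uint32_t)cond << 20)
            | ((uint32_t)opc2 << 17) | ((uint32_t)opc1 << 14)
            | ((uint32_t)crn << 10) | ((uint32_t)rt << 5)
            | ((uint32_t)crm << 1) | (isread ? 1u : 0);
    }
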
@@ -7116,7 +7215,7 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
     tcg_gen_extu_i32_i64(cpu_exclusive_test, addr);
     tcg_gen_movi_i32(cpu_exclusive_info,
                      size | (rd << 4) | (rt << 8) | (rt2 << 12));
-    gen_exception_insn(s, 4, EXCP_STREX);
+    gen_exception_internal_insn(s, 4, EXCP_STREX);
 }
 #else
 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
@@ -7626,6 +7725,8 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
             store_reg(s, rd, tmp);
             break;
         case 7:
+        {
+            int imm16 = extract32(insn, 0, 4) | (extract32(insn, 8, 12) << 4);
             /* SMC instruction (op1 == 3)
                and undefined instructions (op1 == 0 || op1 == 2)
                will trap */
@@ -7634,8 +7735,9 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
             }
             /* bkpt */
             ARCH(5);
-            gen_exception_insn(s, 4, EXCP_BKPT);
+            gen_exception_insn(s, 4, EXCP_BKPT, syn_aa32_bkpt(imm16, false));
             break;
+        }
         case 0x8: /* signed multiply */
         case 0xa:
         case 0xc:
@@ -8328,27 +8430,39 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                         if (insn & (1 << 5))
                             gen_swap_half(tmp2);
                         gen_smul_dual(tmp, tmp2);
-                        if (insn & (1 << 6)) {
-                            /* This subtraction cannot overflow. */
-                            tcg_gen_sub_i32(tmp, tmp, tmp2);
-                        } else {
-                            /* This addition cannot overflow 32 bits;
-                             * however it may overflow considered as a signed
-                             * operation, in which case we must set the Q flag.
-                             */
-                            gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
-                        }
-                        tcg_temp_free_i32(tmp2);
                         if (insn & (1 << 22)) {
                             /* smlald, smlsld */
+                            TCGv_i64 tmp64_2;
+
                             tmp64 = tcg_temp_new_i64();
+                            tmp64_2 = tcg_temp_new_i64();
                             tcg_gen_ext_i32_i64(tmp64, tmp);
+                            tcg_gen_ext_i32_i64(tmp64_2, tmp2);
                             tcg_temp_free_i32(tmp);
+                            tcg_temp_free_i32(tmp2);
+                            if (insn & (1 << 6)) {
+                                tcg_gen_sub_i64(tmp64, tmp64, tmp64_2);
+                            } else {
+                                tcg_gen_add_i64(tmp64, tmp64, tmp64_2);
+                            }
+                            tcg_temp_free_i64(tmp64_2);
                             gen_addq(s, tmp64, rd, rn);
                             gen_storeq_reg(s, rd, rn, tmp64);
                             tcg_temp_free_i64(tmp64);
                         } else {
                             /* smuad, smusd, smlad, smlsd */
+                            if (insn & (1 << 6)) {
+                                /* This subtraction cannot overflow. */
+                                tcg_gen_sub_i32(tmp, tmp, tmp2);
+                            } else {
+                                /* This addition cannot overflow 32 bits;
+                                 * however it may overflow considered as a
+                                 * signed operation, in which case we must set
+                                 * the Q flag.
+                                 */
+                                gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
+                            }
+                            tcg_temp_free_i32(tmp2);
                             if (rd != 15)
                               {
                                 tmp2 = load_reg(s, rd);
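
A small standalone example of the overflow the rewritten SMLALD/SMLSLD path avoids (example values only; the unsigned additions stand in for the old wrapping 32-bit TCG add without invoking signed-overflow UB):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int16_t a = -32768, b = -32768;
        int32_t p1 = (int32_t)a * a;          /* 0x40000000 */
        int32_t p2 = (int32_t)b * b;          /* 0x40000000 */

        /* old sequence: add in 32 bits, then sign-extend */
        int64_t old_way = (int32_t)((uint32_t)p1 + (uint32_t)p2);
        /* new sequence: widen each product, then add in 64 bits */
        int64_t new_way = (int64_t)p1 + (int64_t)p2;

        printf("old %lld  new %lld\n", (long long)old_way, (long long)new_way);
        /* prints: old -2147483648  new 2147483648 */
        return 0;
    }
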
@@ -8642,11 +8756,12 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
         case 0xf:
             /* swi */
             gen_set_pc_im(s, s->pc);
+            s->svc_imm = extract32(insn, 0, 24);
             s->is_jmp = DISAS_SWI;
             break;
         default:
         illegal_op:
-            gen_exception_insn(s, 4, EXCP_UDEF);
+            gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized());
             break;
         }
     }
@@ -10457,9 +10572,12 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
             break;
 
         case 0xe: /* bkpt */
+        {
+            int imm8 = extract32(insn, 0, 8);
             ARCH(5);
-            gen_exception_insn(s, 2, EXCP_BKPT);
+            gen_exception_insn(s, 2, EXCP_BKPT, syn_aa32_bkpt(imm8, true));
             break;
+        }
 
         case 0xa: /* rev */
             ARCH(6);
@@ -10576,6 +10694,7 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
         if (cond == 0xf) {
             /* swi */
             gen_set_pc_im(s, s->pc);
+            s->svc_imm = extract32(insn, 0, 8);
             s->is_jmp = DISAS_SWI;
             break;
         }
@@ -10611,11 +10730,11 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
     }
     return;
 undef32:
-    gen_exception_insn(s, 4, EXCP_UDEF);
+    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized());
     return;
 illegal_op:
 undef:
-    gen_exception_insn(s, 2, EXCP_UDEF);
+    gen_exception_insn(s, 2, EXCP_UDEF, syn_uncategorized());
 }
 
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
@@ -10665,6 +10784,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
 #if !defined(CONFIG_USER_ONLY)
     dc->user = (ARM_TBFLAG_PRIV(tb->flags) == 0);
 #endif
+    dc->cpacr_fpen = ARM_TBFLAG_CPACR_FPEN(tb->flags);
     dc->vfp_enabled = ARM_TBFLAG_VFPEN(tb->flags);
     dc->vec_len = ARM_TBFLAG_VECLEN(tb->flags);
     dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
@@ -10736,7 +10856,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
         if (dc->pc >= 0xffff0000) {
             /* We always get here via a jump, so know we are not in a
                conditional execution block.  */
-            gen_exception(EXCP_KERNEL_TRAP);
+            gen_exception_internal(EXCP_KERNEL_TRAP);
             dc->is_jmp = DISAS_UPDATE;
             break;
         }
@@ -10744,7 +10864,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
         if (dc->pc >= 0xfffffff0 && IS_M(env)) {
             /* We always get here via a jump, so know we are not in a
                conditional execution block.  */
-            gen_exception(EXCP_EXCEPTION_EXIT);
+            gen_exception_internal(EXCP_EXCEPTION_EXIT);
             dc->is_jmp = DISAS_UPDATE;
             break;
         }
@@ -10753,7 +10873,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
                 if (bp->pc == dc->pc) {
-                    gen_exception_insn(dc, 0, EXCP_DEBUG);
+                    gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
                     /* Advance PC so that clearing the breakpoint will
                        invalidate this TB.  */
                     dc->pc += 2;
@@ -10833,9 +10953,9 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
         if (dc->condjmp) {
             gen_set_condexec(dc);
             if (dc->is_jmp == DISAS_SWI) {
-                gen_exception(EXCP_SWI);
+                gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
             } else {
-                gen_exception(EXCP_DEBUG);
+                gen_exception_internal(EXCP_DEBUG);
             }
             gen_set_label(dc->condlabel);
         }
@@ -10845,11 +10965,11 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
         }
         gen_set_condexec(dc);
         if (dc->is_jmp == DISAS_SWI && !dc->condjmp) {
-            gen_exception(EXCP_SWI);
+            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
         } else {
             /* FIXME: Single stepping a WFI insn will not halt
                the CPU.  */
-            gen_exception(EXCP_DEBUG);
+            gen_exception_internal(EXCP_DEBUG);
         }
     } else {
         /* While branches must always occur at the end of an IT block,
@@ -10881,7 +11001,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
             gen_helper_wfe(cpu_env);
             break;
         case DISAS_SWI:
-            gen_exception(EXCP_SWI);
+            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
             break;
         }
         if (dc->condjmp) {
@@ -10939,6 +11059,11 @@ void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
     int i;
     uint32_t psr;
 
+    if (is_a64(env)) {
+        aarch64_cpu_dump_state(cs, f, cpu_fprintf, flags);
+        return;
+    }
+
     for(i=0;i<16;i++) {
         cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
         if ((i % 4) == 3)
diff --git a/target-arm/translate.h b/target-arm/translate.h
index 3525ffcecb..34328f4660 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -20,13 +20,26 @@ typedef struct DisasContext {
 #if !defined(CONFIG_USER_ONLY)
     int user;
 #endif
-    int vfp_enabled;
+    bool cpacr_fpen; /* FP enabled via CPACR.FPEN */
+    bool vfp_enabled; /* FP enabled via FPSCR.EN */
     int vec_len;
     int vec_stride;
+    /* Immediate value in AArch32 SVC insn; must be set if is_jmp == DISAS_SWI
+     * so that top level loop can generate correct syndrome information.
+     */
+    uint32_t svc_imm;
     int aarch64;
     int current_pl;
     GHashTable *cp_regs;
     uint64_t features; /* CPU features bits */
+    /* Because unallocated encodings generate different exception syndrome
+     * information from traps due to FP being disabled, we can't do a single
+     * "is fp access disabled" check at a high level in the decode tree.
+     * To help in catching bugs where the access check was forgotten in some
+     * code path, we set this flag when the access check is done, and assert
+     * that it is set at the point where we actually touch the FP regs.
+     */
+    bool fp_access_checked;
 #define TMP_A64_MAX 16
     int tmp_a64_count;
     TCGv_i64 tmp_a64[TMP_A64_MAX];
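
The flag added here is consumed by the fp_access_check() helper in translate-a64.c whose call sites appear throughout the first part of this patch. A rough sketch of how the check-and-latch pattern fits together (it uses the patch's own identifiers but is not a verbatim copy of the helper):

    /* Sketch only: each A64 FP/SIMD decode path calls this at most once per
     * instruction; disas_a64_insn() resets fp_access_checked before decoding,
     * and code that touches the FP registers can assert the flag is set.
     */
    static bool fp_access_check_sketch(DisasContext *s)
    {
        assert(!s->fp_access_checked);
        s->fp_access_checked = true;

        if (s->cpacr_fpen) {
            return true;                    /* CPACR.FPEN allows the access */
        }
        /* FP disabled: generate the access trap instead of translating */
        gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false));
        return false;
    }
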
@@ -59,6 +72,8 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
                                         TranslationBlock *tb,
                                         bool search_pc);
 void gen_a64_set_pc_im(uint64_t val);
+void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
+                            fprintf_function cpu_fprintf, int flags);
 #else
 static inline void a64_translate_init(void)
 {
@@ -73,6 +88,12 @@ static inline void gen_intermediate_code_internal_a64(ARMCPU *cpu,
 static inline void gen_a64_set_pc_im(uint64_t val)
 {
 }
+
+static inline void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
+                                          fprintf_function cpu_fprintf,
+                                          int flags)
+{
+}
 #endif
 
 void arm_gen_test_cc(int cc, int label);