summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--hw/arm/virt.c25
-rw-r--r--hw/char/imx_serial.c50
-rw-r--r--hw/gpio/imx_gpio.c27
-rw-r--r--hw/i2c/imx_i2c.c43
-rw-r--r--hw/intc/arm_gic_kvm.c22
-rw-r--r--hw/intc/imx_avic.c44
-rw-r--r--hw/misc/imx_ccm.c34
-rw-r--r--hw/net/imx_fec.c64
-rw-r--r--hw/timer/imx_epit.c48
-rw-r--r--hw/timer/imx_gpt.c56
-rw-r--r--include/hw/intc/arm_gic_common.h1
-rw-r--r--target-arm/cpu.h83
-rw-r--r--target-arm/helper.c388
-rw-r--r--target-arm/internals.h40
-rw-r--r--target-arm/op_helper.c18
-rw-r--r--target-arm/translate.c45
16 files changed, 679 insertions, 309 deletions
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 5d38c47444..77d9267599 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -923,7 +923,7 @@ static void machvirt_init(MachineState *machine)
     qemu_irq pic[NUM_IRQS];
     MemoryRegion *sysmem = get_system_memory();
     int gic_version = vms->gic_version;
-    int n;
+    int n, max_cpus;
     MemoryRegion *ram = g_new(MemoryRegion, 1);
     const char *cpu_model = machine->cpu_model;
     VirtBoardInfo *vbi;
@@ -957,6 +957,22 @@ static void machvirt_init(MachineState *machine)
         exit(1);
     }
 
+    /* The maximum number of CPUs depends on the GIC version, or on how
+     * many redistributors we can fit into the memory map.
+     */
+    if (gic_version == 3) {
+        max_cpus = vbi->memmap[VIRT_GIC_REDIST].size / 0x20000;
+    } else {
+        max_cpus = GIC_NCPU;
+    }
+
+    if (smp_cpus > max_cpus) {
+        error_report("Number of SMP CPUs requested (%d) exceeds max CPUs "
+                     "supported by machine 'mach-virt' (%d)",
+                     smp_cpus, max_cpus);
+        exit(1);
+    }
+
     vbi->smp_cpus = smp_cpus;
 
     if (machine->ram_size > vbi->memmap[VIRT_MEM].size) {
@@ -1155,10 +1171,11 @@ static void virt_class_init(ObjectClass *oc, void *data)
 
     mc->desc = "ARM Virtual Machine",
     mc->init = machvirt_init;
-    /* Our maximum number of CPUs depends on how many redistributors
-     * we can fit into memory map
+    /* Start max_cpus at the maximum QEMU supports. We'll further restrict
+     * it later in machvirt_init, where we have more information about the
+     * configuration of the particular instance.
      */
-    mc->max_cpus = a15memmap[VIRT_GIC_REDIST].size / 0x20000;
+    mc->max_cpus = MAX_CPUMASK_BITS;
     mc->has_dynamic_sysbus = true;
     mc->block_default_type = IF_VIRTIO;
     mc->no_cdrom = 1;
diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c
index f0c4c722d6..f30f9c24be 100644
--- a/hw/char/imx_serial.c
+++ b/hw/char/imx_serial.c
@@ -22,25 +22,17 @@
 #include "sysemu/sysemu.h"
 #include "sysemu/char.h"
 
-//#define DEBUG_SERIAL 1
-#ifdef DEBUG_SERIAL
-#define DPRINTF(fmt, args...) \
-do { printf("%s: " fmt , TYPE_IMX_SERIAL, ##args); } while (0)
-#else
-#define DPRINTF(fmt, args...) do {} while (0)
+#ifndef DEBUG_IMX_UART
+#define DEBUG_IMX_UART 0
 #endif
 
-/*
- * Define to 1 for messages about attempts to
- * access unimplemented registers or similar.
- */
-//#define DEBUG_IMPLEMENTATION 1
-#ifdef DEBUG_IMPLEMENTATION
-#  define IPRINTF(fmt, args...) \
-    do  { fprintf(stderr, "%s: " fmt, TYPE_IMX_SERIAL, ##args); } while (0)
-#else
-#  define IPRINTF(fmt, args...) do {} while (0)
-#endif
+#define DPRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_UART) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_SERIAL, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static const VMStateDescription vmstate_imx_serial = {
     .name = TYPE_IMX_SERIAL,
@@ -115,7 +107,8 @@ static uint64_t imx_serial_read(void *opaque, hwaddr offset,
     IMXSerialState *s = (IMXSerialState *)opaque;
     uint32_t c;
 
-    DPRINTF("read(offset=%x)\n", offset >> 2);
+    DPRINTF("read(offset=0x%" HWADDR_PRIx ")\n", offset);
+
     switch (offset >> 2) {
     case 0x0: /* URXD */
         c = s->readbuff;
@@ -167,7 +160,8 @@ static uint64_t imx_serial_read(void *opaque, hwaddr offset,
         return 0x0; /* TODO */
 
     default:
-        IPRINTF("%s: bad offset: 0x%x\n", __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_SERIAL, __func__, offset);
         return 0;
     }
 }
@@ -178,9 +172,8 @@ static void imx_serial_write(void *opaque, hwaddr offset,
     IMXSerialState *s = (IMXSerialState *)opaque;
     unsigned char ch;
 
-    DPRINTF("write(offset=%x, value = %x) to %s\n",
-            offset >> 2,
-            (unsigned int)value, s->chr ? s->chr->label : "NODEV");
+    DPRINTF("write(offset=0x%" HWADDR_PRIx ", value = 0x%x) to %s\n",
+            offset, (unsigned int)value, s->chr ? s->chr->label : "NODEV");
 
     switch (offset >> 2) {
     case 0x10: /* UTXD */
@@ -198,7 +191,9 @@ static void imx_serial_write(void *opaque, hwaddr offset,
 
     case 0x20: /* UCR1 */
         s->ucr1 = value & 0xffff;
+
         DPRINTF("write(ucr1=%x)\n", (unsigned int)value);
+
         imx_update(s);
         break;
 
@@ -266,12 +261,14 @@ static void imx_serial_write(void *opaque, hwaddr offset,
 
     case 0x2d: /* UTS1 */
     case 0x23: /* UCR4 */
-        IPRINTF("Unimplemented Register %x written to\n", offset >> 2);
+        qemu_log_mask(LOG_UNIMP, "[%s]%s: Unimplemented reg 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_SERIAL, __func__, offset);
         /* TODO */
         break;
 
     default:
-        IPRINTF("%s: Bad offset 0x%x\n", __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_SERIAL, __func__, offset);
     }
 }
 
@@ -284,7 +281,9 @@ static int imx_can_receive(void *opaque)
 static void imx_put_data(void *opaque, uint32_t value)
 {
     IMXSerialState *s = (IMXSerialState *)opaque;
+
     DPRINTF("received char\n");
+
     s->usr1 |= USR1_RRDY;
     s->usr2 |= USR2_RDR;
     s->uts1 &= ~UTS1_RXEMPTY;
@@ -319,8 +318,7 @@ static void imx_serial_realize(DeviceState *dev, Error **errp)
         qemu_chr_add_handlers(s->chr, imx_can_receive, imx_receive,
                               imx_event, s);
     } else {
-        DPRINTF("No char dev for uart at 0x%lx\n",
-                (unsigned long)s->iomem.ram_addr);
+        DPRINTF("No char dev for uart\n");
     }
 }
 
diff --git a/hw/gpio/imx_gpio.c b/hw/gpio/imx_gpio.c
index d56ffcd8d7..3170585a27 100644
--- a/hw/gpio/imx_gpio.c
+++ b/hw/gpio/imx_gpio.c
@@ -29,11 +29,12 @@ typedef enum IMXGPIOLevel {
 } IMXGPIOLevel;
 
 #define DPRINTF(fmt, args...) \
-          do { \
-              if (DEBUG_IMX_GPIO) { \
-                  fprintf(stderr, "%s: " fmt , __func__, ##args); \
-              } \
-          } while (0)
+    do { \
+        if (DEBUG_IMX_GPIO) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_GPIO, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static const char *imx_gpio_reg_name(uint32_t reg)
 {
@@ -176,19 +177,19 @@ static uint64_t imx_gpio_read(void *opaque, hwaddr offset, unsigned size)
         if (s->has_edge_sel) {
             reg_value = s->edge_sel;
         } else {
-            qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: EDGE_SEL register not "
+            qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: EDGE_SEL register not "
                           "present on this version of GPIO device\n",
                           TYPE_IMX_GPIO, __func__);
         }
         break;
 
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad register at offset %d\n",
-                      TYPE_IMX_GPIO, __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_GPIO, __func__, offset);
         break;
     }
 
-    DPRINTF("(%s) = 0x%"PRIx32"\n", imx_gpio_reg_name(offset), reg_value);
+    DPRINTF("(%s) = 0x%" PRIx32 "\n", imx_gpio_reg_name(offset), reg_value);
 
     return reg_value;
 }
@@ -198,7 +199,7 @@ static void imx_gpio_write(void *opaque, hwaddr offset, uint64_t value,
 {
     IMXGPIOState *s = IMX_GPIO(opaque);
 
-    DPRINTF("(%s, value = 0x%"PRIx32")\n", imx_gpio_reg_name(offset),
+    DPRINTF("(%s, value = 0x%" PRIx32 ")\n", imx_gpio_reg_name(offset),
             (uint32_t)value);
 
     switch (offset) {
@@ -238,15 +239,15 @@ static void imx_gpio_write(void *opaque, hwaddr offset, uint64_t value,
             s->edge_sel = value;
             imx_gpio_set_all_int_lines(s);
         } else {
-            qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: EDGE_SEL register not "
+            qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: EDGE_SEL register not "
                           "present on this version of GPIO device\n",
                           TYPE_IMX_GPIO, __func__);
         }
         break;
 
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad register at offset %d\n",
-                      TYPE_IMX_GPIO, __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_GPIO, __func__, offset);
         break;
     }
 
diff --git a/hw/i2c/imx_i2c.c b/hw/i2c/imx_i2c.c
index 8474872e07..cb62c7a2c9 100644
--- a/hw/i2c/imx_i2c.c
+++ b/hw/i2c/imx_i2c.c
@@ -21,13 +21,17 @@
 #include "hw/i2c/imx_i2c.h"
 #include "hw/i2c/i2c.h"
 
-#ifndef IMX_I2C_DEBUG
-#define IMX_I2C_DEBUG                 0
+#ifndef DEBUG_IMX_I2C
+#define DEBUG_IMX_I2C 0
 #endif
 
-#if IMX_I2C_DEBUG
-#define DPRINT(fmt, args...)              \
-    do { fprintf(stderr, "%s: "fmt, __func__, ## args); } while (0)
+#define DPRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_I2C) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_I2C, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static const char *imx_i2c_get_regname(unsigned offset)
 {
@@ -46,9 +50,6 @@ static const char *imx_i2c_get_regname(unsigned offset)
         return "[?]";
     }
 }
-#else
-#define DPRINT(fmt, args...)              do { } while (0)
-#endif
 
 static inline bool imx_i2c_is_enabled(IMXI2CState *s)
 {
@@ -121,11 +122,11 @@ static uint64_t imx_i2c_read(void *opaque, hwaddr offset,
 
             if (s->address == ADDR_RESET) {
                 /* something is wrong as the address is not set */
-                qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Trying to read "
+                qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Trying to read "
                               "without specifying the slave address\n",
                               TYPE_IMX_I2C, __func__);
             } else if (s->i2cr & I2CR_MTX) {
-                qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Trying to read "
+                qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Trying to read "
                               "but MTX is set\n", TYPE_IMX_I2C, __func__);
             } else {
                 /* get the next byte */
@@ -134,7 +135,7 @@ static uint64_t imx_i2c_read(void *opaque, hwaddr offset,
                 if (ret >= 0) {
                     imx_i2c_raise_interrupt(s);
                 } else {
-                    qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: read failed "
+                    qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: read failed "
                                   "for device 0x%02x\n", TYPE_IMX_I2C,
                                   __func__, s->address);
                     ret = 0xff;
@@ -143,19 +144,19 @@ static uint64_t imx_i2c_read(void *opaque, hwaddr offset,
 
             s->i2dr_read = ret;
         } else {
-            qemu_log_mask(LOG_UNIMP, "%s[%s]: slave mode not implemented\n",
+            qemu_log_mask(LOG_UNIMP, "[%s]%s: slave mode not implemented\n",
                           TYPE_IMX_I2C, __func__);
         }
         break;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
-                      TYPE_IMX_I2C, __func__, s->address);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_I2C, __func__, offset);
         value = 0;
         break;
     }
 
-    DPRINT("read %s [0x%02x] -> 0x%02x\n", imx_i2c_get_regname(offset),
-           (unsigned int)offset, value);
+    DPRINTF("read %s [0x%" HWADDR_PRIx "] -> 0x%02x\n",
+            imx_i2c_get_regname(offset), offset, value);
 
     return (uint64_t)value;
 }
@@ -165,8 +166,8 @@ static void imx_i2c_write(void *opaque, hwaddr offset,
 {
     IMXI2CState *s = IMX_I2C(opaque);
 
-    DPRINT("write %s [0x%02x] <- 0x%02x\n", imx_i2c_get_regname(offset),
-           (unsigned int)offset, (int)value);
+    DPRINTF("write %s [0x%" HWADDR_PRIx "] <- 0x%02x\n",
+            imx_i2c_get_regname(offset), offset, (int)value);
 
     value &= 0xff;
 
@@ -264,13 +265,13 @@ static void imx_i2c_write(void *opaque, hwaddr offset,
                 }
             }
         } else {
-            qemu_log_mask(LOG_UNIMP, "%s[%s]: slave mode not implemented\n",
+            qemu_log_mask(LOG_UNIMP, "[%s]%s: slave mode not implemented\n",
                           TYPE_IMX_I2C, __func__);
         }
         break;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
-                      TYPE_IMX_I2C, __func__, s->address);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_I2C, __func__, offset);
         break;
     }
 }
diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
index e8b2386908..0ceebbf87e 100644
--- a/hw/intc/arm_gic_kvm.c
+++ b/hw/intc/arm_gic_kvm.c
@@ -20,6 +20,7 @@
  */
 
 #include "hw/sysbus.h"
+#include "migration/migration.h"
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
 #include "gic_internal.h"
@@ -307,11 +308,6 @@ static void kvm_arm_gic_put(GICState *s)
     int num_cpu;
     int num_irq;
 
-    if (!kvm_arm_gic_can_save_restore(s)) {
-            DPRINTF("Cannot put kernel gic state, no kernel interface");
-            return;
-    }
-
     /* Note: We do the restore in a slightly different order than the save
      * (where the order doesn't matter and is simply ordered according to the
      * register offset values */
@@ -411,11 +407,6 @@ static void kvm_arm_gic_get(GICState *s)
     int i;
     int cpu;
 
-    if (!kvm_arm_gic_can_save_restore(s)) {
-            DPRINTF("Cannot get kernel gic state, no kernel interface");
-            return;
-    }
-
     /*****************************************************************
      * Distributor State
      */
@@ -503,7 +494,10 @@ static void kvm_arm_gic_reset(DeviceState *dev)
     KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s);
 
     kgc->parent_reset(dev);
-    kvm_arm_gic_put(s);
+
+    if (kvm_arm_gic_can_save_restore(s)) {
+        kvm_arm_gic_put(s);
+    }
 }
 
 static void kvm_arm_gic_realize(DeviceState *dev, Error **errp)
@@ -573,6 +567,12 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp)
                             KVM_DEV_ARM_VGIC_GRP_ADDR,
                             KVM_VGIC_V2_ADDR_TYPE_CPU,
                             s->dev_fd);
+
+    if (!kvm_arm_gic_can_save_restore(s)) {
+        error_setg(&s->migration_blocker, "This operating system kernel does "
+                                          "not support vGICv2 migration");
+        migrate_add_blocker(s->migration_blocker);
+    }
 }
 
 static void kvm_arm_gic_class_init(ObjectClass *klass, void *data)
diff --git a/hw/intc/imx_avic.c b/hw/intc/imx_avic.c
index 96c376b6af..e0535ffc57 100644
--- a/hw/intc/imx_avic.c
+++ b/hw/intc/imx_avic.c
@@ -17,27 +17,17 @@
 
 #include "hw/intc/imx_avic.h"
 
-#define DEBUG_INT 1
-#undef DEBUG_INT /* comment out for debugging */
-
-#ifdef DEBUG_INT
-#define DPRINTF(fmt, args...) \
-do { printf("%s: " fmt , TYPE_IMX_AVIC, ##args); } while (0)
-#else
-#define DPRINTF(fmt, args...) do {} while (0)
+#ifndef DEBUG_IMX_AVIC
+#define DEBUG_IMX_AVIC 0
 #endif
 
-/*
- * Define to 1 for messages about attempts to
- * access unimplemented registers or similar.
- */
-#define DEBUG_IMPLEMENTATION 1
-#if DEBUG_IMPLEMENTATION
-#  define IPRINTF(fmt, args...) \
-    do  { fprintf(stderr, "%s: " fmt, TYPE_IMX_AVIC, ##args); } while (0)
-#else
-#  define IPRINTF(fmt, args...) do {} while (0)
-#endif
+#define DPRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_AVIC) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_AVIC, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static const VMStateDescription vmstate_imx_avic = {
     .name = TYPE_IMX_AVIC,
@@ -115,8 +105,8 @@ static uint64_t imx_avic_read(void *opaque,
 {
     IMXAVICState *s = (IMXAVICState *)opaque;
 
+    DPRINTF("read(offset = 0x%" HWADDR_PRIx ")\n", offset);
 
-    DPRINTF("read(offset = 0x%x)\n", offset >> 2);
     switch (offset >> 2) {
     case 0: /* INTCNTL */
         return s->intcntl;
@@ -213,7 +203,8 @@ static uint64_t imx_avic_read(void *opaque,
         return 0x4;
 
     default:
-        IPRINTF("%s: Bad offset 0x%x\n", __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_AVIC, __func__, offset);
         return 0;
     }
 }
@@ -225,13 +216,13 @@ static void imx_avic_write(void *opaque, hwaddr offset,
 
     /* Vector Registers not yet supported */
     if (offset >= 0x100 && offset <= 0x2fc) {
-        IPRINTF("%s to vector register %d ignored\n", __func__,
-                (unsigned int)((offset - 0x100) >> 2));
+        qemu_log_mask(LOG_UNIMP, "[%s]%s: vector %d ignored\n",
+                      TYPE_IMX_AVIC, __func__, (int)((offset - 0x100) >> 2));
         return;
     }
 
-    DPRINTF("%s(0x%x) = %x\n", __func__,
-            (unsigned int)offset>>2, (unsigned int)val);
+    DPRINTF("(0x%" HWADDR_PRIx ") = 0x%x\n", offset, (unsigned int)val);
+
     switch (offset >> 2) {
     case 0: /* Interrupt Control Register, INTCNTL */
         s->intcntl = val & (ABFEN | NIDIS | FIDIS | NIAD | FIAD | NM);
@@ -305,7 +296,8 @@ static void imx_avic_write(void *opaque, hwaddr offset,
         return;
 
     default:
-        IPRINTF("%s: Bad offset %x\n", __func__, (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_AVIC, __func__, offset);
     }
     imx_avic_update(s);
 }
diff --git a/hw/misc/imx_ccm.c b/hw/misc/imx_ccm.c
index 2e19dbb1bb..4cc2bbc0ed 100644
--- a/hw/misc/imx_ccm.c
+++ b/hw/misc/imx_ccm.c
@@ -16,14 +16,18 @@
 #define CKIH_FREQ 26000000 /* 26MHz crystal input */
 #define CKIL_FREQ    32768 /* nominal 32khz clock */
 
-//#define DEBUG_CCM 1
-#ifdef DEBUG_CCM
-#define DPRINTF(fmt, args...) \
-do { printf("%s: " fmt , TYPE_IMX_CCM, ##args); } while (0)
-#else
-#define DPRINTF(fmt, args...) do {} while (0)
+#ifndef DEBUG_IMX_CCM
+#define DEBUG_IMX_CCM 0
 #endif
 
+#define DPRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_CCM) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_CCM, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
+
 static int imx_ccm_post_load(void *opaque, int version_id);
 
 static const VMStateDescription vmstate_imx_ccm = {
@@ -109,7 +113,7 @@ static void update_clocks(IMXCCMState *s)
     s->hsp_clk_freq = s->mcu_clk_freq / (1 + EXTRACT(s->pdr0, HSP));
     s->ipg_clk_freq = s->hsp_clk_freq / (1 + EXTRACT(s->pdr0, IPG));
 
-    DPRINTF("%s: mcu %uMHz, HSP %uMHz, IPG %uHz\n", __func__,
+    DPRINTF("mcu %uMHz, HSP %uMHz, IPG %uHz\n",
             s->mcu_clk_freq / 1000000,
             s->hsp_clk_freq / 1000000,
             s->ipg_clk_freq);
@@ -135,7 +139,8 @@ static uint64_t imx_ccm_read(void *opaque, hwaddr offset,
 {
     IMXCCMState *s = (IMXCCMState *)opaque;
 
-    DPRINTF("%s(offset=%x)", __func__, offset >> 2);
+    DPRINTF("(offset=0x%" HWADDR_PRIx ")\n", offset);
+
     switch (offset >> 2) {
     case 0: /* CCMR */
         DPRINTF(" ccmr = 0x%x\n", s->ccmr);
@@ -166,9 +171,11 @@ static uint64_t imx_ccm_read(void *opaque, hwaddr offset,
     case 23:
         DPRINTF(" pcmr0 = 0x%x\n", s->pmcr0);
         return s->pmcr0;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_CCM, __func__, offset);
+        return 0;
     }
-    DPRINTF(" return 0\n");
-    return 0;
 }
 
 static void imx_ccm_write(void *opaque, hwaddr offset,
@@ -176,8 +183,9 @@ static void imx_ccm_write(void *opaque, hwaddr offset,
 {
     IMXCCMState *s = (IMXCCMState *)opaque;
 
-    DPRINTF("%s(offset=%x, value = %x)\n", __func__,
-            offset >> 2, (unsigned int)value);
+    DPRINTF("(offset=0x%" HWADDR_PRIx ", value = 0x%x)\n",
+            offset, (unsigned int)value);
+
     switch (offset >> 2) {
     case 0:
         s->ccmr = CCMR_FPMF | (value & 0x3b6fdfff);
@@ -205,6 +213,8 @@ static void imx_ccm_write(void *opaque, hwaddr offset,
         return;
 
     default:
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_CCM, __func__, offset);
         return;
     }
     update_clocks(s);
diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c
index 725f3fa335..c50bf7ff34 100644
--- a/hw/net/imx_fec.c
+++ b/hw/net/imx_fec.c
@@ -27,31 +27,29 @@
 /* For crc32 */
 #include <zlib.h>
 
-#ifndef IMX_FEC_DEBUG
-#define IMX_FEC_DEBUG          0
+#ifndef DEBUG_IMX_FEC
+#define DEBUG_IMX_FEC 0
 #endif
 
-#ifndef IMX_PHY_DEBUG
-#define IMX_PHY_DEBUG          0
-#endif
-
-#if IMX_FEC_DEBUG
-#define FEC_PRINTF(fmt, ...) \
-    do { fprintf(stderr, "%s[%s]: " fmt , TYPE_IMX_FEC, __func__, \
-                 ## __VA_ARGS__); \
+#define FEC_PRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_FEC) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_FEC, \
+                                             __func__, ##args); \
+        } \
     } while (0)
-#else
-#define FEC_PRINTF(fmt, ...) do {} while (0)
+
+#ifndef DEBUG_IMX_PHY
+#define DEBUG_IMX_PHY 0
 #endif
 
-#if IMX_PHY_DEBUG
-#define PHY_PRINTF(fmt, ...) \
-    do { fprintf(stderr, "%s.phy[%s]: " fmt , TYPE_IMX_FEC, __func__, \
-                 ## __VA_ARGS__); \
+#define PHY_PRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_PHY) { \
+            fprintf(stderr, "[%s.phy]%s: " fmt , TYPE_IMX_FEC, \
+                                                 __func__, ##args); \
+        } \
     } while (0)
-#else
-#define PHY_PRINTF(fmt, ...) do {} while (0)
-#endif
 
 static const VMStateDescription vmstate_imx_fec = {
     .name = TYPE_IMX_FEC,
@@ -182,12 +180,12 @@ static uint32_t do_phy_read(IMXFECState *s, int reg)
     case 18:
     case 27:
     case 31:
-        qemu_log_mask(LOG_UNIMP, "%s.phy[%s]: reg %d not implemented\n",
+        qemu_log_mask(LOG_UNIMP, "[%s.phy]%s: reg %d not implemented\n",
                       TYPE_IMX_FEC, __func__, reg);
         val = 0;
         break;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s.phy]%s: Bad address at offset %d\n",
                       TYPE_IMX_FEC, __func__, reg);
         val = 0;
         break;
@@ -230,11 +228,11 @@ static void do_phy_write(IMXFECState *s, int reg, uint32_t val)
     case 18:
     case 27:
     case 31:
-        qemu_log_mask(LOG_UNIMP, "%s.phy[%s]: reg %d not implemented\n",
+        qemu_log_mask(LOG_UNIMP, "[%s.phy)%s: reg %d not implemented\n",
                       TYPE_IMX_FEC, __func__, reg);
         break;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s.phy[%s]: Bad address at offset %d\n",
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s.phy]%s: Bad address at offset %d\n",
                       TYPE_IMX_FEC, __func__, reg);
         break;
     }
@@ -357,7 +355,7 @@ static uint64_t imx_fec_read(void *opaque, hwaddr addr, unsigned size)
 {
     IMXFECState *s = IMX_FEC(opaque);
 
-    FEC_PRINTF("reading from @ 0x%03x\n", (int)addr);
+    FEC_PRINTF("reading from @ 0x%" HWADDR_PRIx "\n", addr);
 
     switch (addr & 0x3ff) {
     case 0x004:
@@ -417,8 +415,8 @@ static uint64_t imx_fec_read(void *opaque, hwaddr addr, unsigned size)
     case 0x308:
         return s->miigsk_enr;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
-                      TYPE_IMX_FEC, __func__, (int)addr);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_FEC, __func__, addr);
         return 0;
     }
 }
@@ -428,7 +426,7 @@ static void imx_fec_write(void *opaque, hwaddr addr,
 {
     IMXFECState *s = IMX_FEC(opaque);
 
-    FEC_PRINTF("writing 0x%08x @ 0x%03x\n", (int)value, (int)addr);
+    FEC_PRINTF("writing 0x%08x @ 0x%" HWADDR_PRIx "\n", (int)value, addr);
 
     switch (addr & 0x3ff) {
     case 0x004: /* EIR */
@@ -530,8 +528,8 @@ static void imx_fec_write(void *opaque, hwaddr addr,
         s->miigsk_enr = (value & 0x2) ? 0x6 : 0;
         break;
     default:
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad address at offset %d\n",
-                      TYPE_IMX_FEC, __func__, (int)addr);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_FEC, __func__, addr);
         break;
     }
 
@@ -561,7 +559,7 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf,
     FEC_PRINTF("len %d\n", (int)size);
 
     if (!s->rx_enabled) {
-        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Unexpected packet\n",
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n",
                       TYPE_IMX_FEC, __func__);
         return 0;
     }
@@ -592,14 +590,16 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf,
              * save the remainder for when more RX buffers are
              * available, or flag an error.
              */
-            qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Lost end of frame\n",
+            qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Lost end of frame\n",
                           TYPE_IMX_FEC, __func__);
             break;
         }
         buf_len = (size <= s->emrbr) ? size : s->emrbr;
         bd.length = buf_len;
         size -= buf_len;
-        FEC_PRINTF("rx_bd %x length %d\n", addr, bd.length);
+
+        FEC_PRINTF("rx_bd 0x%x length %d\n", addr, bd.length);
+
         /* The last 4 bytes are the CRC.  */
         if (size < 4) {
             buf_len += size - 4;
diff --git a/hw/timer/imx_epit.c b/hw/timer/imx_epit.c
index 9649851526..967be4ad27 100644
--- a/hw/timer/imx_epit.c
+++ b/hw/timer/imx_epit.c
@@ -16,8 +16,17 @@
 #include "hw/misc/imx_ccm.h"
 #include "qemu/main-loop.h"
 
-#define DEBUG_TIMER 0
-#if DEBUG_TIMER
+#ifndef DEBUG_IMX_EPIT
+#define DEBUG_IMX_EPIT 0
+#endif
+
+#define DPRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_EPIT) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_EPIT, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static char const *imx_epit_reg_name(uint32_t reg)
 {
@@ -37,24 +46,6 @@ static char const *imx_epit_reg_name(uint32_t reg)
     }
 }
 
-#  define DPRINTF(fmt, args...) \
-    do { fprintf(stderr, "%s: " fmt , __func__, ##args); } while (0)
-#else
-#  define DPRINTF(fmt, args...) do {} while (0)
-#endif
-
-/*
- * Define to 1 for messages about attempts to
- * access unimplemented registers or similar.
- */
-#define DEBUG_IMPLEMENTATION 1
-#if DEBUG_IMPLEMENTATION
-#  define IPRINTF(fmt, args...) \
-          do { fprintf(stderr, "%s: " fmt, __func__, ##args); } while (0)
-#else
-#  define IPRINTF(fmt, args...) do {} while (0)
-#endif
-
 /*
  * Exact clock frequencies vary from board to board.
  * These are typical.
@@ -136,9 +127,8 @@ static uint64_t imx_epit_read(void *opaque, hwaddr offset, unsigned size)
 {
     IMXEPITState *s = IMX_EPIT(opaque);
     uint32_t reg_value = 0;
-    uint32_t reg = offset >> 2;
 
-    switch (reg) {
+    switch (offset >> 2) {
     case 0: /* Control Register */
         reg_value = s->cr;
         break;
@@ -161,11 +151,12 @@ static uint64_t imx_epit_read(void *opaque, hwaddr offset, unsigned size)
         break;
 
     default:
-        IPRINTF("Bad offset %x\n", reg);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_EPIT, __func__, offset);
         break;
     }
 
-    DPRINTF("(%s) = 0x%08x\n", imx_epit_reg_name(reg), reg_value);
+    DPRINTF("(%s) = 0x%08x\n", imx_epit_reg_name(offset >> 2), reg_value);
 
     return reg_value;
 }
@@ -190,12 +181,12 @@ static void imx_epit_write(void *opaque, hwaddr offset, uint64_t value,
                            unsigned size)
 {
     IMXEPITState *s = IMX_EPIT(opaque);
-    uint32_t reg = offset >> 2;
     uint64_t oldcr;
 
-    DPRINTF("(%s, value = 0x%08x)\n", imx_epit_reg_name(reg), (uint32_t)value);
+    DPRINTF("(%s, value = 0x%08x)\n", imx_epit_reg_name(offset >> 2),
+            (uint32_t)value);
 
-    switch (reg) {
+    switch (offset >> 2) {
     case 0: /* CR */
 
         oldcr = s->cr;
@@ -271,7 +262,8 @@ static void imx_epit_write(void *opaque, hwaddr offset, uint64_t value,
         break;
 
     default:
-        IPRINTF("Bad offset %x\n", reg);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_EPIT, __func__, offset);
 
         break;
     }
diff --git a/hw/timer/imx_gpt.c b/hw/timer/imx_gpt.c
index 4bac67d333..7257f4201a 100644
--- a/hw/timer/imx_gpt.c
+++ b/hw/timer/imx_gpt.c
@@ -16,11 +16,17 @@
 #include "hw/misc/imx_ccm.h"
 #include "qemu/main-loop.h"
 
-/*
- * Define to 1 for debug messages
- */
-#define DEBUG_TIMER 0
-#if DEBUG_TIMER
+#ifndef DEBUG_IMX_GPT
+#define DEBUG_IMX_GPT 0
+#endif
+
+#define DPRINTF(fmt, args...) \
+    do { \
+        if (DEBUG_IMX_GPT) { \
+            fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_GPT, \
+                                             __func__, ##args); \
+        } \
+    } while (0)
 
 static char const *imx_gpt_reg_name(uint32_t reg)
 {
@@ -50,24 +56,6 @@ static char const *imx_gpt_reg_name(uint32_t reg)
     }
 }
 
-#  define DPRINTF(fmt, args...) \
-          do { printf("%s: " fmt , __func__, ##args); } while (0)
-#else
-#  define DPRINTF(fmt, args...) do {} while (0)
-#endif
-
-/*
- * Define to 1 for messages about attempts to
- * access unimplemented registers or similar.
- */
-#define DEBUG_IMPLEMENTATION 1
-#if DEBUG_IMPLEMENTATION
-#  define IPRINTF(fmt, args...) \
-          do { fprintf(stderr, "%s: " fmt, __func__, ##args); } while (0)
-#else
-#  define IPRINTF(fmt, args...) do {} while (0)
-#endif
-
 static const VMStateDescription vmstate_imx_timer_gpt = {
     .name = TYPE_IMX_GPT,
     .version_id = 3,
@@ -224,9 +212,8 @@ static uint64_t imx_gpt_read(void *opaque, hwaddr offset, unsigned size)
 {
     IMXGPTState *s = IMX_GPT(opaque);
     uint32_t reg_value = 0;
-    uint32_t reg = offset >> 2;
 
-    switch (reg) {
+    switch (offset >> 2) {
     case 0: /* Control Register */
         reg_value = s->cr;
         break;
@@ -256,12 +243,14 @@ static uint64_t imx_gpt_read(void *opaque, hwaddr offset, unsigned size)
         break;
 
     case 7: /* input Capture Register 1 */
-        qemu_log_mask(LOG_UNIMP, "icr1 feature is not implemented\n");
+        qemu_log_mask(LOG_UNIMP, "[%s]%s: icr1 feature is not implemented\n",
+                      TYPE_IMX_GPT, __func__);
         reg_value = s->icr1;
         break;
 
     case 8: /* input Capture Register 2 */
-        qemu_log_mask(LOG_UNIMP, "icr2 feature is not implemented\n");
+        qemu_log_mask(LOG_UNIMP, "[%s]%s: icr2 feature is not implemented\n",
+                      TYPE_IMX_GPT, __func__);
         reg_value = s->icr2;
         break;
 
@@ -271,11 +260,12 @@ static uint64_t imx_gpt_read(void *opaque, hwaddr offset, unsigned size)
         break;
 
     default:
-        IPRINTF("Bad offset %x\n", reg);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_GPT, __func__, offset);
         break;
     }
 
-    DPRINTF("(%s) = 0x%08x\n", imx_gpt_reg_name(reg), reg_value);
+    DPRINTF("(%s) = 0x%08x\n", imx_gpt_reg_name(offset >> 2), reg_value);
 
     return reg_value;
 }
@@ -322,12 +312,11 @@ static void imx_gpt_write(void *opaque, hwaddr offset, uint64_t value,
 {
     IMXGPTState *s = IMX_GPT(opaque);
     uint32_t oldreg;
-    uint32_t reg = offset >> 2;
 
-    DPRINTF("(%s, value = 0x%08x)\n", imx_gpt_reg_name(reg),
+    DPRINTF("(%s, value = 0x%08x)\n", imx_gpt_reg_name(offset >> 2),
             (uint32_t)value);
 
-    switch (reg) {
+    switch (offset >> 2) {
     case 0:
         oldreg = s->cr;
         s->cr = value & ~0x7c14;
@@ -403,7 +392,8 @@ static void imx_gpt_write(void *opaque, hwaddr offset, uint64_t value,
         break;
 
     default:
-        IPRINTF("Bad offset %x\n", reg);
+        qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%"
+                      HWADDR_PRIx "\n", TYPE_IMX_GPT, __func__, offset);
         break;
     }
 }
diff --git a/include/hw/intc/arm_gic_common.h b/include/hw/intc/arm_gic_common.h
index 564a72b2cf..f4c349a2ef 100644
--- a/include/hw/intc/arm_gic_common.h
+++ b/include/hw/intc/arm_gic_common.h
@@ -111,6 +111,7 @@ typedef struct GICState {
     bool security_extn;
     bool irq_reset_nonsecure; /* configure IRQs as group 1 (NS) on reset? */
     int dev_fd; /* kvm device fd if backed by kvm vgic support */
+    Error *migration_blocker;
 } GICState;
 
 #define TYPE_ARM_GIC_COMMON "arm_gic_common"
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 3daa7f58f9..815fef8a30 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -279,6 +279,7 @@ typedef struct CPUARMState {
             };
             uint64_t far_el[4];
         };
+        uint64_t hpfar_el2;
         union { /* Translation result. */
             struct {
                 uint64_t _unused_par_0;
@@ -1525,8 +1526,6 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
     CPUARMState *env = cs->env_ptr;
     unsigned int cur_el = arm_current_el(env);
     bool secure = arm_is_secure(env);
-    bool scr;
-    bool hcr;
     bool pstate_unmasked;
     int8_t unmasked = 0;
 
@@ -1540,31 +1539,10 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
 
     switch (excp_idx) {
     case EXCP_FIQ:
-        /* If FIQs are routed to EL3 or EL2 then there are cases where we
-         * override the CPSR.F in determining if the exception is masked or
-         * not.  If neither of these are set then we fall back to the CPSR.F
-         * setting otherwise we further assess the state below.
-         */
-        hcr = (env->cp15.hcr_el2 & HCR_FMO);
-        scr = (env->cp15.scr_el3 & SCR_FIQ);
-
-        /* When EL3 is 32-bit, the SCR.FW bit controls whether the CPSR.F bit
-         * masks FIQ interrupts when taken in non-secure state.  If SCR.FW is
-         * set then FIQs can be masked by CPSR.F when non-secure but only
-         * when FIQs are only routed to EL3.
-         */
-        scr = scr && !((env->cp15.scr_el3 & SCR_FW) && !hcr);
         pstate_unmasked = !(env->daif & PSTATE_F);
         break;
 
     case EXCP_IRQ:
-        /* When EL3 execution state is 32-bit, if HCR.IMO is set then we may
-         * override the CPSR.I masking when in non-secure state.  The SCR.IRQ
-         * setting has already been taken into consideration when setting the
-         * target EL, so it does not have a further affect here.
-         */
-        hcr = (env->cp15.hcr_el2 & HCR_IMO);
-        scr = false;
         pstate_unmasked = !(env->daif & PSTATE_I);
         break;
 
@@ -1589,13 +1567,58 @@ static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
      * interrupt.
      */
     if ((target_el > cur_el) && (target_el != 1)) {
-        /* ARM_FEATURE_AARCH64 enabled means the highest EL is AArch64.
-         * This code currently assumes that EL2 is not implemented
-         * (and so that highest EL will be 3 and the target_el also 3).
-         */
-        if (arm_feature(env, ARM_FEATURE_AARCH64) ||
-            ((scr || hcr) && (!secure))) {
-            unmasked = 1;
+        /* Exceptions targeting a higher EL may not be maskable */
+        if (arm_feature(env, ARM_FEATURE_AARCH64)) {
+            /* 64-bit masking rules are simple: exceptions to EL3
+             * can't be masked, and exceptions to EL2 can only be
+             * masked from Secure state. The HCR and SCR settings
+             * don't affect the masking logic, only the interrupt routing.
+             */
+            if (target_el == 3 || !secure) {
+                unmasked = 1;
+            }
+        } else {
+            /* The old 32-bit-only environment has a more complicated
+             * masking setup. HCR and SCR bits not only affect interrupt
+             * routing but also change the behaviour of masking.
+             */
+            bool hcr, scr;
+
+            switch (excp_idx) {
+            case EXCP_FIQ:
+                /* If FIQs are routed to EL3 or EL2 then there are cases where
+                 * we override the CPSR.F in determining if the exception is
+                 * masked or not. If neither of these are set then we fall back
+                 * to the CPSR.F setting otherwise we further assess the state
+                 * below.
+                 */
+                hcr = (env->cp15.hcr_el2 & HCR_FMO);
+                scr = (env->cp15.scr_el3 & SCR_FIQ);
+
+                /* When EL3 is 32-bit, the SCR.FW bit controls whether the
+                 * CPSR.F bit masks FIQ interrupts when taken in non-secure
+                 * state. If SCR.FW is set then FIQs can be masked by CPSR.F
+                 * when non-secure but only when FIQs are only routed to EL3.
+                 */
+                scr = scr && !((env->cp15.scr_el3 & SCR_FW) && !hcr);
+                break;
+            case EXCP_IRQ:
+                /* When EL3 execution state is 32-bit, if HCR.IMO is set then
+                 * we may override the CPSR.I masking when in non-secure state.
+                 * The SCR.IRQ setting has already been taken into consideration
+                 * when setting the target EL, so it does not have a further
+                 * affect here.
+                 */
+                hcr = (env->cp15.hcr_el2 & HCR_IMO);
+                scr = false;
+                break;
+            default:
+                g_assert_not_reached();
+            }
+
+            if ((scr || hcr) && !secure) {
+                unmasked = 1;
+            }
         }
     }
 
diff --git a/target-arm/helper.c b/target-arm/helper.c
index e7fda37466..1966f9c045 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -15,10 +15,17 @@
 #define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */
 
 #ifndef CONFIG_USER_ONLY
-static inline bool get_phys_addr(CPUARMState *env, target_ulong address,
-                                 int access_type, ARMMMUIdx mmu_idx,
-                                 hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
-                                 target_ulong *page_size, uint32_t *fsr);
+static bool get_phys_addr(CPUARMState *env, target_ulong address,
+                          int access_type, ARMMMUIdx mmu_idx,
+                          hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
+                          target_ulong *page_size, uint32_t *fsr,
+                          ARMMMUFaultInfo *fi);
+
+static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
+                               int access_type, ARMMMUIdx mmu_idx,
+                               hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
+                               target_ulong *page_size_ptr, uint32_t *fsr,
+                               ARMMMUFaultInfo *fi);
 
 /* Definitions for the PMCCNTR and PMCR registers */
 #define PMCRD   0x8
@@ -1778,9 +1785,10 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
     bool ret;
     uint64_t par64;
     MemTxAttrs attrs = {};
+    ARMMMUFaultInfo fi = {};
 
     ret = get_phys_addr(env, value, access_type, mmu_idx,
-                        &phys_addr, &attrs, &prot, &page_size, &fsr);
+                        &phys_addr, &attrs, &prot, &page_size, &fsr, &fi);
     if (extended_addresses_enabled(env)) {
         /* fsr is a DFSR/IFSR value for the long descriptor
          * translation table format, but with WnR always clear.
@@ -3230,6 +3238,10 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = {
     { .name = "MDCR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 1,
       .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "HPFAR_EL2", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 4,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any,
+      .type = ARM_CP_CONST, .resetvalue = 0 },
     REGINFO_SENTINEL
 };
 
@@ -3288,6 +3300,22 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
       .type = ARM_CP_ALIAS,
       .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 0,
       .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[6]) },
+    { .name = "SPSR_IRQ", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_ALIAS,
+      .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 0,
+      .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[4]) },
+    { .name = "SPSR_ABT", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_ALIAS,
+      .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 1,
+      .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[2]) },
+    { .name = "SPSR_UND", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_ALIAS,
+      .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 2,
+      .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[3]) },
+    { .name = "SPSR_FIQ", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_ALIAS,
+      .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 3, .opc2 = 3,
+      .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[5]) },
     { .name = "VBAR_EL2", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 0,
       .access = PL2_RW, .writefn = vbar_write,
@@ -3460,6 +3488,14 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 1,
       .access = PL2_RW, .resetvalue = 0,
       .fieldoffset = offsetof(CPUARMState, cp15.mdcr_el2), },
+    { .name = "HPFAR", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 4,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns,
+      .fieldoffset = offsetof(CPUARMState, cp15.hpfar_el2) },
+    { .name = "HPFAR_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 4,
+      .access = PL2_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.hpfar_el2) },
     REGINFO_SENTINEL
 };
 
@@ -6051,6 +6087,28 @@ simple_ap_to_rw_prot(CPUARMState *env, ARMMMUIdx mmu_idx, int ap)
     return simple_ap_to_rw_prot_is_user(ap, regime_is_user(env, mmu_idx));
 }
 
+/* Translate S2 section/page access permissions to protection flags
+ *
+ * @env:     CPUARMState
+ * @s2ap:    The 2-bit stage2 access permissions (S2AP)
+ * @xn:      XN (execute-never) bit
+ */
+static int get_S2prot(CPUARMState *env, int s2ap, int xn)
+{
+    int prot = 0;
+
+    if (s2ap & 1) {
+        prot |= PAGE_READ;
+    }
+    if (s2ap & 2) {
+        prot |= PAGE_WRITE;
+    }
+    if (!xn) {
+        prot |= PAGE_EXEC;
+    }
+    return prot;
+}
+
 /* Translate section/page access permissions to protection flags
  *
  * @env:     CPUARMState
@@ -6155,6 +6213,32 @@ static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
     return true;
 }
 
+/* Translate a S1 pagetable walk through S2 if needed.  */
+static hwaddr S1_ptw_translate(CPUARMState *env, ARMMMUIdx mmu_idx,
+                               hwaddr addr, MemTxAttrs txattrs,
+                               uint32_t *fsr,
+                               ARMMMUFaultInfo *fi)
+{
+    if ((mmu_idx == ARMMMUIdx_S1NSE0 || mmu_idx == ARMMMUIdx_S1NSE1) &&
+        !regime_translation_disabled(env, ARMMMUIdx_S2NS)) {
+        target_ulong s2size;
+        hwaddr s2pa;
+        int s2prot;
+        int ret;
+
+        ret = get_phys_addr_lpae(env, addr, 0, ARMMMUIdx_S2NS, &s2pa,
+                                 &txattrs, &s2prot, &s2size, fsr, fi);
+        if (ret) {
+            fi->s2addr = addr;
+            fi->stage2 = true;
+            fi->s1ptw = true;
+            return ~0;
+        }
+        addr = s2pa;
+    }
+    return addr;
+}
+
 /* All loads done in the course of a page table walk go through here.
  * TODO: rather than ignoring errors from physical memory reads (which
  * are external aborts in ARM terminology) we should propagate this
@@ -6162,26 +6246,43 @@ static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
  * was being done for a CPU load/store or an address translation instruction
  * (but not if it was for a debug access).
  */
-static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure)
+static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure,
+                            ARMMMUIdx mmu_idx, uint32_t *fsr,
+                            ARMMMUFaultInfo *fi)
 {
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
     MemTxAttrs attrs = {};
 
     attrs.secure = is_secure;
+    addr = S1_ptw_translate(env, mmu_idx, addr, attrs, fsr, fi);
+    if (fi->s1ptw) {
+        return 0;
+    }
     return address_space_ldl(cs->as, addr, attrs, NULL);
 }
 
-static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure)
+static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure,
+                            ARMMMUIdx mmu_idx, uint32_t *fsr,
+                            ARMMMUFaultInfo *fi)
 {
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
     MemTxAttrs attrs = {};
 
     attrs.secure = is_secure;
+    addr = S1_ptw_translate(env, mmu_idx, addr, attrs, fsr, fi);
+    if (fi->s1ptw) {
+        return 0;
+    }
     return address_space_ldq(cs->as, addr, attrs, NULL);
 }
 
 static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
                              int access_type, ARMMMUIdx mmu_idx,
                              hwaddr *phys_ptr, int *prot,
-                             target_ulong *page_size, uint32_t *fsr)
+                             target_ulong *page_size, uint32_t *fsr,
+                             ARMMMUFaultInfo *fi)
 {
     CPUState *cs = CPU(arm_env_get_cpu(env));
     int code;
@@ -6201,7 +6302,8 @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
         code = 5;
         goto do_fault;
     }
-    desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx));
+    desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
+                       mmu_idx, fsr, fi);
     type = (desc & 3);
     domain = (desc >> 5) & 0x0f;
     if (regime_el(env, mmu_idx) == 1) {
@@ -6237,7 +6339,8 @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
             /* Fine pagetable.  */
             table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
         }
-        desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx));
+        desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
+                           mmu_idx, fsr, fi);
         switch (desc & 3) {
         case 0: /* Page translation fault.  */
             code = 7;
@@ -6294,7 +6397,8 @@ do_fault:
 static bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
                              int access_type, ARMMMUIdx mmu_idx,
                              hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
-                             target_ulong *page_size, uint32_t *fsr)
+                             target_ulong *page_size, uint32_t *fsr,
+                             ARMMMUFaultInfo *fi)
 {
     CPUState *cs = CPU(arm_env_get_cpu(env));
     int code;
@@ -6317,7 +6421,8 @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
         code = 5;
         goto do_fault;
     }
-    desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx));
+    desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
+                       mmu_idx, fsr, fi);
     type = (desc & 3);
     if (type == 0 || (type == 3 && !arm_feature(env, ARM_FEATURE_PXN))) {
         /* Section translation fault, or attempt to use the encoding
@@ -6368,7 +6473,8 @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
         ns = extract32(desc, 3, 1);
         /* Lookup l2 entry.  */
         table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc);
-        desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx));
+        desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx),
+                           mmu_idx, fsr, fi);
         ap = ((desc >> 4) & 3) | ((desc >> 7) & 4);
         switch (desc & 3) {
         case 0: /* Page translation fault.  */
@@ -6442,17 +6548,78 @@ typedef enum {
     permission_fault = 3,
 } MMUFaultType;
 
+/*
+ * check_s2_startlevel
+ * @cpu:        ARMCPU
+ * @is_aa64:    True if the translation regime is in AArch64 state
+ * @startlevel: Suggested starting level
+ * @inputsize:  Bitsize of IPAs
+ * @stride:     Page-table stride (See the ARM ARM)
+ *
+ * Returns true if the suggested starting level is OK and false otherwise.
+ */
+static bool check_s2_startlevel(ARMCPU *cpu, bool is_aa64, int level,
+                                int inputsize, int stride)
+{
+    /* Negative levels are never allowed.  */
+    if (level < 0) {
+        return false;
+    }
+
+    if (is_aa64) {
+        unsigned int pamax = arm_pamax(cpu);
+
+        switch (stride) {
+        case 13: /* 64KB Pages.  */
+            if (level == 0 || (level == 1 && pamax <= 42)) {
+                return false;
+            }
+            break;
+        case 11: /* 16KB Pages.  */
+            if (level == 0 || (level == 1 && pamax <= 40)) {
+                return false;
+            }
+            break;
+        case 9: /* 4KB Pages.  */
+            if (level == 0 && pamax <= 42) {
+                return false;
+            }
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    } else {
+        const int grainsize = stride + 3;
+        int startsizecheck;
+
+        /* AArch32 only supports 4KB pages. Assert on that.  */
+        assert(stride == 9);
+
+        if (level == 0) {
+            return false;
+        }
+
+        startsizecheck = inputsize - ((3 - level) * stride + grainsize);
+        if (startsizecheck < 1 || startsizecheck > stride + 4) {
+            return false;
+        }
+    }
+    return true;
+}
+
 static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
                                int access_type, ARMMMUIdx mmu_idx,
                                hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
-                               target_ulong *page_size_ptr, uint32_t *fsr)
+                               target_ulong *page_size_ptr, uint32_t *fsr,
+                               ARMMMUFaultInfo *fi)
 {
-    CPUState *cs = CPU(arm_env_get_cpu(env));
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    CPUState *cs = CPU(cpu);
     /* Read an LPAE long-descriptor translation table. */
     MMUFaultType fault_type = translation_fault;
     uint32_t level = 1;
     uint32_t epd = 0;
-    int32_t tsz;
+    int32_t t0sz, t1sz;
     uint32_t tg;
     uint64_t ttbr;
     int ttbr_select;
@@ -6460,8 +6627,9 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
     uint32_t tableattrs;
     target_ulong page_size;
     uint32_t attrs;
-    int32_t granule_sz = 9;
+    int32_t stride = 9;
     int32_t va_size = 32;
+    int inputsize;
     int32_t tbi = 0;
     TCR *tcr = regime_tcr(env, mmu_idx);
     int ap, ns, xn, pxn;
@@ -6507,12 +6675,28 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
      * This is a Non-secure PL0/1 stage 1 translation, so controlled by
      * TTBCR/TTBR0/TTBR1 in accordance with ARM ARM DDI0406C table B-32:
      */
-    uint32_t t0sz = extract32(tcr->raw_tcr, 0, 6);
     if (va_size == 64) {
+        /* AArch64 translation.  */
+        t0sz = extract32(tcr->raw_tcr, 0, 6);
         t0sz = MIN(t0sz, 39);
         t0sz = MAX(t0sz, 16);
+    } else if (mmu_idx != ARMMMUIdx_S2NS) {
+        /* AArch32 stage 1 translation.  */
+        t0sz = extract32(tcr->raw_tcr, 0, 3);
+    } else {
+        /* AArch32 stage 2 translation.  */
+        bool sext = extract32(tcr->raw_tcr, 4, 1);
+        bool sign = extract32(tcr->raw_tcr, 3, 1);
+        t0sz = sextract32(tcr->raw_tcr, 0, 4);
+
+        /* If the sign-extend bit is not the same as t0sz[3], the result
+         * is unpredictable. Flag this as a guest error.  */
+        if (sign != sext) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "AArch32: VTCR.S / VTCR.T0SZ[3] missmatch\n");
+        }
     }
-    uint32_t t1sz = extract32(tcr->raw_tcr, 16, 6);
+    t1sz = extract32(tcr->raw_tcr, 16, 6);
     if (va_size == 64) {
         t1sz = MIN(t1sz, 39);
         t1sz = MAX(t1sz, 16);
@@ -6548,14 +6732,14 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
         if (el < 2) {
             epd = extract32(tcr->raw_tcr, 7, 1);
         }
-        tsz = t0sz;
+        inputsize = va_size - t0sz;
 
         tg = extract32(tcr->raw_tcr, 14, 2);
         if (tg == 1) { /* 64KB pages */
-            granule_sz = 13;
+            stride = 13;
         }
         if (tg == 2) { /* 16KB pages */
-            granule_sz = 11;
+            stride = 11;
         }
     } else {
         /* We should only be here if TTBR1 is valid */
@@ -6563,19 +6747,19 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
 
         ttbr = regime_ttbr(env, mmu_idx, 1);
         epd = extract32(tcr->raw_tcr, 23, 1);
-        tsz = t1sz;
+        inputsize = va_size - t1sz;
 
         tg = extract32(tcr->raw_tcr, 30, 2);
         if (tg == 3)  { /* 64KB pages */
-            granule_sz = 13;
+            stride = 13;
         }
         if (tg == 1) { /* 16KB pages */
-            granule_sz = 11;
+            stride = 11;
         }
     }
 
     /* Here we should have set up all the parameters for the translation:
-     * va_size, ttbr, epd, tsz, granule_sz, tbi
+     * va_size, inputsize, ttbr, epd, stride, tbi
      */
 
     if (epd) {
@@ -6585,32 +6769,60 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
         goto do_fault;
     }
 
-    /* The starting level depends on the virtual address size (which can be
-     * up to 48 bits) and the translation granule size. It indicates the number
-     * of strides (granule_sz bits at a time) needed to consume the bits
-     * of the input address. In the pseudocode this is:
-     *  level = 4 - RoundUp((inputsize - grainsize) / stride)
-     * where their 'inputsize' is our 'va_size - tsz', 'grainsize' is
-     * our 'granule_sz + 3' and 'stride' is our 'granule_sz'.
-     * Applying the usual "rounded up m/n is (m+n-1)/n" and simplifying:
-     *     = 4 - (va_size - tsz - granule_sz - 3 + granule_sz - 1) / granule_sz
-     *     = 4 - (va_size - tsz - 4) / granule_sz;
-     */
-    level = 4 - (va_size - tsz - 4) / granule_sz;
+    if (mmu_idx != ARMMMUIdx_S2NS) {
+        /* The starting level depends on the virtual address size (which can
+         * be up to 48 bits) and the translation granule size. It indicates
+         * the number of strides (stride bits at a time) needed to
+         * consume the bits of the input address. In the pseudocode this is:
+         *  level = 4 - RoundUp((inputsize - grainsize) / stride)
+         * where their 'inputsize' is our 'inputsize', 'grainsize' is
+         * our 'stride + 3' and 'stride' is our 'stride'.
+         * Applying the usual "rounded up m/n is (m+n-1)/n" and simplifying:
+         * = 4 - (inputsize - stride - 3 + stride - 1) / stride
+         * = 4 - (inputsize - 4) / stride;
+         */
+        level = 4 - (inputsize - 4) / stride;
+    } else {
+        /* For stage 2 translations the starting level is specified by the
+         * VTCR_EL2.SL0 field (whose interpretation depends on the page size)
+         */
+        int startlevel = extract32(tcr->raw_tcr, 6, 2);
+        bool ok;
+
+        if (va_size == 32 || stride == 9) {
+            /* AArch32 or 4KB pages */
+            level = 2 - startlevel;
+        } else {
+            /* 16KB or 64KB pages */
+            level = 3 - startlevel;
+        }
+
+        /* Check that the starting level is valid. */
+        ok = check_s2_startlevel(cpu, va_size == 64, level,
+                                 inputsize, stride);
+        if (!ok) {
+            /* AArch64 reports these as level 0 faults.
+             * AArch32 reports these as level 1 faults.
+             */
+            level = va_size == 64 ? 0 : 1;
+            fault_type = translation_fault;
+            goto do_fault;
+        }
+    }
 
     /* Clear the vaddr bits which aren't part of the within-region address,
      * so that we don't have to special case things when calculating the
      * first descriptor address.
      */
-    if (tsz) {
-        address &= (1ULL << (va_size - tsz)) - 1;
+    if (va_size != inputsize) {
+        address &= (1ULL << inputsize) - 1;
     }
 
-    descmask = (1ULL << (granule_sz + 3)) - 1;
+    descmask = (1ULL << (stride + 3)) - 1;
 
     /* Now we can extract the actual base address from the TTBR */
     descaddr = extract64(ttbr, 0, 48);
-    descaddr &= ~((1ULL << (va_size - tsz - (granule_sz * (4 - level)))) - 1);
+    descaddr &= ~((1ULL << (inputsize - (stride * (4 - level)))) - 1);
 
     /* Secure accesses start with the page table in secure memory and
      * can be downgraded to non-secure at any step. Non-secure accesses
@@ -6622,10 +6834,14 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
         uint64_t descriptor;
         bool nstable;
 
-        descaddr |= (address >> (granule_sz * (4 - level))) & descmask;
+        descaddr |= (address >> (stride * (4 - level))) & descmask;
         descaddr &= ~7ULL;
         nstable = extract32(tableattrs, 4, 1);
-        descriptor = arm_ldq_ptw(cs, descaddr, !nstable);
+        descriptor = arm_ldq_ptw(cs, descaddr, !nstable, mmu_idx, fsr, fi);
+        if (fi->s1ptw) {
+            goto do_fault;
+        }
+
         if (!(descriptor & 1) ||
             (!(descriptor & 2) && (level == 3))) {
             /* Invalid, or the Reserved level 3 encoding */
@@ -6647,11 +6863,17 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
          * These are basically the same thing, although the number
          * of bits we pull in from the vaddr varies.
          */
-        page_size = (1ULL << ((granule_sz * (4 - level)) + 3));
+        page_size = (1ULL << ((stride * (4 - level)) + 3));
         descaddr |= (address & (page_size - 1));
-        /* Extract attributes from the descriptor and merge with table attrs */
+        /* Extract attributes from the descriptor */
         attrs = extract64(descriptor, 2, 10)
             | (extract64(descriptor, 52, 12) << 10);
+
+        if (mmu_idx == ARMMMUIdx_S2NS) {
+            /* Stage 2 table descriptors do not include any attribute fields */
+            break;
+        }
+        /* Merge in attributes from table descriptors */
         attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */
         attrs |= extract32(tableattrs, 3, 1) << 5; /* APTable[1] => AP[2] */
         /* The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1
@@ -6673,11 +6895,16 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
     }
 
     ap = extract32(attrs, 4, 2);
-    ns = extract32(attrs, 3, 1);
     xn = extract32(attrs, 12, 1);
-    pxn = extract32(attrs, 11, 1);
 
-    *prot = get_S1prot(env, mmu_idx, va_size == 64, ap, ns, xn, pxn);
+    if (mmu_idx == ARMMMUIdx_S2NS) {
+        ns = true;
+        *prot = get_S2prot(env, ap, xn);
+    } else {
+        ns = extract32(attrs, 3, 1);
+        pxn = extract32(attrs, 11, 1);
+        *prot = get_S1prot(env, mmu_idx, va_size == 64, ap, ns, xn, pxn);
+    }
 
     fault_type = permission_fault;
     if (!(*prot & (1 << access_type))) {
@@ -6698,6 +6925,8 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
 do_fault:
     /* Long-descriptor format IFSR/DFSR value */
     *fsr = (1 << 9) | (fault_type << 2) | level;
+    /* Tag the error as S2 for failed S1 PTW at S2 or ordinary S2.  */
+    fi->stage2 = fi->s1ptw || (mmu_idx == ARMMMUIdx_S2NS);
     return true;
 }
 
@@ -6960,20 +7189,45 @@ static bool get_phys_addr_pmsav5(CPUARMState *env, uint32_t address,
  * @page_size: set to the size of the page containing phys_ptr
  * @fsr: set to the DFSR/IFSR value on failure
  */
-static inline bool get_phys_addr(CPUARMState *env, target_ulong address,
-                                 int access_type, ARMMMUIdx mmu_idx,
-                                 hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
-                                 target_ulong *page_size, uint32_t *fsr)
+static bool get_phys_addr(CPUARMState *env, target_ulong address,
+                          int access_type, ARMMMUIdx mmu_idx,
+                          hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
+                          target_ulong *page_size, uint32_t *fsr,
+                          ARMMMUFaultInfo *fi)
 {
     if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) {
-        /* TODO: when we support EL2 we should here call ourselves recursively
-         * to do the stage 1 and then stage 2 translations. The arm_ld*_ptw
-         * functions will also need changing to perform ARMMMUIdx_S2NS loads
-         * rather than direct physical memory loads when appropriate.
-         * For non-EL2 CPUs a stage1+stage2 translation is just stage 1.
+        /* Call ourselves recursively to do the stage 1 and then stage 2
+         * translations.
          */
-        assert(!arm_feature(env, ARM_FEATURE_EL2));
-        mmu_idx += ARMMMUIdx_S1NSE0;
+        if (arm_feature(env, ARM_FEATURE_EL2)) {
+            hwaddr ipa;
+            int s2_prot;
+            int ret;
+
+            ret = get_phys_addr(env, address, access_type,
+                                mmu_idx + ARMMMUIdx_S1NSE0, &ipa, attrs,
+                                prot, page_size, fsr, fi);
+
+            /* If S1 fails or S2 is disabled, return early.  */
+            if (ret || regime_translation_disabled(env, ARMMMUIdx_S2NS)) {
+                *phys_ptr = ipa;
+                return ret;
+            }
+
+            /* S1 is done. Now do S2 translation.  */
+            ret = get_phys_addr_lpae(env, ipa, access_type, ARMMMUIdx_S2NS,
+                                     phys_ptr, attrs, &s2_prot,
+                                     page_size, fsr, fi);
+            fi->s2addr = ipa;
+            /* Combine the S1 and S2 perms.  */
+            *prot &= s2_prot;
+            return ret;
+        } else {
+            /*
+             * For non-EL2 CPUs a stage1+stage2 translation is just stage 1.
+             */
+            mmu_idx += ARMMMUIdx_S1NSE0;
+        }
     }
 
     /* The page table entries may downgrade secure to non-secure, but
@@ -7022,13 +7276,13 @@ static inline bool get_phys_addr(CPUARMState *env, target_ulong address,
 
     if (regime_using_lpae_format(env, mmu_idx)) {
         return get_phys_addr_lpae(env, address, access_type, mmu_idx, phys_ptr,
-                                  attrs, prot, page_size, fsr);
+                                  attrs, prot, page_size, fsr, fi);
     } else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) {
         return get_phys_addr_v6(env, address, access_type, mmu_idx, phys_ptr,
-                                attrs, prot, page_size, fsr);
+                                attrs, prot, page_size, fsr, fi);
     } else {
         return get_phys_addr_v5(env, address, access_type, mmu_idx, phys_ptr,
-                                prot, page_size, fsr);
+                                prot, page_size, fsr, fi);
     }
 }
 
@@ -7037,7 +7291,8 @@ static inline bool get_phys_addr(CPUARMState *env, target_ulong address,
  * fsr with ARM DFSR/IFSR fault register format value on failure.
  */
 bool arm_tlb_fill(CPUState *cs, vaddr address,
-                  int access_type, int mmu_idx, uint32_t *fsr)
+                  int access_type, int mmu_idx, uint32_t *fsr,
+                  ARMMMUFaultInfo *fi)
 {
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;
@@ -7048,7 +7303,7 @@ bool arm_tlb_fill(CPUState *cs, vaddr address,
     MemTxAttrs attrs = {};
 
     ret = get_phys_addr(env, address, access_type, mmu_idx, &phys_addr,
-                        &attrs, &prot, &page_size, fsr);
+                        &attrs, &prot, &page_size, fsr, fi);
     if (!ret) {
         /* Map a single [sub]page.  */
         phys_addr &= TARGET_PAGE_MASK;
@@ -7071,9 +7326,10 @@ hwaddr arm_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
     bool ret;
     uint32_t fsr;
     MemTxAttrs attrs = {};
+    ARMMMUFaultInfo fi = {};
 
     ret = get_phys_addr(env, addr, 0, cpu_mmu_index(env, false), &phys_addr,
-                        &attrs, &prot, &page_size, &fsr);
+                        &attrs, &prot, &page_size, &fsr, &fi);
 
     if (ret) {
         return -1;
diff --git a/target-arm/internals.h b/target-arm/internals.h
index 36a56aadb0..412827bcbf 100644
--- a/target-arm/internals.h
+++ b/target-arm/internals.h
@@ -152,6 +152,31 @@ static inline void update_spsel(CPUARMState *env, uint32_t imm)
     aarch64_restore_sp(env, cur_el);
 }
 
+/*
+ * arm_pamax
+ * @cpu: ARMCPU
+ *
+ * Returns the implementation defined bit-width of physical addresses.
+ * The ARMv8 reference manuals refer to this as PAMax().
+ */
+static inline unsigned int arm_pamax(ARMCPU *cpu)
+{
+    static const unsigned int pamax_map[] = {
+        [0] = 32,
+        [1] = 36,
+        [2] = 40,
+        [3] = 42,
+        [4] = 44,
+        [5] = 48,
+    };
+    unsigned int parange = extract32(cpu->id_aa64mmfr0, 0, 4);
+
+    /* id_aa64mmfr0 is a read-only register so values outside of the
+     * supported mappings can be considered an implementation error.  */
+    assert(parange < ARRAY_SIZE(pamax_map));
+    return pamax_map[parange];
+}
+
 /* Return true if extended addresses are enabled.
  * This is always the case if our translation regime is 64 bit,
  * but depends on TTBCR.EAE for 32 bit.
@@ -389,8 +414,21 @@ bool arm_is_psci_call(ARMCPU *cpu, int excp_type);
 void arm_handle_psci_call(ARMCPU *cpu);
 #endif
 
+/**
+ * ARMMMUFaultInfo: Information describing an ARM MMU Fault
+ * @s2addr: Address that caused a fault at stage 2
+ * @stage2: True if we faulted at stage 2
+ * @s1ptw: True if we faulted at stage 2 while doing a stage 1 page-table walk
+ */
+typedef struct ARMMMUFaultInfo ARMMMUFaultInfo;
+struct ARMMMUFaultInfo {
+    target_ulong s2addr;
+    bool stage2;
+    bool s1ptw;
+};
+
 /* Do a page table walk and add page to TLB if possible */
 bool arm_tlb_fill(CPUState *cpu, vaddr address, int rw, int mmu_idx,
-                  uint32_t *fsr);
+                  uint32_t *fsr, ARMMMUFaultInfo *fi);
 
 #endif
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 7929c71b43..a4c4ebf9cd 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -83,19 +83,27 @@ void tlb_fill(CPUState *cs, target_ulong addr, int is_write, int mmu_idx,
 {
     bool ret;
     uint32_t fsr = 0;
+    ARMMMUFaultInfo fi = {};
 
-    ret = arm_tlb_fill(cs, addr, is_write, mmu_idx, &fsr);
+    ret = arm_tlb_fill(cs, addr, is_write, mmu_idx, &fsr, &fi);
     if (unlikely(ret)) {
         ARMCPU *cpu = ARM_CPU(cs);
         CPUARMState *env = &cpu->env;
         uint32_t syn, exc;
-        bool same_el = (arm_current_el(env) != 0);
+        unsigned int target_el;
+        bool same_el;
 
         if (retaddr) {
             /* now we have a real cpu fault */
             cpu_restore_state(cs, retaddr);
         }
 
+        target_el = exception_target_el(env);
+        if (fi.stage2) {
+            target_el = 2;
+            env->cp15.hpfar_el2 = extract64(fi.s2addr, 12, 47) << 4;
+        }
+        same_el = arm_current_el(env) == target_el;
         /* AArch64 syndrome does not have an LPAE bit */
         syn = fsr & ~(1 << 9);
 
@@ -103,10 +111,10 @@ void tlb_fill(CPUState *cs, target_ulong addr, int is_write, int mmu_idx,
          * information; this is always true for exceptions reported to EL1.
          */
         if (is_write == 2) {
-            syn = syn_insn_abort(same_el, 0, 0, syn);
+            syn = syn_insn_abort(same_el, 0, fi.s1ptw, syn);
             exc = EXCP_PREFETCH_ABORT;
         } else {
-            syn = syn_data_abort(same_el, 0, 0, 0, is_write == 1, syn);
+            syn = syn_data_abort(same_el, 0, 0, fi.s1ptw, is_write == 1, syn);
             if (is_write == 1 && arm_feature(env, ARM_FEATURE_V6)) {
                 fsr |= (1 << 11);
             }
@@ -115,7 +123,7 @@ void tlb_fill(CPUState *cs, target_ulong addr, int is_write, int mmu_idx,
 
         env->exception.vaddress = addr;
         env->exception.fsr = fsr;
-        raise_exception(env, exc, syn, exception_target_el(env));
+        raise_exception(env, exc, syn, target_el);
     }
 }
 #endif
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 9f1d740b4e..6be2c728f0 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -11179,6 +11179,35 @@ undef:
                        default_exception_el(s));
 }
 
+static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
+{
+    /* Return true if the insn at dc->pc might cross a page boundary.
+     * (False positives are OK, false negatives are not.)
+     */
+    uint16_t insn;
+
+    if ((s->pc & 3) == 0) {
+        /* At a 4-aligned address we can't be crossing a page */
+        return false;
+    }
+
+    /* This must be a Thumb insn */
+    insn = arm_lduw_code(env, s->pc, s->bswap_code);
+
+    if ((insn >> 11) >= 0x1d) {
+        /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
+         * First half of a 32-bit Thumb insn. Thumb-1 cores might
+         * end up actually treating this as two 16-bit insns (see the
+         * code at the start of disas_thumb2_insn()) but we don't bother
+         * to check for that as it is unlikely, and false positives here
+         * are harmless.
+         */
+        return true;
+    }
+    /* Definitely a 16-bit insn, can't be crossing a page. */
+    return false;
+}
+
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
    basic block 'tb'.  */
 void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
@@ -11190,6 +11219,7 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
     target_ulong next_page_start;
     int num_insns;
     int max_insns;
+    bool end_of_page;
 
     /* generate intermediate code */
 
@@ -11411,11 +11441,24 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
          * Otherwise the subsequent code could get translated several times.
          * Also stop translation when a page boundary is reached.  This
          * ensures prefetch aborts occur at the right place.  */
+
+        /* We want to stop the TB if the next insn starts in a new page,
+         * or if it spans between this page and the next. This means that
+         * if we're looking at the last halfword in the page we need to
+         * see if it's a 16-bit Thumb insn (which will fit in this TB)
+         * or a 32-bit Thumb insn (which won't).
+         * This is to avoid generating a silly TB with a single 16-bit insn
+         * in it at the end of this page (which would execute correctly
+         * but isn't very efficient).
+         */
+        end_of_page = (dc->pc >= next_page_start) ||
+            ((dc->pc >= next_page_start - 3) && insn_crosses_page(env, dc));
+
     } while (!dc->is_jmp && !tcg_op_buf_full() &&
              !cs->singlestep_enabled &&
              !singlestep &&
              !dc->ss_active &&
-             dc->pc < next_page_start &&
+             !end_of_page &&
              num_insns < max_insns);
 
     if (tb->cflags & CF_LAST_IO) {