summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--cpu-exec.c17
-rw-r--r--exec.c61
-rw-r--r--hw/arm/boot.c71
-rw-r--r--hw/arm/virt.c76
-rw-r--r--hw/gpio/pl061.c59
-rw-r--r--include/exec/exec-all.h4
-rw-r--r--include/qom/cpu.h10
-rw-r--r--linux-user/main.c3
-rw-r--r--qom/cpu.c5
-rw-r--r--target-arm/cpu.c37
-rw-r--r--target-arm/cpu.h2
-rw-r--r--target-arm/helper.c397
-rw-r--r--target-arm/internals.h30
-rw-r--r--target-arm/machine.c3
-rw-r--r--target-arm/op_helper.c188
-rw-r--r--target-i386/cpu.c6
-rw-r--r--target-i386/cpu.h2
-rw-r--r--target-i386/helper.c5
-rw-r--r--target-lm32/cpu.c2
-rw-r--r--target-lm32/cpu.h2
-rw-r--r--target-lm32/helper.c5
-rw-r--r--target-xtensa/cpu.c2
-rw-r--r--target-xtensa/cpu.h2
-rw-r--r--target-xtensa/helper.c5
24 files changed, 840 insertions, 154 deletions
diff --git a/cpu-exec.c b/cpu-exec.c
index 7b5d2e21d0..bd93165209 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -295,16 +295,10 @@ static inline TranslationBlock *tb_find_fast(CPUArchState *env)
     return tb;
 }
 
-static CPUDebugExcpHandler *debug_excp_handler;
-
-void cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler)
-{
-    debug_excp_handler = handler;
-}
-
 static void cpu_handle_debug_exception(CPUArchState *env)
 {
     CPUState *cpu = ENV_GET_CPU(env);
+    CPUClass *cc = CPU_GET_CLASS(cpu);
     CPUWatchpoint *wp;
 
     if (!cpu->watchpoint_hit) {
@@ -312,9 +306,8 @@ static void cpu_handle_debug_exception(CPUArchState *env)
             wp->flags &= ~BP_WATCHPOINT_HIT;
         }
     }
-    if (debug_excp_handler) {
-        debug_excp_handler(env);
-    }
+
+    cc->debug_excp_handler(cpu);
 }
 
 /* main execution loop */
@@ -618,8 +611,8 @@ int cpu_exec(CPUArchState *env)
                        We avoid this by disabling interrupts when
                        pc contains a magic address.  */
                     if (interrupt_request & CPU_INTERRUPT_HARD
-                        && ((IS_M(env) && env->regs[15] < 0xfffffff0)
-                            || !(env->daif & PSTATE_I))) {
+                        && !(env->daif & PSTATE_I)
+                        && (!IS_M(env) || env->regs[15] < 0xfffffff0)) {
                         cpu->exception_index = EXCP_IRQ;
                         cc->do_interrupt(cpu);
                         next_tb = 0;
diff --git a/exec.c b/exec.c
index 7dddcc8034..2794b4ba23 100644
--- a/exec.c
+++ b/exec.c
@@ -572,6 +572,16 @@ void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
 {
 }
 
+int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
+                          int flags)
+{
+    return -ENOSYS;
+}
+
+void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
+{
+}
+
 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                           int flags, CPUWatchpoint **watchpoint)
 {
@@ -582,12 +592,10 @@ int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                           int flags, CPUWatchpoint **watchpoint)
 {
-    vaddr len_mask = ~(len - 1);
     CPUWatchpoint *wp;
 
-    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
-    if ((len & (len - 1)) || (addr & ~len_mask) ||
-            len == 0 || len > TARGET_PAGE_SIZE) {
+    /* forbid ranges which are empty or run off the end of the address space */
+    if (len == 0 || (addr + len - 1) <= addr) {
         error_report("tried to set invalid watchpoint at %"
                      VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
         return -EINVAL;
@@ -595,7 +603,7 @@ int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
     wp = g_malloc(sizeof(*wp));
 
     wp->vaddr = addr;
-    wp->len_mask = len_mask;
+    wp->len = len;
     wp->flags = flags;
 
     /* keep all GDB-injected watchpoints in front */
@@ -616,11 +624,10 @@ int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                           int flags)
 {
-    vaddr len_mask = ~(len - 1);
     CPUWatchpoint *wp;
 
     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
-        if (addr == wp->vaddr && len_mask == wp->len_mask
+        if (addr == wp->vaddr && len == wp->len
                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
             cpu_watchpoint_remove_by_ref(cpu, wp);
             return 0;
@@ -650,6 +657,27 @@ void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
         }
     }
 }
+
+/* Return true if this watchpoint address matches the specified
+ * access (ie the address range covered by the watchpoint overlaps
+ * partially or completely with the address range covered by the
+ * access).
+ */
+static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
+                                                  vaddr addr,
+                                                  vaddr len)
+{
+    /* We know the lengths are non-zero, but a little caution is
+     * required to avoid errors in the case where the range ends
+     * exactly at the top of the address space and so addr + len
+     * wraps round to zero.
+     */
+    vaddr wpend = wp->vaddr + wp->len - 1;
+    vaddr addrend = addr + len - 1;
+
+    return !(addr > wpend || wp->vaddr > addrend);
+}
+
 #endif
 
 /* Add a breakpoint.  */
@@ -861,7 +889,7 @@ hwaddr memory_region_section_get_iotlb(CPUState *cpu,
     /* Make accesses to pages with watchpoints go via the
        watchpoint trap routines.  */
     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
-        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
+        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
             /* Avoid trapping reads of pages with a write breakpoint. */
             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                 iotlb = PHYS_SECTION_WATCH + paddr;
@@ -1625,7 +1653,7 @@ static const MemoryRegionOps notdirty_mem_ops = {
 };
 
 /* Generate a debug exception if a watchpoint has been hit.  */
-static void check_watchpoint(int offset, int len_mask, int flags)
+static void check_watchpoint(int offset, int len, int flags)
 {
     CPUState *cpu = current_cpu;
     CPUArchState *env = cpu->env_ptr;
@@ -1643,9 +1671,14 @@ static void check_watchpoint(int offset, int len_mask, int flags)
     }
     vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
-        if ((vaddr == (wp->vaddr & len_mask) ||
-             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
-            wp->flags |= BP_WATCHPOINT_HIT;
+        if (cpu_watchpoint_address_matches(wp, vaddr, len)
+            && (wp->flags & flags)) {
+            if (flags == BP_MEM_READ) {
+                wp->flags |= BP_WATCHPOINT_HIT_READ;
+            } else {
+                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
+            }
+            wp->hitaddr = vaddr;
             if (!cpu->watchpoint_hit) {
                 cpu->watchpoint_hit = wp;
                 tb_check_watchpoint(cpu);
@@ -1670,7 +1703,7 @@ static void check_watchpoint(int offset, int len_mask, int flags)
 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
                                unsigned size)
 {
-    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
+    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
     switch (size) {
     case 1: return ldub_phys(&address_space_memory, addr);
     case 2: return lduw_phys(&address_space_memory, addr);
@@ -1682,7 +1715,7 @@ static uint64_t watch_mem_read(void *opaque, hwaddr addr,
 static void watch_mem_write(void *opaque, hwaddr addr,
                             uint64_t val, unsigned size)
 {
-    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
+    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
     switch (size) {
     case 1:
         stb_phys(&address_space_memory, addr, val);
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index e32f2f4158..c8dc34f086 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -312,7 +312,26 @@ static void set_kernel_args_old(const struct arm_boot_info *info)
     }
 }
 
-static int load_dtb(hwaddr addr, const struct arm_boot_info *binfo)
+/**
+ * load_dtb() - load a device tree binary image into memory
+ * @addr:       the address to load the image at
+ * @binfo:      struct describing the boot environment
+ * @addr_limit: upper limit of the available memory area at @addr
+ *
+ * Load a device tree supplied by the machine or by the user  with the
+ * '-dtb' command line option, and put it at offset @addr in target
+ * memory.
+ *
+ * If @addr_limit contains a meaningful value (i.e., it is strictly greater
+ * than @addr), the device tree is only loaded if its size does not exceed
+ * the limit.
+ *
+ * Returns: the size of the device tree image on success,
+ *          0 if the image size exceeds the limit,
+ *          -1 on errors.
+ */
+static int load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
+                    hwaddr addr_limit)
 {
     void *fdt = NULL;
     int size, rc;
@@ -341,6 +360,15 @@ static int load_dtb(hwaddr addr, const struct arm_boot_info *binfo)
         }
     }
 
+    if (addr_limit > addr && size > (addr_limit - addr)) {
+        /* Installing the device tree blob at addr would exceed addr_limit.
+         * Whether this constitutes failure is up to the caller to decide,
+         * so just return 0 as size, i.e., no error.
+         */
+        g_free(fdt);
+        return 0;
+    }
+
     acells = qemu_fdt_getprop_cell(fdt, "/", "#address-cells");
     scells = qemu_fdt_getprop_cell(fdt, "/", "#size-cells");
     if (acells == 0 || scells == 0) {
@@ -396,11 +424,14 @@ static int load_dtb(hwaddr addr, const struct arm_boot_info *binfo)
 
     qemu_fdt_dumpdtb(fdt, size);
 
-    cpu_physical_memory_write(addr, fdt, size);
+    /* Put the DTB into the memory map as a ROM image: this will ensure
+     * the DTB is copied again upon reset, even if addr points into RAM.
+     */
+    rom_add_blob_fixed("dtb", fdt, size, addr);
 
     g_free(fdt);
 
-    return 0;
+    return size;
 
 fail:
     g_free(fdt);
@@ -451,7 +482,7 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
     int kernel_size;
     int initrd_size;
     int is_linux = 0;
-    uint64_t elf_entry;
+    uint64_t elf_entry, elf_low_addr, elf_high_addr;
     int elf_machine;
     hwaddr entry, kernel_load_offset;
     int big_endian;
@@ -459,6 +490,16 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
 
     /* Load the kernel.  */
     if (!info->kernel_filename) {
+
+        if (have_dtb(info)) {
+            /* If we have a device tree blob, but no kernel to supply it to,
+             * copy it to the base of RAM for a bootloader to pick up.
+             */
+            if (load_dtb(info->loader_start, info, 0) < 0) {
+                exit(1);
+            }
+        }
+
         /* If no kernel specified, do nothing; we will start from address 0
          * (typically a boot ROM image) in the same way as hardware.
          */
@@ -508,7 +549,25 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
 
     /* Assume that raw images are linux kernels, and ELF images are not.  */
     kernel_size = load_elf(info->kernel_filename, NULL, NULL, &elf_entry,
-                           NULL, NULL, big_endian, elf_machine, 1);
+                           &elf_low_addr, &elf_high_addr, big_endian,
+                           elf_machine, 1);
+    if (kernel_size > 0 && have_dtb(info)) {
+        /* If there is still some room left at the base of RAM, try and put
+         * the DTB there like we do for images loaded with -bios or -pflash.
+         */
+        if (elf_low_addr > info->loader_start
+            || elf_high_addr < info->loader_start) {
+            /* Pass elf_low_addr as address limit to load_dtb if it may be
+             * pointing into RAM, otherwise pass '0' (no limit)
+             */
+            if (elf_low_addr < info->loader_start) {
+                elf_low_addr = 0;
+            }
+            if (load_dtb(info->loader_start, info, elf_low_addr) < 0) {
+                exit(1);
+            }
+        }
+    }
     entry = elf_entry;
     if (kernel_size < 0) {
         kernel_size = load_uimage(info->kernel_filename, &entry, NULL,
@@ -569,7 +628,7 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
              */
             hwaddr dtb_start = QEMU_ALIGN_UP(info->initrd_start + initrd_size,
                                              4096);
-            if (load_dtb(dtb_start, info)) {
+            if (load_dtb(dtb_start, info, 0) < 0) {
                 exit(1);
             }
             fixupcontext[FIXUP_ARGPTR] = dtb_start;
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index d6fffc75bd..cc7981cfdc 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -37,6 +37,7 @@
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
 #include "hw/boards.h"
+#include "hw/loader.h"
 #include "exec/address-spaces.h"
 #include "qemu/bitops.h"
 #include "qemu/error-report.h"
@@ -371,11 +372,13 @@ static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
                                      2, base, 2, size);
     qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
                                GIC_FDT_IRQ_TYPE_SPI, irq,
-                               GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
+                               GIC_FDT_IRQ_FLAGS_LEVEL_HI);
     qemu_fdt_setprop_cells(vbi->fdt, nodename, "clocks",
                                vbi->clock_phandle, vbi->clock_phandle);
     qemu_fdt_setprop(vbi->fdt, nodename, "clock-names",
                          clocknames, sizeof(clocknames));
+
+    qemu_fdt_setprop_string(vbi->fdt, "/chosen", "linux,stdout-path", nodename);
     g_free(nodename);
 }
 
@@ -396,7 +399,7 @@ static void create_rtc(const VirtBoardInfo *vbi, qemu_irq *pic)
                                  2, base, 2, size);
     qemu_fdt_setprop_cells(vbi->fdt, nodename, "interrupts",
                            GIC_FDT_IRQ_TYPE_SPI, irq,
-                           GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
+                           GIC_FDT_IRQ_FLAGS_LEVEL_HI);
     qemu_fdt_setprop_cell(vbi->fdt, nodename, "clocks", vbi->clock_phandle);
     qemu_fdt_setprop_string(vbi->fdt, nodename, "clock-names", "apb_pclk");
     g_free(nodename);
@@ -437,6 +440,73 @@ static void create_virtio_devices(const VirtBoardInfo *vbi, qemu_irq *pic)
     }
 }
 
+static void create_one_flash(const char *name, hwaddr flashbase,
+                             hwaddr flashsize)
+{
+    /* Create and map a single flash device. We use the same
+     * parameters as the flash devices on the Versatile Express board.
+     */
+    DriveInfo *dinfo = drive_get_next(IF_PFLASH);
+    DeviceState *dev = qdev_create(NULL, "cfi.pflash01");
+    const uint64_t sectorlength = 256 * 1024;
+
+    if (dinfo && qdev_prop_set_drive(dev, "drive", dinfo->bdrv)) {
+        abort();
+    }
+
+    qdev_prop_set_uint32(dev, "num-blocks", flashsize / sectorlength);
+    qdev_prop_set_uint64(dev, "sector-length", sectorlength);
+    qdev_prop_set_uint8(dev, "width", 4);
+    qdev_prop_set_uint8(dev, "device-width", 2);
+    qdev_prop_set_uint8(dev, "big-endian", 0);
+    qdev_prop_set_uint16(dev, "id0", 0x89);
+    qdev_prop_set_uint16(dev, "id1", 0x18);
+    qdev_prop_set_uint16(dev, "id2", 0x00);
+    qdev_prop_set_uint16(dev, "id3", 0x00);
+    qdev_prop_set_string(dev, "name", name);
+    qdev_init_nofail(dev);
+
+    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, flashbase);
+}
+
+static void create_flash(const VirtBoardInfo *vbi)
+{
+    /* Create two flash devices to fill the VIRT_FLASH space in the memmap.
+     * Any file passed via -bios goes in the first of these.
+     */
+    hwaddr flashsize = vbi->memmap[VIRT_FLASH].size / 2;
+    hwaddr flashbase = vbi->memmap[VIRT_FLASH].base;
+    char *nodename;
+
+    if (bios_name) {
+        const char *fn;
+
+        if (drive_get(IF_PFLASH, 0, 0)) {
+            error_report("The contents of the first flash device may be "
+                         "specified with -bios or with -drive if=pflash... "
+                         "but you cannot use both options at once");
+            exit(1);
+        }
+        fn = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+        if (!fn || load_image_targphys(fn, flashbase, flashsize) < 0) {
+            error_report("Could not load ROM image '%s'", bios_name);
+            exit(1);
+        }
+    }
+
+    create_one_flash("virt.flash0", flashbase, flashsize);
+    create_one_flash("virt.flash1", flashbase + flashsize, flashsize);
+
+    nodename = g_strdup_printf("/flash@%" PRIx64, flashbase);
+    qemu_fdt_add_subnode(vbi->fdt, nodename);
+    qemu_fdt_setprop_string(vbi->fdt, nodename, "compatible", "cfi-flash");
+    qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
+                                 2, flashbase, 2, flashsize,
+                                 2, flashbase + flashsize, 2, flashsize);
+    qemu_fdt_setprop_cell(vbi->fdt, nodename, "bank-width", 4);
+    g_free(nodename);
+}
+
 static void *machvirt_dtb(const struct arm_boot_info *binfo, int *fdt_size)
 {
     const VirtBoardInfo *board = (const VirtBoardInfo *)binfo;
@@ -514,6 +584,8 @@ static void machvirt_init(MachineState *machine)
     vmstate_register_ram_global(ram);
     memory_region_add_subregion(sysmem, vbi->memmap[VIRT_MEM].base, ram);
 
+    create_flash(vbi);
+
     create_gic(vbi, pic);
 
     create_uart(vbi, pic);
diff --git a/hw/gpio/pl061.c b/hw/gpio/pl061.c
index dd4ea293e2..bd03e99975 100644
--- a/hw/gpio/pl061.c
+++ b/hw/gpio/pl061.c
@@ -37,7 +37,8 @@ typedef struct PL061State {
     MemoryRegion iomem;
     uint32_t locked;
     uint32_t data;
-    uint32_t old_data;
+    uint32_t old_out_data;
+    uint32_t old_in_data;
     uint32_t dir;
     uint32_t isense;
     uint32_t ibe;
@@ -63,12 +64,13 @@ typedef struct PL061State {
 
 static const VMStateDescription vmstate_pl061 = {
     .name = "pl061",
-    .version_id = 2,
-    .minimum_version_id = 1,
+    .version_id = 3,
+    .minimum_version_id = 3,
     .fields = (VMStateField[]) {
         VMSTATE_UINT32(locked, PL061State),
         VMSTATE_UINT32(data, PL061State),
-        VMSTATE_UINT32(old_data, PL061State),
+        VMSTATE_UINT32(old_out_data, PL061State),
+        VMSTATE_UINT32(old_in_data, PL061State),
         VMSTATE_UINT32(dir, PL061State),
         VMSTATE_UINT32(isense, PL061State),
         VMSTATE_UINT32(ibe, PL061State),
@@ -98,23 +100,52 @@ static void pl061_update(PL061State *s)
     uint8_t out;
     int i;
 
+    DPRINTF("dir = %d, data = %d\n", s->dir, s->data);
+
     /* Outputs float high.  */
     /* FIXME: This is board dependent.  */
     out = (s->data & s->dir) | ~s->dir;
-    changed = s->old_data ^ out;
-    if (!changed)
-        return;
+    changed = s->old_out_data ^ out;
+    if (changed) {
+        s->old_out_data = out;
+        for (i = 0; i < 8; i++) {
+            mask = 1 << i;
+            if (changed & mask) {
+                DPRINTF("Set output %d = %d\n", i, (out & mask) != 0);
+                qemu_set_irq(s->out[i], (out & mask) != 0);
+            }
+        }
+    }
 
-    s->old_data = out;
-    for (i = 0; i < 8; i++) {
-        mask = 1 << i;
-        if (changed & mask) {
-            DPRINTF("Set output %d = %d\n", i, (out & mask) != 0);
-            qemu_set_irq(s->out[i], (out & mask) != 0);
+    /* Inputs */
+    changed = (s->old_in_data ^ s->data) & ~s->dir;
+    if (changed) {
+        s->old_in_data = s->data;
+        for (i = 0; i < 8; i++) {
+            mask = 1 << i;
+            if (changed & mask) {
+                DPRINTF("Changed input %d = %d\n", i, (s->data & mask) != 0);
+
+                if (!(s->isense & mask)) {
+                    /* Edge interrupt */
+                    if (s->ibe & mask) {
+                        /* Any edge triggers the interrupt */
+                        s->istate |= mask;
+                    } else {
+                        /* Edge is selected by IEV */
+                        s->istate |= ~(s->data ^ s->iev) & mask;
+                    }
+                }
+            }
         }
     }
 
-    /* FIXME: Implement input interrupts.  */
+    /* Level interrupt */
+    s->istate |= ~(s->data ^ s->iev) & s->isense;
+
+    DPRINTF("istate = %02X\n", s->istate);
+
+    qemu_set_irq(s->irq, (s->istate & s->im) != 0);
 }
 
 static uint64_t pl061_read(void *opaque, hwaddr offset,
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 5e5d86ec46..421a142a0d 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -356,10 +356,6 @@ static inline tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong
 tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr);
 #endif
 
-typedef void (CPUDebugExcpHandler)(CPUArchState *env);
-
-void cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler);
-
 /* vl.c */
 extern int singlestep;
 
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 1aafbf5f34..370b3ebee9 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -95,6 +95,7 @@ struct TranslationBlock;
  * @get_phys_page_debug: Callback for obtaining a physical address.
  * @gdb_read_register: Callback for letting GDB read a register.
  * @gdb_write_register: Callback for letting GDB write a register.
+ * @debug_excp_handler: Callback for handling debug exceptions.
  * @vmsd: State description for migration.
  * @gdb_num_core_regs: Number of core registers accessible to GDB.
  * @gdb_core_xml_file: File name for core registers GDB XML description.
@@ -134,6 +135,7 @@ typedef struct CPUClass {
     hwaddr (*get_phys_page_debug)(CPUState *cpu, vaddr addr);
     int (*gdb_read_register)(CPUState *cpu, uint8_t *buf, int reg);
     int (*gdb_write_register)(CPUState *cpu, uint8_t *buf, int reg);
+    void (*debug_excp_handler)(CPUState *cpu);
 
     int (*write_elf64_note)(WriteCoreDumpFunction f, CPUState *cpu,
                             int cpuid, void *opaque);
@@ -169,7 +171,8 @@ typedef struct CPUBreakpoint {
 
 typedef struct CPUWatchpoint {
     vaddr vaddr;
-    vaddr len_mask;
+    vaddr len;
+    vaddr hitaddr;
     int flags; /* BP_* */
     QTAILQ_ENTRY(CPUWatchpoint) entry;
 } CPUWatchpoint;
@@ -622,9 +625,12 @@ void cpu_single_step(CPUState *cpu, int enabled);
 #define BP_MEM_WRITE          0x02
 #define BP_MEM_ACCESS         (BP_MEM_READ | BP_MEM_WRITE)
 #define BP_STOP_BEFORE_ACCESS 0x04
-#define BP_WATCHPOINT_HIT     0x08
+/* 0x08 currently unused */
 #define BP_GDB                0x10
 #define BP_CPU                0x20
+#define BP_WATCHPOINT_HIT_READ 0x40
+#define BP_WATCHPOINT_HIT_WRITE 0x80
+#define BP_WATCHPOINT_HIT (BP_WATCHPOINT_HIT_READ | BP_WATCHPOINT_HIT_WRITE)
 
 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                           CPUBreakpoint **breakpoint);
diff --git a/linux-user/main.c b/linux-user/main.c
index 472a16d2db..483eb3fec2 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -3458,8 +3458,7 @@ CPUArchState *cpu_copy(CPUArchState *env)
         cpu_breakpoint_insert(new_cpu, bp->pc, bp->flags, NULL);
     }
     QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
-        cpu_watchpoint_insert(new_cpu, wp->vaddr, (~wp->len_mask) + 1,
-                              wp->flags, NULL);
+        cpu_watchpoint_insert(new_cpu, wp->vaddr, wp->len, wp->flags, NULL);
     }
 #endif
 
diff --git a/qom/cpu.c b/qom/cpu.c
index b32dd0a562..ba8b402617 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -202,6 +202,10 @@ static bool cpu_common_virtio_is_big_endian(CPUState *cpu)
     return target_words_bigendian();
 }
 
+static void cpu_common_debug_excp_handler(CPUState *cpu)
+{
+}
+
 void cpu_dump_state(CPUState *cpu, FILE *f, fprintf_function cpu_fprintf,
                     int flags)
 {
@@ -340,6 +344,7 @@ static void cpu_class_init(ObjectClass *klass, void *data)
     k->gdb_read_register = cpu_common_gdb_read_register;
     k->gdb_write_register = cpu_common_gdb_write_register;
     k->virtio_is_big_endian = cpu_common_virtio_is_big_endian;
+    k->debug_excp_handler = cpu_common_debug_excp_handler;
     dc->realize = cpu_common_realizefn;
     /*
      * Reason: CPUs still need special care by board code: wiring up
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 8199f32e32..7ea12bda1c 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -129,26 +129,38 @@ static void arm_cpu_reset(CPUState *s)
     env->uncached_cpsr = ARM_CPU_MODE_SVC;
     env->daif = PSTATE_D | PSTATE_A | PSTATE_I | PSTATE_F;
     /* On ARMv7-M the CPSR_I is the value of the PRIMASK register, and is
-       clear at reset.  Initial SP and PC are loaded from ROM.  */
+     * clear at reset. Initial SP and PC are loaded from ROM.
+     */
     if (IS_M(env)) {
-        uint32_t pc;
+        uint32_t initial_msp; /* Loaded from 0x0 */
+        uint32_t initial_pc; /* Loaded from 0x4 */
         uint8_t *rom;
+
         env->daif &= ~PSTATE_I;
         rom = rom_ptr(0);
         if (rom) {
-            /* We should really use ldl_phys here, in case the guest
-               modified flash and reset itself.  However images
-               loaded via -kernel have not been copied yet, so load the
-               values directly from there.  */
-            env->regs[13] = ldl_p(rom) & 0xFFFFFFFC;
-            pc = ldl_p(rom + 4);
-            env->thumb = pc & 1;
-            env->regs[15] = pc & ~1;
+            /* Address zero is covered by ROM which hasn't yet been
+             * copied into physical memory.
+             */
+            initial_msp = ldl_p(rom);
+            initial_pc = ldl_p(rom + 4);
+        } else {
+            /* Address zero not covered by a ROM blob, or the ROM blob
+             * is in non-modifiable memory and this is a second reset after
+             * it got copied into memory. In the latter case, rom_ptr
+             * will return a NULL pointer and we should use ldl_phys instead.
+             */
+            initial_msp = ldl_phys(s->as, 0);
+            initial_pc = ldl_phys(s->as, 4);
         }
+
+        env->regs[13] = initial_msp & 0xFFFFFFFC;
+        env->regs[15] = initial_pc & ~1;
+        env->thumb = initial_pc & 1;
     }
 
     if (env->cp15.c1_sys & SCTLR_V) {
-            env->regs[15] = 0xFFFF0000;
+        env->regs[15] = 0xFFFF0000;
     }
 
     env->vfp.xregs[ARM_VFP_FPEXC] = 0;
@@ -172,6 +184,8 @@ static void arm_cpu_reset(CPUState *s)
         kvm_arm_reset_vcpu(cpu);
     }
 #endif
+
+    hw_watchpoint_update_all(cpu);
 }
 
 #ifndef CONFIG_USER_ONLY
@@ -1051,6 +1065,7 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
 #endif
     cc->gdb_num_core_regs = 26;
     cc->gdb_core_xml_file = "arm-core.xml";
+    cc->debug_excp_handler = arm_debug_excp_handler;
 }
 
 static void cpu_register(const ARMCPUInfo *info)
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 51bedc8262..d1e1ccb605 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -323,6 +323,8 @@ typedef struct CPUARMState {
     int eabi;
 #endif
 
+    struct CPUWatchpoint *cpu_watchpoint[16];
+
     CPU_COMMON
 
     /* These fields after the common ones so they are preserved on reset.  */
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 2b95f33872..ece967397f 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -304,17 +304,6 @@ void init_cpreg_list(ARMCPU *cpu)
     g_list_free(keys);
 }
 
-/* Return true if extended addresses are enabled.
- * This is always the case if our translation regime is 64 bit,
- * but depends on TTBCR.EAE for 32 bit.
- */
-static inline bool extended_addresses_enabled(CPUARMState *env)
-{
-    return arm_el_is_aa64(env, 1)
-        || ((arm_feature(env, ARM_FEATURE_LPAE)
-             && (env->cp15.c2_control & TTBCR_EAE)));
-}
-
 static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
@@ -388,6 +377,47 @@ static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
     tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK);
 }
 
+/* IS variants of TLB operations must affect all cores */
+static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                             uint64_t value)
+{
+    CPUState *other_cs;
+
+    CPU_FOREACH(other_cs) {
+        tlb_flush(other_cs, 1);
+    }
+}
+
+static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                             uint64_t value)
+{
+    CPUState *other_cs;
+
+    CPU_FOREACH(other_cs) {
+        tlb_flush(other_cs, value == 0);
+    }
+}
+
+static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                             uint64_t value)
+{
+    CPUState *other_cs;
+
+    CPU_FOREACH(other_cs) {
+        tlb_flush_page(other_cs, value & TARGET_PAGE_MASK);
+    }
+}
+
+static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                             uint64_t value)
+{
+    CPUState *other_cs;
+
+    CPU_FOREACH(other_cs) {
+        tlb_flush_page(other_cs, value & TARGET_PAGE_MASK);
+    }
+}
+
 static const ARMCPRegInfo cp_reginfo[] = {
     { .name = "FCSEIDR", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c13_fcse),
@@ -414,21 +444,6 @@ static const ARMCPRegInfo not_v8_cp_reginfo[] = {
      */
     { .name = "TLB_LOCKDOWN", .cp = 15, .crn = 10, .crm = CP_ANY,
       .opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_RW, .type = ARM_CP_NOP },
-    /* MMU TLB control. Note that the wildcarding means we cover not just
-     * the unified TLB ops but also the dside/iside/inner-shareable variants.
-     */
-    { .name = "TLBIALL", .cp = 15, .crn = 8, .crm = CP_ANY,
-      .opc1 = CP_ANY, .opc2 = 0, .access = PL1_W, .writefn = tlbiall_write,
-      .type = ARM_CP_NO_MIGRATE },
-    { .name = "TLBIMVA", .cp = 15, .crn = 8, .crm = CP_ANY,
-      .opc1 = CP_ANY, .opc2 = 1, .access = PL1_W, .writefn = tlbimva_write,
-      .type = ARM_CP_NO_MIGRATE },
-    { .name = "TLBIASID", .cp = 15, .crn = 8, .crm = CP_ANY,
-      .opc1 = CP_ANY, .opc2 = 2, .access = PL1_W, .writefn = tlbiasid_write,
-      .type = ARM_CP_NO_MIGRATE },
-    { .name = "TLBIMVAA", .cp = 15, .crn = 8, .crm = CP_ANY,
-      .opc1 = CP_ANY, .opc2 = 3, .access = PL1_W, .writefn = tlbimvaa_write,
-      .type = ARM_CP_NO_MIGRATE },
     /* Cache maintenance ops; some of this space may be overridden later. */
     { .name = "CACHEMAINT", .cp = 15, .crn = 7, .crm = CP_ANY,
       .opc1 = 0, .opc2 = CP_ANY, .access = PL1_W,
@@ -472,6 +487,21 @@ static const ARMCPRegInfo not_v7_cp_reginfo[] = {
      */
     { .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+    /* MMU TLB control. Note that the wildcarding means we cover not just
+     * the unified TLB ops but also the dside/iside/inner-shareable variants.
+     */
+    { .name = "TLBIALL", .cp = 15, .crn = 8, .crm = CP_ANY,
+      .opc1 = CP_ANY, .opc2 = 0, .access = PL1_W, .writefn = tlbiall_write,
+      .type = ARM_CP_NO_MIGRATE },
+    { .name = "TLBIMVA", .cp = 15, .crn = 8, .crm = CP_ANY,
+      .opc1 = CP_ANY, .opc2 = 1, .access = PL1_W, .writefn = tlbimva_write,
+      .type = ARM_CP_NO_MIGRATE },
+    { .name = "TLBIASID", .cp = 15, .crn = 8, .crm = CP_ANY,
+      .opc1 = CP_ANY, .opc2 = 2, .access = PL1_W, .writefn = tlbiasid_write,
+      .type = ARM_CP_NO_MIGRATE },
+    { .name = "TLBIMVAA", .cp = 15, .crn = 8, .crm = CP_ANY,
+      .opc1 = CP_ANY, .opc2 = 3, .access = PL1_W, .writefn = tlbimvaa_write,
+      .type = ARM_CP_NO_MIGRATE },
     REGINFO_SENTINEL
 };
 
@@ -890,6 +920,44 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
     { .name = "ISR_EL1", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 1, .opc2 = 0,
       .type = ARM_CP_NO_MIGRATE, .access = PL1_R, .readfn = isr_read },
+    /* 32 bit ITLB invalidates */
+    { .name = "ITLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write },
+    { .name = "ITLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
+    { .name = "ITLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 2,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write },
+    /* 32 bit DTLB invalidates */
+    { .name = "DTLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write },
+    { .name = "DTLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
+    { .name = "DTLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 2,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write },
+    /* 32 bit TLB invalidates */
+    { .name = "TLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write },
+    { .name = "TLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
+    { .name = "TLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write },
+    { .name = "TLBIMVAA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimvaa_write },
+    REGINFO_SENTINEL
+};
+
+static const ARMCPRegInfo v7mp_cp_reginfo[] = {
+    /* 32 bit TLB invalidates, Inner Shareable */
+    { .name = "TLBIALLIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_is_write },
+    { .name = "TLBIMVAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_is_write },
+    { .name = "TLBIASIDIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W,
+      .writefn = tlbiasid_is_write },
+    { .name = "TLBIMVAAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3,
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W,
+      .writefn = tlbimvaa_is_write },
     REGINFO_SENTINEL
 };
 
@@ -1879,6 +1947,39 @@ static void tlbi_aa64_asid_write(CPUARMState *env, const ARMCPRegInfo *ri,
     tlb_flush(CPU(cpu), asid == 0);
 }
 
+static void tlbi_aa64_va_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                  uint64_t value)
+{
+    CPUState *other_cs;
+    uint64_t pageaddr = sextract64(value << 12, 0, 56);
+
+    CPU_FOREACH(other_cs) {
+        tlb_flush_page(other_cs, pageaddr);
+    }
+}
+
+static void tlbi_aa64_vaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                  uint64_t value)
+{
+    CPUState *other_cs;
+    uint64_t pageaddr = sextract64(value << 12, 0, 56);
+
+    CPU_FOREACH(other_cs) {
+        tlb_flush_page(other_cs, pageaddr);
+    }
+}
+
+static void tlbi_aa64_asid_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                  uint64_t value)
+{
+    CPUState *other_cs;
+    int asid = extract64(value, 48, 16);
+
+    CPU_FOREACH(other_cs) {
+        tlb_flush(other_cs, asid == 0);
+    }
+}
+
 static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     /* We don't implement EL2, so the only control on DC ZVA is the
@@ -1996,27 +2097,27 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
     { .name = "TLBI_VMALLE1IS", .state = ARM_CP_STATE_AA64,
       .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
-      .writefn = tlbiall_write },
+      .writefn = tlbiall_is_write },
     { .name = "TLBI_VAE1IS", .state = ARM_CP_STATE_AA64,
       .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
-      .writefn = tlbi_aa64_va_write },
+      .writefn = tlbi_aa64_va_is_write },
     { .name = "TLBI_ASIDE1IS", .state = ARM_CP_STATE_AA64,
       .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
-      .writefn = tlbi_aa64_asid_write },
+      .writefn = tlbi_aa64_asid_is_write },
     { .name = "TLBI_VAAE1IS", .state = ARM_CP_STATE_AA64,
       .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
-      .writefn = tlbi_aa64_vaa_write },
+      .writefn = tlbi_aa64_vaa_is_write },
     { .name = "TLBI_VALE1IS", .state = ARM_CP_STATE_AA64,
       .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
-      .writefn = tlbi_aa64_va_write },
+      .writefn = tlbi_aa64_va_is_write },
     { .name = "TLBI_VAALE1IS", .state = ARM_CP_STATE_AA64,
       .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
-      .writefn = tlbi_aa64_vaa_write },
+      .writefn = tlbi_aa64_vaa_is_write },
     { .name = "TLBI_VMALLE1", .state = ARM_CP_STATE_AA64,
       .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
@@ -2056,42 +2157,12 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
       .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 8, .opc2 = 3,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE, .writefn = ats_write },
 #endif
-    /* 32 bit TLB invalidates, Inner Shareable */
-    { .name = "TLBIALLIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0,
-      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write },
-    { .name = "TLBIMVAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1,
-      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
-    { .name = "TLBIASIDIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2,
-      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write },
-    { .name = "TLBIMVAAIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3,
-      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimvaa_write },
+    /* TLB invalidate last level of translation table walk */
     { .name = "TLBIMVALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5,
-      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_is_write },
     { .name = "TLBIMVAALIS", .cp = 15, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7,
-      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimvaa_write },
-    /* 32 bit ITLB invalidates */
-    { .name = "ITLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 0,
-      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write },
-    { .name = "ITLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 1,
-      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
-    { .name = "ITLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 5, .opc2 = 2,
-      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write },
-    /* 32 bit DTLB invalidates */
-    { .name = "DTLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 0,
-      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write },
-    { .name = "DTLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 1,
-      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
-    { .name = "DTLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 6, .opc2 = 2,
-      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write },
-    /* 32 bit TLB invalidates */
-    { .name = "TLBIALL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0,
-      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiall_write },
-    { .name = "TLBIMVA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1,
-      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
-    { .name = "TLBIASID", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2,
-      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbiasid_write },
-    { .name = "TLBIMVAA", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3,
-      .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimvaa_write },
+      .type = ARM_CP_NO_MIGRATE, .access = PL1_W,
+      .writefn = tlbimvaa_is_write },
     { .name = "TLBIMVAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5,
       .type = ARM_CP_NO_MIGRATE, .access = PL1_W, .writefn = tlbimva_write },
     { .name = "TLBIMVAAL", .cp = 15, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7,
@@ -2255,18 +2326,35 @@ static const ARMCPRegInfo debug_cp_reginfo[] = {
       .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
     { .name = "DBGDSAR", .cp = 14, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 },
-    /* Dummy implementation of monitor debug system control register:
-     * we don't support debug. (The 32-bit alias is DBGDSCRext.)
-     */
+    /* Monitor debug system control register; the 32-bit alias is DBGDSCRext. */
     { .name = "MDSCR_EL1", .state = ARM_CP_STATE_BOTH,
       .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2,
       .access = PL1_RW,
       .fieldoffset = offsetof(CPUARMState, cp15.mdscr_el1),
       .resetvalue = 0 },
+    /* MDCCSR_EL0, aka DBGDSCRint. This is a read-only mirror of MDSCR_EL1.
+     * We don't implement the configurable EL0 access.
+     */
+    { .name = "MDCCSR_EL0", .state = ARM_CP_STATE_BOTH,
+      .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 0,
+      .type = ARM_CP_NO_MIGRATE,
+      .access = PL1_R,
+      .fieldoffset = offsetof(CPUARMState, cp15.mdscr_el1),
+      .resetfn = arm_cp_reset_ignore },
     /* We define a dummy WI OSLAR_EL1, because Linux writes to it. */
     { .name = "OSLAR_EL1", .state = ARM_CP_STATE_BOTH,
       .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 4,
       .access = PL1_W, .type = ARM_CP_NOP },
+    /* Dummy OSDLR_EL1: 32-bit Linux will read this */
+    { .name = "OSDLR_EL1", .state = ARM_CP_STATE_BOTH,
+      .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 3, .opc2 = 4,
+      .access = PL1_RW, .type = ARM_CP_NOP },
+    /* Dummy DBGVCR: Linux wants to clear this on startup, but we don't
+     * implement vector catch debug events yet.
+     */
+    { .name = "DBGVCR",
+      .cp = 14, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0,
+      .access = PL1_RW, .type = ARM_CP_NOP },
     REGINFO_SENTINEL
 };
 
@@ -2279,20 +2367,149 @@ static const ARMCPRegInfo debug_lpae_cp_reginfo[] = {
     REGINFO_SENTINEL
 };
 
+void hw_watchpoint_update(ARMCPU *cpu, int n)
+{
+    CPUARMState *env = &cpu->env;
+    vaddr len = 0;
+    vaddr wvr = env->cp15.dbgwvr[n];
+    uint64_t wcr = env->cp15.dbgwcr[n];
+    int mask;
+    int flags = BP_CPU | BP_STOP_BEFORE_ACCESS;
+
+    if (env->cpu_watchpoint[n]) {
+        cpu_watchpoint_remove_by_ref(CPU(cpu), env->cpu_watchpoint[n]);
+        env->cpu_watchpoint[n] = NULL;
+    }
+
+    if (!extract64(wcr, 0, 1)) {
+        /* E bit clear : watchpoint disabled */
+        return;
+    }
+
+    switch (extract64(wcr, 3, 2)) {
+    case 0:
+        /* LSC 00 is reserved and must behave as if the wp is disabled */
+        return;
+    case 1:
+        flags |= BP_MEM_READ;
+        break;
+    case 2:
+        flags |= BP_MEM_WRITE;
+        break;
+    case 3:
+        flags |= BP_MEM_ACCESS;
+        break;
+    }
+
+    /* Attempts to use both MASK and BAS fields simultaneously are
+     * CONSTRAINED UNPREDICTABLE; we opt to ignore BAS in this case,
+     * thus generating a watchpoint for every byte in the masked region.
+     */
+    mask = extract64(wcr, 24, 4);
+    if (mask == 1 || mask == 2) {
+        /* Reserved values of MASK; we must act as if the mask value was
+         * some non-reserved value, or as if the watchpoint were disabled.
+         * We choose the latter.
+         */
+        return;
+    } else if (mask) {
+        /* Watchpoint covers an aligned area up to 2GB in size */
+        len = 1ULL << mask;
+        /* If masked bits in WVR are not zero it's CONSTRAINED UNPREDICTABLE
+         * whether the watchpoint fires when the unmasked bits match; we opt
+         * to generate the exceptions.
+         */
+        wvr &= ~(len - 1);
+    } else {
+        /* Watchpoint covers bytes defined by the byte address select bits */
+        int bas = extract64(wcr, 5, 8);
+        int basstart;
+
+        if (bas == 0) {
+            /* This must act as if the watchpoint is disabled */
+            return;
+        }
+
+        if (extract64(wvr, 2, 1)) {
+            /* Deprecated case of an only 4-aligned address. BAS[7:4] are
+             * ignored, and BAS[3:0] define which bytes to watch.
+             */
+            bas &= 0xf;
+        }
+        /* The BAS bits are supposed to be programmed to indicate a contiguous
+         * range of bytes. Otherwise it is CONSTRAINED UNPREDICTABLE whether
+         * we fire for each byte in the word/doubleword addressed by the WVR.
+         * We choose to ignore any non-zero bits after the first range of 1s.
+         */
+        basstart = ctz32(bas);
+        len = cto32(bas >> basstart);
+        wvr += basstart;
+    }
+
+    cpu_watchpoint_insert(CPU(cpu), wvr, len, flags,
+                          &env->cpu_watchpoint[n]);
+}
+
+void hw_watchpoint_update_all(ARMCPU *cpu)
+{
+    int i;
+    CPUARMState *env = &cpu->env;
+
+    /* Completely clear out existing QEMU watchpoints and our array, to
+     * avoid possible stale entries following migration load.
+     */
+    cpu_watchpoint_remove_all(CPU(cpu), BP_CPU);
+    memset(env->cpu_watchpoint, 0, sizeof(env->cpu_watchpoint));
+
+    for (i = 0; i < ARRAY_SIZE(cpu->env.cpu_watchpoint); i++) {
+        hw_watchpoint_update(cpu, i);
+    }
+}
+
+static void dbgwvr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                         uint64_t value)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    int i = ri->crm;
+
+    /* Bits [63:49] are hardwired to the value of bit [48]; that is, the
+     * register reads and behaves as if values written are sign extended.
+     * Bits [1:0] are RES0.
+     */
+    value = sextract64(value, 0, 49) & ~3ULL;
+
+    raw_write(env, ri, value);
+    hw_watchpoint_update(cpu, i);
+}
+
+static void dbgwcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                         uint64_t value)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    int i = ri->crm;
+
+    raw_write(env, ri, value);
+    hw_watchpoint_update(cpu, i);
+}
+
 static void define_debug_regs(ARMCPU *cpu)
 {
     /* Define v7 and v8 architectural debug registers.
      * These are just dummy implementations for now.
      */
     int i;
-    int wrps, brps;
+    int wrps, brps, ctx_cmps;
     ARMCPRegInfo dbgdidr = {
         .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0,
         .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = cpu->dbgdidr,
     };
 
+    /* Note that all these register fields hold "number of Xs minus 1". */
     brps = extract32(cpu->dbgdidr, 24, 4);
     wrps = extract32(cpu->dbgdidr, 28, 4);
+    ctx_cmps = extract32(cpu->dbgdidr, 20, 4);
+
+    assert(ctx_cmps <= brps);
 
     /* The DBGDIDR and ID_AA64DFR0_EL1 define various properties
      * of the debug registers such as number of breakpoints;
@@ -2301,6 +2518,7 @@ static void define_debug_regs(ARMCPU *cpu)
     if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
         assert(extract32(cpu->id_aa64dfr0, 12, 4) == brps);
         assert(extract32(cpu->id_aa64dfr0, 20, 4) == wrps);
+        assert(extract32(cpu->id_aa64dfr0, 28, 4) == ctx_cmps);
     }
 
     define_one_arm_cp_reg(cpu, &dbgdidr);
@@ -2330,12 +2548,16 @@ static void define_debug_regs(ARMCPU *cpu)
             { .name = "DBGWVR", .state = ARM_CP_STATE_BOTH,
               .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 6,
               .access = PL1_RW,
-              .fieldoffset = offsetof(CPUARMState, cp15.dbgwvr[i]) },
+              .fieldoffset = offsetof(CPUARMState, cp15.dbgwvr[i]),
+              .writefn = dbgwvr_write, .raw_writefn = raw_write
+            },
             { .name = "DBGWCR", .state = ARM_CP_STATE_BOTH,
               .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 7,
               .access = PL1_RW,
-              .fieldoffset = offsetof(CPUARMState, cp15.dbgwcr[i]) },
-               REGINFO_SENTINEL
+              .fieldoffset = offsetof(CPUARMState, cp15.dbgwcr[i]),
+              .writefn = dbgwcr_write, .raw_writefn = raw_write
+            },
+            REGINFO_SENTINEL
         };
         define_arm_cp_regs(cpu, dbgregs);
     }
@@ -2434,6 +2656,9 @@ void register_cp_regs_for_features(ARMCPU *cpu)
     if (arm_feature(env, ARM_FEATURE_V6K)) {
         define_arm_cp_regs(cpu, v6k_cp_reginfo);
     }
+    if (arm_feature(env, ARM_FEATURE_V7MP)) {
+        define_arm_cp_regs(cpu, v7mp_cp_reginfo);
+    }
     if (arm_feature(env, ARM_FEATURE_V7)) {
         /* v7 performance monitor control register: same implementor
          * field as main ID register, and we implement only the cycle
@@ -3506,11 +3731,37 @@ void arm_cpu_do_interrupt(CPUState *cs)
     uint32_t mask;
     int new_mode;
     uint32_t offset;
+    uint32_t moe;
 
     assert(!IS_M(env));
 
     arm_log_exception(cs->exception_index);
 
+    /* If this is a debug exception we must update the DBGDSCR.MOE bits */
+    switch (env->exception.syndrome >> ARM_EL_EC_SHIFT) {
+    case EC_BREAKPOINT:
+    case EC_BREAKPOINT_SAME_EL:
+        moe = 1;
+        break;
+    case EC_WATCHPOINT:
+    case EC_WATCHPOINT_SAME_EL:
+        moe = 10;
+        break;
+    case EC_AA32_BKPT:
+        moe = 3;
+        break;
+    case EC_VECTORCATCH:
+        moe = 5;
+        break;
+    default:
+        moe = 0;
+        break;
+    }
+
+    if (moe) {
+        env->cp15.mdscr_el1 = deposit64(env->cp15.mdscr_el1, 2, 4, moe);
+    }
+
     /* TODO: Vectored interrupt controller.  */
     switch (cs->exception_index) {
     case EXCP_UDEF:
diff --git a/target-arm/internals.h b/target-arm/internals.h
index 53c2e3cf3e..64751a0798 100644
--- a/target-arm/internals.h
+++ b/target-arm/internals.h
@@ -142,6 +142,17 @@ static inline void update_spsel(CPUARMState *env, uint32_t imm)
     aarch64_restore_sp(env, cur_el);
 }
 
+/* Return true if extended addresses are enabled.
+ * This is always the case if our translation regime is 64 bit,
+ * but depends on TTBCR.EAE for 32 bit.
+ */
+static inline bool extended_addresses_enabled(CPUARMState *env)
+{
+    return arm_el_is_aa64(env, 1)
+        || ((arm_feature(env, ARM_FEATURE_LPAE)
+             && (env->cp15.c2_control & TTBCR_EAE)));
+}
+
 /* Valid Syndrome Register EC field values */
 enum arm_exception_class {
     EC_UNCATEGORIZED          = 0x00,
@@ -296,4 +307,23 @@ static inline uint32_t syn_swstep(int same_el, int isv, int ex)
         | (isv << 24) | (ex << 6) | 0x22;
 }
 
+static inline uint32_t syn_watchpoint(int same_el, int cm, int wnr)
+{
+    return (EC_WATCHPOINT << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
+        | (cm << 8) | (wnr << 6) | 0x22;
+}
+
+/* Update a QEMU watchpoint based on the information the guest has set in the
+ * DBGWCR<n>_EL1 and DBGWVR<n>_EL1 registers.
+ */
+void hw_watchpoint_update(ARMCPU *cpu, int n);
+/* Update the QEMU watchpoints for every guest watchpoint. This does a
+ * complete delete-and-reinstate of the QEMU watchpoint list and so is
+ * suitable for use after migration or on reset.
+ */
+void hw_watchpoint_update_all(ARMCPU *cpu);
+
+/* Callback function for when a watchpoint or breakpoint triggers. */
+void arm_debug_excp_handler(CPUState *cs);
+
 #endif
diff --git a/target-arm/machine.c b/target-arm/machine.c
index 3bcc7cc833..8dfe87cb6b 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -2,6 +2,7 @@
 #include "hw/boards.h"
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
+#include "internals.h"
 
 static bool vfp_needed(void *opaque)
 {
@@ -213,6 +214,8 @@ static int cpu_post_load(void *opaque, int version_id)
         }
     }
 
+    hw_watchpoint_update_all(cpu);
+
     return 0;
 }
 
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index fe40358c96..b956216c4b 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -456,6 +456,194 @@ illegal_return:
     }
 }
 
+/* Return true if the linked breakpoint entry lbn passes its checks */
+static bool linked_bp_matches(ARMCPU *cpu, int lbn)
+{
+    CPUARMState *env = &cpu->env;
+    uint64_t bcr = env->cp15.dbgbcr[lbn];
+    int brps = extract32(cpu->dbgdidr, 24, 4);
+    int ctx_cmps = extract32(cpu->dbgdidr, 20, 4);
+    int bt;
+    uint32_t contextidr;
+
+    /* Links to unimplemented or non-context aware breakpoints are
+     * CONSTRAINED UNPREDICTABLE: either behave as if disabled, or
+     * as if linked to an UNKNOWN context-aware breakpoint (in which
+     * case DBGWCR<n>_EL1.LBN must indicate that breakpoint).
+     * We choose the former.
+     */
+    if (lbn > brps || lbn < (brps - ctx_cmps)) {
+        return false;
+    }
+
+    bcr = env->cp15.dbgbcr[lbn];
+
+    if (extract64(bcr, 0, 1) == 0) {
+        /* Linked breakpoint disabled : generate no events */
+        return false;
+    }
+
+    bt = extract64(bcr, 20, 4);
+
+    /* We match the whole register even if this is AArch32 using the
+     * short descriptor format (in which case it holds both PROCID and ASID),
+     * since we don't implement the optional v7 context ID masking.
+     */
+    contextidr = extract64(env->cp15.contextidr_el1, 0, 32);
+
+    switch (bt) {
+    case 3: /* linked context ID match */
+        if (arm_current_pl(env) > 1) {
+            /* Context matches never fire in EL2 or (AArch64) EL3 */
+            return false;
+        }
+        return (contextidr == extract64(env->cp15.dbgbvr[lbn], 0, 32));
+    case 5: /* linked address mismatch (reserved in AArch64) */
+    case 9: /* linked VMID match (reserved if no EL2) */
+    case 11: /* linked context ID and VMID match (reserved if no EL2) */
+    default:
+        /* Links to Unlinked context breakpoints must generate no
+         * events; we choose to do the same for reserved values too.
+         */
+        return false;
+    }
+
+    return false;
+}
+
+static bool wp_matches(ARMCPU *cpu, int n)
+{
+    CPUARMState *env = &cpu->env;
+    uint64_t wcr = env->cp15.dbgwcr[n];
+    int pac, hmc, ssc, wt, lbn;
+    /* TODO: check against CPU security state when we implement TrustZone */
+    bool is_secure = false;
+
+    if (!env->cpu_watchpoint[n]
+        || !(env->cpu_watchpoint[n]->flags & BP_WATCHPOINT_HIT)) {
+        return false;
+    }
+
+    /* The WATCHPOINT_HIT flag guarantees us that the watchpoint is
+     * enabled and that the address and access type match; check the
+     * remaining fields, including linked breakpoints.
+     * Note that some combinations of {PAC, HMC SSC} are reserved and
+     * must act either like some valid combination or as if the watchpoint
+     * were disabled. We choose the former, and use this together with
+     * the fact that EL3 must always be Secure and EL2 must always be
+     * Non-Secure to simplify the code slightly compared to the full
+     * table in the ARM ARM.
+     */
+    pac = extract64(wcr, 1, 2);
+    hmc = extract64(wcr, 13, 1);
+    ssc = extract64(wcr, 14, 2);
+
+    switch (ssc) {
+    case 0:
+        break;
+    case 1:
+    case 3:
+        if (is_secure) {
+            return false;
+        }
+        break;
+    case 2:
+        if (!is_secure) {
+            return false;
+        }
+        break;
+    }
+
+    /* TODO: this is not strictly correct because the LDRT/STRT/LDT/STT
+     * "unprivileged access" instructions should match watchpoints as if
+     * they were accesses done at EL0, even if the CPU is at EL1 or higher.
+     * Implementing this would require reworking the core watchpoint code
+     * to plumb the mmu_idx through to this point. Luckily Linux does not
+     * rely on this behaviour currently.
+     */
+    switch (arm_current_pl(env)) {
+    case 3:
+    case 2:
+        if (!hmc) {
+            return false;
+        }
+        break;
+    case 1:
+        if (extract32(pac, 0, 1) == 0) {
+            return false;
+        }
+        break;
+    case 0:
+        if (extract32(pac, 1, 1) == 0) {
+            return false;
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    wt = extract64(wcr, 20, 1);
+    lbn = extract64(wcr, 16, 4);
+
+    if (wt && !linked_bp_matches(cpu, lbn)) {
+        return false;
+    }
+
+    return true;
+}
+
+static bool check_watchpoints(ARMCPU *cpu)
+{
+    CPUARMState *env = &cpu->env;
+    int n;
+
+    /* If watchpoints are disabled globally or we can't take debug
+     * exceptions here then watchpoint firings are ignored.
+     */
+    if (extract32(env->cp15.mdscr_el1, 15, 1) == 0
+        || !arm_generate_debug_exceptions(env)) {
+        return false;
+    }
+
+    for (n = 0; n < ARRAY_SIZE(env->cpu_watchpoint); n++) {
+        if (wp_matches(cpu, n)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+void arm_debug_excp_handler(CPUState *cs)
+{
+    /* Called by core code when a watchpoint or breakpoint fires;
+     * need to check which one and raise the appropriate exception.
+     */
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+    CPUWatchpoint *wp_hit = cs->watchpoint_hit;
+
+    if (wp_hit) {
+        if (wp_hit->flags & BP_CPU) {
+            cs->watchpoint_hit = NULL;
+            if (check_watchpoints(cpu)) {
+                bool wnr = (wp_hit->flags & BP_WATCHPOINT_HIT_WRITE) != 0;
+                bool same_el = arm_debug_target_el(env) == arm_current_pl(env);
+
+                env->exception.syndrome = syn_watchpoint(same_el, 0, wnr);
+                if (extended_addresses_enabled(env)) {
+                    env->exception.fsr = (1 << 9) | 0x22;
+                } else {
+                    env->exception.fsr = 0x2;
+                }
+                env->exception.vaddress = wp_hit->hitaddr;
+                raise_exception(env, EXCP_DATA_ABORT);
+            } else {
+                cpu_resume_from_signal(cs, NULL);
+            }
+        }
+    }
+}
+
 /* ??? Flag setting arithmetic is awkward because we need to do comparisons.
    The only way to do that in TCG is a conditional branch, which clobbers
    all our temporaries.  For now implement these as helper functions.  */
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 88b64d8b66..90d0a05eb1 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2843,9 +2843,6 @@ static void x86_cpu_initfn(Object *obj)
     if (tcg_enabled() && !inited) {
         inited = 1;
         optimize_flags_init();
-#ifndef CONFIG_USER_ONLY
-        cpu_set_debug_excp_handler(breakpoint_handler);
-#endif
     }
 }
 
@@ -2942,6 +2939,9 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
     cc->vmsd = &vmstate_x86_cpu;
 #endif
     cc->gdb_num_core_regs = CPU_NB_REGS * 2 + 25;
+#ifndef CONFIG_USER_ONLY
+    cc->debug_excp_handler = breakpoint_handler;
+#endif
 }
 
 static const TypeInfo x86_cpu_type_info = {
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 3460b12139..71b505f56c 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -1121,7 +1121,7 @@ static inline int hw_breakpoint_len(unsigned long dr7, int index)
 void hw_breakpoint_insert(CPUX86State *env, int index);
 void hw_breakpoint_remove(CPUX86State *env, int index);
 bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update);
-void breakpoint_handler(CPUX86State *env);
+void breakpoint_handler(CPUState *cs);
 
 /* will be suppressed */
 void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0);
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 30cb0d0143..28fefe0a1f 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1011,9 +1011,10 @@ bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update)
     return hit_enabled;
 }
 
-void breakpoint_handler(CPUX86State *env)
+void breakpoint_handler(CPUState *cs)
 {
-    CPUState *cs = CPU(x86_env_get_cpu(env));
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
     CPUBreakpoint *bp;
 
     if (cs->watchpoint_hit) {
diff --git a/target-lm32/cpu.c b/target-lm32/cpu.c
index c5c20d74c4..419d664845 100644
--- a/target-lm32/cpu.c
+++ b/target-lm32/cpu.c
@@ -158,7 +158,6 @@ static void lm32_cpu_initfn(Object *obj)
     if (tcg_enabled() && !tcg_initialized) {
         tcg_initialized = true;
         lm32_translate_init();
-        cpu_set_debug_excp_handler(lm32_debug_excp_handler);
     }
 }
 
@@ -273,6 +272,7 @@ static void lm32_cpu_class_init(ObjectClass *oc, void *data)
     cc->vmsd = &vmstate_lm32_cpu;
 #endif
     cc->gdb_num_core_regs = 32 + 7;
+    cc->debug_excp_handler = lm32_debug_excp_handler;
 }
 
 static void lm32_register_cpu_type(const LM32CPUInfo *info)
diff --git a/target-lm32/cpu.h b/target-lm32/cpu.h
index 70600aa47a..0dab6e89ab 100644
--- a/target-lm32/cpu.h
+++ b/target-lm32/cpu.h
@@ -211,7 +211,7 @@ void lm32_cpu_list(FILE *f, fprintf_function cpu_fprintf);
 void lm32_translate_init(void);
 void cpu_lm32_set_phys_msb_ignore(CPULM32State *env, int value);
 void QEMU_NORETURN raise_exception(CPULM32State *env, int index);
-void lm32_debug_excp_handler(CPULM32State *env);
+void lm32_debug_excp_handler(CPUState *cs);
 void lm32_breakpoint_insert(CPULM32State *env, int index, target_ulong address);
 void lm32_breakpoint_remove(CPULM32State *env, int index);
 void lm32_watchpoint_insert(CPULM32State *env, int index, target_ulong address,
diff --git a/target-lm32/helper.c b/target-lm32/helper.c
index 1bca1961af..ad724aecbc 100644
--- a/target-lm32/helper.c
+++ b/target-lm32/helper.c
@@ -125,9 +125,10 @@ static bool check_watchpoints(CPULM32State *env)
     return false;
 }
 
-void lm32_debug_excp_handler(CPULM32State *env)
+void lm32_debug_excp_handler(CPUState *cs)
 {
-    CPUState *cs = CPU(lm32_env_get_cpu(env));
+    LM32CPU *cpu = LM32_CPU(cs);
+    CPULM32State *env = &cpu->env;
     CPUBreakpoint *bp;
 
     if (cs->watchpoint_hit) {
diff --git a/target-xtensa/cpu.c b/target-xtensa/cpu.c
index 9d8801b70e..936d526d41 100644
--- a/target-xtensa/cpu.c
+++ b/target-xtensa/cpu.c
@@ -119,7 +119,6 @@ static void xtensa_cpu_initfn(Object *obj)
     if (tcg_enabled() && !tcg_inited) {
         tcg_inited = true;
         xtensa_translate_init();
-        cpu_set_debug_excp_handler(xtensa_breakpoint_handler);
     }
 }
 
@@ -151,6 +150,7 @@ static void xtensa_cpu_class_init(ObjectClass *oc, void *data)
     cc->do_unaligned_access = xtensa_cpu_do_unaligned_access;
     cc->get_phys_page_debug = xtensa_cpu_get_phys_page_debug;
 #endif
+    cc->debug_excp_handler = xtensa_breakpoint_handler;
     dc->vmsd = &vmstate_xtensa_cpu;
 }
 
diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index d797d2649a..9cf52758c7 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -390,7 +390,7 @@ static inline CPUXtensaState *cpu_init(const char *cpu_model)
 }
 
 void xtensa_translate_init(void);
-void xtensa_breakpoint_handler(CPUXtensaState *env);
+void xtensa_breakpoint_handler(CPUState *cs);
 int cpu_xtensa_exec(CPUXtensaState *s);
 void xtensa_register_core(XtensaConfigList *node);
 void check_interrupts(CPUXtensaState *s);
diff --git a/target-xtensa/helper.c b/target-xtensa/helper.c
index 94dcd9442e..6671e40289 100644
--- a/target-xtensa/helper.c
+++ b/target-xtensa/helper.c
@@ -79,9 +79,10 @@ static uint32_t check_hw_breakpoints(CPUXtensaState *env)
     return 0;
 }
 
-void xtensa_breakpoint_handler(CPUXtensaState *env)
+void xtensa_breakpoint_handler(CPUState *cs)
 {
-    CPUState *cs = CPU(xtensa_env_get_cpu(env));
+    XtensaCPU *cpu = XTENSA_CPU(cs);
+    CPUXtensaState *env = &cpu->env;
 
     if (cs->watchpoint_hit) {
         if (cs->watchpoint_hit->flags & BP_CPU) {