summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-xconfigure37
-rw-r--r--hw/arm/armv7m.c8
-rw-r--r--hw/arm/boot.c7
-rw-r--r--hw/arm/stellaris.c3
-rw-r--r--hw/arm/virt.c31
-rw-r--r--hw/core/loader.c48
-rw-r--r--include/exec/cpu-all.h2
-rw-r--r--include/hw/arm/arm.h2
-rw-r--r--include/hw/loader.h1
-rw-r--r--linux-user/aarch64/syscall.h3
-rw-r--r--linux-user/alpha/syscall.h3
-rw-r--r--linux-user/arm/syscall.h4
-rw-r--r--linux-user/cris/syscall.h3
-rw-r--r--linux-user/elfload.c9
-rw-r--r--linux-user/i386/syscall.h3
-rw-r--r--linux-user/m68k/syscall.h4
-rw-r--r--linux-user/main.c1
-rw-r--r--linux-user/microblaze/syscall.h3
-rw-r--r--linux-user/mips/syscall.h3
-rw-r--r--linux-user/mips64/syscall.h3
-rw-r--r--linux-user/openrisc/syscall.h4
-rw-r--r--linux-user/ppc/syscall.h4
-rw-r--r--linux-user/ppc/target_cpu.h10
-rw-r--r--linux-user/s390x/syscall.h3
-rw-r--r--linux-user/sh4/syscall.h4
-rw-r--r--linux-user/signal.c12
-rw-r--r--linux-user/sparc/syscall.h3
-rw-r--r--linux-user/sparc64/syscall.h3
-rw-r--r--linux-user/strace.list12
-rw-r--r--linux-user/syscall.c279
-rw-r--r--linux-user/unicore32/syscall.h4
-rw-r--r--linux-user/x86_64/syscall.h3
-rw-r--r--qemu-seccomp.c3
-rw-r--r--target-arm/cpu-qom.h1
-rw-r--r--target-arm/cpu.c7
-rw-r--r--target-arm/cpu.h115
-rw-r--r--target-arm/cpu64.c1
-rw-r--r--target-arm/helper.c145
-rw-r--r--target-arm/helper.h1
-rw-r--r--target-arm/internals.h6
-rw-r--r--target-arm/kvm-consts.h49
-rw-r--r--target-arm/op_helper.c27
-rw-r--r--target-arm/translate-a64.c98
-rw-r--r--target-arm/translate.c89
-rw-r--r--target-arm/translate.h12
-rw-r--r--target-i386/seg_helper.c4
46 files changed, 922 insertions, 155 deletions
diff --git a/configure b/configure
index 283c71cb7a..c4e47e1ea1 100755
--- a/configure
+++ b/configure
@@ -3456,6 +3456,37 @@ if compile_prog "" "" ; then
   sendfile=yes
 fi
 
+# check for timerfd support (glibc 2.8 and newer)
+timerfd=no
+cat > $TMPC << EOF
+#include <sys/timerfd.h>
+
+int main(void)
+{
+    return(timerfd_create(CLOCK_REALTIME, 0));
+}
+EOF
+if compile_prog "" "" ; then
+  timerfd=yes
+fi
+
+# check for setns and unshare support
+setns=no
+cat > $TMPC << EOF
+#include <sched.h>
+
+int main(void)
+{
+    int ret;
+    ret = setns(0, 0);
+    ret = unshare(0);
+    return ret;
+}
+EOF
+if compile_prog "" "" ; then
+  setns=yes
+fi
+
 # Check if tools are available to build documentation.
 if test "$docs" != "no" ; then
   if has makeinfo && has pod2man; then
@@ -4524,6 +4555,12 @@ fi
 if test "$sendfile" = "yes" ; then
   echo "CONFIG_SENDFILE=y" >> $config_host_mak
 fi
+if test "$timerfd" = "yes" ; then
+  echo "CONFIG_TIMERFD=y" >> $config_host_mak
+fi
+if test "$setns" = "yes" ; then
+  echo "CONFIG_SETNS=y" >> $config_host_mak
+fi
 if test "$inotify" = "yes" ; then
   echo "CONFIG_INOTIFY=y" >> $config_host_mak
 fi
diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
index 397e8dfb37..aedef13002 100644
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@@ -166,7 +166,7 @@ static void armv7m_reset(void *opaque)
    flash_size and sram_size are in kb.
    Returns the NVIC array.  */
 
-qemu_irq *armv7m_init(MemoryRegion *address_space_mem,
+qemu_irq *armv7m_init(MemoryRegion *system_memory,
                       int flash_size, int sram_size,
                       const char *kernel_filename, const char *cpu_model)
 {
@@ -213,10 +213,10 @@ qemu_irq *armv7m_init(MemoryRegion *address_space_mem,
     memory_region_init_ram(flash, NULL, "armv7m.flash", flash_size);
     vmstate_register_ram_global(flash);
     memory_region_set_readonly(flash, true);
-    memory_region_add_subregion(address_space_mem, 0, flash);
+    memory_region_add_subregion(system_memory, 0, flash);
     memory_region_init_ram(sram, NULL, "armv7m.sram", sram_size);
     vmstate_register_ram_global(sram);
-    memory_region_add_subregion(address_space_mem, 0x20000000, sram);
+    memory_region_add_subregion(system_memory, 0x20000000, sram);
     armv7m_bitband_init();
 
     nvic = qdev_create(NULL, "armv7m_nvic");
@@ -257,7 +257,7 @@ qemu_irq *armv7m_init(MemoryRegion *address_space_mem,
        when returning from an exception.  */
     memory_region_init_ram(hack, NULL, "armv7m.hack", 0x1000);
     vmstate_register_ram_global(hack);
-    memory_region_add_subregion(address_space_mem, 0xfffff000, hack);
+    memory_region_add_subregion(system_memory, 0xfffff000, hack);
 
     qemu_register_reset(armv7m_reset, cpu);
     return pic;
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 12417617a3..e32f2f4158 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -514,6 +514,13 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
         kernel_size = load_uimage(info->kernel_filename, &entry, NULL,
                                   &is_linux);
     }
+    /* On aarch64, it's the bootloader's job to uncompress the kernel. */
+    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) && kernel_size < 0) {
+        entry = info->loader_start + kernel_load_offset;
+        kernel_size = load_image_gzipped(info->kernel_filename, entry,
+                                         info->ram_size - kernel_load_offset);
+        is_linux = 1;
+    }
     if (kernel_size < 0) {
         entry = info->loader_start + kernel_load_offset;
         kernel_size = load_image_targphys(info->kernel_filename, entry,
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
index 80028e80cf..64bd4b4c4b 100644
--- a/hw/arm/stellaris.c
+++ b/hw/arm/stellaris.c
@@ -1208,7 +1208,6 @@ static void stellaris_init(const char *kernel_filename, const char *cpu_model,
         0x40024000, 0x40025000, 0x40026000};
     static const int gpio_irq[7] = {0, 1, 2, 3, 4, 30, 31};
 
-    MemoryRegion *address_space_mem = get_system_memory();
     qemu_irq *pic;
     DeviceState *gpio_dev[7];
     qemu_irq gpio_in[7][8];
@@ -1223,7 +1222,7 @@ static void stellaris_init(const char *kernel_filename, const char *cpu_model,
 
     flash_size = ((board->dc0 & 0xffff) + 1) << 1;
     sram_size = (board->dc0 >> 18) + 1;
-    pic = armv7m_init(address_space_mem,
+    pic = armv7m_init(get_system_memory(),
                       flash_size, sram_size, kernel_filename, cpu_model);
 
     if (board->dc1 & (1 << 16)) {
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ba94298555..bd206a019a 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -194,20 +194,41 @@ static void fdt_add_psci_node(const VirtBoardInfo *vbi)
 
     /* No PSCI for TCG yet */
     if (kvm_enabled()) {
+        uint32_t cpu_suspend_fn;
+        uint32_t cpu_off_fn;
+        uint32_t cpu_on_fn;
+        uint32_t migrate_fn;
+
         qemu_fdt_add_subnode(fdt, "/psci");
         if (armcpu->psci_version == 2) {
             const char comp[] = "arm,psci-0.2\0arm,psci";
             qemu_fdt_setprop(fdt, "/psci", "compatible", comp, sizeof(comp));
+
+            cpu_off_fn = QEMU_PSCI_0_2_FN_CPU_OFF;
+            if (arm_feature(&armcpu->env, ARM_FEATURE_AARCH64)) {
+                cpu_suspend_fn = QEMU_PSCI_0_2_FN64_CPU_SUSPEND;
+                cpu_on_fn = QEMU_PSCI_0_2_FN64_CPU_ON;
+                migrate_fn = QEMU_PSCI_0_2_FN64_MIGRATE;
+            } else {
+                cpu_suspend_fn = QEMU_PSCI_0_2_FN_CPU_SUSPEND;
+                cpu_on_fn = QEMU_PSCI_0_2_FN_CPU_ON;
+                migrate_fn = QEMU_PSCI_0_2_FN_MIGRATE;
+            }
         } else {
             qemu_fdt_setprop_string(fdt, "/psci", "compatible", "arm,psci");
+
+            cpu_suspend_fn = QEMU_PSCI_0_1_FN_CPU_SUSPEND;
+            cpu_off_fn = QEMU_PSCI_0_1_FN_CPU_OFF;
+            cpu_on_fn = QEMU_PSCI_0_1_FN_CPU_ON;
+            migrate_fn = QEMU_PSCI_0_1_FN_MIGRATE;
         }
 
         qemu_fdt_setprop_string(fdt, "/psci", "method", "hvc");
-        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_suspend",
-                                  PSCI_FN_CPU_SUSPEND);
-        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_off", PSCI_FN_CPU_OFF);
-        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_on", PSCI_FN_CPU_ON);
-        qemu_fdt_setprop_cell(fdt, "/psci", "migrate", PSCI_FN_MIGRATE);
+
+        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_suspend", cpu_suspend_fn);
+        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_off", cpu_off_fn);
+        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_on", cpu_on_fn);
+        qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
     }
 }
 
diff --git a/hw/core/loader.c b/hw/core/loader.c
index 1a53f0fd90..193f0f8400 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -577,6 +577,54 @@ int load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz)
     return load_uboot_image(filename, NULL, &addr, NULL, IH_TYPE_RAMDISK);
 }
 
+/* This simply prevents g_malloc in the function below from allocating
+ * a huge amount of memory, by placing a limit on the maximum
+ * uncompressed image size that load_image_gzipped will read.
+ */
+#define LOAD_IMAGE_MAX_GUNZIP_BYTES (256 << 20)
+
+/* Load a gzip-compressed kernel. */
+int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz)
+{
+    uint8_t *compressed_data = NULL;
+    uint8_t *data = NULL;
+    gsize len;
+    ssize_t bytes;
+    int ret = -1;
+
+    if (!g_file_get_contents(filename, (char **) &compressed_data, &len,
+                             NULL)) {
+        goto out;
+    }
+
+    /* Is it a gzip-compressed file? */
+    if (len < 2 ||
+        compressed_data[0] != 0x1f ||
+        compressed_data[1] != 0x8b) {
+        goto out;
+    }
+
+    if (max_sz > LOAD_IMAGE_MAX_GUNZIP_BYTES) {
+        max_sz = LOAD_IMAGE_MAX_GUNZIP_BYTES;
+    }
+
+    data = g_malloc(max_sz);
+    bytes = gunzip(data, max_sz, compressed_data, len);
+    if (bytes < 0) {
+        fprintf(stderr, "%s: unable to decompress gzipped kernel file\n",
+                filename);
+        goto out;
+    }
+
+    rom_add_blob_fixed(filename, data, bytes, addr);
+    ret = bytes;
+
+ out:
+    g_free(compressed_data);
+    g_free(data);
+    return ret;
+}
+
 /*
  * Functions for reboot-persistent memory regions.
  *  - used for vga bios and option roms.
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index f91581fc65..f9d132fc0b 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -198,6 +198,8 @@ extern unsigned long reserved_va;
 #define RESERVED_VA 0ul
 #endif
 
+#define GUEST_ADDR_MAX (RESERVED_VA ? RESERVED_VA : \
+                                    (1ul << TARGET_VIRT_ADDR_SPACE_BITS) - 1)
 #endif
 
 /* page related stuff */
diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h
index cbbf4ca4cb..cefc9e6988 100644
--- a/include/hw/arm/arm.h
+++ b/include/hw/arm/arm.h
@@ -15,7 +15,7 @@
 #include "hw/irq.h"
 
 /* armv7m.c */
-qemu_irq *armv7m_init(MemoryRegion *address_space_mem,
+qemu_irq *armv7m_init(MemoryRegion *system_memory,
                       int flash_size, int sram_size,
                       const char *kernel_filename, const char *cpu_model);
 
diff --git a/include/hw/loader.h b/include/hw/loader.h
index 796cbf9b39..00c9117ee0 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -15,6 +15,7 @@ int get_image_size(const char *filename);
 int load_image(const char *filename, uint8_t *addr); /* deprecated */
 int load_image_targphys(const char *filename, hwaddr,
                         uint64_t max_sz);
+int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz);
 
 #define ELF_LOAD_FAILED       -1
 #define ELF_LOAD_NOT_ELF      -2
diff --git a/linux-user/aarch64/syscall.h b/linux-user/aarch64/syscall.h
index 18f44a8a40..dc72a15c5e 100644
--- a/linux-user/aarch64/syscall.h
+++ b/linux-user/aarch64/syscall.h
@@ -8,3 +8,6 @@ struct target_pt_regs {
 #define UNAME_MACHINE "aarch64"
 #define UNAME_MINIMUM_RELEASE "3.8.0"
 #define TARGET_CLONE_BACKWARDS
+#define TARGET_MINSIGSTKSZ       2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
diff --git a/linux-user/alpha/syscall.h b/linux-user/alpha/syscall.h
index ed13d9a718..245cff2545 100644
--- a/linux-user/alpha/syscall.h
+++ b/linux-user/alpha/syscall.h
@@ -252,3 +252,6 @@ struct target_pt_regs {
 #define TARGET_UAC_NOPRINT		1
 #define TARGET_UAC_NOFIX		2
 #define TARGET_UAC_SIGBUS		4
+#define TARGET_MINSIGSTKSZ              4096
+#define TARGET_MLOCKALL_MCL_CURRENT     0x2000
+#define TARGET_MLOCKALL_MCL_FUTURE      0x4000
diff --git a/linux-user/arm/syscall.h b/linux-user/arm/syscall.h
index e0d2cc3e5d..3844a96112 100644
--- a/linux-user/arm/syscall.h
+++ b/linux-user/arm/syscall.h
@@ -44,3 +44,7 @@ struct target_pt_regs {
 #define UNAME_MINIMUM_RELEASE "2.6.32"
 
 #define TARGET_CLONE_BACKWARDS
+
+#define TARGET_MINSIGSTKSZ 2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
diff --git a/linux-user/cris/syscall.h b/linux-user/cris/syscall.h
index f5783c0557..2957b0d6ae 100644
--- a/linux-user/cris/syscall.h
+++ b/linux-user/cris/syscall.h
@@ -39,5 +39,8 @@ struct target_pt_regs {
 };
 
 #define TARGET_CLONE_BACKWARDS2
+#define TARGET_MINSIGSTKSZ 2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
 
 #endif
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 60777fecf6..bea803bd13 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -824,8 +824,6 @@ static uint32_t get_elf_hwcap2(void)
         NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC);        \
     } while (0)
 
-static inline uint32_t get_ppc64_abi(struct image_info *infop);
-
 static inline void init_thread(struct target_pt_regs *_regs, struct image_info *infop)
 {
     _regs->gpr[1] = infop->start_stack;
@@ -1205,13 +1203,6 @@ static inline void init_thread(struct target_pt_regs *regs, struct image_info *i
 
 #include "elf.h"
 
-#ifdef TARGET_PPC
-static inline uint32_t get_ppc64_abi(struct image_info *infop)
-{
-  return infop->elf_flags & EF_PPC64_ABI;
-}
-#endif
-
 struct exec
 {
     unsigned int a_info;   /* Use macros N_MAGIC, etc for access */
diff --git a/linux-user/i386/syscall.h b/linux-user/i386/syscall.h
index 9bfc1ad8f7..906aaac0b1 100644
--- a/linux-user/i386/syscall.h
+++ b/linux-user/i386/syscall.h
@@ -147,3 +147,6 @@ struct target_vm86plus_struct {
 #define UNAME_MINIMUM_RELEASE "2.6.32"
 
 #define TARGET_CLONE_BACKWARDS
+#define TARGET_MINSIGSTKSZ 2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
diff --git a/linux-user/m68k/syscall.h b/linux-user/m68k/syscall.h
index 889eaf7323..9218493a44 100644
--- a/linux-user/m68k/syscall.h
+++ b/linux-user/m68k/syscall.h
@@ -18,4 +18,8 @@ struct target_pt_regs {
 #define UNAME_MACHINE "m68k"
 #define UNAME_MINIMUM_RELEASE "2.6.32"
 
+#define TARGET_MINSIGSTKSZ 2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
+
 void do_m68k_simcall(CPUM68KState *, int);
diff --git a/linux-user/main.c b/linux-user/main.c
index b453a39853..472a16d2db 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -309,7 +309,6 @@ void cpu_loop(CPUX86State *env)
                                           env->regs[8],
                                           env->regs[9],
                                           0, 0);
-            env->eip = env->exception_next_eip;
             break;
 #endif
         case EXCP0B_NOSEG:
diff --git a/linux-user/microblaze/syscall.h b/linux-user/microblaze/syscall.h
index 5b5f6b447d..3c1ed27c04 100644
--- a/linux-user/microblaze/syscall.h
+++ b/linux-user/microblaze/syscall.h
@@ -49,5 +49,8 @@ struct target_pt_regs {
 };
 
 #define TARGET_CLONE_BACKWARDS
+#define TARGET_MINSIGSTKSZ      2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
 
 #endif
diff --git a/linux-user/mips/syscall.h b/linux-user/mips/syscall.h
index 5bc56962a4..35ca23b166 100644
--- a/linux-user/mips/syscall.h
+++ b/linux-user/mips/syscall.h
@@ -228,3 +228,6 @@ struct target_pt_regs {
 #define UNAME_MINIMUM_RELEASE "2.6.32"
 
 #define TARGET_CLONE_BACKWARDS
+#define TARGET_MINSIGSTKSZ 2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
diff --git a/linux-user/mips64/syscall.h b/linux-user/mips64/syscall.h
index a7f5a5802a..6733107ddb 100644
--- a/linux-user/mips64/syscall.h
+++ b/linux-user/mips64/syscall.h
@@ -225,3 +225,6 @@ struct target_pt_regs {
 #define UNAME_MINIMUM_RELEASE "2.6.32"
 
 #define TARGET_CLONE_BACKWARDS
+#define TARGET_MINSIGSTKSZ      2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
diff --git a/linux-user/openrisc/syscall.h b/linux-user/openrisc/syscall.h
index c3b36da83c..8ac03656d4 100644
--- a/linux-user/openrisc/syscall.h
+++ b/linux-user/openrisc/syscall.h
@@ -23,3 +23,7 @@ struct target_pt_regs {
 
 #define UNAME_MACHINE "openrisc"
 #define UNAME_MINIMUM_RELEASE "2.6.32"
+
+#define TARGET_MINSIGSTKSZ 2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
diff --git a/linux-user/ppc/syscall.h b/linux-user/ppc/syscall.h
index db92bbee17..0daf5cd2df 100644
--- a/linux-user/ppc/syscall.h
+++ b/linux-user/ppc/syscall.h
@@ -69,3 +69,7 @@ struct target_revectored_struct {
 #define UNAME_MINIMUM_RELEASE "2.6.32"
 
 #define TARGET_CLONE_BACKWARDS
+
+#define TARGET_MINSIGSTKSZ 2048
+#define TARGET_MLOCKALL_MCL_CURRENT 0x2000
+#define TARGET_MLOCKALL_MCL_FUTURE  0x4000
diff --git a/linux-user/ppc/target_cpu.h b/linux-user/ppc/target_cpu.h
index 9cc0c3ba9c..26f4ba297f 100644
--- a/linux-user/ppc/target_cpu.h
+++ b/linux-user/ppc/target_cpu.h
@@ -38,4 +38,14 @@ static inline void cpu_set_tls(CPUPPCState *env, target_ulong newtls)
 #endif
 }
 
+#ifndef EF_PPC64_ABI
+#define EF_PPC64_ABI           0x3
+#endif
+
+static inline uint32_t get_ppc64_abi(struct image_info *infop)
+{
+  return infop->elf_flags & EF_PPC64_ABI;
+}
+
+
 #endif
diff --git a/linux-user/s390x/syscall.h b/linux-user/s390x/syscall.h
index aaad512d4d..35f170af25 100644
--- a/linux-user/s390x/syscall.h
+++ b/linux-user/s390x/syscall.h
@@ -24,3 +24,6 @@ struct target_pt_regs {
 #define UNAME_MINIMUM_RELEASE "2.6.32"
 
 #define TARGET_CLONE_BACKWARDS2
+#define TARGET_MINSIGSTKSZ        2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
diff --git a/linux-user/sh4/syscall.h b/linux-user/sh4/syscall.h
index ccd2216e38..7aa4f239c5 100644
--- a/linux-user/sh4/syscall.h
+++ b/linux-user/sh4/syscall.h
@@ -11,3 +11,7 @@ struct target_pt_regs {
 
 #define UNAME_MACHINE "sh4"
 #define UNAME_MINIMUM_RELEASE "2.6.32"
+
+#define TARGET_MINSIGSTKSZ 2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 1141054be2..26929c59de 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -617,6 +617,15 @@ abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong sp)
     {
         struct target_sigaltstack *uss;
         struct target_sigaltstack ss;
+        size_t minstacksize = TARGET_MINSIGSTKSZ;
+
+#if defined(TARGET_PPC64)
+        /* ELF V2 for PPC64 has a 4K minimum stack size for signal handlers */
+        struct image_info *image = ((TaskState *)thread_cpu->opaque)->info;
+        if (get_ppc64_abi(image) > 1) {
+            minstacksize = 4096;
+        }
+#endif
 
 	ret = -TARGET_EFAULT;
         if (!lock_user_struct(VERIFY_READ, uss, uss_addr, 1)) {
@@ -642,8 +651,9 @@ abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong sp)
             ss.ss_sp = 0;
 	} else {
             ret = -TARGET_ENOMEM;
-            if (ss.ss_size < MINSIGSTKSZ)
+            if (ss.ss_size < minstacksize) {
                 goto out;
+            }
 	}
 
         target_sigaltstack_used.ss_sp = ss.ss_sp;
diff --git a/linux-user/sparc/syscall.h b/linux-user/sparc/syscall.h
index 9549ea0a2f..58573b92ea 100644
--- a/linux-user/sparc/syscall.h
+++ b/linux-user/sparc/syscall.h
@@ -15,3 +15,6 @@ struct target_pt_regs {
  * and copy_thread().
  */
 #define TARGET_CLONE_BACKWARDS
+#define TARGET_MINSIGSTKSZ      4096
+#define TARGET_MLOCKALL_MCL_CURRENT 0x2000
+#define TARGET_MLOCKALL_MCL_FUTURE  0x4000
diff --git a/linux-user/sparc64/syscall.h b/linux-user/sparc64/syscall.h
index 82b1680cb6..8398d3f463 100644
--- a/linux-user/sparc64/syscall.h
+++ b/linux-user/sparc64/syscall.h
@@ -16,3 +16,6 @@ struct target_pt_regs {
  * and copy_thread().
  */
 #define TARGET_CLONE_BACKWARDS
+#define TARGET_MINSIGSTKSZ      4096
+#define TARGET_MLOCKALL_MCL_CURRENT 0x2000
+#define TARGET_MLOCKALL_MCL_FUTURE  0x4000
diff --git a/linux-user/strace.list b/linux-user/strace.list
index fcb258d348..aa0cd735cc 100644
--- a/linux-user/strace.list
+++ b/linux-user/strace.list
@@ -1185,6 +1185,9 @@
 #ifdef TARGET_NR_set_mempolicy
 { TARGET_NR_set_mempolicy, "set_mempolicy" , NULL, NULL, NULL },
 #endif
+#ifdef TARGET_NR_setns
+{ TARGET_NR_setns, "setns" , NULL, NULL, NULL },
+#endif
 #ifdef TARGET_NR_setpgid
 { TARGET_NR_setpgid, "setpgid" , NULL, NULL, NULL },
 #endif
@@ -1404,6 +1407,15 @@
 #ifdef TARGET_NR_timer_settime
 { TARGET_NR_timer_settime, "timer_settime" , NULL, NULL, NULL },
 #endif
+#ifdef TARGET_NR_timerfd_create
+{ TARGET_NR_timerfd_create, "timerfd_create" , NULL, NULL, NULL },
+#endif
+#ifdef TARGET_NR_timerfd_gettime
+{ TARGET_NR_timerfd_gettime, "timerfd_gettime" , NULL, NULL, NULL },
+#endif
+#ifdef TARGET_NR_timerfd_settime
+{ TARGET_NR_timerfd_settime, "timerfd_settime" , NULL, NULL, NULL },
+#endif
 #ifdef TARGET_NR_times
 { TARGET_NR_times, "times" , NULL, NULL, NULL },
 #endif
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index a50229d0d7..7ff7c21255 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -58,6 +58,7 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
 #include <sys/shm.h>
 #include <sys/sem.h>
 #include <sys/statfs.h>
+#include <sys/timerfd.h>
 #include <utime.h>
 #include <sys/sysinfo.h>
 //#include <sys/user.h>
@@ -251,6 +252,12 @@ _syscall2(int, capget, struct __user_cap_header_struct *, header,
           struct __user_cap_data_struct *, data);
 _syscall2(int, capset, struct __user_cap_header_struct *, header,
           struct __user_cap_data_struct *, data);
+#if defined(TARGET_NR_ioprio_get) && defined(__NR_ioprio_get)
+_syscall2(int, ioprio_get, int, which, int, who)
+#endif
+#if defined(TARGET_NR_ioprio_set) && defined(__NR_ioprio_set)
+_syscall3(int, ioprio_set, int, which, int, who, int, ioprio)
+#endif
 
 static bitmask_transtbl fcntl_flags_tbl[] = {
   { TARGET_O_ACCMODE,   TARGET_O_WRONLY,    O_ACCMODE,   O_WRONLY,    },
@@ -294,7 +301,6 @@ static int sys_getcwd1(char *buf, size_t size)
   return strlen(buf)+1;
 }
 
-#ifdef TARGET_NR_openat
 static int sys_openat(int dirfd, const char *pathname, int flags, mode_t mode)
 {
   /*
@@ -306,7 +312,6 @@ static int sys_openat(int dirfd, const char *pathname, int flags, mode_t mode)
   }
   return (openat(dirfd, pathname, flags));
 }
-#endif
 
 #ifdef TARGET_NR_utimensat
 #ifdef CONFIG_UTIMENSAT
@@ -1798,6 +1803,7 @@ static struct iovec *lock_iovec(int type, abi_ulong target_addr,
     abi_ulong total_len, max_len;
     int i;
     int err = 0;
+    bool bad_address = false;
 
     if (count == 0) {
         errno = 0;
@@ -1838,9 +1844,20 @@ static struct iovec *lock_iovec(int type, abi_ulong target_addr,
             vec[i].iov_base = 0;
         } else {
             vec[i].iov_base = lock_user(type, base, len, copy);
+            /* If the first buffer pointer is bad, this is a fault.  But
+             * subsequent bad buffers will result in a partial write; this
+             * is realized by filling the vector with null pointers and
+             * zero lengths. */
             if (!vec[i].iov_base) {
-                err = EFAULT;
-                goto fail;
+                if (i == 0) {
+                    err = EFAULT;
+                    goto fail;
+                } else {
+                    bad_address = true;
+                }
+            }
+            if (bad_address) {
+                len = 0;
             }
             if (len > max_len - total_len) {
                 len = max_len - total_len;
@@ -2419,9 +2436,13 @@ struct target_semid_ds
 {
   struct target_ipc_perm sem_perm;
   abi_ulong sem_otime;
+#if !defined(TARGET_PPC64)
   abi_ulong __unused1;
+#endif
   abi_ulong sem_ctime;
+#if !defined(TARGET_PPC64)
   abi_ulong __unused2;
+#endif
   abi_ulong sem_nsems;
   abi_ulong __unused3;
   abi_ulong __unused4;
@@ -2643,9 +2664,18 @@ static inline abi_long do_semctl(int semid, int semnum, int cmd,
     switch( cmd ) {
 	case GETVAL:
 	case SETVAL:
-            arg.val = tswap32(target_su.val);
+            /* In 64 bit cross-endian situations, we will erroneously pick up
+             * the wrong half of the union for the "val" element.  To rectify
+             * this, the entire 8-byte structure is byteswapped, followed by
+	     * a swap of the 4 byte val field. In other cases, the data is
+	     * already in proper host byte order. */
+	    if (sizeof(target_su.val) != (sizeof(target_su.buf))) {
+		target_su.buf = tswapal(target_su.buf);
+		arg.val = tswap32(target_su.val);
+	    } else {
+		arg.val = target_su.val;
+	    }
             ret = get_errno(semctl(semid, semnum, cmd, arg));
-            target_su.val = tswap32(arg.val);
             break;
 	case GETALL:
 	case SETALL:
@@ -2861,15 +2891,23 @@ struct target_msgbuf {
 };
 
 static inline abi_long do_msgsnd(int msqid, abi_long msgp,
-                                 unsigned int msgsz, int msgflg)
+                                 ssize_t msgsz, int msgflg)
 {
     struct target_msgbuf *target_mb;
     struct msgbuf *host_mb;
     abi_long ret = 0;
 
+    if (msgsz < 0) {
+        return -TARGET_EINVAL;
+    }
+
     if (!lock_user_struct(VERIFY_READ, target_mb, msgp, 0))
         return -TARGET_EFAULT;
     host_mb = malloc(msgsz+sizeof(long));
+    if (!host_mb) {
+        unlock_user_struct(target_mb, msgp, 0);
+        return -TARGET_ENOMEM;
+    }
     host_mb->mtype = (abi_long) tswapal(target_mb->mtype);
     memcpy(host_mb->mtext, target_mb->mtext, msgsz);
     ret = get_errno(msgsnd(msqid, host_mb, msgsz, msgflg));
@@ -3112,8 +3150,8 @@ static inline abi_long do_shmdt(abi_ulong shmaddr)
 #ifdef TARGET_NR_ipc
 /* ??? This only works with linear mappings.  */
 /* do_ipc() must return target values and target errnos. */
-static abi_long do_ipc(unsigned int call, int first,
-                       int second, int third,
+static abi_long do_ipc(unsigned int call, abi_long first,
+                       abi_long second, abi_long third,
                        abi_long ptr, abi_long fifth)
 {
     int version;
@@ -3131,9 +3169,15 @@ static abi_long do_ipc(unsigned int call, int first,
         ret = get_errno(semget(first, second, third));
         break;
 
-    case IPCOP_semctl:
-        ret = do_semctl(first, second, third, (union target_semun)(abi_ulong) ptr);
+    case IPCOP_semctl: {
+        /* The semun argument to semctl is passed by value, so dereference the
+         * ptr argument. */
+        abi_ulong atptr;
+        get_user_ual(atptr, ptr);
+        ret = do_semctl(first, second, third,
+                (union target_semun) atptr);
         break;
+    }
 
     case IPCOP_msgget:
         ret = get_errno(msgget(first, second));
@@ -4914,6 +4958,47 @@ static inline abi_long host_to_target_itimerspec(abi_ulong target_addr,
     return 0;
 }
 
+static inline abi_long target_to_host_sigevent(struct sigevent *host_sevp,
+                                               abi_ulong target_addr)
+{
+    struct target_sigevent *target_sevp;
+
+    if (!lock_user_struct(VERIFY_READ, target_sevp, target_addr, 1)) {
+        return -TARGET_EFAULT;
+    }
+
+    /* This union is awkward on 64 bit systems because it has a 32 bit
+     * integer and a pointer in it; we follow the conversion approach
+     * used for handling sigval types in signal.c so the guest should get
+     * the correct value back even if we did a 64 bit byteswap and it's
+     * using the 32 bit integer.
+     */
+    host_sevp->sigev_value.sival_ptr =
+        (void *)(uintptr_t)tswapal(target_sevp->sigev_value.sival_ptr);
+    host_sevp->sigev_signo =
+        target_to_host_signal(tswap32(target_sevp->sigev_signo));
+    host_sevp->sigev_notify = tswap32(target_sevp->sigev_notify);
+    host_sevp->_sigev_un._tid = tswap32(target_sevp->_sigev_un._tid);
+
+    unlock_user_struct(target_sevp, target_addr, 1);
+    return 0;
+}
+
+#if defined(TARGET_NR_mlockall)
+static inline int target_to_host_mlockall_arg(int arg)
+{
+    int result = 0;
+
+    if (arg & TARGET_MLOCKALL_MCL_CURRENT) {
+        result |= MCL_CURRENT;
+    }
+    if (arg & TARGET_MLOCKALL_MCL_FUTURE) {
+        result |= MCL_FUTURE;
+    }
+    return result;
+}
+#endif
+
 #if defined(TARGET_NR_stat64) || defined(TARGET_NR_newfstatat)
 static inline abi_long host_to_target_stat64(void *cpu_env,
                                              abi_ulong target_addr,
@@ -5092,10 +5177,8 @@ static int open_self_cmdline(void *cpu_env, int fd)
 
 static int open_self_maps(void *cpu_env, int fd)
 {
-#if defined(TARGET_ARM) || defined(TARGET_M68K) || defined(TARGET_UNICORE32)
     CPUState *cpu = ENV_GET_CPU((CPUArchState *)cpu_env);
     TaskState *ts = cpu->opaque;
-#endif
     FILE *fp;
     char *line = NULL;
     size_t len = 0;
@@ -5118,13 +5201,18 @@ static int open_self_maps(void *cpu_env, int fd)
         if ((fields < 10) || (fields > 11)) {
             continue;
         }
-        if (!strncmp(path, "[stack]", 7)) {
-            continue;
-        }
-        if (h2g_valid(min) && h2g_valid(max)) {
+        if (h2g_valid(min)) {
+            int flags = page_get_flags(h2g(min));
+            max = h2g_valid(max - 1) ? max : (uintptr_t)g2h(GUEST_ADDR_MAX);
+            if (page_check_range(h2g(min), max - min, flags) == -1) {
+                continue;
+            }
+            if (h2g(min) == ts->info->stack_limit) {
+                pstrcpy(path, sizeof(path), "      [stack]");
+            }
             dprintf(fd, TARGET_ABI_FMT_lx "-" TARGET_ABI_FMT_lx
                     " %c%c%c%c %08" PRIx64 " %02x:%02x %d %s%s\n",
-                    h2g(min), h2g(max), flag_r, flag_w,
+                    h2g(min), h2g(max - 1) + 1, flag_r, flag_w,
                     flag_x, flag_p, offset, dev_maj, dev_min, inode,
                     path[0] ? "         " : "", path);
         }
@@ -5133,14 +5221,6 @@ static int open_self_maps(void *cpu_env, int fd)
     free(line);
     fclose(fp);
 
-#if defined(TARGET_ARM) || defined(TARGET_M68K) || defined(TARGET_UNICORE32)
-    dprintf(fd, "%08llx-%08llx rw-p %08llx 00:00 0          [stack]\n",
-                (unsigned long long)ts->info->stack_limit,
-                (unsigned long long)(ts->info->start_stack +
-                                     (TARGET_PAGE_SIZE - 1)) & TARGET_PAGE_MASK,
-                (unsigned long long)0);
-#endif
-
     return 0;
 }
 
@@ -5279,7 +5359,7 @@ static int open_net_route(void *cpu_env, int fd)
 }
 #endif
 
-static int do_open(void *cpu_env, const char *pathname, int flags, mode_t mode)
+static int do_openat(void *cpu_env, int dirfd, const char *pathname, int flags, mode_t mode)
 {
     struct fake_open {
         const char *filename;
@@ -5300,7 +5380,7 @@ static int do_open(void *cpu_env, const char *pathname, int flags, mode_t mode)
 
     if (is_proc_myself(pathname, "exe")) {
         int execfd = qemu_getauxval(AT_EXECFD);
-        return execfd ? execfd : get_errno(open(exec_path, flags, mode));
+        return execfd ? execfd : get_errno(sys_openat(dirfd, exec_path, flags, mode));
     }
 
     for (fake_open = fakes; fake_open->filename; fake_open++) {
@@ -5334,7 +5414,7 @@ static int do_open(void *cpu_env, const char *pathname, int flags, mode_t mode)
         return fd;
     }
 
-    return get_errno(open(path(pathname), flags, mode));
+    return get_errno(sys_openat(dirfd, path(pathname), flags, mode));
 }
 
 /* do_syscall() should always have a single exit point at the end so
@@ -5409,22 +5489,19 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
     case TARGET_NR_open:
         if (!(p = lock_user_string(arg1)))
             goto efault;
-        ret = get_errno(do_open(cpu_env, p,
-                                target_to_host_bitmask(arg2, fcntl_flags_tbl),
-                                arg3));
+        ret = get_errno(do_openat(cpu_env, AT_FDCWD, p,
+                                  target_to_host_bitmask(arg2, fcntl_flags_tbl),
+                                  arg3));
         unlock_user(p, arg1, 0);
         break;
-#if defined(TARGET_NR_openat) && defined(__NR_openat)
     case TARGET_NR_openat:
         if (!(p = lock_user_string(arg2)))
             goto efault;
-        ret = get_errno(sys_openat(arg1,
-                                   path(p),
-                                   target_to_host_bitmask(arg3, fcntl_flags_tbl),
-                                   arg4));
+        ret = get_errno(do_openat(cpu_env, arg1, p,
+                                  target_to_host_bitmask(arg3, fcntl_flags_tbl),
+                                  arg4));
         unlock_user(p, arg2, 0);
         break;
-#endif
     case TARGET_NR_close:
         ret = get_errno(close(arg1));
         break;
@@ -6620,11 +6697,22 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             p2 = lock_user(VERIFY_WRITE, arg2, arg3, 0);
             if (!p || !p2) {
                 ret = -TARGET_EFAULT;
+            } else if (!arg3) {
+                /* Short circuit this for the magic exe check. */
+                ret = -TARGET_EINVAL;
             } else if (is_proc_myself((const char *)p, "exe")) {
                 char real[PATH_MAX], *temp;
                 temp = realpath(exec_path, real);
-                ret = temp == NULL ? get_errno(-1) : strlen(real) ;
-                snprintf((char *)p2, arg3, "%s", real);
+                /* Return value is # of bytes that we wrote to the buffer. */
+                if (temp == NULL) {
+                    ret = get_errno(-1);
+                } else {
+                    /* Don't worry about sign mismatch as earlier mapping
+                     * logic would have thrown a bad address error. */
+                    ret = MIN(strlen(real), arg3);
+                    /* We cannot NUL terminate the string. */
+                    memcpy(p2, real, ret);
+                }
             } else {
                 ret = get_errno(readlink(path(p), p2, arg3));
             }
@@ -6763,7 +6851,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
 #ifdef TARGET_NR_mlockall
     case TARGET_NR_mlockall:
-        ret = get_errno(mlockall(arg1));
+        ret = get_errno(mlockall(target_to_host_mlockall_arg(arg1)));
         break;
 #endif
 #ifdef TARGET_NR_munlockall
@@ -7679,6 +7767,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             struct sched_param *target_schp;
             struct sched_param schp;
 
+            if (arg2 == 0) {
+                return -TARGET_EINVAL;
+            }
             if (!lock_user_struct(VERIFY_READ, target_schp, arg2, 1))
                 goto efault;
             schp.sched_priority = tswap32(target_schp->sched_priority);
@@ -7690,6 +7781,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         {
             struct sched_param *target_schp;
             struct sched_param schp;
+
+            if (arg2 == 0) {
+                return -TARGET_EINVAL;
+            }
             ret = get_errno(sched_getparam(arg1, &schp));
             if (!is_error(ret)) {
                 if (!lock_user_struct(VERIFY_WRITE, target_schp, arg2, 0))
@@ -7703,6 +7798,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         {
             struct sched_param *target_schp;
             struct sched_param schp;
+            if (arg3 == 0) {
+                return -TARGET_EINVAL;
+            }
             if (!lock_user_struct(VERIFY_READ, target_schp, arg3, 1))
                 goto efault;
             schp.sched_priority = tswap32(target_schp->sched_priority);
@@ -7727,7 +7825,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             struct timespec ts;
             ret = get_errno(sched_rr_get_interval(arg1, &ts));
             if (!is_error(ret)) {
-                host_to_target_timespec(arg2, &ts);
+                ret = host_to_target_timespec(arg2, &ts);
             }
         }
         break;
@@ -8966,6 +9064,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         ret = get_errno(clock_nanosleep(arg1, arg2, &ts, arg4 ? &ts : NULL));
         if (arg4)
             host_to_target_timespec(arg4, &ts);
+
+#if defined(TARGET_PPC)
+        /* clock_nanosleep is odd in that it returns positive errno values.
+         * On PPC, CR0 bit 3 should be set in such a situation. */
+        if (ret) {
+            ((CPUPPCState *)cpu_env)->crf[0] |= 1;
+        }
+#endif
         break;
     }
 #endif
@@ -9062,12 +9168,16 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #if defined(TARGET_NR_mq_open) && defined(__NR_mq_open)
     case TARGET_NR_mq_open:
         {
-            struct mq_attr posix_mq_attr;
+            struct mq_attr posix_mq_attr, *attrp;
 
             p = lock_user_string(arg1 - 1);
-            if (arg4 != 0)
+            if (arg4 != 0) {
                 copy_from_user_mq_attr (&posix_mq_attr, arg4);
-            ret = get_errno(mq_open(p, arg2, arg3, &posix_mq_attr));
+                attrp = &posix_mq_attr;
+            } else {
+                attrp = 0;
+            }
+            ret = get_errno(mq_open(p, arg2, arg3, attrp));
             unlock_user (p, arg1, 0);
         }
         break;
@@ -9413,7 +9523,6 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
         /* args: clockid_t clockid, struct sigevent *sevp, timer_t *timerid */
 
         struct sigevent host_sevp = { {0}, }, *phost_sevp = NULL;
-        struct target_sigevent *ptarget_sevp;
         struct target_timer_t *ptarget_timer;
 
         int clkid = arg1;
@@ -9425,14 +9534,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
             timer_t *phtimer = g_posix_timers  + timer_index;
 
             if (arg2) {
-                if (!lock_user_struct(VERIFY_READ, ptarget_sevp, arg2, 1)) {
-                    goto efault;
-                }
-
-                host_sevp.sigev_signo = tswap32(ptarget_sevp->sigev_signo);
-                host_sevp.sigev_notify = tswap32(ptarget_sevp->sigev_notify);
-
                 phost_sevp = &host_sevp;
+                ret = target_to_host_sigevent(phost_sevp, arg2);
+                if (ret != 0) {
+                    break;
+                }
             }
 
             ret = get_errno(timer_create(clkid, phost_sevp, phtimer));
@@ -9524,6 +9630,73 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
     }
 #endif
 
+#if defined(TARGET_NR_timerfd_create) && defined(CONFIG_TIMERFD)
+    case TARGET_NR_timerfd_create:
+        ret = get_errno(timerfd_create(arg1,
+                target_to_host_bitmask(arg2, fcntl_flags_tbl)));
+        break;
+#endif
+
+#if defined(TARGET_NR_timerfd_gettime) && defined(CONFIG_TIMERFD)
+    case TARGET_NR_timerfd_gettime:
+        {
+            struct itimerspec its_curr;
+
+            ret = get_errno(timerfd_gettime(arg1, &its_curr));
+
+            if (arg2 && host_to_target_itimerspec(arg2, &its_curr)) {
+                goto efault;
+            }
+        }
+        break;
+#endif
+
+#if defined(TARGET_NR_timerfd_settime) && defined(CONFIG_TIMERFD)
+    case TARGET_NR_timerfd_settime:
+        {
+            struct itimerspec its_new, its_old, *p_new;
+
+            if (arg3) {
+                if (target_to_host_itimerspec(&its_new, arg3)) {
+                    goto efault;
+                }
+                p_new = &its_new;
+            } else {
+                p_new = NULL;
+            }
+
+            ret = get_errno(timerfd_settime(arg1, arg2, p_new, &its_old));
+
+            if (arg4 && host_to_target_itimerspec(arg4, &its_old)) {
+                goto efault;
+            }
+        }
+        break;
+#endif
+
+#if defined(TARGET_NR_ioprio_get) && defined(__NR_ioprio_get)
+    case TARGET_NR_ioprio_get:
+        ret = get_errno(ioprio_get(arg1, arg2));
+        break;
+#endif
+
+#if defined(TARGET_NR_ioprio_set) && defined(__NR_ioprio_set)
+    case TARGET_NR_ioprio_set:
+        ret = get_errno(ioprio_set(arg1, arg2, arg3));
+        break;
+#endif
+
+#if defined(TARGET_NR_setns) && defined(CONFIG_SETNS)
+    case TARGET_NR_setns:
+        ret = get_errno(setns(arg1, arg2));
+        break;
+#endif
+#if defined(TARGET_NR_unshare) && defined(CONFIG_SETNS)
+    case TARGET_NR_unshare:
+        ret = get_errno(unshare(arg1));
+        break;
+#endif
+
     default:
     unimplemented:
         gemu_log("qemu: Unsupported syscall: %d\n", num);
diff --git a/linux-user/unicore32/syscall.h b/linux-user/unicore32/syscall.h
index f7e55254cf..385a97562d 100644
--- a/linux-user/unicore32/syscall.h
+++ b/linux-user/unicore32/syscall.h
@@ -53,4 +53,8 @@ struct target_pt_regs {
 #define UNAME_MACHINE "UniCore-II"
 #define UNAME_MINIMUM_RELEASE "2.6.32"
 
+#define TARGET_MINSIGSTKSZ 2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
+
 #endif /* __UC32_SYSCALL_H__ */
diff --git a/linux-user/x86_64/syscall.h b/linux-user/x86_64/syscall.h
index e03b5a0cfc..88b3c3fe31 100644
--- a/linux-user/x86_64/syscall.h
+++ b/linux-user/x86_64/syscall.h
@@ -97,3 +97,6 @@ struct target_msqid64_ds {
 #define TARGET_ARCH_SET_FS 0x1002
 #define TARGET_ARCH_GET_FS 0x1003
 #define TARGET_ARCH_GET_GS 0x1004
+#define TARGET_MINSIGSTKSZ 2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index ea8094d043..0503764047 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -230,7 +230,8 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = {
     { SCMP_SYS(timerfd_create), 240 },
     { SCMP_SYS(shmctl), 240 },
     { SCMP_SYS(mlock), 240 },
-    { SCMP_SYS(munlock), 240 }
+    { SCMP_SYS(munlock), 240 },
+    { SCMP_SYS(semctl), 240 }
 };
 
 int seccomp_start(void)
diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h
index ee4fbb1dad..07f3c9e866 100644
--- a/target-arm/cpu-qom.h
+++ b/target-arm/cpu-qom.h
@@ -148,6 +148,7 @@ typedef struct ARMCPU {
     uint64_t id_aa64isar1;
     uint64_t id_aa64mmfr0;
     uint64_t id_aa64mmfr1;
+    uint32_t dbgdidr;
     uint32_t clidr;
     /* The elements of this array are the CCSIDR values for each cache,
      * in the order L1DCache, L1ICache, L2DCache, L2ICache, etc.
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 7cebb76656..8199f32e32 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -640,6 +640,7 @@ static void cortex_a8_initfn(Object *obj)
     cpu->id_isar2 = 0x21232031;
     cpu->id_isar3 = 0x11112131;
     cpu->id_isar4 = 0x00111142;
+    cpu->dbgdidr = 0x15141000;
     cpu->clidr = (1 << 27) | (2 << 24) | 3;
     cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */
     cpu->ccsidr[1] = 0x2007e01a; /* 16k L1 icache. */
@@ -712,9 +713,10 @@ static void cortex_a9_initfn(Object *obj)
     cpu->id_isar2 = 0x21232041;
     cpu->id_isar3 = 0x11112131;
     cpu->id_isar4 = 0x00111142;
+    cpu->dbgdidr = 0x35141000;
     cpu->clidr = (1 << 27) | (1 << 24) | 3;
-    cpu->ccsidr[0] = 0xe00fe015; /* 16k L1 dcache. */
-    cpu->ccsidr[1] = 0x200fe015; /* 16k L1 icache. */
+    cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */
+    cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */
     define_arm_cp_regs(cpu, cortexa9_cp_reginfo);
 }
 
@@ -773,6 +775,7 @@ static void cortex_a15_initfn(Object *obj)
     cpu->id_isar2 = 0x21232041;
     cpu->id_isar3 = 0x11112131;
     cpu->id_isar4 = 0x10011142;
+    cpu->dbgdidr = 0x3515f021;
     cpu->clidr = 0x0a200023;
     cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */
     cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 79205ba335..8098b8d357 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -220,6 +220,7 @@ typedef struct CPUARMState {
         uint64_t dbgbcr[16]; /* breakpoint control registers */
         uint64_t dbgwvr[16]; /* watchpoint value registers */
         uint64_t dbgwcr[16]; /* watchpoint control registers */
+        uint64_t mdscr_el1;
         /* If the counter is enabled, this stores the last time the counter
          * was reset. Otherwise it stores the counter value
          */
@@ -411,7 +412,13 @@ int arm_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, int rw,
 #define CPSR_E (1U << 9)
 #define CPSR_IT_2_7 (0xfc00U)
 #define CPSR_GE (0xfU << 16)
-#define CPSR_RESERVED (0xfU << 20)
+#define CPSR_IL (1U << 20)
+/* Note that the RESERVED bits include bit 21, which is PSTATE_SS in
+ * an AArch64 SPSR but RES0 in AArch32 SPSR and CPSR. In QEMU we use
+ * env->uncached_cpsr bit 21 to store PSTATE.SS when executing in AArch32,
+ * where it is live state but not accessible to the AArch32 code.
+ */
+#define CPSR_RESERVED (0x7U << 21)
 #define CPSR_J (1U << 24)
 #define CPSR_IT_0_1 (3U << 25)
 #define CPSR_Q (1U << 27)
@@ -428,7 +435,9 @@ int arm_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, int rw,
 /* Bits writable in user mode.  */
 #define CPSR_USER (CPSR_NZCV | CPSR_Q | CPSR_GE)
 /* Execution state bits.  MRS read as zero, MSR writes ignored.  */
-#define CPSR_EXEC (CPSR_T | CPSR_IT | CPSR_J)
+#define CPSR_EXEC (CPSR_T | CPSR_IT | CPSR_J | CPSR_IL)
+/* Mask of bits which may be set by exception return copying them from SPSR */
+#define CPSR_ERET_MASK (~CPSR_RESERVED)
 
 #define TTBCR_N      (7U << 0) /* TTBCR.EAE==0 */
 #define TTBCR_T0SZ   (7U << 0) /* TTBCR.EAE==1 */
@@ -1111,6 +1120,66 @@ static inline int cpu_mmu_index (CPUARMState *env)
     return arm_current_pl(env);
 }
 
+/* Return the Exception Level targeted by debug exceptions;
+ * currently always EL1 since we don't implement EL2 or EL3.
+ */
+static inline int arm_debug_target_el(CPUARMState *env)
+{
+    return 1;
+}
+
+static inline bool aa64_generate_debug_exceptions(CPUARMState *env)
+{
+    if (arm_current_pl(env) == arm_debug_target_el(env)) {
+        if ((extract32(env->cp15.mdscr_el1, 13, 1) == 0)
+            || (env->daif & PSTATE_D)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+static inline bool aa32_generate_debug_exceptions(CPUARMState *env)
+{
+    if (arm_current_pl(env) == 0 && arm_el_is_aa64(env, 1)) {
+        return aa64_generate_debug_exceptions(env);
+    }
+    return arm_current_pl(env) != 2;
+}
+
+/* Return true if debugging exceptions are currently enabled.
+ * This corresponds to what in ARM ARM pseudocode would be
+ *    if UsingAArch32() then
+ *        return AArch32.GenerateDebugExceptions()
+ *    else
+ *        return AArch64.GenerateDebugExceptions()
+ * We choose to push the if() down into this function for clarity,
+ * since the pseudocode has it at all callsites except for the one in
+ * CheckSoftwareStep(), where it is elided because both branches would
+ * always return the same value.
+ *
+ * Parts of the pseudocode relating to EL2 and EL3 are omitted because we
+ * don't yet implement those exception levels or their associated trap bits.
+ */
+static inline bool arm_generate_debug_exceptions(CPUARMState *env)
+{
+    if (env->aarch64) {
+        return aa64_generate_debug_exceptions(env);
+    } else {
+        return aa32_generate_debug_exceptions(env);
+    }
+}
+
+/* Is single-stepping active? (Note that the "is EL_D AArch64?" check
+ * implicitly means this always returns false in pre-v8 CPUs.)
+ */
+static inline bool arm_singlestep_active(CPUARMState *env)
+{
+    return extract32(env->cp15.mdscr_el1, 0, 1)
+        && arm_el_is_aa64(env, arm_debug_target_el(env))
+        && arm_generate_debug_exceptions(env);
+}
+
 #include "exec/cpu-all.h"
 
 /* Bit usage in the TB flags field: bit 31 indicates whether we are
@@ -1136,12 +1205,20 @@ static inline int cpu_mmu_index (CPUARMState *env)
 #define ARM_TBFLAG_BSWAP_CODE_MASK  (1 << ARM_TBFLAG_BSWAP_CODE_SHIFT)
 #define ARM_TBFLAG_CPACR_FPEN_SHIFT 17
 #define ARM_TBFLAG_CPACR_FPEN_MASK  (1 << ARM_TBFLAG_CPACR_FPEN_SHIFT)
+#define ARM_TBFLAG_SS_ACTIVE_SHIFT 18
+#define ARM_TBFLAG_SS_ACTIVE_MASK (1 << ARM_TBFLAG_SS_ACTIVE_SHIFT)
+#define ARM_TBFLAG_PSTATE_SS_SHIFT 19
+#define ARM_TBFLAG_PSTATE_SS_MASK (1 << ARM_TBFLAG_PSTATE_SS_SHIFT)
 
 /* Bit usage when in AArch64 state */
 #define ARM_TBFLAG_AA64_EL_SHIFT    0
 #define ARM_TBFLAG_AA64_EL_MASK     (0x3 << ARM_TBFLAG_AA64_EL_SHIFT)
 #define ARM_TBFLAG_AA64_FPEN_SHIFT  2
 #define ARM_TBFLAG_AA64_FPEN_MASK   (1 << ARM_TBFLAG_AA64_FPEN_SHIFT)
+#define ARM_TBFLAG_AA64_SS_ACTIVE_SHIFT 3
+#define ARM_TBFLAG_AA64_SS_ACTIVE_MASK (1 << ARM_TBFLAG_AA64_SS_ACTIVE_SHIFT)
+#define ARM_TBFLAG_AA64_PSTATE_SS_SHIFT 4
+#define ARM_TBFLAG_AA64_PSTATE_SS_MASK (1 << ARM_TBFLAG_AA64_PSTATE_SS_SHIFT)
 
 /* some convenience accessor macros */
 #define ARM_TBFLAG_AARCH64_STATE(F) \
@@ -1162,10 +1239,18 @@ static inline int cpu_mmu_index (CPUARMState *env)
     (((F) & ARM_TBFLAG_BSWAP_CODE_MASK) >> ARM_TBFLAG_BSWAP_CODE_SHIFT)
 #define ARM_TBFLAG_CPACR_FPEN(F) \
     (((F) & ARM_TBFLAG_CPACR_FPEN_MASK) >> ARM_TBFLAG_CPACR_FPEN_SHIFT)
+#define ARM_TBFLAG_SS_ACTIVE(F) \
+    (((F) & ARM_TBFLAG_SS_ACTIVE_MASK) >> ARM_TBFLAG_SS_ACTIVE_SHIFT)
+#define ARM_TBFLAG_PSTATE_SS(F) \
+    (((F) & ARM_TBFLAG_PSTATE_SS_MASK) >> ARM_TBFLAG_PSTATE_SS_SHIFT)
 #define ARM_TBFLAG_AA64_EL(F) \
     (((F) & ARM_TBFLAG_AA64_EL_MASK) >> ARM_TBFLAG_AA64_EL_SHIFT)
 #define ARM_TBFLAG_AA64_FPEN(F) \
     (((F) & ARM_TBFLAG_AA64_FPEN_MASK) >> ARM_TBFLAG_AA64_FPEN_SHIFT)
+#define ARM_TBFLAG_AA64_SS_ACTIVE(F) \
+    (((F) & ARM_TBFLAG_AA64_SS_ACTIVE_MASK) >> ARM_TBFLAG_AA64_SS_ACTIVE_SHIFT)
+#define ARM_TBFLAG_AA64_PSTATE_SS(F) \
+    (((F) & ARM_TBFLAG_AA64_PSTATE_SS_MASK) >> ARM_TBFLAG_AA64_PSTATE_SS_SHIFT)
 
 static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
@@ -1179,6 +1264,19 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
         if (fpen == 3 || (fpen == 1 && arm_current_pl(env) != 0)) {
             *flags |= ARM_TBFLAG_AA64_FPEN_MASK;
         }
+        /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
+         * states defined in the ARM ARM for software singlestep:
+         *  SS_ACTIVE   PSTATE.SS   State
+         *     0            x       Inactive (the TB flag for SS is always 0)
+         *     1            0       Active-pending
+         *     1            1       Active-not-pending
+         */
+        if (arm_singlestep_active(env)) {
+            *flags |= ARM_TBFLAG_AA64_SS_ACTIVE_MASK;
+            if (env->pstate & PSTATE_SS) {
+                *flags |= ARM_TBFLAG_AA64_PSTATE_SS_MASK;
+            }
+        }
     } else {
         int privmode;
         *pc = env->regs[15];
@@ -1202,6 +1300,19 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
         if (fpen == 3 || (fpen == 1 && arm_current_pl(env) != 0)) {
             *flags |= ARM_TBFLAG_CPACR_FPEN_MASK;
         }
+        /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
+         * states defined in the ARM ARM for software singlestep:
+         *  SS_ACTIVE   PSTATE.SS   State
+         *     0            x       Inactive (the TB flag for SS is always 0)
+         *     1            0       Active-pending
+         *     1            1       Active-not-pending
+         */
+        if (arm_singlestep_active(env)) {
+            *flags |= ARM_TBFLAG_SS_ACTIVE_MASK;
+            if (env->uncached_cpsr & PSTATE_SS) {
+                *flags |= ARM_TBFLAG_PSTATE_SS_MASK;
+            }
+        }
     }
 
     *cs_base = 0;
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index 8b2081c246..38d2b8445a 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -127,6 +127,7 @@ static void aarch64_a57_initfn(Object *obj)
     cpu->id_aa64dfr0 = 0x10305106;
     cpu->id_aa64isar0 = 0x00010000;
     cpu->id_aa64mmfr0 = 0x00001124;
+    cpu->dbgdidr = 0x3516d000;
     cpu->clidr = 0x0a200023;
     cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
     cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
diff --git a/target-arm/helper.c b/target-arm/helper.c
index f630d96306..2a77c97c7b 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -389,12 +389,6 @@ static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
 }
 
 static const ARMCPRegInfo cp_reginfo[] = {
-    /* DBGDIDR: just RAZ. In particular this means the "debug architecture
-     * version" bits will read as a reserved value, which should cause
-     * Linux to not try to use the debug hardware.
-     */
-    { .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0,
-      .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 },
     { .name = "FCSEIDR", .cp = 15, .crn = 13, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c13_fcse),
       .resetvalue = 0, .writefn = fcse_write, .raw_writefn = raw_write, },
@@ -471,6 +465,13 @@ static const ARMCPRegInfo not_v7_cp_reginfo[] = {
     { .name = "DUMMY", .cp = 15, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = CP_ANY,
       .access = PL1_R, .type = ARM_CP_CONST | ARM_CP_NO_MIGRATE,
       .resetvalue = 0 },
+    /* We don't implement pre-v7 debug but most CPUs had at least a DBGDIDR;
+     * implementing it as RAZ means the "debug architecture version" bits
+     * will read as a reserved value, which should cause Linux to not try
+     * to use the debug hardware.
+     */
+    { .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0,
+      .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 },
     REGINFO_SENTINEL
 };
 
@@ -712,13 +713,6 @@ static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 }
 
 static const ARMCPRegInfo v7_cp_reginfo[] = {
-    /* DBGDRAR, DBGDSAR: always RAZ since we don't implement memory mapped
-     * debug components
-     */
-    { .name = "DBGDRAR", .cp = 14, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0,
-      .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 },
-    { .name = "DBGDSAR", .cp = 14, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
-      .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 },
     /* the old v6 WFI, UNPREDICTABLE in v7 but we choose to NOP */
     { .name = "NOP", .cp = 15, .crn = 7, .crm = 0, .opc1 = 0, .opc2 = 4,
       .access = PL1_W, .type = ARM_CP_NOP },
@@ -1734,11 +1728,6 @@ static const ARMCPRegInfo lpae_cp_reginfo[] = {
     { .name = "AMAIR1", .cp = 15, .crn = 10, .crm = 3, .opc1 = 0, .opc2 = 1,
       .access = PL1_RW, .type = ARM_CP_CONST | ARM_CP_OVERRIDE,
       .resetvalue = 0 },
-    /* 64 bit access versions of the (dummy) debug registers */
-    { .name = "DBGDRAR", .cp = 14, .crm = 1, .opc1 = 0,
-      .access = PL0_R, .type = ARM_CP_CONST|ARM_CP_64BIT, .resetvalue = 0 },
-    { .name = "DBGDSAR", .cp = 14, .crm = 2, .opc1 = 0,
-      .access = PL0_R, .type = ARM_CP_CONST|ARM_CP_64BIT, .resetvalue = 0 },
     { .name = "PAR", .cp = 15, .crm = 7, .opc1 = 0,
       .access = PL1_RW, .type = ARM_CP_64BIT,
       .fieldoffset = offsetof(CPUARMState, cp15.par_el1), .resetvalue = 0 },
@@ -2083,16 +2072,6 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
       .opc1 = 0, .crn = 3, .crm = 0, .opc2 = 0,
       .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c3),
       .resetvalue = 0, .writefn = dacr_write, .raw_writefn = raw_write, },
-    /* Dummy implementation of monitor debug system control register:
-     * we don't support debug.
-     */
-    { .name = "MDSCR_EL1", .state = ARM_CP_STATE_AA64,
-      .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2,
-      .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
-    /* We define a dummy WI OSLAR_EL1, because Linux writes to it. */
-    { .name = "OSLAR_EL1", .state = ARM_CP_STATE_AA64,
-      .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 4,
-      .access = PL1_W, .type = ARM_CP_NOP },
     { .name = "ELR_EL1", .state = ARM_CP_STATE_AA64,
       .type = ARM_CP_NO_MIGRATE,
       .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 1,
@@ -2206,29 +2185,98 @@ static CPAccessResult ctr_el0_access(CPUARMState *env, const ARMCPRegInfo *ri)
     return CP_ACCESS_OK;
 }
 
-static void define_aarch64_debug_regs(ARMCPU *cpu)
+static const ARMCPRegInfo debug_cp_reginfo[] = {
+    /* DBGDRAR, DBGDSAR: always RAZ since we don't implement memory mapped
+     * debug components. The AArch64 version of DBGDRAR is named MDRAR_EL1;
+     * unlike DBGDRAR it is never accessible from EL0.
+     * DBGDSAR is deprecated and must RAZ from v8 anyway, so it has no AArch64
+     * accessor.
+     */
+    { .name = "DBGDRAR", .cp = 14, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 0,
+      .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "MDRAR_EL1", .state = ARM_CP_STATE_AA64,
+      .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 0,
+      .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "DBGDSAR", .cp = 14, .crn = 2, .crm = 0, .opc1 = 0, .opc2 = 0,
+      .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = 0 },
+    /* Dummy implementation of monitor debug system control register:
+     * we don't support debug. (The 32-bit alias is DBGDSCRext.)
+     */
+    { .name = "MDSCR_EL1", .state = ARM_CP_STATE_BOTH,
+      .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2,
+      .access = PL1_RW,
+      .fieldoffset = offsetof(CPUARMState, cp15.mdscr_el1),
+      .resetvalue = 0 },
+    /* We define a dummy WI OSLAR_EL1, because Linux writes to it. */
+    { .name = "OSLAR_EL1", .state = ARM_CP_STATE_BOTH,
+      .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 4,
+      .access = PL1_W, .type = ARM_CP_NOP },
+    REGINFO_SENTINEL
+};
+
+static const ARMCPRegInfo debug_lpae_cp_reginfo[] = {
+    /* 64 bit access versions of the (dummy) debug registers */
+    { .name = "DBGDRAR", .cp = 14, .crm = 1, .opc1 = 0,
+      .access = PL0_R, .type = ARM_CP_CONST|ARM_CP_64BIT, .resetvalue = 0 },
+    { .name = "DBGDSAR", .cp = 14, .crm = 2, .opc1 = 0,
+      .access = PL0_R, .type = ARM_CP_CONST|ARM_CP_64BIT, .resetvalue = 0 },
+    REGINFO_SENTINEL
+};
+
+static void define_debug_regs(ARMCPU *cpu)
 {
-    /* Define breakpoint and watchpoint registers. These do nothing
-     * but read as written, for now.
+    /* Define v7 and v8 architectural debug registers.
+     * These are just dummy implementations for now.
      */
     int i;
+    int wrps, brps;
+    ARMCPRegInfo dbgdidr = {
+        .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0,
+        .access = PL0_R, .type = ARM_CP_CONST, .resetvalue = cpu->dbgdidr,
+    };
 
-    for (i = 0; i < 16; i++) {
+    brps = extract32(cpu->dbgdidr, 24, 4);
+    wrps = extract32(cpu->dbgdidr, 28, 4);
+
+    /* The DBGDIDR and ID_AA64DFR0_EL1 define various properties
+     * of the debug registers such as number of breakpoints;
+     * check that if they both exist then they agree.
+     */
+    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
+        assert(extract32(cpu->id_aa64dfr0, 12, 4) == brps);
+        assert(extract32(cpu->id_aa64dfr0, 20, 4) == wrps);
+    }
+
+    define_one_arm_cp_reg(cpu, &dbgdidr);
+    define_arm_cp_regs(cpu, debug_cp_reginfo);
+
+    if (arm_feature(&cpu->env, ARM_FEATURE_LPAE)) {
+        define_arm_cp_regs(cpu, debug_lpae_cp_reginfo);
+    }
+
+    for (i = 0; i < brps + 1; i++) {
         ARMCPRegInfo dbgregs[] = {
-            { .name = "DBGBVR", .state = ARM_CP_STATE_AA64,
-              .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 4,
+            { .name = "DBGBVR", .state = ARM_CP_STATE_BOTH,
+              .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 4,
               .access = PL1_RW,
               .fieldoffset = offsetof(CPUARMState, cp15.dbgbvr[i]) },
-            { .name = "DBGBCR", .state = ARM_CP_STATE_AA64,
-              .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 5,
+            { .name = "DBGBCR", .state = ARM_CP_STATE_BOTH,
+              .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 5,
               .access = PL1_RW,
               .fieldoffset = offsetof(CPUARMState, cp15.dbgbcr[i]) },
-            { .name = "DBGWVR", .state = ARM_CP_STATE_AA64,
-              .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 6,
+            REGINFO_SENTINEL
+        };
+        define_arm_cp_regs(cpu, dbgregs);
+    }
+
+    for (i = 0; i < wrps + 1; i++) {
+        ARMCPRegInfo dbgregs[] = {
+            { .name = "DBGWVR", .state = ARM_CP_STATE_BOTH,
+              .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 6,
               .access = PL1_RW,
               .fieldoffset = offsetof(CPUARMState, cp15.dbgwvr[i]) },
-            { .name = "DBGWCR", .state = ARM_CP_STATE_AA64,
-              .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 7,
+            { .name = "DBGWCR", .state = ARM_CP_STATE_BOTH,
+              .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 7,
               .access = PL1_RW,
               .fieldoffset = offsetof(CPUARMState, cp15.dbgwcr[i]) },
                REGINFO_SENTINEL
@@ -2353,6 +2401,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
         };
         define_one_arm_cp_reg(cpu, &clidr);
         define_arm_cp_regs(cpu, v7_cp_reginfo);
+        define_debug_regs(cpu);
     } else {
         define_arm_cp_regs(cpu, not_v7_cp_reginfo);
     }
@@ -2426,7 +2475,6 @@ void register_cp_regs_for_features(ARMCPU *cpu)
         define_one_arm_cp_reg(cpu, &rvbar);
         define_arm_cp_regs(cpu, v8_idregs);
         define_arm_cp_regs(cpu, v8_cp_reginfo);
-        define_aarch64_debug_regs(cpu);
     }
     if (arm_feature(env, ARM_FEATURE_EL2)) {
         define_arm_cp_regs(cpu, v8_el2_cp_reginfo);
@@ -2779,9 +2827,11 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r,
         /* The AArch32 view of a shared register sees the lower 32 bits
          * of a 64 bit backing field. It is not migratable as the AArch64
          * view handles that. AArch64 also handles reset.
-         * We assume it is a cp15 register.
+         * We assume it is a cp15 register if the .cp field is left unset.
          */
-        r2->cp = 15;
+        if (r2->cp == 0) {
+            r2->cp = 15;
+        }
         r2->type |= ARM_CP_NO_MIGRATE;
         r2->resetfn = arm_cp_reset_ignore;
 #ifdef HOST_WORDS_BIGENDIAN
@@ -2794,8 +2844,11 @@ static void add_cpreg_to_hashtable(ARMCPU *cpu, const ARMCPRegInfo *r,
         /* To allow abbreviation of ARMCPRegInfo
          * definitions, we treat cp == 0 as equivalent to
          * the value for "standard guest-visible sysreg".
+         * STATE_BOTH definitions are also always "standard
+         * sysreg" in their AArch64 view (the .cp value may
+         * be non-zero for the benefit of the AArch32 view).
          */
-        if (r->cp == 0) {
+        if (r->cp == 0 || r->state == ARM_CP_STATE_BOTH) {
             r2->cp = CP_REG_ARM64_SYSREG_CP;
         }
         *key = ENCODE_AA64_CP_REG(r2->cp, r2->crn, crm,
@@ -3499,6 +3552,10 @@ void arm_cpu_do_interrupt(CPUState *cs)
         addr += env->cp15.vbar_el[1];
     }
     switch_mode (env, new_mode);
+    /* For exceptions taken to AArch32 we must clear the SS bit in both
+     * PSTATE and in the old-state value we save to SPSR_<mode>, so zero it now.
+     */
+    env->uncached_cpsr &= ~PSTATE_SS;
     env->spsr = cpsr_read(env);
     /* Clear IT bits.  */
     env->condexec_bits = 0;
diff --git a/target-arm/helper.h b/target-arm/helper.h
index facfcd2c11..1d7003b70a 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -64,6 +64,7 @@ DEF_HELPER_3(set_cp_reg64, void, env, ptr, i64)
 DEF_HELPER_2(get_cp_reg64, i64, env, ptr)
 
 DEF_HELPER_3(msr_i_pstate, void, env, i32, i32)
+DEF_HELPER_1(clear_pstate_ss, void, env)
 DEF_HELPER_1(exception_return, void, env)
 
 DEF_HELPER_2(get_r13_banked, i32, env, i32)
diff --git a/target-arm/internals.h b/target-arm/internals.h
index 08fa69757d..53c2e3cf3e 100644
--- a/target-arm/internals.h
+++ b/target-arm/internals.h
@@ -290,4 +290,10 @@ static inline uint32_t syn_data_abort(int same_el, int ea, int cm, int s1ptw,
         | (ea << 9) | (cm << 8) | (s1ptw << 7) | (wnr << 6) | fsc;
 }
 
+static inline uint32_t syn_swstep(int same_el, int isv, int ex)
+{
+    return (EC_SOFTWARESTEP << ARM_EL_EC_SHIFT) | (same_el << ARM_EL_EC_SHIFT)
+        | (isv << 24) | (ex << 6) | 0x22;
+}
+
 #endif
diff --git a/target-arm/kvm-consts.h b/target-arm/kvm-consts.h
index 6009a33f10..091c1267d6 100644
--- a/target-arm/kvm-consts.h
+++ b/target-arm/kvm-consts.h
@@ -17,6 +17,7 @@
 #ifdef CONFIG_KVM
 #include "qemu/compiler.h"
 #include <linux/kvm.h>
+#include <linux/psci.h>
 
 #define MISMATCH_CHECK(X, Y) QEMU_BUILD_BUG_ON(X != Y)
 
@@ -38,17 +39,43 @@ MISMATCH_CHECK(CP_REG_SIZE_U64, KVM_REG_SIZE_U64)
 MISMATCH_CHECK(CP_REG_ARM, KVM_REG_ARM)
 MISMATCH_CHECK(CP_REG_ARCH_MASK, KVM_REG_ARCH_MASK)
 
-#define PSCI_FN_BASE 0x95c1ba5e
-#define PSCI_FN(n) (PSCI_FN_BASE + (n))
-#define PSCI_FN_CPU_SUSPEND PSCI_FN(0)
-#define PSCI_FN_CPU_OFF PSCI_FN(1)
-#define PSCI_FN_CPU_ON PSCI_FN(2)
-#define PSCI_FN_MIGRATE PSCI_FN(3)
-
-MISMATCH_CHECK(PSCI_FN_CPU_SUSPEND, KVM_PSCI_FN_CPU_SUSPEND)
-MISMATCH_CHECK(PSCI_FN_CPU_OFF, KVM_PSCI_FN_CPU_OFF)
-MISMATCH_CHECK(PSCI_FN_CPU_ON, KVM_PSCI_FN_CPU_ON)
-MISMATCH_CHECK(PSCI_FN_MIGRATE, KVM_PSCI_FN_MIGRATE)
+#define QEMU_PSCI_0_1_FN_BASE 0x95c1ba5e
+#define QEMU_PSCI_0_1_FN(n) (QEMU_PSCI_0_1_FN_BASE + (n))
+#define QEMU_PSCI_0_1_FN_CPU_SUSPEND QEMU_PSCI_0_1_FN(0)
+#define QEMU_PSCI_0_1_FN_CPU_OFF QEMU_PSCI_0_1_FN(1)
+#define QEMU_PSCI_0_1_FN_CPU_ON QEMU_PSCI_0_1_FN(2)
+#define QEMU_PSCI_0_1_FN_MIGRATE QEMU_PSCI_0_1_FN(3)
+
+MISMATCH_CHECK(QEMU_PSCI_0_1_FN_CPU_SUSPEND, KVM_PSCI_FN_CPU_SUSPEND)
+MISMATCH_CHECK(QEMU_PSCI_0_1_FN_CPU_OFF, KVM_PSCI_FN_CPU_OFF)
+MISMATCH_CHECK(QEMU_PSCI_0_1_FN_CPU_ON, KVM_PSCI_FN_CPU_ON)
+MISMATCH_CHECK(QEMU_PSCI_0_1_FN_MIGRATE, KVM_PSCI_FN_MIGRATE)
+
+#define QEMU_PSCI_0_2_FN_BASE 0x84000000
+#define QEMU_PSCI_0_2_FN(n) (QEMU_PSCI_0_2_FN_BASE + (n))
+
+#define QEMU_PSCI_0_2_64BIT 0x40000000
+#define QEMU_PSCI_0_2_FN64_BASE \
+        (QEMU_PSCI_0_2_FN_BASE + QEMU_PSCI_0_2_64BIT)
+#define QEMU_PSCI_0_2_FN64(n) (QEMU_PSCI_0_2_FN64_BASE + (n))
+
+#define QEMU_PSCI_0_2_FN_CPU_SUSPEND QEMU_PSCI_0_2_FN(1)
+#define QEMU_PSCI_0_2_FN_CPU_OFF QEMU_PSCI_0_2_FN(2)
+#define QEMU_PSCI_0_2_FN_CPU_ON QEMU_PSCI_0_2_FN(3)
+#define QEMU_PSCI_0_2_FN_MIGRATE QEMU_PSCI_0_2_FN(5)
+
+#define QEMU_PSCI_0_2_FN64_CPU_SUSPEND QEMU_PSCI_0_2_FN64(1)
+#define QEMU_PSCI_0_2_FN64_CPU_OFF QEMU_PSCI_0_2_FN64(2)
+#define QEMU_PSCI_0_2_FN64_CPU_ON QEMU_PSCI_0_2_FN64(3)
+#define QEMU_PSCI_0_2_FN64_MIGRATE QEMU_PSCI_0_2_FN64(5)
+
+MISMATCH_CHECK(QEMU_PSCI_0_2_FN_CPU_SUSPEND, PSCI_0_2_FN_CPU_SUSPEND)
+MISMATCH_CHECK(QEMU_PSCI_0_2_FN_CPU_OFF, PSCI_0_2_FN_CPU_OFF)
+MISMATCH_CHECK(QEMU_PSCI_0_2_FN_CPU_ON, PSCI_0_2_FN_CPU_ON)
+MISMATCH_CHECK(QEMU_PSCI_0_2_FN_MIGRATE, PSCI_0_2_FN_MIGRATE)
+MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_CPU_SUSPEND, PSCI_0_2_FN64_CPU_SUSPEND)
+MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_CPU_ON, PSCI_0_2_FN64_CPU_ON)
+MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_MIGRATE, PSCI_0_2_FN64_MIGRATE)
 
 /* Note that KVM uses overlapping values for AArch32 and AArch64
  * target CPU numbers. AArch32 targets:
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 25ad902e04..fe40358c96 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -258,7 +258,7 @@ void HELPER(exception_with_syndrome)(CPUARMState *env, uint32_t excp,
 
 uint32_t HELPER(cpsr_read)(CPUARMState *env)
 {
-    return cpsr_read(env) & ~CPSR_EXEC;
+    return cpsr_read(env) & ~(CPSR_EXEC | CPSR_RESERVED);
 }
 
 void HELPER(cpsr_write)(CPUARMState *env, uint32_t val, uint32_t mask)
@@ -369,6 +369,11 @@ void HELPER(msr_i_pstate)(CPUARMState *env, uint32_t op, uint32_t imm)
     }
 }
 
+void HELPER(clear_pstate_ss)(CPUARMState *env)
+{
+    env->pstate &= ~PSTATE_SS;
+}
+
 void HELPER(exception_return)(CPUARMState *env)
 {
     int cur_el = arm_current_pl(env);
@@ -380,12 +385,26 @@ void HELPER(exception_return)(CPUARMState *env)
 
     env->exclusive_addr = -1;
 
+    /* We must squash the PSTATE.SS bit to zero unless both of the
+     * following hold:
+     *  1. debug exceptions are currently disabled
+     *  2. singlestep will be active in the EL we return to
+     * We check 1 here and 2 after we've done the pstate/cpsr write() to
+     * transition to the EL we're going to.
+     */
+    if (arm_generate_debug_exceptions(env)) {
+        spsr &= ~PSTATE_SS;
+    }
+
     if (spsr & PSTATE_nRW) {
         /* TODO: We currently assume EL1/2/3 are running in AArch64.  */
         env->aarch64 = 0;
         new_el = 0;
         env->uncached_cpsr = 0x10;
         cpsr_write(env, spsr, ~0);
+        if (!arm_singlestep_active(env)) {
+            env->uncached_cpsr &= ~PSTATE_SS;
+        }
         for (i = 0; i < 15; i++) {
             env->regs[i] = env->xregs[i];
         }
@@ -410,6 +429,9 @@ void HELPER(exception_return)(CPUARMState *env)
         }
         env->aarch64 = 1;
         pstate_write(env, spsr);
+        if (!arm_singlestep_active(env)) {
+            env->pstate &= ~PSTATE_SS;
+        }
         aarch64_restore_sp(env, new_el);
         env->pc = env->elr_el[cur_el];
     }
@@ -429,6 +451,9 @@ illegal_return:
     spsr &= PSTATE_NZCV | PSTATE_DAIF;
     spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF);
     pstate_write(env, spsr);
+    if (!arm_singlestep_active(env)) {
+        env->pstate &= ~PSTATE_SS;
+    }
 }
 
 /* ??? Flag setting arithmetic is awkward because we need to do comparisons.
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index f04ca49631..8e66b6c972 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -205,10 +205,39 @@ static void gen_exception_insn(DisasContext *s, int offset, int excp,
     s->is_jmp = DISAS_EXC;
 }
 
+static void gen_ss_advance(DisasContext *s)
+{
+    /* If the singlestep state is Active-not-pending, advance to
+     * Active-pending.
+     */
+    if (s->ss_active) {
+        s->pstate_ss = 0;
+        gen_helper_clear_pstate_ss(cpu_env);
+    }
+}
+
+static void gen_step_complete_exception(DisasContext *s)
+{
+    /* We just completed step of an insn. Move from Active-not-pending
+     * to Active-pending, and then also take the swstep exception.
+     * This corresponds to making the (IMPDEF) choice to prioritize
+     * swstep exceptions over asynchronous exceptions taken to an exception
+     * level where debug is disabled. This choice has the advantage that
+     * we do not need to maintain internal state corresponding to the
+     * ISV/EX syndrome bits between completion of the step and generation
+     * of the exception, and our syndrome information is always correct.
+     */
+    gen_ss_advance(s);
+    gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex));
+    s->is_jmp = DISAS_EXC;
+}
+
 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 {
-    /* No direct tb linking with singlestep or deterministic io */
-    if (s->singlestep_enabled || (s->tb->cflags & CF_LAST_IO)) {
+    /* No direct tb linking with singlestep (either QEMU's or the ARM
+     * debug architecture kind) or deterministic io
+     */
+    if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
         return false;
     }
 
@@ -232,11 +261,14 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
         s->is_jmp = DISAS_TB_JUMP;
     } else {
         gen_a64_set_pc_im(dest);
-        if (s->singlestep_enabled) {
+        if (s->ss_active) {
+            gen_step_complete_exception(s);
+        } else if (s->singlestep_enabled) {
             gen_exception_internal(EXCP_DEBUG);
+        } else {
+            tcg_gen_exit_tb(0);
+            s->is_jmp = DISAS_TB_JUMP;
         }
-        tcg_gen_exit_tb(0);
-        s->is_jmp = DISAS_JUMP;
     }
 }
 
@@ -1448,6 +1480,12 @@ static void disas_exc(DisasContext *s, uint32_t insn)
             unallocated_encoding(s);
             break;
         }
+        /* For SVC, HVC and SMC we advance the single-step state
+         * machine before taking the exception. This is architecturally
+         * mandated, to ensure that single-stepping a system call
+         * instruction works properly.
+         */
+        gen_ss_advance(s);
         gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16));
         break;
     case 1:
@@ -1456,7 +1494,7 @@ static void disas_exc(DisasContext *s, uint32_t insn)
             break;
         }
         /* BRK */
-        gen_exception_insn(s, 0, EXCP_BKPT, syn_aa64_bkpt(imm16));
+        gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16));
         break;
     case 2:
         if (op2_ll != 0) {
@@ -1728,6 +1766,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
 
     if (is_excl) {
         if (!is_store) {
+            s->is_ldex = true;
             gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
         } else {
             gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
@@ -10868,6 +10907,26 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
     dc->current_pl = arm_current_pl(env);
     dc->features = env->features;
 
+    /* Single step state. The code-generation logic here is:
+     *  SS_ACTIVE == 0:
+     *   generate code with no special handling for single-stepping (except
+     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
+     *   this happens anyway because those changes are all system register or
+     *   PSTATE writes).
+     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
+     *   emit code for one insn
+     *   emit code to clear PSTATE.SS
+     *   emit code to generate software step exception for completed step
+     *   end TB (as usual for having generated an exception)
+     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
+     *   emit code to generate a software step exception
+     *   end the TB
+     */
+    dc->ss_active = ARM_TBFLAG_AA64_SS_ACTIVE(tb->flags);
+    dc->pstate_ss = ARM_TBFLAG_AA64_PSTATE_SS(tb->flags);
+    dc->is_ldex = false;
+    dc->ss_same_el = (arm_debug_target_el(env) == dc->current_pl);
+
     init_tmp_a64_array(dc);
 
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
@@ -10916,6 +10975,23 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
             tcg_gen_debug_insn_start(dc->pc);
         }
 
+        if (dc->ss_active && !dc->pstate_ss) {
+            /* Singlestep state is Active-pending.
+             * If we're in this state at the start of a TB then either
+             *  a) we just took an exception to an EL which is being debugged
+             *     and this is the first insn in the exception handler
+             *  b) debug exceptions were masked and we just unmasked them
+             *     without changing EL (eg by clearing PSTATE.D)
+             * In either case we're going to take a swstep exception in the
+             * "did not step an insn" case, and so the syndrome ISV and EX
+             * bits should be zero.
+             */
+            assert(num_insns == 0);
+            gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0));
+            dc->is_jmp = DISAS_EXC;
+            break;
+        }
+
         disas_a64_insn(env, dc);
 
         if (tcg_check_temp_count()) {
@@ -10932,6 +11008,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
     } while (!dc->is_jmp && tcg_ctx.gen_opc_ptr < gen_opc_end &&
              !cs->singlestep_enabled &&
              !singlestep &&
+             !dc->ss_active &&
              dc->pc < next_page_start &&
              num_insns < max_insns);
 
@@ -10939,7 +11016,8 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
         gen_io_end();
     }
 
-    if (unlikely(cs->singlestep_enabled) && dc->is_jmp != DISAS_EXC) {
+    if (unlikely(cs->singlestep_enabled || dc->ss_active)
+        && dc->is_jmp != DISAS_EXC) {
         /* Note that this means single stepping WFI doesn't halt the CPU.
          * For conditional branch insns this is harmless unreachable code as
          * gen_goto_tb() has already handled emitting the debug exception
@@ -10949,7 +11027,11 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu,
         if (dc->is_jmp != DISAS_JUMP) {
             gen_a64_set_pc_im(dc->pc);
         }
-        gen_exception_internal(EXCP_DEBUG);
+        if (cs->singlestep_enabled) {
+            gen_exception_internal(EXCP_DEBUG);
+        } else {
+            gen_step_complete_exception(dc);
+        }
     } else {
         switch (dc->is_jmp) {
         case DISAS_NEXT:
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 40121858d6..2c0b1deaea 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -205,6 +205,33 @@ static void gen_exception(int excp, uint32_t syndrome)
     tcg_temp_free_i32(tcg_excp);
 }
 
+static void gen_ss_advance(DisasContext *s)
+{
+    /* If the singlestep state is Active-not-pending, advance to
+     * Active-pending.
+     */
+    if (s->ss_active) {
+        s->pstate_ss = 0;
+        gen_helper_clear_pstate_ss(cpu_env);
+    }
+}
+
+static void gen_step_complete_exception(DisasContext *s)
+{
+    /* We just completed step of an insn. Move from Active-not-pending
+     * to Active-pending, and then also take the swstep exception.
+     * This corresponds to making the (IMPDEF) choice to prioritize
+     * swstep exceptions over asynchronous exceptions taken to an exception
+     * level where debug is disabled. This choice has the advantage that
+     * we do not need to maintain internal state corresponding to the
+     * ISV/EX syndrome bits between completion of the step and generation
+     * of the exception, and our syndrome information is always correct.
+     */
+    gen_ss_advance(s);
+    gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex));
+    s->is_jmp = DISAS_EXC;
+}
+
 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
 {
     TCGv_i32 tmp1 = tcg_temp_new_i32();
@@ -3860,7 +3887,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
 
 static inline void gen_jmp (DisasContext *s, uint32_t dest)
 {
-    if (unlikely(s->singlestep_enabled)) {
+    if (unlikely(s->singlestep_enabled || s->ss_active)) {
         /* An indirect jump so that we still trigger the debug exception.  */
         if (s->thumb)
             dest |= 1;
@@ -3908,9 +3935,10 @@ static uint32_t msr_mask(CPUARMState *env, DisasContext *s, int flags, int spsr)
         mask &= ~(CPSR_E | CPSR_GE);
     if (!arm_feature(env, ARM_FEATURE_THUMB2))
         mask &= ~CPSR_IT;
-    /* Mask out execution state bits.  */
-    if (!spsr)
-        mask &= ~CPSR_EXEC;
+    /* Mask out execution state and reserved bits.  */
+    if (!spsr) {
+        mask &= ~(CPSR_EXEC | CPSR_RESERVED);
+    }
     /* Mask out privileged bits.  */
     if (IS_USER(s))
         mask &= CPSR_USER;
@@ -3954,7 +3982,7 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
     TCGv_i32 tmp;
     store_reg(s, 15, pc);
     tmp = load_cpu_field(spsr);
-    gen_set_cpsr(tmp, 0xffffffff);
+    gen_set_cpsr(tmp, CPSR_ERET_MASK);
     tcg_temp_free_i32(tmp);
     s->is_jmp = DISAS_UPDATE;
 }
@@ -3962,7 +3990,7 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
 /* Generate a v6 exception return.  Marks both values as dead.  */
 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
 {
-    gen_set_cpsr(cpsr, 0xffffffff);
+    gen_set_cpsr(cpsr, CPSR_ERET_MASK);
     tcg_temp_free_i32(cpsr);
     store_reg(s, 15, pc);
     s->is_jmp = DISAS_UPDATE;
@@ -7280,6 +7308,8 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
 {
     TCGv_i32 tmp = tcg_temp_new_i32();
 
+    s->is_ldex = true;
+
     switch (size) {
     case 0:
         gen_aa32_ld8u(tmp, addr, get_mem_index(s));
@@ -8836,7 +8866,7 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
                 if ((insn & (1 << 22)) && !user) {
                     /* Restore CPSR from SPSR.  */
                     tmp = load_cpu_field(spsr);
-                    gen_set_cpsr(tmp, 0xffffffff);
+                    gen_set_cpsr(tmp, CPSR_ERET_MASK);
                     tcg_temp_free_i32(tmp);
                     s->is_jmp = DISAS_UPDATE;
                 }
@@ -10916,6 +10946,26 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
     dc->current_pl = arm_current_pl(env);
     dc->features = env->features;
 
+    /* Single step state. The code-generation logic here is:
+     *  SS_ACTIVE == 0:
+     *   generate code with no special handling for single-stepping (except
+     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
+     *   this happens anyway because those changes are all system register or
+     *   PSTATE writes).
+     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
+     *   emit code for one insn
+     *   emit code to clear PSTATE.SS
+     *   emit code to generate software step exception for completed step
+     *   end TB (as usual for having generated an exception)
+     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
+     *   emit code to generate a software step exception
+     *   end the TB
+     */
+    dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
+    dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
+    dc->is_ldex = false;
+    dc->ss_same_el = false; /* Can't be true since EL_d must be AArch64 */
+
     cpu_F0s = tcg_temp_new_i32();
     cpu_F1s = tcg_temp_new_i32();
     cpu_F0d = tcg_temp_new_i64();
@@ -11025,6 +11075,22 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
             tcg_gen_debug_insn_start(dc->pc);
         }
 
+        if (dc->ss_active && !dc->pstate_ss) {
+            /* Singlestep state is Active-pending.
+             * If we're in this state at the start of a TB then either
+             *  a) we just took an exception to an EL which is being debugged
+             *     and this is the first insn in the exception handler
+             *  b) debug exceptions were masked and we just unmasked them
+             *     without changing EL (eg by clearing PSTATE.D)
+             * In either case we're going to take a swstep exception in the
+             * "did not step an insn" case, and so the syndrome ISV and EX
+             * bits should be zero.
+             */
+            assert(num_insns == 0);
+            gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0));
+            goto done_generating;
+        }
+
         if (dc->thumb) {
             disas_thumb_insn(env, dc);
             if (dc->condexec_mask) {
@@ -11057,6 +11123,7 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
     } while (!dc->is_jmp && tcg_ctx.gen_opc_ptr < gen_opc_end &&
              !cs->singlestep_enabled &&
              !singlestep &&
+             !dc->ss_active &&
              dc->pc < next_page_start &&
              num_insns < max_insns);
 
@@ -11072,12 +11139,15 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
     /* At this stage dc->condjmp will only be set when the skipped
        instruction was a conditional branch or trap, and the PC has
        already been written.  */
-    if (unlikely(cs->singlestep_enabled)) {
+    if (unlikely(cs->singlestep_enabled || dc->ss_active)) {
         /* Make sure the pc is updated, and raise a debug exception.  */
         if (dc->condjmp) {
             gen_set_condexec(dc);
             if (dc->is_jmp == DISAS_SWI) {
+                gen_ss_advance(dc);
                 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
+            } else if (dc->ss_active) {
+                gen_step_complete_exception(dc);
             } else {
                 gen_exception_internal(EXCP_DEBUG);
             }
@@ -11089,7 +11159,10 @@ static inline void gen_intermediate_code_internal(ARMCPU *cpu,
         }
         gen_set_condexec(dc);
         if (dc->is_jmp == DISAS_SWI && !dc->condjmp) {
+            gen_ss_advance(dc);
             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
+        } else if (dc->ss_active) {
+            gen_step_complete_exception(dc);
         } else {
             /* FIXME: Single stepping a WFI insn will not halt
                the CPU.  */
diff --git a/target-arm/translate.h b/target-arm/translate.h
index 31a0104b58..b90d27514d 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -40,6 +40,18 @@ typedef struct DisasContext {
      * that it is set at the point where we actually touch the FP regs.
      */
     bool fp_access_checked;
+    /* ARMv8 single-step state (this is distinct from the QEMU gdbstub
+     * single-step support).
+     */
+    bool ss_active;
+    bool pstate_ss;
+    /* True if the insn just emitted was a load-exclusive instruction
+     * (necessary for syndrome information for single step exceptions),
+     * ie A64 LDX*, LDAX*, A32/T32 LDREX*, LDAEX*.
+     */
+    bool is_ldex;
+    /* True if a single-step exception will be taken to the current EL */
+    bool ss_same_el;
 #define TMP_A64_MAX 16
     int tmp_a64_count;
     TCGv_i64 tmp_a64[TMP_A64_MAX];
diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
index 2d970d0cb9..13eefbac3b 100644
--- a/target-i386/seg_helper.c
+++ b/target-i386/seg_helper.c
@@ -1127,8 +1127,8 @@ static void do_interrupt_user(CPUX86State *env, int intno, int is_int,
 
     /* Since we emulate only user space, we cannot do more than
        exiting the emulation with the suitable exception and error
-       code */
-    if (is_int) {
+       code. So update EIP for INT 0x80 and EXCP_SYSCALL. */
+    if (is_int || intno == EXCP_SYSCALL) {
         env->eip = next_eip;
     }
 }