summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--arch_init.c3
-rw-r--r--docs/q35-chipset.cfg23
-rw-r--r--hw/core/loader.c49
-rw-r--r--hw/i386/pc_piix.c129
-rw-r--r--hw/i386/pc_q35.c53
-rw-r--r--hw/ppc/e500.c35
-rw-r--r--hw/ppc/e500.h13
-rw-r--r--hw/ppc/e500plat.c14
-rw-r--r--hw/ppc/mpc8544ds.c14
-rw-r--r--hw/sparc/sun4m.c113
-rw-r--r--hw/sparc64/sun4u.c52
-rw-r--r--hw/virtio/virtio.c2
-rw-r--r--include/exec/exec-all.h19
-rw-r--r--include/exec/softmmu_defs.h46
-rw-r--r--include/exec/softmmu_template.h309
-rw-r--r--include/hw/loader.h1
-rw-r--r--target-mips/helper.c2
-rw-r--r--tcg/i386/tcg-target.c274
-rw-r--r--tcg/tcg.c17
19 files changed, 530 insertions, 638 deletions
diff --git a/arch_init.c b/arch_init.c
index fdd5243770..0471cd5a6b 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -342,7 +342,8 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
 {
     unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
     unsigned long nr = base + (start >> TARGET_PAGE_BITS);
-    unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);
+    uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
+    unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
 
     unsigned long next;
 
diff --git a/docs/q35-chipset.cfg b/docs/q35-chipset.cfg
index 1b6efc0f2c..e4ddb7d9cc 100644
--- a/docs/q35-chipset.cfg
+++ b/docs/q35-chipset.cfg
@@ -91,6 +91,29 @@
   port = "4"
   chassis = "4"
 
+##
+# Example PCIe switch with two downstream ports
+#
+#[device "pcie-switch-upstream-port-1"]
+#  driver = "x3130-upstream"
+#  bus = "ich9-pcie-port-4"
+#  addr = "00.0"
+#
+#[device "pcie-switch-downstream-port-1-1"]
+#  driver = "xio3130-downstream"
+#  multifunction = "on"
+#  bus = "pcie-switch-upstream-port-1"
+#  addr = "00.0"
+#  port = "1"
+#  chassis = "5"
+#
+#[device "pcie-switch-downstream-port-1-2"]
+#  driver = "xio3130-downstream"
+#  multifunction = "on"
+#  bus = "pcie-switch-upstream-port-1"
+#  addr = "00.1"
+#  port = "1"
+#  chassis = "6"
 
 [device "ich9-ehci-1"]
   driver = "ich9-usb-ehci1"
diff --git a/hw/core/loader.c b/hw/core/loader.c
index 6875b7ecf3..7b3d3ee6a0 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -54,6 +54,8 @@
 
 #include <zlib.h>
 
+bool rom_file_in_ram = true;
+
 static int roms_loaded;
 
 /* return the size or -1 if error */
@@ -576,6 +578,7 @@ struct Rom {
     size_t datasize;
 
     uint8_t *data;
+    MemoryRegion *mr;
     int isrom;
     char *fw_dir;
     char *fw_file;
@@ -605,6 +608,21 @@ static void rom_insert(Rom *rom)
     QTAILQ_INSERT_TAIL(&roms, rom, next);
 }
 
+static void *rom_set_mr(Rom *rom, Object *owner, const char *name)
+{
+    void *data;
+
+    rom->mr = g_malloc(sizeof(*rom->mr));
+    memory_region_init_ram(rom->mr, owner, name, rom->datasize);
+    memory_region_set_readonly(rom->mr, true);
+    vmstate_register_ram_global(rom->mr);
+
+    data = memory_region_get_ram_ptr(rom->mr);
+    memcpy(data, rom->data, rom->datasize);
+
+    return data;
+}
+
 int rom_add_file(const char *file, const char *fw_dir,
                  hwaddr addr, int32_t bootindex)
 {
@@ -646,6 +664,7 @@ int rom_add_file(const char *file, const char *fw_dir,
     if (rom->fw_file && fw_cfg) {
         const char *basename;
         char fw_file_name[56];
+        void *data;
 
         basename = strrchr(rom->fw_file, '/');
         if (basename) {
@@ -655,8 +674,15 @@ int rom_add_file(const char *file, const char *fw_dir,
         }
         snprintf(fw_file_name, sizeof(fw_file_name), "%s/%s", rom->fw_dir,
                  basename);
-        fw_cfg_add_file(fw_cfg, fw_file_name, rom->data, rom->romsize);
         snprintf(devpath, sizeof(devpath), "/rom@%s", fw_file_name);
+
+        if (rom_file_in_ram) {
+            data = rom_set_mr(rom, OBJECT(fw_cfg), devpath);
+        } else {
+            data = rom->data;
+        }
+
+        fw_cfg_add_file(fw_cfg, fw_file_name, data, rom->romsize);
     } else {
         snprintf(devpath, sizeof(devpath), "/rom@" TARGET_FMT_plx, addr);
     }
@@ -731,7 +757,12 @@ static void rom_reset(void *unused)
         if (rom->data == NULL) {
             continue;
         }
-        cpu_physical_memory_write_rom(rom->addr, rom->data, rom->datasize);
+        if (rom->mr) {
+            void *host = memory_region_get_ram_ptr(rom->mr);
+            memcpy(host, rom->data, rom->datasize);
+        } else {
+            cpu_physical_memory_write_rom(rom->addr, rom->data, rom->datasize);
+        }
         if (rom->isrom) {
             /* rom needs to be written only once */
             g_free(rom->data);
@@ -781,6 +812,9 @@ static Rom *find_rom(hwaddr addr)
         if (rom->fw_file) {
             continue;
         }
+        if (rom->mr) {
+            continue;
+        }
         if (rom->addr > addr) {
             continue;
         }
@@ -808,6 +842,9 @@ int rom_copy(uint8_t *dest, hwaddr addr, size_t size)
         if (rom->fw_file) {
             continue;
         }
+        if (rom->mr) {
+            continue;
+        }
         if (rom->addr + rom->romsize < addr) {
             continue;
         }
@@ -866,7 +903,13 @@ void do_info_roms(Monitor *mon, const QDict *qdict)
     Rom *rom;
 
     QTAILQ_FOREACH(rom, &roms, next) {
-        if (!rom->fw_file) {
+        if (rom->mr) {
+            monitor_printf(mon, "%s"
+                           " size=0x%06zx name=\"%s\"\n",
+                           rom->mr->name,
+                           rom->romsize,
+                           rom->name);
+        } else if (!rom->fw_file) {
             monitor_printf(mon, "addr=" TARGET_FMT_plx
                            " size=0x%06zx mem=%s name=\"%s\"\n",
                            rom->addr, rom->romsize,
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 6e1e654f3f..3c36a2a1c3 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -25,6 +25,7 @@
 #include <glib.h>
 
 #include "hw/hw.h"
+#include "hw/loader.h"
 #include "hw/i386/pc.h"
 #include "hw/i386/apic.h"
 #include "hw/pci/pci.h"
@@ -60,17 +61,12 @@ static bool has_pvpanic;
 static bool has_pci_info = true;
 
 /* PC hardware initialisation */
-static void pc_init1(MemoryRegion *system_memory,
-                     MemoryRegion *system_io,
-                     ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename,
-                     const char *kernel_cmdline,
-                     const char *initrd_filename,
-                     const char *cpu_model,
+static void pc_init1(QEMUMachineInitArgs *args,
                      int pci_enabled,
                      int kvmclock_enabled)
 {
+    MemoryRegion *system_memory = get_system_memory();
+    MemoryRegion *system_io = get_system_io();
     int i;
     ram_addr_t below_4g_mem_size, above_4g_mem_size;
     PCIBus *pci_bus;
@@ -102,18 +98,18 @@ static void pc_init1(MemoryRegion *system_memory,
     object_property_add_child(qdev_get_machine(), "icc-bridge",
                               OBJECT(icc_bridge), NULL);
 
-    pc_cpus_init(cpu_model, icc_bridge);
+    pc_cpus_init(args->cpu_model, icc_bridge);
 
     if (kvm_enabled() && kvmclock_enabled) {
         kvmclock_create();
     }
 
-    if (ram_size >= 0xe0000000 ) {
-        above_4g_mem_size = ram_size - 0xe0000000;
+    if (args->ram_size >= 0xe0000000) {
+        above_4g_mem_size = args->ram_size - 0xe0000000;
         below_4g_mem_size = 0xe0000000;
     } else {
         above_4g_mem_size = 0;
-        below_4g_mem_size = ram_size;
+        below_4g_mem_size = args->ram_size;
     }
 
     if (pci_enabled) {
@@ -132,7 +128,8 @@ static void pc_init1(MemoryRegion *system_memory,
     /* allocate ram and load rom/bios */
     if (!xen_enabled()) {
         fw_cfg = pc_memory_init(system_memory,
-                       kernel_filename, kernel_cmdline, initrd_filename,
+                       args->kernel_filename, args->kernel_cmdline,
+                       args->initrd_filename,
                        below_4g_mem_size, above_4g_mem_size,
                        rom_memory, &ram_memory, guest_info);
     }
@@ -148,7 +145,7 @@ static void pc_init1(MemoryRegion *system_memory,
 
     if (pci_enabled) {
         pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi,
-                              system_memory, system_io, ram_size,
+                              system_memory, system_io, args->ram_size,
                               below_4g_mem_size,
                               0x100000000ULL - below_4g_mem_size,
                               above_4g_mem_size,
@@ -207,7 +204,7 @@ static void pc_init1(MemoryRegion *system_memory,
         }
     }
 
-    pc_cmos_init(below_4g_mem_size, above_4g_mem_size, boot_device,
+    pc_cmos_init(below_4g_mem_size, above_4g_mem_size, args->boot_device,
                  floppy, idebus[0], idebus[1], rtc_state);
 
     if (pci_enabled && usb_enabled(false)) {
@@ -236,95 +233,91 @@ static void pc_init1(MemoryRegion *system_memory,
 
 static void pc_init_pci(QEMUMachineInitArgs *args)
 {
-    ram_addr_t ram_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_device = args->boot_device;
-    pc_init1(get_system_memory(),
-             get_system_io(),
-             ram_size, boot_device,
-             kernel_filename, kernel_cmdline,
-             initrd_filename, cpu_model, 1, 1);
+    pc_init1(args, 1, 1);
 }
 
-static void pc_init_pci_1_6(QEMUMachineInitArgs *args)
+static void pc_compat_1_6(QEMUMachineInitArgs *args)
 {
     has_pci_info = false;
-    pc_init_pci(args);
+    rom_file_in_ram = false;
 }
 
-static void pc_init_pci_1_5(QEMUMachineInitArgs *args)
+static void pc_compat_1_5(QEMUMachineInitArgs *args)
 {
+    pc_compat_1_6(args);
     has_pvpanic = true;
-    pc_init_pci_1_6(args);
 }
 
-static void pc_init_pci_1_4(QEMUMachineInitArgs *args)
+static void pc_compat_1_4(QEMUMachineInitArgs *args)
 {
+    pc_compat_1_5(args);
+    has_pvpanic = false;
     x86_cpu_compat_set_features("n270", FEAT_1_ECX, 0, CPUID_EXT_MOVBE);
     x86_cpu_compat_set_features("Westmere", FEAT_1_ECX, 0, CPUID_EXT_PCLMULQDQ);
-    has_pci_info = false;
-    pc_init_pci(args);
 }
 
-static void pc_init_pci_1_3(QEMUMachineInitArgs *args)
+static void pc_compat_1_3(QEMUMachineInitArgs *args)
 {
+    pc_compat_1_4(args);
     enable_compat_apic_id_mode();
-    pc_init_pci_1_4(args);
 }
 
-/* PC machine init function for pc-1.1 to pc-1.2 */
-static void pc_init_pci_1_2(QEMUMachineInitArgs *args)
+/* PC compat function for pc-0.14 to pc-1.2 */
+static void pc_compat_1_2(QEMUMachineInitArgs *args)
 {
+    pc_compat_1_3(args);
     disable_kvm_pv_eoi();
-    pc_init_pci_1_3(args);
 }
 
-/* PC machine init function for pc-0.14 to pc-1.0 */
-static void pc_init_pci_1_0(QEMUMachineInitArgs *args)
+static void pc_init_pci_1_6(QEMUMachineInitArgs *args)
+{
+    pc_compat_1_6(args);
+    pc_init_pci(args);
+}
+
+static void pc_init_pci_1_5(QEMUMachineInitArgs *args)
+{
+    pc_compat_1_5(args);
+    pc_init_pci(args);
+}
+
+static void pc_init_pci_1_4(QEMUMachineInitArgs *args)
 {
-    pc_init_pci_1_2(args);
+    pc_compat_1_4(args);
+    pc_init_pci(args);
+}
+
+static void pc_init_pci_1_3(QEMUMachineInitArgs *args)
+{
+    pc_compat_1_3(args);
+    pc_init_pci(args);
+}
+
+/* PC machine init function for pc-0.14 to pc-1.2 */
+static void pc_init_pci_1_2(QEMUMachineInitArgs *args)
+{
+    pc_compat_1_2(args);
+    pc_init_pci(args);
 }
 
 /* PC init function for pc-0.10 to pc-0.13, and reused by xenfv */
 static void pc_init_pci_no_kvmclock(QEMUMachineInitArgs *args)
 {
-    ram_addr_t ram_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_device = args->boot_device;
     has_pci_info = false;
     disable_kvm_pv_eoi();
     enable_compat_apic_id_mode();
-    pc_init1(get_system_memory(),
-             get_system_io(),
-             ram_size, boot_device,
-             kernel_filename, kernel_cmdline,
-             initrd_filename, cpu_model, 1, 0);
+    pc_init1(args, 1, 0);
 }
 
 static void pc_init_isa(QEMUMachineInitArgs *args)
 {
-    ram_addr_t ram_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_device = args->boot_device;
     has_pci_info = false;
-    if (cpu_model == NULL)
-        cpu_model = "486";
+    if (!args->cpu_model) {
+        args->cpu_model = "486";
+    }
     disable_kvm_pv_eoi();
     enable_compat_apic_id_mode();
-    pc_init1(get_system_memory(),
-             get_system_io(),
-             ram_size, boot_device,
-             kernel_filename, kernel_cmdline,
-             initrd_filename, cpu_model, 0, 1);
+    pc_init1(args, 0, 1);
 }
 
 #ifdef CONFIG_XEN
@@ -516,7 +509,7 @@ static QEMUMachine pc_machine_v1_1 = {
 static QEMUMachine pc_machine_v1_0 = {
     .name = "pc-1.0",
     .desc = "Standard PC",
-    .init = pc_init_pci_1_0,
+    .init = pc_init_pci_1_2,
     .max_cpus = 255,
     .compat_props = (GlobalProperty[]) {
         PC_COMPAT_1_0,
@@ -532,7 +525,7 @@ static QEMUMachine pc_machine_v1_0 = {
 static QEMUMachine pc_machine_v0_15 = {
     .name = "pc-0.15",
     .desc = "Standard PC",
-    .init = pc_init_pci_1_0,
+    .init = pc_init_pci_1_2,
     .max_cpus = 255,
     .compat_props = (GlobalProperty[]) {
         PC_COMPAT_0_15,
@@ -565,7 +558,7 @@ static QEMUMachine pc_machine_v0_15 = {
 static QEMUMachine pc_machine_v0_14 = {
     .name = "pc-0.14",
     .desc = "Standard PC",
-    .init = pc_init_pci_1_0,
+    .init = pc_init_pci_1_2,
     .max_cpus = 255,
     .compat_props = (GlobalProperty[]) {
         PC_COMPAT_0_14, 
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 10e770e362..198c7851b3 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -28,6 +28,7 @@
  * THE SOFTWARE.
  */
 #include "hw/hw.h"
+#include "hw/loader.h"
 #include "sysemu/arch_init.h"
 #include "hw/i2c/smbus.h"
 #include "hw/boards.h"
@@ -52,12 +53,6 @@ static bool has_pci_info = true;
 /* PC hardware initialisation */
 static void pc_q35_init(QEMUMachineInitArgs *args)
 {
-    ram_addr_t ram_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_device = args->boot_device;
     ram_addr_t below_4g_mem_size, above_4g_mem_size;
     Q35PCIHost *q35_host;
     PCIHostState *phb;
@@ -85,17 +80,17 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
     object_property_add_child(qdev_get_machine(), "icc-bridge",
                               OBJECT(icc_bridge), NULL);
 
-    pc_cpus_init(cpu_model, icc_bridge);
+    pc_cpus_init(args->cpu_model, icc_bridge);
     pc_acpi_init("q35-acpi-dsdt.aml");
 
     kvmclock_create();
 
-    if (ram_size >= 0xb0000000) {
-        above_4g_mem_size = ram_size - 0xb0000000;
+    if (args->ram_size >= 0xb0000000) {
+        above_4g_mem_size = args->ram_size - 0xb0000000;
         below_4g_mem_size = 0xb0000000;
     } else {
         above_4g_mem_size = 0;
-        below_4g_mem_size = ram_size;
+        below_4g_mem_size = args->ram_size;
     }
 
     /* pci enabled */
@@ -114,8 +109,10 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
 
     /* allocate ram and load rom/bios */
     if (!xen_enabled()) {
-        pc_memory_init(get_system_memory(), kernel_filename, kernel_cmdline,
-                       initrd_filename, below_4g_mem_size, above_4g_mem_size,
+        pc_memory_init(get_system_memory(),
+                       args->kernel_filename, args->kernel_cmdline,
+                       args->initrd_filename,
+                       below_4g_mem_size, above_4g_mem_size,
                        rom_memory, &ram_memory, guest_info);
     }
 
@@ -203,7 +200,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
                                     0xb100),
                       8, NULL, 0);
 
-    pc_cmos_init(below_4g_mem_size, above_4g_mem_size, boot_device,
+    pc_cmos_init(below_4g_mem_size, above_4g_mem_size, args->boot_device,
                  floppy, idebus[0], idebus[1], rtc_state);
 
     /* the rest devices to which pci devfn is automatically assigned */
@@ -218,23 +215,41 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
     }
 }
 
-static void pc_q35_init_1_6(QEMUMachineInitArgs *args)
+static void pc_compat_1_6(QEMUMachineInitArgs *args)
 {
     has_pci_info = false;
-    pc_q35_init(args);
+    rom_file_in_ram = false;
 }
 
-static void pc_q35_init_1_5(QEMUMachineInitArgs *args)
+static void pc_compat_1_5(QEMUMachineInitArgs *args)
 {
+    pc_compat_1_6(args);
     has_pvpanic = true;
-    pc_q35_init_1_6(args);
 }
 
-static void pc_q35_init_1_4(QEMUMachineInitArgs *args)
+static void pc_compat_1_4(QEMUMachineInitArgs *args)
 {
+    pc_compat_1_5(args);
+    has_pvpanic = false;
     x86_cpu_compat_set_features("n270", FEAT_1_ECX, 0, CPUID_EXT_MOVBE);
     x86_cpu_compat_set_features("Westmere", FEAT_1_ECX, 0, CPUID_EXT_PCLMULQDQ);
-    has_pci_info = false;
+}
+
+static void pc_q35_init_1_6(QEMUMachineInitArgs *args)
+{
+    pc_compat_1_6(args);
+    pc_q35_init(args);
+}
+
+static void pc_q35_init_1_5(QEMUMachineInitArgs *args)
+{
+    pc_compat_1_5(args);
+    pc_q35_init(args);
+}
+
+static void pc_q35_init_1_4(QEMUMachineInitArgs *args)
+{
+    pc_compat_1_4(args);
     pc_q35_init(args);
 }
 
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index f00a62a1ca..e79612b0e9 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -124,13 +124,14 @@ static void dt_serial_create(void *fdt, unsigned long long offset,
 }
 
 static int ppce500_load_device_tree(CPUPPCState *env,
+                                    QEMUMachineInitArgs *args,
                                     PPCE500Params *params,
                                     hwaddr addr,
                                     hwaddr initrd_base,
                                     hwaddr initrd_size)
 {
     int ret = -1;
-    uint64_t mem_reg_property[] = { 0, cpu_to_be64(params->ram_size) };
+    uint64_t mem_reg_property[] = { 0, cpu_to_be64(args->ram_size) };
     int fdt_size;
     void *fdt;
     uint8_t hypercall[16];
@@ -205,7 +206,7 @@ static int ppce500_load_device_tree(CPUPPCState *env,
     }
 
     ret = qemu_devtree_setprop_string(fdt, "/chosen", "bootargs",
-                                      params->kernel_cmdline);
+                                      args->kernel_cmdline);
     if (ret < 0)
         fprintf(stderr, "couldn't set /chosen/bootargs\n");
 
@@ -559,7 +560,7 @@ static qemu_irq *ppce500_init_mpic(PPCE500Params *params, MemoryRegion *ccsr,
     return mpic;
 }
 
-void ppce500_init(PPCE500Params *params)
+void ppce500_init(QEMUMachineInitArgs *args, PPCE500Params *params)
 {
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
@@ -584,8 +585,8 @@ void ppce500_init(PPCE500Params *params)
     PPCE500CCSRState *ccsr;
 
     /* Setup CPUs */
-    if (params->cpu_model == NULL) {
-        params->cpu_model = "e500v2_v30";
+    if (args->cpu_model == NULL) {
+        args->cpu_model = "e500v2_v30";
     }
 
     irqs = g_malloc0(smp_cpus * sizeof(qemu_irq *));
@@ -595,7 +596,7 @@ void ppce500_init(PPCE500Params *params)
         CPUState *cs;
         qemu_irq *input;
 
-        cpu = cpu_ppc_init(params->cpu_model);
+        cpu = cpu_ppc_init(args->cpu_model);
         if (cpu == NULL) {
             fprintf(stderr, "Unable to initialize CPU!\n");
             exit(1);
@@ -634,7 +635,7 @@ void ppce500_init(PPCE500Params *params)
 
     /* Fixup Memory size on a alignment boundary */
     ram_size &= ~(RAM_SIZES_ALIGN - 1);
-    params->ram_size = ram_size;
+    args->ram_size = ram_size;
 
     /* Register Memory */
     memory_region_init_ram(ram, NULL, "mpc8544ds.ram", ram_size);
@@ -701,11 +702,11 @@ void ppce500_init(PPCE500Params *params)
     sysbus_create_simple("e500-spin", MPC8544_SPIN_BASE, NULL);
 
     /* Load kernel. */
-    if (params->kernel_filename) {
-        kernel_size = load_uimage(params->kernel_filename, &entry,
+    if (args->kernel_filename) {
+        kernel_size = load_uimage(args->kernel_filename, &entry,
                                   &loadaddr, NULL);
         if (kernel_size < 0) {
-            kernel_size = load_elf(params->kernel_filename, NULL, NULL,
+            kernel_size = load_elf(args->kernel_filename, NULL, NULL,
                                    &elf_entry, &elf_lowaddr, NULL, 1,
                                    ELF_MACHINE, 0);
             entry = elf_entry;
@@ -714,7 +715,7 @@ void ppce500_init(PPCE500Params *params)
         /* XXX try again as binary */
         if (kernel_size < 0) {
             fprintf(stderr, "qemu: could not load kernel '%s'\n",
-                    params->kernel_filename);
+                    args->kernel_filename);
             exit(1);
         }
 
@@ -726,14 +727,14 @@ void ppce500_init(PPCE500Params *params)
     }
 
     /* Load initrd. */
-    if (params->initrd_filename) {
+    if (args->initrd_filename) {
         initrd_base = (cur_base + INITRD_LOAD_PAD) & ~INITRD_PAD_MASK;
-        initrd_size = load_image_targphys(params->initrd_filename, initrd_base,
+        initrd_size = load_image_targphys(args->initrd_filename, initrd_base,
                                           ram_size - initrd_base);
 
         if (initrd_size < 0) {
             fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
-                    params->initrd_filename);
+                    args->initrd_filename);
             exit(1);
         }
 
@@ -741,12 +742,12 @@ void ppce500_init(PPCE500Params *params)
     }
 
     /* If we're loading a kernel directly, we must load the device tree too. */
-    if (params->kernel_filename) {
+    if (args->kernel_filename) {
         struct boot_info *boot_info;
         int dt_size;
 
-        dt_size = ppce500_load_device_tree(env, params, dt_base, initrd_base,
-                                           initrd_size);
+        dt_size = ppce500_load_device_tree(env, args, params, dt_base,
+                                           initrd_base, initrd_size);
         if (dt_size < 0) {
             fprintf(stderr, "couldn't load device tree\n");
             exit(1);
diff --git a/hw/ppc/e500.h b/hw/ppc/e500.h
index 226c93d248..52726a2ec0 100644
--- a/hw/ppc/e500.h
+++ b/hw/ppc/e500.h
@@ -1,25 +1,18 @@
 #ifndef PPCE500_H
 #define PPCE500_H
 
+#include "hw/boards.h"
+
 typedef struct PPCE500Params {
-    /* Standard QEMU machine init params */
-    ram_addr_t ram_size;
-    const char *boot_device;
-    const char *kernel_filename;
-    const char *kernel_cmdline;
-    const char *initrd_filename;
-    const char *cpu_model;
     int pci_first_slot;
     int pci_nr_slots;
 
-    /* e500-specific params */
-
     /* required -- must at least add toplevel board compatible */
     void (*fixup_devtree)(struct PPCE500Params *params, void *fdt);
 
     int mpic_version;
 } PPCE500Params;
 
-void ppce500_init(PPCE500Params *params);
+void ppce500_init(QEMUMachineInitArgs *args, PPCE500Params *params);
 
 #endif
diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c
index c85299588c..bf65b69366 100644
--- a/hw/ppc/e500plat.c
+++ b/hw/ppc/e500plat.c
@@ -30,19 +30,7 @@ static void e500plat_fixup_devtree(PPCE500Params *params, void *fdt)
 
 static void e500plat_init(QEMUMachineInitArgs *args)
 {
-    ram_addr_t ram_size = args->ram_size;
-    const char *boot_device = args->boot_device;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
     PPCE500Params params = {
-        .ram_size = ram_size,
-        .boot_device = boot_device,
-        .kernel_filename = kernel_filename,
-        .kernel_cmdline = kernel_cmdline,
-        .initrd_filename = initrd_filename,
-        .cpu_model = cpu_model,
         .pci_first_slot = 0x1,
         .pci_nr_slots = PCI_SLOT_MAX - 1,
         .fixup_devtree = e500plat_fixup_devtree,
@@ -55,7 +43,7 @@ static void e500plat_init(QEMUMachineInitArgs *args)
         params.mpic_version = OPENPIC_MODEL_FSL_MPIC_20;
     }
 
-    ppce500_init(&params);
+    ppce500_init(args, &params);
 }
 
 static QEMUMachine e500plat_machine = {
diff --git a/hw/ppc/mpc8544ds.c b/hw/ppc/mpc8544ds.c
index 444da0246d..1888e75545 100644
--- a/hw/ppc/mpc8544ds.c
+++ b/hw/ppc/mpc8544ds.c
@@ -28,26 +28,14 @@ static void mpc8544ds_fixup_devtree(PPCE500Params *params, void *fdt)
 
 static void mpc8544ds_init(QEMUMachineInitArgs *args)
 {
-    ram_addr_t ram_size = args->ram_size;
-    const char *boot_device = args->boot_device;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
     PPCE500Params params = {
-        .ram_size = ram_size,
-        .boot_device = boot_device,
-        .kernel_filename = kernel_filename,
-        .kernel_cmdline = kernel_cmdline,
-        .initrd_filename = initrd_filename,
-        .cpu_model = cpu_model,
         .pci_first_slot = 0x11,
         .pci_nr_slots = 2,
         .fixup_devtree = mpc8544ds_fixup_devtree,
         .mpic_version = OPENPIC_MODEL_FSL_MPIC_20,
     };
 
-    ppce500_init(&params);
+    ppce500_init(args, &params);
 }
 
 
diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
index 942ca37c54..36ef36f5fe 100644
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -836,12 +836,10 @@ static void dummy_fdc_tc(void *opaque, int irq, int level)
 {
 }
 
-static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, ram_addr_t RAM_size,
-                          const char *boot_device,
-                          const char *kernel_filename,
-                          const char *kernel_cmdline,
-                          const char *initrd_filename, const char *cpu_model)
+static void sun4m_hw_init(const struct sun4m_hwdef *hwdef,
+                          QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
     unsigned int i;
     void *iommu, *espdma, *ledma, *nvram;
     qemu_irq *cpu_irqs[MAX_CPUS], slavio_irq[32], slavio_cpu_irq[MAX_CPUS],
@@ -867,10 +865,10 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, ram_addr_t RAM_size,
 
 
     /* set up devices */
-    ram_init(0, RAM_size, hwdef->max_mem);
+    ram_init(0, args->ram_size, hwdef->max_mem);
     /* models without ECC don't trap when missing ram is accessed */
     if (!hwdef->ecc_base) {
-        empty_slot_init(RAM_size, hwdef->max_mem - RAM_size);
+        empty_slot_init(args->ram_size, hwdef->max_mem - args->ram_size);
     }
 
     prom_init(hwdef->slavio_base, bios_name);
@@ -993,11 +991,12 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, ram_addr_t RAM_size,
         empty_slot_init(hwdef->bpp_base, 0x20);
     }
 
-    kernel_size = sun4m_load_kernel(kernel_filename, initrd_filename,
-                                    RAM_size);
+    kernel_size = sun4m_load_kernel(args->kernel_filename,
+                                    args->initrd_filename,
+                                    args->ram_size);
 
-    nvram_init(nvram, (uint8_t *)&nd_table[0].macaddr, kernel_cmdline,
-               boot_device, RAM_size, kernel_size, graphic_width,
+    nvram_init(nvram, (uint8_t *)&nd_table[0].macaddr, args->kernel_cmdline,
+               args->boot_device, args->ram_size, kernel_size, graphic_width,
                graphic_height, graphic_depth, hwdef->nvram_machine_id,
                "Sun4m");
 
@@ -1015,19 +1014,20 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, ram_addr_t RAM_size,
     fw_cfg_add_i16(fw_cfg, FW_CFG_SUN4M_HEIGHT, graphic_height);
     fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, KERNEL_LOAD_ADDR);
     fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
-    if (kernel_cmdline) {
+    if (args->kernel_cmdline) {
         fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_CMDLINE, CMDLINE_ADDR);
-        pstrcpy_targphys("cmdline", CMDLINE_ADDR, TARGET_PAGE_SIZE, kernel_cmdline);
-        fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
+        pstrcpy_targphys("cmdline", CMDLINE_ADDR, TARGET_PAGE_SIZE,
+                         args->kernel_cmdline);
+        fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, args->kernel_cmdline);
         fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE,
-                       strlen(kernel_cmdline) + 1);
+                       strlen(args->kernel_cmdline) + 1);
     } else {
         fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_CMDLINE, 0);
         fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, 0);
     }
     fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, INITRD_LOAD_ADDR);
     fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, 0); // not used
-    fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, boot_device[0]);
+    fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, args->boot_device[0]);
     qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
 }
 
@@ -1291,118 +1291,55 @@ static const struct sun4m_hwdef sun4m_hwdefs[] = {
 /* SPARCstation 5 hardware initialisation */
 static void ss5_init(QEMUMachineInitArgs *args)
 {
-    ram_addr_t RAM_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_device = args->boot_device;
-    sun4m_hw_init(&sun4m_hwdefs[0], RAM_size, boot_device, kernel_filename,
-                  kernel_cmdline, initrd_filename, cpu_model);
+    sun4m_hw_init(&sun4m_hwdefs[0], args);
 }
 
 /* SPARCstation 10 hardware initialisation */
 static void ss10_init(QEMUMachineInitArgs *args)
 {
-    ram_addr_t RAM_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_device = args->boot_device;
-    sun4m_hw_init(&sun4m_hwdefs[1], RAM_size, boot_device, kernel_filename,
-                  kernel_cmdline, initrd_filename, cpu_model);
+    sun4m_hw_init(&sun4m_hwdefs[1], args);
 }
 
 /* SPARCserver 600MP hardware initialisation */
 static void ss600mp_init(QEMUMachineInitArgs *args)
 {
-    ram_addr_t RAM_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_device = args->boot_device;
-    sun4m_hw_init(&sun4m_hwdefs[2], RAM_size, boot_device, kernel_filename,
-                  kernel_cmdline, initrd_filename, cpu_model);
+    sun4m_hw_init(&sun4m_hwdefs[2], args);
 }
 
 /* SPARCstation 20 hardware initialisation */
 static void ss20_init(QEMUMachineInitArgs *args)
 {
-    ram_addr_t RAM_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_device = args->boot_device;
-    sun4m_hw_init(&sun4m_hwdefs[3], RAM_size, boot_device, kernel_filename,
-                  kernel_cmdline, initrd_filename, cpu_model);
+    sun4m_hw_init(&sun4m_hwdefs[3], args);
 }
 
 /* SPARCstation Voyager hardware initialisation */
 static void vger_init(QEMUMachineInitArgs *args)
 {
-    ram_addr_t RAM_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_device = args->boot_device;
-    sun4m_hw_init(&sun4m_hwdefs[4], RAM_size, boot_device, kernel_filename,
-                  kernel_cmdline, initrd_filename, cpu_model);
+    sun4m_hw_init(&sun4m_hwdefs[4], args);
 }
 
 /* SPARCstation LX hardware initialisation */
 static void ss_lx_init(QEMUMachineInitArgs *args)
 {
-    ram_addr_t RAM_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_device = args->boot_device;
-    sun4m_hw_init(&sun4m_hwdefs[5], RAM_size, boot_device, kernel_filename,
-                  kernel_cmdline, initrd_filename, cpu_model);
+    sun4m_hw_init(&sun4m_hwdefs[5], args);
 }
 
 /* SPARCstation 4 hardware initialisation */
 static void ss4_init(QEMUMachineInitArgs *args)
 {
-    ram_addr_t RAM_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_device = args->boot_device;
-    sun4m_hw_init(&sun4m_hwdefs[6], RAM_size, boot_device, kernel_filename,
-                  kernel_cmdline, initrd_filename, cpu_model);
+    sun4m_hw_init(&sun4m_hwdefs[6], args);
 }
 
 /* SPARCClassic hardware initialisation */
 static void scls_init(QEMUMachineInitArgs *args)
 {
-    ram_addr_t RAM_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_device = args->boot_device;
-    sun4m_hw_init(&sun4m_hwdefs[7], RAM_size, boot_device, kernel_filename,
-                  kernel_cmdline, initrd_filename, cpu_model);
+    sun4m_hw_init(&sun4m_hwdefs[7], args);
 }
 
 /* SPARCbook hardware initialisation */
 static void sbook_init(QEMUMachineInitArgs *args)
 {
-    ram_addr_t RAM_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_device = args->boot_device;
-    sun4m_hw_init(&sun4m_hwdefs[8], RAM_size, boot_device, kernel_filename,
-                  kernel_cmdline, initrd_filename, cpu_model);
+    sun4m_hw_init(&sun4m_hwdefs[8], args);
 }
 
 static QEMUMachine ss5_machine = {
diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c
index 50a9f24a2e..9da083310c 100644
--- a/hw/sparc64/sun4u.c
+++ b/hw/sparc64/sun4u.c
@@ -811,10 +811,7 @@ static SPARCCPU *cpu_devinit(const char *cpu_model, const struct hwdef *hwdef)
 }
 
 static void sun4uv_init(MemoryRegion *address_space_mem,
-                        ram_addr_t RAM_size,
-                        const char *boot_devices,
-                        const char *kernel_filename, const char *kernel_cmdline,
-                        const char *initrd_filename, const char *cpu_model,
+                        QEMUMachineInitArgs *args,
                         const struct hwdef *hwdef)
 {
     SPARCCPU *cpu;
@@ -829,10 +826,10 @@ static void sun4uv_init(MemoryRegion *address_space_mem,
     FWCfgState *fw_cfg;
 
     /* init CPUs */
-    cpu = cpu_devinit(cpu_model, hwdef);
+    cpu = cpu_devinit(args->cpu_model, hwdef);
 
     /* set up devices */
-    ram_init(0, RAM_size);
+    ram_init(0, args->ram_size);
 
     prom_init(hwdef->prom_addr, bios_name);
 
@@ -878,13 +875,15 @@ static void sun4uv_init(MemoryRegion *address_space_mem,
 
     initrd_size = 0;
     initrd_addr = 0;
-    kernel_size = sun4u_load_kernel(kernel_filename, initrd_filename,
+    kernel_size = sun4u_load_kernel(args->kernel_filename,
+                                    args->initrd_filename,
                                     ram_size, &initrd_size, &initrd_addr,
                                     &kernel_addr, &kernel_entry);
 
-    sun4u_NVRAM_set_params(nvram, NVRAM_SIZE, "Sun4u", RAM_size, boot_devices,
+    sun4u_NVRAM_set_params(nvram, NVRAM_SIZE, "Sun4u", args->ram_size,
+                           args->boot_device,
                            kernel_addr, kernel_size,
-                           kernel_cmdline,
+                           args->kernel_cmdline,
                            initrd_addr, initrd_size,
                            /* XXX: need an option to load a NVRAM image */
                            0,
@@ -898,16 +897,16 @@ static void sun4uv_init(MemoryRegion *address_space_mem,
     fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id);
     fw_cfg_add_i64(fw_cfg, FW_CFG_KERNEL_ADDR, kernel_entry);
     fw_cfg_add_i64(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
-    if (kernel_cmdline) {
+    if (args->kernel_cmdline) {
         fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE,
-                       strlen(kernel_cmdline) + 1);
-        fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
+                       strlen(args->kernel_cmdline) + 1);
+        fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, args->kernel_cmdline);
     } else {
         fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, 0);
     }
     fw_cfg_add_i64(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr);
     fw_cfg_add_i64(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
-    fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, boot_devices[0]);
+    fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, args->boot_device[0]);
 
     fw_cfg_add_i16(fw_cfg, FW_CFG_SPARC64_WIDTH, graphic_width);
     fw_cfg_add_i16(fw_cfg, FW_CFG_SPARC64_HEIGHT, graphic_height);
@@ -949,40 +948,19 @@ static const struct hwdef hwdefs[] = {
 /* Sun4u hardware initialisation */
 static void sun4u_init(QEMUMachineInitArgs *args)
 {
-    ram_addr_t RAM_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_devices = args->boot_device;
-    sun4uv_init(get_system_memory(), RAM_size, boot_devices, kernel_filename,
-                kernel_cmdline, initrd_filename, cpu_model, &hwdefs[0]);
+    sun4uv_init(get_system_memory(), args, &hwdefs[0]);
 }
 
 /* Sun4v hardware initialisation */
 static void sun4v_init(QEMUMachineInitArgs *args)
 {
-    ram_addr_t RAM_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_devices = args->boot_device;
-    sun4uv_init(get_system_memory(), RAM_size, boot_devices, kernel_filename,
-                kernel_cmdline, initrd_filename, cpu_model, &hwdefs[1]);
+    sun4uv_init(get_system_memory(), args, &hwdefs[1]);
 }
 
 /* Niagara hardware initialisation */
 static void niagara_init(QEMUMachineInitArgs *args)
 {
-    ram_addr_t RAM_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_devices = args->boot_device;
-    sun4uv_init(get_system_memory(), RAM_size, boot_devices, kernel_filename,
-                kernel_cmdline, initrd_filename, cpu_model, &hwdefs[2]);
+    sun4uv_init(get_system_memory(), args, &hwdefs[2]);
 }
 
 static QEMUMachine sun4u_machine = {
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index f03c45dff5..2f1e73bc75 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -377,8 +377,8 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
             /* loop over the indirect descriptor table */
             indirect = 1;
             max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
-            num_bufs = i = 0;
             desc_pa = vring_desc_addr(desc_pa, i);
+            num_bufs = i = 0;
         }
 
         do {
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 5920f73c90..ffb69a4c70 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -326,18 +326,7 @@ extern uintptr_t tci_tb_ptr;
    (6) jump to corresponding code of the next of fast path
  */
 # if defined(__i386__) || defined(__x86_64__)
-/* To avoid broken disassembling, long jmp is used for embedding fast path pc,
-   so that the destination is the next code of fast path, though this jmp is
-   never executed.
-
-   call MMU helper
-   jmp POST_PROC (2byte)    <- GETRA()
-   jmp NEXT_CODE (5byte)
-   POST_PROCESS ...         <- GETRA() + 7
- */
-#  define GETRA() ((uintptr_t)__builtin_return_address(0))
-#  define GETPC_LDST() ((uintptr_t)(GETRA() + 7 + \
-                                    *(int32_t *)((void *)GETRA() + 3) - 1))
+#  define GETPC_EXT()  GETPC()
 # elif defined (_ARCH_PPC) && !defined (_ARCH_PPC64)
 #  define GETRA() ((uintptr_t)__builtin_return_address(0))
 #  define GETPC_LDST() ((uintptr_t) ((*(int32_t *)(GETRA() - 4)) - 1))
@@ -358,7 +347,7 @@ static inline uintptr_t tcg_getpc_ldst(uintptr_t ra)
                                    not the start of the next opcode  */
     return ra;
 }
-#elif defined(__aarch64__)
+# elif defined(__aarch64__)
 #  define GETRA()       ((uintptr_t)__builtin_return_address(0))
 #  define GETPC_LDST()  tcg_getpc_ldst(GETRA())
 static inline uintptr_t tcg_getpc_ldst(uintptr_t ra)
@@ -376,7 +365,9 @@ static inline uintptr_t tcg_getpc_ldst(uintptr_t ra)
 #  error "CONFIG_QEMU_LDST_OPTIMIZATION needs GETPC_LDST() implementation!"
 # endif
 bool is_tcg_gen_code(uintptr_t pc_ptr);
-# define GETPC_EXT() (is_tcg_gen_code(GETRA()) ? GETPC_LDST() : GETPC())
+# ifndef GETPC_EXT
+#  define GETPC_EXT() (is_tcg_gen_code(GETRA()) ? GETPC_LDST() : GETPC())
+# endif
 #else
 # define GETPC_EXT() GETPC()
 #endif
diff --git a/include/exec/softmmu_defs.h b/include/exec/softmmu_defs.h
index 1f25e33ce4..e55e7178c6 100644
--- a/include/exec/softmmu_defs.h
+++ b/include/exec/softmmu_defs.h
@@ -9,29 +9,41 @@
 #ifndef SOFTMMU_DEFS_H
 #define SOFTMMU_DEFS_H
 
+uint8_t helper_ret_ldb_mmu(CPUArchState *env, target_ulong addr,
+                           int mmu_idx, uintptr_t retaddr);
+uint16_t helper_ret_ldw_mmu(CPUArchState *env, target_ulong addr,
+                            int mmu_idx, uintptr_t retaddr);
+uint32_t helper_ret_ldl_mmu(CPUArchState *env, target_ulong addr,
+                            int mmu_idx, uintptr_t retaddr);
+uint64_t helper_ret_ldq_mmu(CPUArchState *env, target_ulong addr,
+                            int mmu_idx, uintptr_t retaddr);
+
+void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
+                        int mmu_idx, uintptr_t retaddr);
+void helper_ret_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
+                        int mmu_idx, uintptr_t retaddr);
+void helper_ret_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
+                        int mmu_idx, uintptr_t retaddr);
+void helper_ret_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
+                        int mmu_idx, uintptr_t retaddr);
+
 uint8_t helper_ldb_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
-void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
-                    int mmu_idx);
 uint16_t helper_ldw_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
-void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
-                    int mmu_idx);
 uint32_t helper_ldl_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
-void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
-                    int mmu_idx);
 uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
-void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
-                    int mmu_idx);
+
+void helper_stb_mmu(CPUArchState *env, target_ulong addr,
+                    uint8_t val, int mmu_idx);
+void helper_stw_mmu(CPUArchState *env, target_ulong addr,
+                    uint16_t val, int mmu_idx);
+void helper_stl_mmu(CPUArchState *env, target_ulong addr,
+                    uint32_t val, int mmu_idx);
+void helper_stq_mmu(CPUArchState *env, target_ulong addr,
+                    uint64_t val, int mmu_idx);
 
 uint8_t helper_ldb_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
-void helper_stb_cmmu(CPUArchState *env, target_ulong addr, uint8_t val,
-int mmu_idx);
 uint16_t helper_ldw_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
-void helper_stw_cmmu(CPUArchState *env, target_ulong addr, uint16_t val,
-                     int mmu_idx);
 uint32_t helper_ldl_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
-void helper_stl_cmmu(CPUArchState *env, target_ulong addr, uint32_t val,
-                     int mmu_idx);
 uint64_t helper_ldq_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
-void helper_stq_cmmu(CPUArchState *env, target_ulong addr, uint64_t val,
-                     int mmu_idx);
-#endif
+
+#endif /* SOFTMMU_DEFS_H */
diff --git a/include/exec/softmmu_template.h b/include/exec/softmmu_template.h
index 8584902cbe..eaca9e1035 100644
--- a/include/exec/softmmu_template.h
+++ b/include/exec/softmmu_template.h
@@ -54,10 +54,6 @@
 #define ADDR_READ addr_read
 #endif
 
-static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env,
-                                                        target_ulong addr,
-                                                        int mmu_idx,
-                                                        uintptr_t retaddr);
 static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
                                               hwaddr physaddr,
                                               target_ulong addr,
@@ -78,123 +74,86 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
 }
 
 /* handle all cases except unaligned access which span two pages */
+#ifdef SOFTMMU_CODE_ACCESS
+static
+#endif
 DATA_TYPE
-glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
-                                         int mmu_idx)
+glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env,
+                                             target_ulong addr, int mmu_idx,
+                                             uintptr_t retaddr)
 {
-    DATA_TYPE res;
-    int index;
-    target_ulong tlb_addr;
-    hwaddr ioaddr;
-    uintptr_t retaddr;
+    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+    uintptr_t haddr;
 
-    /* test if there is match for unaligned or IO access */
-    /* XXX: could done more in memory macro in a non portable way */
-    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- redo:
-    tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
-    if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
-        if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            retaddr = GETPC_EXT();
-            ioaddr = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr);
-        } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
-            /* slow unaligned access (it spans two pages or IO) */
-        do_unaligned_access:
-            retaddr = GETPC_EXT();
+    /* If the TLB entry is for a different page, reload and try again.  */
+    if ((addr & TARGET_PAGE_MASK)
+         != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
 #ifdef ALIGNED_ONLY
+        if ((addr & (DATA_SIZE - 1)) != 0) {
             do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
-#endif
-            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(env, addr,
-                                                         mmu_idx, retaddr);
-        } else {
-            /* unaligned/aligned access in the same page */
-            uintptr_t addend;
-#ifdef ALIGNED_ONLY
-            if ((addr & (DATA_SIZE - 1)) != 0) {
-                retaddr = GETPC_EXT();
-                do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
-            }
-#endif
-            addend = env->tlb_table[mmu_idx][index].addend;
-            res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(intptr_t)
-                                                (addr + addend));
         }
-    } else {
-        /* the page is not in the TLB : fill it */
-        retaddr = GETPC_EXT();
-#ifdef ALIGNED_ONLY
-        if ((addr & (DATA_SIZE - 1)) != 0)
-            do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
 #endif
         tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
-        goto redo;
+        tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
     }
-    return res;
-}
 
-/* handle all unaligned cases */
-static DATA_TYPE
-glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env,
-                                       target_ulong addr,
-                                       int mmu_idx,
-                                       uintptr_t retaddr)
-{
-    DATA_TYPE res, res1, res2;
-    int index, shift;
-    hwaddr ioaddr;
-    target_ulong tlb_addr, addr1, addr2;
+    /* Handle an IO access.  */
+    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+        hwaddr ioaddr;
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            goto do_unaligned_access;
+        }
+        ioaddr = env->iotlb[mmu_idx][index];
+        return glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr);
+    }
 
-    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- redo:
-    tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
-    if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
-        if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            ioaddr = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr);
-        } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
-        do_unaligned_access:
-            /* slow unaligned access (it spans two pages) */
-            addr1 = addr & ~(DATA_SIZE - 1);
-            addr2 = addr1 + DATA_SIZE;
-            res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(env, addr1,
-                                                          mmu_idx, retaddr);
-            res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(env, addr2,
-                                                          mmu_idx, retaddr);
-            shift = (addr & (DATA_SIZE - 1)) * 8;
+    /* Handle slow unaligned access (it spans two pages or IO).  */
+    if (DATA_SIZE > 1
+        && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
+                    >= TARGET_PAGE_SIZE)) {
+        target_ulong addr1, addr2;
+        DATA_TYPE res1, res2, res;
+        unsigned shift;
+    do_unaligned_access:
+#ifdef ALIGNED_ONLY
+        do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+#endif
+        addr1 = addr & ~(DATA_SIZE - 1);
+        addr2 = addr1 + DATA_SIZE;
+        res1 = glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(env, addr1,
+                                                            mmu_idx, retaddr);
+        res2 = glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(env, addr2,
+                                                            mmu_idx, retaddr);
+        shift = (addr & (DATA_SIZE - 1)) * 8;
 #ifdef TARGET_WORDS_BIGENDIAN
-            res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
+        res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
 #else
-            res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
+        res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
 #endif
-            res = (DATA_TYPE)res;
-        } else {
-            /* unaligned/aligned access in the same page */
-            uintptr_t addend = env->tlb_table[mmu_idx][index].addend;
-            res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(intptr_t)
-                                                (addr + addend));
-        }
-    } else {
-        /* the page is not in the TLB : fill it */
-        tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
-        goto redo;
+        return res;
+    }
+
+    /* Handle aligned access or unaligned access in the same page.  */
+#ifdef ALIGNED_ONLY
+    if ((addr & (DATA_SIZE - 1)) != 0) {
+        do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
     }
-    return res;
+#endif
+
+    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+    return glue(glue(ld, USUFFIX), _raw)((uint8_t *)haddr);
 }
 
-#ifndef SOFTMMU_CODE_ACCESS
+DATA_TYPE
+glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
+                                         int mmu_idx)
+{
+    return glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(env, addr, mmu_idx,
+                                                        GETPC_EXT());
+}
 
-static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
-                                                   target_ulong addr,
-                                                   DATA_TYPE val,
-                                                   int mmu_idx,
-                                                   uintptr_t retaddr);
+#ifndef SOFTMMU_CODE_ACCESS
 
 static inline void glue(io_write, SUFFIX)(CPUArchState *env,
                                           hwaddr physaddr,
@@ -214,107 +173,79 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env,
     io_mem_write(mr, physaddr, val, 1 << SHIFT);
 }
 
-void glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
-                                              target_ulong addr, DATA_TYPE val,
-                                              int mmu_idx)
+void
+glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
+                                             target_ulong addr, DATA_TYPE val,
+                                             int mmu_idx, uintptr_t retaddr)
 {
-    hwaddr ioaddr;
-    target_ulong tlb_addr;
-    uintptr_t retaddr;
-    int index;
+    int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+    uintptr_t haddr;
 
-    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- redo:
-    tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
-    if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
-        if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            retaddr = GETPC_EXT();
-            ioaddr = env->iotlb[mmu_idx][index];
-            glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
-        } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
-        do_unaligned_access:
-            retaddr = GETPC_EXT();
+    /* If the TLB entry is for a different page, reload and try again.  */
+    if ((addr & TARGET_PAGE_MASK)
+        != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
 #ifdef ALIGNED_ONLY
+        if ((addr & (DATA_SIZE - 1)) != 0) {
             do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
-#endif
-            glue(glue(slow_st, SUFFIX), MMUSUFFIX)(env, addr, val,
-                                                   mmu_idx, retaddr);
-        } else {
-            /* aligned/unaligned access in the same page */
-            uintptr_t addend;
-#ifdef ALIGNED_ONLY
-            if ((addr & (DATA_SIZE - 1)) != 0) {
-                retaddr = GETPC_EXT();
-                do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
-            }
-#endif
-            addend = env->tlb_table[mmu_idx][index].addend;
-            glue(glue(st, SUFFIX), _raw)((uint8_t *)(intptr_t)
-                                         (addr + addend), val);
         }
-    } else {
-        /* the page is not in the TLB : fill it */
-        retaddr = GETPC_EXT();
-#ifdef ALIGNED_ONLY
-        if ((addr & (DATA_SIZE - 1)) != 0)
-            do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
 #endif
         tlb_fill(env, addr, 1, mmu_idx, retaddr);
-        goto redo;
+        tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
     }
-}
 
-/* handles all unaligned cases */
-static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
-                                                   target_ulong addr,
-                                                   DATA_TYPE val,
-                                                   int mmu_idx,
-                                                   uintptr_t retaddr)
-{
-    hwaddr ioaddr;
-    target_ulong tlb_addr;
-    int index, i;
+    /* Handle an IO access.  */
+    if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+        hwaddr ioaddr;
+        if ((addr & (DATA_SIZE - 1)) != 0) {
+            goto do_unaligned_access;
+        }
+        ioaddr = env->iotlb[mmu_idx][index];
+        glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
+        return;
+    }
 
-    index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- redo:
-    tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
-    if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
-        if (tlb_addr & ~TARGET_PAGE_MASK) {
-            /* IO access */
-            if ((addr & (DATA_SIZE - 1)) != 0)
-                goto do_unaligned_access;
-            ioaddr = env->iotlb[mmu_idx][index];
-            glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
-        } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
-        do_unaligned_access:
-            /* XXX: not efficient, but simple */
-            /* Note: relies on the fact that tlb_fill() does not remove the
-             * previous page from the TLB cache.  */
-            for(i = DATA_SIZE - 1; i >= 0; i--) {
+    /* Handle slow unaligned access (it spans two pages or IO).  */
+    if (DATA_SIZE > 1
+        && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
+                     >= TARGET_PAGE_SIZE)) {
+        int i;
+    do_unaligned_access:
+#ifdef ALIGNED_ONLY
+        do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
+#endif
+        /* XXX: not efficient, but simple */
+        /* Note: relies on the fact that tlb_fill() does not remove the
+         * previous page from the TLB cache.  */
+        for (i = DATA_SIZE - 1; i >= 0; i--) {
 #ifdef TARGET_WORDS_BIGENDIAN
-                glue(slow_stb, MMUSUFFIX)(env, addr + i,
-                                          val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
-                                          mmu_idx, retaddr);
+            uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8));
 #else
-                glue(slow_stb, MMUSUFFIX)(env, addr + i,
-                                          val >> (i * 8),
-                                          mmu_idx, retaddr);
+            uint8_t val8 = val >> (i * 8);
 #endif
-            }
-        } else {
-            /* aligned/unaligned access in the same page */
-            uintptr_t addend = env->tlb_table[mmu_idx][index].addend;
-            glue(glue(st, SUFFIX), _raw)((uint8_t *)(intptr_t)
-                                         (addr + addend), val);
+            glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
+                                            mmu_idx, retaddr);
         }
-    } else {
-        /* the page is not in the TLB : fill it */
-        tlb_fill(env, addr, 1, mmu_idx, retaddr);
-        goto redo;
+        return;
+    }
+
+    /* Handle aligned access or unaligned access in the same page.  */
+#ifdef ALIGNED_ONLY
+    if ((addr & (DATA_SIZE - 1)) != 0) {
+        do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
     }
+#endif
+
+    haddr = addr + env->tlb_table[mmu_idx][index].addend;
+    glue(glue(st, SUFFIX), _raw)((uint8_t *)haddr, val);
+}
+
+void
+glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
+                                         DATA_TYPE val, int mmu_idx)
+{
+    glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, val, mmu_idx,
+                                                 GETPC_EXT());
 }
 
 #endif /* !defined(SOFTMMU_CODE_ACCESS) */
diff --git a/include/hw/loader.h b/include/hw/loader.h
index eb9c9a3612..61457360f6 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -36,6 +36,7 @@ void pstrcpy_targphys(const char *name,
                       hwaddr dest, int buf_size,
                       const char *source);
 
+extern bool rom_file_in_ram;
 
 int rom_add_file(const char *file, const char *fw_dir,
                  hwaddr addr, int32_t bootindex);
diff --git a/target-mips/helper.c b/target-mips/helper.c
index 6feef7bcd6..33e0e88637 100644
--- a/target-mips/helper.c
+++ b/target-mips/helper.c
@@ -193,7 +193,7 @@ static int get_physical_address (CPUMIPSState *env, hwaddr *physical,
         }
     }
 #if 0
-    qemu_log(TARGET_FMT_lx " %d %d => " TARGET_FMT_lx " %d (%d)\n",
+    qemu_log(TARGET_FMT_lx " %d %d => %" HWADDR_PRIx " %d (%d)\n",
             address, rw, access_type, *physical, *prot, ret);
 #endif
 
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 87eeab3d30..12a7ca3440 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -190,11 +190,11 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         /* qemu_ld/st address constraint */
     case 'L':
         ct->ct |= TCG_CT_REG;
-#if TCG_TARGET_REG_BITS == 64
+        if (TCG_TARGET_REG_BITS == 64) {
             tcg_regset_set32(ct->u.regs, 0, 0xffff);
-#else
+        } else {
             tcg_regset_set32(ct->u.regs, 0, 0xff);
-#endif
+        }
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
         break;
@@ -541,20 +541,34 @@ static inline void tcg_out_mov(TCGContext *s, TCGType type,
 static void tcg_out_movi(TCGContext *s, TCGType type,
                          TCGReg ret, tcg_target_long arg)
 {
+    tcg_target_long diff;
+
     if (arg == 0) {
         tgen_arithr(s, ARITH_XOR, ret, ret);
         return;
-    } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
+    }
+    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
         tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
         tcg_out32(s, arg);
-    } else if (arg == (int32_t)arg) {
+        return;
+    }
+    if (arg == (int32_t)arg) {
         tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
         tcg_out32(s, arg);
-    } else {
-        tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
-        tcg_out32(s, arg);
-        tcg_out32(s, arg >> 31 >> 1);
+        return;
+    }
+
+    /* Try a 7 byte pc-relative lea before the 10 byte movq.  */
+    diff = arg - ((tcg_target_long)s->code_ptr + 7);
+    if (diff == (int32_t)diff) {
+        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
+        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
+        tcg_out32(s, diff);
+        return;
     }
+
+    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
+    tcg_out64(s, arg);
 }
 
 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
@@ -594,6 +608,14 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
     tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
 }
 
+static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
+                               tcg_target_long ofs, tcg_target_long val)
+{
+    int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
+    tcg_out_modrm_offset(s, opc, 0, base, ofs);
+    tcg_out32(s, val);
+}
+
 static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
 {
     /* Propagate an opcode prefix, such as P_DATA16.  */
@@ -1003,22 +1025,24 @@ static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
 
 #include "exec/softmmu_defs.h"
 
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
-   int mmu_idx) */
-static const void *qemu_ld_helpers[4] = {
-    helper_ldb_mmu,
-    helper_ldw_mmu,
-    helper_ldl_mmu,
-    helper_ldq_mmu,
+/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ *                                     int mmu_idx, uintptr_t ra)
+ */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ret_ldb_mmu,
+    helper_ret_ldw_mmu,
+    helper_ret_ldl_mmu,
+    helper_ret_ldq_mmu,
 };
 
-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
-   uintxx_t val, int mmu_idx) */
-static const void *qemu_st_helpers[4] = {
-    helper_stb_mmu,
-    helper_stw_mmu,
-    helper_stl_mmu,
-    helper_stq_mmu,
+/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
+ */
+static const void * const qemu_st_helpers[4] = {
+    helper_ret_stb_mmu,
+    helper_ret_stw_mmu,
+    helper_ret_stl_mmu,
+    helper_ret_stq_mmu,
 };
 
 static void add_qemu_ldst_label(TCGContext *s,
@@ -1446,25 +1470,21 @@ static void add_qemu_ldst_label(TCGContext *s,
     }
 }
 
+/* See the GETPC definition in include/exec/exec-all.h.  */
+static inline uintptr_t do_getpc(uint8_t *raddr)
+{
+    return (uintptr_t)raddr - 1;
+}
+
 /*
  * Generate code for the slow path for a load at the end of block
  */
-static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 {
-    int s_bits;
-    int opc = label->opc;
-    int mem_index = label->mem_index;
-#if TCG_TARGET_REG_BITS == 32
-    int stack_adjust;
-    int addrlo_reg = label->addrlo_reg;
-    int addrhi_reg = label->addrhi_reg;
-#endif
-    int data_reg = label->datalo_reg;
-    int data_reg2 = label->datahi_reg;
-    uint8_t *raddr = label->raddr;
-    uint8_t **label_ptr = &label->label_ptr[0];
-
-    s_bits = opc & 3;
+    int opc = l->opc;
+    int s_bits = opc & 3;
+    TCGReg data_reg;
+    uint8_t **label_ptr = &l->label_ptr[0];
 
     /* resolve label address */
     *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
@@ -1472,52 +1492,36 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
         *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
     }
 
-#if TCG_TARGET_REG_BITS == 32
-    tcg_out_pushi(s, mem_index);
-    stack_adjust = 4;
-    if (TARGET_LONG_BITS == 64) {
-        tcg_out_push(s, addrhi_reg);
-        stack_adjust += 4;
-    }
-    tcg_out_push(s, addrlo_reg);
-    stack_adjust += 4;
-    tcg_out_push(s, TCG_AREG0);
-    stack_adjust += 4;
-#else
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
-    /* The second argument is already loaded with addrlo.  */
-    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
-#endif
+    if (TCG_TARGET_REG_BITS == 32) {
+        int ofs = 0;
 
-    /* Code generation of qemu_ld/st's slow path calling MMU helper
+        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
+        ofs += 4;
 
-       PRE_PROC ...
-       call MMU helper
-       jmp POST_PROC (2b) : short forward jump <- GETRA()
-       jmp next_code (5b) : dummy long backward jump which is never executed
-       POST_PROC ... : do post-processing <- GETRA() + 7
-       jmp next_code : jump to the code corresponding to next IR of qemu_ld/st
-    */
+        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
+        ofs += 4;
 
-    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
+        if (TARGET_LONG_BITS == 64) {
+            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
+            ofs += 4;
+        }
 
-    /* Jump to post-processing code */
-    tcg_out8(s, OPC_JMP_short);
-    tcg_out8(s, 5);
-    /* Dummy backward jump having information of fast path'pc for MMU helpers */
-    tcg_out8(s, OPC_JMP_long);
-    *(int32_t *)s->code_ptr = (int32_t)(raddr - s->code_ptr - 4);
-    s->code_ptr += 4;
+        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
+        ofs += 4;
 
-#if TCG_TARGET_REG_BITS == 32
-    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
-        /* Pop and discard.  This is 2 bytes smaller than the add.  */
-        tcg_out_pop(s, TCG_REG_ECX);
-    } else if (stack_adjust != 0) {
-        tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
+        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, do_getpc(l->raddr));
+    } else {
+        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+        /* The second argument is already loaded with addrlo.  */
+        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                     l->mem_index);
+        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
+                     do_getpc(l->raddr));
     }
-#endif
 
+    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
+
+    data_reg = l->datalo_reg;
     switch(opc) {
     case 0 | 4:
         tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
@@ -1545,10 +1549,10 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
         } else if (data_reg == TCG_REG_EDX) {
             /* xchg %edx, %eax */
             tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
-            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
+            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
         } else {
             tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
-            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
+            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
         }
         break;
     default:
@@ -1556,28 +1560,17 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
     }
 
     /* Jump to the code corresponding to next IR of qemu_st */
-    tcg_out_jmp(s, (tcg_target_long)raddr);
+    tcg_out_jmp(s, (tcg_target_long)l->raddr);
 }
 
 /*
  * Generate code for the slow path for a store at the end of block
  */
-static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 {
-    int s_bits;
-    int stack_adjust;
-    int opc = label->opc;
-    int mem_index = label->mem_index;
-    int data_reg = label->datalo_reg;
-#if TCG_TARGET_REG_BITS == 32
-    int data_reg2 = label->datahi_reg;
-    int addrlo_reg = label->addrlo_reg;
-    int addrhi_reg = label->addrhi_reg;
-#endif
-    uint8_t *raddr = label->raddr;
-    uint8_t **label_ptr = &label->label_ptr[0];
-
-    s_bits = opc & 3;
+    int opc = l->opc;
+    int s_bits = opc & 3;
+    uint8_t **label_ptr = &l->label_ptr[0];
 
     /* resolve label address */
     *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
@@ -1585,61 +1578,56 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
         *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
     }
 
-#if TCG_TARGET_REG_BITS == 32
-    tcg_out_pushi(s, mem_index);
-    stack_adjust = 4;
-    if (opc == 3) {
-        tcg_out_push(s, data_reg2);
-        stack_adjust += 4;
-    }
-    tcg_out_push(s, data_reg);
-    stack_adjust += 4;
-    if (TARGET_LONG_BITS == 64) {
-        tcg_out_push(s, addrhi_reg);
-        stack_adjust += 4;
-    }
-    tcg_out_push(s, addrlo_reg);
-    stack_adjust += 4;
-    tcg_out_push(s, TCG_AREG0);
-    stack_adjust += 4;
-#else
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
-    /* The second argument is already loaded with addrlo.  */
-    tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
-                tcg_target_call_iarg_regs[2], data_reg);
-    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], mem_index);
-    stack_adjust = 0;
-#endif
+    if (TCG_TARGET_REG_BITS == 32) {
+        int ofs = 0;
 
-    /* Code generation of qemu_ld/st's slow path calling MMU helper
+        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
+        ofs += 4;
 
-       PRE_PROC ...
-       call MMU helper
-       jmp POST_PROC (2b) : short forward jump <- GETRA()
-       jmp next_code (5b) : dummy long backward jump which is never executed
-       POST_PROC ... : do post-processing <- GETRA() + 7
-       jmp next_code : jump to the code corresponding to next IR of qemu_ld/st
-    */
+        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
+        ofs += 4;
 
-    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
+        if (TARGET_LONG_BITS == 64) {
+            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
+            ofs += 4;
+        }
 
-    /* Jump to post-processing code */
-    tcg_out8(s, OPC_JMP_short);
-    tcg_out8(s, 5);
-    /* Dummy backward jump having information of fast path'pc for MMU helpers */
-    tcg_out8(s, OPC_JMP_long);
-    *(int32_t *)s->code_ptr = (int32_t)(raddr - s->code_ptr - 4);
-    s->code_ptr += 4;
+        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
+        ofs += 4;
+
+        if (opc == 3) {
+            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
+            ofs += 4;
+        }
 
-    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
-        /* Pop and discard.  This is 2 bytes smaller than the add.  */
-        tcg_out_pop(s, TCG_REG_ECX);
-    } else if (stack_adjust != 0) {
-        tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
+        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
+        ofs += 4;
+
+        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, do_getpc(l->raddr));
+    } else {
+        uintptr_t pc;
+
+        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+        /* The second argument is already loaded with addrlo.  */
+        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+                    tcg_target_call_iarg_regs[2], l->datalo_reg);
+        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                     l->mem_index);
+
+        pc = do_getpc(l->raddr);
+        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
+            tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[4], pc);
+        } else if (pc == (int32_t)pc) {
+            tcg_out_sti(s, TCG_TYPE_PTR, TCG_REG_ESP, 0, pc);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RAX, pc);
+            tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_RAX, TCG_REG_ESP, 0);
+        }
     }
 
-    /* Jump to the code corresponding to next IR of qemu_st */
-    tcg_out_jmp(s, (tcg_target_long)raddr);
+    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
+
+    tcg_out_jmp(s, (tcg_target_long)l->raddr);
 }
 
 /*
diff --git a/tcg/tcg.c b/tcg/tcg.c
index dac8224024..19bd5a39bf 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -121,14 +121,23 @@ static inline void tcg_out8(TCGContext *s, uint8_t v)
 
 static inline void tcg_out16(TCGContext *s, uint16_t v)
 {
-    *(uint16_t *)s->code_ptr = v;
-    s->code_ptr += 2;
+    uint8_t *p = s->code_ptr;
+    *(uint16_t *)p = v;
+    s->code_ptr = p + 2;
 }
 
 static inline void tcg_out32(TCGContext *s, uint32_t v)
 {
-    *(uint32_t *)s->code_ptr = v;
-    s->code_ptr += 4;
+    uint8_t *p = s->code_ptr;
+    *(uint32_t *)p = v;
+    s->code_ptr = p + 4;
+}
+
+static inline void tcg_out64(TCGContext *s, uint64_t v)
+{
+    uint8_t *p = s->code_ptr;
+    *(uint64_t *)p = v;
+    s->code_ptr = p + 8;
 }
 
 /* label relocation processing */