Diffstat (limited to 'tcg')
-rw-r--r--  tcg/arm/tcg-target.c     75
-rw-r--r--  tcg/arm/tcg-target.h      3
-rw-r--r--  tcg/hppa/tcg-target.c    60
-rw-r--r--  tcg/hppa/tcg-target.h     4
-rw-r--r--  tcg/i386/tcg-target.c   179
-rw-r--r--  tcg/i386/tcg-target.h     3
-rw-r--r--  tcg/ia64/tcg-target.c    56
-rw-r--r--  tcg/ia64/tcg-target.h     3
-rw-r--r--  tcg/mips/tcg-target.c    58
-rw-r--r--  tcg/mips/tcg-target.h     3
-rw-r--r--  tcg/ppc/tcg-target.c     49
-rw-r--r--  tcg/ppc/tcg-target.h      2
-rw-r--r--  tcg/ppc64/tcg-target.c   48
-rw-r--r--  tcg/s390/tcg-target.c    52
-rw-r--r--  tcg/s390/tcg-target.h     3
-rw-r--r--  tcg/sparc/tcg-target.c   71
-rw-r--r--  tcg/sparc/tcg-target.h    3
-rw-r--r--  tcg/tcg.c                43
-rw-r--r--  tcg/tcg.h                15
-rw-r--r--  tcg/tci/tcg-target.c      8
-rw-r--r--  tcg/tci/tcg-target.h      5
21 files changed, 588 insertions, 155 deletions
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 5b233f564c..4d59a63855 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -929,6 +929,27 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index)
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr,
+   int mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -936,6 +957,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -943,6 +966,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif
 
 #define TLB_SHIFT	(CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
 
@@ -990,10 +1014,10 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0,
                     TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
     /* In the
-     *  ldr r1 [r0, #(offsetof(CPUState, tlb_table[mem_index][0].addr_read))]
+     *  ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_read))]
      * below, the offset is likely to exceed 12 bits if mem_index != 0 and
      * not exceed otherwise, so use an
-     *  add r0, r0, #(mem_index * sizeof *CPUState.tlb_table)
+     *  add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
      * before.
      */
     if (mem_index)
@@ -1001,7 +1025,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                         (mem_index << (TLB_SHIFT & 1)) |
                         ((16 - (TLB_SHIFT >> 1)) << 8));
     tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
-                    offsetof(CPUState, tlb_table[0][0].addr_read));
+                    offsetof(CPUArchState, tlb_table[0][0].addr_read));
     tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
                     TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
     /* Check alignment.  */
@@ -1012,12 +1036,12 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     /* XXX: possibly we could use a block data load or writeback in
      * the first access.  */
     tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
-                    offsetof(CPUState, tlb_table[0][0].addr_read) + 4);
+                    offsetof(CPUArchState, tlb_table[0][0].addr_read) + 4);
     tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
                     TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
 #  endif
     tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
-                    offsetof(CPUState, tlb_table[0][0].addend));
+                    offsetof(CPUArchState, tlb_table[0][0].addend));
 
     switch (opc) {
     case 0:
@@ -1075,6 +1099,19 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                     TCG_REG_R1, 0, addr_reg2, SHIFT_IMM_LSL(0));
     tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R2, 0, mem_index);
 # endif
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 bit */
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[2], 0,
+                    tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[1], 0,
+                    tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0));
+
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[0], 0, TCG_AREG0,
+                    SHIFT_IMM_LSL(0));
+#endif
     tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]);
 
     switch (opc) {
@@ -1210,10 +1247,10 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0,
                     TCG_AREG0, TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
     /* In the
-     *  ldr r1 [r0, #(offsetof(CPUState, tlb_table[mem_index][0].addr_write))]
+     *  ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_write))]
      * below, the offset is likely to exceed 12 bits if mem_index != 0 and
      * not exceed otherwise, so use an
-     *  add r0, r0, #(mem_index * sizeof *CPUState.tlb_table)
+     *  add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
      * before.
      */
     if (mem_index)
@@ -1221,7 +1258,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
                         (mem_index << (TLB_SHIFT & 1)) |
                         ((16 - (TLB_SHIFT >> 1)) << 8));
     tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
-                    offsetof(CPUState, tlb_table[0][0].addr_write));
+                    offsetof(CPUArchState, tlb_table[0][0].addr_write));
     tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
                     TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
     /* Check alignment.  */
@@ -1232,12 +1269,12 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     /* XXX: possibly we could use a block data load or writeback in
      * the first access.  */
     tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
-                    offsetof(CPUState, tlb_table[0][0].addr_write) + 4);
+                    offsetof(CPUArchState, tlb_table[0][0].addr_write) + 4);
     tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
                     TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
 #  endif
     tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
-                    offsetof(CPUState, tlb_table[0][0].addend));
+                    offsetof(CPUArchState, tlb_table[0][0].addend));
 
     switch (opc) {
     case 0:
@@ -1341,6 +1378,22 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     }
 # endif
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 bit */
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[3], 0,
+                    tcg_target_call_iarg_regs[2], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[2], 0,
+                    tcg_target_call_iarg_regs[1], SHIFT_IMM_LSL(0));
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[1], 0,
+                    tcg_target_call_iarg_regs[0], SHIFT_IMM_LSL(0));
+
+    tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
+                    tcg_target_call_iarg_regs[0], 0, TCG_AREG0,
+                    SHIFT_IMM_LSL(0));
+#endif
     tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]);
     if (opc == 3)
         tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10);
@@ -1797,7 +1850,7 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
 
     tcg_add_target_add_op_defs(arm_op_defs);
-    tcg_set_frame(s, TCG_AREG0, offsetof(CPUState, temp_buf),
+    tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 }
 
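For reference, the two MMU-helper ABIs that the CONFIG_TCG_PASS_AREG0 blocks in this patch switch between, sketched from the signature comments above for the 32-bit access size (the byte/word/quad variants differ only in value type; the exact return types here are an assumption):

    /* Legacy ABI: env is implicit, pinned in the host register that
     * TCG_AREG0 names, so helpers take only the address and MMU index. */
    uint32_t __ldl_mmu(target_ulong addr, int mmu_idx);
    void __stl_mmu(target_ulong addr, uint32_t val, int mmu_idx);

    /* AREG0 ABI: env becomes an explicit first argument, which is why
     * each backend below must shift its call arguments up one slot. */
    uint32_t helper_ldl_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
    void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                        int mmu_idx);
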
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 0035b471fe..f90b834795 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -81,7 +81,8 @@ enum {
     TCG_AREG0 = TCG_REG_R6,
 };
 
-static inline void flush_icache_range(unsigned long start, unsigned long stop)
+static inline void flush_icache_range(tcg_target_ulong start,
+                                      tcg_target_ulong stop)
 {
 #if QEMU_GNUC_PREREQ(4, 1)
     __builtin___clear_cache((char *) start, (char *) stop);
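
Every backend header in this patch widens flush_icache_range from unsigned long to tcg_target_ulong in the same way; the matching caller-side casts are in the tcg/tcg.c hunks near the end. A minimal sketch of the call as it looks after the patch:

    /* Code-buffer bounds now travel as tcg_target_ulong, so caller and
     * callee agree in width on every host (the 32-/64-bit typedefs are
     * in the tcg.h hunk below). */
    flush_icache_range((tcg_target_ulong)s->code_buf,
                       (tcg_target_ulong)s->code_ptr);
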
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 59d4d12ba6..e579ef06a1 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -882,6 +882,27 @@ static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret,
 #if defined(CONFIG_SOFTMMU)
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr,
+   int mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -889,12 +910,15 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif
 
 /* Load and compare a TLB entry, and branch if TLB miss.  OFFSET is set to
    the offset of the first ADDR_READ or ADDR_WRITE member of the appropriate
@@ -1040,19 +1064,19 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     lab1 = gen_new_label();
     lab2 = gen_new_label();
 
-    offset = offsetof(CPUState, tlb_table[mem_index][0].addr_read);
+    offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
     offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
                               opc & 3, lab1, offset);
 
     /* TLB Hit.  */
     tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
-               offsetof(CPUState, tlb_table[mem_index][0].addend) - offset);
+               offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
     tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, TCG_REG_R20, opc);
     tcg_out_branch(s, lab2, 1);
 
     /* TLB Miss.  */
     /* label1: */
-    tcg_out_label(s, lab1, (tcg_target_long)s->code_ptr);
+    tcg_out_label(s, lab1, s->code_ptr);
 
     argreg = TCG_REG_R26;
     tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
@@ -1061,6 +1085,15 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     }
     tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call(s, qemu_ld_helpers[opc & 3]);
 
     switch (opc) {
@@ -1089,7 +1122,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     }
 
     /* label2: */
-    tcg_out_label(s, lab2, (tcg_target_long)s->code_ptr);
+    tcg_out_label(s, lab2, s->code_ptr);
 #else
     tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg,
                            (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_R0), opc);
@@ -1155,13 +1188,13 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     lab1 = gen_new_label();
     lab2 = gen_new_label();
 
-    offset = offsetof(CPUState, tlb_table[mem_index][0].addr_write);
+    offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
     offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
                               opc, lab1, offset);
 
     /* TLB Hit.  */
     tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
-               offsetof(CPUState, tlb_table[mem_index][0].addend) - offset);
+               offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
 
     /* There are no indexed stores, so we must do this addition explicitly.
        Careful to avoid R20, which is used for the bswaps to follow.  */
@@ -1171,7 +1204,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 
     /* TLB Miss.  */
     /* label1: */
-    tcg_out_label(s, lab1, (tcg_target_long)s->code_ptr);
+    tcg_out_label(s, lab1, s->code_ptr);
 
     argreg = TCG_REG_R26;
     tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
@@ -1212,10 +1245,21 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
         tcg_abort();
     }
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call(s, qemu_st_helpers[opc]);
 
     /* label2: */
-    tcg_out_label(s, lab2, (tcg_target_long)s->code_ptr);
+    tcg_out_label(s, lab2, s->code_ptr);
 #else
     /* There are no indexed stores, so if GUEST_BASE is set we must do the add
        explicitly.  Careful to avoid R20, which is used for the bswaps to follow.  */
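
Every CONFIG_TCG_PASS_AREG0 fix-up in this patch follows the pattern visible in the hppa hunks above: the existing code has already marshalled (addr, mem_index) or (addr, val, mem_index) into the first call registers, so the new block shifts each argument up one slot, highest first, then loads env into slot 0. A C-level sketch of the load case, with a hypothetical reg[] array standing in for tcg_target_call_iarg_regs:

    /* Before: reg[0] = addr, reg[1] = mem_index          (legacy layout)
     * After:  reg[0] = env,  reg[1] = addr, reg[2] = mem_index
     * The moves must run highest-slot-first, or addr would be clobbered
     * before it is copied. */
    reg[2] = reg[1];             /* mem_index */
    reg[1] = reg[0];             /* addr */
    reg[0] = (uintptr_t)env;     /* CPUArchState *, taken from TCG_AREG0 */

The XXX/FIXME comments concede that two or three extra register moves per slow-path call are suboptimal; marshalling the arguments into their final slots directly would avoid them.
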
diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
index 7f3c4cc3e5..d4bf6fe756 100644
--- a/tcg/hppa/tcg-target.h
+++ b/tcg/hppa/tcg-target.h
@@ -107,7 +107,9 @@ typedef enum {
 /* Note: must be synced with dyngen-exec.h */
 #define TCG_AREG0 TCG_REG_R17
 
-static inline void flush_icache_range(unsigned long start, unsigned long stop)
+
+static inline void flush_icache_range(tcg_target_ulong start,
+                                      tcg_target_ulong stop)
 {
     start &= ~31;
     while (start <= stop) {
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index dc81572c09..43a51a1c54 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -116,17 +116,7 @@ static inline int tcg_target_get_call_iarg_regs_count(int flags)
         return 6;
     }
 
-    flags &= TCG_CALL_TYPE_MASK;
-    switch(flags) {
-    case TCG_CALL_TYPE_STD:
-        return 0;
-    case TCG_CALL_TYPE_REGPARM_1:
-    case TCG_CALL_TYPE_REGPARM_2:
-    case TCG_CALL_TYPE_REGPARM:
-        return flags - TCG_CALL_TYPE_REGPARM_1 + 1;
-    default:
-        tcg_abort();
-    }
+    return 0;
 }
 
 /* parse target specific constraints */
@@ -188,6 +178,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
             tcg_regset_set32(ct->u.regs, 0, 0xffff);
             tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
             tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
+#ifdef CONFIG_TCG_PASS_AREG0
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDX);
+#endif
         } else {
             tcg_regset_set32(ct->u.regs, 0, 0xff);
             tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
@@ -875,7 +868,7 @@ static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
     default:
         tcg_abort();
     }
-    tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
+    tcg_out_label(s, label_next, s->code_ptr);
 }
 #endif
 
@@ -917,10 +910,10 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
 
         tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
         tcg_out_jxx(s, JCC_JMP, label_over, 1);
-        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
+        tcg_out_label(s, label_true, s->code_ptr);
 
         tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
-        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+        tcg_out_label(s, label_over, s->code_ptr);
     } else {
         /* When the destination does not overlap one of the arguments,
            clear the destination first, jump if cond false, and emit an
@@ -934,7 +927,7 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
         tcg_out_brcond2(s, new_args, const_args+1, 1);
 
         tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
-        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+        tcg_out_label(s, label_over, s->code_ptr);
     }
 }
 #endif
@@ -967,6 +960,27 @@ static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void *qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void *qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr,
+   int mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -974,12 +988,15 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif
 
 /* Perform the TLB load and compare.
 
@@ -1031,7 +1048,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
 
     tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
-                             offsetof(CPUState, tlb_table[mem_index][0])
+                             offsetof(CPUArchState, tlb_table[mem_index][0])
                              + which);
 
     /* cmp 0(r1), r0 */
@@ -1148,7 +1165,12 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     int data_reg, data_reg2 = 0;
     int addrlo_idx;
 #if defined(CONFIG_SOFTMMU)
-    int mem_index, s_bits, arg_idx;
+    int mem_index, s_bits;
+#if TCG_TARGET_REG_BITS == 64
+    int arg_idx;
+#else
+    int stack_adjust;
+#endif
     uint8_t *label_ptr[3];
 #endif
 
@@ -1184,16 +1206,48 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     }
 
     /* XXX: move that code at the end of the TB */
+#if TCG_TARGET_REG_BITS == 32
+    tcg_out_pushi(s, mem_index);
+    stack_adjust = 4;
+    if (TARGET_LONG_BITS == 64) {
+        tcg_out_push(s, args[addrlo_idx + 1]);
+        stack_adjust += 4;
+    }
+    tcg_out_push(s, args[addrlo_idx]);
+    stack_adjust += 4;
+#ifdef CONFIG_TCG_PASS_AREG0
+    tcg_out_push(s, TCG_AREG0);
+    stack_adjust += 4;
+#endif
+#else
     /* The first argument is already loaded with addrlo.  */
     arg_idx = 1;
-    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
-        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
-                    args[addrlo_idx + 1]);
-    }
     tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                  mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
+#endif
+
     tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
 
+#if TCG_TARGET_REG_BITS == 32
+    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
+        /* Pop and discard.  This is 2 bytes smaller than the add.  */
+        tcg_out_pop(s, TCG_REG_ECX);
+    } else if (stack_adjust != 0) {
+        tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
+    }
+#endif
+
     switch(opc) {
     case 0 | 4:
         tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
@@ -1359,45 +1413,42 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     }
 
     /* XXX: move that code at the end of the TB */
-    if (TCG_TARGET_REG_BITS == 64) {
-        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
-                    TCG_REG_RSI, data_reg);
-        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
-        stack_adjust = 0;
-    } else if (TARGET_LONG_BITS == 32) {
-        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
-        if (opc == 3) {
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
-            tcg_out_pushi(s, mem_index);
-            stack_adjust = 4;
-        } else {
-            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
-            stack_adjust = 0;
-        }
-    } else {
-        if (opc == 3) {
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
-            tcg_out_pushi(s, mem_index);
-            tcg_out_push(s, data_reg2);
-            tcg_out_push(s, data_reg);
-            stack_adjust = 12;
-        } else {
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
-            switch(opc) {
-            case 0:
-                tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
-                break;
-            case 1:
-                tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
-                break;
-            case 2:
-                tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg);
-                break;
-            }
-            tcg_out_pushi(s, mem_index);
-            stack_adjust = 4;
-        }
+#if TCG_TARGET_REG_BITS == 32
+    tcg_out_pushi(s, mem_index);
+    stack_adjust = 4;
+    if (opc == 3) {
+        tcg_out_push(s, data_reg2);
+        stack_adjust += 4;
     }
+    tcg_out_push(s, data_reg);
+    stack_adjust += 4;
+    if (TARGET_LONG_BITS == 64) {
+        tcg_out_push(s, args[addrlo_idx + 1]);
+        stack_adjust += 4;
+    }
+    tcg_out_push(s, args[addrlo_idx]);
+    stack_adjust += 4;
+#ifdef CONFIG_TCG_PASS_AREG0
+    tcg_out_push(s, TCG_AREG0);
+    stack_adjust += 4;
+#endif
+#else
+    tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+                TCG_REG_RSI, data_reg);
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
+    stack_adjust = 0;
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
+#endif
 
     tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
 
@@ -1962,9 +2013,15 @@ static void tcg_target_qemu_prologue(TCGContext *s)
         tcg_out_push(s, tcg_target_callee_save_regs[i]);
     }
 
-    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
-
+#if TCG_TARGET_REG_BITS == 32
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
+               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
+    tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[1], TCG_REG_ESP,
+               (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4);
+#else
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+#endif
+    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
 
     /* jmp *tb.  */
     tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
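
On 32-bit hosts the i386 backend now abandons regparm entirely (see the tcg_target_get_call_iarg_regs_count hunk above and the tcg.h hunk below): slow-path helper arguments are pushed on the stack in reverse order, and stack_adjust counts the bytes the caller must reclaim. A sketch of the sequence emitted for a 32-bit-guest load under CONFIG_TCG_PASS_AREG0, assuming TCG_AREG0 is EBP as defined in the header that follows:

    /* Emitted instructions (sketch):
     *   push mem_index        ; stack_adjust  = 4
     *   push addrlo           ; stack_adjust += 4
     *   push %ebp             ; TCG_AREG0 = env, stack_adjust += 4
     *   call helper_ldl_mmu
     *   add  $12, %esp        ; caller cleans up, standard cdecl
     * When only one word was pushed, "pop %ecx" replaces the add
     * because, as the comment in the hunk notes, its encoding is two
     * bytes shorter. */
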
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index adbb036905..c3cfe05f62 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -123,6 +123,7 @@ typedef enum {
 # define TCG_AREG0 TCG_REG_EBP
 #endif
 
-static inline void flush_icache_range(unsigned long start, unsigned long stop)
+static inline void flush_icache_range(tcg_target_ulong start,
+                                      tcg_target_ulong stop)
 {
 }
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index e3de79fdb6..e02dacc84f 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1452,12 +1452,25 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
                                TCG_REG_P7, TCG_REG_R3, TCG_REG_R57));
 }
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr,
+   int mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
     __ldl_mmu,
     __ldq_mmu,
 };
+#endif
 
 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 {
@@ -1479,8 +1492,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 
     /* Read the TLB entry */
     tcg_out_qemu_tlb(s, addr_reg, s_bits,
-                     offsetof(CPUState, tlb_table[mem_index][0].addr_read),
-                     offsetof(CPUState, tlb_table[mem_index][0].addend));
+                     offsetof(CPUArchState, tlb_table[mem_index][0].addr_read),
+                     offsetof(CPUArchState, tlb_table[mem_index][0].addend));
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
@@ -1517,6 +1530,15 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     if (!bswap || s_bits == 0) {
         tcg_out_bundle(s, miB,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
@@ -1547,12 +1569,25 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     }
 }
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif
 
 static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 {
@@ -1570,8 +1605,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 #endif
 
     tcg_out_qemu_tlb(s, addr_reg, opc,
-                     offsetof(CPUState, tlb_table[mem_index][0].addr_write),
-                     offsetof(CPUState, tlb_table[mem_index][0].addend));
+                     offsetof(CPUArchState, tlb_table[mem_index][0].addr_write),
+                     offsetof(CPUArchState, tlb_table[mem_index][0].addend));
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
@@ -1622,6 +1657,17 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
         data_reg = TCG_REG_R2;
     }
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
                                data_reg, TCG_REG_R3),
@@ -2368,6 +2414,6 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6);
 
     tcg_add_target_add_op_defs(ia64_op_defs);
-    tcg_set_frame(s, TCG_AREG0, offsetof(CPUState, temp_buf),
+    tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 }
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index c388089004..0631b9f180 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -146,7 +146,8 @@ typedef enum {
 /* Guest base is supported */
 #define TCG_TARGET_HAS_GUEST_BASE
 
-static inline void flush_icache_range(unsigned long start, unsigned long stop)
+static inline void flush_icache_range(tcg_target_ulong start,
+                                      tcg_target_ulong stop)
 {
     start = start & ~(32UL - 1UL);
     stop = (stop + (32UL - 1UL)) & ~(32UL - 1UL);
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index c5c32825f0..393ba07f25 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -750,6 +750,27 @@ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, int ret,
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr,
+   int mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -757,6 +778,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -764,6 +787,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif
 
 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                             int opc)
@@ -827,7 +851,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
     tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0);
     tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0,
-                    offsetof(CPUState, tlb_table[mem_index][0].addr_read) + addr_meml);
+                    offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + addr_meml);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
     tcg_out_opc_reg(s, OPC_AND, TCG_REG_T0, TCG_REG_T0, addr_regl);
 
@@ -837,7 +861,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     tcg_out_nop(s);
 
     tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0,
-                    offsetof(CPUState, tlb_table[mem_index][0].addr_read) + addr_memh);
+                    offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) + addr_memh);
 
     label1_ptr = s->code_ptr;
     tcg_out_opc_br(s, OPC_BEQ, addr_regh, TCG_REG_AT);
@@ -858,6 +882,15 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 # endif
     tcg_out_movi(s, TCG_TYPE_I32, sp_args++, mem_index);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9, (tcg_target_long)qemu_ld_helpers[s_bits]);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 on 32 bit */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0);
     tcg_out_nop(s);
 
@@ -893,7 +926,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     reloc_pc16(label1_ptr, (tcg_target_long) s->code_ptr);
 
     tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0,
-                    offsetof(CPUState, tlb_table[mem_index][0].addend));
+                    offsetof(CPUArchState, tlb_table[mem_index][0].addend));
     tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_V0, TCG_REG_A0, addr_regl);
 #else
     if (GUEST_BASE == (int16_t)GUEST_BASE) {
@@ -1013,7 +1046,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
     tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, TCG_AREG0);
     tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0,
-                    offsetof(CPUState, tlb_table[mem_index][0].addr_write) + addr_meml);
+                    offsetof(CPUArchState, tlb_table[mem_index][0].addr_write) + addr_meml);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
     tcg_out_opc_reg(s, OPC_AND, TCG_REG_T0, TCG_REG_T0, addr_regl);
 
@@ -1023,7 +1056,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     tcg_out_nop(s);
 
     tcg_out_opc_imm(s, OPC_LW, TCG_REG_AT, TCG_REG_A0,
-                    offsetof(CPUState, tlb_table[mem_index][0].addr_write) + addr_memh);
+                    offsetof(CPUArchState, tlb_table[mem_index][0].addr_write) + addr_memh);
 
     label1_ptr = s->code_ptr;
     tcg_out_opc_br(s, OPC_BEQ, addr_regh, TCG_REG_AT);
@@ -1069,6 +1102,17 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     }
 
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T9, (tcg_target_long)qemu_st_helpers[s_bits]);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal and incorrect for 64 on 32 bit */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0);
     tcg_out_nop(s);
 
@@ -1080,7 +1124,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     reloc_pc16(label1_ptr, (tcg_target_long) s->code_ptr);
 
     tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0,
-                    offsetof(CPUState, tlb_table[mem_index][0].addend));
+                    offsetof(CPUArchState, tlb_table[mem_index][0].addend));
     tcg_out_opc_reg(s, OPC_ADDU, TCG_REG_A0, TCG_REG_A0, addr_regl);
 #else
     if (GUEST_BASE == (int16_t)GUEST_BASE) {
@@ -1529,6 +1573,6 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);   /* stack pointer */
 
     tcg_add_target_add_op_defs(mips_op_defs);
-    tcg_set_frame(s, TCG_AREG0, offsetof(CPUState, temp_buf),
+    tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 }
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 477bc38b01..d3c804d9a4 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -108,7 +108,8 @@ typedef enum {
 #include <sys/cachectl.h>
 #endif
 
-static inline void flush_icache_range(unsigned long start, unsigned long stop)
+static inline void flush_icache_range(tcg_target_ulong start,
+                                      tcg_target_ulong stop)
 {
     cacheflush ((void *)start, stop-start, ICACHE);
 }
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index f5d9bf3b00..b0aa914798 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -508,6 +508,27 @@ static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg)
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr,
+   int mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -515,6 +536,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -522,6 +545,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif
 
 static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
 {
@@ -564,7 +588,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
     tcg_out32 (s, (LWZU
                    | RT (r1)
                    | RA (r0)
-                   | offsetof (CPUState, tlb_table[mem_index][0].addr_read)
+                   | offsetof (CPUArchState, tlb_table[mem_index][0].addr_read)
                    )
         );
     tcg_out32 (s, (RLWINM
@@ -598,6 +622,16 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
     tcg_out_movi (s, TCG_TYPE_I32, 5, mem_index);
 #endif
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
+
     tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
     switch (opc) {
     case 0|4:
@@ -760,7 +794,7 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
     tcg_out32 (s, (LWZU
                    | RT (r1)
                    | RA (r0)
-                   | offsetof (CPUState, tlb_table[mem_index][0].addr_write)
+                   | offsetof (CPUArchState, tlb_table[mem_index][0].addr_write)
                    )
         );
     tcg_out32 (s, (RLWINM
@@ -829,6 +863,17 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
     ir++;
 
     tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1);
     label2_ptr = s->code_ptr;
     tcg_out32 (s, B);
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 3f22aaac9d..2f37fd289b 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -98,5 +98,5 @@ typedef enum {
 #define TCG_TARGET_HAS_GUEST_BASE
 
 #define tcg_qemu_tb_exec(env, tb_ptr) \
-    ((long REGPARM __attribute__ ((longcall)) \
+    ((long __attribute__ ((longcall)) \
       (*)(void *, void *))code_gen_prologue)(env, tb_ptr)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 44193784f2..409a1ac1ce 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -552,6 +552,27 @@ static void tcg_out_ldsta (TCGContext *s, int ret, int addr,
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr,
+   int mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -559,12 +580,15 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
     __stl_mmu,
     __stq_mmu,
 };
+#endif
 
 static void tcg_out_tlb_read (TCGContext *s, int r0, int r1, int r2,
                               int addr_reg, int s_bits, int offset)
@@ -635,7 +659,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
     rbase = 0;
 
     tcg_out_tlb_read (s, r0, r1, r2, addr_reg, s_bits,
-                      offsetof (CPUState, tlb_table[mem_index][0].addr_read));
+                      offsetof (CPUArchState, tlb_table[mem_index][0].addr_read));
 
     tcg_out32 (s, CMP | BF (7) | RA (r2) | RB (r1) | CMP_L);
 
@@ -648,6 +672,15 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
     tcg_out_mov (s, TCG_TYPE_I64, 3, addr_reg);
     tcg_out_movi (s, TCG_TYPE_I64, 4, mem_index);
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
 
     switch (opc) {
@@ -782,7 +815,7 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
     rbase = 0;
 
     tcg_out_tlb_read (s, r0, r1, r2, addr_reg, opc,
-                      offsetof (CPUState, tlb_table[mem_index][0].addr_write));
+                      offsetof (CPUArchState, tlb_table[mem_index][0].addr_write));
 
     tcg_out32 (s, CMP | BF (7) | RA (r2) | RB (r1) | CMP_L);
 
@@ -796,6 +829,17 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
     tcg_out_rld (s, RLDICL, 4, data_reg, 0, 64 - (1 << (3 + opc)));
     tcg_out_movi (s, TCG_TYPE_I64, 5, mem_index);
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1);
 
     label2_ptr = s->code_ptr;
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 9317fe88ef..04662c15fd 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -301,6 +301,27 @@ static const uint8_t tcg_cond_to_ltr_cond[10] = {
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr,
+   int mmu_idx) */
 static void *qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -308,6 +329,8 @@ static void *qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static void *qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -315,6 +338,7 @@ static void *qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif
 
 static uint8_t *tb_ret_addr;
 
@@ -1439,9 +1463,9 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
     tgen64_andi_tmp(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
 
     if (is_store) {
-        ofs = offsetof(CPUState, tlb_table[mem_index][0].addr_write);
+        ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
     } else {
-        ofs = offsetof(CPUState, tlb_table[mem_index][0].addr_read);
+        ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
     }
     assert(ofs < 0x80000);
 
@@ -1483,9 +1507,29 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
             tcg_abort();
         }
         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+        /* XXX/FIXME: suboptimal */
+        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+                    tcg_target_call_iarg_regs[1]);
+        tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                    tcg_target_call_iarg_regs[0]);
+        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                    TCG_AREG0);
+#endif
         tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]);
     } else {
         tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+        /* XXX/FIXME: suboptimal */
+        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                    tcg_target_call_iarg_regs[2]);
+        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                    tcg_target_call_iarg_regs[1]);
+        tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                    tcg_target_call_iarg_regs[0]);
+        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                    TCG_AREG0);
+#endif
         tgen_calli(s, (tcg_target_ulong)qemu_ld_helpers[s_bits]);
 
         /* sign extension */
@@ -1515,7 +1559,7 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
     *(label1_ptr + 1) = ((unsigned long)s->code_ptr -
                          (unsigned long)label1_ptr) >> 1;
 
-    ofs = offsetof(CPUState, tlb_table[mem_index][0].addend);
+    ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
     assert(ofs < 0x80000);
 
     tcg_out_mem(s, 0, RXY_AG, arg0, arg1, TCG_AREG0, ofs);
@@ -2293,7 +2337,7 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
 
     tcg_add_target_add_op_defs(s390_op_defs);
-    tcg_set_frame(s, TCG_AREG0, offsetof(CPUState, temp_buf),
+    tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 }
 
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index e4cd6418a9..d12f90b705 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -100,6 +100,7 @@ enum {
     TCG_AREG0 = TCG_REG_R10,
 };
 
-static inline void flush_icache_range(unsigned long start, unsigned long stop)
+static inline void flush_icache_range(tcg_target_ulong start,
+                                      tcg_target_ulong stop)
 {
 }
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 5cd5a3b6f6..491c9797d3 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -59,6 +59,12 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 };
 #endif
 
+#ifdef CONFIG_TCG_PASS_AREG0
+#define ARG_OFFSET 1
+#else
+#define ARG_OFFSET 0
+#endif
+
 static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_L0,
     TCG_REG_L1,
@@ -86,9 +92,9 @@ static const int tcg_target_call_iarg_regs[6] = {
 
 static const int tcg_target_call_oarg_regs[] = {
     TCG_REG_O0,
-#if TCG_TARGET_REG_BITS == 32
-    TCG_REG_O1
-#endif
+    TCG_REG_O1,
+    TCG_REG_O2,
+    TCG_REG_O3,
 };
 
 static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
@@ -155,6 +161,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O3);
+#endif
         break;
     case 'I':
         ct->ct |= TCG_CT_CONST_S11;
@@ -582,7 +591,7 @@ static void tcg_out_brcond2_i32(TCGContext *s, TCGCond cond,
     }
     tcg_out_nop(s);
 
-    tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
+    tcg_out_label(s, label_next, s->code_ptr);
 }
 #endif
 
@@ -628,7 +637,7 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
         tcg_out_branch_i32(s, INSN_COND(tcg_cond_to_bcond[cond], 1), t);
         tcg_out_movi_imm13(s, ret, 1);
         tcg_out_movi_imm13(s, ret, 0);
-        tcg_out_label(s, t, (tcg_target_long)s->code_ptr);
+        tcg_out_label(s, t, s->code_ptr);
 #endif
         return;
     }
@@ -683,7 +692,7 @@ static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret,
 
         tcg_out_setcond_i32(s, tcg_unsigned_cond(cond), ret, al, bl, blconst);
 
-        tcg_out_label(s, lab, (tcg_target_long)s->code_ptr);
+        tcg_out_label(s, lab, s->code_ptr);
         break;
     }
 }
@@ -706,6 +715,27 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
 #include "../../softmmu_defs.h"
 
+#ifdef CONFIG_TCG_PASS_AREG0
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+   int mmu_idx) */
+static const void * const qemu_ld_helpers[4] = {
+    helper_ldb_mmu,
+    helper_ldw_mmu,
+    helper_ldl_mmu,
+    helper_ldq_mmu,
+};
+
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+   uintxx_t val, int mmu_idx) */
+static const void * const qemu_st_helpers[4] = {
+    helper_stb_mmu,
+    helper_stw_mmu,
+    helper_stl_mmu,
+    helper_stq_mmu,
+};
+#else
+/* legacy helper signature: __ld_mmu(target_ulong addr,
+   int mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
     __ldb_mmu,
     __ldw_mmu,
@@ -713,6 +743,8 @@ static const void * const qemu_ld_helpers[4] = {
     __ldq_mmu,
 };
 
+/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
+   int mmu_idx) */
 static const void * const qemu_st_helpers[4] = {
     __stb_mmu,
     __stw_mmu,
@@ -720,6 +752,7 @@ static const void * const qemu_st_helpers[4] = {
     __stq_mmu,
 };
 #endif
+#endif
 
 #if TARGET_LONG_BITS == 32
 #define TARGET_LD_OP LDUW
@@ -776,7 +809,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     tcg_out_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
 
     /* add arg1, x, arg1 */
-    tcg_out_addi(s, arg1, offsetof(CPUState,
+    tcg_out_addi(s, arg1, offsetof(CPUArchState,
                                    tlb_table[mem_index][0].addr_read));
 
     /* add env, arg1, arg1 */
@@ -801,6 +834,17 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 
     /* mov */
     tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
 
     /* XXX: move that code at the end of the TB */
     /* qemu_ld_helper[s_bits](arg0, arg1) */
@@ -988,7 +1032,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     tcg_out_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
 
     /* add arg1, x, arg1 */
-    tcg_out_addi(s, arg1, offsetof(CPUState,
+    tcg_out_addi(s, arg1, offsetof(CPUArchState,
                                    tlb_table[mem_index][0].addr_write));
 
     /* add env, arg1, arg1 */
@@ -1017,6 +1061,17 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     /* mov */
     tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
 
+#ifdef CONFIG_TCG_PASS_AREG0
+    /* XXX/FIXME: suboptimal */
+    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
+                tcg_target_call_iarg_regs[2]);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
+                tcg_target_call_iarg_regs[1]);
+    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+                tcg_target_call_iarg_regs[0]);
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+                TCG_AREG0);
+#endif
     /* XXX: move that code at the end of the TB */
     /* qemu_st_helper[s_bits](arg0, arg1, arg2) */
     tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[s_bits]
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index c3fe131119..ee2274d595 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -134,7 +134,8 @@ typedef enum {
 #define TCG_AREG0 TCG_REG_G6
 #endif
 
-static inline void flush_icache_range(unsigned long start, unsigned long stop)
+static inline void flush_icache_range(tcg_target_ulong start,
+                                      tcg_target_ulong stop)
 {
     unsigned long p;
 
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 351a0a3033..ccfcd1abe1 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -128,11 +128,11 @@ static void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type,
     }
 }
 
-static void tcg_out_label(TCGContext *s, int label_index, 
-                          tcg_target_long value)
+static void tcg_out_label(TCGContext *s, int label_index, void *ptr)
 {
     TCGLabel *l;
     TCGRelocation *r;
+    tcg_target_long value = (tcg_target_long)ptr;
 
     l = &s->labels[label_index];
     if (l->has_value)
@@ -173,11 +173,9 @@ void *tcg_malloc_internal(TCGContext *s, int size)
         /* big malloc: insert a new pool (XXX: could optimize) */
         p = g_malloc(sizeof(TCGPool) + size);
         p->size = size;
-        if (s->pool_current)
-            s->pool_current->next = p;
-        else
-            s->pool_first = p;
-        p->next = s->pool_current;
+        p->next = s->pool_first_large;
+        s->pool_first_large = p;
+        return p->data;
     } else {
         p = s->pool_current;
         if (!p) {
@@ -208,6 +206,12 @@ void *tcg_malloc_internal(TCGContext *s, int size)
 
 void tcg_pool_reset(TCGContext *s)
 {
+    TCGPool *p, *t;
+    for (p = s->pool_first_large; p; p = t) {
+        t = p->next;
+        g_free(p);
+    }
+    s->pool_first_large = NULL;
     s->pool_cur = s->pool_end = NULL;
     s->pool_current = NULL;
 }
@@ -253,8 +257,8 @@ void tcg_prologue_init(TCGContext *s)
     s->code_buf = code_gen_prologue;
     s->code_ptr = s->code_buf;
     tcg_target_qemu_prologue(s);
-    flush_icache_range((unsigned long)s->code_buf, 
-                       (unsigned long)s->code_ptr);
+    flush_icache_range((tcg_target_ulong)s->code_buf,
+                       (tcg_target_ulong)s->code_ptr);
 }
 
 void tcg_set_frame(TCGContext *s, int reg,
@@ -590,9 +594,6 @@ void tcg_register_helper(void *func, const char *name)
 void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
                    int sizemask, TCGArg ret, int nargs, TCGArg *args)
 {
-#if defined(TCG_TARGET_I386) && TCG_TARGET_REG_BITS < 64
-    int call_type;
-#endif
     int i;
     int real_args;
     int nb_rets;
@@ -617,9 +618,6 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
 
     *gen_opc_ptr++ = INDEX_op_call;
     nparam = gen_opparam_ptr++;
-#if defined(TCG_TARGET_I386) && TCG_TARGET_REG_BITS < 64
-    call_type = (flags & TCG_CALL_TYPE_MASK);
-#endif
     if (ret != TCG_CALL_DUMMY_ARG) {
 #if TCG_TARGET_REG_BITS < 64
         if (sizemask & 1) {
@@ -645,14 +643,6 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
 #if TCG_TARGET_REG_BITS < 64
         int is_64bit = sizemask & (1 << (i+1)*2);
         if (is_64bit) {
-#ifdef TCG_TARGET_I386
-            /* REGPARM case: if the third parameter is 64 bit, it is
-               allocated on the stack */
-            if (i == 2 && call_type == TCG_CALL_TYPE_REGPARM) {
-                call_type = TCG_CALL_TYPE_REGPARM_2;
-                flags = (flags & ~TCG_CALL_TYPE_MASK) | call_type;
-            }
-#endif
 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
             /* some targets want aligned 64 bit args */
             if (real_args & 1) {
@@ -2123,7 +2113,7 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
             break;
         case INDEX_op_set_label:
             tcg_reg_alloc_bb_end(s, s->reserved_regs);
-            tcg_out_label(s, args[0], (long)s->code_ptr);
+            tcg_out_label(s, args[0], s->code_ptr);
             break;
         case INDEX_op_call:
             dead_args = s->op_dead_args[op_index];
@@ -2176,8 +2166,9 @@ int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf)
     tcg_gen_code_common(s, gen_code_buf, -1);
 
     /* flush instruction cache */
-    flush_icache_range((unsigned long)gen_code_buf, 
-                       (unsigned long)s->code_ptr);
+    flush_icache_range((tcg_target_ulong)gen_code_buf,
+                       (tcg_target_ulong)s->code_ptr);
+
     return s->code_ptr -  gen_code_buf;
 }
 
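Besides the flush_icache_range cast changes, the tcg.c hunks fix oversized pool allocations: the old code linked a big block into the reusable chain with p->next = s->pool_current, corrupting the chain's forward order. Big blocks now live on their own pool_first_large list, which tcg_pool_reset() walks and frees outright. A sketch of the resulting allocation path (TCG_POOL_CHUNK_SIZE is the existing size threshold in tcg.c):

    void *tcg_malloc_internal(TCGContext *s, int size)
    {
        if (size > TCG_POOL_CHUNK_SIZE) {
            /* Big malloc: push onto the dedicated large list; freed
             * wholesale by tcg_pool_reset(). */
            TCGPool *p = g_malloc(sizeof(TCGPool) + size);
            p->size = size;
            p->next = s->pool_first_large;
            s->pool_first_large = p;
            return p->data;
        }
        /* ... small sizes keep carving from the reusable chunk chain ... */
    }
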
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 5c28239682..5f6c647ea5 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -32,9 +32,6 @@
 # error Unknown pointer size for tcg target
 #endif
 
-#include "tcg-target.h"
-#include "tcg-runtime.h"
-
 #if TCG_TARGET_REG_BITS == 32
 typedef int32_t tcg_target_long;
 typedef uint32_t tcg_target_ulong;
@@ -49,6 +46,9 @@ typedef uint64_t tcg_target_ulong;
 #error unsupported
 #endif
 
+#include "tcg-target.h"
+#include "tcg-runtime.h"
+
 #if TCG_TARGET_NB_REGS <= 32
 typedef uint32_t TCGRegSet;
 #elif TCG_TARGET_NB_REGS <= 64
@@ -252,11 +252,6 @@ typedef int TCGv_i64;
 #define TCGV_UNUSED_I64(x) x = MAKE_TCGV_I64(-1)
 
 /* call flags */
-#define TCG_CALL_TYPE_MASK      0x000f
-#define TCG_CALL_TYPE_STD       0x0000 /* standard C call */
-#define TCG_CALL_TYPE_REGPARM_1 0x0001 /* i386 style regparm call (1 reg) */
-#define TCG_CALL_TYPE_REGPARM_2 0x0002 /* i386 style regparm call (2 regs) */
-#define TCG_CALL_TYPE_REGPARM   0x0003 /* i386 style regparm call (3 regs) */
 /* A pure function only reads its arguments and TCG global variables
    and cannot raise exceptions. Hence a call to a pure function can be
    safely suppressed if the return value is not used. */
@@ -337,7 +332,7 @@ typedef struct TCGContext TCGContext;
 
 struct TCGContext {
     uint8_t *pool_cur, *pool_end;
-    TCGPool *pool_first, *pool_current;
+    TCGPool *pool_first, *pool_current, *pool_first_large;
     TCGLabel *labels;
     int nb_labels;
     TCGTemp *temps; /* globals first, temps after */
@@ -589,5 +584,5 @@ extern uint8_t code_gen_prologue[];
 /* TCG targets may use a different definition of tcg_qemu_tb_exec. */
 #if !defined(tcg_qemu_tb_exec)
 # define tcg_qemu_tb_exec(env, tb_ptr) \
-    ((long REGPARM (*)(void *, void *))code_gen_prologue)(env, tb_ptr)
+    ((tcg_target_ulong (*)(void *, void *))code_gen_prologue)(env, tb_ptr)
 #endif
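
With the TCG_CALL_TYPE_* flags gone and tcg_gen_callN stripped of its i386 REGPARM special case, helpers and translation blocks are reached through the standard C calling convention everywhere, so REGPARM also drops out of the tcg_qemu_tb_exec cast (here and in the ppc variant above). Usage is unchanged; a sketch of the dispatch call, assuming the era's tb->tc_ptr field for the TB's generated code:

    /* Enter generated code through the prologue; the return value
     * identifies the next TB to execute, as before. */
    tcg_target_ulong next_tb = tcg_qemu_tb_exec(env, tb->tc_ptr);
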
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index fc0880cec5..453f1875e2 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -798,6 +798,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_qemu_st8:
     case INDEX_op_qemu_st16:
     case INDEX_op_qemu_st32:
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_out_r(s, TCG_AREG0);
+#endif
         tcg_out_r(s, *args++);
         tcg_out_r(s, *args++);
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@@ -808,6 +811,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 #endif
         break;
     case INDEX_op_qemu_st64:
+#ifdef CONFIG_TCG_PASS_AREG0
+        tcg_out_r(s, TCG_AREG0);
+#endif
         tcg_out_r(s, *args++);
 #if TCG_TARGET_REG_BITS == 32
         tcg_out_r(s, *args++);
@@ -891,7 +897,7 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_clear(s->reserved_regs);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
     tcg_add_target_add_op_defs(tcg_target_op_defs);
-    tcg_set_frame(s, TCG_AREG0, offsetof(CPUState, temp_buf),
+    tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 }
 
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 03e0fd1a7f..30a0f21596 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -154,10 +154,11 @@ typedef enum {
 
 void tci_disas(uint8_t opc);
 
-unsigned long tcg_qemu_tb_exec(CPUState *env, uint8_t *tb_ptr);
+tcg_target_ulong tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr);
 #define tcg_qemu_tb_exec tcg_qemu_tb_exec
 
-static inline void flush_icache_range(unsigned long start, unsigned long stop)
+static inline void flush_icache_range(tcg_target_ulong start,
+                                      tcg_target_ulong stop)
 {
 }
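
TCI cannot shuffle host registers, so under CONFIG_TCG_PASS_AREG0 the bytecode itself grows an operand: the qemu_st opcodes above emit TCG_AREG0 ahead of the data and address registers, and the interpreter forwards it as the helpers' new first argument. A sketch of the resulting operand stream for qemu_st32 (order per the hunk; the trailing mem_index operand is assumed from the surrounding, unshown context):

    /* [opcode] [env reg = TCG_AREG0] [data reg] [addr reg] [mem_index] */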