summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--include/exec/exec-all.h4
-rw-r--r--tcg/aarch64/tcg-target.c141
-rw-r--r--tcg/aarch64/tcg-target.h1
-rw-r--r--tcg/arm/tcg-target.c174
-rw-r--r--tcg/arm/tcg-target.h1
-rw-r--r--tcg/i386/tcg-target.c82
-rw-r--r--tcg/i386/tcg-target.h2
-rw-r--r--tcg/ia64/tcg-target.c255
-rw-r--r--tcg/ia64/tcg-target.h6
-rw-r--r--tcg/mips/tcg-target.c145
-rw-r--r--tcg/mips/tcg-target.h1
-rw-r--r--tcg/optimize.c75
-rw-r--r--tcg/ppc/tcg-target.c202
-rw-r--r--tcg/ppc/tcg-target.h1
-rw-r--r--tcg/ppc64/tcg-target.c170
-rw-r--r--tcg/ppc64/tcg-target.h1
-rw-r--r--tcg/s390/tcg-target.c124
-rw-r--r--tcg/s390/tcg-target.h2
-rw-r--r--tcg/sparc/tcg-target.c161
-rw-r--r--tcg/sparc/tcg-target.h1
-rw-r--r--tcg/tcg-be-ldst.h4
-rw-r--r--tcg/tcg-op.h16
-rw-r--r--tcg/tcg-opc.h10
-rw-r--r--tcg/tcg.c257
-rw-r--r--tcg/tcg.h85
-rw-r--r--tcg/tci/tcg-target.c50
-rw-r--r--tcg/tci/tcg-target.h1
-rw-r--r--translate-all.c13
28 files changed, 829 insertions, 1156 deletions
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index f9ac332f9d..0766e24f8e 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -145,7 +145,7 @@ struct TranslationBlock {
 #define CF_COUNT_MASK  0x7fff
 #define CF_LAST_IO     0x8000 /* Last insn may be an IO access.  */
 
-    uint8_t *tc_ptr;    /* pointer to the translated code */
+    void *tc_ptr;    /* pointer to the translated code */
     /* next matching tb for physical address. */
     struct TranslationBlock *phys_hash_next;
     /* first and second physical page containing code. The lower bit
@@ -229,7 +229,7 @@ void ppc_tb_set_jmp_target(unsigned long jmp_addr, unsigned long addr);
 static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
 {
     /* patch the branch destination */
-    *(uint32_t *)jmp_addr = addr - (jmp_addr + 4);
+    stl_p((void*)jmp_addr, addr - (jmp_addr + 4));
     /* no need to flush icache explicitly */
 }
 #elif defined(__aarch64__)
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 0a580b65ea..77bb6d97de 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -63,40 +63,34 @@ static const int tcg_target_call_oarg_regs[1] = {
 # endif
 #endif
 
-static inline void reloc_pc26(void *code_ptr, intptr_t target)
+static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
 {
-    intptr_t offset = (target - (intptr_t)code_ptr) / 4;
+    ptrdiff_t offset = target - code_ptr;
+    assert(offset == sextract64(offset, 0, 26));
     /* read instruction, mask away previous PC_REL26 parameter contents,
        set the proper offset, then write back the instruction. */
-    uint32_t insn = *(uint32_t *)code_ptr;
-    insn = deposit32(insn, 0, 26, offset);
-    *(uint32_t *)code_ptr = insn;
+    *code_ptr = deposit32(*code_ptr, 0, 26, offset);
 }
 
-static inline void reloc_pc19(void *code_ptr, intptr_t target)
+static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
 {
-    intptr_t offset = (target - (intptr_t)code_ptr) / 4;
-    /* read instruction, mask away previous PC_REL19 parameter contents,
-       set the proper offset, then write back the instruction. */
-    uint32_t insn = *(uint32_t *)code_ptr;
-    insn = deposit32(insn, 5, 19, offset);
-    *(uint32_t *)code_ptr = insn;
+    ptrdiff_t offset = target - code_ptr;
+    assert(offset == sextract64(offset, 0, 19));
+    *code_ptr = deposit32(*code_ptr, 5, 19, offset);
 }
 
-static inline void patch_reloc(uint8_t *code_ptr, int type,
+static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
                                intptr_t value, intptr_t addend)
 {
-    value += addend;
-
+    assert(addend == 0);
     switch (type) {
     case R_AARCH64_JUMP26:
     case R_AARCH64_CALL26:
-        reloc_pc26(code_ptr, value);
+        reloc_pc26(code_ptr, (tcg_insn_unit *)value);
         break;
     case R_AARCH64_CONDBR19:
-        reloc_pc19(code_ptr, value);
+        reloc_pc19(code_ptr, (tcg_insn_unit *)value);
         break;
-
     default:
         tcg_abort();
     }
@@ -794,15 +788,10 @@ static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
     }
 }
 
-static inline void tcg_out_goto(TCGContext *s, intptr_t target)
+static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
 {
-    intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
-
-    if (offset < -0x02000000 || offset >= 0x02000000) {
-        /* out of 26bit range */
-        tcg_abort();
-    }
-
+    ptrdiff_t offset = target - s->code_ptr;
+    assert(offset == sextract64(offset, 0, 26));
     tcg_out_insn(s, 3206, B, offset);
 }
 
@@ -828,29 +817,23 @@ static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
     tcg_out_insn(s, 3207, BLR, reg);
 }
 
-static inline void tcg_out_call(TCGContext *s, intptr_t target)
+static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
 {
-    intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
-
-    if (offset < -0x02000000 || offset >= 0x02000000) { /* out of 26bit rng */
-        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target);
-        tcg_out_callr(s, TCG_REG_TMP);
-    } else {
+    ptrdiff_t offset = target - s->code_ptr;
+    if (offset == sextract64(offset, 0, 26)) {
         tcg_out_insn(s, 3206, BL, offset);
+    } else {
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
+        tcg_out_callr(s, TCG_REG_TMP);
     }
 }
 
 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
 {
-    intptr_t target = addr;
-    intptr_t offset = (target - (intptr_t)jmp_addr) / 4;
+    tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
+    tcg_insn_unit *target = (tcg_insn_unit *)addr;
 
-    if (offset < -0x02000000 || offset >= 0x02000000) {
-        /* out of 26bit range */
-        tcg_abort();
-    }
-
-    patch_reloc((uint8_t *)jmp_addr, R_AARCH64_JUMP26, target, 0);
+    reloc_pc26(code_ptr, target);
     flush_icache_range(jmp_addr, jmp_addr + 4);
 }
 
@@ -862,7 +845,7 @@ static inline void tcg_out_goto_label(TCGContext *s, int label_index)
         tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, label_index, 0);
         tcg_out_goto_noaddr(s);
     } else {
-        tcg_out_goto(s, l->u.value);
+        tcg_out_goto(s, l->u.value_ptr);
     }
 }
 
@@ -884,9 +867,8 @@ static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
         tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, label, 0);
         offset = tcg_in32(s) >> 5;
     } else {
-        offset = l->u.value - (uintptr_t)s->code_ptr;
-        offset >>= 2;
-        assert(offset >= -0x40000 && offset < 0x40000);
+        offset = l->u.value_ptr - s->code_ptr;
+        assert(offset == sextract64(offset, 0, 19));
     }
 
     if (need_cmp) {
@@ -982,7 +964,7 @@ static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
  *                                     int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_ld_helpers[16] = {
+static void * const qemu_ld_helpers[16] = {
     [MO_UB]   = helper_ret_ldub_mmu,
     [MO_LEUW] = helper_le_lduw_mmu,
     [MO_LEUL] = helper_le_ldul_mmu,
@@ -995,7 +977,7 @@ static const void * const qemu_ld_helpers[16] = {
 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
  *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_st_helpers[16] = {
+static void * const qemu_st_helpers[16] = {
     [MO_UB]   = helper_ret_stb_mmu,
     [MO_LEUW] = helper_le_stw_mmu,
     [MO_LEUL] = helper_le_stl_mmu,
@@ -1005,11 +987,11 @@ static const void * const qemu_st_helpers[16] = {
     [MO_BEQ]  = helper_be_stq_mmu,
 };
 
-static inline void tcg_out_adr(TCGContext *s, TCGReg rd, uintptr_t addr)
+static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
 {
-    addr -= (uintptr_t)s->code_ptr;
-    assert(addr == sextract64(addr, 0, 21));
-    tcg_out_insn(s, 3406, ADR, rd, addr);
+    ptrdiff_t offset = tcg_pcrel_diff(s, target);
+    assert(offset == sextract64(offset, 0, 21));
+    tcg_out_insn(s, 3406, ADR, rd, offset);
 }
 
 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
@@ -1017,20 +999,20 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     TCGMemOp opc = lb->opc;
     TCGMemOp size = opc & MO_SIZE;
 
-    reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
+    reloc_pc19(lb->label_ptr[0], s->code_ptr);
 
     tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0);
     tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
-    tcg_out_adr(s, TCG_REG_X3, (intptr_t)lb->raddr);
-    tcg_out_call(s, (intptr_t)qemu_ld_helpers[opc & ~MO_SIGN]);
+    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
+    tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]);
     if (opc & MO_SIGN) {
         tcg_out_sxt(s, TCG_TYPE_I64, size, lb->datalo_reg, TCG_REG_X0);
     } else {
         tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
     }
 
-    tcg_out_goto(s, (intptr_t)lb->raddr);
+    tcg_out_goto(s, lb->raddr);
 }
 
 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
@@ -1038,21 +1020,21 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     TCGMemOp opc = lb->opc;
     TCGMemOp size = opc & MO_SIZE;
 
-    reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
+    reloc_pc19(lb->label_ptr[0], s->code_ptr);
 
     tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0);
     tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
     tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
-    tcg_out_adr(s, TCG_REG_X4, (intptr_t)lb->raddr);
-    tcg_out_call(s, (intptr_t)qemu_st_helpers[opc]);
-    tcg_out_goto(s, (intptr_t)lb->raddr);
+    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
+    tcg_out_call(s, qemu_st_helpers[opc]);
+    tcg_out_goto(s, lb->raddr);
 }
 
 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
                                 TCGReg data_reg, TCGReg addr_reg,
-                                int mem_index,
-                                uint8_t *raddr, uint8_t *label_ptr)
+                                int mem_index, tcg_insn_unit *raddr,
+                                tcg_insn_unit *label_ptr)
 {
     TCGLabelQemuLdst *label = new_ldst_label(s);
 
@@ -1070,7 +1052,8 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
    the slow path. Generated code returns the host addend in X1,
    clobbers X0,X2,X3,TMP. */
 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp s_bits,
-                             uint8_t **label_ptr, int mem_index, bool is_read)
+                             tcg_insn_unit **label_ptr, int mem_index,
+                             bool is_read)
 {
     TCGReg base = TCG_AREG0;
     int tlb_offset = is_read ?
@@ -1218,7 +1201,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
 {
 #ifdef CONFIG_SOFTMMU
     TCGMemOp s_bits = memop & MO_SIZE;
-    uint8_t *label_ptr;
+    tcg_insn_unit *label_ptr;
 
     tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
     tcg_out_qemu_ld_direct(s, memop, data_reg, addr_reg, TCG_REG_X1);
@@ -1235,7 +1218,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
 {
 #ifdef CONFIG_SOFTMMU
     TCGMemOp s_bits = memop & MO_SIZE;
-    uint8_t *label_ptr;
+    tcg_insn_unit *label_ptr;
 
     tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
     tcg_out_qemu_st_direct(s, memop, data_reg, addr_reg, TCG_REG_X1);
@@ -1247,7 +1230,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
 #endif /* CONFIG_SOFTMMU */
 }
 
-static uint8_t *tb_ret_addr;
+static tcg_insn_unit *tb_ret_addr;
 
 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                        const TCGArg args[TCG_MAX_OP_ARGS],
@@ -1270,7 +1253,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     switch (opc) {
     case INDEX_op_exit_tb:
         tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
-        tcg_out_goto(s, (intptr_t)tb_ret_addr);
+        tcg_out_goto(s, tb_ret_addr);
         break;
 
     case INDEX_op_goto_tb:
@@ -1278,19 +1261,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 #error "USE_DIRECT_JUMP required for aarch64"
 #endif
         assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
-        s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
+        s->tb_jmp_offset[a0] = tcg_current_code_size(s);
         /* actual branch destination will be patched by
            aarch64_tb_set_jmp_target later, beware retranslation. */
         tcg_out_goto_noaddr(s);
-        s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
-        break;
-
-    case INDEX_op_call:
-        if (const_args[0]) {
-            tcg_out_call(s, a0);
-        } else {
-            tcg_out_callr(s, a0);
-        }
+        s->tb_next_offset[a0] = tcg_current_code_size(s);
         break;
 
     case INDEX_op_br:
@@ -1613,13 +1588,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
         break;
 
+    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
     case INDEX_op_mov_i64:
-    case INDEX_op_mov_i32:
+    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
     case INDEX_op_movi_i64:
-    case INDEX_op_movi_i32:
-        /* Always implemented with tcg_out_mov/i, never with tcg_out_op.  */
+    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
     default:
-        /* Opcode not implemented.  */
         tcg_abort();
     }
 
@@ -1629,15 +1603,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 static const TCGTargetOpDef aarch64_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
-    { INDEX_op_call, { "ri" } },
     { INDEX_op_br, { } },
 
-    { INDEX_op_mov_i32, { "r", "r" } },
-    { INDEX_op_mov_i64, { "r", "r" } },
-
-    { INDEX_op_movi_i32, { "r" } },
-    { INDEX_op_movi_i64, { "r" } },
-
     { INDEX_op_ld8u_i32, { "r", "r" } },
     { INDEX_op_ld8s_i32, { "r", "r" } },
     { INDEX_op_ld16u_i32, { "r", "r" } },
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index a1d4322165..a32aea66fa 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -13,6 +13,7 @@
 #ifndef TCG_TARGET_AARCH64
 #define TCG_TARGET_AARCH64 1
 
+#define TCG_TARGET_INSN_UNIT_SIZE  4
 #undef TCG_TARGET_STACK_GROWSUP
 
 typedef enum {
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 7535175f9c..538ca2aed0 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -115,36 +115,18 @@ static const int tcg_target_call_oarg_regs[2] = {
 
 #define TCG_REG_TMP  TCG_REG_R12
 
-static inline void reloc_abs32(void *code_ptr, intptr_t target)
+static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
 {
-    *(uint32_t *) code_ptr = target;
+    ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
+    *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
 }
 
-static inline void reloc_pc24(void *code_ptr, intptr_t target)
-{
-    uint32_t offset = ((target - ((intptr_t)code_ptr + 8)) >> 2);
-
-    *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & ~0xffffff)
-                             | (offset & 0xffffff);
-}
-
-static void patch_reloc(uint8_t *code_ptr, int type,
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
-    switch (type) {
-    case R_ARM_ABS32:
-        reloc_abs32(code_ptr, value);
-        break;
-
-    case R_ARM_CALL:
-    case R_ARM_JUMP24:
-    default:
-        tcg_abort();
-
-    case R_ARM_PC24:
-        reloc_pc24(code_ptr, value);
-        break;
-    }
+    assert(type == R_ARM_PC24);
+    assert(addend == 0);
+    reloc_pc24(code_ptr, (tcg_insn_unit *)value);
 }
 
 #define TCG_CT_CONST_ARM  0x100
@@ -379,20 +361,18 @@ static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
 
 static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
 {
-    /* We pay attention here to not modify the branch target by skipping
-       the corresponding bytes. This ensure that caches and memory are
+    /* We pay attention here to not modify the branch target by masking
+       the corresponding bytes.  This ensure that caches and memory are
        kept coherent during retranslation. */
-    s->code_ptr += 3;
-    tcg_out8(s, (cond << 4) | 0x0a);
+    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a));
 }
 
 static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
 {
-    /* We pay attention here to not modify the branch target by skipping
-       the corresponding bytes. This ensure that caches and memory are
+    /* We pay attention here to not modify the branch target by masking
+       the corresponding bytes.  This ensure that caches and memory are
        kept coherent during retranslation. */
-    s->code_ptr += 3;
-    tcg_out8(s, (cond << 4) | 0x0b);
+    tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b));
 }
 
 static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
@@ -1010,20 +990,21 @@ static inline void tcg_out_st8(TCGContext *s, int cond,
  * with the code buffer limited to 16MB we wouldn't need the long case.
  * But we also use it for the tail-call to the qemu_ld/st helpers, which does.
  */
-static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr)
+static inline void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
 {
-    int32_t disp = addr - (tcg_target_long) s->code_ptr;
+    intptr_t addri = (intptr_t)addr;
+    ptrdiff_t disp = tcg_pcrel_diff(s, addr);
 
-    if ((addr & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
+    if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
         tcg_out_b(s, cond, disp);
         return;
     }
 
-    tcg_out_movi32(s, cond, TCG_REG_TMP, addr);
+    tcg_out_movi32(s, cond, TCG_REG_TMP, addri);
     if (use_armv5t_instructions) {
         tcg_out_bx(s, cond, TCG_REG_TMP);
     } else {
-        if (addr & 1) {
+        if (addri & 1) {
             tcg_abort();
         }
         tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP);
@@ -1032,39 +1013,28 @@ static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr)
 
 /* The call case is mostly used for helpers - so it's not unreasonable
  * for them to be beyond branch range */
-static inline void tcg_out_call(TCGContext *s, uint32_t addr)
+static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
 {
-    int32_t val;
+    intptr_t addri = (intptr_t)addr;
+    ptrdiff_t disp = tcg_pcrel_diff(s, addr);
 
-    val = addr - (tcg_target_long) s->code_ptr;
-    if (val - 8 < 0x02000000 && val - 8 >= -0x02000000) {
-        if (addr & 1) {
+    if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
+        if (addri & 1) {
             /* Use BLX if the target is in Thumb mode */
             if (!use_armv5t_instructions) {
                 tcg_abort();
             }
-            tcg_out_blx_imm(s, val);
+            tcg_out_blx_imm(s, disp);
         } else {
-            tcg_out_bl(s, COND_AL, val);
+            tcg_out_bl(s, COND_AL, disp);
         }
     } else if (use_armv7_instructions) {
-        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addr);
+        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
         tcg_out_blx(s, COND_AL, TCG_REG_TMP);
     } else {
         tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
         tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
-        tcg_out32(s, addr);
-    }
-}
-
-static inline void tcg_out_callr(TCGContext *s, int cond, int arg)
-{
-    if (use_armv5t_instructions) {
-        tcg_out_blx(s, cond, arg);
-    } else {
-        tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R14, 0,
-                        TCG_REG_PC, SHIFT_IMM_LSL(0));
-        tcg_out_bx(s, cond, arg);
+        tcg_out32(s, addri);
     }
 }
 
@@ -1073,9 +1043,9 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index)
     TCGLabel *l = &s->labels[label_index];
 
     if (l->has_value) {
-        tcg_out_goto(s, cond, l->u.value);
+        tcg_out_goto(s, cond, l->u.value_ptr);
     } else {
-        tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, label_index, 31337);
+        tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, label_index, 0);
         tcg_out_b_noaddr(s, cond);
     }
 }
@@ -1084,7 +1054,7 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index)
 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
  *                                     int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_ld_helpers[16] = {
+static void * const qemu_ld_helpers[16] = {
     [MO_UB]   = helper_ret_ldub_mmu,
     [MO_SB]   = helper_ret_ldsb_mmu,
 
@@ -1104,7 +1074,7 @@ static const void * const qemu_ld_helpers[16] = {
 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
  *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_st_helpers[16] = {
+static void * const qemu_st_helpers[16] = {
     [MO_UB]   = helper_ret_stb_mmu,
     [MO_LEUW] = helper_le_stw_mmu,
     [MO_LEUL] = helper_le_stl_mmu,
@@ -1256,7 +1226,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
                                 TCGReg datalo, TCGReg datahi, TCGReg addrlo,
                                 TCGReg addrhi, int mem_index,
-                                uint8_t *raddr, uint8_t *label_ptr)
+                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
 {
     TCGLabelQemuLdst *label = new_ldst_label(s);
 
@@ -1275,9 +1245,9 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
     TCGReg argreg, datalo, datahi;
     TCGMemOp opc = lb->opc;
-    uintptr_t func;
+    void *func;
 
-    reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr);
+    reloc_pc24(lb->label_ptr[0], s->code_ptr);
 
     argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
     if (TARGET_LONG_BITS == 64) {
@@ -1292,9 +1262,9 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
        icache usage.  For pre-armv6, use the signed helpers since we do
        not have a single insn sign-extend.  */
     if (use_armv6_instructions) {
-        func = (uintptr_t)qemu_ld_helpers[opc & ~MO_SIGN];
+        func = qemu_ld_helpers[opc & ~MO_SIGN];
     } else {
-        func = (uintptr_t)qemu_ld_helpers[opc];
+        func = qemu_ld_helpers[opc];
         if (opc & MO_SIGN) {
             opc = MO_UL;
         }
@@ -1328,7 +1298,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
         break;
     }
 
-    tcg_out_goto(s, COND_AL, (tcg_target_long)lb->raddr);
+    tcg_out_goto(s, COND_AL, lb->raddr);
 }
 
 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
@@ -1336,7 +1306,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     TCGReg argreg, datalo, datahi;
     TCGMemOp opc = lb->opc;
 
-    reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr);
+    reloc_pc24(lb->label_ptr[0], s->code_ptr);
 
     argreg = TCG_REG_R0;
     argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
@@ -1368,7 +1338,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
 
     /* Tail-call to the helper, which will return to the fast path.  */
-    tcg_out_goto(s, COND_AL, (uintptr_t)qemu_st_helpers[opc]);
+    tcg_out_goto(s, COND_AL, qemu_st_helpers[opc]);
 }
 #endif /* SOFTMMU */
 
@@ -1499,7 +1469,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
 #ifdef CONFIG_SOFTMMU
     int mem_index;
     TCGReg addend;
-    uint8_t *label_ptr;
+    tcg_insn_unit *label_ptr;
 #endif
 
     datalo = *args++;
@@ -1628,7 +1598,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 #ifdef CONFIG_SOFTMMU
     int mem_index;
     TCGReg addend;
-    uint8_t *label_ptr;
+    tcg_insn_unit *label_ptr;
 #endif
 
     datalo = *args++;
@@ -1660,7 +1630,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 #endif
 }
 
-static uint8_t *tb_ret_addr;
+static tcg_insn_unit *tb_ret_addr;
 
 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                 const TCGArg *args, const int *const_args)
@@ -1670,51 +1640,21 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     switch (opc) {
     case INDEX_op_exit_tb:
-        if (use_armv7_instructions || check_fit_imm(args[0])) {
-            tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
-            tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr);
-        } else {
-            uint8_t *ld_ptr = s->code_ptr;
-            tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0);
-            tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr);
-            *ld_ptr = (uint8_t) (s->code_ptr - ld_ptr) - 8;
-            tcg_out32(s, args[0]);
-        }
+        tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
+        tcg_out_goto(s, COND_AL, tb_ret_addr);
         break;
     case INDEX_op_goto_tb:
         if (s->tb_jmp_offset) {
             /* Direct jump method */
-#if defined(USE_DIRECT_JUMP)
-            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
+            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
             tcg_out_b_noaddr(s, COND_AL);
-#else
-            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
-            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
-            tcg_out32(s, 0);
-#endif
         } else {
             /* Indirect jump method */
-#if 1
-            c = (int) (s->tb_next + args[0]) - ((int) s->code_ptr + 8);
-            if (c > 0xfff || c < -0xfff) {
-                tcg_out_movi32(s, COND_AL, TCG_REG_R0,
-                                (tcg_target_long) (s->tb_next + args[0]));
-                tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, 0);
-            } else
-                tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, c);
-#else
-            tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0);
-            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, 0);
-            tcg_out32(s, (tcg_target_long) (s->tb_next + args[0]));
-#endif
+            intptr_t ptr = (intptr_t)(s->tb_next + args[0]);
+            tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff);
+            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff);
         }
-        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
-        break;
-    case INDEX_op_call:
-        if (const_args[0])
-            tcg_out_call(s, args[0]);
-        else
-            tcg_out_callr(s, COND_AL, args[0]);
+        s->tb_next_offset[args[0]] = tcg_current_code_size(s);
         break;
     case INDEX_op_br:
         tcg_out_goto_label(s, COND_AL, args[0]);
@@ -1745,13 +1685,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
         break;
 
-    case INDEX_op_mov_i32:
-        tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
-                        args[0], 0, args[1], SHIFT_IMM_LSL(0));
-        break;
-    case INDEX_op_movi_i32:
-        tcg_out_movi32(s, COND_AL, args[0], args[1]);
-        break;
     case INDEX_op_movcond_i32:
         /* Constraints mean that v2 is always in the same register as dest,
          * so we only need to do "if condition passed, move v1 to dest".
@@ -1967,6 +1900,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
         break;
 
+    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
+    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
+    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
     default:
         tcg_abort();
     }
@@ -1975,12 +1911,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 static const TCGTargetOpDef arm_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
-    { INDEX_op_call, { "ri" } },
     { INDEX_op_br, { } },
 
-    { INDEX_op_mov_i32, { "r", "r" } },
-    { INDEX_op_movi_i32, { "r" } },
-
     { INDEX_op_ld8u_i32, { "r", "r" } },
     { INDEX_op_ld8s_i32, { "r", "r" } },
     { INDEX_op_ld16u_i32, { "r", "r" } },
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 1bc5dacec0..73f10c4424 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -26,6 +26,7 @@
 #define TCG_TARGET_ARM 1
 
 #undef TCG_TARGET_STACK_GROWSUP
+#define TCG_TARGET_INSN_UNIT_SIZE 4
 
 typedef enum {
     TCG_REG_R0 = 0,
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 34ece1f80b..a373073ff8 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -139,9 +139,9 @@ static bool have_bmi2;
 # define have_bmi2 0
 #endif
 
-static uint8_t *tb_ret_addr;
+static tcg_insn_unit *tb_ret_addr;
 
-static void patch_reloc(uint8_t *code_ptr, int type,
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
     value += addend;
@@ -151,14 +151,14 @@ static void patch_reloc(uint8_t *code_ptr, int type,
         if (value != (int32_t)value) {
             tcg_abort();
         }
-        *(uint32_t *)code_ptr = value;
+        tcg_patch32(code_ptr, value);
         break;
     case R_386_PC8:
         value -= (uintptr_t)code_ptr;
         if (value != (int8_t)value) {
             tcg_abort();
         }
-        *(uint8_t *)code_ptr = value;
+        tcg_patch8(code_ptr, value);
         break;
     default:
         tcg_abort();
@@ -859,7 +859,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
     TCGLabel *l = &s->labels[label_index];
 
     if (l->has_value) {
-        val = l->u.value - (intptr_t)s->code_ptr;
+        val = tcg_pcrel_diff(s, l->u.value_ptr);
         val1 = val - 2;
         if ((int8_t)val1 == val1) {
             if (opc == -1) {
@@ -1099,26 +1099,26 @@ static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
 }
 #endif
 
-static void tcg_out_branch(TCGContext *s, int call, uintptr_t dest)
+static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
 {
-    intptr_t disp = dest - (intptr_t)s->code_ptr - 5;
+    intptr_t disp = tcg_pcrel_diff(s, dest) - 5;
 
     if (disp == (int32_t)disp) {
         tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
         tcg_out32(s, disp);
     } else {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest);
         tcg_out_modrm(s, OPC_GRP5,
                       call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
     }
 }
 
-static inline void tcg_out_calli(TCGContext *s, uintptr_t dest)
+static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
 {
     tcg_out_branch(s, 1, dest);
 }
 
-static void tcg_out_jmp(TCGContext *s, uintptr_t dest)
+static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
 {
     tcg_out_branch(s, 0, dest);
 }
@@ -1127,7 +1127,7 @@ static void tcg_out_jmp(TCGContext *s, uintptr_t dest)
 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
  *                                     int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_ld_helpers[16] = {
+static void * const qemu_ld_helpers[16] = {
     [MO_UB]   = helper_ret_ldub_mmu,
     [MO_LEUW] = helper_le_lduw_mmu,
     [MO_LEUL] = helper_le_ldul_mmu,
@@ -1140,7 +1140,7 @@ static const void * const qemu_ld_helpers[16] = {
 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
  *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_st_helpers[16] = {
+static void * const qemu_st_helpers[16] = {
     [MO_UB]   = helper_ret_stb_mmu,
     [MO_LEUW] = helper_le_stw_mmu,
     [MO_LEUL] = helper_le_stl_mmu,
@@ -1173,7 +1173,7 @@ static const void * const qemu_st_helpers[16] = {
 
 static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                                     int mem_index, TCGMemOp s_bits,
-                                    uint8_t **label_ptr, int which)
+                                    tcg_insn_unit **label_ptr, int which)
 {
     const TCGReg r0 = TCG_REG_L0;
     const TCGReg r1 = TCG_REG_L1;
@@ -1247,8 +1247,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
                                 TCGReg datalo, TCGReg datahi,
                                 TCGReg addrlo, TCGReg addrhi,
-                                int mem_index, uint8_t *raddr,
-                                uint8_t **label_ptr)
+                                int mem_index, tcg_insn_unit *raddr,
+                                tcg_insn_unit **label_ptr)
 {
     TCGLabelQemuLdst *label = new_ldst_label(s);
 
@@ -1273,12 +1273,12 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 {
     TCGMemOp opc = l->opc;
     TCGReg data_reg;
-    uint8_t **label_ptr = &l->label_ptr[0];
+    tcg_insn_unit **label_ptr = &l->label_ptr[0];
 
     /* resolve label address */
-    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
+    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
     if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
-        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
+        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
     }
 
     if (TCG_TARGET_REG_BITS == 32) {
@@ -1308,7 +1308,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
                      (uintptr_t)l->raddr);
     }
 
-    tcg_out_calli(s, (uintptr_t)qemu_ld_helpers[opc & ~MO_SIGN]);
+    tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]);
 
     data_reg = l->datalo_reg;
     switch (opc & MO_SSIZE) {
@@ -1346,7 +1346,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
     }
 
     /* Jump to the code corresponding to next IR of qemu_st */
-    tcg_out_jmp(s, (uintptr_t)l->raddr);
+    tcg_out_jmp(s, l->raddr);
 }
 
 /*
@@ -1356,13 +1356,13 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 {
     TCGMemOp opc = l->opc;
     TCGMemOp s_bits = opc & MO_SIZE;
-    uint8_t **label_ptr = &l->label_ptr[0];
+    tcg_insn_unit **label_ptr = &l->label_ptr[0];
     TCGReg retaddr;
 
     /* resolve label address */
-    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
+    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
     if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
-        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
+        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
     }
 
     if (TCG_TARGET_REG_BITS == 32) {
@@ -1413,7 +1413,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 
     /* "Tail call" to the helper, with the return address back inline.  */
     tcg_out_push(s, retaddr);
-    tcg_out_jmp(s, (uintptr_t)qemu_st_helpers[opc]);
+    tcg_out_jmp(s, qemu_st_helpers[opc]);
 }
 #elif defined(__x86_64__) && defined(__linux__)
 # include <asm/prctl.h>
@@ -1534,7 +1534,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
 #if defined(CONFIG_SOFTMMU)
     int mem_index;
     TCGMemOp s_bits;
-    uint8_t *label_ptr[2];
+    tcg_insn_unit *label_ptr[2];
 #endif
 
     datalo = *args++;
@@ -1665,7 +1665,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 #if defined(CONFIG_SOFTMMU)
     int mem_index;
     TCGMemOp s_bits;
-    uint8_t *label_ptr[2];
+    tcg_insn_unit *label_ptr[2];
 #endif
 
     datalo = *args++;
@@ -1731,35 +1731,24 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     switch(opc) {
     case INDEX_op_exit_tb:
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
-        tcg_out_jmp(s, (uintptr_t)tb_ret_addr);
+        tcg_out_jmp(s, tb_ret_addr);
         break;
     case INDEX_op_goto_tb:
         if (s->tb_jmp_offset) {
             /* direct jump method */
             tcg_out8(s, OPC_JMP_long); /* jmp im */
-            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
+            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
             tcg_out32(s, 0);
         } else {
             /* indirect jump method */
             tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                  (intptr_t)(s->tb_next + args[0]));
         }
-        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
-        break;
-    case INDEX_op_call:
-        if (const_args[0]) {
-            tcg_out_calli(s, args[0]);
-        } else {
-            /* call *reg */
-            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
-        }
+        s->tb_next_offset[args[0]] = tcg_current_code_size(s);
         break;
     case INDEX_op_br:
         tcg_out_jxx(s, JCC_JMP, args[0], 0);
         break;
-    case INDEX_op_movi_i32:
-        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
-        break;
     OP_32_64(ld8u):
         /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
         tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
@@ -2009,9 +1998,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_setcond2(s, args, const_args);
         break;
 #else /* TCG_TARGET_REG_BITS == 64 */
-    case INDEX_op_movi_i64:
-        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
-        break;
     case INDEX_op_ld32s_i64:
         tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
         break;
@@ -2068,6 +2054,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         }
         break;
 
+    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
+    case INDEX_op_mov_i64:
+    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
+    case INDEX_op_movi_i64:
+    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
     default:
         tcg_abort();
     }
@@ -2078,10 +2069,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
-    { INDEX_op_call, { "ri" } },
     { INDEX_op_br, { } },
-    { INDEX_op_mov_i32, { "r", "r" } },
-    { INDEX_op_movi_i32, { "r" } },
     { INDEX_op_ld8u_i32, { "r", "r" } },
     { INDEX_op_ld8s_i32, { "r", "r" } },
     { INDEX_op_ld16u_i32, { "r", "r" } },
@@ -2135,8 +2123,6 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
     { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
 #else
-    { INDEX_op_mov_i64, { "r", "r" } },
-    { INDEX_op_movi_i64, { "r" } },
     { INDEX_op_ld8u_i64, { "r", "r" } },
     { INDEX_op_ld8s_i64, { "r", "r" } },
     { INDEX_op_ld16u_i64, { "r", "r" } },
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index dbeb16d3ac..6c94e5cde0 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -24,6 +24,8 @@
 #ifndef TCG_TARGET_I386 
 #define TCG_TARGET_I386 1
 
+#define TCG_TARGET_INSN_UNIT_SIZE  1
+
 #ifdef __x86_64__
 # define TCG_TARGET_REG_BITS  64
 # define TCG_TARGET_NB_REGS   16
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 1f523d6466..6bc9924641 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -692,112 +692,32 @@ static inline uint64_t tcg_opc_x4(int qp, uint64_t opc, int b1, uint64_t imm)
 
 
 /*
- * Relocations
+ * Relocations - Note that we never encode branches elsewhere than slot 2.
  */
 
-static inline void reloc_pcrel21b(void *pc, intptr_t target)
+static void reloc_pcrel21b_slot2(tcg_insn_unit *pc, tcg_insn_unit *target)
 {
-    uint64_t imm;
-    int64_t disp;
-    int slot;
-
-    slot = (intptr_t)pc & 3;
-    pc = (void *)((intptr_t)pc & ~3);
-
-    disp = target - (intptr_t)pc;
-    imm = (uint64_t) disp >> 4;
-
-    switch(slot) {
-    case 0:
-        *(uint64_t *)(pc + 0) = (*(uint64_t *)(pc + 8) & 0xfffffdc00003ffffull)
-                                | ((imm & 0x100000) << 21)  /* s */
-                                | ((imm & 0x0fffff) << 18); /* imm20b */
-        break;
-    case 1:
-        *(uint64_t *)(pc + 8) = (*(uint64_t *)(pc + 8) & 0xfffffffffffb8000ull)
-                                | ((imm & 0x100000) >> 2)   /* s */
-                                | ((imm & 0x0fffe0) >> 5);  /* imm20b */
-        *(uint64_t *)(pc + 0) = (*(uint64_t *)(pc + 0) & 0x07ffffffffffffffull)
-                                | ((imm & 0x00001f) << 59); /* imm20b */
-        break;
-    case 2:
-        *(uint64_t *)(pc + 8) = (*(uint64_t *)(pc + 8) & 0xf700000fffffffffull)
-                                | ((imm & 0x100000) << 39)  /* s */
-                                | ((imm & 0x0fffff) << 36); /* imm20b */
-        break;
-    }
-}
+    uint64_t imm = target - pc;
 
-static inline uint64_t get_reloc_pcrel21b (void *pc)
-{
-    int64_t low, high;
-    int slot;
-
-    slot = (tcg_target_long) pc & 3;
-    pc = (void *)((tcg_target_long) pc & ~3);
-
-    low  = (*(uint64_t *)(pc + 0));
-    high = (*(uint64_t *)(pc + 8));
-
-    switch(slot) {
-    case 0:
-        return ((low >> 21) & 0x100000) + /* s */
-               ((low >> 18) & 0x0fffff);  /* imm20b */
-    case 1:
-        return ((high << 2) & 0x100000) + /* s */
-               ((high << 5) & 0x0fffe0) + /* imm20b */
-               ((low >> 59) & 0x00001f);  /* imm20b */
-    case 2:
-        return ((high >> 39) & 0x100000) + /* s */
-               ((high >> 36) & 0x0fffff);  /* imm20b */
-    default:
-        tcg_abort();
-    }
+    pc->hi = (pc->hi & 0xf700000fffffffffull)
+             | ((imm & 0x100000) << 39)  /* s */
+             | ((imm & 0x0fffff) << 36); /* imm20b */
 }
 
-static inline void reloc_pcrel60b(void *pc, intptr_t target)
+static uint64_t get_reloc_pcrel21b_slot2(tcg_insn_unit *pc)
 {
-    int64_t disp;
-    uint64_t imm;
-
-    disp = target - (intptr_t)pc;
-    imm = (uint64_t) disp >> 4;
+    int64_t high = pc->hi;
 
-    *(uint64_t *)(pc + 8) = (*(uint64_t *)(pc + 8) & 0xf700000fff800000ull)
-                             |  (imm & 0x0800000000000000ull)         /* s */
-                             | ((imm & 0x07fffff000000000ull) >> 36)  /* imm39 */
-                             | ((imm & 0x00000000000fffffull) << 36); /* imm20b */
-    *(uint64_t *)(pc + 0) = (*(uint64_t *)(pc + 0) & 0x00003fffffffffffull)
-                             | ((imm & 0x0000000ffff00000ull) << 28); /* imm39 */
+    return ((high >> 39) & 0x100000) + /* s */
+           ((high >> 36) & 0x0fffff);  /* imm20b */
 }
 
-static inline uint64_t get_reloc_pcrel60b (void *pc)
-{
-    int64_t low, high;
-
-    low  = (*(uint64_t *)(pc + 0));
-    high = (*(uint64_t *)(pc + 8));
-
-    return ((high)       & 0x0800000000000000ull) + /* s */
-           ((high >> 36) & 0x00000000000fffffull) + /* imm20b */
-           ((high << 36) & 0x07fffff000000000ull) + /* imm39 */
-           ((low >> 28)  & 0x0000000ffff00000ull);  /* imm39 */
-}
-
-
-static void patch_reloc(uint8_t *code_ptr, int type,
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
-    value += addend;
-    switch (type) {
-    case R_IA64_PCREL21B:
-        reloc_pcrel21b(code_ptr, value);
-        break;
-    case R_IA64_PCREL60B:
-        reloc_pcrel60b(code_ptr, value);
-    default:
-        tcg_abort();
-    }
+    assert(addend == 0);
+    assert(type == R_IA64_PCREL21B);
+    reloc_pcrel21b_slot2(code_ptr, (tcg_insn_unit *)value);
 }
 
 /*
@@ -861,7 +781,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
  * Code generation
  */
 
-static uint8_t *tb_ret_addr;
+static tcg_insn_unit *tb_ret_addr;
 
 static inline void tcg_out_bundle(TCGContext *s, int template,
                                   uint64_t slot0, uint64_t slot1,
@@ -872,9 +792,10 @@ static inline void tcg_out_bundle(TCGContext *s, int template,
     slot1 &= 0x1ffffffffffull; /* 41 bits */
     slot2 &= 0x1ffffffffffull; /* 41 bits */
 
-    *(uint64_t *)(s->code_ptr + 0) = (slot1 << 46) | (slot0 << 5) | template;
-    *(uint64_t *)(s->code_ptr + 8) = (slot2 << 23) | (slot1 >> 18);
-    s->code_ptr += 16;
+    *s->code_ptr++ = (tcg_insn_unit){
+        (slot1 << 46) | (slot0 << 5) | template,
+        (slot2 << 23) | (slot1 >> 18)
+    };
 }
 
 static inline uint64_t tcg_opc_mov_a(int qp, TCGReg dst, TCGReg src)
@@ -909,33 +830,34 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
 static void tcg_out_br(TCGContext *s, int label_index)
 {
     TCGLabel *l = &s->labels[label_index];
+    uint64_t imm;
 
     /* We pay attention here to not modify the branch target by reading
        the existing value and using it again. This ensure that caches and
        memory are kept coherent during retranslation. */
-    tcg_out_bundle(s, mmB,
-                   INSN_NOP_M,
-                   INSN_NOP_M,
-                   tcg_opc_b1 (TCG_REG_P0, OPC_BR_SPTK_MANY_B1,
-                               get_reloc_pcrel21b(s->code_ptr + 2)));
-
     if (l->has_value) {
-        reloc_pcrel21b((s->code_ptr - 16) + 2, l->u.value);
+        imm = l->u.value_ptr -  s->code_ptr;
     } else {
-        tcg_out_reloc(s, (s->code_ptr - 16) + 2,
-                      R_IA64_PCREL21B, label_index, 0);
+        imm = get_reloc_pcrel21b_slot2(s->code_ptr);
+        tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, label_index, 0);
     }
+
+    tcg_out_bundle(s, mmB,
+                   INSN_NOP_M,
+                   INSN_NOP_M,
+                   tcg_opc_b1(TCG_REG_P0, OPC_BR_SPTK_MANY_B1, imm));
 }
 
-static inline void tcg_out_calli(TCGContext *s, uintptr_t addr)
+static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *desc)
 {
+    uintptr_t func = desc->lo, gp = desc->hi, disp;
+
     /* Look through the function descriptor.  */
-    uintptr_t disp, *desc = (uintptr_t *)addr;
     tcg_out_bundle(s, mlx,
                    INSN_NOP_M,
-                   tcg_opc_l2 (desc[1]),
-                   tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, desc[1]));
-    disp = (desc[0] - (uintptr_t)s->code_ptr) >> 4;
+                   tcg_opc_l2 (gp),
+                   tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, TCG_REG_R1, gp));
+    disp = (tcg_insn_unit *)func - s->code_ptr;
     tcg_out_bundle(s, mLX,
                    INSN_NOP_M,
                    tcg_opc_l4 (disp),
@@ -943,23 +865,8 @@ static inline void tcg_out_calli(TCGContext *s, uintptr_t addr)
                                TCG_REG_B0, disp));
 }
 
-static inline void tcg_out_callr(TCGContext *s, TCGReg addr)
-{
-    tcg_out_bundle(s, MmI,
-                   tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R2, addr),
-                   tcg_opc_a4 (TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R3, 8, addr),
-                   tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21,
-                               TCG_REG_B6, TCG_REG_R2, 0));
-    tcg_out_bundle(s, mmB,
-                   tcg_opc_m1 (TCG_REG_P0, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R3),
-                   INSN_NOP_M,
-                   tcg_opc_b5 (TCG_REG_P0, OPC_BR_CALL_SPTK_MANY_B5,
-                               TCG_REG_B0, TCG_REG_B6));
-}
-
 static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg)
 {
-    int64_t disp;
     uint64_t imm, opc1;
 
     /* At least arg == 0 is a common operation.  */
@@ -970,8 +877,7 @@ static void tcg_out_exit_tb(TCGContext *s, tcg_target_long arg)
         opc1 = INSN_NOP_M;
     }
 
-    disp = tb_ret_addr - s->code_ptr;
-    imm = (uint64_t)disp >> 4;
+    imm = tb_ret_addr - s->code_ptr;
 
     tcg_out_bundle(s, mLX,
                    opc1,
@@ -1000,7 +906,7 @@ static inline void tcg_out_goto_tb(TCGContext *s, TCGArg arg)
                        tcg_opc_b4 (TCG_REG_P0, OPC_BR_SPTK_MANY_B4,
                                    TCG_REG_B6));
     }
-    s->tb_next_offset[arg] = s->code_ptr - s->code_buf;
+    s->tb_next_offset[arg] = tcg_current_code_size(s);
 }
 
 static inline void tcg_out_jmp(TCGContext *s, TCGArg addr)
@@ -1521,19 +1427,22 @@ static inline void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
                                   TCGReg arg2, int label_index, int cmp4)
 {
     TCGLabel *l = &s->labels[label_index];
+    uint64_t imm;
 
-    tcg_out_bundle(s, miB,
-                   INSN_NOP_M,
-                   tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4),
-                   tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1,
-                              get_reloc_pcrel21b(s->code_ptr + 2)));
-
+    /* We pay attention here to not modify the branch target by reading
+       the existing value and using it again. This ensure that caches and
+       memory are kept coherent during retranslation. */
     if (l->has_value) {
-        reloc_pcrel21b((s->code_ptr - 16) + 2, l->u.value);
+        imm = l->u.value_ptr - s->code_ptr;
     } else {
-        tcg_out_reloc(s, (s->code_ptr - 16) + 2,
-                      R_IA64_PCREL21B, label_index, 0);
+        imm = get_reloc_pcrel21b_slot2(s->code_ptr);
+        tcg_out_reloc(s, s->code_ptr, R_IA64_PCREL21B, label_index, 0);
     }
+
+    tcg_out_bundle(s, miB,
+                   INSN_NOP_M,
+                   tcg_opc_cmp_a(TCG_REG_P0, cond, arg1, arg2, cmp4),
+                   tcg_opc_b1(TCG_REG_P6, OPC_BR_DPTK_FEW_B1, imm));
 }
 
 static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret,
@@ -1646,7 +1555,7 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
 typedef struct TCGLabelQemuLdst {
     bool is_ld;
     TCGMemOp size;
-    uint8_t *label_ptr;     /* label pointers to be updated */
+    tcg_insn_unit *label_ptr;     /* label pointers to be updated */
 } TCGLabelQemuLdst;
 
 typedef struct TCGBackendData {
@@ -1660,7 +1569,7 @@ static inline void tcg_out_tb_init(TCGContext *s)
 }
 
 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
-                                uint8_t *label_ptr)
+                                tcg_insn_unit *label_ptr)
 {
     TCGBackendData *be = s->be;
     TCGLabelQemuLdst *l = &be->ldst_labels[be->nb_ldst_labels++];
@@ -1683,43 +1592,44 @@ static void tcg_out_tb_finalize(TCGContext *s)
         helper_le_ldul_mmu,
         helper_le_ldq_mmu,
     };
-    uintptr_t thunks[8] = { };
+    tcg_insn_unit *thunks[8] = { };
     TCGBackendData *be = s->be;
     size_t i, n = be->nb_ldst_labels;
 
     for (i = 0; i < n; i++) {
         TCGLabelQemuLdst *l = &be->ldst_labels[i];
         long x = l->is_ld * 4 + l->size;
-        uintptr_t dest = thunks[x];
+        tcg_insn_unit *dest = thunks[x];
 
         /* The out-of-line thunks are all the same; load the return address
            from B0, load the GP, and branch to the code.  Note that we are
            always post-call, so the register window has rolled, so we're
            using incomming parameter register numbers, not outgoing.  */
-        if (dest == 0) {
-            uintptr_t disp, *desc = (uintptr_t *)helpers[x];
+        if (dest == NULL) {
+            uintptr_t *desc = (uintptr_t *)helpers[x];
+            uintptr_t func = desc[0], gp = desc[1], disp;
 
-            thunks[x] = dest = (uintptr_t)s->code_ptr;
+            thunks[x] = dest = s->code_ptr;
 
             tcg_out_bundle(s, mlx,
                            INSN_NOP_M,
-                           tcg_opc_l2 (desc[1]),
+                           tcg_opc_l2 (gp),
                            tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2,
-                                       TCG_REG_R1, desc[1]));
+                                       TCG_REG_R1, gp));
             tcg_out_bundle(s, mii,
                            INSN_NOP_M,
                            INSN_NOP_I,
                            tcg_opc_i22(TCG_REG_P0, OPC_MOV_I22,
                                        l->is_ld ? TCG_REG_R35 : TCG_REG_R36,
                                        TCG_REG_B0));
-            disp = (desc[0] - (uintptr_t)s->code_ptr) >> 4;
+            disp = (tcg_insn_unit *)func - s->code_ptr;
             tcg_out_bundle(s, mLX,
                            INSN_NOP_M,
                            tcg_opc_l3 (disp),
                            tcg_opc_x3 (TCG_REG_P0, OPC_BRL_SPTK_MANY_X3, disp));
         }
 
-        reloc_pcrel21b(l->label_ptr, dest);
+        reloc_pcrel21b_slot2(l->label_ptr, dest);
     }
 }
 
@@ -1731,7 +1641,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args)
     int addr_reg, data_reg, mem_index;
     TCGMemOp opc, s_bits;
     uint64_t fin1, fin2;
-    uint8_t *label_ptr;
+    tcg_insn_unit *label_ptr;
 
     data_reg = args[0];
     addr_reg = args[1];
@@ -1765,13 +1675,13 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args)
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
                                TCG_REG_R2, TCG_REG_R57),
                    tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R58, mem_index));
-    label_ptr = s->code_ptr + 2;
+    label_ptr = s->code_ptr;
     tcg_out_bundle(s, miB,
                    tcg_opc_m1 (TCG_REG_P6, opc_ld_m1[s_bits],
                                TCG_REG_R8, TCG_REG_R2),
                    INSN_NOP_I,
                    tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0,
-                               get_reloc_pcrel21b(label_ptr)));
+                               get_reloc_pcrel21b_slot2(label_ptr)));
 
     add_qemu_ldst_label(s, 1, opc, label_ptr);
 
@@ -1792,7 +1702,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
     int mem_index;
     uint64_t pre1, pre2;
     TCGMemOp opc, s_bits;
-    uint8_t *label_ptr;
+    tcg_insn_unit *label_ptr;
 
     data_reg = args[0];
     addr_reg = args[1];
@@ -1827,13 +1737,13 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
                    tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R2,
                                TCG_REG_R2, TCG_REG_R57),
                    tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index));
-    label_ptr = s->code_ptr + 2;
+    label_ptr = s->code_ptr;
     tcg_out_bundle(s, miB,
                    tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits],
                                TCG_REG_R58, TCG_REG_R2),
                    INSN_NOP_I,
                    tcg_opc_b3 (TCG_REG_P7, OPC_BR_CALL_SPNT_FEW_B3, TCG_REG_B0,
-                               get_reloc_pcrel21b(label_ptr)));
+                               get_reloc_pcrel21b_slot2(label_ptr)));
 
     add_qemu_ldst_label(s, 0, opc, label_ptr);
 }
@@ -2085,24 +1995,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_br:
         tcg_out_br(s, args[0]);
         break;
-    case INDEX_op_call:
-        if (likely(const_args[0])) {
-            tcg_out_calli(s, args[0]);
-        } else {
-            tcg_out_callr(s, args[0]);
-        }
-        break;
     case INDEX_op_goto_tb:
         tcg_out_goto_tb(s, args[0]);
         break;
 
-    case INDEX_op_movi_i32:
-        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
-        break;
-    case INDEX_op_movi_i64:
-        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
-        break;
-
     case INDEX_op_ld8u_i32:
     case INDEX_op_ld8u_i64:
         tcg_out_ld_rel(s, OPC_LD1_M1, args[0], args[1], args[2]);
@@ -2312,6 +2208,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_qemu_st(s, args);
         break;
 
+    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
+    case INDEX_op_mov_i64:
+    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
+    case INDEX_op_movi_i64:
+    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
     default:
         tcg_abort();
     }
@@ -2319,13 +2220,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
 static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_br, { } },
-    { INDEX_op_call, { "ri" } },
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
 
-    { INDEX_op_mov_i32, { "r", "r" } },
-    { INDEX_op_movi_i32, { "r" } },
-
     { INDEX_op_ld8u_i32, { "r", "r" } },
     { INDEX_op_ld8s_i32, { "r", "r" } },
     { INDEX_op_ld16u_i32, { "r", "r" } },
@@ -2367,9 +2264,6 @@ static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } },
 
-    { INDEX_op_mov_i64, { "r", "r" } },
-    { INDEX_op_movi_i64, { "r" } },
-
     { INDEX_op_ld8u_i64, { "r", "r" } },
     { INDEX_op_ld8s_i64, { "r", "r" } },
     { INDEX_op_ld16u_i64, { "r", "r" } },
@@ -2442,8 +2336,11 @@ static void tcg_target_qemu_prologue(TCGContext *s)
                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 
     /* First emit adhoc function descriptor */
-    *(uint64_t *)(s->code_ptr) = (uint64_t)s->code_ptr + 16; /* entry point */
-    s->code_ptr += 16; /* skip GP */
+    *s->code_ptr = (tcg_insn_unit){
+        (uint64_t)(s->code_ptr + 1), /* entry point */
+        0                            /* skip gp */
+    };
+    s->code_ptr++;
 
     /* prologue */
     tcg_out_bundle(s, miI,
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index d834beb323..3a59b50349 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -25,6 +25,12 @@
 #ifndef TCG_TARGET_IA64 
 #define TCG_TARGET_IA64 1
 
+#define TCG_TARGET_INSN_UNIT_SIZE 16
+typedef struct {
+    uint64_t lo __attribute__((aligned(16)));
+    uint64_t hi;
+} tcg_insn_unit;
+
 /* We only map the first 64 registers */
 #define TCG_TARGET_NB_REGS 64
 typedef enum {
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 37241b224b..0ae495c586 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -108,83 +108,38 @@ static const TCGReg tcg_target_call_oarg_regs[2] = {
     TCG_REG_V1
 };
 
-static uint8_t *tb_ret_addr;
+static tcg_insn_unit *tb_ret_addr;
 
-static inline uint32_t reloc_lo16_val(void *pc, intptr_t target)
+static inline uint32_t reloc_pc16_val(tcg_insn_unit *pc, tcg_insn_unit *target)
 {
-    return target & 0xffff;
+    /* Let the compiler perform the right-shift as part of the arithmetic.  */
+    ptrdiff_t disp = target - (pc + 1);
+    assert(disp == (int16_t)disp);
+    return disp & 0xffff;
 }
 
-static inline void reloc_lo16(void *pc, intptr_t target)
+static inline void reloc_pc16(tcg_insn_unit *pc, tcg_insn_unit *target)
 {
-    *(uint32_t *) pc = (*(uint32_t *) pc & ~0xffff)
-                       | reloc_lo16_val(pc, target);
+    *pc = deposit32(*pc, 0, 16, reloc_pc16_val(pc, target));
 }
 
-static inline uint32_t reloc_hi16_val(void *pc, intptr_t target)
+static inline uint32_t reloc_26_val(tcg_insn_unit *pc, tcg_insn_unit *target)
 {
-    return (target >> 16) & 0xffff;
+    assert((((uintptr_t)pc ^ (uintptr_t)target) & 0xf0000000) == 0);
+    return ((uintptr_t)target >> 2) & 0x3ffffff;
 }
 
-static inline void reloc_hi16(void *pc, intptr_t target)
+static inline void reloc_26(tcg_insn_unit *pc, tcg_insn_unit *target)
 {
-    *(uint32_t *) pc = (*(uint32_t *) pc & ~0xffff)
-                       | reloc_hi16_val(pc, target);
+    *pc = deposit32(*pc, 0, 26, reloc_26_val(pc, target));
 }
 
-static inline uint32_t reloc_pc16_val(void *pc, intptr_t target)
-{
-    int32_t disp;
-
-    disp = target - (intptr_t)pc - 4;
-    if (disp != (disp << 14) >> 14) {
-        tcg_abort ();
-    }
-
-    return (disp >> 2) & 0xffff;
-}
-
-static inline void reloc_pc16 (void *pc, tcg_target_long target)
-{
-    *(uint32_t *) pc = (*(uint32_t *) pc & ~0xffff)
-                       | reloc_pc16_val(pc, target);
-}
-
-static inline uint32_t reloc_26_val (void *pc, tcg_target_long target)
-{
-    if ((((tcg_target_long)pc + 4) & 0xf0000000) != (target & 0xf0000000)) {
-        tcg_abort ();
-    }
-
-    return (target >> 2) & 0x3ffffff;
-}
-
-static inline void reloc_pc26(void *pc, intptr_t target)
-{
-    *(uint32_t *) pc = (*(uint32_t *) pc & ~0x3ffffff)
-                       | reloc_26_val(pc, target);
-}
-
-static void patch_reloc(uint8_t *code_ptr, int type,
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
-    value += addend;
-    switch(type) {
-    case R_MIPS_LO16:
-        reloc_lo16(code_ptr, value);
-        break;
-    case R_MIPS_HI16:
-        reloc_hi16(code_ptr, value);
-        break;
-    case R_MIPS_PC16:
-        reloc_pc16(code_ptr, value);
-        break;
-    case R_MIPS_26:
-        reloc_pc26(code_ptr, value);
-        break;
-    default:
-        tcg_abort();
-    }
+    assert(type == R_MIPS_PC16);
+    assert(addend == 0);
+    reloc_pc16(code_ptr, (tcg_insn_unit *)value);
 }
 
 /* parse target specific constraints */
@@ -198,11 +153,6 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         ct->ct |= TCG_CT_REG;
         tcg_regset_set(ct->u.regs, 0xffffffff);
         break;
-    case 'C':
-        ct->ct |= TCG_CT_REG;
-        tcg_regset_clear(ct->u.regs);
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_T9);
-        break;
     case 'L': /* qemu_ld output arg constraint */
         ct->ct |= TCG_CT_REG;
         tcg_regset_set(ct->u.regs, 0xffffffff);
@@ -374,7 +324,7 @@ static inline void tcg_out_opc_br(TCGContext *s, int opc,
     /* We pay attention here to not modify the branch target by reading
        the existing value and using it again. This ensure that caches and
        memory are kept coherent during retranslation. */
-    uint16_t offset = (uint16_t)(*(uint32_t *) s->code_ptr);
+    uint16_t offset = (uint16_t)*s->code_ptr;
 
     tcg_out_opc_imm(s, opc, rt, rs, offset);
 }
@@ -663,9 +613,9 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
         break;
     }
     if (l->has_value) {
-        reloc_pc16(s->code_ptr - 4, l->u.value);
+        reloc_pc16(s->code_ptr - 1, l->u.value_ptr);
     } else {
-        tcg_out_reloc(s, s->code_ptr - 4, R_MIPS_PC16, label_index, 0);
+        tcg_out_reloc(s, s->code_ptr - 1, R_MIPS_PC16, label_index, 0);
     }
     tcg_out_nop(s);
 }
@@ -676,7 +626,7 @@ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGArg arg1,
                             TCGArg arg2, TCGArg arg3, TCGArg arg4,
                             int label_index)
 {
-    void *label_ptr;
+    tcg_insn_unit *label_ptr;
 
     switch(cond) {
     case TCG_COND_NE:
@@ -733,7 +683,7 @@ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGArg arg1,
         tcg_abort();
     }
 
-    reloc_pc16(label_ptr, (tcg_target_long) s->code_ptr);
+    reloc_pc16(label_ptr, s->code_ptr);
 }
 
 static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
@@ -945,12 +895,12 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 {
     TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2;
 #if defined(CONFIG_SOFTMMU)
-    void *label1_ptr, *label2_ptr;
+    tcg_insn_unit *label1_ptr, *label2_ptr;
     int arg_num;
     int mem_index, s_bits;
     int addr_meml;
 # if TARGET_LONG_BITS == 64
-    uint8_t *label3_ptr;
+    tcg_insn_unit *label3_ptr;
     TCGReg addr_regh;
     int addr_memh;
 # endif
@@ -1011,7 +961,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     tcg_out_opc_br(s, OPC_BEQ, addr_regh, TCG_REG_AT);
     tcg_out_nop(s);
 
-    reloc_pc16(label3_ptr, (tcg_target_long) s->code_ptr);
+    reloc_pc16(label3_ptr, s->code_ptr);
 # else
     label1_ptr = s->code_ptr;
     tcg_out_opc_br(s, OPC_BEQ, TCG_REG_T0, TCG_REG_AT);
@@ -1060,7 +1010,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     tcg_out_nop(s);
 
     /* label1: fast path */
-    reloc_pc16(label1_ptr, (tcg_target_long) s->code_ptr);
+    reloc_pc16(label1_ptr, s->code_ptr);
 
     tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0,
                     offsetof(CPUArchState, tlb_table[mem_index][0].addend));
@@ -1121,7 +1071,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     }
 
 #if defined(CONFIG_SOFTMMU)
-    reloc_pc16(label2_ptr, (tcg_target_long) s->code_ptr);
+    reloc_pc16(label2_ptr, s->code_ptr);
 #endif
 }
 
@@ -1130,14 +1080,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 {
     TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2;
 #if defined(CONFIG_SOFTMMU)
-    uint8_t *label1_ptr, *label2_ptr;
+    tcg_insn_unit *label1_ptr, *label2_ptr;
     int arg_num;
     int mem_index, s_bits;
     int addr_meml;
 #endif
 #if TARGET_LONG_BITS == 64
 # if defined(CONFIG_SOFTMMU)
-    uint8_t *label3_ptr;
+    tcg_insn_unit *label3_ptr;
     TCGReg addr_regh;
     int addr_memh;
 # endif
@@ -1200,7 +1150,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     tcg_out_opc_br(s, OPC_BEQ, addr_regh, TCG_REG_AT);
     tcg_out_nop(s);
 
-    reloc_pc16(label3_ptr, (tcg_target_long) s->code_ptr);
+    reloc_pc16(label3_ptr, s->code_ptr);
 # else
     label1_ptr = s->code_ptr;
     tcg_out_opc_br(s, OPC_BEQ, TCG_REG_T0, TCG_REG_AT);
@@ -1241,7 +1191,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     tcg_out_nop(s);
 
     /* label1: fast path */
-    reloc_pc16(label1_ptr, (tcg_target_long) s->code_ptr);
+    reloc_pc16(label1_ptr, s->code_ptr);
 
     tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0,
                     offsetof(CPUArchState, tlb_table[mem_index][0].addend));
@@ -1293,17 +1243,24 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     }
 
 #if defined(CONFIG_SOFTMMU)
-    reloc_pc16(label2_ptr, (tcg_target_long) s->code_ptr);
+    reloc_pc16(label2_ptr, s->code_ptr);
 #endif
 }
 
+static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
+{
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (intptr_t)target);
+    tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, TCG_REG_T9, 0);
+    tcg_out_nop(s);
+}
+
 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                               const TCGArg *args, const int *const_args)
 {
     switch(opc) {
     case INDEX_op_exit_tb:
         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_V0, args[0]);
-        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_AT, (tcg_target_long)tb_ret_addr);
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, (uintptr_t)tb_ret_addr);
         tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_AT, 0);
         tcg_out_nop(s);
         break;
@@ -1313,28 +1270,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
             tcg_abort();
         } else {
             /* indirect jump method */
-            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT, (tcg_target_long)(s->tb_next + args[0]));
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_AT,
+                         (uintptr_t)(s->tb_next + args[0]));
             tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_AT, TCG_REG_AT, 0);
             tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_AT, 0);
         }
         tcg_out_nop(s);
-        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
-        break;
-    case INDEX_op_call:
-        tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, args[0], 0);
-        tcg_out_nop(s);
+        s->tb_next_offset[args[0]] = tcg_current_code_size(s);
         break;
     case INDEX_op_br:
         tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, args[0]);
         break;
 
-    case INDEX_op_mov_i32:
-        tcg_out_mov(s, TCG_TYPE_I32, args[0], args[1]);
-        break;
-    case INDEX_op_movi_i32:
-        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
-        break;
-
     case INDEX_op_ld8u_i32:
         tcg_out_ldst(s, OPC_LBU, args[0], args[1], args[2]);
         break;
@@ -1582,6 +1529,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_qemu_st(s, args, 3);
         break;
 
+    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
+    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
+    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
     default:
         tcg_abort();
     }
@@ -1590,11 +1540,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 static const TCGTargetOpDef mips_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
-    { INDEX_op_call, { "C" } },
     { INDEX_op_br, { } },
 
-    { INDEX_op_mov_i32, { "r", "r" } },
-    { INDEX_op_movi_i32, { "r" } },
     { INDEX_op_ld8u_i32, { "r", "r" } },
     { INDEX_op_ld8s_i32, { "r", "r" } },
     { INDEX_op_ld16u_i32, { "r", "r" } },
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 9576db514d..c6d2267d77 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -26,6 +26,7 @@
 #ifndef TCG_TARGET_MIPS 
 #define TCG_TARGET_MIPS 1
 
+#define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_NB_REGS 32
 
 typedef enum {
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 0302f4f99a..3a504a1961 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -513,12 +513,8 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
 {
-    int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
-    tcg_target_ulong mask, affected;
-    TCGOpcode op;
-    const TCGOpDef *def;
+    int nb_ops, op_index, nb_temps, nb_globals;
     TCGArg *gen_args;
-    TCGArg tmp;
 
     /* Array VALS has an element for each temp.
        If this temp holds a constant then its value is kept in VALS' element.
@@ -532,22 +528,27 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
     nb_ops = tcg_opc_ptr - s->gen_opc_buf;
     gen_args = args;
     for (op_index = 0; op_index < nb_ops; op_index++) {
-        op = s->gen_opc_buf[op_index];
-        def = &tcg_op_defs[op];
-        /* Do copy propagation */
+        TCGOpcode op = s->gen_opc_buf[op_index];
+        const TCGOpDef *def = &tcg_op_defs[op];
+        tcg_target_ulong mask, affected;
+        int nb_oargs, nb_iargs, nb_args, i;
+        TCGArg tmp;
+
         if (op == INDEX_op_call) {
-            int nb_oargs = args[0] >> 16;
-            int nb_iargs = args[0] & 0xffff;
-            for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
-                if (temps[args[i]].state == TCG_TEMP_COPY) {
-                    args[i] = find_better_copy(s, args[i]);
-                }
-            }
+            *gen_args++ = tmp = *args++;
+            nb_oargs = tmp >> 16;
+            nb_iargs = tmp & 0xffff;
+            nb_args = nb_oargs + nb_iargs + def->nb_cargs;
         } else {
-            for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
-                if (temps[args[i]].state == TCG_TEMP_COPY) {
-                    args[i] = find_better_copy(s, args[i]);
-                }
+            nb_oargs = def->nb_oargs;
+            nb_iargs = def->nb_iargs;
+            nb_args = def->nb_args;
+        }
+
+        /* Do copy propagation */
+        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+            if (temps[args[i]].state == TCG_TEMP_COPY) {
+                args[i] = find_better_copy(s, args[i]);
             }
         }
 
@@ -882,7 +883,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
 
         CASE_OP_32_64(qemu_ld):
             {
-                TCGMemOp mop = args[def->nb_oargs + def->nb_iargs];
+                TCGMemOp mop = args[nb_oargs + nb_iargs];
                 if (!(mop & MO_SIGN)) {
                     mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                 }
@@ -900,15 +901,15 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
         }
 
         if (mask == 0) {
-            assert(def->nb_oargs == 1);
+            assert(nb_oargs == 1);
             s->gen_opc_buf[op_index] = op_to_movi(op);
             tcg_opt_gen_movi(gen_args, args[0], 0);
-            args += def->nb_oargs + def->nb_iargs + def->nb_cargs;
+            args += nb_args;
             gen_args += 2;
             continue;
         }
         if (affected == 0) {
-            assert(def->nb_oargs == 1);
+            assert(nb_oargs == 1);
             if (temps_are_copies(args[0], args[1])) {
                 s->gen_opc_buf[op_index] = INDEX_op_nop;
             } else if (temps[args[1]].state != TCG_TEMP_CONST) {
@@ -920,7 +921,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                 tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val);
                 gen_args += 2;
             }
-            args += def->nb_iargs + 1;
+            args += nb_args;
             continue;
         }
 
@@ -1246,24 +1247,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             break;
 
         case INDEX_op_call:
-            nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
-            if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS |
-                                            TCG_CALL_NO_WRITE_GLOBALS))) {
+            if (!(args[nb_oargs + nb_iargs + 1]
+                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                 for (i = 0; i < nb_globals; i++) {
                     reset_temp(i);
                 }
             }
-            for (i = 0; i < (args[0] >> 16); i++) {
-                reset_temp(args[i + 1]);
-            }
-            i = nb_call_args + 3;
-            while (i) {
-                *gen_args = *args;
-                args++;
-                gen_args++;
-                i--;
-            }
-            break;
+            goto do_reset_output;
 
         default:
         do_default:
@@ -1275,7 +1265,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
             if (def->flags & TCG_OPF_BB_END) {
                 reset_all_temps(nb_temps);
             } else {
-                for (i = 0; i < def->nb_oargs; i++) {
+        do_reset_output:
+                for (i = 0; i < nb_oargs; i++) {
                     reset_temp(args[i]);
                     /* Save the corresponding known-zero bits mask for the
                        first output argument (only one supported so far). */
@@ -1284,11 +1275,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                     }
                 }
             }
-            for (i = 0; i < def->nb_args; i++) {
+            for (i = 0; i < nb_args; i++) {
                 gen_args[i] = args[i];
             }
-            args += def->nb_args;
-            gen_args += def->nb_args;
+            args += nb_args;
+            gen_args += nb_args;
             break;
         }
     }
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 83d9340fca..436b65bffb 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -24,7 +24,7 @@
 
 #include "tcg-be-ldst.h"
 
-static uint8_t *tb_ret_addr;
+static tcg_insn_unit *tb_ret_addr;
 
 #if defined _CALL_DARWIN || defined __APPLE__
 #define TCG_TARGET_CALL_DARWIN
@@ -171,50 +171,47 @@ static const int tcg_target_callee_save_regs[] = {
     TCG_REG_R31
 };
 
-static uint32_t reloc_pc24_val (void *pc, tcg_target_long target)
+static inline bool in_range_b(tcg_target_long target)
 {
-    tcg_target_long disp;
-
-    disp = target - (tcg_target_long) pc;
-    if ((disp << 6) >> 6 != disp)
-        tcg_abort ();
+    return target == sextract32(target, 0, 26);
+}
 
+static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target)
+{
+    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
+    assert(in_range_b(disp));
     return disp & 0x3fffffc;
 }
 
-static void reloc_pc24 (void *pc, tcg_target_long target)
+static void reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target)
 {
-    *(uint32_t *) pc = (*(uint32_t *) pc & ~0x3fffffc)
-        | reloc_pc24_val (pc, target);
+    *pc = (*pc & ~0x3fffffc) | reloc_pc24_val(pc, target);
 }
 
-static uint16_t reloc_pc14_val (void *pc, tcg_target_long target)
+static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target)
 {
-    tcg_target_long disp;
-
-    disp = target - (tcg_target_long) pc;
-    if (disp != (int16_t) disp)
-        tcg_abort ();
-
+    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
+    assert(disp == (int16_t) disp);
     return disp & 0xfffc;
 }
 
-static void reloc_pc14 (void *pc, tcg_target_long target)
+static void reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target)
 {
-    *(uint32_t *) pc = (*(uint32_t *) pc & ~0xfffc)
-        | reloc_pc14_val (pc, target);
+    *pc = (*pc & ~0xfffc) | reloc_pc14_val(pc, target);
 }
 
-static void patch_reloc(uint8_t *code_ptr, int type,
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
-    value += addend;
+    tcg_insn_unit *target = (tcg_insn_unit *)value;
+
+    assert(addend == 0);
     switch (type) {
     case R_PPC_REL14:
-        reloc_pc14 (code_ptr, value);
+        reloc_pc14(code_ptr, target);
         break;
     case R_PPC_REL24:
-        reloc_pc24 (code_ptr, value);
+        reloc_pc24(code_ptr, target);
         break;
     default:
         tcg_abort();
@@ -480,47 +477,36 @@ static void tcg_out_ldst (TCGContext *s, int ret, int addr,
     }
 }
 
-static void tcg_out_b (TCGContext *s, int mask, tcg_target_long target)
+static void tcg_out_b(TCGContext *s, int mask, tcg_insn_unit *target)
 {
-    tcg_target_long disp;
-
-    disp = target - (tcg_target_long) s->code_ptr;
-    if ((disp << 6) >> 6 == disp)
-        tcg_out32 (s, B | (disp & 0x3fffffc) | mask);
-    else {
-        tcg_out_movi (s, TCG_TYPE_I32, 0, (tcg_target_long) target);
-        tcg_out32 (s, MTSPR | RS (0) | CTR);
-        tcg_out32 (s, BCCTR | BO_ALWAYS | mask);
+    ptrdiff_t disp = tcg_pcrel_diff(s, target);
+    if (in_range_b(disp)) {
+        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
+    } else {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
+        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
+        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
     }
 }
 
-static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg,
-                          int lk)
+static void tcg_out_call1(TCGContext *s, tcg_insn_unit *target, int lk)
 {
 #ifdef _CALL_AIX
-    int reg;
-
-    if (const_arg) {
-        reg = 2;
-        tcg_out_movi (s, TCG_TYPE_I32, reg, arg);
-    }
-    else reg = arg;
-
-    tcg_out32 (s, LWZ | RT (0) | RA (reg));
-    tcg_out32 (s, MTSPR | RA (0) | CTR);
-    tcg_out32 (s, LWZ | RT (2) | RA (reg) | 4);
-    tcg_out32 (s, BCCTR | BO_ALWAYS | lk);
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, (uintptr_t)target);
+    tcg_out32(s, LWZ | RT(TCG_REG_R0) | RA(reg));
+    tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
+    tcg_out32(s, LWZ | RT(TCG_REG_R2) | RA(reg) | 4);
+    tcg_out32(s, BCCTR | BO_ALWAYS | lk);
 #else
-    if (const_arg) {
-        tcg_out_b (s, lk, arg);
-    }
-    else {
-        tcg_out32 (s, MTSPR | RS (arg) | LR);
-        tcg_out32 (s, BCLR | BO_ALWAYS | lk);
-    }
+    tcg_out_b(s, lk, target);
 #endif
 }
 
+static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
+{
+    tcg_out_call1(s, target, LK);
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 static void add_qemu_ldst_label (TCGContext *s,
@@ -531,8 +517,8 @@ static void add_qemu_ldst_label (TCGContext *s,
                                  int addrlo_reg,
                                  int addrhi_reg,
                                  int mem_index,
-                                 uint8_t *raddr,
-                                 uint8_t *label_ptr)
+                                 tcg_insn_unit *raddr,
+                                 tcg_insn_unit *label_ptr)
 {
     TCGLabelQemuLdst *label = new_ldst_label(s);
 
@@ -550,7 +536,7 @@ static void add_qemu_ldst_label (TCGContext *s,
 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
  *                                     int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_ld_helpers[16] = {
+static void * const qemu_ld_helpers[16] = {
     [MO_UB]   = helper_ret_ldub_mmu,
     [MO_LEUW] = helper_le_lduw_mmu,
     [MO_LEUL] = helper_le_ldul_mmu,
@@ -563,7 +549,7 @@ static const void * const qemu_ld_helpers[16] = {
 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
  *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_st_helpers[16] = {
+static void * const qemu_st_helpers[16] = {
     [MO_UB]   = helper_ret_stb_mmu,
     [MO_LEUW] = helper_le_stw_mmu,
     [MO_LEUL] = helper_le_stl_mmu,
@@ -573,8 +559,8 @@ static const void * const qemu_st_helpers[16] = {
     [MO_BEQ]  = helper_be_stq_mmu,
 };
 
-static void *ld_trampolines[16];
-static void *st_trampolines[16];
+static tcg_insn_unit *ld_trampolines[16];
+static tcg_insn_unit *st_trampolines[16];
 
 /* Perform the TLB load and compare.  Branches to the slow path, placing the
    address of the branch in *LABEL_PTR.  Loads the addend of the TLB into R0.
@@ -582,14 +568,15 @@ static void *st_trampolines[16];
 
 static void tcg_out_tlb_check(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2,
                               TCGReg addrlo, TCGReg addrhi, TCGMemOp s_bits,
-                              int mem_index, int is_load, uint8_t **label_ptr)
+                              int mem_index, int is_load,
+                              tcg_insn_unit **label_ptr)
 {
     int cmp_off =
         (is_load
          ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
          : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
     int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
-    uint16_t retranst;
+    tcg_insn_unit retranst;
     TCGReg base = TCG_AREG0;
 
     /* Extract the page index, shifted into place for tlb index.  */
@@ -648,7 +635,7 @@ static void tcg_out_tlb_check(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2,
        This address cannot be used for a tail call, but it's shorter
        than forming an address from scratch.  */
     *label_ptr = s->code_ptr;
-    retranst = ((uint16_t *) s->code_ptr)[1] & ~3;
+    retranst = *s->code_ptr & 0xfffc;
     tcg_out32(s, BC | BI(7, CR_EQ) | retranst | BO_COND_FALSE | LK);
 }
 #endif
@@ -659,7 +646,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
     TCGMemOp opc, bswap;
 #ifdef CONFIG_SOFTMMU
     int mem_index;
-    uint8_t *label_ptr;
+    tcg_insn_unit *label_ptr;
 #endif
 
     datalo = *args++;
@@ -731,7 +718,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
     TCGMemOp opc, bswap, s_bits;
 #ifdef CONFIG_SOFTMMU
     int mem_index;
-    uint8_t *label_ptr;
+    tcg_insn_unit *label_ptr;
 #endif
 
     datalo = *args++;
@@ -790,7 +777,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
     TCGReg ir, datalo, datahi;
     TCGMemOp opc = l->opc;
 
-    reloc_pc14 (l->label_ptr[0], (uintptr_t)s->code_ptr);
+    reloc_pc14(l->label_ptr[0], s->code_ptr);
 
     ir = TCG_REG_R4;
     if (TARGET_LONG_BITS == 32) {
@@ -804,7 +791,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
     }
     tcg_out_movi(s, TCG_TYPE_I32, ir++, l->mem_index);
     tcg_out32(s, MFSPR | RT(ir++) | LR);
-    tcg_out_b(s, LK, (uintptr_t)ld_trampolines[opc & ~MO_SIGN]);
+    tcg_out_b(s, LK, ld_trampolines[opc & ~MO_SIGN]);
 
     datalo = l->datalo_reg;
     switch (opc & MO_SSIZE) {
@@ -832,7 +819,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
         }
         break;
     }
-    tcg_out_b (s, 0, (uintptr_t)l->raddr);
+    tcg_out_b(s, 0, l->raddr);
 }
 
 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
@@ -840,7 +827,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
     TCGReg ir, datalo;
     TCGMemOp opc = l->opc;
 
-    reloc_pc14 (l->label_ptr[0], (tcg_target_long) s->code_ptr);
+    reloc_pc14(l->label_ptr[0], s->code_ptr);
 
     ir = TCG_REG_R4;
     if (TARGET_LONG_BITS == 32) {
@@ -878,16 +865,16 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 
     tcg_out_movi(s, TCG_TYPE_I32, ir++, l->mem_index);
     tcg_out32(s, MFSPR | RT(ir++) | LR);
-    tcg_out_b(s, LK, (uintptr_t)st_trampolines[opc]);
-    tcg_out_b(s, 0, (uintptr_t)l->raddr);
+    tcg_out_b(s, LK, st_trampolines[opc]);
+    tcg_out_b(s, 0, l->raddr);
 }
 #endif
 
 #ifdef CONFIG_SOFTMMU
-static void emit_ldst_trampoline (TCGContext *s, const void *ptr)
+static void emit_ldst_trampoline(TCGContext *s, tcg_insn_unit *ptr)
 {
-    tcg_out_mov (s, TCG_TYPE_I32, 3, TCG_AREG0);
-    tcg_out_call (s, (tcg_target_long) ptr, 1, 0);
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0);
+    tcg_out_call1(s, ptr, 0);
 }
 #endif
 
@@ -909,12 +896,13 @@ static void tcg_target_qemu_prologue (TCGContext *s)
 
 #ifdef _CALL_AIX
     {
-        uint32_t addr;
+        uintptr_t addr;
 
         /* First emit adhoc function descriptor */
-        addr = (uint32_t) s->code_ptr + 12;
-        tcg_out32 (s, addr);        /* entry point */
-        s->code_ptr += 8;           /* skip TOC and environment pointer */
+        addr = (uintptr_t)s->code_ptr + 12;
+        tcg_out32(s, addr);        /* entry point */
+        tcg_out32(s, 0);           /* toc */
+        tcg_out32(s, 0);           /* environment pointer */
     }
 #endif
     tcg_out32 (s, MFSPR | RT (0) | LR);
@@ -1065,18 +1053,17 @@ static void tcg_out_cmp (TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
 
 }
 
-static void tcg_out_bc (TCGContext *s, int bc, int label_index)
+static void tcg_out_bc(TCGContext *s, int bc, int label_index)
 {
     TCGLabel *l = &s->labels[label_index];
 
-    if (l->has_value)
-        tcg_out32 (s, bc | reloc_pc14_val (s->code_ptr, l->u.value));
-    else {
-        uint16_t val = *(uint16_t *) &s->code_ptr[2];
-
+    if (l->has_value) {
+        tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value_ptr));
+    } else {
         /* Thanks to Andrzej Zaborowski */
-        tcg_out32 (s, bc | (val & 0xfffc));
-        tcg_out_reloc (s, s->code_ptr - 4, R_PPC_REL14, label_index, 0);
+        tcg_insn_unit retrans = *s->code_ptr & 0xfffc;
+        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, label_index, 0);
+        tcg_out32(s, bc | retrans);
     }
 }
 
@@ -1367,43 +1354,33 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 {
     switch (opc) {
     case INDEX_op_exit_tb:
-        tcg_out_movi (s, TCG_TYPE_I32, TCG_REG_R3, args[0]);
-        tcg_out_b (s, 0, (tcg_target_long) tb_ret_addr);
+        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R3, args[0]);
+        tcg_out_b(s, 0, tb_ret_addr);
         break;
     case INDEX_op_goto_tb:
         if (s->tb_jmp_offset) {
             /* direct jump method */
-
-            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
-            s->code_ptr += 16;
-        }
-        else {
+            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
+            s->code_ptr += 4;
+        } else {
             tcg_abort ();
         }
-        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
+        s->tb_next_offset[args[0]] = tcg_current_code_size(s);
         break;
     case INDEX_op_br:
         {
             TCGLabel *l = &s->labels[args[0]];
 
             if (l->has_value) {
-                tcg_out_b (s, 0, l->u.value);
-            }
-            else {
-                uint32_t val = *(uint32_t *) s->code_ptr;
-
+                tcg_out_b(s, 0, l->u.value_ptr);
+            } else {
                 /* Thanks to Andrzej Zaborowski */
-                tcg_out32 (s, B | (val & 0x3fffffc));
-                tcg_out_reloc (s, s->code_ptr - 4, R_PPC_REL24, args[0], 0);
+                tcg_insn_unit retrans = *s->code_ptr & 0x3fffffc;
+                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, args[0], 0);
+                tcg_out32(s, B | retrans);
             }
         }
         break;
-    case INDEX_op_call:
-        tcg_out_call (s, args[0], const_args[0], LK);
-        break;
-    case INDEX_op_movi_i32:
-        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
-        break;
     case INDEX_op_ld8u_i32:
         tcg_out_ldst (s, args[0], args[1], args[2], LBZ, LBZX);
         break;
@@ -1839,20 +1816,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                          const_args[2]);
         break;
 
+    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
+    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
+    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
     default:
-        tcg_dump_ops (s);
-        tcg_abort ();
+        tcg_abort();
     }
 }
 
 static const TCGTargetOpDef ppc_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
-    { INDEX_op_call, { "ri" } },
     { INDEX_op_br, { } },
 
-    { INDEX_op_mov_i32, { "r", "r" } },
-    { INDEX_op_movi_i32, { "r" } },
     { INDEX_op_ld8u_i32, { "r", "r" } },
     { INDEX_op_ld8s_i32, { "r", "r" } },
     { INDEX_op_ld16u_i32, { "r", "r" } },
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 0d4f5959a7..dd7e557948 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -25,6 +25,7 @@
 #define TCG_TARGET_PPC 1
 
 #define TCG_TARGET_NB_REGS 32
+#define TCG_TARGET_INSN_UNIT_SIZE 4
 
 typedef enum {
     TCG_REG_R0 = 0,
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 45b1c06910..c90ddcd03a 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -31,7 +31,7 @@
 #define TCG_CT_CONST_ZERO 0x1000
 #define TCG_CT_CONST_MONE 0x2000
 
-static uint8_t *tb_ret_addr;
+static tcg_insn_unit *tb_ret_addr;
 
 #if TARGET_LONG_BITS == 32
 #define LD_ADDR LWZ
@@ -168,61 +168,54 @@ static inline bool in_range_b(tcg_target_long target)
     return target == sextract64(target, 0, 26);
 }
 
-static uint32_t reloc_pc24_val(void *pc, tcg_target_long target)
+static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target)
 {
-    tcg_target_long disp;
-
-    disp = target - (tcg_target_long)pc;
+    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
     assert(in_range_b(disp));
-
     return disp & 0x3fffffc;
 }
 
-static void reloc_pc24(void *pc, tcg_target_long target)
+static void reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target)
 {
-    *(uint32_t *)pc = (*(uint32_t *)pc & ~0x3fffffc)
-        | reloc_pc24_val(pc, target);
+    *pc = (*pc & ~0x3fffffc) | reloc_pc24_val(pc, target);
 }
 
-static uint16_t reloc_pc14_val(void *pc, tcg_target_long target)
+static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target)
 {
-    tcg_target_long disp;
-
-    disp = target - (tcg_target_long)pc;
-    if (disp != (int16_t) disp) {
-        tcg_abort();
-    }
-
+    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
+    assert(disp == (int16_t) disp);
     return disp & 0xfffc;
 }
 
-static void reloc_pc14(void *pc, tcg_target_long target)
+static void reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target)
 {
-    *(uint32_t *)pc = (*(uint32_t *)pc & ~0xfffc) | reloc_pc14_val(pc, target);
+    *pc = (*pc & ~0xfffc) | reloc_pc14_val(pc, target);
 }
 
 static inline void tcg_out_b_noaddr(TCGContext *s, int insn)
 {
-    unsigned retrans = *(uint32_t *)s->code_ptr & 0x3fffffc;
+    unsigned retrans = *s->code_ptr & 0x3fffffc;
     tcg_out32(s, insn | retrans);
 }
 
 static inline void tcg_out_bc_noaddr(TCGContext *s, int insn)
 {
-    unsigned retrans = *(uint32_t *)s->code_ptr & 0xfffc;
+    unsigned retrans = *s->code_ptr & 0xfffc;
     tcg_out32(s, insn | retrans);
 }
 
-static void patch_reloc(uint8_t *code_ptr, int type,
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
-    value += addend;
+    tcg_insn_unit *target = (tcg_insn_unit *)value;
+
+    assert(addend == 0);
     switch (type) {
     case R_PPC_REL14:
-        reloc_pc14(code_ptr, value);
+        reloc_pc14(code_ptr, target);
         break;
     case R_PPC_REL24:
-        reloc_pc24(code_ptr, value);
+        reloc_pc24(code_ptr, target);
         break;
     default:
         tcg_abort();
@@ -702,61 +695,48 @@ static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
     tcg_out_zori32(s, dst, src, c, XORI, XORIS);
 }
 
-static void tcg_out_b(TCGContext *s, int mask, tcg_target_long target)
+static void tcg_out_b(TCGContext *s, int mask, tcg_insn_unit *target)
 {
-    tcg_target_long disp;
-
-    disp = target - (tcg_target_long)s->code_ptr;
+    ptrdiff_t disp = tcg_pcrel_diff(s, target);
     if (in_range_b(disp)) {
         tcg_out32(s, B | (disp & 0x3fffffc) | mask);
     } else {
-        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, (tcg_target_long)target);
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, (uintptr_t)target);
         tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
         tcg_out32(s, BCCTR | BO_ALWAYS | mask);
     }
 }
 
-static void tcg_out_call(TCGContext *s, tcg_target_long arg, int const_arg)
+static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
 {
 #ifdef __APPLE__
-    if (const_arg) {
-        tcg_out_b(s, LK, arg);
-    } else {
-        tcg_out32(s, MTSPR | RS(arg) | LR);
-        tcg_out32(s, BCLR | BO_ALWAYS | LK);
-    }
+    tcg_out_b(s, LK, target);
 #else
-    TCGReg reg = arg;
-    int ofs = 0;
-
-    if (const_arg) {
-        /* Look through the descriptor.  If the branch is in range, and we
-           don't have to spend too much effort on building the toc.  */
-        intptr_t tgt = ((intptr_t *)arg)[0];
-        intptr_t toc = ((intptr_t *)arg)[1];
-        intptr_t diff = tgt - (intptr_t)s->code_ptr;
-
-        if (in_range_b(diff) && toc == (uint32_t)toc) {
-            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, toc);
-            tcg_out_b(s, LK, tgt);
-            return;
-        }
-
+    /* Look through the descriptor.  If the branch is in range, and we
+       don't have to spend too much effort on building the toc.  */
+    void *tgt = ((void **)target)[0];
+    uintptr_t toc = ((uintptr_t *)target)[1];
+    intptr_t diff = tcg_pcrel_diff(s, tgt);
+
+    if (in_range_b(diff) && toc == (uint32_t)toc) {
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, toc);
+        tcg_out_b(s, LK, tgt);
+    } else {
         /* Fold the low bits of the constant into the addresses below.  */
-        ofs = (int16_t)arg;
+        intptr_t arg = (intptr_t)target;
+        int ofs = (int16_t)arg;
+
         if (ofs + 8 < 0x8000) {
             arg -= ofs;
         } else {
             ofs = 0;
         }
-        reg = TCG_REG_R2;
-        tcg_out_movi(s, TCG_TYPE_I64, reg, arg);
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, arg);
+        tcg_out32(s, LD | TAI(TCG_REG_R0, TCG_REG_R2, ofs));
+        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
+        tcg_out32(s, LD | TAI(TCG_REG_R2, TCG_REG_R2, ofs + 8));
+        tcg_out32(s, BCCTR | BO_ALWAYS | LK);
     }
-
-    tcg_out32(s, LD | TAI(TCG_REG_R0, reg, ofs));
-    tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
-    tcg_out32(s, LD | TAI(TCG_REG_R2, reg, ofs + 8));
-    tcg_out32(s, BCCTR | BO_ALWAYS | LK);
 #endif
 }
 
@@ -844,7 +824,7 @@ static const uint32_t qemu_exts_opc[4] = {
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
  *                                 int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_ld_helpers[16] = {
+static void * const qemu_ld_helpers[16] = {
     [MO_UB]   = helper_ret_ldub_mmu,
     [MO_LEUW] = helper_le_lduw_mmu,
     [MO_LEUL] = helper_le_ldul_mmu,
@@ -857,7 +837,7 @@ static const void * const qemu_ld_helpers[16] = {
 /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
  *                                 uintxx_t val, int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_st_helpers[16] = {
+static void * const qemu_st_helpers[16] = {
     [MO_UB]   = helper_ret_stb_mmu,
     [MO_LEUW] = helper_le_stw_mmu,
     [MO_LEUL] = helper_le_stl_mmu,
@@ -946,7 +926,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits, TCGReg addr_reg,
    helper code.  */
 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
                                 int data_reg, int addr_reg, int mem_index,
-                                uint8_t *raddr, uint8_t *label_ptr)
+                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
 {
     TCGLabelQemuLdst *label = new_ldst_label(s);
 
@@ -963,7 +943,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
     TCGMemOp opc = lb->opc;
 
-    reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr);
+    reloc_pc14(lb->label_ptr[0], s->code_ptr);
 
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0);
 
@@ -974,7 +954,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, lb->mem_index);
     tcg_out32(s, MFSPR | RT(TCG_REG_R6) | LR);
 
-    tcg_out_call(s, (tcg_target_long)qemu_ld_helpers[opc & ~MO_SIGN], 1);
+    tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]);
 
     if (opc & MO_SIGN) {
         uint32_t insn = qemu_exts_opc[opc & MO_SIZE];
@@ -983,7 +963,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
         tcg_out_mov(s, TCG_TYPE_I64, lb->datalo_reg, TCG_REG_R3);
     }
 
-    tcg_out_b(s, 0, (uintptr_t)lb->raddr);
+    tcg_out_b(s, 0, lb->raddr);
 }
 
 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
@@ -991,7 +971,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     TCGMemOp opc = lb->opc;
     TCGMemOp s_bits = opc & MO_SIZE;
 
-    reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr);
+    reloc_pc14(lb->label_ptr[0], s->code_ptr);
 
     tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, TCG_AREG0);
 
@@ -1004,9 +984,9 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R6, lb->mem_index);
     tcg_out32(s, MFSPR | RT(TCG_REG_R7) | LR);
 
-    tcg_out_call(s, (tcg_target_long)qemu_st_helpers[opc], 1);
+    tcg_out_call(s, qemu_st_helpers[opc]);
 
-    tcg_out_b(s, 0, (uintptr_t)lb->raddr);
+    tcg_out_b(s, 0, lb->raddr);
 }
 #endif /* SOFTMMU */
 
@@ -1017,7 +997,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
     uint32_t insn;
     TCGMemOp s_bits = opc & MO_SIZE;
 #ifdef CONFIG_SOFTMMU
-    void *label_ptr;
+    tcg_insn_unit *label_ptr;
 #endif
 
 #ifdef CONFIG_SOFTMMU
@@ -1063,7 +1043,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
     TCGReg rbase;
     uint32_t insn;
 #ifdef CONFIG_SOFTMMU
-    void *label_ptr;
+    tcg_insn_unit *label_ptr;
 #endif
 
 #ifdef CONFIG_SOFTMMU
@@ -1123,7 +1103,8 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 #ifndef __APPLE__
     /* First emit adhoc function descriptor */
     tcg_out64(s, (uint64_t)s->code_ptr + 24); /* entry point */
-    s->code_ptr += 16;          /* skip TOC and environment pointer */
+    tcg_out64(s, 0);                          /* toc */
+    tcg_out64(s, 0);                          /* environment pointer */
 #endif
 
     /* Prologue */
@@ -1415,7 +1396,7 @@ static void tcg_out_bc(TCGContext *s, int bc, int label_index)
     TCGLabel *l = &s->labels[label_index];
 
     if (l->has_value) {
-        tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value));
+        tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value_ptr));
     } else {
         tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, label_index, 0);
         tcg_out_bc_noaddr(s, bc);
@@ -1478,15 +1459,13 @@ static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
     }
 }
 
-void ppc_tb_set_jmp_target(unsigned long jmp_addr, unsigned long addr)
+void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
 {
     TCGContext s;
-    unsigned long patch_size;
 
-    s.code_ptr = (uint8_t *) jmp_addr;
-    tcg_out_b(&s, 0, addr);
-    patch_size = s.code_ptr - (uint8_t *) jmp_addr;
-    flush_icache_range(jmp_addr, jmp_addr + patch_size);
+    s.code_buf = s.code_ptr = (tcg_insn_unit *)jmp_addr;
+    tcg_out_b(&s, 0, (tcg_insn_unit *)addr);
+    flush_icache_range(jmp_addr, jmp_addr + tcg_current_code_size(&s));
 }
 
 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
@@ -1498,40 +1477,31 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     switch (opc) {
     case INDEX_op_exit_tb:
         tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R3, args[0]);
-        tcg_out_b(s, 0, (tcg_target_long)tb_ret_addr);
+        tcg_out_b(s, 0, tb_ret_addr);
         break;
     case INDEX_op_goto_tb:
         if (s->tb_jmp_offset) {
             /* Direct jump method.  */
-            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
-            s->code_ptr += 28;
+            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
+            s->code_ptr += 7;
         } else {
             /* Indirect jump method.  */
             tcg_abort();
         }
-        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
+        s->tb_next_offset[args[0]] = tcg_current_code_size(s);
         break;
     case INDEX_op_br:
         {
             TCGLabel *l = &s->labels[args[0]];
 
             if (l->has_value) {
-                tcg_out_b(s, 0, l->u.value);
+                tcg_out_b(s, 0, l->u.value_ptr);
             } else {
                 tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, args[0], 0);
                 tcg_out_b_noaddr(s, B);
             }
         }
         break;
-    case INDEX_op_call:
-        tcg_out_call(s, args[0], const_args[0]);
-        break;
-    case INDEX_op_movi_i32:
-        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
-        break;
-    case INDEX_op_movi_i64:
-        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
-        break;
     case INDEX_op_ld8u_i32:
     case INDEX_op_ld8u_i64:
         tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
@@ -2012,8 +1982,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
         break;
 
+    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
+    case INDEX_op_mov_i64:
+    case INDEX_op_movi_i32:  /* Always emitted via tcg_out_movi.  */
+    case INDEX_op_movi_i64:
+    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
     default:
-        tcg_dump_ops(s);
         tcg_abort();
     }
 }
@@ -2021,14 +1995,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 static const TCGTargetOpDef ppc_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
-    { INDEX_op_call, { "ri" } },
     { INDEX_op_br, { } },
 
-    { INDEX_op_mov_i32, { "r", "r" } },
-    { INDEX_op_mov_i64, { "r", "r" } },
-    { INDEX_op_movi_i32, { "r" } },
-    { INDEX_op_movi_i64, { "r" } },
-
     { INDEX_op_ld8u_i32, { "r", "r" } },
     { INDEX_op_ld8s_i32, { "r", "r" } },
     { INDEX_op_ld16u_i32, { "r", "r" } },
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 3815b840a6..29f479a3cf 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -25,6 +25,7 @@
 #define TCG_TARGET_PPC64 1
 
 #define TCG_TARGET_NB_REGS 32
+#define TCG_TARGET_INSN_UNIT_SIZE 4
 
 typedef enum {
     TCG_REG_R0 = 0,
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 1d912a7937..ebdd0743cf 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -320,7 +320,7 @@ static const uint8_t tcg_cond_to_ltr_cond[] = {
 #ifdef CONFIG_SOFTMMU
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
-static const void * const qemu_ld_helpers[4] = {
+static void * const qemu_ld_helpers[4] = {
     helper_ldb_mmu,
     helper_ldw_mmu,
     helper_ldl_mmu,
@@ -329,7 +329,7 @@ static const void * const qemu_ld_helpers[4] = {
 
 /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
    uintxx_t val, int mmu_idx) */
-static const void * const qemu_st_helpers[4] = {
+static void * const qemu_st_helpers[4] = {
     helper_stb_mmu,
     helper_stw_mmu,
     helper_stl_mmu,
@@ -337,7 +337,7 @@ static const void * const qemu_st_helpers[4] = {
 };
 #endif
 
-static uint8_t *tb_ret_addr;
+static tcg_insn_unit *tb_ret_addr;
 
 /* A list of relevant facilities used by this translator.  Some of these
    are required for proper operation, and these are checked at startup.  */
@@ -350,23 +350,20 @@ static uint8_t *tb_ret_addr;
 
 static uint64_t facilities;
 
-static void patch_reloc(uint8_t *code_ptr, int type,
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
-    intptr_t code_ptr_tl = (intptr_t)code_ptr;
-    intptr_t pcrel2;
-
-    /* ??? Not the usual definition of "addend".  */
-    pcrel2 = (value - (code_ptr_tl + addend)) >> 1;
+    intptr_t pcrel2 = (tcg_insn_unit *)value - (code_ptr - 1);
+    assert(addend == -2);
 
     switch (type) {
     case R_390_PC16DBL:
         assert(pcrel2 == (int16_t)pcrel2);
-        *(int16_t *)code_ptr = pcrel2;
+        tcg_patch16(code_ptr, pcrel2);
         break;
     case R_390_PC32DBL:
         assert(pcrel2 == (int32_t)pcrel2);
-        *(int32_t *)code_ptr = pcrel2;
+        tcg_patch32(code_ptr, pcrel2);
         break;
     default:
         tcg_abort();
@@ -672,7 +669,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
 
     /* Try for PC-relative address load.  */
     if ((sval & 1) == 0) {
-        intptr_t off = (sval - (intptr_t)s->code_ptr) >> 1;
+        ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1;
         if (off == (int32_t)off) {
             tcg_out_insn(s, RIL, LARL, ret, off);
             return;
@@ -789,10 +786,10 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
 /* load data from an absolute host address */
 static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs)
 {
-    tcg_target_long addr = (tcg_target_long)abs;
+    intptr_t addr = (intptr_t)abs;
 
-    if (facilities & FACILITY_GEN_INST_EXT) {
-        tcg_target_long disp = (addr - (tcg_target_long)s->code_ptr) >> 1;
+    if ((facilities & FACILITY_GEN_INST_EXT) && !(addr & 1)) {
+        ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1;
         if (disp == (int32_t)disp) {
             if (type == TCG_TYPE_I32) {
                 tcg_out_insn(s, RIL, LRL, dest, disp);
@@ -1154,15 +1151,15 @@ static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
     tcg_out_risbg(s, dest, src, msb, lsb, ofs, 0);
 }
 
-static void tgen_gotoi(TCGContext *s, int cc, tcg_target_long dest)
+static void tgen_gotoi(TCGContext *s, int cc, tcg_insn_unit *dest)
 {
-    tcg_target_long off = (dest - (tcg_target_long)s->code_ptr) >> 1;
-    if (off > -0x8000 && off < 0x7fff) {
+    ptrdiff_t off = dest - s->code_ptr;
+    if (off == (int16_t)off) {
         tcg_out_insn(s, RI, BRC, cc, off);
     } else if (off == (int32_t)off) {
         tcg_out_insn(s, RIL, BRCL, cc, off);
     } else {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, dest);
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
         tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
     }
 }
@@ -1171,15 +1168,15 @@ static void tgen_branch(TCGContext *s, int cc, int labelno)
 {
     TCGLabel* l = &s->labels[labelno];
     if (l->has_value) {
-        tgen_gotoi(s, cc, l->u.value);
+        tgen_gotoi(s, cc, l->u.value_ptr);
     } else if (USE_LONG_BRANCHES) {
         tcg_out16(s, RIL_BRCL | (cc << 4));
         tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, labelno, -2);
-        s->code_ptr += 4;
+        s->code_ptr += 2;
     } else {
         tcg_out16(s, RI_BRC | (cc << 4));
         tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, labelno, -2);
-        s->code_ptr += 2;
+        s->code_ptr += 1;
     }
 }
 
@@ -1187,14 +1184,14 @@ static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
                                 TCGReg r1, TCGReg r2, int labelno)
 {
     TCGLabel* l = &s->labels[labelno];
-    tcg_target_long off;
+    intptr_t off;
 
     if (l->has_value) {
-        off = (l->u.value - (tcg_target_long)s->code_ptr) >> 1;
+        off = l->u.value_ptr - s->code_ptr;
     } else {
         /* We need to keep the offset unchanged for retranslation.  */
-        off = ((int16_t *)s->code_ptr)[1];
-        tcg_out_reloc(s, s->code_ptr + 2, R_390_PC16DBL, labelno, -2);
+        off = s->code_ptr[1];
+        tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, labelno, -2);
     }
 
     tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
@@ -1209,11 +1206,11 @@ static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
     tcg_target_long off;
 
     if (l->has_value) {
-        off = (l->u.value - (tcg_target_long)s->code_ptr) >> 1;
+        off = l->u.value_ptr - s->code_ptr;
     } else {
         /* We need to keep the offset unchanged for retranslation.  */
-        off = ((int16_t *)s->code_ptr)[1];
-        tcg_out_reloc(s, s->code_ptr + 2, R_390_PC16DBL, labelno, -2);
+        off = s->code_ptr[1];
+        tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, labelno, -2);
     }
 
     tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
@@ -1272,13 +1269,13 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
     tgen_branch(s, cc, labelno);
 }
 
-static void tgen_calli(TCGContext *s, tcg_target_long dest)
+static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
 {
-    tcg_target_long off = (dest - (tcg_target_long)s->code_ptr) >> 1;
+    ptrdiff_t off = dest - s->code_ptr;
     if (off == (int32_t)off) {
         tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
     } else {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, dest);
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
         tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
     }
 }
@@ -1395,14 +1392,14 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data,
 #if defined(CONFIG_SOFTMMU)
 static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
                                     TCGReg addr_reg, int mem_index, int opc,
-                                    uint16_t **label2_ptr_p, int is_store)
+                                    tcg_insn_unit **label2_ptr_p, int is_store)
 {
     const TCGReg arg0 = tcg_target_call_iarg_regs[0];
     const TCGReg arg1 = tcg_target_call_iarg_regs[1];
     const TCGReg arg2 = tcg_target_call_iarg_regs[2];
     const TCGReg arg3 = tcg_target_call_iarg_regs[3];
     int s_bits = opc & 3;
-    uint16_t *label1_ptr;
+    tcg_insn_unit *label1_ptr;
     tcg_target_long ofs;
 
     if (TARGET_LONG_BITS == 32) {
@@ -1436,7 +1433,7 @@ static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
         tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg);
     }
 
-    label1_ptr = (uint16_t*)s->code_ptr;
+    label1_ptr = s->code_ptr;
 
     /* je label1 (offset will be patched in later) */
     tcg_out_insn(s, RI, BRC, S390_CC_EQ, 0);
@@ -1463,11 +1460,11 @@ static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
         }
         tcg_out_movi(s, TCG_TYPE_I32, arg3, mem_index);
         tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0);
-        tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]);
+        tcg_out_call(s, qemu_st_helpers[s_bits]);
     } else {
         tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
         tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0);
-        tgen_calli(s, (tcg_target_ulong)qemu_ld_helpers[s_bits]);
+        tcg_out_call(s, qemu_ld_helpers[s_bits]);
 
         /* sign extension */
         switch (opc) {
@@ -1488,13 +1485,12 @@ static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
     }
 
     /* jump to label2 (end) */
-    *label2_ptr_p = (uint16_t*)s->code_ptr;
+    *label2_ptr_p = s->code_ptr;
 
     tcg_out_insn(s, RI, BRC, S390_CC_ALWAYS, 0);
 
     /* this is label1, patch branch */
-    *(label1_ptr + 1) = ((unsigned long)s->code_ptr -
-                         (unsigned long)label1_ptr) >> 1;
+    label1_ptr[1] = s->code_ptr - label1_ptr;
 
     ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
     assert(ofs < 0x80000);
@@ -1504,11 +1500,10 @@ static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
     return arg1;
 }
 
-static void tcg_finish_qemu_ldst(TCGContext* s, uint16_t *label2_ptr)
+static void tcg_finish_qemu_ldst(TCGContext* s, tcg_insn_unit *label2_ptr)
 {
     /* patch branch */
-    *(label2_ptr + 1) = ((unsigned long)s->code_ptr -
-                         (unsigned long)label2_ptr) >> 1;
+    label2_ptr[1] = s->code_ptr - label2_ptr;
 }
 #else
 static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
@@ -1535,7 +1530,7 @@ static void tcg_out_qemu_ld(TCGContext* s, const TCGArg* args, int opc)
     TCGReg addr_reg, data_reg;
 #if defined(CONFIG_SOFTMMU)
     int mem_index;
-    uint16_t *label2_ptr;
+    tcg_insn_unit *label2_ptr;
 #else
     TCGReg index_reg;
     tcg_target_long disp;
@@ -1564,7 +1559,7 @@ static void tcg_out_qemu_st(TCGContext* s, const TCGArg* args, int opc)
     TCGReg addr_reg, data_reg;
 #if defined(CONFIG_SOFTMMU)
     int mem_index;
-    uint16_t *label2_ptr;
+    tcg_insn_unit *label2_ptr;
 #else
     TCGReg index_reg;
     tcg_target_long disp;
@@ -1602,7 +1597,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_exit_tb:
         /* return value */
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, args[0]);
-        tgen_gotoi(s, S390_CC_ALWAYS, (unsigned long)tb_ret_addr);
+        tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
         break;
 
     case INDEX_op_goto_tb:
@@ -1614,22 +1609,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
             /* and go there */
             tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_TMP0);
         }
-        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
-        break;
-
-    case INDEX_op_call:
-        if (const_args[0]) {
-            tgen_calli(s, args[0]);
-        } else {
-            tcg_out_insn(s, RR, BASR, TCG_REG_R14, args[0]);
-        }
-        break;
-
-    case INDEX_op_mov_i32:
-        tcg_out_mov(s, TCG_TYPE_I32, args[0], args[1]);
-        break;
-    case INDEX_op_movi_i32:
-        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
+        s->tb_next_offset[args[0]] = tcg_current_code_size(s);
         break;
 
     OP_32_64(ld8u):
@@ -1864,13 +1844,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_qemu_st(s, args, LD_UINT64);
         break;
 
-    case INDEX_op_mov_i64:
-        tcg_out_mov(s, TCG_TYPE_I64, args[0], args[1]);
-        break;
-    case INDEX_op_movi_i64:
-        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
-        break;
-
     case INDEX_op_ld16s_i64:
         tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
         break;
@@ -2075,8 +2048,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tgen_deposit(s, args[0], args[2], args[3], args[4]);
         break;
 
+    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
+    case INDEX_op_mov_i64:
+    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
+    case INDEX_op_movi_i64:
+    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
     default:
-        fprintf(stderr,"unimplemented opc 0x%x\n",opc);
         tcg_abort();
     }
 }
@@ -2084,12 +2061,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
-    { INDEX_op_call, { "ri" } },
     { INDEX_op_br, { } },
 
-    { INDEX_op_mov_i32, { "r", "r" } },
-    { INDEX_op_movi_i32, { "r" } },
-
     { INDEX_op_ld8u_i32, { "r", "r" } },
     { INDEX_op_ld8s_i32, { "r", "r" } },
     { INDEX_op_ld16u_i32, { "r", "r" } },
@@ -2147,9 +2120,6 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_qemu_st32, { "L", "L" } },
     { INDEX_op_qemu_st64, { "L", "L" } },
 
-    { INDEX_op_mov_i64, { "r", "r" } },
-    { INDEX_op_movi_i64, { "r" } },
-
     { INDEX_op_ld8u_i64, { "r", "r" } },
     { INDEX_op_ld8s_i64, { "r", "r" } },
     { INDEX_op_ld16u_i64, { "r", "r" } },
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 755c00237e..5bf733eb3d 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -24,6 +24,8 @@
 #ifndef TCG_TARGET_S390 
 #define TCG_TARGET_S390 1
 
+#define TCG_TARGET_INSN_UNIT_SIZE 2
+
 typedef enum TCGReg {
     TCG_REG_R0 = 0,
     TCG_REG_R1,
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 5468ff5c16..17ff5773ad 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -279,37 +279,32 @@ static inline int check_fit_i32(int32_t val, unsigned int bits)
 # define check_fit_ptr  check_fit_i32
 #endif
 
-static void patch_reloc(uint8_t *code_ptr, int type,
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
     uint32_t insn;
-    value += addend;
+
+    assert(addend == 0);
+    value = tcg_ptr_byte_diff((tcg_insn_unit *)value, code_ptr);
+
     switch (type) {
-    case R_SPARC_32:
-        if (value != (uint32_t)value) {
-            tcg_abort();
-        }
-        *(uint32_t *)code_ptr = value;
-        break;
     case R_SPARC_WDISP16:
-        value -= (intptr_t)code_ptr;
         if (!check_fit_ptr(value >> 2, 16)) {
             tcg_abort();
         }
-        insn = *(uint32_t *)code_ptr;
+        insn = *code_ptr;
         insn &= ~INSN_OFF16(-1);
         insn |= INSN_OFF16(value);
-        *(uint32_t *)code_ptr = insn;
+        *code_ptr = insn;
         break;
     case R_SPARC_WDISP19:
-        value -= (intptr_t)code_ptr;
         if (!check_fit_ptr(value >> 2, 19)) {
             tcg_abort();
         }
-        insn = *(uint32_t *)code_ptr;
+        insn = *code_ptr;
         insn &= ~INSN_OFF19(-1);
         insn |= INSN_OFF19(value);
-        *(uint32_t *)code_ptr = insn;
+        *code_ptr = insn;
         break;
     default:
         tcg_abort();
@@ -573,10 +568,10 @@ static void tcg_out_bpcc(TCGContext *s, int scond, int flags, int label)
     int off19;
 
     if (l->has_value) {
-        off19 = INSN_OFF19(l->u.value - (unsigned long)s->code_ptr);
+        off19 = INSN_OFF19(tcg_pcrel_diff(s, l->u.value_ptr));
     } else {
         /* Make sure to preserve destinations during retranslation.  */
-        off19 = *(uint32_t *)s->code_ptr & INSN_OFF19(-1);
+        off19 = *s->code_ptr & INSN_OFF19(-1);
         tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label, 0);
     }
     tcg_out_bpcc0(s, scond, flags, off19);
@@ -620,10 +615,10 @@ static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGReg arg1,
         int off16;
 
         if (l->has_value) {
-            off16 = INSN_OFF16(l->u.value - (unsigned long)s->code_ptr);
+            off16 = INSN_OFF16(tcg_pcrel_diff(s, l->u.value_ptr));
         } else {
             /* Make sure to preserve destinations during retranslation.  */
-            off16 = *(uint32_t *)s->code_ptr & INSN_OFF16(-1);
+            off16 = *s->code_ptr & INSN_OFF16(-1);
             tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, label, 0);
         }
         tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1)
@@ -740,62 +735,66 @@ static void tcg_out_addsub2(TCGContext *s, TCGReg rl, TCGReg rh,
     tcg_out_mov(s, TCG_TYPE_I32, rl, tmp);
 }
 
-static void tcg_out_calli(TCGContext *s, uintptr_t dest)
+static void tcg_out_call_nodelay(TCGContext *s, tcg_insn_unit *dest)
 {
-    intptr_t disp = dest - (uintptr_t)s->code_ptr;
+    ptrdiff_t disp = tcg_pcrel_diff(s, dest);
 
     if (disp == (int32_t)disp) {
         tcg_out32(s, CALL | (uint32_t)disp >> 2);
     } else {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, dest & ~0xfff);
-        tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, dest & 0xfff, JMPL);
+        uintptr_t desti = (uintptr_t)dest;
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, desti & ~0xfff);
+        tcg_out_arithi(s, TCG_REG_O7, TCG_REG_T1, desti & 0xfff, JMPL);
     }
 }
 
+static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
+{
+    tcg_out_call_nodelay(s, dest);
+    tcg_out_nop(s);
+}
+
 #ifdef CONFIG_SOFTMMU
-static uintptr_t qemu_ld_trampoline[16];
-static uintptr_t qemu_st_trampoline[16];
+static tcg_insn_unit *qemu_ld_trampoline[16];
+static tcg_insn_unit *qemu_st_trampoline[16];
 
 static void build_trampolines(TCGContext *s)
 {
-    static uintptr_t const qemu_ld_helpers[16] = {
-        [MO_UB]   = (uintptr_t)helper_ret_ldub_mmu,
-        [MO_SB]   = (uintptr_t)helper_ret_ldsb_mmu,
-        [MO_LEUW] = (uintptr_t)helper_le_lduw_mmu,
-        [MO_LESW] = (uintptr_t)helper_le_ldsw_mmu,
-        [MO_LEUL] = (uintptr_t)helper_le_ldul_mmu,
-        [MO_LEQ]  = (uintptr_t)helper_le_ldq_mmu,
-        [MO_BEUW] = (uintptr_t)helper_be_lduw_mmu,
-        [MO_BESW] = (uintptr_t)helper_be_ldsw_mmu,
-        [MO_BEUL] = (uintptr_t)helper_be_ldul_mmu,
-        [MO_BEQ]  = (uintptr_t)helper_be_ldq_mmu,
+    static void * const qemu_ld_helpers[16] = {
+        [MO_UB]   = helper_ret_ldub_mmu,
+        [MO_SB]   = helper_ret_ldsb_mmu,
+        [MO_LEUW] = helper_le_lduw_mmu,
+        [MO_LESW] = helper_le_ldsw_mmu,
+        [MO_LEUL] = helper_le_ldul_mmu,
+        [MO_LEQ]  = helper_le_ldq_mmu,
+        [MO_BEUW] = helper_be_lduw_mmu,
+        [MO_BESW] = helper_be_ldsw_mmu,
+        [MO_BEUL] = helper_be_ldul_mmu,
+        [MO_BEQ]  = helper_be_ldq_mmu,
     };
-    static uintptr_t const qemu_st_helpers[16] = {
-        [MO_UB]   = (uintptr_t)helper_ret_stb_mmu,
-        [MO_LEUW] = (uintptr_t)helper_le_stw_mmu,
-        [MO_LEUL] = (uintptr_t)helper_le_stl_mmu,
-        [MO_LEQ]  = (uintptr_t)helper_le_stq_mmu,
-        [MO_BEUW] = (uintptr_t)helper_be_stw_mmu,
-        [MO_BEUL] = (uintptr_t)helper_be_stl_mmu,
-        [MO_BEQ]  = (uintptr_t)helper_be_stq_mmu,
+    static void * const qemu_st_helpers[16] = {
+        [MO_UB]   = helper_ret_stb_mmu,
+        [MO_LEUW] = helper_le_stw_mmu,
+        [MO_LEUL] = helper_le_stl_mmu,
+        [MO_LEQ]  = helper_le_stq_mmu,
+        [MO_BEUW] = helper_be_stw_mmu,
+        [MO_BEUL] = helper_be_stl_mmu,
+        [MO_BEQ]  = helper_be_stq_mmu,
     };
 
     int i;
     TCGReg ra;
-    uintptr_t tramp;
 
     for (i = 0; i < 16; ++i) {
-        if (qemu_ld_helpers[i] == 0) {
+        if (qemu_ld_helpers[i] == NULL) {
             continue;
         }
 
         /* May as well align the trampoline.  */
-        tramp = (uintptr_t)s->code_ptr;
-        while (tramp & 15) {
+        while ((uintptr_t)s->code_ptr & 15) {
             tcg_out_nop(s);
-            tramp += 4;
         }
-        qemu_ld_trampoline[i] = tramp;
+        qemu_ld_trampoline[i] = s->code_ptr;
 
         if (SPARC64 || TARGET_LONG_BITS == 32) {
             ra = TCG_REG_O3;
@@ -810,22 +809,20 @@ static void build_trampolines(TCGContext *s)
         /* Set the env operand.  */
         tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0);
         /* Tail call.  */
-        tcg_out_calli(s, qemu_ld_helpers[i]);
+        tcg_out_call_nodelay(s, qemu_ld_helpers[i]);
         tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra);
     }
 
     for (i = 0; i < 16; ++i) {
-        if (qemu_st_helpers[i] == 0) {
+        if (qemu_st_helpers[i] == NULL) {
             continue;
         }
 
         /* May as well align the trampoline.  */
-        tramp = (uintptr_t)s->code_ptr;
-        while (tramp & 15) {
+        while ((uintptr_t)s->code_ptr & 15) {
             tcg_out_nop(s);
-            tramp += 4;
         }
-        qemu_st_trampoline[i] = tramp;
+        qemu_st_trampoline[i] = s->code_ptr;
 
         if (SPARC64) {
             ra = TCG_REG_O4;
@@ -859,7 +856,7 @@ static void build_trampolines(TCGContext *s)
         /* Set the env operand.  */
         tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O0, TCG_AREG0);
         /* Tail call.  */
-        tcg_out_calli(s, qemu_st_helpers[i]);
+        tcg_out_call_nodelay(s, qemu_st_helpers[i]);
         tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O7, ra);
     }
 }
@@ -1005,8 +1002,8 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
 #ifdef CONFIG_SOFTMMU
     TCGMemOp s_bits = memop & MO_SIZE;
     TCGReg addrz, param;
-    uintptr_t func;
-    uint32_t *label_ptr;
+    tcg_insn_unit *func;
+    tcg_insn_unit *label_ptr;
 
     addrz = tcg_out_tlb_load(s, addr, memi, s_bits,
                              offsetof(CPUTLBEntry, addr_read));
@@ -1016,7 +1013,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
        over the TLB Miss case.  */
 
     /* beq,a,pt %[xi]cc, label0 */
-    label_ptr = (uint32_t *)s->code_ptr;
+    label_ptr = s->code_ptr;
     tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
                   | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
     /* delay slot */
@@ -1038,8 +1035,8 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
     } else {
         func = qemu_ld_trampoline[memop];
     }
-    assert(func != 0);
-    tcg_out_calli(s, func);
+    assert(func != NULL);
+    tcg_out_call_nodelay(s, func);
     /* delay slot */
     tcg_out_movi(s, TCG_TYPE_I32, param, memi);
 
@@ -1067,7 +1064,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
         }
     }
 
-    *label_ptr |= INSN_OFF19((uintptr_t)s->code_ptr - (uintptr_t)label_ptr);
+    *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
 #else
     if (SPARC64 && TARGET_LONG_BITS == 32) {
         tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
@@ -1085,8 +1082,8 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
 #ifdef CONFIG_SOFTMMU
     TCGMemOp s_bits = memop & MO_SIZE;
     TCGReg addrz, param;
-    uintptr_t func;
-    uint32_t *label_ptr;
+    tcg_insn_unit *func;
+    tcg_insn_unit *label_ptr;
 
     addrz = tcg_out_tlb_load(s, addr, memi, s_bits,
                              offsetof(CPUTLBEntry, addr_write));
@@ -1094,7 +1091,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
     /* The fast path is exactly one insn.  Thus we can perform the entire
        TLB Hit in the (annulled) delay slot of the branch over TLB Miss.  */
     /* beq,a,pt %[xi]cc, label0 */
-    label_ptr = (uint32_t *)s->code_ptr;
+    label_ptr = s->code_ptr;
     tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
                   | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
     /* delay slot */
@@ -1115,12 +1112,12 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
     tcg_out_mov(s, TCG_TYPE_REG, param++, data);
 
     func = qemu_st_trampoline[memop];
-    assert(func != 0);
-    tcg_out_calli(s, func);
+    assert(func != NULL);
+    tcg_out_call_nodelay(s, func);
     /* delay slot */
     tcg_out_movi(s, TCG_TYPE_REG, param, memi);
 
-    *label_ptr |= INSN_OFF19((uintptr_t)s->code_ptr - (uintptr_t)label_ptr);
+    *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
 #else
     if (SPARC64 && TARGET_LONG_BITS == 32) {
         tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL);
@@ -1159,26 +1156,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_goto_tb:
         if (s->tb_jmp_offset) {
             /* direct jump method */
-            uint32_t old_insn = *(uint32_t *)s->code_ptr;
-            s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
+            s->tb_jmp_offset[a0] = tcg_current_code_size(s);
             /* Make sure to preserve links during retranslation.  */
-            tcg_out32(s, CALL | (old_insn & ~INSN_OP(-1)));
+            tcg_out32(s, CALL | (*s->code_ptr & ~INSN_OP(-1)));
         } else {
             /* indirect jump method */
             tcg_out_ld_ptr(s, TCG_REG_T1, (uintptr_t)(s->tb_next + a0));
             tcg_out_arithi(s, TCG_REG_G0, TCG_REG_T1, 0, JMPL);
         }
         tcg_out_nop(s);
-        s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
-        break;
-    case INDEX_op_call:
-        if (const_args[0]) {
-            tcg_out_calli(s, a0);
-        } else {
-            tcg_out_arithi(s, TCG_REG_O7, a0, 0, JMPL);
-        }
-        /* delay slot */
-        tcg_out_nop(s);
+        s->tb_next_offset[a0] = tcg_current_code_size(s);
         break;
     case INDEX_op_br:
         tcg_out_bpcc(s, COND_A, BPCC_PT, a0);
@@ -1373,13 +1360,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 	tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c);
 	break;
 
+    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
     case INDEX_op_mov_i64:
-    case INDEX_op_mov_i32:
+    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
     case INDEX_op_movi_i64:
-    case INDEX_op_movi_i32:
-        /* Always implemented with tcg_out_mov/i, never with tcg_out_op.  */
+    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
     default:
-        /* Opcode not implemented.  */
         tcg_abort();
     }
 }
@@ -1387,11 +1373,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
-    { INDEX_op_call, { "ri" } },
     { INDEX_op_br, { } },
 
-    { INDEX_op_mov_i32, { "r", "r" } },
-    { INDEX_op_movi_i32, { "r" } },
     { INDEX_op_ld8u_i32, { "r", "r" } },
     { INDEX_op_ld8s_i32, { "r", "r" } },
     { INDEX_op_ld16u_i32, { "r", "r" } },
@@ -1428,8 +1411,6 @@ static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } },
     { INDEX_op_muls2_i32, { "r", "r", "rZ", "rJ" } },
 
-    { INDEX_op_mov_i64, { "R", "R" } },
-    { INDEX_op_movi_i64, { "R" } },
     { INDEX_op_ld8u_i64, { "R", "r" } },
     { INDEX_op_ld8s_i64, { "R", "r" } },
     { INDEX_op_ld16u_i64, { "R", "r" } },
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 3a903dbfd0..473bfc71fd 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -26,6 +26,7 @@
 
 #define TCG_TARGET_REG_BITS 64
 
+#define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_NB_REGS 32
 
 typedef enum {
diff --git a/tcg/tcg-be-ldst.h b/tcg/tcg-be-ldst.h
index ad94c0ca51..49b3de61ea 100644
--- a/tcg/tcg-be-ldst.h
+++ b/tcg/tcg-be-ldst.h
@@ -31,8 +31,8 @@ typedef struct TCGLabelQemuLdst {
     TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
     TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
     int mem_index;          /* soft MMU memory index */
-    uint8_t *raddr;         /* gen code addr of the next IR of qemu_ld/st IR */
-    uint8_t *label_ptr[2];  /* label pointers to be updated */
+    tcg_insn_unit *raddr;   /* gen code addr of the next IR of qemu_ld/st IR */
+    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
 } TCGLabelQemuLdst;
 
 typedef struct TCGBackendData {
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 6dbbb38838..bdd0139482 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -390,11 +390,7 @@ static inline int tcg_gen_sizemask(int n, int is_64bit, int is_signed)
 static inline void tcg_gen_helperN(void *func, int flags, int sizemask,
                                    TCGArg ret, int nargs, TCGArg *args)
 {
-    TCGv_ptr fn;
-    fn = tcg_const_ptr(func);
-    tcg_gen_callN(&tcg_ctx, fn, flags, sizemask, ret,
-                  nargs, args);
-    tcg_temp_free_ptr(fn);
+    tcg_gen_callN(&tcg_ctx, func, flags, sizemask, ret, nargs, args);
 }
 
 /* Note: Both tcg_gen_helper32() and tcg_gen_helper64() are currently
@@ -405,29 +401,23 @@ static inline void tcg_gen_helperN(void *func, int flags, int sizemask,
 static inline void tcg_gen_helper32(void *func, int sizemask, TCGv_i32 ret,
                                     TCGv_i32 a, TCGv_i32 b)
 {
-    TCGv_ptr fn;
     TCGArg args[2];
-    fn = tcg_const_ptr(func);
     args[0] = GET_TCGV_I32(a);
     args[1] = GET_TCGV_I32(b);
-    tcg_gen_callN(&tcg_ctx, fn,
+    tcg_gen_callN(&tcg_ctx, func,
                   TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS,
                   sizemask, GET_TCGV_I32(ret), 2, args);
-    tcg_temp_free_ptr(fn);
 }
 
 static inline void tcg_gen_helper64(void *func, int sizemask, TCGv_i64 ret,
                                     TCGv_i64 a, TCGv_i64 b)
 {
-    TCGv_ptr fn;
     TCGArg args[2];
-    fn = tcg_const_ptr(func);
     args[0] = GET_TCGV_I64(a);
     args[1] = GET_TCGV_I64(b);
-    tcg_gen_callN(&tcg_ctx, fn,
+    tcg_gen_callN(&tcg_ctx, func,
                   TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS,
                   sizemask, GET_TCGV_I64(ret), 2, args);
-    tcg_temp_free_ptr(fn);
 }
 
 /* 32 bit ops */
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index adb6ce1bb9..71ba64ac16 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -40,7 +40,7 @@ DEF(discard, 1, 0, 0, TCG_OPF_NOT_PRESENT)
 DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
 
 /* variable number of parameters */
-DEF(call, 0, 1, 2, TCG_OPF_CALL_CLOBBER)
+DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT)
 
 DEF(br, 0, 0, 1, TCG_OPF_BB_END)
 
@@ -51,8 +51,8 @@ DEF(br, 0, 0, 1, TCG_OPF_BB_END)
 # define IMPL64  TCG_OPF_64BIT
 #endif
 
-DEF(mov_i32, 1, 1, 0, 0)
-DEF(movi_i32, 1, 0, 1, 0)
+DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT)
+DEF(movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT)
 DEF(setcond_i32, 1, 2, 1, 0)
 DEF(movcond_i32, 1, 4, 1, IMPL(TCG_TARGET_HAS_movcond_i32))
 /* load/store */
@@ -110,8 +110,8 @@ DEF(eqv_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_eqv_i32))
 DEF(nand_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nand_i32))
 DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32))
 
-DEF(mov_i64, 1, 1, 0, IMPL64)
-DEF(movi_i64, 1, 0, 1, IMPL64)
+DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
+DEF(movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
 DEF(setcond_i64, 1, 2, 1, IMPL64)
 DEF(movcond_i64, 1, 4, 1, IMPL64 | IMPL(TCG_TARGET_HAS_movcond_i64))
 /* load/store */
diff --git a/tcg/tcg.c b/tcg/tcg.c
index e71f7a0975..0670affe2f 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -65,7 +65,7 @@
 /* Forward declarations for functions declared in tcg-target.c and used here. */
 static void tcg_target_init(TCGContext *s);
 static void tcg_target_qemu_prologue(TCGContext *s);
-static void patch_reloc(uint8_t *code_ptr, int type, 
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend);
 
 /* The CIE and FDE header definitions will be common to all hosts.  */
@@ -101,6 +101,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                        const int *const_args);
 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                        intptr_t arg2);
+static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
 static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                   const TCGArgConstraint *arg_ct);
 static void tcg_out_tb_init(TCGContext *s);
@@ -117,35 +118,87 @@ const size_t tcg_op_defs_max = ARRAY_SIZE(tcg_op_defs);
 static TCGRegSet tcg_target_available_regs[2];
 static TCGRegSet tcg_target_call_clobber_regs;
 
+#if TCG_TARGET_INSN_UNIT_SIZE == 1
 static inline void tcg_out8(TCGContext *s, uint8_t v)
 {
     *s->code_ptr++ = v;
 }
 
+static inline void tcg_patch8(tcg_insn_unit *p, uint8_t v)
+{
+    *p = v;
+}
+#endif
+
+#if TCG_TARGET_INSN_UNIT_SIZE <= 2
 static inline void tcg_out16(TCGContext *s, uint16_t v)
 {
-    uint8_t *p = s->code_ptr;
-    *(uint16_t *)p = v;
-    s->code_ptr = p + 2;
+    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
+        *s->code_ptr++ = v;
+    } else {
+        tcg_insn_unit *p = s->code_ptr;
+        memcpy(p, &v, sizeof(v));
+        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
+    }
+}
+
+static inline void tcg_patch16(tcg_insn_unit *p, uint16_t v)
+{
+    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
+        *p = v;
+    } else {
+        memcpy(p, &v, sizeof(v));
+    }
 }
+#endif
 
+#if TCG_TARGET_INSN_UNIT_SIZE <= 4
 static inline void tcg_out32(TCGContext *s, uint32_t v)
 {
-    uint8_t *p = s->code_ptr;
-    *(uint32_t *)p = v;
-    s->code_ptr = p + 4;
+    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
+        *s->code_ptr++ = v;
+    } else {
+        tcg_insn_unit *p = s->code_ptr;
+        memcpy(p, &v, sizeof(v));
+        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
+    }
+}
+
+static inline void tcg_patch32(tcg_insn_unit *p, uint32_t v)
+{
+    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
+        *p = v;
+    } else {
+        memcpy(p, &v, sizeof(v));
+    }
 }
+#endif
 
+#if TCG_TARGET_INSN_UNIT_SIZE <= 8
 static inline void tcg_out64(TCGContext *s, uint64_t v)
 {
-    uint8_t *p = s->code_ptr;
-    *(uint64_t *)p = v;
-    s->code_ptr = p + 8;
+    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
+        *s->code_ptr++ = v;
+    } else {
+        tcg_insn_unit *p = s->code_ptr;
+        memcpy(p, &v, sizeof(v));
+        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
+    }
+}
+
+static inline void tcg_patch64(tcg_insn_unit *p, uint64_t v)
+{
+    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
+        *p = v;
+    } else {
+        memcpy(p, &v, sizeof(v));
+    }
 }
+#endif
 
 /* label relocation processing */
 
-static void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type,
+static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                           int label_index, intptr_t addend)
 {
     TCGLabel *l;
@@ -168,23 +221,20 @@ static void tcg_out_reloc(TCGContext *s, uint8_t *code_ptr, int type,
     }
 }
 
-static void tcg_out_label(TCGContext *s, int label_index, void *ptr)
+static void tcg_out_label(TCGContext *s, int label_index, tcg_insn_unit *ptr)
 {
-    TCGLabel *l;
-    TCGRelocation *r;
+    TCGLabel *l = &s->labels[label_index];
     intptr_t value = (intptr_t)ptr;
+    TCGRelocation *r;
 
-    l = &s->labels[label_index];
-    if (l->has_value) {
-        tcg_abort();
-    }
-    r = l->u.first_reloc;
-    while (r != NULL) {
+    assert(!l->has_value);
+
+    for (r = l->u.first_reloc; r != NULL; r = r->next) {
         patch_reloc(r->ptr, r->type, value, r->addend);
-        r = r->next;
     }
+
     l->has_value = 1;
-    l->u.value = value;
+    l->u.value_ptr = ptr;
 }
 
 int gen_new_label(void)
@@ -339,7 +389,7 @@ void tcg_prologue_init(TCGContext *s)
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
-        size_t size = s->code_ptr - s->code_buf;
+        size_t size = tcg_current_code_size(s);
         qemu_log("PROLOGUE: [size=%zu]\n", size);
         log_disas(s->code_buf, size);
         qemu_log("\n");
@@ -656,7 +706,7 @@ int tcg_check_temp_count(void)
 /* Note: we convert the 64 bit args to 32 bit and do some alignment
    and endian swap. Maybe it would be better to do the alignment
    and endian swap in tcg_reg_alloc_call(). */
-void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
+void tcg_gen_callN(TCGContext *s, void *func, unsigned int flags,
                    int sizemask, TCGArg ret, int nargs, TCGArg *args)
 {
     int i;
@@ -783,11 +833,10 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
         *s->gen_opparam_ptr++ = args[i];
         real_args++;
     }
-    *s->gen_opparam_ptr++ = GET_TCGV_PTR(func);
-
+    *s->gen_opparam_ptr++ = (uintptr_t)func;
     *s->gen_opparam_ptr++ = flags;
 
-    *nparam = (nb_rets << 16) | (real_args + 1);
+    *nparam = (nb_rets << 16) | real_args;
 
     /* total parameters, needed to go backward in the instruction stream */
     *s->gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
@@ -1194,49 +1243,21 @@ void tcg_dump_ops(TCGContext *s)
             nb_iargs = arg & 0xffff;
             nb_cargs = def->nb_cargs;
 
-            qemu_log(" %s ", def->name);
-
-            /* function name */
-            qemu_log("%s",
-                     tcg_get_arg_str_idx(s, buf, sizeof(buf),
-                                         args[nb_oargs + nb_iargs - 1]));
-            /* flags */
-            qemu_log(",$0x%" TCG_PRIlx, args[nb_oargs + nb_iargs]);
-            /* nb out args */
-            qemu_log(",$%d", nb_oargs);
-            for(i = 0; i < nb_oargs; i++) {
-                qemu_log(",");
-                qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
+            /* function name, flags, out args */
+            qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
+                     tcg_find_helper(s, args[nb_oargs + nb_iargs]),
+                     args[nb_oargs + nb_iargs + 1], nb_oargs);
+            for (i = 0; i < nb_oargs; i++) {
+                qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
                                                    args[i]));
             }
-            for(i = 0; i < (nb_iargs - 1); i++) {
-                qemu_log(",");
-                if (args[nb_oargs + i] == TCG_CALL_DUMMY_ARG) {
-                    qemu_log("<dummy>");
-                } else {
-                    qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
-                                                       args[nb_oargs + i]));
-                }
-            }
-        } else if (c == INDEX_op_movi_i32 || c == INDEX_op_movi_i64) {
-            tcg_target_ulong val;
-            const char *name;
-
-            nb_oargs = def->nb_oargs;
-            nb_iargs = def->nb_iargs;
-            nb_cargs = def->nb_cargs;
-            qemu_log(" %s %s,$", def->name,
-                     tcg_get_arg_str_idx(s, buf, sizeof(buf), args[0]));
-            val = args[1];
-            name = tcg_find_helper(s, val);
-            if (name) {
-                qemu_log("%s", name);
-            } else {
-                if (c == INDEX_op_movi_i32) {
-                    qemu_log("0x%x", (uint32_t)val);
-                } else {
-                    qemu_log("0x%" PRIx64 , (uint64_t)val);
+            for (i = 0; i < nb_iargs; i++) {
+                TCGArg arg = args[nb_oargs + i];
+                const char *t = "<dummy>";
+                if (arg != TCG_CALL_DUMMY_ARG) {
+                    t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg);
                 }
+                qemu_log(",%s", t);
             }
         } else {
             qemu_log(" %s ", def->name);
@@ -1499,9 +1520,9 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
    temporaries are removed. */
 static void tcg_liveness_analysis(TCGContext *s)
 {
-    int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops;
+    int i, op_index, nb_args, nb_iargs, nb_oargs, nb_ops;
     TCGOpcode op, op_new, op_new2;
-    TCGArg *args;
+    TCGArg *args, arg;
     const TCGOpDef *def;
     uint8_t *dead_temps, *mem_temps;
     uint16_t dead_args;
@@ -1531,15 +1552,15 @@ static void tcg_liveness_analysis(TCGContext *s)
 
                 nb_args = args[-1];
                 args -= nb_args;
-                nb_iargs = args[0] & 0xffff;
-                nb_oargs = args[0] >> 16;
-                args++;
-                call_flags = args[nb_oargs + nb_iargs];
+                arg = *args++;
+                nb_iargs = arg & 0xffff;
+                nb_oargs = arg >> 16;
+                call_flags = args[nb_oargs + nb_iargs + 1];
 
                 /* pure functions can be removed if their result is not
                    used */
                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
-                    for(i = 0; i < nb_oargs; i++) {
+                    for (i = 0; i < nb_oargs; i++) {
                         arg = args[i];
                         if (!dead_temps[arg] || mem_temps[arg]) {
                             goto do_not_remove_call;
@@ -1553,7 +1574,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                     /* output args are dead */
                     dead_args = 0;
                     sync_args = 0;
-                    for(i = 0; i < nb_oargs; i++) {
+                    for (i = 0; i < nb_oargs; i++) {
                         arg = args[i];
                         if (dead_temps[arg]) {
                             dead_args |= (1 << i);
@@ -1576,7 +1597,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                     }
 
                     /* input args are live */
-                    for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                         arg = args[i];
                         if (arg != TCG_CALL_DUMMY_ARG) {
                             if (dead_temps[arg]) {
@@ -2075,13 +2096,12 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
 {
     TCGRegSet allocated_regs;
     TCGTemp *ts, *ots;
-    const TCGArgConstraint *arg_ct, *oarg_ct;
+    TCGType type;
 
     tcg_regset_set(allocated_regs, s->reserved_regs);
     ots = &s->temps[args[0]];
     ts = &s->temps[args[1]];
-    oarg_ct = &def->args_ct[0];
-    arg_ct = &def->args_ct[1];
+    type = ots->type;
 
     /* If the source value is not in a register, and we're going to be
        forced to have it in a register in order to perform the copy,
@@ -2089,12 +2109,13 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
        we don't have to reload SOURCE the next time it is used. */
     if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG)
         || ts->val_type == TEMP_VAL_MEM) {
-        ts->reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
+        ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[type],
+                                allocated_regs);
         if (ts->val_type == TEMP_VAL_MEM) {
-            tcg_out_ld(s, ts->type, ts->reg, ts->mem_reg, ts->mem_offset);
+            tcg_out_ld(s, type, ts->reg, ts->mem_reg, ts->mem_offset);
             ts->mem_coherent = 1;
         } else if (ts->val_type == TEMP_VAL_CONST) {
-            tcg_out_movi(s, ts->type, ts->reg, ts->val);
+            tcg_out_movi(s, type, ts->reg, ts->val);
         }
         s->reg_to_temp[ts->reg] = args[1];
         ts->val_type = TEMP_VAL_REG;
@@ -2109,7 +2130,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
         if (!ots->mem_allocated) {
             temp_allocate_frame(s, args[0]);
         }
-        tcg_out_st(s, ots->type, ts->reg, ots->mem_reg, ots->mem_offset);
+        tcg_out_st(s, type, ts->reg, ots->mem_reg, ots->mem_offset);
         if (IS_DEAD_ARG(1)) {
             temp_dead(s, args[1]);
         }
@@ -2137,9 +2158,10 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
                 /* When allocating a new register, make sure to not spill the
                    input one. */
                 tcg_regset_set_reg(allocated_regs, ts->reg);
-                ots->reg = tcg_reg_alloc(s, oarg_ct->u.regs, allocated_regs);
+                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[type],
+                                         allocated_regs);
             }
-            tcg_out_mov(s, ots->type, ots->reg, ts->reg);
+            tcg_out_mov(s, type, ots->reg, ts->reg);
         }
         ots->val_type = TEMP_VAL_REG;
         ots->mem_coherent = 0;
@@ -2323,26 +2345,27 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
                               uint16_t dead_args, uint8_t sync_args)
 {
     int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params;
-    TCGArg arg, func_arg;
+    TCGArg arg;
     TCGTemp *ts;
     intptr_t stack_offset;
     size_t call_stack_size;
-    uintptr_t func_addr;
-    int const_func_arg, allocate_args;
+    tcg_insn_unit *func_addr;
+    int allocate_args;
     TCGRegSet allocated_regs;
-    const TCGArgConstraint *arg_ct;
 
     arg = *args++;
 
     nb_oargs = arg >> 16;
     nb_iargs = arg & 0xffff;
-    nb_params = nb_iargs - 1;
+    nb_params = nb_iargs;
 
-    flags = args[nb_oargs + nb_iargs];
+    func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs];
+    flags = args[nb_oargs + nb_iargs + 1];
 
     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
-    if (nb_regs > nb_params)
+    if (nb_regs > nb_params) {
         nb_regs = nb_params;
+    }
 
     /* assign stack slots first */
     call_stack_size = (nb_params - nb_regs) * sizeof(tcg_target_long);
@@ -2410,40 +2433,6 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
         }
     }
     
-    /* assign function address */
-    func_arg = args[nb_oargs + nb_iargs - 1];
-    arg_ct = &def->args_ct[0];
-    ts = &s->temps[func_arg];
-    func_addr = ts->val;
-    const_func_arg = 0;
-    if (ts->val_type == TEMP_VAL_MEM) {
-        reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
-        tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
-        func_arg = reg;
-        tcg_regset_set_reg(allocated_regs, reg);
-    } else if (ts->val_type == TEMP_VAL_REG) {
-        reg = ts->reg;
-        if (!tcg_regset_test_reg(arg_ct->u.regs, reg)) {
-            reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
-            tcg_out_mov(s, ts->type, reg, ts->reg);
-        }
-        func_arg = reg;
-        tcg_regset_set_reg(allocated_regs, reg);
-    } else if (ts->val_type == TEMP_VAL_CONST) {
-        if (tcg_target_const_match(func_addr, ts->type, arg_ct)) {
-            const_func_arg = 1;
-            func_arg = func_addr;
-        } else {
-            reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
-            tcg_out_movi(s, ts->type, reg, func_addr);
-            func_arg = reg;
-            tcg_regset_set_reg(allocated_regs, reg);
-        }
-    } else {
-        tcg_abort();
-    }
-        
-    
     /* mark dead temporaries and free the associated registers */
     for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
         if (IS_DEAD_ARG(i)) {
@@ -2468,7 +2457,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
         save_globals(s, allocated_regs);
     }
 
-    tcg_out_op(s, opc, &func_arg, &const_func_arg);
+    tcg_out_call(s, func_addr);
 
     /* assign output registers and emit moves if needed */
     for(i = 0; i < nb_oargs; i++) {
@@ -2518,7 +2507,8 @@ static void dump_op_count(void)
 #endif
 
 
-static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
+static inline int tcg_gen_code_common(TCGContext *s,
+                                      tcg_insn_unit *gen_code_buf,
                                       long search_pc)
 {
     TCGOpcode opc;
@@ -2633,7 +2623,7 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
         }
         args += def->nb_args;
     next:
-        if (search_pc >= 0 && search_pc < s->code_ptr - gen_code_buf) {
+        if (search_pc >= 0 && search_pc < tcg_current_code_size(s)) {
             return op_index;
         }
         op_index++;
@@ -2647,7 +2637,7 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
     return -1;
 }
 
-int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf)
+int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
 {
 #ifdef CONFIG_PROFILER
     {
@@ -2666,16 +2656,17 @@ int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf)
     tcg_gen_code_common(s, gen_code_buf, -1);
 
     /* flush instruction cache */
-    flush_icache_range((uintptr_t)gen_code_buf, (uintptr_t)s->code_ptr);
+    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
 
-    return s->code_ptr -  gen_code_buf;
+    return tcg_current_code_size(s);
 }
 
 /* Return the index of the micro operation such as the pc after is <
    offset bytes from the start of the TB.  The contents of gen_code_buf must
    not be changed, though writing the same values is ok.
    Return -1 if not found. */
-int tcg_gen_code_search_pc(TCGContext *s, uint8_t *gen_code_buf, long offset)
+int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
+                           long offset)
 {
     return tcg_gen_code_common(s, gen_code_buf, offset);
 }
diff --git a/tcg/tcg.h b/tcg/tcg.h
index a6a2d06080..fbc93101cf 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -146,10 +146,25 @@ typedef enum TCGOpcode {
 #define tcg_regset_andnot(d, a, b) (d) = (a) & ~(b)
 #define tcg_regset_not(d, a) (d) = ~(a)
 
+#ifndef TCG_TARGET_INSN_UNIT_SIZE
+# error "Missing TCG_TARGET_INSN_UNIT_SIZE"
+#elif TCG_TARGET_INSN_UNIT_SIZE == 1
+typedef uint8_t tcg_insn_unit;
+#elif TCG_TARGET_INSN_UNIT_SIZE == 2
+typedef uint16_t tcg_insn_unit;
+#elif TCG_TARGET_INSN_UNIT_SIZE == 4
+typedef uint32_t tcg_insn_unit;
+#elif TCG_TARGET_INSN_UNIT_SIZE == 8
+typedef uint64_t tcg_insn_unit;
+#else
+/* The port better have done this.  */
+#endif
+
+
 typedef struct TCGRelocation {
     struct TCGRelocation *next;
     int type;
-    uint8_t *ptr;
+    tcg_insn_unit *ptr;
     intptr_t addend;
 } TCGRelocation; 
 
@@ -157,6 +172,7 @@ typedef struct TCGLabel {
     int has_value;
     union {
         uintptr_t value;
+        tcg_insn_unit *value_ptr;
         TCGRelocation *first_reloc;
     } u;
 } TCGLabel;
@@ -464,7 +480,7 @@ struct TCGContext {
     int nb_temps;
 
     /* goto_tb support */
-    uint8_t *code_buf;
+    tcg_insn_unit *code_buf;
     uintptr_t *tb_next;
     uint16_t *tb_next_offset;
     uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */
@@ -485,7 +501,7 @@ struct TCGContext {
     intptr_t frame_end;
     int frame_reg;
 
-    uint8_t *code_ptr;
+    tcg_insn_unit *code_ptr;
     TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
     TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
 
@@ -524,14 +540,17 @@ struct TCGContext {
     uint16_t gen_opc_icount[OPC_BUF_SIZE];
     uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
 
-    /* Code generation */
+    /* Code generation.  Note that we specifically do not use tcg_insn_unit
+       here, because there's too much arithmetic throughout that relies
+       on addition and subtraction working on bytes.  Rely on the GCC
+       extension that allows arithmetic on void*.  */
     int code_gen_max_blocks;
-    uint8_t *code_gen_prologue;
-    uint8_t *code_gen_buffer;
+    void *code_gen_prologue;
+    void *code_gen_buffer;
     size_t code_gen_buffer_size;
     /* threshold to flush the translated code buffer */
     size_t code_gen_buffer_max_size;
-    uint8_t *code_gen_ptr;
+    void *code_gen_ptr;
 
     TBContext tb_ctx;
 
@@ -566,8 +585,9 @@ void tcg_context_init(TCGContext *s);
 void tcg_prologue_init(TCGContext *s);
 void tcg_func_start(TCGContext *s);
 
-int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf);
-int tcg_gen_code_search_pc(TCGContext *s, uint8_t *gen_code_buf, long offset);
+int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf);
+int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
+                           long offset);
 
 void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size);
 
@@ -705,7 +725,7 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs);
 #define tcg_temp_free_ptr(T) tcg_temp_free_i64(TCGV_PTR_TO_NAT(T))
 #endif
 
-void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
+void tcg_gen_callN(TCGContext *s, void *func, unsigned int flags,
                    int sizemask, TCGArg ret, int nargs, TCGArg *args);
 
 void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
@@ -724,6 +744,51 @@ TCGv_i32 tcg_const_local_i32(int32_t val);
 TCGv_i64 tcg_const_local_i64(int64_t val);
 
 /**
+ * tcg_ptr_byte_diff
+ * @a, @b: addresses to be differenced
+ *
+ * There are many places within the TCG backends where we need a byte
+ * difference between two pointers.  While this can be accomplished
+ * with local casting, it's easy to get wrong -- especially if one is
+ * concerned with the signedness of the result.
+ *
+ * This version relies on GCC's void pointer arithmetic to get the
+ * correct result.
+ */
+
+static inline ptrdiff_t tcg_ptr_byte_diff(void *a, void *b)
+{
+    return a - b;
+}
+
+/**
+ * tcg_pcrel_diff
+ * @s: the tcg context
+ * @target: address of the target
+ *
+ * Produce a pc-relative difference, from the current code_ptr
+ * to the destination address.
+ */
+
+static inline ptrdiff_t tcg_pcrel_diff(TCGContext *s, void *target)
+{
+    return tcg_ptr_byte_diff(target, s->code_ptr);
+}
+
+/**
+ * tcg_current_code_size
+ * @s: the tcg context
+ *
+ * Compute the current code size within the translation block.
+ * This is used to fill in qemu's data structures for goto_tb.
+ */
+
+static inline size_t tcg_current_code_size(TCGContext *s)
+{
+    return tcg_ptr_byte_diff(s->code_ptr, s->code_buf);
+}
+
+/**
  * tcg_qemu_tb_exec:
  * @env: CPUArchState * for the CPU
  * @tb_ptr: address of generated code for the TB to execute
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index 47c0b85d95..9b39231c15 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -59,12 +59,8 @@
 static const TCGTargetOpDef tcg_target_op_defs[] = {
     { INDEX_op_exit_tb, { NULL } },
     { INDEX_op_goto_tb, { NULL } },
-    { INDEX_op_call, { RI } },
     { INDEX_op_br, { NULL } },
 
-    { INDEX_op_mov_i32, { R, R } },
-    { INDEX_op_movi_i32, { R } },
-
     { INDEX_op_ld8u_i32, { R, R } },
     { INDEX_op_ld8s_i32, { R, R } },
     { INDEX_op_ld16u_i32, { R, R } },
@@ -141,9 +137,6 @@ static const TCGTargetOpDef tcg_target_op_defs[] = {
 #endif
 
 #if TCG_TARGET_REG_BITS == 64
-    { INDEX_op_mov_i64, { R, R } },
-    { INDEX_op_movi_i64, { R } },
-
     { INDEX_op_ld8u_i64, { R, R } },
     { INDEX_op_ld8s_i64, { R, R } },
     { INDEX_op_ld16u_i64, { R, R } },
@@ -371,14 +364,18 @@ static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 };
 #endif
 
-static void patch_reloc(uint8_t *code_ptr, int type,
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend)
 {
     /* tcg_out_reloc always uses the same type, addend. */
     assert(type == sizeof(tcg_target_long));
     assert(addend == 0);
     assert(value != 0);
-    *(tcg_target_long *)code_ptr = value;
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_patch32(code_ptr, value);
+    } else {
+        tcg_patch64(code_ptr, value);
+    }
 }
 
 /* Parse target specific constraints. */
@@ -413,8 +410,11 @@ void tci_disas(uint8_t opc)
 /* Write value (native size). */
 static void tcg_out_i(TCGContext *s, tcg_target_ulong v)
 {
-    *(tcg_target_ulong *)s->code_ptr = v;
-    s->code_ptr += sizeof(tcg_target_ulong);
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_out32(s, v);
+    } else {
+        tcg_out64(s, v);
+    }
 }
 
 /* Write opcode. */
@@ -542,6 +542,11 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
     old_code_ptr[1] = s->code_ptr - old_code_ptr;
 }
 
+static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *arg)
+{
+    tcg_out_ri(s, 1, (uintptr_t)arg);
+}
+
 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                        const int *const_args)
 {
@@ -557,21 +562,18 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         if (s->tb_jmp_offset) {
             /* Direct jump method. */
             assert(args[0] < ARRAY_SIZE(s->tb_jmp_offset));
-            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
+            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
             tcg_out32(s, 0);
         } else {
             /* Indirect jump method. */
             TODO();
         }
         assert(args[0] < ARRAY_SIZE(s->tb_next_offset));
-        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
+        s->tb_next_offset[args[0]] = tcg_current_code_size(s);
         break;
     case INDEX_op_br:
         tci_out_label(s, args[0]);
         break;
-    case INDEX_op_call:
-        tcg_out_ri(s, const_args[0], args[0]);
-        break;
     case INDEX_op_setcond_i32:
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
@@ -596,9 +598,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out8(s, args[3]);   /* condition */
         break;
 #endif
-    case INDEX_op_movi_i32:
-        TODO(); /* Handled by tcg_out_movi? */
-        break;
     case INDEX_op_ld8u_i32:
     case INDEX_op_ld8s_i32:
     case INDEX_op_ld16u_i32:
@@ -654,10 +653,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         break;
 
 #if TCG_TARGET_REG_BITS == 64
-    case INDEX_op_mov_i64:
-    case INDEX_op_movi_i64:
-        TODO();
-        break;
     case INDEX_op_add_i64:
     case INDEX_op_sub_i64:
     case INDEX_op_mul_i64:
@@ -825,11 +820,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out_i(s, *args);
 #endif
         break;
-    case INDEX_op_end:
-        TODO();
-        break;
+    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
+    case INDEX_op_mov_i64:
+    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
+    case INDEX_op_movi_i64:
+    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
     default:
-        fprintf(stderr, "Missing: %s\n", tcg_op_defs[opc].name);
         tcg_abort();
     }
     old_code_ptr[1] = s->code_ptr - old_code_ptr;
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 37719e875c..0be5acdb81 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -43,6 +43,7 @@
 #include "config-host.h"
 
 #define TCG_TARGET_INTERPRETER 1
+#define TCG_TARGET_INSN_UNIT_SIZE 1
 
 #if UINTPTR_MAX == UINT32_MAX
 # define TCG_TARGET_REG_BITS 32
diff --git a/translate-all.c b/translate-all.c
index 5759974d42..5549a85ed5 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -143,7 +143,7 @@ void cpu_gen_init(void)
 int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr)
 {
     TCGContext *s = &tcg_ctx;
-    uint8_t *gen_code_buf;
+    tcg_insn_unit *gen_code_buf;
     int gen_code_size;
 #ifdef CONFIG_PROFILER
     int64_t ti;
@@ -186,8 +186,8 @@ int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
-        qemu_log("OUT: [size=%d]\n", *gen_code_size_ptr);
-        log_disas(tb->tc_ptr, *gen_code_size_ptr);
+        qemu_log("OUT: [size=%d]\n", gen_code_size);
+        log_disas(tb->tc_ptr, gen_code_size);
         qemu_log("\n");
         qemu_log_flush();
     }
@@ -235,7 +235,8 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
     s->tb_jmp_offset = NULL;
     s->tb_next = tb->tb_next;
 #endif
-    j = tcg_gen_code_search_pc(s, (uint8_t *)tc_ptr, searched_pc - tc_ptr);
+    j = tcg_gen_code_search_pc(s, (tcg_insn_unit *)tc_ptr,
+                               searched_pc - tc_ptr);
     if (j < 0)
         return -1;
     /* now find start of instruction before */
@@ -944,7 +945,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 {
     CPUArchState *env = cpu->env_ptr;
     TranslationBlock *tb;
-    uint8_t *tc_ptr;
     tb_page_addr_t phys_pc, phys_page2;
     target_ulong virt_page2;
     int code_gen_size;
@@ -959,8 +959,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
         /* Don't forget to invalidate previous TB info.  */
         tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
     }
-    tc_ptr = tcg_ctx.code_gen_ptr;
-    tb->tc_ptr = tc_ptr;
+    tb->tc_ptr = tcg_ctx.code_gen_ptr;
     tb->cs_base = cs_base;
     tb->flags = flags;
     tb->cflags = cflags;