Diffstat (limited to 'tcg')
-rw-r--r--  tcg/README                 |  32
-rw-r--r--  tcg/aarch64/tcg-target.c   |  59
-rw-r--r--  tcg/aarch64/tcg-target.h   |   3
-rw-r--r--  tcg/arm/tcg-target.c       |   8
-rw-r--r--  tcg/i386/tcg-target.c      |  49
-rw-r--r--  tcg/i386/tcg-target.h      |   3
-rw-r--r--  tcg/ia64/tcg-target.c      |  29
-rw-r--r--  tcg/ia64/tcg-target.h      |   3
-rw-r--r--  tcg/mips/tcg-target.c      |  16
-rw-r--r--  tcg/optimize.c             | 253
-rw-r--r--  tcg/ppc/tcg-target.c       |  65
-rw-r--r--  tcg/ppc/tcg-target.h       |   3
-rw-r--r--  tcg/s390/tcg-target.c      |  50
-rw-r--r--  tcg/s390/tcg-target.h      |   3
-rw-r--r--  tcg/sparc/tcg-target.c     |  36
-rw-r--r--  tcg/sparc/tcg-target.h     |   3
-rw-r--r--  tcg/tcg-op.c               |  48
-rw-r--r--  tcg/tcg-op.h               |  12
-rw-r--r--  tcg/tcg-opc.h              |  10
-rw-r--r--  tcg/tcg.h                  |   3
-rw-r--r--  tcg/tci/tcg-target.c       |   4
-rw-r--r--  tcg/tci/tcg-target.h       |   3
22 files changed, 393 insertions, 302 deletions
diff --git a/tcg/README b/tcg/README
index a550ff176d..34c0775cff 100644
--- a/tcg/README
+++ b/tcg/README
@@ -314,11 +314,17 @@ This operation would be equivalent to
 
   dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00)
 
-* trunc_shr_i32 t0, t1, pos
+* extrl_i64_i32 t0, t1
 
-For 64-bit hosts only, right shift the 64-bit input T1 by POS and
-truncate to 32-bit output T0.  Depending on the host, this may be
-a simple mov/shift, or may require additional canonicalization.
+For 64-bit hosts only, extract the low 32-bits of input T1 and place it
+into 32-bit output T0.  Depending on the host, this may be a simple move,
+or may require additional canonicalization.
+
+* extrh_i64_i32 t0, t1
+
+For 64-bit hosts only, extract the high 32-bits of input T1 and place it
+into 32-bit output T0.  Depending on the host, this may be a simple shift,
+or may require additional canonicalization.
 
 ********* Conditional moves
 
@@ -466,13 +472,25 @@ On a 32 bit target, all 64 bit operations are converted to 32 bits. A
 few specific operations must be implemented to allow it (see add2_i32,
 sub2_i32, brcond2_i32).
 
+On a 64 bit target, the values are transferred between 32 and 64-bit
+registers using the following ops:
+- extrl_i64_i32
+- ext_i32_i64
+- extu_i32_i64
+
+They ensure that the values are correctly truncated or extended when
+moved from a 32-bit to a 64-bit register or vice-versa. Note that the
+extrl_i64_i32 is an optional op. It is not necessary to implement
+it if all the following conditions are met:
+- 64-bit registers can hold 32-bit values
+- 32-bit values in a 64-bit register do not need to stay zero or
+  sign extended
+- all 32-bit TCG ops ignore the high part of 64-bit registers
+
 Floating point operations are not supported in this version. A
 previous incarnation of the code generator had full support of them,
 but it is better to concentrate on integer operations first.
 
-On a 64 bit target, no assumption is made in TCG about the storage of
-the 32 bit values in 64 bit registers.
-
 4.2) Constraints
 
 GCC like constraints are used to define the constraints of every
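As a reading aid for the README changes above, the semantics of the size-changing ops can be written out in plain C. This is only an illustrative sketch based on the descriptions above; the helper functions are hypothetical and are not part of the patch:

#include <stdint.h>

/* extrl_i64_i32 t0, t1: T0 receives the low 32 bits of T1.  */
static inline uint32_t extrl_i64_i32(uint64_t t1)
{
    return (uint32_t)t1;
}

/* extrh_i64_i32 t0, t1: T0 receives the high 32 bits of T1.  */
static inline uint32_t extrh_i64_i32(uint64_t t1)
{
    return (uint32_t)(t1 >> 32);
}

/* ext_i32_i64 / extu_i32_i64: sign- or zero-extend T1 to 64 bits.  */
static inline int64_t  ext_i32_i64(int32_t t1)   { return t1; }
static inline uint64_t extu_i32_i64(uint32_t t1) { return t1; }

On hosts where 64-bit registers can hold 32-bit values without canonicalization, a backend may implement the first two as a plain move and a 32-bit right shift, as the README text notes.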
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index b7ec4f5ace..01ae610cd7 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -30,7 +30,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
     TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
-    TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */
+    TCG_REG_X28, /* we will reserve this for guest_base if configured */
 
     TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
     TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
@@ -56,11 +56,7 @@ static const int tcg_target_call_oarg_regs[1] = {
 #define TCG_REG_TMP TCG_REG_X30
 
 #ifndef CONFIG_SOFTMMU
-# ifdef CONFIG_USE_GUEST_BASE
-#  define TCG_REG_GUEST_BASE TCG_REG_X28
-# else
-#  define TCG_REG_GUEST_BASE TCG_REG_XZR
-# endif
+#define TCG_REG_GUEST_BASE TCG_REG_X28
 #endif
 
 static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
@@ -1051,14 +1047,29 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
    slow path for the failure case, which will be patched later when finalizing
    the slow path. Generated code returns the host addend in X1,
    clobbers X0,X2,X3,TMP. */
-static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp s_bits,
+static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
                              tcg_insn_unit **label_ptr, int mem_index,
                              bool is_read)
 {
-    TCGReg base = TCG_AREG0;
     int tlb_offset = is_read ?
         offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
         : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
+    int s_mask = (1 << (opc & MO_SIZE)) - 1;
+    TCGReg base = TCG_AREG0, x3;
+    uint64_t tlb_mask;
+
+    /* For aligned accesses, we check the first byte and include the alignment
+       bits within the address.  For unaligned access, we check that we don't
+       cross pages using the address of the last byte of the access.  */
+    if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) {
+        tlb_mask = TARGET_PAGE_MASK | s_mask;
+        x3 = addr_reg;
+    } else {
+        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
+                     TCG_REG_X3, addr_reg, s_mask);
+        tlb_mask = TARGET_PAGE_MASK;
+        x3 = TCG_REG_X3;
+    }
 
     /* Extract the TLB index from the address into X0.
        X0<CPU_TLB_BITS:0> =
@@ -1066,11 +1077,9 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp s_bits,
     tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
                  TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
 
-    /* Store the page mask part of the address and the low s_bits into X3.
-       Later this allows checking for equality and alignment at the same time.
-       X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
-    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
-                     addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+    /* Store the page mask part of the address into X3.  */
+    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
+                     TCG_REG_X3, x3, tlb_mask);
 
     /* Add any "high bits" from the tlb offset to the env address into X2,
        to take advantage of the LSL12 form of the ADDI instruction.
@@ -1207,17 +1216,16 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
     const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
 #ifdef CONFIG_SOFTMMU
     unsigned mem_index = get_mmuidx(oi);
-    TCGMemOp s_bits = memop & MO_SIZE;
     tcg_insn_unit *label_ptr;
 
-    tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
+    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
     tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
                            TCG_REG_X1, otype, addr_reg);
     add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
                         s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
     tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
-                           GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR,
+                           guest_base ? TCG_REG_GUEST_BASE : TCG_REG_XZR,
                            otype, addr_reg);
 #endif /* CONFIG_SOFTMMU */
 }
@@ -1229,17 +1237,16 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
     const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
 #ifdef CONFIG_SOFTMMU
     unsigned mem_index = get_mmuidx(oi);
-    TCGMemOp s_bits = memop & MO_SIZE;
     tcg_insn_unit *label_ptr;
 
-    tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
+    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
     tcg_out_qemu_st_direct(s, memop, data_reg,
                            TCG_REG_X1, otype, addr_reg);
-    add_qemu_ldst_label(s, false, oi, s_bits == MO_64, data_reg, addr_reg,
-                        s->code_ptr, label_ptr);
+    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
+                        data_reg, addr_reg, s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
     tcg_out_qemu_st_direct(s, memop, data_reg,
-                           GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR,
+                           guest_base ? TCG_REG_GUEST_BASE : TCG_REG_XZR,
                            otype, addr_reg);
 #endif /* CONFIG_SOFTMMU */
 }
@@ -1556,6 +1563,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_ext16s_i32:
         tcg_out_sxt(s, ext, MO_16, a0, a1);
         break;
+    case INDEX_op_ext_i32_i64:
     case INDEX_op_ext32s_i64:
         tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
         break;
@@ -1567,6 +1575,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_ext16u_i32:
         tcg_out_uxt(s, MO_16, a0, a1);
         break;
+    case INDEX_op_extu_i32_i64:
     case INDEX_op_ext32u_i64:
         tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
         break;
@@ -1712,6 +1721,8 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
     { INDEX_op_ext8u_i64, { "r", "r" } },
     { INDEX_op_ext16u_i64, { "r", "r" } },
     { INDEX_op_ext32u_i64, { "r", "r" } },
+    { INDEX_op_ext_i32_i64, { "r", "r" } },
+    { INDEX_op_extu_i32_i64, { "r", "r" } },
 
     { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
     { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
@@ -1794,9 +1805,9 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 
-#if defined(CONFIG_USE_GUEST_BASE)
-    if (GUEST_BASE) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
+#if !defined(CONFIG_SOFTMMU)
+    if (guest_base) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
         tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
     }
 #endif
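The i386, ppc and s390 hunks below follow the same idea as this aarch64 change: for aligned accesses the alignment bits are folded into the TLB comparator, while for unaligned accesses the address of the last byte is used so that a page-crossing access misses the TLB and takes the slow path. A minimal C sketch of that comparator, assuming the usual QEMU meanings of MO_SIZE, MO_AMASK, MO_ALIGN and TARGET_PAGE_MASK; tlb_compare_value is a made-up name, not a function in the patch:

static uint64_t tlb_compare_value(uint64_t addr, unsigned memop)
{
    uint64_t s_mask = (1ULL << (memop & MO_SIZE)) - 1;

    if ((memop & MO_AMASK) == MO_ALIGN || s_mask == 0) {
        /* Aligned (or byte) access: keep the low alignment bits so a
           misaligned address can never match the TLB entry.  */
        return addr & (TARGET_PAGE_MASK | s_mask);
    }

    /* Unaligned access: take the page of the last byte.  If the access
       crosses a page boundary, this lands in the next page, the TLB
       compare fails, and the slow path is forced.  */
    return (addr + s_mask) & TARGET_PAGE_MASK;
}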
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 8aec04d2bf..19a04a6e75 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -70,7 +70,8 @@ typedef enum {
 #define TCG_TARGET_HAS_muls2_i32        0
 #define TCG_TARGET_HAS_muluh_i32        0
 #define TCG_TARGET_HAS_mulsh_i32        0
-#define TCG_TARGET_HAS_trunc_shr_i32    0
+#define TCG_TARGET_HAS_extrl_i64_i32    0
+#define TCG_TARGET_HAS_extrh_i64_i32    0
 
 #define TCG_TARGET_HAS_div_i64          1
 #define TCG_TARGET_HAS_rem_i64          1
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index ae2ec7a922..3edf6a6f97 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1493,8 +1493,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
     add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
                         s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
-    if (GUEST_BASE) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
+    if (guest_base) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
         tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
     } else {
         tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
@@ -1623,8 +1623,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
     add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                         s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
-    if (GUEST_BASE) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE);
+    if (guest_base) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
         tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
                               datahi, addrlo, TCG_REG_TMP);
     } else {
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 887f22f675..d2adbc4d17 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1172,7 +1172,7 @@ static void * const qemu_st_helpers[16] = {
    First argument register is clobbered.  */
 
 static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
-                                    int mem_index, TCGMemOp s_bits,
+                                    int mem_index, TCGMemOp opc,
                                     tcg_insn_unit **label_ptr, int which)
 {
     const TCGReg r0 = TCG_REG_L0;
@@ -1180,6 +1180,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
     TCGType ttype = TCG_TYPE_I32;
     TCGType htype = TCG_TYPE_I32;
     int trexw = 0, hrexw = 0;
+    int s_mask = (1 << (opc & MO_SIZE)) - 1;
+    bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0;
 
     if (TCG_TARGET_REG_BITS == 64) {
         if (TARGET_LONG_BITS == 64) {
@@ -1193,13 +1195,19 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
     }
 
     tcg_out_mov(s, htype, r0, addrlo);
-    tcg_out_mov(s, ttype, r1, addrlo);
+    if (aligned) {
+        tcg_out_mov(s, ttype, r1, addrlo);
+    } else {
+        /* For unaligned access check that we don't cross pages using
+           the page address of the last byte.  */
+        tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask);
+    }
 
     tcg_out_shifti(s, SHIFT_SHR + hrexw, r0,
                    TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
     tgen_arithi(s, ARITH_AND + trexw, r1,
-                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
+                TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0);
     tgen_arithi(s, ARITH_AND + hrexw, r0,
                 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
 
@@ -1424,7 +1432,7 @@ int arch_prctl(int code, unsigned long addr);
 static int guest_base_flags;
 static inline void setup_guest_base_seg(void)
 {
-    if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
+    if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
         guest_base_flags = P_GS;
     }
 }
@@ -1545,7 +1553,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
     TCGMemOp opc;
 #if defined(CONFIG_SOFTMMU)
     int mem_index;
-    TCGMemOp s_bits;
     tcg_insn_unit *label_ptr[2];
 #endif
 
@@ -1558,9 +1565,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
 
 #if defined(CONFIG_SOFTMMU)
     mem_index = get_mmuidx(oi);
-    s_bits = opc & MO_SIZE;
 
-    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
+    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                      label_ptr, offsetof(CPUTLBEntry, addr_read));
 
     /* TLB Hit.  */
@@ -1571,7 +1577,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
                         s->code_ptr, label_ptr);
 #else
     {
-        int32_t offset = GUEST_BASE;
+        int32_t offset = guest_base;
         TCGReg base = addrlo;
         int index = -1;
         int seg = 0;
@@ -1580,7 +1586,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
            We can do this with the ADDR32 prefix if we're not using
            a guest base, or when using segmentation.  Otherwise we
            need to zero-extend manually.  */
-        if (GUEST_BASE == 0 || guest_base_flags) {
+        if (guest_base == 0 || guest_base_flags) {
             seg = guest_base_flags;
             offset = 0;
             if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
@@ -1591,8 +1597,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
                 tcg_out_ext32u(s, TCG_REG_L0, base);
                 base = TCG_REG_L0;
             }
-            if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
+            if (offset != guest_base) {
+                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
                 index = TCG_REG_L1;
                 offset = 0;
             }
@@ -1687,7 +1693,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
     TCGMemOp opc;
 #if defined(CONFIG_SOFTMMU)
     int mem_index;
-    TCGMemOp s_bits;
     tcg_insn_unit *label_ptr[2];
 #endif
 
@@ -1700,9 +1705,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 
 #if defined(CONFIG_SOFTMMU)
     mem_index = get_mmuidx(oi);
-    s_bits = opc & MO_SIZE;
 
-    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
+    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                      label_ptr, offsetof(CPUTLBEntry, addr_write));
 
     /* TLB Hit.  */
@@ -1713,12 +1717,12 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
                         s->code_ptr, label_ptr);
 #else
     {
-        int32_t offset = GUEST_BASE;
+        int32_t offset = guest_base;
         TCGReg base = addrlo;
         int seg = 0;
 
         /* See comment in tcg_out_qemu_ld re zero-extension of addrlo.  */
-        if (GUEST_BASE == 0 || guest_base_flags) {
+        if (guest_base == 0 || guest_base_flags) {
             seg = guest_base_flags;
             offset = 0;
             if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
@@ -1727,12 +1731,12 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
         } else if (TCG_TARGET_REG_BITS == 64) {
             /* ??? Note that we can't use the same SIB addressing scheme
                as for loads, since we require L0 free for bswap.  */
-            if (offset != GUEST_BASE) {
+            if (offset != guest_base) {
                 if (TARGET_LONG_BITS == 32) {
                     tcg_out_ext32u(s, TCG_REG_L0, base);
                     base = TCG_REG_L0;
                 }
-                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
+                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
                 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
                 base = TCG_REG_L1;
                 offset = 0;
@@ -2064,9 +2068,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_bswap64_i64:
         tcg_out_bswap64(s, args[0]);
         break;
+    case INDEX_op_extu_i32_i64:
     case INDEX_op_ext32u_i64:
         tcg_out_ext32u(s, args[0], args[1]);
         break;
+    case INDEX_op_ext_i32_i64:
     case INDEX_op_ext32s_i64:
         tcg_out_ext32s(s, args[0], args[1]);
         break;
@@ -2201,6 +2207,9 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_ext16u_i64, { "r", "r" } },
     { INDEX_op_ext32u_i64, { "r", "r" } },
 
+    { INDEX_op_ext_i32_i64, { "r", "r" } },
+    { INDEX_op_extu_i32_i64, { "r", "r" } },
+
     { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
     { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
 
@@ -2306,8 +2315,8 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out_opc(s, OPC_RET, 0, 0, 0);
 
 #if !defined(CONFIG_SOFTMMU)
-    /* Try to set up a segment register to point to GUEST_BASE.  */
-    if (GUEST_BASE) {
+    /* Try to set up a segment register to point to guest_base.  */
+    if (guest_base) {
         setup_guest_base_seg();
     }
 #endif
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 25b513354c..92be341713 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -102,7 +102,8 @@ extern bool have_bmi1;
 #define TCG_TARGET_HAS_mulsh_i32        0
 
 #if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_trunc_shr_i32    0
+#define TCG_TARGET_HAS_extrl_i64_i32    0
+#define TCG_TARGET_HAS_extrh_i64_i32    0
 #define TCG_TARGET_HAS_div2_i64         1
 #define TCG_TARGET_HAS_rot_i64          1
 #define TCG_TARGET_HAS_ext8s_i64        1
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 81cb9f79f3..3c07017868 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -40,13 +40,8 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 };
 #endif
 
-#ifdef CONFIG_USE_GUEST_BASE
+#ifndef CONFIG_SOFTMMU
 #define TCG_GUEST_BASE_REG TCG_REG_R55
-#else
-#define TCG_GUEST_BASE_REG TCG_REG_R0
-#endif
-#ifndef GUEST_BASE
-#define GUEST_BASE 0
 #endif
 
 /* Branch registers */
@@ -1765,7 +1760,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args)
     bswap = opc & MO_BSWAP;
 
 #if TARGET_LONG_BITS == 32
-    if (GUEST_BASE != 0) {
+    if (guest_base != 0) {
         tcg_out_bundle(s, mII,
                        INSN_NOP_M,
                        tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
@@ -1829,7 +1824,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args)
         }
     }
 #else
-    if (GUEST_BASE != 0) {
+    if (guest_base != 0) {
         tcg_out_bundle(s, MmI,
                        tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
                                    TCG_GUEST_BASE_REG, addr_reg),
@@ -1889,7 +1884,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
     bswap = opc & MO_BSWAP;
 
 #if TARGET_LONG_BITS == 32
-    if (GUEST_BASE != 0) {
+    if (guest_base != 0) {
         tcg_out_bundle(s, mII,
                        INSN_NOP_M,
                        tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29,
@@ -1935,7 +1930,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
                    INSN_NOP_M,
                    INSN_NOP_I);
 #else
-    if (GUEST_BASE != 0) {
+    if (guest_base != 0) {
         add_guest_base = tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
                                      TCG_GUEST_BASE_REG, addr_reg);
         addr_reg = TCG_REG_R2;
@@ -1944,7 +1939,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
     }
 
     if (!bswap) {
-        tcg_out_bundle(s, (GUEST_BASE ? MmI : mmI),
+        tcg_out_bundle(s, (guest_base ? MmI : mmI),
                        add_guest_base,
                        tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits],
                                    data_reg, addr_reg),
@@ -2148,9 +2143,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_ext16u_i64:
         tcg_out_ext(s, OPC_ZXT2_I29, args[0], args[1]);
         break;
+    case INDEX_op_ext_i32_i64:
     case INDEX_op_ext32s_i64:
         tcg_out_ext(s, OPC_SXT4_I29, args[0], args[1]);
         break;
+    case INDEX_op_extu_i32_i64:
     case INDEX_op_ext32u_i64:
         tcg_out_ext(s, OPC_ZXT4_I29, args[0], args[1]);
         break;
@@ -2301,6 +2298,8 @@ static const TCGTargetOpDef ia64_op_defs[] = {
     { INDEX_op_ext16u_i64, { "r", "rZ"} },
     { INDEX_op_ext32s_i64, { "r", "rZ"} },
     { INDEX_op_ext32u_i64, { "r", "rZ"} },
+    { INDEX_op_ext_i32_i64, { "r", "rZ" } },
+    { INDEX_op_extu_i32_i64, { "r", "rZ" } },
 
     { INDEX_op_bswap16_i64, { "r", "rZ" } },
     { INDEX_op_bswap32_i64, { "r", "rZ" } },
@@ -2349,14 +2348,14 @@ static void tcg_target_qemu_prologue(TCGContext *s)
                    tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21,
                                TCG_REG_B6, TCG_REG_R33, 0));
 
-    /* ??? If GUEST_BASE < 0x200000, we could load the register via
+    /* ??? If guest_base < 0x200000, we could load the register via
        an ADDL in the M slot of the next bundle.  */
-    if (GUEST_BASE != 0) {
+    if (guest_base != 0) {
         tcg_out_bundle(s, mlx,
                        INSN_NOP_M,
-                       tcg_opc_l2 (GUEST_BASE),
+                       tcg_opc_l2(guest_base),
                        tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2,
-                                   TCG_GUEST_BASE_REG, GUEST_BASE));
+                                   TCG_GUEST_BASE_REG, guest_base));
         tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
     }
 
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index a04ed81262..ae9b79f02f 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -160,7 +160,8 @@ typedef enum {
 #define TCG_TARGET_HAS_muluh_i64        0
 #define TCG_TARGET_HAS_mulsh_i32        0
 #define TCG_TARGET_HAS_mulsh_i64        0
-#define TCG_TARGET_HAS_trunc_shr_i32    0
+#define TCG_TARGET_HAS_extrl_i64_i32    0
+#define TCG_TARGET_HAS_extrh_i64_i32    0
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
 #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index e97980df0b..c0ce520228 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -1180,12 +1180,12 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
     add_qemu_ldst_label(s, 1, oi, data_regl, data_regh, addr_regl, addr_regh,
                         s->code_ptr, label_ptr);
 #else
-    if (GUEST_BASE == 0 && data_regl != addr_regl) {
+    if (guest_base == 0 && data_regl != addr_regl) {
         base = addr_regl;
-    } else if (GUEST_BASE == (int16_t)GUEST_BASE) {
-        tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE);
+    } else if (guest_base == (int16_t)guest_base) {
+        tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base);
     } else {
-        tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE);
+        tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base);
         tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl);
     }
     tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc);
@@ -1314,14 +1314,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
     add_qemu_ldst_label(s, 0, oi, data_regl, data_regh, addr_regl, addr_regh,
                         s->code_ptr, label_ptr);
 #else
-    if (GUEST_BASE == 0) {
+    if (guest_base == 0) {
         base = addr_regl;
     } else {
         base = TCG_REG_A0;
-        if (GUEST_BASE == (int16_t)GUEST_BASE) {
-            tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE);
+        if (guest_base == (int16_t)guest_base) {
+            tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base);
         } else {
-            tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE);
+            tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base);
             tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl);
         }
     }
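In the mips hunks above, guest_base is added with a single ADDIU when it fits the instruction's signed 16-bit immediate, and otherwise materialized with tcg_out_movi plus ADDU. A tiny sketch of the range test behind that choice (fits_simm16 is a hypothetical name, not in the patch):

/* guest_base fits ADDIU's immediate iff truncating to int16_t and
   sign-extending back is lossless, which is what the patch checks with
   guest_base == (int16_t)guest_base.  */
static inline bool fits_simm16(intptr_t v)
{
    return v == (int16_t)v;
}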
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 18283cfd7b..10795ec9d5 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -35,14 +35,8 @@
         glue(glue(case INDEX_op_, x), _i32):    \
         glue(glue(case INDEX_op_, x), _i64)
 
-typedef enum {
-    TCG_TEMP_UNDEF = 0,
-    TCG_TEMP_CONST,
-    TCG_TEMP_COPY,
-} tcg_temp_state;
-
 struct tcg_temp_info {
-    tcg_temp_state state;
+    bool is_const;
     uint16_t prev_copy;
     uint16_t next_copy;
     tcg_target_ulong val;
@@ -50,23 +44,47 @@ struct tcg_temp_info {
 };
 
 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
+static TCGTempSet temps_used;
+
+static inline bool temp_is_const(TCGArg arg)
+{
+    return temps[arg].is_const;
+}
+
+static inline bool temp_is_copy(TCGArg arg)
+{
+    return temps[arg].next_copy != arg;
+}
 
-/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP only had one copy, remove
-   the copy flag from the left temp.  */
+/* Reset TEMP's state, possibly removing the temp from the list of copies.  */
 static void reset_temp(TCGArg temp)
 {
-    if (temps[temp].state == TCG_TEMP_COPY) {
-        if (temps[temp].prev_copy == temps[temp].next_copy) {
-            temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
-        } else {
-            temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
-            temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
-        }
-    }
-    temps[temp].state = TCG_TEMP_UNDEF;
+    temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
+    temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
+    temps[temp].next_copy = temp;
+    temps[temp].prev_copy = temp;
+    temps[temp].is_const = false;
     temps[temp].mask = -1;
 }
 
+/* Reset all temporaries, given that there are NB_TEMPS of them.  */
+static void reset_all_temps(int nb_temps)
+{
+    bitmap_zero(temps_used.l, nb_temps);
+}
+
+/* Initialize and activate a temporary.  */
+static void init_temp_info(TCGArg temp)
+{
+    if (!test_bit(temp, temps_used.l)) {
+        temps[temp].next_copy = temp;
+        temps[temp].prev_copy = temp;
+        temps[temp].is_const = false;
+        temps[temp].mask = -1;
+        set_bit(temp, temps_used.l);
+    }
+}
+
 static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
                                 TCGOpcode opc, int nargs)
 {
@@ -98,16 +116,6 @@ static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
     return new_op;
 }
 
-/* Reset all temporaries, given that there are NB_TEMPS of them.  */
-static void reset_all_temps(int nb_temps)
-{
-    int i;
-    for (i = 0; i < nb_temps; i++) {
-        temps[i].state = TCG_TEMP_UNDEF;
-        temps[i].mask = -1;
-    }
-}
-
 static int op_bits(TCGOpcode op)
 {
     const TCGOpDef *def = &tcg_op_defs[op];
@@ -179,8 +187,7 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
         return true;
     }
 
-    if (temps[arg1].state != TCG_TEMP_COPY
-        || temps[arg2].state != TCG_TEMP_COPY) {
+    if (!temp_is_copy(arg1) || !temp_is_copy(arg2)) {
         return false;
     }
 
@@ -202,7 +209,7 @@ static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
     op->opc = new_op;
 
     reset_temp(dst);
-    temps[dst].state = TCG_TEMP_CONST;
+    temps[dst].is_const = true;
     temps[dst].val = val;
     mask = val;
     if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
@@ -223,11 +230,6 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
         return;
     }
 
-    if (temps[src].state == TCG_TEMP_CONST) {
-        tcg_opt_gen_movi(s, op, args, dst, temps[src].val);
-        return;
-    }
-
     TCGOpcode new_op = op_to_mov(op->opc);
     tcg_target_ulong mask;
 
@@ -241,19 +243,13 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
     }
     temps[dst].mask = mask;
 
-    assert(temps[src].state != TCG_TEMP_CONST);
-
     if (s->temps[src].type == s->temps[dst].type) {
-        if (temps[src].state != TCG_TEMP_COPY) {
-            temps[src].state = TCG_TEMP_COPY;
-            temps[src].next_copy = src;
-            temps[src].prev_copy = src;
-        }
-        temps[dst].state = TCG_TEMP_COPY;
         temps[dst].next_copy = temps[src].next_copy;
         temps[dst].prev_copy = src;
         temps[temps[dst].next_copy].prev_copy = dst;
         temps[src].next_copy = dst;
+        temps[dst].is_const = temps[src].is_const;
+        temps[dst].val = temps[src].val;
     }
 
     args[0] = dst;
@@ -292,7 +288,6 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
     case INDEX_op_shr_i32:
         return (uint32_t)x >> (y & 31);
 
-    case INDEX_op_trunc_shr_i32:
     case INDEX_op_shr_i64:
         return (uint64_t)x >> (y & 63);
 
@@ -347,12 +342,18 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
     CASE_OP_32_64(ext16u):
         return (uint16_t)x;
 
+    case INDEX_op_ext_i32_i64:
     case INDEX_op_ext32s_i64:
         return (int32_t)x;
 
+    case INDEX_op_extu_i32_i64:
+    case INDEX_op_extrl_i64_i32:
     case INDEX_op_ext32u_i64:
         return (uint32_t)x;
 
+    case INDEX_op_extrh_i64_i32:
+        return (uint64_t)x >> 32;
+
     case INDEX_op_muluh_i32:
         return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
     case INDEX_op_mulsh_i32:
@@ -395,7 +396,7 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
 {
     TCGArg res = do_constant_folding_2(op, x, y);
     if (op_bits(op) == 32) {
-        res &= 0xffffffff;
+        res = (int32_t)res;
     }
     return res;
 }
@@ -481,7 +482,7 @@ static bool do_constant_folding_cond_eq(TCGCond c)
 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                        TCGArg y, TCGCond c)
 {
-    if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
+    if (temp_is_const(x) && temp_is_const(y)) {
         switch (op_bits(op)) {
         case 32:
             return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
@@ -492,7 +493,7 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
         }
     } else if (temps_are_copies(x, y)) {
         return do_constant_folding_cond_eq(c);
-    } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
+    } else if (temp_is_const(y) && temps[y].val == 0) {
         switch (c) {
         case TCG_COND_LTU:
             return 0;
@@ -513,12 +514,10 @@ static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
     TCGArg al = p1[0], ah = p1[1];
     TCGArg bl = p2[0], bh = p2[1];
 
-    if (temps[bl].state == TCG_TEMP_CONST
-        && temps[bh].state == TCG_TEMP_CONST) {
+    if (temp_is_const(bl) && temp_is_const(bh)) {
         uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
 
-        if (temps[al].state == TCG_TEMP_CONST
-            && temps[ah].state == TCG_TEMP_CONST) {
+        if (temp_is_const(al) && temp_is_const(ah)) {
             uint64_t a;
             a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
             return do_constant_folding_cond_64(a, b, c);
@@ -544,8 +543,8 @@ static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 {
     TCGArg a1 = *p1, a2 = *p2;
     int sum = 0;
-    sum += temps[a1].state == TCG_TEMP_CONST;
-    sum -= temps[a2].state == TCG_TEMP_CONST;
+    sum += temp_is_const(a1);
+    sum -= temp_is_const(a2);
 
     /* Prefer the constant in second argument, and then the form
        op a, a, b, which is better handled on non-RISC hosts. */
@@ -560,10 +559,10 @@ static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 {
     int sum = 0;
-    sum += temps[p1[0]].state == TCG_TEMP_CONST;
-    sum += temps[p1[1]].state == TCG_TEMP_CONST;
-    sum -= temps[p2[0]].state == TCG_TEMP_CONST;
-    sum -= temps[p2[1]].state == TCG_TEMP_CONST;
+    sum += temp_is_const(p1[0]);
+    sum += temp_is_const(p1[1]);
+    sum -= temp_is_const(p2[0]);
+    sum -= temp_is_const(p2[1]);
     if (sum > 0) {
         TCGArg t;
         t = p1[0], p1[0] = p2[0], p2[0] = t;
@@ -598,17 +597,29 @@ void tcg_optimize(TCGContext *s)
         const TCGOpDef *def = &tcg_op_defs[opc];
 
         oi_next = op->next;
+
+        /* Count the arguments, and initialize the temps that are
+           going to be used */
         if (opc == INDEX_op_call) {
             nb_oargs = op->callo;
             nb_iargs = op->calli;
+            for (i = 0; i < nb_oargs + nb_iargs; i++) {
+                tmp = args[i];
+                if (tmp != TCG_CALL_DUMMY_ARG) {
+                    init_temp_info(tmp);
+                }
+            }
         } else {
             nb_oargs = def->nb_oargs;
             nb_iargs = def->nb_iargs;
+            for (i = 0; i < nb_oargs + nb_iargs; i++) {
+                init_temp_info(args[i]);
+            }
         }
 
         /* Do copy propagation */
         for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
-            if (temps[args[i]].state == TCG_TEMP_COPY) {
+            if (temp_is_copy(args[i])) {
                 args[i] = find_better_copy(s, args[i]);
             }
         }
@@ -678,8 +689,7 @@ void tcg_optimize(TCGContext *s)
         CASE_OP_32_64(sar):
         CASE_OP_32_64(rotl):
         CASE_OP_32_64(rotr):
-            if (temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[1]].val == 0) {
+            if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
                 tcg_opt_gen_movi(s, op, args, args[0], 0);
                 continue;
             }
@@ -689,7 +699,7 @@ void tcg_optimize(TCGContext *s)
                 TCGOpcode neg_op;
                 bool have_neg;
 
-                if (temps[args[2]].state == TCG_TEMP_CONST) {
+                if (temp_is_const(args[2])) {
                     /* Proceed with possible constant folding. */
                     break;
                 }
@@ -703,8 +713,7 @@ void tcg_optimize(TCGContext *s)
                 if (!have_neg) {
                     break;
                 }
-                if (temps[args[1]].state == TCG_TEMP_CONST
-                    && temps[args[1]].val == 0) {
+                if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
                     op->opc = neg_op;
                     reset_temp(args[0]);
                     args[1] = args[2];
@@ -714,34 +723,30 @@ void tcg_optimize(TCGContext *s)
             break;
         CASE_OP_32_64(xor):
         CASE_OP_32_64(nand):
-            if (temps[args[1]].state != TCG_TEMP_CONST
-                && temps[args[2]].state == TCG_TEMP_CONST
-                && temps[args[2]].val == -1) {
+            if (!temp_is_const(args[1])
+                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
                 i = 1;
                 goto try_not;
             }
             break;
         CASE_OP_32_64(nor):
-            if (temps[args[1]].state != TCG_TEMP_CONST
-                && temps[args[2]].state == TCG_TEMP_CONST
-                && temps[args[2]].val == 0) {
+            if (!temp_is_const(args[1])
+                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
                 i = 1;
                 goto try_not;
             }
             break;
         CASE_OP_32_64(andc):
-            if (temps[args[2]].state != TCG_TEMP_CONST
-                && temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[1]].val == -1) {
+            if (!temp_is_const(args[2])
+                && temp_is_const(args[1]) && temps[args[1]].val == -1) {
                 i = 2;
                 goto try_not;
             }
             break;
         CASE_OP_32_64(orc):
         CASE_OP_32_64(eqv):
-            if (temps[args[2]].state != TCG_TEMP_CONST
-                && temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[1]].val == 0) {
+            if (!temp_is_const(args[2])
+                && temp_is_const(args[1]) && temps[args[1]].val == 0) {
                 i = 2;
                 goto try_not;
             }
@@ -782,9 +787,8 @@ void tcg_optimize(TCGContext *s)
         CASE_OP_32_64(or):
         CASE_OP_32_64(xor):
         CASE_OP_32_64(andc):
-            if (temps[args[1]].state != TCG_TEMP_CONST
-                && temps[args[2]].state == TCG_TEMP_CONST
-                && temps[args[2]].val == 0) {
+            if (!temp_is_const(args[1])
+                && temp_is_const(args[2]) && temps[args[2]].val == 0) {
                 tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                 continue;
             }
@@ -792,9 +796,8 @@ void tcg_optimize(TCGContext *s)
         CASE_OP_32_64(and):
         CASE_OP_32_64(orc):
         CASE_OP_32_64(eqv):
-            if (temps[args[1]].state != TCG_TEMP_CONST
-                && temps[args[2]].state == TCG_TEMP_CONST
-                && temps[args[2]].val == -1) {
+            if (!temp_is_const(args[1])
+                && temp_is_const(args[2]) && temps[args[2]].val == -1) {
                 tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                 continue;
             }
@@ -832,17 +835,26 @@ void tcg_optimize(TCGContext *s)
 
         CASE_OP_32_64(and):
             mask = temps[args[2]].mask;
-            if (temps[args[2]].state == TCG_TEMP_CONST) {
+            if (temp_is_const(args[2])) {
         and_const:
                 affected = temps[args[1]].mask & ~mask;
             }
             mask = temps[args[1]].mask & mask;
             break;
 
+        case INDEX_op_ext_i32_i64:
+            if ((temps[args[1]].mask & 0x80000000) != 0) {
+                break;
+            }
+        case INDEX_op_extu_i32_i64:
+            /* We do not compute affected as it is a size changing op.  */
+            mask = (uint32_t)temps[args[1]].mask;
+            break;
+
         CASE_OP_32_64(andc):
             /* Known-zeros does not imply known-ones.  Therefore unless
                args[2] is constant, we can't infer anything from it.  */
-            if (temps[args[2]].state == TCG_TEMP_CONST) {
+            if (temp_is_const(args[2])) {
                 mask = ~temps[args[2]].mask;
                 goto and_const;
             }
@@ -851,37 +863,40 @@ void tcg_optimize(TCGContext *s)
             break;
 
         case INDEX_op_sar_i32:
-            if (temps[args[2]].state == TCG_TEMP_CONST) {
+            if (temp_is_const(args[2])) {
                 tmp = temps[args[2]].val & 31;
                 mask = (int32_t)temps[args[1]].mask >> tmp;
             }
             break;
         case INDEX_op_sar_i64:
-            if (temps[args[2]].state == TCG_TEMP_CONST) {
+            if (temp_is_const(args[2])) {
                 tmp = temps[args[2]].val & 63;
                 mask = (int64_t)temps[args[1]].mask >> tmp;
             }
             break;
 
         case INDEX_op_shr_i32:
-            if (temps[args[2]].state == TCG_TEMP_CONST) {
+            if (temp_is_const(args[2])) {
                 tmp = temps[args[2]].val & 31;
                 mask = (uint32_t)temps[args[1]].mask >> tmp;
             }
             break;
         case INDEX_op_shr_i64:
-            if (temps[args[2]].state == TCG_TEMP_CONST) {
+            if (temp_is_const(args[2])) {
                 tmp = temps[args[2]].val & 63;
                 mask = (uint64_t)temps[args[1]].mask >> tmp;
             }
             break;
 
-        case INDEX_op_trunc_shr_i32:
-            mask = (uint64_t)temps[args[1]].mask >> args[2];
+        case INDEX_op_extrl_i64_i32:
+            mask = (uint32_t)temps[args[1]].mask;
+            break;
+        case INDEX_op_extrh_i64_i32:
+            mask = (uint64_t)temps[args[1]].mask >> 32;
             break;
 
         CASE_OP_32_64(shl):
-            if (temps[args[2]].state == TCG_TEMP_CONST) {
+            if (temp_is_const(args[2])) {
                 tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
                 mask = temps[args[1]].mask << tmp;
             }
@@ -962,8 +977,7 @@ void tcg_optimize(TCGContext *s)
         CASE_OP_32_64(mul):
         CASE_OP_32_64(muluh):
         CASE_OP_32_64(mulsh):
-            if ((temps[args[2]].state == TCG_TEMP_CONST
-                && temps[args[2]].val == 0)) {
+            if ((temp_is_const(args[2]) && temps[args[2]].val == 0)) {
                 tcg_opt_gen_movi(s, op, args, args[0], 0);
                 continue;
             }
@@ -1018,21 +1032,17 @@ void tcg_optimize(TCGContext *s)
         CASE_OP_32_64(ext16u):
         case INDEX_op_ext32s_i64:
         case INDEX_op_ext32u_i64:
-            if (temps[args[1]].state == TCG_TEMP_CONST) {
+        case INDEX_op_ext_i32_i64:
+        case INDEX_op_extu_i32_i64:
+        case INDEX_op_extrl_i64_i32:
+        case INDEX_op_extrh_i64_i32:
+            if (temp_is_const(args[1])) {
                 tmp = do_constant_folding(opc, temps[args[1]].val, 0);
                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
                 break;
             }
             goto do_default;
 
-        case INDEX_op_trunc_shr_i32:
-            if (temps[args[1]].state == TCG_TEMP_CONST) {
-                tmp = do_constant_folding(opc, temps[args[1]].val, args[2]);
-                tcg_opt_gen_movi(s, op, args, args[0], tmp);
-                break;
-            }
-            goto do_default;
-
         CASE_OP_32_64(add):
         CASE_OP_32_64(sub):
         CASE_OP_32_64(mul):
@@ -1055,8 +1065,7 @@ void tcg_optimize(TCGContext *s)
         CASE_OP_32_64(divu):
         CASE_OP_32_64(rem):
         CASE_OP_32_64(remu):
-            if (temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[2]].state == TCG_TEMP_CONST) {
+            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
                 tmp = do_constant_folding(opc, temps[args[1]].val,
                                           temps[args[2]].val);
                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
@@ -1065,8 +1074,7 @@ void tcg_optimize(TCGContext *s)
             goto do_default;
 
         CASE_OP_32_64(deposit):
-            if (temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[2]].state == TCG_TEMP_CONST) {
+            if (temp_is_const(args[1]) && temp_is_const(args[2])) {
                 tmp = deposit64(temps[args[1]].val, args[3], args[4],
                                 temps[args[2]].val);
                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
@@ -1106,10 +1114,8 @@ void tcg_optimize(TCGContext *s)
 
         case INDEX_op_add2_i32:
         case INDEX_op_sub2_i32:
-            if (temps[args[2]].state == TCG_TEMP_CONST
-                && temps[args[3]].state == TCG_TEMP_CONST
-                && temps[args[4]].state == TCG_TEMP_CONST
-                && temps[args[5]].state == TCG_TEMP_CONST) {
+            if (temp_is_const(args[2]) && temp_is_const(args[3])
+                && temp_is_const(args[4]) && temp_is_const(args[5])) {
                 uint32_t al = temps[args[2]].val;
                 uint32_t ah = temps[args[3]].val;
                 uint32_t bl = temps[args[4]].val;
@@ -1128,8 +1134,8 @@ void tcg_optimize(TCGContext *s)
 
                 rl = args[0];
                 rh = args[1];
-                tcg_opt_gen_movi(s, op, args, rl, (uint32_t)a);
-                tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(a >> 32));
+                tcg_opt_gen_movi(s, op, args, rl, (int32_t)a);
+                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(a >> 32));
 
                 /* We've done all we need to do with the movi.  Skip it.  */
                 oi_next = op2->next;
@@ -1138,8 +1144,7 @@ void tcg_optimize(TCGContext *s)
             goto do_default;
 
         case INDEX_op_mulu2_i32:
-            if (temps[args[2]].state == TCG_TEMP_CONST
-                && temps[args[3]].state == TCG_TEMP_CONST) {
+            if (temp_is_const(args[2]) && temp_is_const(args[3])) {
                 uint32_t a = temps[args[2]].val;
                 uint32_t b = temps[args[3]].val;
                 uint64_t r = (uint64_t)a * b;
@@ -1149,8 +1154,8 @@ void tcg_optimize(TCGContext *s)
 
                 rl = args[0];
                 rh = args[1];
-                tcg_opt_gen_movi(s, op, args, rl, (uint32_t)r);
-                tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(r >> 32));
+                tcg_opt_gen_movi(s, op, args, rl, (int32_t)r);
+                tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(r >> 32));
 
                 /* We've done all we need to do with the movi.  Skip it.  */
                 oi_next = op2->next;
@@ -1171,10 +1176,8 @@ void tcg_optimize(TCGContext *s)
                     tcg_op_remove(s, op);
                 }
             } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
-                       && temps[args[2]].state == TCG_TEMP_CONST
-                       && temps[args[3]].state == TCG_TEMP_CONST
-                       && temps[args[2]].val == 0
-                       && temps[args[3]].val == 0) {
+                       && temp_is_const(args[2]) && temps[args[2]].val == 0
+                       && temp_is_const(args[3]) && temps[args[3]].val == 0) {
                 /* Simplify LT/GE comparisons vs zero to a single compare
                    vs the high word of the input.  */
             do_brcond_high:
@@ -1236,10 +1239,8 @@ void tcg_optimize(TCGContext *s)
             do_setcond_const:
                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
             } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
-                       && temps[args[3]].state == TCG_TEMP_CONST
-                       && temps[args[4]].state == TCG_TEMP_CONST
-                       && temps[args[3]].val == 0
-                       && temps[args[4]].val == 0) {
+                       && temp_is_const(args[3]) && temps[args[3]].val == 0
+                       && temp_is_const(args[4]) && temps[args[4]].val == 0) {
                 /* Simplify LT/GE comparisons vs zero to a single compare
                    vs the high word of the input.  */
             do_setcond_high:
@@ -1299,7 +1300,9 @@ void tcg_optimize(TCGContext *s)
             if (!(args[nb_oargs + nb_iargs + 1]
                   & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                 for (i = 0; i < nb_globals; i++) {
-                    reset_temp(i);
+                    if (test_bit(i, temps_used.l)) {
+                        reset_temp(i);
+                    }
                 }
             }
             goto do_reset_output;
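The optimize.c rework above replaces the three-state tcg_temp_state with a bool is_const, keeps every temp on an always-valid circular copy list, and initializes temps lazily through the temps_used bitmap. One consequence is that tcg_opt_gen_mov no longer needs a constant special case: constness simply travels along the copy list. A hedged sketch of that linking step, using the temps[] array from the patch (link_copy itself is a hypothetical helper name):

static void link_copy(TCGArg dst, TCGArg src)
{
    /* Insert dst into src's circular doubly-linked list of copies.  */
    temps[dst].next_copy = temps[src].next_copy;
    temps[dst].prev_copy = src;
    temps[temps[dst].next_copy].prev_copy = dst;
    temps[src].next_copy = dst;

    /* The constant value, if any, rides along with the copy.  */
    temps[dst].is_const = temps[src].is_const;
    temps[dst].val = temps[src].val;
}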
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 2b6eafa03c..92ef719e40 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -80,19 +80,13 @@
 
 static tcg_insn_unit *tb_ret_addr;
 
-#ifndef GUEST_BASE
-#define GUEST_BASE 0
-#endif
-
 #include "elf.h"
 static bool have_isa_2_06;
 #define HAVE_ISA_2_06  have_isa_2_06
 #define HAVE_ISEL      have_isa_2_06
 
-#ifdef CONFIG_USE_GUEST_BASE
+#ifndef CONFIG_SOFTMMU
 #define TCG_GUEST_BASE_REG 30
-#else
-#define TCG_GUEST_BASE_REG 0
 #endif
 
 #ifndef NDEBUG
@@ -1361,7 +1355,7 @@ static void * const qemu_st_helpers[16] = {
    in CR7, loads the addend of the TLB into R3, and returns the register
    containing the guest address (zero-extended into R4).  Clobbers R0 and R2. */
 
-static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits,
+static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc,
                                TCGReg addrlo, TCGReg addrhi,
                                int mem_index, bool is_read)
 {
@@ -1371,6 +1365,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits,
            : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
     int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
     TCGReg base = TCG_AREG0;
+    TCGMemOp s_bits = opc & MO_SIZE;
 
     /* Extract the page index, shifted into place for tlb index.  */
     if (TCG_TARGET_REG_BITS == 64) {
@@ -1422,17 +1417,37 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits,
        to minimize any load use delay.  */
     tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off);
 
-    /* Clear the non-page, non-alignment bits from the address.  */
+    /* Clear the non-page, non-alignment bits from the address */
     if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) {
+        /* We don't support unaligned accesses on 32-bits, preserve
+         * the bottom bits and thus trigger a comparison failure on
+         * unaligned accesses
+         */
         tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
                     (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS);
-    } else if (!s_bits) {
-        tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo,
-                    0, 63 - TARGET_PAGE_BITS);
+    } else if (s_bits) {
+        /* > byte access, we need to handle alignment */
+        if ((opc & MO_AMASK) == MO_ALIGN) {
+            /* Alignment required by the front-end, same as 32-bits */
+            tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo,
+                        64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits);
+            tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
+       } else {
+           /* We support unaligned accesses, we need to make sure we fail
+            * if we cross a page boundary. The trick is to add the
+            * access_size-1 to the address before masking the low bits.
+            * That will make the address overflow to the next page if we
+            * cross a page boundary which will then force a mismatch of
+            * the TLB compare since the next page cannot possibly be in
+            * the same TLB index.
+            */
+            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, (1 << s_bits) - 1));
+            tcg_out_rld(s, RLDICR, TCG_REG_R0, TCG_REG_R0,
+                        0, 63 - TARGET_PAGE_BITS);
+        }
     } else {
-        tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo,
-                    64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits);
-        tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
+        /* Byte access, just chop off the bits below the page index */
+        tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, 0, 63 - TARGET_PAGE_BITS);
     }
 
     if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
@@ -1592,7 +1607,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
 
 #ifdef CONFIG_SOFTMMU
     mem_index = get_mmuidx(oi);
-    addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, true);
+    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);
 
     /* Load a pointer into the current opcode w/conditional branch-link. */
     label_ptr = s->code_ptr;
@@ -1600,7 +1615,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
 
     rbase = TCG_REG_R3;
 #else  /* !CONFIG_SOFTMMU */
-    rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
+    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
     if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
         tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
         addrlo = TCG_REG_TMP1;
@@ -1667,7 +1682,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
 
 #ifdef CONFIG_SOFTMMU
     mem_index = get_mmuidx(oi);
-    addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, false);
+    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false);
 
     /* Load a pointer into the current opcode w/conditional branch-link. */
     label_ptr = s->code_ptr;
@@ -1675,7 +1690,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
 
     rbase = TCG_REG_R3;
 #else  /* !CONFIG_SOFTMMU */
-    rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
+    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
     if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
         tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
         addrlo = TCG_REG_TMP1;
@@ -1779,9 +1794,9 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     }
     tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
 
-#ifdef CONFIG_USE_GUEST_BASE
-    if (GUEST_BASE) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
+#ifndef CONFIG_SOFTMMU
+    if (guest_base) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
         tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
     }
 #endif
@@ -2200,12 +2215,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_ext16s_i64:
         c = EXTSH;
         goto gen_ext;
+    case INDEX_op_ext_i32_i64:
     case INDEX_op_ext32s_i64:
         c = EXTSW;
         goto gen_ext;
     gen_ext:
         tcg_out32(s, c | RS(args[1]) | RA(args[0]));
         break;
+    case INDEX_op_extu_i32_i64:
+        tcg_out_ext32u(s, args[0], args[1]);
+        break;
 
     case INDEX_op_setcond_i32:
         tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
@@ -2482,6 +2501,8 @@ static const TCGTargetOpDef ppc_op_defs[] = {
     { INDEX_op_ext8s_i64, { "r", "r" } },
     { INDEX_op_ext16s_i64, { "r", "r" } },
     { INDEX_op_ext32s_i64, { "r", "r" } },
+    { INDEX_op_ext_i32_i64, { "r", "r" } },
+    { INDEX_op_extu_i32_i64, { "r", "r" } },
     { INDEX_op_bswap16_i64, { "r", "r" } },
     { INDEX_op_bswap32_i64, { "r", "r" } },
     { INDEX_op_bswap64_i64, { "r", "r" } },
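A small worked example of the add-then-mask trick used in the ppc unaligned path above, assuming 4 KiB target pages (TARGET_PAGE_BITS == 12) and an 8-byte access; the addresses are illustrative only:

/*
 *   addr           = 0x1ffa    (page 0x1000; access covers 0x1ffa..0x2001)
 *   addr + (8 - 1) = 0x2001    (overflows into the next page)
 *   & PAGE_MASK    = 0x2000    (does not match the TLB tag for page 0x1000,
 *                               so the compare fails and the slow path runs)
 *
 * A contained access at addr = 0x1ff0 gives (0x1ff0 + 7) & PAGE_MASK = 0x1000
 * and still matches its TLB entry.
 */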
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 7ce7048824..b4f0818762 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -77,7 +77,8 @@ typedef enum {
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_add2_i32         0
 #define TCG_TARGET_HAS_sub2_i32         0
-#define TCG_TARGET_HAS_trunc_shr_i32    0
+#define TCG_TARGET_HAS_extrl_i64_i32    0
+#define TCG_TARGET_HAS_extrh_i64_i32    0
 #define TCG_TARGET_HAS_div_i64          1
 #define TCG_TARGET_HAS_rem_i64          0
 #define TCG_TARGET_HAS_rot_i64          1
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index aa718eca0c..fbf97bb2e1 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -51,17 +51,10 @@
 /* A scratch register that may be used throughout the backend.  */
 #define TCG_TMP0        TCG_REG_R14
 
-#ifdef CONFIG_USE_GUEST_BASE
+#ifndef CONFIG_SOFTMMU
 #define TCG_GUEST_BASE_REG TCG_REG_R13
-#else
-#define TCG_GUEST_BASE_REG TCG_REG_R0
-#endif
-
-#ifndef GUEST_BASE
-#define GUEST_BASE 0
 #endif
 
-
 /* All of the following instructions are prefixed with their instruction
    format, and are defined as 8- or 16-bit quantities, even when the two
    halves of the 16-bit quantity may appear 32 bits apart in the insn.
@@ -1504,20 +1497,36 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
 static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc,
                                int mem_index, bool is_ld)
 {
-    TCGMemOp s_bits = opc & MO_SIZE;
-    uint64_t tlb_mask = TARGET_PAGE_MASK | ((1 << s_bits) - 1);
-    int ofs;
+    int s_mask = (1 << (opc & MO_SIZE)) - 1;
+    int ofs, a_off;
+    uint64_t tlb_mask;
+
+    /* For aligned accesses, we check the first byte and include the alignment
+       bits within the address.  For unaligned access, we check that we don't
+       cross pages using the address of the last byte of the access.  */
+    if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) {
+        a_off = 0;
+        tlb_mask = TARGET_PAGE_MASK | s_mask;
+    } else {
+        a_off = s_mask;
+        tlb_mask = TARGET_PAGE_MASK;
+    }
 
     if (facilities & FACILITY_GEN_INST_EXT) {
         tcg_out_risbg(s, TCG_REG_R2, addr_reg,
                       64 - CPU_TLB_BITS - CPU_TLB_ENTRY_BITS,
                       63 - CPU_TLB_ENTRY_BITS,
                       64 + CPU_TLB_ENTRY_BITS - TARGET_PAGE_BITS, 1);
-        tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
+        if (a_off) {
+            tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
+            tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
+        } else {
+            tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
+        }
     } else {
         tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
                      TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
-        tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_R3, addr_reg);
+        tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
         tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2,
                   (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
         tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
@@ -1622,9 +1631,9 @@ static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
         tgen_ext32u(s, TCG_TMP0, *addr_reg);
         *addr_reg = TCG_TMP0;
     }
-    if (GUEST_BASE < 0x80000) {
+    if (guest_base < 0x80000) {
         *index_reg = TCG_REG_NONE;
-        *disp = GUEST_BASE;
+        *disp = guest_base;
     } else {
         *index_reg = TCG_GUEST_BASE_REG;
         *disp = 0;
@@ -2090,6 +2099,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_ext16s_i64:
         tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]);
         break;
+    case INDEX_op_ext_i32_i64:
     case INDEX_op_ext32s_i64:
         tgen_ext32s(s, args[0], args[1]);
         break;
@@ -2099,6 +2109,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_ext16u_i64:
         tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]);
         break;
+    case INDEX_op_extu_i32_i64:
     case INDEX_op_ext32u_i64:
         tgen_ext32u(s, args[0], args[1]);
         break;
@@ -2251,6 +2262,9 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_ext32s_i64, { "r", "r" } },
     { INDEX_op_ext32u_i64, { "r", "r" } },
 
+    { INDEX_op_ext_i32_i64, { "r", "r" } },
+    { INDEX_op_extu_i32_i64, { "r", "r" } },
+
     { INDEX_op_bswap16_i64, { "r", "r" } },
     { INDEX_op_bswap32_i64, { "r", "r" } },
     { INDEX_op_bswap64_i64, { "r", "r" } },
@@ -2328,10 +2342,12 @@ static void tcg_target_qemu_prologue(TCGContext *s)
                   TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 
-    if (GUEST_BASE >= 0x80000) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
+#ifndef CONFIG_SOFTMMU
+    if (guest_base >= 0x80000) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
         tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
     }
+#endif
 
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
     /* br %r3 (go to TB) */
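
The new tcg_out_tlb_read() hunk above selects the TLB comparator from the
access size and alignment requirement: aligned accesses keep the low
alignment bits in the compared address, while unaligned accesses compare
the address of the last byte so that a page-crossing access misses the TLB
and takes the slow path.  The following is a minimal, self-contained C
sketch of just that selection; the constant values (page bits, MO_* bit
layout) and the tlb_comparator() helper name are illustrative assumptions,
not the exact QEMU definitions.

    /* Sketch of the comparator selection in the patched tcg_out_tlb_read().
       TARGET_PAGE_BITS and the MO_* layout below are assumed values,
       chosen only to make the example compile and run. */
    #include <stdint.h>
    #include <stdio.h>

    #define TARGET_PAGE_BITS 12                 /* assumption: 4 KiB pages */
    #define TARGET_PAGE_MASK (~(uint64_t)((1u << TARGET_PAGE_BITS) - 1))
    #define MO_SIZE  0x03                       /* assumed size field */
    #define MO_AMASK 0x10                       /* assumed alignment field */
    #define MO_ALIGN 0x10                       /* assumed "must be aligned" */

    static void tlb_comparator(int memop, int *a_off, uint64_t *tlb_mask)
    {
        int s_mask = (1 << (memop & MO_SIZE)) - 1;

        if ((memop & MO_AMASK) == MO_ALIGN || s_mask == 0) {
            /* Aligned (or single-byte) access: compare the first byte and
               keep the low alignment bits in the comparator, so a
               misaligned address fails the compare. */
            *a_off = 0;
            *tlb_mask = TARGET_PAGE_MASK | s_mask;
        } else {
            /* Unaligned access permitted: compare the address of the last
               byte, so only a page-crossing access falls back. */
            *a_off = s_mask;
            *tlb_mask = TARGET_PAGE_MASK;
        }
    }

    int main(void)
    {
        int a_off;
        uint64_t mask;

        tlb_comparator(3, &a_off, &mask);   /* 8-byte access, no MO_ALIGN */
        printf("a_off=%d tlb_mask=0x%llx\n", a_off, (unsigned long long)mask);
        return 0;
    }

In the generated s390 code a non-zero a_off is folded into the address with
an LA (load address) instruction before the AND, which is what the LA
insertions in the hunk above do on both the RISBG and non-RISBG paths.
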
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 91576d5949..d9dc038733 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -72,7 +72,8 @@ typedef enum TCGReg {
 #define TCG_TARGET_HAS_muls2_i32        0
 #define TCG_TARGET_HAS_muluh_i32        0
 #define TCG_TARGET_HAS_mulsh_i32        0
-#define TCG_TARGET_HAS_trunc_shr_i32    0
+#define TCG_TARGET_HAS_extrl_i64_i32    0
+#define TCG_TARGET_HAS_extrh_i64_i32    0
 
 #define TCG_TARGET_HAS_div2_i64         1
 #define TCG_TARGET_HAS_rot_i64          1
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 1a870a81d7..54df1bc424 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -83,10 +83,8 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 #define TCG_REG_T1  TCG_REG_G1
 #define TCG_REG_T2  TCG_REG_O7
 
-#ifdef CONFIG_USE_GUEST_BASE
+#ifndef CONFIG_SOFTMMU
 # define TCG_GUEST_BASE_REG TCG_REG_I5
-#else
-# define TCG_GUEST_BASE_REG TCG_REG_G0
 #endif
 
 static const int tcg_target_reg_alloc_order[] = {
@@ -955,9 +953,9 @@ static void tcg_target_qemu_prologue(TCGContext *s)
     tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) |
               INSN_IMM13(-frame_size));
 
-#ifdef CONFIG_USE_GUEST_BASE
-    if (GUEST_BASE != 0) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
+#ifndef CONFIG_SOFTMMU
+    if (guest_base != 0) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
         tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
     }
 #endif
@@ -1146,7 +1144,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
         addr = TCG_REG_T1;
     }
     tcg_out_ldst_rr(s, data, addr,
-                    (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+                    (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0),
                     qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
 #endif /* CONFIG_SOFTMMU */
 }
@@ -1201,7 +1199,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
         addr = TCG_REG_T1;
     }
     tcg_out_ldst_rr(s, data, addr,
-                    (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+                    (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0),
                     qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
 #endif /* CONFIG_SOFTMMU */
 }
@@ -1407,18 +1405,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_divu_i64:
         c = ARITH_UDIVX;
         goto gen_arith;
+    case INDEX_op_ext_i32_i64:
     case INDEX_op_ext32s_i64:
         tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA);
         break;
+    case INDEX_op_extu_i32_i64:
     case INDEX_op_ext32u_i64:
         tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL);
         break;
-    case INDEX_op_trunc_shr_i32:
-        if (a2 == 0) {
-            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
-        } else {
-            tcg_out_arithi(s, a0, a1, a2, SHIFT_SRLX);
-        }
+    case INDEX_op_extrl_i64_i32:
+        tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
+        break;
+    case INDEX_op_extrh_i64_i32:
+        tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX);
         break;
 
     case INDEX_op_brcond_i64:
@@ -1531,9 +1530,12 @@ static const TCGTargetOpDef sparc_op_defs[] = {
     { INDEX_op_neg_i64, { "R", "RJ" } },
     { INDEX_op_not_i64, { "R", "RJ" } },
 
-    { INDEX_op_ext32s_i64, { "R", "r" } },
-    { INDEX_op_ext32u_i64, { "R", "r" } },
-    { INDEX_op_trunc_shr_i32,  { "r", "R" } },
+    { INDEX_op_ext32s_i64, { "R", "R" } },
+    { INDEX_op_ext32u_i64, { "R", "R" } },
+    { INDEX_op_ext_i32_i64, { "R", "r" } },
+    { INDEX_op_extu_i32_i64, { "R", "r" } },
+    { INDEX_op_extrl_i64_i32,  { "r", "R" } },
+    { INDEX_op_extrh_i64_i32,  { "r", "R" } },
 
     { INDEX_op_brcond_i64, { "RZ", "RJ" } },
     { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } },
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index f584de4766..2cd72d2d41 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -118,7 +118,8 @@ extern bool use_vis3_instructions;
 #define TCG_TARGET_HAS_muluh_i32        0
 #define TCG_TARGET_HAS_mulsh_i32        0
 
-#define TCG_TARGET_HAS_trunc_shr_i32    1
+#define TCG_TARGET_HAS_extrl_i64_i32    1
+#define TCG_TARGET_HAS_extrh_i64_i32    1
 #define TCG_TARGET_HAS_div_i64          1
 #define TCG_TARGET_HAS_rem_i64          0
 #define TCG_TARGET_HAS_rot_i64          0
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 45098c310e..0b9dd8ff9f 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -1737,28 +1737,28 @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2)
 
 /* Size changing operations.  */
 
-void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned count)
+void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
 {
-    tcg_debug_assert(count < 64);
     if (TCG_TARGET_REG_BITS == 32) {
-        if (count >= 32) {
-            tcg_gen_shri_i32(ret, TCGV_HIGH(arg), count - 32);
-        } else if (count == 0) {
-            tcg_gen_mov_i32(ret, TCGV_LOW(arg));
-        } else {
-            TCGv_i64 t = tcg_temp_new_i64();
-            tcg_gen_shri_i64(t, arg, count);
-            tcg_gen_mov_i32(ret, TCGV_LOW(t));
-            tcg_temp_free_i64(t);
-        }
-    } else if (TCG_TARGET_HAS_trunc_shr_i32) {
-        tcg_gen_op3i_i32(INDEX_op_trunc_shr_i32, ret,
-                         MAKE_TCGV_I32(GET_TCGV_I64(arg)), count);
-    } else if (count == 0) {
+        tcg_gen_mov_i32(ret, TCGV_LOW(arg));
+    } else if (TCG_TARGET_HAS_extrl_i64_i32) {
+        tcg_gen_op2(&tcg_ctx, INDEX_op_extrl_i64_i32,
+                    GET_TCGV_I32(ret), GET_TCGV_I64(arg));
+    } else {
         tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg)));
+    }
+}
+
+void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
+{
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_gen_mov_i32(ret, TCGV_HIGH(arg));
+    } else if (TCG_TARGET_HAS_extrh_i64_i32) {
+        tcg_gen_op2(&tcg_ctx, INDEX_op_extrh_i64_i32,
+                    GET_TCGV_I32(ret), GET_TCGV_I64(arg));
     } else {
         TCGv_i64 t = tcg_temp_new_i64();
-        tcg_gen_shri_i64(t, arg, count);
+        tcg_gen_shri_i64(t, arg, 32);
         tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(t)));
         tcg_temp_free_i64(t);
     }
@@ -1770,9 +1770,8 @@ void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
         tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
     } else {
-        /* Note: we assume the target supports move between
-           32 and 64 bit registers.  */
-        tcg_gen_ext32u_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg)));
+        tcg_gen_op2(&tcg_ctx, INDEX_op_extu_i32_i64,
+                    GET_TCGV_I64(ret), GET_TCGV_I32(arg));
     }
 }
 
@@ -1782,9 +1781,8 @@ void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg)
         tcg_gen_mov_i32(TCGV_LOW(ret), arg);
         tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31);
     } else {
-        /* Note: we assume the target supports move between
-           32 and 64 bit registers.  */
-        tcg_gen_ext32s_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg)));
+        tcg_gen_op2(&tcg_ctx, INDEX_op_ext_i32_i64,
+                    GET_TCGV_I64(ret), GET_TCGV_I32(arg));
     }
 }
 
@@ -1820,8 +1818,8 @@ void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg)
         tcg_gen_mov_i32(lo, TCGV_LOW(arg));
         tcg_gen_mov_i32(hi, TCGV_HIGH(arg));
     } else {
-        tcg_gen_trunc_shr_i64_i32(lo, arg, 0);
-        tcg_gen_trunc_shr_i64_i32(hi, arg, 32);
+        tcg_gen_extrl_i64_i32(lo, arg);
+        tcg_gen_extrh_i64_i32(hi, arg);
     }
 }
 
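The tcg-op.c changes above replace the count-parameterised truncation helper
with two fixed extractions.  At the value level the new ops simply take the
low or the high 32 bits of a 64-bit operand; the plain-C model below (the
model_* names are illustrative, not QEMU functions) is only meant to pin
down that semantics.

    #include <stdint.h>
    #include <assert.h>

    /* Value-level model of the two new size-changing ops (not TCG code). */
    static inline uint32_t model_extrl_i64_i32(uint64_t x)
    {
        return (uint32_t)x;          /* low 32 bits */
    }

    static inline uint32_t model_extrh_i64_i32(uint64_t x)
    {
        return (uint32_t)(x >> 32);  /* high 32 bits */
    }

    int main(void)
    {
        uint64_t v = 0x0123456789abcdefULL;

        /* tcg_gen_extr_i64_i32() above now splits a value this way. */
        assert(model_extrl_i64_i32(v) == 0x89abcdefu);
        assert(model_extrh_i64_i32(v) == 0x01234567u);
        return 0;
    }

As the hunk shows, on 32-bit hosts these become moves of TCGV_LOW/TCGV_HIGH,
and a 64-bit host without the optional extrh op falls back to a right shift
by 32.
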
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index d1d763f6ff..6da083a1e9 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -684,7 +684,8 @@ static inline void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg)
 void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg);
 void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg);
 void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high);
-void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned int c);
+void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg);
+void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg);
 void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg);
 void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg);
 
@@ -693,11 +694,6 @@ static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi)
     tcg_gen_deposit_i64(ret, lo, hi, 32, 32);
 }
 
-static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg)
-{
-    tcg_gen_trunc_shr_i64_i32(ret, arg, 0);
-}
-
 /* QEMU specific operations.  */
 
 #ifndef TARGET_LONG_BITS
@@ -853,7 +849,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_divu_tl tcg_gen_divu_i64
 #define tcg_gen_remu_tl tcg_gen_remu_i64
 #define tcg_gen_discard_tl tcg_gen_discard_i64
-#define tcg_gen_trunc_tl_i32 tcg_gen_trunc_i64_i32
+#define tcg_gen_trunc_tl_i32 tcg_gen_extrl_i64_i32
 #define tcg_gen_trunc_i64_tl tcg_gen_mov_i64
 #define tcg_gen_extu_i32_tl tcg_gen_extu_i32_i64
 #define tcg_gen_ext_i32_tl tcg_gen_ext_i32_i64
@@ -932,7 +928,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
 #define tcg_gen_remu_tl tcg_gen_remu_i32
 #define tcg_gen_discard_tl tcg_gen_discard_i32
 #define tcg_gen_trunc_tl_i32 tcg_gen_mov_i32
-#define tcg_gen_trunc_i64_tl tcg_gen_trunc_i64_i32
+#define tcg_gen_trunc_i64_tl tcg_gen_extrl_i64_i32
 #define tcg_gen_extu_i32_tl tcg_gen_mov_i32
 #define tcg_gen_ext_i32_tl tcg_gen_mov_i32
 #define tcg_gen_extu_tl_i64 tcg_gen_extu_i32_i64
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 13ccb60a5d..02bbf30387 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -138,8 +138,14 @@ DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
 DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
 DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64))
 
-DEF(trunc_shr_i32, 1, 1, 1,
-    IMPL(TCG_TARGET_HAS_trunc_shr_i32)
+/* size changing ops */
+DEF(ext_i32_i64, 1, 1, 0, IMPL64)
+DEF(extu_i32_i64, 1, 1, 0, IMPL64)
+DEF(extrl_i64_i32, 1, 1, 0,
+    IMPL(TCG_TARGET_HAS_extrl_i64_i32)
+    | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0))
+DEF(extrh_i64_i32, 1, 1, 0,
+    IMPL(TCG_TARGET_HAS_extrh_i64_i32)
     | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0))
 
 DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64)
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 231a781524..f437824ba9 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -66,7 +66,8 @@ typedef uint64_t TCGRegSet;
 
 #if TCG_TARGET_REG_BITS == 32
 /* Turn some undef macros into false macros.  */
-#define TCG_TARGET_HAS_trunc_shr_i32    0
+#define TCG_TARGET_HAS_extrl_i64_i32    0
+#define TCG_TARGET_HAS_extrh_i64_i32    0
 #define TCG_TARGET_HAS_div_i64          0
 #define TCG_TARGET_HAS_rem_i64          0
 #define TCG_TARGET_HAS_div2_i64         0
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index 83472dbcd8..bbb54d4e8c 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -210,6 +210,8 @@ static const TCGTargetOpDef tcg_target_op_defs[] = {
 #if TCG_TARGET_HAS_ext32u_i64
     { INDEX_op_ext32u_i64, { R, R } },
 #endif
+    { INDEX_op_ext_i32_i64, { R, R } },
+    { INDEX_op_extu_i32_i64, { R, R } },
 #if TCG_TARGET_HAS_bswap16_i64
     { INDEX_op_bswap16_i64, { R, R } },
 #endif
@@ -701,6 +703,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
     case INDEX_op_ext16u_i64:   /* Optional (TCG_TARGET_HAS_ext16u_i64). */
     case INDEX_op_ext32s_i64:   /* Optional (TCG_TARGET_HAS_ext32s_i64). */
     case INDEX_op_ext32u_i64:   /* Optional (TCG_TARGET_HAS_ext32u_i64). */
+    case INDEX_op_ext_i32_i64:
+    case INDEX_op_extu_i32_i64:
 #endif /* TCG_TARGET_REG_BITS == 64 */
     case INDEX_op_neg_i32:      /* Optional (TCG_TARGET_HAS_neg_i32). */
     case INDEX_op_not_i32:      /* Optional (TCG_TARGET_HAS_not_i32). */
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index cbf3f9b5a6..77e5952781 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -84,7 +84,8 @@
 #define TCG_TARGET_HAS_mulsh_i32        0
 
 #if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_trunc_shr_i32    0
+#define TCG_TARGET_HAS_extrl_i64_i32    0
+#define TCG_TARGET_HAS_extrh_i64_i32    0
 #define TCG_TARGET_HAS_bswap16_i64      1
 #define TCG_TARGET_HAS_bswap32_i64      1
 #define TCG_TARGET_HAS_bswap64_i64      1