summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--tcg/s390/tcg-target.c543
-rw-r--r--tcg/s390/tcg-target.h26
2 files changed, 315 insertions, 254 deletions
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index e12a15221a..7d6f777081 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -24,16 +24,18 @@
  * THE SOFTWARE.
  */
 
+/* We only support generating code for 64-bit mode.  */
+#if TCG_TARGET_REG_BITS != 64
+#error "unsupported code generation mode"
+#endif
+
 /* ??? The translation blocks produced by TCG are generally small enough to
    be entirely reachable with a 16-bit displacement.  Leaving the option for
    a 32-bit displacement here Just In Case.  */
 #define USE_LONG_BRANCHES 0
 
 #define TCG_CT_CONST_32    0x0100
-#define TCG_CT_CONST_NEG   0x0200
-#define TCG_CT_CONST_ADDI  0x0400
 #define TCG_CT_CONST_MULI  0x0800
-#define TCG_CT_CONST_ANDI  0x1000
 #define TCG_CT_CONST_ORI   0x2000
 #define TCG_CT_CONST_XORI  0x4000
 #define TCG_CT_CONST_CMPI  0x8000
@@ -63,6 +65,7 @@
 typedef enum S390Opcode {
     RIL_AFI     = 0xc209,
     RIL_AGFI    = 0xc208,
+    RIL_ALFI    = 0xc20b,
     RIL_ALGFI   = 0xc20a,
     RIL_BRASL   = 0xc005,
     RIL_BRCL    = 0xc004,
@@ -84,6 +87,8 @@ typedef enum S390Opcode {
     RIL_NILF    = 0xc00b,
     RIL_OIHF    = 0xc00c,
     RIL_OILF    = 0xc00d,
+    RIL_SLFI    = 0xc205,
+    RIL_SLGFI   = 0xc204,
     RIL_XIHF    = 0xc006,
     RIL_XILF    = 0xc007,
 
@@ -118,8 +123,12 @@ typedef enum S390Opcode {
     RIE_CLGIJ   = 0xec7d,
     RIE_CLRJ    = 0xec77,
     RIE_CRJ     = 0xec76,
+    RIE_RISBG   = 0xec55,
 
     RRE_AGR     = 0xb908,
+    RRE_ALGR    = 0xb90a,
+    RRE_ALCR    = 0xb998,
+    RRE_ALCGR   = 0xb988,
     RRE_CGR     = 0xb920,
     RRE_CLGR    = 0xb921,
     RRE_DLGR    = 0xb987,
@@ -137,14 +146,22 @@ typedef enum S390Opcode {
     RRE_LRVR    = 0xb91f,
     RRE_LRVGR   = 0xb90f,
     RRE_LTGR    = 0xb902,
+    RRE_MLGR    = 0xb986,
     RRE_MSGR    = 0xb90c,
     RRE_MSR     = 0xb252,
     RRE_NGR     = 0xb980,
     RRE_OGR     = 0xb981,
     RRE_SGR     = 0xb909,
+    RRE_SLGR    = 0xb90b,
+    RRE_SLBR    = 0xb999,
+    RRE_SLBGR   = 0xb989,
     RRE_XGR     = 0xb982,
 
+    RRF_LOCR    = 0xb9f2,
+    RRF_LOCGR   = 0xb9e2,
+
     RR_AR       = 0x1a,
+    RR_ALR      = 0x1e,
     RR_BASR     = 0x0d,
     RR_BCR      = 0x07,
     RR_CLR      = 0x15,
@@ -156,6 +173,7 @@ typedef enum S390Opcode {
     RR_NR       = 0x14,
     RR_OR       = 0x16,
     RR_SR       = 0x1b,
+    RR_SLR      = 0x1f,
     RR_XR       = 0x17,
 
     RSY_RLL     = 0xeb1d,
@@ -172,6 +190,7 @@ typedef enum S390Opcode {
     RXY_AY      = 0xe35a,
     RXY_CG      = 0xe320,
     RXY_CY      = 0xe359,
+    RXY_LAY     = 0xe371,
     RXY_LB      = 0xe376,
     RXY_LG      = 0xe304,
     RXY_LGB     = 0xe377,
@@ -198,6 +217,7 @@ typedef enum S390Opcode {
     RX_A        = 0x5a,
     RX_C        = 0x59,
     RX_L        = 0x58,
+    RX_LA       = 0x41,
     RX_LH       = 0x48,
     RX_ST       = 0x50,
     RX_STC      = 0x42,
@@ -252,9 +272,6 @@ static const int tcg_target_call_iarg_regs[] = {
 
 static const int tcg_target_call_oarg_regs[] = {
     TCG_REG_R2,
-#if TCG_TARGET_REG_BITS == 32
-    TCG_REG_R3
-#endif
 };
 
 #define S390_CC_EQ      8
@@ -329,6 +346,7 @@ static uint8_t *tb_ret_addr;
 #define FACILITY_LONG_DISP	(1ULL << (63 - 18))
 #define FACILITY_EXT_IMM	(1ULL << (63 - 21))
 #define FACILITY_GEN_INST_EXT	(1ULL << (63 - 34))
+#define FACILITY_LOAD_ON_COND   (1ULL << (63 - 45))
 
 static uint64_t facilities;
 
@@ -376,6 +394,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         tcg_regset_set32(ct->u.regs, 0, 0xffff);
         tcg_regset_reset_reg (ct->u.regs, TCG_REG_R2);
         tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3);
+        tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4);
         break;
     case 'a':                  /* force R2 for division */
         ct->ct |= TCG_CT_REG;
@@ -387,21 +406,12 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         tcg_regset_clear(ct->u.regs);
         tcg_regset_set_reg(ct->u.regs, TCG_REG_R3);
         break;
-    case 'N':                  /* force immediate negate */
-        ct->ct |= TCG_CT_CONST_NEG;
-        break;
     case 'W':                  /* force 32-bit ("word") immediate */
         ct->ct |= TCG_CT_CONST_32;
         break;
-    case 'I':
-        ct->ct |= TCG_CT_CONST_ADDI;
-        break;
     case 'K':
         ct->ct |= TCG_CT_CONST_MULI;
         break;
-    case 'A':
-        ct->ct |= TCG_CT_CONST_ANDI;
-        break;
     case 'O':
         ct->ct |= TCG_CT_CONST_ORI;
         break;
@@ -420,63 +430,6 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
     return 0;
 }
 
-/* Immediates to be used with logical AND.  This is an optimization only,
-   since a full 64-bit immediate AND can always be performed with 4 sequential
-   NI[LH][LH] instructions.  What we're looking for is immediates that we
-   can load efficiently, and the immediate load plus the reg-reg AND is
-   smaller than the sequential NI's.  */
-
-static int tcg_match_andi(int ct, tcg_target_ulong val)
-{
-    int i;
-
-    if (facilities & FACILITY_EXT_IMM) {
-        if (ct & TCG_CT_CONST_32) {
-            /* All 32-bit ANDs can be performed with 1 48-bit insn.  */
-            return 1;
-        }
-
-        /* Zero-extensions.  */
-        if (val == 0xff || val == 0xffff || val == 0xffffffff) {
-            return 1;
-        }
-    } else {
-        if (ct & TCG_CT_CONST_32) {
-            val = (uint32_t)val;
-        } else if (val == 0xffffffff) {
-            return 1;
-        }
-    }
-
-    /* Try all 32-bit insns that can perform it in one go.  */
-    for (i = 0; i < 4; i++) {
-        tcg_target_ulong mask = ~(0xffffull << i*16);
-        if ((val & mask) == mask) {
-            return 1;
-        }
-    }
-
-    /* Look for 16-bit values performing the mask.  These are better
-       to load with LLI[LH][LH].  */
-    for (i = 0; i < 4; i++) {
-        tcg_target_ulong mask = 0xffffull << i*16;
-        if ((val & mask) == val) {
-            return 0;
-        }
-    }
-
-    /* Look for 32-bit values performing the 64-bit mask.  These
-       are better to load with LLI[LH]F, or if extended immediates
-       not available, with a pair of LLI insns.  */
-    if ((ct & TCG_CT_CONST_32) == 0) {
-        if (val <= 0xffffffff || (val & 0xffffffff) == 0) {
-            return 0;
-        }
-    }
-
-    return 1;
-}
-
 /* Immediates to be used with logical OR.  This is an optimization only,
    since a full 64-bit immediate OR can always be performed with 4 sequential
    OI[LH][LH] instructions.  What we're looking for is immediates that we
@@ -571,25 +524,12 @@ static int tcg_target_const_match(tcg_target_long val,
     }
 
     /* Handle the modifiers.  */
-    if (ct & TCG_CT_CONST_NEG) {
-        val = -val;
-    }
     if (ct & TCG_CT_CONST_32) {
         val = (int32_t)val;
     }
 
     /* The following are mutually exclusive.  */
-    if (ct & TCG_CT_CONST_ADDI) {
-        /* Immediates that may be used with add.  If we have the
-           extended-immediates facility then we have ADD IMMEDIATE
-           with signed and unsigned 32-bit, otherwise we have only
-           ADD HALFWORD IMMEDIATE with a signed 16-bit.  */
-        if (facilities & FACILITY_EXT_IMM) {
-            return val == (int32_t)val || val == (uint32_t)val;
-        } else {
-            return val == (int16_t)val;
-        }
-    } else if (ct & TCG_CT_CONST_MULI) {
+    if (ct & TCG_CT_CONST_MULI) {
         /* Immediates that may be used with multiply.  If we have the
            general-instruction-extensions, then we have MULTIPLY SINGLE
            IMMEDIATE with a signed 32-bit, otherwise we have only
@@ -599,8 +539,6 @@ static int tcg_target_const_match(tcg_target_long val,
         } else {
             return val == (int16_t)val;
         }
-    } else if (ct & TCG_CT_CONST_ANDI) {
-        return tcg_match_andi(ct, val);
     } else if (ct & TCG_CT_CONST_ORI) {
         return tcg_match_ori(ct, val);
     } else if (ct & TCG_CT_CONST_XORI) {
@@ -625,6 +563,12 @@ static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
     tcg_out32(s, (op << 16) | (r1 << 4) | r2);
 }
 
+static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op,
+                             TCGReg r1, TCGReg r2, int m3)
+{
+    tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
+}
+
 static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
 {
     tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
@@ -770,7 +714,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
     /* If we get here, both the high and low parts have non-zero bits.  */
 
     /* Recurse to load the lower 32-bits.  */
-    tcg_out_movi(s, TCG_TYPE_I32, ret, sval);
+    tcg_out_movi(s, TCG_TYPE_I64, ret, uval & 0xffffffff);
 
     /* Insert data into the high 32-bits.  */
     uval = uval >> 31 >> 1;
@@ -804,10 +748,11 @@ static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
                         tcg_target_long ofs)
 {
     if (ofs < -0x80000 || ofs >= 0x80000) {
-        /* Combine the low 16 bits of the offset with the actual load insn;
-           the high 48 bits must come from an immediate load.  */
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs & ~0xffff);
-        ofs &= 0xffff;
+        /* Combine the low 20 bits of the offset with the actual load insn;
+           the high 44 bits must come from an immediate load.  */
+        tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
+        ofs = low;
 
         /* If we were already given an index register, add it in.  */
         if (index != TCG_REG_NONE) {
@@ -866,6 +811,15 @@ static void tcg_out_ld_abs(TCGContext *s, TCGType type, TCGReg dest, void *abs)
     tcg_out_ld(s, type, dest, dest, addr & 0xffff);
 }
 
+static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
+                                 int msb, int lsb, int ofs, int z)
+{
+    /* Format RIE-f */
+    tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src);
+    tcg_out16(s, (msb << 8) | (z << 7) | lsb);
+    tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff));
+}
+
 static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
 {
     if (facilities & FACILITY_EXT_IMM) {
@@ -956,30 +910,37 @@ static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
     tcg_out_insn(s, RRE, LLGFR, dest, src);
 }
 
-static inline void tgen32_addi(TCGContext *s, TCGReg dest, int32_t val)
+/* Accept bit patterns like these:
+    0....01....1
+    1....10....0
+    1..10..01..1
+    0..01..10..0
+   Copied from gcc sources.  */
+static inline bool risbg_mask(uint64_t c)
 {
-    if (val == (int16_t)val) {
-        tcg_out_insn(s, RI, AHI, dest, val);
-    } else {
-        tcg_out_insn(s, RIL, AFI, dest, val);
-    }
-}
-
-static inline void tgen64_addi(TCGContext *s, TCGReg dest, int64_t val)
-{
-    if (val == (int16_t)val) {
-        tcg_out_insn(s, RI, AGHI, dest, val);
-    } else if (val == (int32_t)val) {
-        tcg_out_insn(s, RIL, AGFI, dest, val);
-    } else if (val == (uint32_t)val) {
-        tcg_out_insn(s, RIL, ALGFI, dest, val);
-    } else {
-        tcg_abort();
-    }
-
+    uint64_t lsb;
+    /* We don't change the number of transitions by inverting,
+       so make sure we start with the LSB zero.  */
+    if (c & 1) {
+        c = ~c;
+    }
+    /* Reject all zeros or all ones.  */
+    if (c == 0) {
+        return false;
+    }
+    /* Find the first transition.  */
+    lsb = c & -c;
+    /* Invert to look for a second transition.  */
+    c = ~c;
+    /* Erase the first transition.  */
+    c &= -lsb;
+    /* Find the second transition, if any.  */
+    lsb = c & -c;
+    /* Match if all the bits are 1's, or if c is zero.  */
+    return c == -lsb;
 }
 
-static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
 {
     static const S390Opcode ni_insns[4] = {
         RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
@@ -987,63 +948,64 @@ static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val)
     static const S390Opcode nif_insns[2] = {
         RIL_NILF, RIL_NIHF
     };
-
+    uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
     int i;
 
-    /* Look for no-op.  */
-    if (val == -1) {
-        return;
-    }
-
     /* Look for the zero-extensions.  */
-    if (val == 0xffffffff) {
+    if ((val & valid) == 0xffffffff) {
         tgen_ext32u(s, dest, dest);
         return;
     }
-
     if (facilities & FACILITY_EXT_IMM) {
-        if (val == 0xff) {
+        if ((val & valid) == 0xff) {
             tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
             return;
         }
-        if (val == 0xffff) {
+        if ((val & valid) == 0xffff) {
             tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
             return;
         }
+    }
 
-        /* Try all 32-bit insns that can perform it in one go.  */
-        for (i = 0; i < 4; i++) {
-            tcg_target_ulong mask = ~(0xffffull << i*16);
-            if ((val & mask) == mask) {
-                tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
-                return;
-            }
+    /* Try all 32-bit insns that can perform it in one go.  */
+    for (i = 0; i < 4; i++) {
+        tcg_target_ulong mask = ~(0xffffull << i*16);
+        if (((val | ~valid) & mask) == mask) {
+            tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+            return;
         }
+    }
 
-        /* Try all 48-bit insns that can perform it in one go.  */
-        if (facilities & FACILITY_EXT_IMM) {
-            for (i = 0; i < 2; i++) {
-                tcg_target_ulong mask = ~(0xffffffffull << i*32);
-                if ((val & mask) == mask) {
-                    tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
-                    return;
-                }
+    /* Try all 48-bit insns that can perform it in one go.  */
+    if (facilities & FACILITY_EXT_IMM) {
+        for (i = 0; i < 2; i++) {
+            tcg_target_ulong mask = ~(0xffffffffull << i*32);
+            if (((val | ~valid) & mask) == mask) {
+                tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+                return;
             }
         }
+    }
+    if ((facilities & FACILITY_GEN_INST_EXT) && risbg_mask(val)) {
+        int msb, lsb;
+        if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
+            /* Achieve wraparound by swapping msb and lsb.  */
+            msb = 63 - ctz64(~val);
+            lsb = clz64(~val) + 1;
+        } else {
+            msb = clz64(val);
+            lsb = 63 - ctz64(val);
+        }
+        tcg_out_risbg(s, dest, dest, msb, lsb, 0, 1);
+        return;
+    }
 
-        /* Perform the AND via sequential modifications to the high and low
-           parts.  Do this via recursion to handle 16-bit vs 32-bit masks in
-           each half.  */
-        tgen64_andi(s, dest, val | 0xffffffff00000000ull);
-        tgen64_andi(s, dest, val | 0x00000000ffffffffull);
+    /* Fall back to loading the constant.  */
+    tcg_out_movi(s, type, TCG_TMP0, val);
+    if (type == TCG_TYPE_I32) {
+        tcg_out_insn(s, RR, NR, dest, TCG_TMP0);
     } else {
-        /* With no extended-immediate facility, just emit the sequence.  */
-        for (i = 0; i < 4; i++) {
-            tcg_target_ulong mask = 0xffffull << i*16;
-            if ((val & mask) != mask) {
-                tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
-            }
-        }
+        tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
     }
 }
 
@@ -1156,9 +1118,9 @@ static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
 }
 
 static void tgen_setcond(TCGContext *s, TCGType type, TCGCond c,
-                         TCGReg dest, TCGReg r1, TCGArg c2, int c2const)
+                         TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
 {
-    int cc = tgen_cmp(s, type, c, r1, c2, c2const);
+    int cc = tgen_cmp(s, type, c, c1, c2, c2const);
 
     /* Emit: r1 = 1; if (cc) goto over; r1 = 0; over:  */
     tcg_out_movi(s, type, dest, 1);
@@ -1166,6 +1128,36 @@ static void tgen_setcond(TCGContext *s, TCGType type, TCGCond c,
     tcg_out_movi(s, type, dest, 0);
 }
 
+static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
+                         TCGReg c1, TCGArg c2, int c2const, TCGReg r3)
+{
+    int cc;
+    if (facilities & FACILITY_LOAD_ON_COND) {
+        cc = tgen_cmp(s, type, c, c1, c2, c2const);
+        tcg_out_insn(s, RRF, LOCGR, dest, r3, cc);
+    } else {
+        c = tcg_invert_cond(c);
+        cc = tgen_cmp(s, type, c, c1, c2, c2const);
+
+        /* Emit: if (cc) goto over; dest = r3; over:  */
+        tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
+        tcg_out_insn(s, RRE, LGR, dest, r3);
+    }
+}
+
+bool tcg_target_deposit_valid(int ofs, int len)
+{
+    return (facilities & FACILITY_GEN_INST_EXT) != 0;
+}
+
+static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
+                         int ofs, int len)
+{
+    int lsb = (63 - ofs);
+    int msb = lsb - (len - 1);
+    tcg_out_risbg(s, dest, src, msb, lsb, ofs, 0);
+}
+
 static void tgen_gotoi(TCGContext *s, int cc, tcg_target_long dest)
 {
     tcg_target_long off = (dest - (tcg_target_long)s->code_ptr) >> 1;
@@ -1239,7 +1231,7 @@ static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
     int cc;
 
     if (facilities & FACILITY_GEN_INST_EXT) {
-        bool is_unsigned = (c > TCG_COND_GT);
+        bool is_unsigned = is_unsigned_cond(c);
         bool in_range;
         S390Opcode opc;
 
@@ -1405,37 +1397,29 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data,
 }
 
 #if defined(CONFIG_SOFTMMU)
-static void tgen64_andi_tmp(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+static TCGReg tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
+                                    TCGReg addr_reg, int mem_index, int opc,
+                                    uint16_t **label2_ptr_p, int is_store)
 {
-    if (tcg_match_andi(0, val)) {
-        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val);
-        tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
-    } else {
-        tgen64_andi(s, dest, val);
-    }
-}
-
-static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
-                                  TCGReg addr_reg, int mem_index, int opc,
-                                  uint16_t **label2_ptr_p, int is_store)
-{
-    const TCGReg arg0 = TCG_REG_R2;
-    const TCGReg arg1 = TCG_REG_R3;
+    const TCGReg arg0 = tcg_target_call_iarg_regs[0];
+    const TCGReg arg1 = tcg_target_call_iarg_regs[1];
+    const TCGReg arg2 = tcg_target_call_iarg_regs[2];
+    const TCGReg arg3 = tcg_target_call_iarg_regs[3];
     int s_bits = opc & 3;
     uint16_t *label1_ptr;
     tcg_target_long ofs;
 
     if (TARGET_LONG_BITS == 32) {
-        tgen_ext32u(s, arg0, addr_reg);
+        tgen_ext32u(s, arg1, addr_reg);
     } else {
-        tcg_out_mov(s, TCG_TYPE_I64, arg0, addr_reg);
+        tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg);
     }
 
-    tcg_out_sh64(s, RSY_SRLG, arg1, addr_reg, TCG_REG_NONE,
+    tcg_out_sh64(s, RSY_SRLG, arg2, addr_reg, TCG_REG_NONE,
                  TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
-    tgen64_andi_tmp(s, arg0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-    tgen64_andi_tmp(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+    tgen_andi(s, TCG_TYPE_I64, arg1, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+    tgen_andi(s, TCG_TYPE_I64, arg2, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
 
     if (is_store) {
         ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
@@ -1445,15 +1429,15 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
     assert(ofs < 0x80000);
 
     if (TARGET_LONG_BITS == 32) {
-        tcg_out_mem(s, RX_C, RXY_CY, arg0, arg1, TCG_AREG0, ofs);
+        tcg_out_mem(s, RX_C, RXY_CY, arg1, arg2, TCG_AREG0, ofs);
     } else {
-        tcg_out_mem(s, 0, RXY_CG, arg0, arg1, TCG_AREG0, ofs);
+        tcg_out_mem(s, 0, RXY_CG, arg1, arg2, TCG_AREG0, ofs);
     }
 
     if (TARGET_LONG_BITS == 32) {
-        tgen_ext32u(s, arg0, addr_reg);
+        tgen_ext32u(s, arg1, addr_reg);
     } else {
-        tcg_out_mov(s, TCG_TYPE_I64, arg0, addr_reg);
+        tcg_out_mov(s, TCG_TYPE_I64, arg1, addr_reg);
     }
 
     label1_ptr = (uint16_t*)s->code_ptr;
@@ -1467,56 +1451,42 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
            for the calling convention.  */
         switch (opc) {
         case LD_UINT8:
-            tgen_ext8u(s, TCG_TYPE_I64, arg1, data_reg);
+            tgen_ext8u(s, TCG_TYPE_I64, arg2, data_reg);
             break;
         case LD_UINT16:
-            tgen_ext16u(s, TCG_TYPE_I64, arg1, data_reg);
+            tgen_ext16u(s, TCG_TYPE_I64, arg2, data_reg);
             break;
         case LD_UINT32:
-            tgen_ext32u(s, arg1, data_reg);
+            tgen_ext32u(s, arg2, data_reg);
             break;
         case LD_UINT64:
-            tcg_out_mov(s, TCG_TYPE_I64, arg1, data_reg);
+            tcg_out_mov(s, TCG_TYPE_I64, arg2, data_reg);
             break;
         default:
             tcg_abort();
         }
-        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, mem_index);
-        /* XXX/FIXME: suboptimal */
-        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
-                    tcg_target_call_iarg_regs[2]);
-        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                    tcg_target_call_iarg_regs[1]);
-        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
-                    tcg_target_call_iarg_regs[0]);
-        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
-                    TCG_AREG0);
+        tcg_out_movi(s, TCG_TYPE_I32, arg3, mem_index);
+        tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0);
         tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]);
     } else {
-        tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
-        /* XXX/FIXME: suboptimal */
-        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                    tcg_target_call_iarg_regs[1]);
-        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
-                    tcg_target_call_iarg_regs[0]);
-        tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
-                    TCG_AREG0);
+        tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
+        tcg_out_mov(s, TCG_TYPE_I64, arg0, TCG_AREG0);
         tgen_calli(s, (tcg_target_ulong)qemu_ld_helpers[s_bits]);
 
         /* sign extension */
         switch (opc) {
         case LD_INT8:
-            tgen_ext8s(s, TCG_TYPE_I64, data_reg, arg0);
+            tgen_ext8s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
             break;
         case LD_INT16:
-            tgen_ext16s(s, TCG_TYPE_I64, data_reg, arg0);
+            tgen_ext16s(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
             break;
         case LD_INT32:
-            tgen_ext32s(s, data_reg, arg0);
+            tgen_ext32s(s, data_reg, TCG_REG_R2);
             break;
         default:
             /* unsigned -> just copy */
-            tcg_out_mov(s, TCG_TYPE_I64, data_reg, arg0);
+            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
             break;
         }
     }
@@ -1533,7 +1503,9 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
     ofs = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
     assert(ofs < 0x80000);
 
-    tcg_out_mem(s, 0, RXY_AG, arg0, arg1, TCG_AREG0, ofs);
+    tcg_out_mem(s, 0, RXY_AG, arg1, arg2, TCG_AREG0, ofs);
+
+    return arg1;
 }
 
 static void tcg_finish_qemu_ldst(TCGContext* s, uint16_t *label2_ptr)
@@ -1579,10 +1551,10 @@ static void tcg_out_qemu_ld(TCGContext* s, const TCGArg* args, int opc)
 #if defined(CONFIG_SOFTMMU)
     mem_index = *args;
 
-    tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
-                          opc, &label2_ptr, 0);
+    addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
+                                     opc, &label2_ptr, 0);
 
-    tcg_out_qemu_ld_direct(s, opc, data_reg, TCG_REG_R2, TCG_REG_NONE, 0);
+    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0);
 
     tcg_finish_qemu_ldst(s, label2_ptr);
 #else
@@ -1608,10 +1580,10 @@ static void tcg_out_qemu_st(TCGContext* s, const TCGArg* args, int opc)
 #if defined(CONFIG_SOFTMMU)
     mem_index = *args;
 
-    tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
-                          opc, &label2_ptr, 1);
+    addr_reg = tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index,
+                                     opc, &label2_ptr, 1);
 
-    tcg_out_qemu_st_direct(s, opc, data_reg, TCG_REG_R2, TCG_REG_NONE, 0);
+    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_NONE, 0);
 
     tcg_finish_qemu_ldst(s, label2_ptr);
 #else
@@ -1620,19 +1592,15 @@ static void tcg_out_qemu_st(TCGContext* s, const TCGArg* args, int opc)
 #endif
 }
 
-#if TCG_TARGET_REG_BITS == 64
 # define OP_32_64(x) \
         case glue(glue(INDEX_op_,x),_i32): \
         case glue(glue(INDEX_op_,x),_i64)
-#else
-# define OP_32_64(x) \
-        case glue(glue(INDEX_op_,x),_i32)
-#endif
 
 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                 const TCGArg *args, const int *const_args)
 {
     S390Opcode op;
+    TCGArg a0, a1, a2;
 
     switch (opc) {
     case INDEX_op_exit_tb:
@@ -1708,23 +1676,38 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_add_i32:
+        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
         if (const_args[2]) {
-            tgen32_addi(s, args[0], args[2]);
+        do_addi_32:
+            if (a0 == a1) {
+                if (a2 == (int16_t)a2) {
+                    tcg_out_insn(s, RI, AHI, a0, a2);
+                    break;
+                }
+                if (facilities & FACILITY_EXT_IMM) {
+                    tcg_out_insn(s, RIL, AFI, a0, a2);
+                    break;
+                }
+            }
+            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
+        } else if (a0 == a1) {
+            tcg_out_insn(s, RR, AR, a0, a2);
         } else {
-            tcg_out_insn(s, RR, AR, args[0], args[2]);
+            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
         }
         break;
     case INDEX_op_sub_i32:
+        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
         if (const_args[2]) {
-            tgen32_addi(s, args[0], -args[2]);
-        } else {
-            tcg_out_insn(s, RR, SR, args[0], args[2]);
+            a2 = -a2;
+            goto do_addi_32;
         }
+        tcg_out_insn(s, RR, SR, args[0], args[2]);
         break;
 
     case INDEX_op_and_i32:
         if (const_args[2]) {
-            tgen64_andi(s, args[0], args[2] | 0xffffffff00000000ull);
+            tgen_andi(s, TCG_TYPE_I32, args[0], args[2]);
         } else {
             tcg_out_insn(s, RR, NR, args[0], args[2]);
         }
@@ -1824,6 +1807,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
         break;
 
+    case INDEX_op_add2_i32:
+        /* ??? Make use of ALFI.  */
+        tcg_out_insn(s, RR, ALR, args[0], args[4]);
+        tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
+        break;
+    case INDEX_op_sub2_i32:
+        /* ??? Make use of SLFI.  */
+        tcg_out_insn(s, RR, SLR, args[0], args[4]);
+        tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
+        break;
+
     case INDEX_op_br:
         tgen_branch(s, S390_CC_ALWAYS, args[0]);
         break;
@@ -1836,6 +1830,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
                      args[2], const_args[2]);
         break;
+    case INDEX_op_movcond_i32:
+        tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
+                     args[2], const_args[2], args[3]);
+        break;
 
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, LD_UINT8);
@@ -1870,7 +1868,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_qemu_st(s, args, LD_UINT64);
         break;
 
-#if TCG_TARGET_REG_BITS == 64
     case INDEX_op_mov_i64:
         tcg_out_mov(s, TCG_TYPE_I64, args[0], args[1]);
         break;
@@ -1899,15 +1896,39 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_add_i64:
+        a0 = args[0], a1 = args[1], a2 = args[2];
         if (const_args[2]) {
-            tgen64_addi(s, args[0], args[2]);
+        do_addi_64:
+            if (a0 == a1) {
+                if (a2 == (int16_t)a2) {
+                    tcg_out_insn(s, RI, AGHI, a0, a2);
+                    break;
+                }
+                if (facilities & FACILITY_EXT_IMM) {
+                    if (a2 == (int32_t)a2) {
+                        tcg_out_insn(s, RIL, AGFI, a0, a2);
+                        break;
+                    } else if (a2 == (uint32_t)a2) {
+                        tcg_out_insn(s, RIL, ALGFI, a0, a2);
+                        break;
+                    } else if (-a2 == (uint32_t)-a2) {
+                        tcg_out_insn(s, RIL, SLGFI, a0, -a2);
+                        break;
+                    }
+                }
+            }
+            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
+        } else if (a0 == a1) {
+            tcg_out_insn(s, RRE, AGR, a0, a2);
         } else {
-            tcg_out_insn(s, RRE, AGR, args[0], args[2]);
+            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
         }
         break;
     case INDEX_op_sub_i64:
+        a0 = args[0], a1 = args[1], a2 = args[2];
         if (const_args[2]) {
-            tgen64_addi(s, args[0], -args[2]);
+            a2 = -a2;
+            goto do_addi_64;
         } else {
             tcg_out_insn(s, RRE, SGR, args[0], args[2]);
         }
@@ -1915,7 +1936,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     case INDEX_op_and_i64:
         if (const_args[2]) {
-            tgen64_andi(s, args[0], args[2]);
+            tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
         } else {
             tcg_out_insn(s, RRE, NGR, args[0], args[2]);
         }
@@ -1964,6 +1985,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_divu2_i64:
         tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]);
         break;
+    case INDEX_op_mulu2_i64:
+        tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
+        break;
 
     case INDEX_op_shl_i64:
         op = RSY_SLLG;
@@ -2020,6 +2044,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tgen_ext32u(s, args[0], args[1]);
         break;
 
+    case INDEX_op_add2_i64:
+        /* ??? Make use of ALGFI and SLGFI.  */
+        tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
+        tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
+        break;
+    case INDEX_op_sub2_i64:
+        /* ??? Make use of ALGFI and SLGFI.  */
+        tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
+        tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
+        break;
+
     case INDEX_op_brcond_i64:
         tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
                     args[1], const_args[1], args[3]);
@@ -2028,6 +2063,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
                      args[2], const_args[2]);
         break;
+    case INDEX_op_movcond_i64:
+        tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
+                     args[2], const_args[2], args[3]);
+        break;
 
     case INDEX_op_qemu_ld32u:
         tcg_out_qemu_ld(s, args, LD_UINT32);
@@ -2035,7 +2074,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_qemu_ld32s:
         tcg_out_qemu_ld(s, args, LD_INT32);
         break;
-#endif /* TCG_TARGET_REG_BITS == 64 */
+
+    OP_32_64(deposit):
+        tgen_deposit(s, args[0], args[2], args[3], args[4]);
+        break;
 
     default:
         fprintf(stderr,"unimplemented opc 0x%x\n",opc);
@@ -2061,14 +2103,14 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_st16_i32, { "r", "r" } },
     { INDEX_op_st_i32, { "r", "r" } },
 
-    { INDEX_op_add_i32, { "r", "0", "rWI" } },
-    { INDEX_op_sub_i32, { "r", "0", "rWNI" } },
+    { INDEX_op_add_i32, { "r", "r", "ri" } },
+    { INDEX_op_sub_i32, { "r", "0", "ri" } },
     { INDEX_op_mul_i32, { "r", "0", "rK" } },
 
     { INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } },
     { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } },
 
-    { INDEX_op_and_i32, { "r", "0", "rWA" } },
+    { INDEX_op_and_i32, { "r", "0", "ri" } },
     { INDEX_op_or_i32, { "r", "0", "rWO" } },
     { INDEX_op_xor_i32, { "r", "0", "rWX" } },
 
@@ -2089,8 +2131,13 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_bswap16_i32, { "r", "r" } },
     { INDEX_op_bswap32_i32, { "r", "r" } },
 
+    { INDEX_op_add2_i32, { "r", "r", "0", "1", "r", "r" } },
+    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "r", "r" } },
+
     { INDEX_op_brcond_i32, { "r", "rWC" } },
     { INDEX_op_setcond_i32, { "r", "r", "rWC" } },
+    { INDEX_op_movcond_i32, { "r", "r", "rWC", "r", "0" } },
+    { INDEX_op_deposit_i32, { "r", "0", "r" } },
 
     { INDEX_op_qemu_ld8u, { "r", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L" } },
@@ -2104,7 +2151,6 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_qemu_st32, { "L", "L" } },
     { INDEX_op_qemu_st64, { "L", "L" } },
 
-#if defined(__s390x__)
     { INDEX_op_mov_i64, { "r", "r" } },
     { INDEX_op_movi_i64, { "r" } },
 
@@ -2121,14 +2167,15 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_st32_i64, { "r", "r" } },
     { INDEX_op_st_i64, { "r", "r" } },
 
-    { INDEX_op_add_i64, { "r", "0", "rI" } },
-    { INDEX_op_sub_i64, { "r", "0", "rNI" } },
+    { INDEX_op_add_i64, { "r", "r", "ri" } },
+    { INDEX_op_sub_i64, { "r", "0", "ri" } },
     { INDEX_op_mul_i64, { "r", "0", "rK" } },
 
     { INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } },
     { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } },
+    { INDEX_op_mulu2_i64, { "b", "a", "0", "r" } },
 
-    { INDEX_op_and_i64, { "r", "0", "rA" } },
+    { INDEX_op_and_i64, { "r", "0", "ri" } },
     { INDEX_op_or_i64, { "r", "0", "rO" } },
     { INDEX_op_xor_i64, { "r", "0", "rX" } },
 
@@ -2152,12 +2199,16 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_bswap32_i64, { "r", "r" } },
     { INDEX_op_bswap64_i64, { "r", "r" } },
 
+    { INDEX_op_add2_i64, { "r", "r", "0", "1", "r", "r" } },
+    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "r", "r" } },
+
     { INDEX_op_brcond_i64, { "r", "rC" } },
     { INDEX_op_setcond_i64, { "r", "r", "rC" } },
+    { INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } },
+    { INDEX_op_deposit_i64, { "r", "0", "r" } },
 
     { INDEX_op_qemu_ld32u, { "r", "L" } },
     { INDEX_op_qemu_ld32s, { "r", "L" } },
-#endif
 
     { -1 },
 };
@@ -2302,17 +2353,24 @@ static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
 
     tcg_add_target_add_op_defs(s390_op_defs);
-    tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
-                  CPU_TEMP_BUF_NLONGS * sizeof(long));
 }
 
 static void tcg_target_qemu_prologue(TCGContext *s)
 {
+    tcg_target_long frame_size;
+
     /* stmg %r6,%r15,48(%r15) (save registers) */
     tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
 
-    /* aghi %r15,-160 (stack frame) */
-    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -160);
+    /* aghi %r15,-frame_size */
+    frame_size = TCG_TARGET_CALL_STACK_OFFSET;
+    frame_size += TCG_STATIC_CALL_ARGS_SIZE;
+    frame_size += CPU_TEMP_BUF_NLONGS * sizeof(long);
+    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -frame_size);
+
+    tcg_set_frame(s, TCG_REG_CALL_STACK,
+                  TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
+                  CPU_TEMP_BUF_NLONGS * sizeof(long));
 
     if (GUEST_BASE >= 0x80000) {
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
@@ -2325,8 +2383,9 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
     tb_ret_addr = s->code_ptr;
 
-    /* lmg %r6,%r15,208(%r15) (restore registers) */
-    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 208);
+    /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
+    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
+                 frame_size + 48);
 
     /* br %r14 (return) */
     tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 40211e68f1..42ca36c0e9 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -63,14 +63,13 @@ typedef enum TCGReg {
 #define TCG_TARGET_HAS_eqv_i32          0
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
-#define TCG_TARGET_HAS_deposit_i32      0
-#define TCG_TARGET_HAS_movcond_i32      0
-#define TCG_TARGET_HAS_add2_i32         0
-#define TCG_TARGET_HAS_sub2_i32         0
+#define TCG_TARGET_HAS_deposit_i32      1
+#define TCG_TARGET_HAS_movcond_i32      1
+#define TCG_TARGET_HAS_add2_i32         1
+#define TCG_TARGET_HAS_sub2_i32         1
 #define TCG_TARGET_HAS_mulu2_i32        0
 #define TCG_TARGET_HAS_muls2_i32        0
 
-#if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64         1
 #define TCG_TARGET_HAS_rot_i64          1
 #define TCG_TARGET_HAS_ext8s_i64        1
@@ -89,18 +88,21 @@ typedef enum TCGReg {
 #define TCG_TARGET_HAS_eqv_i64          0
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
-#define TCG_TARGET_HAS_deposit_i64      0
-#define TCG_TARGET_HAS_movcond_i64      0
-#define TCG_TARGET_HAS_add2_i64         0
-#define TCG_TARGET_HAS_sub2_i64         0
-#define TCG_TARGET_HAS_mulu2_i64        0
+#define TCG_TARGET_HAS_deposit_i64      1
+#define TCG_TARGET_HAS_movcond_i64      1
+#define TCG_TARGET_HAS_add2_i64         1
+#define TCG_TARGET_HAS_sub2_i64         1
+#define TCG_TARGET_HAS_mulu2_i64        1
 #define TCG_TARGET_HAS_muls2_i64        0
-#endif
+
+extern bool tcg_target_deposit_valid(int ofs, int len);
+#define TCG_TARGET_deposit_i32_valid  tcg_target_deposit_valid
+#define TCG_TARGET_deposit_i64_valid  tcg_target_deposit_valid
 
 /* used for function call generation */
 #define TCG_REG_CALL_STACK		TCG_REG_R15
 #define TCG_TARGET_STACK_ALIGN		8
-#define TCG_TARGET_CALL_STACK_OFFSET	0
+#define TCG_TARGET_CALL_STACK_OFFSET	160
 
 #define TCG_TARGET_EXTEND_ARGS 1