summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--linux-user/main.c39
-rw-r--r--linux-user/signal.c159
-rw-r--r--linux-user/syscall_defs.h11
-rw-r--r--linux-user/tilegx/syscall.h3
-rw-r--r--target-tilegx/cpu.c7
-rw-r--r--target-tilegx/cpu.h8
-rw-r--r--target-tilegx/helper.c81
-rw-r--r--target-tilegx/helper.h16
-rw-r--r--target-tilegx/simd_helper.c118
-rw-r--r--target-tilegx/translate.c438
10 files changed, 799 insertions, 81 deletions
diff --git a/linux-user/main.c b/linux-user/main.c
index 6599a41404..1f60ff2a1f 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -3414,28 +3414,47 @@ void cpu_loop(CPUS390XState *env)
 
 #ifdef TARGET_TILEGX
 
-static void gen_sigsegv_maperr(CPUTLGState *env, target_ulong addr)
+static void gen_sigill_reg(CPUTLGState *env)
 {
     target_siginfo_t info;
 
-    info.si_signo = TARGET_SIGSEGV;
+    info.si_signo = TARGET_SIGILL;
     info.si_errno = 0;
-    info.si_code = TARGET_SEGV_MAPERR;
-    info._sifields._sigfault._addr = addr;
+    info.si_code = TARGET_ILL_PRVREG;
+    info._sifields._sigfault._addr = env->pc;
     queue_signal(env, info.si_signo, &info);
 }
 
-static void gen_sigill_reg(CPUTLGState *env)
+static void do_signal(CPUTLGState *env, int signo, int sigcode)
 {
     target_siginfo_t info;
 
-    info.si_signo = TARGET_SIGILL;
+    info.si_signo = signo;
     info.si_errno = 0;
-    info.si_code = TARGET_ILL_PRVREG;
     info._sifields._sigfault._addr = env->pc;
+
+    if (signo == TARGET_SIGSEGV) {
+        /* The passed in sigcode is a dummy; check for a page mapping
+           and pass either MAPERR or ACCERR.  */
+        target_ulong addr = env->excaddr;
+        info._sifields._sigfault._addr = addr;
+        if (page_check_range(addr, 1, PAGE_VALID) < 0) {
+            sigcode = TARGET_SEGV_MAPERR;
+        } else {
+            sigcode = TARGET_SEGV_ACCERR;
+        }
+    }
+    info.si_code = sigcode;
+
     queue_signal(env, info.si_signo, &info);
 }
 
+static void gen_sigsegv_maperr(CPUTLGState *env, target_ulong addr)
+{
+    env->excaddr = addr;
+    do_signal(env, TARGET_SIGSEGV, 0);
+}
+
 static void set_regval(CPUTLGState *env, uint8_t reg, uint64_t val)
 {
     if (unlikely(reg >= TILEGX_R_COUNT)) {
@@ -3622,13 +3641,13 @@ void cpu_loop(CPUTLGState *env)
         case TILEGX_EXCP_OPCODE_FETCHOR4:
             do_fetch(env, trapnr, false);
             break;
+        case TILEGX_EXCP_SIGNAL:
+            do_signal(env, env->signo, env->sigcode);
+            break;
         case TILEGX_EXCP_REG_IDN_ACCESS:
         case TILEGX_EXCP_REG_UDN_ACCESS:
             gen_sigill_reg(env);
             break;
-        case TILEGX_EXCP_SEGV:
-            gen_sigsegv_maperr(env, env->excaddr);
-            break;
         default:
             fprintf(stderr, "trapnr is %d[0x%x].\n", trapnr, trapnr);
             g_assert_not_reached();
diff --git a/linux-user/signal.c b/linux-user/signal.c
index ac82baa0f0..9d62e027e3 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -5537,6 +5537,163 @@ long do_rt_sigreturn(CPUAlphaState *env)
     force_sig(TARGET_SIGSEGV);
 }
 
+#elif defined(TARGET_TILEGX)
+
+struct target_sigcontext {
+    union {
+        /* General-purpose registers.  */
+        abi_ulong gregs[56];
+        struct {
+            abi_ulong __gregs[53];
+            abi_ulong tp;        /* Aliases gregs[TREG_TP].  */
+            abi_ulong sp;        /* Aliases gregs[TREG_SP].  */
+            abi_ulong lr;        /* Aliases gregs[TREG_LR].  */
+        };
+    };
+    abi_ulong pc;        /* Program counter.  */
+    abi_ulong ics;       /* In Interrupt Critical Section?  */
+    abi_ulong faultnum;  /* Fault number.  */
+    abi_ulong pad[5];
+};
+
+struct target_ucontext {
+    abi_ulong tuc_flags;
+    abi_ulong tuc_link;
+    target_stack_t tuc_stack;
+    struct target_sigcontext tuc_mcontext;
+    target_sigset_t tuc_sigmask;   /* mask last for extensibility */
+};
+
+struct target_rt_sigframe {
+    unsigned char save_area[16]; /* caller save area */
+    struct target_siginfo info;
+    struct target_ucontext uc;
+};
+
+static void setup_sigcontext(struct target_sigcontext *sc,
+                             CPUArchState *env, int signo)
+{
+    int i;
+
+    for (i = 0; i < TILEGX_R_COUNT; ++i) {
+        __put_user(env->regs[i], &sc->gregs[i]);
+    }
+
+    __put_user(env->pc, &sc->pc);
+    __put_user(0, &sc->ics);
+    __put_user(signo, &sc->faultnum);
+}
+
+static void restore_sigcontext(CPUTLGState *env, struct target_sigcontext *sc)
+{
+    int i;
+
+    for (i = 0; i < TILEGX_R_COUNT; ++i) {
+        __get_user(env->regs[i], &sc->gregs[i]);
+    }
+
+    __get_user(env->pc, &sc->pc);
+}
+
+static abi_ulong get_sigframe(struct target_sigaction *ka, CPUArchState *env,
+                              size_t frame_size)
+{
+    unsigned long sp = env->regs[TILEGX_R_SP];
+
+    if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size))) {
+        return -1UL;
+    }
+
+    if ((ka->sa_flags & SA_ONSTACK) && !sas_ss_flags(sp)) {
+        sp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size;
+    }
+
+    sp -= frame_size;
+    sp &= -16UL;
+    return sp;
+}
+
+static void setup_rt_frame(int sig, struct target_sigaction *ka,
+                           target_siginfo_t *info,
+                           target_sigset_t *set, CPUArchState *env)
+{
+    abi_ulong frame_addr;
+    struct target_rt_sigframe *frame;
+    unsigned long restorer;
+
+    frame_addr = get_sigframe(ka, env, sizeof(*frame));
+    if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
+        goto give_sigsegv;
+    }
+
+    /* Always write at least the signal number for the stack backtracer. */
+    if (ka->sa_flags & TARGET_SA_SIGINFO) {
+        /* At sigreturn time, restore the callee-save registers too. */
+        tswap_siginfo(&frame->info, info);
+        /* regs->flags |= PT_FLAGS_RESTORE_REGS; FIXME: we can skip it? */
+    } else {
+        __put_user(info->si_signo, &frame->info.si_signo);
+    }
+
+    /* Create the ucontext.  */
+    __put_user(0, &frame->uc.tuc_flags);
+    __put_user(0, &frame->uc.tuc_link);
+    __put_user(target_sigaltstack_used.ss_sp, &frame->uc.tuc_stack.ss_sp);
+    __put_user(sas_ss_flags(env->regs[TILEGX_R_SP]),
+               &frame->uc.tuc_stack.ss_flags);
+    __put_user(target_sigaltstack_used.ss_size, &frame->uc.tuc_stack.ss_size);
+    setup_sigcontext(&frame->uc.tuc_mcontext, env, info->si_signo);
+
+    restorer = (unsigned long) do_rt_sigreturn;
+    if (ka->sa_flags & TARGET_SA_RESTORER) {
+            restorer = (unsigned long) ka->sa_restorer;
+    }
+    env->pc = (unsigned long) ka->_sa_handler;
+    env->regs[TILEGX_R_SP] = (unsigned long) frame;
+    env->regs[TILEGX_R_LR] = restorer;
+    env->regs[0] = (unsigned long) sig;
+    env->regs[1] = (unsigned long) &frame->info;
+    env->regs[2] = (unsigned long) &frame->uc;
+    /* regs->flags |= PT_FLAGS_CALLER_SAVES; FIXME: we can skip it? */
+
+    unlock_user_struct(frame, frame_addr, 1);
+    return;
+
+give_sigsegv:
+    if (sig == TARGET_SIGSEGV) {
+        ka->_sa_handler = TARGET_SIG_DFL;
+    }
+    force_sig(TARGET_SIGSEGV /* , current */);
+}
+
+long do_rt_sigreturn(CPUTLGState *env)
+{
+    abi_ulong frame_addr = env->regs[TILEGX_R_SP];
+    struct target_rt_sigframe *frame;
+    sigset_t set;
+
+    if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) {
+        goto badframe;
+    }
+    target_to_host_sigset(&set, &frame->uc.tuc_sigmask);
+    do_sigprocmask(SIG_SETMASK, &set, NULL);
+
+    restore_sigcontext(env, &frame->uc.tuc_mcontext);
+    if (do_sigaltstack(frame_addr + offsetof(struct target_rt_sigframe,
+                                             uc.tuc_stack),
+                       0, env->regs[TILEGX_R_SP]) == -EFAULT) {
+        goto badframe;
+    }
+
+    unlock_user_struct(frame, frame_addr, 0);
+    return env->regs[TILEGX_R_RE];
+
+
+ badframe:
+    unlock_user_struct(frame, frame_addr, 0);
+    force_sig(TARGET_SIGSEGV);
+}
+
 #else
 
 static void setup_frame(int sig, struct target_sigaction *ka,
@@ -5657,7 +5814,7 @@ void process_pending_signals(CPUArchState *cpu_env)
 #endif
         /* prepare the stack frame of the virtual CPU */
 #if defined(TARGET_ABI_MIPSN32) || defined(TARGET_ABI_MIPSN64) \
-    || defined(TARGET_OPENRISC)
+    || defined(TARGET_OPENRISC) || defined(TARGET_TILEGX)
         /* These targets do not have traditional signals.  */
         setup_rt_frame(sig, sa, &q->info, &target_old_set, cpu_env);
 #else
diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
index 7ca33a6f62..f996acf945 100644
--- a/linux-user/syscall_defs.h
+++ b/linux-user/syscall_defs.h
@@ -748,6 +748,10 @@ typedef struct target_siginfo {
 #define TARGET_ILL_PRVREG	(6)	/* privileged register */
 #define TARGET_ILL_COPROC	(7)	/* coprocessor error */
 #define TARGET_ILL_BADSTK	(8)	/* internal stack error */
+#ifdef TARGET_TILEGX
+#define TARGET_ILL_DBLFLT       (9)     /* double fault */
+#define TARGET_ILL_HARDWALL     (10)    /* user networks hardwall violation */
+#endif
 
 /*
  * SIGFPE si_codes
@@ -767,6 +771,7 @@ typedef struct target_siginfo {
  */
 #define TARGET_SEGV_MAPERR     (1)  /* address not mapped to object */
 #define TARGET_SEGV_ACCERR     (2)  /* invalid permissions for mapped object */
+#define TARGET_SEGV_BNDERR     (3)  /* failed address bound checks */
 
 /*
  * SIGBUS si_codes
@@ -774,12 +779,18 @@ typedef struct target_siginfo {
 #define TARGET_BUS_ADRALN       (1)	/* invalid address alignment */
 #define TARGET_BUS_ADRERR       (2)	/* non-existent physical address */
 #define TARGET_BUS_OBJERR       (3)	/* object specific hardware error */
+/* hardware memory error consumed on a machine check: action required */
+#define TARGET_BUS_MCEERR_AR    (4)
+/* hardware memory error detected in process but not consumed: action optional*/
+#define TARGET_BUS_MCEERR_AO    (5)
 
 /*
  * SIGTRAP si_codes
  */
 #define TARGET_TRAP_BRKPT	(1)	/* process breakpoint */
 #define TARGET_TRAP_TRACE	(2)	/* process trace trap */
+#define TARGET_TRAP_BRANCH      (3)     /* process taken branch trap */
+#define TARGET_TRAP_HWBKPT      (4)     /* hardware breakpoint/watchpoint */
 
 #endif /* defined(TARGET_I386) || defined(TARGET_ARM) */
 
diff --git a/linux-user/tilegx/syscall.h b/linux-user/tilegx/syscall.h
index 653ece13d8..a938d4e90c 100644
--- a/linux-user/tilegx/syscall.h
+++ b/linux-user/tilegx/syscall.h
@@ -37,4 +37,7 @@ struct target_pt_regs {
 #define TARGET_MLOCKALL_MCL_CURRENT 1
 #define TARGET_MLOCKALL_MCL_FUTURE  2
 
+/* For faultnum */
+#define TARGET_INT_SWINT_1            14
+
 #endif
diff --git a/target-tilegx/cpu.c b/target-tilegx/cpu.c
index 78b73e45c4..3c5481d443 100644
--- a/target-tilegx/cpu.c
+++ b/target-tilegx/cpu.c
@@ -22,6 +22,7 @@
 #include "qemu-common.h"
 #include "hw/qdev-properties.h"
 #include "migration/vmstate.h"
+#include "linux-user/syscall_defs.h"
 
 static void tilegx_cpu_dump_state(CPUState *cs, FILE *f,
                                   fprintf_function cpu_fprintf, int flags)
@@ -121,8 +122,12 @@ static int tilegx_cpu_handle_mmu_fault(CPUState *cs, vaddr address, int rw,
 {
     TileGXCPU *cpu = TILEGX_CPU(cs);
 
-    cs->exception_index = TILEGX_EXCP_SEGV;
+    /* The sigcode field will be filled in by do_signal in main.c.  */
+    cs->exception_index = TILEGX_EXCP_SIGNAL;
     cpu->env.excaddr = address;
+    cpu->env.signo = TARGET_SIGSEGV;
+    cpu->env.sigcode = 0;
+
     return 1;
 }
 
diff --git a/target-tilegx/cpu.h b/target-tilegx/cpu.h
index b9f5082b95..6c0fd5365d 100644
--- a/target-tilegx/cpu.h
+++ b/target-tilegx/cpu.h
@@ -53,6 +53,8 @@ enum {
     TILEGX_SPR_CMPEXCH = 0,
     TILEGX_SPR_CRITICAL_SEC = 1,
     TILEGX_SPR_SIM_CONTROL = 2,
+    TILEGX_SPR_EX_CONTEXT_0_0 = 3,
+    TILEGX_SPR_EX_CONTEXT_0_1 = 4,
     TILEGX_SPR_COUNT
 };
 
@@ -60,7 +62,7 @@ enum {
 typedef enum {
     TILEGX_EXCP_NONE = 0,
     TILEGX_EXCP_SYSCALL = 1,
-    TILEGX_EXCP_SEGV = 2,
+    TILEGX_EXCP_SIGNAL = 2,
     TILEGX_EXCP_OPCODE_UNKNOWN = 0x101,
     TILEGX_EXCP_OPCODE_UNIMPLEMENTED = 0x102,
     TILEGX_EXCP_OPCODE_CMPEXCH = 0x103,
@@ -87,10 +89,12 @@ typedef struct CPUTLGState {
     uint64_t pc;                       /* Current pc */
 
 #if defined(CONFIG_USER_ONLY)
+    uint64_t excaddr;                  /* exception address */
     uint64_t atomic_srca;              /* Arguments to atomic "exceptions" */
     uint64_t atomic_srcb;
     uint32_t atomic_dstr;
-    uint64_t excaddr;                  /* exception address */
+    uint32_t signo;                    /* Signal number */
+    uint32_t sigcode;                  /* Signal code */
 #endif
 
     CPU_COMMON
diff --git a/target-tilegx/helper.c b/target-tilegx/helper.c
index a01bb8d513..dda821f5cb 100644
--- a/target-tilegx/helper.c
+++ b/target-tilegx/helper.c
@@ -21,6 +21,8 @@
 #include "cpu.h"
 #include "qemu-common.h"
 #include "exec/helper-proto.h"
+#include <zlib.h> /* For crc32 */
+#include "syscall_defs.h"
 
 void helper_exception(CPUTLGState *env, uint32_t excp)
 {
@@ -30,6 +32,27 @@ void helper_exception(CPUTLGState *env, uint32_t excp)
     cpu_loop_exit(cs);
 }
 
+void helper_ext01_ics(CPUTLGState *env)
+{
+    uint64_t val = env->spregs[TILEGX_SPR_EX_CONTEXT_0_1];
+
+    switch (val) {
+    case 0:
+    case 1:
+        env->spregs[TILEGX_SPR_CRITICAL_SEC] = val;
+        break;
+    default:
+#if defined(CONFIG_USER_ONLY)
+        env->signo = TARGET_SIGILL;
+        env->sigcode = TARGET_ILL_ILLOPC;
+        helper_exception(env, TILEGX_EXCP_SIGNAL);
+#else
+        helper_exception(env, TILEGX_EXCP_OPCODE_UNIMPLEMENTED);
+#endif
+        break;
+    }
+}
+
 uint64_t helper_cntlz(uint64_t arg)
 {
     return clz64(arg);
@@ -78,3 +101,61 @@ uint64_t helper_shufflebytes(uint64_t dest, uint64_t srca, uint64_t srcb)
 
     return vdst;
 }
+
+uint64_t helper_crc32_8(uint64_t accum, uint64_t input)
+{
+    uint8_t buf = input;
+
+    /* zlib crc32 converts the accumulator and output to one's complement.  */
+    return crc32(accum ^ 0xffffffff, &buf, 1) ^ 0xffffffff;
+}
+
+uint64_t helper_crc32_32(uint64_t accum, uint64_t input)
+{
+    uint8_t buf[4];
+
+    stl_le_p(buf, input);
+
+    /* zlib crc32 converts the accumulator and output to one's complement.  */
+    return crc32(accum ^ 0xffffffff, buf, 4) ^ 0xffffffff;
+}
+
+uint64_t helper_cmula(uint64_t srcd, uint64_t srca, uint64_t srcb)
+{
+    uint32_t reala = (int16_t)srca;
+    uint32_t imaga = (int16_t)(srca >> 16);
+    uint32_t realb = (int16_t)srcb;
+    uint32_t imagb = (int16_t)(srcb >> 16);
+    uint32_t reald = srcd;
+    uint32_t imagd = srcd >> 32;
+    uint32_t realr = reala * realb - imaga * imagb + reald;
+    uint32_t imagr = reala * imagb + imaga * realb + imagd;
+
+    return deposit64(realr, 32, 32, imagr);
+}
+
+uint64_t helper_cmulaf(uint64_t srcd, uint64_t srca, uint64_t srcb)
+{
+    uint32_t reala = (int16_t)srca;
+    uint32_t imaga = (int16_t)(srca >> 16);
+    uint32_t realb = (int16_t)srcb;
+    uint32_t imagb = (int16_t)(srcb >> 16);
+    uint32_t reald = (int16_t)srcd;
+    uint32_t imagd = (int16_t)(srcd >> 16);
+    int32_t realr = reala * realb - imaga * imagb;
+    int32_t imagr = reala * imagb + imaga * realb;
+
+    return deposit32((realr >> 15) + reald, 16, 16, (imagr >> 15) + imagd);
+}
+
+uint64_t helper_cmul2(uint64_t srca, uint64_t srcb, int shift, int round)
+{
+    uint32_t reala = (int16_t)srca;
+    uint32_t imaga = (int16_t)(srca >> 16);
+    uint32_t realb = (int16_t)srcb;
+    uint32_t imagb = (int16_t)(srcb >> 16);
+    int32_t realr = reala * realb - imaga * imagb + round;
+    int32_t imagr = reala * imagb + imaga * realb + round;
+
+    return deposit32(realr >> shift, 16, 16, imagr >> shift);
+}
diff --git a/target-tilegx/helper.h b/target-tilegx/helper.h
index 766f5f2f9c..9281d0f428 100644
--- a/target-tilegx/helper.h
+++ b/target-tilegx/helper.h
@@ -1,10 +1,26 @@
 DEF_HELPER_2(exception, noreturn, env, i32)
+DEF_HELPER_1(ext01_ics, void, env)
 DEF_HELPER_FLAGS_1(cntlz, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(cnttz, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(pcnt, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(revbits, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_3(shufflebytes, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
+DEF_HELPER_FLAGS_2(crc32_8, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(crc32_32, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_3(cmula, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
+DEF_HELPER_FLAGS_3(cmulaf, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
+DEF_HELPER_FLAGS_4(cmul2, TCG_CALL_NO_RWG_SE, i64, i64, i64, int, int)
 
+DEF_HELPER_FLAGS_2(v1int_h, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v1int_l, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2int_h, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2int_l, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(v1multu, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2mults, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(v1shl, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(v1shru, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(v1shrs, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2shl, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2shru, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(v2shrs, TCG_CALL_NO_RWG_SE, i64, i64, i64)
diff --git a/target-tilegx/simd_helper.c b/target-tilegx/simd_helper.c
index b9319292f3..6d7bb5ce2b 100644
--- a/target-tilegx/simd_helper.c
+++ b/target-tilegx/simd_helper.c
@@ -23,12 +23,54 @@
 #include "exec/helper-proto.h"
 
 
+/* Broadcast a value to all elements of a vector.  */
+#define V1(X)      (((X) & 0xff) * 0x0101010101010101ull)
+#define V2(X)      (((X) & 0xffff) * 0x0001000100010001ull)
+
+
+uint64_t helper_v1multu(uint64_t a, uint64_t b)
+{
+    uint64_t r = 0;
+    int i;
+
+    for (i = 0; i < 64; i += 8) {
+        unsigned ae = extract64(a, i, 8);
+        unsigned be = extract64(b, i, 8);
+        r = deposit64(r, i, 8, ae * be);
+    }
+    return r;
+}
+
+uint64_t helper_v2mults(uint64_t a, uint64_t b)
+{
+    uint64_t r = 0;
+    int i;
+
+    /* While the instruction talks about signed inputs, with a
+       truncated result the sign of the inputs doesn't matter.  */
+    for (i = 0; i < 64; i += 16) {
+        unsigned ae = extract64(a, i, 16);
+        unsigned be = extract64(b, i, 16);
+        r = deposit64(r, i, 16, ae * be);
+    }
+    return r;
+}
+
 uint64_t helper_v1shl(uint64_t a, uint64_t b)
 {
     uint64_t m;
 
     b &= 7;
-    m = 0x0101010101010101ULL * (0xff >> b);
+    m = V1(0xff >> b);
+    return (a & m) << b;
+}
+
+uint64_t helper_v2shl(uint64_t a, uint64_t b)
+{
+    uint64_t m;
+
+    b &= 15;
+    m = V2(0xffff >> b);
     return (a & m) << b;
 }
 
@@ -37,7 +79,16 @@ uint64_t helper_v1shru(uint64_t a, uint64_t b)
     uint64_t m;
 
     b &= 7;
-    m = 0x0101010101010101ULL * ((0xff << b) & 0xff);
+    m = V1(0xff << b);
+    return (a & m) >> b;
+}
+
+uint64_t helper_v2shru(uint64_t a, uint64_t b)
+{
+    uint64_t m;
+
+    b &= 15;
+    m = V2(0xffff << b);
     return (a & m) >> b;
 }
 
@@ -48,8 +99,67 @@ uint64_t helper_v1shrs(uint64_t a, uint64_t b)
 
     b &= 7;
     for (i = 0; i < 64; i += 8) {
-        int64_t ae = (int8_t)(a >> i);
-        r |= ((ae >> b) & 0xff) << i;
+        r = deposit64(r, i, 8, sextract64(a, i + b, 8 - b));
+    }
+    return r;
+}
+
+uint64_t helper_v2shrs(uint64_t a, uint64_t b)
+{
+    uint64_t r = 0;
+    int i;
+
+    b &= 15;
+    for (i = 0; i < 64; i += 16) {
+        r = deposit64(r, i, 16, sextract64(a, i + b, 16 - b));
+    }
+    return r;
+}
+
+uint64_t helper_v1int_h(uint64_t a, uint64_t b)
+{
+    uint64_t r = 0;
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r = deposit64(r, 2 * i + 8, 8, extract64(a, i + 32, 8));
+        r = deposit64(r, 2 * i, 8, extract64(b, i + 32, 8));
+    }
+    return r;
+}
+
+uint64_t helper_v1int_l(uint64_t a, uint64_t b)
+{
+    uint64_t r = 0;
+    int i;
+
+    for (i = 0; i < 32; i += 8) {
+        r = deposit64(r, 2 * i + 8, 8, extract64(a, i, 8));
+        r = deposit64(r, 2 * i, 8, extract64(b, i, 8));
+    }
+    return r;
+}
+
+uint64_t helper_v2int_h(uint64_t a, uint64_t b)
+{
+    uint64_t r = 0;
+    int i;
+
+    for (i = 0; i < 32; i += 16) {
+        r = deposit64(r, 2 * i + 16, 16, extract64(a, i + 32, 16));
+        r = deposit64(r, 2 * i, 16, extract64(b, i + 32, 16));
+    }
+    return r;
+}
+
+uint64_t helper_v2int_l(uint64_t a, uint64_t b)
+{
+    uint64_t r = 0;
+    int i;
+
+    for (i = 0; i < 32; i += 16) {
+        r = deposit64(r, 2 * i + 16, 16, extract64(a, i, 16));
+        r = deposit64(r, 2 * i, 16, extract64(b, i, 16));
     }
     return r;
 }
diff --git a/target-tilegx/translate.c b/target-tilegx/translate.c
index e70c3e5ab7..acb9ec480c 100644
--- a/target-tilegx/translate.c
+++ b/target-tilegx/translate.c
@@ -23,6 +23,8 @@
 #include "disas/disas.h"
 #include "tcg-op.h"
 #include "exec/cpu_ldst.h"
+#include "linux-user/syscall_defs.h"
+
 #include "opcode_tilegx.h"
 #include "spr_def_64.h"
 
@@ -96,6 +98,7 @@ typedef struct {
 #define OE_SH(E,XY)    OE(SHIFT_OPCODE_##XY, E##_SHIFT_OPCODE_##XY, XY)
 
 #define V1_IMM(X)      (((X) & 0xff) * 0x0101010101010101ull)
+#define V2_IMM(X)      (((X) & 0xffff) * 0x0001000100010001ull)
 
 
 static void gen_exception(DisasContext *dc, TileExcp num)
@@ -275,6 +278,44 @@ static void gen_mul_half(TCGv tdest, TCGv tsrca, TCGv tsrcb,
     tcg_temp_free(t);
 }
 
+static void gen_cmul2(TCGv tdest, TCGv tsrca, TCGv tsrcb, int sh, int rd)
+{
+    TCGv_i32 tsh = tcg_const_i32(sh);
+    TCGv_i32 trd = tcg_const_i32(rd);
+    gen_helper_cmul2(tdest, tsrca, tsrcb, tsh, trd);
+    tcg_temp_free_i32(tsh);
+    tcg_temp_free_i32(trd);
+}
+
+static TileExcp gen_st_opcode(DisasContext *dc, unsigned dest, unsigned srca,
+                              unsigned srcb, TCGMemOp memop, const char *name)
+{
+    if (dest) {
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
+    }
+
+    tcg_gen_qemu_st_tl(load_gr(dc, srcb), load_gr(dc, srca),
+		       dc->mmuidx, memop);
+
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s", name,
+                  reg_names[srca], reg_names[srcb]);
+    return TILEGX_EXCP_NONE;
+}
+
+static TileExcp gen_st_add_opcode(DisasContext *dc, unsigned srca, unsigned srcb,
+                                  int imm, TCGMemOp memop, const char *name)
+{
+    TCGv tsrca = load_gr(dc, srca);
+    TCGv tsrcb = load_gr(dc, srcb);
+
+    tcg_gen_qemu_st_tl(tsrcb, tsrca, dc->mmuidx, memop);
+    tcg_gen_addi_tl(dest_gr(dc, srca), tsrca, imm);
+
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s, %d", name,
+                  reg_names[srca], reg_names[srcb], imm);
+    return TILEGX_EXCP_NONE;
+}
+
 /* Equality comparison with zero can be done quickly and efficiently.  */
 static void gen_v1cmpeq0(TCGv v)
 {
@@ -310,42 +351,152 @@ static void gen_v1cmpne0(TCGv v)
     tcg_temp_free(c);
 }
 
-static TileExcp gen_st_opcode(DisasContext *dc, unsigned dest, unsigned srca,
-                              unsigned srcb, TCGMemOp memop, const char *name)
+/* Vector addition can be performed via arithmetic plus masking.  It is
+   efficient this way only for 4 or more elements.  */
+static void gen_v12add(TCGv tdest, TCGv tsrca, TCGv tsrcb, uint64_t sign)
 {
-    if (dest) {
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
-    }
+    TCGv tmask = tcg_const_tl(~sign);
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
 
-    tcg_gen_qemu_st_tl(load_gr(dc, srcb), load_gr(dc, srca),
-		       dc->mmuidx, memop);
+    /* ((a & ~sign) + (b & ~sign)) ^ ((a ^ b) & sign).  */
+    tcg_gen_and_tl(t0, tsrca, tmask);
+    tcg_gen_and_tl(t1, tsrcb, tmask);
+    tcg_gen_add_tl(tdest, t0, t1);
+    tcg_gen_xor_tl(t0, tsrca, tsrcb);
+    tcg_gen_andc_tl(t0, t0, tmask);
+    tcg_gen_xor_tl(tdest, tdest, t0);
 
-    qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s", name,
-                  reg_names[srca], reg_names[srcb]);
-    return TILEGX_EXCP_NONE;
+    tcg_temp_free(t1);
+    tcg_temp_free(t0);
+    tcg_temp_free(tmask);
 }
 
-static TileExcp gen_st_add_opcode(DisasContext *dc, unsigned srca, unsigned srcb,
-                                  int imm, TCGMemOp memop, const char *name)
+/* Similarly for vector subtraction.  */
+static void gen_v12sub(TCGv tdest, TCGv tsrca, TCGv tsrcb, uint64_t sign)
 {
-    TCGv tsrca = load_gr(dc, srca);
-    TCGv tsrcb = load_gr(dc, srcb);
+    TCGv tsign = tcg_const_tl(sign);
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
 
-    tcg_gen_qemu_st_tl(tsrcb, tsrca, dc->mmuidx, memop);
-    tcg_gen_addi_tl(dest_gr(dc, srca), tsrca, imm);
+    /* ((a | sign) - (b & ~sign)) ^ ((a ^ ~b) & sign).  */
+    tcg_gen_or_tl(t0, tsrca, tsign);
+    tcg_gen_andc_tl(t1, tsrcb, tsign);
+    tcg_gen_sub_tl(tdest, t0, t1);
+    tcg_gen_eqv_tl(t0, tsrca, tsrcb);
+    tcg_gen_and_tl(t0, t0, tsign);
+    tcg_gen_xor_tl(tdest, tdest, t0);
 
-    qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s, %d", name,
-                  reg_names[srca], reg_names[srcb], imm);
-    return TILEGX_EXCP_NONE;
+    tcg_temp_free(t1);
+    tcg_temp_free(t0);
+    tcg_temp_free(tsign);
+}
+
+static void gen_v4sh(TCGv d64, TCGv a64, TCGv b64,
+                     void (*generate)(TCGv_i32, TCGv_i32, TCGv_i32))
+{
+    TCGv_i32 al = tcg_temp_new_i32();
+    TCGv_i32 ah = tcg_temp_new_i32();
+    TCGv_i32 bl = tcg_temp_new_i32();
+
+    tcg_gen_extr_i64_i32(al, ah, a64);
+    tcg_gen_extrl_i64_i32(bl, b64);
+    tcg_gen_andi_i32(bl, bl, 31);
+    generate(al, al, bl);
+    generate(ah, ah, bl);
+    tcg_gen_concat_i32_i64(d64, al, ah);
+
+    tcg_temp_free_i32(al);
+    tcg_temp_free_i32(ah);
+    tcg_temp_free_i32(bl);
+}
+
+static void gen_v4op(TCGv d64, TCGv a64, TCGv b64,
+                     void (*generate)(TCGv_i32, TCGv_i32, TCGv_i32))
+{
+    TCGv_i32 al = tcg_temp_new_i32();
+    TCGv_i32 ah = tcg_temp_new_i32();
+    TCGv_i32 bl = tcg_temp_new_i32();
+    TCGv_i32 bh = tcg_temp_new_i32();
+
+    tcg_gen_extr_i64_i32(al, ah, a64);
+    tcg_gen_extr_i64_i32(bl, bh, b64);
+    generate(al, al, bl);
+    generate(ah, ah, bh);
+    tcg_gen_concat_i32_i64(d64, al, ah);
+
+    tcg_temp_free_i32(al);
+    tcg_temp_free_i32(ah);
+    tcg_temp_free_i32(bl);
+    tcg_temp_free_i32(bh);
+}
+
+static TileExcp gen_signal(DisasContext *dc, int signo, int sigcode,
+                           const char *mnemonic)
+{
+    TCGv_i32 t0 = tcg_const_i32(signo);
+    TCGv_i32 t1 = tcg_const_i32(sigcode);
+
+    tcg_gen_st_i32(t0, cpu_env, offsetof(CPUTLGState, signo));
+    tcg_gen_st_i32(t1, cpu_env, offsetof(CPUTLGState, sigcode));
+
+    tcg_temp_free_i32(t1);
+    tcg_temp_free_i32(t0);
+
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s", mnemonic);
+    return TILEGX_EXCP_SIGNAL;
+}
+
+static bool parse_from_addli(uint64_t bundle, int *signo, int *sigcode)
+{
+    int imm;
+
+    if ((get_Opcode_X0(bundle) != ADDLI_OPCODE_X0)
+        || (get_Dest_X0(bundle) != TILEGX_R_ZERO)
+        || (get_SrcA_X0(bundle) != TILEGX_R_ZERO)) {
+        return false;
+    }
+
+    imm = get_Imm16_X0(bundle);
+    *signo = imm & 0x3f;
+    *sigcode = (imm >> 6) & 0xf;
+
+    /* ??? The linux kernel validates both signo and the sigcode vs the
+       known max for each signal.  Don't bother here.  */
+    return true;
+}
+
+static TileExcp gen_specill(DisasContext *dc, unsigned dest, unsigned srca,
+                            uint64_t bundle)
+{
+    const char *mnemonic;
+    int signo;
+    int sigcode;
+
+    if (dest == 0x1c && srca == 0x25) {
+        signo = TARGET_SIGTRAP;
+        sigcode = TARGET_TRAP_BRKPT;
+        mnemonic = "bpt";
+    } else if (dest == 0x1d && srca == 0x25
+               && parse_from_addli(bundle, &signo, &sigcode)) {
+        mnemonic = "raise";
+    } else {
+        signo = TARGET_SIGILL;
+        sigcode = TARGET_ILL_ILLOPC;
+        mnemonic = "ill";
+    }
+
+    return gen_signal(dc, signo, sigcode, mnemonic);
 }
 
 static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
-                              unsigned dest, unsigned srca)
+                              unsigned dest, unsigned srca, uint64_t bundle)
 {
     TCGv tdest, tsrca;
     const char *mnemonic;
     TCGMemOp memop;
     TileExcp ret = TILEGX_EXCP_NONE;
+    bool prefetch_nofault = false;
 
     /* Eliminate instructions with no output before doing anything else.  */
     switch (opext) {
@@ -368,10 +519,9 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
         mnemonic = "flushwb";
         goto done0;
     case OE_RR_X1(ILL):
+        return gen_specill(dc, dest, srca, bundle);
     case OE_RR_Y1(ILL):
-        mnemonic = (dest == 0x1c && srca == 0x25 ? "bpt" : "ill");
-        qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s", mnemonic);
-        return TILEGX_EXCP_OPCODE_UNKNOWN;
+        return gen_signal(dc, TARGET_SIGILL, TARGET_ILL_ILLOPC, "ill");
     case OE_RR_X1(MF):
         mnemonic = "mf";
         goto done0;
@@ -379,6 +529,15 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
         /* ??? This should yield, especially in system mode.  */
         mnemonic = "nap";
         goto done0;
+    case OE_RR_X1(IRET):
+        gen_helper_ext01_ics(cpu_env);
+        dc->jmp.cond = TCG_COND_ALWAYS;
+        dc->jmp.dest = tcg_temp_new();
+        tcg_gen_ld_tl(dc->jmp.dest, cpu_env,
+                      offsetof(CPUTLGState, spregs[TILEGX_SPR_EX_CONTEXT_0_0]));
+        tcg_gen_andi_tl(dc->jmp.dest, dc->jmp.dest, ~7);
+        mnemonic = "iret";
+        goto done0;
     case OE_RR_X1(SWINT0):
     case OE_RR_X1(SWINT2):
     case OE_RR_X1(SWINT3):
@@ -388,7 +547,7 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
         mnemonic = "swint1";
     done0:
         if (srca || dest) {
-            return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+            return TILEGX_EXCP_OPCODE_UNKNOWN;
         }
         qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s", mnemonic);
         return ret;
@@ -434,7 +593,7 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
         tcg_gen_andi_tl(dc->jmp.dest, load_gr(dc, srca), ~7);
     done1:
         if (dest) {
-            return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+            return TILEGX_EXCP_OPCODE_UNKNOWN;
         }
         qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s", mnemonic, reg_names[srca]);
         return ret;
@@ -456,31 +615,33 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
         break;
     case OE_RR_X0(FSINGLE_PACK1):
     case OE_RR_Y0(FSINGLE_PACK1):
-    case OE_RR_X1(IRET):
         return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RR_X1(LD1S):
         memop = MO_SB;
-        mnemonic = "ld1s";
+        mnemonic = "ld1s"; /* prefetch_l1_fault */
         goto do_load;
     case OE_RR_X1(LD1U):
         memop = MO_UB;
-        mnemonic = "ld1u";
+        mnemonic = "ld1u"; /* prefetch, prefetch_l1 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load;
     case OE_RR_X1(LD2S):
         memop = MO_TESW;
-        mnemonic = "ld2s";
+        mnemonic = "ld2s"; /* prefetch_l2_fault */
         goto do_load;
     case OE_RR_X1(LD2U):
         memop = MO_TEUW;
-        mnemonic = "ld2u";
+        mnemonic = "ld2u"; /* prefetch_l2 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load;
     case OE_RR_X1(LD4S):
         memop = MO_TESL;
-        mnemonic = "ld4s";
+        mnemonic = "ld4s"; /* prefetch_l3_fault */
         goto do_load;
     case OE_RR_X1(LD4U):
         memop = MO_TEUL;
-        mnemonic = "ld4u";
+        mnemonic = "ld4u"; /* prefetch_l3 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load;
     case OE_RR_X1(LDNT1S):
         memop = MO_SB;
@@ -514,7 +675,9 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
         memop = MO_TEQ;
         mnemonic = "ld";
     do_load:
-        tcg_gen_qemu_ld_tl(tdest, tsrca, dc->mmuidx, memop);
+        if (!prefetch_nofault) {
+            tcg_gen_qemu_ld_tl(tdest, tsrca, dc->mmuidx, memop);
+        }
         break;
     case OE_RR_X1(LDNA):
         tcg_gen_andi_tl(tdest, tsrca, ~7);
@@ -524,7 +687,7 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
     case OE_RR_X1(LNK):
     case OE_RR_Y1(LNK):
         if (srca) {
-            return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+            return TILEGX_EXCP_OPCODE_UNKNOWN;
         }
         tcg_gen_movi_tl(tdest, dc->pc + TILEGX_BUNDLE_SIZE_IN_BYTES);
         mnemonic = "lnk";
@@ -546,14 +709,29 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
         break;
     case OE_RR_X0(TBLIDXB0):
     case OE_RR_Y0(TBLIDXB0):
+        tcg_gen_deposit_tl(tdest, load_gr(dc, dest), tsrca, 2, 8);
+        mnemonic = "tblidxb0";
+        break;
     case OE_RR_X0(TBLIDXB1):
     case OE_RR_Y0(TBLIDXB1):
+        tcg_gen_shri_tl(tdest, tsrca, 8);
+        tcg_gen_deposit_tl(tdest, load_gr(dc, dest), tdest, 2, 8);
+        mnemonic = "tblidxb1";
+        break;
     case OE_RR_X0(TBLIDXB2):
     case OE_RR_Y0(TBLIDXB2):
+        tcg_gen_shri_tl(tdest, tsrca, 16);
+        tcg_gen_deposit_tl(tdest, load_gr(dc, dest), tdest, 2, 8);
+        mnemonic = "tblidxb2";
+        break;
     case OE_RR_X0(TBLIDXB3):
     case OE_RR_Y0(TBLIDXB3):
+        tcg_gen_shri_tl(tdest, tsrca, 24);
+        tcg_gen_deposit_tl(tdest, load_gr(dc, dest), tdest, 2, 8);
+        mnemonic = "tblidxb3";
+        break;
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 
     qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s", mnemonic,
@@ -663,15 +841,41 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
         mnemonic = "cmpne";
         break;
     case OE_RRR(CMULAF, 0, X0):
+        gen_helper_cmulaf(tdest, load_gr(dc, dest), tsrca, tsrcb);
+        mnemonic = "cmulaf";
+        break;
     case OE_RRR(CMULA, 0, X0):
+        gen_helper_cmula(tdest, load_gr(dc, dest), tsrca, tsrcb);
+        mnemonic = "cmula";
+        break;
     case OE_RRR(CMULFR, 0, X0):
+        gen_cmul2(tdest, tsrca, tsrcb, 15, 1 << 14);
+        mnemonic = "cmulfr";
+        break;
     case OE_RRR(CMULF, 0, X0):
+        gen_cmul2(tdest, tsrca, tsrcb, 15, 0);
+        mnemonic = "cmulf";
+        break;
     case OE_RRR(CMULHR, 0, X0):
+        gen_cmul2(tdest, tsrca, tsrcb, 16, 1 << 15);
+        mnemonic = "cmulhr";
+        break;
     case OE_RRR(CMULH, 0, X0):
+        gen_cmul2(tdest, tsrca, tsrcb, 16, 0);
+        mnemonic = "cmulh";
+        break;
     case OE_RRR(CMUL, 0, X0):
+        gen_helper_cmula(tdest, load_zero(dc), tsrca, tsrcb);
+        mnemonic = "cmul";
+        break;
     case OE_RRR(CRC32_32, 0, X0):
+        gen_helper_crc32_32(tdest, tsrca, tsrcb);
+        mnemonic = "crc32_32";
+        break;
     case OE_RRR(CRC32_8, 0, X0):
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        gen_helper_crc32_8(tdest, tsrca, tsrcb);
+        mnemonic = "crc32_8";
+        break;
     case OE_RRR(DBLALIGN2, 0, X0):
     case OE_RRR(DBLALIGN2, 0, X1):
         gen_dblaligni(tdest, tsrca, tsrcb, 16);
@@ -1024,8 +1228,12 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
         break;
     case OE_RRR(V1ADDUC, 0, X0):
     case OE_RRR(V1ADDUC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V1ADD, 0, X0):
     case OE_RRR(V1ADD, 0, X1):
+        gen_v12add(tdest, tsrca, tsrcb, V1_IMM(0x80));
+        mnemonic = "v1add";
+        break;
     case OE_RRR(V1ADIFFU, 0, X0):
     case OE_RRR(V1AVGU, 0, X0):
         return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
@@ -1060,17 +1268,28 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
     case OE_RRR(V1DOTPUS, 0, X0):
     case OE_RRR(V1DOTPU, 0, X0):
     case OE_RRR(V1DOTP, 0, X0):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V1INT_H, 0, X0):
     case OE_RRR(V1INT_H, 0, X1):
+        gen_helper_v1int_h(tdest, tsrca, tsrcb);
+        mnemonic = "v1int_h";
+        break;
     case OE_RRR(V1INT_L, 0, X0):
     case OE_RRR(V1INT_L, 0, X1):
+        gen_helper_v1int_l(tdest, tsrca, tsrcb);
+        mnemonic = "v1int_l";
+        break;
     case OE_RRR(V1MAXU, 0, X0):
     case OE_RRR(V1MAXU, 0, X1):
     case OE_RRR(V1MINU, 0, X0):
     case OE_RRR(V1MINU, 0, X1):
     case OE_RRR(V1MNZ, 0, X0):
     case OE_RRR(V1MNZ, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V1MULTU, 0, X0):
+        gen_helper_v1multu(tdest, tsrca, tsrcb);
+        mnemonic = "v1multu";
+        break;
     case OE_RRR(V1MULUS, 0, X0):
     case OE_RRR(V1MULU, 0, X0):
     case OE_RRR(V1MZ, 0, X0):
@@ -1095,12 +1314,20 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
         break;
     case OE_RRR(V1SUBUC, 0, X0):
     case OE_RRR(V1SUBUC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V1SUB, 0, X0):
     case OE_RRR(V1SUB, 0, X1):
+        gen_v12sub(tdest, tsrca, tsrcb, V1_IMM(0x80));
+        mnemonic = "v1sub";
+        break;
     case OE_RRR(V2ADDSC, 0, X0):
     case OE_RRR(V2ADDSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V2ADD, 0, X0):
     case OE_RRR(V2ADD, 0, X1):
+        gen_v12add(tdest, tsrca, tsrcb, V2_IMM(0x8000));
+        mnemonic = "v2add";
+        break;
     case OE_RRR(V2ADIFFS, 0, X0):
     case OE_RRR(V2AVGS, 0, X0):
     case OE_RRR(V2CMPEQ, 0, X0):
@@ -1117,10 +1344,17 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
     case OE_RRR(V2CMPNE, 0, X1):
     case OE_RRR(V2DOTPA, 0, X0):
     case OE_RRR(V2DOTP, 0, X0):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V2INT_H, 0, X0):
     case OE_RRR(V2INT_H, 0, X1):
+        gen_helper_v2int_h(tdest, tsrca, tsrcb);
+        mnemonic = "v2int_h";
+        break;
     case OE_RRR(V2INT_L, 0, X0):
     case OE_RRR(V2INT_L, 0, X1):
+        gen_helper_v2int_l(tdest, tsrca, tsrcb);
+        mnemonic = "v2int_l";
+        break;
     case OE_RRR(V2MAXS, 0, X0):
     case OE_RRR(V2MAXS, 0, X1):
     case OE_RRR(V2MINS, 0, X0):
@@ -1129,7 +1363,11 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
     case OE_RRR(V2MNZ, 0, X1):
     case OE_RRR(V2MULFSC, 0, X0):
     case OE_RRR(V2MULS, 0, X0):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V2MULTS, 0, X0):
+        gen_helper_v2mults(tdest, tsrca, tsrcb);
+        mnemonic = "v2mults";
+        break;
     case OE_RRR(V2MZ, 0, X0):
     case OE_RRR(V2MZ, 0, X1):
     case OE_RRR(V2PACKH, 0, X0):
@@ -1144,21 +1382,38 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
     case OE_RRR(V2SADU, 0, X0):
     case OE_RRR(V2SHLSC, 0, X0):
     case OE_RRR(V2SHLSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V2SHL, 0, X0):
     case OE_RRR(V2SHL, 0, X1):
+        gen_helper_v2shl(tdest, tsrca, tsrcb);
+        mnemonic = "v2shl";
+        break;
     case OE_RRR(V2SHRS, 0, X0):
     case OE_RRR(V2SHRS, 0, X1):
+        gen_helper_v2shrs(tdest, tsrca, tsrcb);
+        mnemonic = "v2shrs";
+        break;
     case OE_RRR(V2SHRU, 0, X0):
     case OE_RRR(V2SHRU, 0, X1):
+        gen_helper_v2shru(tdest, tsrca, tsrcb);
+        mnemonic = "v2shru";
+        break;
     case OE_RRR(V2SUBSC, 0, X0):
     case OE_RRR(V2SUBSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V2SUB, 0, X0):
     case OE_RRR(V2SUB, 0, X1):
+        gen_v12sub(tdest, tsrca, tsrcb, V2_IMM(0x8000));
+        mnemonic = "v2sub";
+        break;
     case OE_RRR(V4ADDSC, 0, X0):
     case OE_RRR(V4ADDSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V4ADD, 0, X0):
     case OE_RRR(V4ADD, 0, X1):
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        gen_v4op(tdest, tsrca, tsrcb, tcg_gen_add_i32);
+        mnemonic = "v4add";
+        break;
     case OE_RRR(V4INT_H, 0, X0):
     case OE_RRR(V4INT_H, 0, X1):
         tcg_gen_shri_tl(tdest, tsrcb, 32);
@@ -1174,17 +1429,30 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
     case OE_RRR(V4PACKSC, 0, X1):
     case OE_RRR(V4SHLSC, 0, X0):
     case OE_RRR(V4SHLSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V4SHL, 0, X0):
     case OE_RRR(V4SHL, 0, X1):
+        gen_v4sh(tdest, tsrca, tsrcb, tcg_gen_shl_i32);
+        mnemonic = "v4shl";
+        break;
     case OE_RRR(V4SHRS, 0, X0):
     case OE_RRR(V4SHRS, 0, X1):
+        gen_v4sh(tdest, tsrca, tsrcb, tcg_gen_sar_i32);
+        mnemonic = "v4shrs";
+        break;
     case OE_RRR(V4SHRU, 0, X0):
     case OE_RRR(V4SHRU, 0, X1):
+        gen_v4sh(tdest, tsrca, tsrcb, tcg_gen_shr_i32);
+        mnemonic = "v4shru";
+        break;
     case OE_RRR(V4SUBSC, 0, X0):
     case OE_RRR(V4SUBSC, 0, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_RRR(V4SUB, 0, X0):
     case OE_RRR(V4SUB, 0, X1):
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        gen_v4op(tdest, tsrca, tsrcb, tcg_gen_sub_i32);
+        mnemonic = "v2sub";
+        break;
     case OE_RRR(XOR, 0, X0):
     case OE_RRR(XOR, 0, X1):
     case OE_RRR(XOR, 5, Y0):
@@ -1193,7 +1461,7 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
         mnemonic = "xor";
         break;
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 
     qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s, %s", mnemonic,
@@ -1206,6 +1474,7 @@ static TileExcp gen_rri_opcode(DisasContext *dc, unsigned opext,
 {
     TCGv tdest = dest_gr(dc, dest);
     TCGv tsrca = load_gr(dc, srca);
+    bool prefetch_nofault = false;
     const char *mnemonic;
     TCGMemOp memop;
     int i2, i3;
@@ -1255,27 +1524,30 @@ static TileExcp gen_rri_opcode(DisasContext *dc, unsigned opext,
         break;
     case OE_IM(LD1S_ADD, X1):
         memop = MO_SB;
-        mnemonic = "ld1s_add";
+        mnemonic = "ld1s_add"; /* prefetch_add_l1_fault */
         goto do_load_add;
     case OE_IM(LD1U_ADD, X1):
         memop = MO_UB;
-        mnemonic = "ld1u_add";
+        mnemonic = "ld1u_add"; /* prefetch_add_l1 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load_add;
     case OE_IM(LD2S_ADD, X1):
         memop = MO_TESW;
-        mnemonic = "ld2s_add";
+        mnemonic = "ld2s_add"; /* prefetch_add_l2_fault */
         goto do_load_add;
     case OE_IM(LD2U_ADD, X1):
         memop = MO_TEUW;
-        mnemonic = "ld2u_add";
+        mnemonic = "ld2u_add"; /* prefetch_add_l2 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load_add;
     case OE_IM(LD4S_ADD, X1):
         memop = MO_TESL;
-        mnemonic = "ld4s_add";
+        mnemonic = "ld4s_add"; /* prefetch_add_l3_fault */
         goto do_load_add;
     case OE_IM(LD4U_ADD, X1):
         memop = MO_TEUL;
-        mnemonic = "ld4u_add";
+        mnemonic = "ld4u_add"; /* prefetch_add_l3 */
+        prefetch_nofault = (dest == TILEGX_R_ZERO);
         goto do_load_add;
     case OE_IM(LDNT1S_ADD, X1):
         memop = MO_SB;
@@ -1307,9 +1579,11 @@ static TileExcp gen_rri_opcode(DisasContext *dc, unsigned opext,
         goto do_load_add;
     case OE_IM(LD_ADD, X1):
         memop = MO_TEQ;
-        mnemonic = "ldnt_add";
+        mnemonic = "ld_add";
     do_load_add:
-        tcg_gen_qemu_ld_tl(tdest, tsrca, dc->mmuidx, memop);
+        if (!prefetch_nofault) {
+            tcg_gen_qemu_ld_tl(tdest, tsrca, dc->mmuidx, memop);
+        }
         tcg_gen_addi_tl(dest_gr(dc, srca), tsrca, imm);
         break;
     case OE_IM(LDNA_ADD, X1):
@@ -1325,6 +1599,11 @@ static TileExcp gen_rri_opcode(DisasContext *dc, unsigned opext,
         break;
     case OE_IM(V1ADDI, X0):
     case OE_IM(V1ADDI, X1):
+        t0 = tcg_const_tl(V1_IMM(imm));
+        gen_v12add(tdest, tsrca, t0, V1_IMM(0x80));
+        tcg_temp_free(t0);
+        mnemonic = "v1addi";
+        break;
     case OE_IM(V1CMPEQI, X0):
     case OE_IM(V1CMPEQI, X1):
         tcg_gen_xori_tl(tdest, tsrca, V1_IMM(imm));
@@ -1339,8 +1618,14 @@ static TileExcp gen_rri_opcode(DisasContext *dc, unsigned opext,
     case OE_IM(V1MAXUI, X1):
     case OE_IM(V1MINUI, X0):
     case OE_IM(V1MINUI, X1):
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     case OE_IM(V2ADDI, X0):
     case OE_IM(V2ADDI, X1):
+        t0 = tcg_const_tl(V2_IMM(imm));
+        gen_v12add(tdest, tsrca, t0, V2_IMM(0x8000));
+        tcg_temp_free(t0);
+        mnemonic = "v2addi";
+        break;
     case OE_IM(V2CMPEQI, X0):
     case OE_IM(V2CMPEQI, X1):
     case OE_IM(V2CMPLTSI, X0):
@@ -1427,11 +1712,27 @@ static TileExcp gen_rri_opcode(DisasContext *dc, unsigned opext,
         break;
     case OE_SH(V2SHLI, X0):
     case OE_SH(V2SHLI, X1):
+        i2 = imm & 15;
+        i3 = 0xffff >> i2;
+        tcg_gen_andi_tl(tdest, tsrca, V2_IMM(i3));
+        tcg_gen_shli_tl(tdest, tdest, i2);
+        mnemonic = "v2shli";
+        break;
     case OE_SH(V2SHRSI, X0):
     case OE_SH(V2SHRSI, X1):
+        t0 = tcg_const_tl(imm & 15);
+        gen_helper_v2shrs(tdest, tsrca, t0);
+        tcg_temp_free(t0);
+        mnemonic = "v2shrsi";
+        break;
     case OE_SH(V2SHRUI, X0):
     case OE_SH(V2SHRUI, X1):
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        i2 = imm & 15;
+        i3 = (0xffff << i2) & 0xffff;
+        tcg_gen_andi_tl(tdest, tsrca, V2_IMM(i3));
+        tcg_gen_shri_tl(tdest, tdest, i2);
+        mnemonic = "v2shrui";
+        break;
 
     case OE(ADDLI_OPCODE_X0, 0, X0):
     case OE(ADDLI_OPCODE_X1, 0, X1):
@@ -1452,7 +1753,7 @@ static TileExcp gen_rri_opcode(DisasContext *dc, unsigned opext,
         break;
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 
     qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s, %d", mnemonic,
@@ -1546,7 +1847,7 @@ static TileExcp gen_bf_opcode_x0(DisasContext *dc, unsigned ext,
         break;
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 
     qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s, %u, %u", mnemonic,
@@ -1602,7 +1903,7 @@ static TileExcp gen_branch_opcode_x1(DisasContext *dc, unsigned ext,
         mnemonic = "blbs";
         break;
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
@@ -1654,6 +1955,10 @@ static const TileSPR *find_spr(unsigned spr)
       offsetof(CPUTLGState, spregs[TILEGX_SPR_CRITICAL_SEC]), 0, 0)
     D(SIM_CONTROL,
       offsetof(CPUTLGState, spregs[TILEGX_SPR_SIM_CONTROL]), 0, 0)
+    D(EX_CONTEXT_0_0,
+      offsetof(CPUTLGState, spregs[TILEGX_SPR_EX_CONTEXT_0_0]), 0, 0)
+    D(EX_CONTEXT_0_1,
+      offsetof(CPUTLGState, spregs[TILEGX_SPR_EX_CONTEXT_0_1]), 0, 0)
     }
 
 #undef D
@@ -1669,7 +1974,7 @@ static TileExcp gen_mtspr_x1(DisasContext *dc, unsigned spr, unsigned srca)
 
     if (def == NULL) {
         qemu_log_mask(CPU_LOG_TB_IN_ASM, "mtspr spr[%u], %s", spr, reg_names[srca]);
-        return TILEGX_EXCP_OPCODE_UNKNOWN;
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     }
 
     tsrca = load_gr(dc, srca);
@@ -1689,7 +1994,7 @@ static TileExcp gen_mfspr_x1(DisasContext *dc, unsigned dest, unsigned spr)
 
     if (def == NULL) {
         qemu_log_mask(CPU_LOG_TB_IN_ASM, "mtspr %s, spr[%u]", reg_names[dest], spr);
-        return TILEGX_EXCP_OPCODE_UNKNOWN;
+        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
     }
 
     tdest = dest_gr(dc, dest);
@@ -1715,7 +2020,7 @@ static TileExcp decode_y0(DisasContext *dc, tilegx_bundle_bits bundle)
     case RRR_1_OPCODE_Y0:
         if (ext == UNARY_RRR_1_OPCODE_Y0) {
             ext = get_UnaryOpcodeExtension_Y0(bundle);
-            return gen_rr_opcode(dc, OE(opc, ext, Y0), dest, srca);
+            return gen_rr_opcode(dc, OE(opc, ext, Y0), dest, srca, bundle);
         }
         /* fallthru */
     case RRR_0_OPCODE_Y0:
@@ -1744,7 +2049,7 @@ static TileExcp decode_y0(DisasContext *dc, tilegx_bundle_bits bundle)
         return gen_rri_opcode(dc, OE(opc, 0, Y0), dest, srca, imm);
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 }
 
@@ -1761,7 +2066,7 @@ static TileExcp decode_y1(DisasContext *dc, tilegx_bundle_bits bundle)
     case RRR_1_OPCODE_Y1:
         if (ext == UNARY_RRR_1_OPCODE_Y0) {
             ext = get_UnaryOpcodeExtension_Y1(bundle);
-            return gen_rr_opcode(dc, OE(opc, ext, Y1), dest, srca);
+            return gen_rr_opcode(dc, OE(opc, ext, Y1), dest, srca, bundle);
         }
         /* fallthru */
     case RRR_0_OPCODE_Y1:
@@ -1788,7 +2093,7 @@ static TileExcp decode_y1(DisasContext *dc, tilegx_bundle_bits bundle)
         return gen_rri_opcode(dc, OE(opc, 0, Y1), dest, srca, imm);
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 }
 
@@ -1846,7 +2151,7 @@ static TileExcp decode_y2(DisasContext *dc, tilegx_bundle_bits bundle)
         return gen_st_opcode(dc, 0, srca, srcbdest, MO_TEQ, "st");
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 }
 
@@ -1863,7 +2168,7 @@ static TileExcp decode_x0(DisasContext *dc, tilegx_bundle_bits bundle)
         ext = get_RRROpcodeExtension_X0(bundle);
         if (ext == UNARY_RRR_0_OPCODE_X0) {
             ext = get_UnaryOpcodeExtension_X0(bundle);
-            return gen_rr_opcode(dc, OE(opc, ext, X0), dest, srca);
+            return gen_rr_opcode(dc, OE(opc, ext, X0), dest, srca, bundle);
         }
         srcb = get_SrcB_X0(bundle);
         return gen_rrr_opcode(dc, OE(opc, ext, X0), dest, srca, srcb);
@@ -1891,7 +2196,7 @@ static TileExcp decode_x0(DisasContext *dc, tilegx_bundle_bits bundle)
         return gen_rri_opcode(dc, OE(opc, 0, X0), dest, srca, imm);
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 }
 
@@ -1910,7 +2215,7 @@ static TileExcp decode_x1(DisasContext *dc, tilegx_bundle_bits bundle)
         switch (ext) {
         case UNARY_RRR_0_OPCODE_X1:
             ext = get_UnaryOpcodeExtension_X1(bundle);
-            return gen_rr_opcode(dc, OE(opc, ext, X1), dest, srca);
+            return gen_rr_opcode(dc, OE(opc, ext, X1), dest, srca, bundle);
         case ST1_RRR_0_OPCODE_X1:
             return gen_st_opcode(dc, dest, srca, srcb, MO_UB, "st1");
         case ST2_RRR_0_OPCODE_X1:
@@ -1981,7 +2286,7 @@ static TileExcp decode_x1(DisasContext *dc, tilegx_bundle_bits bundle)
         return gen_rri_opcode(dc, OE(opc, 0, X1), dest, srca, imm);
 
     default:
-        return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+        return TILEGX_EXCP_OPCODE_UNKNOWN;
     }
 }
 
@@ -1992,8 +2297,15 @@ static void notice_excp(DisasContext *dc, uint64_t bundle,
         return;
     }
     gen_exception(dc, excp);
-    if (excp == TILEGX_EXCP_OPCODE_UNIMPLEMENTED) {
+    switch (excp) {
+    case TILEGX_EXCP_OPCODE_UNIMPLEMENTED:
         qemu_log_mask(LOG_UNIMP, "UNIMP %s, [" FMT64X "]\n", type, bundle);
+        break;
+    case TILEGX_EXCP_OPCODE_UNKNOWN:
+        qemu_log_mask(LOG_UNIMP, "UNKNOWN %s, [" FMT64X "]\n", type, bundle);
+        break;
+    default:
+        break;
     }
 }