summary refs log tree commit diff stats
path: root/target-mips
diff options
context:
space:
mode:
Diffstat (limited to 'target-mips')
-rw-r--r--target-mips/Makefile.objs2
-rw-r--r--target-mips/TODO3
-rw-r--r--target-mips/cpu.h23
-rw-r--r--target-mips/dsp_helper.c4033
-rw-r--r--target-mips/helper.c3
-rw-r--r--target-mips/helper.h361
-rw-r--r--target-mips/op_helper.c824
-rw-r--r--target-mips/translate.c3177
-rw-r--r--target-mips/translate_init.c52
9 files changed, 7709 insertions, 769 deletions
diff --git a/target-mips/Makefile.objs b/target-mips/Makefile.objs
index 3eeeeac8b8..119c816518 100644
--- a/target-mips/Makefile.objs
+++ b/target-mips/Makefile.objs
@@ -1,2 +1,2 @@
-obj-y += translate.o op_helper.o lmi_helper.o helper.o cpu.o
+obj-y += translate.o dsp_helper.o op_helper.o lmi_helper.o helper.o cpu.o
 obj-$(CONFIG_SOFTMMU) += machine.o
diff --git a/target-mips/TODO b/target-mips/TODO
index 2a3546f624..1d782d8027 100644
--- a/target-mips/TODO
+++ b/target-mips/TODO
@@ -6,8 +6,7 @@ General
 - Unimplemented ASEs:
   - MDMX
   - SmartMIPS
-  - DSP r1
-  - DSP r2
+  - microMIPS DSP r1 & r2 encodings
 - MT ASE only partially implemented and not functional
 - Shadow register support only partially implemented,
   lacks set switching on interrupt/exception.
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index 38943dec10..aebb2d5b79 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -415,7 +415,7 @@ struct CPUMIPSState {
     int error_code;
     uint32_t hflags;    /* CPU State */
     /* TMASK defines different execution modes */
-#define MIPS_HFLAG_TMASK  0x007FF
+#define MIPS_HFLAG_TMASK  0xC07FF
 #define MIPS_HFLAG_MODE   0x00007 /* execution modes                    */
     /* The KSU flags must be the lowest bits in hflags. The flag order
        must be the same as defined for CP0 Status. This allows to use
@@ -453,6 +453,9 @@ struct CPUMIPSState {
 #define MIPS_HFLAG_BDS32  0x10000 /* branch requires 32-bit delay slot  */
 #define MIPS_HFLAG_BX     0x20000 /* branch exchanges execution mode    */
 #define MIPS_HFLAG_BMASK  (MIPS_HFLAG_BMASK_BASE | MIPS_HFLAG_BMASK_EXT)
+    /* MIPS DSP resources access. */
+#define MIPS_HFLAG_DSP   0x40000  /* Enable access to MIPS DSP resources. */
+#define MIPS_HFLAG_DSPR2 0x80000  /* Enable access to MIPS DSPR2 resources. */
     target_ulong btarget;        /* Jump / branch target               */
     target_ulong bcond;          /* Branch condition (if needed)       */
 
@@ -610,8 +613,9 @@ enum {
     EXCP_MDMX,
     EXCP_C2E,
     EXCP_CACHE, /* 32 */
+    EXCP_DSPDIS,
 
-    EXCP_LAST = EXCP_CACHE,
+    EXCP_LAST = EXCP_DSPDIS,
 };
 /* Dummy exception for conditional stores.  */
 #define EXCP_SC 0x100
@@ -773,6 +777,21 @@ static inline void compute_hflags(CPUMIPSState *env)
     if (env->CP0_Status & (1 << CP0St_FR)) {
         env->hflags |= MIPS_HFLAG_F64;
     }
+    if (env->insn_flags & ASE_DSPR2) {
+        /* Enables access MIPS DSP resources, now our cpu is DSP ASER2,
+           so enable to access DSPR2 resources. */
+        if (env->CP0_Status & (1 << CP0St_MX)) {
+            env->hflags |= MIPS_HFLAG_DSP | MIPS_HFLAG_DSPR2;
+        }
+
+    } else if (env->insn_flags & ASE_DSP) {
+        /* Enables access MIPS DSP resources, now our cpu is DSP ASE,
+           so enable to access DSP resources. */
+        if (env->CP0_Status & (1 << CP0St_MX)) {
+            env->hflags |= MIPS_HFLAG_DSP;
+        }
+
+    }
     if (env->insn_flags & ISA_MIPS32R2) {
         if (env->active_fpu.fcr0 & (1 << FCR0_F64)) {
             env->hflags |= MIPS_HFLAG_COP1X;
diff --git a/target-mips/dsp_helper.c b/target-mips/dsp_helper.c
new file mode 100644
index 0000000000..b59133ea8f
--- /dev/null
+++ b/target-mips/dsp_helper.c
@@ -0,0 +1,4033 @@
+/*
+ * MIPS ASE DSP Instruction emulation helpers for QEMU.
+ *
+ * Copyright (c) 2012  Jia Liu <proljc@gmail.com>
+ *                     Dongxue Zhang <elat.era@gmail.com>
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "helper.h"
+
+/*** MIPS DSP internal functions begin ***/
+/* Absolute value of x.  Every use of the argument is parenthesized so an
+ * expression may be passed safely; note that x is evaluated twice. */
+#define MIPSDSP_ABS(x) (((x) >= 0) ? (x) : -(x))
+/* Non-zero when, in the bit(s) selected by mask d, a and b agree while c
+ * differs from a - i.e. the sum c = a + b overflowed in that bit. */
+#define MIPSDSP_OVERFLOW(a, b, c, d) \
+    (!(!(((a) ^ (b) ^ -1) & ((a) ^ (c)) & (d))))
+
+/* OR `flag`, shifted left by `position` bits, into
+ * env->active_tc.DSPControl.  Bits are only set here, never cleared. */
+static inline void set_DSPControl_overflow_flag(uint32_t flag, int position,
+                                                CPUMIPSState *env)
+{
+    target_ulong bits = (target_ulong)flag << position;
+
+    env->active_tc.DSPControl |= bits;
+}
+
+/* OR `flag` into DSPControl at bit 13; an already-set bit stays set. */
+static inline void set_DSPControl_carryflag(uint32_t flag, CPUMIPSState *env)
+{
+    target_ulong carry = (target_ulong)flag << 13;
+
+    env->active_tc.DSPControl |= carry;
+}
+
+/* Return bit 13 of DSPControl as 0 or 1. */
+static inline uint32_t get_DSPControl_carryflag(CPUMIPSState *env)
+{
+    target_ulong dspc = env->active_tc.DSPControl;
+
+    return (dspc >> 13) & 0x01;
+}
+
+/* Replace the `len`-bit field that starts at bit 24 of DSPControl with
+ * `flag`.
+ *
+ * The mask is built from an unsigned constant: with len == 8 the signed
+ * expression (0x01 << len) - 1) << 24 would shift a set bit into the sign
+ * position of an int.  `flag` is not masked to `len` bits, so callers must
+ * pass a value that fits the field. */
+static inline void set_DSPControl_24(uint32_t flag, int len, CPUMIPSState *env)
+{
+    uint32_t filter;
+
+    filter = ((1u << len) - 1) << 24;
+    filter = ~filter;
+
+    env->active_tc.DSPControl &= filter;
+    env->active_tc.DSPControl |= (target_ulong)flag << 24;
+}
+
+/* Read the low `len` bits of the DSPControl field that starts at bit 24. */
+static inline uint32_t get_DSPControl_24(int len, CPUMIPSState *env)
+{
+    uint32_t mask = (0x01 << len) - 1;
+
+    return (env->active_tc.DSPControl >> 24) & mask;
+}
+
+/* Store `pos` in the low bits of DSPControl: the low 6 bits are cleared
+ * first on 32-bit targets, the low 7 bits when TARGET_MIPS64 is defined.
+ * `pos` itself is OR-ed in unmasked. */
+static inline void set_DSPControl_pos(uint32_t pos, CPUMIPSState *env)
+{
+    target_ulong dspc = env->active_tc.DSPControl;
+
+#ifdef TARGET_MIPS64
+    dspc &= 0xFFFFFF80;
+#else
+    dspc &= 0xFFFFFFC0;
+#endif
+    dspc |= pos;
+
+    env->active_tc.DSPControl = dspc;
+}
+
+/* Return the low bits of DSPControl: 6 bits on 32-bit targets, 7 bits
+ * when TARGET_MIPS64 is defined. */
+static inline uint32_t get_DSPControl_pos(CPUMIPSState *env)
+{
+    target_ulong dspc = env->active_tc.DSPControl;
+
+#ifdef TARGET_MIPS64
+    return dspc & 0x7F;
+#else
+    return dspc & 0x3F;
+#endif
+}
+
+/* Clear bit 14 of DSPControl, then OR in `flag` shifted to bit 14. */
+static inline void set_DSPControl_efi(uint32_t flag, CPUMIPSState *env)
+{
+    target_ulong dspc = env->active_tc.DSPControl;
+
+    dspc &= 0xFFFFBFFF;
+    dspc |= (target_ulong)flag << 14;
+    env->active_tc.DSPControl = dspc;
+}
+
+/* Generate mipsdsp_sat_abs<size>(): saturating absolute value.  The most
+ * negative input has no positive counterpart, so it yields INT<size>_MAX
+ * and sets bit 20 of DSPControl. */
+#define DO_MIPS_SAT_ABS(size)                                          \
+static inline int##size##_t mipsdsp_sat_abs##size(int##size##_t a,     \
+                                                  CPUMIPSState *env)   \
+{                                                                      \
+    if (a != INT##size##_MIN) {                                        \
+        return MIPSDSP_ABS(a);                                         \
+    }                                                                  \
+    set_DSPControl_overflow_flag(1, 20, env);                          \
+    return INT##size##_MAX;                                            \
+}
+DO_MIPS_SAT_ABS(8)
+DO_MIPS_SAT_ABS(16)
+DO_MIPS_SAT_ABS(32)
+#undef DO_MIPS_SAT_ABS
+
+/* get sum value */
+/* Wrapping signed 16-bit add; sets DSPControl bit 20 when the sum
+ * overflows the 16-bit signed range. */
+static inline int16_t mipsdsp_add_i16(int16_t a, int16_t b, CPUMIPSState *env)
+{
+    int16_t sum = a + b;
+
+    if (MIPSDSP_OVERFLOW(a, b, sum, 0x8000)) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+    return sum;
+}
+
+/* Saturating signed 16-bit add.  On overflow the result is 0x7FFF when a
+ * is positive and 0x8000 otherwise, and DSPControl bit 20 is set. */
+static inline int16_t mipsdsp_sat_add_i16(int16_t a, int16_t b,
+                                          CPUMIPSState *env)
+{
+    int16_t sum = a + b;
+
+    if (!MIPSDSP_OVERFLOW(a, b, sum, 0x8000)) {
+        return sum;
+    }
+
+    sum = (a > 0) ? 0x7FFF : 0x8000;
+    set_DSPControl_overflow_flag(1, 20, env);
+    return sum;
+}
+
+/* Saturating signed 32-bit add.  On overflow the result is 0x7FFFFFFF
+ * when a is positive and 0x80000000 otherwise, and DSPControl bit 20 is
+ * set. */
+static inline int32_t mipsdsp_sat_add_i32(int32_t a, int32_t b,
+                                          CPUMIPSState *env)
+{
+    int32_t sum = a + b;
+
+    if (!MIPSDSP_OVERFLOW(a, b, sum, 0x80000000)) {
+        return sum;
+    }
+
+    if (a > 0) {
+        sum = 0x7FFFFFFF;
+    } else {
+        sum = 0x80000000;
+    }
+    set_DSPControl_overflow_flag(1, 20, env);
+    return sum;
+}
+
+/* Wrapping unsigned 8-bit add; a carry out of bit 7 sets DSPControl
+ * bit 20. */
+static inline uint8_t mipsdsp_add_u8(uint8_t a, uint8_t b, CPUMIPSState *env)
+{
+    uint16_t wide = (uint16_t)a + (uint16_t)b;
+
+    if ((wide & 0x0100) != 0) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+    return wide & 0xFF;
+}
+
+/* Wrapping unsigned 16-bit add; a carry out of bit 15 sets DSPControl
+ * bit 20. */
+static inline uint16_t mipsdsp_add_u16(uint16_t a, uint16_t b,
+                                       CPUMIPSState *env)
+{
+    uint32_t wide = (uint32_t)a + (uint32_t)b;
+
+    if ((wide & 0x00010000) != 0) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+    return wide & 0xFFFF;
+}
+
+/* Saturating unsigned 8-bit add: a carry out of bit 7 clamps the result
+ * to 0xFF and sets DSPControl bit 20. */
+static inline uint8_t mipsdsp_sat_add_u8(uint8_t a, uint8_t b,
+                                         CPUMIPSState *env)
+{
+    uint16_t wide = (uint16_t)a + (uint16_t)b;
+
+    if (!(wide & 0x0100)) {
+        return wide & 0xFF;
+    }
+
+    set_DSPControl_overflow_flag(1, 20, env);
+    return 0xFF;
+}
+
+/* Saturating unsigned 16-bit add: a carry out of bit 15 clamps the result
+ * to 0xFFFF and sets DSPControl bit 20. */
+static inline uint16_t mipsdsp_sat_add_u16(uint16_t a, uint16_t b,
+                                           CPUMIPSState *env)
+{
+    uint32_t wide = (uint32_t)a + (uint32_t)b;
+
+    if (!(wide & 0x00010000)) {
+        return wide & 0xFFFF;
+    }
+
+    set_DSPControl_overflow_flag(1, 20, env);
+    return 0xFFFF;
+}
+
+/* Add `a` to accumulator number `acc` and return the low 32 bits of the
+ * sum, saturated to the signed 32-bit range.
+ *
+ * The accumulator value is HI[acc]:LO[acc] on 32-bit targets and LO[acc]
+ * alone when TARGET_MIPS64 is defined.  When bits 32 and 31 of the sum
+ * disagree, the result is replaced by 0x7FFFFFFF (bit 32 clear) or
+ * 0x80000000 (bit 32 set) and DSPControl bit 16 + acc is set. */
+static inline int32_t mipsdsp_sat32_acc_q31(int32_t acc, int32_t a,
+                                            CPUMIPSState *env)
+{
+    int64_t temp;
+    int32_t temp32, temp31, result;
+    int64_t temp_sum;
+
+#ifndef TARGET_MIPS64
+    temp = ((uint64_t)env->active_tc.HI[acc] << 32) |
+           (uint64_t)env->active_tc.LO[acc];
+#else
+    temp = (uint64_t)env->active_tc.LO[acc];
+#endif
+
+    temp_sum = (int64_t)a + temp;
+
+    /* Bits 32 and 31 of the sum differ exactly when the value no longer
+       fits in 32 signed bits. */
+    temp32 = (temp_sum >> 32) & 0x01;
+    temp31 = (temp_sum >> 31) & 0x01;
+    result = temp_sum & 0xFFFFFFFF;
+
+    /* FIXME
+       This saturation may be wrong, because the user manual gives the
+       two saturation constants the other way round:
+
+       temp[127..0] <- temp + ( sign(a) || a[31..0] )
+       if ( temp[32] != temp[31] ) then
+           if ( temp[32] = 0 ) then
+               temp[31..0] <- 0x80000000
+           else
+               temp[31..0] <- 0x7FFFFFFF
+           endif
+           DSPControl.ouflag[16+acc] <- 1
+       endif
+
+       The code below instead saturates to 0x7FFFFFFF when bit 32 is
+       clear and to 0x80000000 when it is set.
+     */
+    if (temp32 != temp31) {
+        if (temp32 == 0) {
+            result = 0x7FFFFFFF;
+        } else {
+            result = 0x80000000;
+        }
+        set_DSPControl_overflow_flag(1, 16 + acc, env);
+    }
+
+    return result;
+}
+
+/* Add the 128-bit value a[] to accumulator `ac` (HI[ac]:LO[ac]) and store
+ * the result in ret[], saturating to the signed 64-bit range.
+ * a[0]/ret[0] is the low half (LO), a[1]/ret[1] is the high half (HI).
+ * On saturation DSPControl bit 16 + ac is set. */
+static inline void mipsdsp_sat64_acc_add_q63(int64_t *ret,
+                                             int32_t ac,
+                                             int64_t *a,
+                                             CPUMIPSState *env)
+{
+    bool temp64;
+
+    ret[0] = env->active_tc.LO[ac] + a[0];
+    ret[1] = env->active_tc.HI[ac] + a[1];
+
+    /* An unsigned sum smaller than both addends means the low half
+       wrapped: propagate the carry into the high half. */
+    if (((uint64_t)ret[0] < (uint64_t)env->active_tc.LO[ac]) &&
+        ((uint64_t)ret[0] < (uint64_t)a[0])) {
+        ret[1] += 1;
+    }
+    /* Bit 64 (lowest bit of the high half) must equal bit 63, otherwise
+       the value does not fit in 64 signed bits. */
+    temp64 = ret[1] & 1;
+    if (temp64 != ((ret[0] >> 63) & 0x01)) {
+        if (temp64) {
+            /* Negative: clamp to the most negative 64-bit value. */
+            ret[0] = (0x01ull << 63);
+            ret[1] = ~0ull;
+        } else {
+            /* Positive: clamp to the largest 64-bit value. */
+            ret[0] = (0x01ull << 63) - 1;
+            ret[1] = 0x00;
+        }
+        set_DSPControl_overflow_flag(1, 16 + ac, env);
+    }
+}
+
+/* Subtract the 128-bit value a[] from accumulator `ac` (HI[ac]:LO[ac])
+ * and store the result in ret[], saturating to the signed 64-bit range.
+ * Index 0 is the low half (LO), index 1 the high half (HI).
+ * On saturation DSPControl bit 16 + ac is set. */
+static inline void mipsdsp_sat64_acc_sub_q63(int64_t *ret,
+                                             int32_t ac,
+                                             int64_t *a,
+                                             CPUMIPSState *env)
+{
+    bool temp64;
+
+    ret[0] = env->active_tc.LO[ac] - a[0];
+    ret[1] = env->active_tc.HI[ac] - a[1];
+
+    /* An unsigned difference larger than the minuend means the low half
+       borrowed from the high half. */
+    if ((uint64_t)ret[0] > (uint64_t)env->active_tc.LO[ac]) {
+        ret[1] -= 1;
+    }
+    /* Bit 64 must equal bit 63, otherwise the value does not fit in 64
+       signed bits. */
+    temp64 = ret[1] & 1;
+    if (temp64 != ((ret[0] >> 63) & 0x01)) {
+        if (temp64) {
+            /* Negative: clamp to the most negative 64-bit value. */
+            ret[0] = (0x01ull << 63);
+            ret[1] = ~0ull;
+        } else {
+            /* Positive: clamp to the largest 64-bit value. */
+            ret[0] = (0x01ull << 63) - 1;
+            ret[1] = 0x00;
+        }
+        set_DSPControl_overflow_flag(1, 16 + ac, env);
+    }
+}
+
+/* Signed 16x16 multiply returning the low 16 bits of the product.
+ * DSPControl bit 21 is set when the full product does not fit in 16
+ * signed bits. */
+static inline int32_t mipsdsp_mul_i16_i16(int16_t a, int16_t b,
+                                          CPUMIPSState *env)
+{
+    int32_t product = (int32_t)a * (int32_t)b;
+
+    if ((product < (int)0xFFFF8000) || (product > (int)0x7FFF)) {
+        set_DSPControl_overflow_flag(1, 21, env);
+    }
+
+    return product & 0x0000FFFF;
+}
+
+/* Multiply two values passed in int32_t containers and return the low 32
+ * bits of the product.  The multiplication is done in uint32_t because a
+ * product such as 0xFFFF * 0xFFFF exceeds INT32_MAX and would overflow a
+ * signed multiply; the bit pattern of the result is unchanged. */
+static inline int32_t mipsdsp_mul_u16_u16(int32_t a, int32_t b)
+{
+    return (int32_t)((uint32_t)a * (uint32_t)b);
+}
+
+/* Return the low 32 bits of a * b.  The multiplication is done in
+ * uint32_t so that products outside the int32_t range wrap instead of
+ * overflowing a signed multiply; the low 32 bits are the same for signed
+ * and unsigned operands. */
+static inline int32_t mipsdsp_mul_i32_i32(int32_t a, int32_t b)
+{
+    return (int32_t)((uint32_t)a * (uint32_t)b);
+}
+
+/* Signed 16x16 multiply saturated to 16 bits.  A product above 0x7FFF
+ * returns 0x7FFF, a product below -0x8000 returns 0x8000; both cases set
+ * DSPControl bit 21.  Only the low 16 bits of the return value are used. */
+static inline int32_t mipsdsp_sat16_mul_i16_i16(int16_t a, int16_t b,
+                                                CPUMIPSState *env)
+{
+    int32_t product = (int32_t)a * (int32_t)b;
+
+    if (product > (int)0x7FFF) {
+        set_DSPControl_overflow_flag(1, 21, env);
+        return 0x00007FFF;
+    }
+    if (product < (int)0xffff8000) {
+        set_DSPControl_overflow_flag(1, 21, env);
+        return 0x00008000;
+    }
+
+    return product & 0x0000FFFF;
+}
+
+/* Fractional multiply of two 16-bit values: the signed product is
+ * doubled.  The one input pair 0x8000 * 0x8000 returns 0x7FFFFFFF
+ * instead and sets DSPControl bit 21. */
+static inline int32_t mipsdsp_mul_q15_q15_overflowflag21(uint16_t a, uint16_t b,
+                                                         CPUMIPSState *env)
+{
+    if ((a != 0x8000) || (b != 0x8000)) {
+        return ((int32_t)(int16_t)a * (int32_t)(int16_t)b) << 1;
+    }
+
+    set_DSPControl_overflow_flag(1, 21, env);
+    return 0x7FFFFFFF;
+}
+
+/* right shift */
+/* Shift the unsigned byte `a` right by `mov` bits (zero fill). */
+static inline uint8_t mipsdsp_rshift_u8(uint8_t a, target_ulong mov)
+{
+    uint8_t shifted = a >> mov;
+
+    return shifted;
+}
+
+/* Shift the unsigned halfword `a` right by `mov` bits (zero fill). */
+static inline uint16_t mipsdsp_rshift_u16(uint16_t a, target_ulong mov)
+{
+    uint16_t shifted = a >> mov;
+
+    return shifted;
+}
+
+/* Shift the signed byte `a` right by `mov` bits using the compiler's
+ * signed >> operator. */
+static inline int8_t mipsdsp_rashift8(int8_t a, target_ulong mov)
+{
+    int8_t shifted = a >> mov;
+
+    return shifted;
+}
+
+/* Shift the signed halfword `a` right by `mov` bits using the compiler's
+ * signed >> operator. */
+static inline int16_t mipsdsp_rashift16(int16_t a, target_ulong mov)
+{
+    int16_t shifted = a >> mov;
+
+    return shifted;
+}
+
+/* Shift the signed word `a` right by `mov` bits using the compiler's
+ * signed >> operator. */
+static inline int32_t mipsdsp_rashift32(int32_t a, target_ulong mov)
+{
+    int32_t shifted = a >> mov;
+
+    return shifted;
+}
+
+/* Halving add: (a + b) >> 1, computed in 32 bits so the sum cannot
+ * overflow, truncated to 16 bits. */
+static inline int16_t mipsdsp_rshift1_add_q16(int16_t a, int16_t b)
+{
+    int32_t sum = (int32_t)a + (int32_t)b;
+
+    sum >>= 1;
+    return sum & 0xFFFF;
+}
+
+/* round right shift */
+/* Rounding halving add: (a + b + 1) >> 1, computed in 32 bits and
+ * truncated to 16 bits. */
+static inline int16_t mipsdsp_rrshift1_add_q16(int16_t a, int16_t b)
+{
+    int32_t sum = (int32_t)a + (int32_t)b + 1;
+
+    sum >>= 1;
+    return sum & 0xFFFF;
+}
+
+/* Halving add: (a + b) >> 1, computed in 64 bits so the sum cannot
+ * overflow, truncated to 32 bits. */
+static inline int32_t mipsdsp_rshift1_add_q32(int32_t a, int32_t b)
+{
+    int64_t sum = (int64_t)a + (int64_t)b;
+
+    sum >>= 1;
+    return sum & 0xFFFFFFFF;
+}
+
+/* Rounding halving add: (a + b + 1) >> 1, computed in 64 bits and
+ * truncated to 32 bits. */
+static inline int32_t mipsdsp_rrshift1_add_q32(int32_t a, int32_t b)
+{
+    int64_t sum = (int64_t)a + (int64_t)b + 1;
+
+    sum >>= 1;
+    return sum & 0xFFFFFFFF;
+}
+
+/* Unsigned halving add: (a + b) >> 1 with the 9-bit sum kept in a
+ * 16-bit temporary. */
+static inline uint8_t mipsdsp_rshift1_add_u8(uint8_t a, uint8_t b)
+{
+    uint16_t sum = (uint16_t)a + (uint16_t)b;
+
+    sum >>= 1;
+    return sum & 0x00FF;
+}
+
+/* Unsigned rounding halving add: (a + b + 1) >> 1. */
+static inline uint8_t mipsdsp_rrshift1_add_u8(uint8_t a, uint8_t b)
+{
+    uint16_t sum = (uint16_t)a + (uint16_t)b + 1;
+
+    sum >>= 1;
+    return sum & 0x00FF;
+}
+
+/* Halving subtract: the difference a - b is truncated to 16 bits,
+ * shifted right by one, and its low byte returned. */
+static inline uint8_t mipsdsp_rshift1_sub_u8(uint8_t a, uint8_t b)
+{
+    uint16_t diff = (uint16_t)a - (uint16_t)b;
+
+    diff >>= 1;
+    return diff & 0x00FF;
+}
+
+/* Rounding halving subtract: a - b + 1 is truncated to 16 bits, shifted
+ * right by one, and its low byte returned. */
+static inline uint8_t mipsdsp_rrshift1_sub_u8(uint8_t a, uint8_t b)
+{
+    uint16_t diff = (uint16_t)a - (uint16_t)b + 1;
+
+    diff >>= 1;
+    return diff & 0x00FF;
+}
+
+/* Shift accumulator `ac` right by `shift` bits and return the result.
+ * The accumulator is assembled from HI[ac] (upper 32 bits) and the low
+ * 32 bits of LO[ac].  DSPControl bit 23 is set when bit 31 of the result
+ * differs from bit 31 of HI[ac]. */
+static inline int64_t mipsdsp_rashift_short_acc(int32_t ac,
+                                                int32_t shift,
+                                                CPUMIPSState *env)
+{
+    int32_t sign, temp31;
+    int64_t temp, acc;
+
+    sign = (env->active_tc.HI[ac] >> 31) & 0x01;
+    acc = ((int64_t)env->active_tc.HI[ac] << 32) |
+          ((int64_t)env->active_tc.LO[ac] & 0xFFFFFFFF);
+    if (shift == 0) {
+        temp = acc;
+    } else {
+        if (sign == 0) {
+            /* Keep only the low (33 - shift) bits of the shifted value.
+               NOTE(review): this discards everything above that width of
+               the 64-bit result - confirm this truncation is intended. */
+            temp = (((int64_t)0x01 << (32 - shift + 1)) - 1) & (acc >> shift);
+        } else {
+            /* OR in a run of (shift + 1) one bits starting at bit
+               (32 - shift). */
+            temp = ((((int64_t)0x01 << (shift + 1)) - 1) << (32 - shift)) |
+                   (acc >> shift);
+        }
+    }
+
+    temp31 = (temp >> 31) & 0x01;
+    if (sign != temp31) {
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    return temp;
+}
+
+/* Pre-shift accumulator `ac` for a rounding right shift by `shift` bits.
+ *
+ * The 64-bit accumulator is assembled from HI[ac] (upper 32 bits) and the
+ * low 32 bits of LO[ac].  The result is shifted one position less than
+ * requested (left by one when shift == 0) so that the caller can add the
+ * rounding bit and drop the last position itself.
+ *
+ * The result is 128 bits long: p[0] is the low half and p[1] the high
+ * half.  p[1] always carries the accumulator's sign bit; leaving it at 0
+ * for shift > 0 would make a negative accumulator look like a large
+ * positive value to the caller. */
+static inline void mipsdsp_rndrashift_short_acc(int64_t *p,
+                                                int32_t ac,
+                                                int32_t shift,
+                                                CPUMIPSState *env)
+{
+    int64_t acc;
+
+    acc = ((int64_t)env->active_tc.HI[ac] << 32) |
+          ((int64_t)env->active_tc.LO[ac] & 0xFFFFFFFF);
+    if (shift == 0) {
+        p[0] = acc << 1;
+    } else {
+        p[0] = acc >> (shift - 1);
+    }
+    p[1] = (acc >> 63) & 0x01;
+}
+
+/* Shift the 128-bit accumulator HI[ac]:LO[ac] right by `shift` bits
+ * (only the low 5 bits of `shift` are used).  The result is 128 bits
+ * long: p[0] is the low half, p[1] the high half; the high half is
+ * shifted as a signed value. */
+static inline void mipsdsp_rashift_acc(uint64_t *p,
+                                       uint32_t ac,
+                                       uint32_t shift,
+                                       CPUMIPSState *env)
+{
+    uint64_t hi = env->active_tc.HI[ac];
+    uint64_t lo = env->active_tc.LO[ac];
+
+    shift &= 0x1F;
+
+    if (shift != 0) {
+        p[0] = (hi << (64 - shift)) | (lo >> shift);
+        p[1] = (int64_t)hi >> shift;
+        return;
+    }
+
+    p[1] = hi;
+    p[0] = lo;
+}
+
+/* Pre-shift the 128-bit accumulator HI[ac]:LO[ac] for a rounding right
+ * shift: the value is shifted one position less than `shift` (left by one
+ * when shift == 0).  Only the low 6 bits of `shift` are used.
+ * The result is 128 bits long plus a sign word: p[0] is LO, p[1] is HI,
+ * p[2] is the sign of HI. */
+static inline void mipsdsp_rndrashift_acc(uint64_t *p,
+                                          uint32_t ac,
+                                          uint32_t shift,
+                                          CPUMIPSState *env)
+{
+    int64_t tempB, tempA;
+
+    tempB = env->active_tc.HI[ac];
+    tempA = env->active_tc.LO[ac];
+    shift = shift & 0x3F;
+
+    if (shift == 0) {
+        p[2] = tempB >> 63;
+        /* NOTE(review): tempA is signed, so tempA >> 63 presumably yields
+           all ones rather than 1 when LO is negative - confirm intended. */
+        p[1] = (tempB << 1) | (tempA >> 63);
+        p[0] = tempA << 1;
+    } else {
+        /* NOTE(review): for shift == 1 the left shift count is 64, the
+           full width of tempB - confirm callers never pass 1 or that the
+           host behaviour is acceptable. */
+        p[0] = (tempB << (65 - shift)) | (tempA >> (shift - 1));
+        p[1] = (int64_t)tempB >> (shift - 1);
+        if (tempB >= 0) {
+            p[2] = 0x0;
+        } else {
+            p[2] = ~0ull;
+        }
+    }
+}
+
+/* Q15 x Q15 fractional multiply producing a Q31 result.
+ *
+ * `a` and `b` hold signed Q15 values in uint16_t containers, so they are
+ * sign-extended before multiplying (as mipsdsp_mul_q15_q15_overflowflag21
+ * does); multiplying them as unsigned gives wrong results for negative
+ * operands.  The product is doubled with a multiply rather than a shift
+ * so a negative product is never left-shifted; apart from the special
+ * case below the doubled product always fits in int32_t.
+ *
+ * The one input pair 0x8000 * 0x8000 returns 0x7FFFFFFF and sets
+ * DSPControl bit 16 + ac. */
+static inline int32_t mipsdsp_mul_q15_q15(int32_t ac, uint16_t a, uint16_t b,
+                                          CPUMIPSState *env)
+{
+    int32_t temp;
+
+    if ((a == 0x8000) && (b == 0x8000)) {
+        temp = 0x7FFFFFFF;
+        set_DSPControl_overflow_flag(1, 16 + ac, env);
+    } else {
+        temp = ((int32_t)(int16_t)a * (int32_t)(int16_t)b) * 2;
+    }
+
+    return temp;
+}
+
+/* Q31 x Q31 fractional multiply producing a Q63 result.
+ *
+ * `a` and `b` hold signed Q31 values in uint32_t containers, so they are
+ * sign-extended before multiplying; an unsigned multiply gives wrong
+ * results for negative operands.  Apart from the special case below the
+ * doubled product always fits in int64_t.
+ *
+ * The one input pair 0x80000000 * 0x80000000 returns the largest int64_t
+ * value and sets DSPControl bit 16 + ac. */
+static inline int64_t mipsdsp_mul_q31_q31(int32_t ac, uint32_t a, uint32_t b,
+                                          CPUMIPSState *env)
+{
+    int64_t temp;
+
+    if ((a == 0x80000000) && (b == 0x80000000)) {
+        temp = (0x01ull << 63) - 1;
+        set_DSPControl_overflow_flag(1, 16 + ac, env);
+    } else {
+        temp = ((int64_t)(int32_t)a * (int64_t)(int32_t)b) * 2;
+    }
+
+    return temp;
+}
+
+/* Unsigned 8x8 multiply; the product always fits in 16 bits. */
+static inline uint16_t mipsdsp_mul_u8_u8(uint8_t a, uint8_t b)
+{
+    uint16_t product = (uint16_t)a * (uint16_t)b;
+
+    return product;
+}
+
+/* Unsigned 8x16 multiply saturated to 16 bits: a product above 0xFFFF
+ * returns 0xFFFF and sets DSPControl bit 21. */
+static inline uint16_t mipsdsp_mul_u8_u16(uint8_t a, uint16_t b,
+                                          CPUMIPSState *env)
+{
+    uint32_t product = (uint32_t)a * (uint32_t)b;
+
+    if (product <= 0x0000FFFF) {
+        return product & 0x0000FFFF;
+    }
+
+    set_DSPControl_overflow_flag(1, 21, env);
+    return 0x0000FFFF;
+}
+
+/* Unsigned 32x32 multiply returning the full 64-bit product. */
+static inline uint64_t mipsdsp_mul_u32_u32(uint32_t a, uint32_t b)
+{
+    uint64_t wide_a = a;
+    uint64_t wide_b = b;
+
+    return wide_a * wide_b;
+}
+
+/* Q15 x Q15 fractional multiply, rounded to Q15.
+ *
+ * `a` and `b` hold signed Q15 values in uint16_t containers, so they are
+ * sign-extended before multiplying; the previous `a * b` multiplied the
+ * zero-extended values and could also overflow int.  The doubling and
+ * the rounding add of 0x8000 are done on the unsigned bit pattern so no
+ * signed shift or overflow is involved.  The upper 16 bits of the rounded
+ * Q31 value are returned.
+ *
+ * The one input pair 0x8000 * 0x8000 returns 0x7FFF and sets DSPControl
+ * bit 21. */
+static inline int16_t mipsdsp_rndq15_mul_q15_q15(uint16_t a, uint16_t b,
+                                                 CPUMIPSState *env)
+{
+    int32_t product;
+    uint32_t temp;
+
+    if ((a == 0x8000) && (b == 0x8000)) {
+        temp = 0x7FFF0000;
+        set_DSPControl_overflow_flag(1, 21, env);
+    } else {
+        product = (int32_t)(int16_t)a * (int32_t)(int16_t)b;
+        temp = (uint32_t)product << 1;
+        temp = temp + 0x00008000;
+    }
+
+    return (temp & 0xFFFF0000) >> 16;
+}
+
+/* Q15 x Q15 fractional multiply, truncated to Q15 (returned in the low
+ * 16 bits).
+ *
+ * `a` and `b` hold signed Q15 values in uint16_t containers, so they are
+ * sign-extended before multiplying; an unsigned multiply gives wrong
+ * results for negative operands.  The doubled product always fits in
+ * int32_t outside the special case, and the final extraction is done on
+ * the unsigned bit pattern.
+ *
+ * The one input pair 0x8000 * 0x8000 returns 0x7FFF and sets DSPControl
+ * bit 21. */
+static inline int32_t mipsdsp_sat16_mul_q15_q15(uint16_t a, uint16_t b,
+                                                CPUMIPSState *env)
+{
+    int32_t temp;
+
+    if ((a == 0x8000) && (b == 0x8000)) {
+        temp = 0x7FFF0000;
+        set_DSPControl_overflow_flag(1, 21, env);
+    } else {
+        temp = ((int32_t)(int16_t)a * (int32_t)(int16_t)b) * 2;
+    }
+
+    return ((uint32_t)temp >> 16) & 0x0000FFFF;
+}
+
+/* Round the 32-bit value `a` by adding 0x8000 and return its upper 16
+ * bits, saturating when the rounding add would overflow.
+ *
+ * Only a positive constant is added, so overflow can happen in one
+ * direction only: any a above 0x7FFF7FFF would exceed INT32_MAX.  Those
+ * inputs return 0x7FFF and set DSPControl bit 22.  The check is made
+ * before the addition, and the addition itself is done in 64 bits, so no
+ * 32-bit signed overflow occurs (the previous threshold of 0x7FFF8000 let
+ * inputs in 0x7FFF8000 itself wrap to a negative result). */
+static inline uint16_t mipsdsp_trunc16_sat16_round(int32_t a,
+                                                   CPUMIPSState *env)
+{
+    int64_t temp;
+
+    if (a > 0x7FFF7FFF) {
+        temp = 0x7FFFFFFF;
+        set_DSPControl_overflow_flag(1, 22, env);
+    } else {
+        temp = (int64_t)a + 0x00008000;
+    }
+
+    return (temp >> 16) & 0xFFFF;
+}
+
+/* Reduce a 16-bit value to an unsigned byte with saturation.
+ * If bit 15 is set the result is 0x00; if the low 15 bits exceed 0x7F80
+ * the result is 0xFF; both cases set DSPControl bit 22.  Otherwise bits
+ * 14..7 are returned. */
+static inline uint8_t mipsdsp_sat8_reduce_precision(uint16_t a,
+                                                    CPUMIPSState *env)
+{
+    uint16_t mag = a & 0x7FFF;
+
+    if (((a >> 15) & 0x01) != 0) {
+        set_DSPControl_overflow_flag(1, 22, env);
+        return 0x00;
+    }
+
+    if (mag > 0x7F80) {
+        set_DSPControl_overflow_flag(1, 22, env);
+        return 0xFF;
+    }
+
+    return (mag >> 7) & 0xFFFF;
+}
+
+/* Shift the byte `a` left by `s` bits and return the low 8 bits.
+ * DSPControl bit 22 is set when any non-zero bit is shifted out.
+ *
+ * The discarded bits are exactly the top `s` bits of `a`.  The previous
+ * sign-based computation produced a non-zero `discard` for every input
+ * with bit 7 set and also inspected one bit too many for other inputs,
+ * so it reported overflow when nothing was lost.
+ * `s` is expected to be in 0..7 - TODO confirm callers mask it. */
+static inline uint8_t mipsdsp_lshift8(uint8_t a, uint8_t s, CPUMIPSState *env)
+{
+    uint8_t discard;
+
+    if (s != 0) {
+        discard = a >> (8 - s);
+
+        if (discard != 0x00) {
+            set_DSPControl_overflow_flag(1, 22, env);
+        }
+    }
+    return a << s;
+}
+
+/* Shift the halfword `a` left by `s` bits and return the low 16 bits.
+ * `discard` holds the bits shifted out together with the new top bit,
+ * sign-filled above; DSPControl bit 22 is set unless they are all zero
+ * or all one. */
+static inline uint16_t mipsdsp_lshift16(uint16_t a, uint8_t s,
+                                        CPUMIPSState *env)
+{
+    uint16_t discard;
+
+    if (s == 0) {
+        return a;
+    }
+
+    if (((a >> 15) & 0x01) != 0) {
+        discard = (((0x01 << (16 - s)) - 1) << s) |
+                  ((a >> (15 - s)) & ((0x01 << s) - 1));
+    } else {
+        discard = a >> (15 - s);
+    }
+
+    if ((discard != 0x0000) && (discard != 0xFFFF)) {
+        set_DSPControl_overflow_flag(1, 22, env);
+    }
+
+    return a << s;
+}
+
+
+/* Shift the word `a` left by `s` bits and return the low 32 bits.
+ * DSPControl bit 22 is set unless the bits examined in `discard` are all
+ * zero or all one. */
+static inline uint32_t mipsdsp_lshift32(uint32_t a, uint8_t s,
+                                        CPUMIPSState *env)
+{
+    uint32_t discard;
+
+    if (s == 0) {
+        return a;
+    } else {
+        /* Signed shift by (32 - s): keeps the s bits that will be shifted
+           out, sign-filled above.
+           NOTE(review): the 16-bit sibling shifts by (15 - s) and so also
+           includes the bit that becomes the new sign; here the distance
+           is (32 - s), not (31 - s) - confirm which is intended. */
+        discard = (int32_t)a >> (31 - (s - 1));
+
+        if ((discard != 0x00000000) && (discard != 0xFFFFFFFF)) {
+            set_DSPControl_overflow_flag(1, 22, env);
+        }
+        return a << s;
+    }
+}
+
+/* Saturating left shift of the halfword `a` by `s` bits.
+ * `discard` holds the bits shifted out together with the new top bit,
+ * sign-filled above.  If they are neither all zero nor all one, the
+ * result is 0x7FFF (bit 15 of a clear) or 0x8000 (bit 15 set) and
+ * DSPControl bit 22 is set. */
+static inline uint16_t mipsdsp_sat16_lshift(uint16_t a, uint8_t s,
+                                            CPUMIPSState *env)
+{
+    uint8_t  sign;
+    uint16_t discard;
+
+    if (s == 0) {
+        return a;
+    }
+
+    sign = (a >> 15) & 0x01;
+    if (sign != 0) {
+        discard = (((0x01 << (16 - s)) - 1) << s) |
+                  ((a >> (15 - s)) & ((0x01 << s) - 1));
+    } else {
+        discard = a >> (15 - s);
+    }
+
+    if ((discard == 0x0000) || (discard == 0xFFFF)) {
+        return a << s;
+    }
+
+    set_DSPControl_overflow_flag(1, 22, env);
+    return (sign == 0) ? 0x7FFF : 0x8000;
+}
+
+/* Saturating left shift of the word `a` by `s` bits.
+ * `discard` holds the bits shifted out together with the new top bit,
+ * sign-filled above.  If they are neither all zero nor all one, the
+ * result is 0x7FFFFFFF (bit 31 of a clear) or 0x80000000 (bit 31 set)
+ * and DSPControl bit 22 is set.
+ *
+ * The masks are built from unsigned constants: with a plain int,
+ * 0x01 << (32 - s) and the following << s overflow for the shift counts
+ * this function is used with.  The resulting bit patterns are unchanged.
+ * `s` is expected to be in 0..31 - TODO confirm callers mask it. */
+static inline uint32_t mipsdsp_sat32_lshift(uint32_t a, uint8_t s,
+                                            CPUMIPSState *env)
+{
+    uint8_t  sign;
+    uint32_t discard;
+
+    if (s == 0) {
+        return a;
+    } else {
+        sign = (a >> 31) & 0x01;
+        if (sign != 0) {
+            discard = (((1u << (32 - s)) - 1) << s) |
+                      ((a >> (30 - (s - 1))) & ((1u << s) - 1));
+        } else {
+            discard = a >> (30 - (s - 1));
+        }
+
+        if ((discard != 0x00000000) && (discard != 0xFFFFFFFF)) {
+            set_DSPControl_overflow_flag(1, 22, env);
+            return (sign == 0) ? 0x7FFFFFFF : 0x80000000;
+        } else {
+            return a << s;
+        }
+    }
+}
+
+/* Rounding arithmetic right shift of the signed byte held in `a` by `s`
+ * bits: the value is shifted by s - 1 (left by one when s == 0), 1 is
+ * added, and the last bit is dropped. */
+static inline uint8_t mipsdsp_rnd8_rashift(uint8_t a, uint8_t s)
+{
+    uint32_t pre;
+
+    if (s != 0) {
+        pre = (int32_t)(int8_t)a >> (s - 1);
+    } else {
+        pre = (uint32_t)a << 1;
+    }
+    pre += 1;
+
+    return pre >> 1;
+}
+
+/* Rounding arithmetic right shift of the signed halfword held in `a` by
+ * `s` bits: the value is shifted by s - 1 (left by one when s == 0), 1
+ * is added, and the last bit is dropped. */
+static inline uint16_t mipsdsp_rnd16_rashift(uint16_t a, uint8_t s)
+{
+    uint32_t pre;
+
+    if (s != 0) {
+        pre = (int32_t)(int16_t)a >> (s - 1);
+    } else {
+        pre = (uint32_t)a << 1;
+    }
+    pre += 1;
+
+    return pre >> 1;
+}
+
+/* Rounding arithmetic right shift of the signed word held in `a` by `s`
+ * bits, computed in 64 bits: the value is shifted by s - 1 (left by one
+ * when s == 0), 1 is added, and the last bit is dropped. */
+static inline uint32_t mipsdsp_rnd32_rashift(uint32_t a, uint8_t s)
+{
+    int64_t pre;
+
+    if (s != 0) {
+        pre = (int64_t)(int32_t)a >> (s - 1);
+    } else {
+        pre = (uint64_t)a << 1;
+    }
+
+    return ((pre + 1) >> 1) & 0xFFFFFFFFull;
+}
+
+/* Wrapping signed 16-bit subtract a - b; sets DSPControl bit 20 on
+ * signed overflow.
+ *
+ * A subtraction overflows exactly when a and b differ in sign and the
+ * result's sign differs from a's.  This is tested directly on a and b:
+ * negating b and reusing the addition test gives the wrong answer for
+ * b == -32768, whose negation still has bit 15 set. */
+static inline uint16_t mipsdsp_sub_i16(int16_t a, int16_t b, CPUMIPSState *env)
+{
+    int16_t  temp;
+
+    temp = a - b;
+    if ((a ^ b) & (a ^ temp) & 0x8000) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+
+    return temp;
+}
+
+/* Saturating signed 16-bit subtract a - b.  On overflow the result is
+ * 0x7FFF when a is non-negative and 0x8000 otherwise, and DSPControl
+ * bit 20 is set.
+ *
+ * Overflow is detected directly from the signs of a, b and the result
+ * (a and b differ in sign, result differs from a); negating b and using
+ * the addition test fails for b == -32768.  The saturation direction
+ * uses a >= 0 because a == 0 minus a negative b overflows upwards. */
+static inline uint16_t mipsdsp_sat16_sub(int16_t a, int16_t b,
+                                         CPUMIPSState *env)
+{
+    int16_t  temp;
+
+    temp = a - b;
+    if ((a ^ b) & (a ^ temp) & 0x8000) {
+        if (a >= 0) {
+            temp = 0x7FFF;
+        } else {
+            temp = 0x8000;
+        }
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+
+    return temp;
+}
+
+/* Saturating signed 32-bit subtract a - b.  On overflow the result is
+ * 0x7FFFFFFF when a is non-negative and 0x80000000 otherwise, and
+ * DSPControl bit 20 is set.
+ *
+ * The difference is formed on the unsigned bit patterns so it wraps
+ * instead of overflowing, and overflow is detected directly from the
+ * signs of a, b and the result; negating b (which itself overflows for
+ * INT32_MIN) is avoided.  The saturation direction uses a >= 0 because
+ * a == 0 minus a negative b overflows upwards. */
+static inline uint32_t mipsdsp_sat32_sub(int32_t a, int32_t b,
+                                         CPUMIPSState *env)
+{
+    int32_t  temp;
+
+    temp = (int32_t)((uint32_t)a - (uint32_t)b);
+    if ((a ^ b) & (a ^ temp) & 0x80000000) {
+        if (a >= 0) {
+            temp = 0x7FFFFFFF;
+        } else {
+            temp = 0x80000000;
+        }
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+
+    return temp & 0xFFFFFFFFull;
+}
+
+/* Halving subtract: (a - b) >> 1, computed in 32 bits so the difference
+ * cannot overflow, truncated to 16 bits. */
+static inline uint16_t mipsdsp_rshift1_sub_q16(int16_t a, int16_t b)
+{
+    int32_t diff = (int32_t)a - (int32_t)b;
+
+    diff >>= 1;
+    return diff & 0x0000FFFF;
+}
+
+/* Rounding halving subtract: (a - b + 1) >> 1, computed in 32 bits and
+ * truncated to 16 bits. */
+static inline uint16_t mipsdsp_rrshift1_sub_q16(int16_t a, int16_t b)
+{
+    int32_t diff = (int32_t)a - (int32_t)b + 1;
+
+    diff >>= 1;
+    return diff & 0x0000FFFF;
+}
+
+/* Halving subtract: (a - b) >> 1, computed in 64 bits so the difference
+ * cannot overflow, truncated to 32 bits. */
+static inline uint32_t mipsdsp_rshift1_sub_q32(int32_t a, int32_t b)
+{
+    int64_t diff = (int64_t)a - (int64_t)b;
+
+    diff >>= 1;
+    return diff & 0xFFFFFFFFull;
+}
+
+/* Rounding halving subtract: (a - b + 1) >> 1, computed in 64 bits and
+ * truncated to 32 bits. */
+static inline uint32_t mipsdsp_rrshift1_sub_q32(int32_t a, int32_t b)
+{
+    int64_t diff = (int64_t)a - (int64_t)b + 1;
+
+    diff >>= 1;
+    return diff & 0xFFFFFFFFull;
+}
+
+/* Wrapping unsigned 16-bit subtract a - b; a borrow (bit 16 of the
+ * 32-bit difference set) sets DSPControl bit 20. */
+static inline uint16_t mipsdsp_sub_u16_u16(uint16_t a, uint16_t b,
+                                           CPUMIPSState *env)
+{
+    uint32_t diff = (uint32_t)a - (uint32_t)b;
+
+    if (((diff >> 16) & 0x01) != 0) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+
+    return diff & 0x0000FFFF;
+}
+
+/* Saturating unsigned 16-bit subtract a - b: a borrow clamps the result
+ * to 0 and sets DSPControl bit 20. */
+static inline uint16_t mipsdsp_satu16_sub_u16_u16(uint16_t a, uint16_t b,
+                                                  CPUMIPSState *env)
+{
+    uint32_t diff = (uint32_t)a - (uint32_t)b;
+
+    if (((diff >> 16) & 0x01) == 0) {
+        return diff & 0x0000FFFF;
+    }
+
+    set_DSPControl_overflow_flag(1, 20, env);
+    return 0x0000;
+}
+
+/* Wrapping unsigned 8-bit subtract a - b; a borrow (bit 8 of the 16-bit
+ * difference set) sets DSPControl bit 20. */
+static inline uint8_t mipsdsp_sub_u8(uint8_t a, uint8_t b, CPUMIPSState *env)
+{
+    uint16_t diff = (uint16_t)a - (uint16_t)b;
+
+    if (((diff >> 8) & 0x01) != 0) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+
+    return diff & 0x00FF;
+}
+
+/* Saturating unsigned 8-bit subtract a - b: a borrow clamps the result
+ * to 0 and sets DSPControl bit 20. */
+static inline uint8_t mipsdsp_satu8_sub(uint8_t a, uint8_t b, CPUMIPSState *env)
+{
+    uint16_t diff = (uint16_t)a - (uint16_t)b;
+
+    if (((diff >> 8) & 0x01) == 0) {
+        return diff & 0x00FF;
+    }
+
+    set_DSPControl_overflow_flag(1, 20, env);
+    return 0x00;
+}
+
+/* Wrapping signed 32-bit subtract a - b; sets DSPControl bit 20 on
+ * signed overflow.
+ *
+ * The difference is formed on the unsigned bit patterns so it wraps
+ * instead of overflowing.  Overflow is detected directly: a and b differ
+ * in sign and the result's sign differs from a's.  Negating b and using
+ * the addition test is wrong for b == INT32_MIN. */
+static inline uint32_t mipsdsp_sub32(int32_t a, int32_t b, CPUMIPSState *env)
+{
+    int32_t temp;
+
+    temp = (int32_t)((uint32_t)a - (uint32_t)b);
+    if ((a ^ b) & (a ^ temp) & 0x80000000) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+
+    return temp;
+}
+
+/* Wrapping signed 32-bit add; sets DSPControl bit 20 when the sum
+ * overflows the 32-bit signed range. */
+static inline int32_t mipsdsp_add_i32(int32_t a, int32_t b, CPUMIPSState *env)
+{
+    int32_t sum = a + b;
+
+    if (MIPSDSP_OVERFLOW(a, b, sum, 0x80000000)) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+    return sum;
+}
+
+/* Signed compare: 1 if a equals b, else 0. */
+static inline int32_t mipsdsp_cmp_eq(int32_t a, int32_t b)
+{
+    return (a == b) ? 1 : 0;
+}
+
+/* Signed compare: 1 if a is less than or equal to b, else 0. */
+static inline int32_t mipsdsp_cmp_le(int32_t a, int32_t b)
+{
+    return (a <= b) ? 1 : 0;
+}
+
+/* Signed compare: 1 if a is strictly less than b, else 0. */
+static inline int32_t mipsdsp_cmp_lt(int32_t a, int32_t b)
+{
+    return (a < b) ? 1 : 0;
+}
+
+/* Unsigned compare: 1 if a equals b, else 0. */
+static inline int32_t mipsdsp_cmpu_eq(uint32_t a, uint32_t b)
+{
+    return (a == b) ? 1 : 0;
+}
+
+/* Unsigned compare: 1 if a is less than or equal to b, else 0. */
+static inline int32_t mipsdsp_cmpu_le(uint32_t a, uint32_t b)
+{
+    return (a <= b) ? 1 : 0;
+}
+
+/* Unsigned compare: 1 if a is strictly less than b, else 0. */
+static inline int32_t mipsdsp_cmpu_lt(uint32_t a, uint32_t b)
+{
+    return (a < b) ? 1 : 0;
+}
+/*** MIPS DSP internal functions end ***/
+
+/* Bit masks: upper/lower 32 bits of a 64-bit value, upper/lower 16 bits
+ * of a 32-bit value, and the four bytes of a 32-bit value (Q3 is the
+ * most significant byte). */
+#define MIPSDSP_LHI 0xFFFFFFFF00000000ull
+#define MIPSDSP_LLO 0x00000000FFFFFFFFull
+#define MIPSDSP_HI  0xFFFF0000
+#define MIPSDSP_LO  0x0000FFFF
+#define MIPSDSP_Q3  0xFF000000
+#define MIPSDSP_Q2  0x00FF0000
+#define MIPSDSP_Q1  0x0000FF00
+#define MIPSDSP_Q0  0x000000FF
+
+/* Split the low 32 bits of num into four bytes; a receives the most
+ * significant byte, d the least.  Arguments are parenthesized so that
+ * expressions can be passed; num is evaluated four times. */
+#define MIPSDSP_SPLIT32_8(num, a, b, c, d)    \
+    do {                                      \
+        (a) = ((num) >> 24) & MIPSDSP_Q0;     \
+        (b) = ((num) >> 16) & MIPSDSP_Q0;     \
+        (c) = ((num) >> 8) & MIPSDSP_Q0;      \
+        (d) = (num) & MIPSDSP_Q0;             \
+    } while (0)
+
+/* Split the low 32 bits of num into two halfwords; a receives the upper
+ * one, b the lower.  num is evaluated twice. */
+#define MIPSDSP_SPLIT32_16(num, a, b)         \
+    do {                                      \
+        (a) = ((num) >> 16) & MIPSDSP_LO;     \
+        (b) = (num) & MIPSDSP_LO;             \
+    } while (0)
+
+/* Build a helper return value from 32-bit pieces: the pieces are packed
+ * into a 32-bit word, which is then sign-extended to target_long.
+ * Arguments are parenthesized so that expressions can be passed.  In
+ * MIPSDSP_RETURN32_8 only d is masked; a, b and c are expected to be
+ * byte-sized already. */
+#define MIPSDSP_RETURN32(a)             ((target_long)(int32_t)(a))
+#define MIPSDSP_RETURN32_8(a, b, c, d)  ((target_long)(int32_t)   \
+                                         (((uint32_t)(a) << 24) | \
+                                         (((uint32_t)(b) << 16) | \
+                                         (((uint32_t)(c) << 8) |  \
+                                          ((uint32_t)(d) & 0xFF)))))
+#define MIPSDSP_RETURN32_16(a, b)       ((target_long)(int32_t)   \
+                                         (((uint32_t)(a) << 16) | \
+                                          ((uint32_t)(b) & 0xFFFF)))
+
+#ifdef TARGET_MIPS64
+/* Split a 64-bit value into four halfwords; a receives the most
+ * significant one, d the least.  Arguments are parenthesized so that
+ * expressions can be passed; num is evaluated four times. */
+#define MIPSDSP_SPLIT64_16(num, a, b, c, d)    \
+    do {                                       \
+        (a) = ((num) >> 48) & MIPSDSP_LO;      \
+        (b) = ((num) >> 32) & MIPSDSP_LO;      \
+        (c) = ((num) >> 16) & MIPSDSP_LO;      \
+        (d) = (num) & MIPSDSP_LO;              \
+    } while (0)
+
+/* Split a 64-bit value into two words; a receives the upper one, b the
+ * lower.  num is evaluated twice. */
+#define MIPSDSP_SPLIT64_32(num, a, b)          \
+    do {                                       \
+        (a) = ((num) >> 32) & MIPSDSP_LLO;     \
+        (b) = (num) & MIPSDSP_LLO;             \
+    } while (0)
+
+/* Pack pieces into a 64-bit value.  The pieces are not masked, so each
+ * must already fit its field (16 or 32 bits) once converted to
+ * uint64_t. */
+#define MIPSDSP_RETURN64_16(a, b, c, d) (((uint64_t)(a) << 48) | \
+                                         ((uint64_t)(b) << 32) | \
+                                         ((uint64_t)(c) << 16) | \
+                                         (uint64_t)(d))
+#define MIPSDSP_RETURN64_32(a, b)       (((uint64_t)(a) << 32) | (uint64_t)(b))
+#endif
+
+/** DSP Arithmetic Sub-class insns **/
+/*
+ * ARITH_PH(name, func) emits helper_<name>_ph(rs, rt): mipsdsp_<func> is
+ * applied first to the upper halfwords and then to the lower halfwords of
+ * rs and rt, and the two 16-bit results are packed into one 32-bit value.
+ */
+#define ARITH_PH(name, func)                                      \
+target_ulong helper_##name##_ph(target_ulong rs, target_ulong rt) \
+{                                                                 \
+    uint16_t rs_hi, rs_lo;                                        \
+    uint16_t rt_hi, rt_lo;                                        \
+    uint16_t res_hi, res_lo;                                      \
+                                                                  \
+    MIPSDSP_SPLIT32_16(rs, rs_hi, rs_lo);                         \
+    MIPSDSP_SPLIT32_16(rt, rt_hi, rt_lo);                         \
+                                                                  \
+    res_hi = mipsdsp_##func(rs_hi, rt_hi);                        \
+    res_lo = mipsdsp_##func(rs_lo, rt_lo);                        \
+                                                                  \
+    return MIPSDSP_RETURN32_16(res_hi, res_lo);                   \
+}
+
+/*
+ * ARITH_PH_ENV(name, func) is the same as ARITH_PH except that the
+ * generated helper takes the CPU state and forwards it to mipsdsp_<func>.
+ */
+#define ARITH_PH_ENV(name, func)                                  \
+target_ulong helper_##name##_ph(target_ulong rs, target_ulong rt, \
+                                CPUMIPSState *env)                \
+{                                                                 \
+    uint16_t rs_hi, rs_lo;                                        \
+    uint16_t rt_hi, rt_lo;                                        \
+    uint16_t res_hi, res_lo;                                      \
+                                                                  \
+    MIPSDSP_SPLIT32_16(rs, rs_hi, rs_lo);                         \
+    MIPSDSP_SPLIT32_16(rt, rt_hi, rt_lo);                         \
+                                                                  \
+    res_hi = mipsdsp_##func(rs_hi, rt_hi, env);                   \
+    res_lo = mipsdsp_##func(rs_lo, rt_lo, env);                   \
+                                                                  \
+    return MIPSDSP_RETURN32_16(res_hi, res_lo);                   \
+}
+
+/* Paired-halfword additions. */
+ARITH_PH_ENV(addq, add_i16);
+ARITH_PH_ENV(addq_s, sat_add_i16);
+ARITH_PH_ENV(addu, add_u16);
+ARITH_PH_ENV(addu_s, sat_add_u16);
+
+ARITH_PH(addqh, rshift1_add_q16);
+ARITH_PH(addqh_r, rrshift1_add_q16);
+
+/* Paired-halfword subtractions. */
+ARITH_PH_ENV(subq, sub_i16);
+ARITH_PH_ENV(subq_s, sat16_sub);
+ARITH_PH_ENV(subu, sub_u16_u16);
+ARITH_PH_ENV(subu_s, satu16_sub_u16_u16);
+
+ARITH_PH(subqh, rshift1_sub_q16);
+ARITH_PH(subqh_r, rrshift1_sub_q16);
+
+#undef ARITH_PH
+#undef ARITH_PH_ENV
+
+#ifdef TARGET_MIPS64
+/*
+ * ARITH_QH_ENV(name, func) emits helper_<name>_qh(rs, rt, env):
+ * mipsdsp_<func> is applied to each of the four halfword lanes of the
+ * 64-bit operands, most significant lane first, and the four 16-bit
+ * results are packed back in the same lane order.
+ */
+#define ARITH_QH_ENV(name, func) \
+target_ulong helper_##name##_qh(target_ulong rs, target_ulong rt, \
+                                CPUMIPSState *env)           \
+{                                                            \
+    uint16_t rs_h3, rs_h2, rs_h1, rs_h0;                     \
+    uint16_t rt_h3, rt_h2, rt_h1, rt_h0;                     \
+    uint16_t out3, out2, out1, out0;                         \
+                                                             \
+    MIPSDSP_SPLIT64_16(rs, rs_h3, rs_h2, rs_h1, rs_h0);      \
+    MIPSDSP_SPLIT64_16(rt, rt_h3, rt_h2, rt_h1, rt_h0);      \
+                                                             \
+    out3 = mipsdsp_##func(rs_h3, rt_h3, env);                \
+    out2 = mipsdsp_##func(rs_h2, rt_h2, env);                \
+    out1 = mipsdsp_##func(rs_h1, rt_h1, env);                \
+    out0 = mipsdsp_##func(rs_h0, rt_h0, env);                \
+                                                             \
+    return MIPSDSP_RETURN64_16(out3, out2, out1, out0);      \
+}
+
+/* Quad-halfword additions. */
+ARITH_QH_ENV(addq, add_i16);
+ARITH_QH_ENV(addq_s, sat_add_i16);
+ARITH_QH_ENV(addu, add_u16);
+ARITH_QH_ENV(addu_s, sat_add_u16);
+
+/* Quad-halfword subtractions. */
+ARITH_QH_ENV(subq, sub_i16);
+ARITH_QH_ENV(subq_s, sat16_sub);
+ARITH_QH_ENV(subu, sub_u16_u16);
+ARITH_QH_ENV(subu_s, satu16_sub_u16_u16);
+
+#undef ARITH_QH_ENV
+
+#endif
+
+/*
+ * ARITH_W(name, func) emits helper_<name>_w(rs, rt), which returns the
+ * 32-bit result of mipsdsp_<func>(rs, rt) through MIPSDSP_RETURN32.
+ */
+#define ARITH_W(name, func) \
+target_ulong helper_##name##_w(target_ulong rs, target_ulong rt) \
+{                                                                \
+    uint32_t result = mipsdsp_##func(rs, rt);                    \
+                                                                 \
+    return MIPSDSP_RETURN32(result);                             \
+}
+
+/* Same as ARITH_W, with the CPU state forwarded to mipsdsp_<func>. */
+#define ARITH_W_ENV(name, func) \
+target_ulong helper_##name##_w(target_ulong rs, target_ulong rt, \
+                               CPUMIPSState *env)                \
+{                                                                \
+    uint32_t result = mipsdsp_##func(rs, rt, env);               \
+                                                                 \
+    return MIPSDSP_RETURN32(result);                             \
+}
+
+/* Word additions. */
+ARITH_W_ENV(addq_s, sat_add_i32);
+
+ARITH_W(addqh, rshift1_add_q32);
+ARITH_W(addqh_r, rrshift1_add_q32);
+
+/* Word subtractions. */
+ARITH_W_ENV(subq_s, sat32_sub);
+
+ARITH_W(subqh, rshift1_sub_q32);
+ARITH_W(subqh_r, rrshift1_sub_q32);
+
+#undef ARITH_W
+#undef ARITH_W_ENV
+
+/*
+ * Return mipsdsp_sat_abs32(rt, env), truncated to 32 bits and then
+ * zero-extended to target_ulong.
+ */
+target_ulong helper_absq_s_w(target_ulong rt, CPUMIPSState *env)
+{
+    uint32_t magnitude = mipsdsp_sat_abs32(rt, env);
+
+    return (target_ulong)magnitude;
+}
+
+
+#if defined(TARGET_MIPS64)
+
+/*
+ * ARITH_PW_ENV(name, func) emits helper_<name>_pw(rs, rt, env):
+ * mipsdsp_<func> is applied to the upper word pair and then to the lower
+ * word pair of the 64-bit operands; the two 32-bit results are packed
+ * back in the same lane order.
+ */
+#define ARITH_PW_ENV(name, func) \
+target_ulong helper_##name##_pw(target_ulong rs, target_ulong rt, \
+                                CPUMIPSState *env)                \
+{                                                                 \
+    uint32_t rs_hi, rs_lo;                                        \
+    uint32_t rt_hi, rt_lo;                                        \
+    uint32_t res_hi, res_lo;                                      \
+                                                                  \
+    MIPSDSP_SPLIT64_32(rs, rs_hi, rs_lo);                         \
+    MIPSDSP_SPLIT64_32(rt, rt_hi, rt_lo);                         \
+                                                                  \
+    res_hi = mipsdsp_##func(rs_hi, rt_hi, env);                   \
+    res_lo = mipsdsp_##func(rs_lo, rt_lo, env);                   \
+                                                                  \
+    return MIPSDSP_RETURN64_32(res_hi, res_lo);                   \
+}
+
+ARITH_PW_ENV(addq, add_i32);
+ARITH_PW_ENV(addq_s, sat_add_i32);
+ARITH_PW_ENV(subq, sub32);
+ARITH_PW_ENV(subq_s, sat32_sub);
+
+#undef ARITH_PW_ENV
+
+#endif
+
+/*
+ * ARITH_QB(name, func) emits helper_<name>_qb(rs, rt): mipsdsp_<func> is
+ * applied to each of the four byte lanes of rs and rt, least significant
+ * lane first, and the four 8-bit results are packed into a 32-bit value.
+ */
+#define ARITH_QB(name, func) \
+target_ulong helper_##name##_qb(target_ulong rs, target_ulong rt) \
+{                                                                 \
+    uint8_t  rs0, rs1, rs2, rs3;                                  \
+    uint8_t  rt0, rt1, rt2, rt3;                                  \
+    uint8_t  temp0, temp1, temp2, temp3;                          \
+                                                                  \
+    MIPSDSP_SPLIT32_8(rs, rs3, rs2, rs1, rs0);                    \
+    MIPSDSP_SPLIT32_8(rt, rt3, rt2, rt1, rt0);                    \
+                                                                  \
+    temp0 = mipsdsp_##func(rs0, rt0);                             \
+    temp1 = mipsdsp_##func(rs1, rt1);                             \
+    temp2 = mipsdsp_##func(rs2, rt2);                             \
+    temp3 = mipsdsp_##func(rs3, rt3);                             \
+                                                                  \
+    return MIPSDSP_RETURN32_8(temp3, temp2, temp1, temp0);        \
+}
+
+/* Same as ARITH_QB, with the CPU state forwarded to mipsdsp_<func>. */
+#define ARITH_QB_ENV(name, func) \
+target_ulong helper_##name##_qb(target_ulong rs, target_ulong rt, \
+                                CPUMIPSState *env)          \
+{                                                           \
+    uint8_t  rs0, rs1, rs2, rs3;                            \
+    uint8_t  rt0, rt1, rt2, rt3;                            \
+    uint8_t  temp0, temp1, temp2, temp3;                    \
+                                                            \
+    MIPSDSP_SPLIT32_8(rs, rs3, rs2, rs1, rs0);              \
+    MIPSDSP_SPLIT32_8(rt, rt3, rt2, rt1, rt0);              \
+                                                            \
+    temp0 = mipsdsp_##func(rs0, rt0, env);                  \
+    temp1 = mipsdsp_##func(rs1, rt1, env);                  \
+    temp2 = mipsdsp_##func(rs2, rt2, env);                  \
+    temp3 = mipsdsp_##func(rs3, rt3, env);                  \
+                                                            \
+    return MIPSDSP_RETURN32_8(temp3, temp2, temp1, temp0);  \
+}
+
+ARITH_QB(adduh, rshift1_add_u8);
+ARITH_QB(adduh_r, rrshift1_add_u8);
+
+ARITH_QB_ENV(addu, add_u8);
+ARITH_QB_ENV(addu_s, sat_add_u8);
+
+/* Undefine the generators actually defined above so they do not leak. */
+#undef ARITH_QB
+#undef ARITH_QB_ENV
+
+#if defined(TARGET_MIPS64)
+/*
+ * ARITH_OB(name, func) emits helper_<name>_ob(rs, rt): mipsdsp_<func> is
+ * applied to each of the eight byte lanes of the 64-bit operands, least
+ * significant lane first, and each 8-bit result is stored back into the
+ * lane it came from.
+ */
+#define ARITH_OB(name, func) \
+target_ulong helper_##name##_ob(target_ulong rs, target_ulong rt) \
+{                                                                 \
+    unsigned int shift;                                           \
+    uint64_t packed = 0;                                          \
+                                                                  \
+    for (shift = 0; shift < 64; shift += 8) {                     \
+        uint8_t rs_byte = (rs >> shift) & MIPSDSP_Q0;             \
+        uint8_t rt_byte = (rt >> shift) & MIPSDSP_Q0;             \
+        uint8_t out = mipsdsp_##func(rs_byte, rt_byte);           \
+                                                                  \
+        packed |= (uint64_t)out << shift;                         \
+    }                                                             \
+                                                                  \
+    return packed;                                                \
+}
+
+/* Same as ARITH_OB, with the CPU state forwarded to mipsdsp_<func>. */
+#define ARITH_OB_ENV(name, func) \
+target_ulong helper_##name##_ob(target_ulong rs, target_ulong rt, \
+                                CPUMIPSState *env)                \
+{                                                                 \
+    unsigned int shift;                                           \
+    uint64_t packed = 0;                                          \
+                                                                  \
+    for (shift = 0; shift < 64; shift += 8) {                     \
+        uint8_t rs_byte = (rs >> shift) & MIPSDSP_Q0;             \
+        uint8_t rt_byte = (rt >> shift) & MIPSDSP_Q0;             \
+        uint8_t out = mipsdsp_##func(rs_byte, rt_byte, env);      \
+                                                                  \
+        packed |= (uint64_t)out << shift;                         \
+    }                                                             \
+                                                                  \
+    return packed;                                                \
+}
+
+/* Octal-byte additions. */
+ARITH_OB_ENV(addu, add_u8);
+ARITH_OB_ENV(addu_s, sat_add_u8);
+
+ARITH_OB(adduh, rshift1_add_u8);
+ARITH_OB(adduh_r, rrshift1_add_u8);
+
+/* Octal-byte subtractions. */
+ARITH_OB_ENV(subu, sub_u8);
+ARITH_OB_ENV(subu_s, satu8_sub);
+
+ARITH_OB(subuh, rshift1_sub_u8);
+ARITH_OB(subuh_r, rrshift1_sub_u8);
+
+#undef ARITH_OB
+#undef ARITH_OB_ENV
+
+#endif
+
+/*
+ * SUBU_QB(name, func) emits helper_<name>_qb(rs, rt, env):
+ * mipsdsp_<func> is applied to each of the four byte lanes of rs and rt,
+ * most significant lane first, and the four 8-bit results are packed
+ * into a 32-bit value.
+ */
+#define SUBU_QB(name, func) \
+target_ulong helper_##name##_qb(target_ulong rs,               \
+                                target_ulong rt,               \
+                                CPUMIPSState *env)             \
+{                                                              \
+    uint8_t rs_b3, rs_b2, rs_b1, rs_b0;                        \
+    uint8_t rt_b3, rt_b2, rt_b1, rt_b0;                        \
+    uint8_t out3, out2, out1, out0;                            \
+                                                               \
+    MIPSDSP_SPLIT32_8(rs, rs_b3, rs_b2, rs_b1, rs_b0);         \
+    MIPSDSP_SPLIT32_8(rt, rt_b3, rt_b2, rt_b1, rt_b0);         \
+                                                               \
+    out3 = mipsdsp_##func(rs_b3, rt_b3, env);                  \
+    out2 = mipsdsp_##func(rs_b2, rt_b2, env);                  \
+    out1 = mipsdsp_##func(rs_b1, rt_b1, env);                  \
+    out0 = mipsdsp_##func(rs_b0, rt_b0, env);                  \
+                                                               \
+    return MIPSDSP_RETURN32_8(out3, out2, out1, out0);         \
+}
+
+SUBU_QB(subu, sub_u8);
+SUBU_QB(subu_s, satu8_sub);
+
+#undef SUBU_QB
+
+/*
+ * SUBUH_QB(name, var) emits helper_<name>_qb(rs, rt): for each of the
+ * four byte lanes it computes (rs_byte - rt_byte + var) >> 1 and keeps
+ * the low 8 bits.  var is 0 for the truncating form and 1 for the
+ * rounding form.
+ *
+ * The result is returned through MIPSDSP_RETURN32_8, like the other
+ * quad-byte helpers in this file, so that bit 31 is sign-extended when
+ * target_ulong is wider than 32 bits.
+ */
+#define SUBUH_QB(name, var) \
+target_ulong helper_##name##_qb(target_ulong rs, target_ulong rt) \
+{                                                                 \
+    uint8_t rs3, rs2, rs1, rs0;                                   \
+    uint8_t rt3, rt2, rt1, rt0;                                   \
+    uint8_t tempD, tempC, tempB, tempA;                           \
+                                                                  \
+    MIPSDSP_SPLIT32_8(rs, rs3, rs2, rs1, rs0);                    \
+    MIPSDSP_SPLIT32_8(rt, rt3, rt2, rt1, rt0);                    \
+                                                                  \
+    /* The difference may be negative; the shift is done on int. */ \
+    tempD = ((uint16_t)rs3 - (uint16_t)rt3 + var) >> 1;           \
+    tempC = ((uint16_t)rs2 - (uint16_t)rt2 + var) >> 1;           \
+    tempB = ((uint16_t)rs1 - (uint16_t)rt1 + var) >> 1;           \
+    tempA = ((uint16_t)rs0 - (uint16_t)rt0 + var) >> 1;           \
+                                                                  \
+    return MIPSDSP_RETURN32_8(tempD, tempC, tempB, tempA);        \
+}
+
+SUBUH_QB(subuh, 0);
+SUBUH_QB(subuh_r, 1);
+
+#undef SUBUH_QB
+
+/*
+ * Add the low 32 bits of rs and rt as unsigned values.  Bit 32 of the
+ * sum (the carry out) is handed to set_DSPControl_carryflag(); the low
+ * 32 bits of the sum are returned, sign-extended from bit 31.
+ */
+target_ulong helper_addsc(target_ulong rs, target_ulong rt, CPUMIPSState *env)
+{
+    uint64_t sum = ((uint64_t)rs & MIPSDSP_LLO) + ((uint64_t)rt & MIPSDSP_LLO);
+    int32_t carry = (sum >> 32) & 0x01;
+
+    set_DSPControl_carryflag(carry, env);
+
+    return (target_long)(int32_t)(sum & MIPSDSP_LLO);
+}
+
+/*
+ * Add the low 32 bits of rs and rt as signed values together with the
+ * value returned by get_DSPControl_carryflag().  When bits 31 and 32 of
+ * the 64-bit sum differ, set_DSPControl_overflow_flag(1, 20, env) is
+ * called.  The low 32 bits of the sum are returned, sign-extended.
+ */
+target_ulong helper_addwc(target_ulong rs, target_ulong rt, CPUMIPSState *env)
+{
+    int64_t sum;
+    uint32_t low_word;
+
+    sum = (int64_t)(int32_t)rs + (int64_t)(int32_t)rt +
+        get_DSPControl_carryflag(env);
+
+    /* Bits 31 and 32 disagree exactly when their XOR has bit 0 set. */
+    if (((sum >> 31) ^ (sum >> 32)) & 0x01) {
+        set_DSPControl_overflow_flag(1, 20, env);
+    }
+
+    low_word = sum & MIPSDSP_LLO;
+
+    return (target_long)(int32_t)low_word;
+}
+
+/*
+ * Modular subtraction on an index.  rt carries a decrement in bits 7..0
+ * and a last-index value in the 16 bits above them.  When the low 32
+ * bits of rs are zero the last index is returned; otherwise rs minus the
+ * decrement is returned.
+ */
+target_ulong helper_modsub(target_ulong rs, target_ulong rt)
+{
+    uint16_t lastindex = (rt >> 8) & MIPSDSP_LO;
+    int32_t decr = rt & MIPSDSP_Q0;
+
+    if ((rs & MIPSDSP_LLO) == 0x00000000) {
+        return (target_ulong)lastindex;
+    }
+
+    return rs - decr;
+}
+
+/* Return the unsigned sum of the four bytes in the low 32 bits of rs. */
+target_ulong helper_raddu_w_qb(target_ulong rs)
+{
+    unsigned int shift;
+    uint16_t sum = 0;
+
+    /* Most significant byte first, as in the original expression. */
+    for (shift = 32; shift > 0; shift -= 8) {
+        sum += (uint8_t)((rs >> (shift - 8)) & MIPSDSP_Q0);
+    }
+
+    return (target_ulong)sum;
+}
+
+#if defined(TARGET_MIPS64)
+/* Return the unsigned sum of the eight bytes of the 64-bit value rs. */
+target_ulong helper_raddu_l_ob(target_ulong rs)
+{
+    unsigned int shift;
+    uint64_t sum = 0;
+
+    for (shift = 0; shift < 64; shift += 8) {
+        uint16_t lane = (rs >> shift) & MIPSDSP_Q0;
+
+        sum += (uint64_t)lane;
+    }
+
+    return sum;
+}
+#endif
+
+/*
+ * Apply mipsdsp_sat_abs8() to each of the four bytes of rt, most
+ * significant byte first, and pack the results into a 32-bit value.
+ */
+target_ulong helper_absq_s_qb(target_ulong rt, CPUMIPSState *env)
+{
+    uint8_t b3, b2, b1, b0;
+
+    MIPSDSP_SPLIT32_8(rt, b3, b2, b1, b0);
+
+    b3 = mipsdsp_sat_abs8(b3, env);
+    b2 = mipsdsp_sat_abs8(b2, env);
+    b1 = mipsdsp_sat_abs8(b1, env);
+    b0 = mipsdsp_sat_abs8(b0, env);
+
+    return MIPSDSP_RETURN32_8(b3, b2, b1, b0);
+}
+
+/*
+ * Apply mipsdsp_sat_abs16() to the upper and then the lower halfword of
+ * rt and pack the two results into a 32-bit value.
+ */
+target_ulong helper_absq_s_ph(target_ulong rt, CPUMIPSState *env)
+{
+    uint16_t hi, lo;
+
+    MIPSDSP_SPLIT32_16(rt, hi, lo);
+
+    hi = mipsdsp_sat_abs16(hi, env);
+    lo = mipsdsp_sat_abs16(lo, env);
+
+    return MIPSDSP_RETURN32_16(hi, lo);
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * Apply mipsdsp_sat_abs8() to each of the eight bytes of rt, least
+ * significant byte first, and return the results packed in the same
+ * byte lanes.
+ */
+target_ulong helper_absq_s_ob(target_ulong rt, CPUMIPSState *env)
+{
+    int i;
+    uint64_t result = 0;
+
+    for (i = 0; i < 8; i++) {
+        int8_t lane = (rt >> (8 * i)) & MIPSDSP_Q0;
+
+        lane = mipsdsp_sat_abs8(lane, env);
+
+        /*
+         * Accumulate with |= into a zero-initialized result so that all
+         * eight lanes are kept; go through uint8_t so the widened lane
+         * cannot sign-extend into the lanes above it.
+         */
+        result |= (uint64_t)(uint8_t)lane << (8 * i);
+    }
+
+    return result;
+}
+
+/*
+ * Apply mipsdsp_sat_abs16() to each of the four halfwords of rt, most
+ * significant halfword first, and pack the results into a 64-bit value.
+ */
+target_ulong helper_absq_s_qh(target_ulong rt, CPUMIPSState *env)
+{
+    int16_t h3, h2, h1, h0;
+
+    MIPSDSP_SPLIT64_16(rt, h3, h2, h1, h0);
+
+    h3 = mipsdsp_sat_abs16(h3, env);
+    h2 = mipsdsp_sat_abs16(h2, env);
+    h1 = mipsdsp_sat_abs16(h1, env);
+    h0 = mipsdsp_sat_abs16(h0, env);
+
+    return MIPSDSP_RETURN64_16(h3, h2, h1, h0);
+}
+
+/*
+ * Apply mipsdsp_sat_abs32() to the upper and then the lower word of rt
+ * and pack the two results into a 64-bit value.
+ */
+target_ulong helper_absq_s_pw(target_ulong rt, CPUMIPSState *env)
+{
+    int32_t hi, lo;
+
+    MIPSDSP_SPLIT64_32(rt, hi, lo);
+
+    hi = mipsdsp_sat_abs32(hi, env);
+    lo = mipsdsp_sat_abs32(lo, env);
+
+    return MIPSDSP_RETURN64_32(hi, lo);
+}
+#endif
+
+/*
+ * PRECR_QB_PH(name, a, b) emits helper_<name>_qb_ph(rs, rt), which takes
+ * the bytes found at bit offsets a and b of rs and of rt and packs them,
+ * in the order rs@a, rs@b, rt@a, rt@b, into a 32-bit value.
+ */
+#define PRECR_QB_PH(name, a, b)\
+target_ulong helper_##name##_qb_ph(target_ulong rs, target_ulong rt) \
+{                                                                    \
+    uint8_t tempD, tempC, tempB, tempA;                              \
+                                                                     \
+    tempD = (rs >> a) & MIPSDSP_Q0;                                  \
+    tempC = (rs >> b) & MIPSDSP_Q0;                                  \
+    tempB = (rt >> a) & MIPSDSP_Q0;                                  \
+    tempA = (rt >> b) & MIPSDSP_Q0;                                  \
+                                                                     \
+    return MIPSDSP_RETURN32_8(tempD, tempC, tempB, tempA);           \
+}
+
+PRECR_QB_PH(precr, 16, 0);
+PRECR_QB_PH(precrq, 24, 8);
+
+/* Undefine the generator defined above so it does not leak. */
+#undef PRECR_QB_PH
+
+/*
+ * Arithmetic-shift the low 32 bits of rt and of rs right by sa, keep the
+ * low 16 bits of each and pack them with the rt part in the upper
+ * halfword and the rs part in the lower halfword.
+ */
+target_ulong helper_precr_sra_ph_w(uint32_t sa, target_ulong rs,
+                                   target_ulong rt)
+{
+    uint16_t from_rt = ((int32_t)rt >> sa) & MIPSDSP_LO;
+    uint16_t from_rs = ((int32_t)rs >> sa) & MIPSDSP_LO;
+
+    return MIPSDSP_RETURN32_16(from_rt, from_rs);
+}
+
+/*
+ * Rounding variant of helper_precr_sra_ph_w(): the low 32 bits of rt and
+ * of rs are arithmetically shifted right by sa with rounding, the low 16
+ * bits of each are kept, and they are packed with the rt part in the
+ * upper halfword and the rs part in the lower halfword.
+ */
+target_ulong helper_precr_sra_r_ph_w(uint32_t sa,
+                                     target_ulong rs, target_ulong rt)
+{
+    int64_t tempB, tempA;
+
+    /*
+     * If sa == 0, then (sa - 1) would be an invalid shift count, so that
+     * case is handled separately: the halfword is pre-shifted left by one
+     * to cancel the final ">> 1" below.
+     */
+    if (sa == 0) {
+        tempB = (rt & MIPSDSP_LO) << 1;
+        tempA = (rs & MIPSDSP_LO) << 1;
+    } else {
+        /*
+         * Widen to 64 bits before adding the rounding bit: with sa == 1
+         * and an operand of 0x7FFFFFFF the addition would overflow
+         * int32_t, which is undefined behaviour.
+         */
+        tempB = ((int64_t)(int32_t)rt >> (sa - 1)) + 1;
+        tempA = ((int64_t)(int32_t)rs >> (sa - 1)) + 1;
+    }
+    rt = (((tempB >> 1) & MIPSDSP_LO) << 16) | ((tempA >> 1) & MIPSDSP_LO);
+
+    return (target_long)(int32_t)rt;
+}
+
+/*
+ * Pack bits 31..16 of rs (upper halfword of the result) with bits 31..16
+ * of rt (lower halfword of the result).
+ */
+target_ulong helper_precrq_ph_w(target_ulong rs, target_ulong rt)
+{
+    uint16_t rs_top = (rs >> 16) & MIPSDSP_LO;
+    uint16_t rt_top = (rt >> 16) & MIPSDSP_LO;
+
+    return MIPSDSP_RETURN32_16(rs_top, rt_top);
+}
+
+/*
+ * Reduce rs and then rt to 16 bits with mipsdsp_trunc16_sat16_round()
+ * and pack the results, the rs part in the upper halfword.
+ */
+target_ulong helper_precrq_rs_ph_w(target_ulong rs, target_ulong rt,
+                                   CPUMIPSState *env)
+{
+    uint16_t from_rs, from_rt;
+
+    from_rs = mipsdsp_trunc16_sat16_round(rs, env);
+    from_rt = mipsdsp_trunc16_sat16_round(rt, env);
+
+    return MIPSDSP_RETURN32_16(from_rs, from_rt);
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * Take the low byte of each of the four halfwords of rs and of rt and
+ * pack the eight bytes into a 64-bit value: the rs bytes fill bits
+ * 63..32 and the rt bytes fill bits 31..0, keeping their relative order.
+ */
+target_ulong helper_precr_ob_qh(target_ulong rs, target_ulong rt)
+{
+    int lane;
+    uint64_t packed = 0;
+
+    for (lane = 0; lane < 4; lane++) {
+        uint8_t rs_byte = (rs >> (16 * lane)) & MIPSDSP_Q0;
+        uint8_t rt_byte = (rt >> (16 * lane)) & MIPSDSP_Q0;
+
+        packed |= (uint64_t)rs_byte << (32 + 8 * lane);
+        packed |= (uint64_t)rt_byte << (8 * lane);
+    }
+
+    return packed;
+}
+
+/*
+ * PRECR_QH_PW(name, var) emits helper_precr_<name>_qh_pw(rs, rt, sa).
+ * Each of the four 32-bit words of rt and rs is arithmetically shifted
+ * right by sa (var == 0) or shifted right by sa with rounding
+ * (var == 1), and the low 16 bits of each shifted word are packed in the
+ * order rt high word, rt low word, rs high word, rs low word.
+ *
+ * This follows the 32-bit helpers helper_precr_sra_ph_w() and
+ * helper_precr_sra_r_ph_w() in this file, applied to each word.
+ * NOTE(review): the earlier version shifted only the upper halfword of
+ * each word and returned the halfword doubled for the rounding form with
+ * sa == 0; confirm the intended behaviour against the MIPS64 DSP ASE
+ * manual.
+ */
+#define PRECR_QH_PW(name, var) \
+target_ulong helper_precr_##name##_qh_pw(target_ulong rs, target_ulong rt, \
+                                    uint32_t sa)                      \
+{                                                                     \
+    int32_t rs1, rs0;                                                 \
+    int32_t rt1, rt0;                                                 \
+    uint16_t tempD, tempC, tempB, tempA;                              \
+                                                                      \
+    MIPSDSP_SPLIT64_32(rs, rs1, rs0);                                 \
+    MIPSDSP_SPLIT64_32(rt, rt1, rt0);                                 \
+                                                                      \
+    if (sa == 0) {                                                    \
+        /* Nothing to shift or round: keep the low halfwords. */      \
+        tempD = rt1 & MIPSDSP_LO;                                     \
+        tempC = rt0 & MIPSDSP_LO;                                     \
+        tempB = rs1 & MIPSDSP_LO;                                     \
+        tempA = rs0 & MIPSDSP_LO;                                     \
+    } else {                                                          \
+        /* 64-bit arithmetic so the rounding add cannot overflow. */  \
+        tempD = ((((int64_t)rt1 >> (sa - var)) + var) >> var)         \
+                & MIPSDSP_LO;                                         \
+        tempC = ((((int64_t)rt0 >> (sa - var)) + var) >> var)         \
+                & MIPSDSP_LO;                                         \
+        tempB = ((((int64_t)rs1 >> (sa - var)) + var) >> var)         \
+                & MIPSDSP_LO;                                         \
+        tempA = ((((int64_t)rs0 >> (sa - var)) + var) >> var)         \
+                & MIPSDSP_LO;                                         \
+    }                                                                 \
+                                                                      \
+    return MIPSDSP_RETURN64_16(tempD, tempC, tempB, tempA);           \
+}
+
+PRECR_QH_PW(sra, 0);
+PRECR_QH_PW(sra_r, 1);
+
+#undef PRECR_QH_PW
+
+/*
+ * Take the high byte of each of the four halfwords of rs and of rt and
+ * pack the eight bytes into a 64-bit value: the rs bytes fill bits
+ * 63..32 and the rt bytes fill bits 31..0, keeping their relative order.
+ */
+target_ulong helper_precrq_ob_qh(target_ulong rs, target_ulong rt)
+{
+    int lane;
+    uint64_t packed = 0;
+
+    for (lane = 0; lane < 4; lane++) {
+        uint8_t rs_byte = (rs >> (16 * lane + 8)) & MIPSDSP_Q0;
+        uint8_t rt_byte = (rt >> (16 * lane + 8)) & MIPSDSP_Q0;
+
+        packed |= (uint64_t)rs_byte << (32 + 8 * lane);
+        packed |= (uint64_t)rt_byte << (8 * lane);
+    }
+
+    return packed;
+}
+
+/*
+ * Take the upper halfword of each 32-bit word of rs and of rt and pack
+ * the four halfwords into a 64-bit value, the rs halfwords first.
+ */
+target_ulong helper_precrq_qh_pw(target_ulong rs, target_ulong rt)
+{
+    uint16_t rs_hi_top = (rs >> 48) & MIPSDSP_LO;
+    uint16_t rs_lo_top = (rs >> 16) & MIPSDSP_LO;
+    uint16_t rt_hi_top = (rt >> 48) & MIPSDSP_LO;
+    uint16_t rt_lo_top = (rt >> 16) & MIPSDSP_LO;
+
+    return MIPSDSP_RETURN64_16(rs_hi_top, rs_lo_top, rt_hi_top, rt_lo_top);
+}
+
+/*
+ * Reduce each 32-bit word of rs and then of rt to 16 bits with
+ * mipsdsp_trunc16_sat16_round() and pack the four results into a 64-bit
+ * value, the rs results first.
+ */
+target_ulong helper_precrq_rs_qh_pw(target_ulong rs, target_ulong rt,
+                                    CPUMIPSState *env)
+{
+    uint32_t rs_hi = (rs >> 32) & MIPSDSP_LLO;
+    uint32_t rs_lo = rs & MIPSDSP_LLO;
+    uint32_t rt_hi = (rt >> 32) & MIPSDSP_LLO;
+    uint32_t rt_lo = rt & MIPSDSP_LLO;
+    uint16_t out3, out2, out1, out0;
+
+    out3 = mipsdsp_trunc16_sat16_round(rs_hi, env);
+    out2 = mipsdsp_trunc16_sat16_round(rs_lo, env);
+    out1 = mipsdsp_trunc16_sat16_round(rt_hi, env);
+    out0 = mipsdsp_trunc16_sat16_round(rt_lo, env);
+
+    return MIPSDSP_RETURN64_16(out3, out2, out1, out0);
+}
+
+/*
+ * Pack the upper 32 bits of rs (upper word of the result) with the upper
+ * 32 bits of rt (lower word of the result).
+ */
+target_ulong helper_precrq_pw_l(target_ulong rs, target_ulong rt)
+{
+    uint32_t rs_top = (rs >> 32) & MIPSDSP_LLO;
+    uint32_t rt_top = (rt >> 32) & MIPSDSP_LLO;
+
+    return MIPSDSP_RETURN64_32(rs_top, rt_top);
+}
+#endif
+
+/*
+ * Reduce the two halfwords of rs and then the two halfwords of rt to
+ * bytes with mipsdsp_sat8_reduce_precision() and pack the four bytes
+ * into a 32-bit value, the rs bytes first.
+ */
+target_ulong helper_precrqu_s_qb_ph(target_ulong rs, target_ulong rt,
+                                    CPUMIPSState *env)
+{
+    uint16_t rs_hi, rs_lo, rt_hi, rt_lo;
+    uint8_t  out3, out2, out1, out0;
+
+    MIPSDSP_SPLIT32_16(rs, rs_hi, rs_lo);
+    MIPSDSP_SPLIT32_16(rt, rt_hi, rt_lo);
+
+    out3 = mipsdsp_sat8_reduce_precision(rs_hi, env);
+    out2 = mipsdsp_sat8_reduce_precision(rs_lo, env);
+    out1 = mipsdsp_sat8_reduce_precision(rt_hi, env);
+    out0 = mipsdsp_sat8_reduce_precision(rt_lo, env);
+
+    return MIPSDSP_RETURN32_8(out3, out2, out1, out0);
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * Reduce the four halfwords of rs and then the four halfwords of rt to
+ * bytes with mipsdsp_sat8_reduce_precision() and pack the eight bytes
+ * into a 64-bit value: the rs bytes fill bits 63..32 and the rt bytes
+ * fill bits 31..0.
+ */
+target_ulong helper_precrqu_s_ob_qh(target_ulong rs, target_ulong rt,
+                                    CPUMIPSState *env)
+{
+    uint16_t rs_h3, rs_h2, rs_h1, rs_h0;
+    uint16_t rt_h3, rt_h2, rt_h1, rt_h0;
+    uint8_t out7, out6, out5, out4, out3, out2, out1, out0;
+
+    MIPSDSP_SPLIT64_16(rs, rs_h3, rs_h2, rs_h1, rs_h0);
+    MIPSDSP_SPLIT64_16(rt, rt_h3, rt_h2, rt_h1, rt_h0);
+
+    out7 = mipsdsp_sat8_reduce_precision(rs_h3, env);
+    out6 = mipsdsp_sat8_reduce_precision(rs_h2, env);
+    out5 = mipsdsp_sat8_reduce_precision(rs_h1, env);
+    out4 = mipsdsp_sat8_reduce_precision(rs_h0, env);
+    out3 = mipsdsp_sat8_reduce_precision(rt_h3, env);
+    out2 = mipsdsp_sat8_reduce_precision(rt_h2, env);
+    out1 = mipsdsp_sat8_reduce_precision(rt_h1, env);
+    out0 = mipsdsp_sat8_reduce_precision(rt_h0, env);
+
+    return ((uint64_t)out7 << 56) | ((uint64_t)out6 << 48) |
+           ((uint64_t)out5 << 40) | ((uint64_t)out4 << 32) |
+           ((uint64_t)out3 << 24) | ((uint64_t)out2 << 16) |
+           ((uint64_t)out1 << 8) | (uint64_t)out0;
+}
+
+/*
+ * PRECEQ_PW(name, a, b) emits helper_preceq_pw_<name>(rt), which takes
+ * the halfwords at bit offsets a and b of rt, moves each into the upper
+ * 16 bits of a 32-bit word, and packs the two words (the one from offset
+ * a on top) into a 64-bit value.
+ */
+#define PRECEQ_PW(name, a, b) \
+target_ulong helper_preceq_pw_##name(target_ulong rt) \
+{                                                       \
+    uint16_t half_hi = (rt >> (a)) & MIPSDSP_LO;        \
+    uint16_t half_lo = (rt >> (b)) & MIPSDSP_LO;        \
+    uint32_t word_hi = (uint32_t)half_hi << 16;         \
+    uint32_t word_lo = (uint32_t)half_lo << 16;         \
+                                                        \
+    return MIPSDSP_RETURN64_32(word_hi, word_lo);       \
+}
+
+PRECEQ_PW(qhl, 48, 32);
+PRECEQ_PW(qhr, 16, 0);
+PRECEQ_PW(qhla, 48, 16);
+PRECEQ_PW(qhra, 32, 0);
+
+#undef PRECEQ_PW
+
+#endif
+
+/*
+ * PRECEQU_PH(name, a, b) emits helper_precequ_ph_<name>(rt), which takes
+ * the bytes at bit offsets a and b of rt, shifts each left by 7 within a
+ * halfword, and packs the two halfwords (the one from offset a on top)
+ * into a 32-bit value.
+ */
+#define PRECEQU_PH(name, a, b) \
+target_ulong helper_precequ_ph_##name(target_ulong rt) \
+{                                                        \
+    uint16_t half_hi = (rt >> (a)) & MIPSDSP_Q0;         \
+    uint16_t half_lo = (rt >> (b)) & MIPSDSP_Q0;         \
+                                                         \
+    half_hi <<= 7;                                       \
+    half_lo <<= 7;                                       \
+                                                         \
+    return MIPSDSP_RETURN32_16(half_hi, half_lo);        \
+}
+
+PRECEQU_PH(qbl, 24, 16);
+PRECEQU_PH(qbr, 8, 0);
+PRECEQU_PH(qbla, 24, 8);
+PRECEQU_PH(qbra, 16, 0);
+
+#undef PRECEQU_PH
+
+#if defined(TARGET_MIPS64)
+/*
+ * PRECEQU_QH(name, a, b, c, d) emits helper_precequ_qh_<name>(rt), which
+ * takes the bytes at bit offsets a, b, c and d of rt, shifts each left by
+ * 7 within a halfword, and packs the four halfwords (offset a on top)
+ * into a 64-bit value.
+ */
+#define PRECEQU_QH(name, a, b, c, d) \
+target_ulong helper_precequ_qh_##name(target_ulong rt)       \
+{                                                            \
+    uint16_t h3 = (rt >> (a)) & MIPSDSP_Q0;                  \
+    uint16_t h2 = (rt >> (b)) & MIPSDSP_Q0;                  \
+    uint16_t h1 = (rt >> (c)) & MIPSDSP_Q0;                  \
+    uint16_t h0 = (rt >> (d)) & MIPSDSP_Q0;                  \
+                                                             \
+    h3 <<= 7;                                                \
+    h2 <<= 7;                                                \
+    h1 <<= 7;                                                \
+    h0 <<= 7;                                                \
+                                                             \
+    return MIPSDSP_RETURN64_16(h3, h2, h1, h0);              \
+}
+
+PRECEQU_QH(obl, 56, 48, 40, 32);
+PRECEQU_QH(obr, 24, 16, 8, 0);
+PRECEQU_QH(obla, 56, 40, 24, 8);
+PRECEQU_QH(obra, 48, 32, 16, 0);
+
+#undef PRECEQU_QH
+
+#endif
+
+/*
+ * PRECEU_PH(name, a, b) emits helper_preceu_ph_<name>(rt), which takes
+ * the bytes at bit offsets a and b of rt, zero-extends each to a
+ * halfword, and packs the two halfwords (the one from offset a on top)
+ * into a 32-bit value.
+ */
+#define PRECEU_PH(name, a, b) \
+target_ulong helper_preceu_ph_##name(target_ulong rt) \
+{                                                     \
+    uint16_t half_hi = (rt >> (a)) & MIPSDSP_Q0;      \
+    uint16_t half_lo = (rt >> (b)) & MIPSDSP_Q0;      \
+                                                      \
+    return MIPSDSP_RETURN32_16(half_hi, half_lo);     \
+}
+
+PRECEU_PH(qbl, 24, 16);
+PRECEU_PH(qbr, 8, 0);
+PRECEU_PH(qbla, 24, 8);
+PRECEU_PH(qbra, 16, 0);
+
+#undef PRECEU_PH
+
+#if defined(TARGET_MIPS64)
+/*
+ * PRECEU_QH(name, a, b, c, d) emits helper_preceu_qh_<name>(rt), which
+ * takes the bytes at bit offsets a, b, c and d of rt, zero-extends each
+ * to a halfword, and packs the four halfwords (offset a on top) into a
+ * 64-bit value.
+ */
+#define PRECEU_QH(name, a, b, c, d) \
+target_ulong helper_preceu_qh_##name(target_ulong rt)        \
+{                                                            \
+    uint16_t h3 = (rt >> (a)) & MIPSDSP_Q0;                  \
+    uint16_t h2 = (rt >> (b)) & MIPSDSP_Q0;                  \
+    uint16_t h1 = (rt >> (c)) & MIPSDSP_Q0;                  \
+    uint16_t h0 = (rt >> (d)) & MIPSDSP_Q0;                  \
+                                                             \
+    return MIPSDSP_RETURN64_16(h3, h2, h1, h0);              \
+}
+
+PRECEU_QH(obl, 56, 48, 40, 32);
+PRECEU_QH(obr, 24, 16, 8, 0);
+PRECEU_QH(obla, 56, 40, 24, 8);
+PRECEU_QH(obra, 48, 32, 16, 0);
+
+#undef PRECEU_QH
+
+#endif
+
+/** DSP GPR-Based Shift Sub-class insns **/
+/*
+ * SHIFT_QB(name, func) emits helper_<name>_qb(sa, rt): the shift amount
+ * is reduced to its low 3 bits and mipsdsp_<func> is applied with it to
+ * each of the four bytes of rt, most significant byte first; the four
+ * results are packed into a 32-bit value.
+ */
+#define SHIFT_QB(name, func) \
+target_ulong helper_##name##_qb(target_ulong sa, target_ulong rt) \
+{                                                                    \
+    uint8_t b3, b2, b1, b0;                                          \
+                                                                     \
+    sa &= 0x07;                                                      \
+                                                                     \
+    MIPSDSP_SPLIT32_8(rt, b3, b2, b1, b0);                           \
+                                                                     \
+    b3 = mipsdsp_##func(b3, sa);                                     \
+    b2 = mipsdsp_##func(b2, sa);                                     \
+    b1 = mipsdsp_##func(b1, sa);                                     \
+    b0 = mipsdsp_##func(b0, sa);                                     \
+                                                                     \
+    return MIPSDSP_RETURN32_8(b3, b2, b1, b0);                       \
+}
+
+/* Same as SHIFT_QB, with the CPU state forwarded to mipsdsp_<func>. */
+#define SHIFT_QB_ENV(name, func) \
+target_ulong helper_##name##_qb(target_ulong sa, target_ulong rt,\
+                                CPUMIPSState *env) \
+{                                                                    \
+    uint8_t b3, b2, b1, b0;                                          \
+                                                                     \
+    sa &= 0x07;                                                      \
+                                                                     \
+    MIPSDSP_SPLIT32_8(rt, b3, b2, b1, b0);                           \
+                                                                     \
+    b3 = mipsdsp_##func(b3, sa, env);                                \
+    b2 = mipsdsp_##func(b2, sa, env);                                \
+    b1 = mipsdsp_##func(b1, sa, env);                                \
+    b0 = mipsdsp_##func(b0, sa, env);                                \
+                                                                     \
+    return MIPSDSP_RETURN32_8(b3, b2, b1, b0);                       \
+}
+
+/* Logical shifts. */
+SHIFT_QB_ENV(shll, lshift8);
+SHIFT_QB(shrl, rshift_u8);
+
+/* Arithmetic right shifts, without and with rounding. */
+SHIFT_QB(shra, rashift8);
+SHIFT_QB(shra_r, rnd8_rashift);
+
+#undef SHIFT_QB
+#undef SHIFT_QB_ENV
+
+#if defined(TARGET_MIPS64)
+/*
+ * SHIFT_OB(name, func) emits helper_<name>_ob(rt, sa).
+ *
+ * The low three bits of sa are the shift amount.  Each of the eight bytes
+ * of rt is extracted, passed through mipsdsp_<func> and OR-ed back into the
+ * result at the bit position it came from.
+ */
+#define SHIFT_OB(name, func)                                             \
+target_ulong helper_##name##_ob(target_ulong rt, target_ulong sa)        \
+{                                                                        \
+    uint64_t packed = 0;                                                 \
+    unsigned int bitpos;                                                 \
+                                                                         \
+    sa &= 0x07;                                                          \
+                                                                         \
+    for (bitpos = 0; bitpos < 64; bitpos += 8) {                         \
+        uint8_t lane = (rt >> bitpos) & MIPSDSP_Q0;                      \
+                                                                         \
+        lane = mipsdsp_##func(lane, sa);                                 \
+        packed |= (uint64_t)lane << bitpos;                              \
+    }                                                                    \
+                                                                         \
+    return packed;                                                       \
+}
+
+/*
+ * SHIFT_OB_ENV(name, func): as SHIFT_OB, for per-byte functions that also
+ * receive the CPU state pointer.
+ */
+#define SHIFT_OB_ENV(name, func)                                         \
+target_ulong helper_##name##_ob(target_ulong rt, target_ulong sa,        \
+                                CPUMIPSState *env)                       \
+{                                                                        \
+    uint64_t packed = 0;                                                 \
+    unsigned int bitpos;                                                 \
+                                                                         \
+    sa &= 0x07;                                                          \
+                                                                         \
+    for (bitpos = 0; bitpos < 64; bitpos += 8) {                         \
+        uint8_t lane = (rt >> bitpos) & MIPSDSP_Q0;                      \
+                                                                         \
+        lane = mipsdsp_##func(lane, sa, env);                            \
+        packed |= (uint64_t)lane << bitpos;                              \
+    }                                                                    \
+                                                                         \
+    return packed;                                                       \
+}
+
+SHIFT_OB_ENV(shll, lshift8);
+SHIFT_OB(shrl, rshift_u8);
+
+SHIFT_OB(shra, rashift8);
+SHIFT_OB(shra_r, rnd8_rashift);
+
+#undef SHIFT_OB
+#undef SHIFT_OB_ENV
+
+#endif
+
+/*
+ * SHIFT_PH(name, func) emits helper_<name>_ph(sa, rt, env).
+ *
+ * The low four bits of sa are the shift amount.  rt is split into two
+ * 16-bit halves with MIPSDSP_SPLIT32_16, each half goes through
+ * mipsdsp_<func>(half, sa, env), and the pair is repacked with
+ * MIPSDSP_RETURN32_16.
+ */
+#define SHIFT_PH(name, func)                                      \
+target_ulong helper_##name##_ph(target_ulong sa, target_ulong rt, \
+                                CPUMIPSState *env)                \
+{                                                                 \
+    uint16_t upper, lower;                                        \
+                                                                  \
+    MIPSDSP_SPLIT32_16(rt, upper, lower);                         \
+    sa &= 0x0F;                                                   \
+                                                                  \
+    upper = mipsdsp_##func(upper, sa, env);                       \
+    lower = mipsdsp_##func(lower, sa, env);                       \
+                                                                  \
+    return MIPSDSP_RETURN32_16(upper, lower);                     \
+}
+
+SHIFT_PH(shll, lshift16);
+SHIFT_PH(shll_s, sat16_lshift);
+
+#undef SHIFT_PH
+
+#if defined(TARGET_MIPS64)
+/*
+ * SHIFT_QH(name, func) emits helper_<name>_qh(rt, sa).
+ *
+ * The low four bits of sa are the shift amount.  rt is split into four
+ * 16-bit lanes with MIPSDSP_SPLIT64_16, every lane is passed through
+ * mipsdsp_<func>, and the lanes are repacked with MIPSDSP_RETURN64_16.
+ */
+#define SHIFT_QH(name, func)                                      \
+target_ulong helper_##name##_qh(target_ulong rt, target_ulong sa) \
+{                                                                 \
+    uint16_t lane[4];                                             \
+    int idx;                                                      \
+                                                                  \
+    sa &= 0x0F;                                                   \
+    MIPSDSP_SPLIT64_16(rt, lane[3], lane[2], lane[1], lane[0]);   \
+                                                                  \
+    for (idx = 3; idx >= 0; idx--) {                              \
+        lane[idx] = mipsdsp_##func(lane[idx], sa);                \
+    }                                                             \
+                                                                  \
+    return MIPSDSP_RETURN64_16(lane[3], lane[2], lane[1], lane[0]); \
+}
+
+/*
+ * SHIFT_QH_ENV(name, func): as SHIFT_QH, for per-lane functions that also
+ * receive the CPU state pointer.
+ */
+#define SHIFT_QH_ENV(name, func)                                  \
+target_ulong helper_##name##_qh(target_ulong rt, target_ulong sa, \
+                                CPUMIPSState *env)                \
+{                                                                 \
+    uint16_t lane[4];                                             \
+    int idx;                                                      \
+                                                                  \
+    sa &= 0x0F;                                                   \
+    MIPSDSP_SPLIT64_16(rt, lane[3], lane[2], lane[1], lane[0]);   \
+                                                                  \
+    for (idx = 3; idx >= 0; idx--) {                              \
+        lane[idx] = mipsdsp_##func(lane[idx], sa, env);           \
+    }                                                             \
+                                                                  \
+    return MIPSDSP_RETURN64_16(lane[3], lane[2], lane[1], lane[0]); \
+}
+
+SHIFT_QH_ENV(shll, lshift16);
+SHIFT_QH_ENV(shll_s, sat16_lshift);
+
+SHIFT_QH(shrl, rshift_u16);
+SHIFT_QH(shra, rashift16);
+SHIFT_QH(shra_r, rnd16_rashift);
+
+#undef SHIFT_QH
+#undef SHIFT_QH_ENV
+
+#endif
+
+/*
+ * SHIFT_W(name, func) emits helper_<name>_w(sa, rt).
+ *
+ * mipsdsp_<func> is called with rt and the low five bits of sa; its result
+ * is truncated to 32 bits and returned sign-extended to target_long.
+ */
+#define SHIFT_W(name, func)                                             \
+target_ulong helper_##name##_w(target_ulong sa, target_ulong rt)        \
+{                                                                       \
+    const uint32_t word = mipsdsp_##func(rt, sa & 0x1F);                \
+                                                                        \
+    return (target_long)(int32_t)word;                                  \
+}
+
+/*
+ * SHIFT_W_ENV(name, func): as SHIFT_W, for functions that also receive the
+ * CPU state pointer.
+ */
+#define SHIFT_W_ENV(name, func)                                         \
+target_ulong helper_##name##_w(target_ulong sa, target_ulong rt,        \
+                               CPUMIPSState *env)                       \
+{                                                                       \
+    const uint32_t word = mipsdsp_##func(rt, sa & 0x1F, env);           \
+                                                                        \
+    return (target_long)(int32_t)word;                                  \
+}
+
+SHIFT_W_ENV(shll_s, sat32_lshift);
+SHIFT_W(shra_r, rnd32_rashift);
+
+#undef SHIFT_W
+#undef SHIFT_W_ENV
+
+#if defined(TARGET_MIPS64)
+/*
+ * SHIFT_PW(name, func) emits helper_<name>_pw(rt, sa).
+ *
+ * The low five bits of sa are the shift amount.  rt is split into two
+ * 32-bit words with MIPSDSP_SPLIT64_32, each word goes through
+ * mipsdsp_<func>, and the pair is repacked with MIPSDSP_RETURN64_32.
+ */
+#define SHIFT_PW(name, func)                                      \
+target_ulong helper_##name##_pw(target_ulong rt, target_ulong sa) \
+{                                                                 \
+    uint32_t word[2];                                             \
+                                                                  \
+    MIPSDSP_SPLIT64_32(rt, word[1], word[0]);                     \
+    sa &= 0x1F;                                                   \
+                                                                  \
+    word[1] = mipsdsp_##func(word[1], sa);                        \
+    word[0] = mipsdsp_##func(word[0], sa);                        \
+                                                                  \
+    return MIPSDSP_RETURN64_32(word[1], word[0]);                 \
+}
+
+/*
+ * SHIFT_PW_ENV(name, func): as SHIFT_PW, for per-word functions that also
+ * receive the CPU state pointer.
+ */
+#define SHIFT_PW_ENV(name, func)                                  \
+target_ulong helper_##name##_pw(target_ulong rt, target_ulong sa, \
+                                CPUMIPSState *env)                \
+{                                                                 \
+    uint32_t word[2];                                             \
+                                                                  \
+    MIPSDSP_SPLIT64_32(rt, word[1], word[0]);                     \
+    sa &= 0x1F;                                                   \
+                                                                  \
+    word[1] = mipsdsp_##func(word[1], sa, env);                   \
+    word[0] = mipsdsp_##func(word[0], sa, env);                   \
+                                                                  \
+    return MIPSDSP_RETURN64_32(word[1], word[0]);                 \
+}
+
+SHIFT_PW_ENV(shll, lshift32);
+SHIFT_PW_ENV(shll_s, sat32_lshift);
+
+SHIFT_PW(shra, rashift32);
+SHIFT_PW(shra_r, rnd32_rashift);
+
+#undef SHIFT_PW
+#undef SHIFT_PW_ENV
+
+#endif
+
+/*
+ * SHIFT_PH(name, func) emits helper_<name>_ph(sa, rt) for per-halfword
+ * functions that do not need the CPU state.
+ *
+ * The low four bits of sa are the shift amount; both 16-bit halves of rt
+ * are passed through mipsdsp_<func> and repacked with MIPSDSP_RETURN32_16.
+ */
+#define SHIFT_PH(name, func)                                         \
+target_ulong helper_##name##_ph(target_ulong sa, target_ulong rt)    \
+{                                                                    \
+    uint16_t upper, lower;                                           \
+                                                                     \
+    MIPSDSP_SPLIT32_16(rt, upper, lower);                            \
+    sa &= 0x0F;                                                      \
+                                                                     \
+    upper = mipsdsp_##func(upper, sa);                               \
+    lower = mipsdsp_##func(lower, sa);                               \
+                                                                     \
+    return MIPSDSP_RETURN32_16(upper, lower);                        \
+}
+
+SHIFT_PH(shrl, rshift_u16);
+SHIFT_PH(shra, rashift16);
+SHIFT_PH(shra_r, rnd16_rashift);
+
+#undef SHIFT_PH
+
+/** DSP Multiply Sub-class insns **/
+/*
+ * MUL_RETURN32_16_PH(name, func, rsmov1, rsmov2, rsfilter,
+ *                    rtmov1, rtmov2, rtfilter) emits helper_<name>.
+ *
+ * The return value is made up of two 16-bit values: two fields are taken
+ * from rs (shift by rsmov1/rsmov2, mask with rsfilter) and two from rt
+ * (rtmov1/rtmov2, rtfilter); mipsdsp_<func> combines them pairwise and the
+ * two 16-bit results are packed with MIPSDSP_RETURN32_16.
+ *
+ * FIXME give the macro a better name.
+ */
+#define MUL_RETURN32_16_PH(name, func, \
+                           rsmov1, rsmov2, rsfilter, \
+                           rtmov1, rtmov2, rtfilter) \
+target_ulong helper_##name(target_ulong rs, target_ulong rt, \
+                           CPUMIPSState *env)                \
+{                                                            \
+    const uint16_t rs_first  = (rs >> (rsmov1)) & (rsfilter); \
+    const uint16_t rs_second = (rs >> (rsmov2)) & (rsfilter); \
+    const uint16_t rt_first  = (rt >> (rtmov1)) & (rtfilter); \
+    const uint16_t rt_second = (rt >> (rtmov2)) & (rtfilter); \
+    uint16_t out_first, out_second;                          \
+                                                             \
+    out_first  = mipsdsp_##func(rs_first, rt_first, env);    \
+    out_second = mipsdsp_##func(rs_second, rt_second, env);  \
+                                                             \
+    return MIPSDSP_RETURN32_16(out_first, out_second);       \
+}
+
+MUL_RETURN32_16_PH(muleu_s_ph_qbl, mul_u8_u16, \
+                      24, 16, MIPSDSP_Q0, \
+                      16, 0, MIPSDSP_LO);
+MUL_RETURN32_16_PH(muleu_s_ph_qbr, mul_u8_u16, \
+                      8, 0, MIPSDSP_Q0, \
+                      16, 0, MIPSDSP_LO);
+MUL_RETURN32_16_PH(mulq_rs_ph, rndq15_mul_q15_q15, \
+                      16, 0, MIPSDSP_LO, \
+                      16, 0, MIPSDSP_LO);
+MUL_RETURN32_16_PH(mul_ph, mul_i16_i16, \
+                      16, 0, MIPSDSP_LO, \
+                      16, 0, MIPSDSP_LO);
+MUL_RETURN32_16_PH(mul_s_ph, sat16_mul_i16_i16, \
+                      16, 0, MIPSDSP_LO, \
+                      16, 0, MIPSDSP_LO);
+MUL_RETURN32_16_PH(mulq_s_ph, sat16_mul_q15_q15, \
+                      16, 0, MIPSDSP_LO, \
+                      16, 0, MIPSDSP_LO);
+
+#undef MUL_RETURN32_16_PH
+
+/*
+ * MUL_RETURN32_32_ph(name, func, movbits) emits helper_<name>.
+ *
+ * One 16-bit field is taken from each of rs and rt (shift right by movbits,
+ * mask with MIPSDSP_LO), mipsdsp_<func> turns the pair into a 32-bit value,
+ * and that value is returned sign-extended to target_long.
+ */
+#define MUL_RETURN32_32_ph(name, func, movbits) \
+target_ulong helper_##name(target_ulong rs, target_ulong rt, \
+                                  CPUMIPSState *env)         \
+{                                                            \
+    const int16_t rs_half = (rs >> (movbits)) & MIPSDSP_LO;  \
+    const int16_t rt_half = (rt >> (movbits)) & MIPSDSP_LO;  \
+    const int32_t product = mipsdsp_##func(rs_half, rt_half, env); \
+                                                             \
+    return (target_long)product;                             \
+}
+
+MUL_RETURN32_32_ph(muleq_s_w_phl, mul_q15_q15_overflowflag21, 16);
+MUL_RETURN32_32_ph(muleq_s_w_phr, mul_q15_q15_overflowflag21, 0);
+
+#undef MUL_RETURN32_32_ph
+
+/*
+ * MUL_VOID_PH(name, use_ac_env) emits helper_<name>(ac, rs, rt, env).
+ *
+ * Both operands are split into two 16-bit halves.  The high halves and the
+ * low halves are multiplied with mipsdsp_mul_q15_q15 (use_ac_env == 1) or
+ * mipsdsp_mul_u16_u16 (otherwise).  The difference high - low is added to
+ * the 64-bit value formed from the low 32 bits of HI[ac] and LO[ac], and
+ * the two 32-bit halves of the sum are written back sign-extended.
+ */
+#define MUL_VOID_PH(name, use_ac_env) \
+void helper_##name(uint32_t ac, target_ulong rs, target_ulong rt,        \
+                          CPUMIPSState *env)                             \
+{                                                                        \
+    int16_t rs_hi, rs_lo, rt_hi, rt_lo;                                  \
+    int32_t prod_hi, prod_lo;                                            \
+    int64_t acc, total;                                                  \
+                                                                         \
+    MIPSDSP_SPLIT32_16(rs, rs_hi, rs_lo);                                \
+    MIPSDSP_SPLIT32_16(rt, rt_hi, rt_lo);                                \
+                                                                         \
+    if (use_ac_env != 1) {                                               \
+        prod_hi = mipsdsp_mul_u16_u16(rs_hi, rt_hi);                     \
+        prod_lo = mipsdsp_mul_u16_u16(rs_lo, rt_lo);                     \
+    } else {                                                             \
+        prod_hi = mipsdsp_mul_q15_q15(ac, rs_hi, rt_hi, env);            \
+        prod_lo = mipsdsp_mul_q15_q15(ac, rs_lo, rt_lo, env);            \
+    }                                                                    \
+                                                                         \
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |                      \
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);               \
+    total = ((int64_t)prod_hi - (int64_t)prod_lo) + acc;                 \
+                                                                         \
+    env->active_tc.HI[ac] = (target_long)(int32_t)                       \
+                            ((total & MIPSDSP_LHI) >> 32);               \
+    env->active_tc.LO[ac] = (target_long)(int32_t)(total & MIPSDSP_LLO); \
+}
+
+MUL_VOID_PH(mulsaq_s_w_ph, 1);
+MUL_VOID_PH(mulsa_w_ph, 0);
+
+#undef MUL_VOID_PH
+
+#if defined(TARGET_MIPS64)
+/*
+ * MUL_RETURN64_16_QH(name, func, rsmov1..4, rsfilter, rtmov1..4, rtfilter)
+ * emits helper_<name>.
+ *
+ * Four fields are extracted from rs (shift by rsmovN, mask with rsfilter)
+ * and four from rt (rtmovN, rtfilter).  mipsdsp_<func> combines them
+ * pairwise and the four 16-bit results are packed with MIPSDSP_RETURN64_16.
+ */
+#define MUL_RETURN64_16_QH(name, func, \
+                           rsmov1, rsmov2, rsmov3, rsmov4, rsfilter, \
+                           rtmov1, rtmov2, rtmov3, rtmov4, rtfilter) \
+target_ulong helper_##name(target_ulong rs, target_ulong rt,         \
+                           CPUMIPSState *env)                        \
+{                                                                    \
+    uint16_t rs_lane[4], rt_lane[4], out[4];                         \
+    int idx;                                                         \
+                                                                     \
+    rs_lane[3] = (rs >> (rsmov1)) & (rsfilter);                      \
+    rs_lane[2] = (rs >> (rsmov2)) & (rsfilter);                      \
+    rs_lane[1] = (rs >> (rsmov3)) & (rsfilter);                      \
+    rs_lane[0] = (rs >> (rsmov4)) & (rsfilter);                      \
+    rt_lane[3] = (rt >> (rtmov1)) & (rtfilter);                      \
+    rt_lane[2] = (rt >> (rtmov2)) & (rtfilter);                      \
+    rt_lane[1] = (rt >> (rtmov3)) & (rtfilter);                      \
+    rt_lane[0] = (rt >> (rtmov4)) & (rtfilter);                      \
+                                                                     \
+    for (idx = 3; idx >= 0; idx--) {                                 \
+        out[idx] = mipsdsp_##func(rs_lane[idx], rt_lane[idx], env);  \
+    }                                                                \
+                                                                     \
+    return MIPSDSP_RETURN64_16(out[3], out[2], out[1], out[0]);      \
+}
+
+MUL_RETURN64_16_QH(muleu_s_qh_obl, mul_u8_u16, \
+                   56, 48, 40, 32, MIPSDSP_Q0, \
+                   48, 32, 16, 0, MIPSDSP_LO);
+MUL_RETURN64_16_QH(muleu_s_qh_obr, mul_u8_u16, \
+                   24, 16, 8, 0, MIPSDSP_Q0, \
+                   48, 32, 16, 0, MIPSDSP_LO);
+MUL_RETURN64_16_QH(mulq_rs_qh, rndq15_mul_q15_q15, \
+                   48, 32, 16, 0, MIPSDSP_LO, \
+                   48, 32, 16, 0, MIPSDSP_LO);
+
+#undef MUL_RETURN64_16_QH
+
+/*
+ * MUL_RETURN64_32_QH(name, rsmov1, rsmov2, rtmov1, rtmov2) emits
+ * helper_<name>.
+ *
+ * Two 16-bit fields are taken from rs and two from rt.  Each pair is
+ * multiplied with mipsdsp_mul_q15_q15, which is always called with 5 as its
+ * first argument here, and the two 32-bit products are returned as the
+ * upper and lower halves of a 64-bit value.
+ */
+#define MUL_RETURN64_32_QH(name, \
+                           rsmov1, rsmov2, \
+                           rtmov1, rtmov2) \
+target_ulong helper_##name(target_ulong rs, target_ulong rt, \
+                           CPUMIPSState *env)                \
+{                                                            \
+    const uint16_t rs_first  = (rs >> (rsmov1)) & MIPSDSP_LO; \
+    const uint16_t rs_second = (rs >> (rsmov2)) & MIPSDSP_LO; \
+    const uint16_t rt_first  = (rt >> (rtmov1)) & MIPSDSP_LO; \
+    const uint16_t rt_second = (rt >> (rtmov2)) & MIPSDSP_LO; \
+    uint32_t word_hi, word_lo;                               \
+                                                             \
+    word_hi = mipsdsp_mul_q15_q15(5, rs_first, rt_first, env);   \
+    word_lo = mipsdsp_mul_q15_q15(5, rs_second, rt_second, env); \
+                                                             \
+    return ((uint64_t)word_hi << 32) | (uint64_t)word_lo;    \
+}
+
+MUL_RETURN64_32_QH(muleq_s_pw_qhl, 48, 32, 48, 32);
+MUL_RETURN64_32_QH(muleq_s_pw_qhr, 16, 0, 16, 0);
+
+#undef MUL_RETURN64_32_QH
+
+/*
+ * helper_mulsaq_s_w_qh - multiply four halfword pairs and accumulate.
+ *
+ * rs and rt are split into four 16-bit lanes each.  Corresponding lanes are
+ * multiplied with mipsdsp_mul_q15_q15(ac, ...), giving tempD..tempA.  The
+ * value (tempD - tempC) + (tempB - tempA) is sign-extended to 128 bits and
+ * added to the accumulator HI[ac]:LO[ac].
+ *
+ * @rs, @rt: source operands holding four halfwords each
+ * @ac:      accumulator index, also forwarded to mipsdsp_mul_q15_q15
+ * @env:     CPU state; HI[ac] and LO[ac] are updated in place
+ */
+void helper_mulsaq_s_w_qh(target_ulong rs, target_ulong rt, uint32_t ac,
+                          CPUMIPSState *env)
+{
+    int16_t rs3, rs2, rs1, rs0;
+    int16_t rt3, rt2, rt1, rt0;
+    int32_t tempD, tempC, tempB, tempA;
+    int64_t dotp;
+    uint64_t acc_lo, acc_hi, sum_lo;
+
+    MIPSDSP_SPLIT64_16(rs, rs3, rs2, rs1, rs0);
+    MIPSDSP_SPLIT64_16(rt, rt3, rt2, rt1, rt0);
+
+    tempD = mipsdsp_mul_q15_q15(ac, rs3, rt3, env);
+    tempC = mipsdsp_mul_q15_q15(ac, rs2, rt2, env);
+    tempB = mipsdsp_mul_q15_q15(ac, rs1, rt1, env);
+    tempA = mipsdsp_mul_q15_q15(ac, rs0, rt0, env);
+
+    /*
+     * Widen before subtracting: the combination of four 32-bit products
+     * needs up to 34 bits, so doing it in int32_t would overflow.  In
+     * 64-bit arithmetic the result is exact and already sign-extended.
+     */
+    dotp = ((int64_t)tempD - (int64_t)tempC) +
+           ((int64_t)tempB - (int64_t)tempA);
+
+    acc_lo = env->active_tc.LO[ac];
+    acc_hi = env->active_tc.HI[ac];
+
+    /* 128-bit add in unsigned arithmetic so wrap-around is well defined. */
+    sum_lo = acc_lo + (uint64_t)dotp;
+    if (sum_lo < acc_lo) {
+        /* carry out of the low 64 bits */
+        acc_hi += 1;
+    }
+    /* upper 64 bits of the sign-extended dot product: all ones or zero */
+    acc_hi += (dotp < 0) ? ~0ull : 0;
+
+    env->active_tc.HI[ac] = acc_hi;
+    env->active_tc.LO[ac] = sum_lo;
+}
+#endif
+
+/*
+ * DP_QB(name, func, is_add, rsmov1, rsmov2, rtmov1, rtmov2) emits
+ * helper_<name>(ac, rs, rt, env).
+ *
+ * Two bytes are taken from rs and two from rt at the given shifts,
+ * mipsdsp_<func> multiplies them pairwise, and the sum of the two products
+ * is added to (is_add != 0) or subtracted from the 64-bit value formed from
+ * the low 32 bits of HI[ac] and LO[ac].  Both 32-bit halves of the result
+ * are written back sign-extended.
+ */
+#define DP_QB(name, func, is_add, rsmov1, rsmov2, rtmov1, rtmov2) \
+void helper_##name(uint32_t ac, target_ulong rs, target_ulong rt,        \
+                   CPUMIPSState *env)                                    \
+{                                                                        \
+    const uint8_t rs_first  = (rs >> (rsmov1)) & MIPSDSP_Q0;             \
+    const uint8_t rs_second = (rs >> (rsmov2)) & MIPSDSP_Q0;             \
+    const uint8_t rt_first  = (rt >> (rtmov1)) & MIPSDSP_Q0;             \
+    const uint8_t rt_second = (rt >> (rtmov2)) & MIPSDSP_Q0;             \
+    uint16_t prod_first, prod_second;                                    \
+    uint64_t acc, dotp;                                                  \
+                                                                         \
+    prod_first  = mipsdsp_##func(rs_first, rt_first);                    \
+    prod_second = mipsdsp_##func(rs_second, rt_second);                  \
+    dotp = (int64_t)prod_first + (int64_t)prod_second;                   \
+                                                                         \
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |                      \
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);               \
+    if (is_add) {                                                        \
+        acc += dotp;                                                     \
+    } else {                                                             \
+        acc -= dotp;                                                     \
+    }                                                                    \
+                                                                         \
+    env->active_tc.HI[ac] = (target_long)(int32_t)                       \
+                            ((acc & MIPSDSP_LHI) >> 32);                 \
+    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);   \
+}
+
+DP_QB(dpau_h_qbl, mul_u8_u8, 1, 24, 16, 24, 16);
+DP_QB(dpau_h_qbr, mul_u8_u8, 1, 8, 0, 8, 0);
+DP_QB(dpsu_h_qbl, mul_u8_u8, 0, 24, 16, 24, 16);
+DP_QB(dpsu_h_qbr, mul_u8_u8, 0, 8, 0, 8, 0);
+
+#undef DP_QB
+
+#if defined(TARGET_MIPS64)
+/*
+ * DP_OB(name, add_sub, rsmov1..4, rtmov1..4) emits
+ * helper_<name>(rs, rt, ac, env).
+ *
+ * Four bytes are taken from rs and four from rt at the given shifts and
+ * multiplied pairwise with mipsdsp_mul_u8_u8.  The sum of the four products
+ * is added to (add_sub != 0) or subtracted from the 128-bit accumulator
+ * HI[ac]:LO[ac]; a carry or borrow out of LO is propagated into HI.
+ */
+#define DP_OB(name, add_sub, \
+              rsmov1, rsmov2, rsmov3, rsmov4, \
+              rtmov1, rtmov2, rtmov3, rtmov4) \
+void helper_##name(target_ulong rs, target_ulong rt, uint32_t ac,       \
+                       CPUMIPSState *env)                               \
+{                                                                       \
+    uint8_t rs_byte[4], rt_byte[4];                                     \
+    uint64_t dotp = 0;                                                  \
+    uint64_t acc_lo, acc_hi, new_lo;                                    \
+    int idx;                                                            \
+                                                                        \
+    rs_byte[3] = (rs >> (rsmov1)) & MIPSDSP_Q0;                         \
+    rs_byte[2] = (rs >> (rsmov2)) & MIPSDSP_Q0;                         \
+    rs_byte[1] = (rs >> (rsmov3)) & MIPSDSP_Q0;                         \
+    rs_byte[0] = (rs >> (rsmov4)) & MIPSDSP_Q0;                         \
+    rt_byte[3] = (rt >> (rtmov1)) & MIPSDSP_Q0;                         \
+    rt_byte[2] = (rt >> (rtmov2)) & MIPSDSP_Q0;                         \
+    rt_byte[1] = (rt >> (rtmov3)) & MIPSDSP_Q0;                         \
+    rt_byte[0] = (rt >> (rtmov4)) & MIPSDSP_Q0;                         \
+                                                                        \
+    for (idx = 3; idx >= 0; idx--) {                                    \
+        uint16_t prod = mipsdsp_mul_u8_u8(rs_byte[idx], rt_byte[idx]);  \
+                                                                        \
+        dotp += (uint64_t)prod;                                         \
+    }                                                                   \
+                                                                        \
+    acc_lo = env->active_tc.LO[ac];                                     \
+    acc_hi = env->active_tc.HI[ac];                                     \
+                                                                        \
+    if (add_sub) {                                                      \
+        new_lo = acc_lo + dotp;                                         \
+        if ((new_lo < acc_lo) && (new_lo < dotp)) {                     \
+            acc_hi += 1;                                                \
+        }                                                               \
+    } else {                                                            \
+        new_lo = acc_lo - dotp;                                         \
+        if (new_lo > acc_lo) {                                          \
+            acc_hi -= 1;                                                \
+        }                                                               \
+    }                                                                   \
+                                                                        \
+    env->active_tc.HI[ac] = acc_hi;                                     \
+    env->active_tc.LO[ac] = new_lo;                                     \
+}
+
+DP_OB(dpau_h_obl, 1, 56, 48, 40, 32, 56, 48, 40, 32);
+DP_OB(dpau_h_obr, 1, 24, 16, 8, 0, 24, 16, 8, 0);
+DP_OB(dpsu_h_obl, 0, 56, 48, 40, 32, 56, 48, 40, 32);
+DP_OB(dpsu_h_obr, 0, 24, 16, 8, 0, 24, 16, 8, 0);
+
+#undef DP_OB
+#endif
+
+/*
+ * DP_NOFUNC_PH(name, is_add, rsmov1, rsmov2, rtmov1, rtmov2) emits
+ * helper_<name>(ac, rs, rt, env).
+ *
+ * Two halfwords are taken from rs and two from rt at the given shifts and
+ * multiplied pairwise as plain integers.  The sum of the two products is
+ * added to (is_add != 0) or subtracted from the 64-bit value formed from
+ * the low 32 bits of HI[ac] and LO[ac]; both halves of the result are
+ * written back sign-extended.
+ *
+ * The halfwords are held in int16_t so the multiplication is signed, like
+ * the Q15 dot-product helpers in this file.  As uint16_t, operands
+ * >= 0x8000 would be treated as large positive numbers, and
+ * 0xFFFF * 0xFFFF does not even fit in the int32_t product.
+ */
+#define DP_NOFUNC_PH(name, is_add, rsmov1, rsmov2, rtmov1, rtmov2)             \
+void helper_##name(uint32_t ac, target_ulong rs, target_ulong rt,              \
+                   CPUMIPSState *env)                                          \
+{                                                                              \
+    int16_t rsB, rsA, rtB, rtA;                                                \
+    int32_t  tempA, tempB;                                                     \
+    int64_t  acc;                                                              \
+                                                                               \
+    rsB = (rs >> rsmov1) & MIPSDSP_LO;                                         \
+    rsA = (rs >> rsmov2) & MIPSDSP_LO;                                         \
+    rtB = (rt >> rtmov1) & MIPSDSP_LO;                                         \
+    rtA = (rt >> rtmov2) & MIPSDSP_LO;                                         \
+                                                                               \
+    /* |product| <= 0x8000 * 0x8000 = 2^30, so int32_t cannot overflow */      \
+    tempB = (int32_t)rsB * (int32_t)rtB;                                       \
+    tempA = (int32_t)rsA * (int32_t)rtA;                                       \
+                                                                               \
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |                            \
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);                     \
+                                                                               \
+    if (is_add) {                                                              \
+        acc = acc + ((int64_t)tempB + (int64_t)tempA);                         \
+    } else {                                                                   \
+        acc = acc - ((int64_t)tempB + (int64_t)tempA);                         \
+    }                                                                          \
+                                                                               \
+    env->active_tc.HI[ac] = (target_long)(int32_t)((acc & MIPSDSP_LHI) >> 32); \
+    env->active_tc.LO[ac] = (target_long)(int32_t)(acc & MIPSDSP_LLO);         \
+}
+
+DP_NOFUNC_PH(dpa_w_ph, 1, 16, 0, 16, 0);
+DP_NOFUNC_PH(dpax_w_ph, 1, 16, 0, 0, 16);
+DP_NOFUNC_PH(dps_w_ph, 0, 16, 0, 16, 0);
+DP_NOFUNC_PH(dpsx_w_ph, 0, 16, 0, 0, 16);
+#undef DP_NOFUNC_PH
+
+/*
+ * DP_HASFUNC_PH(name, is_add, rsmov1, rsmov2, rtmov1, rtmov2) emits
+ * helper_<name>(ac, rs, rt, env).
+ *
+ * Two halfwords are taken from rs and two from rt at the given shifts and
+ * multiplied pairwise with mipsdsp_mul_q15_q15(ac, ...).  The sum of the
+ * two products is added to (is_add != 0) or subtracted from the 64-bit
+ * value formed from the low 32 bits of HI[ac] and LO[ac]; both halves of
+ * the result are written back sign-extended.
+ */
+#define DP_HASFUNC_PH(name, is_add, rsmov1, rsmov2, rtmov1, rtmov2) \
+void helper_##name(uint32_t ac, target_ulong rs, target_ulong rt,   \
+                   CPUMIPSState *env)                      \
+{                                                          \
+    const int16_t rs_first  = (rs >> (rsmov1)) & MIPSDSP_LO; \
+    const int16_t rs_second = (rs >> (rsmov2)) & MIPSDSP_LO; \
+    const int16_t rt_first  = (rt >> (rtmov1)) & MIPSDSP_LO; \
+    const int16_t rt_second = (rt >> (rtmov2)) & MIPSDSP_LO; \
+    int32_t prod_first, prod_second;                       \
+    int64_t acc, dotp;                                     \
+                                                           \
+    prod_first  = mipsdsp_mul_q15_q15(ac, rs_first, rt_first, env);   \
+    prod_second = mipsdsp_mul_q15_q15(ac, rs_second, rt_second, env); \
+    dotp = (int64_t)prod_first + (int64_t)prod_second;     \
+                                                           \
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |        \
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO); \
+    acc = (is_add) ? (acc + dotp) : (acc - dotp);          \
+                                                           \
+    env->active_tc.HI[ac] = (target_long)(int32_t)         \
+        ((acc & MIPSDSP_LHI) >> 32);                       \
+    env->active_tc.LO[ac] = (target_long)(int32_t)         \
+        (acc & MIPSDSP_LLO);                               \
+}
+
+DP_HASFUNC_PH(dpaq_s_w_ph, 1, 16, 0, 16, 0);
+DP_HASFUNC_PH(dpaqx_s_w_ph, 1, 16, 0, 0, 16);
+DP_HASFUNC_PH(dpsq_s_w_ph, 0, 16, 0, 16, 0);
+DP_HASFUNC_PH(dpsqx_s_w_ph, 0, 16, 0, 0, 16);
+
+#undef DP_HASFUNC_PH
+
+/*
+ * Generate helper_<name>() for the saturating cross dot-product helpers.
+ *
+ *   is_add  non-zero: accumulator + dot product,
+ *           zero:     accumulator - dot product
+ *
+ * rs and rt are split by MIPSDSP_SPLIT32_16() into (rsh, rsl) and
+ * (rth, rtl); the products rsh * rtl and rsl * rth are computed by
+ * mipsdsp_mul_q15_q15() and summed.  When bits 62..31 of the 64-bit
+ * result are not all equal to sign bit 63, the result is replaced by
+ * 0x7FFFFFFF (positive) or sign-extended 0x80000000 (negative) and
+ * overflow flag 16 + ac is raised via set_DSPControl_overflow_flag().
+ */
+#define DP_128OPERATION_PH(name, is_add) \
+void helper_##name(uint32_t ac, target_ulong rs, target_ulong rt, \
+                          CPUMIPSState *env)                             \
+{                                                                        \
+    int16_t rsh, rsl, rth, rtl;                                          \
+    int32_t tempB, tempA, tempC62_31, tempC63;                           \
+    int64_t acc, dotp, tempC;                                            \
+                                                                         \
+    MIPSDSP_SPLIT32_16(rs, rsh, rsl);                                    \
+    MIPSDSP_SPLIT32_16(rt, rth, rtl);                                    \
+                                                                         \
+    tempB = mipsdsp_mul_q15_q15(ac, rsh, rtl, env);                      \
+    tempA = mipsdsp_mul_q15_q15(ac, rsl, rth, env);                      \
+                                                                         \
+    dotp = (int64_t)tempB + (int64_t)tempA;                              \
+    acc = ((uint64_t)env->active_tc.HI[ac] << 32) |                      \
+          ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);               \
+    if (is_add) {                                                        \
+        tempC = acc + dotp;                                              \
+    } else {                                                             \
+        tempC = acc - dotp;                                              \
+    }                                                                    \
+    /* Sign bit and the 32 bits just below it decide saturation. */      \
+    tempC63 = (tempC >> 63) & 0x01;                                      \
+    tempC62_31 = (tempC >> 31) & 0xFFFFFFFF;                             \
+                                                                         \
+    if ((tempC63 == 0) && (tempC62_31 != 0x00000000)) {                  \
+        /* Positive value too large for 32 bits. */                      \
+        tempC = 0x7FFFFFFF;                                              \
+        set_DSPControl_overflow_flag(1, 16 + ac, env);                   \
+    }                                                                    \
+                                                                         \
+    if ((tempC63 == 1) && (tempC62_31 != 0xFFFFFFFF)) {                  \
+        /* Negative value too small for 32 bits. */                      \
+        tempC = (int64_t)(int32_t)0x80000000;                            \
+        set_DSPControl_overflow_flag(1, 16 + ac, env);                   \
+    }                                                                    \
+                                                                         \
+    env->active_tc.HI[ac] = (target_long)(int32_t)                       \
+        ((tempC & MIPSDSP_LHI) >> 32);                                   \
+    env->active_tc.LO[ac] = (target_long)(int32_t)                       \
+        (tempC & MIPSDSP_LLO);                                           \
+}
+
+DP_128OPERATION_PH(dpaqx_sa_w_ph, 1);
+DP_128OPERATION_PH(dpsqx_sa_w_ph, 0);
+
+/* The generator is local to this section; drop it under its real name. */
+#undef DP_128OPERATION_PH
+
+#if defined(TARGET_MIPS64)
+/*
+ * Generate helper_<name>() for the quad-halfword dot-product helpers that
+ * work on a 128-bit accumulator held in HI[ac]:LO[ac].
+ *
+ *   is_add      non-zero: accumulator + dot product,
+ *               zero:     accumulator - dot product
+ *   use_ac_env  non-zero: multiply with mipsdsp_mul_q15_q15(ac, .., env),
+ *               zero:     multiply with mipsdsp_mul_u16_u16()
+ *
+ * The four lane products are summed into a 64-bit value, sign-extended
+ * to 128 bits, and added to / subtracted from the accumulator with an
+ * explicit carry / borrow between the two 64-bit halves.
+ */
+#define DP_QH(name, is_add, use_ac_env) \
+void helper_##name(target_ulong rs, target_ulong rt, uint32_t ac,    \
+                   CPUMIPSState *env)                                \
+{                                                                    \
+    int32_t rs3, rs2, rs1, rs0;                                      \
+    int32_t rt3, rt2, rt1, rt0;                                      \
+    int32_t prod3, prod2, prod1, prod0;                              \
+    int64_t acc_hi, acc_lo;                                          \
+    int64_t dot_hi, dot_lo;                                          \
+    int64_t res_hi, res_lo;                                          \
+                                                                     \
+    MIPSDSP_SPLIT64_16(rs, rs3, rs2, rs1, rs0);                      \
+    MIPSDSP_SPLIT64_16(rt, rt3, rt2, rt1, rt0);                      \
+                                                                     \
+    if (use_ac_env) {                                                \
+        prod3 = mipsdsp_mul_q15_q15(ac, rs3, rt3, env);              \
+        prod2 = mipsdsp_mul_q15_q15(ac, rs2, rt2, env);              \
+        prod1 = mipsdsp_mul_q15_q15(ac, rs1, rt1, env);              \
+        prod0 = mipsdsp_mul_q15_q15(ac, rs0, rt0, env);              \
+    } else {                                                         \
+        prod3 = mipsdsp_mul_u16_u16(rs3, rt3);                       \
+        prod2 = mipsdsp_mul_u16_u16(rs2, rt2);                       \
+        prod1 = mipsdsp_mul_u16_u16(rs1, rt1);                       \
+        prod0 = mipsdsp_mul_u16_u16(rs0, rt0);                       \
+    }                                                                \
+                                                                     \
+    /* 64-bit dot product plus its sign extension word. */           \
+    dot_lo = (int64_t)prod3 + (int64_t)prod2 +                       \
+             (int64_t)prod1 + (int64_t)prod0;                        \
+    dot_hi = (dot_lo >= 0) ? 0 : ~0ull;                              \
+                                                                     \
+    acc_hi = env->active_tc.HI[ac];                                  \
+    acc_lo = env->active_tc.LO[ac];                                  \
+                                                                     \
+    if (is_add) {                                                    \
+        res_lo = acc_lo + dot_lo;                                    \
+        /* Unsigned wrap of the low half carries into the high. */   \
+        if (((uint64_t)res_lo < (uint64_t)acc_lo) &&                 \
+            ((uint64_t)res_lo < (uint64_t)dot_lo)) {                 \
+            acc_hi += 1;                                             \
+        }                                                            \
+        res_hi = acc_hi + dot_hi;                                    \
+    } else {                                                         \
+        res_lo = acc_lo - dot_lo;                                    \
+        /* Unsigned wrap of the low half borrows from the high. */   \
+        if ((uint64_t)res_lo > (uint64_t)acc_lo) {                   \
+            acc_hi -= 1;                                             \
+        }                                                            \
+        res_hi = acc_hi - dot_hi;                                    \
+    }                                                                \
+                                                                     \
+    env->active_tc.HI[ac] = res_hi;                                  \
+    env->active_tc.LO[ac] = res_lo;                                  \
+}
+
+DP_QH(dpa_w_qh, 1, 0);
+DP_QH(dpaq_s_w_qh, 1, 1);
+DP_QH(dps_w_qh, 0, 0);
+DP_QH(dpsq_s_w_qh, 0, 1);
+
+#undef DP_QH
+
+#endif
+
+/*
+ * Generate helper_<name>() for the saturating word multiply-accumulate
+ * helpers.
+ *
+ *   is_add  non-zero: accumulator + product, zero: accumulator - product
+ *
+ * The product comes from mipsdsp_mul_q31_q31().  The accumulator is
+ * assembled from HI[ac] and LO[ac] as a 64-bit value.  When
+ * MIPSDSP_OVERFLOW() reports an overflow of the addition, the result is
+ * clamped according to bit 63 of the wrapped sum and overflow flag
+ * 16 + ac is raised via set_DSPControl_overflow_flag().
+ */
+#define DP_L_W(name, is_add) \
+void helper_##name(uint32_t ac, target_ulong rs, target_ulong rt,     \
+                   CPUMIPSState *env)                                 \
+{                                                                     \
+    int64_t product, accum;                                           \
+    uint64_t result;                                                  \
+                                                                      \
+    product = mipsdsp_mul_q31_q31(ac, rs, rt, env);                   \
+    accum = ((uint64_t)env->active_tc.HI[ac] << 32) |                 \
+            ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);          \
+                                                                      \
+    /* Subtraction is performed as addition of the negated product. */\
+    if (!is_add) {                                                    \
+        product = -product;                                           \
+    }                                                                 \
+    result = accum + product;                                         \
+                                                                      \
+    if (MIPSDSP_OVERFLOW((uint64_t)accum, (uint64_t)product, result,  \
+                         (0x01ull << 63))) {                          \
+        int32_t sign = (result >> 63) & 0x01;                         \
+                                                                      \
+        /* A wrapped sum with bit 63 set clamps to the maximum. */    \
+        result = (sign == 1) ? ((0x01ull << 63) - 1)                  \
+                             : (0x01ull << 63);                       \
+        set_DSPControl_overflow_flag(1, 16 + ac, env);                \
+    }                                                                 \
+                                                                      \
+    env->active_tc.HI[ac] = (target_long)(int32_t)                    \
+        ((result & MIPSDSP_LHI) >> 32);                               \
+    env->active_tc.LO[ac] = (target_long)(int32_t)                    \
+        (result & MIPSDSP_LLO);                                       \
+}
+
+DP_L_W(dpaq_sa_l_w, 1);
+DP_L_W(dpsq_sa_l_w, 0);
+
+#undef DP_L_W
+
+#if defined(TARGET_MIPS64)
+/*
+ * Generate helper_<name>() for the paired-word dot-product helpers that
+ * hand the final accumulate step to mipsdsp_<func>().
+ *
+ *   func  suffix of the routine called as
+ *         mipsdsp_<func>(acc, ac, dot, env); acc[] is stored to
+ *         LO[ac] (index 0) and HI[ac] (index 1) afterwards
+ *
+ * Both word products come from mipsdsp_mul_q31_q31(); each is
+ * sign-extended to 128 bits and the two are added with an explicit carry
+ * from the low to the high 64-bit half.
+ */
+#define DP_L_PW(name, func) \
+void helper_##name(target_ulong rs, target_ulong rt, uint32_t ac, \
+                   CPUMIPSState *env)                             \
+{                                                                 \
+    int32_t rs1, rs0;                                             \
+    int32_t rt1, rt0;                                             \
+    int64_t mul1, mul0;                                           \
+    int64_t ext1, ext0;                                           \
+    int64_t dot[2];                                               \
+    int64_t acc[2];                                               \
+                                                                  \
+    MIPSDSP_SPLIT64_32(rs, rs1, rs0);                             \
+    MIPSDSP_SPLIT64_32(rt, rt1, rt0);                             \
+                                                                  \
+    mul1 = mipsdsp_mul_q31_q31(ac, rs1, rt1, env);                \
+    mul0 = mipsdsp_mul_q31_q31(ac, rs0, rt0, env);                \
+                                                                  \
+    /* Upper words of the sign-extended 128-bit products. */      \
+    ext1 = (mul1 >= 0) ? 0x00 : ~0ull;                            \
+    ext0 = (mul0 >= 0) ? 0x00 : ~0ull;                            \
+                                                                  \
+    dot[0] = mul1 + mul0;                                         \
+    dot[1] = 0;                                                   \
+    if (((uint64_t)dot[0] < (uint64_t)mul1) &&                    \
+        ((uint64_t)dot[0] < (uint64_t)mul0)) {                    \
+        /* Low half wrapped: carry into the high half. */         \
+        dot[1] += 1;                                              \
+    }                                                             \
+    dot[1] += ext1 + ext0;                                        \
+                                                                  \
+    mipsdsp_##func(acc, ac, dot, env);                            \
+                                                                  \
+    env->active_tc.HI[ac] = acc[1];                               \
+    env->active_tc.LO[ac] = acc[0];                               \
+}
+
+DP_L_PW(dpaq_sa_l_pw, sat64_acc_add_q63);
+DP_L_PW(dpsq_sa_l_pw, sat64_acc_sub_q63);
+
+#undef DP_L_PW
+
+/*
+ * Multiply the upper and lower 32-bit words of rs and rt through
+ * mipsdsp_mul_q31_q31(), subtract the lower product from the upper one
+ * as 128-bit quantities, and add the difference to the 128-bit
+ * accumulator HI[ac]:LO[ac].
+ *
+ * The upper word of the difference is rebuilt as all-zeros or all-ones
+ * from its bit 0 before the final addition.
+ */
+void helper_mulsaq_s_l_pw(target_ulong rs, target_ulong rt, uint32_t ac,
+                          CPUMIPSState *env)
+{
+    int32_t rs1 = (rs >> 32) & MIPSDSP_LLO;
+    int32_t rs0 = rs & MIPSDSP_LLO;
+    int32_t rt1 = (rt >> 32) & MIPSDSP_LLO;
+    int32_t rt0 = rt & MIPSDSP_LLO;
+    int64_t mul1, mul0;
+    int64_t ext1, ext0;
+    int64_t diff_lo, diff_hi;
+    int64_t acc_lo, acc_hi;
+    int64_t sum_lo;
+
+    mul1 = mipsdsp_mul_q31_q31(ac, rs1, rt1, env);
+    mul0 = mipsdsp_mul_q31_q31(ac, rs0, rt0, env);
+
+    /* Upper words of the sign-extended 128-bit products. */
+    ext1 = (mul1 >= 0) ? 0x00 : ~0ull;
+    ext0 = (mul0 >= 0) ? 0x00 : ~0ull;
+
+    acc_lo = env->active_tc.LO[ac];
+    acc_hi = env->active_tc.HI[ac];
+
+    /* 128-bit subtraction: upper product minus lower product. */
+    diff_lo = mul1 - mul0;
+    if ((uint64_t)diff_lo > (uint64_t)mul1) {
+        ext1 -= 1;
+    }
+    diff_hi = ext1 - ext0;
+
+    diff_hi = ((diff_hi & 0x01) == 0) ? 0x00 : ~0ull;
+
+    /* 128-bit addition into the accumulator, with carry. */
+    sum_lo = acc_lo + diff_lo;
+    if (((uint64_t)sum_lo < (uint64_t)acc_lo) &&
+        ((uint64_t)sum_lo < (uint64_t)diff_lo)) {
+        acc_hi += 1;
+    }
+    acc_hi += diff_hi;
+
+    env->active_tc.HI[ac] = acc_hi;
+    env->active_tc.LO[ac] = sum_lo;
+}
+#endif
+
+/*
+ * Generate helper_<name>() for the single-halfword multiply-accumulate
+ * helpers.
+ *
+ *   mov  right shift selecting the halfword taken from both rs and rt
+ *
+ * The product from mipsdsp_mul_q15_q15() is added to the 64-bit
+ * accumulator assembled from HI[ac] and LO[ac]; the sum is written back
+ * as two sign-extended 32-bit halves.
+ */
+#define MAQ_S_W(name, mov) \
+void helper_##name(uint32_t ac, target_ulong rs, target_ulong rt, \
+                   CPUMIPSState *env)                             \
+{                                                                 \
+    int16_t rs_half = (rs >> mov) & MIPSDSP_LO;                   \
+    int16_t rt_half = (rt >> mov) & MIPSDSP_LO;                   \
+    int32_t product;                                              \
+    int64_t accum, total;                                         \
+                                                                  \
+    product = mipsdsp_mul_q15_q15(ac, rs_half, rt_half, env);     \
+    accum = ((uint64_t)env->active_tc.HI[ac] << 32) |             \
+            ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);      \
+    total = (int64_t)product + accum;                             \
+                                                                  \
+    env->active_tc.HI[ac] = (target_long)(int32_t)                \
+        ((total & MIPSDSP_LHI) >> 32);                            \
+    env->active_tc.LO[ac] = (target_long)(int32_t)                \
+        (total & MIPSDSP_LLO);                                    \
+}
+
+MAQ_S_W(maq_s_w_phl, 16);
+MAQ_S_W(maq_s_w_phr, 0);
+
+#undef MAQ_S_W
+
+/*
+ * Generate helper_<name>() for the single-halfword multiply-accumulate
+ * helpers that pass the product through mipsdsp_sat32_acc_q31().
+ *
+ *   mov  right shift selecting the halfword taken from both rs and rt
+ *
+ * The 32-bit value returned by mipsdsp_sat32_acc_q31() is sign-extended
+ * to 64 bits and stored as HI[ac] (upper half) and LO[ac] (lower half).
+ */
+#define MAQ_SA_W(name, mov) \
+void helper_##name(uint32_t ac, target_ulong rs, target_ulong rt,        \
+                   CPUMIPSState *env)                                    \
+{                                                                        \
+    int16_t rs_half = (rs >> mov) & MIPSDSP_LO;                          \
+    int16_t rt_half = (rt >> mov) & MIPSDSP_LO;                          \
+    int32_t value;                                                       \
+    int64_t wide;                                                        \
+                                                                         \
+    value = mipsdsp_mul_q15_q15(ac, rs_half, rt_half, env);              \
+    value = mipsdsp_sat32_acc_q31(ac, value, env);                       \
+    wide = (int64_t)value;                                               \
+                                                                         \
+    env->active_tc.HI[ac] = (target_long)(int32_t)                       \
+        ((wide & MIPSDSP_LHI) >> 32);                                    \
+    env->active_tc.LO[ac] = (target_long)(int32_t)                       \
+        (wide & MIPSDSP_LLO);                                            \
+}
+
+MAQ_SA_W(maq_sa_w_phl, 16);
+MAQ_SA_W(maq_sa_w_phr, 0);
+
+#undef MAQ_SA_W
+
+/*
+ * Generate helper_<name>() for the fractional word multiply helpers.
+ *
+ *   addvar  constant added to the doubled 64-bit product before the
+ *           upper word is taken (0, or 0x80000000 for rounding)
+ *
+ * The low 32 bits of rs and rt are multiplied as SIGNED values and the
+ * product is doubled.  The one combination that does not fit,
+ * 0x80000000 * 0x80000000, yields 0x7FFFFFFF in the upper word and
+ * raises overflow flag 21 via set_DSPControl_overflow_flag().
+ *
+ * Returns the upper 32 bits of the result, sign-extended to target_long.
+ */
+#define MULQ_W(name, addvar) \
+target_ulong helper_##name(target_ulong rs, target_ulong rt,   \
+                           CPUMIPSState *env)                  \
+{                                                              \
+    int32_t rs_t, rt_t;                                        \
+    int32_t tempI;                                             \
+    int64_t tempL;                                             \
+                                                               \
+    /* Signed operands: an unsigned type here would turn the */\
+    /* multiplication below into an unsigned one.            */\
+    rs_t = (int32_t)(rs & MIPSDSP_LLO);                        \
+    rt_t = (int32_t)(rt & MIPSDSP_LLO);                        \
+                                                               \
+    if ((rs_t == (int32_t)0x80000000) &&                       \
+        (rt_t == (int32_t)0x80000000)) {                       \
+        tempL = 0x7FFFFFFF00000000ull;                         \
+        set_DSPControl_overflow_flag(1, 21, env);              \
+    } else {                                                   \
+        /* "* 2" instead of "<< 1": the product may be       */\
+        /* negative, and shifting a negative value left is   */\
+        /* undefined.  It cannot overflow once the case      */\
+        /* above is excluded.                                */\
+        tempL  = (int64_t)rs_t * (int64_t)rt_t * 2;            \
+        tempL += addvar;                                       \
+    }                                                          \
+    tempI = (tempL & MIPSDSP_LHI) >> 32;                       \
+                                                               \
+    return (target_long)(int32_t)tempI;                        \
+}
+
+MULQ_W(mulq_s_w, 0);
+MULQ_W(mulq_rs_w, 0x80000000ull);
+
+#undef MULQ_W
+
+#if defined(TARGET_MIPS64)
+
+/*
+ * Generate helper_<name>() for the quad-halfword multiply-accumulate
+ * helpers working on the 128-bit accumulator HI[ac]:LO[ac].
+ *
+ *   mov  right shift selecting the halfword taken from both rs and rt
+ *
+ * The product from mipsdsp_mul_q15_q15() is sign-extended to 128 bits
+ * and added to the accumulator with an explicit carry between halves.
+ */
+#define MAQ_S_W_QH(name, mov) \
+void helper_##name(target_ulong rs, target_ulong rt, uint32_t ac, \
+                   CPUMIPSState *env)                             \
+{                                                                 \
+    int16_t rs_half = (rs >> mov) & MIPSDSP_LO;                   \
+    int16_t rt_half = (rt >> mov) & MIPSDSP_LO;                   \
+    int32_t product;                                              \
+    int64_t add_lo, add_hi;                                       \
+    int64_t acc_lo, acc_hi;                                       \
+    int64_t sum_lo;                                               \
+                                                                  \
+    product = mipsdsp_mul_q15_q15(ac, rs_half, rt_half, env);     \
+                                                                  \
+    add_lo = (int64_t)product;                                    \
+    add_hi = (add_lo >= 0) ? 0x00 : ~0ull;                        \
+                                                                  \
+    acc_lo = env->active_tc.LO[ac];                               \
+    acc_hi = env->active_tc.HI[ac];                               \
+                                                                  \
+    sum_lo = acc_lo + add_lo;                                     \
+    if (((uint64_t)sum_lo < (uint64_t)acc_lo) &&                  \
+        ((uint64_t)sum_lo < (uint64_t)add_lo)) {                  \
+        /* Low half wrapped: carry into the high half. */         \
+        acc_hi += 1;                                              \
+    }                                                             \
+    acc_hi += add_hi;                                             \
+                                                                  \
+    env->active_tc.HI[ac] = acc_hi;                               \
+    env->active_tc.LO[ac] = sum_lo;                               \
+}
+
+MAQ_S_W_QH(maq_s_w_qhll, 48);
+MAQ_S_W_QH(maq_s_w_qhlr, 32);
+MAQ_S_W_QH(maq_s_w_qhrl, 16);
+MAQ_S_W_QH(maq_s_w_qhrr, 0);
+
+#undef MAQ_S_W_QH
+
+/*
+ * Generate helper_<name>() for the quad-halfword multiply-accumulate
+ * helpers that pass the product through mipsdsp_sat32_acc_q31().
+ *
+ *   mov  right shift selecting the halfword taken from both rs and rt
+ *
+ * The 32-bit value returned by mipsdsp_sat32_acc_q31() is sign-extended:
+ * LO[ac] receives the 64-bit value, HI[ac] receives all-zeros or
+ * all-ones according to its sign.
+ */
+#define MAQ_SA_W(name, mov) \
+void helper_##name(target_ulong rs, target_ulong rt, uint32_t ac, \
+                   CPUMIPSState *env)                             \
+{                                                                 \
+    int16_t rs_half = (rs >> mov) & MIPSDSP_LO;                   \
+    int16_t rt_half = (rt >> mov) & MIPSDSP_LO;                   \
+    int32_t value;                                                \
+    int64_t out_lo, out_hi;                                       \
+                                                                  \
+    value = mipsdsp_mul_q15_q15(ac, rs_half, rt_half, env);       \
+    value = mipsdsp_sat32_acc_q31(ac, value, env);                \
+                                                                  \
+    out_lo = (int64_t)(int32_t)value;                             \
+    out_hi = (out_lo >= 0) ? 0x00 : ~0ull;                        \
+                                                                  \
+    env->active_tc.HI[ac] = out_hi;                               \
+    env->active_tc.LO[ac] = out_lo;                               \
+}
+
+MAQ_SA_W(maq_sa_w_qhll, 48);
+MAQ_SA_W(maq_sa_w_qhlr, 32);
+MAQ_SA_W(maq_sa_w_qhrl, 16);
+MAQ_SA_W(maq_sa_w_qhrr, 0);
+
+#undef MAQ_SA_W
+
+/*
+ * Generate helper_<name>() for the paired-word multiply-accumulate
+ * helpers working on the 128-bit accumulator HI[ac]:LO[ac].
+ *
+ *   mov  right shift selecting the 32-bit word taken from both rs and rt
+ *
+ * The 64-bit product from mipsdsp_mul_q31_q31() is sign-extended to
+ * 128 bits and added to the accumulator with an explicit carry between
+ * halves.
+ */
+#define MAQ_S_L_PW(name, mov) \
+void helper_##name(target_ulong rs, target_ulong rt, uint32_t ac, \
+                   CPUMIPSState *env)                             \
+{                                                                 \
+    int32_t rs_word = (rs >> mov) & MIPSDSP_LLO;                  \
+    int32_t rt_word = (rt >> mov) & MIPSDSP_LLO;                  \
+    int64_t add_lo, add_hi;                                       \
+    int64_t acc_lo, acc_hi;                                       \
+    int64_t sum_lo;                                               \
+                                                                  \
+    add_lo = mipsdsp_mul_q31_q31(ac, rs_word, rt_word, env);      \
+    add_hi = (add_lo >= 0) ? 0x00 : ~0ull;                        \
+                                                                  \
+    acc_lo = env->active_tc.LO[ac];                               \
+    acc_hi = env->active_tc.HI[ac];                               \
+                                                                  \
+    sum_lo = acc_lo + add_lo;                                     \
+    if (((uint64_t)sum_lo < (uint64_t)acc_lo) &&                  \
+        ((uint64_t)sum_lo < (uint64_t)add_lo)) {                  \
+        /* Low half wrapped: carry into the high half. */         \
+        acc_hi += 1;                                              \
+    }                                                             \
+    acc_hi += add_hi;                                             \
+                                                                  \
+    env->active_tc.HI[ac] = acc_hi;                               \
+    env->active_tc.LO[ac] = sum_lo;                               \
+}
+
+MAQ_S_L_PW(maq_s_l_pwl, 32);
+MAQ_S_L_PW(maq_s_l_pwr, 0);
+
+#undef MAQ_S_L_PW
+
+/*
+ * Generate helper_<name>() for the paired-word multiply add / subtract
+ * helpers working on the 128-bit accumulator HI[ac]:LO[ac].
+ *
+ *   func    suffix of the multiplier, called as mipsdsp_<func>(a, b)
+ *   is_add  non-zero: accumulator + sum of products,
+ *           zero:     accumulator - sum of products
+ *   sigext  non-zero: each product is sign-extended to 128 bits,
+ *           zero:     each product is zero-extended
+ *
+ * The two word products are summed as 128-bit values, then combined
+ * with the accumulator using an explicit carry / borrow between the two
+ * 64-bit halves.
+ */
+#define DM_OPERATE(name, func, is_add, sigext) \
+void helper_##name(target_ulong rs, target_ulong rt, uint32_t ac,    \
+                  CPUMIPSState *env)                                 \
+{                                                                    \
+    int32_t rs1, rs0;                                                \
+    int32_t rt1, rt0;                                                \
+    int64_t mul1_lo, mul1_hi;                                        \
+    int64_t mul0_lo, mul0_hi;                                        \
+    int64_t dot_lo, dot_hi;                                          \
+    int64_t acc_lo, acc_hi;                                          \
+    int64_t res_lo, res_hi;                                          \
+                                                                     \
+    MIPSDSP_SPLIT64_32(rs, rs1, rs0);                                \
+    MIPSDSP_SPLIT64_32(rt, rt1, rt0);                                \
+                                                                     \
+    if (sigext) {                                                    \
+        mul1_lo = (int64_t)mipsdsp_##func(rs1, rt1);                 \
+        mul0_lo = (int64_t)mipsdsp_##func(rs0, rt0);                 \
+        mul1_hi = (mul1_lo >= 0) ? 0x0 : ~0ull;                      \
+        mul0_hi = (mul0_lo >= 0) ? 0x0 : ~0ull;                      \
+    } else {                                                         \
+        mul1_lo = mipsdsp_##func(rs1, rt1);                          \
+        mul0_lo = mipsdsp_##func(rs0, rt0);                          \
+        mul1_hi = 0;                                                 \
+        mul0_hi = 0;                                                 \
+    }                                                                \
+                                                                     \
+    acc_hi = env->active_tc.HI[ac];                                  \
+    acc_lo = env->active_tc.LO[ac];                                  \
+                                                                     \
+    /* 128-bit sum of the two products. */                           \
+    dot_lo = mul1_lo + mul0_lo;                                      \
+    dot_hi = 0x00;                                                   \
+    if (((uint64_t)dot_lo < (uint64_t)mul1_lo) &&                    \
+        ((uint64_t)dot_lo < (uint64_t)mul0_lo)) {                    \
+        dot_hi += 1;                                                 \
+    }                                                                \
+    dot_hi += mul1_hi + mul0_hi;                                     \
+                                                                     \
+    if (is_add) {                                                    \
+        res_lo = acc_lo + dot_lo;                                    \
+        /* Unsigned wrap of the low half carries into the high. */   \
+        if (((uint64_t)res_lo < (uint64_t)acc_lo) &&                 \
+            ((uint64_t)res_lo < (uint64_t)dot_lo)) {                 \
+            acc_hi += 1;                                             \
+        }                                                            \
+        res_hi = acc_hi + dot_hi;                                    \
+    } else {                                                         \
+        res_lo = acc_lo - dot_lo;                                    \
+        /* Unsigned wrap of the low half borrows from the high. */   \
+        if ((uint64_t)res_lo > (uint64_t)acc_lo) {                   \
+            acc_hi -= 1;                                             \
+        }                                                            \
+        res_hi = acc_hi - dot_hi;                                    \
+    }                                                                \
+                                                                     \
+    env->active_tc.HI[ac] = res_hi;                                  \
+    env->active_tc.LO[ac] = res_lo;                                  \
+}
+
+DM_OPERATE(dmadd, mul_i32_i32, 1, 1);
+DM_OPERATE(dmaddu, mul_u32_u32, 1, 0);
+DM_OPERATE(dmsub, mul_i32_i32, 0, 1);
+DM_OPERATE(dmsubu, mul_u32_u32, 0, 0);
+#undef DM_OPERATE
+#endif
+
+/** DSP Bit/Manipulation Sub-class insns **/
+/*
+ * Reverse the order of the 16 low bits of rt (after masking with
+ * MIPSDSP_LO): bit i of the input becomes bit 15 - i of the result.
+ * All higher bits of the returned value are zero.
+ */
+target_ulong helper_bitrev(target_ulong rt)
+{
+    int32_t src = rt & MIPSDSP_LO;
+    uint32_t reversed = 0;
+    int bit;
+
+    for (bit = 0; bit < 16; bit++) {
+        if ((src >> bit) & 1) {
+            reversed |= 1u << (15 - bit);
+        }
+    }
+
+    return (target_ulong)reversed;
+}
+
+/*
+ * Generate helper_<name>() for the bit-field insert helpers whose field
+ * position and size come from DSPControl.
+ *
+ *   posfilter   mask applied to DSPControl to obtain the field position
+ *   sizefilter  mask applied to (DSPControl >> 7) to obtain the field size
+ *   ret_type    type the result is narrowed to before it is sign-extended
+ *               to target_long
+ *
+ * The low <size> bits of rs replace bits pos .. pos + size - 1 of rt.
+ * rt is returned unchanged when the field is empty or does not fit in
+ * TARGET_LONG_BITS bits.
+ *
+ * The mask is built in 64-bit unsigned arithmetic: after the range check
+ * 1 <= size <= 64 and pos < TARGET_LONG_BITS, so neither shift below can
+ * reach the width of its operand.
+ */
+#define BIT_INSV(name, posfilter, sizefilter, ret_type)         \
+target_ulong helper_##name(CPUMIPSState *env, target_ulong rs,  \
+                           target_ulong rt)                     \
+{                                                               \
+    uint32_t pos, size, msb, lsb;                               \
+    uint64_t filter;                                            \
+    target_ulong temp;                                          \
+    target_ulong dspc;                                          \
+                                                                \
+    dspc = env->active_tc.DSPControl;                           \
+                                                                \
+    pos  = dspc & posfilter;                                    \
+    size = (dspc >> 7) & sizefilter;                            \
+                                                                \
+    /* size == 0 makes msb wrap or drop below lsb: rejected. */ \
+    msb  = pos + size - 1;                                      \
+    lsb  = pos;                                                 \
+                                                                \
+    if (lsb > msb || (msb >= TARGET_LONG_BITS)) {               \
+        return rt;                                              \
+    }                                                           \
+                                                                \
+    filter = (~(uint64_t)0 >> (64 - size)) << pos;              \
+    /* Source bits come from the bottom of rs, moved to pos. */ \
+    temp = (((uint64_t)rs << pos) & filter) | (rt & ~filter);   \
+                                                                \
+    return (target_long)(ret_type)temp;                         \
+}
+
+/* The size field is 6 bits wide so that a 32-bit insert is expressible. */
+BIT_INSV(insv, 0x1F, 0x3F, int32_t);
+#ifdef TARGET_MIPS64
+BIT_INSV(dinsv, 0x7F, 0x3F, target_long);
+#endif
+
+#undef BIT_INSV
+
+
+/** DSP Compare-Pick Sub-class insns **/
+/*
+ * Compare every lane of rs against the matching lane of rt with
+ * mipsdsp_<func> and return the outcomes as a bit vector: bit i of the
+ * result holds the comparison result for lane i (lane 0 is the least
+ * significant).  split_num is the lane count, bit_size the lane width and
+ * filter the mask applied to each extracted lane.
+ */
+#define CMP_HAS_RET(name, func, split_num, filter, bit_size) \
+target_ulong helper_##name(target_ulong rs, target_ulong rt) \
+{                                                       \
+    uint32_t outcome = 0;                               \
+    int lane;                                           \
+                                                        \
+    for (lane = 0; lane < split_num; lane++) {          \
+        int offset = bit_size * lane;                   \
+        uint32_t lhs = (rs >> offset) & filter;         \
+        uint32_t rhs = (rt >> offset) & filter;         \
+        uint8_t hit = mipsdsp_##func(lhs, rhs);         \
+                                                        \
+        outcome |= hit << lane;                         \
+    }                                                   \
+                                                        \
+    return (target_ulong)outcome;                       \
+}
+
+CMP_HAS_RET(cmpgu_eq_qb, cmpu_eq, 4, MIPSDSP_Q0, 8);
+CMP_HAS_RET(cmpgu_lt_qb, cmpu_lt, 4, MIPSDSP_Q0, 8);
+CMP_HAS_RET(cmpgu_le_qb, cmpu_le, 4, MIPSDSP_Q0, 8);
+
+#ifdef TARGET_MIPS64
+CMP_HAS_RET(cmpgu_eq_ob, cmpu_eq, 8, MIPSDSP_Q0, 8);
+CMP_HAS_RET(cmpgu_lt_ob, cmpu_lt, 8, MIPSDSP_Q0, 8);
+CMP_HAS_RET(cmpgu_le_ob, cmpu_le, 8, MIPSDSP_Q0, 8);
+#endif
+
+#undef CMP_HAS_RET
+
+
+/*
+ * Lane-wise compare of rs and rt whose outcome is stored rather than
+ * returned: bit i of the collected flags is the mipsdsp_<func> result for
+ * lane i, and the flags are handed to set_DSPControl_24() together with
+ * the lane count.  Lanes and flags are held in int<bit_size>_t variables.
+ */
+#define CMP_NO_RET(name, func, split_num, filter, bit_size) \
+void helper_##name(target_ulong rs, target_ulong rt,        \
+                            CPUMIPSState *env)              \
+{                                                           \
+    int##bit_size##_t lhs, rhs;                             \
+    int##bit_size##_t hit;                                  \
+    int##bit_size##_t bits = 0;                             \
+    int lane = 0;                                           \
+                                                            \
+    while (lane < split_num) {                              \
+        lhs = (rs >> (bit_size * lane)) & filter;           \
+        rhs = (rt >> (bit_size * lane)) & filter;           \
+                                                            \
+        hit = mipsdsp_##func((int32_t)lhs, (int32_t)rhs);   \
+        bits |= hit << lane;                                \
+        lane++;                                             \
+    }                                                       \
+                                                            \
+    set_DSPControl_24(bits, split_num, env);                \
+}
+
+CMP_NO_RET(cmpu_eq_qb, cmpu_eq, 4, MIPSDSP_Q0, 8);
+CMP_NO_RET(cmpu_lt_qb, cmpu_lt, 4, MIPSDSP_Q0, 8);
+CMP_NO_RET(cmpu_le_qb, cmpu_le, 4, MIPSDSP_Q0, 8);
+
+CMP_NO_RET(cmp_eq_ph, cmp_eq, 2, MIPSDSP_LO, 16);
+CMP_NO_RET(cmp_lt_ph, cmp_lt, 2, MIPSDSP_LO, 16);
+CMP_NO_RET(cmp_le_ph, cmp_le, 2, MIPSDSP_LO, 16);
+
+#ifdef TARGET_MIPS64
+CMP_NO_RET(cmpu_eq_ob, cmpu_eq, 8, MIPSDSP_Q0, 8);
+CMP_NO_RET(cmpu_lt_ob, cmpu_lt, 8, MIPSDSP_Q0, 8);
+CMP_NO_RET(cmpu_le_ob, cmpu_le, 8, MIPSDSP_Q0, 8);
+
+CMP_NO_RET(cmp_eq_qh, cmp_eq, 4, MIPSDSP_LO, 16);
+CMP_NO_RET(cmp_lt_qh, cmp_lt, 4, MIPSDSP_LO, 16);
+CMP_NO_RET(cmp_le_qh, cmp_le, 4, MIPSDSP_LO, 16);
+
+CMP_NO_RET(cmp_eq_pw, cmp_eq, 2, MIPSDSP_LLO, 32);
+CMP_NO_RET(cmp_lt_pw, cmp_lt, 2, MIPSDSP_LLO, 32);
+CMP_NO_RET(cmp_le_pw, cmp_le, 2, MIPSDSP_LLO, 32);
+#endif
+#undef CMP_NO_RET
+
+#if defined(TARGET_MIPS64)
+
+/*
+ * CMPGDU.<name>.OB: compare the eight bytes of rs and rt with
+ * mipsdsp_cmpu_<name>.  Bit i of the result is set when the comparison
+ * holds for byte i; the same bit vector is passed to set_DSPControl_24()
+ * and returned.
+ */
+#define CMPGDU_OB(name) \
+target_ulong helper_cmpgdu_##name##_ob(target_ulong rs, target_ulong rt, \
+                                       CPUMIPSState *env)  \
+{                                                     \
+    uint32_t hits = 0;                                \
+    int lane;                                         \
+                                                      \
+    for (lane = 0; lane < 8; lane++) {                \
+        uint8_t lhs = (rs >> (8 * lane)) & MIPSDSP_Q0; \
+        uint8_t rhs = (rt >> (8 * lane)) & MIPSDSP_Q0; \
+                                                      \
+        if (!mipsdsp_cmpu_##name(lhs, rhs)) {         \
+            continue;                                 \
+        }                                             \
+        hits |= 0x01 << lane;                         \
+    }                                                 \
+                                                      \
+    set_DSPControl_24(hits, 8, env);                  \
+                                                      \
+    return (uint64_t)hits;                            \
+}
+
+CMPGDU_OB(eq)
+CMPGDU_OB(lt)
+CMPGDU_OB(le)
+#undef CMPGDU_OB
+#endif
+
+/*
+ * PICK: build the result lane by lane, taking lane i from rs when bit
+ * (24 + i) of DSPControl is set and from rt otherwise.  split_num is the
+ * lane count, bit_size the lane width and filter the lane mask.  When
+ * ret32bit is non-zero the low 32 bits of the result are sign-extended.
+ */
+#define PICK_INSN(name, split_num, filter, bit_size, ret32bit) \
+target_ulong helper_##name(target_ulong rs, target_ulong rt,   \
+                            CPUMIPSState *env)                 \
+{                                                              \
+    target_ulong cond = env->active_tc.DSPControl >> 24;       \
+    target_ulong picked = 0;                                   \
+    int lane;                                                  \
+                                                               \
+    for (lane = 0; lane < split_num; lane++) {                 \
+        int offset = bit_size * lane;                          \
+        uint32_t from_rs = (rs >> offset) & filter;            \
+        uint32_t from_rt = (rt >> offset) & filter;            \
+        uint32_t chosen;                                       \
+                                                               \
+        if ((cond >> lane) & 0x01) {                           \
+            chosen = from_rs;                                  \
+        } else {                                               \
+            chosen = from_rt;                                  \
+        }                                                      \
+                                                               \
+        picked |= (target_ulong)chosen << offset;              \
+    }                                                          \
+                                                               \
+    if (ret32bit) {                                            \
+        picked = (target_long)(int32_t)(picked & MIPSDSP_LLO); \
+    }                                                          \
+                                                               \
+    return picked;                                             \
+}
+
+PICK_INSN(pick_qb, 4, MIPSDSP_Q0, 8, 1);
+PICK_INSN(pick_ph, 2, MIPSDSP_LO, 16, 1);
+
+#ifdef TARGET_MIPS64
+PICK_INSN(pick_ob, 8, MIPSDSP_Q0, 8, 0);
+PICK_INSN(pick_qh, 4, MIPSDSP_LO, 16, 0);
+PICK_INSN(pick_pw, 2, MIPSDSP_LLO, 32, 0);
+#endif
+#undef PICK_INSN
+
+/*
+ * APPEND / DAPPEND: shift rt left by sa bits and fill the vacated
+ * low-order bits with the low-order sa bits of rs.
+ *
+ * With ret_32 set, only the low 32 bits of both registers take part and
+ * the 32-bit result is sign-extended; otherwise the full registers are
+ * used.  sa is expected to be smaller than the register width.
+ */
+#define APPEND_INSN(name, ret_32) \
+target_ulong helper_##name(target_ulong rt, target_ulong rs, uint32_t sa) \
+{                                                                         \
+    /* Register-wide mask: an int shift overflows for sa >= 31. */        \
+    target_ulong low_mask = ((target_ulong)0x01 << sa) - 1;               \
+    target_ulong temp;                                                    \
+                                                                          \
+    if (ret_32) {                                                         \
+        temp = ((rt & MIPSDSP_LLO) << sa) |                               \
+               ((rs & MIPSDSP_LLO) & low_mask);                           \
+        temp = (target_long)(int32_t)(temp & MIPSDSP_LLO);                \
+    } else {                                                              \
+        temp = (rt << sa) | (rs & low_mask);                              \
+    }                                                                     \
+                                                                          \
+    return temp;                                                          \
+}
+
+APPEND_INSN(append, 1);
+#ifdef TARGET_MIPS64
+APPEND_INSN(dappend, 0);
+#endif
+#undef APPEND_INSN
+
+/*
+ * PREPEND / PREPENDW / PREPENDD: shift rt right by sa bits and fill the
+ * vacated high-order bits with the low-order sa bits of rs.
+ *
+ * or_val is OR-ed into sa first (0x20 for the variant whose shift amount
+ * starts at 32).  With ret_32 set the operation works on the low 32 bits
+ * of both registers and the 32-bit result is sign-extended; otherwise it
+ * works on the full 64-bit registers.  A shift amount of zero leaves rt
+ * unchanged (sign-extended from 32 bits in the ret_32 form).
+ */
+#define PREPEND_INSN(name, or_val, ret_32)                    \
+target_ulong helper_##name(target_ulong rs, target_ulong rt,  \
+                           uint32_t sa)                       \
+{                                                             \
+    sa |= or_val;                                             \
+                                                              \
+    if (ret_32) {                                             \
+        /* sa == 0 would shift rs by the full word width. */  \
+        if (sa == 0) {                                        \
+            return (target_long)(int32_t)(uint32_t)           \
+                (rt & MIPSDSP_LLO);                           \
+        }                                                     \
+        return (target_long)(int32_t)(uint32_t)               \
+            (((rs & MIPSDSP_LLO) << (32 - sa)) |              \
+             ((rt & MIPSDSP_LLO) >> sa));                     \
+    } else {                                                  \
+        /* sa == 0 would shift rs by 64 bits. */              \
+        if (sa == 0) {                                        \
+            return rt;                                        \
+        }                                                     \
+        return (rs << (64 - sa)) | (rt >> sa);                \
+    }                                                         \
+}
+
+PREPEND_INSN(prepend, 0, 1);
+#ifdef TARGET_MIPS64
+PREPEND_INSN(prependw, 0, 0);
+PREPEND_INSN(prependd, 0x20, 0);
+#endif
+#undef PREPEND_INSN
+
+/*
+ * BALIGN / DBALIGN: byte-align across two registers.  rt is shifted left
+ * by bp bytes and the vacated low-order bytes are filled from the top of
+ * rs.
+ *
+ * filter selects the valid bits of bp (0x03 for the 4-byte form, 0x07 for
+ * the 8-byte form).  An even bp returns rt unchanged.  With ret32 set, rs
+ * is treated as an unsigned 32-bit value and the 32-bit result is
+ * sign-extended.
+ */
+#define BALIGN_INSN(name, filter, ret32) \
+target_ulong helper_##name(target_ulong rs, target_ulong rt, uint32_t bp) \
+{                                                                         \
+    bp = bp & filter;                                                     \
+                                                                          \
+    if ((bp & 1) == 0) {                                                  \
+        return rt;                                                        \
+    } else {                                                              \
+        if (ret32) {                                                      \
+            /* Mask rs so that sign-extension bits do not leak in. */     \
+            return (target_long)(int32_t)((rt << (8 * bp)) |              \
+                        ((rs & MIPSDSP_LLO) >> (8 * (4 - bp))));          \
+        } else {                                                          \
+            return (rt << (8 * bp)) | (rs >> (8 * (8 - bp)));             \
+        }                                                                 \
+    }                                                                     \
+}
+
+BALIGN_INSN(balign, 0x03, 1);
+#if defined(TARGET_MIPS64)
+BALIGN_INSN(dbalign, 0x07, 0);
+#endif
+#undef BALIGN_INSN
+
+/*
+ * PACKRL.PH: the bits of rs selected by MIPSDSP_LO become the upper
+ * halfword of the result and the bits of rt selected by MIPSDSP_HI
+ * (moved down by 16) become the lower halfword.  The 32-bit value is
+ * returned sign-extended.
+ */
+target_ulong helper_packrl_ph(target_ulong rs, target_ulong rt)
+{
+    uint32_t packed;
+
+    packed  = (uint32_t)(rs & MIPSDSP_LO) << 16;
+    packed |= (uint32_t)((rt & MIPSDSP_HI) >> 16);
+
+    return (target_long)(int32_t)packed;
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * PACKRL.PW: the low word of rs becomes the upper word of the result and
+ * the upper word of rt becomes the lower word.
+ */
+target_ulong helper_packrl_pw(target_ulong rs, target_ulong rt)
+{
+    uint64_t upper = (uint32_t)(rs & MIPSDSP_LLO);
+    uint64_t lower = (uint32_t)((rt >> 32) & MIPSDSP_LLO);
+
+    return (upper << 32) | lower;
+}
+#endif
+
+/** DSP Accumulator and DSPControl Access Sub-class insns **/
+/*
+ * EXTR.W: extract a 32-bit word from accumulator ac after a right shift.
+ *
+ * mipsdsp_rndrashift_short_acc() fills tempDL with the shifted
+ * accumulator; the lowest bit of tempDL[0] is dropped from the returned
+ * value.  Overflow flag 23 is raised when the value does not fit, checked
+ * both before and after adding 1 at that lowest bit.  The returned word is
+ * taken before the increment and is sign-extended.
+ */
+target_ulong helper_extr_w(target_ulong ac, target_ulong shift,
+                           CPUMIPSState *env)
+{
+    int32_t tempI;
+    int64_t tempDL[2];
+
+    /* The shift amount is a 5-bit quantity (0..31). */
+    shift = shift & 0x1F;
+
+    mipsdsp_rndrashift_short_acc(tempDL, ac, shift, env);
+    if ((tempDL[1] != 0 || (tempDL[0] & MIPSDSP_LHI) != 0) &&
+        (tempDL[1] != 1 || (tempDL[0] & MIPSDSP_LHI) != MIPSDSP_LHI)) {
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    /* Capture the result before the increment below. */
+    tempI = (tempDL[0] >> 1) & MIPSDSP_LLO;
+
+    tempDL[0] += 1;
+    if (tempDL[0] == 0) {
+        tempDL[1] += 1;
+    }
+
+    if ((!(tempDL[1] == 0 && (tempDL[0] & MIPSDSP_LHI) == 0x00)) &&
+        (!(tempDL[1] == 1 && (tempDL[0] & MIPSDSP_LHI) == MIPSDSP_LHI))) {
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    return (target_long)tempI;
+}
+
+/*
+ * EXTR_R.W: extract a rounded 32-bit word from accumulator ac after a
+ * right shift.
+ *
+ * mipsdsp_rndrashift_short_acc() fills tempDL with the shifted
+ * accumulator; 1 is added at the lowest bit of tempDL[0] and that bit is
+ * then dropped.  Overflow flag 23 is raised when the value does not fit
+ * in 32 bits, checked both before and after the increment.
+ */
+target_ulong helper_extr_r_w(target_ulong ac, target_ulong shift,
+                             CPUMIPSState *env)
+{
+    int64_t tempDL[2];
+
+    /* The shift amount is a 5-bit quantity (0..31). */
+    shift = shift & 0x1F;
+
+    mipsdsp_rndrashift_short_acc(tempDL, ac, shift, env);
+    if ((tempDL[1] != 0 || (tempDL[0] & MIPSDSP_LHI) != 0) &&
+        (tempDL[1] != 1 || (tempDL[0] & MIPSDSP_LHI) != MIPSDSP_LHI)) {
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    tempDL[0] += 1;
+    if (tempDL[0] == 0) {
+        tempDL[1] += 1;
+    }
+
+    /* Same fit test as above: neither all-zero nor all-one upper bits. */
+    if ((tempDL[1] != 0 || (tempDL[0] & MIPSDSP_LHI) != 0) &&
+        (tempDL[1] != 1 || (tempDL[0] & MIPSDSP_LHI) != MIPSDSP_LHI)) {
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    return (target_long)(int32_t)(tempDL[0] >> 1);
+}
+
+/*
+ * EXTR_RS.W: extract a rounded, saturated 32-bit word from accumulator ac
+ * after a right shift.
+ *
+ * Works like the rounding extract, but when the rounded value does not
+ * fit in 32 bits the result is replaced by 0x7FFFFFFF (tempDL[1] == 0) or
+ * 0x80000000 (otherwise).  Overflow flag 23 is raised in that case, and
+ * also when the value did not fit before rounding.
+ */
+target_ulong helper_extr_rs_w(target_ulong ac, target_ulong shift,
+                              CPUMIPSState *env)
+{
+    int32_t tempI, temp64;
+    int64_t tempDL[2];
+
+    /* The shift amount is a 5-bit quantity (0..31). */
+    shift = shift & 0x1F;
+
+    mipsdsp_rndrashift_short_acc(tempDL, ac, shift, env);
+    if ((tempDL[1] != 0 || (tempDL[0] & MIPSDSP_LHI) != 0) &&
+        (tempDL[1] != 1 || (tempDL[0] & MIPSDSP_LHI) != MIPSDSP_LHI)) {
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+    tempDL[0] += 1;
+    if (tempDL[0] == 0) {
+        tempDL[1] += 1;
+    }
+    tempI = tempDL[0] >> 1;
+
+    if ((tempDL[1] != 0 || (tempDL[0] & MIPSDSP_LHI) != 0) &&
+        (tempDL[1] != 1 || (tempDL[0] & MIPSDSP_LHI) != MIPSDSP_LHI)) {
+        /* Saturate towards the sign indicated by the upper word. */
+        temp64 = tempDL[1];
+        if (temp64 == 0) {
+            tempI = 0x7FFFFFFF;
+        } else {
+            tempI = 0x80000000;
+        }
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    return (target_long)tempI;
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * DEXTR.W: shift accumulator ac right by (shift & 0x3F) through
+ * mipsdsp_rndrashift_acc(), drop the lowest bit of the first result word
+ * and return the low 32 bits sign-extended.
+ */
+target_ulong helper_dextr_w(target_ulong ac, target_ulong shift,
+                            CPUMIPSState *env)
+{
+    uint64_t shifted[3];
+
+    mipsdsp_rndrashift_acc(shifted, ac, shift & 0x3F, env);
+
+    return (int64_t)(int32_t)(shifted[0] >> 1);
+}
+
+/*
+ * DEXTR_R.W: like DEXTR.W, but 1 is added at the lowest bit of the
+ * shifted accumulator before that bit is dropped.  Overflow flag 23 is
+ * raised unless word 1 and bit 0 of word 2 are all zeros or all ones.
+ */
+target_ulong helper_dextr_r_w(target_ulong ac, target_ulong shift,
+                              CPUMIPSState *env)
+{
+    uint64_t shifted[3];
+    uint32_t top_bit;
+    int all_zeros, all_ones;
+
+    mipsdsp_rndrashift_acc(shifted, ac, shift & 0x3F, env);
+
+    /* Add the rounding increment, rippling the carry upwards. */
+    if (++shifted[0] == 0 && ++shifted[1] == 0) {
+        shifted[2] += 1;
+    }
+
+    top_bit = shifted[2] & 0x01;
+    all_zeros = (top_bit == 0 && shifted[1] == 0);
+    all_ones = (top_bit == 1 && shifted[1] == ~0ull);
+
+    if (!all_zeros && !all_ones) {
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    return (int64_t)(int32_t)(shifted[0] >> 1);
+}
+
+/*
+ * DEXTR_RS.W: rounding extract with saturation.  When word 1 and bit 0 of
+ * word 2 of the rounded value are neither all zeros nor all ones, word 0
+ * is replaced so that the returned value is 0x7FFFFFFF (bit 0 of word 2
+ * clear) or 0x80000000 (set), and overflow flag 23 is raised.
+ */
+target_ulong helper_dextr_rs_w(target_ulong ac, target_ulong shift,
+                               CPUMIPSState *env)
+{
+    uint64_t shifted[3];
+    uint32_t top_bit;
+    int all_zeros, all_ones;
+
+    mipsdsp_rndrashift_acc(shifted, ac, shift & 0x3F, env);
+
+    /* Add the rounding increment, rippling the carry upwards. */
+    if (++shifted[0] == 0 && ++shifted[1] == 0) {
+        shifted[2] += 1;
+    }
+
+    top_bit = shifted[2] & 0x01;
+    all_zeros = (top_bit == 0 && shifted[1] == 0);
+    all_ones = (top_bit == 1 && shifted[1] == ~0ull);
+
+    if (!all_zeros && !all_ones) {
+        /* The values are pre-shifted left by one; see the return below. */
+        shifted[0] = (top_bit == 0) ? 0x0FFFFFFFF : 0x0100000000;
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    return (int64_t)(int32_t)(shifted[0] >> 1);
+}
+
+/*
+ * DEXTR.L: shift accumulator ac right by (shift & 0x3F) through
+ * mipsdsp_rndrashift_acc() and return 64 bits of it: word 0 without its
+ * lowest bit, topped up with bit 0 of word 1.
+ */
+target_ulong helper_dextr_l(target_ulong ac, target_ulong shift,
+                            CPUMIPSState *env)
+{
+    uint64_t shifted[3];
+
+    mipsdsp_rndrashift_acc(shifted, ac, shift & 0x3F, env);
+
+    return (target_ulong)((shifted[1] << 63) | (shifted[0] >> 1));
+}
+
+/*
+ * DEXTR_R.L: like DEXTR.L, but 1 is added at the lowest bit of the
+ * shifted accumulator before that bit is dropped.  Overflow flag 23 is
+ * raised unless word 1 and bit 0 of word 2 are all zeros or all ones.
+ */
+target_ulong helper_dextr_r_l(target_ulong ac, target_ulong shift,
+                              CPUMIPSState *env)
+{
+    uint64_t shifted[3];
+    uint32_t top_bit;
+    int all_zeros, all_ones;
+
+    mipsdsp_rndrashift_acc(shifted, ac, shift & 0x3F, env);
+
+    /* Add the rounding increment, rippling the carry upwards. */
+    if (++shifted[0] == 0 && ++shifted[1] == 0) {
+        shifted[2] += 1;
+    }
+
+    top_bit = shifted[2] & 0x01;
+    all_zeros = (top_bit == 0 && shifted[1] == 0);
+    all_ones = (top_bit == 1 && shifted[1] == ~0ull);
+
+    if (!all_zeros && !all_ones) {
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    return (target_ulong)((shifted[1] << 63) | (shifted[0] >> 1));
+}
+
+/*
+ * DEXTR_RS.L: rounding 64-bit extract with saturation.  When word 1 and
+ * bit 0 of word 2 of the rounded value are neither all zeros nor all
+ * ones, words 0 and 1 are patched so that the returned value is the
+ * largest positive (bit 0 of word 2 clear) or the most negative (set)
+ * 64-bit number, and overflow flag 23 is raised.
+ */
+target_ulong helper_dextr_rs_l(target_ulong ac, target_ulong shift,
+                               CPUMIPSState *env)
+{
+    uint64_t shifted[3];
+    uint32_t top_bit;
+    int all_zeros, all_ones;
+
+    mipsdsp_rndrashift_acc(shifted, ac, shift & 0x3F, env);
+
+    /* Add the rounding increment, rippling the carry upwards. */
+    if (++shifted[0] == 0 && ++shifted[1] == 0) {
+        shifted[2] += 1;
+    }
+
+    top_bit = shifted[2] & 0x01;
+    all_zeros = (top_bit == 0 && shifted[1] == 0);
+    all_ones = (top_bit == 1 && shifted[1] == ~0ull);
+
+    if (!all_zeros && !all_ones) {
+        if (top_bit == 0) {
+            /* Result bit 63 cleared, bits 62..0 set. */
+            shifted[1] &= ~(uint64_t)1;
+            shifted[0] |= ~(uint64_t)1;
+        } else {
+            /* Result bit 63 set, bits 62..0 cleared. */
+            shifted[1] |= 0x01;
+            shifted[0] &= 0x01;
+        }
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    return (target_ulong)((shifted[1] << 63) | (shifted[0] >> 1));
+}
+#endif
+
+/*
+ * EXTR_S.H: shift accumulator ac right through
+ * mipsdsp_rashift_short_acc() and saturate the result to the signed
+ * 16-bit range.  Overflow flag 23 is raised when saturation occurs.  The
+ * result is returned sign-extended from 32 bits.
+ */
+target_ulong helper_extr_s_h(target_ulong ac, target_ulong shift,
+                             CPUMIPSState *env)
+{
+    int64_t temp;
+
+    /* The shift amount is a 5-bit quantity (0..31). */
+    shift = shift & 0x1F;
+
+    temp = mipsdsp_rashift_short_acc(ac, shift, env);
+    if (temp > (int64_t)0x7FFF) {
+        temp = 0x00007FFF;
+        set_DSPControl_overflow_flag(1, 23, env);
+    } else if (temp < (int64_t)0xFFFFFFFFFFFF8000) {
+        temp = 0xFFFF8000;
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    return (target_long)(int32_t)(temp & 0xFFFFFFFF);
+}
+
+
+#if defined(TARGET_MIPS64)
+/*
+ * DEXTR_S.H: shift accumulator ac right through mipsdsp_rashift_acc()
+ * and saturate the 128-bit result (temp[1]:temp[0]) to the signed 16-bit
+ * range.  Overflow flag 23 is raised when saturation occurs.  The low
+ * halfword is returned sign-extended.
+ */
+target_ulong helper_dextr_s_h(target_ulong ac, target_ulong shift,
+                              CPUMIPSState *env)
+{
+    int64_t temp[2];
+    uint32_t temp127;
+
+    shift = shift & 0x1F;
+
+    mipsdsp_rashift_acc((uint64_t *)temp, ac, shift, env);
+
+    temp127 = (temp[1] >> 63) & 0x01;
+
+    /*
+     * temp[0] is the low half of a 128-bit number, so it is compared as an
+     * unsigned quantity in both branches.
+     */
+    if ((temp127 == 0) &&
+        (temp[1] > 0 || (uint64_t)temp[0] > 32767)) {
+        temp[0] &= 0xFFFF0000;
+        temp[0] |= 0x00007FFF;
+        set_DSPControl_overflow_flag(1, 23, env);
+    } else if ((temp127 == 1) &&
+            (temp[1] != -1
+             || (uint64_t)temp[0] < 0xFFFFFFFFFFFF8000ull)) {
+        /* Anything below -32768 saturates to 0x8000. */
+        temp[0] &= 0xFFFF0000;
+        temp[0] |= 0x00008000;
+        set_DSPControl_overflow_flag(1, 23, env);
+    }
+
+    return (int64_t)(int16_t)(temp[0] & MIPSDSP_LO);
+}
+
+#endif
+
+/*
+ * EXTP: extract a (size + 1)-bit field from accumulator ac.  The field's
+ * most significant bit is at the position read from DSPControl via
+ * get_DSPControl_pos().
+ *
+ * When the field fits (pos - (size + 1) >= -1) it is returned
+ * zero-extended and EFI is cleared; otherwise 0 is returned and EFI is
+ * set.
+ */
+target_ulong helper_extp(target_ulong ac, target_ulong size, CPUMIPSState *env)
+{
+    int32_t start_pos;
+    int sub;
+    uint32_t temp;
+    uint64_t acc;
+
+    size = size & 0x1F;
+
+    temp = 0;
+    start_pos = get_DSPControl_pos(env);
+    sub = start_pos - (size + 1);
+    if (sub >= -1) {
+        acc = ((uint64_t)env->active_tc.HI[ac] << 32) |
+              ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+        /* 64-bit mask: size == 31 needs a shift by 32. */
+        temp = (acc >> (start_pos - size)) &
+               (((uint64_t)0x01 << (size + 1)) - 1);
+        set_DSPControl_efi(0, env);
+    } else {
+        set_DSPControl_efi(1, env);
+    }
+
+    return (target_ulong)temp;
+}
+
+/*
+ * EXTPDP: extract a (size + 1)-bit field from accumulator ac and move the
+ * DSPControl position down past it.  The field's most significant bit is
+ * at the position read via get_DSPControl_pos().
+ *
+ * When the field fits (pos - (size + 1) >= -1) it is returned
+ * zero-extended, the position becomes pos - (size + 1) and EFI is
+ * cleared; otherwise 0 is returned, the position is left alone and EFI is
+ * set.
+ */
+target_ulong helper_extpdp(target_ulong ac, target_ulong size,
+                           CPUMIPSState *env)
+{
+    int32_t start_pos;
+    int sub;
+    uint32_t temp;
+    uint64_t acc;
+
+    size = size & 0x1F;
+    temp = 0;
+    start_pos = get_DSPControl_pos(env);
+    sub = start_pos - (size + 1);
+    if (sub >= -1) {
+        acc  = ((uint64_t)env->active_tc.HI[ac] << 32) |
+               ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+        /* 64-bit mask: size == 31 needs a shift by 32. */
+        temp = (acc >> (start_pos - size)) &
+               (((uint64_t)0x01 << (size + 1)) - 1);
+
+        set_DSPControl_pos(start_pos - (size + 1), env);
+        set_DSPControl_efi(0, env);
+    } else {
+        set_DSPControl_efi(1, env);
+    }
+
+    return (target_ulong)temp;
+}
+
+
+#if defined(TARGET_MIPS64)
+/*
+ * DEXTP: extract a (size + 1)-bit field from the 128-bit accumulator
+ * HI[ac]:LO[ac].  The field's most significant bit is at the position
+ * read via get_DSPControl_pos().
+ *
+ * When the field fits (pos - (size + 1) >= -1) it is returned
+ * zero-extended and EFI is cleared; otherwise 0 is returned and EFI is
+ * set.
+ */
+target_ulong helper_dextp(target_ulong ac, target_ulong size, CPUMIPSState *env)
+{
+    int start_pos;
+    int len;
+    int sub;
+    uint64_t tempB, tempA;
+    uint64_t temp;
+
+    temp = 0;
+
+    size = size & 0x3F;
+    start_pos = get_DSPControl_pos(env);
+    len = start_pos - size;
+    tempB = env->active_tc.HI[ac];
+    tempA = env->active_tc.LO[ac];
+
+    sub = start_pos - (size + 1);
+
+    if (sub >= -1) {
+        /* 128-bit right shift by len, avoiding shifts by 64 or more. */
+        if (len == 0) {
+            temp = tempA;
+        } else if (len < 64) {
+            temp = (tempB << (64 - len)) | (tempA >> len);
+        } else {
+            temp = tempB >> (len - 64);
+        }
+        /* Keep the low (size + 1) bits; size may be as large as 63. */
+        temp = temp & (~0ull >> (63 - size));
+        set_DSPControl_efi(0, env);
+    } else {
+        set_DSPControl_efi(1, env);
+    }
+
+    return temp;
+}
+
+/*
+ * DEXTPDP: extract a (size + 1)-bit field from the 128-bit accumulator
+ * HI[ac]:LO[ac] and move the DSPControl position down past it.  The
+ * field's most significant bit is at the position read via
+ * get_DSPControl_pos().
+ *
+ * When the field fits (pos - (size + 1) >= -1) it is returned
+ * zero-extended, the position becomes pos - (size + 1) and EFI is
+ * cleared; otherwise 0 is returned, the position is left alone and EFI is
+ * set.
+ */
+target_ulong helper_dextpdp(target_ulong ac, target_ulong size,
+                            CPUMIPSState *env)
+{
+    int start_pos;
+    int len;
+    int sub;
+    uint64_t tempB, tempA;
+    uint64_t temp;
+
+    temp = 0;
+    size = size & 0x3F;
+    start_pos = get_DSPControl_pos(env);
+    len = start_pos - size;
+    tempB = env->active_tc.HI[ac];
+    tempA = env->active_tc.LO[ac];
+
+    sub = start_pos - (size + 1);
+
+    if (sub >= -1) {
+        /* 128-bit right shift by len, avoiding shifts by 64 or more. */
+        if (len == 0) {
+            temp = tempA;
+        } else if (len < 64) {
+            temp = (tempB << (64 - len)) | (tempA >> len);
+        } else {
+            temp = tempB >> (len - 64);
+        }
+        /* Keep the low (size + 1) bits; size may be as large as 63. */
+        temp = temp & (~0ull >> (63 - size));
+        set_DSPControl_pos(sub, env);
+        set_DSPControl_efi(0, env);
+    } else {
+        set_DSPControl_efi(1, env);
+    }
+
+    return temp;
+}
+
+#endif
+
+/*
+ * SHILO: shift the 64-bit accumulator formed from the low words of
+ * HI[ac] and LO[ac] by the signed 6-bit amount in rs[5:0].  A positive
+ * amount shifts right (logically), a negative amount shifts left by its
+ * magnitude, zero leaves the value unchanged.  Both halves are written
+ * back sign-extended from 32 bits.
+ */
+void helper_shilo(target_ulong ac, target_ulong rs, CPUMIPSState *env)
+{
+    int8_t  rs5_0;
+    uint64_t temp, acc;
+
+    /* Sign-extend the 6-bit shift amount to the range -32..31. */
+    rs5_0 = rs & 0x3F;
+    rs5_0 = (int8_t)(rs5_0 << 2) >> 2;
+
+    acc   = (((uint64_t)env->active_tc.HI[ac] << 32) & MIPSDSP_LHI) |
+            ((uint64_t)env->active_tc.LO[ac] & MIPSDSP_LLO);
+    if (rs5_0 == 0) {
+        temp = acc;
+    } else if (rs5_0 > 0) {
+        temp = acc >> rs5_0;
+    } else {
+        /* The sign selects the direction, the magnitude the distance. */
+        temp = acc << -rs5_0;
+    }
+
+    env->active_tc.HI[ac] = (target_ulong)(int32_t)((temp & MIPSDSP_LHI) >> 32);
+    env->active_tc.LO[ac] = (target_ulong)(int32_t)(temp & MIPSDSP_LLO);
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * DSHILO: shift the 128-bit accumulator HI[ac]:LO[ac] by the signed 7-bit
+ * amount in the low bits of shift.  A positive amount shifts right
+ * (logically), a negative amount shifts left by its magnitude, zero
+ * leaves the accumulator unchanged.
+ */
+void helper_dshilo(target_ulong shift, target_ulong ac, CPUMIPSState *env)
+{
+    int8_t shift_t;
+    uint64_t tempB, tempA;
+
+    /* Sign-extend the 7-bit shift amount to the range -64..63. */
+    shift_t = (int8_t)(shift << 1) >> 1;
+
+    tempB = env->active_tc.HI[ac];
+    tempA = env->active_tc.LO[ac];
+
+    if (shift_t > 0) {
+        tempA = (tempB << (64 - shift_t)) | (tempA >> shift_t);
+        tempB = tempB >> shift_t;
+    } else if (shift_t == -64) {
+        /* A left shift by a whole word: 64-bit shifts are undefined in C. */
+        tempB = tempA;
+        tempA = 0;
+    } else if (shift_t < 0) {
+        shift_t = -shift_t;
+        tempB = (tempB << shift_t) | (tempA >> (64 - shift_t));
+        tempA = tempA << shift_t;
+    }
+
+    env->active_tc.HI[ac] = tempB;
+    env->active_tc.LO[ac] = tempA;
+}
+
+#endif
+/*
+ * MTHLIP: move the old LO[ac] into HI[ac] and load LO[ac] from rs, both
+ * as sign-extended 32-bit values.  The DSPControl position is then
+ * advanced by 32 unless it is already greater than 32.
+ */
+void helper_mthlip(target_ulong ac, target_ulong rs, CPUMIPSState *env)
+{
+    int32_t incoming = rs;
+    int32_t previous_lo = env->active_tc.LO[ac];
+    int32_t pos;
+
+    env->active_tc.HI[ac] = (target_long)previous_lo;
+    env->active_tc.LO[ac] = (target_long)incoming;
+
+    pos = get_DSPControl_pos(env);
+    if (pos <= 32) {
+        set_DSPControl_pos(pos + 32, env);
+    }
+}
+
+#if defined(TARGET_MIPS64)
+/*
+ * DMTHLIP: move the old LO into HI and load LO from rs for accumulator
+ * (ac & 3).  The DSPControl position is then advanced by 64 unless it is
+ * already greater than 64.
+ */
+void helper_dmthlip(target_ulong rs, target_ulong ac, CPUMIPSState *env)
+{
+    uint8_t idx = ac & 0x3;
+    uint64_t incoming = rs;
+    uint64_t previous_lo = env->active_tc.LO[idx];
+    uint8_t pos;
+
+    env->active_tc.HI[idx] = previous_lo;
+    env->active_tc.LO[idx] = incoming;
+
+    pos = get_DSPControl_pos(env);
+    if (pos > 64) {
+        return;
+    }
+
+    pos = pos + 64;
+    set_DSPControl_pos(pos, env);
+}
+#endif
+
+/*
+ * WRDSP: write selected fields of DSPControl from rs.  Bit i of mask_num
+ * (i = 0..5) enables field i of the table below; enabled fields are
+ * replaced by the corresponding bits of rs, all other bits in the low 32
+ * bits of DSPControl are kept.
+ */
+void helper_wrdsp(target_ulong rs, target_ulong mask_num, CPUMIPSState *env)
+{
+    /* DSPControl bits covered by each mask_num bit. */
+    static const uint32_t field[6] = {
+#if defined(TARGET_MIPS64)
+        0x0000007F,
+#else
+        0x0000003F,
+#endif
+        0x00001F80,
+        0x00002000,
+        0x00FF0000,
+        0xFF000000,
+        0x00004000,
+    };
+    uint32_t keep = 0xFFFFFFFF;
+    uint32_t fresh = 0x00;
+    uint8_t sel;
+
+    for (sel = 0; sel < 6; sel++) {
+        if (((mask_num >> sel) & 0x01) == 1) {
+            keep  &= ~field[sel];
+            fresh |= field[sel] & rs;
+        }
+    }
+
+    env->active_tc.DSPControl = (env->active_tc.DSPControl & keep) | fresh;
+}
+
+/*
+ * RDDSP: read selected fields of DSPControl.  Bit i of masknum
+ * (i = 0..5) enables field i of the table below; the result contains the
+ * enabled fields at their DSPControl bit positions and zeros elsewhere.
+ */
+target_ulong helper_rddsp(target_ulong masknum, CPUMIPSState *env)
+{
+    /* DSPControl bits covered by each masknum bit. */
+    static const uint32_t field[6] = {
+#if defined(TARGET_MIPS64)
+        0x7F,
+#else
+        0x3F,
+#endif
+        0x1F80,
+        0x2000,
+        0x00FF0000,
+        0xFF000000,
+        0x4000,
+    };
+    target_ulong dsp = env->active_tc.DSPControl;
+    target_ulong selected = 0x00;
+    uint32_t sel;
+
+    for (sel = 0; sel < 6; sel++) {
+        if (((masknum >> sel) & 0x01) == 1) {
+            selected |= dsp & field[sel];
+        }
+    }
+
+    return selected;
+}
+
+
+#undef MIPSDSP_LHI
+#undef MIPSDSP_LLO
+#undef MIPSDSP_HI
+#undef MIPSDSP_LO
+#undef MIPSDSP_Q3
+#undef MIPSDSP_Q2
+#undef MIPSDSP_Q1
+#undef MIPSDSP_Q0
+
+#undef MIPSDSP_SPLIT32_8
+#undef MIPSDSP_SPLIT32_16
+
+#undef MIPSDSP_RETURN32
+#undef MIPSDSP_RETURN32_8
+#undef MIPSDSP_RETURN32_16
+
+#ifdef TARGET_MIPS64
+#undef MIPSDSP_SPLIT64_16
+#undef MIPSDSP_SPLIT64_32
+#undef MIPSDSP_RETURN64_16
+#undef MIPSDSP_RETURN64_32
+#endif
diff --git a/target-mips/helper.c b/target-mips/helper.c
index 3b8c696a9e..e877b8db78 100644
--- a/target-mips/helper.c
+++ b/target-mips/helper.c
@@ -592,6 +592,9 @@ void do_interrupt (CPUMIPSState *env)
     case EXCP_THREAD:
         cause = 25;
         goto set_EPC;
+    case EXCP_DSPDIS:
+        cause = 26;
+        goto set_EPC;
     case EXCP_CACHE:
         cause = 30;
         if (env->CP0_Status & (1 << CP0St_BEV)) {
diff --git a/target-mips/helper.h b/target-mips/helper.h
index 43ac39ff41..acf9ebd759 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -4,13 +4,9 @@ DEF_HELPER_3(raise_exception_err, noreturn, env, i32, int)
 DEF_HELPER_2(raise_exception, noreturn, env, i32)
 
 #ifdef TARGET_MIPS64
-DEF_HELPER_4(ldl, tl, env, tl, tl, int)
-DEF_HELPER_4(ldr, tl, env, tl, tl, int)
 DEF_HELPER_4(sdl, void, env, tl, tl, int)
 DEF_HELPER_4(sdr, void, env, tl, tl, int)
 #endif
-DEF_HELPER_4(lwl, tl, env, tl, tl, int)
-DEF_HELPER_4(lwr, tl, env, tl, tl, int)
 DEF_HELPER_4(swl, void, env, tl, tl, int)
 DEF_HELPER_4(swr, void, env, tl, tl, int)
 
@@ -254,10 +250,10 @@ FOP_PROTO(rsqrt2)
 DEF_HELPER_4(float_ ## op ## _s, i32, env, i32, i32, i32)  \
 DEF_HELPER_4(float_ ## op ## _d, i64, env, i64, i64, i64)  \
 DEF_HELPER_4(float_ ## op ## _ps, i64, env, i64, i64, i64)
-FOP_PROTO(muladd)
-FOP_PROTO(mulsub)
-FOP_PROTO(nmuladd)
-FOP_PROTO(nmulsub)
+FOP_PROTO(madd)
+FOP_PROTO(msub)
+FOP_PROTO(nmadd)
+FOP_PROTO(nmsub)
 #undef FOP_PROTO
 
 #define FOP_PROTO(op)                                    \
@@ -362,4 +358,353 @@ DEF_HELPER_FLAGS_2(pasubub, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_1(biadd, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(pmovmskb, TCG_CALL_NO_RWG_SE, i64, i64)
 
+/*** MIPS DSP ***/
+/* DSP Arithmetic Sub-class insns */
+DEF_HELPER_FLAGS_3(addq_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(addq_s_ph, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(addq_qh, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(addq_s_qh, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(addq_s_w, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(addq_pw, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(addq_s_pw, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(addu_qb, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(addu_s_qb, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_2(adduh_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(adduh_r_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(addu_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(addu_s_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_2(addqh_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(addqh_r_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(addqh_w, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(addqh_r_w, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(addu_ob, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(addu_s_ob, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_2(adduh_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(adduh_r_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(addu_qh, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(addu_s_qh, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(subq_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(subq_s_ph, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(subq_qh, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(subq_s_qh, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(subq_s_w, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(subq_pw, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(subq_s_pw, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(subu_qb, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(subu_s_qb, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_2(subuh_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(subuh_r_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(subu_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(subu_s_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_2(subqh_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(subqh_r_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(subqh_w, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(subqh_r_w, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(subu_ob, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(subu_s_ob, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_2(subuh_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(subuh_r_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(subu_qh, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(subu_s_qh, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(addsc, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(addwc, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_2(modsub, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_1(raddu_w_qb, TCG_CALL_NO_RWG_SE, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_1(raddu_l_ob, TCG_CALL_NO_RWG_SE, tl, tl)
+#endif
+DEF_HELPER_FLAGS_2(absq_s_qb, 0, tl, tl, env)
+DEF_HELPER_FLAGS_2(absq_s_ph, 0, tl, tl, env)
+DEF_HELPER_FLAGS_2(absq_s_w, 0, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_2(absq_s_ob, 0, tl, tl, env)
+DEF_HELPER_FLAGS_2(absq_s_qh, 0, tl, tl, env)
+DEF_HELPER_FLAGS_2(absq_s_pw, 0, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_2(precr_qb_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(precrq_qb_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(precr_sra_ph_w, TCG_CALL_NO_RWG_SE,
+                   tl, i32, tl, tl)
+DEF_HELPER_FLAGS_3(precr_sra_r_ph_w, TCG_CALL_NO_RWG_SE,
+                   tl, i32, tl, tl)
+DEF_HELPER_FLAGS_2(precrq_ph_w, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(precrq_rs_ph_w, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_2(precr_ob_qh, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(precr_sra_qh_pw,
+                   TCG_CALL_NO_RWG_SE, tl, tl, tl, i32)
+DEF_HELPER_FLAGS_3(precr_sra_r_qh_pw,
+                   TCG_CALL_NO_RWG_SE, tl, tl, tl, i32)
+DEF_HELPER_FLAGS_2(precrq_ob_qh, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(precrq_qh_pw, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(precrq_rs_qh_pw,
+                   0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_2(precrq_pw_l, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#endif
+DEF_HELPER_FLAGS_3(precrqu_s_qb_ph, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(precrqu_s_ob_qh,
+                   0, tl, tl, tl, env)
+
+DEF_HELPER_FLAGS_1(preceq_pw_qhl, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceq_pw_qhr, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceq_pw_qhla, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceq_pw_qhra, TCG_CALL_NO_RWG_SE, tl, tl)
+#endif
+DEF_HELPER_FLAGS_1(precequ_ph_qbl, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(precequ_ph_qbr, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(precequ_ph_qbla, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(precequ_ph_qbra, TCG_CALL_NO_RWG_SE, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_1(precequ_qh_obl, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(precequ_qh_obr, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(precequ_qh_obla, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(precequ_qh_obra, TCG_CALL_NO_RWG_SE, tl, tl)
+#endif
+DEF_HELPER_FLAGS_1(preceu_ph_qbl, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceu_ph_qbr, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceu_ph_qbla, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceu_ph_qbra, TCG_CALL_NO_RWG_SE, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_1(preceu_qh_obl, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceu_qh_obr, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceu_qh_obla, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(preceu_qh_obra, TCG_CALL_NO_RWG_SE, tl, tl)
+#endif
+
+/* DSP GPR-Based Shift Sub-class insns */
+DEF_HELPER_FLAGS_3(shll_qb, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(shll_ob, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(shll_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(shll_s_ph, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(shll_qh, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(shll_s_qh, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(shll_s_w, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(shll_pw, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(shll_s_pw, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_2(shrl_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shrl_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_2(shrl_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shrl_qh, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#endif
+DEF_HELPER_FLAGS_2(shra_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shra_r_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_2(shra_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shra_r_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#endif
+DEF_HELPER_FLAGS_2(shra_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shra_r_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shra_r_w, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_2(shra_qh, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shra_r_qh, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shra_pw, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(shra_r_pw, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#endif
+
+/* DSP Multiply Sub-class insns */
+DEF_HELPER_FLAGS_3(muleu_s_ph_qbl, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(muleu_s_ph_qbr, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(muleu_s_qh_obl, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(muleu_s_qh_obr, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(mulq_rs_ph, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(mulq_rs_qh, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(muleq_s_w_phl, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(muleq_s_w_phr, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(muleq_s_pw_qhl, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(muleq_s_pw_qhr, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_4(dpau_h_qbl, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(dpau_h_qbr, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpau_h_obl, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(dpau_h_obr, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(dpsu_h_qbl, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(dpsu_h_qbr, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpsu_h_obl, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(dpsu_h_obr, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(dpa_w_ph, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpa_w_qh, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(dpax_w_ph, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(dpaq_s_w_ph, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpaq_s_w_qh, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(dpaqx_s_w_ph, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(dpaqx_sa_w_ph, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(dps_w_ph, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dps_w_qh, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(dpsx_w_ph, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(dpsq_s_w_ph, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpsq_s_w_qh, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(dpsqx_s_w_ph, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(dpsqx_sa_w_ph, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(mulsaq_s_w_ph, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(mulsaq_s_w_qh, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(dpaq_sa_l_w, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpaq_sa_l_pw, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(dpsq_sa_l_w, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(dpsq_sa_l_pw, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(mulsaq_s_l_pw, 0, void, tl, tl, i32, env)
+#endif
+DEF_HELPER_FLAGS_4(maq_s_w_phl, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(maq_s_w_phr, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(maq_sa_w_phl, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_4(maq_sa_w_phr, 0, void, i32, tl, tl, env)
+DEF_HELPER_FLAGS_3(mul_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(mul_s_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(mulq_s_ph, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(mulq_s_w, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(mulq_rs_w, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_4(mulsa_w_ph, 0, void, i32, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_4(maq_s_w_qhll, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_s_w_qhlr, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_s_w_qhrl, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_s_w_qhrr, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_sa_w_qhll, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_sa_w_qhlr, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_sa_w_qhrl, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_sa_w_qhrr, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_s_l_pwl, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(maq_s_l_pwr, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(dmadd, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(dmaddu, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(dmsub, 0, void, tl, tl, i32, env)
+DEF_HELPER_FLAGS_4(dmsubu, 0, void, tl, tl, i32, env)
+#endif
+
+/* DSP Bit/Manipulation Sub-class insns */
+DEF_HELPER_FLAGS_1(bitrev, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_3(insv, 0, tl, env, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(dinsv, 0, tl, env, tl, tl)
+#endif
+
+/* DSP Compare-Pick Sub-class insns */
+DEF_HELPER_FLAGS_3(cmpu_eq_qb, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmpu_lt_qb, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmpu_le_qb, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_2(cmpgu_eq_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(cmpgu_lt_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(cmpgu_le_qb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(cmp_eq_ph, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmp_lt_ph, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmp_le_ph, 0, void, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(cmpu_eq_ob, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmpu_lt_ob, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmpu_le_ob, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmpgdu_eq_ob, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmpgdu_lt_ob, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmpgdu_le_ob, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_2(cmpgu_eq_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(cmpgu_lt_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(cmpgu_le_ob, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_3(cmp_eq_qh, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmp_lt_qh, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmp_le_qh, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmp_eq_pw, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmp_lt_pw, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_3(cmp_le_pw, 0, void, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(pick_qb, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(pick_ph, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(pick_ob, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(pick_qh, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(pick_pw, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(append, TCG_CALL_NO_RWG_SE, tl, tl, tl, i32)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(dappend, TCG_CALL_NO_RWG_SE, tl, tl, tl, i32)
+#endif
+DEF_HELPER_FLAGS_3(prepend, TCG_CALL_NO_RWG_SE, tl, tl, tl, i32)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(prependd, TCG_CALL_NO_RWG_SE, tl, tl, tl, i32)
+DEF_HELPER_FLAGS_3(prependw, TCG_CALL_NO_RWG_SE, tl, tl, tl, i32)
+#endif
+DEF_HELPER_FLAGS_3(balign, TCG_CALL_NO_RWG_SE, tl, tl, tl, i32)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(dbalign, TCG_CALL_NO_RWG_SE, tl, tl, tl, i32)
+#endif
+DEF_HELPER_FLAGS_2(packrl_ph, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_2(packrl_pw, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+#endif
+
+/* DSP Accumulator and DSPControl Access Sub-class insns */
+DEF_HELPER_FLAGS_3(extr_w, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(extr_r_w, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(extr_rs_w, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(dextr_w, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(dextr_r_w, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(dextr_rs_w, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(dextr_l, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(dextr_r_l, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(dextr_rs_l, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(extr_s_h, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(dextr_s_h, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(extp, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(extpdp, 0, tl, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(dextp, 0, tl, tl, tl, env)
+DEF_HELPER_FLAGS_3(dextpdp, 0, tl, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(shilo, 0, void, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(dshilo, 0, void, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(mthlip, 0, void, tl, tl, env)
+#if defined(TARGET_MIPS64)
+DEF_HELPER_FLAGS_3(dmthlip, 0, void, tl, tl, env)
+#endif
+DEF_HELPER_FLAGS_3(wrdsp, 0, void, tl, tl, env)
+DEF_HELPER_FLAGS_2(rddsp, 0, tl, tl, env)
+
+
+
 #include "def-helper.h"
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index d50334f45f..f45d494b14 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -33,34 +33,49 @@ static inline void cpu_mips_tlb_flush (CPUMIPSState *env, int flush_global);
 /*****************************************************************************/
 /* Exceptions processing helpers */
 
-void helper_raise_exception_err(CPUMIPSState *env, uint32_t exception,
-                                int error_code)
+static inline void QEMU_NORETURN do_raise_exception_err(CPUMIPSState *env,
+                                                        uint32_t exception,
+                                                        int error_code,
+                                                        uintptr_t pc)
 {
+    TranslationBlock *tb;
 #if 1
     if (exception < 0x100)
         qemu_log("%s: %d %d\n", __func__, exception, error_code);
 #endif
     env->exception_index = exception;
     env->error_code = error_code;
+
+    if (pc) {
+        /* non-zero pc (a retaddr or GETPC() value): look up its TB */
+        tb = tb_find_pc(pc);
+        if (tb) {
+            /* pc falls inside translated code: restore the CPU state
+               for that point before exiting the CPU loop */
+            cpu_restore_state(tb, env, pc);
+        }
+    }
+
     cpu_loop_exit(env);
 }
 
-void helper_raise_exception(CPUMIPSState *env, uint32_t exception)
+static inline void QEMU_NORETURN do_raise_exception(CPUMIPSState *env,
+                                                    uint32_t exception,
+                                                    uintptr_t pc)
 {
-    helper_raise_exception_err(env, exception, 0);
+    do_raise_exception_err(env, exception, 0, pc);
 }
 
-#if !defined(CONFIG_USER_ONLY)
-static void do_restore_state(CPUMIPSState *env, uintptr_t pc)
+void helper_raise_exception_err(CPUMIPSState *env, uint32_t exception,
+                                int error_code)
 {
-    TranslationBlock *tb;
+    do_raise_exception_err(env, exception, error_code, 0);
+}
 
-    tb = tb_find_pc (pc);
-    if (tb) {
-        cpu_restore_state(tb, env, pc);
-    }
+void helper_raise_exception(CPUMIPSState *env, uint32_t exception)
+{
+    do_raise_exception(env, exception, 0);
 }
-#endif
 
 #if defined(CONFIG_USER_ONLY)
 #define HELPER_LD(name, insn, type)                                     \
@@ -335,56 +350,6 @@ HELPER_ST_ATOMIC(scd, ld, sd, 0x7)
 #define GET_OFFSET(addr, offset) (addr - (offset))
 #endif
 
-target_ulong helper_lwl(CPUMIPSState *env, target_ulong arg1,
-                        target_ulong arg2, int mem_idx)
-{
-    target_ulong tmp;
-
-    tmp = do_lbu(env, arg2, mem_idx);
-    arg1 = (arg1 & 0x00FFFFFF) | (tmp << 24);
-
-    if (GET_LMASK(arg2) <= 2) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, 1), mem_idx);
-        arg1 = (arg1 & 0xFF00FFFF) | (tmp << 16);
-    }
-
-    if (GET_LMASK(arg2) <= 1) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, 2), mem_idx);
-        arg1 = (arg1 & 0xFFFF00FF) | (tmp << 8);
-    }
-
-    if (GET_LMASK(arg2) == 0) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, 3), mem_idx);
-        arg1 = (arg1 & 0xFFFFFF00) | tmp;
-    }
-    return (int32_t)arg1;
-}
-
-target_ulong helper_lwr(CPUMIPSState *env, target_ulong arg1,
-                        target_ulong arg2, int mem_idx)
-{
-    target_ulong tmp;
-
-    tmp = do_lbu(env, arg2, mem_idx);
-    arg1 = (arg1 & 0xFFFFFF00) | tmp;
-
-    if (GET_LMASK(arg2) >= 1) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, -1), mem_idx);
-        arg1 = (arg1 & 0xFFFF00FF) | (tmp << 8);
-    }
-
-    if (GET_LMASK(arg2) >= 2) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, -2), mem_idx);
-        arg1 = (arg1 & 0xFF00FFFF) | (tmp << 16);
-    }
-
-    if (GET_LMASK(arg2) == 3) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, -3), mem_idx);
-        arg1 = (arg1 & 0x00FFFFFF) | (tmp << 24);
-    }
-    return (int32_t)arg1;
-}
-
 void helper_swl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
                 int mem_idx)
 {
@@ -425,98 +390,6 @@ void helper_swr(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
 #define GET_LMASK64(v) (((v) & 7) ^ 7)
 #endif
 
-target_ulong helper_ldl(CPUMIPSState *env, target_ulong arg1,
-                        target_ulong arg2, int mem_idx)
-{
-    uint64_t tmp;
-
-    tmp = do_lbu(env, arg2, mem_idx);
-    arg1 = (arg1 & 0x00FFFFFFFFFFFFFFULL) | (tmp << 56);
-
-    if (GET_LMASK64(arg2) <= 6) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, 1), mem_idx);
-        arg1 = (arg1 & 0xFF00FFFFFFFFFFFFULL) | (tmp << 48);
-    }
-
-    if (GET_LMASK64(arg2) <= 5) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, 2), mem_idx);
-        arg1 = (arg1 & 0xFFFF00FFFFFFFFFFULL) | (tmp << 40);
-    }
-
-    if (GET_LMASK64(arg2) <= 4) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, 3), mem_idx);
-        arg1 = (arg1 & 0xFFFFFF00FFFFFFFFULL) | (tmp << 32);
-    }
-
-    if (GET_LMASK64(arg2) <= 3) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, 4), mem_idx);
-        arg1 = (arg1 & 0xFFFFFFFF00FFFFFFULL) | (tmp << 24);
-    }
-
-    if (GET_LMASK64(arg2) <= 2) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, 5), mem_idx);
-        arg1 = (arg1 & 0xFFFFFFFFFF00FFFFULL) | (tmp << 16);
-    }
-
-    if (GET_LMASK64(arg2) <= 1) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, 6), mem_idx);
-        arg1 = (arg1 & 0xFFFFFFFFFFFF00FFULL) | (tmp << 8);
-    }
-
-    if (GET_LMASK64(arg2) == 0) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, 7), mem_idx);
-        arg1 = (arg1 & 0xFFFFFFFFFFFFFF00ULL) | tmp;
-    }
-
-    return arg1;
-}
-
-target_ulong helper_ldr(CPUMIPSState *env, target_ulong arg1,
-                        target_ulong arg2, int mem_idx)
-{
-    uint64_t tmp;
-
-    tmp = do_lbu(env, arg2, mem_idx);
-    arg1 = (arg1 & 0xFFFFFFFFFFFFFF00ULL) | tmp;
-
-    if (GET_LMASK64(arg2) >= 1) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, -1), mem_idx);
-        arg1 = (arg1 & 0xFFFFFFFFFFFF00FFULL) | (tmp  << 8);
-    }
-
-    if (GET_LMASK64(arg2) >= 2) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, -2), mem_idx);
-        arg1 = (arg1 & 0xFFFFFFFFFF00FFFFULL) | (tmp << 16);
-    }
-
-    if (GET_LMASK64(arg2) >= 3) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, -3), mem_idx);
-        arg1 = (arg1 & 0xFFFFFFFF00FFFFFFULL) | (tmp << 24);
-    }
-
-    if (GET_LMASK64(arg2) >= 4) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, -4), mem_idx);
-        arg1 = (arg1 & 0xFFFFFF00FFFFFFFFULL) | (tmp << 32);
-    }
-
-    if (GET_LMASK64(arg2) >= 5) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, -5), mem_idx);
-        arg1 = (arg1 & 0xFFFF00FFFFFFFFFFULL) | (tmp << 40);
-    }
-
-    if (GET_LMASK64(arg2) >= 6) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, -6), mem_idx);
-        arg1 = (arg1 & 0xFF00FFFFFFFFFFFFULL) | (tmp << 48);
-    }
-
-    if (GET_LMASK64(arg2) == 7) {
-        tmp = do_lbu(env, GET_OFFSET(arg2, -7), mem_idx);
-        arg1 = (arg1 & 0x00FFFFFFFFFFFFFFULL) | (tmp << 56);
-    }
-
-    return arg1;
-}
-
 void helper_sdl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
                 int mem_idx)
 {
@@ -579,32 +452,19 @@ void helper_lwm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
 {
     target_ulong base_reglist = reglist & 0xf;
     target_ulong do_r31 = reglist & 0x10;
-#ifdef CONFIG_USER_ONLY
-#undef ldfun
-#define ldfun(env, addr) ldl_raw(addr)
-#else
-    uint32_t (*ldfun)(CPUMIPSState *env, target_ulong);
-
-    switch (mem_idx)
-    {
-    case 0: ldfun = cpu_ldl_kernel; break;
-    case 1: ldfun = cpu_ldl_super; break;
-    default:
-    case 2: ldfun = cpu_ldl_user; break;
-    }
-#endif
 
     if (base_reglist > 0 && base_reglist <= ARRAY_SIZE (multiple_regs)) {
         target_ulong i;
 
         for (i = 0; i < base_reglist; i++) {
-            env->active_tc.gpr[multiple_regs[i]] = (target_long)ldfun(env, addr);
+            env->active_tc.gpr[multiple_regs[i]] =
+                (target_long)do_lw(env, addr, mem_idx);
             addr += 4;
         }
     }
 
     if (do_r31) {
-        env->active_tc.gpr[31] = (target_long)ldfun(env, addr);
+        env->active_tc.gpr[31] = (target_long)do_lw(env, addr, mem_idx);
     }
 }
 
@@ -613,32 +473,18 @@ void helper_swm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
 {
     target_ulong base_reglist = reglist & 0xf;
     target_ulong do_r31 = reglist & 0x10;
-#ifdef CONFIG_USER_ONLY
-#undef stfun
-#define stfun(env, addr, val) stl_raw(addr, val)
-#else
-    void (*stfun)(CPUMIPSState *env, target_ulong, uint32_t);
-
-    switch (mem_idx)
-    {
-    case 0: stfun = cpu_stl_kernel; break;
-    case 1: stfun = cpu_stl_super; break;
-     default:
-    case 2: stfun = cpu_stl_user; break;
-    }
-#endif
 
     if (base_reglist > 0 && base_reglist <= ARRAY_SIZE (multiple_regs)) {
         target_ulong i;
 
         for (i = 0; i < base_reglist; i++) {
-            stfun(env, addr, env->active_tc.gpr[multiple_regs[i]]);
+            do_sw(env, addr, env->active_tc.gpr[multiple_regs[i]], mem_idx);
             addr += 4;
         }
     }
 
     if (do_r31) {
-        stfun(env, addr, env->active_tc.gpr[31]);
+        do_sw(env, addr, env->active_tc.gpr[31], mem_idx);
     }
 }
 
@@ -648,32 +494,18 @@ void helper_ldm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
 {
     target_ulong base_reglist = reglist & 0xf;
     target_ulong do_r31 = reglist & 0x10;
-#ifdef CONFIG_USER_ONLY
-#undef ldfun
-#define ldfun(env, addr) ldq_raw(addr)
-#else
-    uint64_t (*ldfun)(CPUMIPSState *env, target_ulong);
-
-    switch (mem_idx)
-    {
-    case 0: ldfun = cpu_ldq_kernel; break;
-    case 1: ldfun = cpu_ldq_super; break;
-    default:
-    case 2: ldfun = cpu_ldq_user; break;
-    }
-#endif
 
     if (base_reglist > 0 && base_reglist <= ARRAY_SIZE (multiple_regs)) {
         target_ulong i;
 
         for (i = 0; i < base_reglist; i++) {
-            env->active_tc.gpr[multiple_regs[i]] = ldfun(env, addr);
+            env->active_tc.gpr[multiple_regs[i]] = do_ld(env, addr, mem_idx);
             addr += 8;
         }
     }
 
     if (do_r31) {
-        env->active_tc.gpr[31] = ldfun(env, addr);
+        env->active_tc.gpr[31] = do_ld(env, addr, mem_idx);
     }
 }
 
@@ -682,32 +514,18 @@ void helper_sdm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
 {
     target_ulong base_reglist = reglist & 0xf;
     target_ulong do_r31 = reglist & 0x10;
-#ifdef CONFIG_USER_ONLY
-#undef stfun
-#define stfun(env, addr, val) stq_raw(addr, val)
-#else
-    void (*stfun)(CPUMIPSState *env, target_ulong, uint64_t);
-
-    switch (mem_idx)
-    {
-    case 0: stfun = cpu_stq_kernel; break;
-    case 1: stfun = cpu_stq_super; break;
-     default:
-    case 2: stfun = cpu_stq_user; break;
-    }
-#endif
 
     if (base_reglist > 0 && base_reglist <= ARRAY_SIZE (multiple_regs)) {
         target_ulong i;
 
         for (i = 0; i < base_reglist; i++) {
-            stfun(env, addr, env->active_tc.gpr[multiple_regs[i]]);
+            do_sd(env, addr, env->active_tc.gpr[multiple_regs[i]], mem_idx);
             addr += 8;
         }
     }
 
     if (do_r31) {
-        stfun(env, addr, env->active_tc.gpr[31]);
+        do_sd(env, addr, env->active_tc.gpr[31], mem_idx);
     }
 }
 #endif
@@ -2001,14 +1819,32 @@ static void r4k_fill_tlb(CPUMIPSState *env, int idx)
 
 void r4k_helper_tlbwi(CPUMIPSState *env)
 {
+    r4k_tlb_t *tlb;
     int idx;
+    target_ulong VPN;
+    uint8_t ASID;
+    bool G, V0, D0, V1, D1;
 
     idx = (env->CP0_Index & ~0x80000000) % env->tlb->nb_tlb;
+    tlb = &env->tlb->mmu.r4k.tlb[idx];
+    VPN = env->CP0_EntryHi & (TARGET_PAGE_MASK << 1);
+#if defined(TARGET_MIPS64)
+    VPN &= env->SEGMask;
+#endif
+    ASID = env->CP0_EntryHi & 0xff;
+    G = env->CP0_EntryLo0 & env->CP0_EntryLo1 & 1;
+    V0 = (env->CP0_EntryLo0 & 2) != 0;
+    D0 = (env->CP0_EntryLo0 & 4) != 0;
+    V1 = (env->CP0_EntryLo1 & 2) != 0;
+    D1 = (env->CP0_EntryLo1 & 4) != 0;
 
-    /* Discard cached TLB entries.  We could avoid doing this if the
-       tlbwi is just upgrading access permissions on the current entry;
-       that might be a further win.  */
-    r4k_mips_tlb_flush_extra (env, env->tlb->nb_tlb);
+    /* Discard cached TLB entries, unless tlbwi is just upgrading access
+       permissions on the current entry. */
+    if (tlb->VPN != VPN || tlb->ASID != ASID || tlb->G != G ||
+        (tlb->V0 && !V0) || (tlb->D0 && !D0) ||
+        (tlb->V1 && !V1) || (tlb->D1 && !D1)) {
+        r4k_mips_tlb_flush_extra(env, env->tlb->nb_tlb);
+    }
 
     r4k_invalidate_tlb(env, idx, 0);
     r4k_fill_tlb(env, idx);
@@ -2038,6 +1874,9 @@ void r4k_helper_tlbp(CPUMIPSState *env)
         mask = tlb->PageMask | ~(TARGET_PAGE_MASK << 1);
         tag = env->CP0_EntryHi & ~mask;
         VPN = tlb->VPN & ~mask;
+#if defined(TARGET_MIPS64)
+        tag &= env->SEGMask;
+#endif
         /* Check ASID, virtual page number & size */
         if ((tlb->G == 1 || tlb->ASID == ASID) && VPN == tag) {
             /* TLB match */
@@ -2053,6 +1892,9 @@ void r4k_helper_tlbp(CPUMIPSState *env)
             mask = tlb->PageMask | ~(TARGET_PAGE_MASK << 1);
             tag = env->CP0_EntryHi & ~mask;
             VPN = tlb->VPN & ~mask;
+#if defined(TARGET_MIPS64)
+            tag &= env->SEGMask;
+#endif
             /* Check ASID, virtual page number & size */
             if ((tlb->G == 1 || tlb->ASID == ASID) && VPN == tag) {
                 r4k_mips_tlb_flush_extra (env, i);
@@ -2295,28 +2137,18 @@ static void do_unaligned_access(CPUMIPSState *env, target_ulong addr,
                                 int is_write, int is_user, uintptr_t retaddr)
 {
     env->CP0_BadVAddr = addr;
-    do_restore_state(env, retaddr);
-    helper_raise_exception(env, (is_write == 1) ? EXCP_AdES : EXCP_AdEL);
+    do_raise_exception(env, (is_write == 1) ? EXCP_AdES : EXCP_AdEL, retaddr);
 }
 
 void tlb_fill(CPUMIPSState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    TranslationBlock *tb;
     int ret;
 
     ret = cpu_mips_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (ret) {
-        if (retaddr) {
-            /* now we have a real cpu fault */
-            tb = tb_find_pc(retaddr);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, retaddr);
-            }
-        }
-        helper_raise_exception_err(env, env->exception_index, env->error_code);
+        do_raise_exception_err(env, env->exception_index,
+                               env->error_code, retaddr);
     }
 }
 
@@ -2332,14 +2164,10 @@ void cpu_unassigned_access(CPUMIPSState *env, hwaddr addr,
 
 /* Complex FPU operations which may need stack space. */
 
-#define FLOAT_ONE32 make_float32(0x3f8 << 20)
-#define FLOAT_ONE64 make_float64(0x3ffULL << 52)
 #define FLOAT_TWO32 make_float32(1 << 30)
 #define FLOAT_TWO64 make_float64(1ULL << 62)
-#define FLOAT_QNAN32 0x7fbfffff
-#define FLOAT_QNAN64 0x7ff7ffffffffffffULL
-#define FLOAT_SNAN32 0x7fffffff
-#define FLOAT_SNAN64 0x7fffffffffffffffULL
+#define FP_TO_INT32_OVERFLOW 0x7fffffff
+#define FP_TO_INT64_OVERFLOW 0x7fffffffffffffffULL
 
 /* convert MIPS rounding mode in FCR31 to IEEE library */
 static unsigned int ieee_rm[] = {
@@ -2414,7 +2242,7 @@ void helper_ctc1(CPUMIPSState *env, target_ulong arg1, uint32_t reg)
     RESTORE_FLUSH_MODE;
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     if ((GET_FP_ENABLE(env->active_fpu.fcr31) | 0x20) & GET_FP_CAUSE(env->active_fpu.fcr31))
-        helper_raise_exception(env, EXCP_FPE);
+        do_raise_exception(env, EXCP_FPE, GETPC());
 }
 
 static inline int ieee_ex_to_mips(int xcpt)
@@ -2440,15 +2268,21 @@ static inline int ieee_ex_to_mips(int xcpt)
     return ret;
 }
 
-static inline void update_fcr31(CPUMIPSState *env)
+static inline void update_fcr31(CPUMIPSState *env, uintptr_t pc)
 {
     int tmp = ieee_ex_to_mips(get_float_exception_flags(&env->active_fpu.fp_status));
 
     SET_FP_CAUSE(env->active_fpu.fcr31, tmp);
-    if (GET_FP_ENABLE(env->active_fpu.fcr31) & tmp)
-        helper_raise_exception(env, EXCP_FPE);
-    else
-        UPDATE_FP_FLAGS(env->active_fpu.fcr31, tmp);
+
+    if (tmp) { /* flags recorded: consume them and reset fp_status */
+        set_float_exception_flags(0, &env->active_fpu.fp_status);
+
+        if (GET_FP_ENABLE(env->active_fpu.fcr31) & tmp) {
+            do_raise_exception(env, EXCP_FPE, pc);
+        } else { /* not enabled: only accumulate into FCR31 flags */
+            UPDATE_FP_FLAGS(env->active_fpu.fcr31, tmp);
+        }
+    }
 }
 
 /* Float support.
@@ -2459,21 +2293,24 @@ static inline void update_fcr31(CPUMIPSState *env)
 /* unary operations, modifying fp status  */
 uint64_t helper_float_sqrt_d(CPUMIPSState *env, uint64_t fdt0)
 {
-    return float64_sqrt(fdt0, &env->active_fpu.fp_status);
+    fdt0 = float64_sqrt(fdt0, &env->active_fpu.fp_status);
+    update_fcr31(env, GETPC());
+    return fdt0;
 }
 
 uint32_t helper_float_sqrt_s(CPUMIPSState *env, uint32_t fst0)
 {
-    return float32_sqrt(fst0, &env->active_fpu.fp_status);
+    fst0 = float32_sqrt(fst0, &env->active_fpu.fp_status);
+    update_fcr31(env, GETPC());
+    return fst0;
 }
 
 uint64_t helper_float_cvtd_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t fdt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float32_to_float64(fst0, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    update_fcr31(env, GETPC());
     return fdt2;
 }
 
@@ -2481,9 +2318,8 @@ uint64_t helper_float_cvtd_w(CPUMIPSState *env, uint32_t wt0)
 {
     uint64_t fdt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = int32_to_float64(wt0, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    update_fcr31(env, GETPC());
     return fdt2;
 }
 
@@ -2491,9 +2327,8 @@ uint64_t helper_float_cvtd_l(CPUMIPSState *env, uint64_t dt0)
 {
     uint64_t fdt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = int64_to_float64(dt0, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    update_fcr31(env, GETPC());
     return fdt2;
 }
 
@@ -2501,11 +2336,12 @@ uint64_t helper_float_cvtl_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     dt2 = float64_to_int64(fdt0, &env->active_fpu.fp_status);
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        dt2 = FLOAT_SNAN64;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        dt2 = FP_TO_INT64_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return dt2;
 }
 
@@ -2513,11 +2349,12 @@ uint64_t helper_float_cvtl_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     dt2 = float32_to_int64(fst0, &env->active_fpu.fp_status);
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        dt2 = FLOAT_SNAN64;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        dt2 = FP_TO_INT64_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return dt2;
 }
 
@@ -2526,10 +2363,9 @@ uint64_t helper_float_cvtps_pw(CPUMIPSState *env, uint64_t dt0)
     uint32_t fst2;
     uint32_t fsth2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = int32_to_float32(dt0 & 0XFFFFFFFF, &env->active_fpu.fp_status);
     fsth2 = int32_to_float32(dt0 >> 32, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    update_fcr31(env, GETPC());
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
@@ -2537,15 +2373,24 @@ uint64_t helper_float_cvtpw_ps(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
     uint32_t wth2;
+    int excp, excph;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fdt0 & 0XFFFFFFFF, &env->active_fpu.fp_status);
+    excp = get_float_exception_flags(&env->active_fpu.fp_status);
+    if (excp & (float_flag_overflow | float_flag_invalid)) {
+        wt2 = FP_TO_INT32_OVERFLOW;
+    }
+
+    set_float_exception_flags(0, &env->active_fpu.fp_status);
     wth2 = float32_to_int32(fdt0 >> 32, &env->active_fpu.fp_status);
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID)) {
-        wt2 = FLOAT_SNAN32;
-        wth2 = FLOAT_SNAN32;
+    excph = get_float_exception_flags(&env->active_fpu.fp_status);
+    if (excph & (float_flag_overflow | float_flag_invalid)) {
+        wth2 = FP_TO_INT32_OVERFLOW;
     }
+
+    set_float_exception_flags(excp | excph, &env->active_fpu.fp_status);
+    update_fcr31(env, GETPC());
+
     return ((uint64_t)wth2 << 32) | wt2;
 }
 
@@ -2553,9 +2398,8 @@ uint32_t helper_float_cvts_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t fst2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float64_to_float32(fdt0, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    update_fcr31(env, GETPC());
     return fst2;
 }
 
@@ -2563,9 +2407,8 @@ uint32_t helper_float_cvts_w(CPUMIPSState *env, uint32_t wt0)
 {
     uint32_t fst2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = int32_to_float32(wt0, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    update_fcr31(env, GETPC());
     return fst2;
 }
 
@@ -2573,9 +2416,8 @@ uint32_t helper_float_cvts_l(CPUMIPSState *env, uint64_t dt0)
 {
     uint32_t fst2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = int64_to_float32(dt0, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    update_fcr31(env, GETPC());
     return fst2;
 }
 
@@ -2583,9 +2425,8 @@ uint32_t helper_float_cvts_pl(CPUMIPSState *env, uint32_t wt0)
 {
     uint32_t wt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = wt0;
-    update_fcr31(env);
+    update_fcr31(env, GETPC());
     return wt2;
 }
 
@@ -2593,9 +2434,8 @@ uint32_t helper_float_cvts_pu(CPUMIPSState *env, uint32_t wth0)
 {
     uint32_t wt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = wth0;
-    update_fcr31(env);
+    update_fcr31(env, GETPC());
     return wt2;
 }
 
@@ -2603,11 +2443,12 @@ uint32_t helper_float_cvtw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fst0, &env->active_fpu.fp_status);
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        wt2 = FLOAT_SNAN32;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        wt2 = FP_TO_INT32_OVERFLOW;
+    }
+    update_fcr31(env, GETPC()); /* resets fp_status flags, so do last */
     return wt2;
 }
 
@@ -2615,11 +2456,12 @@ uint32_t helper_float_cvtw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float64_to_int32(fdt0, &env->active_fpu.fp_status);
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        wt2 = FLOAT_SNAN32;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        wt2 = FP_TO_INT32_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return wt2;
 }
 
@@ -2627,13 +2469,14 @@ uint64_t helper_float_roundl_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     set_float_rounding_mode(float_round_nearest_even, &env->active_fpu.fp_status);
     dt2 = float64_to_int64(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        dt2 = FLOAT_SNAN64;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        dt2 = FP_TO_INT64_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return dt2;
 }
 
@@ -2641,13 +2484,14 @@ uint64_t helper_float_roundl_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     set_float_rounding_mode(float_round_nearest_even, &env->active_fpu.fp_status);
     dt2 = float32_to_int64(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        dt2 = FLOAT_SNAN64;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        dt2 = FP_TO_INT64_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return dt2;
 }
 
@@ -2655,13 +2499,14 @@ uint32_t helper_float_roundw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     set_float_rounding_mode(float_round_nearest_even, &env->active_fpu.fp_status);
     wt2 = float64_to_int32(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        wt2 = FLOAT_SNAN32;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        wt2 = FP_TO_INT32_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return wt2;
 }
 
@@ -2669,13 +2514,14 @@ uint32_t helper_float_roundw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     set_float_rounding_mode(float_round_nearest_even, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        wt2 = FLOAT_SNAN32;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        wt2 = FP_TO_INT32_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return wt2;
 }
 
@@ -2683,11 +2529,12 @@ uint64_t helper_float_truncl_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     dt2 = float64_to_int64_round_to_zero(fdt0, &env->active_fpu.fp_status);
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        dt2 = FLOAT_SNAN64;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        dt2 = FP_TO_INT64_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return dt2;
 }
 
@@ -2695,11 +2542,12 @@ uint64_t helper_float_truncl_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     dt2 = float32_to_int64_round_to_zero(fst0, &env->active_fpu.fp_status);
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        dt2 = FLOAT_SNAN64;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        dt2 = FP_TO_INT64_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return dt2;
 }
 
@@ -2707,11 +2555,12 @@ uint32_t helper_float_truncw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float64_to_int32_round_to_zero(fdt0, &env->active_fpu.fp_status);
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        wt2 = FLOAT_SNAN32;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        wt2 = FP_TO_INT32_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return wt2;
 }
 
@@ -2719,11 +2568,12 @@ uint32_t helper_float_truncw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float32_to_int32_round_to_zero(fst0, &env->active_fpu.fp_status);
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        wt2 = FLOAT_SNAN32;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        wt2 = FP_TO_INT32_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return wt2;
 }
 
@@ -2731,13 +2581,14 @@ uint64_t helper_float_ceill_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     set_float_rounding_mode(float_round_up, &env->active_fpu.fp_status);
     dt2 = float64_to_int64(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        dt2 = FLOAT_SNAN64;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        dt2 = FP_TO_INT64_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return dt2;
 }
 
@@ -2745,13 +2596,14 @@ uint64_t helper_float_ceill_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     set_float_rounding_mode(float_round_up, &env->active_fpu.fp_status);
     dt2 = float32_to_int64(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        dt2 = FLOAT_SNAN64;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        dt2 = FP_TO_INT64_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return dt2;
 }
 
@@ -2759,13 +2611,14 @@ uint32_t helper_float_ceilw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     set_float_rounding_mode(float_round_up, &env->active_fpu.fp_status);
     wt2 = float64_to_int32(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        wt2 = FLOAT_SNAN32;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        wt2 = FP_TO_INT32_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return wt2;
 }
 
@@ -2773,13 +2626,14 @@ uint32_t helper_float_ceilw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     set_float_rounding_mode(float_round_up, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        wt2 = FLOAT_SNAN32;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        wt2 = FP_TO_INT32_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return wt2;
 }
 
@@ -2787,13 +2641,14 @@ uint64_t helper_float_floorl_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     set_float_rounding_mode(float_round_down, &env->active_fpu.fp_status);
     dt2 = float64_to_int64(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        dt2 = FLOAT_SNAN64;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        dt2 = FP_TO_INT64_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return dt2;
 }
 
@@ -2801,13 +2656,14 @@ uint64_t helper_float_floorl_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     set_float_rounding_mode(float_round_down, &env->active_fpu.fp_status);
     dt2 = float32_to_int64(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        dt2 = FLOAT_SNAN64;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        dt2 = FP_TO_INT64_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return dt2;
 }
 
@@ -2815,13 +2671,14 @@ uint32_t helper_float_floorw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     set_float_rounding_mode(float_round_down, &env->active_fpu.fp_status);
     wt2 = float64_to_int32(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        wt2 = FLOAT_SNAN32;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        wt2 = FP_TO_INT32_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return wt2;
 }
 
@@ -2829,13 +2686,14 @@ uint32_t helper_float_floorw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     set_float_rounding_mode(float_round_down, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31(env);
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
-        wt2 = FLOAT_SNAN32;
+    if (get_float_exception_flags(&env->active_fpu.fp_status)
+        & (float_flag_invalid | float_flag_overflow)) {
+        wt2 = FP_TO_INT32_OVERFLOW;
+    }
+    update_fcr31(env, GETPC());
     return wt2;
 }
 
@@ -2867,9 +2725,8 @@ uint64_t helper_float_recip_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t fdt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
-    fdt2 = float64_div(FLOAT_ONE64, fdt0, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    fdt2 = float64_div(float64_one, fdt0, &env->active_fpu.fp_status);
+    update_fcr31(env, GETPC());
     return fdt2;
 }
 
@@ -2877,9 +2734,8 @@ uint32_t helper_float_recip_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t fst2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
-    fst2 = float32_div(FLOAT_ONE32, fst0, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    fst2 = float32_div(float32_one, fst0, &env->active_fpu.fp_status);
+    update_fcr31(env, GETPC());
     return fst2;
 }
 
@@ -2887,10 +2743,9 @@ uint64_t helper_float_rsqrt_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t fdt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_sqrt(fdt0, &env->active_fpu.fp_status);
-    fdt2 = float64_div(FLOAT_ONE64, fdt2, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    fdt2 = float64_div(float64_one, fdt2, &env->active_fpu.fp_status);
+    update_fcr31(env, GETPC());
     return fdt2;
 }
 
@@ -2898,10 +2753,9 @@ uint32_t helper_float_rsqrt_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t fst2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_sqrt(fst0, &env->active_fpu.fp_status);
-    fst2 = float32_div(FLOAT_ONE32, fst2, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    fst2 = float32_div(float32_one, fst2, &env->active_fpu.fp_status);
+    update_fcr31(env, GETPC());
     return fst2;
 }
 
@@ -2909,9 +2763,8 @@ uint64_t helper_float_recip1_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t fdt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
-    fdt2 = float64_div(FLOAT_ONE64, fdt0, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    fdt2 = float64_div(float64_one, fdt0, &env->active_fpu.fp_status);
+    update_fcr31(env, GETPC());
     return fdt2;
 }
 
@@ -2919,9 +2772,8 @@ uint32_t helper_float_recip1_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t fst2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
-    fst2 = float32_div(FLOAT_ONE32, fst0, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    fst2 = float32_div(float32_one, fst0, &env->active_fpu.fp_status);
+    update_fcr31(env, GETPC());
     return fst2;
 }
 
@@ -2930,10 +2782,9 @@ uint64_t helper_float_recip1_ps(CPUMIPSState *env, uint64_t fdt0)
     uint32_t fst2;
     uint32_t fsth2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
-    fst2 = float32_div(FLOAT_ONE32, fdt0 & 0XFFFFFFFF, &env->active_fpu.fp_status);
-    fsth2 = float32_div(FLOAT_ONE32, fdt0 >> 32, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    fst2 = float32_div(float32_one, fdt0 & 0XFFFFFFFF, &env->active_fpu.fp_status);
+    fsth2 = float32_div(float32_one, fdt0 >> 32, &env->active_fpu.fp_status);
+    update_fcr31(env, GETPC());
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
@@ -2941,10 +2792,9 @@ uint64_t helper_float_rsqrt1_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t fdt2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_sqrt(fdt0, &env->active_fpu.fp_status);
-    fdt2 = float64_div(FLOAT_ONE64, fdt2, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    fdt2 = float64_div(float64_one, fdt2, &env->active_fpu.fp_status);
+    update_fcr31(env, GETPC());
     return fdt2;
 }
 
@@ -2952,10 +2802,9 @@ uint32_t helper_float_rsqrt1_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t fst2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_sqrt(fst0, &env->active_fpu.fp_status);
-    fst2 = float32_div(FLOAT_ONE32, fst2, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    fst2 = float32_div(float32_one, fst2, &env->active_fpu.fp_status);
+    update_fcr31(env, GETPC());
     return fst2;
 }
 
@@ -2964,12 +2813,11 @@ uint64_t helper_float_rsqrt1_ps(CPUMIPSState *env, uint64_t fdt0)
     uint32_t fst2;
     uint32_t fsth2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_sqrt(fdt0 & 0XFFFFFFFF, &env->active_fpu.fp_status);
     fsth2 = float32_sqrt(fdt0 >> 32, &env->active_fpu.fp_status);
-    fst2 = float32_div(FLOAT_ONE32, fst2, &env->active_fpu.fp_status);
-    fsth2 = float32_div(FLOAT_ONE32, fsth2, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    fst2 = float32_div(float32_one, fst2, &env->active_fpu.fp_status);
+    fsth2 = float32_div(float32_one, fsth2, &env->active_fpu.fp_status);
+    update_fcr31(env, GETPC());
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
@@ -2982,11 +2830,8 @@ uint64_t helper_float_ ## name ## _d(CPUMIPSState *env,            \
 {                                                                  \
     uint64_t dt2;                                                  \
                                                                    \
-    set_float_exception_flags(0, &env->active_fpu.fp_status);            \
     dt2 = float64_ ## name (fdt0, fdt1, &env->active_fpu.fp_status);     \
-    update_fcr31(env);                                             \
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & FP_INVALID)                \
-        dt2 = FLOAT_QNAN64;                                        \
+    update_fcr31(env, GETPC());                                    \
     return dt2;                                                    \
 }                                                                  \
                                                                    \
@@ -2995,11 +2840,8 @@ uint32_t helper_float_ ## name ## _s(CPUMIPSState *env,            \
 {                                                                  \
     uint32_t wt2;                                                  \
                                                                    \
-    set_float_exception_flags(0, &env->active_fpu.fp_status);            \
     wt2 = float32_ ## name (fst0, fst1, &env->active_fpu.fp_status);     \
-    update_fcr31(env);                                             \
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & FP_INVALID)                \
-        wt2 = FLOAT_QNAN32;                                        \
+    update_fcr31(env, GETPC());                                    \
     return wt2;                                                    \
 }                                                                  \
                                                                    \
@@ -3014,14 +2856,9 @@ uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env,           \
     uint32_t wt2;                                                  \
     uint32_t wth2;                                                 \
                                                                    \
-    set_float_exception_flags(0, &env->active_fpu.fp_status);            \
     wt2 = float32_ ## name (fst0, fst1, &env->active_fpu.fp_status);     \
     wth2 = float32_ ## name (fsth0, fsth1, &env->active_fpu.fp_status);  \
-    update_fcr31(env);                                             \
-    if (GET_FP_CAUSE(env->active_fpu.fcr31) & FP_INVALID) {              \
-        wt2 = FLOAT_QNAN32;                                        \
-        wth2 = FLOAT_QNAN32;                                       \
-    }                                                              \
+    update_fcr31(env, GETPC());                                    \
     return ((uint64_t)wth2 << 32) | wt2;                           \
 }
 
@@ -3031,112 +2868,66 @@ FLOAT_BINOP(mul)
 FLOAT_BINOP(div)
 #undef FLOAT_BINOP
 
-/* ternary operations */
-#define FLOAT_TERNOP(name1, name2)                                        \
-uint64_t helper_float_ ## name1 ## name2 ## _d(CPUMIPSState *env,         \
-                                               uint64_t fdt0,             \
-                                               uint64_t fdt1,             \
-                                               uint64_t fdt2)             \
-{                                                                         \
-    fdt0 = float64_ ## name1 (fdt0, fdt1, &env->active_fpu.fp_status);          \
-    return float64_ ## name2 (fdt0, fdt2, &env->active_fpu.fp_status);          \
-}                                                                         \
-                                                                          \
-uint32_t helper_float_ ## name1 ## name2 ## _s(CPUMIPSState *env,         \
-                                               uint32_t fst0,             \
-                                               uint32_t fst1,             \
-                                               uint32_t fst2)             \
-{                                                                         \
-    fst0 = float32_ ## name1 (fst0, fst1, &env->active_fpu.fp_status);          \
-    return float32_ ## name2 (fst0, fst2, &env->active_fpu.fp_status);          \
-}                                                                         \
-                                                                          \
-uint64_t helper_float_ ## name1 ## name2 ## _ps(CPUMIPSState *env,        \
-                                                uint64_t fdt0,            \
-                                                uint64_t fdt1,            \
-                                                uint64_t fdt2)            \
-{                                                                         \
-    uint32_t fst0 = fdt0 & 0XFFFFFFFF;                                    \
-    uint32_t fsth0 = fdt0 >> 32;                                          \
-    uint32_t fst1 = fdt1 & 0XFFFFFFFF;                                    \
-    uint32_t fsth1 = fdt1 >> 32;                                          \
-    uint32_t fst2 = fdt2 & 0XFFFFFFFF;                                    \
-    uint32_t fsth2 = fdt2 >> 32;                                          \
-                                                                          \
-    fst0 = float32_ ## name1 (fst0, fst1, &env->active_fpu.fp_status);          \
-    fsth0 = float32_ ## name1 (fsth0, fsth1, &env->active_fpu.fp_status);       \
-    fst2 = float32_ ## name2 (fst0, fst2, &env->active_fpu.fp_status);          \
-    fsth2 = float32_ ## name2 (fsth0, fsth2, &env->active_fpu.fp_status);       \
-    return ((uint64_t)fsth2 << 32) | fst2;                                \
-}
-
-FLOAT_TERNOP(mul, add)
-FLOAT_TERNOP(mul, sub)
-#undef FLOAT_TERNOP
-
-/* negated ternary operations */
-#define FLOAT_NTERNOP(name1, name2)                                       \
-uint64_t helper_float_n ## name1 ## name2 ## _d(CPUMIPSState *env,        \
-                                                uint64_t fdt0,            \
-                                                uint64_t fdt1,            \
-                                                uint64_t fdt2)            \
-{                                                                         \
-    fdt0 = float64_ ## name1 (fdt0, fdt1, &env->active_fpu.fp_status);          \
-    fdt2 = float64_ ## name2 (fdt0, fdt2, &env->active_fpu.fp_status);          \
-    return float64_chs(fdt2);                                             \
-}                                                                         \
-                                                                          \
-uint32_t helper_float_n ## name1 ## name2 ## _s(CPUMIPSState *env,        \
-                                                uint32_t fst0,            \
-                                                uint32_t fst1,            \
-                                                uint32_t fst2)            \
-{                                                                         \
-    fst0 = float32_ ## name1 (fst0, fst1, &env->active_fpu.fp_status);          \
-    fst2 = float32_ ## name2 (fst0, fst2, &env->active_fpu.fp_status);          \
-    return float32_chs(fst2);                                             \
-}                                                                         \
-                                                                          \
-uint64_t helper_float_n ## name1 ## name2 ## _ps(CPUMIPSState *env,       \
-                                                 uint64_t fdt0,           \
-                                                 uint64_t fdt1,           \
-                                                 uint64_t fdt2)           \
-{                                                                         \
-    uint32_t fst0 = fdt0 & 0XFFFFFFFF;                                    \
-    uint32_t fsth0 = fdt0 >> 32;                                          \
-    uint32_t fst1 = fdt1 & 0XFFFFFFFF;                                    \
-    uint32_t fsth1 = fdt1 >> 32;                                          \
-    uint32_t fst2 = fdt2 & 0XFFFFFFFF;                                    \
-    uint32_t fsth2 = fdt2 >> 32;                                          \
-                                                                          \
-    fst0 = float32_ ## name1 (fst0, fst1, &env->active_fpu.fp_status);          \
-    fsth0 = float32_ ## name1 (fsth0, fsth1, &env->active_fpu.fp_status);       \
-    fst2 = float32_ ## name2 (fst0, fst2, &env->active_fpu.fp_status);          \
-    fsth2 = float32_ ## name2 (fsth0, fsth2, &env->active_fpu.fp_status);       \
-    fst2 = float32_chs(fst2);                                             \
-    fsth2 = float32_chs(fsth2);                                           \
-    return ((uint64_t)fsth2 << 32) | fst2;                                \
-}
-
-FLOAT_NTERNOP(mul, add)
-FLOAT_NTERNOP(mul, sub)
-#undef FLOAT_NTERNOP
+/* FMA based operations: float*_muladd(..., type, ...) then update_fcr31() */
+#define FLOAT_FMA(name, type)                                        \
+uint64_t helper_float_ ## name ## _d(CPUMIPSState *env,              \
+                                     uint64_t fdt0, uint64_t fdt1,   \
+                                     uint64_t fdt2)                  \
+{                                                                    \
+    fdt0 = float64_muladd(fdt0, fdt1, fdt2, type,                    \
+                         &env->active_fpu.fp_status);                \
+    update_fcr31(env, GETPC());                                      \
+    return fdt0;                                                     \
+}                                                                    \
+                                                                     \
+uint32_t helper_float_ ## name ## _s(CPUMIPSState *env,              \
+                                     uint32_t fst0, uint32_t fst1,   \
+                                     uint32_t fst2)                  \
+{                                                                    \
+    fst0 = float32_muladd(fst0, fst1, fst2, type,                    \
+                         &env->active_fpu.fp_status);                \
+    update_fcr31(env, GETPC());                                      \
+    return fst0;                                                     \
+}                                                                    \
+                                                                     \
+uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env,             \
+                                      uint64_t fdt0, uint64_t fdt1,  \
+                                      uint64_t fdt2)                 \
+{                                                                    \
+    uint32_t fst0 = fdt0 & 0XFFFFFFFF;                               \
+    uint32_t fsth0 = fdt0 >> 32;                                     \
+    uint32_t fst1 = fdt1 & 0XFFFFFFFF;                               \
+    uint32_t fsth1 = fdt1 >> 32;                                     \
+    uint32_t fst2 = fdt2 & 0XFFFFFFFF;                               \
+    uint32_t fsth2 = fdt2 >> 32;                                     \
+                                                                     \
+    fst0 = float32_muladd(fst0, fst1, fst2, type,                    \
+                          &env->active_fpu.fp_status);               \
+    fsth0 = float32_muladd(fsth0, fsth1, fsth2, type,                \
+                           &env->active_fpu.fp_status);              \
+    update_fcr31(env, GETPC());                                      \
+    return ((uint64_t)fsth0 << 32) | fst0;                           \
+}
+FLOAT_FMA(madd, 0)
+FLOAT_FMA(msub, float_muladd_negate_c)
+FLOAT_FMA(nmadd, float_muladd_negate_result)
+FLOAT_FMA(nmsub, float_muladd_negate_result | float_muladd_negate_c)
+#undef FLOAT_FMA
 
 /* MIPS specific binary operations */
 uint64_t helper_float_recip2_d(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
 {
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_mul(fdt0, fdt2, &env->active_fpu.fp_status);
-    fdt2 = float64_chs(float64_sub(fdt2, FLOAT_ONE64, &env->active_fpu.fp_status));
-    update_fcr31(env);
+    fdt2 = float64_chs(float64_sub(fdt2, float64_one, &env->active_fpu.fp_status));
+    update_fcr31(env, GETPC());
     return fdt2;
 }
 
 uint32_t helper_float_recip2_s(CPUMIPSState *env, uint32_t fst0, uint32_t fst2)
 {
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_mul(fst0, fst2, &env->active_fpu.fp_status);
-    fst2 = float32_chs(float32_sub(fst2, FLOAT_ONE32, &env->active_fpu.fp_status));
-    update_fcr31(env);
+    fst2 = float32_chs(float32_sub(fst2, float32_one, &env->active_fpu.fp_status));
+    update_fcr31(env, GETPC());
     return fst2;
 }
 
@@ -3147,32 +2938,29 @@ uint64_t helper_float_recip2_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
     uint32_t fst2 = fdt2 & 0XFFFFFFFF;
     uint32_t fsth2 = fdt2 >> 32;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_mul(fst0, fst2, &env->active_fpu.fp_status);
     fsth2 = float32_mul(fsth0, fsth2, &env->active_fpu.fp_status);
-    fst2 = float32_chs(float32_sub(fst2, FLOAT_ONE32, &env->active_fpu.fp_status));
-    fsth2 = float32_chs(float32_sub(fsth2, FLOAT_ONE32, &env->active_fpu.fp_status));
-    update_fcr31(env);
+    fst2 = float32_chs(float32_sub(fst2, float32_one, &env->active_fpu.fp_status));
+    fsth2 = float32_chs(float32_sub(fsth2, float32_one, &env->active_fpu.fp_status));
+    update_fcr31(env, GETPC());
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
 uint64_t helper_float_rsqrt2_d(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
 {
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_mul(fdt0, fdt2, &env->active_fpu.fp_status);
-    fdt2 = float64_sub(fdt2, FLOAT_ONE64, &env->active_fpu.fp_status);
+    fdt2 = float64_sub(fdt2, float64_one, &env->active_fpu.fp_status);
     fdt2 = float64_chs(float64_div(fdt2, FLOAT_TWO64, &env->active_fpu.fp_status));
-    update_fcr31(env);
+    update_fcr31(env, GETPC());
     return fdt2;
 }
 
 uint32_t helper_float_rsqrt2_s(CPUMIPSState *env, uint32_t fst0, uint32_t fst2)
 {
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_mul(fst0, fst2, &env->active_fpu.fp_status);
-    fst2 = float32_sub(fst2, FLOAT_ONE32, &env->active_fpu.fp_status);
+    fst2 = float32_sub(fst2, float32_one, &env->active_fpu.fp_status);
     fst2 = float32_chs(float32_div(fst2, FLOAT_TWO32, &env->active_fpu.fp_status));
-    update_fcr31(env);
+    update_fcr31(env, GETPC());
     return fst2;
 }
 
@@ -3183,14 +2971,13 @@ uint64_t helper_float_rsqrt2_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
     uint32_t fst2 = fdt2 & 0XFFFFFFFF;
     uint32_t fsth2 = fdt2 >> 32;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_mul(fst0, fst2, &env->active_fpu.fp_status);
     fsth2 = float32_mul(fsth0, fsth2, &env->active_fpu.fp_status);
-    fst2 = float32_sub(fst2, FLOAT_ONE32, &env->active_fpu.fp_status);
-    fsth2 = float32_sub(fsth2, FLOAT_ONE32, &env->active_fpu.fp_status);
+    fst2 = float32_sub(fst2, float32_one, &env->active_fpu.fp_status);
+    fsth2 = float32_sub(fsth2, float32_one, &env->active_fpu.fp_status);
     fst2 = float32_chs(float32_div(fst2, FLOAT_TWO32, &env->active_fpu.fp_status));
     fsth2 = float32_chs(float32_div(fsth2, FLOAT_TWO32, &env->active_fpu.fp_status));
-    update_fcr31(env);
+    update_fcr31(env, GETPC());
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
@@ -3203,10 +2990,9 @@ uint64_t helper_float_addr_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt1)
     uint32_t fst2;
     uint32_t fsth2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_add (fst0, fsth0, &env->active_fpu.fp_status);
     fsth2 = float32_add (fst1, fsth1, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    update_fcr31(env, GETPC());
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
@@ -3219,10 +3005,9 @@ uint64_t helper_float_mulr_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt1)
     uint32_t fst2;
     uint32_t fsth2;
 
-    set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_mul (fst0, fsth0, &env->active_fpu.fp_status);
     fsth2 = float32_mul (fst1, fsth1, &env->active_fpu.fp_status);
-    update_fcr31(env);
+    update_fcr31(env, GETPC());
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
@@ -3232,9 +3017,8 @@ void helper_cmp_d_ ## op(CPUMIPSState *env, uint64_t fdt0,     \
                          uint64_t fdt1, int cc)                \
 {                                                              \
     int c;                                                     \
-    set_float_exception_flags(0, &env->active_fpu.fp_status);  \
     c = cond;                                                  \
-    update_fcr31(env);                                         \
+    update_fcr31(env, GETPC());                                \
     if (c)                                                     \
         SET_FP_COND(cc, env->active_fpu);                      \
     else                                                       \
@@ -3244,11 +3028,10 @@ void helper_cmpabs_d_ ## op(CPUMIPSState *env, uint64_t fdt0,  \
                             uint64_t fdt1, int cc)             \
 {                                                              \
     int c;                                                     \
-    set_float_exception_flags(0, &env->active_fpu.fp_status);  \
     fdt0 = float64_abs(fdt0);                                  \
     fdt1 = float64_abs(fdt1);                                  \
     c = cond;                                                  \
-    update_fcr31(env);                                         \
+    update_fcr31(env, GETPC());                                \
     if (c)                                                     \
         SET_FP_COND(cc, env->active_fpu);                      \
     else                                                       \
@@ -3281,9 +3064,8 @@ void helper_cmp_s_ ## op(CPUMIPSState *env, uint32_t fst0,     \
                          uint32_t fst1, int cc)                \
 {                                                              \
     int c;                                                     \
-    set_float_exception_flags(0, &env->active_fpu.fp_status);  \
     c = cond;                                                  \
-    update_fcr31(env);                                         \
+    update_fcr31(env, GETPC());                                \
     if (c)                                                     \
         SET_FP_COND(cc, env->active_fpu);                      \
     else                                                       \
@@ -3293,11 +3075,10 @@ void helper_cmpabs_s_ ## op(CPUMIPSState *env, uint32_t fst0,  \
                             uint32_t fst1, int cc)             \
 {                                                              \
     int c;                                                     \
-    set_float_exception_flags(0, &env->active_fpu.fp_status);  \
     fst0 = float32_abs(fst0);                                  \
     fst1 = float32_abs(fst1);                                  \
     c = cond;                                                  \
-    update_fcr31(env);                                         \
+    update_fcr31(env, GETPC());                                \
     if (c)                                                     \
         SET_FP_COND(cc, env->active_fpu);                      \
     else                                                       \
@@ -3331,14 +3112,13 @@ void helper_cmp_ps_ ## op(CPUMIPSState *env, uint64_t fdt0,     \
 {                                                               \
     uint32_t fst0, fsth0, fst1, fsth1;                          \
     int ch, cl;                                                 \
-    set_float_exception_flags(0, &env->active_fpu.fp_status);   \
     fst0 = fdt0 & 0XFFFFFFFF;                                   \
     fsth0 = fdt0 >> 32;                                         \
     fst1 = fdt1 & 0XFFFFFFFF;                                   \
     fsth1 = fdt1 >> 32;                                         \
     cl = condl;                                                 \
     ch = condh;                                                 \
-    update_fcr31(env);                                          \
+    update_fcr31(env, GETPC());                                 \
     if (cl)                                                     \
         SET_FP_COND(cc, env->active_fpu);                       \
     else                                                        \
@@ -3359,7 +3139,7 @@ void helper_cmpabs_ps_ ## op(CPUMIPSState *env, uint64_t fdt0,  \
     fsth1 = float32_abs(fdt1 >> 32);                            \
     cl = condl;                                                 \
     ch = condh;                                                 \
-    update_fcr31(env);                                          \
+    update_fcr31(env, GETPC());                                 \
     if (cl)                                                     \
         SET_FP_COND(cc, env->active_fpu);                       \
     else                                                        \
diff --git a/target-mips/translate.c b/target-mips/translate.c
index ed55e260ac..8175da05d0 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -5,6 +5,7 @@
  *  Copyright (c) 2006 Marius Groeger (FPU operations)
  *  Copyright (c) 2006 Thiemo Seufer (MIPS32R2 support)
  *  Copyright (c) 2009 CodeSourcery (MIPS16 and microMIPS support)
+ *  Copyright (c) 2012 Jia Liu & Dongxue Zhang (MIPS ASE DSP support)
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -312,6 +313,35 @@ enum {
     OPC_MODU_G_2E   = 0x23 | OPC_SPECIAL3,
     OPC_DMOD_G_2E   = 0x26 | OPC_SPECIAL3,
     OPC_DMODU_G_2E  = 0x27 | OPC_SPECIAL3,
+
+    /* MIPS DSP Load */
+    OPC_LX_DSP         = 0x0A | OPC_SPECIAL3,
+    /* MIPS DSP Arithmetic */
+    OPC_ADDU_QB_DSP    = 0x10 | OPC_SPECIAL3,
+    OPC_ADDU_OB_DSP    = 0x14 | OPC_SPECIAL3,
+    OPC_ABSQ_S_PH_DSP  = 0x12 | OPC_SPECIAL3,
+    OPC_ABSQ_S_QH_DSP  = 0x16 | OPC_SPECIAL3,
+    /* OPC_ADDUH_QB_DSP is same as OPC_MULT_G_2E.  */
+    /* OPC_ADDUH_QB_DSP   = 0x18 | OPC_SPECIAL3,  */
+    OPC_CMPU_EQ_QB_DSP = 0x11 | OPC_SPECIAL3,
+    OPC_CMPU_EQ_OB_DSP = 0x15 | OPC_SPECIAL3,
+    /* MIPS DSP GPR-Based Shift Sub-class */
+    OPC_SHLL_QB_DSP    = 0x13 | OPC_SPECIAL3,
+    OPC_SHLL_OB_DSP    = 0x17 | OPC_SPECIAL3,
+    /* MIPS DSP Multiply Sub-class insns */
+    /* OPC_MUL_PH_DSP is same as OPC_ADDUH_QB_DSP.  */
+    /* OPC_MUL_PH_DSP     = 0x18 | OPC_SPECIAL3,  */
+    OPC_DPA_W_PH_DSP   = 0x30 | OPC_SPECIAL3,
+    OPC_DPAQ_W_QH_DSP  = 0x34 | OPC_SPECIAL3,
+    /* DSP Bit/Manipulation Sub-class */
+    OPC_INSV_DSP       = 0x0C | OPC_SPECIAL3,
+    OPC_DINSV_DSP      = 0x0D | OPC_SPECIAL3,
+    /* MIPS DSP Compare-Pick Sub-class */
+    OPC_APPEND_DSP     = 0x31 | OPC_SPECIAL3,
+    OPC_DAPPEND_DSP    = 0x35 | OPC_SPECIAL3,
+    /* MIPS DSP Accumulator and DSPControl Access Sub-class */
+    OPC_EXTR_W_DSP     = 0x38 | OPC_SPECIAL3,
+    OPC_DEXTR_W_DSP    = 0x3C | OPC_SPECIAL3,
 };
 
 /* BSHFL opcodes */
@@ -331,6 +361,413 @@ enum {
     OPC_DSHD     = (0x05 << 6) | OPC_DBSHFL,
 };
 
+/* MIPS DSP REGIMM opcodes */
+enum {
+    OPC_BPOSGE32 = (0x1C << 16) | OPC_REGIMM,
+    OPC_BPOSGE64 = (0x1D << 16) | OPC_REGIMM,
+};
+
+#define MASK_LX(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+/* MIPS DSP Load: 5-bit sub-opcode at bit 6 (see MASK_LX), under OPC_LX_DSP */
+enum {
+    OPC_LBUX = (0x06 << 6) | OPC_LX_DSP,
+    OPC_LHX  = (0x04 << 6) | OPC_LX_DSP,
+    OPC_LWX  = (0x00 << 6) | OPC_LX_DSP,
+    OPC_LDX = (0x08 << 6) | OPC_LX_DSP,
+};
+
+#define MASK_ADDU_QB(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* MIPS DSP Arithmetic Sub-class */
+    OPC_ADDQ_PH        = (0x0A << 6) | OPC_ADDU_QB_DSP,
+    OPC_ADDQ_S_PH      = (0x0E << 6) | OPC_ADDU_QB_DSP,
+    OPC_ADDQ_S_W       = (0x16 << 6) | OPC_ADDU_QB_DSP,
+    OPC_ADDU_QB        = (0x00 << 6) | OPC_ADDU_QB_DSP,
+    OPC_ADDU_S_QB      = (0x04 << 6) | OPC_ADDU_QB_DSP,
+    OPC_ADDU_PH        = (0x08 << 6) | OPC_ADDU_QB_DSP,
+    OPC_ADDU_S_PH      = (0x0C << 6) | OPC_ADDU_QB_DSP,
+    OPC_SUBQ_PH        = (0x0B << 6) | OPC_ADDU_QB_DSP,
+    OPC_SUBQ_S_PH      = (0x0F << 6) | OPC_ADDU_QB_DSP,
+    OPC_SUBQ_S_W       = (0x17 << 6) | OPC_ADDU_QB_DSP,
+    OPC_SUBU_QB        = (0x01 << 6) | OPC_ADDU_QB_DSP,
+    OPC_SUBU_S_QB      = (0x05 << 6) | OPC_ADDU_QB_DSP,
+    OPC_SUBU_PH        = (0x09 << 6) | OPC_ADDU_QB_DSP,
+    OPC_SUBU_S_PH      = (0x0D << 6) | OPC_ADDU_QB_DSP,
+    OPC_ADDSC          = (0x10 << 6) | OPC_ADDU_QB_DSP,
+    OPC_ADDWC          = (0x11 << 6) | OPC_ADDU_QB_DSP,
+    OPC_MODSUB         = (0x12 << 6) | OPC_ADDU_QB_DSP,
+    OPC_RADDU_W_QB     = (0x14 << 6) | OPC_ADDU_QB_DSP,
+    /* MIPS DSP Multiply Sub-class insns */
+    OPC_MULEU_S_PH_QBL = (0x06 << 6) | OPC_ADDU_QB_DSP,
+    OPC_MULEU_S_PH_QBR = (0x07 << 6) | OPC_ADDU_QB_DSP,
+    OPC_MULQ_RS_PH     = (0x1F << 6) | OPC_ADDU_QB_DSP,
+    OPC_MULEQ_S_W_PHL  = (0x1C << 6) | OPC_ADDU_QB_DSP,
+    OPC_MULEQ_S_W_PHR  = (0x1D << 6) | OPC_ADDU_QB_DSP,
+    OPC_MULQ_S_PH      = (0x1E << 6) | OPC_ADDU_QB_DSP,
+};
+
+#define OPC_ADDUH_QB_DSP OPC_MULT_G_2E
+#define MASK_ADDUH_QB(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* MIPS DSP Arithmetic Sub-class */
+    OPC_ADDUH_QB   = (0x00 << 6) | OPC_ADDUH_QB_DSP,
+    OPC_ADDUH_R_QB = (0x02 << 6) | OPC_ADDUH_QB_DSP,
+    OPC_ADDQH_PH   = (0x08 << 6) | OPC_ADDUH_QB_DSP,
+    OPC_ADDQH_R_PH = (0x0A << 6) | OPC_ADDUH_QB_DSP,
+    OPC_ADDQH_W    = (0x10 << 6) | OPC_ADDUH_QB_DSP,
+    OPC_ADDQH_R_W  = (0x12 << 6) | OPC_ADDUH_QB_DSP,
+    OPC_SUBUH_QB   = (0x01 << 6) | OPC_ADDUH_QB_DSP,
+    OPC_SUBUH_R_QB = (0x03 << 6) | OPC_ADDUH_QB_DSP,
+    OPC_SUBQH_PH   = (0x09 << 6) | OPC_ADDUH_QB_DSP,
+    OPC_SUBQH_R_PH = (0x0B << 6) | OPC_ADDUH_QB_DSP,
+    OPC_SUBQH_W    = (0x11 << 6) | OPC_ADDUH_QB_DSP,
+    OPC_SUBQH_R_W  = (0x13 << 6) | OPC_ADDUH_QB_DSP,
+    /* MIPS DSP Multiply Sub-class insns */
+    OPC_MUL_PH     = (0x0C << 6) | OPC_ADDUH_QB_DSP,
+    OPC_MUL_S_PH   = (0x0E << 6) | OPC_ADDUH_QB_DSP,
+    OPC_MULQ_S_W   = (0x16 << 6) | OPC_ADDUH_QB_DSP,
+    OPC_MULQ_RS_W  = (0x17 << 6) | OPC_ADDUH_QB_DSP,
+};
+
+#define MASK_ABSQ_S_PH(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* MIPS DSP Arithmetic Sub-class */
+    OPC_ABSQ_S_QB       = (0x01 << 6) | OPC_ABSQ_S_PH_DSP,
+    OPC_ABSQ_S_PH       = (0x09 << 6) | OPC_ABSQ_S_PH_DSP,
+    OPC_ABSQ_S_W        = (0x11 << 6) | OPC_ABSQ_S_PH_DSP,
+    OPC_PRECEQ_W_PHL    = (0x0C << 6) | OPC_ABSQ_S_PH_DSP,
+    OPC_PRECEQ_W_PHR    = (0x0D << 6) | OPC_ABSQ_S_PH_DSP,
+    OPC_PRECEQU_PH_QBL  = (0x04 << 6) | OPC_ABSQ_S_PH_DSP,
+    OPC_PRECEQU_PH_QBR  = (0x05 << 6) | OPC_ABSQ_S_PH_DSP,
+    OPC_PRECEQU_PH_QBLA = (0x06 << 6) | OPC_ABSQ_S_PH_DSP,
+    OPC_PRECEQU_PH_QBRA = (0x07 << 6) | OPC_ABSQ_S_PH_DSP,
+    OPC_PRECEU_PH_QBL   = (0x1C << 6) | OPC_ABSQ_S_PH_DSP,
+    OPC_PRECEU_PH_QBR   = (0x1D << 6) | OPC_ABSQ_S_PH_DSP,
+    OPC_PRECEU_PH_QBLA  = (0x1E << 6) | OPC_ABSQ_S_PH_DSP,
+    OPC_PRECEU_PH_QBRA  = (0x1F << 6) | OPC_ABSQ_S_PH_DSP,
+    /* DSP Bit/Manipulation Sub-class */
+    OPC_BITREV          = (0x1B << 6) | OPC_ABSQ_S_PH_DSP,
+    OPC_REPL_QB         = (0x02 << 6) | OPC_ABSQ_S_PH_DSP,
+    OPC_REPLV_QB        = (0x03 << 6) | OPC_ABSQ_S_PH_DSP,
+    OPC_REPL_PH         = (0x0A << 6) | OPC_ABSQ_S_PH_DSP,
+    OPC_REPLV_PH        = (0x0B << 6) | OPC_ABSQ_S_PH_DSP,
+};
+
+#define MASK_CMPU_EQ_QB(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* MIPS DSP Arithmetic Sub-class */
+    OPC_PRECR_QB_PH      = (0x0D << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_PRECRQ_QB_PH     = (0x0C << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_PRECR_SRA_PH_W   = (0x1E << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_PRECR_SRA_R_PH_W = (0x1F << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_PRECRQ_PH_W      = (0x14 << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_PRECRQ_RS_PH_W   = (0x15 << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_PRECRQU_S_QB_PH  = (0x0F << 6) | OPC_CMPU_EQ_QB_DSP,
+    /* DSP Compare-Pick Sub-class */
+    OPC_CMPU_EQ_QB       = (0x00 << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_CMPU_LT_QB       = (0x01 << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_CMPU_LE_QB       = (0x02 << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_CMPGU_EQ_QB      = (0x04 << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_CMPGU_LT_QB      = (0x05 << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_CMPGU_LE_QB      = (0x06 << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_CMPGDU_EQ_QB     = (0x18 << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_CMPGDU_LT_QB     = (0x19 << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_CMPGDU_LE_QB     = (0x1A << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_CMP_EQ_PH        = (0x08 << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_CMP_LT_PH        = (0x09 << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_CMP_LE_PH        = (0x0A << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_PICK_QB          = (0x03 << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_PICK_PH          = (0x0B << 6) | OPC_CMPU_EQ_QB_DSP,
+    OPC_PACKRL_PH        = (0x0E << 6) | OPC_CMPU_EQ_QB_DSP,
+};
+
+#define MASK_SHLL_QB(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* MIPS DSP GPR-Based Shift Sub-class */
+    OPC_SHLL_QB    = (0x00 << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHLLV_QB   = (0x02 << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHLL_PH    = (0x08 << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHLLV_PH   = (0x0A << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHLL_S_PH  = (0x0C << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHLLV_S_PH = (0x0E << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHLL_S_W   = (0x14 << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHLLV_S_W  = (0x16 << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHRL_QB    = (0x01 << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHRLV_QB   = (0x03 << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHRL_PH    = (0x19 << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHRLV_PH   = (0x1B << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHRA_QB    = (0x04 << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHRA_R_QB  = (0x05 << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHRAV_QB   = (0x06 << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHRAV_R_QB = (0x07 << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHRA_PH    = (0x09 << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHRAV_PH   = (0x0B << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHRA_R_PH  = (0x0D << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHRAV_R_PH = (0x0F << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHRA_R_W   = (0x15 << 6) | OPC_SHLL_QB_DSP,
+    OPC_SHRAV_R_W  = (0x17 << 6) | OPC_SHLL_QB_DSP,
+};
+
+#define MASK_DPA_W_PH(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* MIPS DSP Multiply Sub-class insns */
+    OPC_DPAU_H_QBL    = (0x03 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPAU_H_QBR    = (0x07 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPSU_H_QBL    = (0x0B << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPSU_H_QBR    = (0x0F << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPA_W_PH      = (0x00 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPAX_W_PH     = (0x08 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPAQ_S_W_PH   = (0x04 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPAQX_S_W_PH  = (0x18 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPAQX_SA_W_PH = (0x1A << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPS_W_PH      = (0x01 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPSX_W_PH     = (0x09 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPSQ_S_W_PH   = (0x05 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPSQX_S_W_PH  = (0x19 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPSQX_SA_W_PH = (0x1B << 6) | OPC_DPA_W_PH_DSP,
+    OPC_MULSAQ_S_W_PH = (0x06 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPAQ_SA_L_W   = (0x0C << 6) | OPC_DPA_W_PH_DSP,
+    OPC_DPSQ_SA_L_W   = (0x0D << 6) | OPC_DPA_W_PH_DSP,
+    OPC_MAQ_S_W_PHL   = (0x14 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_MAQ_S_W_PHR   = (0x16 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_MAQ_SA_W_PHL  = (0x10 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_MAQ_SA_W_PHR  = (0x12 << 6) | OPC_DPA_W_PH_DSP,
+    OPC_MULSA_W_PH    = (0x02 << 6) | OPC_DPA_W_PH_DSP,
+};
+
+#define MASK_INSV(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* DSP Bit/Manipulation Sub-class */
+    OPC_INSV = (0x00 << 6) | OPC_INSV_DSP,
+};
+
+#define MASK_APPEND(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* MIPS DSP Compare-Pick Sub-class */
+    OPC_APPEND  = (0x00 << 6) | OPC_APPEND_DSP,
+    OPC_PREPEND = (0x01 << 6) | OPC_APPEND_DSP,
+    OPC_BALIGN  = (0x10 << 6) | OPC_APPEND_DSP,
+};
+
+#define MASK_EXTR_W(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* MIPS DSP Accumulator and DSPControl Access Sub-class */
+    OPC_EXTR_W     = (0x00 << 6) | OPC_EXTR_W_DSP,
+    OPC_EXTR_R_W   = (0x04 << 6) | OPC_EXTR_W_DSP,
+    OPC_EXTR_RS_W  = (0x06 << 6) | OPC_EXTR_W_DSP,
+    OPC_EXTR_S_H   = (0x0E << 6) | OPC_EXTR_W_DSP,
+    OPC_EXTRV_S_H  = (0x0F << 6) | OPC_EXTR_W_DSP,
+    OPC_EXTRV_W    = (0x01 << 6) | OPC_EXTR_W_DSP,
+    OPC_EXTRV_R_W  = (0x05 << 6) | OPC_EXTR_W_DSP,
+    OPC_EXTRV_RS_W = (0x07 << 6) | OPC_EXTR_W_DSP,
+    OPC_EXTP       = (0x02 << 6) | OPC_EXTR_W_DSP,
+    OPC_EXTPV      = (0x03 << 6) | OPC_EXTR_W_DSP,
+    OPC_EXTPDP     = (0x0A << 6) | OPC_EXTR_W_DSP,
+    OPC_EXTPDPV    = (0x0B << 6) | OPC_EXTR_W_DSP,
+    OPC_SHILO      = (0x1A << 6) | OPC_EXTR_W_DSP,
+    OPC_SHILOV     = (0x1B << 6) | OPC_EXTR_W_DSP,
+    OPC_MTHLIP     = (0x1F << 6) | OPC_EXTR_W_DSP,
+    OPC_WRDSP      = (0x13 << 6) | OPC_EXTR_W_DSP,
+    OPC_RDDSP      = (0x12 << 6) | OPC_EXTR_W_DSP,
+};
+
+#define MASK_ABSQ_S_QH(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* MIPS DSP Arithmetic Sub-class */
+    OPC_PRECEQ_L_PWL    = (0x14 << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_PRECEQ_L_PWR    = (0x15 << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_PRECEQ_PW_QHL   = (0x0C << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_PRECEQ_PW_QHR   = (0x0D << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_PRECEQ_PW_QHLA  = (0x0E << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_PRECEQ_PW_QHRA  = (0x0F << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_PRECEQU_QH_OBL  = (0x04 << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_PRECEQU_QH_OBR  = (0x05 << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_PRECEQU_QH_OBLA = (0x06 << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_PRECEQU_QH_OBRA = (0x07 << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_PRECEU_QH_OBL   = (0x1C << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_PRECEU_QH_OBR   = (0x1D << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_PRECEU_QH_OBLA  = (0x1E << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_PRECEU_QH_OBRA  = (0x1F << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_ABSQ_S_OB       = (0x01 << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_ABSQ_S_PW       = (0x11 << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_ABSQ_S_QH       = (0x09 << 6) | OPC_ABSQ_S_QH_DSP,
+    /* DSP Bit/Manipulation Sub-class */
+    OPC_REPL_OB         = (0x02 << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_REPL_PW         = (0x12 << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_REPL_QH         = (0x0A << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_REPLV_OB        = (0x03 << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_REPLV_PW        = (0x13 << 6) | OPC_ABSQ_S_QH_DSP,
+    OPC_REPLV_QH        = (0x0B << 6) | OPC_ABSQ_S_QH_DSP,
+};
+
+#define MASK_ADDU_OB(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* MIPS DSP Multiply Sub-class insns */
+    OPC_MULEQ_S_PW_QHL = (0x1C << 6) | OPC_ADDU_OB_DSP,
+    OPC_MULEQ_S_PW_QHR = (0x1D << 6) | OPC_ADDU_OB_DSP,
+    OPC_MULEU_S_QH_OBL = (0x06 << 6) | OPC_ADDU_OB_DSP,
+    OPC_MULEU_S_QH_OBR = (0x07 << 6) | OPC_ADDU_OB_DSP,
+    OPC_MULQ_RS_QH     = (0x1F << 6) | OPC_ADDU_OB_DSP,
+    /* MIPS DSP Arithmetic Sub-class */
+    OPC_RADDU_L_OB     = (0x14 << 6) | OPC_ADDU_OB_DSP,
+    OPC_SUBQ_PW        = (0x13 << 6) | OPC_ADDU_OB_DSP,
+    OPC_SUBQ_S_PW      = (0x17 << 6) | OPC_ADDU_OB_DSP,
+    OPC_SUBQ_QH        = (0x0B << 6) | OPC_ADDU_OB_DSP,
+    OPC_SUBQ_S_QH      = (0x0F << 6) | OPC_ADDU_OB_DSP,
+    OPC_SUBU_OB        = (0x01 << 6) | OPC_ADDU_OB_DSP,
+    OPC_SUBU_S_OB      = (0x05 << 6) | OPC_ADDU_OB_DSP,
+    OPC_SUBU_QH        = (0x09 << 6) | OPC_ADDU_OB_DSP,
+    OPC_SUBU_S_QH      = (0x0D << 6) | OPC_ADDU_OB_DSP,
+    OPC_SUBUH_OB       = (0x19 << 6) | OPC_ADDU_OB_DSP,
+    OPC_SUBUH_R_OB     = (0x1B << 6) | OPC_ADDU_OB_DSP,
+    OPC_ADDQ_PW        = (0x12 << 6) | OPC_ADDU_OB_DSP,
+    OPC_ADDQ_S_PW      = (0x16 << 6) | OPC_ADDU_OB_DSP,
+    OPC_ADDQ_QH        = (0x0A << 6) | OPC_ADDU_OB_DSP,
+    OPC_ADDQ_S_QH      = (0x0E << 6) | OPC_ADDU_OB_DSP,
+    OPC_ADDU_OB        = (0x00 << 6) | OPC_ADDU_OB_DSP,
+    OPC_ADDU_S_OB      = (0x04 << 6) | OPC_ADDU_OB_DSP,
+    OPC_ADDU_QH        = (0x08 << 6) | OPC_ADDU_OB_DSP,
+    OPC_ADDU_S_QH      = (0x0C << 6) | OPC_ADDU_OB_DSP,
+    OPC_ADDUH_OB       = (0x18 << 6) | OPC_ADDU_OB_DSP,
+    OPC_ADDUH_R_OB     = (0x1A << 6) | OPC_ADDU_OB_DSP,
+};
+
+#define MASK_CMPU_EQ_OB(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* DSP Compare-Pick Sub-class */
+    OPC_CMP_EQ_PW         = (0x10 << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_CMP_LT_PW         = (0x11 << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_CMP_LE_PW         = (0x12 << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_CMP_EQ_QH         = (0x08 << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_CMP_LT_QH         = (0x09 << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_CMP_LE_QH         = (0x0A << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_CMPGDU_EQ_OB      = (0x18 << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_CMPGDU_LT_OB      = (0x19 << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_CMPGDU_LE_OB      = (0x1A << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_CMPGU_EQ_OB       = (0x04 << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_CMPGU_LT_OB       = (0x05 << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_CMPGU_LE_OB       = (0x06 << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_CMPU_EQ_OB        = (0x00 << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_CMPU_LT_OB        = (0x01 << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_CMPU_LE_OB        = (0x02 << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_PACKRL_PW         = (0x0E << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_PICK_OB           = (0x03 << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_PICK_PW           = (0x13 << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_PICK_QH           = (0x0B << 6) | OPC_CMPU_EQ_OB_DSP,
+    /* MIPS DSP Arithmetic Sub-class */
+    OPC_PRECR_OB_QH       = (0x0D << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_PRECR_SRA_QH_PW   = (0x1E << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_PRECR_SRA_R_QH_PW = (0x1F << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_PRECRQ_OB_QH      = (0x0C << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_PRECRQ_PW_L       = (0x1C << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_PRECRQ_QH_PW      = (0x14 << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_PRECRQ_RS_QH_PW   = (0x15 << 6) | OPC_CMPU_EQ_OB_DSP,
+    OPC_PRECRQU_S_OB_QH   = (0x0F << 6) | OPC_CMPU_EQ_OB_DSP,
+};
+
+#define MASK_DAPPEND(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* DSP Compare-Pick Sub-class */
+    OPC_DAPPEND  = (0x00 << 6) | OPC_DAPPEND_DSP,
+    OPC_PREPENDD = (0x03 << 6) | OPC_DAPPEND_DSP,
+    OPC_PREPENDW = (0x01 << 6) | OPC_DAPPEND_DSP,
+    OPC_DBALIGN  = (0x10 << 6) | OPC_DAPPEND_DSP,
+};
+
+#define MASK_DEXTR_W(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* MIPS DSP Accumulator and DSPControl Access Sub-class */
+    OPC_DMTHLIP     = (0x1F << 6) | OPC_DEXTR_W_DSP,
+    OPC_DSHILO      = (0x1A << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTP       = (0x02 << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTPDP     = (0x0A << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTPDPV    = (0x0B << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTPV      = (0x03 << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTR_L     = (0x10 << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTR_R_L   = (0x14 << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTR_RS_L  = (0x16 << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTR_W     = (0x00 << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTR_R_W   = (0x04 << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTR_RS_W  = (0x06 << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTR_S_H   = (0x0E << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTRV_L    = (0x11 << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTRV_R_L  = (0x15 << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTRV_RS_L = (0x17 << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTRV_S_H  = (0x0F << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTRV_W    = (0x01 << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTRV_R_W  = (0x05 << 6) | OPC_DEXTR_W_DSP,
+    OPC_DEXTRV_RS_W = (0x07 << 6) | OPC_DEXTR_W_DSP,
+    OPC_DSHILOV     = (0x1B << 6) | OPC_DEXTR_W_DSP,
+};
+
+#define MASK_DINSV(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* DSP Bit/Manipulation Sub-class */
+    OPC_DINSV = (0x00 << 6) | OPC_DINSV_DSP,
+};
+
+#define MASK_DPAQ_W_QH(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* MIPS DSP Multiply Sub-class insns */
+    OPC_DMADD         = (0x19 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DMADDU        = (0x1D << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DMSUB         = (0x1B << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DMSUBU        = (0x1F << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPA_W_QH      = (0x00 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPAQ_S_W_QH   = (0x04 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPAQ_SA_L_PW  = (0x0C << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPAU_H_OBL    = (0x03 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPAU_H_OBR    = (0x07 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPS_W_QH      = (0x01 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPSQ_S_W_QH   = (0x05 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPSQ_SA_L_PW  = (0x0D << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPSU_H_OBL    = (0x0B << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_DPSU_H_OBR    = (0x0F << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_S_L_PWL   = (0x1C << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_S_L_PWR   = (0x1E << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_S_W_QHLL  = (0x14 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_SA_W_QHLL = (0x10 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_S_W_QHLR  = (0x15 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_SA_W_QHLR = (0x11 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_S_W_QHRL  = (0x16 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_SA_W_QHRL = (0x12 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_S_W_QHRR  = (0x17 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MAQ_SA_W_QHRR = (0x13 << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MULSAQ_S_L_PW = (0x0E << 6) | OPC_DPAQ_W_QH_DSP,
+    OPC_MULSAQ_S_W_QH = (0x06 << 6) | OPC_DPAQ_W_QH_DSP,
+};
+
+#define MASK_SHLL_OB(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
+enum {
+    /* MIPS DSP GPR-Based Shift Sub-class */
+    OPC_SHLL_PW    = (0x10 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHLL_S_PW  = (0x14 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHLLV_OB   = (0x02 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHLLV_PW   = (0x12 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHLLV_S_PW = (0x16 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHLLV_QH   = (0x0A << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHLLV_S_QH = (0x0E << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHRA_PW    = (0x11 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHRA_R_PW  = (0x15 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHRAV_OB   = (0x06 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHRAV_R_OB = (0x07 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHRAV_PW   = (0x13 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHRAV_R_PW = (0x17 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHRAV_QH   = (0x0B << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHRAV_R_QH = (0x0F << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHRLV_OB   = (0x03 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHRLV_QH   = (0x1B << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHLL_OB    = (0x00 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHLL_QH    = (0x08 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHLL_S_QH  = (0x0C << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHRA_OB    = (0x04 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHRA_R_OB  = (0x05 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHRA_QH    = (0x09 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHRA_R_QH  = (0x0D << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHRL_OB    = (0x01 << 6) | OPC_SHLL_OB_DSP,
+    OPC_SHRL_QH    = (0x19 << 6) | OPC_SHLL_OB_DSP,
+};
+
 /* Coprocessor 0 (rs field) */
 #define MASK_CP0(op)       MASK_OP_MAJOR(op) | (op & (0x1F << 21))
 
@@ -578,6 +1015,7 @@ static TCGv_i32 fpu_fcr0, fpu_fcr31;
 static TCGv_i64 fpu_f64[32];
 
 static uint32_t gen_opc_hflags[OPC_BUF_SIZE];
+static target_ulong gen_opc_btarget[OPC_BUF_SIZE];
 
 #include "gen-icount.h"
 
@@ -948,6 +1386,24 @@ static inline void check_cp1_registers(DisasContext *ctx, int regs)
         generate_exception(ctx, EXCP_RI);
 }
 
+/* check_dsp / check_dspr2: call generate_exception(ctx, EXCP_DSPDIS) unless
+   MIPS_HFLAG_DSP (resp. MIPS_HFLAG_DSPR2) is set in ctx->hflags.  DSP use
+   is enabled by the MX bit (bit 24) of the CP0 Status register. */
+
+static inline void check_dsp(DisasContext *ctx)
+{
+    if (unlikely(!(ctx->hflags & MIPS_HFLAG_DSP))) {
+        generate_exception(ctx, EXCP_DSPDIS);
+    }
+}
+
+static inline void check_dspr2(DisasContext *ctx)
+{
+    if (unlikely(!(ctx->hflags & MIPS_HFLAG_DSPR2))) {
+        generate_exception(ctx, EXCP_DSPDIS);
+    }
+}
+
 /* This code generates a "reserved instruction" exception if the
    CPU does not support the instruction set corresponding to flags. */
 static inline void check_insn(CPUMIPSState *env, DisasContext *ctx, int flags)
@@ -1028,35 +1484,6 @@ FOP_CONDS(abs, 1, ps, FMT_PS, 64)
 #undef gen_ldcmp_fpr64
 
 /* load/store instructions. */
-#define OP_LD(insn,fname)                                                 \
-static inline void op_ld_##insn(TCGv ret, TCGv arg1, DisasContext *ctx)   \
-{                                                                         \
-    tcg_gen_qemu_##fname(ret, arg1, ctx->mem_idx);                        \
-}
-OP_LD(lb,ld8s);
-OP_LD(lbu,ld8u);
-OP_LD(lh,ld16s);
-OP_LD(lhu,ld16u);
-OP_LD(lw,ld32s);
-#if defined(TARGET_MIPS64)
-OP_LD(lwu,ld32u);
-OP_LD(ld,ld64);
-#endif
-#undef OP_LD
-
-#define OP_ST(insn,fname)                                                  \
-static inline void op_st_##insn(TCGv arg1, TCGv arg2, DisasContext *ctx)   \
-{                                                                          \
-    tcg_gen_qemu_##fname(arg1, arg2, ctx->mem_idx);                        \
-}
-OP_ST(sb,st8);
-OP_ST(sh,st16);
-OP_ST(sw,st32);
-#if defined(TARGET_MIPS64)
-OP_ST(sd,st64);
-#endif
-#undef OP_ST
-
 #ifdef CONFIG_USER_ONLY
 #define OP_LD_ATOMIC(insn,fname)                                           \
 static inline void op_ld_##insn(TCGv ret, TCGv arg1, DisasContext *ctx)    \
@@ -1153,7 +1580,7 @@ static void gen_ld (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
                     int rt, int base, int16_t offset)
 {
     const char *opn = "ld";
-    TCGv t0, t1;
+    TCGv t0, t1, t2;
 
     if (rt == 0 && env->insn_flags & (INSN_LOONGSON2E | INSN_LOONGSON2F)) {
         /* Loongson CPU uses a load to zero register for prefetch.
@@ -1164,20 +1591,17 @@ static void gen_ld (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
     }
 
     t0 = tcg_temp_new();
-    t1 = tcg_temp_new();
     gen_base_offset_addr(ctx, t0, base, offset);
 
     switch (opc) {
 #if defined(TARGET_MIPS64)
     case OPC_LWU:
-        save_cpu_state(ctx, 0);
-        op_ld_lwu(t0, t0, ctx);
+        tcg_gen_qemu_ld32u(t0, t0, ctx->mem_idx);
         gen_store_gpr(t0, rt);
         opn = "lwu";
         break;
     case OPC_LD:
-        save_cpu_state(ctx, 0);
-        op_ld_ld(t0, t0, ctx);
+        tcg_gen_qemu_ld64(t0, t0, ctx->mem_idx);
         gen_store_gpr(t0, rt);
         opn = "ld";
         break;
@@ -1188,78 +1612,130 @@ static void gen_ld (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
         opn = "lld";
         break;
     case OPC_LDL:
-        save_cpu_state(ctx, 1);
+        t1 = tcg_temp_new();
+        tcg_gen_andi_tl(t1, t0, 7);
+#ifndef TARGET_WORDS_BIGENDIAN
+        tcg_gen_xori_tl(t1, t1, 7);
+#endif
+        tcg_gen_shli_tl(t1, t1, 3);
+        tcg_gen_andi_tl(t0, t0, ~7);
+        tcg_gen_qemu_ld64(t0, t0, ctx->mem_idx);
+        tcg_gen_shl_tl(t0, t0, t1);
+        tcg_gen_xori_tl(t1, t1, 63);
+        t2 = tcg_const_tl(0x7fffffffffffffffull);
+        tcg_gen_shr_tl(t2, t2, t1);
         gen_load_gpr(t1, rt);
-        gen_helper_1e2i(ldl, t1, t1, t0, ctx->mem_idx);
-        gen_store_gpr(t1, rt);
+        tcg_gen_and_tl(t1, t1, t2);
+        tcg_temp_free(t2);
+        tcg_gen_or_tl(t0, t0, t1);
+        tcg_temp_free(t1);
+        gen_store_gpr(t0, rt);
         opn = "ldl";
         break;
     case OPC_LDR:
-        save_cpu_state(ctx, 1);
+        t1 = tcg_temp_new();
+        tcg_gen_andi_tl(t1, t0, 7);
+#ifdef TARGET_WORDS_BIGENDIAN
+        tcg_gen_xori_tl(t1, t1, 7);
+#endif
+        tcg_gen_shli_tl(t1, t1, 3);
+        tcg_gen_andi_tl(t0, t0, ~7);
+        tcg_gen_qemu_ld64(t0, t0, ctx->mem_idx);
+        tcg_gen_shr_tl(t0, t0, t1);
+        tcg_gen_xori_tl(t1, t1, 63);
+        t2 = tcg_const_tl(0xfffffffffffffffeull);
+        tcg_gen_shl_tl(t2, t2, t1);
         gen_load_gpr(t1, rt);
-        gen_helper_1e2i(ldr, t1, t1, t0, ctx->mem_idx);
-        gen_store_gpr(t1, rt);
+        tcg_gen_and_tl(t1, t1, t2);
+        tcg_temp_free(t2);
+        tcg_gen_or_tl(t0, t0, t1);
+        tcg_temp_free(t1);
+        gen_store_gpr(t0, rt);
         opn = "ldr";
         break;
     case OPC_LDPC:
-        save_cpu_state(ctx, 0);
-        tcg_gen_movi_tl(t1, pc_relative_pc(ctx));
+        t1 = tcg_const_tl(pc_relative_pc(ctx));
         gen_op_addr_add(ctx, t0, t0, t1);
-        op_ld_ld(t0, t0, ctx);
+        tcg_temp_free(t1);
+        tcg_gen_qemu_ld64(t0, t0, ctx->mem_idx);
         gen_store_gpr(t0, rt);
         opn = "ldpc";
         break;
 #endif
     case OPC_LWPC:
-        save_cpu_state(ctx, 0);
-        tcg_gen_movi_tl(t1, pc_relative_pc(ctx));
+        t1 = tcg_const_tl(pc_relative_pc(ctx));
         gen_op_addr_add(ctx, t0, t0, t1);
-        op_ld_lw(t0, t0, ctx);
+        tcg_temp_free(t1);
+        tcg_gen_qemu_ld32s(t0, t0, ctx->mem_idx);
         gen_store_gpr(t0, rt);
         opn = "lwpc";
         break;
     case OPC_LW:
-        save_cpu_state(ctx, 0);
-        op_ld_lw(t0, t0, ctx);
+        tcg_gen_qemu_ld32s(t0, t0, ctx->mem_idx);
         gen_store_gpr(t0, rt);
         opn = "lw";
         break;
     case OPC_LH:
-        save_cpu_state(ctx, 0);
-        op_ld_lh(t0, t0, ctx);
+        tcg_gen_qemu_ld16s(t0, t0, ctx->mem_idx);
         gen_store_gpr(t0, rt);
         opn = "lh";
         break;
     case OPC_LHU:
-        save_cpu_state(ctx, 0);
-        op_ld_lhu(t0, t0, ctx);
+        tcg_gen_qemu_ld16u(t0, t0, ctx->mem_idx);
         gen_store_gpr(t0, rt);
         opn = "lhu";
         break;
     case OPC_LB:
-        save_cpu_state(ctx, 0);
-        op_ld_lb(t0, t0, ctx);
+        tcg_gen_qemu_ld8s(t0, t0, ctx->mem_idx);
         gen_store_gpr(t0, rt);
         opn = "lb";
         break;
     case OPC_LBU:
-        save_cpu_state(ctx, 0);
-        op_ld_lbu(t0, t0, ctx);
+        tcg_gen_qemu_ld8u(t0, t0, ctx->mem_idx);
         gen_store_gpr(t0, rt);
         opn = "lbu";
         break;
     case OPC_LWL:
-        save_cpu_state(ctx, 1);
+        t1 = tcg_temp_new();
+        tcg_gen_andi_tl(t1, t0, 3);
+#ifndef TARGET_WORDS_BIGENDIAN
+        tcg_gen_xori_tl(t1, t1, 3);
+#endif
+        tcg_gen_shli_tl(t1, t1, 3);
+        tcg_gen_andi_tl(t0, t0, ~3);
+        tcg_gen_qemu_ld32u(t0, t0, ctx->mem_idx);
+        tcg_gen_shl_tl(t0, t0, t1);
+        tcg_gen_xori_tl(t1, t1, 31);
+        t2 = tcg_const_tl(0x7fffffffull);
+        tcg_gen_shr_tl(t2, t2, t1);
         gen_load_gpr(t1, rt);
-        gen_helper_1e2i(lwl, t1, t1, t0, ctx->mem_idx);
-        gen_store_gpr(t1, rt);
+        tcg_gen_and_tl(t1, t1, t2);
+        tcg_temp_free(t2);
+        tcg_gen_or_tl(t0, t0, t1);
+        tcg_temp_free(t1);
+        tcg_gen_ext32s_tl(t0, t0);
+        gen_store_gpr(t0, rt);
         opn = "lwl";
         break;
     case OPC_LWR:
-        save_cpu_state(ctx, 1);
+        t1 = tcg_temp_new();
+        tcg_gen_andi_tl(t1, t0, 3);
+#ifdef TARGET_WORDS_BIGENDIAN
+        tcg_gen_xori_tl(t1, t1, 3);
+#endif
+        tcg_gen_shli_tl(t1, t1, 3);
+        tcg_gen_andi_tl(t0, t0, ~3);
+        tcg_gen_qemu_ld32u(t0, t0, ctx->mem_idx);
+        tcg_gen_shr_tl(t0, t0, t1);
+        tcg_gen_xori_tl(t1, t1, 31);
+        t2 = tcg_const_tl(0xfffffffeull);
+        tcg_gen_shl_tl(t2, t2, t1);
         gen_load_gpr(t1, rt);
-        gen_helper_1e2i(lwr, t1, t1, t0, ctx->mem_idx);
-        gen_store_gpr(t1, rt);
+        tcg_gen_and_tl(t1, t1, t2);
+        tcg_temp_free(t2);
+        tcg_gen_or_tl(t0, t0, t1);
+        tcg_temp_free(t1);
+        gen_store_gpr(t0, rt);
         opn = "lwr";
         break;
     case OPC_LL:
@@ -1272,7 +1748,6 @@ static void gen_ld (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
     (void)opn; /* avoid a compiler warning */
     MIPS_DEBUG("%s %s, %d(%s)", opn, regnames[rt], offset, regnames[base]);
     tcg_temp_free(t0);
-    tcg_temp_free(t1);
 }
 
 /* Store */
@@ -1288,8 +1763,7 @@ static void gen_st (DisasContext *ctx, uint32_t opc, int rt,
     switch (opc) {
 #if defined(TARGET_MIPS64)
     case OPC_SD:
-        save_cpu_state(ctx, 0);
-        op_st_sd(t1, t0, ctx);
+        tcg_gen_qemu_st64(t1, t0, ctx->mem_idx);
         opn = "sd";
         break;
     case OPC_SDL:
@@ -1304,18 +1778,15 @@ static void gen_st (DisasContext *ctx, uint32_t opc, int rt,
         break;
 #endif
     case OPC_SW:
-        save_cpu_state(ctx, 0);
-        op_st_sw(t1, t0, ctx);
+        tcg_gen_qemu_st32(t1, t0, ctx->mem_idx);
         opn = "sw";
         break;
     case OPC_SH:
-        save_cpu_state(ctx, 0);
-        op_st_sh(t1, t0, ctx);
+        tcg_gen_qemu_st16(t1, t0, ctx->mem_idx);
         opn = "sh";
         break;
     case OPC_SB:
-        save_cpu_state(ctx, 0);
-        op_st_sb(t1, t0, ctx);
+        tcg_gen_qemu_st8(t1, t0, ctx->mem_idx);
         opn = "sb";
         break;
     case OPC_SWL:
@@ -1343,13 +1814,14 @@ static void gen_st_cond (DisasContext *ctx, uint32_t opc, int rt,
     const char *opn = "st_cond";
     TCGv t0, t1;
 
+#ifdef CONFIG_USER_ONLY
     t0 = tcg_temp_local_new();
-
-    gen_base_offset_addr(ctx, t0, base, offset);
-    /* Don't do NOP if destination is zero: we must perform the actual
-       memory access. */
-
     t1 = tcg_temp_local_new();
+#else
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+#endif
+    gen_base_offset_addr(ctx, t0, base, offset);
     gen_load_gpr(t1, rt);
     switch (opc) {
 #if defined(TARGET_MIPS64)
@@ -1887,35 +2359,32 @@ static void gen_cond_move(CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
                           int rd, int rs, int rt)
 {
     const char *opn = "cond move";
-    int l1;
+    TCGv t0, t1, t2;
 
     if (rd == 0) {
-        /* If no destination, treat it as a NOP.
-           For add & sub, we must generate the overflow exception when needed. */
+        /* If no destination, treat it as a NOP. */
         MIPS_DEBUG("NOP");
         return;
     }
 
-    l1 = gen_new_label();
+    t0 = tcg_temp_new();
+    gen_load_gpr(t0, rt);
+    t1 = tcg_const_tl(0);
+    t2 = tcg_temp_new();
+    gen_load_gpr(t2, rs);
     switch (opc) {
     case OPC_MOVN:
-        if (likely(rt != 0))
-            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rt], 0, l1);
-        else
-            tcg_gen_br(l1);
+        tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr[rd], t0, t1, t2, cpu_gpr[rd]);
         opn = "movn";
         break;
     case OPC_MOVZ:
-        if (likely(rt != 0))
-            tcg_gen_brcondi_tl(TCG_COND_NE, cpu_gpr[rt], 0, l1);
+        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_gpr[rd], t0, t1, t2, cpu_gpr[rd]);
         opn = "movz";
         break;
     }
-    if (rs != 0)
-        tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
-    else
-        tcg_gen_movi_tl(cpu_gpr[rd], 0);
-    gen_set_label(l1);
+    tcg_temp_free(t2);
+    tcg_temp_free(t1);
+    tcg_temp_free(t0);
 
     (void)opn; /* avoid a compiler warning */
     MIPS_DEBUG("%s %s, %s, %s", opn, regnames[rd], regnames[rs], regnames[rt]);
@@ -2101,33 +2570,75 @@ static void gen_shift (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
 static void gen_HILO (DisasContext *ctx, uint32_t opc, int reg)
 {
     const char *opn = "hilo";
+    unsigned int acc;
 
     if (reg == 0 && (opc == OPC_MFHI || opc == OPC_MFLO)) {
         /* Treat as NOP. */
         MIPS_DEBUG("NOP");
         return;
     }
+
+    if (opc == OPC_MFHI || opc == OPC_MFLO) {
+        acc = ((ctx->opcode) >> 21) & 0x03;
+    } else {
+        acc = ((ctx->opcode) >> 11) & 0x03;
+    }
+
+    if (acc != 0) {
+        check_dsp(ctx);
+    }
+
     switch (opc) {
     case OPC_MFHI:
-        tcg_gen_mov_tl(cpu_gpr[reg], cpu_HI[0]);
+#if defined(TARGET_MIPS64)
+        if (acc != 0) {
+            tcg_gen_ext32s_tl(cpu_gpr[reg], cpu_HI[acc]);
+        } else
+#endif
+        {
+            tcg_gen_mov_tl(cpu_gpr[reg], cpu_HI[acc]);
+        }
         opn = "mfhi";
         break;
     case OPC_MFLO:
-        tcg_gen_mov_tl(cpu_gpr[reg], cpu_LO[0]);
+#if defined(TARGET_MIPS64)
+        if (acc != 0) {
+            tcg_gen_ext32s_tl(cpu_gpr[reg], cpu_LO[acc]);
+        } else
+#endif
+        {
+            tcg_gen_mov_tl(cpu_gpr[reg], cpu_LO[acc]);
+        }
         opn = "mflo";
         break;
     case OPC_MTHI:
-        if (reg != 0)
-            tcg_gen_mov_tl(cpu_HI[0], cpu_gpr[reg]);
-        else
-            tcg_gen_movi_tl(cpu_HI[0], 0);
+        if (reg != 0) {
+#if defined(TARGET_MIPS64)
+            if (acc != 0) {
+                tcg_gen_ext32s_tl(cpu_HI[acc], cpu_gpr[reg]);
+            } else
+#endif
+            {
+                tcg_gen_mov_tl(cpu_HI[acc], cpu_gpr[reg]);
+            }
+        } else {
+            tcg_gen_movi_tl(cpu_HI[acc], 0);
+        }
         opn = "mthi";
         break;
     case OPC_MTLO:
-        if (reg != 0)
-            tcg_gen_mov_tl(cpu_LO[0], cpu_gpr[reg]);
-        else
-            tcg_gen_movi_tl(cpu_LO[0], 0);
+        if (reg != 0) {
+#if defined(TARGET_MIPS64)
+            if (acc != 0) {
+                tcg_gen_ext32s_tl(cpu_LO[acc], cpu_gpr[reg]);
+            } else
+#endif
+            {
+                tcg_gen_mov_tl(cpu_LO[acc], cpu_gpr[reg]);
+            }
+        } else {
+            tcg_gen_movi_tl(cpu_LO[acc], 0);
+        }
         opn = "mtlo";
         break;
     }
@@ -2140,61 +2651,50 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
 {
     const char *opn = "mul/div";
     TCGv t0, t1;
+    unsigned int acc;
 
-    switch (opc) {
-    case OPC_DIV:
-    case OPC_DIVU:
-#if defined(TARGET_MIPS64)
-    case OPC_DDIV:
-    case OPC_DDIVU:
-#endif
-        t0 = tcg_temp_local_new();
-        t1 = tcg_temp_local_new();
-        break;
-    default:
-        t0 = tcg_temp_new();
-        t1 = tcg_temp_new();
-        break;
-    }
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
 
     gen_load_gpr(t0, rs);
     gen_load_gpr(t1, rt);
+
     switch (opc) {
     case OPC_DIV:
         {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
+            TCGv t2 = tcg_temp_new();
+            TCGv t3 = tcg_temp_new();
             tcg_gen_ext32s_tl(t0, t0);
             tcg_gen_ext32s_tl(t1, t1);
-            tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t0, INT_MIN, l2);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1, l2);
-
-            tcg_gen_mov_tl(cpu_LO[0], t0);
-            tcg_gen_movi_tl(cpu_HI[0], 0);
-            tcg_gen_br(l1);
-            gen_set_label(l2);
+            tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, INT_MIN);
+            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1);
+            tcg_gen_and_tl(t2, t2, t3);
+            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
+            tcg_gen_or_tl(t2, t2, t3);
+            tcg_gen_movi_tl(t3, 0);
+            tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
             tcg_gen_div_tl(cpu_LO[0], t0, t1);
             tcg_gen_rem_tl(cpu_HI[0], t0, t1);
             tcg_gen_ext32s_tl(cpu_LO[0], cpu_LO[0]);
             tcg_gen_ext32s_tl(cpu_HI[0], cpu_HI[0]);
-            gen_set_label(l1);
+            tcg_temp_free(t3);
+            tcg_temp_free(t2);
         }
         opn = "div";
         break;
     case OPC_DIVU:
         {
-            int l1 = gen_new_label();
-
+            TCGv t2 = tcg_const_tl(0);
+            TCGv t3 = tcg_const_tl(1);
             tcg_gen_ext32u_tl(t0, t0);
             tcg_gen_ext32u_tl(t1, t1);
-            tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
+            tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
             tcg_gen_divu_tl(cpu_LO[0], t0, t1);
             tcg_gen_remu_tl(cpu_HI[0], t0, t1);
             tcg_gen_ext32s_tl(cpu_LO[0], cpu_LO[0]);
             tcg_gen_ext32s_tl(cpu_HI[0], cpu_HI[0]);
-            gen_set_label(l1);
+            tcg_temp_free(t3);
+            tcg_temp_free(t2);
         }
         opn = "divu";
         break;
@@ -2202,6 +2702,10 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         {
             TCGv_i64 t2 = tcg_temp_new_i64();
             TCGv_i64 t3 = tcg_temp_new_i64();
+            acc = ((ctx->opcode) >> 11) & 0x03;
+            if (acc != 0) {
+                check_dsp(ctx);
+            }
 
             tcg_gen_ext_tl_i64(t2, t0);
             tcg_gen_ext_tl_i64(t3, t1);
@@ -2211,8 +2715,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
             tcg_gen_shri_i64(t2, t2, 32);
             tcg_gen_trunc_i64_tl(t1, t2);
             tcg_temp_free_i64(t2);
-            tcg_gen_ext32s_tl(cpu_LO[0], t0);
-            tcg_gen_ext32s_tl(cpu_HI[0], t1);
+            tcg_gen_ext32s_tl(cpu_LO[acc], t0);
+            tcg_gen_ext32s_tl(cpu_HI[acc], t1);
         }
         opn = "mult";
         break;
@@ -2220,6 +2724,10 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         {
             TCGv_i64 t2 = tcg_temp_new_i64();
             TCGv_i64 t3 = tcg_temp_new_i64();
+            acc = ((ctx->opcode) >> 11) & 0x03;
+            if (acc != 0) {
+                check_dsp(ctx);
+            }
 
             tcg_gen_ext32u_tl(t0, t0);
             tcg_gen_ext32u_tl(t1, t1);
@@ -2231,38 +2739,39 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
             tcg_gen_shri_i64(t2, t2, 32);
             tcg_gen_trunc_i64_tl(t1, t2);
             tcg_temp_free_i64(t2);
-            tcg_gen_ext32s_tl(cpu_LO[0], t0);
-            tcg_gen_ext32s_tl(cpu_HI[0], t1);
+            tcg_gen_ext32s_tl(cpu_LO[acc], t0);
+            tcg_gen_ext32s_tl(cpu_HI[acc], t1);
         }
         opn = "multu";
         break;
 #if defined(TARGET_MIPS64)
     case OPC_DDIV:
         {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
-            tcg_gen_mov_tl(cpu_LO[0], t0);
-            tcg_gen_movi_tl(cpu_HI[0], 0);
-            tcg_gen_br(l1);
-            gen_set_label(l2);
-            tcg_gen_div_i64(cpu_LO[0], t0, t1);
-            tcg_gen_rem_i64(cpu_HI[0], t0, t1);
-            gen_set_label(l1);
+            TCGv t2 = tcg_temp_new();
+            TCGv t3 = tcg_temp_new();
+            tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, -1LL << 63);
+            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1LL);
+            tcg_gen_and_tl(t2, t2, t3);
+            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
+            tcg_gen_or_tl(t2, t2, t3);
+            tcg_gen_movi_tl(t3, 0);
+            tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
+            tcg_gen_div_tl(cpu_LO[0], t0, t1);
+            tcg_gen_rem_tl(cpu_HI[0], t0, t1);
+            tcg_temp_free(t3);
+            tcg_temp_free(t2);
         }
         opn = "ddiv";
         break;
     case OPC_DDIVU:
         {
-            int l1 = gen_new_label();
-
-            tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
+            TCGv t2 = tcg_const_tl(0);
+            TCGv t3 = tcg_const_tl(1);
+            tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
             tcg_gen_divu_i64(cpu_LO[0], t0, t1);
             tcg_gen_remu_i64(cpu_HI[0], t0, t1);
-            gen_set_label(l1);
+            tcg_temp_free(t3);
+            tcg_temp_free(t2);
         }
         opn = "ddivu";
         break;
@@ -2279,41 +2788,49 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         {
             TCGv_i64 t2 = tcg_temp_new_i64();
             TCGv_i64 t3 = tcg_temp_new_i64();
+            acc = ((ctx->opcode) >> 11) & 0x03;
+            if (acc != 0) {
+                check_dsp(ctx);
+            }
 
             tcg_gen_ext_tl_i64(t2, t0);
             tcg_gen_ext_tl_i64(t3, t1);
             tcg_gen_mul_i64(t2, t2, t3);
-            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[acc], cpu_HI[acc]);
             tcg_gen_add_i64(t2, t2, t3);
             tcg_temp_free_i64(t3);
             tcg_gen_trunc_i64_tl(t0, t2);
             tcg_gen_shri_i64(t2, t2, 32);
             tcg_gen_trunc_i64_tl(t1, t2);
             tcg_temp_free_i64(t2);
-            tcg_gen_ext32s_tl(cpu_LO[0], t0);
-            tcg_gen_ext32s_tl(cpu_HI[0], t1);
+            tcg_gen_ext32s_tl(cpu_LO[acc], t0);
+            tcg_gen_ext32s_tl(cpu_HI[acc], t1);
         }
         opn = "madd";
         break;
     case OPC_MADDU:
-       {
+        {
             TCGv_i64 t2 = tcg_temp_new_i64();
             TCGv_i64 t3 = tcg_temp_new_i64();
+            acc = ((ctx->opcode) >> 11) & 0x03;
+            if (acc != 0) {
+                check_dsp(ctx);
+            }
 
             tcg_gen_ext32u_tl(t0, t0);
             tcg_gen_ext32u_tl(t1, t1);
             tcg_gen_extu_tl_i64(t2, t0);
             tcg_gen_extu_tl_i64(t3, t1);
             tcg_gen_mul_i64(t2, t2, t3);
-            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[acc], cpu_HI[acc]);
             tcg_gen_add_i64(t2, t2, t3);
             tcg_temp_free_i64(t3);
             tcg_gen_trunc_i64_tl(t0, t2);
             tcg_gen_shri_i64(t2, t2, 32);
             tcg_gen_trunc_i64_tl(t1, t2);
             tcg_temp_free_i64(t2);
-            tcg_gen_ext32s_tl(cpu_LO[0], t0);
-            tcg_gen_ext32s_tl(cpu_HI[0], t1);
+            tcg_gen_ext32s_tl(cpu_LO[acc], t0);
+            tcg_gen_ext32s_tl(cpu_HI[acc], t1);
         }
         opn = "maddu";
         break;
@@ -2321,19 +2838,23 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         {
             TCGv_i64 t2 = tcg_temp_new_i64();
             TCGv_i64 t3 = tcg_temp_new_i64();
+            acc = ((ctx->opcode) >> 11) & 0x03;
+            if (acc != 0) {
+                check_dsp(ctx);
+            }
 
             tcg_gen_ext_tl_i64(t2, t0);
             tcg_gen_ext_tl_i64(t3, t1);
             tcg_gen_mul_i64(t2, t2, t3);
-            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[acc], cpu_HI[acc]);
             tcg_gen_sub_i64(t2, t3, t2);
             tcg_temp_free_i64(t3);
             tcg_gen_trunc_i64_tl(t0, t2);
             tcg_gen_shri_i64(t2, t2, 32);
             tcg_gen_trunc_i64_tl(t1, t2);
             tcg_temp_free_i64(t2);
-            tcg_gen_ext32s_tl(cpu_LO[0], t0);
-            tcg_gen_ext32s_tl(cpu_HI[0], t1);
+            tcg_gen_ext32s_tl(cpu_LO[acc], t0);
+            tcg_gen_ext32s_tl(cpu_HI[acc], t1);
         }
         opn = "msub";
         break;
@@ -2341,21 +2862,25 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         {
             TCGv_i64 t2 = tcg_temp_new_i64();
             TCGv_i64 t3 = tcg_temp_new_i64();
+            acc = ((ctx->opcode) >> 11) & 0x03;
+            if (acc != 0) {
+                check_dsp(ctx);
+            }
 
             tcg_gen_ext32u_tl(t0, t0);
             tcg_gen_ext32u_tl(t1, t1);
             tcg_gen_extu_tl_i64(t2, t0);
             tcg_gen_extu_tl_i64(t3, t1);
             tcg_gen_mul_i64(t2, t2, t3);
-            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[acc], cpu_HI[acc]);
             tcg_gen_sub_i64(t2, t3, t2);
             tcg_temp_free_i64(t3);
             tcg_gen_trunc_i64_tl(t0, t2);
             tcg_gen_shri_i64(t2, t2, 32);
             tcg_gen_trunc_i64_tl(t1, t2);
             tcg_temp_free_i64(t2);
-            tcg_gen_ext32s_tl(cpu_LO[0], t0);
-            tcg_gen_ext32s_tl(cpu_HI[0], t1);
+            tcg_gen_ext32s_tl(cpu_LO[acc], t0);
+            tcg_gen_ext32s_tl(cpu_HI[acc], t1);
         }
         opn = "msubu";
         break;
@@ -3144,6 +3669,16 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
         }
         btgt = ctx->pc + insn_bytes + offset;
         break;
+    case OPC_BPOSGE32:
+#if defined(TARGET_MIPS64)
+    case OPC_BPOSGE64:
+        tcg_gen_andi_tl(t0, cpu_dspctrl, 0x7F);
+#else
+        tcg_gen_andi_tl(t0, cpu_dspctrl, 0x3F);
+#endif
+        bcond_compute = 1;
+        btgt = ctx->pc + insn_bytes + offset;
+        break;
     case OPC_J:
     case OPC_JAL:
     case OPC_JALX:
@@ -3332,6 +3867,16 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
             tcg_gen_setcondi_tl(TCG_COND_LT, bcond, t0, 0);
             MIPS_DEBUG("bltzl %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto likely;
+        case OPC_BPOSGE32:
+            tcg_gen_setcondi_tl(TCG_COND_GE, bcond, t0, 32);
+            MIPS_DEBUG("bposge32 " TARGET_FMT_lx, btgt);
+            goto not_likely;
+#if defined(TARGET_MIPS64)
+        case OPC_BPOSGE64:
+            tcg_gen_setcondi_tl(TCG_COND_GE, bcond, t0, 64);
+            MIPS_DEBUG("bposge64 " TARGET_FMT_lx, btgt);
+            goto not_likely;
+#endif
         case OPC_BLTZALS:
         case OPC_BLTZAL:
             ctx->hflags |= (opc == OPC_BLTZALS
@@ -3383,7 +3928,6 @@ static void gen_bitops (DisasContext *ctx, uint32_t opc, int rt,
 {
     TCGv t0 = tcg_temp_new();
     TCGv t1 = tcg_temp_new();
-    target_ulong mask;
 
     gen_load_gpr(t1, rs);
     switch (opc) {
@@ -3416,45 +3960,22 @@ static void gen_bitops (DisasContext *ctx, uint32_t opc, int rt,
     case OPC_INS:
         if (lsb > msb)
             goto fail;
-        mask = ((msb - lsb + 1 < 32) ? ((1 << (msb - lsb + 1)) - 1) : ~0) << lsb;
         gen_load_gpr(t0, rt);
-        tcg_gen_andi_tl(t0, t0, ~mask);
-        tcg_gen_shli_tl(t1, t1, lsb);
-        tcg_gen_andi_tl(t1, t1, mask);
-        tcg_gen_or_tl(t0, t0, t1);
+        tcg_gen_deposit_tl(t0, t0, t1, lsb, msb - lsb + 1);
         tcg_gen_ext32s_tl(t0, t0);
         break;
 #if defined(TARGET_MIPS64)
     case OPC_DINSM:
-        if (lsb > msb)
-            goto fail;
-        mask = ((msb - lsb + 1 + 32 < 64) ? ((1ULL << (msb - lsb + 1 + 32)) - 1) : ~0ULL) << lsb;
         gen_load_gpr(t0, rt);
-        tcg_gen_andi_tl(t0, t0, ~mask);
-        tcg_gen_shli_tl(t1, t1, lsb);
-        tcg_gen_andi_tl(t1, t1, mask);
-        tcg_gen_or_tl(t0, t0, t1);
+        tcg_gen_deposit_tl(t0, t0, t1, lsb, msb + 32 - lsb + 1);
         break;
     case OPC_DINSU:
-        if (lsb > msb)
-            goto fail;
-        mask = ((1ULL << (msb - lsb + 1)) - 1) << (lsb + 32);
         gen_load_gpr(t0, rt);
-        tcg_gen_andi_tl(t0, t0, ~mask);
-        tcg_gen_shli_tl(t1, t1, lsb + 32);
-        tcg_gen_andi_tl(t1, t1, mask);
-        tcg_gen_or_tl(t0, t0, t1);
+        tcg_gen_deposit_tl(t0, t0, t1, lsb + 32, msb - lsb + 1);
         break;
     case OPC_DINS:
-        if (lsb > msb)
-            goto fail;
         gen_load_gpr(t0, rt);
-        mask = ((1ULL << (msb - lsb + 1)) - 1) << lsb;
-        gen_load_gpr(t0, rt);
-        tcg_gen_andi_tl(t0, t0, ~mask);
-        tcg_gen_shli_tl(t1, t1, lsb);
-        tcg_gen_andi_tl(t1, t1, mask);
-        tcg_gen_or_tl(t0, t0, t1);
+        tcg_gen_deposit_tl(t0, t0, t1, lsb, msb - lsb + 1);
         break;
 #endif
     default:
@@ -8148,7 +8669,6 @@ static void gen_flt3_ldst (DisasContext *ctx, uint32_t opc,
     }
     /* Don't do NOP if destination is zero: we must perform the actual
        memory access. */
-    save_cpu_state(ctx, 0);
     switch (opc) {
     case OPC_LWXC1:
         check_cop1x(ctx);
@@ -8288,7 +8808,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             gen_load_fpr32(fp2, fr);
-            gen_helper_float_muladd_s(fp2, cpu_env, fp0, fp1, fp2);
+            gen_helper_float_madd_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp2, fd);
@@ -8307,7 +8827,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_muladd_d(fp2, cpu_env, fp0, fp1, fp2);
+            gen_helper_float_madd_d(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -8325,7 +8845,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_muladd_ps(fp2, cpu_env, fp0, fp1, fp2);
+            gen_helper_float_madd_ps(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -8343,7 +8863,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             gen_load_fpr32(fp2, fr);
-            gen_helper_float_mulsub_s(fp2, cpu_env, fp0, fp1, fp2);
+            gen_helper_float_msub_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp2, fd);
@@ -8362,7 +8882,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_mulsub_d(fp2, cpu_env, fp0, fp1, fp2);
+            gen_helper_float_msub_d(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -8380,7 +8900,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_mulsub_ps(fp2, cpu_env, fp0, fp1, fp2);
+            gen_helper_float_msub_ps(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -8398,7 +8918,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             gen_load_fpr32(fp2, fr);
-            gen_helper_float_nmuladd_s(fp2, cpu_env, fp0, fp1, fp2);
+            gen_helper_float_nmadd_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp2, fd);
@@ -8417,7 +8937,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmuladd_d(fp2, cpu_env, fp0, fp1, fp2);
+            gen_helper_float_nmadd_d(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -8435,7 +8955,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmuladd_ps(fp2, cpu_env, fp0, fp1, fp2);
+            gen_helper_float_nmadd_ps(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -8453,7 +8973,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             gen_load_fpr32(fp2, fr);
-            gen_helper_float_nmulsub_s(fp2, cpu_env, fp0, fp1, fp2);
+            gen_helper_float_nmsub_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp2, fd);
@@ -8472,7 +8992,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmulsub_d(fp2, cpu_env, fp0, fp1, fp2);
+            gen_helper_float_nmsub_d(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -8490,7 +9010,7 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmulsub_ps(fp2, cpu_env, fp0, fp1, fp2);
+            gen_helper_float_nmsub_ps(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -8791,22 +9311,22 @@ static void gen_mips16_save (DisasContext *ctx,
     case 4:
         gen_base_offset_addr(ctx, t0, 29, 12);
         gen_load_gpr(t1, 7);
-        op_st_sw(t1, t0, ctx);
+        tcg_gen_qemu_st32(t1, t0, ctx->mem_idx);
         /* Fall through */
     case 3:
         gen_base_offset_addr(ctx, t0, 29, 8);
         gen_load_gpr(t1, 6);
-        op_st_sw(t1, t0, ctx);
+        tcg_gen_qemu_st32(t1, t0, ctx->mem_idx);
         /* Fall through */
     case 2:
         gen_base_offset_addr(ctx, t0, 29, 4);
         gen_load_gpr(t1, 5);
-        op_st_sw(t1, t0, ctx);
+        tcg_gen_qemu_st32(t1, t0, ctx->mem_idx);
         /* Fall through */
     case 1:
         gen_base_offset_addr(ctx, t0, 29, 0);
         gen_load_gpr(t1, 4);
-        op_st_sw(t1, t0, ctx);
+        tcg_gen_qemu_st32(t1, t0, ctx->mem_idx);
     }
 
     gen_load_gpr(t0, 29);
@@ -8814,7 +9334,7 @@ static void gen_mips16_save (DisasContext *ctx,
 #define DECR_AND_STORE(reg) do {                \
         tcg_gen_subi_tl(t0, t0, 4);             \
         gen_load_gpr(t1, reg);                  \
-        op_st_sw(t1, t0, ctx);                  \
+        tcg_gen_qemu_st32(t1, t0, ctx->mem_idx);                  \
     } while (0)
 
     if (do_ra) {
@@ -8912,10 +9432,10 @@ static void gen_mips16_restore (DisasContext *ctx,
 
     tcg_gen_addi_tl(t0, cpu_gpr[29], framesize);
 
-#define DECR_AND_LOAD(reg) do {                 \
-        tcg_gen_subi_tl(t0, t0, 4);             \
-        op_ld_lw(t1, t0, ctx);                  \
-        gen_store_gpr(t1, reg);                 \
+#define DECR_AND_LOAD(reg) do {                   \
+        tcg_gen_subi_tl(t0, t0, 4);               \
+        tcg_gen_qemu_ld32u(t1, t0, ctx->mem_idx); \
+        gen_store_gpr(t1, reg);                   \
     } while (0)
 
     if (do_ra) {
@@ -10421,8 +10941,7 @@ static void gen_ldxs (DisasContext *ctx, int base, int index, int rd)
         gen_op_addr_add(ctx, t0, t1, t0);
     }
 
-    save_cpu_state(ctx, 0);
-    op_ld_lw(t1, t0, ctx);
+    tcg_gen_qemu_ld32s(t1, t0, ctx->mem_idx);
     gen_store_gpr(t1, rd);
 
     tcg_temp_free(t0);
@@ -10451,23 +10970,21 @@ static void gen_ldst_pair (DisasContext *ctx, uint32_t opc, int rd,
             generate_exception(ctx, EXCP_RI);
             return;
         }
-        save_cpu_state(ctx, 0);
-        op_ld_lw(t1, t0, ctx);
+        tcg_gen_qemu_ld32s(t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rd);
         tcg_gen_movi_tl(t1, 4);
         gen_op_addr_add(ctx, t0, t0, t1);
-        op_ld_lw(t1, t0, ctx);
+        tcg_gen_qemu_ld32s(t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rd+1);
         opn = "lwp";
         break;
     case SWP:
-        save_cpu_state(ctx, 0);
         gen_load_gpr(t1, rd);
-        op_st_sw(t1, t0, ctx);
+        tcg_gen_qemu_st32(t1, t0, ctx->mem_idx);
         tcg_gen_movi_tl(t1, 4);
         gen_op_addr_add(ctx, t0, t0, t1);
         gen_load_gpr(t1, rd+1);
-        op_st_sw(t1, t0, ctx);
+        tcg_gen_qemu_st32(t1, t0, ctx->mem_idx);
         opn = "swp";
         break;
 #ifdef TARGET_MIPS64
@@ -10476,23 +10993,21 @@ static void gen_ldst_pair (DisasContext *ctx, uint32_t opc, int rd,
             generate_exception(ctx, EXCP_RI);
             return;
         }
-        save_cpu_state(ctx, 0);
-        op_ld_ld(t1, t0, ctx);
+        tcg_gen_qemu_ld64(t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rd);
         tcg_gen_movi_tl(t1, 8);
         gen_op_addr_add(ctx, t0, t0, t1);
-        op_ld_ld(t1, t0, ctx);
+        tcg_gen_qemu_ld64(t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rd+1);
         opn = "ldp";
         break;
     case SDP:
-        save_cpu_state(ctx, 0);
         gen_load_gpr(t1, rd);
-        op_st_sd(t1, t0, ctx);
+        tcg_gen_qemu_st64(t1, t0, ctx->mem_idx);
         tcg_gen_movi_tl(t1, 8);
         gen_op_addr_add(ctx, t0, t0, t1);
         gen_load_gpr(t1, rd+1);
-        op_st_sd(t1, t0, ctx);
+        tcg_gen_qemu_st64(t1, t0, ctx->mem_idx);
         opn = "sdp";
         break;
 #endif
@@ -12105,6 +12620,1686 @@ static int decode_micromips_opc (CPUMIPSState *env, DisasContext *ctx, int *is_b
 
 #endif
 
+/* MIPSDSP functions. */
+static void gen_mipsdsp_ld(CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
+                           int rd, int base, int offset)
+{   /* DSP indexed loads: address from GPRs base and offset, result to rd. */
+    const char *opn = "ldx";
+    TCGv t0;                       /* holds the address, then the loaded data */
+
+    if (rd == 0) {                 /* rd == 0: return without emitting code */
+        MIPS_DEBUG("NOP");
+        return;                    /* check_dsp() below is not reached either */
+    }
+
+    check_dsp(ctx);
+    t0 = tcg_temp_new();
+
+    if (base == 0) {               /* register index 0: use the other one only */
+        gen_load_gpr(t0, offset);
+    } else if (offset == 0) {
+        gen_load_gpr(t0, base);
+    } else {                       /* both non-zero: combine the two registers */
+        gen_op_addr_add(ctx, t0, cpu_gpr[base], cpu_gpr[offset]);
+    }
+
+    switch (opc) {                 /* no default: other opc values load nothing */
+    case OPC_LBUX:                 /* ld8u: unsigned 8-bit access */
+        tcg_gen_qemu_ld8u(t0, t0, ctx->mem_idx);
+        gen_store_gpr(t0, rd);
+        opn = "lbux";
+        break;
+    case OPC_LHX:                  /* ld16s: signed 16-bit access */
+        tcg_gen_qemu_ld16s(t0, t0, ctx->mem_idx);
+        gen_store_gpr(t0, rd);
+        opn = "lhx";
+        break;
+    case OPC_LWX:                  /* ld32s: signed 32-bit access */
+        tcg_gen_qemu_ld32s(t0, t0, ctx->mem_idx);
+        gen_store_gpr(t0, rd);
+        opn = "lwx";
+        break;
+#if defined(TARGET_MIPS64)         /* LDX is compiled for 64-bit targets only */
+    case OPC_LDX:
+        tcg_gen_qemu_ld64(t0, t0, ctx->mem_idx);
+        gen_store_gpr(t0, rd);
+        opn = "ldx";
+        break;
+#endif
+    }
+    (void)opn; /* avoid a compiler warning */
+    MIPS_DEBUG("%s %s, %s(%s)", opn,
+               regnames[rd], regnames[offset], regnames[base]);
+    tcg_temp_free(t0);
+}
+
+static void gen_mipsdsp_arith(DisasContext *ctx, uint32_t op1, uint32_t op2,
+                              int ret, int v1, int v2)
+{   /* Emit one DSP arithmetic/precision op, selected by op1 then op2. */
+    const char *opn = "mipsdsp arith";
+    TCGv v1_t;
+    TCGv v2_t;
+
+    if (ret == 0) {
+        /* Treat as NOP: return before any check or helper call is emitted. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    v1_t = tcg_temp_new();
+    v2_t = tcg_temp_new();
+
+    gen_load_gpr(v1_t, v1);        /* both operands are copied to temps first */
+    gen_load_gpr(v2_t, v2);
+
+    switch (op1) {
+    /* OPC_MULT_G_2E is equal to OPC_ADDUH_QB_DSP */
+    case OPC_MULT_G_2E:
+        check_dspr2(ctx);          /* one check covers every op2 in this group */
+        switch (op2) {
+        case OPC_ADDUH_QB:
+            gen_helper_adduh_qb(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_ADDUH_R_QB:
+            gen_helper_adduh_r_qb(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_ADDQH_PH:
+            gen_helper_addqh_ph(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_ADDQH_R_PH:
+            gen_helper_addqh_r_ph(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_ADDQH_W:
+            gen_helper_addqh_w(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_ADDQH_R_W:
+            gen_helper_addqh_r_w(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_SUBUH_QB:
+            gen_helper_subuh_qb(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_SUBUH_R_QB:
+            gen_helper_subuh_r_qb(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_SUBQH_PH:
+            gen_helper_subqh_ph(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_SUBQH_R_PH:
+            gen_helper_subqh_r_ph(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_SUBQH_W:
+            gen_helper_subqh_w(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_SUBQH_R_W:
+            gen_helper_subqh_r_w(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        }
+        break;
+    case OPC_ABSQ_S_PH_DSP:        /* unary group: only v2_t is consumed */
+        switch (op2) {
+        case OPC_ABSQ_S_QB:
+            check_dspr2(ctx);
+            gen_helper_absq_s_qb(cpu_gpr[ret], v2_t, cpu_env);
+            break;
+        case OPC_ABSQ_S_PH:
+            check_dsp(ctx);
+            gen_helper_absq_s_ph(cpu_gpr[ret], v2_t, cpu_env);
+            break;
+        case OPC_ABSQ_S_W:
+            check_dsp(ctx);
+            gen_helper_absq_s_w(cpu_gpr[ret], v2_t, cpu_env);
+            break;
+        case OPC_PRECEQ_W_PHL:      /* inline: keep bits 31..16, sign-extend */
+            check_dsp(ctx);
+            tcg_gen_andi_tl(cpu_gpr[ret], v2_t, 0xFFFF0000);
+            tcg_gen_ext32s_tl(cpu_gpr[ret], cpu_gpr[ret]);
+            break;
+        case OPC_PRECEQ_W_PHR:      /* inline: bits 15..0 << 16, sign-extend */
+            check_dsp(ctx);
+            tcg_gen_andi_tl(cpu_gpr[ret], v2_t, 0x0000FFFF);
+            tcg_gen_shli_tl(cpu_gpr[ret], cpu_gpr[ret], 16);
+            tcg_gen_ext32s_tl(cpu_gpr[ret], cpu_gpr[ret]);
+            break;
+        case OPC_PRECEQU_PH_QBL:
+            check_dsp(ctx);
+            gen_helper_precequ_ph_qbl(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQU_PH_QBR:
+            check_dsp(ctx);
+            gen_helper_precequ_ph_qbr(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQU_PH_QBLA:
+            check_dsp(ctx);
+            gen_helper_precequ_ph_qbla(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQU_PH_QBRA:
+            check_dsp(ctx);
+            gen_helper_precequ_ph_qbra(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEU_PH_QBL:
+            check_dsp(ctx);
+            gen_helper_preceu_ph_qbl(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEU_PH_QBR:
+            check_dsp(ctx);
+            gen_helper_preceu_ph_qbr(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEU_PH_QBLA:
+            check_dsp(ctx);
+            gen_helper_preceu_ph_qbla(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEU_PH_QBRA:
+            check_dsp(ctx);
+            gen_helper_preceu_ph_qbra(cpu_gpr[ret], v2_t);
+            break;
+        }
+        break;
+    case OPC_ADDU_QB_DSP:
+        switch (op2) {
+        case OPC_ADDQ_PH:
+            check_dsp(ctx);
+            gen_helper_addq_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDQ_S_PH:
+            check_dsp(ctx);
+            gen_helper_addq_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDQ_S_W:
+            check_dsp(ctx);
+            gen_helper_addq_s_w(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDU_QB:
+            check_dsp(ctx);
+            gen_helper_addu_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDU_S_QB:
+            check_dsp(ctx);
+            gen_helper_addu_s_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDU_PH:
+            check_dspr2(ctx);
+            gen_helper_addu_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDU_S_PH:
+            check_dspr2(ctx);
+            gen_helper_addu_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBQ_PH:
+            check_dsp(ctx);
+            gen_helper_subq_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBQ_S_PH:
+            check_dsp(ctx);
+            gen_helper_subq_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBQ_S_W:
+            check_dsp(ctx);
+            gen_helper_subq_s_w(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBU_QB:
+            check_dsp(ctx);
+            gen_helper_subu_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBU_S_QB:
+            check_dsp(ctx);
+            gen_helper_subu_s_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBU_PH:
+            check_dspr2(ctx);
+            gen_helper_subu_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBU_S_PH:
+            check_dspr2(ctx);
+            gen_helper_subu_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDSC:
+            check_dsp(ctx);
+            gen_helper_addsc(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDWC:
+            check_dsp(ctx);
+            gen_helper_addwc(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MODSUB:
+            check_dsp(ctx);
+            gen_helper_modsub(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_RADDU_W_QB:        /* unary: v2_t is not used */
+            check_dsp(ctx);
+            gen_helper_raddu_w_qb(cpu_gpr[ret], v1_t);
+            break;
+        }
+        break;
+    case OPC_CMPU_EQ_QB_DSP:
+        switch (op2) {
+        case OPC_PRECR_QB_PH:
+            check_dspr2(ctx);
+            gen_helper_precr_qb_ph(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_PRECRQ_QB_PH:
+            check_dsp(ctx);
+            gen_helper_precrq_qb_ph(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_PRECR_SRA_PH_W:
+            check_dspr2(ctx);
+            {   /* v2 is passed as an immediate; cpu_gpr[ret] is in and out */
+                TCGv_i32 sa_t = tcg_const_i32(v2);
+                gen_helper_precr_sra_ph_w(cpu_gpr[ret], sa_t, v1_t,
+                                          cpu_gpr[ret]);
+                tcg_temp_free_i32(sa_t);
+                break;
+            }
+        case OPC_PRECR_SRA_R_PH_W:
+            check_dspr2(ctx);
+            {   /* same operand layout as OPC_PRECR_SRA_PH_W */
+                TCGv_i32 sa_t = tcg_const_i32(v2);
+                gen_helper_precr_sra_r_ph_w(cpu_gpr[ret], sa_t, v1_t,
+                                            cpu_gpr[ret]);
+                tcg_temp_free_i32(sa_t);
+                break;
+            }
+        case OPC_PRECRQ_PH_W:
+            check_dsp(ctx);
+            gen_helper_precrq_ph_w(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_PRECRQ_RS_PH_W:
+            check_dsp(ctx);
+            gen_helper_precrq_rs_ph_w(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_PRECRQU_S_QB_PH:
+            check_dsp(ctx);
+            gen_helper_precrqu_s_qb_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        }
+        break;
+#ifdef TARGET_MIPS64               /* groups below exist on 64-bit targets only */
+    case OPC_ABSQ_S_QH_DSP:        /* unary group: only v2_t is consumed */
+        switch (op2) {
+        case OPC_PRECEQ_L_PWL:      /* inline: keep the upper 32 bits */
+            check_dsp(ctx);
+            tcg_gen_andi_tl(cpu_gpr[ret], v2_t, 0xFFFFFFFF00000000ull);
+            break;
+        case OPC_PRECEQ_L_PWR:      /* inline: shift left by 32 */
+            check_dsp(ctx);
+            tcg_gen_shli_tl(cpu_gpr[ret], v2_t, 32);
+            break;
+        case OPC_PRECEQ_PW_QHL:
+            check_dsp(ctx);
+            gen_helper_preceq_pw_qhl(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQ_PW_QHR:
+            check_dsp(ctx);
+            gen_helper_preceq_pw_qhr(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQ_PW_QHLA:
+            check_dsp(ctx);
+            gen_helper_preceq_pw_qhla(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQ_PW_QHRA:
+            check_dsp(ctx);
+            gen_helper_preceq_pw_qhra(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQU_QH_OBL:
+            check_dsp(ctx);
+            gen_helper_precequ_qh_obl(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQU_QH_OBR:
+            check_dsp(ctx);
+            gen_helper_precequ_qh_obr(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQU_QH_OBLA:
+            check_dsp(ctx);
+            gen_helper_precequ_qh_obla(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEQU_QH_OBRA:
+            check_dsp(ctx);
+            gen_helper_precequ_qh_obra(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEU_QH_OBL:
+            check_dsp(ctx);
+            gen_helper_preceu_qh_obl(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEU_QH_OBR:
+            check_dsp(ctx);
+            gen_helper_preceu_qh_obr(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEU_QH_OBLA:
+            check_dsp(ctx);
+            gen_helper_preceu_qh_obla(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_PRECEU_QH_OBRA:
+            check_dsp(ctx);
+            gen_helper_preceu_qh_obra(cpu_gpr[ret], v2_t);
+            break;
+        case OPC_ABSQ_S_OB:
+            check_dspr2(ctx);
+            gen_helper_absq_s_ob(cpu_gpr[ret], v2_t, cpu_env);
+            break;
+        case OPC_ABSQ_S_PW:
+            check_dsp(ctx);
+            gen_helper_absq_s_pw(cpu_gpr[ret], v2_t, cpu_env);
+            break;
+        case OPC_ABSQ_S_QH:
+            check_dsp(ctx);
+            gen_helper_absq_s_qh(cpu_gpr[ret], v2_t, cpu_env);
+            break;
+        }
+        break;
+    case OPC_ADDU_OB_DSP:
+        switch (op2) {
+        case OPC_RADDU_L_OB:        /* unary: v2_t is not used */
+            check_dsp(ctx);
+            gen_helper_raddu_l_ob(cpu_gpr[ret], v1_t);
+            break;
+        case OPC_SUBQ_PW:
+            check_dsp(ctx);
+            gen_helper_subq_pw(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBQ_S_PW:
+            check_dsp(ctx);
+            gen_helper_subq_s_pw(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBQ_QH:
+            check_dsp(ctx);
+            gen_helper_subq_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBQ_S_QH:
+            check_dsp(ctx);
+            gen_helper_subq_s_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBU_OB:
+            check_dsp(ctx);
+            gen_helper_subu_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBU_S_OB:
+            check_dsp(ctx);
+            gen_helper_subu_s_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBU_QH:
+            check_dspr2(ctx);
+            gen_helper_subu_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBU_S_QH:
+            check_dspr2(ctx);
+            gen_helper_subu_s_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_SUBUH_OB:
+            check_dspr2(ctx);
+            gen_helper_subuh_ob(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_SUBUH_R_OB:
+            check_dspr2(ctx);
+            gen_helper_subuh_r_ob(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_ADDQ_PW:
+            check_dsp(ctx);
+            gen_helper_addq_pw(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDQ_S_PW:
+            check_dsp(ctx);
+            gen_helper_addq_s_pw(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDQ_QH:
+            check_dsp(ctx);
+            gen_helper_addq_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDQ_S_QH:
+            check_dsp(ctx);
+            gen_helper_addq_s_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDU_OB:
+            check_dsp(ctx);
+            gen_helper_addu_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDU_S_OB:
+            check_dsp(ctx);
+            gen_helper_addu_s_ob(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDU_QH:
+            check_dspr2(ctx);
+            gen_helper_addu_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDU_S_QH:
+            check_dspr2(ctx);
+            gen_helper_addu_s_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_ADDUH_OB:
+            check_dspr2(ctx);
+            gen_helper_adduh_ob(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_ADDUH_R_OB:
+            check_dspr2(ctx);
+            gen_helper_adduh_r_ob(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        }
+        break;
+    case OPC_CMPU_EQ_OB_DSP:
+        switch (op2) {
+        case OPC_PRECR_OB_QH:
+            check_dspr2(ctx);
+            gen_helper_precr_ob_qh(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_PRECR_SRA_QH_PW:
+            check_dspr2(ctx);
+            {   /* NOTE(review): result lands in temp v2_t, never stored */
+                TCGv_i32 ret_t = tcg_const_i32(ret); /* ret as immediate */
+                gen_helper_precr_sra_qh_pw(v2_t, v1_t, v2_t, ret_t);
+                tcg_temp_free_i32(ret_t);
+                break;
+            }
+        case OPC_PRECR_SRA_R_QH_PW:
+            check_dspr2(ctx);
+            {   /* NOTE(review): same concern as OPC_PRECR_SRA_QH_PW above */
+                TCGv_i32 sa_v = tcg_const_i32(ret);  /* ret as immediate */
+                gen_helper_precr_sra_r_qh_pw(v2_t, v1_t, v2_t, sa_v);
+                tcg_temp_free_i32(sa_v);
+                break;
+            }
+        case OPC_PRECRQ_OB_QH:
+            check_dsp(ctx);
+            gen_helper_precrq_ob_qh(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_PRECRQ_PW_L:
+            check_dsp(ctx);
+            gen_helper_precrq_pw_l(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_PRECRQ_QH_PW:
+            check_dsp(ctx);
+            gen_helper_precrq_qh_pw(cpu_gpr[ret], v1_t, v2_t);
+            break;
+        case OPC_PRECRQ_RS_QH_PW:
+            check_dsp(ctx);
+            gen_helper_precrq_rs_qh_pw(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_PRECRQU_S_OB_QH:
+            check_dsp(ctx);
+            gen_helper_precrqu_s_ob_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        }
+        break;
+#endif
+    }                              /* no default: unmatched ops emit nothing */
+
+    tcg_temp_free(v1_t);
+    tcg_temp_free(v2_t);
+
+    (void)opn; /* avoid a compiler warning */
+    MIPS_DEBUG("%s", opn);
+}
+
+static void gen_mipsdsp_shift(DisasContext *ctx, uint32_t opc,
+                              int ret, int v1, int v2)
+{   /* Emit one DSP shift; the sub-opcode is re-read from ctx->opcode. */
+    uint32_t op2;
+    const char *opn = "mipsdsp shift";
+    TCGv t0;                       /* the number v1 itself, as a constant */
+    TCGv v1_t;                     /* value loaded from GPR v1 */
+    TCGv v2_t;                     /* value loaded from GPR v2 */
+
+    if (ret == 0) {
+        /* Treat as NOP: return before any check or helper call is emitted. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    t0 = tcg_temp_new();
+    v1_t = tcg_temp_new();
+    v2_t = tcg_temp_new();
+
+    tcg_gen_movi_tl(t0, v1);       /* used by the non-V (immediate) cases */
+    gen_load_gpr(v1_t, v1);        /* used by the ...V (register) cases */
+    gen_load_gpr(v2_t, v2);
+
+    switch (opc) {
+    case OPC_SHLL_QB_DSP:          /* helper operand order: (t0|v1_t, v2_t) */
+        {
+            op2 = MASK_SHLL_QB(ctx->opcode);
+            switch (op2) {
+            case OPC_SHLL_QB:       /* immediate form: passes t0 */
+                check_dsp(ctx);
+                gen_helper_shll_qb(cpu_gpr[ret], t0, v2_t, cpu_env);
+                break;
+            case OPC_SHLLV_QB:      /* register form: same helper, v1_t */
+                check_dsp(ctx);
+                gen_helper_shll_qb(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+                break;
+            case OPC_SHLL_PH:
+                check_dsp(ctx);
+                gen_helper_shll_ph(cpu_gpr[ret], t0, v2_t, cpu_env);
+                break;
+            case OPC_SHLLV_PH:
+                check_dsp(ctx);
+                gen_helper_shll_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+                break;
+            case OPC_SHLL_S_PH:
+                check_dsp(ctx);
+                gen_helper_shll_s_ph(cpu_gpr[ret], t0, v2_t, cpu_env);
+                break;
+            case OPC_SHLLV_S_PH:
+                check_dsp(ctx);
+                gen_helper_shll_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+                break;
+            case OPC_SHLL_S_W:
+                check_dsp(ctx);
+                gen_helper_shll_s_w(cpu_gpr[ret], t0, v2_t, cpu_env);
+                break;
+            case OPC_SHLLV_S_W:
+                check_dsp(ctx);
+                gen_helper_shll_s_w(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+                break;
+            case OPC_SHRL_QB:
+                check_dsp(ctx);
+                gen_helper_shrl_qb(cpu_gpr[ret], t0, v2_t);
+                break;
+            case OPC_SHRLV_QB:
+                check_dsp(ctx);
+                gen_helper_shrl_qb(cpu_gpr[ret], v1_t, v2_t);
+                break;
+            case OPC_SHRL_PH:
+                check_dspr2(ctx);
+                gen_helper_shrl_ph(cpu_gpr[ret], t0, v2_t);
+                break;
+            case OPC_SHRLV_PH:
+                check_dspr2(ctx);
+                gen_helper_shrl_ph(cpu_gpr[ret], v1_t, v2_t);
+                break;
+            case OPC_SHRA_QB:
+                check_dspr2(ctx);
+                gen_helper_shra_qb(cpu_gpr[ret], t0, v2_t);
+                break;
+            case OPC_SHRA_R_QB:
+                check_dspr2(ctx);
+                gen_helper_shra_r_qb(cpu_gpr[ret], t0, v2_t);
+                break;
+            case OPC_SHRAV_QB:
+                check_dspr2(ctx);
+                gen_helper_shra_qb(cpu_gpr[ret], v1_t, v2_t);
+                break;
+            case OPC_SHRAV_R_QB:
+                check_dspr2(ctx);
+                gen_helper_shra_r_qb(cpu_gpr[ret], v1_t, v2_t);
+                break;
+            case OPC_SHRA_PH:
+                check_dsp(ctx);
+                gen_helper_shra_ph(cpu_gpr[ret], t0, v2_t);
+                break;
+            case OPC_SHRA_R_PH:
+                check_dsp(ctx);
+                gen_helper_shra_r_ph(cpu_gpr[ret], t0, v2_t);
+                break;
+            case OPC_SHRAV_PH:
+                check_dsp(ctx);
+                gen_helper_shra_ph(cpu_gpr[ret], v1_t, v2_t);
+                break;
+            case OPC_SHRAV_R_PH:
+                check_dsp(ctx);
+                gen_helper_shra_r_ph(cpu_gpr[ret], v1_t, v2_t);
+                break;
+            case OPC_SHRA_R_W:
+                check_dsp(ctx);
+                gen_helper_shra_r_w(cpu_gpr[ret], t0, v2_t);
+                break;
+            case OPC_SHRAV_R_W:
+                check_dsp(ctx);
+                gen_helper_shra_r_w(cpu_gpr[ret], v1_t, v2_t);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK SHLL.QB");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        }
+#ifdef TARGET_MIPS64
+    case OPC_SHLL_OB_DSP:          /* note swapped order: (v2_t, t0|v1_t) */
+        op2 = MASK_SHLL_OB(ctx->opcode);
+        switch (op2) {
+        case OPC_SHLL_PW:
+            check_dsp(ctx);
+            gen_helper_shll_pw(cpu_gpr[ret], v2_t, t0, cpu_env);
+            break;
+        case OPC_SHLLV_PW:
+            check_dsp(ctx);
+            gen_helper_shll_pw(cpu_gpr[ret], v2_t, v1_t, cpu_env);
+            break;
+        case OPC_SHLL_S_PW:
+            check_dsp(ctx);
+            gen_helper_shll_s_pw(cpu_gpr[ret], v2_t, t0, cpu_env);
+            break;
+        case OPC_SHLLV_S_PW:
+            check_dsp(ctx);
+            gen_helper_shll_s_pw(cpu_gpr[ret], v2_t, v1_t, cpu_env);
+            break;
+        case OPC_SHLL_OB:
+            check_dsp(ctx);
+            gen_helper_shll_ob(cpu_gpr[ret], v2_t, t0, cpu_env);
+            break;
+        case OPC_SHLLV_OB:
+            check_dsp(ctx);
+            gen_helper_shll_ob(cpu_gpr[ret], v2_t, v1_t, cpu_env);
+            break;
+        case OPC_SHLL_QH:
+            check_dsp(ctx);
+            gen_helper_shll_qh(cpu_gpr[ret], v2_t, t0, cpu_env);
+            break;
+        case OPC_SHLLV_QH:
+            check_dsp(ctx);
+            gen_helper_shll_qh(cpu_gpr[ret], v2_t, v1_t, cpu_env);
+            break;
+        case OPC_SHLL_S_QH:
+            check_dsp(ctx);
+            gen_helper_shll_s_qh(cpu_gpr[ret], v2_t, t0, cpu_env);
+            break;
+        case OPC_SHLLV_S_QH:
+            check_dsp(ctx);
+            gen_helper_shll_s_qh(cpu_gpr[ret], v2_t, v1_t, cpu_env);
+            break;
+        case OPC_SHRA_OB:
+            check_dspr2(ctx);
+            gen_helper_shra_ob(cpu_gpr[ret], v2_t, t0);
+            break;
+        case OPC_SHRAV_OB:
+            check_dspr2(ctx);
+            gen_helper_shra_ob(cpu_gpr[ret], v2_t, v1_t);
+            break;
+        case OPC_SHRA_R_OB:
+            check_dspr2(ctx);
+            gen_helper_shra_r_ob(cpu_gpr[ret], v2_t, t0);
+            break;
+        case OPC_SHRAV_R_OB:
+            check_dspr2(ctx);
+            gen_helper_shra_r_ob(cpu_gpr[ret], v2_t, v1_t);
+            break;
+        case OPC_SHRA_PW:
+            check_dsp(ctx);
+            gen_helper_shra_pw(cpu_gpr[ret], v2_t, t0);
+            break;
+        case OPC_SHRAV_PW:
+            check_dsp(ctx);
+            gen_helper_shra_pw(cpu_gpr[ret], v2_t, v1_t);
+            break;
+        case OPC_SHRA_R_PW:
+            check_dsp(ctx);
+            gen_helper_shra_r_pw(cpu_gpr[ret], v2_t, t0);
+            break;
+        case OPC_SHRAV_R_PW:
+            check_dsp(ctx);
+            gen_helper_shra_r_pw(cpu_gpr[ret], v2_t, v1_t);
+            break;
+        case OPC_SHRA_QH:
+            check_dsp(ctx);
+            gen_helper_shra_qh(cpu_gpr[ret], v2_t, t0);
+            break;
+        case OPC_SHRAV_QH:
+            check_dsp(ctx);
+            gen_helper_shra_qh(cpu_gpr[ret], v2_t, v1_t);
+            break;
+        case OPC_SHRA_R_QH:
+            check_dsp(ctx);
+            gen_helper_shra_r_qh(cpu_gpr[ret], v2_t, t0);
+            break;
+        case OPC_SHRAV_R_QH:
+            check_dsp(ctx);
+            gen_helper_shra_r_qh(cpu_gpr[ret], v2_t, v1_t);
+            break;
+        case OPC_SHRL_OB:
+            check_dsp(ctx);
+            gen_helper_shrl_ob(cpu_gpr[ret], v2_t, t0);
+            break;
+        case OPC_SHRLV_OB:
+            check_dsp(ctx);
+            gen_helper_shrl_ob(cpu_gpr[ret], v2_t, v1_t);
+            break;
+        case OPC_SHRL_QH:
+            check_dspr2(ctx);
+            gen_helper_shrl_qh(cpu_gpr[ret], v2_t, t0);
+            break;
+        case OPC_SHRLV_QH:
+            check_dspr2(ctx);
+            gen_helper_shrl_qh(cpu_gpr[ret], v2_t, v1_t);
+            break;
+        default:            /* Invalid */
+            MIPS_INVAL("MASK SHLL.OB");
+            generate_exception(ctx, EXCP_RI);
+            break;
+        }
+        break;
+#endif
+    }                              /* no outer default: other opc emit nothing */
+
+    tcg_temp_free(t0);
+    tcg_temp_free(v1_t);
+    tcg_temp_free(v2_t);
+    (void)opn; /* avoid a compiler warning */
+    MIPS_DEBUG("%s", opn);
+}
+
+/*
+ * Emit TCG code for the MIPS DSP multiply and multiply-accumulate
+ * instructions.
+ *
+ * op1 selects the instruction sub-class and op2 the instruction inside it;
+ * an (op1, op2) pair that matches no case below emits nothing.
+ * v1 and v2 are GPR numbers; their values are loaded and handed to the
+ * helpers as source operands.
+ * ret is a destination GPR number for the cases that write cpu_gpr[ret];
+ * for the remaining cases it is handed to the helper as an immediate (the
+ * 64-bit cases first mask it to two bits and name it "ac").
+ * check_ret == 1 tells the function that ret names a GPR, so that a write
+ * to register 0 is dropped as a NOP.
+ */
+static void gen_mipsdsp_multiply(DisasContext *ctx, uint32_t op1, uint32_t op2,
+                                 int ret, int v1, int v2, int check_ret)
+{
+    const char *opn = "mipsdsp multiply";
+    TCGv_i32 t0;
+    TCGv v1_t;
+    TCGv v2_t;
+
+    if ((ret == 0) && (check_ret == 1)) {
+        /* Treat as NOP. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    t0 = tcg_temp_new_i32();
+    v1_t = tcg_temp_new();
+    v2_t = tcg_temp_new();
+
+    /* t0 carries ret as an immediate for the helpers that take it by value;
+     * the 64-bit accumulator cases overwrite it with (ret & 0x03). */
+    tcg_gen_movi_i32(t0, ret);
+    gen_load_gpr(v1_t, v1);
+    gen_load_gpr(v2_t, v2);
+
+    switch (op1) {
+    /* OPC_MULT_G_2E, OPC_ADDUH_QB_DSP, OPC_MUL_PH_DSP have
+     * the same mask and op1. */
+    case OPC_MULT_G_2E:
+        /* The caller only routes this op1 here when the CPU model has
+         * DSP R2, but that does not tell whether the ASE is currently
+         * usable.  Apply the same DSP R2 gate as every other R2 case in
+         * this function so these four instructions are not the only ones
+         * emitted without any check. */
+        check_dspr2(ctx);
+        switch (op2) {
+        case  OPC_MUL_PH:
+            gen_helper_mul_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case  OPC_MUL_S_PH:
+            gen_helper_mul_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULQ_S_W:
+            gen_helper_mulq_s_w(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULQ_RS_W:
+            gen_helper_mulq_rs_w(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        }
+        break;
+    case OPC_DPA_W_PH_DSP:
+        /* These helpers return nothing to a GPR: they receive t0 (ret as
+         * an immediate) plus the two source values. */
+        switch (op2) {
+        case OPC_DPAU_H_QBL:
+            check_dsp(ctx);
+            gen_helper_dpau_h_qbl(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPAU_H_QBR:
+            check_dsp(ctx);
+            gen_helper_dpau_h_qbr(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPSU_H_QBL:
+            check_dsp(ctx);
+            gen_helper_dpsu_h_qbl(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPSU_H_QBR:
+            check_dsp(ctx);
+            gen_helper_dpsu_h_qbr(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPA_W_PH:
+            check_dspr2(ctx);
+            gen_helper_dpa_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPAX_W_PH:
+            check_dspr2(ctx);
+            gen_helper_dpax_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPAQ_S_W_PH:
+            check_dsp(ctx);
+            gen_helper_dpaq_s_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPAQX_S_W_PH:
+            check_dspr2(ctx);
+            gen_helper_dpaqx_s_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPAQX_SA_W_PH:
+            check_dspr2(ctx);
+            gen_helper_dpaqx_sa_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPS_W_PH:
+            check_dspr2(ctx);
+            gen_helper_dps_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPSX_W_PH:
+            check_dspr2(ctx);
+            gen_helper_dpsx_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPSQ_S_W_PH:
+            check_dsp(ctx);
+            gen_helper_dpsq_s_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPSQX_S_W_PH:
+            check_dspr2(ctx);
+            gen_helper_dpsqx_s_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPSQX_SA_W_PH:
+            check_dspr2(ctx);
+            gen_helper_dpsqx_sa_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULSAQ_S_W_PH:
+            check_dsp(ctx);
+            gen_helper_mulsaq_s_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPAQ_SA_L_W:
+            check_dsp(ctx);
+            gen_helper_dpaq_sa_l_w(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_DPSQ_SA_L_W:
+            check_dsp(ctx);
+            gen_helper_dpsq_sa_l_w(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MAQ_S_W_PHL:
+            check_dsp(ctx);
+            gen_helper_maq_s_w_phl(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MAQ_S_W_PHR:
+            check_dsp(ctx);
+            gen_helper_maq_s_w_phr(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MAQ_SA_W_PHL:
+            check_dsp(ctx);
+            gen_helper_maq_sa_w_phl(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MAQ_SA_W_PHR:
+            check_dsp(ctx);
+            gen_helper_maq_sa_w_phr(t0, v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULSA_W_PH:
+            check_dspr2(ctx);
+            gen_helper_mulsa_w_ph(t0, v1_t, v2_t, cpu_env);
+            break;
+        }
+        break;
+#ifdef TARGET_MIPS64
+    case OPC_DPAQ_W_QH_DSP:
+        {
+            /* Only the low two bits of ret are passed on; note that these
+             * helpers take the sources first and the immediate last. */
+            int ac = ret & 0x03;
+            tcg_gen_movi_i32(t0, ac);
+
+            switch (op2) {
+            case OPC_DMADD:
+                check_dsp(ctx);
+                gen_helper_dmadd(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DMADDU:
+                check_dsp(ctx);
+                gen_helper_dmaddu(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DMSUB:
+                check_dsp(ctx);
+                gen_helper_dmsub(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DMSUBU:
+                check_dsp(ctx);
+                gen_helper_dmsubu(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPA_W_QH:
+                check_dspr2(ctx);
+                gen_helper_dpa_w_qh(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPAQ_S_W_QH:
+                check_dsp(ctx);
+                gen_helper_dpaq_s_w_qh(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPAQ_SA_L_PW:
+                check_dsp(ctx);
+                gen_helper_dpaq_sa_l_pw(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPAU_H_OBL:
+                check_dsp(ctx);
+                gen_helper_dpau_h_obl(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPAU_H_OBR:
+                check_dsp(ctx);
+                gen_helper_dpau_h_obr(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPS_W_QH:
+                check_dspr2(ctx);
+                gen_helper_dps_w_qh(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPSQ_S_W_QH:
+                check_dsp(ctx);
+                gen_helper_dpsq_s_w_qh(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPSQ_SA_L_PW:
+                check_dsp(ctx);
+                gen_helper_dpsq_sa_l_pw(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPSU_H_OBL:
+                check_dsp(ctx);
+                gen_helper_dpsu_h_obl(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_DPSU_H_OBR:
+                check_dsp(ctx);
+                gen_helper_dpsu_h_obr(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_S_L_PWL:
+                check_dsp(ctx);
+                gen_helper_maq_s_l_pwl(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_S_L_PWR:
+                check_dsp(ctx);
+                gen_helper_maq_s_l_pwr(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_S_W_QHLL:
+                check_dsp(ctx);
+                gen_helper_maq_s_w_qhll(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_SA_W_QHLL:
+                check_dsp(ctx);
+                gen_helper_maq_sa_w_qhll(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_S_W_QHLR:
+                check_dsp(ctx);
+                gen_helper_maq_s_w_qhlr(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_SA_W_QHLR:
+                check_dsp(ctx);
+                gen_helper_maq_sa_w_qhlr(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_S_W_QHRL:
+                check_dsp(ctx);
+                gen_helper_maq_s_w_qhrl(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_SA_W_QHRL:
+                check_dsp(ctx);
+                gen_helper_maq_sa_w_qhrl(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_S_W_QHRR:
+                check_dsp(ctx);
+                gen_helper_maq_s_w_qhrr(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MAQ_SA_W_QHRR:
+                check_dsp(ctx);
+                gen_helper_maq_sa_w_qhrr(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MULSAQ_S_L_PW:
+                check_dsp(ctx);
+                gen_helper_mulsaq_s_l_pw(v1_t, v2_t, t0, cpu_env);
+                break;
+            case OPC_MULSAQ_S_W_QH:
+                check_dsp(ctx);
+                gen_helper_mulsaq_s_w_qh(v1_t, v2_t, t0, cpu_env);
+                break;
+            }
+        }
+        break;
+#endif
+    case OPC_ADDU_QB_DSP:
+        /* Multiplies that share op1 with the ADDU.QB group and write
+         * their result to cpu_gpr[ret]. */
+        switch (op2) {
+        case OPC_MULEU_S_PH_QBL:
+            check_dsp(ctx);
+            gen_helper_muleu_s_ph_qbl(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULEU_S_PH_QBR:
+            check_dsp(ctx);
+            gen_helper_muleu_s_ph_qbr(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULQ_RS_PH:
+            check_dsp(ctx);
+            gen_helper_mulq_rs_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULEQ_S_W_PHL:
+            check_dsp(ctx);
+            gen_helper_muleq_s_w_phl(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULEQ_S_W_PHR:
+            check_dsp(ctx);
+            gen_helper_muleq_s_w_phr(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULQ_S_PH:
+            check_dspr2(ctx);
+            gen_helper_mulq_s_ph(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        }
+        break;
+#ifdef TARGET_MIPS64
+    case OPC_ADDU_OB_DSP:
+        switch (op2) {
+        case OPC_MULEQ_S_PW_QHL:
+            check_dsp(ctx);
+            gen_helper_muleq_s_pw_qhl(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULEQ_S_PW_QHR:
+            check_dsp(ctx);
+            gen_helper_muleq_s_pw_qhr(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULEU_S_QH_OBL:
+            check_dsp(ctx);
+            gen_helper_muleu_s_qh_obl(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULEU_S_QH_OBR:
+            check_dsp(ctx);
+            gen_helper_muleu_s_qh_obr(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        case OPC_MULQ_RS_QH:
+            check_dsp(ctx);
+            gen_helper_mulq_rs_qh(cpu_gpr[ret], v1_t, v2_t, cpu_env);
+            break;
+        }
+        break;
+#endif
+    }
+
+    tcg_temp_free_i32(t0);
+    tcg_temp_free(v1_t);
+    tcg_temp_free(v2_t);
+
+    (void)opn; /* avoid a compiler warning */
+    MIPS_DEBUG("%s", opn);
+
+}
+
+/*
+ * Emit TCG code for the MIPS DSP bit-manipulation and replicate
+ * instructions (BITREV, REPL.x, REPLV.x).
+ *
+ * op1 selects the instruction sub-class and op2 the instruction inside it;
+ * an (op1, op2) pair that matches no case below emits nothing.
+ * ret is the destination GPR number; a write to register 0 is dropped as
+ * a NOP.
+ * val is the source GPR number used by BITREV and the REPLV.x forms.  The
+ * REPL.x forms take their immediate directly from ctx->opcode instead.
+ * env is not used by this function.
+ */
+static void gen_mipsdsp_bitinsn(CPUMIPSState *env, DisasContext *ctx,
+                                uint32_t op1, uint32_t op2,
+                                int ret, int val)
+{
+    const char *opn = "mipsdsp Bit/ Manipulation";
+    int16_t imm;
+    TCGv t0;
+    TCGv val_t;
+
+    if (ret == 0) {
+        /* Treat as NOP. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    t0 = tcg_temp_new();
+    val_t = tcg_temp_new();
+    gen_load_gpr(val_t, val);
+
+    switch (op1) {
+    case OPC_ABSQ_S_PH_DSP:
+        switch (op2) {
+        case OPC_BITREV:
+            check_dsp(ctx);
+            gen_helper_bitrev(cpu_gpr[ret], val_t);
+            break;
+        case OPC_REPL_QB:
+            check_dsp(ctx);
+            {
+                target_long result;
+                /* 8-bit immediate copied into all four byte lanes. */
+                imm = (ctx->opcode >> 16) & 0xFF;
+                result = (uint32_t)imm << 24 |
+                         (uint32_t)imm << 16 |
+                         (uint32_t)imm << 8  |
+                         (uint32_t)imm;
+                /* Sign-extend the 32-bit pattern to target_long. */
+                result = (int32_t)result;
+                tcg_gen_movi_tl(cpu_gpr[ret], result);
+            }
+            break;
+        case OPC_REPLV_QB:
+            check_dsp(ctx);
+            /* Low byte of the source doubled twice: 8 -> 16 -> 32 bits. */
+            tcg_gen_ext8u_tl(cpu_gpr[ret], val_t);
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 8);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0);
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 16);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0);
+            tcg_gen_ext32s_tl(cpu_gpr[ret], cpu_gpr[ret]);
+            break;
+        case OPC_REPL_PH:
+            check_dsp(ctx);
+            {
+                int32_t rep;
+
+                imm = (ctx->opcode >> 16) & 0x03FF;
+                /* Sign-extend the 10-bit immediate to 16 bits, exactly
+                 * as the REPL_PW and REPL_QH cases do; without this a
+                 * negative immediate is replicated as a positive one. */
+                imm = (int16_t)(imm << 6) >> 6;
+                /* Build the two halfword lanes in unsigned arithmetic
+                 * (no shift of a negative value), then go through
+                 * int32_t so that the result is sign-extended on 64-bit
+                 * targets, matching REPL_QB and the REPLV_* cases. */
+                rep = (int32_t)(((uint32_t)(uint16_t)imm << 16) |
+                                (uint16_t)imm);
+                tcg_gen_movi_tl(cpu_gpr[ret], (target_long)rep);
+            }
+            break;
+        case OPC_REPLV_PH:
+            check_dsp(ctx);
+            /* Low halfword of the source copied into both lanes. */
+            tcg_gen_ext16u_tl(cpu_gpr[ret], val_t);
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 16);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0);
+            tcg_gen_ext32s_tl(cpu_gpr[ret], cpu_gpr[ret]);
+            break;
+        }
+        break;
+#ifdef TARGET_MIPS64
+    case OPC_ABSQ_S_QH_DSP:
+        switch (op2) {
+        case OPC_REPL_OB:
+            check_dsp(ctx);
+            {
+                target_long temp;
+
+                /* 8-bit immediate doubled three times: 8 -> 64 bits. */
+                imm = (ctx->opcode >> 16) & 0xFF;
+                temp = ((uint64_t)imm << 8) | (uint64_t)imm;
+                temp = (temp << 16) | temp;
+                temp = (temp << 32) | temp;
+                tcg_gen_movi_tl(cpu_gpr[ret], temp);
+                break;
+            }
+        case OPC_REPL_PW:
+            check_dsp(ctx);
+            {
+                target_long temp;
+
+                /* 10-bit immediate, sign-extended, in both word lanes. */
+                imm = (ctx->opcode >> 16) & 0x03FF;
+                imm = (int16_t)(imm << 6) >> 6;
+                temp = ((target_long)imm << 32) \
+                       | ((target_long)imm & 0xFFFFFFFF);
+                tcg_gen_movi_tl(cpu_gpr[ret], temp);
+                break;
+            }
+        case OPC_REPL_QH:
+            check_dsp(ctx);
+            {
+                target_long temp;
+
+                /* 10-bit immediate, sign-extended to 16 bits, in all
+                 * four halfword lanes. */
+                imm = (ctx->opcode >> 16) & 0x03FF;
+                imm = (int16_t)(imm << 6) >> 6;
+
+                temp = ((uint64_t)(uint16_t)imm << 48) |
+                       ((uint64_t)(uint16_t)imm << 32) |
+                       ((uint64_t)(uint16_t)imm << 16) |
+                       (uint64_t)(uint16_t)imm;
+                tcg_gen_movi_tl(cpu_gpr[ret], temp);
+                break;
+            }
+        case OPC_REPLV_OB:
+            check_dsp(ctx);
+            tcg_gen_ext8u_tl(cpu_gpr[ret], val_t);
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 8);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0);
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 16);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0);
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 32);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0);
+            break;
+        case OPC_REPLV_PW:
+            check_dsp(ctx);
+            tcg_gen_ext32u_i64(cpu_gpr[ret], val_t);
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 32);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0);
+            break;
+        case OPC_REPLV_QH:
+            check_dsp(ctx);
+            tcg_gen_ext16u_tl(cpu_gpr[ret], val_t);
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 16);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0);
+            tcg_gen_shli_tl(t0, cpu_gpr[ret], 32);
+            tcg_gen_or_tl(cpu_gpr[ret], cpu_gpr[ret], t0);
+            break;
+        }
+        break;
+#endif
+    }
+    tcg_temp_free(t0);
+    tcg_temp_free(val_t);
+
+    (void)opn; /* avoid a compiler warning */
+    MIPS_DEBUG("%s", opn);
+}
+
+/*
+ * Shared tail of the three CMPGDU.*.QB cases: copy the comparison result
+ * held in cc to cpu_gpr[ret], clear bits 27..24 of cpu_dspctrl (mask
+ * 0xF0FFFFFF) and OR in cc shifted left by 24.  cc is clobbered.
+ */
+static inline void gen_mipsdsp_cmpgdu_writeback(int ret, TCGv cc)
+{
+    tcg_gen_mov_tl(cpu_gpr[ret], cc);
+    tcg_gen_andi_tl(cpu_dspctrl, cpu_dspctrl, 0xF0FFFFFF);
+    tcg_gen_shli_tl(cc, cc, 24);
+    tcg_gen_or_tl(cpu_dspctrl, cpu_dspctrl, cc);
+}
+
+/*
+ * Emit TCG code for the MIPS DSP append, compare, pick and pack
+ * instructions.
+ *
+ * op1 picks the sub-class, op2 the instruction within it.  Only the
+ * append sub-classes raise a Reserved Instruction exception for an
+ * unknown op2; the compare/pick sub-classes emit nothing in that case.
+ * v1 and v2 are GPR numbers whose values are loaded as operands; the
+ * append sub-classes additionally pass v2 itself as an immediate.
+ * ret is the destination GPR number for the cases that write
+ * cpu_gpr[ret]; the append cases also read cpu_gpr[ret] as an input.
+ * check_ret == 1 means ret names a GPR, so ret == 0 becomes a NOP.
+ */
+static void gen_mipsdsp_add_cmp_pick(DisasContext *ctx,
+                                     uint32_t op1, uint32_t op2,
+                                     int ret, int v1, int v2, int check_ret)
+{
+    const char *opn = "mipsdsp add compare pick";
+    TCGv_i32 sa_t;      /* v2 as an immediate, for the append sub-classes */
+    TCGv cc_t;          /* comparison result, for the CMPGDU cases */
+    TCGv src1;          /* value of GPR v1 */
+    TCGv src2;          /* value of GPR v2 */
+
+    if (check_ret == 1 && ret == 0) {
+        /* Treat as NOP. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    sa_t = tcg_temp_new_i32();
+    cc_t = tcg_temp_new();
+    src1 = tcg_temp_new();
+    src2 = tcg_temp_new();
+
+    gen_load_gpr(src1, v1);
+    gen_load_gpr(src2, v2);
+
+    switch (op1) {
+    case OPC_APPEND_DSP:
+        switch (op2) {
+        case OPC_APPEND:
+            tcg_gen_movi_i32(sa_t, v2);
+            gen_helper_append(cpu_gpr[ret], cpu_gpr[ret], src1, sa_t);
+            break;
+        case OPC_PREPEND:
+            tcg_gen_movi_i32(sa_t, v2);
+            gen_helper_prepend(cpu_gpr[ret], src1, cpu_gpr[ret], sa_t);
+            break;
+        case OPC_BALIGN:
+            tcg_gen_movi_i32(sa_t, v2);
+            gen_helper_balign(cpu_gpr[ret], src1, cpu_gpr[ret], sa_t);
+            break;
+        default:            /* Invalid */
+            MIPS_INVAL("MASK APPEND");
+            generate_exception(ctx, EXCP_RI);
+            break;
+        }
+        break;
+    case OPC_CMPU_EQ_QB_DSP:
+        switch (op2) {
+        /* CMPU.*.QB: no GPR result, the helper gets cpu_env. */
+        case OPC_CMPU_EQ_QB:
+            check_dsp(ctx);
+            gen_helper_cmpu_eq_qb(src1, src2, cpu_env);
+            break;
+        case OPC_CMPU_LT_QB:
+            check_dsp(ctx);
+            gen_helper_cmpu_lt_qb(src1, src2, cpu_env);
+            break;
+        case OPC_CMPU_LE_QB:
+            check_dsp(ctx);
+            gen_helper_cmpu_le_qb(src1, src2, cpu_env);
+            break;
+        /* CMPGU.*.QB: result goes to cpu_gpr[ret] only. */
+        case OPC_CMPGU_EQ_QB:
+            check_dsp(ctx);
+            gen_helper_cmpgu_eq_qb(cpu_gpr[ret], src1, src2);
+            break;
+        case OPC_CMPGU_LT_QB:
+            check_dsp(ctx);
+            gen_helper_cmpgu_lt_qb(cpu_gpr[ret], src1, src2);
+            break;
+        case OPC_CMPGU_LE_QB:
+            check_dsp(ctx);
+            gen_helper_cmpgu_le_qb(cpu_gpr[ret], src1, src2);
+            break;
+        /* CMPGDU.*.QB: reuse the CMPGU helpers, then store the result
+         * in both cpu_gpr[ret] and cpu_dspctrl. */
+        case OPC_CMPGDU_EQ_QB:
+            check_dspr2(ctx);
+            gen_helper_cmpgu_eq_qb(cc_t, src1, src2);
+            gen_mipsdsp_cmpgdu_writeback(ret, cc_t);
+            break;
+        case OPC_CMPGDU_LT_QB:
+            check_dspr2(ctx);
+            gen_helper_cmpgu_lt_qb(cc_t, src1, src2);
+            gen_mipsdsp_cmpgdu_writeback(ret, cc_t);
+            break;
+        case OPC_CMPGDU_LE_QB:
+            check_dspr2(ctx);
+            gen_helper_cmpgu_le_qb(cc_t, src1, src2);
+            gen_mipsdsp_cmpgdu_writeback(ret, cc_t);
+            break;
+        /* CMP.*.PH: no GPR result, the helper gets cpu_env. */
+        case OPC_CMP_EQ_PH:
+            check_dsp(ctx);
+            gen_helper_cmp_eq_ph(src1, src2, cpu_env);
+            break;
+        case OPC_CMP_LT_PH:
+            check_dsp(ctx);
+            gen_helper_cmp_lt_ph(src1, src2, cpu_env);
+            break;
+        case OPC_CMP_LE_PH:
+            check_dsp(ctx);
+            gen_helper_cmp_le_ph(src1, src2, cpu_env);
+            break;
+        case OPC_PICK_QB:
+            check_dsp(ctx);
+            gen_helper_pick_qb(cpu_gpr[ret], src1, src2, cpu_env);
+            break;
+        case OPC_PICK_PH:
+            check_dsp(ctx);
+            gen_helper_pick_ph(cpu_gpr[ret], src1, src2, cpu_env);
+            break;
+        case OPC_PACKRL_PH:
+            check_dsp(ctx);
+            gen_helper_packrl_ph(cpu_gpr[ret], src1, src2);
+            break;
+        }
+        break;
+#ifdef TARGET_MIPS64
+    case OPC_CMPU_EQ_OB_DSP:
+        switch (op2) {
+        case OPC_CMP_EQ_PW:
+            check_dsp(ctx);
+            gen_helper_cmp_eq_pw(src1, src2, cpu_env);
+            break;
+        case OPC_CMP_LT_PW:
+            check_dsp(ctx);
+            gen_helper_cmp_lt_pw(src1, src2, cpu_env);
+            break;
+        case OPC_CMP_LE_PW:
+            check_dsp(ctx);
+            gen_helper_cmp_le_pw(src1, src2, cpu_env);
+            break;
+        case OPC_CMP_EQ_QH:
+            check_dsp(ctx);
+            gen_helper_cmp_eq_qh(src1, src2, cpu_env);
+            break;
+        case OPC_CMP_LT_QH:
+            check_dsp(ctx);
+            gen_helper_cmp_lt_qh(src1, src2, cpu_env);
+            break;
+        case OPC_CMP_LE_QH:
+            check_dsp(ctx);
+            gen_helper_cmp_le_qh(src1, src2, cpu_env);
+            break;
+        case OPC_CMPGDU_EQ_OB:
+            check_dspr2(ctx);
+            gen_helper_cmpgdu_eq_ob(cpu_gpr[ret], src1, src2, cpu_env);
+            break;
+        case OPC_CMPGDU_LT_OB:
+            check_dspr2(ctx);
+            gen_helper_cmpgdu_lt_ob(cpu_gpr[ret], src1, src2, cpu_env);
+            break;
+        case OPC_CMPGDU_LE_OB:
+            check_dspr2(ctx);
+            gen_helper_cmpgdu_le_ob(cpu_gpr[ret], src1, src2, cpu_env);
+            break;
+        case OPC_CMPGU_EQ_OB:
+            check_dsp(ctx);
+            gen_helper_cmpgu_eq_ob(cpu_gpr[ret], src1, src2);
+            break;
+        case OPC_CMPGU_LT_OB:
+            check_dsp(ctx);
+            gen_helper_cmpgu_lt_ob(cpu_gpr[ret], src1, src2);
+            break;
+        case OPC_CMPGU_LE_OB:
+            check_dsp(ctx);
+            gen_helper_cmpgu_le_ob(cpu_gpr[ret], src1, src2);
+            break;
+        case OPC_CMPU_EQ_OB:
+            check_dsp(ctx);
+            gen_helper_cmpu_eq_ob(src1, src2, cpu_env);
+            break;
+        case OPC_CMPU_LT_OB:
+            check_dsp(ctx);
+            gen_helper_cmpu_lt_ob(src1, src2, cpu_env);
+            break;
+        case OPC_CMPU_LE_OB:
+            check_dsp(ctx);
+            gen_helper_cmpu_le_ob(src1, src2, cpu_env);
+            break;
+        case OPC_PACKRL_PW:
+            check_dsp(ctx);
+            gen_helper_packrl_pw(cpu_gpr[ret], src1, src2);
+            break;
+        case OPC_PICK_OB:
+            check_dsp(ctx);
+            gen_helper_pick_ob(cpu_gpr[ret], src1, src2, cpu_env);
+            break;
+        case OPC_PICK_PW:
+            check_dsp(ctx);
+            gen_helper_pick_pw(cpu_gpr[ret], src1, src2, cpu_env);
+            break;
+        case OPC_PICK_QH:
+            check_dsp(ctx);
+            gen_helper_pick_qh(cpu_gpr[ret], src1, src2, cpu_env);
+            break;
+        }
+        break;
+    case OPC_DAPPEND_DSP:
+        switch (op2) {
+        case OPC_DAPPEND:
+            tcg_gen_movi_i32(sa_t, v2);
+            gen_helper_dappend(cpu_gpr[ret], src1, cpu_gpr[ret], sa_t);
+            break;
+        case OPC_PREPENDD:
+            tcg_gen_movi_i32(sa_t, v2);
+            gen_helper_prependd(cpu_gpr[ret], src1, cpu_gpr[ret], sa_t);
+            break;
+        case OPC_PREPENDW:
+            tcg_gen_movi_i32(sa_t, v2);
+            gen_helper_prependw(cpu_gpr[ret], src1, cpu_gpr[ret], sa_t);
+            break;
+        case OPC_DBALIGN:
+            tcg_gen_movi_i32(sa_t, v2);
+            gen_helper_dbalign(cpu_gpr[ret], src1, cpu_gpr[ret], sa_t);
+            break;
+        default:            /* Invalid */
+            MIPS_INVAL("MASK DAPPEND");
+            generate_exception(ctx, EXCP_RI);
+            break;
+        }
+        break;
+#endif
+    }
+
+    tcg_temp_free_i32(sa_t);
+    tcg_temp_free(cc_t);
+    tcg_temp_free(src1);
+    tcg_temp_free(src2);
+
+    (void)opn; /* avoid a compiler warning */
+    MIPS_DEBUG("%s", opn);
+}
+
+/*
+ * Emit TCG code for the MIPS DSP accumulator access instructions
+ * (EXTR, EXTP, SHILO, MTHLIP, WRDSP, RDDSP and the 64-bit D* forms).
+ *
+ * op1 picks the sub-class, op2 the instruction within it; an (op1, op2)
+ * pair that matches no case below emits only the check_dsp() gate or
+ * nothing at all.
+ * v1 and v2 are instruction fields.  Where v2 is used it is passed to the
+ * helper as an immediate.  v1 is passed as an immediate by the plain
+ * forms and as the value of GPR v1 (v1_t) by the "V" forms.
+ * ret is the destination GPR number for the extract forms and RDDSP; for
+ * SHILO, SHILOV, MTHLIP and DMTHLIP it is passed to the helper as an
+ * immediate.
+ * check_ret == 1 means ret names a GPR, so ret == 0 becomes a NOP.
+ */
+static void gen_mipsdsp_accinsn(DisasContext *ctx, uint32_t op1, uint32_t op2,
+                                int ret, int v1, int v2, int check_ret)
+{
+    const char *opn = "mipsdsp accumulator";
+    TCGv t0;
+    TCGv t1;
+    TCGv v1_t;
+    TCGv v2_t;
+    int16_t imm;
+
+    if ((ret == 0) && (check_ret == 1)) {
+        /* Treat as NOP. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    v1_t = tcg_temp_new();
+    v2_t = tcg_temp_new();
+
+    gen_load_gpr(v1_t, v1);
+    gen_load_gpr(v2_t, v2);
+
+    switch (op1) {
+    case OPC_EXTR_W_DSP:
+        check_dsp(ctx);
+        switch (op2) {
+        case OPC_EXTR_W:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_extr_w(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_EXTR_R_W:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_extr_r_w(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_EXTR_RS_W:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_extr_rs_w(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_EXTR_S_H:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_extr_s_h(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        /* The "V" forms share the helper of the matching plain form but
+         * pass the value of GPR v1 instead of v1 itself. */
+        case OPC_EXTRV_S_H:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_extr_s_h(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_EXTRV_W:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_extr_w(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_EXTRV_R_W:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_extr_r_w(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_EXTRV_RS_W:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_extr_rs_w(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_EXTP:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_extp(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_EXTPV:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_extp(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_EXTPDP:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_extpdp(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_EXTPDPV:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_extpdp(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_SHILO:
+            /* 6-bit field taken from opcode bits 25..20. */
+            imm = (ctx->opcode >> 20) & 0x3F;
+            tcg_gen_movi_tl(t0, ret);
+            tcg_gen_movi_tl(t1, imm);
+            gen_helper_shilo(t0, t1, cpu_env);
+            break;
+        case OPC_SHILOV:
+            tcg_gen_movi_tl(t0, ret);
+            gen_helper_shilo(t0, v1_t, cpu_env);
+            break;
+        case OPC_MTHLIP:
+            tcg_gen_movi_tl(t0, ret);
+            gen_helper_mthlip(t0, v1_t, cpu_env);
+            break;
+        case OPC_WRDSP:
+            /* 10-bit field taken from opcode bits 20..11. */
+            imm = (ctx->opcode >> 11) & 0x3FF;
+            tcg_gen_movi_tl(t0, imm);
+            gen_helper_wrdsp(v1_t, t0, cpu_env);
+            break;
+        case OPC_RDDSP:
+            /* 10-bit field taken from opcode bits 25..16. */
+            imm = (ctx->opcode >> 16) & 0x03FF;
+            tcg_gen_movi_tl(t0, imm);
+            gen_helper_rddsp(cpu_gpr[ret], t0, cpu_env);
+            break;
+        }
+        break;
+#ifdef TARGET_MIPS64
+    case OPC_DEXTR_W_DSP:
+        check_dsp(ctx);
+        switch (op2) {
+        case OPC_DMTHLIP:
+            tcg_gen_movi_tl(t0, ret);
+            gen_helper_dmthlip(v1_t, t0, cpu_env);
+            break;
+        case OPC_DSHILO:
+            {
+                /* 7-bit shift from opcode bits 25..19, 2-bit ac from
+                 * opcode bits 12..11. */
+                int shift = (ctx->opcode >> 19) & 0x7F;
+                int ac = (ctx->opcode >> 11) & 0x03;
+                tcg_gen_movi_tl(t0, shift);
+                tcg_gen_movi_tl(t1, ac);
+                gen_helper_dshilo(t0, t1, cpu_env);
+                break;
+            }
+        case OPC_DSHILOV:
+            {
+                int ac = (ctx->opcode >> 11) & 0x03;
+                tcg_gen_movi_tl(t0, ac);
+                gen_helper_dshilo(v1_t, t0, cpu_env);
+                break;
+            }
+        case OPC_DEXTP:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+
+            gen_helper_dextp(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTPV:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextp(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_DEXTPDP:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_dextpdp(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTPDPV:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextpdp(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_DEXTR_L:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_dextr_l(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTR_R_L:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_dextr_r_l(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTR_RS_L:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_dextr_rs_l(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTR_W:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_dextr_w(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTR_R_W:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_dextr_r_w(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTR_RS_W:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_dextr_rs_w(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTR_S_H:
+            tcg_gen_movi_tl(t0, v2);
+            tcg_gen_movi_tl(t1, v1);
+            gen_helper_dextr_s_h(cpu_gpr[ret], t0, t1, cpu_env);
+            break;
+        case OPC_DEXTRV_S_H:
+            /* Variable form: the second operand is the value of GPR v1,
+             * not the register number, as in every other "V" case of
+             * this function (compare OPC_EXTRV_S_H and OPC_DEXTRV_W). */
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextr_s_h(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_DEXTRV_L:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextr_l(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_DEXTRV_R_L:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextr_r_l(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_DEXTRV_RS_L:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextr_rs_l(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_DEXTRV_W:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextr_w(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_DEXTRV_R_W:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextr_r_w(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        case OPC_DEXTRV_RS_W:
+            tcg_gen_movi_tl(t0, v2);
+            gen_helper_dextr_rs_w(cpu_gpr[ret], t0, v1_t, cpu_env);
+            break;
+        }
+        break;
+#endif
+    }
+
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+    tcg_temp_free(v1_t);
+    tcg_temp_free(v2_t);
+
+    (void)opn; /* avoid a compiler warning */
+    MIPS_DEBUG("%s", opn);
+}
+
+/* End MIPSDSP functions. */
+
 static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
 {
     int32_t offset;
@@ -12456,10 +14651,272 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
             }
             break;
         case OPC_DIV_G_2E ... OPC_DIVU_G_2E:
-        case OPC_MULT_G_2E ... OPC_MULTU_G_2E:
         case OPC_MOD_G_2E ... OPC_MODU_G_2E:
-            check_insn(env, ctx, INSN_LOONGSON2E);
-            gen_loongson_integer(ctx, op1, rd, rs, rt);
+        case OPC_MULT_G_2E ... OPC_MULTU_G_2E:
+        /* OPC_MULT_G_2E, OPC_ADDUH_QB_DSP, OPC_MUL_PH_DSP have
+         * the same mask and op1. */
+            if ((env->insn_flags & ASE_DSPR2) && (op1 == OPC_MULT_G_2E)) {
+                op2 = MASK_ADDUH_QB(ctx->opcode);
+                switch (op2) {
+                case OPC_ADDUH_QB:
+                case OPC_ADDUH_R_QB:
+                case OPC_ADDQH_PH:
+                case OPC_ADDQH_R_PH:
+                case OPC_ADDQH_W:
+                case OPC_ADDQH_R_W:
+                case OPC_SUBUH_QB:
+                case OPC_SUBUH_R_QB:
+                case OPC_SUBQH_PH:
+                case OPC_SUBQH_R_PH:
+                case OPC_SUBQH_W:
+                case OPC_SUBQH_R_W:
+                    gen_mipsdsp_arith(ctx, op1, op2, rd, rs, rt);
+                    break;
+                case OPC_MUL_PH:
+                case OPC_MUL_S_PH:
+                case OPC_MULQ_S_W:
+                case OPC_MULQ_RS_W:
+                    gen_mipsdsp_multiply(ctx, op1, op2, rd, rs, rt, 1);
+                    break;
+                default:
+                    MIPS_INVAL("MASK ADDUH.QB");
+                    generate_exception(ctx, EXCP_RI);
+                    break;
+                }
+            } else if (env->insn_flags & INSN_LOONGSON2E) {
+                gen_loongson_integer(ctx, op1, rd, rs, rt);
+            } else {
+                generate_exception(ctx, EXCP_RI);
+            }
+            break;
+        case OPC_LX_DSP:
+            op2 = MASK_LX(ctx->opcode);
+            switch (op2) {
+#if defined(TARGET_MIPS64)
+            case OPC_LDX:
+#endif
+            case OPC_LBUX:
+            case OPC_LHX:
+            case OPC_LWX:
+                gen_mipsdsp_ld(env, ctx, op2, rd, rs, rt);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK LX");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_ABSQ_S_PH_DSP:
+            op2 = MASK_ABSQ_S_PH(ctx->opcode);
+            switch (op2) {
+            case OPC_ABSQ_S_QB:
+            case OPC_ABSQ_S_PH:
+            case OPC_ABSQ_S_W:
+            case OPC_PRECEQ_W_PHL:
+            case OPC_PRECEQ_W_PHR:
+            case OPC_PRECEQU_PH_QBL:
+            case OPC_PRECEQU_PH_QBR:
+            case OPC_PRECEQU_PH_QBLA:
+            case OPC_PRECEQU_PH_QBRA:
+            case OPC_PRECEU_PH_QBL:
+            case OPC_PRECEU_PH_QBR:
+            case OPC_PRECEU_PH_QBLA:
+            case OPC_PRECEU_PH_QBRA:
+                gen_mipsdsp_arith(ctx, op1, op2, rd, rs, rt);
+                break;
+            case OPC_BITREV:
+            case OPC_REPL_QB:
+            case OPC_REPLV_QB:
+            case OPC_REPL_PH:
+            case OPC_REPLV_PH:
+                gen_mipsdsp_bitinsn(env, ctx, op1, op2, rd, rt);
+                break;
+            default:
+                MIPS_INVAL("MASK ABSQ_S.PH");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_ADDU_QB_DSP:
+            op2 = MASK_ADDU_QB(ctx->opcode);
+            switch (op2) {
+            case OPC_ADDQ_PH:
+            case OPC_ADDQ_S_PH:
+            case OPC_ADDQ_S_W:
+            case OPC_ADDU_QB:
+            case OPC_ADDU_S_QB:
+            case OPC_ADDU_PH:
+            case OPC_ADDU_S_PH:
+            case OPC_SUBQ_PH:
+            case OPC_SUBQ_S_PH:
+            case OPC_SUBQ_S_W:
+            case OPC_SUBU_QB:
+            case OPC_SUBU_S_QB:
+            case OPC_SUBU_PH:
+            case OPC_SUBU_S_PH:
+            case OPC_ADDSC:
+            case OPC_ADDWC:
+            case OPC_MODSUB:
+            case OPC_RADDU_W_QB:
+                gen_mipsdsp_arith(ctx, op1, op2, rd, rs, rt);
+                break;
+            case OPC_MULEU_S_PH_QBL:
+            case OPC_MULEU_S_PH_QBR:
+            case OPC_MULQ_RS_PH:
+            case OPC_MULEQ_S_W_PHL:
+            case OPC_MULEQ_S_W_PHR:
+            case OPC_MULQ_S_PH:
+                gen_mipsdsp_multiply(ctx, op1, op2, rd, rs, rt, 1);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK ADDU.QB");
+                generate_exception(ctx, EXCP_RI);
+                break;
+
+            }
+            break;
+        case OPC_CMPU_EQ_QB_DSP:
+            op2 = MASK_CMPU_EQ_QB(ctx->opcode);
+            switch (op2) {
+            case OPC_PRECR_SRA_PH_W:
+            case OPC_PRECR_SRA_R_PH_W:
+                gen_mipsdsp_arith(ctx, op1, op2, rt, rs, rd);
+                break;
+            case OPC_PRECR_QB_PH:
+            case OPC_PRECRQ_QB_PH:
+            case OPC_PRECRQ_PH_W:
+            case OPC_PRECRQ_RS_PH_W:
+            case OPC_PRECRQU_S_QB_PH:
+                gen_mipsdsp_arith(ctx, op1, op2, rd, rs, rt);
+                break;
+            case OPC_CMPU_EQ_QB:
+            case OPC_CMPU_LT_QB:
+            case OPC_CMPU_LE_QB:
+            case OPC_CMP_EQ_PH:
+            case OPC_CMP_LT_PH:
+            case OPC_CMP_LE_PH:
+                gen_mipsdsp_add_cmp_pick(ctx, op1, op2, rd, rs, rt, 0);
+                break;
+            case OPC_CMPGU_EQ_QB:
+            case OPC_CMPGU_LT_QB:
+            case OPC_CMPGU_LE_QB:
+            case OPC_CMPGDU_EQ_QB:
+            case OPC_CMPGDU_LT_QB:
+            case OPC_CMPGDU_LE_QB:
+            case OPC_PICK_QB:
+            case OPC_PICK_PH:
+            case OPC_PACKRL_PH:
+                gen_mipsdsp_add_cmp_pick(ctx, op1, op2, rd, rs, rt, 1);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK CMPU.EQ.QB");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_SHLL_QB_DSP:
+            gen_mipsdsp_shift(ctx, op1, rd, rs, rt);
+            break;
+        case OPC_DPA_W_PH_DSP:
+            op2 = MASK_DPA_W_PH(ctx->opcode);
+            switch (op2) {
+            case OPC_DPAU_H_QBL:
+            case OPC_DPAU_H_QBR:
+            case OPC_DPSU_H_QBL:
+            case OPC_DPSU_H_QBR:
+            case OPC_DPA_W_PH:
+            case OPC_DPAX_W_PH:
+            case OPC_DPAQ_S_W_PH:
+            case OPC_DPAQX_S_W_PH:
+            case OPC_DPAQX_SA_W_PH:
+            case OPC_DPS_W_PH:
+            case OPC_DPSX_W_PH:
+            case OPC_DPSQ_S_W_PH:
+            case OPC_DPSQX_S_W_PH:
+            case OPC_DPSQX_SA_W_PH:
+            case OPC_MULSAQ_S_W_PH:
+            case OPC_DPAQ_SA_L_W:
+            case OPC_DPSQ_SA_L_W:
+            case OPC_MAQ_S_W_PHL:
+            case OPC_MAQ_S_W_PHR:
+            case OPC_MAQ_SA_W_PHL:
+            case OPC_MAQ_SA_W_PHR:
+            case OPC_MULSA_W_PH:
+                gen_mipsdsp_multiply(ctx, op1, op2, rd, rs, rt, 0);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK DPA.W.PH");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_INSV_DSP:
+            op2 = MASK_INSV(ctx->opcode);
+            switch (op2) {
+            case OPC_INSV:
+                check_dsp(ctx);
+                {
+                    TCGv t0, t1;
+
+                    if (rt == 0) {
+                        MIPS_DEBUG("NOP");
+                        break;
+                    }
+
+                    t0 = tcg_temp_new();
+                    t1 = tcg_temp_new();
+
+                    gen_load_gpr(t0, rt);
+                    gen_load_gpr(t1, rs);
+
+                    gen_helper_insv(cpu_gpr[rt], cpu_env, t1, t0);
+
+                    tcg_temp_free(t0);
+                    tcg_temp_free(t1);
+                    break;
+                }
+            default:            /* Invalid */
+                MIPS_INVAL("MASK INSV");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_APPEND_DSP:
+            check_dspr2(ctx);
+            op2 = MASK_APPEND(ctx->opcode);
+            gen_mipsdsp_add_cmp_pick(ctx, op1, op2, rt, rs, rd, 1);
+            break;
+        case OPC_EXTR_W_DSP:
+            op2 = MASK_EXTR_W(ctx->opcode);
+            switch (op2) {
+            case OPC_EXTR_W:
+            case OPC_EXTR_R_W:
+            case OPC_EXTR_RS_W:
+            case OPC_EXTR_S_H:
+            case OPC_EXTRV_S_H:
+            case OPC_EXTRV_W:
+            case OPC_EXTRV_R_W:
+            case OPC_EXTRV_RS_W:
+            case OPC_EXTP:
+            case OPC_EXTPV:
+            case OPC_EXTPDP:
+            case OPC_EXTPDPV:
+                gen_mipsdsp_accinsn(ctx, op1, op2, rt, rs, rd, 1);
+                break;
+            case OPC_RDDSP:
+                gen_mipsdsp_accinsn(ctx, op1, op2, rd, rs, rt, 1);
+                break;
+            case OPC_SHILO:
+            case OPC_SHILOV:
+            case OPC_MTHLIP:
+            case OPC_WRDSP:
+                gen_mipsdsp_accinsn(ctx, op1, op2, rd, rs, rt, 0);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK EXTR.W");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
             break;
 #if defined(TARGET_MIPS64)
         case OPC_DEXTM ... OPC_DEXT:
@@ -12480,6 +14937,235 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
             check_insn(env, ctx, INSN_LOONGSON2E);
             gen_loongson_integer(ctx, op1, rd, rs, rt);
             break;
+        case OPC_ABSQ_S_QH_DSP:
+            op2 = MASK_ABSQ_S_QH(ctx->opcode);
+            switch (op2) {
+            case OPC_PRECEQ_L_PWL:
+            case OPC_PRECEQ_L_PWR:
+            case OPC_PRECEQ_PW_QHL:
+            case OPC_PRECEQ_PW_QHR:
+            case OPC_PRECEQ_PW_QHLA:
+            case OPC_PRECEQ_PW_QHRA:
+            case OPC_PRECEQU_QH_OBL:
+            case OPC_PRECEQU_QH_OBR:
+            case OPC_PRECEQU_QH_OBLA:
+            case OPC_PRECEQU_QH_OBRA:
+            case OPC_PRECEU_QH_OBL:
+            case OPC_PRECEU_QH_OBR:
+            case OPC_PRECEU_QH_OBLA:
+            case OPC_PRECEU_QH_OBRA:
+            case OPC_ABSQ_S_OB:
+            case OPC_ABSQ_S_PW:
+            case OPC_ABSQ_S_QH:
+                gen_mipsdsp_arith(ctx, op1, op2, rd, rs, rt);
+                break;
+            case OPC_REPL_OB:
+            case OPC_REPL_PW:
+            case OPC_REPL_QH:
+            case OPC_REPLV_OB:
+            case OPC_REPLV_PW:
+            case OPC_REPLV_QH:
+                gen_mipsdsp_bitinsn(env, ctx, op1, op2, rd, rt);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK ABSQ_S.QH");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_ADDU_OB_DSP:
+            op2 = MASK_ADDU_OB(ctx->opcode);
+            switch (op2) {
+            case OPC_RADDU_L_OB:
+            case OPC_SUBQ_PW:
+            case OPC_SUBQ_S_PW:
+            case OPC_SUBQ_QH:
+            case OPC_SUBQ_S_QH:
+            case OPC_SUBU_OB:
+            case OPC_SUBU_S_OB:
+            case OPC_SUBU_QH:
+            case OPC_SUBU_S_QH:
+            case OPC_SUBUH_OB:
+            case OPC_SUBUH_R_OB:
+            case OPC_ADDQ_PW:
+            case OPC_ADDQ_S_PW:
+            case OPC_ADDQ_QH:
+            case OPC_ADDQ_S_QH:
+            case OPC_ADDU_OB:
+            case OPC_ADDU_S_OB:
+            case OPC_ADDU_QH:
+            case OPC_ADDU_S_QH:
+            case OPC_ADDUH_OB:
+            case OPC_ADDUH_R_OB:
+                gen_mipsdsp_arith(ctx, op1, op2, rd, rs, rt);
+                break;
+            case OPC_MULEQ_S_PW_QHL:
+            case OPC_MULEQ_S_PW_QHR:
+            case OPC_MULEU_S_QH_OBL:
+            case OPC_MULEU_S_QH_OBR:
+            case OPC_MULQ_RS_QH:
+                gen_mipsdsp_multiply(ctx, op1, op2, rd, rs, rt, 1);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK ADDU.OB");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_CMPU_EQ_OB_DSP:
+            op2 = MASK_CMPU_EQ_OB(ctx->opcode);
+            switch (op2) {
+            case OPC_PRECR_SRA_QH_PW:
+            case OPC_PRECR_SRA_R_QH_PW:
+                /* Return value is rt. */
+                gen_mipsdsp_arith(ctx, op1, op2, rt, rs, rd);
+                break;
+            case OPC_PRECR_OB_QH:
+            case OPC_PRECRQ_OB_QH:
+            case OPC_PRECRQ_PW_L:
+            case OPC_PRECRQ_QH_PW:
+            case OPC_PRECRQ_RS_QH_PW:
+            case OPC_PRECRQU_S_OB_QH:
+                gen_mipsdsp_arith(ctx, op1, op2, rd, rs, rt);
+                break;
+            case OPC_CMPU_EQ_OB:
+            case OPC_CMPU_LT_OB:
+            case OPC_CMPU_LE_OB:
+            case OPC_CMP_EQ_QH:
+            case OPC_CMP_LT_QH:
+            case OPC_CMP_LE_QH:
+            case OPC_CMP_EQ_PW:
+            case OPC_CMP_LT_PW:
+            case OPC_CMP_LE_PW:
+                gen_mipsdsp_add_cmp_pick(ctx, op1, op2, rd, rs, rt, 0);
+                break;
+            case OPC_CMPGDU_EQ_OB:
+            case OPC_CMPGDU_LT_OB:
+            case OPC_CMPGDU_LE_OB:
+            case OPC_CMPGU_EQ_OB:
+            case OPC_CMPGU_LT_OB:
+            case OPC_CMPGU_LE_OB:
+            case OPC_PACKRL_PW:
+            case OPC_PICK_OB:
+            case OPC_PICK_PW:
+            case OPC_PICK_QH:
+                gen_mipsdsp_add_cmp_pick(ctx, op1, op2, rd, rs, rt, 1);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK CMPU_EQ.OB");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_DAPPEND_DSP:
+            check_dspr2(ctx);
+            op2 = MASK_DAPPEND(ctx->opcode);
+            gen_mipsdsp_add_cmp_pick(ctx, op1, op2, rt, rs, rd, 1);
+            break;
+        case OPC_DEXTR_W_DSP:
+            op2 = MASK_DEXTR_W(ctx->opcode);
+            switch (op2) {
+            case OPC_DEXTP:
+            case OPC_DEXTPDP:
+            case OPC_DEXTPDPV:
+            case OPC_DEXTPV:
+            case OPC_DEXTR_L:
+            case OPC_DEXTR_R_L:
+            case OPC_DEXTR_RS_L:
+            case OPC_DEXTR_W:
+            case OPC_DEXTR_R_W:
+            case OPC_DEXTR_RS_W:
+            case OPC_DEXTR_S_H:
+            case OPC_DEXTRV_L:
+            case OPC_DEXTRV_R_L:
+            case OPC_DEXTRV_RS_L:
+            case OPC_DEXTRV_S_H:
+            case OPC_DEXTRV_W:
+            case OPC_DEXTRV_R_W:
+            case OPC_DEXTRV_RS_W:
+                gen_mipsdsp_accinsn(ctx, op1, op2, rt, rs, rd, 1);
+                break;
+            case OPC_DMTHLIP:
+            case OPC_DSHILO:
+            case OPC_DSHILOV:
+                gen_mipsdsp_accinsn(ctx, op1, op2, rd, rs, rt, 0);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK DEXTR.W");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_DPAQ_W_QH_DSP:
+            op2 = MASK_DPAQ_W_QH(ctx->opcode);
+            switch (op2) {
+            case OPC_DPAU_H_OBL:
+            case OPC_DPAU_H_OBR:
+            case OPC_DPSU_H_OBL:
+            case OPC_DPSU_H_OBR:
+            case OPC_DPA_W_QH:
+            case OPC_DPAQ_S_W_QH:
+            case OPC_DPS_W_QH:
+            case OPC_DPSQ_S_W_QH:
+            case OPC_MULSAQ_S_W_QH:
+            case OPC_DPAQ_SA_L_PW:
+            case OPC_DPSQ_SA_L_PW:
+            case OPC_MULSAQ_S_L_PW:
+                gen_mipsdsp_multiply(ctx, op1, op2, rd, rs, rt, 0);
+                break;
+            case OPC_MAQ_S_W_QHLL:
+            case OPC_MAQ_S_W_QHLR:
+            case OPC_MAQ_S_W_QHRL:
+            case OPC_MAQ_S_W_QHRR:
+            case OPC_MAQ_SA_W_QHLL:
+            case OPC_MAQ_SA_W_QHLR:
+            case OPC_MAQ_SA_W_QHRL:
+            case OPC_MAQ_SA_W_QHRR:
+            case OPC_MAQ_S_L_PWL:
+            case OPC_MAQ_S_L_PWR:
+            case OPC_DMADD:
+            case OPC_DMADDU:
+            case OPC_DMSUB:
+            case OPC_DMSUBU:
+                gen_mipsdsp_multiply(ctx, op1, op2, rd, rs, rt, 0);
+                break;
+            default:            /* Invalid */
+                MIPS_INVAL("MASK DPAQ.W.QH");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_DINSV_DSP:
+            op2 = MASK_INSV(ctx->opcode);
+            switch (op2) {
+            case OPC_DINSV:
+                /* Check DSP availability first, as the INSV case does. */
+                check_dsp(ctx);
+                {
+                    TCGv t0, t1;
+                    if (rt == 0) {
+                        MIPS_DEBUG("NOP");
+                        break;
+                    }
+                    t0 = tcg_temp_new();
+                    t1 = tcg_temp_new();
+                    gen_load_gpr(t0, rt);
+                    gen_load_gpr(t1, rs);
+                    gen_helper_dinsv(cpu_gpr[rt], cpu_env, t1, t0);
+                    /* Release both temporaries once the helper call is emitted. */
+                    tcg_temp_free(t0);
+                    tcg_temp_free(t1);
+                    break;
+                }
+            default:            /* Invalid */
+                MIPS_INVAL("MASK DINSV");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            break;
+        case OPC_SHLL_OB_DSP:
+            gen_mipsdsp_shift(ctx, op1, rd, rs, rt);
+            break;
 #endif
         default:            /* Invalid */
             MIPS_INVAL("special3");
@@ -12503,6 +15189,14 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
             check_insn(env, ctx, ISA_MIPS32R2);
             /* Treat as NOP. */
             break;
+        case OPC_BPOSGE32:    /* MIPS DSP branch */
+#if defined(TARGET_MIPS64)
+        case OPC_BPOSGE64:
+#endif
+            check_dsp(ctx);
+            gen_compute_branch(ctx, op1, 4, -1, -2, (int32_t)imm << 2);
+            *is_branch = 1;
+            break;
         default:            /* Invalid */
             MIPS_INVAL("regimm");
             generate_exception(ctx, EXCP_RI);
@@ -12859,6 +15553,7 @@ gen_intermediate_code_internal (CPUMIPSState *env, TranslationBlock *tb,
             }
             gen_opc_pc[lj] = ctx.pc;
             gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK;
+            gen_opc_btarget[lj] = ctx.btarget;
             gen_opc_instr_start[lj] = 1;
             gen_opc_icount[lj] = num_insns;
         }
@@ -13209,6 +15904,11 @@ void cpu_state_reset(CPUMIPSState *env)
     if (env->CP0_Config1 & (1 << CP0C1_FP)) {
         env->CP0_Status |= (1 << CP0St_CU1);
     }
+    if (env->cpu_model->insn_flags & ASE_DSPR2) {
+        env->hflags |= MIPS_HFLAG_DSP | MIPS_HFLAG_DSPR2;
+    } else if (env->cpu_model->insn_flags & ASE_DSP) {
+        env->hflags |= MIPS_HFLAG_DSP;
+    }
 #else
     if (env->hflags & MIPS_HFLAG_BMASK) {
         /* If the exception was raised from a delay slot,
@@ -13274,4 +15974,13 @@ void restore_state_to_opc(CPUMIPSState *env, TranslationBlock *tb, int pc_pos)
     env->active_tc.PC = gen_opc_pc[pc_pos];
     env->hflags &= ~MIPS_HFLAG_BMASK;
     env->hflags |= gen_opc_hflags[pc_pos];
+    switch (env->hflags & MIPS_HFLAG_BMASK_BASE) {
+    case MIPS_HFLAG_BR:
+        break;
+    case MIPS_HFLAG_BC:
+    case MIPS_HFLAG_BL:
+    case MIPS_HFLAG_B:
+        env->btarget = gen_opc_btarget[pc_pos];
+        break;
+    }
 }
diff --git a/target-mips/translate_init.c b/target-mips/translate_init.c
index c39138f3c5..7cf238f34b 100644
--- a/target-mips/translate_init.c
+++ b/target-mips/translate_init.c
@@ -311,6 +311,29 @@ static const mips_def_t mips_defs[] =
         .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_MT,
         .mmu_type = MMU_TYPE_R4000,
     },
+    {
+        .name = "74Kf",
+        .CP0_PRid = 0x00019700,
+        .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) |
+                    (MMU_TYPE_R4000 << CP0C0_MT),
+        .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (15 << CP0C1_MMU) |
+                       (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) |
+                       (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
+                       (1 << CP0C1_CA),
+        .CP0_Config2 = MIPS_CONFIG2,
+        .CP0_Config3 = MIPS_CONFIG3 | (0 << CP0C3_VInt) | (1 << CP0C3_DSPP),
+        .CP0_LLAddr_rw_bitmask = 0,
+        .CP0_LLAddr_shift = 4,
+        .SYNCI_Step = 32,
+        .CCRes = 2,
+        .CP0_Status_rw_bitmask = 0x3778FF1F,
+        .CP1_fcr0 = (1 << FCR0_F64) | (1 << FCR0_L) | (1 << FCR0_W) |
+                    (1 << FCR0_D) | (1 << FCR0_S) | (0x93 << FCR0_PRID),
+        .SEGBITS = 32,
+        .PABITS = 32,
+        .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP | ASE_DSPR2,
+        .mmu_type = MMU_TYPE_R4000,
+    },
 #if defined(TARGET_MIPS64)
     {
         .name = "R4000",
@@ -484,6 +507,35 @@ static const mips_def_t mips_defs[] =
       .insn_flags = CPU_LOONGSON2F,
       .mmu_type = MMU_TYPE_R4000,
     },
+    {
+        /* A generic CPU providing MIPS64 ASE DSP 2 features.
+           FIXME: Eventually this should be replaced by a real CPU model. */
+        .name = "mips64dspr2",
+        .CP0_PRid = 0x00010000,
+        .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) | (0x2 << CP0C0_AT) |
+                       (MMU_TYPE_R4000 << CP0C0_MT),
+        .CP0_Config1 = MIPS_CONFIG1 | (1 << CP0C1_FP) | (63 << CP0C1_MMU) |
+                       (2 << CP0C1_IS) | (4 << CP0C1_IL) | (3 << CP0C1_IA) |
+                       (2 << CP0C1_DS) | (4 << CP0C1_DL) | (3 << CP0C1_DA) |
+                       (1 << CP0C1_PC) | (1 << CP0C1_WR) | (1 << CP0C1_EP),
+        .CP0_Config2 = MIPS_CONFIG2,
+        .CP0_Config3 = MIPS_CONFIG3 | (1 << CP0C3_LPA),
+        .CP0_LLAddr_rw_bitmask = 0,
+        .CP0_LLAddr_shift = 0,
+        .SYNCI_Step = 32,
+        .CCRes = 2,
+        .CP0_Status_rw_bitmask = 0x37FBFFFF,
+        .CP1_fcr0 = (1 << FCR0_F64) | (1 << FCR0_3D) | (1 << FCR0_PS) |
+                    (1 << FCR0_L) | (1 << FCR0_W) | (1 << FCR0_D) |
+                    (1 << FCR0_S) | (0x00 << FCR0_PRID) | (0x0 << FCR0_REV),
+        .SEGBITS = 42,
+        /* The architectural limit is 59, but we have hardcoded 36 bit
+           in some places...
+        .PABITS = 59, */ /* the architectural limit */
+        .PABITS = 36,
+        .insn_flags = CPU_MIPS64R2 | ASE_DSP | ASE_DSPR2,
+        .mmu_type = MMU_TYPE_R4000,
+    },
 
 #endif
 };