summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-x configure 6
-rw-r--r-- default-configs/or1k-linux-user.mak 1
-rw-r--r-- default-configs/or1k-softmmu.mak 4
-rw-r--r-- default-configs/or32-linux-user.mak 1
-rw-r--r-- default-configs/or32-softmmu.mak 4
-rw-r--r-- hw/openrisc/openrisc_sim.c 4
-rw-r--r-- linux-user/elfload.c 3
-rw-r--r-- linux-user/main.c 98
-rw-r--r-- linux-user/openrisc/target_cpu.h 4
-rw-r--r-- linux-user/openrisc/target_syscall.h 2
-rw-r--r-- target/openrisc/Makefile.objs 2
-rw-r--r-- target/openrisc/cpu.c 1
-rw-r--r-- target/openrisc/cpu.h 50
-rw-r--r-- target/openrisc/exception_helper.c 32
-rw-r--r-- target/openrisc/fpu_helper.c 68
-rw-r--r-- target/openrisc/gdbstub.c 17
-rw-r--r-- target/openrisc/helper.h 33
-rw-r--r-- target/openrisc/int_helper.c 61
-rw-r--r-- target/openrisc/interrupt.c 14
-rw-r--r-- target/openrisc/interrupt_helper.c 4
-rw-r--r-- target/openrisc/machine.c 62
-rw-r--r-- target/openrisc/mmu.c 1
-rw-r--r-- target/openrisc/sys_helper.c 62
-rw-r--r-- target/openrisc/translate.c 1389
-rw-r--r-- tests/tcg/openrisc/Makefile 4
25 files changed, 915 insertions, 1012 deletions
diff --git a/configure b/configure
index 63253398a2..1c9655e639 100755
--- a/configure
+++ b/configure
@@ -5843,7 +5843,7 @@ target_name=$(echo $target | cut -d '-' -f 1)
 target_bigendian="no"
 
 case "$target_name" in
-  armeb|hppa|lm32|m68k|microblaze|mips|mipsn32|mips64|moxie|or32|ppc|ppcemb|ppc64|ppc64abi32|s390x|sh4eb|sparc|sparc64|sparc32plus|xtensaeb)
+  armeb|hppa|lm32|m68k|microblaze|mips|mipsn32|mips64|moxie|or1k|ppc|ppcemb|ppc64|ppc64abi32|s390x|sh4eb|sparc|sparc64|sparc32plus|xtensaeb)
   target_bigendian=yes
   ;;
 esac
@@ -5937,7 +5937,7 @@ case "$target_name" in
   ;;
   nios2)
   ;;
-  or32)
+  or1k)
     TARGET_ARCH=openrisc
     TARGET_BASE_ARCH=openrisc
   ;;
@@ -6145,7 +6145,7 @@ for i in $ARCH $TARGET_BASE_ARCH ; do
   nios2)
     disas_config "NIOS2"
   ;;
-  or32)
+  or1k)
     disas_config "OPENRISC"
   ;;
   ppc*)
diff --git a/default-configs/or1k-linux-user.mak b/default-configs/or1k-linux-user.mak
new file mode 100644
index 0000000000..20e03c1317
--- /dev/null
+++ b/default-configs/or1k-linux-user.mak
@@ -0,0 +1 @@
+# Default configuration for or1k-linux-user
diff --git a/default-configs/or1k-softmmu.mak b/default-configs/or1k-softmmu.mak
new file mode 100644
index 0000000000..10bfa7abb8
--- /dev/null
+++ b/default-configs/or1k-softmmu.mak
@@ -0,0 +1,4 @@
+# Default configuration for or1k-softmmu
+
+CONFIG_SERIAL=y
+CONFIG_OPENCORES_ETH=y
diff --git a/default-configs/or32-linux-user.mak b/default-configs/or32-linux-user.mak
deleted file mode 100644
index 808c1f9b83..0000000000
--- a/default-configs/or32-linux-user.mak
+++ /dev/null
@@ -1 +0,0 @@
-# Default configuration for or32-linux-user
diff --git a/default-configs/or32-softmmu.mak b/default-configs/or32-softmmu.mak
deleted file mode 100644
index cce474672a..0000000000
--- a/default-configs/or32-softmmu.mak
+++ /dev/null
@@ -1,4 +0,0 @@
-# Default configuration for or32-softmmu
-
-CONFIG_SERIAL=y
-CONFIG_OPENCORES_ETH=y
diff --git a/hw/openrisc/openrisc_sim.c b/hw/openrisc/openrisc_sim.c
index 6d06d5be01..fc0d0967b7 100644
--- a/hw/openrisc/openrisc_sim.c
+++ b/hw/openrisc/openrisc_sim.c
@@ -139,10 +139,10 @@ static void openrisc_sim_init(MachineState *machine)
 
 static void openrisc_sim_machine_init(MachineClass *mc)
 {
-    mc->desc = "or32 simulation";
+    mc->desc = "or1k simulation";
     mc->init = openrisc_sim_init;
     mc->max_cpus = 1;
     mc->is_default = 1;
 }
 
-DEFINE_MACHINE("or32-sim", openrisc_sim_machine_init)
+DEFINE_MACHINE("or1k-sim", openrisc_sim_machine_init)
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index c66cbbe84b..8271227339 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -1054,9 +1054,8 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs,
     for (i = 0; i < 32; i++) {
         (*regs)[i] = tswapreg(env->gpr[i]);
     }
-
     (*regs)[32] = tswapreg(env->pc);
-    (*regs)[33] = tswapreg(env->sr);
+    (*regs)[33] = tswapreg(cpu_get_sr(env));
 }
 #define ELF_HWCAP 0
 #define ELF_PLATFORM NULL
diff --git a/linux-user/main.c b/linux-user/main.c
index e588f58f2a..4fd49ce6b6 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -2574,52 +2574,17 @@ kuser_fail:
 void cpu_loop(CPUOpenRISCState *env)
 {
     CPUState *cs = CPU(openrisc_env_get_cpu(env));
-    int trapnr, gdbsig;
+    int trapnr;
     abi_long ret;
+    target_siginfo_t info;
 
     for (;;) {
         cpu_exec_start(cs);
         trapnr = cpu_exec(cs);
         cpu_exec_end(cs);
         process_queued_cpu_work(cs);
-        gdbsig = 0;
 
         switch (trapnr) {
-        case EXCP_RESET:
-            qemu_log_mask(CPU_LOG_INT, "\nReset request, exit, pc is %#x\n", env->pc);
-            exit(EXIT_FAILURE);
-            break;
-        case EXCP_BUSERR:
-            qemu_log_mask(CPU_LOG_INT, "\nBus error, exit, pc is %#x\n", env->pc);
-            gdbsig = TARGET_SIGBUS;
-            break;
-        case EXCP_DPF:
-        case EXCP_IPF:
-            cpu_dump_state(cs, stderr, fprintf, 0);
-            gdbsig = TARGET_SIGSEGV;
-            break;
-        case EXCP_TICK:
-            qemu_log_mask(CPU_LOG_INT, "\nTick time interrupt pc is %#x\n", env->pc);
-            break;
-        case EXCP_ALIGN:
-            qemu_log_mask(CPU_LOG_INT, "\nAlignment pc is %#x\n", env->pc);
-            gdbsig = TARGET_SIGBUS;
-            break;
-        case EXCP_ILLEGAL:
-            qemu_log_mask(CPU_LOG_INT, "\nIllegal instructionpc is %#x\n", env->pc);
-            gdbsig = TARGET_SIGILL;
-            break;
-        case EXCP_INT:
-            qemu_log_mask(CPU_LOG_INT, "\nExternal interruptpc is %#x\n", env->pc);
-            break;
-        case EXCP_DTLBMISS:
-        case EXCP_ITLBMISS:
-            qemu_log_mask(CPU_LOG_INT, "\nTLB miss\n");
-            break;
-        case EXCP_RANGE:
-            qemu_log_mask(CPU_LOG_INT, "\nRange\n");
-            gdbsig = TARGET_SIGSEGV;
-            break;
         case EXCP_SYSCALL:
             env->pc += 4;   /* 0xc00; */
             ret = do_syscall(env,
@@ -2636,32 +2601,54 @@ void cpu_loop(CPUOpenRISCState *env)
                 env->gpr[11] = ret;
             }
             break;
+        case EXCP_DPF:
+        case EXCP_IPF:
+        case EXCP_RANGE:
+            info.si_signo = TARGET_SIGSEGV;
+            info.si_errno = 0;
+            info.si_code = TARGET_SEGV_MAPERR;
+            info._sifields._sigfault._addr = env->pc;
+            queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+            break;
+        case EXCP_ALIGN:
+            info.si_signo = TARGET_SIGBUS;
+            info.si_errno = 0;
+            info.si_code = TARGET_BUS_ADRALN;
+            info._sifields._sigfault._addr = env->pc;
+            queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+            break;
+        case EXCP_ILLEGAL:
+            info.si_signo = TARGET_SIGILL;
+            info.si_errno = 0;
+            info.si_code = TARGET_ILL_ILLOPC;
+            info._sifields._sigfault._addr = env->pc;
+            queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+            break;
         case EXCP_FPE:
-            qemu_log_mask(CPU_LOG_INT, "\nFloating point error\n");
+            info.si_signo = TARGET_SIGFPE;
+            info.si_errno = 0;
+            info.si_code = 0;
+            info._sifields._sigfault._addr = env->pc;
+            queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
             break;
-        case EXCP_TRAP:
-            qemu_log_mask(CPU_LOG_INT, "\nTrap\n");
-            gdbsig = TARGET_SIGTRAP;
+        case EXCP_INTERRUPT:
+            /* We processed the pending cpu work above.  */
             break;
-        case EXCP_NR:
-            qemu_log_mask(CPU_LOG_INT, "\nNR\n");
+        case EXCP_DEBUG:
+            trapnr = gdb_handlesig(cs, TARGET_SIGTRAP);
+            if (trapnr) {
+                info.si_signo = trapnr;
+                info.si_errno = 0;
+                info.si_code = TARGET_TRAP_BRKPT;
+                queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
+            }
             break;
         case EXCP_ATOMIC:
             cpu_exec_step_atomic(cs);
             break;
         default:
-            EXCP_DUMP(env, "\nqemu: unhandled CPU exception %#x - aborting\n",
-                     trapnr);
-            gdbsig = TARGET_SIGILL;
-            break;
-        }
-        if (gdbsig) {
-            gdb_handlesig(cs, gdbsig);
-            if (gdbsig != TARGET_SIGTRAP) {
-                exit(EXIT_FAILURE);
-            }
+            g_assert_not_reached();
         }
-
         process_pending_signals(env);
     }
 }
@@ -4778,9 +4765,8 @@ int main(int argc, char **argv, char **envp)
         for (i = 0; i < 32; i++) {
             env->gpr[i] = regs->gpr[i];
         }
-
-        env->sr = regs->sr;
         env->pc = regs->pc;
+        cpu_set_sr(env, regs->sr);
     }
 #elif defined(TARGET_SH4)
     {
diff --git a/linux-user/openrisc/target_cpu.h b/linux-user/openrisc/target_cpu.h
index a21ed1aff8..f283d96a93 100644
--- a/linux-user/openrisc/target_cpu.h
+++ b/linux-user/openrisc/target_cpu.h
@@ -30,9 +30,7 @@ static inline void cpu_clone_regs(CPUOpenRISCState *env, target_ulong newsp)
 
 static inline void cpu_set_tls(CPUOpenRISCState *env, target_ulong newtls)
 {
-    /* Linux kernel 3.10 does not pay any attention to CLONE_SETTLS
-     * in copy_thread(), so QEMU need not do so either.
-     */
+    env->gpr[10] = newtls;
 }
 
 #endif
diff --git a/linux-user/openrisc/target_syscall.h b/linux-user/openrisc/target_syscall.h
index 9d3380f9a8..03104f80af 100644
--- a/linux-user/openrisc/target_syscall.h
+++ b/linux-user/openrisc/target_syscall.h
@@ -31,4 +31,6 @@ struct target_pt_regs {
 #define TARGET_MLOCKALL_MCL_CURRENT 1
 #define TARGET_MLOCKALL_MCL_FUTURE  2
 
+#define MMAP_SHIFT TARGET_PAGE_BITS
+
 #endif /* OPENRISC_TARGET_SYSCALL_H */
diff --git a/target/openrisc/Makefile.objs b/target/openrisc/Makefile.objs
index 397d01650e..918b1c6e9c 100644
--- a/target/openrisc/Makefile.objs
+++ b/target/openrisc/Makefile.objs
@@ -1,5 +1,5 @@
 obj-$(CONFIG_SOFTMMU) += machine.o
 obj-y += cpu.o exception.o interrupt.o mmu.o translate.o
-obj-y += exception_helper.o fpu_helper.o int_helper.o \
+obj-y += exception_helper.o fpu_helper.o \
          interrupt_helper.o mmu_helper.o sys_helper.o
 obj-y += gdbstub.o
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index 422139d29f..7fd2b9a216 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -48,6 +48,7 @@ static void openrisc_cpu_reset(CPUState *s)
 
     cpu->env.pc = 0x100;
     cpu->env.sr = SR_FO | SR_SM;
+    cpu->env.lock_addr = -1;
     s->exception_index = -1;
 
     cpu->env.upr = UPR_UP | UPR_DMP | UPR_IMP | UPR_PICP | UPR_TTP;
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
index 508ef568b4..418a0e6960 100644
--- a/target/openrisc/cpu.h
+++ b/target/openrisc/cpu.h
@@ -32,7 +32,7 @@ struct OpenRISCCPU;
 #include "fpu/softfloat.h"
 #include "qom/cpu.h"
 
-#define TYPE_OPENRISC_CPU "or32-cpu"
+#define TYPE_OPENRISC_CPU "or1k-cpu"
 
 #define OPENRISC_CPU_CLASS(klass) \
     OBJECT_CLASS_CHECK(OpenRISCCPUClass, (klass), TYPE_OPENRISC_CPU)
@@ -58,6 +58,7 @@ typedef struct OpenRISCCPUClass {
 } OpenRISCCPUClass;
 
 #define NB_MMU_MODES    3
+#define TARGET_INSN_START_EXTRA_WORDS 1
 
 enum {
     MMU_NOMMU_IDX = 0,
@@ -82,9 +83,6 @@ enum {
 /* Version Register */
 #define SPR_VR 0xFFFF003F
 
-/* Internal flags, delay slot flag */
-#define D_FLAG    1
-
 /* Interrupt */
 #define NR_IRQS  32
 
@@ -273,20 +271,18 @@ typedef struct CPUOpenRISCTLBContext {
 typedef struct CPUOpenRISCState {
     target_ulong gpr[32];     /* General registers */
     target_ulong pc;          /* Program counter */
-    target_ulong npc;         /* Next PC */
     target_ulong ppc;         /* Prev PC */
     target_ulong jmp_pc;      /* Jump PC */
 
-    target_ulong machi;       /* Multiply register MACHI */
-    target_ulong maclo;       /* Multiply register MACLO */
-
-    target_ulong fpmaddhi;    /* Multiply and add float register FPMADDHI */
-    target_ulong fpmaddlo;    /* Multiply and add float register FPMADDLO */
+    uint64_t mac;             /* Multiply registers MACHI:MACLO */
 
     target_ulong epcr;        /* Exception PC register */
     target_ulong eear;        /* Exception EA register */
 
-    uint32_t sr;              /* Supervisor register */
+    target_ulong sr_f;        /* the SR_F bit, values 0, 1.  */
+    target_ulong sr_cy;       /* the SR_CY bit, values 0, 1.  */
+    target_long  sr_ov;       /* the SR_OV bit (in the sign bit only) */
+    uint32_t sr;              /* Supervisor register, without SR_{F,CY,OV} */
     uint32_t vr;              /* Version register */
     uint32_t upr;             /* Unit presence register */
     uint32_t cpucfgr;         /* CPU configure register */
@@ -296,9 +292,10 @@ typedef struct CPUOpenRISCState {
     uint32_t fpcsr;           /* Float register */
     float_status fp_status;
 
-    uint32_t flags;           /* cpu_flags, we only use it for exception
-                                 in solt so far.  */
-    uint32_t btaken;          /* the SR_F bit */
+    target_ulong lock_addr;
+    target_ulong lock_value;
+
+    uint32_t dflag;           /* In delay slot (boolean) */
 
     /* Fields up to this point are cleared by a CPU reset */
     struct {} end_reset_fields;
@@ -391,14 +388,19 @@ int cpu_openrisc_get_phys_data(OpenRISCCPU *cpu,
 
 #include "exec/cpu-all.h"
 
+#define TB_FLAGS_DFLAG 1
+#define TB_FLAGS_R0_0  2
+#define TB_FLAGS_OVE   SR_OVE
+
 static inline void cpu_get_tb_cpu_state(CPUOpenRISCState *env,
                                         target_ulong *pc,
                                         target_ulong *cs_base, uint32_t *flags)
 {
     *pc = env->pc;
     *cs_base = 0;
-    /* D_FLAG -- branch instruction exception */
-    *flags = (env->flags & D_FLAG);
+    *flags = (env->dflag
+              | (env->gpr[0] == 0 ? TB_FLAGS_R0_0 : 0)
+              | (env->sr & SR_OVE));
 }
 
 static inline int cpu_mmu_index(CPUOpenRISCState *env, bool ifetch)
@@ -409,6 +411,22 @@ static inline int cpu_mmu_index(CPUOpenRISCState *env, bool ifetch)
     return (env->sr & SR_SM) == 0 ? MMU_USER_IDX : MMU_SUPERVISOR_IDX;
 }
 
+static inline uint32_t cpu_get_sr(const CPUOpenRISCState *env)
+{
+    return (env->sr
+            + env->sr_f * SR_F
+            + env->sr_cy * SR_CY
+            + (env->sr_ov < 0) * SR_OV);
+}
+
+static inline void cpu_set_sr(CPUOpenRISCState *env, uint32_t val)
+{
+    env->sr_f = (val & SR_F) != 0;
+    env->sr_cy = (val & SR_CY) != 0;
+    env->sr_ov = (val & SR_OV ? -1 : 0);
+    env->sr = (val & ~(SR_F | SR_CY | SR_OV)) | SR_FO;
+}
+
 #define CPU_INTERRUPT_TIMER   CPU_INTERRUPT_TGT_INT_0
 
 #endif /* OPENRISC_CPU_H */
diff --git a/target/openrisc/exception_helper.c b/target/openrisc/exception_helper.c
index 329a9e400b..a8a5f69b05 100644
--- a/target/openrisc/exception_helper.c
+++ b/target/openrisc/exception_helper.c
@@ -19,7 +19,9 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
+#include "exec/exec-all.h"
 #include "exec/helper-proto.h"
+#include "exec/exec-all.h"
 #include "exception.h"
 
 void HELPER(exception)(CPUOpenRISCState *env, uint32_t excp)
@@ -28,3 +30,33 @@ void HELPER(exception)(CPUOpenRISCState *env, uint32_t excp)
 
     raise_exception(cpu, excp);
 }
+
+static void QEMU_NORETURN do_range(CPUOpenRISCState *env, uintptr_t pc)
+{
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
+    CPUState *cs = CPU(cpu);
+
+    cs->exception_index = EXCP_RANGE;
+    cpu_loop_exit_restore(cs, pc);
+}
+
+void HELPER(ove_cy)(CPUOpenRISCState *env)
+{
+    if (env->sr_cy) {
+        do_range(env, GETPC());
+    }
+}
+
+void HELPER(ove_ov)(CPUOpenRISCState *env)
+{
+    if (env->sr_ov < 0) {
+        do_range(env, GETPC());
+    }
+}
+
+void HELPER(ove_cyov)(CPUOpenRISCState *env)
+{
+    if (env->sr_cy || env->sr_ov < 0) {
+        do_range(env, GETPC());
+    }
+}
diff --git a/target/openrisc/fpu_helper.c b/target/openrisc/fpu_helper.c
index c54404b80d..1375cea948 100644
--- a/target/openrisc/fpu_helper.c
+++ b/target/openrisc/fpu_helper.c
@@ -146,52 +146,32 @@ FLOAT_CALC(div)
 FLOAT_CALC(rem)
 #undef FLOAT_CALC
 
-#define FLOAT_TERNOP(name1, name2)                                        \
-uint64_t helper_float_ ## name1 ## name2 ## _d(CPUOpenRISCState *env,     \
-                                               uint64_t fdt0,             \
-                                               uint64_t fdt1)             \
-{                                                                         \
-    uint64_t result, temp, hi, lo;                                        \
-    uint32_t val1, val2;                                                  \
-    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);                         \
-    hi = env->fpmaddhi;                                                   \
-    lo = env->fpmaddlo;                                                   \
-    set_float_exception_flags(0, &cpu->env.fp_status);                    \
-    result = float64_ ## name1(fdt0, fdt1, &cpu->env.fp_status);          \
-    lo &= 0xffffffff;                                                     \
-    hi &= 0xffffffff;                                                     \
-    temp = (hi << 32) | lo;                                               \
-    result = float64_ ## name2(result, temp, &cpu->env.fp_status);        \
-    val1 = result >> 32;                                                  \
-    val2 = (uint32_t) (result & 0xffffffff);                              \
-    update_fpcsr(cpu);                                                    \
-    cpu->env.fpmaddlo = val2;                                             \
-    cpu->env.fpmaddhi = val1;                                             \
-    return 0;                                                             \
-}                                                                         \
-                                                                          \
-uint32_t helper_float_ ## name1 ## name2 ## _s(CPUOpenRISCState *env,     \
-                                            uint32_t fdt0, uint32_t fdt1) \
-{                                                                         \
-    uint64_t result, temp, hi, lo;                                        \
-    uint32_t val1, val2;                                                  \
-    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);                         \
-    hi = cpu->env.fpmaddhi;                                               \
-    lo = cpu->env.fpmaddlo;                                               \
-    set_float_exception_flags(0, &cpu->env.fp_status);                    \
-    result = float64_ ## name1(fdt0, fdt1, &cpu->env.fp_status);          \
-    temp = (hi << 32) | lo;                                               \
-    result = float64_ ## name2(result, temp, &cpu->env.fp_status);        \
-    val1 = result >> 32;                                                  \
-    val2 = (uint32_t) (result & 0xffffffff);                              \
-    update_fpcsr(cpu);                                                    \
-    cpu->env.fpmaddlo = val2;                                             \
-    cpu->env.fpmaddhi = val1;                                             \
-    return 0;                                                             \
+
+uint64_t helper_float_madd_d(CPUOpenRISCState *env, uint64_t a,
+                             uint64_t b, uint64_t c)
+{
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
+    uint64_t result;
+    set_float_exception_flags(0, &cpu->env.fp_status);
+    /* Note that or1ksim doesn't use merged operation.  */
+    result = float64_mul(b, c, &cpu->env.fp_status);
+    result = float64_add(result, a, &cpu->env.fp_status);
+    update_fpcsr(cpu);
+    return result;
 }
 
-FLOAT_TERNOP(mul, add)
-#undef FLOAT_TERNOP
+uint32_t helper_float_madd_s(CPUOpenRISCState *env, uint32_t a,
+                             uint32_t b, uint32_t c)
+{
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
+    uint32_t result;
+    set_float_exception_flags(0, &cpu->env.fp_status);
+    /* Note that or1ksim doesn't use merged operation.  */
+    result = float32_mul(b, c, &cpu->env.fp_status);
+    result = float32_add(result, a, &cpu->env.fp_status);
+    update_fpcsr(cpu);
+    return result;
+}
 
 
 #define FLOAT_CMP(name)                                                   \
diff --git a/target/openrisc/gdbstub.c b/target/openrisc/gdbstub.c
index cb16e76358..b18c7e9f05 100644
--- a/target/openrisc/gdbstub.c
+++ b/target/openrisc/gdbstub.c
@@ -34,11 +34,11 @@ int openrisc_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
         case 32:    /* PPC */
             return gdb_get_reg32(mem_buf, env->ppc);
 
-        case 33:    /* NPC */
-            return gdb_get_reg32(mem_buf, env->npc);
+        case 33:    /* NPC (equals PC) */
+            return gdb_get_reg32(mem_buf, env->pc);
 
         case 34:    /* SR */
-            return gdb_get_reg32(mem_buf, env->sr);
+            return gdb_get_reg32(mem_buf, cpu_get_sr(env));
 
         default:
             break;
@@ -68,12 +68,17 @@ int openrisc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
             env->ppc = tmp;
             break;
 
-        case 33: /* NPC */
-            env->npc = tmp;
+        case 33: /* NPC (equals PC) */
+            /* If setting PC to something different,
+               also clear delayed branch status.  */
+            if (env->pc != tmp) {
+                env->pc = tmp;
+                env->dflag = 0;
+            }
             break;
 
         case 34: /* SR */
-            env->sr = tmp;
+            cpu_set_sr(env, tmp);
             break;
 
         default:
diff --git a/target/openrisc/helper.h b/target/openrisc/helper.h
index bcc7245fc3..4fd1a6bb8e 100644
--- a/target/openrisc/helper.h
+++ b/target/openrisc/helper.h
@@ -18,23 +18,23 @@
  */
 
 /* exception */
-DEF_HELPER_FLAGS_2(exception, 0, void, env, i32)
+DEF_HELPER_FLAGS_2(exception, TCG_CALL_NO_WG, void, env, i32)
+DEF_HELPER_FLAGS_1(ove_cy, TCG_CALL_NO_WG, void, env)
+DEF_HELPER_FLAGS_1(ove_ov, TCG_CALL_NO_WG, void, env)
+DEF_HELPER_FLAGS_1(ove_cyov, TCG_CALL_NO_WG, void, env)
 
 /* float */
-DEF_HELPER_FLAGS_2(itofd, 0, i64, env, i64)
-DEF_HELPER_FLAGS_2(itofs, 0, i32, env, i32)
-DEF_HELPER_FLAGS_2(ftoid, 0, i64, env, i64)
-DEF_HELPER_FLAGS_2(ftois, 0, i32, env, i32)
+DEF_HELPER_FLAGS_2(itofd, TCG_CALL_NO_WG, i64, env, i64)
+DEF_HELPER_FLAGS_2(itofs, TCG_CALL_NO_WG, i32, env, i32)
+DEF_HELPER_FLAGS_2(ftoid, TCG_CALL_NO_WG, i64, env, i64)
+DEF_HELPER_FLAGS_2(ftois, TCG_CALL_NO_WG, i32, env, i32)
 
-#define FOP_MADD(op)                                             \
-DEF_HELPER_FLAGS_3(float_ ## op ## _s, 0, i32, env, i32, i32)    \
-DEF_HELPER_FLAGS_3(float_ ## op ## _d, 0, i64, env, i64, i64)
-FOP_MADD(muladd)
-#undef FOP_MADD
+DEF_HELPER_FLAGS_4(float_madd_s, TCG_CALL_NO_WG, i32, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(float_madd_d, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
 
 #define FOP_CALC(op)                                            \
-DEF_HELPER_FLAGS_3(float_ ## op ## _s, 0, i32, env, i32, i32)    \
-DEF_HELPER_FLAGS_3(float_ ## op ## _d, 0, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(float_ ## op ## _s, TCG_CALL_NO_WG, i32, env, i32, i32) \
+DEF_HELPER_FLAGS_3(float_ ## op ## _d, TCG_CALL_NO_WG, i64, env, i64, i64)
 FOP_CALC(add)
 FOP_CALC(sub)
 FOP_CALC(mul)
@@ -43,8 +43,8 @@ FOP_CALC(rem)
 #undef FOP_CALC
 
 #define FOP_CMP(op)                                              \
-DEF_HELPER_FLAGS_3(float_ ## op ## _s, 0, i32, env, i32, i32)    \
-DEF_HELPER_FLAGS_3(float_ ## op ## _d, 0, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(float_ ## op ## _s, TCG_CALL_NO_WG, i32, env, i32, i32) \
+DEF_HELPER_FLAGS_3(float_ ## op ## _d, TCG_CALL_NO_WG, i64, env, i64, i64)
 FOP_CMP(eq)
 FOP_CMP(lt)
 FOP_CMP(le)
@@ -53,12 +53,9 @@ FOP_CMP(gt)
 FOP_CMP(ge)
 #undef FOP_CMP
 
-/* int */
-DEF_HELPER_FLAGS_3(mul32, 0, i32, env, i32, i32)
-
 /* interrupt */
 DEF_HELPER_FLAGS_1(rfe, 0, void, env)
 
 /* sys */
 DEF_HELPER_FLAGS_4(mtspr, 0, void, env, tl, tl, tl)
-DEF_HELPER_FLAGS_4(mfspr, 0, tl, env, tl, tl, tl)
+DEF_HELPER_FLAGS_4(mfspr, TCG_CALL_NO_WG, tl, env, tl, tl, tl)
diff --git a/target/openrisc/int_helper.c b/target/openrisc/int_helper.c
deleted file mode 100644
index ba0fd277cd..0000000000
--- a/target/openrisc/int_helper.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * OpenRISC int helper routines
- *
- * Copyright (c) 2011-2012 Jia Liu <proljc@gmail.com>
- *                         Feng Gao <gf91597@gmail.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu/osdep.h"
-#include "cpu.h"
-#include "exec/helper-proto.h"
-#include "exception.h"
-#include "qemu/host-utils.h"
-
-uint32_t HELPER(mul32)(CPUOpenRISCState *env,
-                       uint32_t ra, uint32_t rb)
-{
-    uint64_t result;
-    uint32_t high, cy;
-
-    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
-
-    result = (uint64_t)ra * rb;
-    /* regisiers in or32 is 32bit, so 32 is NOT a magic number.
-       or64 is not handled in this function, and not implement yet,
-       TARGET_LONG_BITS for or64 is 64, it will break this function,
-       so, we didn't use TARGET_LONG_BITS here.  */
-    high = result >> 32;
-    cy = result >> (32 - 1);
-
-    if ((cy & 0x1) == 0x0) {
-        if (high == 0x0) {
-            return result;
-        }
-    }
-
-    if ((cy & 0x1) == 0x1) {
-        if (high == 0xffffffff) {
-            return result;
-        }
-    }
-
-    cpu->env.sr |= (SR_OV | SR_CY);
-    if (cpu->env.sr & SR_OVE) {
-        raise_exception(cpu, EXCP_RANGE);
-    }
-
-    return result;
-}
diff --git a/target/openrisc/interrupt.c b/target/openrisc/interrupt.c
index e43fc84ef7..a2eec6fb32 100644
--- a/target/openrisc/interrupt.c
+++ b/target/openrisc/interrupt.c
@@ -34,20 +34,27 @@ void openrisc_cpu_do_interrupt(CPUState *cs)
     CPUOpenRISCState *env = &cpu->env;
 
     env->epcr = env->pc;
-    if (env->flags & D_FLAG) {
-        env->flags &= ~D_FLAG;
+    if (env->dflag) {
+        env->dflag = 0;
         env->sr |= SR_DSX;
         env->epcr -= 4;
+    } else {
+        env->sr &= ~SR_DSX;
     }
     if (cs->exception_index == EXCP_SYSCALL) {
         env->epcr += 4;
     }
+    /* When we have an illegal instruction the error effective address
+       shall be set to the illegal instruction address.  */
+    if (cs->exception_index == EXCP_ILLEGAL) {
+        env->eear = env->pc;
+    }
 
     /* For machine-state changed between user-mode and supervisor mode,
        we need flush TLB when we enter&exit EXCP.  */
     tlb_flush(cs);
 
-    env->esr = env->sr;
+    env->esr = cpu_get_sr(env);
     env->sr &= ~SR_DME;
     env->sr &= ~SR_IME;
     env->sr |= SR_SM;
@@ -55,6 +62,7 @@ void openrisc_cpu_do_interrupt(CPUState *cs)
     env->sr &= ~SR_TEE;
     env->tlb->cpu_openrisc_map_address_data = &cpu_openrisc_get_phys_nommu;
     env->tlb->cpu_openrisc_map_address_code = &cpu_openrisc_get_phys_nommu;
+    env->lock_addr = -1;
 
     if (cs->exception_index > 0 && cs->exception_index < EXCP_NR) {
         env->pc = (cs->exception_index << 8);
diff --git a/target/openrisc/interrupt_helper.c b/target/openrisc/interrupt_helper.c
index 0ed5146e8d..56620e0571 100644
--- a/target/openrisc/interrupt_helper.c
+++ b/target/openrisc/interrupt_helper.c
@@ -32,8 +32,8 @@ void HELPER(rfe)(CPUOpenRISCState *env)
                          (cpu->env.esr & (SR_SM | SR_IME | SR_DME));
 #endif
     cpu->env.pc = cpu->env.epcr;
-    cpu->env.npc = cpu->env.epcr;
-    cpu->env.sr = cpu->env.esr;
+    cpu_set_sr(&cpu->env, cpu->env.esr);
+    cpu->env.lock_addr = -1;
 
 #ifndef CONFIG_USER_ONLY
     if (cpu->env.sr & SR_DME) {
diff --git a/target/openrisc/machine.c b/target/openrisc/machine.c
index 17b0c77d6c..686eaa30c9 100644
--- a/target/openrisc/machine.c
+++ b/target/openrisc/machine.c
@@ -24,20 +24,64 @@
 #include "hw/boards.h"
 #include "migration/cpu.h"
 
+static int get_sr(QEMUFile *f, void *opaque, size_t size, VMStateField *field)
+{
+    CPUOpenRISCState *env = opaque;
+    cpu_set_sr(env, qemu_get_be32(f));
+    return 0;
+}
+
+static int put_sr(QEMUFile *f, void *opaque, size_t size,
+                  VMStateField *field, QJSON *vmdesc)
+{
+    CPUOpenRISCState *env = opaque;
+    qemu_put_be32(f, cpu_get_sr(env));
+    return 0;
+}
+
+static const VMStateInfo vmstate_sr = {
+    .name = "sr",
+    .get = get_sr,
+    .put = put_sr,
+};
+
 static const VMStateDescription vmstate_env = {
     .name = "env",
-    .version_id = 1,
-    .minimum_version_id = 1,
+    .version_id = 4,
+    .minimum_version_id = 4,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT32_ARRAY(gpr, CPUOpenRISCState, 32),
-        VMSTATE_UINT32(sr, CPUOpenRISCState),
-        VMSTATE_UINT32(epcr, CPUOpenRISCState),
-        VMSTATE_UINT32(eear, CPUOpenRISCState),
+        VMSTATE_UINTTL_ARRAY(gpr, CPUOpenRISCState, 32),
+        VMSTATE_UINTTL(pc, CPUOpenRISCState),
+        VMSTATE_UINTTL(ppc, CPUOpenRISCState),
+        VMSTATE_UINTTL(jmp_pc, CPUOpenRISCState),
+        VMSTATE_UINTTL(lock_addr, CPUOpenRISCState),
+        VMSTATE_UINTTL(lock_value, CPUOpenRISCState),
+        VMSTATE_UINTTL(epcr, CPUOpenRISCState),
+        VMSTATE_UINTTL(eear, CPUOpenRISCState),
+
+        /* Save the architecture value of the SR, not the internally
+           expanded version.  Since this architecture value does not
+           exist in memory to be stored, this requires a bit of hoop
+           jumping.  We want OFFSET=0 so that we effectively pass ENV
+           to the helper functions, and we need to fill in the name by
+           hand since there's no field of that name.  */
+        {
+            .name = "sr",
+            .version_id = 0,
+            .size = sizeof(uint32_t),
+            .info = &vmstate_sr,
+            .flags = VMS_SINGLE,
+            .offset = 0
+        },
+
+        VMSTATE_UINT32(vr, CPUOpenRISCState),
+        VMSTATE_UINT32(upr, CPUOpenRISCState),
+        VMSTATE_UINT32(cpucfgr, CPUOpenRISCState),
+        VMSTATE_UINT32(dmmucfgr, CPUOpenRISCState),
+        VMSTATE_UINT32(immucfgr, CPUOpenRISCState),
         VMSTATE_UINT32(esr, CPUOpenRISCState),
         VMSTATE_UINT32(fpcsr, CPUOpenRISCState),
-        VMSTATE_UINT32(pc, CPUOpenRISCState),
-        VMSTATE_UINT32(npc, CPUOpenRISCState),
-        VMSTATE_UINT32(ppc, CPUOpenRISCState),
+        VMSTATE_UINT64(mac, CPUOpenRISCState),
         VMSTATE_END_OF_LIST()
     }
 };
diff --git a/target/openrisc/mmu.c b/target/openrisc/mmu.c
index 505dcdcdc8..56b11d3d68 100644
--- a/target/openrisc/mmu.c
+++ b/target/openrisc/mmu.c
@@ -174,6 +174,7 @@ static void cpu_openrisc_raise_mmu_exception(OpenRISCCPU *cpu,
 
     cs->exception_index = exception;
     cpu->env.eear = address;
+    cpu->env.lock_addr = -1;
 }
 
 #ifndef CONFIG_USER_ONLY
diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c
index daea902856..60c3193656 100644
--- a/target/openrisc/sys_helper.c
+++ b/target/openrisc/sys_helper.c
@@ -29,11 +29,10 @@ void HELPER(mtspr)(CPUOpenRISCState *env,
                    target_ulong ra, target_ulong rb, target_ulong offset)
 {
 #ifndef CONFIG_USER_ONLY
-    int spr = (ra | offset);
-    int idx;
-
     OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
+    int spr = (ra | offset);
+    int idx;
 
     switch (spr) {
     case TO_SPR(0, 0): /* VR */
@@ -41,7 +40,14 @@ void HELPER(mtspr)(CPUOpenRISCState *env,
         break;
 
     case TO_SPR(0, 16): /* NPC */
-        env->npc = rb;
+        cpu_restore_state(cs, GETPC());
+        /* ??? Mirror or1ksim in not trashing delayed branch state
+           when "jumping" to the current instruction.  */
+        if (env->pc != rb) {
+            env->pc = rb;
+            env->dflag = 0;
+            cpu_loop_exit(cs);
+        }
         break;
 
     case TO_SPR(0, 17): /* SR */
@@ -49,8 +55,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env,
             (rb & (SR_IME | SR_DME | SR_SM))) {
             tlb_flush(cs);
         }
-        env->sr = rb;
-        env->sr |= SR_FO;      /* FO is const equal to 1 */
+        cpu_set_sr(env, rb);
         if (env->sr & SR_DME) {
             env->tlb->cpu_openrisc_map_address_data =
                 &cpu_openrisc_get_phys_data;
@@ -121,6 +126,12 @@ void HELPER(mtspr)(CPUOpenRISCState *env,
     case TO_SPR(2, 1280) ... TO_SPR(2, 1407): /* ITLBW3MR 0-127 */
     case TO_SPR(2, 1408) ... TO_SPR(2, 1535): /* ITLBW3TR 0-127 */
         break;
+    case TO_SPR(5, 1):  /* MACLO */
+        env->mac = deposit64(env->mac, 0, 32, rb);
+        break;
+    case TO_SPR(5, 2):  /* MACHI */
+        env->mac = deposit64(env->mac, 32, 32, rb);
+        break;
     case TO_SPR(9, 0):  /* PICMR */
         env->picmr |= rb;
         break;
@@ -165,7 +176,6 @@ void HELPER(mtspr)(CPUOpenRISCState *env,
         cpu_openrisc_timer_update(cpu);
         break;
     default:
-
         break;
     }
 #endif
@@ -175,11 +185,11 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env,
                            target_ulong rd, target_ulong ra, uint32_t offset)
 {
 #ifndef CONFIG_USER_ONLY
+    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
+    CPUState *cs = CPU(cpu);
     int spr = (ra | offset);
     int idx;
 
-    OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
-
     switch (spr) {
     case TO_SPR(0, 0): /* VR */
         return env->vr & SPR_VR;
@@ -196,13 +206,15 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env,
     case TO_SPR(0, 4): /* IMMUCFGR */
         return env->immucfgr;
 
-    case TO_SPR(0, 16): /* NPC */
-        return env->npc;
+    case TO_SPR(0, 16): /* NPC (equals PC) */
+        cpu_restore_state(cs, GETPC());
+        return env->pc;
 
     case TO_SPR(0, 17): /* SR */
-        return env->sr;
+        return cpu_get_sr(env);
 
     case TO_SPR(0, 18): /* PPC */
+        cpu_restore_state(cs, GETPC());
         return env->ppc;
 
     case TO_SPR(0, 32): /* EPCR */
@@ -246,6 +258,13 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env,
     case TO_SPR(2, 1408) ... TO_SPR(2, 1535): /* ITLBW3TR 0-127 */
         break;
 
+    case TO_SPR(5, 1):  /* MACLO */
+        return (uint32_t)env->mac;
+        break;
+    case TO_SPR(5, 2):  /* MACHI */
+        return env->mac >> 32;
+        break;
+
     case TO_SPR(9, 0):  /* PICMR */
         return env->picmr;
 
@@ -264,25 +283,6 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env,
     }
 #endif
 
-/*If we later need to add tracepoints (or debug printfs) for the return
-value, it may be useful to structure the code like this:
-
-target_ulong ret = 0;
-
-switch() {
-case x:
- ret = y;
- break;
-case z:
- ret = 42;
- break;
-...
-}
-
-later something like trace_spr_read(ret);
-
-return ret;*/
-
     /* for rd is passed in, if rd unchanged, just keep it back.  */
     return rd;
 }
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
index 03fa7db570..7c4cbf205f 100644
--- a/target/openrisc/translate.c
+++ b/target/openrisc/translate.c
@@ -34,37 +34,34 @@
 #include "trace-tcg.h"
 #include "exec/log.h"
 
-
-#define OPENRISC_DISAS
-
-#ifdef OPENRISC_DISAS
-#  define LOG_DIS(...) qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__)
-#else
-#  define LOG_DIS(...) do { } while (0)
-#endif
+#define LOG_DIS(str, ...) \
+    qemu_log_mask(CPU_LOG_TB_IN_ASM, "%08x: " str, dc->pc, ## __VA_ARGS__)
 
 typedef struct DisasContext {
     TranslationBlock *tb;
-    target_ulong pc, ppc, npc;
-    uint32_t tb_flags, synced_flags, flags;
+    target_ulong pc;
     uint32_t is_jmp;
     uint32_t mem_idx;
-    int singlestep_enabled;
+    uint32_t tb_flags;
     uint32_t delayed_branch;
+    bool singlestep_enabled;
 } DisasContext;
 
 static TCGv_env cpu_env;
 static TCGv cpu_sr;
 static TCGv cpu_R[32];
+static TCGv cpu_R0;
 static TCGv cpu_pc;
 static TCGv jmp_pc;            /* l.jr/l.jalr temp pc */
-static TCGv cpu_npc;
 static TCGv cpu_ppc;
-static TCGv_i32 env_btaken;    /* bf/bnf , F flag taken */
+static TCGv cpu_sr_f;           /* bf/bnf, F flag taken */
+static TCGv cpu_sr_cy;          /* carry (unsigned overflow) */
+static TCGv cpu_sr_ov;          /* signed overflow */
+static TCGv cpu_lock_addr;
+static TCGv cpu_lock_value;
 static TCGv_i32 fpcsr;
-static TCGv machi, maclo;
-static TCGv fpmaddhi, fpmaddlo;
-static TCGv_i32 env_flags;
+static TCGv_i64 cpu_mac;        /* MACHI:MACLO */
+static TCGv_i32 cpu_dflag;
 #include "exec/gen-icount.h"
 
 void openrisc_translate_init(void)
@@ -81,76 +78,39 @@ void openrisc_translate_init(void)
     tcg_ctx.tcg_env = cpu_env;
     cpu_sr = tcg_global_mem_new(cpu_env,
                                 offsetof(CPUOpenRISCState, sr), "sr");
-    env_flags = tcg_global_mem_new_i32(cpu_env,
-                                       offsetof(CPUOpenRISCState, flags),
-                                       "flags");
+    cpu_dflag = tcg_global_mem_new_i32(cpu_env,
+                                       offsetof(CPUOpenRISCState, dflag),
+                                       "dflag");
     cpu_pc = tcg_global_mem_new(cpu_env,
                                 offsetof(CPUOpenRISCState, pc), "pc");
-    cpu_npc = tcg_global_mem_new(cpu_env,
-                                 offsetof(CPUOpenRISCState, npc), "npc");
     cpu_ppc = tcg_global_mem_new(cpu_env,
                                  offsetof(CPUOpenRISCState, ppc), "ppc");
     jmp_pc = tcg_global_mem_new(cpu_env,
                                 offsetof(CPUOpenRISCState, jmp_pc), "jmp_pc");
-    env_btaken = tcg_global_mem_new_i32(cpu_env,
-                                        offsetof(CPUOpenRISCState, btaken),
-                                        "btaken");
+    cpu_sr_f = tcg_global_mem_new(cpu_env,
+                                  offsetof(CPUOpenRISCState, sr_f), "sr_f");
+    cpu_sr_cy = tcg_global_mem_new(cpu_env,
+                                   offsetof(CPUOpenRISCState, sr_cy), "sr_cy");
+    cpu_sr_ov = tcg_global_mem_new(cpu_env,
+                                   offsetof(CPUOpenRISCState, sr_ov), "sr_ov");
+    cpu_lock_addr = tcg_global_mem_new(cpu_env,
+                                       offsetof(CPUOpenRISCState, lock_addr),
+                                       "lock_addr");
+    cpu_lock_value = tcg_global_mem_new(cpu_env,
+                                        offsetof(CPUOpenRISCState, lock_value),
+                                        "lock_value");
     fpcsr = tcg_global_mem_new_i32(cpu_env,
                                    offsetof(CPUOpenRISCState, fpcsr),
                                    "fpcsr");
-    machi = tcg_global_mem_new(cpu_env,
-                               offsetof(CPUOpenRISCState, machi),
-                               "machi");
-    maclo = tcg_global_mem_new(cpu_env,
-                               offsetof(CPUOpenRISCState, maclo),
-                               "maclo");
-    fpmaddhi = tcg_global_mem_new(cpu_env,
-                                  offsetof(CPUOpenRISCState, fpmaddhi),
-                                  "fpmaddhi");
-    fpmaddlo = tcg_global_mem_new(cpu_env,
-                                  offsetof(CPUOpenRISCState, fpmaddlo),
-                                  "fpmaddlo");
+    cpu_mac = tcg_global_mem_new_i64(cpu_env,
+                                     offsetof(CPUOpenRISCState, mac),
+                                     "mac");
     for (i = 0; i < 32; i++) {
         cpu_R[i] = tcg_global_mem_new(cpu_env,
                                       offsetof(CPUOpenRISCState, gpr[i]),
                                       regnames[i]);
     }
-}
-
-/* Writeback SR_F translation space to execution space.  */
-static inline void wb_SR_F(void)
-{
-    TCGLabel *label = gen_new_label();
-    tcg_gen_andi_tl(cpu_sr, cpu_sr, ~SR_F);
-    tcg_gen_brcondi_tl(TCG_COND_EQ, env_btaken, 0, label);
-    tcg_gen_ori_tl(cpu_sr, cpu_sr, SR_F);
-    gen_set_label(label);
-}
-
-static inline int zero_extend(unsigned int val, int width)
-{
-    return val & ((1 << width) - 1);
-}
-
-static inline int sign_extend(unsigned int val, int width)
-{
-    int sval;
-
-    /* LSL */
-    val <<= TARGET_LONG_BITS - width;
-    sval = val;
-    /* ASR.  */
-    sval >>= TARGET_LONG_BITS - width;
-    return sval;
-}
-
-static inline void gen_sync_flags(DisasContext *dc)
-{
-    /* Sync the tb dependent flag between translate and runtime.  */
-    if (dc->tb_flags != dc->synced_flags) {
-        tcg_gen_movi_tl(env_flags, dc->tb_flags);
-        dc->synced_flags = dc->tb_flags;
-    }
+    cpu_R0 = cpu_R[0];
 }
 
 static void gen_exception(DisasContext *dc, unsigned int excp)
@@ -191,6 +151,15 @@ static void check_ov64s(DisasContext *dc)
 }
 #endif*/
 
+/* We're about to write to REG (a register number).  If that is R0,
+   re-instate the architectural register in cpu_R[0].  */
+#define check_r0_write(reg)             \
+    do {                                \
+        if (unlikely((reg) == 0)) {     \
+            cpu_R[0] = cpu_R0;          \
+        }                               \
+    } while (0)
+
 static inline bool use_goto_tb(DisasContext *dc, target_ulong dest)
 {
     if (unlikely(dc->singlestep_enabled)) {
@@ -219,32 +188,35 @@ static void gen_goto_tb(DisasContext *dc, int n, target_ulong dest)
     }
 }
 
-static void gen_jump(DisasContext *dc, uint32_t imm, uint32_t reg, uint32_t op0)
+static void gen_jump(DisasContext *dc, int32_t n26, uint32_t reg, uint32_t op0)
 {
-    target_ulong tmp_pc;
-    /* N26, 26bits imm */
-    tmp_pc = sign_extend((imm<<2), 26) + dc->pc;
+    target_ulong tmp_pc = dc->pc + n26 * 4;
 
     switch (op0) {
     case 0x00:     /* l.j */
         tcg_gen_movi_tl(jmp_pc, tmp_pc);
         break;
     case 0x01:     /* l.jal */
-        tcg_gen_movi_tl(cpu_R[9], (dc->pc + 8));
+        tcg_gen_movi_tl(cpu_R[9], dc->pc + 8);
+        /* Optimize jal being used to load the PC for PIC.  */
+        if (tmp_pc == dc->pc + 8) {
+            return;
+        }
         tcg_gen_movi_tl(jmp_pc, tmp_pc);
         break;
     case 0x03:     /* l.bnf */
     case 0x04:     /* l.bf  */
         {
-            TCGLabel *lab = gen_new_label();
-            TCGv sr_f = tcg_temp_new();
-            tcg_gen_movi_tl(jmp_pc, dc->pc+8);
-            tcg_gen_andi_tl(sr_f, cpu_sr, SR_F);
-            tcg_gen_brcondi_i32(op0 == 0x03 ? TCG_COND_EQ : TCG_COND_NE,
-                                sr_f, SR_F, lab);
-            tcg_gen_movi_tl(jmp_pc, tmp_pc);
-            gen_set_label(lab);
-            tcg_temp_free(sr_f);
+            TCGv t_next = tcg_const_tl(dc->pc + 8);
+            TCGv t_true = tcg_const_tl(tmp_pc);
+            TCGv t_zero = tcg_const_tl(0);
+
+            tcg_gen_movcond_tl(op0 == 0x03 ? TCG_COND_EQ : TCG_COND_NE,
+                               jmp_pc, cpu_sr_f, t_zero, t_true, t_next);
+
+            tcg_temp_free(t_next);
+            tcg_temp_free(t_true);
+            tcg_temp_free(t_zero);
         }
         break;
     case 0x11:     /* l.jr */
@@ -260,10 +232,314 @@ static void gen_jump(DisasContext *dc, uint32_t imm, uint32_t reg, uint32_t op0)
     }
 
     dc->delayed_branch = 2;
-    dc->tb_flags |= D_FLAG;
-    gen_sync_flags(dc);
 }
 
+static void gen_ove_cy(DisasContext *dc)
+{
+    if (dc->tb_flags & SR_OVE) {    /* presumably: overflow exceptions on */
+        gen_helper_ove_cy(cpu_env);
+    }
+}
+
+static void gen_ove_ov(DisasContext *dc)
+{
+    if (dc->tb_flags & SR_OVE) {    /* presumably: overflow exceptions on */
+        gen_helper_ove_ov(cpu_env);
+    }
+}
+
+static void gen_ove_cyov(DisasContext *dc)
+{
+    if (dc->tb_flags & SR_OVE) {    /* presumably: overflow exceptions on */
+        gen_helper_ove_cyov(cpu_env);
+    }
+}
+
+static void gen_add(DisasContext *dc, TCGv dest, TCGv srca, TCGv srcb)
+{
+    TCGv t0 = tcg_const_tl(0);
+    TCGv res = tcg_temp_new();
+    /* CY = carry out of a + b; OV sign bit = (res ^ b) & ~(a ^ b).  */
+    tcg_gen_add2_tl(res, cpu_sr_cy, srca, t0, srcb, t0);
+    tcg_gen_xor_tl(cpu_sr_ov, srca, srcb);
+    tcg_gen_xor_tl(t0, res, srcb);
+    tcg_gen_andc_tl(cpu_sr_ov, t0, cpu_sr_ov);
+    tcg_temp_free(t0);
+
+    tcg_gen_mov_tl(dest, res);
+    tcg_temp_free(res);
+
+    gen_ove_cyov(dc);
+}
+
+static void gen_addc(DisasContext *dc, TCGv dest, TCGv srca, TCGv srcb)
+{
+    TCGv t0 = tcg_const_tl(0);
+    TCGv res = tcg_temp_new();
+    /* res = a + CY + b; CY collects the carry out of both additions.  */
+    tcg_gen_add2_tl(res, cpu_sr_cy, srca, t0, cpu_sr_cy, t0);
+    tcg_gen_add2_tl(res, cpu_sr_cy, res, cpu_sr_cy, srcb, t0);
+    tcg_gen_xor_tl(cpu_sr_ov, srca, srcb);
+    tcg_gen_xor_tl(t0, res, srcb);
+    tcg_gen_andc_tl(cpu_sr_ov, t0, cpu_sr_ov);
+    tcg_temp_free(t0);
+
+    tcg_gen_mov_tl(dest, res);
+    tcg_temp_free(res);
+
+    gen_ove_cyov(dc);
+}
+
+static void gen_sub(DisasContext *dc, TCGv dest, TCGv srca, TCGv srcb)
+{
+    TCGv res = tcg_temp_new();
+    /* OV sign bit = (res ^ a) & (a ^ b); CY = borrow, i.e. a <u b.  */
+    tcg_gen_sub_tl(res, srca, srcb);
+    tcg_gen_xor_tl(cpu_sr_cy, srca, srcb);
+    tcg_gen_xor_tl(cpu_sr_ov, res, srca);
+    tcg_gen_and_tl(cpu_sr_ov, cpu_sr_ov, cpu_sr_cy);
+    tcg_gen_setcond_tl(TCG_COND_LTU, cpu_sr_cy, srca, srcb);
+
+    tcg_gen_mov_tl(dest, res);
+    tcg_temp_free(res);
+
+    gen_ove_cyov(dc);
+}
+
+static void gen_mul(DisasContext *dc, TCGv dest, TCGv srca, TCGv srcb)
+{
+    TCGv t0 = tcg_temp_new();
+    /* OV iff the high half differs from the sign-extension of the low.  */
+    tcg_gen_muls2_tl(dest, cpu_sr_ov, srca, srcb);
+    tcg_gen_sari_tl(t0, dest, TARGET_LONG_BITS - 1);
+    tcg_gen_setcond_tl(TCG_COND_NE, cpu_sr_ov, cpu_sr_ov, t0);
+    tcg_temp_free(t0);
+    /* Turn the 0/1 setcond result into 0/-1.  */
+    tcg_gen_neg_tl(cpu_sr_ov, cpu_sr_ov);
+    gen_ove_ov(dc);
+}
+
+static void gen_mulu(DisasContext *dc, TCGv dest, TCGv srca, TCGv srcb)
+{
+    /* Unsigned widening multiply: CY iff the high half is nonzero.  */
+    tcg_gen_mulu2_tl(dest, cpu_sr_cy, srca, srcb);
+    tcg_gen_setcondi_tl(TCG_COND_NE, cpu_sr_cy, cpu_sr_cy, 0);
+    gen_ove_cy(dc);
+}
+
+static void gen_div(DisasContext *dc, TCGv dest, TCGv srca, TCGv srcb)
+{
+    TCGv t0 = tcg_temp_new();
+    /* OV is raised for a zero divisor.  */
+    tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_sr_ov, srcb, 0);
+    /* The result of divide-by-zero is undefined.
+       Suppress the host-side exception by dividing by 1.  */
+    tcg_gen_or_tl(t0, srcb, cpu_sr_ov);
+    tcg_gen_div_tl(dest, srca, t0);
+    tcg_temp_free(t0);
+    /* 0/1 -> 0/-1.  NOTE(review): INT_MIN / -1 is not special-cased.  */
+    tcg_gen_neg_tl(cpu_sr_ov, cpu_sr_ov);
+    gen_ove_ov(dc);
+}
+
+static void gen_divu(DisasContext *dc, TCGv dest, TCGv srca, TCGv srcb)
+{
+    TCGv t0 = tcg_temp_new();
+    /* CY is raised for a zero divisor.  */
+    tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_sr_cy, srcb, 0);
+    /* The result of divide-by-zero is undefined.
+       Suppress the host-side exception by dividing by 1.  */
+    tcg_gen_or_tl(t0, srcb, cpu_sr_cy);
+    tcg_gen_divu_tl(dest, srca, t0);
+    tcg_temp_free(t0);
+
+    gen_ove_cy(dc);
+}
+
+static void gen_muld(DisasContext *dc, TCGv srca, TCGv srcb)
+{
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    /* cpu_mac = sign-extended a * b; OV is cleared on 32-bit targets.  */
+    tcg_gen_ext_tl_i64(t1, srca);
+    tcg_gen_ext_tl_i64(t2, srcb);
+    if (TARGET_LONG_BITS == 32) {
+        tcg_gen_mul_i64(cpu_mac, t1, t2);
+        tcg_gen_movi_tl(cpu_sr_ov, 0);
+    } else {
+        TCGv_i64 high = tcg_temp_new_i64();
+
+        tcg_gen_muls2_i64(cpu_mac, high, t1, t2);
+        tcg_gen_sari_i64(t1, cpu_mac, 63);
+        tcg_gen_setcond_i64(TCG_COND_NE, t1, t1, high);
+        tcg_temp_free_i64(high);
+        tcg_gen_trunc_i64_tl(cpu_sr_ov, t1);
+        tcg_gen_neg_tl(cpu_sr_ov, cpu_sr_ov);
+
+        gen_ove_ov(dc);
+    }
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t2);
+}
+
+static void gen_muldu(DisasContext *dc, TCGv srca, TCGv srcb)
+{
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    /* cpu_mac = zero-extended a * b; CY is cleared on 32-bit targets.  */
+    tcg_gen_extu_tl_i64(t1, srca);
+    tcg_gen_extu_tl_i64(t2, srcb);
+    if (TARGET_LONG_BITS == 32) {
+        tcg_gen_mul_i64(cpu_mac, t1, t2);
+        tcg_gen_movi_tl(cpu_sr_cy, 0);
+    } else {
+        TCGv_i64 high = tcg_temp_new_i64();
+
+        tcg_gen_mulu2_i64(cpu_mac, high, t1, t2);
+        tcg_gen_setcondi_i64(TCG_COND_NE, high, high, 0);
+        tcg_gen_trunc_i64_tl(cpu_sr_cy, high);
+        tcg_temp_free_i64(high);
+
+        gen_ove_cy(dc);
+    }
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t2);
+}
+
+static void gen_mac(DisasContext *dc, TCGv srca, TCGv srcb)
+{
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    /* cpu_mac += sign-extended a * b.  */
+    tcg_gen_ext_tl_i64(t1, srca);
+    tcg_gen_ext_tl_i64(t2, srcb);
+    tcg_gen_mul_i64(t1, t1, t2);
+    /* Note that overflow is only computed during addition stage:
+       OV sign bit = (prod ^ result) & ~(mac ^ prod).  */
+    tcg_gen_xor_i64(t2, cpu_mac, t1);
+    tcg_gen_add_i64(cpu_mac, cpu_mac, t1);
+    tcg_gen_xor_i64(t1, t1, cpu_mac);
+    tcg_gen_andc_i64(t1, t1, t2);
+    tcg_temp_free_i64(t2);
+
+#if TARGET_LONG_BITS == 32
+    tcg_gen_extrh_i64_i32(cpu_sr_ov, t1);
+#else
+    tcg_gen_mov_i64(cpu_sr_ov, t1);
+#endif
+    tcg_temp_free_i64(t1);
+
+    gen_ove_ov(dc);
+}
+
+static void gen_macu(DisasContext *dc, TCGv srca, TCGv srcb)
+{
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    /* cpu_mac += zero-extended a * b; CY = unsigned wrap of the add.  */
+    tcg_gen_extu_tl_i64(t1, srca);
+    tcg_gen_extu_tl_i64(t2, srcb);
+    tcg_gen_mul_i64(t1, t1, t2);
+    tcg_temp_free_i64(t2);
+
+    /* Note that overflow is only computed during addition stage.  */
+    tcg_gen_add_i64(cpu_mac, cpu_mac, t1);
+    tcg_gen_setcond_i64(TCG_COND_LTU, t1, cpu_mac, t1);
+    tcg_gen_trunc_i64_tl(cpu_sr_cy, t1);
+    tcg_temp_free_i64(t1);
+
+    gen_ove_cy(dc);
+}
+
+static void gen_msb(DisasContext *dc, TCGv srca, TCGv srcb)
+{
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    /* cpu_mac -= sign-extended a * b.  */
+    tcg_gen_ext_tl_i64(t1, srca);
+    tcg_gen_ext_tl_i64(t2, srcb);
+    tcg_gen_mul_i64(t1, t1, t2);
+    /* Note that overflow is only computed during subtraction stage:
+       OV sign bit = (mac ^ prod) & ~(prod ^ result).  */
+    tcg_gen_xor_i64(t2, cpu_mac, t1);
+    tcg_gen_sub_i64(cpu_mac, cpu_mac, t1);
+    tcg_gen_xor_i64(t1, t1, cpu_mac);
+    tcg_gen_andc_i64(t1, t2, t1);
+    tcg_temp_free_i64(t2);
+
+#if TARGET_LONG_BITS == 32
+    tcg_gen_extrh_i64_i32(cpu_sr_ov, t1);
+#else
+    tcg_gen_mov_i64(cpu_sr_ov, t1);
+#endif
+    tcg_temp_free_i64(t1);
+
+    gen_ove_ov(dc);
+}
+
+static void gen_msbu(DisasContext *dc, TCGv srca, TCGv srcb)
+{
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    /* cpu_mac -= zero-extended a * b; CY = borrow (old mac <u product).  */
+    tcg_gen_extu_tl_i64(t1, srca);
+    tcg_gen_extu_tl_i64(t2, srcb);
+    tcg_gen_mul_i64(t1, t1, t2);
+
+    /* Note that overflow is only computed during subtraction stage.  */
+    tcg_gen_setcond_i64(TCG_COND_LTU, t2, cpu_mac, t1);
+    tcg_gen_sub_i64(cpu_mac, cpu_mac, t1);
+    tcg_gen_trunc_i64_tl(cpu_sr_cy, t2);
+    tcg_temp_free_i64(t2);
+    tcg_temp_free_i64(t1);
+
+    gen_ove_cy(dc);
+}
+
+static void gen_lwa(DisasContext *dc, TCGv rd, TCGv ra, int32_t ofs)
+{
+    TCGv ea = tcg_temp_new();
+    /* Load the word at ra + ofs and record the address and value loaded.  */
+    tcg_gen_addi_tl(ea, ra, ofs);
+    tcg_gen_qemu_ld_tl(rd, ea, dc->mem_idx, MO_TEUL);
+    tcg_gen_mov_tl(cpu_lock_addr, ea);
+    tcg_gen_mov_tl(cpu_lock_value, rd);
+    tcg_temp_free(ea);
+}
+
+static void gen_swa(DisasContext *dc, int b, TCGv ra, int32_t ofs)
+{
+    TCGv ea, val;
+    TCGLabel *lab_fail, *lab_done;
+    /* Conditional store of cpu_R[b] to ra + ofs; cpu_sr_f reports success.  */
+    ea = tcg_temp_new();
+    tcg_gen_addi_tl(ea, ra, ofs);
+
+    /* For TB_FLAGS_R0_0, the branch below invalidates the temporary assigned
+       to cpu_R[0].  Since l.swa is quite often immediately followed by a
+       branch, don't bother reallocating; finish the TB using the "real" R0.
+       This also takes care of RB input across the branch.  */
+    cpu_R[0] = cpu_R0;
+    /* Fail outright if the address differs from the recorded lock address.  */
+    lab_fail = gen_new_label();
+    lab_done = gen_new_label();
+    tcg_gen_brcond_tl(TCG_COND_NE, ea, cpu_lock_addr, lab_fail);
+    tcg_temp_free(ea);
+    /* Store only if memory still holds cpu_lock_value; F = store happened.  */
+    val = tcg_temp_new();
+    tcg_gen_atomic_cmpxchg_tl(val, cpu_lock_addr, cpu_lock_value,
+                              cpu_R[b], dc->mem_idx, MO_TEUL);
+    tcg_gen_setcond_tl(TCG_COND_EQ, cpu_sr_f, val, cpu_lock_value);
+    tcg_temp_free(val);
+
+    tcg_gen_br(lab_done);
+
+    gen_set_label(lab_fail);
+    tcg_gen_movi_tl(cpu_sr_f, 0);
+    /* On both paths the recorded lock address is invalidated.  */
+    gen_set_label(lab_done);
+    tcg_gen_movi_tl(cpu_lock_addr, -1);
+}
 
 static void dec_calc(DisasContext *dc, uint32_t insn)
 {
@@ -276,468 +552,183 @@ static void dec_calc(DisasContext *dc, uint32_t insn)
     rb = extract32(insn, 11, 5);
     rd = extract32(insn, 21, 5);
 
-    switch (op0) {
-    case 0x0000:
-        switch (op1) {
-        case 0x00:    /* l.add */
+    switch (op1) {
+    case 0:
+        switch (op0) {
+        case 0x0: /* l.add */
             LOG_DIS("l.add r%d, r%d, r%d\n", rd, ra, rb);
-            {
-                TCGLabel *lab = gen_new_label();
-                TCGv_i64 ta = tcg_temp_new_i64();
-                TCGv_i64 tb = tcg_temp_new_i64();
-                TCGv_i64 td = tcg_temp_local_new_i64();
-                TCGv_i32 res = tcg_temp_local_new_i32();
-                TCGv_i32 sr_ove = tcg_temp_local_new_i32();
-                tcg_gen_extu_i32_i64(ta, cpu_R[ra]);
-                tcg_gen_extu_i32_i64(tb, cpu_R[rb]);
-                tcg_gen_add_i64(td, ta, tb);
-                tcg_gen_extrl_i64_i32(res, td);
-                tcg_gen_shri_i64(td, td, 31);
-                tcg_gen_andi_i64(td, td, 0x3);
-                /* Jump to lab when no overflow.  */
-                tcg_gen_brcondi_i64(TCG_COND_EQ, td, 0x0, lab);
-                tcg_gen_brcondi_i64(TCG_COND_EQ, td, 0x3, lab);
-                tcg_gen_ori_i32(cpu_sr, cpu_sr, (SR_OV | SR_CY));
-                tcg_gen_andi_i32(sr_ove, cpu_sr, SR_OVE);
-                tcg_gen_brcondi_i32(TCG_COND_NE, sr_ove, SR_OVE, lab);
-                gen_exception(dc, EXCP_RANGE);
-                gen_set_label(lab);
-                tcg_gen_mov_i32(cpu_R[rd], res);
-                tcg_temp_free_i64(ta);
-                tcg_temp_free_i64(tb);
-                tcg_temp_free_i64(td);
-                tcg_temp_free_i32(res);
-                tcg_temp_free_i32(sr_ove);
-            }
-            break;
-        default:
-            gen_illegal_exception(dc);
-            break;
-        }
-        break;
+            gen_add(dc, cpu_R[rd], cpu_R[ra], cpu_R[rb]);
+            return;
 
-    case 0x0001:    /* l.addc */
-        switch (op1) {
-        case 0x00:
+        case 0x1: /* l.addc */
             LOG_DIS("l.addc r%d, r%d, r%d\n", rd, ra, rb);
-            {
-                TCGLabel *lab = gen_new_label();
-                TCGv_i64 ta = tcg_temp_new_i64();
-                TCGv_i64 tb = tcg_temp_new_i64();
-                TCGv_i64 tcy = tcg_temp_local_new_i64();
-                TCGv_i64 td = tcg_temp_local_new_i64();
-                TCGv_i32 res = tcg_temp_local_new_i32();
-                TCGv_i32 sr_cy = tcg_temp_local_new_i32();
-                TCGv_i32 sr_ove = tcg_temp_local_new_i32();
-                tcg_gen_extu_i32_i64(ta, cpu_R[ra]);
-                tcg_gen_extu_i32_i64(tb, cpu_R[rb]);
-                tcg_gen_andi_i32(sr_cy, cpu_sr, SR_CY);
-                tcg_gen_extu_i32_i64(tcy, sr_cy);
-                tcg_gen_shri_i64(tcy, tcy, 10);
-                tcg_gen_add_i64(td, ta, tb);
-                tcg_gen_add_i64(td, td, tcy);
-                tcg_gen_extrl_i64_i32(res, td);
-                tcg_gen_shri_i64(td, td, 32);
-                tcg_gen_andi_i64(td, td, 0x3);
-                /* Jump to lab when no overflow.  */
-                tcg_gen_brcondi_i64(TCG_COND_EQ, td, 0x0, lab);
-                tcg_gen_brcondi_i64(TCG_COND_EQ, td, 0x3, lab);
-                tcg_gen_ori_i32(cpu_sr, cpu_sr, (SR_OV | SR_CY));
-                tcg_gen_andi_i32(sr_ove, cpu_sr, SR_OVE);
-                tcg_gen_brcondi_i32(TCG_COND_NE, sr_ove, SR_OVE, lab);
-                gen_exception(dc, EXCP_RANGE);
-                gen_set_label(lab);
-                tcg_gen_mov_i32(cpu_R[rd], res);
-                tcg_temp_free_i64(ta);
-                tcg_temp_free_i64(tb);
-                tcg_temp_free_i64(tcy);
-                tcg_temp_free_i64(td);
-                tcg_temp_free_i32(res);
-                tcg_temp_free_i32(sr_cy);
-                tcg_temp_free_i32(sr_ove);
-            }
-            break;
-        default:
-            gen_illegal_exception(dc);
-            break;
-        }
-        break;
+            gen_addc(dc, cpu_R[rd], cpu_R[ra], cpu_R[rb]);
+            return;
 
-    case 0x0002:    /* l.sub */
-        switch (op1) {
-        case 0x00:
+        case 0x2: /* l.sub */
             LOG_DIS("l.sub r%d, r%d, r%d\n", rd, ra, rb);
-            {
-                TCGLabel *lab = gen_new_label();
-                TCGv_i64 ta = tcg_temp_new_i64();
-                TCGv_i64 tb = tcg_temp_new_i64();
-                TCGv_i64 td = tcg_temp_local_new_i64();
-                TCGv_i32 res = tcg_temp_local_new_i32();
-                TCGv_i32 sr_ove = tcg_temp_local_new_i32();
-
-                tcg_gen_extu_i32_i64(ta, cpu_R[ra]);
-                tcg_gen_extu_i32_i64(tb, cpu_R[rb]);
-                tcg_gen_sub_i64(td, ta, tb);
-                tcg_gen_extrl_i64_i32(res, td);
-                tcg_gen_shri_i64(td, td, 31);
-                tcg_gen_andi_i64(td, td, 0x3);
-                /* Jump to lab when no overflow.  */
-                tcg_gen_brcondi_i64(TCG_COND_EQ, td, 0x0, lab);
-                tcg_gen_brcondi_i64(TCG_COND_EQ, td, 0x3, lab);
-                tcg_gen_ori_i32(cpu_sr, cpu_sr, (SR_OV | SR_CY));
-                tcg_gen_andi_i32(sr_ove, cpu_sr, SR_OVE);
-                tcg_gen_brcondi_i32(TCG_COND_NE, sr_ove, SR_OVE, lab);
-                gen_exception(dc, EXCP_RANGE);
-                gen_set_label(lab);
-                tcg_gen_mov_i32(cpu_R[rd], res);
-                tcg_temp_free_i64(ta);
-                tcg_temp_free_i64(tb);
-                tcg_temp_free_i64(td);
-                tcg_temp_free_i32(res);
-                tcg_temp_free_i32(sr_ove);
-            }
-            break;
-        default:
-            gen_illegal_exception(dc);
-            break;
-        }
-        break;
+            gen_sub(dc, cpu_R[rd], cpu_R[ra], cpu_R[rb]);
+            return;
 
-    case 0x0003:    /* l.and */
-        switch (op1) {
-        case 0x00:
+        case 0x3: /* l.and */
             LOG_DIS("l.and r%d, r%d, r%d\n", rd, ra, rb);
             tcg_gen_and_tl(cpu_R[rd], cpu_R[ra], cpu_R[rb]);
-            break;
-        default:
-            gen_illegal_exception(dc);
-            break;
-        }
-        break;
+            return;
 
-    case 0x0004:    /* l.or */
-        switch (op1) {
-        case 0x00:
+        case 0x4: /* l.or */
             LOG_DIS("l.or r%d, r%d, r%d\n", rd, ra, rb);
             tcg_gen_or_tl(cpu_R[rd], cpu_R[ra], cpu_R[rb]);
-            break;
-        default:
-            gen_illegal_exception(dc);
-            break;
-        }
-        break;
+            return;
 
-    case 0x0005:
-        switch (op1) {
-        case 0x00:    /* l.xor */
+        case 0x5: /* l.xor */
             LOG_DIS("l.xor r%d, r%d, r%d\n", rd, ra, rb);
             tcg_gen_xor_tl(cpu_R[rd], cpu_R[ra], cpu_R[rb]);
-            break;
-        default:
-            gen_illegal_exception(dc);
-            break;
-        }
-        break;
-
-    case 0x0006:
-        switch (op1) {
-        case 0x03:    /* l.mul */
-            LOG_DIS("l.mul r%d, r%d, r%d\n", rd, ra, rb);
-            if (ra != 0 && rb != 0) {
-                gen_helper_mul32(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
-            } else {
-                tcg_gen_movi_tl(cpu_R[rd], 0x0);
-            }
-            break;
-        default:
-            gen_illegal_exception(dc);
-            break;
-        }
-        break;
-
-    case 0x0009:
-        switch (op1) {
-        case 0x03:    /* l.div */
-            LOG_DIS("l.div r%d, r%d, r%d\n", rd, ra, rb);
-            {
-                TCGLabel *lab0 = gen_new_label();
-                TCGLabel *lab1 = gen_new_label();
-                TCGLabel *lab2 = gen_new_label();
-                TCGLabel *lab3 = gen_new_label();
-                TCGv_i32 sr_ove = tcg_temp_local_new_i32();
-                if (rb == 0) {
-                    tcg_gen_ori_tl(cpu_sr, cpu_sr, (SR_OV | SR_CY));
-                    tcg_gen_andi_tl(sr_ove, cpu_sr, SR_OVE);
-                    tcg_gen_brcondi_tl(TCG_COND_NE, sr_ove, SR_OVE, lab0);
-                    gen_exception(dc, EXCP_RANGE);
-                    gen_set_label(lab0);
-                } else {
-                    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_R[rb],
-                                       0x00000000, lab1);
-                    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_R[ra],
-                                       0x80000000, lab2);
-                    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_R[rb],
-                                       0xffffffff, lab2);
-                    gen_set_label(lab1);
-                    tcg_gen_ori_tl(cpu_sr, cpu_sr, (SR_OV | SR_CY));
-                    tcg_gen_andi_tl(sr_ove, cpu_sr, SR_OVE);
-                    tcg_gen_brcondi_tl(TCG_COND_NE, sr_ove, SR_OVE, lab3);
-                    gen_exception(dc, EXCP_RANGE);
-                    gen_set_label(lab2);
-                    tcg_gen_div_tl(cpu_R[rd], cpu_R[ra], cpu_R[rb]);
-                    gen_set_label(lab3);
-                }
-                tcg_temp_free_i32(sr_ove);
-            }
-            break;
-
-        default:
-            gen_illegal_exception(dc);
-            break;
-        }
-        break;
-
-    case 0x000a:
-        switch (op1) {
-        case 0x03:    /* l.divu */
-            LOG_DIS("l.divu r%d, r%d, r%d\n", rd, ra, rb);
-            {
-                TCGLabel *lab0 = gen_new_label();
-                TCGLabel *lab1 = gen_new_label();
-                TCGLabel *lab2 = gen_new_label();
-                TCGv_i32 sr_ove = tcg_temp_local_new_i32();
-                if (rb == 0) {
-                    tcg_gen_ori_tl(cpu_sr, cpu_sr, (SR_OV | SR_CY));
-                    tcg_gen_andi_tl(sr_ove, cpu_sr, SR_OVE);
-                    tcg_gen_brcondi_tl(TCG_COND_NE, sr_ove, SR_OVE, lab0);
-                    gen_exception(dc, EXCP_RANGE);
-                    gen_set_label(lab0);
-                } else {
-                    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_R[rb],
-                                       0x00000000, lab1);
-                    tcg_gen_ori_tl(cpu_sr, cpu_sr, (SR_OV | SR_CY));
-                    tcg_gen_andi_tl(sr_ove, cpu_sr, SR_OVE);
-                    tcg_gen_brcondi_tl(TCG_COND_NE, sr_ove, SR_OVE, lab2);
-                    gen_exception(dc, EXCP_RANGE);
-                    gen_set_label(lab1);
-                    tcg_gen_divu_tl(cpu_R[rd], cpu_R[ra], cpu_R[rb]);
-                    gen_set_label(lab2);
-                }
-                tcg_temp_free_i32(sr_ove);
-            }
-            break;
-
-        default:
-            gen_illegal_exception(dc);
-            break;
-        }
-        break;
-
-    case 0x000b:
-        switch (op1) {
-        case 0x03:    /* l.mulu */
-            LOG_DIS("l.mulu r%d, r%d, r%d\n", rd, ra, rb);
-            if (rb != 0 && ra != 0) {
-                TCGv_i64 result = tcg_temp_local_new_i64();
-                TCGv_i64 tra = tcg_temp_local_new_i64();
-                TCGv_i64 trb = tcg_temp_local_new_i64();
-                TCGv_i64 high = tcg_temp_new_i64();
-                TCGv_i32 sr_ove = tcg_temp_local_new_i32();
-                TCGLabel *lab = gen_new_label();
-                /* Calculate each result. */
-                tcg_gen_extu_i32_i64(tra, cpu_R[ra]);
-                tcg_gen_extu_i32_i64(trb, cpu_R[rb]);
-                tcg_gen_mul_i64(result, tra, trb);
-                tcg_temp_free_i64(tra);
-                tcg_temp_free_i64(trb);
-                tcg_gen_shri_i64(high, result, TARGET_LONG_BITS);
-                /* Overflow or not. */
-                tcg_gen_brcondi_i64(TCG_COND_EQ, high, 0x00000000, lab);
-                tcg_gen_ori_tl(cpu_sr, cpu_sr, (SR_OV | SR_CY));
-                tcg_gen_andi_tl(sr_ove, cpu_sr, SR_OVE);
-                tcg_gen_brcondi_tl(TCG_COND_NE, sr_ove, SR_OVE, lab);
-                gen_exception(dc, EXCP_RANGE);
-                gen_set_label(lab);
-                tcg_temp_free_i64(high);
-                tcg_gen_trunc_i64_tl(cpu_R[rd], result);
-                tcg_temp_free_i64(result);
-                tcg_temp_free_i32(sr_ove);
-            } else {
-                tcg_gen_movi_tl(cpu_R[rd], 0);
-            }
-            break;
-
-        default:
-            gen_illegal_exception(dc);
-            break;
-        }
-        break;
-
-    case 0x000e:
-        switch (op1) {
-        case 0x00:    /* l.cmov */
-            LOG_DIS("l.cmov r%d, r%d, r%d\n", rd, ra, rb);
-            {
-                TCGLabel *lab = gen_new_label();
-                TCGv res = tcg_temp_local_new();
-                TCGv sr_f = tcg_temp_new();
-                tcg_gen_andi_tl(sr_f, cpu_sr, SR_F);
-                tcg_gen_mov_tl(res, cpu_R[rb]);
-                tcg_gen_brcondi_tl(TCG_COND_NE, sr_f, SR_F, lab);
-                tcg_gen_mov_tl(res, cpu_R[ra]);
-                gen_set_label(lab);
-                tcg_gen_mov_tl(cpu_R[rd], res);
-                tcg_temp_free(sr_f);
-                tcg_temp_free(res);
-            }
-            break;
-
-        default:
-            gen_illegal_exception(dc);
-            break;
-        }
-        break;
-
-    case 0x000f:
-        switch (op1) {
-        case 0x00:    /* l.ff1 */
-            LOG_DIS("l.ff1 r%d, r%d, r%d\n", rd, ra, rb);
-            tcg_gen_ctzi_tl(cpu_R[rd], cpu_R[ra], -1);
-            tcg_gen_addi_tl(cpu_R[rd], cpu_R[rd], 1);
-            break;
-        case 0x01:    /* l.fl1 */
-            LOG_DIS("l.fl1 r%d, r%d, r%d\n", rd, ra, rb);
-            tcg_gen_clzi_tl(cpu_R[rd], cpu_R[ra], TARGET_LONG_BITS);
-            tcg_gen_subfi_tl(cpu_R[rd], TARGET_LONG_BITS, cpu_R[rd]);
-            break;
-
-        default:
-            gen_illegal_exception(dc);
-            break;
-        }
-        break;
+            return;
 
-    case 0x0008:
-        switch (op1) {
-        case 0x00:
+        case 0x8:
             switch (op2) {
-            case 0x00:    /* l.sll */
+            case 0: /* l.sll */
                 LOG_DIS("l.sll r%d, r%d, r%d\n", rd, ra, rb);
                 tcg_gen_shl_tl(cpu_R[rd], cpu_R[ra], cpu_R[rb]);
-                break;
-            case 0x01:    /* l.srl */
+                return;
+            case 1: /* l.srl */
                 LOG_DIS("l.srl r%d, r%d, r%d\n", rd, ra, rb);
                 tcg_gen_shr_tl(cpu_R[rd], cpu_R[ra], cpu_R[rb]);
-                break;
-            case 0x02:    /* l.sra */
+                return;
+            case 2: /* l.sra */
                 LOG_DIS("l.sra r%d, r%d, r%d\n", rd, ra, rb);
                 tcg_gen_sar_tl(cpu_R[rd], cpu_R[ra], cpu_R[rb]);
-                break;
-            case 0x03:    /* l.ror */
+                return;
+            case 3: /* l.ror */
                 LOG_DIS("l.ror r%d, r%d, r%d\n", rd, ra, rb);
                 tcg_gen_rotr_tl(cpu_R[rd], cpu_R[ra], cpu_R[rb]);
-                break;
-
-            default:
-                gen_illegal_exception(dc);
-                break;
+                return;
             }
             break;
 
-        default:
-            gen_illegal_exception(dc);
-            break;
-        }
-        break;
-
-    case 0x000c:
-        switch (op1) {
-        case 0x00:
+        case 0xc:
             switch (op2) {
-            case 0x00:    /* l.exths */
+            case 0: /* l.exths */
                 LOG_DIS("l.exths r%d, r%d\n", rd, ra);
                 tcg_gen_ext16s_tl(cpu_R[rd], cpu_R[ra]);
-                break;
-            case 0x01:    /* l.extbs */
+                return;
+            case 1: /* l.extbs */
                 LOG_DIS("l.extbs r%d, r%d\n", rd, ra);
                 tcg_gen_ext8s_tl(cpu_R[rd], cpu_R[ra]);
-                break;
-            case 0x02:    /* l.exthz */
+                return;
+            case 2: /* l.exthz */
                 LOG_DIS("l.exthz r%d, r%d\n", rd, ra);
                 tcg_gen_ext16u_tl(cpu_R[rd], cpu_R[ra]);
-                break;
-            case 0x03:    /* l.extbz */
+                return;
+            case 3: /* l.extbz */
                 LOG_DIS("l.extbz r%d, r%d\n", rd, ra);
                 tcg_gen_ext8u_tl(cpu_R[rd], cpu_R[ra]);
-                break;
-
-            default:
-                gen_illegal_exception(dc);
-                break;
+                return;
             }
             break;
 
-        default:
-            gen_illegal_exception(dc);
-            break;
-        }
-        break;
-
-    case 0x000d:
-        switch (op1) {
-        case 0x00:
+        case 0xd:
             switch (op2) {
-            case 0x00:    /* l.extws */
+            case 0: /* l.extws */
                 LOG_DIS("l.extws r%d, r%d\n", rd, ra);
                 tcg_gen_ext32s_tl(cpu_R[rd], cpu_R[ra]);
-                break;
-            case 0x01:    /* l.extwz */
+                return;
+            case 1: /* l.extwz */
                 LOG_DIS("l.extwz r%d, r%d\n", rd, ra);
                 tcg_gen_ext32u_tl(cpu_R[rd], cpu_R[ra]);
-                break;
-
-            default:
-                gen_illegal_exception(dc);
-                break;
+                return;
             }
             break;
 
-        default:
-            gen_illegal_exception(dc);
-            break;
+        case 0xe: /* l.cmov */
+            LOG_DIS("l.cmov r%d, r%d, r%d\n", rd, ra, rb);
+            {
+                TCGv zero = tcg_const_tl(0);
+                tcg_gen_movcond_tl(TCG_COND_NE, cpu_R[rd], cpu_sr_f, zero,
+                                   cpu_R[ra], cpu_R[rb]);
+                tcg_temp_free(zero);
+            }
+            return;
+
+        case 0xf: /* l.ff1 */
+            LOG_DIS("l.ff1 r%d, r%d, r%d\n", rd, ra, rb);
+            tcg_gen_ctzi_tl(cpu_R[rd], cpu_R[ra], -1);
+            tcg_gen_addi_tl(cpu_R[rd], cpu_R[rd], 1);
+            return;
         }
         break;
 
-    default:
-        gen_illegal_exception(dc);
+    case 1:
+        switch (op0) {
+        case 0xf: /* l.fl1 */
+            LOG_DIS("l.fl1 r%d, r%d, r%d\n", rd, ra, rb);
+            tcg_gen_clzi_tl(cpu_R[rd], cpu_R[ra], TARGET_LONG_BITS);
+            tcg_gen_subfi_tl(cpu_R[rd], TARGET_LONG_BITS, cpu_R[rd]);
+            return;
+        }
+        break;
+
+    case 2:
+        break;
+
+    case 3:
+        switch (op0) {
+        case 0x6: /* l.mul */
+            LOG_DIS("l.mul r%d, r%d, r%d\n", rd, ra, rb);
+            gen_mul(dc, cpu_R[rd], cpu_R[ra], cpu_R[rb]);
+            return;
+
+        case 0x7: /* l.muld */
+            LOG_DIS("l.muld r%d, r%d\n", ra, rb);
+            gen_muld(dc, cpu_R[ra], cpu_R[rb]);
+            return;
+
+        case 0x9: /* l.div */
+            LOG_DIS("l.div r%d, r%d, r%d\n", rd, ra, rb);
+            gen_div(dc, cpu_R[rd], cpu_R[ra], cpu_R[rb]);
+            return;
+
+        case 0xa: /* l.divu */
+            LOG_DIS("l.divu r%d, r%d, r%d\n", rd, ra, rb);
+            gen_divu(dc, cpu_R[rd], cpu_R[ra], cpu_R[rb]);
+            return;
+
+        case 0xb: /* l.mulu */
+            LOG_DIS("l.mulu r%d, r%d, r%d\n", rd, ra, rb);
+            gen_mulu(dc, cpu_R[rd], cpu_R[ra], cpu_R[rb]);
+            return;
+
+        case 0xc: /* l.muldu */
+            LOG_DIS("l.muldu r%d, r%d\n", ra, rb);
+            gen_muldu(dc, cpu_R[ra], cpu_R[rb]);
+            return;
+        }
         break;
     }
+    gen_illegal_exception(dc);
 }
 
 static void dec_misc(DisasContext *dc, uint32_t insn)
 {
     uint32_t op0, op1;
     uint32_t ra, rb, rd;
-#ifdef OPENRISC_DISAS
-    uint32_t L6, K5;
-#endif
-    uint32_t I16, I5, I11, N26, tmp;
+    uint32_t L6, K5, K16, K5_11;
+    int32_t I16, I5_11, N26;
     TCGMemOp mop;
+    TCGv t0;
 
     op0 = extract32(insn, 26, 6);
     op1 = extract32(insn, 24, 2);
     ra = extract32(insn, 16, 5);
     rb = extract32(insn, 11, 5);
     rd = extract32(insn, 21, 5);
-#ifdef OPENRISC_DISAS
     L6 = extract32(insn, 5, 6);
     K5 = extract32(insn, 0, 5);
-#endif
-    I16 = extract32(insn, 0, 16);
-    I5 = extract32(insn, 21, 5);
-    I11 = extract32(insn, 0, 11);
-    N26 = extract32(insn, 0, 26);
-    tmp = (I5<<11) + I11;
+    K16 = extract32(insn, 0, 16);
+    I16 = (int16_t)K16;
+    N26 = sextract32(insn, 0, 26);
+    K5_11 = (extract32(insn, 21, 5) << 11) | extract32(insn, 0, 11);
+    I5_11 = (int16_t)K5_11;
 
     switch (op0) {
     case 0x00:    /* l.j */
@@ -783,24 +774,10 @@ static void dec_misc(DisasContext *dc, uint32_t insn)
         break;
 
     case 0x13:    /* l.maci */
-        LOG_DIS("l.maci %d, r%d, %d\n", I5, ra, I11);
-        {
-            TCGv_i64 t1 = tcg_temp_new_i64();
-            TCGv_i64 t2 = tcg_temp_new_i64();
-            TCGv_i32 dst = tcg_temp_new_i32();
-            TCGv ttmp = tcg_const_tl(tmp);
-            tcg_gen_mul_tl(dst, cpu_R[ra], ttmp);
-            tcg_gen_ext_i32_i64(t1, dst);
-            tcg_gen_concat_i32_i64(t2, maclo, machi);
-            tcg_gen_add_i64(t2, t2, t1);
-            tcg_gen_extrl_i64_i32(maclo, t2);
-            tcg_gen_shri_i64(t2, t2, 32);
-            tcg_gen_extrl_i64_i32(machi, t2);
-            tcg_temp_free_i32(dst);
-            tcg_temp_free(ttmp);
-            tcg_temp_free_i64(t1);
-            tcg_temp_free_i64(t2);
-        }
+        LOG_DIS("l.maci r%d, %d\n", ra, I16);
+        t0 = tcg_const_tl(I16);
+        gen_mac(dc, cpu_R[ra], t0);
+        tcg_temp_free(t0);
         break;
 
     case 0x09:    /* l.rfe */
@@ -819,6 +796,12 @@ static void dec_misc(DisasContext *dc, uint32_t insn)
         }
         break;
 
+    case 0x1b: /* l.lwa */
+        LOG_DIS("l.lwa r%d, r%d, %d\n", rd, ra, I16);
+        check_r0_write(rd);
+        gen_lwa(dc, cpu_R[rd], cpu_R[ra], I16);
+        break;
+
     case 0x1c:    /* l.cust1 */
         LOG_DIS("l.cust1\n");
         break;
@@ -891,117 +874,63 @@ static void dec_misc(DisasContext *dc, uint32_t insn)
         goto do_load;
 
     do_load:
-        {
-            TCGv t0 = tcg_temp_new();
-            tcg_gen_addi_tl(t0, cpu_R[ra], sign_extend(I16, 16));
-            tcg_gen_qemu_ld_tl(cpu_R[rd], t0, dc->mem_idx, mop);
-            tcg_temp_free(t0);
-        }
+        check_r0_write(rd);
+        t0 = tcg_temp_new();
+        tcg_gen_addi_tl(t0, cpu_R[ra], I16);
+        tcg_gen_qemu_ld_tl(cpu_R[rd], t0, dc->mem_idx, mop);
+        tcg_temp_free(t0);
         break;
 
     case 0x27:    /* l.addi */
         LOG_DIS("l.addi r%d, r%d, %d\n", rd, ra, I16);
-        {
-            if (I16 == 0) {
-                tcg_gen_mov_tl(cpu_R[rd], cpu_R[ra]);
-            } else {
-                TCGLabel *lab = gen_new_label();
-                TCGv_i64 ta = tcg_temp_new_i64();
-                TCGv_i64 td = tcg_temp_local_new_i64();
-                TCGv_i32 res = tcg_temp_local_new_i32();
-                TCGv_i32 sr_ove = tcg_temp_local_new_i32();
-                tcg_gen_extu_i32_i64(ta, cpu_R[ra]);
-                tcg_gen_addi_i64(td, ta, sign_extend(I16, 16));
-                tcg_gen_extrl_i64_i32(res, td);
-                tcg_gen_shri_i64(td, td, 32);
-                tcg_gen_andi_i64(td, td, 0x3);
-                /* Jump to lab when no overflow.  */
-                tcg_gen_brcondi_i64(TCG_COND_EQ, td, 0x0, lab);
-                tcg_gen_brcondi_i64(TCG_COND_EQ, td, 0x3, lab);
-                tcg_gen_ori_i32(cpu_sr, cpu_sr, (SR_OV | SR_CY));
-                tcg_gen_andi_i32(sr_ove, cpu_sr, SR_OVE);
-                tcg_gen_brcondi_i32(TCG_COND_NE, sr_ove, SR_OVE, lab);
-                gen_exception(dc, EXCP_RANGE);
-                gen_set_label(lab);
-                tcg_gen_mov_i32(cpu_R[rd], res);
-                tcg_temp_free_i64(ta);
-                tcg_temp_free_i64(td);
-                tcg_temp_free_i32(res);
-                tcg_temp_free_i32(sr_ove);
-            }
-        }
+        check_r0_write(rd);
+        t0 = tcg_const_tl(I16);
+        gen_add(dc, cpu_R[rd], cpu_R[ra], t0);
+        tcg_temp_free(t0);
         break;
 
     case 0x28:    /* l.addic */
         LOG_DIS("l.addic r%d, r%d, %d\n", rd, ra, I16);
-        {
-            TCGLabel *lab = gen_new_label();
-            TCGv_i64 ta = tcg_temp_new_i64();
-            TCGv_i64 td = tcg_temp_local_new_i64();
-            TCGv_i64 tcy = tcg_temp_local_new_i64();
-            TCGv_i32 res = tcg_temp_local_new_i32();
-            TCGv_i32 sr_cy = tcg_temp_local_new_i32();
-            TCGv_i32 sr_ove = tcg_temp_local_new_i32();
-            tcg_gen_extu_i32_i64(ta, cpu_R[ra]);
-            tcg_gen_andi_i32(sr_cy, cpu_sr, SR_CY);
-            tcg_gen_shri_i32(sr_cy, sr_cy, 10);
-            tcg_gen_extu_i32_i64(tcy, sr_cy);
-            tcg_gen_addi_i64(td, ta, sign_extend(I16, 16));
-            tcg_gen_add_i64(td, td, tcy);
-            tcg_gen_extrl_i64_i32(res, td);
-            tcg_gen_shri_i64(td, td, 32);
-            tcg_gen_andi_i64(td, td, 0x3);
-            /* Jump to lab when no overflow.  */
-            tcg_gen_brcondi_i64(TCG_COND_EQ, td, 0x0, lab);
-            tcg_gen_brcondi_i64(TCG_COND_EQ, td, 0x3, lab);
-            tcg_gen_ori_i32(cpu_sr, cpu_sr, (SR_OV | SR_CY));
-            tcg_gen_andi_i32(sr_ove, cpu_sr, SR_OVE);
-            tcg_gen_brcondi_i32(TCG_COND_NE, sr_ove, SR_OVE, lab);
-            gen_exception(dc, EXCP_RANGE);
-            gen_set_label(lab);
-            tcg_gen_mov_i32(cpu_R[rd], res);
-            tcg_temp_free_i64(ta);
-            tcg_temp_free_i64(td);
-            tcg_temp_free_i64(tcy);
-            tcg_temp_free_i32(res);
-            tcg_temp_free_i32(sr_cy);
-            tcg_temp_free_i32(sr_ove);
-        }
+        check_r0_write(rd);
+        t0 = tcg_const_tl(I16);
+        gen_addc(dc, cpu_R[rd], cpu_R[ra], t0);
+        tcg_temp_free(t0);
         break;
 
     case 0x29:    /* l.andi */
-        LOG_DIS("l.andi r%d, r%d, %d\n", rd, ra, I16);
-        tcg_gen_andi_tl(cpu_R[rd], cpu_R[ra], zero_extend(I16, 16));
+        LOG_DIS("l.andi r%d, r%d, %d\n", rd, ra, K16);
+        check_r0_write(rd);
+        tcg_gen_andi_tl(cpu_R[rd], cpu_R[ra], K16);
         break;
 
     case 0x2a:    /* l.ori */
-        LOG_DIS("l.ori r%d, r%d, %d\n", rd, ra, I16);
-        tcg_gen_ori_tl(cpu_R[rd], cpu_R[ra], zero_extend(I16, 16));
+        LOG_DIS("l.ori r%d, r%d, %d\n", rd, ra, K16);
+        check_r0_write(rd);
+        tcg_gen_ori_tl(cpu_R[rd], cpu_R[ra], K16);
         break;
 
     case 0x2b:    /* l.xori */
         LOG_DIS("l.xori r%d, r%d, %d\n", rd, ra, I16);
-        tcg_gen_xori_tl(cpu_R[rd], cpu_R[ra], sign_extend(I16, 16));
+        check_r0_write(rd);
+        tcg_gen_xori_tl(cpu_R[rd], cpu_R[ra], I16);
         break;
 
     case 0x2c:    /* l.muli */
         LOG_DIS("l.muli r%d, r%d, %d\n", rd, ra, I16);
-        if (ra != 0 && I16 != 0) {
-            TCGv_i32 im = tcg_const_i32(I16);
-            gen_helper_mul32(cpu_R[rd], cpu_env, cpu_R[ra], im);
-            tcg_temp_free_i32(im);
-        } else {
-            tcg_gen_movi_tl(cpu_R[rd], 0x0);
-        }
+        check_r0_write(rd);
+        t0 = tcg_const_tl(I16);
+        gen_mul(dc, cpu_R[rd], cpu_R[ra], t0);
+        tcg_temp_free(t0);
         break;
 
     case 0x2d:    /* l.mfspr */
-        LOG_DIS("l.mfspr r%d, r%d, %d\n", rd, ra, I16);
+        LOG_DIS("l.mfspr r%d, r%d, %d\n", rd, ra, K16);
+        check_r0_write(rd);
         {
 #if defined(CONFIG_USER_ONLY)
             return;
 #else
-            TCGv_i32 ti = tcg_const_i32(I16);
+            TCGv_i32 ti = tcg_const_i32(K16);
             if (dc->mem_idx == MMU_USER_IDX) {
                 gen_illegal_exception(dc);
                 return;
@@ -1013,12 +942,12 @@ static void dec_misc(DisasContext *dc, uint32_t insn)
         break;
 
     case 0x30:    /* l.mtspr */
-        LOG_DIS("l.mtspr %d, r%d, r%d, %d\n", I5, ra, rb, I11);
+        LOG_DIS("l.mtspr r%d, r%d, %d\n", ra, rb, K5_11);
         {
 #if defined(CONFIG_USER_ONLY)
             return;
 #else
-            TCGv_i32 im = tcg_const_i32(tmp);
+            TCGv_i32 im = tcg_const_i32(K5_11);
             if (dc->mem_idx == MMU_USER_IDX) {
                 gen_illegal_exception(dc);
                 return;
@@ -1029,34 +958,39 @@ static void dec_misc(DisasContext *dc, uint32_t insn)
         }
         break;
 
+    case 0x33: /* l.swa */
+        LOG_DIS("l.swa r%d, r%d, %d\n", ra, rb, I5_11);
+        gen_swa(dc, rb, cpu_R[ra], I5_11);
+        break;
+
 /* not used yet, open it when we need or64.  */
 /*#ifdef TARGET_OPENRISC64
     case 0x34:     l.sd
-        LOG_DIS("l.sd %d, r%d, r%d, %d\n", I5, ra, rb, I11);
+        LOG_DIS("l.sd r%d, r%d, %d\n", ra, rb, I5_11);
         check_ob64s(dc);
         mop = MO_TEQ;
         goto do_store;
 #endif*/
 
     case 0x35:    /* l.sw */
-        LOG_DIS("l.sw %d, r%d, r%d, %d\n", I5, ra, rb, I11);
+        LOG_DIS("l.sw r%d, r%d, %d\n", ra, rb, I5_11);
         mop = MO_TEUL;
         goto do_store;
 
     case 0x36:    /* l.sb */
-        LOG_DIS("l.sb %d, r%d, r%d, %d\n", I5, ra, rb, I11);
+        LOG_DIS("l.sb r%d, r%d, %d\n", ra, rb, I5_11);
         mop = MO_UB;
         goto do_store;
 
     case 0x37:    /* l.sh */
-        LOG_DIS("l.sh %d, r%d, r%d, %d\n", I5, ra, rb, I11);
+        LOG_DIS("l.sh r%d, r%d, %d\n", ra, rb, I5_11);
         mop = MO_TEUW;
         goto do_store;
 
     do_store:
         {
             TCGv t0 = tcg_temp_new();
-            tcg_gen_addi_tl(t0, cpu_R[ra], sign_extend(tmp, 16));
+            tcg_gen_addi_tl(t0, cpu_R[ra], I5_11);
             tcg_gen_qemu_st_tl(cpu_R[rb], t0, dc->mem_idx, mop);
             tcg_temp_free(t0);
         }
@@ -1079,40 +1013,22 @@ static void dec_mac(DisasContext *dc, uint32_t insn)
     switch (op0) {
     case 0x0001:    /* l.mac */
         LOG_DIS("l.mac r%d, r%d\n", ra, rb);
-        {
-            TCGv_i32 t0 = tcg_temp_new_i32();
-            TCGv_i64 t1 = tcg_temp_new_i64();
-            TCGv_i64 t2 = tcg_temp_new_i64();
-            tcg_gen_mul_tl(t0, cpu_R[ra], cpu_R[rb]);
-            tcg_gen_ext_i32_i64(t1, t0);
-            tcg_gen_concat_i32_i64(t2, maclo, machi);
-            tcg_gen_add_i64(t2, t2, t1);
-            tcg_gen_extrl_i64_i32(maclo, t2);
-            tcg_gen_shri_i64(t2, t2, 32);
-            tcg_gen_extrl_i64_i32(machi, t2);
-            tcg_temp_free_i32(t0);
-            tcg_temp_free_i64(t1);
-            tcg_temp_free_i64(t2);
-        }
+        gen_mac(dc, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x0002:    /* l.msb */
         LOG_DIS("l.msb r%d, r%d\n", ra, rb);
-        {
-            TCGv_i32 t0 = tcg_temp_new_i32();
-            TCGv_i64 t1 = tcg_temp_new_i64();
-            TCGv_i64 t2 = tcg_temp_new_i64();
-            tcg_gen_mul_tl(t0, cpu_R[ra], cpu_R[rb]);
-            tcg_gen_ext_i32_i64(t1, t0);
-            tcg_gen_concat_i32_i64(t2, maclo, machi);
-            tcg_gen_sub_i64(t2, t2, t1);
-            tcg_gen_extrl_i64_i32(maclo, t2);
-            tcg_gen_shri_i64(t2, t2, 32);
-            tcg_gen_extrl_i64_i32(machi, t2);
-            tcg_temp_free_i32(t0);
-            tcg_temp_free_i64(t1);
-            tcg_temp_free_i64(t2);
-        }
+        gen_msb(dc, cpu_R[ra], cpu_R[rb]);
+        break;
+
+    case 0x0003:    /* l.macu */
+        LOG_DIS("l.macu r%d, r%d\n", ra, rb);
+        gen_macu(dc, cpu_R[ra], cpu_R[rb]);
+        break;
+
+    case 0x0004:    /* l.msbu */
+        LOG_DIS("l.msbu r%d, r%d\n", ra, rb);
+        gen_msbu(dc, cpu_R[ra], cpu_R[rb]);
         break;
 
     default:
@@ -1124,30 +1040,33 @@ static void dec_mac(DisasContext *dc, uint32_t insn)
 static void dec_logic(DisasContext *dc, uint32_t insn)
 {
     uint32_t op0;
-    uint32_t rd, ra, L6;
+    uint32_t rd, ra, L6, S6;
     op0 = extract32(insn, 6, 2);
     rd = extract32(insn, 21, 5);
     ra = extract32(insn, 16, 5);
     L6 = extract32(insn, 0, 6);
+    S6 = L6 & (TARGET_LONG_BITS - 1);
 
+    check_r0_write(rd);
     switch (op0) {
     case 0x00:    /* l.slli */
         LOG_DIS("l.slli r%d, r%d, %d\n", rd, ra, L6);
-        tcg_gen_shli_tl(cpu_R[rd], cpu_R[ra], (L6 & 0x1f));
+        tcg_gen_shli_tl(cpu_R[rd], cpu_R[ra], S6);
         break;
 
     case 0x01:    /* l.srli */
         LOG_DIS("l.srli r%d, r%d, %d\n", rd, ra, L6);
-        tcg_gen_shri_tl(cpu_R[rd], cpu_R[ra], (L6 & 0x1f));
+        tcg_gen_shri_tl(cpu_R[rd], cpu_R[ra], S6);
         break;
 
     case 0x02:    /* l.srai */
         LOG_DIS("l.srai r%d, r%d, %d\n", rd, ra, L6);
-        tcg_gen_sari_tl(cpu_R[rd], cpu_R[ra], (L6 & 0x1f)); break;
+        tcg_gen_sari_tl(cpu_R[rd], cpu_R[ra], S6);
+        break;
 
     case 0x03:    /* l.rori */
         LOG_DIS("l.rori r%d, r%d, %d\n", rd, ra, L6);
-        tcg_gen_rotri_tl(cpu_R[rd], cpu_R[ra], (L6 & 0x1f));
+        tcg_gen_rotri_tl(cpu_R[rd], cpu_R[ra], S6);
         break;
 
     default:
@@ -1165,6 +1084,7 @@ static void dec_M(DisasContext *dc, uint32_t insn)
     rd = extract32(insn, 21, 5);
     K16 = extract32(insn, 0, 16);
 
+    check_r0_write(rd);
     switch (op0) {
     case 0x0:    /* l.movhi */
         LOG_DIS("l.movhi  r%d, %d\n", rd, K16);
@@ -1173,9 +1093,8 @@ static void dec_M(DisasContext *dc, uint32_t insn)
 
     case 0x1:    /* l.macrc */
         LOG_DIS("l.macrc  r%d\n", rd);
-        tcg_gen_mov_tl(cpu_R[rd], maclo);
-        tcg_gen_movi_tl(maclo, 0x0);
-        tcg_gen_movi_tl(machi, 0x0);
+        tcg_gen_trunc_i64_tl(cpu_R[rd], cpu_mac);
+        tcg_gen_movi_i64(cpu_mac, 0);
         break;
 
     default:
@@ -1193,7 +1112,6 @@ static void dec_comp(DisasContext *dc, uint32_t insn)
     ra = extract32(insn, 16, 5);
     rb = extract32(insn, 11, 5);
 
-    tcg_gen_movi_i32(env_btaken, 0x0);
     /* unsigned integers  */
     tcg_gen_ext32u_tl(cpu_R[ra], cpu_R[ra]);
     tcg_gen_ext32u_tl(cpu_R[rb], cpu_R[rb]);
@@ -1201,141 +1119,133 @@ static void dec_comp(DisasContext *dc, uint32_t insn)
     switch (op0) {
     case 0x0:    /* l.sfeq */
         LOG_DIS("l.sfeq  r%d, r%d\n", ra, rb);
-        tcg_gen_setcond_tl(TCG_COND_EQ, env_btaken, cpu_R[ra], cpu_R[rb]);
+        tcg_gen_setcond_tl(TCG_COND_EQ, cpu_sr_f, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x1:    /* l.sfne */
         LOG_DIS("l.sfne  r%d, r%d\n", ra, rb);
-        tcg_gen_setcond_tl(TCG_COND_NE, env_btaken, cpu_R[ra], cpu_R[rb]);
+        tcg_gen_setcond_tl(TCG_COND_NE, cpu_sr_f, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x2:    /* l.sfgtu */
         LOG_DIS("l.sfgtu  r%d, r%d\n", ra, rb);
-        tcg_gen_setcond_tl(TCG_COND_GTU, env_btaken, cpu_R[ra], cpu_R[rb]);
+        tcg_gen_setcond_tl(TCG_COND_GTU, cpu_sr_f, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x3:    /* l.sfgeu */
         LOG_DIS("l.sfgeu  r%d, r%d\n", ra, rb);
-        tcg_gen_setcond_tl(TCG_COND_GEU, env_btaken, cpu_R[ra], cpu_R[rb]);
+        tcg_gen_setcond_tl(TCG_COND_GEU, cpu_sr_f, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x4:    /* l.sfltu */
         LOG_DIS("l.sfltu  r%d, r%d\n", ra, rb);
-        tcg_gen_setcond_tl(TCG_COND_LTU, env_btaken, cpu_R[ra], cpu_R[rb]);
+        tcg_gen_setcond_tl(TCG_COND_LTU, cpu_sr_f, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x5:    /* l.sfleu */
         LOG_DIS("l.sfleu  r%d, r%d\n", ra, rb);
-        tcg_gen_setcond_tl(TCG_COND_LEU, env_btaken, cpu_R[ra], cpu_R[rb]);
+        tcg_gen_setcond_tl(TCG_COND_LEU, cpu_sr_f, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0xa:    /* l.sfgts */
         LOG_DIS("l.sfgts  r%d, r%d\n", ra, rb);
-        tcg_gen_setcond_tl(TCG_COND_GT, env_btaken, cpu_R[ra], cpu_R[rb]);
+        tcg_gen_setcond_tl(TCG_COND_GT, cpu_sr_f, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0xb:    /* l.sfges */
         LOG_DIS("l.sfges  r%d, r%d\n", ra, rb);
-        tcg_gen_setcond_tl(TCG_COND_GE, env_btaken, cpu_R[ra], cpu_R[rb]);
+        tcg_gen_setcond_tl(TCG_COND_GE, cpu_sr_f, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0xc:    /* l.sflts */
         LOG_DIS("l.sflts  r%d, r%d\n", ra, rb);
-        tcg_gen_setcond_tl(TCG_COND_LT, env_btaken, cpu_R[ra], cpu_R[rb]);
+        tcg_gen_setcond_tl(TCG_COND_LT, cpu_sr_f, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0xd:    /* l.sfles */
         LOG_DIS("l.sfles  r%d, r%d\n", ra, rb);
-        tcg_gen_setcond_tl(TCG_COND_LE, env_btaken, cpu_R[ra], cpu_R[rb]);
+        tcg_gen_setcond_tl(TCG_COND_LE, cpu_sr_f, cpu_R[ra], cpu_R[rb]);
         break;
 
     default:
         gen_illegal_exception(dc);
         break;
     }
-    wb_SR_F();
 }
 
 static void dec_compi(DisasContext *dc, uint32_t insn)
 {
-    uint32_t op0;
-    uint32_t ra, I16;
+    uint32_t op0, ra;
+    int32_t I16;
 
     op0 = extract32(insn, 21, 5);
     ra = extract32(insn, 16, 5);
-    I16 = extract32(insn, 0, 16);
-
-    tcg_gen_movi_i32(env_btaken, 0x0);
-    I16 = sign_extend(I16, 16);
+    I16 = sextract32(insn, 0, 16);
 
     switch (op0) {
     case 0x0:    /* l.sfeqi */
         LOG_DIS("l.sfeqi  r%d, %d\n", ra, I16);
-        tcg_gen_setcondi_tl(TCG_COND_EQ, env_btaken, cpu_R[ra], I16);
+        tcg_gen_setcondi_tl(TCG_COND_EQ, cpu_sr_f, cpu_R[ra], I16);
         break;
 
     case 0x1:    /* l.sfnei */
         LOG_DIS("l.sfnei  r%d, %d\n", ra, I16);
-        tcg_gen_setcondi_tl(TCG_COND_NE, env_btaken, cpu_R[ra], I16);
+        tcg_gen_setcondi_tl(TCG_COND_NE, cpu_sr_f, cpu_R[ra], I16);
         break;
 
     case 0x2:    /* l.sfgtui */
         LOG_DIS("l.sfgtui  r%d, %d\n", ra, I16);
-        tcg_gen_setcondi_tl(TCG_COND_GTU, env_btaken, cpu_R[ra], I16);
+        tcg_gen_setcondi_tl(TCG_COND_GTU, cpu_sr_f, cpu_R[ra], I16);
         break;
 
     case 0x3:    /* l.sfgeui */
         LOG_DIS("l.sfgeui  r%d, %d\n", ra, I16);
-        tcg_gen_setcondi_tl(TCG_COND_GEU, env_btaken, cpu_R[ra], I16);
+        tcg_gen_setcondi_tl(TCG_COND_GEU, cpu_sr_f, cpu_R[ra], I16);
         break;
 
     case 0x4:    /* l.sfltui */
         LOG_DIS("l.sfltui  r%d, %d\n", ra, I16);
-        tcg_gen_setcondi_tl(TCG_COND_LTU, env_btaken, cpu_R[ra], I16);
+        tcg_gen_setcondi_tl(TCG_COND_LTU, cpu_sr_f, cpu_R[ra], I16);
         break;
 
     case 0x5:    /* l.sfleui */
         LOG_DIS("l.sfleui  r%d, %d\n", ra, I16);
-        tcg_gen_setcondi_tl(TCG_COND_LEU, env_btaken, cpu_R[ra], I16);
+        tcg_gen_setcondi_tl(TCG_COND_LEU, cpu_sr_f, cpu_R[ra], I16);
         break;
 
     case 0xa:    /* l.sfgtsi */
         LOG_DIS("l.sfgtsi  r%d, %d\n", ra, I16);
-        tcg_gen_setcondi_tl(TCG_COND_GT, env_btaken, cpu_R[ra], I16);
+        tcg_gen_setcondi_tl(TCG_COND_GT, cpu_sr_f, cpu_R[ra], I16);
         break;
 
     case 0xb:    /* l.sfgesi */
         LOG_DIS("l.sfgesi  r%d, %d\n", ra, I16);
-        tcg_gen_setcondi_tl(TCG_COND_GE, env_btaken, cpu_R[ra], I16);
+        tcg_gen_setcondi_tl(TCG_COND_GE, cpu_sr_f, cpu_R[ra], I16);
         break;
 
     case 0xc:    /* l.sfltsi */
         LOG_DIS("l.sfltsi  r%d, %d\n", ra, I16);
-        tcg_gen_setcondi_tl(TCG_COND_LT, env_btaken, cpu_R[ra], I16);
+        tcg_gen_setcondi_tl(TCG_COND_LT, cpu_sr_f, cpu_R[ra], I16);
         break;
 
     case 0xd:    /* l.sflesi */
         LOG_DIS("l.sflesi  r%d, %d\n", ra, I16);
-        tcg_gen_setcondi_tl(TCG_COND_LE, env_btaken, cpu_R[ra], I16);
+        tcg_gen_setcondi_tl(TCG_COND_LE, cpu_sr_f, cpu_R[ra], I16);
         break;
 
     default:
         gen_illegal_exception(dc);
         break;
     }
-    wb_SR_F();
 }
 
 static void dec_sys(DisasContext *dc, uint32_t insn)
 {
     uint32_t op0;
-#ifdef OPENRISC_DISAS
     uint32_t K16;
-#endif
+
     op0 = extract32(insn, 16, 10);
-#ifdef OPENRISC_DISAS
     K16 = extract32(insn, 0, 16);
-#endif
 
     switch (op0) {
     case 0x000:    /* l.sys */
@@ -1347,52 +1257,21 @@ static void dec_sys(DisasContext *dc, uint32_t insn)
 
     case 0x100:    /* l.trap */
         LOG_DIS("l.trap %d\n", K16);
-#if defined(CONFIG_USER_ONLY)
-        return;
-#else
-        if (dc->mem_idx == MMU_USER_IDX) {
-            gen_illegal_exception(dc);
-            return;
-        }
         tcg_gen_movi_tl(cpu_pc, dc->pc);
         gen_exception(dc, EXCP_TRAP);
-#endif
         break;
 
     case 0x300:    /* l.csync */
         LOG_DIS("l.csync\n");
-#if defined(CONFIG_USER_ONLY)
-        return;
-#else
-        if (dc->mem_idx == MMU_USER_IDX) {
-            gen_illegal_exception(dc);
-            return;
-        }
-#endif
         break;
 
     case 0x200:    /* l.msync */
         LOG_DIS("l.msync\n");
-#if defined(CONFIG_USER_ONLY)
-        return;
-#else
-        if (dc->mem_idx == MMU_USER_IDX) {
-            gen_illegal_exception(dc);
-            return;
-        }
-#endif
+        tcg_gen_mb(TCG_MO_ALL);
         break;
 
     case 0x270:    /* l.psync */
         LOG_DIS("l.psync\n");
-#if defined(CONFIG_USER_ONLY)
-        return;
-#else
-        if (dc->mem_idx == MMU_USER_IDX) {
-            gen_illegal_exception(dc);
-            return;
-        }
-#endif
         break;
 
     default:
@@ -1413,78 +1292,81 @@ static void dec_float(DisasContext *dc, uint32_t insn)
     switch (op0) {
     case 0x00:    /* lf.add.s */
         LOG_DIS("lf.add.s r%d, r%d, r%d\n", rd, ra, rb);
+        check_r0_write(rd);
         gen_helper_float_add_s(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x01:    /* lf.sub.s */
         LOG_DIS("lf.sub.s r%d, r%d, r%d\n", rd, ra, rb);
+        check_r0_write(rd);
         gen_helper_float_sub_s(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
-
     case 0x02:    /* lf.mul.s */
         LOG_DIS("lf.mul.s r%d, r%d, r%d\n", rd, ra, rb);
-        if (ra != 0 && rb != 0) {
-            gen_helper_float_mul_s(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
-        } else {
-            tcg_gen_ori_tl(fpcsr, fpcsr, FPCSR_ZF);
-            tcg_gen_movi_i32(cpu_R[rd], 0x0);
-        }
+        check_r0_write(rd);
+        gen_helper_float_mul_s(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x03:    /* lf.div.s */
         LOG_DIS("lf.div.s r%d, r%d, r%d\n", rd, ra, rb);
+        check_r0_write(rd);
         gen_helper_float_div_s(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x04:    /* lf.itof.s */
         LOG_DIS("lf.itof r%d, r%d\n", rd, ra);
+        check_r0_write(rd);
         gen_helper_itofs(cpu_R[rd], cpu_env, cpu_R[ra]);
         break;
 
     case 0x05:    /* lf.ftoi.s */
         LOG_DIS("lf.ftoi r%d, r%d\n", rd, ra);
+        check_r0_write(rd);
         gen_helper_ftois(cpu_R[rd], cpu_env, cpu_R[ra]);
         break;
 
     case 0x06:    /* lf.rem.s */
         LOG_DIS("lf.rem.s r%d, r%d, r%d\n", rd, ra, rb);
+        check_r0_write(rd);
         gen_helper_float_rem_s(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x07:    /* lf.madd.s */
         LOG_DIS("lf.madd.s r%d, r%d, r%d\n", rd, ra, rb);
-        gen_helper_float_muladd_s(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
+        check_r0_write(rd);
+        gen_helper_float_madd_s(cpu_R[rd], cpu_env, cpu_R[rd],
+                                cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x08:    /* lf.sfeq.s */
         LOG_DIS("lf.sfeq.s r%d, r%d\n", ra, rb);
-        gen_helper_float_eq_s(env_btaken, cpu_env, cpu_R[ra], cpu_R[rb]);
+        gen_helper_float_eq_s(cpu_sr_f, cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x09:    /* lf.sfne.s */
         LOG_DIS("lf.sfne.s r%d, r%d\n", ra, rb);
-        gen_helper_float_ne_s(env_btaken, cpu_env, cpu_R[ra], cpu_R[rb]);
+        gen_helper_float_ne_s(cpu_sr_f, cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x0a:    /* lf.sfgt.s */
         LOG_DIS("lf.sfgt.s r%d, r%d\n", ra, rb);
-        gen_helper_float_gt_s(env_btaken, cpu_env, cpu_R[ra], cpu_R[rb]);
+        gen_helper_float_gt_s(cpu_sr_f, cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x0b:    /* lf.sfge.s */
         LOG_DIS("lf.sfge.s r%d, r%d\n", ra, rb);
-        gen_helper_float_ge_s(env_btaken, cpu_env, cpu_R[ra], cpu_R[rb]);
+        gen_helper_float_ge_s(cpu_sr_f, cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x0c:    /* lf.sflt.s */
         LOG_DIS("lf.sflt.s r%d, r%d\n", ra, rb);
-        gen_helper_float_lt_s(env_btaken, cpu_env, cpu_R[ra], cpu_R[rb]);
+        gen_helper_float_lt_s(cpu_sr_f, cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x0d:    /* lf.sfle.s */
         LOG_DIS("lf.sfle.s r%d, r%d\n", ra, rb);
-        gen_helper_float_le_s(env_btaken, cpu_env, cpu_R[ra], cpu_R[rb]);
+        gen_helper_float_le_s(cpu_sr_f, cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
 /* not used yet, open it when we need or64.  */
@@ -1492,90 +1374,94 @@ static void dec_float(DisasContext *dc, uint32_t insn)
     case 0x10:     lf.add.d
         LOG_DIS("lf.add.d r%d, r%d, r%d\n", rd, ra, rb);
         check_of64s(dc);
+        check_r0_write(rd);
         gen_helper_float_add_d(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x11:     lf.sub.d
         LOG_DIS("lf.sub.d r%d, r%d, r%d\n", rd, ra, rb);
         check_of64s(dc);
+        check_r0_write(rd);
         gen_helper_float_sub_d(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x12:     lf.mul.d
         LOG_DIS("lf.mul.d r%d, r%d, r%d\n", rd, ra, rb);
         check_of64s(dc);
-        if (ra != 0 && rb != 0) {
-            gen_helper_float_mul_d(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
-        } else {
-            tcg_gen_ori_tl(fpcsr, fpcsr, FPCSR_ZF);
-            tcg_gen_movi_i64(cpu_R[rd], 0x0);
-        }
+        check_r0_write(rd);
+        gen_helper_float_mul_d(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x13:     lf.div.d
         LOG_DIS("lf.div.d r%d, r%d, r%d\n", rd, ra, rb);
         check_of64s(dc);
+        check_r0_write(rd);
         gen_helper_float_div_d(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x14:     lf.itof.d
         LOG_DIS("lf.itof r%d, r%d\n", rd, ra);
         check_of64s(dc);
+        check_r0_write(rd);
         gen_helper_itofd(cpu_R[rd], cpu_env, cpu_R[ra]);
         break;
 
     case 0x15:     lf.ftoi.d
         LOG_DIS("lf.ftoi r%d, r%d\n", rd, ra);
         check_of64s(dc);
+        check_r0_write(rd);
         gen_helper_ftoid(cpu_R[rd], cpu_env, cpu_R[ra]);
         break;
 
     case 0x16:     lf.rem.d
         LOG_DIS("lf.rem.d r%d, r%d, r%d\n", rd, ra, rb);
         check_of64s(dc);
+        check_r0_write(rd);
         gen_helper_float_rem_d(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x17:     lf.madd.d
         LOG_DIS("lf.madd.d r%d, r%d, r%d\n", rd, ra, rb);
         check_of64s(dc);
-        gen_helper_float_muladd_d(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
+        check_r0_write(rd);
+        gen_helper_float_madd_d(cpu_R[rd], cpu_env, cpu_R[rd],
+                                cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x18:     lf.sfeq.d
         LOG_DIS("lf.sfeq.d r%d, r%d\n", ra, rb);
         check_of64s(dc);
-        gen_helper_float_eq_d(env_btaken, cpu_env, cpu_R[ra], cpu_R[rb]);
+        gen_helper_float_eq_d(cpu_sr_f, cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x1a:     lf.sfgt.d
         LOG_DIS("lf.sfgt.d r%d, r%d\n", ra, rb);
         check_of64s(dc);
-        gen_helper_float_gt_d(env_btaken, cpu_env, cpu_R[ra], cpu_R[rb]);
+        gen_helper_float_gt_d(cpu_sr_f, cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x1b:     lf.sfge.d
         LOG_DIS("lf.sfge.d r%d, r%d\n", ra, rb);
         check_of64s(dc);
-        gen_helper_float_ge_d(env_btaken, cpu_env, cpu_R[ra], cpu_R[rb]);
+        gen_helper_float_ge_d(cpu_sr_f, cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x19:     lf.sfne.d
         LOG_DIS("lf.sfne.d r%d, r%d\n", ra, rb);
         check_of64s(dc);
-        gen_helper_float_ne_d(env_btaken, cpu_env, cpu_R[ra], cpu_R[rb]);
+        gen_helper_float_ne_d(cpu_sr_f, cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x1c:     lf.sflt.d
         LOG_DIS("lf.sflt.d r%d, r%d\n", ra, rb);
         check_of64s(dc);
-        gen_helper_float_lt_d(env_btaken, cpu_env, cpu_R[ra], cpu_R[rb]);
+        gen_helper_float_lt_d(cpu_sr_f, cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 
     case 0x1d:     lf.sfle.d
         LOG_DIS("lf.sfle.d r%d, r%d\n", ra, rb);
         check_of64s(dc);
-        gen_helper_float_le_d(env_btaken, cpu_env, cpu_R[ra], cpu_R[rb]);
+        gen_helper_float_le_d(cpu_sr_f, cpu_env, cpu_R[ra], cpu_R[rb]);
         break;
 #endif*/
 
@@ -1583,7 +1469,6 @@ static void dec_float(DisasContext *dc, uint32_t insn)
         gen_illegal_exception(dc);
         break;
     }
-    wb_SR_F();
 }
 
 static void disas_openrisc_insn(DisasContext *dc, OpenRISCCPU *cpu)
@@ -1646,12 +1531,10 @@ void gen_intermediate_code(CPUOpenRISCState *env, struct TranslationBlock *tb)
     dc->tb = tb;
 
     dc->is_jmp = DISAS_NEXT;
-    dc->ppc = pc_start;
     dc->pc = pc_start;
-    dc->flags = cpu->env.cpucfgr;
     dc->mem_idx = cpu_mmu_index(&cpu->env, false);
-    dc->synced_flags = dc->tb_flags = tb->flags;
-    dc->delayed_branch = !!(dc->tb_flags & D_FLAG);
+    dc->tb_flags = tb->flags;
+    dc->delayed_branch = (dc->tb_flags & TB_FLAGS_DFLAG) != 0;
     dc->singlestep_enabled = cs->singlestep_enabled;
 
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
@@ -1665,10 +1548,26 @@ void gen_intermediate_code(CPUOpenRISCState *env, struct TranslationBlock *tb)
         max_insns = TCG_MAX_INSNS;
     }
 
+    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
+        && qemu_log_in_addr_range(pc_start)) {
+        qemu_log_lock();
+        qemu_log("----------------\n");
+        qemu_log("IN: %s\n", lookup_symbol(pc_start));
+    }
+
     gen_tb_start(tb);
 
+    /* Allow the TCG optimizer to see that R0 == 0,
+       when it's true, which is the common case.  */
+    if (dc->tb_flags & TB_FLAGS_R0_0) {
+        cpu_R[0] = tcg_const_tl(0);
+    } else {
+        cpu_R[0] = cpu_R0;
+    }
+
     do {
-        tcg_gen_insn_start(dc->pc);
+        tcg_gen_insn_start(dc->pc, (dc->delayed_branch ? 1 : 0)
+			   | (num_insns ? 2 : 0));
         num_insns++;
 
         if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) {
@@ -1686,23 +1585,16 @@ void gen_intermediate_code(CPUOpenRISCState *env, struct TranslationBlock *tb)
         if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
-        dc->ppc = dc->pc - 4;
-        dc->npc = dc->pc + 4;
-        tcg_gen_movi_tl(cpu_ppc, dc->ppc);
-        tcg_gen_movi_tl(cpu_npc, dc->npc);
         disas_openrisc_insn(dc, cpu);
-        dc->pc = dc->npc;
+        dc->pc = dc->pc + 4;
+
         /* delay slot */
         if (dc->delayed_branch) {
             dc->delayed_branch--;
             if (!dc->delayed_branch) {
-                dc->tb_flags &= ~D_FLAG;
-                gen_sync_flags(dc);
                 tcg_gen_mov_tl(cpu_pc, jmp_pc);
-                tcg_gen_mov_tl(cpu_npc, jmp_pc);
-                tcg_gen_movi_tl(jmp_pc, 0);
-                tcg_gen_exit_tb(0);
-                dc->is_jmp = DISAS_JUMP;
+                tcg_gen_discard_tl(jmp_pc);
+                dc->is_jmp = DISAS_UPDATE;
                 break;
             }
         }
@@ -1716,14 +1608,17 @@ void gen_intermediate_code(CPUOpenRISCState *env, struct TranslationBlock *tb)
     if (tb->cflags & CF_LAST_IO) {
         gen_io_end();
     }
+
+    if ((dc->tb_flags & TB_FLAGS_DFLAG ? 1 : 0) != (dc->delayed_branch != 0)) {
+        tcg_gen_movi_i32(cpu_dflag, dc->delayed_branch != 0);
+    }
+
+    tcg_gen_movi_tl(cpu_ppc, dc->pc - 4);
     if (dc->is_jmp == DISAS_NEXT) {
         dc->is_jmp = DISAS_UPDATE;
         tcg_gen_movi_tl(cpu_pc, dc->pc);
     }
     if (unlikely(cs->singlestep_enabled)) {
-        if (dc->is_jmp == DISAS_NEXT) {
-            tcg_gen_movi_tl(cpu_pc, dc->pc);
-        }
         gen_exception(dc, EXCP_DEBUG);
     } else {
         switch (dc->is_jmp) {
@@ -1749,18 +1644,12 @@ void gen_intermediate_code(CPUOpenRISCState *env, struct TranslationBlock *tb)
     tb->size = dc->pc - pc_start;
     tb->icount = num_insns;
 
-#ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
         && qemu_log_in_addr_range(pc_start)) {
-        qemu_log_lock();
-        qemu_log("----------------\n");
-        qemu_log("IN: %s\n", lookup_symbol(pc_start));
-        log_target_disas(cs, pc_start, dc->pc - pc_start, 0);
-        qemu_log("\nisize=%d osize=%d\n",
-                 dc->pc - pc_start, tcg_op_buf_count());
+        log_target_disas(cs, pc_start, tb->size, 0);
+        qemu_log("\n");
         qemu_log_unlock();
     }
-#endif
 }
 
 void openrisc_cpu_dump_state(CPUState *cs, FILE *f,
@@ -1782,4 +1671,8 @@ void restore_state_to_opc(CPUOpenRISCState *env, TranslationBlock *tb,
                           target_ulong *data)
 {
     env->pc = data[0];
+    env->dflag = data[1] & 1;
+    if (data[1] & 2) {
+        env->ppc = env->pc - 4;
+    }
 }
diff --git a/tests/tcg/openrisc/Makefile b/tests/tcg/openrisc/Makefile
index 7e65888761..fb5ceda512 100644
--- a/tests/tcg/openrisc/Makefile
+++ b/tests/tcg/openrisc/Makefile
@@ -1,8 +1,8 @@
 -include ../../config-host.mak
 
-CROSS = or32-linux-
+CROSS = or1k-linux-
 
-SIM = qemu-or32
+SIM = qemu-or1k
 
 CC = $(CROSS)gcc