about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Yang Liu <numbksco@gmail.com> 2024-03-02 16:45:55 +0800
committer: GitHub <noreply@github.com> 2024-03-02 09:45:55 +0100
commit: 829c254b1f5e14a48a8f096fdccd2b3637aabe32 (patch)
tree: 882292e195dcf7391f43a934c96caa1afbe00d98
parent: 8bf54eda562dc2f1c4ae74c8122d3b7be98f2d22 (diff)
download: box64-829c254b1f5e14a48a8f096fdccd2b3637aabe32.tar.gz
box64-829c254b1f5e14a48a8f096fdccd2b3637aabe32.zip
[LA64_DYNAREC] Added 70-7F Jcc opcodes, refine printer and some fixes too (#1307)
* [LA64_DYNAREC] Added 70-7F Jcc opcodes and some fixes too

* [LA64_DYNAREC] Added more instructions to the printer and made the format prettier

* Make LBT truly optional

* Do not test LBT in CI

* Format

* Optimize

* Fixed printer format

* Fixed CLEAR_FLAGS macro

* Fixed xMASK

* Use $r22 ($sp) in the prolog/epilog for better semantics

* Fixed la64_next
-rw-r--r--.github/workflows/release.yml3
-rw-r--r--src/dynarec/dynarec_arch.h1
-rw-r--r--src/dynarec/la64/dynarec_la64_00.c60
-rw-r--r--src/dynarec/la64/dynarec_la64_emit_math.c28
-rw-r--r--src/dynarec/la64/dynarec_la64_emit_tests.c7
-rw-r--r--src/dynarec/la64/dynarec_la64_helper.c59
-rw-r--r--src/dynarec/la64/dynarec_la64_helper.h132
-rw-r--r--src/dynarec/la64/dynarec_la64_pass0.h1
-rw-r--r--src/dynarec/la64/la64_emitter.h78
-rw-r--r--src/dynarec/la64/la64_epilog.S4
-rw-r--r--src/dynarec/la64/la64_next.S40
-rw-r--r--src/dynarec/la64/la64_printer.c581
-rw-r--r--src/dynarec/la64/la64_prolog.S8
13 files changed, 876 insertions, 126 deletions
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index ffbbf458..f2e9afed 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -53,7 +53,7 @@ jobs:
               unzip android-ndk-r26b-linux.zip
               echo "BOX64_COMPILER=$PWD/android-ndk-r26b/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android31-clang" >> $GITHUB_ENV
               echo "BOX64_PLATFORM_MARCRO=-DANDROID=1 -DARM_DYNAREC=1 -DBAD_SIGNAL=1" >> $GITHUB_ENV
-              git clone https://github.com/termux/termux-docker.git  
+              git clone https://github.com/termux/termux-docker.git
               sudo cp -rf termux-docker/system/arm /system
               sudo chown -R $(whoami):$(whoami) /system
               sudo chmod 755 -R /system
@@ -150,7 +150,6 @@ jobs:
             INTERPRETER=qemu-riscv64-static QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ QEMU_CPU=rv64,zba=true,zbb=true,zbc=true,zbs=true ctest -j$(nproc) --output-on-failure
             INTERPRETER=qemu-riscv64-static QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ QEMU_CPU=rv64,xtheadba=true,xtheadba=true,xtheadbb=true,xtheadbs=true,xtheadcondmov=true,xtheadmemidx=true,xtheadmempair=true,xtheadfmemidx=true,xtheadmac=true,xtheadfmv=true ctest -j$(nproc) --output-on-failure
           elif [[ ${{ matrix.platform }} == 'LARCH64' ]]; then
-            INTERPRETER=qemu-loongarch64-static QEMU_LD_PREFIX=/usr/loongarch64-linux-gnu/ ctest -j$(nproc) --output-on-failure
             INTERPRETER=qemu-loongarch64-static QEMU_LD_PREFIX=/usr/loongarch64-linux-gnu/ BOX64_DYNAREC_LA64NOEXT=1 ctest -j$(nproc) --output-on-failure
             INTERPRETER=qemu-loongarch64-static QEMU_LD_PREFIX=/usr/loongarch64-linux-gnu/ BOX64_DYNAREC=0 ctest -j$(nproc) --output-on-failure
           elif [[ ${{ matrix.platform }} == 'ANDROID' ]]; then
diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h
index 9ee01282..26207428 100644
--- a/src/dynarec/dynarec_arch.h
+++ b/src/dynarec/dynarec_arch.h
@@ -24,6 +24,7 @@
 

 #define ADDITIONNAL_DEFINITION()

 

+// TODO

 #define OTHER_CACHE()

 

 #include "la64/la64_printer.h"

diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index 268c89ac..d74b3165 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -182,10 +182,17 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             nextop = F8;
             switch ((nextop >> 3) & 7) {
                 case 5: // SUB
-                    if(opcode==0x81) {INST_NAME("SUB Ed, Id");} else {INST_NAME("SUB Ed, Ib");}
+                    if (opcode == 0x81) {
+                        INST_NAME("SUB Ed, Id");
+                    } else {
+                        INST_NAME("SUB Ed, Ib");
+                    }
                     SETFLAGS(X_ALL, SF_SET_PENDING);
-                    GETED((opcode==0x81)?4:1);
-                    if(opcode==0x81) i64 = F32S; else i64 = F8S;
+                    GETED((opcode == 0x81) ? 4 : 1);
+                    if (opcode == 0x81)
+                        i64 = F32S;
+                    else
+                        i64 = F8S;
                     emit_sub32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x6);
                     WBACK;
                     break;
@@ -193,6 +200,47 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     DEFAULT;
             }
             break;
+
+        #define GO(GETFLAGS, NO, YES, F, I)                                                         \
+            READFLAGS(F);                                                                           \
+            i8 = F8S;                                                                               \
+            BARRIER(BARRIER_MAYBE);                                                                 \
+            JUMP(addr + i8, 1);                                                                     \
+            if (la64_lbt && I >= 0xC) {                                                             \
+                X64_SET_EFLAGS(xFlags, F);                                                          \
+                X64_SETJ(x1, I);                                                                    \
+            } else {                                                                                \
+                GETFLAGS;                                                                           \
+            }                                                                                       \
+            if (dyn->insts[ninst].x64.jmp_insts == -1 || CHECK_CACHE()) {                           \
+                /* out of block */                                                                  \
+                i32 = dyn->insts[ninst].epilog - (dyn->native_size);                                \
+                if (la64_lbt && I >= 0xC)                                                           \
+                    BEQZ(x1, i32);                                                                  \
+                else                                                                                \
+                    B##NO(x1, i32);                                                                 \
+                if (dyn->insts[ninst].x64.jmp_insts == -1) {                                        \
+                    if (!(dyn->insts[ninst].x64.barrier & BARRIER_FLOAT))                           \
+                        fpu_purgecache(dyn, ninst, 1, x1, x2, x3);                                  \
+                    jump_to_next(dyn, addr + i8, 0, ninst, rex.is32bits);                           \
+                } else {                                                                            \
+                    CacheTransform(dyn, ninst, cacheupd, x1, x2, x3);                               \
+                    i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \
+                    B(i32);                                                                         \
+                }                                                                                   \
+            } else {                                                                                \
+                /* inside the block */                                                              \
+                i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size);     \
+                if (la64_lbt && I >= 0xC)                                                           \
+                    BNEZ(x1, i32);                                                                  \
+                else                                                                                \
+                    B##YES(x1, i32);                                                                \
+            }
+
+            GOCOND(0x70, "J", "ib");
+
+#undef GO
+
         case 0x85:
             INST_NAME("TEST Ed, Gd");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -219,10 +267,10 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             break;
         case 0x8B:
             INST_NAME("MOV Gd, Ed");
-            nextop=F8;
+            nextop = F8;
             GETGD;
-            if(MODREG) {
-                MVxw(gd, TO_LA64((nextop&7) + (rex.b<<3)));
+            if (MODREG) {
+                MVxw(gd, TO_LA64((nextop & 7) + (rex.b << 3)));
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0);
                 SMREADLOCK(lock);
diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c
index 9543d5f1..ffcc9301 100644
--- a/src/dynarec/la64/dynarec_la64_emit_math.c
+++ b/src/dynarec/la64/dynarec_la64_emit_math.c
@@ -24,7 +24,7 @@
 // emit ADD32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
 void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
 {
-    CLEAR_FLAGS();
+    CLEAR_FLAGS(s3);
     IFX(X_PEND) {
         if (rex.w) {
             ST_D(s1, xEmu, offsetof(x64emu_t, op1));
@@ -42,7 +42,7 @@ void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         IFX(X_ALL) {
             X64_ADD_WU(s1, s2);
             X64_GET_EFLAGS(s3, X_ALL);
-            ORI(xFlags, xFlags, s3);
+            OR(xFlags, xFlags, s3);
         }
         ADDxw(s1, s1, s2);
         if (!rex.w) ZEROUP(s1);
@@ -127,7 +127,7 @@ void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 // emit ADD32 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
 void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5)
 {
-    CLEAR_FLAGS();
+    CLEAR_FLAGS(s3);
     if (s1 == xRSP && (!dyn->insts || dyn->insts[ninst].x64.gen_flags == X_PEND)) {
         // special case when doing math on ESP and only PEND is needed: ignoring it!
         if (c >= -2048 && c < 2048) {
@@ -157,7 +157,7 @@ void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
         IFX(X_ALL) {
             X64_ADD_WU(s1, s2);
             X64_GET_EFLAGS(s3, X_ALL);
-            ORI(xFlags, xFlags, s3);
+            OR(xFlags, xFlags, s3);
         }
         ADDxw(s1, s1, s2);
         if (!rex.w) ZEROUP(s1);
@@ -251,7 +251,7 @@ void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
 // emit ADD8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
 void emit_add8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
 {
-    CLEAR_FLAGS();
+    CLEAR_FLAGS(s3);
     IFX(X_PEND) {
         ST_B(s1, xEmu, offsetof(x64emu_t, op1));
         ST_B(s2, xEmu, offsetof(x64emu_t, op2));
@@ -264,7 +264,7 @@ void emit_add8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
         IFX(X_ALL) {
             X64_ADD_B(s1, s2);
             X64_GET_EFLAGS(s3, X_ALL);
-            ORI(xFlags, xFlags, s3);
+            OR(xFlags, xFlags, s3);
         }
         ADD_D(s1, s1, s2);
         ANDI(s1, s1, 0xff);
@@ -332,7 +332,7 @@ void emit_add8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
 // emit ADD8 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch
 void emit_add8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s2, int s3, int s4)
 {
-    CLEAR_FLAGS();
+    CLEAR_FLAGS(s3);
     IFX(X_PEND) {
         MOV32w(s4, c & 0xff);
         ST_B(s1, xEmu, offsetof(x64emu_t, op1));
@@ -347,7 +347,7 @@ void emit_add8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s2, int s3, i
             IFX(X_PEND) {} else { MOV32w(s4, c & 0xff); }
             X64_ADD_B(s1, s4);
             X64_GET_EFLAGS(s3, X_ALL);
-            ORI(xFlags, xFlags, s3);
+            OR(xFlags, xFlags, s3);
         }
         ADDI_D(s1, s1, c & 0xff);
         ANDI(s1, s1, 0xff);
@@ -416,7 +416,7 @@ void emit_add8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s2, int s3, i
 // emit SUB8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
 void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
 {
-    CLEAR_FLAGS();
+    CLEAR_FLAGS(s3);
     IFX(X_PEND) {
         ST_B(s1, xEmu, offsetof(x64emu_t, op1));
         ST_B(s2, xEmu, offsetof(x64emu_t, op2));
@@ -429,7 +429,7 @@ void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
         IFX(X_ALL) {
             X64_SUB_B(s1, s2);
             X64_GET_EFLAGS(s3, X_ALL);
-            ORI(xFlags, xFlags, s3);
+            OR(xFlags, xFlags, s3);
         }
         SUB_D(s1, s1, s2);
         ANDI(s1, s1, 0xff);
@@ -474,7 +474,7 @@ void emit_sub8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s2, int s3, i
 // emit SUB32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
 void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
 {
-    CLEAR_FLAGS();
+    CLEAR_FLAGS(s3);
     IFX(X_PEND) {
         SDxw(s1, xEmu, offsetof(x64emu_t, op1));
         SDxw(s2, xEmu, offsetof(x64emu_t, op2));
@@ -487,7 +487,7 @@ void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         IFX(X_ALL) {
             X64_SUB_WU(s1, s2);
             X64_GET_EFLAGS(s3, X_ALL);
-            ORI(xFlags, xFlags, s3);
+            OR(xFlags, xFlags, s3);
         }
         SUBxw(s1, s1, s2);
         if (!rex.w) ZEROUP(s1);
@@ -526,7 +526,7 @@ void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 // emit SUB32 instruction, from s1, constant c, store result in s1 using s2, s3, s4 and s5 as scratch
 void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5)
 {
-    CLEAR_FLAGS();
+    CLEAR_FLAGS(s3);
     if(s1==xRSP && (!dyn->insts || dyn->insts[ninst].x64.gen_flags==X_PEND))
     {
         // special case when doing math on RSP and only PEND is needed: ignoring it!
@@ -553,7 +553,7 @@ void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
         IFX(X_ALL) {
             X64_SUB_WU(s1, s2);
             X64_GET_EFLAGS(s3, X_ALL);
-            ORI(xFlags, xFlags, s3);
+            OR(xFlags, xFlags, s3);
         }
         SUBxw(s1, s1, s2);
         if (!rex.w) ZEROUP(s1);
diff --git a/src/dynarec/la64/dynarec_la64_emit_tests.c b/src/dynarec/la64/dynarec_la64_emit_tests.c
index f7e1938e..c425d057 100644
--- a/src/dynarec/la64/dynarec_la64_emit_tests.c
+++ b/src/dynarec/la64/dynarec_la64_emit_tests.c
@@ -25,7 +25,7 @@
 // emit TEST32 instruction, from test s1, s2, using s3 and s4 as scratch
 void emit_test32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
 {
-    CLEAR_FLAGS();
+    CLEAR_FLAGS(s3);
     IFX_PENDOR0 {
         SET_DF(s3, rex.w?d_tst64:d_tst32);
     } else {
@@ -36,12 +36,11 @@ void emit_test32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
         IFX(X_ALL) {
             if (rex.w) X64_AND_D(s1, s2); else X64_AND_W(s1, s2);
             X64_GET_EFLAGS(s3, X_ALL);
-            ORI(xFlags, xFlags, s3);
+            OR(xFlags, xFlags, s3);
         }
 
-        AND(s3, s1, s2);
-
         IFX_PENDOR0 {
+            AND(s3, s1, s2);
             SDxw(s3, xEmu, offsetof(x64emu_t, res));
         }
         return;
diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c
index 6de2422b..77c19950 100644
--- a/src/dynarec/la64/dynarec_la64_helper.c
+++ b/src/dynarec/la64/dynarec_la64_helper.c
@@ -508,3 +508,62 @@ void fpu_propagate_stack(dynarec_la64_t* dyn, int ninst)
 {
     // TODO
 }
+
+
+static void fpuCacheTransform(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3)
+{
+    // TODO
+}
+
+static void flagsCacheTransform(dynarec_la64_t* dyn, int ninst, int s1)
+{
+#if STEP > 1
+    int j64;
+    int jmp = dyn->insts[ninst].x64.jmp_insts;
+    if(jmp<0)
+        return;
+    if(dyn->f.dfnone)  // flags are fully known, nothing we can do more
+        return;
+    MESSAGE(LOG_DUMP, "\tFlags fetch ---- ninst=%d -> %d\n", ninst, jmp);
+    int go = 0;
+    switch (dyn->insts[jmp].f_entry.pending) {
+        case SF_UNKNOWN: break;
+        case SF_SET:
+            if(dyn->f.pending!=SF_SET && dyn->f.pending!=SF_SET_PENDING)
+                go = 1;
+            break;
+        case SF_SET_PENDING:
+            if(dyn->f.pending!=SF_SET
+            && dyn->f.pending!=SF_SET_PENDING
+            && dyn->f.pending!=SF_PENDING)
+                go = 1;
+            break;
+        case SF_PENDING:
+            if(dyn->f.pending!=SF_SET
+            && dyn->f.pending!=SF_SET_PENDING
+            && dyn->f.pending!=SF_PENDING)
+                go = 1;
+            else
+                go = (dyn->insts[jmp].f_entry.dfnone  == dyn->f.dfnone)?0:1;
+            break;
+    }
+    if(dyn->insts[jmp].f_entry.dfnone && !dyn->f.dfnone)
+        go = 1;
+    if(go) {
+        if(dyn->f.pending!=SF_PENDING) {
+            LD_W(s1, xEmu, offsetof(x64emu_t, df));
+            j64 = (GETMARKF2)-(dyn->native_size);
+            BEQZ(s1, j64);
+        }
+        CALL_(UpdateFlags, -1, 0);
+        MARKF2;
+    }
+#endif
+}
+
+void CacheTransform(dynarec_la64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3) {
+    if(cacheupd&2)
+        fpuCacheTransform(dyn, ninst, s1, s2, s3);
+    if(cacheupd&1)
+        flagsCacheTransform(dyn, ninst, s1);
+}
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index 76f71d32..5a1ea5cb 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -110,12 +110,12 @@
 #define GETEB(i, D)                                                                             \
     if (MODREG) {                                                                               \
         if (rex.rex) {                                                                          \
-            wback = TO_LA64((nextop & 7) + (rex.b << 3));                                      \
+            wback = TO_LA64((nextop & 7) + (rex.b << 3));                                       \
             wb2 = 0;                                                                            \
         } else {                                                                                \
             wback = (nextop & 7);                                                               \
             wb2 = (wback >> 2) * 8;                                                             \
-            wback = TO_LA64((wback & 3));                                                      \
+            wback = TO_LA64((wback & 3));                                                       \
         }                                                                                       \
         if (wb2) {                                                                              \
             MV(i, wback);                                                                       \
@@ -134,21 +134,21 @@
     }
 
 // GETGB will use i for gd
-#define GETGB(i)                                               \
-    if (rex.rex) {                                             \
+#define GETGB(i)                                              \
+    if (rex.rex) {                                            \
         gb1 = TO_LA64(((nextop & 0x38) >> 3) + (rex.r << 3)); \
-        gb2 = 0;                                               \
-    } else {                                                   \
-        gd = (nextop & 0x38) >> 3;                             \
-        gb2 = ((gd & 4) >> 2);                                 \
+        gb2 = 0;                                              \
+    } else {                                                  \
+        gd = (nextop & 0x38) >> 3;                            \
+        gb2 = ((gd & 4) >> 2);                                \
         gb1 = TO_LA64((gd & 3));                              \
-    }                                                          \
-    gd = i;                                                    \
-    if (gb2) {                                                 \
-        MV(gd, gb1);                                           \
-        SRLI_D(gd, gd, 8);                                     \
-        ANDI(gd, gd, 0xff);                                    \
-    } else                                                     \
+    }                                                         \
+    gd = i;                                                   \
+    if (gb2) {                                                \
+        MV(gd, gb1);                                          \
+        SRLI_D(gd, gd, 8);                                    \
+        ANDI(gd, gd, 0xff);                                   \
+    } else                                                    \
         ANDI(gd, gb1, 0xff);
 
 // Write gb (gd) back to original register / memory, using s1 as scratch
@@ -211,7 +211,7 @@
 #define MARKLOCK    dyn->insts[ninst].marklock = dyn->native_size
 #define GETMARKLOCK dyn->insts[ninst].marklock
 
-#define IFX(A) if ((dyn->insts[ninst].x64.gen_flags & (A)))
+#define IFX(A)      if ((dyn->insts[ninst].x64.gen_flags & (A)))
 #define IFX_PENDOR0 if ((dyn->insts[ninst].x64.gen_flags & (X_PEND) || !dyn->insts[ninst].x64.gen_flags))
 #define IFXX(A)     if ((dyn->insts[ninst].x64.gen_flags == (A)))
 #define IFX2X(A, B) if ((dyn->insts[ninst].x64.gen_flags == (A) || dyn->insts[ninst].x64.gen_flags == (B) || dyn->insts[ninst].x64.gen_flags == ((A) | (B))))
@@ -235,8 +235,8 @@
 #define SET_NODF() dyn->f.dfnone = 0
 #define SET_DFOK() dyn->f.dfnone = 1
 
-#define CLEAR_FLAGS() \
-    IFX(X_ALL) { ANDI(xFlags, xFlags, ~((1UL << F_AF) | (1UL << F_CF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF))); }
+#define CLEAR_FLAGS(s) \
+    IFX(X_ALL) { MOV64x(s, (1UL << F_AF) | (1UL << F_CF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF)); ANDN(xFlags, xFlags, s); }
 
 #define CALC_SUB_FLAGS(op1_, op2, res, scratch1, scratch2, width)     \
     IFX(X_AF | X_CF | X_OF)                                           \
@@ -274,7 +274,7 @@
             XOR(scratch1, scratch1, scratch2);                        \
             ANDI(scratch1, scratch1, 1);                              \
             BEQZ(scratch1, 8);                                        \
-            ORI(xFlags, xFlags, 1 << F_OF);                          \
+            ORI(xFlags, xFlags, 1 << F_OF);                           \
         }                                                             \
     }
 
@@ -312,7 +312,9 @@
     else                                                                                                            \
         dyn->f.pending = SF_SET
 #endif
-
+#ifndef JUMP
+#define JUMP(A, C)
+#endif
 #ifndef BARRIER
 #define BARRIER(A)
 #endif
@@ -400,6 +402,8 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define fpu_reflectcache    STEPNAME(fpu_reflectcache)
 #define fpu_unreflectcache  STEPNAME(fpu_unreflectcache)
 
+#define CacheTransform STEPNAME(CacheTransform)
+
 /* setup r2 to address pointed by */
 uintptr_t geted(dynarec_la64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int* l, int i12, int delta);
 
@@ -434,6 +438,14 @@ void fpu_unreflectcache(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3);
 void fpu_pushcache(dynarec_la64_t* dyn, int ninst, int s1, int not07);
 void fpu_popcache(dynarec_la64_t* dyn, int ninst, int s1, int not07);
 
+void CacheTransform(dynarec_la64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3);
+
+#if STEP < 2
+#define CHECK_CACHE() 0
+#else
+#define CHECK_CACHE() (cacheupd = CacheNeedsTransform(dyn, ninst))
+#endif
+
 uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 
 #if STEP < 3
@@ -448,6 +460,86 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 #define MAYUSE(A)
 #endif
 
+#define GOCOND(B, T1, T2)                                                                        \
+    case B + 0x0:                                                                                \
+        INST_NAME(T1 "O " T2);                                                                   \
+        GO(ANDI(x1, xFlags, 1 << F_OF), EQZ, NEZ, X_OF, X64_JMP_JO);                             \
+        break;                                                                                   \
+    case B + 0x1:                                                                                \
+        INST_NAME(T1 "NO " T2);                                                                  \
+        GO(ANDI(x1, xFlags, 1 << F_OF), NEZ, EQZ, X_OF, X64_JMP_JNO);                            \
+        break;                                                                                   \
+    case B + 0x2:                                                                                \
+        INST_NAME(T1 "C " T2);                                                                   \
+        GO(ANDI(x1, xFlags, 1 << F_CF), EQZ, NEZ, X_CF, X64_JMP_JB);                             \
+        break;                                                                                   \
+    case B + 0x3:                                                                                \
+        INST_NAME(T1 "NC " T2);                                                                  \
+        GO(ANDI(x1, xFlags, 1 << F_CF), NEZ, EQZ, X_CF, X64_JMP_JNB);                            \
+        break;                                                                                   \
+    case B + 0x4:                                                                                \
+        INST_NAME(T1 "Z " T2);                                                                   \
+        GO(ANDI(x1, xFlags, 1 << F_ZF), EQZ, NEZ, X_ZF, X64_JMP_JE);                             \
+        break;                                                                                   \
+    case B + 0x5:                                                                                \
+        INST_NAME(T1 "NZ " T2);                                                                  \
+        GO(ANDI(x1, xFlags, 1 << F_ZF), NEZ, EQZ, X_ZF, X64_JMP_JNE);                            \
+        break;                                                                                   \
+    case B + 0x6:                                                                                \
+        INST_NAME(T1 "BE " T2);                                                                  \
+        GO(ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF)), EQZ, NEZ, X_CF | X_ZF, X64_JMP_JBE);     \
+        break;                                                                                   \
+    case B + 0x7:                                                                                \
+        INST_NAME(T1 "NBE " T2);                                                                 \
+        GO(ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF)), NEZ, EQZ, X_CF | X_ZF, X64_JMP_JA);      \
+        break;                                                                                   \
+    case B + 0x8:                                                                                \
+        INST_NAME(T1 "S " T2);                                                                   \
+        GO(ANDI(x1, xFlags, 1 << F_SF), EQZ, NEZ, X_SF, X64_JMP_JS);                             \
+        break;                                                                                   \
+    case B + 0x9:                                                                                \
+        INST_NAME(T1 "NS " T2);                                                                  \
+        GO(ANDI(x1, xFlags, 1 << F_SF), NEZ, EQZ, X_SF, X64_JMP_JNS);                            \
+        break;                                                                                   \
+    case B + 0xA:                                                                                \
+        INST_NAME(T1 "P " T2);                                                                   \
+        GO(ANDI(x1, xFlags, 1 << F_PF), EQZ, NEZ, X_PF, X64_JMP_JP);                             \
+        break;                                                                                   \
+    case B + 0xB:                                                                                \
+        INST_NAME(T1 "NP " T2);                                                                  \
+        GO(ANDI(x1, xFlags, 1 << F_PF), NEZ, EQZ, X_PF, X64_JMP_JNP);                            \
+        break;                                                                                   \
+    case B + 0xC:                                                                                \
+        INST_NAME(T1 "L " T2);                                                                   \
+        GO(SRLI_D(x1, xFlags, F_SF - F_OF);                                                      \
+            XOR(x1, x1, xFlags);                                                                 \
+            ANDI(x1, x1, 1 << F_OF), EQZ, NEZ, X_SF | X_OF, X64_JMP_JL);                         \
+        break;                                                                                   \
+    case B + 0xD:                                                                                \
+        INST_NAME(T1 "GE " T2);                                                                  \
+        GO(SRLI_D(x1, xFlags, F_SF - F_OF);                                                      \
+            XOR(x1, x1, xFlags);                                                                 \
+            ANDI(x1, x1, 1 << F_OF), NEZ, EQZ, X_SF | X_OF, X64_JMP_JGE);                        \
+        break;                                                                                   \
+    case B + 0xE:                                                                                \
+        INST_NAME(T1 "LE " T2);                                                                  \
+        GO(SRLI_D(x1, xFlags, F_SF - F_OF);                                                      \
+            XOR(x1, x1, xFlags);                                                                 \
+            ANDI(x1, x1, 1 << F_OF);                                                             \
+            ANDI(x3, xFlags, 1 << F_ZF);                                                         \
+            OR(x1, x1, x3);                                                                      \
+            ANDI(x1, x1, (1 << F_OF) | (1 << F_ZF)), EQZ, NEZ, X_SF | X_OF | X_ZF, X64_JMP_JLE); \
+        break;                                                                                   \
+    case B + 0xF:                                                                                \
+        INST_NAME(T1 "G " T2);                                                                   \
+        GO(SRLI_D(x1, xFlags, F_SF - F_OF);                                                      \
+            XOR(x1, x1, xFlags);                                                                 \
+            ANDI(x1, x1, 1 << F_OF);                                                             \
+            ANDI(x3, xFlags, 1 << F_ZF);                                                         \
+            OR(x1, x1, x3);                                                                      \
+            ANDI(x1, x1, (1 << F_OF) | (1 << F_ZF)), NEZ, EQZ, X_SF | X_OF | X_ZF, X64_JMP_JG);  \
+        break
+
 #define NOTEST(s1)                                       \
     if (box64_dynarec_test) {                            \
         ST_W(xZR, xEmu, offsetof(x64emu_t, test.test));  \
diff --git a/src/dynarec/la64/dynarec_la64_pass0.h b/src/dynarec/la64/dynarec_la64_pass0.h
index f3cca4d7..fd934d09 100644
--- a/src/dynarec/la64/dynarec_la64_pass0.h
+++ b/src/dynarec/la64/dynarec_la64_pass0.h
@@ -15,6 +15,7 @@
     dyn->f.pending = (B) & SF_SET_PENDING; \
     dyn->f.dfnone = ((B) & SF_SET) ? 1 : 0;
 #define EMIT(A) dyn->native_size += 4
+#define JUMP(A, C) add_jump(dyn, ninst); add_next(dyn, (uintptr_t)A); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C // pass0: calls add_jump/add_next and stores target A and condition C on insts[ninst]; expands to several statements, so use it only inside a braced block
 #define BARRIER(A)                                 \
     if (A != BARRIER_MAYBE) {                      \
         fpu_purgecache(dyn, ninst, 0, x1, x2, x3); \
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 656607f4..894c8b35 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -198,6 +198,8 @@ f24-f31  fs0-fs7   Static registers                Callee
 // GR[rd] = GR[rj] ^ ZeroExtend(imm12, GRLEN)
 #define XORI(rd, rj, imm12) EMIT(type_2RI12(0b0000001111, imm12, rj, rd))
 
+#define NOP() ANDI(xZR, xZR, 0)
+
 // tmp = SLL(GR[rj][31:0], GR[rk][4:0])
 // GR[rd] = SignExtend(tmp[31:0], GRLEN)
 #define SLL_W(rd, rj, rk) EMIT(type_3R(0b00000000000101110, rk, rj, rd))
@@ -231,36 +233,94 @@ f24-f31  fs0-fs7   Static registers                Callee
 
 // if GR[rj] == GR[rd]:
 //     PC = PC + SignExtend({imm16, 2'b0}, GRLEN)
-#define BEQ(rj, rd, imm16) EMIT(type_2RI16(0b010110, imm16, rj, rd))
+#define BEQ(rj, rd, imm18) EMIT(type_2RI16(0b010110, ((imm18)>>2), rj, rd))
 // if GR[rj] != GR[rd]:
 //     PC = PC + SignExtend({imm16, 2'b0}, GRLEN)
-#define BNE(rj, rd, imm16) EMIT(type_2RI16(0b010111, imm16, rj, rd))
+#define BNE(rj, rd, imm18) EMIT(type_2RI16(0b010111, ((imm18)>>2), rj, rd))
 // if signed(GR[rj]) < signed(GR[rd]):
 //     PC = PC + SignExtend({imm16, 2'b0}, GRLEN)
-#define BLT(rj, rd, imm16) EMIT(type_2RI16(0b011000, imm16, rj, rd))
+#define BLT(rj, rd, imm18) EMIT(type_2RI16(0b011000, ((imm18)>>2), rj, rd))
 // if signed(GR[rj]) >= signed(GR[rd]):
 //     PC = PC + SignExtend({imm16, 2'b0}, GRLEN)
-#define BGE(rj, rd, imm16) EMIT(type_2RI16(0b011001, imm16, rj, rd))
+#define BGE(rj, rd, imm18) EMIT(type_2RI16(0b011001, ((imm18)>>2), rj, rd))
 // if unsigned(GR[rj]) == unsigned(GR[rd]):
 //     PC = PC + SignExtend({imm16, 2'b0}, GRLEN)
-#define BLTU(rj, rd, imm16) EMIT(type_2RI16(0b011010, imm16, rj, rd))
+#define BLTU(rj, rd, imm18) EMIT(type_2RI16(0b011010, ((imm18)>>2), rj, rd))
 // if unsigned(GR[rj]) == unsigned(GR[rd]):
 //     PC = PC + SignExtend({imm16, 2'b0}, GRLEN)
-#define BGEU(rj, rd, imm16) EMIT(type_2RI16(0b011011, imm16, rj, rd))
+#define BGEU(rj, rd, imm18) EMIT(type_2RI16(0b011011, ((imm18)>>2), rj, rd))
 
 // if GR[rj] == 0:
 //     PC = PC + SignExtend({imm21, 2'b0}, GRLEN)
-#define BEQZ(rj, imm21) EMIT(type_1RI21(0b010000, (imm21) >> 2, rj))
+#define BEQZ(rj, imm23) EMIT(type_1RI21(0b010000, ((imm23)>>2), rj))
 // if GR[rj] != 0:
 //     PC = PC + SignExtend({imm21, 2'b0}, GRLEN)
-#define BNEZ(rj, imm21) EMIT(type_1RI21(0b010001, (imm21) >> 2, rj))
+#define BNEZ(rj, imm23) EMIT(type_1RI21(0b010001, ((imm23)>>2), rj))
 
 // GR[rd] = PC + 4
 // PC = GR[rj] + SignExtend({imm16, 2'b0}, GRLEN)
-#define JIRL(rd, rj, imm16) EMIT(type_2RI16(0b010011, imm16, rj, rd))
+#define JIRL(rd, rj, imm18) EMIT(type_2RI16(0b010011, ((imm18)>>2), rj, rd))
+
 // PC = GR[rj]
 #define BR(rj) JIRL(xZR, rj, 0x0)
 
+// PC = PC + SignExtend({imm26, 2'b0}, GRLEN)
+#define B(imm28) EMIT(type_I26(0b010100, ((imm28)>>2)))
+
+#define BEQ_safe(rj, rd, imm)                                                    \
+    if ((imm) > -0x20000 && (imm) < 0x20000) { /* fits the 18-bit byte offset */ \
+        BEQ(rj, rd, imm);                                                        \
+        NOP(); /* keep both forms two instructions long */                       \
+    } else {                                                                     \
+        BNE(rj, rd, 8); /* inverted test hops over the long jump */              \
+        B((imm) - 4);   /* B sits 4 bytes after the sequence start */            \
+    }
+
+#define BNE_safe(rj, rd, imm)                                                    \
+    if ((imm) > -0x20000 && (imm) < 0x20000) { /* fits the 18-bit byte offset */ \
+        BNE(rj, rd, imm);                                                        \
+        NOP(); /* keep both forms two instructions long */                       \
+    } else {                                                                     \
+        BEQ(rj, rd, 8); /* inverted test hops over the long jump */              \
+        B((imm) - 4);   /* B sits 4 bytes after the sequence start */            \
+    }
+
+#define BLT_safe(rj, rd, imm)                                                    \
+    if ((imm) > -0x20000 && (imm) < 0x20000) { /* fits the 18-bit byte offset */ \
+        BLT(rj, rd, imm);                                                        \
+        NOP(); /* keep both forms two instructions long */                       \
+    } else {                                                                     \
+        BGE(rj, rd, 8); /* inverted test hops over the long jump */              \
+        B((imm) - 4);   /* B sits 4 bytes after the sequence start */            \
+    }
+
+#define BGE_safe(rj, rd, imm)                                                    \
+    if ((imm) > -0x20000 && (imm) < 0x20000) { /* fits the 18-bit byte offset */ \
+        BGE(rj, rd, imm);                                                        \
+        NOP(); /* keep both forms two instructions long */                       \
+    } else {                                                                     \
+        BLT(rj, rd, 8); /* inverted test hops over the long jump */              \
+        B((imm) - 4);   /* B sits 4 bytes after the sequence start */            \
+    }
+
+#define BLTU_safe(rj, rd, imm)                                                   \
+    if ((imm) > -0x20000 && (imm) < 0x20000) { /* fits the 18-bit byte offset */ \
+        BLTU(rj, rd, imm);                                                       \
+        NOP(); /* keep both forms two instructions long */                       \
+    } else {                                                                     \
+        BGEU(rj, rd, 8); /* inverted test hops over the long jump */             \
+        B((imm) - 4);    /* B sits 4 bytes after the sequence start */           \
+    }
+
+#define BGEU_safe(rj, rd, imm)                                                   \
+    if ((imm) > -0x20000 && (imm) < 0x20000) { /* fits the 18-bit byte offset */ \
+        BGEU(rj, rd, imm);                                                       \
+        NOP(); /* keep both forms two instructions long */                       \
+    } else {                                                                     \
+        BLTU(rj, rd, 8); /* inverted test hops over the long jump */             \
+        B((imm) - 4);    /* B sits 4 bytes after the sequence start */           \
+    }
+
 // vaddr = GR[rj] + SignExtend(imm12, GRLEN)
 // AddressComplianceCheck(vaddr)
 // paddr = AddressTranslation(vaddr)
diff --git a/src/dynarec/la64/la64_epilog.S b/src/dynarec/la64/la64_epilog.S
index bb6977c1..91627ea2 100644
--- a/src/dynarec/la64/la64_epilog.S
+++ b/src/dynarec/la64/la64_epilog.S
@@ -28,8 +28,8 @@ la64_epilog:
     st.d   $r31, $r4, (8 * 16) // xFlags
     st.d   $r20, $r4, (8 * 17) // put back reg value in emu, including EIP (so x27 must be EIP now)
     ld.d   $sp,  $r4, 552      // restore saved sp from emu->xSPSave, see la64_prolog
-    ld.d   $r11, $sp, -8
-    st.d   $r11, $r4, 552
+    ld.d   $r22, $sp, -8
+    st.d   $r22, $r4, 552
     // vpop {d8-d15}
     ld.d   $r1,  $sp, (8 * 0) // load ra
     ld.d   $r22, $sp, (8 * 1) // load fp
diff --git a/src/dynarec/la64/la64_next.S b/src/dynarec/la64/la64_next.S
index c69830f3..d683ab80 100644
--- a/src/dynarec/la64/la64_next.S
+++ b/src/dynarec/la64/la64_next.S
@@ -17,16 +17,16 @@ la64_next:
     addi.d $sp, $sp, -(8 * 12)
     st.d   $a0, $sp, 0
     st.d   $a1, $sp, 8
-    st.d   $r12, $sp, 16
-    st.d   $r13, $sp, 24
-    st.d   $r14, $sp, 32
-    st.d   $r15, $sp, 40
-    st.d   $r16, $sp, 48
-    st.d   $r17, $sp, 56
-    st.d   $r18, $sp, 64
-    st.d   $r19, $sp, 72
-    st.d   $r20, $sp, 80
-    st.d   $r30, $sp, 88 // also save r30(rip) to allow change in LinkNext
+    st.d   $r11, $sp, 16
+    st.d   $r12, $sp, 24
+    st.d   $r13, $sp, 32
+    st.d   $r14, $sp, 40
+    st.d   $r15, $sp, 48
+    st.d   $r16, $sp, 56
+    st.d   $r17, $sp, 64
+    st.d   $r18, $sp, 72
+    st.d   $r19, $sp, 80
+    st.d   $r20, $sp, 88 // also save r20 (rip) to allow change in LinkNext
 
     move   $a2, $ra      // "from" is in ra, so put in a2
     addi.d $a3, $sp, 88  // a3 is address to change rip
@@ -37,16 +37,16 @@ la64_next:
     // pop regs
     ld.d   $a0, $sp, 0
     ld.d   $a1, $sp, 8
-    ld.d   $r12, $sp, 16
-    ld.d   $r13, $sp, 24
-    ld.d   $r14, $sp, 32
-    ld.d   $r15, $sp, 40
-    ld.d   $r16, $sp, 48
-    ld.d   $r17, $sp, 56
-    ld.d   $r18, $sp, 64
-    ld.d   $r19, $sp, 72
-    ld.d   $r20, $sp, 80
-    ld.d   $r30, $sp, 88
+    ld.d   $r11, $sp, 16
+    ld.d   $r12, $sp, 24
+    ld.d   $r13, $sp, 32
+    ld.d   $r14, $sp, 40
+    ld.d   $r15, $sp, 48
+    ld.d   $r16, $sp, 56
+    ld.d   $r17, $sp, 64
+    ld.d   $r18, $sp, 72
+    ld.d   $r19, $sp, 80
+    ld.d   $r20, $sp, 88
     addi.d $sp,  $sp, (8 * 12)
     // return offset is jump address
     jr     $a3
\ No newline at end of file
diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c
index 277921dd..62718191 100644
--- a/src/dynarec/la64/la64_printer.c
+++ b/src/dynarec/la64/la64_printer.c
@@ -38,7 +38,7 @@ int isMask(uint32_t opcode, const char* mask, la64_print_t *a)
         mask++;
         --i;
     }
-    
+
     return 1;
 }
 
@@ -62,224 +62,715 @@ const char* la64_print(uint32_t opcode, uintptr_t addr)
     #define imm_up a.u
     // ADD.W
     if(isMask(opcode, "00000000000100000kkkkkjjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "ADD.W %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "ADD.W", Xt[Rd], Xt[Rj], Xt[Rk]);
         return buff;
     }
     // SUB.W
     if(isMask(opcode, "00000000000100010kkkkkjjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "SUB.W %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "SUB.W", Xt[Rd], Xt[Rj], Xt[Rk]);
         return buff;
     }
     // ADD.D
     if(isMask(opcode, "00000000000100001kkkkkjjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "ADD.D %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "ADD.D", Xt[Rd], Xt[Rj], Xt[Rk]);
         return buff;
     }
     // SUB.D
     if(isMask(opcode, "00000000000100011kkkkkjjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "SUB.D %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "SUB.D", Xt[Rd], Xt[Rj], Xt[Rk]);
         return buff;
     }
     // ADDI.W
     if(isMask(opcode, "0000001010iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "ADDI.W %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "ADDI.W", Xt[Rd], Xt[Rj], signExtend(imm, 12));
         return buff;
     }
     // ADDI.D
     if(isMask(opcode, "0000001011iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "ADDI.D %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "ADDI.D", Xt[Rd], Xt[Rj], signExtend(imm, 12));
         return buff;
     }
     // ADDU16I.D
     if(isMask(opcode, "000100iiiiiiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "ADDU16I.D %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "ADDU16I.D", Xt[Rd], Xt[Rj], signExtend(imm, 16)); // si16 field: sign bit is bit 15
         return buff;
     }
     // ALSL.W
     if(isMask(opcode, "000000000000010iikkkkkjjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "ALSL.W %s, %s, %s, %d", Xt[Rd], Xt[Rj], Xt[Rk], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s, %d", "ALSL.W", Xt[Rd], Xt[Rj], Xt[Rk], imm);
         return buff;
     }
     // ALSL.WU
     if(isMask(opcode, "000000000000011iikkkkkjjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "ALSL.WU %s, %s, %s, %d", Xt[Rd], Xt[Rj], Xt[Rk], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s, %d", "ALSL.WU", Xt[Rd], Xt[Rj], Xt[Rk], imm);
         return buff;
     }
     // ALSL.D
     if(isMask(opcode, "000000000010110iikkkkkjjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "ALSL.D %s, %s, %s, %d", Xt[Rd], Xt[Rj], Xt[Rk], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s, %d", "ALSL.D", Xt[Rd], Xt[Rj], Xt[Rk], imm);
         return buff;
     }
     // LU12I.W
     if(isMask(opcode, "0001010iiiiiiiiiiiiiiiiiiiiddddd", &a)) {
-        snprintf(buff, sizeof(buff), "LU12I.W %s, %d", Xt[Rd], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %d", "LU12I.W", Xt[Rd], imm);
         return buff;
     }
     // LU32I.D
     if(isMask(opcode, "0001011iiiiiiiiiiiiiiiiiiiiddddd", &a)) {
-        snprintf(buff, sizeof(buff), "LU32I.D %s, %d", Xt[Rd], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %d", "LU32I.D", Xt[Rd], imm);
         return buff;
     }
     // LU52I.D
     if(isMask(opcode, "0000001100iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "LU52I.D %s, %s, %d", Xt[Rd], Xt[Rj], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "LU52I.D", Xt[Rd], Xt[Rj], imm);
         return buff;
     }
     // PCADDI
     if(isMask(opcode, "0001100iiiiiiiiiiiiiiiiiiiiddddd", &a)) {
-        snprintf(buff, sizeof(buff), "PCADDI %s, %d", Xt[Rd], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %d", "PCADDI", Xt[Rd], imm);
         return buff;
     }
     // PCADDU12I
     if(isMask(opcode, "0001101iiiiiiiiiiiiiiiiiiiiddddd", &a)) {
-        snprintf(buff, sizeof(buff), "PCADDU12I %s, %d", Xt[Rd], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %d", "PCADDU12I", Xt[Rd], imm);
         return buff;
     }
     // PCADDU18I
     if(isMask(opcode, "0001110iiiiiiiiiiiiiiiiiiiiddddd", &a)) {
-        snprintf(buff, sizeof(buff), "PCADDU18I %s, %d", Xt[Rd], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %d", "PCADDU18I", Xt[Rd], imm);
         return buff;
     }
     // PCALAU12I
     if(isMask(opcode, "0001111iiiiiiiiiiiiiiiiiiiiddddd", &a)) {
-        snprintf(buff, sizeof(buff), "PCALAU12I %s, %d", Xt[Rd], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %d", "PCALAU12I", Xt[Rd], imm);
         return buff;
     }
     // AND
     if(isMask(opcode, "00000000000101001kkkkkjjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "AND %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "AND", Xt[Rd], Xt[Rj], Xt[Rk]);
         return buff;
     }
     // OR
     if(isMask(opcode, "00000000000101010kkkkkjjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "OR %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "OR", Xt[Rd], Xt[Rj], Xt[Rk]);
         return buff;
     }
     // NOR
     if(isMask(opcode, "00000000000101000kkkkkjjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "NOR %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "NOR", Xt[Rd], Xt[Rj], Xt[Rk]);
         return buff;
     }
     // XOR
     if(isMask(opcode, "00000000000101011kkkkkjjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "XOR %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "XOR", Xt[Rd], Xt[Rj], Xt[Rk]);
         return buff;
     }
     // ANDN
     if(isMask(opcode, "00000000000101101kkkkkjjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "ANDN %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "ANDN", Xt[Rd], Xt[Rj], Xt[Rk]);
         return buff;
     }
     // ORN
     if(isMask(opcode, "00000000000101100kkkkkjjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "ORN %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "ORN", Xt[Rd], Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // NOP -- special case of ANDI
+    if(isMask(opcode, "00000011010000000000000000000000", &a)) {
+        snprintf(buff, sizeof(buff), "NOP");
         return buff;
     }
     // ANDI
     if(isMask(opcode, "0000001101iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "ANDI %s, %s, 0x%x", Xt[Rd], Xt[Rj], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "ANDI", Xt[Rd], Xt[Rj], imm);
         return buff;
     }
     // ORI
     if(isMask(opcode, "0000001110iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "ORI %s, %s, 0x%x", Xt[Rd], Xt[Rj], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "ORI", Xt[Rd], Xt[Rj], imm);
         return buff;
     }
     // XORI
     if(isMask(opcode, "0000001111iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "XORI %s, %s, 0x%x", Xt[Rd], Xt[Rj], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "XORI", Xt[Rd], Xt[Rj], imm);
+        return buff;
+    }
+    // SLL.W
+    if(isMask(opcode, "00000000000101110kkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "SLL.W", Xt[Rd], Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // SRL.W
+    if(isMask(opcode, "00000000000101111kkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "SRL.W", Xt[Rd], Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // SLA.W
+    if(isMask(opcode, "00000000000110000kkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "SLA.W", Xt[Rd], Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // ROTR.W
+    if(isMask(opcode, "00000000000110110kkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "ROTR.W", Xt[Rd], Xt[Rj], Xt[Rk]);
         return buff;
     }
     // SLLI.D
     if(isMask(opcode, "0000000001000001iiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "SLLI.D %s, %s, %u", Xt[Rd], Xt[Rj], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %u", "SLLI.D", Xt[Rd], Xt[Rj], imm);
         return buff;
     }
     // SRLI.D
     if(isMask(opcode, "0000000001000101iiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "SRLI.D %s, %s, %u", Xt[Rd], Xt[Rj], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %u", "SRLI.D", Xt[Rd], Xt[Rj], imm);
         return buff;
     }
     // SRAI.D
     if(isMask(opcode, "0000000001001001iiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "SRAI.D %s, %s, %u", Xt[Rd], Xt[Rj], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %u", "SRAI.D", Xt[Rd], Xt[Rj], imm);
         return buff;
     }
     // ROTRI.D
     if(isMask(opcode, "0000000001001101iiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "ROTRI.D %s, %s, %u", Xt[Rd], Xt[Rj], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %u", "ROTRI.D", Xt[Rd], Xt[Rj], imm);
+        return buff;
+    }
+    // BEQ -- operands in assembler order (rj, rd); offset shown as a signed byte displacement
+    if(isMask(opcode, "010110iiiiiiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "BEQ", Xt[Rj], Xt[Rd], signExtend(imm << 2, 18));
+        return buff;
+    }
+    // BNE -- operands in assembler order (rj, rd); offset shown as a signed byte displacement
+    if(isMask(opcode, "010111iiiiiiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "BNE", Xt[Rj], Xt[Rd], signExtend(imm << 2, 18));
+        return buff;
+    }
+    // BLT -- taken when signed rj < rd, so rj must be printed first
+    if(isMask(opcode, "011000iiiiiiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "BLT", Xt[Rj], Xt[Rd], signExtend(imm << 2, 18));
+        return buff;
+    }
+    // BGE -- taken when signed rj >= rd, so rj must be printed first
+    if(isMask(opcode, "011001iiiiiiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "BGE", Xt[Rj], Xt[Rd], signExtend(imm << 2, 18));
+        return buff;
+    }
+    // BLTU -- taken when unsigned rj < rd, so rj must be printed first
+    if(isMask(opcode, "011010iiiiiiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "BLTU", Xt[Rj], Xt[Rd], signExtend(imm << 2, 18));
+        return buff;
+    }
+    // BGEU -- taken when unsigned rj >= rd, so rj must be printed first
+    if(isMask(opcode, "011011iiiiiiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "BGEU", Xt[Rj], Xt[Rd], signExtend(imm << 2, 18));
         return buff;
     }
     // BEQZ
     if(isMask(opcode, "010000iiiiiiiiiiiiiiiijjjjjuuuuu", &a)) {
-        snprintf(buff, sizeof(buff), "BEQZ %s, %d", Xt[Rj], imm + (imm_up << 16));
+        snprintf(buff, sizeof(buff), "%-15s %s, %d", "BEQZ", Xt[Rj], signExtend((imm + (imm_up << 16)) << 2, 23)); // 21-bit word offset -> signed byte displacement
         return buff;
     }
     // BNEZ
     if(isMask(opcode, "010001iiiiiiiiiiiiiiiijjjjjuuuuu", &a)) {
-        snprintf(buff, sizeof(buff), "BNEZ %s, %d", Xt[Rj], imm + (imm_up << 16));
+        snprintf(buff, sizeof(buff), "%-15s %s, %d", "BNEZ", Xt[Rj], signExtend((imm + (imm_up << 16)) << 2, 23)); // 21-bit word offset -> signed byte displacement
+        return buff;
+    }
+    // BR -- special case of JIRL
+    if(isMask(opcode, "0100110000000000000000jjjjj00000", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s", "JR", Xt[Rj]);
         return buff;
     }
     // JIRL
     if(isMask(opcode, "010011iiiiiiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "JIRL %s, %s, %d", Xt[Rd], Xt[Rj], imm);
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "JIRL", Xt[Rd], Xt[Rj], signExtend(imm << 2, 18)); // 16-bit word offset -> signed byte displacement, like the other branches
+        return buff;
+    }
+    // B -- I26 keeps offs[15:0] in bits 25:10 and offs[25:16] in bits 9:0; reassemble, then print signed bytes (assumes isMask packs the 'i' bits MSB-first -- TODO confirm)
+    if(isMask(opcode, "010100iiiiiiiiiiiiiiiiiiiiiiiiii", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %d", "B", signExtend((((imm & 0x3ff) << 16) | ((imm >> 10) & 0xffff)) << 2, 28));
         return buff;
     }
     // LD.B
     if(isMask(opcode, "0010100000iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "LD.B %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "LD.B", Xt[Rd], Xt[Rj], signExtend(imm, 12));
         return buff;
     }
     // LD.H
     if(isMask(opcode, "0010100001iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "LD.H %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "LD.H", Xt[Rd], Xt[Rj], signExtend(imm, 12));
         return buff;
     }
     // LD.W
     if(isMask(opcode, "0010100010iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "LD.W %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "LD.W", Xt[Rd], Xt[Rj], signExtend(imm, 12));
         return buff;
     }
     // LD.D
     if(isMask(opcode, "0010100011iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "LD.D %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "LD.D", Xt[Rd], Xt[Rj], signExtend(imm, 12));
         return buff;
     }
     // LD.BU
     if(isMask(opcode, "0010101000iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "LD.BU %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "LD.BU", Xt[Rd], Xt[Rj], signExtend(imm, 12));
         return buff;
     }
     // LD.HU
     if(isMask(opcode, "0010101001iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "LD.HU %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "LD.HU", Xt[Rd], Xt[Rj], signExtend(imm, 12));
         return buff;
     }
     // LD.WU
     if(isMask(opcode, "0010101010iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "LD.WU %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "LD.WU", Xt[Rd], Xt[Rj], signExtend(imm, 12));
         return buff;
     }
     // ST.B
     if(isMask(opcode, "0010100100iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "ST.B %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "ST.B", Xt[Rd], Xt[Rj], signExtend(imm, 12));
         return buff;
     }
     // ST.H
     if(isMask(opcode, "0010100101iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "ST.H %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "ST.H", Xt[Rd], Xt[Rj], signExtend(imm, 12));
         return buff;
     }
     // ST.W
     if(isMask(opcode, "0010100110iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "ST.W %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "ST.W", Xt[Rd], Xt[Rj], signExtend(imm, 12));
         return buff;
     }
     // ST.D
     if(isMask(opcode, "0010100111iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "ST.D %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "ST.D", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        return buff;
+    }
+    // X64CLRSM
+    if(isMask(opcode, "00000000000000001000000000101000", &a)) {
+        snprintf(buff, sizeof(buff), "X64CLRSM");
+        return buff;
+    }
+    // X64SETSM
+    if(isMask(opcode, "00000000000000001000000000001000", &a)) {
+        snprintf(buff, sizeof(buff), "X64SETSM");
+        return buff;
+    }
+    // X64INCTOP
+    if(isMask(opcode, "00000000000000001000000000001001", &a)) {
+        snprintf(buff, sizeof(buff), "X64INCTOP");
+        return buff;
+    }
+    // X64DECTOP
+    if(isMask(opcode, "00000000000000001000000000101001", &a)) {
+        snprintf(buff, sizeof(buff), "X64DECTOP");
+        return buff;
+    }
+    // X64SETTOP
+    if(isMask(opcode, "000000000000000001110000iii00000", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %d", "X64SETTOP", imm);
+        return buff;
+    }
+    // X64GETTOP
+    if(isMask(opcode, "000000000000000001110100000ddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s", "X64GETTOP", Xt[Rd]);
+        return buff;
+    }
+    // X64GETEFLAGS
+    if(isMask(opcode, "00000000010111iiiiiiii00000ddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, 0x%x", "X64GETEFLAGS", Xt[Rd], imm);
+        return buff;
+    }
+    // X64SETEFLAGS
+    if(isMask(opcode, "00000000010111iiiiiiii00001ddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, 0x%x", "X64SETEFLAGS", Xt[Rd], imm);
+        return buff;
+    }
+    // X64SETJ
+    if(isMask(opcode, "000000000011011010iiii00000ddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, 0x%x", "X64SETJ", Xt[Rd], imm);
+        return buff;
+    }
+    // X64INC.B
+    if(isMask(opcode, "0000000000000000100000jjjjj00000", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s", "X64INC.B", Xt[Rj]);
+        return buff;
+    }
+    // X64INC.H
+    if(isMask(opcode, "0000000000000000100000jjjjj00001", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s", "X64INC.H", Xt[Rj]);
+        return buff;
+    }
+    // X64INC.W
+    if(isMask(opcode, "0000000000000000100000jjjjj00010", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s", "X64INC.W", Xt[Rj]);
+        return buff;
+    }
+    // X64INC.D
+    if(isMask(opcode, "0000000000000000100000jjjjj00011", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s", "X64INC.D", Xt[Rj]);
+        return buff;
+    }
+    // X64DEC.B
+    if(isMask(opcode, "0000000000000000100000jjjjj00100", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s", "X64DEC.B", Xt[Rj]);
+        return buff;
+    }
+    // X64DEC.H
+    if(isMask(opcode, "0000000000000000100000jjjjj00101", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s", "X64DEC.H", Xt[Rj]);
+        return buff;
+    }
+    // X64DEC.W
+    if(isMask(opcode, "0000000000000000100000jjjjj00110", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s", "X64DEC.W", Xt[Rj]);
+        return buff;
+    }
+    // X64DEC.D
+    if(isMask(opcode, "0000000000000000100000jjjjj00111", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s", "X64DEC.D", Xt[Rj]);
+        return buff;
+    }
+    // X64MUL.B
+    if(isMask(opcode, "00000000001111101kkkkkjjjjj00000", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64MUL.B", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64MUL.H -- low opcode bits 00001 select the signed halfword form
+    if(isMask(opcode, "00000000001111101kkkkkjjjjj00001", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64MUL.H", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64MUL.W -- low opcode bits 00010
+    if(isMask(opcode, "00000000001111101kkkkkjjjjj00010", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64MUL.W", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64MUL.D -- low opcode bits 00011
+    if(isMask(opcode, "00000000001111101kkkkkjjjjj00011", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64MUL.D", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64MUL.BU -- low opcode bits 00100
+    if(isMask(opcode, "00000000001111101kkkkkjjjjj00100", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64MUL.BU", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64MUL.HU -- low opcode bits 00101
+    if(isMask(opcode, "00000000001111101kkkkkjjjjj00101", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64MUL.HU", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64MUL.WU -- low opcode bits 00110
+    if(isMask(opcode, "00000000001111101kkkkkjjjjj00110", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64MUL.WU", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64MUL.DU -- low opcode bits 00111
+    if(isMask(opcode, "00000000001111101kkkkkjjjjj00111", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64MUL.DU", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64ADD.WU
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj00000", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ADD.WU", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64ADD.DU
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj00001", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ADD.DU", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SUB.WU
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj00010", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SUB.WU", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SUB.DU
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj00011", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SUB.DU", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64ADD.B
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj00100", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ADD.B", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64ADD.H
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj00101", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ADD.H", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64ADD.W
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj00110", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ADD.W", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64ADD.D
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj00111", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ADD.D", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SUB.B
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj01000", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SUB.B", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SUB.H
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj01001", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SUB.H", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SUB.W
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj01010", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SUB.W", Xt[Rj], Xt[Rk]);
         return buff;
     }
+    // X64SUB.D
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj01011", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SUB.D", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64ADC.B
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj01100", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ADC.B", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64ADC.H
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj01101", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ADC.H", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64ADC.W
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj01110", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ADC.W", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64ADC.D
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj01111", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ADC.D", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SBC.B
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj10000", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SBC.B", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SBC.H
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj10001", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SBC.H", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SBC.W
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj10010", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SBC.W", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SBC.D
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj10011", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SBC.D", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SLL.B
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj10100", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SLL.B", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SLL.H
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj10101", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SLL.H", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SLL.W
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj10110", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SLL.W", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SLL.D
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj10111", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SLL.D", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SRL.B
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj11000", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SRL.B", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SRL.H
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj11001", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SRL.H", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SRL.W
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj11010", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SRL.W", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SRL.D
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj11011", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SRL.D", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SRA.B
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj11100", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SRA.B", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SRA.H
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj11101", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SRA.H", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SRA.W
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj11110", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SRA.W", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64SRA.D
+    if(isMask(opcode, "00000000001111110kkkkkjjjjj11111", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64SRA.D", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64ROTR.B
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj00000", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ROTR.B", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64ROTR.H
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj00001", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ROTR.H", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
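+    // X64ROTR.D (low bits 00010; note: the ROTR group orders .D before .W, unlike the B/H/W/D order of the other groups here)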
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj00010", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ROTR.D", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
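+    // X64ROTR.W (low bits 00011; note: the ROTR group orders .W after .D, unlike the B/H/W/D order of the other groups here)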
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj00011", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ROTR.W", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64ROTL.B
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj00100", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ROTL.B", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64ROTL.H
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj00101", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ROTL.H", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64ROTL.W
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj00110", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ROTL.W", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64ROTL.D
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj00111", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64ROTL.D", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64RCR.B
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj01000", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64RCR.B", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64RCR.H
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj01001", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64RCR.H", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64RCR.W
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj01010", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64RCR.W", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64RCR.D
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj01011", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64RCR.D", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64RCL.B
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj01100", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64RCL.B", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64RCL.H
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj01101", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64RCL.H", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64RCL.W
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj01110", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64RCL.W", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64RCL.D
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj01111", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64RCL.D", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64AND.B
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj10000", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64AND.B", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64AND.H
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj10001", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64AND.H", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64AND.W
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj10010", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64AND.W", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64AND.D
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj10011", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64AND.D", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64OR.B
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj10100", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64OR.B", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64OR.H
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj10101", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64OR.H", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64OR.W
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj10110", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64OR.W", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64OR.D
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj10111", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64OR.D", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64XOR.B
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj11000", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64XOR.B", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64XOR.H
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj11001", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64XOR.H", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64XOR.W
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj11010", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64XOR.W", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // X64XOR.D
+    if(isMask(opcode, "00000000001111111kkkkkjjjjj11011", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "X64XOR.D", Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+
     snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode));
     return buff;
 }
\ No newline at end of file
diff --git a/src/dynarec/la64/la64_prolog.S b/src/dynarec/la64/la64_prolog.S
index 354e5518..5f57218d 100644
--- a/src/dynarec/la64/la64_prolog.S
+++ b/src/dynarec/la64/la64_prolog.S
@@ -48,16 +48,16 @@ la64_prolog:
     ld.d   $r28, $r4, (8 * 13)
     ld.d   $r29, $r4, (8 * 14)
     ld.d   $r30, $r4, (8 * 15)
-    ld.d   $r31, $r4, (8 * 16)  //xFlags  
+    ld.d   $r31, $r4, (8 * 16)  //xFlags
     ld.d   $r20, $r4, (8 * 17)  //xRIP
-    ld.d   $r11, $r4, 552 // grab an old value of emu->xSPSave
+    ld.d   $r22, $r4, 552 // grab an old value of emu->xSPSave
     st.d   $sp,  $r4, 552 // save current sp to emu->xSPSave
     // push sentinel onto the stack
-    st.d   $r11, $sp, -16
+    st.d   $r22, $sp, -16
     st.d   $r0,  $sp, -8
     addi.d $sp,  $sp, -16
     // setup xMASK
     addi.w  $r11, $r0, -1
-    lu32i.d $r4, 0
+    lu32i.d $r11, 0
     //jump to function
     jirl   $r0,  $a1, 0