author    Yang Liu <numbksco@gmail.com>  2024-05-24 04:21:56 +0800
committer GitHub <noreply@github.com>    2024-05-23 22:21:56 +0200
commit    712e8315f8555f85f718e6d09e04d0bdab327659 (patch)
tree      50c851c4ea229c72b64217a984c8bedcc4235a7e /src
parent    604ab2252658e85791bfd3d5a04f546b554f23c3 (diff)
[LA64_DYNAREC] Added more opcodes (#1528)
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/la64/dynarec_la64_00.c          54
-rw-r--r--  src/dynarec/la64/dynarec_la64_0f.c          31
-rw-r--r--  src/dynarec/la64/dynarec_la64_66.c          37
-rw-r--r--  src/dynarec/la64/dynarec_la64_660f.c        45
-rw-r--r--  src/dynarec/la64/dynarec_la64_emit_math.c  281
-rw-r--r--  src/dynarec/la64/dynarec_la64_f0.c          44
-rw-r--r--  src/dynarec/la64/dynarec_la64_f20f.c         2
-rw-r--r--  src/dynarec/la64/dynarec_la64_f30f.c         1
-rw-r--r--  src/dynarec/la64/dynarec_la64_helper.h       8
-rw-r--r--  src/dynarec/la64/la64_emitter.h              3
10 files changed, 498 insertions, 8 deletions
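
The new cases in dynarec_la64_00.c cover SBB Eb,Gb, XOR AL,Ib, CLD/STD and the INC/DEC Eb group, plus a brace cleanup in the CALL Ed case. As a rough reference for the flag behaviour the byte-sized emitters have to reproduce, here is a plain C model of 8-bit SBB (illustrative only, not box64 code; the emitters compute the same flags either through the LBT extension or the carry-chain code in dynarec_la64_emit_math.c):

    #include <stdint.h>
    #include <stdbool.h>

    typedef struct { bool cf, zf, sf, of, af, pf; } flags_t;

    /* Reference semantics of SBB r/m8, r8: dst - src - CF, all flags written. */
    static uint8_t sbb8_ref(uint8_t dst, uint8_t src, flags_t* f)
    {
        uint16_t wide = (uint16_t)dst - src - (f->cf ? 1 : 0);
        uint8_t  res  = (uint8_t)wide;
        f->cf = wide > 0xff;                              /* borrow out of bit 7 */
        f->af = ((dst ^ src ^ res) & 0x10) != 0;          /* borrow into bit 4   */
        f->of = ((dst ^ src) & (dst ^ res) & 0x80) != 0;  /* signed overflow     */
        f->zf = res == 0;
        f->sf = (res & 0x80) != 0;
        f->pf = (__builtin_parity(res) == 0);             /* PF: even parity     */
        return res;
    }
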
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index 4ed4e271..586d2392 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -167,6 +167,16 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     DEFAULT;
             }
             break;
+        case 0x18:
+            INST_NAME("SBB Eb, Gb");
+            READFLAGS(X_CF);
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_sbb8(dyn, ninst, x1, x2, x4, x5, x6);
+            EBBACK();
+            break;
         case 0x19:
             INST_NAME("SBB Ed, Gd");
             READFLAGS(X_CF);
@@ -321,6 +331,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETED(0);
             emit_xor32(dyn, ninst, rex, gd, ed, x3, x4);
             break;
+        case 0x34:
+            INST_NAME("XOR AL, Ib");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            u8 = F8;
+            ANDI(x1, xRAX, 0xff);
+            emit_xor8c(dyn, ninst, x1, u8, x3, x4);
+            BSTRINS_D(xRAX, x1, 7, 0);
+            break;
         case 0x35:
             INST_NAME("XOR EAX, Id");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -2052,6 +2070,35 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     DEFAULT;
             }
             break;
+        case 0xFC:
+            INST_NAME("CLD");
+            BSTRINS_D(xFlags, xZR, F_DF, F_DF);
+            break;
+        case 0xFD:
+            INST_NAME("STD");
+            ORI(xFlags, xFlags, 1 << F_DF);
+            break;
+        case 0xFE:
+            nextop = F8;
+            switch ((nextop >> 3) & 7) {
+                case 0:
+                    INST_NAME("INC Eb");
+                    SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING);
+                    GETEB(x1, 0);
+                    emit_inc8(dyn, ninst, ed, x2, x4, x5);
+                    EBBACK();
+                    break;
+                case 1:
+                    INST_NAME("DEC Eb");
+                    SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING);
+                    GETEB(x1, 0);
+                    emit_dec8(dyn, ninst, ed, x2, x4, x5);
+                    EBBACK();
+                    break;
+                default:
+                    DEFAULT;
+            }
+            break;
         case 0xFF:
             nextop = F8;
             switch ((nextop >> 3) & 7) {
@@ -2071,12 +2118,9 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     break;
                 case 2:
                     INST_NAME("CALL Ed");
-                    PASS2IF((box64_dynarec_safeflags > 1) || ((ninst && dyn->insts[ninst - 1].x64.set_flags) || ((ninst > 1) && dyn->insts[ninst - 2].x64.set_flags)), 1)
-                    {
+                    PASS2IF ((box64_dynarec_safeflags > 1) || ((ninst && dyn->insts[ninst - 1].x64.set_flags) || ((ninst > 1) && dyn->insts[ninst - 2].x64.set_flags)), 1) {
                         READFLAGS(X_PEND); // that's suspicious
-                    }
-                    else
-                    {
+                    } else {
                         SETFLAGS(X_ALL, SF_SET); // Hack to put flag in "don't care" state
                     }
                     GETEDz(0);
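
The XOR AL, Ib case above shows how byte-sized results are written back without touching the rest of RAX: ANDI extracts AL, the emitter works on it, and BSTRINS_D reinserts bits 7..0. In plain C terms (an illustrative sketch, not box64 code):

    #include <stdint.h>

    /* Equivalent of ANDI(x1, xRAX, 0xff) + emit_xor8c + BSTRINS_D(xRAX, x1, 7, 0):
       compute on the low byte only, then merge it back into the full register. */
    static uint64_t xor_al_imm(uint64_t rax, uint8_t imm)
    {
        uint64_t al = (rax & 0xff) ^ imm;
        return (rax & ~UINT64_C(0xff)) | al;
    }
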
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index 87c51c81..6d056fa2 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -417,6 +417,13 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETGX(q1, 1);
             VFCVTL_D_S(q1, q0);
             break;
+        case 0x5B:
+            INST_NAME("CVTDQ2PS Gx, Ex");
+            nextop = F8;
+            GETEX(q0, 0, 0);
+            GETGX_empty(q1);
+            VFFINT_S_W(q1, q0);
+            break;
         case 0x5C:
             INST_NAME("SUBPS Gx, Ex");
             nextop = F8;
@@ -424,6 +431,13 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETGX(v0, 1);
             VFSUB_S(v0, v0, q0);
             break;
+        case 0x5E:
+            INST_NAME("DIVPS Gx, Ex");
+            nextop = F8;
+            GETEX(q0, 0, 0);
+            GETGX(v0, 1);
+            VFDIV_S(v0, v0, q0);
+            break;
 
         #define GO(GETFLAGS, NO, YES, F, I)                                                         \
             if (box64_dynarec_test == 2) { NOTEST(x1); }                                            \
@@ -791,6 +805,23 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             }
             if (!rex.w) ZEROUP(gd);
             break;
+        case 0xC2:
+            INST_NAME("CMPPS Gx, Ex, Ib");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEX(v1, 0, 1);
+            u8 = F8;
+            switch (u8 & 7) {
+                case 0: VFCMP_S(v0, v0, v1, cEQ); break;  // Equal
+                case 1: VFCMP_S(v0, v0, v1, cLT); break;  // Less than
+                case 2: VFCMP_S(v0, v0, v1, cLE); break;  // Less or equal
+                case 3: VFCMP_S(v0, v0, v1, cUN); break;  // NaN (unordered)
+                case 4: VFCMP_S(v0, v0, v1, cUNE); break; // Not equal or unordered
+                case 5: VFCMP_S(v0, v1, v0, cULE); break; // Not less than: greater or equal, or unordered (operands swapped)
+                case 6: VFCMP_S(v0, v1, v0, cULT); break; // Not less or equal: greater, or unordered (operands swapped)
+                case 7: VFCMP_S(v0, v0, v1, cOR); break;  // Ordered (not NaN)
+            }
+            break;
         case 0xC6:
             INST_NAME("SHUFPS Gx, Ex, Ib");
             nextop = F8;
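
CMPPS picks one of eight predicates from the low three bits of the immediate; the not-less-than and not-less-or-equal forms are obtained by swapping the VFCMP operands and using the unordered-or-compare conditions. A per-lane C sketch of the eight predicates (all-ones on true, as SSE requires; illustrative only):

    #include <stdint.h>
    #include <math.h>

    /* One float lane of CMPPS: returns the 32-bit mask for predicate imm & 7. */
    static uint32_t cmpps_lane(float a, float b, unsigned imm)
    {
        int unordered = isnan(a) || isnan(b);
        int r;
        switch (imm & 7) {
            case 0:  r = a == b; break;               /* EQ  (ordered)           */
            case 1:  r = a <  b; break;               /* LT  (ordered)           */
            case 2:  r = a <= b; break;               /* LE  (ordered)           */
            case 3:  r = unordered; break;            /* UNORD                   */
            case 4:  r = unordered || a != b; break;  /* NEQ (true if unordered) */
            case 5:  r = unordered || a >= b; break;  /* NLT: swapped cULE       */
            case 6:  r = unordered || a >  b; break;  /* NLE: swapped cULT       */
            default: r = !unordered; break;           /* ORD                     */
        }
        return r ? 0xFFFFFFFFu : 0u;
    }
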
diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c
index 88d03ddc..8e7764d0 100644
--- a/src/dynarec/la64/dynarec_la64_66.c
+++ b/src/dynarec/la64/dynarec_la64_66.c
@@ -324,6 +324,22 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SMWRITELOCK(lock);
             }
             break;
+        case 0x8B:
+            INST_NAME("MOV Gw, Ew");
+            nextop = F8;
+            GETGD;
+            if (MODREG) {
+                ed = TO_LA64((nextop & 7) + (rex.b << 3));
+                if (ed != gd) {
+                    BSTRINS_D(gd, ed, 15, 0);
+                }
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0);
+                SMREADLOCK(lock);
+                LD_HU(x1, ed, fixedaddress);
+                BSTRINS_D(gd, x1, 15, 0);
+            }
+            break;
         case 0x90:
         case 0x91:
         case 0x92:
@@ -528,6 +544,27 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     DEFAULT;
             }
             break;
+        case 0xFF:
+            nextop = F8;
+            switch ((nextop >> 3) & 7) {
+                case 0:
+                    INST_NAME("INC Ew");
+                    SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING);
+                    GETEW(x1, 0);
+                    emit_inc16(dyn, ninst, x1, x2, x4, x5);
+                    EWBACK;
+                    break;
+                case 1:
+                    INST_NAME("DEC Ew");
+                    SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING);
+                    GETEW(x1, 0);
+                    emit_dec16(dyn, ninst, x1, x2, x4, x5, x6);
+                    EWBACK;
+                    break;
+                default:
+                    DEFAULT;
+            }
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index aea7b815..c9057c1e 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -51,6 +51,20 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
     MAYUSE(j64);
 
     switch (opcode) {
+        case 0x12:
+            INST_NAME("MOVLPD Gx, Eq");
+            nextop = F8;
+            GETGX(v0, 1);
+            if (MODREG) {
+                DEFAULT;
+                return addr;
+            }
+            SMREAD();
+            addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+            v1 = fpu_get_scratch(dyn);
+            FLD_D(v1, wback, fixedaddress);
+            VEXTRINS_D(v0, v1, 0);
+            break;
         case 0x14:
             INST_NAME("UNPCKLPD Gx, Ex");
             nextop = F8;
@@ -86,6 +100,21 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 VLD(v0, ed, fixedaddress);
             }
             break;
+        case 0x29:
+            INST_NAME("MOVAPD Ex,Gx");
+            nextop = F8;
+            GETG;
+            v0 = sse_get_reg(dyn, ninst, x1, gd, 0);
+            if (MODREG) {
+                ed = (nextop & 7) + (rex.b << 3);
+                v1 = sse_get_reg_empty(dyn, ninst, x1, ed);
+                VOR_V(v1, v0, v0);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                VST(v0, ed, fixedaddress);
+                SMWRITE2();
+            }
+            break;
         case 0x2E:
             // no special check...
         case 0x2F:
@@ -283,6 +312,14 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 VXOR_V(q0, q0, q1);
             }
             break;
+        case 0x58:
+            INST_NAME("ADDPD Gx, Ex");
+            nextop = F8;
+            GETEX(q0, 0, 0);
+            GETGX(q1, 1);
+            // TODO: fastnan handling
+            VFADD_D(q1, q1, q0);
+            break;
         case 0x5A:
             INST_NAME("CVTPD2PS Gx, Ex");
             nextop = F8;
@@ -294,6 +331,14 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             VXOR_V(v0, v0, v0);
             VEXTRINS_D(v0, q0, 0);
             break;
+        case 0x5C:
+            INST_NAME("SUBPD Gx, Ex");
+            nextop = F8;
+            GETEX(q0, 0, 0);
+            GETGX(q1, 1);
+            // TODO: fastnan handling
+            VFSUB_D(q1, q1, q0);
+            break;
         case 0x60:
             INST_NAME("PUNPCKLBW Gx,Ex");
             nextop = F8;
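
MOVLPD only replaces the low 64-bit lane of the destination, which is why the new case loads the quadword into a scratch FPR with FLD_D and merges it with VEXTRINS_D instead of doing a full vector load. A plain C picture of that semantics (illustrative, not box64 code):

    #include <stdint.h>
    #include <string.h>

    typedef struct { uint64_t lane[2]; } xmm_t;

    /* MOVLPD xmm, m64: low lane comes from memory, high lane is preserved. */
    static void movlpd_load(xmm_t* gx, const void* m64)
    {
        memcpy(&gx->lane[0], m64, sizeof(uint64_t));
    }
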
diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c
index cd94673e..ad205467 100644
--- a/src/dynarec/la64/dynarec_la64_emit_math.c
+++ b/src/dynarec/la64/dynarec_la64_emit_math.c
@@ -1127,6 +1127,141 @@ void emit_adc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
 }
 
+// emit INC8 instruction, from s1, store result in s1 using s2, s3 and s4 as scratch
+void emit_inc8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
+{
+    IFX (X_PEND) {
+        ST_B(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s3, d_inc8);
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+    IFXA (X_AF | X_OF, !la64_lbt) {
+        ORI(s3, s1, 1);  // s3 = op1 | op2
+        ANDI(s4, s1, 1); // s4 = op1 & op2
+    }
+
+    IFXA (X_ALL, la64_lbt) {
+        X64_INC_B(s1);
+    }
+
+    ADDI_W(s1, s1, 1);
+
+    IFX (X_PEND) {
+        ST_B(s1, xEmu, offsetof(x64emu_t, res));
+    }
+
+    if (la64_lbt) {
+        ANDI(s1, s1, 0xff);
+        return;
+    }
+
+    IFX (X_ALL) {
+        // preserving CF
+        MOV64x(s4, (1UL << F_AF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF));
+        ANDN(xFlags, xFlags, s4);
+    }
+    IFX (X_AF | X_OF) {
+        ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2)
+        OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
+        IFX (X_AF) {
+            ANDI(s2, s3, 0x08); // AF: cc & 0x08
+            BEQZ(s2, 8);
+            ORI(xFlags, xFlags, 1 << F_AF);
+        }
+        IFX (X_OF) {
+            SRLI_D(s3, s3, 6);
+            SRLI_D(s2, s3, 1);
+            XOR(s3, s3, s2);
+            ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
+            BEQZ(s3, 8);
+            ORI(xFlags, xFlags, 1 << F_OF);
+        }
+    }
+    IFX (X_SF) {
+        ANDI(s2, s1, 0x80);
+        BEQZ(s2, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    ANDI(s1, s1, 0xff);
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s2);
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+}
+
+// emit INC16 instruction, from s1, store result in s1 using s3 and s4 as scratch
+void emit_inc16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
+{
+    IFX (X_PEND) {
+        ST_H(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s3, d_inc16);
+    } else IFX (X_ZF | X_OF | X_AF | X_SF | X_PF) {
+        SET_DFNONE();
+    }
+    IFXA (X_AF | X_OF, !la64_lbt) {
+        ORI(s3, s1, 1);  // s3 = op1 | op2
+        ANDI(s4, s1, 1); // s4 = op1 & op2
+    }
+
+    IFXA (X_ALL, la64_lbt) {
+        X64_INC_H(s1);
+    }
+
+    ADDI_D(s1, s1, 1);
+
+    IFX (X_PEND) {
+        ST_H(s1, xEmu, offsetof(x64emu_t, res));
+    }
+
+    if (la64_lbt) {
+        BSTRPICK_D(s1, s1, 15, 0);
+        return;
+    }
+
+    IFX (X_ALL) {
+        // preserving CF
+        MOV64x(s4, (1UL << F_AF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF));
+        ANDN(xFlags, xFlags, s4);
+    }
+
+    IFX (X_AF | X_OF) {
+        ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2)
+        OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
+        IFX (X_AF) {
+            ANDI(s4, s3, 0x08); // AF: cc & 0x08
+            BEQZ(s4, 8);
+            ORI(xFlags, xFlags, 1 << F_AF);
+        }
+        IFX (X_OF) {
+            SRLI_D(s3, s3, 14);
+            SRLI_D(s4, s3, 1);
+            XOR(s3, s3, s4);
+            ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
+            BEQZ(s3, 8);
+            ORI(xFlags, xFlags, 1 << F_OF);
+        }
+    }
+
+    BSTRPICK_D(s1, s1, 15, 0);
+
+    IFX (X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_SF) {
+        SRLI_D(s3, s1, 15);
+        BEQZ(s3, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
 // emit INC32 instruction, from s1, store result in s1 using s3 and s4 as scratch
 void emit_inc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
 {
@@ -1156,7 +1291,10 @@ void emit_inc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
 
-    if (la64_lbt) return;
+    if (la64_lbt) {
+        if (!rex.w) ZEROUP(s1);
+        return;
+    }
 
     IFX (X_ALL) {
         // preserving CF
@@ -1196,6 +1334,141 @@ void emit_inc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
 }
 
+// emit DEC8 instruction, from s1, store result in s1 using s2, s3 and s4 as scratch
+void emit_dec8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
+{
+    IFX (X_PEND) {
+        ST_B(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s3, d_dec8);
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+    IFXA (X_AF | X_OF, !la64_lbt) {
+        NOR(s4, xZR, s1); // s4 = ~op1
+        ORI(s3, s4, 1);   // s3 = ~op1 | op2
+        ANDI(s4, s4, 1);  // s4 = ~op1 & op2
+    }
+
+    IFXA (X_ALL, la64_lbt) {
+        X64_DEC_B(s1);
+    }
+
+    ADDI_W(s1, s1, -1);
+
+    IFX (X_PEND) {
+        ST_B(s1, xEmu, offsetof(x64emu_t, res));
+    }
+
+    if (la64_lbt) {
+        ANDI(s1, s1, 0xff);
+        return;
+    }
+
+    IFX (X_ALL) {
+        // preserving CF
+        MOV64x(s4, (1UL << F_AF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF));
+        ANDN(xFlags, xFlags, s4);
+    }
+    IFX (X_AF | X_OF) {
+        AND(s3, s1, s3); // s3 = res & (~op1 | op2)
+        OR(s3, s3, s4);  // cc = (res & (~op1 | op2)) | (~op1 & op2)
+        IFX (X_AF) {
+            ANDI(s2, s3, 0x08); // AF: cc & 0x08
+            BEQZ(s2, 8);
+            ORI(xFlags, xFlags, 1 << F_AF);
+        }
+        IFX (X_OF) {
+            SRLI_D(s3, s3, 6);
+            SRLI_D(s2, s3, 1);
+            XOR(s3, s3, s2);
+            ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
+            BEQZ(s3, 8);
+            ORI(xFlags, xFlags, 1 << F_OF);
+        }
+    }
+    IFX (X_SF) {
+        ANDI(s2, s1, 0x80);
+        BEQZ(s2, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    ANDI(s1, s1, 0xff);
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s2);
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+}
+
+// emit DEC16 instruction, from s1, store result in s1 using s2, s3, s4 and s5 as scratch
+void emit_dec16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    IFX (X_PEND) {
+        ST_H(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s3, d_dec16);
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+    IFX (X_AF | X_OF) {
+        NOR(s5, xZR, s1);
+        ORI(s3, s5, 1);  // s3 = ~op1 | op2
+        ANDI(s5, s5, 1); // s5 = ~op1 & op2
+    }
+
+    IFXA (X_ALL, la64_lbt) {
+        X64_DEC_H(s1);
+    }
+
+    ADDI_W(s1, s1, -1);
+
+    IFX (X_PEND) {
+        ST_H(s1, xEmu, offsetof(x64emu_t, res));
+    }
+
+    if (la64_lbt) {
+        BSTRPICK_D(s1, s1, 15, 0);
+        return;
+    }
+
+    IFX (X_ALL) {
+        // preserving CF
+        MOV64x(s4, (1UL << F_AF) | (1UL << F_OF) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF));
+        ANDN(xFlags, xFlags, s4);
+    }
+
+    IFX (X_AF | X_OF) {
+        AND(s3, s1, s3); // s3 = res & (~op1 | op2)
+        OR(s3, s3, s5);  // cc = (res & (~op1 | op2)) | (~op1 & op2)
+        IFX (X_AF) {
+            ANDI(s2, s3, 0x08); // AF: cc & 0x08
+            BEQZ(s2, 8);
+            ORI(xFlags, xFlags, 1 << F_AF);
+        }
+        IFX (X_OF) {
+            SRLI_D(s3, s3, 14);
+            SRLI_D(s2, s3, 1);
+            XOR(s3, s3, s2);
+            ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
+            BEQZ(s3, 8);
+            ORI(xFlags, xFlags, 1 << F_OF);
+        }
+    }
+    SLLI_W(s1, s1, 16);
+    IFX (X_SF) {
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    SRLI_W(s1, s1, 16);
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s2);
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+}
+
 // emit DEC32 instruction, from s1, store result in s1 using s3 and s4 as scratch
 void emit_dec32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
 {
@@ -1219,14 +1492,16 @@ void emit_dec32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         }
     }
 
-
     ADDIxw(s1, s1, -1);
 
     IFX (X_PEND) {
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
 
-    if (la64_lbt) return;
+    if (la64_lbt) {
+        if (!rex.w) ZEROUP(s1);
+        return;
+    }
 
     IFX (X_ALL) {
         // preserving CF
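
When the LBT extension is not available, the new emit_inc8/emit_inc16 and emit_dec8/emit_dec16 helpers derive AF and OF from a carry-chain value cc, where bit n of cc is the carry (or borrow) out of bit n. The C below restates that trick for the 8-bit case (reference model only, with illustrative names):

    #include <stdint.h>
    #include <stdbool.h>

    /* INC: cc = (~res & (op1 | op2)) | (op1 & op2) holds the per-bit carries of
       op1 + op2, so AF is the carry out of bit 3 and OF is the xor of the carry
       into and out of bit 7. */
    static void inc8_af_of(uint8_t op1, bool* af, bool* of)
    {
        uint8_t op2 = 1;                      /* INC adds an implicit 1 */
        uint8_t res = op1 + op2;
        uint8_t cc  = (~res & (op1 | op2)) | (op1 & op2);
        *af = (cc & 0x08) != 0;
        *of = (((cc >> 6) ^ (cc >> 7)) & 1) != 0;
    }

    /* DEC: cc = (res & (~op1 | op2)) | (~op1 & op2) holds the per-bit borrows of
       op1 - op2; AF and OF are read from the same bit positions. */
    static void dec8_af_of(uint8_t op1, bool* af, bool* of)
    {
        uint8_t op2 = 1;                      /* DEC subtracts an implicit 1 */
        uint8_t res = op1 - op2;
        uint8_t cc  = (res & ((uint8_t)~op1 | op2)) | ((uint8_t)~op1 & op2);
        *af = (cc & 0x08) != 0;
        *of = (((cc >> 6) ^ (cc >> 7)) & 1) != 0;
    }
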
diff --git a/src/dynarec/la64/dynarec_la64_f0.c b/src/dynarec/la64/dynarec_la64_f0.c
index 550713c4..bfad81df 100644
--- a/src/dynarec/la64/dynarec_la64_f0.c
+++ b/src/dynarec/la64/dynarec_la64_f0.c
@@ -382,6 +382,50 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SMDMB();
             }
             break;
+        case 0xFF:
+            nextop = F8;
+
+            switch ((nextop >> 3) & 7) {
+                case 0:
+                    INST_NAME("LOCK INC Ed");
+                    SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING);
+                    SMDMB();
+                    if (MODREG) {
+                        ed = TO_LA64((nextop & 7) + (rex.b << 3));
+                        emit_inc32(dyn, ninst, rex, ed, x3, x4, x5, x6);
+                    } else {
+                        addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
+                        MARKLOCK;
+                        LLxw(x1, wback, 0);
+                        ADDIxw(x4, x1, 1);
+                        SCxw(x4, wback, 0);
+                        BEQZ_MARKLOCK(x4);
+                        IFX (X_ALL | X_PEND)
+                            emit_inc32(dyn, ninst, rex, x1, x3, x4, x5, x6);
+                    }
+                    break;
+                case 1:
+                    INST_NAME("LOCK DEC Ed");
+                    SETFLAGS(X_ALL & ~X_CF, SF_SUBSET_PENDING);
+                    SMDMB();
+                    if (MODREG) {
+                        ed = TO_LA64((nextop & 7) + (rex.b << 3));
+                        emit_dec32(dyn, ninst, rex, ed, x3, x4, x5, x6);
+                    } else {
+                        addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
+                        MARKLOCK;
+                        LLxw(x1, wback, 0);
+                        ADDIxw(x4, x1, -1);
+                        SCxw(x4, wback, 0);
+                        BEQZ_MARKLOCK(x4);
+                        IFX (X_ALL | X_PEND)
+                            emit_dec32(dyn, ninst, rex, x1, x3, x4, x5, x6);
+                    }
+                    break;
+                default:
+                    DEFAULT;
+            }
+            break;
         default:
             DEFAULT;
     }
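
The LOCK INC/DEC memory paths use an LL/SC retry loop: LLxw loads the old value, the updated value is stored back with SCxw, and the loop restarts if the store-conditional fails; emit_inc32/emit_dec32 then recompute the flags from the value that was loaded. Roughly the same shape in C using GCC atomics (an illustrative model, not the helpers box64 uses):

    #include <stdint.h>

    /* LOCK INC m32 as a compare-and-swap retry loop; returns the value that was
       loaded so the flag emitter can rebuild EFLAGS from old and old + 1. */
    static uint32_t lock_inc32(uint32_t* mem)
    {
        uint32_t old = __atomic_load_n(mem, __ATOMIC_RELAXED);     /* LL.W */
        while (!__atomic_compare_exchange_n(mem, &old, old + 1,    /* SC.W, retry on failure */
                                            0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED)) {
            /* failed SC: old now holds the current memory value, try again */
        }
        return old;
    }
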
diff --git a/src/dynarec/la64/dynarec_la64_f20f.c b/src/dynarec/la64/dynarec_la64_f20f.c
index b6e060ea..eb03439f 100644
--- a/src/dynarec/la64/dynarec_la64_f20f.c
+++ b/src/dynarec/la64/dynarec_la64_f20f.c
@@ -108,6 +108,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             } else {
                 FTINTRZ_W_D(d1, q0);
                 MOVFR2GR_S(gd, d1);
+                ZEROUP(gd);
             }
             if (!rex.w) ZEROUP(gd);
             if (!box64_dynarec_fastround) {
@@ -138,6 +139,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             } else {
                 FTINT_W_D(d1, q0);
                 MOVFR2GR_S(gd, d1);
+                ZEROUP(gd);
             }
             x87_restoreround(dyn, ninst, u8);
             if (!box64_dynarec_fastround) {
diff --git a/src/dynarec/la64/dynarec_la64_f30f.c b/src/dynarec/la64/dynarec_la64_f30f.c
index c42310a7..5e1f0940 100644
--- a/src/dynarec/la64/dynarec_la64_f30f.c
+++ b/src/dynarec/la64/dynarec_la64_f30f.c
@@ -114,6 +114,7 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             } else {
                 FTINTRZ_W_S(d1, d0);
                 MOVFR2GR_S(gd, d1);
+                ZEROUP(gd);
             }
             if (!rex.w) ZEROUP(gd);
             if (!box64_dynarec_fastround) {
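
The ZEROUP additions in the f20f/f30f converters address the fact that MOVFR2GR_S sign-extends its 32-bit result into the 64-bit GPR, whereas an x86 instruction writing a 32-bit destination must leave the upper half zero. In C terms (illustrative):

    #include <stdint.h>

    /* MOVFR2GR_S followed by ZEROUP: sign-extend, then clear bits 63..32 so the
       GPR matches x86's zero-extension rule for 32-bit destinations. */
    static uint64_t write_gd32(int32_t cvt_result)
    {
        uint64_t gd = (uint64_t)(int64_t)cvt_result;  /* MOVFR2GR_S sign-extends */
        return gd & 0xffffffffu;                      /* ZEROUP                  */
    }
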
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index cb74bcdf..fbc91e34 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -764,7 +764,11 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_sbb32          STEPNAME(emit_sbb32)
 #define emit_neg8           STEPNAME(emit_neg8)
 #define emit_neg32          STEPNAME(emit_neg32)
+#define emit_inc8           STEPNAME(emit_inc8)
+#define emit_inc16          STEPNAME(emit_inc16)
 #define emit_inc32          STEPNAME(emit_inc32)
+#define emit_dec8           STEPNAME(emit_dec8)
+#define emit_dec16          STEPNAME(emit_dec16)
 #define emit_dec32          STEPNAME(emit_dec32)
 #define emit_or32           STEPNAME(emit_or32)
 #define emit_or32c          STEPNAME(emit_or32c)
@@ -855,7 +859,11 @@ void emit_sbb16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
 void emit_sbb32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_neg8(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4);
 void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3);
+void emit_inc8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+void emit_inc16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_inc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
+void emit_dec8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+void emit_dec16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_dec32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index d5388f2c..6201eda8 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -1357,6 +1357,9 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VLD(vd, rj, imm12)          EMIT(type_2RI12(0b0010110000, imm12, rj, vd))
 #define VST(vd, rj, imm12)          EMIT(type_2RI12(0b0010110001, imm12, rj, vd))
 
+#define VFCMP_S(vd, vj, vk, cond)   EMIT(type_4R(0b000011000101, cond, vk, vj, vd))
+#define VFCMP_D(vd, vj, vk, cond)   EMIT(type_4R(0b000011000110, cond, vk, vj, vd))
+
 #define XVADD_B(vd, vj, vk)          EMIT(type_3R(0b01110100000010100, vk, vj, vd))
 #define XVADD_H(vd, vj, vk)          EMIT(type_3R(0b01110100000010101, vk, vj, vd))
 #define XVADD_W(vd, vj, vk)          EMIT(type_3R(0b01110100000010110, vk, vj, vd))