about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yang Liu <numbksco@gmail.com> 2024-11-21 18:44:47 +0800
committer GitHub <noreply@github.com> 2024-11-21 11:44:47 +0100
commit 85c6e28d472a6ffd8ff54d8e8ac4d53945778a1b (patch)
tree e374fa591dce9a022822658e644e31f41665dc1c
parent d1a253f4694588ad6efafc36a3240687034f8780 (diff)
download box64-85c6e28d472a6ffd8ff54d8e8ac4d53945778a1b.tar.gz
box64-85c6e28d472a6ffd8ff54d8e8ac4d53945778a1b.zip
[LA64_DYNAREC] Added more opcodes for JDK (#2055)
-rw-r--r-- src/dynarec/la64/dynarec_la64_00.c 71
-rw-r--r-- src/dynarec/la64/dynarec_la64_0f.c 80
-rw-r--r-- src/dynarec/la64/dynarec_la64_66.c 26
-rw-r--r-- src/dynarec/la64/dynarec_la64_660f.c 23
-rw-r--r-- src/dynarec/la64/dynarec_la64_f0.c 73
5 files changed, 272 insertions, 1 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index fbbad121..9f618456 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -1053,6 +1053,24 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 }
             }
             break;
+        case 0x8F:
+            INST_NAME("POP Ed");
+            nextop = F8;
+            if (MODREG) {
+                POP1z(xRAX + (nextop & 7) + (rex.b << 3));
+            } else {
+                POP1z(x2); // so this can handle POP [ESP] and maybe some variant too
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, x1, &fixedaddress, rex, &lock, 1, 0);
+                if (ed == xRSP) {
+                    SDz(x2, ed, fixedaddress);
+                } else {
+                    // a bit convoluted: RSP is moved back down around the store and restored after it, so that a segfault on the store can be recovered correctly
+                    ADDIz(xRSP, xRSP, rex.is32bits ? -4 : -8);
+                    SDz(x2, ed, fixedaddress);
+                    ADDIz(xRSP, xRSP, rex.is32bits ? 4 : 8);
+                }
+            }
+            break;
         case 0x90:
         case 0x91:
         case 0x92:
@@ -1379,6 +1397,59 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     break;
             }
             break;
+        case 0xAF:
+            switch (rep) {
+                case 1:
+                case 2:
+                    if (rep == 1) {
+                        INST_NAME("REPNZ SCASD");
+                    } else {
+                        INST_NAME("REPZ SCASD");
+                    }
+                    MAYSETFLAGS();
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    CBZ_NEXT(xRCX);
+                    if (rex.w) {
+                        MV(x1, xRAX);
+                    } else {
+                        ZEROUP2(x1, xRAX);
+                    }
+                    ANDI(x2, xFlags, 1 << F_DF);
+                    BNEZ_MARK2(x2);
+                    MARK; // Part with DF==0
+                    LDxw(x2, xRDI, 0);
+                    ADDI_D(xRDI, xRDI, rex.w ? 8 : 4);
+                    ADDI_D(xRCX, xRCX, -1);
+                    if (rep == 1) {
+                        BEQ_MARK3(x1, x2);
+                    } else {
+                        BNE_MARK3(x1, x2);
+                    }
+                    BNE_MARK(xRCX, xZR);
+                    B_MARK3_nocond;
+                    MARK2; // Part with DF==1
+                    LDxw(x2, xRDI, 0);
+                    ADDI_D(xRDI, xRDI, rex.w ? -8 : -4);
+                    ADDI_D(xRCX, xRCX, -1);
+                    if (rep == 1) {
+                        BEQ_MARK3(x1, x2);
+                    } else {
+                        BNE_MARK3(x1, x2);
+                    }
+                    BNE_MARK2(xRCX, xZR);
+                    MARK3; // end
+                    emit_cmp32(dyn, ninst, rex, x1, x2, x3, x4, x5, x6);
+                    break;
+                default:
+                    INST_NAME("SCASD");
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETDIR(x3, x1, rex.w ? 8 : 4);
+                    LDxw(x2, xRDI, 0);
+                    ADD_D(xRDI, xRDI, x3);
+                    emit_cmp32(dyn, ninst, rex, xRAX, x2, x3, x4, x5, x6);
+                    break;
+            }
+            break;
         case 0xB0:
         case 0xB1:
         case 0xB2:
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index 0bc20158..bc584b86 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -110,6 +110,17 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             *need_epilog = 0;
             *ok = 0;
             break;
+        case 0x0D:
+            nextop = F8;
+            switch ((nextop >> 3) & 7) {
+                case 1:
+                    INST_NAME("PREFETCHW");
+                    FAKEED;
+                    break;
+                default: //???
+                    DEFAULT;
+            }
+            break;
         case 0x10:
             INST_NAME("MOVUPS Gx,Ex");
             nextop = F8;
@@ -325,6 +336,17 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SPILL_EFLAGS();
             }
             break;
+        case 0x31:
+            INST_NAME("RDTSC");
+            NOTEST(x1);
+            // TODO: how to read the wall-clock real time on LoongArch?
+            CALL(ReadTSC, x3); // will return the u64 in x3
+            if (box64_rdtsc_shift) {
+                SRLI_D(x3, x3, box64_rdtsc_shift);
+            }
+            SRLI_D(xRDX, x3, 32);
+            ZEROUP2(xRDX, x3);
+            break;
         case 0x38:
             // SSE3
             nextop = F8;
@@ -771,6 +793,15 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                             CALL(rex.is32bits ? ((void*)fpu_fxsave32) : ((void*)fpu_fxsave64), -1);
                         }
                         break;
+                    case 1:
+                        INST_NAME("FXRSTOR Ed");
+                        MESSAGE(LOG_DUMP, "Need Optimization\n");
+                        SKIPTEST(x1);
+                        fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, x3, &fixedaddress, rex, NULL, 0, 0);
+                        if (ed != x1) { MV(x1, ed); }
+                        CALL(rex.is32bits ? ((void*)fpu_fxrstor32) : ((void*)fpu_fxrstor64), -1);
+                        break;
                     case 2:
                         INST_NAME("LDMXCSR Md");
                         GETED(0);
@@ -785,6 +816,33 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         LD_WU(x4, xEmu, offsetof(x64emu_t, mxcsr));
                         ST_W(x4, wback, fixedaddress);
                         break;
+                    case 4:
+                        INST_NAME("XSAVE Ed");
+                        MESSAGE(LOG_DUMP, "Need Optimization\n");
+                        fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
+                        addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0);
+                        if (ed != x1) { MV(x1, ed); }
+                        MOV32w(x2, rex.is32bits);
+                        CALL((void*)fpu_xsave, -1);
+                        break;
+                    case 5:
+                        INST_NAME("XRSTOR Ed");
+                        MESSAGE(LOG_DUMP, "Need Optimization\n");
+                        fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
+                        addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0);
+                        if (ed != x1) { MV(x1, ed); }
+                        MOV32w(x2, rex.is32bits);
+                        CALL((void*)fpu_xrstor, -1);
+                        break;
+                    case 7:
+                        INST_NAME("CLFLUSH Ed");
+                        MESSAGE(LOG_DUMP, "Need Optimization?\n");
+                        addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0);
+                        if (wback != A1) {
+                            MV(A1, wback);
+                        }
+                        CALL_(native_clflush, -1, 0);
+                        break;
                     default:
                         DEFAULT;
                 }
@@ -955,6 +1013,28 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     }
                     MARK;
                     break;
+                case 7:
+                    INST_NAME("BTC Ed, Ib");
+                    SETFLAGS(X_CF, SF_SUBSET);
+                    SET_DFNONE();
+                    GETED(1);
+                    u8 = F8;
+                    u8 &= rex.w ? 0x3f : 0x1f;
+                    BSTRPICK_D(x3, ed, u8, u8);
+                    BSTRINS_D(xFlags, x3, 0, 0);
+                    if (u8 <= 10) {
+                        XORI(ed, ed, (1LL << u8));
+                    } else {
+                        MOV64xw(x3, (1LL << u8));
+                        XOR(ed, ed, x3);
+                    }
+                    if (wback) {
+                        SDxw(ed, wback, fixedaddress);
+                        SMWRITE();
+                    } else if (!rex.w) {
+                        ZEROUP(ed);
+                    }
+                    break;
                 default:
                     DEFAULT;
             }
diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c
index 4af61163..bfe64ef1 100644
--- a/src/dynarec/la64/dynarec_la64_66.c
+++ b/src/dynarec/la64/dynarec_la64_66.c
@@ -695,6 +695,30 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     emit_neg16(dyn, ninst, ed, x2, x4);
                     EWBACK;
                     break;
+                case 6:
+                    INST_NAME("DIV Ew");
+                    SETFLAGS(X_ALL, SF_SET);
+                    SET_DFNONE();
+                    GETEW(x1, 0);
+                    BSTRPICK_D(x2, xRAX, 15, 0);
+                    SLLI_D(x7, xRDX, 48);
+                    SRLI_D(x7, x7, 32);
+                    OR(x2, x2, x7);
+                    if(box64_dynarec_div0) {
+                        BNE_MARK3(ed, xZR);
+                        GETIP_(ip);
+                        STORE_XEMU_CALL();
+                        CALL(native_div0, -1);
+                        CLEARIP();
+                        LOAD_XEMU_CALL();
+                        jump_to_epilog(dyn, 0, xRIP, ninst);
+                        MARK3;
+                    }
+                    DIV_WU(x7, x2, ed);
+                    MOD_WU(x4, x2, ed);
+                    BSTRINSz(xRAX, x7, 15, 0);
+                    BSTRINSz(xRDX, x4, 15, 0);
+                    break;
                 case 7:
                     INST_NAME("IDIV Ew");
                     NOTEST(x1);
@@ -717,7 +741,7 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     DIV_W(x3, x2, ed);
                     MOD_W(x4, x2, ed);
                     BSTRINSz(xRAX, x3, 15, 0);
-                    BSTRINSz(xRAX, x4, 15, 0);
+                    BSTRINSz(xRDX, x4, 15, 0);
                     break;
                 default:
                     DEFAULT;
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index c348a788..f0e382ae 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -107,6 +107,21 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             }
             VEXTRINS_D(v0, v1, 0x10);
             break;
+        case 0x16:
+            INST_NAME("MOVHPD Gx, Eq");
+            nextop = F8;
+            GETGX(v0, 1);
+            if (MODREG) {
+                // a register operand (instead of a memory operand) is an invalid encoding for this opcode!
+                DEFAULT;
+                return addr;
+            }
+            SMREAD();
+            addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+            v1 = fpu_get_scratch(dyn);
+            FLD_D(v1, ed, fixedaddress);
+            VEXTRINS_D(v0, v1, 0x10);
+            break;
         case 0x1F:
             INST_NAME("NOP (multibyte)");
             nextop = F8;
@@ -1217,6 +1232,14 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(v1, 0, 0);
             VMUH_H(v0, v0, v1);
             break;
+        case 0xE6:
+            INST_NAME("CVTTPD2DQ Gx, Ex");
+            nextop = F8;
+            GETEX(v1, 0, 0);
+            GETGX_empty(v0);
+            // TODO: fastround
+            VFTINTRZ_W_D(v0, v1, v1);
+            break;
         case 0xE7:
             INST_NAME("MOVNTDQ Ex, Gx");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_f0.c b/src/dynarec/la64/dynarec_la64_f0.c
index e857999e..22457c0d 100644
--- a/src/dynarec/la64/dynarec_la64_f0.c
+++ b/src/dynarec/la64/dynarec_la64_f0.c
@@ -99,6 +99,79 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0x0F:
             nextop = F8;
             switch (nextop) {
+                case 0xB0:
+                    switch (rep) {
+                        case 0:
+                            INST_NAME("LOCK CMPXCHG Eb, Gb");
+                            SETFLAGS(X_ALL, SF_SET_PENDING);
+                            nextop = F8;
+                            ANDI(x6, xRAX, 0xff); // AL
+                            SMDMB();
+                            if (MODREG) {
+                                if (rex.rex) {
+                                    wback = TO_LA64((nextop & 7) + (rex.b << 3));
+                                    wb2 = 0;
+                                } else {
+                                    wback = (nextop & 7);
+                                    wb2 = (wback >> 2) * 8;
+                                    wback = TO_LA64(wback & 3);
+                                }
+                                BSTRPICK_D(x2, wback, wb2 + 7, wb2);
+                                wb1 = 0;
+                                ed = x2;
+                                UFLAG_IF {
+                                    emit_cmp8(dyn, ninst, x6, ed, x3, x4, x5, x1);
+                                }
+                                BNE_MARK2(x6, x2);
+                                BSTRPICK_D(wback, x2, wb2 + 7, wb2);
+                                GETGB(x1);
+                                MV(ed, gd);
+                                MARK2;
+                                BSTRINS_D(xRAX, x2, 7, 0);
+                                B_NEXT_nocond;
+                            } else {
+                                if (rex.rex) {
+                                    gb1 = TO_LA64(((nextop & 0x38) >> 3) + (rex.r << 3));
+                                    gb2 = 0;
+                                } else {
+                                    gd = (nextop & 0x38) >> 3;
+                                    gb2 = ((gd & 4) >> 2) * 8;
+                                    gb1 = TO_LA64(gd & 3);
+                                }
+                                addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, LOCK_LOCK, 0, 0);
+                                ANDI(x5, wback, 0b11);
+                                SLLI_D(x5, x5, 3); // shamt
+                                MARKLOCK;
+                                ADDI_D(x7, xZR, ~0b11);
+                                AND(x7, wback, x7); // align to 32bit
+                                LD_WU(x1, x7, 0);
+                                LL_W(x4, x7, 0);
+                                SRL_D(x4, x4, x5);
+                                ANDI(x4, x4, 0xff);
+                                BNE_MARK(x6, x4); // compare AL with m8
+                                // AL == m8, r8 is loaded into m8
+                                ADDI_D(x2, xZR, 0xff);
+                                SLL_D(x2, x2, x5);
+                                NOR(x2, x2, xZR);
+                                AND(x2, x1, x2);
+                                BSTRPICK_D(x1, gb1, gb2 + 7, gb2);
+                                SLL_D(x1, x1, x5);
+                                OR(x1, x1, x2);
+                                SC_W(x1, x7, 0);
+                                BEQZ_MARKLOCK(x1);
+                                // done
+                                MARK;
+                                UFLAG_IF { emit_cmp8(dyn, ninst, x6, x4, x1, x2, x3, x5); }
+                                // load m8 into AL
+                                ANDI(xRAX, xRAX, ~0xff);
+                                OR(xRAX, xRAX, x4);
+                            }
+                            SMDMB();
+                            break;
+                        default:
+                            DEFAULT;
+                    }
+                    break;
                 case 0xB1:
                     switch (rep) {
                         case 0: