about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <numbksco@gmail.com> 2024-09-27 01:45:40 +0800
committer GitHub <noreply@github.com> 2024-09-26 19:45:40 +0200
commit 0fb0ac7994e53700d781f406612bd0f96df97933 (patch)
tree 631a1da93d3a77d5b442cb2635c555d9421e8e6c /src
parent 24f278c9618509038e6e278586a10a21c35803c6 (diff)
download box64-0fb0ac7994e53700d781f406612bd0f96df97933.tar.gz
box64-0fb0ac7994e53700d781f406612bd0f96df97933.zip
[LA64_DYNAREC] Added unaligned support to CMPXCHG8B (#1877)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_f0.c 93
-rw-r--r-- src/dynarec/la64/dynarec_la64_helper.h 14
-rw-r--r-- src/dynarec/la64/dynarec_la64_private.h 1
3 files changed, 79 insertions, 29 deletions
diff --git a/src/dynarec/la64/dynarec_la64_f0.c b/src/dynarec/la64/dynarec_la64_f0.c
index bfc918e8..a6cad8dc 100644
--- a/src/dynarec/la64/dynarec_la64_f0.c
+++ b/src/dynarec/la64/dynarec_la64_f0.c
@@ -188,21 +188,29 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     }
                     break;
                 case 0xC7:
-                    switch (rep) {
-                        case 0:
+                    // rep has no impact here
+                    nextop = F8;
+                    switch ((nextop >> 3) & 7) {
+                        case 1:
                             if (rex.w) {
                                 INST_NAME("LOCK CMPXCHG16B Gq, Eq");
+                                if (!la64_scq) {
+                                    static int warned = 0;
+                                    PASS3(if (!warned) dynarec_log(LOG_INFO, "Warning, LOCK CMPXCHG16B is not well supported on LoongArch without SCQ and issues are expected.\n"));
+                                    warned = 1;
+                                }
                             } else {
                                 INST_NAME("LOCK CMPXCHG8B Gq, Eq");
                             }
                             SETFLAGS(X_ZF, SF_SUBSET);
-                            nextop = F8;
                             addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, LOCK_LOCK, 0, 0);
-                            if (la64_lbt) {
-                                X64_SET_EFLAGS(xZR, X_ZF);
-                            } else {
-                                ADDI_D(x2, xZR, ~(1 << F_ZF));
-                                AND(xFlags, xFlags, x2);
+                            UFLAG_IF {
+                                if (la64_lbt) {
+                                    X64_SET_EFLAGS(xZR, X_ZF);
+                                } else {
+                                    ADDI_D(x2, xZR, ~(1 << F_ZF));
+                                    AND(xFlags, xFlags, x2);
+                                }
                             }
                             if (rex.w) {
                                 if (la64_scq) {
@@ -215,11 +223,13 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                     MV(x5, xRBX);
                                     SC_Q(x5, xRCX, wback);
                                     BEQZ_MARKLOCK(x5);
-                                    if (la64_lbt) {
-                                        ADDI_D(x5, xZR, -1);
-                                        X64_SET_EFLAGS(x5, X_ZF);
-                                    } else {
-                                        ORI(xFlags, xFlags, 1 << F_ZF);
+                                    UFLAG_IF {
+                                        if (la64_lbt) {
+                                            ADDI_D(x5, xZR, -1);
+                                            X64_SET_EFLAGS(x5, X_ZF);
+                                        } else {
+                                            ORI(xFlags, xFlags, 1 << F_ZF);
+                                        }
                                     }
                                     B_MARK3_nocond;
                                     MARK;
@@ -243,11 +253,13 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                     BNE_MARK(x3, xRDX);
                                     ST_D(xRBX, wback, 0);
                                     ST_D(xRCX, wback, 8);
-                                    if (la64_lbt) {
-                                        ADDI_D(x5, xZR, -1);
-                                        X64_SET_EFLAGS(x5, X_ZF);
-                                    } else {
-                                        ORI(xFlags, xFlags, 1 << F_ZF);
+                                    UFLAG_IF {
+                                        if (la64_lbt) {
+                                            ADDI_D(x5, xZR, -1);
+                                            X64_SET_EFLAGS(x5, X_ZF);
+                                        } else {
+                                            ORI(xFlags, xFlags, 1 << F_ZF);
+                                        }
                                     }
                                     B_MARK3_nocond;
                                     MARK;
@@ -260,23 +272,46 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                 }
                             } else {
                                 SMDMB();
-                                ZEROUP2(x3, xRAX);
-                                SLLI_D(x2, xRDX, 32);
-                                OR(x3, x3, x2);
-                                ZEROUP2(x4, xRBX);
-                                SLLI_D(x2, xRCX, 32);
-                                OR(x4, x4, x2);
+                                BSTRINS_D(x3, xRAX, 31, 0);
+                                BSTRINS_D(x3, xRDX, 63, 32);
+                                BSTRINS_D(x4, xRBX, 31, 0);
+                                BSTRINS_D(x4, xRCX, 63, 32);
+                                ANDI(x2, wback, 7);
+                                BNEZ_MARK2(x2);
+                                // Aligned
                                 MARKLOCK;
                                 LL_D(x2, wback, 0);
                                 BNE_MARK(x2, x3); // EDX_EAX != Ed
                                 MV(x5, x4);
                                 SC_D(x5, wback, 0);
                                 BEQZ_MARKLOCK(x5);
-                                if (la64_lbt) {
-                                    ADDI_D(x5, xZR, -1);
-                                    X64_SET_EFLAGS(x5, X_ZF);
-                                } else {
-                                    ORI(xFlags, xFlags, 1 << F_ZF);
+                                UFLAG_IF {
+                                    if (la64_lbt) {
+                                        ADDI_D(x5, xZR, -1);
+                                        X64_SET_EFLAGS(x5, X_ZF);
+                                    } else {
+                                        ORI(xFlags, xFlags, 1 << F_ZF);
+                                    }
+                                }
+                                B_MARK3_nocond;
+                                MARK2;
+                                // Unaligned
+                                ADDI_W(x5, xZR, 0xFF8);
+                                AND(x5, wback, x5);
+                                MARKLOCK2;
+                                LD_D(x2, wback, 0);
+                                LL_D(x6, x5, 0);
+                                BNE_MARK(x2, x3); // EDX_EAX != Ed
+                                SC_D(x6, x5, 0);
+                                BEQZ_MARKLOCK2(x6);
+                                ST_D(x4, wback, 0);
+                                UFLAG_IF {
+                                    if (la64_lbt) {
+                                        ADDI_D(x5, xZR, -1);
+                                        X64_SET_EFLAGS(x5, X_ZF);
+                                    } else {
+                                        ORI(xFlags, xFlags, 1 << F_ZF);
+                                    }
                                 }
                                 B_MARK3_nocond;
                                 MARK;
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index 89cc8249..21f3c2af 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -441,6 +441,8 @@
 #define GETMARKSEG  dyn->insts[ninst].markseg
 #define MARKLOCK    dyn->insts[ninst].marklock = dyn->native_size
 #define GETMARKLOCK dyn->insts[ninst].marklock
+#define MARKLOCK2    dyn->insts[ninst].marklock2 = dyn->native_size
+#define GETMARKLOCK2 dyn->insts[ninst].marklock2
 
 #define Bxx_gen(OP, M, reg1, reg2)   \
     j64 = GET##M - dyn->native_size; \
@@ -462,6 +464,8 @@
 #define BNE_MARK3(reg1, reg2) Bxx_gen(NE, MARK3, reg1, reg2)
 // Branch to MARKLOCK if reg1!=reg2 (use j64)
 #define BNE_MARKLOCK(reg1, reg2) Bxx_gen(NE, MARKLOCK, reg1, reg2)
+// Branch to MARKLOCK2 if reg1!=reg2 (use j64)
+#define BNE_MARKLOCK2(reg1, reg2) Bxx_gen(NE, MARKLOCK2, reg1, reg2)
 
 // Branch to MARK if reg1==reg2 (use j64)
 #define BEQ_MARK(reg1, reg2) Bxx_gen(EQ, MARK, reg1, reg2)
@@ -471,12 +475,16 @@
 #define BEQ_MARK3(reg1, reg2) Bxx_gen(EQ, MARK3, reg1, reg2)
 // Branch to MARKLOCK if reg1==reg2 (use j64)
 #define BEQ_MARKLOCK(reg1, reg2) Bxx_gen(EQ, MARKLOCK, reg1, reg2)
+// Branch to MARKLOCK2 if reg1==reg2 (use j64)
+#define BEQ_MARKLOCK2(reg1, reg2) Bxx_gen(EQ, MARKLOCK2, reg1, reg2)
 // Branch to MARK if reg1==0 (use j64)
 #define BEQZ_MARK(reg) BxxZ_gen(EQ, MARK, reg)
 // Branch to MARK2 if reg1==0 (use j64)
 #define BEQZ_MARK2(reg) BxxZ_gen(EQ, MARK2, reg)
 // Branch to MARKLOCK if reg1==0 (use j64)
 #define BEQZ_MARKLOCK(reg) BxxZ_gen(EQ, MARKLOCK, reg)
+// Branch to MARKLOCK2 if reg1==0 (use j64)
+#define BEQZ_MARKLOCK2(reg) BxxZ_gen(EQ, MARKLOCK2, reg)
 
 // Branch to MARK if reg1!=0 (use j64)
 #define BNEZ_MARK(reg) BxxZ_gen(NE, MARK, reg)
@@ -486,6 +494,8 @@
 #define BNEZ_MARK3(reg) BxxZ_gen(NE, MARK3, reg)
 // Branch to MARKLOCK if reg1!=0 (use j64)
 #define BNEZ_MARKLOCK(reg) BxxZ_gen(NE, MARKLOCK, reg)
+// Branch to MARKLOCK2 if reg1!=0 (use j64)
+#define BNEZ_MARKLOCK2(reg) BxxZ_gen(NE, MARKLOCK2, reg)
 
 // Branch to MARK if fcc!=0 (use j64)
 #define BCNEZ_MARK(fcc) BCxxZ_gen(NE, MARK, fcc)
@@ -495,6 +505,8 @@
 #define BCNEZ_MARK3(fcc) BCxxZ_gen(NE, MARK3, fcc)
 // Branch to MARKLOCK if fcc!=0 (use j64)
 #define BCNEZ_MARKLOCK(fcc) BxxZ_gen(NE, MARKLOCK, fcc)
+// Branch to MARKLOCK2 if fcc!=0 (use j64)
+#define BCNEZ_MARKLOCK2(fcc) BxxZ_gen(NE, MARKLOCK2, fcc)
 
 // Branch to MARK if fcc==0 (use j64)
 #define BCEQZ_MARK(fcc) BCxxZ_gen(EQ, MARK, fcc)
@@ -504,6 +516,8 @@
 #define BCEQZ_MARK3(fcc) BCxxZ_gen(EQ, MARK3, fcc)
 // Branch to MARKLOCK if fcc==0 (use j64)
 #define BCEQZ_MARKLOCK(fcc) BxxZ_gen(EQ, MARKLOCK, fcc)
+// Branch to MARKLOCK2 if fcc==0 (use j64)
+#define BCEQZ_MARKLOCK2(fcc) BxxZ_gen(EQ, MARKLOCK2, fcc)
 
 // Branch to MARK if reg1<reg2 (use j64)
 #define BLT_MARK(reg1, reg2) Bxx_gen(LT, MARK, reg1, reg2)
diff --git a/src/dynarec/la64/dynarec_la64_private.h b/src/dynarec/la64/dynarec_la64_private.h
index 27a93a66..3d65de48 100644
--- a/src/dynarec/la64/dynarec_la64_private.h
+++ b/src/dynarec/la64/dynarec_la64_private.h
@@ -78,6 +78,7 @@ typedef struct instruction_la64_s {
     uintptr_t           markf[2];
     uintptr_t           markseg;
     uintptr_t           marklock;
+    uintptr_t           marklock2;
     int                 pass2choice;// value for choices that are fixed on pass2 for pass3
     uintptr_t           natcall;
     uint16_t            retn;