about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2025-10-10 20:45:50 +0800
committer GitHub <noreply@github.com> 2025-10-10 14:45:50 +0200
commit 70ab16a6d6c54c83a6f9f678c5bbe1c104c6a672 (patch)
tree 755c17c6f83c0ecd4915040e5477e4095c69e3e5
parent 74d4db051b4c74aaab23b19fbb51e441448faf8e (diff)
download box64-70ab16a6d6c54c83a6f9f678c5bbe1c104c6a672.tar.gz
box64-70ab16a6d6c54c83a6f9f678c5bbe1c104c6a672.zip
[DYNAREC][INTERP] Fixed an edge case of CMPXCHG (#3049)
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_0f.c 10
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_f0.c 28
-rw-r--r-- src/dynarec/la64/dynarec_la64_f0.c 15
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f0.c 15
-rw-r--r-- src/emu/x64run0f.c 1
5 files changed, 43 insertions, 26 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 8813a577..0996a5a9 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -2106,19 +2106,23 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     CMPSxw_REG(xRAX, ed);

                 }

                 MOVxw_REG(x1, ed); // save value

-                Bcond(cNE, 4+4);

+                Bcond(cNE, 4 + (rex.w ? 4 : 8));

                 MOVxw_REG(ed, gd);

+                if (!rex.w) { B_NEXT_nocond; }

             } else {

                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0);

                 LDxw(x1, wback, fixedaddress);

-                UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);}

+                UFLAG_IF {

+                    emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);

+                }

                 SUBxw_REG(x4, xRAX, x1);

                 CBNZxw_MARK(x4);

                 // EAX == Ed

                 STxw(gd, wback, fixedaddress);

+                if (!rex.w) { B_NEXT_nocond; }

                 MARK;

             }

-            MOVxw_REG(xRAX, x1);    // upper part of RAX will be erase on 32bits, no mater what

+            MOVxw_REG(xRAX, x1);

             break;

 

         case 0xB3:

diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c
index 2e5f82ff..491bbe3c 100644
--- a/src/dynarec/arm64/dynarec_arm64_f0.c
+++ b/src/dynarec/arm64/dynarec_arm64_f0.c
@@ -334,11 +334,13 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                     CMPSxw_REG(xRAX, ed);
                                 }
                                 MOVxw_REG(x1, ed); // save value
-                                Bcond(cNE, 4+4);
+                                Bcond(cNE, 4 + (rex.w ? 4 : 8));
                                 MOVxw_REG(ed, gd);
+                                if (!rex.w) { B_NEXT_nocond; }
                                 MOVxw_REG(xRAX, x1);
                             } else {
                                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
+                                UFLAG_IF { MOVxw_REG(x6, xRAX); }
                                 if(!ALIGNED_ATOMICxw) {
                                     if(cpuext.uscat) {
                                         ANDx_mask(x1, wback, 1, 0, 3);  // mask = F
@@ -353,16 +355,17 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 // disabling use of atomics for now, as it seems to make (at least)
                                 //  HorizonZeroDawn and Cyberpunk2077 (both from GoG) unstable
                                 //  but why?!
-                                if(cpuext.atomics && 0) {
+                                if (rex.w /* RAX should NOT be zero-upped if equal */ && cpuext.atomics && 0) {
                                     UFLAG_IF {
                                         MOVxw_REG(x1, xRAX);
                                         CASALxw(x1, gd, wback);
-                                        if(!ALIGNED_ATOMICxw) {
+                                        MOVxw_REG(xRAX, x1);
+                                        if (!ALIGNED_ATOMICxw) {
                                             B_MARK_nocond;
                                         }
                                     } else {
                                         CASALxw(xRAX, gd, wback);
-                                        if(!ALIGNED_ATOMICxw) {
+                                        if (!ALIGNED_ATOMICxw) {
                                             B_NEXT_nocond;
                                         }
                                     }
@@ -370,12 +373,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                     MARKLOCK;
                                     LDAXRxw(x1, wback);
                                     CMPSxw_REG(xRAX, x1);
-                                    B_MARK(cNE);
+                                    Bcond(cNE, 4 + (rex.w ? 8 : 12));
                                     // EAX == Ed
                                     STLXRxw(x4, gd, wback);
                                     CBNZx_MARKLOCK(x4);
                                     // done
-                                    if(!ALIGNED_ATOMICxw) {
+                                    if (!rex.w) { B_MARK_nocond; }
+                                    MOVxw_REG(xRAX, x1);
+                                    if(!ALIGNED_ATOMICxw && rex.w) {
                                         B_MARK_nocond;
                                     }
                                 }
@@ -387,18 +392,17 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                     SUBxw_UXTB(x3, x3, x1);
                                     CBNZw_MARK3(x3);
                                     CMPSxw_REG(xRAX, x1);
-                                    B_MARK(cNE);
+                                    Bcond(cNE, 4 + (rex.w ? 12 : 16));
                                     // EAX == Ed
                                     STLXRB(x4, gd, wback);
                                     CBNZx_MARK3(x4);
                                     STRxw_U12(gd, wback, 0);
+                                    if (!rex.w) { B_MARK_nocond; }
+                                    MOVxw_REG(xRAX, x1);
                                 }
                                 MARK;
-                                // Common part (and fallback for EAX != Ed)
-                                UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5); MOVxw_REG(xRAX, x1);}
-                                else {
-                                    if(!ALIGNED_ATOMICxw || !(cpuext.atomics && 0))
-                                        MOVxw_REG(xRAX, x1);    // upper par of RAX will be erase on 32bits, no mater what
+                                UFLAG_IF {
+                                    emit_cmp32(dyn, ninst, rex, x6, x1, x3, x4, x5);
                                 }
                             }
                             break;
diff --git a/src/dynarec/la64/dynarec_la64_f0.c b/src/dynarec/la64/dynarec_la64_f0.c
index 30d8d844..a45508be 100644
--- a/src/dynarec/la64/dynarec_la64_f0.c
+++ b/src/dynarec/la64/dynarec_la64_f0.c
@@ -175,10 +175,12 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                 SUBxw(x2, x1, xRAX);
                                 BNE_MARK2(x2, xZR);
                                 MVxw(ed, gd);
+                                if (!rex.w) { B_NEXT_nocond; }
                                 MARK2;
                                 MVxw(xRAX, x1);
                             } else {
                                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
+                                UFLAG_IF { MVxw(x6, xRAX); }
                                 ANDI(x1, wback, (1 << (rex.w + 2)) - 1);
                                 BNEZ_MARK3(x1);
                                 // Aligned
@@ -186,11 +188,13 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                 MV(x4, gd);
                                 LLxw(x1, wback, 0);
                                 SUBxw(x3, x1, xRAX);
-                                BNEZ_MARK(x3);
+                                BNEZ(x3, 4 + (rex.w ? 8 : 12));
                                 // EAX == Ed
                                 SCxw(x4, wback, 0);
                                 BEQZ_MARKLOCK(x4);
-                                B_MARK_nocond;
+                                if (!rex.w) { B_MARK_nocond; }
+                                MVxw(xRAX, x1);
+                                if (rex.w) { B_MARK_nocond; }
                                 MARK3;
                                 // Unaligned
                                 ADDI_D(x5, xZR, -(1 << (rex.w + 2)));
@@ -199,14 +203,15 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                 LDxw(x1, wback, 0);
                                 LLxw(x6, x5, 0);
                                 SUBxw(x3, x1, xRAX);
-                                BNEZ_MARK(x3);
+                                BNEZ(x3, 4 + (rex.w ? 12 : 16));
                                 // EAX == Ed
                                 SCxw(x6, x5, 0);
                                 BEQZ_MARKLOCK2(x6);
                                 SDxw(gd, wback, 0);
-                                MARK;
-                                UFLAG_IF { emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5, x6); }
+                                if (!rex.w) { B_MARK_nocond; }
                                 MVxw(xRAX, x1);
+                                MARK;
+                                UFLAG_IF { emit_cmp32(dyn, ninst, rex, x6, x1, x3, x4, x5, x6); }
                             }
                             break;
                         default:
diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c
index c7f3f634..5f1db176 100644
--- a/src/dynarec/rv64/dynarec_rv64_f0.c
+++ b/src/dynarec/rv64/dynarec_rv64_f0.c
@@ -249,21 +249,25 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                 SUBxw(x2, ed, xRAX);
                                 BNE_MARK2(x2, xZR);
                                 MVxw(ed, gd);
+                                if (!rex.w) { B_NEXT_nocond; }
                                 MARK2;
                                 MVxw(xRAX, x1);
                             } else {
                                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
+                                UFLAG_IF { MVxw(x6, xRAX); }
                                 ANDI(x1, wback, (1 << (rex.w + 2)) - 1);
                                 BNEZ_MARK3(x1);
                                 // Aligned
                                 MARKLOCK;
                                 LRxw(x1, wback, 1, 1);
                                 SUBxw(x3, x1, xRAX);
-                                BNEZ_MARK(x3);
+                                BNEZ(x3, 4 + (rex.w ? 8 : 12));
                                 // EAX == Ed
                                 SCxw(x4, gd, wback, 1, 1);
                                 BNEZ_MARKLOCK(x4);
-                                B_MARK_nocond;
+                                if (!rex.w) { B_MARK_nocond; }
+                                MVxw(xRAX, x1);
+                                if (rex.w) { B_MARK_nocond; }
                                 MARK3;
                                 // Unaligned
                                 ANDI(x5, wback, -(1 << (rex.w + 2)));
@@ -271,14 +275,15 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                 LDxw(x1, wback, 0);
                                 LRxw(x6, x5, 1, 1);
                                 SUBxw(x3, x1, xRAX);
-                                BNEZ_MARK(x3);
+                                BNEZ(x3, 4 + (rex.w ? 12 : 16));
                                 // EAX == Ed
                                 SCxw(x4, x6, x5, 1, 1);
                                 BNEZ_MARKLOCK2(x4);
                                 SDxw(gd, wback, 0);
-                                MARK;
-                                UFLAG_IF { emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5, x6); }
+                                if (!rex.w) { B_MARK_nocond; }
                                 MVxw(xRAX, x1);
+                                MARK;
+                                UFLAG_IF { emit_cmp32(dyn, ninst, rex, x6, x1, x3, x4, x5, x6); }
                             }
                             break;
                         default:
diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c
index 9895f3db..d4505030 100644
--- a/src/emu/x64run0f.c
+++ b/src/emu/x64run0f.c
@@ -1418,7 +1418,6 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
                         ED->q[0] = GD->dword[0];

                     else

                         ED->dword[0] = GD->dword[0];

-                    R_RAX = R_EAX;   // to erase upper part of RAX

                 } else {

                     R_RAX = ED->dword[0];

                 }