about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: xctan <xctan@cirno.icu>, 2023-05-28 23:17:09 +0800
committer: GitHub <noreply@github.com>, 2023-05-28 17:17:09 +0200
commit: 26b01d56f59bc17f48044ce79fbea41a7e2439cb (patch)
tree: 2f4a5c2b808a095b042cec786fee9aa5c922c845 /src
parent: eb687748c5f6cb39b5f3d70721f3e96a3c04549d (diff)
download: box64-26b01d56f59bc17f48044ce79fbea41a7e2439cb.tar.gz
download: box64-26b01d56f59bc17f48044ce79fbea41a7e2439cb.zip
[RV64_DYNAREC] Added F3 0F B8 POPCNT opcode and fixed LOCK CMPXCHG16B for ML2 (#808)
* [RV64_DYNAREC] Added lock to F0 48 0F C7 LOCK CMPXCHG16B opcode

* [RV64_DYNAREC] Added F3 0F B8 POPCNT opcode

* [RV64_DYNAREC] Moved 16B lock to box64context
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_f0.c    | 14
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_f30f.c  | 42
-rw-r--r--  src/dynarec/rv64/rv64_emitter.h       | 4
-rwxr-xr-x  src/include/box64context.h            | 3
4 files changed, 61 insertions, 2 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c
index cafca95d..6bb11fbd 100644
--- a/src/dynarec/rv64/dynarec_rv64_f0.c
+++ b/src/dynarec/rv64/dynarec_rv64_f0.c
@@ -283,9 +283,16 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                             addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, LOCK_LOCK, 0, 0);
                             ANDI(xFlags, xFlags, ~(1<<F_ZF));
                             if (rex.w) {
-                                // there is no atomic move on 16bytes, so faking it
+                                // there is no atomic move on 16bytes, so implement it with mutex
+                                LD(x9, xEmu, offsetof(x64emu_t, context));
+                                ADDI(x9, x9, offsetof(box64context_t, mutex_16b));
+                                ADDI(x4, xZR, 1);
+                                MARKLOCK;
+                                AMOSWAP_W(x4, x4, x9, 1, 1);
+                                // x4 == 1 if locked
+                                BNEZ_MARKLOCK(x4);
+
                                 SMDMB();
-                                // MARKLOCK;
                                 LD(x2, wback, 0);
                                 LD(x3, wback, 8);
                                 BNE_MARK(x2, xRAX);
@@ -299,6 +306,9 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                 MV(xRDX, x3);
                                 MARK3;
                                 SMDMB();
+
+                                // unlock
+                                AMOSWAP_W(xZR, xZR, x9, 1, 1);
                             } else {
                                 SMDMB();
                                 MARKLOCK;
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c
index bdb8fa3b..cbf4c555 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f.c
@@ -298,6 +298,48 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 SW(x3, gback, i*4);
             }
             break;
+        case 0xB8:
+            INST_NAME("POPCNT Gd, Ed");
+            SETFLAGS(X_ALL, SF_SET);
+            SET_DFNONE();
+            nextop = F8;
+            GETED(0);
+            GETGD;
+            if(!rex.w && MODREG) {
+                AND(x4, ed, xMASK);
+                ed = x4;
+            }
+            CLEAR_FLAGS();
+            BNE_MARK(ed, xZR);
+            ORI(xFlags, xFlags, 1<<F_ZF);
+            MOV32w(gd, 0);
+            B_NEXT_nocond;
+            MARK;
+            if(rv64_zbb) {
+                CPOPxw(gd, ed);
+            } else {
+                TABLE64(x1, 0x5555555555555555uLL);
+                SRLI(x5, ed, 1);
+                AND(x5, x5, x1);
+                SUB(x5, ed, x5);
+                TABLE64(x3, 0x3333333333333333uLL);
+                SRLI(x1, x5, 2);
+                AND(x1, x1, x3);
+                AND(x5, x5, x3);
+                ADD(x5, x5, x1);
+                TABLE64(x3, 0x0F0F0F0F0F0F0F0FuLL);
+                SRLI(x1, x5, 4);
+                ADD(x5, x5, x1);
+                AND(x5, x5, x3);
+                SRLI(x1, x5, 32);
+                ADDW(x5, x5, x1);
+                SRLIW(x1, x5, 16);
+                ADDW(x5, x5, x1);
+                SRLIW(x1, x5, 8);
+                ADDW(x5, x5, x1);
+                ANDI(gd, x5, 0x7F);
+            }
+            break;
         case 0xBC:
             INST_NAME("TZCNT Gd, Ed");
             SETFLAGS(X_ZF, SF_SUBSET);
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index bc737cc0..35c8bb48 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -360,6 +360,8 @@ f28–31  ft8–11  FP temporaries                  Caller
 #define LR_W(rd, rs1, aq, rl)       EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b010, rd, 0b0101111))
 #define SC_W(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b010, rd, 0b0101111))
 
+#define AMOSWAP_W(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b00001, aq, rl), rs2, rs1, 0b010, rd, 0b0101111))
+
 // RV64A
 #define LR_D(rd, rs1, aq, rl)       EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b011, rd, 0b0101111))
 #define SC_D(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b011, rd, 0b0101111))
@@ -367,6 +369,8 @@ f28–31  ft8–11  FP temporaries                  Caller
 #define LRxw(rd, rs1, aq, rl)       EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b010|rex.w, rd, 0b0101111))
 #define SCxw(rd, rs2, rs1, aq, rl)  EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b010|rex.w, rd, 0b0101111))
 
+#define AMOSWAP_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00001, aq, rl), rs2, rs1, 0b011, rd, 0b0101111))
+
 // RV32F
 // Read round mode
 #define FRRM(rd)                    CSRRS(rd, xZR, 0x002)
diff --git a/src/include/box64context.h b/src/include/box64context.h
index fb09c004..9b38fa83 100755
--- a/src/include/box64context.h
+++ b/src/include/box64context.h
@@ -151,6 +151,9 @@ typedef struct box64context_s {
     uintptr_t           max_db_size;    // the biggest (in x86_64 instructions bytes) built dynablock
     int                 trace_dynarec;
     pthread_mutex_t     mutex_lock;     // this is for the Test interpreter
+    #ifdef __riscv
+    uint32_t            mutex_16b;
+    #endif
     #endif
 
     library_t           *libclib;       // shortcut to libc library (if loaded, so probably yes)