about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: Yang Liu <liuyang22@iscas.ac.cn> 2023-04-11 00:28:40 +0800
committer: GitHub <noreply@github.com> 2023-04-10 18:28:40 +0200
commit: 5eb54321e8010cbb1991c9f583e63e0e8f4699cd (patch)
tree: 43583c5ee88a2ec540f2411cfbac508f62c47b80 /src
parent: 9ce44af20405a6a00744500d9f1778894e945297 (diff)
download: box64-5eb54321e8010cbb1991c9f583e63e0e8f4699cd.tar.gz
download: box64-5eb54321e8010cbb1991c9f583e63e0e8f4699cd.zip
[RV64_DYNAREC] Added more opcodes for SV (#684)
* [RV64_DYNAREC] Added 0F AB BTS opcode

* [RV64_DYNAREC] Added F0 81,83 /4 LOCK AND opcode

* [RV64_DYNAREC] Added F0 81,83 /1 LOCK OR opcode

* [RV64_DYNAREC] Added 66 0F 58 ADDPD opcode

* [RV64_DYNAREC] Added F2 0F 70 PSHUFLW opcode

* [RV64_DYNAREC] Added 0F AC SHRD opcode

* [RV64_DYNAREC] Fixed a copy-paste typo
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c 48
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f.c 8
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_emit_shift.c 67
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f0.c 58
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f20f.c 29
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h 2
6 files changed, 209 insertions, 3 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index b2bf4df4..e0ec682f 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -390,7 +390,53 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             ANDI(xFlags, xFlags, ~1);   //F_CF is 1
             OR(xFlags, xFlags, x4);
             break;
-
+        case 0xAB: // BTS: copy the selected bit of Ed into CF, then set that bit in Ed
+            INST_NAME("BTS Ed, Gd");
+            SETFLAGS(X_CF, SF_SUBSET);
+            SET_DFNONE();
+            nextop = F8;
+            GETGD;
+            if (MODREG) {
+                ed = xRAX+(nextop&7)+(rex.b<<3);
+                wback = 0; // register destination: nothing to store back afterwards
+            } else {
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, x1, &fixedaddress, rex, NULL, 1, 0);
+                SRAI(x1, gd, 5+rex.w); // bit offset / 32 (or / 64 with rex.w) = word index; NOTE(review): negative 32-bit offsets rely on gd being sign-extended here - confirm
+                SLLI(x1, x1, 2+rex.w); // word index -> byte offset (x4 or x8)
+                ADD(x3, wback, x1); // x3 = address of the word that holds the target bit
+                LDxw(x1, x3, fixedaddress);
+                ed = x1;
+                wback = x3; // x3 now carries the store address and must survive until SDxw below
+            }
+            if (rex.w) {
+                ANDI(x2, gd, 0x3f);
+            } else {
+                ANDI(x2, gd, 0x1f);
+            }
+            SRL(x4, ed, x2);
+            ANDI(x4, x4, 1); // F_CF is 1
+            ANDI(xFlags, xFlags, ~1);
+            OR(xFlags, xFlags, x4);
+            ADDI(x4, xZR, 1); // build the bit mask in x4 (its CF value is already merged); x3 is wback in the memory form
+            SLL(x4, x4, x2);
+            OR(ed, ed, x4);
+            if(wback) {
+                SDxw(ed, wback, fixedaddress);
+                SMWRITE();
+            }
+            break;
+        case 0xAC:
+            nextop = F8;
+            INST_NAME("SHRD Ed, Gd, Ib");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            GETED(1);
+            GETGD;
+            u8 = F8;
+            u8&=(rex.w?0x3f:0x1f);
+            emit_shrd32c(dyn, ninst, rex, ed, gd, u8, x3, x4);
+            WBACK;
+            break;
         case 0xAE:
             nextop = F8;
             if((nextop&0xF8)==0xE8) {
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index a2212e9a..efaa4a14 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -190,6 +190,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETGX(x2);
             SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
             break;
+        case 0x58:
+            INST_NAME("ADDPD Gx, Ex");
+            nextop = F8;
+            //TODO: fastnan handling
+            GETEX(x1, 0);
+            GETGX(x2);
+            SSE_LOOP_FQ(x3, x4, FADDD(v0, v0, v1));
+            break;
         case 0x5C:
             INST_NAME("SUBPD Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
index f0245994..6af7aad7 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
@@ -306,3 +306,70 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
         emit_pf(dyn, ninst, s1, s3, s4);
     }
 }
+
+// emit SHRD (32-bit, or 64-bit when rex.w) by constant c: s1 is shifted right by c and refilled from s2 shifted left by (width-c); result is left in s1, s3 and s4 are scratch
+void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4)
+{
+    CLEAR_FLAGS();

+    IFX(X_PEND) { // pending-flags path: save the operands into the emu structure
+        if (c) {
+            MOV64x(s3, c);
+            SDxw(s3, xEmu, offsetof(x64emu_t, op2)); // op2 = shift count
+        } else SDxw(xZR, xEmu, offsetof(x64emu_t, op2));
+        SDxw(s1, xEmu, offsetof(x64emu_t, op1)); // op1 = value before the shift
+        SET_DF(s4, rex.w?d_shr64:d_shr32);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    if(!c) { // zero count: s1 is left untouched, only res is published when pending flags are wanted
+        IFX(X_PEND) {
+            SDxw(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    IFX(X_CF) { // CF = bit (c-1) of the original s1
+        if (c > 1) {
+            SRAI(s3, s1, c-1);
+            ANDI(s3, s3, 1); // LSB
+            BEQZ(s3, 8); // offset 8: jumps over the ORI emitted after this if/else when the bit is clear
+        } else {
+            // no need to shift
+            ANDI(s3, s1, 1);
+            BEQZ(s3, 8); // same skip as above
+        }
+        ORI(xFlags, xFlags, 1 << F_CF);
+    }

+    SRLIxw(s3, s1, c); // s3 = s1 >> c
+    SLLIxw(s1, s2, (rex.w?64:32)-c); // s1 = s2 << (width - c): the bits that fill the top of the result
+    OR(s1, s1, s3); // merge both parts into the result

+    IFX(X_SF) { // NOTE(review): tested before ZEROUP, presumably because the 32-bit result is still sign-extended here - confirm
+        BGE(s1, xZR, 8); // result >= 0: skip the ORI that sets SF
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+    IFX(X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8); // result != 0: skip the ORI that sets ZF
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_OF) { // OF is only computed for a shift count of 1
+        if(c==1) {
+            SRLI(s3, s1, rex.w?62:30); // second-highest bit of the result
+            SRLI(s4, s1, rex.w?63:31); // highest bit of the result
+            XOR(s3, s3, s4); // the two bits differ -> set OF
+            ANDI(s3, s3, 1);
+            BEQZ(s3, 8);
+            ORI(xFlags, xFlags, 1 << F_OF2);
+        }
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c
index ec3fb824..4696c70c 100644
--- a/src/dynarec/rv64/dynarec_rv64_f0.c
+++ b/src/dynarec/rv64/dynarec_rv64_f0.c
@@ -168,6 +168,64 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                             emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5, x6);
                     }
                     break;
+                case 1: // OR
+                    if(opcode==0x81) {
+                        INST_NAME("LOCK OR Ed, Id");
+                    } else {
+                        INST_NAME("LOCK OR Ed, Ib");
+                    }
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    if(MODREG) {
+                        if(opcode==0x81) i64 = F32S; else i64 = F8S;
+                        ed = xRAX+(nextop&7)+(rex.b<<3);
+                        emit_or32c(dyn, ninst, rex, ed, i64, x3, x4);
+                    } else {
+                        addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
+                        if(opcode==0x81) i64 = F32S; else i64 = F8S;
+                        MARKLOCK;
+                        LRxw(x1, wback, 1, 1);
+                        if (i64>=-2048 && i64<2048) {
+                            ORI(x4, x1, i64);
+                        } else {
+                            MOV64xw(x4, i64);
+                            OR(x4, x1, x4);
+                        }
+                        if (!rex.w) ZEROUP(x4);
+                        SCxw(x3, x4, wback, 1, 1);
+                        BNEZ_MARKLOCK(x3);
+                        IFX(X_ALL|X_PEND)
+                            emit_or32c(dyn, ninst, rex, x1, i64, x3, x4);
+                    }
+                    break;
+                case 4: // AND
+                    if(opcode==0x81) {
+                        INST_NAME("LOCK AND Ed, Id");
+                    } else {
+                        INST_NAME("LOCK AND Ed, Ib");
+                    }
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    if(MODREG) {
+                        if(opcode==0x81) i64 = F32S; else i64 = F8S;
+                        ed = xRAX+(nextop&7)+(rex.b<<3);
+                        emit_and32c(dyn, ninst, rex, ed, i64, x3, x4);
+                    } else {
+                        addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
+                        if(opcode==0x81) i64 = F32S; else i64 = F8S;
+                        MARKLOCK;
+                        LRxw(x1, wback, 1, 1);
+                        if (i64>=-2048 && i64<2048) {
+                            ANDI(x4, x1, i64);
+                        } else {
+                            MOV64xw(x4, i64);
+                            AND(x4, x1, x4);
+                        }
+                        if (!rex.w) ZEROUP(x4);
+                        SCxw(x3, x4, wback, 1, 1);
+                        BNEZ_MARKLOCK(x3);
+                        IFX(X_ALL|X_PEND)
+                            emit_and32c(dyn, ninst, rex, x1, i64, x3, x4);
+                    }
+                    break;
                 case 5: // SUB
                     if(opcode==0x81) {
                         INST_NAME("LOCK SUB Ed, Id");
diff --git a/src/dynarec/rv64/dynarec_rv64_f20f.c b/src/dynarec/rv64/dynarec_rv64_f20f.c
index 8057345f..ec8912fa 100644
--- a/src/dynarec/rv64/dynarec_rv64_f20f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f20f.c
@@ -29,7 +29,7 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
     uint8_t opcode = F8;
     uint8_t nextop;
     uint8_t gd, ed;
-    uint8_t wback;
+    uint8_t wback, gback;
     uint8_t u8;
     uint64_t u64, j64;
     int v0, v1;
@@ -190,6 +190,33 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 FNEGD(v0, v0);
             }
             break;
+        case 0x70: // TODO: Optimize this!
+            INST_NAME("PSHUFLW Gx, Ex, Ib");
+            nextop = F8;
+            GETGX(x1);
+            GETEX(x2, 1);
+            u8 = F8; // immediate: four 2-bit selectors, one per destination halfword
+            int32_t idx;

+            idx = (u8>>(0*2))&3; // all four source halfwords are loaded before any store
+            LHU(x3, wback, fixedaddress+idx*2);
+            idx = (u8>>(1*2))&3;
+            LHU(x4, wback, fixedaddress+idx*2);
+            idx = (u8>>(2*2))&3;
+            LHU(x5, wback, fixedaddress+idx*2);
+            idx = (u8>>(3*2))&3;
+            LHU(x6, wback, fixedaddress+idx*2);

+            SH(x3, gback, 0*2); // 16-bit stores: a 32-bit store at offset 6 would also overwrite bytes 8-9
+            SH(x4, gback, 1*2);
+            SH(x5, gback, 2*2);
+            SH(x6, gback, 3*2);

+            if (!(MODREG && (gd==ed))) { // same register on both sides: upper 64 bits are already in place
+                LD(x3, wback, fixedaddress+8);
+                SD(x3, gback, 8);
+            }
+            break;
         case 0xC2:
             INST_NAME("CMPSD Gx, Ex, Ib");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 004a6dac..40985d8c 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -956,7 +956,7 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
 void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 //void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 //void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
-//void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
+void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 //void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 
 void emit_pf(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);