about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2023-03-26 12:28:53 +0000
committer ptitSeb <sebastien.chev@gmail.com> 2023-03-26 12:28:53 +0000
commit 85f6d8308a0dccda7880fd973f0d3c0bc61d4a31 (patch)
tree 110d6587a5e1c5beeae909222b62852e9417b917
parent 06f2750eefeedfeb465a56ea69a9978774017ad7 (diff)
download box64-85f6d8308a0dccda7880fd973f0d3c0bc61d4a31.tar.gz
box64-85f6d8308a0dccda7880fd973f0d3c0bc61d4a31.zip
[RV64_DYNAREC] Added a bunch of opcodes, plus some improvements/fixes to SSE macros
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00.c 59
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c 43
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f.c 18
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_emit_math.c 55
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_emit_shift.c 54
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f30f.c 17
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h 21
7 files changed, 248 insertions, 19 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c
index 4c7d0247..3d393fdc 100644
--- a/src/dynarec/rv64/dynarec_rv64_00.c
+++ b/src/dynarec/rv64/dynarec_rv64_00.c
@@ -751,7 +751,47 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SMWRITELOCK(lock);
             }
             break;
-
+        case 0x8A:  // copy the 8-bit source Eb (register or memory) into the 8-bit destination register Gb
+            INST_NAME("MOV Gb, Eb");
+            nextop = F8;
+            gd = ((nextop&0x38)>>3)+(rex.r<<3);
+            if(rex.rex) {
+                gb2 = 0;            // REX present: always the low byte of the selected register
+                gb1 = xRAX + gd;
+            } else {
+                gb2 = ((gd&4)>>2);  // L or H
+                gb1 = xRAX+(gd&3);  // Ax, Cx, Dx or Bx
+            }
+            gd = x4;
+            if(MODREG) {
+                ed = (nextop&7) + (rex.b<<3);
+                if(rex.rex) {
+                    eb1 = xRAX+ed;
+                    eb2 = 0;
+                } else {
+                    eb1 = xRAX+(ed&3);  // Ax, Cx, Dx or Bx
+                    eb2 = ((ed&4)>>2);    // L or H
+                }
+                if(eb2) {
+                    SRLI(x1, eb1, 8);       // high byte: bring bits 8-15 down to bits 0-7
+                    ANDI(x1, x1, 0xff);
+                } else {
+                    ANDI(x1, eb1, 0xff);    // low byte: x1 = zero-extended source byte
+                }
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0);
+                SMREADLOCK(lock);
+                LBU(x1, ed, fixedaddress);  // must be zero-extended: x1 is OR'ed into gb1 below, a sign-extended byte would overwrite every bit above the destination byte
+            }
+            if(gb2) {
+                MOV64x(x4, ~0xff00);
+                AND(gb1, gb1, x4);      // clear bits 8-15 of the destination
+                SLLI(x1, x1, 8);        // move the source byte into the high-byte position
+            } else {
+                ANDI(gb1, gb1, ~0xff);  // clear bits 0-7 of the destination
+            }
+            OR(gb1, gb1, x1);           // insert the byte, all other destination bits are preserved
+            break;
         case 0x8B:
             INST_NAME("MOV Gd, Ed");
             nextop=F8;
@@ -1312,6 +1352,16 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     emit_shl32(dyn, ninst, rex, ed, x3, x5, x4, x6);
                     WBACK;
                     break;
+                case 5:
+                    INST_NAME("SHR Ed, CL");
+                    SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined
+                    ANDI(x3, xRCX, rex.w?0x3f:0x1f);
+                    GETED(0);
+                    if(!rex.w && MODREG) {ZEROUP(ed);}
+                    CBZ_NEXT(x3);
+                    emit_shr32(dyn, ninst, rex, ed, x3, x5, x4);
+                    WBACK;
+                    break;
                 case 7:
                     INST_NAME("SAR Ed, CL");
                     SETFLAGS(X_ALL, SF_PENDING);
@@ -1511,6 +1561,13 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     NOT(x1, x1);
                     EBBACK(x5, 1);
                     break;
+                case 3:
+                    INST_NAME("NEG Eb");
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETEB(x1, 0);
+                    emit_neg8(dyn, ninst, x1, x2, x4);
+                    EBBACK(x5, 0);
+                    break;
                 case 4:
                     INST_NAME("MUL AL, Ed");
                     SETFLAGS(X_ALL, SF_PENDING);
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index a2449465..543ffae8 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -31,7 +31,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     uint8_t opcode = F8;
     uint8_t nextop, u8;
     uint8_t gd, ed;
-    uint8_t wback, wb2;
+    uint8_t wback, wb2, gback;
     uint8_t eb1, eb2;
     int32_t i32, i32_;
     int cacheupd = 0;
@@ -44,6 +44,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     int64_t fixedaddress;
     int unscaled;
     MAYUSE(wb2);
+    MAYUSE(gback);
     MAYUSE(eb1);
     MAYUSE(eb2);
     MAYUSE(q0);
@@ -111,6 +112,30 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             break;
 
 
+        case 0x10:
+            INST_NAME("MOVUPS Gx,Ex");
+            nextop = F8;
+            GETGX(x1);
+            GETEX(x2, 0);
+            LD(x3, wback, fixedaddress+0);
+            LD(x4, wback, fixedaddress+8);
+            SD(x3, gback, 0);
+            SD(x4, gback, 8);
+            break;
+        case 0x11:
+            INST_NAME("MOVUPS Ex,Gx");
+            nextop = F8;
+            GETGX(x1);
+            GETEX(x2, 0);
+            LD(x3, gback, 0);
+            LD(x4, gback, 8);
+            SD(x3, wback, fixedaddress+0);
+            SD(x4, wback, fixedaddress+8);
+            if(!MODREG)
+                SMWRITE2();
+            break;
+
+
         case 0x18:
             nextop = F8;
             if((nextop&0xC0)==0xC0) {
@@ -169,6 +194,22 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         GOCOND(0x40, "CMOV", "Gd, Ed");
         #undef GO
 
+        case 0x57:
+            INST_NAME("XORPS");
+            nextop = F8;
+            //TODO: it might be possible to check if SS or SD are used and not purge them to optimize a bit
+            GETGX(x1);
+            if(MODREG && gd==(nextop&7)+(rex.b<<3))
+            {
+                // just zero dest
+                SD(xZR, x1, 0);
+                SD(xZR, x1, 8);
+            } else {
+                GETEX(x2, 0);
+                SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
+            }
+            break;
+
         case 0x77:
             INST_NAME("EMMS");
             // empty MMX, FPU now usable
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index 03e44fc8..5299f162 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -30,7 +30,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
     uint8_t nextop, u8;
     int32_t i32;
     uint8_t gd, ed;
-    uint8_t wback, wb1, wb2;
+    uint8_t wback, wb1, wb2, gback;
     uint8_t eb1, eb2;
     int64_t j64;
     uint64_t tmp64u, tmp64u2;
@@ -126,22 +126,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PXOR Gx, Ex");
             nextop = F8;
             GETGX(x1);
-            GETEX(x2, 0);
-            if(gd==ed) {
+            if(MODREG && gd==(nextop&7)+(rex.b<<3))
+            {
                 // just zero dest
                 SD(xZR, x1, 0);
                 SD(xZR, x1, 8);
             } else {
-                //1st
-                LD(x3, x1, 0);
-                LD(x4, x2, 0);
-                XOR(x3, x3, x4);
-                SD(x3, x1, 0);
-                // 2nd
-                LD(x3, x1, 8);
-                LD(x4, x2, 8);
-                XOR(x3, x3, x4);
-                SD(x3, x1, 8);
+                GETEX(x2, 0);
+                SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
             }
             break;
 
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c
index bd2dbb96..e3125f31 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_math.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c
@@ -759,6 +759,61 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
 }
 
+// emit NEG8 instruction, from s1, store result (masked to 8 bits) in s1 using s2 and s3 as scratch; only the flags selected through IFX are emitted
+void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
+{
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        SB(s1, xEmu, offsetof(x64emu_t, op1));  // pending-flags path: store the operand in the emu state
+        SET_DF(s3, d_neg8);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    IFX(X_AF | X_OF) {
+        MV(s3, s1);      // s3 = op1
+    }
+    // negate and keep only the low 8 bits of the result
+    NEG(s1, s1);
+    ANDI(s1, s1, 0xff);
+    IFX(X_PEND) {
+        SB(s1, xEmu, offsetof(x64emu_t, res));  // pending-flags path: store the result as well
+    }
+    // CF is set when the result is not zero
+    IFX(X_CF) {
+        BEQZ(s1, 8);    // result == 0: branch over the ORI below, CF is left as is
+        ORI(xFlags, xFlags, 1 << F_CF);
+    }
+    // AF and OF are both derived from bc = res | op1
+    IFX(X_AF | X_OF) {
+        OR(s3, s1, s3); // s3 = res | op1
+        IFX(X_AF) {
+            /* af = bc & 0x8 */
+            ANDI(s2, s3, 8);
+            BEQZ(s2, 8);    // bit 3 clear: branch over the ORI below
+            ORI(xFlags, xFlags, 1 << F_AF);
+        }
+        IFX(X_OF) {
+            /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */
+            SRLI(s2, s3, 6);    // s2 = bc >> 6 (width-2, with width = 8)
+            SRLI(s3, s2, 1);    // s3 = bc >> 7 (width-1)
+            XOR(s2, s2, s3);
+            ANDI(s2, s2, 1);
+            BEQZ(s2, 8);        // xor of the two top bits is 0: branch over the ORI below
+            ORI(xFlags, xFlags, 1 << F_OF2);
+        }    
+    }
+    IFX(X_SF) {
+        ANDI(s3, s1, 1 << F_SF);    // 1<<F_SF is sign bit, so just mask
+        OR(xFlags, xFlags, s3);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s2);    // parity is delegated to emit_pf, s3 and s2 are handed over (presumably as scratch)
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);    // result != 0: branch over the ORI below
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+}
 
 // emit ADC32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
 void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
index a9e3a2b9..1ecb57c6 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
@@ -130,6 +130,60 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     }
 }
 
+// emit SHR32 instruction, from s1, shift count in s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch; only the flags selected through IFX are emitted
+void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
+{
+    int64_t j64;
+
+    CLEAR_FLAGS();
+    // pending-flags path: store both operands in the emu state and tag the operation
+    IFX(X_PEND) {
+        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
+        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, rex.w?d_shr64:d_shr32);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    // CF = last bit shifted out, i.e. bit (s2-1) of the source: has to be read before the shift
+    IFX(X_CF) {
+        SUBI(s3, s2, 1);
+        SRA(s3, s1, s3);    // only bit 0 is kept below, so arithmetic vs logical shift makes no difference
+        ANDI(s3, s3, 1); // LSB
+        BEQZ(s3, 8);
+        ORI(xFlags, xFlags, 1 << F_CF);
+    }
+    // the actual shift, result in s1
+    SRL(s1, s1, s2);
+
+    IFX(X_SF) {
+        BGE(s1, xZR, 8);    // s1 >= 0: branch over the ORI below
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+    IFX(X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);    // result != 0: branch over the ORI below
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_OF) {
+        SUBI(s3, s2, 1);        // s3 = count-1, zero only for a 1-bit shift
+        BNEZ(s3, 4+6*4);        // OF is only defined for a count of 1: any other count skips the 6 instructions below (the original BEQ skipped them exactly when count==1)
+            SRLI(s3, s1, rex.w?62:30);  // after a 1-bit shift, bit (width-2) of the result is the MSB of the source
+            SRLI(s4, s1, rex.w?63:31);
+            XOR(s3, s3, s4);
+            ANDI(s3, s3, 1);
+            BEQZ(s3, 8);
+            ORI(xFlags, xFlags, 1 << F_OF2);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
 // emit SHR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4)
 {
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c
index c50196e0..99ac53ae 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f.c
@@ -120,6 +120,23 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             FCVTDS(v0, v1);
             break;
 
+        case 0x7E:
+            INST_NAME("MOVQ Gx, Ex");
+            nextop = F8;
+            // Will load Gx as SD. Is that a good choice?
+            if(MODREG) {
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3), 0);
+                GETGXSD_empty(v0);
+                FMVD(v0, v1);
+            } else {
+                GETGXSD_empty(v0);
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                FLD(v0, ed, fixedaddress);
+            }
+            SD(xZR, xEmu, offsetof(x64emu_t, xmm[gd])+8);
+            break;
+
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 223fc5a3..cb46cf2b 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -292,25 +292,38 @@
         FLW(a, ed, fixedaddress);                                                                       \
     }
 
-// Will get pointer to GX in general register a, will purge SS or SD if loaded
+// Will get pointer to GX in general register a, will purge SS or SD if loaded. Also sets gd to the GX index and gback to a, so gback can be used as the load address
 #define GETGX(a)                        \
     gd = ((nextop&0x38)>>3)+(rex.r<<3); \
     sse_forget_reg(dyn, ninst, gd);     \
+    gback = a;                          \
     ADDI(a, xEmu, offsetof(x64emu_t, xmm[gd]))
 
-// Get Ex address in regenal register a, will purge SS or SD or it's reg and is loaded. May use x3
+// Get Ex address in general register a, will purge SS or SD if it's a reg and is loaded. May use x3. Use wback (plus fixedaddress) as load address!
 #define GETEX(a, D)                                                                                     \
     if(MODREG) {                                                                                        \
         ed = (nextop&7)+(rex.b<<3);                                                                     \
         sse_forget_reg(dyn, ninst, ed);                                                                 \
         fixedaddress = 0;                                                                               \
         ADDI(a, xEmu, offsetof(x64emu_t, xmm[ed]));                                                     \
+        wback = a;                                                                                      \
     } else {                                                                                            \
         SMREAD();                                                                                       \
         ed=16;                                                                                          \
         addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, 1, D);          \
     }
 
+// Loop for SSE opcodes that work on 64-bit values and write to GX: for offsets 0 and 8, load gback+off into GX1 and wback+fixedaddress+off into EX1, run F, then store GX1 back to gback+off. Relies on gback, wback and fixedaddress being already set.
+#define SSE_LOOP_Q(GX1, EX1, F)         \
+    LD(GX1, gback, 0);                  \
+    LD(EX1, wback, fixedaddress+0);     \
+    F;                                  \
+    SD(GX1, gback, 0);                  \
+    LD(GX1, gback, 8);                  \
+    LD(EX1, wback, fixedaddress+8);     \
+    F;                                  \
+    SD(GX1, gback, 8)
+
 // CALL will use x6 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popd if ret is -2
 #define CALL(F, ret) call_c(dyn, ninst, F, x6, ret, 1, 0)
@@ -830,10 +843,10 @@ void emit_sbb8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s
 //void emit_sbb16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3);
 //void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
-//void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
+void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
 void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5);
-//void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
+void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 //void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);