about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author xctan <xctan@cirno.icu> 2023-04-28 23:20:09 +0800
committer GitHub <noreply@github.com> 2023-04-28 17:20:09 +0200
commit 69529708952abd1786dff2dfa6910a159ce8802c (patch)
tree b25921b54c237ab269a6f0d245091606103fcf0c /src
parent 14e20e643515d4e7171f54e6d2fb78cf784f7d10 (diff)
download box64-69529708952abd1786dff2dfa6910a159ce8802c.tar.gz
box64-69529708952abd1786dff2dfa6910a159ce8802c.zip
[RV64_DYNAREC] Added more opcodes (#747)
* [RV64_DYNAREC] Added 0F 52 RSQRTPS opcode

* [RV64_DYNAREC] Added C0 /1 ROR opcode

* [RV64_DYNAREC] Added 66 0F 3A 20 PINSRB opcode

* [RV64_DYNAREC] Added AA STOSB opcode

* [RV64_DYNAREC] Added 66 F0 {81,83} LOCK ADD opcode
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_2.c 25
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_3.c 10
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c 33
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_66.c 4
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f.c 8
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_66f0.c 133
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h 2
7 files changed, 214 insertions, 1 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c
index 93dda7ef..f2fd1287 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_2.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_2.c
@@ -631,6 +631,31 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             MOV64xw(x2, i64);
             emit_test32(dyn, ninst, rex, xRAX, x2, x3, x4, x5);
             break;
+        case 0xAA:
+            if(rep) {
+                INST_NAME("REP STOSB");
+                CBZ_NEXT(xRCX);
+                ANDI(x1, xFlags, 1<<F_DF);
+                BNEZ_MARK2(x1);
+                MARK;   // Part with DF==0
+                SB(xRAX, xRDI, 0);
+                ADDI(xRDI, xRDI, 1);
+                ADDI(xRCX, xRCX, -1);
+                BNEZ_MARK(xRCX);
+                B_NEXT_nocond;
+                MARK2;  // Part with DF==1
+                SB(xRAX, xRDI, 0);
+                ADDI(xRDI, xRDI, -1);
+                ADDI(xRCX, xRCX, -1);
+                BNEZ_MARK2(xRCX);
+                // done
+            } else {
+                INST_NAME("STOSB");
+                GETDIR(x3, x1, 1);
+                SB(xRAX, xRDI, 0);
+                ADD(xRDI, xRDI, x3);
+            }
+            break;
         case 0xAB:
             if(rep) {
                 INST_NAME("REP STOSD");
diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c
index e9a3d0e0..19d6815e 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_3.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_3.c
@@ -66,6 +66,16 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     CALL_(rol8, ed, x3);
                     EBBACK(x5, 0);
                     break;
+                case 1:
+                    INST_NAME("ROR Eb, Ib");
+                    MESSAGE(LOG_DUMP, "Need Optimization\n");
+                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    GETEB(x1, 1);
+                    u8 = F8;
+                    MOV32w(x2, u8);
+                    CALL_(ror8, ed, x3);
+                    EBBACK(x5, 0);
+                    break;
                 case 4:
                 case 6:
                     INST_NAME("SHL Eb, Ib");
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index be6ec046..3e320614 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -354,6 +354,39 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FSW(d0, gback, 4*i);
             }
             break;
+        case 0x52:
+            INST_NAME("RSQRTPS Gx, Ex");
+            nextop = F8;
+            GETGX(x1);
+            GETEX(x2, 0);
+            s0 = fpu_get_scratch(dyn);
+            s1 = fpu_get_scratch(dyn); // 1.0f
+            v0 = fpu_get_scratch(dyn); // 0.0f
+            // do accurate computation, because riscv doesn't have rsqrt
+            MOV32w(x3, 1);
+            FCVTSW(s1, x3, RD_DYN);
+            if (!box64_dynarec_fastnan) {
+                FCVTSW(v0, xZR, RD_DYN);
+            }
+            for(int i=0; i<4; ++i) {
+                FLW(s0, wback, fixedaddress+i*4);
+                if (!box64_dynarec_fastnan) {
+                    FLES(x3, v0, s0); // s0 >= 0.0f?
+                    BNEZ(x3, 6*4);
+                    FEQS(x3, s0, s0); // isnan(s0)?
+                    BEQZ(x3, 2*4);
+                    // s0 is negative, so generate a NaN
+                    FDIVS(s0, s1, v0);
+                    // s0 is a NaN, just copy it
+                    FSW(s0, gback, i*4);
+                    J(4*4);
+                    // do regular computation
+                }
+                FSQRTS(s0, s0);
+                FDIVS(s0, s1, s0);
+                FSW(s0, gback, i*4);
+            }
+            break;
         case 0x53:
             INST_NAME("RCPPS Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c
index 7bc996e9..24584465 100644
--- a/src/dynarec/rv64/dynarec_rv64_66.c
+++ b/src/dynarec/rv64/dynarec_rv64_66.c
@@ -716,6 +716,10 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     DEFAULT;
             }
             break;
+
+        case 0xF0:
+            return dynarec64_66F0(dyn, addr, ip, ninst, rex, rep, ok, need_epilog);
+
         case 0xF7:
             nextop = F8;
             switch((nextop>>3)&7) {
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index 19c12bfd..ee89c010 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -469,6 +469,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         SW(ed, gback, 4*(u8&0x3));
                     }
                     break;
+                case 0x20:
+                    INST_NAME("PINSRB Gx, ED, Ib");
+                    nextop = F8;
+                    GETGX(x3);
+                    GETED(1);
+                    u8 = F8;
+                    SB(ed, x3, u8&0xF);
+                    break;
                 default:
                     DEFAULT;
             }
diff --git a/src/dynarec/rv64/dynarec_rv64_66f0.c b/src/dynarec/rv64/dynarec_rv64_66f0.c
new file mode 100644
index 00000000..b1df0489
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_66f0.c
@@ -0,0 +1,133 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+
+#include "rv64_printer.h"
+#include "dynarec_rv64_private.h"
+#include "dynarec_rv64_helper.h"
+#include "dynarec_rv64_functions.h"
+
+
+/*
+ * Emit RV64 code for an x86-64 instruction that carries both the 0x66
+ * operand-size prefix and the 0xF0 LOCK prefix (dynarec64_66 forwards here
+ * when it reads an 0xF0 byte).
+ *
+ * Only opcodes 0x81 and 0x83 with ModRM.reg == 0 (LOCK ADD Ew, Iw / Ib) are
+ * handled; every other encoding goes through DEFAULT.
+ *
+ * dyn, ninst    : dynarec state and current instruction index, consumed by
+ *                 the emitter macros and passed on to the helpers.
+ * addr          : position in the x86 byte stream; F8/F16S/F8S presumably
+ *                 advance it, and geted() returns its updated value.
+ * rex           : incoming REX state; reset here and replaced by any REX byte
+ *                 found after the prefixes.
+ * rep           : overwritten if an F2/F3 prefix follows; not used otherwise.
+ * ip, need_epilog : unused in this function.
+ * ok            : not referenced directly; presumably used by DEFAULT.
+ *
+ * Returns addr after the instruction bytes have been consumed.
+ */
+uintptr_t dynarec64_66F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
+{
+    (void)ip; (void)rep; (void)need_epilog;
+
+    uint8_t opcode = F8;
+    uint8_t nextop;
+    uint8_t gd, ed, u8;
+    uint8_t wback, wb1, wb2, gb1, gb2;
+    int32_t i32;
+    int64_t i64, j64;
+    int64_t fixedaddress;
+    int unscaled;
+    MAYUSE(gb1);
+    MAYUSE(gb2);
+    MAYUSE(wb1);
+    MAYUSE(wb2);
+    MAYUSE(j64);
+
+    // Skip any F2/F3 prefix bytes, remembering the last one (F2 -> 1, F3 -> 2)
+    while((opcode==0xF2) || (opcode==0xF3)) {
+        rep = opcode-0xF1;
+        opcode = F8;
+    }
+    // A REX prefix placed before the F0/66 prefixes is ignored: only a REX
+    // byte (0x40..0x4f) found at this point is kept
+    rex.rex = 0;
+    while(opcode>=0x40 && opcode<=0x4f) {
+        rex.rex = opcode;
+        opcode = F8;
+    }
+
+    switch(opcode) {
+        case 0x81:
+        case 0x83:
+            nextop = F8;
+            SMDMB();
+            switch((nextop>>3)&7) {
+                case 0: //ADD
+                    if(opcode==0x81) {
+                        INST_NAME("LOCK ADD Ew, Iw");
+                    } else {
+                        INST_NAME("LOCK ADD Ew, Ib");
+                    }
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    if(MODREG) {
+                        // Register destination: nothing to lock, do a plain 16-bit add
+                        if(opcode==0x81) i32 = F16S; else i32 = F8S;
+                        ed = xRAX+(nextop&7)+(rex.b<<3);
+                        MOV32w(x5, i32);
+                        // x6 = low 16 bits of ed, zero-extended
+                        SLLI(x6, ed, 48);
+                        SRLI(x6, x6, 48);
+                        emit_add16(dyn, ninst, x6, x5, x3, x4, x2);
+                        // clear the low 16 bits of ed and merge x6 back in
+                        SRLI(ed, ed, 16);
+                        SLLI(ed, ed, 16);
+                        OR(ed, ed, x6);
+                    } else {
+                        addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, (opcode==0x81)?2:1);
+                        if(opcode==0x81) i32 = F16S; else i32 = F8S;
+                        MOV32w(x5, i32);
+
+                        // The read-modify-write is done on the enclosing 32-bit
+                        // word with LR.W/SC.W; bit 1 of the address selects which
+                        // half of that word holds the 16-bit operand
+                        ANDI(x3, wback, 0b10);
+                        BNEZ_MARK(x3);
+
+                        // lower 16 bits
+                        MARKLOCK;
+                        LR_W(x1, wback, 1, 1);
+                        // x3 = upper half of the word, kept unchanged
+                        SRLIW(x3, x1, 16);
+                        SLLIW(x3, x3, 16);
+                        // x4 = (old + imm) truncated to 16 bits
+                        ADD(x4, x1, x5);
+                        SLLIW(x4, x4, 16);
+                        SRLIW(x4, x4, 16);
+                        OR(x4, x4, x3);
+                        SC_W(x3, x4, wback, 1, 1);
+                        BNEZ_MARKLOCK(x3);  // retry while the store-conditional fails
+                        IFX(X_ALL|X_PEND) {
+                            // keep only the old 16-bit operand in x1 for the flags
+                            SLLIW(x1, x1, 16);
+                            SRLIW(x1, x1, 16);
+                        }
+                        B_MARK3_nocond;
+
+                        MARK;
+                        // upper 16 bits
+                        // NOTE(review): wback may be a guest register rather than the
+                        // x2 scratch handed to geted() (presumably for a plain [reg]
+                        // operand), so the word-aligned address is built in x2 instead
+                        // of flipping bit 1 of wback in place -- confirm against geted().
+                        XORI(x2, wback, 0b10);
+                        MARK2;
+                        LR_W(x1, x2, 1, 1);
+                        // x3 = lower half of the word, kept unchanged
+                        SLLIW(x3, x1, 16);
+                        SRLIW(x3, x3, 16);
+                        // x1 = old 16-bit operand (upper half moved down)
+                        SRLIW(x1, x1, 16);
+                        ADD(x4, x1, x5);
+                        SLLIW(x4, x4, 16);
+                        OR(x4, x4, x3);
+                        SC_W(x3, x4, x2, 1, 1);
+                        BNEZ_MARK2(x3);     // retry while the store-conditional fails
+
+                        MARK3;
+                        // final: memory is already updated, redo the add on the old
+                        // operand value only to produce the flags when they are needed
+                        IFX(X_ALL|X_PEND) {
+                            emit_add16(dyn, ninst, x1, x5, x3, x4, x6);
+                        }
+                    }
+                    break;
+                default:
+                    DEFAULT;
+            }
+            SMDMB();
+            break;
+
+        default:
+            DEFAULT;
+    }
+
+    // The caller returns this value directly, so falling off the end of this
+    // non-void function would be undefined behaviour
+    return addr;
+}
\ No newline at end of file
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index f4fde7dd..d7f52df7 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1097,7 +1097,7 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
 //uintptr_t dynarec64_6664(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int seg, int* ok, int* need_epilog);
-//uintptr_t dynarec64_66F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+uintptr_t dynarec64_66F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
 uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);