about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2021-04-03 11:43:17 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2021-04-03 11:43:17 +0200
commit ae70f168a8dd40b3d903eac2170f107ea9f0d127 (patch)
tree ef1bb76bcc771d45ef821fd72f391eb663d71bba
parent b239bec2c5d63120e4aa761b6b014af6e50d0633 (diff)
download box64-ae70f168a8dd40b3d903eac2170f107ea9f0d127.tar.gz
box64-ae70f168a8dd40b3d903eac2170f107ea9f0d127.zip
[DYNAREC] Added D9 opcodes
-rwxr-xr-x CMakeLists.txt 2
-rwxr-xr-x src/dynarec/arm64_emitter.h 15
-rwxr-xr-x src/dynarec/dynarec_arm64_00.c 6
-rw-r--r-- src/dynarec/dynarec_arm64_d9.c 339
-rwxr-xr-x src/dynarec/dynarec_arm64_helper.h 9
-rwxr-xr-x src/dynarec/dynarec_arm64_pass2.h 3
-rwxr-xr-x src/dynarec/dynarec_arm64_pass3.h 3
7 files changed, 370 insertions, 7 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c2798036..2e7ed6f3 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -287,7 +287,7 @@ if(ARM_DYNAREC)
     "${BOX64_ROOT}/src/dynarec/dynarec_arm64_66.c"
     "${BOX64_ROOT}/src/dynarec/dynarec_arm64_67.c"
     #"${BOX64_ROOT}/src/dynarec/dynarec_arm64_d8.c"
-    #"${BOX64_ROOT}/src/dynarec/dynarec_arm64_d9.c"
+    "${BOX64_ROOT}/src/dynarec/dynarec_arm64_d9.c"
     #"${BOX64_ROOT}/src/dynarec/dynarec_arm64_da.c"
     #"${BOX64_ROOT}/src/dynarec/dynarec_arm64_db.c"
     #"${BOX64_ROOT}/src/dynarec/dynarec_arm64_dc.c"
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index f8aa400a..d7e2a007 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -670,6 +670,12 @@
 #define VSTR128_REG(Qt, Rn, Rm)             EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 0, Rn, Dt))
 #define VSTR128_REG_LSL4(Qt, Rn, Rm)        EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 1, Rn, Dt))
 
+#define VLDR_PC_gen(opc, imm19, Rt)         ((opc)<<30 | 0b011<<27 | 1<<26 | (imm19)<<5 | (Rt))
+#define VLDR32_literal(Vt, imm19)           EMIT(VLDR_PC_gen(0b00, ((imm19)>>2)&0x7FFFF, Vt))
+#define VLDR64_literal(Vt, imm19)           EMIT(VLDR_PC_gen(0b01, ((imm19)>>2)&0x7FFFF, Vt))
+#define VLDR128_literal(Vt, imm19)          EMIT(VLDR_PC_gen(0b10, ((imm19)>>2)&0x7FFFF, Vt))
+
+
 #define LD1R_gen(Q, size, Rn, Rt)           ((Q)<<30 | 0b0011010<<23 | 1<<22 | 0<<21 | 0b110<<13 | (size)<<10 | (Rn)<<5 | (Rt))
 #define VLDQ1R_8(Vt, Rn)                    EMIT(LD1R_gen(1, 0b00, Rn, Vt))
 #define VLDQ1R_16(Vt, Rn)                   EMIT(LD1R_gen(1, 0b01, Rn, Vt))
@@ -878,6 +884,15 @@
 #define VFADDPQS(Vd, Vn, Vm)        EMIT(FADDP_vector(1, 0, Vm, Vn, Vd))
 #define VFADDPQD(Vd, Vn, Vm)        EMIT(FADDP_vector(1, 1, Vm, Vn, Vd))
 
+// NEG / ABS
+#define FNEGABS_scalar(type, opc, Rn, Rd)  (0b11110<<24 | (type)<<22 | 1<<21 | (opc)<<15 | 0b10000<<10 | (Rn)<<5 | (Rd))
+#define FNEGS(Sd, Sn)               EMIT(FNEGABS_scalar(0b00, 0b10, Sn, Sd))
+#define FNEGD(Dd, Dn)               EMIT(FNEGABS_scalar(0b01, 0b10, Dn, Dd))
+
+#define FABSS(Sd, Sn)               EMIT(FNEGABS_scalar(0b00, 0b01, Sn, Sd))
+#define FABSD(Dd, Dn)               EMIT(FNEGABS_scalar(0b01, 0b01, Dn, Dd))
+
+
 // MUL
 #define FMUL_vector(Q, sz, Rm, Rn, Rd)  ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11011<<11 | 1<<10 | (Rn)<<5 | (Rd))
 #define VFMULS(Sd, Sn, Sm)          EMIT(FMUL_vector(0, 0, Sm, Sn, Sd))
diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c
index af350c13..9329cf3d 100755
--- a/src/dynarec/dynarec_arm64_00.c
+++ b/src/dynarec/dynarec_arm64_00.c
@@ -1828,7 +1828,11 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
             }
             break;
-        
+
+        case 0xD9:
+            addr = dynarec64_D9(dyn, addr, ip, ninst, rex, rep, ok, need_epilog);
+            break;
+
         case 0xE8:
             INST_NAME("CALL Id");
             i32 = F32S;
diff --git a/src/dynarec/dynarec_arm64_d9.c b/src/dynarec/dynarec_arm64_d9.c
new file mode 100644
index 00000000..b729eb8c
--- /dev/null
+++ b/src/dynarec/dynarec_arm64_d9.c
@@ -0,0 +1,339 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_arm64.h"
+#include "dynarec_arm64_private.h"
+#include "arm64_printer.h"
+#include "emu/x87emu_private.h"
+
+#include "dynarec_arm64_helper.h"
+#include "dynarec_arm64_functions.h"
+
+
+uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
+{
+    uint8_t nextop = F8;
+    uint8_t ed;
+    uint8_t wback, wb1;
+    int fixedaddress;
+    int v1, v2;
+    int s0;
+    int i1, i2, i3;
+
+    MAYUSE(s0);
+    MAYUSE(v2);
+    MAYUSE(v1);
+
+    switch(nextop) {
+        case 0xC0:
+        case 0xC1:
+        case 0xC2:
+        case 0xC3:
+        case 0xC4:
+        case 0xC5:
+        case 0xC6:
+        case 0xC7:
+            INST_NAME("FLD STx");
+            v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7);
+            v2 = x87_do_push(dyn, ninst);
+            FMOVD(v2, v1);
+            break;
+
+        case 0xC8:
+        case 0xC9:
+        case 0xCA:
+        case 0xCB:
+        case 0xCC:
+        case 0xCD:
+        case 0xCE:
+        case 0xCF:
+            INST_NAME("FXCH STx");
+            // swap the cache value, not the double value itself :p
+            i1 = x87_get_cache(dyn, ninst, x1, x2, nextop&7);
+            i2 = x87_get_cache(dyn, ninst, x1, x2, 0);
+            i3 = dyn->x87cache[i1];
+            dyn->x87cache[i1] = dyn->x87cache[i2];
+            dyn->x87cache[i2] = i3;
+            break;
+
+        case 0xD0:
+            INST_NAME("FNOP");
+            break;
+
+        case 0xE0:
+            INST_NAME("FCHS");
+            v1 = x87_get_st(dyn, ninst, x1, x2, 0);
+            FNEGD(v1, v1);
+            break;
+        case 0xE1:
+            INST_NAME("FABS");
+            v1 = x87_get_st(dyn, ninst, x1, x2, 0);
+            FABSD(v1, v1);
+            break;
+
+        case 0xE4:
+            INST_NAME("FTST");
+            v1 = x87_get_st(dyn, ninst, x1, x2, 0);
+            FCMPD_0(v1);
+            FCOM(x1, x2, x3);   // same flags...
+            break;
+        case 0xE5:
+            INST_NAME("FXAM");
+            x87_refresh(dyn, ninst, x1, x2, 0);
+            CALL(fpu_fxam, -1);  // should be possible inline, but is it worth it?
+            break;
+
+        case 0xE8:
+            INST_NAME("FLD1");
+            v1 = x87_do_push(dyn, ninst);
+            FTABLE64(v1, 1.0);
+            break;
+        case 0xE9:
+            INST_NAME("FLDL2T");
+            v1 = x87_do_push(dyn, ninst);
+            FTABLE64(v1, L2T);
+            break;
+        case 0xEA:     
+            INST_NAME("FLDL2E");
+            v1 = x87_do_push(dyn, ninst);
+            FTABLE64(v1, L2E);
+            break;
+        case 0xEB:
+            INST_NAME("FLDPI");
+            v1 = x87_do_push(dyn, ninst);
+            FTABLE64(v1, PI);
+            break;
+        case 0xEC:
+            INST_NAME("FLDLG2");
+            v1 = x87_do_push(dyn, ninst);
+            FTABLE64(v1, LG2);
+            break;
+        case 0xED:
+            INST_NAME("FLDLN2");
+            v1 = x87_do_push(dyn, ninst);
+            FTABLE64(v1, LN2);
+            break;
+        case 0xEE:
+            INST_NAME("FLDZ");
+            v1 = x87_do_push(dyn, ninst);
+            FTABLE64(v1, 0.0);
+            break;
+
+        case 0xFA:
+            INST_NAME("FSQRT");
+            v1 = x87_get_st(dyn, ninst, x1, x2, 0);
+            FSQRTD(v1, v1);
+            break;
+
+        case 0xFC:
+            INST_NAME("FRNDINT");
+            // use C helper for now, nothing straightforward is available
+            x87_forget(dyn, ninst, x1, x2, 0);
+            CALL(arm_frndint, -1);
+            /*
+            v1 = x87_get_st(dyn, ninst, x1, x2, 0);
+            VCMP_F64_0(v1);
+            VMRS_APSR();
+            B_NEXT(cVS);    // Unordered, skip
+            B_NEXT(cEQ);    // Zero, skip
+            u8 = x87_setround(dyn, ninst, x1, x2, x3);
+            VCVT_S32_F64(x1, v1);   // limit to 32bits....
+            VCVT_F64_S32(v1, x1);
+            x87_restoreround(dyn, ninst, u8);
+            */
+            break;
+        case 0xF0:
+            INST_NAME("F2XM1");
+            x87_forget(dyn, ninst, x1, x2, 0);
+            CALL(arm_f2xm1, -1);
+            break;
+        case 0xF1:
+            INST_NAME("FYL2X");
+            x87_forget(dyn, ninst, x1, x2, 0);
+            x87_forget(dyn, ninst, x1, x2, 1);
+            CALL(arm_fyl2x, -1);
+            x87_do_pop(dyn, ninst);
+            break;
+        case 0xF2:
+            INST_NAME("FTAN");
+            x87_forget(dyn, ninst, x1, x2, 0);
+            CALL(arm_ftan, -1);
+            v1 = x87_do_push(dyn, ninst);
+            FTABLE64(v1, 1.0);
+            break;
+        case 0xF3:
+            INST_NAME("FPATAN");
+            x87_forget(dyn, ninst, x1, x2, 0);
+            x87_forget(dyn, ninst, x1, x2, 1);
+            CALL(arm_fpatan, -1);
+            x87_do_pop(dyn, ninst);
+            break;
+        case 0xF4:
+            INST_NAME("FXTRACT");
+            x87_do_push_empty(dyn, ninst, 0);
+            x87_forget(dyn, ninst, x1, x2, 1);
+            CALL(arm_fxtract, -1);
+            break;
+        case 0xF5:
+            INST_NAME("FPREM1");
+            x87_forget(dyn, ninst, x1, x2, 0);
+            x87_forget(dyn, ninst, x1, x2, 1);
+            CALL(arm_fprem1, -1);
+            break;
+        case 0xF6:
+            INST_NAME("FDECSTP");
+            fpu_purgecache(dyn, ninst, x1, x2, x3);
+            LDRw_U12(x2, xEmu, offsetof(x64emu_t, top));
+            SUBw_U12(x2, x2, 1);
+            ANDw_mask(x2, x2, 0, 2);    //mask=7
+            STRw_U12(x2, xEmu, offsetof(x64emu_t, top));
+            break;
+        case 0xF7:
+            INST_NAME("FINCSTP");
+            fpu_purgecache(dyn, ninst, x1, x2, x3);
+            LDRw_U12(x2, xEmu, offsetof(x64emu_t, top));
+            ADDw_U12(x2, x2, 1);
+            ANDw_mask(x2, x2, 0, 2);    //mask=7
+            STRw_U12(x2, xEmu, offsetof(x64emu_t, top));
+            break;
+        case 0xF8:
+            INST_NAME("FPREM");
+            x87_forget(dyn, ninst, x1, x2, 0);
+            x87_forget(dyn, ninst, x1, x2, 1);
+            CALL(arm_fprem, -1);
+            break;
+        case 0xF9:
+            INST_NAME("FYL2XP1");
+            x87_forget(dyn, ninst, x1, x2, 0);
+            x87_forget(dyn, ninst, x1, x2, 1);
+            CALL(arm_fyl2xp1, -1);
+            x87_do_pop(dyn, ninst);
+            break;
+        case 0xFB:
+            INST_NAME("FSINCOS");
+            x87_do_push_empty(dyn, ninst, 0);
+            x87_forget(dyn, ninst, x1, x2, 1);
+            CALL(arm_fsincos, -1);
+            break;
+        case 0xFD:
+            INST_NAME("FSCALE");
+            x87_forget(dyn, ninst, x1, x2, 0);
+            x87_forget(dyn, ninst, x1, x2, 1);
+            CALL(arm_fscale, -1);
+            break;
+        case 0xFE:
+            INST_NAME("FSIN");
+            x87_forget(dyn, ninst, x1, x2, 0);
+            CALL(arm_fsin, -1);
+            break;
+        case 0xFF:
+            INST_NAME("FCOS");
+            x87_forget(dyn, ninst, x1, x2, 0);
+            CALL(arm_fcos, -1);
+            break;
+
+
+        case 0xD1:
+        case 0xD4:
+        case 0xD5:
+        case 0xD6:
+        case 0xD7:
+        case 0xD8:
+        case 0xD9:
+        case 0xDA:
+        case 0xDB:
+        case 0xDC:
+        case 0xDD:
+        case 0xDE:
+        case 0xDF:
+        case 0xE2:
+        case 0xE3:
+        case 0xE6:
+        case 0xE7:
+        case 0xEF:
+            DEFAULT;
+            break;
+             
+        default:
+            switch((nextop>>3)&7) {
+                case 0:
+                    INST_NAME("FLD ST0, float[ED]");
+                    v1 = x87_do_push(dyn, ninst);
+                    s0 = fpu_get_scratch(dyn);
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0);
+                    VLDR32_U12(s0, ed, fixedaddress);
+                    FCVT_D_S(v1, s0);
+                    break;
+                case 2:
+                    INST_NAME("FST float[ED], ST0");
+                    v1 = x87_get_st(dyn, ninst, x1, x2, 0);
+                    s0 = fpu_get_scratch(dyn);
+                    FCVT_S_D(s0, v1);
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0);
+                    VSTR32_U12(s0, ed, fixedaddress);
+                    break;
+                case 3:
+                    INST_NAME("FSTP float[ED], ST0");
+                    v1 = x87_get_st(dyn, ninst, x1, x2, 0);
+                    s0 = fpu_get_scratch(dyn);
+                    FCVT_S_D(s0, v1);
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0);
+                    VSTR32_U12(s0, ed, fixedaddress);
+                    x87_do_pop(dyn, ninst);
+                    break;
+                case 4:
+                    INST_NAME("FLDENV Ed");
+                    fpu_purgecache(dyn, ninst, x1, x2, x3); // maybe only x87, not SSE?
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0);
+                    if(ed!=x1) {
+                        MOVx_REG(x1, ed);
+                    }
+                    MOV32w(x2, 0);
+                    CALL(fpu_loadenv, -1);
+                    break;
+                case 5:
+                    INST_NAME("FLDCW Ew");
+                    GETEW(x1, 0);
+                    STRH_U12(x1, xEmu, offsetof(x64emu_t, cw));    // hopefully cw is not too far for an imm8
+                    UBFXw(x1, x1, 10, 2);    // extract round
+                    STRw_U12(x1, xEmu, offsetof(x64emu_t, round));
+                    break;
+                case 6:
+                    INST_NAME("FNSTENV Ed");
+                    fpu_purgecache(dyn, ninst, x1, x2, x3); // maybe only x87, not SSE?
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0);
+                    if(ed!=x1) {
+                        MOVx_REG(x1, ed);
+                    }
+                    MOV32w(x2, 0);
+                    CALL(fpu_savenv, -1);
+                    break;
+                case 7:
+                    INST_NAME("FNSTCW Ew");
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<1, 1, rex, 0, 0);
+                    ed = x1;
+                    wb1 = 1;
+                    LDRH_U12(x1, xEmu, offsetof(x64emu_t, cw));
+                    EWBACK;
+                    break;
+                default:
+                    DEFAULT;
+            }
+    }
+    return addr;
+}
+
diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h
index 651ab7ef..26557afe 100755
--- a/src/dynarec/dynarec_arm64_helper.h
+++ b/src/dynarec/dynarec_arm64_helper.h
@@ -358,8 +358,8 @@
 // Generate FCOM with s1 and s2 scratch regs (the VCMP is already done)
 #define FCOM(s1, s2, s3)                                                    \
     LDRH_U12(s3, xEmu, offsetof(x64emu_t, sw));   /*offset is 8bits right?*/\
-    MOV32w(s1, 0b01000111);                                                 \
-    BICw_REG_LSL(s3, s3, s1, 8);                                            \
+    MOV32w(s1, 0b0100011100000000);                                         \
+    BICw_REG(s3, s3, s1);                                                   \
     CSETw(s1, cMI); /* 1 if less than, 0 else */                            \
     MOV32w(s2, 0b01000101); /* unordered */                                 \
     CSELw(s1, s2, s1, cVS);                                                 \
@@ -508,6 +508,9 @@
 #ifndef TABLE64
 #define TABLE64(A, V)
 #endif
+#ifndef FTABLE64
+#define FTABLE64(A, V)
+#endif
 
 #if STEP < 2
 #define GETIP(A)
@@ -833,7 +836,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
 uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 //uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
-//uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 //uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 //uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 //uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
diff --git a/src/dynarec/dynarec_arm64_pass2.h b/src/dynarec/dynarec_arm64_pass2.h
index 7f1d0de1..d5d039e5 100755
--- a/src/dynarec/dynarec_arm64_pass2.h
+++ b/src/dynarec/dynarec_arm64_pass2.h
@@ -7,4 +7,5 @@
 #define INST_EPILOG dyn->insts[ninst].epilog = dyn->arm_size; 
 #define INST_NAME(name) 
 #define NEW_BARRIER_INST    if(ninst) ++dyn->sons_size
-#define TABLE64(A, V)   if((V)>0xffffffffLL) {Table64(dyn, (V)); EMIT(0);} else {MOV64x(A, V);}
\ No newline at end of file
+#define TABLE64(A, V)   if((V)>0xffffffffLL) {Table64(dyn, (V)); EMIT(0);} else {MOV64x(A, V);}
+#define FTABLE64(A, V)  {mmx87_regs_t v = {.d = V}; Table64(dyn, v.q); EMIT(0);}
\ No newline at end of file
diff --git a/src/dynarec/dynarec_arm64_pass3.h b/src/dynarec/dynarec_arm64_pass3.h
index 74784196..cb6891d4 100755
--- a/src/dynarec/dynarec_arm64_pass3.h
+++ b/src/dynarec/dynarec_arm64_pass3.h
@@ -32,4 +32,5 @@
     ++dyn->sons_size;                               \
     }
 
-#define TABLE64(A, V)   if((V)>0xffffffffLL) {int val64offset = Table64(dyn, (V)); MESSAGE(LOG_DUMP, "  Table64: 0x%lx\n", (V)); LDRx_literal(A, val64offset);} else {MOV64x(A, V);}
\ No newline at end of file
+#define TABLE64(A, V)   if((V)>0xffffffffLL) {int val64offset = Table64(dyn, (V)); MESSAGE(LOG_DUMP, "  Table64: 0x%lx\n", (V)); LDRx_literal(A, val64offset);} else {MOV64x(A, V);}
+#define FTABLE64(A, V)  {mmx87_regs_t v = {.d = V}; int val64offset = Table64(dyn, v.q); MESSAGE(LOG_DUMP, "  FTable64: %g\n", v.d); VLDR64_literal(A, val64offset);}
\ No newline at end of file