diff options
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/arm64_emitter.h | 15 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_00.c | 6 | ||||
| -rw-r--r-- | src/dynarec/dynarec_arm64_d9.c | 339 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_helper.h | 9 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_pass2.h | 3 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_pass3.h | 3 |
6 files changed, 369 insertions, 6 deletions
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h index f8aa400a..d7e2a007 100755 --- a/src/dynarec/arm64_emitter.h +++ b/src/dynarec/arm64_emitter.h @@ -670,6 +670,12 @@ #define VSTR128_REG(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 0, Rn, Dt)) #define VSTR128_REG_LSL4(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 1, Rn, Dt)) +#define VLDR_PC_gen(opc, imm19, Rt) ((opc)<<30 | 0b011<<27 | 1<<26 | (imm19)<<5 | (Rt)) +#define VLDR32_literal(Vt, imm19) EMIT(VLDR_PC_gen(0b00, ((imm19)>>2)&0x7FFFF, Vt)) +#define VLDR64_literal(Vt, imm19) EMIT(VLDR_PC_gen(0b01, ((imm19)>>2)&0x7FFFF, Vt)) +#define VLDR128_literal(Vt, imm19) EMIT(VLDR_PC_gen(0b10, ((imm19)>>2)&0x7FFFF, Vt)) + + #define LD1R_gen(Q, size, Rn, Rt) ((Q)<<30 | 0b0011010<<23 | 1<<22 | 0<<21 | 0b110<<13 | (size)<<10 | (Rn)<<5 | (Rt)) #define VLDQ1R_8(Vt, Rn) EMIT(LD1R_gen(1, 0b00, Rn, Vt)) #define VLDQ1R_16(Vt, Rn) EMIT(LD1R_gen(1, 0b01, Rn, Vt)) @@ -878,6 +884,15 @@ #define VFADDPQS(Vd, Vn, Vm) EMIT(FADDP_vector(1, 0, Vm, Vn, Vd)) #define VFADDPQD(Vd, Vn, Vm) EMIT(FADDP_vector(1, 1, Vm, Vn, Vd)) +// NEG / ABS +#define FNEGABS_scalar(type, opc, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (opc)<<15 | 0b10000<<10 | (Rn)<<5 | (Rd)) +#define FNEGS(Sd, Sn) EMIT(FNEGABS_scalar(0b00, 0b10, Sn, Sd)) +#define FNEGD(Dd, Dn) EMIT(FNEGABS_scalar(0b01, 0b10, Dn, Dd)) + +#define FABSS(Sd, Sn) EMIT(FNEGABS_scalar(0b00, 0b01, Sn, Sd)) +#define FABSD(Dd, Dn) EMIT(FNEGABS_scalar(0b01, 0b01, Dn, Dd)) + + // MUL #define FMUL_vector(Q, sz, Rm, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11011<<11 | 1<<10 | (Rn)<<5 | (Rd)) #define VFMULS(Sd, Sn, Sm) EMIT(FMUL_vector(0, 0, Sm, Sn, Sd)) diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c index af350c13..9329cf3d 100755 --- a/src/dynarec/dynarec_arm64_00.c +++ b/src/dynarec/dynarec_arm64_00.c @@ -1828,7 +1828,11 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin 
break; } break; - + + case 0xD9: + addr = dynarec64_D9(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + break; + case 0xE8: INST_NAME("CALL Id"); i32 = F32S; diff --git a/src/dynarec/dynarec_arm64_d9.c b/src/dynarec/dynarec_arm64_d9.c new file mode 100644 index 00000000..b729eb8c --- /dev/null +++ b/src/dynarec/dynarec_arm64_d9.c @@ -0,0 +1,339 @@ +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <pthread.h> +#include <errno.h> + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_arm64.h" +#include "dynarec_arm64_private.h" +#include "arm64_printer.h" +#include "emu/x87emu_private.h" + +#include "dynarec_arm64_helper.h" +#include "dynarec_arm64_functions.h" + + +uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) +{ + uint8_t nextop = F8; + uint8_t ed; + uint8_t wback, wb1; + int fixedaddress; + int v1, v2; + int s0; + int i1, i2, i3; + + MAYUSE(s0); + MAYUSE(v2); + MAYUSE(v1); + + switch(nextop) { + case 0xC0: + case 0xC1: + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + INST_NAME("FLD STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7); + v2 = x87_do_push(dyn, ninst); + FMOVD(v2, v1); + break; + + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + INST_NAME("FXCH STx"); + // swap the cache value, not the double value itself :p + i1 = x87_get_cache(dyn, ninst, x1, x2, nextop&7); + i2 = x87_get_cache(dyn, ninst, x1, x2, 0); + i3 = dyn->x87cache[i1]; + dyn->x87cache[i1] = dyn->x87cache[i2]; + dyn->x87cache[i2] = i3; + break; + + case 0xD0: + INST_NAME("FNOP"); + break; + + case 0xE0: + INST_NAME("FCHS"); + v1 = x87_get_st(dyn, ninst, 
x1, x2, 0); + FNEGD(v1, v1); + break; + case 0xE1: + INST_NAME("FABS"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + FABSD(v1, v1); + break; + + case 0xE4: + INST_NAME("FTST"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + FCMPD_0(v1); + FCOM(x1, x2, x3); // same flags... + break; + case 0xE5: + INST_NAME("FXAM"); + x87_refresh(dyn, ninst, x1, x2, 0); + CALL(fpu_fxam, -1); // should be possible inline, but is it worth it? + break; + + case 0xE8: + INST_NAME("FLD1"); + v1 = x87_do_push(dyn, ninst); + FTABLE64(v1, 1.0); + break; + case 0xE9: + INST_NAME("FLDL2T"); + v1 = x87_do_push(dyn, ninst); + FTABLE64(v1, L2T); + break; + case 0xEA: + INST_NAME("FLDL2E"); + v1 = x87_do_push(dyn, ninst); + FTABLE64(v1, L2E); + break; + case 0xEB: + INST_NAME("FLDPI"); + v1 = x87_do_push(dyn, ninst); + FTABLE64(v1, PI); + break; + case 0xEC: + INST_NAME("FLDLG2"); + v1 = x87_do_push(dyn, ninst); + FTABLE64(v1, LG2); + break; + case 0xED: + INST_NAME("FLDLN2"); + v1 = x87_do_push(dyn, ninst); + FTABLE64(v1, LN2); + break; + case 0xEE: + INST_NAME("FLDZ"); + v1 = x87_do_push(dyn, ninst); + FTABLE64(v1, 0.0); + break; + + case 0xFA: + INST_NAME("FSQRT"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + FSQRTD(v1, v1); + break; + + case 0xFC: + INST_NAME("FRNDINT"); + // use C helper for now, nothing straightforward is available + x87_forget(dyn, ninst, x1, x2, 0); + CALL(arm_frndint, -1); + /* + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + VCMP_F64_0(v1); + VMRS_APSR(); + B_NEXT(cVS); // Unordered, skip + B_NEXT(cEQ); // Zero, skip + u8 = x87_setround(dyn, ninst, x1, x2, x3); + VCVT_S32_F64(x1, v1); // limit to 32bits.... 
+ VCVT_F64_S32(v1, x1); + x87_restoreround(dyn, ninst, u8); + */ + break; + case 0xF0: + INST_NAME("F2XM1"); + x87_forget(dyn, ninst, x1, x2, 0); + CALL(arm_f2xm1, -1); + break; + case 0xF1: + INST_NAME("FYL2X"); + x87_forget(dyn, ninst, x1, x2, 0); + x87_forget(dyn, ninst, x1, x2, 1); + CALL(arm_fyl2x, -1); + x87_do_pop(dyn, ninst); + break; + case 0xF2: + INST_NAME("FTAN"); + x87_forget(dyn, ninst, x1, x2, 0); + CALL(arm_ftan, -1); + v1 = x87_do_push(dyn, ninst); + FTABLE64(v1, 1.0); + break; + case 0xF3: + INST_NAME("FPATAN"); + x87_forget(dyn, ninst, x1, x2, 0); + x87_forget(dyn, ninst, x1, x2, 1); + CALL(arm_fpatan, -1); + x87_do_pop(dyn, ninst); + break; + case 0xF4: + INST_NAME("FXTRACT"); + x87_do_push_empty(dyn, ninst, 0); + x87_forget(dyn, ninst, x1, x2, 1); + CALL(arm_fxtract, -1); + break; + case 0xF5: + INST_NAME("FPREM1"); + x87_forget(dyn, ninst, x1, x2, 0); + x87_forget(dyn, ninst, x1, x2, 1); + CALL(arm_fprem1, -1); + break; + case 0xF6: + INST_NAME("FDECSTP"); + fpu_purgecache(dyn, ninst, x1, x2, x3); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, top)); + SUBw_U12(x2, x2, 1); + ANDw_mask(x2, x2, 0, 2); //mask=7 + STRw_U12(x2, xEmu, offsetof(x64emu_t, top)); + break; + case 0xF7: + INST_NAME("FINCSTP"); + fpu_purgecache(dyn, ninst, x1, x2, x3); + LDRw_U12(x2, xEmu, offsetof(x64emu_t, top)); + ADDw_U12(x2, x2, 1); + ANDw_mask(x2, x2, 0, 2); //mask=7 + STRw_U12(x2, xEmu, offsetof(x64emu_t, top)); + break; + case 0xF8: + INST_NAME("FPREM"); + x87_forget(dyn, ninst, x1, x2, 0); + x87_forget(dyn, ninst, x1, x2, 1); + CALL(arm_fprem, -1); + break; + case 0xF9: + INST_NAME("FYL2XP1"); + x87_forget(dyn, ninst, x1, x2, 0); + x87_forget(dyn, ninst, x1, x2, 1); + CALL(arm_fyl2xp1, -1); + x87_do_pop(dyn, ninst); + break; + case 0xFB: + INST_NAME("FSINCOS"); + x87_do_push_empty(dyn, ninst, 0); + x87_forget(dyn, ninst, x1, x2, 1); + CALL(arm_fsincos, -1); + break; + case 0xFD: + INST_NAME("FSCALE"); + x87_forget(dyn, ninst, x1, x2, 0); + x87_forget(dyn, ninst, x1, x2, 
1); + CALL(arm_fscale, -1); + break; + case 0xFE: + INST_NAME("FSIN"); + x87_forget(dyn, ninst, x1, x2, 0); + CALL(arm_fsin, -1); + break; + case 0xFF: + INST_NAME("FCOS"); + x87_forget(dyn, ninst, x1, x2, 0); + CALL(arm_fcos, -1); + break; + + + case 0xD1: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: + case 0xE2: + case 0xE3: + case 0xE6: + case 0xE7: + case 0xEF: + DEFAULT; + break; + + default: + switch((nextop>>3)&7) { + case 0: + INST_NAME("FLD ST0, float[ED]"); + v1 = x87_do_push(dyn, ninst); + s0 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VLDR32_U12(s0, ed, fixedaddress); + FCVT_D_S(v1, s0); + break; + case 2: + INST_NAME("FST float[ED], ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + s0 = fpu_get_scratch(dyn); + FCVT_S_D(s0, v1); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VSTR32_U12(s0, ed, fixedaddress); + break; + case 3: + INST_NAME("FSTP float[ED], ST0"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0); + s0 = fpu_get_scratch(dyn); + FCVT_S_D(s0, v1); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VSTR32_U12(s0, ed, fixedaddress); + x87_do_pop(dyn, ninst); + break; + case 4: + INST_NAME("FLDENV Ed"); + fpu_purgecache(dyn, ninst, x1, x2, x3); // maybe only x87, not SSE? + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + if(ed!=x1) { + MOVx_REG(x1, ed); + } + MOV32w(x2, 0); + CALL(fpu_loadenv, -1); + break; + case 5: + INST_NAME("FLDCW Ew"); + GETEW(x1, 0); + STRH_U12(x1, xEmu, offsetof(x64emu_t, cw)); // hopefully cw is not too far for an imm8 + UBFXw(x1, x1, 10, 2); // extract round + STRw_U12(x1, xEmu, offsetof(x64emu_t, round)); + break; + case 6: + INST_NAME("FNSTENV Ed"); + fpu_purgecache(dyn, ninst, x1, x2, x3); // maybe only x87, not SSE? 
+ addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0); + if(ed!=x1) { + MOVx_REG(x1, ed); + } + MOV32w(x2, 0); + CALL(fpu_savenv, -1); + break; + case 7: + INST_NAME("FNSTCW Ew"); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<1, 1, rex, 0, 0); + ed = x1; + wb1 = 1; + LDRH_U12(x1, xEmu, offsetof(x64emu_t, cw)); + EWBACK; + break; + default: + DEFAULT; + } + } + return addr; +} + diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h index 651ab7ef..26557afe 100755 --- a/src/dynarec/dynarec_arm64_helper.h +++ b/src/dynarec/dynarec_arm64_helper.h @@ -358,8 +358,8 @@ // Generate FCOM with s1 and s2 scratch regs (the VCMP is already done) #define FCOM(s1, s2, s3) \ LDRH_U12(s3, xEmu, offsetof(x64emu_t, sw)); /*offset is 8bits right?*/\ - MOV32w(s1, 0b01000111); \ - BICw_REG_LSL(s3, s3, s1, 8); \ + MOV32w(s1, 0b0100011100000000); \ + BICw_REG(s3, s3, s1); \ CSETw(s1, cMI); /* 1 if less than, 0 else */ \ MOV32w(s2, 0b01000101); /* unordered */ \ CSELw(s1, s2, s1, cVS); \ @@ -508,6 +508,9 @@ #ifndef TABLE64 #define TABLE64(A, V) #endif +#ifndef FTABLE64 +#define FTABLE64(A, V) +#endif #if STEP < 2 #define GETIP(A) @@ -833,7 +836,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); //uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -//uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); //uintptr_t 
dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); //uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); //uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); diff --git a/src/dynarec/dynarec_arm64_pass2.h b/src/dynarec/dynarec_arm64_pass2.h index 7f1d0de1..d5d039e5 100755 --- a/src/dynarec/dynarec_arm64_pass2.h +++ b/src/dynarec/dynarec_arm64_pass2.h @@ -7,4 +7,5 @@ #define INST_EPILOG dyn->insts[ninst].epilog = dyn->arm_size; #define INST_NAME(name) #define NEW_BARRIER_INST if(ninst) ++dyn->sons_size -#define TABLE64(A, V) if((V)>0xffffffffLL) {Table64(dyn, (V)); EMIT(0);} else {MOV64x(A, V);} \ No newline at end of file +#define TABLE64(A, V) if((V)>0xffffffffLL) {Table64(dyn, (V)); EMIT(0);} else {MOV64x(A, V);} +#define FTABLE64(A, V) {mmx87_regs_t v = {.d = V}; Table64(dyn, v.q); EMIT(0);} \ No newline at end of file diff --git a/src/dynarec/dynarec_arm64_pass3.h b/src/dynarec/dynarec_arm64_pass3.h index 74784196..cb6891d4 100755 --- a/src/dynarec/dynarec_arm64_pass3.h +++ b/src/dynarec/dynarec_arm64_pass3.h @@ -32,4 +32,5 @@ ++dyn->sons_size; \ } -#define TABLE64(A, V) if((V)>0xffffffffLL) {int val64offset = Table64(dyn, (V)); MESSAGE(LOG_DUMP, " Table64: 0x%lx\n", (V)); LDRx_literal(A, val64offset);} else {MOV64x(A, V);} \ No newline at end of file +#define TABLE64(A, V) if((V)>0xffffffffLL) {int val64offset = Table64(dyn, (V)); MESSAGE(LOG_DUMP, " Table64: 0x%lx\n", (V)); LDRx_literal(A, val64offset);} else {MOV64x(A, V);} +#define FTABLE64(A, V) {mmx87_regs_t v = {.d = V}; int val64offset = Table64(dyn, v.q); MESSAGE(LOG_DUMP, " FTable64: %g\n", v.d); VLDR64_literal(A, val64offset);} \ No newline at end of file |