| author | ptitSeb <sebastien.chev@gmail.com> | 2021-04-03 18:50:14 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2021-04-03 18:50:14 +0200 |
| commit | 7bcf1cc0aed10ff948367f2dbe7ee9a815c9a9bc (patch) | |
| tree | 7192e910e0314e8ddce66f5a069f11b2842d93db /src | |
| parent | dba4ff3cff8e7c1431325575355bf2df58ec92fe (diff) | |
| download | box64-7bcf1cc0aed10ff948367f2dbe7ee9a815c9a9bc.tar.gz box64-7bcf1cc0aed10ff948367f2dbe7ee9a815c9a9bc.zip | |
[DYNAREC] Added DF opcodes
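The DF escape byte groups the x87 instructions that work on 16-bit and 64-bit integer and packed-BCD memory operands (FILD/FIST/FISTP/FISTTP m16int, FILD/FISTP m64int, FBLD/FBSTP), plus FFREEP, FNSTSW AX and the FUCOMIP/FCOMIP compares; the new dynarec_arm64_df.c below translates this group to ARM64. For FNSTSW AX the generated code rebuilds the status word by injecting the emulator's top-of-stack index into bits 11..13, which is what the BFIw(x1, x2, 11, 3) in the diff does. A minimal C sketch of that status-word assembly, assuming the sw/top field names that appear in the diff (the helper function itself is hypothetical):

```c
#include <stdint.h>

/* Illustrative sketch only: rebuild the x87 status word the way the
 * emitted FNSTSW AX sequence does, by overwriting the TOP field
 * (bits 11..13) of the cached status word with the current stack top. */
static uint16_t x87_status_word(uint16_t sw, uint32_t top)
{
    sw = (uint16_t)(sw & ~(0x7u << 11));         /* clear TOP            */
    sw = (uint16_t)(sw | ((top & 0x7u) << 11));  /* inject TOP (3 bits)  */
    return sw;                                   /* FNSTSW AX stores this in AX */
}
```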
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/arm64_emitter.h | 55 |
| -rwxr-xr-x | src/dynarec/dynarec_arm64_00.c | 4 |
| -rw-r--r-- | src/dynarec/dynarec_arm64_df.c | 289 |
| -rwxr-xr-x | src/dynarec/dynarec_arm64_helper.h | 6 |
4 files changed, 342 insertions, 12 deletions
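AArch64 has no scalar float-to-int16 conversion, so the new code produces 16-bit results by first converting the double to a 64-bit integer (VFCVTZSd) and then narrowing twice with the scalar SQXTN encodings added to arm64_emitter.h (D to S, then S to H), each step saturating. A rough C model of that FISTTP-style truncating path follows; it is illustrative only (the helper name is made up, and it omits the FPSR.IOC invalid-operation check that the real sequence performs, sketched after the diff):

```c
#include <stdint.h>

/* One SQXTN step: signed saturating narrow to the given range (sketch). */
static int64_t sat_narrow(int64_t v, int64_t lo, int64_t hi)
{
    return v < lo ? lo : (v > hi ? hi : v);
}

/* Illustrative model of the emitted 16-bit store: truncate toward zero,
 * then narrow 64 -> 32 -> 16 with saturation at each step.
 * Note: VFCVTZSd saturates on its own; the plain C cast below assumes
 * the value already fits in an int64_t. */
static int16_t double_to_i16_sat(double d)
{
    int64_t v = (int64_t)d;                      /* VFCVTZSd (truncate)  */
    v = sat_narrow(v, INT32_MIN, INT32_MAX);     /* SQXTN_S_D            */
    v = sat_narrow(v, INT16_MIN, INT16_MAX);     /* SQXTN_H_S            */
    return (int16_t)v;
}
```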
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 8695771b..cb00c19e 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -611,10 +611,27 @@
 #define MRS_nzvc(Rt)    EMIT(MRS_gen(1, 1, 3, 4, 2, 0, Rt))
 // MSR : to System register
 #define MSR_nzvc(Rt)    EMIT(MRS_gen(0, 1, 3, 4, 2, 0, Rt))
-// mrs x0, fpcr : 1101010100 1 1 1 011 0100 0100 000 00000   o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=2
+// mrs x0, fpcr : 1101010100 1 1 1 011 0100 0100 000 00000   o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=0
 #define MRS_fpcr(Rt)    EMIT(MRS_gen(1, 1, 3, 4, 4, 0, Rt))
 #define MSR_fpcr(Rt)    EMIT(MRS_gen(0, 1, 3, 4, 4, 0, Rt))
-
+// mrs x0, fpsr : 1101010100 1 1 1 011 0100 0100 001 00000   o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=1
+#define MRS_fpsr(Rt)    EMIT(MRS_gen(1, 1, 3, 4, 4, 1, Rt))
+#define MSR_fpsr(Rt)    EMIT(MRS_gen(0, 1, 3, 4, 4, 1, Rt))
+// NEON Saturation Bit
+#define FPSR_QC     27
+// NEON Input Denormal Cumulative
+#define FPSR_IDC    7
+// NEON IneXact Cumulative
+#define FPSR_IXC    4
+// NEON Underflow Cumulative
+#define FPSR_UFC    3
+// NEON Overflow Cumulative
+#define FPSR_OFC    2
+// NEON Divide by 0 Cumulative
+#define FPSR_DZC    1
+// NEON Invalid Operation Cumulative
+#define FPSR_IOC    0
+
 // FCSEL
 #define FCSEL_scalar(type, Rm, cond, Rn, Rd)    (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | (cond)<<12 | 0b11<<10 | (Rn)<<5 | (Rd))
 #define FCSELS(Sd, Sn, Sm, cond)    EMIT(FCSEL_scalar(0b00, Sm, cond, Sn, Sd))
@@ -634,6 +651,8 @@
 #define VSTR64_U12(Dt, Rn, imm15)   EMIT(VMEM_gen(0b11, 0b00, ((uint32_t)(imm15>>3))&0xfff, Rn, Dt))
 // imm16 must be 4-aligned
 #define VSTR128_U12(Qt, Rn, imm16)  EMIT(VMEM_gen(0b00, 0b10, ((uint32_t)((imm16)>>4))&0xfff, Rn, Qt))
+// (imm14) must be 1-aligned
+#define VSTR16_U12(Ht, Rn, imm14)   EMIT(VMEM_gen(0b01, 0b00, ((uint32_t)(imm14>>1))&0xfff, Rn, Ht))
 
 #define VMEMUR_vector(size, opc, imm9, Rn, Rt)  ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | (imm9)<<12 | (Rn)<<5 | (Rt))
 // signed offset, no alignement!
@@ -1118,15 +1137,21 @@
 #define VFRINTISQ(Vd,Vn)    EMIT(FRINT_vector(1, 1, 1, 0, 1, Vn, Vd))
 #define VFRINTIDQ(Vd,Vn)    EMIT(FRINT_vector(1, 1, 1, 1, 1, Vn, Vd))
 
-#define FRINT_scalar(type, op, Rn, Rd)  (0b11110<<24 | (type)<<22 | 1<<21 | 0b0100<<17 | (op)<<15 | 0b10000<<10 | (Rn)<<5 | (Rd))
-#define FRINT32ZS(Sd, Sn)   EMIT(FRINT_scalar(0b00, 0b00, Sn, Sd))
-#define FRINT32ZD(Dd, Dn)   EMIT(FRINT_scalar(0b01, 0b00, Dn, Dd))
-#define FRINT32XS(Sd, Sn)   EMIT(FRINT_scalar(0b00, 0b01, Sn, Sd))
-#define FRINT32XD(Dd, Dn)   EMIT(FRINT_scalar(0b01, 0b01, Dn, Dd))
-#define FRINT64ZS(Sd, Sn)   EMIT(FRINT_scalar(0b00, 0b10, Sn, Sd))
-#define FRINT64ZD(Dd, Dn)   EMIT(FRINT_scalar(0b01, 0b10, Dn, Dd))
-#define FRINT64XS(Sd, Sn)   EMIT(FRINT_scalar(0b00, 0b11, Sn, Sd))
-#define FRINT64XD(Dd, Dn)   EMIT(FRINT_scalar(0b01, 0b11, Dn, Dd))
+#define FRINTxx_scalar(type, op, Rn, Rd)    (0b11110<<24 | (type)<<22 | 1<<21 | 0b0100<<17 | (op)<<15 | 0b10000<<10 | (Rn)<<5 | (Rd))
+#define FRINT32ZS(Sd, Sn)   EMIT(FRINTxx_scalar(0b00, 0b00, Sn, Sd))
+#define FRINT32ZD(Dd, Dn)   EMIT(FRINTxx_scalar(0b01, 0b00, Dn, Dd))
+#define FRINT32XS(Sd, Sn)   EMIT(FRINTxx_scalar(0b00, 0b01, Sn, Sd))
+#define FRINT32XD(Dd, Dn)   EMIT(FRINTxx_scalar(0b01, 0b01, Dn, Dd))
+#define FRINT64ZS(Sd, Sn)   EMIT(FRINTxx_scalar(0b00, 0b10, Sn, Sd))
+#define FRINT64ZD(Dd, Dn)   EMIT(FRINTxx_scalar(0b01, 0b10, Dn, Dd))
+#define FRINT64XS(Sd, Sn)   EMIT(FRINTxx_scalar(0b00, 0b11, Sn, Sd))
+#define FRINT64XD(Dd, Dn)   EMIT(FRINTxx_scalar(0b01, 0b11, Dn, Dd))
+
+#define FRINT_scalar(type, rmode, Rn, Rd)   (0b11110<<24 | (type)<<22 | 1<<21 | 0b001<<18 | (rmode)<<15 | 0b10000<<10 | (Rn)<<5 | (Rd))
+#define FRINTZS(Sd, Sn)     EMIT(FRINT_scalar(0b00, 0b011, Sn, Sd))
+#define FRINTZD(Sd, Sn)     EMIT(FRINT_scalar(0b01, 0b011, Sn, Sd))
+#define FRINTXS(Sd, Sn)     EMIT(FRINT_scalar(0b00, 0b110, Sn, Sd))
+#define FRINTXD(Sd, Sn)     EMIT(FRINT_scalar(0b01, 0b110, Sn, Sd))
 
 // FMAX / FMIN
 #define FMINMAX_vector(Q, U, o1, sz, Rm, Rn, Rd)    ((Q)<<30 | (U)<<29 | 0b01110<<24 | (o1)<<23 | (sz)<<22 | 0b1<<21 | (Rm)<<16 | 0b11110<<11 | 1<<10 | (Rn)<<5 | (Rd))
@@ -1217,6 +1242,14 @@
 #define VTRNQ2_8(Vd, Vn, Vm)    EMIT(TRN_gen(1, 0b00, Vm, 1, Vn, Vd))
 
 // QXTN / QXTN2
+#define QXTN_scalar(U, size, Rn, Rd)    (0b01<<30 | (U)<<29 | 0b11110<<24 | (size)<<22 | 0b10000<<17 | 0b10100<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
+// Signed saturating extract Narrow, from D to S
+#define SQXTN_S_D(Sd, Dn)   EMIT(QXTN_scalar(0, 0b10, Dn, Sd))
+// Signed saturating extract Narrow, from S to H
+#define SQXTN_H_S(Hd, Sn)   EMIT(QXTN_scalar(0, 0b01, Sn, Hd))
+// Signed saturating extract Narrow, from H to B
+#define SQXTN_B_H(Bd, Hn)   EMIT(QXTN_scalar(0, 0b00, Hn, Bd))
+
 #define QXTN_vector(Q, U, size, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 0b10000<<17 | 0b10100<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
 // Signed saturating extract Narrow, takes Rn element and reduce 64->32 with Signed saturation and fit lower part of Rd
 #define SQXTN_32(Rd, Rn)    EMIT(QXTN_vector(0, 0, 0b10, Rn, Rd))
diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c
index 1b0c573e..783db2c3 100755
--- a/src/dynarec/dynarec_arm64_00.c
+++ b/src/dynarec/dynarec_arm64_00.c
@@ -1844,6 +1844,10 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             addr = dynarec64_DD(dyn, addr, ip, ninst, rex, rep, ok, need_epilog);
             break;
 
+        case 0xDF:
+            addr = dynarec64_DF(dyn, addr, ip, ninst, rex, rep, ok, need_epilog);
+            break;
+
         case 0xE8:
             INST_NAME("CALL Id");
             i32 = F32S;
diff --git a/src/dynarec/dynarec_arm64_df.c b/src/dynarec/dynarec_arm64_df.c
new file mode 100644
index 00000000..981fe830
--- /dev/null
+++ b/src/dynarec/dynarec_arm64_df.c
@@ -0,0 +1,289 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_arm64.h"
+#include "dynarec_arm64_private.h"
+#include "arm64_printer.h"
+#include "emu/x87emu_private.h"
+
+#include "dynarec_arm64_helper.h"
+#include "dynarec_arm64_functions.h"
+
+
+uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
+{
+    uint8_t nextop = F8;
+    uint8_t ed, wback, u8;
+    int fixedaddress;
+    int v1, v2;
+    int j32;
+    int s0;
+
+    MAYUSE(s0);
+    MAYUSE(v2);
+    MAYUSE(v1);
+    MAYUSE(j32);
+
+    switch(nextop) {
+        case 0xC0:
+        case 0xC1:
+        case 0xC2:
+        case 0xC3:
+        case 0xC4:
+        case 0xC5:
+        case 0xC6:
+        case 0xC7:
+            INST_NAME("FFREEP STx");
+            // not handling Tag...
+            x87_do_pop(dyn, ninst);
+            break;
+
+        case 0xE0:
+            INST_NAME("FNSTSW AX");
+            LDRw_U12(x2, xEmu, offsetof(x64emu_t, top));
+            LDRH_U12(x1, xEmu, offsetof(x64emu_t, sw));
+            BFIw(x1, x2, 11, 3); // inject top
+            BFIw(xRAX, x1, 0, 16);
+            break;
+        case 0xE8:
+        case 0xE9:
+        case 0xEA:
+        case 0xEB:
+        case 0xEC:
+        case 0xED:
+        case 0xEE:
+        case 0xEF:
+            INST_NAME("FUCOMIP ST0, STx");
+            SETFLAGS(X_ALL, SF_SET);
+            v1 = x87_get_st(dyn, ninst, x1, x2, 0);
+            v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7);
+            FCMPD(v1, v2);
+            FCOMI(x1, x2);
+            x87_do_pop(dyn, ninst);
+            break;
+        case 0xF0:
+        case 0xF1:
+        case 0xF2:
+        case 0xF3:
+        case 0xF4:
+        case 0xF5:
+        case 0xF6:
+        case 0xF7:
+            INST_NAME("FCOMIP ST0, STx");
+            SETFLAGS(X_ALL, SF_SET);
+            v1 = x87_get_st(dyn, ninst, x1, x2, 0);
+            v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7);
+            FCMPD(v1, v2);
+            FCOMI(x1, x2);
+            x87_do_pop(dyn, ninst);
+            break;
+
+        case 0xC8:
+        case 0xC9:
+        case 0xCA:
+        case 0xCB:
+        case 0xCC:
+        case 0xCD:
+        case 0xCE:
+        case 0xCF:
+        case 0xD0:
+        case 0xD1:
+        case 0xD2:
+        case 0xD3:
+        case 0xD4:
+        case 0xD5:
+        case 0xD6:
+        case 0xD7:
+        case 0xD8:
+        case 0xD9:
+        case 0xDA:
+        case 0xDB:
+        case 0xDC:
+        case 0xDD:
+        case 0xDE:
+        case 0xDF:
+        case 0xE1:
+        case 0xE2:
+        case 0xE3:
+        case 0xE4:
+        case 0xE5:
+        case 0xE6:
+        case 0xE7:
+        case 0xF8:
+        case 0xF9:
+        case 0xFA:
+        case 0xFB:
+        case 0xFC:
+        case 0xFD:
+        case 0xFE:
+        case 0xFF:
+            DEFAULT;
+            break;
+
+        default:
+            switch((nextop>>3)&7) {
+                case 0:
+                    INST_NAME("FILD ST0, Ew");
+                    v1 = x87_do_push(dyn, ninst);
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<1, 1, rex, 0, 0);
+                    LDRSHw_U12(x1, wback, fixedaddress);
+                    SCVTFDw(v1, x1);
+                    break;
+                case 1:
+                    INST_NAME("FISTTP Ew, ST0");
+                    v1 = x87_get_st(dyn, ninst, x1, x2, 0);
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 0);
+                    ed = x1;
+                    s0 = fpu_get_scratch(dyn);
+                    #if 0
+                    // this version needs ARM v8.5, //TODO: add detection of this extensio to use it
+                    FRINT32ZD(s0, v1);
+                    // no saturation instruction on Arm, so using NEON
+                    VFCVTZSd(s0, s0);
+                    SQXTN_S_D(s0, s0);
+                    SQXTN_H_S(s0, s0);
+                    VSTR16_U12(s0, wback, fixedaddress);
+                    #else
+                    MSR_fpsr(x5);
+                    BFCw(x5, FPSR_IOC, 1);  // reset IOC bit
+                    MRS_fpsr(x5);
+                    VFCVTZSd(s0, v1);
+                    SQXTN_S_D(s0, s0);
+                    SQXTN_H_S(s0, s0);
+                    VSTR16_U12(s0, wback, fixedaddress);
+                    MSR_fpsr(x5);   // get back FPSR to check the IOC bit
+                    TBZ_NEXT(x5, FPSR_IOC);
+                    MOV32w(x5, 0x8000);
+                    STRH_U12(x5, wback, fixedaddress);
+                    #endif
+                    x87_do_pop(dyn, ninst);
+                    break;
+                case 2:
+                    INST_NAME("FIST Ew, ST0");
+                    v1 = x87_get_st(dyn, ninst, x1, x2, 0);
+                    u8 = x87_setround(dyn, ninst, x1, x2, x4);
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 0);
+                    ed = x1;
+                    s0 = fpu_get_scratch(dyn);
+                    #if 0
+                    FRINT32XD(s0, v1);
+                    // no saturation instruction on Arm, so using NEON
+                    VFCVTZSd(s0, s0);
+                    SQXTN_S_D(s0, s0);
+                    SQXTN_H_S(s0, s0);
+                    VSTR16_U12(s0, wback, fixedaddress);
+                    #else
+                    MSR_fpsr(x5);
+                    BFCw(x5, FPSR_IOC, 1);  // reset IOC bit
+                    MRS_fpsr(x5);
+                    FRINTXD(s0, v1);
+                    VFCVTZSd(s0, s0);
+                    SQXTN_S_D(s0, s0);
+                    SQXTN_H_S(s0, s0);
+                    VSTR16_U12(s0, wback, fixedaddress);
+                    MSR_fpsr(x5);   // get back FPSR to check the IOC bit
+                    TBZ_NEXT(x5, FPSR_IOC);
+                    MOV32w(x5, 0x8000);
+                    STRH_U12(x5, wback, fixedaddress);
+                    #endif
+                    x87_restoreround(dyn, ninst, u8);
+                    break;
+                case 3:
+                    INST_NAME("FISTP Ew, ST0");
+                    v1 = x87_get_st(dyn, ninst, x1, x2, 0);
+                    u8 = x87_setround(dyn, ninst, x1, x2, x4);
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<1, 1, rex, 0, 0);
+                    ed = x1;
+                    s0 = fpu_get_scratch(dyn);
+                    #if 0
+                    FRINT32XD(s0, v1);
+                    // no saturation instruction on Arm, so using NEON
+                    VFCVTZSd(s0, s0);
+                    SQXTN_S_D(s0, s0);
+                    SQXTN_H_S(s0, s0);
+                    VSTR16_U12(s0, wback, fixedaddress);
+                    #else
+                    MSR_fpsr(x5);
+                    BFCw(x5, FPSR_IOC, 1);  // reset IOC bit
+                    MRS_fpsr(x5);
+                    FRINTXD(s0, v1);
+                    VFCVTZSd(s0, s0);
+                    SQXTN_S_D(s0, s0);
+                    SQXTN_H_S(s0, s0);
+                    VSTR16_U12(s0, wback, fixedaddress);
+                    MSR_fpsr(x5);   // get back FPSR to check the IOC bit
+                    TBZ_NEXT(x5, FPSR_IOC);
+                    MOV32w(x5, 0x8000);
+                    STRH_U12(x5, wback, fixedaddress);
+                    #endif
+                    x87_do_pop(dyn, ninst);
+                    x87_restoreround(dyn, ninst, u8);
+                    break;
+                case 4:
+                    INST_NAME("FBLD ST0, tbytes");
+                    x87_do_push_empty(dyn, ninst, x1);
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0);
+                    if(ed!=x1) {MOVx_REG(x1, ed);}
+                    CALL(fpu_fbld, -1);
+                    break;
+                case 5:
+                    INST_NAME("FILD ST0, i64");
+                    v1 = x87_do_push(dyn, ninst);
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 7, rex, 0, 0);
+                    LDRx_U12(x1, wback, fixedaddress);
+                    SCVTFDx(v1, x1);
+                    break;
+                case 6:
+                    INST_NAME("FBSTP tbytes, ST0");
+                    x87_forget(dyn, ninst, x1, x2, 0);
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0);
+                    if(ed!=x1) {MOVx_REG(x1, ed);}
+                    CALL(fpu_fbst, -1);
+                    x87_do_pop(dyn, ninst);
+                    break;
+                case 7:
+                    INST_NAME("FISTP i64, ST0");
+                    v1 = x87_get_st(dyn, ninst, x1, x2, 0);
+                    u8 = x87_setround(dyn, ninst, x1, x2, x4);
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<3, 7, rex, 0, 0);
+                    ed = x1;
+                    s0 = fpu_get_scratch(dyn);
+                    #if 0
+                    FRINT64XD(s0, v1);
+                    VFCVTZSd(s0, s0);
+                    VSTR64_U12(s0, wback, fixedaddress);
+                    #else
+                    MSR_fpsr(x5);
+                    BFCw(x5, FPSR_IOC, 1);  // reset IOC bit
+                    MRS_fpsr(x5);
+                    FRINTXD(s0, v1);
+                    VFCVTZSd(s0, s0);
+                    VSTR64_U12(s0, wback, fixedaddress);
+                    MSR_fpsr(x5);   // get back FPSR to check the IOC bit
+                    TBZ_NEXT(x5, FPSR_IOC);
+                    MOV64x(x5, 0x8000000000000000LL);
+                    STRx_U12(x5, wback, fixedaddress);
+                    #endif
+                    x87_restoreround(dyn, ninst, u8);
+                    break;
+                default:
+                    DEFAULT;
+            }
+    }
+    return addr;
+}
+
diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h
index 62539654..61a115c5 100755
--- a/src/dynarec/dynarec_arm64_helper.h
+++ b/src/dynarec/dynarec_arm64_helper.h
@@ -328,6 +328,10 @@
 #define CBZx_NEXT(reg)    \
     j32 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->arm_size)):0;    \
     CBZx(reg, j32)
+// Test bit N of A and branch to next instruction if not set
+#define TBZ_NEXT(A, N)    \
+    j32 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->arm_size)):0;    \
+    TBZ(A, N, j32)
 // Branch to MARKSEG if cond (use j32)
 #define B_MARKSEG(cond)    \
     j32 = GETMARKSEG-(dyn->arm_size);    \
@@ -845,7 +849,7 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
 //uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 //uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
-//uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
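When the source value cannot be represented (NaN or out of range), the x86 FIST/FISTP family stores the "integer indefinite" pattern (0x8000 for a 16-bit destination, 0x8000000000000000 for a 64-bit one). Since the ARMv8.5 FRINT32Z/FRINT64Z route is kept under #if 0 pending feature detection, the committed code detects the condition through FPSR instead: it resets the cumulative invalid-operation bit (FPSR_IOC) before converting, checks it again afterwards, and overwrites the stored result with the indefinite value when it is set (the TBZ_NEXT / MOV32w / STRH_U12 tail). A hedged C model of the behaviour that sequence aims for, shown for the 16-bit case (the function name is illustrative, not a box64 API):

```c
#include <math.h>
#include <stdint.h>

/* Sketch of the intended FISTP m16int behaviour: round with the current
 * FP rounding mode (the FRINTXD step), then store either the integer or
 * the x86 "integer indefinite" 0x8000 when the conversion is invalid. */
static void fistp_i16(double st0, int16_t *dst)
{
    double r = rint(st0);                   /* FRINTXD analogue */
    if (isnan(r) || r < (double)INT16_MIN || r > (double)INT16_MAX)
        *dst = (int16_t)0x8000;             /* invalid -> integer indefinite */
    else
        *dst = (int16_t)r;                  /* in range: store the integer */
}
```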