diff options
Diffstat (limited to 'src')
23 files changed, 594 insertions, 709 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index a119f14c..9f573f8f 100755 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -1231,7 +1231,9 @@ #define SCVTFDx(Dd, Xn) EMIT(SCVTF_scalar(1, 0b01, 0b00, 0b010, Xn, Dd)) #define SCVTF_vector_scalar(U, sz, Rn, Rd) (1<<30 | (U)<<29 | 0b11110<<24 | (sz)<<22 | 0b10000<<17 | 0b11101<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +// Convert Vn from i32 to Vd float #define SCVTFSS(Vd, Vn) EMIT(SCVTF_vector_scalar(0, 0, Vn, Vd)) +// Convert Vn from i64 to Vd double #define SCVTFDD(Vd, Vn) EMIT(SCVTF_vector_scalar(0, 1, Vn, Vd)) #define SCVTF_vector(Q, U, sz, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (sz)<<22 | 0b10000<<17 | 0b11101<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index 77f0660e..4f327f5b 100755 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -108,7 +108,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin nextop = F8; GETEB(x1, 0); GETGB(x2); - emit_or8(dyn, ninst, x1, x2, x4, x2); + emit_or8(dyn, ninst, x1, x2, x4, x5); EBBACK; break; case 0x09: @@ -562,7 +562,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin i64 = F32S; if(PK(0)==0xC3) { MESSAGE(LOG_DUMP, "PUSH then RET, using indirect\n"); - TABLE64(x3, ip+1); + TABLE64(x3, addr-4); LDRSW_U12(x1, x3, 0); PUSH1(x1); } else { @@ -869,7 +869,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MARKLOCK; // do the swap with exclusive locking LDAXRB(x1, ed); - // do the swap 14 -> strb(ed), 1 -> gd + // do the swap 4 -> strb(ed), 1 -> gd STLXRB(x3, x4, ed); CBNZx_MARKLOCK(x3); SMDMB(); @@ -1040,7 +1040,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x8F: INST_NAME("POP Ed"); nextop = F8; - if((nextop&0xC0)==0xC0) { + if(MODREG) { POP1(xRAX+(nextop&7)+(rex.b<<3)); } else { POP1(x2); // so this can handle POP [ESP] and maybe some variant too @@ -2303,7 +2303,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 2: INST_NAME("NOT Ed"); - GETED(4); + GETED(0); MVNxw_REG(ed, ed); WBACK; break; diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index ed2a120b..fe039dd7 100755 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -24,47 +24,6 @@ #include "dynarec_arm64_functions.h" #include "dynarec_arm64_helper.h" -#define GETG \ - gd = ((nextop&0x38)>>3)+(rex.r<<3) \ - -#define GETGX(a, w) \ - gd = ((nextop&0x38)>>3)+(rex.r<<3); \ - a = sse_get_reg(dyn, ninst, x1, gd, w) - -#define GETGX_empty(a) \ - gd = ((nextop&0x38)>>3)+(rex.r<<3); \ - a = sse_get_reg_empty(dyn, ninst, x1, gd) - -#define GETEX(a, w, D) \ - if(MODREG) { \ - a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \ - } else { \ - SMREAD(); \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, D); \ - a = fpu_get_scratch(dyn); \ - VLDR128_U12(a, ed, fixedaddress); \ - } - -#define GETGM(a) \ - gd = ((nextop&0x38)>>3); \ - a = mmx_get_reg(dyn, ninst, x1, x2, x3, gd) - -#define GETEM(a, D) \ - if(MODREG) { \ - a = mmx_get_reg(dyn, ninst, x1, x2, x3, (nextop&7)); \ - } else { \ - SMREAD(); \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, D); \ - a = fpu_get_scratch(dyn); \ - VLDR64_U12(a, ed, fixedaddress); \ 
- } - -#define PUTEM(a) \ - if(!MODREG) { \ - VSTR64_U12(a, ed, fixedaddress); \ - SMWRITE2(); \ - } - uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) { (void)ip; (void)rep; (void)need_epilog; @@ -107,6 +66,10 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin GETIP(ip); STORE_XEMU_CALL(xRIP); CALL(arm_ud, -1); + LOAD_XEMU_CALL(xRIP); + jump_to_epilog(dyn, 0, xRIP, ninst); + *need_epilog = 0; + *ok = 0; break; case 0x05: @@ -174,9 +137,9 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VMOVQ(v0, v1); } else { v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0); VLDR128_U12(v0, ed, fixedaddress); // no alignment issue with ARMv8 NEON :) - SMWRITE2(); } break; case 0x11: @@ -435,10 +398,10 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(MODREG) { // reg <= reg REVxw(xRAX+(nextop&7)+(rex.b<<3), gd); } else { // mem <= reg - SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0); REVxw(x1, gd); STRxw_U12(x1, ed, fixedaddress); + SMWRITE2(); } break; @@ -493,7 +456,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin nextop = F8; GETGD; MOV32w(gd, 0); - if((nextop&0xC0)==0xC0) { + if(MODREG) { // EX is an xmm reg GETEX(q0, 0, 0); VMOVQDto(x1, q0, 0); @@ -721,37 +684,36 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("PUNPCKHBW Gm,Em"); nextop = F8; GETGM(q0); - GETEM(q1, 1); + GETEM(q1, 0); VZIP2_8(q0, q0, q1); break; case 0x69: INST_NAME("PUNPCKHWD Gm,Em"); nextop = F8; GETGM(q0); - GETEM(q1, 1); + GETEM(q1, 0); VZIP2_16(q0, q0, q1); break; case 0x6A: INST_NAME("PUNPCKHDQ Gm,Em"); nextop = F8; GETGM(q0); - GETEM(q1, 1); + GETEM(q1, 0); VZIP2_32(q0, q0, q1); break; case 0x6B: INST_NAME("PACKSSDW Gm,Em"); nextop = F8; GETGM(v0); + q0 = fpu_get_scratch(dyn); + VMOVeD(q0, 0, v0, 0); if(MODREG) { GETEM(v1, 0); - q0 = fpu_get_scratch(dyn); VMOVeD(q0, 1, v1, 0); } else { - q0 = fpu_get_scratch(dyn); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); VLD1_64(q0, 1, ed); } - VMOVeD(q0, 0, v0, 0); SQXTN_16(v0, q0); break; @@ -765,6 +727,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(rex.w) { FMOVDx(v0, ed); } else { + VEOR(v0, v0, v0); FMOVSw(v0, ed); } } else { @@ -885,9 +848,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else if(u8) { VSHR_16(q0, q0, u8); } - if(!MODREG) { - VSTR64_U12(q0, ed, fixedaddress); - } + PUTEM(q0); } break; case 4: @@ -898,9 +859,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(u8) { VSSHR_16(q0, q0, u8); } - if(!MODREG) { - VSTR64_U12(q0, ed, fixedaddress); - } + PUTEM(q0); break; case 6: INST_NAME("PSLLW Em, Ib"); @@ -912,9 +871,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { VSHL_16(q0, q0, u8); } - if(!MODREG) { - VSTR64_U12(q0, ed, fixedaddress); - } + PUTEM(q0); } break; default: @@ -935,9 +892,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else if(u8) { VSHR_32(d0, d0, u8); } - if(!MODREG) { - VSTR64_U12(d0, ed, fixedaddress); - } + PUTEM(d0); } break; case 4: @@ -948,9 +903,7 @@ uintptr_t 
dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(u8) { VSSHR_32(d0, d0, u8); } - if(!MODREG) { - VSTR64_U12(d0, ed, fixedaddress); - } + PUTEM(d0); break; case 6: INST_NAME("PSLLD Em, Ib"); @@ -962,9 +915,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { VSHL_32(d0, d0, u8); } - if(!MODREG) { - VSTR64_U12(d0, ed, fixedaddress); - } + PUTEM(d0); } break; default: @@ -1040,9 +991,9 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if((nextop&0xC0)==0xC0) { ed = xRAX + (nextop&7) + (rex.b<<3); if(rex.w) { - VMOVQDto(ed, v0, 0); + FMOVxD(ed, v0); } else { - VMOVSto(ed, v0, 0); + FMOVwS(ed, v0); MOVxw_REG(ed, ed); } } else { @@ -1852,10 +1803,10 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 0xE7: - INST_NAME("MOVNTQ Em, Gm"); // Non Temporal par not handled for now + INST_NAME("MOVNTQ Em, Gm"); nextop = F8; gd = (nextop&0x38)>>3; - if((nextop&0xC0)==0xC0) { + if(MODREG) { DEFAULT; } else { v0 = mmx_get_reg(dyn, ninst, x1, x2, x3, gd); @@ -1905,7 +1856,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin nextop = F8; gd = ((nextop&0x38)>>3); if(MODREG && ((nextop&7))==gd) { - // special case for PXOR Gx, Gx + // special case for PXOR Gm, Gm q0 = mmx_get_reg_empty(dyn, ninst, x1, x2, x3, gd); VEOR(q0, q0, q0); } else { diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c index b279b9c8..c6dfb44d 100644 --- a/src/dynarec/arm64/dynarec_arm64_64.c +++ b/src/dynarec/arm64/dynarec_arm64_64.c @@ -103,8 +103,9 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin nextop = F8; GETG; if(MODREG) { + ed = (nextop&7)+ (rex.b<<3); v0 = sse_get_reg(dyn, ninst, x1, gd, 1); - q0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3), 0); + q0 = sse_get_reg(dyn, ninst, x1, ed, 0); VMOVeS(v0, 0, q0, 0); } else { grab_segdata(dyn, addr, ninst, x4, seg); @@ -161,7 +162,8 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin GETG; v0 = sse_get_reg(dyn, ninst, x1, gd, 0); if(MODREG) { - q0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3), 1); + ed = (nextop&7)+ (rex.b<<3); + q0 = sse_get_reg(dyn, ninst, x1, ed, 1); VMOVeS(q0, 0, v0, 0); } else { grab_segdata(dyn, addr, ninst, x4, seg); @@ -201,57 +203,69 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0xAF: - INST_NAME("IMUL Gd, Ed"); - SETFLAGS(X_ALL, SF_PENDING); - nextop = F8; - grab_segdata(dyn, addr, ninst, x4, seg); - GETGD; - GETEDO(x4, 0); - if(rex.w) { - // 64bits imul - UFLAG_IF { - SMULH(x3, gd, ed); - MULx(gd, gd, ed); - UFLAG_OP1(x3); - UFLAG_RES(gd); - UFLAG_DF(x3, d_imul64); - } else { - MULxw(gd, gd, ed); - } - } else { - // 32bits imul - UFLAG_IF { - SMULL(gd, gd, ed); - UFLAG_RES(gd); - LSRx(x3, gd, 32); - UFLAG_OP1(x3); - UFLAG_DF(x3, d_imul32); - MOVw_REG(gd, gd); - } else { - MULxw(gd, gd, ed); - } + switch(rep) { + case 0: + INST_NAME("IMUL Gd, Ed"); + SETFLAGS(X_ALL, SF_PENDING); + nextop = F8; + grab_segdata(dyn, addr, ninst, x4, seg); + GETGD; + GETEDO(x4, 0); + if(rex.w) { + // 64bits imul + UFLAG_IF { + SMULH(x3, gd, ed); + MULx(gd, gd, ed); + UFLAG_OP1(x3); + UFLAG_RES(gd); + UFLAG_DF(x3, d_imul64); + } else { + MULxw(gd, gd, ed); + } + } else { + // 32bits imul + UFLAG_IF { + SMULL(gd, gd, ed); + UFLAG_RES(gd); + LSRx(x3, gd, 32); + UFLAG_OP1(x3); + UFLAG_DF(x3, d_imul32); + MOVw_REG(gd, gd); + } else { + MULxw(gd, 
gd, ed); + } + } + break; + default: + DEFAULT; } break; case 0xB6: - INST_NAME("MOVZX Gd, Eb"); - nextop = F8; - grab_segdata(dyn, addr, ninst, x4, seg); - GETGD; - if(MODREG) { - if(rex.rex) { - eb1 = xRAX+(nextop&7)+(rex.b<<3); - eb2 = 0; \ - } else { - ed = (nextop&7); - eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx - eb2 = (ed&4)>>2; // L or H - } - UBFXxw(gd, eb1, eb2*8, 8); - } else { - SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0); - LDRB_REG(gd, ed, x4); + switch(rep) { + case 0: + INST_NAME("MOVZX Gd, Eb"); + nextop = F8; + grab_segdata(dyn, addr, ninst, x4, seg); + GETGD; + if(MODREG) { + if(rex.rex) { + eb1 = xRAX+(nextop&7)+(rex.b<<3); + eb2 = 0; \ + } else { + ed = (nextop&7); + eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx + eb2 = (ed&4)>>2; // L or H + } + UBFXxw(gd, eb1, eb2*8, 8); + } else { + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0); + LDRB_REG(gd, ed, x4); + } + break; + default: + DEFAULT; } break; @@ -326,7 +340,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 0x66: - addr = dynarec64_6664(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + addr = dynarec64_6664(dyn, addr, ip, ninst, rex, seg, ok, need_epilog); break; case 0x80: @@ -814,7 +828,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 2: INST_NAME("NOT Ed"); - GETEDO(x6, 4); + GETEDO(x6, 0); MVNxw_REG(ed, ed); WBACKO(x6); break; diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c index 493544ea..951518b5 100755 --- a/src/dynarec/arm64/dynarec_arm64_66.c +++ b/src/dynarec/arm64/dynarec_arm64_66.c @@ -295,9 +295,11 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 0x64: - addr = dynarec64_6664(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + addr = dynarec64_6664(dyn, addr, ip, ninst, rex, _FS, ok, need_epilog); + break; + case 0x65: + addr = dynarec64_6664(dyn, addr, ip, ninst, rex, _GS, ok, need_epilog); break; - case 0x66: addr = dynarec64_66(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); break; diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index 5ff39e2b..814c2552 100755 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -22,27 +22,6 @@ #include "dynarec_arm64_functions.h" #include "dynarec_arm64_helper.h" -// Get EX as a quad -#define GETEX(a, w, D) \ - if(MODREG) { \ - a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \ - } else { \ - SMREAD(); \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, D); \ - a = fpu_get_scratch(dyn); \ - VLDR128_U12(a, ed, fixedaddress); \ - } - -#define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) - -#define GETGX(a, w) \ - gd = ((nextop&0x38)>>3)+(rex.r<<3); \ - a = sse_get_reg(dyn, ninst, x1, gd, w) - -#define GETGX_empty(a) \ - gd = ((nextop&0x38)>>3)+(rex.r<<3); \ - a = sse_get_reg_empty(dyn, ninst, x1, gd) - uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) { (void)ip; (void)rep; (void)need_epilog; @@ -684,9 +663,9 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n INST_NAME("PINSRB Gx, ED, Ib"); nextop = F8; GETGX(q0, 1); - GETEB(x1, 1); + GETED(1); u8 = F8; - VMOVQBfrom(q0, (u8&15), x1); + VMOVQBfrom(q0, (u8&15), ed); break; case 0x22: @@ -1215,10 
+1194,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else if(u8) { VSHRQ_16(q0, q0, u8); } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - SMWRITE2(); - } + PUTEX(q0); } break; case 4: @@ -1229,10 +1205,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n if(u8) { VSSHRQ_16(q0, q0, u8); } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - SMWRITE2(); - } + PUTEX(q0); break; case 6: INST_NAME("PSLLW Ex, Ib"); @@ -1244,10 +1217,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else { VSHLQ_16(q0, q0, u8); } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - SMWRITE2(); - } + PUTEX(q0); } break; default: @@ -1268,10 +1238,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else if(u8) { VSHRQ_32(q0, q0, u8); } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - SMWRITE2(); - } + PUTEX(q0); } break; case 4: @@ -1282,10 +1249,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n if(u8) { VSSHRQ_32(q0, q0, u8); } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - SMWRITE2(); - } + PUTEX(q0); break; case 6: INST_NAME("PSLLD Ex, Ib"); @@ -1297,10 +1261,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else { VSHLQ_32(q0, q0, u8); } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - SMWRITE2(); - } + PUTEX(q0); } break; default: @@ -1320,10 +1281,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else if(u8) { VSHRQ_64(q0, q0, u8); } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - SMWRITE2(); - } + PUTEX(q0); } break; case 3: @@ -1338,10 +1296,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n VEORQ(q1, q1, q1); VEXTQ_8(q0, q0, q1, u8); } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - SMWRITE2(); - } + PUTEX(q0); } break; case 6: @@ -1354,10 +1309,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else { VSHLQ_64(q0, q0, u8); } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - SMWRITE2(); - } + PUTEX(q0); } break; case 7: @@ -1372,10 +1324,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n VEORQ(q1, q1, q1); VEXTQ_8(q0, q1, q0, 16-u8); } - if(!MODREG) { - VSTR128_U12(q0, ed, fixedaddress); - SMWRITE2(); - } + PUTEX(q0); } break; default: diff --git a/src/dynarec/arm64/dynarec_arm64_6664.c b/src/dynarec/arm64/dynarec_arm64_6664.c index 362dd5de..92abeac6 100644 --- a/src/dynarec/arm64/dynarec_arm64_6664.c +++ b/src/dynarec/arm64/dynarec_arm64_6664.c @@ -24,9 +24,9 @@ #define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) -uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) +uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int seg, int* ok, int* need_epilog) { - (void)ip; (void)rep; (void)need_epilog; + (void)ip; (void)need_epilog; uint8_t opcode = F8; uint8_t nextop; @@ -64,7 +64,7 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n if(MODREG) { v1 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3), 0); } else { - grab_segdata(dyn, addr, ninst, x4, _FS); + grab_segdata(dyn, addr, ninst, x4, seg); SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); v1 = fpu_get_scratch(dyn); \ @@ -82,7 +82,7 @@ 
uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); FMOVD(v1, v0); } else { - grab_segdata(dyn, addr, ninst, x4, _FS); + grab_segdata(dyn, addr, ninst, x4, seg); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); VSTR64_REG(v0, ed, x4); SMWRITE(); @@ -108,7 +108,7 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } } } else { - grab_segdata(dyn, addr, ninst, x4, _FS); + grab_segdata(dyn, addr, ninst, x4, seg); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0); if(rex.w) { STRx_REG(gd, ed, x4); @@ -133,7 +133,7 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } } } else { // mem <= reg - grab_segdata(dyn, addr, ninst, x4, _FS); + grab_segdata(dyn, addr, ninst, x4, seg); SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0); if(rex.w) { diff --git a/src/dynarec/arm64/dynarec_arm64_66f0.c b/src/dynarec/arm64/dynarec_arm64_66f0.c index bfdf24ee..2a436ea1 100644 --- a/src/dynarec/arm64/dynarec_arm64_66f0.c +++ b/src/dynarec/arm64/dynarec_arm64_66f0.c @@ -89,7 +89,6 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n ed = xRAX+(nextop&7)+(rex.b<<3); wback = 0; UXTHw(x1, ed); - UFLAG_IF {emit_cmp16(dyn, ninst, x6, x1, x3, x4, x5);} CMPSxw_REG(x6, x1); B_MARK(cNE); BFIx(ed, gd, 0, 16); @@ -120,7 +119,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } MARK; // Common part (and fallback for EAX != Ed) - UFLAG_IF {emit_cmp32(dyn, ninst, rex, x6, x1, x3, x4, x5);} + UFLAG_IF {emit_cmp16(dyn, ninst, x6, x1, x3, x4, x5);} BFIx(xRAX, x1, 0, 16); SMDMB(); break; @@ -347,9 +346,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n if(opcode==0x81) i32 = F16S; else i32 = F8S; if(i32) { MOV32w(x5, i32); - UXTHw(x6, ed); emit_cmp16(dyn, ninst, x6, x5, x3, x4, x6); - BFIx(ed, x6, 0, 16); } else { emit_cmp16_0(dyn, ninst, ed, x3, x4); } diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c index 094f68e7..a68cc363 100755 --- a/src/dynarec/arm64/dynarec_arm64_67.c +++ b/src/dynarec/arm64/dynarec_arm64_67.c @@ -175,53 +175,67 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x2E: // no special check... 
case 0x2F: - if(rep) { - DEFAULT; - } else { - if(opcode==0x2F) {INST_NAME("COMISS Gx, Ex");} else {INST_NAME("UCOMISS Gx, Ex");} - SETFLAGS(X_ALL, SF_SET); - nextop = F8; - GETGX(v0, 0); - if(MODREG) { - s0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3), 0); - } else { - s0 = fpu_get_scratch(dyn); - SMREAD(); - addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); - VLDR32_U12(s0, ed, fixedaddress); - } - FCMPS(v0, s0); - FCOMI(x1, x2); + switch(rep) { + case 0: + if(opcode==0x2F) {INST_NAME("COMISS Gx, Ex");} else {INST_NAME("UCOMISS Gx, Ex");} + SETFLAGS(X_ALL, SF_SET); + nextop = F8; + GETGX(v0, 0); + if(MODREG) { + s0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3), 0); + } else { + s0 = fpu_get_scratch(dyn); + SMREAD(); + addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); + VLDR32_U12(s0, ed, fixedaddress); + } + FCMPS(v0, s0); + FCOMI(x1, x2); + break; + default: + DEFAULT; } break; case 0x6F: - INST_NAME("MOVQ Gm, Em"); - nextop = F8; - GETGm; - if(MODREG) { - v1 = mmx_get_reg(dyn, ninst, x1, x2, x3, nextop&7); // no rex.b on MMX - v0 = mmx_get_reg_empty(dyn, ninst, x1, x2, x3, gd); - VMOV(v0, v1); - } else { - v0 = mmx_get_reg_empty(dyn, ninst, x1, x2, x3, gd); - SMREAD(); - addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); - VLDR64_U12(v0, ed, fixedaddress); + switch(rep) { + case 0: + INST_NAME("MOVQ Gm, Em"); + nextop = F8; + GETGm; + if(MODREG) { + v1 = mmx_get_reg(dyn, ninst, x1, x2, x3, nextop&7); // no rex.b on MMX + v0 = mmx_get_reg_empty(dyn, ninst, x1, x2, x3, gd); + VMOV(v0, v1); + } else { + v0 = mmx_get_reg_empty(dyn, ninst, x1, x2, x3, gd); + SMREAD(); + addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); + VLDR64_U12(v0, ed, fixedaddress); + } + break; + default: + DEFAULT; } break; case 0x7F: - INST_NAME("MOVQ Em, Gm"); - nextop = F8; - GETGM(v0); - if(MODREG) { - v1 = mmx_get_reg_empty(dyn, ninst, x1, x2, x3, nextop&7); - VMOV(v1, v0); - } else { - addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); - VSTR64_U12(v0, ed, fixedaddress); - SMWRITE(); + switch(rep) { + case 0: + INST_NAME("MOVQ Em, Gm"); + nextop = F8; + GETGM(v0); + if(MODREG) { + v1 = mmx_get_reg_empty(dyn, ninst, x1, x2, x3, nextop&7); + VMOV(v1, v0); + } else { + addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); + VSTR64_U12(v0, ed, fixedaddress); + SMWRITE(); + } + break; + default: + DEFAULT; } break; diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c index ce101f46..25587a9c 100644 --- a/src/dynarec/arm64/dynarec_arm64_d9.c +++ b/src/dynarec/arm64/dynarec_arm64_d9.c @@ -281,7 +281,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin #else v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); u8 = x87_setround(dyn, ninst, x1, x2, x3); - FRINTID(v1, v1); + if(ST_IS_F(0)) { + FRINTIS(v1, v1); + } else { + FRINTID(v1, v1); + } x87_restoreround(dyn, ninst, u8); #endif break; @@ -324,14 +328,10 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0: INST_NAME("FLD ST0, float[ED]"); v1 = x87_do_push(dyn, ninst, x1, box64_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F); - if(ST_IS_F(0)) - s0 = v1; - else - s0 = fpu_get_scratch(dyn); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 
0); - VLDR32_U12(s0, ed, fixedaddress); + VLDR32_U12(v1, ed, fixedaddress); if(!ST_IS_F(0)) { - FCVT_D_S(v1, s0); + FCVT_D_S(v1, v1); } break; case 2: @@ -349,14 +349,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 3: INST_NAME("FSTP float[ED], ST0"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_F); - if(ST_IS_F(0)) - s0 = v1; - else { - s0 = fpu_get_scratch(dyn); - FCVT_S_D(s0, v1); - } addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); - VSTR32_U12(s0, ed, fixedaddress); + if(!ST_IS_F(0)) { + FCVT_S_D(v1, v1); + } + VSTR32_U12(v1, ed, fixedaddress); x87_do_pop(dyn, ninst, x3); break; case 4: diff --git a/src/dynarec/arm64/dynarec_arm64_db.c b/src/dynarec/arm64/dynarec_arm64_db.c index 5f8a1396..b9d71080 100644 --- a/src/dynarec/arm64/dynarec_arm64_db.c +++ b/src/dynarec/arm64/dynarec_arm64_db.c @@ -189,24 +189,18 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0: INST_NAME("FILD ST0, Ed"); v1 = x87_do_push(dyn, ninst, x1, NEON_CACHE_ST_D); - s0 = fpu_get_scratch(dyn); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); - VLDR32_U12(s0, ed, fixedaddress); - SXTL_32(v1, s0); - SCVTFDD(v1, v1); + VLDR32_U12(v1, ed, fixedaddress); + SXTL_32(v1, v1); // i32 -> i64 + SCVTFDD(v1, v1); // i64 -> double break; case 1: INST_NAME("FISTTP Ed, ST0"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - wback = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); - ed = x1; - } + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); s0 = fpu_get_scratch(dyn); #if 0 + ed = x1; FRINT32ZD(s0, v1); FCVTZSwD(ed, s0); WBACK; @@ -230,15 +224,10 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FIST Ed, ST0"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); u8 = x87_setround(dyn, ninst, x1, x2, x4); // x1 have the modified RPSCR reg - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - wback = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); - ed = x1; - } + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); s0 = fpu_get_scratch(dyn); #if 0 + ed = x1; FRINT32XD(s0, v1); FCVTZSwD(ed, s0); WBACK; @@ -262,15 +251,10 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FISTP Ed, ST0"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); u8 = x87_setround(dyn, ninst, x1, x2, x4); // x1 have the modified RPSCR reg - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - wback = 0; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); - ed = x1; - } + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); s0 = fpu_get_scratch(dyn); #if 0 + ed = x1; FRINT32XD(s0, v1); FCVTZSwD(ed, s0); WBACK; @@ -294,13 +278,18 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 5: INST_NAME("FLD tbyte"); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); - if(PK(0)==0xDB && ((PK(1)>>3)&7)==7) { + if((PK(0)==0xDB && ((PK(1)>>3)&7)==7) || (PK(0)>=0x40 && PK(0)<=0x4f && PK(1)==0xDB && ((PK(2)>>3)&7)==7)) { // the FLD is immediatly 
followed by an FSTP LDRx_U12(x5, ed, 0); LDRH_U12(x6, ed, 8); // no persistant scratch register, so unrool both instruction here... MESSAGE(LOG_DUMP, "\tHack: FSTP tbyte\n"); - nextop = F8; //0xDB + nextop = F8; // 0xDB or rex + if(nextop>=0x40 && nextop<=0x4f) { + rex.rex = nextop; + nextop = F8; //0xDB + } else + rex.rex = 0; nextop = F8; //modrm addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); STRx_U12(x5, ed, 0); diff --git a/src/dynarec/arm64/dynarec_arm64_dc.c b/src/dynarec/arm64/dynarec_arm64_dc.c index da4ba97c..c729ed09 100644 --- a/src/dynarec/arm64/dynarec_arm64_dc.c +++ b/src/dynarec/arm64/dynarec_arm64_dc.c @@ -182,7 +182,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FADD ST0, double[ED]"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VLDR64_U12(v2, wback, fixedaddress); FADDD(v1, v1, v2); break; @@ -190,7 +190,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FMUL ST0, double[ED]"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VLDR64_U12(v2, wback, fixedaddress); FMULD(v1, v1, v2); break; @@ -198,7 +198,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FCOM ST0, double[ED]"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VLDR64_U12(v2, wback, fixedaddress); FCMPD(v1, v2); FCOM(x1, x2, x3); @@ -207,7 +207,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FCOMP ST0, double[ED]"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VLDR64_U12(v2, wback, fixedaddress); FCMPD(v1, v2); FCOM(x1, x2, x3); @@ -217,7 +217,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FSUB ST0, double[ED]"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VLDR64_U12(v2, wback, fixedaddress); FSUBD(v1, v1, v2); break; @@ -225,7 +225,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FSUBR ST0, double[ED]"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VLDR64_U12(v2, wback, fixedaddress); FSUBD(v1, 
v2, v1); break; @@ -233,7 +233,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FDIV ST0, double[ED]"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VLDR64_U12(v2, wback, fixedaddress); FDIVD(v1, v1, v2); break; @@ -241,7 +241,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FDIVR ST0, double[ED]"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); v2 = fpu_get_scratch(dyn); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 3, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); VLDR64_U12(v2, wback, fixedaddress); FDIVD(v1, v2, v1); break; diff --git a/src/dynarec/arm64/dynarec_arm64_dd.c b/src/dynarec/arm64/dynarec_arm64_dd.c index 686a7dde..dfb17507 100644 --- a/src/dynarec/arm64/dynarec_arm64_dd.c +++ b/src/dynarec/arm64/dynarec_arm64_dd.c @@ -33,10 +33,12 @@ uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin int64_t fixedaddress; int v1, v2; int s0; + int64_t j64; MAYUSE(s0); MAYUSE(v2); MAYUSE(v1); + MAYUSE(j64); switch(nextop) { case 0xC0: @@ -170,11 +172,21 @@ uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin #if 0 // those are ARM 8.5 opcode! FRINT64ZD(s0, v1); + FCVTZSxD(x2, s0); + STRx_U12(x2, ed, fixedaddress); #else - FRINTRRD(s0, v1, 3); // not ideal, might averflow and not set 0x8000000000000000 correctly - #endif + MRS_fpsr(x5); + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + MSR_fpsr(x5); + FRINTRRD(s0, v1, 3); FCVTZSxD(x2, s0); STRx_U12(x2, ed, fixedaddress); + MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ_MARK3(x5, FPSR_IOC); + ORRx_mask(x5, xZR, 1, 1, 0); //0x8000000000000000 + STRw_U12(x5, ed, fixedaddress); + MARK3; + #endif x87_do_pop(dyn, ninst, x3); break; case 2: diff --git a/src/dynarec/arm64/dynarec_arm64_df.c b/src/dynarec/arm64/dynarec_arm64_df.c index 960164e1..2cbcfe4b 100644 --- a/src/dynarec/arm64/dynarec_arm64_df.c +++ b/src/dynarec/arm64/dynarec_arm64_df.c @@ -308,7 +308,7 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VSTR64_U12(s0, wback, fixedaddress); MRS_fpsr(x5); // get back FPSR to check the IOC bit TBZ_MARK3(x5, FPSR_IOC); - MOV64x(x5, 0x8000000000000000LL); + ORRx_mask(x5, xZR, 1, 1, 0); //0x8000000000000000 STRx_U12(x5, wback, fixedaddress); MARK3; #endif diff --git a/src/dynarec/arm64/dynarec_arm64_emit_logic.c b/src/dynarec/arm64/dynarec_arm64_emit_logic.c index b152ac4d..668713a6 100755 --- a/src/dynarec/arm64/dynarec_arm64_emit_logic.c +++ b/src/dynarec/arm64/dynarec_arm64_emit_logic.c @@ -47,7 +47,7 @@ void emit_or32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, } IFX(X_SF) { LSRxw(s3, s1, (rex.w)?63:31); - BFIx(xFlags, s3, F_SF, 1); + BFIw(xFlags, s3, F_SF, 1); } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); @@ -78,7 +78,7 @@ void emit_or32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int } IFX(X_SF) { LSRxw(s3, s1, (rex.w)?63:31); - BFIx(xFlags, s3, F_SF, 1); + BFIw(xFlags, s3, F_SF, 1); } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); @@ -109,7 +109,7 @@ void emit_xor32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 } IFX(X_SF) { LSRxw(s3, s1, 
(rex.w)?63:31); - BFIx(xFlags, s3, F_SF, 1); + BFIw(xFlags, s3, F_SF, 1); } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); @@ -140,7 +140,7 @@ void emit_xor32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in } IFX(X_SF) { LSRxw(s3, s1, (rex.w)?63:31); - BFIx(xFlags, s3, F_SF, 1); + BFIw(xFlags, s3, F_SF, 1); } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); @@ -174,7 +174,7 @@ void emit_and32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 } IFX(X_SF) { LSRxw(s3, s1, (rex.w)?63:31); - BFIx(xFlags, s3, F_SF, 1); + BFIw(xFlags, s3, F_SF, 1); } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); @@ -208,7 +208,7 @@ void emit_and32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in } IFX(X_SF) { LSRxw(s3, s1, (rex.w)?63:31); - BFIx(xFlags, s3, F_SF, 1); + BFIw(xFlags, s3, F_SF, 1); } IFX(X_PF) { emit_pf(dyn, ninst, s1, s3, s4); @@ -579,9 +579,6 @@ void emit_and16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) IFX(X_PEND) { STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); } - IFX(X_PEND) { - STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); - } IFX(X_CF | X_AF | X_OF) { MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF)); BICw_REG(xFlags, xFlags, s3); diff --git a/src/dynarec/arm64/dynarec_arm64_emit_math.c b/src/dynarec/arm64/dynarec_arm64_emit_math.c index 37952444..8d952d50 100755 --- a/src/dynarec/arm64/dynarec_arm64_emit_math.c +++ b/src/dynarec/arm64/dynarec_arm64_emit_math.c @@ -157,8 +157,7 @@ void emit_sub32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 SET_DFNONE(s3); } IFX(X_AF) { - MVNxw_REG(s3, s1); - ORRxw_REG(s3, s3, s2); // s3 = ~op1 | op2 + ORNxw_REG(s3, s2, s1); // s3 = ~op1 | op2 BICxw(s4, s2, s1); // s4 = ~op1 & op2 } IFX(X_ALL) { @@ -612,8 +611,7 @@ void emit_sub16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) SET_DFNONE(s3); } IFX(X_AF|X_OF|X_CF) { - MVNw_REG(s3, s1); - ORRw_REG(s3, s3, s2); // s3 = ~op1 | op2 + ORNw_REG(s3, s2, s1); // s3 = ~op1 | op2 BICw_REG(s4, s2, s1); // s4 = ~op1 & op2 } @@ -920,7 +918,11 @@ void emit_dec8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) ANDw_mask(s4, s3, 0, 0); // s4 = ~op1 & op2 ORRw_mask(s3, s3, 0, 0); // s3 = ~op1 | op2 } - SUBSw_U12(s1, s1, 1); + IFX(X_ZF) { + SUBSw_U12(s1, s1, 1); + } else { + SUBw_U12(s1, s1, 1); + } IFX(X_PEND) { STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); } @@ -962,7 +964,11 @@ void emit_dec16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) IFX(X_AF|X_OF) { MVNw_REG(s4, s1); } - SUBSw_U12(s1, s1, 1); + IFX(X_ZF) { + SUBSw_U12(s1, s1, 1); + } else { + SUBw_U12(s1, s1, 1); + } IFX(X_PEND) { STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); } @@ -1177,54 +1183,7 @@ void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4, in { MAYUSE(s5); MOV32w(s5, c&0xff); - IFX(X_PEND) { - STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRB_U12(s5, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s4, d_adc8); - } else IFX(X_ALL) { - SET_DFNONE(s4); - } - IFX(X_AF | X_OF) { - MOVw_REG(s4, s1); - } - MRS_nzvc(s3); - BFIx(s3, xFlags, 29, 1); // set C - MSR_nzvc(s3); // load CC into ARM CF - ADCw_REG(s1, s1, s5); - IFX(X_PEND) { - STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF|X_OF) { - ORRw_REG(s3, s4, s5); // s3 = op1 | op2 - ANDw_REG(s4, s4, s5); // s4 = op1 & op2 - BICw_REG(s3, s3, s1); // s3 = (op1 | op2) & ~ res - ORRw_REG(s3, s3, s4); // s4 = (op1 & op2) | ((op1 | op2) & ~ res) - IFX(X_AF) { - LSRw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_OF) { - LSRw(s4, s3, 6); - EORw_REG_LSR(s4, s4, s4, 
1); - BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 - } - } - IFX(X_CF) { - LSRw(s3, s1, 8); - BFIw(xFlags, s3, F_CF, 1); - } - IFX(X_ZF) { - ANDSw_mask(s1, s1, 0, 0b000111); //mask=000000ff - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 7); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } + emit_adc8(dyn, ninst, s1, s5, s3, s4); } // emit ADC16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch @@ -1246,7 +1205,7 @@ void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) MSR_nzvc(s3); // load CC into ARM CF ADCw_REG(s1, s1, s2); IFX(X_PEND) { - STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); + STRw_U12(s1, xEmu, offsetof(x64emu_t, res)); } IFX(X_AF|X_OF) { ORRw_REG(s3, s4, s2); // s3 = op1 | op2 @@ -1536,55 +1495,7 @@ void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4, in { MAYUSE(s5); MOV32w(s5, c&0xff); - IFX(X_PEND) { - STRB_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRB_U12(s5, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s3, d_sbb8); - } else IFX(X_ALL) { - SET_DFNONE(s3); - } - EORw_mask(s4, xFlags, 0, 0); // invert CC because it's reverted for SUB on ARM - MRS_nzvc(s3); - BFIx(s3, s4, 29, 1); // set C, bit 29 - MSR_nzvc(s3); // load CC into ARM CF - IFX(X_AF|X_OF|X_CF) { - MVNw_REG(s4, s1); - } - SBCw_REG(s1, s1, s5); - IFX(X_PEND) { - STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); - } - IFX(X_AF|X_OF|X_CF) { - ORRw_REG(s3, s4, s5); // s3 = ~op1 | op2 - ANDw_REG(s4, s4, s5); // s4 = ~op1 & op2 - ANDw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res - ORRw_REG(s3, s3, s4); // s3 = (~op1 & op2) | ((~op1 | op2) & res) - IFX(X_CF) { - LSRw(s4, s3, 7); - BFIw(xFlags, s4, F_CF, 1); // CF : bc & 0x80 - } - IFX(X_AF) { - LSRw(s4, s3, 3); - BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 - } - IFX(X_OF) { - LSRw(s4, s3, 6); - EORw_REG_LSR(s4, s4, s4, 1); - BFIw(xFlags, s4, F_OF, 1); // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1 - } - } - IFX(X_ZF) { - ANDSw_mask(s1, s1, 0, 0b000111); //mask=000000ff - CSETw(s3, cEQ); - BFIw(xFlags, s3, F_ZF, 1); - } - IFX(X_SF) { - LSRw(s3, s1, 7); - BFIw(xFlags, s3, F_SF, 1); - } - IFX(X_PF) { - emit_pf(dyn, ninst, s1, s3, s4); - } + emit_sbb8(dyn, ninst, s1, s5, s3, s4); } // emit SBB16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch @@ -1773,7 +1684,11 @@ void emit_neg16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) IFX(X_AF|X_OF) { MOVw_REG(s3, s1); } - NEGSw_REG(s1, s1); + IFX(X_ZF) { + NEGSw_REG(s1, s1); + } else { + NEGw_REG(s1, s1); + } IFX(X_PEND) { STRH_U12(s1, xEmu, offsetof(x64emu_t, res)); } @@ -1819,7 +1734,11 @@ void emit_neg8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) IFX(X_AF|X_OF) { MOVw_REG(s3, s1); } - NEGSw_REG(s1, s1); + IFX(X_ZF) { + NEGSw_REG(s1, s1); + } else { + NEGw_REG(s1, s1); + } IFX(X_PEND) { STRB_U12(s1, xEmu, offsetof(x64emu_t, res)); } diff --git a/src/dynarec/arm64/dynarec_arm64_emit_shift.c b/src/dynarec/arm64/dynarec_arm64_emit_shift.c index 51903720..45add1f2 100755 --- a/src/dynarec/arm64/dynarec_arm64_emit_shift.c +++ b/src/dynarec/arm64/dynarec_arm64_emit_shift.c @@ -37,14 +37,11 @@ void emit_shl32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 } else IFX(X_ALL) { SET_DFNONE(s4); } - IFX(F_OF) { + IFX(X_OF) { CMPSxw_U12(s2, 0); - IFX(F_OF) { - Bcond(cNE, +8); - BFCx(xFlags, F_OF, 1); - } + Bcond(cNE, +8+((dyn->insts[ninst].x64.gen_flags&X_PEND)?4:0)); + BFCw(xFlags, F_OF, 1); IFX(X_PEND) { - Bcond(cNE, +8); STRxw_U12(s1, 
xEmu, offsetof(x64emu_t, res)); } B_NEXT(cEQ); @@ -81,7 +78,7 @@ void emit_shl32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 } // emit SHL32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch -void emit_shl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) +void emit_shl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4) { IFX(X_PEND) { MOV32w(s3, c); @@ -92,8 +89,8 @@ void emit_shl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, in SET_DFNONE(s4); } if(c==0) { - IFX(F_OF) { - BFCx(xFlags, F_OF, 1); + IFX(X_OF) { + BFCw(xFlags, F_OF, 1); } IFX(X_PEND) { STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); @@ -175,11 +172,7 @@ void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 IFX(X_OF) { CMPSxw_U12(s2, 1); // if s2==1 Bcond(cNE, 4+3*4); - if(rex.w) { - LSRx(s4, s1, 62); - } else { - LSRw(s4, s1, 30); - } + LSRxw(s4, s1, rex.w?62:30); EORw_REG_LSR(s4, s4, s4, 1); BFIw(xFlags, s4, F_OF, 1); } @@ -189,7 +182,7 @@ void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 } // emit SHR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch -void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) +void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4) { IFX(X_PEND) { MOV32w(s3, c); @@ -206,10 +199,8 @@ void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, in return; } IFX(X_CF) { - if(c>1) { - LSRxw(s3, s1, c-1); - } - BFIw(xFlags, (c>1)?s3:s1, 0, 1); + LSRxw(s3, s1, c-1); + BFIw(xFlags, s3, 0, 1); } LSRxw(s1, s1, c); IFX(X_PEND) { @@ -237,7 +228,7 @@ void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, in } // emit SAR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch -void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) +void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4) { IFX(X_PEND) { MOV32w(s3, c); @@ -254,10 +245,8 @@ void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, in return; } IFX(X_CF) { - if(c>1) { - ASRxw(s3, s1, c-1); - } - BFIw(xFlags, (c>1)?s3:s1, 0, 1); + ASRxw(s3, s1, c-1); + BFIw(xFlags, s3, 0, 1); } ASRxw(s1, s1, c); IFX(X_PEND) { @@ -278,13 +267,13 @@ void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, in } // emit ROL32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch -void emit_rol32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) +void emit_rol32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4) { MAYUSE(rex); MAYUSE(s1); MAYUSE(s3); MAYUSE(s4); IFX(X_PEND) { MOV32w(s3, c); STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2)); - SET_DF(s4, d_rol32); + SET_DF(s4, rex.w?d_rol64:d_rol32); } else IFX(X_ALL) { SET_DFNONE(s4); } @@ -310,7 +299,7 @@ void emit_rol32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, in } // emit ROR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch -void emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) +void emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4) { MAYUSE(s1); MAYUSE(s3); MAYUSE(s4); IFX(X_PEND) { @@ -344,7 +333,7 @@ void 
emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, in } // emit SHRD32 instruction, from s1, fill s2 , constant c, store result in s1 using s3 and s4 as scratch -void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int32_t c, int s3, int s4) +void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4) { c&=(rex.w?0x3f:0x1f); IFX(X_PEND) { @@ -363,10 +352,8 @@ void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int3 return; } IFX(X_CF) { - if(c>1) { - LSRxw(s3, s1, c-1); - } - BFIw(xFlags, (c>1)?s3:s1, 0, 1); + LSRxw(s3, s1, c-1); + BFIw(xFlags, s3, 0, 1); } LSRxw(s3, s1, c); ORRxw_REG_LSL(s1, s3, s2, (rex.w?64:32)-c); @@ -394,7 +381,7 @@ void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int3 } } -void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int32_t c, int s3, int s4) +void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4) { c&=(rex.w?0x3f:0x1f); IFX(X_PEND) { @@ -407,8 +394,8 @@ void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int3 SET_DFNONE(s4); } if(c==0) { - IFX(F_OF) { - BFCx(xFlags, F_OF, 1); + IFX(X_OF) { + BFCw(xFlags, F_OF, 1); } IFX(X_PEND) { STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); diff --git a/src/dynarec/arm64/dynarec_arm64_emit_tests.c b/src/dynarec/arm64/dynarec_arm64_emit_tests.c index 301ab2f2..4d032e02 100755 --- a/src/dynarec/arm64/dynarec_arm64_emit_tests.c +++ b/src/dynarec/arm64/dynarec_arm64_emit_tests.c @@ -85,19 +85,14 @@ void emit_cmp32_0(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int SUBSxw_U12(s3, s1, 0); // res = s1 - 0 // and now the tricky ones (and mostly unused), PF and AF // bc = (res & (~d | s)) | (~d & s) => is 0 here... 
- IFX(X_OF|X_AF) { - MOV32w(s4, (1<<F_OF)|(1<<F_AF)); + IFX(X_OF|X_AF|X_CF) { + MOV32w(s4, (1<<F_OF)|(1<<F_AF)|(1<<F_CF)); BICw(xFlags, xFlags, s4); } IFX(X_ZF) { CSETw(s4, cEQ); BFIw(xFlags, s4, F_ZF, 1); } - IFX(X_CF) { - // inverted carry - CSETw(s4, cCC); - BFIw(xFlags, s4, F_CF, 1); - } IFX(X_SF) { LSRxw(s3, s1, (rex.w)?63:31); BFIw(xFlags, s3, F_SF, 1); @@ -118,12 +113,15 @@ void emit_cmp16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, i } else { SET_DFNONE(s3); } - SUBw_REG(s5, s1, s2); // res = s1 - s2 + IFX(X_ZF) { + SUBSw_REG(s5, s1, s2); // res = s1 - s2 + } else { + SUBw_REG(s5, s1, s2); // res = s1 - s2 + } IFX_PENDOR0 { STRH_U12(s5, xEmu, offsetof(x64emu_t, res)); } IFX(X_ZF) { - TSTw_mask(s5, 0, 15); //mask=0xffff CSETw(s3, cEQ); BFIw(xFlags, s3, F_ZF, 1); } @@ -316,11 +314,9 @@ void emit_test16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, } else { SET_DFNONE(s4); } - IFX(X_OF) { - BFCw(xFlags, F_OF, 1); - } - IFX(X_CF) { - BFCw(xFlags, F_CF, 1); + IFX(X_CF | X_AF | X_OF) { + MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF)); + BICw(xFlags, xFlags, s3); } ANDSw_REG(s5, s1, s2); // res = s1 & s2 IFX_PENDOR0 { @@ -349,11 +345,9 @@ void emit_test8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, i } else { SET_DFNONE(s4); } - IFX(X_OF) { - BFCw(xFlags, F_OF, 1); - } - IFX(X_CF) { - BFCw(xFlags, F_CF, 1); + IFX(X_CF | X_AF | X_OF) { + MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF)); + BICw(xFlags, xFlags, s3); } ANDSw_REG(s5, s1, s2); // res = s1 & s2 IFX_PENDOR0 { diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c index 8bcf3e41..b38692fe 100644 --- a/src/dynarec/arm64/dynarec_arm64_f0.c +++ b/src/dynarec/arm64/dynarec_arm64_f0.c @@ -76,7 +76,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0); MARKLOCK; LDAXRB(x1, wback); - emit_add8(dyn, ninst, x1, x2, x4, x3); + emit_add8(dyn, ninst, x1, x2, x4, x5); STLXRB(x4, x1, wback); CBNZx_MARKLOCK(x4); } @@ -124,7 +124,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0); MARKLOCK; LDAXRB(x1, wback); - emit_or8(dyn, ninst, x1, x2, x4, x3); + emit_or8(dyn, ninst, x1, x2, x4, x5); STLXRB(x4, x1, wback); CBNZx_MARKLOCK(x4); } @@ -155,161 +155,184 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin switch(nextop) { case 0xB0: - INST_NAME("LOCK CMPXCHG Eb, Gb"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGB(x1); - UBFXx(x6, xRAX, 0, 8); - SMDMB(); - if(MODREG) { - if(rex.rex) { - wback = xRAX+(nextop&7)+(rex.b<<3); - wb2 = 0; - } else { - wback = (nextop&7); - wb2 = (wback>>2)*8; - wback = xRAX+(wback&3); - } - UBFXx(x2, wback, wb2, 8); - wb1 = 0; - ed = x2; - UFLAG_IF {emit_cmp8(dyn, ninst, x6, ed, x3, x4, x5);} - CMPSxw_REG(x6, x2); - B_MARK2(cNE); - BFIx(wback, x2, wb2, 8); - MOVxw_REG(ed, gd); - MARK2; - BFIx(xRAX, x2, 0, 8); - B_NEXT_nocond; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0); - // Aligned version - MARKLOCK; - LDAXRB(x2, wback); - CMPSxw_REG(x6, x2); - B_MARK(cNE); - // EAX == Ed - STLXRB(x4, gd, wback); - CBNZx_MARKLOCK(x4); - // done - MARK; - UFLAG_IF {emit_cmp32(dyn, ninst, rex, x6, x2, x3, x4, x5);} - BFIx(xRAX, x2, 0, 8); // upper par of RAX will be erase on 32bits, no mater what + 
switch(rep) { + case 0: + INST_NAME("LOCK CMPXCHG Eb, Gb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGB(x1); + UBFXx(x6, xRAX, 0, 8); + SMDMB(); + if(MODREG) { + if(rex.rex) { + wback = xRAX+(nextop&7)+(rex.b<<3); + wb2 = 0; + } else { + wback = (nextop&7); + wb2 = (wback>>2)*8; + wback = xRAX+(wback&3); + } + UBFXx(x2, wback, wb2, 8); + wb1 = 0; + ed = x2; + UFLAG_IF {emit_cmp8(dyn, ninst, x6, ed, x3, x4, x5);} + CMPSxw_REG(x6, x2); + B_MARK2(cNE); + BFIx(wback, x2, wb2, 8); + MOVxw_REG(ed, gd); + MARK2; + BFIx(xRAX, x2, 0, 8); + B_NEXT_nocond; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0); + MARKLOCK; + LDAXRB(x2, wback); + CMPSxw_REG(x6, x2); + B_MARK(cNE); + // EAX == Ed + STLXRB(x4, gd, wback); + CBNZx_MARKLOCK(x4); + // done + MARK; + UFLAG_IF {emit_cmp32(dyn, ninst, rex, x6, x2, x3, x4, x5);} + BFIx(xRAX, x2, 0, 8); + } + SMDMB(); + break; + default: + DEFAULT; } - SMDMB(); break; case 0xB1: - INST_NAME("LOCK CMPXCHG Ed, Gd"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - SMDMB(); - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - wback = 0; - UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, ed, x3, x4, x5);} - MOVxw_REG(x1, ed); // save value - CMPSxw_REG(xRAX, x1); - B_MARK2(cNE); - MOVxw_REG(ed, gd); - MARK2; - MOVxw_REG(xRAX, x1); - B_NEXT_nocond; - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0); - TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK3(cNE); - // Aligned version - MARKLOCK; - LDAXRxw(x1, wback); - CMPSxw_REG(xRAX, x1); - B_MARK(cNE); - // EAX == Ed - STLXRxw(x4, gd, wback); - CBNZx_MARKLOCK(x4); - // done - B_MARK_nocond; - // Unaligned version - MARK3; - LDRxw_U12(x1, wback, 0); - LDAXRB(x3, wback); // dummy read, to arm the write... - CMPSxw_REG(xRAX, x1); - B_MARK(cNE); - // EAX == Ed - STLXRB(x4, gd, wback); - CBNZx_MARK3(x4); - STRxw_U12(gd, wback, 0); - MARK; - // Common part (and fallback for EAX != Ed) - UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);} - MOVxw_REG(xRAX, x1); // upper par of RAX will be erase on 32bits, no mater what + switch(rep) { + case 0: + INST_NAME("LOCK CMPXCHG Ed, Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + SMDMB(); + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + wback = 0; + UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, ed, x3, x4, x5);} + MOVxw_REG(x1, ed); // save value + CMPSxw_REG(xRAX, x1); + B_MARK2(cNE); + MOVxw_REG(ed, gd); + MARK2; + MOVxw_REG(xRAX, x1); + B_NEXT_nocond; + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0); + TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 + B_MARK3(cNE); + // Aligned version + MARKLOCK; + LDAXRxw(x1, wback); + CMPSxw_REG(xRAX, x1); + B_MARK(cNE); + // EAX == Ed + STLXRxw(x4, gd, wback); + CBNZx_MARKLOCK(x4); + // done + B_MARK_nocond; + // Unaligned version + MARK3; + LDRxw_U12(x1, wback, 0); + LDAXRB(x3, wback); // dummy read, to arm the write... 
+ CMPSxw_REG(xRAX, x1); + B_MARK(cNE); + // EAX == Ed + STLXRB(x4, gd, wback); + CBNZx_MARK3(x4); + STRxw_U12(gd, wback, 0); + MARK; + // Common part (and fallback for EAX != Ed) + UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);} + MOVxw_REG(xRAX, x1); // upper par of RAX will be erase on 32bits, no mater what + } + SMDMB(); + break; + default: + DEFAULT; } - SMDMB(); break; case 0xC1: - INST_NAME("LOCK XADD Gd, Ed"); - SETFLAGS(X_ALL, SF_SET_PENDING); - nextop = F8; - GETGD; - SMDMB(); - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - MOVxw_REG(x1, ed); - MOVxw_REG(ed, gd); - MOVxw_REG(gd, x1); - emit_add32(dyn, ninst, rex, ed, gd, x3, x4); - } else { - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0); - TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 - B_MARK(cNE); // unaligned - MARKLOCK; - LDAXRxw(x1, wback); - ADDxw_REG(x4, x1, gd); - STLXRxw(x3, x4, wback); - CBNZx_MARKLOCK(x3); - B_MARK2_nocond; - MARK; - LDRxw_U12(x1, wback, 0); - LDAXRB(x4, wback); - BFIxw(x1, x4, 0, 8); - ADDxw_REG(x4, x1, gd); - STLXRB(x3, x4, wback); - CBNZx_MARK(x3); - STRxw_U12(x4, wback, 0); - MARK2; - IFX(X_ALL|X_PEND) { - MOVxw_REG(x2, x1); - emit_add32(dyn, ninst, rex, x2, gd, x3, x4); - } - MOVxw_REG(gd, x1); + switch(rep) { + case 0: + INST_NAME("LOCK XADD Gd, Ed"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + SMDMB(); + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + MOVxw_REG(x1, ed); + MOVxw_REG(ed, gd); + MOVxw_REG(gd, x1); + emit_add32(dyn, ninst, rex, ed, gd, x3, x4); + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0); + TSTx_mask(wback, 1, 0, 1+rex.w); // mask=3 or 7 + B_MARK(cNE); // unaligned + MARKLOCK; + LDAXRxw(x1, wback); + ADDxw_REG(x4, x1, gd); + STLXRxw(x3, x4, wback); + CBNZx_MARKLOCK(x3); + B_MARK2_nocond; + MARK; + LDRxw_U12(x1, wback, 0); + LDAXRB(x4, wback); + BFIxw(x1, x4, 0, 8); + ADDxw_REG(x4, x1, gd); + STLXRB(x3, x4, wback); + CBNZx_MARK(x3); + STRxw_U12(x4, wback, 0); + MARK2; + IFX(X_ALL|X_PEND) { + MOVxw_REG(x2, x1); + emit_add32(dyn, ninst, rex, x2, gd, x3, x4); + } + MOVxw_REG(gd, x1); + } + SMDMB(); + break; + default: + DEFAULT; } - SMDMB(); break; case 0xC7: - INST_NAME("LOCK CMPXCHG8B Gq, Eq"); - SETFLAGS(X_ZF, SF_SUBSET); - nextop = F8; - addr = geted(dyn, addr, ninst, nextop, &wback, x1, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0); - SMDMB(); - MARKLOCK; - LDAXPxw(x2, x3, wback); - CMPSxw_REG(xRAX, x2); - B_MARK(cNE); // EAX != Ed[0] - CMPSxw_REG(xRDX, x3); - B_MARK(cNE); // EDX != Ed[1] - STLXPxw(x4, xRBX, xRCX, wback); - CBNZx_MARKLOCK(x4); - MOV32w(x1, 1); - B_MARK3_nocond; - MARK; - MOVxw_REG(xRAX, x2); - MOVxw_REG(xRDX, x3); - MOV32w(x1, 0); - MARK3; - SMDMB(); - BFIw(xFlags, x1, F_ZF, 1); + switch(rep) { + case 0: + INST_NAME("LOCK CMPXCHG8B Gq, Eq"); + SETFLAGS(X_ZF, SF_SUBSET); + nextop = F8; + addr = geted(dyn, addr, ninst, nextop, &wback, x1, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0); + SMDMB(); + MARKLOCK; + LDAXPxw(x2, x3, wback); + CMPSxw_REG(xRAX, x2); + B_MARK(cNE); // EAX != Ed[0] + CMPSxw_REG(xRDX, x3); + B_MARK(cNE); // EDX != Ed[1] + STLXPxw(x4, xRBX, xRCX, wback); + CBNZx_MARKLOCK(x4); + MOV32w(x1, 1); + B_MARK3_nocond; + MARK; + MOVxw_REG(xRAX, x2); + MOVxw_REG(xRDX, x3); + MOV32w(x1, 0); + MARK3; + SMDMB(); + BFIw(xFlags, x1, F_ZF, 1); + break; + default: + DEFAULT; + } break; default: @@ -420,7 +443,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin wb1 = 1; MARKLOCK; LDAXRB(x1, 
-                    emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5);
+                    emit_adc8c(dyn, ninst, x1, u8, x2, x4, x3);
                     STLXRB(x3, x1, wback);
                     CBNZx_MARKLOCK(x3);
                 }
@@ -441,7 +464,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     wb1 = 1;
                     MARKLOCK;
                     LDAXRB(x1, wback);
-                    emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5);
+                    emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x3);
                     STLXRB(x3, x1, wback);
                     CBNZx_MARKLOCK(x3);
                 }
@@ -481,7 +504,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     wb1 = 1;
                     MARKLOCK;
                     LDAXRB(x1, wback);
-                    emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5);
+                    emit_sub8c(dyn, ninst, x1, u8, x2, x4, x3);
                     STLXRB(x3, x1, wback);
                     CBNZx_MARKLOCK(x3);
                 }
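Aside on the LDAXR/STLXR pattern used throughout this file: the exclusive-load/exclusive-store pair retries (via CBNZx_MARKLOCK) until the store-conditional succeeds, which is how x86 LOCK semantics are reproduced on AArch64. A minimal C sketch of what the two locked opcodes above mean, written with GCC/Clang __atomic builtins (illustrative only; the helper names are hypothetical, and box64 emits the native loops directly rather than calling anything like this):

#include <stdint.h>
#include <stdio.h>

/* LOCK XADD: memory gets mem+src, the source register gets the old value. */
static uint32_t lock_xadd32(uint32_t *mem, uint32_t src)
{
    return __atomic_fetch_add(mem, src, __ATOMIC_SEQ_CST);
}

/* LOCK CMPXCHG8B: compare EDX:EAX against m64; on match store ECX:EBX and
   set ZF, otherwise load m64 into EDX:EAX and clear ZF. */
static int lock_cmpxchg8b(uint64_t *mem, uint64_t *edx_eax, uint64_t ecx_ebx)
{
    return __atomic_compare_exchange_n(mem, edx_eax, ecx_ebx, 0,
                                       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}

int main(void)
{
    uint32_t m = 5;
    uint32_t old = lock_xadd32(&m, 3);          /* old=5, m=8 */
    uint64_t q = 42, edx_eax = 42;
    int zf = lock_cmpxchg8b(&q, &edx_eax, 7);   /* zf=1, q=7 */
    printf("xadd: old=%u mem=%u, cmpxchg8b: zf=%d mem=%llu\n",
           old, m, zf, (unsigned long long)q);
    return 0;
}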
diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c
index 153b634f..75553bfc 100755
--- a/src/dynarec/arm64/dynarec_arm64_f20f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f20f.c
@@ -22,29 +22,6 @@
 #include "dynarec_arm64_functions.h"
 #include "dynarec_arm64_helper.h"
 
-// Get Ex as a double, not a quad (warning, x2 get used)
-#define GETEX(a, w, D)  \
-    if(MODREG) {        \
-        a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \
-    } else {            \
-        SMREAD();       \
-        a = fpu_get_scratch(dyn); \
-        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, D); \
-        VLDR64_U12(a, ed, fixedaddress); \
-    }
-
-#define GETG    gd = ((nextop&0x38)>>3)+(rex.r<<3)
-
-#define GETGX(a, w) gd = ((nextop&0x38)>>3)+(rex.r<<3); \
-            a = sse_get_reg(dyn, ninst, x1, gd, w)
-
-#define GETGX_empty(a)  gd = ((nextop&0x38)>>3)+(rex.r<<3); \
-            a = sse_get_reg_empty(dyn, ninst, x1, gd)
-
-#define GETGM(a) \
-    gd = ((nextop&0x38)>>3); \
-    a = mmx_get_reg(dyn, ninst, x1, x2, x3, gd)
-
 uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog)
 {
     (void)ip; (void)need_epilog;
@@ -134,7 +111,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("CVTTSD2SI Gd, Ex");
             nextop = F8;
             GETGD;
-            GETEX(q0, 0, 0);
+            GETEXSD(q0, 0, 0);
             if(!box64_dynarec_fastround) {
                 MRS_fpsr(x5);
                 BFCw(x5, FPSR_IOC, 1);  // reset IOC bit
@@ -145,7 +122,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 MRS_fpsr(x5);   // get back FPSR to check the IOC bit
                 TBZ_NEXT(x5, FPSR_IOC);
                 if(rex.w) {
-                    MOV64x(gd, 0x8000000000000000);
+                    ORRx_mask(gd, xZR, 1, 1, 0);    //0x8000000000000000
                 } else {
                     MOV32w(gd, 0x80000000);
                 }
@@ -155,7 +132,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("CVTSD2SI Gd, Ex");
             nextop = F8;
             GETGD;
-            GETEX(q0, 0, 0);
+            GETEXSD(q0, 0, 0);
             if(!box64_dynarec_fastround) {
                 MRS_fpsr(x5);
                 BFCw(x5, FPSR_IOC, 1);  // reset IOC bit
@@ -170,7 +147,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 MRS_fpsr(x5);   // get back FPSR to check the IOC bit
                 TBZ_NEXT(x5, FPSR_IOC);
                 if(rex.w) {
-                    MOV64x(gd, 0x8000000000000000);
+                    ORRx_mask(gd, xZR, 1, 1, 0);    //0x8000000000000000
                 } else {
                     MOV32w(gd, 0x80000000);
                 }
@@ -183,7 +160,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;
             GETGX(v0, 1);
             d1 = fpu_get_scratch(dyn);
-            GETEX(d0, 0, 0);
+            GETEXSD(d0, 0, 0);
             if(!box64_dynarec_fastnan) {
                 v1 = fpu_get_scratch(dyn);
                 FCMLTD_0(v1, d0);
@@ -201,7 +178,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;
             GETGX(d1, 1);
             v1 = fpu_get_scratch(dyn);
-            GETEX(d0, 0, 0);
+            GETEXSD(d0, 0, 0);
             if(!box64_dynarec_fastnan) {
                 v0 = fpu_get_scratch(dyn);
                 q0 = fpu_get_scratch(dyn);
@@ -223,7 +200,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;
             GETGX(d1, 1);
             v1 = fpu_get_scratch(dyn);
-            GETEX(d0, 0, 0);
+            GETEXSD(d0, 0, 0);
             if(!box64_dynarec_fastnan) {
                 v0 = fpu_get_scratch(dyn);
                 q0 = fpu_get_scratch(dyn);
@@ -244,7 +221,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("CVTSD2SS Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
-            GETEX(d0, 0, 0);
+            GETEXSD(d0, 0, 0);
             d1 = fpu_get_scratch(dyn);
             FCVT_S_D(d1, d0);
             VMOVeS(v0, 0, d1, 0);
@@ -255,7 +232,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;
             GETGX(d1, 1);
             v1 = fpu_get_scratch(dyn);
-            GETEX(d0, 0, 0);
+            GETEXSD(d0, 0, 0);
             if(!box64_dynarec_fastnan) {
                 v0 = fpu_get_scratch(dyn);
                 q0 = fpu_get_scratch(dyn);
@@ -276,7 +253,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("MINSD Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
-            GETEX(v1, 0, 0);
+            GETEXSD(v1, 0, 0);
             // MINSD: if any input is NaN, or Ex[0]<Gx[0], copy Ex[0] -> Gx[0]
             #if 0
             d0 = fpu_get_scratch(dyn);
@@ -293,7 +270,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;
             GETGX(v0, 1);
             d1 = fpu_get_scratch(dyn);
-            GETEX(v1, 0, 0);
+            GETEXSD(v1, 0, 0);
             if(!box64_dynarec_fastnan) {
                 d0 = fpu_get_scratch(dyn);
                 q0 = fpu_get_scratch(dyn);
@@ -314,7 +291,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("MAXSD Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
-            GETEX(v1, 0, 0);
+            GETEXSD(v1, 0, 0);
             // MAXSD: if any input is NaN, or Ex[0]>Gx[0], copy Ex[0] -> Gx[0]
             #if 0
             d0 = fpu_get_scratch(dyn);
@@ -330,7 +307,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x70:
            INST_NAME("PSHUFLW Gx, Ex, Ib");
            nextop = F8;
-           GETEX(v1, 0, 1);
+           GETEXSD(v1, 0, 1);
            GETGX(v0, 1);
            u8 = F8;
 
@@ -368,7 +345,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("CMPSD Gx, Ex, Ib");
             nextop = F8;
             GETGX(v0, 1);
-            GETEX(v1, 0, 1);
+            GETEXSD(v1, 0, 1);
             u8 = F8;
             FCMPD(v0, v1);
             switch(u8&7) {
@@ -388,7 +365,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("ADDSUBPS Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
-            GETEX(v1, 0, 0);
+            GETEXSD(v1, 0, 0);
             q0 = fpu_get_scratch(dyn);
             static float addsubps[4] = {-1.f, 1.f, -1.f, 1.f};
             MAYUSE(addsubps);
@@ -401,14 +378,14 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("MOVDQ2Q Gm, Ex");
             nextop = F8;
             GETGM(v0);
-            GETEX(v1, 0, 0);
+            GETEXSD(v1, 0, 0);
             VMOV(v0, v1);
             break;
         case 0xE6:
             INST_NAME("CVTPD2DQ Gx, Ex");
             nextop = F8;
-            GETEX(v1, 0, 0);
+            GETEXSD(v1, 0, 0);
             GETGX_empty(v0);
             u8 = sse_setround(dyn, ninst, x1, x2, x3);
             VFRINTIDQ(v0, v1);
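Why ORRx_mask(gd, xZR, 1, 1, 0) can replace MOV64x(gd, 0x8000000000000000): that constant is representable as an AArch64 logical ("bitmask") immediate, so a single ORR with the zero register materializes it, where MOV64x needs a MOVZ/MOVK sequence. A small C sketch of the architecture's bitmask-immediate expansion, usable to check the (N=1, immr=1, imms=0) encoding (simplified, and it assumes the fields form a valid encoding):

#include <stdint.h>
#include <stdio.h>

/* Expand an AArch64 logical-immediate triple (N, immr, imms) into the
   64-bit pattern it encodes: a run of imms+1 ones, rotated right by immr
   within an element of 2/4/.../64 bits, replicated across 64 bits. */
static uint64_t decode_bitmask_imm(unsigned N, unsigned immr, unsigned imms)
{
    unsigned len = 6;                       /* N=1 selects a 64-bit element */
    if (!N) {                               /* else: highest set bit of ~imms */
        unsigned notimms = (~imms) & 0x3f;
        for (unsigned i = 0; i < 6; ++i)
            if (notimms & (1u << i)) len = i;
    }
    unsigned esize = 1u << len;
    unsigned levels = esize - 1;
    unsigned S = imms & levels;             /* S+1 consecutive ones */
    unsigned R = immr & levels;             /* rotate-right amount */
    uint64_t welem = (S == 63) ? ~0ull : ((1ull << (S + 1)) - 1);
    uint64_t elem = (welem >> R) | (R ? (welem << (esize - R)) : 0);
    if (esize < 64) {
        elem &= (1ull << esize) - 1;
        for (unsigned i = esize; i < 64; i <<= 1)   /* replicate element */
            elem |= elem << i;
    }
    return elem;
}

int main(void)
{
    /* ORRx_mask(gd, xZR, 1, 1, 0) => ORR Xd, XZR, #0x8000000000000000 */
    printf("%016llx\n", (unsigned long long)decode_bitmask_imm(1, 1, 0));
    return 0;
}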
diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c b/src/dynarec/arm64/dynarec_arm64_f30f.c
index 3615b231..ea057881 100755
--- a/src/dynarec/arm64/dynarec_arm64_f30f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f30f.c
@@ -22,36 +22,6 @@
 #include "dynarec_arm64_functions.h"
 #include "dynarec_arm64_helper.h"
 
-// Get Ex as a single, not a quad (warning, x2 get used)
-#define GETEX(a, w, D)  \
-    if(MODREG) {        \
-        a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \
-    } else {            \
-        SMREAD();       \
-        a = fpu_get_scratch(dyn); \
-        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, D); \
-        VLDR32_U12(a, ed, fixedaddress); \
-    }
-
-// Get EX as a quad
-#define GETEXQ(a, w, D) \
-    if(MODREG) {        \
-        a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \
-    } else {            \
-        SMREAD();       \
-        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, D); \
-        a = fpu_get_scratch(dyn); \
-        VLDR128_U12(a, ed, fixedaddress); \
-    }
-
-#define GETG    gd = ((nextop&0x38)>>3)+(rex.r<<3)
-
-#define GETGX(a, w) gd = ((nextop&0x38)>>3)+(rex.r<<3); \
-            a = sse_get_reg(dyn, ninst, x1, gd, w)
-
-#define GETGX_empty(a)  gd = ((nextop&0x38)>>3)+(rex.r<<3); \
-            a = sse_get_reg_empty(dyn, ninst, x1, gd)
-
 uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog)
 {
     (void)ip; (void)need_epilog;
@@ -160,7 +130,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("CVTTSS2SI Gd, Ex");
             nextop = F8;
             GETGD;
-            GETEX(d0, 0, 0);
+            GETEXSS(d0, 0, 0);
             if(!box64_dynarec_fastround) {
                 MRS_fpsr(x5);
                 BFCw(x5, FPSR_IOC, 1);  // reset IOC bit
@@ -171,7 +141,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 MRS_fpsr(x5);   // get back FPSR to check the IOC bit
                 TBZ_NEXT(x5, FPSR_IOC);
                 if(rex.w) {
-                    MOV64x(gd, 0x8000000000000000);
+                    ORRx_mask(gd, xZR, 1, 1, 0);    //0x8000000000000000
                 } else {
                     MOV32w(gd, 0x80000000);
                 }
@@ -181,7 +151,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("CVTSS2SI Gd, Ex");
             nextop = F8;
             GETGD;
-            GETEX(q0, 0, 0);
+            GETEXSS(q0, 0, 0);
             if(!box64_dynarec_fastround) {
                 MRS_fpsr(x5);
                 BFCw(x5, FPSR_IOC, 1);  // reset IOC bit
@@ -196,7 +166,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 MRS_fpsr(x5);   // get back FPSR to check the IOC bit
                 TBZ_NEXT(x5, FPSR_IOC);
                 if(rex.w) {
-                    MOV64x(gd, 0x8000000000000000);
+                    ORRx_mask(gd, xZR, 1, 1, 0);    //0x8000000000000000
                 } else {
                     MOV32w(gd, 0x80000000);
                 }
@@ -207,7 +177,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;
             GETGX(v0, 1);
             d1 = fpu_get_scratch(dyn);
-            GETEX(d0, 0, 0);
+            GETEXSS(d0, 0, 0);
             FSQRTS(d1, d0);
             VMOVeS(v0, 0, d1, 0);
             break;
@@ -215,7 +185,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("RSQRTSS Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
-            GETEX(v1, 0, 0);
+            GETEXSS(v1, 0, 0);
             d0 = fpu_get_scratch(dyn);
             d1 = fpu_get_scratch(dyn);
             // so here: F32: Imm8 = abcd efgh that gives => aBbbbbbc defgh000 00000000 00000000
@@ -232,7 +202,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("RCPSS Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
-            GETEX(v1, 0, 0);
+            GETEXSS(v1, 0, 0);
             d0 = fpu_get_scratch(dyn);
             FMOVS_8(d0, 0b01110000);    //1.0f
             FDIVS(d0, d0, v1);
@@ -244,7 +214,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;
             GETGX(v0, 1);
             d1 = fpu_get_scratch(dyn);
-            GETEX(d0, 0, 0);
+            GETEXSS(d0, 0, 0);
             FADDS(d1, v0, d0);  // the high part of the vector is erased...
             VMOVeS(v0, 0, d1, 0);
             break;
@@ -253,7 +223,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;
             GETGX(v0, 1);
             d1 = fpu_get_scratch(dyn);
-            GETEX(d0, 0, 0);
+            GETEXSS(d0, 0, 0);
             FMULS(d1, v0, d0);
             VMOVeS(v0, 0, d1, 0);
             break;
@@ -261,7 +231,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("CVTSS2SD Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
-            GETEX(v1, 0, 0);
+            GETEXSS(v1, 0, 0);
             d0 = fpu_get_scratch(dyn);
             FCVT_D_S(d0, v1);
             VMOVeD(v0, 0, d0, 0);
@@ -269,7 +239,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x5B:
             INST_NAME("CVTTPS2DQ Gx, Ex");
             nextop = F8;
-            GETEXQ(d0, 0, 0);
+            GETEX(d0, 0, 0);
             GETGX_empty(v0);
             VFCVTZSQS(v0, d0);
             break;
@@ -279,7 +249,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;
             GETGX(v0, 1);
             d1 = fpu_get_scratch(dyn);
-            GETEX(d0, 0, 0);
+            GETEXSS(d0, 0, 0);
             FSUBS(d1, v0, d0);
             VMOVeS(v0, 0, d1, 0);
             break;
@@ -287,7 +257,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("MINSS Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
-            GETEX(v1, 0, 0);
+            GETEXSS(v1, 0, 0);
             // MINSS: if any input is NaN, or Ex[0]<Gx[0], copy Ex[0] -> Gx[0]
             #if 0
             d0 = fpu_get_scratch(dyn);
@@ -304,7 +274,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;
             GETGX(v0, 1);
             d1 = fpu_get_scratch(dyn);
-            GETEX(d0, 0, 0);
+            GETEXSS(d0, 0, 0);
             FDIVS(d1, v0, d0);
             VMOVeS(v0, 0, d1, 0);
             break;
@@ -312,7 +282,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("MAXSS Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
-            GETEX(v1, 0, 0);
+            GETEXSS(v1, 0, 0);
             // MAXSS: if any input is NaN, or Ex[0]>Gx[0], copy Ex[0] -> Gx[0]
             #if 0
             d0 = fpu_get_scratch(dyn);
@@ -342,7 +312,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x70:
             INST_NAME("PSHUFHW Gx, Ex, Ib");
             nextop = F8;
-            GETEXQ(v1, 0, 1);
+            GETEX(v1, 0, 1);
             GETGX(v0, 1);
             u8 = F8;
             // only high part need to be suffled. VTBL only handle 8bits value, so the 16bits suffles need to be changed in 8bits
@@ -423,7 +393,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("CMPSS Gx, Ex, Ib");
             nextop = F8;
             GETGX(v0, 1);
-            GETEX(v1, 0, 1);
+            GETEXSS(v1, 0, 1);
             u8 = F8;
             FCMPS(v0, v1);
             switch(u8&7) {
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 6e1d3a2d..2cb06c07 100755
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -286,7 +286,7 @@
             ed = i;             \
         } else {                \
             SMREAD();           \
-            addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, NULL, 0, D); \
+            addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff, 0, rex, NULL, 0, D); \
             ADDx_REG(x3, wback, i); \
             if(wback!=x3) wback = x3; \
             LDRB_U12(i, wback, fixedaddress); \
@@ -360,6 +360,84 @@
 // Write gb (gd) back to original register / memory
 #define GBBACK  BFIx(gb1, gd, gb2, 8);
 
+// Generic get G: the reg number is put in gd (R_RAX is not added)
+#define GETG    gd = ((nextop&0x38)>>3)+(rex.r<<3)
+
+// Get GX as a quad (might use x1)
+#define GETGX(a, w)                         \
+    gd = ((nextop&0x38)>>3)+(rex.r<<3);     \
+    a = sse_get_reg(dyn, ninst, x1, gd, w)
+
+// Get an empty GX (use x1)
+#define GETGX_empty(a)                      \
+    gd = ((nextop&0x38)>>3)+(rex.r<<3);     \
+    a = sse_get_reg_empty(dyn, ninst, x1, gd)
+
+// Get EX as a quad (x1 is used)
+#define GETEX(a, w, D)  \
+    if(MODREG) {        \
+        a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \
+    } else {            \
+        SMREAD();       \
+        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, D); \
+        a = fpu_get_scratch(dyn); \
+        VLDR128_U12(a, ed, fixedaddress); \
+    }
+
+// Put back EX if it was memory and not an xmm register
+#define PUTEX(a)        \
+    if(!MODREG) {       \
+        VSTR128_U12(a, ed, fixedaddress); \
+        SMWRITE2();     \
+    }
+
+// Get Ex as a double, not a quad (warning: x1 gets used)
+#define GETEXSD(a, w, D)    \
+    if(MODREG) {            \
+        a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \
+    } else {                \
+        SMREAD();           \
+        a = fpu_get_scratch(dyn); \
+        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, D); \
+        VLDR64_U12(a, ed, fixedaddress); \
+    }
+
+// Get Ex as a single, not a quad (warning: x1 gets used)
+#define GETEXSS(a, w, D)    \
+    if(MODREG) {            \
+        a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w); \
+    } else {                \
+        SMREAD();           \
+        a = fpu_get_scratch(dyn); \
+        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, D); \
+        VLDR32_U12(a, ed, fixedaddress); \
+    }
+
+// Get GM, might use x1, x2 and x3
+#define GETGM(a)    \
+    gd = ((nextop&0x38)>>3);    \
+    a = mmx_get_reg(dyn, ninst, x1, x2, x3, gd)
+
+// Get EM, might use x1, x2 and x3
+#define GETEM(a, D) \
+    if(MODREG) {    \
+        a = mmx_get_reg(dyn, ninst, x1, x2, x3, (nextop&7)); \
+    } else {        \
+        SMREAD();   \
+        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, D); \
+        a = fpu_get_scratch(dyn); \
+        VLDR64_U12(a, ed, fixedaddress); \
+    }
+
+// Put back EM if it was memory and not an mmx register
+#define PUTEM(a)    \
+    if(!MODREG) {   \
+        VSTR64_U12(a, ed, fixedaddress); \
+        SMWRITE2(); \
+    }
+
 // Get Direction with size Z and based of F_DF flag, on register r ready for LDR/STR fetching
 // F_DF is 1<<10, so 1 ROR 11*2 (so F_OF)
 #define GETDIR(r, A)    \
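All of the G-side macros above carve the same fields out of the ModRM byte, with the REX bits extending the 3-bit reg/rm numbers to 4 bits. A standalone C sketch of that decoding (hypothetical helper, just to make the bit layout explicit):

#include <stdint.h>
#include <stdio.h>

typedef struct { unsigned mod, reg, rm; } modrm_t;

/* nextop is the ModRM byte; rex_r / rex_b are the REX.R / REX.B bits. */
static modrm_t decode_modrm(uint8_t nextop, unsigned rex_r, unsigned rex_b)
{
    modrm_t m;
    m.mod = (nextop >> 6) & 3;                      /* 3 => register operand (MODREG) */
    m.reg = ((nextop & 0x38) >> 3) | (rex_r << 3);  /* the "G" side, as in GETG */
    m.rm  = (nextop & 7) | (rex_b << 3);            /* the "E" side when MODREG */
    return m;
}

int main(void)
{
    modrm_t m = decode_modrm(0xC1, 1, 0);
    printf("mod=%u reg=%u rm=%u\n", m.mod, m.reg, m.rm);  /* mod=3 reg=8 rm=1 */
    return 0;
}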
@@ -976,14 +1054,14 @@ void emit_neg32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4
 void emit_neg16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
 void emit_neg8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
 void emit_shl32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
-void emit_shl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4);
+void emit_shl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
-void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4);
-void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4);
-void emit_rol32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4);
-void emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4);
-void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int32_t c, int s3, int s4);
-void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int32_t c, int s3, int s4);
+void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
+void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
+void emit_rol32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
+void emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
+void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
+void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 
 void emit_pf(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
 
@@ -1100,7 +1178,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
 uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
-uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int seg, int* ok, int* need_epilog);
 uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
 uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
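A note on the int32_t to uint32_t switch for the *32c shift emitters above: the constant is a shift count, which x86 treats as unsigned and masks before use (5 bits for 32-bit operands, 6 bits under REX.W), so an unsigned parameter matches the semantics and avoids sign-extension surprises. A tiny C sketch of that masking rule (illustrative only, not box64 code):

#include <stdint.h>
#include <stdio.h>

/* x86 masks shift counts: count mod 32 for 32-bit ops, mod 64 with REX.W. */
static uint32_t x86_shift_count(uint32_t c, int rex_w)
{
    return c & (rex_w ? 0x3f : 0x1f);
}

int main(void)
{
    printf("%u\n", x86_shift_count(33, 0));  /* 1: SHL r32, 33 shifts by 1 */
    printf("%u\n", x86_shift_count(33, 1));  /* 33 is a valid 64-bit count */
    return 0;
}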
diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c
index 506eaf0f..c2cf1387 100755
--- a/src/dynarec/dynarec_native.c
+++ b/src/dynarec/dynarec_native.c
@@ -425,6 +425,19 @@ void* CreateEmptyBlock(dynablock_t* block, uintptr_t addr) {
 }
 
 void* FillBlock64(dynablock_t* block, uintptr_t addr) {
+    /*
+        A Block must have this layout:
+
+        0x0000 .. 0x0007    : dynablock_t* : self
+        0x0008 .. 8+4*n     : actual native instructions (n is the total number)
+        A      .. A+8*n     : Table64: n 64bits values
+        B      .. B+7       : dynablock_t* : self (as part of JmpNext, that simulates another block)
+        B+8    .. B+15      : 2 native opcodes for jmpnext (or jmp epilog in case of an empty block)
+        B+16   .. B+23      : jmpnext (or jmp_epilog) address
+        B+24   .. B+31      : empty (in case an architecture needs more than 2 opcodes)
+        B+32   .. B+32+sz   : instsize (compressed array with each instruction length on the x64 and native sides)
+
+    */
     if(IsInHotPage(addr)) {
         dynarec_log(LOG_DEBUG, "Cancelling dynarec FillBlock on hotpage for %p\n", (void*)addr);
         return NULL;
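The layout comment above fixes every offset from two quantities: the native instruction count and the Table64 entry count. A hedged C sketch of that offset arithmetic (names hypothetical; sizes taken from the comment, not from the actual allocator):

#include <stddef.h>
#include <stdio.h>

static void print_block_layout(size_t n_insts, size_t n_table64)
{
    size_t code = 8;                    /* after the leading dynablock_t* self */
    size_t A = code + 4 * n_insts;      /* Table64 starts right after the code */
    size_t B = A + 8 * n_table64;       /* trailing JmpNext pseudo-block */
    printf("self     @ 0x%zx\n", (size_t)0);
    printf("code     @ 0x%zx (%zu instructions)\n", code, n_insts);
    printf("table64  @ 0x%zx (%zu entries)\n", A, n_table64);
    printf("jmpnext  @ 0x%zx (self, 2 opcodes, target, spare)\n", B);
    printf("instsize @ 0x%zx\n", B + 32);
}

int main(void)
{
    print_block_layout(100, 4);
    return 0;
}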