diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f.c | 4 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c | 2 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c | 272 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_f30f.c | 1 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 23 | ||||
| -rw-r--r-- | src/emu/x64runavxf30f.c | 4 |
6 files changed, 299 insertions, 7 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c index 871fefd7..5c569ab7 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c @@ -190,7 +190,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, } else { for(int l=0; l<1+vex.l; ++l) { if(!l) { - GETGX_empty_VX(v0, v2, 0); + GETGX_empty_VX(v0, v2); addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0); } else { GETGY_empty_VY(v0, v2, 0, -1, -1); @@ -219,7 +219,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, } else { for(int l=0; l<1+vex.l; ++l) { if(!l) { - GETGX_empty_VX(v0, v2, 0); + GETGX_empty_VX(v0, v2); addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0); ADDx_U12(x1, ed, 8); } else { diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c index 3cfafcaa..72c93dbe 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c @@ -123,7 +123,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, case 0x2A: INST_NAME("VCVTSI2SD Gx, Vx, Ed"); nextop = F8; - GETGX_empty_VX(v0, v1, 0); + GETGX_empty_VX(v0, v1); GETED(0); d1 = fpu_get_scratch(dyn, ninst); if(rex.w) { diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c index 5f870cb5..30e4ea24 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c @@ -61,6 +61,278 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, switch(opcode) { + case 0x10: + INST_NAME("VMOVSS Gx, Ex [,Vx]"); + nextop = F8; + GETG; + if(MODREG) { + GETGX_empty_VXEX(v0, v2, v1, 0); + if((v0!=v2) && (v0!=v1)) VMOVQ(v0, v2); + if(v0!=v1) VMOVeS(v0, 0, v1, 0); + if((v0!=v2) && (v0==v1)) { VMOVeS(v0, 
1, v2, 1); VMOVeD(v0, 1, v2, 1);} + } else { + GETGX_empty(v0); + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); + VLD32(v0, ed, fixedaddress); + } + YMM0(gd); + break; + case 0x11: + INST_NAME("VMOVSS Ex, Gx [,Vx]"); + nextop = F8; + GETG; + if(MODREG) { + GETGXVXEX_empty(v0, v2, v1, 0); + if((v2!=v1) && (v2!=v0)) VMOVQ(v2, v1); + if(v2!=v0) VMOVeS(v2, 0, v0, 0); + if((v2!=v1) && (v2==v0)) { VMOVeS(v2, 1, v0, 1); VMOVeD(v2, 1, v0, 1);} + YMM0((nextop&7)+(rex.b<<3)); + } else { + v0 = sse_get_reg(dyn, ninst, x1, gd, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); + VST32(v0, ed, fixedaddress); + SMWRITE2(); + } + break; + case 0x12: + INST_NAME("VMOVSLDUP Gx, Ex"); + nextop = F8; + GETEX_Y(q1, 0, 0); + GETGX_empty(q0); + VTRNQ1_32(q0, q1, q1); + if(vex.l) { + GETGY_empty_EY(q0, q1); + VTRNQ1_32(q0, q1, q1); + } else YMM0(gd); + break; + + case 0x16: + INST_NAME("MOVSHDUP Gx, Ex"); + nextop = F8; + GETEX_Y(q1, 0, 0); + GETGX_empty(q0); + VTRNQ2_32(q0, q1, q1); + if(vex.l) { + GETGY_empty_EY(q0, q1); + VTRNQ2_32(q0, q1, q1); + } else YMM0(gd); + break; + + case 0x2A: + INST_NAME("VCVTSI2SS Gx, Vx, Ed"); + nextop = F8; + d1 = fpu_get_scratch(dyn, ninst); + GETGX_empty_VX(v0, v1); + GETED(0); + if(rex.w) { + SCVTFSx(d1, ed); + } else { + SCVTFSw(d1, ed); + } + if(v0!=v1) VMOVQ(v0, v1); + VMOVeS(v0, 0, d1, 0); + YMM0(gd); + break; + + case 0x2C: + INST_NAME("VCVTTSS2SI Gd, Ex"); + nextop = F8; + GETGD; + GETEXSS(d0, 0, 0); + if(!box64_dynarec_fastround) { + MRS_fpsr(x5); + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + MSR_fpsr(x5); + } + FCVTZSxwS(gd, d0); + if(!box64_dynarec_fastround) { + MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ_NEXT(x5, FPSR_IOC); + if(rex.w) { + ORRx_mask(gd, xZR, 1, 1, 0); //0x8000000000000000 + } else { + ORRw_mask(gd, xZR, 1, 0); //0x80000000 + } + } + break; + case 0x2D: + INST_NAME("VCVTSS2SI Gd, 
Ex"); + nextop = F8; + GETGD; + GETEXSS(q0, 0, 0); + if(!box64_dynarec_fastround) { + MRS_fpsr(x5); + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + MSR_fpsr(x5); + } + u8 = sse_setround(dyn, ninst, x1, x2, x3); + d1 = fpu_get_scratch(dyn, ninst); + FRINTIS(d1, q0); + x87_restoreround(dyn, ninst, u8); + FCVTZSxwS(gd, d1); + if(!box64_dynarec_fastround) { + MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ_NEXT(x5, FPSR_IOC); + if(rex.w) { + ORRx_mask(gd, xZR, 1, 1, 0); //0x8000000000000000 + } else { + ORRw_mask(gd, xZR, 1, 0); //0x80000000 + } + } + break; + + case 0x58: + INST_NAME("VADDSS Gx, Vx, Ex"); + nextop = F8; + d1 = fpu_get_scratch(dyn, ninst); + GETGX_empty_VX(v0, v2); + GETEXSS(v1, 0, 0); + if(v0!=v2) { + if(v0==v1) { + VMOV(d1, v1); + v1 = d1; + } + VMOVQ(v0, v2); + } + FADDS(d1, v0, v1); + VMOVeS(v0, 0, d1, 0); + YMM0(gd) + break; + case 0x59: + INST_NAME("VMULSS Gx, Vx, Ex"); + nextop = F8; + d1 = fpu_get_scratch(dyn, ninst); + GETGX_empty_VX(v0, v2); + GETEXSS(v1, 0, 0); + if(v0!=v2) { + if(v0==v1) { + VMOV(d1, v1); + v1 = d1; + } + VMOVQ(v0, v2); + } + FMULS(d1, v0, v1); + VMOVeS(v0, 0, d1, 0); + YMM0(gd) + break; + case 0x5A: + INST_NAME("VCVTSS2SD Gx, Vx, Ex"); + nextop = F8; + d1 = fpu_get_scratch(dyn, ninst); + GETGX_empty_VX(v0, v2); + GETEXSS(v1, 0, 0); + if(v0!=v2) { + if(v0==v1) { + VMOV(d1, v1); + v1 = d1; + } + VMOVQ(v0, v2); + } + FCVT_D_S(d1, v1); + VMOVeD(v0, 0, d1, 0); + YMM0(gd); + break; + case 0x5B: + INST_NAME("VCVTTPS2DQ Gx, Ex"); + nextop = F8; + d0 = fpu_get_scratch(dyn, ninst); + if(!box64_dynarec_fastround) { + MRS_fpsr(x5); + ORRw_mask(x4, xZR, 1, 0); //0x80000000 + } + for(int l=0; l<1+vex.l; ++l) { + if(!l) { GETGX_empty_EX(v0, v1, 0); } else { GETGY_empty_EY(v0, v1); } + if(box64_dynarec_fastround) { + VFCVTZSQS(v0, v1); + } else { + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + MSR_fpsr(x5); + for(int i=0; i<4; ++i) { + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + MSR_fpsr(x5); + VMOVeS(d0, 0, v1, i); + VFCVTZSs(d0, d0); + 
MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ(x5, FPSR_IOC, 4+4); + VMOVQSfrom(d0, 0, x4); + VMOVeS(v0, i, d0, 0); + } + } + } + if(!vex.l) YMM0(gd); + break; + case 0x5C: + INST_NAME("VSUBSS Gx, Vx, Ex"); + nextop = F8; + d1 = fpu_get_scratch(dyn, ninst); + GETGX_empty_VX(v0, v2); + GETEXSS(v1, 0, 0); + if(v0!=v2) { + if(v0==v1) { + VMOV(d1, v1); + v1 = d1; + } + VMOVQ(v0, v2); + } + FSUBS(d1, v0, v1); + VMOVeS(v0, 0, d1, 0); + YMM0(gd) + break; + case 0x5D: + INST_NAME("VMINSS Gx, Vx, Ex"); + nextop = F8; + d1 = fpu_get_scratch(dyn, ninst); + GETGX_empty_VX(v0, v2); + GETEXSS(v1, 0, 0); + if(v0!=v2) { + if(v0==v1) { + VMOV(d1, v1); + v1 = d1; + } + VMOVQ(v0, v2); + } + FCMPS(v0, v1); + B_NEXT(cLS); //Less than or equal + VMOVeS(v0, 0, v1, 0); // to not erase upper part + YMM0(gd) + break; + case 0x5E: + INST_NAME("VDIVSS Gx, Vx, Ex"); + nextop = F8; + d1 = fpu_get_scratch(dyn, ninst); + GETGX_empty_VX(v0, v2); + GETEXSS(v1, 0, 0); + if(v0!=v2) { + if(v0==v1) { + VMOV(d1, v1); + v1 = d1; + } + VMOVQ(v0, v2); + } + FDIVS(d1, v0, v1); + VMOVeS(v0, 0, d1, 0); + YMM0(gd) + break; + case 0x5F: + INST_NAME("VMAXSS Gx, Vx, Ex"); + nextop = F8; + d1 = fpu_get_scratch(dyn, ninst); + GETGX_empty_VX(v0, v2); + GETEXSS(v1, 0, 0); + if(v0!=v2) { + if(v0==v1) { + VMOV(d1, v1); + v1 = d1; + } + VMOVQ(v0, v2); + } + FCMPS(v0, v1); + B_NEXT(cGE); //Greater than or equal + VMOVeS(v0, 0, v1, 0); // to not erase upper part + YMM0(gd) + break; + case 0x6F: INST_NAME("VMOVDQU Gx, Ex");// no alignment constraint on NEON here, so same as MOVDQA nextop = F8; diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c b/src/dynarec/arm64/dynarec_arm64_f30f.c index 96fe7543..a3144f21 100644 --- a/src/dynarec/arm64/dynarec_arm64_f30f.c +++ b/src/dynarec/arm64/dynarec_arm64_f30f.c @@ -291,7 +291,6 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } } break; - case 0x5C: INST_NAME("SUBSS Gx, Ex"); nextop = F8; diff --git 
a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index 8fd918d6..45296269 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -490,8 +490,13 @@ GETEX_Y(ex, 0, D); \ GETGX_empty(gx) +// Get empty GX, and non-writen EX +#define GETGX_empty_EX(gx, ex, D) \ + GETEX_Y(ex, 0, D); \ + GETGX_empty(gx) + // Get empty GX, and non-writen VX -#define GETGX_empty_VX(gx, vx, D) \ +#define GETGX_empty_VX(gx, vx) \ GETVX(vx, 0); \ GETGX_empty(gx) @@ -501,6 +506,11 @@ GETEX_Y(ex, 1, D); \ GETGX(gx, 0) +#define GETGXVXEX_empty(gx, vx, ex, D) \ + GETVX(vx, 0); \ + GETGX(gx, 0); \ + GETEX_empty_Y(ex, D); + // Get empty GY, and non-writen VY and EY #define GETGY_empty_VYEY(gy, vy, ey) \ vy = ymm_get_reg(dyn, ninst, x1, vex.v, 0, gd, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1); \ @@ -554,6 +564,17 @@ a = fpu_get_scratch(dyn, ninst); \ VLD128(a, ed, fixedaddress); \ } +// Get EX as a quad, (x3 is used) +#define GETEX_empty_Y(a, D) \ + if(MODREG) { \ + a = sse_get_reg_empty(dyn, ninst, x3, (nextop&7)+(rex.b<<3)); \ + } else { \ + WILLWRITE2(); \ + addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, D); \ + unscaled = 0; \ + a = fpu_get_scratch(dyn, ninst); \ + VLD128(a, ed, fixedaddress); \ + } // Get EX as a quad, (x1 is used) #define GETEX(a, w, D) \ diff --git a/src/emu/x64runavxf30f.c b/src/emu/x64runavxf30f.c index 511463fd..a6cd93d5 100644 --- a/src/emu/x64runavxf30f.c +++ b/src/emu/x64runavxf30f.c @@ -63,7 +63,7 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) switch(opcode) { - case 0x10: /* VMOVSS Gx Ex */ + case 0x10: /* VMOVSS Gx, [Vx,] Ex */ nextop = F8; GETEX(0); GETGX; @@ -78,7 +78,7 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step) GETGY; GY->u128 = 0; break; - case 0x11: /* MOVSS Ex Gx */ + case 0x11: /* VMOVSS Ex, [Vx,] Gx */ nextop = F8; GETEX(0); GETGX; |