Diffstat (limited to 'src')
-rw-r--r--   src/dynarec/arm64/arm64_emitter.h       |  1
-rw-r--r--   src/dynarec/arm64/arm64_printer.c       |  8
-rw-r--r--   src/dynarec/arm64/dynarec_arm64_0f.c    | 97
-rw-r--r--   src/dynarec/arm64/dynarec_arm64_660f.c  | 87
4 files changed, 175 insertions, 18 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 02ba2097..e27011db 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1360,6 +1360,7 @@
 #define VFRINTRDQ(Vd,Vn, mode)  EMIT(FRINT_vector(1, 0, (mode)&1, 1, ((mode)>>1)&1, Vn, Vd))    // round with mode, mode is 0 = TieEven, 1=+inf, 2=-inf, 3=zero
 #define VFRINTRSQ(Vd,Vn, mode)  EMIT(FRINT_vector(1, 0, (mode)&1, 0, ((mode)>>1)&1, Vn, Vd))
+#define VFRINTRS(Vd, Vn, mode)  EMIT(FRINT_vector(0, 0, (mode)&1, 0, ((mode)>>1)&1, Vn, Vd))
 
 #define FRINTI_scalar(type, Rn, Rd)  (0b11110<<24 | (type)<<22 | 1<<21 | 0b001<<18 | 0b111<<15 | 0b10000<<10 | (Rn)<<5 | (Rd))
 #define FRINTIS(Sd, Sn)         EMIT(FRINTI_scalar(0b00, Sn, Sd))
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index 2806497b..705314df 100644
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -1302,6 +1302,14 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "FCVT%sS %s, %c%d", roundings[a.c], sf?Xt[Rd]:Wt[Rd], s, Rn);
         return buff;
     }
+    if(isMask(opcode, "0QU01110of100001101o10nnnnnddddd", &a)) {
+        const char* Y[] = {"2S", "4S", "??", "2D"};
+        const char* Z[] = {"S", "S", "??", "D"};
+        const char* Vd = Y[(sf<<1) | a.Q];
+        const char* roundings[] = {"N", "M", "P", "Z"};
+        snprintf(buff, sizeof(buff), "VFCVT%s%s%s%s V%d.%s, V%d.%s", roundings[option], a.U?"U":"S", a.Q?"Q":"", Z[(sf<<1)|a.Q], Rd, Vd, Rn, Vd);
+        return buff;
+    }
 
     // FMOV
     if(isMask(opcode, "00011110pp100000010000nnnnnddddd", &a)) {
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index a3ef9ef7..d1103c4e 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -302,7 +302,17 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 SMWRITE2();
             }
             break;
-
+        case 0x2A:
+            INST_NAME("CVTPI2PS Gx,Em");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEM(q1, 0);
+            d0 = fpu_get_scratch(dyn);
+            u8 = sse_setround(dyn, ninst, x1, x2, x3);
+            SCVTFS(d0, q1);
+            x87_restoreround(dyn, ninst, u8);
+            VMOVeD(v0, 0, d0, 0);
+            break;
         case 0x2B:
             INST_NAME("MOVNTPS Ex,Gx");
             nextop = F8;
@@ -317,7 +327,69 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 VST128(v0, ed, fixedaddress);
             }
             break;
-
+        case 0x2C:
+            INST_NAME("CVTTPS2PI Gm,Ex");
+            nextop = F8;
+            GETGM(q0);
+            GETEX(v1, 0, 0);
+            if (box64_dynarec_fastround) {
+                VFCVTZSS(q0, v1);
+            } else {
+                MRS_fpsr(x5);
+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                MSR_fpsr(x5);
+                ORRw_mask(x2, xZR, 1, 0);    //0x80000000
+                d0 = fpu_get_scratch(dyn);
+                for (int i=0; i<2; ++i) {
+                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                    if (i) {
+                        VMOVeS(d0, 0, v1, i);
+                        FRINTZS(d0, d0);
+                    } else {
+                        FRINTZS(d0, v1);
+                    }
+                    FCVTZSwS(x1, d0);
+                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                    TBZ(x5, FPSR_IOC, 4+4);
+                    MOVw_REG(x1, x2);
+                    VMOVQSfrom(q0, i, x1);
+                }
+            }
+            break;
+        case 0x2D:
+            INST_NAME("CVTPS2PI Gm, Ex");
+            nextop = F8;
+            GETGM(q0);
+            GETEX(v1, 0, 0);
+            if (box64_dynarec_fastround) {
+                u8 = sse_setround(dyn, ninst, x1, x2, x3);
+                VFRINTIS(q0, v1);
+                x87_restoreround(dyn, ninst, u8);
+                VFCVTZSS(q0, q0);
+            } else {
+                u8 = sse_setround(dyn, ninst, x1, x2, x3);
+                MRS_fpsr(x5);
+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                MSR_fpsr(x5);
+                ORRw_mask(x2, xZR, 1, 0);    //0x80000000
+                d0 = fpu_get_scratch(dyn);
+                for (int i=0; i<2; ++i) {
+                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                    if (i) {
+                        VMOVeS(d0, 0, v1, i);
+                        FRINTIS(d0, d0);
+                    } else {
+                        FRINTIS(d0, v1);
+                    }
+                    FCVTZSwS(x1, d0);
+                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                    TBZ(x5, FPSR_IOC, 4+4);
+                    MOVw_REG(x1, x2);
+                    VMOVQSfrom(q0, i, x1);
+                }
+                x87_restoreround(dyn, ninst, u8);
+            }
+            break;
         case 0x2E:
             // no special check...
         case 0x2F:
@@ -2039,21 +2111,12 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             GETEM(q1, 0);
             d0 = fpu_get_scratch(dyn);
             d1 = fpu_get_scratch(dyn);
-            USHR_64(d1, q1, 7);
-            MOV32w(x1, 1);
-            VDUPB(d0, x1);
-            VAND(d1, d1, d0);
-            MOV32w(x1, 0xff);
-            VDUPB(d0, x1);
-            VMUL_8(d0, d0, d1); // d0 = byte selection bitmask
-            VAND(d1, q0, d0);   // d1 = masked Gm
-            LDx(x1, xRDI, 0);   // x1 = [rdi]
-            VMOVQDto(x2, d0, 0);
-            MVNx_REG(x2, x2);
-            ANDx_REG(x1, x1, x2);   // x1 = clear selected bytes
-            VMOVQDto(x2, d1, 0);
-            ORRx_REG(x1, x1, x2);
-            STx(x1, xRDI, 0);
+            VSSHR_8(d1, q1, 7); // d1 = byte slection mask
+            VLDR64_U12(d0, xRDI, 0);
+            VBIC(d0, d0, d1);   // d0 = clear masked byte
+            VAND(d1, q0, d1);   // d1 = masked Gm
+            VORR(d0, d0, d1);
+            VSTR64_U12(d0, xRDI, 0);
             break;
         case 0xF8:
             INST_NAME("PSUBB Gm, Em");
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 9f30df50..30cfc937 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -201,7 +201,92 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 SMWRITE2();
             }
             break;
-
+        case 0x2A:
+            INST_NAME("CVTPI2PD Gx,Em");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEM(q1, 0);
+            SXTL_32(v0, q1);
+            SCVTQFD(v0, v0);
+            break;
+        case 0x2B:
+            INST_NAME("MOVNTPD Ex,Gx");
+            nextop = F8;
+            GETGX(v0, 0);
+            if(MODREG) {
+                ed = (nextop&7)+(rex.b<<3);
+                v1 = sse_get_reg_empty(dyn, ninst, x1, ed);
+                VMOVQ(v1, v0);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0);
+                VST128(v0, ed, fixedaddress);
+            }
+            break;
+        case 0x2C:
+            INST_NAME("CVTTPD2PI Gm,Ex");
+            nextop = F8;
+            GETGM(q0);
+            GETEX(v1, 0, 0);
+            if(box64_dynarec_fastround) {
+                VFCVTZSQD(q0, v1);
+                SQXTN_32(q0, q0);
+            } else {
+                MRS_fpsr(x5);
+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                MSR_fpsr(x5);
+                ORRw_mask(x2, xZR, 1, 0);    //0x80000000
+                d0 = fpu_get_scratch(dyn);
+                for (int i=0; i<2; ++i) {
+                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                    if (i) {
+                        VMOVeD(d0, 0, v1, i);
+                        FRINTZD(d0, d0);
+                    } else {
+                        FRINTZD(d0, v1);
+                    }
+                    FCVTZSwD(x1, d0);
+                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                    TBZ(x5, FPSR_IOC, 4+4);
+                    MOVw_REG(x1, x2);
+                    VMOVQSfrom(q0, i, x1);
+                }
+            }
+            break;
+        case 0x2D:
+            INST_NAME("CVTPD2PI Gm,Ex");
+            nextop = F8;
+            GETGM(q0);
+            GETEX(v1, 0, 0);
+            if(box64_dynarec_fastround) {
+                u8 = sse_setround(dyn, ninst, x1, x2, x3);
+                VFRINTIDQ(q0, v1);
+                FCVTXN(q0, q0);
+                x87_restoreround(dyn, ninst, u8);
+                VFCVTZSS(q0, q0);
+            } else {
+                u8 = sse_setround(dyn, ninst, x1, x2, x3);
+                MRS_fpsr(x5);
+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                MSR_fpsr(x5);
+                ORRw_mask(x2, xZR, 1, 0);    //0x80000000
+                d0 = fpu_get_scratch(dyn);
+                for (int i=0; i<2; ++i) {
+                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                    if (i) {
+                        VMOVeD(d0, 0, v1, i);
+                        FRINTID(d0, d0);
+                    } else {
+                        FRINTID(d0, v1);
+                    }
+                    FCVTZSwD(x1, d0);
+                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                    TBZ(x5, FPSR_IOC, 4+4);
+                    MOVw_REG(x1, x2);
+                    VMOVQSfrom(q0, i, x1);
+                }
+                x87_restoreround(dyn, ninst, u8);
+            }
+            break;
         case 0x2E:
            // no special check...
         case 0x2F:
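
All the new 2C/2D cases share the same slow path when box64_dynarec_fastround is off: each lane is rounded or truncated with FRINT*, converted with a scalar FCVTZS, and FPSR.IOC is then tested so an invalid conversion can be replaced by 0x80000000, the x86 "integer indefinite" result. The following is only a scalar sketch of that intended behaviour, not box64 code; cvttps2pi_lane is a made-up name used purely for illustration.

    #include <math.h>
    #include <stdint.h>

    /* Illustrative model of one lane of CVTTPS2PI: truncate toward zero,
     * or return the x86 "integer indefinite" value 0x80000000 when the
     * source is NaN or out of int32 range. This is the behaviour the
     * FRINTZS/FCVTZSwS sequence plus the FPSR.IOC check emulates.        */
    static int32_t cvttps2pi_lane(float f)
    {
        if (isnan(f) || f >= 2147483648.0f || f < -2147483648.0f)
            return INT32_MIN;   /* 0x80000000, written by the MOVw_REG(x1, x2) fallback */
        return (int32_t)f;      /* the C cast truncates toward zero */
    }

CVTPS2PI and CVTPD2PI differ only in rounding with the current MXCSR mode instead of truncating, which is why those cases bracket the loop with sse_setround()/x87_restoreround() and use FRINTI* instead of FRINTZ*.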
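
The last dynarec_arm64_0f.c hunk reworks MASKMOVQ (0F F7): the byte-select mask is now built with one signed shift (VSSHR_8 by 7 expands each sign bit to 0x00/0xFF), and the merge with the 8 bytes at [RDI] stays entirely in NEON via VBIC/VAND/VORR around a single 64-bit load and store, instead of round-tripping through general-purpose registers. As a plain-C model of the instruction semantics being implemented (maskmovq_model is illustrative only, not a box64 helper):

    #include <stdint.h>

    /* Illustrative model of MMX MASKMOVQ mm1, mm2: for each of the 8 bytes,
     * store src[i] to memory only if bit 7 of the selector byte is set;
     * the other destination bytes are left untouched.                      */
    static void maskmovq_model(uint8_t dst[8], const uint8_t src[8], const uint8_t sel[8])
    {
        for (int i = 0; i < 8; ++i)
            if (sel[i] & 0x80)     /* VSSHR_8(d1, q1, 7) turns this bit into a full-byte mask */
                dst[i] = src[i];   /* VBIC clears the selected bytes, VAND/VORR merge in Gm   */
    }

The removed sequence produced the same result but needed two VMOVQDto transfers plus MVNx/ANDx/ORRx on the general-purpose side.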