diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2023-08-25 13:22:04 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-08-25 13:22:04 +0200 |
| commit | 0393e15843bb484bb65255a6b02a52fc2498c9fb (patch) | |
| tree | b182392749e3315018f6058d0ee6a8f3a80b50c2 | |
| parent | 30475f0edadb7e7d3af5d9ffb66b7b9b2c7299f1 (diff) | |
| parent | 539fb8e2149826ff785747f2f4524051cafd927d (diff) | |
| download | box64-0393e15843bb484bb65255a6b02a52fc2498c9fb.tar.gz box64-0393e15843bb484bb65255a6b02a52fc2498c9fb.zip | |
Merge pull request #951 from wannacu/main
Added (66) 0F 2A/2C/2D opcodes
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 1 | ||||
| -rw-r--r-- | src/dynarec/arm64/arm64_printer.c | 8 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_0f.c | 97 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_660f.c | 87 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 61 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 58 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 37 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_private.h | 4 |
8 files changed, 322 insertions, 31 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index 02ba2097..e27011db 100644 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -1360,6 +1360,7 @@ #define VFRINTRDQ(Vd,Vn, mode) EMIT(FRINT_vector(1, 0, (mode)&1, 1, ((mode)>>1)&1, Vn, Vd)) // round with mode, mode is 0 = TieEven, 1=+inf, 2=-inf, 3=zero #define VFRINTRSQ(Vd,Vn, mode) EMIT(FRINT_vector(1, 0, (mode)&1, 0, ((mode)>>1)&1, Vn, Vd)) +#define VFRINTRS(Vd, Vn, mode) EMIT(FRINT_vector(0, 0, (mode)&1, 0, ((mode)>>1)&1, Vn, Vd)) #define FRINTI_scalar(type, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | 0b001<<18 | 0b111<<15 | 0b10000<<10 | (Rn)<<5 | (Rd)) #define FRINTIS(Sd, Sn) EMIT(FRINTI_scalar(0b00, Sn, Sd)) diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c index 2806497b..705314df 100644 --- a/src/dynarec/arm64/arm64_printer.c +++ b/src/dynarec/arm64/arm64_printer.c @@ -1302,6 +1302,14 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr) snprintf(buff, sizeof(buff), "FCVT%sS %s, %c%d", roundings[a.c], sf?Xt[Rd]:Wt[Rd], s, Rn); return buff; } + if(isMask(opcode, "0QU01110of100001101o10nnnnnddddd", &a)) { + const char* Y[] = {"2S", "4S", "??", "2D"}; + const char* Z[] = {"S", "S", "??", "D"}; + const char* Vd = Y[(sf<<1) | a.Q]; + const char* roundings[] = {"N", "M", "P", "Z"}; + snprintf(buff, sizeof(buff), "VFCVT%s%s%s%s V%d.%s, V%d.%s", roundings[option], a.U?"U":"S", a.Q?"Q":"", Z[(sf<<1)|a.Q], Rd, Vd, Rn, Vd); + return buff; + } // FMOV if(isMask(opcode, "00011110pp100000010000nnnnnddddd", &a)) { diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index a3ef9ef7..d1103c4e 100644 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -302,7 +302,17 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SMWRITE2(); } break; - + case 0x2A: + INST_NAME("CVTPI2PS Gx,Em"); + nextop = F8; + GETGX(v0, 1); + GETEM(q1, 0); + d0 = fpu_get_scratch(dyn); + u8 = sse_setround(dyn, ninst, x1, x2, x3); + SCVTFS(d0, q1); + x87_restoreround(dyn, ninst, u8); + VMOVeD(v0, 0, d0, 0); + break; case 0x2B: INST_NAME("MOVNTPS Ex,Gx"); nextop = F8; @@ -317,7 +327,69 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VST128(v0, ed, fixedaddress); } break; - + case 0x2C: + INST_NAME("CVTTPS2PI Gm,Ex"); + nextop = F8; + GETGM(q0); + GETEX(v1, 0, 0); + if (box64_dynarec_fastround) { + VFCVTZSS(q0, v1); + } else { + MRS_fpsr(x5); + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + MSR_fpsr(x5); + ORRw_mask(x2, xZR, 1, 0); //0x80000000 + d0 = fpu_get_scratch(dyn); + for (int i=0; i<2; ++i) { + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + if (i) { + VMOVeS(d0, 0, v1, i); + FRINTZS(d0, d0); + } else { + FRINTZS(d0, v1); + } + FCVTZSwS(x1, d0); + MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ(x5, FPSR_IOC, 4+4); + MOVw_REG(x1, x2); + VMOVQSfrom(q0, i, x1); + } + } + break; + case 0x2D: + INST_NAME("CVTPS2PI Gm, Ex"); + nextop = F8; + GETGM(q0); + GETEX(v1, 0, 0); + if (box64_dynarec_fastround) { + u8 = sse_setround(dyn, ninst, x1, x2, x3); + VFRINTIS(q0, v1); + x87_restoreround(dyn, ninst, u8); + VFCVTZSS(q0, q0); + } else { + u8 = sse_setround(dyn, ninst, x1, x2, x3); + MRS_fpsr(x5); + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + MSR_fpsr(x5); + ORRw_mask(x2, xZR, 1, 0); //0x80000000 + d0 = fpu_get_scratch(dyn); + for (int i=0; i<2; ++i) { + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + if (i) { + VMOVeS(d0, 0, v1, i); + FRINTIS(d0, d0); + } else { + FRINTIS(d0, v1); + } + FCVTZSwS(x1, d0); + MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ(x5, FPSR_IOC, 4+4); + MOVw_REG(x1, x2); + VMOVQSfrom(q0, i, x1); + } + x87_restoreround(dyn, ninst, u8); + } + break; case 0x2E: // no special check... case 0x2F: @@ -2039,21 +2111,12 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin GETEM(q1, 0); d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); - USHR_64(d1, q1, 7); - MOV32w(x1, 1); - VDUPB(d0, x1); - VAND(d1, d1, d0); - MOV32w(x1, 0xff); - VDUPB(d0, x1); - VMUL_8(d0, d0, d1); // d0 = byte selection bitmask - VAND(d1, q0, d0); // d1 = masked Gm - LDx(x1, xRDI, 0); // x1 = [rdi] - VMOVQDto(x2, d0, 0); - MVNx_REG(x2, x2); - ANDx_REG(x1, x1, x2); // x1 = clear selected bytes - VMOVQDto(x2, d1, 0); - ORRx_REG(x1, x1, x2); - STx(x1, xRDI, 0); + VSSHR_8(d1, q1, 7); // d1 = byte slection mask + VLDR64_U12(d0, xRDI, 0); + VBIC(d0, d0, d1); // d0 = clear masked byte + VAND(d1, q0, d1); // d1 = masked Gm + VORR(d0, d0, d1); + VSTR64_U12(d0, xRDI, 0); break; case 0xF8: INST_NAME("PSUBB Gm, Em"); diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index 9f30df50..30cfc937 100644 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -201,7 +201,92 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n SMWRITE2(); } break; - + case 0x2A: + INST_NAME("CVTPI2PD Gx,Em"); + nextop = F8; + GETGX(v0, 1); + GETEM(q1, 0); + SXTL_32(v0, q1); + SCVTQFD(v0, v0); + break; + case 0x2B: + INST_NAME("MOVNTPD Ex,Gx"); + nextop = F8; + GETGX(v0, 0); + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + v1 = sse_get_reg_empty(dyn, ninst, x1, ed); + VMOVQ(v1, v0); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); + VST128(v0, ed, fixedaddress); + } + break; + case 0x2C: + INST_NAME("CVTTPD2PI Gm,Ex"); + nextop = F8; + GETGM(q0); + GETEX(v1, 0, 0); + if(box64_dynarec_fastround) { + VFCVTZSQD(q0, v1); + SQXTN_32(q0, q0); + } else { + MRS_fpsr(x5); + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + MSR_fpsr(x5); + ORRw_mask(x2, xZR, 1, 0); //0x80000000 + d0 = fpu_get_scratch(dyn); + for (int i=0; i<2; ++i) { + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + if (i) { + VMOVeD(d0, 0, v1, i); + FRINTZD(d0, d0); + } else { + FRINTZD(d0, v1); + } + FCVTZSwD(x1, d0); + MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ(x5, FPSR_IOC, 4+4); + MOVw_REG(x1, x2); + VMOVQSfrom(q0, i, x1); + } + } + break; + case 0x2D: + INST_NAME("CVTPD2PI Gm,Ex"); + nextop = F8; + GETGM(q0); + GETEX(v1, 0, 0); + if(box64_dynarec_fastround) { + u8 = sse_setround(dyn, ninst, x1, x2, x3); + VFRINTIDQ(q0, v1); + FCVTXN(q0, q0); + x87_restoreround(dyn, ninst, u8); + VFCVTZSS(q0, q0); + } else { + u8 = sse_setround(dyn, ninst, x1, x2, x3); + MRS_fpsr(x5); + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + MSR_fpsr(x5); + ORRw_mask(x2, xZR, 1, 0); //0x80000000 + d0 = fpu_get_scratch(dyn); + for (int i=0; i<2; ++i) { + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + if (i) { + VMOVeD(d0, 0, v1, i); + FRINTID(d0, d0); + } else { + FRINTID(d0, v1); + } + FCVTZSwD(x1, d0); + MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ(x5, FPSR_IOC, 4+4); + MOVw_REG(x1, x2); + VMOVQSfrom(q0, i, x1); + } + x87_restoreround(dyn, ninst, u8); + } + break; case 0x2E: // no special check... case 0x2F: diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 199068cf..9e225bd2 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -262,7 +262,20 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if(!MODREG) SMWRITE2(); break; - + case 0x2A: + INST_NAME("CVTPI2PS Gx,Em"); + nextop = F8; + GETGX(); + GETEM(x2, 0); + d0 = fpu_get_scratch(dyn); + u8 = sse_setround(dyn, ninst, x4, x5); + for (int i=0; i<2; ++i) { + LW(x3, wback, fixedaddress+i*4); + FCVTSW(d0, x3, RD_DYN); + FSW(d0, gback, gdoffset+i*4); + } + x87_restoreround(dyn, ninst, u8); + break; case 0x2B: INST_NAME("MOVNTPS Ex,Gx"); nextop = F8; @@ -273,6 +286,52 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SD(x3, wback, fixedaddress+0); SD(x4, wback, fixedaddress+8); break; + case 0x2C: + INST_NAME("CVTTPS2PI Gm,Ex"); + nextop = F8; + GETGM(); + GETEX(x2, 0); + d0 = fpu_get_scratch(dyn); + for (int i=0; i<2; ++i) { + if(!box64_dynarec_fastround) { + FSFLAGSI(0); // // reset all bits + } + FLW(d0, wback, fixedaddress+i*4); + FCVTWS(x1, d0, RD_RTZ); + if(!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, (1<<FR_NV)|(1<<FR_OF)); + BEQ_MARKi(x5, xZR, i); + MOV32w(x1, 0x80000000); + MARKi(i); + } + SW(x1, gback, gdoffset+i*4); + } + break; + case 0x2D: + INST_NAME("CVTPS2PI Gm, Ex"); + nextop = F8; + GETGM(); + GETEX(x2, 0); + d0 = fpu_get_scratch(dyn); + u8 = sse_setround(dyn, ninst, x6, x4); + for (int i=0; i<2; ++i) { + if(!box64_dynarec_fastround) { + FSFLAGSI(0); // // reset all bits + } + FLW(d0, wback, fixedaddress+i*4); + FCVTWS(x1, d0, RD_DYN); + if(!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, (1<<FR_NV)|(1<<FR_OF)); + BEQ_MARKi(x5, xZR, i); + MOV32w(x1, 0x80000000); + MARKi(i); + } + SW(x1, gback, gdoffset+i*4); + } + x87_restoreround(dyn, ninst, u8); + break; case 0x2E: // no special check... case 0x2F: diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index c60d214d..7baddf4f 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -148,6 +148,18 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SSE_LOOP_MV_Q2(x3); if(!MODREG) SMWRITE2(); break; + case 0x2A: + INST_NAME("CVTPI2PD Gx,Em"); + nextop = F8; + GETGX(); + GETEM(x2, 0); + d0 = fpu_get_scratch(dyn); + for (int i=0; i<2; ++i) { + LW(x1, wback, fixedaddress+i*4); + FCVTDW(d0, x1, RD_RTZ); + FSD(d0, gback, gdoffset+i*8); + } + break; case 0x2B: INST_NAME("MOVNTPD Ex, Gx"); nextop = F8; @@ -155,6 +167,52 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 0); SSE_LOOP_MV_Q2(x3); break; + case 0x2C: + INST_NAME("CVTTPD2PI Gm,Ex"); + nextop = F8; + GETGM(); + GETEX(x2, 0); + d0 = fpu_get_scratch(dyn); + for (int i=0; i<2; ++i) { + if(!box64_dynarec_fastround) { + FSFLAGSI(0); // // reset all bits + } + FLD(d0, wback, fixedaddress+i*8); + FCVTWD(x1, d0, RD_RTZ); + if(!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, (1<<FR_NV)|(1<<FR_OF)); + BEQ_MARKi(x5, xZR, i); + MOV32w(x1, 0x80000000); + MARKi(i); + } + SW(x1, gback, gdoffset+i*4); + } + break; + case 0x2D: + INST_NAME("CVTPD2PI Gm,Ex"); + nextop = F8; + GETGM(); + GETEX(x2, 0); + d0 = fpu_get_scratch(dyn); + u8 = sse_setround(dyn, ninst, x4, x5); + for (int i=0; i<2; ++i) { + if(!box64_dynarec_fastround) { + FSFLAGSI(0); // // reset all bits + } + FLD(d0, wback, fixedaddress+i*8); + FCVTWD(x1, d0, RD_DYN); + if(!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, (1<<FR_NV)|(1<<FR_OF)); + BEQ_MARKi(x5, xZR, i); + MOV32w(x1, 0x80000000); + MARKi(i); + } + SW(x1, gback, gdoffset+i*4); + } + x87_restoreround(dyn, ninst, u8); + break; case 0x2E: // no special check... case 0x2F: diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 0b1023b3..51d7817f 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -531,16 +531,22 @@ // R0 will not be pushed/popd if ret is -2. Flags are not save/restored #define CALL_S(F, ret) call_c(dyn, ninst, F, x6, ret, 0, 0) -#define MARK dyn->insts[ninst].mark = dyn->native_size -#define GETMARK dyn->insts[ninst].mark -#define MARK2 dyn->insts[ninst].mark2 = dyn->native_size -#define GETMARK2 dyn->insts[ninst].mark2 -#define MARK3 dyn->insts[ninst].mark3 = dyn->native_size -#define GETMARK3 dyn->insts[ninst].mark3 -#define MARKF dyn->insts[ninst].markf = dyn->native_size -#define GETMARKF dyn->insts[ninst].markf -#define MARKF2 dyn->insts[ninst].markf2 = dyn->native_size -#define GETMARKF2 dyn->insts[ninst].markf2 +#define MARKi(i) dyn->insts[ninst].mark[i] = dyn->native_size +#define GETMARKi(i) dyn->insts[ninst].mark[i] +#define MARK MARKi(0) +#define GETMARK GETMARKi(0) +#define MARK2 MARKi(1) +#define GETMARK2 GETMARKi(1) +#define MARK3 MARKi(2) +#define GETMARK3 GETMARKi(2) + +#define MARKFi(i) dyn->insts[ninst].markf[i] = dyn->native_size +#define GETMARKFi(i) dyn->insts[ninst].markf[i] +#define MARKF MARKFi(0) +#define GETMARKF GETMARKFi(0) +#define MARKF2 MARKFi(1) +#define GETMARKF2 GETMARKFi(1) + #define MARKSEG dyn->insts[ninst].markseg = dyn->native_size #define GETMARKSEG dyn->insts[ninst].markseg #define MARKLOCK dyn->insts[ninst].marklock = dyn->native_size @@ -550,20 +556,31 @@ j64 = GET##M - dyn->native_size; \ B##OP (reg1, reg2, j64) +#define Bxx_geni(OP, M, reg1, reg2, i) \ + j64 = GET##M##i(i) - dyn->native_size; \ + B##OP (reg1, reg2, j64) + // Branch to MARK if reg1==reg2 (use j64) #define BEQ_MARK(reg1, reg2) Bxx_gen(EQ, MARK, reg1, reg2) +#define BEQ_MARKi(reg1, reg2, i) Bxx_geni(EQ, MARK, reg1, reg2, i) // Branch to MARK if reg1!=reg2 (use j64) #define BNE_MARK(reg1, reg2) Bxx_gen(NE, MARK, reg1, reg2) +#define BNE_MARKi(reg1, reg2, i) Bxx_geni(NE, MARK, reg1, reg2, i) // Branch to MARK if reg1!=0 (use j64) #define BNEZ_MARK(reg) BNE_MARK(reg, xZR) +#define BNEZ_MARKi(reg, i) BNE_MARKi(reg, xZR, i) // Branch to MARK instruction unconditionnal (use j64) #define B_MARK_nocond Bxx_gen(__, MARK, 0, 0) +#define B_MARKi_nocond Bxx_geni(__, MARK, 0, 0, i) // Branch to MARK if reg1<reg2 (use j64) #define BLT_MARK(reg1, reg2) Bxx_gen(LT, MARK, reg1, reg2) +#define BLT_MARKi(reg1, reg2) Bxx_geni(LT, MARK, reg1, reg2, i) // Branch to MARK if reg1<reg2 (use j64) #define BLTU_MARK(reg1, reg2) Bxx_gen(LTU, MARK, reg1, reg2) +#define BLTU_MARKi(reg1, reg2) Bxx_geni(LTU, MARK, reg1, reg2, i) // Branch to MARK if reg1>=reg2 (use j64) #define BGE_MARK(reg1, reg2) Bxx_gen(GE, MARK, reg1, reg2) +#define BGE_MARKi(reg1, reg2) Bxx_geni(GE, MARK, reg1, reg2, i) // Branch to MARK2 if reg1==reg2 (use j64) #define BEQ_MARK2(reg1, reg2) Bxx_gen(EQ, MARK2, reg1,reg2) // Branch to MARK2 if reg1!=reg2 (use j64) diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index 99135297..d377309d 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -79,8 +79,8 @@ typedef struct instruction_rv64_s { int size2; // size of the arm emitted instrucion after pass2 int pred_sz; // size of predecessor list int *pred; // predecessor array - uintptr_t mark, mark2, mark3; - uintptr_t markf, markf2; + uintptr_t mark[3]; + uintptr_t markf[2]; uintptr_t markseg; uintptr_t marklock; int pass2choice;// value for choices that are fixed on pass2 for pass3 |