| field | value | date |
|---|---|---|
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-06-01 18:03:24 +0200 |
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-06-01 18:03:24 +0200 |
| commit | c6814a6f975a5aa796769cdcd4554a069c30e699 (patch) | |
| tree | 0742c284b08ba7244570e818d04b4bd97c4787b9 /src/dynarec | |
| parent | 9dbd7fc7e0615f48c9287305610bf9aed1cc24f3 (diff) | |
| download | box64-c6814a6f975a5aa796769cdcd4554a069c30e699.tar.gz box64-c6814a6f975a5aa796769cdcd4554a069c30e699.zip | |
[ARM64_DYNAREC] Added AVX.0F 28/29/2B/2E/2F/54-56/5A-5F opcodes, plus various small fixes
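One pattern repeats in every new handler in this commit: the 128-bit paths (vex.l clear) end with YMM0(gd). That tracks the AVX rule that a VEX.128-encoded instruction zeroes bits 255:128 of its destination register instead of leaving them untouched the way a legacy SSE instruction would, and YMM0() appears to be how the dynarec records that the upper lane is known-zero. A minimal sketch of the architectural rule, assuming a hypothetical ymm_t layout that is not box64's internal representation:

```c
#include <stdint.h>

/* Hypothetical illustration of the VEX.128 destination rule modelled by
 * the YMM0(gd) calls below: a 128-bit AVX op writes the low lane and
 * clears bits 255:128, while legacy SSE leaves the upper lane alone. */
typedef struct { uint64_t q[4]; } ymm_t;

static void vex128_store_result(ymm_t* dst, uint64_t lo0, uint64_t lo1)
{
    dst->q[0] = lo0;
    dst->q[1] = lo1;
    dst->q[2] = 0;  /* upper 128 bits zeroed by any VEX.128 instruction */
    dst->q[3] = 0;
}
```

The 256-bit paths (vex.l set) instead write the upper lane explicitly through the GETGY_* helpers.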
Diffstat (limited to 'src/dynarec')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 2 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_0f.c | 190 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 16 |
3 files changed, 201 insertions, 7 deletions
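One of the "various small fixes" is the arm64_emitter.h hunk at the top of the diff: the old BITBIF_gen folded the opc2 field into the hard-coded 0b101110101 constant, so the opc2 argument never reached the encoding and every user of the macro emitted the same instruction. The new form places opc2 at bits 23:22. A small standalone sketch of the difference, copying the two macro bodies side by side (register numbers are arbitrary, and the opc2 meanings assume the usual AArch64 "three registers of the same type" logical encoding):

```c
#include <stdint.h>
#include <stdio.h>

/* Old and new BITBIF_gen bodies from arm64_emitter.h, for illustration.
 * In the old form bits 23:22 come from the constant, so opc2 is ignored. */
#define BITBIF_OLD(Q, opc2, Rm, Rn, Rd) ((Q)<<30 | 0b101110101<<21 | (Rm)<<16 | 0b000111<<10 | (Rn)<<5 | (Rd))
#define BITBIF_NEW(Q, opc2, Rm, Rn, Rd) ((Q)<<30 | 0b101110<<24 | (opc2)<<22 | 1<<21 | (Rm)<<16 | 0b000111<<10 | (Rn)<<5 | (Rd))

int main(void)
{
    /* In this encoding group opc2 selects the instruction:
       0b01 = BSL, 0b10 = BIT, 0b11 = BIF. */
    uint32_t old_bit = BITBIF_OLD(1, 0b10, 2, 1, 0), old_bif = BITBIF_OLD(1, 0b11, 2, 1, 0);
    uint32_t new_bit = BITBIF_NEW(1, 0b10, 2, 1, 0), new_bif = BITBIF_NEW(1, 0b11, 2, 1, 0);
    printf("old macro: BIT %08x, BIF %08x  (identical: opc2 dropped)\n", old_bit, old_bif);
    printf("new macro: BIT %08x, BIF %08x  (distinct)\n", new_bit, new_bif);
    return 0;
}
```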
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 9552f5de..050eece5 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1483,7 +1483,7 @@ int convert_bitmask(uint64_t bitmask);
 #define VUZP1Q_64(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b11, Rm, 0, Rn, Rt))
 #define VUZP2Q_64(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b11, Rm, 1, Rn, Rt))
 
-#define BITBIF_gen(Q, opc2, Rm, Rn, Rd) ((Q)<<30 | 0b101110101<<21 | (Rm)<<16 | 0b000111<<10 | (Rn)<<5 | (Rd))
+#define BITBIF_gen(Q, opc2, Rm, Rn, Rd) ((Q)<<30 | 0b101110<<24 | (opc2)<<22 | 1<<21 | (Rm)<<16 | 0b000111<<10 | (Rn)<<5 | (Rd))
 // Bitwise insert Vn in Vd if Vm is "0"
 #define VBIF(Vd, Vn,Vm) EMIT(BITBIF_gen(0, 0b11, Vm, Vn, Vd))
 // Bitwise insert Vn in Vd if Vm is "0"
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_0f.c
index e7ccee1e..2acb719a 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_0f.c
@@ -176,6 +176,120 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
             } else YMM0(gd);
             break;
+        case 0x28:
+            INST_NAME("VMOVAPS Gx, Ex");
+            nextop = F8;
+            GETG;
+            if(MODREG) {
+                ed = (nextop&7)+(rex.b<<3);
+                v1 = sse_get_reg(dyn, ninst, x1, ed, 0);
+                v0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+                VMOVQ(v0, v1);
+                if(vex.l) {
+                    GETGY_empty_EY(v0, v1);
+                    VMOVQ(v0, v1);
+                }
+            } else {
+                v0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0);
+                VLDR128_U12(v0, ed, fixedaddress);
+                if(vex.l) {
+                    GETGY_empty(v0, -1, -1, -1);
+                    VLDR128_U12(v0, ed, fixedaddress+16);
+                }
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+        case 0x29:
+            INST_NAME("VMOVAPS Ex, Gx");
+            nextop = F8;
+            GETG;
+            v0 = sse_get_reg(dyn, ninst, x1, gd, 0);
+            if(MODREG) {
+                ed = (nextop&7)+(rex.b<<3);
+                v1 = sse_get_reg_empty(dyn, ninst, x1, ed);
+                VMOVQ(v1, v0);
+                if(vex.l) {
+                    GETGYEY_empty(v0, v1);
+                    VMOVQ(v1, v0);
+                }
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0);
+                VSTR128_U12(v0, ed, fixedaddress);
+                if(vex.l) {
+                    GETGY(v0, 0, -1, -1, -1);
+                    VSTR128_U12(v0, ed, fixedaddress+16);
+                }
+                SMWRITE2();
+            }
+            break;
+
+        case 0x2B:
+            INST_NAME("VMOVNTPS Ex, Gx");
+            nextop = F8;
+            GETG;
+            v0 = sse_get_reg(dyn, ninst, x1, gd, 0);
+            if(MODREG) {
+                ed = (nextop&7)+(rex.b<<3);
+                v1 = sse_get_reg_empty(dyn, ninst, x1, ed);
+                VMOVQ(v1, v0);
+                if(vex.l) {
+                    GETGYEY_empty(v0, v1);
+                    VMOVQ(v1, v0);
+                }
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0);
+                VSTR128_U12(v0, ed, fixedaddress);
+                if(vex.l) {
+                    GETGY(v0, 0, -1, -1, -1);
+                    VSTR128_U12(v0, ed, fixedaddress+16);
+                }
+            }
+            break;
+
+        case 0x2E:
+            // no special check...
+        case 0x2F:
+            if(opcode==0x2F) {INST_NAME("VCOMISS Gx, Ex");} else {INST_NAME("VUCOMISS Gx, Ex");}
+            SETFLAGS(X_ALL, SF_SET_NODF);
+            nextop = F8;
+            GETGX(v0, 0);
+            GETEXSS(s0, 0, 0);
+            FCMPS(v0, s0);
+            FCOMI(x1, x2);
+            break;
+
+        case 0x54:
+            INST_NAME("VANDPS Gx, Vx, Ex");
+            nextop = F8;
+            GETGX_empty_VXEX(v0, v2, v1, 0);
+            VANDQ(v0, v2, v1);
+            if(vex.l) {
+                GETGY_empty_VYEY(v0, v2, v1);
+                VANDQ(v0, v2, v1);
+            } else YMM0(gd)
+            break;
+        case 0x55:
+            INST_NAME("VANDNPS Gx, Vx, Ex");
+            nextop = F8;
+            GETGX_empty_VXEX(v0, v2, v1, 0);
+            VBICQ(v0, v1, v2);
+            if(vex.l) {
+                GETGY_empty_VYEY(v0, v2, v1);
+                VBICQ(v0, v1, v2);
+            } else YMM0(gd)
+            break;
+        case 0x56:
+            INST_NAME("VORPS Gx, Vx, Ex");
+            nextop = F8;
+            GETGX_empty_VXEX(v0, v2, v1, 0);
+            VORRQ(v0, v2, v1);
+            if(vex.l) {
+                GETGY_empty_VYEY(v0, v2, v1);
+                VORRQ(v0, v2, v1);
+            } else YMM0(gd)
+            break;
         case 0x57:
             INST_NAME("VXORPS Gx, Vx, Ex");
             nextop = F8;
@@ -206,6 +320,82 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
                 VFMULQS(v0, v2, v1);
             } else YMM0(gd)
             break;
+        case 0x5A:
+            INST_NAME("VCVTPS2PD Gx, Ex");
+            nextop = F8;
+            GETGX_empty_EX(v0, v1, 0);
+            if(vex.l) {
+                GETGY_empty(q0, -1, -1, -1);
+                FCVTL2(q0, v1);
+            } else YMM0(gd)
+            FCVTL(v0, v1);
+            break;
+        case 0x5B:
+            INST_NAME("VCVTDQ2PS Gx, Ex");
+            nextop = F8;
+            GETGX_empty_EX(v0, v1, 0);
+            SCVTQFS(v0, v1);
+            if(vex.l) {
+                GETGY_empty_EY(v0, v1);
+                SCVTQFS(v0, v1);
+            } else YMM0(gd)
+            break;
+        case 0x5C:
+            INST_NAME("VSUBPS Gx, Vx, Ex");
+            nextop = F8;
+            GETGX_empty_VXEX(v0, v2, v1, 0);
+            VFSUBQS(v0, v2, v1);
+            if(vex.l) {
+                GETGY_empty_VYEY(v0, v2, v1);
+                VFSUBQS(v0, v2, v1);
+            } else YMM0(gd)
+            break;
+        case 0x5D:
+            INST_NAME("VMINPS Gx, Vx, Ex");
+            nextop = F8;
+            if(!box64_dynarec_fastnan) {
+                q0 = fpu_get_scratch(dyn, ninst);
+            }
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
+                // FMIN/FMAX wll not copy a NaN if either is NaN
+                // but x86 will copy src2 if either value is NaN, so lets force a copy of Src2 (Ex) if result is NaN
+                VFMINQS(v0, v2, v1);
+                if(!box64_dynarec_fastnan && (v2!=v1)) {
+                    VFCMEQQS(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
+                    VBIFQ(v0, v1, q0);      // copy dest where source is NaN
+                }
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+        case 0x5E:
+            INST_NAME("VDIVPS Gx, Vx, Ex");
+            nextop = F8;
+            GETGX_empty_VXEX(v0, v2, v1, 0);
+            VFDIVQS(v0, v2, v1);
+            if(vex.l) {
+                GETGY_empty_VYEY(v0, v2, v1);
+                VFDIVQS(v0, v2, v1);
+            } else YMM0(gd)
+            break;
+        case 0x5F:
+            INST_NAME("VMAXPS Gx, Vx, Ex");
+            nextop = F8;
+            if(!box64_dynarec_fastnan) {
+                q0 = fpu_get_scratch(dyn, ninst);
+            }
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
+                // FMIN/FMAX wll not copy a NaN if either is NaN
+                // but x86 will copy src2 if either value is NaN, so lets force a copy of Src2 (Ex) if result is NaN
+                VFMAXQS(v0, v2, v1);
+                if(!box64_dynarec_fastnan && (v2!=v1)) {
+                    VFCMEQQS(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
+                    VBIFQ(v0, v1, q0);      // copy dest where source is NaN
+                }
+            }
+            if(!vex.l) YMM0(gd);
+            break;
 
         case 0x77:
             INST_NAME("VZEROUPPER");
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 45296269..3e4c605d 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -534,16 +534,22 @@
     if(MODREG) \
         ey = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 1, gd, -1, -1); \
     else \
-        VLD128(ey, ed, fixedaddress+16); \
+        VLDR128_U12(ey, ed, fixedaddress+16); \
     gy = ymm_get_reg(dyn, ninst, x1, gd, 0, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1, -1)
 
+// Get empty EY and non-writen GY
+#define GETGYEY_empty(gy, ey) \
+    gy = ymm_get_reg(dyn, ninst, x1, gd, 0, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1, -1); \
+    if(MODREG) \
+        ey = ymm_get_reg_empty(dyn, ninst, x1, (nextop&7)+(rex.b<<3), gd, -1, -1)
+
 // Get empty GY, and non-writen EY
 #define GETGY_empty_EY(gy, ey) \
     if(MODREG) \
         ey = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0, gd, -1, -1); \
     else \
-        VLD128(ey, ed, fixedaddress+16); \
-    gy = ymm_get_reg_empty(dyn, ninst, x1, gd, -1, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1)
+        VLDR128_U12(ey, ed, fixedaddress+16); \
+    gy = ymm_get_reg_empty(dyn, ninst, x1, gd, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1, -1)
 
 // Get empty VY, and non-writen EY
 #define GETVY_empty_EY(vy, ey) \
@@ -551,7 +557,7 @@
         ey = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0, vex.v, -1, -1); \
     else \
         VLD128(ey, ed, fixedaddress+16); \
-    vy = ymm_get_reg_empty(dyn, ninst, x1, vex.v, -1, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1)
+    vy = ymm_get_reg_empty(dyn, ninst, x1, vex.v, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1, -1)
 
 // Get EX as a quad, (x3 is used)
 #define GETEX_Y(a, w, D) \
@@ -572,8 +578,6 @@
         WILLWRITE2(); \
         addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, D); \
         unscaled = 0; \
-        a = fpu_get_scratch(dyn, ninst); \
-        VLD128(a, ed, fixedaddress); \
     }
 
 // Get EX as a quad, (x1 is used)
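A closing note on the VMINPS/VMAXPS handling above: x86 MINPS/MAXPS return the second source whenever either input is a NaN (and also on equal inputs such as -0.0 vs +0.0), whereas AArch64 FMIN/FMAX produce a NaN in the mixed-NaN case. That is what the non-fastnan path compensates for when it follows VFMINQS/VFMAXQS with VFCMEQQS + VBIFQ: NaN result lanes are patched back to the Ex (second source) lane. A per-lane reference model of the x86 behaviour, as a hedged sketch (minps_lane/maxps_lane are hypothetical helpers, not box64 code):

```c
#include <math.h>

/* Per-lane reference for what the VFMINQS/VFMAXQS + VFCMEQQS + VBIFQ
 * sequence reproduces: x86 returns src2 whenever either input is NaN. */
static float minps_lane(float src1, float src2)
{
    if (isnan(src1) || isnan(src2))
        return src2;                    /* x86 rule: any NaN -> src2 */
    return (src1 < src2) ? src1 : src2; /* ties (e.g. -0.0 vs +0.0) -> src2 */
}

static float maxps_lane(float src1, float src2)
{
    if (isnan(src1) || isnan(src2))
        return src2;
    return (src1 > src2) ? src1 : src2;
}
```

When box64_dynarec_fastnan is enabled, that correction is skipped and the plain FMIN/FMAX result is kept.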