diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2025-01-31 16:50:51 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-01-31 16:50:51 +0100 |
| commit | 2239f92816ab68ea99e8a756af1d01360644eb6d (patch) | |
| tree | 417360752c3d374f2540cdb077c8d90688b711e1 /src | |
| parent | 637e177408d5cd7f04e52fb79b6183bee3088988 (diff) | |
| download | box64-2239f92816ab68ea99e8a756af1d01360644eb6d.tar.gz box64-2239f92816ab68ea99e8a756af1d01360644eb6d.zip | |
[ARM64_DYNAREC] Various small fixes and optims in a few AVX opcodes
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_0f.c | 22 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_0f38.c | 72 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f.c | 2 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c | 7 |
4 files changed, 69 insertions, 34 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_0f.c index 9e9e6a76..387faf1b 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_0f.c @@ -643,13 +643,13 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int 4 -> 5 // Inexact 5 -> 1 // denormal */ - // doing X86 -> ARM here, 0 1 2 3 4 5 -> 0 5 1 2 3 4 + // doing X86 -> ARM here, 0 1 2 3 4 5 -> 0 2 3 4 5 1 if(ed!=x1) - MOVw_REG(x1, ed); - BFXILw(x2, x1, 1, 5); // x2 = 1 2 3 4 5 ... - BFIw(x1, x2, 2, 4); // x1 = 0 1 1 2 3 4 - RORw(x2, x2, 4); // x2 = 5 .... 1 2 3 4 - BFIw(x1, x2, 1, 1); // x1 = 0 5 1 2 3 4 + MOVw_REG(x1, ed); // x1 = 543210 + RORw(x3, x1, 2); // x3 = 10.....5432 + BFIw(x1, x3, 1, 4); // x1 = 54320 + RORw(x3, x3, 32-1); // x3 = 0.....54321 + BFIw(x1, x3, 5, 1); // x1 = 154320 MRS_fpsr(x2); BFIx(x2, x1, 0, 6); MSR_fpsr(x2); @@ -661,12 +661,12 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int LDRw_U12(x4, xEmu, offsetof(x64emu_t, mxcsr)); if(BOX64ENV(sse_flushto0)) { // sync with fpsr, with mask from mxcsr - // doing ARM -> X86 here, 0 1 2 3 4 5 -> 0 2 3 4 5 1 MRS_fpsr(x1); - RORw(x3, x1, 2); //x3 = 2 3 4 5 .... 
0 1 - BFIw(x1, x3, 1, 4); - RORw(x3, x3, 32-1); - BFIw(x1, x3, 5, 1); // x1 is Flags + // doing ARM -> X86 here, 543210 => 432150 + UBFXw(x2, x1, 1, 5); // x2 = 54321 + BFIw(x1, x2, 2, 4); // x1 = 432110 + LSRw(x2, x2, 4); // x2 = 5 + BFIw(x1, x2, 1, 1); // x1 = 432150 //BFXILw(x3, x4, 7, 6); // this would be the mask, but let's ignore that for now BFIw(x4, x1, 0, 6); // inject back the flags } diff --git a/src/dynarec/arm64/dynarec_arm64_avx_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_0f38.c index da34d5f7..bd0a1baf 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_0f38.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_0f38.c @@ -39,6 +39,7 @@ uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, i int q0, q1, q2; int d0, d1, d2; int s0; + int need_tst; uint64_t tmp64u; int64_t j64; int64_t fixedaddress; @@ -68,21 +69,32 @@ uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, i GETGD; GETED(0); GETVD; - IFX(X_ZF) + need_tst = 0; + IFX(X_ZF) need_tst = 1; + IFXNATIVE(X_SF, NF_SF) need_tst = 1; + IFXNATIVE(X_OF, NF_VF) need_tst = 1; + IFXNATIVE(X_CF, NF_CF) need_tst = 1; + if(need_tst) BICSxw(gd, ed, vd); else BICxw(gd, ed, vd); IFX(X_ZF) { - CSETw(x1, cEQ); - BFIw(xFlags, x1, F_ZF, 1); + IFNATIVE(NF_EQ) {} else { + CSETw(x1, cEQ); + BFIw(xFlags, x1, F_ZF, 1); + } + } + IFX(X_OF) { + IFNATIVE(NF_VF) {} else {BFCw(xFlags, F_OF, 1);} + } + IFX(X_CF) { + IFNATIVE(NF_CF) {} else {BFCw(xFlags, F_CF, 1);} } - IFX(X_OF) - BFCw(xFlags, F_OF, 1); - IFX(X_CF) - BFCw(xFlags, F_CF, 1); IFX(X_SF) { - LSRxw_IMM(x1, gd, rex.w?63:31); - BFIw(xFlags, x1, F_SF, 1); + IFNATIVE(NF_SF) {} else { + LSRxw_IMM(x1, gd, rex.w?63:31); + BFIw(xFlags, x1, F_SF, 1); + } } break; case 0xF3: @@ -99,19 +111,27 @@ uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, i BFIw(xFlags, x3, F_CF, 1); } SUBxw_U12(x3, ed, 1); - IFX(X_ZF) + need_tst = 0; + IFX(X_ZF) need_tst = 1; + IFXNATIVE(X_SF, NF_SF) need_tst = 1; + IFXNATIVE(X_OF, 
NF_VF) need_tst = 1; + if(need_tst) ANDSxw_REG(vd, ed, x3); else ANDxw_REG(vd, ed, x3); IFX(X_ZF) { - CSETMw(x3, cEQ); - BFIw(xFlags, x3, F_ZF, 1); + IFNATIVE(NF_EQ) {} else { + CSETMw(x3, cEQ); + BFIw(xFlags, x3, F_ZF, 1); + } } IFX(X_SF) { - LSRxw(x3, vd, rex.w?63:31); - BFIw(xFlags, x3, F_SF, 1); + IFNATIVE(NF_SF) {} else { + LSRxw(x3, vd, rex.w?63:31); + BFIw(xFlags, x3, F_SF, 1); + } } - IFX(X_OF) BFCw(xFlags, F_OF, 1); + IFX(X_OF) IFNATIVE(NF_VF) {} else {BFCw(xFlags, F_OF, 1);} if (BOX64ENV(dynarec_test)) { IFX(X_AF) BFCw(xFlags, F_AF, 1); IFX(X_PF) BFCw(xFlags, F_PF, 1); @@ -139,19 +159,29 @@ uintptr_t dynarec64_AVX_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, i B_MARK(cPL); LSLxw_REG(x2, x2, x1); MARK; - IFX(X_ZF) { + need_tst = 0; + IFX(X_ZF) need_tst = 1; + IFXNATIVE(X_SF, NF_SF) need_tst = 1; + IFXNATIVE(X_OF, NF_VF) need_tst = 1; + if(need_tst) { BICSxw(gd, ed, x2); - CSETw(x3, cEQ); - BFIw(xFlags, x3, F_ZF, 1); } else BICxw(gd, ed, x2); + IFX(X_ZF) { + IFNATIVE(NF_EQ) {} else { + CSETw(x3, cEQ); + BFIw(xFlags, x3, F_ZF, 1); + } + } IFX(X_SF) { - LSRxw(x3, gd, rex.w?63:31); - BFIw(xFlags, x3, F_SF, 1); + IFNATIVE(NF_SF) {} else { + LSRxw(x3, gd, rex.w?63:31); + BFIw(xFlags, x3, F_SF, 1); + } } IFX(X_AF) BFCw(xFlags, F_AF, 1); IFX(X_PF) BFCw(xFlags, F_PF, 1); - IFX(X_OF) BFCw(xFlags, F_OF, 1); + IFX(X_OF) IFNATIVE(NF_VF) {} else {BFCw(xFlags, F_OF, 1);} break; default: diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c index 81169c55..642cf169 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c @@ -454,7 +454,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, FRINTIS(d0, d0); VFCVTZSs(d0, d0); MRS_fpsr(x5); // get back FPSR to check the IOC bit - TSTw_mask(x5, 0, 0); // mask=1 + TSTw_mask(x5, 0, 0); // mask=(1<<IOC) FCSELS(d0, d0, d1, cEQ); VMOVeS(v0, i, d0, 0); } diff --git 
a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c index 35e30357..afc1ed55 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c @@ -258,6 +258,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, VMOVQ(v0, v2); } VMOVeD(v0, 0, q2, 0); + YMM0(gd); break; case 0x5A: INST_NAME("VCVTSD2SS Gx, Vx, Ex"); @@ -461,7 +462,11 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, for(int l=0; l<1+vex.l; ++l) { if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); } if(v0==v1) { - VFMLAQS(v0, v2, q0); + //TODO: find a better way + if(!l) q1 = fpu_get_scratch(dyn, ninst); + VMOVQ(q1, v2); + VFMLAQS(q1, v1, q0); + VMOVQ(v0, q1); } else { if(v0!=v2) VMOVQ(v0, v2); VFMLAQS(v0, v1, q0); |