diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2025-04-23 10:57:07 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-04-23 10:57:07 +0200 |
| commit | 3afe87bccedbe06bdf13633fe81b2cdbb52c28ab (patch) | |
| tree | 452797d489c9934d6f64c38990567855347926e5 /src | |
| parent | d79d6bd6c2a84ede9a5a07b80549f05451615021 (diff) | |
| download | box64-3afe87bccedbe06bdf13633fe81b2cdbb52c28ab.tar.gz box64-3afe87bccedbe06bdf13633fe81b2cdbb52c28ab.zip | |
[ARM64_DYNAREC] Various improvements to various SSE/AVX 128bits/256bits mov opcodes
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 11 | ||||
| -rw-r--r-- | src/dynarec/arm64/arm64_printer.c | 11 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_64.c | 2 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_0f.c | 56 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f.c | 105 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c | 5 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c | 7 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c | 36 |
8 files changed, 144 insertions, 89 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index 1c8f0296..749e7e03 100644 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -909,7 +909,7 @@ int convert_bitmask(uint64_t bitmask); #define FCSELS(Sd, Sn, Sm, cond) EMIT(FCSEL_scalar(0b00, Sm, cond, Sn, Sd)) #define FCSELD(Dd, Dn, Dm, cond) EMIT(FCSEL_scalar(0b01, Dm, cond, Dn, Dd)) -// VLDR +// VLDR/VSTR #define VMEM_gen(size, opc, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | 0b01<<24 | (opc)<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) // imm13 must be 1-aligned #define VLDR16_U12(Ht, Rn, imm13) EMIT(VMEM_gen(0b01, 0b01, ((uint32_t)((imm13)>>1))&0xfff, Rn, Ht)) @@ -928,6 +928,15 @@ int convert_bitmask(uint64_t bitmask); // (imm13) must be 1-aligned #define VSTR16_U12(Ht, Rn, imm13) EMIT(VMEM_gen(0b01, 0b00, ((uint32_t)((imm13)>>1))&0xfff, Rn, Ht)) +//VLDP/VSTP +#define VMEMP_vector(opc, L, imm7, Rt2, Rn, Rt) ((opc)<<30 | 0b101<<27 | 1<<26 | 0b010<<23 | (L)<<22 | (imm7)<<15 | (Rt2)<<10 | (Rn)<<5 | (Rt)) +#define VLDP32_I7(Rt1, Rt2, Rn, imm9) EMIT(VMEMP_vector(0b00, 1, (((int64_t)(imm9))>>2)&0x7f, Rt2, Rn, Rt1)) +#define VLDP64_I7(Rt1, Rt2, Rn, imm10) EMIT(VMEMP_vector(0b01, 1, (((int64_t)(imm10))>>3)&0x7f, Rt2, Rn, Rt1)) +#define VLDP128_I7(Rt1, Rt2, Rn, imm11) EMIT(VMEMP_vector(0b10, 1, (((int64_t)(imm11))>>4)&0x7f, Rt2, Rn, Rt1)) +#define VSTP32_I7(Rt1, Rt2, Rn, imm9) EMIT(VMEMP_vector(0b00, 0, (((int64_t)(imm9))>>2)&0x7f, Rt2, Rn, Rt1)) +#define VSTP64_I7(Rt1, Rt2, Rn, imm10) EMIT(VMEMP_vector(0b01, 0, (((int64_t)(imm10))>>3)&0x7f, Rt2, Rn, Rt1)) +#define VSTP128_I7(Rt1, Rt2, Rn, imm11) EMIT(VMEMP_vector(0b10, 0, (((int64_t)(imm11))>>4)&0x7f, Rt2, Rn, Rt1)) + #define VMEMUR_vector(size, opc, imm9, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | (imm9)<<12 | (Rn)<<5 | (Rt)) // signed offset, no alignement! 
#define VLDR8_I9(Vt, Rn, imm9) EMIT(VMEMUR_vector(0b00, 0b01, (imm9)&0b111111111, Rn, Vt)) diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c index a0818a78..7d730bc9 100644 --- a/src/dynarec/arm64/arm64_printer.c +++ b/src/dynarec/arm64/arm64_printer.c @@ -1644,6 +1644,17 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr) snprintf(buff, sizeof(buff), "%sR %s%d, [%s, %+d]", a.L?"LD":"ST", Y[sz], Rt, XtSp[Rn], imm); return buff; } + // LDP/STP vector + if(isMask(opcode, "ff1011010Liiiiiii22222nnnnnttttt", &a)) { + const char* Y[] = {"S", "D", "Q", "?"}; + int sz = sf; + int offset = signExtend(imm, 7)<<(2+sz); + if(!offset) + snprintf(buff, sizeof(buff), "%sP %s%d, %s%d, [%s]", a.L?"LD":"ST", Y[sz], Rt, Y[sz], Rt2, XtSp[Rn]); + else + snprintf(buff, sizeof(buff), "%sP %s%d, %s%d, [%s, %s0x%x]", a.L?"LD":"ST", Y[sz], Rt, Y[sz], Rt2, XtSp[Rn], (offset<0)?"-":"", abs(offset)); + return buff; + } // (S/U)QXT(U)N if(isMask(opcode, "0Q101110ff100001001010nnnnnddddd", &a)) { diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c index 1f9edc20..616d9d00 100644 --- a/src/dynarec/arm64/dynarec_arm64_64.c +++ b/src/dynarec/arm64/dynarec_arm64_64.c @@ -217,7 +217,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); ADDz_REG(x4, x4, ed); - VLD128(v0, ed, fixedaddress); + VLD128(v0, x4, fixedaddress); } break; default: diff --git a/src/dynarec/arm64/dynarec_arm64_avx_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_0f.c index 50231d0e..4deb01a9 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_0f.c @@ -79,11 +79,13 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int } else { v0 = sse_get_reg_empty(dyn, ninst, x1, gd); SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, 
&fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); - VLDR128_U12(v0, ed, fixedaddress); // no alignment issue with ARMv8 NEON :) if(vex.l) { - v0 = ymm_get_reg_empty(dyn, ninst, x1, gd, -1, -1, -1); - VLDR128_U12(v0, ed, fixedaddress+16); + v1 = ymm_get_reg_empty(dyn, ninst, x1, gd, -1, -1, -1); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0x3f<<4, 15, rex, NULL, 1, 0); + VLDP128_I7(v0, v1, ed, fixedaddress); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); + VLD128(v0, ed, fixedaddress); // no alignment issue with ARMv8 NEON :) } } if(!vex.l) YMM0(gd); @@ -101,13 +103,15 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int v0 = ymm_get_reg(dyn, ninst, x1, gd, 0, ed, -1, -1); v1 = ymm_get_reg_empty(dyn, ninst, x1, ed, gd, -1, -1); VMOVQ(v1, v0); - } + } else YMM0(ed); } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); - VSTR128_U12(v0, ed, fixedaddress); if(vex.l) { - v0 = ymm_get_reg(dyn, ninst, x1, gd, 0, ed, -1, -1); - VSTR128_U12(v0, ed, fixedaddress+16); + v1 = ymm_get_reg(dyn, ninst, x1, gd, 0, ed, -1, -1); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0x3f<<4, 15, rex, NULL, 1, 0); + VSTP128_I7(v0, v1, ed, fixedaddress); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); + VST128(v0, ed, fixedaddress); } SMWRITE2(); } @@ -224,11 +228,13 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int } else { v0 = sse_get_reg_empty(dyn, ninst, x1, gd); SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); - VLDR128_U12(v0, ed, fixedaddress); if(vex.l) { - GETGY_empty(v0, -1, -1, -1); - VLDR128_U12(v0, ed, fixedaddress+16); + GETGY_empty(v1, -1, -1, -1); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, 
&fixedaddress, NULL, 0x3f<<4, 15, rex, NULL, 1, 0); + VLDP128_I7(v0, v1, ed, fixedaddress); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); + VLD128(v0, ed, fixedaddress); } } if(!vex.l) YMM0(gd); @@ -245,13 +251,15 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int if(vex.l) { GETGYEY_empty(v0, v1); VMOVQ(v1, v0); - } + } else YMM0(ed); } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); - VSTR128_U12(v0, ed, fixedaddress); if(vex.l) { - GETGY(v0, 0, -1, -1, -1); - VSTR128_U12(v0, ed, fixedaddress+16); + GETGY(v1, 0, -1, -1, -1); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0x3f<<4, 15, rex, NULL, 1, 0); + VSTP128_I7(v0, v1, ed, fixedaddress); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); + VST128(v0, ed, fixedaddress); } SMWRITE2(); } @@ -269,13 +277,15 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int if(vex.l) { GETGYEY_empty(v0, v1); VMOVQ(v1, v0); - } + } else YMM0(ed); } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); - VSTR128_U12(v0, ed, fixedaddress); if(vex.l) { - GETGY(v0, 0, -1, -1, -1); - VSTR128_U12(v0, ed, fixedaddress+16); + GETGY(v1, 0, -1, -1, -1); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0x3f<<4, 15, rex, NULL, 1, 0); + VSTP128_I7(v0, v1, ed, fixedaddress); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); + VST128(v0, ed, fixedaddress); } } break; diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c index 4dabd1fe..31da91af 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c @@ -76,11 +76,13 
@@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, } else { SMREAD(); v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); - VLDR128_U12(v0, ed, fixedaddress); if(vex.l) { - GETGY_empty(v0, -1, -1, -1); - VLDR128_U12(v0, ed, fixedaddress+16); + GETGY_empty(v1, -1, -1, -1); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0x3f<<4, 15, rex, NULL, 1, 0); + VLDP128_I7(v0, v1, ed, fixedaddress); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); + VLD128(v0, ed, fixedaddress); } } if(!vex.l) YMM0(gd); @@ -91,18 +93,21 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, GETG; v0 = sse_get_reg(dyn, ninst, x1, gd, 0); if(MODREG) { - v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + ed = (nextop&7)+(rex.b<<3); + v1 = sse_get_reg_empty(dyn, ninst, x1, ed); VMOVQ(v1, v0); if(vex.l) { GETGYEY_empty(v0, v1); VMOVQ(v1, v0); - } + } else YMM0(ed); } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); - VSTR128_U12(v0, ed, fixedaddress); if(vex.l) { - GETGY(v0, 0, -1, -1, -1); - VSTR128_U12(v0, ed, fixedaddress+16); + GETGY(v1, 0, -1, -1, -1); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0x3f<<4, 15, rex, NULL, 1, 0); + VSTP128_I7(v0, v1, ed, fixedaddress); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); + VST128(v0, ed, fixedaddress); } SMWRITE2(); } @@ -205,11 +210,13 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, } else { SMREAD(); v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); - VLDR128_U12(v0, ed, fixedaddress); if(vex.l) { - 
GETGY_empty(v0, -1, -1, -1); - VLDR128_U12(v0, ed, fixedaddress+16); + GETGY_empty(v1, -1, -1, -1); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0x3f<<4, 15, rex, NULL, 1, 0); + VLDP128_I7(v0, v1, ed, fixedaddress); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); + VLD128(v0, ed, fixedaddress); } } if(!vex.l) YMM0(gd); @@ -220,18 +227,21 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, GETG; v0 = sse_get_reg(dyn, ninst, x1, gd, 0); if(MODREG) { - v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + ed = (nextop&7)+(rex.b<<3); + v1 = sse_get_reg_empty(dyn, ninst, x1, ed); VMOVQ(v1, v0); if(vex.l) { GETGYEY_empty(v0, v1); VMOVQ(v1, v0); - } + } else YMM0(ed); } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); - VSTR128_U12(v0, ed, fixedaddress); if(vex.l) { - GETGY(v0, 0, -1, -1, -1); - VSTR128_U12(v0, ed, fixedaddress+16); + GETGY(v1, 0, -1, -1, -1); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0x3f<<4, 15, rex, NULL, 1, 0); + VSTP128_I7(v0, v1, ed, fixedaddress); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); + VST128(v0, ed, fixedaddress); } SMWRITE2(); } @@ -243,18 +253,21 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, GETG; v0 = sse_get_reg(dyn, ninst, x1, gd, 0); if(MODREG) { - v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + ed = (nextop&7) + (rex.b<<3); + v1 = sse_get_reg_empty(dyn, ninst, x1, ed); VMOVQ(v1, v0); if(vex.l) { GETGYEY_empty(v0, v1); VMOVQ(v1, v0); - } + } else YMM0(ed); } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); - VSTR128_U12(v0, ed, fixedaddress); if(vex.l) { - GETGY(v0, 0, -1, -1, -1); - VSTR128_U12(v0, ed, fixedaddress+16); + 
GETGY(v1, 0, -1, -1, -1); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0x3f<<4, 15, rex, NULL, 1, 0); + VSTP128_I7(v0, v1, ed, fixedaddress); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); + VST128(v0, ed, fixedaddress); } } break; @@ -785,11 +798,13 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, } else { GETGX_empty(v0); SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); - VLDR128_U12(v0, ed, fixedaddress); if(vex.l) { - GETGY_empty(v0, -1, -1, -1); - VLDR128_U12(v0, ed, fixedaddress+16); + GETGY_empty(v1, -1, -1, -1); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0x3f<<4, 15, rex, NULL, 1, 0); + VLDP128_I7(v0, v1, ed, fixedaddress); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); + VLD128(v0, ed, fixedaddress); } } if(!vex.l) YMM0(gd); @@ -1206,18 +1221,21 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, nextop = F8; GETGX(v0, 0); if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 1); + ed = (nextop&7)+(rex.b<<3); + v1 = sse_get_reg(dyn, ninst, x1, ed, 1); VMOVQ(v1, v0); if(vex.l) { GETGYEY(v0, v1); VMOVQ(v1, v0); - } + } else YMM0(ed); } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xffe<<4, 15, rex, NULL, 0, 0); - VST128(v0, ed, fixedaddress); if(vex.l) { - GETGY(v0, 0, -1, -1, -1); - VST128(v0, ed, fixedaddress+16); + GETGY(v1, 0, -1, -1, -1); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0x3f<<4, 15, rex, NULL, 1, 0); + VSTP128_I7(v0, v1, ed, fixedaddress); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); + VST128(v0, ed, fixedaddress); } SMWRITE2(); } @@ -1694,18 +1712,21 @@ uintptr_t 
dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, GETG; v0 = sse_get_reg(dyn, ninst, x1, gd, 0); if(MODREG) { - v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); + ed = (nextop&7)+(rex.b<<3); + v1 = sse_get_reg_empty(dyn, ninst, x1, ed); VMOVQ(v1, v0); if(vex.l) { GETGYEY_empty(v0, v1); VMOVQ(v1, v0); - } + } else YMM0(ed); } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); - VSTR128_U12(v0, ed, fixedaddress); if(vex.l) { - GETGY(v0, 0, -1, -1, -1); - VSTR128_U12(v0, ed, fixedaddress+16); + GETGY(v1, 0, -1, -1, -1); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0x3f<<4, 15, rex, NULL, 1, 0); + VSTP128_I7(v0, v1, ed, fixedaddress); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); + VST128(v0, ed, fixedaddress); } } break; diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c index f6c036aa..fe254a73 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c @@ -386,9 +386,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip v1 = ymm_get_reg(dyn, ninst, x3, s0, 0, gd, vex.v, -1); VMOVQ(d1, v1); } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); - VLDR128_U12(d0, ed, fixedaddress); - VLDR128_U12(d1, ed, fixedaddress+16); + addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0x3f<<4, 15, rex, NULL, 1, 0); + VLDP128_I7(d0, d1, ed, fixedaddress); } MOV32w(x3, 0x03020100); VDUPQS(q1, x3); diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c index 5e79f6a1..a07dcee8 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c @@ -599,11 +599,10 @@ uintptr_t 
dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, } } else { v0 = sse_get_reg_empty(dyn, ninst, x1, gd); + v1 = ymm_get_reg_empty(dyn, ninst, x1, gd, -1, -1, -1); SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); - VLDR128_U12(v0, ed, fixedaddress); - v0 = ymm_get_reg_empty(dyn, ninst, x1, gd, -1, -1, -1); - VLDR128_U12(v0, ed, fixedaddress+16); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0x3f<<4, 15, rex, NULL, 1, 0); + VLDP128_I7(v0, v1, ed, fixedaddress); } if(!vex.l) YMM0(gd); break; diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c index 4cc04f7e..181170c9 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c @@ -438,22 +438,25 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, INST_NAME("VMOVDQU Gx, Ex");// no alignment constraint on NEON here, so same as MOVDQA nextop = F8; if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0); + ed = (nextop&7)+(rex.b<<3); + v1 = sse_get_reg(dyn, ninst, x1, ed, 0); GETGX_empty(v0); VMOVQ(v0, v1); if(vex.l) { - v1 = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0, gd, -12, -1); - GETGY_empty(v0, (nextop&7)+(rex.b<<3), -1, -1); + v1 = ymm_get_reg(dyn, ninst, x1, ed, 0, gd, -1, -1); + GETGY_empty(v0, ed, -1, -1); VMOVQ(v0, v1); } } else { GETGX_empty(v0); SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); - VLDR128_U12(v0, ed, fixedaddress); if(vex.l) { - GETGY_empty(v0, -1, -1, -1); - VLDR128_U12(v0, ed, fixedaddress+16); + GETGY_empty(v1, -1, -1, -1); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0x3f<<4, 15, rex, NULL, 1, 0); + VLDP128_I7(v0, v1, ed, fixedaddress); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, 
NULL, 0, 0); + VLD128(v0, ed, fixedaddress); } } if(!vex.l) YMM0(gd); @@ -507,13 +510,14 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, nextop = F8; GETGX(v0, 0); if(MODREG) { - v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); + ed = (nextop&7) + (rex.b<<3); + v1 = sse_get_reg_empty(dyn, ninst, x1, ed); VMOVQ(v1, v0); if(vex.l) { - GETGY(v0, 0, (nextop&7) + (rex.b<<3), -1, -1); - v1 = ymm_get_reg_empty(dyn, ninst, x1, (nextop&7) + (rex.b<<3), gd, -1, -1); + GETGY(v0, 0, ed, -1, -1); + v1 = ymm_get_reg_empty(dyn, ninst, x1, ed, gd, -1, -1); VMOVQ(v1, v0); - } // no ymm raz here it seems + } else YMM0(ed); } else { IF_UNALIGNED(ip) { MESSAGE(LOG_DEBUG, "\tUnaligned path"); @@ -534,11 +538,13 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, } } } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0); - VSTR128_U12(v0, ed, fixedaddress); if(vex.l) { - GETGY(v0, 0, -1, -1, -1); - VSTR128_U12(v0, ed, fixedaddress+16); + GETGY(v1, 0, -1, -1, -1); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0x3f<<4, 15, rex, NULL, 1, 0); + VSTP128_I7(v0, v1, ed, fixedaddress); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); + VST128(v0, ed, fixedaddress); } } SMWRITE2(); |