diff options
| author | phorcys <phorcys@126.com> | 2025-06-25 00:38:12 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-06-24 18:38:12 +0200 |
| commit | 7962aacc7fbe1746da2bd1ed14d84856d5c5dd6f (patch) | |
| tree | 98cfff449112f4e3be94e0bb303e184392f74f96 /src | |
| parent | 9195bc2d4a02770fe13d421c4c9c4eb438dec6e3 (diff) | |
| download | box64-7962aacc7fbe1746da2bd1ed14d84856d5c5dd6f.tar.gz box64-7962aacc7fbe1746da2bd1ed14d84856d5c5dd6f.zip | |
[LA64_DYNAREC] Add la64 avx load/store ops part 1. (#2766)
* VEX.0f VMOVNTPS/VLDMXCSR/VSTMXCSR
* VEX.66.0f VMOVNTPD/VMOVD/VMOVNTDQ
* VEX.66.0f.3a VMOVNTDQA
* VEX.f2.0f VMOVDDUP/VLDDQU
* VEX.f3.0f VMOVSLDUP/VMOVSHDUP/VMOVD
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_0f.c | 52 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f.c | 48 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f3a.c | 22 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_f2_0f.c | 46 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_f3_0f.c | 38 |
5 files changed, 194 insertions, 12 deletions
diff --git a/src/dynarec/la64/dynarec_la64_avx_0f.c b/src/dynarec/la64/dynarec_la64_avx_0f.c index ec260232..9c693fc3 100644 --- a/src/dynarec/la64/dynarec_la64_avx_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_0f.c @@ -141,6 +141,22 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in SMWRITE2(); } break; + case 0x2B: + INST_NAME("VMOVNTPS Ex, Gx"); + nextop = F8; + GETGYxy(q0, 0); + if (MODREG) { + DEFAULT; + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x4, x5, &fixedaddress, rex, NULL, 1, 0); + if (vex.l) { + XVST(q0, ed, fixedaddress); + } else { + VST(q0, ed, fixedaddress); + } + SMWRITE2(); + } + break; case 0x77: if (!vex.l) { INST_NAME("VZEROUPPER"); @@ -150,9 +166,9 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in q2 = fpu_get_scratch(dyn); XVXOR_V(q2, q2, q2); for (int i = 0; i < (rex.is32bits ? 8 : 16); ++i) { - if (dyn->lsx.avxcache[i].v != -1 ){ + if (dyn->lsx.avxcache[i].v != -1) { // avx used register - if(dyn->lsx.avxcache[i].width == LSX_AVX_WIDTH_256) { + if (dyn->lsx.avxcache[i].width == LSX_AVX_WIDTH_256) { // 256 width, fill upper 128bits with zero. 
q1 = avx_get_reg(dyn, ninst, x1, i, 1, LSX_AVX_WIDTH_256); // mark reg write (dirty) XVPERMI_Q(q1, q2, XVPERMI_IMM_4_0(0, 2)); @@ -161,7 +177,7 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in q1 = avx_get_reg(dyn, ninst, x1, i, 1, LSX_AVX_WIDTH_128); // mark reg write (dirty) dyn->lsx.avxcache[i].zero_upper = 1; } - }else { + } else { // SSE register or unused register, store 128bit zero to x64emu_t.ymm[] VST(q2, xEmu, offsetof(x64emu_t, ymm[i])); } @@ -180,7 +196,35 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in } } break; - + case 0xAE: + nextop = F8; + if (MODREG) { + DEFAULT; + } else + switch ((nextop >> 3) & 7) { + case 2: + INST_NAME("VLDMXCSR Md"); + GETED(0); + ST_W(ed, xEmu, offsetof(x64emu_t, mxcsr)); + if (BOX64ENV(sse_flushto0)) { + // sync with fpsr, with mask from mxcsr + // TODO + } + break; + case 3: + INST_NAME("VSTMXCSR Md"); + addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0); + LD_WU(x4, xEmu, offsetof(x64emu_t, mxcsr)); + ST_W(x4, wback, fixedaddress); + if (BOX64ENV(sse_flushto0)) { + // sync with fpsr, with mask from mxcsr + // TODO + } + break; + default: + DEFAULT; + } + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c index f8c4963c..95004d73 100644 --- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c @@ -141,6 +141,22 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, SMWRITE2(); } break; + case 0x2B: + INST_NAME("VMOVNTPD Ex, Gx"); + nextop = F8; + GETGYxy(q0, 0); + if (MODREG) { + DEFAULT; + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x4, x5, &fixedaddress, rex, NULL, 1, 0); + if (vex.l) { + XVST(q0, ed, fixedaddress); + } else { + VST(q0, ed, fixedaddress); + } + SMWRITE2(); + } + break; case 0x6E: INST_NAME("VMOVD Gx, Ed"); nextop = F8; @@ -217,7 +233,37 @@ 
uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, SMWRITE2(); } break; - + case 0xD6: + INST_NAME("VMOVD Ex, Gx"); + nextop = F8; + GETGYx(q0, 0); + if (MODREG) { + GETEYx_empty(q1, 0); + XVXOR_V(q1, q1, q1); + XVINSVE0_D(q1, q0, 0); + YMM_UNMARK_UPPER_ZERO(q1); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x4, x5, &fixedaddress, rex, NULL, 1, 0); + FST_D(q0, ed, fixedaddress); + SMWRITE2(); + } + break; + case 0xE7: + INST_NAME("VMOVNTDQ Ex, Gx"); + nextop = F8; + GETGYxy(q0, 0); + if (MODREG) { + DEFAULT; + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x4, x5, &fixedaddress, rex, NULL, 1, 0); + if (vex.l) { + XVST(q0, ed, fixedaddress); + } else { + VST(q0, ed, fixedaddress); + } + SMWRITE2(); + } + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c index 1310cac2..74cf1759 100644 --- a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c +++ b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c @@ -68,7 +68,7 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i GETGY_empty_VYEY_xy(q0, q1, q2, 1); u8 = F8; XVOR_V(q0, q1, q1); - XVPERMI_Q(q0, q2, (u8&1) == 0 ? 0b00110000 : 0b00000010); + XVPERMI_Q(q0, q2, (u8 & 1) == 0 ? 0b00110000 : 0b00000010); break; case 0x19: case 0x39: @@ -81,9 +81,9 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i GETEY_GY_xy(q1, q0, 1); u8 = F8; if (MODREG) { - XVPERMI_Q(q1, q0, (u8&1) == 0 ? XVPERMI_IMM_4_0(3, 0) : XVPERMI_IMM_4_0(3, 1)); + XVPERMI_Q(q1, q0, (u8 & 1) == 0 ? 
XVPERMI_IMM_4_0(3, 0) : XVPERMI_IMM_4_0(3, 1)); } else { - if ((u8&1) == 1) { + if ((u8 & 1) == 1) { XVPERMI_Q(q1, q0, XVPERMI_IMM_4_0(3, 1)); VST(q1, ed, fixedaddress); } else { @@ -91,6 +91,22 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i } } break; + case 0x2A: + INST_NAME("VMOVNTDQA Gx, Ex"); + nextop = F8; + if (MODREG) { + DEFAULT; + } else { + GETGYxy_empty(q0); + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0); + if (vex.l) { + XVLD(q0, ed, fixedaddress); + } else { + VLD(q0, ed, fixedaddress); + } + } + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c index 6d6f8209..f97e9890 100644 --- a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c @@ -69,7 +69,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, VEXTRINS_D(q0, q2, 0); } else { GETEYSD(q2, 0, 0); - GETGYx_empty(q0); + GETGYx_empty(q0); XVXOR_V(q0, q0, q0); XVINSVE0_D(q0, q2, 0); YMM_UNMARK_UPPER_ZERO(q0); @@ -95,7 +95,49 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, SMWRITE2(); } break; - + case 0x12: + INST_NAME("VMOVDDUP Gx, Ex"); + nextop = F8; + if (MODREG) { + GETGY_empty_EY_xy(q0, q1, 0); + } else { + GETGYxy_empty(q0); + q1 = fpu_get_scratch(dyn); + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x4, x5, &fixedaddress, rex, NULL, 0, 0); + if (vex.l) { + XVLD(q1, ed, 0); + } else { + VLDREPL_D(q0, ed, 0); + } + } + if (vex.l) { + XVSHUF4I_D(q0, q1, 0b1010); + } else if (MODREG) { + VREPLVE_D(q0, q1, 0); + } + break; + case 0xF0: + INST_NAME("VLDDQU Gx, Ex"); + nextop = F8; + if (MODREG) { + GETGY_empty_EY_xy(q0, q1, 0); + if (vex.l) { + XVOR_V(q0, q1, q1); + } else { + VOR_V(q0, q1, q1); + } + } else { + GETGYxy_empty(q0); + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 
0); + if (vex.l) { + XVLD(q0, ed, fixedaddress); + } else { + VLD(q0, ed, fixedaddress); + } + } + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c index d9ad1413..f6f4535d 100644 --- a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c @@ -50,8 +50,8 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, if (MODREG) { GETVYx(q1, 0); GETEYSS(q2, 0, 0); - GETGYx_empty(q0); - if ( !gd == vex.v) VOR_V(q0, q1, q1); + GETGYx_empty(q0); + if (!gd == vex.v) VOR_V(q0, q1, q1); VEXTRINS_W(q0, q2, 0); } else { GETEYSS(q2, 0, 0); @@ -81,6 +81,26 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, SMWRITE2(); } break; + case 0x12: + INST_NAME("VMOVSLDUP Gx, Ex"); + nextop = F8; + GETGY_empty_EY_xy(q0, q1, 0); + if (vex.l) { + XVPACKEV_W(q0, q1, q1); + } else { + VPACKEV_W(q0, q1, q1); + } + break; + case 0x16: + INST_NAME("VMOVSHDUP Gx, Ex"); + nextop = F8; + GETGY_empty_EY_xy(q0, q1, 0); + if (vex.l) { + XVPACKOD_W(q0, q1, q1); + } else { + VPACKOD_W(q0, q1, q1); + } + break; case 0x6F: INST_NAME("VMOVDQU Gx, Ex"); nextop = F8; @@ -102,6 +122,20 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, } } break; + case 0x7E: + INST_NAME("VMOVD Gx, Ex"); + nextop = F8; + if (MODREG) { + GETEYx(q1, 0, 0); + GETGYx_empty(q0); + } else { + GETEYSD(q1, 0, 0); + GETGYx_empty(q0); + } + XVXOR_V(q0, q0, q0); + XVINSVE0_D(q0, q1, 0); + YMM_UNMARK_UPPER_ZERO(q0); + break; case 0x7F: INST_NAME("VMOVDQU Ex, Gx"); nextop = F8; |