diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-08-24 16:29:59 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-24 10:29:59 +0200 |
| commit | 2a525b4eb298a5f410014b7fc4a4f7e82130ce4f (patch) | |
| tree | 4333d02be621909d654eb81a3f9d22ac87c908be /src | |
| parent | cb2c29c5f992cb4e5ec481f8d99b973b90411403 (diff) | |
| download | box64-2a525b4eb298a5f410014b7fc4a4f7e82130ce4f.tar.gz box64-2a525b4eb298a5f410014b7fc4a4f7e82130ce4f.zip | |
[RV64_DYNAREC] Added YMM0 placeholder for later optimization (#2968)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx_0f.c | 19 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx_66_0f.c | 124 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c | 162 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c | 24 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx_f2_0f.c | 7 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c | 22 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 8 |
7 files changed, 125 insertions, 241 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_avx_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_0f.c index 6b4b812d..1bc87fa2 100644 --- a/src/dynarec/rv64/dynarec_rv64_avx_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_avx_0f.c @@ -62,10 +62,8 @@ uintptr_t dynarec64_AVX_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, in SD(x3, gback, gyoffset); LD(x3, wback, fixedaddress + 8); SD(x3, gback, gyoffset + 8); - } else { - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x29: INST_NAME("VMOVAPS Ex, Gx"); @@ -83,11 +81,8 @@ uintptr_t dynarec64_AVX_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, in SD(x3, wback, fixedaddress); LD(x3, gback, gyoffset + 8); SD(x3, wback, fixedaddress + 8); - } else if (MODREG) { - GETEY(); - SD(xZR, wback, fixedaddress); - SD(xZR, wback, fixedaddress + 8); - } + } else if (MODREG) + YMM0(ed); break; case 0x2E: // no special check... @@ -149,10 +144,8 @@ uintptr_t dynarec64_AVX_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, in FCVTDS(s1, s1); FSD(s0, gback, gyoffset + 0); FSD(s1, gback, gyoffset + 8); - } else { - FSD(xZR, gback, gyoffset + 0); - FSD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; default: DEFAULT; diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c index 88e3d85c..af0abf60 100644 --- a/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f.c @@ -62,10 +62,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SD(x3, gback, gyoffset); LD(x3, wback, fixedaddress + 8); SD(x3, gback, gyoffset + 8); - } else { - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x29: INST_NAME("VMOVAPD Ex, Gx"); @@ -83,11 +81,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, LD(x4, gback, gyoffset + 8); SD(x3, wback, fixedaddress + 0); SD(x4, wback, fixedaddress + 8); - } else if (MODREG) { - 
GETEY(); - SD(xZR, wback, fixedaddress + 0); - SD(xZR, wback, fixedaddress + 8); - } + } else if (MODREG) + YMM0(ed); if (!MODREG) SMWRITE2(); break; case 0x2E: @@ -188,10 +183,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } FSD(d0, gback, gyoffset + i * 8); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x54: INST_NAME("VANDPD Gx, Vx, Ex"); @@ -219,10 +212,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, AND(x6, x6, x4); SD(x5, gback, gyoffset + 0); SD(x6, gback, gyoffset + 8); - } else { - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x55: INST_NAME("VANDNPD Gx, Vx, Ex"); @@ -264,10 +255,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } SD(x5, gback, gyoffset + 0); SD(x6, gback, gyoffset + 8); - } else { - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x56: INST_NAME("VORPD Gx, Vx, Ex"); @@ -295,10 +284,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, OR(x6, x6, x4); SD(x5, gback, gyoffset + 0); SD(x6, gback, gyoffset + 8); - } else { - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x57: INST_NAME("VXORPD Gx, Vx, Ex"); @@ -326,10 +313,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, XOR(x6, x6, x4); SD(x5, gback, gyoffset + 0); SD(x6, gback, gyoffset + 8); - } else { - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x58: INST_NAME("VADDPD Gx, Vx, Ex"); @@ -377,10 +362,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } FSD(v0, gback, gyoffset + 8 * i); } - } else { - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x59: INST_NAME("VMULPD 
Gx, Vx, Ex"); @@ -428,10 +411,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } FSD(v0, gback, gyoffset + 8 * i); } - } else { - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x5C: INST_NAME("VSUBPD Gx, Vx, Ex"); @@ -479,10 +460,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } FSD(v0, gback, gyoffset + 8 * i); } - } else { - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x5D: INST_NAME("VMINPD Gx, Vx, Ex"); @@ -520,10 +499,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, FMVD(v1, v0); FSD(v1, gback, gyoffset + 8 * i); } - } else { - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x5E: INST_NAME("VDIVPD Gx, Vx, Ex"); @@ -571,10 +548,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } FSD(v0, gback, gyoffset + 8 * i); } - } else { - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x5F: INST_NAME("VMAXPD Gx, Vx, Ex"); @@ -612,10 +587,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, FMVD(v1, v0); FSD(v1, gback, gyoffset + 8 * i); } - } else { - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x66: INST_NAME("VPCMPGTD Gx, Vx, Ex"); @@ -641,10 +614,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, NEG(x3, x4); SW(x3, gback, gyoffset + i * 4); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x6E: INST_NAME("VMOVD Gx, Ed"); @@ -658,8 +629,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } SD(ed, gback, gdoffset); SD(xZR, gback, gdoffset + 8); - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); + YMM0(gd); 
break; case 0x6F: INST_NAME("VMOVDQA Gx, Ex"); @@ -677,10 +647,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SD(x3, gback, gyoffset); LD(x3, wback, fixedaddress + 8); SD(x3, gback, gyoffset + 8); - } else { - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x7E: INST_NAME("VMOVD Ed, Gx"); @@ -723,11 +691,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SD(x3, wback, fixedaddress); LD(x3, gback, gyoffset + 8); SD(x3, wback, fixedaddress + 8); - } else if (MODREG) { - GETEY(); - SD(xZR, wback, fixedaddress); - SD(xZR, wback, fixedaddress + 8); - } + } else if (MODREG) + YMM0(ed); break; case 0xC2: INST_NAME("VCMPPD Gx, Vx, Ex, Ib"); @@ -812,10 +777,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SD(x3, gback, gyoffset + 8 * (i - 2)); } } - if (!vex.l) { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + if (!vex.l) YMM0(gd); break; case 0xC6: INST_NAME("VSHUFPD Gx, Vx, Ex, Ib"); @@ -836,10 +798,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, LD(x4, wback, fixedaddress + 8 * ((u8 >> 3) & 1)); SD(x3, gback, gyoffset + 0); SD(x4, gback, gyoffset + 8); - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0xD0: INST_NAME("VADDSUBPD Gx, Vx, Ex"); @@ -893,10 +853,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } FSD(v0, gback, gyoffset + 8 * i); } - } else { - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0xEF: INST_NAME("VPXOR Gx, Vx, Ex"); @@ -924,10 +882,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, LD(x4, wback, fixedaddress + 8); XOR(x3, x3, x4); SD(x3, gback, gyoffset + 8); - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + 
YMM0(gd); break; default: DEFAULT; diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c index 0a3ef423..3c81181a 100644 --- a/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c +++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f38.c @@ -95,10 +95,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LBU(x4, x4, vyoffset); SB(x4, gback, gyoffset + i); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x01: case 0x03: @@ -176,10 +174,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SH(x3, gback, gyoffset + 2 * (4 + i)); } } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x02: INST_NAME("VPHADDD Gx, Vx, Ex"); @@ -249,10 +245,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SW(x3, gback, gyoffset + 4 * (2 + i)); } } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x04: INST_NAME("VPMADDUBSW Gx, Vx, Ex"); @@ -286,10 +280,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SAT16(x3, x6); SH(x3, gback, gyoffset + i * 2); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x05: case 0x07: @@ -367,10 +359,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SH(x3, gback, gyoffset + 2 * (4 + i)); } } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x06: INST_NAME("VPHSUBD Gx, Vx, Ex"); @@ -440,10 +430,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SW(x3, gback, gyoffset + 4 * (2 + i)); } } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x08: 
INST_NAME("VPSIGNB Gx, Vx, Ex"); @@ -473,10 +461,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i MUL(x3, x1, x3); SB(x3, gback, gyoffset + i); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x09: INST_NAME("VPSIGNW Gx, Vx, Ex"); @@ -506,10 +492,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i MUL(x3, x1, x3); SH(x3, gback, gyoffset + i * 2); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x0A: INST_NAME("VPSIGND Gx, Vx, Ex"); @@ -539,10 +523,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i MUL(x3, x1, x3); SW(x3, gback, gyoffset + i * 4); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x0B: INST_NAME("VPMULHRSW Gx, Vx, Ex"); @@ -572,10 +554,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SRAI(x3, x3, 1); SH(x3, gback, gyoffset + i * 2); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x17: INST_NAME("VPTEST Gx, Ex"); @@ -655,10 +635,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i NEG(x4, x4); SB(x4, gback, gyoffset + i); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x1D: INST_NAME("VPABSW Gx, Ex"); @@ -680,10 +658,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i NEG(x4, x4); SH(x4, gback, gyoffset + i * 2); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x1E: INST_NAME("VPABSD Gx, Ex"); @@ -705,10 +681,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i NEG(x4, x4); SW(x4, gback, gyoffset + i * 4); } - } else { - 
SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x20: INST_NAME("VPMOVSXBW Gx, Ex"); @@ -721,10 +695,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LB(x3, wback, fixedaddress + 8 + i); SH(x3, gback, gyoffset + i * 2); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); for (int i = 7; i >= 0; --i) { LB(x3, wback, fixedaddress + i); SH(x3, gback, gdoffset + i * 2); @@ -741,10 +713,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LB(x3, wback, fixedaddress + 4 + i); SW(x3, gback, gyoffset + i * 4); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); for (int i = 3; i >= 0; --i) { LB(x3, wback, fixedaddress + i); SW(x3, gback, gdoffset + i * 4); @@ -761,10 +731,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LB(x3, wback, fixedaddress + 2 + i); SD(x3, gback, gyoffset + i * 8); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); for (int i = 1; i >= 0; --i) { LB(x3, wback, fixedaddress + i); SD(x3, gback, gdoffset + i * 8); @@ -781,10 +749,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LH(x3, wback, fixedaddress + 8 + i * 2); SW(x3, gback, gyoffset + i * 4); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); for (int i = 3; i >= 0; --i) { LH(x3, wback, fixedaddress + i * 2); SW(x3, gback, gdoffset + i * 4); @@ -801,10 +767,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LH(x3, wback, fixedaddress + 4 + i * 2); SD(x3, gback, gyoffset + i * 8); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); for (int i = 1; i >= 0; --i) { LH(x3, wback, fixedaddress + i * 2); SD(x3, gback, gdoffset + 
i * 8); @@ -821,10 +785,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LW(x4, wback, fixedaddress + 8 + i * 4); SD(x4, gback, gyoffset + i * 8); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); for (int i = 1; i >= 0; --i) { LW(x4, wback, fixedaddress + i * 4); SD(x4, gback, gdoffset + i * 8); @@ -841,10 +803,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LBU(x3, wback, fixedaddress + 8 + i); SH(x3, gback, gyoffset + i * 2); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); for (int i = 7; i >= 0; --i) { LBU(x3, wback, fixedaddress + i); SH(x3, gback, gdoffset + i * 2); @@ -861,10 +821,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LBU(x3, wback, fixedaddress + 4 + i); SW(x3, gback, gyoffset + i * 4); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); for (int i = 3; i >= 0; --i) { LBU(x3, wback, fixedaddress + i); SW(x3, gback, gdoffset + i * 4); @@ -881,10 +839,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LBU(x3, wback, fixedaddress + 2 + i); SD(x3, gback, gyoffset + i * 8); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); for (int i = 1; i >= 0; --i) { LBU(x3, wback, fixedaddress + i); SD(x3, gback, gdoffset + i * 8); @@ -901,10 +857,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LHU(x3, wback, fixedaddress + 8 + i * 2); SW(x3, gback, gyoffset + i * 4); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); for (int i = 3; i >= 0; --i) { LHU(x3, wback, fixedaddress + i * 2); SW(x3, gback, gdoffset + i * 4); @@ -921,10 +875,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LHU(x3, 
wback, fixedaddress + 4 + i * 2); SD(x3, gback, gyoffset + i * 8); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); for (int i = 1; i >= 0; --i) { LHU(x3, wback, fixedaddress + i * 2); SD(x3, gback, gdoffset + i * 8); @@ -941,10 +893,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i LWU(x4, wback, fixedaddress + 8 + i * 4); SD(x4, gback, gyoffset + i * 8); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); for (int i = 1; i >= 0; --i) { LWU(x4, wback, fixedaddress + i * 4); SD(x4, gback, gdoffset + i * 8); @@ -982,10 +932,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } SW(x4, gback, gyoffset + i * 4); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x3D: INST_NAME("VPMAXSD Gx, Vx, Ex"); @@ -1019,10 +967,8 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } SW(x4, gback, gyoffset + i * 4); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; default: DEFAULT; diff --git a/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c b/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c index b1c19693..207a9723 100644 --- a/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c +++ b/src/dynarec/rv64/dynarec_rv64_avx_66_0f3a.c @@ -79,10 +79,8 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SH(x3, gback, gyoffset + 2 * i); } } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x0F: INST_NAME("VPALIGNR Gx, Vx, Ex, Ib"); @@ -161,8 +159,7 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (vex.l) { GETEY(); if (u8 > 31) { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); + YMM0(gd); } else if (u8 > 23) { LD(x5, vback, vyoffset + 8); 
if (u8 > 24) { @@ -225,10 +222,8 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SD(x4, gback, gyoffset + 0); } } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; case 0x22: if (rex.w) { @@ -253,8 +248,7 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } else { SW(ed, gback, gdoffset + 4 * (u8 & 0x3)); } - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); + YMM0(gd); break; case 0x4A: INST_NAME("VBLENDVPS Gx, Vx, Ex, XMMImm8"); @@ -286,10 +280,8 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i MV(x3, x5); SW(x3, gback, gyoffset + i * 4); } - } else { - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); - } + } else + YMM0(gd); break; default: DEFAULT; diff --git a/src/dynarec/rv64/dynarec_rv64_avx_f2_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_f2_0f.c index 6b9f916b..86a60503 100644 --- a/src/dynarec/rv64/dynarec_rv64_avx_f2_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_avx_f2_0f.c @@ -61,8 +61,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } else { SD(xZR, gback, gdoffset + 8); } - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); + YMM0(gd); break; case 0x11: INST_NAME("VMOVSD Ex, [Vx,] Gx"); @@ -76,9 +75,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, GETVX(); LD(x3, vback, vxoffset + 8); SD(x3, wback, fixedaddress + 8); - GETEY(); - SD(xZR, wback, fixedaddress + 0); - SD(xZR, wback, fixedaddress + 8); + YMM0(ed); } break; default: diff --git a/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c index 7761c55f..83e4748a 100644 --- a/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c @@ -66,8 +66,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SD(xZR, gback, gdoffset + 8); } GETGY(); - 
SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); + YMM0(gd); break; case 0x11: INST_NAME("VMOVSS Ex, [Vx,] Gx"); @@ -82,9 +81,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SW(x3, wback, fixedaddress + 4); LD(x3, vback, vxoffset + 8); SD(x3, wback, fixedaddress + 8); - GETEY(); - SD(xZR, wback, fixedaddress); - SD(xZR, wback, fixedaddress + 8); + YMM0(ed); } break; case 0x5A: @@ -102,8 +99,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, LD(x2, vback, vxoffset + 8); SD(x2, gback, gdoffset + 8); } - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); + YMM0(gd); break; case 0x5D: INST_NAME("VMINSS Gx, Vx, Ex"); @@ -132,8 +128,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, LD(x2, vback, vxoffset + 8); SD(x2, gback, gdoffset + 8); } - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); + YMM0(gd); break; case 0x5F: INST_NAME("VMAXSS Gx, Vx, Ex"); @@ -162,8 +157,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, LD(x2, vback, vxoffset + 8); SD(x2, gback, gdoffset + 8); } - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); + YMM0(gd); break; case 0x7E: INST_NAME("VMOVQ Gx, Ex"); @@ -174,8 +168,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, LD(x3, wback, fixedaddress); SD(x3, gback, gdoffset + 0); SD(xZR, gback, gdoffset + 8); - SD(xZR, gback, gyoffset + 0); - SD(xZR, gback, gyoffset + 8); + YMM0(gd); break; case 0xC2: INST_NAME("VCMPSS Gx, Vx, Ex, Ib"); @@ -239,8 +232,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, LD(x2, vback, vxoffset + 8); SD(x2, gback, gdoffset + 8); } - SD(xZR, gback, gyoffset); - SD(xZR, gback, gyoffset + 8); + YMM0(gd); break; default: DEFAULT; diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index ed7c594e..9b7cec7a 100644 --- 
a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -2051,4 +2051,12 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, #define SCRATCH_USAGE(usage) #endif +// TODO: can be lazy +#define YMM0(a) \ + do { \ + SD(xZR, xEmu, offsetof(x64emu_t, ymm[a]) + 0); \ + SD(xZR, xEmu, offsetof(x64emu_t, ymm[a]) + 8); \ + } while (0) + + #endif //__DYNAREC_RV64_HELPER_H__ |