diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2023-09-25 04:20:21 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-09-24 22:20:21 +0200 |
| commit | c00cd1d6271b313ea459d0b782bc392af74e1598 (patch) | |
| tree | 392f37492bb8aca62ee40e747a59787e0a34a982 /src | |
| parent | ab65641eae5c6b6440ab7defe4b798953249a1d3 (diff) | |
| download | box64-c00cd1d6271b313ea459d0b782bc392af74e1598.tar.gz box64-c00cd1d6271b313ea459d0b782bc392af74e1598.zip | |
[RV64_DYNAREC] Added more support for XTheadBb extension (#989)
* Reformat
* Use TH_FF0 for LZCNT
* Reformat
* Reformat
* Added MOVBE tests
* Reformat
* Added a new REVxw macro
* Refined test24
* Refined test24
* Fix bugs
* [INTERPRETER] Fixed MOVBE
* [DYNAREC_ARM64] Fix MOVBE
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_660f.c | 32 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 1060 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 1639 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f.c | 221 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 955 | ||||
| -rw-r--r-- | src/emu/x64run660f.c | 48 |
6 files changed, 2045 insertions, 1910 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index 2d80e68d..0013517e 100644 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -169,7 +169,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n nextop = F8; FAKEED; break; - + case 0x28: INST_NAME("MOVAPD Gx,Ex"); nextop = F8; @@ -857,7 +857,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n BFIx(gd, x1, 0, 16); } else { SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, (1<<2)-1, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<1, (1<<1)-1, rex, NULL, 0, 0); LDH(x1, ed, fixedaddress); REV16x(x1, x1); BFIx(gd, x1, 0, 16); @@ -873,7 +873,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n BFIx(ed, x1, 0, 16); } else { SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, (1<<2)-1, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<1, (1<<1)-1, rex, NULL, 0, 0); REV16x(x1, gd); STH(x1, ed, fixedaddress); } @@ -1007,7 +1007,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n GETEX(q1, 0, 1); u8 = F8; if(u8>31) { - VEORQ(q0, q0, q0); + VEORQ(q0, q0, q0); } else if(u8>15) { d0 = fpu_get_scratch(dyn); VEORQ(d0, d0, d0); @@ -1121,7 +1121,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); // gx if(MODREG) { - ed = (nextop&7)+(rex.b<<3); + ed = (nextop&7)+(rex.b<<3); sse_forget_reg(dyn, ninst, ed); MOV32w(x2, ed); MOV32w(x3, 0); //p = NULL @@ -1145,7 +1145,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); // gx if(MODREG) { - ed = (nextop&7)+(rex.b<<3); + 
ed = (nextop&7)+(rex.b<<3); sse_forget_reg(dyn, ninst, ed); MOV32w(x2, ed); MOV32w(x3, 0); //p = NULL @@ -1195,7 +1195,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n LSRx(x1, x1, 63); LSRx(gd, gd, 63); BFIx(gd, x1, 1, 1); - break; + break; case 0x51: INST_NAME("SQRTPD Gx, Ex"); nextop = F8; @@ -1215,7 +1215,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n VFSQRTQD(q1, q0); } break; - + case 0x54: INST_NAME("ANDPD Gx, Ex"); nextop = F8; @@ -2195,21 +2195,21 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0: VFCMEQQD(v0, v0, v1); break; // Equal case 1: VFCMGTQD(v0, v1, v0); break; // Less than case 2: VFCMGEQD(v0, v1, v0); break; // Less or equal - case 3: VFCMEQQD(v0, v0, v0); + case 3: VFCMEQQD(v0, v0, v0); if(v0!=v1) { - q0 = fpu_get_scratch(dyn); - VFCMEQQD(q0, v1, v1); + q0 = fpu_get_scratch(dyn); + VFCMEQQD(q0, v1, v1); VANDQ(v0, v0, q0); } - VMVNQ(v0, v0); + VMVNQ(v0, v0); break; // NaN (NaN is not equal to himself) case 4: VFCMEQQD(v0, v0, v1); VMVNQ(v0, v0); break; // Not Equal (or unordered on ARM, not on X86...) 
case 5: VFCMGTQD(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or equal or unordered case 6: VFCMGEQD(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or unordered - case 7: VFCMEQQD(v0, v0, v0); + case 7: VFCMEQQD(v0, v0, v0); if(v0!=v1) { - q0 = fpu_get_scratch(dyn); - VFCMEQQD(q0, v1, v1); + q0 = fpu_get_scratch(dyn); + VFCMEQQD(q0, v1, v1); VANDQ(v0, v0, q0); } break; // not NaN @@ -2386,7 +2386,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n UADDLV_8(q1, q1); // accumalte VMOVBto(x1, q1, 0); BFIx(gd, x1, 8, 8); - break; + break; case 0xD8: INST_NAME("PSUBUSB Gx, Ex"); nextop = F8; diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 8e85cc8e..227ffda5 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -26,7 +26,8 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog) { - (void)ip; (void)need_epilog; + (void)ip; + (void)need_epilog; uint8_t opcode = F8; uint8_t nextop, u8; @@ -55,13 +56,13 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MAYUSE(j64); MAYUSE(cacheupd); - switch(opcode) { + switch (opcode) { case 0x01: INST_NAME("FAKE xgetbv"); nextop = F8; addr = fakeed(dyn, addr, ninst, nextop); - SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state + SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(); CALL(native_ud, -1); @@ -90,7 +91,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x09: INST_NAME("WBINVD"); - SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state + SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(); CALL(native_ud, -1); @@ -102,7 +103,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x0B: INST_NAME("UD2"); - SETFLAGS(X_ALL, 
SF_SET); // Hack to set flags in "don't care" state + SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); STORE_XEMU_CALL(); CALL(native_ud, -1); @@ -114,13 +115,13 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x0D: nextop = F8; - switch((nextop>>3)&7) { + switch ((nextop >> 3) & 7) { case 1: INST_NAME("PREFETCHW"); // nop without Zicbom, Zicbop, Zicboz extensions FAKEED; break; - default: //??? + default: //??? DEFAULT; } break; @@ -130,31 +131,31 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETGX(); GETEX(x2, 0); - LD(x3, wback, fixedaddress+0); - LD(x4, wback, fixedaddress+8); - SD(x3, gback, gdoffset+0); - SD(x4, gback, gdoffset+8); + LD(x3, wback, fixedaddress + 0); + LD(x4, wback, fixedaddress + 8); + SD(x3, gback, gdoffset + 0); + SD(x4, gback, gdoffset + 8); break; case 0x11: INST_NAME("MOVUPS Ex,Gx"); nextop = F8; GETGX(); GETEX(x2, 0); - LD(x3, gback, gdoffset+0); - LD(x4, gback, gdoffset+8); - SD(x3, wback, fixedaddress+0); - SD(x4, wback, fixedaddress+8); - if(!MODREG) + LD(x3, gback, gdoffset + 0); + LD(x4, gback, gdoffset + 8); + SD(x3, wback, fixedaddress + 0); + SD(x4, wback, fixedaddress + 8); + if (!MODREG) SMWRITE2(); break; case 0x12: nextop = F8; - if(MODREG) { + if (MODREG) { INST_NAME("MOVHLPS Gx,Ex"); GETGX(); GETEX(x2, 0); - LD(x3, wback, fixedaddress+8); - SD(x3, gback, gdoffset+0); + LD(x3, wback, fixedaddress + 8); + SD(x3, gback, gdoffset + 0); } else { INST_NAME("MOVLPS Gx,Ex"); GETEXSD(v0, 0); @@ -167,9 +168,9 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETGX(); GETEX(x2, 0); - LD(x3, gback, gdoffset+0); - SD(x3, wback, fixedaddress+0); - if(!MODREG) + LD(x3, gback, gdoffset + 0); + SD(x3, wback, fixedaddress + 0); + if (!MODREG) SMWRITE2(); break; case 0x14: @@ -177,30 +178,30 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni 
nextop = F8; GETGX(); GETEX(x2, 0); - LWU(x5, gback, gdoffset+1*4); - LWU(x3, wback, fixedaddress+0); - LWU(x4, wback, fixedaddress+4); - SW(x4, gback, gdoffset+3*4); - SW(x5, gback, gdoffset+2*4); - SW(x3, gback, gdoffset+1*4); + LWU(x5, gback, gdoffset + 1 * 4); + LWU(x3, wback, fixedaddress + 0); + LWU(x4, wback, fixedaddress + 4); + SW(x4, gback, gdoffset + 3 * 4); + SW(x5, gback, gdoffset + 2 * 4); + SW(x3, gback, gdoffset + 1 * 4); break; case 0x15: INST_NAME("UNPCKHPS Gx,Ex"); nextop = F8; GETGX(); GETEX(x2, 0); - LWU(x3, wback, fixedaddress+2*4); - LWU(x4, wback, fixedaddress+3*4); - LWU(x5, gback, gdoffset+2*4); - LWU(x6, gback, gdoffset+3*4); - SW(x5, gback, gdoffset+0*4); - SW(x3, gback, gdoffset+1*4); - SW(x6, gback, gdoffset+2*4); - SW(x4, gback, gdoffset+3*4); + LWU(x3, wback, fixedaddress + 2 * 4); + LWU(x4, wback, fixedaddress + 3 * 4); + LWU(x5, gback, gdoffset + 2 * 4); + LWU(x6, gback, gdoffset + 3 * 4); + SW(x5, gback, gdoffset + 0 * 4); + SW(x3, gback, gdoffset + 1 * 4); + SW(x6, gback, gdoffset + 2 * 4); + SW(x4, gback, gdoffset + 3 * 4); break; case 0x16: nextop = F8; - if(MODREG) { + if (MODREG) { INST_NAME("MOVLHPS Gx,Ex"); } else { INST_NAME("MOVHPS Gx,Ex"); @@ -208,35 +209,35 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } GETGX(); GETEX(x2, 0); - LD(x4, wback, fixedaddress+0); - SD(x4, gback, gdoffset+8); + LD(x4, wback, fixedaddress + 0); + SD(x4, gback, gdoffset + 8); break; case 0x17: INST_NAME("MOVHPS Ex,Gx"); nextop = F8; GETGX(); GETEX(x2, 0); - LD(x4, gback, gdoffset+8); - SD(x4, wback, fixedaddress+0); - if(!MODREG) + LD(x4, gback, gdoffset + 8); + SD(x4, wback, fixedaddress + 0); + if (!MODREG) SMWRITE2(); break; case 0x18: nextop = F8; - if((nextop&0xC0)==0xC0) { + if ((nextop & 0xC0) == 0xC0) { INST_NAME("NOP (multibyte)"); } else - switch((nextop>>3)&7) { - case 0: - case 1: - case 2: - case 3: - INST_NAME("PREFETCHh Ed"); - FAKEED; - break; - default: - INST_NAME("NOP (multibyte)"); - 
FAKEED; + switch ((nextop >> 3) & 7) { + case 0: + case 1: + case 2: + case 3: + INST_NAME("PREFETCHh Ed"); + FAKEED; + break; + default: + INST_NAME("NOP (multibyte)"); + FAKEED; } break; @@ -259,7 +260,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGX(); GETEX(x2, 0); SSE_LOOP_MV_Q2(x3); - if(!MODREG) + if (!MODREG) SMWRITE2(); break; case 0x2A: @@ -269,10 +270,10 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEM(x2, 0); d0 = fpu_get_scratch(dyn); u8 = sse_setround(dyn, ninst, x4, x5); - for (int i=0; i<2; ++i) { - LW(x3, wback, fixedaddress+i*4); + for (int i = 0; i < 2; ++i) { + LW(x3, wback, fixedaddress + i * 4); FCVTSW(d0, x3, RD_DYN); - FSW(d0, gback, gdoffset+i*4); + FSW(d0, gback, gdoffset + i * 4); } x87_restoreround(dyn, ninst, u8); break; @@ -281,10 +282,10 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETGX(); GETEX(x2, 0); - LD(x3, gback, gdoffset+0); - LD(x4, gback, gdoffset+8); - SD(x3, wback, fixedaddress+0); - SD(x4, wback, fixedaddress+8); + LD(x3, gback, gdoffset + 0); + LD(x4, gback, gdoffset + 8); + SD(x3, wback, fixedaddress + 0); + SD(x4, wback, fixedaddress + 8); break; case 0x2C: INST_NAME("CVTTPS2PI Gm,Ex"); @@ -292,20 +293,20 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGM(); GETEX(x2, 0); d0 = fpu_get_scratch(dyn); - for (int i=0; i<2; ++i) { - if(!box64_dynarec_fastround) { - FSFLAGSI(0); // // reset all bits + for (int i = 0; i < 2; ++i) { + if (!box64_dynarec_fastround) { + FSFLAGSI(0); // // reset all bits } - FLW(d0, wback, fixedaddress+i*4); + FLW(d0, wback, fixedaddress + i * 4); FCVTWS(x1, d0, RD_RTZ); - if(!box64_dynarec_fastround) { - FRFLAGS(x5); // get back FPSR to check the IOC bit - ANDI(x5, x5, (1<<FR_NV)|(1<<FR_OF)); + if (!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, (1 << FR_NV) | (1 << FR_OF)); 
BEQ_MARKi(x5, xZR, i); MOV32w(x1, 0x80000000); MARKi(i); } - SW(x1, gback, gdoffset+i*4); + SW(x1, gback, gdoffset + i * 4); } break; case 0x2D: @@ -315,27 +316,31 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEX(x2, 0); d0 = fpu_get_scratch(dyn); u8 = sse_setround(dyn, ninst, x6, x4); - for (int i=0; i<2; ++i) { - if(!box64_dynarec_fastround) { - FSFLAGSI(0); // // reset all bits + for (int i = 0; i < 2; ++i) { + if (!box64_dynarec_fastround) { + FSFLAGSI(0); // // reset all bits } - FLW(d0, wback, fixedaddress+i*4); + FLW(d0, wback, fixedaddress + i * 4); FCVTWS(x1, d0, RD_DYN); - if(!box64_dynarec_fastround) { - FRFLAGS(x5); // get back FPSR to check the IOC bit - ANDI(x5, x5, (1<<FR_NV)|(1<<FR_OF)); + if (!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, (1 << FR_NV) | (1 << FR_OF)); BEQ_MARKi(x5, xZR, i); MOV32w(x1, 0x80000000); MARKi(i); } - SW(x1, gback, gdoffset+i*4); + SW(x1, gback, gdoffset + i * 4); } x87_restoreround(dyn, ninst, u8); break; case 0x2E: // no special check... 
case 0x2F: - if(opcode==0x2F) {INST_NAME("COMISS Gx, Ex");} else {INST_NAME("UCOMISS Gx, Ex");} + if (opcode == 0x2F) { + INST_NAME("COMISS Gx, Ex"); + } else { + INST_NAME("UCOMISS Gx, Ex"); + } SETFLAGS(X_ALL, SF_SET); SET_DFNONE(); nextop = F8; @@ -343,160 +348,61 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEXSS(v0, 0); CLEAR_FLAGS(); // if isnan(d0) || isnan(v0) - IFX(X_ZF | X_PF | X_CF) { + IFX(X_ZF | X_PF | X_CF) + { FEQS(x3, d0, d0); FEQS(x2, v0, v0); AND(x2, x2, x3); BNE_MARK(x2, xZR); - ORI(xFlags, xFlags, (1<<F_ZF) | (1<<F_PF) | (1<<F_CF)); + ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF)); B_NEXT_nocond; } MARK; // else if isless(d0, v0) - IFX(X_CF) { + IFX(X_CF) + { FLTS(x2, d0, v0); BEQ_MARK2(x2, xZR); - ORI(xFlags, xFlags, 1<<F_CF); + ORI(xFlags, xFlags, 1 << F_CF); B_NEXT_nocond; } MARK2; // else if d0 == v0 - IFX(X_ZF) { + IFX(X_ZF) + { FEQS(x2, d0, v0); CBZ_NEXT(x2); - ORI(xFlags, xFlags, 1<<F_ZF); + ORI(xFlags, xFlags, 1 << F_ZF); } break; case 0x31: INST_NAME("RDTSC"); NOTEST(x1); MESSAGE(LOG_DUMP, "Need Optimization\n"); - CALL(ReadTSC, x3); // will return the u64 in x3 + CALL(ReadTSC, x3); // will return the u64 in x3 SRLI(xRDX, x3, 32); - AND(xRAX, x3, 32); // wipe upper part + AND(xRAX, x3, 32); // wipe upper part break; case 0x38: - //SSE3 - nextop=F8; - switch(nextop) { + // SSE3 + nextop = F8; + switch (nextop) { case 0xF0: INST_NAME("MOVBE Gd, Ed"); - nextop=F8; + nextop = F8; GETGD; SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); LDxw(gd, ed, fixedaddress); - if (rv64_zbb) { - REV8(gd, gd); - if (!rex.w) { - SRLI(gd, gd, 32); - } - } else { - if (rex.w) { - MOV_U12(x2, 0xff); - SLLI(x1, gd, 56); - SRLI(x3, gd, 56); - SRLI(x4, gd, 40); - SLLI(x2, x2, 8); - AND(x4, x4, x2); - OR(x1, x1, x3); - OR(x1, x1, x4); - SLLI(x3, gd, 40); - SLLI(x4, x2, 40); - AND(x3, x3, x4); - OR(x1, x1, x3); - - SRLI(x3, gd, 24); - SLLI(x4, x2, 8); - AND(x3, x3, 
x4); - OR(x1, x1, x3); - SLLI(x3, gd, 24); - SLLI(x4, x2, 32); - AND(x3, x3, x4); - OR(x1, x1, x3); - - SRLI(x3, gd, 8); - SLLI(x4, x2, 16); - AND(x3, x3, x4); - OR(x1, x1, x3); - SLLI(x3, gd, 8); - SLLI(x4, x2, 24); - AND(x3, x3, x4); - OR(gd, x1, x3); - } else { - MOV_U12(x2, 0xff); - SLLIW(x2, x2, 8); - SLLIW(x1, gd, 24); - SRLIW(x3, gd, 24); - SRLIW(x4, gd, 8); - AND(x4, x4, x2); - OR(x1, x1, x3); - OR(x1, x1, x4); - SLLIW(gd, gd, 8); - LUI(x2, 0xff0); - AND(gd, gd, x2); - OR(gd, gd, x1); - } - } + REV8xw(gd, gd, x1, x2, x3, x4); break; case 0xF1: INST_NAME("MOVBE Ed, Gd"); - nextop=F8; + nextop = F8; GETGD; SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); - if (rv64_zbb) { - REV8(x1, gd); - if (!rex.w) { - SRLI(x1, x1, 32); - } - } else { - if (rex.w) { - MOV_U12(x2, 0xff); - SLLI(x1, gd, 56); - SRLI(x3, gd, 56); - SRLI(x4, gd, 40); - SLLI(x2, x2, 8); - AND(x4, x4, x2); - OR(x1, x1, x3); - OR(x1, x1, x4); - SLLI(x3, gd, 40); - SLLI(x4, x2, 40); - AND(x3, x3, x4); - OR(x1, x1, x3); - - SRLI(x3, gd, 24); - SLLI(x4, x2, 8); - AND(x3, x3, x4); - OR(x1, x1, x3); - SLLI(x3, gd, 24); - SLLI(x4, x2, 32); - AND(x3, x3, x4); - OR(x1, x1, x3); - - SRLI(x3, gd, 8); - SLLI(x4, x2, 16); - AND(x3, x3, x4); - OR(x1, x1, x3); - SLLI(x3, gd, 8); - SLLI(x4, x2, 24); - AND(x3, x3, x4); - OR(x1, x1, x3); - } else { - MOV_U12(x2, 0xff); - SLLIW(x2, x2, 8); - SLLIW(x1, gd, 24); - SRLIW(x3, gd, 24); - SRLIW(x4, gd, 8); - AND(x4, x4, x2); - OR(x1, x1, x3); - OR(x1, x1, x4); - SLLIW(x3, gd, 8); - LUI(x2, 0xff0); - AND(x3, x3, x2); - OR(x1, x1, x3); - } - } + REV8xw(x1, gd, x1, x2, x3, x4); SDxw(x1, wback, fixedaddress); break; default: @@ -504,34 +410,34 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } break; - #define GO(GETFLAGS, NO, YES, F) \ - READFLAGS(F); \ - GETFLAGS; \ - nextop=F8; \ - GETGD; \ - if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - B##NO(x1, 8); \ - MV(gd, ed); \ - } else { \ 
- addr = geted(dyn, addr, ninst, nextop, &ed, x2, x4, &fixedaddress, rex, NULL, 1, 0); \ - B##NO(x1, 8); \ - LDxw(gd, ed, fixedaddress); \ - } \ - if(!rex.w) ZEROUP(gd); +#define GO(GETFLAGS, NO, YES, F) \ + READFLAGS(F); \ + GETFLAGS; \ + nextop = F8; \ + GETGD; \ + if (MODREG) { \ + ed = xRAX + (nextop & 7) + (rex.b << 3); \ + B##NO(x1, 8); \ + MV(gd, ed); \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x4, &fixedaddress, rex, NULL, 1, 0); \ + B##NO(x1, 8); \ + LDxw(gd, ed, fixedaddress); \ + } \ + if (!rex.w) ZEROUP(gd); - GOCOND(0x40, "CMOV", "Gd, Ed"); - #undef GO + GOCOND(0x40, "CMOV", "Gd, Ed"); +#undef GO case 0x50: INST_NAME("MOVMSKPS Gd, Ex"); nextop = F8; GETGD; GETEX(x1, 0); XOR(gd, gd, gd); - for(int i=0; i<4; ++i) { - LWU(x2, wback, fixedaddress+i*4); - SRLI(x2, x2, 31-i); - if (i>0) ANDI(x2, x2, 1<<i); + for (int i = 0; i < 4; ++i) { + LWU(x2, wback, fixedaddress + i * 4); + SRLI(x2, x2, 31 - i); + if (i > 0) ANDI(x2, x2, 1 << i); OR(gd, gd, x2); } break; @@ -541,10 +447,10 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGX(); GETEX(x2, 0); d0 = fpu_get_scratch(dyn); - for(int i=0; i<4; ++i) { - FLW(d0, wback, fixedaddress+4*i); + for (int i = 0; i < 4; ++i) { + FLW(d0, wback, fixedaddress + 4 * i); FSQRTS(d0, d0); - FSW(d0, gback, gdoffset+4*i); + FSW(d0, gback, gdoffset + 4 * i); } break; case 0x52: @@ -561,23 +467,23 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!box64_dynarec_fastnan) { FCVTSW(v0, xZR, RD_DYN); } - for(int i=0; i<4; ++i) { - FLW(s0, wback, fixedaddress+i*4); + for (int i = 0; i < 4; ++i) { + FLW(s0, wback, fixedaddress + i * 4); if (!box64_dynarec_fastnan) { FLES(x3, v0, s0); // s0 >= 0.0f? - BNEZ(x3, 6*4); + BNEZ(x3, 6 * 4); FEQS(x3, s0, s0); // isnan(s0)? 
- BEQZ(x3, 2*4); + BEQZ(x3, 2 * 4); // s0 is negative, so generate a NaN FDIVS(s0, s1, v0); // s0 is a NaN, just copy it - FSW(s0, gback, gdoffset+i*4); - J(4*4); + FSW(s0, gback, gdoffset + i * 4); + J(4 * 4); // do regular computation } FSQRTS(s0, s0); FDIVS(s0, s1, s0); - FSW(s0, gback, gdoffset+i*4); + FSW(s0, gback, gdoffset + i * 4); } break; case 0x53: @@ -589,17 +495,17 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni d1 = fpu_get_scratch(dyn); LUI(x3, 0x3f800); FMVWX(d0, x3); // 1.0f - for(int i=0; i<4; ++i) { - FLW(d1, wback, fixedaddress+4*i); + for (int i = 0; i < 4; ++i) { + FLW(d1, wback, fixedaddress + 4 * i); FDIVS(d1, d0, d1); - FSW(d1, gback, gdoffset+4*i); + FSW(d1, gback, gdoffset + 4 * i); } break; case 0x54: INST_NAME("ANDPS Gx, Ex"); nextop = F8; - gd = ((nextop&0x38)>>3)+(rex.r<<3); - if(!(MODREG && gd==(nextop&7)+(rex.b<<3))) { + gd = ((nextop & 0x38) >> 3) + (rex.r << 3); + if (!(MODREG && gd == (nextop & 7) + (rex.b << 3))) { GETGX(); GETEX(x2, 0); SSE_LOOP_Q(x3, x4, AND(x3, x3, x4)); @@ -615,8 +521,8 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x56: INST_NAME("ORPS Gx, Ex"); nextop = F8; - gd = ((nextop&0x38)>>3)+(rex.r<<3); - if(!(MODREG && gd==(nextop&7)+(rex.b<<3))) { + gd = ((nextop & 0x38) >> 3) + (rex.r << 3); + if (!(MODREG && gd == (nextop & 7) + (rex.b << 3))) { GETGX(); GETEX(x2, 0); SSE_LOOP_Q(x3, x4, OR(x3, x3, x4)); @@ -625,13 +531,12 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x57: INST_NAME("XORPS Gx, Ex"); nextop = F8; - //TODO: it might be possible to check if SS or SD are used and not purge them to optimize a bit + // TODO: it might be possible to check if SS or SD are used and not purge them to optimize a bit GETGX(); - if(MODREG && gd==(nextop&7)+(rex.b<<3)) - { + if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { // just zero dest - SD(xZR, gback, gdoffset+0); - SD(xZR, gback, gdoffset+8); + 
SD(xZR, gback, gdoffset + 0); + SD(xZR, gback, gdoffset + 8); } else { GETEX(x2, 0); SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); @@ -644,12 +549,12 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEX(x2, 0); s0 = fpu_get_scratch(dyn); s1 = fpu_get_scratch(dyn); - for(int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // GX->f[i] += EX->f[i]; - FLW(s0, wback, fixedaddress+i*4); - FLW(s1, gback, gdoffset+i*4); + FLW(s0, wback, fixedaddress + i * 4); + FLW(s1, gback, gdoffset + i * 4); FADDS(s1, s1, s0); - FSW(s1, gback, gdoffset+i*4); + FSW(s1, gback, gdoffset + i * 4); } break; case 0x59: @@ -659,12 +564,12 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEX(x2, 0); s0 = fpu_get_scratch(dyn); s1 = fpu_get_scratch(dyn); - for(int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // GX->f[i] *= EX->f[i]; - FLW(s0, wback, fixedaddress+i*4); - FLW(s1, gback, gdoffset+i*4); + FLW(s0, wback, fixedaddress + i * 4); + FLW(s1, gback, gdoffset + i * 4); FMULS(s1, s1, s0); - FSW(s1, gback, gdoffset+i*4); + FSW(s1, gback, gdoffset + i * 4); } break; case 0x5A: @@ -675,11 +580,11 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni s0 = fpu_get_scratch(dyn); s1 = fpu_get_scratch(dyn); FLW(s0, wback, fixedaddress); - FLW(s1, wback, fixedaddress+4); + FLW(s1, wback, fixedaddress + 4); FCVTDS(s0, s0); FCVTDS(s1, s1); - FSD(s0, gback, gdoffset+0); - FSD(s1, gback, gdoffset+8); + FSD(s0, gback, gdoffset + 0); + FSD(s1, gback, gdoffset + 8); break; case 0x5B: INST_NAME("CVTDQ2PS Gx, Ex"); @@ -687,10 +592,10 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGX(); GETEX(x2, 0); s0 = fpu_get_scratch(dyn); - for (int i=0; i<4; ++i) { - LW(x3, wback, fixedaddress+i*4); + for (int i = 0; i < 4; ++i) { + LW(x3, wback, fixedaddress + i * 4); FCVTSW(s0, x3, RD_RNE); - FSW(s0, gback, gdoffset+i*4); + FSW(s0, gback, gdoffset + i * 4); } break; case 0x5C: @@ 
-700,12 +605,12 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEX(x2, 0); s0 = fpu_get_scratch(dyn); s1 = fpu_get_scratch(dyn); - for(int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // GX->f[i] -= EX->f[i]; - FLW(s0, wback, fixedaddress+i*4); - FLW(s1, gback, gdoffset+i*4); + FLW(s0, wback, fixedaddress + i * 4); + FLW(s1, gback, gdoffset + i * 4); FSUBS(s1, s1, s0); - FSW(s1, gback, gdoffset+i*4); + FSW(s1, gback, gdoffset + i * 4); } break; case 0x5D: @@ -715,20 +620,20 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEX(x2, 0); s0 = fpu_get_scratch(dyn); s1 = fpu_get_scratch(dyn); - for(int i=0; i<4; ++i) { - FLW(s0, wback, fixedaddress+i*4); - FLW(s1, gback, gdoffset+i*4); - if(!box64_dynarec_fastnan) { + for (int i = 0; i < 4; ++i) { + FLW(s0, wback, fixedaddress + i * 4); + FLW(s1, gback, gdoffset + i * 4); + if (!box64_dynarec_fastnan) { FEQS(x3, s0, s0); FEQS(x4, s1, s1); AND(x3, x3, x4); BEQZ(x3, 12); FLTS(x3, s0, s1); BEQZ(x3, 8); - FSW(s0, gback, gdoffset+i*4); + FSW(s0, gback, gdoffset + i * 4); } else { FMINS(s1, s1, s0); - FSW(s1, gback, gdoffset+i*4); + FSW(s1, gback, gdoffset + i * 4); } } break; @@ -739,12 +644,12 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEX(x2, 0); s0 = fpu_get_scratch(dyn); s1 = fpu_get_scratch(dyn); - for(int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // GX->f[i] /= EX->f[i]; - FLW(s0, wback, fixedaddress+i*4); - FLW(s1, gback, gdoffset+i*4); + FLW(s0, wback, fixedaddress + i * 4); + FLW(s1, gback, gdoffset + i * 4); FDIVS(s1, s1, s0); - FSW(s1, gback, gdoffset+i*4); + FSW(s1, gback, gdoffset + i * 4); } break; case 0x5F: @@ -754,20 +659,20 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEX(x2, 0); s0 = fpu_get_scratch(dyn); s1 = fpu_get_scratch(dyn); - for(int i=0; i<4; ++i) { - FLW(s0, wback, fixedaddress+i*4); - FLW(s1, gback, gdoffset+i*4); - 
if(!box64_dynarec_fastnan) { + for (int i = 0; i < 4; ++i) { + FLW(s0, wback, fixedaddress + i * 4); + FLW(s1, gback, gdoffset + i * 4); + if (!box64_dynarec_fastnan) { FEQS(x3, s0, s0); FEQS(x4, s1, s1); AND(x3, x3, x4); BEQZ(x3, 12); FLTS(x3, s1, s0); BEQZ(x3, 8); - FSW(s0, gback, gdoffset+i*4); + FSW(s0, gback, gdoffset + i * 4); } else { FMAXS(s1, s1, s0); - FSW(s1, gback, gdoffset+i*4); + FSW(s1, gback, gdoffset + i * 4); } } break; @@ -775,23 +680,23 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("PUNPCKLBW Gm,Em"); nextop = F8; GETGM(); - for(int i=3; i>0; --i) { // 0 is untouched + for (int i = 3; i > 0; --i) { // 0 is untouched // GX->ub[2 * i] = GX->ub[i]; - LBU(x3, gback, gdoffset+i); - SB(x3, gback, gdoffset+2*i); + LBU(x3, gback, gdoffset + i); + SB(x3, gback, gdoffset + 2 * i); } - if (MODREG && gd==(nextop&7)) { - for(int i=0; i<4; ++i) { + if (MODREG && gd == (nextop & 7)) { + for (int i = 0; i < 4; ++i) { // GX->ub[2 * i + 1] = GX->ub[2 * i]; - LBU(x3, gback, gdoffset+2*i); - SB(x3, gback, gdoffset+2*i+1); + LBU(x3, gback, gdoffset + 2 * i); + SB(x3, gback, gdoffset + 2 * i + 1); } } else { GETEM(x2, 0); - for(int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // GX->ub[2 * i + 1] = EX->ub[i]; - LBU(x3, wback, fixedaddress+i); - SB(x3, gback, gdoffset+2*i+1); + LBU(x3, wback, fixedaddress + i); + SB(x3, gback, gdoffset + 2 * i + 1); } } break; @@ -801,14 +706,14 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGM(); GETEM(x2, 0); // GM->uw[3] = EM->uw[1]; - LHU(x3, wback, fixedaddress+2*1); - SH(x3, gback, gdoffset+2*3); + LHU(x3, wback, fixedaddress + 2 * 1); + SH(x3, gback, gdoffset + 2 * 3); // GM->uw[2] = GM->uw[1]; - LHU(x3, gback, gdoffset+2*1); - SH(x3, gback, gdoffset+2*2); + LHU(x3, gback, gdoffset + 2 * 1); + SH(x3, gback, gdoffset + 2 * 2); // GM->uw[1] = EM->uw[0]; - LHU(x3, wback, fixedaddress+2*0); - SH(x3, gback, gdoffset+2*1); + LHU(x3, wback, 
fixedaddress + 2 * 0); + SH(x3, gback, gdoffset + 2 * 1); break; case 0x62: INST_NAME("PUNPCKLDQ Gm, Em"); @@ -817,38 +722,38 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEM(x2, 0); // GM->ud[1] = EM->ud[0]; LWU(x3, wback, fixedaddress); - SW(x3, gback, gdoffset+4*1); + SW(x3, gback, gdoffset + 4 * 1); break; case 0x67: INST_NAME("PACKUSWB Gm, Em"); nextop = F8; GETGM(); ADDI(x5, xZR, 0xFF); - for(int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // GX->ub[i] = (GX->sw[i]<0)?0:((GX->sw[i]>0xff)?0xff:GX->sw[i]); - LH(x3, gback, gdoffset+i*2); + LH(x3, gback, gdoffset + i * 2); BGE(x5, x3, 8); ADDI(x3, xZR, 0xFF); NOT(x4, x3); SRAI(x4, x4, 63); AND(x3, x3, x4); - SB(x3, gback, gdoffset+i); + SB(x3, gback, gdoffset + i); } - if (MODREG && gd==(nextop&7)) { + if (MODREG && gd == (nextop & 7)) { // GM->ud[1] = GM->ud[0]; - LW(x3, gback, gdoffset+0*4); - SW(x3, gback, gdoffset+1*4); + LW(x3, gback, gdoffset + 0 * 4); + SW(x3, gback, gdoffset + 1 * 4); } else { GETEM(x1, 0); - for(int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // GX->ub[4+i] = (EX->sw[i]<0)?0:((EX->sw[i]>0xff)?0xff:EX->sw[i]); - LH(x3, wback, fixedaddress+i*2); + LH(x3, wback, fixedaddress + i * 2); BGE(x5, x3, 8); ADDI(x3, xZR, 0xFF); NOT(x4, x3); SRAI(x4, x4, 63); AND(x3, x3, x4); - SB(x3, gback, gdoffset+4+i); + SB(x3, gback, gdoffset + 4 + i); } } break; @@ -856,23 +761,23 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("PUNPCKHBW Gm,Em"); nextop = F8; GETGM(); - for(int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // GX->ub[2 * i] = GX->ub[i + 4]; - LBU(x3, gback, gdoffset+i+4); - SB(x3, gback, gdoffset+2*i); + LBU(x3, gback, gdoffset + i + 4); + SB(x3, gback, gdoffset + 2 * i); } - if (MODREG && gd==(nextop&7)) { - for(int i=0; i<4; ++i) { + if (MODREG && gd == (nextop & 7)) { + for (int i = 0; i < 4; ++i) { // GX->ub[2 * i + 1] = GX->ub[2 * i]; - LBU(x3, gback, gdoffset+2*i); - SB(x3, gback, 
gdoffset+2*i+1); + LBU(x3, gback, gdoffset + 2 * i); + SB(x3, gback, gdoffset + 2 * i + 1); } } else { GETEM(x2, 0); - for(int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // GX->ub[2 * i + 1] = EX->ub[i + 4]; - LBU(x3, wback, fixedaddress+i+4); - SB(x3, gback, gdoffset+2*i+1); + LBU(x3, wback, fixedaddress + i + 4); + SB(x3, gback, gdoffset + 2 * i + 1); } } break; @@ -880,23 +785,23 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("PUNPCKHWD Gm,Em"); nextop = F8; GETGM(); - for(int i=0; i<2; ++i) { + for (int i = 0; i < 2; ++i) { // GX->uw[2 * i] = GX->uw[i + 2]; - LHU(x3, gback, gdoffset+(i+2)*2); - SH(x3, gback, gdoffset+2*i*2); + LHU(x3, gback, gdoffset + (i + 2) * 2); + SH(x3, gback, gdoffset + 2 * i * 2); } - if (MODREG && gd==(nextop&7)) { - for(int i=0; i<2; ++i) { + if (MODREG && gd == (nextop & 7)) { + for (int i = 0; i < 2; ++i) { // GX->uw[2 * i + 1] = GX->uw[2 * i]; - LHU(x3, gback, gdoffset+2*i*2); - SH(x3, gback, gdoffset+(2*i+1)*2); + LHU(x3, gback, gdoffset + 2 * i * 2); + SH(x3, gback, gdoffset + (2 * i + 1) * 2); } } else { GETEM(x1, 0); - for(int i=0; i<2; ++i) { + for (int i = 0; i < 2; ++i) { // GX->uw[2 * i + 1] = EX->uw[i + 2]; - LHU(x3, wback, fixedaddress+(i+2)*2); - SH(x3, gback, gdoffset+(2*i+1)*2); + LHU(x3, wback, fixedaddress + (i + 2) * 2); + SH(x3, gback, gdoffset + (2 * i + 1) * 2); } } break; @@ -906,30 +811,33 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEM(x1, 0); GETGM(); // GM->ud[0] = GM->ud[1]; - LWU(x3, gback, gdoffset+1*4); - SW(x3, gback, gdoffset+0*4); - if (!(MODREG && (gd==ed))) { + LWU(x3, gback, gdoffset + 1 * 4); + SW(x3, gback, gdoffset + 0 * 4); + if (!(MODREG && (gd == ed))) { // GM->ud[1] = EM->ud[1]; - LWU(x3, wback, fixedaddress+1*4); - SW(x3, gback, gdoffset+1*4); + LWU(x3, wback, fixedaddress + 1 * 4); + SW(x3, gback, gdoffset + 1 * 4); } break; case 0x6E: INST_NAME("MOVD Gm, Ed"); nextop = F8; GETGM(); - if(MODREG) { 
- ed = xRAX + (nextop&7) + (rex.b<<3); + if (MODREG) { + ed = xRAX + (nextop & 7) + (rex.b << 3); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 1, 0); - if(rex.w) { + if (rex.w) { LD(x4, ed, fixedaddress); } else { LW(x4, ed, fixedaddress); } ed = x4; } - if(rex.w) SD(ed, gback, gdoffset+0); else SW(ed, gback, gdoffset+0); + if (rex.w) + SD(ed, gback, gdoffset + 0); + else + SW(ed, gback, gdoffset + 0); break; case 0x6F: INST_NAME("MOVQ Gm, Em"); @@ -937,24 +845,24 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGM(); GETEM(x2, 0); LD(x3, wback, fixedaddress); - SD(x3, gback, gdoffset+0); + SD(x3, gback, gdoffset + 0); break; case 0x71: nextop = F8; - switch((nextop>>3)&7) { + switch ((nextop >> 3) & 7) { case 2: INST_NAME("PSRLW Em, Ib"); GETEM(x1, 1); u8 = F8; - if (u8>15) { + if (u8 > 15) { // just zero dest SD(xZR, wback, fixedaddress); - } else if(u8) { - for (int i=0; i<4; ++i) { + } else if (u8) { + for (int i = 0; i < 4; ++i) { // EX->uw[i] >>= u8; - LHU(x3, wback, fixedaddress+i*2); + LHU(x3, wback, fixedaddress + i * 2); SRLI(x3, x3, u8); - SH(x3, wback, fixedaddress+i*2); + SH(x3, wback, fixedaddress + i * 2); } } break; @@ -962,13 +870,13 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("PSRAW Em, Ib"); GETEM(x1, 1); u8 = F8; - if(u8>15) u8=15; - if(u8) { - for (int i=0; i<4; ++i) { + if (u8 > 15) u8 = 15; + if (u8) { + for (int i = 0; i < 4; ++i) { // EX->sw[i] >>= u8; - LH(x3, wback, fixedaddress+i*2); + LH(x3, wback, fixedaddress + i * 2); SRAI(x3, x3, u8); - SH(x3, wback, fixedaddress+i*2); + SH(x3, wback, fixedaddress + i * 2); } } break; @@ -976,15 +884,15 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("PSLLW Em, Ib"); GETEM(x1, 1); u8 = F8; - if (u8>15) { + if (u8 > 15) { // just zero dest - SD(xZR, wback, fixedaddress+0); - } else if(u8) { - for (int i=0; i<4; ++i) { + SD(xZR, 
wback, fixedaddress + 0); + } else if (u8) { + for (int i = 0; i < 4; ++i) { // EX->uw[i] <<= u8; - LHU(x3, wback, fixedaddress+i*2); + LHU(x3, wback, fixedaddress + i * 2); SLLI(x3, x3, u8); - SH(x3, wback, fixedaddress+i*2); + SH(x3, wback, fixedaddress + i * 2); } } break; @@ -1005,76 +913,76 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni // empty MMX, FPU now usable mmx_purgecache(dyn, ninst, 0, x1); /*emu->top = 0; - emu->fpu_stack = 0;*/ //TODO: Check if something is needed here? + emu->fpu_stack = 0;*/ + // TODO: Check if something is needed here? break; case 0x7F: INST_NAME("MOVQ Em, Gm"); nextop = F8; GETGM(); GETEM(x2, 0); - LD(x3, gback, gdoffset+0); + LD(x3, gback, gdoffset + 0); SD(x3, wback, fixedaddress); break; - #define GO(GETFLAGS, NO, YES, F) \ - READFLAGS(F); \ - i32_ = F32S; \ - BARRIER(BARRIER_MAYBE); \ - JUMP(addr+i32_, 1); \ - GETFLAGS; \ - if(dyn->insts[ninst].x64.jmp_insts==-1 || \ - CHECK_CACHE()) { \ - /* out of the block */ \ - i32 = dyn->insts[ninst].epilog-(dyn->native_size); \ - B##NO##_safe(x1, i32); \ - if(dyn->insts[ninst].x64.jmp_insts==-1) { \ - if(!(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT)) \ - fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ - jump_to_next(dyn, addr+i32_, 0, ninst); \ - } else { \ - CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \ - B(i32); \ - } \ - } else { \ - /* inside the block */ \ - i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \ - B##YES##_safe(x1, i32); \ - } +#define GO(GETFLAGS, NO, YES, F) \ + READFLAGS(F); \ + i32_ = F32S; \ + BARRIER(BARRIER_MAYBE); \ + JUMP(addr + i32_, 1); \ + GETFLAGS; \ + if (dyn->insts[ninst].x64.jmp_insts == -1 || CHECK_CACHE()) { \ + /* out of the block */ \ + i32 = dyn->insts[ninst].epilog - (dyn->native_size); \ + B##NO##_safe(x1, i32); \ + if (dyn->insts[ninst].x64.jmp_insts == -1) { \ + if 
(!(dyn->insts[ninst].x64.barrier & BARRIER_FLOAT)) \ + fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ + jump_to_next(dyn, addr + i32_, 0, ninst); \ + } else { \ + CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ + B(i32); \ + } \ + } else { \ + /* inside the block */ \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address - (dyn->native_size); \ + B##YES##_safe(x1, i32); \ + } - GOCOND(0x80, "J", "Id"); - #undef GO + GOCOND(0x80, "J", "Id"); +#undef GO - #define GO(GETFLAGS, NO, YES, F) \ - READFLAGS(F); \ - GETFLAGS; \ - nextop=F8; \ - S##YES(x3, x1); \ - if(MODREG) { \ - if(rex.rex) { \ - eb1= xRAX+(nextop&7)+(rex.b<<3); \ - eb2 = 0; \ - } else { \ - ed = (nextop&7); \ - eb2 = (ed>>2)*8; \ - eb1 = xRAX+(ed&3); \ - } \ - if (eb2) { \ - LUI(x1, 0xffff0); \ - ORI(x1, x1, 0xff); \ - AND(eb1, eb1, x1); \ - SLLI(x3, x3, 8); \ - } else { \ - ANDI(eb1, eb1, 0xf00); \ - } \ - OR(eb1, eb1, x3); \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress,rex, NULL, 1, 0); \ - SB(x3, ed, fixedaddress); \ - SMWRITE(); \ - } +#define GO(GETFLAGS, NO, YES, F) \ + READFLAGS(F); \ + GETFLAGS; \ + nextop = F8; \ + S##YES(x3, x1); \ + if (MODREG) { \ + if (rex.rex) { \ + eb1 = xRAX + (nextop & 7) + (rex.b << 3); \ + eb2 = 0; \ + } else { \ + ed = (nextop & 7); \ + eb2 = (ed >> 2) * 8; \ + eb1 = xRAX + (ed & 3); \ + } \ + if (eb2) { \ + LUI(x1, 0xffff0); \ + ORI(x1, x1, 0xff); \ + AND(eb1, eb1, x1); \ + SLLI(x3, x3, 8); \ + } else { \ + ANDI(eb1, eb1, 0xf00); \ + } \ + OR(eb1, eb1, x3); \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); \ + SB(x3, ed, fixedaddress); \ + SMWRITE(); \ + } - GOCOND(0x90, "SET", "Eb"); - #undef GO + GOCOND(0x90, "SET", "Eb"); +#undef GO case 0xA2: INST_NAME("CPUID"); @@ -1091,20 +999,20 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SET_DFNONE(); nextop = 
F8; GETGD; - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); + if (MODREG) { + ed = xRAX + (nextop & 7) + (rex.b << 3); } else { SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, x1, &fixedaddress, rex, NULL, 1, 0); - SRAIxw(x1, gd, 5+rex.w); // r1 = (gd>>5) - ADDSL(x3, wback, x1, 2+rex.w, x1); + SRAIxw(x1, gd, 5 + rex.w); // r1 = (gd>>5) + ADDSL(x3, wback, x1, 2 + rex.w, x1); LDxw(x1, x3, fixedaddress); ed = x1; } - ANDI(x2, gd, rex.w?0x3f:0x1f); + ANDI(x2, gd, rex.w ? 0x3f : 0x1f); SRL(x4, ed, x2); ANDI(x4, x4, 1); - ANDI(xFlags, xFlags, ~1); //F_CF is 1 + ANDI(xFlags, xFlags, ~1); // F_CF is 1 OR(xFlags, xFlags, x4); break; case 0xA4: @@ -1124,13 +1032,13 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETGD; if (MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); + ed = xRAX + (nextop & 7) + (rex.b << 3); wback = 0; } else { SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, x1, &fixedaddress, rex, NULL, 1, 0); - SRAI(x1, gd, 5+rex.w); - ADDSL(x3, wback, x1, 2+rex.w, x1); + SRAI(x1, gd, 5 + rex.w); + ADDSL(x3, wback, x1, 2 + rex.w, x1); LDxw(x1, x3, fixedaddress); ed = x1; wback = x3; @@ -1147,7 +1055,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADDI(x3, xZR, 1); SLL(x3, x3, x2); OR(ed, ed, x3); - if(wback) { + if (wback) { SDxw(ed, wback, fixedaddress); SMWRITE(); } @@ -1159,36 +1067,34 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETED(1); GETGD; u8 = F8; - u8&=(rex.w?0x3f:0x1f); + u8 &= (rex.w ? 
0x3f : 0x1f); emit_shrd32c(dyn, ninst, rex, ed, gd, u8, x3, x4); WBACK; break; case 0xAE: nextop = F8; - if((nextop&0xF8)==0xE8) { + if ((nextop & 0xF8) == 0xE8) { INST_NAME("LFENCE"); SMDMB(); - } else - if((nextop&0xF8)==0xF0) { + } else if ((nextop & 0xF8) == 0xF0) { INST_NAME("MFENCE"); SMDMB(); - } else - if((nextop&0xF8)==0xF8) { + } else if ((nextop & 0xF8) == 0xF8) { INST_NAME("SFENCE"); SMDMB(); } else { - switch((nextop>>3)&7) { + switch ((nextop >> 3) & 7) { case 0: INST_NAME("FXSAVE Ed"); MESSAGE(LOG_DUMP, "Need Optimization\n"); SKIPTEST(x1); fpu_purgecache(dyn, ninst, 0, x1, x2, x3); - if(MODREG) { + if (MODREG) { DEFAULT; } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, x3, &fixedaddress, rex, NULL, 0, 0); - if(ed!=x1) {MV(x1, ed);} - CALL(rex.w?((void*)fpu_fxsave64):((void*)fpu_fxsave32), -1); + if (ed != x1) { MV(x1, ed); } + CALL(rex.w ? ((void*)fpu_fxsave64) : ((void*)fpu_fxsave32), -1); } break; case 1: @@ -1196,19 +1102,19 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MESSAGE(LOG_DUMP, "Need Optimization\n"); SKIPTEST(x1); fpu_purgecache(dyn, ninst, 0, x1, x2, x3); - if(MODREG) { + if (MODREG) { DEFAULT; } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, x3, &fixedaddress, rex, NULL, 0, 0); - if(ed!=x1) {MV(x1, ed);} - CALL(rex.w?((void*)fpu_fxrstor64):((void*)fpu_fxrstor32), -1); + if (ed != x1) { MV(x1, ed); } + CALL(rex.w ? ((void*)fpu_fxrstor64) : ((void*)fpu_fxrstor32), -1); } break; case 2: INST_NAME("LDMXCSR Md"); GETED(0); SW(ed, xEmu, offsetof(x64emu_t, mxcsr)); - if(box64_sse_flushto0) { + if (box64_sse_flushto0) { // TODO: applyFlushTo0 also needs to add RISC-V support. 
} break; @@ -1222,7 +1128,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("CLFLUSH Ed"); MESSAGE(LOG_DUMP, "Need Optimization?\n"); addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0); - if(wback!=A1) { + if (wback != A1) { MV(A1, wback); } CALL_(native_clflush, -1, 0); @@ -1238,26 +1144,32 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETGD; GETED(0); - if(rex.w) { + if (rex.w) { // 64bits imul - UFLAG_IF { + UFLAG_IF + { MULH(x3, gd, ed); MUL(gd, gd, ed); UFLAG_OP1(x3); UFLAG_RES(gd); UFLAG_DF(x3, d_imul64); - } else { + } + else + { MULxw(gd, gd, ed); } } else { // 32bits imul - UFLAG_IF { + UFLAG_IF + { MUL(gd, gd, ed); UFLAG_RES(gd); SRLI(x3, gd, 32); UFLAG_OP1(x3); UFLAG_DF(x3, d_imul32); - } else { + } + else + { MULxw(gd, gd, ed); } SLLI(gd, gd, 32); @@ -1270,14 +1182,14 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SET_DFNONE(); nextop = F8; GETGD; - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); + if (MODREG) { + ed = xRAX + (nextop & 7) + (rex.b << 3); wback = 0; } else { SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0); - SRAI(x1, gd, 5+rex.w); - ADDSL(x3, wback, x1, 2+rex.w, x1); + SRAI(x1, gd, 5 + rex.w); + ADDSL(x3, wback, x1, 2 + rex.w, x1); LDxw(x1, x3, fixedaddress); ed = x1; wback = x3; @@ -1295,7 +1207,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SLL(x5, x5, x2); NOT(x5, x5); AND(ed, ed, x5); - if(wback) { + if (wback) { SDxw(ed, wback, fixedaddress); SMWRITE(); } @@ -1304,14 +1216,14 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("MOVZX Gd, Eb"); nextop = F8; GETGD; - if(MODREG) { - if(rex.rex) { - eb1 = xRAX+(nextop&7)+(rex.b<<3); - eb2 = 0; \ + if (MODREG) { + if (rex.rex) { + eb1 = xRAX + (nextop & 7) + (rex.b << 3); + eb2 = 0; } else { - 
ed = (nextop&7); - eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx - eb2 = (ed&4)>>2; // L or H + ed = (nextop & 7); + eb1 = xRAX + (ed & 3); // Ax, Cx, Dx or Bx + eb2 = (ed & 4) >> 2; // L or H } if (eb2) { SRLI(gd, eb1, 8); @@ -1329,8 +1241,8 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("MOVZX Gd, Ew"); nextop = F8; GETGD; - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); + if (MODREG) { + ed = xRAX + (nextop & 7) + (rex.b << 3); ZEXTH(gd, ed); } else { SMREAD(); @@ -1340,14 +1252,14 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 0xBA: nextop = F8; - switch((nextop>>3)&7) { + switch ((nextop >> 3) & 7) { case 4: INST_NAME("BT Ed, Ib"); SETFLAGS(X_CF, SF_SUBSET); SET_DFNONE(); GETED(1); u8 = F8; - u8&=rex.w?0x3f:0x1f; + u8 &= rex.w ? 0x3f : 0x1f; SRLIxw(x3, ed, u8); ANDI(x3, x3, 1); // F_CF is 1 ANDI(xFlags, xFlags, ~1); @@ -1359,19 +1271,19 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SET_DFNONE(); GETED(1); u8 = F8; - u8&=(rex.w?0x3f:0x1f); - ORI(xFlags, xFlags, 1<<F_CF); + u8 &= (rex.w ? 0x3f : 0x1f); + ORI(xFlags, xFlags, 1 << F_CF); if (u8 <= 10) { - ANDI(x6, ed, 1<<u8); + ANDI(x6, ed, 1 << u8); BNE_MARK(x6, xZR); - ANDI(xFlags, xFlags, ~(1<<F_CF)); - XORI(ed, ed, 1<<u8); + ANDI(xFlags, xFlags, ~(1 << F_CF)); + XORI(ed, ed, 1 << u8); } else { ORI(x6, xZR, 1); SLLI(x6, x6, u8); AND(x4, ed, x6); BNE_MARK(x4, xZR); - ANDI(xFlags, xFlags, ~(1<<F_CF)); + ANDI(xFlags, xFlags, ~(1 << F_CF)); XOR(ed, ed, x6); } if (wback) { @@ -1386,19 +1298,19 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SET_DFNONE(); GETED(1); u8 = F8; - u8&=(rex.w?0x3f:0x1f); - ANDI(xFlags, xFlags, ~(1<<F_CF)); + u8 &= (rex.w ? 
0x3f : 0x1f); + ANDI(xFlags, xFlags, ~(1 << F_CF)); if (u8 <= 10) { - ANDI(x6, ed, 1<<u8); + ANDI(x6, ed, 1 << u8); BEQ_MARK(x6, xZR); - ORI(xFlags, xFlags, 1<<F_CF); - XORI(ed, ed, 1<<u8); + ORI(xFlags, xFlags, 1 << F_CF); + XORI(ed, ed, 1 << u8); } else { ORI(x6, xZR, 1); SLLI(x6, x6, u8); AND(x6, ed, x6); BEQ_MARK(x6, xZR); - ORI(xFlags, xFlags, 1<<F_CF); + ORI(xFlags, xFlags, 1 << F_CF); XOR(ed, ed, x6); } if (wback) { @@ -1413,7 +1325,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SET_DFNONE(); GETED(1); u8 = F8; - u8&=rex.w?0x3f:0x1f; + u8 &= rex.w ? 0x3f : 0x1f; SRLIxw(x3, ed, u8); ANDI(x3, x3, 1); // F_CF is 1 ANDI(xFlags, xFlags, ~1); @@ -1424,7 +1336,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MOV64xw(x3, (1LL << u8)); XOR(ed, ed, x3); } - if(wback) { + if (wback) { SDxw(ed, wback, fixedaddress); SMWRITE(); } @@ -1440,13 +1352,13 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETGD; if (MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); + ed = xRAX + (nextop & 7) + (rex.b << 3); wback = 0; } else { SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, x1, &fixedaddress, rex, NULL, 1, 0); - SRAI(x1, gd, 5+rex.w); - ADDSL(x3, wback, x1, 2+rex.w, x1); + SRAI(x1, gd, 5 + rex.w); + ADDSL(x3, wback, x1, 2 + rex.w, x1); LDxw(x1, x3, fixedaddress); ed = x1; wback = x3; @@ -1463,7 +1375,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADDI(x3, xZR, 1); SLL(x3, x3, x2); XOR(ed, ed, x3); - if(wback) { + if (wback) { SDxw(ed, wback, fixedaddress); SMWRITE(); } @@ -1475,27 +1387,27 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETED(0); GETGD; - if(!rex.w && MODREG) { + if (!rex.w && MODREG) { AND(x4, ed, xMASK); ed = x4; } BNE_MARK(ed, xZR); - ORI(xFlags, xFlags, 1<<F_ZF); + ORI(xFlags, xFlags, 1 << F_ZF); B_NEXT_nocond; MARK; - if(rv64_zbb) { + if 
(rv64_zbb) { CTZxw(gd, ed); } else { NEG(x2, ed); AND(x2, x2, ed); TABLE64(x3, 0x03f79d71b4ca8b09ULL); MUL(x2, x2, x3); - SRLI(x2, x2, 64-6); + SRLI(x2, x2, 64 - 6); TABLE64(x1, (uintptr_t)&deBruijn64tab); ADD(x1, x1, x2); LBU(gd, x1, 0); } - ANDI(xFlags, xFlags, ~(1<<F_ZF)); + ANDI(xFlags, xFlags, ~(1 << F_ZF)); break; case 0xBD: INST_NAME("BSR Gd, Ed"); @@ -1504,44 +1416,44 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETED(0); GETGD; - if(!rex.w && MODREG) { + if (!rex.w && MODREG) { AND(x4, ed, xMASK); ed = x4; } BNE_MARK(ed, xZR); - ORI(xFlags, xFlags, 1<<F_ZF); + ORI(xFlags, xFlags, 1 << F_ZF); B_NEXT_nocond; MARK; - ANDI(xFlags, xFlags, ~(1<<F_ZF)); - if(rv64_zbb) { - MOV32w(x1, rex.w?63:31); + ANDI(xFlags, xFlags, ~(1 << F_ZF)); + if (rv64_zbb) { + MOV32w(x1, rex.w ? 63 : 31); CLZxw(gd, ed); SUB(gd, x1, gd); } else { - if(ed!=gd) + if (ed != gd) u8 = gd; else u8 = x1; ADDI(u8, xZR, 0); - if(rex.w) { + if (rex.w) { MV(x2, ed); SRLI(x3, x2, 32); - BEQZ(x3, 4+2*4); + BEQZ(x3, 4 + 2 * 4); ADDI(u8, u8, 32); MV(x2, x3); } else { AND(x2, ed, xMASK); } SRLI(x3, x2, 16); - BEQZ(x3, 4+2*4); + BEQZ(x3, 4 + 2 * 4); ADDI(u8, u8, 16); MV(x2, x3); SRLI(x3, x2, 8); - BEQZ(x3, 4+2*4); + BEQZ(x3, 4 + 2 * 4); ADDI(u8, u8, 8); MV(x2, x3); SRLI(x3, x2, 4); - BEQZ(x3, 4+2*4); + BEQZ(x3, 4 + 2 * 4); ADDI(u8, u8, 4); MV(x2, x3); ANDI(x2, x2, 0b1111); @@ -1555,31 +1467,31 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("MOVSX Gd, Eb"); nextop = F8; GETGD; - if(MODREG) { - if(rex.rex) { - wback = xRAX+(nextop&7)+(rex.b<<3); + if (MODREG) { + if (rex.rex) { + wback = xRAX + (nextop & 7) + (rex.b << 3); wb2 = 0; } else { - wback = (nextop&7); - wb2 = (wback>>2)*8; - wback = xRAX+(wback&3); + wback = (nextop & 7); + wb2 = (wback >> 2) * 8; + wback = xRAX + (wback & 3); } - SLLI(gd, wback, 56-wb2); + SLLI(gd, wback, 56 - wb2); SRAI(gd, gd, 56); } else { SMREAD(); addr = geted(dyn, addr, 
ninst, nextop, &ed, x3, x1, &fixedaddress, rex, NULL, 1, 0); LB(gd, ed, fixedaddress); } - if(!rex.w) + if (!rex.w) ZEROUP(gd); break; case 0xBF: INST_NAME("MOVSX Gd, Ew"); nextop = F8; GETGD; - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); + if (MODREG) { + ed = xRAX + (nextop & 7) + (rex.b << 3); SLLI(gd, ed, 48); SRAI(gd, gd, 48); } else { @@ -1587,7 +1499,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni addr = geted(dyn, addr, ninst, nextop, &ed, x3, x1, &fixedaddress, rex, NULL, 1, 0); LH(gd, ed, fixedaddress); } - if(!rex.w) + if (!rex.w) ZEROUP(gd); break; case 0xC2: @@ -1598,12 +1510,12 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u8 = F8; d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); - for(int i=0; i<4; ++i) { - FLW(d0, gback, gdoffset+i*4); - FLW(d1, wback, fixedaddress+i*4); - if ((u8&7) == 0) { // Equal + for (int i = 0; i < 4; ++i) { + FLW(d0, gback, gdoffset + i * 4); + FLW(d1, wback, fixedaddress + i * 4); + if ((u8 & 7) == 0) { // Equal FEQS(x3, d0, d1); - } else if ((u8&7) == 4) { // Not Equal or unordered + } else if ((u8 & 7) == 4) { // Not Equal or unordered FEQS(x3, d0, d1); XORI(x3, x3, 1); } else { @@ -1612,41 +1524,47 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FEQS(x3, d1, d1); AND(x3, x3, x4); - switch(u8&7) { - case 1: BEQ_MARK(x3, xZR); FLTS(x3, d0, d1); break; // Less than - case 2: BEQ_MARK(x3, xZR); FLES(x3, d0, d1); break; // Less or equal - case 3: XORI(x3, x3, 1); break; // NaN - case 5: { // Greater or equal or unordered - BEQ(x3, xZR, 12); // MARK2 - FLES(x3, d1, d0); - J(8); // MARK; - break; - } - case 6: { // Greater or unordered, test inverted, N!=V so unordered or less than (inverted) - BEQ(x3, xZR, 12); // MARK2 - FLTS(x3, d1, d0); - J(8); // MARK; - break; - } - case 7: break; // Not NaN + switch (u8 & 7) { + case 1: + BEQ_MARK(x3, xZR); + FLTS(x3, d0, d1); + break; // Less than + case 2: + 
BEQ_MARK(x3, xZR); + FLES(x3, d0, d1); + break; // Less or equal + case 3: XORI(x3, x3, 1); break; // NaN + case 5: { // Greater or equal or unordered + BEQ(x3, xZR, 12); // MARK2 + FLES(x3, d1, d0); + J(8); // MARK; + break; + } + case 6: { // Greater or unordered, test inverted, N!=V so unordered or less than (inverted) + BEQ(x3, xZR, 12); // MARK2 + FLTS(x3, d1, d0); + J(8); // MARK; + break; + } + case 7: break; // Not NaN } // MARK2; - if ((u8&7) == 5 || (u8&7) == 6) { + if ((u8 & 7) == 5 || (u8 & 7) == 6) { MOV32w(x3, 1); } // MARK; } NEG(x3, x3); - SW(x3, gback, gdoffset+i*4); + SW(x3, gback, gdoffset + i * 4); } break; case 0xC3: INST_NAME("MOVNTI Ed, Gd"); nextop = F8; GETGD; - if(MODREG) { - MVxw(xRAX+(nextop&7)+(rex.b<<3), gd); + if (MODREG) { + MVxw(xRAX + (nextop & 7) + (rex.b << 3), gd); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); SDxw(gd, ed, fixedaddress); @@ -1660,19 +1578,19 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u8 = F8; int32_t idx; - idx = (u8>>(0*2))&3; - LWU(x3, gback, gdoffset+idx*4); - idx = (u8>>(1*2))&3; - LWU(x4, gback, gdoffset+idx*4); - idx = (u8>>(2*2))&3; - LWU(x5, wback, fixedaddress+idx*4); - idx = (u8>>(3*2))&3; - LWU(x6, wback, fixedaddress+idx*4); + idx = (u8 >> (0 * 2)) & 3; + LWU(x3, gback, gdoffset + idx * 4); + idx = (u8 >> (1 * 2)) & 3; + LWU(x4, gback, gdoffset + idx * 4); + idx = (u8 >> (2 * 2)) & 3; + LWU(x5, wback, fixedaddress + idx * 4); + idx = (u8 >> (3 * 2)) & 3; + LWU(x6, wback, fixedaddress + idx * 4); - SW(x3, gback, gdoffset+0*4); - SW(x4, gback, gdoffset+1*4); - SW(x5, gback, gdoffset+2*4); - SW(x6, gback, gdoffset+3*4); + SW(x3, gback, gdoffset + 0 * 4); + SW(x4, gback, gdoffset + 1 * 4); + SW(x5, gback, gdoffset + 2 * 4); + SW(x6, gback, gdoffset + 3 * 4); break; case 0xC8: @@ -1682,62 +1600,22 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xCC: case 0xCD: case 0xCE: - 
case 0xCF: /* BSWAP reg */ + case 0xCF: /* BSWAP reg */ INST_NAME("BSWAP Reg"); - gd = xRAX+(opcode&7)+(rex.b<<3); - if(rv64_zbb) { - REV8(gd, gd); - if(!rex.w) - SRLI(gd, gd, 32); - } else { - gback = gd; - if (!rex.w) { - AND(x4, gd, xMASK); - gd = x4; - } - ANDI(x1, gd, 0xff); - SLLI(x1, x1, (rex.w?64:32)-8); - SRLI(x2, gd, 8); - ANDI(x3, x2, 0xff); - SLLI(x3, x3, (rex.w?64:32)-16); - OR(x1, x1, x3); - SRLI(x2, gd, 16); - ANDI(x3, x2, 0xff); - SLLI(x3, x3, (rex.w?64:32)-24); - OR(x1, x1, x3); - SRLI(x2, gd, 24); - if(rex.w) { - ANDI(x3, x2, 0xff); - SLLI(x3, x3, 64-32); - OR(x1, x1, x3); - SRLI(x2, gd, 32); - ANDI(x3, x2, 0xff); - SLLI(x3, x3, 64-40); - OR(x1, x1, x3); - SRLI(x2, gd, 40); - ANDI(x3, x2, 0xff); - SLLI(x3, x3, 64-48); - OR(x1, x1, x3); - SRLI(x2, gd, 48); - ANDI(x3, x2, 0xff); - SLLI(x3, x3, 64-56); - OR(x1, x1, x3); - SRLI(x2, gd, 56); - } - OR(gback, x1, x2); - } + gd = xRAX + (opcode & 7) + (rex.b << 3); + REV8xw(gd, gd, x1, x2, x3, x4); break; case 0xE5: INST_NAME("PMULHW Gm,Em"); nextop = F8; GETGM(); GETEM(x2, 0); - for(int i=0; i<4; ++i) { - LH(x3, gback, gdoffset+2*i); - LH(x4, wback, fixedaddress+2*i); + for (int i = 0; i < 4; ++i) { + LH(x3, gback, gdoffset + 2 * i); + LH(x4, wback, fixedaddress + 2 * i); MULW(x3, x3, x4); SRAIW(x3, x3, 16); - SH(x3, gback, gdoffset+2*i); + SH(x3, gback, gdoffset + 2 * i); } break; case 0xED: @@ -1745,35 +1623,35 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETGM(); GETEM(x2, 0); - for(int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // tmp32s = (int32_t)GX->sw[i] + EX->sw[i]; // GX->sw[i] = (tmp32s>32767)?32767:((tmp32s<-32768)?-32768:tmp32s); - LH(x3, gback, gdoffset+2*i); - LH(x4, wback, fixedaddress+2*i); + LH(x3, gback, gdoffset + 2 * i); + LH(x4, wback, fixedaddress + 2 * i); ADDW(x3, x3, x4); LUI(x4, 0xFFFF8); // -32768 BGE(x3, x4, 12); - SH(x4, gback, gdoffset+2*i); - J(20); // continue + SH(x4, gback, gdoffset + 2 * i); + J(20); // continue 
LUI(x4, 8); // 32768 BLT(x3, x4, 8); ADDIW(x3, x4, -1); - SH(x3, gback, gdoffset+2*i); + SH(x3, gback, gdoffset + 2 * i); } break; case 0xEF: INST_NAME("PXOR Gm,Em"); nextop = F8; GETGM(); - if(MODREG && gd==(nextop&7)) { + if (MODREG && gd == (nextop & 7)) { // just zero dest - SD(xZR, gback, gdoffset+0); + SD(xZR, gback, gdoffset + 0); } else { GETEM(x2, 0); - LD(x3, gback, gdoffset+0); + LD(x3, gback, gdoffset + 0); LD(x4, wback, fixedaddress); XOR(x3, x3, x4); - SD(x3, gback, gdoffset+0); + SD(x3, gback, gdoffset + 0); } break; case 0xF9: diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index 7baddf4f..bfdbbaa5 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -23,7 +23,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog) { - (void)ip; (void)need_epilog; + (void)ip; + (void)need_epilog; uint8_t opcode = F8; uint8_t nextop, u8, s8; @@ -49,7 +50,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int static const int8_t round_round[] = { RD_RNE, RD_RDN, RD_RUP, RD_RTZ }; - switch(opcode) { + switch (opcode) { case 0x10: INST_NAME("MOVUPD Gx,Ex"); nextop = F8; @@ -63,13 +64,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x1, 0); GETGX(); SSE_LOOP_MV_Q2(x3); - if(!MODREG) SMWRITE2(); + if (!MODREG) SMWRITE2(); break; case 0x12: INST_NAME("MOVLPD Gx, Eq"); nextop = F8; GETGX(); - if(MODREG) { + if (MODREG) { // access register instead of memory is bad opcode! 
DEFAULT; return addr; @@ -77,19 +78,19 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); LD(x3, wback, fixedaddress); - SD(x3, gback, gdoffset+0); + SD(x3, gback, gdoffset + 0); break; case 0x13: INST_NAME("MOVLPD Eq, Gx"); nextop = F8; GETGX(); - if(MODREG) { + if (MODREG) { // access register instead of memory is bad opcode! DEFAULT; return addr; } addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); - LD(x3, gback, gdoffset+0); + LD(x3, gback, gdoffset + 0); SD(x3, wback, fixedaddress); SMWRITE2(); break; @@ -99,8 +100,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETEX(x2, 0); // GX->q[1] = EX->q[0]; - LD(x3, wback, fixedaddress+0); - SD(x3, gback, gdoffset+8); + LD(x3, wback, fixedaddress + 0); + SD(x3, gback, gdoffset + 8); break; case 0x15: INST_NAME("UNPCKHPD Gx, Ex"); @@ -108,17 +109,17 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x1, 0); GETGX(); // GX->q[0] = GX->q[1]; - LD(x3, gback, gdoffset+8); - SD(x3, gback, gdoffset+0); + LD(x3, gback, gdoffset + 8); + SD(x3, gback, gdoffset + 0); // GX->q[1] = EX->q[1]; - LD(x3, wback, fixedaddress+8); - SD(x3, gback, gdoffset+8); + LD(x3, wback, fixedaddress + 8); + SD(x3, gback, gdoffset + 8); break; case 0x16: INST_NAME("MOVHPD Gx, Eq"); nextop = F8; GETGX(); - if(MODREG) { + if (MODREG) { // access register instead of memory is bad opcode! 
DEFAULT; return addr; @@ -126,7 +127,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); LD(x3, wback, fixedaddress); - SD(x3, gback, gdoffset+8); + SD(x3, gback, gdoffset + 8); break; case 0x1F: INST_NAME("NOP (multibyte)"); @@ -146,7 +147,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x1, 0); GETGX(); SSE_LOOP_MV_Q2(x3); - if(!MODREG) SMWRITE2(); + if (!MODREG) SMWRITE2(); break; case 0x2A: INST_NAME("CVTPI2PD Gx,Em"); @@ -154,10 +155,10 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETEM(x2, 0); d0 = fpu_get_scratch(dyn); - for (int i=0; i<2; ++i) { - LW(x1, wback, fixedaddress+i*4); + for (int i = 0; i < 2; ++i) { + LW(x1, wback, fixedaddress + i * 4); FCVTDW(d0, x1, RD_RTZ); - FSD(d0, gback, gdoffset+i*8); + FSD(d0, gback, gdoffset + i * 8); } break; case 0x2B: @@ -173,20 +174,20 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGM(); GETEX(x2, 0); d0 = fpu_get_scratch(dyn); - for (int i=0; i<2; ++i) { - if(!box64_dynarec_fastround) { - FSFLAGSI(0); // // reset all bits + for (int i = 0; i < 2; ++i) { + if (!box64_dynarec_fastround) { + FSFLAGSI(0); // // reset all bits } - FLD(d0, wback, fixedaddress+i*8); + FLD(d0, wback, fixedaddress + i * 8); FCVTWD(x1, d0, RD_RTZ); - if(!box64_dynarec_fastround) { - FRFLAGS(x5); // get back FPSR to check the IOC bit - ANDI(x5, x5, (1<<FR_NV)|(1<<FR_OF)); + if (!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, (1 << FR_NV) | (1 << FR_OF)); BEQ_MARKi(x5, xZR, i); MOV32w(x1, 0x80000000); MARKi(i); } - SW(x1, gback, gdoffset+i*4); + SW(x1, gback, gdoffset + i * 4); } break; case 0x2D: @@ -196,27 +197,31 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 0); d0 = fpu_get_scratch(dyn); u8 
= sse_setround(dyn, ninst, x4, x5); - for (int i=0; i<2; ++i) { - if(!box64_dynarec_fastround) { - FSFLAGSI(0); // // reset all bits + for (int i = 0; i < 2; ++i) { + if (!box64_dynarec_fastround) { + FSFLAGSI(0); // // reset all bits } - FLD(d0, wback, fixedaddress+i*8); + FLD(d0, wback, fixedaddress + i * 8); FCVTWD(x1, d0, RD_DYN); - if(!box64_dynarec_fastround) { - FRFLAGS(x5); // get back FPSR to check the IOC bit - ANDI(x5, x5, (1<<FR_NV)|(1<<FR_OF)); + if (!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, (1 << FR_NV) | (1 << FR_OF)); BEQ_MARKi(x5, xZR, i); MOV32w(x1, 0x80000000); MARKi(i); } - SW(x1, gback, gdoffset+i*4); + SW(x1, gback, gdoffset + i * 4); } x87_restoreround(dyn, ninst, u8); break; case 0x2E: // no special check... case 0x2F: - if(opcode==0x2F) {INST_NAME("COMISD Gx, Ex");} else {INST_NAME("UCOMISD Gx, Ex");} + if (opcode == 0x2F) { + INST_NAME("COMISD Gx, Ex"); + } else { + INST_NAME("UCOMISD Gx, Ex"); + } SETFLAGS(X_ALL, SF_SET); SET_DFNONE(); nextop = F8; @@ -224,33 +229,36 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEXSD(v0, 0); CLEAR_FLAGS(); // if isnan(d0) || isnan(v0) - IFX(X_ZF | X_PF | X_CF) { + IFX(X_ZF | X_PF | X_CF) + { FEQD(x3, d0, d0); FEQD(x2, v0, v0); AND(x2, x2, x3); BNE_MARK(x2, xZR); - ORI(xFlags, xFlags, (1<<F_ZF) | (1<<F_PF) | (1<<F_CF)); + ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF)); B_NEXT_nocond; } MARK; // else if isless(d0, v0) - IFX(X_CF) { + IFX(X_CF) + { FLTD(x2, d0, v0); BEQ_MARK2(x2, xZR); - ORI(xFlags, xFlags, 1<<F_CF); + ORI(xFlags, xFlags, 1 << F_CF); B_NEXT_nocond; } MARK2; // else if d0 == v0 - IFX(X_ZF) { + IFX(X_ZF) + { FEQD(x2, d0, v0); CBZ_NEXT(x2); - ORI(xFlags, xFlags, 1<<F_ZF); + ORI(xFlags, xFlags, 1 << F_ZF); } break; - case 0x38: // SSSE3 opcodes + case 0x38: // SSSE3 opcodes nextop = F8; - switch(nextop) { + switch (nextop) { case 0x00: INST_NAME("PSHUFB Gx, Ex"); nextop = F8; @@ -261,46 
+269,46 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ADDI(x5, xEmu, offsetof(x64emu_t, scratch)); // perserve gd - LD(x3, gback, gdoffset+0); - LD(x4, gback, gdoffset+8); + LD(x3, gback, gdoffset + 0); + LD(x4, gback, gdoffset + 8); SD(x3, x5, 0); SD(x4, x5, 8); - for (int i=0; i<16; ++i) { - LBU(x3, wback, fixedaddress+i); + for (int i = 0; i < 16; ++i) { + LBU(x3, wback, fixedaddress + i); ANDI(x4, x3, 128); BEQZ(x4, 12); - SB(xZR, gback, gdoffset+i); + SB(xZR, gback, gdoffset + i); BEQZ(xZR, 20); // continue ANDI(x4, x3, 15); ADD(x4, x4, x5); LBU(x4, x4, 0); - SB(x4, gback, gdoffset+i); + SB(x4, gback, gdoffset + i); } break; case 0x01: INST_NAME("PHADDW Gx, Ex"); nextop = F8; GETGX(); - for (int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // GX->sw[i] = GX->sw[i*2+0]+GX->sw[i*2+1]; - LH(x3, gback, gdoffset+2*(i*2+0)); - LH(x4, gback, gdoffset+2*(i*2+1)); + LH(x3, gback, gdoffset + 2 * (i * 2 + 0)); + LH(x4, gback, gdoffset + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); - SH(x3, gback, gdoffset+2*i); + SH(x3, gback, gdoffset + 2 * i); } - if (MODREG && gd==(nextop&7)+(rex.b<<3)) { + if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { // GX->q[1] = GX->q[0]; - LD(x3, gback, gdoffset+0); - SD(x3, gback, gdoffset+8); + LD(x3, gback, gdoffset + 0); + SD(x3, gback, gdoffset + 8); } else { GETEX(x2, 0); - for (int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // GX->sw[4+i] = EX->sw[i*2+0] + EX->sw[i*2+1]; - LH(x3, wback, fixedaddress+2*(i*2+0)); - LH(x4, wback, fixedaddress+2*(i*2+1)); + LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); + LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); - SH(x3, gback, gdoffset+2*(4+i)); + SH(x3, gback, gdoffset + 2 * (4 + i)); } } break; @@ -309,31 +317,31 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); // GX->sd[0] += GX->sd[1]; - LW(x3, gback, gdoffset+0*4); - LW(x4, gback, gdoffset+1*4); + LW(x3, gback, gdoffset + 0 * 4); + 
LW(x4, gback, gdoffset + 1 * 4); ADDW(x3, x3, x4); - SW(x3, gback, gdoffset+0*4); + SW(x3, gback, gdoffset + 0 * 4); // GX->sd[1] = GX->sd[2] + GX->sd[3]; - LW(x3, gback, gdoffset+2*4); - LW(x4, gback, gdoffset+3*4); + LW(x3, gback, gdoffset + 2 * 4); + LW(x4, gback, gdoffset + 3 * 4); ADDW(x3, x3, x4); - SW(x3, gback, gdoffset+1*4); - if (MODREG && gd==(nextop&7)+(rex.b<<3)) { + SW(x3, gback, gdoffset + 1 * 4); + if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { // GX->q[1] = GX->q[0]; - LD(x3, gback, gdoffset+0); - SD(x3, gback, gdoffset+8); + LD(x3, gback, gdoffset + 0); + SD(x3, gback, gdoffset + 8); } else { GETEX(x2, 0); // GX->sd[2] = EX->sd[0] + EX->sd[1]; - LW(x3, wback, fixedaddress+0*4); - LW(x4, wback, fixedaddress+1*4); + LW(x3, wback, fixedaddress + 0 * 4); + LW(x4, wback, fixedaddress + 1 * 4); ADDW(x3, x3, x4); - SW(x3, gback, gdoffset+2*4); + SW(x3, gback, gdoffset + 2 * 4); // GX->sd[3] = EX->sd[2] + EX->sd[3]; - LW(x3, wback, fixedaddress+2*4); - LW(x4, wback, fixedaddress+3*4); + LW(x3, wback, fixedaddress + 2 * 4); + LW(x4, wback, fixedaddress + 3 * 4); ADDW(x3, x3, x4); - SW(x3, gback, gdoffset+3*4); + SW(x3, gback, gdoffset + 3 * 4); } break; @@ -344,24 +352,24 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 0); MOV64x(x5, 32767); MOV64x(x6, -32768); - for(int i=0; i<8; ++i) { - LBU(x3, gback, gdoffset+i*2); - LB(x4, wback, fixedaddress+i*2); + for (int i = 0; i < 8; ++i) { + LBU(x3, gback, gdoffset + i * 2); + LB(x4, wback, fixedaddress + i * 2); MUL(x9, x3, x4); - LBU(x3, gback, gdoffset+i*2+1); - LB(x4, wback, fixedaddress+i*2+1); + LBU(x3, gback, gdoffset + i * 2 + 1); + LB(x4, wback, fixedaddress + i * 2 + 1); MUL(x3, x3, x4); ADD(x3, x3, x9); - if(rv64_zbb) { + if (rv64_zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { - BLT(x3, x5, 4+4); + BLT(x3, x5, 4 + 4); MV(x3, x5); - BLT(x6, x3, 4+4); + BLT(x6, x3, 4 + 4); MV(x3, x6); } - SH(x3, gback, gdoffset+i*2); + SH(x3, gback, gdoffset + i * 2); 
} break; @@ -370,14 +378,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<16; ++i) { - LB(x3, gback, gdoffset+i); - LB(x4, wback, fixedaddress+i); - BGE(x4, xZR, 4+4); + for (int i = 0; i < 16; ++i) { + LB(x3, gback, gdoffset + i); + LB(x4, wback, fixedaddress + i); + BGE(x4, xZR, 4 + 4); NEG(x3, x3); - BNE(x4, xZR, 4+4); + BNE(x4, xZR, 4 + 4); MOV_U12(x3, 0); - SB(x3, gback, gdoffset+i); + SB(x3, gback, gdoffset + i); } break; case 0x09: @@ -385,14 +393,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<8; ++i) { - LH(x3, gback, gdoffset+i*2); - LH(x4, wback, fixedaddress+i*2); - BGE(x4, xZR, 4+4); + for (int i = 0; i < 8; ++i) { + LH(x3, gback, gdoffset + i * 2); + LH(x4, wback, fixedaddress + i * 2); + BGE(x4, xZR, 4 + 4); NEG(x3, x3); - BNE(x4, xZR, 4+4); + BNE(x4, xZR, 4 + 4); MOV_U12(x3, 0); - SH(x3, gback, gdoffset+i*2); + SH(x3, gback, gdoffset + i * 2); } break; case 0x0A: @@ -400,14 +408,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<4; ++i) { - LW(x3, gback, gdoffset+i*4); - LW(x4, wback, fixedaddress+i*4); - BGE(x4, xZR, 4+4); + for (int i = 0; i < 4; ++i) { + LW(x3, gback, gdoffset + i * 4); + LW(x4, wback, fixedaddress + i * 4); + BGE(x4, xZR, 4 + 4); NEG(x3, x3); - BNE(x4, xZR, 4+4); + BNE(x4, xZR, 4 + 4); ADDI(x3, xZR, 0); - SW(x3, gback, gdoffset+i*4); + SW(x3, gback, gdoffset + i * 4); } break; case 0x0B: @@ -415,14 +423,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<8; ++i) { - LH(x3, gback, gdoffset+i*2); - LH(x4, wback, fixedaddress+i*2); + for (int i = 0; i < 8; ++i) { + LH(x3, gback, gdoffset + i * 2); + LH(x4, wback, fixedaddress + i * 2); MUL(x3, x3, x4); SRAI(x3, x3, 14); ADDI(x3, x3, 
1); SRAI(x3, x3, 1); - SH(x3, gback, gdoffset+i*2); + SH(x3, gback, gdoffset + i * 2); } break; case 0x10: @@ -431,11 +439,11 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETEX(x2, 0); sse_forget_reg(dyn, ninst, 0); // forget xmm[0] - for (int i=0; i<16; ++i) { - LB(x3, xEmu, offsetof(x64emu_t, xmm[0])+i); + for (int i = 0; i < 16; ++i) { + LB(x3, xEmu, offsetof(x64emu_t, xmm[0]) + i); BGE(x3, xZR, 12); // continue - LBU(x3, wback, fixedaddress+i); - SB(x3, gback, gdoffset+i); + LBU(x3, wback, fixedaddress + i); + SB(x3, gback, gdoffset + i); // continue } break; @@ -447,29 +455,32 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 0); CLEAR_FLAGS(); SET_DFNONE(); - IFX(X_ZF|X_CF) { - LD(x5, wback, fixedaddress+0); - LD(x6, wback, fixedaddress+8); + IFX(X_ZF | X_CF) + { + LD(x5, wback, fixedaddress + 0); + LD(x6, wback, fixedaddress + 8); - IFX(X_ZF) { - LD(x3, gback, gdoffset+0); - LD(x4, gback, gdoffset+8); + IFX(X_ZF) + { + LD(x3, gback, gdoffset + 0); + LD(x4, gback, gdoffset + 8); AND(x3, x3, x5); AND(x4, x4, x6); OR(x3, x3, x4); BNEZ(x3, 8); - ORI(xFlags, xFlags, 1<<F_ZF); + ORI(xFlags, xFlags, 1 << F_ZF); } - IFX(X_CF) { - LD(x3, gback, gdoffset+0); + IFX(X_CF) + { + LD(x3, gback, gdoffset + 0); NOT(x3, x3); - LD(x4, gback, gdoffset+8); + LD(x4, gback, gdoffset + 8); NOT(x4, x4); AND(x3, x3, x5); AND(x4, x4, x6); OR(x3, x3, x4); BNEZ(x3, 8); - ORI(xFlags, xFlags, 1<<F_CF); + ORI(xFlags, xFlags, 1 << F_CF); } } break; @@ -479,11 +490,11 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<16; ++i) { - LB(x4, wback, fixedaddress+i); - BGE(x4, xZR, 4+4); + for (int i = 0; i < 16; ++i) { + LB(x4, wback, fixedaddress + i); + BGE(x4, xZR, 4 + 4); NEG(x4, x4); - SB(x4, gback, gdoffset+i); + SB(x4, gback, gdoffset + i); } break; case 0x1D: @@ -491,11 +502,11 @@ uintptr_t 
dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<8; ++i) { - LH(x4, wback, fixedaddress+i*2); - BGE(x4, xZR, 4+4); + for (int i = 0; i < 8; ++i) { + LH(x4, wback, fixedaddress + i * 2); + BGE(x4, xZR, 4 + 4); NEG(x4, x4); - SH(x4, gback, gdoffset+i*2); + SH(x4, gback, gdoffset + i * 2); } break; case 0x1E: @@ -503,12 +514,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - MOV64x(x5, ~(1<<31)); - for(int i=0; i<4; ++i) { - LW(x4, wback, fixedaddress+i*4); - BGE(x4, xZR, 4+4); + MOV64x(x5, ~(1 << 31)); + for (int i = 0; i < 4; ++i) { + LW(x4, wback, fixedaddress + i * 4); + BGE(x4, xZR, 4 + 4); NEG(x4, x4); - SW(x4, gback, gdoffset+i*4); + SW(x4, gback, gdoffset + i * 4); } break; @@ -518,35 +529,36 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETEX(x2, 0); MOV64x(x5, 65535); - for(int i=0; i<4; ++i) { - LW(x3, gback, gdoffset+i*4); - if(rv64_zbb) { + for (int i = 0; i < 4; ++i) { + LW(x3, gback, gdoffset + i * 4); + if (rv64_zbb) { MIN(x3, x3, x5); MAX(x3, x3, xZR); } else { - BGE(x3, xZR, 4+4); + BGE(x3, xZR, 4 + 4); MV(x3, xZR); - BLT(x3, x5, 4+4); + BLT(x3, x5, 4 + 4); MV(x3, x5); } - SH(x3, gback, gdoffset+i*2); + SH(x3, gback, gdoffset + i * 2); } - if(MODREG && gd==ed) { - LD(x3, gback, gdoffset+0); - SD(x3, gback, gdoffset+8); - } else for(int i=0; i<4; ++i) { - LW(x3, wback, fixedaddress+i*4); - if(rv64_zbb) { - MIN(x3, x3, x5); - MAX(x3, x3, xZR); - } else { - BGE(x3, xZR, 4+4); - MV(x3, xZR); - BLT(x3, x5, 4+4); - MV(x3, x5); + if (MODREG && gd == ed) { + LD(x3, gback, gdoffset + 0); + SD(x3, gback, gdoffset + 8); + } else + for (int i = 0; i < 4; ++i) { + LW(x3, wback, fixedaddress + i * 4); + if (rv64_zbb) { + MIN(x3, x3, x5); + MAX(x3, x3, xZR); + } else { + BGE(x3, xZR, 4 + 4); + MV(x3, xZR); + BLT(x3, x5, 4 + 4); + MV(x3, x5); + } + SH(x3, gback, gdoffset + 8 + 
i * 2); } - SH(x3, gback, gdoffset+8+i*2); - } break; case 0x30: @@ -554,9 +566,9 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=7; i>=0; --i) { - LBU(x3, wback, fixedaddress+i); - SH(x3, gback, gdoffset+i*2); + for (int i = 7; i >= 0; --i) { + LBU(x3, wback, fixedaddress + i); + SH(x3, gback, gdoffset + i * 2); } break; case 0x31: @@ -564,9 +576,9 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=3; i>=0; --i) { - LBU(x3, wback, fixedaddress+i); - SW(x3, gback, gdoffset+i*4); + for (int i = 3; i >= 0; --i) { + LBU(x3, wback, fixedaddress + i); + SW(x3, gback, gdoffset + i * 4); } break; case 0x32: @@ -574,9 +586,9 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=1; i>=0; --i) { - LBU(x3, wback, fixedaddress+i); - SD(x3, gback, gdoffset+i*8); + for (int i = 1; i >= 0; --i) { + LBU(x3, wback, fixedaddress + i); + SD(x3, gback, gdoffset + i * 8); } break; case 0x33: @@ -584,9 +596,9 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=3; i>=0; --i) { - LHU(x3, wback, fixedaddress+i*2); - SW(x3, gback, gdoffset+i*4); + for (int i = 3; i >= 0; --i) { + LHU(x3, wback, fixedaddress + i * 2); + SW(x3, gback, gdoffset + i * 4); } break; case 0x34: @@ -594,9 +606,9 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=1; i>=0; --i) { - LHU(x3, wback, fixedaddress+i*2); - SD(x3, gback, gdoffset+i*8); + for (int i = 1; i >= 0; --i) { + LHU(x3, wback, fixedaddress + i * 2); + SD(x3, gback, gdoffset + i * 8); } break; case 0x35: @@ -604,106 +616,130 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=1; 
i>=0; --i) { - LWU(x3, wback, fixedaddress+i*4); - SD(x3, gback, gdoffset+i*8); + for (int i = 1; i >= 0; --i) { + LWU(x3, wback, fixedaddress + i * 4); + SD(x3, gback, gdoffset + i * 8); } break; case 0x38: - INST_NAME("PMINSB Gx, Ex"); // SSE4 opcode! + INST_NAME("PMINSB Gx, Ex"); // SSE4 opcode! nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<16; ++i) { - LB(x3, gback, gdoffset+i); - LB(x4, wback, fixedaddress+i); - if(rv64_zbb) MIN(x4, x3, x4); else BLT(x3, x4, 4+4); - SB(x4, gback, gdoffset+i); + for (int i = 0; i < 16; ++i) { + LB(x3, gback, gdoffset + i); + LB(x4, wback, fixedaddress + i); + if (rv64_zbb) + MIN(x4, x3, x4); + else + BLT(x3, x4, 4 + 4); + SB(x4, gback, gdoffset + i); } break; case 0x39: - INST_NAME("PMINSD Gx, Ex"); // SSE4 opcode! + INST_NAME("PMINSD Gx, Ex"); // SSE4 opcode! nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<4; ++i) { - LW(x3, gback, gdoffset+i*4); - LW(x4, wback, fixedaddress+i*4); - if(rv64_zbb) MIN(x4, x3, x4); else BLT(x3, x4, 4+4); - SW(x4, gback, gdoffset+i*4); + for (int i = 0; i < 4; ++i) { + LW(x3, gback, gdoffset + i * 4); + LW(x4, wback, fixedaddress + i * 4); + if (rv64_zbb) + MIN(x4, x3, x4); + else + BLT(x3, x4, 4 + 4); + SW(x4, gback, gdoffset + i * 4); } break; case 0x3A: - INST_NAME("PMINUW Gx, Ex"); // SSE4 opcode! + INST_NAME("PMINUW Gx, Ex"); // SSE4 opcode! nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<8; ++i) { - LHU(x3, gback, gdoffset+i*2); - LHU(x4, wback, fixedaddress+i*2); - if(rv64_zbb) MINU(x4, x3, x4); else BLTU(x3, x4, 4+4); - SH(x4, gback, gdoffset+i*2); + for (int i = 0; i < 8; ++i) { + LHU(x3, gback, gdoffset + i * 2); + LHU(x4, wback, fixedaddress + i * 2); + if (rv64_zbb) + MINU(x4, x3, x4); + else + BLTU(x3, x4, 4 + 4); + SH(x4, gback, gdoffset + i * 2); } break; case 0x3B: - INST_NAME("PMINUD Gx, Ex"); // SSE4 opcode! + INST_NAME("PMINUD Gx, Ex"); // SSE4 opcode! 
nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<4; ++i) { - LWU(x3, gback, gdoffset+i*4); - LWU(x4, wback, fixedaddress+i*4); - if(rv64_zbb) MINU(x4, x3, x4); else BLTU(x3, x4, 4+4); - SW(x4, gback, gdoffset+i*4); + for (int i = 0; i < 4; ++i) { + LWU(x3, gback, gdoffset + i * 4); + LWU(x4, wback, fixedaddress + i * 4); + if (rv64_zbb) + MINU(x4, x3, x4); + else + BLTU(x3, x4, 4 + 4); + SW(x4, gback, gdoffset + i * 4); } break; case 0x3C: - INST_NAME("PMAXSB Gx, Ex"); // SSE4 opcode! + INST_NAME("PMAXSB Gx, Ex"); // SSE4 opcode! nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<16; ++i) { - LB(x3, gback, gdoffset+i); - LB(x4, wback, fixedaddress+i); - if(rv64_zbb) MAX(x4, x3, x4); else BLT(x4, x3, 4+4); - SB(x4, gback, gdoffset+i); + for (int i = 0; i < 16; ++i) { + LB(x3, gback, gdoffset + i); + LB(x4, wback, fixedaddress + i); + if (rv64_zbb) + MAX(x4, x3, x4); + else + BLT(x4, x3, 4 + 4); + SB(x4, gback, gdoffset + i); } break; case 0x3D: - INST_NAME("PMAXSD Gx, Ex"); // SSE4 opcode! + INST_NAME("PMAXSD Gx, Ex"); // SSE4 opcode! nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<4; ++i) { - LW(x3, gback, gdoffset+i*4); - LW(x4, wback, fixedaddress+i*4); - if(rv64_zbb) MAX(x4, x3, x4); else BLT(x4, x3, 4+4); - SW(x4, gback, gdoffset+i*4); + for (int i = 0; i < 4; ++i) { + LW(x3, gback, gdoffset + i * 4); + LW(x4, wback, fixedaddress + i * 4); + if (rv64_zbb) + MAX(x4, x3, x4); + else + BLT(x4, x3, 4 + 4); + SW(x4, gback, gdoffset + i * 4); } break; case 0x3E: - INST_NAME("PMAXUW Gx, Ex"); // SSE4 opcode! + INST_NAME("PMAXUW Gx, Ex"); // SSE4 opcode! 
nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<8; ++i) { - LHU(x3, gback, gdoffset+i*2); - LHU(x4, wback, fixedaddress+i*2); - if(rv64_zbb) MAXU(x4, x3, x4); else BLTU(x4, x3, 4+4); - SH(x4, gback, gdoffset+i*2); + for (int i = 0; i < 8; ++i) { + LHU(x3, gback, gdoffset + i * 2); + LHU(x4, wback, fixedaddress + i * 2); + if (rv64_zbb) + MAXU(x4, x3, x4); + else + BLTU(x4, x3, 4 + 4); + SH(x4, gback, gdoffset + i * 2); } break; case 0x3F: - INST_NAME("PMAXUD Gx, Ex"); // SSE4 opcode! + INST_NAME("PMAXUD Gx, Ex"); // SSE4 opcode! nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<4; ++i) { - LWU(x3, gback, gdoffset+i*4); - LWU(x4, wback, fixedaddress+i*4); - if(rv64_zbb) MAXU(x4, x3, x4); else BLTU(x4, x3, 4+4); - SW(x4, gback, gdoffset+i*4); + for (int i = 0; i < 4; ++i) { + LWU(x3, gback, gdoffset + i * 4); + LWU(x4, wback, fixedaddress + i * 4); + if (rv64_zbb) + MAXU(x4, x3, x4); + else + BLTU(x4, x3, 4 + 4); + SW(x4, gback, gdoffset + i * 4); } break; case 0x40: @@ -711,15 +747,15 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<4; ++i) { - LW(x3, gback, gdoffset+i*4); - LW(x4, wback, fixedaddress+i*4); + for (int i = 0; i < 4; ++i) { + LW(x3, gback, gdoffset + i * 4); + LW(x4, wback, fixedaddress + i * 4); MUL(x3, x3, x4); - SW(x3, gback, gdoffset+i*4); + SW(x3, gback, gdoffset + i * 4); } break; case 0xDB: - INST_NAME("AESIMC Gx, Ex"); // AES-NI + INST_NAME("AESIMC Gx, Ex"); // AES-NI nextop = F8; GETGX(); GETEX(x2, 0); @@ -729,7 +765,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int CALL(native_aesimc, -1); break; case 0xDC: - INST_NAME("AESENC Gx, Ex"); // AES-NI + INST_NAME("AESENC Gx, Ex"); // AES-NI nextop = F8; GETG; sse_forget_reg(dyn, ninst, gd); @@ -740,7 +776,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); break; case 0xDD: - INST_NAME("AESENCLAST 
Gx, Ex"); // AES-NI + INST_NAME("AESENCLAST Gx, Ex"); // AES-NI nextop = F8; GETG; sse_forget_reg(dyn, ninst, gd); @@ -751,7 +787,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); break; case 0xDE: - INST_NAME("AESDEC Gx, Ex"); // AES-NI + INST_NAME("AESDEC Gx, Ex"); // AES-NI nextop = F8; GETG; sse_forget_reg(dyn, ninst, gd); @@ -763,7 +799,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xDF: - INST_NAME("AESDECLAST Gx, Ex"); // AES-NI + INST_NAME("AESDECLAST Gx, Ex"); // AES-NI nextop = F8; GETG; sse_forget_reg(dyn, ninst, gd); @@ -775,14 +811,17 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0xF0: INST_NAME("MOVBE Gw, Ew"); - nextop=F8; + nextop = F8; GETGD; SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 1); + addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 1, 0); LHU(x1, ed, fixedaddress); if (rv64_zbb) { REV8(x1, x1); SRLI(x1, x1, 48); + } else if (rv64_xtheadbb) { + TH_REVW(x1, x1); + SRLI(x1, x1, 16); } else { ANDI(x2, x1, 0xff); SLLI(x2, x2, 8); @@ -791,17 +830,20 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } LUI(x2, 0xffff0); AND(gd, gd, x2); - OR(gd, gd, x1); + OR(gd, gd, x1); break; case 0xF1: INST_NAME("MOVBE Ew, Gw"); - nextop=F8; + nextop = F8; GETGD; SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, NULL, 0, 1); + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, NULL, 1, 0); if (rv64_zbb) { REV8(x1, gd); SRLI(x1, x1, 48); + } else if (rv64_xtheadbb) { + TH_REVW(x1, gd); + SRLI(x1, x1, 16); } else { ANDI(x1, gd, 0xff); SLLI(x1, x1, 8); @@ -815,9 +857,9 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int DEFAULT; } break; - case 0x3A: // these are some more SSSE3+ opcodes + case 
0x3A: // these are some more SSSE3+ opcodes opcode = F8; - switch(opcode) { + switch (opcode) { case 0x0B: INST_NAME("ROUNDSD Gx, Ex, Ib"); nextop = F8; @@ -828,7 +870,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int u8 = F8; FEQD(x2, d0, d0); BNEZ_MARK(x2); - if (v0!=d0) FMVD(v0, d0); + if (v0 != d0) FMVD(v0, d0); B_NEXT_nocond; MARK; // d0 is not nan FABSD(v1, d0); @@ -836,16 +878,16 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int FCVTDL(d1, x3, RD_RTZ); FLTD(x3, v1, d1); BNEZ_MARK2(x3); - if (v0!=d0) FMVD(v0, d0); + if (v0 != d0) FMVD(v0, d0); B_NEXT_nocond; MARK2; - if(u8&4) { + if (u8 & 4) { u8 = sse_setround(dyn, ninst, x4, x2); FCVTLD(x5, d0, RD_DYN); FCVTDL(v0, x5, RD_RTZ); x87_restoreround(dyn, ninst, u8); } else { - FCVTLD(x5, d0, round_round[u8&3]); + FCVTLD(x5, d0, round_round[u8 & 3]); FCVTDL(v0, x5, RD_RTZ); } break; @@ -871,20 +913,20 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int FLTD(x4, v1, d1); BNEZ(x4, 8); B_MARK_nocond; - if(u8&4) { + if (u8 & 4) { u8 = sse_setround(dyn, ninst, x4, x5); FCVTLD(x5, d0, RD_DYN); FCVTDL(d0, x5, RD_RTZ); x87_restoreround(dyn, ninst, u8); } else { - FCVTLD(x5, d0, round_round[u8&3]); + FCVTLD(x5, d0, round_round[u8 & 3]); FCVTDL(d0, x5, RD_RTZ); } MARK; - FSD(d0, gback, gdoffset+0); + FSD(d0, gback, gdoffset + 0); // i = 1 - FLD(d0, wback, fixedaddress+8); + FLD(d0, wback, fixedaddress + 8); FEQD(x4, d0, d0); BNEZ(x4, 8); B_MARK2_nocond; @@ -893,17 +935,17 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int FLTD(x4, v1, d1); BNEZ(x4, 8); B_MARK2_nocond; - if(u8&4) { + if (u8 & 4) { u8 = sse_setround(dyn, ninst, x4, x5); FCVTLD(x5, d0, RD_DYN); FCVTDL(d0, x5, RD_RTZ); x87_restoreround(dyn, ninst, u8); } else { - FCVTLD(x5, d0, round_round[u8&3]); + FCVTLD(x5, d0, round_round[u8 & 3]); FCVTDL(d0, x5, RD_RTZ); } MARK2; - FSD(d0, gback, gdoffset+8); + FSD(d0, gback, gdoffset + 8); 
break; case 0x0E: INST_NAME("PBLENDW Gx, Ex, Ib"); @@ -912,34 +954,34 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 1); u8 = F8; i32 = 0; - if (MODREG && gd==ed) break; + if (MODREG && gd == ed) break; while (u8) - if(u8&1) { - if(!(i32&1) && u8&2) { - if(!(i32&3) && (u8&0xf)==0xf) { + if (u8 & 1) { + if (!(i32 & 1) && u8 & 2) { + if (!(i32 & 3) && (u8 & 0xf) == 0xf) { // whole 64bits - LD(x3, wback, fixedaddress+8*(i32>>2)); - SD(x3, gback, gdoffset+8*(i32>>2)); - i32+=4; - u8>>=4; + LD(x3, wback, fixedaddress + 8 * (i32 >> 2)); + SD(x3, gback, gdoffset + 8 * (i32 >> 2)); + i32 += 4; + u8 >>= 4; } else { // 32bits - LWU(x3, wback, fixedaddress+4*(i32>>1)); - SW(x3, gback, gdoffset+4*(i32>>1)); - i32+=2; - u8>>=2; + LWU(x3, wback, fixedaddress + 4 * (i32 >> 1)); + SW(x3, gback, gdoffset + 4 * (i32 >> 1)); + i32 += 2; + u8 >>= 2; } } else { // 16 bits - LHU(x3, wback, fixedaddress+2*i32); - SH(x3, gback, gdoffset+2*i32); + LHU(x3, wback, fixedaddress + 2 * i32); + SH(x3, gback, gdoffset + 2 * i32); i32++; - u8>>=1; + u8 >>= 1; } } else { // nope i32++; - u8>>=1; + u8 >>= 1; } break; case 0x0F: @@ -951,38 +993,42 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int sse_forget_reg(dyn, ninst, x5); ADDI(x5, xEmu, offsetof(x64emu_t, scratch)); // perserve gd - LD(x3, gback, gdoffset+0); - LD(x4, gback, gdoffset+8); + LD(x3, gback, gdoffset + 0); + LD(x4, gback, gdoffset + 8); SD(x3, x5, 0); SD(x4, x5, 8); - if(u8>31) { - SD(xZR, gback, gdoffset+0); - SD(xZR, gback, gdoffset+8); + if (u8 > 31) { + SD(xZR, gback, gdoffset + 0); + SD(xZR, gback, gdoffset + 8); } else { - for (int i=0; i<16; ++i, ++u8) { - if (u8>15) { - if(u8>31) { - SB(xZR, gback, gdoffset+i); + for (int i = 0; i < 16; ++i, ++u8) { + if (u8 > 15) { + if (u8 > 31) { + SB(xZR, gback, gdoffset + i); continue; - } - else LBU(x3, x5, u8-16); + } else + LBU(x3, x5, u8 - 16); } else { - LBU(x3, wback, fixedaddress+u8); + LBU(x3, wback, 
fixedaddress + u8); } - SB(x3, gback, gdoffset+i); + SB(x3, gback, gdoffset + i); } } break; case 0x16: - if(rex.w) {INST_NAME("PEXTRQ Ed, Gx, Ib");} else {INST_NAME("PEXTRD Ed, Gx, Ib");} + if (rex.w) { + INST_NAME("PEXTRQ Ed, Gx, Ib"); + } else { + INST_NAME("PEXTRD Ed, Gx, Ib"); + } nextop = F8; GETGX(); GETED(1); u8 = F8; - if(rex.w) - LD(ed, gback, gdoffset+8*(u8&1)); + if (rex.w) + LD(ed, gback, gdoffset + 8 * (u8 & 1)); else - LWU(ed, gback, gdoffset+4*(u8&3)); + LWU(ed, gback, gdoffset + 4 * (u8 & 3)); if (wback) { SDxw(ed, wback, fixedaddress); SMWRITE2(); @@ -994,7 +1040,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETED(1); u8 = F8; - SB(ed, gback, gdoffset+u8&0xF); + SB(ed, gback, gdoffset + u8 & 0xF); break; case 0x21: INST_NAME("INSERTPS GX, EX, Ib"); @@ -1002,14 +1048,17 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETEX(x2, 1); u8 = F8; - if(MODREG) s8 = (u8>>6)&3; else s8 = 0; + if (MODREG) + s8 = (u8 >> 6) & 3; + else + s8 = 0; // GX->ud[(tmp8u>>4)&3] = EX->ud[tmp8s]; - LWU(x3, wback, fixedaddress+4*s8); - SW(x3, gback, gdoffset+4*(u8>>4)); - for(int i=0; i<4; ++i) { - if(u8&(1<<i)) + LWU(x3, wback, fixedaddress + 4 * s8); + SW(x3, gback, gdoffset + 4 * (u8 >> 4)); + for (int i = 0; i < 4; ++i) { + if (u8 & (1 << i)) // GX->ud[i] = 0; - SW(xZR, gback, gdoffset+4*i); + SW(xZR, gback, gdoffset + 4 * i); } break; case 0x22: @@ -1018,10 +1067,10 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETED(1); u8 = F8; - if(rex.w) { - SD(ed, gback, gdoffset+8*(u8&0x1)); + if (rex.w) { + SD(ed, gback, gdoffset + 8 * (u8 & 0x1)); } else { - SW(ed, gback, gdoffset+4*(u8&0x3)); + SW(ed, gback, gdoffset + 4 * (u8 & 0x3)); } break; case 0x44: @@ -1030,15 +1079,15 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETG; sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); // gx - if(MODREG) { - ed = 
(nextop&7)+(rex.b<<3); + if (MODREG) { + ed = (nextop & 7) + (rex.b << 3); sse_forget_reg(dyn, ninst, ed); MOV32w(x2, ed); - MOV32w(x3, 0); // p = NULL + MOV32w(x3, 0); // p = NULL } else { MOV32w(x2, 0); addr = geted(dyn, addr, ninst, nextop, &ed, x3, x5, &fixedaddress, rex, NULL, 0, 1); - if(ed!=x3) { + if (ed != x3) { MV(x3, ed); } } @@ -1047,20 +1096,20 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int CALL(native_pclmul, -1); break; case 0xDF: - INST_NAME("AESKEYGENASSIST Gx, Ex, Ib"); // AES-NI + INST_NAME("AESKEYGENASSIST Gx, Ex, Ib"); // AES-NI nextop = F8; GETG; sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); // gx - if(MODREG) { - ed = (nextop&7)+(rex.b<<3); + if (MODREG) { + ed = (nextop & 7) + (rex.b << 3); sse_forget_reg(dyn, ninst, ed); MOV32w(x2, ed); - MOV32w(x3, 0); //p = NULL + MOV32w(x3, 0); // p = NULL } else { MOV32w(x2, 0); addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 0, 1); - if(ed!=x3) { + if (ed != x3) { MV(x3, ed); } } @@ -1068,41 +1117,41 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MOV32w(x4, u8); CALL(native_aeskeygenassist, -1); break; - default: + default: DEFAULT; } break; - #define GO(GETFLAGS, NO, YES, F) \ - READFLAGS(F); \ - GETFLAGS; \ - nextop=F8; \ - GETGD; \ - if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - ZEXTH(x4, ed); \ - ed = x4; \ - } else { \ - SMREAD(); \ - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x4, &fixedaddress, rex, NULL, 1, 0); \ - LHU(x4, ed, fixedaddress); \ - ed = x4; \ - } \ - B##NO(x1, 4+3*4); \ - LUI(x3, 0xffff0); \ - AND(gd, gd, x3); \ - OR(gd, gd, ed); +#define GO(GETFLAGS, NO, YES, F) \ + READFLAGS(F); \ + GETFLAGS; \ + nextop = F8; \ + GETGD; \ + if (MODREG) { \ + ed = xRAX + (nextop & 7) + (rex.b << 3); \ + ZEXTH(x4, ed); \ + ed = x4; \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x4, &fixedaddress, rex, NULL, 1, 0); \ + LHU(x4, ed, fixedaddress); \ + ed 
= x4; \ + } \ + B##NO(x1, 4 + 3 * 4); \ + LUI(x3, 0xffff0); \ + AND(gd, gd, x3); \ + OR(gd, gd, ed); - GOCOND(0x40, "CMOV", "Gw, Ew"); - #undef GO + GOCOND(0x40, "CMOV", "Gw, Ew"); +#undef GO case 0x50: INST_NAME("PMOVMSKD Gd, Ex"); nextop = F8; GETGD; GETEX(x1, 0); MV(gd, xZR); - for(int i=0; i<2; ++i) { + for (int i = 0; i < 2; ++i) { // GD->dword[0] |= ((EX->q[i]>>63)&1)<<i; - LD(x2, wback, fixedaddress+8*i); + LD(x2, wback, fixedaddress + 8 * i); SRLI(x2, x2, 63); if (i) SLLI(x2, x2, 1); OR(gd, gd, x2); @@ -1114,21 +1163,21 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETEX(x2, 0); d0 = fpu_get_scratch(dyn); - if(!box64_dynarec_fastnan) { + if (!box64_dynarec_fastnan) { d1 = fpu_get_scratch(dyn); FMVDX(d1, xZR); } - for (int i=0; i<2; ++i) { - FLD(d0, wback, fixedaddress+i*8); - if(!box64_dynarec_fastnan) { + for (int i = 0; i < 2; ++i) { + FLD(d0, wback, fixedaddress + i * 8); + if (!box64_dynarec_fastnan) { FLTD(x3, d0, d1); } FSQRTD(d0, d0); - if(!box64_dynarec_fastnan) { + if (!box64_dynarec_fastnan) { BEQ(x3, xZR, 8); FNEGD(d0, d0); } - FSD(d0, gback, gdoffset+i*8); + FSD(d0, gback, gdoffset + i * 8); } break; case 0x54: @@ -1165,12 +1214,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x1, 0); GETGX(); SSE_LOOP_FQ(x3, x4, { - if(!box64_dynarec_fastnan) { + if (!box64_dynarec_fastnan) { FEQD(x3, v0, v0); FEQD(x4, v1, v1); } FADDD(v0, v0, v1); - if(!box64_dynarec_fastnan) { + if (!box64_dynarec_fastnan) { AND(x3, x3, x4); BEQZ(x3, 16); FEQD(x3, v0, v0); @@ -1185,12 +1234,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x1, 0); GETGX(); SSE_LOOP_FQ(x3, x4, { - if(!box64_dynarec_fastnan) { + if (!box64_dynarec_fastnan) { FEQD(x3, v0, v0); FEQD(x4, v1, v1); } FMULD(v0, v0, v1); - if(!box64_dynarec_fastnan) { + if (!box64_dynarec_fastnan) { AND(x3, x3, x4); BEQZ(x3, 16); FEQD(x3, v0, v0); @@ -1206,15 +1255,15 @@ uintptr_t 
dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 0); d0 = fpu_get_scratch(dyn); // GX->f[0] = EX->d[0]; - FLD(d0, wback, fixedaddress+0); + FLD(d0, wback, fixedaddress + 0); FCVTSD(d0, d0); - FSD(d0, gback, gdoffset+0); + FSD(d0, gback, gdoffset + 0); // GX->f[1] = EX->d[1]; - FLD(d0, wback, fixedaddress+8); + FLD(d0, wback, fixedaddress + 8); FCVTSD(d0, d0); - FSD(d0, gback, gdoffset+4); + FSD(d0, gback, gdoffset + 4); // GX->q[1] = 0; - SD(xZR, gback, gdoffset+8); + SD(xZR, gback, gdoffset + 8); break; case 0x5B: INST_NAME("CVTPS2DQ Gx, Ex"); @@ -1223,14 +1272,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 0); d0 = fpu_get_scratch(dyn); u8 = sse_setround(dyn, ninst, x6, x4); - for (int i=0; i<4; ++i) { - FLW(d0, wback, fixedaddress+4*i); + for (int i = 0; i < 4; ++i) { + FLW(d0, wback, fixedaddress + 4 * i); FCVTLS(x3, d0, RD_DYN); SEXT_W(x5, x3); SUB(x5, x5, x3); BEQZ(x5, 8); LUI(x3, 0x80000); // INT32_MIN - SW(x3, gback, gdoffset+4*i); + SW(x3, gback, gdoffset + 4 * i); } x87_restoreround(dyn, ninst, u8); break; @@ -1240,12 +1289,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x1, 0); GETGX(); SSE_LOOP_FQ(x3, x4, { - if(!box64_dynarec_fastnan) { + if (!box64_dynarec_fastnan) { FEQD(x3, v0, v0); FEQD(x4, v1, v1); } FSUBD(v0, v0, v1); - if(!box64_dynarec_fastnan) { + if (!box64_dynarec_fastnan) { AND(x3, x3, x4); BEQZ(x3, 16); FEQD(x3, v0, v0); @@ -1261,16 +1310,16 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 0); d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); - for (int i=0; i<2; ++i) { - FLD(d0, gback, gdoffset+8*i); - FLD(d1, wback, fixedaddress+8*i); + for (int i = 0; i < 2; ++i) { + FLD(d0, gback, gdoffset + 8 * i); + FLD(d1, wback, fixedaddress + 8 * i); FEQD(x3, d0, d0); FEQD(x4, d1, d1); AND(x3, x3, x4); BEQ(x3, xZR, 12); FLTD(x3, d1, d0); BEQ(x3, xZR, 8); // continue - FSD(d1, 
gback, gdoffset+8*i); + FSD(d1, gback, gdoffset + 8 * i); } break; case 0x5E: @@ -1279,12 +1328,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x1, 0); GETGX(); SSE_LOOP_FQ(x3, x4, { - if(!box64_dynarec_fastnan) { + if (!box64_dynarec_fastnan) { FEQD(x3, v0, v0); FEQD(x4, v1, v1); } FDIVD(v0, v0, v1); - if(!box64_dynarec_fastnan) { + if (!box64_dynarec_fastnan) { AND(x3, x3, x4); BEQZ(x3, 16); FEQD(x3, v0, v0); @@ -1300,39 +1349,39 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 0); d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); - for (int i=0; i<2; ++i) { - FLD(d0, gback, gdoffset+8*i); - FLD(d1, wback, fixedaddress+8*i); + for (int i = 0; i < 2; ++i) { + FLD(d0, gback, gdoffset + 8 * i); + FLD(d1, wback, fixedaddress + 8 * i); FEQD(x3, d0, d0); FEQD(x4, d1, d1); AND(x3, x3, x4); BEQ(x3, xZR, 12); FLTD(x3, d0, d1); BEQ(x3, xZR, 8); // continue - FSD(d1, gback, gdoffset+8*i); + FSD(d1, gback, gdoffset + 8 * i); } break; case 0x60: INST_NAME("PUNPCKLBW Gx,Ex"); nextop = F8; GETGX(); - for(int i=7; i>0; --i) { // 0 is untouched + for (int i = 7; i > 0; --i) { // 0 is untouched // GX->ub[2 * i] = GX->ub[i]; - LBU(x3, gback, gdoffset+i); - SB(x3, gback, gdoffset+2*i); + LBU(x3, gback, gdoffset + i); + SB(x3, gback, gdoffset + 2 * i); } - if (MODREG && gd==(nextop&7)+(rex.b<<3)) { - for(int i=0; i<8; ++i) { + if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { + for (int i = 0; i < 8; ++i) { // GX->ub[2 * i + 1] = GX->ub[2 * i]; - LBU(x3, gback, gdoffset+2*i); - SB(x3, gback, gdoffset+2*i+1); + LBU(x3, gback, gdoffset + 2 * i); + SB(x3, gback, gdoffset + 2 * i + 1); } } else { GETEX(x1, 0); - for(int i=0; i<8; ++i) { + for (int i = 0; i < 8; ++i) { // GX->ub[2 * i + 1] = EX->ub[i]; - LBU(x3, wback, fixedaddress+i); - SB(x3, gback, gdoffset+2*i+1); + LBU(x3, wback, fixedaddress + i); + SB(x3, gback, gdoffset + 2 * i + 1); } } break; @@ -1340,23 +1389,23 @@ uintptr_t 
dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PUNPCKLWD Gx,Ex"); nextop = F8; GETGX(); - for(int i=3; i>0; --i) { + for (int i = 3; i > 0; --i) { // GX->uw[2 * i] = GX->uw[i]; - LHU(x3, gback, gdoffset+i*2); - SH(x3, gback, gdoffset+2*i*2); + LHU(x3, gback, gdoffset + i * 2); + SH(x3, gback, gdoffset + 2 * i * 2); } - if (MODREG && gd==(nextop&7)+(rex.b<<3)) { - for(int i=0; i<4; ++i) { + if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { + for (int i = 0; i < 4; ++i) { // GX->uw[2 * i + 1] = GX->uw[2 * i]; - LHU(x3, gback, gdoffset+2*i*2); - SH(x3, gback, gdoffset+(2*i+1)*2); + LHU(x3, gback, gdoffset + 2 * i * 2); + SH(x3, gback, gdoffset + (2 * i + 1) * 2); } } else { GETEX(x1, 0); - for(int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // GX->uw[2 * i + 1] = EX->uw[i]; - LHU(x3, wback, fixedaddress+i*2); - SH(x3, gback, gdoffset+(2*i+1)*2); + LHU(x3, wback, fixedaddress + i * 2); + SH(x3, gback, gdoffset + (2 * i + 1) * 2); } } break; @@ -1366,14 +1415,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x1, 0); GETGX(); // GX->ud[3] = EX->ud[1]; - LWU(x3, wback, fixedaddress+1*4); - SW(x3, gback, gdoffset+3*4); + LWU(x3, wback, fixedaddress + 1 * 4); + SW(x3, gback, gdoffset + 3 * 4); // GX->ud[2] = GX->ud[1]; - LWU(x3, gback, gdoffset+1*4); - SW(x3, gback, gdoffset+2*4); + LWU(x3, gback, gdoffset + 1 * 4); + SW(x3, gback, gdoffset + 2 * 4); // GX->ud[1] = EX->ud[0]; - LWU(x3, wback, fixedaddress+0*4); - SW(x3, gback, gdoffset+1*4); + LWU(x3, wback, fixedaddress + 0 * 4); + SW(x3, gback, gdoffset + 1 * 4); break; case 0x63: INST_NAME("PACKSSWB Gx, Ex"); @@ -1382,48 +1431,49 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 0); MOV64x(x5, 127); MOV64x(x6, -128); - for(int i=0; i<8; ++i) { - LH(x3, gback, gdoffset+i*2); - if(rv64_zbb) { + for (int i = 0; i < 8; ++i) { + LH(x3, gback, gdoffset + i * 2); + if (rv64_zbb) { MIN(x3, x3, x5); MAX(x3, 
x3, x6); } else { - BLT(x3, x5, 4+4); + BLT(x3, x5, 4 + 4); MV(x3, x5); - BGE(x3, x6, 4+4); + BGE(x3, x6, 4 + 4); MV(x3, x6); } - SB(x3, gback, gdoffset+i); - } - if(MODREG && gd==ed) { - LD(x3, gback, gdoffset+0); - SD(x3, gback, gdoffset+8); - } else for(int i=0; i<8; ++i) { - LH(x3, wback, fixedaddress+i*2); - if(rv64_zbb) { - MIN(x3, x3, x5); - MAX(x3, x3, x6); - } else { - BLT(x3, x5, 4+4); - MV(x3, x5); - BGE(x3, x6, 4+4); - MV(x3, x6); + SB(x3, gback, gdoffset + i); + } + if (MODREG && gd == ed) { + LD(x3, gback, gdoffset + 0); + SD(x3, gback, gdoffset + 8); + } else + for (int i = 0; i < 8; ++i) { + LH(x3, wback, fixedaddress + i * 2); + if (rv64_zbb) { + MIN(x3, x3, x5); + MAX(x3, x3, x6); + } else { + BLT(x3, x5, 4 + 4); + MV(x3, x5); + BGE(x3, x6, 4 + 4); + MV(x3, x6); + } + SB(x3, gback, gdoffset + 8 + i); } - SB(x3, gback, gdoffset+8+i); - } break; case 0x64: INST_NAME("PCMPGTB Gx,Ex"); nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<16; ++i) { + for (int i = 0; i < 16; ++i) { // GX->ub[i] = (GX->sb[i]>EX->sb[i])?0xFF:0x00; - LB(x3, wback, fixedaddress+i); - LB(x4, gback, gdoffset+i); + LB(x3, wback, fixedaddress + i); + LB(x4, gback, gdoffset + i); SLT(x3, x3, x4); NEG(x3, x3); - SB(x3, gback, gdoffset+i); + SB(x3, gback, gdoffset + i); } break; case 0x65: @@ -1431,13 +1481,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<8; ++i) { + for (int i = 0; i < 8; ++i) { // GX->uw[i] = (GX->sw[i]>EX->sw[i])?0xFFFF:0x0000; - LH(x3, wback, fixedaddress+i*2); - LH(x4, gback, gdoffset+i*2); + LH(x3, wback, fixedaddress + i * 2); + LH(x4, gback, gdoffset + i * 2); SLT(x3, x3, x4); NEG(x3, x3); - SH(x3, gback, gdoffset+i*2); + SH(x3, gback, gdoffset + i * 2); } break; case 0x66: @@ -1452,31 +1502,31 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); ADDI(x5, xZR, 0xFF); - for(int i=0; i<8; ++i) { + for (int i = 0; i < 
8; ++i) { // GX->ub[i] = (GX->sw[i]<0)?0:((GX->sw[i]>0xff)?0xff:GX->sw[i]); - LH(x3, gback, gdoffset+i*2); + LH(x3, gback, gdoffset + i * 2); BGE(x5, x3, 8); ADDI(x3, xZR, 0xFF); NOT(x4, x3); SRAI(x4, x4, 63); AND(x3, x3, x4); - SB(x3, gback, gdoffset+i); + SB(x3, gback, gdoffset + i); } - if (MODREG && gd==(nextop&7)+(rex.b<<3)) { + if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { // GX->q[1] = GX->q[0]; - LD(x3, gback, gdoffset+0*8); - SD(x3, gback, gdoffset+1*8); + LD(x3, gback, gdoffset + 0 * 8); + SD(x3, gback, gdoffset + 1 * 8); } else { GETEX(x1, 0); - for(int i=0; i<8; ++i) { + for (int i = 0; i < 8; ++i) { // GX->ub[8+i] = (EX->sw[i]<0)?0:((EX->sw[i]>0xff)?0xff:EX->sw[i]); - LH(x3, wback, fixedaddress+i*2); + LH(x3, wback, fixedaddress + i * 2); BGE(x5, x3, 8); ADDI(x3, xZR, 0xFF); NOT(x4, x3); SRAI(x4, x4, 63); AND(x3, x3, x4); - SB(x3, gback, gdoffset+8+i); + SB(x3, gback, gdoffset + 8 + i); } } break; @@ -1484,23 +1534,23 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PUNPCKHBW Gx,Ex"); nextop = F8; GETGX(); - for(int i=0; i<8; ++i) { + for (int i = 0; i < 8; ++i) { // GX->ub[2 * i] = GX->ub[i + 8]; - LBU(x3, gback, gdoffset+i+8); - SB(x3, gback, gdoffset+2*i); + LBU(x3, gback, gdoffset + i + 8); + SB(x3, gback, gdoffset + 2 * i); } - if (MODREG && gd==(nextop&7)+(rex.b<<3)) { - for(int i=0; i<8; ++i) { + if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { + for (int i = 0; i < 8; ++i) { // GX->ub[2 * i + 1] = GX->ub[2 * i]; - LBU(x3, gback, gdoffset+2*i); - SB(x3, gback, gdoffset+2*i+1); + LBU(x3, gback, gdoffset + 2 * i); + SB(x3, gback, gdoffset + 2 * i + 1); } } else { GETEX(x2, 0); - for(int i=0; i<8; ++i) { + for (int i = 0; i < 8; ++i) { // GX->ub[2 * i + 1] = EX->ub[i + 8]; - LBU(x3, wback, fixedaddress+i+8); - SB(x3, gback, gdoffset+2*i+1); + LBU(x3, wback, fixedaddress + i + 8); + SB(x3, gback, gdoffset + 2 * i + 1); } } break; @@ -1508,23 +1558,23 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, 
uintptr_t addr, uintptr_t ip, int INST_NAME("PUNPCKHWD Gx,Ex"); nextop = F8; GETGX(); - for(int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // GX->uw[2 * i] = GX->uw[i + 4]; - LHU(x3, gback, gdoffset+(i+4)*2); - SH(x3, gback, gdoffset+2*i*2); + LHU(x3, gback, gdoffset + (i + 4) * 2); + SH(x3, gback, gdoffset + 2 * i * 2); } - if (MODREG && gd==(nextop&7)+(rex.b<<3)) { - for(int i=0; i<4; ++i) { + if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { + for (int i = 0; i < 4; ++i) { // GX->uw[2 * i + 1] = GX->uw[2 * i]; - LHU(x3, gback, gdoffset+2*i*2); - SH(x3, gback, gdoffset+(2*i+1)*2); + LHU(x3, gback, gdoffset + 2 * i * 2); + SH(x3, gback, gdoffset + (2 * i + 1) * 2); } } else { GETEX(x1, 0); - for(int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // GX->uw[2 * i + 1] = EX->uw[i + 4]; - LHU(x3, wback, fixedaddress+(i+4)*2); - SH(x3, gback, gdoffset+(2*i+1)*2); + LHU(x3, wback, fixedaddress + (i + 4) * 2); + SH(x3, gback, gdoffset + (2 * i + 1) * 2); } } break; @@ -1534,18 +1584,18 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x1, 0); GETGX(); // GX->ud[0] = GX->ud[2]; - LWU(x3, gback, gdoffset+2*4); - SW(x3, gback, gdoffset+0*4); + LWU(x3, gback, gdoffset + 2 * 4); + SW(x3, gback, gdoffset + 0 * 4); // GX->ud[1] = EX->ud[2]; - LWU(x3, wback, fixedaddress+2*4); - SW(x3, gback, gdoffset+1*4); + LWU(x3, wback, fixedaddress + 2 * 4); + SW(x3, gback, gdoffset + 1 * 4); // GX->ud[2] = GX->ud[3]; - LWU(x3, gback, gdoffset+3*4); - SW(x3, gback, gdoffset+2*4); + LWU(x3, gback, gdoffset + 3 * 4); + SW(x3, gback, gdoffset + 2 * 4); // GX->ud[3] = EX->ud[3]; - if (!(MODREG && (gd==ed))) { - LWU(x3, wback, fixedaddress+3*4); - SW(x3, gback, gdoffset+3*4); + if (!(MODREG && (gd == ed))) { + LWU(x3, wback, fixedaddress + 3 * 4); + SW(x3, gback, gdoffset + 3 * 4); } break; case 0x6B: @@ -1554,29 +1604,29 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); MOV64x(x5, 32768); NEG(x6, x5); - 
for(int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // GX->sw[i] = (GX->sd[i]<-32768)?-32768:((GX->sd[i]>32767)?32767:GX->sd[i]); - LW(x3, gback, gdoffset+i*4); + LW(x3, gback, gdoffset + i * 4); BGE(x5, x3, 8); ADDI(x3, x5, -1); BGE(x3, x6, 8); MV(x3, x6); - SH(x3, gback, gdoffset+i*2); + SH(x3, gback, gdoffset + i * 2); } - if (MODREG && gd==(nextop&7)+(rex.b<<3)) { + if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { // GX->q[1] = GX->q[0]; - LD(x3, gback, gdoffset+0*8); - SD(x3, gback, gdoffset+1*8); + LD(x3, gback, gdoffset + 0 * 8); + SD(x3, gback, gdoffset + 1 * 8); } else { GETEX(x1, 0); - for(int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // GX->sw[4+i] = (EX->sd[i]<-32768)?-32768:((EX->sd[i]>32767)?32767:EX->sd[i]); - LW(x3, wback, fixedaddress+i*4); + LW(x3, wback, fixedaddress + i * 4); BGE(x5, x3, 8); ADDI(x3, x5, -1); BGE(x3, x6, 8); MV(x3, x6); - SH(x3, gback, gdoffset+(4+i)*2); + SH(x3, gback, gdoffset + (4 + i) * 2); } } break; @@ -1584,13 +1634,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PUNPCKLQDQ Gx,Ex"); nextop = F8; GETGX(); - if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x2, (nextop&7)+(rex.b<<3), 0); - FSD(v1, gback, gdoffset+8); + if (MODREG) { + v1 = sse_get_reg(dyn, ninst, x2, (nextop & 7) + (rex.b << 3), 0); + FSD(v1, gback, gdoffset + 8); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); - LD(x3, ed, fixedaddress+0); - SD(x3, gback, gdoffset+8); + LD(x3, ed, fixedaddress + 0); + SD(x3, gback, gdoffset + 8); } break; case 0x6D: @@ -1598,27 +1648,27 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - LD(x3, gback, gdoffset+8); - SD(x3, gback, gdoffset+0); - LD(x3, wback, fixedaddress+8); - SD(x3, gback, gdoffset+8); + LD(x3, gback, gdoffset + 8); + SD(x3, gback, gdoffset + 0); + LD(x3, wback, fixedaddress + 8); + SD(x3, gback, gdoffset + 8); break; case 0x6E: 
INST_NAME("MOVD Gx, Ed"); nextop = F8; - if(rex.w) { + if (rex.w) { GETGXSD_empty(v0); } else { GETGXSS_empty(v0); } GETED(0); - if(rex.w) { + if (rex.w) { FMVDX(v0, ed); } else { FMVWX(v0, ed); - SW(xZR, xEmu, offsetof(x64emu_t, xmm[gd])+4); + SW(xZR, xEmu, offsetof(x64emu_t, xmm[gd]) + 4); } - SD(xZR, xEmu, offsetof(x64emu_t, xmm[gd])+8); + SD(xZR, xEmu, offsetof(x64emu_t, xmm[gd]) + 8); break; case 0x6F: INST_NAME("MOVDQA Gx,Ex"); @@ -1635,37 +1685,37 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int u8 = F8; int32_t idx; - idx = (u8>>(0*2))&3; - LWU(x3, wback, fixedaddress+idx*4); - idx = (u8>>(1*2))&3; - LWU(x4, wback, fixedaddress+idx*4); - idx = (u8>>(2*2))&3; - LWU(x5, wback, fixedaddress+idx*4); - idx = (u8>>(3*2))&3; - LWU(x6, wback, fixedaddress+idx*4); + idx = (u8 >> (0 * 2)) & 3; + LWU(x3, wback, fixedaddress + idx * 4); + idx = (u8 >> (1 * 2)) & 3; + LWU(x4, wback, fixedaddress + idx * 4); + idx = (u8 >> (2 * 2)) & 3; + LWU(x5, wback, fixedaddress + idx * 4); + idx = (u8 >> (3 * 2)) & 3; + LWU(x6, wback, fixedaddress + idx * 4); - SW(x3, gback, gdoffset+0*4); - SW(x4, gback, gdoffset+1*4); - SW(x5, gback, gdoffset+2*4); - SW(x6, gback, gdoffset+3*4); + SW(x3, gback, gdoffset + 0 * 4); + SW(x4, gback, gdoffset + 1 * 4); + SW(x5, gback, gdoffset + 2 * 4); + SW(x6, gback, gdoffset + 3 * 4); break; case 0x71: nextop = F8; - switch((nextop>>3)&7) { + switch ((nextop >> 3) & 7) { case 2: INST_NAME("PSRLW Ex, Ib"); GETEX(x1, 1); u8 = F8; - if (u8>15) { + if (u8 > 15) { // just zero dest - SD(xZR, wback, fixedaddress+0); - SD(xZR, wback, fixedaddress+8); - } else if(u8) { - for (int i=0; i<8; ++i) { + SD(xZR, wback, fixedaddress + 0); + SD(xZR, wback, fixedaddress + 8); + } else if (u8) { + for (int i = 0; i < 8; ++i) { // EX->uw[i] >>= u8; - LHU(x3, wback, fixedaddress+i*2); + LHU(x3, wback, fixedaddress + i * 2); SRLI(x3, x3, u8); - SH(x3, wback, fixedaddress+i*2); + SH(x3, wback, fixedaddress + i * 2); } } break; @@ 
-1673,13 +1723,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PSRAW Ex, Ib"); GETEX(x1, 1); u8 = F8; - if(u8>15) u8=15; - if(u8) { - for (int i=0; i<8; ++i) { + if (u8 > 15) u8 = 15; + if (u8) { + for (int i = 0; i < 8; ++i) { // EX->sw[i] >>= u8; - LH(x3, wback, fixedaddress+i*2); + LH(x3, wback, fixedaddress + i * 2); SRAI(x3, x3, u8); - SH(x3, wback, fixedaddress+i*2); + SH(x3, wback, fixedaddress + i * 2); } } break; @@ -1687,16 +1737,16 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PSLLW Ex, Ib"); GETEX(x1, 1); u8 = F8; - if (u8>15) { + if (u8 > 15) { // just zero dest - SD(xZR, wback, fixedaddress+0); - SD(xZR, wback, fixedaddress+8); - } else if(u8) { - for (int i=0; i<8; ++i) { + SD(xZR, wback, fixedaddress + 0); + SD(xZR, wback, fixedaddress + 8); + } else if (u8) { + for (int i = 0; i < 8; ++i) { // EX->uw[i] <<= u8; - LHU(x3, wback, fixedaddress+i*2); + LHU(x3, wback, fixedaddress + i * 2); SLLI(x3, x3, u8); - SH(x3, wback, fixedaddress+i*2); + SH(x3, wback, fixedaddress + i * 2); } } break; @@ -1707,17 +1757,17 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x72: nextop = F8; - switch((nextop>>3)&7) { + switch ((nextop >> 3) & 7) { case 2: INST_NAME("PSRLD Ex, Ib"); GETEX(x1, 1); u8 = F8; - if(u8) { - if (u8>31) { + if (u8) { + if (u8 > 31) { // just zero dest - SD(xZR, wback, fixedaddress+0); - SD(xZR, wback, fixedaddress+8); - } else if(u8) { + SD(xZR, wback, fixedaddress + 0); + SD(xZR, wback, fixedaddress + 8); + } else if (u8) { SSE_LOOP_D_S(x3, SRLI(x3, x3, u8)); } } @@ -1726,7 +1776,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PSRAD Ex, Ib"); GETEX(x1, 1); u8 = F8; - if(u8>31) u8=31; + if (u8 > 31) u8 = 31; if (u8) { SSE_LOOP_D_S(x3, SRAIW(x3, x3, u8)); } @@ -1735,12 +1785,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, 
int INST_NAME("PSLLD Ex, Ib"); GETEX(x1, 1); u8 = F8; - if(u8) { - if (u8>31) { + if (u8) { + if (u8 > 31) { // just zero dest - SD(xZR, wback, fixedaddress+0); - SD(xZR, wback, fixedaddress+8); - } else if(u8) { + SD(xZR, wback, fixedaddress + 0); + SD(xZR, wback, fixedaddress + 8); + } else if (u8) { SSE_LOOP_D_S(x3, SLLI(x3, x3, u8)); } } @@ -1751,50 +1801,50 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x73: nextop = F8; - switch((nextop>>3)&7) { + switch ((nextop >> 3) & 7) { case 2: INST_NAME("PSRLQ Ex, Ib"); GETEX(x1, 1); u8 = F8; - if(!u8) break; - if(u8>63) { + if (!u8) break; + if (u8 > 63) { // just zero dest - SD(xZR, wback, fixedaddress+0); - SD(xZR, wback, fixedaddress+8); + SD(xZR, wback, fixedaddress + 0); + SD(xZR, wback, fixedaddress + 8); } else { - LD(x3, wback, fixedaddress+0); - LD(x4, wback, fixedaddress+8); + LD(x3, wback, fixedaddress + 0); + LD(x4, wback, fixedaddress + 8); SRLI(x3, x3, u8); SRLI(x4, x4, u8); - SD(x3, wback, fixedaddress+0); - SD(x4, wback, fixedaddress+8); + SD(x3, wback, fixedaddress + 0); + SD(x4, wback, fixedaddress + 8); } break; case 3: INST_NAME("PSRLDQ Ex, Ib"); GETEX(x1, 1); u8 = F8; - if(!u8) break; - if(u8>15) { + if (!u8) break; + if (u8 > 15) { // just zero dest - SD(xZR, wback, fixedaddress+0); - SD(xZR, wback, fixedaddress+8); + SD(xZR, wback, fixedaddress + 0); + SD(xZR, wback, fixedaddress + 8); } else { - u8*=8; + u8 *= 8; if (u8 < 64) { - LD(x3, wback, fixedaddress+0); - LD(x4, wback, fixedaddress+8); + LD(x3, wback, fixedaddress + 0); + LD(x4, wback, fixedaddress + 8); SRLI(x3, x3, u8); - SLLI(x5, x4, 64-u8); + SLLI(x5, x4, 64 - u8); OR(x3, x3, x5); - SD(x3, wback, fixedaddress+0); + SD(x3, wback, fixedaddress + 0); SRLI(x4, x4, u8); - SD(x4, wback, fixedaddress+8); + SD(x4, wback, fixedaddress + 8); } else { - LD(x3, wback, fixedaddress+8); - if (u8-64 > 0) { SRLI(x3, x3, u8-64); } - SD(x3, wback, fixedaddress+0); - SD(xZR, wback, fixedaddress+8); + 
LD(x3, wback, fixedaddress + 8); + if (u8 - 64 > 0) { SRLI(x3, x3, u8 - 64); } + SD(x3, wback, fixedaddress + 0); + SD(xZR, wback, fixedaddress + 8); } } break; @@ -1802,45 +1852,45 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PSLLQ Ex, Ib"); GETEX(x1, 1); u8 = F8; - if(!u8) break; - if(u8>63) { + if (!u8) break; + if (u8 > 63) { // just zero dest - SD(xZR, wback, fixedaddress+0); - SD(xZR, wback, fixedaddress+8); + SD(xZR, wback, fixedaddress + 0); + SD(xZR, wback, fixedaddress + 8); } else { - LD(x3, wback, fixedaddress+0); - LD(x4, wback, fixedaddress+8); + LD(x3, wback, fixedaddress + 0); + LD(x4, wback, fixedaddress + 8); SLLI(x3, x3, u8); SLLI(x4, x4, u8); - SD(x3, wback, fixedaddress+0); - SD(x4, wback, fixedaddress+8); + SD(x3, wback, fixedaddress + 0); + SD(x4, wback, fixedaddress + 8); } break; case 7: INST_NAME("PSLLDQ Ex, Ib"); GETEX(x1, 1); u8 = F8; - if(!u8) break; - if(u8>15) { + if (!u8) break; + if (u8 > 15) { // just zero dest - SD(xZR, wback, fixedaddress+0); - SD(xZR, wback, fixedaddress+8); + SD(xZR, wback, fixedaddress + 0); + SD(xZR, wback, fixedaddress + 8); } else { - u8*=8; + u8 *= 8; if (u8 < 64) { - LD(x3, wback, fixedaddress+0); - LD(x4, wback, fixedaddress+8); + LD(x3, wback, fixedaddress + 0); + LD(x4, wback, fixedaddress + 8); SLLI(x4, x4, u8); - SRLI(x5, x3, 64-u8); + SRLI(x5, x3, 64 - u8); OR(x4, x4, x5); - SD(x4, wback, fixedaddress+8); + SD(x4, wback, fixedaddress + 8); SLLI(x3, x3, u8); - SD(x3, wback, fixedaddress+0); + SD(x3, wback, fixedaddress + 0); } else { - LD(x3, wback, fixedaddress+0); - if (u8-64 > 0) { SLLI(x3, x3, u8-64); } - SD(x3, wback, fixedaddress+8); - SD(xZR, wback, fixedaddress+0); + LD(x3, wback, fixedaddress + 0); + if (u8 - 64 > 0) { SLLI(x3, x3, u8 - 64); } + SD(x3, wback, fixedaddress + 8); + SD(xZR, wback, fixedaddress + 0); } } break; @@ -1853,13 +1903,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; 
GETGX(); GETEX(x2, 0); - for (int i=0; i<16; ++i) { - LBU(x3, gback, gdoffset+i); - LBU(x4, wback, fixedaddress+i); + for (int i = 0; i < 16; ++i) { + LBU(x3, gback, gdoffset + i); + LBU(x4, wback, fixedaddress + i); SUB(x3, x3, x4); SEQZ(x3, x3); NEG(x3, x3); - SB(x3, gback, gdoffset+i); + SB(x3, gback, gdoffset + i); } break; case 0x75: @@ -1882,63 +1932,63 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); - FLD(d0, gback, gdoffset+0); - FLD(d1, gback, gdoffset+8); - if(!box64_dynarec_fastnan) { + FLD(d0, gback, gdoffset + 0); + FLD(d1, gback, gdoffset + 8); + if (!box64_dynarec_fastnan) { FEQD(x3, d0, d0); FEQD(x4, d1, d1); AND(x3, x3, x4); } FADDD(d0, d0, d1); - if(!box64_dynarec_fastnan) { + if (!box64_dynarec_fastnan) { FEQD(x4, d0, d0); BEQZ(x3, 12); BNEZ(x4, 8); FNEGD(d0, d0); } - FSD(d0, gback, gdoffset+0); - if(MODREG && gd==(nextop&7)+(rex.b<<3)) { - FSD(d0, gback, gdoffset+8); + FSD(d0, gback, gdoffset + 0); + if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { + FSD(d0, gback, gdoffset + 8); } else { GETEX(x2, 0); - FLD(d0, wback, fixedaddress+0); - FLD(d1, wback, fixedaddress+8); - if(!box64_dynarec_fastnan) { + FLD(d0, wback, fixedaddress + 0); + FLD(d1, wback, fixedaddress + 8); + if (!box64_dynarec_fastnan) { FEQD(x3, d0, d0); FEQD(x4, d1, d1); AND(x3, x3, x4); } FADDD(d0, d0, d1); - if(!box64_dynarec_fastnan) { + if (!box64_dynarec_fastnan) { FEQD(x4, d0, d0); BEQZ(x3, 12); BNEZ(x4, 8); FNEGD(d0, d0); } - FSD(d0, gback, gdoffset+8); + FSD(d0, gback, gdoffset + 8); } break; case 0x7E: INST_NAME("MOVD Ed,Gx"); nextop = F8; GETGX(); - if(rex.w) { - if(MODREG) { - ed = xRAX + (nextop&7) + (rex.b<<3); - LD(ed, gback, gdoffset+0); + if (rex.w) { + if (MODREG) { + ed = xRAX + (nextop & 7) + (rex.b << 3); + LD(ed, gback, gdoffset + 0); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); - LD(x3, gback, gdoffset+0); + 
LD(x3, gback, gdoffset + 0); SD(x3, ed, fixedaddress); SMWRITE2(); } } else { - if(MODREG) { - ed = xRAX + (nextop&7) + (rex.b<<3); - LWU(ed, gback, gdoffset+0); + if (MODREG) { + ed = xRAX + (nextop & 7) + (rex.b << 3); + LWU(ed, gback, gdoffset + 0); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); - LWU(x3, gback, gdoffset+0); + LWU(x3, gback, gdoffset + 0); SW(x3, ed, fixedaddress); SMWRITE2(); } @@ -1950,7 +2000,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETEX(x2, 0); SSE_LOOP_MV_Q2(x3); - if(!MODREG) SMWRITE2(); + if (!MODREG) SMWRITE2(); break; case 0xAF: INST_NAME("IMUL Gw,Ew"); @@ -1969,17 +2019,17 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("MOVSX Gw, Eb"); nextop = F8; GETGD; - if(MODREG) { - if(rex.rex) { - ed = xRAX+(nextop&7)+(rex.b<<3); - eb1=ed; - eb2=0; + if (MODREG) { + if (rex.rex) { + ed = xRAX + (nextop & 7) + (rex.b << 3); + eb1 = ed; + eb2 = 0; } else { - ed = (nextop&7); - eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx - eb2 = (ed&4)>>2; // L or H + ed = (nextop & 7); + eb1 = xRAX + (ed & 3); // Ax, Cx, Dx or Bx + eb2 = (ed & 4) >> 2; // L or H } - SLLI(x1, eb1, 56-eb2*8); + SLLI(x1, eb1, 56 - eb2 * 8); SRAI(x1, x1, 56); } else { SMREAD(); @@ -2000,12 +2050,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int u8 = F8; d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); - for(int i=0; i<2; ++i) { - FLD(d0, gback, gdoffset+8*i); - FLD(d1, wback, fixedaddress+8*i); - if ((u8&7) == 0) { // Equal + for (int i = 0; i < 2; ++i) { + FLD(d0, gback, gdoffset + 8 * i); + FLD(d1, wback, fixedaddress + 8 * i); + if ((u8 & 7) == 0) { // Equal FEQD(x3, d0, d1); - } else if ((u8&7) == 4) { // Not Equal or unordered + } else if ((u8 & 7) == 4) { // Not Equal or unordered FEQD(x3, d0, d1); XORI(x3, x3, 1); } else { @@ -2014,33 +2064,39 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, 
uintptr_t addr, uintptr_t ip, int FEQD(x3, d1, d1); AND(x3, x3, x4); - switch(u8&7) { - case 1: BEQ_MARK(x3, xZR); FLTD(x3, d0, d1); break; // Less than - case 2: BEQ_MARK(x3, xZR); FLED(x3, d0, d1); break; // Less or equal - case 3: XORI(x3, x3, 1); break; // NaN - case 5: { // Greater or equal or unordered - BEQ(x3, xZR, 12); // MARK2 - FLED(x3, d1, d0); - J(8); // MARK; - break; - } - case 6: { // Greater or unordered - BEQ(x3, xZR, 12); // MARK2 - FLTD(x3, d1, d0); - J(8); // MARK; - break; - } - case 7: break; // Not NaN + switch (u8 & 7) { + case 1: + BEQ_MARK(x3, xZR); + FLTD(x3, d0, d1); + break; // Less than + case 2: + BEQ_MARK(x3, xZR); + FLED(x3, d0, d1); + break; // Less or equal + case 3: XORI(x3, x3, 1); break; // NaN + case 5: { // Greater or equal or unordered + BEQ(x3, xZR, 12); // MARK2 + FLED(x3, d1, d0); + J(8); // MARK; + break; + } + case 6: { // Greater or unordered + BEQ(x3, xZR, 12); // MARK2 + FLTD(x3, d1, d0); + J(8); // MARK; + break; + } + case 7: break; // Not NaN } // MARK2; - if ((u8&7) == 5 || (u8&7) == 6) { + if ((u8 & 7) == 5 || (u8 & 7) == 6) { MOV32w(x3, 1); } // MARK; } NEG(x3, x3); - SD(x3, gback, gdoffset+8*i); + SD(x3, gback, gdoffset + 8 * i); } break; case 0xC4: @@ -2049,7 +2105,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETED(1); GETGX(); u8 = (F8)&7; - SH(ed, gback, gdoffset+u8*2); + SH(ed, gback, gdoffset + u8 * 2); break; case 0xC5: INST_NAME("PEXTRW Gd,Ex,Ib"); @@ -2057,7 +2113,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGD; GETEX(x1, 0); u8 = (F8)&7; - LHU(gd, wback, fixedaddress+u8*2); + LHU(gd, wback, fixedaddress + u8 * 2); break; case 0xC6: INST_NAME("SHUFPD Gx, Ex, Ib"); @@ -2065,15 +2121,15 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETEX(x2, 1); u8 = F8; - if (MODREG && gd==(nextop&7)+(rex.b<<3) && u8==0) { - LD(x3, gback, gdoffset+0); - SD(x3, gback, gdoffset+8); + if (MODREG 
&& gd == (nextop & 7) + (rex.b << 3) && u8 == 0) { + LD(x3, gback, gdoffset + 0); + SD(x3, gback, gdoffset + 8); break; } - LD(x3, gback, gdoffset+8*(u8&1)); - LD(x4, wback, fixedaddress+8*((u8>>1)&1)); - SD(x3, gback, gdoffset+0); - SD(x4, gback, gdoffset+8); + LD(x3, gback, gdoffset + 8 * (u8 & 1)); + LD(x4, wback, fixedaddress + 8 * ((u8 >> 1) & 1)); + SD(x3, gback, gdoffset + 0); + SD(x4, gback, gdoffset + 8); break; case 0xD1: INST_NAME("PSRLW Gx,Ex"); @@ -2083,14 +2139,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LD(x3, wback, fixedaddress); ADDI(x4, xZR, 16); BLTU_MARK(x3, x4); - SD(xZR, gback, gdoffset+0); - SD(xZR, gback, gdoffset+8); + SD(xZR, gback, gdoffset + 0); + SD(xZR, gback, gdoffset + 8); B_NEXT_nocond; MARK; - for (int i=0; i<8; ++i) { - LHU(x5, gback, gdoffset+2*i); + for (int i = 0; i < 8; ++i) { + LHU(x5, gback, gdoffset + 2 * i); SRLW(x5, x5, x3); - SH(x5, gback, gdoffset+2*i); + SH(x5, gback, gdoffset + 2 * i); } break; case 0xD2: @@ -2101,14 +2157,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LD(x3, wback, fixedaddress); ADDI(x4, xZR, 32); BLTU_MARK(x3, x4); - SD(xZR, gback, gdoffset+0); - SD(xZR, gback, gdoffset+8); + SD(xZR, gback, gdoffset + 0); + SD(xZR, gback, gdoffset + 8); B_NEXT_nocond; MARK; - for (int i=0; i<4; ++i) { - LWU(x5, gback, gdoffset+4*i); + for (int i = 0; i < 4; ++i) { + LWU(x5, gback, gdoffset + 4 * i); SRLW(x5, x5, x3); - SW(x5, gback, gdoffset+4*i); + SW(x5, gback, gdoffset + 4 * i); } break; case 0xD3: @@ -2119,14 +2175,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LD(x3, wback, fixedaddress); ADDI(x4, xZR, 64); BLTU_MARK(x3, x4); - SD(xZR, gback, gdoffset+0); - SD(xZR, gback, gdoffset+8); + SD(xZR, gback, gdoffset + 0); + SD(xZR, gback, gdoffset + 8); B_NEXT_nocond; MARK; - for (int i=0; i<2; ++i) { - LD(x5, gback, gdoffset+8*i); + for (int i = 0; i < 2; ++i) { + LD(x5, gback, gdoffset + 8 * i); 
SRL(x5, x5, x3); - SD(x5, gback, gdoffset+8*i); + SD(x5, gback, gdoffset + 8 * i); } break; case 0xD4: @@ -2141,11 +2197,11 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<8; ++i) { - LH(x3, gback, gdoffset+2*i); - LH(x4, wback, fixedaddress+2*i); + for (int i = 0; i < 8; ++i) { + LH(x3, gback, gdoffset + 2 * i); + LH(x4, wback, fixedaddress + 2 * i); MULW(x3, x3, x4); - SH(x3, gback, gdoffset+2*i); + SH(x3, gback, gdoffset + 2 * i); } break; case 0xD6: @@ -2153,9 +2209,9 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGXSD(d0); GETEX(x2, 0); - FSD(d0, wback, fixedaddress+0); + FSD(d0, wback, fixedaddress + 0); if (MODREG) { - SD(xZR, wback, fixedaddress+8); + SD(xZR, wback, fixedaddress + 8); } else { SMWRITE2(); } @@ -2166,8 +2222,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 0); GETGD; MV(gd, xZR); - for (int i=0; i<16; ++i) { - LB(x1, wback, fixedaddress+i); + for (int i = 0; i < 16; ++i) { + LB(x1, wback, fixedaddress + i); SLT(x3, x1, xZR); if (i > 0) SLLI(x3, x3, i); OR(gd, gd, x3); @@ -2178,14 +2234,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<16; ++i) { - LBU(x3, gback, gdoffset+i); - LBU(x4, wback, fixedaddress+i); + for (int i = 0; i < 16; ++i) { + LBU(x3, gback, gdoffset + i); + LBU(x4, wback, fixedaddress + i); SUB(x3, x3, x4); NOT(x4, x3); SRAI(x4, x4, 63); AND(x3, x3, x4); - SB(x3, gback, gdoffset+i); + SB(x3, gback, gdoffset + i); } break; case 0xD9: @@ -2200,12 +2256,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for (int i=0; i<16; ++i) { - LBU(x3, gback, gdoffset+i); - LBU(x4, wback, fixedaddress+i); + for (int i = 0; i < 16; ++i) { + LBU(x3, gback, gdoffset + i); + LBU(x4, wback, fixedaddress 
+ i); BLTU(x3, x4, 8); MV(x3, x4); - SB(x3, gback, gdoffset+i); + SB(x3, gback, gdoffset + i); } break; case 0xDB: @@ -2221,13 +2277,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETEX(x2, 0); ADDI(x5, xZR, 0xFF); - for(int i=0; i<16; ++i) { - LBU(x3, gback, gdoffset+i); - LBU(x4, wback, fixedaddress+i); + for (int i = 0; i < 16; ++i) { + LBU(x3, gback, gdoffset + i); + LBU(x4, wback, fixedaddress + i); ADD(x3, x3, x4); BLT(x3, x5, 8); ADDI(x3, xZR, 0xFF); - SB(x3, gback, gdoffset+i); + SB(x3, gback, gdoffset + i); } break; case 0xDD: @@ -2235,16 +2291,16 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<8; ++i) { + for (int i = 0; i < 8; ++i) { // tmp32s = (int32_t)GX->uw[i] + EX->uw[i]; // GX->uw[i] = (tmp32s>65535)?65535:tmp32s; - LHU(x3, gback, gdoffset+i*2); - LHU(x4, wback, fixedaddress+i*2); + LHU(x3, gback, gdoffset + i * 2); + LHU(x4, wback, fixedaddress + i * 2); ADDW(x3, x3, x4); MOV32w(x4, 65536); BLT(x3, x4, 8); ADDIW(x3, x4, -1); - SH(x3, gback, gdoffset+i*2); + SH(x3, gback, gdoffset + i * 2); } break; case 0xDE: @@ -2252,12 +2308,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for (int i=0; i<16; ++i) { - LBU(x3, gback, gdoffset+i); - LBU(x4, wback, fixedaddress+i); + for (int i = 0; i < 16; ++i) { + LBU(x3, gback, gdoffset + i); + LBU(x4, wback, fixedaddress + i); BLTU(x4, x3, 8); MV(x3, x4); - SB(x3, gback, gdoffset+i); + SB(x3, gback, gdoffset + i); } break; case 0xDF: @@ -2267,18 +2323,18 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 0); SSE_LOOP_Q(x3, x4, NOT(x3, x3); AND(x3, x3, x4)); break; - case 0xE0: + case 0xE0: INST_NAME("PAVGB Gx, Ex"); nextop = F8; GETGX(); GETEX(x2, 0); - for (int i=0; i<16; ++i) { - LBU(x3, gback, gdoffset+i); - LBU(x4, wback, fixedaddress+i); + for (int i = 0; i 
< 16; ++i) { + LBU(x3, gback, gdoffset + i); + LBU(x4, wback, fixedaddress + i); ADDW(x3, x3, x4); ADDIW(x3, x3, 1); SRAIW(x3, x3, 1); - SB(x3, gback, gdoffset+i); + SB(x3, gback, gdoffset + i); } break; case 0xE1: @@ -2290,10 +2346,10 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LD(x3, wback, fixedaddress); BLTU(x3, x4, 8); SUBI(x3, x4, 1); - for (int i=0; i<8; ++i) { - LH(x4, gback, gdoffset+2*i); + for (int i = 0; i < 8; ++i) { + LH(x4, gback, gdoffset + 2 * i); SRAW(x4, x4, x3); - SH(x4, gback, gdoffset+2*i); + SH(x4, gback, gdoffset + 2 * i); } break; case 0xE2: @@ -2305,10 +2361,10 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LD(x3, wback, fixedaddress); BLTU(x3, x4, 8); SUBI(x3, x4, 1); - for (int i=0; i<4; ++i) { - LW(x4, gback, gdoffset+4*i); + for (int i = 0; i < 4; ++i) { + LW(x4, gback, gdoffset + 4 * i); SRAW(x4, x4, x3); - SW(x4, gback, gdoffset+4*i); + SW(x4, gback, gdoffset + 4 * i); } break; case 0xE3: @@ -2316,13 +2372,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for (int i=0; i<8; ++i) { - LHU(x3, gback, gdoffset+2*i); - LHU(x4, wback, fixedaddress+2*i); + for (int i = 0; i < 8; ++i) { + LHU(x3, gback, gdoffset + 2 * i); + LHU(x4, wback, fixedaddress + 2 * i); ADDW(x3, x3, x4); ADDIW(x3, x3, 1); SRAIW(x3, x3, 1); - SH(x3, gback, gdoffset+2*i); + SH(x3, gback, gdoffset + 2 * i); } break; case 0xE4: @@ -2330,12 +2386,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<8; ++i) { - LHU(x3, gback, gdoffset+2*i); - LHU(x4, wback, fixedaddress+2*i); + for (int i = 0; i < 8; ++i) { + LHU(x3, gback, gdoffset + 2 * i); + LHU(x4, wback, fixedaddress + 2 * i); MULW(x3, x3, x4); SRLIW(x3, x3, 16); - SH(x3, gback, gdoffset+2*i); + SH(x3, gback, gdoffset + 2 * i); } break; case 0xE5: @@ -2343,12 +2399,12 @@ uintptr_t 
dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<8; ++i) { - LH(x3, gback, gdoffset+2*i); - LH(x4, wback, fixedaddress+2*i); + for (int i = 0; i < 8; ++i) { + LH(x3, gback, gdoffset + 2 * i); + LH(x4, wback, fixedaddress + 2 * i); MULW(x3, x3, x4); SRAIW(x3, x3, 16); - SH(x3, gback, gdoffset+2*i); + SH(x3, gback, gdoffset + 2 * i); } break; case 0xE6: @@ -2358,31 +2414,31 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 0); v0 = fpu_get_scratch(dyn); v1 = fpu_get_scratch(dyn); - FLD(v0, wback, fixedaddress+0); - FLD(v1, wback, fixedaddress+8); - if(!box64_dynarec_fastround) { - FSFLAGSI(0); // // reset all bits + FLD(v0, wback, fixedaddress + 0); + FLD(v1, wback, fixedaddress + 8); + if (!box64_dynarec_fastround) { + FSFLAGSI(0); // // reset all bits } FCVTWD(x3, v0, RD_RTZ); - if(!box64_dynarec_fastround) { - FRFLAGS(x5); // get back FPSR to check the IOC bit - ANDI(x5, x5, (1<<FR_NV)|(1<<FR_OF)); + if (!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, (1 << FR_NV) | (1 << FR_OF)); BEQ_MARK(x5, xZR); MOV32w(x3, 0x80000000); MARK; - FSFLAGSI(0); // // reset all bits + FSFLAGSI(0); // // reset all bits } FCVTWD(x4, v1, RD_RTZ); - if(!box64_dynarec_fastround) { - FRFLAGS(x5); // get back FPSR to check the IOC bit - ANDI(x5, x5, (1<<FR_NV)|(1<<FR_OF)); + if (!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, (1 << FR_NV) | (1 << FR_OF)); BEQ_MARK2(x5, xZR); MOV32w(x4, 0x80000000); MARK2; } - SW(x3, gback, gdoffset+0); - SW(x4, gback, gdoffset+4); - SD(xZR, gback, gdoffset+8); + SW(x3, gback, gdoffset + 0); + SW(x4, gback, gdoffset + 4); + SD(xZR, gback, gdoffset + 8); break; case 0xE7: INST_NAME("MOVNTDQ Ex, Gx"); @@ -2396,23 +2452,23 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int 
i=0; i<16; ++i) { + for (int i = 0; i < 16; ++i) { // tmp16s = (int16_t)GX->sb[i] - EX->sb[i]; // GX->sb[i] = (tmp16s<-128)?-128:((tmp16s>127)?127:tmp16s); - LB(x3, gback, gdoffset+i); - LB(x4, wback, fixedaddress+i); + LB(x3, gback, gdoffset + i); + LB(x4, wback, fixedaddress + i); SUBW(x3, x3, x4); SLLIW(x3, x3, 16); SRAIW(x3, x3, 16); ADDI(x4, xZR, 0x7f); - BLT(x3, x4, 12); // tmp16s>127? - SB(x4, gback, gdoffset+i); - J(24); // continue + BLT(x3, x4, 12); // tmp16s>127? + SB(x4, gback, gdoffset + i); + J(24); // continue ADDI(x4, xZR, 0xf80); - BLT(x4, x3, 12); // tmp16s<-128? - SB(x4, gback, gdoffset+i); - J(8); // continue - SB(x3, gback, gdoffset+i); + BLT(x4, x3, 12); // tmp16s<-128? + SB(x4, gback, gdoffset + i); + J(8); // continue + SB(x3, gback, gdoffset + i); } break; case 0xE9: @@ -2420,20 +2476,20 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<8; ++i) { + for (int i = 0; i < 8; ++i) { // tmp32s = (int32_t)GX->sw[i] - EX->sw[i]; // GX->sw[i] = (tmp32s>32767)?32767:((tmp32s<-32768)?-32768:tmp32s); - LH(x3, gback, gdoffset+2*i); - LH(x4, wback, fixedaddress+2*i); + LH(x3, gback, gdoffset + 2 * i); + LH(x4, wback, fixedaddress + 2 * i); SUBW(x3, x3, x4); LUI(x4, 0xFFFF8); // -32768 BGE(x3, x4, 12); - SH(x4, gback, gdoffset+2*i); - J(20); // continue + SH(x4, gback, gdoffset + 2 * i); + J(20); // continue LUI(x4, 8); // 32768 BLT(x3, x4, 8); ADDIW(x3, x4, -1); - SH(x3, gback, gdoffset+2*i); + SH(x3, gback, gdoffset + 2 * i); } break; case 0xEA: @@ -2441,12 +2497,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for (int i=0; i<8; ++i) { - LH(x3, gback, gdoffset+2*i); - LH(x4, wback, fixedaddress+2*i); + for (int i = 0; i < 8; ++i) { + LH(x3, gback, gdoffset + 2 * i); + LH(x4, wback, fixedaddress + 2 * i); BLT(x3, x4, 8); MV(x3, x4); - SH(x3, gback, gdoffset+2*i); + SH(x3, gback, gdoffset + 2 * 
i); } break; case 0xEB: @@ -2461,23 +2517,23 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<16; ++i) { + for (int i = 0; i < 16; ++i) { // tmp16s = (int16_t)GX->sb[i] + EX->sb[i]; // GX->sb[i] = (tmp16s>127)?127:((tmp16s<-128)?-128:tmp16s); - LB(x3, gback, gdoffset+i); - LB(x4, wback, fixedaddress+i); + LB(x3, gback, gdoffset + i); + LB(x4, wback, fixedaddress + i); ADDW(x3, x3, x4); SLLIW(x3, x3, 16); SRAIW(x3, x3, 16); ADDI(x4, xZR, 0x7f); - BLT(x3, x4, 12); // tmp16s>127? - SB(x4, gback, gdoffset+i); - J(24); // continue + BLT(x3, x4, 12); // tmp16s>127? + SB(x4, gback, gdoffset + i); + J(24); // continue ADDI(x4, xZR, 0xf80); - BLT(x4, x3, 12); // tmp16s<-128? - SB(x4, gback, gdoffset+i); - J(8); // continue - SB(x3, gback, gdoffset+i); + BLT(x4, x3, 12); // tmp16s<-128? + SB(x4, gback, gdoffset + i); + J(8); // continue + SB(x3, gback, gdoffset + i); } break; case 0xED: @@ -2485,20 +2541,20 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<8; ++i) { + for (int i = 0; i < 8; ++i) { // tmp32s = (int32_t)GX->sw[i] + EX->sw[i]; // GX->sw[i] = (tmp32s>32767)?32767:((tmp32s<-32768)?-32768:tmp32s); - LH(x3, gback, gdoffset+2*i); - LH(x4, wback, fixedaddress+2*i); + LH(x3, gback, gdoffset + 2 * i); + LH(x4, wback, fixedaddress + 2 * i); ADDW(x3, x3, x4); LUI(x4, 0xFFFF8); // -32768 BGE(x3, x4, 12); - SH(x4, gback, gdoffset+2*i); - J(20); // continue + SH(x4, gback, gdoffset + 2 * i); + J(20); // continue LUI(x4, 8); // 32768 BLT(x3, x4, 8); ADDIW(x3, x4, -1); - SH(x3, gback, gdoffset+2*i); + SH(x3, gback, gdoffset + 2 * i); } break; case 0xEE: @@ -2512,11 +2568,10 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PXOR Gx, Ex"); nextop = F8; GETGX(); - if(MODREG && gd==(nextop&7)+(rex.b<<3)) - { + if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { // 
just zero dest - SD(xZR, gback, gdoffset+0); - SD(xZR, gback, gdoffset+8); + SD(xZR, gback, gdoffset + 0); + SD(xZR, gback, gdoffset + 8); } else { GETEX(x2, 0); SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); @@ -2528,17 +2583,17 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETEX(x2, 0); ADDI(x4, xZR, 16); - LD(x3, wback, fixedaddress+0); + LD(x3, wback, fixedaddress + 0); BLTU_MARK(x3, x4); // just zero dest - SD(xZR, gback, gdoffset+0); - SD(xZR, gback, gdoffset+8); + SD(xZR, gback, gdoffset + 0); + SD(xZR, gback, gdoffset + 8); B_NEXT_nocond; MARK; - for (int i=0; i<8; ++i) { - LHU(x4, gback, gdoffset+2*i); + for (int i = 0; i < 8; ++i) { + LHU(x4, gback, gdoffset + 2 * i); SLLW(x4, x4, x3); - SH(x4, gback, gdoffset+2*i); + SH(x4, gback, gdoffset + 2 * i); } break; case 0xF2: @@ -2547,17 +2602,17 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETEX(x2, 0); ADDI(x4, xZR, 32); - LD(x3, wback, fixedaddress+0); + LD(x3, wback, fixedaddress + 0); BLTU_MARK(x3, x4); // just zero dest - SD(xZR, gback, gdoffset+0); - SD(xZR, gback, gdoffset+8); + SD(xZR, gback, gdoffset + 0); + SD(xZR, gback, gdoffset + 8); B_NEXT_nocond; MARK; - for (int i=0; i<4; ++i) { - LWU(x4, gback, gdoffset+4*i); + for (int i = 0; i < 4; ++i) { + LWU(x4, gback, gdoffset + 4 * i); SLLW(x4, x4, x3); - SW(x4, gback, gdoffset+4*i); + SW(x4, gback, gdoffset + 4 * i); } break; case 0xF3: @@ -2566,17 +2621,17 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETEX(x2, 0); ADDI(x4, xZR, 64); - LD(x3, wback, fixedaddress+0); + LD(x3, wback, fixedaddress + 0); BLTU_MARK(x3, x4); // just zero dest - SD(xZR, gback, gdoffset+0); - SD(xZR, gback, gdoffset+8); + SD(xZR, gback, gdoffset + 0); + SD(xZR, gback, gdoffset + 8); B_NEXT_nocond; MARK; - for (int i=0; i<2; ++i) { - LD(x4, gback, gdoffset+8*i); + for (int i = 0; i < 2; ++i) { + LD(x4, gback, gdoffset + 8 * i); SLL(x4, x4, x3); - 
SD(x4, gback, gdoffset+8*i); + SD(x4, gback, gdoffset + 8 * i); } break; case 0xF4: @@ -2585,32 +2640,32 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETEX(x2, 0); // GX->q[1] = (uint64_t)EX->ud[2]*GX->ud[2]; - LWU(x3, gback, gdoffset+2*4); - LWU(x4, wback, fixedaddress+2*4); + LWU(x3, gback, gdoffset + 2 * 4); + LWU(x4, wback, fixedaddress + 2 * 4); MUL(x3, x3, x4); - SD(x3, gback, gdoffset+8); + SD(x3, gback, gdoffset + 8); // GX->q[0] = (uint64_t)EX->ud[0]*GX->ud[0]; - LWU(x3, gback, gdoffset+0*4); - LWU(x4, wback, fixedaddress+0*4); + LWU(x3, gback, gdoffset + 0 * 4); + LWU(x4, wback, fixedaddress + 0 * 4); MUL(x3, x3, x4); - SD(x3, gback, gdoffset+0); + SD(x3, gback, gdoffset + 0); break; case 0xF5: INST_NAME("PMADDWD Gx, Ex"); nextop = F8; GETGX(); GETEX(x2, 0); - for (int i=0; i<4; ++i) { + for (int i = 0; i < 4; ++i) { // GX->sd[i] = (int32_t)(GX->sw[i*2+0])*EX->sw[i*2+0] + // (int32_t)(GX->sw[i*2+1])*EX->sw[i*2+1]; - LH(x3, gback, gdoffset+2*(i*2+0)); - LH(x4, wback, fixedaddress+2*(i*2+0)); + LH(x3, gback, gdoffset + 2 * (i * 2 + 0)); + LH(x4, wback, fixedaddress + 2 * (i * 2 + 0)); MULW(x5, x3, x4); - LH(x3, gback, gdoffset+2*(i*2+1)); - LH(x4, wback, fixedaddress+2*(i*2+1)); + LH(x3, gback, gdoffset + 2 * (i * 2 + 1)); + LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); MULW(x6, x3, x4); ADDW(x5, x5, x6); - SW(x5, gback, gdoffset+4*i); + SW(x5, gback, gdoffset + 4 * i); } break; case 0xF6: @@ -2619,18 +2674,18 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETEX(x2, 0); MV(x6, xZR); - for (int i=0; i<16; ++i) { - LBU(x3, gback, gdoffset+i); - LBU(x4, wback, fixedaddress+i); + for (int i = 0; i < 16; ++i) { + LBU(x3, gback, gdoffset + i); + LBU(x4, wback, fixedaddress + i); SUBW(x3, x3, x4); SRAIW(x5, x3, 31); XOR(x3, x5, x3); SUBW(x3, x3, x5); ANDI(x3, x3, 0xff); ADDW(x6, x6, x3); - if (i==7 || i == 15) { - SD(x6, gback, gdoffset+i+1-8); - if (i==7) MV(x6, xZR); + 
if (i == 7 || i == 15) { + SD(x6, gback, gdoffset + i + 1 - 8); + if (i == 7) MV(x6, xZR); } } break; @@ -2639,12 +2694,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<16; ++i) { + for (int i = 0; i < 16; ++i) { // GX->sb[i] -= EX->sb[i]; - LB(x3, wback, fixedaddress+i); - LB(x4, gback, gdoffset+i); + LB(x3, wback, fixedaddress + i); + LB(x4, gback, gdoffset + i); SUB(x3, x4, x3); - SB(x3, gback, gdoffset+i); + SB(x3, gback, gdoffset + i); } break; case 0xF9: @@ -2673,12 +2728,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0); - for(int i=0; i<16; ++i) { + for (int i = 0; i < 16; ++i) { // GX->sb[i] += EX->sb[i]; - LB(x3, gback, gdoffset+i); - LB(x4, wback, fixedaddress+i); + LB(x3, gback, gdoffset + i); + LB(x4, wback, fixedaddress + i); ADDW(x3, x3, x4); - SB(x3, gback, gdoffset+i); + SB(x3, gback, gdoffset + i); } break; case 0xFD: diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c index 0c0676e0..9007e46e 100644 --- a/src/dynarec/rv64/dynarec_rv64_f30f.c +++ b/src/dynarec/rv64/dynarec_rv64_f30f.c @@ -24,7 +24,8 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog) { - (void)ip; (void)need_epilog; + (void)ip; + (void)need_epilog; uint8_t opcode = F8; uint8_t nextop, u8; @@ -46,14 +47,14 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MAYUSE(v1); MAYUSE(j64); - switch(opcode) { + switch (opcode) { case 0x10: INST_NAME("MOVSS Gx, Ex"); nextop = F8; GETG; - if(MODREG) { + if (MODREG) { v0 = sse_get_reg(dyn, ninst, x1, gd, 1); - q0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3), 1); + q0 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 1); FMVS(v0, q0); } else { v0 = sse_get_reg_empty(dyn, ninst, x1, gd, 1); @@ -61,8 +62,8 @@ uintptr_t 
dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 8, 0); FLW(v0, ed, fixedaddress); // reset upper part - SW(xZR, xEmu, offsetof(x64emu_t, xmm[gd])+4); - SD(xZR, xEmu, offsetof(x64emu_t, xmm[gd])+8); + SW(xZR, xEmu, offsetof(x64emu_t, xmm[gd]) + 4); + SD(xZR, xEmu, offsetof(x64emu_t, xmm[gd]) + 8); } break; case 0x11: @@ -70,8 +71,8 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETG; v0 = sse_get_reg(dyn, ninst, x1, gd, 1); - if(MODREG) { - q0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3), 1); + if (MODREG) { + q0 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 1); FMVS(q0, v0); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0); @@ -88,12 +89,12 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int // GX->ud[1] = GX->ud[0] = EX->ud[0]; // GX->ud[3] = GX->ud[2] = EX->ud[2]; - LD(x3, wback, fixedaddress+0); - SD(x3, gback, gdoffset+0); - SD(x3, gback, gdoffset+4); - LD(x3, wback, fixedaddress+8); - SD(x3, gback, gdoffset+8); - SD(x3, gback, gdoffset+12); + LD(x3, wback, fixedaddress + 0); + SD(x3, gback, gdoffset + 0); + SD(x3, gback, gdoffset + 4); + LD(x3, wback, fixedaddress + 8); + SD(x3, gback, gdoffset + 8); + SD(x3, gback, gdoffset + 12); break; case 0x1E: INST_NAME("NOP / ENDBR32 / ENDBR64"); @@ -106,7 +107,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGXSS(v0); GETED(0); - if(rex.w) { + if (rex.w) { FCVTSL(v0, ed, RD_RNE); } else { FCVTSW(v0, ed, RD_RNE); @@ -118,17 +119,17 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGD; GETEXSS(d0, 0); - if(!box64_dynarec_fastround) { - FSFLAGSI(0); // // reset all bits + if (!box64_dynarec_fastround) { + FSFLAGSI(0); // // reset all bits } FCVTSxw(gd, d0, RD_RTZ); - if(!rex.w) + 
if (!rex.w) ZEROUP(gd); - if(!box64_dynarec_fastround) { - FRFLAGS(x5); // get back FPSR to check the IOC bit - ANDI(x5, x5, (1<<FR_NV)|(1<<FR_OF)); + if (!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, (1 << FR_NV) | (1 << FR_OF)); CBZ_NEXT(x5); - if(rex.w) { + if (rex.w) { MOV64x(gd, 0x8000000000000000LL); } else { MOV32w(gd, 0x80000000); @@ -140,19 +141,19 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGD; GETEXSS(d0, 0); - if(!box64_dynarec_fastround) { - FSFLAGSI(0); // // reset all bits + if (!box64_dynarec_fastround) { + FSFLAGSI(0); // // reset all bits } u8 = sse_setround(dyn, ninst, x5, x6); FCVTSxw(gd, d0, RD_DYN); x87_restoreround(dyn, ninst, u8); - if(!rex.w) + if (!rex.w) ZEROUP(gd); - if(!box64_dynarec_fastround) { - FRFLAGS(x5); // get back FPSR to check the IOC bit - ANDI(x5, x5, (1<<FR_NV)|(1<<FR_OF)); + if (!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, (1 << FR_NV) | (1 << FR_OF)); CBZ_NEXT(x5); - if(rex.w) { + if (rex.w) { MOV64x(gd, 0x8000000000000000LL); } else { MOV32w(gd, 0x80000000); @@ -257,31 +258,31 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int u8 = F8; int32_t idx; - idx = 4+((u8>>(0*2))&3); - LHU(x3, wback, fixedaddress+idx*2); - idx = 4+((u8>>(1*2))&3); - LHU(x4, wback, fixedaddress+idx*2); - idx = 4+((u8>>(2*2))&3); - LHU(x5, wback, fixedaddress+idx*2); - idx = 4+((u8>>(3*2))&3); - LHU(x6, wback, fixedaddress+idx*2); + idx = 4 + ((u8 >> (0 * 2)) & 3); + LHU(x3, wback, fixedaddress + idx * 2); + idx = 4 + ((u8 >> (1 * 2)) & 3); + LHU(x4, wback, fixedaddress + idx * 2); + idx = 4 + ((u8 >> (2 * 2)) & 3); + LHU(x5, wback, fixedaddress + idx * 2); + idx = 4 + ((u8 >> (3 * 2)) & 3); + LHU(x6, wback, fixedaddress + idx * 2); - SH(x3, gback, gdoffset+(4+0)*2); - SH(x4, gback, gdoffset+(4+1)*2); - SH(x5, gback, gdoffset+(4+2)*2); - SH(x6, gback, 
gdoffset+(4+3)*2); + SH(x3, gback, gdoffset + (4 + 0) * 2); + SH(x4, gback, gdoffset + (4 + 1) * 2); + SH(x5, gback, gdoffset + (4 + 2) * 2); + SH(x6, gback, gdoffset + (4 + 3) * 2); - if (!(MODREG && (gd==ed))) { - LD(x3, wback, fixedaddress+0); - SD(x3, gback, gdoffset+0); + if (!(MODREG && (gd == ed))) { + LD(x3, wback, fixedaddress + 0); + SD(x3, gback, gdoffset + 0); } break; case 0x7E: INST_NAME("MOVQ Gx, Ex"); nextop = F8; // Will load Gx as SD. Is that a good choice? - if(MODREG) { - v1 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3), 0); + if (MODREG) { + v1 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0); GETGXSD_empty(v0); FMVD(v0, v1); } else { @@ -290,7 +291,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0); FLD(v0, ed, fixedaddress); } - SD(xZR, xEmu, offsetof(x64emu_t, xmm[gd])+8); + SD(xZR, xEmu, offsetof(x64emu_t, xmm[gd]) + 8); break; case 0x7F: INST_NAME("MOVDQU Ex,Gx"); @@ -298,7 +299,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETEX(x2, 0); SSE_LOOP_MV_Q2(x3); - if(!MODREG) SMWRITE2(); + if (!MODREG) SMWRITE2(); break; case 0x5B: @@ -307,19 +308,19 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGX(); GETEX(x2, 0); v0 = fpu_get_scratch(dyn); - for(int i=0; i<4; ++i) { - if(!box64_dynarec_fastround) { + for (int i = 0; i < 4; ++i) { + if (!box64_dynarec_fastround) { FSFLAGSI(0); // reset all bits } - FLW(v0, wback, fixedaddress+i*4); + FLW(v0, wback, fixedaddress + i * 4); FCVTWS(x3, v0, RD_RTZ); - if(!box64_dynarec_fastround) { - FRFLAGS(x5); // get back FPSR to check the IOC bit - ANDI(x5, x5, (1<<FR_NV)|(1<<FR_OF)); + if (!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, (1 << FR_NV) | (1 << FR_OF)); BEQZ(x5, 8); MOV32w(x3, 0x80000000); } - SW(x3, gback, gdoffset+i*4); + 
SW(x3, gback, gdoffset + i * 4); } break; case 0xB8: @@ -329,17 +330,17 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETED(0); GETGD; - if(!rex.w && MODREG) { + if (!rex.w && MODREG) { AND(x4, ed, xMASK); ed = x4; } CLEAR_FLAGS(); BNE_MARK(ed, xZR); - ORI(xFlags, xFlags, 1<<F_ZF); + ORI(xFlags, xFlags, 1 << F_ZF); MOV32w(gd, 0); B_NEXT_nocond; MARK; - if(rv64_zbb) { + if (rv64_zbb) { CPOPxw(gd, ed); } else { TABLE64(x1, 0x5555555555555555uLL); @@ -371,75 +372,77 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETED(0); GETGD; - if(!rex.w && MODREG) { + if (!rex.w && MODREG) { AND(x4, ed, xMASK); ed = x4; } - ANDI(xFlags, xFlags, ~((1<<F_ZF) | (1<<F_CF))); + ANDI(xFlags, xFlags, ~((1 << F_ZF) | (1 << F_CF))); BNE_MARK(ed, xZR); - ORI(xFlags, xFlags, 1<<F_CF); - MOV32w(gd, rex.w?64:32); + ORI(xFlags, xFlags, 1 << F_CF); + MOV32w(gd, rex.w ? 64 : 32); B_NEXT_nocond; MARK; - if(rv64_zbb) { + if (rv64_zbb) { CTZxw(gd, ed); } else { NEG(x2, ed); AND(x2, x2, ed); TABLE64(x3, 0x03f79d71b4ca8b09ULL); MUL(x2, x2, x3); - SRLI(x2, x2, 64-6); + SRLI(x2, x2, 64 - 6); TABLE64(x1, (uintptr_t)&deBruijn64tab); ADD(x1, x1, x2); LBU(gd, x1, 0); } - BNE(gd, xZR, 4+4); - ORI(xFlags, xFlags, 1<<F_ZF); + BNE(gd, xZR, 4 + 4); + ORI(xFlags, xFlags, 1 << F_ZF); break; case 0xBD: INST_NAME("LZCNT Gd, Ed"); - SETFLAGS(X_ZF|X_CF, SF_SUBSET); + SETFLAGS(X_ZF | X_CF, SF_SUBSET); SET_DFNONE(); nextop = F8; GETED(0); GETGD; - if(!rex.w && MODREG) { + if (!rex.w && MODREG) { AND(x4, ed, xMASK); ed = x4; } BNE_MARK(ed, xZR); - MOV32w(gd, rex.w?64:32); - ANDI(xFlags, xFlags, ~(1<<F_ZF)); - ORI(xFlags, xFlags, 1<<F_CF); + MOV32w(gd, rex.w ? 
64 : 32); + ANDI(xFlags, xFlags, ~(1 << F_ZF)); + ORI(xFlags, xFlags, 1 << F_CF); B_NEXT_nocond; MARK; - if(rv64_zbb) { + if (rv64_zbb) { CLZxw(gd, ed); + } else if (rv64_xtheadbb) { + TH_FF0(gd, ed); } else { - if(ed!=gd) + if (ed != gd) u8 = gd; else u8 = x1; - ADDI(u8, xZR, rex.w?63:31); - if(rex.w) { + ADDI(u8, xZR, rex.w ? 63 : 31); + if (rex.w) { MV(x2, ed); SRLI(x3, x2, 32); - BEQZ(x3, 4+2*4); + BEQZ(x3, 4 + 2 * 4); SUBI(u8, u8, 32); MV(x2, x3); } else { AND(x2, ed, xMASK); } SRLI(x3, x2, 16); - BEQZ(x3, 4+2*4); + BEQZ(x3, 4 + 2 * 4); SUBI(u8, u8, 16); MV(x2, x3); SRLI(x3, x2, 8); - BEQZ(x3, 4+2*4); + BEQZ(x3, 4 + 2 * 4); SUBI(u8, u8, 8); MV(x2, x3); SRLI(x3, x2, 4); - BEQZ(x3, 4+2*4); + BEQZ(x3, 4 + 2 * 4); SUBI(u8, u8, 4); MV(x2, x3); ANDI(x2, x2, 0b1111); @@ -449,9 +452,9 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SUB(gd, u8, x2); MARK2; } - ANDI(xFlags, xFlags, ~((1<<F_ZF) | (1<<F_CF))); - BNE(gd, xZR, 4+4); - ORI(xFlags, xFlags, 1<<F_ZF); + ANDI(xFlags, xFlags, ~((1 << F_ZF) | (1 << F_CF))); + BNE(gd, xZR, 4 + 4); + ORI(xFlags, xFlags, 1 << F_ZF); break; case 0xC2: @@ -460,9 +463,9 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGXSS(d0); GETEXSS(d1, 1); u8 = F8; - if ((u8&7) == 0) { // Equal + if ((u8 & 7) == 0) { // Equal FEQS(x2, d0, d1); - } else if ((u8&7) == 4) { // Not Equal or unordered + } else if ((u8 & 7) == 4) { // Not Equal or unordered FEQS(x2, d0, d1); XORI(x2, x2, 1); } else { @@ -471,27 +474,33 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int FEQS(x2, d1, d1); AND(x2, x2, x3); - switch(u8&7) { - case 1: BEQ_MARK(x2, xZR); FLTS(x2, d0, d1); break; // Less than - case 2: BEQ_MARK(x2, xZR); FLES(x2, d0, d1); break; // Less or equal - case 3: XORI(x2, x2, 1); break; // NaN - case 5: { // Greater or equal or unordered - BEQ_MARK2(x2, xZR); - FLES(x2, d1, d0); - B_MARK_nocond; - break; - } - case 6: { // Greater or unordered, test 
inverted, N!=V so unordered or less than (inverted) - BEQ_MARK2(x2, xZR); - FLTS(x2, d1, d0); - B_MARK_nocond; - break; - } - case 7: break; // Not NaN + switch (u8 & 7) { + case 1: + BEQ_MARK(x2, xZR); + FLTS(x2, d0, d1); + break; // Less than + case 2: + BEQ_MARK(x2, xZR); + FLES(x2, d0, d1); + break; // Less or equal + case 3: XORI(x2, x2, 1); break; // NaN + case 5: { // Greater or equal or unordered + BEQ_MARK2(x2, xZR); + FLES(x2, d1, d0); + B_MARK_nocond; + break; + } + case 6: { // Greater or unordered, test inverted, N!=V so unordered or less than (inverted) + BEQ_MARK2(x2, xZR); + FLTS(x2, d1, d0); + B_MARK_nocond; + break; + } + case 7: break; // Not NaN } MARK2; - if ((u8&7) == 5 || (u8&7) == 6) { + if ((u8 & 7) == 5 || (u8 & 7) == 6) { MOV32w(x2, 1); } MARK; @@ -507,12 +516,12 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(x2, 0); q0 = fpu_get_scratch(dyn); q1 = fpu_get_scratch(dyn); - LW(x3, wback, fixedaddress+0); - LW(x4, wback, fixedaddress+4); + LW(x3, wback, fixedaddress + 0); + LW(x4, wback, fixedaddress + 4); FCVTDW(q0, x3, RD_RTZ); FCVTDW(q1, x4, RD_RTZ); - FSD(q0, gback, gdoffset+0); - FSD(q1, gback, gdoffset+8); + FSD(q0, gback, gdoffset + 0); + FSD(q1, gback, gdoffset + 8); break; default: diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index 1caebc90..63e62ad1 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -30,637 +30,829 @@ f18–27 fs2–11 FP saved registers Callee f28–31 ft8–11 FP temporaries Caller */ // x86 Register mapping -#define xRAX 16 -#define xRCX 17 -#define xRDX 18 -#define xRBX 19 -#define xRSP 20 -#define xRBP 21 -#define xRSI 22 -#define xRDI 23 -#define xR8 24 -#define xR9 25 -#define xR10 26 -#define xR11 27 -#define xR12 28 -#define xR13 29 -#define xR14 30 -#define xR15 31 -#define xFlags 8 -#define xRIP 7 +#define xRAX 16 +#define xRCX 17 +#define xRDX 18 +#define xRBX 19 +#define xRSP 20 +#define xRBP 21 
+#define xRSI 22 +#define xRDI 23 +#define xR8 24 +#define xR9 25 +#define xR10 26 +#define xR11 27 +#define xR12 28 +#define xR13 29 +#define xR14 30 +#define xR15 31 +#define xFlags 8 +#define xRIP 7 // 32bits version -#define wEAX xRAX -#define wECX xRCX -#define wEDX xRDX -#define wEBX xRBX -#define wESP xRSP -#define wEBP xRBP -#define wESI xRSI -#define wEDI xRDI -#define wR8 xR8 -#define wR9 xR9 -#define wR10 xR10 -#define wR11 xR11 -#define wR12 xR12 -#define wR13 xR13 -#define wR14 xR14 -#define wR15 xR15 -#define wFlags xFlags +#define wEAX xRAX +#define wECX xRCX +#define wEDX xRDX +#define wEBX xRBX +#define wESP xRSP +#define wEBP xRBP +#define wESI xRSI +#define wEDI xRDI +#define wR8 xR8 +#define wR9 xR9 +#define wR10 xR10 +#define wR11 xR11 +#define wR12 xR12 +#define wR13 xR13 +#define wR14 xR14 +#define wR15 xR15 +#define wFlags xFlags // scratch registers -#define x1 11 -#define x2 12 -#define x3 13 -#define x4 14 -#define x5 15 -#define x6 6 -#define x9 9 +#define x1 11 +#define x2 12 +#define x3 13 +#define x4 14 +#define x5 15 +#define x6 6 +#define x9 9 // used to clear the upper 32bits -#define xMASK 5 +#define xMASK 5 // 32bits version of scratch -#define w1 x1 -#define w2 x2 -#define w3 x3 -#define w4 x4 -#define w5 x5 -#define w6 x6 +#define w1 x1 +#define w2 x2 +#define w3 x3 +#define w4 x4 +#define w5 x5 +#define w6 x6 // emu is r10 -#define xEmu 10 +#define xEmu 10 // RV64 RA -#define xRA 1 -#define xSP 2 +#define xRA 1 +#define xSP 2 // RV64 args -#define A0 10 -#define A1 11 -#define A2 12 -#define A3 13 -#define A4 14 -#define A5 15 -#define A6 16 -#define A7 17 +#define A0 10 +#define A1 11 +#define A2 12 +#define A3 13 +#define A4 14 +#define A5 15 +#define A6 16 +#define A7 17 // xZR reg is 0 -#define xZR 0 -#define wZR xZR +#define xZR 0 +#define wZR xZR // replacement for F_OF internaly, using a reserved bit. Need to use F_OF2 internaly, never F_OF directly! 
-#define F_OF2 F_res3 +#define F_OF2 F_res3 // split a 32bits value in 20bits + 12bits, adjust the upper part is 12bits is negative -#define SPLIT20(A) (((A)+0x800)>>12) -#define SPLIT12(A) ((A)&0xfff) +#define SPLIT20(A) (((A) + 0x800) >> 12) +#define SPLIT12(A) ((A)&0xfff) // MOV64x/MOV32w is quite complex, so use a function for this -#define MOV64x(A, B) rv64_move64(dyn, ninst, A, B) -#define MOV32w(A, B) rv64_move32(dyn, ninst, A, B, 1) -#define MOV64xw(A, B) if(rex.w) {MOV64x(A, B);} else {MOV32w(A, B);} -#define MOV64z(A, B) if(rex.is32bits) {MOV32w(A, B);} else {MOV64x(A, B);} +#define MOV64x(A, B) rv64_move64(dyn, ninst, A, B) +#define MOV32w(A, B) rv64_move32(dyn, ninst, A, B, 1) +#define MOV64xw(A, B) \ + if (rex.w) { \ + MOV64x(A, B); \ + } else { \ + MOV32w(A, B); \ + } +#define MOV64z(A, B) \ + if (rex.is32bits) { \ + MOV32w(A, B); \ + } else { \ + MOV64x(A, B); \ + } // ZERO the upper part -#define ZEROUP(r) AND(r, r, xMASK) +#define ZEROUP(r) AND(r, r, xMASK) -#define R_type(funct7, rs2, rs1, funct3, rd, opcode) ((funct7)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<12 | (rd)<<7 | (opcode)) -#define I_type(imm12, rs1, funct3, rd, opcode) ((imm12)<<20 | (rs1)<<15 | (funct3)<<12 | (rd)<<7 | (opcode)) -#define S_type(imm12, rs2, rs1, funct3, opcode) (((imm12)>>5)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<12 | ((imm12)&31)<<7 | (opcode)) -#define B_type(imm13, rs2, rs1, funct3, opcode) ((((imm13)>>12)&1)<<31 | (((imm13)>>5)&63)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<12 | (((imm13)>>1)&15)<<8 | (((imm13)>>11)&1)<<7 | (opcode)) -#define U_type(imm32, rd, opcode) (((imm32)>>12)<<12 | (rd)<<7 | (opcode)) -#define J_type(imm21, rd, opcode) ((((imm21)>>20)&1)<<31 | (((imm21)>>1)&0b1111111111)<<21 | (((imm21)>>11)&1)<<20 | (((imm21)>>12)&0b11111111)<<12 | (rd)<<7 | (opcode)) +#define R_type(funct7, rs2, rs1, funct3, rd, opcode) ((funct7) << 25 | (rs2) << 20 | (rs1) << 15 | (funct3) << 12 | (rd) << 7 | (opcode)) +#define I_type(imm12, rs1, funct3, rd, opcode) ((imm12) 
<< 20 | (rs1) << 15 | (funct3) << 12 | (rd) << 7 | (opcode)) +#define S_type(imm12, rs2, rs1, funct3, opcode) (((imm12) >> 5) << 25 | (rs2) << 20 | (rs1) << 15 | (funct3) << 12 | ((imm12)&31) << 7 | (opcode)) +#define B_type(imm13, rs2, rs1, funct3, opcode) ((((imm13) >> 12) & 1) << 31 | (((imm13) >> 5) & 63) << 25 | (rs2) << 20 | (rs1) << 15 | (funct3) << 12 | (((imm13) >> 1) & 15) << 8 | (((imm13) >> 11) & 1) << 7 | (opcode)) +#define U_type(imm32, rd, opcode) (((imm32) >> 12) << 12 | (rd) << 7 | (opcode)) +#define J_type(imm21, rd, opcode) ((((imm21) >> 20) & 1) << 31 | (((imm21) >> 1) & 0b1111111111) << 21 | (((imm21) >> 11) & 1) << 20 | (((imm21) >> 12) & 0b11111111) << 12 | (rd) << 7 | (opcode)) // RV32I // put imm20 in the [31:12] bits of rd, zero [11:0] and sign extend bits31 -#define LUI(rd, imm20) EMIT(U_type((imm20)<<12, rd, 0b0110111)) +#define LUI(rd, imm20) EMIT(U_type((imm20) << 12, rd, 0b0110111)) // put PC+imm20 in rd -#define AUIPC(rd, imm20) EMIT(U_type((imm20)<<12, rd, 0b0010111)) +#define AUIPC(rd, imm20) EMIT(U_type((imm20) << 12, rd, 0b0010111)) -#define JAL_gen(rd, imm21) J_type(imm21, rd, 0b1101111) +#define JAL_gen(rd, imm21) J_type(imm21, rd, 0b1101111) // Unconditional branch, no return address set -#define B(imm21) EMIT(JAL_gen(xZR, imm21)) -#define B__(reg1, reg2, imm21) B(imm21) +#define B(imm21) EMIT(JAL_gen(xZR, imm21)) +#define B__(reg1, reg2, imm21) B(imm21) // Unconditional branch, return set to xRA -#define JAL(imm21) EMIT(JAL_gen(xRA, imm21)) +#define JAL(imm21) EMIT(JAL_gen(xRA, imm21)) // Unconditional branch, without link -#define J(imm21) EMIT(JAL_gen(xZR, imm21)) +#define J(imm21) EMIT(JAL_gen(xZR, imm21)) -#define JALR_gen(rd, rs1, imm12) I_type(imm12, rs1, 0b000, rd, 0b1100111) +#define JALR_gen(rd, rs1, imm12) I_type(imm12, rs1, 0b000, rd, 0b1100111) // Unconditionnal branch to r, no return address set -#define BR(r) EMIT(JALR_gen(xZR, r, 0)) +#define BR(r) EMIT(JALR_gen(xZR, r, 0)) // Unconditionnal branch to r+i12, no 
return address set -#define BR_I12(r, imm12) EMIT(JALR_gen(xZR, r, (imm12)&0b111111111111)) +#define BR_I12(r, imm12) EMIT(JALR_gen(xZR, r, (imm12)&0b111111111111)) // Unconditionnal branch to r, return address set to xRA -#define JALR(r) EMIT(JALR_gen(xRA, r, 0)) +#define JALR(r) EMIT(JALR_gen(xRA, r, 0)) // Unconditionnal branch to r+i12, return address set to xRA -#define JALR_I12(r, imm12) EMIT(JALR_gen(xRA, r, (imm12)&0b111111111111)) +#define JALR_I12(r, imm12) EMIT(JALR_gen(xRA, r, (imm12)&0b111111111111)) // rd = rs1 + imm12 -#define ADDI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b000, rd, 0b0010011)) +#define ADDI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b000, rd, 0b0010011)) // rd = rs1 - imm12 (pseudo instruction) -#define SUBI(rd, rs1, imm12) EMIT(I_type((-(imm12))&0b111111111111, rs1, 0b000, rd, 0b0010011)) +#define SUBI(rd, rs1, imm12) EMIT(I_type((-(imm12)) & 0b111111111111, rs1, 0b000, rd, 0b0010011)) // rd = (rs1<imm12)?1:0 -#define SLTI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b010, rd, 0b0010011)) +#define SLTI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b010, rd, 0b0010011)) // rd = (rs1<imm12)?1:0 unsigned -#define SLTIU(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b011, rd, 0b0010011)) +#define SLTIU(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b011, rd, 0b0010011)) // rd = rs1 ^ imm12 -#define XORI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b100, rd, 0b0010011)) +#define XORI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b100, rd, 0b0010011)) // rd = rs1 | imm12 -#define ORI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b110, rd, 0b0010011)) +#define ORI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b110, rd, 0b0010011)) // rd = rs1 & imm12 -#define ANDI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b111, rd, 0b0010011)) +#define ANDI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 
0b111, rd, 0b0010011)) // rd = imm12 -#define MOV_U12(rd, imm12) ADDI(rd, xZR, imm12) +#define MOV_U12(rd, imm12) ADDI(rd, xZR, imm12) // nop -#define NOP() ADDI(xZR, xZR, 0) +#define NOP() ADDI(xZR, xZR, 0) // rd = rs1 + rs2 -#define ADD(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, 0b0110011)) +#define ADD(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, 0b0110011)) // rd = rs1 + rs2 -#define ADDW(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, 0b0111011)) +#define ADDW(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, 0b0111011)) // rd = rs1 + rs2 -#define ADDxw(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, rex.w?0b0110011:0b0111011)) +#define ADDxw(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, rex.w ? 0b0110011 : 0b0111011)) // rd = rs1 + rs2 -#define ADDz(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, rex.is32bits?0b0111011:0b0110011)) +#define ADDz(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, rex.is32bits ? 0b0111011 : 0b0110011)) // rd = rs1 - rs2 -#define SUB(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, 0b0110011)) +#define SUB(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, 0b0110011)) // rd = rs1 - rs2 -#define SUBW(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, 0b0111011)) +#define SUBW(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, 0b0111011)) // rd = rs1 - rs2 -#define SUBxw(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, rex.w?0b0110011:0b0111011)) +#define SUBxw(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, rex.w ? 0b0110011 : 0b0111011)) // rd = rs1 - rs2 -#define SUBz(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, rex.is32bits?0b0111011:0b0110011)) +#define SUBz(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, rex.is32bits ? 
0b0111011 : 0b0110011)) // rd = rs1<<rs2 -#define SLL(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b001, rd, 0b0110011)) +#define SLL(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b001, rd, 0b0110011)) // rd = (rs1<rs2)?1:0 -#define SLT(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b010, rd, 0b0110011)) +#define SLT(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b010, rd, 0b0110011)) // rd = (rs1<rs2)?1:0 Unsigned -#define SLTU(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b011, rd, 0b0110011)) +#define SLTU(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b011, rd, 0b0110011)) // rd = rs1 ^ rs2 -#define XOR(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b100, rd, 0b0110011)) +#define XOR(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b100, rd, 0b0110011)) // rd = rs1 ^ rs2 -#define XORxw(rd, rs1, rs2) do{ XOR(rd, rs1, rs2); if (!rex.w) ZEROUP(rd); }while(0) +#define XORxw(rd, rs1, rs2) \ + do { \ + XOR(rd, rs1, rs2); \ + if (!rex.w) ZEROUP(rd); \ + } while (0) // rd = rs1>>rs2 logical -#define SRL(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b101, rd, 0b0110011)) +#define SRL(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b101, rd, 0b0110011)) // rd = rs1>>rs2 arithmetic -#define SRA(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b101, rd, 0b0110011)) +#define SRA(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b101, rd, 0b0110011)) // rd = rs1 | rs2 -#define OR(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b110, rd, 0b0110011)) +#define OR(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b110, rd, 0b0110011)) // rd = rs1 & rs2 -#define AND(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b111, rd, 0b0110011)) +#define AND(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b111, rd, 0b0110011)) // rd = rs1 (pseudo instruction) -#define MV(rd, rs1) ADDI(rd, rs1, 0) +#define MV(rd, rs1) ADDI(rd, rs1, 0) // rd = rs1 (pseudo instruction) -#define MVxw(rd, rs1) if(rex.w) {MV(rd, rs1);} else {AND(rd, rs1, xMASK);} +#define MVxw(rd, rs1) \ + if (rex.w) { \ + 
MV(rd, rs1); \ + } else { \ + AND(rd, rs1, xMASK); \ + } // rd = rs1 (pseudo instruction) -#define MVz(rd, rs1) if(rex.is32bits) {AND(rd, rs1, xMASK);} else {MV(rd, rs1);} +#define MVz(rd, rs1) \ + if (rex.is32bits) { \ + AND(rd, rs1, xMASK); \ + } else { \ + MV(rd, rs1); \ + } // rd = !rs1 -#define NOT(rd, rs1) XORI(rd, rs1, -1) +#define NOT(rd, rs1) XORI(rd, rs1, -1) // rd = -rs1 -#define NEG(rd, rs1) SUB(rd, xZR, rs1) +#define NEG(rd, rs1) SUB(rd, xZR, rs1) // rd = -rs1 -#define NEGxw(rd, rs1) SUBxw(rd, xZR, rs1) +#define NEGxw(rd, rs1) SUBxw(rd, xZR, rs1) // rd = rs1 == 0 -#define SEQZ(rd, rs1) SLTIU(rd, rs1, 1) +#define SEQZ(rd, rs1) SLTIU(rd, rs1, 1) // rd = rs1 != 0 -#define SNEZ(rd, rs1) SLTU(rd, xZR, rs1) +#define SNEZ(rd, rs1) SLTU(rd, xZR, rs1) -#define BEQ(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b000, 0b1100011)) -#define BNE(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b001, 0b1100011)) -#define BLT(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b100, 0b1100011)) -#define BGE(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b101, 0b1100011)) -#define BLTU(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b110, 0b1100011)) -#define BGEU(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b111, 0b1100011)) +#define BEQ(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b000, 0b1100011)) +#define BNE(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b001, 0b1100011)) +#define BLT(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b100, 0b1100011)) +#define BGE(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b101, 0b1100011)) +#define BLTU(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b110, 0b1100011)) +#define BGEU(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b111, 0b1100011)) // TODO: Find a better way to have conditionnal jumps? 
Imm is a relative jump address, so the the 2nd jump needs to be addapted -#define BEQ_safe(rs1, rs2, imm) if((imm)>-0x1000 && (imm)<0x1000) {BEQ(rs1, rs2, imm); NOP();} else {BNE(rs1, rs2, 8); B(imm-4);} -#define BNE_safe(rs1, rs2, imm) if((imm)>-0x1000 && (imm)<0x1000) {BNE(rs1, rs2, imm); NOP();} else {BEQ(rs1, rs2, 8); B(imm-4);} -#define BLT_safe(rs1, rs2, imm) if((imm)>-0x1000 && (imm)<0x1000) {BLT(rs1, rs2, imm); NOP();} else {BGE(rs2, rs1, 8); B(imm-4);} -#define BGE_safe(rs1, rs2, imm) if((imm)>-0x1000 && (imm)<0x1000) {BGE(rs1, rs2, imm); NOP();} else {BLT(rs2, rs1, 8); B(imm-4);} -#define BLTU_safe(rs1, rs2, imm) if((imm)>-0x1000 && (imm)<0x1000) {BLTU(rs1, rs2, imm); NOP();} else {BGEU(rs2, rs1, 8); B(imm-4);} -#define BGEU_safe(rs1, rs2, imm) if((imm)>-0x1000 && (imm)<0x1000) {BGEU(rs1, rs2, imm); NOP();} else {BLTU(rs2, rs1, 8); B(imm-4);} +#define BEQ_safe(rs1, rs2, imm) \ + if ((imm) > -0x1000 && (imm) < 0x1000) { \ + BEQ(rs1, rs2, imm); \ + NOP(); \ + } else { \ + BNE(rs1, rs2, 8); \ + B(imm - 4); \ + } +#define BNE_safe(rs1, rs2, imm) \ + if ((imm) > -0x1000 && (imm) < 0x1000) { \ + BNE(rs1, rs2, imm); \ + NOP(); \ + } else { \ + BEQ(rs1, rs2, 8); \ + B(imm - 4); \ + } +#define BLT_safe(rs1, rs2, imm) \ + if ((imm) > -0x1000 && (imm) < 0x1000) { \ + BLT(rs1, rs2, imm); \ + NOP(); \ + } else { \ + BGE(rs2, rs1, 8); \ + B(imm - 4); \ + } +#define BGE_safe(rs1, rs2, imm) \ + if ((imm) > -0x1000 && (imm) < 0x1000) { \ + BGE(rs1, rs2, imm); \ + NOP(); \ + } else { \ + BLT(rs2, rs1, 8); \ + B(imm - 4); \ + } +#define BLTU_safe(rs1, rs2, imm) \ + if ((imm) > -0x1000 && (imm) < 0x1000) { \ + BLTU(rs1, rs2, imm); \ + NOP(); \ + } else { \ + BGEU(rs2, rs1, 8); \ + B(imm - 4); \ + } +#define BGEU_safe(rs1, rs2, imm) \ + if ((imm) > -0x1000 && (imm) < 0x1000) { \ + BGEU(rs1, rs2, imm); \ + NOP(); \ + } else { \ + BLTU(rs2, rs1, 8); \ + B(imm - 4); \ + } -#define BEQZ(rs1, imm13) BEQ(rs1, 0, imm13) -#define BNEZ(rs1, imm13) BNE(rs1, 0, imm13) +#define BEQZ(rs1, 
imm13) BEQ(rs1, 0, imm13) +#define BNEZ(rs1, imm13) BNE(rs1, 0, imm13) -#define BEQZ_safe(rs1, imm) if((imm)>-0x1000 && (imm)<0x1000) {BEQZ(rs1, imm); NOP();} else {BNEZ(rs1, 8); B(imm-4);} -#define BNEZ_safe(rs1, imm) if((imm)>-0x1000 && (imm)<0x1000) {BNEZ(rs1, imm); NOP();} else {BEQZ(rs1, 8); B(imm-4);} +#define BEQZ_safe(rs1, imm) \ + if ((imm) > -0x1000 && (imm) < 0x1000) { \ + BEQZ(rs1, imm); \ + NOP(); \ + } else { \ + BNEZ(rs1, 8); \ + B(imm - 4); \ + } +#define BNEZ_safe(rs1, imm) \ + if ((imm) > -0x1000 && (imm) < 0x1000) { \ + BNEZ(rs1, imm); \ + NOP(); \ + } else { \ + BEQZ(rs1, 8); \ + B(imm - 4); \ + } // rd = 4-bytes[rs1+imm12] signed extended -#define LW(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b010, rd, 0b0000011)) +#define LW(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b010, rd, 0b0000011)) // rd = 2-bytes[rs1+imm12] signed extended -#define LH(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b001, rd, 0b0000011)) +#define LH(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b001, rd, 0b0000011)) // rd = byte[rs1+imm12] signed extended -#define LB(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b000, rd, 0b0000011)) +#define LB(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b000, rd, 0b0000011)) // rd = 2-bytes[rs1+imm12] zero extended -#define LHU(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b101, rd, 0b0000011)) +#define LHU(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b101, rd, 0b0000011)) // rd = byte[rs1+imm12] zero extended -#define LBU(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b100, rd, 0b0000011)) +#define LBU(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b100, rd, 0b0000011)) // byte[rs1+imm12] = rs2 -#define SB(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b000, 0b0100011)) +#define SB(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b000, 0b0100011)) // 2-bytes[rs1+imm12] = rs2 -#define SH(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b001, 0b0100011)) +#define SH(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b001, 0b0100011)) // 4-bytes[rs1+imm12] = rs2 -#define SW(rs2, rs1, 
imm12) EMIT(S_type(imm12, rs2, rs1, 0b010, 0b0100011)) - -#define PUSH1(reg) do {SD(reg, xRSP, -8); SUBI(xRSP, xRSP, 8);} while(0) -#define POP1(reg) do {LD(reg, xRSP, 0); if (reg!=xRSP) ADDI(xRSP, xRSP, 8);} while(0) -#define PUSH1_32(reg) do {SW(reg, xRSP, -4); SUBIW(xRSP, xRSP, 4);} while(0) -#define POP1_32(reg) do {LWU(reg, xRSP, 0); if (reg!=xRSP) ADDIW(xRSP, xRSP, 4);} while(0) - -#define POP1z(reg) if(rex.is32bits) {POP1_32(reg);} else {POP1(reg);} -#define PUSH1z(reg) if(rex.is32bits) {PUSH1_32(reg);} else {PUSH1(reg);} +#define SW(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b010, 0b0100011)) + +#define PUSH1(reg) \ + do { \ + SD(reg, xRSP, -8); \ + SUBI(xRSP, xRSP, 8); \ + } while (0) +#define POP1(reg) \ + do { \ + LD(reg, xRSP, 0); \ + if (reg != xRSP) ADDI(xRSP, xRSP, 8); \ + } while (0) +#define PUSH1_32(reg) \ + do { \ + SW(reg, xRSP, -4); \ + SUBIW(xRSP, xRSP, 4); \ + } while (0) +#define POP1_32(reg) \ + do { \ + LWU(reg, xRSP, 0); \ + if (reg != xRSP) ADDIW(xRSP, xRSP, 4); \ + } while (0) + +#define POP1z(reg) \ + if (rex.is32bits) { \ + POP1_32(reg); \ + } else { \ + POP1(reg); \ + } +#define PUSH1z(reg) \ + if (rex.is32bits) { \ + PUSH1_32(reg); \ + } else { \ + PUSH1(reg); \ + } -#define FENCE_gen(pred, succ) (((pred)<<24) | ((succ)<<20) | 0b0001111) -#define FENCE() EMIT(FENCE_gen(3, 3)) +#define FENCE_gen(pred, succ) (((pred) << 24) | ((succ) << 20) | 0b0001111) +#define FENCE() EMIT(FENCE_gen(3, 3)) -#define FENCE_I_gen() ((0b001<<12) | 0b0001111) -#define FENCE_I() EMIT(FENCE_I_gen()) +#define FENCE_I_gen() ((0b001 << 12) | 0b0001111) +#define FENCE_I() EMIT(FENCE_I_gen()) -#define EBREAK() EMIT(I_type(1, 0, 0, 0, 0b1110011)) +#define EBREAK() EMIT(I_type(1, 0, 0, 0, 0b1110011)) // RV64I -#define LWU(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b110, rd, 0b0000011)) +#define LWU(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b110, rd, 0b0000011)) // rd = [rs1 + imm12] -#define LD(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b011, rd, 0b0000011)) 
+#define LD(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b011, rd, 0b0000011)) // rd = [rs1 + imm12] -#define LDxw(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b011<<(1-rex.w), rd, 0b0000011)) +#define LDxw(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b011 << (1 - rex.w), rd, 0b0000011)) // rd = [rs1 + imm12] -#define LDz(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b011<<rex.is32bits, rd, 0b0000011)) +#define LDz(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b011 << rex.is32bits, rd, 0b0000011)) // [rs1 + imm12] = rs2 -#define SD(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b011, 0b0100011)) +#define SD(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b011, 0b0100011)) // [rs1 + imm12] = rs2 -#define SDxw(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b010+rex.w, 0b0100011)) +#define SDxw(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b010 + rex.w, 0b0100011)) // [rs1 + imm12] = rs2 -#define SDz(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b010+(1-rex.is32bits), 0b0100011)) +#define SDz(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b010 + (1 - rex.is32bits), 0b0100011)) // Shift Left Immediate -#define SLLI(rd, rs1, imm6) EMIT(I_type(imm6, rs1, 0b001, rd, 0b0010011)) +#define SLLI(rd, rs1, imm6) EMIT(I_type(imm6, rs1, 0b001, rd, 0b0010011)) // Shift Right Logical Immediate -#define SRLI(rd, rs1, imm6) EMIT(I_type(imm6, rs1, 0b101, rd, 0b0010011)) +#define SRLI(rd, rs1, imm6) EMIT(I_type(imm6, rs1, 0b101, rd, 0b0010011)) // Shift Right Arithmetic Immediate -#define SRAI(rd, rs1, imm6) EMIT(I_type((imm6)|(0b010000<<6), rs1, 0b101, rd, 0b0010011)) +#define SRAI(rd, rs1, imm6) EMIT(I_type((imm6) | (0b010000 << 6), rs1, 0b101, rd, 0b0010011)) // rd = rs1 + imm12 -#define ADDIW(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b000, rd, 0b0011011)) +#define ADDIW(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b000, rd, 0b0011011)) // rd = rs1 - imm12 -#define SUBIW(rd, rs1, imm12) EMIT(I_type((-imm12)&0b111111111111, rs1, 0b000, rd, 0b0011011)) +#define SUBIW(rd, rs1, 
imm12) EMIT(I_type((-imm12) & 0b111111111111, rs1, 0b000, rd, 0b0011011)) // rd = rs1 + imm12 -#define ADDIxw(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b000, rd, rex.w?0b0010011:0b0011011)) +#define ADDIxw(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b000, rd, rex.w ? 0b0010011 : 0b0011011)) // rd = rs1 + imm12 -#define ADDIz(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b000, rd, rex.is32bits?0b0011011:0b0010011)) +#define ADDIz(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b000, rd, rex.is32bits ? 0b0011011 : 0b0010011)) // rd = rs1 + (rs2 << imm2) -#define ADDSL(rd, rs1, rs2, imm2, scratch) if (!(imm2)) { \ - ADD(rd, rs1, rs2); \ - } else if (rv64_zba) { \ - SHxADD(rd, rs2, imm2, rs1); \ - } else if (rv64_xtheadba) { \ - TH_ADDSL(rd, rs1, rs2, imm2); \ - } else { \ - SLLI(scratch, rs2, imm2); \ - ADD(rd, rs1, scratch); \ - } \ - -#define SEXT_W(rd, rs1) ADDIW(rd, rs1, 0) +#define ADDSL(rd, rs1, rs2, imm2, scratch) \ + if (!(imm2)) { \ + ADD(rd, rs1, rs2); \ + } else if (rv64_zba) { \ + SHxADD(rd, rs2, imm2, rs1); \ + } else if (rv64_xtheadba) { \ + TH_ADDSL(rd, rs1, rs2, imm2); \ + } else { \ + SLLI(scratch, rs2, imm2); \ + ADD(rd, rs1, scratch); \ + } + +#define SEXT_W(rd, rs1) ADDIW(rd, rs1, 0) // rd = rs1<<rs2 -#define SLLW(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b001, rd, 0b0111011)) +#define SLLW(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b001, rd, 0b0111011)) // rd = rs1>>rs2 logical -#define SRLW(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b101, rd, 0b0111011)) +#define SRLW(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b101, rd, 0b0111011)) // rd = rs1>>rs2 arithmetic -#define SRAW(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b101, rd, 0b0111011)) - -#define SLLxw(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b001, rd, rex.w?0b0110011:0b0111011)) -#define SRLxw(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b101, rd, rex.w?0b0110011:0b0111011)) -#define SRAxw(rd, rs1, rs2) 
if(rex.w) {SRA(rd, rs1, rs2);} else {SRAW(rd, rs1, rs2); ZEROUP(rd);} +#define SRAW(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b101, rd, 0b0111011)) + +#define SLLxw(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b001, rd, rex.w ? 0b0110011 : 0b0111011)) +#define SRLxw(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b101, rd, rex.w ? 0b0110011 : 0b0111011)) +#define SRAxw(rd, rs1, rs2) \ + if (rex.w) { \ + SRA(rd, rs1, rs2); \ + } else { \ + SRAW(rd, rs1, rs2); \ + ZEROUP(rd); \ + } // Shift Left Immediate, 32-bit, sign-extended -#define SLLIW(rd, rs1, imm5) EMIT(I_type(imm5, rs1, 0b001, rd, 0b0011011)) +#define SLLIW(rd, rs1, imm5) EMIT(I_type(imm5, rs1, 0b001, rd, 0b0011011)) // Shift Left Immediate -#define SLLIxw(rd, rs1, imm) if (rex.w) { SLLI(rd, rs1, imm); } else { SLLIW(rd, rs1, imm); } +#define SLLIxw(rd, rs1, imm) \ + if (rex.w) { \ + SLLI(rd, rs1, imm); \ + } else { \ + SLLIW(rd, rs1, imm); \ + } // Shift Right Logical Immediate, 32-bit, sign-extended -#define SRLIW(rd, rs1, imm5) EMIT(I_type(imm5, rs1, 0b101, rd, 0b0011011)) +#define SRLIW(rd, rs1, imm5) EMIT(I_type(imm5, rs1, 0b101, rd, 0b0011011)) // Shift Right Logical Immediate -#define SRLIxw(rd, rs1, imm) if (rex.w) { SRLI(rd, rs1, imm); } else { SRLIW(rd, rs1, imm); } +#define SRLIxw(rd, rs1, imm) \ + if (rex.w) { \ + SRLI(rd, rs1, imm); \ + } else { \ + SRLIW(rd, rs1, imm); \ + } // Shift Right Arithmetic Immediate, 32-bit, sign-extended -#define SRAIW(rd, rs1, imm5) EMIT(I_type((imm5)|(0b0100000<<5), rs1, 0b101, rd, 0b0011011)) +#define SRAIW(rd, rs1, imm5) EMIT(I_type((imm5) | (0b0100000 << 5), rs1, 0b101, rd, 0b0011011)) // Shift Right Arithmetic Immediate -#define SRAIxw(rd, rs1, imm) if (rex.w) { SRAI(rd, rs1, imm); } else { SRAIW(rd, rs1, imm); } +#define SRAIxw(rd, rs1, imm) \ + if (rex.w) { \ + SRAI(rd, rs1, imm); \ + } else { \ + SRAIW(rd, rs1, imm); \ + } -#define CSRRW(rd, rs1, csr) EMIT(I_type(csr, rs1, 0b001, rd, 0b1110011)) -#define CSRRS(rd, rs1, csr) EMIT(I_type(csr, rs1, 
0b010, rd, 0b1110011)) -#define CSRRC(rd, rs1, csr) EMIT(I_type(csr, rs1, 0b011, rd, 0b1110011)) -#define CSRRWI(rd, imm, csr) EMIT(I_type(csr, imm, 0b101, rd, 0b1110011)) -#define CSRRSI(rd, imm, csr) EMIT(I_type(csr, imm, 0b110, rd, 0b1110011)) -#define CSRRCI(rd, imm, csr) EMIT(I_type(csr, imm, 0b111, rd, 0b1110011)) +#define CSRRW(rd, rs1, csr) EMIT(I_type(csr, rs1, 0b001, rd, 0b1110011)) +#define CSRRS(rd, rs1, csr) EMIT(I_type(csr, rs1, 0b010, rd, 0b1110011)) +#define CSRRC(rd, rs1, csr) EMIT(I_type(csr, rs1, 0b011, rd, 0b1110011)) +#define CSRRWI(rd, imm, csr) EMIT(I_type(csr, imm, 0b101, rd, 0b1110011)) +#define CSRRSI(rd, imm, csr) EMIT(I_type(csr, imm, 0b110, rd, 0b1110011)) +#define CSRRCI(rd, imm, csr) EMIT(I_type(csr, imm, 0b111, rd, 0b1110011)) // RV32M // rd =(lower) rs1 * rs2 (both signed) -#define MUL(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b000, rd, 0b0110011)) +#define MUL(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b000, rd, 0b0110011)) // rd =(upper) rs1 * rs2 (both signed) -#define MULH(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b001, rd, 0b0110011)) +#define MULH(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b001, rd, 0b0110011)) // rd =(upper) (signed)rs1 * (unsigned)rs2 -#define MULHSU(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b010, rd, 0b0110011)) +#define MULHSU(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b010, rd, 0b0110011)) // rd =(upper) rs1 * rs2 (both unsigned) -#define MULHU(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b011, rd, 0b0110011)) +#define MULHU(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b011, rd, 0b0110011)) // rd =(upper) rs1 / rs2 -#define DIV(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b100, rd, 0b0110011)) -#define DIVU(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b101, rd, 0b0110011)) +#define DIV(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b100, rd, 0b0110011)) +#define DIVU(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b101, rd, 0b0110011)) // rd = rs1 mod rs2 
-#define REM(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b110, rd, 0b0110011)) -#define REMU(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b111, rd, 0b0110011)) +#define REM(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b110, rd, 0b0110011)) +#define REMU(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b111, rd, 0b0110011)) // RV64M // rd = rs1 * rs2 -#define MULW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b000, rd, 0b0111011)) +#define MULW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b000, rd, 0b0111011)) // rd = rs1 * rs2 -#define MULxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b000, rd, rex.w?0b0110011:0b0111011)) +#define MULxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b000, rd, rex.w ? 0b0110011 : 0b0111011)) // rd = rs1 / rs2 -#define DIVW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b100, rd, 0b0111011)) -#define DIVxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b100, rd, rex.w?0b0110011:0b0111011)) -#define DIVUW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b101, rd, 0b0111011)) -#define DIVUxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b101, rd, rex.w?0b0110011:0b0111011)) +#define DIVW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b100, rd, 0b0111011)) +#define DIVxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b100, rd, rex.w ? 0b0110011 : 0b0111011)) +#define DIVUW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b101, rd, 0b0111011)) +#define DIVUxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b101, rd, rex.w ? 
0b0110011 : 0b0111011)) // rd = rs1 mod rs2 -#define REMW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b110, rd, 0b0111011)) -#define REMxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b110, rd, rex.w?0b0110011:0b0111011)) -#define REMUW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b111, rd, 0b0111011)) -#define REMUxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b111, rd, rex.w?0b0110011:0b0111011)) +#define REMW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b110, rd, 0b0111011)) +#define REMxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b110, rd, rex.w ? 0b0110011 : 0b0111011)) +#define REMUW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b111, rd, 0b0111011)) +#define REMUxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b111, rd, rex.w ? 0b0110011 : 0b0111011)) -#define AQ_RL(f5, aq, rl) ((f5 << 2) | ((aq&1) << 1) | (rl&1)) +#define AQ_RL(f5, aq, rl) ((f5 << 2) | ((aq & 1) << 1) | (rl & 1)) // RV32A -#define LR_W(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b010, rd, 0b0101111)) -#define SC_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) +#define LR_W(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b010, rd, 0b0101111)) +#define SC_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) -#define AMOSWAP_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00001, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) +#define AMOSWAP_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00001, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) // RV64A -#define LR_D(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b011, rd, 0b0101111)) -#define SC_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b011, rd, 0b0101111)) +#define LR_D(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b011, rd, 0b0101111)) +#define SC_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b011, rd, 0b0101111)) -#define LRxw(rd, 
rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b010|rex.w, rd, 0b0101111)) -#define SCxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b010|rex.w, rd, 0b0101111)) +#define LRxw(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b010 | rex.w, rd, 0b0101111)) +#define SCxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111)) #define AMOSWAP_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00001, aq, rl), rs2, rs1, 0b011, rd, 0b0101111)) // RV32F // Read round mode -#define FRRM(rd) CSRRS(rd, xZR, 0x002) +#define FRRM(rd) CSRRS(rd, xZR, 0x002) // Swap round mode -#define FSRM(rd, rs) CSRRW(rd, rs, 0x002) +#define FSRM(rd, rs) CSRRW(rd, rs, 0x002) // Write FP exception flags, immediate -#define FSFLAGSI(imm) CSRRWI(xZR, imm, 0x0001) +#define FSFLAGSI(imm) CSRRWI(xZR, imm, 0x0001) // Read FP exception flags to rd -#define FRFLAGS(rd) CSRRS(rd, xZR, 0x0001) +#define FRFLAGS(rd) CSRRS(rd, xZR, 0x0001) // Inexact -#define FR_NX 0 +#define FR_NX 0 // Underflow -#define FR_UF 1 +#define FR_UF 1 // Overflow -#define FR_OF 2 +#define FR_OF 2 // Divide by Zero -#define FR_DZ 3 +#define FR_DZ 3 // Invalid Operation -#define FR_NV 4 +#define FR_NV 4 // Round to Nearest, ties to Even -#define RD_RNE 0b000 +#define RD_RNE 0b000 // Round towards Zero -#define RD_RTZ 0b001 +#define RD_RTZ 0b001 // Round Down (towards −∞) -#define RD_RDN 0b010 +#define RD_RDN 0b010 // Round Up (towards +∞) -#define RD_RUP 0b011 +#define RD_RUP 0b011 // Round to Nearest, ties to Max Magnitude -#define RD_RMM 0b100 +#define RD_RMM 0b100 // In instruction’s rm field, selects dynamic rounding mode; -#define RD_RM 0b111 -#define RD_DYN RD_RM +#define RD_RM 0b111 +#define RD_DYN RD_RM // load single precision from rs1+imm12 to frd -#define FLW(frd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b010, frd, 0b0000111)) +#define FLW(frd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b010, frd, 0b0000111)) // store single precision frs2 to 
rs1+imm12 -#define FSW(frs2, rs1, imm12) EMIT(S_type(imm12, frs2, rs1, 0b010, 0b0100111)) +#define FSW(frs2, rs1, imm12) EMIT(S_type(imm12, frs2, rs1, 0b010, 0b0100111)) // store rs1 with rs2 sign bit to rd -#define FSGNJS(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b000, rd, 0b1010011)) +#define FSGNJS(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b000, rd, 0b1010011)) // move rs1 to rd -#define FMVS(rd, rs1) FSGNJS(rd, rs1, rs1) +#define FMVS(rd, rs1) FSGNJS(rd, rs1, rs1) // store rs1 with oposite rs2 sign bit to rd -#define FSGNJNS(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b001, rd, 0b1010011)) +#define FSGNJNS(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b001, rd, 0b1010011)) // -rs1 => rd -#define FNEGS(rd, rs1) FSGNJNS(rd, rs1, rs1) +#define FNEGS(rd, rs1) FSGNJNS(rd, rs1, rs1) // store rs1 with rs1^rs2 sign bit to rd -#define FSGNJXS(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b010, rd, 0b1010011)) +#define FSGNJXS(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b010, rd, 0b1010011)) // |rs1| => rd -#define FABSS(rd, rs1) FSGNJXS(rd, rs1, rs1) +#define FABSS(rd, rs1) FSGNJXS(rd, rs1, rs1) // Move from Single -#define FMVXW(rd, frs1) EMIT(R_type(0b1110000, 0b00000, frs1, 0b000, rd, 0b1010011)) +#define FMVXW(rd, frs1) EMIT(R_type(0b1110000, 0b00000, frs1, 0b000, rd, 0b1010011)) // Move to Single -#define FMVWX(frd, rs1) EMIT(R_type(0b1111000, 0b00000, rs1, 0b000, frd, 0b1010011)) +#define FMVWX(frd, rs1) EMIT(R_type(0b1111000, 0b00000, rs1, 0b000, frd, 0b1010011)) // Convert from signed 32bits to Single -#define FCVTSW(frd, rs1, rm) EMIT(R_type(0b1101000, 0b00000, rs1, rm, frd, 0b1010011)) +#define FCVTSW(frd, rs1, rm) EMIT(R_type(0b1101000, 0b00000, rs1, rm, frd, 0b1010011)) // Convert from Single to signed 32bits (trucated) -#define FCVTWS(rd, frs1, rm) EMIT(R_type(0b1100000, 0b00000, frs1, rm, rd, 0b1010011)) +#define FCVTWS(rd, frs1, rm) EMIT(R_type(0b1100000, 0b00000, frs1, rm, rd, 0b1010011)) -#define FADDS(frd, frs1, frs2) 
EMIT(R_type(0b0000000, frs2, frs1, 0b000, frd, 0b1010011)) -#define FSUBS(frd, frs1, frs2) EMIT(R_type(0b0000100, frs2, frs1, 0b000, frd, 0b1010011)) -#define FMULS(frd, frs1, frs2) EMIT(R_type(0b0001000, frs2, frs1, 0b000, frd, 0b1010011)) -#define FDIVS(frd, frs1, frs2) EMIT(R_type(0b0001100, frs2, frs1, 0b000, frd, 0b1010011)) -#define FSQRTS(frd, frs1) EMIT(R_type(0b0101100, 0b00000, frs1, 0b000, frd, 0b1010011)) -#define FMINS(frd, frs1, frs2) EMIT(R_type(0b0010100, frs2, frs1, 0b000, frd, 0b1010011)) -#define FMAXS(frd, frs1, frs2) EMIT(R_type(0b0010100, frs2, frs1, 0b001, frd, 0b1010011)) +#define FADDS(frd, frs1, frs2) EMIT(R_type(0b0000000, frs2, frs1, 0b000, frd, 0b1010011)) +#define FSUBS(frd, frs1, frs2) EMIT(R_type(0b0000100, frs2, frs1, 0b000, frd, 0b1010011)) +#define FMULS(frd, frs1, frs2) EMIT(R_type(0b0001000, frs2, frs1, 0b000, frd, 0b1010011)) +#define FDIVS(frd, frs1, frs2) EMIT(R_type(0b0001100, frs2, frs1, 0b000, frd, 0b1010011)) +#define FSQRTS(frd, frs1) EMIT(R_type(0b0101100, 0b00000, frs1, 0b000, frd, 0b1010011)) +#define FMINS(frd, frs1, frs2) EMIT(R_type(0b0010100, frs2, frs1, 0b000, frd, 0b1010011)) +#define FMAXS(frd, frs1, frs2) EMIT(R_type(0b0010100, frs2, frs1, 0b001, frd, 0b1010011)) // compare -#define FEQS(rd, frs1, frs2) EMIT(R_type(0b1010000, frs2, frs1, 0b010, rd, 0b1010011)) -#define FLTS(rd, frs1, frs2) EMIT(R_type(0b1010000, frs2, frs1, 0b001, rd, 0b1010011)) -#define FLES(rd, frs1, frs2) EMIT(R_type(0b1010000, frs2, frs1, 0b000, rd, 0b1010011)) +#define FEQS(rd, frs1, frs2) EMIT(R_type(0b1010000, frs2, frs1, 0b010, rd, 0b1010011)) +#define FLTS(rd, frs1, frs2) EMIT(R_type(0b1010000, frs2, frs1, 0b001, rd, 0b1010011)) +#define FLES(rd, frs1, frs2) EMIT(R_type(0b1010000, frs2, frs1, 0b000, rd, 0b1010011)) // RV64F // Convert from signed 64bits to Single -#define FCVTSL(frd, rs1, rm) EMIT(R_type(0b1101000, 0b00010, rs1, rm, frd, 0b1010011)) +#define FCVTSL(frd, rs1, rm) EMIT(R_type(0b1101000, 0b00010, rs1, rm, frd, 
0b1010011)) // Convert from unsigned 64bits to Single -#define FCVTSLU(frd, rs1, rm) EMIT(R_type(0b1101000, 0b00011, rs1, rm, frd, 0b1010011)) +#define FCVTSLU(frd, rs1, rm) EMIT(R_type(0b1101000, 0b00011, rs1, rm, frd, 0b1010011)) // Convert from Single to signed 64bits -#define FCVTLS(rd, frs1, rm) EMIT(R_type(0b1100000, 0b00010, frs1, rm, rd, 0b1010011)) +#define FCVTLS(rd, frs1, rm) EMIT(R_type(0b1100000, 0b00010, frs1, rm, rd, 0b1010011)) // Convert from Single to unsigned 64bits -#define FCVTLUS(rd, frs1, rm) EMIT(R_type(0b1100000, 0b00011, frs1, rm, rd, 0b1010011)) +#define FCVTLUS(rd, frs1, rm) EMIT(R_type(0b1100000, 0b00011, frs1, rm, rd, 0b1010011)) // onvert from Single to signed 32/64bits (trucated) -#define FCVTSxw(rd, frs1, rm) EMIT(R_type(0b1100000, rex.w?0b00010:0b00000, frs1, rm, rd, 0b1010011)) +#define FCVTSxw(rd, frs1, rm) EMIT(R_type(0b1100000, rex.w ? 0b00010 : 0b00000, frs1, rm, rd, 0b1010011)) // RV32D // load double precision from rs1+imm12 to frd -#define FLD(frd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b011, frd, 0b0000111)) +#define FLD(frd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b011, frd, 0b0000111)) // store double precision frs2 to rs1+imm12 -#define FSD(frs2, rs1, imm12) EMIT(S_type(imm12, frs2, rs1, 0b011, 0b0100111)) +#define FSD(frs2, rs1, imm12) EMIT(S_type(imm12, frs2, rs1, 0b011, 0b0100111)) // Convert Double frs1 to Single frd -#define FCVTSD(frd, frs1) EMIT(R_type(0b0100000, 0b00001, frs1, 0b000, frd, 0b1010011)) +#define FCVTSD(frd, frs1) EMIT(R_type(0b0100000, 0b00001, frs1, 0b000, frd, 0b1010011)) // Convert Single frs1 to Double frd -#define FCVTDS(frd, frs1) EMIT(R_type(0b0100001, 0b00000, frs1, 0b000, frd, 0b1010011)) +#define FCVTDS(frd, frs1) EMIT(R_type(0b0100001, 0b00000, frs1, 0b000, frd, 0b1010011)) // Convert from Double to signed 32bits -#define FCVTWD(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00000, frs1, rm, rd, 0b1010011)) +#define FCVTWD(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00000, frs1, rm, rd, 0b1010011)) // 
Convert from Double to unsigned 32bits -#define FCVTWUD(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00001, frs1, rm, rd, 0b1010011)) +#define FCVTWUD(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00001, frs1, rm, rd, 0b1010011)) // store rs1 with rs2 sign bit to rd -#define FSGNJD(rd, rs1, rs2) EMIT(R_type(0b0010001, rs2, rs1, 0b000, rd, 0b1010011)) +#define FSGNJD(rd, rs1, rs2) EMIT(R_type(0b0010001, rs2, rs1, 0b000, rd, 0b1010011)) // move rs1 to rd -#define FMVD(rd, rs1) FSGNJD(rd, rs1, rs1) +#define FMVD(rd, rs1) FSGNJD(rd, rs1, rs1) // store rs1 with oposite rs2 sign bit to rd -#define FSGNJND(rd, rs1, rs2) EMIT(R_type(0b0010001, rs2, rs1, 0b001, rd, 0b1010011)) +#define FSGNJND(rd, rs1, rs2) EMIT(R_type(0b0010001, rs2, rs1, 0b001, rd, 0b1010011)) // -rs1 => rd -#define FNEGD(rd, rs1) FSGNJND(rd, rs1, rs1) +#define FNEGD(rd, rs1) FSGNJND(rd, rs1, rs1) // store rs1 with rs1^rs2 sign bit to rd -#define FSGNJXD(rd, rs1, rs2) EMIT(R_type(0b0010001, rs2, rs1, 0b010, rd, 0b1010011)) +#define FSGNJXD(rd, rs1, rs2) EMIT(R_type(0b0010001, rs2, rs1, 0b010, rd, 0b1010011)) // |rs1| => rd -#define FABSD(rd, rs1) FSGNJXD(rd, rs1, rs1) +#define FABSD(rd, rs1) FSGNJXD(rd, rs1, rs1) // Convert from signed 32bits to Double -#define FCVTDW(frd, rs1, rm) EMIT(R_type(0b1101001, 0b00000, rs1, rm, frd, 0b1010011)) - -#define FEQD(rd, frs1, frs2) EMIT(R_type(0b1010001, frs2, frs1, 0b010, rd, 0b1010011)) -#define FLTD(rd, frs1, frs2) EMIT(R_type(0b1010001, frs2, frs1, 0b001, rd, 0b1010011)) -#define FLED(rd, frs1, frs2) EMIT(R_type(0b1010001, frs2, frs1, 0b000, rd, 0b1010011)) - -#define FADDD(frd, frs1, frs2) EMIT(R_type(0b0000001, frs2, frs1, 0b000, frd, 0b1010011)) -#define FSUBD(frd, frs1, frs2) EMIT(R_type(0b0000101, frs2, frs1, 0b000, frd, 0b1010011)) -#define FMULD(frd, frs1, frs2) EMIT(R_type(0b0001001, frs2, frs1, 0b000, frd, 0b1010011)) -#define FDIVD(frd, frs1, frs2) EMIT(R_type(0b0001101, frs2, frs1, 0b000, frd, 0b1010011)) -#define FSQRTD(frd, frs1) EMIT(R_type(0b0101101, 0b00000, 
frs1, 0b000, frd, 0b1010011)) -#define FMIND(frd, frs1, frs2) EMIT(R_type(0b0010101, frs2, frs1, 0b000, frd, 0b1010011)) -#define FMAXD(frd, frs1, frs2) EMIT(R_type(0b0010101, frs2, frs1, 0b001, frd, 0b1010011)) - -//RV64D -// Move from Double -#define FMVXD(rd, frs1) EMIT(R_type(0b1110001, 0b00000, frs1, 0b000, rd, 0b1010011)) +#define FCVTDW(frd, rs1, rm) EMIT(R_type(0b1101001, 0b00000, rs1, rm, frd, 0b1010011)) + +#define FEQD(rd, frs1, frs2) EMIT(R_type(0b1010001, frs2, frs1, 0b010, rd, 0b1010011)) +#define FLTD(rd, frs1, frs2) EMIT(R_type(0b1010001, frs2, frs1, 0b001, rd, 0b1010011)) +#define FLED(rd, frs1, frs2) EMIT(R_type(0b1010001, frs2, frs1, 0b000, rd, 0b1010011)) + +#define FADDD(frd, frs1, frs2) EMIT(R_type(0b0000001, frs2, frs1, 0b000, frd, 0b1010011)) +#define FSUBD(frd, frs1, frs2) EMIT(R_type(0b0000101, frs2, frs1, 0b000, frd, 0b1010011)) +#define FMULD(frd, frs1, frs2) EMIT(R_type(0b0001001, frs2, frs1, 0b000, frd, 0b1010011)) +#define FDIVD(frd, frs1, frs2) EMIT(R_type(0b0001101, frs2, frs1, 0b000, frd, 0b1010011)) +#define FSQRTD(frd, frs1) EMIT(R_type(0b0101101, 0b00000, frs1, 0b000, frd, 0b1010011)) +#define FMIND(frd, frs1, frs2) EMIT(R_type(0b0010101, frs2, frs1, 0b000, frd, 0b1010011)) +#define FMAXD(frd, frs1, frs2) EMIT(R_type(0b0010101, frs2, frs1, 0b001, frd, 0b1010011)) + +// RV64D +// Move from Double +#define FMVXD(rd, frs1) EMIT(R_type(0b1110001, 0b00000, frs1, 0b000, rd, 0b1010011)) // Move to Double -#define FMVDX(frd, rs1) EMIT(R_type(0b1111001, 0b00000, rs1, 0b000, frd, 0b1010011)) +#define FMVDX(frd, rs1) EMIT(R_type(0b1111001, 0b00000, rs1, 0b000, frd, 0b1010011)) // Convert from signed 64bits to Double -#define FCVTDL(frd, rs1, rm) EMIT(R_type(0b1101001, 0b00010, rs1, rm, frd, 0b1010011)) +#define FCVTDL(frd, rs1, rm) EMIT(R_type(0b1101001, 0b00010, rs1, rm, frd, 0b1010011)) // Convert from unsigned 64bits to Double -#define FCVTDLU(frd, rs1, rm) EMIT(R_type(0b1101001, 0b00011, rs1, rm, frd, 0b1010011)) +#define FCVTDLU(frd, 
rs1, rm) EMIT(R_type(0b1101001, 0b00011, rs1, rm, frd, 0b1010011)) // Convert from Double to signed 64bits -#define FCVTLD(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00010, frs1, rm, rd, 0b1010011)) +#define FCVTLD(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00010, frs1, rm, rd, 0b1010011)) // Convert from Double to unsigned 64bits -#define FCVTLUD(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00011, frs1, rm, rd, 0b1010011)) +#define FCVTLUD(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00011, frs1, rm, rd, 0b1010011)) // Convert from Double to signed integer -#define FCVTLDxw(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00000+(rex.w?0b10:0b00), frs1, rm, rd, 0b1010011)) +#define FCVTLDxw(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00000 + (rex.w ? 0b10 : 0b00), frs1, rm, rd, 0b1010011)) // Convert from Double to unsigned integer -#define FCVTLUDxw(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00001+(rex.w?0b10:0b00), frs1, rm, rd, 0b1010011)) +#define FCVTLUDxw(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00001 + (rex.w ? 0b10 : 0b00), frs1, rm, rd, 0b1010011)) -//Zba -// Add unsigned word (Wz(rs1) + X(rs2)) -#define ADDUW(rd, rs1, rs2) EMIT(R_type(0b0000100, rs2, rs1, 0b000, rd, 0b0111011)) +// Zba +// Add unsigned word (Wz(rs1) + X(rs2)) +#define ADDUW(rd, rs1, rs2) EMIT(R_type(0b0000100, rs2, rs1, 0b000, rd, 0b0111011)) // Zero-extend Word -#define ZEXTW(rd, rs1) ADDUW(rd, rs1, xZR) +#define ZEXTW(rd, rs1) ADDUW(rd, rs1, xZR) // Shift left by 1 and add (rd = X(rs2) + X(rs1)<<1) -#define SH1ADD(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b010, rd, 0b0110011)) +#define SH1ADD(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b010, rd, 0b0110011)) // Shift unsigned word left by 1 and add (rd = X(rs2) + Wz(rs1)<<1) -#define SH1ADDUW(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b010, rd, 0b0111011)) +#define SH1ADDUW(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b010, rd, 0b0111011)) // Shift left by 2 and add (rd = X(rs2) + X(rs1)<<2) -#define SH2ADD(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b100, 
rd, 0b0110011)) +#define SH2ADD(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b100, rd, 0b0110011)) // Shift unsigned word left by 2 and add (rd = X(rs2) + Wz(rs1)<<2) -#define SH2ADDUW(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b100, rd, 0b0111011)) +#define SH2ADDUW(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b100, rd, 0b0111011)) // Shift left by 3 and add (rd = X(rs2) + X(rs1)<<3) -#define SH3ADD(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b110, rd, 0b0110011)) +#define SH3ADD(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b110, rd, 0b0110011)) // Shift unsigned word left by 3 and add (rd = X(rs2) + Wz(rs1)<<3) -#define SH3ADDUW(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b110, rd, 0b0111011)) +#define SH3ADDUW(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b110, rd, 0b0111011)) // Shift left unsigned word (immediate) -#define SLLIUW(rd, rs1, imm) EMIT(R_type(0b0000100, imm, rs1, 0b001, rd, 0b0011011)) +#define SLLIUW(rd, rs1, imm) EMIT(R_type(0b0000100, imm, rs1, 0b001, rd, 0b0011011)) // Shift left by 1,2 or 3 and add (rd = X(rs2) + X(rs1)<<x) -#define SHxADD(rd, rs1, x, rs2) EMIT(R_type(0b0010000, rs2, rs1, (x)<<1, rd, 0b0110011)) +#define SHxADD(rd, rs1, x, rs2) EMIT(R_type(0b0010000, rs2, rs1, (x) << 1, rd, 0b0110011)) // Shift unsigned word left by 1,2 or 3 and add (rd = X(rs2) + Wz(rs1)<<x) -#define SHxADDUW(rd, rs1, x, rs2) EMIT(R_type(0b0010000, rs2, rs1, (x)<<1, rd, 0b0111011)) +#define SHxADDUW(rd, rs1, x, rs2) EMIT(R_type(0b0010000, rs2, rs1, (x) << 1, rd, 0b0111011)) -//Zbb -// AND with reverted operand (rs1 & ~rs2) -#define ANDN(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b111, rd, 0b0110011)) +// Zbb +// AND with reverted operand (rs1 & ~rs2) +#define ANDN(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b111, rd, 0b0110011)) // OR with reverted operand (rs1 | ~rs2) -#define ORN(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b110, rd, 0b0110011)) +#define ORN(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b110, rd, 
0b0110011)) // Exclusive NOR (~(rs1 ^ rs2)) -#define XNOR(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b100, rd, 0b0110011)) +#define XNOR(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b100, rd, 0b0110011)) // Count leading zero bits -#define CLZ(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, 0b0010011)) +#define CLZ(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, 0b0010011)) // Count leading zero bits in word -#define CLZW(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, 0b0011011)) +#define CLZW(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, 0b0011011)) // Count leading zero bits -#define CLZxw(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, rex.w?0b0010011:0b0011011)) +#define CLZxw(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, rex.w ? 0b0010011 : 0b0011011)) // Count trailing zero bits -#define CTZ(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, 0b0010011)) +#define CTZ(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, 0b0010011)) // Count trailing zero bits in word -#define CTZW(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, 0b0011011)) +#define CTZW(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, 0b0011011)) // Count trailing zero bits -#define CTZxw(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, rex.w?0b0010011:0b0011011)) +#define CTZxw(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, rex.w ? 
0b0010011 : 0b0011011)) // Count set bits -#define CPOP(rd, rs) EMIT(R_type(0b0110000, 0b00010, rs, 0b001, rd, 0b0010011)) +#define CPOP(rd, rs) EMIT(R_type(0b0110000, 0b00010, rs, 0b001, rd, 0b0010011)) // Count set bits in word -#define CPOPW(rd, rs) EMIT(R_type(0b0110000, 0b00010, rs, 0b001, rd, 0b0011011)) +#define CPOPW(rd, rs) EMIT(R_type(0b0110000, 0b00010, rs, 0b001, rd, 0b0011011)) // Count set bits -#define CPOPxw(rd, rs) EMIT(R_type(0b0110000, 0b00010, rs, 0b001, rd, rex.w?0b0010011:0b0011011)) +#define CPOPxw(rd, rs) EMIT(R_type(0b0110000, 0b00010, rs, 0b001, rd, rex.w ? 0b0010011 : 0b0011011)) // Maximum -#define MAX(rd, rs1, rs2) EMIT(R_type(0b0000101, rs2, rs1, 0b110, rd, 0b0110011)) +#define MAX(rd, rs1, rs2) EMIT(R_type(0b0000101, rs2, rs1, 0b110, rd, 0b0110011)) // Unisgned maximum -#define MAXU(rd, rs1, rs2) EMIT(R_type(0b0000101, rs2, rs1, 0b111, rd, 0b0110011)) +#define MAXU(rd, rs1, rs2) EMIT(R_type(0b0000101, rs2, rs1, 0b111, rd, 0b0110011)) // Minimum -#define MIN(rd, rs1, rs2) EMIT(R_type(0b0000101, rs2, rs1, 0b100, rd, 0b0110011)) +#define MIN(rd, rs1, rs2) EMIT(R_type(0b0000101, rs2, rs1, 0b100, rd, 0b0110011)) // Unsigned minimum -#define MINU(rd, rs1, rs2) EMIT(R_type(0b0000101, rs2, rs1, 0b101, rd, 0b0110011)) +#define MINU(rd, rs1, rs2) EMIT(R_type(0b0000101, rs2, rs1, 0b101, rd, 0b0110011)) // Sign-extend byte -#define SEXTB(rd, rs) EMIT(R_type(0b0110000, 0b00100, rs, 0b001, rd, 0b0010011)) +#define SEXTB(rd, rs) EMIT(R_type(0b0110000, 0b00100, rs, 0b001, rd, 0b0010011)) // Sign-extend half-word -#define SEXTH(rd, rs) EMIT(R_type(0b0110000, 0b00101, rs, 0b001, rd, 0b0010011)) +#define SEXTH(rd, rs) EMIT(R_type(0b0110000, 0b00101, rs, 0b001, rd, 0b0010011)) // Zero-extend half-word -#define ZEXTH_(rd, rs) EMIT(R_type(0b0000100, 0b00000, rs, 0b100, rd, 0b0111011)) +#define ZEXTH_(rd, rs) EMIT(R_type(0b0000100, 0b00000, rs, 0b100, rd, 0b0111011)) // Zero-extend half-word -#define ZEXTH(rd, rs) if(rv64_zbb) ZEXTH_(rd, rs); else {SLLI(rd, 
rs, 48); SRLI(rd, rd, 48);} +#define ZEXTH(rd, rs) \ + if (rv64_zbb) \ + ZEXTH_(rd, rs); \ + else { \ + SLLI(rd, rs, 48); \ + SRLI(rd, rd, 48); \ + } // Rotate left (register) -#define ROL(rd, rs1, rs2) EMIT(R_type(0b0110000, rs2, rs1, 0b001, rd, 0b0110011)) +#define ROL(rd, rs1, rs2) EMIT(R_type(0b0110000, rs2, rs1, 0b001, rd, 0b0110011)) // Rotate left word (register) -#define ROLW(rd, rs1, rs2) EMIT(R_type(0b0110000, rs2, rs1, 0b001, rd, 0b0111011)) +#define ROLW(rd, rs1, rs2) EMIT(R_type(0b0110000, rs2, rs1, 0b001, rd, 0b0111011)) // Rotate left (register) -#define ROLxw(rd, rs1, rs2) EMIT(R_type(0b0110000, rs2, rs1, 0b001, rd, rex.w?0b0110011:0b0111011)) +#define ROLxw(rd, rs1, rs2) EMIT(R_type(0b0110000, rs2, rs1, 0b001, rd, rex.w ? 0b0110011 : 0b0111011)) // Rotate right (register) -#define ROR(rd, rs1, rs2) EMIT(R_type(0b0110000, rs2, rs1, 0b101, rd, 0b0110011)) +#define ROR(rd, rs1, rs2) EMIT(R_type(0b0110000, rs2, rs1, 0b101, rd, 0b0110011)) // Rotate right (immediate) -#define RORI(rd, rs1, shamt) EMIT(R_type(0b0110000, shamt, rs1, 0b101, rd, 0b0010011)) +#define RORI(rd, rs1, shamt) EMIT(R_type(0b0110000, shamt, rs1, 0b101, rd, 0b0010011)) // Rotate right word (immediate) -#define RORIW(rd, rs1, shamt) EMIT(R_type(0b0110000, shamt, rs1, 0b101, rd, 0b0011011)) +#define RORIW(rd, rs1, shamt) EMIT(R_type(0b0110000, shamt, rs1, 0b101, rd, 0b0011011)) // Rotate right (immediate) -#define RORIxw(rd, rs1, shamt) EMIT(R_type(0b0110000, shamt, rs1, 0b101, rd, rex.w?0b0010011:0b0011011)) +#define RORIxw(rd, rs1, shamt) EMIT(R_type(0b0110000, shamt, rs1, 0b101, rd, rex.w ? 
0b0010011 : 0b0011011)) // Rotate right word (register) -#define RORW(rd, rs1, rs2) EMIT(R_type(0b0110000, rs2, rs1, 0b101, rd, 0b0111011)) +#define RORW(rd, rs1, rs2) EMIT(R_type(0b0110000, rs2, rs1, 0b101, rd, 0b0111011)) // Rotate right (register) -#define RORxw(rd, rs1, rs2) EMIT(R_type(0b0110000, rs2, rs1, 0b101, rd, rex.w?0b0110011:0b0111011)) +#define RORxw(rd, rs1, rs2) EMIT(R_type(0b0110000, rs2, rs1, 0b101, rd, rex.w ? 0b0110011 : 0b0111011)) // Bitwise OR Combine, byte granule (for all byte, if byte==0, res.byte=0, else res.byte=0xff) -#define ORCB(rd, rs) EMIT(I_type(0b001010000111, rs, 0b101, rd, 0b0010011)) +#define ORCB(rd, rs) EMIT(I_type(0b001010000111, rs, 0b101, rd, 0b0010011)) // Byte-reverse register -#define REV8(rd, rs) EMIT(I_type(0b011010111000, rs, 0b101, rd, 0b0010011)) +#define REV8(rd, rs) EMIT(I_type(0b011010111000, rs, 0b101, rd, 0b0010011)) + +// Byte-reverse register, rd can be the same as rs or s1, but rs cannot be the same as s1. +#define REV8xw(rd, rs, s1, s2, s3, s4) \ + if (rv64_zbb) { \ + REV8(rd, rs); \ + if (!rex.w) { \ + SRLI(rd, rd, 32); \ + } \ + } else if (rv64_xtheadbb) { \ + if (rex.w) { \ + TH_REV(rd, rs); \ + } else { \ + TH_REVW(rd, rs); \ + } \ + } else { \ + MOV_U12(s2, 0xff); \ + if (rex.w) { \ + SLLI(s1, rs, 56); \ + SRLI(s3, rs, 56); \ + SRLI(s4, rs, 40); \ + SLLI(s2, s2, 8); \ + AND(s4, s4, s2); \ + OR(s1, s1, s3); \ + OR(s1, s1, s4); \ + SLLI(s3, rs, 40); \ + SLLI(s4, s2, 40); \ + AND(s3, s3, s4); \ + OR(s1, s1, s3); \ + SRLI(s3, rs, 24); \ + SLLI(s4, s2, 8); \ + AND(s3, s3, s4); \ + OR(s1, s1, s3); \ + SLLI(s3, rs, 24); \ + SLLI(s4, s2, 32); \ + AND(s3, s3, s4); \ + OR(s1, s1, s3); \ + SRLI(s3, rs, 8); \ + SLLI(s4, s2, 16); \ + AND(s3, s3, s4); \ + OR(s1, s1, s3); \ + SLLI(s3, rs, 8); \ + SLLI(s4, s2, 24); \ + AND(s3, s3, s4); \ + OR(rd, s1, s3); \ + } else { \ + SLLIW(s2, s2, 8); \ + SLLIW(s1, rs, 24); \ + SRLIW(s3, rs, 24); \ + SRLIW(s4, rs, 8); \ + AND(s4, s4, s2); \ + OR(s1, s1, s3); \ + OR(s1, s1, s4); 
\ + SLLIW(s3, rs, 8); \ + LUI(s2, 0xff0); \ + AND(s3, s3, s2); \ + OR(rd, s1, s3); \ + } \ + } -//Zbc -// Carry-less multily (low-part) -#define CLMUL(rd, rs1, rs2) EMIT(R_type(0b0000101, rs2, rs1, 0b001, rd, 0b0110011)) +// Zbc +// Carry-less multily (low-part) +#define CLMUL(rd, rs1, rs2) EMIT(R_type(0b0000101, rs2, rs1, 0b001, rd, 0b0110011)) // Carry-less multiply (high-part) -#define CLMULH(rd, rs1, rs2) EMIT(R_type(0b0000101, rs2, rs1, 0b011, rd, 0b0110011)) +#define CLMULH(rd, rs1, rs2) EMIT(R_type(0b0000101, rs2, rs1, 0b011, rd, 0b0110011)) // Carry-less multiply (reversed) -#define CLMULR(rd, rs1, rs2) EMIT(R_type(0b0000101, rs2, rs1, 0b010, rd, 0b0110011)) +#define CLMULR(rd, rs1, rs2) EMIT(R_type(0b0000101, rs2, rs1, 0b010, rd, 0b0110011)) -//Zbs -// encoding of the "imm" on RV64 use a slight different mask, but it will work using R_type with high bit of imm ovewriting low bit op func -// Single-bit Clear (Register) -#define BCLR(rd, rs1, rs2) EMIT(R_type(0b0100100, rs2, rs1, 0b001, rd, 0b0110011)) +// Zbs +// encoding of the "imm" on RV64 use a slight different mask, but it will work using R_type with high bit of imm ovewriting low bit op func +// Single-bit Clear (Register) +#define BCLR(rd, rs1, rs2) EMIT(R_type(0b0100100, rs2, rs1, 0b001, rd, 0b0110011)) // Single-bit Clear (Immediate) -#define BCLI(rd, rs1, imm) EMIT(R_type(0b0100100, imm, rs1, 0b001, rd, 0b0010011)) +#define BCLI(rd, rs1, imm) EMIT(R_type(0b0100100, imm, rs1, 0b001, rd, 0b0010011)) // Single-bit Extreact (Register) -#define BEXT(rd, rs1, rs2) EMIT(R_type(0b0100100, rs2, rs1, 0b101, rd, 0b0110011)) +#define BEXT(rd, rs1, rs2) EMIT(R_type(0b0100100, rs2, rs1, 0b101, rd, 0b0110011)) // Single-bit Extract (Immediate) -#define BEXTI(rd, rs1, imm) EMIT(R_type(0b0100100, imm, rs1, 0b101, rd, 0b0010011)) +#define BEXTI(rd, rs1, imm) EMIT(R_type(0b0100100, imm, rs1, 0b101, rd, 0b0010011)) // Single-bit Invert (Register) -#define BINV(rd, rs1, rs2) EMIT(R_type(0b0110100, rs2, rs1, 0b001, rd, 
0b0110011)) +#define BINV(rd, rs1, rs2) EMIT(R_type(0b0110100, rs2, rs1, 0b001, rd, 0b0110011)) // Single-bit Invert (Immediate) -#define BINVI(rd, rs1, imm) EMIT(R_type(0b0110100, imm, rs1, 0b001, rd, 0b0010011)) +#define BINVI(rd, rs1, imm) EMIT(R_type(0b0110100, imm, rs1, 0b001, rd, 0b0010011)) // Single-bit Set (Register) -#define BSET(rd, rs1, rs2) EMIT(R_type(0b0010100, rs2, rs1, 0b001, rd, 0b0110011)) +#define BSET(rd, rs1, rs2) EMIT(R_type(0b0010100, rs2, rs1, 0b001, rd, 0b0110011)) // Single-bit Set (Immediate) -#define BSETI(rd, rs1, imm) EMIT(R_type(0b0010100, imm, rs1, 0b001, rd, 0b0010011)) +#define BSETI(rd, rs1, imm) EMIT(R_type(0b0010100, imm, rs1, 0b001, rd, 0b0010011)) /// THead vendor extension /// https://github.com/T-head-Semi/thead-extension-spec/releases @@ -669,11 +861,12 @@ f28–31 ft8–11 FP temporaries Caller // Add a shifted operand to a second operand. // reg[rd] := reg[rs1] + (reg[rs2] << imm2) -#define TH_ADDSL(rd, rs1, rs2, imm2) EMIT(R_type(imm2&0b11, rs2, rs1, 0b001, rd, 0b0001011)) +#define TH_ADDSL(rd, rs1, rs2, imm2) EMIT(R_type(imm2 & 0b11, rs2, rs1, 0b001, rd, 0b0001011)) // XTheadBb - Basic bit-manipulation -#define TH_SRRIxw(rd, rs1, imm) if(rex.w) { \ +#define TH_SRRIxw(rd, rs1, imm) \ + if (rex.w) { \ TH_SRRI(rd, rs1, imm); \ } else { \ TH_SRRIW(rd, rs1, imm); \ @@ -681,20 +874,20 @@ f28–31 ft8–11 FP temporaries Caller // Perform a cyclic right shift. // reg[rd] := (reg[rs1] >> imm6) | (reg[rs1] << (xlen - imm6)) -#define TH_SRRI(rd, rs1, imm6) EMIT(I_type(0b000100000000|(imm6&0x3f), rs1, 0b001, rd, 0b0001011)) +#define TH_SRRI(rd, rs1, imm6) EMIT(I_type(0b000100000000 | (imm6 & 0x3f), rs1, 0b001, rd, 0b0001011)) // Perform a cyclic right shift on word operand. 
// data := zext.w(reg[rs1]) // reg[rd] := (data >> imm5) | (data << (32 - imm5)) -#define TH_SRRIW(rd, rs1, imm5) EMIT(I_type(0b000101000000|(imm5&0x1f), rs1, 0b001, rd, 0b0001011)) +#define TH_SRRIW(rd, rs1, imm5) EMIT(I_type(0b000101000000 | (imm5 & 0x1f), rs1, 0b001, rd, 0b0001011)) // Extract and sign-extend bits. // reg[rd] := sign_extend(reg[rs1][imm1:imm2]) -#define TH_EXT(rd, rs1, imm1, imm2) EMIT(I_type(((imm1&0x1f)<<6)|(imm2&0x1f), rs1, 0b010, rd, 0b0001011)) +#define TH_EXT(rd, rs1, imm1, imm2) EMIT(I_type(((imm1 & 0x1f) << 6) | (imm2 & 0x1f), rs1, 0b010, rd, 0b0001011)) // Extract and zero-extend bits. // reg[rd] := zero_extend(reg[rs1][imm1:imm2]) -#define TH_EXTU(rd, rs1, imm1, imm2) EMIT(I_type(((imm1&0x1f)<<6)|(imm2&0x1f), rs1, 0b011, rd, 0b0001011)) +#define TH_EXTU(rd, rs1, imm1, imm2) EMIT(I_type(((imm1 & 0x1f) << 6) | (imm2 & 0x1f), rs1, 0b011, rd, 0b0001011)) // Find first '0'-bit // for i=xlen..0: @@ -739,7 +932,7 @@ f28–31 ft8–11 FP temporaries Caller // rd := 1 // else // rd := 0 -#define TH_TST(rd, rs1, imm6) EMIT(I_type(0b100010000000|(imm6&0x3f), rs1, 0b001, rd, 0b0001011)) +#define TH_TST(rd, rs1, imm6) EMIT(I_type(0b100010000000 | (imm6 & 0x3f), rs1, 0b001, rd, 0b0001011)) // XTheadCondMov - Conditional move @@ -759,7 +952,7 @@ f28–31 ft8–11 FP temporaries Caller // Load indexed byte, increment address after loading. 
// rd := sign_extend(mem[rs1]) // rs1 := rs1 + (sign_extend(imm5) << imm2) -#define TH_LBIA(rd, rs1, imm5, imm2) EMIT(I_type(0b000110000000|((imm2&0b11)<<5)|(imm5&0x1f), rs1, 0b100, rd, 0b0001011)) +#define TH_LBIA(rd, rs1, imm5, imm2) EMIT(I_type(0b000110000000 | ((imm2 & 0b11) << 5) | (imm5 & 0x1f), rs1, 0b100, rd, 0b0001011)) // TODO // th.lbib rd, (rs1), imm5, imm2 Load indexed byte @@ -813,7 +1006,7 @@ f28–31 ft8–11 FP temporaries Caller // addr := rs1 + (zero_extend(imm2) << 4) // rd1 := mem[addr+7:addr] // rd2 := mem[addr+15:addr+8] -#define TH_LDD(rd1, rd2, rs1, imm2) EMIT(R_type(0b1111100|(imm2&0b11), rd2, rs1, 0b100, rd1, 0b0001011)) +#define TH_LDD(rd1, rd2, rs1, imm2) EMIT(R_type(0b1111100 | (imm2 & 0b11), rd2, rs1, 0b100, rd1, 0b0001011)) // TODO // th.lwd rd1, rd2, (rs1), imm2, 3 Load two signed 32-bit values @@ -826,7 +1019,7 @@ f28–31 ft8–11 FP temporaries Caller // Load indexed double-precision floating point value. // addr := rs1 + (rs2 << imm2) // rd := fmem[addr+7:addr] -#define TH_FLRD(rd, rs1, rs2, imm2) EMIT(R_type(0b0110000|(imm2&0b11), rs2, rs1, 0b110, rd, 0b0001011)) +#define TH_FLRD(rd, rs1, rs2, imm2) EMIT(R_type(0b0110000 | (imm2 & 0b11), rs2, rs1, 0b110, rd, 0b0001011)) // TODO // th.flrw rd, rs1, rs2, imm2 Load indexed float diff --git a/src/emu/x64run660f.c b/src/emu/x64run660f.c index 902772e5..739c9664 100644 --- a/src/emu/x64run660f.c +++ b/src/emu/x64run660f.c @@ -27,19 +27,19 @@ static uint8_t ff_mult(uint8_t a, uint8_t b) int retval = 0; for(int i = 0; i < 8; i++) { - if((b & 1) == 1) + if((b & 1) == 1) retval ^= a; - + if((a & 0x80)) { a <<= 1; a ^= 0x1b; } else { a <<= 1; } - + b >>= 1; } - + return retval; } @@ -514,7 +514,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) for(int i=1; i>=0; --i) GX->sq[i] = EX->sd[i]; break; - + case 0x28: /* PMULDQ Gx, Ex */ nextop = F8; GETEX(0); @@ -790,15 +790,15 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) break; case 0xF0: /* MOVBE Gw, Ew */ nextop = 
F8; - GETEX(0); - GETGX; - GX->uw[0] = __builtin_bswap16(EX->uw[0]); + GETED(0); + GETGD; + GD->word[0] = __builtin_bswap16(ED->word[0]); break; case 0xF1: /* MOVBE Ew, Gw */ nextop = F8; - GETEX(0); - GETGX; - EX->uw[0] = __builtin_bswap16(GX->uw[0]); + GETED(0); + GETGD; + ED->word[0] = __builtin_bswap16(GD->word[0]); break; default: return 0; @@ -1113,7 +1113,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) return 0; } break; - + GOCOND(0x40 , nextop = F8; CHECK_FLAGS(emu); @@ -1285,7 +1285,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) if (isnan(GX->d[1]) || isnan(EX->d[1]) || isgreater(EX->d[1], GX->d[1])) GX->d[1] = EX->d[1]; break; - + case 0x60: /* PUNPCKLBW Gx,Ex */ nextop = F8; GETEX(0); @@ -1295,7 +1295,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) if(GX==EX) for(int i=0; i<8; ++i) GX->ub[2 * i + 1] = GX->ub[2 * i]; - else + else for(int i=0; i<8; ++i) GX->ub[2 * i + 1] = EX->ub[i]; break; @@ -1538,7 +1538,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) } else { EX->q[0] = EX->q[1] >> (tmp8u - 64); EX->q[1] = 0; - } + } } break; case 6: /* PSLLQ Ex, Ib */ @@ -1827,7 +1827,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) GW->word[0] = EW->word[0]; break; - case 0xBA: + case 0xBA: nextop = F8; switch((nextop>>3)&7) { case 4: /* BT Ew,Ib */ @@ -2096,7 +2096,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) GETGX; if(EX->q[0]>15) {GX->q[0] = GX->q[1] = 0;} - else + else {tmp8u=EX->ub[0]; for (int i=0; i<8; ++i) GX->uw[i] >>= tmp8u;} break; case 0xD2: /* PSRLD Gx, Ex */ @@ -2105,7 +2105,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) GETGX; if(EX->q[0]>31) {GX->q[0] = GX->q[1] = 0;} - else + else {tmp8u=EX->ub[0]; for (int i=0; i<4; ++i) GX->ud[i] >>= tmp8u;} break; case 0xD3: /* PSRLQ Gx, Ex */ @@ -2114,7 +2114,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) GETGX; if(EX->q[0]>63) {GX->q[0] = GX->q[1] = 0;} - else + else 
{tmp8u=EX->ub[0]; for (int i=0; i<2; ++i) GX->q[i] >>= tmp8u;} break; case 0xD4: /* PADDQ Gx,Ex */ @@ -2229,7 +2229,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) GETEX(0); GETGX; tmp8u=(EX->q[0]>15)?15:EX->ub[0]; - for (int i=0; i<8; ++i) + for (int i=0; i<8; ++i) GX->sw[i] >>= tmp8u; break; case 0xE2: /* PSRAD Gx, Ex */ @@ -2357,7 +2357,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) GETGX; if(EX->q[0]>15) {GX->q[0] = GX->q[1] = 0;} - else + else {tmp8u=EX->ub[0]; for (int i=0; i<8; ++i) GX->uw[i] <<= tmp8u;} break; case 0xF2: /* PSLLD Gx, Ex */ @@ -2366,7 +2366,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) GETGX; if(EX->q[0]>31) {GX->q[0] = GX->q[1] = 0;} - else + else {tmp8u=EX->ub[0]; for (int i=0; i<4; ++i) GX->ud[i] <<= tmp8u;} break; case 0xF3: /* PSLLQ Gx, Ex */ @@ -2375,7 +2375,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) GETGX; if(EX->q[0]>63) {GX->q[0] = GX->q[1] = 0;} - else + else {tmp8u=EX->ub[0]; for (int i=0; i<2; ++i) GX->q[i] <<= tmp8u;} break; case 0xF4: /* PMULUDQ Gx,Ex */ @@ -2468,7 +2468,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) GX->sd[2] += EX->sd[2]; GX->sd[3] += EX->sd[3]; break; - + default: return 0; } |