diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_private.h | 2 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native_pass.c | 12 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_3.c | 10 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 10 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f20f.c | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 1843 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_private.h | 6 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 152 |
8 files changed, 1080 insertions, 959 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h index 6ac79d7a..acbbe2e8 100644 --- a/src/dynarec/arm64/dynarec_arm64_private.h +++ b/src/dynarec/arm64/dynarec_arm64_private.h @@ -126,7 +126,7 @@ int Table64(dynarec_arm_t *dyn, uint64_t val, int pass); // add a value to tabl void CreateJmpNext(void* addr, void* next); -#define GO_TRACE(A, B) \ +#define GO_TRACE(A, B, s0) \ GETIP(addr); \ MOVx_REG(x1, xRIP); \ STORE_XEMU_CALL(xRIP); \ diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c index f5ee2501..0562fb6b 100644 --- a/src/dynarec/dynarec_native_pass.c +++ b/src/dynarec/dynarec_native_pass.c @@ -88,17 +88,17 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int if(box64_dynarec_test) { MESSAGE(LOG_DUMP, "TEST INIT ----\n"); fpu_reflectcache(dyn, ninst, x1, x2, x3); - GO_TRACE(x64test_init, 1); + GO_TRACE(x64test_init, 1, x1); fpu_unreflectcache(dyn, ninst, x1, x2, x3); MESSAGE(LOG_DUMP, "----------\n"); } #ifdef HAVE_TRACE else if(my_context->dec && box64_dynarec_trace) { - if((trace_end == 0) + if((trace_end == 0) || ((ip >= trace_start) && (ip < trace_end))) { MESSAGE(LOG_DUMP, "TRACE ----\n"); fpu_reflectcache(dyn, ninst, x1, x2, x3); - GO_TRACE(PrintTrace, 1); + GO_TRACE(PrintTrace, 1, x1); fpu_unreflectcache(dyn, ninst, x1, x2, x3); MESSAGE(LOG_DUMP, "----------\n"); } @@ -200,7 +200,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int if(*(uint32_t*)addr!=0) { // check if need to continue (but is next 4 bytes are 0, stop) uintptr_t next = get_closest_next(dyn, addr); if(next && ( - (((next-addr)<15) && is_nops(dyn, addr, next-addr)) + (((next-addr)<15) && is_nops(dyn, addr, next-addr)) /*||(((next-addr)<30) && is_instructions(dyn, addr, next-addr))*/ )) { ok = 1; @@ -232,7 +232,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int } #endif if(ok<0) { - ok = 0; need_epilog=1; + ok = 0; 
need_epilog=1; #if STEP == 0 if(ninst) { --ninst; @@ -256,7 +256,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int } ++ninst; #if STEP == 0 - if(ok && (((box64_dynarec_bigblock<stopblock) && !isJumpTableDefault64((void*)addr)) + if(ok && (((box64_dynarec_bigblock<stopblock) && !isJumpTableDefault64((void*)addr)) || (addr>=box64_nodynarec_start && addr<box64_nodynarec_end))) #else if(ok && (ninst==dyn->size)) diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c index dece35af..efe58f61 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_3.c +++ b/src/dynarec/rv64/dynarec_rv64_00_3.c @@ -319,7 +319,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int addr+=8+8; } else { GETIP(ip+1); // read the 0xCC - STORE_XEMU_CALL(); + STORE_XEMU_CALL(x3); ADDI(x1, xEmu, (uint32_t)offsetof(x64emu_t, ip)); // setup addr as &emu->ip CALL_S(x64Int3, -1); LOAD_XEMU_CALL(); @@ -360,7 +360,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int NOTEST(x1); SMEND(); GETIP(addr); - STORE_XEMU_CALL(); + STORE_XEMU_CALL(x3); CALL_S(x86Syscall, -1); LOAD_XEMU_CALL(); TABLE64(x3, addr); // expected return address @@ -368,13 +368,13 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LW(x1, xEmu, offsetof(x64emu_t, quit)); BEQ_NEXT(x1, xZR); MARK; - LOAD_XEMU_REM(); + LOAD_XEMU_REM(x3); jump_to_epilog(dyn, 0, xRIP, ninst); } else { INST_NAME("INT n"); SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); - STORE_XEMU_CALL(); + STORE_XEMU_CALL(x3); CALL(native_priv, -1); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); @@ -723,7 +723,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int dyn->last_ip = addr; } else { GETIP_(dyn->insts[ninst].natcall); // read the 0xCC already - STORE_XEMU_CALL(); + STORE_XEMU_CALL(x3); ADDI(x1, xEmu, (uint32_t)offsetof(x64emu_t, ip)); // setup 
addr as &emu->ip CALL_S(x64Int3, -1); LOAD_XEMU_CALL(); diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index d7e4eeb8..32211e31 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -64,7 +64,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni addr = fakeed(dyn, addr, ninst, nextop); SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); - STORE_XEMU_CALL(); + STORE_XEMU_CALL(x3); CALL(native_ud, -1); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); @@ -77,7 +77,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni NOTEST(x1); SMEND(); GETIP(addr); - STORE_XEMU_CALL(); + STORE_XEMU_CALL(x3); CALL_S(x64Syscall, -1); LOAD_XEMU_CALL(); TABLE64(x3, addr); // expected return address @@ -85,7 +85,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LW(w1, xEmu, offsetof(x64emu_t, quit)); CBZ_NEXT(w1); MARK; - LOAD_XEMU_REM(); + LOAD_XEMU_REM(x3); jump_to_epilog(dyn, 0, xRIP, ninst); break; @@ -93,7 +93,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("WBINVD"); SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); - STORE_XEMU_CALL(); + STORE_XEMU_CALL(x3); CALL(native_ud, -1); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); @@ -105,7 +105,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("UD2"); SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); - STORE_XEMU_CALL(); + STORE_XEMU_CALL(x3); CALL(native_ud, -1); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); diff --git a/src/dynarec/rv64/dynarec_rv64_f20f.c b/src/dynarec/rv64/dynarec_rv64_f20f.c index ac3da811..e2ce3aac 100644 --- a/src/dynarec/rv64/dynarec_rv64_f20f.c +++ b/src/dynarec/rv64/dynarec_rv64_f20f.c @@ -152,7 +152,7 @@ uintptr_t 
dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int addr = fakeed(dyn, addr, ninst, nextop); SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); - STORE_XEMU_CALL(); + STORE_XEMU_CALL(x3); CALL(native_ud, -1); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); @@ -165,7 +165,7 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int addr = fakeed(dyn, addr, ninst, nextop); SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state GETIP(ip); - STORE_XEMU_CALL(); + STORE_XEMU_CALL(x3); CALL(native_ud, -1); LOAD_XEMU_CALL(); jump_to_epilog(dyn, 0, xRIP, ninst); diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 04a20308..02bc5211 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -20,439 +20,511 @@ #define F8 *(uint8_t*)(addr++) #define F8S *(int8_t*)(addr++) -#define F16 *(uint16_t*)(addr+=2, addr-2) -#define F16S *(int16_t*)(addr+=2, addr-2) -#define F32 *(uint32_t*)(addr+=4, addr-4) -#define F32S *(int32_t*)(addr+=4, addr-4) -#define F32S64 (uint64_t)(int64_t)F32S -#define F64 *(uint64_t*)(addr+=8, addr-8) -#define PK(a) *(uint8_t*)(addr+a) -#define PK16(a) *(uint16_t*)(addr+a) -#define PK32(a) *(uint32_t*)(addr+a) -#define PK64(a) *(uint64_t*)(addr+a) -#define PKip(a) *(uint8_t*)(ip+a) +#define F16 *(uint16_t*)(addr += 2, addr - 2) +#define F16S *(int16_t*)(addr += 2, addr - 2) +#define F32 *(uint32_t*)(addr += 4, addr - 4) +#define F32S *(int32_t*)(addr += 4, addr - 4) +#define F32S64 (uint64_t)(int64_t) F32S +#define F64 *(uint64_t*)(addr += 8, addr - 8) +#define PK(a) *(uint8_t*)(addr + a) +#define PK16(a) *(uint16_t*)(addr + a) +#define PK32(a) *(uint32_t*)(addr + a) +#define PK64(a) *(uint64_t*)(addr + a) +#define PKip(a) *(uint8_t*)(ip + a) // Strong mem emulation helpers // Sequence of Read will trigger a DMB on "first" read if strongmem is 2 // Squence of Write will trigger a 
DMB on "last" write if strongmem is 1 // Opcode will read -#define SMREAD() if(!dyn->smread && box64_dynarec_strongmem>1) {SMDMB();} +#define SMREAD() \ + if (!dyn->smread && box64_dynarec_strongmem > 1) { SMDMB(); } // Opcode will read with option forced lock -#define SMREADLOCK(lock) if(lock || (!dyn->smread && box64_dynarec_strongmem>1)) {SMDMB();} +#define SMREADLOCK(lock) \ + if (lock || (!dyn->smread && box64_dynarec_strongmem > 1)) { SMDMB(); } // Opcode migh read (depend on nextop) -#define SMMIGHTREAD() if(!MODREG) {SMREAD();} +#define SMMIGHTREAD() \ + if (!MODREG) { SMREAD(); } // Opcode has wrote -#define SMWRITE() dyn->smwrite=1 +#define SMWRITE() dyn->smwrite = 1 // Opcode has wrote (strongmem>1 only) -#define SMWRITE2() if(box64_dynarec_strongmem>1) dyn->smwrite=1 +#define SMWRITE2() \ + if (box64_dynarec_strongmem > 1) dyn->smwrite = 1 // Opcode has wrote with option forced lock -#define SMWRITELOCK(lock) if(lock) {SMDMB();} else dyn->smwrite=1 +#define SMWRITELOCK(lock) \ + if (lock) { \ + SMDMB(); \ + } else \ + dyn->smwrite = 1 // Opcode migh have wrote (depend on nextop) -#define SMMIGHTWRITE() if(!MODREG) {SMWRITE();} +#define SMMIGHTWRITE() \ + if (!MODREG) { SMWRITE(); } // Start of sequence -#define SMSTART() SMEND() +#define SMSTART() SMEND() // End of sequence -#define SMEND() if(dyn->smwrite && box64_dynarec_strongmem) {FENCE();} dyn->smwrite=0; dyn->smread=0; +#define SMEND() \ + if (dyn->smwrite && box64_dynarec_strongmem) { FENCE(); } \ + dyn->smwrite = 0; \ + dyn->smread = 0; // Force a Data memory barrier (for LOCK: prefix) -#define SMDMB() FENCE(); dyn->smwrite=0; dyn->smread=1 +#define SMDMB() \ + FENCE(); \ + dyn->smwrite = 0; \ + dyn->smread = 1 -//LOCK_* define -#define LOCK_LOCK (int*)1 +// LOCK_* define +#define LOCK_LOCK (int*)1 // GETGD get x64 register in gd -#define GETGD gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3) +#define GETGD gd = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3) // GETED can use r1 for ed, and r2 for wback. 
wback is 0 if ed is xEAX..xEDI -#define GETED(D) if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - wback = 0; \ - } else { \ - SMREAD() \ - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \ - LDxw(x1, wback, fixedaddress); \ - ed = x1; \ - } +#define GETED(D) \ + if (MODREG) { \ + ed = xRAX + (nextop & 7) + (rex.b << 3); \ + wback = 0; \ + } else { \ + SMREAD() \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \ + LDxw(x1, wback, fixedaddress); \ + ed = x1; \ + } // GETSED can use r1 for ed, and r2 for wback. ed will be sign extended! -#define GETSED(D) if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - wback = 0; \ - if(!rex.w) { \ - ADDW(x1, ed, xZR); \ - ed = x1; \ - } \ - } else { \ - SMREAD() \ - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \ - if(rex.w) \ - LD(x1, wback, fixedaddress); \ - else \ - LW(x1, wback, fixedaddress); \ - ed = x1; \ - } +#define GETSED(D) \ + if (MODREG) { \ + ed = xRAX + (nextop & 7) + (rex.b << 3); \ + wback = 0; \ + if (!rex.w) { \ + ADDW(x1, ed, xZR); \ + ed = x1; \ + } \ + } else { \ + SMREAD() \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \ + if (rex.w) \ + LD(x1, wback, fixedaddress); \ + else \ + LW(x1, wback, fixedaddress); \ + ed = x1; \ + } // GETEDx can use r1 for ed, and r2 for wback. 
wback is 0 if ed is xEAX..xEDI -#define GETEDx(D) if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - wback = 0; \ - } else { \ - SMREAD() \ - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \ - LD(x1, wback, fixedaddress); \ - ed = x1; \ - } -#define GETEDz(D) if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - wback = 0; \ - } else { \ - SMREAD() \ - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \ - LDz(x1, wback, fixedaddress); \ - ed = x1; \ - } +#define GETEDx(D) \ + if (MODREG) { \ + ed = xRAX + (nextop & 7) + (rex.b << 3); \ + wback = 0; \ + } else { \ + SMREAD() \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \ + LD(x1, wback, fixedaddress); \ + ed = x1; \ + } +#define GETEDz(D) \ + if (MODREG) { \ + ed = xRAX + (nextop & 7) + (rex.b << 3); \ + wback = 0; \ + } else { \ + SMREAD() \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \ + LDz(x1, wback, fixedaddress); \ + ed = x1; \ + } // GETED32 can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI -#define GETED32(D) if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - wback = 0; \ - } else { \ - SMREAD() \ - addr = geted32(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \ - LDxw(x1, wback, fixedaddress); \ - ed = x1; \ - } -//GETEDH can use hint for ed, and x1 or x2 for wback (depending on hint), might also use x3. wback is 0 if ed is xEAX..xEDI -#define GETEDH(hint, D) if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - wback = 0; \ - } else { \ - SMREAD(); \ - addr = geted(dyn, addr, ninst, nextop, &wback, (hint==x2)?x1:x2, (hint==x1)?x1:x3, &fixedaddress, rex, NULL, 1, D); \ - LDxw(hint, wback, fixedaddress); \ - ed = hint; \ - } -//GETEDW can use hint for wback and ret for ed. 
wback is 0 if ed is xEAX..xEDI -#define GETEDW(hint, ret, D) if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - MV(ret, ed); \ - wback = 0; \ - } else { \ - SMREAD(); \ - addr = geted(dyn, addr, ninst, nextop, &wback, (hint==x2)?x1:x2, (hint==x1)?x1:x3, &fixedaddress, rex, NULL, 0, D); \ - ed = ret; \ - LDxw(ed, wback, fixedaddress); \ - } +#define GETED32(D) \ + if (MODREG) { \ + ed = xRAX + (nextop & 7) + (rex.b << 3); \ + wback = 0; \ + } else { \ + SMREAD() \ + addr = geted32(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \ + LDxw(x1, wback, fixedaddress); \ + ed = x1; \ + } +// GETEDH can use hint for ed, and x1 or x2 for wback (depending on hint), might also use x3. wback is 0 if ed is xEAX..xEDI +#define GETEDH(hint, D) \ + if (MODREG) { \ + ed = xRAX + (nextop & 7) + (rex.b << 3); \ + wback = 0; \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &wback, (hint == x2) ? x1 : x2, (hint == x1) ? x1 : x3, &fixedaddress, rex, NULL, 1, D); \ + LDxw(hint, wback, fixedaddress); \ + ed = hint; \ + } +// GETEDW can use hint for wback and ret for ed. wback is 0 if ed is xEAX..xEDI +#define GETEDW(hint, ret, D) \ + if (MODREG) { \ + ed = xRAX + (nextop & 7) + (rex.b << 3); \ + MV(ret, ed); \ + wback = 0; \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &wback, (hint == x2) ? x1 : x2, (hint == x1) ? x1 : x3, &fixedaddress, rex, NULL, 0, D); \ + ed = ret; \ + LDxw(ed, wback, fixedaddress); \ + } // GETGW extract x64 register in gd, that is i -#define GETGW(i) gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); ZEXTH(i, gd); gd = i; -//GETEWW will use i for ed, and can use w for wback. 
-#define GETEWW(w, i, D) if(MODREG) { \ - wback = xRAX+(nextop&7)+(rex.b<<3);\ - ZEXTH(i, wback); \ - ed = i; \ - wb1 = 0; \ - } else { \ - SMREAD(); \ - addr = geted(dyn, addr, ninst, nextop, &wback, w, i, &fixedaddress, rex, NULL, 1, D); \ - LHU(i, wback, fixedaddress);\ - ed = i; \ - wb1 = 1; \ - } -//GETEW will use i for ed, and can use r3 for wback. +#define GETGW(i) \ + gd = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3); \ + ZEXTH(i, gd); \ + gd = i; +// GETEWW will use i for ed, and can use w for wback. +#define GETEWW(w, i, D) \ + if (MODREG) { \ + wback = xRAX + (nextop & 7) + (rex.b << 3); \ + ZEXTH(i, wback); \ + ed = i; \ + wb1 = 0; \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &wback, w, i, &fixedaddress, rex, NULL, 1, D); \ + LHU(i, wback, fixedaddress); \ + ed = i; \ + wb1 = 1; \ + } +// GETEW will use i for ed, and can use r3 for wback. #define GETEW(i, D) GETEWW(x3, i, D) -//GETSEW will use i for ed, and can use r3 for wback. This is the Signed version -#define GETSEW(i, D) if(MODREG) { \ - wback = xRAX+(nextop&7)+(rex.b<<3);\ - if(rv64_zbb) SEXTH(i, wback); else {SLLI(i, wback, 48); SRAI(i, i, 48);}\ - ed = i; \ - wb1 = 0; \ - } else { \ - SMREAD(); \ - addr = geted(dyn, addr, ninst, nextop, &wback, x3, i, &fixedaddress, rex, NULL, 1, D); \ - LH(i, wback, fixedaddress); \ - ed = i; \ - wb1 = 1; \ - } +// GETSEW will use i for ed, and can use r3 for wback. 
This is the Signed version +#define GETSEW(i, D) \ + if (MODREG) { \ + wback = xRAX + (nextop & 7) + (rex.b << 3); \ + if (rv64_zbb) \ + SEXTH(i, wback); \ + else { \ + SLLI(i, wback, 48); \ + SRAI(i, i, 48); \ + } \ + ed = i; \ + wb1 = 0; \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &wback, x3, i, &fixedaddress, rex, NULL, 1, D); \ + LH(i, wback, fixedaddress); \ + ed = i; \ + wb1 = 1; \ + } // Write ed back to original register / memory -#define EWBACK EWBACKW(ed) +#define EWBACK EWBACKW(ed) // Write w back to original register / memory (w needs to be 16bits only!) -#define EWBACKW(w) if(wb1) {SH(w, wback, fixedaddress); SMWRITE();} else {SRLI(wback, wback, 16); SLLI(wback, wback, 16); OR(wback, wback, w);} +#define EWBACKW(w) \ + if (wb1) { \ + SH(w, wback, fixedaddress); \ + SMWRITE(); \ + } else { \ + SRLI(wback, wback, 16); \ + SLLI(wback, wback, 16); \ + OR(wback, wback, w); \ + } // Write back gd in correct register (gd needs to be 16bits only!) -#define GWBACK do{int g=xRAX+((nextop&0x38)>>3)+(rex.r<<3); SRLI(g, g, 16); SLLI(g, g, 16); OR(g, g, gd);}while(0) - -//GETEDO can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI -#define GETEDO(O, D, S) if(MODREG) { \ - ed = xRAX+(nextop&7)+(rex.b<<3); \ - wback = 0; \ - } else { \ - SMREAD(); \ - addr = geted(dyn, addr, ninst, nextop, &wback, x2, S, &fixedaddress, rex, NULL, 1, D); \ - ADD(S, wback, O); \ - LDxw(x1, S, fixedaddress); \ - ed = x1; \ - } -#define WBACKO(O) if(wback) {ADD(O, wback, O); SDxw(ed, O, 0); SMWRITE2();} +#define GWBACK \ + do { \ + int g = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3); \ + SRLI(g, g, 16); \ + SLLI(g, g, 16); \ + OR(g, g, gd); \ + } while (0) + +// GETEDO can use r1 for ed, and r2 for wback. 
wback is 0 if ed is xEAX..xEDI +#define GETEDO(O, D, S) \ + if (MODREG) { \ + ed = xRAX + (nextop & 7) + (rex.b << 3); \ + wback = 0; \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, S, &fixedaddress, rex, NULL, 1, D); \ + ADD(S, wback, O); \ + LDxw(x1, S, fixedaddress); \ + ed = x1; \ + } +#define WBACKO(O) \ + if (wback) { \ + ADD(O, wback, O); \ + SDxw(ed, O, 0); \ + SMWRITE2(); \ + } // FAKEED like GETED, but doesn't get anything -#define FAKEED if(!MODREG) { \ - addr = fakeed(dyn, addr, ninst, nextop); \ - } +#define FAKEED \ + if (!MODREG) { \ + addr = fakeed(dyn, addr, ninst, nextop); \ + } // GETGW extract x64 register in gd, that is i, Signed extented -#define GETSGW(i) gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3); SLLIW(i, gd, 16); SRAIW(i, i, 16); gd = i; +#define GETSGW(i) \ + gd = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3); \ + SLLIW(i, gd, 16); \ + SRAIW(i, i, 16); \ + gd = i; // Write back ed in wback (if wback not 0) -#define WBACK if(wback) {SDxw(ed, wback, fixedaddress); SMWRITE();} +#define WBACK \ + if (wback) { \ + SDxw(ed, wback, fixedaddress); \ + SMWRITE(); \ + } // GETEB will use i for ed, and can use r3 for wback. -#define GETEB(i, D) if(MODREG) { \ - if(rex.rex) { \ - wback = xRAX+(nextop&7)+(rex.b<<3); \ - wb2 = 0; \ - } else { \ - wback = (nextop&7); \ - wb2 = (wback>>2)*8; \ - wback = xRAX+(wback&3); \ - } \ - if (wb2) { \ - if (rv64_xtheadbb) { \ - TH_EXTU(i, wback, 15, 8); \ - } else { \ - MV(i, wback); \ - SRLI(i, i, wb2); \ - ANDI(i, i, 0xff); \ - } \ - } else ANDI(i, wback, 0xff);\ - wb1 = 0; \ - ed = i; \ - } else { \ - SMREAD(); \ - addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, NULL, 1, D); \ - LBU(i, wback, fixedaddress);\ - wb1 = 1; \ - ed = i; \ - } -//GETEBO will use i for ed, i is also Offset, and can use r3 for wback. 
-#define GETEBO(i, D) if(MODREG) { \ - if(rex.rex) { \ - wback = xRAX+(nextop&7)+(rex.b<<3); \ - wb2 = 0; \ - } else { \ - wback = (nextop&7); \ - wb2 = (wback>>2)*8; \ - wback = xRAX+(wback&3); \ - } \ - if (wb2) { \ - if (rv64_xtheadbb) { \ - TH_EXTU(i, wback, 15, 8); \ - } else { \ - MV(i, wback); \ - SRLI(i, i, wb2); \ - ANDI(i, i, 0xff); \ - } \ - } else ANDI(i, wback, 0xff);\ - wb1 = 0; \ - ed = i; \ - } else { \ - SMREAD(); \ - addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, NULL, 1, D); \ - ADD(x3, wback, i); \ - if(wback!=x3) wback = x3; \ - LBU(i, wback, fixedaddress);\ - wb1 = 1; \ - ed = i; \ - } -//GETSEB sign extend EB, will use i for ed, and can use r3 for wback. -#define GETSEB(i, D) if(MODREG) { \ - if(rex.rex) { \ - wback = xRAX+(nextop&7)+(rex.b<<3); \ - wb2 = 0; \ - } else { \ - wback = (nextop&7); \ - wb2 = (wback>>2)*8; \ - wback = xRAX+(wback&3); \ - } \ - MV(i, wback); \ - SLLIW(i, i, 24-wb2); \ - SRAIW(i, i, 24); \ - wb1 = 0; \ - ed = i; \ - } else { \ - SMREAD(); \ - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, D); \ - LB(i, wback, fixedaddress); \ - wb1 = 1; \ - ed = i; \ - } +#define GETEB(i, D) \ + if (MODREG) { \ + if (rex.rex) { \ + wback = xRAX + (nextop & 7) + (rex.b << 3); \ + wb2 = 0; \ + } else { \ + wback = (nextop & 7); \ + wb2 = (wback >> 2) * 8; \ + wback = xRAX + (wback & 3); \ + } \ + if (wb2) { \ + if (rv64_xtheadbb) { \ + TH_EXTU(i, wback, 15, 8); \ + } else { \ + MV(i, wback); \ + SRLI(i, i, wb2); \ + ANDI(i, i, 0xff); \ + } \ + } else \ + ANDI(i, wback, 0xff); \ + wb1 = 0; \ + ed = i; \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, NULL, 1, D); \ + LBU(i, wback, fixedaddress); \ + wb1 = 1; \ + ed = i; \ + } +// GETEBO will use i for ed, i is also Offset, and can use r3 for wback. 
+#define GETEBO(i, D) \ + if (MODREG) { \ + if (rex.rex) { \ + wback = xRAX + (nextop & 7) + (rex.b << 3); \ + wb2 = 0; \ + } else { \ + wback = (nextop & 7); \ + wb2 = (wback >> 2) * 8; \ + wback = xRAX + (wback & 3); \ + } \ + if (wb2) { \ + if (rv64_xtheadbb) { \ + TH_EXTU(i, wback, 15, 8); \ + } else { \ + MV(i, wback); \ + SRLI(i, i, wb2); \ + ANDI(i, i, 0xff); \ + } \ + } else \ + ANDI(i, wback, 0xff); \ + wb1 = 0; \ + ed = i; \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, NULL, 1, D); \ + ADD(x3, wback, i); \ + if (wback != x3) wback = x3; \ + LBU(i, wback, fixedaddress); \ + wb1 = 1; \ + ed = i; \ + } +// GETSEB sign extend EB, will use i for ed, and can use r3 for wback. +#define GETSEB(i, D) \ + if (MODREG) { \ + if (rex.rex) { \ + wback = xRAX + (nextop & 7) + (rex.b << 3); \ + wb2 = 0; \ + } else { \ + wback = (nextop & 7); \ + wb2 = (wback >> 2) * 8; \ + wback = xRAX + (wback & 3); \ + } \ + MV(i, wback); \ + SLLIW(i, i, 24 - wb2); \ + SRAIW(i, i, 24); \ + wb1 = 0; \ + ed = i; \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, D); \ + LB(i, wback, fixedaddress); \ + wb1 = 1; \ + ed = i; \ + } // GETEB32 will use i for ed, and can use r3 for wback. 
-#define GETEB32(i, D) if(MODREG) { \ - if(rex.rex) { \ - wback = xRAX+(nextop&7)+(rex.b<<3); \ - wb2 = 0; \ - } else { \ - wback = (nextop&7); \ - wb2 = (wback>>2)*8; \ - wback = xRAX+(wback&3); \ - } \ - if (wb2) { \ - if (rv64_xtheadbb) { \ - TH_EXTU(i, wback, 15, 8); \ - } else { \ - MV(i, wback); \ - SRLI(i, i, wb2); \ - ANDI(i, i, 0xff); \ - } \ - } else ANDI(i, wback, 0xff);\ - wb1 = 0; \ - ed = i; \ - } else { \ - SMREAD(); \ - addr = geted32(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, NULL, 1, D); \ - LBU(i, wback, fixedaddress);\ - wb1 = 1; \ - ed = i; \ - } - -//GETGB will use i for gd -#define GETGB(i) if(rex.rex) { \ - gb1 = xRAX+((nextop&0x38)>>3)+(rex.r<<3); \ - gb2 = 0; \ - } else { \ - gd = (nextop&0x38)>>3; \ - gb2 = ((gd&4)>>2); \ - gb1 = xRAX+(gd&3); \ - } \ - gd = i; \ - if (gb2) { \ - if (rv64_xtheadbb) { \ - TH_EXTU(gd, gb1, 15, 8);\ - } else { \ - MV(gd, gb1); \ - SRLI(gd, gd, 8); \ - ANDI(gd, gd, 0xff); \ - } \ - } else ANDI(gd, gb1, 0xff); +#define GETEB32(i, D) \ + if (MODREG) { \ + if (rex.rex) { \ + wback = xRAX + (nextop & 7) + (rex.b << 3); \ + wb2 = 0; \ + } else { \ + wback = (nextop & 7); \ + wb2 = (wback >> 2) * 8; \ + wback = xRAX + (wback & 3); \ + } \ + if (wb2) { \ + if (rv64_xtheadbb) { \ + TH_EXTU(i, wback, 15, 8); \ + } else { \ + MV(i, wback); \ + SRLI(i, i, wb2); \ + ANDI(i, i, 0xff); \ + } \ + } else \ + ANDI(i, wback, 0xff); \ + wb1 = 0; \ + ed = i; \ + } else { \ + SMREAD(); \ + addr = geted32(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, NULL, 1, D); \ + LBU(i, wback, fixedaddress); \ + wb1 = 1; \ + ed = i; \ + } + +// GETGB will use i for gd +#define GETGB(i) \ + if (rex.rex) { \ + gb1 = xRAX + ((nextop & 0x38) >> 3) + (rex.r << 3); \ + gb2 = 0; \ + } else { \ + gd = (nextop & 0x38) >> 3; \ + gb2 = ((gd & 4) >> 2); \ + gb1 = xRAX + (gd & 3); \ + } \ + gd = i; \ + if (gb2) { \ + if (rv64_xtheadbb) { \ + TH_EXTU(gd, gb1, 15, 8); \ + } else { \ + MV(gd, gb1); \ + SRLI(gd, gd, 8); \ + 
ANDI(gd, gd, 0xff); \ + } \ + } else \ + ANDI(gd, gb1, 0xff); // Write gb (gd) back to original register / memory, using s1 as scratch -#define GBBACK(s1) if(gb2) { \ - MOV64x(s1, 0xffffffffffff00ffLL); \ - AND(gb1, gb1, s1); \ - SLLI(s1, gd, 8); \ - OR(gb1, gb1, s1); \ - } else { \ - ANDI(gb1, gb1, ~0xff); \ - OR(gb1, gb1, gd); \ - } +#define GBBACK(s1) \ + if (gb2) { \ + MOV64x(s1, 0xffffffffffff00ffLL); \ + AND(gb1, gb1, s1); \ + SLLI(s1, gd, 8); \ + OR(gb1, gb1, s1); \ + } else { \ + ANDI(gb1, gb1, ~0xff); \ + OR(gb1, gb1, gd); \ + } // Write eb (ed) back to original register / memory, using s1 as scratch -#define EBBACK(s1, c) if(wb1) { \ - SB(ed, wback, fixedaddress); \ - SMWRITE(); \ - } else if(wb2) { \ - MOV64x(s1, 0xffffffffffff00ffLL); \ - AND(wback, wback, s1); \ - if (c) {ANDI(ed, ed, 0xff);} \ - SLLI(s1, ed, 8); \ - OR(wback, wback, s1); \ - } else { \ - ANDI(wback, wback, ~0xff); \ - if (c) {ANDI(ed, ed, 0xff);} \ - OR(wback, wback, ed); \ - } +#define EBBACK(s1, c) \ + if (wb1) { \ + SB(ed, wback, fixedaddress); \ + SMWRITE(); \ + } else if (wb2) { \ + MOV64x(s1, 0xffffffffffff00ffLL); \ + AND(wback, wback, s1); \ + if (c) { ANDI(ed, ed, 0xff); } \ + SLLI(s1, ed, 8); \ + OR(wback, wback, s1); \ + } else { \ + ANDI(wback, wback, ~0xff); \ + if (c) { ANDI(ed, ed, 0xff); } \ + OR(wback, wback, ed); \ + } // Get direction with size Z and based of F_DF flag, on register r ready for load/store fetching // using s as scratch. 
-#define GETDIR(r, s, Z) \ - MOV32w(r, Z); /* mask=1<<10 */ \ - ANDI(s, xFlags, 1<<F_DF); \ - BEQZ(s, 8); \ - SUB(r, xZR, r); \ +#define GETDIR(r, s, Z) \ + MOV32w(r, Z); /* mask=1<<10 */ \ + ANDI(s, xFlags, 1 << F_DF); \ + BEQZ(s, 8); \ + SUB(r, xZR, r); // Generic get GD, but reg value in gd (R_RAX is not added) -#define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) +#define GETG gd = ((nextop & 0x38) >> 3) + (rex.r << 3) // Get GX as a Single (might use x2) -#define GETGXSS(a) \ - gd = ((nextop&0x38)>>3)+(rex.r<<3); \ +#define GETGXSS(a) \ + gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \ a = sse_get_reg(dyn, ninst, x2, gd, 1) // Get GX as a Single (might use x2), no fetching old value -#define GETGXSS_empty(a) \ - gd = ((nextop&0x38)>>3)+(rex.r<<3); \ +#define GETGXSS_empty(a) \ + gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \ a = sse_get_reg_empty(dyn, ninst, x2, gd, 1) // Get GX as a Double (might use x2) -#define GETGXSD(a) \ - gd = ((nextop&0x38)>>3)+(rex.r<<3); \ +#define GETGXSD(a) \ + gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \ a = sse_get_reg(dyn, ninst, x2, gd, 0) // Get GX as a Double (might use x2), no fetching old value -#define GETGXSD_empty(a) \ - gd = ((nextop&0x38)>>3)+(rex.r<<3); \ +#define GETGXSD_empty(a) \ + gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \ a = sse_get_reg_empty(dyn, ninst, x2, gd, 0) // Get Ex as a single, not a quad (warning, x1 get used, x2 might too) -#define GETEXSS(a, D) \ - if(MODREG) { \ - a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 1); \ - } else { \ - SMREAD(); \ - a = fpu_get_scratch(dyn); \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, D); \ - FLW(a, ed, fixedaddress); \ +#define GETEXSS(a, D) \ + if (MODREG) { \ + a = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 1); \ + } else { \ + SMREAD(); \ + a = fpu_get_scratch(dyn); \ + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, D); \ + FLW(a, ed, fixedaddress); \ } // Get Ex as a 
double, not a quad (warning, x1 get used, x2 might too) -#define GETEXSD(a, D) \ - if(MODREG) { \ - a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0); \ - } else { \ - SMREAD(); \ - a = fpu_get_scratch(dyn); \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, D); \ - FLD(a, ed, fixedaddress); \ +#define GETEXSD(a, D) \ + if (MODREG) { \ + a = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0); \ + } else { \ + SMREAD(); \ + a = fpu_get_scratch(dyn); \ + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, D); \ + FLD(a, ed, fixedaddress); \ } // Will get pointer to GX in general register a, will purge SS or SD if loaded. can use gback as load address -#define GETGX() \ - gd = ((nextop&0x38)>>3)+(rex.r<<3); \ - sse_forget_reg(dyn, ninst, gd); \ - gback = xEmu; \ +#define GETGX() \ + gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \ + sse_forget_reg(dyn, ninst, gd); \ + gback = xEmu; \ gdoffset = offsetof(x64emu_t, xmm[gd]) // Get Ex address in general register a, will purge SS or SD if it's reg and is loaded. May use x3. Use wback as load address! 
-#define GETEX(a, D) \ - if(MODREG) { \ - ed = (nextop&7)+(rex.b<<3); \ - sse_forget_reg(dyn, ninst, ed); \ - fixedaddress = offsetof(x64emu_t, xmm[ed]); \ - wback = xEmu; \ - } else { \ - SMREAD(); \ - ed=16; \ - addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, 1, D); \ +#define GETEX(a, D) \ + if (MODREG) { \ + ed = (nextop & 7) + (rex.b << 3); \ + sse_forget_reg(dyn, ninst, ed); \ + fixedaddress = offsetof(x64emu_t, xmm[ed]); \ + wback = xEmu; \ + } else { \ + SMREAD(); \ + ed = 16; \ + addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, 1, D); \ } -#define GETGM() \ - gd = ((nextop&0x38)>>3); \ - mmx_forget_reg(dyn, ninst, gd); \ - gback = xEmu; \ +#define GETGM() \ + gd = ((nextop & 0x38) >> 3); \ + mmx_forget_reg(dyn, ninst, gd); \ + gback = xEmu; \ gdoffset = offsetof(x64emu_t, mmx[gd]) // Get EM, might use x3 -#define GETEM(a, D) \ - if(MODREG) { \ - ed = (nextop&7); \ - mmx_forget_reg(dyn, ninst, ed); \ - fixedaddress = offsetof(x64emu_t, mmx[ed]); \ - wback = xEmu; \ - } else { \ - SMREAD(); \ - ed=8; \ - addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, 1, D); \ +#define GETEM(a, D) \ + if (MODREG) { \ + ed = (nextop & 7); \ + mmx_forget_reg(dyn, ninst, ed); \ + fixedaddress = offsetof(x64emu_t, mmx[ed]); \ + wback = xEmu; \ + } else { \ + SMREAD(); \ + ed = 8; \ + addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, 1, D); \ } -#define SSE_LOOP_D_ITEM(GX1, EX1, F, i) \ - LWU(GX1, gback, gdoffset+i*4); \ - LWU(EX1, wback, fixedaddress+i*4); \ - F; \ - SW(GX1, gback, gdoffset+i*4); +#define SSE_LOOP_D_ITEM(GX1, EX1, F, i) \ + LWU(GX1, gback, gdoffset + i * 4); \ + LWU(EX1, wback, fixedaddress + i * 4); \ + F; \ + SW(GX1, gback, gdoffset + i * 4); // Loop for SSE opcode that use 32bits value and write to GX. 
#define SSE_LOOP_D(GX1, EX1, F) \ @@ -461,11 +533,11 @@ SSE_LOOP_D_ITEM(GX1, EX1, F, 2) \ SSE_LOOP_D_ITEM(GX1, EX1, F, 3) -#define SSE_LOOP_DS_ITEM(GX1, EX1, F, i) \ - LW(GX1, gback, gdoffset+i*4); \ - LW(EX1, wback, fixedaddress+i*4); \ - F; \ - SW(GX1, gback, gdoffset+i*4); +#define SSE_LOOP_DS_ITEM(GX1, EX1, F, i) \ + LW(GX1, gback, gdoffset + i * 4); \ + LW(EX1, wback, fixedaddress + i * 4); \ + F; \ + SW(GX1, gback, gdoffset + i * 4); // Loop for SSE opcode that use 32bits value and write to GX. #define SSE_LOOP_DS(GX1, EX1, F) \ @@ -474,34 +546,34 @@ SSE_LOOP_DS_ITEM(GX1, EX1, F, 2) \ SSE_LOOP_DS_ITEM(GX1, EX1, F, 3) -#define MMX_LOOP_W(GX1, EX1, F) \ - for (int i=0; i<4; ++i) { \ - LHU(GX1, gback, gdoffset+i*2); \ - LHU(EX1, wback, fixedaddress+i*2); \ - F; \ - SH(GX1, gback, gdoffset+i*2); \ +#define MMX_LOOP_W(GX1, EX1, F) \ + for (int i = 0; i < 4; ++i) { \ + LHU(GX1, gback, gdoffset + i * 2); \ + LHU(EX1, wback, fixedaddress + i * 2); \ + F; \ + SH(GX1, gback, gdoffset + i * 2); \ } -#define SSE_LOOP_W(GX1, EX1, F) \ - for (int i=0; i<8; ++i) { \ - LHU(GX1, gback, gdoffset+i*2); \ - LHU(EX1, wback, fixedaddress+i*2); \ - F; \ - SH(GX1, gback, gdoffset+i*2); \ +#define SSE_LOOP_W(GX1, EX1, F) \ + for (int i = 0; i < 8; ++i) { \ + LHU(GX1, gback, gdoffset + i * 2); \ + LHU(EX1, wback, fixedaddress + i * 2); \ + F; \ + SH(GX1, gback, gdoffset + i * 2); \ } -#define SSE_LOOP_WS(GX1, EX1, F) \ - for (int i=0; i<8; ++i) { \ - LH(GX1, gback, gdoffset+i*2); \ - LH(EX1, wback, fixedaddress+i*2); \ - F; \ - SH(GX1, gback, gdoffset+i*2); \ +#define SSE_LOOP_WS(GX1, EX1, F) \ + for (int i = 0; i < 8; ++i) { \ + LH(GX1, gback, gdoffset + i * 2); \ + LH(EX1, wback, fixedaddress + i * 2); \ + F; \ + SH(GX1, gback, gdoffset + i * 2); \ } -#define SSE_LOOP_D_S_ITEM(EX1, F, i) \ - LWU(EX1, wback, fixedaddress+i*4); \ - F; \ - SW(EX1, wback, fixedaddress+i*4); +#define SSE_LOOP_D_S_ITEM(EX1, F, i) \ + LWU(EX1, wback, fixedaddress + i * 4); \ + F; \ + SW(EX1, wback, 
fixedaddress + i * 4); // Loop for SSE opcode that use 32bits value and write to EX. #define SSE_LOOP_D_S(EX1, F) \ @@ -510,11 +582,11 @@ SSE_LOOP_D_S_ITEM(EX1, F, 2) \ SSE_LOOP_D_S_ITEM(EX1, F, 3) -#define SSE_LOOP_Q_ITEM(GX1, EX1, F, i) \ - LD(GX1, gback, gdoffset+i*8); \ - LD(EX1, wback, fixedaddress+i*8); \ - F; \ - SD(GX1, gback, gdoffset+i*8); +#define SSE_LOOP_Q_ITEM(GX1, EX1, F, i) \ + LD(GX1, gback, gdoffset + i * 8); \ + LD(EX1, wback, fixedaddress + i * 8); \ + F; \ + SD(GX1, gback, gdoffset + i * 8); // Loop for SSE opcode that use 64bits value and write to GX. #define SSE_LOOP_Q(GX1, EX1, F) \ @@ -522,11 +594,11 @@ SSE_LOOP_Q_ITEM(GX1, EX1, F, 1) -#define SSE_LOOP_FQ_ITEM(GX1, EX1, F, i) \ - FLD(v0, gback, gdoffset+i*8); \ - FLD(v1, wback, fixedaddress+i*8); \ - F; \ - FSD(v0, gback, gdoffset+i*8); +#define SSE_LOOP_FQ_ITEM(GX1, EX1, F, i) \ + FLD(v0, gback, gdoffset + i * 8); \ + FLD(v1, wback, fixedaddress + i * 8); \ + F; \ + FSD(v0, gback, gdoffset + i * 8); #define SSE_LOOP_FQ(GX1, EX1, F) \ v0 = fpu_get_scratch(dyn); \ @@ -535,18 +607,18 @@ SSE_LOOP_FQ_ITEM(GX1, EX1, F, 1) -#define SSE_LOOP_MV_Q_ITEM(s, i) \ - LD(s, wback, fixedaddress+i*8); \ - SD(s, gback, gdoffset+i*8); +#define SSE_LOOP_MV_Q_ITEM(s, i) \ + LD(s, wback, fixedaddress + i * 8); \ + SD(s, gback, gdoffset + i * 8); // Loop for SSE opcode that moves 64bits value from wback to gback, use s as scratch. #define SSE_LOOP_MV_Q(s) \ SSE_LOOP_MV_Q_ITEM(s, 0) \ SSE_LOOP_MV_Q_ITEM(s, 1) -#define SSE_LOOP_MV_Q_ITEM2(s, i) \ - LD(s, gback, gdoffset+i*8); \ - SD(s, wback, fixedaddress+i*8); +#define SSE_LOOP_MV_Q_ITEM2(s, i) \ + LD(s, gback, gdoffset + i * 8); \ + SD(s, wback, fixedaddress + i * 8); // Loop for SSE opcode that moves 64bits value from gback to wback, use s as scratch. #define SSE_LOOP_MV_Q2(s) \ @@ -563,7 +635,7 @@ // R0 will not be pushed/popd if ret is -2. 
Flags are not save/restored #define CALL_S(F, ret) call_c(dyn, ninst, F, x6, ret, 0, 0) -#define MARKi(i) dyn->insts[ninst].mark[i] = dyn->native_size +#define MARKi(i) dyn->insts[ninst].mark[i] = dyn->native_size #define GETMARKi(i) dyn->insts[ninst].mark[i] #define MARK MARKi(0) #define GETMARK GETMARKi(0) @@ -572,57 +644,57 @@ #define MARK3 MARKi(2) #define GETMARK3 GETMARKi(2) -#define MARKFi(i) dyn->insts[ninst].markf[i] = dyn->native_size +#define MARKFi(i) dyn->insts[ninst].markf[i] = dyn->native_size #define GETMARKFi(i) dyn->insts[ninst].markf[i] -#define MARKF MARKFi(0) -#define GETMARKF GETMARKFi(0) -#define MARKF2 MARKFi(1) -#define GETMARKF2 GETMARKFi(1) +#define MARKF MARKFi(0) +#define GETMARKF GETMARKFi(0) +#define MARKF2 MARKFi(1) +#define GETMARKF2 GETMARKFi(1) #define MARKSEG dyn->insts[ninst].markseg = dyn->native_size #define GETMARKSEG dyn->insts[ninst].markseg #define MARKLOCK dyn->insts[ninst].marklock = dyn->native_size #define GETMARKLOCK dyn->insts[ninst].marklock -#define Bxx_gen(OP, M, reg1, reg2) \ - j64 = GET##M - dyn->native_size; \ - B##OP (reg1, reg2, j64) +#define Bxx_gen(OP, M, reg1, reg2) \ + j64 = GET##M - dyn->native_size; \ + B##OP(reg1, reg2, j64) -#define Bxx_geni(OP, M, reg1, reg2, i) \ - j64 = GET##M##i(i) - dyn->native_size; \ - B##OP (reg1, reg2, j64) +#define Bxx_geni(OP, M, reg1, reg2, i) \ + j64 = GET##M##i(i) - dyn->native_size; \ + B##OP(reg1, reg2, j64) // Branch to MARK if reg1==reg2 (use j64) -#define BEQ_MARK(reg1, reg2) Bxx_gen(EQ, MARK, reg1, reg2) +#define BEQ_MARK(reg1, reg2) Bxx_gen(EQ, MARK, reg1, reg2) #define BEQ_MARKi(reg1, reg2, i) Bxx_geni(EQ, MARK, reg1, reg2, i) // Branch to MARK if reg1!=reg2 (use j64) -#define BNE_MARK(reg1, reg2) Bxx_gen(NE, MARK, reg1, reg2) +#define BNE_MARK(reg1, reg2) Bxx_gen(NE, MARK, reg1, reg2) #define BNE_MARKi(reg1, reg2, i) Bxx_geni(NE, MARK, reg1, reg2, i) // Branch to MARK if reg1!=0 (use j64) -#define BNEZ_MARK(reg) BNE_MARK(reg, xZR) +#define BNEZ_MARK(reg) 
BNE_MARK(reg, xZR) #define BNEZ_MARKi(reg, i) BNE_MARKi(reg, xZR, i) // Branch to MARK instruction unconditionnal (use j64) -#define B_MARK_nocond Bxx_gen(__, MARK, 0, 0) -#define B_MARKi_nocond Bxx_geni(__, MARK, 0, 0, i) +#define B_MARK_nocond Bxx_gen(__, MARK, 0, 0) +#define B_MARKi_nocond Bxx_geni(__, MARK, 0, 0, i) // Branch to MARK if reg1<reg2 (use j64) -#define BLT_MARK(reg1, reg2) Bxx_gen(LT, MARK, reg1, reg2) +#define BLT_MARK(reg1, reg2) Bxx_gen(LT, MARK, reg1, reg2) #define BLT_MARKi(reg1, reg2) Bxx_geni(LT, MARK, reg1, reg2, i) // Branch to MARK if reg1<reg2 (use j64) -#define BLTU_MARK(reg1, reg2) Bxx_gen(LTU, MARK, reg1, reg2) +#define BLTU_MARK(reg1, reg2) Bxx_gen(LTU, MARK, reg1, reg2) #define BLTU_MARKi(reg1, reg2) Bxx_geni(LTU, MARK, reg1, reg2, i) // Branch to MARK if reg1>=reg2 (use j64) -#define BGE_MARK(reg1, reg2) Bxx_gen(GE, MARK, reg1, reg2) +#define BGE_MARK(reg1, reg2) Bxx_gen(GE, MARK, reg1, reg2) #define BGE_MARKi(reg1, reg2) Bxx_geni(GE, MARK, reg1, reg2, i) // Branch to MARK2 if reg1==reg2 (use j64) -#define BEQ_MARK2(reg1, reg2) Bxx_gen(EQ, MARK2, reg1,reg2) +#define BEQ_MARK2(reg1, reg2) Bxx_gen(EQ, MARK2, reg1, reg2) // Branch to MARK2 if reg1!=reg2 (use j64) -#define BNE_MARK2(reg1, reg2) Bxx_gen(NE, MARK2, reg1,reg2) +#define BNE_MARK2(reg1, reg2) Bxx_gen(NE, MARK2, reg1, reg2) // Branch to MARK2 if reg1!=0 (use j64) #define BNEZ_MARK2(reg) BNE_MARK2(reg, xZR) // Branch to MARK2 if reg1<reg2 (use j64) -#define BLT_MARK2(reg1, reg2) Bxx_gen(LT, MARK2, reg1,reg2) +#define BLT_MARK2(reg1, reg2) Bxx_gen(LT, MARK2, reg1, reg2) // Branch to MARK2 instruction unconditionnal (use j64) -#define B_MARK2_nocond Bxx_gen(__, MARK2, 0, 0) +#define B_MARK2_nocond Bxx_gen(__, MARK2, 0, 0) // Branch to MARK3 if reg1==reg2 (use j64) #define BEQ_MARK3(reg1, reg2) Bxx_gen(EQ, MARK3, reg1, reg2) // Branch to MARK3 if reg1!=reg2 (use j64) @@ -630,124 +702,157 @@ // Branch to MARK3 if reg1!=0 (use j64) #define BNEZ_MARK3(reg) BNE_MARK3(reg, xZR) // 
Branch to MARK3 instruction unconditionnal (use j64) -#define B_MARK3_nocond Bxx_gen(__, MARK3, 0, 0) +#define B_MARK3_nocond Bxx_gen(__, MARK3, 0, 0) // Branch to MARKLOCK if reg1!=reg2 (use j64) #define BNE_MARKLOCK(reg1, reg2) Bxx_gen(NE, MARKLOCK, reg1, reg2) // Branch to MARKLOCK if reg1!=0 (use j64) #define BNEZ_MARKLOCK(reg) BNE_MARKLOCK(reg, xZR) // Branch to NEXT if reg1==reg2 (use j64) -#define BEQ_NEXT(reg1, reg2) \ - j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \ +#define BEQ_NEXT(reg1, reg2) \ + j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \ BEQ(reg1, reg2, j64) // Branch to NEXT if reg1==0 (use j64) -#define CBZ_NEXT(reg1) \ - j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \ +#define CBZ_NEXT(reg1) \ + j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \ BEQ(reg1, xZR, j64) // Branch to NEXT if reg1!=0 (use j64) -#define CBNZ_NEXT(reg1) \ - j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \ +#define CBNZ_NEXT(reg1) \ + j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \ BNE(reg1, xZR, j64) // Branch to next instruction unconditionnal (use j64) -#define B_NEXT_nocond \ - j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;\ +#define B_NEXT_nocond \ + j64 = (dyn->insts) ? 
(dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \ B(j64) // Branch to MARKSEG if reg is 0 (use j64) -#define CBZ_MARKSEG(reg) \ - j64 = GETMARKSEG-(dyn->native_size); \ +#define CBZ_MARKSEG(reg) \ + j64 = GETMARKSEG - (dyn->native_size); \ BEQZ(reg, j64); // Branch to MARKSEG if reg is not 0 (use j64) -#define CBNZ_MARKSEG(reg) \ - j64 = GETMARKSEG-(dyn->native_size); \ +#define CBNZ_MARKSEG(reg) \ + j64 = GETMARKSEG - (dyn->native_size); \ BNEZ(reg, j64); -#define IFX(A) if((dyn->insts[ninst].x64.gen_flags&(A))) -#define IFX_PENDOR0 if((dyn->insts[ninst].x64.gen_flags&(X_PEND) || !dyn->insts[ninst].x64.gen_flags)) -#define IFXX(A) if((dyn->insts[ninst].x64.gen_flags==(A))) -#define IFX2X(A, B) if((dyn->insts[ninst].x64.gen_flags==(A) || dyn->insts[ninst].x64.gen_flags==(B) || dyn->insts[ninst].x64.gen_flags==((A)|(B)))) -#define IFXN(A, B) if((dyn->insts[ninst].x64.gen_flags&(A) && !(dyn->insts[ninst].x64.gen_flags&(B)))) +#define IFX(A) if ((dyn->insts[ninst].x64.gen_flags & (A))) +#define IFX_PENDOR0 if ((dyn->insts[ninst].x64.gen_flags & (X_PEND) || !dyn->insts[ninst].x64.gen_flags)) +#define IFXX(A) if ((dyn->insts[ninst].x64.gen_flags == (A))) +#define IFX2X(A, B) if ((dyn->insts[ninst].x64.gen_flags == (A) || dyn->insts[ninst].x64.gen_flags == (B) || dyn->insts[ninst].x64.gen_flags == ((A) | (B)))) +#define IFXN(A, B) if ((dyn->insts[ninst].x64.gen_flags & (A) && !(dyn->insts[ninst].x64.gen_flags & (B)))) -#define STORE_REG(A) SD(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) -#define LOAD_REG(A) LD(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) +#define STORE_REG(A) SD(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) +#define LOAD_REG(A) LD(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) // Need to also store current value of some register, as they may be used by functions like setjmp -#define STORE_XEMU_CALL() \ - STORE_REG(RBX); \ - STORE_REG(RDX); \ - STORE_REG(RSP); \ - STORE_REG(RBP); \ - STORE_REG(RDI); \ - STORE_REG(RSI); \ - STORE_REG(R8); \ - STORE_REG(R9); 
\ - STORE_REG(R10); \ - STORE_REG(R11); \ - -#define LOAD_XEMU_CALL() \ - -#define LOAD_XEMU_REM() \ - LOAD_REG(RBX); \ - LOAD_REG(RDX); \ - LOAD_REG(RSP); \ - LOAD_REG(RBP); \ - LOAD_REG(RDI); \ - LOAD_REG(RSI); \ - LOAD_REG(R8); \ - LOAD_REG(R9); \ - LOAD_REG(R10); \ - LOAD_REG(R11); \ - - -#define SET_DFNONE() if(!dyn->f.dfnone) {SW(xZR, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=1;} -#define SET_DF(S, N) if((N)!=d_none) {MOV_U12(S, (N)); SW(S, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=0;} else SET_DFNONE() -#define SET_NODF() dyn->f.dfnone = 0 -#define SET_DFOK() dyn->f.dfnone = 1 - -#define CLEAR_FLAGS() IFX(X_ALL) {ANDI(xFlags, xFlags, ~((1UL<<F_AF) | (1UL<<F_CF) | (1UL<<F_OF2) | (1UL<<F_ZF) | (1UL<<F_SF) | (1UL<<F_PF)));} - -#define CALC_SUB_FLAGS(op1_, op2, res, scratch1, scratch2, width) \ - IFX(X_AF | X_CF | X_OF) { \ - /* calc borrow chain */ \ - /* bc = (res & (~op1 | op2)) | (~op1 & op2) */ \ - OR(scratch1, op1_, op2); \ - AND(scratch2, res, scratch1); \ - AND(op1_, op1_, op2); \ - OR(scratch2, scratch2, op1_); \ - IFX(X_AF) { \ - /* af = bc & 0x8 */ \ - ANDI(scratch1, scratch2, 8); \ - BEQZ(scratch1, 8); \ - ORI(xFlags, xFlags, 1 << F_AF); \ - } \ - IFX(X_CF) { \ - /* cf = bc & (1<<(width-1)) */ \ - if ((width) == 8) { \ - ANDI(scratch1, scratch2, 0x80); \ - } else { \ - SRLI(scratch1, scratch2, (width)-1); \ - if(width!=64) ANDI(scratch1, scratch1, 1); \ - } \ - BEQZ(scratch1, 8); \ - ORI(xFlags, xFlags, 1 << F_CF); \ - } \ - IFX(X_OF) { \ - /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */ \ - SRLI(scratch1, scratch2, (width)-2); \ - SRLI(scratch2, scratch1, 1); \ - XOR(scratch1, scratch1, scratch2); \ - ANDI(scratch1, scratch1, 1); \ - BEQZ(scratch1, 8); \ - ORI(xFlags, xFlags, 1 << F_OF2); \ - } \ +#define STORE_XEMU_CALL(s0) \ + if (rv64_xtheadmempair) { \ + ADDI(s0, xEmu, offsetof(x64emu_t, regs[_RSP])); \ + TH_SDD(xRDX, xRBX, xEmu, 1); \ + TH_SDD(xRSP, xRBP, s0, 0); \ + TH_SDD(xRSI, xRDI, s0, 1); \ + TH_SDD(xR8, xR9, s0, 2); \ + 
TH_SDD(xR10, xR11, s0, 3); \ + } else { \ + STORE_REG(RBX); \ + STORE_REG(RDX); \ + STORE_REG(RSP); \ + STORE_REG(RBP); \ + STORE_REG(RDI); \ + STORE_REG(RSI); \ + STORE_REG(R8); \ + STORE_REG(R9); \ + STORE_REG(R10); \ + STORE_REG(R11); \ + } + +#define LOAD_XEMU_CALL() + +#define LOAD_XEMU_REM(s0) \ + if (rv64_xtheadmempair) { \ + ADDI(s0, xEmu, offsetof(x64emu_t, regs[_RSP])); \ + TH_LDD(xRDX, xRBX, xEmu, 1); \ + TH_LDD(xRSP, xRBP, s0, 0); \ + TH_LDD(xRSI, xRDI, s0, 1); \ + TH_LDD(xR8, xR9, s0, 2); \ + TH_LDD(xR10, xR11, s0, 3); \ + } else { \ + LOAD_REG(RBX); \ + LOAD_REG(RDX); \ + LOAD_REG(RSP); \ + LOAD_REG(RBP); \ + LOAD_REG(RDI); \ + LOAD_REG(RSI); \ + LOAD_REG(R8); \ + LOAD_REG(R9); \ + LOAD_REG(R10); \ + LOAD_REG(R11); \ + } + + +#define SET_DFNONE() \ + if (!dyn->f.dfnone) { \ + SW(xZR, xEmu, offsetof(x64emu_t, df)); \ + dyn->f.dfnone = 1; \ + } +#define SET_DF(S, N) \ + if ((N) != d_none) { \ + MOV_U12(S, (N)); \ + SW(S, xEmu, offsetof(x64emu_t, df)); \ + dyn->f.dfnone = 0; \ + } else \ + SET_DFNONE() +#define SET_NODF() dyn->f.dfnone = 0 +#define SET_DFOK() dyn->f.dfnone = 1 + +#define CLEAR_FLAGS() \ + IFX(X_ALL) { ANDI(xFlags, xFlags, ~((1UL << F_AF) | (1UL << F_CF) | (1UL << F_OF2) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF))); } + +#define CALC_SUB_FLAGS(op1_, op2, res, scratch1, scratch2, width) \ + IFX(X_AF | X_CF | X_OF) \ + { \ + /* calc borrow chain */ \ + /* bc = (res & (~op1 | op2)) | (~op1 & op2) */ \ + OR(scratch1, op1_, op2); \ + AND(scratch2, res, scratch1); \ + AND(op1_, op1_, op2); \ + OR(scratch2, scratch2, op1_); \ + IFX(X_AF) \ + { \ + /* af = bc & 0x8 */ \ + ANDI(scratch1, scratch2, 8); \ + BEQZ(scratch1, 8); \ + ORI(xFlags, xFlags, 1 << F_AF); \ + } \ + IFX(X_CF) \ + { \ + /* cf = bc & (1<<(width-1)) */ \ + if ((width) == 8) { \ + ANDI(scratch1, scratch2, 0x80); \ + } else { \ + SRLI(scratch1, scratch2, (width)-1); \ + if (width != 64) ANDI(scratch1, scratch1, 1); \ + } \ + BEQZ(scratch1, 8); \ + ORI(xFlags, xFlags, 1 << 
F_CF); \ + } \ + IFX(X_OF) \ + { \ + /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */ \ + SRLI(scratch1, scratch2, (width)-2); \ + SRLI(scratch2, scratch1, 1); \ + XOR(scratch1, scratch1, scratch2); \ + ANDI(scratch1, scratch1, 1); \ + BEQZ(scratch1, 8); \ + ORI(xFlags, xFlags, 1 << F_OF2); \ + } \ } // Adjust the xFlags bit 11 -> bit 5, result in reg (can be xFlags, but not s1) -#define FLAGS_ADJUST_FROM11(reg, s1)\ - ANDI(reg, xFlags, ~(1<<5)); \ - SRLI(s1, reg, 11-5); \ - ANDI(s1, s1, 1<<5); \ +#define FLAGS_ADJUST_FROM11(reg, s1) \ + ANDI(reg, xFlags, ~(1 << 5)); \ + SRLI(s1, reg, 11 - 5); \ + ANDI(s1, s1, 1 << 5); \ OR(reg, reg, s1) // Adjust the xFlags bit 5 -> bit 11, src and dst can be the same (and can be xFlags, but not s1) @@ -755,8 +860,8 @@ LUI(s1, 0xFFFFF); \ ADDIW(s1, s1, 0x7DF); \ AND(s1, src, s1); \ - ANDI(dst, src, 1<<5); \ - SLLI(dst, dst, 11-5); \ + ANDI(dst, src, 1 << 5); \ + SLLI(dst, dst, 11 - 5); \ OR(dst, dst, s1) #ifndef MAYSETFLAGS @@ -764,37 +869,39 @@ #endif #ifndef READFLAGS -#define READFLAGS(A) \ - if(((A)!=X_PEND && dyn->f.pending!=SF_SET) \ - && (dyn->f.pending!=SF_SET_PENDING)) { \ - if(dyn->f.pending!=SF_PENDING) { \ - LD(x3, xEmu, offsetof(x64emu_t, df)); \ - j64 = (GETMARKF)-(dyn->native_size); \ - BEQ(x3, xZR, j64); \ - } \ - CALL_(UpdateFlags, -1, 0); \ - FLAGS_ADJUST_FROM11(xFlags, x3); \ - MARKF; \ - dyn->f.pending = SF_SET; \ - SET_DFOK(); \ +#define READFLAGS(A) \ + if (((A) != X_PEND && dyn->f.pending != SF_SET) \ + && (dyn->f.pending != SF_SET_PENDING)) { \ + if (dyn->f.pending != SF_PENDING) { \ + LD(x3, xEmu, offsetof(x64emu_t, df)); \ + j64 = (GETMARKF) - (dyn->native_size); \ + BEQ(x3, xZR, j64); \ + } \ + CALL_(UpdateFlags, -1, 0); \ + FLAGS_ADJUST_FROM11(xFlags, x3); \ + MARKF; \ + dyn->f.pending = SF_SET; \ + SET_DFOK(); \ } #endif #ifndef SETFLAGS -#define SETFLAGS(A, B) \ - if(dyn->f.pending!=SF_SET \ - && ((B)&SF_SUB) \ - && (dyn->insts[ninst].x64.gen_flags&(~(A)))) \ - 
READFLAGS(((dyn->insts[ninst].x64.gen_flags&X_PEND)?X_ALL:dyn->insts[ninst].x64.gen_flags)&(~(A)));\ - if(dyn->insts[ninst].x64.gen_flags) switch(B) { \ - case SF_SUBSET: \ - case SF_SET: dyn->f.pending = SF_SET; break; \ - case SF_PENDING: dyn->f.pending = SF_PENDING; break; \ - case SF_SUBSET_PENDING: \ - case SF_SET_PENDING: \ - dyn->f.pending = (dyn->insts[ninst].x64.gen_flags&X_PEND)?SF_SET_PENDING:SF_SET; \ - break; \ - } else dyn->f.pending = SF_SET +#define SETFLAGS(A, B) \ + if (dyn->f.pending != SF_SET \ + && ((B)&SF_SUB) \ + && (dyn->insts[ninst].x64.gen_flags & (~(A)))) \ + READFLAGS(((dyn->insts[ninst].x64.gen_flags & X_PEND) ? X_ALL : dyn->insts[ninst].x64.gen_flags) & (~(A))); \ + if (dyn->insts[ninst].x64.gen_flags) switch (B) { \ + case SF_SUBSET: \ + case SF_SET: dyn->f.pending = SF_SET; break; \ + case SF_PENDING: dyn->f.pending = SF_PENDING; break; \ + case SF_SUBSET_PENDING: \ + case SF_SET_PENDING: \ + dyn->f.pending = (dyn->insts[ninst].x64.gen_flags & X_PEND) ? SF_SET_PENDING : SF_SET; \ + break; \ + } \ + else \ + dyn->f.pending = SF_SET #endif #ifndef JUMP #define JUMP(A, C) @@ -805,14 +912,24 @@ #ifndef BARRIER_NEXT #define BARRIER_NEXT(A) #endif -#define UFLAG_OP1(A) if(dyn->insts[ninst].x64.gen_flags) {SDxw(A, xEmu, offsetof(x64emu_t, op1));} -#define UFLAG_OP2(A) if(dyn->insts[ninst].x64.gen_flags) {SDxw(A, xEmu, offsetof(x64emu_t, op2));} -#define UFLAG_OP12(A1, A2) if(dyn->insts[ninst].x64.gen_flags) {SDxw(A1, xEmu, offsetof(x64emu_t, op1));SDxw(A2, xEmu, offsetof(x64emu_t, op2));} -#define UFLAG_RES(A) if(dyn->insts[ninst].x64.gen_flags) {SDxw(A, xEmu, offsetof(x64emu_t, res));} -#define UFLAG_DF(r, A) if(dyn->insts[ninst].x64.gen_flags) {SET_DF(r, A)} -#define UFLAG_IF if(dyn->insts[ninst].x64.gen_flags) +#define UFLAG_OP1(A) \ + if (dyn->insts[ninst].x64.gen_flags) { SDxw(A, xEmu, offsetof(x64emu_t, op1)); } +#define UFLAG_OP2(A) \ + if (dyn->insts[ninst].x64.gen_flags) { SDxw(A, xEmu, offsetof(x64emu_t, op2)); } +#define 
UFLAG_OP12(A1, A2) \ + if (dyn->insts[ninst].x64.gen_flags) { \ + SDxw(A1, xEmu, offsetof(x64emu_t, op1)); \ + SDxw(A2, xEmu, offsetof(x64emu_t, op2)); \ + } +#define UFLAG_RES(A) \ + if (dyn->insts[ninst].x64.gen_flags) { SDxw(A, xEmu, offsetof(x64emu_t, res)); } +#define UFLAG_DF(r, A) \ + if (dyn->insts[ninst].x64.gen_flags) { SET_DF(r, A) } +#define UFLAG_IF if (dyn->insts[ninst].x64.gen_flags) #ifndef DEFAULT -#define DEFAULT *ok = -1; BARRIER(2) +#define DEFAULT \ + *ok = -1; \ + BARRIER(2) #endif #ifndef TABLE64 @@ -830,206 +947,208 @@ #define GETIP_(A) #else // put value in the Table64 even if not using it for now to avoid difference between Step2 and Step3. Needs to be optimized later... -#define GETIP(A) \ - if(dyn->last_ip && ((A)-dyn->last_ip)<2048) { \ - uint64_t _delta_ip = (A)-dyn->last_ip; \ - dyn->last_ip += _delta_ip; \ - if(_delta_ip) { \ - ADDI(xRIP, xRIP, _delta_ip); \ - } \ - } else { \ - dyn->last_ip = (A); \ - if(dyn->last_ip<0xffffffff) { \ - MOV64x(xRIP, dyn->last_ip); \ - } else \ - TABLE64(xRIP, dyn->last_ip); \ +#define GETIP(A) \ + if (dyn->last_ip && ((A)-dyn->last_ip) < 2048) { \ + uint64_t _delta_ip = (A)-dyn->last_ip; \ + dyn->last_ip += _delta_ip; \ + if (_delta_ip) { \ + ADDI(xRIP, xRIP, _delta_ip); \ + } \ + } else { \ + dyn->last_ip = (A); \ + if (dyn->last_ip < 0xffffffff) { \ + MOV64x(xRIP, dyn->last_ip); \ + } else \ + TABLE64(xRIP, dyn->last_ip); \ } #define GETIP_(A) \ - if(dyn->last_ip && ((A)-dyn->last_ip)<2048) { \ + if (dyn->last_ip && ((A)-dyn->last_ip) < 2048) { \ int64_t _delta_ip = (A)-dyn->last_ip; \ - if(_delta_ip) {ADDI(xRIP, xRIP, _delta_ip);} \ + if (_delta_ip) { ADDI(xRIP, xRIP, _delta_ip); } \ } else { \ - if((A)<0xffffffff) { \ + if ((A) < 0xffffffff) { \ MOV64x(xRIP, (A)); \ } else \ TABLE64(xRIP, (A)); \ } #endif -#define CLEARIP() dyn->last_ip=0 +#define CLEARIP() dyn->last_ip = 0 #if STEP < 2 -#define PASS2IF(A, B) if(A) +#define PASS2IF(A, B) if (A) #elif STEP == 2 -#define PASS2IF(A, B) if(A) 
dyn->insts[ninst].pass2choice = B; if(dyn->insts[ninst].pass2choice == B) +#define PASS2IF(A, B) \ + if (A) dyn->insts[ninst].pass2choice = B; \ + if (dyn->insts[ninst].pass2choice == B) #else -#define PASS2IF(A, B) if(dyn->insts[ninst].pass2choice == B) +#define PASS2IF(A, B) if (dyn->insts[ninst].pass2choice == B) #endif -#define MODREG ((nextop&0xC0)==0xC0) +#define MODREG ((nextop & 0xC0) == 0xC0) void rv64_epilog(void); void rv64_epilog_fast(void); void* rv64_next(x64emu_t* emu, uintptr_t addr); #ifndef STEPNAME -#define STEPNAME3(N,M) N##M -#define STEPNAME2(N,M) STEPNAME3(N,M) -#define STEPNAME(N) STEPNAME2(N, STEP) +#define STEPNAME3(N, M) N##M +#define STEPNAME2(N, M) STEPNAME3(N, M) +#define STEPNAME(N) STEPNAME2(N, STEP) #endif -#define native_pass STEPNAME(native_pass) - -#define dynarec64_00 STEPNAME(dynarec64_00) -#define dynarec64_00_0 STEPNAME(dynarec64_00_0) -#define dynarec64_00_1 STEPNAME(dynarec64_00_1) -#define dynarec64_00_2 STEPNAME(dynarec64_00_2) -#define dynarec64_00_3 STEPNAME(dynarec64_00_3) -#define dynarec64_0F STEPNAME(dynarec64_0F) -#define dynarec64_64 STEPNAME(dynarec64_64) -#define dynarec64_65 STEPNAME(dynarec64_65) -#define dynarec64_66 STEPNAME(dynarec64_66) -#define dynarec64_67 STEPNAME(dynarec64_67) -#define dynarec64_D8 STEPNAME(dynarec64_D8) -#define dynarec64_D9 STEPNAME(dynarec64_D9) -#define dynarec64_DA STEPNAME(dynarec64_DA) -#define dynarec64_DB STEPNAME(dynarec64_DB) -#define dynarec64_DC STEPNAME(dynarec64_DC) -#define dynarec64_DD STEPNAME(dynarec64_DD) -#define dynarec64_DE STEPNAME(dynarec64_DE) -#define dynarec64_DF STEPNAME(dynarec64_DF) -#define dynarec64_F0 STEPNAME(dynarec64_F0) -#define dynarec64_660F STEPNAME(dynarec64_660F) -#define dynarec64_6664 STEPNAME(dynarec64_6664) -#define dynarec64_66F0 STEPNAME(dynarec64_66F0) -#define dynarec64_F20F STEPNAME(dynarec64_F20F) -#define dynarec64_F30F STEPNAME(dynarec64_F30F) - -#define geted STEPNAME(geted) -#define geted32 STEPNAME(geted32) -#define geted16 
STEPNAME(geted16) -#define jump_to_epilog STEPNAME(jump_to_epilog) -#define jump_to_epilog_fast STEPNAME(jump_to_epilog_fast) -#define jump_to_next STEPNAME(jump_to_next) -#define ret_to_epilog STEPNAME(ret_to_epilog) -#define retn_to_epilog STEPNAME(retn_to_epilog) -#define iret_to_epilog STEPNAME(iret_to_epilog) -#define call_c STEPNAME(call_c) -#define call_n STEPNAME(call_n) -#define grab_segdata STEPNAME(grab_segdata) -#define emit_cmp8 STEPNAME(emit_cmp8) -#define emit_cmp16 STEPNAME(emit_cmp16) -#define emit_cmp32 STEPNAME(emit_cmp32) -#define emit_cmp8_0 STEPNAME(emit_cmp8_0) -#define emit_cmp16_0 STEPNAME(emit_cmp16_0) -#define emit_cmp32_0 STEPNAME(emit_cmp32_0) -#define emit_test8 STEPNAME(emit_test8) -#define emit_test16 STEPNAME(emit_test16) -#define emit_test32 STEPNAME(emit_test32) -#define emit_test32c STEPNAME(emit_test32) -#define emit_add32 STEPNAME(emit_add32) -#define emit_add32c STEPNAME(emit_add32c) -#define emit_add8 STEPNAME(emit_add8) -#define emit_add8c STEPNAME(emit_add8c) -#define emit_sub32 STEPNAME(emit_sub32) -#define emit_sub32c STEPNAME(emit_sub32c) -#define emit_sub8 STEPNAME(emit_sub8) -#define emit_sub8c STEPNAME(emit_sub8c) -#define emit_or32 STEPNAME(emit_or32) -#define emit_or32c STEPNAME(emit_or32c) -#define emit_xor32 STEPNAME(emit_xor32) -#define emit_xor32c STEPNAME(emit_xor32c) -#define emit_and32 STEPNAME(emit_and32) -#define emit_and32c STEPNAME(emit_and32c) -#define emit_or8 STEPNAME(emit_or8) -#define emit_or8c STEPNAME(emit_or8c) -#define emit_xor8 STEPNAME(emit_xor8) -#define emit_xor8c STEPNAME(emit_xor8c) -#define emit_and8 STEPNAME(emit_and8) -#define emit_and8c STEPNAME(emit_and8c) -#define emit_add16 STEPNAME(emit_add16) -#define emit_add16c STEPNAME(emit_add16c) -#define emit_sub16 STEPNAME(emit_sub16) -#define emit_sub16c STEPNAME(emit_sub16c) -#define emit_or16 STEPNAME(emit_or16) -#define emit_or16c STEPNAME(emit_or16c) -#define emit_xor16 STEPNAME(emit_xor16) -#define emit_xor16c STEPNAME(emit_xor16c) 
-#define emit_and16 STEPNAME(emit_and16) -#define emit_and16c STEPNAME(emit_and16c) -#define emit_inc32 STEPNAME(emit_inc32) -#define emit_inc16 STEPNAME(emit_inc16) -#define emit_inc8 STEPNAME(emit_inc8) -#define emit_dec32 STEPNAME(emit_dec32) -#define emit_dec16 STEPNAME(emit_dec16) -#define emit_dec8 STEPNAME(emit_dec8) -#define emit_adc32 STEPNAME(emit_adc32) -#define emit_adc32c STEPNAME(emit_adc32c) -#define emit_adc8 STEPNAME(emit_adc8) -#define emit_adc8c STEPNAME(emit_adc8c) -#define emit_adc16 STEPNAME(emit_adc16) -#define emit_adc16c STEPNAME(emit_adc16c) -#define emit_sbb32 STEPNAME(emit_sbb32) -#define emit_sbb32c STEPNAME(emit_sbb32c) -#define emit_sbb8 STEPNAME(emit_sbb8) -#define emit_sbb8c STEPNAME(emit_sbb8c) -#define emit_sbb16 STEPNAME(emit_sbb16) -#define emit_sbb16c STEPNAME(emit_sbb16c) -#define emit_neg32 STEPNAME(emit_neg32) -#define emit_neg16 STEPNAME(emit_neg16) -#define emit_neg8 STEPNAME(emit_neg8) -#define emit_shl32 STEPNAME(emit_shl32) -#define emit_shl32c STEPNAME(emit_shl32c) -#define emit_shr32 STEPNAME(emit_shr32) -#define emit_shr32c STEPNAME(emit_shr32c) -#define emit_sar32c STEPNAME(emit_sar32c) -#define emit_rol32 STEPNAME(emit_rol32) -#define emit_ror32 STEPNAME(emit_ror32) -#define emit_rol32c STEPNAME(emit_rol32c) -#define emit_ror32c STEPNAME(emit_ror32c) -#define emit_shrd32c STEPNAME(emit_shrd32c) -#define emit_shld32c STEPNAME(emit_shld32c) - -#define emit_pf STEPNAME(emit_pf) - -#define x87_do_push STEPNAME(x87_do_push) -#define x87_do_push_empty STEPNAME(x87_do_push_empty) -#define x87_do_pop STEPNAME(x87_do_pop) -#define x87_get_current_cache STEPNAME(x87_get_current_cache) -#define x87_get_cache STEPNAME(x87_get_cache) -#define x87_get_extcache STEPNAME(x87_get_extcache) -#define x87_get_st STEPNAME(x87_get_st) -#define x87_get_st_empty STEPNAME(x87_get_st) -#define x87_refresh STEPNAME(x87_refresh) -#define x87_forget STEPNAME(x87_forget) -#define x87_reget_st STEPNAME(x87_reget_st) -#define x87_stackcount 
STEPNAME(x87_stackcount) -#define x87_swapreg STEPNAME(x87_swapreg) -#define x87_setround STEPNAME(x87_setround) -#define x87_restoreround STEPNAME(x87_restoreround) -#define sse_setround STEPNAME(sse_setround) -#define mmx_get_reg STEPNAME(mmx_get_reg) -#define mmx_get_reg_empty STEPNAME(mmx_get_reg_empty) -#define mmx_forget_reg STEPNAME(mmx_forget_reg) -#define sse_get_reg STEPNAME(sse_get_reg) -#define sse_get_reg_empty STEPNAME(sse_get_reg_empty) -#define sse_forget_reg STEPNAME(sse_forget_reg) -#define sse_purge07cache STEPNAME(sse_purge07cache) - -#define fpu_pushcache STEPNAME(fpu_pushcache) -#define fpu_popcache STEPNAME(fpu_popcache) -#define fpu_reset STEPNAME(fpu_reset) -#define fpu_reset_cache STEPNAME(fpu_reset_cache) +#define native_pass STEPNAME(native_pass) + +#define dynarec64_00 STEPNAME(dynarec64_00) +#define dynarec64_00_0 STEPNAME(dynarec64_00_0) +#define dynarec64_00_1 STEPNAME(dynarec64_00_1) +#define dynarec64_00_2 STEPNAME(dynarec64_00_2) +#define dynarec64_00_3 STEPNAME(dynarec64_00_3) +#define dynarec64_0F STEPNAME(dynarec64_0F) +#define dynarec64_64 STEPNAME(dynarec64_64) +#define dynarec64_65 STEPNAME(dynarec64_65) +#define dynarec64_66 STEPNAME(dynarec64_66) +#define dynarec64_67 STEPNAME(dynarec64_67) +#define dynarec64_D8 STEPNAME(dynarec64_D8) +#define dynarec64_D9 STEPNAME(dynarec64_D9) +#define dynarec64_DA STEPNAME(dynarec64_DA) +#define dynarec64_DB STEPNAME(dynarec64_DB) +#define dynarec64_DC STEPNAME(dynarec64_DC) +#define dynarec64_DD STEPNAME(dynarec64_DD) +#define dynarec64_DE STEPNAME(dynarec64_DE) +#define dynarec64_DF STEPNAME(dynarec64_DF) +#define dynarec64_F0 STEPNAME(dynarec64_F0) +#define dynarec64_660F STEPNAME(dynarec64_660F) +#define dynarec64_6664 STEPNAME(dynarec64_6664) +#define dynarec64_66F0 STEPNAME(dynarec64_66F0) +#define dynarec64_F20F STEPNAME(dynarec64_F20F) +#define dynarec64_F30F STEPNAME(dynarec64_F30F) + +#define geted STEPNAME(geted) +#define geted32 STEPNAME(geted32) +#define geted16 
STEPNAME(geted16) +#define jump_to_epilog STEPNAME(jump_to_epilog) +#define jump_to_epilog_fast STEPNAME(jump_to_epilog_fast) +#define jump_to_next STEPNAME(jump_to_next) +#define ret_to_epilog STEPNAME(ret_to_epilog) +#define retn_to_epilog STEPNAME(retn_to_epilog) +#define iret_to_epilog STEPNAME(iret_to_epilog) +#define call_c STEPNAME(call_c) +#define call_n STEPNAME(call_n) +#define grab_segdata STEPNAME(grab_segdata) +#define emit_cmp8 STEPNAME(emit_cmp8) +#define emit_cmp16 STEPNAME(emit_cmp16) +#define emit_cmp32 STEPNAME(emit_cmp32) +#define emit_cmp8_0 STEPNAME(emit_cmp8_0) +#define emit_cmp16_0 STEPNAME(emit_cmp16_0) +#define emit_cmp32_0 STEPNAME(emit_cmp32_0) +#define emit_test8 STEPNAME(emit_test8) +#define emit_test16 STEPNAME(emit_test16) +#define emit_test32 STEPNAME(emit_test32) +#define emit_test32c STEPNAME(emit_test32) +#define emit_add32 STEPNAME(emit_add32) +#define emit_add32c STEPNAME(emit_add32c) +#define emit_add8 STEPNAME(emit_add8) +#define emit_add8c STEPNAME(emit_add8c) +#define emit_sub32 STEPNAME(emit_sub32) +#define emit_sub32c STEPNAME(emit_sub32c) +#define emit_sub8 STEPNAME(emit_sub8) +#define emit_sub8c STEPNAME(emit_sub8c) +#define emit_or32 STEPNAME(emit_or32) +#define emit_or32c STEPNAME(emit_or32c) +#define emit_xor32 STEPNAME(emit_xor32) +#define emit_xor32c STEPNAME(emit_xor32c) +#define emit_and32 STEPNAME(emit_and32) +#define emit_and32c STEPNAME(emit_and32c) +#define emit_or8 STEPNAME(emit_or8) +#define emit_or8c STEPNAME(emit_or8c) +#define emit_xor8 STEPNAME(emit_xor8) +#define emit_xor8c STEPNAME(emit_xor8c) +#define emit_and8 STEPNAME(emit_and8) +#define emit_and8c STEPNAME(emit_and8c) +#define emit_add16 STEPNAME(emit_add16) +#define emit_add16c STEPNAME(emit_add16c) +#define emit_sub16 STEPNAME(emit_sub16) +#define emit_sub16c STEPNAME(emit_sub16c) +#define emit_or16 STEPNAME(emit_or16) +#define emit_or16c STEPNAME(emit_or16c) +#define emit_xor16 STEPNAME(emit_xor16) +#define emit_xor16c STEPNAME(emit_xor16c) 
+#define emit_and16 STEPNAME(emit_and16) +#define emit_and16c STEPNAME(emit_and16c) +#define emit_inc32 STEPNAME(emit_inc32) +#define emit_inc16 STEPNAME(emit_inc16) +#define emit_inc8 STEPNAME(emit_inc8) +#define emit_dec32 STEPNAME(emit_dec32) +#define emit_dec16 STEPNAME(emit_dec16) +#define emit_dec8 STEPNAME(emit_dec8) +#define emit_adc32 STEPNAME(emit_adc32) +#define emit_adc32c STEPNAME(emit_adc32c) +#define emit_adc8 STEPNAME(emit_adc8) +#define emit_adc8c STEPNAME(emit_adc8c) +#define emit_adc16 STEPNAME(emit_adc16) +#define emit_adc16c STEPNAME(emit_adc16c) +#define emit_sbb32 STEPNAME(emit_sbb32) +#define emit_sbb32c STEPNAME(emit_sbb32c) +#define emit_sbb8 STEPNAME(emit_sbb8) +#define emit_sbb8c STEPNAME(emit_sbb8c) +#define emit_sbb16 STEPNAME(emit_sbb16) +#define emit_sbb16c STEPNAME(emit_sbb16c) +#define emit_neg32 STEPNAME(emit_neg32) +#define emit_neg16 STEPNAME(emit_neg16) +#define emit_neg8 STEPNAME(emit_neg8) +#define emit_shl32 STEPNAME(emit_shl32) +#define emit_shl32c STEPNAME(emit_shl32c) +#define emit_shr32 STEPNAME(emit_shr32) +#define emit_shr32c STEPNAME(emit_shr32c) +#define emit_sar32c STEPNAME(emit_sar32c) +#define emit_rol32 STEPNAME(emit_rol32) +#define emit_ror32 STEPNAME(emit_ror32) +#define emit_rol32c STEPNAME(emit_rol32c) +#define emit_ror32c STEPNAME(emit_ror32c) +#define emit_shrd32c STEPNAME(emit_shrd32c) +#define emit_shld32c STEPNAME(emit_shld32c) + +#define emit_pf STEPNAME(emit_pf) + +#define x87_do_push STEPNAME(x87_do_push) +#define x87_do_push_empty STEPNAME(x87_do_push_empty) +#define x87_do_pop STEPNAME(x87_do_pop) +#define x87_get_current_cache STEPNAME(x87_get_current_cache) +#define x87_get_cache STEPNAME(x87_get_cache) +#define x87_get_extcache STEPNAME(x87_get_extcache) +#define x87_get_st STEPNAME(x87_get_st) +#define x87_get_st_empty STEPNAME(x87_get_st) +#define x87_refresh STEPNAME(x87_refresh) +#define x87_forget STEPNAME(x87_forget) +#define x87_reget_st STEPNAME(x87_reget_st) +#define x87_stackcount 
STEPNAME(x87_stackcount) +#define x87_swapreg STEPNAME(x87_swapreg) +#define x87_setround STEPNAME(x87_setround) +#define x87_restoreround STEPNAME(x87_restoreround) +#define sse_setround STEPNAME(sse_setround) +#define mmx_get_reg STEPNAME(mmx_get_reg) +#define mmx_get_reg_empty STEPNAME(mmx_get_reg_empty) +#define mmx_forget_reg STEPNAME(mmx_forget_reg) +#define sse_get_reg STEPNAME(sse_get_reg) +#define sse_get_reg_empty STEPNAME(sse_get_reg_empty) +#define sse_forget_reg STEPNAME(sse_forget_reg) +#define sse_purge07cache STEPNAME(sse_purge07cache) + +#define fpu_pushcache STEPNAME(fpu_pushcache) +#define fpu_popcache STEPNAME(fpu_popcache) +#define fpu_reset STEPNAME(fpu_reset) +#define fpu_reset_cache STEPNAME(fpu_reset_cache) #define fpu_propagate_stack STEPNAME(fpu_propagate_stack) -#define fpu_purgecache STEPNAME(fpu_purgecache) -#define mmx_purgecache STEPNAME(mmx_purgecache) -#define x87_purgecache STEPNAME(x87_purgecache) -#define sse_purgecache STEPNAME(sse_purgecache) -#define fpu_reflectcache STEPNAME(fpu_reflectcache) -#define fpu_unreflectcache STEPNAME(fpu_unreflectcache) +#define fpu_purgecache STEPNAME(fpu_purgecache) +#define mmx_purgecache STEPNAME(mmx_purgecache) +#define x87_purgecache STEPNAME(x87_purgecache) +#define sse_purgecache STEPNAME(sse_purgecache) +#define fpu_reflectcache STEPNAME(fpu_reflectcache) +#define fpu_unreflectcache STEPNAME(fpu_unreflectcache) -#define CacheTransform STEPNAME(CacheTransform) -#define rv64_move64 STEPNAME(rv64_move64) -#define rv64_move32 STEPNAME(rv64_move32) +#define CacheTransform STEPNAME(CacheTransform) +#define rv64_move64 STEPNAME(rv64_move64) +#define rv64_move32 STEPNAME(rv64_move32) /* setup r2 to address pointed by */ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int* l, int i12, int delta); @@ -1038,7 +1157,7 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t 
nextop, uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int* l, int i12, int delta); /* setup r2 to address pointed by */ -//uintptr_t geted16(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, int s); +// uintptr_t geted16(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, int s); // generic x64 helper @@ -1082,15 +1201,15 @@ void emit_xor8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s void emit_and8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); void emit_and8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_add16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); -//void emit_add16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +// void emit_add16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_sub16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); -//void emit_sub16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +// void emit_sub16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_or16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); -//void emit_or16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +// void emit_or16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_xor16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); -//void emit_xor16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +// void emit_xor16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_and16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); 
-//void emit_and16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +// void emit_and16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); void emit_inc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); @@ -1098,17 +1217,17 @@ void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s void emit_dec16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); void emit_dec8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6); -//void emit_adc32c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +// void emit_adc32c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); void emit_adc8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5, int s6); void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); -//void emit_adc16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +// void emit_adc16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); -//void emit_sbb32c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +// void emit_sbb32c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); void emit_sbb8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5, int s6); void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int 
s5); -//void emit_sbb16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +// void emit_sbb16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3); void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); @@ -1166,44 +1285,44 @@ void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val); void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val, int zeroup); #if STEP < 2 -#define CHECK_CACHE() 0 +#define CHECK_CACHE() 0 #else -#define CHECK_CACHE() (cacheupd = CacheNeedsTransform(dyn, ninst)) +#define CHECK_CACHE() (cacheupd = CacheNeedsTransform(dyn, ninst)) #endif #define extcache_st_coherency STEPNAME(extcache_st_coherency) int extcache_st_coherency(dynarec_rv64_t* dyn, int ninst, int a, int b); #if STEP == 0 -#define ST_IS_F(A) 0 -#define X87_COMBINE(A, B) EXT_CACHE_ST_D -#define X87_ST0 EXT_CACHE_ST_D -#define X87_ST(A) EXT_CACHE_ST_D +#define ST_IS_F(A) 0 +#define X87_COMBINE(A, B) EXT_CACHE_ST_D +#define X87_ST0 EXT_CACHE_ST_D +#define X87_ST(A) EXT_CACHE_ST_D #elif STEP == 1 -#define ST_IS_F(A) (extcache_get_current_st(dyn, ninst, A)==EXT_CACHE_ST_F) +#define ST_IS_F(A) (extcache_get_current_st(dyn, ninst, A) == EXT_CACHE_ST_F) #define X87_COMBINE(A, B) extcache_combine_st(dyn, ninst, A, B) -#define X87_ST0 extcache_get_current_st(dyn, ninst, 0) -#define X87_ST(A) extcache_get_current_st(dyn, ninst, A) +#define X87_ST0 extcache_get_current_st(dyn, ninst, 0) +#define X87_ST(A) extcache_get_current_st(dyn, ninst, A) #else -#define ST_IS_F(A) (extcache_get_st(dyn, ninst, A)==EXT_CACHE_ST_F) +#define ST_IS_F(A) (extcache_get_st(dyn, ninst, A) == EXT_CACHE_ST_F) #if STEP == 3 #define X87_COMBINE(A, B) extcache_st_coherency(dyn, ninst, A, B) #else #define X87_COMBINE(A, B) extcache_get_st(dyn, ninst, A) #endif -#define X87_ST0 
extcache_get_st(dyn, ninst, 0) -#define X87_ST(A) extcache_get_st(dyn, ninst, A) +#define X87_ST0 extcache_get_st(dyn, ninst, 0) +#define X87_ST(A) extcache_get_st(dyn, ninst, A) #endif -//MMX helpers -// get float register for a MMX reg, create the entry if needed +// MMX helpers +// get float register for a MMX reg, create the entry if needed int mmx_get_reg(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a); // get float register for a MMX reg, but don't try to synch it if it needed to be created int mmx_get_reg_empty(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int a); // forget float register for a MMX reg, create the entry if needed void mmx_forget_reg(dynarec_rv64_t* dyn, int ninst, int a); -//SSE/SSE2 helpers -// get float register for a SSE reg, create the entry if needed +// SSE/SSE2 helpers +// get float register for a SSE reg, create the entry if needed int sse_get_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a, int single); // get float register for a SSE reg, but don't try to synch it if it needed to be created int sse_get_reg_empty(dynarec_rv64_t* dyn, int ninst, int s1, int a, int single); @@ -1238,12 +1357,12 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog); uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int seg, int* ok, int* need_epilog); -//uintptr_t dynarec64_65(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep,int* ok, int* need_epilog); +// uintptr_t dynarec64_65(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep,int* ok, int* need_epilog); uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, 
int ninst, rex_t rex, int rep, int* ok, int* need_epilog); uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -//uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +// uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); uintptr_t dynarec64_DD(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); @@ -1259,139 +1378,123 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int #if STEP < 2 #define PASS2(A) #else -#define PASS2(A) A +#define PASS2(A) A #endif #if STEP < 3 #define PASS3(A) #else -#define PASS3(A) A +#define PASS3(A) A #endif #if STEP < 3 -#define MAYUSE(A) (void)A +#define MAYUSE(A) (void)A #else #define MAYUSE(A) #endif // GOCOND will use x1 and x3 -#define GOCOND(B, T1, T2) \ - case B+0x0: \ - INST_NAME(T1 "O " T2); \ - GO( ANDI(x1, xFlags, 1<<F_OF2) \ - , EQZ, NEZ, X_OF) \ - break; \ - case B+0x1: \ - INST_NAME(T1 "NO " T2); \ - GO( ANDI(x1, xFlags, 1<<F_OF2) \ - , NEZ, EQZ, X_OF) \ - break; \ - case B+0x2: \ - INST_NAME(T1 "C " T2); \ - GO( ANDI(x1, xFlags, 1<<F_CF) \ - , EQZ, NEZ, X_CF) \ - break; \ - case B+0x3: \ - INST_NAME(T1 "NC " T2); \ - GO( ANDI(x1, xFlags, 1<<F_CF) \ - , NEZ, EQZ, X_CF) \ - break; 
\ - case B+0x4: \ - INST_NAME(T1 "Z " T2); \ - GO( ANDI(x1, xFlags, 1<<F_ZF) \ - , EQZ, NEZ, X_ZF) \ - break; \ - case B+0x5: \ - INST_NAME(T1 "NZ " T2); \ - GO( ANDI(x1, xFlags, 1<<F_ZF) \ - , NEZ, EQZ, X_ZF) \ - break; \ - case B+0x6: \ - INST_NAME(T1 "BE " T2); \ - GO( ANDI(x1, xFlags, (1<<F_CF)|(1<<F_ZF)) \ - , EQZ, NEZ, X_CF|X_ZF) \ - break; \ - case B+0x7: \ - INST_NAME(T1 "NBE " T2); \ - GO( ANDI(x1, xFlags, (1<<F_CF)|(1<<F_ZF)) \ - , NEZ, EQZ, X_CF|X_ZF) \ - break; \ - case B+0x8: \ - INST_NAME(T1 "S " T2); \ - GO( ANDI(x1, xFlags, 1<<F_SF) \ - , EQZ, NEZ, X_SF) \ - break; \ - case B+0x9: \ - INST_NAME(T1 "NS " T2); \ - GO( ANDI(x1, xFlags, 1<<F_SF) \ - , NEZ, EQZ, X_SF) \ - break; \ - case B+0xA: \ - INST_NAME(T1 "P " T2); \ - GO( ANDI(x1, xFlags, 1<<F_PF) \ - , EQZ, NEZ, X_PF) \ - break; \ - case B+0xB: \ - INST_NAME(T1 "NP " T2); \ - GO( ANDI(x1, xFlags, 1<<F_PF) \ - , NEZ, EQZ, X_PF) \ - break; \ - case B+0xC: \ - INST_NAME(T1 "L " T2); \ - GO( SRLI(x1, xFlags, F_SF-F_OF2); \ - XOR(x1, x1, xFlags); \ - ANDI(x1, x1, 1<<F_OF2) \ - , EQZ, NEZ, X_SF|X_OF) \ - break; \ - case B+0xD: \ - INST_NAME(T1 "GE " T2); \ - GO( SRLI(x1, xFlags, F_SF-F_OF2); \ - XOR(x1, x1, xFlags); \ - ANDI(x1, x1, 1<<F_OF2) \ - , NEZ, EQZ, X_SF|X_OF) \ - break; \ - case B+0xE: \ - INST_NAME(T1 "LE " T2); \ - GO( SRLI(x1, xFlags, F_SF-F_OF2); \ - XOR(x1, x1, xFlags); \ - ANDI(x1, x1, 1<<F_OF2); \ - ANDI(x3, xFlags, 1<<F_ZF); \ - OR(x1, x1, x3); \ - ANDI(x1, x1, (1<<F_OF2) | (1<<F_ZF)) \ - , EQZ, NEZ, X_SF|X_OF|X_ZF) \ - break; \ - case B+0xF: \ - INST_NAME(T1 "G " T2); \ - GO( SRLI(x1, xFlags, F_SF-F_OF2); \ - XOR(x1, x1, xFlags); \ - ANDI(x1, x1, 1<<F_OF2); \ - ANDI(x3, xFlags, 1<<F_ZF); \ - OR(x1, x1, x3); \ - ANDI(x1, x1, (1<<F_OF2) | (1<<F_ZF)) \ - , NEZ, EQZ, X_SF|X_OF|X_ZF) \ +#define GOCOND(B, T1, T2) \ + case B + 0x0: \ + INST_NAME(T1 "O " T2); \ + GO(ANDI(x1, xFlags, 1 << F_OF2), EQZ, NEZ, X_OF) \ + break; \ + case B + 0x1: \ + INST_NAME(T1 "NO " T2); \ + GO(ANDI(x1, xFlags, 
1 << F_OF2), NEZ, EQZ, X_OF) \ + break; \ + case B + 0x2: \ + INST_NAME(T1 "C " T2); \ + GO(ANDI(x1, xFlags, 1 << F_CF), EQZ, NEZ, X_CF) \ + break; \ + case B + 0x3: \ + INST_NAME(T1 "NC " T2); \ + GO(ANDI(x1, xFlags, 1 << F_CF), NEZ, EQZ, X_CF) \ + break; \ + case B + 0x4: \ + INST_NAME(T1 "Z " T2); \ + GO(ANDI(x1, xFlags, 1 << F_ZF), EQZ, NEZ, X_ZF) \ + break; \ + case B + 0x5: \ + INST_NAME(T1 "NZ " T2); \ + GO(ANDI(x1, xFlags, 1 << F_ZF), NEZ, EQZ, X_ZF) \ + break; \ + case B + 0x6: \ + INST_NAME(T1 "BE " T2); \ + GO(ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF)), EQZ, NEZ, X_CF | X_ZF) \ + break; \ + case B + 0x7: \ + INST_NAME(T1 "NBE " T2); \ + GO(ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF)), NEZ, EQZ, X_CF | X_ZF) \ + break; \ + case B + 0x8: \ + INST_NAME(T1 "S " T2); \ + GO(ANDI(x1, xFlags, 1 << F_SF), EQZ, NEZ, X_SF) \ + break; \ + case B + 0x9: \ + INST_NAME(T1 "NS " T2); \ + GO(ANDI(x1, xFlags, 1 << F_SF), NEZ, EQZ, X_SF) \ + break; \ + case B + 0xA: \ + INST_NAME(T1 "P " T2); \ + GO(ANDI(x1, xFlags, 1 << F_PF), EQZ, NEZ, X_PF) \ + break; \ + case B + 0xB: \ + INST_NAME(T1 "NP " T2); \ + GO(ANDI(x1, xFlags, 1 << F_PF), NEZ, EQZ, X_PF) \ + break; \ + case B + 0xC: \ + INST_NAME(T1 "L " T2); \ + GO(SRLI(x1, xFlags, F_SF - F_OF2); \ + XOR(x1, x1, xFlags); \ + ANDI(x1, x1, 1 << F_OF2), EQZ, NEZ, X_SF | X_OF) \ + break; \ + case B + 0xD: \ + INST_NAME(T1 "GE " T2); \ + GO(SRLI(x1, xFlags, F_SF - F_OF2); \ + XOR(x1, x1, xFlags); \ + ANDI(x1, x1, 1 << F_OF2), NEZ, EQZ, X_SF | X_OF) \ + break; \ + case B + 0xE: \ + INST_NAME(T1 "LE " T2); \ + GO(SRLI(x1, xFlags, F_SF - F_OF2); \ + XOR(x1, x1, xFlags); \ + ANDI(x1, x1, 1 << F_OF2); \ + ANDI(x3, xFlags, 1 << F_ZF); \ + OR(x1, x1, x3); \ + ANDI(x1, x1, (1 << F_OF2) | (1 << F_ZF)), EQZ, NEZ, X_SF | X_OF | X_ZF) \ + break; \ + case B + 0xF: \ + INST_NAME(T1 "G " T2); \ + GO(SRLI(x1, xFlags, F_SF - F_OF2); \ + XOR(x1, x1, xFlags); \ + ANDI(x1, x1, 1 << F_OF2); \ + ANDI(x3, xFlags, 1 << F_ZF); \ + OR(x1, x1, x3); \ + 
ANDI(x1, x1, (1 << F_OF2) | (1 << F_ZF)), NEZ, EQZ, X_SF | X_OF | X_ZF) \ break -#define NOTEST(s1) \ - if(box64_dynarec_test) { \ - SW(xZR, xEmu, offsetof(x64emu_t, test.test)); \ - SW(xZR, xEmu, offsetof(x64emu_t, test.clean)); \ +#define NOTEST(s1) \ + if (box64_dynarec_test) { \ + SW(xZR, xEmu, offsetof(x64emu_t, test.test)); \ + SW(xZR, xEmu, offsetof(x64emu_t, test.clean)); \ } -#define SKIPTEST(s1) \ - if(box64_dynarec_test) { \ - SW(xZR, xEmu, offsetof(x64emu_t, test.clean)); \ +#define SKIPTEST(s1) \ + if (box64_dynarec_test) { \ + SW(xZR, xEmu, offsetof(x64emu_t, test.clean)); \ } -#define GOTEST(s1, s2) \ - if(box64_dynarec_test) { \ - MOV32w(s2, 1); \ - SW(s2, xEmu, offsetof(x64emu_t, test.test)); \ +#define GOTEST(s1, s2) \ + if (box64_dynarec_test) { \ + MOV32w(s2, 1); \ + SW(s2, xEmu, offsetof(x64emu_t, test.test)); \ } -#define GETREX() \ - rex.rex = 0; \ - if(!rex.is32bits) \ - while(opcode>=0x40 && opcode<=0x4f) { \ - rex.rex = opcode; \ - opcode = F8; \ +#define GETREX() \ + rex.rex = 0; \ + if (!rex.is32bits) \ + while (opcode >= 0x40 && opcode <= 0x4f) { \ + rex.rex = opcode; \ + opcode = F8; \ } #endif //__DYNAREC_RV64_HELPER_H__ diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index d377309d..b7058c9a 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -139,12 +139,12 @@ int Table64(dynarec_rv64_t *dyn, uint64_t val, int pass); // add a value to tab void CreateJmpNext(void* addr, void* next); -#define GO_TRACE(A, B) \ +#define GO_TRACE(A, B, s0) \ GETIP(addr); \ MV(A1, xRIP); \ - STORE_XEMU_CALL(); \ + STORE_XEMU_CALL(s0); \ MOV64x(A2, B); \ CALL(A, -1); \ LOAD_XEMU_CALL() -#endif //__DYNAREC_RV64_PRIVATE_H_ \ No newline at end of file +#endif //__DYNAREC_RV64_PRIVATE_H_ diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index e7608781..7a1b3f4a 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ 
b/src/dynarec/rv64/rv64_emitter.h @@ -30,42 +30,42 @@ f18–27 fs2–11 FP saved registers Callee f28–31 ft8–11 FP temporaries Caller */ // x86 Register mapping -#define xRAX 16 -#define xRCX 17 -#define xRDX 18 -#define xRBX 19 -#define xRSP 20 -#define xRBP 21 -#define xRSI 22 -#define xRDI 23 -#define xR8 24 -#define xR9 25 -#define xR10 26 -#define xR11 27 -#define xR12 28 -#define xR13 29 -#define xR14 30 -#define xR15 31 +#define xRAX 16 +#define xRCX 17 +#define xRDX 18 +#define xRBX 19 +#define xRSP 20 +#define xRBP 21 +#define xRSI 22 +#define xRDI 23 +#define xR8 24 +#define xR9 25 +#define xR10 26 +#define xR11 27 +#define xR12 28 +#define xR13 29 +#define xR14 30 +#define xR15 31 #define xFlags 8 -#define xRIP 7 +#define xRIP 7 // 32bits version -#define wEAX xRAX -#define wECX xRCX -#define wEDX xRDX -#define wEBX xRBX -#define wESP xRSP -#define wEBP xRBP -#define wESI xRSI -#define wEDI xRDI -#define wR8 xR8 -#define wR9 xR9 -#define wR10 xR10 -#define wR11 xR11 -#define wR12 xR12 -#define wR13 xR13 -#define wR14 xR14 -#define wR15 xR15 +#define wEAX xRAX +#define wECX xRCX +#define wEDX xRDX +#define wEBX xRBX +#define wESP xRSP +#define wEBP xRBP +#define wESI xRSI +#define wEDI xRDI +#define wR8 xR8 +#define wR9 xR9 +#define wR10 xR10 +#define wR11 xR11 +#define wR12 xR12 +#define wR13 xR13 +#define wR14 xR14 +#define wR15 xR15 #define wFlags xFlags // scratch registers #define x1 11 @@ -129,11 +129,11 @@ f28–31 ft8–11 FP temporaries Caller #define ZEROUP(r) AND(r, r, xMASK) #define R_type(funct7, rs2, rs1, funct3, rd, opcode) ((funct7) << 25 | (rs2) << 20 | (rs1) << 15 | (funct3) << 12 | (rd) << 7 | (opcode)) -#define I_type(imm12, rs1, funct3, rd, opcode) ((imm12) << 20 | (rs1) << 15 | (funct3) << 12 | (rd) << 7 | (opcode)) -#define S_type(imm12, rs2, rs1, funct3, opcode) (((imm12) >> 5) << 25 | (rs2) << 20 | (rs1) << 15 | (funct3) << 12 | ((imm12)&31) << 7 | (opcode)) -#define B_type(imm13, rs2, rs1, funct3, opcode) ((((imm13) >> 12) & 1) << 31 | 
(((imm13) >> 5) & 63) << 25 | (rs2) << 20 | (rs1) << 15 | (funct3) << 12 | (((imm13) >> 1) & 15) << 8 | (((imm13) >> 11) & 1) << 7 | (opcode)) -#define U_type(imm32, rd, opcode) (((imm32) >> 12) << 12 | (rd) << 7 | (opcode)) -#define J_type(imm21, rd, opcode) ((((imm21) >> 20) & 1) << 31 | (((imm21) >> 1) & 0b1111111111) << 21 | (((imm21) >> 11) & 1) << 20 | (((imm21) >> 12) & 0b11111111) << 12 | (rd) << 7 | (opcode)) +#define I_type(imm12, rs1, funct3, rd, opcode) ((imm12) << 20 | (rs1) << 15 | (funct3) << 12 | (rd) << 7 | (opcode)) +#define S_type(imm12, rs2, rs1, funct3, opcode) (((imm12) >> 5) << 25 | (rs2) << 20 | (rs1) << 15 | (funct3) << 12 | ((imm12)&31) << 7 | (opcode)) +#define B_type(imm13, rs2, rs1, funct3, opcode) ((((imm13) >> 12) & 1) << 31 | (((imm13) >> 5) & 63) << 25 | (rs2) << 20 | (rs1) << 15 | (funct3) << 12 | (((imm13) >> 1) & 15) << 8 | (((imm13) >> 11) & 1) << 7 | (opcode)) +#define U_type(imm32, rd, opcode) (((imm32) >> 12) << 12 | (rd) << 7 | (opcode)) +#define J_type(imm21, rd, opcode) ((((imm21) >> 20) & 1) << 31 | (((imm21) >> 1) & 0b1111111111) << 21 | (((imm21) >> 11) & 1) << 20 | (((imm21) >> 12) & 0b11111111) << 12 | (rd) << 7 | (opcode)) // RV32I // put imm20 in the [31:12] bits of rd, zero [11:0] and sign extend bits31 @@ -144,7 +144,7 @@ f28–31 ft8–11 FP temporaries Caller #define JAL_gen(rd, imm21) J_type(imm21, rd, 0b1101111) // Unconditional branch, no return address set -#define B(imm21) EMIT(JAL_gen(xZR, imm21)) +#define B(imm21) EMIT(JAL_gen(xZR, imm21)) #define B__(reg1, reg2, imm21) B(imm21) // Unconditional branch, return set to xRA #define JAL(imm21) EMIT(JAL_gen(xRA, imm21)) @@ -248,10 +248,10 @@ f28–31 ft8–11 FP temporaries Caller #define SNEZ(rd, rs1) SLTU(rd, xZR, rs1) -#define BEQ(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b000, 0b1100011)) -#define BNE(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b001, 0b1100011)) -#define BLT(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b100, 0b1100011)) -#define BGE(rs1, 
rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b101, 0b1100011)) +#define BEQ(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b000, 0b1100011)) +#define BNE(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b001, 0b1100011)) +#define BLT(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b100, 0b1100011)) +#define BGE(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b101, 0b1100011)) #define BLTU(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b110, 0b1100011)) #define BGEU(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b111, 0b1100011)) @@ -377,10 +377,10 @@ f28–31 ft8–11 FP temporaries Caller } #define FENCE_gen(pred, succ) (((pred) << 24) | ((succ) << 20) | 0b0001111) -#define FENCE() EMIT(FENCE_gen(3, 3)) +#define FENCE() EMIT(FENCE_gen(3, 3)) #define FENCE_I_gen() ((0b001 << 12) | 0b0001111) -#define FENCE_I() EMIT(FENCE_I_gen()) +#define FENCE_I() EMIT(FENCE_I_gen()) #define EBREAK() EMIT(I_type(1, 0, 0, 0, 0b1110011)) @@ -476,9 +476,9 @@ f28–31 ft8–11 FP temporaries Caller SRAIW(rd, rs1, imm); \ } -#define CSRRW(rd, rs1, csr) EMIT(I_type(csr, rs1, 0b001, rd, 0b1110011)) -#define CSRRS(rd, rs1, csr) EMIT(I_type(csr, rs1, 0b010, rd, 0b1110011)) -#define CSRRC(rd, rs1, csr) EMIT(I_type(csr, rs1, 0b011, rd, 0b1110011)) +#define CSRRW(rd, rs1, csr) EMIT(I_type(csr, rs1, 0b001, rd, 0b1110011)) +#define CSRRS(rd, rs1, csr) EMIT(I_type(csr, rs1, 0b010, rd, 0b1110011)) +#define CSRRC(rd, rs1, csr) EMIT(I_type(csr, rs1, 0b011, rd, 0b1110011)) #define CSRRWI(rd, imm, csr) EMIT(I_type(csr, imm, 0b101, rd, 0b1110011)) #define CSRRSI(rd, imm, csr) EMIT(I_type(csr, imm, 0b110, rd, 0b1110011)) #define CSRRCI(rd, imm, csr) EMIT(I_type(csr, imm, 0b111, rd, 0b1110011)) @@ -493,10 +493,10 @@ f28–31 ft8–11 FP temporaries Caller // rd =(upper) rs1 * rs2 (both unsigned) #define MULHU(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b011, rd, 0b0110011)) // rd =(upper) rs1 / rs2 -#define DIV(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b100, rd, 0b0110011)) +#define DIV(rd, rs1, rs2) 
EMIT(R_type(0b0000001, rs2, rs1, 0b100, rd, 0b0110011)) #define DIVU(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b101, rd, 0b0110011)) // rd = rs1 mod rs2 -#define REM(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b110, rd, 0b0110011)) +#define REM(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b110, rd, 0b0110011)) #define REMU(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b111, rd, 0b0110011)) // RV64M @@ -505,29 +505,29 @@ f28–31 ft8–11 FP temporaries Caller // rd = rs1 * rs2 #define MULxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b000, rd, rex.w ? 0b0110011 : 0b0111011)) // rd = rs1 / rs2 -#define DIVW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b100, rd, 0b0111011)) -#define DIVxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b100, rd, rex.w ? 0b0110011 : 0b0111011)) -#define DIVUW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b101, rd, 0b0111011)) +#define DIVW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b100, rd, 0b0111011)) +#define DIVxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b100, rd, rex.w ? 0b0110011 : 0b0111011)) +#define DIVUW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b101, rd, 0b0111011)) #define DIVUxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b101, rd, rex.w ? 0b0110011 : 0b0111011)) // rd = rs1 mod rs2 -#define REMW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b110, rd, 0b0111011)) -#define REMxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b110, rd, rex.w ? 0b0110011 : 0b0111011)) -#define REMUW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b111, rd, 0b0111011)) +#define REMW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b110, rd, 0b0111011)) +#define REMxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b110, rd, rex.w ? 0b0110011 : 0b0111011)) +#define REMUW(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b111, rd, 0b0111011)) #define REMUxw(rd, rs1, rs2) EMIT(R_type(0b0000001, rs2, rs1, 0b111, rd, rex.w ? 
0b0110011 : 0b0111011)) #define AQ_RL(f5, aq, rl) ((f5 << 2) | ((aq & 1) << 1) | (rl & 1)) // RV32A -#define LR_W(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b010, rd, 0b0101111)) +#define LR_W(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b010, rd, 0b0101111)) #define SC_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) #define AMOSWAP_W(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00001, aq, rl), rs2, rs1, 0b010, rd, 0b0101111)) // RV64A -#define LR_D(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b011, rd, 0b0101111)) +#define LR_D(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b011, rd, 0b0101111)) #define SC_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b011, rd, 0b0101111)) -#define LRxw(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b010 | rex.w, rd, 0b0101111)) +#define LRxw(rd, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00010, aq, rl), 0, rs1, 0b010 | rex.w, rd, 0b0101111)) #define SCxw(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00011, aq, rl), rs2, rs1, 0b010 | rex.w, rd, 0b0101111)) #define AMOSWAP_D(rd, rs2, rs1, aq, rl) EMIT(R_type(AQ_RL(0b00001, aq, rl), rs2, rs1, 0b011, rd, 0b0101111)) @@ -563,7 +563,7 @@ f28–31 ft8–11 FP temporaries Caller // Round to Nearest, ties to Max Magnitude #define RD_RMM 0b100 // In instruction’s rm field, selects dynamic rounding mode; -#define RD_RM 0b111 +#define RD_RM 0b111 #define RD_DYN RD_RM // load single precision from rs1+imm12 to frd @@ -595,7 +595,7 @@ f28–31 ft8–11 FP temporaries Caller #define FSUBS(frd, frs1, frs2) EMIT(R_type(0b0000100, frs2, frs1, 0b000, frd, 0b1010011)) #define FMULS(frd, frs1, frs2) EMIT(R_type(0b0001000, frs2, frs1, 0b000, frd, 0b1010011)) #define FDIVS(frd, frs1, frs2) EMIT(R_type(0b0001100, frs2, frs1, 0b000, frd, 0b1010011)) -#define FSQRTS(frd, frs1) EMIT(R_type(0b0101100, 0b00000, frs1, 0b000, frd, 0b1010011)) +#define FSQRTS(frd, frs1) 
EMIT(R_type(0b0101100, 0b00000, frs1, 0b000, frd, 0b1010011)) #define FMINS(frd, frs1, frs2) EMIT(R_type(0b0010100, frs2, frs1, 0b000, frd, 0b1010011)) #define FMAXS(frd, frs1, frs2) EMIT(R_type(0b0010100, frs2, frs1, 0b001, frd, 0b1010011)) @@ -652,7 +652,7 @@ f28–31 ft8–11 FP temporaries Caller #define FSUBD(frd, frs1, frs2) EMIT(R_type(0b0000101, frs2, frs1, 0b000, frd, 0b1010011)) #define FMULD(frd, frs1, frs2) EMIT(R_type(0b0001001, frs2, frs1, 0b000, frd, 0b1010011)) #define FDIVD(frd, frs1, frs2) EMIT(R_type(0b0001101, frs2, frs1, 0b000, frd, 0b1010011)) -#define FSQRTD(frd, frs1) EMIT(R_type(0b0101101, 0b00000, frs1, 0b000, frd, 0b1010011)) +#define FSQRTD(frd, frs1) EMIT(R_type(0b0101101, 0b00000, frs1, 0b000, frd, 0b1010011)) #define FMIND(frd, frs1, frs2) EMIT(R_type(0b0010101, frs2, frs1, 0b000, frd, 0b1010011)) #define FMAXD(frd, frs1, frs2) EMIT(R_type(0b0010101, frs2, frs1, 0b001, frd, 0b1010011)) @@ -1029,11 +1029,29 @@ f28–31 ft8–11 FP temporaries Caller // rd2 := mem[addr+15:addr+8] #define TH_LDD(rd1, rd2, rs1, imm2) EMIT(R_type(0b1111100 | ((imm2)&0b11), rd2, rs1, 0b100, rd1, 0b0001011)) -// TODO -// th.lwd rd1, rd2, (rs1), imm2, 3 Load two signed 32-bit values -// th.lwud rd1, rd2, (rs1), imm2, 3 Load two unsigned 32-bit values -// th.sdd rd1, rd2, (rs1), imm2, 4 Store two 64-bit values -// th.swd rd1, rd2, (rs1), imm2, 3 Store two 32-bit values +// Load two signed 32-bit values from memory into two GPRs. +// addr := rs1 + (zero_extend(imm2) << 3) +// reg[rd1] := sign_extend(mem[addr+3:addr]) +// reg[rd2] := sign_extend(mem[addr+7:addr+4]) +#define TH_LWD(rd1, rd2, rs1, imm2) EMIT(R_type(0b1110000 | ((imm2)&0b11), rd2, rs1, 0b100, rd1, 0b0001011)) + +// Load two unsigned 32-bit values from memory into two GPRs. 
+// addr := rs1 + (zero_extend(imm2) << 3) +// reg[rd1] := zero_extend(mem[addr+3:addr]) +// reg[rd2] := zero_extend(mem[addr+7:addr+4]) +#define TH_LWUD(rd1, rd2, rs1, imm2) EMIT(R_type(0b1111000 | ((imm2)&0b11), rd2, rs1, 0b100, rd1, 0b0001011)) + +// Store two 64-bit values to memory from two GPRs. +// addr := rs1 + (zero_extend(imm2) << 4) +// mem[addr+7:addr] := reg[rd1] +// mem[addr+15:addr+8] := reg[rd2] +#define TH_SDD(rd1, rd2, rs1, imm2) EMIT(R_type(0b1111100 | ((imm2)&0b11), rd2, rs1, 0b101, rd1, 0b0001011)) + +// Store two 32-bit values to memory from two GPRs. +// addr := rs1 + (zero_extend(imm2) << 3) +// mem[addr+3:addr] := reg[rd1][31:0] +// mem[addr+7:addr+4] := reg[rd2][31:0] +#define TH_SWD(rd1, rd2, rs1, imm2) EMIT(R_type(0b1110000 | ((imm2)&0b11), rd2, rs1, 0b101, rd1, 0b0001011)) // XTheadFMemIdx - Indexed memory operations for floating-point registers