diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00.c | 58 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_logic.c | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_math.c | 32 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_shift.c | 18 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_tests.c | 33 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_functions.c | 49 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_functions.h | 3 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 67 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 119 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_pass2.h | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_pass3.h | 30 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_epilog.S | 2 |
13 files changed, 336 insertions, 83 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c index 6f1b7f7c..72c10f90 100644 --- a/src/dynarec/rv64/dynarec_rv64_00.c +++ b/src/dynarec/rv64/dynarec_rv64_00.c @@ -89,7 +89,9 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGD; GETED(0); emit_xor32(dyn, ninst, rex, ed, gd, x3, x4); - WBACK; + if(ed!=gd) { + WBACK; + } break; case 0x39: INST_NAME("CMP Ed, Gd"); @@ -127,6 +129,37 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ADDI(xRSP, xRSP, 8); } break; + + #define GO(GETFLAGS, NO, YES, F) \ + READFLAGS(F); \ + i8 = F8S; \ + BARRIER(BARRIER_MAYBE); \ + JUMP(addr+i8, 1); \ + GETFLAGS; \ + if(dyn->insts[ninst].x64.jmp_insts==-1 || \ + CHECK_CACHE()) { \ + /* out of the block */ \ + i32 = dyn->insts[ninst].epilog-(dyn->native_size); \ + NO(x1, i32); \ + if(dyn->insts[ninst].x64.jmp_insts==-1) { \ + if(!(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT)) \ + fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ + jump_to_next(dyn, addr+i8, 0, ninst); \ + } else { \ + CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);\ + B(i32); \ + } \ + } else { \ + /* inside the block */ \ + i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \ + YES(x1, i32); \ + } + + GOCOND(0x70, "J", "ib"); + + #undef GO + case 0x80: nextop = F8; switch((nextop>>3)&7) { @@ -314,17 +347,22 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (eb2) { // load a mask to x3 (ffffffffffff00ff) - LUI(x3, 0xffff0); - ADDI(x3, x3, 0xff); + LUI(x3, 0xffffffffffff0); + ORI(x3, x3, 0xff); // apply mask AND(eb1, eb1, x3); - ADDI(x4, xZR, u8); - SLLI(x4, x4, 8); - OR(eb1, eb1, x4); + if(u8) { + if((u8<<8)<2048) { + ADDI(x4, xZR, u8<<8); + } else { + ADDI(x4, xZR, u8); + SLLI(x4, x4, 8); + } + OR(eb1, eb1, x4); + } } else { - SRLI(eb1, eb1, 8); - SLLI(eb1, eb1, 8); - ADDI(eb1, eb1, u8); + ANDI(eb1, eb1, 0xf00); // mask ffffffffffffff00 + ORI(eb1, eb1, u8); } } else { // mem <= u8 addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, &lock, 0, 1); @@ -378,6 +416,8 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni //x87_forget(dyn, ninst, x3, x4, 0); //sse_purge07cache(dyn, ninst, x3); tmp = isSimpleWrapper(*(wrapper_t*)(addr)); + if(tmp<0 || tmp>1) + tmp=0; //TODO: removed when FP is in place if((box64_log<2 && !cycle_log) && tmp) { //GETIP(ip+3+8+8); // read the 0xCC call_n(dyn, ninst, *(void**)(addr+8), tmp); diff --git a/src/dynarec/rv64/dynarec_rv64_emit_logic.c b/src/dynarec/rv64/dynarec_rv64_emit_logic.c index b36867f8..0f7527d9 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_logic.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_logic.c @@ -37,7 +37,7 @@ void emit_xor32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // test sign bit before zeroup. IFX(X_SF) { - BGE(s1, xZR, 4); + BGE(s1, xZR, 8); ORI(xFlags, xFlags, 1 << F_SF); } if (!rex.w) { @@ -49,7 +49,7 @@ void emit_xor32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } IFX(X_ZF) { - BNEZ(s1, 4); + BNEZ(s1, 8); ORI(xFlags, xFlags, F_ZF); } IFX(X_PF) { diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c index b2e9be9c..a201d639 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_math.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c @@ -45,12 +45,12 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SRLI(s5, s5, 0x20); ADD(s5, s5, s4); // hi SRAI(s5, s5, 0x20); - BEQZ(s5, 4); + BEQZ(s5, 8); ORI(xFlags, xFlags, 1 << F_CF); } else { ADD(s5, s1, s2); SRLI(s5, s5, 0x20); - BEQZ(s5, 4); + BEQZ(s5, 8); ORI(xFlags, xFlags, 1 << F_CF); } } @@ -70,7 +70,7 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2) IFX(X_AF) { ANDI(s4, s3, 0x08); // AF: cc & 0x08 - BEQZ(s4, 4); + BEQZ(s4, 8); ORI(xFlags, xFlags, 1 << F_AF); } IFX(X_OF) { @@ -78,12 +78,12 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SRLI(s4, s3, 1); XOR(s3, s3, s4); ANDI(s3, s3, 1); // OF: xor of two MSB's of cc - BEQZ(s3, 4); + BEQZ(s3, 8); ORI(xFlags, xFlags, 1 << F_OF2); } } IFX(X_SF) { - BGE(s1, xZR, 4); + BGE(s1, xZR, 8); ORI(xFlags, xFlags, 1 << F_SF); } if (!rex.w) { @@ -93,7 +93,7 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s emit_pf(dyn, ninst, s1, s3, s4); } IFX(X_ZF) { - BNEZ(s1, 4); + BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } } @@ -134,12 +134,12 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i SRLI(s5, s5, 0x20); ADD(s5, s5, s4); // hi SRAI(s5, s5, 0x20); - BEQZ(s5, 4); + BEQZ(s5, 8); ORI(xFlags, xFlags, 1 << F_CF); } else { ADD(s5, s1, s2); SRLI(s5, s5, 0x20); - BEQZ(s5, 4); + BEQZ(s5, 8); ORI(xFlags, xFlags, 1 << F_CF); } } @@ -164,7 +164,7 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i OR(s3, s3, s4); // cc = (~res & (op1 | op2)) | (op1 & op2) IFX(X_AF) { ANDI(s4, s3, 0x08); // AF: cc & 0x08 - BEQZ(s4, 4); + BEQZ(s4, 8); ORI(xFlags, xFlags, 1 << F_AF); } IFX(X_OF) { @@ -172,12 +172,12 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i SRLI(s4, s3, 1); XOR(s3, s3, s4); ANDI(s3, s3, 1); // OF: xor of two MSB's of cc - BEQZ(s3, 4); + BEQZ(s3, 8); ORI(xFlags, xFlags, 1 << F_OF2); } } IFX(X_SF) { - BGE(s1, xZR, 4); + BGE(s1, xZR, 8); ORI(xFlags, xFlags, 1 << F_SF); } if (!rex.w) { @@ -187,7 +187,7 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i emit_pf(dyn, ninst, s1, s3, s4); } IFX(X_ZF) { - BNEZ(s1, 4); + BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } } @@ -215,7 +215,7 @@ void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, res)); } IFX(X_SF) { - BGE(s1, xZR, 4); + BGE(s1, xZR, 8); ORI(xFlags, xFlags, 1 << F_SF); } if (!rex.w) { @@ -223,7 +223,7 @@ void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w?64:32); IFX(X_ZF) { - BEQZ(s1, 4); + BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_PF) { @@ -279,7 +279,7 @@ void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i SDxw(s1, xEmu, offsetof(x64emu_t, res)); } IFX(X_SF) { - BGE(s1, xZR, 4); + BGE(s1, xZR, 8); ORI(xFlags, xFlags, 1 << F_SF); } if (!rex.w) { @@ -287,7 +287,7 @@ void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i } CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w?64:32); IFX(X_ZF) { - BEQZ(s1, 4); + BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_PF) { diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c index 36c5c192..db038011 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c @@ -46,11 +46,11 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, if (c > 1) { SRAI(s3, s1, c-1); ANDI(s3, s3, 1); // LSB - BEQZ(s3, 4); + BEQZ(s3, 8); } else { // no need to shift ANDI(s3, s1, 1); - BEQZ(s3, 4); + BEQZ(s3, 8); } ORI(xFlags, xFlags, 1 << F_CF); } @@ -58,7 +58,7 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SRLIxw(s1, s1, c); IFX(X_SF) { - BGE(s1, xZR, 4); + BGE(s1, xZR, 8); ORI(xFlags, xFlags, 1 << F_SF); } if (!rex.w) { @@ -68,7 +68,7 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SDxw(s1, xEmu, offsetof(x64emu_t, res)); } IFX(X_ZF) { - BNEZ(s1, 4); + BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_OF) { @@ -77,7 +77,7 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SRLI(s4, s1, rex.w?63:31); XOR(s3, s3, s4); ANDI(s3, s3, 1); - BEQZ(s3, 4); + BEQZ(s3, 8); ORI(xFlags, xFlags, 1 << F_OF2); } } @@ -109,11 +109,11 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, if (c > 1) { SRAI(s3, s1, c-1); ANDI(s3, s3, 1); // LSB - BEQZ(s3, 4); + BEQZ(s3, 8); } else { // no need to shift ANDI(s3, s1, 1); - BEQZ(s3, 4); + BEQZ(s3, 8); } ORI(xFlags, xFlags, 1 << F_CF); } @@ -122,7 +122,7 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, // SRAIW sign-extends, so test sign bit before clearing upper bits IFX(X_SF) { - BGE(s1, xZR, 4); + BGE(s1, xZR, 8); ORI(xFlags, xFlags, 1 << F_SF); } if (!rex.w) { @@ -132,7 +132,7 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SDxw(s1, xEmu, offsetof(x64emu_t, res)); } IFX(X_ZF) { - BNEZ(s1, 4); + BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_PF) { diff --git a/src/dynarec/rv64/dynarec_rv64_emit_tests.c b/src/dynarec/rv64/dynarec_rv64_emit_tests.c index 0b85d831..59f2b495 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_tests.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_tests.c @@ -24,7 +24,8 @@ #include "dynarec_rv64_helper.h" // emit CMP8 instruction, from cmp s1, s2, using s3, s4, s5 and s6 as scratch -void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6) { +void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6) +{ CLEAR_FLAGS(); IFX_PENDOR0 { SB(s1, xEmu, offsetof(x64emu_t, op1)); @@ -47,12 +48,12 @@ void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i } IFX(X_SF) { SRLI(s3, s6, 7); - BEQZ(s3, 4); + BEQZ(s3, 8); ORI(xFlags, xFlags, 1 << F_SF); } CALC_SUB_FLAGS(s5, s2, s6, s3, s4, 8); IFX(X_ZF) { - BEQZ(s6, 4); + BNEZ(s6, 8); ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_PF) { @@ -61,7 +62,8 @@ void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i } // emit CMP8 instruction, from cmp s1 , 0, using s3 and s4 as scratch -void emit_cmp8_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4) { +void emit_cmp8_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4) +{ CLEAR_FLAGS(); IFX_PENDOR0 { SB(s1, xEmu, offsetof(x64emu_t, op1)); @@ -74,11 +76,11 @@ void emit_cmp8_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4) { IFX(X_SF) { SRLI(s3, s1, 7); - BEQZ(s3, 4); + BEQZ(s3, 8); ORI(xFlags, xFlags, 1 << F_SF); } IFX(X_ZF) { - BEQZ(s1, 4); + BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_PF) { @@ -109,15 +111,15 @@ void emit_cmp32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s6, xEmu, offsetof(x64emu_t, res)); } IFX(X_SF) { - BGE(s6, xZR, 0); + BGE(s6, xZR, 8); ORI(xFlags, xFlags, 1 << F_SF); } if (!rex.w) { - ZEROUP(s1); + ZEROUP(s6); } CALC_SUB_FLAGS(s5, s2, s6, s3, s4, rex.w?64:32); IFX(X_ZF) { - BEQZ(s6, 4); + BNEZ(s6, 8); ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_PF) { @@ -128,6 +130,7 @@ void emit_cmp32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s // emit CMP32 instruction, from cmp s1, 0, using s3 and s4 as scratch void emit_cmp32_0(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4) { + CLEAR_FLAGS(); IFX_PENDOR0 { SD(s1, xEmu, offsetof(x64emu_t, op1)); SD(xZR, xEmu, offsetof(x64emu_t, op2)); @@ -138,15 +141,15 @@ void emit_cmp32_0(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int } IFX(X_SF) { if (rex.w) { - BGE(s1, xZR, 4); + BGE(s1, xZR, 8); } else { SRLI(s3, s1, 31); - BEQZ(s3, 4); + BEQZ(s3, 8); } ORI(xFlags, xFlags, 1 << F_SF); } IFX(X_ZF) { - BEQZ(s1, 4); + BNEZ(s1, 8); ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_PF) { @@ -172,12 +175,12 @@ void emit_test32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int IFX(X_SF) { if (!rex.w) ZEROUP(s3); SRLI(s4, s3, rex.w?63:31); - BEQZ(s4, 4); + BEQZ(s4, 8); ORI(xFlags, xFlags, 1 << F_SF); } IFX(X_ZF) { - BNEZ(s3, 4); - ORI(xFlags, xFlags, F_ZF); + BNEZ(s3, 8); + ORI(xFlags, xFlags, 1 << F_ZF); } IFX(X_PF) { emit_pf(dyn, ninst, s3, s4, s5); diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c index 8df799bb..681d20d3 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.c +++ b/src/dynarec/rv64/dynarec_rv64_functions.c @@ -107,6 +107,55 @@ uintptr_t fakeed(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop) #undef F8 #undef F32 +static int fpuCacheNeedsTransform(dynarec_rv64_t* dyn, int ninst) +{ + // TODO + return 0; +} + +static int flagsCacheNeedsTransform(dynarec_rv64_t* dyn, int ninst) +{ + int jmp = dyn->insts[ninst].x64.jmp_insts; + if(jmp<0) + return 0; + if(dyn->insts[ninst].f_exit.dfnone) // flags are fully known, nothing we can do more + return 0; +/* if((dyn->f.pending!=SF_SET) + && (dyn->f.pending!=SF_SET_PENDING)) { + if(dyn->f.pending!=SF_PENDING) {*/ + switch (dyn->insts[jmp].f_entry.pending) { + case SF_UNKNOWN: return 0; + case SF_SET: + if(dyn->insts[ninst].f_exit.pending!=SF_SET && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING) + return 1; + else + return 0; + case SF_SET_PENDING: + if(dyn->insts[ninst].f_exit.pending!=SF_SET + && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING + && dyn->insts[ninst].f_exit.pending!=SF_PENDING) + return 1; + else + return 0; + case SF_PENDING: + if(dyn->insts[ninst].f_exit.pending!=SF_SET + && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING + && dyn->insts[ninst].f_exit.pending!=SF_PENDING) + return 1; + else + return (dyn->insts[jmp].f_entry.dfnone == dyn->insts[ninst].f_exit.dfnone)?0:1; + } + if(dyn->insts[jmp].f_entry.dfnone && !dyn->insts[ninst].f_exit.dfnone) + return 1; + return 0; +} +int CacheNeedsTransform(dynarec_rv64_t* dyn, int ninst) { + int ret = 0; + if (fpuCacheNeedsTransform(dyn, ninst)) ret|=1; + if (flagsCacheNeedsTransform(dyn, ninst)) ret|=2; + return ret; +} + int isNativeCall(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn) { (void)dyn; diff --git a/src/dynarec/rv64/dynarec_rv64_functions.h b/src/dynarec/rv64/dynarec_rv64_functions.h index ed94ac14..2fb76659 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.h +++ b/src/dynarec/rv64/dynarec_rv64_functions.h @@ -16,6 +16,9 @@ uintptr_t fakeed(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop) // Is what pointed at addr a native call? And if yes, to what function? int isNativeCall(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn); +// FPU Cache transformation (for loops) +int CacheNeedsTransform(dynarec_rv64_t* dyn, int i1); + // predecessor access int isPred(dynarec_rv64_t* dyn, int ninst, int pred); int getNominalPred(dynarec_rv64_t* dyn, int ninst); diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 02be87d5..4ae11186 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -517,7 +517,65 @@ void fpu_popcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07) //TODO } -void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val) +static void fpuCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3) +{ + //TODO +} +static void flagsCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1) +{ +#if STEP > 1 + int j64; + int jmp = dyn->insts[ninst].x64.jmp_insts; + if(jmp<0) + return; + if(dyn->f.dfnone) // flags are fully known, nothing we can do more + return; + MESSAGE(LOG_DUMP, "\tFlags fetch ---- ninst=%d -> %d\n", ninst, jmp); + int go = 0; + switch (dyn->insts[jmp].f_entry.pending) { + case SF_UNKNOWN: break; + case SF_SET: + if(dyn->f.pending!=SF_SET && dyn->f.pending!=SF_SET_PENDING) + go = 1; + break; + case SF_SET_PENDING: + if(dyn->f.pending!=SF_SET + && dyn->f.pending!=SF_SET_PENDING + && dyn->f.pending!=SF_PENDING) + go = 1; + break; + case SF_PENDING: + if(dyn->f.pending!=SF_SET + && dyn->f.pending!=SF_SET_PENDING + && dyn->f.pending!=SF_PENDING) + go = 1; + else + go = (dyn->insts[jmp].f_entry.dfnone == dyn->f.dfnone)?0:1; + break; + } + if(dyn->insts[jmp].f_entry.dfnone && !dyn->f.dfnone) + go = 1; + if(go) { + if(dyn->f.pending!=SF_PENDING) { + LW(s1, xEmu, offsetof(x64emu_t, df)); + j64 = (GETMARK3)-(dyn->native_size); + BEQZ(s1, j64); + } + CALL_(UpdateFlags, -1, 0); + MARK3; + } +#endif +} + +void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3) { + if(cacheupd&1) + fpuCacheTransform(dyn, ninst, s1, s2, s3); + if(cacheupd&2) + flagsCacheTransform(dyn, ninst, s1); +} + + +void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val, int zeroup) { // Depending on val, the following insns are emitted. // val == 0 -> ADDI @@ -533,13 +591,16 @@ void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val) src = reg; } if (lo12 || !hi20) ADDI(reg, src, lo12); + if(zeroup && ((hi20&0x80000) || (!hi20 && (lo12&0x800)))) { + ZEROUP(reg); + } } void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val) { if(((val<<32)>>32)==val) { // 32bits value - rv64_move32(dyn, ninst, reg, val); + rv64_move32(dyn, ninst, reg, val, 0); return; } @@ -568,6 +629,6 @@ void emit_pf(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4) SRLW(s4, s4, s1); ANDI(s4, s4, 1); - BEQZ(s4, 4); + BEQZ(s4, 8); ORI(xFlags, xFlags, 1 << F_PF); } diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 55869194..83e9c8cc 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -69,7 +69,7 @@ } else { \ SMREAD() \ addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \ - LD(x1, wback, fixedaddress); \ + LDxw(x1, wback, fixedaddress); \ ed = x1; \ } @@ -180,8 +180,8 @@ LOAD_REG(R11); \ -#define SET_DFNONE(S) if(!dyn->f.dfnone) {MOV_U12(S, d_none); SD(S, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=1;} -#define SET_DF(S, N) if((N)!=d_none) {MOV_U12(S, (N)); SD(S, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=0;} else SET_DFNONE(S) +#define SET_DFNONE(S) if(!dyn->f.dfnone) {MOV_U12(S, d_none); SW(S, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=1;} +#define SET_DF(S, N) if((N)!=d_none) {MOV_U12(S, (N)); SW(S, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=0;} else SET_DFNONE(S) #define SET_NODF() dyn->f.dfnone = 0 #define SET_DFOK() dyn->f.dfnone = 1 @@ -198,27 +198,27 @@ IFX(X_AF) { \ /* af = bc & 0x8 */ \ ANDI(scratch1, scratch2, 8); \ - BEQZ(scratch1, 4); \ + BEQZ(scratch1, 8); \ ORI(xFlags, xFlags, 1 << F_AF); \ } \ IFX(X_CF) { \ /* cf = bc & (1<<(width-1)) */ \ - if (width == 8) { \ + if ((width) == 8) { \ ANDI(scratch1, scratch2, 0x80); \ } else { \ - SRLI(scratch1, scratch2, width-1); \ - if (width == 16) ANDI(scratch1, scratch1, 1); \ + SRLI(scratch1, scratch2, (width)-1); \ + if ((width) == 16) ANDI(scratch1, scratch1, 1); \ } \ - BEQZ(scratch1, 4); \ + BEQZ(scratch1, 8); \ ORI(xFlags, xFlags, 1 << F_CF); \ } \ IFX(X_OF) { \ /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */ \ - SRLI(scratch1, scratch2, width-2); \ + SRLI(scratch1, scratch2, (width)-2); \ SRLI(scratch2, scratch1, 1); \ XOR(scratch1, scratch1, scratch2); \ ANDI(scratch1, scratch1, 1); \ - BEQZ(scratch1, 4); \ + BEQZ(scratch1, 8); \ ORI(xFlags, xFlags, 1 << F_OF2); \ } \ } @@ -611,10 +611,10 @@ void emit_pf(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); // Set rounding according to mxcsr flags, return reg to restore flags //int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3); -//void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3); +void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3); void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val); -void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val); +void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val, int zeroup); #if STEP < 2 #define CHECK_CACHE() 0 @@ -680,4 +680,99 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int #define MAYUSE(A) #endif +// GOCOND will use x1 and x3 +#define GOCOND(B, T1, T2) \ + case B+0x0: \ + INST_NAME(T1 "O " T2); \ + GO( ANDI(x1, xFlags, 1<<F_OF2) \ + , BEQZ, BNEZ, X_OF) \ + break; \ + case B+0x1: \ + INST_NAME(T1 "NO " T2); \ + GO( ANDI(x1, xFlags, 1<<F_OF2) \ + , BNEZ, BEQZ, X_OF) \ + break; \ + case B+0x2: \ + INST_NAME(T1 "C " T2); \ + GO( ANDI(x1, xFlags, 1<<F_CF) \ + , BEQZ, BNEZ, X_CF) \ + break; \ + case B+0x3: \ + INST_NAME(T1 "NC " T2); \ + GO( ANDI(x1, xFlags, 1<<F_CF) \ + , BNEZ, BEQZ, X_CF) \ + break; \ + case B+0x4: \ + INST_NAME(T1 "Z " T2); \ + GO( ANDI(x1, xFlags, 1<<F_ZF) \ + , BEQZ, BNEZ, X_ZF) \ + break; \ + case B+0x5: \ + INST_NAME(T1 "NZ " T2); \ + GO( ANDI(x1, xFlags, 1<<F_ZF) \ + , BNEZ, BEQZ, X_ZF) \ + break; \ + case B+0x6: \ + INST_NAME(T1 "BE " T2); \ + GO( ANDI(x1, xFlags, (1<<F_CF)|(1<<F_ZF)) \ + , BEQZ, BNEZ, X_CF|X_ZF) \ + break; \ + case B+0x7: \ + INST_NAME(T1 "NBE " T2); \ + GO( ANDI(x1, xFlags, (1<<F_CF)|(1<<F_ZF)) \ + , BNEZ, BEQZ, X_CF|X_ZF) \ + break; \ + case B+0x8: \ + INST_NAME(T1 "S " T2); \ + GO( ANDI(x1, xFlags, 1<<F_SF) \ + , BEQZ, BNEZ, X_SF) \ + break; \ + case B+0x9: \ + INST_NAME(T1 "NS " T2); \ + GO( ANDI(x1, xFlags, 1<<F_SF) \ + , BNEZ, BEQZ, X_SF) \ + break; \ + case B+0xA: \ + INST_NAME(T1 "P " T2); \ + GO( ANDI(x1, xFlags, 1<<F_PF) \ + , BEQZ, BNEZ, X_PF) \ + break; \ + case B+0xB: \ + INST_NAME(T1 "NP " T2); \ + GO( ANDI(x1, xFlags, 1<<F_PF) \ + , BNEZ, BEQZ, X_PF) \ + break; \ + case B+0xC: \ + INST_NAME(T1 "L " T2); \ + GO( SRLI(x1, xFlags, F_SF-F_OF2); \ + XOR(x1, x1, xFlags); \ + ANDI(x1, x1, 1<<F_OF2) \ + , BEQZ, BNEZ, X_SF|X_OF) \ + break; \ + case B+0xD: \ + INST_NAME(T1 "GE " T2); \ + GO( SRLI(x1, xFlags, F_SF-F_OF2); \ + XOR(x1, x1, xFlags); \ + ANDI(x1, x1, 1<<F_OF2) \ + , BNEZ, BEQZ, X_SF|X_OF) \ + break; \ + case B+0xE: \ + INST_NAME(T1 "LE " T2); \ + GO( SRLI(x1, xFlags, F_SF-F_OF2); \ + XOR(x1, x1, xFlags); \ + ANDI(x3, xFlags, 1<<F_ZF); \ + OR(x1, x1, x3); \ + ANDI(x1, x1, (1<<F_OF2) | (1<<F_ZF)) \ + , BEQZ, BNEZ, X_SF|X_OF|X_ZF) \ + break; \ + case B+0xF: \ + INST_NAME(T1 "G " T2); \ + GO( SRLI(x1, xFlags, F_SF-F_OF2); \ + XOR(x1, x1, xFlags); \ + ANDI(x3, xFlags, 1<<F_ZF); \ + OR(x1, x1, x3); \ + ANDI(x1, x1, (1<<F_OF2) | (1<<F_ZF)) \ + , BNEZ, BEQZ, X_SF|X_OF|X_ZF) \ + break + #endif //__DYNAREC_RV64_HELPER_H__ diff --git a/src/dynarec/rv64/dynarec_rv64_pass2.h b/src/dynarec/rv64/dynarec_rv64_pass2.h index 3255dcbf..408c3e97 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass2.h +++ b/src/dynarec/rv64/dynarec_rv64_pass2.h @@ -6,7 +6,7 @@ } #define MESSAGE(A, ...) -#define EMIT(A) dyn->insts[ninst].size+=4; dyn->native_size+=4 +#define EMIT(A) do {dyn->insts[ninst].size+=4; dyn->native_size+=4;}while(0) #define NEW_INST \ if(ninst) { \ dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size); \ diff --git a/src/dynarec/rv64/dynarec_rv64_pass3.h b/src/dynarec/rv64/dynarec_rv64_pass3.h index 1d7eb6d5..93b5411e 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass3.h +++ b/src/dynarec/rv64/dynarec_rv64_pass3.h @@ -4,10 +4,12 @@ addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst].x64.size, dyn->insts[ninst].size/4); \ addInst(dyn->instsize, &dyn->insts_size, 0, 0); #define EMIT(A) \ - if(box64_dynarec_dump) {dynarec_log(LOG_NONE, "\t%08x\t%s\n", (uint32_t)(A), rv64_print(A, (uintptr_t)dyn->block));} \ - *(uint32_t*)(dyn->block) = (uint32_t)(A); \ - dyn->block += 4; dyn->native_size += 4; \ - dyn->insts[ninst].size2 += 4 + do { \ + if(box64_dynarec_dump) {dynarec_log(LOG_NONE, "\t%08x\t%s\n", (uint32_t)(A), rv64_print(A, (uintptr_t)dyn->block));} \ + *(uint32_t*)(dyn->block) = (uint32_t)(A); \ + dyn->block += 4; dyn->native_size += 4; \ + dyn->insts[ninst].size2 += 4; \ + }while(0) #define MESSAGE(A, ...) if(box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__) #define NEW_INST \ @@ -35,16 +37,16 @@ dyn->smread, dyn->smwrite); \ if(dyn->insts[ninst].pred_sz) { \ dynarec_log(LOG_NONE, ", pred="); \ - for(int ii=0; ii<dyn->insts[ninst].pred_sz; ++ii)\ - dynarec_log(LOG_NONE, "%s%d", ii?"/":"", dyn->insts[ninst].pred[ii]);\ - } \ - if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts>=0)\ - dynarec_log(LOG_NONE, ", jmp=%d", dyn->insts[ninst].x64.jmp_insts);\ - if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts==-1)\ - dynarec_log(LOG_NONE, ", jmp=out"); \ - if(dyn->last_ip) \ - dynarec_log(LOG_NONE, ", last_ip=%p", (void*)dyn->last_ip);\ - dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":""); \ + for(int ii=0; ii<dyn->insts[ninst].pred_sz; ++ii) \ + dynarec_log(LOG_NONE, "%s%d", ii?"/":"", dyn->insts[ninst].pred[ii]); \ + } \ + if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts>=0) \ + dynarec_log(LOG_NONE, ", jmp=%d", dyn->insts[ninst].x64.jmp_insts); \ + if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts==-1) \ + dynarec_log(LOG_NONE, ", jmp=out"); \ + if(dyn->last_ip) \ + dynarec_log(LOG_NONE, ", last_ip=%p", (void*)dyn->last_ip); \ + dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":""); \ } #define TABLE64(A, V) {int val64offset = Table64(dyn, (V)); MESSAGE(LOG_DUMP, " Table64: 0x%lx\n", (V)); AUIPC(A, SPLIT20(val64offset)); LD(A, A, SPLIT12(val64offset));} diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index 32988c19..5f5cfe0c 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -110,7 +110,7 @@ f28–31 ft8–11 FP temporaries Caller // MOV64x/MOV32w is quite complex, so use a function for this #define MOV64x(A, B) rv64_move64(dyn, ninst, A, B) -#define MOV32w(A, B) do{ rv64_move32(dyn, ninst, A, B); if(A&0x80000000) ZEROUP(A); }while(0); +#define MOV32w(A, B) rv64_move32(dyn, ninst, A, B, 1) #define MOV64xw(A, B) if(rex.w) {MOV64x(A, B);} else {MOV32w(A, B);} // ZERO the upper part diff --git a/src/dynarec/rv64/rv64_epilog.S b/src/dynarec/rv64/rv64_epilog.S index fcaf646a..9b97c232 100644 --- a/src/dynarec/rv64/rv64_epilog.S +++ b/src/dynarec/rv64/rv64_epilog.S @@ -32,7 +32,7 @@ rv64_epilog: slli x5, x5, 11-5 or x8, x8, x5 sd x8, 128(a0) //xFlags - sd x7, 136(a0) // put back reg value in emu, including EIP (so x27 must be EIP now) + sd x7, 136(a0) // put back reg value in emu, including EIP (so x7 must be EIP now) //restore all used register ld ra, (sp) // save ra ld x8, 8(sp) // save fp |