diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2023-03-23 15:09:47 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-03-23 08:09:47 +0100 |
| commit | c0d3135e5420f34cd65be49adb674d822f601f78 (patch) | |
| tree | f7efce3bd32aba4ddf493bb4437e9a5cab47da3c /src | |
| parent | b8b5203d08da4a13f617f3614613df02fc2da7b6 (diff) | |
| download | box64-c0d3135e5420f34cd65be49adb674d822f601f78.tar.gz box64-c0d3135e5420f34cd65be49adb674d822f601f78.zip | |
[RV64_DYNAREC] Added A5 REP MOVSD opcode (#622)
And replace ADDI with SUBI for readability
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00.c | 37 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_d9.c | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_math.c | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 18 |
4 files changed, 45 insertions, 14 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c index 98d5439f..88dd3df8 100644 --- a/src/dynarec/rv64/dynarec_rv64_00.c +++ b/src/dynarec/rv64/dynarec_rv64_00.c @@ -665,6 +665,37 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ZEROUP(xRDX); } break; + case 0xA5: + if(rep) { + INST_NAME("REP MOVSD"); + CBZ_NEXT(xRCX); + ANDI(x1, xFlags, 1<<F_DF); + BNEZ_MARK2(x1); + MARK; // Part with DF==0 + LDxw(x1, xRSI, 0); + ADDI(xRSI, xRSI, rex.w?8:4); + SDxw(x1, xRDI, 0); + ADDI(xRDI, xRDI, rex.w?8:4); + SUBI(xRCX, xRCX, 1); + BNEZ_MARK(xRCX); + B_NEXT_nocond; + MARK2; // Part with DF==1 + LDxw(x1, xRSI, 0); + SUBI(xRSI, xRSI, rex.w?8:4); + SDxw(x1, xRDI, 0); + SUBI(xRDI, xRDI, rex.w?8:4); + SUBI(xRCX, xRCX, 1); + BNEZ_MARK2(xRCX); + // done + } else { + INST_NAME("MOVSD"); + GETDIR(x3, x1, rex.w?8:4); + LDxw(x1, xRSI, 0); + SDxw(x1, xRDI, 0); + ADD(xRSI, xRSI, x3); + ADD(xRDI, xRDI, x3); + } + break; case 0xA8: INST_NAME("TEST AL, Ib"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -689,13 +720,13 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MARK; // Part with DF==0 SDxw(xRAX, xRDI, 0); ADDI(xRDI, xRDI, rex.w?8:4); - ADDI(xRCX, xRCX, -1); + SUBI(xRCX, xRCX, 1); BNEZ_MARK(xRCX); B_NEXT_nocond; MARK2; // Part with DF==1 SDxw(xRAX, xRDI, 0); - ADDI(xRDI, xRDI, rex.w?-8:-4); - ADDI(xRCX, xRCX, -1); + SUBI(xRDI, xRDI, rex.w?8:4); + SUBI(xRCX, xRCX, 1); BNEZ_MARK2(xRCX); // done } else { diff --git a/src/dynarec/rv64/dynarec_rv64_d9.c b/src/dynarec/rv64/dynarec_rv64_d9.c index bff399e6..9378c650 100644 --- a/src/dynarec/rv64/dynarec_rv64_d9.c +++ b/src/dynarec/rv64/dynarec_rv64_d9.c @@ -231,7 +231,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("FDECSTP"); fpu_purgecache(dyn, ninst, 0, x1, x2, x3); LW(x2, xEmu, offsetof(x64emu_t, top)); - ADDI(x2, x2, -1); + SUBI(x2, x2, 1); ANDI(x2, x2, 7); SW(x2, xEmu, offsetof(x64emu_t, top)); break; diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c index 7d8ed89a..74349328 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_math.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c @@ -401,7 +401,7 @@ void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i { // special case when doing math on RSP and only PEND is needed: ignoring it! if (c > -2048 && c <= 2048) { - ADDI(s1, s1, -c); + SUBI(s1, s1, c); } else { MOV64xw(s2, c); SUBxw(s1, s1, s2); diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 86533a16..fe7d0804 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -394,7 +394,7 @@ void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav } fpu_pushcache(dyn, ninst, reg, 0); if(ret!=-2) { - ADDI(xSP, xSP, -16); // RV64 stack needs to be 16byte aligned + SUBI(xSP, xSP, 16); // RV64 stack needs to be 16byte aligned SD(xEmu, xSP, 0); SD(savereg, xSP, 8); // x5..x8, x10..x17, x28..x31 those needs to be saved by caller @@ -447,7 +447,7 @@ void call_n(dynarec_rv64_t* dyn, int ninst, void* fnc, int w) fpu_pushcache(dyn, ninst, x3, 1); // x5..x8, x10..x17, x28..x31 those needs to be saved by caller // RDI, RSI, RDX, RCX, R8, R9 are used for function call - ADDI(xSP, xSP, -16); + SUBI(xSP, xSP, 16); SD(xEmu, xSP, 0); SD(xRIP, xSP, 8); // ARM64 stack needs to be 16byte aligned STORE_REG(R12); @@ -560,7 +560,7 @@ void x87_stackcount(dynarec_rv64_t* dyn, int ninst, int scratch) SW(scratch, xEmu, offsetof(x64emu_t, fpu_stack)); // Sub x87stack to top, with and 7 LW(scratch, xEmu, offsetof(x64emu_t, top)); - ADDI(scratch, scratch, -a); + SUBI(scratch, scratch, a); ANDI(scratch, scratch, 7); SW(scratch, xEmu, offsetof(x64emu_t, top)); // reset x87stack, but not the stack count of extcache @@ -674,7 +674,7 @@ void x87_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, in // new tag to fulls ADDI(s3, xZR, 0); for (int i=0; i<a; ++i) { - ADDI(s2, s2, -1); + SUBI(s2, s2, 1); ANDI(s2, s2, 7); // (emu->top + st)&7 SLLI(s1, s2, 2); ADD(s1, xEmu, s1); @@ -970,7 +970,7 @@ int x87_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3) SLLI(s1, s1, 1); ADDI(s2, xZR, 3); BGE(s1, s2, 4+8); - ADDI(s1, s1, -4); + SUBI(s1, s1, 4); XORI(s3, s1, 0b11); // transform done (is there a faster way?) FSRM(s3); // exange RM with current @@ -991,7 +991,7 @@ int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3) SLLI(s1, s1, 1); ADDI(s2, xZR, 3); BGE(s1, s2, 4+8); - ADDI(s1, s1, -4); + SUBI(s1, s1, 4); XORI(s3, s1, 0b11); // transform done (is there a faster way?) FSRM(s3); // exange RM with current @@ -1213,7 +1213,7 @@ void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07) ++n; if(n) { MESSAGE(LOG_DUMP, "\tPush x87/MMX Cache (%d)------\n", n); - ADDI(xSP, xSP, -8*((n+1)&~1)); + SUBI(xSP, xSP, 8*((n+1)&~1)); int p = 0; for(int i=17; i<24; ++i) if(dyn->e.extcache[i].v!=0) { @@ -1520,7 +1520,7 @@ static void fpuCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1, int s2, in ADDI(s1, xEmu, offsetof(x64emu_t, p_regs)); SLLI(s3, s3, 2); for (int i=0; i<a; ++i) { - ADDI(s3, s3, -1<<2); + SUBI(s3, s3, 1<<2); ANDI(s3, s3, 7<<2); ADD(s3, s1, s3); SW(s2, s3, 0); // that slot is full @@ -1788,4 +1788,4 @@ void fpu_propagate_stack(dynarec_rv64_t* dyn, int ninst) dyn->e.news = 0; dyn->e.stack_push = 0; dyn->e.swapped = 0; -} \ No newline at end of file +} |