diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-02-21 15:31:09 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-02-21 15:31:09 +0100 |
| commit | e71df7eb670a1d943a41b7c94f3fc3794bc927eb (patch) | |
| tree | e8d7ae3654bdc72fbc138dae3552406d72cf3139 | |
| parent | 9d1e6b9b960c33bb524cabfd53bb1ce1133e5e3b (diff) | |
| download | box64-e71df7eb670a1d943a41b7c94f3fc3794bc927eb.tar.gz box64-e71df7eb670a1d943a41b7c94f3fc3794bc927eb.zip | |
[ARM64_DYNAREC] Optimized rcl 8bits with constant
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_00.c | 45 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_emit_shift.c | 33 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 2 |
3 files changed, 55 insertions, 25 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index 1e0cd854..a56e2aec 100644 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -1844,7 +1844,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(u8) { SETFLAGS(X_CF|((u8==1)?X_OF:0), SF_SUBSET_PENDING); GETEB(x1, 1); - u8 = F8; + u8 = F8&0x1f; emit_rol8c(dyn, ninst, x1, u8, x4, x5); EBBACK; } else { @@ -1858,7 +1858,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(u8) { SETFLAGS(X_CF|((u8==1)?X_OF:0), SF_SUBSET_PENDING); GETEB(x1, 1); - u8 = F8; + u8 = F8&0x1f; emit_ror8c(dyn, ninst, x1, u8, x4, x5); EBBACK; } else { @@ -1868,35 +1868,35 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 2: INST_NAME("RCL Eb, Ib"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); u8 = geted_ib(dyn, addr, ninst, nextop)&0x1f; - if(u8==1) { - SETFLAGS(X_OF|X_CF, SF_SET); + if(u8) { + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING); + GETEB(x1, 1); + u8 = F8&0x1f; + emit_rcl8c(dyn, ninst, x1, u8, x4, x5); + EBBACK; } else { - SETFLAGS(X_CF, SF_SET); + FAKEED; + F8; } - GETEB(x1, 1); - u8 = F8; - MOV32w(x2, u8); - CALL_(rcl8, ed, x3); - EBBACK; break; case 3: INST_NAME("RCR Eb, Ib"); MESSAGE(LOG_DUMP, "Need Optimization\n"); READFLAGS(X_CF); u8 = geted_ib(dyn, addr, ninst, nextop)&0x1f; - if(u8==1) { + if(u8) { SETFLAGS(X_OF|X_CF, SF_SET); + GETEB(x1, 1); + u8 = F8&0x1f; + MOV32w(x2, u8); + CALL_(rcr8, ed, x3); + EBBACK; } else { - SETFLAGS(X_CF, SF_SET); + FAKEED; + F8; } - GETEB(x1, 1); - u8 = F8; - MOV32w(x2, u8); - CALL_(rcr8, ed, x3); - EBBACK; break; case 4: case 6: @@ -2317,12 +2317,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 2: INST_NAME("RCL Eb, 1"); - MESSAGE(LOG_DUMP, "Need Optimization\n"); - READFLAGS(X_CF); - SETFLAGS(X_OF|X_CF, SF_SET); - 
MOV32w(x2, 1); + SETFLAGS(X_OF|X_CF, SF_SUBSET); GETEB(x1, 0); - CALL_(rcl8, x1, x3); + emit_rcl8c(dyn, ninst, ed, 1, x4, x5); EBBACK; break; case 3: diff --git a/src/dynarec/arm64/dynarec_arm64_emit_shift.c b/src/dynarec/arm64/dynarec_arm64_emit_shift.c index 61e866c7..0456359b 100644 --- a/src/dynarec/arm64/dynarec_arm64_emit_shift.c +++ b/src/dynarec/arm64/dynarec_arm64_emit_shift.c @@ -976,6 +976,37 @@ void emit_ror16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int } } +// emit RcL8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch +void emit_rcl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4) +{ + MAYUSE(s1); MAYUSE(s3); MAYUSE(s4); + IFX(X_PEND) { + MOV32w(s3, c); + STRB_U12(s3, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, d_rol8); + } else IFX(X_ALL) { + SET_DFNONE(s4); + } + IFX(X_OF|X_CF) { + if(c%9) { + LSRw_IMM(x2, x1, 8-(c%9)); + } + } + BFIw(x1, xFlags, 8, 1); // insert cf + ORRw_REG_LSL(x1, x1, x1, 9); // insert x1 again + LSRw_IMM(x1, x1, 9-(c%9)); // do the rcl + UXTBw(x1, x1); + IFX(X_OF|X_CF) { + BFIw(xFlags, x2, F_CF, 1); + IFX(X_OF) { + if(c==1) { + EORw_REG_LSR(x2, x2, x1, 7); + BFIw(xFlags, x2, F_OF, 1); + } + } + } +} + // emit SHRD32 instruction, from s1, fill s2 , constant c, store result in s1 using s3 and s4 as scratch void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4) { @@ -1321,7 +1352,7 @@ void emit_shld16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3, ORRw_REG_LSL(s4, s2, s1, 16); MOV32w(s3, 32); SUBw_REG(s3, s3, s5); - RORw_REG(s3, s4, s3); + LSRw_REG(s3, s4, s3); BFIw(xFlags, s3, F_CF, 1); } IFX(X_OF) { diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index f2654aa4..902219c6 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -1080,6 +1080,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr); 
#define emit_ror8c STEPNAME(emit_ror8c) #define emit_rol16c STEPNAME(emit_rol16c) #define emit_ror16c STEPNAME(emit_ror16c) +#define emit_rcl8c STEPNAME(emit_rcl8c) #define emit_shrd32c STEPNAME(emit_shrd32c) #define emit_shrd32 STEPNAME(emit_shrd32) #define emit_shld32c STEPNAME(emit_shld32c) @@ -1230,6 +1231,7 @@ void emit_rol8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s void emit_ror8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4); void emit_rol16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4); void emit_ror16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4); +void emit_rcl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4); void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); void emit_shrd32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4); |