| | | |
|---|---|---|
| author | ptitSeb <sebastien.chev@gmail.com> | 2023-12-22 10:50:08 +0100 |
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-12-22 10:50:08 +0100 |
| commit | 5b335f0f2658ae0afda037de2120bc5dc6aa4d48 (patch) | |
| tree | e456a4e558b5f27ca0ebff033eb6f38d926332ec /src | |
| parent | 28af18f36e265e01d03b2832e328d374ed0212ae (diff) | |
| download | box64-5b335f0f2658ae0afda037de2120bc5dc6aa4d48.tar.gz box64-5b335f0f2658ae0afda037de2120bc5dc6aa4d48.zip | |
[ARM64_DYNAREC] Small optim on CF flag computation for 8/16bits add/sub operations
Diffstat (limited to 'src')
| | | |
|---|---|---|
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_emit_math.c | 36 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_emit_tests.c | 18 |
2 files changed, 25 insertions, 29 deletions
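The core of the change, repeated at each call site in the diff below, is that the carry bit is pulled out of the result register with a single bitfield extract-and-insert instead of a shift followed by a bitfield insert. Condensed view (macro lines lifted from the diff; comments added here for orientation, on the assumption that F_CF is bit 0 of xFlags, matching the x86 flags layout):

```c
/* before: two instructions and a scratch register (s3) */
LSRw(s3, s1, 8);             // s3 = result >> 8 (carry bit now in bit 0)
BFIw(xFlags, s3, F_CF, 1);   // insert bit 0 of s3 at F_CF in xFlags

/* after: one instruction, no scratch register */
BFXILw(xFlags, s1, 8, 1);    // copy bit 8 of the result straight into bit 0 of xFlags
```

For the 16-bit operations the extracted bit is 16 instead of 8.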
```diff
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_math.c b/src/dynarec/arm64/dynarec_arm64_emit_math.c
index 1248ef63..f71f5537 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_math.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_math.c
@@ -297,8 +297,7 @@ void emit_add8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
         }
     }
     IFX(X_CF) {
-        LSRw(s3, s1, 8);
-        BFIw(xFlags, s3, F_CF, 1);
+        BFXILw(xFlags, s1, 8, 1);
     }
     IFX(X_PEND) {
         STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
@@ -341,8 +340,7 @@ void emit_add8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4)
         }
     }
     IFX(X_CF) {
-        LSRw(s3, s1, 8);
-        BFIw(xFlags, s3, F_CF, 1);
+        BFXILw(xFlags, s1, 8, 1);
     }
     IFX(X_PEND) {
         STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
@@ -364,7 +362,7 @@ void emit_sub8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     } else IFX(X_ALL) {
         SET_DFNONE(s3);
     }
-    IFX(X_AF|X_OF|X_CF) {
+    IFX(X_AF|X_OF) {
         MVNw_REG(s3, s1);
         ORRw_REG(s3, s3, s2);      // s3 = ~op1 | op2
         BICw_REG(s4, s2, s1);      // s4 = ~op1 & op2
@@ -374,13 +372,12 @@
     IFX(X_PEND) {
         STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_AF|X_OF|X_CF) {
+    IFX(X_CF) {
+        BFXILw(xFlags, s1, 8, 1);
+    }
+    IFX(X_AF|X_OF) {
         ANDw_REG(s3, s3, s1);   // s3 = (~op1 | op2) & res
         ORRw_REG(s3, s3, s4);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
-        IFX(X_CF) {
-            LSRw(s4, s3, 7);
-            BFIw(xFlags, s4, F_CF, 1);    // CF : bc & 0x80
-        }
         IFX(X_AF) {
             LSRw(s4, s3, 3);
             BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
@@ -411,7 +408,7 @@ void emit_sub8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5)
     } else IFX(X_ALL) {
         SET_DFNONE(s3);
     }
-    IFX(X_AF|X_OF|X_CF) {
+    IFX(X_AF|X_OF) {
         MVNw_REG(s3, s1);
         ORRw_REG(s3, s3, s5);      // s3 = ~op1 | op2
         BICw_REG(s4, s5, s1);      // s4 = ~op1 & op2
@@ -424,7 +421,10 @@ void emit_sub8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5)
     IFX(X_PEND) {
         STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_AF|X_OF|X_CF) {
+    IFX(X_CF) {
+        BFXILw(xFlags, s1, 8, 1);
+    }
+    IFX(X_AF|X_OF) {
         ANDw_REG(s3, s3, s1);   // s3 = (~op1 | op2) & res
         ORRw_REG(s3, s3, s4);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
         IFX(X_CF) {
@@ -478,8 +478,7 @@ void emit_add16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
         }
     }
     IFX(X_CF) {
-        LSRw(s3, s1, 16);
-        BFIw(xFlags, s3, F_CF, 1);
+        BFXILw(xFlags, s1, 16, 1);
     }
     IFX(X_PEND) {
         STRw_U12(s1, xEmu, offsetof(x64emu_t, res));
@@ -577,13 +576,12 @@ void emit_sub16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     IFX(X_PEND) {
         STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_AF|X_OF|X_CF) {
+    IFX(X_CF) {
+        BFXILw(xFlags, s1, 16, 1);
+    }
+    IFX(X_AF|X_OF) {
         ANDw_REG(s3, s3, s1);   // s3 = (~op1 | op2) & res
         ORRw_REG(s3, s3, s4);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
-        IFX(X_CF) {
-            LSRw(s4, s3, 15);
-            BFIw(xFlags, s4, F_CF, 1);    // CF : bc & 0x8000
-        }
         IFX(X_AF) {
             LSRw(s4, s3, 3);
             BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_tests.c b/src/dynarec/arm64/dynarec_arm64_emit_tests.c
index 5c6c602f..fcd23e2f 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_tests.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_tests.c
@@ -116,17 +116,16 @@ void emit_cmp16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
         STRH_U12(s5, xEmu, offsetof(x64emu_t, res));
     }
     COMP_ZFSF(s5, 16)
+    IFX(X_CF) {
+        BFXILw(xFlags, s5, 16, 1);
+    }
     // bc = (res & (~d | s)) | (~d & s)
-    IFX(X_CF|X_AF|X_OF) {
+    IFX(X_AF|X_OF) {
         MVNw_REG(s4, s1);       // s4 = ~d
         ORRw_REG(s4, s4, s2);   // s4 = ~d | s
         ANDw_REG(s4, s4, s5);   // s4 = res & (~d | s)
         BICw_REG(s3, s2, s1);   // s3 = s & ~d
         ORRw_REG(s3, s4, s3);   // s3 = (res & (~d | s)) | (s & ~d)
-        IFX(X_CF) {
-            LSRw(s4, s3, 15);
-            BFIw(xFlags, s4, F_CF, 1);    // CF : bc & 0x8000
-        }
         IFX(X_AF) {
             LSRw(s4, s3, 3);
             BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
@@ -180,16 +179,15 @@ void emit_cmp8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
         STRB_U12(s5, xEmu, offsetof(x64emu_t, res));
     }
     COMP_ZFSF(s5, 8)
+    IFX(X_CF) {
+        BFXILw(xFlags, s5, 8, 1);
+    }
     // bc = (res & (~d | s)) | (~d & s)
-    IFX(X_CF|X_AF|X_OF) {
+    IFX(X_AF|X_OF) {
         ORNw_REG(s4, s2, s1);   // s4 = ~d | s
         ANDw_REG(s4, s4, s5);   // s4 = res & (~d | s)
         BICw_REG(s3, s2, s1);   // s3 = s & ~d
         ORRw_REG(s3, s4, s3);   // s3 = (res & (~d | s)) | (s & ~d)
-        IFX(X_CF) {
-            LSRw(s4, s3, 7);
-            BFIw(xFlags, s4, F_CF, 1);    // CF : bc & 0x80
-        }
         IFX(X_AF) {
             LSRw(s4, s3, 3);
             BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
```
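Why bit 8 (or bit 16) of the result register is already the x86 CF: these emitters operate on 8/16-bit operands held zero-extended in 32-bit registers, so for an add the carry-out of the top operand bit lands in bit 8 (or 16) of the widened sum, and for a sub or cmp a borrow makes the widened difference negative, setting bits 8..31 (or 16..31), so that single bit equals the borrow. That is also why the sub/cmp paths can drop X_CF from the borrow-chain (bc) computation entirely. Below is a minimal standalone check of this identity for the 8-bit case, assuming zero-extended operands; it illustrates the reasoning and is not box64 code:

```c
/* Sanity check: bit 8 of the widened result equals the x86 CF for
 * 8-bit add (carry-out) and 8-bit sub (borrow), given zero-extended
 * operands in 32-bit registers. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    for (uint32_t a = 0; a <= 0xFF; ++a) {
        for (uint32_t b = 0; b <= 0xFF; ++b) {
            uint32_t sum  = a + b;   /* widened add */
            uint32_t diff = a - b;   /* widened sub, wraps mod 2^32 */
            /* add: CF is set iff the 8-bit add carries out of bit 7 */
            assert(((sum  >> 8) & 1) == (uint32_t)(a + b > 0xFF));
            /* sub: CF is set iff the 8-bit sub borrows (a < b) */
            assert(((diff >> 8) & 1) == (uint32_t)(a < b));
        }
    }
    puts("bit 8 of the widened result equals CF for every 8-bit add/sub");
    return 0;
}
```

The 16-bit case works the same way with bit 16. Net effect per CF update: one instruction instead of two, a scratch register freed, and, when only CF is requested on the sub/cmp paths, the multi-instruction bc computation is skipped altogether.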