| author | ptitSeb <sebastien.chev@gmail.com> | 2023-11-10 17:24:17 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-11-10 17:24:17 +0100 |
| commit | b82d9d15d4040c568734b5cba949975c01315955 (patch) | |
| tree | f55b81b4aa1a887e3b9a694033bef392c3a85458 /src/dynarec | |
| parent | 3bc0495eb259302bfbe445c29a61531b11393a68 (diff) | |
| download | box64-b82d9d15d4040c568734b5cba949975c01315955.tar.gz box64-b82d9d15d4040c568734b5cba949975c01315955.zip | |
[ARM64_DYNAREC] More fixing and improving on shifting opcodes
Diffstat (limited to 'src/dynarec')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_00.c | 206 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_0f.c | 45 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_64.c | 6 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_66.c | 22 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_660f.c | 22 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_67.c | 4 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_emit_math.c | 7 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_emit_shift.c | 177 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 20 |
| -rw-r--r-- | src/dynarec/dynarec_native_functions.c | 6 |
| -rw-r--r-- | src/dynarec/dynarec_native_functions.h | 2 |
11 files changed, 391 insertions, 126 deletions
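The core of the change set is visible in the first file below: byte and word rotates that previously went through the `rol8`/`ror8`/`rol16`/`ror16` helper calls are now emitted inline, by duplicating the value into the adjacent lane and doing a single logical shift right. A minimal C model of that trick, written for this note (box64 itself emits the equivalent `ORRw_REG_LSL` + `LSRw` pair):

```c
#include <stdint.h>

/* Reference model of the rotate trick used by the new emit_rol8c/emit_ror8c
   emitters in this commit: duplicate the byte into bits 8..15, then a single
   logical shift right performs the rotate. Illustration only, not box64 code. */
static uint8_t ror8_by_dup(uint8_t v, unsigned c)
{
    uint32_t d = (uint32_t)v | ((uint32_t)v << 8); /* like ORRw_REG_LSL(v, v, v, 8) */
    return (uint8_t)(d >> (c & 7));                /* LSRw by the masked count */
}

static uint8_t rol8_by_dup(uint8_t v, unsigned c)
{
    uint32_t d = (uint32_t)v | ((uint32_t)v << 8);
    return (uint8_t)(d >> ((8 - (c & 7)) & 7));    /* left rotate = right rotate by 8-c */
}
```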
```diff
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index 2233c27f..ded12737 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -769,9 +769,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 SMULL(gd, ed, x4);
                 UFLAG_RES(gd);
                 LSRx(x3, gd, 32);
+                MOVw_REG(gd, gd);
                 UFLAG_OP1(x3);
                 UFLAG_DF(x3, d_imul32);
-                MOVw_REG(gd, gd);
             } else {
                 MULxw(gd, ed, x4);
             }
@@ -1733,22 +1733,22 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             switch((nextop>>3)&7) {
                 case 0:
                     INST_NAME("ROL Eb, Ib");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    if(geted_ib(dyn, addr, ninst, nextop)&0x1f) {
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
+                    }
                     GETEB(x1, 1);
                     u8 = F8;
-                    MOV32w(x2, u8);
-                    CALL_(rol8, ed, x3);
+                    emit_rol8c(dyn, ninst, x1, u8&7, x4, x5);
                     EBBACK;
                     break;
                 case 1:
                     INST_NAME("ROR Eb, Ib");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    if(geted_ib(dyn, addr, ninst, nextop)&0x1f) {
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
+                    }
                     GETEB(x1, 1);
                     u8 = F8;
-                    MOV32w(x2, u8);
-                    CALL_(ror8, ed, x3);
+                    emit_ror8c(dyn, ninst, x1, u8&7, x4, x5);
                     EBBACK;
                     break;
                 case 2:
@@ -1839,7 +1839,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             switch((nextop>>3)&7) {
                 case 0:
                     INST_NAME("ROL Ed, Ib");
-                    SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
+                    if(geted_ib(dyn, addr, ninst, nextop)&(0x1f+(rex.w*0x20))) {
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
+                    }
                     GETED(1);
                     u8 = (F8)&(rex.w?0x3f:0x1f);
                     emit_rol32c(dyn, ninst, rex, ed, u8, x3, x4);
@@ -1847,7 +1849,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
                 case 1:
                     INST_NAME("ROR Ed, Ib");
-                    SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
+                    if(geted_ib(dyn, addr, ninst, nextop)&(0x1f+(rex.w*0x20))) {
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
+                    }
                     GETED(1);
                     u8 = (F8)&(rex.w?0x3f:0x1f);
                     emit_ror32c(dyn, ninst, rex, ed, u8, x3, x4);
@@ -2099,30 +2103,71 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 0:
                     if(opcode==0xD0) {
                         INST_NAME("ROL Eb, 1");
-                        MOV32w(x2, 1);
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET);
+                        GETEB(x1, 0);
+                        emit_rol8c(dyn, ninst, ed, 1, x4, x5);
+                        EBBACK;
                     } else {
                         INST_NAME("ROL Eb, CL");
-                        ANDSw_mask(x2, xRCX, 0, 0b00100);
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET);
+                        UFLAG_IF {
+                            TSTw_mask(xRCX, 0, 0b00100);    //mask=0x00000001f
+                        }
+                        ANDw_mask(x2, xRCX, 0, 0b00010);    //mask=0x000000007
+                        MOV32w(x4, 8);
+                        SUBx_REG(x2, x4, x2);
+                        GETEB(x1, 0);
+                        UFLAG_IF {
+                            B_NEXT(cEQ);
+                        }
+                        ORRw_REG_LSL(ed, ed, ed, 8);
+                        LSRw_REG(ed, ed, x2);
+                        EBBACK;
+                        UFLAG_IF {  // calculate flags directly
+                            CMPSw_U12(x2, 7);
+                            B_MARK(cNE);
+                            LSRxw(x3, ed, 7);
+                            ADDxw_REG(x3, x3, ed);
+                            BFIw(xFlags, x3, F_OF, 1);
+                            MARK;
+                            BFIw(xFlags, ed, F_CF, 1);
+                            UFLAG_DF(x2, d_none);
+                        }
                     }
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    SETFLAGS(X_OF|X_CF, SF_SET);
-                    GETEB(x1, 0);
-                    CALL_(rol8, x1, x3);
-                    EBBACK;
                     break;
                 case 1:
                     if(opcode==0xD0) {
                         INST_NAME("ROR Eb, 1");
-                        MOV32w(x2, 1);
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET);
+                        GETEB(x1, 0);
+                        emit_ror8c(dyn, ninst, ed, 1, x4, x5);
+                        EBBACK;
                     } else {
                         INST_NAME("ROR Eb, CL");
-                        ANDSw_mask(x2, xRCX, 0, 0b00100);
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET);
+                        UFLAG_IF {
+                            TSTw_mask(xRCX, 0, 0b00100);    //mask=0x00000001f
+                        }
+                        ANDw_mask(x2, xRCX, 0, 0b00010);    //mask=0x000000007
+                        GETEB(x1, 0);
+                        UFLAG_IF {
+                            B_NEXT(cEQ);
+                        }
+                        ORRw_REG_LSL(ed, ed, ed, 8);
+                        LSRw_REG(ed, ed, x2);
+                        EBBACK;
+                        UFLAG_IF {  // calculate flags directly
+                            CMPSw_U12(x2, 1);
+                            B_MARK(cNE);
+                            LSRxw(x2, ed, 6);               // x2 = d>>30
+                            EORw_REG_LSR(x2, x2, x2, 1);    // x2 = ((d>>30) ^ ((d>>30)>>1))
+                            BFIw(xFlags, x2, F_OF, 1);
+                            MARK;
+                            BFXILw(xFlags, ed, 7, 1);
+                            UFLAG_DF(x2, d_none);
+                        }
                     }
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    SETFLAGS(X_OF|X_CF, SF_SET);
-                    GETEB(x1, 0);
-                    CALL_(ror8, x1, x3);
-                    EBBACK;
                     break;
                 case 2:
                     if(opcode==0xD0) {INST_NAME("RCL Eb, 1");} else {INST_NAME("RCL Eb, CL");}
@@ -2262,16 +2307,26 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 0:
                     INST_NAME("ROL Ed, CL");
                     SETFLAGS(X_OF|X_CF, SF_SUBSET);
-                    if(rex.w) {
-                        ANDSx_mask(x3, xRCX, 1, 0, 0b00101);    //mask=0x000000000000003f
+                    UFLAG_IF {
+                        if(rex.w) {
+                            ANDSx_mask(x3, xRCX, 1, 0, 0b00101);    //mask=0x000000000000003f
+                        } else {
+                            ANDSw_mask(x3, xRCX, 0, 0b00100);       //mask=0x00000001f
+                        }
                     } else {
-                        ANDSw_mask(x3, xRCX, 0, 0b00100);       //mask=0x00000001f
+                        if(rex.w) {
+                            ANDx_mask(x3, xRCX, 1, 0, 0b00101);     //mask=0x000000000000003f
+                        } else {
+                            ANDw_mask(x3, xRCX, 0, 0b00100);        //mask=0x00000001f
+                        }
                     }
                     MOV64xw(x4, (rex.w?64:32));
                     SUBx_REG(x3, x4, x3);
                     GETED(0);
-                    if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
-                    B_NEXT(cEQ);
+                    UFLAG_IF {
+                        if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
+                        B_NEXT(cEQ);
+                    }
                     RORxw_REG(ed, ed, x3);
                     WBACK;
                     UFLAG_IF {  // calculate flags directly
@@ -2288,14 +2343,24 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 1:
                     INST_NAME("ROR Ed, CL");
                     SETFLAGS(X_OF|X_CF, SF_SUBSET);
-                    if(rex.w) {
-                        ANDSx_mask(x3, xRCX, 1, 0, 0b00101);    //mask=0x000000000000003f
+                    UFLAG_IF {
+                        if(rex.w) {
+                            ANDSx_mask(x3, xRCX, 1, 0, 0b00101);    //mask=0x000000000000003f
+                        } else {
+                            ANDSw_mask(x3, xRCX, 0, 0b00100);       //mask=0x00000001f
+                        }
                     } else {
-                        ANDSw_mask(x3, xRCX, 0, 0b00100);       //mask=0x00000001f
+                        if(rex.w) {
+                            ANDx_mask(x3, xRCX, 1, 0, 0b00101);     //mask=0x000000000000003f
+                        } else {
+                            ANDw_mask(x3, xRCX, 0, 0b00100);        //mask=0x00000001f
+                        }
                     }
                     GETED(0);
-                    if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
-                    B_NEXT(cEQ);
+                    UFLAG_IF {
+                        if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
+                        B_NEXT(cEQ);
+                    }
                     RORxw_REG(ed, ed, x3);
                     WBACK;
                     UFLAG_IF {  // calculate flags directly
@@ -2305,8 +2370,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         EORw_REG_LSR(x2, x2, x2, 1);    // x2 = ((d>>30) ^ ((d>>30)>>1))
                         BFIw(xFlags, x2, F_OF, 1);
                         MARK;
-                        LSRxw(x2, ed, rex.w?63:31);
-                        BFIw(xFlags, x2, F_CF, 1);
+                        BFXILxw(xFlags, ed, rex.w?63:31, 1);
                         UFLAG_DF(x2, d_none);
                     }
                     break;
@@ -2346,42 +2410,72 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 6:
                     INST_NAME("SHL Ed, CL");
                     SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined
-                    if(rex.w) {
-                        ANDSx_mask(x3, xRCX, 1, 0, 0b00101);    //mask=0x000000000000003f
+                    UFLAG_IF {
+                        if(rex.w) {
+                            ANDSx_mask(x3, xRCX, 1, 0, 0b00101);    //mask=0x000000000000003f
+                        } else {
+                            ANDSw_mask(x3, xRCX, 0, 0b00100);       //mask=0x00000001f
+                        }
                     } else {
-                        ANDSw_mask(x3, xRCX, 0, 0b00100);       //mask=0x00000001f
+                        if(rex.w) {
+                            ANDx_mask(x3, xRCX, 1, 0, 0b00101);     //mask=0x000000000000003f
+                        } else {
+                            ANDw_mask(x3, xRCX, 0, 0b00100);        //mask=0x00000001f
+                        }
                     }
                     GETED(0);
-                    if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
-                    B_NEXT(cEQ);
+                    UFLAG_IF {
+                        if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
+                        B_NEXT(cEQ);
+                    }
                     emit_shl32(dyn, ninst, rex, ed, x3, x5, x4);
                     WBACK;
                     break;
                 case 5:
                     INST_NAME("SHR Ed, CL");
                     SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined
-                    if(rex.w) {
-                        ANDSx_mask(x3, xRCX, 1, 0, 0b00101);    //mask=0x000000000000003f
+                    UFLAG_IF {
+                        if(rex.w) {
+                            ANDSx_mask(x3, xRCX, 1, 0, 0b00101);    //mask=0x000000000000003f
+                        } else {
+                            ANDSw_mask(x3, xRCX, 0, 0b00100);       //mask=0x00000001f
+                        }
                     } else {
-                        ANDSw_mask(x3, xRCX, 0, 0b00100);       //mask=0x00000001f
+                        if(rex.w) {
+                            ANDx_mask(x3, xRCX, 1, 0, 0b00101);     //mask=0x000000000000003f
+                        } else {
+                            ANDw_mask(x3, xRCX, 0, 0b00100);        //mask=0x00000001f
+                        }
                     }
                     GETED(0);
-                    if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
-                    B_NEXT(cEQ);
+                    UFLAG_IF {
+                        if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
+                        B_NEXT(cEQ);
+                    }
                     emit_shr32(dyn, ninst, rex, ed, x3, x5, x4);
                     WBACK;
                     break;
                 case 7:
                     INST_NAME("SAR Ed, CL");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    if(rex.w) {
-                        ANDSx_mask(x3, xRCX, 1, 0, 0b00101);    //mask=0x000000000000003f
+                    UFLAG_IF {
+                        if(rex.w) {
+                            ANDSx_mask(x3, xRCX, 1, 0, 0b00101);    //mask=0x000000000000003f
+                        } else {
+                            ANDSw_mask(x3, xRCX, 0, 0b00100);       //mask=0x00000001f
+                        }
                     } else {
-                        ANDSw_mask(x3, xRCX, 0, 0b00100);       //mask=0x00000001f
+                        if(rex.w) {
+                            ANDx_mask(x3, xRCX, 1, 0, 0b00101);     //mask=0x000000000000003f
+                        } else {
+                            ANDw_mask(x3, xRCX, 0, 0b00100);        //mask=0x00000001f
+                        }
                     }
                     GETED(0);
-                    if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
-                    B_NEXT(cEQ);
+                    UFLAG_IF {
+                        if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
+                        B_NEXT(cEQ);
+                    }
                     UFLAG_OP12(ed, x3);
                     ASRxw_REG(ed, ed, x3);
                     WBACK;
@@ -2661,29 +2755,29 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 3:
                     INST_NAME("NEG Eb");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
-                    GETEB(x1, 0);
+                    GETSEB(x1, 0);
                     emit_neg8(dyn, ninst, x1, x2, x4);
                     EBBACK;
                     break;
                 case 4:
                     INST_NAME("MUL AL, Ed");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x1, d_mul8);
                     GETEB(x1, 0);
                     UXTBw(x2, xRAX);
                     MULw(x1, x2, x1);
                     UFLAG_RES(x1);
                     BFIx(xRAX, x1, 0, 16);
+                    UFLAG_DF(x1, d_mul8);
                     break;
                 case 5:
                     INST_NAME("IMUL AL, Eb");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x1, d_imul8);
                     GETSEB(x1, 0);
                     SXTBw(x2, xRAX);
                     MULw(x1, x2, x1);
                     UFLAG_RES(x1);
                     BFIx(xRAX, x1, 0, 16);
+                    UFLAG_DF(x1, d_imul8);
                     break;
                 case 6:
                     INST_NAME("DIV Eb");
@@ -2730,7 +2824,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 4:
                     INST_NAME("MUL EAX, Ed");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x2, rex.w?d_mul64:d_mul32);
                     GETED(0);
                     if(rex.w) {
                         if(ed==xRDX) gd=x3; else gd=xRDX;
@@ -2744,11 +2837,11 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     UFLAG_RES(xRAX);
                     UFLAG_OP1(xRDX);
+                    UFLAG_DF(x2, rex.w?d_mul64:d_mul32);
                     break;
                 case 5:
                     INST_NAME("IMUL EAX, Ed");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x2, rex.w?d_imul64:d_imul32);
                     GETED(0);
                     if(rex.w) {
                         if(ed==xRDX) gd=x3; else gd=xRDX;
@@ -2762,6 +2855,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     UFLAG_RES(xRAX);
                     UFLAG_OP1(xRDX);
+                    UFLAG_DF(x2, rex.w?d_imul64:d_imul32);
                     break;
                 case 6:
                     INST_NAME("DIV Ed");
```
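The inline `ROL Eb`/`ROR Eb` sequences above also compute CF and OF directly instead of leaving them to a helper. For reference, the x86 semantics they implement, as a reference model written for this note (per the Intel SDM, OF is only defined for rotates by 1):

```c
#include <stdint.h>

/* Flag semantics of the 8-bit rotates: CF always receives the bit that
   wrapped around; OF is only defined when the count is 1. Reference model
   for this note, not box64 code. */
typedef struct { int cf, of; } rot_flags;

static rot_flags rol8_flags(uint8_t res, unsigned count)
{
    rot_flags f = { .cf = res & 1, .of = 0 };
    if (count == 1)
        f.of = f.cf ^ (res >> 7);              /* OF = CF XOR MSB(result) */
    return f;
}

static rot_flags ror8_flags(uint8_t res, unsigned count)
{
    rot_flags f = { .cf = (res >> 7) & 1, .of = 0 };
    if (count == 1)
        f.of = ((res >> 7) ^ (res >> 6)) & 1;  /* OF = XOR of the two top bits */
    return f;
}
```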
```diff
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 7b7cc338..48af11a0 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -1333,17 +1333,13 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {
                 ANDw_mask(x2, gd, 0, 0b00100);  //mask=0x00000001f
             }
-            LSRxw_REG(x4, ed, x2);
-            if(rex.w) {
-                ANDSx_mask(x4, x4, 1, 0, 0);    //mask=1
-            } else {
-                ANDSw_mask(x4, x4, 0, 0);       //mask=1
+            IFX(X_CF) {
+                LSRxw_REG(x4, ed, x2);
+                BFIw(xFlags, x4, F_CF, 1);
             }
-            BFIw(xFlags, x4, F_CF, 1);
             MOV32w(x4, 1);
             LSLxw_REG(x4, x4, x2);
-            EORxw_REG(x4, ed, x4);
-            CSELxw(ed, ed, x4, cNE);
+            ORRxw_REG(ed, ed, x4);
             if(wback) {
                 STRxw_U12(ed, wback, fixedaddress);
                 SMWRITE();
@@ -1468,8 +1464,8 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 UFLAG_RES(gd);
                 LSRx(x3, gd, 32);
                 UFLAG_OP1(x3);
-                UFLAG_DF(x3, d_imul32);
                 MOVw_REG(gd, gd);
+                UFLAG_DF(x3, d_imul32);
             } else {
                 MULxw(gd, gd, ed);
             }
@@ -1499,13 +1495,10 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {
                 ANDw_mask(x2, gd, 0, 0b00100);  //mask=0x00000001f
             }
-            LSRxw_REG(x4, ed, x2);
-            if(rex.w) {
-                ANDx_mask(x4, x4, 1, 0, 0);     //mask=1
-            } else {
-                ANDw_mask(x4, x4, 0, 0);        //mask=1
+            IFX(X_CF) {
+                LSRxw_REG(x4, ed, x2);
+                BFIw(xFlags, x4, F_CF, 1);
             }
-            BFIw(xFlags, x4, F_CF, 1);
             MOV32w(x4, 1);
             LSLxw_REG(x4, x4, x2);
             BICxw_REG(ed, ed, x4);
@@ -1584,7 +1577,9 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             }
             u8 = F8;
             u8&=(rex.w?0x3f:0x1f);
-            BFXILxw(xFlags, ed, u8, 1);    // inject 1 bit from u8 to F_CF (i.e. pos 0)
+            IFX(X_CF) {
+                BFXILxw(xFlags, ed, u8, 1);    // inject 1 bit from u8 to F_CF (i.e. pos 0)
+            }
             MOV32w(x4, 1);
             ORRxw_REG_LSL(ed, ed, x4, u8);
             if(wback) {
@@ -1607,8 +1602,9 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             }
             u8 = F8;
             u8&=(rex.w?0x3f:0x1f);
-            BFXILxw(xFlags, ed, u8, 1);    // inject 1 bit from u8 to F_CF (i.e. pos 0)
-            UBFXw(x4, xFlags, 0, 1);
+            IFX(X_CF) {
+                BFXILxw(xFlags, ed, u8, 1);    // inject 1 bit from u8 to F_CF (i.e. pos 0)
+            }
             MOV32w(x4, 1);
             BICxw_REG_LSL(ed, ed, x4, u8);
             if(wback) {
@@ -1631,7 +1627,9 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             }
             u8 = F8;
             u8&=(rex.w?0x3f:0x1f);
-            BFXILxw(xFlags, ed, u8, 1);    // inject 1 bit from u8 to F_CF (i.e. pos 0)
+            IFX(X_CF) {
+                BFXILxw(xFlags, ed, u8, 1);    // inject 1 bit from u8 to F_CF (i.e. pos 0)
+            }
             MOV32w(x4, 1);
             EORxw_REG_LSL(ed, ed, x4, u8);
             if(wback) {
@@ -1666,13 +1664,10 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {
                 ANDw_mask(x2, gd, 0, 0b00100);  //mask=0x00000001f
             }
-            LSRxw_REG(x4, ed, x2);
-            if(rex.w) {
-                ANDx_mask(x4, x4, 1, 0, 0);     //mask=1
-            } else {
-                ANDw_mask(x4, x4, 0, 0);        //mask=1
+            IFX(X_CF) {
+                LSRxw_REG(x4, ed, x2);
+                BFIw(xFlags, x4, F_CF, 1);
             }
-            BFIw(xFlags, x4, F_CF, 1);
             MOV32w(x4, 1);
             LSLxw_REG(x4, x4, x2);
             EORxw_REG(ed, ed, x4);
```
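A pattern shared by all the BT-family hunks above: the CF update is now wrapped in `IFX(X_CF)`, so it is only emitted when a later instruction actually consumes the flag, and BTS drops the old `EOR`+`CSEL` pair for a plain `OR`, which is equivalent because setting a bit does not depend on its previous value. What BTS computes, as a compact sketch (64-bit register form assumed):

```c
#include <stdint.h>

/* Sketch of the BTS semantics behind the simplified sequence above:
   CF gets the old value of the tested bit, then the bit is set
   unconditionally. Illustration only; operand width follows rex.w. */
static uint64_t bts64(uint64_t v, unsigned bit, int *cf)
{
    bit &= 63;                  /* register-form BT ops mask the offset to the width */
    *cf = (v >> bit) & 1;       /* CF = old value of the selected bit */
    return v | (1ULL << bit);   /* BTS: set it regardless of its old value */
}
```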
```diff
diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c
index 0bf2576b..9af908e2 100644
--- a/src/dynarec/arm64/dynarec_arm64_64.c
+++ b/src/dynarec/arm64/dynarec_arm64_64.c
@@ -233,8 +233,8 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 UFLAG_RES(gd);
                 LSRx(x3, gd, 32);
                 UFLAG_OP1(x3);
-                UFLAG_DF(x3, d_imul32);
                 MOVw_REG(gd, gd);
+                UFLAG_DF(x3, d_imul32);
             } else {
                 MULxw(gd, gd, ed);
             }
@@ -966,7 +966,6 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 4:
                     INST_NAME("MUL EAX, Ed");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x2, rex.w?d_mul64:d_mul32);
                    GETEDO(x6, 0);
                     if(rex.w) {
                         if(ed==xRDX) gd=x3; else gd=xRDX;
@@ -980,11 +979,11 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     UFLAG_RES(xRAX);
                     UFLAG_OP1(xRDX);
+                    UFLAG_DF(x2, rex.w?d_mul64:d_mul32);
                     break;
                 case 5:
                     INST_NAME("IMUL EAX, Ed");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x2, rex.w?d_imul64:d_imul32);
                     GETEDO(x6, 0);
                     if(rex.w) {
                         if(ed==xRDX) gd=x3; else gd=xRDX;
@@ -998,6 +997,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     UFLAG_RES(xRAX);
                     UFLAG_OP1(xRDX);
+                    UFLAG_DF(x2, rex.w?d_imul64:d_imul32);
                     break;
                 case 6:
                     INST_NAME("DIV Ed");
diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c
index bd5eeb58..9e8fe773 100644
--- a/src/dynarec/arm64/dynarec_arm64_66.c
+++ b/src/dynarec/arm64/dynarec_arm64_66.c
@@ -458,7 +458,6 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             }
             SETFLAGS(X_ALL, SF_PENDING);
             nextop = F8;
-            UFLAG_DF(x1, d_imul16);
             GETSEW(x1, (opcode==0x69)?2:1);
             if(opcode==0x69) i32 = F16S; else i32 = F8S;
             MOV32w(x2, i32);
@@ -466,6 +465,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             UFLAG_RES(x2);
             gd=x2;
             GWBACK;
+            UFLAG_DF(x1, d_imul16);
             break;

         case 0x70:
@@ -929,22 +929,22 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             switch((nextop>>3)&7) {
                 case 0:
                     INST_NAME("ROL Ew, Ib");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    if(geted_ib(dyn, addr, ninst, nextop)&15) {
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
+                    }
                     GETEW(x1, 1);
                     u8 = F8;
-                    MOV32w(x2, u8);
-                    CALL_(rol16, x1, x3);
+                    emit_rol16c(dyn, ninst, x1, u8&15, x4, x5);
                     EWBACK;
                     break;
                 case 1:
                     INST_NAME("ROR Ew, Ib");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    if(geted_ib(dyn, addr, ninst, nextop)&15) {
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
+                    }
                     GETEW(x1, 1);
                     u8 = F8;
-                    MOV32w(x2, u8);
-                    CALL_(ror16, x1, x3);
+                    emit_ror16c(dyn, ninst, x1, u8&15, x4, x5);
                     EWBACK;
                     break;
                 case 2:
@@ -1167,24 +1167,24 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 4:
                     INST_NAME("MUL AX, Ew");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x1, d_mul16);
                     GETEW(x1, 0);
                     UXTHw(x2, xRAX);
                     MULw(x1, x2, x1);
                     UFLAG_RES(x1);
                     BFIx(xRAX, x1, 0, 16);
                     BFXILx(xRDX, x1, 16, 16);
+                    UFLAG_DF(x1, d_mul16);
                     break;
                 case 5:
                     INST_NAME("IMUL AX, Ew");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x1, d_imul16);
                     GETSEW(x1, 0);
                     SXTHw(x2, xRAX);
                     MULw(x1, x2, x1);
                     UFLAG_RES(x1);
                     BFIx(xRAX, x1, 0, 16);
                     BFXILx(xRDX, x1, 16, 16);
+                    UFLAG_DF(x1, d_imul16);
                     break;
                 case 6:
                     INST_NAME("DIV Ew");
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 174819e0..ba330878 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -2195,12 +2195,12 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 ed = x4;
             }
             ANDw_mask(x2, gd, 0, 0b000011);  // mask=0x0f
-            LSRw_REG(x1, ed, x2);
-            BFIw(xFlags, x1, F_CF, 1);
-            ANDSw_mask(x1, x1, 0, 0);   //mask=1
-            B_NEXT(cNE);
+            IFX(X_CF) {
+                LSRw_REG(x1, ed, x2);
+                BFIw(xFlags, x1, F_CF, 1);
+            }
             MOV32w(x1, 1);
-            LSLxw_REG(x1, x1, x2);
+            LSLw_REG(x1, x1, x2);
             ORRx_REG(ed, ed, x1);
             if(wback) {
                 STRH_U12(ed, wback, fixedaddress);
@@ -2231,12 +2231,12 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("IMUL Gw,Ew");
             SETFLAGS(X_ALL, SF_PENDING);
             nextop = F8;
-            UFLAG_DF(x1, d_imul16);
             GETSEW(x1, 0);
             GETSGW(x2);
             MULw(x2, x2, x1);
             UFLAG_RES(x2);
             GWBACK;
+            UFLAG_DF(x1, d_imul16);
             break;

         case 0xB3:
@@ -2452,6 +2452,16 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             BFIx(gd, x1, 0, 16);
             break;

+        case 0xC1:
+            INST_NAME("XADD Gw, Ew");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGW(x1);
+            GETEW(x2, 0);
+            BFIx(xRAX+((nextop&0x38)>>3)+(rex.r<<3), ed, 0, 16);
+            emit_add16(dyn, ninst, ed, gd, x4, x5);
+            EWBACK;
+            break;
         case 0xC2:
             INST_NAME("CMPPD Gx, Ex, Ib");
             nextop = F8;
diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c
index 7c4ea045..5cc96deb 100644
--- a/src/dynarec/arm64/dynarec_arm64_67.c
+++ b/src/dynarec/arm64/dynarec_arm64_67.c
@@ -1008,7 +1008,6 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 4:
                     INST_NAME("MUL EAX, Ed");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x2, rex.w?d_mul64:d_mul32);
                     GETED32(0);
                     if(rex.w) {
                         if(ed==xRDX) gd=x3; else gd=xRDX;
@@ -1022,11 +1021,11 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     UFLAG_RES(xRAX);
                     UFLAG_OP1(xRDX);
+                    UFLAG_DF(x2, rex.w?d_mul64:d_mul32);
                     break;
                 case 5:
                     INST_NAME("IMUL EAX, Ed");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x2, rex.w?d_imul64:d_imul32);
                     GETED32(0);
                     if(rex.w) {
                         if(ed==xRDX) gd=x3; else gd=xRDX;
@@ -1040,6 +1039,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     UFLAG_RES(xRAX);
                     UFLAG_OP1(xRDX);
+                    UFLAG_DF(x2, rex.w?d_imul64:d_imul32);
                     break;
                 case 6:
                     INST_NAME("DIV Ed");
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_math.c b/src/dynarec/arm64/dynarec_arm64_emit_math.c
index d23dde0f..3a7cc285 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_math.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_math.c
@@ -363,7 +363,7 @@ void emit_add8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4)
         SET_DFNONE(s3);
     }
     IFX(X_AF | X_OF) {
-        if(X_PEND) {} else {MOV32w(s4, c&0xff);}
+        IFX(X_PEND) {} else {MOV32w(s4, c&0xff);}
         ORRw_REG(s3, s1, s4);   // s3 = op1 | op2
         ANDw_REG(s4, s1, s4);   // s4 = op1 & op2
     }
@@ -1766,10 +1766,9 @@ void emit_neg8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4)
     IFX(X_AF|X_OF) {
         MOVw_REG(s3, s1);
     }
+    NEGw_REG(s1, s1);
     IFX(X_ZF) {
-        NEGSw_REG(s1, s1);
-    } else {
-        NEGw_REG(s1, s1);
+        ANDSw_mask(s1, s1, 0, 7);   // mask 0xff
     }
     IFX(X_PEND) {
         STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
```
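The `emit_add8c` hunk above fixes a subtle always-true test: `X_PEND` is a nonzero bit constant, so `if(X_PEND)` was taken unconditionally and the `MOV32w` fallback was never emitted, while `IFX(X_PEND)` tests the bit against the flags this instruction actually has to produce. A self-contained demonstration of the difference (names and values here are illustrative, not copied from the box64 headers):

```c
#include <stdio.h>

/* X_PEND stands in for box64's deferred-flags bit; its exact value is a
   placeholder here. The point: a constant in `if()` is always true, while
   the IFX-style masked test depends on the per-instruction flag set. */
#define X_PEND  (1 << 8)

static void emit_example(unsigned gen_flags)
{
    if (X_PEND) { /* always taken: the constant is nonzero */ }
    else        { puts("never reached"); }

    if (gen_flags & X_PEND) { /* what IFX(X_PEND) effectively tests */ }
    else                    { puts("emitted only when flags are not deferred"); }
}

int main(void)
{
    emit_example(0);       /* prints the second message */
    emit_example(X_PEND);  /* prints nothing */
    return 0;
}
```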
```diff
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_shift.c b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
index 7c4a0bf4..485489a4 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_shift.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
@@ -154,6 +154,12 @@ void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         LSRxw_REG(s3, s1, s3);
         BFIw(xFlags, s3, 0, 1);
     }
+    IFX(X_OF) {
+        CMPSxw_U12(s2, 1);  // if s2==1
+        Bcond(cNE, 4+2*4);
+        LSRxw(s4, s1, rex.w?63:31);
+        BFIw(xFlags, s4, F_OF, 1);
+    }
     LSRxw_REG(s1, s1, s2);
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
@@ -167,13 +173,6 @@ void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         LSRxw(s4, s1, (rex.w)?63:31);
         BFIx(xFlags, s4, F_SF, 1);
     }
-    IFX(X_OF) {
-        CMPSxw_U12(s2, 1);  // if s2==1
-        Bcond(cNE, 4+3*4);
-        LSRxw(s4, s1, rex.w?62:30);
-        EORw_REG_LSR(s4, s4, s4, 1);
-        BFIw(xFlags, s4, F_OF, 1);
-    }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
     }
@@ -197,8 +196,18 @@ void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
         return;
     }
     IFX(X_CF) {
-        LSRxw(s3, s1, c-1);
-        BFIw(xFlags, s3, 0, 1);
+        if(c==1) {
+            BFIw(xFlags, s1, 0, 1);
+        } else {
+            LSRxw(s3, s1, c-1);
+            BFIw(xFlags, s3, 0, 1);
+        }
+    }
+    IFX(X_OF) {
+        if(c==1) {
+            LSRxw(s4, s1, rex.w?63:31);
+            BFIw(xFlags, s4, F_OF, 1);
+        }
     }
     LSRxw(s1, s1, c);
     IFX(X_PEND) {
@@ -213,13 +222,6 @@ void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
         LSRxw(s4, s1, (rex.w)?63:31);
         BFIx(xFlags, s4, F_SF, 1);
     }
-    IFX(X_OF) {
-        if(c==1) {
-            LSRxw(s4, s1, rex.w?62:30);
-            EORw_REG_LSR(s4, s4, s4, 1);
-            BFIw(xFlags, s4, F_OF, 1);
-        }
-    }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
     }
@@ -259,6 +261,10 @@ void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
         LSRxw(s4, s1, (rex.w)?63:31);
         BFIx(xFlags, s4, F_SF, 1);
     }
+    IFX(X_OF)
+        if(c==1) {
+            BFCw(xFlags, F_OF, 1);
+        }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
     }
@@ -318,8 +324,7 @@ void emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_CF) {
-        LSRxw(s3, s1, rex.w?63:31);
-        BFIw(xFlags, s3, F_CF, 1);
+        BFXILxw(xFlags, s1, rex.w?63:31, 1);
     }
     IFX(X_OF) {
         if(c==1) {
@@ -330,6 +335,142 @@ void emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
         }
     }
 }

+// emit ROL8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
+void emit_rol8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
+{
+    MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
+    IFX(X_PEND) {
+        MOV32w(s3, c);
+        STRB_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_rol8);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    if(!c) {
+        IFX(X_PEND) {
+            STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    int rc = 8-(c&7);
+    ORRw_REG_LSL(s1, s1, s1, 8);
+    LSRw(s1, s1, rc);
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_CF) {
+        BFIw(xFlags, s1, F_CF, 1);
+    }
+    IFX(X_OF) {
+        if(c==1) {
+            EORw_REG_LSR(s3, s1, s1, 7);
+            BFIw(xFlags, s3, F_OF, 1);
+        }
+    }
+}
+
+// emit ROR8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
+void emit_ror8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
+{
+    MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
+    IFX(X_PEND) {
+        MOV32w(s3, c);
+        STRB_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_ror8);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    if(!c) {
+        IFX(X_PEND) {
+            STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    ORRw_REG_LSL(s1, s1, s1, 8);
+    LSRw(s1, s1, c&7);
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_CF) {
+        BFXILw(xFlags, s1, 7, 1);
+    }
+    IFX(X_OF) {
+        if(c==1) {
+            LSRw(s3, s1, 6);
+            EORw_REG_LSR(s3, s3, s3, 1);
+            BFIw(xFlags, s3, F_OF, 1);
+        }
+    }
+}
+
+// emit ROL16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
+void emit_rol16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
+{
+    MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
+    IFX(X_PEND) {
+        MOV32w(s3, c);
+        STRH_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_rol16);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    if(!c) {
+        IFX(X_PEND) {
+            STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    int rc = 16-(c&15);
+    ORRw_REG_LSL(s1, s1, s1, 16);
+    LSRw(s1, s1, rc);
+    IFX(X_PEND) {
+        STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_CF) {
+        BFIw(xFlags, s1, F_CF, 1);
+    }
+    IFX(X_OF) {
+        if(c==1) {
+            EORw_REG_LSR(s3, s1, s1, 15);
+            BFIw(xFlags, s3, F_OF, 1);
+        }
+    }
+}
+
+// emit ROR16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
+void emit_ror16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
+{
+    MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
+    IFX(X_PEND) {
+        MOV32w(s3, c);
+        STRH_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_ror16);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    if(!c) {
+        IFX(X_PEND) {
+            STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    ORRw_REG_LSL(s1, s1, s1, 16);
+    LSRw(s1, s1, c&15);
+    IFX(X_PEND) {
+        STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_CF) {
+        BFXILw(xFlags, s1, 15, 1);
+    }
+    IFX(X_OF) {
+        if(c==1) {
+            LSRw(s3, s1, 14);
+            EORw_REG_LSR(s3, s3, s3, 1);
+            BFIw(xFlags, s3, F_OF, 1);
+        }
+    }
+}
+
 // emit SHRD32 instruction, from s1, fill s2 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4)
 {
```
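The `emit_shr32`/`emit_shr32c` hunks above move the OF computation to before the shift: for SHR by 1, x86 defines OF as the most-significant bit of the original operand, so it has to be sampled before the shift destroys it (the removed code derived it from the result afterwards). A reference model of the corrected behavior, written for this note:

```c
#include <stdint.h>

/* Corrected SHR flag semantics, as a sketch: CF is the last bit shifted
   out, and for a count of 1 OF is the MSB of the original value, read
   before shifting. Illustration only, not box64 code. */
static uint32_t shr32(uint32_t v, unsigned c, int *cf, int *of)
{
    c &= 31;
    if (c) {
        *cf = (v >> (c - 1)) & 1;   /* CF = last bit shifted out */
        if (c == 1)
            *of = v >> 31;          /* OF = old MSB, sampled pre-shift */
        v >>= c;
    }
    return v;
}
```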
```diff
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index cdf69a72..1c0fc352 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -787,7 +787,17 @@ x87_do_pop(dyn, ninst, scratch);

 #define SET_DFNONE(S)   if(!dyn->f.dfnone) {STRw_U12(wZR, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=1;}
-#define SET_DF(S, N)    if((N)!=d_none) {MOVZw(S, (N)); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=0;} else SET_DFNONE(S)
+#define SET_DF(S, N) \
+    if((N)!=d_none) { \
+        MOVZw(S, (N)); \
+        STRw_U12(S, xEmu, offsetof(x64emu_t, df)); \
+        if(dyn->f.pending==SF_PENDING && dyn->insts[ninst].x64.need_after && !(dyn->insts[ninst].x64.need_after&X_PEND)) { \
+            CALL_(UpdateFlags, -1, 0); \
+            dyn->f.pending = SF_SET; \
+            SET_NODF(); \
+        } \
+        dyn->f.dfnone=0; \
+    } else SET_DFNONE(S)
 #define SET_NODF()  dyn->f.dfnone = 0
 #define SET_DFOK()  dyn->f.dfnone = 1
@@ -1015,6 +1025,10 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_sar32c  STEPNAME(emit_sar32c)
 #define emit_rol32c  STEPNAME(emit_rol32c)
 #define emit_ror32c  STEPNAME(emit_ror32c)
+#define emit_rol8c   STEPNAME(emit_rol8c)
+#define emit_ror8c   STEPNAME(emit_ror8c)
+#define emit_rol16c  STEPNAME(emit_rol16c)
+#define emit_ror16c  STEPNAME(emit_ror16c)
 #define emit_shrd32c STEPNAME(emit_shrd32c)
 #define emit_shld32c STEPNAME(emit_shld32c)
@@ -1144,6 +1158,10 @@ void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
 void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_rol32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
+void emit_rol8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
+void emit_ror8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
+void emit_rol16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
+void emit_ror16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
 void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
diff --git a/src/dynarec/dynarec_native_functions.c b/src/dynarec/dynarec_native_functions.c
index 434fbd8a..d70beca9 100644
--- a/src/dynarec/dynarec_native_functions.c
+++ b/src/dynarec/dynarec_native_functions.c
@@ -474,6 +474,12 @@ uintptr_t fakeed(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nexto
     }
     return addr;
 }
+// return Ib on a mod/rm opcode without emiting anything
+uint8_t geted_ib(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nextop)
+{
+    addr = fakeed(dyn, addr, ninst, nextop);
+    return F8;
+}
 #undef F8

 int isNativeCall(dynarec_native_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn)
diff --git a/src/dynarec/dynarec_native_functions.h b/src/dynarec/dynarec_native_functions.h
index ed6d0f74..06585c4e 100644
--- a/src/dynarec/dynarec_native_functions.h
+++ b/src/dynarec/dynarec_native_functions.h
@@ -57,6 +57,8 @@ int getNominalPred(dynarec_native_t* dyn, int ninst);

 // Do the GETED, but don't emit anything...
 uintptr_t fakeed(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nextop);
+// return Ib on a mod/rm opcode without emiting anything
+uint8_t geted_ib(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nextop);

 // Is what pointed at addr a native call? And if yes, to what function?
 int isNativeCall(dynarec_native_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn);
```
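Finally, the new `geted_ib` helper lets the decoder peek at the Ib immediate that follows the mod/rm byte without emitting anything: a rotate whose masked count is zero leaves CF/OF untouched, so the `if(geted_ib(...) & 0x1f)` guards earlier in the diff can skip `SETFLAGS` entirely in that case. The masking rule in isolation, as an illustrative helper that is not part of box64:

```c
#include <stdint.h>

/* x86 masks rotate/shift counts to the operand width (0x3f with REX.W,
   0x1f otherwise), and a masked count of 0 leaves all flags unchanged,
   so no flag outputs need to be declared for that instruction. */
static int rotate_needs_flags(uint8_t ib, int rexw)
{
    unsigned count = ib & (rexw ? 0x3f : 0x1f);
    return count != 0;   /* zero count: SETFLAGS can be skipped */
}
```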