diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2023-02-19 17:58:11 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-02-19 17:58:11 +0100 |
| commit | d0a724bb08f06f1581621720b114335403bc29d9 (patch) | |
| tree | 3ebed330b438c4d6ddeec5d960e0594dd0250b50 /src | |
| parent | 8e9ac1b9ac492cb02f59300b343e41b37de28ad4 (diff) | |
| download | box64-d0a724bb08f06f1581621720b114335403bc29d9.tar.gz box64-d0a724bb08f06f1581621720b114335403bc29d9.zip | |
[DYNAREC] Optimized DB /7 opcode
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/arm64/arm64_emitter.h | 11 | ||||
| -rwxr-xr-x | src/dynarec/arm64/arm64_printer.c | 6 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_db.c | 51 |
3 files changed, 65 insertions, 3 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 9f573f8f..c3d62074 100755
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -266,6 +266,13 @@
 #define STRH_U12(Rt, Rn, imm12) EMIT(ST_gen(0b01, 0b01, ((uint32_t)((imm12)>>1))&0xfff, Rn, Rt))
 #define STRxw_U12(Rt, Rn, imm12) EMIT(ST_gen((rex.w)?0b11:0b10, 0b01, ((uint32_t)((imm12)>>(2+rex.w)))&0xfff, Rn, Rt))
 
+#define STU_gen(size, opc, imm9, Rn, Rt) ((size)<<30 | 0b111<<27 | (opc)<<22 | ((imm9)&0x1ff)<<12 | (Rn)<<5 | (Rt))
+#define STURx_I9(Rt, Rn, imm9) EMIT(STU_gen(0b11, 0b00, imm9, Rn, Rt))
+#define STURw_I9(Rt, Rn, imm9) EMIT(STU_gen(0b10, 0b00, imm9, Rn, Rt))
+#define STURxw_I9(Rt, Rn, imm9) EMIT(STU_gen((rex.w)?0b11:0b10, 0b00, imm9, Rn, Rt))
+#define STURH_I9(Rt, Rn, imm9) EMIT(STU_gen(0b01, 0b00, imm9, Rn, Rt))
+#define STURB_I9(Rt, Rn, imm9) EMIT(STU_gen(0b00, 0b00, imm9, Rn, Rt))
+
 #define STR_REG_gen(size, Rm, option, S, Rn, Rt) ((size)<<30 | 0b111<<27 | 0b00<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | (0b10)<<10 | (Rn)<<5 | (Rt))
 #define STRx_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(0b11, Rm, 0b011, 0, Rn, Rt))
 #define STRx_REG_LSL3(Rt, Rn, Rm) EMIT(STR_REG_gen(0b11, Rm, 0b011, 1, Rn, Rt))
@@ -649,6 +656,10 @@
 #define REV16w(Rd, Rn) EMIT(REV_gen(0, 0b01, Rn, Rd))
 #define REV16x(Rd, Rn) EMIT(REV_gen(1, 0b01, Rn, Rd))
 
+// UDF
+#define UDF_gen(imm16) ((imm16)&0xffff)
+#define UDF(imm16) EMIT(UDF_gen(imm16))
+
 // MRS
 #define MRS_gen(L, o0, op1, CRn, CRm, op2, Rt) (0b1101010100<<22 | (L)<<21 | 1<<20 | (o0)<<19 | (op1)<<16 | (CRn)<<12 | (CRm)<<8 | (op2)<<5 | (Rt))
 // mrs x0, nzcv : 1101010100 1 1 1 011 0100 0010 000 00000 o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0010(2) op2=0
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index 4bad45d5..e9356773 100755
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -25,7 +25,7 @@ typedef struct arm64_print_s {
 
 uint64_t DecodeBitMasks(int N, int imms, int immr)
 {
-    int len = 31-__builtin_clz(N<<6 | ((~imms)&0b111111));
+    int len = 31-__builtin_clz((N<<6) | ((~imms)&0b111111));
     if(len<1) return 0;
     int levels = (1<<len) - 1;
     int s = imms & levels;
@@ -1140,8 +1140,8 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         else if(sf==0 && type==0 && rmode==0 && opcd==6) {snprintf(buff, sizeof(buff), "FMOV %s, S%d", Wt[Rn], Rd);}
         else if(sf==1 && type==1 && rmode==0 && opcd==7) {snprintf(buff, sizeof(buff), "FMOV D%d, %s", Rd, Xt[Rn]);}
         else if(sf==1 && type==2 && rmode==1 && opcd==7) {snprintf(buff, sizeof(buff), "FMOV V%d.D[1], %s", Rd, Xt[Rn]);}
-        else if(sf==1 && type==1 && rmode==0 && opcd==6) {snprintf(buff, sizeof(buff), "FMOV %s, S%d", Xt[Rn], Rd);}
-        else if(sf==1 && type==2 && rmode==1 && opcd==6) {snprintf(buff, sizeof(buff), "FMOV %s, V%d.D[1]", Xt[Rn], Rd);}
+        else if(sf==1 && type==1 && rmode==0 && opcd==6) {snprintf(buff, sizeof(buff), "FMOV %s, D%d", Xt[Rd], Rn);}
+        else if(sf==1 && type==2 && rmode==1 && opcd==6) {snprintf(buff, sizeof(buff), "FMOV %s, V%d.D[1]", Xt[Rd], Rn);}
         else snprintf(buff, sizeof(buff), "FMOV ????");
         return buff;
     }
diff --git a/src/dynarec/arm64/dynarec_arm64_db.c b/src/dynarec/arm64/dynarec_arm64_db.c
index e40cc71d..4533c83b 100644
--- a/src/dynarec/arm64/dynarec_arm64_db.c
+++ b/src/dynarec/arm64/dynarec_arm64_db.c
@@ -315,12 +315,63 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0);
                         VSTR64_U12(v1, wback, fixedaddress);
                     } else {
+                        #if 0
                         x87_forget(dyn, ninst, x1, x3, 0);
                         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0);
                         if(ed!=x1) {
                             MOVx_REG(x1, ed);
                         }
                         CALL(arm_fstp, -1);
+                        #else
+                        // Painfully long, straight conversion from the C code, shoud be optimized
+                        v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                        addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0);
+                        FMOVxD(x1, v1);
+                        // do special value first
+                        TSTx_mask(x1, 1, 0b00000, 0b111110); //0x7fffffffffffffffL
+                        B_MARK(cNE);
+                        // Zero
+                        LSRx(x3, x1, 63-15); //x3 = sign+exp
+                        MOVZw(x5, 0); // x5 = mantisse
+                        B_MARK3_nocond;
+                        MARK;
+                        // get sign, in main ouput x5 for sign+exp
+                        ANDx_mask(x5, x1, 1, 1, 0); //0x8000000000000000
+                        LSRx(x5, x5, 63-15); // x5 = sign
+                        // get exp
+                        LSRx(x3, x1, 52); // x3 = exp11
+                        ANDw_mask(x3, x3, 0, 0b1010); //0x7ff
+                        MOV32w(x4, 0x7ff);
+                        CMPSw_REG(x3, x4);
+                        B_MARK2(cNE);
+                        // NaN and Infinite
+                        ORRw_mask(x3, x5, 0, 0b1110); //x3 = sign | 0x7fff
+                        TSTx_mask(x1, 1, 0, 0b110011); //0x000fffffffffffffL
+                        ORRx_mask(x5, xZR, 1, 1, 0); //0x8000000000000000
+                        ORRx_mask(x4, xZR, 1, 0b10, 0b01); //0xc000000000000000
+                        CSELx(x5, x5, x4, cEQ); // x5 = mantisse
+                        B_MARK3_nocond;
+                        MARK2;
+                        // regular / denormals
+                        ANDx_mask(x1, x1, 1, 0, 0b110011); //0x000fffffffffffffL
+                        LSLx_IMM(x1, x1, 11); //x1 = mantisse missing "1"
+                        MOVZw(x4, 16383-1023); //BIAS80 - BIAS64
+                        CBZw(x3, 4+3*4); // exp11 == 0?
+                        // normals
+                        ADDw_REG(x3, x3, x4); // x3 = exp16
+                        ORRw_REG(x3, x3, x5); // x3 = sign | exp
+                        ORRx_mask(x5, x1, 1, 1, 0); //0x8000000000000000 x5 = mantisse
+                        B_MARK3_nocond;
+                        // denormals
+                        CLZx(x6, x1);
+                        ADDw_U12(x6, x6, 1); // "one"
+                        SUBw_REG(x3, x4, x6); // x3 = exp16
+                        ORRw_REG(x3, x3, x5); // x3 = sign | exp16
+                        LSLx_REG(x5, x1, x6); // x5 = mantisse
+                        MARK3;
+                        STRx_U12(x5, wback, 0);
+                        STRH_U12(x3, wback, 8);
+                        #endif
                     }
                     x87_do_pop(dyn, ninst, x3);
                     break;