| author | ptitSeb <sebastien.chev@gmail.com> | 2021-03-20 09:50:44 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2021-03-20 09:50:44 +0100 |
| commit | 495d5664015ea1ef484a2bedf3707acc7f2670b8 (patch) | |
| tree | 8195513b64d3acd613223a967ff62191a56c5c02 | |
| parent | 7a056851d98db45584f0d978dc6567d4406be3df (diff) | |
| download | box64-495d5664015ea1ef484a2bedf3707acc7f2670b8.tar.gz box64-495d5664015ea1ef484a2bedf3707acc7f2670b8.zip | |
[DYNAREC] Small optim on MOV64xw / MOV64x / MOV32w macros
| mode | file | lines changed |
|---|---|---|
| -rwxr-xr-x | src/dynarec/arm64_emitter.h | 65 |

1 file changed, 47 insertions, 18 deletions
```diff
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 1095fff7..4d8c6ff3 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -89,35 +89,40 @@
 // MOVZ
 #define MOVZ_gen(sf, hw, imm16, Rd) ((sf)<<31 | 0b10<<29 | 0b100101<<23 | (hw)<<21 | (imm16)<<5 | (Rd))
-#define MOVZx(Rd, imm16) EMIT(MOVZ_gen(1, 0, (imm16)&0xffff, Rd))
-#define MOVZx_LSL(Rd, imm16, shift) EMIT(MOVZ_gen(1, (shift)/16, (imm16)&0xffff, Rd))
-#define MOVZw(Rd, imm16) EMIT(MOVZ_gen(0, 0, (imm16)&0xffff, Rd))
-#define MOVZw_LSL(Rd, imm16, shift) EMIT(MOVZ_gen(0, (shift)/16, (imm16)&0xffff, Rd))
+#define MOVZx(Rd, imm16) EMIT(MOVZ_gen(1, 0, ((uint16_t)imm16)&0xffff, Rd))
+#define MOVZx_LSL(Rd, imm16, shift) EMIT(MOVZ_gen(1, (shift)/16, ((uint16_t)imm16)&0xffff, Rd))
+#define MOVZw(Rd, imm16) EMIT(MOVZ_gen(0, 0, ((uint16_t)imm16)&0xffff, Rd))
+#define MOVZw_LSL(Rd, imm16, shift) EMIT(MOVZ_gen(0, (shift)/16, ((uint16_t)imm16)&0xffff, Rd))
 
 // MOVN
 #define MOVN_gen(sf, hw, imm16, Rd) ((sf)<<31 | 0b00<<29 | 0b100101<<23 | (hw)<<21 | (imm16)<<5 | (Rd))
-#define MOVNx(Rd, imm16) EMIT(MOVN_gen(1, 0, (imm16)&0xffff, Rd))
-#define MOVNx_LSL(Rd, imm16, shift) EMIT(MOVN_gen(1, (shift)/16, (imm16)&0xffff, Rd))
-#define MOVNw(Rd, imm16) EMIT(MOVN_gen(0, 0, (imm16)&0xffff, Rd))
-#define MOVNw_LSL(Rd, imm16, shift) EMIT(MOVN_gen(0, (shift)/16, (imm16)&0xffff, Rd))
+#define MOVNx(Rd, imm16) EMIT(MOVN_gen(1, 0, ((uint16_t)imm16)&0xffff, Rd))
+#define MOVNx_LSL(Rd, imm16, shift) EMIT(MOVN_gen(1, (shift)/16, ((uint16_t)imm16)&0xffff, Rd))
+#define MOVNw(Rd, imm16) EMIT(MOVN_gen(0, 0, ((uint16_t)imm16)&0xffff, Rd))
+#define MOVNw_LSL(Rd, imm16, shift) EMIT(MOVN_gen(0, (shift)/16, ((uint16_t)imm16)&0xffff, Rd))
 
 // MOVK
 #define MOVK_gen(sf, hw, imm16, Rd) ((sf)<<31 | 0b11<<29 | 0b100101<<23 | (hw)<<21 | (imm16)<<5 | (Rd))
-#define MOVKx(Rd, imm16) EMIT(MOVK_gen(1, 0, (imm16)&0xffff, Rd))
-#define MOVKx_LSL(Rd, imm16, shift) EMIT(MOVK_gen(1, (shift)/16, (imm16)&0xffff, Rd))
-#define MOVKw(Rd, imm16) EMIT(MOVK_gen(0, 0, (imm16)&0xffff, Rd))
-#define MOVKw_LSL(Rd, imm16, shift) EMIT(MOVK_gen(0, (shift)/16, (imm16)&0xffff, Rd))
-
-#define MOV32w(Rd, imm32) {MOVZw(Rd, (imm32)&0xffff); if((imm32)&0xffff0000) {MOVKw_LSL(Rd, ((imm32)>>16)&0xffff, 16);}}
+#define MOVKx(Rd, imm16) EMIT(MOVK_gen(1, 0, ((uint16_t)imm16)&0xffff, Rd))
+#define MOVKx_LSL(Rd, imm16, shift) EMIT(MOVK_gen(1, (shift)/16, ((uint16_t)imm16)&0xffff, Rd))
+#define MOVKw(Rd, imm16) EMIT(MOVK_gen(0, 0, ((uint16_t)imm16)&0xffff, Rd))
+#define MOVKw_LSL(Rd, imm16, shift) EMIT(MOVK_gen(0, (shift)/16, ((uint16_t)imm16)&0xffff, Rd))
+
+#define MOV32w(Rd, imm32) \
+    if(~((uint32_t)(imm32))<0xffff) { \
+        MOVNw(Rd, (~(uint32_t)(imm32))&0xffff); \
+    } else { \
+        MOVZw(Rd, (imm32)&0xffff); \
+        if((imm32)&0xffff0000) {MOVKw_LSL(Rd, ((imm32)>>16)&0xffff, 16);} \
+    }
 
 #define MOV64x(Rd, imm64) \
     if(~((uint64_t)(imm64))<0xffff) { \
-        MOVZx(Rd, (~(uint64_t)(imm64))&0xffff); \
-        MVNx_REG(Rd, Rd); \
+        MOVNx(Rd, (~(uint64_t)(imm64))&0xffff); \
     } else { \
         MOVZx(Rd, ((uint64_t)(imm64))&0xffff); \
         if(((uint64_t)(imm64))&0xffff0000) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>16)&0xffff, 16);} \
-        if(((uint64_t)(imm64))&0xffff00000000L) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>32)&0xffff, 32);} \
-        if(((uint64_t)(imm64))&0xffff000000000000L) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>48)&0xffff, 48);} \
+        if(((uint64_t)(imm64))&0xffff00000000LL) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>32)&0xffff, 32);} \
+        if(((uint64_t)(imm64))&0xffff000000000000LL) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>48)&0xffff, 48);} \
     }
 
 #define MOV64xw(Rd, imm64) if(rex.w) {MOV64x(Rd, imm64);} else {MOV32w(Rd, imm64);}
@@ -355,6 +360,11 @@
 #define TSTw_REG(Rn, Rm) ANDSw_REG(wZR, Rn, Rm)
 #define TSTxw_REG(Rn, Rm) ANDSxw_REG(xZR, Rn, Rm)
 
+// ASRV
+#define ASRV_gen(sf, Rm, Rn, Rd) ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b0010<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
+#define ASRx_REG(Rd, Rn, Rm) EMIT(ASRV_gen(1, Rm, Rn, Rd))
+#define ASRw_REG(Rd, Rn, Rm) EMIT(ASRV_gen(0, Rm, Rn, Rd))
+#define ASRxw_REG(Rd, Rn, Rm) EMIT(ASRV_gen(rex.w, Rm, Rn, Rd))
 
 // BFI
 #define BFM_gen(sf, opc, N, immr, imms, Rn, Rd) ((sf)<<31 | (opc)<<29 | 0b100110<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd))
@@ -367,6 +377,12 @@
 #define BFCx(Rd, lsb, width) BFMx(Rd, xZR, ((-lsb)%64)&0x3f, (width)-1)
 #define BFCw(Rd, lsb, width) BFMw(Rd, xZR, ((-lsb)%32)&0x1f, (width)-1)
 #define BFCxw(Rd, lsb, width) BFMxw(Rd, xZR, rex.w?(((-lsb)%64)&0x3f):(((-lsb)%32)&0x1f), (width)-1)
+// Insert lsb:width part of Rn into low part of Rd (leaving rest of Rd untouched)
+#define BFXILx(Rd, Rn, lsb, width) BFM_gen(1, 0b01, 1, (lsb), (lsb)+(width)-1, Rn, Rd)
+// Insert lsb:width part of Rn into low part of Rd (leaving rest of Rd untouched)
+#define BFXILw(Rd, Rn, lsb, width) BFM_gen(0, 0b01, 0, (lsb), (lsb)+(width)-1, Rn, Rd)
+// Insert lsb:width part of Rn into low part of Rd (leaving rest of Rd untouched)
+#define BFXILxw(Rd, Rn, lsb, width) BFM_gen(rex.w, 0b01, rex.w, (lsb), (lsb)+(width)-1, Rn, Rd)
 
 // UBFX
 #define UBFM_gen(sf, N, immr, imms, Rn, Rd) ((sf)<<31 | 0b10<<29 | 0b100110<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd))
@@ -387,6 +403,12 @@
 #define LSLx(Rd, Rn, lsl) UBFMx(Rd, Rn, ((-(lsl))%64)&63, 63-(lsl))
 #define LSLw(Rd, Rn, lsl) UBFMw(Rd, Rn, ((-(lsl))%32)&31, 31-(lsl))
 #define LSLxw(Rd, Rn, lsl) UBFMxw(Rd, Rn, rex.w?(((-(lsl))%64)&63):(((-(lsl))%32)&31), (rex.w?63:31)-(lsl))
+// Take width first bits from Rn, LSL lsb and create Rd
+#define UBFIZx(Rd, Rn, lsb, width) UBFMx(Rd, Rn, ((-(lsb))%64)&63, width-1)
+// Take width first bits from Rn, LSL lsb and create Rd
+#define UBFIZw(Rd, Rn, lsb, width) UBFMw(Rd, Rn, ((-(lsb))%32)&31, width-1)
+// Take width first bits from Rn, LSL lsb and create Rd
+#define UBFIZxw(Rd, Rn, lsb, width) UBFMxw(Rd, Rn, rex.w?(((-(lsb))%64)&63):(((-(lsb))%32)&31), width-1)
 
 // SBFM
 #define SBFM_gen(sf, N, immr, imms, Rn, Rd) ((sf)<<31 | 0b00<<29 | 0b100110<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd))
@@ -414,6 +436,13 @@
 #define RORw(Rd, Rn, lsb) EMIT(EXTR_gen(0, 0, Rn, lsb, Rn, Rd))
 #define RORxw(Rd, Rn, lsb) EMIT(EXTR_gen(rex.w, rex.w, Rn, lsb, Rn, Rd))
 
+// RORV
+#define RORV_gen(sf, Rm, Rn, Rd) ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b0010<<12 | 0b11<<10 | (Rn)<<5 | (Rd))
+#define RORx_REG(Rd, Rn, Rm) EMIT(RORV_gen(1, Rm, Rn, Rd))
+#define RORw_REG(Rd, Rn, Rm) EMIT(RORV_gen(0, Rm, Rn, Rd))
+#define RORxw_REG(Rd, Rn, Rm) EMIT(RORV_gen(rex.w, Rm, Rn, Rd))
+
+
 // LSRV / LSLV
 #define LS_V_gen(sf, Rm, op2, Rn, Rd) ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b0010<<12 | (op2)<<10 | (Rn)<<5 | (Rd))
 #define LSRx_REG(Rd, Rn, Rm) EMIT(LS_V_gen(1, Rm, 0b01, Rn, Rd))
```
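What the headline change buys: AArch64's MOVN writes the bitwise complement of its 16-bit immediate into the destination, so any value whose complement fits in 16 bits (any imm in [-0xffff, -1]) can be materialized in one instruction. The old MOV64x spent two on that case (MOVZ of the complement, then MVNx_REG to flip it), and MOV32w had no such path at all. Below is a standalone sketch, not box64 code: it mirrors the new MOV64x selection logic, restates the MOVZ_gen/MOVN_gen/MOVK_gen layouts as plain functions, and prints instruction words instead of EMIT()ing them into a dynarec block.

```c
#include <stdint.h>
#include <stdio.h>

/* Bit layouts copied from MOVZ_gen/MOVN_gen/MOVK_gen above (0x25 == 0b100101). */
static uint32_t movz(uint32_t sf, uint32_t hw, uint32_t imm16, uint32_t rd)
{ return sf<<31 | 0x2u<<29 | 0x25u<<23 | hw<<21 | imm16<<5 | rd; }
static uint32_t movn(uint32_t sf, uint32_t hw, uint32_t imm16, uint32_t rd)
{ return sf<<31 | 0x0u<<29 | 0x25u<<23 | hw<<21 | imm16<<5 | rd; }
static uint32_t movk(uint32_t sf, uint32_t hw, uint32_t imm16, uint32_t rd)
{ return sf<<31 | 0x3u<<29 | 0x25u<<23 | hw<<21 | imm16<<5 | rd; }

/* Mirrors the new MOV64x: one MOVN when the complement fits in 16 bits,
 * otherwise MOVZ plus up to three MOVK; returns the instruction count. */
static int mov64x(uint64_t imm, uint32_t rd)
{
    int n = 0;
    if (~imm < 0xffff) {
        printf("  movn %08x\n", movn(1, 0, (uint32_t)(~imm) & 0xffff, rd)); n++;
    } else {
        printf("  movz %08x\n", movz(1, 0, (uint32_t)imm & 0xffff, rd)); n++;
        if (imm & 0xffff0000ULL)         { printf("  movk %08x\n", movk(1, 1, (uint32_t)(imm >> 16) & 0xffff, rd)); n++; }
        if (imm & 0xffff00000000ULL)     { printf("  movk %08x\n", movk(1, 2, (uint32_t)(imm >> 32) & 0xffff, rd)); n++; }
        if (imm & 0xffff000000000000ULL) { printf("  movk %08x\n", movk(1, 3, (uint32_t)(imm >> 48) & 0xffff, rd)); n++; }
    }
    return n;
}

int main(void)
{
    printf("%d op(s)\n", mov64x((uint64_t)-16, 0));         /* was MOVZ+MVN, now a single MOVN */
    printf("%d op(s)\n", mov64x(0x0123456789abcdefULL, 0)); /* still MOVZ + 3x MOVK            */
    return 0;
}
```

MOV32w gains the same fast path with a uint32_t complement test, and MOV64xw stays a thin dispatcher: rex.w (the recompiled x86-64 instruction's REX.W bit) selects the 64-bit MOV64x or the 32-bit MOV32w.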
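The new ASRV_gen and RORV_gen reuse the A64 "data-processing (2 source)" layout that the existing LS_V_gen already encodes for LSLV/LSRV; only the two op2 bits at bit 10 differ (LSLV 00, LSRV 01, ASRV 10, RORV 11). A quick self-contained check, assuming nothing from box64 beyond the bit layout quoted in the diff; the expected words are the standard A64 encodings of `asr x0, x1, x2` and `ror x0, x1, x2`:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Shift/rotate by register, per LS_V_gen/ASRV_gen/RORV_gen above:
 * 0xD6 == 0b11010110, op2 selects LSLV=0, LSRV=1, ASRV=2, RORV=3. */
static uint32_t shiftv(uint32_t sf, uint32_t rm, uint32_t op2, uint32_t rn, uint32_t rd)
{
    return sf<<31 | 0xD6u<<21 | rm<<16 | 0x2u<<12 | op2<<10 | rn<<5 | rd;
}

int main(void)
{
    assert(shiftv(1, 2, 2, 1, 0) == 0x9AC22820u); /* asr x0, x1, x2 */
    assert(shiftv(1, 2, 3, 1, 0) == 0x9AC22C20u); /* ror x0, x1, x2 */
    puts("ASRV/RORV encodings match");
    return 0;
}
```

The BFXIL and UBFIZ additions are likewise the architectural aliases of BFM/UBFM with immr/imms precomputed. One detail worth noticing in the diff: unlike the neighboring macros, BFXILx/BFXILw/BFXILxw expand to the bare BFM_gen word without an EMIT() wrapper, so as committed they encode an instruction but do not emit it.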