| author | ptitSeb <sebastien.chev@gmail.com> | 2021-03-23 20:12:11 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2021-03-23 20:12:11 +0100 |
| commit | 2487fb4d980003f567ff78bf10ba06a95bd9d1b4 | |
| tree | 37ca8d8dd9f1bb59741c04fa0eca8ed22b46ac1c /src | |
| parent | de0718811e271b8971444fea68f2a04f3d9bca26 | |
| download | box64-2487fb4d980003f567ff78bf10ba06a95bd9d1b4.tar.gz, box64-2487fb4d980003f567ff78bf10ba06a95bd9d1b4.zip | |
[DYNAREC] Added 0F 58/59 opcodes, plus lots of small bugfixes (IntoTheBreach sounds good now)
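For context: `0F 58` is SSE `ADDPS` and `0F 59` is SSE `MULPS`, the lane-wise add/multiply of four packed single-precision floats in a 128-bit XMM register. The dynarec lowers each one to a single NEON `.4S` operation through the `VFADDQS`/`VFMULQS` emitter macros touched below. A minimal reference model of the x86 semantics being translated (illustrative only, not code from this commit):

```c
#include <stdio.h>

/* Illustrative model of the packed-single semantics; box64 itself
   emits one NEON FADD/FMUL on a .4S vector instead of a loop. */
typedef struct { float f[4]; } xmm_t;

static void addps(xmm_t *gx, const xmm_t *ex) {   /* opcode 0F 58 */
    for (int i = 0; i < 4; ++i) gx->f[i] += ex->f[i];
}

static void mulps(xmm_t *gx, const xmm_t *ex) {   /* opcode 0F 59 */
    for (int i = 0; i < 4; ++i) gx->f[i] *= ex->f[i];
}

int main(void) {
    xmm_t a = {{1, 2, 3, 4}}, b = {{10, 20, 30, 40}};
    addps(&a, &b);   /* a = {11, 22, 33, 44} */
    mulps(&a, &b);   /* a = {110, 440, 990, 1760} */
    printf("%g %g %g %g\n", a.f[0], a.f[1], a.f[2], a.f[3]);
    return 0;
}
```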
Diffstat (limited to 'src')
| Mode | File | Lines changed |
|---|---|---|
| -rwxr-xr-x | src/dynarec/arm64_emitter.h | 42 |
| -rwxr-xr-x | src/dynarec/arm64_printer.c | 8 |
| -rwxr-xr-x | src/dynarec/dynablock.c | 4 |
| -rwxr-xr-x | src/dynarec/dynarec_arm64_00.c | 1 |
| -rwxr-xr-x | src/dynarec/dynarec_arm64_0f.c | 40 |
| -rw-r--r-- | src/dynarec/dynarec_arm64_64.c | 11 |
| -rwxr-xr-x | src/dynarec/dynarec_arm64_66.c | 1 |
| -rwxr-xr-x | src/dynarec/dynarec_arm64_660f.c | 39 |
| -rwxr-xr-x | src/dynarec/dynarec_arm64_67.c | 10 |
| -rw-r--r-- | src/dynarec/dynarec_arm64_f0.c | 18 |
| -rwxr-xr-x | src/dynarec/dynarec_arm64_f20f.c | 19 |
| -rwxr-xr-x | src/dynarec/dynarec_arm64_f30f.c | 16 |
| -rwxr-xr-x | src/dynarec/dynarec_arm64_functions.c | 3 |
| -rwxr-xr-x | src/dynarec/dynarec_arm64_helper.c | 4 |
| -rwxr-xr-x | src/dynarec/dynarec_arm64_helper.h | 6 |
| -rw-r--r-- | src/emu/x64runf0.c | 2 |
16 files changed, 109 insertions, 115 deletions
```diff
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 47ee286f..f6eef0c8 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -645,28 +645,28 @@
 #define SHL_vector(Q, immh, immb, Rn, Rd)   ((Q)<<30 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b01010<<11 | 1<<10 | (Rn)<<5 | (Rd))
 #define VSHLQ_8(Vd, Vn, shift)   EMIT(SHL_vector(1, 0b0001, (shift)&7, Vn, Vd))
-#define VSHLQ_16(Vd, Vn, shift)  EMIT(SHL_vector(1, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd))
-#define VSHLQ_32(Vd, Vn, shift)  EMIT(SHL_vector(1, 0b0100 | ((shift)>>3)&3, (shift)&7, Vn, Vd))
-#define VSHLQ_64(Vd, Vn, shift)  EMIT(SHL_vector(1, 0b1000 | ((shift)>>3)&7, (shift)&7, Vn, Vd))
+#define VSHLQ_16(Vd, Vn, shift)  EMIT(SHL_vector(1, 0b0010 | (((shift)>>3)&1), (shift)&7, Vn, Vd))
+#define VSHLQ_32(Vd, Vn, shift)  EMIT(SHL_vector(1, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd))
+#define VSHLQ_64(Vd, Vn, shift)  EMIT(SHL_vector(1, 0b1000 | (((shift)>>3)&7), (shift)&7, Vn, Vd))
 #define VSHL_8(Vd, Vn, shift)    EMIT(SHL_vector(0, 0b0001, (shift)&7, Vn, Vd))
 #define VSHL_16(Vd, Vn, shift)   EMIT(SHL_vector(0, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd))
-#define VSHL_32(Vd, Vn, shift)   EMIT(SHL_vector(0, 0b0100 | ((shift)>>3)&3, (shift)&7, Vn, Vd))
+#define VSHL_32(Vd, Vn, shift)   EMIT(SHL_vector(0, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd))
 
 #define SHR_vector(Q, U, immh, immb, Rn, Rd)   ((Q)<<30 | (U)<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b00000<<11 | 1<<10 | (Rn)<<5 | (Rd))
 #define VSHRQ_8(Vd, Vn, shift)   EMIT(SHR_vector(1, 1, 0b0001, (8-(shift))&7, Vn, Vd))
-#define VSHRQ_16(Vd, Vn, shift)  EMIT(SHR_vector(1, 1, 0b0010 | ((16-(shift))>>3)&1, (16-(shift))&7, Vn, Vd))
-#define VSHRQ_32(Vd, Vn, shift)  EMIT(SHR_vector(1, 1, 0b0100 | ((32-(shift))>>3)&3, (32-(shift))&7, Vn, Vd))
-#define VSHRQ_64(Vd, Vn, shift)  EMIT(SHR_vector(1, 1, 0b1000 | ((64-(shift))>>3)&7, (64-(shift))&7, Vn, Vd))
+#define VSHRQ_16(Vd, Vn, shift)  EMIT(SHR_vector(1, 1, 0b0010 | (((16-(shift))>>3)&1), (16-(shift))&7, Vn, Vd))
+#define VSHRQ_32(Vd, Vn, shift)  EMIT(SHR_vector(1, 1, 0b0100 | (((32-(shift))>>3)&3), (32-(shift))&7, Vn, Vd))
+#define VSHRQ_64(Vd, Vn, shift)  EMIT(SHR_vector(1, 1, 0b1000 | (((64-(shift))>>3)&7), (64-(shift))&7, Vn, Vd))
 #define VSHR_8(Vd, Vn, shift)    EMIT(SHR_vector(0, 1, 0b0001, (8-(shift))&7, Vn, Vd))
-#define VSHR_16(Vd, Vn, shift)   EMIT(SHR_vector(0, 1, 0b0010 | ((16-(shift))>>3)&1, (16-(shift))&7, Vn, Vd))
-#define VSHR_32(Vd, Vn, shift)   EMIT(SHR_vector(0, 1, 0b0100 | ((32-(shift))>>3)&3, (32-(shift))&7, Vn, Vd))
+#define VSHR_16(Vd, Vn, shift)   EMIT(SHR_vector(0, 1, 0b0010 | (((16-(shift))>>3)&1), (16-(shift))&7, Vn, Vd))
+#define VSHR_32(Vd, Vn, shift)   EMIT(SHR_vector(0, 1, 0b0100 | (((32-(shift))>>3)&3), (32-(shift))&7, Vn, Vd))
 #define VSSHRQ_8(Vd, Vn, shift)  EMIT(SHR_vector(1, 0, 0b0001, (8-(shift))&7, Vn, Vd))
-#define VSSHRQ_16(Vd, Vn, shift) EMIT(SHR_vector(1, 0, 0b0010 | ((16-(shift))>>3)&1, (16-(shift))&7, Vn, Vd))
-#define VSSHRQ_32(Vd, Vn, shift) EMIT(SHR_vector(1, 0, 0b0100 | ((32-(shift))>>3)&3, (32-(shift))&7, Vn, Vd))
-#define VSSHRQ_64(Vd, Vn, shift) EMIT(SHR_vector(1, 0, 0b1000 | ((64-(shift))>>3)&7, (64-(shift))&7, Vn, Vd))
+#define VSSHRQ_16(Vd, Vn, shift) EMIT(SHR_vector(1, 0, 0b0010 | (((16-(shift))>>3)&1), (16-(shift))&7, Vn, Vd))
+#define VSSHRQ_32(Vd, Vn, shift) EMIT(SHR_vector(1, 0, 0b0100 | (((32-(shift))>>3)&3), (32-(shift))&7, Vn, Vd))
+#define VSSHRQ_64(Vd, Vn, shift) EMIT(SHR_vector(1, 0, 0b1000 | (((64-(shift))>>3)&7), (64-(shift))&7, Vn, Vd))
 #define VSSHR_8(Vd, Vn, shift)   EMIT(SHR_vector(0, 0, 0b0001, (8-(shift))&7, Vn, Vd))
-#define VSSHR_16(Vd, Vn, shift)  EMIT(SHR_vector(0, 0, 0b0010 | ((16-(shift))>>3)&1, (16-(shift))&7, Vn, Vd))
-#define VSSHR_32(Vd, Vn, shift)  EMIT(SHR_vector(0, 0, 0b0100 | ((32-(shift))>>3)&3, (32-(shift))&7, Vn, Vd))
+#define VSSHR_16(Vd, Vn, shift)  EMIT(SHR_vector(0, 0, 0b0010 | (((16-(shift))>>3)&1), (16-(shift))&7, Vn, Vd))
+#define VSSHR_32(Vd, Vn, shift)  EMIT(SHR_vector(0, 0, 0b0100 | (((32-(shift))>>3)&3), (32-(shift))&7, Vn, Vd))
 
 #define EXT_vector(Q, Rm, imm4, Rn, Rd)   ((Q)<<30 | 0b101110<<24 | (Rm)<<16 | (imm4)<<11 | (Rn)<<5 | (Rd))
 #define VEXTQ_8(Rd, Rn, Rm, index)   EMIT(EXT_vector(1, Rm, index, Rn, Rd))
@@ -675,21 +675,21 @@
 #define SLI_vector(Q, immh, immb, Rn, Rd)   ((Q)<<30 | 1<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b01010<<1 | 1<<10 | (Rn)<<5 | (Rd))
 #define VSLIQ_8(Vd, Vn, shift)   EMIT(VSLI_vector(1, 0b0001, (shift)&7, Vn, Vd))
 #define VSLIQ_16(Vd, Vn, shift)  EMIT(VSLI_vector(1, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd))
-#define VSLIQ_32(Vd, Vn, shift)  EMIT(VSLI_vector(1, 0b0100 | ((shift)>>3)&3, (shift)&7, Vn, Vd))
-#define VSLIQ_64(Vd, Vn, shift)  EMIT(VSLI_vector(1, 0b1000 | ((shift)>>3)&7, (shift)&7, Vn, Vd))
+#define VSLIQ_32(Vd, Vn, shift)  EMIT(VSLI_vector(1, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd))
+#define VSLIQ_64(Vd, Vn, shift)  EMIT(VSLI_vector(1, 0b1000 | (((shift)>>3)&7), (shift)&7, Vn, Vd))
 #define VSLI_8(Vd, Vn, shift)    EMIT(VSLI_vector(0, 0b0001, (shift)&7, Vn, Vd))
 #define VSLI_16(Vd, Vn, shift)   EMIT(VSLI_vector(0, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd))
-#define VSLI_32(Vd, Vn, shift)   EMIT(VSLI_vector(0, 0b0100 | ((shift)>>3)&3, (shift)&7, Vn, Vd))
+#define VSLI_32(Vd, Vn, shift)   EMIT(VSLI_vector(0, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd))
 
 // Shift Right and Insert (not touching higher part of dest)
 #define SRI_vector(Q, immh, immb, Rn, Rd)   ((Q)<<30 | 1<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b01000<<1 | 1<<10 | (Rn)<<5 | (Rd))
 #define VSRIQ_8(Vd, Vn, shift)   EMIT(VSRI_vector(1, 0b0001, (shift)&7, Vn, Vd))
 #define VSRIQ_16(Vd, Vn, shift)  EMIT(VSRI_vector(1, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd))
-#define VSRIQ_32(Vd, Vn, shift)  EMIT(VSRI_vector(1, 0b0100 | ((shift)>>3)&3, (shift)&7, Vn, Vd))
-#define VSRIQ_64(Vd, Vn, shift)  EMIT(VSRI_vector(1, 0b1000 | ((shift)>>3)&7, (shift)&7, Vn, Vd))
+#define VSRIQ_32(Vd, Vn, shift)  EMIT(VSRI_vector(1, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd))
+#define VSRIQ_64(Vd, Vn, shift)  EMIT(VSRI_vector(1, 0b1000 | (((shift)>>3)&7), (shift)&7, Vn, Vd))
 #define VSRI_8(Vd, Vn, shift)    EMIT(VSRI_vector(0, 0b0001, (shift)&7, Vn, Vd))
 #define VSRI_16(Vd, Vn, shift)   EMIT(VSRI_vector(0, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd))
-#define VSRI_32(Vd, Vn, shift)   EMIT(VSRI_vector(0, 0b0100 | ((shift)>>3)&3, (shift)&7, Vn, Vd))
+#define VSRI_32(Vd, Vn, shift)   EMIT(VSRI_vector(0, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd))
 
 // Integer MATH
 #define ADDSUB_vector(Q, U, size, Rm, Rn, Rd)   ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10000<<11 | 1<<10 | (Rn)<<5 | (Rd))
@@ -786,7 +786,7 @@
 #define FSUBD(Dd, Dn, Dm)   EMIT(FADDSUB_scalar(0b01, Dm, 1, Dn, Dd))
 
 // MUL
-#define FMUL_vector(Q, sz, Rm, Rn, Rd)   ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b011<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define FMUL_vector(Q, sz, Rm, Rn, Rd)   ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11011<<11 | 1<<10 | (Rn)<<5 | (Rd))
 #define VFMULS(Sd, Sn, Sm)   EMIT(FMUL_vector(0, 0, Sm, Sn, Sd))
 #define VFMULQS(Sd, Sn, Sm)  EMIT(FMUL_vector(1, 0, Sm, Sn, Sd))
 #define VFMULQD(Sd, Sn, Sm)  EMIT(FMUL_vector(1, 1, Sm, Sn, Sd))
diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c
index 8d9aedfe..ad36e3c0 100755
--- a/src/dynarec/arm64_printer.c
+++ b/src/dynarec/arm64_printer.c
@@ -91,9 +91,9 @@ int isMask(uint32_t opcode, const char* mask, arm64_print_t *a)
 int64_t signExtend(uint32_t val, int sz)
 {
     int64_t ret = val;
-    if((val>>(sz-1))&1 == 1)
-        val |= (0xffffffffffffffffll<<sz);
-    return val;
+    if((val>>(sz-1))&1)
+        ret |= (0xffffffffffffffffll<<sz);
+    return ret;
 }
 
 const char* arm64_print(uint32_t opcode, uintptr_t addr)
@@ -911,7 +911,7 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         char s = a.Q?'V':'D';
         char d = sf?'D':'S';
         int n = (a.Q && !sf)?4:2;
-        snprintf(buff, sizeof(buff), "VFMUL %c%d.%d%c, %c%d.%d%c, %c%d.%c%d", s, Rd, n, d, s, Rn, n, d, s, Rm, s, d);
+        snprintf(buff, sizeof(buff), "VFMUL %c%d.%d%c, %c%d.%d%c, %c%d.%d%c", s, Rd, n, d, s, Rn, n, d, s, Rm, n, d);
         return buff;
     }
     if(isMask(opcode, "00011110ff1mmmmm000010nnnnnddddd", &a)) {
diff --git a/src/dynarec/dynablock.c b/src/dynarec/dynablock.c
index 79a07027..836d7eb0 100755
--- a/src/dynarec/dynablock.c
+++ b/src/dynarec/dynablock.c
@@ -323,7 +323,7 @@ static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t
     if(!created)
         return block;   // existing block...
 
-    #if 0
+    #if 1
     if(box64_dynarec_dump) pthread_mutex_lock(&my_context->mutex_dyndump);
     #endif
@@ -338,7 +338,7 @@ static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t
         free(block);
         block = NULL;
     }
-    #if 0
+    #if 1
     if(box64_dynarec_dump) pthread_mutex_unlock(&my_context->mutex_dyndump);
     #endif
diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c
index 2e44b5bf..38c6a447 100755
--- a/src/dynarec/dynarec_arm64_00.c
+++ b/src/dynarec/dynarec_arm64_00.c
@@ -43,6 +43,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     opcode = F8;
     MAYUSE(eb1);
     MAYUSE(eb2);
+    MAYUSE(wb2);
     MAYUSE(tmp);
     MAYUSE(j32);
diff --git a/src/dynarec/dynarec_arm64_0f.c b/src/dynarec/dynarec_arm64_0f.c
index 8905878f..7a9ca5b8 100755
--- a/src/dynarec/dynarec_arm64_0f.c
+++ b/src/dynarec/dynarec_arm64_0f.c
@@ -34,7 +34,7 @@
 #define GETEX(a, D) \
     if(MODREG) { \
-        a = sse_get_reg(dyn, ninst, x1, nextop&7+(rex.b<<3)); \
+        a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); \
     } else { \
         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); \
         a = fpu_get_scratch(dyn); \
@@ -42,11 +42,11 @@
     }
 #define GETGM(a) \
-    gd = (nextop&0x38)>>3; \
+    gd = ((nextop&0x38)>>3); \
     a = mmx_get_reg(dyn, ninst, x1, gd)
 #define GETEM(a, D) \
     if(MODREG) { \
-        a = mmx_get_reg(dyn, ninst, x1, nextop&7); \
+        a = mmx_get_reg(dyn, ninst, x1, (nextop&7)); \
     } else { \
         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0); \
         a = fpu_get_scratch_double(dyn); \
@@ -63,24 +63,25 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     uint8_t nextop, u8;
     int32_t i32, i32_, j32;
     uint8_t gd, ed;
-    uint8_t wback, wb1, wb2;
+    uint8_t wback, wb2;
     uint8_t eb1, eb2;
-    uint8_t gb1, gb2;
-    int v0, v1, v2;
+    int v0, v1;
     int q0, q1;
-    int d0, d1;
+    int d0;
     int s0;
     int fixedaddress;
-    int parity;
     MAYUSE(s0);
+    MAYUSE(q0);
     MAYUSE(q1);
-    MAYUSE(v2);
-    MAYUSE(gb2);
-    MAYUSE(gb1);
+    MAYUSE(v0);
+    MAYUSE(v1);
+    MAYUSE(d0);
     MAYUSE(eb2);
     MAYUSE(eb1);
     MAYUSE(wb2);
     MAYUSE(j32);
+    MAYUSE(i32);
+    MAYUSE(u8);
 #if STEP == 3
     //static const int8_t mask_shift8[] = { -7, -6, -5, -4, -3, -2, -1, 0 };
 #endif
@@ -135,7 +136,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             break;
         case 0x16:
             nextop = F8;
-            if((nextop&0xC0)==0xC0) {
+            if(MODREG) {
                 INST_NAME("MOVLHPS Gx,Ex");
                 GETGX(v0);
                 v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3));
@@ -260,7 +261,20 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             GETGX(v0);
             VEORQ(v0, v0, q0);
             break;
-
+        case 0x58:
+            INST_NAME("ADDPS Gx, Ex");
+            nextop = F8;
+            GETEX(q0, 0);
+            GETGX(v0);
+            VFADDQS(v0, v0, q0);
+            break;
+        case 0x59:
+            INST_NAME("MULPS Gx, Ex");
+            nextop = F8;
+            GETEX(q0, 0);
+            GETGX(v0);
+            VFMULQS(v0, v0, q0);
+            break;
         case 0x5A:
             INST_NAME("CVTPS2PD Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/dynarec_arm64_64.c b/src/dynarec/dynarec_arm64_64.c
index 8854ca3f..d86be338 100644
--- a/src/dynarec/dynarec_arm64_64.c
+++ b/src/dynarec/dynarec_arm64_64.c
@@ -26,16 +26,11 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
 {
     uint8_t opcode = F8;
-    uint8_t nextop, u8;
-    uint32_t u32;
-    int32_t i32, j32;
-    int16_t i16;
-    uint16_t u16;
+    uint8_t nextop;
+    int32_t j32;
     uint8_t gd, ed;
-    uint8_t wback, wb1, wb2, gb1, gb2;
+    uint8_t wback;
     int fixedaddress;
-    MAYUSE(u16);
-    MAYUSE(u8);
     MAYUSE(j32);
 
     while((opcode==0xF2) || (opcode==0xF3)) {
diff --git a/src/dynarec/dynarec_arm64_66.c b/src/dynarec/dynarec_arm64_66.c
index 5637b3f8..91f8d618 100755
--- a/src/dynarec/dynarec_arm64_66.c
+++ b/src/dynarec/dynarec_arm64_66.c
@@ -27,7 +27,6 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
 {
     uint8_t opcode = F8;
     uint8_t nextop, u8;
-    uint32_t u32;
     int32_t i32, j32;
     int16_t i16;
     uint16_t u16;
diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c
index 2221e2db..3429049c 100755
--- a/src/dynarec/dynarec_arm64_660f.c
+++ b/src/dynarec/dynarec_arm64_660f.c
@@ -53,16 +53,15 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
     int v0, v1;
     int q0, q1;
     int d0;
-    int s0;
     int fixedaddress;
-    int parity;
     MAYUSE(d0);
+    MAYUSE(q0);
     MAYUSE(q1);
     MAYUSE(eb1);
     MAYUSE(eb2);
     MAYUSE(j32);
-    #if STEP == 3
+    #if 0//STEP == 3
     static const int8_t mask_shift8[] = { -7, -6, -5, -4, -3, -2, -1, 0 };
     #endif
@@ -74,13 +73,12 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;
             GETGX(v0);
             if(MODREG) {
-                v1 = sse_get_reg(dyn, ninst, x1, nextop&7);
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3));
+                VMOVeD(v0, 1, v1, 0);
             } else {
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0);
-                v1 = fpu_get_scratch(dyn);
-                VLDR64_U12(v1, ed, fixedaddress);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0);
+                VLD1_64(v0, 1, ed);
             }
-            VMOVeD(v0, 1, v1, 0);
             break;
         case 0x15:
             INST_NAME("UNPCKHPD Gx, Ex");
@@ -88,13 +86,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETGX(v0);
             VMOVeD(v0, 0, v0, 1);
             if(MODREG) {
-                v1 = sse_get_reg(dyn, ninst, x1, nextop&7);
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3));
+                VMOVeD(v0, 1, v1, 1);
             } else {
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0);
                 v1 = fpu_get_scratch(dyn);
-                VLDR64_U12(v1, ed, fixedaddress);
+                ADDx_U12(ed, ed, 8);
+                VLD1_64(v0, 1, ed);
             }
-            VMOVeD(v0, 1, v1, 1);
             break;
         case 0x1F:
@@ -106,7 +105,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x28:
             INST_NAME("MOVAPD Gx,Ex");
             nextop = F8;
-            gd = ((nextop&0x38)>>3) + (rex.r<<3);
+            GETG;
             if(MODREG) {
                 ed = (nextop&7)+(rex.b<<3);
                 v1 = sse_get_reg(dyn, ninst, x1, ed);
@@ -121,7 +120,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x29:
             INST_NAME("MOVAPD Ex,Gx");
             nextop = F8;
-            gd = ((nextop&0x38)>>3) + (rex.r<<3);
+            GETG;
             v0 = sse_get_reg(dyn, ninst, x1, gd);
             if(MODREG) {
                 ed = (nextop&7)+(rex.b<<3);
@@ -139,8 +138,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             if(opcode==0x2F) {INST_NAME("COMISD Gx, Ex");} else {INST_NAME("UCOMISD Gx, Ex");}
             SETFLAGS(X_ALL, SF_SET);
             nextop = F8;
-            gd = ((nextop&0x38)>>3) + (rex.r<<3);
-            v0 = sse_get_reg(dyn, ninst, x1, gd);
+            GETGX(v0);
             GETEX(q0, 0);
             FCMPD(v0, q0);
             FCOMI(x1, x2);
@@ -416,15 +414,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x7E:
             INST_NAME("MOVD Ed,Gx");
             nextop = F8;
-            gd = ((nextop&0x38)>>3)+(rex.r<<3);
-            v0 = sse_get_reg(dyn, ninst, x1, gd);
+            GETGX(v0);
             if(rex.w) {
                 if(MODREG) {
                     ed = xRAX + (nextop&7) + (rex.b<<3);
                     VMOVQDto(ed, v0, 0);
                 } else {
                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0);
-                    VSTR64_U12(x2, ed, fixedaddress);
+                    VSTR64_U12(v0, ed, fixedaddress);
                 }
             } else {
                 if(MODREG) {
@@ -432,7 +429,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     VMOVSto(ed, v0, 0);
                 } else {
                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0);
-                    VSTR32_U12(x2, ed, fixedaddress);
+                    VSTR32_U12(v0, ed, fixedaddress);
                 }
             }
             break;
@@ -569,7 +566,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;
             GETGD;
             GETEW(x1, 0);   // Get EW
-            TSTw_REG(x1, x1);
+            TSTw_REG(x1, x1);   // Don't use CBZ here, as the flag is reused later
             B_MARK(cEQ);
             LSLw(x1, x1, 16);   // put bits on top
             CLZw(x2, x1);       // x2 gets leading 0
diff --git a/src/dynarec/dynarec_arm64_67.c b/src/dynarec/dynarec_arm64_67.c
index 1b691710..f2c7ebe3 100755
--- a/src/dynarec/dynarec_arm64_67.c
+++ b/src/dynarec/dynarec_arm64_67.c
@@ -26,18 +26,10 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
 {
     uint8_t opcode = F8;
-    uint8_t nextop, u8;
     int8_t i8;
-    uint32_t u32;
     int32_t i32, j32;
-    int16_t i16;
-    uint16_t u16;
-    uint8_t gd, ed;
-    uint8_t wback, wb1;
-    int fixedaddress;
-    MAYUSE(u16);
-    MAYUSE(u8);
     MAYUSE(j32);
+    MAYUSE(i32);
 
     // REX prefix before the 67 are ignored
     rex.rex = 0;
diff --git a/src/dynarec/dynarec_arm64_f0.c b/src/dynarec/dynarec_arm64_f0.c
index a36f2589..7b6991f2 100644
--- a/src/dynarec/dynarec_arm64_f0.c
+++ b/src/dynarec/dynarec_arm64_f0.c
@@ -26,17 +26,15 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
 {
     uint8_t opcode = F8;
-    uint8_t nextop, u8;
-    uint32_t u32;
-    int32_t i32, j32;
-    int16_t i16;
-    uint16_t u16;
+    uint8_t nextop;
+    int32_t j32;
     uint8_t gd, ed;
-    uint8_t wback, wb1, wb2, gb1, gb2;
+    uint8_t wback, wb2, gb1, gb2;
     int64_t i64;
     int fixedaddress;
-    MAYUSE(u16);
-    MAYUSE(u8);
+    MAYUSE(gb1);
+    MAYUSE(gb2);
+    MAYUSE(wb2);
     MAYUSE(j32);
 
     while((opcode==0xF2) || (opcode==0xF3)) {
@@ -67,7 +65,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             }
             UBFXw(x1, wback, wb2*8, 8);
             emit_add8(dyn, ninst, x1, x2, x4, x3);
-            BFIx(wback, ed, wb2*8, 8);
+            BFIx(wback, x1, wb2*8, 8);
         } else {
             addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, 0, 0);
             MARKLOCK;
@@ -363,7 +361,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             GETED(0);   // No need to LOCK, this is readonly
             if(opcode==0x81) i64 = F32S; else i64 = F8S;
-            if(i32) {
+            if(i64) {
                 MOV64xw(x5, i64);
                 emit_cmp32(dyn, ninst, rex, ed, x5, x3, x4, x6);
             } else {
diff --git a/src/dynarec/dynarec_arm64_f20f.c b/src/dynarec/dynarec_arm64_f20f.c
index 193f1169..195a3e59 100755
--- a/src/dynarec/dynarec_arm64_f20f.c
+++ b/src/dynarec/dynarec_arm64_f20f.c
@@ -40,23 +40,18 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog)
 {
     uint8_t opcode = F8;
-    uint8_t nextop, u8;
-    int32_t i32, j32;
+    uint8_t nextop;
     uint8_t gd, ed;
-    uint8_t wback, wb1;
-    uint8_t eb1, eb2;
-    int v0, v1;
-    int q0, q1;
+    uint8_t wback;
+    int v0;
+    int q0;
     int d0, d1;
-    int s0;
     int fixedaddress;
-    int parity;
     MAYUSE(d0);
-    MAYUSE(q1);
-    MAYUSE(eb1);
-    MAYUSE(eb2);
-    MAYUSE(j32);
+    MAYUSE(d1);
+    MAYUSE(q0);
+    MAYUSE(v0);
 
     switch(opcode) {
diff --git a/src/dynarec/dynarec_arm64_f30f.c b/src/dynarec/dynarec_arm64_f30f.c
index c7bd738a..574764b8 100755
--- a/src/dynarec/dynarec_arm64_f30f.c
+++ b/src/dynarec/dynarec_arm64_f30f.c
@@ -41,22 +41,18 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
 {
     uint8_t opcode = F8;
     uint8_t nextop, u8;
-    int32_t i32, j32;
     uint8_t gd, ed;
-    uint8_t wback, wb1;
-    uint8_t eb1, eb2;
+    uint8_t wback;
     int v0, v1;
-    int q0, q1;
+    int q0;
     int d0, d1;
-    int s0;
     int fixedaddress;
-    int parity;
     MAYUSE(d0);
-    MAYUSE(q1);
-    MAYUSE(eb1);
-    MAYUSE(eb2);
-    MAYUSE(j32);
+    MAYUSE(d1);
+    MAYUSE(q0);
+    MAYUSE(v0);
+    MAYUSE(v1);
 
     switch(opcode) {
diff --git a/src/dynarec/dynarec_arm64_functions.c b/src/dynarec/dynarec_arm64_functions.c
index d2ba4f6f..da6175af 100755
--- a/src/dynarec/dynarec_arm64_functions.c
+++ b/src/dynarec/dynarec_arm64_functions.c
@@ -268,7 +268,8 @@ int getedparity(dynarec_arm_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, i
             return ((sib&0x7)==4 && (sib>>6)>=parity)?1:0;
         }
     } else if((nextop&7)==5) {
-        uint64_t tmp = F32S64+addr+delta;
+        uint64_t tmp = F32S64;
+        tmp+=addr+delta;
         return (tmp&tested)?0:1;
     } else {
         return 0;
diff --git a/src/dynarec/dynarec_arm64_helper.c b/src/dynarec/dynarec_arm64_helper.c
index 6be6efc4..f22cac82 100755
--- a/src/dynarec/dynarec_arm64_helper.c
+++ b/src/dynarec/dynarec_arm64_helper.c
@@ -240,6 +240,7 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst)
             MOVx_REG(xRIP, reg);
         }
         uintptr_t tbl = getJumpTable64();
+        MAYUSE(tbl);
         TABLE64(x2, tbl);
         UBFXx(x3, xRIP, 48, JMPTABL_SHIFT);
         LDRx_REG_LSL3(x2, x2, x3);
@@ -250,7 +251,8 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst)
             UBFXx(x3, xRIP, 0, JMPTABL_SHIFT);
             LDRx_REG_LSL3(x3, x2, x3);
         } else {
-            uintptr_t p = getJumpTableAddress64(ip); 
+            uintptr_t p = getJumpTableAddress64(ip);
+            MAYUSE(p);
             TABLE64(x2, p);
             GETIP_(ip);
             LDRx_U12(x3, x2, 0);
diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h
index ff397379..ed940272 100755
--- a/src/dynarec/dynarec_arm64_helper.h
+++ b/src/dynarec/dynarec_arm64_helper.h
@@ -265,6 +265,10 @@
 #define CBNZx_MARK(reg) \
     j32 = GETMARK-(dyn->arm_size); \
     CBNZx(reg, j32)
+// Branch to MARK if reg is not 0 (use j32)
+#define CBNZw_MARK(reg) \
+    j32 = GETMARK-(dyn->arm_size); \
+    CBNZw(reg, j32)
 // Branch to MARK2 if cond (use j32)
 #define B_MARK2(cond) \
     j32 = GETMARK2-(dyn->arm_size); \
@@ -440,7 +444,7 @@
     if(A==xRIP) dyn->last_ip = 0
 
 #define SET_DFNONE(S)   if(!dyn->dfnone) {MOVZw(S, d_none); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=1;}
-#define SET_DF(S, N)    if(N) {MOVZw(S, N); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=0;} else SET_DFNONE(S)
+#define SET_DF(S, N)    if((N)!=d_none) {MOVZw(S, (N)); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=0;} else SET_DFNONE(S)
 #define SET_NODF()      dyn->dfnone = 0
 #define SET_DFOK()      dyn->dfnone = 1
diff --git a/src/emu/x64runf0.c b/src/emu/x64runf0.c
index b09fd3aa..48eee438 100644
--- a/src/emu/x64runf0.c
+++ b/src/emu/x64runf0.c
@@ -301,7 +301,7 @@ int RunF0(x64emu_t *emu, rex_t rex)
                     case 4: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = and32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;
                     case 5: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = sub32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;
                     case 6: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = xor32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;
-                    case 7: cmp32(emu, ED->dword[0], tmp32u); break;
+                    case 7: cmp32(emu, ED->dword[0], tmp64u); break;
                 }
             }
 #else
```
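Among the bundled fixes, the `signExtend()` repair in `arm64_printer.c` is worth a note: the old version OR'ed the extension mask into the 32-bit `val` parameter (where it was truncated) and then returned `val`, so the extension was lost; the test `&1 == 1` also parses as `& (1 == 1)` because `==` binds tighter than `&` in C (coincidentally harmless here, but misleading). Below is the corrected function as it appears in the diff, wrapped in a small test harness of ours for illustration:

```c
#include <stdint.h>
#include <stdio.h>

/* Corrected helper from arm64_printer.c: sign-extend the low `sz`
   bits of `val` into a 64-bit value. The mask is now OR'ed into the
   64-bit `ret`, which is what gets returned. */
int64_t signExtend(uint32_t val, int sz)
{
    int64_t ret = val;
    if((val>>(sz-1))&1)
        ret |= (0xffffffffffffffffll<<sz);
    return ret;
}

int main(void)
{
    /* 0x1FF is -1 when read as a 9-bit signed field. */
    printf("%lld\n", (long long)signExtend(0x1FF, 9));  /* prints -1 */
    return 0;
}
```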