diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2023-03-05 20:16:22 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-03-05 20:16:22 +0100 |
| commit | b1923cadb5fbd4abd3fcba36142266f6ce588f4e (patch) | |
| tree | 479d45d4eec05abd148d5038e9e78807d9400590 | |
| parent | 0a955c10203de975b0681d527832bdf86fbb2620 (diff) | |
| download | box64-b1923cadb5fbd4abd3fcba36142266f6ce588f4e.tar.gz box64-b1923cadb5fbd4abd3fcba36142266f6ce588f4e.zip | |
[DYNAREC] Some fixes to double SSE opcode (fixes Steam BigPicture)
| -rwxr-xr-x | src/dynarec/arm64/arm64_emitter.h | 12 |
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_0f.c | 14 |
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_660f.c | 2 |
3 files changed, 16 insertions, 12 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index b25d490d..5a440520 100755 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -288,23 +288,23 @@ #define LDPx_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b01, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) #define LDPw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 1, 0b01, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) -#define LDPxw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b01, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) +#define LDPxw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b01, (((uint32_t)(imm))>>(2+rex.w))&0x7f, Rt2, Rn, Rt)) #define LDPx_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b11, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) #define LDPw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 1, 0b11, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) -#define LDPxw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b11, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) +#define LDPxw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b11, (((uint32_t)(imm))>>(2+rex.w))&0x7f, Rt2, Rn, Rt)) #define LDPx_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b10, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) #define LDPw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 1, 0b10, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) -#define LDPxw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b10, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) +#define LDPxw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b10, (((uint32_t)(imm))>>(2+rex.w))&0x7f, Rt2, Rn, Rt)) #define STPx_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 0, 0b01, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) #define STPw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b01, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) -#define STPxw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b01, 
(((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) +#define STPxw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b01, (((uint32_t)(imm))>>(2+rex.w))&0x7f, Rt2, Rn, Rt)) #define STPx_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 0, 0b11, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) #define STPw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b11, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) -#define STPxw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b11, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) +#define STPxw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b11, (((uint32_t)(imm))>>(2+rex.w))&0x7f, Rt2, Rn, Rt)) #define STPx_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 0, 0b10, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) #define STPw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b10, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) -#define STPxw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b10, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) +#define STPxw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b10, (((uint32_t)(imm))>>(2+rex.w))&0x7f, Rt2, Rn, Rt)) // PUSH / POP helper #define POP1(reg) LDRx_S9_postindex(reg, xRSP, 8) diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index fe039dd7..8a51ab72 100755 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -1642,27 +1642,31 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin GETGX(v0, 1); if(!MODREG) { addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 1); - v1 = 0; // to avoid a warning + v1 = -1; // to avoid a warning } else v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0); u8 = F8; - if(MODREG && v0==v1 && (u8&0x3)==((u8>>2)&3) && (u8&0xf)==((u8>>4)&0xf)) { + if(v0==v1 && (u8&0x3)==((u8>>2)&3) && (u8&0xf)==((u8>>4)&0xf)) { VDUPQ_32(v0, v0, u8&3); + } else if(v0==v1 && (u8==0xe0)) { // easy special case + 
VMOVeS(v0, 1, v0, 0); + } else if(v0==v1 && (u8==0xe5)) { // easy special case + VMOVeS(v0, 0, v0, 1); } else { d0 = fpu_get_scratch(dyn); // first two elements from Gx for(int i=0; i<2; ++i) { - VMOVeS(d0, i, v0, (u8>>(i*2)&3)); + VMOVeS(d0, i, v0, (u8>>(i*2))&3); } // second two from Ex if(MODREG) { for(int i=2; i<4; ++i) { - VMOVeS(d0, i, v1, (u8>>(i*2)&3)); + VMOVeS(d0, i, v1, (u8>>(i*2))&3); } } else { SMREAD(); for(int i=2; i<4; ++i) { - ADDx_U12(x2, ed, (u8>>(i*2)&3)*4); + ADDx_U12(x2, ed, ((u8>>(i*2))&3)*4); VLD1_32(d0, i, x2); } } diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index 814c2552..01cd736a 100755 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -1984,7 +1984,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n nextop = F8; GETEX(v1, 0, 0); GETGX_empty(v0); - VFCVTNSQD(v0, v1); // convert double -> int64 + VFCVTZSQD(v0, v1); // convert double -> int64 SQXTN_32(v0, v0); // convert int64 -> int32 with saturation in lower part, RaZ high part break; case 0xE7: |