| field | value | date |
|---|---|---|
| author | ptitSeb <sebastien.chev@gmail.com> | 2023-10-30 19:00:44 +0100 |
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-10-30 19:00:44 +0100 |
| commit | 5c13f8f10ac82ca642a6c930585989cc0d75a664 (patch) | |
| tree | 269e1c1acb78487caf49fb1e31407f87e4b65a4d /src | |
| parent | 20cf990bf7e2c37a565dedc03890c9df1ea8b602 (diff) | |
[ARM64_DYNAREC] Added 66 0F 3A 60..63 opcodes
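For reference, 66 0F 3A 60..63 are the SSE4.2 packed string-compare opcodes PCMPESTRM, PCMPESTRI, PCMPISTRM and PCMPISTRI. A minimal sketch of the guest-side behavior being emulated, written with the standard SSE4.2 intrinsics (the buffer names and the chosen `_SIDD_*` mode are illustrative only, not taken from box64):

```c
// Build with: cc -msse4.2 demo.c
#include <nmmintrin.h>   // SSE4.2 string-compare intrinsics
#include <stdio.h>

int main(void)
{
    const char needle[16]   = "abc";      // zero-padded to 16 bytes
    const char haystack[16] = "xxabcxx";

    __m128i a = _mm_loadu_si128((const __m128i*)needle);
    __m128i b = _mm_loadu_si128((const __m128i*)haystack);

    // PCMPISTRI (0x63): implicit, NUL-terminated lengths; index returned in ECX.
    int idx = _mm_cmpistri(a, b, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ORDERED);

    // PCMPESTRM (0x60): explicit lengths in EAX/EDX; per-byte mask returned in XMM0.
    __m128i m = _mm_cmpestrm(a, 3, b, 7,
                             _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ORDERED | _SIDD_UNIT_MASK);

    printf("substring found at index %d\n", idx);   // expected: 2
    (void)m;
    return 0;
}
```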
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 3 |
| -rw-r--r-- | src/dynarec/arm64/arm64_printer.c | 14 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_660f.c | 178 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 13 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 4 |
5 files changed, 208 insertions, 4 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 886ecfa0..4a939eb8 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1766,8 +1766,9 @@
 // MOV Immediate
 #define MOVI_vector(Q, op, abc, cmode, defgh, Rd)    ((Q)<<30 | (op)<<29 | 0b0111100000<<19 | (abc)<<16 | (cmode)<<12 | 1<<10 | (defgh)<<5 | (Rd))
 #define MOVIQ_8(Rd, imm8)           EMIT(MOVI_vector(1, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
+#define MOVIQ_16(Rd, imm8, lsl8)    EMIT(MOVI_vector(1, 0, (((imm8)>>5)&0b111), 0b1000|((lsl8)?0b10:0), ((imm8)&0b11111), Rd))
 #define MOVI_8(Rd, imm8)            EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
-#define MOVI_16(Rd, imm8)           EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1000, ((imm8)&0b11111), Rd))
+#define MOVI_16(Rd, imm8, lsl8)     EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1000|((lsl8)?0b10:0), ((imm8)&0b11111), Rd))
 #define MOVI_32(Rd, imm8)           EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b0000, ((imm8)&0b11111), Rd))
 #define MOVI_64(Rd, imm8)           EMIT(MOVI_vector(0, 1, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index 4a889a28..70f96d34 100644
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -983,11 +983,21 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "MOVI V%d.%s, #0x%x", Rd, Vd, imm);
         return buff;
     }
-    // MOV immediate (not)shifted 16bits & 32bits
+    // MOV immediate notshifted 16bits & 32bits
     if(isMask(opcode, "0Q00111100000iiif00001iiiiiddddd", &a)) {
         const char* Y[] = {"2S", "4S", "4H", "8H"};
         const char* Vd = Y[(sf<<1)| a.Q];
-        snprintf(buff, sizeof(buff), "MOVI V%d.%s, #0x%x", Rd, Vd, imm);
+        int sh = 0;
+
+        snprintf(buff, sizeof(buff), "MOVI V%d.%s, #0x%x", Rd, Vd, imm<<sh);
+        return buff;
+    }
+    // MOV immediate shifted 16bits
+    if(isMask(opcode, "0Q00111100000iii101001iiiiiddddd", &a)) {
+        const char* Y[] = {"4H", "8H"};
+        const char* Vd = Y[a.Q];
+
+        snprintf(buff, sizeof(buff), "MOVI V%d.%s, #0x%x", Rd, Vd, imm<<8);
         return buff;
     }
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 800436f3..224ea138 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -20,6 +20,7 @@
 #include "dynarec_arm64_private.h"
 #include "dynarec_arm64_functions.h"
 #include "dynarec_arm64_helper.h"
+#include "emu/x64compstrings.h"

 uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
 {
@@ -48,6 +49,8 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
     MAYUSE(j64);
     #if STEP > 1
     static const int8_t mask_shift8[] = { -7, -6, -5, -4, -3, -2, -1, 0 };
+    static const int8_t mask_string8[] = { 7, 6, 5, 4, 3, 2, 1, 0 };
+    static const int8_t mask_string16[] = { 15, 14, 13, 12, 11, 10, 9, 8 };
     static const int8_t round_round[] = { 0, 2, 1, 3};
     #endif

@@ -1237,6 +1240,181 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             }
             break;

+        case 0x60:
+            INST_NAME("PCMPESTRM Gx, Ex, Ib");
+            SETFLAGS(X_OF|X_CF|X_AF|X_ZF|X_SF|X_PF, SF_SET);
+            nextop = F8;
+            GETG;
+            sse_forget_reg(dyn, ninst, gd);
+            ADDx_U12(x3, xEmu, offsetof(x64emu_t, xmm[gd]));
+            if(MODREG) {
+                ed = (nextop&7)+(rex.b<<3);
+                sse_reflect_reg(dyn, ninst, ed);
+                ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[ed]));
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1);
+                if(ed!=x1) {
+                    MOVx_REG(x1, ed);
+                }
+            }
+            MOVx_REG(x2, xRDX);
+            MOVx_REG(x4, xRAX);
+            u8 = F8;
+            MOV32w(x5, u8);
+            CALL(sse42_compare_string_explicit_len, x1);
+            q0 = sse_get_reg_empty(dyn, ninst, x2, gd);
+            q1 = fpu_get_scratch(dyn);
+            if(u8&0b1000000) {
+                switch(u8&1) {
+                    case 0b00:
+                        VDUPQB(q0, x1);         // load the low 8bits of the mask
+                        LSRw_IMM(x1, x1, 8);
+                        VDUPQB(q1, x1);         // load the high 8bits of the mask
+                        VEXTQ_8(q0, q0, q1, 8); // low and hig bits mask
+                        TABLE64(x2, (uintptr_t)&mask_string8);
+                        VLDR64_U12(q1, x2, 0);  // load shift
+                        VDUPQ_64(q1, q1, 0);
+                        USHLQ_8(q0, q0, q1);    // extract 1 bit
+                        MOVIQ_8(q1, 0x80);      // load mask
+                        VANDQ(q0, q0, q1);
+                        VSSHRQ_8(q0, q0, 7);    // saturate the mask
+                        break;
+                    case 0b01:
+                        VDUPQH(q0, x1);         // load the 8bits of the mask
+                        TABLE64(x2, (uintptr_t)&mask_string16);
+                        VLDR64_U12(q1, x2, 0);  // load shift
+                        UXTL_8(q1, q1);         // extend mask to 16bits
+                        USHLQ_16(q0, q0, q1);   // extract 1 bit
+                        MOVIQ_16(q1, 0x80, 1);  // load mask
+                        VANDQ(q0, q0, q1);
+                        VSSHRQ_16(q0, q0, 15);  // saturate the mask
+                }
+            } else {
+                VEORQ(q0, q0, q0);
+                VMOVQHfrom(q0, 0, x1);
+            }
+            break;
+        case 0x61:
+            INST_NAME("PCMPESTRI Gx, Ex, Ib");
+            SETFLAGS(X_OF|X_CF|X_AF|X_ZF|X_SF|X_PF, SF_SET);
+            nextop = F8;
+            GETG;
+            sse_reflect_reg(dyn, ninst, gd);
+            ADDx_U12(x3, xEmu, offsetof(x64emu_t, xmm[gd]));
+            if(MODREG) {
+                ed = (nextop&7)+(rex.b<<3);
+                sse_reflect_reg(dyn, ninst, ed);
+                ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[ed]));
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1);
+                if(ed!=x1) {
+                    MOVx_REG(x1, ed);
+                }
+            }
+            MOVx_REG(x2, xRDX);
+            MOVx_REG(x4, xRAX);
+            u8 = F8;
+            MOV32w(x5, u8);
+            CALL(sse42_compare_string_explicit_len, x1);
+            CBNZw_MARK(x1);
+            MOV32w(xRCX, (u8&1)?8:16);
+            B_NEXT_nocond;
+            MARK;
+            if(u8&0b1000000) {
+                CLZw(xRCX, x1);
+                MOV32w(x2, 31);
+                SUBw_REG(xRCX, x2, xRCX);
+            } else {
+                RBITxw(xRCX, x1);
+                CLZw(xRCX, xRCX);
+            }
+            break;
+        case 0x62:
+            INST_NAME("PCMPISTRM Gx, Ex, Ib");
+            SETFLAGS(X_OF|X_CF|X_AF|X_ZF|X_SF|X_PF, SF_SET);
+            nextop = F8;
+            GETG;
+            sse_forget_reg(dyn, ninst, gd);
+            ADDx_U12(x2, xEmu, offsetof(x64emu_t, xmm[gd]));
+            if(MODREG) {
+                ed = (nextop&7)+(rex.b<<3);
+                sse_reflect_reg(dyn, ninst, ed);
+                ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[ed]));
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1);
+                if(ed!=x1) {
+                    MOVx_REG(x1, ed);
+                }
+            }
+            u8 = F8;
+            MOV32w(x3, u8);
+            CALL(sse42_compare_string_implicit_len, x1);
+            q0 = sse_get_reg_empty(dyn, ninst, x2, gd);
+            q1 = fpu_get_scratch(dyn);
+            if(u8&0b1000000) {
+                switch(u8&1) {
+                    case 0b00:
+                        VDUPQB(q0, x1);         // load the low 8bits of the mask
+                        LSRw_IMM(x1, x1, 8);
+                        VDUPQB(q1, x1);         // load the high 8bits of the mask
+                        VEXTQ_8(q0, q0, q1, 8); // low and hig bits mask
+                        TABLE64(x2, (uintptr_t)&mask_string8);
+                        VLDR64_U12(q1, x2, 0);  // load shift
+                        VDUPQ_64(q1, q1, 0);
+                        USHLQ_8(q0, q0, q1);    // extract 1 bit
+                        MOVIQ_8(q1, 0x80);      // load mask
+                        VANDQ(q0, q0, q1);
+                        VSSHRQ_8(q0, q0, 7);    // saturate the mask
+                        break;
+                    case 0b01:
+                        VDUPQH(q0, x1);         // load the 8bits of the mask
+                        TABLE64(x2, (uintptr_t)&mask_string16);
+                        VLDR64_U12(q1, x2, 0);  // load shift
+                        UXTL_8(q1, q1);         // extend mask to 16bits
+                        USHLQ_16(q0, q0, q1);   // extract 1 bit
+                        MOVIQ_16(q1, 0x80, 1);  // load mask
+                        VANDQ(q0, q0, q1);
+                        VSSHRQ_16(q0, q0, 15);  // saturate the mask
+                }
+            } else {
+                VEORQ(q0, q0, q0);
+                VMOVQHfrom(q0, 0, x1);
+            }
+            break;
+        case 0x63:
+            INST_NAME("PCMPISTRI Gx, Ex, Ib");
+            SETFLAGS(X_OF|X_CF|X_AF|X_ZF|X_SF|X_PF, SF_SET);
+            nextop = F8;
+            GETG;
+            sse_reflect_reg(dyn, ninst, gd);
+            ADDx_U12(x2, xEmu, offsetof(x64emu_t, xmm[gd]));
+            if(MODREG) {
+                ed = (nextop&7)+(rex.b<<3);
+                sse_reflect_reg(dyn, ninst, ed);
+                ADDx_U12(x1, xEmu, offsetof(x64emu_t, xmm[ed]));
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1);
+                if(ed!=x1) {
+                    MOVx_REG(x1, ed);
+                }
+            }
+            u8 = F8;
+            MOV32w(x3, u8);
+            CALL(sse42_compare_string_implicit_len, x1);
+            CBNZw_MARK(x1);
+            MOV32w(xRCX, (u8&1)?8:16);
+            B_NEXT_nocond;
+            MARK;
+            if(u8&0b1000000) {
+                CLZw(xRCX, x1);
+                MOV32w(x2, 31);
+                SUBw_REG(xRCX, x2, xRCX);
+            } else {
+                RBITxw(xRCX, x1);
+                CLZw(xRCX, xRCX);
+            }
+            break;
+
         case 0xDF:
             INST_NAME("AESKEYGENASSIST Gx, Ex, Ib");  // AES-NI
             nextop = F8;
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index 006e2c3c..f623061f 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -1609,6 +1609,17 @@ static void sse_reflectcache(dynarec_arm_t* dyn, int ninst, int s1)
     }
 }

+void sse_reflect_reg(dynarec_arm_t* dyn, int ninst, int a)
+{
+    if(dyn->n.ssecache[a].v==-1)
+        return;
+    if(dyn->n.neoncache[dyn->n.ssecache[a].reg].t == NEON_CACHE_XMMW) {
+        VSTR128_U12(dyn->n.ssecache[a].reg, xEmu, offsetof(x64emu_t, xmm[a]));
+        /*dyn->n.neoncache[dyn->n.ssecache[a].reg].t = NEON_CACHE_XMMR;
+        dyn->n.ssecache[a].write = 0;*/
+    }
+}
+
 void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1, int not07)
 {
     int start = not07?8:0;
@@ -1641,6 +1652,8 @@ void fpu_popcache(dynarec_arm_t* dyn, int ninst, int s1, int not07)
     for (int i=start; i<16; ++i)
         if(dyn->n.ssecache[i].v!=-1) {
             VLDR128_U12(dyn->n.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
+            /*dyn->n.ssecache[i].write = 0;   // OPTIM: it's sync, so not write anymore
+            dyn->n.neoncache[dyn->n.ssecache[i].reg].t = NEON_CACHE_XMMR;*/
         }
     MESSAGE(LOG_DUMP, "\t------- Pop XMM Cache (%d)\n", n);
 }
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 268f8ee5..298f106a 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -1041,6 +1041,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define sse_get_reg_empty   STEPNAME(sse_get_reg_empty)
 #define sse_forget_reg      STEPNAME(sse_forget_reg)
 #define sse_purge07cache    STEPNAME(sse_purge07cache)
+#define sse_reflect_reg     STEPNAME(sse_reflect_reg)
 #define fpu_pushcache       STEPNAME(fpu_pushcache)
 #define fpu_popcache        STEPNAME(fpu_popcache)

@@ -1232,7 +1233,8 @@ int sse_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a);
 void sse_forget_reg(dynarec_arm_t* dyn, int ninst, int a);
 // purge the XMM0..XMM7 cache (before function call)
 void sse_purge07cache(dynarec_arm_t* dyn, int ninst, int s1);
-
+// Push current value to the cache
+void sse_reflect_reg(dynarec_arm_t* dyn, int ninst, int a);
 // common coproc helpers
 // reset the cache
 void fpu_reset(dynarec_arm_t* dyn);