| author | ptitSeb <sebastien.chev@gmail.com> | 2023-04-06 19:55:34 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-04-06 19:55:34 +0200 |
| commit | 053ecec70bda076cfd4910a850bfbd8971fd7501 | |
| tree | d045a1e6b1438c1a621901c7e7be3aa07278d814 /src | |
| parent | 42601d845132aef7be9d1a0c246f75ca47f7618b | |
| download | box64-053ecec70bda076cfd4910a850bfbd8971fd7501.tar.gz, box64-053ecec70bda076cfd4910a850bfbd8971fd7501.zip | |
[ARM64_DYNAREC] Fixes to 66 0F E1/E2 opcodes
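For context: `66 0F E1` is x86 PSRAW (packed arithmetic right shift of 16-bit words) and `66 0F E2` is PSRAD (the 32-bit variant). Both take their shift count from the low 64 bits of the second operand, and a count larger than the element width must still replicate the sign bit into every result bit rather than produce zero. A minimal reference model of the word case, to make the intended semantics concrete (a sketch, not box64 code; assumes `>>` on signed types is arithmetic, as on GCC/Clang):

```c
#include <stdint.h>

/* Reference model of PSRAW (66 0F E1): shift each 16-bit lane right
   arithmetically by the count held in the low 64 bits of the source.
   Clamping the count at 15 reproduces the documented behavior for
   oversized counts, because an arithmetic shift by 15 already fills
   the whole word with copies of the sign bit. */
static void psraw_ref(int16_t dst[8], uint64_t count)
{
    if (count > 15)
        count = 15;
    for (int i = 0; i < 8; ++i)
        dst[i] = (int16_t)(dst[i] >> count);
}
```

PSRAD is the same with 32-bit lanes and a clamp at 31; those two bounds (15 and 31) are exactly what the new SMIN/SMAX sequences in the diff below enforce.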
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/arm64/arm64_emitter.h | 18 |
| -rwxr-xr-x | src/dynarec/arm64/arm64_printer.c | 44 |
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_660f.c | 28 |
3 files changed, 62 insertions, 28 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index c84e4f86..0d6998ba 100755
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1695,6 +1695,8 @@
 #define MOVI_vector(Q, op, abc, cmode, defgh, Rd) ((Q)<<30 | (op)<<29 | 0b0111100000<<19 | (abc)<<16 | (cmode)<<12 | 1<<10 | (defgh)<<5 | (Rd))
 #define MOVIQ_8(Rd, imm8) EMIT(MOVI_vector(1, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
 #define MOVI_8(Rd, imm8) EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
+#define MOVI_16(Rd, imm8) EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1000, ((imm8)&0b11111), Rd))
+#define MOVI_32(Rd, imm8) EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b0000, ((imm8)&0b11111), Rd))
 
 // SHLL and eXtend Long
 #define SHLL_vector(Q, U, immh, immb, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b10100<<11 | 1<<10 | (Rn)<<5 | (Rd))
@@ -1764,35 +1766,35 @@
 #define SMAX_8(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b00, Vm, 0, Vn, Vd))
 #define SMAX_16(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b01, Vm, 0, Vn, Vd))
 #define SMAX_32(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b10, Vm, 0, Vn, Vd))
-#define SMAX_64(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b11, Vm, 0, Vn, Vd))
+//#define SMAX_64(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b11, Vm, 0, Vn, Vd))
 #define UMAX_8(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b00, Vm, 0, Vn, Vd))
 #define UMAX_16(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b01, Vm, 0, Vn, Vd))
 #define UMAX_32(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b10, Vm, 0, Vn, Vd))
-#define UMAX_64(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b11, Vm, 0, Vn, Vd))
+//#define UMAX_64(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b11, Vm, 0, Vn, Vd))
 #define SMIN_8(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b00, Vm, 1, Vn, Vd))
 #define SMIN_16(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b01, Vm, 1, Vn, Vd))
 #define SMIN_32(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b10, Vm, 1, Vn, Vd))
-#define SMIN_64(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b11, Vm, 1, Vn, Vd))
+//#define SMIN_64(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 0, 0b11, Vm, 1, Vn, Vd))
 #define UMIN_8(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b00, Vm, 1, Vn, Vd))
 #define UMIN_16(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b01, Vm, 1, Vn, Vd))
 #define UMIN_32(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b10, Vm, 1, Vn, Vd))
-#define UMIN_64(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b11, Vm, 1, Vn, Vd))
+//#define UMIN_64(Vd, Vn, Vm) EMIT(MINMAX_vector(0, 1, 0b11, Vm, 1, Vn, Vd))
 #define SMAXQ_8(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b00, Vm, 0, Vn, Vd))
 #define SMAXQ_16(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b01, Vm, 0, Vn, Vd))
 #define SMAXQ_32(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b10, Vm, 0, Vn, Vd))
-#define SMAXQ_64(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b11, Vm, 0, Vn, Vd))
+//#define SMAXQ_64(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b11, Vm, 0, Vn, Vd))
 #define UMAXQ_8(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b00, Vm, 0, Vn, Vd))
 #define UMAXQ_16(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b01, Vm, 0, Vn, Vd))
 #define UMAXQ_32(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b10, Vm, 0, Vn, Vd))
-#define UMAXQ_64(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b11, Vm, 0, Vn, Vd))
+//#define UMAXQ_64(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b11, Vm, 0, Vn, Vd))
 #define SMINQ_8(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b00, Vm, 1, Vn, Vd))
 #define SMINQ_16(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b01, Vm, 1, Vn, Vd))
 #define SMINQ_32(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b10, Vm, 1, Vn, Vd))
-#define SMINQ_64(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b11, Vm, 1, Vn, Vd))
+//#define SMINQ_64(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 0, 0b11, Vm, 1, Vn, Vd))
 #define UMINQ_8(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b00, Vm, 1, Vn, Vd))
 #define UMINQ_16(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b01, Vm, 1, Vn, Vd))
 #define UMINQ_32(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b10, Vm, 1, Vn, Vd))
-#define UMINQ_64(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b11, Vm, 1, Vn, Vd))
+//#define UMINQ_64(Vd, Vn, Vm) EMIT(MINMAX_vector(1, 1, 0b11, Vm, 1, Vn, Vd))
 
 // HADD vector
 #define HADD_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 1<<10 | (Rn)<<5 | (Rd))
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index 2c499ea4..9fe7535d 100755
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -950,6 +950,28 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "VCMEQ V%d.%s, V%d.%s, V%d.%s", Rd, Vd, Rn, Vd, Rm, Vd);
         return buff;
     }
+    // MIN/MAX
+    if(isMask(opcode, "0QU01110ff1mmmmm0110o1nnnnnddddd", &a)) {
+        const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};
+        const char* Vd = Y[((sf)<<1) | a.Q];
+        snprintf(buff, sizeof(buff), "%c%s V%d.%s, V%d.%s, V%d.%s", a.U?'U':'S', a.o?"MIN":"MAX", Rd, Vd, Rn, Vd, Rm, Vd);
+        return buff;
+    }
+
+    // MOV immediate (not)shifted 8bits
+    if(isMask(opcode, "0Q00111100000iii111001iiiiiddddd", &a)) {
+        const char* Y[] = {"8B", "16B"};
+        const char* Vd = Y[a.Q];
+        snprintf(buff, sizeof(buff), "MOVI V%d.%s, #0x%x", Rd, Vd, imm);
+        return buff;
+    }
+    // MOV immediate (not)shifted 16bits & 32bits
+    if(isMask(opcode, "0Q00111100000iiif00001iiiiiddddd", &a)) {
+        const char* Y[] = {"2S", "4S", "4H", "8H"};
+        const char* Vd = Y[(sf<<1)| a.Q];
+        snprintf(buff, sizeof(buff), "MOVI V%d.%s, #0x%x", Rd, Vd, imm);
+        return buff;
+    }
 
     // Shift
     if(isMask(opcode, "0QU011110hhhhrrr000001nnnnnddddd", &a)) {
@@ -1125,6 +1147,20 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "F%s%s V%d.%d%c, V%d.%d%c, V%d.%d%c", option?"MIN":"MAX", a.Q?"Q":"", Rd, n, s, Rn, n, s, Rm, n, s);
         return buff;
     }
+    // NEG
+    if(isMask(opcode, "0Q101110ff100000101110nnnnnddddd", &a)) {
+        const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};
+        const char* Vd = Y[(sf<<1) | a.Q];
+        snprintf(buff, sizeof(buff), "NEG%s V%d.%s, V%d.%s", a.Q?"Q":"", Rd, Vd, Rn, Vd);
+        return buff;
+    }
+    // SSHL vector
+    if(isMask(opcode, "0QU01110ff1mmmmm010rS1nnnnnddddd", &a)) {
+        const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};
+        const char* Vd = Y[(sf<<1) | a.Q];
+        snprintf(buff, sizeof(buff), "%c%s%sSHL%s V%d.%s, V%d.%s, V%d.%s", a.U?'U':'S', a.r?"R":"", a.S?"Q":"", a.Q?"Q":"", Rd, Vd, Rn, Vd, Rm, Vd);
+        return buff;
+    }
 
     // FCVT
     if(isMask(opcode, "f0011110pp10010U000000nnnnnddddd", &a)) {
@@ -1300,14 +1336,6 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         return buff;
     }
 
-    // MOV immediate
-    if(isMask(opcode, "0Q00111100000iii111001iiiiiddddd", &a)) {
-        const char* Y[] = {"8B", "16B"};
-        const char* Vd = Y[a.Q];
-        snprintf(buff, sizeof(buff), "MOVI V%d.%s, #0x%x", Rd, Vd, imm);
-        return buff;
-    }
-
     // LD1/ST1 single structure
     if(isMask(opcode, "0Q0011010L000000cc0Sffnnnnnttttt", &a)) {
         int scale = a.c;
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 038e70c3..b06a9af2 100755
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -2136,13 +2136,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETGX(q0, 1);
             GETEX(q1, 0, 0);
             v0 = fpu_get_scratch(dyn);
-            VMOVeD(v0, 0, q1, 0);
-            VMOVeD(v0, 1, q1, 0);
-            SQXTN_32(v0, v0);   // 2*q1 in 32bits now
-            NEG_32(v0, v0);     // because we want SHR and not SHL
-            VMOVeD(v0, 1, v0, 0);
-            SQXTN_16(v0, v0);   // 4*q1 in 32bits now
-            VMOVeD(v0, 1, v0, 0);
+            v1 = fpu_get_scratch(dyn);
+            SQXTN_32(v0, q1);
+            NEG_32(v0, v0);
+            MOVI_32(v1, 15);
+            SMIN_32(v0, v0, v1);
+            NEG_32(v1, v1);
+            SMAX_32(v0, v0, v1);    // limit to -15 .. +15 values
+            VDUPQ_16(v0, v0, 0);    // only the low 8bits will be used anyway
             SSHLQ_16(q0, q0, v0);
             break;
         case 0xE2:
@@ -2151,11 +2152,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETGX(q0, 1);
             GETEX(q1, 0, 0);
             v0 = fpu_get_scratch(dyn);
-            VMOVeD(v0, 0, q1, 0);
-            VMOVeD(v0, 1, q1, 0);
-            SQXTN_32(v0, v0);   // 2*q1 in 32bits now
-            NEG_32(v0, v0);     // because we want SHR and not SHL
-            VMOVeD(v0, 1, v0, 0);
+            v1 = fpu_get_scratch(dyn);
+            SQXTN_32(v0, q1);
+            NEG_32(v0, v0);
+            MOVI_32(v1, 31);
+            SMIN_32(v0, v0, v1);
+            NEG_32(v1, v1);
+            SMAX_32(v0, v0, v1);    // limit to -31 .. +31 values
+            VDUPQ_32(v0, v0, 0);    // only the low 8bits will be used anyway
             SSHLQ_32(q0, q0, v0);
             break;
         case 0xE3:
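Why the new sequence works (a sketch of the reasoning, under stated assumptions): AArch64 has no vector arithmetic-shift-right by register, so the dynarec uses SSHL, which shifts each lane left by the signed count found in the low byte of the corresponding lane of the second operand, and right when that count is negative. Negating the x86 count (the NEG_32) turns the left shift into the wanted right shift, but since SSHL reads only the low byte, an unclamped large count could wrap and shift the wrong way or by the wrong amount; the new MOVI/SMIN/NEG/SMAX steps pin the negated count to -15..+15 (words) or -31..+31 (dwords) before VDUPQ broadcasts it to every lane. One 16-bit lane behaves like this scalar model (hypothetical helper name, not box64 code):

```c
#include <stdint.h>

/* Scalar model of the emitted ARM64 sequence for one PSRAW lane.
   'count' is the x86 shift count after SQXTN-style narrowing to 32
   bits. Assumes '>>' on signed types is arithmetic. */
static int16_t psraw_lane(int16_t x, int32_t count)
{
    int32_t sh = -count;        /* NEG_32: we want a right shift, not left */
    if (sh > 15) sh = 15;       /* SMIN_32 against MOVI_32(v1, 15)         */
    if (sh < -15) sh = -15;     /* SMAX_32 against the negated bound       */
    /* SSHLQ_16 semantics: positive count shifts left, negative right */
    return (sh >= 0) ? (int16_t)(x << sh) : (int16_t)(x >> -sh);
}
```

A count of, say, 200 becomes sh = -200, is clamped to -15, and yields a right shift by 15, filling every lane with the sign bit, which matches the x86 rule for oversized counts.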
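On the emitter side, the new MOVI_16/MOVI_32 macros just select a different cmode in the existing MOVI_vector encoding: 0b1110 replicates the 8-bit immediate into every byte, 0b1000 places it in the low byte of each 16-bit lane, and 0b0000 in the low byte of each 32-bit lane. A stand-alone check of what MOVI_32(v1, 15) from the diff assembles to, with the macro copied verbatim and the expected value worked out by hand:

```c
#include <stdint.h>
#include <stdio.h>

/* Copy of the macro added in the diff, so this compiles stand-alone. */
#define MOVI_vector(Q, op, abc, cmode, defgh, Rd) \
    ((Q)<<30 | (op)<<29 | 0b0111100000<<19 | (abc)<<16 | (cmode)<<12 | 1<<10 | (defgh)<<5 | (Rd))

int main(void)
{
    /* MOVI_32(v1, 15): Q=0, cmode=0b0000, imm8=15 split as abc:defgh */
    uint32_t op = MOVI_vector(0u, 0u, (15u>>5)&0b111, 0b0000, 15u&0b11111, 1u);
    printf("MOVI v1.2s, #15 -> 0x%08x\n", op); /* 0x0f0005e1 */
    return 0;
}
```

The commented-out `*_64` macros are deliberate rather than cosmetic: the AArch64 SMIN/SMAX/UMIN/UMAX vector encodings reserve the 64-bit element size, so those macros could never have emitted a valid instruction.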