diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2025-04-21 12:22:06 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-04-21 12:22:06 +0200 |
| commit | 6f3f3e0e85bd55bae2ff2040e8e4eb921f8716dd (patch) | |
| tree | fac61537d7657a6d8ec48fdcf949893f51104bb3 /src | |
| parent | 2384462f61f5105921aa931855f028b1f3b5c4c6 (diff) | |
| download | box64-6f3f3e0e85bd55bae2ff2040e8e4eb921f8716dd.tar.gz box64-6f3f3e0e85bd55bae2ff2040e8e4eb921f8716dd.zip | |
[ARM64_DYNAREC] Add/Improved (V)H[ADD/SUB]P[S/D] opcodes
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 6 | ||||
| -rw-r--r-- | src/dynarec/arm64/arm64_printer.c | 8 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_660f.c | 17 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f.c | 32 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c | 5 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_f20f.c | 8 |
6 files changed, 53 insertions, 23 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index 7d087fc5..1c8f0296 100644 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -1570,6 +1570,12 @@ int convert_bitmask(uint64_t bitmask); #define VFMAXQS(Vd, Vn, Vm) EMIT(FMINMAX_vector(1, 0, 0, 0, Vm, Vn, Vd)) #define VFMINQD(Vd, Vn, Vm) EMIT(FMINMAX_vector(1, 0, 1, 1, Vm, Vn, Vd)) #define VFMAXQD(Vd, Vn, Vm) EMIT(FMINMAX_vector(1, 0, 0, 1, Vm, Vn, Vd)) +#define VFMINPS(Vd, Vn, Vm) EMIT(FMINMAX_vector(0, 1, 1, 0, Vm, Vn, Vd)) +#define VFMAXPS(Vd, Vn, Vm) EMIT(FMINMAX_vector(0, 1, 0, 0, Vm, Vn, Vd)) +#define VFMINPQS(Vd, Vn, Vm) EMIT(FMINMAX_vector(1, 1, 1, 0, Vm, Vn, Vd)) +#define VFMAXPQS(Vd, Vn, Vm) EMIT(FMINMAX_vector(1, 1, 0, 0, Vm, Vn, Vd)) +#define VFMINPQD(Vd, Vn, Vm) EMIT(FMINMAX_vector(1, 1, 1, 1, Vm, Vn, Vd)) +#define VFMAXPQD(Vd, Vn, Vm) EMIT(FMINMAX_vector(1, 1, 0, 1, Vm, Vn, Vd)) #define FMINMAX_scalar(type, Rm, op, Rn, Rd) (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | 0b01<<14 | (op)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) #define FMINS(Sd, Sn, Sm) EMIT(FMINMAX_scalar(0b00, Sm, 0b01, Sn, Sd)) diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c index 766ac6ea..a0818a78 100644 --- a/src/dynarec/arm64/arm64_printer.c +++ b/src/dynarec/arm64/arm64_printer.c @@ -1362,18 +1362,18 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr) snprintf(buff, sizeof(buff), "F%s %c%d, %c%d, %c%d", (option==3)?"MINNM":((option==2)?"MAXNM":((!option)?"MAX":"MIN")), s, Rd, s, Rn, s, Rm); return buff; } - if(isMask(opcode, "0Q001110of1mmmmm110001nnnnnddddd", &a)) { + if(isMask(opcode, "0QU01110of1mmmmm110001nnnnnddddd", &a)) { char s = (sf==0)?'S':((sf==1)?'D':'?'); int n = (sf==0)?2:1; n *= a.Q?2:1; - snprintf(buff, sizeof(buff), "F%sNM%s V%d.%d%c, V%d.%d%c, V%d.%d%c", option?"MIN":"MAX", a.Q?"Q":"", Rd, n, s, Rn, n, s, Rm, n, s); + snprintf(buff, sizeof(buff), "F%sNM%s%s V%d.%d%c, V%d.%d%c, V%d.%d%c", option?"MIN":"MAX", 
a.U?"P":"", a.Q?"Q":"", Rd, n, s, Rn, n, s, Rm, n, s); return buff; } - if(isMask(opcode, "0Q001110of1mmmmm111101nnnnnddddd", &a)) { + if(isMask(opcode, "0QU01110of1mmmmm111101nnnnnddddd", &a)) { char s = (sf==0)?'S':((sf==1)?'D':'?'); int n = (sf==0)?2:1; n *= a.Q?2:1; - snprintf(buff, sizeof(buff), "F%s%s V%d.%d%c, V%d.%d%c, V%d.%d%c", option?"MIN":"MAX", a.Q?"Q":"", Rd, n, s, Rn, n, s, Rm, n, s); + snprintf(buff, sizeof(buff), "F%s%s%s V%d.%d%c, V%d.%d%c, V%d.%d%c", option?"MIN":"MAX", a.U?"P":"", a.Q?"Q":"", Rd, n, s, Rn, n, s, Rm, n, s); return buff; } // FMADD diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index 14540eef..81dad552 100644 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -2289,10 +2289,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n v0 = fpu_get_scratch(dyn, ninst); v1 = fpu_get_scratch(dyn, ninst); // check if any input value was NAN - // but need to mix low/high part - VTRNQ1_64(v0, q1, q0); - VTRNQ2_64(v1, q1, q0); - VFMAXQD(v0, v0, v1); // propagate NAN + VFMAXPQD(v0, q1, q0); // propagate NAN VFCMEQQD(v0, v0, v0); // 0 if NAN, 1 if not NAN } VFADDPQD(q1, q1, q0); @@ -2311,7 +2308,19 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n v0 = fpu_get_scratch(dyn, ninst); VUZP1Q_64(v0, q0, q1); VUZP2Q_64(q0, q0, q1); + if(!BOX64ENV(dynarec_fastnan)) { + v1 = fpu_get_scratch(dyn, ninst); + // check if any input value was NAN + VFMAXQD(v1, v0, q0); // propagate NAN + VFCMEQQD(v1, v1, v1); // 0 if NAN, 1 if not NAN + } VFSUBQD(q0, v0, q0); + if(!BOX64ENV(dynarec_fastnan)) { + VFCMEQQD(v0, q0, q0); // 0 => out is NAN + VBICQ(v1, v1, v0); // forget it if any input was a NAN already + VSHLQ_64(v1, v1, 63); // only keep the sign bit + VORRQ(q0, q0, v1); // NAN -> -NAN + } break; case 0x7E: INST_NAME("MOVD Ed,Gx"); diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c 
b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c index f4424246..efffe8dd 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c @@ -1138,10 +1138,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); } if(!BOX64ENV(dynarec_fastnan)) { // check if any input value was NAN - // but need to mix low/high part - VTRNQ1_64(q0, v2, v1); - VTRNQ2_64(q1, v2, v1); - VFMAXQD(q0, q0, q1); // propagate NAN + VFMAXPQD(q0, v2, v1); // propagate NAN VFCMEQQD(q0, q0, q0); // 0 if NAN, 1 if not NAN } VFADDPQD(v0, v2, v1); @@ -1154,7 +1151,32 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, } if(!vex.l) YMM0(gd); break; - + case 0x7D: + INST_NAME("VHSUBPD Gx, Vx, Ex"); + nextop = F8; + q0 = fpu_get_scratch(dyn, ninst); + if(!BOX64ENV(dynarec_fastnan)) + q1 = fpu_get_scratch(dyn, ninst); + for(int l=0; l<1+vex.l; ++l) { + if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); } + VUZP1Q_64(q0, v2, v1); + VUZP2Q_64(v0, v2, v1); + if(!BOX64ENV(dynarec_fastnan)) { + // check if any input value was NAN + // but need to mix low/high part + VFMAXQD(q1, v0, q0); // propagate NAN + VFCMEQQD(q1, q1, q1); // 0 if NAN, 1 if not NAN + } + VFSUBQD(v0, q0, v0); + if(!BOX64ENV(dynarec_fastnan)) { + VFCMEQQD(q0, v0, v0); // 0 => out is NAN + VBICQ(q1, q1, q0); // forget it if any input was a NAN already + VSHLQ_64(q1, q1, 63); // only keep the sign bit + VORRQ(v0, v0, q1); // NAN -> -NAN + } + } + if(!vex.l) YMM0(gd); + break; case 0x7E: INST_NAME("VMOVD Ed,Gx"); nextop = F8; diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c index 156d9243..a94574a8 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c @@ -429,10 +429,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, 
uintptr_t addr, uintptr_t ip, if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); } if(!BOX64ENV(dynarec_fastnan)) { // check if any input value was NAN - // but need to mix low/high part - VUZP1Q_32(q0, v2, v1); - VUZP2Q_32(q1, v2, v1); - VFMAXQS(q0, q0, q1); // propagate NAN + VFMAXPQS(q0, v2, v1); // propagate NAN VFCMEQQS(q0, q0, q0); // 0 if NAN, 1 if not NAN } VFADDPQS(v0, v2, v1); diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c index 03240803..d329f560 100644 --- a/src/dynarec/arm64/dynarec_arm64_f20f.c +++ b/src/dynarec/arm64/dynarec_arm64_f20f.c @@ -427,10 +427,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n v0 = fpu_get_scratch(dyn, ninst); v1 = fpu_get_scratch(dyn, ninst); // check if any input value was NAN - // but need to mix low/high part - VUZP1Q_32(v0, q0, q1); - VUZP2Q_32(v1, q0, q1); - VFMAXQS(v0, v0, v1); // propagate NAN + VFMAXPQS(v0, q1, q0); // propagate NAN VFCMEQQS(v0, v0, v0); // 0 if NAN, 1 if not NAN } VFADDPQS(q1, q1, q0); @@ -452,8 +449,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n if(!BOX64ENV(dynarec_fastnan)) { d1 = fpu_get_scratch(dyn, ninst); // check if any input value was NAN - // but need to mix low/high part - VFMAXQS(d1, v0, d0); // propagate NAN + VFMAXQS(d1, d0, v0); // propagate NAN VFCMEQQS(d1, d1, d1); // 0 if NAN, 1 if not NAN } VFSUBQS(v0, d0, v0); |