diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2022-04-01 19:27:27 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2022-04-01 19:27:27 +0200 |
| commit | f2012fc6365c338b977a6e6a230e1d9d7c750d51 (patch) | |
| tree | 584792a0b26327fdde1da550b1b06e93587a2332 /src | |
| parent | 16f82ba6b3a447fca0d9d1c56098cc1aace10d2c (diff) | |
| download | box64-f2012fc6365c338b977a6e6a230e1d9d7c750d51.tar.gz box64-f2012fc6365c338b977a6e6a230e1d9d7c750d51.zip | |
Added more SSE2 opcodes to test17, and added NaN handling to SQRTSD and MULSD ([DYNAREC] too)
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/arm64/arm64_emitter.h | 37 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_0f.c | 20 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_660f.c | 28 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_f20f.c | 30 | ||||
| -rw-r--r-- | src/emu/x64run0f.c | 14 | ||||
| -rw-r--r-- | src/emu/x64runf20f.c | 11 |
6 files changed, 99 insertions, 41 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index b69ef087..437b2130 100755 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -1467,18 +1467,35 @@ // Vector Float CMP // EQual #define FCMP_vector(Q, U, E, sz, Rm, ac, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (E)<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b1110<<12 | (ac)<<11 | 1<<10 | (Rn)<<5 | (Rd)) -#define FCMEQQD(Rd, Rn, Rm) EMIT(FCMP_vector(1, 0, 0, 1, Rm, 0, Rn, Rd)) -#define FCMEQQS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 0, 0, 0, Rm, 0, Rn, Rd)) +#define VFCMEQQD(Rd, Rn, Rm) EMIT(FCMP_vector(1, 0, 0, 1, Rm, 0, Rn, Rd)) +#define VFCMEQQS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 0, 0, 0, Rm, 0, Rn, Rd)) // Greater or Equal -#define FCMGEQD(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 0, 1, Rm, 0, Rn, Rd)) -#define FCMGEQS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 0, 0, Rm, 0, Rn, Rd)) -#define FCMGEQD_ABS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 0, 1, Rm, 1, Rn, Rd)) -#define FCMGEQS_ABS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 0, 0, Rm, 1, Rn, Rd)) +#define VFCMGEQD(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 0, 1, Rm, 0, Rn, Rd)) +#define VFCMGEQS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 0, 0, Rm, 0, Rn, Rd)) +#define VFCMGEQD_ABS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 0, 1, Rm, 1, Rn, Rd)) +#define VFCMGEQS_ABS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 0, 0, Rm, 1, Rn, Rd)) // Greater Than -#define FCMGTQD(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 1, 1, Rm, 0, Rn, Rd)) -#define FCMGTQS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 1, 0, Rm, 0, Rn, Rd)) -#define FCMGTQD_ABS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 1, 1, Rm, 1, Rn, Rd)) -#define FCMGTQS_ABS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 1, 0, Rm, 1, Rn, Rd)) +#define VFCMGTQD(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 1, 1, Rm, 0, Rn, Rd)) +#define VFCMGTQS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 1, 0, Rm, 0, Rn, Rd)) +#define VFCMGTQD_ABS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 1, 1, Rm, 1, Rn, Rd)) +#define VFCMGTQS_ABS(Rd, Rn, Rm) EMIT(FCMP_vector(1, 1, 1, 0, Rm, 1, Rn, Rd)) + +// Scalar Float 
CMP to 0 +#define FCMP_0_scalar(U, sz, op, Rn, Rd) (0b01<<30 | (U)<<29| 0b11110<<24 | 1<<23 | (sz)<<22 | 0b10000<<17 | 0b011<<14 | (op)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +// Less or equal to 0 +#define FCMLES_0(Rd, Rn) EMIT(FCMP_0_scalar(1, 0, 0b01, (Rn), (Rd))) +#define FCMLED_0(Rd, Rn) EMIT(FCMP_0_scalar(1, 1, 0b01, (Rn), (Rd))) +// Greater than 0 +#define FCMGTS_0(Rd, Rn) EMIT(FCMP_0_scalar(0, 0, 0b00, (Rn), (Rd))) +#define FCMGTD_0(Rd, Rn) EMIT(FCMP_0_scalar(0, 1, 0b00, (Rn), (Rd))) +// Less than 0 +#define FCMLTS_0(Rd, Rn) EMIT(FCMP_0_scalar(0, 0, 0b10, (Rn), (Rd))) +#define FCMLTD_0(Rd, Rn) EMIT(FCMP_0_scalar(0, 1, 0b10, (Rn), (Rd))) + +// Scalar Float CMP +#define FCMP_op_scalar(U, E, sz, Rm, ac, Rn, Rd) (0b01<<30 | (U)<<29 | 0b11110<<24 | (E)<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b1110<<12 | (ac<<11 | 1<<10 | (Rn)<<5 | (Rd))) +#define FCMEQS(Rd, Rn, Rm) EMIT(FCMP_op_scalar(1, 0, 0, (Rm), 0, (Rn), (Rd))) +#define FCMEQD(Rd, Rn, Rm) EMIT(FCMP_op_scalar(1, 1, 0, (Rm), 0, (Rn), (Rd))) // UMULL / SMULL #define MULL_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b1100<<12 |(Rn)<<5 |(Rd)) diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index b726db9a..df36e90f 100755 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -1540,24 +1540,24 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u8 = F8; switch(u8&7) { // the inversion of the params in the comparison is there to handle NaN the same way SSE does - case 0: FCMEQQS(v0, v0, v1); break; // Equal - case 1: FCMGTQS(v0, v1, v0); break; // Less than - case 2: FCMGEQS(v0, v1, v0); break; // Less or equal - case 3: FCMEQQS(v0, v0, v0); + case 0: VFCMEQQS(v0, v0, v1); break; // Equal + case 1: VFCMGTQS(v0, v1, v0); break; // Less than + case 2: VFCMGEQS(v0, v1, v0); break; // Less or equal + case 3: VFCMEQQS(v0, v0, v0); if(v0!=v1) { q0 = fpu_get_scratch(dyn); 
- FCMEQQS(q0, v1, v1); + VFCMEQQS(q0, v1, v1); VANDQ(v0, v0, q0); } VMVNQ(v0, v0); break; // NaN (NaN is not equal to himself) - case 4: FCMEQQS(v0, v0, v1); VMVNQ(v0, v0); break; // Not Equal (or unordered on ARM, not on X86...) - case 5: FCMGTQS(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or equal or unordered - case 6: FCMGEQS(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or unordered - case 7: FCMEQQS(v0, v0, v0); + case 4: VFCMEQQS(v0, v0, v1); VMVNQ(v0, v0); break; // Not Equal (or unordered on ARM, not on X86...) + case 5: VFCMGTQS(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or equal or unordered + case 6: VFCMGEQS(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or unordered + case 7: VFCMEQQS(v0, v0, v0); if(v0!=v1) { q0 = fpu_get_scratch(dyn); - FCMEQQS(q0, v1, v1); + VFCMEQQS(q0, v1, v1); VANDQ(v0, v0, q0); } break; // not NaN diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index b90c27cb..b90e49a9 100755 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -709,11 +709,11 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n v1 = fpu_get_scratch(dyn); // check if any input value was NAN VFMAXQD(v0, q0, q1); // propagate NAN - FCMEQQD(v0, v0, v0); // 0 if NAN, 1 if not NAN + VFCMEQQD(v0, v0, v0); // 0 if NAN, 1 if not NAN } VFMULQD(q1, q1, q0); if(!box64_dynarec_fastnan) { - FCMEQQD(v1, q1, q1); // 0 => out is NAN + VFCMEQQD(v1, q1, q1); // 0 => out is NAN VBICQ(v1, v0, v1); // forget it in any input was a NAN already VSHLQ_64(v1, v1, 63); // only keep the sign bit VORRQ(q1, q1, v1); // NAN -> -NAN @@ -775,11 +775,11 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n v1 = fpu_get_scratch(dyn); // check if any input value was NAN VFMAXQD(v0, q0, q1); // propagate NAN - FCMEQQD(v0, v0, v0); // 0 if NAN, 1 if not NAN + VFCMEQQD(v0, v0, v0); // 0 if NAN, 1 if not NAN } VFDIVQD(q1, q1, q0); 
if(!box64_dynarec_fastnan) { - FCMEQQD(v1, q1, q1); // 0 => out is NAN + VFCMEQQD(v1, q1, q1); // 0 => out is NAN VBICQ(v1, v0, v1); // forget it in any input was a NAN already VSHLQ_64(v1, v1, 63); // only keep the sign bit VORRQ(q1, q1, v1); // NAN -> -NAN @@ -1516,24 +1516,24 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n u8 = F8; switch(u8&7) { // the inversion of the params in the comparison is there to handle NaN the same way SSE does - case 0: FCMEQQD(v0, v0, v1); break; // Equal - case 1: FCMGTQD(v0, v1, v0); break; // Less than - case 2: FCMGEQD(v0, v1, v0); break; // Less or equal - case 3: FCMEQQD(v0, v0, v0); + case 0: VFCMEQQD(v0, v0, v1); break; // Equal + case 1: VFCMGTQD(v0, v1, v0); break; // Less than + case 2: VFCMGEQD(v0, v1, v0); break; // Less or equal + case 3: VFCMEQQD(v0, v0, v0); if(v0!=v1) { q0 = fpu_get_scratch(dyn); - FCMEQQD(q0, v1, v1); + VFCMEQQD(q0, v1, v1); VANDQ(v0, v0, q0); } VMVNQ(v0, v0); break; // NaN (NaN is not equal to himself) - case 4: FCMEQQD(v0, v0, v1); VMVNQ(v0, v0); break; // Not Equal (or unordered on ARM, not on X86...) - case 5: FCMGTQD(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or equal or unordered - case 6: FCMGEQD(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or unordered - case 7: FCMEQQD(v0, v0, v0); + case 4: VFCMEQQD(v0, v0, v1); VMVNQ(v0, v0); break; // Not Equal (or unordered on ARM, not on X86...) 
+ case 5: VFCMGTQD(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or equal or unordered + case 6: VFCMGEQD(v0, v1, v0); VMVNQ(v0, v0); break; // Greater or unordered + case 7: VFCMEQQD(v0, v0, v0); if(v0!=v1) { q0 = fpu_get_scratch(dyn); - FCMEQQD(q0, v1, v1); + VFCMEQQD(q0, v1, v1); VANDQ(v0, v0, q0); } break; // not NaN diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c index cf047a10..47e88d40 100755 --- a/src/dynarec/arm64/dynarec_arm64_f20f.c +++ b/src/dynarec/arm64/dynarec_arm64_f20f.c @@ -170,7 +170,16 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n GETGX(v0); d1 = fpu_get_scratch(dyn); GETEX(d0, 0); + if(!box64_dynarec_fastnan) { + v1 = fpu_get_scratch(dyn); + FCMLTD_0(v1, d0); + USHR_64(v1, v1, 63); + SHL_64(v1, v1, 63); + } FSQRTD(d1, d0); + if(!box64_dynarec_fastnan) { + VORR(d1, d1, v1); + } VMOVeD(v0, 0, d1, 0); break; @@ -186,11 +195,24 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0x59: INST_NAME("MULSD Gx, Ex"); nextop = F8; - GETGX(v0); - d1 = fpu_get_scratch(dyn); + GETGX(d1); + v1 = fpu_get_scratch(dyn); GETEX(d0, 0); - FMULD(d1, v0, d0); - VMOVeD(v0, 0, d1, 0); + if(!box64_dynarec_fastnan) { + v0 = fpu_get_scratch(dyn); + q0 = fpu_get_scratch(dyn); + // check if any input value was NAN + FMAXD(v0, d0, d1); // propagate NAN + FCMEQD(v0, v0, v0); // 0 if NAN, 1 if not NAN + } + FMULD(v1, d1, d0); + if(!box64_dynarec_fastnan) { + FCMEQD(q0, d1, d1); // 0 => out is NAN + VBIC(q0, v0, q0); // forget it in any input was a NAN already + SHL_64(q0, q0, 63); // only keep the sign bit + VORR(d1, d1, q0); // NAN -> -NAN + } + VMOVeD(d1, 0, v1, 0); break; case 0x5A: INST_NAME("CVTSD2SS Gx, Ex"); diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c index 7546957f..c2b8dcaa 100644 --- a/src/emu/x64run0f.c +++ b/src/emu/x64run0f.c @@ -366,8 +366,18 @@ int Run0F(x64emu_t *emu, rex_t rex) nextop = F8; GETEX(0); GETGX; - for(int i=0; i<4; ++i) - 
GX->f[i] = 1.0f/sqrtf(EX->f[i]); + for(int i=0; i<4; ++i) { + if(EX->f[i]==0) + GX->f[i] = 1.0f/EX->f[i]; + else if (EX->f[i]<0) + GX->f[i] = NAN; + else if (isnan(EX->f[i])) + GX->f[i] = EX->f[i]; + else if (isinf(EX->f[i])) + GX->f[i] = 0.0; + else + GX->f[i] = 1.0f/sqrtf(EX->f[i]); + } break; case 0x53: /* RCPPS Gx, Ex */ nextop = F8; diff --git a/src/emu/x64runf20f.c b/src/emu/x64runf20f.c index eff4ca5f..c441ee27 100644 --- a/src/emu/x64runf20f.c +++ b/src/emu/x64runf20f.c @@ -124,7 +124,10 @@ int RunF20F(x64emu_t *emu, rex_t rex) nextop = F8; GETEX(0); GETGX; - GX->d[0] = sqrt(EX->d[0]); + if(EX->d[0]<0.0 ) + GX->d[0] = -NAN; + else + GX->d[0] = sqrt(EX->d[0]); break; case 0x58: /* ADDSD Gx, Ex */ @@ -137,6 +140,12 @@ int RunF20F(x64emu_t *emu, rex_t rex) nextop = F8; GETEX(0); GETGX; + #ifndef NOALIGN + // mul generate a -NAN only if doing (+/-)inf * (+/-)0 + if((isinf(GX->d[0]) && EX->d[0]==0.0) || (isinf(EX->d[0]) && GX->d[0]==0.0)) + GX->d[0] = -NAN; + else + #endif GX->d[0] *= EX->d[0]; break; case 0x5A: /* CVTSD2SS Gx, Ex */ |