author    ptitSeb <sebastien.chev@gmail.com>  2022-04-01 19:27:27 +0200
committer ptitSeb <sebastien.chev@gmail.com>  2022-04-01 19:27:27 +0200
commit    f2012fc6365c338b977a6e6a230e1d9d7c750d51 (patch)
tree      584792a0b26327fdde1da550b1b06e93587a2332 /src
parent    16f82ba6b3a447fca0d9d1c56098cc1aace10d2c (diff)
Added more SSE2 opcodes to test17, and added NaN handling to SQRTSD and MULSD ([DYNAREC] too)
Diffstat (limited to 'src')
-rwxr-xr-x  src/dynarec/arm64/arm64_emitter.h       37
-rwxr-xr-x  src/dynarec/arm64/dynarec_arm64_0f.c    20
-rwxr-xr-x  src/dynarec/arm64/dynarec_arm64_660f.c  28
-rwxr-xr-x  src/dynarec/arm64/dynarec_arm64_f20f.c  30
-rw-r--r--  src/emu/x64run0f.c                      14
-rw-r--r--  src/emu/x64runf20f.c                    11
6 files changed, 99 insertions, 41 deletions
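
Background note (not part of the commit): x86 SQRTSD of a negative input and MULSD of (+/-)inf by (+/-)0 deliver the processor's default QNaN, which has the sign bit set (0xFFF8000000000000), whereas the AArch64 default NaN is the positive 0x7FF8000000000000. The interpreter and dynarec changes below reproduce the x86 pattern either by storing -NAN directly or by forcing the sign bit of the result. A minimal host-side sketch of the bit patterns involved, assuming nothing beyond standard C:

/* sketch only: the QNaN pattern the emulator substitutes for sqrt(x<0) and inf*0 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <math.h>

static uint64_t dbits(double d) { uint64_t u; memcpy(&u, &d, sizeof u); return u; }

int main(void)
{
    double x86_style_qnan = -NAN;   /* what box64 now stores for these cases */
    printf("substituted QNaN : %016llx\n", (unsigned long long)dbits(x86_style_qnan));
    printf("host sqrt(-1.0)  : %016llx\n", (unsigned long long)dbits(sqrt(-1.0)));
    printf("host inf * 0.0   : %016llx\n", (unsigned long long)dbits(INFINITY * 0.0));
    return 0;
}
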
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index b69ef087..437b2130 100755
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1467,18 +1467,35 @@
 // Vector Float CMP
 // EQual
 #define FCMP_vector(Q, U, E, sz, Rm, ac, Rn, Rd)    ((Q)<<30 | (U)<<29 | 0b01110<<24 | (E)<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b1110<<12 | (ac)<<11 | 1<<10 | (Rn)<<5 | (Rd))
-#define FCMEQQD(Rd, Rn, Rm)          EMIT(FCMP_vector(1, 0, 0, 1, Rm, 0, Rn, Rd))
-#define FCMEQQS(Rd, Rn, Rm)          EMIT(FCMP_vector(1, 0, 0, 0, Rm, 0, Rn, Rd))
+#define VFCMEQQD(Rd, Rn, Rm)         EMIT(FCMP_vector(1, 0, 0, 1, Rm, 0, Rn, Rd))
+#define VFCMEQQS(Rd, Rn, Rm)         EMIT(FCMP_vector(1, 0, 0, 0, Rm, 0, Rn, Rd))
 // Greater or Equal
-#define FCMGEQD(Rd, Rn, Rm)          EMIT(FCMP_vector(1, 1, 0, 1, Rm, 0, Rn, Rd))
-#define FCMGEQS(Rd, Rn, Rm)          EMIT(FCMP_vector(1, 1, 0, 0, Rm, 0, Rn, Rd))
-#define FCMGEQD_ABS(Rd, Rn, Rm)      EMIT(FCMP_vector(1, 1, 0, 1, Rm, 1, Rn, Rd))
-#define FCMGEQS_ABS(Rd, Rn, Rm)      EMIT(FCMP_vector(1, 1, 0, 0, Rm, 1, Rn, Rd))
+#define VFCMGEQD(Rd, Rn, Rm)         EMIT(FCMP_vector(1, 1, 0, 1, Rm, 0, Rn, Rd))
+#define VFCMGEQS(Rd, Rn, Rm)         EMIT(FCMP_vector(1, 1, 0, 0, Rm, 0, Rn, Rd))
+#define VFCMGEQD_ABS(Rd, Rn, Rm)     EMIT(FCMP_vector(1, 1, 0, 1, Rm, 1, Rn, Rd))
+#define VFCMGEQS_ABS(Rd, Rn, Rm)     EMIT(FCMP_vector(1, 1, 0, 0, Rm, 1, Rn, Rd))
 // Greater Than
-#define FCMGTQD(Rd, Rn, Rm)          EMIT(FCMP_vector(1, 1, 1, 1, Rm, 0, Rn, Rd))
-#define FCMGTQS(Rd, Rn, Rm)          EMIT(FCMP_vector(1, 1, 1, 0, Rm, 0, Rn, Rd))
-#define FCMGTQD_ABS(Rd, Rn, Rm)      EMIT(FCMP_vector(1, 1, 1, 1, Rm, 1, Rn, Rd))
-#define FCMGTQS_ABS(Rd, Rn, Rm)      EMIT(FCMP_vector(1, 1, 1, 0, Rm, 1, Rn, Rd))
+#define VFCMGTQD(Rd, Rn, Rm)         EMIT(FCMP_vector(1, 1, 1, 1, Rm, 0, Rn, Rd))
+#define VFCMGTQS(Rd, Rn, Rm)         EMIT(FCMP_vector(1, 1, 1, 0, Rm, 0, Rn, Rd))
+#define VFCMGTQD_ABS(Rd, Rn, Rm)     EMIT(FCMP_vector(1, 1, 1, 1, Rm, 1, Rn, Rd))
+#define VFCMGTQS_ABS(Rd, Rn, Rm)     EMIT(FCMP_vector(1, 1, 1, 0, Rm, 1, Rn, Rd))
+
+// Scalar Float CMP to 0
+#define FCMP_0_scalar(U, sz, op, Rn, Rd) (0b01<<30 | (U)<<29 | 0b11110<<24 | 1<<23 | (sz)<<22 | 0b10000<<17 | 0b011<<14 | (op)<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
+// Less or equal to 0
+#define FCMLES_0(Rd, Rn)             EMIT(FCMP_0_scalar(1, 0, 0b01, (Rn), (Rd)))
+#define FCMLED_0(Rd, Rn)             EMIT(FCMP_0_scalar(1, 1, 0b01, (Rn), (Rd)))
+// Greater than 0
+#define FCMGTS_0(Rd, Rn)             EMIT(FCMP_0_scalar(0, 0, 0b00, (Rn), (Rd)))
+#define FCMGTD_0(Rd, Rn)             EMIT(FCMP_0_scalar(0, 1, 0b00, (Rn), (Rd)))
+// Less than 0
+#define FCMLTS_0(Rd, Rn)             EMIT(FCMP_0_scalar(0, 0, 0b10, (Rn), (Rd)))
+#define FCMLTD_0(Rd, Rn)             EMIT(FCMP_0_scalar(0, 1, 0b10, (Rn), (Rd)))
+
+// Scalar Float CMP
+#define FCMP_op_scalar(U, E, sz, Rm, ac, Rn, Rd)    (0b01<<30 | (U)<<29 | 0b11110<<24 | (E)<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b1110<<12 | (ac)<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define FCMEQS(Rd, Rn, Rm)          EMIT(FCMP_op_scalar(1, 0, 0, (Rm), 0, (Rn), (Rd)))
+#define FCMEQD(Rd, Rn, Rm)          EMIT(FCMP_op_scalar(1, 1, 0, (Rm), 0, (Rn), (Rd)))
 
 // UMULL / SMULL
 #define MULL_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b1100<<12 |(Rn)<<5 |(Rd))
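
Side note on the emitter macros touched above (illustrative, not part of the patch): each macro simply ORs fixed opcode bits and register numbers into a 32-bit AArch64 instruction word, and EMIT() writes that word into the generated code stream. A standalone sketch reusing FCMP_vector exactly as defined in the hunk (the 0b literals are the GCC/Clang extension the header already relies on):

#include <stdio.h>
#include <stdint.h>

#define FCMP_vector(Q, U, E, sz, Rm, ac, Rn, Rd)    ((Q)<<30 | (U)<<29 | 0b01110<<24 | (E)<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b1110<<12 | (ac)<<11 | 1<<10 | (Rn)<<5 | (Rd))

int main(void)
{
    /* VFCMEQQD(0, 1, 2) would emit FCMEQ V0.2D, V1.2D, V2.2D */
    uint32_t op = (uint32_t)FCMP_vector(1, 0, 0, 1, 2, 0, 1, 0);
    printf("0x%08x\n", op);     /* prints 0x4e62e420 */
    return 0;
}
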
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index b726db9a..df36e90f 100755
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -1540,24 +1540,24 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             u8 = F8;
             switch(u8&7) {
                 // the inversion of the params in the comparison is there to handle NaN the same way SSE does
-                case 0: FCMEQQS(v0, v0, v1); break;   // Equal
-                case 1: FCMGTQS(v0, v1, v0); break;   // Less than
-                case 2: FCMGEQS(v0, v1, v0); break;   // Less or equal
-                case 3: FCMEQQS(v0, v0, v0);
+                case 0: VFCMEQQS(v0, v0, v1); break;   // Equal
+                case 1: VFCMGTQS(v0, v1, v0); break;   // Less than
+                case 2: VFCMGEQS(v0, v1, v0); break;   // Less or equal
+                case 3: VFCMEQQS(v0, v0, v0);
                         if(v0!=v1) {
                             q0 = fpu_get_scratch(dyn);
-                            FCMEQQS(q0, v1, v1);
+                            VFCMEQQS(q0, v1, v1);
                             VANDQ(v0, v0, q0);
                         }
                         VMVNQ(v0, v0);
                         break;   // NaN (NaN is not equal to itself)
-                case 4: FCMEQQS(v0, v0, v1); VMVNQ(v0, v0); break;   // Not Equal (or unordered on ARM, not on X86...)
-                case 5: FCMGTQS(v0, v1, v0); VMVNQ(v0, v0); break;   // Greater or equal or unordered
-                case 6: FCMGEQS(v0, v1, v0); VMVNQ(v0, v0); break;   // Greater or unordered
-                case 7: FCMEQQS(v0, v0, v0);
+                case 4: VFCMEQQS(v0, v0, v1); VMVNQ(v0, v0); break;   // Not Equal (or unordered on ARM, not on X86...)
+                case 5: VFCMGTQS(v0, v1, v0); VMVNQ(v0, v0); break;   // Greater or equal or unordered
+                case 6: VFCMGEQS(v0, v1, v0); VMVNQ(v0, v0); break;   // Greater or unordered
+                case 7: VFCMEQQS(v0, v0, v0);
                         if(v0!=v1) {
                             q0 = fpu_get_scratch(dyn);
-                            FCMEQQS(q0, v1, v1);
+                            VFCMEQQS(q0, v1, v1);
                             VANDQ(v0, v0, q0);
                         }
                         break;   // not NaN
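
Note on the operand inversion above (not box64 code, just an illustration): SSE CMPLTPS/CMPLEPS are ordered compares, so any NaN operand makes the element false, and AArch64 FCMGT/FCMGE are likewise false on NaN; swapping the operands therefore lets "a < b" be emitted as FCMGT(b, a) with identical NaN behaviour. A plain-C model of the scalar predicate:

#include <stdio.h>
#include <math.h>

int main(void)
{
    float tests[][2] = { {1.0f, 2.0f}, {2.0f, 1.0f}, {NAN, 1.0f}, {1.0f, NAN} };
    for (int i = 0; i < 4; ++i) {
        float a = tests[i][0], b = tests[i][1];
        /* "a < b" the SSE way vs. the swapped ">" the dynarec emits: same result, NaN -> 0 */
        printf("a=%g b=%g : a<b=%d  b>a=%d\n", a, b, a < b, b > a);
    }
    return 0;
}
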

diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index b90c27cb..b90e49a9 100755
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -709,11 +709,11 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 v1 = fpu_get_scratch(dyn);
                 // check if any input value was NAN
                 VFMAXQD(v0, q0, q1);    // propagate NAN
-                FCMEQQD(v0, v0, v0);    // 0 if NAN, 1 if not NAN
+                VFCMEQQD(v0, v0, v0);    // 0 if NAN, 1 if not NAN
             }
             VFMULQD(q1, q1, q0);
             if(!box64_dynarec_fastnan) {
-                FCMEQQD(v1, q1, q1);    // 0 => out is NAN
+                VFCMEQQD(v1, q1, q1);    // 0 => out is NAN
                 VBICQ(v1, v0, v1);      // forget it if any input was a NAN already
                 VSHLQ_64(v1, v1, 63);   // only keep the sign bit
                 VORRQ(q1, q1, v1);      // NAN -> -NAN

@@ -775,11 +775,11 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 v1 = fpu_get_scratch(dyn);
                 // check if any input value was NAN
                 VFMAXQD(v0, q0, q1);    // propagate NAN
-                FCMEQQD(v0, v0, v0);    // 0 if NAN, 1 if not NAN
+                VFCMEQQD(v0, v0, v0);    // 0 if NAN, 1 if not NAN
             }
             VFDIVQD(q1, q1, q0);
             if(!box64_dynarec_fastnan) {
-                FCMEQQD(v1, q1, q1);    // 0 => out is NAN
+                VFCMEQQD(v1, q1, q1);    // 0 => out is NAN
                 VBICQ(v1, v0, v1);      // forget it if any input was a NAN already
                 VSHLQ_64(v1, v1, 63);   // only keep the sign bit
                 VORRQ(q1, q1, v1);      // NAN -> -NAN
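
The two !box64_dynarec_fastnan blocks above (MULPD and DIVPD) share one fixup: when the operation produces a NaN although no input was NaN, the sign bit of the result is forced so it matches x86's negative default QNaN. A plain-C model of a single lane (an illustrative sketch, not the dynarec itself; helper names are made up):

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <math.h>

static uint64_t dbits(double d) { uint64_t u; memcpy(&u, &d, sizeof u); return u; }
static double   dval(uint64_t u) { double d; memcpy(&d, &u, sizeof d); return d; }

static double mul_lane(double a, double b)
{
    int in_nan  = isnan(a) || isnan(b);                     /* VFMAXQD + VFCMEQQD on inputs */
    double r    = a * b;                                    /* VFMULQD                      */
    int out_nan = isnan(r);                                 /* VFCMEQQD on the result       */
    uint64_t m  = (out_nan && !in_nan) ? (1ULL << 63) : 0;  /* VBICQ + VSHLQ_64             */
    return dval(dbits(r) | m);                              /* VORRQ: NAN -> -NAN           */
}

int main(void)
{
    printf("inf*0 -> %016llx\n", (unsigned long long)dbits(mul_lane(INFINITY, 0.0)));
    printf("nan*2 -> %016llx\n", (unsigned long long)dbits(mul_lane(NAN, 2.0)));
    return 0;
}
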

@@ -1516,24 +1516,24 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             u8 = F8;
             switch(u8&7) {
                 // the inversion of the params in the comparison is there to handle NaN the same way SSE does
-                case 0: FCMEQQD(v0, v0, v1); break;   // Equal
-                case 1: FCMGTQD(v0, v1, v0); break;   // Less than
-                case 2: FCMGEQD(v0, v1, v0); break;   // Less or equal
-                case 3: FCMEQQD(v0, v0, v0);
+                case 0: VFCMEQQD(v0, v0, v1); break;   // Equal
+                case 1: VFCMGTQD(v0, v1, v0); break;   // Less than
+                case 2: VFCMGEQD(v0, v1, v0); break;   // Less or equal
+                case 3: VFCMEQQD(v0, v0, v0);
                         if(v0!=v1) {
                             q0 = fpu_get_scratch(dyn);
-                            FCMEQQD(q0, v1, v1);
+                            VFCMEQQD(q0, v1, v1);
                             VANDQ(v0, v0, q0);
                         }
                         VMVNQ(v0, v0);
                         break;   // NaN (NaN is not equal to itself)
-                case 4: FCMEQQD(v0, v0, v1); VMVNQ(v0, v0); break;   // Not Equal (or unordered on ARM, not on X86...)
-                case 5: FCMGTQD(v0, v1, v0); VMVNQ(v0, v0); break;   // Greater or equal or unordered
-                case 6: FCMGEQD(v0, v1, v0); VMVNQ(v0, v0); break;   // Greater or unordered
-                case 7: FCMEQQD(v0, v0, v0);
+                case 4: VFCMEQQD(v0, v0, v1); VMVNQ(v0, v0); break;   // Not Equal (or unordered on ARM, not on X86...)
+                case 5: VFCMGTQD(v0, v1, v0); VMVNQ(v0, v0); break;   // Greater or equal or unordered
+                case 6: VFCMGEQD(v0, v1, v0); VMVNQ(v0, v0); break;   // Greater or unordered
+                case 7: VFCMEQQD(v0, v0, v0);
                         if(v0!=v1) {
                             q0 = fpu_get_scratch(dyn);
-                            FCMEQQD(q0, v1, v1);
+                            VFCMEQQD(q0, v1, v1);
                             VANDQ(v0, v0, q0);
                         }
                         break;   // not NaN

diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c
index cf047a10..47e88d40 100755
--- a/src/dynarec/arm64/dynarec_arm64_f20f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f20f.c
@@ -170,7 +170,16 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETGX(v0);
             d1 = fpu_get_scratch(dyn);
             GETEX(d0, 0);
+            if(!box64_dynarec_fastnan) {
+                v1 = fpu_get_scratch(dyn);
+                FCMLTD_0(v1, d0);
+                USHR_64(v1, v1, 63);
+                SHL_64(v1, v1, 63);
+            }
             FSQRTD(d1, d0);
+            if(!box64_dynarec_fastnan) {
+                VORR(d1, d1, v1);
+            }
             VMOVeD(v0, 0, d1, 0);
             break;
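
The SQRTSD block above prepares its sign mask before the square root: FCMLTD_0 yields all-ones when the input is negative, the USHR_64/SHL_64 pair keeps only bit 63, and the final VORR turns the AArch64 default NaN produced by FSQRT into the x86-style -NaN. A plain-C model of the same steps (illustrative sketch, not the dynarec):

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <math.h>

static uint64_t dbits(double d) { uint64_t u; memcpy(&u, &d, sizeof u); return u; }
static double   dval(uint64_t u) { double d; memcpy(&d, &u, sizeof d); return d; }

static double sqrtsd_lane(double x)
{
    uint64_t m = (x < 0.0) ? ~0ULL : 0;   /* FCMLTD_0: all-ones if x < 0      */
    m = (m >> 63) << 63;                  /* USHR_64 then SHL_64: keep bit 63 */
    return dval(dbits(sqrt(x)) | m);      /* FSQRTD then VORR                 */
}

int main(void)
{
    printf("sqrt(-2.0) -> %016llx\n", (unsigned long long)dbits(sqrtsd_lane(-2.0)));
    printf("sqrt( 2.0) -> %g\n", sqrtsd_lane(2.0));
    return 0;
}
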

 

@@ -186,11 +195,24 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x59:
             INST_NAME("MULSD Gx, Ex");
             nextop = F8;
-            GETGX(v0);
-            d1 = fpu_get_scratch(dyn);
+            GETGX(d1);
+            v1 = fpu_get_scratch(dyn);
             GETEX(d0, 0);
-            FMULD(d1, v0, d0);
-            VMOVeD(v0, 0, d1, 0);
+            if(!box64_dynarec_fastnan) {
+                v0 = fpu_get_scratch(dyn);
+                q0 = fpu_get_scratch(dyn);
+                // check if any input value was NAN
+                FMAXD(v0, d0, d1);     // propagate NAN
+                FCMEQD(v0, v0, v0);    // 0 if NAN, 1 if not NAN
+            }
+            FMULD(v1, d1, d0);
+            if(!box64_dynarec_fastnan) {
+                FCMEQD(q0, v1, v1);    // 0 => out is NAN
+                VBIC(q0, v0, q0);      // forget it if any input was a NAN already
+                SHL_64(q0, q0, 63);    // only keep the sign bit
+                VORR(v1, v1, q0);      // NAN -> -NAN
+            }
+            VMOVeD(d1, 0, v1, 0);
             break;
         case 0x5A:
             INST_NAME("CVTSD2SS Gx, Ex");

diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c
index 7546957f..c2b8dcaa 100644
--- a/src/emu/x64run0f.c
+++ b/src/emu/x64run0f.c
@@ -366,8 +366,18 @@ int Run0F(x64emu_t *emu, rex_t rex)
             nextop = F8;
             GETEX(0);
             GETGX;
-            for(int i=0; i<4; ++i)
-                GX->f[i] = 1.0f/sqrtf(EX->f[i]);
+            for(int i=0; i<4; ++i) {
+                if(EX->f[i]==0)
+                    GX->f[i] = 1.0f/EX->f[i];
+                else if (EX->f[i]<0)
+                    GX->f[i] = NAN;
+                else if (isnan(EX->f[i]))
+                    GX->f[i] = EX->f[i];
+                else if (isinf(EX->f[i]))
+                    GX->f[i] = 0.0;
+                else
+                    GX->f[i] = 1.0f/sqrtf(EX->f[i]);
+            }
             break;
         case 0x53:                      /* RCPPS Gx, Ex */
             nextop = F8;
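
The RSQRTPS rewrite above spells out the special cases (zero, negative, NaN, infinity) instead of relying on the host's 1.0f/sqrtf() to reproduce them, so the results no longer depend on the host libm's NaN choices. A small standalone check of the per-element logic (the helper name is made up for illustration):

#include <stdio.h>
#include <math.h>

static float rsqrt_elem(float x)            /* mirrors the loop body added above */
{
    if (x == 0)          return 1.0f / x;   /* +0 -> +inf, -0 -> -inf */
    else if (x < 0)      return NAN;
    else if (isnan(x))   return x;
    else if (isinf(x))   return 0.0f;
    else                 return 1.0f / sqrtf(x);
}

int main(void)
{
    float in[] = { 0.0f, -0.0f, -1.0f, NAN, INFINITY, 4.0f };
    for (int i = 0; i < 6; ++i)
        printf("rsqrtps(%g) = %g\n", in[i], rsqrt_elem(in[i]));
    return 0;
}
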

diff --git a/src/emu/x64runf20f.c b/src/emu/x64runf20f.c
index eff4ca5f..c441ee27 100644
--- a/src/emu/x64runf20f.c
+++ b/src/emu/x64runf20f.c
@@ -124,7 +124,10 @@ int RunF20F(x64emu_t *emu, rex_t rex)
         nextop = F8;
         GETEX(0);
         GETGX;
-        GX->d[0] = sqrt(EX->d[0]);
+        if(EX->d[0]<0.0)
+            GX->d[0] = -NAN;
+        else
+            GX->d[0] = sqrt(EX->d[0]);
         break;
 

     case 0x58:  /* ADDSD Gx, Ex */

@@ -137,6 +140,12 @@ int RunF20F(x64emu_t *emu, rex_t rex)
         nextop = F8;
         GETEX(0);
         GETGX;
+        #ifndef NOALIGN
+            // mul generates a -NAN only if doing (+/-)inf * (+/-)0
+            if((isinf(GX->d[0]) && EX->d[0]==0.0) || (isinf(EX->d[0]) && GX->d[0]==0.0))
+                GX->d[0] = -NAN;
+            else
+        #endif
         GX->d[0] *= EX->d[0];
         break;
     case 0x5A:  /* CVTSD2SS Gx, Ex */