about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2022-04-01 20:41:51 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2022-04-01 20:41:51 +0200
commit ead3217bf47f328ee7cce78a3080112f0f7a804b (patch)
tree 1d9c2a8f8bd3edabb6cfb7b9b75ef7e65bbb5d15
parent f2012fc6365c338b977a6e6a230e1d9d7c750d51 (diff)
download box64-ead3217bf47f328ee7cce78a3080112f0f7a804b.tar.gz
box64-ead3217bf47f328ee7cce78a3080112f0f7a804b.zip
More work on test17, and handling -NAN on divsd
-rwxr-xr-x src/dynarec/arm64/arm64_emitter.h 6
-rwxr-xr-x src/dynarec/arm64/arm64_printer.c 2
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_f20f.c 24
-rw-r--r-- tests/ref17.txt 60
-rwxr-xr-x tests/test17 bin 221720 -> 254272 bytes
-rw-r--r-- tests/test17.c 10
6 files changed, 91 insertions, 11 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 437b2130..9a094eca 100755
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1493,9 +1493,9 @@
 #define FCMLTD_0(Rd, Rn)             EMIT(FCMP_0_scalar(0, 1, 0b10, (Rn), (Rd)))
 
 // Scalar Float CMP
-#define FCMP_op_scalar(U, E, sz, Rm, ac, Rn, Rd)    (0b01<<30 | (U)<<29 | 0b11110<<24 | (E)<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b1110<<12 | (ac<<11 | 1<<10 | (Rn)<<5 | (Rd)))
-#define FCMEQS(Rd, Rn, Rm)          EMIT(FCMP_op_scalar(1, 0, 0, (Rm), 0, (Rn), (Rd)))
-#define FCMEQD(Rd, Rn, Rm)          EMIT(FCMP_op_scalar(1, 1, 0, (Rm), 0, (Rn), (Rd)))
+#define FCMP_op_scalar(U, E, sz, Rm, ac, Rn, Rd)    (0b01<<30 | (U)<<29 | 0b11110<<24 | (E)<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b1110<<12 | (ac)<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define FCMEQS(Rd, Rn, Rm)          EMIT(FCMP_op_scalar(0, 0, 0, (Rm), 0, (Rn), (Rd)))
+#define FCMEQD(Rd, Rn, Rm)          EMIT(FCMP_op_scalar(0, 0, 1, (Rm), 0, (Rn), (Rd)))
 
 // UMULL / SMULL
 #define MULL_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b1100<<12 |(Rn)<<5 |(Rd))
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index 9e47c5ea..a156c00e 100755
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -1051,7 +1051,7 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
     //FMIN/FMAX

     if(isMask(opcode, "00011110ff1mmmmm01oo10nnnnnddddd", &a)) {

         char s = (sf==0)?'S':((sf==1)?'D':'?');

-        snprintf(buff, sizeof(buff), "F%s %c%d, %c%d, %c%d", (option==3)?"MINNM":((option==2)?"MINNMP":((!option)?"MAXNM":"MAXNMP")), s, Rd, s, Rn, s, Rm);

+        snprintf(buff, sizeof(buff), "F%s %c%d, %c%d, %c%d", (option==3)?"MINNM":((option==2)?"MAXNM":((!option)?"MAX":"MIN")), s, Rd, s, Rn, s, Rm);

         return buff;

     }

     if(isMask(opcode, "0Q001110of1mmmmm110001nnnnnddddd", &a)) {

diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c
index 47e88d40..0db99b64 100755
--- a/src/dynarec/arm64/dynarec_arm64_f20f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f20f.c
@@ -173,7 +173,6 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             if(!box64_dynarec_fastnan) {

                 v1 = fpu_get_scratch(dyn);

                 FCMLTD_0(v1, d0);

-                USHR_64(v1, v1, 63);

                 SHL_64(v1, v1, 63);

             }

             FSQRTD(d1, d0);

@@ -207,10 +206,10 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             }

             FMULD(v1, d1, d0);

             if(!box64_dynarec_fastnan) {

-                FCMEQD(q0, d1, d1);    // 0 => out is NAN

+                FCMEQD(q0, v1, v1);    // 0 => out is NAN

                 VBIC(q0, v0, q0);      // forget it in any input was a NAN already

-                SHL_64(q0, q0, 63);   // only keep the sign bit

-                VORR(d1, d1, q0);      // NAN -> -NAN

+                SHL_64(q0, q0, 63);     // only keep the sign bit

+                VORR(v1, v1, q0);      // NAN -> -NAN

             }

             VMOVeD(d1, 0, v1, 0);

             break;

@@ -255,8 +254,21 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;

             GETGX(v0);

             d1 = fpu_get_scratch(dyn);

-            GETEX(d0, 0);

-            FDIVD(d1, v0, d0);

+            GETEX(v1, 0);

+            if(!box64_dynarec_fastnan) {

+                d0 = fpu_get_scratch(dyn);

+                q0 = fpu_get_scratch(dyn);

+                // check if any input value was NAN

+                FMAXD(d0, v0, v1);      // propagate NAN

+                FCMEQD(d0, d0, d0);     // 0 if NAN, 1 if not NAN

+            }

+            FDIVD(d1, v0, v1);

+            if(!box64_dynarec_fastnan) {

+                FCMEQD(q0, d1, d1);     // 0 => out is NAN

+                VBIC(q0, d0, q0);       // forget it in any input was a NAN already

+                SHL_64(q0, q0, 63);     // only keep the sign bit

+                VORR(d1, d1, q0);       // NAN -> -NAN

+            }

             VMOVeD(v0, 0, d1, 0);

             break;

         case 0x5F:

diff --git a/tests/ref17.txt b/tests/ref17.txt
index eb67ae5e..97e5ea89 100644
--- a/tests/ref17.txt
+++ b/tests/ref17.txt
@@ -454,23 +454,83 @@ sqrtsd(1 2 , 1 2 ) = 1 2
 sqrtsd(1 2 , 0 -2 ) = 0 2 
 sqrtsd(1 2 , inf -inf ) = inf 2 
 sqrtsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 
+sqrtsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+sqrtsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
 sqrtsd(1 2 , 2 1 ) = 1.41421 2 
 sqrtsd(1 2 , -2 0 ) = 0xfff8000000000000 2 
 sqrtsd(1 2 , -inf inf ) = 0xfff8000000000000 2 
 sqrtsd(1 2 , -0 0x7ff8000000000000 ) = -0 2 
+sqrtsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 
+sqrtsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 
 addsd(1 2 , 1 2 ) = 2 2 
 addsd(1 2 , 0 -2 ) = 1 2 
 addsd(1 2 , inf -inf ) = inf 2 
 addsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 
+addsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+addsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
 addsd(1 2 , 2 1 ) = 3 2 
 addsd(1 2 , -2 0 ) = -1 2 
 addsd(1 2 , -inf inf ) = -inf 2 
 addsd(1 2 , -0 0x7ff8000000000000 ) = 1 2 
+addsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2 
+addsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2 
 mulsd(1 2 , 1 2 ) = 1 2 
 mulsd(1 2 , 0 -2 ) = 0 2 
 mulsd(1 2 , inf -inf ) = inf 2 
 mulsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 
+mulsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+mulsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
 mulsd(1 2 , 2 1 ) = 2 2 
 mulsd(1 2 , -2 0 ) = -2 2 
 mulsd(1 2 , -inf inf ) = -inf 2 
 mulsd(1 2 , -0 0x7ff8000000000000 ) = -0 2 
+mulsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 
+mulsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 
+subsd(1 2 , 1 2 ) = 0 2 
+subsd(1 2 , 0 -2 ) = 1 2 
+subsd(1 2 , inf -inf ) = -inf 2 
+subsd(1 2 , 0x7ff8000000000000 -0 ) = 0xfff8000000000000 2 
+subsd(0 -2 , 0x7ff8000000000000 -0 ) = 0xfff8000000000000 -2 
+subsd(inf -inf , 0x7ff8000000000000 -0 ) = 0xfff8000000000000 -inf 
+subsd(1 2 , 2 1 ) = -1 2 
+subsd(1 2 , -2 0 ) = 3 2 
+subsd(1 2 , -inf inf ) = inf 2 
+subsd(1 2 , -0 0x7ff8000000000000 ) = 1 2 
+subsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2 
+subsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2 
+minsd(1 2 , 1 2 ) = 1 2 
+minsd(1 2 , 0 -2 ) = 0 2 
+minsd(1 2 , inf -inf ) = 1 2 
+minsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 
+minsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+minsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
+minsd(1 2 , 2 1 ) = 1 2 
+minsd(1 2 , -2 0 ) = -2 2 
+minsd(1 2 , -inf inf ) = -inf 2 
+minsd(1 2 , -0 0x7ff8000000000000 ) = -0 2 
+minsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 
+minsd(0 -2 , -0 0x7ff8000000000000 ) = -0 -2 
+divsd(1 2 , 1 2 ) = 1 2 
+divsd(1 2 , 0 -2 ) = inf 2 
+divsd(1 2 , inf -inf ) = 0 2 
+divsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 
+divsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+divsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
+divsd(1 2 , 2 1 ) = 0.5 2 
+divsd(1 2 , -2 0 ) = -0.5 2 
+divsd(1 2 , -inf inf ) = -0 2 
+divsd(1 2 , -0 0x7ff8000000000000 ) = -inf 2 
+divsd(0 -2 , -0 0x7ff8000000000000 ) = 0xfff8000000000000 -2 
+divsd(0 -2 , -0 0x7ff8000000000000 ) = 0xfff8000000000000 -2 
+maxsd(1 2 , 1 2 ) = 1 2 
+maxsd(1 2 , 0 -2 ) = 1 2 
+maxsd(1 2 , inf -inf ) = inf 2 
+maxsd(1 2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 2 
+maxsd(0 -2 , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -2 
+maxsd(inf -inf , 0x7ff8000000000000 -0 ) = 0x7ff8000000000000 -inf 
+maxsd(1 2 , 2 1 ) = 2 2 
+maxsd(1 2 , -2 0 ) = 1 2 
+maxsd(1 2 , -inf inf ) = 1 2 
+maxsd(1 2 , -0 0x7ff8000000000000 ) = 1 2 
+maxsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2 
+maxsd(0 -2 , -0 0x7ff8000000000000 ) = 0 -2 
diff --git a/tests/test17 b/tests/test17
index f2632c6b..23f9c660 100755
--- a/tests/test17
+++ b/tests/test17
Binary files differ
diff --git a/tests/test17.c b/tests/test17.c
index 0627d8c5..385b624c 100644
--- a/tests/test17.c
+++ b/tests/test17.c
@@ -391,10 +391,14 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r);
  GO2sd(A, B, a128_pd, b128_pd)              \
  GO2sd(A, B, a128_pd, c128_pd)              \
  GO2sd(A, B, a128_pd, d128_pd)              \
+ GO2sd(A, B, b128_pd, d128_pd)              \
+ GO2sd(A, B, c128_pd, d128_pd)              \
  GO2sd(A, B, a128_pd, reverse_pd(a128_pd))  \
  GO2sd(A, B, a128_pd, reverse_pd(b128_pd))  \
  GO2sd(A, B, a128_pd, reverse_pd(c128_pd))  \
- GO2sd(A, B, a128_pd, reverse_pd(d128_pd))
+ GO2sd(A, B, a128_pd, reverse_pd(d128_pd))  \
+ GO2sd(A, B, b128_pd, reverse_pd(d128_pd))  \
+ GO2sd(A, B, b128_pd, reverse_pd(d128_pd))
 
 
  GO2(shuffle, 8, pshufb, a128_8, b128_8)
@@ -581,6 +585,10 @@ printf(N " %g, %g => %g\n", b, a, *(float*)&r);
  MULTIGO2sd(sqrt, sqrtsd)
  MULTIGO2sd(add, addsd)
  MULTIGO2sd(mul, mulsd)
+ MULTIGO2sd(sub, subsd)
+ MULTIGO2sd(min, minsd)
+ MULTIGO2sd(div, divsd)
+ MULTIGO2sd(max, maxsd)
 
  return 0;
 }