about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_0f.c 67
-rw-r--r-- src/dynarec/la64/dynarec_la64_660f.c 41
-rw-r--r-- src/dynarec/la64/dynarec_la64_f20f.c 36
-rw-r--r-- src/dynarec/la64/dynarec_la64_f30f.c 62
-rw-r--r-- src/dynarec/la64/dynarec_la64_helper.h 30
-rw-r--r-- src/dynarec/la64/la64_emitter.h 37
-rw-r--r-- src/dynarec/la64/la64_printer.c 8
7 files changed, 269 insertions, 12 deletions
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index a540f714..4aee3a5d 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -139,6 +139,22 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             nextop = F8;
             FAKEED;
             break;
+        case 0x28:
+            INST_NAME("MOVAPS Gx,Ex");
+            nextop = F8;
+            GETG;
+            if (MODREG) {
+                ed = (nextop & 7) + (rex.b << 3);
+                v1 = sse_get_reg(dyn, ninst, x1, ed, 0);
+                v0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+                VOR_V(v0, v1, v1);
+            } else {
+                v0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                VLD(v0, ed, fixedaddress);
+            }
+            break;
         case 0x29:
             INST_NAME("MOVAPS Ex,Gx");
             nextop = F8;
@@ -154,6 +170,47 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SMWRITE2();
             }
             break;
+        case 0x2E:
+            // UCOMISS: no special check, intentionally falls through to the COMISS code below
+        case 0x2F:
+            if (opcode == 0x2F) {
+                INST_NAME("COMISS Gx, Ex");
+            } else {
+                INST_NAME("UCOMISS Gx, Ex");
+            }
+            SETFLAGS(X_ALL, SF_SET);
+            SET_DFNONE();
+            nextop = F8;
+            GETGX(d0, 0);
+            GETEXSS(v0, 0, 0);
+            CLEAR_FLAGS(x2);
+            // if isnan(d0) || isnan(v0): unordered compare, set ZF, PF and CF together
+            IFX (X_ZF | X_PF | X_CF) {
+                FCMP_S(fcc0, d0, v0, cUN);
+                BCEQZ_MARK(fcc0);
+                ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF));
+                B_MARK3_nocond;
+            }
+            MARK;
+            // else if isless(d0, v0): set CF only
+            IFX (X_CF) {
+                FCMP_S(fcc1, d0, v0, cLT);
+                BCEQZ_MARK2(fcc1);
+                ORI(xFlags, xFlags, 1 << F_CF);
+                B_MARK3_nocond;
+            }
+            MARK2;
+            // else if d0 == v0: set ZF only (otherwise no flag bit is OR-ed in)
+            IFX (X_ZF) {
+                FCMP_S(fcc2, d0, v0, cEQ);
+                BCEQZ_MARK3(fcc2);
+                ORI(xFlags, xFlags, 1 << F_ZF);
+            }
+            MARK3;
+            IFX (X_ALL) {
+                SPILL_EFLAGS();
+            }
+            break;
         #define GO(GETFLAGS, NO, YES, F, I)                                                          \
             READFLAGS(F);                                                                            \
             if (la64_lbt) {                                                                          \
@@ -511,10 +568,14 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETGX(v0, 1);
             GETEX(v1, 0, 1);
             u8 = F8;
-            if (v0 != v1) {
-                VEXTRINS_D(v0, v1, 0x11); // v0[127:64] = v1[127:64]
+            if (v0 == v1) {
+                VSHUF4I_W(v0, v0, u8);
+            } else {
+                q1 = fpu_get_scratch(dyn);
+                VSHUF4I_W(v0, v0, u8);
+                VSHUF4I_W(q1, v1, u8);
+                VEXTRINS_D(v0, q1, 0x11); // v0[127:64] = q1[127:64]
             }
-            VSHUF4I_W(v0, v0, u8);
             break;
         case 0xC8:
         case 0xC9:
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 474bbfc2..23ad1fc6 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -56,6 +56,22 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             nextop = F8;
             FAKEED;
             break;
+        case 0x28:
+            INST_NAME("MOVAPD Gx,Ex");
+            nextop = F8;
+            GETG;
+            if (MODREG) {
+                ed = (nextop & 7) + (rex.b << 3);
+                v1 = sse_get_reg(dyn, ninst, x1, ed, 0);
+                v0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+                VOR_V(v0, v1, v1);
+            } else {
+                SMREAD();
+                v0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                VLD(v0, ed, fixedaddress);
+            }
+            break;
         case 0x61:
             INST_NAME("PUNPCKLWD Gx,Ex");
             nextop = F8;
@@ -121,6 +137,31 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 VLD(v0, ed, fixedaddress);
             }
             break;
+        case 0x7E:
+            INST_NAME("MOVD Ed,Gx");
+            nextop = F8;
+            GETGX(v0, 0);
+            if (rex.w) {
+                if (MODREG) {
+                    ed = TO_LA64((nextop & 7) + (rex.b << 3));
+                    MOVFR2GR_D(ed, v0);
+                } else {
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                    FST_D(v0, ed, fixedaddress);
+                    SMWRITE2();
+                }
+            } else {
+                if (MODREG) {
+                    ed = TO_LA64((nextop & 7) + (rex.b << 3));
+                    MOVFR2GR_S(ed, v0);
+                    ZEROUP(ed);
+                } else {
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                    FST_S(v0, ed, fixedaddress);
+                    SMWRITE2();
+                }
+            }
+            break;
         case 0xBE:
             INST_NAME("MOVSX Gw, Eb");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_f20f.c b/src/dynarec/la64/dynarec_la64_f20f.c
index 25e523cb..74f76744 100644
--- a/src/dynarec/la64/dynarec_la64_f20f.c
+++ b/src/dynarec/la64/dynarec_la64_f20f.c
@@ -80,41 +80,69 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x58:
             INST_NAME("ADDSD Gx, Ex");
             nextop = F8;
-            // TODO: fastnan handling
             GETGX(v0, 1);
             GETEXSD(v1, 0);
             d0 = fpu_get_scratch(dyn);
             FADD_D(d0, v0, v1);
+            if (!box64_dynarec_fastnan) {
+                FCMP_D(fcc0, v0, v1, cUN);
+                BCNEZ_MARK(fcc0);
+                FCMP_D(fcc1, d0, d0, cOR);
+                BCNEZ_MARK(fcc1);
+                FNEG_D(d0, d0);
+            }
+            MARK;
             VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0]
             break;
         case 0x59:
             INST_NAME("MULSD Gx, Ex");
             nextop = F8;
-            // TODO: fastnan handling
             GETGX(v0, 1);
             GETEXSD(v1, 0);
             d0 = fpu_get_scratch(dyn);
             FMUL_D(d0, v0, v1);
+            if (!box64_dynarec_fastnan) {
+                FCMP_D(fcc0, v0, v1, cUN);
+                BCNEZ_MARK(fcc0);
+                FCMP_D(fcc1, d0, d0, cOR);
+                BCNEZ_MARK(fcc1);
+                FNEG_D(d0, d0);
+            }
+            MARK;
             VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0]
             break;
         case 0x5C:
             INST_NAME("SUBSD Gx, Ex");
             nextop = F8;
-            // TODO: fastnan handling
             GETGX(v0, 1);
             GETEXSD(v1, 0);
             d0 = fpu_get_scratch(dyn);
             FSUB_D(d0, v0, v1);
+            if (!box64_dynarec_fastnan) {
+                FCMP_D(fcc0, v0, v1, cUN);
+                BCNEZ_MARK(fcc0);
+                FCMP_D(fcc1, d0, d0, cOR);
+                BCNEZ_MARK(fcc1);
+                FNEG_D(d0, d0);
+            }
+            MARK;
             VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0]
             break;
         case 0x5E:
             INST_NAME("DIVSD Gx, Ex");
             nextop = F8;
-            // TODO: fastnan handling
             GETGX(v0, 1);
             GETEXSD(v1, 0);
             d0 = fpu_get_scratch(dyn);
             FDIV_D(d0, v0, v1);
+            if (!box64_dynarec_fastnan) {
+                FCMP_D(fcc0, v0, v1, cUN);
+                BCNEZ_MARK(fcc0);
+                FCMP_D(fcc1, d0, d0, cOR);
+                BCNEZ_MARK(fcc1);
+                FNEG_D(d0, d0);
+            }
+            MARK;
             VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0]
             break;
         default:
diff --git a/src/dynarec/la64/dynarec_la64_f30f.c b/src/dynarec/la64/dynarec_la64_f30f.c
index 47670a31..496fb001 100644
--- a/src/dynarec/la64/dynarec_la64_f30f.c
+++ b/src/dynarec/la64/dynarec_la64_f30f.c
@@ -8,6 +8,7 @@
 #include "dynarec.h"
 #include "emu/x64emu_private.h"
 #include "emu/x64run_private.h"
+#include "la64_emitter.h"
 #include "x64run.h"
 #include "x64emu.h"
 #include "box64stack.h"
@@ -107,6 +108,30 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             FMUL_S(d1, v0, d0);
             VEXTRINS_W(v0, d1, 0);
             break;
+        case 0x5A:
+            INST_NAME("CVTSS2SD Gx, Ex");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEXSS(v1, 0, 0);
+            d1 = fpu_get_scratch(dyn);
+            FCVT_D_S(d1, v1);
+            VEXTRINS_D(v0, d1, 0);
+            break;
+        case 0x5D:
+            INST_NAME("MINSS Gx, Ex"); // Gx = (Gx < Ex) ? Gx : Ex, on the low single only
+            nextop = F8;
+            GETGX(d0, 1);
+            GETEXSS(d1, 0, 0);
+            FCMP_S(fcc0, d0, d1, cUN); // any NaN operand: the result is Ex
+            BCNEZ_MARK(fcc0);
+            FCMP_S(fcc1, d0, d1, cLT); // keep Gx only when strictly less; ties (incl. +0/-0) take Ex like x86
+            BCNEZ_MARK2(fcc1);
+            MARK;
+            v1 = fpu_get_scratch(dyn);
+            FMOV_S(v1, d1);
+            VEXTRINS_W(d0, v1, 0); // insert the Ex value into the low 32-bit lane of Gx
+            MARK2;
+            break;
         case 0x5E:
             INST_NAME("DIVSS Gx, Ex");
             nextop = F8;
@@ -116,6 +141,21 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             FDIV_S(d1, v0, d0);
             VEXTRINS_W(v0, d1, 0);
             break;
+        case 0x5F:
+            INST_NAME("MAXSS Gx, Ex"); // Gx = (Gx > Ex) ? Gx : Ex, on the low single only
+            nextop = F8;
+            GETGX(d0, 1);
+            GETEXSS(d1, 0, 0);
+            FCMP_S(fcc0, d0, d1, cUN); // any NaN operand: the result is Ex
+            BCNEZ_MARK(fcc0);
+            FCMP_S(fcc1, d1, d0, cLT); // keep Gx only when strictly greater; ties (incl. +0/-0) take Ex like x86
+            BCNEZ_MARK2(fcc1);
+            MARK;
+            v1 = fpu_get_scratch(dyn);
+            FMOV_S(v1, d1);
+            VEXTRINS_W(d0, v1, 0); // insert the Ex value into the low 32-bit lane of Gx
+            MARK2;
+            break;
         case 0x6F:
             INST_NAME("MOVDQU Gx, Ex");
             nextop = F8;
@@ -145,6 +185,28 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             }
             VEXTRINS_D(v0, v1, 0); // v0[63:0] = v1[63:0]
             break;
+        case 0xC2:
+            INST_NAME("CMPSS Gx, Ex, Ib");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEXSS(v1, 0, 1);
+            u8 = F8;
+            switch (u8 & 7) {
+                case 0: FCMP_S(fcc0, v0, v1, cEQ); break;  // Equal
+                case 1: FCMP_S(fcc0, v0, v1, cLT); break;  // Less than
+                case 2: FCMP_S(fcc0, v0, v1, cLE); break;  // Less or equal
+                case 3: FCMP_S(fcc0, v0, v1, cUN); break;  // NaN
+                case 4: FCMP_S(fcc0, v0, v1, cUNE); break; // Not Equal or unordered
+                case 5: FCMP_S(fcc0, v1, v0, cULE); break; // Greater or equal or unordered
+                case 6: FCMP_S(fcc0, v1, v0, cULT); break; // Greater or unordered, test inverted, N!=V so unordered or less than (inverted)
+                case 7: FCMP_S(fcc0, v0, v1, cOR); break;  // not NaN
+            }
+            MOVCF2GR(x2, fcc0);
+            NEG_D(x2, x2);
+            q1 = fpu_get_scratch(dyn);
+            MOVGR2FR_W(q1, x2);
+            VEXTRINS_W(v0, q1, 0);
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index d6b9629a..a7d55077 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -364,6 +364,10 @@
     j64 = GET##M - dyn->native_size; \
     B##OP##Z(reg, j64)
 
+#define BCxxZ_gen(OP, M, fcc)        \
+    j64 = GET##M - dyn->native_size; \
+    BC##OP##Z(fcc, j64)
+
 // Branch to MARK if reg1!=reg2 (use j64)
 #define BNE_MARK(reg1, reg2) Bxx_gen(NE, MARK, reg1, reg2)
 // Branch to MARK2 if reg1!=reg2 (use j64)
@@ -393,6 +397,24 @@
 // Branch to MARKLOCK if reg1!=0 (use j64)
 #define BNEZ_MARKLOCK(reg) BxxZ_gen(NE, MARKLOCK, reg)
 
+// Branch to MARK if condition flag fcc!=0 (overwrites j64)
+#define BCNEZ_MARK(fcc) BCxxZ_gen(NE, MARK, fcc)
+// Branch to MARK2 if condition flag fcc!=0 (overwrites j64)
+#define BCNEZ_MARK2(fcc) BCxxZ_gen(NE, MARK2, fcc)
+// Branch to MARK3 if condition flag fcc!=0 (overwrites j64)
+#define BCNEZ_MARK3(fcc) BCxxZ_gen(NE, MARK3, fcc)
+// Branch to MARKLOCK if condition flag fcc!=0 (overwrites j64); must use BCxxZ_gen, BxxZ_gen would test a GPR
+#define BCNEZ_MARKLOCK(fcc) BCxxZ_gen(NE, MARKLOCK, fcc)
+
+// Branch to MARK if condition flag fcc==0 (overwrites j64)
+#define BCEQZ_MARK(fcc) BCxxZ_gen(EQ, MARK, fcc)
+// Branch to MARK2 if condition flag fcc==0 (overwrites j64)
+#define BCEQZ_MARK2(fcc) BCxxZ_gen(EQ, MARK2, fcc)
+// Branch to MARK3 if condition flag fcc==0 (overwrites j64)
+#define BCEQZ_MARK3(fcc) BCxxZ_gen(EQ, MARK3, fcc)
+// Branch to MARKLOCK if condition flag fcc==0 (overwrites j64); must use BCxxZ_gen, BxxZ_gen would test a GPR
+#define BCEQZ_MARKLOCK(fcc) BCxxZ_gen(EQ, MARKLOCK, fcc)
+
 // Branch to MARK if reg1<reg2 (use j64)
 #define BLT_MARK(reg1, reg2) Bxx_gen(LT, MARK, reg1, reg2)
 // Branch to MARK if reg1<reg2 (use j64)
@@ -418,6 +440,14 @@
 #define B_NEXT_nocond                                                         \
     j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \
     B(j64)
+// Branch to NEXT if fcc==0 (use j64)
+#define CBCZ_NEXT(fcc)                                                        \
+    j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \
+    BCEQZ(fcc, j64)
+// Branch to NEXT if fcc!=0 (use j64)
+#define CBCNZ_NEXT(fcc)                                                       \
+    j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \
+    BCNEZ(fcc, j64)
 
 // Branch to NEXT if reg1==reg2 (use j64)
 #define BEQ_NEXT(reg1, reg2)                                                  \
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 7860734a..778e1a83 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -109,6 +109,38 @@ f24-f31  fs0-fs7   Static registers                Callee
 #define wZR     xZR
 #define r0      xZR
 
+#define fcc0 0
+#define fcc1 1
+#define fcc2 2
+#define fcc3 3
+#define fcc4 4
+#define fcc5 5
+#define fcc6 6
+#define fcc7 7
+
+#define cAF  0x0
+#define cUN  0x8
+#define cEQ  0x4
+#define cUEQ 0xC
+#define cLT  0x2
+#define cULT 0xA
+#define cLE  0x6
+#define cULE 0xE
+#define cNE  0x10
+#define cOR  0x14
+#define cUNE 0x18
+#define sAF  0x1
+#define sUN  0x9
+#define sEQ  0x5
+#define sUEQ 0xD
+#define sLT  0x3
+#define sULT 0xB
+#define sLE  0x7
+#define sULE 0xF
+#define sNE  0x11
+#define sOR  0x15
+#define sUNE 0x19
+
 // split a 32bits value in 20bits + 12bits, adjust the upper part is 12bits is negative
 #define SPLIT20(A) (((A) + 0x800) >> 12)
 #define SPLIT12(A) ((A) & 0xfff)
@@ -508,7 +540,10 @@ f24-f31  fs0-fs7   Static registers                Callee
 #define BEQZ(rj, imm23) EMIT(type_1RI21(0b010000, ((imm23)>>2), rj))
 // if GR[rj] != 0:
 //     PC = PC + SignExtend({imm21, 2'b0}, GRLEN)
-#define BNEZ(rj, imm23) EMIT(type_1RI21(0b010001, ((imm23)>>2), rj))
+#define BNEZ(rj, imm23) EMIT(type_1RI21(0b010001, ((imm23) >> 2), rj))
+
+#define BCEQZ(cj, imm23) EMIT(type_1RI21(0b010010, ((imm23)>>2), 0b00000 | (cj))) // branch by imm23 bytes if condition flag cj == 0
+#define BCNEZ(cj, imm23) EMIT(type_1RI21(0b010010, ((imm23)>>2), 0b01000 | (cj))) // branch by imm23 bytes if condition flag cj != 0
 
 // GR[rd] = PC + 4
 // PC = GR[rj] + SignExtend({imm16, 2'b0}, GRLEN)
diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c
index 3d4cf991..247653b4 100644
--- a/src/dynarec/la64/la64_printer.c
+++ b/src/dynarec/la64/la64_printer.c
@@ -2141,19 +2141,19 @@ const char* la64_print(uint32_t opcode, uintptr_t addr)
         return buff;
     }
     if (isMask(opcode, "01110011100000iiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "VEXTRINS.D", Vt[Rd], Vt[Rj], signExtend(imm, 8));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "VEXTRINS.D", Vt[Rd], Vt[Rj], imm);
         return buff;
     }
     if (isMask(opcode, "01110011100001iiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "VEXTRINS.W", Vt[Rd], Vt[Rj], signExtend(imm, 8));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "VEXTRINS.W", Vt[Rd], Vt[Rj], imm);
         return buff;
     }
     if (isMask(opcode, "01110011100010iiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "VEXTRINS.H", Vt[Rd], Vt[Rj], signExtend(imm, 8));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "VEXTRINS.H", Vt[Rd], Vt[Rj], imm);
         return buff;
     }
     if (isMask(opcode, "01110011100011iiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "VEXTRINS.B", Vt[Rd], Vt[Rj], signExtend(imm, 8));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "VEXTRINS.B", Vt[Rd], Vt[Rj], imm);
         return buff;
     }
     if (isMask(opcode, "0010110000iiiiiiiiiiiijjjjjddddd", &a)) {