about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-01-27 13:31:07 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2024-01-27 13:31:07 +0100
commit 124672b8116e815e64f8f45aa76196d37d52f55e (patch)
tree 4c12ab93beb8ad33488458feb2950f2ecba38bff /src
parent 28ef6cdefc2d3a8803c6dc148e172c70b1c11175 (diff)
download box64-124672b8116e815e64f8f45aa76196d37d52f55e.tar.gz
box64-124672b8116e815e64f8f45aa76196d37d52f55e.zip
[ARM64_DYNAREC] Fixed/Improved D9 E5 opcode and add hack for FLD1/FLD1/FPATAN sequence
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_d9.c 91
1 files changed, 85 insertions, 6 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c
index cf76770d..48ad7001 100644
--- a/src/dynarec/arm64/dynarec_arm64_d9.c
+++ b/src/dynarec/arm64/dynarec_arm64_d9.c
@@ -31,7 +31,7 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     uint8_t ed;
     uint8_t wback, wb1;
     uint8_t u8;
-    int64_t fixedaddress;
+    int64_t fixedaddress, j64;
     int unscaled;
     int v1, v2;
     int s0;
@@ -131,18 +131,97 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             break;
         case 0xE5:
             INST_NAME("FXAM");
+            #if 1
+            i1 = x87_get_current_cache(dyn, ninst, 0, NEON_CACHE_ST_D);
+            // value put in x14
+            if(i1==-1) {
+                // not in cache, so check Empty status and load it
+                i2 = -dyn->n.x87stack;
+                LDRw_U12(x3, xEmu, offsetof(x64emu_t, fpu_stack));
+                if(i2) {
+                    if(i2<0) {
+                        ADDw_U12(x3, x3, -i2);
+                    } else {
+                        SUBw_U12(x3, x3, i2);
+                    }
+                }
+                CMPSw_U12(x3, 0);
+                MOV32w(x3, 0b100000100000000);
+                CSELx(x4, x3, x4, cLE); // empty: C3,C2,C0 = 101
+                B_MARK3(cLE);
+                // x4 will be the actual top
+                LDRw_U12(x4, xEmu, offsetof(x64emu_t, top));
+                if(i2) {
+                    if(i2<0) {
+                        SUBw_U12(x4, x4, -i2);
+                    } else {
+                        ADDw_U12(x4, x4, i2);
+                    }
+                    ANDw_mask(x4, x4, 0, 3);    // (emu->top + i)&7
+                }
+                ADDx_REG_LSL(x1, xEmu, x4, 3);
+                LDRx_U12(x2, x1, offsetof(x64emu_t, x87)); // load x2 with ST0 anyway, for sign extraction
+            } else {
+                // simply move from cache reg to x2
+                v1 = dyn->n.x87reg[i1];
+                VMOVQDto(x2, v1, 0);
+            }
+            // get exponant in x1
+            LSRx_IMM(x1, x2, 20+32);
+            ANDSx_mask(x1, x1, 1, 0b00000, 0b001010); // 0x7ff
+            B_MARK(cNE); // not zero or denormal
+            ANDx_mask(x1, x3, 1, 0, 0b111110); // 0x7fffffffffffffff
+            ORRx_REG(x1, x1, x2);
+            MOV32w(x4, 0b100000000000000); // Zero: C3,C2,C0 = 100
+            MOV32w(x5, 0b100010000000000); // Denormal: C3,C2,C0 = 110
+            CSELx(x4, x4, x5, cEQ);
+            B_MARK3(c__);
+            MARK;
+            ORRx_mask(x4, xZR, 1, 0b001100, 0b001010); // 0x7ff0000000000000
+            CMPSx_REG(x2, x4);   // infinite/NaN?
+            MOV32w(x5, 0b000010000000000); // normal: C3,C2,C0 = 010
+            CSELx(x4, x5, x4, cNE);
+            B_MARK3(cNE);
+            TSTx_mask(x2, 1, 0b000000, 0b110011); // 0x000fffffffffffff
+            MOV32w(x4, 0b000010100000000); // infinity: C3,C2,C0 = 011
+            MOV32w(x3, 0b000000100000000); // NaN: C3,C2,C0 = 001
+            CSELx(x4, x4, x3, cEQ);
+            MARK3;
+            // Extract signa & Update SW
+            LSRx_IMM(x1, x2, 63);
+            BFIx(x4, x1, 9, 1); //C1
+            LDRH_U12(x1, xEmu, offsetof(x64emu_t, sw));
+            MOV32w(x2, 0b01000111);
+            BICw_REG_LSL(x1, x1, x2, 8);
+            ORRw_REG(x4, x4, x1);
+            STRH_U12(x4, xEmu, offsetof(x64emu_t, sw));
+            #else
             MESSAGE(LOG_DUMP, "Need Optimization\n");
-            x87_refresh(dyn, ninst, x1, x2, 0);
+            i1 = x87_stackcount(dyn, ninst, x1);
+            x87_forget(dyn, ninst, x1, x2, 0);
+            //x87_refresh(dyn, ninst, x1, x2, 0);
             CALL(fpu_fxam, -1);  // should be possible inline, but is it worth it?
+            x87_unstackcount(dyn, ninst, x1, i1);
+            #endif
             break;
 
         case 0xE8:
             INST_NAME("FLD1");
-            X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_F);
-            if(ST_IS_F(0)) {
-                FMOVS_8(v1, 0b01110000);
+            if(ninst<dyn->size+2 && (dyn->insts[ninst+1].pred_sz==1) && (dyn->insts[ninst+2].pred_sz==1)
+                && PK(0)==0xD9 && PK(1)==0xE8
+                && PK(2)==0xD9 && PK(3)==0xF3
+            ) {
+                MESSAGE(LOG_DUMP, "Hack for FLD1 FLD1 FPATAN");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_F);
+                FTABLE64(v1, PI/4.0);
+                addr+=4;
             } else {
-                FMOVD_8(v1, 0b01110000);
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_F);
+                if(ST_IS_F(0)) {
+                    FMOVS_8(v1, 0b01110000);
+                } else {
+                    FMOVD_8(v1, 0b01110000);
+                }
             }
             break;
         case 0xE9: