about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2025-01-10 15:51:08 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2025-01-10 15:51:08 +0100
commit b977d84f111a1b4b405d339b0226fae3cc808fd0 (patch)
tree f80d5cdd0db261116ff9753b5c5259099a78b7c6 /src
parent b7f14ff33869397abcfebbf12b69a7b0d65c73d7 (diff)
download box64-b977d84f111a1b4b405d339b0226fae3cc808fd0.tar.gz
box64-b977d84f111a1b4b405d339b0226fae3cc808fd0.zip
[ARM64_DYNAREC] Reworked a bit DB / 7 opcode
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_db.c 106
1 files changed, 50 insertions, 56 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_db.c b/src/dynarec/arm64/dynarec_arm64_db.c
index c16d14e4..a27f5456 100644
--- a/src/dynarec/arm64/dynarec_arm64_db.c
+++ b/src/dynarec/arm64/dynarec_arm64_db.c
@@ -324,63 +324,57 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
                     VST64(v1, wback, fixedaddress);
                 } else {
-                    #if 0
-                    x87_forget(dyn, ninst, x1, x3, 0);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                    if(ed!=x1) {
-                        MOVx_REG(x1, ed);
+                    if(!box64_dynarec_fastround) {
+                        x87_forget(dyn, ninst, x1, x3, 0);
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                        if(ed!=x1) {MOVx_REG(x1, ed);}
+                        CALL(native_fstp, -1);
+                    } else {
+                        // Painfully long, straight conversion from the C code, shoud be optimized
+                        v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                        addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                        FMOVxD(x1, v1);
+                        // do special value first
+                        TSTx_mask(x1, 1, 0b00000, 0b111110);    //0x7fffffffffffffffL
+                        B_MARK(cNE);
+                        // Zero
+                        LSRx(x3, x1, 63-15);    //x3 = sign+exp
+                        MOVZw(x5, 0);           // x5 = mantisse
+                        B_MARK3_nocond;
+                        MARK;
+                        // get sign, in main ouput x5 for sign+exp
+                        ANDx_mask(x5, x1, 1, 1, 0); //0x8000000000000000
+                        LSRx(x5, x5, 63-15);    // x5 = sign
+                        // get exp
+                        LSRx(x3, x1, 52);       // x3 = exp11
+                        ANDw_mask(x3, x3, 0, 0b1010);    //0x7ff
+                        ANDSx_mask(x1, x1, 1, 0, 0b110011); //0x000fffffffffffffL
+                        LSLx_IMM(x1, x1, 11);   // mantice
+                        CMPSw_U12(x3, 0x7ff);
+                        B_MARK2(cNE);
+                        // NaN and Infinite
+                        ORRw_mask(x3, x5, 0, 0b1110);    //x3 = sign | 0x7fff
+                        ORRx_mask(x5, x1, 1, 1, 0); //0x8000000000000000
+                        B_MARK3_nocond;
+                        MARK2;
+                        // regular / denormals
+                        MOVZw(x4, 16383-1023);  //BIAS80 - BIAS64
+                        CBZw(x3, 4+4*4);        // exp11 == 0?
+                        // normals
+                        ADDw_REG(x3, x3, x4);   // x3 = exp16
+                        ORRw_REG(x3, x3, x5);   // x3 = sign | exp
+                        ORRx_mask(x5, x1, 1, 1, 0);    //0x8000000000000000 x5 = mantisse
+                        B_MARK3_nocond;
+                        // denormals
+                        CLZx(x6, x1);
+                        ADDw_U12(x6, x6, 1);    // "one"
+                        SUBw_REG(x3, x4, x6);   // x3 = exp16
+                        ORRw_REG(x3, x3, x5);   // x3 = sign | exp16
+                        LSLx_REG(x5, x1, x6);   // x5 = mantisse
+                        MARK3;
+                        STRx_U12(x5, wback, 0);
+                        STRH_U12(x3, wback, 8);
                     }
-                    CALL(native_fstp, -1);
-                    #else
-                    // Painfully long, straight conversion from the C code, shoud be optimized
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                    FMOVxD(x1, v1);
-                    // do special value first
-                    TSTx_mask(x1, 1, 0b00000, 0b111110);    //0x7fffffffffffffffL
-                    B_MARK(cNE);
-                    // Zero
-                    LSRx(x3, x1, 63-15);    //x3 = sign+exp
-                    MOVZw(x5, 0);           // x5 = mantisse
-                    B_MARK3_nocond;
-                    MARK;
-                    // get sign, in main ouput x5 for sign+exp
-                    ANDx_mask(x5, x1, 1, 1, 0); //0x8000000000000000
-                    LSRx(x5, x5, 63-15);    // x5 = sign
-                    // get exp
-                    LSRx(x3, x1, 52);       // x3 = exp11
-                    ANDw_mask(x3, x3, 0, 0b1010);    //0x7ff
-                    MOV32w(x4, 0x7ff);
-                    CMPSw_REG(x3, x4);
-                    B_MARK2(cNE);
-                    // NaN and Infinite
-                    ORRw_mask(x3, x5, 0, 0b1110);    //x3 = sign | 0x7fff
-                    TSTx_mask(x1, 1, 0, 0b110011); //0x000fffffffffffffL
-                    ORRx_mask(x5, xZR, 1, 1, 0);    //0x8000000000000000
-                    ORRx_mask(x4, xZR, 1, 0b10, 0b01); //0xc000000000000000
-                    CSELx(x5, x5, x4, cEQ);     // x5 = mantisse
-                    B_MARK3_nocond;
-                    MARK2;
-                    // regular / denormals
-                    ANDx_mask(x1, x1, 1, 0, 0b110011); //0x000fffffffffffffL
-                    LSLx_IMM(x1, x1, 11);   //x1 = mantisse missing "1"
-                    MOVZw(x4, 16383-1023);  //BIAS80 - BIAS64
-                    CBZw(x3, 4+3*4);        // exp11 == 0?
-                    // normals
-                    ADDw_REG(x3, x3, x4);   // x3 = exp16
-                    ORRw_REG(x3, x3, x5);   // x3 = sign | exp
-                    ORRx_mask(x5, x1, 1, 1, 0);    //0x8000000000000000 x5 = mantisse
-                    B_MARK3_nocond;
-                    // denormals
-                    CLZx(x6, x1);
-                    ADDw_U12(x6, x6, 1);    // "one"
-                    SUBw_REG(x3, x4, x6);   // x3 = exp16
-                    ORRw_REG(x3, x3, x5);   // x3 = sign | exp16
-                    LSLx_REG(x5, x1, x6);   // x5 = mantisse
-                    MARK3;
-                    STRx_U12(x5, wback, 0);
-                    STRH_U12(x3, wback, 8);
-                    #endif
                 }
                 X87_POP_OR_FAIL(dyn, ninst, x3);
                 break;