about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2023-02-19 17:58:11 +0100
committerptitSeb <sebastien.chev@gmail.com>2023-02-19 17:58:11 +0100
commitd0a724bb08f06f1581621720b114335403bc29d9 (patch)
tree3ebed330b438c4d6ddeec5d960e0594dd0250b50 /src
parent8e9ac1b9ac492cb02f59300b343e41b37de28ad4 (diff)
downloadbox64-d0a724bb08f06f1581621720b114335403bc29d9.tar.gz
box64-d0a724bb08f06f1581621720b114335403bc29d9.zip
[DYNAREC] Optimized DB /7 opcode
Diffstat (limited to 'src')
-rwxr-xr-xsrc/dynarec/arm64/arm64_emitter.h11
-rwxr-xr-xsrc/dynarec/arm64/arm64_printer.c6
-rw-r--r--src/dynarec/arm64/dynarec_arm64_db.c51
3 files changed, 65 insertions, 3 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 9f573f8f..c3d62074 100755
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -266,6 +266,13 @@
 #define STRH_U12(Rt, Rn, imm12)           EMIT(ST_gen(0b01, 0b01, ((uint32_t)((imm12)>>1))&0xfff, Rn, Rt))
 #define STRxw_U12(Rt, Rn, imm12)          EMIT(ST_gen((rex.w)?0b11:0b10, 0b01, ((uint32_t)((imm12)>>(2+rex.w)))&0xfff, Rn, Rt))
 
+#define STU_gen(size, opc, imm9, Rn, Rt)  ((size)<<30 | 0b111<<27 | (opc)<<22 | ((imm9)&0x1ff)<<12 | (Rn)<<5 | (Rt))
+#define STURx_I9(Rt, Rn, imm9)            EMIT(STU_gen(0b11, 0b00, imm9, Rn, Rt))
+#define STURw_I9(Rt, Rn, imm9)            EMIT(STU_gen(0b10, 0b00, imm9, Rn, Rt))
+#define STURxw_I9(Rt, Rn, imm9)           EMIT(STU_gen((rex.w)?0b11:0b10, 0b00, imm9, Rn, Rt))
+#define STURH_I9(Rt, Rn, imm9)            EMIT(STU_gen(0b01, 0b00, imm9, Rn, Rt))
+#define STURB_I9(Rt, Rn, imm9)            EMIT(STU_gen(0b00, 0b00, imm9, Rn, Rt))
+
 #define STR_REG_gen(size, Rm, option, S, Rn, Rt)    ((size)<<30 | 0b111<<27 | 0b00<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | (0b10)<<10 | (Rn)<<5 | (Rt))
 #define STRx_REG(Rt, Rn, Rm)            EMIT(STR_REG_gen(0b11, Rm, 0b011, 0, Rn, Rt))
 #define STRx_REG_LSL3(Rt, Rn, Rm)       EMIT(STR_REG_gen(0b11, Rm, 0b011, 1, Rn, Rt))
@@ -649,6 +656,10 @@
 #define REV16w(Rd, Rn)                  EMIT(REV_gen(0, 0b01, Rn, Rd))
 #define REV16x(Rd, Rn)                  EMIT(REV_gen(1, 0b01, Rn, Rd))
 
+// UDF
+#define UDF_gen(imm16)                  ((imm16)&0xffff)
+#define UDF(imm16)                      EMIT(UDF_gen(imm16))
+
 // MRS
 #define MRS_gen(L, o0, op1, CRn, CRm, op2, Rt)  (0b1101010100<<22 | (L)<<21 | 1<<20 | (o0)<<19 | (op1)<<16 | (CRn)<<12 | (CRm)<<8 | (op2)<<5 | (Rt))
 // mrs    x0, nzcv : 1101010100 1 1 1 011 0100 0010 000 00000    o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0010(2) op2=0
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index 4bad45d5..e9356773 100755
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -25,7 +25,7 @@ typedef struct arm64_print_s {
 

 uint64_t DecodeBitMasks(int N, int imms, int immr)

 {

-    int len = 31-__builtin_clz(N<<6 | ((~imms)&0b111111));

+    int len = 31-__builtin_clz((N<<6) | ((~imms)&0b111111));

     if(len<1) return 0;

     int levels = (1<<len) - 1;

     int s = imms & levels;

@@ -1140,8 +1140,8 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         else if(sf==0 && type==0 && rmode==0 && opcd==6) {snprintf(buff, sizeof(buff), "FMOV %s, S%d", Wt[Rn], Rd);}

         else if(sf==1 && type==1 && rmode==0 && opcd==7) {snprintf(buff, sizeof(buff), "FMOV D%d, %s", Rd, Xt[Rn]);}

         else if(sf==1 && type==2 && rmode==1 && opcd==7) {snprintf(buff, sizeof(buff), "FMOV V%d.D[1], %s", Rd, Xt[Rn]);}

-        else if(sf==1 && type==1 && rmode==0 && opcd==6) {snprintf(buff, sizeof(buff), "FMOV %s, S%d", Xt[Rn], Rd);}

-        else if(sf==1 && type==2 && rmode==1 && opcd==6) {snprintf(buff, sizeof(buff), "FMOV %s, V%d.D[1]", Xt[Rn], Rd);}

+        else if(sf==1 && type==1 && rmode==0 && opcd==6) {snprintf(buff, sizeof(buff), "FMOV %s, D%d", Xt[Rd], Rn);}

+        else if(sf==1 && type==2 && rmode==1 && opcd==6) {snprintf(buff, sizeof(buff), "FMOV %s, V%d.D[1]", Xt[Rd], Rn);}

         else snprintf(buff, sizeof(buff), "FMOV ????");

         return buff;

     }

diff --git a/src/dynarec/arm64/dynarec_arm64_db.c b/src/dynarec/arm64/dynarec_arm64_db.c
index e40cc71d..4533c83b 100644
--- a/src/dynarec/arm64/dynarec_arm64_db.c
+++ b/src/dynarec/arm64/dynarec_arm64_db.c
@@ -315,12 +315,63 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0);
                         VSTR64_U12(v1, wback, fixedaddress);
                     } else {
+                        #if 0
                         x87_forget(dyn, ninst, x1, x3, 0);
                         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0);
                         if(ed!=x1) {
                             MOVx_REG(x1, ed);
                         }
                         CALL(arm_fstp, -1);
+                        #else
+                        // Painfully long, straight conversion from the C code, should be optimized
+                        v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                        addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0);
+                        FMOVxD(x1, v1);
+                        // do special value first
+                        TSTx_mask(x1, 1, 0b00000, 0b111110);    //0x7fffffffffffffffL
+                        B_MARK(cNE);
+                        // Zero
+                        LSRx(x3, x1, 63-15);    //x3 = sign+exp
+                        MOVZw(x5, 0);           // x5 = mantissa
+                        B_MARK3_nocond;
+                        MARK;
+                        // get sign, in main output x5 for sign+exp
+                        ANDx_mask(x5, x1, 1, 1, 0); //0x8000000000000000
+                        LSRx(x5, x5, 63-15);    // x5 = sign
+                        // get exp
+                        LSRx(x3, x1, 52);       // x3 = exp11
+                        ANDw_mask(x3, x3, 0, 0b1010);    //0x7ff
+                        MOV32w(x4, 0x7ff);
+                        CMPSw_REG(x3, x4);
+                        B_MARK2(cNE);
+                        // NaN and Infinite
+                        ORRw_mask(x3, x5, 0, 0b1110);    //x3 = sign | 0x7fff 
+                        TSTx_mask(x1, 1, 0, 0b110011); //0x000fffffffffffffL
+                        ORRx_mask(x5, xZR, 1, 1, 0);    //0x8000000000000000
+                        ORRx_mask(x4, xZR, 1, 0b10, 0b01); //0xc000000000000000
+                        CSELx(x5, x5, x4, cEQ);     // x5 = mantissa
+                        B_MARK3_nocond;
+                        MARK2;
+                        // regular / denormals
+                        ANDx_mask(x1, x1, 1, 0, 0b110011); //0x000fffffffffffffL
+                        LSLx_IMM(x1, x1, 11);   //x1 = mantissa missing "1"
+                        MOVZw(x4, 16383-1023);  //BIAS80 - BIAS64
+                        CBZw(x3, 4+3*4);        // exp11 == 0?
+                        // normals
+                        ADDw_REG(x3, x3, x4);   // x3 = exp16
+                        ORRw_REG(x3, x3, x5);   // x3 = sign | exp
+                        ORRx_mask(x5, x1, 1, 1, 0);    //0x8000000000000000 x5 = mantissa
+                        B_MARK3_nocond;
+                        // denormals
+                        CLZx(x6, x1);
+                        ADDw_U12(x6, x6, 1);    // "one"
+                        SUBw_REG(x3, x4, x6);   // x3 = exp16
+                        ORRw_REG(x3, x3, x5);   // x3 = sign | exp16
+                        LSLx_REG(x5, x1, x6);   // x5 = mantissa
+                        MARK3;
+                        STRx_U12(x5, wback, 0);
+                        STRH_U12(x3, wback, 8);
+                        #endif
                     }
                     x87_do_pop(dyn, ninst, x3);
                     break;