about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rwxr-xr-x src/dynarec/arm64_emitter.h 8
-rwxr-xr-x src/dynarec/dynarec_arm64_f20f.c 24
-rwxr-xr-x src/dynarec/dynarec_arm64_f30f.c 87
3 files changed, 89 insertions, 30 deletions
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 8078169a..025f381f 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -517,10 +517,14 @@
  
 // VLDR
 #define VMEM_gen(size, opc, imm12, Rn, Rt)  ((size)<<30 | 0b111<<27 | 1<<26 | 0b01<<24 | (opc)<<22 | (imm12)<<10 | (Rn)<<5 | (Rt))
+// imm14 must be 2-aligned
+#define VLDR32_U12(Dt, Rn, imm14)           EMIT(VMEM_gen(0b10, 0b01, ((uint32_t)((imm14)>>2))&0xfff, Rn, Dt))
 // imm15 must be 3-aligned
 #define VLDR64_U12(Dt, Rn, imm15)           EMIT(VMEM_gen(0b11, 0b01, ((uint32_t)((imm15)>>3))&0xfff, Rn, Dt))
 // imm16 must be 4-aligned
 #define VLDR128_U12(Qt, Rn, imm16)          EMIT(VMEM_gen(0b00, 0b11, ((uint32_t)((imm16)>>4))&0xfff, Rn, Qt))
+// (imm14) must be 2-aligned
+#define VSTR32_U12(Dt, Rn, imm14)           EMIT(VMEM_gen(0b10, 0b00, ((uint32_t)(imm14>>2))&0xfff, Rn, Dt))
 // (imm15) must be 3-aligned
 #define VSTR64_U12(Dt, Rn, imm15)           EMIT(VMEM_gen(0b11, 0b00, ((uint32_t)(imm15>>3))&0xfff, Rn, Dt))
 // imm16 must be 4-aligned
@@ -538,11 +542,15 @@
 
 #define VMEM_REG_gen(size, opc, Rm, option, S, Rn, Rt)  ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | 0b10<<10 | (Rn)<<5 | (Rt))
 
+#define VLDR32_REG(Dt, Rn, Rm)              EMIT(VMEM_REG_gen(0b10, 0b01, Rm, 0b011, 0, Rn, Dt))
+#define VLDR32_REG_LSL3(Dt, Rn, Rm)         EMIT(VMEM_REG_gen(0b10, 0b01, Rm, 0b011, 1, Rn, Dt))
 #define VLDR64_REG(Dt, Rn, Rm)              EMIT(VMEM_REG_gen(0b11, 0b01, Rm, 0b011, 0, Rn, Dt))
 #define VLDR64_REG_LSL3(Dt, Rn, Rm)         EMIT(VMEM_REG_gen(0b11, 0b01, Rm, 0b011, 1, Rn, Dt))
 #define VLDR128_REG(Qt, Rn, Rm)             EMIT(VMEM_REG_gen(0b00, 0b11, Rm, 0b011, 0, Rn, Dt))
 #define VLDR128_REG_LSL4(Qt, Rn, Rm)        EMIT(VMEM_REG_gen(0b00, 0b11, Rm, 0b011, 1, Rn, Dt))
 
+#define VSTR32_REG(Dt, Rn, Rm)              EMIT(VMEM_REG_gen(0b10, 0b00, Rm, 0b011, 0, Rn, Dt))
+#define VSTR32_REG_LSL3(Dt, Rn, Rm)         EMIT(VMEM_REG_gen(0b10, 0b00, Rm, 0b011, 1, Rn, Dt))
 #define VSTR64_REG(Dt, Rn, Rm)              EMIT(VMEM_REG_gen(0b11, 0b00, Rm, 0b011, 0, Rn, Dt))
 #define VSTR64_REG_LSL3(Dt, Rn, Rm)         EMIT(VMEM_REG_gen(0b11, 0b00, Rm, 0b011, 1, Rn, Dt))
 #define VSTR128_REG(Qt, Rn, Rm)             EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 0, Rn, Dt))
diff --git a/src/dynarec/dynarec_arm64_f20f.c b/src/dynarec/dynarec_arm64_f20f.c
index 2c5ed5bc..23a23be4 100755
--- a/src/dynarec/dynarec_arm64_f20f.c
+++ b/src/dynarec/dynarec_arm64_f20f.c
@@ -39,7 +39,10 @@
         }                                                                                           \

     }

 

-#define GETGX   gd = ((nextop&0x38)>>3)+(rex.r<<3)

+#define GETG        gd = ((nextop&0x38)>>3)+(rex.r<<3)

+

+#define GETGX(a)    gd = ((nextop&0x38)>>3)+(rex.r<<3); \

+                    a = sse_get_reg(dyn, ninst, x1, gd)

 

 uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog)

 {

@@ -67,7 +70,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x10:

             INST_NAME("MOVSD Gx, Ex");

             nextop = F8;

-            GETGX;

+            GETG;

             if(MODREG) {

                 ed = (nextop&7)+ (rex.b<<3);

                 v0 = sse_get_reg(dyn, ninst, x1, gd);

@@ -84,7 +87,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x11:

             INST_NAME("MOVSD Ex, Gx");

             nextop = F8;

-            GETGX;

+            GETG;

             v0 = sse_get_reg(dyn, ninst, x1, gd);

             if(MODREG) {

                 ed = (nextop&7)+ (rex.b<<3);

@@ -100,8 +103,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x51:

             INST_NAME("SQRTSD Gx, Ex");

             nextop = F8;

-            GETGX;

-            v0 = sse_get_reg(dyn, ninst, x1, gd);

+            GETGX(v0);

             d1 = fpu_get_scratch(dyn);

             GETEX(d0, 0);

             FSQRTD(d1, d0);

@@ -111,8 +113,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x58:

             INST_NAME("ADDSD Gx, Ex");

             nextop = F8;

-            GETGX;

-            v0 = sse_get_reg(dyn, ninst, x1, gd);

+            GETGX(v0);

             d1 = fpu_get_scratch(dyn);

             GETEX(d0, 0);

             FADDD(d1, v0, d0);  // the high part of the vector is erased...

@@ -121,8 +122,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x59:

             INST_NAME("MULSD Gx, Ex");

             nextop = F8;

-            GETGX;

-            v0 = sse_get_reg(dyn, ninst, x1, gd);

+            GETGX(v0);

             d1 = fpu_get_scratch(dyn);

             GETEX(d0, 0);

             FMULD(d1, v0, d0);

@@ -132,8 +132,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x5C:

             INST_NAME("SUBSD Gx, Ex");

             nextop = F8;

-            GETGX;

-            v0 = sse_get_reg(dyn, ninst, x1, gd);

+            GETGX(v0);

             d1 = fpu_get_scratch(dyn);

             GETEX(d0, 0);

             FSUBD(d1, v0, d0);

@@ -143,8 +142,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x5E:

             INST_NAME("DIVSD Gx, Ex");

             nextop = F8;

-            GETGX;

-            v0 = sse_get_reg(dyn, ninst, x1, gd);

+            GETGX(v0);

             d1 = fpu_get_scratch(dyn);

             GETEX(d0, 0);

             FDIVD(d1, v0, d0);

diff --git a/src/dynarec/dynarec_arm64_f30f.c b/src/dynarec/dynarec_arm64_f30f.c
index 68b22031..62d5e965 100755
--- a/src/dynarec/dynarec_arm64_f30f.c
+++ b/src/dynarec/dynarec_arm64_f30f.c
@@ -22,24 +22,28 @@
 #include "dynarec_arm64_functions.h"

 #include "dynarec_arm64_helper.h"

 

-// Get Ex as a double, not a quad (warning, x2 and x3 may get used)

-#define GETEX(a) \

-    if((nextop&0xC0)==0xC0) { \

-        a = sse_get_reg(dyn, ninst, x1, nextop&7); \

-    } else {    \

-        parity = getedparity(dyn, ninst, addr, nextop, 3);  \

-        a = fpu_get_scratch_double(dyn);            \

-        if(parity) {                                \

-            addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 1023, 3); \

-            VLDR_64(a, ed, fixedaddress);           \

-        } else {                                    \

-            addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 4095-4, 0);\

-            LDR_IMM9(x2, ed, fixedaddress+0);       \

-            LDR_IMM9(x3, ed, fixedaddress+4);       \

-            VMOVtoV_D(a, x2, x3);                   \

-        }                                           \

+// Get Ex as a single, not a quad (warning: x2 gets used)

+#define GETEX(a, D)                                                                                 \

+    if(MODREG) {                                                                                    \

+        a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3));                                     \

+    } else {                                                                                        \

+        parity = getedparity(dyn, ninst, addr, nextop, 3, D);                                       \

+        a = fpu_get_scratch(dyn);                                                                   \

+        if(parity) {                                                                                \

+            addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, D); \

+            VLDR32_U12(a, ed, fixedaddress);                                                        \

+        } else {                                                                                    \

+            addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, D); \

+            LDRw_U12(x2, ed, fixedaddress);                                                         \

+            VMOVQSfrom(a, 0, x2);                                                                   \

+        }                                                                                           \

     }

 

+#define GETG        gd = ((nextop&0x38)>>3)+(rex.r<<3)

+

+#define GETGX(a)    gd = ((nextop&0x38)>>3)+(rex.r<<3); \

+                    a = sse_get_reg(dyn, ninst, x1, gd)

+

 uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog)

 {

     uint8_t opcode = F8;

@@ -50,7 +54,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
     uint8_t eb1, eb2;

     int v0, v1;

     int q0, q1;

-    int d0;

+    int d0, d1;

     int s0;

     int fixedaddress;

     int parity;

@@ -64,6 +68,55 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
     switch(opcode) {

 

 

+        case 0x51:

+            INST_NAME("SQRTSS Gx, Ex");

+            nextop = F8;

+            GETGX(v0);

+            d1 = fpu_get_scratch(dyn);

+            GETEX(d0, 0);

+            FSQRTS(d1, d0);

+            VMOVeS(v0, 0, d1, 0);

+            break;

+

+        case 0x58:

+            INST_NAME("ADDSS Gx, Ex");

+            nextop = F8;

+            GETGX(v0);

+            d1 = fpu_get_scratch(dyn);

+            GETEX(d0, 0);

+            FADDS(d1, v0, d0);  // the high part of the vector is erased...

+            VMOVeS(v0, 0, d1, 0);

+            break;

+        case 0x59:

+            INST_NAME("MULSS Gx, Ex");

+            nextop = F8;

+            GETGX(v0);

+            d1 = fpu_get_scratch(dyn);

+            GETEX(d0, 0);

+            FMULS(d1, v0, d0);

+            VMOVeS(v0, 0, d1, 0);

+            break;

+

+        case 0x5C:

+            INST_NAME("SUBSS Gx, Ex");

+            nextop = F8;

+            GETGX(v0);

+            d1 = fpu_get_scratch(dyn);

+            GETEX(d0, 0);

+            FSUBS(d1, v0, d0);

+            VMOVeS(v0, 0, d1, 0);

+            break;

+

+        case 0x5E:

+            INST_NAME("DIVSS Gx, Ex");

+            nextop = F8;

+            GETGX(v0);

+            d1 = fpu_get_scratch(dyn);

+            GETEX(d0, 0);

+            FDIVS(d1, v0, d0);

+            VMOVeS(v0, 0, d1, 0);

+            break;

+            

         default:

             DEFAULT;

     }