about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: ptitSeb <sebastien.chev@gmail.com> 2021-03-22 20:09:39 +0100
committer: ptitSeb <sebastien.chev@gmail.com> 2021-03-22 20:09:39 +0100
commit: cdaf19899e9760e225a53ddf67e446d671e2fc8b (patch)
tree: 19a2b88a1735a2b0696869e315e71932f9e057c6 /src
parent: b5847d83cbe04809a8b1fe89d8c31745b82f9c43 (diff)
download: box64-cdaf19899e9760e225a53ddf67e446d671e2fc8b.tar.gz
          box64-cdaf19899e9760e225a53ddf67e446d671e2fc8b.zip
[DYNAREC] ARMv8 NEON doesn't require alignment
Diffstat (limited to 'src')
-rwxr-xr-x  src/dynarec/dynarec_arm64_0f.c    24
-rwxr-xr-x  src/dynarec/dynarec_arm64_660f.c  28
-rwxr-xr-x  src/dynarec/dynarec_arm64_f20f.c  17
-rwxr-xr-x  src/dynarec/dynarec_arm64_f30f.c  32
4 files changed, 25 insertions, 76 deletions
diff --git a/src/dynarec/dynarec_arm64_0f.c b/src/dynarec/dynarec_arm64_0f.c
index d060818a..29298506 100755
--- a/src/dynarec/dynarec_arm64_0f.c
+++ b/src/dynarec/dynarec_arm64_0f.c
@@ -96,11 +96,8 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 VMOVQ(v0, v1);

             } else {

                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);

-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xff0<<3, 7, rex, 0, 0);

-                LDRx_U12(x2, ed, fixedaddress);

-                VMOVQDfrom(v0, 0, x2);

-                LDRx_U12(x2, ed, fixedaddress+8);

-                VMOVQDfrom(v0, 1, x2);

+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0);

+                VLDR64_U12(v0, ed, fixedaddress);   // no alignment issue with ARMv8 NEON :)

             }

             break;

         case 0x11:

@@ -113,11 +110,8 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 v1 = sse_get_reg_empty(dyn, ninst, x1, ed);

                 VMOVQ(v1, v0);

             } else {

-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xff0<<3, 7, rex, 0, 0);

-                VMOVQDto(x2, v0, 0);

-                STRx_U12(x2, ed, fixedaddress);

-                VMOVQDto(x2, v0, 1);

-                STRx_U12(x2, ed, fixedaddress+8);

+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0);

+                VSTR128_U12(v0, ed, fixedaddress);

             }

             break;

 

@@ -182,15 +176,9 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             if(MODREG) {

                 s0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3));

             } else {

-                parity = getedparity(dyn, ninst, addr, nextop, 3, 0);

                 s0 = fpu_get_scratch(dyn);

-                if(parity) {

-                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0);

-                    VLDR32_U12(s0, ed, fixedaddress);

-                } else {

-                    GETED(0);

-                    VMOVQSfrom(s0, 0, ed);

-                }

+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0);

+                VLDR32_U12(s0, ed, fixedaddress);

             }

             FCMPS(v0, s0);

             FCOMI(x1, x2);

diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c
index a168dd24..5b230443 100755
--- a/src/dynarec/dynarec_arm64_660f.c
+++ b/src/dynarec/dynarec_arm64_660f.c
@@ -23,13 +23,13 @@
 #include "dynarec_arm64_helper.h"

 

 // Get EX as a quad

-#define GETEX(a, D)                                                                                     \

-    if(MODREG) {                                                                                        \

-        a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3));                                         \

-    } else {                                                                                            \

-        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, (1<<4)-1, rex, 0, D);  \

-        a = fpu_get_scratch(dyn);                                                                       \

-        VLDR128_U12(a, ed, fixedaddress);                                                               \

+#define GETEX(a, D)                                                                                 \

+    if(MODREG) {                                                                                    \

+        a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3));                                     \

+    } else {                                                                                        \

+        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, D);    \

+        a = fpu_get_scratch(dyn);                                                                   \

+        VLDR128_U12(a, ed, fixedaddress);                                                           \

     }

 

 #define GETG        gd = ((nextop&0x38)>>3)+(rex.r<<3)

@@ -169,18 +169,16 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     ed = xRAX + (nextop&7) + (rex.b<<3);

                     VMOVQDto(ed, v0, 0);

                 } else {

-                    VMOVQDto(x2, v0, 0); // to avoid Bus Error, using regular store

                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0);

-                    STRx_U12(x2, ed, fixedaddress);

+                    VSTR64_U12(x2, ed, fixedaddress);

                 }

             } else {

                 if(MODREG) {

                     ed = xRAX + (nextop&7) + (rex.b<<3);

                     VMOVSto(ed, v0, 0);

                 } else {

-                    VMOVSto(x2, v0, 0); // to avoid Bus Error, using regular store

                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0);

-                    STRw_U12(x2, ed, fixedaddress);

+                    VSTR32_U12(x2, ed, fixedaddress);

                 }

             }

             break;

@@ -335,18 +333,12 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;

             GETG;

             v0 = sse_get_reg(dyn, ninst, x1, gd);

-            parity = getedparity(dyn, ninst, addr, nextop, 7, 0);

             if(MODREG) {

                 v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7) + (rex.b<<3));

                 FMOVD(v1, v0);

             } else {

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0);

-                if(parity) {

-                    VSTR64_U12(v0, ed, fixedaddress);

-                } else {

-                    VMOVQDto(x2, v0, 0);

-                    STRx_U12(x2, ed, fixedaddress);

-                }

+                VSTR64_U12(v0, ed, fixedaddress);

             }

             break;

 

diff --git a/src/dynarec/dynarec_arm64_f20f.c b/src/dynarec/dynarec_arm64_f20f.c
index 71e0a049..95bca08a 100755
--- a/src/dynarec/dynarec_arm64_f20f.c
+++ b/src/dynarec/dynarec_arm64_f20f.c
@@ -27,16 +27,9 @@
     if(MODREG) {                                                                                    \

         a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3));                                     \

     } else {                                                                                        \

-        parity = getedparity(dyn, ninst, addr, nextop, 7, D);                                       \

         a = fpu_get_scratch(dyn);                                                                   \

-        if(parity) {                                                                                \

-            addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, D); \

-            VLDR64_U12(a, ed, fixedaddress);                                                        \

-        } else {                                                                                    \

-            addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, D); \

-            LDRx_U12(x2, ed, fixedaddress+0);                                                       \

-            VMOVQDfrom(a, 0, x2);                                                                   \

-        }                                                                                           \

+        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, D);     \

+        VLDR64_U12(a, ed, fixedaddress);                                                            \

     }

 

 #define GETG        gd = ((nextop&0x38)>>3)+(rex.r<<3)

@@ -79,8 +72,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             } else {

                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0);

-                LDRx_U12(x2, ed, fixedaddress);

-                FMOVDx(v0, x2); // upper part reseted

+                VLDR64_U12(v0, ed, fixedaddress); // upper part reseted

             }

             break;

         case 0x11:

@@ -93,9 +85,8 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 d0 = sse_get_reg(dyn, ninst, x1, ed);

                 VMOV(d0, v0);

             } else {

-                VMOVQDto(x2, v0, 0);

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0);

-                STRx_U12(x2, ed, fixedaddress);

+                VSTR64_U12(v0, ed, fixedaddress);

             }

             break;

 

diff --git a/src/dynarec/dynarec_arm64_f30f.c b/src/dynarec/dynarec_arm64_f30f.c
index 93c32fbb..5b0352f7 100755
--- a/src/dynarec/dynarec_arm64_f30f.c
+++ b/src/dynarec/dynarec_arm64_f30f.c
@@ -27,15 +27,9 @@
     if(MODREG) {                                                                                    \

         a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3));                                     \

     } else {                                                                                        \

-        parity = getedparity(dyn, ninst, addr, nextop, 3, D);                                       \

         a = fpu_get_scratch(dyn);                                                                   \

         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, D);     \

-        if(parity) {                                                                                \

-            VLDR32_U12(a, ed, fixedaddress);                                                        \

-        } else {                                                                                    \

-            LDRw_U12(x2, ed, fixedaddress);                                                         \

-            VMOVQSfrom(a, 0, x2);                                                                   \

-        }                                                                                           \

+        VLDR32_U12(a, ed, fixedaddress);                                                            \

     }

 

 #define GETG        gd = ((nextop&0x38)>>3)+(rex.r<<3)

@@ -76,15 +70,8 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 VMOVeS(v0, 0, q0, 0);

             } else {

                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);

-                parity = getedparity(dyn, ninst, addr, nextop, 3, 0);

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0);

-                if(parity) {

-                    VLDR32_U12(v0, ed, fixedaddress);

-                } else {

-                    LDRw_U12(x2, ed, fixedaddress);   // to avoid bus errors

-                    VEORQ(v0, v0, v0);

-                    VMOVQSfrom(v0, 0, x2);

-                }

+                VLDR32_U12(v0, ed, fixedaddress);

             }

             break;

         case 0x11:

@@ -96,14 +83,8 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 q0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3));

                 VMOVeS(q0, 0, v0, 0);

             } else {

-                parity = getedparity(dyn, ninst, addr, nextop, 3, 0);

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0);

-                if(parity) {

-                    VSTR32_U12(v0, ed, fixedaddress);

-                } else {

-                    VMOVSto(x2, v0, 0);

-                    STRw_U12(x2, ed, fixedaddress);

-                }

+                VSTR32_U12(v0, ed, fixedaddress);

             }

             break;

 

@@ -202,11 +183,8 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 VMOVQ(v1, v0);

             } else {

                 v0 = sse_get_reg(dyn, ninst, x1, gd);

-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xff0<<3, 7, rex, 0, 0);

-                VMOVQDto(x2, v0, 0);

-                STRx_U12(x2, ed, fixedaddress+0);

-                VMOVQDto(x2, v0, 1);

-                STRx_U12(x2, ed, fixedaddress+8);

+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0);

+                VSTR128_U12(v0, ed, fixedaddress);

             }

             break;