about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2023-10-11 16:03:50 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2023-10-11 16:03:50 +0200
commit 3e882f3fd9c58a4a864b5a42b38db9288347e231 (patch)
tree 6c02d35975eb80fea0c617622f826adfa3476bca /src
parent 0499c38466ca094b230ec6c825ec96ddcd987892 (diff)
download box64-3e882f3fd9c58a4a864b5a42b38db9288347e231.tar.gz
box64-3e882f3fd9c58a4a864b5a42b38db9288347e231.zip
[ARM64_DYNAREC] Fixed 66 0F 38 20..25/30..35 to only read needed amount of data instead of 128bits (fixes some border case issue)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_660f.c 25
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.h 17
2 files changed, 30 insertions, 12 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 5ca4734d..6dc2f3c0 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -537,14 +537,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 case 0x20:

                     INST_NAME("PMOVSXBW Gx, Ex");  // SSE4 opcode!

                     nextop = F8;

-                    GETEX(q1, 0, 0);

+                    GETEX64(q1, 0, 0);

                     GETGX_empty(q0);

                     SXTL_8(q0, q1);     // 8bits->16bits

                     break;

                 case 0x21:

                     INST_NAME("PMOVSXBD Gx, Ex");  // SSE4 opcode!

                     nextop = F8;

-                    GETEX(q1, 0, 0);

+                    GETEX32(q1, 0, 0);

                     GETGX_empty(q0);

                     SXTL_8(q0, q1);     // 8bits->16bits

                     SXTL_16(q0, q0);    //16bits->32bits

@@ -552,7 +552,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 case 0x22:

                     INST_NAME("PMOVSXBQ Gx, Ex");  // SSE4 opcode!

                     nextop = F8;

-                    GETEX(q1, 0, 0);

+                    GETEX16(q1, 0, 0);

                     GETGX_empty(q0);

                     SXTL_8(q0, q1);     // 8bits->16bits

                     SXTL_16(q0, q0);    //16bits->32bits

@@ -561,14 +561,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 case 0x23:

                     INST_NAME("PMOVSXWD Gx, Ex");  // SSE4 opcode!

                     nextop = F8;

-                    GETEX(q1, 0, 0);

+                    GETEX64(q1, 0, 0);

                     GETGX_empty(q0);

                     SXTL_16(q0, q1);     // 16bits->32bits

                     break;

                 case 0x24:

                     INST_NAME("PMOVSXWQ Gx, Ex");  // SSE4 opcode!

                     nextop = F8;

-                    GETEX(q1, 0, 0);

+                    GETEX32(q1, 0, 0);

                     GETGX_empty(q0);

                     SXTL_16(q0, q1);     // 16bits->32bits

                     SXTL_32(q0, q0);     // 32bits->64bits

@@ -576,7 +576,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 case 0x25:

                     INST_NAME("PMOVSXDQ Gx, Ex");  // SSE4 opcode!

                     nextop = F8;

-                    GETEX(q1, 0, 0);

+                    GETEX64(q1, 0, 0);

                     GETGX_empty(q0);

                     SXTL_32(q0, q1);     // 32bits->64bits

                     break;

@@ -633,14 +633,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 case 0x30:

                     INST_NAME("PMOVZXBW Gx, Ex");  // SSE4 opcode!

                     nextop = F8;

-                    GETEX(q1, 0, 0);

+                    GETEX64(q1, 0, 0);

                     GETGX_empty(q0);

                     UXTL_8(q0, q1);     // 8bits->16bits

                     break;

                 case 0x31:

                     INST_NAME("PMOVZXBD Gx, Ex");  // SSE4 opcode!

                     nextop = F8;

-                    GETEX(q1, 0, 0);

+                    GETEX32(q1, 0, 0);

                     GETGX_empty(q0);

                     UXTL_8(q0, q1);     // 8bits->16bits

                     UXTL_16(q0, q0);    //16bits->32bits

@@ -648,7 +648,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 case 0x32:

                     INST_NAME("PMOVZXBQ Gx, Ex");  // SSE4 opcode!

                     nextop = F8;

-                    GETEX(q1, 0, 0);

+                    GETEX16(q1, 0, 0);

                     GETGX_empty(q0);

                     UXTL_8(q0, q1);     // 8bits->16bits

                     UXTL_16(q0, q0);    //16bits->32bits

@@ -657,14 +657,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 case 0x33:

                     INST_NAME("PMOVZXWD Gx, Ex");  // SSE4 opcode!

                     nextop = F8;

-                    GETEX(q1, 0, 0);

+                    GETEX64(q1, 0, 0);

                     GETGX_empty(q0);

                     UXTL_16(q0, q1);     // 16bits->32bits

                     break;

                 case 0x34:

                     INST_NAME("PMOVZXWQ Gx, Ex");  // SSE4 opcode!

                     nextop = F8;

-                    GETEX(q1, 0, 0);

+                    GETEX32(q1, 0, 0);

                     GETGX_empty(q0);

                     UXTL_16(q0, q1);     // 16bits->32bits

                     UXTL_32(q0, q0);     // 32bits->64bits

@@ -672,10 +672,11 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 case 0x35:

                     INST_NAME("PMOVZXDQ Gx, Ex");  // SSE4 opcode!

                     nextop = F8;

-                    GETEX(q1, 0, 0);

+                    GETEX64(q1, 0, 0);

                     GETGX_empty(q0);

                     UXTL_32(q0, q1);     // 32bits->64bits

                     break;

+

                 case 0x37:

                     INST_NAME("PCMPGTQ Gx, Ex"); // SSE4 opcode!

                     nextop = F8;

diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 792e7e9f..0ff17756 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -422,6 +422,9 @@
         VLD64(a, ed, fixedaddress);                                                                     \
     }
 
+// Get Ex as 64bits, not a quad: thin alias that expands to GETEXSD (warning: x1 gets used)
+#define GETEX64(a, w, D)    GETEXSD(a, w, D)
+
 // Get Ex as a single, not a quad (warning, x1 get used)
 #define GETEXSS(a, w, D)                                                                                \
     if(MODREG) {                                                                                        \
@@ -433,6 +436,20 @@
         VLD32(a, ed, fixedaddress);                                                                     \
     }
 
+// Get Ex as 32bits, not a quad: thin alias that expands to GETEXSS (warning: x1 gets used)
+#define GETEX32(a, w, D)    GETEXSS(a, w, D)
+
+// Get Ex as 16bits, not a quad: register form uses the SSE reg, memory form VLD16-loads only 16 bits into a scratch (warning: x1 gets used)
+#define GETEX16(a, w, D)                                                                                \
+    if(MODREG) {                                                                                        \
+        a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w);                                      \
+    } else {                                                                                            \
+        SMREAD();                                                                                       \
+        a = fpu_get_scratch(dyn);                                                                       \
+        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, D);   \
+        VLD16(a, ed, fixedaddress);                                                                     \
+    }
+
 // Get GM, might use x1, x2 and x3
 #define GETGM(a)                        \
     gd = ((nextop&0x38)>>3);            \