author     ptitSeb <sebastien.chev@gmail.com>    2024-05-30 17:39:46 +0200
committer  ptitSeb <sebastien.chev@gmail.com>    2024-05-30 17:39:46 +0200
commit     d1703b9bc044152a15e74bf1650a2d63dd0fd153 (patch)
tree       f18bf69fa6231aaa632773a02ab481826edd5449 /src
parent     eb9da7417f801ecb61e7b8eb18eac091683b651b (diff)
download   box64-d1703b9bc044152a15e74bf1650a2d63dd0fd153.tar.gz
           box64-d1703b9bc044152a15e74bf1650a2d63dd0fd153.zip
[ARM64_DYNAREC] Added AVX.66.0F 5B/6F opcodes
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_avx.c          2
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_avx_66_0f.c  150
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_helper.h      18
3 files changed, 166 insertions, 4 deletions
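
The 0x5B case in the new file translates VCVTPS2DQ. As a point of reference, here is a minimal scalar model of the x86 semantics being emulated; this is an illustrative sketch only, not box64 code, and the helper names are invented. Each packed float is rounded with the current rounding mode, and a NaN or out-of-range value produces the "integer indefinite" 0x80000000, which is what the slow (non-fastround) path below reproduces lane by lane via the FPSR IOC bit.

    /* Illustrative reference model only -- not part of box64. */
    #include <stdint.h>
    #include <math.h>

    static int32_t cvtps2dq_lane(float f)
    {
        /* nearbyint() honours the current rounding mode, like MXCSR.RC */
        double r = nearbyint((double)f);
        if (isnan(f) || r > 2147483647.0 || r < -2147483648.0)
            return (int32_t)0x80000000;      /* integer indefinite */
        return (int32_t)r;
    }

    static void cvtps2dq(int32_t dst[4], const float src[4])
    {
        for (int i = 0; i < 4; ++i)
            dst[i] = cvtps2dq_lane(src[i]);
    }
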
diff --git a/src/dynarec/arm64/dynarec_arm64_avx.c b/src/dynarec/arm64/dynarec_arm64_avx.c
index 3d25a610..56236005 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx.c
@@ -51,6 +51,8 @@ uintptr_t dynarec64_AVX(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 
     if( (vex.m==VEX_M_0F) && (vex.p==VEX_P_NONE))
         addr = dynarec64_AVX_0F(dyn, addr, ip, ninst, vex, ok, need_epilog);
+    else if( (vex.m==VEX_M_0F) && (vex.p==VEX_P_66))
+        addr = dynarec64_AVX_66_0F(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else if( (vex.m==VEX_M_0F38) && (vex.p==VEX_P_66))
         addr = dynarec64_AVX_66_0F38(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else if( (vex.m==VEX_M_0F3A) && (vex.p==VEX_P_66))
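
The hunk above routes every VEX-encoded instruction with map 0F and mandatory prefix 66 to the new dynarec64_AVX_66_0F handler. For reference, a hedged sketch of how a 2-byte VEX prefix produces those fields; the struct and function here are illustrative, not box64's vex_t or its actual decoder:

    /* Illustrative decode of the second byte of a C5 xx (2-byte VEX) prefix. */
    #include <stdint.h>

    typedef struct { int map; int pp; int l; int vvvv; } vex_fields_t;

    static vex_fields_t decode_vex2(uint8_t b)
    {
        vex_fields_t f;
        f.map  = 1;                /* 2-byte VEX always implies the 0F map   */
        f.pp   = b & 3;            /* 0=none, 1=66, 2=F3, 3=F2               */
        f.l    = (b >> 2) & 1;     /* 0 = 128-bit (xmm), 1 = 256-bit (ymm)   */
        f.vvvv = (~b >> 3) & 0xF;  /* extra source register, stored inverted */
        return f;
    }

For example, the bytes c5 f9 6f c1 decode to map 0F, pp=66, L=0, i.e. VMOVDQA xmm0, xmm1, and so land in dynarec64_AVX_66_0F with opcode 0x6F.
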
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
new file mode 100644
index 00000000..d3cf8eb9
--- /dev/null
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
@@ -0,0 +1,150 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+#include "my_cpuid.h"
+#include "emu/x87emu_private.h"
+#include "emu/x64shaext.h"
+
+#include "arm64_printer.h"
+#include "dynarec_arm64_private.h"
+#include "dynarec_arm64_functions.h"
+#include "dynarec_arm64_helper.h"
+
+uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog)
+{
+    (void)ip; (void)need_epilog;
+
+    uint8_t opcode = F8;
+    uint8_t nextop, u8;
+    uint8_t gd, ed;
+    uint8_t wback, wb1, wb2;
+    uint8_t eb1, eb2, gb1, gb2;
+    int32_t i32, i32_;
+    int cacheupd = 0;
+    int v0, v1, v2;
+    int q0, q1, q2;
+    int d0, d1, d2;
+    int s0;
+    uint64_t tmp64u;
+    int64_t j64;
+    int64_t fixedaddress;
+    int unscaled;
+    MAYUSE(wb1);
+    MAYUSE(wb2);
+    MAYUSE(eb1);
+    MAYUSE(eb2);
+    MAYUSE(gb1);
+    MAYUSE(gb2);
+    MAYUSE(q0);
+    MAYUSE(q1);
+    MAYUSE(d0);
+    MAYUSE(d1);
+    MAYUSE(s0);
+    MAYUSE(j64);
+    MAYUSE(cacheupd);
+
+    rex_t rex = vex.rex;
+
+    switch(opcode) {
+
+        case 0x5B:
+            INST_NAME("VCVTPS2DQ Gx, Ex");
+            nextop = F8;
+            GETEX(v1, 0, 0);
+            GETGX_empty(v0);
+            if(box64_dynarec_fastround) {
+                u8 = sse_setround(dyn, ninst, x1, x2, x3);
+                VFRINTISQ(v0, v1);
+                if(!vex.l) x87_restoreround(dyn, ninst, u8);
+                VFCVTZSQS(v0, v0);
+            } else {
+                MRS_fpsr(x5);
+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                MSR_fpsr(x5);
+                u8 = sse_setround(dyn, ninst, x1, x2, x3);
+                MOV32w(x4, 0x80000000);
+                d0 = fpu_get_scratch(dyn, ninst);
+                for(int i=0; i<4; ++i) {
+                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                    MSR_fpsr(x5);
+                    VMOVeS(d0, 0, v1, i);
+                    FRINTIS(d0, d0);
+                    VFCVTZSs(d0, d0);
+                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                    TBZ(x5, FPSR_IOC, 4+4);
+                    VMOVQSfrom(d0, 0, x4);
+                    VMOVeS(v0, i, d0, 0);
+                }
+                if(!vex.l) x87_restoreround(dyn, ninst, u8);
+            }
+            if(vex.l) {
+                GETGY_empty_EY(v0, v1);
+                if(box64_dynarec_fastround) {
+                    VFRINTISQ(v0, v1);
+                    x87_restoreround(dyn, ninst, u8);
+                    VFCVTZSQS(v0, v0);
+                } else {
+                    MRS_fpsr(x5);
+                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                    MSR_fpsr(x5);
+                    MOV32w(x4, 0x80000000);
+                    d0 = fpu_get_scratch(dyn, ninst);
+                    for(int i=0; i<4; ++i) {
+                        BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                        MSR_fpsr(x5);
+                        VMOVeS(d0, 0, v1, i);
+                        FRINTIS(d0, d0);
+                        VFCVTZSs(d0, d0);
+                        MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                        TBZ(x5, FPSR_IOC, 4+4);
+                        VMOVQSfrom(d0, 0, x4);
+                        VMOVeS(v0, i, d0, 0);
+                    }
+                    x87_restoreround(dyn, ninst, u8);
+                }
+            } else YMM0(gd);
+            break;
+
+        case 0x6F:
+            INST_NAME("MOVDQA Gx,Ex");
+            nextop = F8;
+            if(MODREG) {
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0);
+                GETGX_empty(v0);
+                VMOVQ(v0, v1);
+                if(vex.l) {
+                    GETGY_empty_EY(v0, v1);
+                    VMOVQ(v0, v1);
+                }
+            } else {
+                GETGX_empty(v0);
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0);
+                VLD128(v0, ed, fixedaddress);
+                if(vex.l) {
+                    GETGY_empty(v0, -1, -1, -1);
+                    VLD128(v0, ed, fixedaddress+16);
+                }
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+
+        default:
+            DEFAULT;
+    }
+    return addr;
+}
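
Both opcodes above finish with YMM0(gd) when vex.l is clear. That reflects the AVX rule that a VEX.128-encoded instruction zeroes bits 255:128 of its destination YMM register. A minimal sketch of that write-back rule follows; the ymm_t layout is illustrative and is not box64's internal register file:

    /* Illustrative only: a VEX.128 write-back clears the upper YMM half. */
    #include <stdint.h>
    #include <string.h>

    typedef struct { uint8_t lo[16]; uint8_t hi[16]; } ymm_t;

    static void vex128_writeback(ymm_t *dst, const uint8_t result[16])
    {
        memcpy(dst->lo, result, 16);
        memset(dst->hi, 0, 16);    /* bits 255:128 become zero */
    }
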
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 073d6d4b..b9f4f01a 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -476,13 +476,21 @@
     GETGX_empty(gx)
 
 // Get empty GY, and non-written VY and EY
-#define GETGY_empty_VYEY(gy, vy, ey)                                                \
-    vy = ymm_get_reg(dyn, ninst, x1, vex.v, 0, gd, (nextop&7)+(rex.b<<3), -1);      \
+#define GETGY_empty_VYEY(gy, vy, ey)                                                            \
+    vy = ymm_get_reg(dyn, ninst, x1, vex.v, 0, gd, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1);    \
+    if(MODREG)                                                                                  \
+        ey = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0, gd, vex.v, -1);              \
+    else                                                                                        \
+        VLD128(ey, ed, fixedaddress+16);                                                        \
+    gy = ymm_get_reg_empty(dyn, ninst, x1, gd, vex.v, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1)
+
+// Get empty GY, and non-written EY
+#define GETGY_empty_EY(gy, ey)                                                      \
     if(MODREG)                                                                      \
-        ey = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0, gd, vex.v, -1);  \
+        ey = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0, gd, -1, -1);     \
     else                                                                            \
         VLD128(ey, ed, fixedaddress+16);                                            \
-    gy = ymm_get_reg_empty(dyn, ninst, x1, gd, vex.v, (nextop&7)+(rex.b<<3), -1)
+    gy = ymm_get_reg_empty(dyn, ninst, x1, gd, -1, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1)
 
 // Get EX as a quad, (x1 is used)
 #define GETEX_Y(a, w, D)                                                                                \
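
The new GETGY_empty_EY macro above mirrors GETGY_empty_VYEY but drops the VY source, and both now pass -1 for the Ex register when the operand lives in memory. The call pattern it is designed for, echoing its use in dynarec_arm64_avx_66_0f.c above (this restates the shape of the existing code, it is not new code):

    // inside an AVX handler, once the low (xmm) half has been produced:
    if(vex.l) {
        GETGY_empty_EY(v0, v1);    // v0 = upper half of Gx, v1 = upper half of Ex
        VMOVQ(v0, v1);             // operate on the upper 128 bits
    } else YMM0(gd);               // VEX.128: zero the upper half instead
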
@@ -1065,6 +1073,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define dynarec64_F30F     STEPNAME(dynarec64_F30F)
 #define dynarec64_AVX      STEPNAME(dynarec64_AVX)
 #define dynarec64_AVX_0F   STEPNAME(dynarec64_AVX_0F)
+#define dynarec64_AVX_66_0F     STEPNAME(dynarec64_AVX_66_0F)
 #define dynarec64_AVX_66_0F38   STEPNAME(dynarec64_AVX_66_0F38)
 #define dynarec64_AVX_66_0F3A   STEPNAME(dynarec64_AVX_66_0F3A)
 
@@ -1484,6 +1493,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
 uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
+uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);