about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author    ptitSeb <sebastien.chev@gmail.com>  2024-06-01 14:28:32 +0200
committer ptitSeb <sebastien.chev@gmail.com>  2024-06-01 14:28:32 +0200
commit    a89d7a2e2a88a10e34293a2aaf7878cd9cce6865 (patch)
tree      1d6b58999917aa3f2b51eff9b8ac946a418c3de0 /src
parent    0336f6a9872d5c95959a8566d5e0391890761794 (diff)
download  box64-a89d7a2e2a88a10e34293a2aaf7878cd9cce6865.tar.gz
          box64-a89d7a2e2a88a10e34293a2aaf7878cd9cce6865.zip
[ARM64_DYNAREC] Added AVX.F2.0F 10-12/2A/2C/2D opcodes
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_avx.c        |   2
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c  | 190
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_f20f.c       |   7
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_helper.h     |   9
-rw-r--r--  src/emu/x64runavxf20f.c                      |   4
5 files changed, 205 insertions, 7 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_avx.c b/src/dynarec/arm64/dynarec_arm64_avx.c
index 1dade096..4a89afd1 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx.c
@@ -53,6 +53,8 @@ uintptr_t dynarec64_AVX(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         addr = dynarec64_AVX_0F(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else if( (vex.m==VEX_M_0F) && (vex.p==VEX_P_66))
         addr = dynarec64_AVX_66_0F(dyn, addr, ip, ninst, vex, ok, need_epilog);
+    else if( (vex.m==VEX_M_0F) && (vex.p==VEX_P_F2))
+        addr = dynarec64_AVX_F2_0F(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else if( (vex.m==VEX_M_0F) && (vex.p==VEX_P_F3))
         addr = dynarec64_AVX_F3_0F(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else if( (vex.m==VEX_M_0F38) && (vex.p==VEX_P_66))
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
new file mode 100644
index 00000000..3cfafcaa
--- /dev/null
+++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
@@ -0,0 +1,190 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+#include "my_cpuid.h"
+#include "emu/x87emu_private.h"
+#include "emu/x64shaext.h"
+
+#include "arm64_printer.h"
+#include "dynarec_arm64_private.h"
+#include "dynarec_arm64_functions.h"
+#include "dynarec_arm64_helper.h"
+
+uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog)
+{
+    (void)ip; (void)need_epilog;
+
+    uint8_t opcode = F8;
+    uint8_t nextop, u8;
+    uint8_t gd, ed, vd;
+    uint8_t wback, wb1, wb2;
+    uint8_t eb1, eb2, gb1, gb2;
+    int32_t i32, i32_;
+    int cacheupd = 0;
+    int v0, v1, v2;
+    int q0, q1, q2;
+    int d0, d1, d2;
+    int s0;
+    uint64_t tmp64u;
+    int64_t j64;
+    int64_t fixedaddress;
+    int unscaled;
+    MAYUSE(wb1);
+    MAYUSE(wb2);
+    MAYUSE(eb1);
+    MAYUSE(eb2);
+    MAYUSE(gb1);
+    MAYUSE(gb2);
+    MAYUSE(q0);
+    MAYUSE(q1);
+    MAYUSE(d0);
+    MAYUSE(d1);
+    MAYUSE(s0);
+    MAYUSE(j64);
+    MAYUSE(cacheupd);
+
+    rex_t rex = vex.rex;
+
+    switch(opcode) {
+
+        case 0x10:
+            INST_NAME("VMOVSD Gx, [Vx,] Ex");
+            nextop = F8;
+            GETG;
+            if(MODREG) {
+                GETGX_empty_VXEX(v0, v2, v1, 0);
+                if(v0!=v1) VMOVeD(v0, 0, v1, 0);
+                if(v0!=v2) VMOVeD(v0, 1, v2, 1);
+            } else {
+                SMREAD();
+                GETGX_empty(v0);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
+                VLD64(v0, ed, fixedaddress); // upper part reseted
+            }
+            YMM0(gd);
+            break;
+        case 0x11:
+            INST_NAME("VMOVSD Ex, Vx, Gx");
+            nextop = F8;
+            GETG;
+            v0 = sse_get_reg(dyn, ninst, x1, gd, 0);
+            if(MODREG) {
+                GETVXEX(v2, 0, v1, 1, 0);
+                if(v0!=v1) VMOVeD(v1, 0, v0, 0);
+                if(v1!=v2) VMOVeD(v1, 1, v2, 1);
+                YMM0((nextop&7)+(rex.b<<3));
+            } else {
+                WILLWRITE2();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
+                VST64(v0, ed, fixedaddress);
+                SMWRITE2();
+            }
+            break;
+        case 0x12:
+            INST_NAME("VMOVDDUP Gx, Ex");
+            nextop = F8;
+            GETG;
+            if(MODREG) {
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0);
+                GETGX_empty(v0);
+                VDUPQ_64(v0, v1, 0);
+                if(vex.l) {
+                    GETGY_empty_EY(v0, v1);
+                    VDUPQ_64(v0, v1, 0);
+                }
+            } else {
+                SMREAD();
+                v0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                VLDQ1R_64(v0, ed);
+                if(vex.l) {
+                    GETGY_empty(v0, -1, -1, -1);
+                    ADDx_U12(x3, ed, 16);
+                    VLDQ1R_64(v0, ed);
+                }
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+
+        case 0x2A:
+            INST_NAME("VCVTSI2SD Gx, Vx, Ed");
+            nextop = F8;
+            GETGX_empty_VX(v0, v1, 0);
+            GETED(0);
+            d1 = fpu_get_scratch(dyn, ninst);
+            if(rex.w) {
+                SCVTFDx(d1, ed);
+            } else {
+                SCVTFDw(d1, ed);
+            }
+            if(v0!=v1) VMOVQ(v0, v1);
+            VMOVeD(v0, 0, d1, 0);
+            YMM0(gd);
+            break;
+
+        case 0x2C:
+            INST_NAME("VCVTTSD2SI Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEXSD(q0, 0, 0);
+            if(!box64_dynarec_fastround) {
+                MRS_fpsr(x5);
+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                MSR_fpsr(x5);
+            }
+            FCVTZSxwD(gd, q0);
+            if(!box64_dynarec_fastround) {
+                MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                TBZ_NEXT(x5, FPSR_IOC);
+                if(rex.w) {
+                    ORRx_mask(gd, xZR, 1, 1, 0);    //0x8000000000000000
+                } else {
+                    MOV32w(gd, 0x80000000);
+                }
+            }
+            break;
+        case 0x2D:
+            INST_NAME("VCVTSD2SI Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEXSD(q0, 0, 0);
+            if(!box64_dynarec_fastround) {
+                MRS_fpsr(x5);
+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                MSR_fpsr(x5);
+            }
+            u8 = sse_setround(dyn, ninst, x1, x2, x3);
+            d1 = fpu_get_scratch(dyn, ninst);
+            FRINTID(d1, q0);
+            x87_restoreround(dyn, ninst, u8);
+            FCVTZSxwD(gd, d1);
+            if(!box64_dynarec_fastround) {
+                MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                TBZ_NEXT(x5, FPSR_IOC);
+                if(rex.w) {
+                    ORRx_mask(gd, xZR, 1, 1, 0);    //0x8000000000000000
+                } else {
+                    MOV32w(gd, 0x80000000);
+                }
+            }
+            break;
+
+        default:
+            DEFAULT;
+    }
+    return addr;
+}
diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c
index 8f8e549b..d7eefb17 100644
--- a/src/dynarec/arm64/dynarec_arm64_f20f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f20f.c
@@ -84,14 +84,13 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             if(MODREG) {

                 d0 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0);

                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);

-                VMOVeD(v0, 0, d0, 0);

+                VDUPQ_64(v0, d0, 0);

             } else {

                 SMREAD();

                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);

-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);

-                VLD64(v0, ed, fixedaddress);

+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);

+                VLDQ1R_64(v0, ed);

             }

-            VMOVeD(v0, 1, v0, 0);

             break;

 

         case 0x2A:

diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 96d0c6c7..8fd918d6 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -464,6 +464,11 @@
     GETEX_Y(ex, 0, D);                      \
     GETVX_empty(vx)
 
+// Get VX, and EX
+#define GETVXEX(vx, w1, ex, w2, D)          \
+    GETEX_Y(ex, w2, D);                     \
+    GETVX(vx, w1)
+
 #define GETGY_VY(a, w1, b, w2, k1, k2)                      \
     if(w2) b = ymm_get_reg(dyn, ninst, x1, vex.v, w2, gd, k1, k2); \
     a = ymm_get_reg(dyn, ninst, x1, gd, w1, vex.v, k1, k2); \
@@ -1122,7 +1127,8 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define dynarec64_AVX      STEPNAME(dynarec64_AVX)
 #define dynarec64_AVX_0F   STEPNAME(dynarec64_AVX_0F)
 #define dynarec64_AVX_66_0F     STEPNAME(dynarec64_AVX_66_0F)
-#define dynarec64_AVX_F3_0F     STEPNAME(dynarec64_AVX_F2_0F38)
+#define dynarec64_AVX_F2_0F     STEPNAME(dynarec64_AVX_F2_0F)
+#define dynarec64_AVX_F3_0F     STEPNAME(dynarec64_AVX_F3_0F)
 #define dynarec64_AVX_66_0F38   STEPNAME(dynarec64_AVX_66_0F38)
 #define dynarec64_AVX_66_0F3A   STEPNAME(dynarec64_AVX_66_0F3A)
 #define dynarec64_AVX_F2_0F38   STEPNAME(dynarec64_AVX_F2_0F38)
@@ -1549,6 +1555,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
 uintptr_t dynarec64_AVX(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
+uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
diff --git a/src/emu/x64runavxf20f.c b/src/emu/x64runavxf20f.c
index 3da16c92..b607fc27 100644
--- a/src/emu/x64runavxf20f.c
+++ b/src/emu/x64runavxf20f.c
@@ -60,7 +60,7 @@ uintptr_t RunAVX_F20F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
 
     switch(opcode) {
 
-        case 0x10:  /* VMOVSD Gx Ex */
+        case 0x10:  /* VMOVSD Gx, Vx, Ex */
             nextop = F8;
             GETEX(0);
             GETGX;
@@ -74,7 +74,7 @@ uintptr_t RunAVX_F20F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             GETGY;
             GY->u128 = 0;
             break;
-        case 0x11:  /* VMOVSD Ex Gx */
+        case 0x11:  /* VMOVSD Ex, Vx, Gx */
             nextop = F8;
             GETEX(0);
             GETGX;