about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2024-05-30 16:53:39 +0200
committerptitSeb <sebastien.chev@gmail.com>2024-05-30 16:53:39 +0200
commit003a83b40b8645e9215126b883829937db1be967 (patch)
treefa8d0e30e45c15abf17d0f7d9b51a635db09c5c4 /src
parent2f8d28e7dafbd914697a73f37811300922f1552b (diff)
downloadbox64-003a83b40b8645e9215126b883829937db1be967.tar.gz
box64-003a83b40b8645e9215126b883829937db1be967.zip
[ARM64_DYNAREC] Added AVX.66.0F38 18 and AVX.66.0F3A 0C opcodes
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/arm64/arm64_emitter.h11
-rw-r--r--src/dynarec/arm64/dynarec_arm64_avx.c2
-rw-r--r--src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c86
-rw-r--r--src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c46
-rw-r--r--src/dynarec/arm64/dynarec_arm64_helper.h5
5 files changed, 150 insertions, 0 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index e29922db..464693ba 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1483,6 +1483,16 @@ int convert_bitmask(uint64_t bitmask);
 #define VUZP1Q_64(Rt, Rn, Rm)       EMIT(UZP_gen(1, 0b11, Rm, 0, Rn, Rt))
 #define VUZP2Q_64(Rt, Rn, Rm)       EMIT(UZP_gen(1, 0b11, Rm, 1, Rn, Rt))
 
+#define BITBIF_gen(Q, opc2, Rm, Rn, Rd) ((Q)<<30 | 0b101110<<24 | (opc2)<<22 | 1<<21 | (Rm)<<16 | 0b000111<<10 | (Rn)<<5 | (Rd))
+// Bitwise insert Vn in Vd if Vm is "0"
+#define VBIF(Vd, Vn,Vm)             EMIT(BITBIF_gen(0, 0b11, Vm, Vn, Vd))
+// Bitwise insert Vn in Vd if Vm is "0"
+#define VBIFQ(Vd, Vn,Vm)            EMIT(BITBIF_gen(1, 0b11, Vm, Vn, Vd))
+// Bitwise insert Vn in Vd if Vm is "1"
+#define VBIT(Vd, Vn,Vm)             EMIT(BITBIF_gen(0, 0b10, Vm, Vn, Vd))
+// Bitwise insert Vn in Vd if Vm is "1"
+#define VBITQ(Vd, Vn,Vm)            EMIT(BITBIF_gen(1, 0b10, Vm, Vn, Vd))
+
 #define DUP_element(Q, imm5, Rn, Rd)    ((Q)<<30 | 0b01110000<<21 | (imm5)<<16 | 1<<10 | (Rn)<<5 | (Rd))
 #define VDUP_8(Vd, Vn, idx)         EMIT(DUP_element(0, ((idx)<<1|1), Vn, Vd))
 #define VDUPQ_8(Vd, Vn, idx)        EMIT(DUP_element(1, ((idx)<<1|1), Vn, Vd))
@@ -1812,6 +1822,7 @@ int convert_bitmask(uint64_t bitmask);
 #define MOVI_vector(Q, op, abc, cmode, defgh, Rd)   ((Q)<<30 | (op)<<29 | 0b0111100000<<19 | (abc)<<16 | (cmode)<<12 | 1<<10 | (defgh)<<5 | (Rd))
 #define MOVIQ_8(Rd, imm8)           EMIT(MOVI_vector(1, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
 #define MOVIQ_16(Rd, imm8, lsl8)    EMIT(MOVI_vector(1, 0, (((imm8)>>5)&0b111), 0b1000|((lsl8)?0b10:0), ((imm8)&0b11111), Rd))
+#define MOVIQ_64(Rd, imm8)          EMIT(MOVI_vector(1, 1, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
 #define MOVI_8(Rd, imm8)            EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
 #define MOVI_16(Rd, imm8, lsl8)     EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1000|((lsl8)?0b10:0), ((imm8)&0b11111), Rd))
 #define MOVI_32(Rd, imm8)           EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b0000, ((imm8)&0b11111), Rd))
diff --git a/src/dynarec/arm64/dynarec_arm64_avx.c b/src/dynarec/arm64/dynarec_arm64_avx.c
index 3150580d..3d25a610 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx.c
@@ -51,6 +51,8 @@ uintptr_t dynarec64_AVX(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 
     if( (vex.m==VEX_M_0F) && (vex.p==VEX_P_NONE))
         addr = dynarec64_AVX_0F(dyn, addr, ip, ninst, vex, ok, need_epilog);
+    else if( (vex.m==VEX_M_0F38) && (vex.p==VEX_P_66))
+        addr = dynarec64_AVX_66_0F38(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else if( (vex.m==VEX_M_0F3A) && (vex.p==VEX_P_66))
         addr = dynarec64_AVX_66_0F3A(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else {DEFAULT;}
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
new file mode 100644
index 00000000..74e8a20c
--- /dev/null
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
@@ -0,0 +1,86 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+#include "my_cpuid.h"
+#include "emu/x87emu_private.h"
+#include "emu/x64shaext.h"
+
+#include "arm64_printer.h"
+#include "dynarec_arm64_private.h"
+#include "dynarec_arm64_functions.h"
+#include "dynarec_arm64_helper.h"
+
+uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog)
+{
+    (void)ip; (void)need_epilog;
+
+    uint8_t opcode = F8;
+    uint8_t nextop, u8;
+    uint8_t gd, ed;
+    uint8_t wback, wb1, wb2;
+    uint8_t eb1, eb2, gb1, gb2;
+    int32_t i32, i32_;
+    int cacheupd = 0;
+    int v0, v1, v2;
+    int q0, q1, q2;
+    int d0, d1, d2;
+    int s0;
+    uint64_t tmp64u;
+    int64_t j64;
+    int64_t fixedaddress;
+    int unscaled;
+    MAYUSE(wb1);
+    MAYUSE(wb2);
+    MAYUSE(eb1);
+    MAYUSE(eb2);
+    MAYUSE(gb1);
+    MAYUSE(gb2);
+    MAYUSE(q0);
+    MAYUSE(q1);
+    MAYUSE(d0);
+    MAYUSE(d1);
+    MAYUSE(s0);
+    MAYUSE(j64);
+    MAYUSE(cacheupd);
+
+    rex_t rex = vex.rex;
+
+    switch(opcode) {
+
+        case 0x18:
+            INST_NAME("VBROADCASTSS Gx, Ex");
+            nextop = F8;
+            if(MODREG) {
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                v1 = fpu_get_scratch(dyn, ninst);
+                VLD32(v1, ed, fixedaddress);
+            }
+            GETGX_empty(v0);
+            VDUPQ_32(v0, v1, 0);
+            if(vex.l) {
+                GETGY_empty(v0, -1, -1, -1);
+                VDUPQ_32(v0, v1, 0);
+            } else YMM0(gd);
+            break;
+
+        default:
+            DEFAULT;
+    }
+    return addr;
+}
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
index b2c2f8b3..12f64243 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
@@ -61,6 +61,52 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
 
     switch(opcode) {
 
+        case 0x0C:
+            INST_NAME("VBLENDPS Gx, Vx, Ex, Ib");
+            nextop = F8;
+            GETGX_empty_VXEX(q0, q2, q1, 1);
+            u8 = F8;
+            if(q0==q1) {
+                for(int i=0; i<4; ++i)
+                    if(u8&(1<<i)) {
+                        VMOVeS(q0, i, q1, i);
+                    } else if(q0!=q2)
+                        VMOVeS(q0, i, q2, i);
+            } else {
+                if(q0!=q2)
+                    VMOVQ(q0, q2);
+                if((u8&15)==0b0011) {
+                    VMOVeD(q0, 0, q1, 0);
+                } else if((u8&15)==0b1100) {
+                    VMOVeD(q0, 1, q1, 1);
+                } else for(int i=0; i<4; ++i)
+                    if(u8&(1<<i)) {
+                        VMOVeS(q0, i, q1, i);
+                    }
+            }
+            if(vex.l) {
+                GETGY_empty_VYEY(q0, q2, q1);
+                if(q0==q1) {
+                    for(int i=0; i<4; ++i)
+                        if(u8&(1<<(i+4))) {
+                            VMOVeS(q0, i, q1, i);
+                        } else if(q0!=q2)
+                            VMOVeS(q0, i, q2, i);
+                } else {
+                    if(q0!=q2)
+                        VMOVQ(q0, q2);
+                    if((u8>>4)==0b0011) {
+                        VMOVeD(q0, 0, q1, 0);
+                    } else if((u8>>4)==0b1100) {
+                        VMOVeD(q0, 1, q1, 1);
+                    } else for(int i=0; i<4; ++i)
+                        if(u8&(1<<(i+4))) {
+                            VMOVeS(q0, i, q1, i);
+                        }
+                }
+            } else YMM0(gd);
+            break;
+
         case 0x18:
             INST_NAME("VINSERTF128 Gx, Ex, imm8");
             nextop = F8;
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index f5b4cfe0..f39fe49a 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -462,6 +462,9 @@
     a = ymm_get_reg(dyn, ninst, x1, gd, vex.v, w1, k1, k2); \
     if(!w2) b = ymm_get_reg(dyn, ninst, x1, vex.v, w2, gd, k1, k2)
 
+#define GETGY_empty(a, k1, k2, k3)                          \
+    a = ymm_get_reg_empty(dyn, ninst, x1, gd, k1, k2, k3)
+
 #define GETGY_empty_VY(a, b, w2, k1, k2)                    \
     b = ymm_get_reg(dyn, ninst, x1, vex.v, w2, gd, k1, k2); \
     a = ymm_get_reg_empty(dyn, ninst, x1, gd, vex.v, k1, k2)
@@ -1062,6 +1065,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define dynarec64_F30F     STEPNAME(dynarec64_F30F)
 #define dynarec64_AVX      STEPNAME(dynarec64_AVX)
 #define dynarec64_AVX_0F   STEPNAME(dynarec64_AVX_0F)
+#define dynarec64_AVX_66_0F38   STEPNAME(dynarec64_AVX_66_0F38)
 #define dynarec64_AVX_66_0F3A   STEPNAME(dynarec64_AVX_66_0F3A)
 
 #define geted           STEPNAME(geted)
@@ -1480,6 +1484,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
 uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
+uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 
 #if STEP < 2