diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-05-30 16:53:39 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-05-30 16:53:39 +0200 |
| commit | 003a83b40b8645e9215126b883829937db1be967 (patch) | |
| tree | fa8d0e30e45c15abf17d0f7d9b51a635db09c5c4 /src | |
| parent | 2f8d28e7dafbd914697a73f37811300922f1552b (diff) | |
| download | box64-003a83b40b8645e9215126b883829937db1be967.tar.gz box64-003a83b40b8645e9215126b883829937db1be967.zip | |
[ARM64_DYNAREC] Added AVX.66.0F38 18 and AVX.66.0F3A 0C opcodes
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 11 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx.c | 2 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c | 86 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c | 46 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 5 |
5 files changed, 150 insertions, 0 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index e29922db..464693ba 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1483,6 +1483,16 @@ int convert_bitmask(uint64_t bitmask);
 #define VUZP1Q_64(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b11, Rm, 0, Rn, Rt))
 #define VUZP2Q_64(Rt, Rn, Rm) EMIT(UZP_gen(1, 0b11, Rm, 1, Rn, Rt))
 
+#define BITBIF_gen(Q, opc2, Rm, Rn, Rd) ((Q)<<30 | 0b101110<<24 | (opc2)<<22 | 1<<21 | (Rm)<<16 | 0b000111<<10 | (Rn)<<5 | (Rd)) // opc2 (bits 23:22): 0b10 = BIT, 0b11 = BIF; Rn sits at bits 9:5
+// BIF, 64-bit vector: insert each bit of Vn into Vd where the matching bit of Vm is "0"
+#define VBIF(Vd, Vn,Vm) EMIT(BITBIF_gen(0, 0b11, Vm, Vn, Vd))
+// BIF, 128-bit vector: insert each bit of Vn into Vd where the matching bit of Vm is "0"
+#define VBIFQ(Vd, Vn,Vm) EMIT(BITBIF_gen(1, 0b11, Vm, Vn, Vd))
+// BIT, 64-bit vector: insert each bit of Vn into Vd where the matching bit of Vm is "1"
+#define VBIT(Vd, Vn,Vm) EMIT(BITBIF_gen(0, 0b10, Vm, Vn, Vd))
+// BIT, 128-bit vector: insert each bit of Vn into Vd where the matching bit of Vm is "1"
+#define VBITQ(Vd, Vn,Vm) EMIT(BITBIF_gen(1, 0b10, Vm, Vn, Vd))
+
 #define DUP_element(Q, imm5, Rn, Rd) ((Q)<<30 | 0b01110000<<21 | (imm5)<<16 | 1<<10 | (Rn)<<5 | (Rd))
 #define VDUP_8(Vd, Vn, idx) EMIT(DUP_element(0, ((idx)<<1|1), Vn, Vd))
 #define VDUPQ_8(Vd, Vn, idx) EMIT(DUP_element(1, ((idx)<<1|1), Vn, Vd))
@@ -1812,6 +1822,7 @@ int convert_bitmask(uint64_t bitmask);
 #define MOVI_vector(Q, op, abc, cmode, defgh, Rd) ((Q)<<30 | (op)<<29 | 0b0111100000<<19 | (abc)<<16 | (cmode)<<12 | 1<<10 | (defgh)<<5 | (Rd))
 #define MOVIQ_8(Rd, imm8) EMIT(MOVI_vector(1, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
 #define MOVIQ_16(Rd, imm8, lsl8) EMIT(MOVI_vector(1, 0, (((imm8)>>5)&0b111), 0b1000|((lsl8)?0b10:0), ((imm8)&0b11111), Rd))
+#define MOVIQ_64(Rd, imm8) EMIT(MOVI_vector(1, 1, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd)) // same as MOVIQ_8 but with op=1 (64-bit MOVI form per the Arm ARM)
 #define MOVI_8(Rd, imm8) EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
 #define MOVI_16(Rd, imm8, lsl8) EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1000|((lsl8)?0b10:0), ((imm8)&0b11111), Rd))
 #define MOVI_32(Rd, imm8) EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b0000, ((imm8)&0b11111), Rd))
diff --git a/src/dynarec/arm64/dynarec_arm64_avx.c b/src/dynarec/arm64/dynarec_arm64_avx.c
index 3150580d..3d25a610 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx.c
@@ -51,6 +51,8 @@ uintptr_t dynarec64_AVX(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 
     if( (vex.m==VEX_M_0F) && (vex.p==VEX_P_NONE))
         addr = dynarec64_AVX_0F(dyn, addr, ip, ninst, vex, ok, need_epilog);
+    else if( (vex.m==VEX_M_0F38) && (vex.p==VEX_P_66))
+        addr = dynarec64_AVX_66_0F38(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else if( (vex.m==VEX_M_0F3A) && (vex.p==VEX_P_66))
         addr = dynarec64_AVX_66_0F3A(dyn, addr, ip, ninst, vex, ok, need_epilog);
     else {DEFAULT;}
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
new file mode 100644
index 00000000..74e8a20c
--- /dev/null
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
@@ -0,0 +1,86 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+#include "my_cpuid.h"
+#include "emu/x87emu_private.h"
+#include "emu/x64shaext.h"
+
+#include "arm64_printer.h"
+#include "dynarec_arm64_private.h"
+#include "dynarec_arm64_functions.h"
+#include "dynarec_arm64_helper.h"
+
+uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog) // handles VEX.66.0F38 opcodes; returns the updated addr
+{
+    (void)ip; (void)need_epilog;
+
+    uint8_t opcode = F8;
+    uint8_t nextop, u8;
+    uint8_t gd, ed;
+    uint8_t wback, wb1, wb2;
+    uint8_t eb1, eb2, gb1, gb2;
+    int32_t i32, i32_;
+    int cacheupd = 0;
+    int v0, v1, v2;
+    int q0, q1, q2;
+    int d0, d1, d2;
+    int s0;
+    uint64_t tmp64u;
+    int64_t j64;
+    int64_t fixedaddress;
+    int unscaled;
+    MAYUSE(wb1);
+    MAYUSE(wb2);
+    MAYUSE(eb1);
+    MAYUSE(eb2);
+    MAYUSE(gb1);
+    MAYUSE(gb2);
+    MAYUSE(q0);
+    MAYUSE(q1);
+    MAYUSE(d0);
+    MAYUSE(d1);
+    MAYUSE(s0);
+    MAYUSE(j64);
+    MAYUSE(cacheupd);
+
+    rex_t rex = vex.rex;
+
+    switch(opcode) {
+
+        case 0x18:
+            INST_NAME("VBROADCASTSS Gx, Ex");
+            nextop = F8;
+            if(MODREG) {
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0); // register source: Ex is an existing SSE register
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                v1 = fpu_get_scratch(dyn, ninst);
+                VLD32(v1, ed, fixedaddress); // memory source: load the 32-bit element into a scratch register
+            }
+            GETGX_empty(v0);
+            VDUPQ_32(v0, v1, 0); // replicate 32-bit lane 0 of v1 across the low 128 bits of Gx
+            if(vex.l) {
+                GETGY_empty(v0, -1, -1, -1); // v0 now names the upper-128-bit register of Gx
+                VDUPQ_32(v0, v1, 0); // same broadcast for the upper half (256-bit form)
+            } else YMM0(gd); // 128-bit form: presumably clears the upper half of the destination -- TODO confirm against YMM0
+            break;
+
+        default:
+            DEFAULT;
+    }
+    return addr;
+}
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
index b2c2f8b3..12f64243 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
@@ -61,6 +61,52 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
 
     switch(opcode) {
 
+        case 0x0C:
+            INST_NAME("VBLENDPS Gx, Vx, Ex, Ib");
+            nextop = F8;
+            GETGX_empty_VXEX(q0, q2, q1, 1); // presumably q0 = Gx (dest), q2 = Vx, q1 = Ex, going by the macro name -- TODO confirm
+            u8 = F8; // imm8 mask: bit i set selects lane i from q1, clear selects it from q2
+            if(q0==q1) {
+                for(int i=0; i<4; ++i) // dest aliases q1: a whole-register copy of q2 would destroy it, so go lane by lane
+                    if(u8&(1<<i)) {
+                        VMOVeS(q0, i, q1, i);
+                    } else if(q0!=q2)
+                        VMOVeS(q0, i, q2, i);
+            } else {
+                if(q0!=q2)
+                    VMOVQ(q0, q2); // start from q2, then overwrite the selected lanes with q1
+                if((u8&15)==0b0011) {
+                    VMOVeD(q0, 0, q1, 0); // lanes 0-1 selected: one 64-bit element move
+                } else if((u8&15)==0b1100) {
+                    VMOVeD(q0, 1, q1, 1); // lanes 2-3 selected: one 64-bit element move
+                } else for(int i=0; i<4; ++i)
+                    if(u8&(1<<i)) {
+                        VMOVeS(q0, i, q1, i);
+                    }
+            }
+            if(vex.l) {
+                GETGY_empty_VYEY(q0, q2, q1); // same blend for the upper 128 bits, driven by imm8 bits 4..7
+                if(q0==q1) {
+                    for(int i=0; i<4; ++i)
+                        if(u8&(1<<(i+4))) {
+                            VMOVeS(q0, i, q1, i);
+                        } else if(q0!=q2)
+                            VMOVeS(q0, i, q2, i);
+                } else {
+                    if(q0!=q2)
+                        VMOVQ(q0, q2);
+                    if((u8>>4)==0b0011) {
+                        VMOVeD(q0, 0, q1, 0);
+                    } else if((u8>>4)==0b1100) {
+                        VMOVeD(q0, 1, q1, 1);
+                    } else for(int i=0; i<4; ++i)
+                        if(u8&(1<<(i+4))) {
+                            VMOVeS(q0, i, q1, i);
+                        }
+                }
+            } else YMM0(gd); // 128-bit form: presumably clears the upper half of the destination -- TODO confirm against YMM0
+            break;
+
         case 0x18:
             INST_NAME("VINSERTF128 Gx, Ex, imm8");
             nextop = F8;
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index f5b4cfe0..f39fe49a 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -462,6 +462,9 @@
     a = ymm_get_reg(dyn, ninst, x1, gd, vex.v, w1, k1, k2); \
     if(!w2) b = ymm_get_reg(dyn, ninst, x1, vex.v, w2, gd, k1, k2)
 
+#define GETGY_empty(a, k1, k2, k3) \
+    a = ymm_get_reg_empty(dyn, ninst, x1, gd, k1, k2, k3) // sets a from ymm_get_reg_empty for gd, forwarding k1..k3 unchanged
+
 #define GETGY_empty_VY(a, b, w2, k1, k2) \
     b = ymm_get_reg(dyn, ninst, x1, vex.v, w2, gd, k1, k2); \
     a = ymm_get_reg_empty(dyn, ninst, x1, gd, vex.v, k1, k2)
@@ -1062,6 +1065,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define dynarec64_F30F STEPNAME(dynarec64_F30F)
 #define dynarec64_AVX STEPNAME(dynarec64_AVX)
 #define dynarec64_AVX_0F STEPNAME(dynarec64_AVX_0F)
+#define dynarec64_AVX_66_0F38 STEPNAME(dynarec64_AVX_66_0F38)
 #define dynarec64_AVX_66_0F3A STEPNAME(dynarec64_AVX_66_0F3A)
 
 #define geted STEPNAME(geted)
@@ -1480,6 +1484,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
 uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
+uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog);
 
 #if STEP < 2