diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-05-26 09:21:04 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-05-26 09:21:04 +0200 |
| commit | d09a48fd03466e7160752d8ed17c9df3976d11f9 (patch) | |
| tree | c862d8ae9e7df0b439645b08ed4e5265c174b908 | |
| parent | b5c0a857ca18fa91d0416a35e5ea9c7acf790d79 (diff) | |
| download | box64-d09a48fd03466e7160752d8ed17c9df3976d11f9.tar.gz box64-d09a48fd03466e7160752d8ed17c9df3976d11f9.zip | |
[INTERPRETER] my first avx opcode
| -rw-r--r-- | CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/core.c | 9 | ||||
| -rw-r--r-- | src/emu/x64run.c | 76 | ||||
| -rw-r--r-- | src/emu/x64run_private.h | 19 | ||||
| -rw-r--r-- | src/emu/x64runavx.c | 72 | ||||
| -rw-r--r-- | src/emu/x87emu_private.c | 21 | ||||
| -rw-r--r-- | src/emu/x87emu_private.h | 1 | ||||
| -rw-r--r-- | src/libtools/signals.c | 13 | ||||
| -rw-r--r-- | src/tools/my_cpuid.c | 2 | ||||
| -rw-r--r-- | src/tools/rcfile.c | 1 | ||||
| -rw-r--r-- | src/wrapped/wrappedlibc.c | 2 |
11 files changed, 198 insertions, 19 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 158130d7..5c0299a4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -395,6 +395,7 @@ set(INTERPRETER
     "${BOX64_ROOT}/src/emu/x64runf0.c"
     "${BOX64_ROOT}/src/emu/x64runf20f.c"
     "${BOX64_ROOT}/src/emu/x64runf30f.c"
+    "${BOX64_ROOT}/src/emu/x64runavx.c"
 )
 
 if(STATICBUILD)
diff --git a/src/core.c b/src/core.c
index 48eebd1d..928a056d 100644
--- a/src/core.c
+++ b/src/core.c
@@ -993,6 +993,15 @@ void LoadLogEnv()
         if(!box64_sse42)
             printf_log(LOG_INFO, "Do not expose SSE 4.2 capabilities\n");
     }
+    p = getenv("BOX64_AVX");
+    if(p) {
+        if(strlen(p)==1) {
+            if(p[0]>='0' && p[0]<='0'+1)
+                box64_avx = p[0]-'0';
+        }
+        if(box64_avx)
+            printf_log(LOG_INFO, "Will expose AVX capabilities\n");
+    }
     p = getenv("BOX64_FIX_64BIT_INODES");
     if(p) {
         if(strlen(p)==1) {
diff --git a/src/emu/x64run.c b/src/emu/x64run.c
index d7950424..ef2f550a 100644
--- a/src/emu/x64run.c
+++ b/src/emu/x64run.c
@@ -28,6 +28,29 @@
 
 int my_setcontext(x64emu_t* emu, void* ucp);
 
+// Name of a decoded VEX prefix field value (vex.p), used only in log messages
+static const char* avx_prefix_string(uint16_t p)
+{
+    switch(p) {
+        case 0: return "0";
+        case 1: return "66";
+        case 2: return "F3";
+        case 3: return "F2";
+        default: return "??";
+    }
+}
+// Name of a decoded VEX opcode map field value (vex.m), used only in log messages
+static const char* avx_map_string(uint16_t m)
+{
+    switch(m) {
+        case 0: return "0";
+        case 1: return "0F";
+        case 2: return "0F38";
+        case 3: return "0F3A";
+        default: return "??";
+    }
+}
+
 #ifdef TEST_INTERPRETER
 int RunTest(x64test_t *test)
 #else
@@ -1353,8 +1376,32 @@ x64emurun:
                 emu->segs_serial[_ES] = 0;
                 GD->dword[0] = *(uint32_t*)ED;
             } else {
-                // AVX not supported yet
-                emit_signal(emu, SIGILL, (void*)R_RIP, 0);
+                vex_t vex = {0};    // VEX prefix introduced by 0xC4: two payload bytes follow
+                vex.rex = rex;
+                tmp8u = F8;
+                vex.m = tmp8u&0b00011111;
+                vex.rex.b = (tmp8u&0b00100000)?0:1;     // b, x, r are stored inverted in the payload byte
+                vex.rex.x = (tmp8u&0b01000000)?0:1;
+                vex.rex.r = (tmp8u&0b10000000)?0:1;
+                tmp8u = F8;
+                vex.p = tmp8u&0b00000011;
+                vex.l = (tmp8u>>2)&1;
+                vex.v = ((~tmp8u)>>3)&0b1111;           // v is stored inverted too
+                vex.rex.w = (tmp8u>>7)&1;
+                #ifdef TEST_INTERPRETER
+                if(!(addr = TestAVX(test, vex, addr, &step)))
+                    unimp = 1;
+                #else
+                if(!(addr = RunAVX(emu, vex, addr, &step))) {
+                    printf_log(LOG_NONE, "Unimplemented AVX opcode prefix %s map %s ", avx_prefix_string(vex.p), avx_map_string(vex.m));
+                    unimp = 1;
+                    goto fini;
+                }
+                if(step==2) {
+                    STEP2;
+                }
+                #endif
+                break;
             }
             break;
         case 0xC5:                      /* LDS Gd,Ed */
@@ -1366,8 +1413,31 @@ x64emurun:
                 emu->segs_serial[_DS] = 0;
                 GD->dword[0] = *(uint32_t*)ED;
             } else {
-                // AVX not supported yet
-                emit_signal(emu, SIGILL, (void*)R_RIP, 0);
+                vex_t vex = {0};    // VEX prefix introduced by 0xC5: a single payload byte follows
+                vex.rex = rex;
+                tmp8u = F8;
+                vex.p = tmp8u&0b00000011;
+                vex.l = (tmp8u>>2)&1;
+                vex.v = ((~tmp8u)>>3)&0b1111;           // v is stored inverted
+                vex.rex.r = (tmp8u&0b10000000)?0:1;     // r is stored inverted
+                vex.rex.b = 0;
+                vex.rex.x = 0;
+                vex.rex.w = 0;
+                vex.m = VEX_M_0F;                       // this short form carries no map field
+                #ifdef TEST_INTERPRETER
+                if(!(addr = TestAVX(test, vex, addr, &step)))
+                    unimp = 1;
+                #else
+                if(!(addr = RunAVX(emu, vex, addr, &step))) {
+                    printf_log(LOG_NONE, "Unimplemented AVX opcode prefix %s map %s ", avx_prefix_string(vex.p), avx_map_string(vex.m));
+                    unimp = 1;
+                    goto fini;
+                }
+                if(step==2) {
+                    STEP2;
+                }
+                #endif
+                break;
             }
             break;
         case 0xC6:                      /* MOV Eb,Ib */
diff --git a/src/emu/x64run_private.h b/src/emu/x64run_private.h
index 01cf3e4d..659b76bf 100644
--- a/src/emu/x64run_private.h
+++ b/src/emu/x64run_private.h
@@ -21,6 +21,22 @@ typedef struct rex_s {
     int is32bits;
 } rex_t;
 
+#define VEX_P_NONE  0
+#define VEX_P_66    1
+#define VEX_P_F3    2
+#define VEX_P_F2    3
+#define VEX_M_NONE  0
+#define VEX_M_0F    1
+#define VEX_M_0F38  2
+#define VEX_M_0F3A  3
+typedef struct vex_s {
+    rex_t       rex;
+    uint16_t    l:1;
+    uint16_t    p:2; //0: none, 1: 0x66, 2:0xF3, 3: 0xF2
+    uint16_t    v:4; // src register
+    uint16_t    m:5; // opcode map
+} vex_t;
+
 static inline uint8_t Peek(x64emu_t *emu, int offset){return *(uint8_t*)(R_RIP + offset);}
 
 #ifdef TEST_INTERPRETER
@@ -155,6 +171,8 @@ uintptr_t RunDF(x64emu_t *emu, rex_t rex, uintptr_t addr);
 uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr);
 uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step);
 uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr);
+uintptr_t RunAVX(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step);
+
 
 uintptr_t Test0F(x64test_t *test, rex_t rex, uintptr_t addr, int *step);
 uintptr_t Test64(x64test_t *test, rex_t rex, int seg, uintptr_t addr);
@@ -183,6 +201,7 @@ uintptr_t TestDF(x64test_t *test, rex_t rex, uintptr_t addr);
 uintptr_t TestF0(x64test_t *test, rex_t rex, uintptr_t addr);
 uintptr_t TestF20F(x64test_t *test, rex_t rex, uintptr_t addr, int *step);
 uintptr_t TestF30F(x64test_t *test, rex_t rex, uintptr_t addr);
+uintptr_t TestAVX(x64test_t *test, vex_t vex, uintptr_t addr, int *step);
 
 void x64Syscall(x64emu_t *emu);
 
diff --git a/src/emu/x64runavx.c b/src/emu/x64runavx.c
new file mode 100644
index 00000000..78515b25
--- /dev/null
+++ b/src/emu/x64runavx.c
@@ -0,0 +1,75 @@
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <fenv.h>
+#include <string.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "debug.h"
+#include "box64stack.h"
+#include "x64emu.h"
+#include "x64run.h"
+#include "x64emu_private.h"
+#include "x64run_private.h"
+#include "x64primop.h"
+#include "x64trace.h"
+#include "x87emu_private.h"
+#include "box64context.h"
+#include "my_cpuid.h"
+#include "bridge.h"
+#include "signals.h"
+#include "x64shaext.h"
+#ifdef DYNAREC
+#include "custommem.h"
+#include "../dynarec/native_lock.h"
+#endif
+
+#include "modrm.h"
+
+// Interpret one VEX-encoded opcode; `vex` is the prefix already decoded by the caller.
+// Returns the updated addr when the opcode was handled, or 0 when it is not
+// implemented (the caller then reports it as an unimplemented AVX opcode).
+#ifdef TEST_INTERPRETER
+uintptr_t TestAVX(x64test_t *test, vex_t vex, uintptr_t addr, int *step)
+#else
+uintptr_t RunAVX(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
+#endif
+{
+    uint8_t opcode;
+    uint8_t nextop;
+    uint8_t tmp8u;
+    int8_t tmp8s;
+    int32_t tmp32s, tmp32s2;
+    uint32_t tmp32u, tmp32u2;
+    uint64_t tmp64u, tmp64u2;
+    int64_t tmp64s;
+    reg64_t *oped, *opgd;
+    sse_regs_t *opex, *opgx, eax1;
+    mmx87_regs_t *opem, *opgm, eam1;
+
+#ifdef TEST_INTERPRETER
+    x64emu_t *emu = test->emu;
+#endif
+    opcode = F8;
+
+    switch(opcode) {
+
+    case 0x77:
+        if(!vex.l && (vex.m==VEX_M_0F) && (vex.p==VEX_P_NONE)) {
+            if(vex.v!=0) {
+                emit_signal(emu, SIGILL, (void*)R_RIP, 0);
+            } else {
+                memset(emu->ymm, 0, sizeof(sse_regs_t)*((vex.rex.is32bits)?8:16)); // 8 regs in 32bits mode, 16 otherwise; the ?: must be parenthesized
+            }
+        } else
+            return 0;
+        break;
+    default:
+        return 0;
+    }
+    return addr;
+}
diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c
index e19f67a2..d5e2421a 100644
--- a/src/emu/x87emu_private.c
+++ b/src/emu/x87emu_private.c
@@ -441,11 +441,11 @@ typedef struct xsaveheader_s {
     uint8_t reserved[64-16];
 } xsaveheader_t;
 
-void fpu_xsave(x64emu_t* emu, void* ed, int is32bits)
+void fpu_xsave_mask(x64emu_t* emu, void* ed, int is32bits, uint64_t mask)
 {
     xsave64_t *p = (xsave64_t*)ed;
     xsaveheader_t *h = (xsaveheader_t*)(p+1);
-    uint32_t rfbm = (0b111&R_EAX);
+    uint32_t rfbm = (0b111&mask);
     h->xstate_bv =(h->xstate_bv&~0b111)|rfbm;
     h->xcomp_bv = 0;
     if(h->xstate_bv&0b001) {
@@ -473,16 +473,21 @@ void fpu_xsave(x64emu_t* emu, void* ed, int is32bits)
     }
     // copy SSE regs
     if(h->xstate_bv&0b10) {
-        for(int i=0; i<is32bits?8:16; ++i)
+        for(int i=0; i<(is32bits?8:16); ++i)
             memcpy(&p->XmmRegisters[i], &emu->xmm[i], 16);
     }
     if(h->xstate_bv&0b100) {
         sse_regs_t* avx = (sse_regs_t*)(h+1);
-        for(int i=0; i<is32bits?8:16; ++i)
+        for(int i=0; i<(is32bits?8:16); ++i)
             memcpy(&avx[i], &emu->ymm[i], 16);
     }
 }
 
+void fpu_xsave(x64emu_t* emu, void* ed, int is32bits)
+{
+    fpu_xsave_mask(emu, ed, is32bits, R_RAX);
+}
+
 void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits)
 {
     xsave64_t *p = (xsave64_t*)ed;
@@ -518,19 +523,19 @@ void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits)
     }
     if(to_restore&0b010) {
         // copy SSE regs
-        for(int i=0; i<is32bits?8:16; ++i)
+        for(int i=0; i<(is32bits?8:16); ++i)
             memcpy(&emu->xmm[i], &p->XmmRegisters[i], 16);
     } else if(to_init&0b010) {
-        for(int i=0; i<is32bits?8:16; ++i)
+        for(int i=0; i<(is32bits?8:16); ++i)
             memset(&emu->xmm[i], 0, 16);
     }
     if(to_restore&0b100) {
         // copy AVX upper part of regs
         sse_regs_t* avx = (sse_regs_t*)(h+1);
-        for(int i=0; i<is32bits?8:16; ++i)
+        for(int i=0; i<(is32bits?8:16); ++i)
             memcpy(&emu->ymm[i], &avx[i], 16);
     } else if(to_init&0b100) {
-        for(int i=0; i<is32bits?8:16; ++i)
-            memcpy(&emu->ymm[i], 0, 16);
+        for(int i=0; i<(is32bits?8:16); ++i)
+            memset(&emu->ymm[i], 0, 16);    // init means zero-fill; memcpy from a null source would fault
     }
 }
\ No newline at end of file
diff --git a/src/emu/x87emu_private.h b/src/emu/x87emu_private.h
index db5553a2..b389028a 100644
--- a/src/emu/x87emu_private.h
+++ b/src/emu/x87emu_private.h
@@ -217,6 +217,7 @@ void fpu_fxrstor32(x64emu_t* emu, void* ed);
 void fpu_fxsave64(x64emu_t* emu, void* ed);
 void fpu_fxrstor64(x64emu_t* emu, void* ed);
 void fpu_xsave(x64emu_t* emu, void* ed, int is32bits);
+void fpu_xsave_mask(x64emu_t* emu, void* ed, int is32bits, uint64_t mask);
 void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits);
 
 #endif //__X87RUN_PRIVATE_H_
diff --git a/src/libtools/signals.c b/src/libtools/signals.c
index 97ab274a..1f86f0f8 100644
--- a/src/libtools/signals.c
+++ b/src/libtools/signals.c
@@ -119,8 +119,7 @@ struct x64_fpstate
   uint32_t          mxcsr_mask;
   struct x64_fpreg  _st[8];
   struct x64_xmmreg _xmm[16];
-  uint32_t          res[12];
-  uint32_t          res2[12];
+  uint32_t          res[24];
 }__attribute__((packed));
 
 typedef struct x64_fpstate *x64_fpregset_t;
@@ -988,6 +987,8 @@ void my_sigactionhandler_oldcode(int32_t sig, int simple, siginfo_t* info, void
 
     // TODO: do I need to really setup 2 stack frame? That doesn't seems right!
     // setup stack frame
+    frame -= 512+64+16*16;
+    void* xstate = (void*)frame;
     frame -= sizeof(siginfo_t);
     siginfo_t* info2 = (siginfo_t*)frame;
     memcpy(info2, info, sizeof(siginfo_t));
@@ -1082,10 +1083,10 @@ void my_sigactionhandler_oldcode(int32_t sig, int simple, siginfo_t* info, void
 #endif
 #endif
     // get FloatPoint status
-    sigcontext->uc_mcontext.fpregs = (struct x64_libc_fpstate*)&sigcontext->xstate;
-    fpu_fxsave64(emu, &sigcontext->xstate);
-    // add custom SIGN in reserved area
-    //((unsigned int *)(&sigcontext.xstate.fpstate.padding))[8*4+12] = 0x46505853;  // not yet, when XSAVE / XRSTR will be ready
+    sigcontext->uc_mcontext.fpregs = xstate;//(struct x64_libc_fpstate*)&sigcontext->xstate;
+    fpu_xsave_mask(emu, xstate, 0, 0b111);
+    memcpy(&sigcontext->xstate, xstate, sizeof(sigcontext->xstate));
+    ((struct x64_fpstate*)xstate)->res[12] = 0x46505853;  // magic number to signal an XSTATE type of fpregs
     // get signal mask
 
     if(new_ss) {
diff --git a/src/tools/my_cpuid.c b/src/tools/my_cpuid.c
index 9f0614bf..a78c499d 100644
--- a/src/tools/my_cpuid.c
+++ b/src/tools/my_cpuid.c
@@ -262,6 +262,8 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
                     | 1<<23     // POPCOUNT
                     | 1<<25     // aesni
                     | 1<<26     // xsave
+                    | 1<<27     // osxsave
+                    | box64_avx<<28 // AVX
                     ;
             break;
         case 0x2:   // TLB and Cache info. Sending 1st gen P4 info...
diff --git a/src/tools/rcfile.c b/src/tools/rcfile.c
index 3fc9c869..4f99daec 100644
--- a/src/tools/rcfile.c
+++ b/src/tools/rcfile.c
@@ -100,6 +100,7 @@ ENTRYBOOL(BOX64_NOPULSE, box64_nopulse) \
 ENTRYBOOL(BOX64_NOGTK, box64_nogtk) \
 ENTRYBOOL(BOX64_NOVULKAN, box64_novulkan) \
 ENTRYBOOL(BOX64_SSE42, box64_sse42) \
+ENTRYBOOL(BOX64_AVX, box64_avx) \
 ENTRYBOOL(BOX64_FUTEX_WAITV, box64_futex_waitv) \
 ENTRYSTRING_(BOX64_BASH, bash) \
 ENTRYINT(BOX64_JITGDB, jit_gdb, 0, 3, 2) \
diff --git a/src/wrapped/wrappedlibc.c b/src/wrapped/wrappedlibc.c
index 9e8ba194..f9dc8e2f 100644
--- a/src/wrapped/wrappedlibc.c
+++ b/src/wrapped/wrappedlibc.c
@@ -1639,7 +1639,7 @@ void CreateCPUInfoFile(int fd)
         P;
         sprintf(buff, "bogomips\t: %g\n", getBogoMips());
         P;
-        sprintf(buff, "flags\t\t: fpu cx8 sep ht cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fxsr cpuid pclmulqdq cx16 aes movbe pni sse4_1 sse4_2 lzcnt popcnt\n");
+        sprintf(buff, "flags\t\t: fpu cx8 sep ht cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fxsr cpuid pclmulqdq cx16 aes movbe pni sse4_1%s%s lzcnt popcnt\n", box64_sse42?" sse4_2":"", box64_avx?" avx":"");
         P;
         sprintf(buff, "address sizes\t: 48 bits physical, 48 bits virtual\n");
         P;