diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-05-24 20:17:35 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-05-24 20:17:35 +0200 |
| commit | 39303611efec5b89b44e47217b096eff5195dc97 (patch) | |
| tree | ce1cc46c134b3cead7c6ab2f1c02d44d5486695e | |
| parent | fcf0cf00d0a82649ba0cf9f6a8c0450bb6ed5f47 (diff) | |
| download | box64-39303611efec5b89b44e47217b096eff5195dc97.tar.gz box64-39303611efec5b89b44e47217b096eff5195dc97.zip | |
more avx infrastructure
| -rw-r--r-- | src/core.c | 1 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_0f.c | 2 | ||||
| -rw-r--r-- | src/emu/x64emu.c | 12 | ||||
| -rw-r--r-- | src/emu/x64emu_private.h | 2 | ||||
| -rw-r--r-- | src/emu/x64run0f.c | 2 | ||||
| -rw-r--r-- | src/emu/x64test.c | 21 | ||||
| -rw-r--r-- | src/emu/x87emu_private.c | 47 | ||||
| -rw-r--r-- | src/include/debug.h | 1 | ||||
| -rw-r--r-- | src/tools/my_cpuid.c | 17 |
9 files changed, 80 insertions, 25 deletions
diff --git a/src/core.c b/src/core.c index c8a56384..48eebd1d 100644 --- a/src/core.c +++ b/src/core.c @@ -149,6 +149,7 @@ int box64_sse_flushto0 = 0; int box64_x87_no80bits = 0; int box64_sync_rounding = 0; int box64_sse42 = 1; +int box64_avx = 0; int fix_64bit_inodes = 0; int box64_dummy_crashhandler = 1; int box64_mapclean = 0; diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index e62caec5..b7da8adb 100644 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -73,7 +73,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin B_MARK(cEQ); UDF(0); MARK; - MOV32w(xRAX, 0b11); + MOV32w(xRAX, 0b111); MOV32w(xRDX, 0); break; case 0xE0: diff --git a/src/emu/x64emu.c b/src/emu/x64emu.c index 2b8f94bc..f19b6e73 100644 --- a/src/emu/x64emu.c +++ b/src/emu/x64emu.c @@ -235,6 +235,7 @@ void CloneEmu(x64emu_t *newemu, const x64emu_t* emu) newemu->top = emu->top; newemu->fpu_stack = emu->fpu_stack; memcpy(newemu->xmm, emu->xmm, sizeof(emu->xmm)); + memcpy(newemu->ymm, emu->ymm, sizeof(emu->ymm)); newemu->df = emu->df; newemu->df_sav = emu->df_sav; newemu->op1 = emu->op1; @@ -263,6 +264,7 @@ void CopyEmu(x64emu_t *newemu, const x64emu_t* emu) memcpy(newemu->x87, emu->x87, sizeof(emu->x87)); memcpy(newemu->mmx, emu->mmx, sizeof(emu->mmx)); memcpy(newemu->xmm, emu->xmm, sizeof(emu->xmm)); + memcpy(newemu->ymm, emu->ymm, sizeof(emu->ymm)); memcpy(newemu->fpu_ld, emu->fpu_ld, sizeof(emu->fpu_ld)); memcpy(newemu->fpu_ll, emu->fpu_ll, sizeof(emu->fpu_ll)); newemu->fpu_tags = emu->fpu_tags; @@ -416,6 +418,16 @@ const char* DumpCPURegs(x64emu_t* emu, uintptr_t ip, int is32bits) sprintf(tmp, "%02d:%016lx-%016lx", i, emu->xmm[i].q[1], emu->xmm[i].q[0]); } strcat(buff, tmp); + if(box64_avx) { + if (trace_regsdiff && (emu->old_ymm[i].q[0] != emu->ymm[i].q[0] || emu->old_ymm[i].q[1] != emu->ymm[i].q[1])) { + sprintf(tmp, "\e[1;35m-%016lx-%016lx\e[m", emu->ymm[i].q[1], emu->ymm[i].q[0]); + emu->old_ymm[i].q[0] = emu->ymm[i].q[0]; + emu->old_ymm[i].q[1] = emu->ymm[i].q[1]; + } else { + sprintf(tmp, "-:%016lx-%016lx", emu->ymm[i].q[1], emu->ymm[i].q[0]); + } + strcat(buff, tmp); + } if ((i&3)==3) strcat(buff, "\n"); else strcat(buff, " "); } } diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h index 223c6768..e372d384 100644 --- a/src/emu/x64emu_private.h +++ b/src/emu/x64emu_private.h @@ -63,6 +63,7 @@ typedef struct x64emu_s { reg64_t ip; // sse sse_regs_t xmm[16]; + sse_regs_t ymm[16]; // fpu / mmx mmx87_regs_t x87[8]; mmx87_regs_t mmx[8]; @@ -114,6 +115,7 @@ typedef struct x64emu_s { x64test_t test; // used for dynarec testing #ifdef HAVE_TRACE sse_regs_t old_xmm[16]; + sse_regs_t old_ymm[16]; #endif // scratch stack, used for alignment of double and 64bits ints on arm. 200 elements should be enough uint64_t scratch[200]; diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c index 95b8cc12..2b229c5c 100644 --- a/src/emu/x64run0f.c +++ b/src/emu/x64run0f.c @@ -98,7 +98,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step) test->notest = 1; #endif } else { - R_RAX = 0b11; // x87 & SSE for now + R_RAX = 0b111; // x87 & SSE & AVX for now R_RDX = 0; } break; diff --git a/src/emu/x64test.c b/src/emu/x64test.c index ea97500c..df93094a 100644 --- a/src/emu/x64test.c +++ b/src/emu/x64test.c @@ -108,11 +108,22 @@ void x64test_check(x64emu_t* ref, uintptr_t ip) BANNER; printf_log(LOG_NONE, "MXCSR: %x | %x\n", ref->mxcsr.x32, emu->mxcsr.x32); } - if(memcmp(ref->xmm, emu->xmm, sizeof(emu->xmm))) { - BANNER; - for(int i=0; i<16; ++i) { - if(ref->xmm[i].q[0]!=emu->xmm[i].q[0] || ref->xmm[i].q[1]!=emu->xmm[i].q[1] ) { - printf_log(LOG_NONE, "XMM[%02d]: %016zx-%016zx | %016zx-%016zx\n", i, ref->xmm[i].q[1], ref->xmm[i].q[0], emu->xmm[i].q[1], emu->xmm[i].q[0]); + if(box64_avx) { + if(memcmp(ref->xmm, emu->xmm, sizeof(emu->xmm)) || memcmp(ref->ymm, emu->ymm, sizeof(emu->ymm))) { + BANNER; + for(int i=0; i<16; ++i) { + if(ref->ymm[i].u128!=emu->ymm[i].u128 || ref->xmm[i].u128!=emu->xmm[i].u128 ) { + printf_log(LOG_NONE, "YMM[%02d]: %016zx-%016zx-%016zx-%016zx | %016zx-%016zx-%016zx-%016zx\n", i, ref->ymm[i].q[1], ref->ymm[i].q[0], ref->xmm[i].q[1], ref->ymm[i].q[0], emu->ymm[i].q[3], emu->ymm[i].q[2], emu->xmm[i].q[1], emu->xmm[i].q[0]); + } + } + } + } else { + if(memcmp(ref->xmm, emu->xmm, sizeof(emu->xmm))) { + BANNER; + for(int i=0; i<16; ++i) { + if(ref->xmm[i].u128!=emu->xmm[i].u128) { + printf_log(LOG_NONE, "XMM[%02d]: %016zx-%016zx | %016zx-%016zx\n", i, ref->xmm[i].q[1], ref->xmm[i].q[0], emu->xmm[i].q[1], emu->xmm[i].q[0]); + } } } } diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c index 79306a76..e19f67a2 100644 --- a/src/emu/x87emu_private.c +++ b/src/emu/x87emu_private.c @@ -354,7 +354,8 @@ void fpu_fxsave32(x64emu_t* emu, void* ed) for(int i=0; i<8; ++i) memcpy(&p->FloatRegisters[i].q[0], (i<stack)?&ST(i):&emu->mmx[i], sizeof(mmx87_regs_t)); // copy SSE regs - memcpy(&p->XmmRegisters[0], &emu->xmm[0], sizeof(emu->xmm)); + for(int i=0; i<16; ++i) + memcpy(&p->XmmRegisters[i], &emu->xmm[i], 16); } void fpu_fxsave64(x64emu_t* emu, void* ed) @@ -380,7 +381,8 @@ void fpu_fxsave64(x64emu_t* emu, void* ed) for(int i=0; i<8; ++i) memcpy(&p->FloatRegisters[i].q[0], (i<stack)?&ST(i):&emu->mmx[i], sizeof(mmx87_regs_t)); // copy SSE regs - memcpy(&p->XmmRegisters[0], &emu->xmm[0], sizeof(emu->xmm)); + for(int i=0; i<16; ++i) + memcpy(&p->XmmRegisters[i], &emu->xmm[i], 16); } void fpu_fxrstor32(x64emu_t* emu, void* ed) @@ -404,7 +406,8 @@ void fpu_fxrstor32(x64emu_t* emu, void* ed) for(int i=0; i<8; ++i) memcpy((i<stack)?&ST(i):&emu->mmx[i], &p->FloatRegisters[i].q[0], sizeof(mmx87_regs_t)); // copy SSE regs - memcpy(&emu->xmm[0], &p->XmmRegisters[0], sizeof(emu->xmm)); + for(int i=0; i<16; ++i) + memcpy(&emu->xmm[i], &p->XmmRegisters[i], 16); } void fpu_fxrstor64(x64emu_t* emu, void* ed) @@ -428,7 +431,8 @@ void fpu_fxrstor64(x64emu_t* emu, void* ed) for(int i=0; i<8; ++i) memcpy((i<stack)?&ST(i):&emu->mmx[i], &p->FloatRegisters[i].q[0], sizeof(mmx87_regs_t)); // copy SSE regs - memcpy(&emu->xmm[0], &p->XmmRegisters[0], sizeof(emu->xmm)); + for(int i=0; i<16; ++i) + memcpy(&emu->xmm[i], &p->XmmRegisters[i], 16); } typedef struct xsaveheader_s { @@ -441,10 +445,10 @@ void fpu_xsave(x64emu_t* emu, void* ed, int is32bits) { xsave64_t *p = (xsave64_t*)ed; xsaveheader_t *h = (xsaveheader_t*)(p+1); - uint32_t rfbm = (0b11&R_EAX); - h->xstate_bv =(h->xstate_bv&~0b11)|rfbm; + uint32_t rfbm = (0b111&R_EAX); + h->xstate_bv =(h->xstate_bv&~0b111)|rfbm; h->xcomp_bv = 0; - if(h->xstate_bv&0b01) { + if(h->xstate_bv&0b001) { int top = emu->top&7; int stack = 8-top; if(emu->fpu_tags == TAGS_EMPTY) @@ -464,7 +468,7 @@ void fpu_xsave(x64emu_t* emu, void* ed, int is32bits) for(int i=0; i<8; ++i) memcpy(&p->FloatRegisters[i].q[0], (i<stack)?&ST(i):&emu->mmx[i], sizeof(mmx87_regs_t)); } - if(((h->xstate_bv&0b10)||(h->xstate_bv&0b100))&&!(h->xstate_bv&0b01)) { + if(((h->xstate_bv&0b010)||(h->xstate_bv&0b100))&&!(h->xstate_bv&0b001)) { p->MxCsr = emu->mxcsr.x32; } // copy SSE regs @@ -472,6 +476,11 @@ void fpu_xsave(x64emu_t* emu, void* ed, int is32bits) for(int i=0; i<is32bits?8:16; ++i) memcpy(&p->XmmRegisters[i], &emu->xmm[i], 16); } + if(h->xstate_bv&0b100) { + sse_regs_t* avx = (sse_regs_t*)(h+1); + for(int i=0; i<is32bits?8:16; ++i) + memcpy(&avx[i], &emu->ymm[i], 16); + } } void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits) @@ -479,11 +488,11 @@ void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits) xsave64_t *p = (xsave64_t*)ed; xsaveheader_t *h = (xsaveheader_t*)(p+1); int compressed = (h->xcomp_bv>>63); - uint32_t rfbm = (0b11&R_EAX); + uint32_t rfbm = (0b111&R_EAX); uint32_t to_restore = rfbm & h->xstate_bv; uint32_t to_init = rfbm & ~h->xstate_bv; // check componant to restore - if(to_restore&0b01) { + if(to_restore&0b001) { emu->cw.x16 = p->ControlWord; emu->sw.x16 = p->StatusWord; emu->mxcsr.x32 = p->MxCsr; @@ -501,15 +510,27 @@ void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits) // copy back MMX regs... for(int i=0; i<8; ++i) memcpy((i<stack)?&ST(i):&emu->mmx[i], &p->FloatRegisters[i].q[0], sizeof(mmx87_regs_t)); - } else if(to_init&0b01) { + } else if(to_init&0b001) { reset_fpu(emu); } - if(((to_restore&0b10)||(to_restore&0b100))&&!(to_restore&0b01)) { + if(((to_restore&0b010)||(to_restore&0b100))&&!(to_restore&0b001)) { emu->mxcsr.x32 = p->MxCsr; } - if(to_restore&0b10) { + if(to_restore&0b010) { // copy SSE regs for(int i=0; i<is32bits?8:16; ++i) memcpy(&emu->xmm[i], &p->XmmRegisters[i], 16); + } else if(to_init&0b010) { + for(int i=0; i<is32bits?8:16; ++i) + memset(&emu->xmm[i], 0, 16); + } + if(to_restore&0b100) { + // copy AVX upper part of regs + sse_regs_t* avx = (sse_regs_t*)(h+1); + for(int i=0; i<is32bits?8:16; ++i) + memcpy(&emu->ymm[i], &avx[i], 16); + } else if(to_init&0b100) { + for(int i=0; i<is32bits?8:16; ++i) + memcpy(&emu->ymm[i], 0, 16); } } \ No newline at end of file diff --git a/src/include/debug.h b/src/include/debug.h index c4ea7ab1..5932f10a 100644 --- a/src/include/debug.h +++ b/src/include/debug.h @@ -93,6 +93,7 @@ extern int box64_sse_flushto0; extern int box64_x87_no80bits; extern int box64_sync_rounding; extern int box64_sse42; +extern int box64_avx; extern int allow_missing_libs; extern int box64_mapclean; extern int box64_prefer_wrapped; diff --git a/src/tools/my_cpuid.c b/src/tools/my_cpuid.c index d2f283e9..9f0614bf 100644 --- a/src/tools/my_cpuid.c +++ b/src/tools/my_cpuid.c @@ -337,10 +337,10 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u) case 0xD: // Processor Extended State Enumeration Main Leaf / Sub Leaf switch(R_CX) { case 0: - R_EAX = 0b11; // x87 SSE saved - R_EBX = 512+64; // size of xsave/xrstor - R_ECX = 512+64; // same - R_EDX = 0; // more bits + R_EAX = 0b111; // x87 SSE AVX saved + R_EBX = 512+64+16*16; // size of xsave/xrstor + R_ECX = 512+64+16*16; // same + R_EDX = 0; // more bits break; case 1: R_EAX = 0; // XSAVEOPT (0) and XSAVEC (1), XGETBV with ECX=1 (2) XSAVES (3) and XFD (4) not supported yet @@ -355,11 +355,18 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u) break; case 3: // componant 1: sse - R_EAX = 16*16; // size of the x87 block + R_EAX = 16*16; // size of the sse block R_EBX = 160; // offset R_ECX = 0; R_EDX = 0; break; + case 4: + // componant 2: avx + R_EAX = 16*16; // size of the avx block + R_EBX = 512+64; // offset + R_ECX = 0; + R_EDX = 0; + break; default: R_EAX = R_ECX = R_EBX = R_EDX = 0; break; |