diff options
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_0f.c | 34 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_0f.c | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 6 | ||||
| -rw-r--r-- | src/emu/x64run0f.c | 27 | ||||
| -rw-r--r-- | src/emu/x87emu_private.c | 83 | ||||
| -rw-r--r-- | src/emu/x87emu_private.h | 2 | ||||
| -rw-r--r-- | src/tools/my_cpuid.c | 36 |
7 files changed, 168 insertions, 24 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 581d4821..e62caec5 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -68,15 +68,13 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             if(MODREG)
             switch(nextop) {
                 case 0xD0:
-                    INST_NAME("FAKE xgetbv");
-                    SETFLAGS(X_ALL, SF_SET_NODF);    // Hack to set flags in "don't care" state
-                    GETIP(ip);
-                    STORE_XEMU_CALL(xRIP);
-                    CALL(native_ud, -1);
-                    LOAD_XEMU_CALL(xRIP);
-                    jump_to_epilog(dyn, 0, xRIP, ninst);
-                    *need_epilog = 0;
-                    *ok = 0;
+                    INST_NAME("XGETBV");
+                    CMPSw_REG(xRCX, xZR);
+                    B_MARK(cEQ);
+                    UDF(0);
+                    MARK;
+                    MOV32w(xRAX, 0b11);
+                    MOV32w(xRDX, 0);
                     break;
                 case 0xE0:
                 case 0xE1:
@@ -1805,6 +1803,24 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         LDRw_U12(x4, xEmu, offsetof(x64emu_t, mxcsr));
                         STW(x4, ed, fixedaddress);
                         break;
+                    case 4:
+                        INST_NAME("XSAVE Ed");
+                        MESSAGE(LOG_DUMP, "Need Optimization\n");
+                        fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                        if(ed!=x1) {MOVx_REG(x1, ed);}
+                        MOV32w(x2, rex.is32bits);
+                        CALL((void*)fpu_xsave, -1);
+                        break;
+                    case 5:
+                        INST_NAME("XRSTOR Ed");
+                        MESSAGE(LOG_DUMP, "Need Optimization\n");
+                        fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                        if(ed!=x1) {MOVx_REG(x1, ed);}
+                        MOV32w(x2, rex.is32bits);
+                        CALL((void*)fpu_xrstor, -1);
+                        break;
                     case 7:
                         INST_NAME("CLFLUSH Ed");
                         MESSAGE(LOG_DUMP, "Need Optimization?\n");
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index 6d056fa2..18c42025 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -66,6 +66,9 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             if(MODREG) {
                 switch(nextop) {
                     case 0xD0:
+                        //TODO
+                        DEFAULT;
+                        /*
                         INST_NAME("FAKE xgetbv");
                         nextop = F8;
                         addr = fakeed(dyn, addr, ninst, nextop);
@@ -77,6 +80,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         jump_to_epilog(dyn, 0, xRIP, ninst);
                         *need_epilog = 0;
                         *ok = 0;
+                        */
                         break;
                     default:
                         DEFAULT;
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 4159068f..7c5dfe01 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -66,7 +66,9 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             if(MODREG) {
                 switch(nextop) {
                     case 0xD0:
-                        INST_NAME("FAKE xgetbv");
+                        //TODO
+                        DEFAULT;
+                        /*INST_NAME("FAKE xgetbv");
                         nextop = F8;
                         addr = fakeed(dyn, addr, ninst, nextop);
                         SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags in "don't care" state
@@ -76,7 +78,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         LOAD_XEMU_CALL();
                         jump_to_epilog(dyn, 0, xRIP, ninst);
                         *need_epilog = 0;
-                        *ok = 0;
+                        *ok = 0;*/
                         break;
 
                     case 0xF9:
diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c
index f2666e5e..95b8cc12 100644
--- a/src/emu/x64run0f.c
+++ b/src/emu/x64run0f.c
@@ -91,11 +91,16 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
             if(MODREG)
             switch(nextop) {
                 case 0xD0:
-                    #ifndef TEST_INTERPRETER
-                    emit_signal(emu, SIGILL, (void*)R_RIP, 0);
-                    #else
-                    test->notest = 1;
-                    #endif
+                    if(R_ECX) {     // XGETBV selects the XCR with ECX only; only XCR0 is handled
+                        #ifndef TEST_INTERPRETER
+                        emit_signal(emu, SIGILL, (void*)R_RIP, 0);
+                        #else
+                        test->notest = 1;
+                        #endif
+                    } else {
+                        R_RAX = 0b11;   // x87 & SSE for now
+                        R_RDX = 0;
+                    }
                     break;
                 case 0xE0:
                 case 0xE1:
@@ -1286,6 +1291,18 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
                     GETED(0);
                     ED->dword[0] = emu->mxcsr.x32;
                     break;
+                case 4:                 /* XSAVE Ed */
+                    _GETED(0);
+                    #ifdef TEST_INTERPRETER
+                    emu->sw.f.F87_TOP = emu->top&7;
+                    #else
+                    fpu_xsave(emu, ED, rex.is32bits);
+                    #endif
+                    break;
+                case 5:                 /* XRSTOR Ed */
+                    _GETED(0);
+                    fpu_xrstor(emu, ED, rex.is32bits);
+                    break;
                 case 7:                 /* CLFLUSH Ed */
                     _GETED(0);
                     #if defined(DYNAREC) && !defined(TEST_INTERPRETER)
diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c
index f6fda3b6..79306a76 100644
--- a/src/emu/x87emu_private.c
+++ b/src/emu/x87emu_private.c
@@ -430,3 +430,86 @@ void fpu_fxrstor64(x64emu_t* emu, void* ed)
     // copy SSE regs
     memcpy(&emu->xmm[0], &p->XmmRegisters[0], sizeof(emu->xmm));
 }
+
+typedef struct xsaveheader_s {
+    uint64_t xstate_bv;
+    uint64_t xcomp_bv;
+    uint8_t  reserved[64-16];
+} xsaveheader_t;
+
+void fpu_xsave(x64emu_t* emu, void* ed, int is32bits)
+{
+    xsave64_t *p = (xsave64_t*)ed;
+    xsaveheader_t *h = (xsaveheader_t*)(p+1);  // header sits right after the legacy area
+    uint32_t rfbm = (0b11&R_EAX);   // only bit 0 (x87) and bit 1 (SSE) of EAX are honoured
+    h->xstate_bv =(h->xstate_bv&~0b11)|rfbm;
+    h->xcomp_bv = 0;
+    if(h->xstate_bv&0b01) {
+        int top = emu->top&7;
+        int stack = 8-top;
+        if(emu->fpu_tags == TAGS_EMPTY)
+            stack = 0;
+        emu->sw.f.F87_TOP = top;
+        p->ControlWord = emu->cw.x16;
+        p->StatusWord = emu->sw.x16;
+        p->MxCsr = emu->mxcsr.x32;
+        uint8_t tags = 0;
+        for (int i=0; i<8; ++i)
+            tags |= (((emu->fpu_tags>>(i*2))&0b11)?0:1)<<i;
+        p->TagWord = tags;  // 1 bit per register, the layout fpu_xrstor decodes
+        p->ErrorOpcode = 0;
+        p->ErrorOffset = 0;
+        p->DataOffset = 0;
+        // copy FPU/MMX regs...
+        for(int i=0; i<8; ++i)
+            memcpy(&p->FloatRegisters[i].q[0], (i<stack)?&ST(i):&emu->mmx[i], sizeof(mmx87_regs_t));
+    }
+    if(((h->xstate_bv&0b10)||(h->xstate_bv&0b100))&&!(h->xstate_bv&0b01)) {
+        p->MxCsr = emu->mxcsr.x32;
+    }
+    // copy SSE regs
+    if(h->xstate_bv&0b10) {
+        for(int i=0; i<(is32bits?8:16); ++i)    // 8 XMM regs when is32bits, 16 otherwise
+            memcpy(&p->XmmRegisters[i], &emu->xmm[i], 16);
+    }
+}
+
+void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits)
+{
+    xsave64_t *p = (xsave64_t*)ed;
+    xsaveheader_t *h = (xsaveheader_t*)(p+1);  // header sits right after the legacy area
+    int compressed = (h->xcomp_bv>>63);
+    uint32_t rfbm = (0b11&R_EAX);   // only bit 0 (x87) and bit 1 (SSE) of EAX are honoured
+    uint32_t to_restore = rfbm & h->xstate_bv;
+    uint32_t to_init = rfbm & ~h->xstate_bv;
+    // check component to restore
+    if(to_restore&0b01) {
+        emu->cw.x16 = p->ControlWord;
+        emu->sw.x16 = p->StatusWord;
+        emu->mxcsr.x32 = p->MxCsr;
+        if(box64_sse_flushto0)
+            applyFlushTo0(emu);
+        emu->top = emu->sw.f.F87_TOP;
+        uint8_t tags = p->TagWord;
+        emu->fpu_tags = 0;
+        for(int i=0; i<8; ++i)
+            emu->fpu_tags |= (((tags>>i)&1)?0:0b11)<<(i*2);
+        int top = emu->top&7;
+        int stack = 8-top;
+        if(emu->fpu_tags == TAGS_EMPTY)
+            stack = 0;
+        // copy back MMX regs...
+        for(int i=0; i<8; ++i)
+            memcpy((i<stack)?&ST(i):&emu->mmx[i], &p->FloatRegisters[i].q[0], sizeof(mmx87_regs_t));
+    } else if(to_init&0b01) {
+        reset_fpu(emu);
+    }
+    if(((to_restore&0b10)||(to_restore&0b100))&&!(to_restore&0b01)) {
+        emu->mxcsr.x32 = p->MxCsr;
+    }
+    if(to_restore&0b10) {
+        // copy SSE regs
+        for(int i=0; i<(is32bits?8:16); ++i)    // 8 XMM regs when is32bits, 16 otherwise
+            memcpy(&emu->xmm[i], &p->XmmRegisters[i], 16);
+    }
+}
\ No newline at end of file
diff --git a/src/emu/x87emu_private.h b/src/emu/x87emu_private.h
index a3c589df..db5553a2 100644
--- a/src/emu/x87emu_private.h
+++ b/src/emu/x87emu_private.h
@@ -216,5 +216,7 @@ void fpu_fxsave32(x64emu_t* emu, void* ed);
 void fpu_fxrstor32(x64emu_t* emu, void* ed);
 void fpu_fxsave64(x64emu_t* emu, void* ed);
 void fpu_fxrstor64(x64emu_t* emu, void* ed);
+void fpu_xsave(x64emu_t* emu, void* ed, int is32bits);
+void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits);
 
 #endif //__X87RUN_PRIVATE_H_
diff --git a/src/tools/my_cpuid.c b/src/tools/my_cpuid.c
index a6909570..d2f283e9 100644
--- a/src/tools/my_cpuid.c
+++ b/src/tools/my_cpuid.c
@@ -261,6 +261,7 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
                     | 1<<22     // MOVBE
                     | 1<<23     // POPCOUNT
                     | 1<<25     // aesni
+                    | 1<<26     // xsave
                     ;
             break;
         case 0x2:   // TLB and Cache info. Sending 1st gen P4 info...
@@ -334,15 +335,34 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
             R_EAX = 0;
             break;
         case 0xD:   // Processor Extended State Enumeration Main Leaf / Sub Leaf
-            if(R_CX==0) {
-                R_EAX = 1 | 2;  // x87 SSE saved
-                R_EBX = 512;    // size of xsave/xrstor
-                R_ECX = 512;    // same
-                R_EDX = 0;      // more bits
-            } else if(R_CX==1){
-                R_EAX = R_ECX = R_EBX = R_EDX = 0;  // XSAVEOPT and co are not available
-            } else {
+            switch(R_CX) {
+            case 0:
+                R_EAX = 0b11;   // x87 SSE saved
+                R_EBX = 512+64; // size of xsave/xrstor
+                R_ECX = 512+64; // same
+                R_EDX = 0;      // more bits
+                break;
+            case 1:
+                R_EAX = 0;      // XSAVEOPT (0) and XSAVEC (1), XGETBV with ECX=1 (2) XSAVES (3) and XFD (4) not supported yet
+                R_ECX = R_EBX = R_EDX = 0;
+                break;
+            case 2:
+                // component 0: x87
+                R_EAX = 160;    // size of the x87 block
+                R_EBX = 0;      // offset
+                R_ECX = 0;
+                R_EDX = 0;
+                break;
+            case 3:
+                // component 1: sse
+                R_EAX = 16*16;  // size of the SSE block
+                R_EBX = 160;    // offset
+                R_ECX = 0;
+                R_EDX = 0;
+                break;
+            default:
                 R_EAX = R_ECX = R_EBX = R_EDX = 0;
+                break;
             }
             break;
         case 0xE:   //?