about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--src/dynarec/arm64/dynarec_arm64_0f.c34
-rw-r--r--src/dynarec/la64/dynarec_la64_0f.c4
-rw-r--r--src/dynarec/rv64/dynarec_rv64_0f.c6
-rw-r--r--src/emu/x64run0f.c27
-rw-r--r--src/emu/x87emu_private.c83
-rw-r--r--src/emu/x87emu_private.h2
-rw-r--r--src/tools/my_cpuid.c36
7 files changed, 168 insertions, 24 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 581d4821..e62caec5 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -68,15 +68,13 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             if(MODREG)

             switch(nextop) {

                 case 0xD0:

-                    INST_NAME("FAKE xgetbv");

-                    SETFLAGS(X_ALL, SF_SET_NODF);    // Hack to set flags in "don't care" state

-                    GETIP(ip);

-                    STORE_XEMU_CALL(xRIP);

-                    CALL(native_ud, -1);

-                    LOAD_XEMU_CALL(xRIP);

-                    jump_to_epilog(dyn, 0, xRIP, ninst);

-                    *need_epilog = 0;

-                    *ok = 0;

+                    INST_NAME("XGETBV");

+                    CMPSw_REG(xRCX, xZR);

+                    B_MARK(cEQ);

+                    UDF(0);

+                    MARK;

+                    MOV32w(xRAX, 0b11);

+                    MOV32w(xRDX, 0);

                     break;

                 case 0xE0:

                 case 0xE1:

@@ -1805,6 +1803,24 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         LDRw_U12(x4, xEmu, offsetof(x64emu_t, mxcsr));

                         STW(x4, ed, fixedaddress);

                         break;

+                    case 4:

+                        INST_NAME("XSAVE Ed");

+                        MESSAGE(LOG_DUMP, "Need Optimization\n");

+                        fpu_purgecache(dyn, ninst, 0, x1, x2, x3);

+                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);

+                        if(ed!=x1) {MOVx_REG(x1, ed);}

+                        MOV32w(x2, rex.is32bits);

+                        CALL((void*)fpu_xsave, -1);

+                        break;

+                    case 5:

+                        INST_NAME("XRSTOR Ed");

+                        MESSAGE(LOG_DUMP, "Need Optimization\n");

+                        fpu_purgecache(dyn, ninst, 0, x1, x2, x3);

+                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);

+                        if(ed!=x1) {MOVx_REG(x1, ed);}

+                        MOV32w(x2, rex.is32bits);

+                        CALL((void*)fpu_xrstor, -1);

+                        break;

                     case 7:

                         INST_NAME("CLFLUSH Ed");

                         MESSAGE(LOG_DUMP, "Need Optimization?\n");

diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index 6d056fa2..18c42025 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -66,6 +66,9 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             if(MODREG) {
                 switch(nextop) {
                     case 0xD0:
+                        //TODO
+                        DEFAULT;
+                        /*
                         INST_NAME("FAKE xgetbv");
                         nextop = F8;
                         addr = fakeed(dyn, addr, ninst, nextop);
@@ -77,6 +80,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         jump_to_epilog(dyn, 0, xRIP, ninst);
                         *need_epilog = 0;
                         *ok = 0;
+                        */
                         break;
                     default:
                         DEFAULT;
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 4159068f..7c5dfe01 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -66,7 +66,9 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             if(MODREG) {
                 switch(nextop) {
                     case 0xD0:
-                        INST_NAME("FAKE xgetbv");
+                        //TODO
+                        DEFAULT;
+                        /*INST_NAME("FAKE xgetbv");
                         nextop = F8;
                         addr = fakeed(dyn, addr, ninst, nextop);
                         SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags in "don't care" state
@@ -76,7 +78,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         LOAD_XEMU_CALL();
                         jump_to_epilog(dyn, 0, xRIP, ninst);
                         *need_epilog = 0;
-                        *ok = 0;
+                        *ok = 0;*/
                         break;
 
                     case 0xF9:
diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c
index f2666e5e..95b8cc12 100644
--- a/src/emu/x64run0f.c
+++ b/src/emu/x64run0f.c
@@ -91,11 +91,16 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
             if(MODREG)

             switch(nextop) {

                 case 0xD0:

-                    #ifndef TEST_INTERPRETER

-                    emit_signal(emu, SIGILL, (void*)R_RIP, 0);

-                    #else

-                    test->notest = 1;

-                    #endif

+                    if(R_RCX) {

+                        #ifndef TEST_INTERPRETER

+                        emit_signal(emu, SIGILL, (void*)R_RIP, 0);

+                        #else

+                        test->notest = 1;

+                        #endif

+                    } else {

+                        R_RAX = 0b11;   // x87 & SSE for now

+                        R_RDX = 0;

+                    }

                     break;

                 case 0xE0:

                 case 0xE1:

@@ -1286,6 +1291,18 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
                     GETED(0);

                     ED->dword[0] = emu->mxcsr.x32;

                     break;

+                case 4:                 /* XSAVE Ed */

+                    _GETED(0);

+                    #ifdef TEST_INTERPRETER

+                    emu->sw.f.F87_TOP = emu->top&7;

+                    #else

+                    fpu_xsave(emu, ED, rex.is32bits);

+                    #endif

+                    break;

+                case 5:                 /* XRSTOR Ed */

+                    _GETED(0);

+                    fpu_xrstor(emu, ED, rex.is32bits);

+                    break;

                 case 7:                 /* CLFLUSH Ed */

                     _GETED(0);

                     #if defined(DYNAREC) && !defined(TEST_INTERPRETER)

diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c
index f6fda3b6..79306a76 100644
--- a/src/emu/x87emu_private.c
+++ b/src/emu/x87emu_private.c
@@ -430,3 +430,86 @@ void fpu_fxrstor64(x64emu_t* emu, void* ed)
     // copy SSE regs
     memcpy(&emu->xmm[0], &p->XmmRegisters[0], sizeof(emu->xmm));
 }
+
+/*
+ * 64-byte header of an XSAVE buffer.
+ * fpu_xsave() and fpu_xrstor() locate it immediately after the xsave64_t
+ * area, i.e. at ((xsave64_t*)ed)+1.
+ */
+typedef struct xsaveheader_s {
+    uint64_t xstate_bv;     // bitmap of components held in the buffer: bit 0 = x87, bit 1 = SSE
+    uint64_t xcomp_bv;      // always stored as 0 by fpu_xsave()
+    uint8_t  reserved[64 - 2*sizeof(uint64_t)];    // pads the header up to 64 bytes
+} xsaveheader_t;
+
+/*
+ * Emulated XSAVE: store the x87/MMX and SSE state into the save area at `ed`.
+ *
+ * Only components 0 (x87) and 1 (SSE) are handled: the request mask is
+ * R_EAX & 0b11. The two low bits of the header's xstate_bv are replaced by
+ * that mask (higher bits already present in memory are kept) and xcomp_bv is
+ * cleared.
+ *
+ * emu       emulator state to read from
+ * ed        save area: an xsave64_t immediately followed by an xsaveheader_t
+ * is32bits  non-zero: save 8 XMM registers; zero: save 16
+ */
+void fpu_xsave(x64emu_t* emu, void* ed, int is32bits)
+{
+    xsave64_t *p = (xsave64_t*)ed;
+    // the header sits right after the xsave64_t area
+    xsaveheader_t *h = (xsaveheader_t*)(p+1);
+    uint32_t rfbm = (0b11&R_EAX);
+    h->xstate_bv =(h->xstate_bv&~0b11)|rfbm;
+    h->xcomp_bv = 0;
+    if(h->xstate_bv&0b01) {
+        // x87 / MMX component
+        int top = emu->top&7;
+        int stack = 8-top;
+        if(emu->fpu_tags == TAGS_EMPTY)
+            stack = 0;
+        // refresh TOP in the status word before storing it
+        emu->sw.f.F87_TOP = top;
+        p->ControlWord = emu->cw.x16;
+        p->StatusWord = emu->sw.x16;
+        p->MxCsr = emu->mxcsr.x32;
+        // abridged tag byte: bit i is set when the 2-bit entry i of fpu_tags is 0.
+        // This is the encoding fpu_xrstor() decodes from TagWord.
+        uint8_t tags = 0;
+        for (int i=0; i<8; ++i)
+            tags |= (((emu->fpu_tags>>(i*2))&0b11)?0:1)<<i;
+        p->TagWord = tags;
+        p->ErrorOpcode = 0;
+        p->ErrorOffset = 0;
+        p->DataOffset = 0;
+        // copy FPU/MMX regs: the first `stack` slots come from ST(i), the rest from mmx[i]
+        for(int i=0; i<8; ++i)
+            memcpy(&p->FloatRegisters[i].q[0], (i<stack)?&ST(i):&emu->mmx[i], sizeof(mmx87_regs_t));
+    }
+    // MXCSR must still be stored when the x87 branch above did not run
+    if(((h->xstate_bv&0b10)||(h->xstate_bv&0b100))&&!(h->xstate_bv&0b01)) {
+        p->MxCsr = emu->mxcsr.x32;
+    }
+    // copy SSE regs
+    if(h->xstate_bv&0b10) {
+        const int nxmm = is32bits?8:16;
+        for(int i=0; i<nxmm; ++i)
+            memcpy(&p->XmmRegisters[i], &emu->xmm[i], 16);
+    }
+}
+
+/*
+ * Emulated XRSTOR: reload x87/MMX and SSE state from the save area at `ed`.
+ *
+ * Only components 0 (x87) and 1 (SSE) are considered: the request mask is
+ * R_EAX & 0b11. A requested component whose bit is set in the header's
+ * xstate_bv is restored from memory; a requested x87 component whose bit is
+ * clear is re-initialised with reset_fpu().
+ *
+ * emu       emulator state to update
+ * ed        save area: an xsave64_t immediately followed by an xsaveheader_t
+ * is32bits  non-zero: restore 8 XMM registers; zero: restore 16
+ *
+ * NOTE(review): xcomp_bv is not examined, so the buffer is always read with
+ * the non-compacted layout; a requested SSE component that is absent from
+ * xstate_bv is left untouched rather than initialised.
+ */
+void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits)
+{
+    xsave64_t *p = (xsave64_t*)ed;
+    // the header sits right after the xsave64_t area
+    xsaveheader_t *h = (xsaveheader_t*)(p+1);
+    uint32_t rfbm = (0b11&R_EAX);
+    uint32_t to_restore = rfbm & h->xstate_bv;
+    uint32_t to_init = rfbm & ~h->xstate_bv;
+    // check component to restore
+    if(to_restore&0b01) {
+        emu->cw.x16 = p->ControlWord;
+        emu->sw.x16 = p->StatusWord;
+        emu->mxcsr.x32 = p->MxCsr;
+        if(box64_sse_flushto0)
+            applyFlushTo0(emu);
+        emu->top = emu->sw.f.F87_TOP;
+        // expand the abridged tag byte: bit i set -> entry i = 0b00, clear -> 0b11
+        uint8_t tags = p->TagWord;
+        emu->fpu_tags = 0;
+        for(int i=0; i<8; ++i)
+            emu->fpu_tags |= (((tags>>i)&1)?0:0b11)<<(i*2);
+        int top = emu->top&7;
+        int stack = 8-top;
+        if(emu->fpu_tags == TAGS_EMPTY)
+            stack = 0;
+        // copy back FPU/MMX regs: the first `stack` slots go to ST(i), the rest to mmx[i]
+        for(int i=0; i<8; ++i)
+            memcpy((i<stack)?&ST(i):&emu->mmx[i], &p->FloatRegisters[i].q[0], sizeof(mmx87_regs_t));
+    } else if(to_init&0b01) {
+        reset_fpu(emu);
+    }
+    // MXCSR must still be reloaded when the x87 branch above did not run
+    if(((to_restore&0b10)||(to_restore&0b100))&&!(to_restore&0b01)) {
+        emu->mxcsr.x32 = p->MxCsr;
+    }
+    if(to_restore&0b10) {
+        // copy SSE regs
+        const int nxmm = is32bits?8:16;
+        for(int i=0; i<nxmm; ++i)
+            memcpy(&emu->xmm[i], &p->XmmRegisters[i], 16);
+    }
+}
\ No newline at end of file
diff --git a/src/emu/x87emu_private.h b/src/emu/x87emu_private.h
index a3c589df..db5553a2 100644
--- a/src/emu/x87emu_private.h
+++ b/src/emu/x87emu_private.h
@@ -216,5 +216,7 @@ void fpu_fxsave32(x64emu_t* emu, void* ed);
 void fpu_fxrstor32(x64emu_t* emu, void* ed);
 void fpu_fxsave64(x64emu_t* emu, void* ed);
 void fpu_fxrstor64(x64emu_t* emu, void* ed);
+void fpu_xsave(x64emu_t* emu, void* ed, int is32bits);
+void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits);
 
 #endif //__X87RUN_PRIVATE_H_
diff --git a/src/tools/my_cpuid.c b/src/tools/my_cpuid.c
index a6909570..d2f283e9 100644
--- a/src/tools/my_cpuid.c
+++ b/src/tools/my_cpuid.c
@@ -261,6 +261,7 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
                     | 1<<22     // MOVBE
                     | 1<<23     // POPCOUNT
                     | 1<<25     // aesni
+                    | 1<<26     // xsave
                     ; 
             break;
         case 0x2:   // TLB and Cache info. Sending 1st gen P4 info...
@@ -334,15 +335,34 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
             R_EAX = 0;
             break;
         case 0xD:   // Processor Extended State Enumeration Main Leaf / Sub Leaf
-            if(R_CX==0) {
-                R_EAX = 1 | 2;  // x87 SSE saved
-                R_EBX = 512;    // size of xsave/xrstor
-                R_ECX = 512;    // same
-                R_EDX = 0;      // more bits
-            } else if(R_CX==1){
-                R_EAX = R_ECX = R_EBX = R_EDX = 0;  // XSAVEOPT and co are not available
-            } else {
+            switch(R_CX) {
+            case 0:
+                R_EAX = 0b11;       // x87 SSE saved
+                R_EBX = 512+64;     // size of xsave/xrstor
+                R_ECX = 512+64;     // same
+                R_EDX = 0;          // more bits
+                break;
+            case 1:
+                R_EAX = 0;      // XSAVEOPT (0) and XSAVEC (1), XGETBV with ECX=1 (2) XSAVES (3) and XFD (4) not supported yet
+                R_ECX = R_EBX = R_EDX = 0;
+                break;
+            case 2:
+                // component 0: x87
+                R_EAX = 160; // size of the x87 block
+                R_EBX = 0;  // offset
+                R_ECX = 0;
+                R_EDX = 0;
+                break;
+            case 3:
+                // component 1: sse
+                R_EAX = 16*16; // size of the SSE block
+                R_EBX = 160;  // offset
+                R_ECX = 0;
+                R_EDX = 0;
+                break;
+            default:
                 R_EAX = R_ECX = R_EBX = R_EDX = 0;
+                break;
             }
             break;
         case 0xE:   //?