about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-05-24 20:17:35 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2024-05-24 20:17:35 +0200
commit 39303611efec5b89b44e47217b096eff5195dc97 (patch)
tree ce1cc46c134b3cead7c6ab2f1c02d44d5486695e
parent fcf0cf00d0a82649ba0cf9f6a8c0450bb6ed5f47 (diff)
download box64-39303611efec5b89b44e47217b096eff5195dc97.tar.gz
download box64-39303611efec5b89b44e47217b096eff5195dc97.zip
more avx infrastructure
-rw-r--r-- src/core.c 1
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_0f.c 2
-rw-r--r-- src/emu/x64emu.c 12
-rw-r--r-- src/emu/x64emu_private.h 2
-rw-r--r-- src/emu/x64run0f.c 2
-rw-r--r-- src/emu/x64test.c 21
-rw-r--r-- src/emu/x87emu_private.c 47
-rw-r--r-- src/include/debug.h 1
-rw-r--r-- src/tools/my_cpuid.c 17
9 files changed, 80 insertions, 25 deletions
diff --git a/src/core.c b/src/core.c
index c8a56384..48eebd1d 100644
--- a/src/core.c
+++ b/src/core.c
@@ -149,6 +149,7 @@ int box64_sse_flushto0 = 0;
 int box64_x87_no80bits = 0;
 int box64_sync_rounding = 0;
 int box64_sse42 = 1;
+int box64_avx = 0;
 int fix_64bit_inodes = 0;
 int box64_dummy_crashhandler = 1;
 int box64_mapclean = 0;
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index e62caec5..b7da8adb 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -73,7 +73,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     B_MARK(cEQ);

                     UDF(0);

                     MARK;

-                    MOV32w(xRAX, 0b11);

+                    MOV32w(xRAX, 0b111);

                     MOV32w(xRDX, 0);

                     break;

                 case 0xE0:

diff --git a/src/emu/x64emu.c b/src/emu/x64emu.c
index 2b8f94bc..f19b6e73 100644
--- a/src/emu/x64emu.c
+++ b/src/emu/x64emu.c
@@ -235,6 +235,7 @@ void CloneEmu(x64emu_t *newemu, const x64emu_t* emu)
 	newemu->top = emu->top;
     newemu->fpu_stack = emu->fpu_stack;
     memcpy(newemu->xmm, emu->xmm, sizeof(emu->xmm));
+    memcpy(newemu->ymm, emu->ymm, sizeof(emu->ymm));
     newemu->df = emu->df;
     newemu->df_sav = emu->df_sav;
     newemu->op1 = emu->op1;
@@ -263,6 +264,7 @@ void CopyEmu(x64emu_t *newemu, const x64emu_t* emu)
 	memcpy(newemu->x87, emu->x87, sizeof(emu->x87));
 	memcpy(newemu->mmx, emu->mmx, sizeof(emu->mmx));
     memcpy(newemu->xmm, emu->xmm, sizeof(emu->xmm));
+    memcpy(newemu->ymm, emu->ymm, sizeof(emu->ymm));
     memcpy(newemu->fpu_ld, emu->fpu_ld, sizeof(emu->fpu_ld));
     memcpy(newemu->fpu_ll, emu->fpu_ll, sizeof(emu->fpu_ll));
     newemu->fpu_tags = emu->fpu_tags;
@@ -416,6 +418,16 @@ const char* DumpCPURegs(x64emu_t* emu, uintptr_t ip, int is32bits)
                 sprintf(tmp, "%02d:%016lx-%016lx", i, emu->xmm[i].q[1], emu->xmm[i].q[0]);
             }
             strcat(buff, tmp);
+            if(box64_avx) {
+                if (trace_regsdiff && (emu->old_ymm[i].q[0] != emu->ymm[i].q[0] || emu->old_ymm[i].q[1] != emu->ymm[i].q[1])) {
+                    sprintf(tmp, "\e[1;35m-%016lx-%016lx\e[m", emu->ymm[i].q[1], emu->ymm[i].q[0]);
+                    emu->old_ymm[i].q[0] = emu->ymm[i].q[0];
+                    emu->old_ymm[i].q[1] = emu->ymm[i].q[1];
+                } else {
+                    sprintf(tmp, "-:%016lx-%016lx", emu->ymm[i].q[1], emu->ymm[i].q[0]);
+                }
+                strcat(buff, tmp);
+            }
             if ((i&3)==3) strcat(buff, "\n"); else strcat(buff, " ");
         }
     }
diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h
index 223c6768..e372d384 100644
--- a/src/emu/x64emu_private.h
+++ b/src/emu/x64emu_private.h
@@ -63,6 +63,7 @@ typedef struct x64emu_s {
     reg64_t     ip;
     // sse
     sse_regs_t  xmm[16];
+    sse_regs_t  ymm[16];
     // fpu / mmx
 	mmx87_regs_t x87[8];
 	mmx87_regs_t mmx[8];
@@ -114,6 +115,7 @@ typedef struct x64emu_s {
     x64test_t   test;       // used for dynarec testing
     #ifdef HAVE_TRACE
     sse_regs_t  old_xmm[16];
+    sse_regs_t  old_ymm[16];
     #endif
     // scratch stack, used for alignment of double and 64bits ints on arm. 200 elements should be enough
     uint64_t    scratch[200];
diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c
index 95b8cc12..2b229c5c 100644
--- a/src/emu/x64run0f.c
+++ b/src/emu/x64run0f.c
@@ -98,7 +98,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
                         test->notest = 1;

                         #endif

                     } else {

-                        R_RAX = 0b11;   // x87 & SSE for now

+                        R_RAX = 0b111;   // x87 & SSE & AVX for now

                         R_RDX = 0;

                     }

                     break;

diff --git a/src/emu/x64test.c b/src/emu/x64test.c
index ea97500c..df93094a 100644
--- a/src/emu/x64test.c
+++ b/src/emu/x64test.c
@@ -108,11 +108,22 @@ void x64test_check(x64emu_t* ref, uintptr_t ip)
         BANNER;
         printf_log(LOG_NONE, "MXCSR: %x | %x\n", ref->mxcsr.x32, emu->mxcsr.x32);
     }
-    if(memcmp(ref->xmm, emu->xmm, sizeof(emu->xmm))) {
-        BANNER;
-        for(int i=0; i<16; ++i) {
-            if(ref->xmm[i].q[0]!=emu->xmm[i].q[0] || ref->xmm[i].q[1]!=emu->xmm[i].q[1] ) {
-                printf_log(LOG_NONE, "XMM[%02d]: %016zx-%016zx | %016zx-%016zx\n", i, ref->xmm[i].q[1], ref->xmm[i].q[0], emu->xmm[i].q[1], emu->xmm[i].q[0]);
+    if(box64_avx) {
+        if(memcmp(ref->xmm, emu->xmm, sizeof(emu->xmm)) || memcmp(ref->ymm, emu->ymm, sizeof(emu->ymm))) {
+            BANNER;
+            for(int i=0; i<16; ++i) {
+                if(ref->ymm[i].u128!=emu->ymm[i].u128 || ref->xmm[i].u128!=emu->xmm[i].u128 ) {
+                    printf_log(LOG_NONE, "YMM[%02d]: %016zx-%016zx-%016zx-%016zx | %016zx-%016zx-%016zx-%016zx\n", i, ref->ymm[i].q[1], ref->ymm[i].q[0], ref->xmm[i].q[1], ref->ymm[i].q[0], emu->ymm[i].q[3], emu->ymm[i].q[2], emu->xmm[i].q[1], emu->xmm[i].q[0]);
+                }
+            }
+        }
+    } else {
+        if(memcmp(ref->xmm, emu->xmm, sizeof(emu->xmm))) {
+            BANNER;
+            for(int i=0; i<16; ++i) {
+                if(ref->xmm[i].u128!=emu->xmm[i].u128) {
+                    printf_log(LOG_NONE, "XMM[%02d]: %016zx-%016zx | %016zx-%016zx\n", i, ref->xmm[i].q[1], ref->xmm[i].q[0], emu->xmm[i].q[1], emu->xmm[i].q[0]);
+                }
             }
         }
     }
diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c
index 79306a76..e19f67a2 100644
--- a/src/emu/x87emu_private.c
+++ b/src/emu/x87emu_private.c
@@ -354,7 +354,8 @@ void fpu_fxsave32(x64emu_t* emu, void* ed)
     for(int i=0; i<8; ++i)
         memcpy(&p->FloatRegisters[i].q[0], (i<stack)?&ST(i):&emu->mmx[i], sizeof(mmx87_regs_t));
     // copy SSE regs
-    memcpy(&p->XmmRegisters[0], &emu->xmm[0], sizeof(emu->xmm));
+    for(int i=0; i<16; ++i)
+        memcpy(&p->XmmRegisters[i], &emu->xmm[i], 16);
 }
 
 void fpu_fxsave64(x64emu_t* emu, void* ed)
@@ -380,7 +381,8 @@ void fpu_fxsave64(x64emu_t* emu, void* ed)
     for(int i=0; i<8; ++i)
         memcpy(&p->FloatRegisters[i].q[0], (i<stack)?&ST(i):&emu->mmx[i], sizeof(mmx87_regs_t));
     // copy SSE regs
-    memcpy(&p->XmmRegisters[0], &emu->xmm[0], sizeof(emu->xmm));
+    for(int i=0; i<16; ++i)
+        memcpy(&p->XmmRegisters[i], &emu->xmm[i], 16);
 }
 
 void fpu_fxrstor32(x64emu_t* emu, void* ed)
@@ -404,7 +406,8 @@ void fpu_fxrstor32(x64emu_t* emu, void* ed)
     for(int i=0; i<8; ++i)
         memcpy((i<stack)?&ST(i):&emu->mmx[i], &p->FloatRegisters[i].q[0], sizeof(mmx87_regs_t));
     // copy SSE regs
-    memcpy(&emu->xmm[0], &p->XmmRegisters[0], sizeof(emu->xmm));
+    for(int i=0; i<16; ++i)
+        memcpy(&emu->xmm[i], &p->XmmRegisters[i], 16);
 }
 
 void fpu_fxrstor64(x64emu_t* emu, void* ed)
@@ -428,7 +431,8 @@ void fpu_fxrstor64(x64emu_t* emu, void* ed)
     for(int i=0; i<8; ++i)
         memcpy((i<stack)?&ST(i):&emu->mmx[i], &p->FloatRegisters[i].q[0], sizeof(mmx87_regs_t));
     // copy SSE regs
-    memcpy(&emu->xmm[0], &p->XmmRegisters[0], sizeof(emu->xmm));
+    for(int i=0; i<16; ++i)
+        memcpy(&emu->xmm[i], &p->XmmRegisters[i], 16);
 }
 
 typedef struct xsaveheader_s {
@@ -441,10 +445,10 @@ void fpu_xsave(x64emu_t* emu, void* ed, int is32bits)
 {
     xsave64_t *p = (xsave64_t*)ed;
     xsaveheader_t *h = (xsaveheader_t*)(p+1);
-    uint32_t rfbm = (0b11&R_EAX);
-    h->xstate_bv =(h->xstate_bv&~0b11)|rfbm;
+    uint32_t rfbm = (0b111&R_EAX);
+    h->xstate_bv =(h->xstate_bv&~0b111)|rfbm;
     h->xcomp_bv = 0;
-    if(h->xstate_bv&0b01) {
+    if(h->xstate_bv&0b001) {
         int top = emu->top&7;
         int stack = 8-top;
         if(emu->fpu_tags == TAGS_EMPTY)
@@ -464,7 +468,7 @@ void fpu_xsave(x64emu_t* emu, void* ed, int is32bits)
         for(int i=0; i<8; ++i)
             memcpy(&p->FloatRegisters[i].q[0], (i<stack)?&ST(i):&emu->mmx[i], sizeof(mmx87_regs_t));
     }
-    if(((h->xstate_bv&0b10)||(h->xstate_bv&0b100))&&!(h->xstate_bv&0b01)) {
+    if(((h->xstate_bv&0b010)||(h->xstate_bv&0b100))&&!(h->xstate_bv&0b001)) {
         p->MxCsr = emu->mxcsr.x32;
     }
     // copy SSE regs
@@ -472,6 +476,11 @@ void fpu_xsave(x64emu_t* emu, void* ed, int is32bits)
         for(int i=0; i<is32bits?8:16; ++i)
             memcpy(&p->XmmRegisters[i], &emu->xmm[i], 16);
     }
+    if(h->xstate_bv&0b100) {
+        sse_regs_t* avx = (sse_regs_t*)(h+1);
+        for(int i=0; i<is32bits?8:16; ++i)
+            memcpy(&avx[i], &emu->ymm[i], 16);
+    }
 }
 
 void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits)
@@ -479,11 +488,11 @@ void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits)
     xsave64_t *p = (xsave64_t*)ed;
     xsaveheader_t *h = (xsaveheader_t*)(p+1);
     int compressed = (h->xcomp_bv>>63);
-    uint32_t rfbm = (0b11&R_EAX);
+    uint32_t rfbm = (0b111&R_EAX);
     uint32_t to_restore = rfbm & h->xstate_bv;
     uint32_t to_init = rfbm & ~h->xstate_bv;
     // check componant to restore
-    if(to_restore&0b01) {
+    if(to_restore&0b001) {
         emu->cw.x16 = p->ControlWord;
         emu->sw.x16 = p->StatusWord;
         emu->mxcsr.x32 = p->MxCsr;
@@ -501,15 +510,27 @@ void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits)
         // copy back MMX regs...
         for(int i=0; i<8; ++i)
             memcpy((i<stack)?&ST(i):&emu->mmx[i], &p->FloatRegisters[i].q[0], sizeof(mmx87_regs_t));
-    } else if(to_init&0b01) {
+    } else if(to_init&0b001) {
         reset_fpu(emu);
     }
-    if(((to_restore&0b10)||(to_restore&0b100))&&!(to_restore&0b01)) {
+    if(((to_restore&0b010)||(to_restore&0b100))&&!(to_restore&0b001)) {
         emu->mxcsr.x32 = p->MxCsr;
     }
-    if(to_restore&0b10) {
+    if(to_restore&0b010) {
         // copy SSE regs
         for(int i=0; i<is32bits?8:16; ++i)
             memcpy(&emu->xmm[i], &p->XmmRegisters[i], 16);
+    } else if(to_init&0b010) {
+        for(int i=0; i<is32bits?8:16; ++i)
+            memset(&emu->xmm[i], 0, 16);
+    }
+    if(to_restore&0b100) {
+        // copy AVX upper part of regs
+        sse_regs_t* avx = (sse_regs_t*)(h+1);
+        for(int i=0; i<is32bits?8:16; ++i)
+            memcpy(&emu->ymm[i], &avx[i], 16);
+    } else if(to_init&0b100) {
+        for(int i=0; i<is32bits?8:16; ++i)
+            memcpy(&emu->ymm[i], 0, 16);
     }
 }
\ No newline at end of file
diff --git a/src/include/debug.h b/src/include/debug.h
index c4ea7ab1..5932f10a 100644
--- a/src/include/debug.h
+++ b/src/include/debug.h
@@ -93,6 +93,7 @@ extern int box64_sse_flushto0;
 extern int box64_x87_no80bits;
 extern int box64_sync_rounding;
 extern int box64_sse42;
+extern int box64_avx;
 extern int allow_missing_libs;
 extern int box64_mapclean;
 extern int box64_prefer_wrapped;
diff --git a/src/tools/my_cpuid.c b/src/tools/my_cpuid.c
index d2f283e9..9f0614bf 100644
--- a/src/tools/my_cpuid.c
+++ b/src/tools/my_cpuid.c
@@ -337,10 +337,10 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
         case 0xD:   // Processor Extended State Enumeration Main Leaf / Sub Leaf
             switch(R_CX) {
             case 0:
-                R_EAX = 0b11;       // x87 SSE saved
-                R_EBX = 512+64;     // size of xsave/xrstor
-                R_ECX = 512+64;     // same
-                R_EDX = 0;          // more bits
+                R_EAX = 0b111;          // x87 SSE AVX saved
+                R_EBX = 512+64+16*16;     // size of xsave/xrstor
+                R_ECX = 512+64+16*16;     // same
+                R_EDX = 0;              // more bits
                 break;
             case 1:
                 R_EAX = 0;      // XSAVEOPT (0) and XSAVEC (1), XGETBV with ECX=1 (2) XSAVES (3) and XFD (4) not supported yet
@@ -355,11 +355,18 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
                 break;
             case 3:
                 // componant 1: sse
-                R_EAX = 16*16; // size of the x87 block
+                R_EAX = 16*16; // size of the sse block
                 R_EBX = 160;  // offset
                 R_ECX = 0;
                 R_EDX = 0;
                 break;
+            case 4:
+                // componant 2: avx
+                R_EAX = 16*16; // size of the avx block
+                R_EBX = 512+64;  // offset
+                R_ECX = 0;
+                R_EDX = 0;
+                break;
             default:
                 R_EAX = R_ECX = R_EBX = R_EDX = 0;
                 break;