about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-05-26 09:21:04 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2024-05-26 09:21:04 +0200
commit d09a48fd03466e7160752d8ed17c9df3976d11f9 (patch)
tree c862d8ae9e7df0b439645b08ed4e5265c174b908
parent b5c0a857ca18fa91d0416a35e5ea9c7acf790d79 (diff)
download box64-d09a48fd03466e7160752d8ed17c9df3976d11f9.tar.gz
box64-d09a48fd03466e7160752d8ed17c9df3976d11f9.zip
[INTERPRETER] my first avx opcode
-rw-r--r--CMakeLists.txt1
-rw-r--r--src/core.c9
-rw-r--r--src/emu/x64run.c76
-rw-r--r--src/emu/x64run_private.h19
-rw-r--r--src/emu/x64runavx.c72
-rw-r--r--src/emu/x87emu_private.c21
-rw-r--r--src/emu/x87emu_private.h1
-rw-r--r--src/libtools/signals.c13
-rw-r--r--src/tools/my_cpuid.c2
-rw-r--r--src/tools/rcfile.c1
-rw-r--r--src/wrapped/wrappedlibc.c2
11 files changed, 198 insertions, 19 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 158130d7..5c0299a4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -395,6 +395,7 @@ set(INTERPRETER
     "${BOX64_ROOT}/src/emu/x64runf0.c"
     "${BOX64_ROOT}/src/emu/x64runf20f.c"
     "${BOX64_ROOT}/src/emu/x64runf30f.c"
+    "${BOX64_ROOT}/src/emu/x64runavx.c"
 )
 
 if(STATICBUILD)
diff --git a/src/core.c b/src/core.c
index 48eebd1d..928a056d 100644
--- a/src/core.c
+++ b/src/core.c
@@ -993,6 +993,15 @@ void LoadLogEnv()
         if(!box64_sse42)
             printf_log(LOG_INFO, "Do not expose SSE 4.2 capabilities\n");
     }
+    p = getenv("BOX64_AVX");
+    if(p) {
+        if(strlen(p)==1) {
+            if(p[0]>='0' && p[0]<='0'+1)
+                box64_avx = p[0]-'0';
+        }
+        if(box64_avx)
+            printf_log(LOG_INFO, "Will expose AVX capabilities\n");
+    }
     p = getenv("BOX64_FIX_64BIT_INODES");
     if(p) {
         if(strlen(p)==1) {
diff --git a/src/emu/x64run.c b/src/emu/x64run.c
index d7950424..ef2f550a 100644
--- a/src/emu/x64run.c
+++ b/src/emu/x64run.c
@@ -28,6 +28,27 @@
 
 int my_setcontext(x64emu_t* emu, void* ucp);
 
+static const char* avx_prefix_string(uint16_t p)    // printable name of a VEX "p" value (see VEX_P_*), used in log messages
+{
+    switch(p) {
+        case 0: return "0";     // VEX_P_NONE
+        case 1: return "66";    // VEX_P_66
+        case 2: return "F3";    // VEX_P_F3
+        case 3: return "F2";    // VEX_P_F2
+        default: return "??";   // any other value has no name
+    }
+}
+static const char* avx_map_string(uint16_t m)    // printable name of a VEX "m" (opcode map) value (see VEX_M_*), used in log messages
+{
+    switch(m) {
+        case 0: return "0";     // VEX_M_NONE
+        case 1: return "0F";    // VEX_M_0F
+        case 2: return "0F38";  // VEX_M_OF38
+        case 3: return "0F3A";  // VEX_M_0F3A
+        default: return "??";   // m is a 5-bit field, so other values can reach here
+    }
+}
+
 #ifdef TEST_INTERPRETER
 int RunTest(x64test_t *test)
 #else
@@ -1353,8 +1374,32 @@ x64emurun:
                 emu->segs_serial[_ES] = 0;
                 GD->dword[0] = *(uint32_t*)ED;
             } else {
-                // AVX not supported yet
-                emit_signal(emu, SIGILL, (void*)R_RIP, 0);
+                vex_t vex = {0};
+                vex.rex = rex;
+                tmp8u = F8;     // first VEX payload byte, decoded below as: ~R ~X ~B mmmmm
+                vex.m = tmp8u&0b00011111;
+                vex.rex.b = (tmp8u&0b00100000)?0:1;    // b/x/r are stored inverted in the prefix
+                vex.rex.x = (tmp8u&0b01000000)?0:1;
+                vex.rex.r = (tmp8u&0b10000000)?0:1;
+                tmp8u = F8;     // second VEX payload byte, decoded below as: W ~vvvv L pp
+                vex.p = tmp8u&0b00000011;
+                vex.l = (tmp8u>>2)&1;
+                vex.v = ((~tmp8u)>>3)&0b1111;    // vvvv is stored inverted as well
+                vex.rex.w = (tmp8u>>7)&1;
+                #ifdef TEST_INTERPRETER 
+                if(!(addr = TestAVX(test, vex, addr, &step)))
+                    unimp = 1;
+                #else
+                if(!(addr = RunAVX(emu, vex, addr, &step))) {    // 0 means the opcode is not implemented
+                    printf_log(LOG_NONE, "Unimplemented AVX opcode prefix %s map %s ", avx_prefix_string(vex.p), avx_map_string(vex.m));
+                    unimp = 1;
+                    goto fini;
+                }
+                if(step==2) {
+                    STEP2;
+                }
+                #endif
+                break;
             }
             break;
         case 0xC5:                      /* LDS Gd,Ed */
@@ -1366,8 +1411,31 @@ x64emurun:
                 emu->segs_serial[_DS] = 0;
                 GD->dword[0] = *(uint32_t*)ED;
             } else {
-                // AVX not supported yet
-                emit_signal(emu, SIGILL, (void*)R_RIP, 0);
+                vex_t vex = {0};
+                vex.rex = rex;
+                tmp8u = F8;
+                vex.p = tmp8u&0b00000011;
+                vex.l = (tmp8u>>2)&1;
+                vex.v = ((~tmp8u)>>3)&0b1111;
+                vex.rex.r = (tmp8u&0b10000000)?0:1;
+                vex.rex.b = 0;
+                vex.rex.x = 0;
+                vex.rex.w = 0;
+                vex.m = VEX_M_0F;
+                #ifdef TEST_INTERPRETER 
+                if(!(addr = TestAVX(test, vex, addr, &step)))
+                    unimp = 1;
+                #else
+                if(!(addr = RunAVX(emu, vex, addr, &step))) {
+                    printf_log(LOG_NONE, "Unimplemented AVX opcode prefix %s map %s ", avx_prefix_string(vex.p), avx_map_string(vex.m));
+                    unimp = 1;
+                    goto fini;
+                }
+                if(step==2) {
+                    STEP2;
+                }
+                #endif
+                break;
             }
             break;
         case 0xC6:                      /* MOV Eb,Ib */
diff --git a/src/emu/x64run_private.h b/src/emu/x64run_private.h
index 01cf3e4d..659b76bf 100644
--- a/src/emu/x64run_private.h
+++ b/src/emu/x64run_private.h
@@ -21,6 +21,22 @@ typedef struct rex_s {
     int     is32bits;
 } rex_t;
 
+#define VEX_P_NONE  0    // VEX_P_*: values of vex_t.p
+#define VEX_P_66    1
+#define VEX_P_F3    2
+#define VEX_P_F2    3
+#define VEX_M_NONE  0    // VEX_M_*: values of vex_t.m
+#define VEX_M_0F    1
+#define VEX_M_OF38  2    // NOTE(review): spelled with a letter 'O', unlike VEX_M_0F / VEX_M_0F3A -- check users before renaming
+#define VEX_M_0F3A  3
+typedef struct vex_s {
+    rex_t       rex;    // REX state; the r/x/b/w fields are overwritten from the VEX prefix by the decoder
+    uint16_t    l:1;    // L bit of the VEX prefix
+    uint16_t    p:2;    //0: none, 1: 0x66, 2:0xF3, 3: 0xF2
+    uint16_t    v:4;    // src register
+    uint16_t    m:5;    // opcode map
+} vex_t;
+
 static inline uint8_t Peek(x64emu_t *emu, int offset){return *(uint8_t*)(R_RIP + offset);}
 
 #ifdef TEST_INTERPRETER
@@ -155,6 +171,8 @@ uintptr_t RunDF(x64emu_t *emu, rex_t rex, uintptr_t addr);
 uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr);
 uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step);
 uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr);
+uintptr_t RunAVX(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step);
+
 
 uintptr_t Test0F(x64test_t *test, rex_t rex, uintptr_t addr, int *step);
 uintptr_t Test64(x64test_t *test, rex_t rex, int seg, uintptr_t addr);
@@ -183,6 +201,7 @@ uintptr_t TestDF(x64test_t *test, rex_t rex, uintptr_t addr);
 uintptr_t TestF0(x64test_t *test, rex_t rex, uintptr_t addr);
 uintptr_t TestF20F(x64test_t *test, rex_t rex, uintptr_t addr, int *step);
 uintptr_t TestF30F(x64test_t *test, rex_t rex, uintptr_t addr);
+uintptr_t TestAVX(x64test_t *test, vex_t vex, uintptr_t addr, int *step);
 
 
 void x64Syscall(x64emu_t *emu);
diff --git a/src/emu/x64runavx.c b/src/emu/x64runavx.c
new file mode 100644
index 00000000..78515b25
--- /dev/null
+++ b/src/emu/x64runavx.c
@@ -0,0 +1,72 @@
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <fenv.h>
+#include <string.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "debug.h"
+#include "box64stack.h"
+#include "x64emu.h"
+#include "x64run.h"
+#include "x64emu_private.h"
+#include "x64run_private.h"
+#include "x64primop.h"
+#include "x64trace.h"
+#include "x87emu_private.h"
+#include "box64context.h"
+#include "my_cpuid.h"
+#include "bridge.h"
+#include "signals.h"
+#include "x64shaext.h"
+#ifdef DYNAREC
+#include "custommem.h"
+#include "../dynarec/native_lock.h"
+#endif
+
+#include "modrm.h"
+
+#ifdef TEST_INTERPRETER
+uintptr_t TestAVX(x64test_t *test, vex_t vex, uintptr_t addr, int *step)
+#else
+uintptr_t RunAVX(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
+#endif
+{   // Runs one VEX-prefixed opcode; returns addr when handled, 0 when the opcode/encoding is not implemented
+    uint8_t opcode;
+    uint8_t nextop;
+    uint8_t tmp8u;
+    int8_t tmp8s;
+    int32_t tmp32s, tmp32s2;
+    uint32_t tmp32u, tmp32u2;
+    uint64_t tmp64u, tmp64u2;
+    int64_t tmp64s;
+    reg64_t *oped, *opgd;
+    sse_regs_t *opex, *opgx, eax1;
+    mmx87_regs_t *opem, *opgm, eam1;
+
+#ifdef TEST_INTERPRETER
+    x64emu_t *emu = test->emu;
+#endif
+    opcode = F8;
+
+    switch(opcode) {
+        // 0x77 with L=0, map 0F and no prefix: zero emu->ymm (which holds the AVX upper part of the registers)
+        case 0x77:
+            if(!vex.l && (vex.m==VEX_M_0F) && (vex.p==VEX_P_NONE)) {
+                if(vex.v!=0) {
+                    emit_signal(emu, SIGILL, (void*)R_RIP, 0);    // a non-zero v is rejected for this opcode
+                } else {
+                    memset(emu->ymm, 0, sizeof(sse_regs_t)*((vex.rex.is32bits)?8:16));    // 8 registers in 32-bit mode, 16 otherwise
+                }
+            } else
+                return 0;    // other encodings of 0x77 are not handled yet
+            break;
+        default:
+            return 0;
+    }
+    return addr;
+}
diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c
index e19f67a2..d5e2421a 100644
--- a/src/emu/x87emu_private.c
+++ b/src/emu/x87emu_private.c
@@ -441,11 +441,11 @@ typedef struct xsaveheader_s {
     uint8_t  reserved[64-16];
 } xsaveheader_t;
 
-void fpu_xsave(x64emu_t* emu, void* ed, int is32bits)
+void fpu_xsave_mask(x64emu_t* emu, void* ed, int is32bits, uint64_t mask)
 {
     xsave64_t *p = (xsave64_t*)ed;
     xsaveheader_t *h = (xsaveheader_t*)(p+1);
-    uint32_t rfbm = (0b111&R_EAX);
+    uint32_t rfbm = (0b111&mask);
     h->xstate_bv =(h->xstate_bv&~0b111)|rfbm;
     h->xcomp_bv = 0;
     if(h->xstate_bv&0b001) {
@@ -473,16 +473,21 @@ void fpu_xsave(x64emu_t* emu, void* ed, int is32bits)
     }
     // copy SSE regs
     if(h->xstate_bv&0b10) {
-        for(int i=0; i<is32bits?8:16; ++i)
+        for(int i=0; i<(is32bits?8:16); ++i)
             memcpy(&p->XmmRegisters[i], &emu->xmm[i], 16);
     }
     if(h->xstate_bv&0b100) {
         sse_regs_t* avx = (sse_regs_t*)(h+1);
-        for(int i=0; i<is32bits?8:16; ++i)
+        for(int i=0; i<(is32bits?8:16); ++i)
             memcpy(&avx[i], &emu->ymm[i], 16);
     }
 }
 
+void fpu_xsave(x64emu_t* emu, void* ed, int is32bits)    // fpu_xsave_mask() with the mask taken from R_RAX
+{
+    fpu_xsave_mask(emu, ed, is32bits, R_RAX);    // fpu_xsave_mask only keeps the low 3 bits of the mask
+}
+
 void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits)
 {
     xsave64_t *p = (xsave64_t*)ed;
@@ -518,19 +523,19 @@ void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits)
     }
     if(to_restore&0b010) {
         // copy SSE regs
-        for(int i=0; i<is32bits?8:16; ++i)
+        for(int i=0; i<(is32bits?8:16); ++i)
             memcpy(&emu->xmm[i], &p->XmmRegisters[i], 16);
     } else if(to_init&0b010) {
-        for(int i=0; i<is32bits?8:16; ++i)
+        for(int i=0; i<(is32bits?8:16); ++i)
             memset(&emu->xmm[i], 0, 16);
     }
     if(to_restore&0b100) {
         // copy AVX upper part of regs
         sse_regs_t* avx = (sse_regs_t*)(h+1);
-        for(int i=0; i<is32bits?8:16; ++i)
+        for(int i=0; i<(is32bits?8:16); ++i)
             memcpy(&emu->ymm[i], &avx[i], 16);
     } else if(to_init&0b100) {
-        for(int i=0; i<is32bits?8:16; ++i)
-            memcpy(&emu->ymm[i], 0, 16);
+        for(int i=0; i<(is32bits?8:16); ++i)
+            memset(&emu->ymm[i], 0, 16);    // init state: zero the register (memcpy from a null source is invalid)
     }
 }
\ No newline at end of file
diff --git a/src/emu/x87emu_private.h b/src/emu/x87emu_private.h
index db5553a2..b389028a 100644
--- a/src/emu/x87emu_private.h
+++ b/src/emu/x87emu_private.h
@@ -217,6 +217,7 @@ void fpu_fxrstor32(x64emu_t* emu, void* ed);
 void fpu_fxsave64(x64emu_t* emu, void* ed);
 void fpu_fxrstor64(x64emu_t* emu, void* ed);
 void fpu_xsave(x64emu_t* emu, void* ed, int is32bits);
+void fpu_xsave_mask(x64emu_t* emu, void* ed, int is32bits, uint64_t mask);
 void fpu_xrstor(x64emu_t* emu, void* ed, int is32bits);
 
 #endif //__X87RUN_PRIVATE_H_
diff --git a/src/libtools/signals.c b/src/libtools/signals.c
index 97ab274a..1f86f0f8 100644
--- a/src/libtools/signals.c
+++ b/src/libtools/signals.c
@@ -119,8 +119,7 @@ struct x64_fpstate
   uint32_t          mxcsr_mask;
   struct x64_fpreg  _st[8];
   struct x64_xmmreg _xmm[16];
-  uint32_t          res[12];
-  uint32_t          res2[12];
+  uint32_t          res[24];
 }__attribute__((packed));
 
 typedef struct x64_fpstate *x64_fpregset_t;
@@ -988,6 +987,8 @@ void my_sigactionhandler_oldcode(int32_t sig, int simple, siginfo_t* info, void
 
     // TODO: do I need to really setup 2 stack frame? That doesn't seems right!
     // setup stack frame
+    frame -= 512+64+16*16;
+    void* xstate = (void*)frame;
     frame -= sizeof(siginfo_t);
     siginfo_t* info2 = (siginfo_t*)frame;
     memcpy(info2, info, sizeof(siginfo_t));
@@ -1082,10 +1083,10 @@ void my_sigactionhandler_oldcode(int32_t sig, int simple, siginfo_t* info, void
 #endif
 #endif
     // get FloatPoint status
-    sigcontext->uc_mcontext.fpregs = (struct x64_libc_fpstate*)&sigcontext->xstate;
-    fpu_fxsave64(emu, &sigcontext->xstate);
-    // add custom SIGN in reserved area
-    //((unsigned int *)(&sigcontext.xstate.fpstate.padding))[8*4+12] = 0x46505853;  // not yet, when XSAVE / XRSTR will be ready
+    sigcontext->uc_mcontext.fpregs = xstate;//(struct x64_libc_fpstate*)&sigcontext->xstate;
+    fpu_xsave_mask(emu, xstate, 0, 0b111);
+    memcpy(&sigcontext->xstate, xstate, sizeof(sigcontext->xstate));
+    ((struct x64_fpstate*)xstate)->res[12] = 0x46505853;   // magic number to signal an XSTATE type of fpregs
     // get signal mask
 
     if(new_ss) {
diff --git a/src/tools/my_cpuid.c b/src/tools/my_cpuid.c
index 9f0614bf..a78c499d 100644
--- a/src/tools/my_cpuid.c
+++ b/src/tools/my_cpuid.c
@@ -262,6 +262,8 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
                     | 1<<23     // POPCOUNT
                     | 1<<25     // aesni
                     | 1<<26     // xsave
+                    | 1<<27     // osxsave
+                    | box64_avx<<28 // AVX
                     ; 
             break;
         case 0x2:   // TLB and Cache info. Sending 1st gen P4 info...
diff --git a/src/tools/rcfile.c b/src/tools/rcfile.c
index 3fc9c869..4f99daec 100644
--- a/src/tools/rcfile.c
+++ b/src/tools/rcfile.c
@@ -100,6 +100,7 @@ ENTRYBOOL(BOX64_NOPULSE, box64_nopulse)                 \
 ENTRYBOOL(BOX64_NOGTK, box64_nogtk)                     \
 ENTRYBOOL(BOX64_NOVULKAN, box64_novulkan)               \
 ENTRYBOOL(BOX64_SSE42, box64_sse42)                     \
+ENTRYBOOL(BOX64_AVX, box64_avx)                         \
 ENTRYBOOL(BOX64_FUTEX_WAITV, box64_futex_waitv)         \
 ENTRYSTRING_(BOX64_BASH, bash)                          \
 ENTRYINT(BOX64_JITGDB, jit_gdb, 0, 3, 2)                \
diff --git a/src/wrapped/wrappedlibc.c b/src/wrapped/wrappedlibc.c
index 9e8ba194..f9dc8e2f 100644
--- a/src/wrapped/wrappedlibc.c
+++ b/src/wrapped/wrappedlibc.c
@@ -1639,7 +1639,7 @@ void CreateCPUInfoFile(int fd)
         P;
         sprintf(buff, "bogomips\t: %g\n", getBogoMips());
         P;
-        sprintf(buff, "flags\t\t: fpu cx8 sep ht cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fxsr cpuid pclmulqdq cx16 aes movbe pni sse4_1 sse4_2 lzcnt popcnt\n");
+        sprintf(buff, "flags\t\t: fpu cx8 sep ht cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fxsr cpuid pclmulqdq cx16 aes movbe pni sse4_1%s%s lzcnt popcnt\n", box64_sse42?" sse4_2":"", box64_avx?" avx":"");
         P;
         sprintf(buff, "address sizes\t: 48 bits physical, 48 bits virtual\n");
         P;