From b5105a1e57bba3305d5dce93ab4d2f7faab6b34a Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Mon, 26 Aug 2024 17:45:13 +0200 Subject: Added preliminary Box32 support (#1760) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Improve the ReserveHighMemory helper function * [BOX32] Added some wrapping infrastructure * [BOX32] More wrapped 32bits lib infrastructure * [BOX32] Added callback and tls 32bits handling * [BOX32] Added more 32bits, around wrappers and elfs * [BOX32] Added the 32bits version of myalign * [BOX32] More wrapped libs and 32bits fixes and improvements * [BOX32] Added some 32bits tests * [BOX32] Try to enable some Box32 build and test on the CI * [BOX32] Disable Box32 testing on CI platform that use qemu * [BOX32] Another attempt to disable Box32 testing on CI platform that use qemu * [BOX32] Small fix for another attempt to disable Box32 testing on CI platform that use qemu * [BOX32] Yet another fix for another attempt to disable Box32 testing on CI platform that use qemu * [BOX32] Fixed a typo in CI script * [BOX32] Better scratch alignment and enabled more tests * [BOX32] Added (partial) wrapped 32bits librt * [BOX32] Added mention of Box32 in README * [BOX32] Added pthread handling, and numerous fixes to 32bits handling. 
[ARM64_DYNAREC] Fixed access to segment with negative offset * [BOX32] Added system libs and cpp testing, plus some more fixes * [BOX32] Fix previous commit * [BOX32] Better stack adjustment for 32bits processes * [BOX32] Added getenv wrapped 32bits function and friends * [BOX32] Don't look for box86 for a Box32 build * [BOX32] Don't do 32bits cppThreads test for now on CI * [BOX32] Enabled a few more 32bits tests * [BOX32] For ld_lib_path for both CppThreads tests * [BOX32] [ANDROID] Some Fixes for Android Build * [BOX32] Still need to disable cppThread_32bits test on CI for some reason * [BOX32] [ANDROID] Don't show PreInit Array Warning (#1751) * [BOX32] [ANDROID] One More Fix for Android Build That I forgotten to … (#1752) * [BOX32] [ANDROID] One More Fix for Android Build That I forgotten to push before * [BOX32] [ANDROID] Try to Create __libc_init * [BOX32] [ANDROID] Try to disable NEEDED_LIBS for now (libdl is not wrapped) * [BOX32] Updated generated files * [BOX32] Added 32bits context functions * [BOX32] Added 32bits signal handling * [BOX32] Added some missing 32bits elfloader functions * [BOX32] Fix build on x86_64 machine * [BOX32] Better fix for x86_64 build * [BOX32] Actually added missing libs, and re-enabled cppThreads_32bits test * [BOX32] Added wrapped 32bits libdl * [BOX32] Try to re-enabled Box32 test on CI for ARM64 builds * [BOX32] fine-tuning Box32 test on CI for ARM64 builds * [BOX32] More fine-tuning to Box32 test on CI for ARM64 builds * [BOX32] Enabled Box32 test on CI for LA64 and RV64 builds too * [BOX32] re-Disabled Box32 test on CI for LA64 and RV64 builds, not working for now * [BOX32] Temporarily disabled cppThreads_32bits test on CI --------- Co-authored-by: KreitinnSoftware Co-authored-by: KreitinnSoftware <80591934+KreitinnSoftware@users.noreply.github.com> --- src/include/box32.h | 5 - src/include/box32context.h | 7 + src/include/box64context.h | 11 ++ src/include/debug.h | 2 +- src/include/elfload_dump.h | 4 + 
src/include/myalign32.h | 409 +++++++++++++++++++++++++++++++++++++++++++++ src/include/regs.h | 1 + src/include/threads.h | 15 ++ src/include/x64emu.h | 7 +- src/include/x64run.h | 3 + 10 files changed, 455 insertions(+), 9 deletions(-) create mode 100644 src/include/box32context.h create mode 100755 src/include/myalign32.h (limited to 'src/include') diff --git a/src/include/box32.h b/src/include/box32.h index ab8fa184..9554e65f 100644 --- a/src/include/box32.h +++ b/src/include/box32.h @@ -104,9 +104,4 @@ void fini_hash_helper(); typedef struct x86emu_s x86emu_t; -void* my_mmap(x86emu_t* emu, void* addr, unsigned long length, int prot, int flags, int fd, int offset); -void* my_mmap64(x86emu_t* emu, void *addr, unsigned long length, int prot, int flags, int fd, int64_t offset); -int my_munmap(x86emu_t* emu, void* addr, unsigned long length); -int my_mprotect(x86emu_t* emu, void *addr, unsigned long len, int prot); - #endif //__BOX32_64__H_ diff --git a/src/include/box32context.h b/src/include/box32context.h new file mode 100644 index 00000000..fac7a6f7 --- /dev/null +++ b/src/include/box32context.h @@ -0,0 +1,7 @@ +#ifndef __BOX32CONTEXT_H_ +#define __BOX32CONTEXT_H_ + +#include "box32.h" +#include "box64context.h" + +#endif//__BOX32CONTEXT_H_ \ No newline at end of file diff --git a/src/include/box64context.h b/src/include/box64context.h index b8706729..532c1aaf 100644 --- a/src/include/box64context.h +++ b/src/include/box64context.h @@ -7,12 +7,19 @@ #ifdef DYNAREC #include "dynarec/native_lock.h" #endif +#ifndef BOX32_DEF +#define BOX32_DEF +typedef uint32_t ptr_t; +typedef int32_t long_t; +typedef uint32_t ulong_t; +#endif #ifdef DYNAREC // disabling for now, seems to have a negative impact on performances //#define USE_CUSTOM_MUTEX #endif + typedef struct elfheader_s elfheader_t; typedef struct cleanup_s cleanup_t; typedef struct x64emu_s x64emu_t; @@ -26,6 +33,7 @@ typedef struct kh_defaultversion_s kh_defaultversion_t; typedef struct kh_mapsymbols_s 
kh_mapsymbols_t; typedef struct library_s library_t; typedef struct linkmap_s linkmap_t; +typedef struct linkmap32_s linkmap32_t; typedef struct kh_threadstack_s kh_threadstack_t; typedef struct rbtree rbtree; typedef struct atfork_fnc_s { @@ -97,9 +105,11 @@ typedef struct box64context_s { int argc; char** argv; + ptr_t argv32; int envc; char** envv; + ptr_t envv32; int orig_argc; char** orig_argv; @@ -175,6 +185,7 @@ typedef struct box64context_s { library_t *sdl2lib; library_t *sdl2mixerlib; linkmap_t *linkmap; + linkmap32_t *linkmap32; void* sdl1allocrw; // SDL1 AllocRW/FreeRW function void* sdl1freerw; void* sdl2allocrw; // SDL2 AllocRW/FreeRW function diff --git a/src/include/debug.h b/src/include/debug.h index e0f81c95..1ecf5b4e 100644 --- a/src/include/debug.h +++ b/src/include/debug.h @@ -121,7 +121,7 @@ extern int box64_x11threads; extern int box64_x11glx; extern char* box64_libGL; extern uintptr_t fmod_smc_start, fmod_smc_end; // to handle libfmod (from Unreal) SMC (self modifying code) -extern uint32_t default_gs; +extern uint32_t default_gs, default_fs; extern int jit_gdb; // launch gdb when a segfault is trapped extern int box64_tcmalloc_minimal; // when using tcmalloc_minimal #define LOG_NONE 0 diff --git a/src/include/elfload_dump.h b/src/include/elfload_dump.h index 3fa0a910..848a2817 100644 --- a/src/include/elfload_dump.h +++ b/src/include/elfload_dump.h @@ -26,4 +26,8 @@ void DumpRelRTable64(elfheader_t *h, int cnt, Elf64_Relr *relr, const char *name void DumpBinary(char* p, int sz); +#ifndef SHT_CHECKSUM +#define SHT_CHECKSUM 0x6ffffff8 +#endif + #endif //ELFLOADER_DUMP_H \ No newline at end of file diff --git a/src/include/myalign32.h b/src/include/myalign32.h new file mode 100755 index 00000000..c81ccdf6 --- /dev/null +++ b/src/include/myalign32.h @@ -0,0 +1,409 @@ +#ifndef __MY_ALIGN32__H_ +#define __MY_ALIGN32__H_ +#include <stdint.h> + +#define X64_VA_MAX_REG (6*8) +#define X64_VA_MAX_XMM ((6*8)+(8*16)) + +#define ALIGN64_16(A) 
(uint64_t*)((((uintptr_t)(A))+15)&~15LL) + +#ifdef __x86_64__ +// x86_64: the register save area holds 6 64bits general regs then 8 128bits float regs (see X64_VA_MAX_REG / X64_VA_MAX_XMM); both macros set gp_offset/fp_offset past that area so every va_arg is fetched from overflow_arg_area +/* +For reference, here is the x86_64 va_list structure +typedef struct { + unsigned int gp_offset; + unsigned int fp_offset; + void *overflow_arg_area; + void *reg_save_area; +} va_list[1]; +*/ +#define CREATE_SYSV_VALIST_32(A) \ + va_list sysv_varargs; \ + sysv_varargs->gp_offset=X64_VA_MAX_REG; \ + sysv_varargs->fp_offset=X64_VA_MAX_XMM; \ + sysv_varargs->reg_save_area=(A); \ + sysv_varargs->overflow_arg_area=(A) + +#define CONVERT_VALIST_32(A) \ + va_list sysv_varargs; \ + sysv_varargs->gp_offset=X64_VA_MAX_REG; \ + sysv_varargs->fp_offset=X64_VA_MAX_XMM; \ + sysv_varargs->reg_save_area=(A); \ + sysv_varargs->overflow_arg_area=(A) + + + +#elif defined(__aarch64__) +// aarch64: 8 64bits general regs and 8 128bits float regs; __gr_offs/__vr_offs are set to non-negative values so every va_arg is fetched from __stack +/* +va_list declared as +typedef struct va_list { + void * stack; // next stack param + void * gr_top; // end of GP arg reg save area + void * vr_top; // end of FP/SIMD arg reg save area + int gr_offs; // offset from gr_top to next GP register arg + int vr_offs; // offset from vr_top to next FP/SIMD register arg +} va_list; +*/ +#define CREATE_SYSV_VALIST_32(A) \ + va_list sysv_varargs; \ + sysv_varargs.__gr_offs=(8*8); \ + sysv_varargs.__vr_offs=(8*16); \ + sysv_varargs.__stack=(A); + +#define CONVERT_VALIST_32(A) \ + va_list sysv_varargs; \ + sysv_varargs.__gr_offs=(8*8); \ + sysv_varargs.__vr_offs=(8*16); \ + sysv_varargs.__stack=(A); + + +#elif defined(__loongarch64) || defined(__powerpc64__) || defined(__riscv) // va_list is initialised directly from the argument pointer here; NOTE(review): SCRATCH is unused by CREATE_VALIST_FROM_VALIST_32 - confirm intent +#define CREATE_SYSV_VALIST_32(A) va_list sysv_varargs = (va_list)(A) +#define CONVERT_VALIST_32(A) va_list sysv_varargs = (va_list)(A) + +#define CREATE_VALIST_FROM_VALIST_32(VA, SCRATCH) \ + va_list sysv_varargs = (va_list)(VA) + +#else +#error Unknown architecture! 
+#endif + +#define VARARGS_32 sysv_varargs +#define PREPARE_VALIST_32 CREATE_SYSV_VALIST_32(emu->scratch) +#define VARARGS_32_(A) sysv_varargs +#define PREPARE_VALIST_32_(A) CREATE_SYSV_VALIST_32(A) + +void myStackAlign32(const char* fmt, uint32_t* st, uint64_t* mystack); +void myStackAlignGVariantNew32(const char* fmt, uint32_t* st, uint64_t* mystack); +void myStackAlignW32(const char* fmt, uint32_t* st, uint64_t* mystack); + +void UnalignStat64_32(const void* source, void* dest); + +void UnalignStatFS64_32(const void* source, void* dest); +#if 0 +void UnalignOggVorbis(void* dest, void* source); // Arm -> x86 +void AlignOggVorbis(void* dest, void* source); // x86 -> Arm + +void UnalignVorbisDspState(void* dest, void* source); // Arm -> x86 +void AlignVorbisDspState(void* dest, void* source); // x86 -> Arm + +void UnalignVorbisBlock(void* dest, void* source); // Arm -> x86 +void AlignVorbisBlock(void* dest, void* source); // x86 -> Arm + +void UnalignEpollEvent(void* dest, void* source, int nbr); // Arm -> x86 +void AlignEpollEvent(void* dest, void* source, int nbr); // x86 -> Arm + +void UnalignSmpegInfo(void* dest, void* source); // Arm -> x86 +void AlignSmpegInfo(void* dest, void* source); // x86 -> Arm +#endif +// stat64 is packed on i386, not on arm (and possibly other structures) +#undef st_atime +#undef st_atime_nsec +#undef st_mtime +#undef st_mtime_nsec +#undef st_ctime +#undef st_ctime_nsec +struct i386_stat64 { + uint64_t st_dev; + uint8_t __pad0[4]; + uint32_t __st_ino; + uint32_t st_mode; + uint32_t st_nlink; + uint32_t st_uid; + uint32_t st_gid; + uint64_t st_rdev; + uint8_t __pad3[4]; + int64_t st_size; + uint32_t st_blksize; + uint64_t st_blocks; + uint32_t st_atime; + uint32_t st_atime_nsec; + uint32_t st_mtime; + uint32_t st_mtime_nsec; + uint32_t st_ctime; + uint32_t st_ctime_nsec; + uint64_t st_ino; +} __attribute__((packed)); + +struct i386_fsid { + int val[2]; +} __attribute__((packed)); + +struct i386_statfs { + uint32_t f_type; + uint32_t 
f_bsize; + uint32_t f_blocks; + uint32_t f_bfree; + uint32_t f_bavail; + uint32_t f_files; + uint32_t f_ffree; + struct i386_fsid f_fsid; + uint32_t f_namelen; + uint32_t f_frsize; + uint32_t f_flags; + uint32_t f_spare[4]; +} __attribute__((packed)); + +struct i386_statfs64 { + uint32_t f_type; + uint32_t f_bsize; + uint64_t f_blocks; + uint64_t f_bfree; + uint64_t f_bavail; + uint64_t f_files; + uint64_t f_ffree; + struct i386_fsid f_fsid; + uint32_t f_namelen; + uint32_t f_frsize; + uint32_t f_flags; + uint32_t f_spare[4]; +} __attribute__((packed)); +#if 0 +typedef struct { + unsigned char *data; + int storage; + int fill; + int returned; + + int unsynced; + int headerbytes; + int bodybytes; +} ogg_sync_state; + +typedef struct { + unsigned char *body_data; /* bytes from packet bodies */ + long body_storage; /* storage elements allocated */ + long body_fill; /* elements stored; fill mark */ + long body_returned; /* elements of fill returned */ + + + int *lacing_vals; /* The values that will go to the segment table */ + int64_t *granule_vals; /* granulepos values for headers. 
Not compact + this way, but it is simple coupled to the + lacing fifo */ + long lacing_storage; + long lacing_fill; + long lacing_packet; + long lacing_returned; + + unsigned char header[282]; /* working space for header encode */ + int header_fill; + + int e_o_s; /* set when we have buffered the last packet in the + logical bitstream */ + int b_o_s; /* set after we've written the initial page + of a logical bitstream */ + long serialno; + long pageno; + int64_t packetno; /* sequence number for decode; the framing + knows where there's a hole in the data, + but we need coupling so that the codec + (which is in a separate abstraction + layer) also knows about the gap */ + int64_t granulepos; + +} ogg_stream_state; + +typedef struct vorbis_dsp_state { + int analysisp; + ptr_t vi; //vorbis_info + + ptr_t pcm; //float** + ptr_t pcmret; // float** + int pcm_storage; + int pcm_current; + int pcm_returned; + + int preextrapolate; + int eofflag; + + long lW; + long W; + long nW; + long centerW; + + int64_t granulepos; + int64_t sequence; + + int64_t glue_bits; + int64_t time_bits; + int64_t floor_bits; + int64_t res_bits; + + void *backend_state; +} vorbis_dsp_state; + +typedef struct { + long endbyte; + int endbit; + + unsigned char *buffer; + unsigned char *ptr; + long storage; +} oggpack_buffer; + +typedef struct vorbis_block { + /* necessary stream state for linking to the framing abstraction */ + float **pcm; /* this is a pointer into local storage */ + oggpack_buffer opb; + + long lW; + long W; + long nW; + int pcmend; + int mode; + + int eofflag; + int64_t granulepos; + int64_t sequence; + vorbis_dsp_state *vd; /* For read-only access of configuration */ + + /* local storage to avoid remallocing; it's up to the mapping to + structure it */ + void *localstore; + long localtop; + long localalloc; + long totaluse; + struct alloc_chain *reap; + + /* bitmetrics for the frame */ + long glue_bits; + long time_bits; + long floor_bits; + long res_bits; + + void *internal; + 
+} vorbis_block; + +typedef struct { + size_t (*read_func) (void *ptr, size_t size, size_t nmemb, void *datasource); + int (*seek_func) (void *datasource, int64_t offset, int whence); + int (*close_func) (void *datasource); + long (*tell_func) (void *datasource); +} ov_callbacks; + +typedef struct OggVorbis { + void *datasource; /* Pointer to a FILE *, etc. */ + int seekable; + int64_t offset; + int64_t end; + ogg_sync_state oy; + + /* If the FILE handle isn't seekable (eg, a pipe), only the current + stream appears */ + int links; + int64_t *offsets; + int64_t *dataoffsets; + long *serialnos; + int64_t *pcmlengths; /* overloaded to maintain binary + compatibility; x2 size, stores both + beginning and end values */ + void *vi; //vorbis_info + void *vc; //vorbis_comment + + /* Decoding working state local storage */ + int64_t pcm_offset; + int ready_state; + long current_serialno; + int current_link; + + double bittrack; + double samptrack; + + ogg_stream_state os; /* take physical pages, weld into a logical + stream of packets */ + vorbis_dsp_state vd; /* central working state for the packet->PCM decoder */ + vorbis_block vb; /* local working space for packet->PCM decode */ + + ov_callbacks callbacks; + +} OggVorbis; + +typedef struct my_SMPEG_Info_s { + int has_audio; + int has_video; + int width; + int height; + int current_frame; + double current_fps; + char audio_string[80]; + int audio_current_frame; + uint32_t current_offset; + uint32_t total_size; + double current_time; + double total_time; +} my_SMPEG_Info_t; + +typedef struct __attribute__((packed)) x86_ftsent_s { + struct x86_ftsent_s *fts_cycle; + struct x86_ftsent_s *fts_parent; + struct x86_ftsent_s *fts_link; + long fts_number; + void *fts_pointer; + char *fts_accpath; + char *fts_path; + int fts_errno; + int fts_symfd; + uint16_t fts_pathlen; + uint16_t fts_namelen; + uintptr_t fts_ino; + uint64_t fts_dev; + uint32_t fts_nlink; + int16_t fts_level; + uint16_t fts_info; + uint16_t fts_flags; + 
uint16_t fts_instr; + struct stat *fts_statp; + char fts_name[1]; +} x86_ftsent_t; + +void UnalignFTSENT(void* dest, void* source); // Arm -> x86 +void AlignFTSENT(void* dest, void* source); // x86 -> Arm + +typedef struct my_flock64_s { + uint16_t l_type; + uint16_t l_whence; + int64_t l_start; + int64_t l_len; + int l_pid; +} my_flock64_t; + +typedef struct __attribute__((packed)) x86_flock64_s { + uint16_t l_type; + uint16_t l_whence; + int64_t l_start; + int64_t l_len; + int l_pid; +} x86_flock64_t; + +void UnalignFlock64(void* dest, void* source); // Arm -> x86 +void AlignFlock64(void* dest, void* source); // x86 -> Arm + +// defined in wrapperlibc.c +int of_convert(int); // x86->arm +int of_unconvert(int); // arm->x86 + +typedef struct my_GValue_s +{ + int g_type; + union { + int v_int; + int64_t v_int64; + uint64_t v_uint64; + float v_float; + double v_double; + void* v_pointer; + } data[2]; +} my_GValue_t; + +void alignNGValue(my_GValue_t* v, void* value, int n); +void unalignNGValue(void* value, my_GValue_t* v, int n); +#endif + +int of_convert32(int a); +int of_unconvert32(int a); + +#endif//__MY_ALIGN32__H_ \ No newline at end of file diff --git a/src/include/regs.h b/src/include/regs.h index 3cfcdc5a..5b2a9c41 100644 --- a/src/include/regs.h +++ b/src/include/regs.h @@ -319,6 +319,7 @@ typedef union { #define R_R13 emu->regs[_R13].q[0] #define R_R14 emu->regs[_R14].q[0] #define R_R15 emu->regs[_R15].q[0] +#define R_EIP emu->ip.dword[0] #define R_EAX emu->regs[_AX].dword[0] #define R_EBX emu->regs[_BX].dword[0] #define R_ECX emu->regs[_CX].dword[0] diff --git a/src/include/threads.h b/src/include/threads.h index 8ed0e4c1..a35b02f3 100644 --- a/src/include/threads.h +++ b/src/include/threads.h @@ -4,11 +4,26 @@ typedef struct box64context_s box64context_t; typedef struct x64emu_s x64emu_t; +typedef struct emuthread_s { + uintptr_t fnc; + void* arg; + x64emu_t* emu; + int join; + uintptr_t self; + ulong_t hself; + int cancel_cap, cancel_size; + void** 
cancels; +} emuthread_t; + void CleanStackSize(box64context_t* context); void init_pthread_helper(void); void fini_pthread_helper(box64context_t* context); void clean_current_emuthread(void); +#ifdef BOX32 +void init_pthread_helper_32(void); +void fini_pthread_helper_32(box64context_t* context); +#endif // prepare an "emuthread structure" in pet and return address of function pointer for a "thread creation routine" void* my_prepare_thread(x64emu_t *emu, void* f, void* arg, int ssize, void** pet); diff --git a/src/include/x64emu.h b/src/include/x64emu.h index 5591823b..8c08a0c2 100644 --- a/src/include/x64emu.h +++ b/src/include/x64emu.h @@ -25,7 +25,8 @@ void SetEDX(x64emu_t *emu, uint32_t v); void SetEDI(x64emu_t *emu, uint32_t v); void SetESI(x64emu_t *emu, uint32_t v); void SetEBP(x64emu_t *emu, uint32_t v); -//void SetESP(x64emu_t *emu, uint32_t v); +void SetESP(x64emu_t *emu, uint32_t v); +void SetEIP(x64emu_t *emu, uint32_t v); void SetRAX(x64emu_t *emu, uint64_t v); void SetRBX(x64emu_t *emu, uint64_t v); void SetRCX(x64emu_t *emu, uint64_t v); @@ -35,8 +36,8 @@ void SetRSI(x64emu_t *emu, uint64_t v); void SetRBP(x64emu_t *emu, uint64_t v); void SetRSP(x64emu_t *emu, uint64_t v); void SetRIP(x64emu_t *emu, uint64_t v); -//void SetFS(x64emu_t *emu, uint16_t v); -//uint16_t GetFS(x64emu_t *emu); +void SetFS(x64emu_t *emu, uint16_t v); +uint16_t GetFS(x64emu_t *emu); uint64_t GetRSP(x64emu_t *emu); uint64_t GetRBP(x64emu_t *emu); void ResetFlags(x64emu_t *emu); diff --git a/src/include/x64run.h b/src/include/x64run.h index a5d4528e..9eed07c0 100644 --- a/src/include/x64run.h +++ b/src/include/x64run.h @@ -10,6 +10,9 @@ void DynaRun(x64emu_t *emu); uint32_t LibSyscall(x64emu_t *emu); void PltResolver64(x64emu_t* emu); +#ifdef BOX32 +void PltResolver32(x64emu_t* emu); +#endif extern uintptr_t pltResolver64; int GetTID(void); -- cgit 1.4.1