From 0810859c21fdc866afb1645b4bb207df386fcb58 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Sun, 19 Feb 2023 11:41:31 +0100 Subject: Fixed a regression with factorio --- src/emu/x87emu_private.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c index fc104557..2114c00d 100755 --- a/src/emu/x87emu_private.c +++ b/src/emu/x87emu_private.c @@ -201,7 +201,7 @@ void D2LD(void* d, void* ld) double FromLD(void* ld) { - double ret = 0; + double ret; // cannot add = 0; it break factorio (issue when calling fmodl) LD2D(ld, &ret); return ret; } @@ -210,7 +210,7 @@ double FromLD(void* ld) long double LD2localLD(void* ld) { // local implementation may not be try Quad precision, but double-double precision, so simple way to keep the 80bits precision in the conversion - double ret = 0; + double ret; // cannot add = 0; it break factorio (issue when calling fmodl) LD2D(ld, &ret); return ret; } -- cgit 1.4.1 From 4610f451e99610043461401f4c9acd3de408a02d Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Sun, 19 Feb 2023 14:50:01 +0100 Subject: Improved convertion to/from 80bits double, and added BOX64_X87_NO80BITS to not handle them --- docs/USAGE.md | 10 ++++++++++ src/dynarec/arm64/dynarec_arm64_db.c | 30 +++++++++++++++++++++--------- src/emu/x87emu_private.c | 30 +++++++++++++++++++++++++++++- src/include/debug.h | 1 + src/main.c | 7 +++++++ src/tools/rcfile.c | 1 + 6 files changed, 69 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/docs/USAGE.md b/docs/USAGE.md index bc2bdea3..17fadb61 100755 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -187,6 +187,16 @@ Behavior with FillBlock is not availble (FillBlock build Dynarec blocks and is n * 0 : Dynarec will not wait for FillBlock to ready and use Interpreter instead (might speedup a bit massive multithread or JIT programs) * 1 : Dynarec will wait for FillBlock to be ready (Default) +#### BOX64_SSE_FLUSHTO0 * +Handling of SSE Flush to 0 flags +* 0 : Just track the flag (Default) +* 1 : Direct apply of SSE Flush to 0 flag + +#### BOX64_X87_NO80BITS * +Handling of x87 80bits long double +* 0 : Try to handle 80bits long double as precise as possible (Default) +* 1 : Handle them as double + #### BOX64_LIBGL * * libXXXX set the name for libGL (defaults to libGL.so.1). * /PATH/TO/libGLXXX : Sets the name and path for libGL diff --git a/src/dynarec/arm64/dynarec_arm64_db.c b/src/dynarec/arm64/dynarec_arm64_db.c index b9d71080..e40cc71d 100644 --- a/src/dynarec/arm64/dynarec_arm64_db.c +++ b/src/dynarec/arm64/dynarec_arm64_db.c @@ -295,21 +295,33 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin STRx_U12(x5, ed, 0); STRH_U12(x6, ed, 8); } else { - if(ed!=x1) { - MOVx_REG(x1, ed); + if(box64_x87_no80bits) { + v1 = x87_do_push(dyn, ninst, x1, NEON_CACHE_ST_D); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); + VLDR64_U12(v1, ed, fixedaddress); + } else { + if(ed!=x1) { + MOVx_REG(x1, ed); + } + x87_do_push_empty(dyn, ninst, x3); + CALL(arm_fld, -1); } - x87_do_push_empty(dyn, ninst, x3); - CALL(arm_fld, -1); } break; case 7: INST_NAME("FSTP tbyte"); - x87_forget(dyn, ninst, x1, x3, 0); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); - if(ed!=x1) { - MOVx_REG(x1, ed); + if(box64_x87_no80bits) { + v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0); + VSTR64_U12(v1, wback, fixedaddress); + } else { + x87_forget(dyn, ninst, x1, x3, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0); + if(ed!=x1) { + MOVx_REG(x1, ed); + } + CALL(arm_fstp, -1); } - CALL(arm_fstp, -1); x87_do_pop(dyn, ninst, x3); break; default: diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c index 2114c00d..9f4b8af6 100755 --- a/src/emu/x87emu_private.c +++ b/src/emu/x87emu_private.c @@ -83,6 +83,10 @@ void fpu_fbld(x64emu_t* emu, uint8_t* s) { // long double (80bits) -> double (64bits) void LD2D(void* ld, void* d) { + if(box64_x87_no80bits) { + *(uint64_t*)d = *(uint64_t*)ld; + return; + } FPU_t result; #pragma pack(push, 1) struct { @@ -122,7 +126,7 @@ void LD2D(void* ld, void* d) *(uint64_t*)d = result.q; return; } - if(((uint32_t)(val.b&0x7fff)==0) || (exp64<=0)) { + if(((uint32_t)(val.b&0x7fff)==0) || (exp64<-1074)) { //if(val.f.q==0) // zero //if(val.f.q!=0) @@ -134,6 +138,18 @@ void LD2D(void* ld, void* d) return; } + if(exp64<=0 && val.f.q) { + // try to see if it can be a denormal + int one = -exp64-1022; + uint64_t r = 0; + if(val.b&0x8000) + r |= 0x8000000000000000L; + r |= val.f.q>>one; + *(uint64_t*)d = r; + return; + + } + if(exp64>=0x7ff) { // to big value... result.d = HUGE_VAL; @@ -154,6 +170,10 @@ void LD2D(void* ld, void* d) // double (64bits) -> long double (80bits) void D2LD(void* d, void* ld) { + if(box64_x87_no80bits) { + *(uint64_t*)ld = *(uint64_t*)d; + return; + } #pragma pack(push, 1) struct { FPU_t f; @@ -190,6 +210,12 @@ void D2LD(void* d, void* ld) if(exp80!=0){ mant80final |= 0x8000000000000000L; exp80final += (BIAS80 - BIAS64); + } else if(mant80final!=0) { + // denormals -> normal + exp80final = BIAS80-1023; + int one = __builtin_clz(mant80final) + 1; + exp80final -= one; + mant80final<<=one; } } val.b = ((int16_t)(sign80)<<15)| (int16_t)(exp80final); @@ -201,6 +227,8 @@ void D2LD(void* d, void* ld) double FromLD(void* ld) { + if(box64_x87_no80bits) + return *(double*)ld; double ret; // cannot add = 0; it break factorio (issue when calling fmodl) LD2D(ld, &ret); return ret; diff --git a/src/include/debug.h b/src/include/debug.h index 7251c728..06ec0d8c 100755 --- a/src/include/debug.h +++ b/src/include/debug.h @@ -47,6 +47,7 @@ extern uint64_t start_cnt; extern int box64_nosandbox; extern int box64_dummy_crashhandler; extern int box64_sse_flushto0; +extern int box64_x87_no80bits; extern int allow_missing_libs; extern int box64_mapclean; extern int box64_prefer_wrapped; diff --git a/src/main.c b/src/main.c index 6df6c8b1..b8ee9f4b 100755 --- a/src/main.c +++ b/src/main.c @@ -92,6 +92,7 @@ int allow_missing_libs = 0; int box64_prefer_emulated = 0; int box64_prefer_wrapped = 0; int box64_sse_flushto0 = 0; +int box64_x87_no80bits = 0; int fix_64bit_inodes = 0; int box64_dummy_crashhandler = 1; int box64_mapclean = 0; @@ -951,6 +952,12 @@ void LoadEnvVars(box64context_t *context) printf_log(LOG_INFO, "BOX64: Direct apply of SSE Flush to 0 flag\n"); } } + if(getenv("BOX64_X87_NO80BITS")) { + if (strcmp(getenv("BOX64_X87_NO80BITS"), "1")==0) { + box64_x87_no80bits = 1; + printf_log(LOG_INFO, "BOX64: all 80bits x87 long double will be handle as double\n"); + } + } if(getenv("BOX64_PREFER_WRAPPED")) { if (strcmp(getenv("BOX64_PREFER_WRAPPED"), "1")==0) { box64_prefer_wrapped = 1; diff --git a/src/tools/rcfile.c b/src/tools/rcfile.c index a1914fab..e3789a20 100644 --- a/src/tools/rcfile.c +++ b/src/tools/rcfile.c @@ -64,6 +64,7 @@ ENTRYBOOL(BOX64_X11THREADS, box64_x11threads) \ ENTRYBOOL(BOX64_X11GLX, box64_x11glx) \ ENTRYDSTRING(BOX64_LIBGL, box64_libGL) \ ENTRYBOOL(BOX64_SSE_FLUSHTO0, box64_sse_flushto0) \ +ENTRYBOOL(BOX64_X87_NO80BITS, box64_x87_no80bits) \ ENTRYSTRING_(BOX64_EMULATED_LIBS, emulated_libs) \ ENTRYBOOL(BOX64_ALLOWMISSINGLIBS, allow_missing_libs) \ ENTRYBOOL(BOX64_PREFER_WRAPPED, box64_prefer_wrapped) \ -- cgit 1.4.1 From 272d4efd4b5e07b911835b5e86d8339ea2a45cb4 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Sun, 19 Feb 2023 15:04:36 +0100 Subject: Hide a warning when running java stuffs --- src/elfs/elfloader.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/elfs/elfloader.c b/src/elfs/elfloader.c index 9f6a4741..2082322d 100755 --- a/src/elfs/elfloader.c +++ b/src/elfs/elfloader.c @@ -493,7 +493,7 @@ int RelocateElfREL(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t* end = globend; } if (!offs) { - if(strcmp(symname, "__gmon_start__") && strcmp(symname, "data_start") && strcmp(symname, "__data_start")) + if(strcmp(symname, "__gmon_start__") && strcmp(symname, "data_start") && strcmp(symname, "__data_start") && strcmp(symname, "collector_func_load")) printf_log(LOG_NONE, "%s: Global Symbol %s (ver=%d/%s) not found, cannot apply R_X86_64_GLOB_DAT @%p (%p) in %s\n", (bind==STB_WEAK)?"Warning":"Error", symname, version, vername?vername:"(none)", p, *(void**)p, head->name); } else { printf_dump(LOG_NEVER, "Apply %s R_X86_64_GLOB_DAT @%p (%p -> %p) on sym=%s (ver=%d/%s)\n", (bind==STB_LOCAL)?"Local":"Global", p, (void*)(p?(*p):0), (void*)offs, symname, version, vername?vername:"(none)"); @@ -702,7 +702,7 @@ int RelocateElfRELA(lib_t *maplib, lib_t *local_maplib, int bindnow, elfheader_t end = globend; } if (!offs) { - if(strcmp(symname, "__gmon_start__") && strcmp(symname, "data_start") && strcmp(symname, "__data_start")) + if(strcmp(symname, "__gmon_start__") && strcmp(symname, "data_start") && strcmp(symname, "__data_start") && strcmp(symname, "collector_func_load")) printf_log((bind==STB_WEAK)?LOG_INFO:LOG_NONE, "%s: Global Symbol %s not found, cannot apply R_X86_64_GLOB_DAT @%p (%p) in %s\n", (bind==STB_WEAK)?"Warning":"Error", symname, p, *(void**)p, head->name); } else { printf_dump(LOG_NEVER, "Apply %s R_X86_64_GLOB_DAT @%p (%p -> %p) on sym=%s (ver=%d/%s)\n", (bind==STB_LOCAL)?"Local":"Global", p, (void*)(p?(*p):0), (void*)offs, symname, version, vername?vername:"(none)"); -- cgit 1.4.1 From 8e9ac1b9ac492cb02f59300b343e41b37de28ad4 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Sun, 19 Feb 2023 15:12:46 +0100 Subject: Added some more wrapped functions to gstreamer and gsttag --- src/wrapped/wrappedgstreamer_private.h | 2 +- src/wrapped/wrappedgsttag_private.h | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/wrapped/wrappedgstreamer_private.h b/src/wrapped/wrappedgstreamer_private.h index 23510223..3e481530 100644 --- a/src/wrapped/wrappedgstreamer_private.h +++ b/src/wrapped/wrappedgstreamer_private.h @@ -1322,7 +1322,7 @@ GO(gst_sample_ref, pFp) //GO(gst_sample_set_caps, //GO(gst_sample_set_info, //GO(gst_sample_set_segment, -//DATAB(_gst_sample_type, +DATAB(_gst_sample_type, sizeof(long)) //GO(gst_sample_unref, GO(gst_scheduling_flags_get_type, pFv) GO(gst_search_mode_get_type, pFv) diff --git a/src/wrapped/wrappedgsttag_private.h b/src/wrapped/wrappedgsttag_private.h index 2aaa7b24..26e284ae 100644 --- a/src/wrapped/wrappedgsttag_private.h +++ b/src/wrapped/wrappedgsttag_private.h @@ -10,11 +10,11 @@ //GO(gst_tag_from_id3_user_tag, //GO(gst_tag_from_vorbis_tag, //GO(gst_tag_get_id3v2_tag_size, -//GO(gst_tag_get_language_code_iso_639_1, -//GO(gst_tag_get_language_code_iso_639_2B, -//GO(gst_tag_get_language_code_iso_639_2T, -//GO(gst_tag_get_language_codes, -//GO(gst_tag_get_language_name, +GO(gst_tag_get_language_code_iso_639_1, pFp) +GO(gst_tag_get_language_code_iso_639_2B, pFp) +GO(gst_tag_get_language_code_iso_639_2T, pFp) +GO(gst_tag_get_language_codes, pFv) +GO(gst_tag_get_language_name, pFp) //GO(gst_tag_get_license_description, //GO(gst_tag_get_license_flags, //GO(gst_tag_get_license_jurisdiction, -- cgit 1.4.1 From d0a724bb08f06f1581621720b114335403bc29d9 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Sun, 19 Feb 2023 17:58:11 +0100 Subject: [DYNAREC] Optimized DB /7 opcode --- src/dynarec/arm64/arm64_emitter.h | 11 ++++++++ src/dynarec/arm64/arm64_printer.c | 6 ++--- src/dynarec/arm64/dynarec_arm64_db.c | 51 ++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index 9f573f8f..c3d62074 100755 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -266,6 +266,13 @@ #define STRH_U12(Rt, Rn, imm12) EMIT(ST_gen(0b01, 0b01, ((uint32_t)((imm12)>>1))&0xfff, Rn, Rt)) #define STRxw_U12(Rt, Rn, imm12) EMIT(ST_gen((rex.w)?0b11:0b10, 0b01, ((uint32_t)((imm12)>>(2+rex.w)))&0xfff, Rn, Rt)) +#define STU_gen(size, opc, imm9, Rn, Rt) ((size)<<30 | 0b111<<27 | (opc)<<22 | ((imm9)&0x1ff)<<12 | (Rn)<<5 | (Rt)) +#define STURx_I9(Rt, Rn, imm9) EMIT(STU_gen(0b11, 0b00, imm9, Rn, Rt)) +#define STURw_I9(Rt, Rn, imm9) EMIT(STU_gen(0b10, 0b00, imm9, Rn, Rt)) +#define STURxw_I9(Rt, Rn, imm9) EMIT(STU_gen((rex.w)?0b11:0b10, 0b00, imm9, Rn, Rt)) +#define STURH_I9(Rt, Rn, imm9) EMIT(STU_gen(0b01, 0b00, imm9, Rn, Rt)) +#define STURB_I9(Rt, Rn, imm9) EMIT(STU_gen(0b00, 0b00, imm9, Rn, Rt)) + #define STR_REG_gen(size, Rm, option, S, Rn, Rt) ((size)<<30 | 0b111<<27 | 0b00<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | (0b10)<<10 | (Rn)<<5 | (Rt)) #define STRx_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(0b11, Rm, 0b011, 0, Rn, Rt)) #define STRx_REG_LSL3(Rt, Rn, Rm) EMIT(STR_REG_gen(0b11, Rm, 0b011, 1, Rn, Rt)) @@ -649,6 +656,10 @@ #define REV16w(Rd, Rn) EMIT(REV_gen(0, 0b01, Rn, Rd)) #define REV16x(Rd, Rn) EMIT(REV_gen(1, 0b01, Rn, Rd)) +// UDF +#define UDF_gen(imm16) ((imm16)&0xffff) +#define UDF(imm16) EMIT(UDF_gen(imm16)) + // MRS #define MRS_gen(L, o0, op1, CRn, CRm, op2, Rt) (0b1101010100<<22 | (L)<<21 | 1<<20 | (o0)<<19 | (op1)<<16 | (CRn)<<12 | (CRm)<<8 | (op2)<<5 | (Rt)) // mrs x0, nzcv : 1101010100 1 1 1 011 0100 0010 000 00000 o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0010(2) op2=0 diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c index 4bad45d5..e9356773 100755 --- a/src/dynarec/arm64/arm64_printer.c +++ b/src/dynarec/arm64/arm64_printer.c @@ -25,7 +25,7 @@ typedef struct arm64_print_s { uint64_t DecodeBitMasks(int N, int imms, int immr) { - int len = 31-__builtin_clz(N<<6 | ((~imms)&0b111111)); + int len = 31-__builtin_clz((N<<6) | ((~imms)&0b111111)); if(len<1) return 0; int levels = (1< Date: Sun, 19 Feb 2023 23:54:00 +0100 Subject: [DYNAREC] Introduced BOX64_DYNAREC_FASTPAGE to use an alternate way to handle HotPages (faster but crashy) --- docs/USAGE.md | 7 ++++++- src/dynarec/dynablock.c | 15 +++++++++++++-- src/include/debug.h | 1 + src/main.c | 10 ++++++++++ src/tools/rcfile.c | 8 +++++--- 5 files changed, 35 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/docs/USAGE.md b/docs/USAGE.md index 17fadb61..92913438 100755 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -175,7 +175,12 @@ Optimisation of CALL/RET opcodes (not compatible with jit/dynarec/smc) #### BOX64_DYNAREC_HOTPAGE * Handling of HotPage (Page beeing both executed and writen) * 0 : Don't track hotpage -* 1-255 : Trak HotPage, and disable execution of a page beeing writen for N attempts (default is 16) +* 1-255 : Track HotPage, and disable execution of a page beeing writen for N attempts (default is 16) + +#### BOX64_DYNAREC_FASTPAGE * +Will use a faster handling of HotPage (Page beeing both executed and writen) +* 0 : use regular hotpage (Default) +* 1 : Use faster hotpage, taking the risk of running obsolete JIT code (might be faster, but more prone to crash) #### BOX64_DYNAREC_BLEEDING_EDGE * Detect MonoBleedingEdge and apply conservative settings diff --git a/src/dynarec/dynablock.c b/src/dynarec/dynablock.c index 6ff5e53c..9529a91c 100755 --- a/src/dynarec/dynablock.c +++ b/src/dynarec/dynablock.c @@ -205,8 +205,19 @@ dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create) dynablock_t *db = internalDBGetBlock(emu, addr, addr, create, 1); if(db && db->done && db->block && db->need_test) { if(AreaInHotPage((uintptr_t)db->x64_addr, (uintptr_t)db->x64_addr + db->x64_size - 1)) { - dynarec_log(LOG_INFO, "Not running block %p from %p:%p with for %p because it's in a hotpage\n", db, db->x64_addr, db->x64_addr+db->x64_size-1, (void*)addr); - return NULL; + if(box64_dynarec_fastpage) { + uint32_t hash = X31_hash_code(db->x64_addr, db->x64_size); + if(hash==db->hash) // seems ok, run it without reprotecting it + return db; + db->done = 0; // invalidating the block, it's already not good + dynarec_log(LOG_DEBUG, "Invalidating block %p from %p:%p (hash:%X/%X) for %p\n", db, db->x64_addr, db->x64_addr+db->x64_size-1, hash, db->hash, (void*)addr); + // Free db, it's now invalid! + FreeDynablock(db, 1); + return NULL; // not building a new one, it's still a hotpage + } else { + dynarec_log(LOG_INFO, "Not running block %p from %p:%p with for %p because it's in a hotpage\n", db, db->x64_addr, db->x64_addr+db->x64_size-1, (void*)addr); + return NULL; + } } uint32_t hash = X31_hash_code(db->x64_addr, db->x64_size); if(mutex_trylock(&my_context->mutex_dyndump)) { diff --git a/src/include/debug.h b/src/include/debug.h index 06ec0d8c..b9729787 100755 --- a/src/include/debug.h +++ b/src/include/debug.h @@ -23,6 +23,7 @@ extern int box64_dynarec_safeflags; extern int box64_dynarec_callret; extern int box64_dynarec_bleeding_edge; extern int box64_dynarec_hotpage; +extern int box64_dynarec_fastpage; extern int box64_dynarec_wait; #ifdef ARM64 extern int arm64_asimd; diff --git a/src/main.c b/src/main.c index b8ee9f4b..55a18473 100755 --- a/src/main.c +++ b/src/main.c @@ -57,6 +57,7 @@ int box64_dynarec_fastround = 1; int box64_dynarec_safeflags = 1; int box64_dynarec_callret = 0; int box64_dynarec_hotpage = 16; +int box64_dynarec_fastpage = 0; int box64_dynarec_bleeding_edge = 1; int box64_dynarec_wait = 1; uintptr_t box64_nodynarec_start = 0; @@ -563,6 +564,15 @@ void LoadLogEnv() else printf_log(LOG_INFO, "Dynarec will not tag HotPage\n"); } + p = getenv("BOX64_DYNAREC_FASTPAGE"); + if(p) { + if(strlen(p)==1) { + if(p[0]>='0' && p[0]<='1') + box64_dynarec_fastpage = p[0]-'0'; + } + if(box64_dynarec_fastpage) + printf_log(LOG_INFO, "Dynarec will use Fast HotPage\n"); + } p = getenv("BOX64_NODYNAREC"); if(p) { if (strchr(p,'-')) { diff --git a/src/tools/rcfile.c b/src/tools/rcfile.c index e3789a20..60e1a1e5 100644 --- a/src/tools/rcfile.c +++ b/src/tools/rcfile.c @@ -112,7 +112,8 @@ ENTRYINT(BOX64_DYNAREC_SAFEFLAGS, box64_dynarec_safeflags, 0, 2, 2) \ ENTRYBOOL(BOX64_DYNAREC_CALLRET, box64_dynarec_callret) \ ENTRYBOOL(BOX64_DYNAREC_BLEEDING_EDGE, box64_dynarec_bleeding_edge) \ ENTRYINT(BOX64_DYNAREC_HOTPAGE, box64_dynarec_hotpage, 0, 255, 8) \ -ENTRYBOOL(box64_dynarec_wait, box64_dynarec_wait) \ +ENTRYBOOL(BOX64_DYNAREC_FASTPAGE, box64_dynarec_fastpage) \ +ENTRYBOOL(BOX64_DYNAREC_WAIT, box64_dynarec_wait) \ ENTRYSTRING_(BOX64_NODYNAREC, box64_nodynarec) \ #else @@ -129,7 +130,8 @@ IGNORE(BOX64_DYNAREC_SAFEFLAGS) \ IGNORE(BOX64_DYNAREC_CALLRET) \ IGNORE(BOX64_DYNAREC_BLEEDING_EDGE) \ IGNORE(BOX64_DYNAREC_HOTPAGE) \ -IGNORE(BOX64_DYNAREC_wait) \ +IGNORE(BOX64_DYNAREC_FASTPAGE) \ +IGNORE(BOX64_DYNAREC_WAIT) \ IGNORE(BOX64_NODYNAREC) \ #endif @@ -354,7 +356,7 @@ void LoadRCFile(const char* filename) if(0) ; SUPER() else if(len && current_name) { - printf_log(LOG_INFO, "Warning, unsupported %s=%s for [%s] in %s", key, val, current_name, filename); + printf_log(LOG_INFO, "Warning, unsupported %s=%s for [%s] in %s\n", key, val, current_name, filename); } #undef ENTRYBOOL #undef CENTRYBOOL -- cgit 1.4.1 From 692ad43285a4182669254449d343066e2bdfe318 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Sun, 19 Feb 2023 23:55:18 +0100 Subject: [DYNAREC] Optimized FillBlock64 to avoid 3 for loop on all block instructions and a temporary malloc/free --- src/dynarec/arm64/dynarec_arm64_pass2.h | 12 ++++++++---- src/dynarec/arm64/dynarec_arm64_pass3.h | 9 +++++++-- src/dynarec/arm64/dynarec_arm64_private.h | 1 + src/dynarec/dynarec_native.c | 25 ++++--------------------- src/include/dynarec_native.h | 3 +++ 5 files changed, 23 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/dynarec/arm64/dynarec_arm64_pass2.h b/src/dynarec/arm64/dynarec_arm64_pass2.h index 29d5f01a..edc43ae5 100755 --- a/src/dynarec/arm64/dynarec_arm64_pass2.h +++ b/src/dynarec/arm64/dynarec_arm64_pass2.h @@ -1,14 +1,18 @@ #define INIT dyn->native_size = 0 -#define FINI if(ninst) {dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size);} +#define FINI \ + if(ninst) { \ + dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size); \ + dyn->insts_size += 1+((dyn->insts[ninst].x64.size>dyn->insts[ninst].size)?dyn->insts[ninst].x64.size:dyn->insts[ninst].size)/15; \ + } #define MESSAGE(A, ...) #define EMIT(A) dyn->insts[ninst].size+=4; dyn->native_size+=4 #define NEW_INST \ if(ninst) { \ dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size); \ - if(ninst && isInstClean(dyn, ninst)) { \ - if(dyn->last_ip!=ip) dyn->last_ip = 0; \ - } \ + if(isInstClean(dyn, ninst) && dyn->last_ip!=ip) \ + dyn->last_ip = 0; \ + dyn->insts_size += 1+((dyn->insts[ninst-1].x64.size>dyn->insts[ninst-1].size)?dyn->insts[ninst-1].x64.size:dyn->insts[ninst-1].size)/15; \ } #define INST_EPILOG dyn->insts[ninst].epilog = dyn->native_size; #define INST_NAME(name) diff --git a/src/dynarec/arm64/dynarec_arm64_pass3.h b/src/dynarec/arm64/dynarec_arm64_pass3.h index 13b2b323..ae79c11f 100755 --- a/src/dynarec/arm64/dynarec_arm64_pass3.h +++ b/src/dynarec/arm64/dynarec_arm64_pass3.h @@ -1,5 +1,8 @@ #define INIT -#define FINI +#define FINI \ + if(ninst) \ + addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst].x64.size, dyn->insts[ninst].size/4); \ + addInst(dyn->instsize, &dyn->insts_size, 0, 0); #define EMIT(A) \ if(box64_dynarec_dump) {dynarec_log(LOG_NONE, "\t%08x\t%s\n", (uint32_t)(A), arm64_print(A, (uintptr_t)dyn->block));} \ *(uint32_t*)(dyn->block) = (uint32_t)(A); \ @@ -10,7 +13,9 @@ #define NEW_INST \ if(ninst && isInstClean(dyn, ninst)) { \ if(dyn->last_ip!=ip) dyn->last_ip = 0; \ - } + } \ + if(ninst) \ + addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst-1].x64.size, dyn->insts[ninst-1].size/4); #define INST_EPILOG #define INST_NAME(name) \ if(box64_dynarec_dump) {\ diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h index 6abdf829..1b194e97 100755 --- a/src/dynarec/arm64/dynarec_arm64_private.h +++ b/src/dynarec/arm64/dynarec_arm64_private.h @@ -103,6 +103,7 @@ typedef struct dynarec_arm_s { int* predecessor;// single array of all predecessor dynablock_t* dynablock; instsize_t* instsize; + size_t insts_size; // size of the instruction size array (calculated) uint8_t smread; // for strongmem model emulation uint8_t smwrite; // for strongmem model emulation } dynarec_arm_t; diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c index f8d29b61..c3f01ff2 100755 --- a/src/dynarec/dynarec_native.c +++ b/src/dynarec/dynarec_native.c @@ -240,7 +240,7 @@ int is_instructions(dynarec_native_t *dyn, uintptr_t addr, int n) return (i==n)?1:0; } -instsize_t* addInst(instsize_t* insts, size_t* size, size_t* cap, int x64_size, int native_size) +void addInst(instsize_t* insts, size_t* size, int x64_size, int native_size) { // x64 instruction is <16 bytes int toadd; @@ -248,10 +248,6 @@ instsize_t* addInst(instsize_t* insts, size_t* size, size_t* cap, int x64_size, toadd = 1 + x64_size/15; else toadd = 1 + native_size/15; - if((*size)+toadd>(*cap)) { - *cap = (*size)+toadd; - insts = (instsize_t*)customRealloc(insts, (*cap)*sizeof(instsize_t)); - } while(toadd) { if(x64_size>15) insts[*size].x64 = 15; @@ -266,7 +262,6 @@ instsize_t* addInst(instsize_t* insts, size_t* size, size_t* cap, int x64_size, ++(*size); --toadd; } - return insts; } // add a value to table64 (if needed) and gives back the imm19 to use in LDR_literal @@ -391,7 +386,6 @@ void CancelBlock64(int need_lock) } customFree(helper->next); customFree(helper->insts); - customFree(helper->instsize); customFree(helper->predecessor); customFree(helper->table64); if(helper->dynablock && helper->dynablock->actual_block) @@ -522,18 +516,7 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr) { // pass 2, instruction size native_pass2(&helper, addr); // keep size of instructions for signal handling - size_t insts_rsize = 0; - { - size_t insts_size = 0; - size_t cap = 1; - for(int i=0; ihelper.insts[i].size)?helper.insts[i].x64.size:helper.insts[i].size)/15; - helper.instsize = (instsize_t*)customCalloc(cap, sizeof(instsize_t)); - for(int i=0; iinstsize = instsize; // ok, free the helper now customFree(helper.insts); helper.insts = NULL; customFree(helper.table64); helper.table64 = NULL; - customFree(helper.instsize); helper.instsize = NULL; customFree(helper.predecessor); helper.predecessor = NULL; diff --git a/src/include/dynarec_native.h b/src/include/dynarec_native.h index 9fe26323..eff5a6bf 100755 --- a/src/include/dynarec_native.h +++ b/src/include/dynarec_native.h @@ -3,6 +3,9 @@ typedef struct dynablock_s dynablock_t; typedef struct x64emu_s x64emu_t; +typedef struct instsize_s instsize_t; + +void addInst(instsize_t* insts, size_t* size, int x64_size, int native_size); void CancelBlock64(int need_lock); void* FillBlock64(dynablock_t* block, uintptr_t addr); -- cgit 1.4.1 From 9ef48de1a860a3fc2ce33e7aaaa942212b279613 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Mon, 20 Feb 2023 16:38:27 +0100 Subject: [DYNAREC] Fixed LZCNT and TZCNT opcode, for good --- src/dynarec/arm64/dynarec_arm64_f30f.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c b/src/dynarec/arm64/dynarec_arm64_f30f.c index 1f60b843..2d0b8c6b 100755 --- a/src/dynarec/arm64/dynarec_arm64_f30f.c +++ b/src/dynarec/arm64/dynarec_arm64_f30f.c @@ -367,13 +367,13 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n GETED(0); GETGD; TSTxw_REG(ed, ed); - CSETw(x1, cEQ); - BFIw(xFlags, x1, F_CF, 1); // CF = is source 0? - RBITxw(x1, ed); // reverse - CLZxw(gd, x1); // x2 gets leading 0 == TZCNT + CSETw(x3, cEQ); + BFIw(xFlags, x3, F_CF, 1); // CF = is source 0? + RBITxw(x3, ed); // reverse + CLZxw(gd, x3); // x2 gets leading 0 == TZCNT TSTxw_REG(gd, gd); - CSETw(x1, cEQ); - BFIw(xFlags, x1, F_ZF, 1); // ZF = is dest 0? + CSETw(x3, cEQ); + BFIw(xFlags, x3, F_ZF, 1); // ZF = is dest 0? break; case 0xBD: INST_NAME("LZCNT Gd, Ed"); @@ -383,12 +383,12 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n GETED(0); GETGD; TSTxw_REG(ed, ed); - CSETw(x1, cEQ); - BFIw(xFlags, x1, F_CF, 1); // CF = is source 0? + CSETw(x3, cEQ); + BFIw(xFlags, x3, F_CF, 1); // CF = is source 0? CLZxw(gd, ed); // x2 gets leading 0 == LZCNT TSTxw_REG(gd, gd); - CSETw(x1, cEQ); - BFIw(xFlags, x1, F_ZF, 1); // ZF = is dest 0? + CSETw(x3, cEQ); + BFIw(xFlags, x3, F_ZF, 1); // ZF = is dest 0? break; case 0xC2: -- cgit 1.4.1 From c6dd73065db825d7456c00cc3f1127aff195ebe7 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Mon, 20 Feb 2023 19:48:47 +0100 Subject: [DYNAREC] This small utility function can be static --- src/dynarec/dynablock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/dynarec/dynablock.c b/src/dynarec/dynablock.c index 9529a91c..59530aed 100755 --- a/src/dynarec/dynablock.c +++ b/src/dynarec/dynablock.c @@ -68,7 +68,7 @@ void MarkDynablock(dynablock_t* db) } } -int IntervalIntersects(uintptr_t start1, uintptr_t end1, uintptr_t start2, uintptr_t end2) +static int IntervalIntersects(uintptr_t start1, uintptr_t end1, uintptr_t start2, uintptr_t end2) { if(start1 > end2 || start2 > end1) return 0; -- cgit 1.4.1 From ed4e33d8886a7e8c89318fa41535e9158a3c9893 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Mon, 20 Feb 2023 19:49:55 +0100 Subject: [DYNAREC] Directly run program Segfault handler for Access Error with PROT_DYNAREC_R, after a quick unprotectDB first --- src/libtools/signals.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/libtools/signals.c b/src/libtools/signals.c index 40e9a8a7..b2da2723 100755 --- a/src/libtools/signals.c +++ b/src/libtools/signals.c @@ -855,7 +855,7 @@ void my_box64signalhandler(int32_t sig, siginfo_t* info, void * ucntx) } dynablock_t* db = NULL; int db_searched = 0; - if ((sig==SIGSEGV) && (addr) && (info->si_code == SEGV_ACCERR) && (prot&PROT_CUSTOM)) { + if ((sig==SIGSEGV) && (addr) && (info->si_code == SEGV_ACCERR) && (prot&PROT_DYNAREC)) { mutex_lock(&mutex_dynarec_prot); // check if SMC inside block db = FindDynablockFromNativeAddress(pc); @@ -966,6 +966,9 @@ dynarec_log(/*LOG_DEBUG*/LOG_INFO, "Repeated SIGSEGV with Access error on %p for glitch2_prot = 0; } mutex_unlock(&mutex_dynarec_prot); + } else if ((sig==SIGSEGV) && (addr) && (info->si_code == SEGV_ACCERR) && (prot&PROT_DYNAREC_R)) { + // unprotect and continue to signal handler, because Write is not there on purpose + unprotectDB((uintptr_t)addr, 1, 1); // unprotect 1 byte... But then, the whole page will be unprotected } if(!db_searched) db = FindDynablockFromNativeAddress(pc); -- cgit 1.4.1 From fb5ac9c4cddf2bf03ee62dc4addac719c0cf02ab Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Mon, 20 Feb 2023 19:50:26 +0100 Subject: Fixed a improved HotPage handling --- docs/USAGE.md | 2 +- src/custommem.c | 58 +++++++++++++++++++++++++++------------------------------ src/main.c | 2 +- 3 files changed, 29 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/docs/USAGE.md b/docs/USAGE.md index 92913438..422dc176 100755 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -175,7 +175,7 @@ Optimisation of CALL/RET opcodes (not compatible with jit/dynarec/smc) #### BOX64_DYNAREC_HOTPAGE * Handling of HotPage (Page beeing both executed and writen) * 0 : Don't track hotpage -* 1-255 : Track HotPage, and disable execution of a page beeing writen for N attempts (default is 16) +* 1-255 : Track HotPage, and disable execution of a page beeing writen for N attempts (default is 4) #### BOX64_DYNAREC_FASTPAGE * Will use a faster handling of HotPage (Page beeing both executed and writen) diff --git a/src/custommem.c b/src/custommem.c index 00e24246..6638af31 100644 --- a/src/custommem.c +++ b/src/custommem.c @@ -557,25 +557,29 @@ void FreeDynarecMap(uintptr_t addr) } } -uintptr_t getSizeJmpDefault(uintptr_t addr, size_t maxsize) +static uintptr_t getDBSize(uintptr_t addr, size_t maxsize, dynablock_t** db) { - uintptr_t idx3, idx2, idx1, idx0; - idx3 = (((uintptr_t)addr)>>48)&0xffff; + const uintptr_t idx3 = (addr>>48)&0xffff; + const uintptr_t idx2 = (addr>>32)&0xffff; + const uintptr_t idx1 = (addr>>16)&0xffff; + uintptr_t idx0 = addr&0xffff; + *db = *(dynablock_t**)(box64_jmptbl3[idx3][idx2][idx1][idx0]- sizeof(void*)); + if(*db) + return 1; if(box64_jmptbl3[idx3] == box64_jmptbldefault2) - return ((addr&~((1LL<<48)-1))|0xffffffffffffLL)-addr + 1; - idx2 = (((uintptr_t)addr)>>32)&0xffff; + return (addr|0xffffffffffffLL)+1-addr; if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1) - return ((addr&~((1LL<<32)-1))|0xffffffffLL)-addr + 1; - idx1 = (((uintptr_t)addr)>>16)&0xffff; + return (addr|0xffffffffLL)+1-addr; uintptr_t* block = box64_jmptbl3[idx3][idx2][idx1]; if(block == box64_jmptbldefault0) - return ((addr&~((1LL<<16)-1))|0xffffLL)-addr + 1; - idx0 = addr&0xffff; + return (addr|0xffffLL)+1-addr; if (maxsize>0x10000) maxsize = 0x10000; while(idx0max_db_size)?0:(addr-my_context->max_db_size)):addr; dynarec_log(LOG_DEBUG, "cleanDBFromAddressRange %p/%p -> %p %s\n", (void*)addr, (void*)start_addr, (void*)(addr+size-1), destroy?"destroy":"mark"); - for (uintptr_t i=start_addr; i>16); i<=(end>>16); ++i) if(memprot[i].prot==memprot_default) { uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t)); - /*if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { - box_free(newblock); - }*/ memprot[i].prot = newblock; } for (uintptr_t i=idx; i<=end; ++i) { @@ -817,14 +816,11 @@ void unprotectDB(uintptr_t addr, size_t size, int mark) if(end>16); i<=(end>>16); ++i) + /*for (uintptr_t i=(idx>>16); i<=(end>>16); ++i) if(memprot[i].prot==memprot_default) { uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t)); - /*if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { - box_free(newblock); - }*/ memprot[i].prot = newblock; - } + }*/ for (uintptr_t i=idx; i<=end; ++i) { uint32_t prot = memprot[i>>16].prot[i&0xffff]; if(prot&PROT_DYNAREC) { @@ -1028,7 +1024,7 @@ void allocProtection(uintptr_t addr, size_t size, uint32_t prot) end = (1LL<<(48-MEMPROT_SHIFT))-1; mutex_lock(&mutex_prot); addMapMem(addr, addr+size-1); - for (uintptr_t i=(idx>>16); i<=(end>>16); ++i) + /*for (uintptr_t i=(idx>>16); i<=(end>>16); ++i) if(memprot[i].prot==memprot_default) { uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t)); memprot[i].prot = newblock; @@ -1042,23 +1038,23 @@ void allocProtection(uintptr_t addr, size_t size, uint32_t prot) block[ii] = prot; } i+=finish-start; // +1 from the "for" loop - } + }*/ mutex_unlock(&mutex_prot); } #ifdef DYNAREC int IsInHotPage(uintptr_t addr) { - if(addr<=(1LL<<48)) + if(addr>=(1LL<<48)) return 0; int idx = (addr>>MEMPROT_SHIFT)>>16; - uint8_t *block = memprot[idx].hot; - if(!block) + uint8_t *hot = memprot[idx].hot; + if(!hot) return 0; int base = (addr>>MEMPROT_SHIFT)&0xffff; - if(!block[base]) + if(!hot[base]) return 0; // decrement hot - native_lock_decifnot0b(&block[base]); + native_lock_decifnot0b(&hot[base]); return 1; } diff --git a/src/main.c b/src/main.c index 55a18473..6b5e6dc7 100755 --- a/src/main.c +++ b/src/main.c @@ -56,7 +56,7 @@ int box64_dynarec_fastnan = 1; int box64_dynarec_fastround = 1; int box64_dynarec_safeflags = 1; int box64_dynarec_callret = 0; -int box64_dynarec_hotpage = 16; +int box64_dynarec_hotpage = 4; int box64_dynarec_fastpage = 0; int box64_dynarec_bleeding_edge = 1; int box64_dynarec_wait = 1; -- cgit 1.4.1 From 3fe2843bb3b95d8edd63d21c4f6acad7f652be91 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Mon, 20 Feb 2023 21:18:58 +0100 Subject: Added some more libFAudio wrapped functions --- src/wrapped/wrappedfaudio_private.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/wrapped/wrappedfaudio_private.h b/src/wrapped/wrappedfaudio_private.h index 3c145ea7..bec2d849 100644 --- a/src/wrapped/wrappedfaudio_private.h +++ b/src/wrapped/wrappedfaudio_private.h @@ -106,13 +106,13 @@ GO(FACTWave_SetMatrixCoefficients, uFpuup) GO(FACTWave_SetPitch, uFpw) GO(FACTWave_SetVolume, uFpf) GO(FACTWave_Stop, uFpu) -//GO(FAPOBase_AddRef, +GO(FAPOBase_AddRef, iFp) //GO(FAPOBase_BeginProcess, //GO(FAPOBase_CalcInputFrames, //GO(FAPOBase_CalcOutputFrames, //GO(FAPOBase_EndProcess, //GO(FAPOBase_GetParameters, -//GO(FAPOBase_GetRegistrationProperties, +GO(FAPOBase_GetRegistrationProperties, uFpp) //GO(FAPOBase_Initialize, //GO(FAPOBase_IsInputFormatSupported, //GO(FAPOBase_IsOutputFormatSupported, @@ -120,7 +120,7 @@ GO(FACTWave_Stop, uFpu) //GO(FAPOBase_OnSetParameters, //GO(FAPOBase_ParametersChanged, //GO(FAPOBase_ProcessThru, -//GO(FAPOBase_Release, +GO(FAPOBase_Release, iFp) //GO(FAPOBase_Reset, //GO(FAPOBase_SetParameters, //GO(FAPOBase_UnlockForProcess, -- cgit 1.4.1