diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-09-13 01:03:00 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-09-12 19:03:00 +0200 |
| commit | 9544fbdff9f02f47a64c7d0b31d7b9c9ecd3c703 (patch) | |
| tree | bb38884877fd5046a34b2b90fbb60d9dd774d5f6 /src | |
| parent | 431365d1f8b9b5c022a48b554ccb647eaac674a1 (diff) | |
| download | box64-9544fbdff9f02f47a64c7d0b31d7b9c9ecd3c703.tar.gz box64-9544fbdff9f02f47a64c7d0b31d7b9c9ecd3c703.zip | |
[RV64_DYNAREC] Refactored vector SEW tracking (#1820)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_private.h | 2 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_private.h | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_functions.c | 12 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 10 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_pass0.h | 33 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_pass1.h | 22 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_pass2.h | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_pass3.h | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_private.h | 9 |
9 files changed, 46 insertions, 48 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h index c1015abd..840fb47f 100644 --- a/src/dynarec/arm64/dynarec_arm64_private.h +++ b/src/dynarec/arm64/dynarec_arm64_private.h @@ -89,7 +89,7 @@ typedef struct instruction_arm64_s { uint16_t ymm0_in; // bitmap of ymm to zero at purge uint16_t ymm0_add; // the ymm0 added by the opcode uint16_t ymm0_sub; // the ymm0 removed by the opcode - uint16_t ymm0_out; // the ymmm0 at th end of the opcode + uint16_t ymm0_out; // the ymm0 at th end of the opcode uint16_t ymm0_pass2, ymm0_pass3; uint8_t barrier_maybe; uint8_t will_write; diff --git a/src/dynarec/la64/dynarec_la64_private.h b/src/dynarec/la64/dynarec_la64_private.h index 56b2e715..27a93a66 100644 --- a/src/dynarec/la64/dynarec_la64_private.h +++ b/src/dynarec/la64/dynarec_la64_private.h @@ -85,7 +85,7 @@ typedef struct instruction_la64_s { uint16_t ymm0_in; // bitmap of ymm to zero at purge uint16_t ymm0_add; // the ymm0 added by the opcode uint16_t ymm0_sub; // the ymm0 removed by the opcode - uint16_t ymm0_out; // the ymmm0 at th end of the opcode + uint16_t ymm0_out; // the ymm0 at th end of the opcode uint16_t ymm0_pass2, ymm0_pass3; uint8_t barrier_maybe; uint8_t will_write; diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c index 37b81c09..3f0a04ad 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.c +++ b/src/dynarec/rv64/dynarec_rv64_functions.c @@ -379,11 +379,11 @@ int sewNeedsTransform(dynarec_rv64_t* dyn, int ninst) { int i2 = dyn->insts[ninst].x64.jmp_insts; - if (dyn->insts[i2].vector_sew == VECTOR_SEWNA) + if (dyn->insts[i2].vector_sew_entry == VECTOR_SEWNA) return 0; - else if (dyn->insts[i2].vector_sew == VECTOR_SEWANY && dyn->insts[ninst].vector_sew != VECTOR_SEWNA) + else if (dyn->insts[i2].vector_sew_entry == VECTOR_SEWANY && dyn->insts[ninst].vector_sew_exit != VECTOR_SEWNA) return 0; - else if (dyn->insts[i2].vector_sew == dyn->insts[ninst].vector_sew) + else if (dyn->insts[i2].vector_sew_entry == dyn->insts[ninst].vector_sew_exit) return 0; return 1; @@ -616,7 +616,7 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r }; if(box64_dynarec_dump) { printf_x64_instruction(rex.is32bits?my_context->dec32:my_context->dec, &dyn->insts[ninst].x64, name); - dynarec_log(LOG_NONE, "%s%p: %d emitted opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d/%d, sew=%d", + dynarec_log(LOG_NONE, "%s%p: %d emitted opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d/%d, sew@entry=%d, sew@exit=%d", (box64_dynarec_dump > 1) ? "\e[32m" : "", (void*)(dyn->native_start + dyn->insts[ninst].address), dyn->insts[ninst].size / 4, @@ -631,7 +631,7 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r dyn->insts[ninst].x64.use_flags, dyn->insts[ninst].x64.need_before, dyn->insts[ninst].x64.need_after, - dyn->smread, dyn->smwrite, dyn->insts[ninst].vector_sew); + dyn->smread, dyn->smwrite, dyn->insts[ninst].vector_sew_entry, dyn->insts[ninst].vector_sew_exit); if(dyn->insts[ninst].pred_sz) { dynarec_log(LOG_NONE, ", pred="); for(int ii=0; ii<dyn->insts[ninst].pred_sz; ++ii) @@ -722,6 +722,7 @@ void fpu_reset(dynarec_rv64_t* dyn) mmx_reset(&dyn->e); sse_reset(&dyn->e); fpu_reset_reg(dyn); + dyn->vector_sew = VECTOR_SEWNA; } void fpu_reset_ninst(dynarec_rv64_t* dyn, int ninst) @@ -730,6 +731,7 @@ void fpu_reset_ninst(dynarec_rv64_t* dyn, int ninst) mmx_reset(&dyn->insts[ninst].e); sse_reset(&dyn->insts[ninst].e); fpu_reset_reg_extcache(&dyn->insts[ninst].e); + dyn->vector_sew = VECTOR_SEWNA; } int fpu_is_st_freed(dynarec_rv64_t* dyn, int ninst, int st) diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index a8ef8f21..489646d5 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -2432,9 +2432,9 @@ static void sewTransform(dynarec_rv64_t* dyn, int ninst, int s1) int j64; int jmp = dyn->insts[ninst].x64.jmp_insts; if (jmp < 0) return; - if (dyn->insts[jmp].vector_sew == VECTOR_SEWNA) return; - MESSAGE(LOG_DUMP, "\tSEW changed to %d ---- ninst=%d -> %d\n", dyn->insts[jmp].vector_sew, ninst, jmp); - vector_vsetvl_emul1(dyn, ninst, s1, dyn->insts[jmp].vector_sew, 1); + if (dyn->insts[jmp].vector_sew_entry == VECTOR_SEWNA) return; + MESSAGE(LOG_DUMP, "\tSEW changed to %d ---- ninst=%d -> %d\n", dyn->insts[jmp].vector_sew_entry, ninst, jmp); + vector_vsetvl_emul1(dyn, ninst, s1, dyn->insts[jmp].vector_sew_entry, 1); } void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3) @@ -2528,10 +2528,10 @@ void fpu_reset_cache(dynarec_rv64_t* dyn, int ninst, int reset_n) #if STEP > 1 // for STEP 2 & 3, just need to refresh with current, and undo the changes (push & swap) dyn->e = dyn->insts[ninst].e; - dyn->vector_sew = dyn->insts[ninst].vector_sew; + dyn->vector_sew = dyn->insts[ninst].vector_sew_exit; #else dyn->e = dyn->insts[reset_n].e; - dyn->vector_sew = dyn->insts[reset_n].vector_sew; + dyn->vector_sew = dyn->insts[reset_n].vector_sew_exit; #endif extcacheUnwind(&dyn->e); #if STEP == 0 diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h index 9a84ab5b..d0d81a6f 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass0.h +++ b/src/dynarec/rv64/dynarec_rv64_pass0.h @@ -19,25 +19,24 @@ #define JUMP(A, C) add_jump(dyn, ninst); add_next(dyn, (uintptr_t)A); SMEND(); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C; dyn->insts[ninst].x64.jmp_insts = 0 #define BARRIER(A) if(A!=BARRIER_MAYBE) {fpu_purgecache(dyn, ninst, 0, x1, x2, x3); dyn->insts[ninst].x64.barrier = A;} else dyn->insts[ninst].barrier_maybe = 1 #define SET_HASCALLRET() dyn->insts[ninst].x64.has_callret = 1 -#define NEW_INST \ - ++dyn->size; \ - memset(&dyn->insts[ninst], 0, sizeof(instruction_native_t)); \ - dyn->insts[ninst].x64.addr = ip; \ - dyn->e.combined1 = dyn->e.combined2 = 0; \ - dyn->e.swapped = 0; \ - dyn->e.barrier = 0; \ - for (int i = 0; i < 16; ++i) \ - dyn->e.olds[i].v = 0; \ - dyn->insts[ninst].f_entry = dyn->f; \ - if (reset_n == -1) \ - dyn->vector_sew = ninst ? dyn->insts[ninst - 1].vector_sew : VECTOR_SEWNA; \ - if (ninst) \ +#define NEW_INST \ + ++dyn->size; \ + memset(&dyn->insts[ninst], 0, sizeof(instruction_native_t)); \ + dyn->insts[ninst].x64.addr = ip; \ + dyn->e.combined1 = dyn->e.combined2 = 0; \ + dyn->e.swapped = 0; \ + dyn->e.barrier = 0; \ + for (int i = 0; i < 16; ++i) \ + dyn->e.olds[i].v = 0; \ + dyn->insts[ninst].f_entry = dyn->f; \ + dyn->insts[ninst].vector_sew_entry = dyn->vector_sew; \ + if (ninst) \ dyn->insts[ninst - 1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst - 1].x64.addr; -#define INST_EPILOG \ - dyn->insts[ninst].f_exit = dyn->f; \ - dyn->insts[ninst].e = dyn->e; \ - dyn->insts[ninst].vector_sew = dyn->vector_sew; \ +#define INST_EPILOG \ + dyn->insts[ninst].f_exit = dyn->f; \ + dyn->insts[ninst].e = dyn->e; \ + dyn->insts[ninst].vector_sew_exit = dyn->vector_sew; \ dyn->insts[ninst].x64.has_next = (ok > 0) ? 1 : 0; #define INST_NAME(name) #define DEFAULT \ diff --git a/src/dynarec/rv64/dynarec_rv64_pass1.h b/src/dynarec/rv64/dynarec_rv64_pass1.h index c7813ba0..5360f767 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass1.h +++ b/src/dynarec/rv64/dynarec_rv64_pass1.h @@ -2,18 +2,18 @@ #define FINI #define MESSAGE(A, ...) do {} while (0) #define EMIT(A) do {} while (0) -#define NEW_INST \ - dyn->insts[ninst].f_entry = dyn->f; \ - dyn->e.combined1 = dyn->e.combined2 = 0; \ - for (int i = 0; i < 16; ++i) \ - dyn->e.olds[i].v = 0; \ - if (reset_n != -1) \ - dyn->vector_sew = ninst ? dyn->insts[ninst - 1].vector_sew : VECTOR_SEWNA; \ - dyn->e.swapped = 0; \ +#define NEW_INST \ + dyn->insts[ninst].f_entry = dyn->f; \ + dyn->e.combined1 = dyn->e.combined2 = 0; \ + for (int i = 0; i < 16; ++i) \ + dyn->e.olds[i].v = 0; \ + dyn->insts[ninst].vector_sew_entry = dyn->vector_sew; \ + dyn->e.swapped = 0; \ dyn->e.barrier = 0 -#define INST_EPILOG \ - dyn->insts[ninst].e = dyn->e; \ - dyn->insts[ninst].f_exit = dyn->f +#define INST_EPILOG \ + dyn->insts[ninst].e = dyn->e; \ + dyn->insts[ninst].f_exit = dyn->f; \ + dyn->insts[ninst].vector_sew_exit = dyn->vector_sew; #define INST_NAME(name) diff --git a/src/dynarec/rv64/dynarec_rv64_pass2.h b/src/dynarec/rv64/dynarec_rv64_pass2.h index 37a71b9a..909522e3 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass2.h +++ b/src/dynarec/rv64/dynarec_rv64_pass2.h @@ -8,8 +8,6 @@ #define MESSAGE(A, ...) do {} while (0) #define EMIT(A) do {dyn->insts[ninst].size+=4; dyn->native_size+=4;}while(0) #define NEW_INST \ - if (reset_n != -1) \ - dyn->vector_sew = ninst ? dyn->insts[ninst - 1].vector_sew : VECTOR_SEWNA; \ if (ninst) { \ dyn->insts[ninst].address = (dyn->insts[ninst - 1].address + dyn->insts[ninst - 1].size); \ dyn->insts_size += 1 + ((dyn->insts[ninst - 1].x64.size > (dyn->insts[ninst - 1].size / 4)) ? dyn->insts[ninst - 1].x64.size : (dyn->insts[ninst - 1].size / 4)) / 15; \ diff --git a/src/dynarec/rv64/dynarec_rv64_pass3.h b/src/dynarec/rv64/dynarec_rv64_pass3.h index 5dc088a2..1254dc4a 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass3.h +++ b/src/dynarec/rv64/dynarec_rv64_pass3.h @@ -13,8 +13,6 @@ #define MESSAGE(A, ...) if(box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__) #define NEW_INST \ - if (reset_n != -1) \ - dyn->vector_sew = ninst ? dyn->insts[ninst - 1].vector_sew : VECTOR_SEWNA; \ if (box64_dynarec_dump) print_newinst(dyn, ninst); \ if (ninst) { \ addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst - 1].x64.size, dyn->insts[ninst - 1].size / 4); \ diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index 771e02d8..f3fde565 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -108,13 +108,14 @@ typedef struct instruction_rv64_s { uint16_t ymm0_in; // bitmap of ymm to zero at purge uint16_t ymm0_add; // the ymm0 added by the opcode uint16_t ymm0_sub; // the ymm0 removed by the opcode - uint16_t ymm0_out; // the ymmm0 at th end of the opcode + uint16_t ymm0_out; // the ymm0 at th end of the opcode uint16_t ymm0_pass2, ymm0_pass3; int barrier_maybe; flagcache_t f_exit; // flags status at end of instruction extcache_t e; // extcache at end of instruction (but before poping) flagcache_t f_entry; // flags status before the instruction begin - uint8_t vector_sew; + uint8_t vector_sew_entry; // sew status before the instruction begin + uint8_t vector_sew_exit; // sew status at the end of instruction } instruction_rv64_t; typedef struct dynarec_rv64_s { @@ -153,8 +154,8 @@ typedef struct dynarec_rv64_s { uint16_t ymm_zero; // bitmap of ymm to zero at purge uint8_t always_test; uint8_t abort; - uint8_t vector_sew; - uint8_t vector_eew; // effective element width + uint8_t vector_sew; // current sew status + uint8_t vector_eew; // current effective sew status, should only be used after SET_ELEMENT_WIDTH } dynarec_rv64_t; // v0 is hardware wired to vector mask register, which should be always reserved |