about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2024-09-13 01:03:00 +0800
committer GitHub <noreply@github.com> 2024-09-12 19:03:00 +0200
commit 9544fbdff9f02f47a64c7d0b31d7b9c9ecd3c703 (patch)
tree bb38884877fd5046a34b2b90fbb60d9dd774d5f6 /src
parent 431365d1f8b9b5c022a48b554ccb647eaac674a1 (diff)
download box64-9544fbdff9f02f47a64c7d0b31d7b9c9ecd3c703.tar.gz
box64-9544fbdff9f02f47a64c7d0b31d7b9c9ecd3c703.zip
[RV64_DYNAREC] Refactored vector SEW tracking (#1820)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_private.h 2
-rw-r--r-- src/dynarec/la64/dynarec_la64_private.h 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_functions.c 12
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.c 10
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_pass0.h 33
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_pass1.h 22
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_pass2.h 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_pass3.h 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_private.h 9
9 files changed, 46 insertions, 48 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h
index c1015abd..840fb47f 100644
--- a/src/dynarec/arm64/dynarec_arm64_private.h
+++ b/src/dynarec/arm64/dynarec_arm64_private.h
@@ -89,7 +89,7 @@ typedef struct instruction_arm64_s {
     uint16_t            ymm0_in;    // bitmap of ymm to zero at purge
     uint16_t            ymm0_add;   // the ymm0 added by the opcode
     uint16_t            ymm0_sub;   // the ymm0 removed by the opcode
-    uint16_t            ymm0_out;   // the ymmm0 at th end of the opcode
+    uint16_t            ymm0_out;   // the ymm0 at th end of the opcode
     uint16_t            ymm0_pass2, ymm0_pass3;
     uint8_t             barrier_maybe;
     uint8_t             will_write;
diff --git a/src/dynarec/la64/dynarec_la64_private.h b/src/dynarec/la64/dynarec_la64_private.h
index 56b2e715..27a93a66 100644
--- a/src/dynarec/la64/dynarec_la64_private.h
+++ b/src/dynarec/la64/dynarec_la64_private.h
@@ -85,7 +85,7 @@ typedef struct instruction_la64_s {
     uint16_t            ymm0_in;    // bitmap of ymm to zero at purge
     uint16_t            ymm0_add;   // the ymm0 added by the opcode
     uint16_t            ymm0_sub;   // the ymm0 removed by the opcode
-    uint16_t            ymm0_out;   // the ymmm0 at th end of the opcode
+    uint16_t            ymm0_out;   // the ymm0 at th end of the opcode
     uint16_t            ymm0_pass2, ymm0_pass3;
     uint8_t             barrier_maybe;
     uint8_t             will_write;
diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c
index 37b81c09..3f0a04ad 100644
--- a/src/dynarec/rv64/dynarec_rv64_functions.c
+++ b/src/dynarec/rv64/dynarec_rv64_functions.c
@@ -379,11 +379,11 @@ int sewNeedsTransform(dynarec_rv64_t* dyn, int ninst)
 {
     int i2 = dyn->insts[ninst].x64.jmp_insts;
 
-    if (dyn->insts[i2].vector_sew == VECTOR_SEWNA)
+    if (dyn->insts[i2].vector_sew_entry == VECTOR_SEWNA)
         return 0;
-    else if (dyn->insts[i2].vector_sew == VECTOR_SEWANY && dyn->insts[ninst].vector_sew != VECTOR_SEWNA)
+    else if (dyn->insts[i2].vector_sew_entry == VECTOR_SEWANY && dyn->insts[ninst].vector_sew_exit != VECTOR_SEWNA)
         return 0;
-    else if (dyn->insts[i2].vector_sew == dyn->insts[ninst].vector_sew)
+    else if (dyn->insts[i2].vector_sew_entry == dyn->insts[ninst].vector_sew_exit)
         return 0;
 
     return 1;
@@ -616,7 +616,7 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r
     };
     if(box64_dynarec_dump) {
         printf_x64_instruction(rex.is32bits?my_context->dec32:my_context->dec, &dyn->insts[ninst].x64, name);
-        dynarec_log(LOG_NONE, "%s%p: %d emitted opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d/%d, sew=%d",
+        dynarec_log(LOG_NONE, "%s%p: %d emitted opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d/%d, sew@entry=%d, sew@exit=%d",
             (box64_dynarec_dump > 1) ? "\e[32m" : "",
             (void*)(dyn->native_start + dyn->insts[ninst].address),
             dyn->insts[ninst].size / 4,
@@ -631,7 +631,7 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r
             dyn->insts[ninst].x64.use_flags,
             dyn->insts[ninst].x64.need_before,
             dyn->insts[ninst].x64.need_after,
-            dyn->smread, dyn->smwrite, dyn->insts[ninst].vector_sew);
+            dyn->smread, dyn->smwrite, dyn->insts[ninst].vector_sew_entry, dyn->insts[ninst].vector_sew_exit);
         if(dyn->insts[ninst].pred_sz) {
             dynarec_log(LOG_NONE, ", pred=");
             for(int ii=0; ii<dyn->insts[ninst].pred_sz; ++ii)
@@ -722,6 +722,7 @@ void fpu_reset(dynarec_rv64_t* dyn)
     mmx_reset(&dyn->e);
     sse_reset(&dyn->e);
     fpu_reset_reg(dyn);
+    dyn->vector_sew = VECTOR_SEWNA;
 }
 
 void fpu_reset_ninst(dynarec_rv64_t* dyn, int ninst)
@@ -730,6 +731,7 @@ void fpu_reset_ninst(dynarec_rv64_t* dyn, int ninst)
     mmx_reset(&dyn->insts[ninst].e);
     sse_reset(&dyn->insts[ninst].e);
     fpu_reset_reg_extcache(&dyn->insts[ninst].e);
+    dyn->vector_sew = VECTOR_SEWNA;
 }
 
 int fpu_is_st_freed(dynarec_rv64_t* dyn, int ninst, int st)
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index a8ef8f21..489646d5 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -2432,9 +2432,9 @@ static void sewTransform(dynarec_rv64_t* dyn, int ninst, int s1)
     int j64;
     int jmp = dyn->insts[ninst].x64.jmp_insts;
     if (jmp < 0) return;
-    if (dyn->insts[jmp].vector_sew == VECTOR_SEWNA) return;
-    MESSAGE(LOG_DUMP, "\tSEW changed to %d ---- ninst=%d -> %d\n", dyn->insts[jmp].vector_sew, ninst, jmp);
-    vector_vsetvl_emul1(dyn, ninst, s1, dyn->insts[jmp].vector_sew, 1);
+    if (dyn->insts[jmp].vector_sew_entry == VECTOR_SEWNA) return;
+    MESSAGE(LOG_DUMP, "\tSEW changed to %d ---- ninst=%d -> %d\n", dyn->insts[jmp].vector_sew_entry, ninst, jmp);
+    vector_vsetvl_emul1(dyn, ninst, s1, dyn->insts[jmp].vector_sew_entry, 1);
 }
 
 void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3)
@@ -2528,10 +2528,10 @@ void fpu_reset_cache(dynarec_rv64_t* dyn, int ninst, int reset_n)
     #if STEP > 1
     // for STEP 2 & 3, just need to refresh with current, and undo the changes (push & swap)
     dyn->e = dyn->insts[ninst].e;
-    dyn->vector_sew = dyn->insts[ninst].vector_sew;
+    dyn->vector_sew = dyn->insts[ninst].vector_sew_exit;
     #else
     dyn->e = dyn->insts[reset_n].e;
-    dyn->vector_sew = dyn->insts[reset_n].vector_sew;
+    dyn->vector_sew = dyn->insts[reset_n].vector_sew_exit;
     #endif
     extcacheUnwind(&dyn->e);
     #if STEP == 0
diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h
index 9a84ab5b..d0d81a6f 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass0.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass0.h
@@ -19,25 +19,24 @@
 #define JUMP(A, C)         add_jump(dyn, ninst); add_next(dyn, (uintptr_t)A); SMEND(); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C; dyn->insts[ninst].x64.jmp_insts = 0
 #define BARRIER(A)      if(A!=BARRIER_MAYBE) {fpu_purgecache(dyn, ninst, 0, x1, x2, x3); dyn->insts[ninst].x64.barrier = A;} else dyn->insts[ninst].barrier_maybe = 1
 #define SET_HASCALLRET()    dyn->insts[ninst].x64.has_callret = 1
-#define NEW_INST                                                                   \
-    ++dyn->size;                                                                   \
-    memset(&dyn->insts[ninst], 0, sizeof(instruction_native_t));                   \
-    dyn->insts[ninst].x64.addr = ip;                                               \
-    dyn->e.combined1 = dyn->e.combined2 = 0;                                       \
-    dyn->e.swapped = 0;                                                            \
-    dyn->e.barrier = 0;                                                            \
-    for (int i = 0; i < 16; ++i)                                                   \
-        dyn->e.olds[i].v = 0;                                                      \
-    dyn->insts[ninst].f_entry = dyn->f;                                            \
-    if (reset_n == -1)                                                             \
-        dyn->vector_sew = ninst ? dyn->insts[ninst - 1].vector_sew : VECTOR_SEWNA; \
-    if (ninst)                                                                     \
+#define NEW_INST                                                 \
+    ++dyn->size;                                                 \
+    memset(&dyn->insts[ninst], 0, sizeof(instruction_native_t)); \
+    dyn->insts[ninst].x64.addr = ip;                             \
+    dyn->e.combined1 = dyn->e.combined2 = 0;                     \
+    dyn->e.swapped = 0;                                          \
+    dyn->e.barrier = 0;                                          \
+    for (int i = 0; i < 16; ++i)                                 \
+        dyn->e.olds[i].v = 0;                                    \
+    dyn->insts[ninst].f_entry = dyn->f;                          \
+    dyn->insts[ninst].vector_sew_entry = dyn->vector_sew;        \
+    if (ninst)                                                   \
         dyn->insts[ninst - 1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst - 1].x64.addr;
 
-#define INST_EPILOG                                 \
-    dyn->insts[ninst].f_exit = dyn->f;              \
-    dyn->insts[ninst].e = dyn->e;                   \
-    dyn->insts[ninst].vector_sew = dyn->vector_sew; \
+#define INST_EPILOG                                      \
+    dyn->insts[ninst].f_exit = dyn->f;                   \
+    dyn->insts[ninst].e = dyn->e;                        \
+    dyn->insts[ninst].vector_sew_exit = dyn->vector_sew; \
     dyn->insts[ninst].x64.has_next = (ok > 0) ? 1 : 0;
 #define INST_NAME(name)
 #define DEFAULT                         \
diff --git a/src/dynarec/rv64/dynarec_rv64_pass1.h b/src/dynarec/rv64/dynarec_rv64_pass1.h
index c7813ba0..5360f767 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass1.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass1.h
@@ -2,18 +2,18 @@
 #define FINI
 #define MESSAGE(A, ...) do {} while (0)
 #define EMIT(A) do {} while (0)
-#define NEW_INST                                                                   \
-    dyn->insts[ninst].f_entry = dyn->f;                                            \
-    dyn->e.combined1 = dyn->e.combined2 = 0;                                       \
-    for (int i = 0; i < 16; ++i)                                                   \
-        dyn->e.olds[i].v = 0;                                                      \
-    if (reset_n != -1)                                                             \
-        dyn->vector_sew = ninst ? dyn->insts[ninst - 1].vector_sew : VECTOR_SEWNA; \
-    dyn->e.swapped = 0;                                                            \
+#define NEW_INST                                          \
+    dyn->insts[ninst].f_entry = dyn->f;                   \
+    dyn->e.combined1 = dyn->e.combined2 = 0;              \
+    for (int i = 0; i < 16; ++i)                          \
+        dyn->e.olds[i].v = 0;                             \
+    dyn->insts[ninst].vector_sew_entry = dyn->vector_sew; \
+    dyn->e.swapped = 0;                                   \
     dyn->e.barrier = 0
 
-#define INST_EPILOG                             \
-        dyn->insts[ninst].e = dyn->e;           \
-        dyn->insts[ninst].f_exit = dyn->f
+#define INST_EPILOG                    \
+    dyn->insts[ninst].e = dyn->e;      \
+    dyn->insts[ninst].f_exit = dyn->f; \
+    dyn->insts[ninst].vector_sew_exit = dyn->vector_sew;
 
 #define INST_NAME(name)
diff --git a/src/dynarec/rv64/dynarec_rv64_pass2.h b/src/dynarec/rv64/dynarec_rv64_pass2.h
index 37a71b9a..909522e3 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass2.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass2.h
@@ -8,8 +8,6 @@
 #define MESSAGE(A, ...) do {} while (0)
 #define EMIT(A)     do {dyn->insts[ninst].size+=4; dyn->native_size+=4;}while(0)
 #define NEW_INST                                                                                                                                                               \
-    if (reset_n != -1)                                                                                                                                                         \
-        dyn->vector_sew = ninst ? dyn->insts[ninst - 1].vector_sew : VECTOR_SEWNA;                                                                                             \
     if (ninst) {                                                                                                                                                               \
         dyn->insts[ninst].address = (dyn->insts[ninst - 1].address + dyn->insts[ninst - 1].size);                                                                              \
         dyn->insts_size += 1 + ((dyn->insts[ninst - 1].x64.size > (dyn->insts[ninst - 1].size / 4)) ? dyn->insts[ninst - 1].x64.size : (dyn->insts[ninst - 1].size / 4)) / 15; \
diff --git a/src/dynarec/rv64/dynarec_rv64_pass3.h b/src/dynarec/rv64/dynarec_rv64_pass3.h
index 5dc088a2..1254dc4a 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass3.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass3.h
@@ -13,8 +13,6 @@
 
 #define MESSAGE(A, ...)  if(box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__)
 #define NEW_INST                                                                                                  \
-    if (reset_n != -1)                                                                                            \
-        dyn->vector_sew = ninst ? dyn->insts[ninst - 1].vector_sew : VECTOR_SEWNA;                                \
     if (box64_dynarec_dump) print_newinst(dyn, ninst);                                                            \
     if (ninst) {                                                                                                  \
         addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst - 1].x64.size, dyn->insts[ninst - 1].size / 4); \
diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h
index 771e02d8..f3fde565 100644
--- a/src/dynarec/rv64/dynarec_rv64_private.h
+++ b/src/dynarec/rv64/dynarec_rv64_private.h
@@ -108,13 +108,14 @@ typedef struct instruction_rv64_s {
     uint16_t            ymm0_in;    // bitmap of ymm to zero at purge
     uint16_t            ymm0_add;   // the ymm0 added by the opcode
     uint16_t            ymm0_sub;   // the ymm0 removed by the opcode
-    uint16_t            ymm0_out;   // the ymmm0 at th end of the opcode
+    uint16_t            ymm0_out;   // the ymm0 at th end of the opcode
     uint16_t            ymm0_pass2, ymm0_pass3;
     int                 barrier_maybe;
     flagcache_t         f_exit;     // flags status at end of instruction
     extcache_t          e;          // extcache at end of instruction (but before poping)
     flagcache_t         f_entry;    // flags status before the instruction begin
-    uint8_t             vector_sew;
+    uint8_t             vector_sew_entry; // sew status before the instruction begin
+    uint8_t             vector_sew_exit;  // sew status at the end of instruction
 } instruction_rv64_t;
 
 typedef struct dynarec_rv64_s {
@@ -153,8 +154,8 @@ typedef struct dynarec_rv64_s {
     uint16_t            ymm_zero;   // bitmap of ymm to zero at purge
     uint8_t             always_test;
     uint8_t             abort;
-    uint8_t             vector_sew;
-    uint8_t             vector_eew; // effective element width
+    uint8_t             vector_sew; // current sew status
+    uint8_t             vector_eew; // current effective sew status, should only be used after SET_ELEMENT_WIDTH
 } dynarec_rv64_t;
 
 // v0 is hardware wired to vector mask register, which should be always reserved