about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2024-07-19 22:05:52 +0800
committer GitHub <noreply@github.com> 2024-07-19 16:05:52 +0200
commit d1eb01e535b324e23e85535fce5ee4516161c47e (patch)
tree cbc8be34e1db420fe5767ca7ac512b2be228a4b9
parent 3e20a60c37b77b782a81f1a6b234a3c4ddd61d2c (diff)
download box64-d1eb01e535b324e23e85535fce5ee4516161c47e.tar.gz
box64-d1eb01e535b324e23e85535fce5ee4516161c47e.zip
[RV64_DYNAREC] Added more 66 0F 38 opcodes for vector (#1699)
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f_vector.c 32
-rw-r--r-- src/dynarec/rv64/rv64_emitter.h 2
2 files changed, 34 insertions, 0 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index 4ca426e6..257d07b0 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -81,6 +81,38 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                     VRGATHER_VV(v1, v0, q0, VECTOR_UNMASKED); // registers cannot be overlapped!!
                     VMV_V_V(q0, v1);
                     break;
+                case 0x01 ... 0x07:
+                    // pairwise opcodes are complicated, fall back to scalar.
+                    return 0;
+                case 0x08 ... 0x0a:
+                    if (nextop == 0x08) {
+                        INST_NAME("PSIGNB Gx, Ex");
+                        SET_ELEMENT_WIDTH(x1, VECTOR_SEW8);
+                        i32 = 7;
+                    } else if (nextop == 0x09) {
+                        INST_NAME("PSIGNW Gx, Ex");
+                        SET_ELEMENT_WIDTH(x1, VECTOR_SEW16);
+                        i32 = 15;
+                    } else {
+                        INST_NAME("PSIGND Gx, Ex");
+                        SET_ELEMENT_WIDTH(x1, VECTOR_SEW32);
+                        i32 = 31;
+                    }
+                    nextop = F8;
+                    GETGX_vector(q0, 1);
+                    GETEX_vector(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    // negate Gx where Ex < 0: (Gx ^ s) - s, with s = Ex >> (SEW - 1) (arithmetic)
+                    VSRA_VI(v0, i32, q1, VECTOR_UNMASKED);
+                    VXOR_VV(v1, v0, q0, VECTOR_UNMASKED);
+                    VSUB_VV(v1, v0, v1, VECTOR_UNMASKED);
+                    // zero the lanes where Ex == 0 (build an all-ones/zero lane mask, then AND)
+                    VMSEQ_VI(VECTOR_MASKREG, 0, q1, VECTOR_UNMASKED);
+                    VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
+                    VADC_VIM(v0, 0x1f, v0); // implies VECTOR_MASKREG
+                    VAND_VV(q0, v1, v0, VECTOR_UNMASKED);
+                    break;
                 default:
                     DEFAULT_VECTOR;
             }
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 4d574684..959308fb 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -1230,6 +1230,8 @@ f28–31  ft8–11  FP temporaries                  Caller
 #define VECTOR_NFIELD7 0b110
 #define VECTOR_NFIELD8 0b111
 
+#define VECTOR_MASKREG 0 // RVV always takes the mask operand from v0, so the mask register is fixed to v0
+
 //  configuration setting
 //  https://github.com/riscv/riscv-v-spec/blob/master/vcfg-format.adoc
 #define VSETIVLI(rd, zimm, zimm10) EMIT(I_type(0b110000000000 | (zimm10), zimm, 0b111, rd, 0b1010111)) // 11...............111.....1010111