about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2023-05-01 10:24:46 +0000
committerptitSeb <sebastien.chev@gmail.com>2023-05-01 10:24:46 +0000
commitaac1663d7579cb9e821d342dda9f999623d20b62 (patch)
treec9a273241ff52100d9a79dec64d0d3002bdaee24
parent5b9d963d314aa99302fa5cddb459e34e408d47b2 (diff)
downloadbox64-aac1663d7579cb9e821d342dda9f999623d20b62.tar.gz
box64-aac1663d7579cb9e821d342dda9f999623d20b62.zip
[RV64_DYNAREC] Add detection of Zba, Zbb, Zbc and Zbs CPU extensions
-rwxr-xr-xCMakeLists.txt1
-rw-r--r--src/dynarec/rv64/rv64_emitter.h110
-rwxr-xr-xsrc/include/debug.h5
-rwxr-xr-xsrc/main.c12
-rw-r--r--src/rv64detect.c70
5 files changed, 198 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1dad88e2..28b721bc 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -611,6 +611,7 @@ if(RV64_DYNAREC)
         "${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_functions.c"
         "${BOX64_ROOT}/src/dynarec/rv64/rv64_printer.c"
         "${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_jmpnext.c"
+        "${BOX64_ROOT}/src/rv64detect.c"
 
         "${BOX64_ROOT}/src/dynarec/rv64/rv64_prolog.S"
         "${BOX64_ROOT}/src/dynarec/rv64/rv64_epilog.S"
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 29336895..eec49a09 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -509,4 +509,114 @@ f28–31  ft8–11  FP temporaries                  Caller
 // Convert from Double to unsigned integer
 #define FCVTLUDxw(rd, frs1, rm)     EMIT(R_type(0b1100001, 0b00001+(rex.w?0b10:0b00), frs1, rm, rd, 0b1010011))
 
+//Zba
+// Add unsigned word (Wz(rs1) + X(rs2))
+#define ADDUW(rd, rs1, rs2)         EMIT(R_type(0b0000100, rs2, rs1, 0b000, rd, 0b0111011))
+// Zero-extend Word
+#define ZEXTW(rd, rs1)              ADDUW(rd, rs1, xZR)
+// Shift left by 1 and add (rd = X(rs2) + X(rs1)<<1)
+#define SH1ADD(rd, rs1, rs2)        EMIT(R_type(0b0010000, rs2, rs1, 0b010, rd, 0b0110011))
+// Shift unsigned word left by 1 and add (rd = X(rs2) + Wz(rs1)<<1)
+#define SH1ADDUW(rd, rs1, rs2)      EMIT(R_type(0b0010000, rs2, rs1, 0b010, rd, 0b0111011))
+// Shift left by 2 and add (rd = X(rs2) + X(rs1)<<2)
+#define SH2ADD(rd, rs1, rs2)        EMIT(R_type(0b0010000, rs2, rs1, 0b100, rd, 0b0110011))
+// Shift unsigned word left by 2 and add (rd = X(rs2) + Wz(rs1)<<2)
+#define SH2ADDUW(rd, rs1, rs2)      EMIT(R_type(0b0010000, rs2, rs1, 0b100, rd, 0b0111011))
+// Shift left by 3 and add (rd = X(rs2) + X(rs1)<<3)
+#define SH3ADD(rd, rs1, rs2)        EMIT(R_type(0b0010000, rs2, rs1, 0b110, rd, 0b0110011))
+// Shift unsigned word left by 3 and add (rd = X(rs2) + Wz(rs1)<<3)
+#define SH3ADDUW(rd, rs1, rs2)      EMIT(R_type(0b0010000, rs2, rs1, 0b110, rd, 0b0111011))
+// Shift left unsigned word (immediate)
+#define SLLIUW(rd, rs1, imm)        EMIT(R_type(0b0000100, imm, rs1, 0b001, rd, 0b0011011))
+
+//Zbb
+// AND with reverted operand (rs1 & ~rs2)
+#define ANDN(rd, rs1, rs2)      EMIT(R_type(0b0100000, rs2, rs1, 0b111, rd, 0b0110011))
+// OR with reverted operand (rs1 | ~rs2)
+#define ORN(rd, rs1, rs2)       EMIT(R_type(0b0100000, rs2, rs1, 0b110, rd, 0b0110011))
+// Exclusive NOR (~(rs1 ^ rs2))
+#define XNOR(rd, rs1, rs2)      EMIT(R_type(0b0100000, rs2, rs1, 0b100, rd, 0b0110011))
+// Count leading zero bits
+#define CLZ(rd, rs)             EMIT(R_type(0b0110000, 0b00000, rs1, 0b001, rd, 0b0010011))
+// Count leading zero bits in word
+#define CLZW(rd, rs)            EMIT(R_type(0b0110000, 0b00000, rs1, 0b001, rd, 0b0011011))
+// Count leading zero bits
+#define CLZxw(rd, rs)           EMIT(R_type(0b0110000, 0b00000, rs1, 0b001, rd, rex.w?0b0010011:0b0011011))
+// Count trailing zero bits
+#define CTZ(rd, rs)             EMIT(R_type(0b0110000, 0b00001, rs1, 0b001, rd, 0b0010011))
+// Count trailing zero bits in word
+#define CTZW(rd, rs)            EMIT(R_type(0b0110000, 0b00001, rs1, 0b001, rd, 0b0011011))
+// Count trailing zero bits
+#define CTZxw(rd, rs)           EMIT(R_type(0b0110000, 0b00001, rs1, 0b001, rd, rex.w?0b0010011:0b0011011))
+// Count set bits
+#define CPOP(rd, rs)            EMIT(R_type(0b0110000, 0b00010, rs1, 0b001, rd, 0b0010011))
+// Count set bits in word
+#define CPOPW(rd, rs)           EMIT(R_type(0b0110000, 0b00010, rs1, 0b001, rd, 0b0011011))
+// Count set bits
+#define CPOPxw(rd, rs)          EMIT(R_type(0b0110000, 0b00010, rs1, 0b001, rd, rex.w?0b0010011:0b0011011))
+// Maximum
+#define MAX(rd, rs1, rs2)       EMIT(R_type(0b0000101, rs2, rs1, 0b110, rd, 0b0110011))
+// Unisgned maximum
+#define MAXU(rd, rs1, rs2)      EMIT(R_type(0b0000101, rs2, rs1, 0b111, rd, 0b0110011))
+// Minimum
+#define MIN(rd, rs1, rs2)       EMIT(R_type(0b0000101, rs2, rs1, 0b100, rd, 0b0110011))
+// Unsigned minimum
+#define MINU(rd, rs1, rs2)      EMIT(R_type(0b0000101, rs2, rs1, 0b101, rd, 0b0110011))
+// Sign-extend byte
+#define SEXTB(rd, rs)           EMIT(R_type(0b0110000, 0b00100, rs, 0b001, rd, 0b0010011))
+// Sign-extend half-word
+#define SEXTH(rd, rs)           EMIT(R_type(0b0110000, 0b00101, rs, 0b001, rd, 0b0010011))
+// Zero-extend half-word
+#define ZEXTH(rd, rs)           EMIT(R_type(0b0000100, 0b00000, rs, 0b100, rd, 0b0111011))
+// Rotate left (register)
+#define ROL(rd, rs1, rs2)       EMIT(R_type(0b0110000, rs2, rs1, 0b001, rd, 0b0110011))
+// Rotate left word (register)
+#define ROLW(rd, rs1, rs2)      EMIT(R_type(0b0110000, rs2, rs1, 0b001, rd, 0b0111011))
+// Rotate left (register)
+#define ROLxw(rd, rs1, rs2)     EMIT(R_type(0b0110000, rs2, rs1, 0b001, rd, rex.w?0b0110011:0b0111011))
+// Rotate right (register)
+#define ROR(rd, rs1, rs2)       EMIT(R_type(0b0110000, rs2, rs1, 0b101, rd, 0b0110011))
+// Rotate right (immediate)
+#define RORI(rd, rs1, shamt)    EMIT(R_type(0b0110000, shamt, rs1, 0b101, rd, 0b0010011))
+// Rotate right word (immediate)
+#define RORIW(rd, rs1, shamt)   EMIT(R_type(0b0110000, shamt, rs1, 0b101, rd, 0b0011011))
+// Rotate right (immediate)
+#define RORIxw(rd, rs1, shamt)  EMIT(R_type(0b0110000, shamt, rs1, 0b101, rd, rex.w?0b0010011:0b0011011))
+// Rotate right word (register)
+#define RORW(rd, rs1, rs2)      EMIT(R_type(0b0110000, rs2, rs1, 0b101, rd, 0b0111011))
+// Rotate right (register)
+#define RORxw(rd, rs1, rs2)     EMIT(R_type(0b0110000, rs2, rs1, 0b101, rd, rex.w?0b0110011:0b0111011))
+// Bitwise OR Combine, byte granule (for all byte, if byte==0, res.byte=0, else res.byte=0xff)
+#define ORCB(rd, rs)            EMIT(I_type(0b001010000111, rs, 0b101, rd, 0b0010011))
+// Byte-reverse register
+#define REV8(rd, rs)            EMIT(I_type(0b011010111000, rs, 0b101, rd, 0b0010011))
+
+//Zbc
+// Carry-less multily (low-part)
+#define CLMUL(rd, rs1, rs2)         EMIT(R_type(0b0000101, rs2, rs1, 0b001, rd, 0b0110011))
+// Carry-less multiply (high-part)
+#define CLMULH(rd, rs1, rs2)        EMIT(R_type(0b0000101, rs2, rs1, 0b011, rd, 0b0110011))
+// Carry-less multiply (reversed)
+#define CLMULR(rd, rs1, rs2)        EMIT(R_type(0b0000101, rs2, rs1, 0b010, rd, 0b0110011))
+
+//Zbs
+// encoding of the "imm" on RV64 use a slight different mask, but it will work using R_type with high bit of imm ovewriting low bit op func
+// Single-bit Clear (Register)
+#define BCLR(rd, rs1, rs2)          EMIT(R_type(0b0100100, rs2, rs1, 0b001, rd, 0b0110011))
+// Single-bit Clear (Immediate)
+#define BCLI(rd, rs1, imm)          EMIT(R_type(0b0100100, imm, rs1, 0b001, rd, 0b0010011))
+// Single-bit Extreact (Register)
+#define BEXT(rd, rs1, rs2)          EMIT(R_type(0b0100100, rs2, rs1, 0b101, rd, 0b0110011))
+// Single-bit Extract (Immediate)
+#define BEXTI(rd, rs1, imm)         EMIT(R_type(0b0100100, imm, rs1, 0b101, rd, 0b0010011))
+// Single-bit Invert (Register)
+#define BINV(rd, rs1, rs2)          EMIT(R_type(0b0110100, rs2, rs1, 0b001, rd, 0b0110011))
+// Single-bit Invert (Immediate)
+#define BINVI(rd, rs1, imm)         EMIT(R_type(0b0110100, imm, rs1, 0b001, rd, 0b0010011))
+// Single-bit Set (Register)
+#define BSET(rd, rs1, rs2)          EMIT(R_type(0b0010100, rs2, rs1, 0b001, rd, 0b0110011))
+// Single-bit Set (Immediate)
+#define BSETI(rd, rs1, imm)         EMIT(R_type(0b0010100, imm, rs1, 0b001, rd, 0b0010011))
+
+
 #endif //__RV64_EMITTER_H__
diff --git a/src/include/debug.h b/src/include/debug.h
index 37146e02..6f448fc7 100755
--- a/src/include/debug.h
+++ b/src/include/debug.h
@@ -33,6 +33,11 @@ extern int arm64_aes;
 extern int arm64_pmull;
 extern int arm64_crc32;
 extern int arm64_atomics;
+#elif defined(RV64)
+extern int rv64_zba;
+extern int rv64_zbb;
+extern int rv64_zbc;
+extern int rv64_zbs;
 #endif
 #endif
 extern int box64_libcef;
diff --git a/src/main.c b/src/main.c
index f4b7a239..63894b5b 100755
--- a/src/main.c
+++ b/src/main.c
@@ -71,6 +71,11 @@ int arm64_aes = 0;
 int arm64_pmull = 0;
 int arm64_crc32 = 0;
 int arm64_atomics = 0;
+#elif defined(RV64)
+int rv64_zba = 0;
+int rv64_zbb = 0;
+int rv64_zbc = 0;
+int rv64_zbs = 0;
 #endif
 #else   //DYNAREC
 int box64_dynarec = 0;
@@ -355,7 +360,14 @@ HWCAP2_ECV
     printf_log(LOG_INFO, "Dynarec for LoongArch");
     printf_log(LOG_INFO, " PageSize:%zd ", box64_pagesize);
 #elif defined(RV64)
+    void RV64_Detect_Function();
+    RV64_Detect_Function();
     printf_log(LOG_INFO, "Dynarec for RISC-V");
+    printf_log(LOG_INFO, "With extension: I M A F D C");
+    if(rv64_zba) printf_log(LOG_INFO, " Zba");
+    if(rv64_zbb) printf_log(LOG_INFO, " Zbb");
+    if(rv64_zbc) printf_log(LOG_INFO, " Zbc");
+    if(rv64_zbs) printf_log(LOG_INFO, " Zbs");
     printf_log(LOG_INFO, " PageSize:%zd ", box64_pagesize);
 #else
 #error Unsupported architecture
diff --git a/src/rv64detect.c b/src/rv64detect.c
new file mode 100644
index 00000000..5e84423b
--- /dev/null
+++ b/src/rv64detect.c
@@ -0,0 +1,70 @@
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <setjmp.h>
+
+#include "debug.h"
+#include "dynarec/rv64/rv64_emitter.h"
+
+// Detect RV64 extensions, by executing on of the opcode with a SIGILL signal handler
+
+static sigjmp_buf sigbuf = {0};
+typedef void(*vFii_t)(int, int);
+static void detect_sigill(int sig)
+{
+    siglongjmp(sigbuf, 1);
+}
+
+static int Check(void* block)
+{
+    // Clear instruction cache
+    __clear_cache(block, block+box64_pagesize);
+    // Setup SIGILL signal handler
+    signal(SIGILL, detect_sigill);
+    if(sigsetjmp(sigbuf, 1)) {
+        // didn't work, extension not present
+        signal(SIGILL, SIG_DFL);
+        return 0;
+    }
+    ((vFii_t)block)(0, 1);
+    // done...
+    signal(SIGILL, SIG_DFL);
+    return 1;
+}
+
+void RV64_Detect_Function()
+{
+    // Alloc memory to execute stuffs
+    void* my_block = mmap(NULL, box64_pagesize, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+    if(my_block==(void*)-1) {
+        return;
+    }
+    uint32_t* block;
+    #define EMIT(A) *block = (A); ++block
+    // Test Zba with ADDUW
+    block = (uint32_t*)my_block;
+    ADDUW(A0, A0, A1);
+    BR(xRA);
+    rv64_zba = Check(my_block);
+    // Test Zbb with ANDN
+    block = (uint32_t*)my_block;
+    ANDN(A0, A0, A1);
+    BR(xRA);
+    rv64_zbb = Check(my_block);
+    // Test Zbc with CLMUL
+    block = (uint32_t*)my_block;
+    CLMUL(A0, A0, A1);
+    BR(xRA);
+    rv64_zbc = Check(my_block);
+    // Test Zbs with BCLR
+    block = (uint32_t*)my_block;
+    BCLR(A0, A0, A1);
+    BR(xRA);
+    rv64_zbs = Check(my_block);
+
+    // Finish
+    // Free the memory my_block
+    munmap(my_block, box64_pagesize);
+}
\ No newline at end of file