diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-12-13 21:57:28 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-12-13 14:57:28 +0100 |
| commit | 4b4c0143d951b6aa18e2102136b4b72e165459d4 (patch) | |
| tree | 3ef11dd20153979a4bc8ec95b26ada7771096fe3 /src | |
| parent | fa76ce1f057f46a6133dfab67b78cdf16fd1f7f0 (diff) | |
| download | box64-4b4c0143d951b6aa18e2102136b4b72e165459d4.tar.gz box64-4b4c0143d951b6aa18e2102136b4b72e165459d4.zip | |
[RV64_DYNAREC] Optimize push/pop with xtheadmemidx (#2150)
* [RV64_DYNAREC] Optimize push/pop with xtheadmemidx
* fix
Diffstat (limited to 'src')
| -rw-r--r-- | src/core.c | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 126 |
2 files changed, 100 insertions, 30 deletions
diff --git a/src/core.c b/src/core.c index 3a1e180b..df3f3907 100644 --- a/src/core.c +++ b/src/core.c @@ -551,7 +551,7 @@ HWCAP2_AFP if (!strcasecmp(p, "xtheadba")) rv64_xtheadba = 0; if (!strcasecmp(p, "xtheadbb")) rv64_xtheadbb = 0; if (!strcasecmp(p, "xtheadbs")) rv64_xtheadbs = 0; - // if (!strcasecmp(p, "xtheadmemidx")) rv64_xtheadmemidx = 0; + if (!strcasecmp(p, "xtheadmemidx")) rv64_xtheadmemidx = 0; // if (!strcasecmp(p, "xtheadfmemidx")) rv64_xtheadfmemidx = 0; // if (!strcasecmp(p, "xtheadmac")) rv64_xtheadmac = 0; // if (!strcasecmp(p, "xtheadfmv")) rv64_xtheadfmv = 0; @@ -575,8 +575,8 @@ HWCAP2_AFP if(rv64_xtheadbs) printf_log(LOG_INFO, " XTheadBs"); if (rv64_xtheadmempair) printf_log(LOG_INFO, " XTheadMemPair"); if (rv64_xtheadcondmov) printf_log(LOG_INFO, " XTheadCondMov"); + if (rv64_xtheadmemidx) printf_log(LOG_INFO, " XTheadMemIdx"); // Disable the display since these are only detected but never used. - // if(rv64_xtheadmemidx) printf_log(LOG_INFO, " XTheadMemIdx"); // if(rv64_xtheadfmemidx) printf_log(LOG_INFO, " XTheadFMemIdx"); // if(rv64_xtheadmac) printf_log(LOG_INFO, " XTheadMac"); // if(rv64_xtheadfmv) printf_log(LOG_INFO, " XTheadFmv"); diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index 9200b358..4e699fa7 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -310,25 +310,41 @@ // 4-bytes[rs1+imm12] = rs2 #define SW(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b010, 0b0100011)) -#define PUSH1(reg) \ - do { \ - SD(reg, xRSP, 0xFF8); \ - SUBI(xRSP, xRSP, 8); \ +#define PUSH1(reg) \ + do { \ + if (rv64_xtheadmemidx && reg != xRSP) { \ + TH_SDIB(reg, xRSP, -8, 0); \ + } else { \ + SD(reg, xRSP, 0xFF8); \ + SUBI(xRSP, xRSP, 8); \ + } \ } while (0) -#define POP1(reg) \ - do { \ - LD(reg, xRSP, 0); \ - if (reg != xRSP) ADDI(xRSP, xRSP, 8); \ +#define POP1(reg) \ + do { \ + if (rv64_xtheadmemidx && reg != xRSP) { \ + TH_LDIA(reg, xRSP, 8, 0); \ + } else { \ + LD(reg, xRSP, 0); \ + 
if (reg != xRSP) ADDI(xRSP, xRSP, 8); \ + } \ } while (0) -#define PUSH1_32(reg) \ - do { \ - SW(reg, xRSP, 0xFFC); \ - SUBI(xRSP, xRSP, 4); \ +#define PUSH1_32(reg) \ + do { \ + if (rv64_xtheadmemidx && reg != xRSP) { \ + TH_SWIB(reg, xRSP, -4, 0); \ + } else { \ + SW(reg, xRSP, 0xFFC); \ + SUBI(xRSP, xRSP, 4); \ + } \ } while (0) -#define POP1_32(reg) \ - do { \ - LWU(reg, xRSP, 0); \ - if (reg != xRSP) ADDI(xRSP, xRSP, 4); \ +#define POP1_32(reg) \ + do { \ + if (rv64_xtheadmemidx && reg != xRSP) { \ + TH_LWUIA(reg, xRSP, 4, 0); \ + } else { \ + LWU(reg, xRSP, 0); \ + if (reg != xRSP) ADDI(xRSP, xRSP, 4); \ + } \ } while (0) #define POP1z(reg) \ @@ -344,16 +360,24 @@ PUSH1(reg); \ } -#define PUSH1_16(reg) \ - do { \ - SH(reg, xRSP, 0xFFE); \ - SUBI(xRSP, xRSP, 2); \ +#define PUSH1_16(reg) \ + do { \ + if (rv64_xtheadmemidx && reg != xRSP) { \ + TH_SHIB(reg, xRSP, -2, 0); \ + } else { \ + SH(reg, xRSP, 0xFFE); \ + SUBI(xRSP, xRSP, 2); \ + } \ } while (0) -#define POP1_16(reg) \ - do { \ - LHU(reg, xRSP, 0); \ - if (reg != xRSP) ADDI(xRSP, xRSP, 2); \ +#define POP1_16(reg) \ + do { \ + if (rv64_xtheadmemidx && reg != xRSP) { \ + TH_LHUIA(reg, xRSP, 2, 0); \ + } else { \ + LHU(reg, xRSP, 0); \ + if (reg != xRSP) ADDI(xRSP, xRSP, 2); \ + } \ } while (0) #define FENCE_gen(pred, succ) (((pred) << 24) | ((succ) << 20) | 0b0001111) @@ -1106,19 +1130,66 @@ // rs1 := rs1 + (sign_extend(imm5) << imm2) #define TH_LBIA(rd, rs1, imm5, imm2) EMIT(I_type(0b000110000000 | (((imm2) & 0b11) << 5) | ((imm5) & 0x1f), rs1, 0b100, rd, 0b0001011)) +// Load indexed half-word, increment address after loading. +// if (rs1 != rd) { +// rd := sign_extend(mem[rs1+1:rs1]) +// rs1 := rs1 + (sign_extend(imm5) << imm2) +// } +#define TH_LHIA(rd, rs1, imm5, imm2) EMIT(I_type(0b001110000000 | (((imm2) & 0b11) << 5) | ((imm5) & 0x1f), rs1, 0b100, rd, 0b0001011)) + +// Load indexed unsigned half-word, increment address after loading. 
+// if (rs1 != rd) { +// rd := zero_extend(mem[rs1+1:rs1]) +// rs1 := rs1 + (sign_extend(imm5) << imm2) +// } +#define TH_LHUIA(rd, rs1, imm5, imm2) EMIT(I_type(0b101110000000 | (((imm2) & 0b11) << 5) | ((imm5) & 0x1f), rs1, 0b100, rd, 0b0001011)) + +// Load indexed word, increment address after loading. +// if (rs1 != rd) { +// rd := sign_extend(mem[rs1+3:rs1]) +// rs1 := rs1 + (sign_extend(imm5) << imm2) +// } +#define TH_LWIA(rd, rs1, imm5, imm2) EMIT(I_type(0b010110000000 | (((imm2) & 0b11) << 5) | ((imm5) & 0x1f), rs1, 0b100, rd, 0b0001011)) + +// Load indexed unsigned word, increment address after loading. +// if (rs1 != rd) { +// rd := zero_extend(mem[rs1+3:rs1]) +// rs1 := rs1 + (sign_extend(imm5) << imm2) +// } +#define TH_LWUIA(rd, rs1, imm5, imm2) EMIT(I_type(0b110110000000 | (((imm2) & 0b11) << 5) | ((imm5) & 0x1f), rs1, 0b100, rd, 0b0001011)) + +// Load indexed double-word, increment address after loading. +// if (rs1 != rd) { +// rd := sign_extend(mem[rs1+7:rs1]) +// rs1 := rs1 + (sign_extend(imm5) << imm2) +// } +#define TH_LDIA(rd, rs1, imm5, imm2) EMIT(I_type(0b011110000000 | (((imm2) & 0b11) << 5) | ((imm5) & 0x1f), rs1, 0b100, rd, 0b0001011)) + +// Store indexed half-word, increment address before storage. +// rs1 := rs1 + (sign_extend(imm5) << imm2) +// mem[rs1+1:rs1] := rd +#define TH_SHIB(rd, rs1, imm5, imm2) EMIT(I_type(0b001010000000 | (((imm2) & 0b11) << 5) | ((imm5) & 0x1f), rs1, 0b101, rd, 0b0001011)) + +// Store indexed word, increment address before storage. +// rs1 := rs1 + (sign_extend(imm5) << imm2) +// mem[rs1+3:rs1] := rd +#define TH_SWIB(rd, rs1, imm5, imm2) EMIT(I_type(0b010010000000 | (((imm2) & 0b11) << 5) | ((imm5) & 0x1f), rs1, 0b101, rd, 0b0001011)) + +// Store indexed double-word, increment address before storage. 
+// rs1 := rs1 + (sign_extend(imm5) << imm2) +// mem[rs1+7:rs1] := rd +#define TH_SDIB(rd, rs1, imm5, imm2) EMIT(I_type(0b011010000000 | (((imm2) & 0b11) << 5) | ((imm5) & 0x1f), rs1, 0b101, rd, 0b0001011)) + // TODO // th.lbib rd, (rs1), imm5, imm2 Load indexed byte // th.lbuia rd, (rs1), imm5, imm2 Load indexed unsigned byte // th.lbuib rd, (rs1), imm5, imm2 Load indexed unsigned byte // th.lhia rd, (rs1), imm5, imm2 Load indexed half-word // th.lhib rd, (rs1), imm5, imm2 Load indexed half-word -// th.lhuia rd, (rs1), imm5, imm2 Load indexed unsigned half-word // th.lhuib rd, (rs1), imm5, imm2 Load indexed unsigned half-word // th.lwia rd, (rs1), imm5, imm2 Load indexed word // th.lwib rd, (rs1), imm5, imm2 Load indexed word -// th.lwuia rd, (rs1), imm5, imm2 Load indexed unsigned word // th.lwuib rd, (rs1), imm5, imm2 Load indexed unsigned word -// th.ldia rd, (rs1), imm5, imm2 Load indexed double-word // th.ldib rd, (rs1), imm5, imm2 Load indexed double-word // th.sbia rd, (rs1), imm5, imm2 Store indexed byte // th.sbib rd, (rs1), imm5, imm2 Store indexed byte @@ -1127,7 +1198,6 @@ // th.swia rd, (rs1), imm5, imm2 Store indexed word // th.swib rd, (rs1), imm5, imm2 Store indexed word // th.sdia rd, (rs1), imm5, imm2 Store indexed double-word -// th.sdib rd, (rs1), imm5, imm2 Store indexed double-word // th.lrb rd, rs1, rs2, imm2 Load indexed byte // th.lrbu rd, rs1, rs2, imm2 Load indexed unsigned byte // th.lrh rd, rs1, rs2, imm2 Load indexed half-word |