diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2023-03-13 15:17:50 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-03-13 08:17:50 +0100 |
| commit | 874054f3bbf7289b858d66be0453ff1220d05382 (patch) | |
| tree | b505340dc56ca10fce453feafbf7ec6ed6b19e6a /src | |
| parent | a08f3c4c9eca36ef2415d8428434ca4af41aa84e (diff) | |
| download | box64-874054f3bbf7289b858d66be0453ff1220d05382.tar.gz box64-874054f3bbf7289b858d66be0453ff1220d05382.zip | |
[RV64_DYNAREC] Added 29 SUB opcode (#553)
* [RV64_DYNAREC] Added 29 SUB opcode
* [RV64_DYNAREC] Clear flags if needed
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00.c | 9 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_math.c | 90 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 24 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 34 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 26 |
5 files changed, 168 insertions, 15 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c index 8fa698a0..0094f1ff 100644 --- a/src/dynarec/rv64/dynarec_rv64_00.c +++ b/src/dynarec/rv64/dynarec_rv64_00.c @@ -52,6 +52,15 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MAYUSE(cacheupd); switch(opcode) { + case 0x29: + INST_NAME("SUB Ed, Gd"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + GETGD; + GETED(0); + emit_sub32(dyn, ninst, rex, ed, gd, x3, x4, x5); + WBACK; + break; case 0x50: case 0x51: diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c new file mode 100644 index 00000000..1c11a515 --- /dev/null +++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c @@ -0,0 +1,90 @@ +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <pthread.h> +#include <errno.h> + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" +#include "../tools/bridge_private.h" + +#include "rv64_printer.h" +#include "dynarec_rv64_private.h" +#include "dynarec_rv64_functions.h" +#include "dynarec_rv64_helper.h" + +// emit SUB32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch +void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) +{ + CLEAR_FLAGS() + IFX(X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, op1)); + SDxw(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s3, rex.w?d_sub64:d_sub32); + } else IFX(X_ALL) { + SET_DFNONE(s3); + } + + IFX(X_AF) { + // for later borrow chain calculation + NOT(s5, s1); + } + + SUBxw(s1, s1, s2); + + IFX(X_PEND) { + SDxw(s1, xEmu, offsetof(x64emu_t, res)); + } + + IFX(X_AF | X_CF | X_OF) { + // calc borrow chain + // bc = (res & (~op1 | op2)) | (~op1 & 
op2) + OR(s3, s5, s2); + AND(s4, s1, s3); + AND(s5, s5, s2); + OR(s4, s4, s5); + IFX(X_AF) { + // af = bc & 0x8 + ANDI(s3, s4, 8); + BEQZ(s3, 4); + ORI(xFlags, xFlags, 1 << F_AF); + } + IFX(X_CF) { + // cf = bc & (rex.w?(1<<63):(1<<31)) + SRLI(s3, s4, rex.w?63:31); + BEQZ(s3, 4); + ORI(xFlags, xFlags, 1 << F_CF); + } + IFX(X_OF) { + // of = ((bc >> rex.w?62:30) ^ (bc >> rex.w?63:31)) & 0x1; + SRLI(s3, s4, rex.w?62:30); + SRLI(s4, s3, 1); + XOR(s3, s3, s4); + ANDI(s3, s3, 1); + BEQZ(s3, 4); + ORI(xFlags, xFlags, 1 << F_OF); + } + } + IFX(X_ZF) { + BEQZ(s1, 4); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX(X_SF) { + SRLI(s3, s1, rex.w?63:31); + BEQZ(s3, 4); + ORI(xFlags, xFlags, 1 << F_SF); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 3dee0d98..06fdb095 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -101,7 +101,7 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, } if(nextop&0x80) i64 = F32S; - else + else i64 = F8S; if(i64==0 || ((i64>=-2048) && (i64<=2047) && i12)) { *fixaddress = i64; @@ -210,7 +210,7 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst) } CLEARIP(); #ifdef HAVE_TRACE - //MOVx(x3, 15); no access to PC reg + //MOVx(x3, 15); no access to PC reg #endif SMEND(); JALR(x2); // save LR... 
@@ -276,6 +276,7 @@ void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val) ADDI(reg, reg, r); } } + void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val) { if(((val<<(64-12))>>(64-12))==val) { @@ -320,4 +321,21 @@ void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val) if(s) { SLLI(reg, reg, s); } -} \ No newline at end of file +} + +void emit_pf(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4) +{ + MAYUSE(dyn); MAYUSE(ninst); + // PF: (((emu->x64emu_parity_tab[(res&0xff) / 32] >> ((res&0xff) % 32)) & 1) == 0) + MOV64x(s4, (uintptr_t)GetParityTab()); + SRLI(s3, s1, 3); + ANDI(s3, s3, 28); + ADD(s4, s4, s3); + LW(s4, s4, 0); + NOT(s4, s4); + SRLW(s4, s4, s1); + ANDI(s4, s4, 1); + + BEQZ(s4, 4); + ORI(xFlags, xFlags, 1 << F_PF); +} diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 2add1717..3b347d77 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -62,17 +62,33 @@ // GETGD get x64 register in gd #define GETGD gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3) -//GETED can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI +// GETED can use r1 for ed, and r2 for wback. 
wback is 0 if ed is xEAX..xEDI #define GETED(D) if(MODREG) { \ ed = xRAX+(nextop&7)+(rex.b<<3); \ wback = 0; \ } else { \ SMREAD() \ addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \ - LD_I12(x1, wback, fixedaddress); \ + LD(x1, wback, fixedaddress); \ ed = x1; \ } - + +// Write back ed in wback (if wback not 0) +#define WBACK if(wback) {SDxw(ed, wback, fixedaddress); SMWRITE();} + +#define IFX(A) if((dyn->insts[ninst].x64.gen_flags&(A))) +#define IFX_PENDOR0 if((dyn->insts[ninst].x64.gen_flags&(X_PEND) || !dyn->insts[ninst].x64.gen_flags)) +#define IFXX(A) if((dyn->insts[ninst].x64.gen_flags==(A))) +#define IFX2X(A, B) if((dyn->insts[ninst].x64.gen_flags==(A) || dyn->insts[ninst].x64.gen_flags==(B) || dyn->insts[ninst].x64.gen_flags==((A)|(B)))) +#define IFXN(A, B) if((dyn->insts[ninst].x64.gen_flags&(A) && !(dyn->insts[ninst].x64.gen_flags&(B)))) + +#define SET_DFNONE(S) if(!dyn->f.dfnone) {MOV_U12(S, d_none); SD(S, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=1;} +#define SET_DF(S, N) if((N)!=d_none) {MOV_U12(S, (N)); SD(S, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=0;} else SET_DFNONE(S) +#define SET_NODF() dyn->f.dfnone = 0 +#define SET_DFOK() dyn->f.dfnone = 1 + +#define CLEAR_FLAGS() IFX(X_ALL) {ANDI(xFlags, xFlags, ~((1UL<<F_AF) | (1UL<<F_CF) | (1UL<<F_OF) | (1UL<<F_ZF) | (1UL<<F_SF) | (1UL<<F_PF)));} + #ifndef MAYSETFLAGS #define MAYSETFLAGS() #endif @@ -87,10 +103,10 @@ #endif #ifndef JUMP -#define JUMP(A, C) +#define JUMP(A, C) #endif #ifndef BARRIER -#define BARRIER(A) +#define BARRIER(A) #endif #ifndef BARRIER_NEXT #define BARRIER_NEXT(A) @@ -325,7 +341,7 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst); //void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5); //void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); //void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -//void 
emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); //void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5); //void emit_sub8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); //void emit_sub8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); @@ -382,7 +398,7 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst); //void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); //void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); -//void emit_pf(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); +void emit_pf(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); // x87 helper // cache of the local stack counter, to avoid upadte at every call @@ -484,7 +500,7 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni #if STEP < 3 #define MAYUSE(A) (void)A #else -#define MAYUSE(A) +#define MAYUSE(A) #endif -#endif //__DYNAREC_RV64_HELPER_H__ \ No newline at end of file +#endif //__DYNAREC_RV64_HELPER_H__ diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index 6810a484..a5cb0167 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -103,7 +103,7 @@ f28–31 ft8–11 FP temporaries Caller #define R_type(funct7, rs2, rs1, funct3, rd, opcode) ((funct7)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<12 | (rd)<<7 | (opcode)) #define I_type(imm12, rs1, funct3, rd, opcode) ((imm12)<<20 | (rs1)<<15 | (funct3)<<12 | (rd)<<7 | (opcode)) #define S_type(imm12, rs2, rs1, funct3, opcode) (((imm12)>>5)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<12 | ((imm12)&31)<<7 | (opcode)) -#define B_type(imm13, rs2, rs1, funct3, opcode) ((((imm13)>>12)&1)<<31 | 
(((imm13)>>5)&63)<<25 | (rs)<<20 | (rs1)<<15 | (funct3)<<13 | (((imm13)>>1)&15)<<8 | (((imm13)>>11)&1)<<7 | (opcode)) +#define B_type(imm13, rs2, rs1, funct3, opcode) ((((imm13)>>12)&1)<<31 | (((imm13)>>5)&63)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<13 | (((imm13)>>1)&15)<<8 | (((imm13)>>11)&1)<<7 | (opcode)) #define U_type(imm32, rd, opcode) (((imm32)>>12)<<12 | (rd)<<7 | (opcode)) #define J_type(imm21, rd, opcode) ((((imm21)>>20)&1)<<31 | (((imm21)>>1)&0b1111111111)<<21 | (((imm21)>>11)&1)<<20 | (((imm21)>>12)&0b11111111)<<12 | (rd)<<7 | (opcode)) @@ -142,7 +142,7 @@ f28–31 ft8–11 FP temporaries Caller // rd = rs1 | imm12 #define ORI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b110, rd, 0b0010011)) // rd = rs1 & imm12 -#define ANDI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b101, rd, 0b0010011)) +#define ANDI(rd, rs1, imm12) EMIT(I_type((imm12)&0b111111111111, rs1, 0b111, rd, 0b0010011)) // rd = imm12 #define MOV_U12(rd, imm12) ADDI(rd, xZR, imm12) @@ -153,6 +153,8 @@ f28–31 ft8–11 FP temporaries Caller #define ADD(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, 0b0110011)) // rd = rs1 - rs2 #define SUB(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, 0b0110011)) +// rd = rs1 - rs2 +#define SUBxw(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, rex.w?0b0110011:0b0111011)) // rd = rs1<<rs2 #define SLL(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b001, rd, 0b0110011)) // rd = (rs1<rs2)?1:0 @@ -178,6 +180,18 @@ f28–31 ft8–11 FP temporaries Caller #define NOT(rd, rs1) XORI(rd, rs1, -1) // rd = -rs1 #define NEG(rd, rs1) SUB(rd, xZR, rs1) +// rd = rs1 == 0 +#define SEQZ(rd, rs1) SLTIU(rd, rs1, 0) + +#define BEQ(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b000, 0b1100011)) +#define BNE(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b001, 0b1100011)) +#define BLT(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b100, 0b1100011)) +#define BGE(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b101, 0b1100011)) 
+#define BLTU(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b110, 0b1100011)) +#define BGEU(rs1, rs2, imm13) EMIT(B_type(imm13, rs2, rs1, 0b111, 0b1100011)) + +#define BEQZ(rs1, imm13) BEQ(rs1, 0, imm13) +#define BNEZ(rs1, imm13) BNE(rs1, 0, imm13) // rd = 4-bytes[rs1+imm12] signed extended #define LW(rd, rs1, imm12) EMIT(I_type(imm12, rs1, 0b010, rd, 0b0000011)) @@ -219,5 +233,11 @@ f28–31 ft8–11 FP temporaries Caller // Shift Right Aritmetic Immediate #define SRAI(rd, rs1, imm6) EMIT(I_type((imm6)|(0b010000<<6), rs1, 0b101, rd, 0b0010011)) +// rd = rs1<<rs2 +#define SLLW(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b001, rd, 0b0111011)) +// rd = rs1>>rs2 logical +#define SRLW(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b101, rd, 0b0111011)) +// rd = rs1>>rs2 aritmetic +#define SRAW(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b101, rd, 0b0111011)) -#endif //__RV64_EMITTER_H__ \ No newline at end of file +#endif //__RV64_EMITTER_H__ |