diff options
| author | xctan <xctan@cirno.icu> | 2023-04-28 23:20:09 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-04-28 17:20:09 +0200 |
| commit | 69529708952abd1786dff2dfa6910a159ce8802c (patch) | |
| tree | b25921b54c237ab269a6f0d245091606103fcf0c /src | |
| parent | 14e20e643515d4e7171f54e6d2fb78cf784f7d10 (diff) | |
| download | box64-69529708952abd1786dff2dfa6910a159ce8802c.tar.gz box64-69529708952abd1786dff2dfa6910a159ce8802c.zip | |
[RV64_DYNAREC] Added more opcodes (#747)
* [RV64_DYNAREC] Added 0F 52 RSQRTPS opcode
* [RV64_DYNAREC] Added C0 /1 ROR opcode
* [RV64_DYNAREC] Added 66 0F 3A 20 PINSRB opcode
* [RV64_DYNAREC] Added AA STOSB opcode
* [RV64_DYNAREC] Added 66 F0 {81,83} LOCK ADD opcode

Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_2.c | 25 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_3.c | 10 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 33 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_66.c | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 8 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_66f0.c | 137 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 2 |
7 files changed, 218 insertions, 1 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c
index 93dda7ef..f2fd1287 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_2.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_2.c
@@ -631,6 +631,31 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             MOV64xw(x2, i64);
             emit_test32(dyn, ninst, rex, xRAX, x2, x3, x4, x5);
             break;
+        case 0xAA:
+            if(rep) {
+                INST_NAME("REP STOSB");
+                CBZ_NEXT(xRCX);
+                ANDI(x1, xFlags, 1<<F_DF);
+                BNEZ_MARK2(x1);
+                MARK;   // Part with DF==0
+                SB(xRAX, xRDI, 0);
+                ADDI(xRDI, xRDI, 1);
+                ADDI(xRCX, xRCX, -1);
+                BNEZ_MARK(xRCX);
+                B_NEXT_nocond;
+                MARK2;  // Part with DF==1
+                SB(xRAX, xRDI, 0);
+                ADDI(xRDI, xRDI, -1);
+                ADDI(xRCX, xRCX, -1);
+                BNEZ_MARK2(xRCX);
+                // done
+            } else {
+                INST_NAME("STOSB");
+                GETDIR(x3, x1, 1);
+                SB(xRAX, xRDI, 0);
+                ADD(xRDI, xRDI, x3);
+            }
+            break;
         case 0xAB:
             if(rep) {
                 INST_NAME("REP STOSD");
diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c
index e9a3d0e0..19d6815e 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_3.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_3.c
@@ -66,6 +66,16 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     CALL_(rol8, ed, x3);
                     EBBACK(x5, 0);
                     break;
+                case 1:
+                    INST_NAME("ROR Eb, Ib");
+                    MESSAGE(LOG_DUMP, "Need Optimization\n");
+                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    GETEB(x1, 1);
+                    u8 = F8;
+                    MOV32w(x2, u8);
+                    CALL_(ror8, ed, x3);
+                    EBBACK(x5, 0);
+                    break;
                 case 4:
                 case 6:
                     INST_NAME("SHL Eb, Ib");
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index be6ec046..3e320614 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -354,6 +354,39 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FSW(d0, gback, 4*i);
             }
             break;
+        case 0x52:
+            INST_NAME("RSQRTPS Gx, Ex");
+            nextop = F8;
+            GETGX(x1);
+            GETEX(x2, 0);
+            s0 = fpu_get_scratch(dyn);
+            s1 = fpu_get_scratch(dyn); // 1.0f
+            v0 = fpu_get_scratch(dyn); // 0.0f
+            // do accurate computation, because riscv doesn't have rsqrt
+            MOV32w(x3, 1);
+            FCVTSW(s1, x3, RD_DYN);
+            if (!box64_dynarec_fastnan) {
+                FCVTSW(v0, xZR, RD_DYN);
+            }
+            for(int i=0; i<4; ++i) {
+                FLW(s0, wback, fixedaddress+i*4);
+                if (!box64_dynarec_fastnan) {
+                    FLES(x3, v0, s0);   // s0 >= 0.0f?
+                    BNEZ(x3, 6*4);
+                    FEQS(x3, s0, s0);   // isnan(s0)?
+                    BEQZ(x3, 2*4);
+                    // s0 is negative, so generate a NaN (0.0f/0.0f; 1.0f/0.0f would be +Inf)
+                    FDIVS(s0, v0, v0);
+                    // s0 is a NaN, just copy it
+                    FSW(s0, gback, i*4);
+                    J(4*4);
+                    // do regular computation
+                }
+                FSQRTS(s0, s0);
+                FDIVS(s0, s1, s0);
+                FSW(s0, gback, i*4);
+            }
+            break;
         case 0x53:
             INST_NAME("RCPPS Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c
index 7bc996e9..24584465 100644
--- a/src/dynarec/rv64/dynarec_rv64_66.c
+++ b/src/dynarec/rv64/dynarec_rv64_66.c
@@ -716,6 +716,10 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     DEFAULT;
             }
             break;
+
+        case 0xF0:
+            return dynarec64_66F0(dyn, addr, ip, ninst, rex, rep, ok, need_epilog);
+
         case 0xF7:
             nextop = F8;
             switch((nextop>>3)&7) {
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index 19c12bfd..ee89c010 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -469,6 +469,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         SW(ed, gback, 4*(u8&0x3));
                     }
                     break;
+                case 0x20:
+                    INST_NAME("PINSRB Gx, ED, Ib");
+                    nextop = F8;
+                    GETGX(x3);
+                    GETED(1);
+                    u8 = F8;
+                    SB(ed, x3, u8&0xF);
+                    break;
                 default:
                     DEFAULT;
             }
diff --git a/src/dynarec/rv64/dynarec_rv64_66f0.c b/src/dynarec/rv64/dynarec_rv64_66f0.c
new file mode 100644
index 00000000..b1df0489
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_66f0.c
@@ -0,0 +1,137 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+
+#include "rv64_printer.h"
+#include "dynarec_rv64_private.h"
+#include "dynarec_rv64_helper.h"
+#include "dynarec_rv64_functions.h"
+
+
+// Handles opcodes that carry both the 66 and F0 (LOCK) prefixes.
+// Only 81 /0 and 83 /0 (LOCK ADD Ew, Iw/Ib) are emitted here; everything else goes to DEFAULT.
+// Returns the updated addr; the 0xF0 case in dynarec64_66 returns this value directly.
+uintptr_t dynarec64_66F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
+{
+    (void)ip; (void)rep; (void)need_epilog;
+
+    uint8_t opcode = F8;
+    uint8_t nextop;
+    uint8_t gd, ed, u8;
+    uint8_t wback, wb1, wb2, gb1, gb2;
+    int32_t i32;
+    int64_t i64, j64;
+    int64_t fixedaddress;
+    int unscaled;
+    MAYUSE(gb1);
+    MAYUSE(gb2);
+    MAYUSE(wb1);
+    MAYUSE(wb2);
+    MAYUSE(j64);
+
+    while((opcode==0xF2) || (opcode==0xF3)) {
+        rep = opcode-0xF1;
+        opcode = F8;
+    }
+    // REX prefix before the F0/66 are ignored
+    rex.rex = 0;
+    while(opcode>=0x40 && opcode<=0x4f) {
+        rex.rex = opcode;
+        opcode = F8;
+    }
+
+    switch(opcode) {
+        case 0x81:
+        case 0x83:
+            nextop = F8;
+            SMDMB();
+            switch((nextop>>3)&7) {
+                case 0: //ADD
+                    if(opcode==0x81) {
+                        INST_NAME("LOCK ADD Ew, Iw");
+                    } else {
+                        INST_NAME("LOCK ADD Ew, Ib");
+                    }
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    if(MODREG) {
+                        if(opcode==0x81) i32 = F16S; else i32 = F8S;
+                        ed = xRAX+(nextop&7)+(rex.b<<3);
+                        MOV32w(x5, i32);
+                        SLLI(x6, ed, 48);
+                        SRLI(x6, x6, 48);
+                        emit_add16(dyn, ninst, x6, x5, x3, x4, x2);
+                        SRLI(ed, ed, 16);
+                        SLLI(ed, ed, 16);
+                        OR(ed, ed, x6);
+                    } else {
+                        addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, (opcode==0x81)?2:1);
+                        if(opcode==0x81) i32 = F16S; else i32 = F8S;
+                        MOV32w(x5, i32);
+
+                        ANDI(x3, wback, 0b10);
+                        BNEZ_MARK(x3);
+
+                        // lower 16 bits
+                        MARKLOCK;
+                        LR_W(x1, wback, 1, 1);
+                        SRLIW(x3, x1, 16);
+                        SLLIW(x3, x3, 16);
+                        ADD(x4, x1, x5);
+                        SLLIW(x4, x4, 16);
+                        SRLIW(x4, x4, 16);
+                        OR(x4, x4, x3);
+                        SC_W(x3, x4, wback, 1, 1);
+                        BNEZ_MARKLOCK(x3);
+                        IFX(X_ALL|X_PEND) {
+                            SLLIW(x1, x1, 16);
+                            SRLIW(x1, x1, 16);
+                        }
+                        B_MARK3_nocond;
+
+                        MARK;
+                        // upper 16 bits
+                        XORI(wback, wback, 0b10);
+                        MARK2;
+                        LR_W(x1, wback, 1, 1);
+                        SLLIW(x3, x1, 16);
+                        SRLIW(x3, x3, 16);
+                        SRLIW(x1, x1, 16);
+                        ADD(x4, x1, x5);
+                        SLLIW(x4, x4, 16);
+                        OR(x4, x4, x3);
+                        SC_W(x3, x4, wback, 1, 1);
+                        BNEZ_MARK2(x3);
+
+                        MARK3;
+                        // final
+                        IFX(X_ALL|X_PEND) {
+                            emit_add16(dyn, ninst, x1, x5, x3, x4, x6);
+                        }
+                    }
+                    break;
+                default:
+                    DEFAULT;
+            }
+            SMDMB();
+            break;
+
+        default:
+            DEFAULT;
+    }
+    return addr;
+}
\ No newline at end of file
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index f4fde7dd..d7f52df7 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1097,7 +1097,7 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
 //uintptr_t dynarec64_6664(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int seg, int* ok, int* need_epilog);
-//uintptr_t dynarec64_66F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+uintptr_t dynarec64_66F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
 uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
 