From a338e3fae83b5afa3e087da66d7d7f7ac7ced19f Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Thu, 4 May 2023 18:02:50 +0800 Subject: [RV64_DYNAREC] Added more opcodes for Unciv (#757) * Use dynarec64_67 * Added 66 0F 3A 44 PCLMULQDQ opcode * Added 67 8D LEA opcode * Added DF /5 FILD opcode * Added D8 FCOMP opcode * Added D8 FSUB opcode * Added F0 0F B0 CMPXCHG opcode * [ARM64_DYNAREC] Fixed a typo in CMPXCHG opcode --- src/dynarec/arm64/dynarec_arm64_f0.c | 2 +- src/dynarec/rv64/dynarec_rv64_00_1.c | 4 +- src/dynarec/rv64/dynarec_rv64_660f.c | 42 +++++++++++---- src/dynarec/rv64/dynarec_rv64_67.c | 14 ++++- src/dynarec/rv64/dynarec_rv64_d8.c | 56 +++++++++++++++++++- src/dynarec/rv64/dynarec_rv64_df.c | 7 +++ src/dynarec/rv64/dynarec_rv64_f0.c | 95 ++++++++++++++++++++++++++++++++++ src/dynarec/rv64/dynarec_rv64_helper.h | 2 +- 8 files changed, 206 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c index 85c17edc..d9c17dbc 100644 --- a/src/dynarec/arm64/dynarec_arm64_f0.c +++ b/src/dynarec/arm64/dynarec_arm64_f0.c @@ -247,7 +247,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin CBNZx_MARKLOCK(x4); // done MARK; - UFLAG_IF {emit_cmp32(dyn, ninst, rex, x6, x2, x3, x4, x5);} + UFLAG_IF {emit_cmp8(dyn, ninst, x6, x2, x3, x4, x5);} BFIx(xRAX, x2, 0, 8); } SMDMB(); diff --git a/src/dynarec/rv64/dynarec_rv64_00_1.c b/src/dynarec/rv64/dynarec_rv64_00_1.c index 3abb0444..875f721c 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_1.c +++ b/src/dynarec/rv64/dynarec_rv64_00_1.c @@ -114,7 +114,9 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x66: addr = dynarec64_66(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); break; - + case 0x67: + addr = dynarec64_67(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + break; case 0x68: INST_NAME("PUSH Id"); i64 = F32S; diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index 7ed29be5..08303f44 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -458,6 +458,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SMWRITE2(); } break; + case 0x20: + INST_NAME("PINSRB Gx, ED, Ib"); + nextop = F8; + GETGX(x3); + GETED(1); + u8 = F8; + SB(ed, x3, u8&0xF); + break; case 0x22: INST_NAME("PINSRD Gx, ED, Ib"); nextop = F8; @@ -470,15 +478,29 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SW(ed, gback, 4*(u8&0x3)); } break; - case 0x20: - INST_NAME("PINSRB Gx, ED, Ib"); + case 0x44: + INST_NAME("PCLMULQDQ Gx, Ex, Ib"); nextop = F8; - GETGX(x3); - GETED(1); + GETG; + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); // gx + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + sse_forget_reg(dyn, ninst, ed); + MOV32w(x2, ed); + MOV32w(x3, 0); // p = NULL + } else { + MOV32w(x2, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x3, x5, &fixedaddress, rex, NULL, 0, 1); + if(ed!=x3) { + MV(x3, ed); + } + } u8 = F8; - SB(ed, x3, u8&0xF); + MOV32w(x4, u8); + CALL(native_pclmul, -1); break; - default: + default: DEFAULT; } break; @@ -939,7 +961,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int v1 = sse_get_reg(dyn, ninst, x2, (nextop&7)+(rex.b<<3), 0); FSD(v1, gback, 8); } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); LD(x3, ed, fixedaddress+0); SD(x3, gback, 8); } @@ -1278,7 +1300,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ed = xRAX + (nextop&7) + (rex.b<<3); LD(ed, x1, 0); } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); LD(x3, x1, 0); SD(x3, ed, fixedaddress); SMWRITE2(); @@ -1288,7 +1310,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ed = xRAX + (nextop&7) + (rex.b<<3); LWU(ed, x1, 0); } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); LWU(x3, x1, 0); SW(x3, ed, fixedaddress); SMWRITE2(); @@ -1335,7 +1357,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SRAI(x1, x1, 56); } else { SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x4, &fixedaddress, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x4, &fixedaddress, rex, NULL, 1, 0); LB(x1, ed, fixedaddress); } LUI(x5, 0xffff0); diff --git a/src/dynarec/rv64/dynarec_rv64_67.c b/src/dynarec/rv64/dynarec_rv64_67.c index 5ac5cbb9..71d7d887 100644 --- a/src/dynarec/rv64/dynarec_rv64_67.c +++ b/src/dynarec/rv64/dynarec_rv64_67.c @@ -483,7 +483,19 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SMWRITELOCK(lock); } break; - + case 0x8D: + INST_NAME("LEA Gd, Ed"); + nextop=F8; + GETGD; + if(MODREG) { // reg <= reg? that's an invalid operation + DEFAULT; + } else { // mem <= reg + addr = geted(dyn, addr, ninst, nextop, &ed, gd, x1, &fixedaddress, rex, NULL, 0, 0); + if(ed!=gd) { + AND(gd, ed, xMASK); + } + } + break; default: DEFAULT; } diff --git a/src/dynarec/rv64/dynarec_rv64_d8.c b/src/dynarec/rv64/dynarec_rv64_d8.c index beadb202..3a66bba4 100644 --- a/src/dynarec/rv64/dynarec_rv64_d8.c +++ b/src/dynarec/rv64/dynarec_rv64_d8.c @@ -50,9 +50,61 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xD0 ... 0xD7: case 0xD8 ... 0xDF: - + INST_NAME("FCOMP ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + LHU(x3, xEmu, offsetof(x64emu_t, sw)); + MOV32w(x1, 0b1110100011111111); // mask off c0,c1,c2,c3 + AND(x3, x3, x1); + if(ST_IS_F(0)) { + FEQS(x5, v1, v1); + FEQS(x4, v2, v2); + AND(x5, x5, x4); + BEQZ(x5, 24); // undefined/NaN + FEQS(x5, v1, v2); + BNEZ(x5, 24); // equal + FLTS(x3, v1, v2); // x3 = (v1>2)*8; + wback = xRAX+(wback&3); + } + if (wb2) { + MV(x2, wback); + SRLI(x2, x2, wb2); + ANDI(x2, x2, 0xff); + } else { + ANDI(x2, wback, 0xff); + } + wb1 = 0; + ed = x2; + UFLAG_IF { + emit_cmp8(dyn, ninst, x6, ed, x3, x4, x5, x1); + } + BNE_MARK2(x6, x2); + if (wb2) { + MV(wback, x2); + SRLI(wback, wback, wb2); + ANDI(wback, wback, 0xff); + } else { + ANDI(wback, x2, 0xff); + } + GETGB(x1); + MV(ed, gd); + MARK2; + ANDI(xRAX, xRAX, ~0xff); + OR(xRAX, xRAX, x2); + B_NEXT_nocond; + } else { + // this one is tricky, and did some repetitive work. + // mostly because we only got 6 scratch registers, + // and has so much to do. + if(rex.rex) { + gb1 = xRAX+((nextop&0x38)>>3)+(rex.r<<3); + gb2 = 0; + } else { + gd = (nextop&0x38)>>3; + gb2 = ((gd&4)>>2); + gb1 = xRAX+(gd&3); + } + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, LOCK_LOCK, 0, 0); + MARKLOCK; + ANDI(x2, wback, ~0b11); // align to 32bit + ANDI(x5, wback, 0b11); + SLLI(x5, x5, 3); // shamt + LWU(x1, x2, 0); + LR_W(x4, x2, 1, 1); + SRL(x4, x4, x5); + ANDI(x4, x4, 0xff); + BNE_MARK(x6, x4); // compare AL with m8 + // AL == m8, r8 is loaded into m8 + ADDI(x2, xZR, 0xff); + SLL(x2, x2, x5); + NOT(x2, x2); + AND(x2, x1, x2); + if (gb2) { + MV(x1, gb1); + SRLI(x1, x1, 8); + ANDI(x1, x1, 0xff); + } else { + ANDI(x1, gb1, 0xff); + } + SLL(x1, x1, x5); + OR(x1, x1, x2); + ANDI(x2, wback, ~0b11); // align to 32bit again + SC_W(x5, x1, x2, 1, 1); + BNEZ_MARKLOCK(x5); + // done + MARK; + UFLAG_IF {emit_cmp8(dyn, ninst, x6, x4, x1, x2, x3, x5);} + // load m8 into AL + ANDI(xRAX, xRAX, ~0xff); + OR(xRAX, xRAX, x4); + } + SMDMB(); + break; + default: + DEFAULT; + } + break; case 0xB1: switch (rep) { case 0: diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 7d6109aa..d8d44460 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -1126,7 +1126,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int seg, int* ok, int* need_epilog); //uintptr_t dynarec64_65(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep,int* ok, int* need_epilog); uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -//uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); //uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -- cgit 1.4.1