diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2023-05-04 18:02:50 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-05-04 12:02:50 +0200 |
| commit | a338e3fae83b5afa3e087da66d7d7f7ac7ced19f (patch) | |
| tree | 8a31fc6d1d28dc02ce6b6674a7b751e7b895e509 | |
| parent | df59bf881157c823c2c7c089374c524859ad7118 (diff) | |
| download | box64-a338e3fae83b5afa3e087da66d7d7f7ac7ced19f.tar.gz box64-a338e3fae83b5afa3e087da66d7d7f7ac7ced19f.zip | |
[RV64_DYNAREC] Added more opcodes for Unciv (#757)
* Use dynarec64_67
* Added 66 0F 3A 44 PCLMULQDQ opcode
* Added 67 8D LEA opcode
* Added DF /5 FILD opcode
* Added D8 FCOMP opcode
* Added D8 FSUB opcode
* Added F0 0F B0 CMPXCHG opcode
* [ARM64_DYNAREC] Fixed a typo in CMPXCHG opcode
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_f0.c | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_1.c | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 42 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_67.c | 14 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_d8.c | 56 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_df.c | 7 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f0.c | 95 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 2 |
8 files changed, 206 insertions, 16 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c index 85c17edc..d9c17dbc 100644 --- a/src/dynarec/arm64/dynarec_arm64_f0.c +++ b/src/dynarec/arm64/dynarec_arm64_f0.c @@ -247,7 +247,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin CBNZx_MARKLOCK(x4); // done MARK; - UFLAG_IF {emit_cmp32(dyn, ninst, rex, x6, x2, x3, x4, x5);} + UFLAG_IF {emit_cmp8(dyn, ninst, x6, x2, x3, x4, x5);} BFIx(xRAX, x2, 0, 8); } SMDMB(); diff --git a/src/dynarec/rv64/dynarec_rv64_00_1.c b/src/dynarec/rv64/dynarec_rv64_00_1.c index 3abb0444..875f721c 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_1.c +++ b/src/dynarec/rv64/dynarec_rv64_00_1.c @@ -114,7 +114,9 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x66: addr = dynarec64_66(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); break; - + case 0x67: + addr = dynarec64_67(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); + break; case 0x68: INST_NAME("PUSH Id"); i64 = F32S; diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index 7ed29be5..08303f44 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -458,6 +458,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SMWRITE2(); } break; + case 0x20: + INST_NAME("PINSRB Gx, ED, Ib"); + nextop = F8; + GETGX(x3); + GETED(1); + u8 = F8; + SB(ed, x3, u8&0xF); + break; case 0x22: INST_NAME("PINSRD Gx, ED, Ib"); nextop = F8; @@ -470,15 +478,29 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SW(ed, gback, 4*(u8&0x3)); } break; - case 0x20: - INST_NAME("PINSRB Gx, ED, Ib"); + case 0x44: + INST_NAME("PCLMULQDQ Gx, Ex, Ib"); nextop = F8; - GETGX(x3); - GETED(1); + GETG; + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); // gx + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + sse_forget_reg(dyn, ninst, ed); + MOV32w(x2, ed); + MOV32w(x3, 0); // 
p = NULL + } else { + MOV32w(x2, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x3, x5, &fixedaddress, rex, NULL, 0, 1); + if(ed!=x3) { + MV(x3, ed); + } + } u8 = F8; - SB(ed, x3, u8&0xF); + MOV32w(x4, u8); + CALL(native_pclmul, -1); break; - default: + default: DEFAULT; } break; @@ -939,7 +961,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int v1 = sse_get_reg(dyn, ninst, x2, (nextop&7)+(rex.b<<3), 0); FSD(v1, gback, 8); } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); LD(x3, ed, fixedaddress+0); SD(x3, gback, 8); } @@ -1278,7 +1300,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ed = xRAX + (nextop&7) + (rex.b<<3); LD(ed, x1, 0); } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); LD(x3, x1, 0); SD(x3, ed, fixedaddress); SMWRITE2(); @@ -1288,7 +1310,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ed = xRAX + (nextop&7) + (rex.b<<3); LWU(ed, x1, 0); } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); LWU(x3, x1, 0); SW(x3, ed, fixedaddress); SMWRITE2(); @@ -1335,7 +1357,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SRAI(x1, x1, 56); } else { SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x4, &fixedaddress, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x4, &fixedaddress, rex, NULL, 1, 0); LB(x1, ed, fixedaddress); } LUI(x5, 0xffff0); diff --git a/src/dynarec/rv64/dynarec_rv64_67.c b/src/dynarec/rv64/dynarec_rv64_67.c index 5ac5cbb9..71d7d887 100644 --- a/src/dynarec/rv64/dynarec_rv64_67.c +++ 
b/src/dynarec/rv64/dynarec_rv64_67.c @@ -483,7 +483,19 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SMWRITELOCK(lock); } break; - + case 0x8D: + INST_NAME("LEA Gd, Ed"); + nextop=F8; + GETGD; + if(MODREG) { // reg <= reg? that's an invalid operation + DEFAULT; + } else { // mem <= reg + addr = geted(dyn, addr, ninst, nextop, &ed, gd, x1, &fixedaddress, rex, NULL, 0, 0); + if(ed!=gd) { + AND(gd, ed, xMASK); + } + } + break; default: DEFAULT; } diff --git a/src/dynarec/rv64/dynarec_rv64_d8.c b/src/dynarec/rv64/dynarec_rv64_d8.c index beadb202..3a66bba4 100644 --- a/src/dynarec/rv64/dynarec_rv64_d8.c +++ b/src/dynarec/rv64/dynarec_rv64_d8.c @@ -50,9 +50,61 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xD0 ... 0xD7: case 0xD8 ... 0xDF: - + INST_NAME("FCOMP ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + LHU(x3, xEmu, offsetof(x64emu_t, sw)); + MOV32w(x1, 0b1110100011111111); // mask off c0,c1,c2,c3 + AND(x3, x3, x1); + if(ST_IS_F(0)) { + FEQS(x5, v1, v1); + FEQS(x4, v2, v2); + AND(x5, x5, x4); + BEQZ(x5, 24); // undefined/NaN + FEQS(x5, v1, v2); + BNEZ(x5, 24); // equal + FLTS(x3, v1, v2); // x3 = (v1<v2)?1:0 + SLLI(x1, x3, 8); + J(20); // end + // undefined/NaN + LUI(x1, 1); + ADDI(x1, x1, 0b010100000000); + J(8); // end + // equal + LUI(x1, 1); + // end + } else { + FEQD(x5, v1, v1); + FEQD(x4, v2, v2); + AND(x5, x5, x4); + BEQZ(x5, 24); // undefined/NaN + FEQD(x5, v1, v2); + BNEZ(x5, 24); // equal + FLTD(x3, v1, v2); // x3 = (v1<v2)?1:0 + SLLI(x1, x3, 8); + J(20); // end + // undefined/NaN + LUI(x1, 1); + ADDI(x1, x1, 0b010100000000); + J(8); // end + // equal + LUI(x1, 1); + // end + } + OR(x3, x3, x1); + SH(x3, xEmu, offsetof(x64emu_t, sw)); + x87_do_pop(dyn, ninst, x3); + break; case 0xE0 ... 
0xE7: - + INST_NAME("FSUB ST0, STx"); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); + v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(ST_IS_F(0)) { + FSUBS(v1, v1, v2); + } else { + FSUBD(v1, v1, v2); + } + break; case 0xE8 ... 0xEF: case 0xF0 ... 0xF7: diff --git a/src/dynarec/rv64/dynarec_rv64_df.c b/src/dynarec/rv64/dynarec_rv64_df.c index aa5f1131..38fab20b 100644 --- a/src/dynarec/rv64/dynarec_rv64_df.c +++ b/src/dynarec/rv64/dynarec_rv64_df.c @@ -172,6 +172,13 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SH(x4, wback, fixedaddress); x87_do_pop(dyn, ninst, x3); break; + case 5: + INST_NAME("FILD ST0, i64"); + v1 = x87_do_push(dyn, ninst, x1, EXT_CACHE_ST_D); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); + LD(x1, wback, fixedaddress); + FCVTDL(v1, x1, RD_RTZ); + break; case 7: INST_NAME("FISTP i64, ST0"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c index 3ccaafa4..617b8df1 100644 --- a/src/dynarec/rv64/dynarec_rv64_f0.c +++ b/src/dynarec/rv64/dynarec_rv64_f0.c @@ -104,6 +104,101 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x0F: nextop = F8; switch(nextop) { + case 0xB0: + switch(rep) { + case 0: + INST_NAME("LOCK CMPXCHG Eb, Gb"); + SETFLAGS(X_ALL, SF_SET_PENDING); + nextop = F8; + ANDI(x6, xRAX, 0xff); // AL + SMDMB(); + if(MODREG) { + if(rex.rex) { + wback = xRAX+(nextop&7)+(rex.b<<3); + wb2 = 0; + } else { + wback = (nextop&7); + wb2 = (wback>>2)*8; + wback = xRAX+(wback&3); + } + if (wb2) { + MV(x2, wback); + SRLI(x2, x2, wb2); + ANDI(x2, x2, 0xff); + } else { + ANDI(x2, wback, 0xff); + } + wb1 = 0; + ed = x2; + UFLAG_IF { + emit_cmp8(dyn, ninst, x6, ed, x3, x4, x5, x1); + } + BNE_MARK2(x6, x2); + if (wb2) { + MV(wback, x2); + SRLI(wback, wback, wb2); + ANDI(wback, wback, 0xff); + } 
else { + ANDI(wback, x2, 0xff); + } + GETGB(x1); + MV(ed, gd); + MARK2; + ANDI(xRAX, xRAX, ~0xff); + OR(xRAX, xRAX, x2); + B_NEXT_nocond; + } else { + // this one is tricky, and did some repetitive work. + // mostly because we only got 6 scratch registers, + // and has so much to do. + if(rex.rex) { + gb1 = xRAX+((nextop&0x38)>>3)+(rex.r<<3); + gb2 = 0; + } else { + gd = (nextop&0x38)>>3; + gb2 = ((gd&4)>>2); + gb1 = xRAX+(gd&3); + } + addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, LOCK_LOCK, 0, 0); + MARKLOCK; + ANDI(x2, wback, ~0b11); // align to 32bit + ANDI(x5, wback, 0b11); + SLLI(x5, x5, 3); // shamt + LWU(x1, x2, 0); + LR_W(x4, x2, 1, 1); + SRL(x4, x4, x5); + ANDI(x4, x4, 0xff); + BNE_MARK(x6, x4); // compare AL with m8 + // AL == m8, r8 is loaded into m8 + ADDI(x2, xZR, 0xff); + SLL(x2, x2, x5); + NOT(x2, x2); + AND(x2, x1, x2); + if (gb2) { + MV(x1, gb1); + SRLI(x1, x1, 8); + ANDI(x1, x1, 0xff); + } else { + ANDI(x1, gb1, 0xff); + } + SLL(x1, x1, x5); + OR(x1, x1, x2); + ANDI(x2, wback, ~0b11); // align to 32bit again + SC_W(x5, x1, x2, 1, 1); + BNEZ_MARKLOCK(x5); + // done + MARK; + UFLAG_IF {emit_cmp8(dyn, ninst, x6, x4, x1, x2, x3, x5);} + // load m8 into AL + ANDI(xRAX, xRAX, ~0xff); + OR(xRAX, xRAX, x4); + } + SMDMB(); + break; + default: + DEFAULT; + } + break; case 0xB1: switch (rep) { case 0: diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 7d6109aa..d8d44460 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -1126,7 +1126,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int seg, int* ok, int* need_epilog); //uintptr_t dynarec64_65(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep,int* ok, int* need_epilog); uintptr_t dynarec64_66(dynarec_rv64_t* dyn, 
uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); -//uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); //uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); |