From 42dc36ad5af78da0e7d255df7662e9148cdfdde2 Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Mon, 26 Jun 2023 02:32:24 +0800 Subject: [32BTIS][DYNAREC_RV64] Added support for 32bits (#861) * [32BTIS][DYNAREC_RV64] Added support for 32bits * Fixed 32bit INC/DEC * Fixed a typo * Some more fixes * Fixed geted_32 * POP -> POP1, PUSH -> PUSH1 --- src/dynarec/arm64/dynarec_arm64_64.c | 24 ++-- src/dynarec/arm64/dynarec_arm64_66.c | 18 +-- src/dynarec/arm64/dynarec_arm64_6664.c | 8 +- src/dynarec/arm64/dynarec_arm64_66f0.c | 15 +-- src/dynarec/arm64/dynarec_arm64_67.c | 79 ++++++------- src/dynarec/arm64/dynarec_arm64_f0.c | 45 ++++---- src/dynarec/arm64/dynarec_arm64_helper.h | 14 ++- src/dynarec/rv64/dynarec_rv64_00.c | 6 - src/dynarec/rv64/dynarec_rv64_00_1.c | 114 ++++++++++++++----- src/dynarec/rv64/dynarec_rv64_00_2.c | 58 ++++++---- src/dynarec/rv64/dynarec_rv64_00_3.c | 57 ++++++---- src/dynarec/rv64/dynarec_rv64_64.c | 37 +++++- src/dynarec/rv64/dynarec_rv64_66.c | 50 ++++++-- src/dynarec/rv64/dynarec_rv64_6664.c | 7 +- src/dynarec/rv64/dynarec_rv64_66f0.c | 8 +- src/dynarec/rv64/dynarec_rv64_67.c | 12 +- src/dynarec/rv64/dynarec_rv64_f0.c | 22 ++-- src/dynarec/rv64/dynarec_rv64_functions.c | 16 +-- src/dynarec/rv64/dynarec_rv64_functions.h | 2 +- src/dynarec/rv64/dynarec_rv64_helper.c | 183 ++++++++++++++++++++++++++---- src/dynarec/rv64/dynarec_rv64_helper.h | 21 +++- src/dynarec/rv64/dynarec_rv64_pass3.h | 6 +- src/dynarec/rv64/rv64_emitter.h | 26 ++++- 23 files changed, 545 insertions(+), 283 deletions(-) (limited to 'src') diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c index 723b4460..540ab50e 100644 --- a/src/dynarec/arm64/dynarec_arm64_64.c +++ b/src/dynarec/arm64/dynarec_arm64_64.c @@ -54,13 +54,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin rep = opcode-0xF1; opcode = F8; } - // REX prefix before the F0 are ignored - rex.rex = 0; - if(!rex.is32bits) - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; - } + GETREX(); switch(opcode) { @@ -293,7 +287,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin GETEDO(x4, 0); emit_xor32(dyn, ninst, rex, gd, ed, x3, x4); break; - + case 0x39: INST_NAME("CMP Seg:Ed, Gd"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -317,7 +311,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x63: if(rex.is32bits) { // ARPL here - DEFAULT; + DEFAULT; } else { INST_NAME("MOVSXD Gd, Ed"); nextop = F8; @@ -646,7 +640,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin eb2 = (ed&4)>>2; // L or H } else { eb1 = xRAX+(nextop&7)+(rex.b<<3); - eb2 = 0; + eb2 = 0; } MOV32w(x3, u8); BFIx(eb1, x3, eb2*8, 8); @@ -878,7 +872,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; } break; - + case 0xF7: nextop = F8; grab_segdata(dyn, addr, ninst, x6, seg); @@ -959,8 +953,8 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVw_REG(xRDX, x4); } else { if(ninst - && dyn->insts[ninst-1].x64.addr - && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31 + && dyn->insts[ninst-1].x64.addr + && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31 && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0xD2) { SET_DFNONE(x2); GETEDO(x6, 0); @@ -996,7 +990,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVw_REG(xRDX, x4); } else { if(ninst && dyn->insts - && dyn->insts[ninst-1].x64.addr + 
&& dyn->insts[ninst-1].x64.addr && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x48 && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0x99) { SET_DFNONE(x2) @@ -1022,7 +1016,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; } break; - + case 0xFF: nextop = F8; grab_segdata(dyn, addr, ninst, x6, seg); diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c index c6d8420a..f2f0a0a1 100755 --- a/src/dynarec/arm64/dynarec_arm64_66.c +++ b/src/dynarec/arm64/dynarec_arm64_66.c @@ -49,13 +49,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin rep = opcode-0xF1; opcode = F8; } - // REX prefix before the 66 are ignored - rex.rex = 0; - if(!rex.is32bits) - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; - } + GETREX(); if(rex.w && !(opcode==0x0f || opcode==0xf0 || opcode==0x64 || opcode==0x65)) // rex.w cancels "66", but not for 66 0f type of prefix return dynarec64_00(dyn, addr-1, ip, ninst, rex, rep, ok, need_epilog); // addr-1, to "put back" opcode @@ -88,7 +82,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin emit_add16(dyn, ninst, x1, x2, x3, x4); BFIx(xRAX, x1, 0, 16); break; - + case 0x09: INST_NAME("OR Ew, Gw"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -456,7 +450,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; } break; - + case 0x85: INST_NAME("TEST Ew, Gw"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -494,7 +488,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BFIx(gd, x1, 0, 16); } break; - + case 0x89: INST_NAME("MOV Ew, Gw"); nextop = F8; @@ -973,7 +967,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; } break; - + case 0xF0: return dynarec64_66F0(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); @@ -1049,7 +1043,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; } break; - + case 0xFF: nextop = F8; switch((nextop>>3)&7) { diff --git a/src/dynarec/arm64/dynarec_arm64_6664.c b/src/dynarec/arm64/dynarec_arm64_6664.c index 920bb3e7..0fe59473 100644 --- a/src/dynarec/arm64/dynarec_arm64_6664.c +++ b/src/dynarec/arm64/dynarec_arm64_6664.c @@ -34,13 +34,7 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n int unscaled; MAYUSE(j64); - // REX prefix before the 66 are ignored - rex.rex = 0; - if(!rex.is32bits) - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; - } + GETREX(); /*if(rex.w && opcode!=0x0f) { // rex.w cancels "66", but not for 66 0f type of prefix MESSAGE(LOG_DUMP, "Here!\n"); diff --git a/src/dynarec/arm64/dynarec_arm64_66f0.c b/src/dynarec/arm64/dynarec_arm64_66f0.c index be92a709..3f606799 100644 --- a/src/dynarec/arm64/dynarec_arm64_66f0.c +++ b/src/dynarec/arm64/dynarec_arm64_66f0.c @@ -44,13 +44,8 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n rep = opcode-0xF1; opcode = F8; } - // REX prefix before the F0/66 are ignored - rex.rex = 0; - if(!rex.is32bits) - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; - } + + GETREX(); switch(opcode) { case 0x09: @@ -124,7 +119,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n BFIx(xRAX, x1, 0, 16); SMDMB(); break; - + case 0xC1: INST_NAME("LOCK XADD Gw, Ew"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -189,7 +184,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, 
uintptr_t ip, int n STLXRH(x3, x1, wback); CBNZx_MARKLOCK(x3); B_NEXT_nocond; - MARK; // unaligned! also, not enough + MARK; // unaligned! also, not enough LDRH_U12(x1, wback, 0); LDAXRB(x4, wback); BFIw(x1, x4, 0, 8); // re-inject @@ -307,7 +302,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n STLXRH(x3, x1, wback); CBNZx_MARKLOCK(x3); B_NEXT_nocond; - MARK; // unaligned! also, not enough + MARK; // unaligned! also, not enough LDRH_U12(x1, wback, 0); LDAXRB(x4, wback); BFIw(x1, x4, 0, 8); // re-inject diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c index cfe50d20..24b87d9b 100755 --- a/src/dynarec/arm64/dynarec_arm64_67.c +++ b/src/dynarec/arm64/dynarec_arm64_67.c @@ -53,13 +53,8 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin return addr; } + GETREX(); - // REX prefix before the 67 are ignored - rex.rex = 0; - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; - } rep = 0; while((opcode==0xF2) || (opcode==0xF3)) { rep = opcode-0xF1; @@ -238,39 +233,39 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } break; - case 0xB6: - INST_NAME("MOVZX Gd, Eb"); - nextop = F8; - GETGD; - if(MODREG) { - if(rex.rex) { - eb1 = xRAX+(nextop&7)+(rex.b<<3); - eb2 = 0; \ - } else { - ed = (nextop&7); - eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx - eb2 = (ed&4)>>2; // L or H - } - UBFXxw(gd, eb1, eb2*8, 8); - } else { - SMREAD(); - addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff, 0, rex, NULL, 0, 0); - LDB(gd, ed, fixedaddress); - } - break; - case 0xB7: - INST_NAME("MOVZX Gd, Ew"); - nextop = F8; - GETGD; - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - UBFXxw(gd, ed, 0, 16); + case 0xB6: + INST_NAME("MOVZX Gd, Eb"); + nextop = F8; + GETGD; + if(MODREG) { + if(rex.rex) { + eb1 = xRAX+(nextop&7)+(rex.b<<3); + eb2 = 0; \ } else { - SMREAD(); - addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0); - LDH(gd, ed, fixedaddress); + ed = (nextop&7); + eb1 = xRAX+(ed&3); // Ax, Cx, Dx or Bx + eb2 = (ed&4)>>2; // L or H } - break; + UBFXxw(gd, eb1, eb2*8, 8); + } else { + SMREAD(); + addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff, 0, rex, NULL, 0, 0); + LDB(gd, ed, fixedaddress); + } + break; + case 0xB7: + INST_NAME("MOVZX Gd, Ew"); + nextop = F8; + GETGD; + if(MODREG) { + ed = xRAX+(nextop&7)+(rex.b<<3); + UBFXxw(gd, ed, 0, 16); + } else { + SMREAD(); + addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0); + LDH(gd, ed, fixedaddress); + } + break; default: DEFAULT; @@ -721,7 +716,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; } break; - + case 0x88: INST_NAME("MOV Eb, Gb"); nextop = F8; @@ -1027,9 +1022,9 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVw_REG(xRAX, x2); MOVw_REG(xRDX, x4); } else { - if(ninst && dyn->insts - && dyn->insts[ninst-1].x64.addr - && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31 + if(ninst && dyn->insts + && dyn->insts[ninst-1].x64.addr + && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31 && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0xD2) { SET_DFNONE(x2); GETED32(0); @@ -1065,7 +1060,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVw_REG(xRDX, x4); } else { if(ninst && dyn->insts - && dyn->insts[ninst-1].x64.addr + && 
dyn->insts[ninst-1].x64.addr && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x48 && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0x99) { SET_DFNONE(x2) diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c index 0d1c7391..4a1421cc 100644 --- a/src/dynarec/arm64/dynarec_arm64_f0.c +++ b/src/dynarec/arm64/dynarec_arm64_f0.c @@ -46,13 +46,8 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin rep = opcode-0xF1; opcode = F8; } - // REX prefix before the F0 are ignored - rex.rex = 0; - if(!rex.is32bits) - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; - } + + GETREX(); switch(opcode) { case 0x00: @@ -66,14 +61,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin wback = xRAX + (nextop&7) + (rex.b<<3); wb2 = 0; } else { - wback = (nextop&7); - wb2 = (wback>>2); + wback = (nextop&7); + wb2 = (wback>>2); wback = xRAX+(wback&3); } - UBFXw(x1, wback, wb2*8, 8); + UBFXw(x1, wback, wb2*8, 8); emit_add8(dyn, ninst, x1, x2, x4, x3); BFIx(wback, x1, wb2*8, 8); - } else { + } else { addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); MARKLOCK; LDAXRB(x1, wback); @@ -114,14 +109,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin wback = xRAX + (nextop&7) + (rex.b<<3); wb2 = 0; } else { - wback = (nextop&7); - wb2 = (wback>>2); + wback = (nextop&7); + wb2 = (wback>>2); wback = xRAX+(wback&3); } - UBFXw(x1, wback, wb2*8, 8); + UBFXw(x1, wback, wb2*8, 8); emit_or8(dyn, ninst, x1, x2, x4, x3); BFIx(wback, x1, wb2*8, 8); - } else { + } else { addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); MARKLOCK; LDAXRB(x1, wback); @@ -220,11 +215,11 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(rex.rex) { wback = xRAX+(nextop&7)+(rex.b<<3); wb2 = 0; - } else { + } else { wback = (nextop&7); wb2 = (wback>>2)*8; wback = xRAX+(wback&3); - } + } UBFXx(x2, wback, wb2, 8); wb1 = 0; ed = x2; @@ -445,7 +440,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin DEFAULT; } break; - + case 0x21: INST_NAME("LOCK AND Ed, Gd"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -465,7 +460,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } SMDMB(); break; - + case 0x29: INST_NAME("LOCK SUB Ed, Gd"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -681,7 +676,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin CBNZx_MARKLOCK(x3); SMDMB(); B_NEXT_nocond; - MARK; // unaligned! also, not enough + MARK; // unaligned! also, not enough LDRxw_U12(x1, wback, 0); LDAXRB(x4, wback); BFIxw(x1, x4, 0, 8); // re-inject @@ -789,7 +784,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin CBNZx_MARKLOCK(x3); SMDMB(); B_NEXT_nocond; - MARK; // unaligned! also, not enough + MARK; // unaligned! 
also, not enough LDRxw_U12(x1, wback, 0); LDAXRB(x4, wback); BFIxw(x1, x4, 0, 8); // re-inject @@ -834,7 +829,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } SMDMB(); break; - + case 0x86: INST_NAME("LOCK XCHG Eb, Gb"); // Do the swap @@ -896,7 +891,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVxw_REG(gd, x1); } break; - + case 0xF6: nextop = F8; switch((nextop>>3)&7) { @@ -931,7 +926,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin DEFAULT; } break; - + case 0xFE: nextop = F8; switch((nextop>>3)&7) @@ -1042,7 +1037,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin DEFAULT; } break; - + default: DEFAULT; } diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index 8002b718..9c3594ac 100755 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -784,10 +784,10 @@ } else dyn->f.pending = SF_SET #endif #ifndef JUMP -#define JUMP(A, C) +#define JUMP(A, C) #endif #ifndef BARRIER -#define BARRIER(A) +#define BARRIER(A) #endif #ifndef BARRIER_NEXT #define BARRIER_NEXT(A) @@ -1238,7 +1238,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n #if STEP < 3 #define MAYUSE(A) (void)A #else -#define MAYUSE(A) +#define MAYUSE(A) #endif #define GOCOND(B, T1, T2) \ @@ -1346,4 +1346,12 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n STRw_U12(s2, xEmu, offsetof(x64emu_t, test.test)); \ } +#define GETREX() \ + rex.rex = 0; \ + if(!rex.is32bits) \ + while(opcode>=0x40 && opcode<=0x4f) { \ + rex.rex = opcode; \ + opcode = F8; \ + } + #endif //__DYNAREC_ARM64_HELPER_H__ diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c index c7704e56..684aa490 100644 --- a/src/dynarec/rv64/dynarec_rv64_00.c +++ b/src/dynarec/rv64/dynarec_rv64_00.c @@ -31,12 +31,6 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni { uint8_t opcode; - - if(rex.is32bits) { - DEFAULT; - return ip; - } - opcode = PK(0); switch(opcode) { case 0x00 ... 
0x3f: addr = dynarec64_00_0(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); break; diff --git a/src/dynarec/rv64/dynarec_rv64_00_1.c b/src/dynarec/rv64/dynarec_rv64_00_1.c index 03e15cd2..54ca28f5 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_1.c +++ b/src/dynarec/rv64/dynarec_rv64_00_1.c @@ -52,7 +52,32 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MAYUSE(cacheupd); switch(opcode) { - + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + INST_NAME("INC Reg (32bits)"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + gd = xRAX + (opcode&7); + emit_inc32(dyn, ninst, rex, gd, x1, x2, x3, x4); + break; + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: + INST_NAME("DEC Reg (32bits)"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + gd = xRAX + (opcode&7); + emit_dec32(dyn, ninst, rex, gd, x1, x2, x3, x4); + break; case 0x50: case 0x51: case 0x52: @@ -63,8 +88,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x57: INST_NAME("PUSH reg"); gd = xRAX+(opcode&0x07)+(rex.b<<3); - SD(gd, xRSP, -8); - SUBI(xRSP, xRSP, 8); + PUSH1z(gd); break; case 0x58: case 0x59: @@ -76,31 +100,65 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x5F: INST_NAME("POP reg"); gd = xRAX+(opcode&0x07)+(rex.b<<3); - LD(gd, xRSP, 0); - if(gd!=xRSP) { - ADDI(xRSP, xRSP, 8); + POP1z(gd); + break; + + case 0x60: + if(rex.is32bits) { + INST_NAME("PUSHAD"); + AND(x1, xRSP, xMASK); + PUSH1_32(xRAX); + PUSH1_32(xRCX); + PUSH1_32(xRDX); + PUSH1_32(xRBX); + PUSH1_32(x1); + PUSH1_32(xRBP); + PUSH1_32(xRSI); + PUSH1_32(xRDI); + } else { + DEFAULT; + } + break; + case 0x61: + if(rex.is32bits) { + INST_NAME("POPAD"); + POP1_32(xRDI); + POP1_32(xRSI); + POP1_32(xRBP); + POP1_32(x1); + POP1_32(xRBX); + POP1_32(xRDX); + POP1_32(xRCX); + POP1_32(xRAX); + } else { + DEFAULT; } break; case 0x63: - INST_NAME("MOVSXD Gd, Ed"); - nextop = F8; - GETGD; - if(rex.w) { - if(MODREG) { // reg <= reg - ADDIW(gd, xRAX+(nextop&7)+(rex.b<<3), 0); - } else { // mem <= reg - SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - LW(gd, ed, fixedaddress); - } + if(rex.is32bits) { + // this is ARPL opcode + DEFAULT; } else { - if(MODREG) { // reg <= reg - AND(gd, xRAX+(nextop&7)+(rex.b<<3), xMASK); - } else { // mem <= reg - SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); - LWU(gd, ed, fixedaddress); + INST_NAME("MOVSXD Gd, Ed"); + nextop = F8; + GETGD; + if(rex.w) { + if(MODREG) { // reg <= reg + ADDIW(gd, xRAX+(nextop&7)+(rex.b<<3), 0); + } else { // mem <= reg + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + LW(gd, ed, fixedaddress); + } + } else { + if(MODREG) { // reg <= reg + AND(gd, xRAX+(nextop&7)+(rex.b<<3), xMASK); + } else { // mem <= reg + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); + LWU(gd, ed, fixedaddress); + } } } break; @@ -123,10 +181,10 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MESSAGE(LOG_DUMP, "PUSH then RET, using indirect\n"); TABLE64(x3, addr-4); LW(x1, x3, 0); - PUSH1(x1); + PUSH1z(x1); } else { - MOV64x(x3, i64); - PUSH1(x3); + MOV64z(x3, i64); + PUSH1z(x3); } break; case 0x69: @@ -165,8 +223,8 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 
0x6A: INST_NAME("PUSH Ib"); i64 = F8S; - MOV64x(x3, i64); - PUSH1(x3); + MOV64z(x3, i64); + PUSH1z(x3); break; case 0x6B: INST_NAME("IMUL Gd, Ed, Ib"); diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c index 9f9c7c8e..3fe52cef 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_2.c +++ b/src/dynarec/rv64/dynarec_rv64_00_2.c @@ -324,7 +324,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGD; if(MODREG) { // reg <= reg MVxw(xRAX+(nextop&7)+(rex.b<<3), gd); - } else { // mem <= reg + } else { // mem <= reg addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0); SDxw(gd, ed, fixedaddress); SMWRITELOCK(lock); @@ -399,15 +399,13 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("LEA Gd, Ed"); nextop=F8; GETGD; - if(MODREG) { // reg <= reg? that's an invalid operation + if(MODREG) { // reg <= reg? that's an invalid operation DEFAULT; - } else { // mem <= reg - addr = geted(dyn, addr, ninst, nextop, &ed, gd, x1, &fixedaddress, rex, NULL, 0, 0); - if(gd!=ed) { // it's sometimes used as a 3 bytes NOP - MV(gd, ed); - } - if(!rex.w) { - ZEROUP(gd); //truncate the higher 32bits as asked + } else { // mem <= reg + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0); + MV(gd, ed); + if(!rex.w || rex.is32bits) { + ZEROUP(gd); // truncate the higher 32bits as asked } } break; @@ -429,17 +427,17 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("POP Ed"); nextop = F8; if(MODREG) { - POP1(xRAX+(nextop&7)+(rex.b<<3)); + POP1z(xRAX+(nextop&7)+(rex.b<<3)); } else { - POP1(x2); // so this can handle POP [ESP] and maybe some variant too + POP1z(x2); // so this can handle POP [ESP] and maybe some variant too addr = geted(dyn, addr, ninst, nextop, &ed, x3, x1, &fixedaddress, rex, &lock, 1, 0); if(ed==xRSP) { - SD(x2, ed, fixedaddress); + SDz(x2, ed, fixedaddress); } else { // complicated to just allow a segfault that can be recovered correctly - SUB(xRSP, xRSP, 8); - SD(x2, ed, fixedaddress); - ADD(xRSP, xRSP, 8); + ADDIz(xRSP, xRSP, rex.is32bits?-4:-8); + SDz(x2, ed, fixedaddress); + ADDIz(xRSP, xRSP, rex.is32bits?4:8); } } break; @@ -486,14 +484,15 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int break; case 0x9C: INST_NAME("PUSHF"); + NOTEST(x1); READFLAGS(X_ALL); FLAGS_ADJUST_TO11(x3, xFlags, x2); - PUSH1(x3); + PUSH1z(x3); break; case 0x9D: INST_NAME("POPF"); SETFLAGS(X_ALL, SF_SET); - POP1(xFlags); + POP1z(xFlags); FLAGS_ADJUST_FROM11(xFlags, x2); MOV32w(x1, 0x3F7FD7); AND(xFlags, xFlags, x1); @@ -511,26 +510,35 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x9F: INST_NAME("LAHF"); READFLAGS(X_CF|X_PF|X_AF|X_ZF|X_SF); - ANDI(xRAX, xFlags, 0xFF); - SLLI(xRAX, xRAX, 8); + ANDI(x1, xFlags, 0xFF); + SLLI(x1, x1, 8); + MOV64x(x2, 0xffffffffffff00ffLL); + AND(xRAX, xRAX, x2); + OR(xRAX, xRAX, x1); + break; + case 0xA0: + INST_NAME("MOV AL,Ob"); + if(rex.is32bits) u64 = F32; else u64 = F64; + MOV64z(x1, u64); + LBU(xRAX, x1, 0); break; case 0xA1: INST_NAME("MOV EAX,Od"); - u64 = F64; - MOV64x(x1, u64); + if(rex.is32bits) u64 = F32; else u64 = F64; + MOV64z(x1, u64); LDxw(xRAX, x1, 0); break; case 0xA2: INST_NAME("MOV Ob,AL"); - u64 = F64; - MOV64x(x1, u64); + if(rex.is32bits) u64 = F32; else u64 = F64; + MOV64z(x1, u64); SB(xRAX, x1, 0); SMWRITE(); break; case 0xA3: INST_NAME("MOV Od,EAX"); - u64 = F64; - MOV64x(x1, u64); + 
if(rex.is32bits) u64 = F32; else u64 = F64; + MOV64z(x1, u64); SDxw(xRAX, x1, 0); SMWRITE(); break; diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c index f0428791..bf14373c 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_3.c +++ b/src/dynarec/rv64/dynarec_rv64_00_3.c @@ -196,7 +196,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } BARRIER(BARRIER_FLOAT); i32 = F16; - retn_to_epilog(dyn, ninst, i32); + retn_to_epilog(dyn, ninst, rex, i32); *need_epilog = 0; *ok = 0; break; @@ -207,7 +207,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int READFLAGS(X_PEND); // so instead, force the deferred flags, so it's not too slow, and flags are not lost } BARRIER(BARRIER_FLOAT); - ret_to_epilog(dyn, ninst); + ret_to_epilog(dyn, ninst, rex); *need_epilog = 0; *ok = 0; break; @@ -279,8 +279,8 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0xC9: INST_NAME("LEAVE"); - MV(xRSP, xRBP); - POP1(xRBP); + MVz(xRSP, xRBP); + POP1z(xRBP); break; case 0xCC: @@ -627,7 +627,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int #endif } #if STEP < 2 - if(isNativeCall(dyn, addr+i32, &dyn->insts[ninst].natcall, &dyn->insts[ninst].retn)) + if(!rex.is32bits && isNativeCall(dyn, addr+i32, &dyn->insts[ninst].natcall, &dyn->insts[ninst].retn)) tmp = dyn->insts[ninst].pass2choice = 3; else tmp = dyn->insts[ninst].pass2choice = 0; @@ -704,12 +704,13 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int *need_epilog = 0; *ok = 0; } - if(addr<0x100000000LL) { - MOV64x(x2, addr); + + if(rex.is32bits) { + MOV32w(x2, addr); } else { TABLE64(x2, addr); } - PUSH1(x2); + PUSH1z(x2); // TODO: Add support for CALLRET optim /*if(box64_dynarec_callret) { // Push actual return address @@ -729,16 +730,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int *ok = 0; *need_epilog = 0; } - if(addr+i32==0) { // self modifying code maybe? 
so use indirect address fetching - if(addr-4<0x100000000LL) { - MOV64x(x4, addr-4); - } else { - TABLE64(x4, addr-4); - } - LD(x4, x4, 0); - jump_to_next(dyn, 0, x4, ninst); - } else - jump_to_next(dyn, addr+i32, 0, ninst); + jump_to_next(dyn, addr+i32, 0, ninst); break; } break; @@ -1075,7 +1067,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } else { SETFLAGS(X_ALL, SF_SET); //Hack to put flag in "don't care" state } - GETEDx(0); + GETEDz(0); if(box64_dynarec_callret && box64_dynarec_bigblock>1) { BARRIER(BARRIER_FULL); } else { @@ -1098,22 +1090,41 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } STPx_S7_preindex(x4, xRIP, xSP, -16); }*/ - PUSH1(xRIP); + PUSH1z(xRIP); jump_to_next(dyn, 0, ed, ninst); break; case 4: // JMP Ed INST_NAME("JMP Ed"); READFLAGS(X_PEND); BARRIER(BARRIER_FLOAT); - GETEDx(0); + GETEDz(0); jump_to_next(dyn, 0, ed, ninst); *need_epilog = 0; *ok = 0; break; + case 5: // JMP FAR Ed + if(MODREG) { + DEFAULT; + } else { + INST_NAME("JMP FAR Ed"); + READFLAGS(X_PEND); + BARRIER(BARRIER_FLOAT); + SMREAD() + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 0, 0); + LDxw(x1, wback, 0); + ed = x1; + LHU(x3, wback, rex.w?8:4); + SW(x3, xEmu, offsetof(x64emu_t, segs[_CS])); + SW(xZR, xEmu, offsetof(x64emu_t, segs_serial[_CS])); + jump_to_epilog(dyn, 0, ed, ninst); + *need_epilog = 0; + *ok = 0; + } + break; case 6: // Push Ed INST_NAME("PUSH Ed"); - GETEDx(0); - PUSH1(ed); + GETEDz(0); + PUSH1z(ed); break; default: diff --git a/src/dynarec/rv64/dynarec_rv64_64.c b/src/dynarec/rv64/dynarec_rv64_64.c index 0fbbd8c5..bc3b2c96 100644 --- a/src/dynarec/rv64/dynarec_rv64_64.c +++ b/src/dynarec/rv64/dynarec_rv64_64.c @@ -32,6 +32,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni uint8_t gd, ed, eb1, eb2, gb1, gb2; uint8_t gback, wback, wb1, wb2, wb; int64_t i64, j64; + uint64_t u64; int v0, v1; int q0; int d0; @@ -53,12 +54,8 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni rep = opcode-0xF1; opcode = F8; } - // REX prefix before the F0 are ignored - rex.rex = 0; - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; - } + + GETREX(); switch(opcode) { case 0x03: @@ -396,6 +393,34 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LDxw(gd, x4, fixedaddress); } break; + + case 0xA1: + INST_NAME("MOV EAX,FS:Od"); + grab_segdata(dyn, addr, ninst, x4, seg); + if(rex.is32bits) + u64 = F32; + else + u64 = F64; + // TODO: could be optimized. + MOV64z(x1, u64); + ADD(x1, x1, x4); + LDxw(xRAX, x1, 0); + break; + + case 0xA3: + INST_NAME("MOV FS:Od,EAX"); + grab_segdata(dyn, addr, ninst, x4, seg); + if(rex.is32bits) + u64 = F32; + else + u64 = F64; + // TODO: could be optimized. 
+ MOV64z(x1, u64); + ADD(x1, x1, x4); + SDxw(xRAX, x1, 0); + SMWRITE2(); + break; + case 0xC6: INST_NAME("MOV Seg:Eb, Ib"); grab_segdata(dyn, addr, ninst, x4, seg); diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c index 2a7280a2..49a7ef65 100644 --- a/src/dynarec/rv64/dynarec_rv64_66.c +++ b/src/dynarec/rv64/dynarec_rv64_66.c @@ -49,14 +49,10 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni rep = opcode-0xF1; opcode = F8; } - // REX prefix before the 66 are ignored - rex.rex = 0; - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; - } - if(rex.w && opcode!=0x0f) // rex.w cancels "66", but not for 66 0f type of prefix + GETREX(); + + if(rex.w && !(opcode==0x0f || opcode==0xf0 || opcode==0x64 || opcode==0x65)) // rex.w cancels "66", but not for 66 0f type of prefix return dynarec64_00(dyn, addr-1, ip, ninst, rex, rep, ok, need_epilog); // addr-1, to "put back" opcode switch(opcode) { @@ -256,6 +252,42 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_cmp16_0(dyn, ninst, x1, x3, x4); } break; + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + INST_NAME("INC Reg16 (32bits)"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + gd = xRAX + (opcode&7); + ZEXTH(x1, gd); + emit_inc16(dyn, ninst, x1, x2, x3, x4); + LUI(x3, 0xffff0); + AND(gd, gd, x3); + OR(gd, gd, x1); + ZEROUP(gd); + break; + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: + INST_NAME("DEC Reg16 (32bits)"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + gd = xRAX + (opcode&7); + ZEXTH(x1, gd); + emit_dec16(dyn, ninst, x1, x2, x3, x4, x5); + LUI(x3, 0xffff0); + AND(gd, gd, x3); + OR(gd, gd, x1); + ZEROUP(gd); + break; case 0x64: addr = dynarec64_6664(dyn, addr, ip, ninst, rex, _FS, ok, need_epilog); break; @@ -610,7 +642,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } } break; - + case 0xC1: nextop = F8; switch((nextop>>3)&7) { @@ -706,7 +738,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; } break; - + case 0xC7: INST_NAME("MOV Ew, Iw"); nextop = F8; diff --git a/src/dynarec/rv64/dynarec_rv64_6664.c b/src/dynarec/rv64/dynarec_rv64_6664.c index fbf8b15d..a139e3ae 100644 --- a/src/dynarec/rv64/dynarec_rv64_6664.c +++ b/src/dynarec/rv64/dynarec_rv64_6664.c @@ -34,12 +34,7 @@ uintptr_t dynarec64_6664(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int int unscaled; MAYUSE(j64); - // REX prefix before the 66 are ignored - rex.rex = 0; - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; - } + GETREX(); switch(opcode) { case 0x8B: diff --git a/src/dynarec/rv64/dynarec_rv64_66f0.c b/src/dynarec/rv64/dynarec_rv64_66f0.c index ee2e0b66..863e535d 100644 --- a/src/dynarec/rv64/dynarec_rv64_66f0.c +++ b/src/dynarec/rv64/dynarec_rv64_66f0.c @@ -44,12 +44,8 @@ uintptr_t dynarec64_66F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int rep = opcode-0xF1; opcode = F8; } - // REX prefix before the F0/66 are ignored - rex.rex = 0; - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; - } + + GETREX(); switch(opcode) { case 0x81: diff --git a/src/dynarec/rv64/dynarec_rv64_67.c b/src/dynarec/rv64/dynarec_rv64_67.c index 89ee4fe3..cb7702a8 100644 --- a/src/dynarec/rv64/dynarec_rv64_67.c +++ b/src/dynarec/rv64/dynarec_rv64_67.c @@ -46,12 +46,14 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, 
uintptr_t addr, uintptr_t ip, int ni MAYUSE(lock); MAYUSE(cacheupd); - // REX prefix before the 67 are ignored - rex.rex = 0; - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; + if(rex.is32bits) { + // should do a different file + DEFAULT; + return addr; } + + GETREX(); + rep = 0; while((opcode==0xF2) || (opcode==0xF3)) { rep = opcode-0xF1; diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c index 65a144da..348f2905 100644 --- a/src/dynarec/rv64/dynarec_rv64_f0.c +++ b/src/dynarec/rv64/dynarec_rv64_f0.c @@ -46,12 +46,8 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni rep = opcode-0xF1; opcode = F8; } - // REX prefix before the F0 are ignored - rex.rex = 0; - while(opcode>=0x40 && opcode<=0x4f) { - rex.rex = opcode; - opcode = F8; - } + + GETREX(); // TODO: Take care of unligned memory access for all the LOCK ones. // https://github.com/ptitSeb/box64/pull/604 @@ -115,14 +111,14 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if(rex.rex) { wback = xRAX+(nextop&7)+(rex.b<<3); wb2 = 0; - } else { + } else { wback = (nextop&7); wb2 = (wback>>2)*8; wback = xRAX+(wback&3); } if (wb2) { - MV(x2, wback); - SRLI(x2, x2, wb2); + MV(x2, wback); + SRLI(x2, x2, wb2); ANDI(x2, x2, 0xff); } else { ANDI(x2, wback, 0xff); @@ -134,8 +130,8 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } BNE_MARK2(x6, x2); if (wb2) { - MV(wback, x2); - SRLI(wback, wback, wb2); + MV(wback, x2); + SRLI(wback, wback, wb2); ANDI(wback, wback, 0xff); } else { ANDI(wback, x2, 0xff); @@ -148,7 +144,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni B_NEXT_nocond; } else { // this one is tricky, and did some repetitive work. - // mostly because we only got 6 scratch registers, + // mostly because we only got 6 scratch registers, // and has so much to do. 
if(rex.rex) { gb1 = xRAX+((nextop&0x38)>>3)+(rex.r<<3); @@ -541,7 +537,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5, x6); } break; - default: + default: DEFAULT; } SMDMB(); diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c index b31b79eb..541ac45f 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.c +++ b/src/dynarec/rv64/dynarec_rv64_functions.c @@ -128,7 +128,7 @@ int extcache_get_st_f(dynarec_rv64_t* dyn, int ninst, int a) && dyn->insts[ninst].e.extcache[i].n==a) return i; return -1; -} +} int extcache_get_st_f_noback(dynarec_rv64_t* dyn, int ninst, int a) { for(int i=0; i<24; ++i) @@ -136,7 +136,7 @@ int extcache_get_st_f_noback(dynarec_rv64_t* dyn, int ninst, int a) && dyn->insts[ninst].e.extcache[i].n==a) return i; return -1; -} +} int extcache_get_current_st_f(dynarec_rv64_t* dyn, int a) { for(int i=0; i<24; ++i) @@ -144,7 +144,7 @@ int extcache_get_current_st_f(dynarec_rv64_t* dyn, int a) && dyn->e.extcache[i].n==a) return i; return -1; -} +} static void extcache_promote_double_forward(dynarec_rv64_t* dyn, int ninst, int maxinst, int a); static void extcache_promote_double_internal(dynarec_rv64_t* dyn, int ninst, int maxinst, int a); @@ -153,7 +153,7 @@ static void extcache_promote_double_combined(dynarec_rv64_t* dyn, int ninst, int if(a == dyn->insts[ninst].e.combined1 || a == dyn->insts[ninst].e.combined2) { if(a == dyn->insts[ninst].e.combined1) { a = dyn->insts[ninst].e.combined2; - } else + } else a = dyn->insts[ninst].e.combined1; int i = extcache_get_st_f_noback(dyn, ninst, a); //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double_combined, ninst=%d combined%c %d i=%d (stack:%d/%d)\n", ninst, (a == dyn->insts[ninst].e.combined2)?'2':'1', a ,i, dyn->insts[ninst].e.stack_push, -dyn->insts[ninst].e.stack_pop); @@ -326,7 +326,7 @@ void extcacheUnwind(extcache_t* cache) { if(cache->swapped) { // unswap - int a = -1; + int a = -1; int b = -1; for(int j=0; j<24 && ((a==-1) || (b==-1)); ++j) if((cache->extcache[j].t == EXT_CACHE_ST_D || cache->extcache[j].t == EXT_CACHE_ST_F)) { @@ -491,7 +491,7 @@ const char* getCacheName(int t, int n) return buff; } -void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name) +void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t rex) { static const char* fnames[] = { "ft0"," ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7", @@ -501,12 +501,12 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name) "ft8", "ft9", "ft10", "ft11" }; if(box64_dynarec_dump) { - printf_x64_instruction(my_context->dec, &dyn->insts[ninst].x64, name); + printf_x64_instruction(rex.is32bits?my_context->dec32:my_context->dec, &dyn->insts[ninst].x64, name); dynarec_log(LOG_NONE, "%s%p: %d emitted opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d/%d", (box64_dynarec_dump>1)?"\e[32m":"", (void*)(dyn->native_start+dyn->insts[ninst].address), dyn->insts[ninst].size/4, - ninst, + ninst, dyn->insts[ninst].x64.barrier, dyn->insts[ninst].x64.state_flags, dyn->f.pending, diff --git a/src/dynarec/rv64/dynarec_rv64_functions.h b/src/dynarec/rv64/dynarec_rv64_functions.h index fc53dcd7..451336bd 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.h +++ b/src/dynarec/rv64/dynarec_rv64_functions.h @@ -45,7 +45,7 @@ void extcacheUnwind(extcache_t* cache); const char* getCacheName(int t, int n); -void inst_name_pass3(dynarec_native_t* dyn, int 
ninst, const char* name); +void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t rex); void print_opcode(dynarec_native_t* dyn, int ninst, uint32_t opcode); void print_newinst(dynarec_native_t* dyn, int ninst); diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 52cb5ce6..3342880e 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -26,11 +26,16 @@ #include "dynarec_rv64_functions.h" #include "dynarec_rv64_helper.h" +static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, int *l, int i12); + /* setup r2 to address pointed by ED, also fixaddress is an optionnal delta in the range [-absmax, +absmax], with delta&mask==0 to be added to ed for LDR/STR */ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int *l, int i12, int delta) { MAYUSE(dyn); MAYUSE(ninst); MAYUSE(delta); + if(rex.is32bits) + return geted_32(dyn, addr, ninst, nextop, ed, hint, scratch, fixaddress, l, i12); + int lock = l?((l==LOCK_LOCK)?1:2):0; if(lock==2) *l = 0; @@ -206,6 +211,136 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, return addr; } +static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, int *l, int i12) +{ + MAYUSE(dyn); MAYUSE(ninst); + + int lock = l?((l==LOCK_LOCK)?1:2):0; + if(lock==2) + *l = 0; + uint8_t ret = x2; + *fixaddress = 0; + if(hint>0) ret = hint; + int maxval = 2047; + if(i12>1) + maxval -= i12; + MAYUSE(scratch); + if(!(nextop&0xC0)) { + if((nextop&7)==4) { + uint8_t sib = F8; + int sib_reg = (sib>>3)&0x7; + int sib_reg2 = sib&0x7; + if(sib_reg2==5) { + int64_t tmp = F32S; + if (sib_reg!=4) { + if(tmp && ((tmp<-2048) || (tmp>maxval) || !i12)) { + MOV32w(scratch, tmp); + if((sib>>6)) { + if(rv64_zba) SHxADDUW(ret, xRAX+sib_reg, (sib>>6), scratch); else {SLLI(ret, xRAX+sib_reg, sib>>6); ADDW(ret, ret, scratch);} + } else + ADDW(ret, xRAX+sib_reg, scratch); + } else { + if(sib>>6) + SLLI(ret, xRAX+sib_reg, (sib>>6)); + else + ret = xRAX+sib_reg; + *fixaddress = tmp; + } + } else { + switch(lock) { + case 1: addLockAddress((int32_t)tmp); break; + case 2: if(isLockAddress((int32_t)tmp)) *l=1; break; + } + MOV32w(ret, tmp); + } + } else { + if (sib_reg!=4) { + if((sib>>6)) { + if(rv64_zba) SHxADDUW(ret, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else { SLLI(ret, xRAX+sib_reg, (sib>>6)); ADDW(ret, ret, xRAX+sib_reg2);} + } else + ADDW(ret, xRAX+sib_reg2, xRAX+sib_reg); + } else { + ret = xRAX+sib_reg2; + } + } + } else if((nextop&7)==5) { + uint32_t tmp = F32; + MOV32w(ret, tmp); + switch(lock) { + case 1: addLockAddress(tmp); break; + case 2: if(isLockAddress(tmp)) *l=1; break; + } + } else { + ret = xRAX+(nextop&7); + if(ret==hint) { + AND(hint, ret, xMASK); //to clear upper part + } + } + } else { + int64_t i32; + uint8_t sib = 0; + int sib_reg = 0; + if((nextop&7)==4) { + sib = F8; + sib_reg = (sib>>3)&7; + } + int sib_reg2 = sib&0x07; + if(nextop&0x80) + i32 = F32S; + else + i32 = F8S; + if(i32==0 || ((i32>=-2048) && (i32<=2047) && i12)) { + *fixaddress = i32; + if((nextop&7)==4) { + if (sib_reg!=4) { + if(sib>>6) { + if(rv64_zba) SHxADDUW(ret, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else {SLLI(ret, xRAX+sib_reg, (sib>>6)); ADDW(ret, ret, 
xRAX+sib_reg2);} + } else + ADDW(ret, xRAX+sib_reg2, xRAX+sib_reg); + } else { + ret = xRAX+sib_reg2; + } + } else { + ret = xRAX+(nextop&0x07); + } + } else { + if(i32>=-2048 && i32<=2047) { + if((nextop&7)==4) { + if (sib_reg!=4) { + if(sib>>6) { + if(rv64_zba) SHxADDUW(scratch, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else {SLLI(scratch, xRAX+sib_reg, sib>>6); ADDW(scratch, scratch, xRAX+sib_reg2);} + } else + ADDW(scratch, xRAX+sib_reg2, xRAX+sib_reg); + } else { + scratch = xRAX+sib_reg2; + } + } else + scratch = xRAX+(nextop&0x07); + ADDIW(ret, scratch, i32); + } else { + MOV32w(scratch, i32); + if((nextop&7)==4) { + if (sib_reg!=4) { + ADDW(scratch, scratch, xRAX+sib_reg2); + if(sib>>6) { + if(rv64_zba) SHxADDUW(ret, xRAX+sib_reg, (sib>>6), scratch); else {SLLI(ret, xRAX+sib_reg, (sib>>6)); ADDW(ret, ret, scratch);} + } else + ADDW(ret, scratch, xRAX+sib_reg); + } else { + PASS3(int tmp = xRAX+sib_reg2); + ADDW(ret, tmp, scratch); + } + } else { + PASS3(int tmp = xRAX+(nextop&0x07)); + ADDW(ret, tmp, scratch); + } + } + } + } + *ed = ret; + return addr; +} + /* setup r2 to address pointed by ED, also fixaddress is an optionnal delta in the range [-absmax, +absmax], with delta&mask==0 to be added to ed for LDR/STR */ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int *l, int i12, int delta) { @@ -229,12 +364,12 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop if((sib&0x7)==5) { int64_t tmp = F32S; if (sib_reg!=4) { - if(tmp && ((tmp<-2048) && (tmp>maxval) || !i12)) { + if(tmp && ((tmp<-2048) || (tmp>maxval) || !i12)) { MOV64x(scratch, tmp); if((sib>>6)) { - if(rv64_zba) SHxADD(ret, xRAX+sib_reg, (sib>>6), scratch); else {SLLI(ret, xRAX+sib_reg, sib>>6); ADD(ret, ret, scratch);} + if(rv64_zba) SHxADDUW(ret, xRAX+sib_reg, (sib>>6), scratch); else {SLLI(ret, xRAX+sib_reg, sib>>6); ADDW(ret, ret, scratch);} } else - ADD(ret, xRAX+sib_reg, scratch); + ADDW(ret, xRAX+sib_reg, scratch); } else { if(sib>>6) SLLI(ret, xRAX+sib_reg, (sib>>6)); @@ -252,9 +387,9 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop } else { if (sib_reg!=4) { if((sib>>6)) { - if(rv64_zba) SHxADD(ret, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else { SLLI(ret, xRAX+sib_reg, (sib>>6)); ADD(ret, ret, xRAX+sib_reg2);} + if(rv64_zba) SHxADDUW(ret, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else { SLLI(ret, xRAX+sib_reg, (sib>>6)); ADDW(ret, ret, xRAX+sib_reg2);} } else - ADD(ret, xRAX+sib_reg2, xRAX+sib_reg); + ADDW(ret, xRAX+sib_reg2, xRAX+sib_reg); } else { ret = xRAX+sib_reg2; } @@ -263,7 +398,7 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop uint32_t tmp = F32; MOV32w(ret, tmp); GETIP(addr+delta); - ADD(ret, ret, xRIP); + ADDW(ret, ret, xRIP); switch(lock) { case 1: addLockAddress(addr+delta+tmp); break; case 2: if(isLockAddress(addr+delta+tmp)) *l=1; break; @@ -292,9 +427,9 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop if((nextop&7)==4) { if (sib_reg!=4) { if(sib>>6) { - if(rv64_zba) SHxADD(ret, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else {SLLI(ret, xRAX+sib_reg, (sib>>6)); ADD(ret, ret, xRAX+sib_reg2);} + if(rv64_zba) SHxADDUW(ret, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else {SLLI(ret, xRAX+sib_reg, (sib>>6)); ADDW(ret, ret, xRAX+sib_reg2);} } else - ADD(ret, xRAX+sib_reg2, xRAX+sib_reg); + ADDW(ret, xRAX+sib_reg2, xRAX+sib_reg); } else { ret = xRAX+sib_reg2; } @@ -306,31 
+441,31 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop if((nextop&7)==4) { if (sib_reg!=4) { if(sib>>6) { - if(rv64_zba) SHxADD(scratch, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else {SLLI(scratch, xRAX+sib_reg, sib>>6); ADD(scratch, scratch, xRAX+sib_reg2);} + if(rv64_zba) SHxADDUW(scratch, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else {SLLI(scratch, xRAX+sib_reg, sib>>6); ADDW(scratch, scratch, xRAX+sib_reg2);} } else - ADD(scratch, xRAX+sib_reg2, xRAX+sib_reg); + ADDW(scratch, xRAX+sib_reg2, xRAX+sib_reg); } else { scratch = xRAX+sib_reg2; } } else scratch = xRAX+(nextop&0x07)+(rex.b<<3); - ADDI(ret, scratch, i64); + ADDIW(ret, scratch, i64); } else { MOV32w(scratch, i64); if((nextop&7)==4) { if (sib_reg!=4) { - ADD(scratch, scratch, xRAX+sib_reg2); + ADDW(scratch, scratch, xRAX+sib_reg2); if(sib>>6) { - if(rv64_zba) SHxADD(ret, xRAX+sib_reg, (sib>>6), scratch); else {SLLI(ret, xRAX+sib_reg, (sib>>6)); ADD(ret, ret, scratch);} + if(rv64_zba) SHxADDUW(ret, xRAX+sib_reg, (sib>>6), scratch); else {SLLI(ret, xRAX+sib_reg, (sib>>6)); ADDW(ret, ret, scratch);} } else - ADD(ret, scratch, xRAX+sib_reg); + ADDW(ret, scratch, xRAX+sib_reg); } else { PASS3(int tmp = xRAX+sib_reg2); - ADD(ret, tmp, scratch); + ADDW(ret, tmp, scratch); } } else { PASS3(int tmp = xRAX+(nextop&0x07)+(rex.b<<3)); - ADD(ret, tmp, scratch); + ADDW(ret, tmp, scratch); } } } @@ -428,12 +563,12 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst) JALR(x2); // save LR... } -void ret_to_epilog(dynarec_rv64_t* dyn, int ninst) +void ret_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex) { MAYUSE(dyn); MAYUSE(ninst); MESSAGE(LOG_DUMP, "Ret to epilog\n"); - POP1(xRIP); - MV(x1, xRIP); + POP1z(xRIP); + MVz(x1, xRIP); SMEND(); /*if(box64_dynarec_callret) { // pop the actual return address from RV64 stack @@ -476,18 +611,18 @@ void ret_to_epilog(dynarec_rv64_t* dyn, int ninst) CLEARIP(); } -void retn_to_epilog(dynarec_rv64_t* dyn, int ninst, int n) +void retn_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex, int n) { MAYUSE(dyn); MAYUSE(ninst); MESSAGE(LOG_DUMP, "Retn to epilog\n"); - POP1(xRIP); + POP1z(xRIP); if(n>0x7ff) { MOV64x(w1, n); - ADD(xRSP, xRSP, x1); + ADDz(xRSP, xRSP, x1); } else { - ADDI(xRSP, xRSP, n); + ADDIz(xRSP, xRSP, n); } - MV(x1, xRIP); + MVz(x1, xRIP); SMEND(); /*if(box64_dynarec_callret) { // pop the actual return address from RV64 stack diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 6ce62914..0b1023b3 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -99,6 +99,15 @@ LD(x1, wback, fixedaddress); \ ed = x1; \ } +#define GETEDz(D) if(MODREG) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + wback = 0; \ + } else { \ + SMREAD() \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \ + LDz(x1, wback, fixedaddress); \ + ed = x1; \ + } // GETED32 can use r1 for ed, and r2 for wback. 
wback is 0 if ed is xEAX..xEDI #define GETED32(D) if(MODREG) { \ ed = xRAX+(nextop&7)+(rex.b<<3); \ @@ -987,8 +996,8 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop void jump_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst); void jump_to_epilog_fast(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst); void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst); -void ret_to_epilog(dynarec_rv64_t* dyn, int ninst); -void retn_to_epilog(dynarec_rv64_t* dyn, int ninst, int n); +void ret_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex); +void retn_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex, int n); void iret_to_epilog(dynarec_rv64_t* dyn, int ninst, int is64bits); void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg); void call_n(dynarec_rv64_t* dyn, int ninst, void* fnc, int w); @@ -1328,4 +1337,12 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SW(s2, xEmu, offsetof(x64emu_t, test.test)); \ } +#define GETREX() \ + rex.rex = 0; \ + if(!rex.is32bits) \ + while(opcode>=0x40 && opcode<=0x4f) { \ + rex.rex = opcode; \ + opcode = F8; \ + } + #endif //__DYNAREC_RV64_HELPER_H__ diff --git a/src/dynarec/rv64/dynarec_rv64_pass3.h b/src/dynarec/rv64/dynarec_rv64_pass3.h index dafef0c5..459c4e13 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass3.h +++ b/src/dynarec/rv64/dynarec_rv64_pass3.h @@ -1,4 +1,4 @@ -#define INIT +#define INIT #define FINI \ if(ninst) \ addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst].x64.size, dyn->insts[ninst].size/4); \ @@ -16,8 +16,8 @@ if(box64_dynarec_dump) print_newinst(dyn, ninst); \ if(ninst) \ addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst-1].x64.size, dyn->insts[ninst-1].size/4); -#define INST_EPILOG -#define INST_NAME(name) inst_name_pass3(dyn, ninst, name) +#define INST_EPILOG +#define INST_NAME(name) inst_name_pass3(dyn, ninst, name, rex) #define TABLE64(A, V) {int val64offset = Table64(dyn, (V), 3); MESSAGE(LOG_DUMP, " Table64: 0x%lx\n", (V)); AUIPC(A, SPLIT20(val64offset)); LD(A, A, SPLIT12(val64offset));} #define FTABLE64(A, V) {mmx87_regs_t v = {.d = V}; int val64offset = Table64(dyn, v.q, 3); MESSAGE(LOG_DUMP, " FTable64: %g\n", v.d); AUIPC(x1, SPLIT20(val64offset)); FLD(A, x1, SPLIT12(val64offset));} diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index 6b01c342..e9fa2f6d 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -113,6 +113,7 @@ f28–31 ft8–11 FP temporaries Caller #define MOV64x(A, B) rv64_move64(dyn, ninst, A, B) #define MOV32w(A, B) rv64_move32(dyn, ninst, A, B, 1) #define MOV64xw(A, B) if(rex.w) {MOV64x(A, B);} else {MOV32w(A, B);} +#define MOV64z(A, B) if(rex.is32bits) {MOV32w(A, B);} else {MOV64x(A, B);} // ZERO the upper part #define ZEROUP(r) AND(r, r, xMASK) @@ -175,12 +176,16 @@ f28–31 ft8–11 FP temporaries Caller #define ADDW(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, 0b0111011)) // rd = rs1 + rs2 #define ADDxw(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, rex.w?0b0110011:0b0111011)) +// rd = rs1 + rs2 +#define ADDz(rd, rs1, rs2) EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, rex.is32bits?0b0111011:0b0110011)) // rd = rs1 - rs2 #define SUB(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, 0b0110011)) // rd = rs1 - rs2 #define SUBW(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, 0b0111011)) // rd = rs1 - rs2 #define SUBxw(rd, rs1, rs2) 
EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, rex.w?0b0110011:0b0111011)) +// rd = rs1 - rs2 +#define SUBz(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, rex.is32bits?0b0111011:0b0110011)) // rd = rs1<
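
A note on the helper-naming pattern this change relies on: helpers suffixed "xw" (LDxw, SDxw, ADDxw, MOVxw, ...) pick their operand width from rex.w, while the new "z"-suffixed helpers (PUSH1z/POP1z, MOV64z, MVz, ADDz, ADDIz, SUBz, LDz/SDz, GETEDz) pick the guest's native pointer width from rex.is32bits, so stack slots, addresses and return-address handling shrink to 4 bytes for a 32-bit process. The GETREX() macro factored out of every prefix handler likewise only consumes 0x40..0x4F bytes when !rex.is32bits, because in 32-bit mode those bytes are the INC/DEC reg opcodes that dynarec_rv64_00_1.c and dynarec_rv64_66.c now implement directly. The C snippet below is only a standalone sketch of the assumed stack-slot behaviour behind PUSH1z (its definition is not part of this diff); push1z here is a hypothetical stand-in, since the real macro emits RISC-V instructions at translation time rather than running C.

/*
 * Sketch only (not box64 code): the slot-width selection the PUSH1z helper
 * is assumed to implement -- 4-byte stack slots for a 32-bit guest process,
 * 8-byte slots otherwise.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t push1z(uint8_t *stack, uint64_t rsp, int is32bits, uint64_t value)
{
    if (is32bits) {
        rsp -= 4;
        uint32_t v32 = (uint32_t)value;   /* a 32-bit push stores only 32 bits */
        memcpy(stack + rsp, &v32, 4);
    } else {
        rsp -= 8;
        memcpy(stack + rsp, &value, 8);
    }
    return rsp;                           /* updated stack pointer */
}

int main(void)
{
    uint8_t stack[64] = {0};
    uint64_t rsp = sizeof(stack);
    rsp = push1z(stack, rsp, 1, 0x11223344);   /* 32-bit mode: rsp moves by 4 */
    rsp = push1z(stack, rsp, 0, 0x55667788);   /* 64-bit mode: rsp moves by 8 */
    printf("final rsp offset: %llu\n", (unsigned long long)rsp);
    return 0;
}

The same rex.is32bits dispatch appears directly in the emitter macros at the bottom of the diff: MOV64z degrades to MOV32w, and ADDz/SUBz select the 32-bit ADDW/SUBW encodings (opcode 0b0111011) when the guest is a 32-bit process.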