diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 282 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 808 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f20f.c | 28 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f.c | 36 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 52 |
5 files changed, 603 insertions, 603 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 654a8c1c..e819ab8c 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -42,7 +42,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni int s0, s1; uint64_t tmp64u; int64_t j64; - int64_t fixedaddress; + int64_t fixedaddress, gdoffset; int unscaled; MAYUSE(wb2); MAYUSE(gback); @@ -129,20 +129,20 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x10: INST_NAME("MOVUPS Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); LD(x3, wback, fixedaddress+0); LD(x4, wback, fixedaddress+8); - SD(x3, gback, 0); - SD(x4, gback, 8); + SD(x3, gback, gdoffset+0); + SD(x4, gback, gdoffset+8); break; case 0x11: INST_NAME("MOVUPS Ex,Gx"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); - LD(x3, gback, 0); - LD(x4, gback, 8); + LD(x3, gback, gdoffset+0); + LD(x4, gback, gdoffset+8); SD(x3, wback, fixedaddress+0); SD(x4, wback, fixedaddress+8); if(!MODREG) @@ -152,10 +152,10 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; if(MODREG) { INST_NAME("MOVHLPS Gx,Ex"); - GETGX(x1); + GETGX(); GETEX(x2, 0); LD(x3, wback, fixedaddress+8); - SD(x3, gback, 0); + SD(x3, gback, gdoffset+0); } else { INST_NAME("MOVLPS Gx,Ex"); GETEXSD(v0, 0); @@ -166,9 +166,9 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x13: INST_NAME("MOVLPS Ex,Gx"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); - LD(x3, gback, 0); + LD(x3, gback, gdoffset+0); SD(x3, wback, fixedaddress+0); if(!MODREG) SMWRITE2(); @@ -176,28 +176,28 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x14: INST_NAME("UNPCKLPS Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); - LWU(x5, gback, 1*4); + LWU(x5, gback, gdoffset+1*4); LWU(x3, wback, fixedaddress+0); LWU(x4, wback, fixedaddress+4); - SW(x4, gback, 3*4); - SW(x5, gback, 2*4); - SW(x3, gback, 1*4); + SW(x4, gback, gdoffset+3*4); + SW(x5, gback, gdoffset+2*4); + SW(x3, gback, gdoffset+1*4); break; case 0x15: INST_NAME("UNPCKHPS Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); LWU(x3, wback, fixedaddress+2*4); LWU(x4, wback, fixedaddress+3*4); - LWU(x5, gback, 2*4); - LWU(x6, gback, 3*4); - SW(x5, gback, 0*4); - SW(x3, gback, 1*4); - SW(x6, gback, 2*4); - SW(x4, gback, 3*4); + LWU(x5, gback, gdoffset+2*4); + LWU(x6, gback, gdoffset+3*4); + SW(x5, gback, gdoffset+0*4); + SW(x3, gback, gdoffset+1*4); + SW(x6, gback, gdoffset+2*4); + SW(x4, gback, gdoffset+3*4); break; case 0x16: nextop = F8; @@ -207,17 +207,17 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("MOVHPS Gx,Ex"); SMREAD(); } - GETGX(x1); + GETGX(); GETEX(x2, 0); LD(x4, wback, fixedaddress+0); - SD(x4, gback, 8); + SD(x4, gback, gdoffset+8); break; case 0x17: INST_NAME("MOVHPS Ex,Gx"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); - LD(x4, gback, 8); + LD(x4, gback, gdoffset+8); SD(x4, wback, fixedaddress+0); if(!MODREG) SMWRITE2(); @@ -250,14 +250,14 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x28: INST_NAME("MOVAPS Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_MV_Q(x3); break; case 0x29: INST_NAME("MOVAPS Ex,Gx"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_MV_Q2(x3); if(!MODREG) @@ -267,10 +267,10 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x2B: INST_NAME("MOVNTPS Ex,Gx"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); - LD(x3, gback, 0); - LD(x4, gback, 8); + LD(x3, gback, gdoffset+0); + LD(x4, gback, gdoffset+8); SD(x3, wback, fixedaddress+0); SD(x4, wback, fixedaddress+8); break; @@ -352,19 +352,19 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x51: INST_NAME("SQRTPS Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); d0 = fpu_get_scratch(dyn); for(int i=0; i<4; ++i) { FLW(d0, wback, fixedaddress+4*i); FSQRTS(d0, d0); - FSW(d0, gback, 4*i); + FSW(d0, gback, gdoffset+4*i); } break; case 0x52: INST_NAME("RSQRTPS Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); s0 = fpu_get_scratch(dyn); s1 = fpu_get_scratch(dyn); // 1.0f @@ -385,19 +385,19 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni // s0 is negative, so generate a NaN FDIVS(s0, s1, v0); // s0 is a NaN, just copy it - FSW(s0, gback, i*4); + FSW(s0, gback, gdoffset+i*4); J(4*4); // do regular computation } FSQRTS(s0, s0); FDIVS(s0, s1, s0); - FSW(s0, gback, i*4); + FSW(s0, gback, gdoffset+i*4); } break; case 0x53: INST_NAME("RCPPS Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); @@ -406,7 +406,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni for(int i=0; i<4; ++i) { FLW(d1, wback, fixedaddress+4*i); FDIVS(d1, d0, d1); - FSW(d1, gback, 4*i); + FSW(d1, gback, gdoffset+4*i); } break; case 0x54: @@ -414,7 +414,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; gd = ((nextop&0x38)>>3)+(rex.r<<3); if(!(MODREG && gd==(nextop&7)+(rex.b<<3))) { - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_Q(x3, x4, AND(x3, x3, x4)); } @@ -422,7 +422,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x55: INST_NAME("ANDNPS Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_Q(x3, x4, NOT(x3, x3); AND(x3, x3, x4)); break; @@ -431,7 +431,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; gd = ((nextop&0x38)>>3)+(rex.r<<3); if(!(MODREG && gd==(nextop&7)+(rex.b<<3))) { - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_Q(x3, x4, OR(x3, x3, x4)); } @@ -440,12 +440,12 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("XORPS Gx, Ex"); nextop = F8; //TODO: it might be possible to check if SS or SD are used and not purge them to optimize a bit - GETGX(x1); + GETGX(); if(MODREG && gd==(nextop&7)+(rex.b<<3)) { // just zero dest - SD(xZR, x1, 0); - SD(xZR, x1, 8); + SD(xZR, gback, gdoffset+0); + SD(xZR, gback, gdoffset+8); } else { GETEX(x2, 0); SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); @@ -454,37 +454,37 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x58: INST_NAME("ADDPS Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); s0 = fpu_get_scratch(dyn); s1 = fpu_get_scratch(dyn); for(int i=0; i<4; ++i) { // GX->f[i] += EX->f[i]; FLW(s0, wback, fixedaddress+i*4); - FLW(s1, gback, i*4); + FLW(s1, gback, gdoffset+i*4); FADDS(s1, s1, s0); - FSW(s1, gback, i*4); + FSW(s1, gback, gdoffset+i*4); } break; case 0x59: INST_NAME("MULPS Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); s0 = fpu_get_scratch(dyn); s1 = fpu_get_scratch(dyn); for(int i=0; i<4; ++i) { // GX->f[i] *= EX->f[i]; FLW(s0, wback, fixedaddress+i*4); - FLW(s1, gback, i*4); + FLW(s1, gback, gdoffset+i*4); FMULS(s1, s1, s0); - FSW(s1, gback, i*4); + FSW(s1, gback, gdoffset+i*4); } break; case 0x5A: INST_NAME("CVTPS2PD Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); s0 = fpu_get_scratch(dyn); s1 = fpu_get_scratch(dyn); @@ -492,46 +492,46 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FLW(s1, wback, fixedaddress+4); FCVTDS(s0, s0); FCVTDS(s1, s1); - FSD(s0, gback, 0); - FSD(s1, gback, 8); + FSD(s0, gback, gdoffset+0); + FSD(s1, gback, gdoffset+8); break; case 0x5B: INST_NAME("CVTDQ2PS Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); s0 = fpu_get_scratch(dyn); for (int i=0; i<4; ++i) { LW(x3, wback, fixedaddress+i*4); FCVTSW(s0, x3, RD_RNE); - FSW(s0, gback, i*4); + FSW(s0, gback, gdoffset+i*4); } break; case 0x5C: INST_NAME("SUBPS Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); s0 = fpu_get_scratch(dyn); s1 = fpu_get_scratch(dyn); for(int i=0; i<4; ++i) { // GX->f[i] -= EX->f[i]; FLW(s0, wback, fixedaddress+i*4); - FLW(s1, gback, i*4); + FLW(s1, gback, gdoffset+i*4); FSUBS(s1, s1, s0); - FSW(s1, gback, i*4); + FSW(s1, gback, gdoffset+i*4); } break; case 0x5D: INST_NAME("MINPS Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); s0 = fpu_get_scratch(dyn); s1 = fpu_get_scratch(dyn); for(int i=0; i<4; ++i) { FLW(s0, wback, fixedaddress+i*4); - FLW(s1, gback, i*4); + FLW(s1, gback, gdoffset+i*4); if(!box64_dynarec_fastnan) { FEQS(x3, s0, s0); FEQS(x4, s1, s1); @@ -539,38 +539,38 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BEQZ(x3, 12); FLTS(x3, s0, s1); BEQZ(x3, 8); - FSW(s0, gback, i*4); + FSW(s0, gback, gdoffset+i*4); } else { FMINS(s1, s1, s0); - FSW(s1, gback, i*4); + FSW(s1, gback, gdoffset+i*4); } } break; case 0x5E: INST_NAME("DIVPS Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); s0 = fpu_get_scratch(dyn); s1 = fpu_get_scratch(dyn); for(int i=0; i<4; ++i) { // GX->f[i] /= EX->f[i]; FLW(s0, wback, fixedaddress+i*4); - FLW(s1, gback, i*4); + FLW(s1, gback, gdoffset+i*4); FDIVS(s1, s1, s0); - FSW(s1, gback, i*4); + FSW(s1, gback, gdoffset+i*4); } break; case 0x5F: INST_NAME("MAXPS Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); s0 = fpu_get_scratch(dyn); s1 = fpu_get_scratch(dyn); for(int i=0; i<4; ++i) { FLW(s0, wback, fixedaddress+i*4); - FLW(s1, gback, i*4); + FLW(s1, gback, gdoffset+i*4); if(!box64_dynarec_fastnan) { FEQS(x3, s0, s0); FEQS(x4, s1, s1); @@ -578,80 +578,80 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BEQZ(x3, 12); FLTS(x3, s1, s0); BEQZ(x3, 8); - FSW(s0, gback, i*4); + FSW(s0, gback, gdoffset+i*4); } else { FMAXS(s1, s1, s0); - FSW(s1, gback, i*4); + FSW(s1, gback, gdoffset+i*4); } } break; case 0x60: INST_NAME("PUNPCKLBW Gm,Em"); nextop = F8; - GETGM(x1); + GETGM(); for(int i=3; i>0; --i) { // 0 is untouched // GX->ub[2 * i] = GX->ub[i]; - LBU(x3, gback, i); - SB(x3, gback, 2*i); + LBU(x3, gback, gdoffset+i); + SB(x3, gback, gdoffset+2*i); } if (MODREG && gd==(nextop&7)) { for(int i=0; i<4; ++i) { // GX->ub[2 * i + 1] = GX->ub[2 * i]; - LBU(x3, gback, 2*i); - SB(x3, gback, 2*i+1); + LBU(x3, gback, gdoffset+2*i); + SB(x3, gback, gdoffset+2*i+1); } } else { GETEM(x2, 0); for(int i=0; i<4; ++i) { // GX->ub[2 * i + 1] = EX->ub[i]; LBU(x3, wback, fixedaddress+i); - SB(x3, gback, 2*i+1); + SB(x3, gback, gdoffset+2*i+1); } } break; case 0x61: INST_NAME("PUNPCKLWD Gm, Em"); nextop = F8; - GETGM(x1); + GETGM(); GETEM(x2, 0); // GM->uw[3] = EM->uw[1]; LHU(x3, wback, fixedaddress+2*1); - SH(x3, gback, 2*3); + SH(x3, gback, gdoffset+2*3); // GM->uw[2] = GM->uw[1]; - LHU(x3, gback, 2*1); - SH(x3, gback, 2*2); + LHU(x3, gback, gdoffset+2*1); + SH(x3, gback, gdoffset+2*2); // GM->uw[1] = EM->uw[0]; LHU(x3, wback, fixedaddress+2*0); - SH(x3, gback, 2*1); + SH(x3, gback, gdoffset+2*1); break; case 0x62: INST_NAME("PUNPCKLDQ Gm, Em"); nextop = F8; - GETGM(x1); + GETGM(); GETEM(x2, 0); // GM->ud[1] = EM->ud[0]; LWU(x3, wback, fixedaddress); - SW(x3, gback, 4*1); + SW(x3, gback, gdoffset+4*1); break; case 0x67: INST_NAME("PACKUSWB Gm, Em"); nextop = F8; - GETGM(x2); + GETGM(); ADDI(x5, xZR, 0xFF); for(int i=0; i<4; ++i) { // GX->ub[i] = (GX->sw[i]<0)?0:((GX->sw[i]>0xff)?0xff:GX->sw[i]); - LH(x3, gback, i*2); + LH(x3, gback, gdoffset+i*2); BGE(x5, x3, 8); ADDI(x3, xZR, 0xFF); NOT(x4, x3); SRAI(x4, x4, 63); AND(x3, x3, x4); - SB(x3, gback, i); + SB(x3, gback, gdoffset+i); } if (MODREG && gd==(nextop&7)) { // GM->ud[1] = GM->ud[0]; - LW(x3, gback, 0*4); - SW(x3, gback, 1*4); + LW(x3, gback, gdoffset+0*4); + SW(x3, gback, gdoffset+1*4); } else { GETEM(x1, 0); for(int i=0; i<4; ++i) { @@ -662,55 +662,55 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni NOT(x4, x3); SRAI(x4, x4, 63); AND(x3, x3, x4); - SB(x3, gback, 4+i); + SB(x3, gback, gdoffset+4+i); } } break; case 0x68: INST_NAME("PUNPCKHBW Gm,Em"); nextop = F8; - GETGM(x1); + GETGM(); for(int i=0; i<4; ++i) { // GX->ub[2 * i] = GX->ub[i + 4]; - LBU(x3, gback, i+4); - SB(x3, gback, 2*i); + LBU(x3, gback, gdoffset+i+4); + SB(x3, gback, gdoffset+2*i); } if (MODREG && gd==(nextop&7)) { for(int i=0; i<4; ++i) { // GX->ub[2 * i + 1] = GX->ub[2 * i]; - LBU(x3, gback, 2*i); - SB(x3, gback, 2*i+1); + LBU(x3, gback, gdoffset+2*i); + SB(x3, gback, gdoffset+2*i+1); } } else { GETEM(x2, 0); for(int i=0; i<4; ++i) { // GX->ub[2 * i + 1] = EX->ub[i + 4]; LBU(x3, wback, fixedaddress+i+4); - SB(x3, gback, 2*i+1); + SB(x3, gback, gdoffset+2*i+1); } } break; case 0x69: INST_NAME("PUNPCKHWD Gm,Em"); nextop = F8; - GETGM(x2); + GETGM(); for(int i=0; i<2; ++i) { // GX->uw[2 * i] = GX->uw[i + 2]; - LHU(x3, gback, (i+2)*2); - SH(x3, gback, 2*i*2); + LHU(x3, gback, gdoffset+(i+2)*2); + SH(x3, gback, gdoffset+2*i*2); } if (MODREG && gd==(nextop&7)) { for(int i=0; i<2; ++i) { // GX->uw[2 * i + 1] = GX->uw[2 * i]; - LHU(x3, gback, 2*i*2); - SH(x3, gback, (2*i+1)*2); + LHU(x3, gback, gdoffset+2*i*2); + SH(x3, gback, gdoffset+(2*i+1)*2); } } else { GETEM(x1, 0); for(int i=0; i<2; ++i) { // GX->uw[2 * i + 1] = EX->uw[i + 2]; LHU(x3, wback, fixedaddress+(i+2)*2); - SH(x3, gback, (2*i+1)*2); + SH(x3, gback, gdoffset+(2*i+1)*2); } } break; @@ -718,40 +718,40 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("PUNPCKHDQ Gm,Em"); nextop = F8; GETEM(x1, 0); - GETGM(x2); + GETGM(); // GM->ud[0] = GM->ud[1]; - LWU(x3, gback, 1*4); - SW(x3, gback, 0*4); + LWU(x3, gback, gdoffset+1*4); + SW(x3, gback, gdoffset+0*4); if (!(MODREG && (gd==ed))) { // GM->ud[1] = EM->ud[1]; LWU(x3, wback, fixedaddress+1*4); - SW(x3, gback, 1*4); + SW(x3, gback, gdoffset+1*4); } break; case 0x6E: INST_NAME("MOVD Gm, Ed"); nextop = F8; - GETGM(x1); + GETGM(); if(MODREG) { ed = xRAX + (nextop&7) + (rex.b<<3); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 1, 0); if(rex.w) { - LD(x4, ed, fixedaddress); + LD(x4, ed, fixedaddress); } else { LW(x4, ed, fixedaddress); } ed = x4; } - if(rex.w) SD(ed, gback, 0); else SW(ed, gback, 0); + if(rex.w) SD(ed, gback, gdoffset+0); else SW(ed, gback, gdoffset+0); break; case 0x6F: INST_NAME("MOVQ Gm, Em"); nextop = F8; - GETGM(x1); + GETGM(); GETEM(x2, 0); LD(x3, wback, fixedaddress); - SD(x3, gback, 0); + SD(x3, gback, gdoffset+0); break; case 0x71: nextop = F8; @@ -762,7 +762,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u8 = F8; if (u8>15) { // just zero dest - SD(xZR, x1, fixedaddress); + SD(xZR, wback, fixedaddress); } else if(u8) { for (int i=0; i<4; ++i) { // EX->uw[i] >>= u8; @@ -792,7 +792,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u8 = F8; if (u8>15) { // just zero dest - SD(xZR, x1, fixedaddress+0); + SD(xZR, wback, fixedaddress+0); } else if(u8) { for (int i=0; i<4; ++i) { // EX->uw[i] <<= u8; @@ -810,7 +810,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x75: INST_NAME("PCMPEQW Gm,Em"); nextop = F8; - GETGM(x1); + GETGM(); GETEM(x2, 0); MMX_LOOP_W(x3, x4, SUB(x3, x3, x4); SEQZ(x3, x3); NEG(x3, x3)); break; @@ -824,9 +824,9 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x7F: INST_NAME("MOVQ Em, Gm"); nextop = F8; - GETGM(x1); + GETGM(); GETEM(x2, 0); - LD(x3, gback, 0); + LD(x3, gback, gdoffset+0); SD(x3, wback, fixedaddress); break; #define GO(GETFLAGS, NO, YES, F) \ @@ -888,7 +888,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GOCOND(0x90, "SET", "Eb"); #undef GO - + case 0xA2: INST_NAME("CPUID"); NOTEST(x1); @@ -1361,7 +1361,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BEQZ(x3, 4+2*4); ADDI(u8, u8, 4); MV(x2, x3); - ANDI(x2, x2, 0b1111); + ANDI(x2, x2, 0b1111); TABLE64(x3, (uintptr_t)&lead0tab); ADD(x3, x3, x2); LBU(x2, x3, 0); @@ -1410,13 +1410,13 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xC2: INST_NAME("CMPPS Gx, Ex, Ib"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 1); u8 = F8; d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); for(int i=0; i<4; ++i) { - FLW(d0, gback, i*4); + FLW(d0, gback, gdoffset+i*4); FLW(d1, wback, fixedaddress+i*4); if ((u8&7) == 0) { // Equal FEQS(x3, d0, d1); @@ -1447,7 +1447,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } case 7: break; // Not NaN } - + // MARK2; if ((u8&7) == 5 || (u8&7) == 6) { MOV32w(x3, 1); @@ -1455,7 +1455,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni // MARK; } NEG(x3, x3); - SW(x3, gback, i*4); + SW(x3, gback, gdoffset+i*4); } break; case 0xC3: @@ -1472,24 +1472,24 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xC6: // TODO: Optimize this! INST_NAME("SHUFPS Gx, Ex, Ib"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 1); u8 = F8; int32_t idx; idx = (u8>>(0*2))&3; - LWU(x3, gback, idx*4); + LWU(x3, gback, gdoffset+idx*4); idx = (u8>>(1*2))&3; - LWU(x4, gback, idx*4); + LWU(x4, gback, gdoffset+idx*4); idx = (u8>>(2*2))&3; LWU(x5, wback, fixedaddress+idx*4); idx = (u8>>(3*2))&3; LWU(x6, wback, fixedaddress+idx*4); - SW(x3, gback, 0*4); - SW(x4, gback, 1*4); - SW(x5, gback, 2*4); - SW(x6, gback, 3*4); + SW(x3, gback, gdoffset+0*4); + SW(x4, gback, gdoffset+1*4); + SW(x5, gback, gdoffset+2*4); + SW(x6, gback, gdoffset+3*4); break; case 0xC8: @@ -1547,63 +1547,63 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0xE5: INST_NAME("PMULHW Gm,Em"); nextop = F8; - GETGM(x1); + GETGM(); GETEM(x2, 0); for(int i=0; i<4; ++i) { - LH(x3, gback, 2*i); + LH(x3, gback, gdoffset+2*i); LH(x4, wback, fixedaddress+2*i); MULW(x3, x3, x4); SRAIW(x3, x3, 16); - SH(x3, gback, 2*i); + SH(x3, gback, gdoffset+2*i); } break; case 0xED: INST_NAME("PADDSW Gm,Em"); nextop = F8; - GETGM(x1); + GETGM(); GETEM(x2, 0); for(int i=0; i<4; ++i) { // tmp32s = (int32_t)GX->sw[i] + EX->sw[i]; // GX->sw[i] = (tmp32s>32767)?32767:((tmp32s<-32768)?-32768:tmp32s); - LH(x3, gback, 2*i); + LH(x3, gback, gdoffset+2*i); LH(x4, wback, fixedaddress+2*i); ADDW(x3, x3, x4); LUI(x4, 0xFFFF8); // -32768 BGE(x3, x4, 12); - SH(x4, gback, 2*i); + SH(x4, gback, gdoffset+2*i); J(20); // continue LUI(x4, 8); // 32768 BLT(x3, x4, 8); ADDIW(x3, x4, -1); - SH(x3, gback, 2*i); + SH(x3, gback, gdoffset+2*i); } break; case 0xEF: INST_NAME("PXOR Gm,Em"); nextop = F8; - GETGM(x1); + GETGM(); if(MODREG && gd==(nextop&7)) { // just zero dest - SD(xZR, gback, 0); + SD(xZR, gback, gdoffset+0); } else { GETEM(x2, 0); - LD(x3, gback, 0); + LD(x3, gback, gdoffset+0); LD(x4, wback, fixedaddress); XOR(x3, x3, x4); - SD(x3, gback, 0); + SD(x3, gback, gdoffset+0); } break; case 0xF9: INST_NAME("PSUBW Gm, Em"); nextop = F8; - GETGM(x1); + GETGM(); GETEM(x2, 0); MMX_LOOP_W(x3, x4, SUBW(x3, x3, x4)); break; case 0xFD: INST_NAME("PADDW Gm, Em"); nextop = F8; - GETGM(x1); + GETGM(); GETEM(x2, 0); MMX_LOOP_W(x3, x4, ADDW(x3, x3, x4)); break; diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index b3e097eb..2b36300a 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -37,7 +37,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int int v0, v1; int q0, q1; int d0, d1; - int64_t fixedaddress; + int64_t fixedaddress, gdoffset; int unscaled; MAYUSE(d0); @@ -49,27 +49,27 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MAYUSE(j64); static const int8_t round_round[] = { RD_RNE, RD_RDN, RD_RUP, RD_RTZ }; - + switch(opcode) { case 0x10: INST_NAME("MOVUPD Gx,Ex"); nextop = F8; GETEX(x1, 0); - GETGX(x2); + GETGX(); SSE_LOOP_MV_Q(x3); break; case 0x11: INST_NAME("MOVUPD Ex,Gx"); nextop = F8; GETEX(x1, 0); - GETGX(x2); + GETGX(); SSE_LOOP_MV_Q2(x3); if(!MODREG) SMWRITE2(); break; case 0x12: INST_NAME("MOVLPD Gx, Eq"); nextop = F8; - GETGX(x1); + GETGX(); if(MODREG) { // access register instead of memory is bad opcode! DEFAULT; @@ -78,47 +78,47 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); LD(x3, wback, fixedaddress); - SD(x3, gback, 0); + SD(x3, gback, gdoffset+0); break; case 0x13: INST_NAME("MOVLPD Eq, Gx"); nextop = F8; - GETGX(x1); + GETGX(); if(MODREG) { // access register instead of memory is bad opcode! DEFAULT; return addr; } addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); - LD(x3, gback, 0); + LD(x3, gback, gdoffset+0); SD(x3, wback, fixedaddress); SMWRITE2(); break; case 0x14: INST_NAME("UNPCKLPD Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); // GX->q[1] = EX->q[0]; LD(x3, wback, fixedaddress+0); - SD(x3, gback, 8); + SD(x3, gback, gdoffset+8); break; case 0x15: INST_NAME("UNPCKHPD Gx, Ex"); nextop = F8; GETEX(x1, 0); - GETGX(x2); + GETGX(); // GX->q[0] = GX->q[1]; - LD(x3, gback, 8); - SD(x3, gback, 0); + LD(x3, gback, gdoffset+8); + SD(x3, gback, gdoffset+0); // GX->q[1] = EX->q[1]; LD(x3, wback, fixedaddress+8); - SD(x3, gback, 8); + SD(x3, gback, gdoffset+8); break; case 0x16: INST_NAME("MOVHPD Gx, Eq"); nextop = F8; - GETGX(x1); + GETGX(); if(MODREG) { // access register instead of memory is bad opcode! DEFAULT; @@ -127,7 +127,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); LD(x3, wback, fixedaddress); - SD(x3, gback, 8); + SD(x3, gback, gdoffset+8); break; case 0x1F: INST_NAME("NOP (multibyte)"); @@ -138,21 +138,21 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("MOVAPD Gx,Ex"); nextop = F8; GETEX(x1, 0); - GETGX(x2); + GETGX(); SSE_LOOP_MV_Q(x3); break; case 0x29: INST_NAME("MOVAPD Ex,Gx"); nextop = F8; GETEX(x1, 0); - GETGX(x2); + GETGX(); SSE_LOOP_MV_Q2(x3); if(!MODREG) SMWRITE2(); break; case 0x2B: INST_NAME("MOVNTPD Ex, Gx"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_MV_Q2(x3); break; @@ -197,15 +197,15 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x00: INST_NAME("PSHUFB Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); sse_forget_reg(dyn, ninst, x5); ADDI(x5, xEmu, offsetof(x64emu_t, scratch)); // perserve gd - LD(x3, gback, 0); - LD(x4, gback, 8); + LD(x3, gback, gdoffset+0); + LD(x4, gback, gdoffset+8); SD(x3, x5, 0); SD(x4, x5, 8); @@ -213,29 +213,29 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LBU(x3, wback, fixedaddress+i); ANDI(x4, x3, 128); BEQZ(x4, 12); - SB(xZR, gback, i); + SB(xZR, gback, gdoffset+i); BEQZ(xZR, 20); // continue ANDI(x4, x3, 15); ADD(x4, x4, x5); LBU(x4, x4, 0); - SB(x4, gback, i); + SB(x4, gback, gdoffset+i); } break; case 0x01: INST_NAME("PHADDW Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); for (int i=0; i<4; ++i) { // GX->sw[i] = GX->sw[i*2+0]+GX->sw[i*2+1]; - LH(x3, gback, 2*(i*2+0)); - LH(x4, gback, 2*(i*2+1)); + LH(x3, gback, gdoffset+2*(i*2+0)); + LH(x4, gback, gdoffset+2*(i*2+1)); ADDW(x3, x3, x4); - SH(x3, gback, 2*i); + SH(x3, gback, gdoffset+2*i); } if (MODREG && gd==(nextop&7)+(rex.b<<3)) { // GX->q[1] = GX->q[0]; - LD(x3, gback, 0); - SD(x3, gback, 8); + LD(x3, gback, gdoffset+0); + SD(x3, gback, gdoffset+8); } else { GETEX(x2, 0); for (int i=0; i<4; ++i) { @@ -243,55 +243,55 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LH(x3, wback, fixedaddress+2*(i*2+0)); LH(x4, wback, fixedaddress+2*(i*2+1)); ADDW(x3, x3, x4); - SH(x3, gback, 2*(4+i)); + SH(x3, gback, gdoffset+2*(4+i)); } } break; case 0x02: INST_NAME("PHADDD Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); // GX->sd[0] += GX->sd[1]; - LW(x3, gback, 0*4); - LW(x4, gback, 1*4); + LW(x3, gback, gdoffset+0*4); + LW(x4, gback, gdoffset+1*4); ADDW(x3, x3, x4); - SW(x3, gback, 0*4); + SW(x3, gback, gdoffset+0*4); // GX->sd[1] = GX->sd[2] + GX->sd[3]; - LW(x3, gback, 2*4); - LW(x4, gback, 3*4); + LW(x3, gback, gdoffset+2*4); + LW(x4, gback, gdoffset+3*4); ADDW(x3, x3, x4); - SW(x3, gback, 1*4); + SW(x3, gback, gdoffset+1*4); if (MODREG && gd==(nextop&7)+(rex.b<<3)) { // GX->q[1] = GX->q[0]; - LD(x3, gback, 0); - SD(x3, gback, 8); + LD(x3, gback, gdoffset+0); + SD(x3, gback, gdoffset+8); } else { GETEX(x2, 0); // GX->sd[2] = EX->sd[0] + EX->sd[1]; LW(x3, wback, fixedaddress+0*4); LW(x4, wback, fixedaddress+1*4); ADDW(x3, x3, x4); - SW(x3, gback, 2*4); + SW(x3, gback, gdoffset+2*4); // GX->sd[3] = EX->sd[2] + EX->sd[3]; LW(x3, wback, fixedaddress+2*4); LW(x4, wback, fixedaddress+3*4); ADDW(x3, x3, x4); - SW(x3, gback, 3*4); + SW(x3, gback, gdoffset+3*4); } break; case 0x04: INST_NAME("PADDUBSW Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); MOV64x(x5, 32767); MOV64x(x6, -32768); for(int i=0; i<8; ++i) { - LBU(x3, gback, i*2); + LBU(x3, gback, gdoffset+i*2); LB(x4, wback, fixedaddress+i*2); MUL(x9, x3, x4); - LBU(x3, gback, i*2+1); + LBU(x3, gback, gdoffset+i*2+1); LB(x4, wback, fixedaddress+i*2+1); MUL(x3, x3, x4); ADD(x3, x3, x9); @@ -304,68 +304,68 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BLT(x6, x3, 4+4); MV(x3, x6); } - SH(x3, gback, i*2); + SH(x3, gback, gdoffset+i*2); } break; case 0x08: INST_NAME("PSIGNB Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<16; ++i) { - LB(x3, gback, i); + LB(x3, gback, gdoffset+i); LB(x4, wback, fixedaddress+i); BGE(x4, xZR, 4+4); NEG(x3, x3); BNE(x4, xZR, 4+4); MOV_U12(x3, 0); - SB(x3, gback, i); + SB(x3, gback, gdoffset+i); } break; case 0x09: INST_NAME("PSIGNW Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<8; ++i) { - LH(x3, gback, i*2); + LH(x3, gback, gdoffset+i*2); LH(x4, wback, fixedaddress+i*2); BGE(x4, xZR, 4+4); NEG(x3, x3); BNE(x4, xZR, 4+4); MOV_U12(x3, 0); - SH(x3, gback, i*2); + SH(x3, gback, gdoffset+i*2); } break; case 0x0A: INST_NAME("PSIGND Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<4; ++i) { - LW(x3, gback, i*4); + LW(x3, gback, gdoffset+i*4); LW(x4, wback, fixedaddress+i*4); BGE(x4, xZR, 4+4); NEG(x3, x3); BNE(x4, xZR, 4+4); ADDI(x3, xZR, 0); - SW(x3, gback, i*4); + SW(x3, gback, gdoffset+i*4); } break; case 0x0B: INST_NAME("PMULHRSW Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<8; ++i) { - LH(x3, gback, i*2); + LH(x3, gback, gdoffset+i*2); LH(x4, wback, fixedaddress+i*2); MUL(x3, x3, x4); SRAI(x3, x3, 14); ADDI(x3, x3, 1); SRAI(x3, x3, 1); - SH(x3, gback, i*2); + SH(x3, gback, gdoffset+i*2); } break; @@ -373,7 +373,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PTEST Gx, Ex"); nextop = F8; SETFLAGS(X_ALL, SF_SET); - GETGX(x1); + GETGX(); GETEX(x2, 0); CLEAR_FLAGS(); SET_DFNONE(); @@ -382,8 +382,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LD(x6, wback, fixedaddress+8); IFX(X_ZF) { - LD(x3, gback, 0); - LD(x4, gback, 8); + LD(x3, gback, gdoffset+0); + LD(x4, gback, gdoffset+8); AND(x3, x3, x5); AND(x4, x4, x6); OR(x3, x3, x4); @@ -391,9 +391,9 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ORI(xFlags, xFlags, 1<<F_ZF); } IFX(X_CF) { - LD(x3, gback, 0); + LD(x3, gback, gdoffset+0); NOT(x3, x3); - LD(x4, gback, 8); + LD(x4, gback, gdoffset+8); NOT(x4, x4); AND(x3, x3, x5); AND(x4, x4, x6); @@ -407,49 +407,49 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x1C: INST_NAME("PABSB Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<16; ++i) { LB(x4, wback, fixedaddress+i); BGE(x4, xZR, 4+4); NEG(x4, x4); - SB(x4, gback, i); + SB(x4, gback, gdoffset+i); } break; case 0x1D: INST_NAME("PABSW Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<8; ++i) { LH(x4, wback, fixedaddress+i*2); BGE(x4, xZR, 4+4); NEG(x4, x4); - SH(x4, gback, i*2); + SH(x4, gback, gdoffset+i*2); } break; case 0x1E: INST_NAME("PABSD Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); MOV64x(x5, ~(1<<31)); for(int i=0; i<4; ++i) { LW(x4, wback, fixedaddress+i*4); BGE(x4, xZR, 4+4); NEG(x4, x4); - SW(x4, gback, i*4); + SW(x4, gback, gdoffset+i*4); } break; case 0x2B: INST_NAME("PACKUSDW Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); MOV64x(x5, 65535); for(int i=0; i<4; ++i) { - LW(x3, gback, i*4); + LW(x3, gback, gdoffset+i*4); if(rv64_zbb) { MIN(x3, x3, x5); MAX(x3, x3, xZR); @@ -459,11 +459,11 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BLT(x3, x5, 4+4); MV(x3, x5); } - SH(x3, gback, i*2); + SH(x3, gback, gdoffset+i*2); } if(MODREG && gd==ed) { - LD(x3, gback, 0); - SD(x3, gback, 8); + LD(x3, gback, gdoffset+0); + SD(x3, gback, gdoffset+8); } else for(int i=0; i<4; ++i) { LW(x3, wback, fixedaddress+i*4); if(rv64_zbb) { @@ -475,177 +475,177 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BLT(x3, x5, 4+4); MV(x3, x5); } - SH(x3, gback, 8+i*2); + SH(x3, gback, gdoffset+8+i*2); } break; case 0x30: INST_NAME("PMOVZXBW Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=7; i>=0; --i) { LBU(x3, wback, fixedaddress+i); - SH(x3, gback, i*2); + SH(x3, gback, gdoffset+i*2); } break; case 0x31: INST_NAME("PMOVZXBD Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=3; i>=0; --i) { LBU(x3, wback, fixedaddress+i); - SW(x3, gback, i*4); + SW(x3, gback, gdoffset+i*4); } break; case 0x32: INST_NAME("PMOVZXBQ Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=1; i>=0; --i) { LBU(x3, wback, fixedaddress+i); - SD(x3, gback, i*8); + SD(x3, gback, gdoffset+i*8); } break; case 0x33: INST_NAME("PMOVZXWD Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=3; i>=0; --i) { LHU(x3, wback, fixedaddress+i*2); - SW(x3, gback, i*4); + SW(x3, gback, gdoffset+i*4); } break; case 0x34: INST_NAME("PMOVZXWQ Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=1; i>=0; --i) { LHU(x3, wback, fixedaddress+i*2); - SD(x3, gback, i*8); + SD(x3, gback, gdoffset+i*8); } break; case 0x35: INST_NAME("PMOVZXDQ Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=1; i>=0; --i) { LWU(x3, wback, fixedaddress+i*4); - SD(x3, gback, i*8); + SD(x3, gback, gdoffset+i*8); } break; case 0x38: INST_NAME("PMINSB Gx, Ex"); // SSE4 opcode! nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<16; ++i) { - LB(x3, gback, i); + LB(x3, gback, gdoffset+i); LB(x4, wback, fixedaddress+i); if(rv64_zbb) MIN(x4, x3, x4); else BLT(x3, x4, 4+4); - SB(x4, gback, i); + SB(x4, gback, gdoffset+i); } break; case 0x39: INST_NAME("PMINSD Gx, Ex"); // SSE4 opcode! nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<4; ++i) { - LW(x3, gback, i*4); + LW(x3, gback, gdoffset+i*4); LW(x4, wback, fixedaddress+i*4); if(rv64_zbb) MIN(x4, x3, x4); else BLT(x3, x4, 4+4); - SW(x4, gback, i*4); + SW(x4, gback, gdoffset+i*4); } break; case 0x3A: INST_NAME("PMINUW Gx, Ex"); // SSE4 opcode! nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<8; ++i) { - LHU(x3, gback, i*2); + LHU(x3, gback, gdoffset+i*2); LHU(x4, wback, fixedaddress+i*2); if(rv64_zbb) MINU(x4, x3, x4); else BLTU(x3, x4, 4+4); - SH(x4, gback, i*2); + SH(x4, gback, gdoffset+i*2); } break; case 0x3B: INST_NAME("PMINUD Gx, Ex"); // SSE4 opcode! nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<4; ++i) { - LWU(x3, gback, i*4); + LWU(x3, gback, gdoffset+i*4); LWU(x4, wback, fixedaddress+i*4); if(rv64_zbb) MINU(x4, x3, x4); else BLTU(x3, x4, 4+4); - SW(x4, gback, i*4); + SW(x4, gback, gdoffset+i*4); } break; case 0x3C: INST_NAME("PMAXSB Gx, Ex"); // SSE4 opcode! nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<16; ++i) { - LB(x3, gback, i); + LB(x3, gback, gdoffset+i); LB(x4, wback, fixedaddress+i); if(rv64_zbb) MAX(x4, x3, x4); else BLT(x4, x3, 4+4); - SB(x4, gback, i); + SB(x4, gback, gdoffset+i); } break; case 0x3D: INST_NAME("PMAXSD Gx, Ex"); // SSE4 opcode! nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<4; ++i) { - LW(x3, gback, i*4); + LW(x3, gback, gdoffset+i*4); LW(x4, wback, fixedaddress+i*4); if(rv64_zbb) MAX(x4, x3, x4); else BLT(x4, x3, 4+4); - SW(x4, gback, i*4); + SW(x4, gback, gdoffset+i*4); } break; case 0x3E: INST_NAME("PMAXUW Gx, Ex"); // SSE4 opcode! nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<8; ++i) { - LHU(x3, gback, i*2); + LHU(x3, gback, gdoffset+i*2); LHU(x4, wback, fixedaddress+i*2); if(rv64_zbb) MAXU(x4, x3, x4); else BLTU(x4, x3, 4+4); - SH(x4, gback, i*2); + SH(x4, gback, gdoffset+i*2); } break; case 0x3F: INST_NAME("PMAXUD Gx, Ex"); // SSE4 opcode! nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<4; ++i) { - LWU(x3, gback, i*4); + LWU(x3, gback, gdoffset+i*4); LWU(x4, wback, fixedaddress+i*4); if(rv64_zbb) MAXU(x4, x3, x4); else BLTU(x4, x3, 4+4); - SW(x4, gback, i*4); + SW(x4, gback, gdoffset+i*4); } break; case 0x40: INST_NAME("PMULLD Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<4; ++i) { - LW(x3, gback, i*4); + LW(x3, gback, gdoffset+i*4); LW(x4, wback, fixedaddress+i*4); MUL(x3, x3, x4); - SW(x3, gback, i*4); + SW(x3, gback, gdoffset+i*4); } break; @@ -656,7 +656,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); CALL(native_aese, -1); - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); break; @@ -667,7 +667,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); CALL(native_aeselast, -1); - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); break; @@ -678,7 +678,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); CALL(native_aesd, -1); - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); break; @@ -690,7 +690,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); CALL(native_aesdlast, -1); - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); break; @@ -735,7 +735,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x09: INST_NAME("ROUNDPD Gx, Ex, Ib"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 1); u8 = F8; d0 = fpu_get_scratch(dyn); @@ -764,7 +764,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int FCVTDL(d0, x5, RD_RTZ); } MARK; - FSD(d0, gback, 0); + FSD(d0, gback, gdoffset+0); // i = 1 FLD(d0, wback, fixedaddress+8); @@ -786,12 +786,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int FCVTDL(d0, x5, RD_RTZ); } MARK2; - FSD(d0, gback, 8); + FSD(d0, gback, gdoffset+8); break; case 0x0E: INST_NAME("PBLENDW Gx, Ex, Ib"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 1); u8 = F8; i32 = 0; @@ -802,20 +802,20 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int if(!(i32&3) && (u8&0xf)==0xf) { // whole 64bits LD(x3, wback, fixedaddress+8*(i32>>2)); - SD(x3, gback, 8*(i32>>2)); + SD(x3, gback, gdoffset+8*(i32>>2)); i32+=4; u8>>=4; } else { // 32bits LWU(x3, wback, fixedaddress+4*(i32>>1)); - SW(x3, gback, 4*(i32>>1)); + SW(x3, gback, gdoffset+4*(i32>>1)); i32+=2; u8>>=2; } } else { // 16 bits LHU(x3, wback, fixedaddress+2*i32); - SH(x3, gback, 2*i32); + SH(x3, gback, gdoffset+2*i32); i32++; u8>>=1; } @@ -828,44 +828,44 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x0F: INST_NAME("PALIGNR Gx, Ex, Ib"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 1); u8 = F8; sse_forget_reg(dyn, ninst, x5); ADDI(x5, xEmu, offsetof(x64emu_t, scratch)); // perserve gd - LD(x3, gback, 0); - LD(x4, gback, 8); + LD(x3, gback, gdoffset+0); + LD(x4, gback, gdoffset+8); SD(x3, x5, 0); SD(x4, x5, 8); if(u8>31) { - SD(xZR, gback, 0); - SD(xZR, gback, 8); + SD(xZR, gback, gdoffset+0); + SD(xZR, gback, gdoffset+8); } else { for (int i=0; i<16; ++i, ++u8) { if (u8>15) { if(u8>31) { - SB(xZR, gback, i); + SB(xZR, gback, gdoffset+i); continue; } else LBU(x3, x5, u8-16); } else { LBU(x3, wback, fixedaddress+u8); } - SB(x3, gback, i); + SB(x3, gback, gdoffset+i); } } break; case 0x16: if(rex.w) {INST_NAME("PEXTRQ Ed, Gx, Ib");} else {INST_NAME("PEXTRD Ed, Gx, Ib");} nextop = F8; - GETGX(x1); + GETGX(); GETED(1); u8 = F8; if(rex.w) - LD(ed, gback, 8*(u8&1)); + LD(ed, gback, gdoffset+8*(u8&1)); else - LWU(ed, gback, 4*(u8&3)); + LWU(ed, gback, gdoffset+4*(u8&3)); if (wback) { SDxw(ed, wback, fixedaddress); SMWRITE2(); @@ -874,37 +874,37 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x20: INST_NAME("PINSRB Gx, ED, Ib"); nextop = F8; - GETGX(x3); + GETGX(); GETED(1); u8 = F8; - SB(ed, x3, u8&0xF); + SB(ed, gback, gdoffset+u8&0xF); break; case 0x21: INST_NAME("INSERTPS GX, EX, Ib"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 1); u8 = F8; if(MODREG) s8 = (u8>>6)&3; else s8 = 0; // GX->ud[(tmp8u>>4)&3] = EX->ud[tmp8s]; LWU(x3, wback, fixedaddress+4*s8); - SW(x3, gback, 4*(u8>>4)); + SW(x3, gback, gdoffset+4*(u8>>4)); for(int i=0; i<4; ++i) { if(u8&(1<<i)) // GX->ud[i] = 0; - SW(xZR, gback, 4*i); + SW(xZR, gback, gdoffset+4*i); } break; case 0x22: INST_NAME("PINSRD Gx, ED, Ib"); nextop = F8; - GETGX(x1); + GETGX(); GETED(1); u8 = F8; if(rex.w) { - SD(ed, gback, 8*(u8&0x1)); + SD(ed, gback, gdoffset+8*(u8&0x1)); } else { - SW(ed, gback, 4*(u8&0x3)); + SW(ed, gback, gdoffset+4*(u8&0x3)); } break; case 0x44: @@ -914,7 +914,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); // gx if(MODREG) { - ed = (nextop&7)+(rex.b<<3); + ed = (nextop&7)+(rex.b<<3); sse_forget_reg(dyn, ninst, ed); MOV32w(x2, ed); MOV32w(x3, 0); // p = NULL @@ -936,7 +936,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int sse_forget_reg(dyn, ninst, gd); MOV32w(x1, gd); // gx if(MODREG) { - ed = (nextop&7)+(rex.b<<3); + ed = (nextop&7)+(rex.b<<3); sse_forget_reg(dyn, ninst, ed); MOV32w(x2, ed); MOV32w(x3, 0); //p = NULL @@ -990,11 +990,11 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int if (i) SLLI(x2, x2, 1); OR(gd, gd, x2); } - break; + break; case 0x51: INST_NAME("SQRTPD Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); d0 = fpu_get_scratch(dyn); if(!box64_dynarec_fastnan) { @@ -1011,42 +1011,42 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BEQ(x3, xZR, 8); FNEGD(d0, d0); } - FSD(d0, gback, i*8); + FSD(d0, gback, gdoffset+i*8); } break; case 0x54: INST_NAME("ANDPD Gx, Ex"); nextop = F8; GETEX(x1, 0); - GETGX(x2); + GETGX(); SSE_LOOP_Q(x3, x4, AND(x3, x3, x4)); break; case 0x55: INST_NAME("ANDNPD Gx, Ex"); nextop = F8; GETEX(x1, 0); - GETGX(x2); + GETGX(); SSE_LOOP_Q(x3, x4, NOT(x3, x3); AND(x3, x3, x4)); break; case 0x56: INST_NAME("ORPD Gx, Ex"); nextop = F8; GETEX(x1, 0); - GETGX(x2); + GETGX(); SSE_LOOP_Q(x3, x4, OR(x3, x3, x4)); break; case 0x57: INST_NAME("XORPD Gx, Ex"); nextop = F8; GETEX(x1, 0); - GETGX(x2); + GETGX(); SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); break; case 0x58: INST_NAME("ADDPD Gx, Ex"); nextop = F8; GETEX(x1, 0); - GETGX(x2); + GETGX(); SSE_LOOP_FQ(x3, x4, { if(!box64_dynarec_fastnan) { FEQD(x3, v0, v0); @@ -1066,7 +1066,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("MULPD Gx, Ex"); nextop = F8; GETEX(x1, 0); - GETGX(x2); + GETGX(); SSE_LOOP_FQ(x3, x4, { if(!box64_dynarec_fastnan) { FEQD(x3, v0, v0); @@ -1085,24 +1085,24 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x5A: INST_NAME("CVTPD2PS Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); d0 = fpu_get_scratch(dyn); // GX->f[0] = EX->d[0]; FLD(d0, wback, fixedaddress+0); FCVTSD(d0, d0); - FSD(d0, gback, 0); + FSD(d0, gback, gdoffset+0); // GX->f[1] = EX->d[1]; FLD(d0, wback, fixedaddress+8); FCVTSD(d0, d0); - FSD(d0, gback, 4); + FSD(d0, gback, gdoffset+4); // GX->q[1] = 0; - SD(xZR, gback, 8); + SD(xZR, gback, gdoffset+8); break; case 0x5B: INST_NAME("CVTPS2DQ Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); d0 = fpu_get_scratch(dyn); u8 = sse_setround(dyn, ninst, x6, x4); @@ -1113,7 +1113,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SUB(x5, x5, x3); BEQZ(x5, 8); LUI(x3, 0x80000); // INT32_MIN - SW(x3, gback, 4*i); + SW(x3, gback, gdoffset+4*i); } x87_restoreround(dyn, ninst, u8); break; @@ -1121,7 +1121,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("SUBPD Gx, Ex"); nextop = F8; GETEX(x1, 0); - GETGX(x2); + GETGX(); SSE_LOOP_FQ(x3, x4, { if(!box64_dynarec_fastnan) { FEQD(x3, v0, v0); @@ -1140,12 +1140,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x5D: INST_NAME("MINPD Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); for (int i=0; i<2; ++i) { - FLD(d0, gback, 8*i); + FLD(d0, gback, gdoffset+8*i); FLD(d1, wback, fixedaddress+8*i); FEQD(x3, d0, d0); FEQD(x4, d1, d1); @@ -1153,14 +1153,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BEQ(x3, xZR, 12); FLTD(x3, d1, d0); BEQ(x3, xZR, 8); // continue - FSD(d1, gback, 8*i); + FSD(d1, gback, gdoffset+8*i); } break; case 0x5E: INST_NAME("DIVPD Gx, Ex"); nextop = F8; GETEX(x1, 0); - GETGX(x2); + GETGX(); SSE_LOOP_FQ(x3, x4, { if(!box64_dynarec_fastnan) { FEQD(x3, v0, v0); @@ -1179,12 +1179,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x5F: INST_NAME("MAXPD Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); for (int i=0; i<2; ++i) { - FLD(d0, gback, 8*i); + FLD(d0, gback, gdoffset+8*i); FLD(d1, wback, fixedaddress+8*i); FEQD(x3, d0, d0); FEQD(x4, d1, d1); @@ -1192,54 +1192,54 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BEQ(x3, xZR, 12); FLTD(x3, d0, d1); BEQ(x3, xZR, 8); // continue - FSD(d1, gback, 8*i); + FSD(d1, gback, gdoffset+8*i); } break; case 0x60: INST_NAME("PUNPCKLBW Gx,Ex"); nextop = F8; - GETGX(x2); + GETGX(); for(int i=7; i>0; --i) { // 0 is untouched // GX->ub[2 * i] = GX->ub[i]; - LBU(x3, gback, i); - SB(x3, gback, 2*i); + LBU(x3, gback, gdoffset+i); + SB(x3, gback, gdoffset+2*i); } if (MODREG && gd==(nextop&7)+(rex.b<<3)) { for(int i=0; i<8; ++i) { // GX->ub[2 * i + 1] = GX->ub[2 * i]; - LBU(x3, gback, 2*i); - SB(x3, gback, 2*i+1); + LBU(x3, gback, gdoffset+2*i); + SB(x3, gback, gdoffset+2*i+1); } } else { GETEX(x1, 0); for(int i=0; i<8; ++i) { // GX->ub[2 * i + 1] = EX->ub[i]; LBU(x3, wback, fixedaddress+i); - SB(x3, gback, 2*i+1); + SB(x3, gback, gdoffset+2*i+1); } } break; case 0x61: INST_NAME("PUNPCKLWD Gx,Ex"); nextop = F8; - GETGX(x2); + GETGX(); for(int i=3; i>0; --i) { // GX->uw[2 * i] = GX->uw[i]; - LHU(x3, gback, i*2); - SH(x3, gback, 2*i*2); + LHU(x3, gback, gdoffset+i*2); + SH(x3, gback, gdoffset+2*i*2); } if (MODREG && gd==(nextop&7)+(rex.b<<3)) { for(int i=0; i<4; ++i) { // GX->uw[2 * i + 1] = GX->uw[2 * i]; - LHU(x3, gback, 2*i*2); - SH(x3, gback, (2*i+1)*2); + LHU(x3, gback, gdoffset+2*i*2); + SH(x3, gback, gdoffset+(2*i+1)*2); } } else { GETEX(x1, 0); for(int i=0; i<4; ++i) { // GX->uw[2 * i + 1] = EX->uw[i]; LHU(x3, wback, fixedaddress+i*2); - SH(x3, gback, (2*i+1)*2); + SH(x3, gback, gdoffset+(2*i+1)*2); } } break; @@ -1247,26 +1247,26 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PUNPCKLDQ Gx,Ex"); nextop = F8; GETEX(x1, 0); - GETGX(x2); + GETGX(); // GX->ud[3] = EX->ud[1]; LWU(x3, wback, fixedaddress+1*4); - SW(x3, gback, 3*4); + SW(x3, gback, gdoffset+3*4); // GX->ud[2] = GX->ud[1]; - LWU(x3, gback, 1*4); - SW(x3, gback, 2*4); + LWU(x3, gback, gdoffset+1*4); + SW(x3, gback, gdoffset+2*4); // GX->ud[1] = EX->ud[0]; LWU(x3, wback, fixedaddress+0*4); - SW(x3, gback, 1*4); + SW(x3, gback, gdoffset+1*4); break; case 0x63: INST_NAME("PACKSSWB Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); MOV64x(x5, 127); MOV64x(x6, -128); for(int i=0; i<8; ++i) { - LH(x3, gback, i*2); + LH(x3, gback, gdoffset+i*2); if(rv64_zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); @@ -1276,11 +1276,11 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BGE(x3, x6, 4+4); MV(x3, x6); } - SB(x3, gback, i); + SB(x3, gback, gdoffset+i); } if(MODREG && gd==ed) { - LD(x3, gback, 0); - SD(x3, gback, 8); + LD(x3, gback, gdoffset+0); + SD(x3, gback, gdoffset+8); } else for(int i=0; i<8; ++i) { LH(x3, wback, fixedaddress+i*2); if(rv64_zbb) { @@ -1292,63 +1292,63 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BGE(x3, x6, 4+4); MV(x3, x6); } - SB(x3, gback, 8+i); + SB(x3, gback, gdoffset+8+i); } break; case 0x64: INST_NAME("PCMPGTB Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<16; ++i) { // GX->ub[i] = (GX->sb[i]>EX->sb[i])?0xFF:0x00; LB(x3, wback, fixedaddress+i); - LB(x4, gback, i); + LB(x4, gback, gdoffset+i); SLT(x3, x3, x4); NEG(x3, x3); - SB(x3, gback, i); + SB(x3, gback, gdoffset+i); } break; case 0x65: INST_NAME("PCMPGTW Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<8; ++i) { // GX->uw[i] = (GX->sw[i]>EX->sw[i])?0xFFFF:0x0000; LH(x3, wback, fixedaddress+i*2); - LH(x4, gback, i*2); + LH(x4, gback, gdoffset+i*2); SLT(x3, x3, x4); NEG(x3, x3); - SH(x3, gback, i*2); + SH(x3, gback, gdoffset+i*2); } break; case 0x66: INST_NAME("PCMPGTD Gx,Ex"); nextop = F8; GETEX(x1, 0); - GETGX(x2); + GETGX(); SSE_LOOP_DS(x3, x4, SLT(x4, x4, x3); SLLI(x3, x4, 63); SRAI(x3, x3, 63)); break; case 0x67: INST_NAME("PACKUSWB Gx, Ex"); nextop = F8; - GETGX(x2); + GETGX(); ADDI(x5, xZR, 0xFF); for(int i=0; i<8; ++i) { // GX->ub[i] = (GX->sw[i]<0)?0:((GX->sw[i]>0xff)?0xff:GX->sw[i]); - LH(x3, gback, i*2); + LH(x3, gback, gdoffset+i*2); BGE(x5, x3, 8); ADDI(x3, xZR, 0xFF); NOT(x4, x3); SRAI(x4, x4, 63); AND(x3, x3, x4); - SB(x3, gback, i); + SB(x3, gback, gdoffset+i); } if (MODREG && gd==(nextop&7)+(rex.b<<3)) { // GX->q[1] = GX->q[0]; - LD(x3, gback, 0*8); - SD(x3, gback, 1*8); + LD(x3, gback, gdoffset+0*8); + SD(x3, gback, gdoffset+1*8); } else { GETEX(x1, 0); for(int i=0; i<8; ++i) { @@ -1359,55 +1359,55 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int NOT(x4, x3); SRAI(x4, x4, 63); AND(x3, x3, x4); - SB(x3, gback, 8+i); + SB(x3, gback, gdoffset+8+i); } } break; case 0x68: INST_NAME("PUNPCKHBW Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); for(int i=0; i<8; ++i) { // GX->ub[2 * i] = GX->ub[i + 8]; - LBU(x3, gback, i+8); - SB(x3, gback, 2*i); + LBU(x3, gback, gdoffset+i+8); + SB(x3, gback, gdoffset+2*i); } if (MODREG && gd==(nextop&7)+(rex.b<<3)) { for(int i=0; i<8; ++i) { // GX->ub[2 * i + 1] = GX->ub[2 * i]; - LBU(x3, gback, 2*i); - SB(x3, gback, 2*i+1); + LBU(x3, gback, gdoffset+2*i); + SB(x3, gback, gdoffset+2*i+1); } } else { GETEX(x2, 0); for(int i=0; i<8; ++i) { // GX->ub[2 * i + 1] = EX->ub[i + 8]; LBU(x3, wback, fixedaddress+i+8); - SB(x3, gback, 2*i+1); + SB(x3, gback, gdoffset+2*i+1); } } break; case 0x69: INST_NAME("PUNPCKHWD Gx,Ex"); nextop = F8; - GETGX(x2); + GETGX(); for(int i=0; i<4; ++i) { // GX->uw[2 * i] = GX->uw[i + 4]; - LHU(x3, gback, (i+4)*2); - SH(x3, gback, 2*i*2); + LHU(x3, gback, gdoffset+(i+4)*2); + SH(x3, gback, gdoffset+2*i*2); } if (MODREG && gd==(nextop&7)+(rex.b<<3)) { for(int i=0; i<4; ++i) { // GX->uw[2 * i + 1] = GX->uw[2 * i]; - LHU(x3, gback, 2*i*2); - SH(x3, gback, (2*i+1)*2); + LHU(x3, gback, gdoffset+2*i*2); + SH(x3, gback, gdoffset+(2*i+1)*2); } } else { GETEX(x1, 0); for(int i=0; i<4; ++i) { // GX->uw[2 * i + 1] = EX->uw[i + 4]; LHU(x3, wback, fixedaddress+(i+4)*2); - SH(x3, gback, (2*i+1)*2); + SH(x3, gback, gdoffset+(2*i+1)*2); } } break; @@ -1415,41 +1415,41 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PUNPCKHDQ Gx,Ex"); nextop = F8; GETEX(x1, 0); - GETGX(x2); + GETGX(); // GX->ud[0] = GX->ud[2]; - LWU(x3, gback, 2*4); - SW(x3, gback, 0*4); + LWU(x3, gback, gdoffset+2*4); + SW(x3, gback, gdoffset+0*4); // GX->ud[1] = EX->ud[2]; LWU(x3, wback, fixedaddress+2*4); - SW(x3, gback, 1*4); + SW(x3, gback, gdoffset+1*4); // GX->ud[2] = GX->ud[3]; - LWU(x3, gback, 3*4); - SW(x3, gback, 2*4); + LWU(x3, gback, gdoffset+3*4); + SW(x3, gback, gdoffset+2*4); // GX->ud[3] = EX->ud[3]; if (!(MODREG && (gd==ed))) { LWU(x3, wback, fixedaddress+3*4); - SW(x3, gback, 3*4); + SW(x3, gback, gdoffset+3*4); } break; case 0x6B: INST_NAME("PACKSSDW Gx,Ex"); nextop = F8; - GETGX(x2); + GETGX(); MOV64x(x5, 32768); NEG(x6, x5); for(int i=0; i<4; ++i) { // GX->sw[i] = (GX->sd[i]<-32768)?-32768:((GX->sd[i]>32767)?32767:GX->sd[i]); - LW(x3, gback, i*4); + LW(x3, gback, gdoffset+i*4); BGE(x5, x3, 8); ADDI(x3, x5, -1); BGE(x3, x6, 8); MV(x3, x6); - SH(x3, gback, i*2); + SH(x3, gback, gdoffset+i*2); } if (MODREG && gd==(nextop&7)+(rex.b<<3)) { // GX->q[1] = GX->q[0]; - LD(x3, gback, 0*8); - SD(x3, gback, 1*8); + LD(x3, gback, gdoffset+0*8); + SD(x3, gback, gdoffset+1*8); } else { GETEX(x1, 0); for(int i=0; i<4; ++i) { @@ -1459,32 +1459,32 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ADDI(x3, x5, -1); BGE(x3, x6, 8); MV(x3, x6); - SH(x3, gback, (4+i)*2); + SH(x3, gback, gdoffset+(4+i)*2); } } break; case 0x6C: INST_NAME("PUNPCKLQDQ Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); if(MODREG) { v1 = sse_get_reg(dyn, ninst, x2, (nextop&7)+(rex.b<<3), 0); - FSD(v1, gback, 8); + FSD(v1, gback, gdoffset+8); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); LD(x3, ed, fixedaddress+0); - SD(x3, gback, 8); + SD(x3, gback, gdoffset+8); } break; case 0x6D: INST_NAME("PUNPCKHQDQ Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); - LD(x3, gback, 8); - SD(x3, gback, 0); + LD(x3, gback, gdoffset+8); + SD(x3, gback, gdoffset+0); LD(x3, wback, fixedaddress+8); - SD(x3, gback, 8); + SD(x3, gback, gdoffset+8); break; case 0x6E: INST_NAME("MOVD Gx, Ed"); @@ -1506,14 +1506,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x6F: INST_NAME("MOVDQA Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_MV_Q(x3); break; case 0x70: // TODO: Optimize this! INST_NAME("PSHUFD Gx,Ex,Ib"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 1); u8 = F8; int32_t idx; @@ -1527,10 +1527,10 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int idx = (u8>>(3*2))&3; LWU(x6, wback, fixedaddress+idx*4); - SW(x3, gback, 0*4); - SW(x4, gback, 1*4); - SW(x5, gback, 2*4); - SW(x6, gback, 3*4); + SW(x3, gback, gdoffset+0*4); + SW(x4, gback, gdoffset+1*4); + SW(x5, gback, gdoffset+2*4); + SW(x6, gback, gdoffset+3*4); break; case 0x71: nextop = F8; @@ -1541,8 +1541,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int u8 = F8; if (u8>15) { // just zero dest - SD(xZR, x1, fixedaddress+0); - SD(xZR, x1, fixedaddress+8); + SD(xZR, wback, fixedaddress+0); + SD(xZR, wback, fixedaddress+8); } else if(u8) { for (int i=0; i<8; ++i) { // EX->uw[i] >>= u8; @@ -1572,8 +1572,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int u8 = F8; if (u8>15) { // just zero dest - SD(xZR, x1, fixedaddress+0); - SD(xZR, x1, fixedaddress+8); + SD(xZR, wback, fixedaddress+0); + SD(xZR, wback, fixedaddress+8); } else if(u8) { for (int i=0; i<8; ++i) { // EX->uw[i] <<= u8; @@ -1598,8 +1598,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int if(u8) { if (u8>31) { // just zero dest - SD(xZR, x1, fixedaddress+0); - SD(xZR, x1, fixedaddress+8); + SD(xZR, wback, fixedaddress+0); + SD(xZR, wback, fixedaddress+8); } else if(u8) { SSE_LOOP_D_S(x3, SRLI(x3, x3, u8)); } @@ -1621,8 +1621,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int if(u8) { if (u8>31) { // just zero dest - SD(xZR, x1, fixedaddress+0); - SD(xZR, x1, fixedaddress+8); + SD(xZR, wback, fixedaddress+0); + SD(xZR, wback, fixedaddress+8); } else if(u8) { SSE_LOOP_D_S(x3, SLLI(x3, x3, u8)); } @@ -1734,39 +1734,39 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x74: INST_NAME("PCMPEQB Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for (int i=0; i<16; ++i) { - LBU(x3, gback, i); + LBU(x3, gback, gdoffset+i); LBU(x4, wback, fixedaddress+i); SUB(x3, x3, x4); SEQZ(x3, x3); NEG(x3, x3); - SB(x3, gback, i); + SB(x3, gback, gdoffset+i); } break; case 0x75: INST_NAME("PCMPEQW Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_W(x3, x4, SUB(x3, x3, x4); SEQZ(x3, x3); NEG(x3, x3)); break; case 0x76: INST_NAME("PCMPEQD Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_D(x3, x4, XOR(x3, x3, x4); SNEZ(x3, x3); ADDI(x3, x3, -1)); break; case 0x7C: INST_NAME("HADDPD Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); - FLD(d0, gback, 0); - FLD(d1, gback, 8); + FLD(d0, gback, gdoffset+0); + FLD(d1, gback, gdoffset+8); if(!box64_dynarec_fastnan) { FEQD(x3, d0, d0); FEQD(x4, d1, d1); @@ -1779,9 +1779,9 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BNEZ(x4, 8); FNEGD(d0, d0); } - FSD(d0, gback, 0); + FSD(d0, gback, gdoffset+0); if(MODREG && gd==(nextop&7)+(rex.b<<3)) { - FSD(d0, gback, 8); + FSD(d0, gback, gdoffset+8); } else { GETEX(x2, 0); FLD(d0, wback, fixedaddress+0); @@ -1798,30 +1798,30 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BNEZ(x4, 8); FNEGD(d0, d0); } - FSD(d0, gback, 8); + FSD(d0, gback, gdoffset+8); } break; case 0x7E: INST_NAME("MOVD Ed,Gx"); nextop = F8; - GETGX(x1); + GETGX(); if(rex.w) { if(MODREG) { ed = xRAX + (nextop&7) + (rex.b<<3); - LD(ed, x1, 0); + LD(ed, gback, gdoffset+0); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); - LD(x3, x1, 0); + LD(x3, gback, gdoffset+0); SD(x3, ed, fixedaddress); SMWRITE2(); } } else { if(MODREG) { ed = xRAX + (nextop&7) + (rex.b<<3); - LWU(ed, x1, 0); + LWU(ed, gback, gdoffset+0); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); - LWU(x3, x1, 0); + LWU(x3, gback, gdoffset+0); SW(x3, ed, fixedaddress); SMWRITE2(); } @@ -1830,7 +1830,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x7F: INST_NAME("MOVDQA Ex,Gx"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_MV_Q2(x3); if(!MODREG) SMWRITE2(); @@ -1878,13 +1878,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0xC2: INST_NAME("CMPPD Gx, Ex, Ib"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 1); u8 = F8; d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); for(int i=0; i<2; ++i) { - FLD(d0, gback, 8*i); + FLD(d0, gback, gdoffset+8*i); FLD(d1, wback, fixedaddress+8*i); if ((u8&7) == 0) { // Equal FEQD(x3, d0, d1); @@ -1915,7 +1915,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } case 7: break; // Not NaN } - + // MARK2; if ((u8&7) == 5 || (u8&7) == 6) { MOV32w(x3, 1); @@ -1923,16 +1923,16 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int // MARK; } NEG(x3, x3); - SD(x3, gback, 8*i); + SD(x3, gback, gdoffset+8*i); } break; case 0xC4: INST_NAME("PINSRW Gx,Ed,Ib"); nextop = F8; GETED(1); - GETGX(x3); + GETGX(); u8 = (F8)&7; - SH(ed, gback, u8*2); + SH(ed, gback, gdoffset+u8*2); break; case 0xC5: INST_NAME("PEXTRW Gd,Ex,Ib"); @@ -1945,90 +1945,90 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0xC6: INST_NAME("SHUFPD Gx, Ex, Ib"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 1); u8 = F8; if (MODREG && gd==(nextop&7)+(rex.b<<3) && u8==0) { - LD(x3, gback, 0); - SD(x3, gback, 8); + LD(x3, gback, gdoffset+0); + SD(x3, gback, gdoffset+8); break; } - LD(x3, gback, 8*(u8&1)); + LD(x3, gback, gdoffset+8*(u8&1)); LD(x4, wback, fixedaddress+8*((u8>>1)&1)); - SD(x3, gback, 0); - SD(x4, gback, 8); + SD(x3, gback, gdoffset+0); + SD(x4, gback, gdoffset+8); break; case 0xD1: INST_NAME("PSRLW Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); LD(x3, wback, fixedaddress); ADDI(x4, xZR, 16); BLTU_MARK(x3, x4); - SD(xZR, gback, 0); - SD(xZR, gback, 8); + SD(xZR, gback, gdoffset+0); + SD(xZR, gback, gdoffset+8); B_NEXT_nocond; MARK; for (int i=0; i<8; ++i) { - LHU(x5, gback, 2*i); + LHU(x5, gback, gdoffset+2*i); SRLW(x5, x5, x3); - SH(x5, gback, 2*i); + SH(x5, gback, gdoffset+2*i); } break; case 0xD2: INST_NAME("PSRLD Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); LD(x3, wback, fixedaddress); ADDI(x4, xZR, 32); BLTU_MARK(x3, x4); - SD(xZR, gback, 0); - SD(xZR, gback, 8); + SD(xZR, gback, gdoffset+0); + SD(xZR, gback, gdoffset+8); B_NEXT_nocond; MARK; for (int i=0; i<4; ++i) { - LWU(x5, gback, 4*i); + LWU(x5, gback, gdoffset+4*i); SRLW(x5, x5, x3); - SW(x5, gback, 4*i); + SW(x5, gback, gdoffset+4*i); } break; case 0xD3: INST_NAME("PSRLQ Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); LD(x3, wback, fixedaddress); ADDI(x4, xZR, 64); BLTU_MARK(x3, x4); - SD(xZR, gback, 0); - SD(xZR, gback, 8); + SD(xZR, gback, gdoffset+0); + SD(xZR, gback, gdoffset+8); B_NEXT_nocond; MARK; for (int i=0; i<2; ++i) { - LD(x5, gback, 8*i); + LD(x5, gback, gdoffset+8*i); SRL(x5, x5, x3); - SD(x5, gback, 8*i); + SD(x5, gback, gdoffset+8*i); } break; case 0xD4: INST_NAME("PADDQ Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_Q(x3, x4, ADD(x3, x3, x4)); break; case 0xD5: INST_NAME("PMULLW Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<8; ++i) { - LH(x3, gback, 2*i); + LH(x3, gback, gdoffset+2*i); LH(x4, wback, fixedaddress+2*i); MULW(x3, x3, x4); - SH(x3, gback, 2*i); + SH(x3, gback, gdoffset+2*i); } break; case 0xD6: @@ -2059,185 +2059,185 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0xD8: INST_NAME("PSUBUSB Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<16; ++i) { - LBU(x3, gback, i); + LBU(x3, gback, gdoffset+i); LBU(x4, wback, fixedaddress+i); SUB(x3, x3, x4); NOT(x4, x3); SRAI(x4, x4, 63); AND(x3, x3, x4); - SB(x3, gback, i); + SB(x3, gback, gdoffset+i); } break; case 0xD9: INST_NAME("PSUBUSW Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_W(x3, x4, SUB(x3, x3, x4); NOT(x4, x3); SRAI(x4, x4, 63); AND(x3, x3, x4)); break; case 0xDA: INST_NAME("PMINUB Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for (int i=0; i<16; ++i) { - LBU(x3, gback, i); + LBU(x3, gback, gdoffset+i); LBU(x4, wback, fixedaddress+i); BLTU(x3, x4, 8); MV(x3, x4); - SB(x3, gback, i); + SB(x3, gback, gdoffset+i); } break; case 0xDB: INST_NAME("PAND Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_Q(x3, x4, AND(x3, x3, x4)); break; case 0xDC: INST_NAME("PADDUSB Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); ADDI(x5, xZR, 0xFF); for(int i=0; i<16; ++i) { - LBU(x3, gback, i); + LBU(x3, gback, gdoffset+i); LBU(x4, wback, fixedaddress+i); ADD(x3, x3, x4); BLT(x3, x5, 8); ADDI(x3, xZR, 0xFF); - SB(x3, gback, i); + SB(x3, gback, gdoffset+i); } break; case 0xDD: INST_NAME("PADDUSW Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<8; ++i) { // tmp32s = (int32_t)GX->uw[i] + EX->uw[i]; // GX->uw[i] = (tmp32s>65535)?65535:tmp32s; - LHU(x3, gback, i*2); + LHU(x3, gback, gdoffset+i*2); LHU(x4, wback, fixedaddress+i*2); ADDW(x3, x3, x4); MOV32w(x4, 65536); BLT(x3, x4, 8); ADDIW(x3, x4, -1); - SH(x3, gback, i*2); + SH(x3, gback, gdoffset+i*2); } break; case 0xDE: INST_NAME("PMAXUB Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for (int i=0; i<16; ++i) { - LBU(x3, gback, i); + LBU(x3, gback, gdoffset+i); LBU(x4, wback, fixedaddress+i); BLTU(x4, x3, 8); MV(x3, x4); - SB(x3, gback, i); + SB(x3, gback, gdoffset+i); } break; case 0xDF: INST_NAME("PANDN Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_Q(x3, x4, NOT(x3, x3); AND(x3, x3, x4)); break; case 0xE0: INST_NAME("PAVGB Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for (int i=0; i<16; ++i) { - LBU(x3, gback, i); + LBU(x3, gback, gdoffset+i); LBU(x4, wback, fixedaddress+i); ADDW(x3, x3, x4); ADDIW(x3, x3, 1); SRAIW(x3, x3, 1); - SB(x3, gback, i); + SB(x3, gback, gdoffset+i); } break; case 0xE1: INST_NAME("PSRAW Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); ADDI(x4, xZR, 16); LD(x3, wback, fixedaddress); BLTU(x3, x4, 8); SUBI(x3, x4, 1); for (int i=0; i<8; ++i) { - LH(x4, gback, 2*i); + LH(x4, gback, gdoffset+2*i); SRAW(x4, x4, x3); - SH(x4, gback, 2*i); + SH(x4, gback, gdoffset+2*i); } break; case 0xE2: INST_NAME("PSRAD Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); ADDI(x4, xZR, 32); LD(x3, wback, fixedaddress); BLTU(x3, x4, 8); SUBI(x3, x4, 1); for (int i=0; i<4; ++i) { - LW(x4, gback, 4*i); + LW(x4, gback, gdoffset+4*i); SRAW(x4, x4, x3); - SW(x4, gback, 4*i); + SW(x4, gback, gdoffset+4*i); } break; case 0xE3: INST_NAME("PAVGW Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for (int i=0; i<8; ++i) { - LHU(x3, gback, 2*i); + LHU(x3, gback, gdoffset+2*i); LHU(x4, wback, fixedaddress+2*i); ADDW(x3, x3, x4); ADDIW(x3, x3, 1); SRAIW(x3, x3, 1); - SH(x3, gback, 2*i); + SH(x3, gback, gdoffset+2*i); } break; case 0xE4: INST_NAME("PMULHUW Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<8; ++i) { - LHU(x3, gback, 2*i); + LHU(x3, gback, gdoffset+2*i); LHU(x4, wback, fixedaddress+2*i); MULW(x3, x3, x4); SRLIW(x3, x3, 16); - SH(x3, gback, 2*i); + SH(x3, gback, gdoffset+2*i); } break; case 0xE5: INST_NAME("PMULHW Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<8; ++i) { - LH(x3, gback, 2*i); + LH(x3, gback, gdoffset+2*i); LH(x4, wback, fixedaddress+2*i); MULW(x3, x3, x4); SRAIW(x3, x3, 16); - SH(x3, gback, 2*i); + SH(x3, gback, gdoffset+2*i); } break; case 0xE6: INST_NAME("CVTTPD2DQ Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); v0 = fpu_get_scratch(dyn); v1 = fpu_get_scratch(dyn); @@ -2263,143 +2263,143 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MOV32w(x4, 0x80000000); MARK2; } - SW(x3, gback, 0); - SW(x4, gback, 4); - SD(xZR, gback, 8); + SW(x3, gback, gdoffset+0); + SW(x4, gback, gdoffset+4); + SD(xZR, gback, gdoffset+8); break; case 0xE7: INST_NAME("MOVNTDQ Ex, Gx"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_MV_Q2(x3); break; case 0xE8: INST_NAME("PSUBSB Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<16; ++i) { // tmp16s = (int16_t)GX->sb[i] - EX->sb[i]; // GX->sb[i] = (tmp16s<-128)?-128:((tmp16s>127)?127:tmp16s); - LB(x3, gback, i); + LB(x3, gback, gdoffset+i); LB(x4, wback, fixedaddress+i); SUBW(x3, x3, x4); SLLIW(x3, x3, 16); SRAIW(x3, x3, 16); ADDI(x4, xZR, 0x7f); BLT(x3, x4, 12); // tmp16s>127? - SB(x4, gback, i); + SB(x4, gback, gdoffset+i); J(24); // continue ADDI(x4, xZR, 0xf80); BLT(x4, x3, 12); // tmp16s<-128? - SB(x4, gback, i); + SB(x4, gback, gdoffset+i); J(8); // continue - SB(x3, gback, i); + SB(x3, gback, gdoffset+i); } break; case 0xE9: INST_NAME("PSUBSW Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<8; ++i) { // tmp32s = (int32_t)GX->sw[i] - EX->sw[i]; // GX->sw[i] = (tmp32s>32767)?32767:((tmp32s<-32768)?-32768:tmp32s); - LH(x3, gback, 2*i); + LH(x3, gback, gdoffset+2*i); LH(x4, wback, fixedaddress+2*i); SUBW(x3, x3, x4); LUI(x4, 0xFFFF8); // -32768 BGE(x3, x4, 12); - SH(x4, gback, 2*i); + SH(x4, gback, gdoffset+2*i); J(20); // continue LUI(x4, 8); // 32768 BLT(x3, x4, 8); ADDIW(x3, x4, -1); - SH(x3, gback, 2*i); + SH(x3, gback, gdoffset+2*i); } break; case 0xEA: INST_NAME("PMINSW Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for (int i=0; i<8; ++i) { - LH(x3, gback, 2*i); + LH(x3, gback, gdoffset+2*i); LH(x4, wback, fixedaddress+2*i); BLT(x3, x4, 8); MV(x3, x4); - SH(x3, gback, 2*i); + SH(x3, gback, gdoffset+2*i); } break; case 0xEB: INST_NAME("POR Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_Q(x3, x4, OR(x3, x3, x4)); break; case 0xEC: INST_NAME("PADDSB Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<16; ++i) { // tmp16s = (int16_t)GX->sb[i] + EX->sb[i]; // GX->sb[i] = (tmp16s>127)?127:((tmp16s<-128)?-128:tmp16s); - LB(x3, gback, i); + LB(x3, gback, gdoffset+i); LB(x4, wback, fixedaddress+i); ADDW(x3, x3, x4); SLLIW(x3, x3, 16); SRAIW(x3, x3, 16); ADDI(x4, xZR, 0x7f); BLT(x3, x4, 12); // tmp16s>127? - SB(x4, gback, i); + SB(x4, gback, gdoffset+i); J(24); // continue ADDI(x4, xZR, 0xf80); BLT(x4, x3, 12); // tmp16s<-128? - SB(x4, gback, i); + SB(x4, gback, gdoffset+i); J(8); // continue - SB(x3, gback, i); + SB(x3, gback, gdoffset+i); } break; case 0xED: INST_NAME("PADDSW Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<8; ++i) { // tmp32s = (int32_t)GX->sw[i] + EX->sw[i]; // GX->sw[i] = (tmp32s>32767)?32767:((tmp32s<-32768)?-32768:tmp32s); - LH(x3, gback, 2*i); + LH(x3, gback, gdoffset+2*i); LH(x4, wback, fixedaddress+2*i); ADDW(x3, x3, x4); LUI(x4, 0xFFFF8); // -32768 BGE(x3, x4, 12); - SH(x4, gback, 2*i); + SH(x4, gback, gdoffset+2*i); J(20); // continue LUI(x4, 8); // 32768 BLT(x3, x4, 8); ADDIW(x3, x4, -1); - SH(x3, gback, 2*i); + SH(x3, gback, gdoffset+2*i); } break; case 0xEE: INST_NAME("PMAXSW Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_WS(x3, x4, BGE(x3, x4, 8); MV(x3, x4)); break; case 0xEF: INST_NAME("PXOR Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); if(MODREG && gd==(nextop&7)+(rex.b<<3)) { // just zero dest - SD(xZR, gback, 0); - SD(xZR, gback, 8); + SD(xZR, gback, gdoffset+0); + SD(xZR, gback, gdoffset+8); } else { GETEX(x2, 0); SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4)); @@ -2408,102 +2408,102 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0xF1: INST_NAME("PSLLQ Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); ADDI(x4, xZR, 16); LD(x3, wback, fixedaddress+0); BLTU_MARK(x3, x4); // just zero dest - SD(xZR, gback, 0); - SD(xZR, gback, 8); + SD(xZR, gback, gdoffset+0); + SD(xZR, gback, gdoffset+8); B_NEXT_nocond; MARK; for (int i=0; i<8; ++i) { - LHU(x4, gback, 2*i); + LHU(x4, gback, gdoffset+2*i); SLLW(x4, x4, x3); - SH(x4, gback, 2*i); + SH(x4, gback, gdoffset+2*i); } break; case 0xF2: INST_NAME("PSLLQ Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); ADDI(x4, xZR, 32); LD(x3, wback, fixedaddress+0); BLTU_MARK(x3, x4); // just zero dest - SD(xZR, gback, 0); - SD(xZR, gback, 8); + SD(xZR, gback, gdoffset+0); + SD(xZR, gback, gdoffset+8); B_NEXT_nocond; MARK; for (int i=0; i<4; ++i) { - LWU(x4, gback, 4*i); + LWU(x4, gback, gdoffset+4*i); SLLW(x4, x4, x3); - SW(x4, gback, 4*i); + SW(x4, gback, gdoffset+4*i); } break; case 0xF3: INST_NAME("PSLLQ Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); ADDI(x4, xZR, 64); LD(x3, wback, fixedaddress+0); BLTU_MARK(x3, x4); // just zero dest - SD(xZR, gback, 0); - SD(xZR, gback, 8); + SD(xZR, gback, gdoffset+0); + SD(xZR, gback, gdoffset+8); B_NEXT_nocond; MARK; for (int i=0; i<2; ++i) { - LD(x4, gback, 8*i); + LD(x4, gback, gdoffset+8*i); SLL(x4, x4, x3); - SD(x4, gback, 8*i); + SD(x4, gback, gdoffset+8*i); } break; case 0xF4: INST_NAME("PMULUDQ Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); // GX->q[1] = (uint64_t)EX->ud[2]*GX->ud[2]; - LWU(x3, gback, 2*4); + LWU(x3, gback, gdoffset+2*4); LWU(x4, wback, fixedaddress+2*4); MUL(x3, x3, x4); - SD(x3, gback, 8); + SD(x3, gback, gdoffset+8); // GX->q[0] = (uint64_t)EX->ud[0]*GX->ud[0]; - LWU(x3, gback, 0*4); + LWU(x3, gback, gdoffset+0*4); LWU(x4, wback, fixedaddress+0*4); MUL(x3, x3, x4); - SD(x3, gback, 0); + SD(x3, gback, gdoffset+0); break; case 0xF5: INST_NAME("PMADDWD Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for (int i=0; i<4; ++i) { - // GX->sd[i] = (int32_t)(GX->sw[i*2+0])*EX->sw[i*2+0] + + // GX->sd[i] = (int32_t)(GX->sw[i*2+0])*EX->sw[i*2+0] + // (int32_t)(GX->sw[i*2+1])*EX->sw[i*2+1]; - LH(x3, gback, 2*(i*2+0)); + LH(x3, gback, gdoffset+2*(i*2+0)); LH(x4, wback, fixedaddress+2*(i*2+0)); MULW(x5, x3, x4); - LH(x3, gback, 2*(i*2+1)); + LH(x3, gback, gdoffset+2*(i*2+1)); LH(x4, wback, fixedaddress+2*(i*2+1)); MULW(x6, x3, x4); ADDW(x5, x5, x6); - SW(x5, gback, 4*i); + SW(x5, gback, gdoffset+4*i); } break; case 0xF6: INST_NAME("PSADBW Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); MV(x6, xZR); for (int i=0; i<16; ++i) { - LBU(x3, gback, i); + LBU(x3, gback, gdoffset+i); LBU(x4, wback, fixedaddress+i); SUBW(x3, x3, x4); SRAIW(x5, x3, 31); @@ -2512,7 +2512,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ANDI(x3, x3, 0xff); ADDW(x6, x6, x3); if (i==7 || i == 15) { - SD(x6, gback, i+1-8); + SD(x6, gback, gdoffset+i+1-8); if (i==7) MV(x6, xZR); } } @@ -2520,61 +2520,61 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0xF8: INST_NAME("PSUBB Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<16; ++i) { // GX->sb[i] -= EX->sb[i]; LB(x3, wback, fixedaddress+i); - LB(x4, gback, i); + LB(x4, gback, gdoffset+i); SUB(x3, x4, x3); - SB(x3, gback, i); + SB(x3, gback, gdoffset+i); } break; case 0xF9: INST_NAME("PSUBW Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_W(x3, x4, SUBW(x3, x3, x4)); break; case 0xFA: INST_NAME("PSUBD Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_D(x3, x4, SUBW(x3, x3, x4)); break; case 0xFB: INST_NAME("PSUBQ Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_Q(x3, x4, SUB(x3, x3, x4)); break; case 0xFC: INST_NAME("PADDB Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); for(int i=0; i<16; ++i) { // GX->sb[i] += EX->sb[i]; - LB(x3, gback, i); + LB(x3, gback, gdoffset+i); LB(x4, wback, fixedaddress+i); ADDW(x3, x3, x4); - SB(x3, gback, i); + SB(x3, gback, gdoffset+i); } break; case 0xFD: INST_NAME("PADDW Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_W(x3, x4, ADDW(x3, x3, x4)); break; case 0xFE: INST_NAME("PADDD Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_D(x3, x4, ADDW(x3, x3, x4)); break; diff --git a/src/dynarec/rv64/dynarec_rv64_f20f.c b/src/dynarec/rv64/dynarec_rv64_f20f.c index 74937408..52716ad4 100644 --- a/src/dynarec/rv64/dynarec_rv64_f20f.c +++ b/src/dynarec/rv64/dynarec_rv64_f20f.c @@ -35,7 +35,7 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int int v0, v1; int q0; int d0, d1; - int64_t fixedaddress; + int64_t fixedaddress, gdoffset; int unscaled; MAYUSE(d0); @@ -82,11 +82,11 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x12: INST_NAME("MOVDDUP Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); LD(x3, wback, fixedaddress+0); - SD(x3, gback, 0); - SD(x3, gback, 8); + SD(x3, gback, gdoffset+0); + SD(x3, gback, gdoffset+8); break; case 0x2A: INST_NAME("CVTSI2SD Gx, Ed"); @@ -276,7 +276,7 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x70: // TODO: Optimize this! INST_NAME("PSHUFLW Gx, Ex, Ib"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 1); u8 = F8; int32_t idx; @@ -290,14 +290,14 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int idx = (u8>>(3*2))&3; LHU(x6, wback, fixedaddress+idx*2); - SH(x3, gback, 0*2); - SH(x4, gback, 1*2); - SH(x5, gback, 2*2); - SH(x6, gback, 3*2); + SH(x3, gback, gdoffset+0*2); + SH(x4, gback, gdoffset+1*2); + SH(x5, gback, gdoffset+2*2); + SH(x6, gback, gdoffset+3*2); if (!(MODREG && (gd==ed))) { LD(x3, wback, fixedaddress+8); - SD(x3, gback, 8); + SD(x3, gback, gdoffset+8); } break; case 0xC2: @@ -335,7 +335,7 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } case 7: break; // Not NaN } - + MARK2; if ((u8&7) == 5 || (u8&7) == 6) { MOV32w(x2, 1); @@ -348,7 +348,7 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0xE6: INST_NAME("CVTPD2DQ Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); d0 = fpu_get_scratch(dyn); u8 = sse_setround(dyn, ninst, x6, x4); @@ -359,10 +359,10 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SUB(x5, x5, x3); BEQZ(x5, 8); LUI(x3, 0x80000); // INT32_MIN - SW(x3, gback, 4*i); + SW(x3, gback, gdoffset+4*i); } x87_restoreround(dyn, ninst, u8); - SD(xZR, gback, 8); + SD(xZR, gback, gdoffset+8); break; default: DEFAULT; diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c index cbf4c555..0b0b0c31 100644 --- a/src/dynarec/rv64/dynarec_rv64_f30f.c +++ b/src/dynarec/rv64/dynarec_rv64_f30f.c @@ -35,7 +35,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int int v0, v1; int q0, q1; int d0, d1; - int64_t fixedaddress; + int64_t fixedaddress, gdoffset; int unscaled; int64_t j64; @@ -80,7 +80,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SMWRITE2(); } break; - + case 0x1E: INST_NAME("NOP / ENDBR32 / ENDBR64"); nextop = F8; @@ -221,14 +221,14 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x6F: INST_NAME("MOVDQU Gx,Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_MV_Q(x3); break; case 0x70: // TODO: Optimize this! INST_NAME("PSHUFHW Gx, Ex, Ib"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 1); u8 = F8; int32_t idx; @@ -242,14 +242,14 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int idx = 4+((u8>>(3*2))&3); LHU(x6, wback, fixedaddress+idx*2); - SH(x3, gback, (4+0)*2); - SH(x4, gback, (4+1)*2); - SH(x5, gback, (4+2)*2); - SH(x6, gback, (4+3)*2); + SH(x3, gback, gdoffset+(4+0)*2); + SH(x4, gback, gdoffset+(4+1)*2); + SH(x5, gback, gdoffset+(4+2)*2); + SH(x6, gback, gdoffset+(4+3)*2); if (!(MODREG && (gd==ed))) { LD(x3, wback, fixedaddress+0); - SD(x3, gback, 0); + SD(x3, gback, gdoffset+0); } break; case 0x7E: @@ -271,16 +271,16 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x7F: INST_NAME("MOVDQU Ex,Gx"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); SSE_LOOP_MV_Q2(x3); if(!MODREG) SMWRITE2(); break; - + case 0x5B: INST_NAME("CVTTPS2DQ Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); v0 = fpu_get_scratch(dyn); for(int i=0; i<4; ++i) { @@ -295,7 +295,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BEQZ(x5, 8); MOV32w(x3, 0x80000000); } - SW(x3, gback, i*4); + SW(x3, gback, gdoffset+i*4); } break; case 0xB8: @@ -418,7 +418,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int BEQZ(x3, 4+2*4); SUBI(u8, u8, 4); MV(x2, x3); - ANDI(x2, x2, 0b1111); + ANDI(x2, x2, 0b1111); TABLE64(x3, (uintptr_t)&lead0tab); ADD(x3, x3, x2); LBU(x2, x3, 0); @@ -465,7 +465,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } case 7: break; // Not NaN } - + MARK2; if ((u8&7) == 5 || (u8&7) == 6) { MOV32w(x2, 1); @@ -479,7 +479,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0xE6: INST_NAME("CVTDQ2PD Gx, Ex"); nextop = F8; - GETGX(x1); + GETGX(); GETEX(x2, 0); q0 = fpu_get_scratch(dyn); q1 = fpu_get_scratch(dyn); @@ -487,8 +487,8 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LW(x4, wback, fixedaddress+4); FCVTDW(q0, x3, RD_RTZ); FCVTDW(q1, x4, RD_RTZ); - FSD(q0, gback, 0); - FSD(q1, gback, 8); + FSD(q0, gback, gdoffset+0); + FSD(q1, gback, gdoffset+8); break; default: diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index fefd6986..a366b09d 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -369,11 +369,11 @@ } // Will get pointer to GX in general register a, will purge SS or SD if loaded. can use gback as load address -#define GETGX(a) \ - gd = ((nextop&0x38)>>3)+(rex.r<<3); \ - sse_forget_reg(dyn, ninst, gd); \ - gback = a; \ - ADDI(a, xEmu, offsetof(x64emu_t, xmm[gd])) +#define GETGX() \ + gd = ((nextop&0x38)>>3)+(rex.r<<3); \ + sse_forget_reg(dyn, ninst, gd); \ + gback = xEmu; \ + gdoffset = offsetof(x64emu_t, xmm[gd]) // Get Ex address in general register a, will purge SS or SD if it's reg and is loaded. May use x3. Use wback as load address! #define GETEX(a, D) \ @@ -388,11 +388,11 @@ addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, 1, D); \ } -#define GETGM(a) \ - gd = ((nextop&0x38)>>3); \ - mmx_forget_reg(dyn, ninst, gd); \ - gback = a; \ - ADDI(a, xEmu, offsetof(x64emu_t, mmx[gd])) +#define GETGM() \ + gd = ((nextop&0x38)>>3); \ + mmx_forget_reg(dyn, ninst, gd); \ + gback = xEmu; \ + gdoffset = offsetof(x64emu_t, mmx[gd]) // Get EM, might use x3 #define GETEM(a, D) \ @@ -408,10 +408,10 @@ } #define SSE_LOOP_D_ITEM(GX1, EX1, F, i) \ - LWU(GX1, gback, i*4); \ + LWU(GX1, gback, gdoffset+i*4); \ LWU(EX1, wback, fixedaddress+i*4); \ F; \ - SW(GX1, gback, i*4); + SW(GX1, gback, gdoffset+i*4); // Loop for SSE opcode that use 32bits value and write to GX. #define SSE_LOOP_D(GX1, EX1, F) \ @@ -421,10 +421,10 @@ SSE_LOOP_D_ITEM(GX1, EX1, F, 3) #define SSE_LOOP_DS_ITEM(GX1, EX1, F, i) \ - LW(GX1, gback, i*4); \ + LW(GX1, gback, gdoffset+i*4); \ LW(EX1, wback, fixedaddress+i*4); \ F; \ - SW(GX1, gback, i*4); + SW(GX1, gback, gdoffset+i*4); // Loop for SSE opcode that use 32bits value and write to GX. #define SSE_LOOP_DS(GX1, EX1, F) \ @@ -435,26 +435,26 @@ #define MMX_LOOP_W(GX1, EX1, F) \ for (int i=0; i<4; ++i) { \ - LHU(GX1, gback, i*2); \ + LHU(GX1, gback, gdoffset+i*2); \ LHU(EX1, wback, fixedaddress+i*2); \ F; \ - SH(GX1, gback, i*2); \ + SH(GX1, gback, gdoffset+i*2); \ } #define SSE_LOOP_W(GX1, EX1, F) \ for (int i=0; i<8; ++i) { \ - LHU(GX1, gback, i*2); \ + LHU(GX1, gback, gdoffset+i*2); \ LHU(EX1, wback, fixedaddress+i*2); \ F; \ - SH(GX1, gback, i*2); \ + SH(GX1, gback, gdoffset+i*2); \ } #define SSE_LOOP_WS(GX1, EX1, F) \ for (int i=0; i<8; ++i) { \ - LH(GX1, gback, i*2); \ + LH(GX1, gback, gdoffset+i*2); \ LH(EX1, wback, fixedaddress+i*2); \ F; \ - SH(GX1, gback, i*2); \ + SH(GX1, gback, gdoffset+i*2); \ } #define SSE_LOOP_D_S_ITEM(EX1, F, i) \ @@ -470,10 +470,10 @@ SSE_LOOP_D_S_ITEM(EX1, F, 3) #define SSE_LOOP_Q_ITEM(GX1, EX1, F, i) \ - LD(GX1, gback, i*8); \ + LD(GX1, gback, gdoffset+i*8); \ LD(EX1, wback, fixedaddress+i*8); \ F; \ - SD(GX1, gback, i*8); + SD(GX1, gback, gdoffset+i*8); // Loop for SSE opcode that use 64bits value and write to GX. #define SSE_LOOP_Q(GX1, EX1, F) \ @@ -482,10 +482,10 @@ #define SSE_LOOP_FQ_ITEM(GX1, EX1, F, i) \ - FLD(v0, gback, i*8); \ + FLD(v0, gback, gdoffset+i*8); \ FLD(v1, wback, fixedaddress+i*8); \ F; \ - FSD(v0, gback, i*8); + FSD(v0, gback, gdoffset+i*8); #define SSE_LOOP_FQ(GX1, EX1, F) \ v0 = fpu_get_scratch(dyn); \ @@ -496,7 +496,7 @@ #define SSE_LOOP_MV_Q_ITEM(s, i) \ LD(s, wback, fixedaddress+i*8); \ - SD(s, gback, i*8); + SD(s, gback, gdoffset+i*8); // Loop for SSE opcode that moves 64bits value from wback to gback, use s as scratch. #define SSE_LOOP_MV_Q(s) \ @@ -504,7 +504,7 @@ SSE_LOOP_MV_Q_ITEM(s, 1) #define SSE_LOOP_MV_Q_ITEM2(s, i) \ - LD(s, gback, i*8); \ + LD(s, gback, gdoffset+i*8); \ SD(s, wback, fixedaddress+i*8); // Loop for SSE opcode that moves 64bits value from gback to wback, use s as scratch. |