about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2024-08-09 18:02:14 +0800
committer GitHub <noreply@github.com> 2024-08-09 12:02:14 +0200
commit 949b64ca5346a7015185e1d39a3d96eed8cfb806 (patch)
tree bcdb988ba53abec46fd817710ad6b95ae4997cc7 /src
parent cef906b24dfd67e9c7672a42a2b58fe55d140203 (diff)
download box64-949b64ca5346a7015185e1d39a3d96eed8cfb806.tar.gz
box64-949b64ca5346a7015185e1d39a3d96eed8cfb806.zip
[DYNAREC_RV64] Removed TODOs on GETEX and GETEM macros (#1720)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c 234
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_64.c 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f.c 307
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f20f.c 10
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f30f.c 14
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h 42
6 files changed, 303 insertions, 306 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index f7565a7e..cfcc48d4 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -164,7 +164,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("MOVUPS Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             LD(x3, wback, fixedaddress + 0);
             LD(x4, wback, fixedaddress + 8);
             SD(x3, gback, gdoffset + 0);
@@ -174,7 +174,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("MOVUPS Ex,Gx");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             LD(x3, gback, gdoffset + 0);
             LD(x4, gback, gdoffset + 8);
             SD(x3, wback, fixedaddress + 0);
@@ -187,7 +187,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             if (MODREG) {
                 INST_NAME("MOVHLPS Gx,Ex");
                 GETGX();
-                GETEX(x2, 0);
+                GETEX(x2, 0, 8);
                 LD(x3, wback, fixedaddress + 8);
                 SD(x3, gback, gdoffset + 0);
             } else {
@@ -201,7 +201,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("MOVLPS Ex,Gx");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 1);
             LD(x3, gback, gdoffset + 0);
             SD(x3, wback, fixedaddress + 0);
             if (!MODREG)
@@ -211,7 +211,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("UNPCKLPS Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 4);
             LWU(x5, gback, gdoffset + 1 * 4);
             LWU(x3, wback, fixedaddress + 0);
             LWU(x4, wback, fixedaddress + 4);
@@ -223,7 +223,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("UNPCKHPS Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
             LWU(x3, wback, fixedaddress + 2 * 4);
             LWU(x4, wback, fixedaddress + 3 * 4);
             LWU(x5, gback, gdoffset + 2 * 4);
@@ -242,7 +242,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SMREAD();
             }
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 1);
             LD(x4, wback, fixedaddress + 0);
             SD(x4, gback, gdoffset + 8);
             break;
@@ -250,7 +250,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("MOVHPS Ex,Gx");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 1);
             LD(x4, gback, gdoffset + 8);
             SD(x4, wback, fixedaddress + 0);
             if (!MODREG)
@@ -285,14 +285,14 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("MOVAPS Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             SSE_LOOP_MV_Q(x3);
             break;
         case 0x29:
             INST_NAME("MOVAPS Ex,Gx");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             SSE_LOOP_MV_Q2(x3);
             if (!MODREG)
                 SMWRITE2();
@@ -301,7 +301,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("CVTPI2PS Gx,Em");
             nextop = F8;
             GETGX();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 4);
             d0 = fpu_get_scratch(dyn);
             u8 = sse_setround(dyn, ninst, x4, x5);
             for (int i = 0; i < 2; ++i) {
@@ -315,7 +315,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("MOVNTPS Ex,Gx");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             LD(x3, gback, gdoffset + 0);
             LD(x4, gback, gdoffset + 8);
             SD(x3, wback, fixedaddress + 0);
@@ -325,7 +325,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("CVTTPS2PI Gm,Ex");
             nextop = F8;
             GETGM();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 4);
             d0 = fpu_get_scratch(dyn);
             for (int i = 0; i < 2; ++i) {
                 if (!box64_dynarec_fastround) {
@@ -347,7 +347,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("CVTPS2PI Gm, Ex");
             nextop = F8;
             GETGM();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 4);
             d0 = fpu_get_scratch(dyn);
             u8 = sse_setround(dyn, ninst, x6, x4);
             for (int i = 0; i < 2; ++i) {
@@ -431,7 +431,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     INST_NAME("PSHUFB Gm, Em");
                     nextop = F8;
                     GETGM();
-                    GETEM(x2, 0);
+                    GETEM(x2, 0, 7);
                     LD(x4, gback, gdoffset);
                     for (int i = 0; i < 8; ++i) {
                         LB(x3, wback, fixedaddress + i);
@@ -461,7 +461,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         LW(x3, gback, gdoffset + 0);
                         SW(x3, gback, gdoffset + 4);
                     } else {
-                        GETEM(x2, 0);
+                        GETEM(x2, 0, 6);
                         for (int i = 0; i < 2; ++i) {
                             // tmp32s = EX->sw[i*2+0] + EX->sw[i*2+1];
                             // GX->sw[4+i] = sat(tmp32s);
@@ -485,7 +485,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         // GM->sd[1] = GM->sd[0];
                         SW(x3, gback, gdoffset + 1 * 4);
                     } else {
-                        GETEM(x2, 0);
+                        GETEM(x2, 0, 4);
                         // GM->sd[1] = EM->sd[0] + EM->sd[1];
                         LW(x3, wback, fixedaddress + 0 * 4);
                         LW(x4, wback, fixedaddress + 1 * 4);
@@ -521,7 +521,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         LW(x3, gback, gdoffset + 0);
                         SW(x3, gback, gdoffset + 4);
                     } else {
-                        GETEM(x2, 0);
+                        GETEM(x2, 0, 6);
                         for (int i = 0; i < 2; ++i) {
                             // tmp32s = EX->sw[i*2+0] + EX->sw[i*2+1];
                             // GX->sw[4+i] = sat(tmp32s);
@@ -545,7 +545,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     INST_NAME("PMADDUBSW Gm,Em");
                     nextop = F8;
                     GETGM();
-                    GETEM(x2, 0);
+                    GETEM(x2, 0, 7);
                     MOV64x(x5, 32767);
                     MOV64x(x6, -32768);
                     for (int i = 0; i < 4; ++i) {
@@ -585,7 +585,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         LW(x3, gback, gdoffset + 0);
                         SW(x3, gback, gdoffset + 4);
                     } else {
-                        GETEM(x2, 0);
+                        GETEM(x2, 0, 6);
                         for (int i = 0; i < 2; ++i) {
                             // tmp32s = EX->sw[i*2+0] + EX->sw[i*2+1];
                             // GX->sw[4+i] = sat(tmp32s);
@@ -609,7 +609,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         // GM->sd[1] = GM->sd[0];
                         SW(x3, gback, gdoffset + 1 * 4);
                     } else {
-                        GETEM(x2, 0);
+                        GETEM(x2, 0, 4);
                         // GM->sd[1] = EM->sd[0] + EM->sd[1];
                         LW(x3, wback, fixedaddress + 0 * 4);
                         LW(x4, wback, fixedaddress + 1 * 4);
@@ -645,7 +645,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         LW(x3, gback, gdoffset + 0);
                         SW(x3, gback, gdoffset + 4);
                     } else {
-                        GETEM(x2, 0);
+                        GETEM(x2, 0, 6);
                         for (int i = 0; i < 2; ++i) {
                             // tmp32s = EX->sw[i*2+0] + EX->sw[i*2+1];
                             // GX->sw[4+i] = sat(tmp32s);
@@ -669,7 +669,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     INST_NAME("PSIGNB Gm,Em");
                     nextop = F8;
                     GETGM();
-                    GETEM(x2, 0);
+                    GETEM(x2, 0, 7);
                     for (int i = 0; i < 8; ++i) {
                         LB(x3, gback, gdoffset + i);
                         LB(x4, wback, fixedaddress + i);
@@ -684,7 +684,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     INST_NAME("PSIGNW Gm,Em");
                     nextop = F8;
                     GETGM();
-                    GETEM(x2, 0);
+                    GETEM(x2, 0, 6);
                     for (int i = 0; i < 4; ++i) {
                         LH(x3, gback, gdoffset + i * 2);
                         LH(x4, wback, fixedaddress + i * 2);
@@ -699,7 +699,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     INST_NAME("PSIGND Gm,Em");
                     nextop = F8;
                     GETGM();
-                    GETEM(x2, 0);
+                    GETEM(x2, 0, 4);
                     for (int i = 0; i < 2; ++i) {
                         LW(x3, gback, gdoffset + i * 4);
                         LW(x4, wback, fixedaddress + i * 4);
@@ -714,7 +714,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     INST_NAME("PMULHRSW Gm,Em");
                     nextop = F8;
                     GETGM();
-                    GETEM(x2, 0);
+                    GETEM(x2, 0, 6);
                     for (int i = 0; i < 4; ++i) {
                         LH(x3, gback, gdoffset + i * 2);
                         LH(x4, wback, fixedaddress + i * 2);
@@ -729,7 +729,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     INST_NAME("PABSB Gm,Em");
                     nextop = F8;
                     GETGM();
-                    GETEM(x2, 0);
+                    GETEM(x2, 0, 7);
                     for (int i = 0; i < 8; ++i) {
                         LB(x4, wback, fixedaddress + i);
                         BGE(x4, xZR, 4 + 4);
@@ -741,7 +741,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     INST_NAME("PABSW Gm,Em");
                     nextop = F8;
                     GETGM();
-                    GETEM(x2, 0);
+                    GETEM(x2, 0, 6);
                     for (int i = 0; i < 4; ++i) {
                         LH(x4, wback, fixedaddress + i * 2);
                         BGE(x4, xZR, 4 + 4);
@@ -753,7 +753,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     INST_NAME("PABSD Gm,Em");
                     nextop = F8;
                     GETGM();
-                    GETEM(x2, 0);
+                    GETEM(x2, 0, 4);
                     for (int i = 0; i < 2; ++i) {
                         LW(x4, wback, fixedaddress + i * 4);
                         BGE(x4, xZR, 4 + 4);
@@ -849,7 +849,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     INST_NAME("PALIGNR Gm, Em, Ib");
                     nextop = F8;
                     GETGM();
-                    GETEM(x2, 1);
+                    GETEM(x2, 1, 1);
                     u8 = F8;
                     if (u8 > 15) {
                         SD(xZR, gback, gdoffset);
@@ -919,7 +919,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("MOVMSKPS Gd, Ex");
             nextop = F8;
             GETGD;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 12);
             XOR(gd, gd, gd);
             for (int i = 0; i < 4; ++i) {
                 LWU(x2, wback, fixedaddress + i * 4);
@@ -932,7 +932,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("SQRTPS Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
             d0 = fpu_get_scratch(dyn);
             for (int i = 0; i < 4; ++i) {
                 FLW(d0, wback, fixedaddress + 4 * i);
@@ -944,7 +944,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("RSQRTPS Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
             s0 = fpu_get_scratch(dyn);
             s1 = fpu_get_scratch(dyn); // 1.0f
             v0 = fpu_get_scratch(dyn); // 0.0f
@@ -977,7 +977,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("RCPPS Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
             d0 = fpu_get_scratch(dyn);
             d1 = fpu_get_scratch(dyn);
             LUI(x3, 0x3f800);
@@ -994,7 +994,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             gd = ((nextop & 0x38) >> 3) + (rex.r << 3);
             if (!(MODREG && gd == (nextop & 7) + (rex.b << 3))) {
                 GETGX();
-                GETEX(x2, 0);
+                GETEX(x2, 0, 8);
                 SSE_LOOP_Q(x3, x4, AND(x3, x3, x4));
             }
             break;
@@ -1002,7 +1002,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("ANDNPS Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             SSE_LOOP_Q(x3, x4, NOT(x3, x3); AND(x3, x3, x4));
             break;
         case 0x56:
@@ -1011,7 +1011,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             gd = ((nextop & 0x38) >> 3) + (rex.r << 3);
             if (!(MODREG && gd == (nextop & 7) + (rex.b << 3))) {
                 GETGX();
-                GETEX(x2, 0);
+                GETEX(x2, 0, 8);
                 SSE_LOOP_Q(x3, x4, OR(x3, x3, x4));
             }
             break;
@@ -1025,7 +1025,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SD(xZR, gback, gdoffset + 0);
                 SD(xZR, gback, gdoffset + 8);
             } else {
-                GETEX(x2, 0);
+                GETEX(x2, 0, 8);
                 SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
             }
             break;
@@ -1033,7 +1033,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("ADDPS Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
             s0 = fpu_get_scratch(dyn);
             s1 = fpu_get_scratch(dyn);
             for (int i = 0; i < 4; ++i) {
@@ -1048,7 +1048,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("MULPS Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
             s0 = fpu_get_scratch(dyn);
             s1 = fpu_get_scratch(dyn);
             for (int i = 0; i < 4; ++i) {
@@ -1063,7 +1063,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("CVTPS2PD Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 4);
             s0 = fpu_get_scratch(dyn);
             s1 = fpu_get_scratch(dyn);
             FLW(s0, wback, fixedaddress);
@@ -1077,7 +1077,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("CVTDQ2PS Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
             s0 = fpu_get_scratch(dyn);
             for (int i = 0; i < 4; ++i) {
                 LW(x3, wback, fixedaddress + i * 4);
@@ -1089,7 +1089,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("SUBPS Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
             s0 = fpu_get_scratch(dyn);
             s1 = fpu_get_scratch(dyn);
             for (int i = 0; i < 4; ++i) {
@@ -1104,7 +1104,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("MINPS Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
             s0 = fpu_get_scratch(dyn);
             s1 = fpu_get_scratch(dyn);
             for (int i = 0; i < 4; ++i) {
@@ -1128,7 +1128,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("DIVPS Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
             s0 = fpu_get_scratch(dyn);
             s1 = fpu_get_scratch(dyn);
             for (int i = 0; i < 4; ++i) {
@@ -1143,7 +1143,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("MAXPS Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
             s0 = fpu_get_scratch(dyn);
             s1 = fpu_get_scratch(dyn);
             for (int i = 0; i < 4; ++i) {
@@ -1179,7 +1179,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     SB(x3, gback, gdoffset + 2 * i + 1);
                 }
             } else {
-                GETEM(x2, 0);
+                GETEM(x2, 0, 3);
                 for (int i = 0; i < 4; ++i) {
                     // GX->ub[2 * i + 1] = EX->ub[i];
                     LBU(x3, wback, fixedaddress + i);
@@ -1191,7 +1191,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PUNPCKLWD Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 2);
             // GM->uw[3] = EM->uw[1];
             LHU(x3, wback, fixedaddress + 2 * 1);
             SH(x3, gback, gdoffset + 2 * 3);
@@ -1206,7 +1206,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PUNPCKLDQ Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 1);
             // GM->ud[1] = EM->ud[0];
             LWU(x3, wback, fixedaddress);
             SW(x3, gback, gdoffset + 4 * 1);
@@ -1215,7 +1215,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PACKSSWB Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 6);
             MOV64x(x5, 127);
             MOV64x(x6, -128);
             for (int i = 0; i < 4; ++i) {
@@ -1253,7 +1253,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PCMPGTB Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 7);
             for (int i = 0; i < 8; ++i) {
                 // GX->ub[i] = (GX->sb[i]>EX->sb[i])?0xFF:0x00;
                 LB(x3, wback, fixedaddress + i);
@@ -1267,14 +1267,14 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PCMPGTW Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 6);
             MMX_LOOP_WS(x3, x4, SLT(x3, x4, x3); NEG(x3, x3));
             break;
         case 0x66:
             INST_NAME("PCMPGTD Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 4);
             MMX_LOOP_DS(x3, x4, SLT(x3, x4, x3); NEG(x3, x3));
             break;
         case 0x67:
@@ -1297,7 +1297,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 LW(x3, gback, gdoffset + 0 * 4);
                 SW(x3, gback, gdoffset + 1 * 4);
             } else {
-                GETEM(x1, 0);
+                GETEM(x1, 0, 6);
                 for (int i = 0; i < 4; ++i) {
                     // GX->ub[4+i] = (EX->sw[i]<0)?0:((EX->sw[i]>0xff)?0xff:EX->sw[i]);
                     LH(x3, wback, fixedaddress + i * 2);
@@ -1326,7 +1326,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     SB(x3, gback, gdoffset + 2 * i + 1);
                 }
             } else {
-                GETEM(x2, 0);
+                GETEM(x2, 0, 7);
                 for (int i = 0; i < 4; ++i) {
                     // GX->ub[2 * i + 1] = EX->ub[i + 4];
                     LBU(x3, wback, fixedaddress + i + 4);
@@ -1350,7 +1350,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     SH(x3, gback, gdoffset + (2 * i + 1) * 2);
                 }
             } else {
-                GETEM(x1, 0);
+                GETEM(x1, 0, 6);
                 for (int i = 0; i < 2; ++i) {
                     // GX->uw[2 * i + 1] = EX->uw[i + 2];
                     LHU(x3, wback, fixedaddress + (i + 2) * 2);
@@ -1361,7 +1361,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0x6A:
             INST_NAME("PUNPCKHDQ Gm,Em");
             nextop = F8;
-            GETEM(x1, 0);
+            GETEM(x1, 0, 4);
             GETGM();
             // GM->ud[0] = GM->ud[1];
             LWU(x3, gback, gdoffset + 1 * 4);
@@ -1392,7 +1392,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 LWU(x3, gback, gdoffset);
                 SW(x3, gback, gdoffset + 4);
             } else {
-                GETEM(x1, 0);
+                GETEM(x1, 0, 4);
                 for (int i = 0; i < 2; ++i) {
                     // GM->sw[2+i] = (EM->sd[i]<-32768)?-32768:((EM->sd[i]>32767)?32767:EM->sd[i]);
                     LW(x3, wback, fixedaddress + i * 4);
@@ -1428,7 +1428,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("MOVQ Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 1);
             LD(x3, wback, fixedaddress);
             SD(x3, gback, gdoffset + 0);
             break;
@@ -1436,7 +1436,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PSHUFW Gm, Em, Ib");
             nextop = F8;
             GETGM();
-            GETEM(x2, 1);
+            GETEM(x2, 1, 6);
             u8 = F8;
             LHU(x3, wback, fixedaddress + ((u8 >> (0 * 2)) & 3) * 2);
             LHU(x4, wback, fixedaddress + ((u8 >> (1 * 2)) & 3) * 2);
@@ -1452,7 +1452,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             switch ((nextop >> 3) & 7) {
                 case 2:
                     INST_NAME("PSRLW Em, Ib");
-                    GETEM(x1, 1);
+                    GETEM(x1, 1, 6);
                     u8 = F8;
                     if (u8 > 15) {
                         // just zero dest
@@ -1468,7 +1468,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     break;
                 case 4:
                     INST_NAME("PSRAW Em, Ib");
-                    GETEM(x1, 1);
+                    GETEM(x1, 1, 6);
                     u8 = F8;
                     if (u8 > 15) u8 = 15;
                     if (u8) {
@@ -1482,7 +1482,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     break;
                 case 6:
                     INST_NAME("PSLLW Em, Ib");
-                    GETEM(x1, 1);
+                    GETEM(x1, 1, 6);
                     u8 = F8;
                     if (u8 > 15) {
                         // just zero dest
@@ -1506,7 +1506,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             switch ((nextop >> 3) & 7) {
                 case 2:
                     INST_NAME("PSRLD Em, Ib");
-                    GETEM(x4, 1);
+                    GETEM(x4, 1, 4);
                     u8 = F8;
                     if (u8) {
                         if (u8 > 31) {
@@ -1522,7 +1522,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     break;
                 case 4:
                     INST_NAME("PSRAD Em, Ib");
-                    GETEM(x4, 1);
+                    GETEM(x4, 1, 4);
                     u8 = F8;
                     if (u8 > 31) u8 = 31;
                     if (u8) {
@@ -1535,7 +1535,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     break;
                 case 6:
                     INST_NAME("PSLLD Em, Ib");
-                    GETEM(x4, 1);
+                    GETEM(x4, 1, 4);
                     u8 = F8;
                     if (u8) {
                         if (u8 > 31) {
@@ -1559,7 +1559,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             switch ((nextop >> 3) & 7) {
                 case 2:
                     INST_NAME("PSRLQ Em, Ib");
-                    GETEM(x4, 1);
+                    GETEM(x4, 1, 1);
                     u8 = F8;
                     if (u8) {
                         if (u8 > 63) {
@@ -1573,7 +1573,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     break;
                 case 6:
                     INST_NAME("PSLLQ Em, Ib");
-                    GETEM(x4, 1);
+                    GETEM(x4, 1, 1);
                     u8 = F8;
                     if (u8) {
                         if (u8 > 63) {
@@ -1593,7 +1593,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PCMPEQB Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 7);
             for (int i = 0; i < 8; ++i) {
                 LBU(x3, gback, gdoffset + i);
                 LBU(x4, wback, fixedaddress + i);
@@ -1607,14 +1607,14 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PCMPEQW Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 6);
             MMX_LOOP_W(x3, x4, SUB(x3, x3, x4); SEQZ(x3, x3); NEG(x3, x3));
             break;
         case 0x76:
             INST_NAME("PCMPEQD Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 4);
             MMX_LOOP_D(x3, x4, SUB(x3, x3, x4); SEQZ(x3, x3); NEG(x3, x3));
             break;
         case 0x77:
@@ -1643,7 +1643,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("MOVQ Em, Gm");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 1);
             LD(x3, gback, gdoffset + 0);
             SD(x3, wback, fixedaddress);
             break;
@@ -2231,7 +2231,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("CMPPS Gx, Ex, Ib");
             nextop = F8;
             GETGX();
-            GETEX(x2, 1);
+            GETEX(x2, 1, 12);
             u8 = F8;
             d0 = fpu_get_scratch(dyn);
             d1 = fpu_get_scratch(dyn);
@@ -2307,7 +2307,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PEXTRW Gd,Em,Ib");
             nextop = F8;
             GETGD;
-            GETEM(x2, 0);
+            GETEM(x2, 0, 6);
             u8 = (F8)&3;
             LHU(gd, wback, fixedaddress + u8 * 2);
             break;
@@ -2315,7 +2315,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("SHUFPS Gx, Ex, Ib");
             nextop = F8;
             GETGX();
-            GETEX(x2, 1);
+            GETEX(x2, 1, 12);
             u8 = F8;
             int32_t idx;
 
@@ -2350,7 +2350,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PSRLW Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x4, 0);
+            GETEM(x4, 0, 1);
             LD(x1, wback, fixedaddress);
             ADDI(x2, xZR, 15);
             BLTU_MARK(x2, x1);
@@ -2367,7 +2367,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PSRLD Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x4, 0);
+            GETEM(x4, 0, 1);
             LD(x1, wback, fixedaddress);
             ADDI(x2, xZR, 31);
             BLTU_MARK(x2, x1);
@@ -2384,7 +2384,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PSRLQ Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x4, 0);
+            GETEM(x4, 0, 1);
             LD(x1, wback, fixedaddress);
             ADDI(x2, xZR, 63);
             BLTU_MARK(x2, x1);
@@ -2399,7 +2399,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PADDQ Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 1);
             LD(x1, wback, fixedaddress);
             LD(x2, gback, gdoffset);
             ADD(x1, x1, x2);
@@ -2409,14 +2409,14 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PMULLW Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 6);
             MMX_LOOP_WS(x3, x4, MULW(x3, x3, x4));
             break;
         case 0xD7:
             INST_NAME("PMOVMSKB Gd, Em");
             nextop = F8;
             GETGD;
-            GETEM(x2, 0);
+            GETEM(x2, 0, 1);
             LD(x1, wback, fixedaddress + 0);
             for (int i = 0; i < 8; i++) {
                 if (rv64_zbs) {
@@ -2447,7 +2447,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PSUBUSB Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 7);
             for (int i = 0; i < 8; ++i) {
                 LBU(x3, gback, gdoffset + i);
                 LBU(x4, wback, fixedaddress + i);
@@ -2466,7 +2466,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PSUBUSW Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 6);
             MMX_LOOP_W(x3, x4,
                 SUB(x3, x3, x4);
                 if (rv64_zbb) {
@@ -2483,7 +2483,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PMINUB Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 7);
             for (int i = 0; i < 8; ++i) {
                 LBU(x3, gback, gdoffset + i);
                 LBU(x4, wback, fixedaddress + i);
@@ -2500,7 +2500,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PAND Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x4, 0);
+            GETEM(x4, 0, 1);
             LD(x1, wback, fixedaddress);
             LD(x2, gback, gdoffset);
             AND(x1, x1, x2);
@@ -2510,7 +2510,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PADDUSB Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 7);
             ADDI(x5, xZR, 0xFF);
             for (int i = 0; i < 8; ++i) {
                 LBU(x3, gback, gdoffset + i);
@@ -2529,7 +2529,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PADDUSW Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 6);
             MOV32w(x5, 65535);
             for (int i = 0; i < 4; ++i) {
                 // tmp32s = (int32_t)GX->uw[i] + EX->uw[i];
@@ -2550,7 +2550,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PMAXUB Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 7);
             for (int i = 0; i < 8; ++i) {
                 LBU(x3, gback, gdoffset + i);
                 LBU(x4, wback, fixedaddress + i);
@@ -2567,7 +2567,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PANDN Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 1);
             LD(x1, gback, gdoffset);
             LD(x3, wback, fixedaddress);
             if (rv64_zbb) {
@@ -2582,7 +2582,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PAVGB Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 7);
             for (int i = 0; i < 8; ++i) {
                 LBU(x3, gback, gdoffset + i);
                 LBU(x4, wback, fixedaddress + i);
@@ -2596,7 +2596,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PSRAW Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x4, 0);
+            GETEM(x4, 0, 1);
             LBU(x1, wback, fixedaddress);
             ADDI(x2, xZR, 15);
             if (rv64_zbb) {
@@ -2615,7 +2615,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PSRAD Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x4, 0);
+            GETEM(x4, 0, 1);
             LBU(x1, wback, fixedaddress);
             ADDI(x2, xZR, 31);
             if (rv64_zbb) {
@@ -2634,7 +2634,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PAVGW Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 6);
             for (int i = 0; i < 4; ++i) {
                 LHU(x3, gback, gdoffset + 2 * i);
                 LHU(x4, wback, fixedaddress + 2 * i);
@@ -2648,7 +2648,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PMULHUW Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 6);
             for (int i = 0; i < 4; ++i) {
                 LHU(x3, gback, gdoffset + 2 * i);
                 LHU(x4, wback, fixedaddress + 2 * i);
@@ -2661,7 +2661,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PMULHW Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 6);
             for (int i = 0; i < 4; ++i) {
                 LH(x3, gback, gdoffset + 2 * i);
                 LH(x4, wback, fixedaddress + 2 * i);
@@ -2686,7 +2686,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PSUBSB Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 7);
             ADDI(x5, xZR, 0x7f);
             ADDI(x6, xZR, 0xf80);
             for (int i = 0; i < 8; ++i) {
@@ -2709,7 +2709,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PSUBSW Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 6);
             MOV64x(x5, 32767);
             MOV64x(x6, -32768);
             for (int i = 0; i < 4; ++i) {
@@ -2734,7 +2734,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PMINSW Gx,Ex");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 6);
             for (int i = 0; i < 4; ++i) {
                 LH(x3, gback, gdoffset + 2 * i);
                 LH(x4, wback, fixedaddress + 2 * i);
@@ -2751,7 +2751,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("POR Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 1);
             LD(x3, gback, gdoffset);
             LD(x4, wback, fixedaddress);
             OR(x3, x3, x4);
@@ -2761,7 +2761,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PADDSB Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 7);
             MOV64x(x5, 127);
             MOV64x(x6, -128);
             for (int i = 0; i < 8; ++i) {
@@ -2786,7 +2786,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PADDSW Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 6);
             MOV64x(x5, 32767);
             MOV64x(x6, -32768);
             for (int i = 0; i < 4; ++i) {
@@ -2811,7 +2811,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PMAXSW Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x1, 0);
+            GETEM(x1, 0, 6);
             MMX_LOOP_WS(x3, x4,
                 if (rv64_zbb) {
                     MAX(x3, x3, x4);
@@ -2829,7 +2829,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 // just zero dest
                 SD(xZR, gback, gdoffset + 0);
             } else {
-                GETEM(x2, 0);
+                GETEM(x2, 0, 1);
                 LD(x3, gback, gdoffset + 0);
                 LD(x4, wback, fixedaddress);
                 XOR(x3, x3, x4);
@@ -2840,7 +2840,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PSLLW Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 1);
             ADDI(x4, xZR, 15);
             LD(x1, wback, fixedaddress + 0);
             BLTU_MARK(x4, x1);
@@ -2864,7 +2864,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PSLLD Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 1);
             ADDI(x4, xZR, 31);
             LD(x1, wback, fixedaddress + 0);
             BLTU_MARK(x4, x1);
@@ -2882,7 +2882,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PSLLQ Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 1);
             ADDI(x4, xZR, 63);
             LD(x1, gback, gdoffset + 0);
             LD(x3, wback, fixedaddress + 0);
@@ -2897,7 +2897,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PMULUDQ Gm,Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 1);
             LWU(x3, gback, gdoffset + 0 * 4);
             LWU(x4, wback, fixedaddress + 0 * 4);
             MUL(x3, x3, x4);
@@ -2907,7 +2907,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PMADDWD Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x5, 0);
+            GETEM(x5, 0, 6);
             for (int i = 0; i < 2; ++i) {
                 LH(x1, gback, gdoffset + i * 4);
                 LH(x2, gback, gdoffset + i * 4 + 2);
@@ -2923,7 +2923,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PSADBW Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 7);
             MV(x6, xZR);
             for (int i = 0; i < 8; ++i) {
                 LBU(x3, gback, gdoffset + i);
@@ -2943,7 +2943,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("MASKMOVQ Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x5, 0);
+            GETEM(x5, 0, 7);
             for (int i = 0; i < 8; i++) {
                 LB(x1, wback, fixedaddress + i);
                 BLT(xZR, x1, 4 * 3);
@@ -2955,7 +2955,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PSUBB Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 7);
             for (int i = 0; i < 8; ++i) {
                 LB(x3, wback, fixedaddress + i);
                 LB(x4, gback, gdoffset + i);
@@ -2967,21 +2967,21 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PSUBW Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 6);
             MMX_LOOP_W(x3, x4, SUBW(x3, x3, x4));
             break;
         case 0xFA:
             INST_NAME("PSUBD Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 4);
             MMX_LOOP_D(x3, x4, SUBW(x3, x3, x4));
             break;
         case 0xFB:
             INST_NAME("PSUBQ Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 1);
             LD(x1, gback, gdoffset + 0);
             LD(x3, wback, fixedaddress + 0);
             SUB(x1, x1, x3);
@@ -2991,7 +2991,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PADDB Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 7);
             for (int i = 0; i < 8; ++i) {
                 // GM->sb[i] += EM->sb[i];
                 LB(x3, gback, gdoffset + i);
@@ -3004,14 +3004,14 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("PADDW Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 6);
             MMX_LOOP_W(x3, x4, ADDW(x3, x3, x4));
             break;
         case 0xFE:
             INST_NAME("PADDD Gm, Em");
             nextop = F8;
             GETGM();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 4);
             MMX_LOOP_D(x3, x4, ADDW(x3, x3, x4));
             break;
         default:
diff --git a/src/dynarec/rv64/dynarec_rv64_64.c b/src/dynarec/rv64/dynarec_rv64_64.c
index ad022f5a..aef4f21c 100644
--- a/src/dynarec/rv64/dynarec_rv64_64.c
+++ b/src/dynarec/rv64/dynarec_rv64_64.c
@@ -76,7 +76,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                             INST_NAME("MOVUPS Ex,Gx");
                             nextop = F8;
                             GETGX();
-                            GETEX(x2, 0);
+                            GETEX(x2, 0, 8);
                             if(!MODREG) {
                                 grab_segdata(dyn, addr, ninst, x4, seg);
                                 ADD(x4, x4, wback);
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index d4f44b3c..fedcdd54 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -56,14 +56,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x10:
             INST_NAME("MOVUPD Gx,Ex");
             nextop = F8;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 8);
             GETGX();
             SSE_LOOP_MV_Q(x3);
             break;
         case 0x11:
             INST_NAME("MOVUPD Ex,Gx");
             nextop = F8;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 8);
             GETGX();
             SSE_LOOP_MV_Q2(x3);
             if (!MODREG) SMWRITE2();
@@ -100,7 +100,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("UNPCKLPD Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 2);
             // GX->q[1] = EX->q[0];
             LD(x3, wback, fixedaddress + 0);
             SD(x3, gback, gdoffset + 8);
@@ -108,7 +108,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x15:
             INST_NAME("UNPCKHPD Gx, Ex");
             nextop = F8;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 8);
             GETGX();
             // GX->q[0] = GX->q[1];
             LD(x3, gback, gdoffset + 8);
@@ -139,14 +139,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x28:
             INST_NAME("MOVAPD Gx,Ex");
             nextop = F8;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 8);
             GETGX();
             SSE_LOOP_MV_Q(x3);
             break;
         case 0x29:
             INST_NAME("MOVAPD Ex,Gx");
             nextop = F8;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 8);
             GETGX();
             SSE_LOOP_MV_Q2(x3);
             if (!MODREG) SMWRITE2();
@@ -155,7 +155,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("CVTPI2PD Gx,Em");
             nextop = F8;
             GETGX();
-            GETEM(x2, 0);
+            GETEM(x2, 0, 4);
             d0 = fpu_get_scratch(dyn);
             for (int i = 0; i < 2; ++i) {
                 LW(x1, wback, fixedaddress + i * 4);
@@ -167,14 +167,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("MOVNTPD Ex, Gx");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             SSE_LOOP_MV_Q2(x3);
             break;
         case 0x2C:
             INST_NAME("CVTTPD2PI Gm,Ex");
             nextop = F8;
             GETGM();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             d0 = fpu_get_scratch(dyn);
             for (int i = 0; i < 2; ++i) {
                 if (!box64_dynarec_fastround) {
@@ -196,7 +196,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("CVTPD2PI Gm,Ex");
             nextop = F8;
             GETGM();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             d0 = fpu_get_scratch(dyn);
             u8 = sse_setround(dyn, ninst, x4, x5);
             for (int i = 0; i < 2; ++i) {
@@ -265,7 +265,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PSHUFB Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 15);
 
                     ADDI(x5, xEmu, offsetof(x64emu_t, scratch));
 
@@ -303,7 +303,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         LD(x3, gback, gdoffset + 0);
                         SD(x3, gback, gdoffset + 8);
                     } else {
-                        GETEX(x2, 0);
+                        GETEX(x2, 0, 14);
                         for (int i = 0; i < 4; ++i) {
                             // GX->sw[4+i] = EX->sw[i*2+0] + EX->sw[i*2+1];
                             LH(x3, wback, fixedaddress + 2 * (i * 2 + 0));
@@ -332,7 +332,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         LD(x3, gback, gdoffset + 0);
                         SD(x3, gback, gdoffset + 8);
                     } else {
-                        GETEX(x2, 0);
+                        GETEX(x2, 0, 12);
                         // GX->sd[2] = EX->sd[0] + EX->sd[1];
                         LW(x3, wback, fixedaddress + 0 * 4);
                         LW(x4, wback, fixedaddress + 1 * 4);
@@ -373,7 +373,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         LD(x3, gback, gdoffset + 0);
                         SD(x3, gback, gdoffset + 8);
                     } else {
-                        GETEX(x2, 0);
+                        GETEX(x2, 0, 14);
                         for (int i = 0; i < 4; ++i) {
                             // tmp32s = EX->sw[i*2+0] + EX->sw[i*2+1];
                             // GX->sw[4+i] = sat(tmp32s);
@@ -397,7 +397,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMADDUBSW Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 15);
                     MOV64x(x5, 32767);
                     MOV64x(x6, -32768);
                     for (int i = 0; i < 8; ++i) {
@@ -436,7 +436,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         LD(x3, gback, gdoffset + 0);
                         SD(x3, gback, gdoffset + 8);
                     } else {
-                        GETEX(x2, 0);
+                        GETEX(x2, 0, 14);
                         for (int i = 0; i < 4; ++i) {
                             // GX->sw[4+i] = EX->sw[i*2+0] - EX->sw[i*2+1];
                             LH(x3, wback, fixedaddress + 2 * (i * 2 + 0));
@@ -450,7 +450,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PSIGNB Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 15);
                     for (int i = 0; i < 16; ++i) {
                         LB(x3, gback, gdoffset + i);
                         LB(x4, wback, fixedaddress + i);
@@ -465,7 +465,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PSIGNW Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 14);
                     for (int i = 0; i < 8; ++i) {
                         LH(x3, gback, gdoffset + i * 2);
                         LH(x4, wback, fixedaddress + i * 2);
@@ -480,7 +480,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PSIGND Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 12);
                     for (int i = 0; i < 4; ++i) {
                         LW(x3, gback, gdoffset + i * 4);
                         LW(x4, wback, fixedaddress + i * 4);
@@ -495,7 +495,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMULHRSW Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 14);
                     for (int i = 0; i < 8; ++i) {
                         LH(x3, gback, gdoffset + i * 2);
                         LH(x4, wback, fixedaddress + i * 2);
@@ -510,7 +510,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PBLENDVB Gx,Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 15);
                     sse_forget_reg(dyn, ninst, x6, 0); // forget xmm[0]
                     for (int i = 0; i < 16; ++i) {
                         LB(x3, xEmu, offsetof(x64emu_t, xmm[0]) + i);
@@ -524,7 +524,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PBLENDVPS Gx,Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 12);
                     for (int i = 0; i < 4; ++i) {
                         LW(x3, xEmu, offsetof(x64emu_t, xmm[0]) + i * 4);
                         BGE(x3, xZR, 4 + 4 * 2);
@@ -537,7 +537,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     nextop = F8;
                     SETFLAGS(X_ALL, SF_SET);
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 8);
                     CLEAR_FLAGS();
                     SET_DFNONE();
                     IFX(X_ZF | X_CF)
@@ -574,7 +574,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PABSB Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 15);
                     for (int i = 0; i < 16; ++i) {
                         LB(x4, wback, fixedaddress + i);
                         BGE(x4, xZR, 4 + 4);
@@ -586,7 +586,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PABSW Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 14);
                     for (int i = 0; i < 8; ++i) {
                         LH(x4, wback, fixedaddress + i * 2);
                         BGE(x4, xZR, 4 + 4);
@@ -598,7 +598,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PABSD Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 12);
                     for (int i = 0; i < 4; ++i) {
                         LW(x4, wback, fixedaddress + i * 4);
                         BGE(x4, xZR, 4 + 4);
@@ -610,7 +610,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMOVSXBW Gx, Ex"); // SSE4 opcode!
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 7);
                     for (int i = 7; i >= 0; --i) {
                         // GX->sw[i] = EX->sb[i];
                         LB(x3, wback, fixedaddress + i);
@@ -621,7 +621,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMOVSXBD Gx, Ex"); // SSE4 opcode!
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 3);
                     for (int i = 3; i >= 0; --i) {
                         // GX->sd[i] = EX->sb[i];
                         LB(x3, wback, fixedaddress + i);
@@ -632,7 +632,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMOVSXBQ Gx, Ex"); // SSE4 opcode!
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 1);
                     for (int i = 1; i >= 0; --i) {
                         // GX->sq[i] = EX->sb[i];
                         LB(x3, wback, fixedaddress + i);
@@ -643,7 +643,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMOVSXWD Gx, Ex"); // SSE4 opcode!
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 6);
                     for (int i = 3; i >= 0; --i) {
                         // GX->sd[i] = EX->sw[i];
                         LH(x3, wback, fixedaddress + i * 2);
@@ -654,7 +654,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMOVSXWQ Gx, Ex"); // SSE4 opcode!
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 2);
                     for (int i = 1; i >= 0; --i) {
                         // GX->sq[i] = EX->sw[i];
                         LH(x3, wback, fixedaddress + i * 2);
@@ -665,7 +665,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMOVSXDQ Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 4);
                     for (int i = 1; i >= 0; --i) {
                         // GX->sq[i] = EX->sd[i];
                         LW(x4, wback, fixedaddress + i * 4);
@@ -675,7 +675,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 case 0x28:
                     INST_NAME("PMULDQ Gx, Ex");
                     nextop = F8;
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 8);
                     GETGX();
                     for (int i = 1; i >= 0; --i) {
                         LW(x3, wback, fixedaddress + i * 8);
@@ -688,7 +688,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PACKUSDW Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 12);
                     MOV64x(x5, 65535);
                     for (int i = 0; i < 4; ++i) {
                         LW(x3, gback, gdoffset + i * 4);
@@ -726,7 +726,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMOVZXBW Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 7);
                     for (int i = 7; i >= 0; --i) {
                         LBU(x3, wback, fixedaddress + i);
                         SH(x3, gback, gdoffset + i * 2);
@@ -736,7 +736,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMOVZXBD Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 3);
                     for (int i = 3; i >= 0; --i) {
                         LBU(x3, wback, fixedaddress + i);
                         SW(x3, gback, gdoffset + i * 4);
@@ -746,7 +746,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMOVZXBQ Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 2);
                     for (int i = 1; i >= 0; --i) {
                         LBU(x3, wback, fixedaddress + i);
                         SD(x3, gback, gdoffset + i * 8);
@@ -756,7 +756,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMOVZXWD Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 6);
                     for (int i = 3; i >= 0; --i) {
                         LHU(x3, wback, fixedaddress + i * 2);
                         SW(x3, gback, gdoffset + i * 4);
@@ -766,7 +766,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMOVZXWQ Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 2);
                     for (int i = 1; i >= 0; --i) {
                         LHU(x3, wback, fixedaddress + i * 2);
                         SD(x3, gback, gdoffset + i * 8);
@@ -776,7 +776,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMOVZXDQ Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 4);
                     for (int i = 1; i >= 0; --i) {
                         LWU(x3, wback, fixedaddress + i * 4);
                         SD(x3, gback, gdoffset + i * 8);
@@ -787,7 +787,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMINSB Gx, Ex"); // SSE4 opcode!
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 15);
                     for (int i = 0; i < 16; ++i) {
                         LB(x3, gback, gdoffset + i);
                         LB(x4, wback, fixedaddress + i);
@@ -802,7 +802,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMINSD Gx, Ex"); // SSE4 opcode!
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 12);
                     for (int i = 0; i < 4; ++i) {
                         LW(x3, gback, gdoffset + i * 4);
                         LW(x4, wback, fixedaddress + i * 4);
@@ -817,7 +817,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMINUW Gx, Ex"); // SSE4 opcode!
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 14);
                     for (int i = 0; i < 8; ++i) {
                         LHU(x3, gback, gdoffset + i * 2);
                         LHU(x4, wback, fixedaddress + i * 2);
@@ -832,7 +832,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMINUD Gx, Ex"); // SSE4 opcode!
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 12);
                     for (int i = 0; i < 4; ++i) {
                         LWU(x3, gback, gdoffset + i * 4);
                         LWU(x4, wback, fixedaddress + i * 4);
@@ -847,7 +847,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMAXSB Gx, Ex"); // SSE4 opcode!
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 15);
                     for (int i = 0; i < 16; ++i) {
                         LB(x3, gback, gdoffset + i);
                         LB(x4, wback, fixedaddress + i);
@@ -862,7 +862,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMAXSD Gx, Ex"); // SSE4 opcode!
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 12);
                     for (int i = 0; i < 4; ++i) {
                         LW(x3, gback, gdoffset + i * 4);
                         LW(x4, wback, fixedaddress + i * 4);
@@ -877,7 +877,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMAXUW Gx, Ex"); // SSE4 opcode!
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 14);
                     for (int i = 0; i < 8; ++i) {
                         LHU(x3, gback, gdoffset + i * 2);
                         LHU(x4, wback, fixedaddress + i * 2);
@@ -892,7 +892,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMAXUD Gx, Ex"); // SSE4 opcode!
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 12);
                     for (int i = 0; i < 4; ++i) {
                         LWU(x3, gback, gdoffset + i * 4);
                         LWU(x4, wback, fixedaddress + i * 4);
@@ -907,7 +907,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PMULLD Gx, Ex");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 12);
                     for (int i = 0; i < 4; ++i) {
                         LW(x3, gback, gdoffset + i * 4);
                         LW(x4, wback, fixedaddress + i * 4);
@@ -955,7 +955,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("AESIMC Gx, Ex"); // AES-NI
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 8);
                     SSE_LOOP_MV_Q(x3);
                     sse_forget_reg(dyn, ninst, x6, gd);
                     MOV32w(x1, gd);
@@ -969,7 +969,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     MOV32w(x1, gd);
                     CALL(native_aese, -1);
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 8);
                     SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
                     break;
                 case 0xDD:
@@ -980,7 +980,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     MOV32w(x1, gd);
                     CALL(native_aeselast, -1);
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 8);
                     SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
                     break;
                 case 0xDE:
@@ -991,7 +991,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     MOV32w(x1, gd);
                     CALL(native_aesd, -1);
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 8);
                     SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
                     break;
 
@@ -1003,7 +1003,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     MOV32w(x1, gd);
                     CALL(native_aesdlast, -1);
                     GETGX();
-                    GETEX(x2, 0);
+                    GETEX(x2, 0, 8);
                     SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
                     break;
                 case 0xF0:
@@ -1092,7 +1092,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("ROUNDPD Gx, Ex, Ib");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 1);
+                    GETEX(x2, 1, 8);
                     u8 = F8;
                     d0 = fpu_get_scratch(dyn);
                     d1 = fpu_get_scratch(dyn);
@@ -1148,7 +1148,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PBLENDW Gx, Ex, Ib");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 1);
+                    GETEX(x2, 1, 14);
                     u8 = F8;
                     i32 = 0;
                     if (MODREG && gd == ed) break;
@@ -1185,7 +1185,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PALIGNR Gx, Ex, Ib");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 1);
+                    GETEX(x2, 1, 8);
                     u8 = F8;
                     if (u8 > 31) {
                         SD(xZR, gback, gdoffset + 0);
@@ -1279,7 +1279,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("INSERTPS GX, EX, Ib");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 1);
+                    GETEX(x2, 1, 12);
                     u8 = F8;
                     if (MODREG)
                         s8 = (u8 >> 6) & 3;
@@ -1310,7 +1310,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("DPPS Gx, Ex, Ib");
                     nextop = F8;
                     GETGX();
-                    GETEX(x2, 1);
+                    GETEX(x2, 1, 12);
                     u8 = F8;
                     d0 = fpu_get_scratch(dyn);
                     d1 = fpu_get_scratch(dyn);
@@ -1403,7 +1403,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PMOVMSKD Gd, Ex");
             nextop = F8;
             GETGD;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 8);
             MV(gd, xZR);
             for (int i = 0; i < 2; ++i) {
                 // GD->dword[0] |= ((EX->q[i]>>63)&1)<<i;
@@ -1417,7 +1417,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("SQRTPD Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             d0 = fpu_get_scratch(dyn);
             if (!box64_dynarec_fastnan) {
                 d1 = fpu_get_scratch(dyn);
@@ -1439,35 +1439,35 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x54:
             INST_NAME("ANDPD Gx, Ex");
             nextop = F8;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 8);
             GETGX();
             SSE_LOOP_Q(x3, x4, AND(x3, x3, x4));
             break;
         case 0x55:
             INST_NAME("ANDNPD Gx, Ex");
             nextop = F8;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 8);
             GETGX();
             SSE_LOOP_Q(x3, x4, NOT(x3, x3); AND(x3, x3, x4));
             break;
         case 0x56:
             INST_NAME("ORPD Gx, Ex");
             nextop = F8;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 8);
             GETGX();
             SSE_LOOP_Q(x3, x4, OR(x3, x3, x4));
             break;
         case 0x57:
             INST_NAME("XORPD Gx, Ex");
             nextop = F8;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 8);
             GETGX();
             SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
             break;
         case 0x58:
             INST_NAME("ADDPD Gx, Ex");
             nextop = F8;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 8);
             GETGX();
             SSE_LOOP_FQ(x3, x4, {
                 if (!box64_dynarec_fastnan) {
@@ -1487,7 +1487,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x59:
             INST_NAME("MULPD Gx, Ex");
             nextop = F8;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 8);
             GETGX();
             SSE_LOOP_FQ(x3, x4, {
                 if (!box64_dynarec_fastnan) {
@@ -1508,7 +1508,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("CVTPD2PS Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             d0 = fpu_get_scratch(dyn);
             // GX->f[0] = EX->d[0];
             FLD(d0, wback, fixedaddress + 0);
@@ -1525,7 +1525,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("CVTPS2DQ Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
             d0 = fpu_get_scratch(dyn);
             u8 = sse_setround(dyn, ninst, x6, x4);
             for (int i = 0; i < 4; ++i) {
@@ -1541,7 +1541,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x5C:
             INST_NAME("SUBPD Gx, Ex");
             nextop = F8;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 8);
             GETGX();
             SSE_LOOP_FQ(x3, x4, {
                 if (!box64_dynarec_fastnan) {
@@ -1562,7 +1562,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("MINPD Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             d0 = fpu_get_scratch(dyn);
             d1 = fpu_get_scratch(dyn);
             for (int i = 0; i < 2; ++i) {
@@ -1580,7 +1580,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x5E:
             INST_NAME("DIVPD Gx, Ex");
             nextop = F8;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 8);
             GETGX();
             SSE_LOOP_FQ(x3, x4, {
                 if (!box64_dynarec_fastnan) {
@@ -1601,7 +1601,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("MAXPD Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             d0 = fpu_get_scratch(dyn);
             d1 = fpu_get_scratch(dyn);
             for (int i = 0; i < 2; ++i) {
@@ -1632,7 +1632,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     SB(x3, gback, gdoffset + 2 * i + 1);
                 }
             } else {
-                GETEX(x1, 0);
+                GETEX(x1, 0, 7);
                 for (int i = 0; i < 8; ++i) {
                     // GX->ub[2 * i + 1] = EX->ub[i];
                     LBU(x3, wback, fixedaddress + i);
@@ -1656,7 +1656,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     SH(x3, gback, gdoffset + (2 * i + 1) * 2);
                 }
             } else {
-                GETEX(x1, 0);
+                GETEX(x1, 0, 6);
                 for (int i = 0; i < 4; ++i) {
                     // GX->uw[2 * i + 1] = EX->uw[i];
                     LHU(x3, wback, fixedaddress + i * 2);
@@ -1667,7 +1667,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x62:
             INST_NAME("PUNPCKLDQ Gx,Ex");
             nextop = F8;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 4);
             GETGX();
             // GX->ud[3] = EX->ud[1];
             LWU(x3, wback, fixedaddress + 1 * 4);
@@ -1683,7 +1683,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PACKSSWB Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 14);
             MOV64x(x5, 127);
             MOV64x(x6, -128);
             for (int i = 0; i < 8; ++i) {
@@ -1721,7 +1721,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PCMPGTB Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 15);
             for (int i = 0; i < 16; ++i) {
                 // GX->ub[i] = (GX->sb[i]>EX->sb[i])?0xFF:0x00;
                 LB(x3, wback, fixedaddress + i);
@@ -1735,7 +1735,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PCMPGTW Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 14);
             for (int i = 0; i < 8; ++i) {
                 // GX->uw[i] = (GX->sw[i]>EX->sw[i])?0xFFFF:0x0000;
                 LH(x3, wback, fixedaddress + i * 2);
@@ -1748,7 +1748,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x66:
             INST_NAME("PCMPGTD Gx,Ex");
             nextop = F8;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 12);
             GETGX();
             SSE_LOOP_DS(x3, x4, SLT(x4, x4, x3); NEG(x3, x4));
             break;
@@ -1772,7 +1772,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 LD(x3, gback, gdoffset + 0 * 8);
                 SD(x3, gback, gdoffset + 1 * 8);
             } else {
-                GETEX(x1, 0);
+                GETEX(x1, 0, 14);
                 for (int i = 0; i < 8; ++i) {
                     // GX->ub[8+i] = (EX->sw[i]<0)?0:((EX->sw[i]>0xff)?0xff:EX->sw[i]);
                     LH(x3, wback, fixedaddress + i * 2);
@@ -1801,7 +1801,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     SB(x3, gback, gdoffset + 2 * i + 1);
                 }
             } else {
-                GETEX(x2, 0);
+                GETEX(x2, 0, 15); // loop below reads EX->ub[8..15], so the largest displacement is 15
                 for (int i = 0; i < 8; ++i) {
                     // GX->ub[2 * i + 1] = EX->ub[i + 8];
                     LBU(x3, wback, fixedaddress + i + 8);
@@ -1825,7 +1825,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     SH(x3, gback, gdoffset + (2 * i + 1) * 2);
                 }
             } else {
-                GETEX(x1, 0);
+                GETEX(x1, 0, 14);
                 for (int i = 0; i < 4; ++i) {
                     // GX->uw[2 * i + 1] = EX->uw[i + 4];
                     LHU(x3, wback, fixedaddress + (i + 4) * 2);
@@ -1836,7 +1836,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x6A:
             INST_NAME("PUNPCKHDQ Gx,Ex");
             nextop = F8;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 12);
             GETGX();
             // GX->ud[0] = GX->ud[2];
             LWU(x3, gback, gdoffset + 2 * 4);
@@ -1873,7 +1873,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 LD(x3, gback, gdoffset + 0 * 8);
                 SD(x3, gback, gdoffset + 1 * 8);
             } else {
-                GETEX(x1, 0);
+                GETEX(x1, 0, 12);
                 for (int i = 0; i < 4; ++i) {
                     // GX->sw[4+i] = (EX->sd[i]<-32768)?-32768:((EX->sd[i]>32767)?32767:EX->sd[i]);
                     LW(x3, wback, fixedaddress + i * 4);
@@ -1902,7 +1902,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PUNPCKHQDQ Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             LD(x3, gback, gdoffset + 8);
             SD(x3, gback, gdoffset + 0);
             LD(x3, wback, fixedaddress + 8);
@@ -1929,14 +1929,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("MOVDQA Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             SSE_LOOP_MV_Q(x3);
             break;
         case 0x70: // TODO: Optimize this!
             INST_NAME("PSHUFD Gx,Ex,Ib");
             nextop = F8;
             GETGX();
-            GETEX(x2, 1);
+            GETEX(x2, 1, 12);
             u8 = F8;
             int32_t idx;
 
@@ -1959,7 +1959,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             switch ((nextop >> 3) & 7) {
                 case 2:
                     INST_NAME("PSRLW Ex, Ib");
-                    GETEX(x1, 1);
+                    GETEX(x1, 1, 14);
                     u8 = F8;
                     if (u8 > 15) {
                         // just zero dest
@@ -1976,7 +1976,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     break;
                 case 4:
                     INST_NAME("PSRAW Ex, Ib");
-                    GETEX(x1, 1);
+                    GETEX(x1, 1, 14);
                     u8 = F8;
                     if (u8 > 15) u8 = 15;
                     if (u8) {
@@ -1990,7 +1990,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     break;
                 case 6:
                     INST_NAME("PSLLW Ex, Ib");
-                    GETEX(x1, 1);
+                    GETEX(x1, 1, 14);
                     u8 = F8;
                     if (u8 > 15) {
                         // just zero dest
@@ -2015,7 +2015,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             switch ((nextop >> 3) & 7) {
                 case 2:
                     INST_NAME("PSRLD Ex, Ib");
-                    GETEX(x1, 1);
+                    GETEX(x1, 1, 12);
                     u8 = F8;
                     if (u8) {
                         if (u8 > 31) {
@@ -2029,7 +2029,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     break;
                 case 4:
                     INST_NAME("PSRAD Ex, Ib");
-                    GETEX(x1, 1);
+                    GETEX(x1, 1, 12);
                     u8 = F8;
                     if (u8 > 31) u8 = 31;
                     if (u8) {
@@ -2038,7 +2038,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     break;
                 case 6:
                     INST_NAME("PSLLD Ex, Ib");
-                    GETEX(x1, 1);
+                    GETEX(x1, 1, 12);
                     u8 = F8;
                     if (u8) {
                         if (u8 > 31) {
@@ -2059,7 +2059,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             switch ((nextop >> 3) & 7) {
                 case 2:
                     INST_NAME("PSRLQ Ex, Ib");
-                    GETEX(x1, 1);
+                    GETEX(x1, 1, 8);
                     u8 = F8;
                     if (!u8) break;
                     if (u8 > 63) {
@@ -2077,7 +2077,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     break;
                 case 3:
                     INST_NAME("PSRLDQ Ex, Ib");
-                    GETEX(x1, 1);
+                    GETEX(x1, 1, 8);
                     u8 = F8;
                     if (!u8) break;
                     if (u8 > 15) {
@@ -2105,7 +2105,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     break;
                 case 6:
                     INST_NAME("PSLLQ Ex, Ib");
-                    GETEX(x1, 1);
+                    GETEX(x1, 1, 8);
                     u8 = F8;
                     if (!u8) break;
                     if (u8 > 63) {
@@ -2123,7 +2123,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     break;
                 case 7:
                     INST_NAME("PSLLDQ Ex, Ib");
-                    GETEX(x1, 1);
+                    GETEX(x1, 1, 8);
                     u8 = F8;
                     if (!u8) break;
                     if (u8 > 15) {
@@ -2157,7 +2157,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PCMPEQB Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 15);
             for (int i = 0; i < 16; ++i) {
                 LBU(x3, gback, gdoffset + i);
                 LBU(x4, wback, fixedaddress + i);
@@ -2171,14 +2171,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PCMPEQW Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 14);
             SSE_LOOP_W(x3, x4, SUB(x3, x3, x4); SEQZ(x3, x3); NEG(x3, x3));
             break;
         case 0x76:
             INST_NAME("PCMPEQD Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
             SSE_LOOP_D(x3, x4, XOR(x3, x3, x4); SNEZ(x3, x3); ADDI(x3, x3, -1));
             break;
         case 0x7C:
@@ -2205,7 +2205,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             if (MODREG && gd == (nextop & 7) + (rex.b << 3)) {
                 FSD(d0, gback, gdoffset + 8);
             } else {
-                GETEX(x2, 0);
+                GETEX(x2, 0, 8);
                 FLD(d0, wback, fixedaddress + 0);
                 FLD(d1, wback, fixedaddress + 8);
                 if (!box64_dynarec_fastnan) {
@@ -2253,7 +2253,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("MOVDQA Ex,Gx");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             SSE_LOOP_MV_Q2(x3);
             if (!MODREG) SMWRITE2();
             break;
@@ -2565,7 +2565,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("CMPPD Gx, Ex, Ib");
             nextop = F8;
             GETGX();
-            GETEX(x2, 1);
+            GETEX(x2, 1, 8);
             u8 = F8;
             d0 = fpu_get_scratch(dyn);
             d1 = fpu_get_scratch(dyn);
@@ -2630,7 +2630,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PEXTRW Gd,Ex,Ib");
             nextop = F8;
             GETGD;
-            GETEX(x1, 0);
+            GETEX(x1, 0, 14);
             u8 = (F8)&7;
             LHU(gd, wback, fixedaddress + u8 * 2);
             break;
@@ -2638,7 +2638,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("SHUFPD Gx, Ex, Ib");
             nextop = F8;
             GETGX();
-            GETEX(x2, 1);
+            GETEX(x2, 1, 8);
             u8 = F8;
             if (MODREG && gd == (nextop & 7) + (rex.b << 3) && u8 == 0) {
                 LD(x3, gback, gdoffset + 0);
@@ -2677,7 +2677,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PSRLW Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 1);
             LD(x3, wback, fixedaddress);
             ADDI(x4, xZR, 16);
             BLTU_MARK(x3, x4);
@@ -2695,7 +2695,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PSRLD Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 1);
             LD(x3, wback, fixedaddress);
             ADDI(x4, xZR, 32);
             BLTU_MARK(x3, x4);
@@ -2713,7 +2713,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PSRLQ Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 1);
             LD(x3, wback, fixedaddress);
             ADDI(x4, xZR, 64);
             BLTU_MARK(x3, x4);
@@ -2731,14 +2731,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PADDQ Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             SSE_LOOP_Q(x3, x4, ADD(x3, x3, x4));
             break;
         case 0xD5:
             INST_NAME("PMULLW Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 14);
             for (int i = 0; i < 8; ++i) {
                 LH(x3, gback, gdoffset + 2 * i);
                 LH(x4, wback, fixedaddress + 2 * i);
@@ -2750,7 +2750,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("MOVQ Ex, Gx");
             nextop = F8;
             GETGXSD(d0);
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             FSD(d0, wback, fixedaddress + 0);
             if (MODREG) {
                 SD(xZR, wback, fixedaddress + 8);
@@ -2761,7 +2761,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0xD7:
             INST_NAME("PMOVMSKB Gd, Ex");
             nextop = F8;
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             GETGD;
             LD(x1, wback, fixedaddress + 8); // high part
             LD(x2, wback, fixedaddress + 0); // low part, also destroyed wback(x2)
@@ -2808,7 +2808,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PSUBUSB Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 15);
             for (int i = 0; i < 16; ++i) {
                 LBU(x3, gback, gdoffset + i);
                 LBU(x4, wback, fixedaddress + i);
@@ -2827,8 +2827,9 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PSUBUSW Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
-            SSE_LOOP_W(x3, x4,
+            GETEX(x2, 0, 14);
+            SSE_LOOP_W(
+                x3, x4,
                 SUB(x3, x3, x4);
                 if (rv64_zbb) {
                     MAX(x3, x3, xZR);
@@ -2836,15 +2837,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     NOT(x4, x3);
                     SRAI(x4, x4, 63);
                     AND(x3, x3, x4);
-                }
-                SH(x3, gback, gdoffset + i * 2);
-            );
+                });
             break;
         case 0xDA:
             INST_NAME("PMINUB Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 15);
             for (int i = 0; i < 16; ++i) {
                 LBU(x3, gback, gdoffset + i);
                 LBU(x4, wback, fixedaddress + i);
@@ -2861,14 +2860,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PAND Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             SSE_LOOP_Q(x3, x4, AND(x3, x3, x4));
             break;
         case 0xDC:
             INST_NAME("PADDUSB Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 15);
             ADDI(x5, xZR, 0xFF);
             for (int i = 0; i < 16; ++i) {
                 LBU(x3, gback, gdoffset + i);
@@ -2887,7 +2886,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PADDUSW Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 14);
             MOV32w(x5, 65535);
             for (int i = 0; i < 8; ++i) {
                 // tmp32s = (int32_t)GX->uw[i] + EX->uw[i];
@@ -2908,7 +2907,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PMAXUB Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 15);
             for (int i = 0; i < 16; ++i) {
                 LBU(x3, gback, gdoffset + i);
                 LBU(x4, wback, fixedaddress + i);
@@ -2925,14 +2924,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PANDN Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             SSE_LOOP_Q(x3, x4, NOT(x3, x3); AND(x3, x3, x4));
             break;
         case 0xE0:
             INST_NAME("PAVGB Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 15);
             for (int i = 0; i < 16; ++i) {
                 LBU(x3, gback, gdoffset + i);
                 LBU(x4, wback, fixedaddress + i);
@@ -2946,7 +2945,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PSRAW Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 1);
             ADDI(x4, xZR, 16);
             LD(x3, wback, fixedaddress);
             BLTU(x3, x4, 8);
@@ -2961,7 +2960,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PSRAD Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 1);
             ADDI(x4, xZR, 32);
             LD(x3, wback, fixedaddress);
             BLTU(x3, x4, 8);
@@ -2976,7 +2975,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PAVGW Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 14);
             for (int i = 0; i < 8; ++i) {
                 LHU(x3, gback, gdoffset + 2 * i);
                 LHU(x4, wback, fixedaddress + 2 * i);
@@ -2990,7 +2989,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PMULHUW Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 14);
             for (int i = 0; i < 8; ++i) {
                 LHU(x3, gback, gdoffset + 2 * i);
                 LHU(x4, wback, fixedaddress + 2 * i);
@@ -3003,7 +3002,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PMULHW Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 14);
             for (int i = 0; i < 8; ++i) {
                 LH(x3, gback, gdoffset + 2 * i);
                 LH(x4, wback, fixedaddress + 2 * i);
@@ -3016,7 +3015,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("CVTTPD2DQ Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             v0 = fpu_get_scratch(dyn);
             v1 = fpu_get_scratch(dyn);
             FLD(v0, wback, fixedaddress + 0);
@@ -3049,14 +3048,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("MOVNTDQ Ex, Gx");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             SSE_LOOP_MV_Q2(x3);
             break;
         case 0xE8:
             INST_NAME("PSUBSB Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 15);
             ADDI(x5, xZR, 0x7f);
             ADDI(x6, xZR, 0xf80);
             for (int i = 0; i < 16; ++i) {
@@ -3081,7 +3080,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PSUBSW Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 14);
             MOV64x(x5, 32767);
             MOV64x(x6, -32768);
             for (int i = 0; i < 8; ++i) {
@@ -3106,7 +3105,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PMINSW Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 14);
             for (int i = 0; i < 8; ++i) {
                 LH(x3, gback, gdoffset + 2 * i);
                 LH(x4, wback, fixedaddress + 2 * i);
@@ -3123,14 +3122,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("POR Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             SSE_LOOP_Q(x3, x4, OR(x3, x3, x4));
             break;
         case 0xEC:
             INST_NAME("PADDSB Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 15);
             MOV64x(x5, 127);
             MOV64x(x6, -128);
             for (int i = 0; i < 16; ++i) {
@@ -3155,7 +3154,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PADDSW Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 14);
             MOV64x(x5, 32767);
             MOV64x(x6, -32768);
             for (int i = 0; i < 8; ++i) {
@@ -3180,7 +3179,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PMAXSW Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 14);
             SSE_LOOP_WS(x3, x4,
                 if (rv64_zbb) {
                     MAX(x3, x3, x4);
@@ -3199,7 +3198,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 SD(xZR, gback, gdoffset + 0);
                 SD(xZR, gback, gdoffset + 8);
             } else {
-                GETEX(x2, 0);
+                GETEX(x2, 0, 8);
                 SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
             }
             break;
@@ -3207,7 +3206,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PSLLQ Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 1);
             ADDI(x4, xZR, 16);
             LD(x3, wback, fixedaddress + 0);
             BLTU_MARK(x3, x4);
@@ -3226,7 +3225,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PSLLQ Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 1);
             ADDI(x4, xZR, 32);
             LD(x3, wback, fixedaddress + 0);
             BLTU_MARK(x3, x4);
@@ -3245,7 +3244,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PSLLQ Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 1);
             ADDI(x4, xZR, 64);
             LD(x3, wback, fixedaddress + 0);
             BLTU_MARK(x3, x4);
@@ -3264,7 +3263,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PMULUDQ Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             // GX->q[1] = (uint64_t)EX->ud[2]*GX->ud[2];
             LWU(x3, gback, gdoffset + 2 * 4);
             LWU(x4, wback, fixedaddress + 2 * 4);
@@ -3280,7 +3279,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PMADDWD Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 14);
             for (int i = 0; i < 4; ++i) {
                 // GX->sd[i] = (int32_t)(GX->sw[i*2+0])*EX->sw[i*2+0] +
                 //             (int32_t)(GX->sw[i*2+1])*EX->sw[i*2+1];
@@ -3298,7 +3297,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PSADBW Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 15);
             MV(x6, xZR);
             for (int i = 0; i < 16; ++i) {
                 LBU(x3, gback, gdoffset + i);
@@ -3319,7 +3318,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PSUBB Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 15);
             for (int i = 0; i < 16; ++i) {
                 // GX->sb[i] -= EX->sb[i];
                 LB(x3, wback, fixedaddress + i);
@@ -3332,28 +3331,28 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PSUBW Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 14);
             SSE_LOOP_W(x3, x4, SUBW(x3, x3, x4));
             break;
         case 0xFA:
             INST_NAME("PSUBD Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
             SSE_LOOP_D(x3, x4, SUBW(x3, x3, x4));
             break;
         case 0xFB:
             INST_NAME("PSUBQ Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             SSE_LOOP_Q(x3, x4, SUB(x3, x3, x4));
             break;
         case 0xFC:
             INST_NAME("PADDB Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 15);
             for (int i = 0; i < 16; ++i) {
                 // GX->sb[i] += EX->sb[i];
                 LB(x3, gback, gdoffset + i);
@@ -3366,14 +3365,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PADDW Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 14);
             SSE_LOOP_W(x3, x4, ADDW(x3, x3, x4));
             break;
         case 0xFE:
             INST_NAME("PADDD Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
             SSE_LOOP_D(x3, x4, ADDW(x3, x3, x4));
             break;
         default:
diff --git a/src/dynarec/rv64/dynarec_rv64_f20f.c b/src/dynarec/rv64/dynarec_rv64_f20f.c
index 6bd1c280..a7942c7e 100644
--- a/src/dynarec/rv64/dynarec_rv64_f20f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f20f.c
@@ -83,7 +83,7 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("MOVDDUP Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 1);
             LD(x3, wback, fixedaddress+0);
             SD(x3, gback, gdoffset+0);
             SD(x3, gback, gdoffset+8);
@@ -312,7 +312,7 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("PSHUFLW Gx, Ex, Ib");
             nextop = F8;
             GETGX();
-            GETEX(x2, 1);
+            GETEX(x2, 1, 8);
             u8 = F8;
             int32_t idx;
 
@@ -339,7 +339,7 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("HADDPS Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
             s0 = fpu_get_scratch(dyn);
             s1 = fpu_get_scratch(dyn);
             // GX->f[0] += GX->f[1];
@@ -420,7 +420,7 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("CVTPD2DQ Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             d0 = fpu_get_scratch(dyn);
             u8 = sse_setround(dyn, ninst, x6, x4);
             for (int i=0; i<2 ; ++i) {
@@ -438,7 +438,7 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("LDDQU Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             SSE_LOOP_MV_Q(x3);
             break;
         default:
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c
index bebcc551..7401d717 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f.c
@@ -85,7 +85,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("MOVSLDUP Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
 
             // GX->ud[1] = GX->ud[0] = EX->ud[0];
             // GX->ud[3] = GX->ud[2] = EX->ud[2];
@@ -100,7 +100,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("MOVSHDUP Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
 
             // GX->ud[1] = GX->ud[0] = EX->ud[1];
             // GX->ud[3] = GX->ud[2] = EX->ud[3];
@@ -273,14 +273,14 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("MOVDQU Gx,Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             SSE_LOOP_MV_Q(x3);
             break;
         case 0x70: // TODO: Optimize this!
             INST_NAME("PSHUFHW Gx, Ex, Ib");
             nextop = F8;
             GETGX();
-            GETEX(x2, 1);
+            GETEX(x2, 1, 14);
             u8 = F8;
             int32_t idx;
 
@@ -323,7 +323,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("MOVDQU Ex,Gx");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 8);
             SSE_LOOP_MV_Q2(x3);
             if (!MODREG) SMWRITE2();
             break;
@@ -332,7 +332,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("CVTTPS2DQ Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 12);
             v0 = fpu_get_scratch(dyn);
             for (int i = 0; i < 4; ++i) {
                 if (!box64_dynarec_fastround) {
@@ -491,7 +491,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("CVTDQ2PD Gx, Ex");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0);
+            GETEX(x2, 0, 4);
             q0 = fpu_get_scratch(dyn);
             q1 = fpu_get_scratch(dyn);
             LW(x3, wback, fixedaddress + 0);
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 22ef28f2..acd9875d 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -481,17 +481,16 @@
     gdoffset = offsetof(x64emu_t, xmm[gd])
 
 // Get Ex address in general register a, will purge SS or SD if it's reg and is loaded. May use x3. Use wback as load address!
-#define GETEX(a, D)                                                                            \
-    if (MODREG) {                                                                              \
-        ed = (nextop & 7) + (rex.b << 3);                                                      \
-        sse_forget_reg(dyn, ninst, x3, ed);                                                    \
-        fixedaddress = offsetof(x64emu_t, xmm[ed]);                                            \
-        wback = xEmu;                                                                          \
-    } else {                                                                                   \
-        SMREAD();                                                                              \
-        ed = 16;                                                                               \
-        addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, 0, D); \
-        fixedaddress = 0; /* TODO: optimize this! */                                           \
+#define GETEX(a, D, I12) /* access Ex at wback + fixedaddress; I12 is passed to geted */         \
+    if (MODREG) { /* register operand: address xmm[ed] inside x64emu_t through xEmu */           \
+        ed = (nextop & 7) + (rex.b << 3);                                                        \
+        sse_forget_reg(dyn, ninst, x3, ed);                                                      \
+        fixedaddress = offsetof(x64emu_t, xmm[ed]);                                              \
+        wback = xEmu;                                                                            \
+    } else { /* memory operand: geted receives &wback, &fixedaddress and I12 */                  \
+        SMREAD();                                                                                \
+        ed = 16; /* not produced by the MODREG branch if rex.b is 0/1 - TODO confirm role */     \
+        addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, I12, D); \
     }
 
 // Get GX as a quad (might use x1)
@@ -518,17 +517,16 @@
     gdoffset = offsetof(x64emu_t, mmx[gd])
 
 // Get EM, might use x3
-#define GETEM(a, D)                                                                            \
-    if (MODREG) {                                                                              \
-        ed = (nextop & 7);                                                                     \
-        mmx_forget_reg(dyn, ninst, ed);                                                        \
-        fixedaddress = offsetof(x64emu_t, mmx[ed]);                                            \
-        wback = xEmu;                                                                          \
-    } else {                                                                                   \
-        SMREAD();                                                                              \
-        ed = 8;                                                                                \
-        addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, 0, D); \
-        fixedaddress = 0; /* TODO: optimize this! */                                           \
+#define GETEM(a, D, I12) /* access Em at wback + fixedaddress; I12 is passed to geted */         \
+    if (MODREG) { /* register operand: address mmx[ed] inside x64emu_t through xEmu */           \
+        ed = (nextop & 7);                                                                       \
+        mmx_forget_reg(dyn, ninst, ed);                                                          \
+        fixedaddress = offsetof(x64emu_t, mmx[ed]);                                              \
+        wback = xEmu;                                                                            \
+    } else { /* memory operand: geted receives &wback, &fixedaddress and I12 */                  \
+        SMREAD();                                                                                \
+        ed = 8; /* outside the 0..7 range of the MODREG branch - TODO confirm its role */        \
+        addr = geted(dyn, addr, ninst, nextop, &wback, a, x3, &fixedaddress, rex, NULL, I12, D); \
     }
 
 #define GETGX_empty_vector(a)                   \