about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2023-03-25 00:21:06 +0800
committer GitHub <noreply@github.com> 2023-03-24 17:21:06 +0100
commit 5e680e0f6678a9b9c51b603c8ea1333c325b2d26 (patch)
tree d8b8a246c365ad461be83fa19f96e38666e06a7d /src
parent 45015d6106133b940cbb5f24e08aac4d70bd7de8 (diff)
download box64-5e680e0f6678a9b9c51b603c8ea1333c325b2d26.tar.gz
box64-5e680e0f6678a9b9c51b603c8ea1333c325b2d26.zip
[RV64_DYNAREC] Added more opcodes (#633)
* [RV64_DYNAREC] Added 81,83 /2 ADC opcode

* [RV64_DYNAREC] Added 34 XOR opcode

* [RV64_DYNAREC] Added 66 3B CMP opcode

* [RV64_DYNAREC] Added 66 09 OR opcode

* [RV64_DYNAREC] Added 66 D1,D3 /5 SHR opcode

* [RV64_DYNAREC] Added 30 XOR opcode

* [RV64_DYNAREC] Added F6 /5 IMUL opcode

* [RV64_DYNAREC] Added 0C OR opcode

* [RV64_DYNAREC] Added 2A SUB opcode

* [RV64_DYNAREC] Added 66 01 ADD opcode

* [RV64_DYNAREC] Added 81,83 /3 SBB opcode

* [RV64_DYNAREC] Added 66 69,6B IMUL opcode

* [RV64_DYNAREC] Added 66 03 ADD opcode

* [RV64_DYNAREC] Added 18 SBB opcode & some fixes & some optims

* [RV64_DYNAREC] Added F6 /2 NOT opcode

* [RV64_DYNAREC] Added C0 /0 ROL opcode & bug fixes

* [RV64_DYNAREC] Added 6A PUSH opcode
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00.c 177
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_66.c 70
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_emit_math.c 128
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h 18
4 files changed, 324 insertions, 69 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c
index fb259259..27f9628f 100644
--- a/src/dynarec/rv64/dynarec_rv64_00.c
+++ b/src/dynarec/rv64/dynarec_rv64_00.c
@@ -66,9 +66,9 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("ADD Gb, Eb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
-            GETEB(x2, 0);
-            GETGB(x1);
-            emit_add8(dyn, ninst, x1, x2, x3, x4);
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_add8(dyn, ninst, x1, x2, x4, x5);
             GBBACK(x5);
             break;
         case 0x03:
@@ -90,9 +90,9 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
             GETEB(x1, 0);
-            GETGB(x3);
-            emit_or8(dyn, ninst, x1, x3, x4, x5);
-            EBBACK(x5);
+            GETGB(x2);
+            emit_or8(dyn, ninst, x1, x2, x4, x5);
+            EBBACK(x5, 0);
             break;
         case 0x09:
             INST_NAME("OR Ed, Gd");
@@ -108,9 +108,9 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("OR Gb, Eb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
-            GETEB(x3, 0);
-            GETGB(x1);
-            emit_or8(dyn, ninst, x1, x3, x4, x5);
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_or8(dyn, ninst, x1, x2, x4, x5);
             GBBACK(x5);
             break;
         case 0x0B:
@@ -121,6 +121,15 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETED(0);
             emit_or32(dyn, ninst, rex, gd, ed, x3, x4);
             break;
+        case 0x0C:
+            INST_NAME("OR AL, Ib");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            u8 = F8;
+            ANDI(x1, xRAX, 0xff);
+            emit_or8c(dyn, ninst, x1, u8, x3, x4, x5);
+            ANDI(xRAX, xRAX, ~0xff);
+            OR(xRAX, xRAX, x1);
+            break;
         case 0x0D:
             INST_NAME("OR EAX, Id");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -143,6 +152,16 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 DEFAULT;
             }
             break;
+        case 0x18:
+            INST_NAME("SBB Eb, Gb");
+            READFLAGS(X_CF);
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_sbb8(dyn, ninst, x1, x2, x4, x5, x6);
+            EBBACK(x5, 0);
+            break;
         case 0x19:
             INST_NAME("SBB Ed, Gd");
             READFLAGS(X_CF);
@@ -168,9 +187,9 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
             GETEB(x1, 0);
-            GETGB(x3);
-            emit_and8(dyn, ninst, x1, x3, x4, x5);
-            EBBACK(x3);
+            GETGB(x2);
+            emit_and8(dyn, ninst, x1, x2, x4, x5);
+            EBBACK(x4, 0);
             break;
         case 0x21:
             INST_NAME("AND Ed, Gd");
@@ -185,9 +204,9 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("AND Gb, Eb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
-            GETEB(x3, 0);
-            GETGB(x1);
-            emit_and8(dyn, ninst, x1, x3, x4, x5);
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_and8(dyn, ninst, x1, x2, x4, x5);
             GBBACK(x5);
             break;
         case 0x23:
@@ -213,7 +232,15 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             emit_sub32(dyn, ninst, rex, ed, gd, x3, x4, x5);
             WBACK;
             break;
-
+        case 0x2A:
+            INST_NAME("SUB Gb, Eb");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_sub8(dyn, ninst, x2, x1, x4, x5, x6);
+            GBBACK(x5);
+            break;
         case 0x2B:
             INST_NAME("SUB Gd, Ed");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -237,6 +264,15 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             i64 = F32S;
             emit_sub32c(dyn, ninst, rex, xRAX, i64, x2, x3, x4, x5);
             break;
+        case 0x30:
+            INST_NAME("XOR Eb, Gb");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_xor8(dyn, ninst, x1, x2, x4, x5);
+            EBBACK(x5, 0);
+            break;
         case 0x31:
             INST_NAME("XOR Ed, Gd");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -253,8 +289,8 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
             GETEB(x1, 0);
-            GETGB(x3);
-            emit_xor8(dyn, ninst, x3, x1, x4, x5);
+            GETGB(x2);
+            emit_xor8(dyn, ninst, x2, x1, x4, x5);
             GBBACK(x5);
             break;
         case 0x33:
@@ -265,6 +301,15 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETED(0);
             emit_xor32(dyn, ninst, rex, gd, ed, x3, x4);
             break;
+        case 0x34:
+            INST_NAME("XOR AL, Ib");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            u8 = F8;
+            ANDI(x1, xRAX, 0xff);
+            emit_xor8c(dyn, ninst, x1, u8, x3, x4);
+            ANDI(xRAX, xRAX, ~0xff);
+            OR(xRAX, xRAX, x1);
+            break;
         case 0x35:
             INST_NAME("XOR EAX, Id");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -422,7 +467,12 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ZEROUP(gd);
             }
             break;
-
+        case 0x6A:
+            INST_NAME("PUSH Ib");
+            i64 = F8S;
+            MOV64x(x3, i64);
+            PUSH1(x3);
+            break;
         case 0x6B:
             INST_NAME("IMUL Gd, Ed, Id");
             SETFLAGS(X_ALL, SF_PENDING);
@@ -495,16 +545,16 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEB(x1, 1);
                     u8 = F8;
-                    emit_add8c(dyn, ninst, x1, u8, x3, x4, x5);
-                    EBBACK(x3);
+                    emit_add8c(dyn, ninst, x1, u8, x2, x4, x5);
+                    EBBACK(x5, 0);
                     break;
                 case 1: // OR
                     INST_NAME("OR Eb, Ib");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEB(x1, 1);
                     u8 = F8;
-                    emit_or8c(dyn, ninst, x1, u8, x3, x4, x5);
-                    EBBACK(x3);
+                    emit_or8c(dyn, ninst, x1, u8, x2, x4, x5);
+                    EBBACK(x5, 0);
                     break;
                 case 3: // SBB
                     INST_NAME("SBB Eb, Ib");
@@ -512,32 +562,32 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEB(x1, 1);
                     u8 = F8;
-                    emit_sbb8c(dyn, ninst, x1, u8, x3, x4, x5, x6);
-                    EBBACK(x3);
+                    emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5, x6);
+                    EBBACK(x5, 0);
                     break;
                 case 4: // AND
                     INST_NAME("AND Eb, Ib");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEB(x1, 1);
                     u8 = F8;
-                    emit_and8c(dyn, ninst, x1, u8, x3, x4);
-                    EBBACK(x3);
+                    emit_and8c(dyn, ninst, x1, u8, x2, x4);
+                    EBBACK(x5, 0);
                     break;
                 case 5: // SUB
                     INST_NAME("SUB Eb, Ib");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEB(x1, 1);
                     u8 = F8;
-                    emit_sub8c(dyn, ninst, x1, u8, x3, x4, x5, x6);
-                    EBBACK(x3);
+                    emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5, x6);
+                    EBBACK(x5, 0);
                     break;
                 case 6: // XOR
                     INST_NAME("XOR Eb, Ib");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEB(x1, 1);
                     u8 = F8;
-                    emit_xor8c(dyn, ninst, x1, u8, x3, x4);
-                    EBBACK(x3);
+                    emit_xor8c(dyn, ninst, x1, u8, x2, x4);
+                    EBBACK(x5, 0);
                     break;
                 case 7: // CMP
                     INST_NAME("CMP Eb, Ib");
@@ -575,6 +625,26 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     emit_or32c(dyn, ninst, rex, ed, i64, x3, x4);
                     WBACK;
                     break;
+                case 2: // ADC
+                    if(opcode==0x81) {INST_NAME("ADC Ed, Id");} else {INST_NAME("ADC Ed, Ib");}
+                    READFLAGS(X_CF);
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETED((opcode==0x81)?4:1);
+                    if(opcode==0x81) i64 = F32S; else i64 = F8S;
+                    MOV64xw(x5, i64);
+                    emit_adc32(dyn, ninst, rex, ed, x5, x3, x4, x6);
+                    WBACK;
+                    break;
+                case 3: // SBB
+                    if(opcode==0x81) {INST_NAME("SBB Ed, Id");} else {INST_NAME("SBB Ed, Ib");}
+                    READFLAGS(X_CF);
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETED((opcode==0x81)?4:1);
+                    if(opcode==0x81) i64 = F32S; else i64 = F8S;
+                    MOV64xw(x5, i64);
+                    emit_sbb32(dyn, ninst, rex, ed, x5, x3, x4, x6);
+                    WBACK;
+                    break;
                 case 4: // AND
                     if(opcode==0x81) {INST_NAME("AND Ed, Id");} else {INST_NAME("AND Ed, Ib");}
                     SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -610,8 +680,6 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     } else
                         emit_cmp32_0(dyn, ninst, rex, ed, x3, x4);
                     break;
-                default:
-                    DEFAULT;
             }
             break;
         case 0x84:
@@ -907,6 +975,16 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0xC0:
             nextop = F8;
             switch((nextop>>3)&7) {
+                case 0:
+                    INST_NAME("ROL Eb, Ib");
+                    MESSAGE(LOG_DUMP, "Need Optimization\n");
+                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    GETEB(x1, 1);
+                    u8 = F8;
+                    MOV32w(x2, u8);
+                    CALL_(rol8, ed, x3);
+                    EBBACK(x5, 0);
+                    break;
                 case 4:
                 case 6:
                     INST_NAME("SHL Eb, Ib");
@@ -919,7 +997,7 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         };
                         UFLAG_OP1(ed);
                         SLLIW(ed, ed, u8);
-                        EBBACK(x5);
+                        EBBACK(x5, 1);
                         UFLAG_RES(ed);
                         UFLAG_DF(x3, d_shl8);
                     } else {
@@ -938,7 +1016,7 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         UFLAG_OP1(ed);
                         if(u8) {
                             SRLIW(ed, ed, u8);
-                            EBBACK(x5);
+                            EBBACK(x5, 1);
                         }
                         UFLAG_RES(ed);
                         UFLAG_DF(x3, d_shr8);
@@ -958,7 +1036,7 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         UFLAG_OP1(ed);
                         if(u8) {
                             SRAIW(ed, ed, u8);
-                            EBBACK(x5);
+                            EBBACK(x5, 1);
                         }
                         UFLAG_RES(ed);
                         UFLAG_DF(x3, d_sar8);
@@ -1174,7 +1252,7 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     GETEB(x1, 0);
                     UFLAG_OP12(ed, x4);
                     SRLW(ed, ed, x4);
-                    EBBACK(x3);
+                    EBBACK(x5, 1);
                     UFLAG_RES(ed);
                     UFLAG_DF(x3, d_shr8);
                     break;
@@ -1190,7 +1268,7 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     GETSEB(x1, 0);
                     UFLAG_OP12(ed, x4)
                     SRA(ed, ed, x4);
-                    EBBACK(x3);
+                    EBBACK(x3, 1);
                     UFLAG_RES(ed);
                     UFLAG_DF(x3, d_sar8);
                     break;
@@ -1427,6 +1505,12 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     MOV32w(x2, u8);
                     emit_test8(dyn, ninst, x1, x2, x3, x4, x5);
                     break;
+                case 2:
+                    INST_NAME("NOT Eb");
+                    GETEB(x1, 0);
+                    NOT(x1, x1);
+                    EBBACK(x5, 1);
+                    break;
                 case 4:
                     INST_NAME("MUL AL, Ed");
                     SETFLAGS(X_ALL, SF_PENDING);
@@ -1441,6 +1525,21 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     SRLI(x1, x1, 48);
                     OR(xRAX, xRAX, x1);
                     break;
+                case 5:
+                    INST_NAME("IMUL AL, Eb");
+                    SETFLAGS(X_ALL, SF_PENDING);
+                    UFLAG_DF(x1, d_imul8);
+                    GETSEB(x1, 0);
+                    SLLI(x2, xRAX, 56);
+                    SRAI(x2, x2, 56);
+                    MULW(x1, x2, x1);
+                    UFLAG_RES(x1);
+                    LUI(x2, 0xffff0);
+                    AND(xRAX, xRAX, x2);
+                    SLLI(x1, x1, 48);
+                    SRLI(x1, x1, 48);
+                    OR(xRAX, xRAX, x1);
+                    break;
                 case 6:
                     INST_NAME("DIV Eb");
                     MESSAGE(LOG_DUMP, "Need Optimization\n");
@@ -1482,7 +1581,7 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     INST_NAME("NEG Ed");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETED(0);
-                    emit_neg32(dyn, ninst, rex, ed, x3, x4, x5);
+                    emit_neg32(dyn, ninst, rex, ed, x3, x4);
                     WBACK;
                     break;
                 case 4:
diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c
index 5989f2bf..1caf2a74 100644
--- a/src/dynarec/rv64/dynarec_rv64_66.c
+++ b/src/dynarec/rv64/dynarec_rv64_66.c
@@ -61,6 +61,24 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         return dynarec64_00(dyn, addr-1, ip, ninst, rex, rep, ok, need_epilog); // addr-1, to "put back" opcode
 
     switch(opcode) {
+        case 0x01:
+            INST_NAME("ADD Ew, Gw");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGW(x2);
+            GETEW(x1, 0);
+            emit_add16(dyn, ninst, x1, x2, x4, x5);
+            EWBACK;
+            break;
+        case 0x03:
+            INST_NAME("ADD Gw, Ew");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGW(x1);
+            GETEW(x2, 0);
+            emit_add16(dyn, ninst, x1, x2, x3, x4);
+            GWBACK;
+            break;
         case 0x05:
             INST_NAME("ADD AX, Iw");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -73,6 +91,15 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             AND(xRAX, xRAX, x3);
             OR(xRAX, xRAX, x1);
             break;
+        case 0x09:
+            INST_NAME("OR Ew, Gw");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGW(x2);
+            GETEW(x1, 0);
+            emit_or16(dyn, ninst, x1, x2, x4, x2);
+            EWBACK;
+            break;
         case 0x0B:
             INST_NAME("OR Gw, Ew");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -186,6 +213,14 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETEW(x1, 0);
             emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5, x6);
             break;
+        case 0x3B:
+            INST_NAME("CMP Gw, Ew");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGW(x1);
+            GETEW(x2, 0);
+            emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5, x6);
+            break;
         case 0x3D:
             INST_NAME("CMP AX, Iw");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -199,6 +234,24 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 emit_cmp16_0(dyn, ninst, x1, x3, x4);
             }
             break;
+        case 0x69:
+        case 0x6B:
+            if(opcode==0x69) {
+                INST_NAME("IMUL Gw,Ew,Iw");
+            } else {
+                INST_NAME("IMUL Gw,Ew,Ib");
+            }
+            SETFLAGS(X_ALL, SF_PENDING);
+            nextop = F8;
+            UFLAG_DF(x1, d_imul16);
+            GETSEW(x1, (opcode==0x69)?2:1);
+            if(opcode==0x69) i32 = F16S; else i32 = F8S;
+            MOV32w(x2, i32);
+            MULW(x2, x2, x1);
+            UFLAG_RES(x2);
+            gd=x2;
+            GWBACK;
+            break;
         case 0x70:
         case 0x71:
         case 0x72:
@@ -470,6 +523,23 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0xD3:
             nextop = F8;
             switch((nextop>>3)&7) {
+                case 5:
+                    if(opcode==0xD1) {
+                        INST_NAME("SHR Ew, 1");
+                        MOV32w(x4, 1);
+                    } else {
+                        INST_NAME("SHR Ew, CL");
+                        ANDI(x4, xRCX, 0x1f);
+                    }
+                    UFLAG_IF {MESSAGE(LOG_DUMP, "Need Optimization for flags\n");}
+                    SETFLAGS(X_ALL, SF_PENDING);
+                    GETEW(x1, 0);
+                    UFLAG_OP12(ed, x4)
+                    SRL(ed, ed, x4);
+                    EWBACK;
+                    UFLAG_RES(ed);
+                    UFLAG_DF(x3, d_shr16);
+                    break;
                 case 7:
                     if(opcode==0xD1) {
                         INST_NAME("SAR Ew, 1");
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c
index dc9b81fd..bd2dbb96 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_math.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c
@@ -372,16 +372,13 @@ void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, i
     }
 }
 
-
-// emit SUB8 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
-void emit_sub8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, int s4, int s5)
+// emit SUB8 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch
+void emit_sub8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
 {
     CLEAR_FLAGS();
-
-    MOV32w(s5, c&0xff);
     IFX(X_PEND) {
         SB(s1, xEmu, offsetof(x64emu_t, op1));
-        SB(s5, xEmu, offsetof(x64emu_t, op2));
+        SB(s2, xEmu, offsetof(x64emu_t, op2));
         SET_DF(s3, d_sub8);
     } else IFX(X_ALL) {
         SET_DFNONE();
@@ -389,10 +386,10 @@ void emit_sub8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, i
 
     IFX(X_AF | X_CF | X_OF) {
         // for later flag calculation
-        NOT(s2, s1);
+        NOT(s5, s1);
     }
 
-    SUB(s1, s1, s5);
+    SUB(s1, s1, s2);
     IFX(X_SF) {
         BGE(s1, xZR, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
@@ -401,7 +398,7 @@ void emit_sub8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, i
     IFX(X_PEND) {
         SB(s1, xEmu, offsetof(x64emu_t, res));
     }
-    CALC_SUB_FLAGS(s2, s5, s1, s3, s4, 8);
+    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 8);
     IFX(X_ZF) {
         BNEZ(s1, 8);
         ORI(xFlags, xFlags, 1 << F_ZF);
@@ -411,6 +408,13 @@ void emit_sub8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, i
     }
 }
 
+// emit SUB8 instruction, from s1, constant c, store result in s1 using s2, s3, s4 and s5 as scratch
+void emit_sub8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, int s4, int s5)
+{
+    MOV32w(s2, c&0xff);
+    emit_sub8(dyn, ninst, s1, s2, s3, s4, s5);
+}
+
 // emit SUB16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
 void emit_sub16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
 {
@@ -625,7 +629,6 @@ void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
     }
 
     SUBW(s1, s1, s2);
-    ANDI(s1, s1, 0xff);
     ANDI(s3, xFlags, 1 << F_CF);
     SUBW(s1, s1, s3);
     ANDI(s1, s1, 0xff);
@@ -698,8 +701,8 @@ void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
 }
 
-// emit NEG32 instruction, from s1, store result in s1 using s2, s3 and s4 as scratch
-void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
+// emit NEG32 instruction, from s1, store result in s1 using s2 and s3 as scratch
+void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3)
 {
     CLEAR_FLAGS();
     IFX(X_PEND) {
@@ -709,27 +712,110 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         SET_DFNONE();
     }
     IFX(X_AF | X_OF) {
-        ORI(s3, s1, 1);      // s3 = op1 | op2
-        ANDI(s4, s1, 1);     // s4 = op1 & op2
+        MV(s3, s1);      // s3 = op1
     }
 
     NEG(s1, s1);
     IFX(X_PEND) {
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
+
+    IFX(X_CF) {
+        BEQZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_CF);
+    }
+    
     IFX(X_AF | X_OF) {
-        NOT(s2, s1);     // s2 = ~res
-        AND(s3, s2, s3); // s3 = ~res & (op1 | op2)
-        OR(s3, s3, s4);  // cc = (~res & (op1 | op2)) | (op1 & op2)
+        OR(s3, s1, s3); // s3 = res | op1
         IFX(X_AF) {
-            ANDI(s2, s3, 0x08); // AF: cc & 0x08
+            /* af = bc & 0x8 */
+            ANDI(s2, s3, 8);
             BEQZ(s2, 8);
             ORI(xFlags, xFlags, 1 << F_AF);
         }
         IFX(X_OF) {
+            /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */
+            SRLI(s2, s3, (rex.w?64:32)-2);
+            SRLI(s3, s2, 1);
+            XOR(s2, s2, s3);
+            ANDI(s2, s2, 1);
+            BEQZ(s2, 8);
+            ORI(xFlags, xFlags, 1 << F_OF2);
+        }    
+    }
+    IFX(X_SF) {
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s2);
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+}
+
+
+// emit ADC32 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch
+void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
+{
+    CLEAR_FLAGS();
+    IFX(X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
+        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, rex.w?d_add64:d_add32b);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+    IFX(X_CF) {
+        if (rex.w) {
+            AND(s5, xMASK, s1);
+            AND(s4, xMASK, s2);
+            ADD(s5, s5, s4); // lo
+            SRLI(s3, s1, 0x20);
+            SRLI(s4, s2, 0x20);
+            ADD(s4, s4, s3);
+            SRLI(s5, s5, 0x20);
+            ADD(s5, s5, s4); // hi
+            SRAI(s5, s5, 0x20);
+            BEQZ(s5, 8);
+            ORI(xFlags, xFlags, 1 << F_CF);
+        } else {
+            ADD(s5, s1, s2);
+            SRLI(s5, s5, 0x20);
+            BEQZ(s5, 8);
+            ORI(xFlags, xFlags, 1 << F_CF);
+        }
+    }
+    IFX(X_AF | X_OF) {
+        OR(s3, s1, s2);      // s3 = op1 | op2
+        AND(s4, s1, s2);      // s4 = op1 & op2
+    }
+
+    ADDxw(s1, s1, s2);
+    ANDI(s3, xFlags, 1 << F_CF);
+    ADDxw(s1, s1, s3);
+
+    IFX(X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_AF | X_OF) {
+        NOT(s2, s1);   // s2 = ~res
+        AND(s3, s2, s3);   // s3 = ~res & (op1 | op2)
+        OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
+        IFX(X_AF) {
+            ANDI(s4, s3, 0x08); // AF: cc & 0x08
+            BEQZ(s4, 8);
+            ORI(xFlags, xFlags, 1 << F_AF);
+        }
+        IFX(X_OF) {
             SRLI(s3, s3, rex.w?62:30);
-            SRLI(s2, s3, 1);
-            XOR(s3, s3, s2);
+            SRLI(s4, s3, 1);
+            XOR(s3, s3, s4);
             ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
             BEQZ(s3, 8);
             ORI(xFlags, xFlags, 1 << F_OF2);
@@ -743,7 +829,7 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         ZEROUP(s1);
     }
     IFX(X_PF) {
-        emit_pf(dyn, ninst, s1, s3, s2);
+        emit_pf(dyn, ninst, s1, s3, s4);
     }
     IFX(X_ZF) {
         BNEZ(s1, 8);
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 55467e89..4a6147ac 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -181,12 +181,12 @@
                         wb2 = (wback>>2)*8;     \
                         wback = xRAX+(wback&3); \
                     }                           \
-                    if (wb2) {MV(i, wback); SRLI(i, i, wb2); ANDI(i, i, 0xff);} else ANDI(i, wback, 0xff);   \
+                    if (wb2) {MV(i, wback); SRLI(i, i, wb2); ANDI(i, i, 0xff);} else {ANDI(i, wback, 0xff);}   \
                     wb1 = 0;                    \
                     ed = i;                     \
                 } else {                        \
                     SMREAD();                   \
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 0, D); \
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, NULL, 0, D); \
                     LBU(i, wback, fixedaddress);\
                     wb1 = 1;                    \
                     ed = i;                     \
@@ -224,7 +224,7 @@
                     gb1 = xRAX+(gd&3);                        \
                 }                                             \
                 gd = i;                                       \
-                if (gb2) {MV(gd, gb1); SRLI(gd, gd, 8); ANDI(gd, gd, 0xff);} else ANDI(gd, gb1, 0xff);
+                if (gb2) {MV(gd, gb1); SRLI(gd, gd, 8); ANDI(gd, gd, 0xff);} else {ANDI(gd, gb1, 0xff);}
 
 // Write gb (gd) back to original register / memory, using s1 as scratch
 #define GBBACK(s1) if(gb2) {                            \
@@ -239,19 +239,19 @@
                 }
 
 // Write eb (ed) back to original register / memory, using s1 as scratch
-#define EBBACK(s1) if(wb1) {                            \
+#define EBBACK(s1, c) if(wb1) {                         \
                     SB(ed, wback, fixedaddress);        \
                     SMWRITE();                          \
                 } else if(wb2) {                        \
                     assert(wb2 == 8);                   \
                     MOV64x(s1, 0xffffffffffff00ffLL);   \
                     AND(wback, wback, s1);              \
-                    ANDI(ed, ed, 0xff);                 \
+                    if (c) {ANDI(ed, ed, 0xff);}        \
                     SLLI(s1, ed, 8);                    \
                     OR(wback, wback, s1);               \
                 } else {                                \
                     ANDI(wback, wback, ~0xff);          \
-                    ANDI(ed, ed, 0xff);                 \
+                    if (c) {ANDI(ed, ed, 0xff);}        \
                     OR(wback, wback, ed);               \
                 }
 
@@ -766,7 +766,7 @@ void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4);
 void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5);
-//void emit_sub8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+void emit_sub8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_sub8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4, int s5);
 void emit_or32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_or32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
@@ -796,7 +796,7 @@ void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 //void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4);
 //void emit_dec16(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
 //void emit_dec8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
-//void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
+void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 //void emit_adc32c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 //void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 //void emit_adc8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5);
@@ -808,7 +808,7 @@ void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
 void emit_sbb8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5, int s6);
 //void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 //void emit_sbb16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
-void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
+void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3);
 //void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
 //void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
 void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);