author    ptitSeb <sebastien.chev@gmail.com>  2023-03-15 18:05:40 +0000
committer ptitSeb <sebastien.chev@gmail.com>  2023-03-15 18:05:40 +0000
commit    d5284a570430af9ea1666c87b2da70e7c0e97ad4 (patch)
tree      36a6bf99a4263e695fc46e1205be7532bc55c8ea
parent    9b2b603c8ddb2c4b94cbab8e5f02d11989641f5d (diff)
[RV64_DYNAREC] Added E8 CALL opcode, and fixed some issues with many macros
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_00.c      | 119
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_helper.c  | 126
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_helper.h  |  75
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_pass3.h   |   2
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_private.h |   9
-rw-r--r--  src/dynarec/rv64/rv64_emitter.h         |  17
6 files changed, 329 insertions, 19 deletions
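
For reference before the diff: the new 0xE8 handler has to reproduce what the guest instruction does. E8 (CALL rel32) reads a signed 32-bit displacement, pushes the address of the instruction that follows the CALL, and jumps to that address plus the displacement. The sketch below only illustrates those guest semantics and is not box64 code; cpu_t and call_rel32 are made-up names and guest memory is assumed flat. In the handler below, F32S reads the immediate and advances addr past it, so addr+i32 is exactly this target.

#include <stdint.h>
#include <string.h>

typedef struct { uint64_t rip, rsp; uint8_t *mem; } cpu_t;   /* made-up mini-CPU, flat memory */

void call_rel32(cpu_t *cpu)
{
    int32_t disp;
    memcpy(&disp, &cpu->mem[cpu->rip + 1], sizeof(disp));    /* rel32 follows the E8 byte */
    uint64_t next = cpu->rip + 5;                            /* E8 opcode + 4 immediate bytes */
    cpu->rsp -= 8;
    memcpy(&cpu->mem[cpu->rsp], &next, sizeof(next));        /* push the return address */
    cpu->rip = next + (int64_t)disp;                         /* jump to the call target */
}
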
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c
index f72e9f1b..6bdb9c1b 100644
--- a/src/dynarec/rv64/dynarec_rv64_00.c
+++ b/src/dynarec/rv64/dynarec_rv64_00.c
@@ -92,7 +92,6 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             SD(gd, xRSP, -8);
             SUBI(xRSP, xRSP, 8);
             break;
-
         case 0x58:
         case 0x59:
         case 0x5A:
@@ -151,6 +150,7 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     DEFAULT;
             }
             break;
+
         case 0x85:
             INST_NAME("TEST Ed, Gd");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -159,6 +159,7 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETED(0);
             emit_test32(dyn, ninst, rex, ed, gd, x3, x4, x5);
             break;
+
         case 0x89:
             INST_NAME("MOV Ed, Gd");
             nextop=F8;
@@ -257,6 +258,122 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SMWRITELOCK(lock);
             }
             break;
+
+        case 0xE8:
+            INST_NAME("CALL Id");
+            i32 = F32S;
+            if(addr+i32==0) {
+                #if STEP == 3
+                printf_log(LOG_INFO, "Warning, CALL to 0x0 at %p (%p)\n", (void*)addr, (void*)(addr-1));
+                #endif
+            }
+            #if STEP < 2
+            if(isNativeCall(dyn, addr+i32, &dyn->insts[ninst].natcall, &dyn->insts[ninst].retn))
+                tmp = dyn->insts[ninst].pass2choice = 3;
+            else 
+                tmp = dyn->insts[ninst].pass2choice = 0;
+            #else
+                tmp = dyn->insts[ninst].pass2choice;
+            #endif
+            switch(tmp) {
+                case 3:
+                    SETFLAGS(X_ALL, SF_SET);    // Hack to set flags to "don't care" state
+                    BARRIER(BARRIER_FULL);
+                    //BARRIER_NEXT(BARRIER_FULL);
+                    if(dyn->last_ip && (addr-dyn->last_ip<0x1000)) {
+                        ADDI(x2, xRIP, addr-dyn->last_ip);
+                    } else {
+                        TABLE64(x2, addr);
+                    }
+                    PUSH1(x2);
+                    MESSAGE(LOG_DUMP, "Native Call to %s (retn=%d)\n", GetNativeName(GetNativeFnc(dyn->insts[ninst].natcall-1)), dyn->insts[ninst].retn);
+                    // calling a native function
+                    //sse_purge07cache(dyn, ninst, x3);     // TODO: check the fpxx to purge/save when implemented
+                    if((box64_log<2 && !cycle_log) && dyn->insts[ninst].natcall) {
+                        tmp=isSimpleWrapper(*(wrapper_t*)(dyn->insts[ninst].natcall+2));
+                        if(tmp>1 || tmp<0)
+                            tmp=0;  // float parameters not ready!
+                    } else
+                        tmp=0;
+                    if((box64_log<2 && !cycle_log) && dyn->insts[ninst].natcall && tmp) {
+                        //GETIP(ip+3+8+8); // read the 0xCC
+                        call_n(dyn, ninst, *(void**)(dyn->insts[ninst].natcall+2+8), tmp);
+                        POP1(xRIP);       // pop the return address
+                        dyn->last_ip = addr;
+                    } else {
+                        GETIP_(dyn->insts[ninst].natcall); // read the 0xCC already
+                        STORE_XEMU_CALL();
+                        ADDI(x1, xEmu, (uint32_t)offsetof(x64emu_t, ip)); // setup addr as &emu->ip
+                        CALL_S(x64Int3, -1);
+                        LOAD_XEMU_CALL();
+                        TABLE64(x3, dyn->insts[ninst].natcall);
+                        ADDI(x3, x3, 2+8+8);
+                        BNE_MARK(xRIP, x3);    // Not the expected address, exit dynarec block
+                        POP1(xRIP);       // pop the return address
+                        if(dyn->insts[ninst].retn) {
+                            if(dyn->insts[ninst].retn<0x1000) {
+                                ADDI(xRSP, xRSP, dyn->insts[ninst].retn);
+                            } else {
+                                MOV64x(x3, dyn->insts[ninst].retn);
+                                ADD(xRSP, xRSP, x3);
+                            }
+                        }
+                        TABLE64(x3, addr);
+                        BNE_MARK(xRIP, x3);    // Not the expected address again
+                        LW(w1, xEmu, offsetof(x64emu_t, quit));
+                        CBZ_NEXT(w1);
+                        MARK;
+                        LOAD_XEMU_REM();    // load remaining registers, as they may have changed
+                        jump_to_epilog(dyn, 0, xRIP, ninst);
+                        dyn->last_ip = addr;
+                    }
+                    break;
+                default:
+                    if((box64_dynarec_safeflags>1) || (ninst && dyn->insts[ninst-1].x64.set_flags)) {
+                        READFLAGS(X_PEND);  // that's suspicious
+                    } else {
+                        SETFLAGS(X_ALL, SF_SET);    // Hack to set flags to "don't care" state
+                    }
+                    // regular call
+                    //BARRIER_NEXT(1);
+                    if(box64_dynarec_callret && box64_dynarec_bigblock>1) {
+                        BARRIER(BARRIER_FULL);
+                    } else {
+                        BARRIER(BARRIER_FLOAT);
+                        *need_epilog = 0;
+                        *ok = 0;
+                    }
+                    TABLE64(x2, addr);
+                    PUSH1(x2);
+                    // TODO: Add support for CALLRET optim
+                    /*if(box64_dynarec_callret) {
+                        // Push actual return address
+                        if(addr < (dyn->start+dyn->isize)) {
+                            // there is a next...
+                            j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;
+                            ADR_S20(x4, j64);
+                        } else {
+                            j64 = getJumpTableAddress64(addr);
+                            TABLE64(x4, j64);
+                            LDR(x4, x4, 0);
+                        }
+                        PUSH1(x4);
+                        PUSH1(x2);
+                    } else */ //CALLRET optim disabled for now.
+                    {
+                        *ok = 0;
+                        *need_epilog = 0;
+                    }
+                    if(addr+i32==0) {   // self modifying code maybe? so use indirect address fetching
+                        TABLE64(x4, addr-4);
+                        LD(x4, x4, 0);
+                        jump_to_next(dyn, 0, x4, ninst);
+                    } else
+                        jump_to_next(dyn, addr+i32, 0, ninst);
+                    break;
+            }
+            break;
+
         default:
             DEFAULT;
     }
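
In the 0xE8 handler above, the choice between the native-call fast path (case 3) and the generic call is taken only in the early passes (#if STEP < 2) and stored in dyn->insts[ninst].pass2choice, so that later passes replay the same decision and emit the same amount of code. A rough, self-contained sketch of that pattern follows; inst_info_t, choose_call_path and looks_like_native_bridge are made-up stand-ins for the real isNativeCall plumbing, not box64 APIs.

#include <stdint.h>

typedef struct { int pass2choice; } inst_info_t;     /* stand-in for dyn->insts[ninst] */

/* stand-in for isNativeCall(): does the target look like a bridge to a wrapped
   native function?  Always "no" here; the real test inspects the guest code. */
static int looks_like_native_bridge(uintptr_t target) { (void)target; return 0; }

int choose_call_path(inst_info_t* inst, uintptr_t target, int step)
{
    if (step < 2)    /* early pass: take the decision and remember it with the instruction */
        inst->pass2choice = looks_like_native_bridge(target) ? 3 : 0;
    /* later passes replay the stored decision, so every pass emits the same code */
    return inst->pass2choice;
}
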
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 7ad82bee..f44a7ffe 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -239,6 +239,121 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst)
     JALR(x2); // save LR...
 }
 
+void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int savereg)
+{
+    MAYUSE(fnc);
+    if(savereg==0)
+        savereg = x6;
+    if(saveflags) {
+        SD(xFlags, xEmu, offsetof(x64emu_t, eflags));
+    }
+    fpu_pushcache(dyn, ninst, reg, 0);
+    if(ret!=-2) {
+        ADDI(xSP, xSP, -16);   // RV64 stack needs to be 16-byte aligned
+        SD(xEmu, xSP, 0);
+        SD(savereg, xSP, 8);
+        // x5..x8, x10..x17, x28..x31 need to be saved by the caller
+        STORE_REG(RAX);
+        STORE_REG(RCX);
+        STORE_REG(R12);
+        STORE_REG(R13);
+        STORE_REG(R14);
+        STORE_REG(R15);
+        SD(xRIP, xEmu, offsetof(x64emu_t, ip));
+    }
+    TABLE64(reg, (uintptr_t)fnc);
+    JALR(reg);
+    if(ret>=0) {
+        MV(ret, xEmu);
+    }
+    if(ret!=-2) {
+        LD(xEmu, xSP, 0);
+        LD(savereg, xSP, 8);
+        ADDI(xSP, xSP, 16);
+        #define GO(A)   if(ret!=x##A) {LOAD_REG(A);}
+        GO(RAX);
+        GO(RCX);
+        GO(R12);
+        GO(R13);
+        GO(R14);
+        GO(R15);
+        if(ret!=xRIP)
+            LD(xRIP, xEmu, offsetof(x64emu_t, ip));
+        #undef GO
+    }
+    // regenerate mask
+    XORI(xMASK, xZR, -1);
+    SRLI(xMASK, xMASK, 32);
+
+    fpu_popcache(dyn, ninst, reg, 0);
+    if(saveflags) {
+        LD(xFlags, xEmu, offsetof(x64emu_t, eflags));
+    }
+    SET_NODF();
+    dyn->last_ip = 0;
+}
+
+void call_n(dynarec_rv64_t* dyn, int ninst, void* fnc, int w)
+{
+    MAYUSE(fnc);
+    SD(xFlags, xEmu, offsetof(x64emu_t, eflags));
+    fpu_pushcache(dyn, ninst, x3, 1);
+    // x5..x8, x10..x17, x28..x31 need to be saved by the caller
+    // RDI, RSI, RDX, RCX, R8, R9 are used for function call
+    ADDI(xSP, xSP, -16);
+    SD(xEmu, xSP, 0);
+    SD(xRIP, xSP, 8);   // RV64 stack needs to be 16-byte aligned
+    STORE_REG(R12);
+    STORE_REG(R13);
+    STORE_REG(R14);
+    STORE_REG(R15);
+    // float and double args
+    if(abs(w)>1) {
+        /*MESSAGE(LOG_DUMP, "Getting %d XMM args\n", abs(w)-1);
+        for(int i=0; i<abs(w)-1; ++i) {
+            sse_get_reg(dyn, ninst, x6, i, w);
+        }*/
+        MESSAGE(LOG_DUMP, "Warning XMM args not ready\n");
+    }
+    if(w<0) {
+        /*
+        MESSAGE(LOG_DUMP, "Return in XMM0\n");
+        sse_get_reg_empty(dyn, ninst, x6, 0);
+        */
+        MESSAGE(LOG_DUMP, "Warning return in XMM args not ready\n");
+    }
+    // prepare regs for native call
+    MV(A0, xRDI);
+    MV(A1, xRSI);
+    MV(A2, xRDX);
+    MV(A3, xRCX);
+    MV(A4, xR8);
+    MV(A5, xR9);
+    // native call
+    TABLE64(16, (uintptr_t)fnc);    // using x16 as a scratch reg for the call address
+    JALR(16);
+    // put return value in x64 regs
+    if(w>0) {
+        MV(xRAX, A0);
+        MV(xRDX, A1);
+    }
+    // all done, restore all regs
+    LD(xEmu, xSP, 0);
+    LD(xRIP, xSP, 8);
+    ADDI(xSP, xSP, 16);
+    LOAD_REG(R12);
+    LOAD_REG(R13);
+    LOAD_REG(R14);
+    LOAD_REG(R15);
+    // regenerate mask
+    XORI(xMASK, xZR, -1);
+    SRLI(xMASK, xMASK, 32);
+
+    fpu_popcache(dyn, ninst, x3, 1);
+    LD(xFlags, xEmu, offsetof(x64emu_t, eflags));
+    SET_NODF();
+}
+
 void fpu_reset(dynarec_rv64_t* dyn)
 {
     //TODO
@@ -287,13 +402,8 @@ void fpu_popcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07)
 
 void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val)
 {
-    int32_t up=(val>>12);
-    int32_t r = val-(up<<12);
-    // check if there is the dreaded sign bit on imm12
-    if(r&0b100000000000 && r!=0xffffffff) {
-        ++up;
-        r = val-(up<<12);
-    }
+    int32_t up=((val+0x800)>>12);
+    int32_t r = val&0xfff;
     LUI(reg, up);
     if(r) {
         ADDI(reg, reg, r);
@@ -304,7 +414,7 @@ void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val)
 {
     if(((val<<(64-12))>>(64-12))==val) {
         // simple 12bit value
-        MOV_U12(reg, (val&0b111111111111));
+        MOV_U12(reg, (val&0xfff));
         return;
     }
     if(((val<<32)>>32)==val) {
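
The rv64_move32() change above replaces the old sign-bit special case with the standard RISC-V constant-splitting trick: ADDI sign-extends its 12-bit immediate, so the LUI part has to be rounded up exactly when the low 12 bits will be read back as negative, which is what adding 0x800 before the shift does. Below is a small self-contained check of that identity; it is illustrative only, not box64 code.

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* sweep a spread of 32-bit values and check val == (up<<12) + sext12(lo) */
    for (int64_t v = INT32_MIN; v <= INT32_MAX; v += 0x3fb) {
        int32_t val = (int32_t)v;
        int32_t up  = (int32_t)(((int64_t)val + 0x800) >> 12);  /* what LUI gets */
        int32_t lo  = val & 0xfff;                               /* what ADDI gets */
        int32_t slo = (lo ^ 0x800) - 0x800;                      /* ADDI sign-extends it */
        uint32_t rebuilt = ((uint32_t)up << 12) + (uint32_t)slo;
        assert(rebuilt == (uint32_t)val);
    }
    return 0;
}
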
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index c4bebc19..a38fdf46 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -98,6 +98,41 @@
                     wb1 = 1;                    \
                     ed = i;                     \
                 }
+// CALL will use x6 for the call address. Return value can be put in ret (unless ret is -1)
+// R0 will not be pushed/popped if ret is -2
+#define CALL(F, ret) call_c(dyn, ninst, F, x6, ret, 1, 0)
+// CALL_ will use x6 for the call address. Return value can be put in ret (unless ret is -1)
+// R0 will not be pushed/popped if ret is -2
+#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x6, ret, 1, reg)
+// CALL_S will use x6 for the call address. Return value can be put in ret (unless ret is -1)
+// R0 will not be pushed/popped if ret is -2. Flags are not saved/restored
+#define CALL_S(F, ret) call_c(dyn, ninst, F, x6, ret, 0, 0)
+
+#define MARK    dyn->insts[ninst].mark = dyn->native_size
+#define GETMARK dyn->insts[ninst].mark
+#define MARK2   dyn->insts[ninst].mark2 = dyn->native_size
+#define GETMARK2 dyn->insts[ninst].mark2
+#define MARK3   dyn->insts[ninst].mark3 = dyn->native_size
+#define GETMARK3 dyn->insts[ninst].mark3
+#define MARKF   dyn->insts[ninst].markf = dyn->native_size
+#define GETMARKF dyn->insts[ninst].markf
+#define MARKSEG dyn->insts[ninst].markseg = dyn->native_size
+#define GETMARKSEG dyn->insts[ninst].markseg
+#define MARKLOCK dyn->insts[ninst].marklock = dyn->native_size
+#define GETMARKLOCK dyn->insts[ninst].marklock
+
+// Branch to MARK if reg1==reg2 (use j64)
+#define BEQ_MARK(reg1, reg2)           \
+    j64 = GETMARK-(dyn->native_size);  \
+    BEQ(reg1, reg2, j64)
+// Branch to MARK if reg1!=reg2 (use j64)
+#define BNE_MARK(reg1, reg2)           \
+    j64 = GETMARK-(dyn->native_size);  \
+    BNE(reg1, reg2, j64)
+// Branch to NEXT if reg1==0 (use j64)
+#define CBZ_NEXT(reg1)                  \
+    j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; \
+    BEQ(reg1, xZR, j64)
 
 #define IFX(A)  if((dyn->insts[ninst].x64.gen_flags&(A)))
 #define IFX_PENDOR0  if((dyn->insts[ninst].x64.gen_flags&(X_PEND) || !dyn->insts[ninst].x64.gen_flags))
@@ -105,6 +140,37 @@
 #define IFX2X(A, B) if((dyn->insts[ninst].x64.gen_flags==(A) || dyn->insts[ninst].x64.gen_flags==(B) || dyn->insts[ninst].x64.gen_flags==((A)|(B))))
 #define IFXN(A, B)  if((dyn->insts[ninst].x64.gen_flags&(A) && !(dyn->insts[ninst].x64.gen_flags&(B))))
 
+#define STORE_REG(A)    SD(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
+#define LOAD_REG(A)     LD(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
+
+// Need to also store the current value of some registers, as they may be used by functions like setjmp
+#define STORE_XEMU_CALL()   \
+    STORE_REG(RBX);         \
+    STORE_REG(RDX);         \
+    STORE_REG(RSP);         \
+    STORE_REG(RBP);         \
+    STORE_REG(RDI);         \
+    STORE_REG(RSI);         \
+    STORE_REG(R8);          \
+    STORE_REG(R9);          \
+    STORE_REG(R10);         \
+    STORE_REG(R11);         \
+
+#define LOAD_XEMU_CALL()    \
+
+#define LOAD_XEMU_REM()     \
+    LOAD_REG(RBX);          \
+    LOAD_REG(RDX);          \
+    LOAD_REG(RSP);          \
+    LOAD_REG(RBP);          \
+    LOAD_REG(RDI);          \
+    LOAD_REG(RSI);          \
+    LOAD_REG(R8);           \
+    LOAD_REG(R9);           \
+    LOAD_REG(R10);          \
+    LOAD_REG(R11);          \
+
+
 #define SET_DFNONE(S)    if(!dyn->f.dfnone) {MOV_U12(S, d_none); SD(S, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=1;}
 #define SET_DF(S, N)     if((N)!=d_none) {MOV_U12(S, (N)); SD(S, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=0;} else SET_DFNONE(S)
 #define SET_NODF()          dyn->f.dfnone = 0
@@ -185,7 +251,7 @@
 #else
 // put value in the Table64 even if not using it for now to avoid difference between Step2 and Step3. Needs to be optimized later...
 #define GETIP(A)                                        \
-    if(dyn->last_ip && ((A)-dyn->last_ip)<0x1000) {     \
+    if(dyn->last_ip && ((A)-dyn->last_ip)<2048) {       \
         uint64_t _delta_ip = (A)-dyn->last_ip;          \
         dyn->last_ip += _delta_ip;                      \
         if(_delta_ip) {                                 \
@@ -199,7 +265,7 @@
             TABLE64(xRIP, dyn->last_ip);                \
     }
 #define GETIP_(A)                                       \
-    if(dyn->last_ip && ((A)-dyn->last_ip)<0x1000) {     \
+    if(dyn->last_ip && ((A)-dyn->last_ip)<2048) {       \
         uint64_t _delta_ip = (A)-dyn->last_ip;          \
         if(_delta_ip) {ADDI(xRIP, xRIP, _delta_ip);}    \
     } else {                                            \
@@ -350,7 +416,6 @@ void* rv64_next(x64emu_t* emu, uintptr_t addr);
 #define sse_get_reg     STEPNAME(sse_get_reg)
 #define sse_get_reg_empty STEPNAME(sse_get_reg_empty)
 #define sse_forget_reg   STEPNAME(sse_forget_reg)
-#define sse_purge07cache STEPNAME(sse_purge07cache)
 
 #define fpu_pushcache   STEPNAME(fpu_pushcache)
 #define fpu_popcache    STEPNAME(fpu_popcache)
@@ -384,8 +449,8 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst);
 //void ret_to_epilog(dynarec_rv64_t* dyn, int ninst);
 //void retn_to_epilog(dynarec_rv64_t* dyn, int ninst, int n);
 //void iret_to_epilog(dynarec_rv64_t* dyn, int ninst, int is64bits);
-//void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg);
-//void call_n(dynarec_rv64_t* dyn, int ninst, void* fnc, int w);
+void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg);
+void call_n(dynarec_rv64_t* dyn, int ninst, void* fnc, int w);
 //void grab_segdata(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, int reg, int segment);
 void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6);
 //void emit_cmp16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
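
For orientation, the CALL/CALL_/CALL_S macros above all expand to call_c() (added to dynarec_rv64_helper.c in this commit). The sketch below is only a plain-English outline of the sequence that function emits, written as comments inside an empty C function; it is not box64 code.

/* Outline sketch (not box64 code) of what one CALL(fnc, ret) expansion boils down to. */
void call_c_outline(void)
{
    /* 1. store xFlags into emu->eflags                        (skipped by CALL_S)           */
    /* 2. fpu_pushcache()                                                                     */
    /* 3. save xEmu and savereg on the native stack, and RAX/RCX/R12..R15 plus RIP into      */
    /*    x64emu_t: their RV64 host registers are caller-saved, so the C callee may clobber  */
    /*    them (all of this is skipped when ret is -2)                                        */
    /* 4. TABLE64(x6, fnc); JALR(x6)                           -- the actual C call          */
    /* 5. grab the return value when one was requested (ret >= 0)                             */
    /* 6. reload everything saved in step 3, except the register holding the return value    */
    /* 7. XORI+SRLI to rebuild xMASK (the 0x00000000FFFFFFFF constant register)               */
    /* 8. fpu_popcache(); reload xFlags from emu->eflags       (flags skipped by CALL_S)     */
}
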
diff --git a/src/dynarec/rv64/dynarec_rv64_pass3.h b/src/dynarec/rv64/dynarec_rv64_pass3.h
index bf2ec0c8..1d7eb6d5 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass3.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass3.h
@@ -47,4 +47,4 @@
         dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":"");                       \
     }
 
-#define TABLE64(A, V)   {int val64offset = Table64(dyn, (V)); MESSAGE(LOG_DUMP, "  Table64: 0x%lx\n", (V)); AUIPC(A, (val64offset>>12)); LD(A, A, (val64offset&0b111111111111));}
+#define TABLE64(A, V)   {int val64offset = Table64(dyn, (V)); MESSAGE(LOG_DUMP, "  Table64: 0x%lx\n", (V)); AUIPC(A, SPLIT20(val64offset)); LD(A, A, SPLIT12(val64offset));}
diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h
index 7b8fdec2..ac403464 100644
--- a/src/dynarec/rv64/dynarec_rv64_private.h
+++ b/src/dynarec/rv64/dynarec_rv64_private.h
@@ -74,7 +74,12 @@ int Table64(dynarec_rv64_t *dyn, uint64_t val);  // add a value to etable64 (if
 
 void CreateJmpNext(void* addr, void* next);
 
-//TODO: GO_TRACE() !
-#define GO_TRACE()
+#define GO_TRACE()          \
+    GETIP_(ip);             \
+    MV(A1, xRIP);           \
+    STORE_XEMU_CALL();      \
+    MOV64x(A2, 1);          \
+    CALL(PrintTrace, -1);   \
+    LOAD_XEMU_CALL()
 
 #endif //__DYNAREC_RV64_PRIVATE_H_
\ No newline at end of file
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 6b33560e..cd8e594b 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -91,10 +91,20 @@ f28–31  ft8–11  FP temporaries                  Caller
 // RV64 args
 #define A0      10
 #define A1      11
+#define A2      12
+#define A3      13
+#define A4      14
+#define A5      15
+#define A6      16
+#define A7      17
 // xZR reg is 0
 #define xZR     0
 #define wZR     xZR
 
+// split a 32-bit value into 20 bits + 12 bits, adjusting the upper part if the 12-bit part is negative
+#define SPLIT20(A)  (((A)+0x800)>>12)
+#define SPLIT12(A)  ((A)&0xfff)
+
 // MOVE64x is quite complex, so use a function for this
 #define MOV64x(A, B)    rv64_move64(dyn, ninst, A, B)
 
@@ -104,7 +114,7 @@ f28–31  ft8–11  FP temporaries                  Caller
 #define R_type(funct7, rs2, rs1, funct3, rd, opcode)    ((funct7)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<12 | (rd)<<7 | (opcode))
 #define I_type(imm12, rs1, funct3, rd, opcode)    ((imm12)<<20 | (rs1)<<15 | (funct3)<<12 | (rd)<<7 | (opcode))
 #define S_type(imm12, rs2, rs1, funct3, opcode)    (((imm12)>>5)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<12 | ((imm12)&31)<<7 | (opcode))
-#define B_type(imm13, rs2, rs1, funct3, opcode)      ((((imm13)>>12)&1)<<31 | (((imm13)>>5)&63)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<13 | (((imm13)>>1)&15)<<8 | (((imm13)>>11)&1)<<7 | (opcode))
+#define B_type(imm13, rs2, rs1, funct3, opcode)      ((((imm13)>>12)&1)<<31 | (((imm13)>>5)&63)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<12 | (((imm13)>>1)&15)<<8 | (((imm13)>>11)&1)<<7 | (opcode))
 #define U_type(imm32, rd, opcode)   (((imm32)>>12)<<12 | (rd)<<7 | (opcode))
 #define J_type(imm21, rd, opcode)    ((((imm21)>>20)&1)<<31 | (((imm21)>>1)&0b1111111111)<<21 | (((imm21)>>11)&1)<<20 | (((imm21)>>12)&0b11111111)<<12 | (rd)<<7 | (opcode))
 
@@ -112,7 +122,7 @@ f28–31  ft8–11  FP temporaries                  Caller
 // put imm20 in the [31:12] bits of rd, zero [11:0] and sign extend bits31
 #define LUI(rd, imm20)                 EMIT(U_type((imm20)<<12, rd, 0b0110111))
 // put PC+imm20 in rd
-#define AUIPC(rd, imm20)               EMIT(U_type((imm20)>>12, rd, 0b0010111))
+#define AUIPC(rd, imm20)               EMIT(U_type((imm20)<<12, rd, 0b0010111))
 
 #define JAL_gen(rd, imm21)             J_type(imm21, rd, 0b1101111)
 // Unconditionnal branch, no return address set
@@ -217,6 +227,9 @@ f28–31  ft8–11  FP temporaries                  Caller
 // 4-bytes[rs1+imm12] = rs2
 #define SW(rs2, rs1, imm12)         EMIT(S_type(imm12, rs2, rs1, 0b010, 0b0100011))
 
+#define PUSH1(reg)                  do {SD(reg, xRSP, -8); SUBI(xRSP, xRSP, 8);} while(0)
+#define POP1(reg)                   do {LD(reg, xRSP, 0); ADDI(xRSP, xRSP, 8);}while(0)
+
 #define FENCE_gen(pred, succ)       (((pred)<<24) | ((succ)<<20) | 0b0001111)
 #define FENCE()                     EMIT(FENCE_gen(3, 3))
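
One of the fixes in this hunk is easy to miss: B_type() previously shifted funct3 by 13, but the RISC-V B-type layout is imm[12] | imm[10:5] | rs2 | rs1 | funct3 | imm[4:1] | imm[11] | opcode, with funct3 in bits [14:12], so the old shift misplaced the field and spilled into rs1. Below is a standalone sanity check of the fixed layout; it is illustrative only, not box64 code, and the expected word is the spec encoding of `bne a0, a1, 8`.

#include <assert.h>
#include <stdint.h>

/* same field layout as the fixed B_type() macro */
static uint32_t b_type(uint32_t imm13, uint32_t rs2, uint32_t rs1,
                       uint32_t funct3, uint32_t opcode)
{
    return (((imm13 >> 12) & 1)  << 31) | (((imm13 >> 5) & 63) << 25)
         | (rs2 << 20) | (rs1 << 15) | (funct3 << 12)
         | (((imm13 >> 1) & 15) << 8) | (((imm13 >> 11) & 1) << 7) | opcode;
}

int main(void)
{
    /* bne a0, a1, +8   (a0=x10, a1=x11, funct3=001, opcode=1100011) */
    uint32_t inst = b_type(8, 11, 10, 0b001, 0b1100011);
    assert(inst == 0x00B51463u);           /* expected encoding per the RISC-V spec */
    assert(((inst >> 12) & 7) == 0b001);   /* funct3 really sits in bits [14:12] */
    return 0;
}
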