about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_df.c | 111
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_functions.c | 109
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_functions.h | 6
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.c | 103
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h | 7
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_private.h | 15
6 files changed, 244 insertions, 107 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_df.c b/src/dynarec/rv64/dynarec_rv64_df.c
index a2f66677..704e1302 100644
--- a/src/dynarec/rv64/dynarec_rv64_df.c
+++ b/src/dynarec/rv64/dynarec_rv64_df.c
@@ -172,25 +172,30 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     break;
                 case 5:
                     INST_NAME("FILD ST0, i64");
-                    X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D);
+                    X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_I64);
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0);
-                    LD(x1, wback, fixedaddress);
-                    if (rex.is32bits) {
-                        // need to also feed the STll stuff...
-                        ADDI(x4, xEmu, offsetof(x64emu_t, fpu_ll));
-                        LWU(x5, xEmu, offsetof(x64emu_t, top));
-                        int a = 0 - dyn->e.x87stack;
-                        if(a) {
-                            ADDIW(x5, x5, a);
-                            ANDI(x5, x5, 0x7);
+
+                    if (ST_IS_I64(0)) {
+                        FLD(v1, wback, fixedaddress);
+                    } else {
+                        LD(x1, wback, fixedaddress);
+                        if (rex.is32bits) {
+                            // need to also feed the STll stuff...
+                            ADDI(x4, xEmu, offsetof(x64emu_t, fpu_ll));
+                            LWU(x5, xEmu, offsetof(x64emu_t, top));
+                            int a = 0 - dyn->e.x87stack;
+                            if (a) {
+                                ADDIW(x5, x5, a);
+                                ANDI(x5, x5, 0x7);
+                            }
+                            SLLI(x5, x5, 4); // fpu_ll is 2 i64
+                            ADD(x5, x5, x4);
+                            SD(x1, x5, 8); // ll
+                        }
+                        FCVTDL(v1, x1, RD_RTZ);
+                        if (rex.is32bits) {
+                            FSD(v1, x5, 0); // ref
                         }
-                        SLLI(x5, x5, 4); // fpu_ll is 2 i64
-                        ADD(x5, x5, x4);
-                        SD(x1, x5, 8);   // ll
-                    }
-                    FCVTDL(v1, x1, RD_RTZ);
-                    if(rex.is32bits) {
-                        FSD(v1, x5, 0);  // ref
                     }
                     break;
                 case 6:
@@ -203,44 +208,50 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     break;
                 case 7:
                     INST_NAME("FISTP i64, ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    u8 = x87_setround(dyn, ninst, x1, x2);
+                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_I64);
+                    if (!ST_IS_I64(0)) {
+                        u8 = x87_setround(dyn, ninst, x1, x2);
+                    }
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0);
 
-                    if(rex.is32bits) {
-                        // need to check STll first...
-                        ADDI(x4, xEmu, offsetof(x64emu_t, fpu_ll));
-                        LWU(x5, xEmu, offsetof(x64emu_t, top));
-                        int a = 0 - dyn->e.x87stack;
-                        if(a) {
-                            ADDIW(x5, x5, a);
-                            ANDI(x5, x5, 0x7);
+                    if (ST_IS_I64(0)) {
+                        FSD(v1, wback, fixedaddress);
+                    } else {
+                        if (rex.is32bits) {
+                            // need to check STll first...
+                            ADDI(x4, xEmu, offsetof(x64emu_t, fpu_ll));
+                            LWU(x5, xEmu, offsetof(x64emu_t, top));
+                            int a = 0 - dyn->e.x87stack;
+                            if (a) {
+                                ADDIW(x5, x5, a);
+                                ANDI(x5, x5, 0x7);
+                            }
+                            SLLI(x5, x5, 4); // fpu_ll is 2 i64
+                            ADD(x5, x5, x4);
+                            FMVXD(x3, v1);
+                            LD(x6, x5, 0); // ref
+                            BNE_MARK(x6, x3);
+                            LD(x6, x5, 8); // ll
+                            SD(x6, wback, fixedaddress);
+                            B_MARK3_nocond;
+                            MARK;
                         }
-                        SLLI(x5, x5, 4); // fpu_ll is 2 i64
-                        ADD(x5, x5, x4);
-                        FMVXD(x3, v1);
-                        LD(x6, x5, 0);  // ref
-                        BNE_MARK(x6, x3);
-                        LD(x6, x5, 8);  // ll
-                        SD(x6, wback, fixedaddress);
-                        B_MARK3_nocond;
-                        MARK;
-                    }
 
-                    if(!box64_dynarec_fastround) {
-                        FSFLAGSI(0); // reset all bits
-                    }
-                    FCVTLD(x4, v1, RD_DYN);
-                    if(!box64_dynarec_fastround) {
-                        FRFLAGS(x5);   // get back FPSR to check the IOC bit
-                        ANDI(x5, x5, 1<<FR_NV);
-                        BEQ_MARK2(x5, xZR);
-                        MOV64x(x4, 0x8000000000000000LL);
+                        if (!box64_dynarec_fastround) {
+                            FSFLAGSI(0); // reset all bits
+                        }
+                        FCVTLD(x4, v1, RD_DYN);
+                        if (!box64_dynarec_fastround) {
+                            FRFLAGS(x5); // get back FPSR to check the IOC bit
+                            ANDI(x5, x5, 1 << FR_NV);
+                            BEQ_MARK2(x5, xZR);
+                            MOV64x(x4, 0x8000000000000000LL);
+                        }
+                        MARK2;
+                        SD(x4, wback, fixedaddress);
+                        MARK3;
+                        x87_restoreround(dyn, ninst, u8);
                     }
-                    MARK2;
-                    SD(x4, wback, fixedaddress);
-                    MARK3;
-                    x87_restoreround(dyn, ninst, u8);
                     X87_POP_OR_FAIL(dyn, ninst, x3);
                     break;
                 default:
diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c
index f01c638b..b9995f7d 100644
--- a/src/dynarec/rv64/dynarec_rv64_functions.c
+++ b/src/dynarec/rv64/dynarec_rv64_functions.c
@@ -58,7 +58,7 @@ void fpu_free_reg(dynarec_rv64_t* dyn, int reg)
     int idx = EXTIDX(reg);
     // TODO: check upper limit?
     dyn->e.fpuused[idx] = 0;
-    if(dyn->e.extcache[idx].t!=EXT_CACHE_ST_F && dyn->e.extcache[idx].t!=EXT_CACHE_ST_D)
+    if (dyn->e.extcache[idx].t != EXT_CACHE_ST_F && dyn->e.extcache[idx].t != EXT_CACHE_ST_D && dyn->e.extcache[idx].t != EXT_CACHE_ST_I64)
         dyn->e.extcache[idx].v = 0;
 }
 // Get an MMX double reg
@@ -90,18 +90,28 @@ void fpu_reset_reg(dynarec_rv64_t* dyn)
     }
 }
 
+int extcache_no_i64(dynarec_rv64_t* dyn, int ninst, int st, int a)
+{
+    if (a == EXT_CACHE_ST_I64) {
+        extcache_promote_double(dyn, ninst, st);
+        return EXT_CACHE_ST_D;
+    }
+    return a;
+}
+
 int extcache_get_st(dynarec_rv64_t* dyn, int ninst, int a)
 {
     if (dyn->insts[ninst].e.swapped) {
-        if(dyn->insts[ninst].e.combined1 == a)
+        if (dyn->insts[ninst].e.combined1 == a)
             a = dyn->insts[ninst].e.combined2;
-        else if(dyn->insts[ninst].e.combined2 == a)
+        else if (dyn->insts[ninst].e.combined2 == a)
             a = dyn->insts[ninst].e.combined1;
     }
-    for(int i=0; i<24; ++i)
-        if((dyn->insts[ninst].e.extcache[i].t==EXT_CACHE_ST_F
-         || dyn->insts[ninst].e.extcache[i].t==EXT_CACHE_ST_D)
-         && dyn->insts[ninst].e.extcache[i].n==a)
+    for (int i = 0; i < 24; ++i)
+        if ((dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_F
+                || dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_D
+                || dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_I64)
+            && dyn->insts[ninst].e.extcache[i].n == a)
             return dyn->insts[ninst].e.extcache[i].t;
     // not in the cache yet, so will be fetched...
     return EXT_CACHE_ST_D;
@@ -110,12 +120,13 @@ int extcache_get_st(dynarec_rv64_t* dyn, int ninst, int a)
 int extcache_get_current_st(dynarec_rv64_t* dyn, int ninst, int a)
 {
     (void)ninst;
-    if(!dyn->insts)
+    if (!dyn->insts)
         return EXT_CACHE_ST_D;
-    for(int i=0; i<24; ++i)
-        if((dyn->e.extcache[i].t==EXT_CACHE_ST_F
-         || dyn->e.extcache[i].t==EXT_CACHE_ST_D)
-         && dyn->e.extcache[i].n==a)
+    for (int i = 0; i < 24; ++i)
+        if ((dyn->e.extcache[i].t == EXT_CACHE_ST_F
+                || dyn->e.extcache[i].t == EXT_CACHE_ST_D
+                || dyn->e.extcache[i].t == EXT_CACHE_ST_I64)
+            && dyn->e.extcache[i].n == a)
             return dyn->e.extcache[i].t;
     // not in the cache yet, so will be fetched...
     return EXT_CACHE_ST_D;
@@ -129,6 +140,16 @@ int extcache_get_st_f(dynarec_rv64_t* dyn, int ninst, int a)
             return i;
     return -1;
 }
+
+int extcache_get_st_f_i64(dynarec_rv64_t* dyn, int ninst, int a)
+{
+    for (int i = 0; i < 24; ++i)
+        if ((dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_I64 || dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_F)
+            && dyn->insts[ninst].e.extcache[i].n == a)
+            return i;
+    return -1;
+}
+
 int extcache_get_st_f_noback(dynarec_rv64_t* dyn, int ninst, int a)
 {
     for(int i=0; i<24; ++i)
@@ -137,6 +158,16 @@ int extcache_get_st_f_noback(dynarec_rv64_t* dyn, int ninst, int a)
             return i;
     return -1;
 }
+
+int extcache_get_st_f_i64_noback(dynarec_rv64_t* dyn, int ninst, int a)
+{
+    for (int i = 0; i < 24; ++i)
+        if ((dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_I64 || dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_F)
+            && dyn->insts[ninst].e.extcache[i].n == a)
+            return i;
+    return -1;
+}
+
 int extcache_get_current_st_f(dynarec_rv64_t* dyn, int a)
 {
     for(int i=0; i<24; ++i)
@@ -146,6 +177,15 @@ int extcache_get_current_st_f(dynarec_rv64_t* dyn, int a)
     return -1;
 }
 
+int extcache_get_current_st_f_i64(dynarec_rv64_t* dyn, int a)
+{
+    for (int i = 0; i < 24; ++i)
+        if ((dyn->e.extcache[i].t == EXT_CACHE_ST_I64 || dyn->e.extcache[i].t == EXT_CACHE_ST_F)
+            && dyn->e.extcache[i].n == a)
+            return i;
+    return -1;
+}
+
 static void extcache_promote_double_forward(dynarec_rv64_t* dyn, int ninst, int maxinst, int a);
 static void extcache_promote_double_internal(dynarec_rv64_t* dyn, int ninst, int maxinst, int a);
 static void extcache_promote_double_combined(dynarec_rv64_t* dyn, int ninst, int maxinst, int a)
@@ -155,7 +195,7 @@ static void extcache_promote_double_combined(dynarec_rv64_t* dyn, int ninst, int
             a = dyn->insts[ninst].e.combined2;
         } else
             a = dyn->insts[ninst].e.combined1;
-        int i = extcache_get_st_f_noback(dyn, ninst, a);
+        int i = extcache_get_st_f_i64_noback(dyn, ninst, a);
         //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double_combined, ninst=%d combined%c %d i=%d (stack:%d/%d)\n", ninst, (a == dyn->insts[ninst].e.combined2)?'2':'1', a ,i, dyn->insts[ninst].e.stack_push, -dyn->insts[ninst].e.stack_pop);
         if(i>=0) {
             dyn->insts[ninst].e.extcache[i].t = EXT_CACHE_ST_D;
@@ -174,7 +214,7 @@ static void extcache_promote_double_internal(dynarec_rv64_t* dyn, int ninst, int
         return;
     while(ninst>=0) {
         a+=dyn->insts[ninst].e.stack_pop;    // adjust Stack depth: add pop'd ST (going backward)
-        int i = extcache_get_st_f(dyn, ninst, a);
+        int i = extcache_get_st_f_i64(dyn, ninst, a);
         //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double_internal, ninst=%d, a=%d st=%d:%d, i=%d\n", ninst, a, dyn->insts[ninst].e.stack, dyn->insts[ninst].e.stack_next, i);
         if(i<0) return;
         dyn->insts[ninst].e.extcache[i].t = EXT_CACHE_ST_D;
@@ -209,7 +249,7 @@ static void extcache_promote_double_forward(dynarec_rv64_t* dyn, int ninst, int
             else if (a==dyn->insts[ninst].e.combined2)
                 a = dyn->insts[ninst].e.combined1;
         }
-        int i = extcache_get_st_f_noback(dyn, ninst, a);
+        int i = extcache_get_st_f_i64_noback(dyn, ninst, a);
         //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double_forward, ninst=%d, a=%d st=%d:%d(%d/%d), i=%d\n", ninst, a, dyn->insts[ninst].e.stack, dyn->insts[ninst].e.stack_next, dyn->insts[ninst].e.stack_push, -dyn->insts[ninst].e.stack_pop, i);
         if(i<0) return;
         dyn->insts[ninst].e.extcache[i].t = EXT_CACHE_ST_D;
@@ -230,7 +270,7 @@ static void extcache_promote_double_forward(dynarec_rv64_t* dyn, int ninst, int
 
 void extcache_promote_double(dynarec_rv64_t* dyn, int ninst, int a)
 {
-    int i = extcache_get_current_st_f(dyn, a);
+    int i = extcache_get_current_st_f_i64(dyn, a);
     //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double, ninst=%d a=%d st=%d i=%d\n", ninst, a, dyn->e.stack, i);
     if(i<0) return;
     dyn->e.extcache[i].t = EXT_CACHE_ST_D;
@@ -266,19 +306,21 @@ int extcache_combine_st(dynarec_rv64_t* dyn, int ninst, int a, int b)
     return EXT_CACHE_ST_D;
 }
 
-static int isCacheEmpty(dynarec_native_t* dyn, int ninst) {
-    if(dyn->insts[ninst].e.stack_next) {
+static int isCacheEmpty(dynarec_native_t* dyn, int ninst)
+{
+    if (dyn->insts[ninst].e.stack_next) {
         return 0;
     }
-    for(int i=0; i<24; ++i)
-        if(dyn->insts[ninst].e.extcache[i].v) {       // there is something at ninst for i
-            if(!(
-            (dyn->insts[ninst].e.extcache[i].t==EXT_CACHE_ST_F || dyn->insts[ninst].e.extcache[i].t==EXT_CACHE_ST_D)
-            && dyn->insts[ninst].e.extcache[i].n<dyn->insts[ninst].e.stack_pop))
+    for (int i = 0; i < 24; ++i)
+        if (dyn->insts[ninst].e.extcache[i].v) { // there is something at ninst for i
+            if (!(
+                    (dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_F
+                        || dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_D
+                        || dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_I64)
+                    && dyn->insts[ninst].e.extcache[i].n < dyn->insts[ninst].e.stack_pop))
                 return 0;
         }
     return 1;
-
 }
 
 int fpuCacheNeedsTransform(dynarec_rv64_t* dyn, int ninst) {
@@ -295,9 +337,11 @@ int fpuCacheNeedsTransform(dynarec_rv64_t* dyn, int ninst) {
         }
         for(int i=0; i<24 && !ret; ++i)
             if(dyn->insts[ninst].e.extcache[i].v) {       // there is something at ninst for i
-                if(!(
-                (dyn->insts[ninst].e.extcache[i].t==EXT_CACHE_ST_F || dyn->insts[ninst].e.extcache[i].t==EXT_CACHE_ST_D)
-                && dyn->insts[ninst].e.extcache[i].n<dyn->insts[ninst].e.stack_pop))
+                if (!(
+                        (dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_F
+                            || dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_D
+                            || dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_I64)
+                        && dyn->insts[ninst].e.extcache[i].n < dyn->insts[ninst].e.stack_pop))
                     ret = 1;
             }
         return ret;
@@ -329,7 +373,9 @@ void extcacheUnwind(extcache_t* cache)
         int a = -1;
         int b = -1;
         for(int j=0; j<24 && ((a==-1) || (b==-1)); ++j)
-            if((cache->extcache[j].t == EXT_CACHE_ST_D || cache->extcache[j].t == EXT_CACHE_ST_F)) {
+            if ((cache->extcache[j].t == EXT_CACHE_ST_D
+                    || cache->extcache[j].t == EXT_CACHE_ST_F
+                    || cache->extcache[j].t == EXT_CACHE_ST_I64)) {
                 if(cache->extcache[j].n == cache->combined1)
                     a = j;
                 else if(cache->extcache[j].n == cache->combined2)
@@ -362,7 +408,9 @@ void extcacheUnwind(extcache_t* cache)
     if(cache->stack_push) {
         // unpush
         for(int j=0; j<24; ++j) {
-            if((cache->extcache[j].t == EXT_CACHE_ST_D || cache->extcache[j].t == EXT_CACHE_ST_F)) {
+            if ((cache->extcache[j].t == EXT_CACHE_ST_D
+                    || cache->extcache[j].t == EXT_CACHE_ST_F
+                    || cache->extcache[j].t == EXT_CACHE_ST_I64)) {
                 if(cache->extcache[j].n<cache->stack_push)
                     cache->extcache[j].v = 0;
                 else
@@ -411,6 +459,7 @@ void extcacheUnwind(extcache_t* cache)
                     break;
                 case EXT_CACHE_ST_F:
                 case EXT_CACHE_ST_D:
+                case EXT_CACHE_ST_I64:
                     cache->x87cache[x87reg] = cache->extcache[i].n;
                     cache->x87reg[x87reg] = EXTREG(i);
                     ++x87reg;
@@ -482,6 +531,7 @@ const char* getCacheName(int t, int n)
     switch(t) {
         case EXT_CACHE_ST_D: sprintf(buff, "ST%d", n); break;
         case EXT_CACHE_ST_F: sprintf(buff, "st%d", n); break;
+        case EXT_CACHE_ST_I64: sprintf(buff, "STi%d", n); break;
         case EXT_CACHE_MM: sprintf(buff, "MM%d", n); break;
         case EXT_CACHE_SS: sprintf(buff, "SS%d", n); break;
         case EXT_CACHE_SD: sprintf(buff, "SD%d", n); break;
@@ -533,6 +583,7 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r
             switch(dyn->insts[ninst].e.extcache[ii].t) {
                 case EXT_CACHE_ST_D: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
                 case EXT_CACHE_ST_F: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
+                case EXT_CACHE_ST_I64: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
                 case EXT_CACHE_MM: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
                 case EXT_CACHE_SS: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
                 case EXT_CACHE_SD: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
diff --git a/src/dynarec/rv64/dynarec_rv64_functions.h b/src/dynarec/rv64/dynarec_rv64_functions.h
index 451336bd..b265f694 100644
--- a/src/dynarec/rv64/dynarec_rv64_functions.h
+++ b/src/dynarec/rv64/dynarec_rv64_functions.h
@@ -28,14 +28,20 @@ void fpu_reset_reg(dynarec_rv64_t* dyn);
 int extcache_get_st(dynarec_rv64_t* dyn, int ninst, int a);
 // Get if STx is FLOAT or DOUBLE
 int extcache_get_st_f(dynarec_rv64_t* dyn, int ninst, int a);
+// Get if STx is FLOAT or I64
+int extcache_get_st_f_i64(dynarec_rv64_t* dyn, int ninst, int a);
 // Get actual type for STx
 int extcache_get_current_st(dynarec_rv64_t* dyn, int ninst, int a);
 // Get actual STx is FLOAT or DOUBLE
 int extcache_get_current_st_f(dynarec_rv64_t* dyn, int a);
+// Get actual STx is FLOAT or I64
+int extcache_get_current_st_f_i64(dynarec_rv64_t* dyn, int a);
 // Back-propagate a change float->double
 void extcache_promote_double(dynarec_rv64_t* dyn, int ninst, int a);
 // Combine and propagate if needed (pass 1 only)
 int extcache_combine_st(dynarec_rv64_t* dyn, int ninst, int a, int b);  // with stack current dyn->n_stack*
+// Do not allow i64 type
+int extcache_no_i64(dynarec_rv64_t* dyn, int ninst, int st, int a);
 
 // FPU Cache transformation (for loops) // Specific, need to be written par backend
 int fpuCacheNeedsTransform(dynarec_rv64_t* dyn, int ninst);
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index d175fabb..19b4c316 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -825,7 +825,9 @@ static void x87_reset(dynarec_rv64_t* dyn)
     dyn->e.swapped = 0;
     dyn->e.barrier = 0;
     for(int i=0; i<24; ++i)
-        if(dyn->e.extcache[i].t == EXT_CACHE_ST_F || dyn->e.extcache[i].t == EXT_CACHE_ST_D)
+        if (dyn->e.extcache[i].t == EXT_CACHE_ST_F
+            || dyn->e.extcache[i].t == EXT_CACHE_ST_D
+            || dyn->e.extcache[i].t == EXT_CACHE_ST_I64)
             dyn->e.extcache[i].v = 0;
 }
 
@@ -878,7 +880,9 @@ int x87_do_push(dynarec_rv64_t* dyn, int ninst, int s1, int t)
     dyn->e.stack_push+=1;
     // move all regs in cache, and find a free one
     for(int j=0; j<24; ++j)
-        if((dyn->e.extcache[j].t == EXT_CACHE_ST_D) || (dyn->e.extcache[j].t == EXT_CACHE_ST_F))
+        if ((dyn->e.extcache[j].t == EXT_CACHE_ST_D)
+            || (dyn->e.extcache[j].t == EXT_CACHE_ST_F)
+            || (dyn->e.extcache[j].t == EXT_CACHE_ST_I64))
             ++dyn->e.extcache[j].n;
     int ret = -1;
     for(int i=0; i<8; ++i)
@@ -901,7 +905,9 @@ void x87_do_push_empty(dynarec_rv64_t* dyn, int ninst, int s1)
     dyn->e.stack_push+=1;
     // move all regs in cache
     for(int j=0; j<24; ++j)
-        if((dyn->e.extcache[j].t == EXT_CACHE_ST_D) || (dyn->e.extcache[j].t == EXT_CACHE_ST_F))
+        if ((dyn->e.extcache[j].t == EXT_CACHE_ST_D)
+            || (dyn->e.extcache[j].t == EXT_CACHE_ST_F)
+            || (dyn->e.extcache[j].t == EXT_CACHE_ST_I64))
             ++dyn->e.extcache[j].n;
     for(int i=0; i<8; ++i)
         if(dyn->e.x87cache[i]!=-1)
@@ -1080,9 +1086,13 @@ int x87_get_current_cache(dynarec_rv64_t* dyn, int ninst, int st, int t)
     for (int i=0; i<8; ++i) {
         if(dyn->e.x87cache[i]==st) {
             #if STEP == 1
-            if(t==EXT_CACHE_ST_D && (dyn->e.extcache[EXTIDX(dyn->e.x87reg[i])].t==EXT_CACHE_ST_F))
+            if (t == EXT_CACHE_ST_D && (dyn->e.extcache[EXTIDX(dyn->e.x87reg[i])].t == EXT_CACHE_ST_F || dyn->e.extcache[EXTIDX(dyn->e.x87reg[i])].t == EXT_CACHE_ST_I64))
                 extcache_promote_double(dyn, ninst, st);
-            #endif
+            else if (t == EXT_CACHE_ST_I64 && (dyn->e.extcache[EXTIDX(dyn->e.x87reg[i])].t == EXT_CACHE_ST_F))
+                extcache_promote_double(dyn, ninst, st);
+            else if (t == EXT_CACHE_ST_F && (dyn->e.extcache[EXTIDX(dyn->e.x87reg[i])].t == EXT_CACHE_ST_I64))
+                extcache_promote_double(dyn, ninst, st);
+#endif
             return i;
         }
         assert(dyn->e.x87cache[i]<8);
@@ -1122,8 +1132,10 @@ int x87_get_cache(dynarec_rv64_t* dyn, int ninst, int populate, int s1, int s2,
 int x87_get_extcache(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st)
 {
     for(int ii=0; ii<24; ++ii)
-        if((dyn->e.extcache[ii].t == EXT_CACHE_ST_F || dyn->e.extcache[ii].t == EXT_CACHE_ST_D)
-         && dyn->e.extcache[ii].n==st)
+        if ((dyn->e.extcache[ii].t == EXT_CACHE_ST_F
+                || dyn->e.extcache[ii].t == EXT_CACHE_ST_D
+                || dyn->e.extcache[ii].t == EXT_CACHE_ST_I64)
+            && dyn->e.extcache[ii].n == st)
             return ii;
     assert(0);
     return -1;
@@ -1161,6 +1173,10 @@ void x87_refresh(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st)
     if (dyn->e.extcache[EXTIDX(reg)].t == EXT_CACHE_ST_F) {
         FCVTDS(SCRATCH0, reg);
         FSD(SCRATCH0, s1, offsetof(x64emu_t, x87));
+    } else if (dyn->e.extcache[EXTIDX(reg)].t == EXT_CACHE_ST_I64) {
+        FMVXD(s2, reg);
+        FCVTDL(SCRATCH0, s2, RD_RTZ);
+        FSD(SCRATCH0, s1, offsetof(x64emu_t, x87));
     } else {
         FSD(reg, s1, offsetof(x64emu_t, x87));
     }
@@ -1179,7 +1195,8 @@ void x87_forget(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st)
     MESSAGE(LOG_DUMP, "\tForget x87 Cache for ST%d\n", st);
     const int reg = dyn->e.x87reg[ret];
     #if STEP == 1
-    if (dyn->e.extcache[EXTIDX(reg)].t == EXT_CACHE_ST_F)
+    if (dyn->e.extcache[EXTIDX(dyn->e.x87reg[ret])].t == EXT_CACHE_ST_F
+        || dyn->e.extcache[EXTIDX(dyn->e.x87reg[ret])].t == EXT_CACHE_ST_I64)
         extcache_promote_double(dyn, ninst, st);
     #endif
     // prepare offset to fpu => s1
@@ -1195,6 +1212,10 @@ void x87_forget(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st)
     if (dyn->e.extcache[EXTIDX(reg)].t == EXT_CACHE_ST_F) {
         FCVTDS(SCRATCH0, reg);
         FSD(SCRATCH0, s1, offsetof(x64emu_t, x87));
+    } else if (dyn->e.extcache[EXTIDX(reg)].t == EXT_CACHE_ST_I64) {
+        FMVXD(s2, reg);
+        FCVTDL(SCRATCH0, s2, RD_RTZ);
+        FSD(SCRATCH0, s1, offsetof(x64emu_t, x87));
     } else {
         FSD(reg, s1, offsetof(x64emu_t, x87));
     }
@@ -1216,7 +1237,8 @@ void x87_reget_st(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st)
             // refresh the value
             MESSAGE(LOG_DUMP, "\tRefresh x87 Cache for ST%d\n", st);
             #if STEP == 1
-            if(dyn->e.extcache[EXTIDX(dyn->e.x87reg[i])].t==EXT_CACHE_ST_F)
+            if (dyn->e.extcache[EXTIDX(dyn->e.x87reg[i])].t == EXT_CACHE_ST_F
+                || dyn->e.extcache[EXTIDX(dyn->e.x87reg[i])].t == EXT_CACHE_ST_I64)
                 extcache_promote_double(dyn, ninst, st);
             #endif
             LW(s2, xEmu, offsetof(x64emu_t, top));
@@ -1634,18 +1656,29 @@ void fpu_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, in
 static int findCacheSlot(dynarec_rv64_t* dyn, int ninst, int t, int n, extcache_t* cache)
 {
     ext_cache_t f;
-    f.n = n; f.t = t;
-    for(int i=0; i<24; ++i) {
-        if(cache->extcache[i].v == f.v)
+    f.n = n;
+    f.t = t;
+    for (int i = 0; i < 24; ++i) {
+        if (cache->extcache[i].v == f.v)
             return i;
-        if(cache->extcache[i].n == n) {
-            switch(cache->extcache[i].t) {
+        if (cache->extcache[i].n == n) {
+            switch (cache->extcache[i].t) {
                 case EXT_CACHE_ST_F:
-                    if (t==EXT_CACHE_ST_D)
+                    if (t == EXT_CACHE_ST_D)
+                        return i;
+                    if (t == EXT_CACHE_ST_I64)
                         return i;
                     break;
                 case EXT_CACHE_ST_D:
-                    if (t==EXT_CACHE_ST_F)
+                    if (t == EXT_CACHE_ST_F)
+                        return i;
+                    if (t == EXT_CACHE_ST_I64)
+                        return i;
+                    break;
+                case EXT_CACHE_ST_I64:
+                    if (t == EXT_CACHE_ST_F)
+                        return i;
+                    if (t == EXT_CACHE_ST_D)
                         return i;
                     break;
             }
@@ -1703,7 +1736,7 @@ static void swapCache(dynarec_rv64_t* dyn, int ninst, int i, int j, extcache_t *
     cache->extcache[j].v = tmp.v;
 }
 
-static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int s2, int s3, int* s1_val, int* s2_val, int* s3_top, extcache_t *cache, int i, int t, int n)
+static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int s2, int s3, int* s1_val, int* s2_val, int* s3_top, extcache_t* cache, int i, int t, int n)
 {
     int reg = EXTREG(i);
     if(cache->extcache[i].v) {
@@ -1738,6 +1771,7 @@ static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int
             break;
         case EXT_CACHE_ST_D:
         case EXT_CACHE_ST_F:
+        case EXT_CACHE_ST_I64:
             MESSAGE(LOG_DUMP, "\t  - Loading %s\n", getCacheName(t, n));
             if((*s3_top) == 0xffff) {
                 LW(s3, xEmu, offsetof(x64emu_t, top));
@@ -1755,6 +1789,10 @@ static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int
             if(t==EXT_CACHE_ST_F) {
                 FCVTSD(reg, reg);
             }
+            if (t == EXT_CACHE_ST_I64) {
+                FCVTLD(s1, reg, RD_RTZ);
+                FMVDX(reg, s1);
+            }
             break;
         case EXT_CACHE_NONE:
         case EXT_CACHE_SCR:
@@ -1784,6 +1822,7 @@ static void unloadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, i
             break;
         case EXT_CACHE_ST_D:
         case EXT_CACHE_ST_F:
+        case EXT_CACHE_ST_I64:
             MESSAGE(LOG_DUMP, "\t  - Unloading %s\n", getCacheName(t, n));
             if((*s3_top)==0xffff) {
                 LW(s3, xEmu, offsetof(x64emu_t, top));
@@ -1797,9 +1836,13 @@ static void unloadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, i
             *s3_top += a;
             if(rv64_zba) SH3ADD(s2, s3, xEmu); else {SLLI(s2, s3, 3); ADD(s2, xEmu, s2);}
             *s2_val = 0;
-            if(t==EXT_CACHE_ST_F) {
+            if (t == EXT_CACHE_ST_F) {
                 FCVTDS(reg, reg);
             }
+            if (t == EXT_CACHE_ST_I64) {
+                FMVXD(s1, reg);
+                FCVTDL(reg, s1, RD_RTZ);
+            }
             FSD(reg, s2, offsetof(x64emu_t, x87));
             break;
         case EXT_CACHE_NONE:
@@ -1936,6 +1979,26 @@ static void fpuCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1, int s2, in
                     MESSAGE(LOG_DUMP, "\t  - Convert %s\n", getCacheName(cache.extcache[i].t, cache.extcache[i].n));
                     FCVTDS(EXTREG(i), EXTREG(i));
                     cache.extcache[i].t = EXT_CACHE_ST_D;
+                } else if (cache.extcache[i].t == EXT_CACHE_ST_D && cache_i2.extcache[i].t == EXT_CACHE_ST_I64) {
+                    MESSAGE(LOG_DUMP, "\t  - Convert %s\n", getCacheName(cache.extcache[i].t, cache.extcache[i].n));
+                    FCVTLD(s1, EXTREG(i), RD_RTZ);
+                    FMVDX(EXTREG(i), s1);
+                    cache.extcache[i].t = EXT_CACHE_ST_I64;
+                } else if (cache.extcache[i].t == EXT_CACHE_ST_F && cache_i2.extcache[i].t == EXT_CACHE_ST_I64) {
+                    MESSAGE(LOG_DUMP, "\t  - Convert %s\n", getCacheName(cache.extcache[i].t, cache.extcache[i].n));
+                    FCVTLS(s1, EXTREG(i), RD_RTZ);
+                    FMVDX(EXTREG(i), s1);
+                    cache.extcache[i].t = EXT_CACHE_ST_D;
+                } else if (cache.extcache[i].t == EXT_CACHE_ST_I64 && cache_i2.extcache[i].t == EXT_CACHE_ST_F) {
+                    MESSAGE(LOG_DUMP, "\t  - Convert %s\n", getCacheName(cache.extcache[i].t, cache.extcache[i].n));
+                    FMVXD(s1, EXTREG(i));
+                    FCVTSL(EXTREG(i), s1, RD_RTZ);
+                    cache.extcache[i].t = EXT_CACHE_ST_F;
+                } else if (cache.extcache[i].t == EXT_CACHE_ST_I64 && cache_i2.extcache[i].t == EXT_CACHE_ST_D) {
+                    MESSAGE(LOG_DUMP, "\t  - Convert %s\n", getCacheName(cache.extcache[i].t, cache.extcache[i].n));
+                    FMVXD(s1, EXTREG(i));
+                    FCVTDL(EXTREG(i), s1, RD_RTZ);
+                    cache.extcache[i].t = EXT_CACHE_ST_D;
                 }
             }
         }
@@ -2124,7 +2187,9 @@ void fpu_propagate_stack(dynarec_rv64_t* dyn, int ninst)
 {
     if(dyn->e.stack_pop) {
         for(int j=0; j<24; ++j)
-            if((dyn->e.extcache[j].t == EXT_CACHE_ST_D || dyn->e.extcache[j].t == EXT_CACHE_ST_F)) {
+            if ((dyn->e.extcache[j].t == EXT_CACHE_ST_D
+                    || dyn->e.extcache[j].t == EXT_CACHE_ST_F
+                    || dyn->e.extcache[j].t == EXT_CACHE_ST_I64)) {
                 if(dyn->e.extcache[j].n<dyn->e.stack_pop)
                     dyn->e.extcache[j].v = 0;
                 else
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 140d90c1..3fb8e654 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -1338,16 +1338,19 @@ int extcache_st_coherency(dynarec_rv64_t* dyn, int ninst, int a, int b);
 
 #if STEP == 0
 #define ST_IS_F(A)        0
+#define ST_IS_I64(A)      0
 #define X87_COMBINE(A, B) EXT_CACHE_ST_D
 #define X87_ST0           EXT_CACHE_ST_D
 #define X87_ST(A)         EXT_CACHE_ST_D
 #elif STEP == 1
 #define ST_IS_F(A)        (extcache_get_current_st(dyn, ninst, A) == EXT_CACHE_ST_F)
+#define ST_IS_I64(A)      (extcache_get_current_st(dyn, ninst, A) == EXT_CACHE_ST_I64)
 #define X87_COMBINE(A, B) extcache_combine_st(dyn, ninst, A, B)
-#define X87_ST0           extcache_get_current_st(dyn, ninst, 0)
-#define X87_ST(A)         extcache_get_current_st(dyn, ninst, A)
+#define X87_ST0           extcache_no_i64(dyn, ninst, 0, extcache_get_current_st(dyn, ninst, 0))
+#define X87_ST(A)         extcache_no_i64(dyn, ninst, A, extcache_get_current_st(dyn, ninst, A))
 #else
 #define ST_IS_F(A) (extcache_get_st(dyn, ninst, A) == EXT_CACHE_ST_F)
+#define ST_IS_I64(A) (extcache_get_st(dyn, ninst, A) == EXT_CACHE_ST_I64)
 #if STEP == 3
 #define X87_COMBINE(A, B) extcache_st_coherency(dyn, ninst, A, B)
 #else
diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h
index 907de0a3..6cd32413 100644
--- a/src/dynarec/rv64/dynarec_rv64_private.h
+++ b/src/dynarec/rv64/dynarec_rv64_private.h
@@ -10,13 +10,14 @@ typedef struct instsize_s instsize_t;
 
 #define BARRIER_MAYBE   8
 
-#define EXT_CACHE_NONE 0
-#define EXT_CACHE_ST_D 1
-#define EXT_CACHE_ST_F 2
-#define EXT_CACHE_MM   3
-#define EXT_CACHE_SS   4
-#define EXT_CACHE_SD   5
-#define EXT_CACHE_SCR  6
+#define EXT_CACHE_NONE   0
+#define EXT_CACHE_ST_D   1
+#define EXT_CACHE_ST_F   2
+#define EXT_CACHE_ST_I64 3
+#define EXT_CACHE_MM     4
+#define EXT_CACHE_SS     5
+#define EXT_CACHE_SD     6
+#define EXT_CACHE_SCR    7
 typedef union ext_cache_s {
     int8_t           v;
     struct {