diff options
Diffstat (limited to 'src/dynarec/arm64/dynarec_arm64_helper.c')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 309 |
1 files changed, 250 insertions, 59 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index 7aaf098e..13b58359 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -1,7 +1,6 @@ #include <stdio.h> #include <stdlib.h> #include <stddef.h> -#include <pthread.h> #include <errno.h> #include <assert.h> #include <string.h> @@ -19,7 +18,6 @@ #include "x64trace.h" #include "dynarec_native.h" #include "../dynablock_private.h" -#include "../tools/bridge_private.h" #include "custommem.h" #include "arm64_printer.h" @@ -27,11 +25,16 @@ #include "dynarec_arm64_functions.h" #include "dynarec_arm64_helper.h" +static uintptr_t geted_32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int* unscaled, int absmax, uint32_t mask, int* l, int s); + /* setup r2 to address pointed by ED, also fixaddress is an optionnal delta in the range [-absmax, +absmax], with delta&mask==0 to be added to ed for LDR/STR */ uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int* unscaled, int absmax, uint32_t mask, rex_t rex, int *l, int s, int delta) { MAYUSE(dyn); MAYUSE(ninst); MAYUSE(delta); + if(rex.is32bits) + return geted_32(dyn, addr, ninst, nextop, ed, hint, fixaddress, unscaled, absmax, mask, l, s); + int lock = l?((l==LOCK_LOCK)?1:2):0; if(unscaled) *unscaled = 0; @@ -119,7 +122,7 @@ uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, u } if(nextop&0x80) i64 = F32S; - else + else i64 = F8S; if(i64==0 || ((i64>=absmin) && (i64<=absmax) && !(i64&mask)) || (unscaled && (i64>-256) && (i64<256))) { *fixaddress = i64; @@ -183,6 +186,141 @@ uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, u return addr; } +static uintptr_t geted_32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int* unscaled, int absmax, uint32_t mask, int* l, int s) +{ + MAYUSE(dyn); MAYUSE(ninst); + + int lock = l?((l==LOCK_LOCK)?1:2):0; + if(unscaled) + *unscaled = 0; + if(lock==2) + *l = 0; + uint8_t ret = x2; + uint8_t scratch = x2; + *fixaddress = 0; + if(hint>0) ret = hint; + if(hint>0 && hint<xRAX) scratch = hint; + int absmin = 0; + if(s) absmin=-absmax; + MAYUSE(scratch); + if(!(nextop&0xC0)) { + if((nextop&7)==4) { + uint8_t sib = F8; + int sib_reg = (sib>>3)&7; + if((sib&0x7)==5) { + int64_t tmp = F32S; + if (sib_reg!=4) { + if(tmp && (!((tmp>=absmin) && (tmp<=absmax) && !(tmp&mask))) || !(unscaled && (tmp>-256) && (tmp<256))) { + MOV32w(scratch, tmp); + ADDw_REG_LSL(ret, scratch, xRAX+sib_reg, (sib>>6)); + } else { + LSLw(ret, xRAX+sib_reg, (sib>>6)); + *fixaddress = tmp; + if(unscaled && (tmp>-256) && (tmp<256)) + *unscaled = 1; + } + } else { + switch(lock) { + case 1: addLockAddress((int32_t)tmp); break; + case 2: if(isLockAddress((int32_t)tmp)) *l=1; break; + } + MOV32w(ret, tmp); + } + } else { + if (sib_reg!=4) { + ADDw_REG_LSL(ret, xRAX+(sib&0x7), xRAX+sib_reg, (sib>>6)); + } else { + ret = xRAX+(sib&0x7); + } + } + } else if((nextop&7)==5) { + uint64_t tmp = F32; + MOV32w(ret, tmp); + switch(lock) { + case 1: addLockAddress(tmp); break; + case 2: if(isLockAddress(tmp)) *l=1; break; + } + } else { + ret = xRAX+(nextop&7); + if(ret==hint) { + MOVw_REG(hint, ret); //to clear upper part + } + } + } else { + int64_t i32; + uint8_t sib = 0; + int sib_reg = 0; + if((nextop&7)==4) { + sib = F8; + sib_reg = (sib>>3)&7; + } + if(nextop&0x80) + i32 = F32S; + else + i32 = F8S; + if(i32==0 || ((i32>=absmin) && (i32<=absmax) && !(i32&mask)) || (unscaled && (i32>-256) && (i32<256))) { + *fixaddress = i32; + if(unscaled && (i32>-256) && (i32<256)) + *unscaled = 1; + if((nextop&7)==4) { + if (sib_reg!=4) { + ADDw_REG_LSL(ret, xRAX+(sib&0x07), xRAX+sib_reg, (sib>>6)); + } else { + ret = xRAX+(sib&0x07); + } + } else { + ret = xRAX+(nextop&0x07); + } + } else { + int64_t sub = (i32<0)?1:0; + if(sub) i32 = -i32; + if(i32<0x1000) { + if((nextop&7)==4) { + if (sib_reg!=4) { + ADDw_REG_LSL(scratch, xRAX+(sib&0x07), xRAX+sib_reg, (sib>>6)); + } else { + scratch = xRAX+(sib&0x07); + } + } else + scratch = xRAX+(nextop&0x07); + if(sub) { + SUBw_U12(ret, scratch, i32); + } else { + ADDw_U12(ret, scratch, i32); + } + } else { + MOV32w(scratch, i32); + if((nextop&7)==4) { + if (sib_reg!=4) { + if(sub) { + SUBw_REG(scratch, xRAX+(sib&0x07), scratch); + } else { + ADDw_REG(scratch, scratch, xRAX+(sib&0x07)); + } + ADDw_REG_LSL(ret, scratch, xRAX+sib_reg, (sib>>6)); + } else { + PASS3(int tmp = xRAX+(sib&0x07)); + if(sub) { + SUBw_REG(ret, tmp, scratch); + } else { + ADDw_REG(ret, tmp, scratch); + } + } + } else { + PASS3(int tmp = xRAX+(nextop&0x07)); + if(sub) { + SUBw_REG(ret, tmp, scratch); + } else { + ADDw_REG(ret, tmp, scratch); + } + } + } + } + } + *ed = ret; + return addr; +} + /* setup r2 to address pointed by ED, also fixaddress is an optionnal delta in the range [-absmax, +absmax], with delta&mask==0 to be added to ed for LDR/STR */ uintptr_t geted32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int* unscaled, int absmax, uint32_t mask, rex_t rex, int* l, int s, int delta) { @@ -256,9 +394,9 @@ uintptr_t geted32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, } if(nextop&0x80) i64 = F32S; - else + else i64 = F8S; - if(i64==0 || ((i64>=absmin) && (i64<=absmax) && !(i64&mask)) || (unscaled && (i64>-256) && (i64>256))) { + if(i64==0 || ((i64>=absmin) && (i64<=absmax) && !(i64&mask)) || (unscaled && (i64>-256) && (i64<256))) { *fixaddress = i64; if(unscaled && (i64>-256) && (i64<256)) *unscaled = 1; @@ -339,8 +477,8 @@ uintptr_t geted16(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, int64_t offset = 0; int absmin = 0; if(s) absmin = -absmax; - if(!n && m==6) { - offset = F16; + if(!n && (m&7)==6) { + offset = F16S; MOVZw(ret, offset); } else { switch(n) { @@ -458,18 +596,18 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst) } CLEARIP(); #ifdef HAVE_TRACE - //MOVx(x3, 15); no access to PC reg + //MOVx(x3, 15); no access to PC reg #endif SMEND(); BLR(x2); // save LR... } -void ret_to_epilog(dynarec_arm_t* dyn, int ninst) +void ret_to_epilog(dynarec_arm_t* dyn, int ninst, rex_t rex) { MAYUSE(dyn); MAYUSE(ninst); MESSAGE(LOG_DUMP, "Ret to epilog\n"); - POP1(xRIP); - MOVx_REG(x1, xRIP); + POP1z(xRIP); + MOVz_REG(x1, xRIP); SMEND(); if(box64_dynarec_callret) { // pop the actual return address for ARM stack @@ -496,18 +634,18 @@ void ret_to_epilog(dynarec_arm_t* dyn, int ninst) CLEARIP(); } -void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n) +void retn_to_epilog(dynarec_arm_t* dyn, int ninst, rex_t rex, int n) { MAYUSE(dyn); MAYUSE(ninst); MESSAGE(LOG_DUMP, "Retn to epilog\n"); - POP1(xRIP); + POP1z(xRIP); if(n>0xfff) { MOV32w(w1, n); - ADDx_REG(xRSP, xRSP, x1); + ADDz_REG(xRSP, xRSP, x1); } else { - ADDx_U12(xRSP, xRSP, n); + ADDz_U12(xRSP, xRSP, n); } - MOVx_REG(x1, xRIP); + MOVz_REG(x1, xRIP); SMEND(); if(box64_dynarec_callret) { // pop the actual return address for ARM stack @@ -541,24 +679,34 @@ void iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is64bits) MESSAGE(LOG_DUMP, "IRet to epilog\n"); // POP IP NOTEST(x2); - POP1(xRIP); - // POP CS - POP1(x2); + if(is64bits) { + POP1(xRIP); + POP1(x2); + POP1(xFlags); + } else { + POP1_32(xRIP); + POP1_32(x2); + POP1_32(xFlags); + } + // x2 is CS STRH_U12(x2, xEmu, offsetof(x64emu_t, segs[_CS])); - MOVZw(x1, 0); - STRx_U12(x1, xEmu, offsetof(x64emu_t, segs_serial[_CS])); - STRx_U12(x1, xEmu, offsetof(x64emu_t, segs_serial[_SS])); - // POP EFLAGS - POP1(xFlags); + STRw_U12(xZR, xEmu, offsetof(x64emu_t, segs_serial[_CS])); + // clean EFLAGS MOV32w(x1, 0x3F7FD7); ANDx_REG(xFlags, xFlags, x1); - ORRx_mask(xFlags, xFlags, 1, 0b111111, 0); + ORRx_mask(xFlags, xFlags, 1, 0b111111, 0); // xFlags | 0b10 SET_DFNONE(x1); // POP RSP - POP1(x3); + if(is64bits) { + POP1(x3); //rsp + POP1(x2); //ss + } else { + POP1_32(x3); //rsp + POP1_32(x2); //ss + } // POP SS - POP1(x2); STRH_U12(x2, xEmu, offsetof(x64emu_t, segs[_SS])); + STRw_U12(xZR, xEmu, offsetof(x64emu_t, segs_serial[_SS])); // set new RSP MOVx_REG(xRSP, x3); // Ret.... @@ -698,7 +846,9 @@ static void x87_reset(dynarec_arm_t* dyn) dyn->n.swapped = 0; dyn->n.barrier = 0; for(int i=0; i<24; ++i) - if(dyn->n.neoncache[i].t == NEON_CACHE_ST_F || dyn->n.neoncache[i].t == NEON_CACHE_ST_D) + if(dyn->n.neoncache[i].t == NEON_CACHE_ST_F + || dyn->n.neoncache[i].t == NEON_CACHE_ST_D + || dyn->n.neoncache[i].t == NEON_CACHE_ST_I64) dyn->n.neoncache[i].v = 0; } @@ -759,7 +909,9 @@ int x87_do_push(dynarec_arm_t* dyn, int ninst, int s1, int t) dyn->n.stack_push+=1; // move all regs in cache, and find a free one for(int j=0; j<24; ++j) - if((dyn->n.neoncache[j].t == NEON_CACHE_ST_D) || (dyn->n.neoncache[j].t == NEON_CACHE_ST_F)) + if((dyn->n.neoncache[j].t == NEON_CACHE_ST_D) + ||(dyn->n.neoncache[j].t == NEON_CACHE_ST_F) + ||(dyn->n.neoncache[j].t == NEON_CACHE_ST_I64)) ++dyn->n.neoncache[j].n; int ret = -1; for(int i=0; i<8; ++i) @@ -768,13 +920,7 @@ int x87_do_push(dynarec_arm_t* dyn, int ninst, int s1, int t) else if(ret==-1) { dyn->n.x87cache[i] = 0; ret=dyn->n.x87reg[i]=fpu_get_reg_x87(dyn, t, 0); - #if STEP == 1 - // need to check if reg is compatible with float - if((ret>15) && (t == NEON_CACHE_ST_F)) - dyn->n.neoncache[ret].t = NEON_CACHE_ST_D; - #else dyn->n.neoncache[ret].t = X87_ST0; - #endif } return ret; } @@ -788,7 +934,9 @@ void x87_do_push_empty(dynarec_arm_t* dyn, int ninst, int s1) dyn->n.stack_push+=1; // move all regs in cache for(int j=0; j<24; ++j) - if((dyn->n.neoncache[j].t == NEON_CACHE_ST_D) || (dyn->n.neoncache[j].t == NEON_CACHE_ST_F)) + if((dyn->n.neoncache[j].t == NEON_CACHE_ST_D) + ||(dyn->n.neoncache[j].t == NEON_CACHE_ST_F) + ||(dyn->n.neoncache[j].t == NEON_CACHE_ST_I64)) ++dyn->n.neoncache[j].n; for(int i=0; i<8; ++i) if(dyn->n.x87cache[i]!=-1) @@ -985,7 +1133,9 @@ int x87_get_current_cache(dynarec_arm_t* dyn, int ninst, int st, int t) for (int i=0; i<8; ++i) { if(dyn->n.x87cache[i]==st) { #if STEP == 1 - if(t==NEON_CACHE_ST_D && (dyn->n.neoncache[dyn->n.x87reg[i]].t==NEON_CACHE_ST_F)) + if(t==NEON_CACHE_ST_D && (dyn->n.neoncache[dyn->n.x87reg[i]].t==NEON_CACHE_ST_F || dyn->n.neoncache[dyn->n.x87reg[i]].t==NEON_CACHE_ST_I64)) + neoncache_promote_double(dyn, ninst, st); + else if(t==NEON_CACHE_ST_F && (dyn->n.neoncache[dyn->n.x87reg[i]].t==NEON_CACHE_ST_I64)) neoncache_promote_double(dyn, ninst, st); #endif return i; @@ -1031,7 +1181,9 @@ int x87_get_cache(dynarec_arm_t* dyn, int ninst, int populate, int s1, int s2, i int x87_get_neoncache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) { for(int ii=0; ii<24; ++ii) - if((dyn->n.neoncache[ii].t == NEON_CACHE_ST_F || dyn->n.neoncache[ii].t == NEON_CACHE_ST_D) + if((dyn->n.neoncache[ii].t == NEON_CACHE_ST_F + || dyn->n.neoncache[ii].t == NEON_CACHE_ST_D + || dyn->n.neoncache[ii].t == NEON_CACHE_ST_I64) && dyn->n.neoncache[ii].n==st) return ii; assert(0); @@ -1069,6 +1221,9 @@ void x87_refresh(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) if(dyn->n.neoncache[dyn->n.x87reg[ret]].t==NEON_CACHE_ST_F) { FCVT_D_S(31, dyn->n.x87reg[ret]); VSTR64_REG_LSL3(31, s1, s2); + } else if(dyn->n.neoncache[dyn->n.x87reg[ret]].t==NEON_CACHE_ST_I64) { + SCVTFDD(31, dyn->n.x87reg[ret]); + VSTR64_REG_LSL3(31, s1, s2); } else { VSTR64_REG_LSL3(dyn->n.x87reg[ret], s1, s2); } @@ -1086,7 +1241,7 @@ void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) return; MESSAGE(LOG_DUMP, "\tForget x87 Cache for ST%d\n", st); #if STEP == 1 - if(dyn->n.neoncache[dyn->n.x87reg[ret]].t==NEON_CACHE_ST_F) + if(dyn->n.neoncache[dyn->n.x87reg[ret]].t==NEON_CACHE_ST_F || dyn->n.neoncache[dyn->n.x87reg[ret]].t==NEON_CACHE_ST_I64) neoncache_promote_double(dyn, ninst, st); #endif // prepare offset to fpu => s1 @@ -1117,7 +1272,7 @@ void x87_reget_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) // refresh the value MESSAGE(LOG_DUMP, "\tRefresh x87 Cache for ST%d\n", st); #if STEP == 1 - if(dyn->n.neoncache[dyn->n.x87reg[i]].t==NEON_CACHE_ST_F) + if(dyn->n.neoncache[dyn->n.x87reg[i]].t==NEON_CACHE_ST_F || dyn->n.neoncache[dyn->n.x87reg[i]].t==NEON_CACHE_ST_I64) neoncache_promote_double(dyn, ninst, st); #endif ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); @@ -1443,10 +1598,20 @@ static int findCacheSlot(dynarec_arm_t* dyn, int ninst, int t, int n, neoncache_ case NEON_CACHE_ST_F: if (t==NEON_CACHE_ST_D) return i; + if (t==NEON_CACHE_ST_I64) + return i; break; case NEON_CACHE_ST_D: if (t==NEON_CACHE_ST_F) return i; + if (t==NEON_CACHE_ST_I64) + return i; + break; + case NEON_CACHE_ST_I64: + if (t==NEON_CACHE_ST_F) + return i; + if (t==NEON_CACHE_ST_D) + return i; break; case NEON_CACHE_XMMR: if(t==NEON_CACHE_XMMW) @@ -1471,7 +1636,7 @@ static void swapCache(dynarec_arm_t* dyn, int ninst, int i, int j, neoncache_t * quad =1; if(cache->neoncache[j].t==NEON_CACHE_XMMR || cache->neoncache[j].t==NEON_CACHE_XMMW) quad =1; - + if(!cache->neoncache[i].v) { // a mov is enough, no need to swap MESSAGE(LOG_DUMP, "\t - Moving %d <- %d\n", i, j); @@ -1531,12 +1696,13 @@ static void loadCache(dynarec_arm_t* dyn, int ninst, int stack_cnt, int s1, int VLDR128_U12(i, xEmu, offsetof(x64emu_t, xmm[n])); break; case NEON_CACHE_MM: - MESSAGE(LOG_DUMP, "\t - Loading %s\n", getCacheName(t, n)); + MESSAGE(LOG_DUMP, "\t - Loading %s\n", getCacheName(t, n)); VLDR64_U12(i, xEmu, offsetof(x64emu_t, mmx[i])); break; case NEON_CACHE_ST_D: case NEON_CACHE_ST_F: - MESSAGE(LOG_DUMP, "\t - Loading %s\n", getCacheName(t, n)); + case NEON_CACHE_ST_I64: + MESSAGE(LOG_DUMP, "\t - Loading %s\n", getCacheName(t, n)); if((*s3_top) == 0xffff) { LDRw_U12(s3, xEmu, offsetof(x64emu_t, top)); *s3_top = 0; @@ -1557,12 +1723,15 @@ static void loadCache(dynarec_arm_t* dyn, int ninst, int stack_cnt, int s1, int if(t==NEON_CACHE_ST_F) { FCVT_S_D(i, i); } - break; + if(t==NEON_CACHE_ST_I64) { + VFCVTZSQD(i, i); + } + break; case NEON_CACHE_NONE: case NEON_CACHE_SCR: default: /* nothing done */ MESSAGE(LOG_DUMP, "\t - ignoring %s\n", getCacheName(t, n)); - break; + break; } cache->neoncache[i].n = n; cache->neoncache[i].t = t; @@ -1579,12 +1748,13 @@ static void unloadCache(dynarec_arm_t* dyn, int ninst, int stack_cnt, int s1, in VSTR128_U12(i, xEmu, offsetof(x64emu_t, xmm[n])); break; case NEON_CACHE_MM: - MESSAGE(LOG_DUMP, "\t - Unloading %s\n", getCacheName(t, n)); + MESSAGE(LOG_DUMP, "\t - Unloading %s\n", getCacheName(t, n)); VSTR64_U12(i, xEmu, offsetof(x64emu_t, mmx[n])); break; case NEON_CACHE_ST_D: case NEON_CACHE_ST_F: - MESSAGE(LOG_DUMP, "\t - Unloading %s\n", getCacheName(t, n)); + case NEON_CACHE_ST_I64: + MESSAGE(LOG_DUMP, "\t - Unloading %s\n", getCacheName(t, n)); if((*s3_top)==0xffff) { LDRw_U12(s3, xEmu, offsetof(x64emu_t, top)); *s3_top = 0; @@ -1603,14 +1773,16 @@ static void unloadCache(dynarec_arm_t* dyn, int ninst, int stack_cnt, int s1, in *s2_val = 0; if(t==NEON_CACHE_ST_F) { FCVT_D_S(i, i); + } else if (t==NEON_CACHE_ST_I64) { + SCVTFDD(i, i); } VSTR64_U12(i, s2, offsetof(x64emu_t, x87)); - break; + break; case NEON_CACHE_NONE: case NEON_CACHE_SCR: default: /* nothing done */ MESSAGE(LOG_DUMP, "\t - ignoring %s\n", getCacheName(t, n)); - break; + break; } cache->neoncache[i].v = 0; } @@ -1732,6 +1904,23 @@ static void fpuCacheTransform(dynarec_arm_t* dyn, int ninst, int s1, int s2, int MESSAGE(LOG_DUMP, "\t - Convert %s\n", getCacheName(cache.neoncache[i].t, cache.neoncache[i].n)); FCVT_D_S(i, i); cache.neoncache[i].t = NEON_CACHE_ST_D; + } else if(cache.neoncache[i].t == NEON_CACHE_ST_D && cache_i2.neoncache[i].t == NEON_CACHE_ST_I64) { + MESSAGE(LOG_DUMP, "\t - Convert %s\n", getCacheName(cache.neoncache[i].t, cache.neoncache[i].n)); + VFCVTZSQD(i, i); + cache.neoncache[i].t = NEON_CACHE_ST_I64; + } else if(cache.neoncache[i].t == NEON_CACHE_ST_F && cache_i2.neoncache[i].t == NEON_CACHE_ST_I64) { + MESSAGE(LOG_DUMP, "\t - Convert %s\n", getCacheName(cache.neoncache[i].t, cache.neoncache[i].n)); + VFCVTZSQS(i, i); + cache.neoncache[i].t = NEON_CACHE_ST_D; + } else if(cache.neoncache[i].t == NEON_CACHE_ST_I64 && cache_i2.neoncache[i].t == NEON_CACHE_ST_F) { + MESSAGE(LOG_DUMP, "\t - Convert %s\n", getCacheName(cache.neoncache[i].t, cache.neoncache[i].n)); + SCVTFDD(i, i); + FCVT_S_D(i, i); + cache.neoncache[i].t = NEON_CACHE_ST_F; + } else if(cache.neoncache[i].t == NEON_CACHE_ST_I64 && cache_i2.neoncache[i].t == NEON_CACHE_ST_D) { + MESSAGE(LOG_DUMP, "\t - Convert %s\n", getCacheName(cache.neoncache[i].t, cache.neoncache[i].n)); + SCVTFDD(i, i); + cache.neoncache[i].t = NEON_CACHE_ST_D; } else if(cache.neoncache[i].t == NEON_CACHE_XMMR && cache_i2.neoncache[i].t == NEON_CACHE_XMMW) { cache.neoncache[i].t = NEON_CACHE_XMMW; } else if(cache.neoncache[i].t == NEON_CACHE_XMMW && cache_i2.neoncache[i].t == NEON_CACHE_XMMR) { @@ -1759,18 +1948,18 @@ static void flagsCacheTransform(dynarec_arm_t* dyn, int ninst, int s1) int go = 0; switch (dyn->insts[jmp].f_entry.pending) { case SF_UNKNOWN: break; - case SF_SET: - if(dyn->f.pending!=SF_SET && dyn->f.pending!=SF_SET_PENDING) - go = 1; + case SF_SET: + if(dyn->f.pending!=SF_SET && dyn->f.pending!=SF_SET_PENDING) + go = 1; break; case SF_SET_PENDING: - if(dyn->f.pending!=SF_SET + if(dyn->f.pending!=SF_SET && dyn->f.pending!=SF_SET_PENDING - && dyn->f.pending!=SF_PENDING) - go = 1; + && dyn->f.pending!=SF_PENDING) + go = 1; break; case SF_PENDING: - if(dyn->f.pending!=SF_SET + if(dyn->f.pending!=SF_SET && dyn->f.pending!=SF_SET_PENDING && dyn->f.pending!=SF_PENDING) go = 1; @@ -1783,11 +1972,11 @@ static void flagsCacheTransform(dynarec_arm_t* dyn, int ninst, int s1) if(go) { if(dyn->f.pending!=SF_PENDING) { LDRw_U12(s1, xEmu, offsetof(x64emu_t, df)); - j64 = (GETMARK3)-(dyn->native_size); + j64 = (GETMARKF2)-(dyn->native_size); CBZw(s1, j64); } CALL_(UpdateFlags, -1, 0); - MARK3; + MARKF2; } #endif } @@ -1883,7 +2072,9 @@ void fpu_propagate_stack(dynarec_arm_t* dyn, int ninst) { if(dyn->n.stack_pop) { for(int j=0; j<24; ++j) - if((dyn->n.neoncache[j].t == NEON_CACHE_ST_D || dyn->n.neoncache[j].t == NEON_CACHE_ST_F)) { + if((dyn->n.neoncache[j].t == NEON_CACHE_ST_D + || dyn->n.neoncache[j].t == NEON_CACHE_ST_F + || dyn->n.neoncache[j].t == NEON_CACHE_ST_I64)) { if(dyn->n.neoncache[j].n<dyn->n.stack_pop) dyn->n.neoncache[j].v = 0; else |