65 files changed, 812 insertions, 850 deletions
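This patch folds the per-architecture extension globals (arm64_*, rv64_*, la64_*) into a single cpuext variable of type cpu_ext_t, declared in the new core_arch.h header and defined in src/core.c as cpu_ext_t cpuext = {0}. The hand-written helpers in arm64_lock.S now load the first 32-bit word of cpuext and branch with "tbnz w3, #0" instead of testing a standalone arm64_atomics int with "cbnz", which implies the atomics flag sits in bit 0 of that word. Since core_arch.h itself is not part of the hunks below, here is a minimal, hypothetical sketch of what such a structure could look like (single-bit bitfields, atomics first) -- an assumption for orientation only, not the actual definition:

#include <stdint.h>

/* Hypothetical layout -- the real definition lives in src/core_arch.h, which is
 * not in this diff. The only hard constraint visible from the patch is that, on
 * ARM64 builds, the atomics flag must be bit 0 of the first 32-bit word, since
 * arm64_lock.S does "ldr w3, [x3]" followed by "tbnz w3, #0, ...". */
typedef struct cpu_ext_s {
#ifdef ARM64
    uint32_t atomics : 1;   // bit 0: tested directly by the lock helpers in arm64_lock.S
    uint32_t aes     : 1;
    uint32_t pmull   : 1;
    uint32_t crc32   : 1;
    uint32_t sha1    : 1;
    uint32_t sha2    : 1;
    uint32_t uscat   : 1;
    uint32_t flagm   : 1;
    uint32_t flagm2  : 1;
    uint32_t frintts : 1;
    uint32_t afp     : 1;
    uint32_t rndr    : 1;
#elif defined(RV64)
    uint32_t zba : 1, zbb : 1, zbc : 1, zbs : 1;
    uint32_t vector : 1, xtheadvector : 1;
    uint32_t xtheadba : 1, xtheadbb : 1, xtheadbs : 1;
    uint32_t xtheadcondmov : 1, xtheadmemidx : 1, xtheadmempair : 1;
    uint32_t xtheadfmemidx : 1, xtheadmac : 1, xtheadfmv : 1;
    int vlen;               // vector length, apparently now in bytes (the log prints "_zvl%d", cpuext.vlen*8)
#elif defined(LA64)
    uint32_t lbt     : 1;
    uint32_t lam_bh  : 1;
    uint32_t lamcas  : 1;
    uint32_t scq     : 1;
#endif
} cpu_ext_t;

extern cpu_ext_t cpuext;    // defined in src/core.c as: cpu_ext_t cpuext = {0};

Packing the flags this way keeps the assembly-side check to a single ldr + tbnz and lets new extensions be added without growing a list of extern ints, while the C-side dynarec code switches from "if(arm64_atomics)" style tests to "if(cpuext.atomics)" as shown throughout the hunks below.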
diff --git a/src/core.c b/src/core.c index 80a737c4..059003b3 100644 --- a/src/core.c +++ b/src/core.c @@ -49,6 +49,7 @@ #include "env.h" #include "cleanup.h" #include "freq.h" +#include "core_arch.h" box64context_t *my_context = NULL; extern box64env_t box64env; @@ -75,43 +76,7 @@ uint32_t default_fs = 0; int box64_isglibc234 = 0; #ifdef DYNAREC -#ifdef ARM64 -int arm64_asimd = 0; -int arm64_aes = 0; -int arm64_pmull = 0; -int arm64_crc32 = 0; -int arm64_atomics = 0; -int arm64_sha1 = 0; -int arm64_sha2 = 0; -int arm64_uscat = 0; -int arm64_flagm = 0; -int arm64_flagm2 = 0; -int arm64_frintts = 0; -int arm64_afp = 0; -int arm64_rndr = 0; -#elif defined(RV64) -int rv64_zba = 0; -int rv64_zbb = 0; -int rv64_zbc = 0; -int rv64_zbs = 0; -int rv64_vector = 0; // rvv 1.0 or xtheadvector -int rv64_xtheadvector = 0; -int rv64_vlen = 0; -int rv64_xtheadba = 0; -int rv64_xtheadbb = 0; -int rv64_xtheadbs = 0; -int rv64_xtheadcondmov = 0; -int rv64_xtheadmemidx = 0; -int rv64_xtheadmempair = 0; -int rv64_xtheadfmemidx = 0; -int rv64_xtheadmac = 0; -int rv64_xtheadfmv = 0; -#elif defined(LA64) -int la64_lbt = 0; -int la64_lam_bh = 0; -int la64_lamcas = 0; -int la64_scq = 0; -#endif +cpu_ext_t cpuext = {0}; #endif int box64_wine = 0; @@ -235,70 +200,70 @@ void GatherDynarecExtensions() return; } if(hwcap&HWCAP_CRC32) - arm64_crc32 = 1; + cpuext.crc32 = 1; if(hwcap&HWCAP_PMULL) - arm64_pmull = 1; + cpuext.pmull = 1; if(hwcap&HWCAP_AES) - arm64_aes = 1; + cpuext.aes = 1; if(hwcap&HWCAP_ATOMICS) - arm64_atomics = 1; + cpuext.atomics = 1; #ifdef HWCAP_SHA1 if(hwcap&HWCAP_SHA1) - arm64_sha1 = 1; + cpuext.sha1 = 1; #endif #ifdef HWCAP_SHA2 if(hwcap&HWCAP_SHA2) - arm64_sha2 = 1; + cpuext.sha2 = 1; #endif #ifdef HWCAP_USCAT if(hwcap&HWCAP_USCAT) - arm64_uscat = 1; + cpuext.uscat = 1; #endif #ifdef HWCAP_FLAGM if(hwcap&HWCAP_FLAGM) - arm64_flagm = 1; + cpuext.flagm = 1; #endif unsigned long hwcap2 = real_getauxval(AT_HWCAP2); #ifdef HWCAP2_FLAGM2 if(hwcap2&HWCAP2_FLAGM2) - arm64_flagm2 = 1; + cpuext.flagm2 = 1; #endif #ifdef HWCAP2_FRINT if(hwcap2&HWCAP2_FRINT) - arm64_frintts = 1; + cpuext.frintts = 1; #endif #ifdef HWCAP2_AFP if(hwcap2&HWCAP2_AFP) - arm64_afp = 1; + cpuext.afp = 1; #endif #ifdef HWCAP2_RNG if(hwcap2&HWCAP2_RNG) - arm64_rndr = 1; + cpuext.rndr = 1; #endif printf_log(LOG_INFO, "Dynarec for ARM64, with extension: ASIMD"); - if(arm64_aes) + if(cpuext.aes) printf_log_prefix(0, LOG_INFO, " AES"); - if(arm64_crc32) + if(cpuext.crc32) printf_log_prefix(0, LOG_INFO, " CRC32"); - if(arm64_pmull) + if(cpuext.pmull) printf_log_prefix(0, LOG_INFO, " PMULL"); - if(arm64_atomics) + if(cpuext.atomics) printf_log_prefix(0, LOG_INFO, " ATOMICS"); - if(arm64_sha1) + if(cpuext.sha1) printf_log_prefix(0, LOG_INFO, " SHA1"); - if(arm64_sha2) + if(cpuext.sha2) printf_log_prefix(0, LOG_INFO, " SHA2"); - if(arm64_uscat) + if(cpuext.uscat) printf_log_prefix(0, LOG_INFO, " USCAT"); - if(arm64_flagm) + if(cpuext.flagm) printf_log_prefix(0, LOG_INFO, " FLAGM"); - if(arm64_flagm2) + if(cpuext.flagm2) printf_log_prefix(0, LOG_INFO, " FLAGM2"); - if(arm64_frintts) + if(cpuext.frintts) printf_log_prefix(0, LOG_INFO, " FRINT"); - if(arm64_afp) + if(cpuext.afp) printf_log_prefix(0, LOG_INFO, " AFP"); - if(arm64_rndr) + if(cpuext.rndr) printf_log_prefix(0, LOG_INFO, " RNDR"); printf_log_prefix(0, LOG_INFO, "\n"); #elif defined(LA64) @@ -315,13 +280,13 @@ void GatherDynarecExtensions() return; } - if (la64_lbt = ((cpucfg2 >> 18) & 0b1)) + if (cpuext.lbt = ((cpucfg2 >> 18) & 0b1)) printf_log_prefix(0, LOG_INFO, " LBT_X86"); - if 
((la64_lam_bh = (cpucfg2 >> 27) & 0b1)) + if ((cpuext.lam_bh = (cpucfg2 >> 27) & 0b1)) printf_log_prefix(0, LOG_INFO, " LAM_BH"); - if ((la64_lamcas = (cpucfg2 >> 28) & 0b1)) + if ((cpuext.lamcas = (cpucfg2 >> 28) & 0b1)) printf_log_prefix(0, LOG_INFO, " LAMCAS"); - if ((la64_scq = (cpucfg2 >> 30) & 0b1)) + if ((cpuext.scq = (cpucfg2 >> 30) & 0b1)) printf_log_prefix(0, LOG_INFO, " SCQ"); } printf_log_prefix(0, LOG_INFO, "\n"); @@ -334,46 +299,46 @@ void GatherDynarecExtensions() if (p) { p = strtok(p, ","); while (p) { - if (!strcasecmp(p, "zba")) rv64_zba = 0; - if (!strcasecmp(p, "zbb")) rv64_zbb = 0; - if (!strcasecmp(p, "zbc")) rv64_zbc = 0; - if (!strcasecmp(p, "zbs")) rv64_zbs = 0; + if (!strcasecmp(p, "zba")) cpuext.zba = 0; + if (!strcasecmp(p, "zbb")) cpuext.zbb = 0; + if (!strcasecmp(p, "zbc")) cpuext.zbc = 0; + if (!strcasecmp(p, "zbs")) cpuext.zbs = 0; if (!strcasecmp(p, "vector")) { - rv64_vector = 0; - rv64_xtheadvector = 0; + cpuext.vector = 0; + cpuext.xtheadvector = 0; } - if (!strcasecmp(p, "xtheadba")) rv64_xtheadba = 0; - if (!strcasecmp(p, "xtheadbb")) rv64_xtheadbb = 0; - if (!strcasecmp(p, "xtheadbs")) rv64_xtheadbs = 0; - if (!strcasecmp(p, "xtheadmemidx")) rv64_xtheadmemidx = 0; - // if (!strcasecmp(p, "xtheadfmemidx")) rv64_xtheadfmemidx = 0; - // if (!strcasecmp(p, "xtheadmac")) rv64_xtheadmac = 0; - // if (!strcasecmp(p, "xtheadfmv")) rv64_xtheadfmv = 0; - if (!strcasecmp(p, "xtheadmempair")) rv64_xtheadmempair = 0; - if (!strcasecmp(p, "xtheadcondmov")) rv64_xtheadcondmov = 0; + if (!strcasecmp(p, "xtheadba")) cpuext.xtheadba = 0; + if (!strcasecmp(p, "xtheadbb")) cpuext.xtheadbb = 0; + if (!strcasecmp(p, "xtheadbs")) cpuext.xtheadbs = 0; + if (!strcasecmp(p, "xtheadmemidx")) cpuext.xtheadmemidx = 0; + // if (!strcasecmp(p, "xtheadfmemidx")) cpuext.xtheadfmemidx = 0; + // if (!strcasecmp(p, "xtheadmac")) cpuext.xtheadmac = 0; + // if (!strcasecmp(p, "xtheadfmv")) cpuext.xtheadfmv = 0; + if (!strcasecmp(p, "xtheadmempair")) cpuext.xtheadmempair = 0; + if (!strcasecmp(p, "xtheadcondmov")) cpuext.xtheadcondmov = 0; p = strtok(NULL, ","); } } } printf_log(LOG_INFO, "Dynarec for rv64g"); - if (rv64_vector && !rv64_xtheadvector) printf_log_prefix(0, LOG_INFO, "v"); - if (rv64_zba) printf_log_prefix(0, LOG_INFO, "_zba"); - if (rv64_zbb) printf_log_prefix(0, LOG_INFO, "_zbb"); - if (rv64_zbc) printf_log_prefix(0, LOG_INFO, "_zbc"); - if (rv64_zbs) printf_log_prefix(0, LOG_INFO, "_zbs"); - if (rv64_vector && !rv64_xtheadvector) printf_log_prefix(0, LOG_INFO, "_zvl%d", rv64_vlen); - if (rv64_xtheadba) printf_log_prefix(0, LOG_INFO, "_xtheadba"); - if (rv64_xtheadbb) printf_log_prefix(0, LOG_INFO, "_xtheadbb"); - if (rv64_xtheadbs) printf_log_prefix(0, LOG_INFO, "_xtheadbs"); - if (rv64_xtheadmempair) printf_log_prefix(0, LOG_INFO, "_xtheadmempair"); - if (rv64_xtheadcondmov) printf_log_prefix(0, LOG_INFO, "_xtheadcondmov"); - if (rv64_xtheadmemidx) printf_log_prefix(0, LOG_INFO, "_xtheadmemidx"); + if (cpuext.vector && !cpuext.xtheadvector) printf_log_prefix(0, LOG_INFO, "v"); + if (cpuext.zba) printf_log_prefix(0, LOG_INFO, "_zba"); + if (cpuext.zbb) printf_log_prefix(0, LOG_INFO, "_zbb"); + if (cpuext.zbc) printf_log_prefix(0, LOG_INFO, "_zbc"); + if (cpuext.zbs) printf_log_prefix(0, LOG_INFO, "_zbs"); + if (cpuext.vector && !cpuext.xtheadvector) printf_log_prefix(0, LOG_INFO, "_zvl%d", cpuext.vlen*8); + if (cpuext.xtheadba) printf_log_prefix(0, LOG_INFO, "_xtheadba"); + if (cpuext.xtheadbb) printf_log_prefix(0, LOG_INFO, "_xtheadbb"); + if (cpuext.xtheadbs) 
printf_log_prefix(0, LOG_INFO, "_xtheadbs"); + if (cpuext.xtheadmempair) printf_log_prefix(0, LOG_INFO, "_xtheadmempair"); + if (cpuext.xtheadcondmov) printf_log_prefix(0, LOG_INFO, "_xtheadcondmov"); + if (cpuext.xtheadmemidx) printf_log_prefix(0, LOG_INFO, "_xtheadmemidx"); // Disable the display since these are only detected but never used. - // if(rv64_xtheadfmemidx) printf_log_prefix(0, LOG_INFO, " xtheadfmemidx"); - // if(rv64_xtheadmac) printf_log_prefix(0, LOG_INFO, " xtheadmac"); - // if(rv64_xtheadfmv) printf_log_prefix(0, LOG_INFO, " xtheadfmv"); - if (rv64_xtheadvector) printf_log_prefix(0, LOG_INFO, "_xthvector"); + // if(cpuext.xtheadfmemidx) printf_log_prefix(0, LOG_INFO, " xtheadfmemidx"); + // if(cpuext.xtheadmac) printf_log_prefix(0, LOG_INFO, " xtheadmac"); + // if(cpuext.xtheadfmv) printf_log_prefix(0, LOG_INFO, " xtheadfmv"); + if (cpuext.xtheadvector) printf_log_prefix(0, LOG_INFO, "_xthvector"); printf_log_prefix(0, LOG_INFO, "\n"); #else #error Unsupported architecture diff --git a/src/dynarec/arm64/arm64_lock.S b/src/dynarec/arm64/arm64_lock.S index 74855129..2d85fe5e 100644 --- a/src/dynarec/arm64/arm64_lock.S +++ b/src/dynarec/arm64/arm64_lock.S @@ -5,7 +5,7 @@ .text .align 4 -.extern arm64_atomics +.extern cpuext .global arm64_lock_read_b .global arm64_lock_write_b .global arm64_lock_read_h @@ -106,10 +106,10 @@ arm64_lock_write_dq: arm64_lock_xchg_dd: - adrp x3, arm64_atomics - add x3, x3, #:lo12:arm64_atomics + adrp x3, cpuext + add x3, x3, #:lo12:cpuext ldr w3, [x3] - cbnz w3, arm64_atomic_xchg_dd + tbnz w3, #0, arm64_atomic_xchg_dd dmb ish arm64_lock_xchg_dd_0: // address is x0, value is x1, return old value in x0 @@ -126,10 +126,10 @@ arm64_atomic_xchg_dd: ret arm64_lock_xchg_d: - adrp x3, arm64_atomics - add x3, x3, #:lo12:arm64_atomics + adrp x3, cpuext + add x3, x3, #:lo12:cpuext ldr w3, [x3] - cbnz w3, arm64_atomic_xchg_d + tbnz w3, #0, arm64_atomic_xchg_d dmb ish arm64_lock_xchg_d_0: // address is x0, value is x1, return old value in x0 @@ -146,10 +146,10 @@ arm64_atomic_xchg_d: ret arm64_lock_xchg_h: - adrp x3, arm64_atomics - add x3, x3, #:lo12:arm64_atomics + adrp x3, cpuext + add x3, x3, #:lo12:cpuext ldr w3, [x3] - cbnz w3, arm64_atomic_xchg_h + tbnz w3, #0, arm64_atomic_xchg_h dmb ish arm64_lock_xchg_h_0: // address is x0, value is x1, return old value in x0 @@ -166,10 +166,10 @@ arm64_atomic_xchg_h: ret arm64_lock_xchg_b: - adrp x3, arm64_atomics - add x3, x3, #:lo12:arm64_atomics + adrp x3, cpuext + add x3, x3, #:lo12:cpuext ldr w3, [x3] - cbnz w3, arm64_atomic_xchg_b + tbnz w3, #0, arm64_atomic_xchg_b dmb ish arm64_lock_xchg_b_0: // address is x0, value is x1, return old value in x0 @@ -186,10 +186,10 @@ arm64_atomic_xchg_b: ret arm64_lock_storeifnull: - adrp x3, arm64_atomics - add x3, x3, #:lo12:arm64_atomics + adrp x3, cpuext + add x3, x3, #:lo12:cpuext ldr w3, [x3] - cbnz w3, arm64_atomic_storeifnull + tbnz w3, #0, arm64_atomic_storeifnull dmb ish 1: // address is x0, value is x1, x1 store to x0 only if [x0] is 0. return old [x0] value @@ -211,10 +211,10 @@ arm64_atomic_storeifnull: arm64_lock_storeifnull_d: - adrp x3, arm64_atomics - add x3, x3, #:lo12:arm64_atomics + adrp x3, cpuext + add x3, x3, #:lo12:cpuext ldr w3, [x3] - cbnz w3, arm64_atomic_storeifnull_d + tbnz w3, #0, arm64_atomic_storeifnull_d dmb ish 1: // address is x0, value is w1, w1 store to x0 only if [x0] is 0. 
return old [x0] value @@ -236,10 +236,10 @@ arm64_atomic_storeifnull_d: ret arm64_lock_storeifref: - adrp x3, arm64_atomics - add x3, x3, #:lo12:arm64_atomics + adrp x3, cpuext + add x3, x3, #:lo12:cpuext ldr w3, [x3] - cbnz w3, arm64_atomic_storeifref + tbnz w3, #0, arm64_atomic_storeifref dmb ish 1: // address is x0, value is x1, x1 store to x0 only if [x0] is x2. return new [x0] value (so x1 or old value) @@ -267,10 +267,10 @@ arm64_atomic_storeifref: ret arm64_lock_storeifref_d: - adrp x3, arm64_atomics - add x3, x3, #:lo12:arm64_atomics + adrp x3, cpuext + add x3, x3, #:lo12:cpuext ldr w3, [x3] - cbnz w3, arm64_atomic_storeifref_d + tbnz w3, #0, arm64_atomic_storeifref_d dmb ish 1: // address is x0, value is w1, w1 store to x0 only if [x0] is w2. return new [x0] value (so x1 or old value) @@ -298,10 +298,10 @@ arm64_atomic_storeifref_d: ret arm64_lock_storeifref2_d: - adrp x3, arm64_atomics - add x3, x3, #:lo12:arm64_atomics + adrp x3, cpuext + add x3, x3, #:lo12:cpuext ldr w3, [x3] - cbnz w3, arm64_atomic_storeifref2_d + tbnz w3, #0, arm64_atomic_storeifref2_d dmb ish 1: // address is x0, value is w1, w1 store to x0 only if [x0] is w2. return old [x0] value @@ -352,10 +352,10 @@ arm64_lock_decifnot0: ret arm64_lock_incif0: - adrp x3, arm64_atomics - add x3, x3, #:lo12:arm64_atomics + adrp x3, cpuext + add x3, x3, #:lo12:cpuext ldr w3, [x3] - cbnz w3, arm64_atomic_incif0 + tbnz w3, #0, arm64_atomic_incif0 dmb ish 1: ldaxr w1, [x0] diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index 443d497d..2947ee42 100644 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -1336,7 +1336,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BFIx(eb1, x4, eb2, 8); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - if(arm64_atomics) { + if(cpuext.atomics) { SWPALB(x4, x1, ed); } else { MARKLOCK; @@ -1363,7 +1363,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin GETGD; addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); if(!ALIGNED_ATOMICxw) { - if(arm64_uscat) { + if(cpuext.uscat) { ANDx_mask(x1, ed, 1, 0, 3); // mask = F CMPSw_U12(x1, 16-(1<<(2+rex.w))); B_MARK(cGT); @@ -1372,7 +1372,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin B_MARK(cNE); } } - if(arm64_atomics) { + if(cpuext.atomics) { SWPALxw(gd, gd, ed); if(!ALIGNED_ATOMICxw) { B_NEXT_nocond; @@ -1396,7 +1396,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MARK2; } SMDMB(); - if(!ALIGNED_ATOMICxw || !arm64_atomics) { + if(!ALIGNED_ATOMICxw || !cpuext.atomics) { MOVxw_REG(gd, x1); } } diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index fa476e6d..8842d43f 100644 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -443,7 +443,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if (BOX64ENV(dynarec_fastround)) { VFCVTZSS(q0, v1); } else { - if(arm64_frintts) { + if(cpuext.frintts) { VFRINT32ZS(q0, v1); VFCVTZSS(q0, q0); } else { @@ -481,7 +481,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VFCVTZSS(q0, q0); } else { u8 = sse_setround(dyn, ninst, x1, x2, x3); - if(arm64_frintts) { + if(cpuext.frintts) { VFRINT32XS(q0, v1); VFCVTZSS(q0, q0); } else { @@ -707,7 +707,7 @@ 
uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin GETEX(q1, 0, 0); v0 = fpu_get_scratch(dyn, ninst); VEORQ(v0, v0, v0); - if(arm64_sha1) { + if(cpuext.sha1) { v1 = fpu_get_scratch(dyn, ninst); VMOVeS(v1, 0, q0, 3); SHA1H(v1, v1); @@ -731,7 +731,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0xCA: INST_NAME("SHA1MSG2 Gx, Ex"); nextop = F8; - if(arm64_sha1) { + if(cpuext.sha1) { GETGX(q0, 1); GETEX(q1, 0, 0); VEXTQ_8(q0, q0, q0, 8); @@ -773,7 +773,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0xCB: INST_NAME("SHA256RNDS2 Gx, Ex (, XMM0)"); nextop = F8; - if(arm64_sha2) { + if(cpuext.sha2) { GETGX(q0, 1); GETEX(q1, 0, 0); d0 = sse_get_reg(dyn, ninst, x1, 0, 0); @@ -819,7 +819,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0xCC: INST_NAME("SHA256MSG1 Gx, Ex"); nextop = F8; - if(arm64_sha2) { + if(cpuext.sha2) { GETGX(q0, 1); GETEX(q1, 0, 0); SHA256SU0(q0, q1); @@ -844,7 +844,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0xCD: INST_NAME("SHA256MSG2 Gx, Ex"); nextop = F8; - if(arm64_sha2) { + if(cpuext.sha2) { GETGX(q0, 1); GETEX(q1, 0, 0); v0 = fpu_get_scratch(dyn, ninst); @@ -931,7 +931,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0xCC: INST_NAME("SHA1RNDS4 Gx, Ex, Ib"); nextop = F8; - if(arm64_sha1) { + if(cpuext.sha1) { GETGX(q0, 1); GETEX(q1, 0, 1); u8 = F8&3; @@ -2587,7 +2587,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOV32w(x1, (1<<F_OF)|(1<<F_SF)|(1<<F_ZF)|(1<<F_PF)|(1<<F_AF)); BICw(xFlags, xFlags, x1); } - if(arm64_rndr) { + if(cpuext.rndr) { MRS_rndr(x1); IFX(X_CF) { CSETw(x3, cNE); } } else { @@ -2646,7 +2646,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOV32w(x1, (1<<F_OF)|(1<<F_SF)|(1<<F_ZF)|(1<<F_PF)|(1<<F_AF)); BICw(xFlags, xFlags, x1); } - if(arm64_rndr) { + if(cpuext.rndr) { MRS_rndr(x1); IFX(X_CF) { CSETw(x3, cNE); } } else { diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c index 5435eafc..30b4ea0e 100644 --- a/src/dynarec/arm64/dynarec_arm64_66.c +++ b/src/dynarec/arm64/dynarec_arm64_66.c @@ -596,7 +596,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(!ALIGNED_ATOMICH) { TBNZ_MARK(ed, 0); } - if(arm64_atomics) { + if(cpuext.atomics) { SWPALH(gd, x1, ed); SMDMB(); if(!ALIGNED_ATOMICH) { diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index 68061e6b..d1581b5d 100644 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -229,7 +229,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n VFCVTZSQD(q0, v1); SQXTN_32(q0, q0); } else { - if(arm64_frintts) { + if(cpuext.frintts) { VFRINT32ZDQ(q0, v1); VFCVTZSQD(q0, q0); SQXTN_32(q0, q0); @@ -269,7 +269,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n VFCVTZSS(q0, q0); } else { u8 = sse_setround(dyn, ninst, x1, x2, x3); - if(arm64_frintts) { + if(cpuext.frintts) { VFRINT32XDQ(q0, v1); VFCVTZSQD(q0, q0); SQXTN_32(q0, q0); @@ -765,7 +765,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0xDB: INST_NAME("AESIMC Gx, Ex"); // AES-NI nextop = F8; - if(arm64_aes) { + if(cpuext.aes) { GETEX(q1, 0, 0); GETGX_empty(q0); AESIMC(q0, q1); @@ -783,7 
+783,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0xDC: INST_NAME("AESENC Gx, Ex"); // AES-NI nextop = F8; - if(arm64_aes) { + if(cpuext.aes) { GETEX(q1, 0, 0); GETGX(q0, 1); v0 = fpu_get_scratch(dyn, ninst); // ARM64 internal operation differs a bit from x86_64 @@ -808,7 +808,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0xDD: INST_NAME("AESENCLAST Gx, Ex"); // AES-NI nextop = F8; - if(arm64_aes) { + if(cpuext.aes) { GETEX(q1, 0, 0); GETGX(q0, 1); v0 = fpu_get_scratch(dyn, ninst); // ARM64 internal operation differs a bit from x86_64 @@ -832,7 +832,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0xDE: INST_NAME("AESDEC Gx, Ex"); // AES-NI nextop = F8; - if(arm64_aes) { + if(cpuext.aes) { GETEX(q1, 0, 0); GETGX(q0, 1); v0 = fpu_get_scratch(dyn, ninst); // ARM64 internal operation differs a bit from x86_64 @@ -857,7 +857,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0xDF: INST_NAME("AESDECLAST Gx, Ex"); // AES-NI nextop = F8; - if(arm64_aes) { + if(cpuext.aes) { GETEX(q1, 0, 0); GETGX(q0, 1); v0 = fpu_get_scratch(dyn, ninst); // ARM64 internal operation differs a bit from x86_64 @@ -919,7 +919,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n GETGD; IFNATIVE_BEFORE(NF_CF) { if(INVERTED_CARRY_BEFORE) { - if(arm64_flagm) + if(cpuext.flagm) CFINV(); else { MRS_nzcv(x3); @@ -1234,7 +1234,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0x44: INST_NAME("PCLMULQDQ Gx, Ex, Ib"); nextop = F8; - if(arm64_pmull) { + if(cpuext.pmull) { GETGX(q0, 1); GETEX(q1, 0, 1); u8 = F8; @@ -1748,7 +1748,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n x87_restoreround(dyn, ninst, u8); VFCVTZSQS(v0, v0); } else { - if(arm64_frintts) { + if(cpuext.frintts) { u8 = sse_setround(dyn, ninst, x1, x2, x3); VFRINT32XSQ(v0, v1); // handle overflow VFCVTZSQS(v0, v0); @@ -3184,7 +3184,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n VFCVTZSQD(v0, v1); // convert double -> int64 SQXTN_32(v0, v0); // convert int64 -> int32 with saturation in lower part, RaZ high part } else { - if(arm64_frintts) { + if(cpuext.frintts) { VFRINT32ZDQ(v0, v1); // handle overflow VFCVTZSQD(v0, v0); // convert double -> int64 SQXTN_32(v0, v0); // convert int64 -> int32 with saturation in lower part, RaZ high part diff --git a/src/dynarec/arm64/dynarec_arm64_66f0.c b/src/dynarec/arm64/dynarec_arm64_66f0.c index 1a636ce6..ed49e2f8 100644 --- a/src/dynarec/arm64/dynarec_arm64_66f0.c +++ b/src/dynarec/arm64/dynarec_arm64_66f0.c @@ -60,7 +60,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n BFIx(ed, x6, 0, 16); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - if(arm64_atomics) { + if(cpuext.atomics) { UFLAG_IF { LDADDALH(x5, x1, wback); emit_add16(dyn, ninst, x1, x5, x3, x4); @@ -90,7 +90,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n BFIx(ed, x6, 0, 16); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - if(arm64_atomics) { + if(cpuext.atomics) { UFLAG_IF { LDSETALH(x5, x1, wback); emit_or16(dyn, ninst, x1, x5, x3, x4); @@ -128,7 +128,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else { addr 
= geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); if(!ALIGNED_ATOMICH) { - if(arm64_uscat) { + if(cpuext.uscat) { ANDx_mask(x1, wback, 1, 0, 3); // mask = F CMPSw_U12(x1, 0xF); B_MARK3(cGE); @@ -138,7 +138,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } } // Aligned version - if(arm64_atomics) { + if(cpuext.atomics) { MOVw_REG(x1, x6); CASALH(x1, gd, wback); } else { @@ -187,7 +187,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n BFIx(ed, x5, 0, 16); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - if(arm64_atomics) { + if(cpuext.atomics) { LDADDALH(x5, x1, wback); } else { MARKLOCK; @@ -244,7 +244,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n BFIx(ed, x6, 0, 16); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - if(arm64_atomics) { + if(cpuext.atomics) { MVNw_REG(x3, gd); UFLAG_IF { LDCLRALH(x3, x1, wback); @@ -286,7 +286,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n if(opcode==0x81) i16 = F16S; else i16 = F8S; MOV32w(x5, i16); if(!ALIGNED_ATOMICH) { - if(arm64_uscat) { + if(cpuext.uscat) { ANDx_mask(x1, wback, 1, 0, 3); // mask = F CMPSw_U12(x1, 0xF); B_MARK(cGE); @@ -295,7 +295,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n B_MARK(cNE); } } - if(arm64_atomics) { + if(cpuext.atomics) { UFLAG_IF { LDADDALH(x5, x1, wback); } else { @@ -341,7 +341,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?2:1); if(opcode==0x81) i16 = F16S; else i16 = F8S; if(!i64) {MOV32w(x5, i16);} - if(arm64_atomics) { + if(cpuext.atomics) { UFLAG_IF { LDSETALH(x5, x1, wback); emit_or16c(dyn, ninst, x1, i16, x3, x4); @@ -423,7 +423,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?2:1); if(opcode==0x81) i16 = F16S; else i16 = F8S; i64 = convert_bitmask_w(i16); - if(arm64_atomics) { + if(cpuext.atomics) { MOV32w(x5, ~i16); UFLAG_IF { LDCLRALH(x5, x1, wback); @@ -466,7 +466,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n if(opcode==0x81) i16 = F16S; else i16 = F8S; MOV32w(x5, i16); if(!ALIGNED_ATOMICH) { - if(arm64_uscat) { + if(cpuext.uscat) { ANDx_mask(x1, wback, 1, 0, 3); // mask = F CMPSw_U12(x1, 0xF); B_MARK(cGE); @@ -475,7 +475,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n B_MARK(cNE); } } - if(arm64_atomics) { + if(cpuext.atomics) { NEGw_REG(x4, x5); UFLAG_IF { LDADDALH(x4, x1, wback); @@ -522,7 +522,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?2:1); if(opcode==0x81) i16 = F16S; else i16 = F8S; i64 = convert_bitmask_w(i16); - if(arm64_atomics) { + if(cpuext.atomics) { UFLAG_IF { LDEORALH(x5, x1, wback); emit_xor16c(dyn, ninst, x1, i16, x3, x4); @@ -574,7 +574,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n BFIx(ed, x6, 0, 16); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, 
&fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - if(arm64_atomics) { + if(cpuext.atomics) { MOV32w(x3, 1); UFLAG_IF { LDADDALH(x3, x1, wback); @@ -602,7 +602,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n BFIx(ed, x6, 0, 16); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - if(arm64_atomics) { + if(cpuext.atomics) { MOV32w(x3, -1); UFLAG_IF { LDADDALH(x3, x1, wback); diff --git a/src/dynarec/arm64/dynarec_arm64_66f20f.c b/src/dynarec/arm64/dynarec_arm64_66f20f.c index 36b65c47..f4866029 100644 --- a/src/dynarec/arm64/dynarec_arm64_66f20f.c +++ b/src/dynarec/arm64/dynarec_arm64_66f20f.c @@ -57,7 +57,7 @@ uintptr_t dynarec64_66F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETEW(x1, 0); GETGD; - if(arm64_crc32) { + if(cpuext.crc32) { CRC32CH(gd, gd, ed); } else { MOV32w(x2, 0x82f63b78); diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c index 888ff831..b5e1d362 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c @@ -429,7 +429,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, INST_NAME("VCVTPS2DQ Gx, Ex"); nextop = F8; u8 = sse_setround(dyn, ninst, x1, x2, x6); - if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && !cpuext.frintts) { d0 = fpu_get_scratch(dyn, ninst); d1 = fpu_get_scratch(dyn, ninst); MOVI_32_lsl(d1, 0x80, 3); @@ -440,7 +440,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, VFRINTISQ(v0, v1); VFCVTZSQS(v0, v0); } else { - if(arm64_frintts) { + if(cpuext.frintts) { VFRINT32XSQ(v0, v1); VFCVTZSQS(v0, v0); } else { @@ -1694,7 +1694,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, else SQXTN2_32(v0, d0); // convert int64 -> int32 with saturation in higher part } else { - if(arm64_frintts) { + if(cpuext.frintts) { VFRINT32ZDQ(l?d0:v0, v1); // handle overflow VFCVTZSQD(l?d0:v0, l?d0:v0); // convert double -> int64 if(!l) diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c index 6447eef4..e65864bf 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c @@ -1874,7 +1874,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip INST_NAME("VAESIMC Gx, Ex"); nextop = F8; GETGX_empty_EX(v0, v1, 0); - if(arm64_aes) { + if(cpuext.aes) { AESIMC(v0, v1); } else { if(v0!=v1) { @@ -1890,7 +1890,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip case 0xDC: INST_NAME("VAESENC Gx, Vx, Ex"); // AES-NI nextop = F8; - if(arm64_aes) { + if(cpuext.aes) { d0 = fpu_get_scratch(dyn, ninst); // ARM64 internal operation differs a bit from x86_64 for(int l=0; l<1+vex.l; ++l) { if(!l) {GETGX_empty_VXEX(v0, v2, v1, 0);} else {GETGY_empty_VYEY(v0, v2, v1);} @@ -1931,7 +1931,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip case 0xDD: INST_NAME("VAESENCLAST Gx, Vx, Ex"); // AES-NI nextop = F8; - if(arm64_aes) { + if(cpuext.aes) { d0 = fpu_get_scratch(dyn, ninst); // ARM64 internal operation differs a bit from x86_64 for(int l=0; l<1+vex.l; ++l) { if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); } @@ -1971,7 +1971,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip case 
0xDE: INST_NAME("VAESDEC Gx, Vx, Ex"); // AES-NI nextop = F8; - if(arm64_aes) { + if(cpuext.aes) { d0 = fpu_get_scratch(dyn, ninst); // ARM64 internal operation differs a bit from x86_64 for(int l=0; l<1+vex.l; ++l) { if(!l) {GETGX_empty_VXEX(v0, v2, v1, 0);} else {GETGY_empty_VYEY(v0, v2, v1);} @@ -2012,7 +2012,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip case 0xDF: INST_NAME("VAESDECLAST Gx, Vx, Ex"); // AES-NI nextop = F8; - if(arm64_aes) { + if(cpuext.aes) { d0 = fpu_get_scratch(dyn, ninst); // ARM64 internal operation differs a bit from x86_64 for(int l=0; l<1+vex.l; ++l) { if(!l) {GETGX_empty_VXEX(v0, v2, v1, 0);} else {GETGY_empty_VYEY(v0, v2, v1);} diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c index b0253420..42cdb771 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c @@ -662,7 +662,7 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip case 0x44: INST_NAME("VPCLMULQDQ Gx, Vx, Ex, Ib"); nextop = F8; - if(arm64_pmull) { + if(cpuext.pmull) { d0 = fpu_get_scratch(dyn, ninst); for(int l=0; l<1+vex.l; ++l) { if(!l) { diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c index 79a2b651..d1953a79 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c @@ -144,12 +144,12 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, nextop = F8; GETGD; GETEXSD(q0, 0, 0); - if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && !cpuext.frintts) { MRS_fpsr(x5); BFCw(x5, FPSR_IOC, 1); // reset IOC bit MSR_fpsr(x5); } - if(!BOX64ENV(dynarec_fastround) && arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && cpuext.frintts) { v0 = fpu_get_scratch(dyn, ninst); if(rex.w) { FRINT64ZD(v0, q0); @@ -160,7 +160,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, } else { FCVTZSxwD(gd, q0); } - if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && !cpuext.frintts) { MRS_fpsr(x5); // get back FPSR to check the IOC bit TBZ_NEXT(x5, FPSR_IOC); if(rex.w) { @@ -175,14 +175,14 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, nextop = F8; GETGD; GETEXSD(q0, 0, 0); - if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && !cpuext.frintts) { MRS_fpsr(x5); BFCw(x5, FPSR_IOC, 1); // reset IOC bit MSR_fpsr(x5); } u8 = sse_setround(dyn, ninst, x1, x2, x3); d1 = fpu_get_scratch(dyn, ninst); - if(!BOX64ENV(dynarec_fastround) && arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && cpuext.frintts) { if(rex.w) { FRINT64XD(d1, q0); } else { @@ -193,7 +193,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, } x87_restoreround(dyn, ninst, u8); FCVTZSxwD(gd, d1); - if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && !cpuext.frintts) { MRS_fpsr(x5); // get back FPSR to check the IOC bit TBZ_NEXT(x5, FPSR_IOC); if(rex.w) { @@ -550,7 +550,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, else SQXTN2_32(v0, d0); // convert int64 -> int32 with saturation in higher part } else { - if(arm64_frintts) { + if(cpuext.frintts) { VFRINT32XDQ(l?d0:v0, v1); // round, handling of overflow and Nan to 0x80000000 VFCVTNSQD(l?d0:v0, l?d0:v0); // convert double -> 
int64 if(!l) diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c index 12f11648..881fc784 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c @@ -144,12 +144,12 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, nextop = F8; GETGD; GETEXSS(d0, 0, 0); - if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && !cpuext.frintts) { MRS_fpsr(x5); BFCw(x5, FPSR_IOC, 1); // reset IOC bit MSR_fpsr(x5); } - if(!BOX64ENV(dynarec_fastround) && arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && cpuext.frintts) { v0 = fpu_get_scratch(dyn, ninst); if(rex.w) { FRINT64ZS(v0, d0); @@ -160,7 +160,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, } else { FCVTZSxwS(gd, d0); } - if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && !cpuext.frintts) { MRS_fpsr(x5); // get back FPSR to check the IOC bit TBZ_NEXT(x5, FPSR_IOC); if(rex.w) { @@ -175,14 +175,14 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, nextop = F8; GETGD; GETEXSS(q0, 0, 0); - if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && !cpuext.frintts) { MRS_fpsr(x5); BFCw(x5, FPSR_IOC, 1); // reset IOC bit MSR_fpsr(x5); } u8 = sse_setround(dyn, ninst, x1, x2, x3); d1 = fpu_get_scratch(dyn, ninst); - if(!BOX64ENV(dynarec_fastround) && arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && cpuext.frintts) { if(rex.w) { FRINT64XS(d1, q0); } else { @@ -193,7 +193,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, } x87_restoreround(dyn, ninst, u8); FCVTZSxwS(gd, d1); - if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && !cpuext.frintts) { MRS_fpsr(x5); // get back FPSR to check the IOC bit TBZ_NEXT(x5, FPSR_IOC); if(rex.w) { @@ -342,7 +342,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, if(BOX64ENV(dynarec_fastround)) { VFCVTZSQS(v0, v1); } else { - if(arm64_frintts) { + if(cpuext.frintts) { VFRINT32ZSQ(v0, v1); VFCVTZSQS(v0, v0); } else { diff --git a/src/dynarec/arm64/dynarec_arm64_db.c b/src/dynarec/arm64/dynarec_arm64_db.c index e10f8e4a..2d98f0cb 100644 --- a/src/dynarec/arm64/dynarec_arm64_db.c +++ b/src/dynarec/arm64/dynarec_arm64_db.c @@ -193,7 +193,7 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); s0 = fpu_get_scratch(dyn, ninst); - if(arm64_frintts) { + if(cpuext.frintts) { FRINT32ZD(s0, v1); FCVTZSwD(x5, s0); STW(x5, wback, fixedaddress); @@ -223,7 +223,7 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u8 = x87_setround(dyn, ninst, x1, x2, x4); // x1 have the modified RPSCR reg addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); s0 = fpu_get_scratch(dyn, ninst); - if(arm64_frintts) { + if(cpuext.frintts) { FRINT32XD(s0, v1); FCVTZSwD(x5, s0); STW(x5, wback, fixedaddress); @@ -253,7 +253,7 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u8 = x87_setround(dyn, ninst, x1, x2, x4); // x1 have the modified RPSCR reg addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, 
NULL, 0, 0); s0 = fpu_get_scratch(dyn, ninst); - if(arm64_frintts) { + if(cpuext.frintts) { FRINT32XD(s0, v1); FCVTZSwD(x5, s0); STW(x5, wback, fixedaddress); diff --git a/src/dynarec/arm64/dynarec_arm64_dd.c b/src/dynarec/arm64/dynarec_arm64_dd.c index f88c965d..fe640ef3 100644 --- a/src/dynarec/arm64/dynarec_arm64_dd.c +++ b/src/dynarec/arm64/dynarec_arm64_dd.c @@ -150,7 +150,7 @@ uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VST64(v1, ed, fixedaddress); } else { s0 = fpu_get_scratch(dyn, ninst); - if(arm64_frintts) { + if(cpuext.frintts) { FRINT64ZD(s0, v1); VFCVTZSd(s0, s0); VST64(s0, ed, fixedaddress); diff --git a/src/dynarec/arm64/dynarec_arm64_df.c b/src/dynarec/arm64/dynarec_arm64_df.c index f177fef2..e0e06059 100644 --- a/src/dynarec/arm64/dynarec_arm64_df.c +++ b/src/dynarec/arm64/dynarec_arm64_df.c @@ -354,7 +354,7 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin B_MARK3(c__); MARK2; } - if(arm64_frintts) { + if(cpuext.frintts) { FRINT64XD(s0, v1); VFCVTZSd(s0, s0); VST64(s0, wback, fixedaddress); diff --git a/src/dynarec/arm64/dynarec_arm64_emit_logic.c b/src/dynarec/arm64/dynarec_arm64_emit_logic.c index ac6b8a3a..404f7d6e 100644 --- a/src/dynarec/arm64/dynarec_arm64_emit_logic.c +++ b/src/dynarec/arm64/dynarec_arm64_emit_logic.c @@ -451,7 +451,7 @@ void emit_and8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF)); BICw_REG(xFlags, xFlags, s3); } - if(arm64_flagm) { + if(cpuext.flagm) { COMP_ZFSF(s1, 8) } else { IFX(X_ZF) { @@ -496,7 +496,7 @@ void emit_and8c(dynarec_arm_t* dyn, int ninst, int s1, uint8_t c, int s3, int s4 MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF)); BICw(xFlags, xFlags, s3); } - if(arm64_flagm) { + if(cpuext.flagm) { COMP_ZFSF(s1, 8) } else { IFX(X_ZF) { @@ -639,7 +639,7 @@ void emit_and16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF)); BICw_REG(xFlags, xFlags, s3); } - if(arm64_flagm) { + if(cpuext.flagm) { COMP_ZFSF(s1, 16) } else { IFX(X_ZF) { @@ -684,7 +684,7 @@ void emit_and16c(dynarec_arm_t* dyn, int ninst, int s1, int16_t c, int s3, int s MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF)); BICw(xFlags, xFlags, s3); } - if(arm64_flagm) { + if(cpuext.flagm) { COMP_ZFSF(s1, 16) } else { IFX(X_ZF) { diff --git a/src/dynarec/arm64/dynarec_arm64_emit_math.c b/src/dynarec/arm64/dynarec_arm64_emit_math.c index 76f951ed..63be6c30 100644 --- a/src/dynarec/arm64/dynarec_arm64_emit_math.c +++ b/src/dynarec/arm64/dynarec_arm64_emit_math.c @@ -991,7 +991,7 @@ void emit_adc32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 } IFNATIVE_BEFORE(NF_CF) { if(INVERTED_CARRY_BEFORE) { - if(arm64_flagm) + if(cpuext.flagm) CFINV(); else { MRS_nzcv(s3); @@ -1134,7 +1134,7 @@ void emit_adc8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) } IFNATIVE_BEFORE(NF_CF) { if(INVERTED_CARRY_BEFORE) { - if(arm64_flagm) + if(cpuext.flagm) CFINV(); else { MRS_nzcv(s3); @@ -1198,7 +1198,7 @@ void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) } IFNATIVE_BEFORE(NF_CF) { if(INVERTED_CARRY_BEFORE) { - if(arm64_flagm) + if(cpuext.flagm) CFINV(); else { MRS_nzcv(s3); @@ -1321,7 +1321,7 @@ void emit_sbb32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 } IFNATIVE_BEFORE(NF_CF) { if(!INVERTED_CARRY_BEFORE) { - if(arm64_flagm) + if(cpuext.flagm) CFINV(); else { MRS_nzcv(s3); @@ -1470,7 +1470,7 @@ void emit_sbb8(dynarec_arm_t* dyn, int ninst, int s1, int 
s2, int s3, int s4) } IFNATIVE_BEFORE(NF_CF) { if(!INVERTED_CARRY_BEFORE) { - if(arm64_flagm) + if(cpuext.flagm) CFINV(); else { MRS_nzcv(s3); @@ -1535,7 +1535,7 @@ void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4) } IFNATIVE_BEFORE(NF_CF) { if(!INVERTED_CARRY_BEFORE) { - if(arm64_flagm) + if(cpuext.flagm) CFINV(); else { MRS_nzcv(s3); diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c index 4a72e98e..69f9a5ef 100644 --- a/src/dynarec/arm64/dynarec_arm64_f0.c +++ b/src/dynarec/arm64/dynarec_arm64_f0.c @@ -72,7 +72,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BFIx(wback, x1, wb2*8, 8); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - if(arm64_atomics) { + if(cpuext.atomics) { UFLAG_IF { LDADDALB(x2, x1, wback); emit_add8(dyn, ninst, x1, x2, x4, x5); @@ -100,7 +100,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); if(!ALIGNED_ATOMICxw) { - if(arm64_uscat) { + if(cpuext.uscat) { ANDx_mask(x1, wback, 1, 0, 3); // mask = F CMPSw_U12(x1, 16-(1<<(2+rex.w))); B_MARK(cGT); @@ -109,7 +109,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin B_MARK(cNE); } } - if(arm64_atomics) { + if(cpuext.atomics) { UFLAG_IF { LDADDALxw(gd, x1, wback); } else { @@ -163,7 +163,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BFIx(wback, x1, wb2*8, 8); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - if(arm64_atomics) { + if(cpuext.atomics) { LDSETALB(x2, x1, wback); UFLAG_IF { emit_or8(dyn, ninst, x1, x2, x4, x5); @@ -188,7 +188,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin emit_or32(dyn, ninst, rex, ed, gd, x3, x4); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - if(arm64_atomics) { + if(cpuext.atomics) { LDSETALxw(gd, x1, wback); UFLAG_IF { emit_or32(dyn, ninst, rex, x1, gd, x3, x4); @@ -288,7 +288,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin BFIx(xRAX, x2, 0, 8); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - if(arm64_atomics) { + if(cpuext.atomics) { UFLAG_IF { MOVw_REG(x2, x6); CASALB(x6, gd, wback); @@ -338,7 +338,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); if(!ALIGNED_ATOMICxw) { - if(arm64_uscat) { + if(cpuext.uscat) { ANDx_mask(x1, wback, 1, 0, 3); // mask = F CMPSw_U12(x1, 16-(1<<(2+rex.w))); B_MARK3(cGT); @@ -348,7 +348,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } } // Aligned version - if(arm64_atomics) { + if(cpuext.atomics) { UFLAG_IF { MOVxw_REG(x1, xRAX); CASALxw(x1, gd, wback); @@ -395,7 +395,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin // Common part (and fallback for EAX != Ed) UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5); MOVxw_REG(xRAX, x1);} else { - if(!ALIGNED_ATOMICxw || !arm64_atomics) + if(!ALIGNED_ATOMICxw || !cpuext.atomics) MOVxw_REG(xRAX, x1); // upper par of RAX will be erase on 32bits, no 
mater what } } @@ -617,7 +617,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin EBBACK; // eb gets x1 (sum) } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - if(arm64_atomics) { + if(cpuext.atomics) { UFLAG_IF { MOVxw_REG(x3, gd); LDADDALB(x3, gd, wback); @@ -663,7 +663,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); if(!ALIGNED_ATOMICxw) { - if(arm64_uscat) { + if(cpuext.uscat) { ANDx_mask(x1, wback, 1, 0, 3); // mask = F CMPSw_U12(x1, 16-(1<<(2+rex.w))); B_MARK(cGT); @@ -672,7 +672,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin B_MARK(cNE); // unaligned } } - if(arm64_atomics) { + if(cpuext.atomics) { UFLAG_IF { LDADDALxw(gd, x1, wback); } else { @@ -690,7 +690,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin UFLAG_IF { B_MARK2_nocond; } else { - if(!arm64_atomics) MOVxw_REG(gd, x1); + if(!cpuext.atomics) MOVxw_REG(gd, x1); B_NEXT_nocond; } MARK; @@ -709,7 +709,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVxw_REG(x3, x1); emit_add32(dyn, ninst, rex, x3, gd, x4, x5); MOVxw_REG(gd, x1); - } else if(!arm64_atomics || !ALIGNED_ATOMICxw) { + } else if(!cpuext.atomics || !ALIGNED_ATOMICxw) { MOVxw_REG(gd, x1); } } @@ -732,7 +732,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_ZF, SF_SUBSET); addr = geted(dyn, addr, ninst, nextop, &wback, x1, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); if(!ALIGNED_ATOMICxw) { - if(arm64_uscat) { + if(cpuext.uscat) { if(rex.w) { TSTx_mask(wback, 1, 0, 3); B_MARK2(cNE); @@ -746,7 +746,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin B_MARK2(cNE); // unaligned } } - if(arm64_atomics) { + if(cpuext.atomics) { MOVx_REG(x2, xRAX); MOVx_REG(x3, xRDX); MOVx_REG(x4, xRBX); @@ -886,7 +886,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); GETGB(x5); - if(arm64_atomics) { + if(cpuext.atomics) { MVNxw_REG(x1, gd); UFLAG_IF { LDCLRALB(x1, x1, wback); @@ -914,7 +914,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin emit_and32(dyn, ninst, rex, ed, gd, x3, x4); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - if(arm64_atomics) { + if(cpuext.atomics) { MVNxw_REG(x1, gd); UFLAG_IF { LDCLRALxw(x1, x1, wback); @@ -944,7 +944,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); if(!ALIGNED_ATOMICxw) { - if(arm64_uscat) { + if(cpuext.uscat) { ANDx_mask(x1, wback, 1, 0, 3); // mask = F CMPSw_U12(x1, 16-(1<<(2+rex.w))); B_MARK(cGT); @@ -953,7 +953,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin B_MARK(cNE); } } - if(arm64_atomics && 0) { // disabled because 0x80000000 has no negative + if(cpuext.atomics && 0) { // disabled because 0x80000000 has no negative NEGxw_REG(x1, gd); UFLAG_IF { LDADDALxw(x1, x1, wback); @@ -1003,7 +1003,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else 
{ addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); if(!ALIGNED_ATOMICxw) { - if(arm64_uscat) { + if(cpuext.uscat) { ANDx_mask(x1, wback, 1, 0, 3); // mask = F CMPSw_U12(x1, 16-(1<<(2+rex.w))); B_MARK(cGT); @@ -1012,7 +1012,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin B_MARK(cNE); } } - if(arm64_atomics) { + if(cpuext.atomics) { UFLAG_IF { LDEORALxw(gd, x1, wback); } else { @@ -1065,7 +1065,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1); u8 = F8; wb1 = 1; - if(arm64_atomics) { + if(cpuext.atomics) { MOV32w(x2, u8); UFLAG_IF { LDADDALB(x2, x1, wback); @@ -1097,7 +1097,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1); u8 = F8; wb1 = 1; - if(arm64_atomics) { + if(cpuext.atomics) { MOV32w(x2, u8); UFLAG_IF { LDSETALB(x2, x1, wback); @@ -1172,7 +1172,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1); u8 = F8; wb1 = 1; - if(arm64_atomics) { + if(cpuext.atomics) { MOV32w(x2, ~u8); UFLAG_IF { LDCLRALB(x2, x1, wback); @@ -1203,7 +1203,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1); u8 = F8; wb1 = 1; - if(arm64_atomics) { + if(cpuext.atomics) { MOV32w(x2, -u8); UFLAG_IF { LDADDALB(x2, x1, wback); @@ -1234,7 +1234,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1); u8 = F8; wb1 = 1; - if(arm64_atomics) { + if(cpuext.atomics) { MOV32w(x2, u8); UFLAG_IF { LDEORALB(x2, x1, wback); @@ -1291,7 +1291,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOV64xw(x5, i64); } if(!ALIGNED_ATOMICxw) { - if(arm64_uscat) { + if(cpuext.uscat) { ANDx_mask(x1, wback, 1, 0, 3); // mask = F CMPSw_U12(x1, 16-(1<<(2+rex.w))); B_MARK(cGT); @@ -1300,7 +1300,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin B_MARK(cNE); } } - if(arm64_atomics) { + if(cpuext.atomics) { if((i64>-0x1000) && (i64<0x1000)) { MOV64xw(x5, i64); } @@ -1367,7 +1367,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin // this is __faststorefence DMB_ST(); } else { - if(arm64_atomics) { + if(cpuext.atomics) { MOV64xw(x5, i64); UFLAG_IF { LDSETALxw(x5, x1, wback); @@ -1439,7 +1439,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; - if(arm64_atomics) { + if(cpuext.atomics) { MOV64xw(x5, ~i64); UFLAG_IF { LDCLRALxw(x5, x1, wback); @@ -1472,7 +1472,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOV64xw(x5, i64); } if(!ALIGNED_ATOMICxw) { - if(arm64_uscat) { + if(cpuext.uscat) { ANDx_mask(x1, wback, 1, 0, 3); // mask = F CMPSw_U12(x1, 16-(1<<(2+rex.w))); B_MARK(cGT); @@ -1481,7 +1481,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, 
uintptr_t ip, int nin B_MARK(cNE); } } - if(arm64_atomics) { + if(cpuext.atomics) { if((i64>-0x1000) && (i64<0x1000)) { MOV64xw(x5, -i64); } else { @@ -1553,7 +1553,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1); if(opcode==0x81) i64 = F32S; else i64 = F8S; - if(arm64_atomics) { + if(cpuext.atomics) { MOV64xw(x5, i64); UFLAG_IF { LDEORALxw(x5, x1, wback); @@ -1609,7 +1609,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { GETGB(x4); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - if(arm64_atomics) { + if(cpuext.atomics) { SWPALB(x4, x1, ed); } else { MARKLOCK; @@ -1636,7 +1636,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin GETGD; addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); if(!ALIGNED_ATOMICxw) { - if(arm64_uscat) { + if(cpuext.uscat) { ANDx_mask(x1, ed, 1, 0, 3); // mask = F CMPSw_U12(x1, 16-(1<<(2+rex.w))); B_MARK(cGT); @@ -1645,7 +1645,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin B_MARK(cNE); } } - if(arm64_atomics) { + if(cpuext.atomics) { SWPALxw(gd, gd, ed); if(!ALIGNED_ATOMICxw) { B_NEXT_nocond; @@ -1671,7 +1671,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MARK2; } SMDMB(); - if(!ALIGNED_ATOMICxw || !arm64_atomics) { + if(!ALIGNED_ATOMICxw || !cpuext.atomics) { MOVxw_REG(gd, x1); } } @@ -1697,7 +1697,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin EBBACK; } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - if(arm64_atomics) { + if(cpuext.atomics) { MOV32w(x1, 0xff); STEORLB(x1, wback); } else { @@ -1725,7 +1725,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin EBBACK; } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - if(arm64_atomics) { + if(cpuext.atomics) { MOV64x(x1, ~0LL); STEORLxw(x1, wback); } else { @@ -1757,7 +1757,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); MARKLOCK; - if(arm64_atomics) { + if(cpuext.atomics) { MOV32w(x3, 1); UFLAG_IF { LDADDALB(x3, x1, wback); @@ -1783,7 +1783,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin EBBACK; } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - if(arm64_atomics) { + if(cpuext.atomics) { MOV32w(x3, -1); UFLAG_IF { LDADDALB(x3, x1, wback); @@ -1818,7 +1818,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); if(!ALIGNED_ATOMICxw) { - if(arm64_uscat) { + if(cpuext.uscat) { ANDx_mask(x1, wback, 1, 0, 3); // mask = F CMPSw_U12(x1, 16-(1<<(2+rex.w))); B_MARK(cGT); @@ -1827,7 +1827,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin B_MARK(cNE); // unaligned } } - if(arm64_atomics) { + if(cpuext.atomics) { MOV32w(x3, 1); UFLAG_IF { LDADDALxw(x3, x1, wback); @@ -1874,7 +1874,7 @@ uintptr_t 
dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); if(!ALIGNED_ATOMICxw) { - if(arm64_uscat) { + if(cpuext.uscat) { ANDx_mask(x1, wback, 1, 0, 3); // mask = F CMPSw_U12(x1, 16-(1<<(2+rex.w))); B_MARK(cGT); @@ -1883,7 +1883,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin B_MARK(cNE); // unaligned } } - if(arm64_atomics) { + if(cpuext.atomics) { MOV64xw(x3, -1); UFLAG_IF { LDADDALxw(x3, x1, wback); diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c index 8428538d..6e13def9 100644 --- a/src/dynarec/arm64/dynarec_arm64_f20f.c +++ b/src/dynarec/arm64/dynarec_arm64_f20f.c @@ -118,12 +118,12 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n nextop = F8; GETGD; GETEXSD(q0, 0, 0); - if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && !cpuext.frintts) { MRS_fpsr(x5); BFCw(x5, FPSR_IOC, 1); // reset IOC bit MSR_fpsr(x5); } - if(!BOX64ENV(dynarec_fastround) && arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && cpuext.frintts) { v0 = fpu_get_scratch(dyn, ninst); if(rex.w) { FRINT64ZD(v0, q0); @@ -134,7 +134,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else { FCVTZSxwD(gd, q0); } - if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && !cpuext.frintts) { MRS_fpsr(x5); // get back FPSR to check the IOC bit TBZ_NEXT(x5, FPSR_IOC); if(rex.w) { @@ -149,14 +149,14 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n nextop = F8; GETGD; GETEXSD(q0, 0, 0); - if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && !cpuext.frintts) { MRS_fpsr(x5); BFCw(x5, FPSR_IOC, 1); // reset IOC bit MSR_fpsr(x5); } u8 = sse_setround(dyn, ninst, x1, x2, x3); d1 = fpu_get_scratch(dyn, ninst); - if(!BOX64ENV(dynarec_fastround) && arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && cpuext.frintts) { if(rex.w) { FRINT64XD(d1, q0); } else { @@ -167,7 +167,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } x87_restoreround(dyn, ninst, u8); FCVTZSxwD(gd, d1); - if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && !cpuext.frintts) { MRS_fpsr(x5); // get back FPSR to check the IOC bit TBZ_NEXT(x5, FPSR_IOC); if(rex.w) { @@ -187,7 +187,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n nextop = F8; GETEB(x1, 0); GETGD; - if(arm64_crc32) { + if(cpuext.crc32) { CRC32CB(gd, gd, ed); } else { EORw_REG(gd, gd, ed); @@ -204,7 +204,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n nextop = F8; GETED(0); GETGD; - if(arm64_crc32) { + if(cpuext.crc32) { CRC32Cxw(gd, gd, ed); } else { MOV32w(x2, 0x82f63b78); @@ -551,7 +551,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n SQXTN_32(v0, v0); // convert int64 -> int32 with saturation in lower part, RaZ high part } else { u8 = sse_setround(dyn, ninst, x1, x2, x3); - if(arm64_frintts) { + if(cpuext.frintts) { VFRINT32XDQ(v0, v1); // round, handling of overflow and Nan to 0x80000000 VFCVTNSQD(v0, v0); // convert double -> int64 SQXTN_32(v0, v0); // convert int64 -> int32 with saturation in lower part, RaZ high part diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c 
b/src/dynarec/arm64/dynarec_arm64_f30f.c index 2531a8d6..19d054e3 100644 --- a/src/dynarec/arm64/dynarec_arm64_f30f.c +++ b/src/dynarec/arm64/dynarec_arm64_f30f.c @@ -119,12 +119,12 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n nextop = F8; GETGD; GETEXSS(d0, 0, 0); - if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && !cpuext.frintts) { MRS_fpsr(x5); BFCw(x5, FPSR_IOC, 1); // reset IOC bit MSR_fpsr(x5); } - if(!BOX64ENV(dynarec_fastround) && arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && cpuext.frintts) { v0 = fpu_get_scratch(dyn, ninst); if(rex.w) { FRINT64ZS(v0, d0); @@ -135,7 +135,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else { FCVTZSxwS(gd, d0); } - if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && !cpuext.frintts) { MRS_fpsr(x5); // get back FPSR to check the IOC bit TBZ_NEXT(x5, FPSR_IOC); if(rex.w) { @@ -150,14 +150,14 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n nextop = F8; GETGD; GETEXSS(q0, 0, 0); - if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && !cpuext.frintts) { MRS_fpsr(x5); BFCw(x5, FPSR_IOC, 1); // reset IOC bit MSR_fpsr(x5); } u8 = sse_setround(dyn, ninst, x1, x2, x3); d1 = fpu_get_scratch(dyn, ninst); - if(!BOX64ENV(dynarec_fastround) && arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && cpuext.frintts) { if(rex.w) { FRINT64XS(d1, q0); } else { @@ -168,7 +168,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } x87_restoreround(dyn, ninst, u8); FCVTZSxwS(gd, d1); - if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) { + if(!BOX64ENV(dynarec_fastround) && !cpuext.frintts) { MRS_fpsr(x5); // get back FPSR to check the IOC bit TBZ_NEXT(x5, FPSR_IOC); if(rex.w) { @@ -322,7 +322,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n if(BOX64ENV(dynarec_fastround)) { VFCVTZSQS(v0, v1); } else { - if(arm64_frintts) { + if(cpuext.frintts) { VFRINT32ZSQ(v0, v1); VFCVTZSQS(v0, v0); } else { diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index 0db1700b..656ee7ad 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -2593,7 +2593,7 @@ static void nativeFlagsTransform(dynarec_arm_t* dyn, int ninst, int s1, int s2) } // special case for NF_CF changing state if((flags_before&NF_CF) && (flags_after&NF_CF) && (nc_before!=nc_after)) { - if(arm64_flagm && !mrs) { + if(cpuext.flagm && !mrs) { CFINV(); } else { GO_MRS(s2); diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index f0f26b8d..50c1279a 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -943,7 +943,7 @@ #define GEN_INVERTED_CARRY() #endif #ifndef INVERT_CARRY -#define INVERT_CARRY(A) if(dyn->insts[ninst].normal_carry) {if(arm64_flagm) CFINV(); else {MRS_nzcv(A); EORx_mask(A, A, 1, 35, 0); MSR_nzcv(A);}} +#define INVERT_CARRY(A) if(dyn->insts[ninst].normal_carry) {if(cpuext.flagm) CFINV(); else {MRS_nzcv(A); EORx_mask(A, A, 1, 35, 0); MSR_nzcv(A);}} #endif // Generate FCOM with s1 and s2 scratch regs (the VCMP is already done) @@ -1953,7 +1953,7 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip #define COMP_ZFSF(s1, A) \ IFX(X_ZF|X_SF) { \ - if(arm64_flagm) { \ + if(cpuext.flagm) { \ 
SETF##A(s1); \ IFX(X_ZF) { \ IFNATIVE(NF_EQ) {} else { \ diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h index 152d69f7..74d8a6f1 100644 --- a/src/dynarec/dynarec_arch.h +++ b/src/dynarec/dynarec_arch.h @@ -33,7 +33,7 @@ #define STOP_NATIVE_FLAGS(A, B) A->insts[B].nat_flags_op = NAT_FLAG_OP_UNUSABLE #define ARCH_UNALIGNED(A, B) arch_unaligned(A, B) extern uint32_t arm64_crc(void* p, uint32_t len); -#define ARCH_CRC(A, B) if(arm64_crc32) return arm64_crc(A, B) +#define ARCH_CRC(A, B) if(cpuext.crc32) return arm64_crc(A, B) #define ARCH_NOP 0b11010101000000110010000000011111 #define ARCH_UDF 0xcafe diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c index 4811bbac..20b7a597 100644 --- a/src/dynarec/la64/dynarec_la64_00.c +++ b/src/dynarec/la64/dynarec_la64_00.c @@ -700,7 +700,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BARRIER(BARRIER_MAYBE); \ JUMP(addr + i8, 1); \ if (!dyn->insts[ninst].nat_flags_fusion) { \ - if (la64_lbt) { \ + if (cpuext.lbt) { \ X64_SETJ(tmp1, I); \ } else { \ GETFLAGS; \ @@ -712,7 +712,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (dyn->insts[ninst].nat_flags_fusion) { \ NATIVEJUMP_safe(NATNO, i32); \ } else { \ - if (la64_lbt) \ + if (cpuext.lbt) \ BEQZ_safe(tmp1, i32); \ else \ B##NO##_safe(tmp1, i32); \ @@ -732,7 +732,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (dyn->insts[ninst].nat_flags_fusion) { \ NATIVEJUMP_safe(NATYES, i32); \ } else { \ - if (la64_lbt) \ + if (cpuext.lbt) \ BNEZ_safe(tmp1, i32); \ else \ B##YES##_safe(tmp1, i32); \ @@ -980,7 +980,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { GETGB(x3); addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); - if (la64_lam_bh) { + if (cpuext.lam_bh) { AMSWAP_DB_B(x1, gd, ed); BSTRINS_D(gb1, x1, gb2 + 7, gb2); } else { @@ -2289,7 +2289,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni READFLAGS(X_ZF); i8 = F8S; ADDI_D(xRCX, xRCX, -1); - if (la64_lbt) + if (cpuext.lbt) X64_GET_EFLAGS(x1, X_ZF); else ANDI(x1, xFlags, 1 << F_ZF); @@ -2301,7 +2301,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni READFLAGS(X_ZF); i8 = F8S; ADDI_D(xRCX, xRCX, -1); - if (la64_lbt) + if (cpuext.lbt) X64_GET_EFLAGS(x1, X_ZF); else ANDI(x1, xFlags, 1 << F_ZF); @@ -2508,7 +2508,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("CMC"); READFLAGS(X_CF); SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); - if (la64_lbt) { + if (cpuext.lbt) { X64_GET_EFLAGS(x3, X_CF); XORI(x3, x3, 1 << F_CF); X64_SET_EFLAGS(x3, X_CF); @@ -2739,7 +2739,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("CLC"); SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); - if (la64_lbt) + if (cpuext.lbt) X64_SET_EFLAGS(xZR, X_CF); else BSTRINS_D(xFlags, xZR, F_CF, F_CF); @@ -2748,7 +2748,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("STC"); SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION); SET_DFNONE(); - if (la64_lbt) { + if (cpuext.lbt) { ORI(x3, xZR, 1 << F_CF); X64_SET_EFLAGS(x3, X_CF); } else { diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index d3f57a32..18c27852 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c 
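The patch assumes a consolidated cpu_ext_t descriptor coming from the new core_arch.h, whose definition is not part of these hunks. The only layout detail visible here is in the la64_prolog.S/la64_epilog.S hunks further down: they load the first doubleword of cpuext and mask it with 1, so the LoongArch LBT flag is expected at bit 0. A minimal sketch of such a descriptor, with field names and ordering assumed purely for illustration, could look like this:

    #include <stdint.h>

    /* Sketch only: the real definition lives in core_arch.h, which this
     * patch excerpt does not show. The one constraint visible in the
     * patch is that la64_prolog.S/la64_epilog.S read the first 64-bit
     * word of cpuext and test bit 0 for LBT (ldptr.d + andi ..., 1). */
    typedef union cpu_ext_s {
        struct {
            uint64_t lbt    : 1; /* bit 0: LoongArch LBT, tested directly from assembly */
            uint64_t lam_bh : 1; /* byte/halfword AMSWAP/AMADD atomics */
            uint64_t lamcas : 1; /* byte/halfword AMCAS compare-and-swap */
            uint64_t scq    : 1; /* 128-bit SC.Q store-conditional */
            /* ... remaining per-architecture flags (crc32, frintts, zbb,
             *     xtheadvector, ...) and wider fields such as vlen ... */
        };
        uint64_t raw;            /* the doubleword the prolog/epilog load */
    } cpu_ext_t;

    extern cpu_ext_t cpuext;     /* one descriptor instead of scattered per-arch globals */

With a shape like this, C code tests individual features as cpuext.lbt or cpuext.zbb exactly as in the surrounding hunks, while the assembly entry/exit paths keep a single load-and-mask instead of referencing a dedicated la64_lbt symbol.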
@@ -733,7 +733,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni #define GO(GETFLAGS, NO, YES, NATNO, NATYES, F, I) \ READFLAGS_FUSION(F, x1, x2, x3, x4, x5); \ if (!dyn->insts[ninst].nat_flags_fusion) { \ - if (la64_lbt) { \ + if (cpuext.lbt) { \ X64_SETJ(tmp1, I); \ } else { \ GETFLAGS; \ @@ -746,7 +746,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (dyn->insts[ninst].nat_flags_fusion) { \ NATIVEJUMP(NATNO, 8); \ } else { \ - if (la64_lbt) \ + if (cpuext.lbt) \ BEQZ(tmp1, 8); \ else \ B##NO(tmp1, 8); \ @@ -758,7 +758,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (dyn->insts[ninst].nat_flags_fusion) { \ NATIVEJUMP(NATNO, 8); \ } else { \ - if (la64_lbt) \ + if (cpuext.lbt) \ BEQZ(tmp1, 8); \ else \ B##NO(tmp1, 8); \ @@ -1277,7 +1277,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BARRIER(BARRIER_MAYBE); \ JUMP(j64, 1); \ if (!dyn->insts[ninst].nat_flags_fusion) { \ - if (la64_lbt) { \ + if (cpuext.lbt) { \ X64_SETJ(x1, I); \ } else { \ GETFLAGS; \ @@ -1289,7 +1289,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (dyn->insts[ninst].nat_flags_fusion) { \ NATIVEJUMP_safe(NATNO, i32); \ } else { \ - if (la64_lbt) \ + if (cpuext.lbt) \ BEQZ_safe(x1, i32); \ else \ B##NO##_safe(x1, i32); \ @@ -1309,7 +1309,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (dyn->insts[ninst].nat_flags_fusion) { \ NATIVEJUMP_safe(NATYES, i32); \ } else { \ - if (la64_lbt) \ + if (cpuext.lbt) \ BNEZ_safe(tmp1, i32); \ else \ B##YES##_safe(tmp1, i32); \ @@ -1325,7 +1325,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni READFLAGS(F); \ tmp1 = x1; \ tmp3 = x3; \ - if (la64_lbt) { \ + if (cpuext.lbt) { \ X64_SETJ(x3, I); \ } else { \ GETFLAGS; \ @@ -1379,7 +1379,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (X_CF) { ANDI(x2, gd, rex.w ? 0x3f : 0x1f); SRLxw(x4, ed, x2); - if (la64_lbt) + if (cpuext.lbt) X64_SET_EFLAGS(x4, X_CF); else BSTRINS_D(xFlags, x4, F_CF, F_CF); @@ -1421,7 +1421,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ANDI(x2, gd, rex.w ? 0x3f : 0x1f); IFX (X_CF) { SRL_D(x4, ed, x2); - if (la64_lbt) { + if (cpuext.lbt) { X64_SET_EFLAGS(x4, X_CF); } else { BSTRINS_D(xFlags, x4, F_CF, F_CF); @@ -1552,7 +1552,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (rex.w) { // 64bits imul UFLAG_IF { - if (la64_lbt) { + if (cpuext.lbt) { X64_MUL_D(gd, ed); } MULH_D(x3, gd, ed); @@ -1564,7 +1564,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { SET_DFNONE(); } - IFXA (X_CF | X_OF, !la64_lbt) { + IFXA (X_CF | X_OF, !cpuext.lbt) { SRAI_D(x4, gd, 63); XOR(x3, x3, x4); SNEZ(x3, x3); @@ -1581,7 +1581,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else { // 32bits imul UFLAG_IF { - if (la64_lbt) { + if (cpuext.lbt) { X64_MUL_W(gd, ed); } SLLI_W(gd, gd, 0); @@ -1596,7 +1596,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else IFX (X_CF | X_OF) { SET_DFNONE(); } - IFXA (X_CF | X_OF, !la64_lbt) { + IFXA (X_CF | X_OF, !cpuext.lbt) { SRAI_W(x4, gd, 31); SUB_D(x3, x3, x4); SNEZ(x3, x3); @@ -1689,7 +1689,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u8 = F8; u8 &= rex.w ? 
0x3f : 0x1f; BSTRPICK_D(x4, ed, u8, u8); - if (la64_lbt) + if (cpuext.lbt) X64_SET_EFLAGS(x4, X_CF); else BSTRINS_D(xFlags, x4, 0, 0); @@ -1702,7 +1702,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u8 = F8; u8 &= (rex.w ? 0x3f : 0x1f); BSTRPICK_D(x4, ed, u8, u8); - if (la64_lbt) + if (cpuext.lbt) X64_SET_EFLAGS(x4, X_CF); else BSTRINS_D(xFlags, x4, 0, 0); @@ -1723,7 +1723,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u8 = F8; u8 &= (rex.w ? 0x3f : 0x1f); BSTRPICK_D(x4, ed, u8, u8); - if (la64_lbt) + if (cpuext.lbt) X64_SET_EFLAGS(x4, X_CF); else BSTRINS_D(xFlags, x4, 0, 0); @@ -1781,7 +1781,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } ANDI(x2, gd, rex.w ? 0x3f : 0x1f); SRL_D(x4, ed, x2); - if (la64_lbt) + if (cpuext.lbt) X64_SET_EFLAGS(x4, X_CF); else BSTRINS_D(xFlags, x4, F_CF, F_CF); @@ -1807,7 +1807,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ed = x4; } BNE_MARK(ed, xZR); - if (la64_lbt) { + if (cpuext.lbt) { ADDI_D(x3, xZR, 1 << F_ZF); X64_SET_EFLAGS(x3, X_ZF); } else { @@ -1820,7 +1820,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni CTZ_D(gd, ed); else CTZ_W(gd, ed); - if (la64_lbt) { + if (cpuext.lbt) { X64_SET_EFLAGS(xZR, X_ZF); } else { BSTRINS_D(xFlags, xZR, F_ZF, F_ZF); @@ -1838,7 +1838,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ed = x4; } BNE_MARK(ed, xZR); - if (la64_lbt) { + if (cpuext.lbt) { ADDI_D(x3, xZR, 1 << F_ZF); X64_SET_EFLAGS(x3, X_ZF); } else { @@ -1846,7 +1846,7 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } B_NEXT_nocond; MARK; - if (la64_lbt) { + if (cpuext.lbt) { X64_SET_EFLAGS(xZR, X_ZF); } else { BSTRINS_D(xFlags, xZR, F_ZF, F_ZF); diff --git a/src/dynarec/la64/dynarec_la64_64.c b/src/dynarec/la64/dynarec_la64_64.c index 0010e0da..906e8fe7 100644 --- a/src/dynarec/la64/dynarec_la64_64.c +++ b/src/dynarec/la64/dynarec_la64_64.c @@ -252,10 +252,10 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni else i64 = F8S; MOV64xw(x5, i64); - IFXA (X_ALL, !la64_lbt) + IFXA (X_ALL, !cpuext.lbt) ST_D(x6, xEmu, offsetof(x64emu_t, scratch)); emit_adc32(dyn, ninst, rex, ed, x5, x3, x4, x6, x7); - IFXA (X_ALL, !la64_lbt) { + IFXA (X_ALL, !cpuext.lbt) { LD_D(x6, xEmu, offsetof(x64emu_t, scratch)); } WBACKO(x6); diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c index c43efe56..8485057b 100644 --- a/src/dynarec/la64/dynarec_la64_660f.c +++ b/src/dynarec/la64/dynarec_la64_660f.c @@ -525,7 +525,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); GETGX(q0, 0); GETEX(q1, 0, 0); - if (!la64_lbt) { + if (!cpuext.lbt) { CLEAR_FLAGS(x3); } else IFX (X_ALL) { X64_SET_EFLAGS(xZR, X_ALL); @@ -536,7 +536,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int VAND_V(v0, q1, q0); VSETEQZ_V(fcc0, v0); BCEQZ_MARK(fcc0); - if (la64_lbt) { + if (cpuext.lbt) { ADDI_D(x3, xZR, 1 << F_ZF); X64_SET_EFLAGS(x3, X_ZF); } else { @@ -548,7 +548,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int VANDN_V(v0, q0, q1); VSETEQZ_V(fcc0, v0); BCEQZ_MARK2(fcc0); - if (la64_lbt) { + if (cpuext.lbt) { ADDI_D(x3, xZR, 1 << F_CF); X64_SET_EFLAGS(x3, X_ZF); } else { @@ -1361,7 +1361,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* 
dyn, uintptr_t addr, uintptr_t ip, int READFLAGS(F); \ tmp1 = x1; \ tmp3 = x3; \ - if (la64_lbt) { \ + if (cpuext.lbt) { \ X64_SETJ(x1, I); \ } else { \ GETFLAGS; \ @@ -1375,7 +1375,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int LD_HU(x4, ed, fixedaddress); \ ed = x4; \ } \ - if (la64_lbt) \ + if (cpuext.lbt) \ BEQZ(x1, 4 + 4); \ else \ B##NO(x1, 4 + 4); \ @@ -2047,7 +2047,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int GETGW(x2); ANDI(gd, gd, 15); SRL_D(x4, ed, gd); - if (la64_lbt) + if (cpuext.lbt) X64_SET_EFLAGS(x4, X_CF); else BSTRINS_D(xFlags, x4, F_CF, F_CF); @@ -2097,7 +2097,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int u8 &= rex.w ? 0x3f : 15; IFX (X_CF) { BSTRPICK_D(x3, ed, u8, u8); - if (la64_lbt) { + if (cpuext.lbt) { X64_SET_EFLAGS(x3, X_CF); } else { BSTRINS_D(xFlags, x3, F_CF, F_CF); diff --git a/src/dynarec/la64/dynarec_la64_66f0.c b/src/dynarec/la64/dynarec_la64_66f0.c index 1031ebc8..a1765946 100644 --- a/src/dynarec/la64/dynarec_la64_66f0.c +++ b/src/dynarec/la64/dynarec_la64_66f0.c @@ -80,7 +80,7 @@ uintptr_t dynarec64_66F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int u64 = (uint16_t)(int16_t)F8S; MOV64x(x5, u64); - if (la64_lam_bh) { + if (cpuext.lam_bh) { AMADD_DB_H(x1, x5, wback); } else { ANDI(x3, wback, 0b10); diff --git a/src/dynarec/la64/dynarec_la64_66f30f.c b/src/dynarec/la64/dynarec_la64_66f30f.c index 4d46c8ba..f5446466 100644 --- a/src/dynarec/la64/dynarec_la64_66f30f.c +++ b/src/dynarec/la64/dynarec_la64_66f30f.c @@ -59,7 +59,7 @@ uintptr_t dynarec64_66F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in VPCNT_H(v0, v0); VPICKVE2GR_HU(gd, v0, 0); IFX (X_ALL) { - if (la64_lbt) { + if (cpuext.lbt) { X64_SET_EFLAGS(xZR, X_ALL); BNEZ_MARK(gd); ADDI_D(x5, xZR, 1 << F_ZF); diff --git a/src/dynarec/la64/dynarec_la64_emit_logic.c b/src/dynarec/la64/dynarec_la64_emit_logic.c index c7a7f203..d11ec8e9 100644 --- a/src/dynarec/la64/dynarec_la64_emit_logic.c +++ b/src/dynarec/la64/dynarec_la64_emit_logic.c @@ -30,7 +30,7 @@ void emit_xor8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) SET_DFNONE(); } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { X64_XOR_B(s1, s2); } @@ -41,7 +41,7 @@ void emit_xor8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) ST_B(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) return; + if (cpuext.lbt) return; CLEAR_FLAGS(s3); IFX (X_SF) { @@ -68,7 +68,7 @@ void emit_xor8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { ADDI_D(s3, xZR, c & 0xff); X64_XOR_B(s1, s3); @@ -111,7 +111,7 @@ void emit_xor16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { X64_XOR_W(s1, s2); } @@ -122,7 +122,7 @@ void emit_xor16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, ST_H(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) return; + if (cpuext.lbt) return; CLEAR_FLAGS(s3); IFX (X_ZF | X_SF) { @@ -151,7 +151,7 @@ void emit_xor32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX(X_ALL) { if (rex.w) X64_XOR_D(s1, s2); @@ -203,7 +203,7 @@ void emit_xor32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { MOV64xw(s3, c); if (rex.w) @@ -270,7 +270,7 @@ void 
emit_and8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) SET_DFNONE(); } - IFXA(X_ALL, la64_lbt) { + IFXA(X_ALL, cpuext.lbt) { X64_AND_B(s1, s2); } @@ -280,7 +280,7 @@ void emit_and8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) ST_B(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) return; + if (cpuext.lbt) return; CLEAR_FLAGS(s3); IFX(X_SF) { @@ -309,7 +309,7 @@ void emit_and8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s } - IFXA(X_ALL, la64_lbt) { + IFXA(X_ALL, cpuext.lbt) { MOV32w(s3, c); X64_AND_B(s1, s3); } @@ -320,7 +320,7 @@ void emit_and8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s ST_D(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) return; + if (cpuext.lbt) return; CLEAR_FLAGS(s3); IFX(X_SF) { @@ -346,7 +346,7 @@ void emit_and16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) SET_DFNONE(); } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { X64_AND_W(s1, s2); } @@ -357,7 +357,7 @@ void emit_and16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) ST_H(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) return; + if (cpuext.lbt) return; CLEAR_FLAGS(s3); IFX (X_SF) { @@ -385,7 +385,7 @@ void emit_and32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } - IFXA(X_ALL, la64_lbt) { + IFXA(X_ALL, cpuext.lbt) { if (rex.w) X64_AND_D(s1, s2); else @@ -399,7 +399,7 @@ void emit_and32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) return; + if (cpuext.lbt) return; CLEAR_FLAGS(s3); IFX(X_SF) { @@ -427,7 +427,7 @@ void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i SET_DFNONE(); } - IFXA(X_ALL, la64_lbt) { + IFXA(X_ALL, cpuext.lbt) { MOV64xw(s3, c); if (rex.w) X64_AND_D(s1, s3); @@ -438,7 +438,7 @@ void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i if (c >= 0 && c <= 4095) { ANDI(s1, s1, c); } else { - IFXA(X_ALL, la64_lbt) { } else MOV64xw(s3, c); + IFXA(X_ALL, cpuext.lbt) { } else MOV64xw(s3, c); AND(s1, s1, s3); // res = s1 & s2 } @@ -446,7 +446,7 @@ void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) return; + if (cpuext.lbt) return; CLEAR_FLAGS(s3); IFX(X_SF) { @@ -474,7 +474,7 @@ void emit_or16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) SET_DFNONE(); } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { X64_OR_H(s1, s2); } @@ -484,7 +484,7 @@ void emit_or16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) ST_D(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) return; + if (cpuext.lbt) return; CLEAR_FLAGS(s3); IFX (X_SF) { @@ -512,7 +512,7 @@ void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 SET_DFNONE(); } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { if (rex.w) X64_OR_D(s1, s2); else @@ -525,7 +525,7 @@ void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) { + if (cpuext.lbt) { if (!rex.w) ZEROUP(s1); return; } @@ -558,7 +558,7 @@ void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in SET_DFNONE(); } - IFXA(X_ALL, la64_lbt) { + IFXA(X_ALL, cpuext.lbt) { MOV64xw(s3, c); if (rex.w) X64_OR_D(s1, s3); @@ -569,7 +569,7 @@ void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in if (c >= 0 && c <= 4095) { ORI(s1, s1, 
c); } else { - IFXA(X_ALL, la64_lbt) { } else MOV64xw(s3, c); + IFXA(X_ALL, cpuext.lbt) { } else MOV64xw(s3, c); OR(s1, s1, s3); } @@ -577,7 +577,7 @@ void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) { + if (cpuext.lbt) { if (!rex.w) ZEROUP(s1); return; } @@ -613,7 +613,7 @@ void emit_or8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) SET_DFNONE(); } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { X64_OR_B(s1, s2); } @@ -623,7 +623,7 @@ void emit_or8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) ST_B(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) return; + if (cpuext.lbt) return; CLEAR_FLAGS(s3); IFX (X_SF) { diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c index 145dd683..2e1548ed 100644 --- a/src/dynarec/la64/dynarec_la64_emit_math.c +++ b/src/dynarec/la64/dynarec_la64_emit_math.c @@ -37,7 +37,7 @@ void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { if (rex.w) X64_ADD_DU(s1, s2); @@ -135,7 +135,7 @@ void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i } IFX (X_PEND | X_AF | X_CF | X_OF) { MOV64xw(s2, c); - } else if (la64_lbt) { + } else if (cpuext.lbt) { MOV64xw(s2, c); } IFX (X_PEND) { @@ -146,7 +146,7 @@ void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { if (rex.w) X64_ADD_DU(s1, s2); @@ -246,7 +246,7 @@ void emit_add8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { X64_ADD_B(s1, s2); } @@ -317,7 +317,7 @@ void emit_add8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s2, int s3, i SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { IFX (X_PEND) { } else { @@ -394,12 +394,12 @@ void emit_add16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, } else IFXORNAT (X_ALL) { SET_DFNONE(); } - IFXA (X_AF | X_OF, !la64_lbt) { + IFXA (X_AF | X_OF, !cpuext.lbt) { OR(s3, s1, s2); // s3 = op1 | op2 AND(s4, s1, s2); // s4 = op1 & op2 } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { X64_ADD_H(s1, s2); } @@ -409,7 +409,7 @@ void emit_add16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, ST_W(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) { + if (cpuext.lbt) { BSTRPICK_D(s1, s1, 15, 0); return; } @@ -467,7 +467,7 @@ void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { X64_SUB_B(s1, s2); } @@ -523,12 +523,12 @@ void emit_sub16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SET_DFNONE(); } - IFXA (X_AF | X_CF | X_OF, !la64_lbt) { + IFXA (X_AF | X_CF | X_OF, !cpuext.lbt) { // for later flag calculation NOR(s5, xZR, s1); } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { X64_SUB_H(s1, s2); } @@ -537,7 +537,7 @@ void emit_sub16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, ST_H(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) { + if (cpuext.lbt) { BSTRPICK_D(s1, s1, 15, 0); return; } @@ -572,7 +572,7 @@ void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { if (rex.w) X64_SUB_DU(s1, s2); @@ -639,7 +639,7 @@ void 
emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_PEND) { } else { MOV64xw(s2, c); @@ -715,7 +715,7 @@ void emit_sbb8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i } - if (la64_lbt) { + if (cpuext.lbt) { SBC_B(s3, s1, s2); IFX (X_ALL) { X64_SBC_B(s1, s2); @@ -776,7 +776,7 @@ void emit_sbb16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { SBC_H(s3, s1, s2); IFX (X_ALL) { @@ -831,7 +831,7 @@ void emit_sbb32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { if (rex.w) { SBC_D(s3, s1, s2); } else { @@ -899,7 +899,7 @@ void emit_neg8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3) MV(s3, s1); // s3 = op1 } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { X64_SUB_B(xZR, s1); } @@ -909,7 +909,7 @@ void emit_neg8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3) ST_B(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) return; + if (cpuext.lbt) return; CLEAR_FLAGS(s2); IFX (X_CF) { @@ -959,13 +959,13 @@ void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SET_DFNONE(); } - if (!la64_lbt) { + if (!cpuext.lbt) { IFX (X_AF | X_OF) { MV(s3, s1); // s3 = op1 } } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { if (rex.w) X64_SUB_DU(xZR, s1); else @@ -977,7 +977,7 @@ void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) { + if (cpuext.lbt) { if (!rex.w) ZEROUP(s1); return; } @@ -1034,7 +1034,7 @@ void emit_adc8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { ADC_B(s3, s1, s2); IFX (X_ALL) { @@ -1118,7 +1118,7 @@ void emit_adc16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { ADC_H(s3, s1, s2); IFX (X_ALL) { @@ -1195,7 +1195,7 @@ void emit_adc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { if (rex.w) ADC_D(s3, s1, s2); else @@ -1300,12 +1300,12 @@ void emit_inc8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) } else IFXORNAT (X_ALL) { SET_DFNONE(); } - IFXA (X_AF | X_OF, !la64_lbt) { + IFXA (X_AF | X_OF, !cpuext.lbt) { ORI(s3, s1, 1); // s3 = op1 | op2 ANDI(s4, s1, 1); // s5 = op1 & op2 } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { X64_INC_B(s1); } @@ -1315,7 +1315,7 @@ void emit_inc8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) ST_B(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) { + if (cpuext.lbt) { ANDI(s1, s1, 0xff); return; } @@ -1367,12 +1367,12 @@ void emit_inc16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) } else IFXORNAT (X_ZF | X_OF | X_AF | X_SF | X_PF) { SET_DFNONE(); } - IFXA (X_AF | X_OF, !la64_lbt) { + IFXA (X_AF | X_OF, !cpuext.lbt) { ORI(s3, s1, 1); // s3 = op1 | op2 ANDI(s4, s1, 1); // s4 = op1 & op2 } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { X64_INC_H(s1); } @@ -1382,7 +1382,7 @@ void emit_inc16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) ST_H(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) { + if (cpuext.lbt) { BSTRPICK_D(s1, s1, 15, 0); return; } @@ -1438,12 +1438,12 @@ void emit_inc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SET_DFNONE(); } - IFXA (X_AF 
| X_OF, !la64_lbt) { + IFXA (X_AF | X_OF, !cpuext.lbt) { ORI(s3, s1, 1); // s3 = op1 | op2 ANDI(s5, s1, 1); // s5 = op1 & op2 } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { if (rex.w) { X64_INC_D(s1); } else { @@ -1457,7 +1457,7 @@ void emit_inc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) { + if (cpuext.lbt) { if (!rex.w) ZEROUP(s1); return; } @@ -1510,13 +1510,13 @@ void emit_dec8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) } else IFXORNAT (X_ALL) { SET_DFNONE(); } - IFXA (X_AF | X_OF, !la64_lbt) { + IFXA (X_AF | X_OF, !cpuext.lbt) { NOR(s4, xZR, s1); // s4 = ~op1 ORI(s3, s4, 1); // s3 = ~op1 | op2 ANDI(s4, s4, 1); // s4 = ~op1 & op2 } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { X64_DEC_B(s1); } @@ -1526,7 +1526,7 @@ void emit_dec8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4) ST_B(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) { + if (cpuext.lbt) { ANDI(s1, s1, 0xff); return; } @@ -1584,7 +1584,7 @@ void emit_dec16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, ANDI(s5, s5, 1); // s5 = ~op1 & op2 } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { X64_DEC_H(s1); } @@ -1594,7 +1594,7 @@ void emit_dec16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, ST_H(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) { + if (cpuext.lbt) { BSTRPICK_D(s1, s1, 15, 0); return; } @@ -1647,13 +1647,13 @@ void emit_dec32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s } else IFXORNAT (X_ALL) { SET_DFNONE(); } - IFXA (X_AF | X_OF, !la64_lbt) { + IFXA (X_AF | X_OF, !cpuext.lbt) { NOR(s5, xZR, s1); ORI(s3, s5, 1); // s3 = ~op1 | op2 ANDI(s5, s5, 1); // s5 = ~op1 & op2 } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { if (rex.w) { X64_DEC_D(s1); } else { @@ -1667,7 +1667,7 @@ void emit_dec32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) { + if (cpuext.lbt) { if (!rex.w) ZEROUP(s1); return; } @@ -1768,7 +1768,7 @@ void emit_neg16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3) ORI(xFlags, xFlags, 1 << F_ZF); } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { SPILL_EFLAGS(); } } diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c index 0dcccb39..5a8cde04 100644 --- a/src/dynarec/la64/dynarec_la64_emit_shift.c +++ b/src/dynarec/la64/dynarec_la64_emit_shift.c @@ -33,7 +33,7 @@ void emit_shl16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { X64_SLL_H(s1, s2); } @@ -101,7 +101,7 @@ void emit_shl16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_PEND) { } else { MOV64x(s3, c); @@ -194,7 +194,7 @@ void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { if (rex.w) X64_SLL_D(s1, s2); @@ -269,7 +269,7 @@ void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_PEND) { } else { MOV64x(s3, c); @@ -347,7 +347,7 @@ void emit_shl8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { X64_SLL_B(s1, s2); } @@ -413,7 +413,7 @@ void emit_shr8(dynarec_la64_t* dyn, int 
ninst, int s1, int s2, int s3, int s4, i SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { X64_SRL_B(s1, s2); } @@ -474,7 +474,7 @@ void emit_sar8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { X64_SRA_B(s1, s2); } @@ -531,7 +531,7 @@ void emit_shr16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { X64_SRL_H(s1, s2); } @@ -594,7 +594,7 @@ void emit_shr16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_PEND) { } else { MOV64x(s3, c); @@ -660,7 +660,7 @@ void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { if (rex.w) X64_SRL_D(s1, s2); @@ -735,7 +735,7 @@ void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, return; } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_PEND) { } else { MOV64x(s3, c); @@ -816,7 +816,7 @@ void emit_sar16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { X64_SRA_H(s1, s2); } @@ -877,7 +877,7 @@ void emit_sar16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int SET_DFNONE(); } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_PEND) { } else { MOV64x(s3, c); @@ -949,7 +949,7 @@ void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, return; } - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_PEND) { } else { MOV64x(s3, c); @@ -1030,7 +1030,7 @@ void emit_ror32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, return; } - IFXA ((X_CF | X_OF), la64_lbt) { + IFXA ((X_CF | X_OF), cpuext.lbt) { if (rex.w) X64_ROTRI_D(s1, c); else @@ -1044,7 +1044,7 @@ void emit_ror32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) return; + if (cpuext.lbt) return; IFX (X_CF | X_OF) { MOV64x(s4, ((1UL << F_CF) | (1UL << F_OF))); @@ -1081,7 +1081,7 @@ void emit_rol32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SET_DFNONE(); } - IFXA ((X_CF | X_OF), la64_lbt) { + IFXA ((X_CF | X_OF), cpuext.lbt) { if (rex.w) X64_ROTL_D(s1, s2); else @@ -1098,7 +1098,7 @@ void emit_rol32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) return; + if (cpuext.lbt) return; IFX (X_CF | X_OF) { MOV64x(s4, ((1UL << F_CF) | (1UL << F_OF))); @@ -1137,7 +1137,7 @@ void emit_ror32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SET_DFNONE(); } - IFXA ((X_CF | X_OF), la64_lbt) { + IFXA ((X_CF | X_OF), cpuext.lbt) { if (rex.w) X64_ROTR_D(s1, s2); else @@ -1154,7 +1154,7 @@ void emit_ror32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) return; + if (cpuext.lbt) return; IFX (X_CF | X_OF) { MOV64x(s4, ((1UL << F_CF) | (1UL << F_OF))); @@ -1199,7 +1199,7 @@ void emit_rol32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, return; } - IFXA ((X_CF | X_OF), la64_lbt) { + IFXA ((X_CF | X_OF), cpuext.lbt) { if (rex.w) X64_ROTLI_D(s1, c); else @@ -1214,7 +1214,7 @@ void emit_rol32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, SDxw(s1, xEmu, offsetof(x64emu_t, res)); } - if (la64_lbt) return; + if (cpuext.lbt) return; IFX (X_CF | X_OF) { 
MOV64x(s3, ((1UL << F_CF) | (1UL << F_OF))); @@ -1305,7 +1305,7 @@ void emit_shld32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin emit_pf(dyn, ninst, s1, s3, s4); } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { SPILL_EFLAGS(); } } @@ -1383,7 +1383,7 @@ void emit_shrd32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin emit_pf(dyn, ninst, s1, s3, s4); } - IFXA (X_ALL, la64_lbt) { + IFXA (X_ALL, cpuext.lbt) { SPILL_EFLAGS(); } } diff --git a/src/dynarec/la64/dynarec_la64_emit_tests.c b/src/dynarec/la64/dynarec_la64_emit_tests.c index db4f21f1..ff84bd8d 100644 --- a/src/dynarec/la64/dynarec_la64_emit_tests.c +++ b/src/dynarec/la64/dynarec_la64_emit_tests.c @@ -34,7 +34,7 @@ void emit_cmp8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i NAT_FLAGS_ENABLE_CARRY(); NAT_FLAGS_ENABLE_SIGN(); - if (la64_lbt) { + if (cpuext.lbt) { IFX(X_ALL) { X64_SUB_B(s1, s2); } @@ -105,7 +105,7 @@ void emit_cmp8_0(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4) NAT_FLAGS_ENABLE_CARRY(); NAT_FLAGS_ENABLE_SIGN(); - if (la64_lbt) { + if (cpuext.lbt) { IFX(X_ALL) { X64_SUB_B(s1, xZR); } @@ -156,7 +156,7 @@ void emit_cmp16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, NAT_FLAGS_ENABLE_CARRY(); NAT_FLAGS_ENABLE_SIGN(); - if (la64_lbt) { + if (cpuext.lbt) { IFX(X_ALL) { X64_SUB_H(s1, s2); } @@ -229,7 +229,7 @@ void emit_cmp16_0(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4) NAT_FLAGS_ENABLE_CARRY(); NAT_FLAGS_ENABLE_SIGN(); - if (la64_lbt) { + if (cpuext.lbt) { IFX(X_ALL) { X64_SUB_H(s1, xZR); } @@ -280,7 +280,7 @@ void emit_cmp32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s NAT_FLAGS_ENABLE_CARRY(); NAT_FLAGS_ENABLE_SIGN(); - if (la64_lbt) { + if (cpuext.lbt) { IFX(X_ALL) { if (rex.w) X64_SUB_D(s1, s2); @@ -365,7 +365,7 @@ void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, uint8_t nextop, int NAT_FLAGS_ENABLE_CARRY(); NAT_FLAGS_ENABLE_SIGN(); - if (la64_lbt) { + if (cpuext.lbt) { IFX(X_ALL) { if (rex.w) X64_SUB_D(s1, xZR); @@ -443,7 +443,7 @@ void emit_test8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, NAT_FLAGS_ENABLE_SIGN(); if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); - if (la64_lbt) { + if (cpuext.lbt) { IFX(X_ALL) { X64_AND_B(s1, s2); } @@ -497,7 +497,7 @@ void emit_test8c(dynarec_la64_t* dyn, int ninst, int s1, uint8_t c, int s3, int NAT_FLAGS_ENABLE_SIGN(); if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { ADDI_D(s3, xZR, c); X64_AND_B(s1, s3); @@ -551,7 +551,7 @@ void emit_test16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, NAT_FLAGS_ENABLE_SIGN(); if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); - if (la64_lbt) { + if (cpuext.lbt) { IFX (X_ALL) { X64_AND_H(s1, s2); } @@ -604,7 +604,7 @@ void emit_test32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int NAT_FLAGS_ENABLE_SIGN(); if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); - if (la64_lbt) { + if (cpuext.lbt) { IFX(X_ALL) { if (rex.w) X64_AND_D(s1, s2); @@ -670,7 +670,7 @@ void emit_test32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, NAT_FLAGS_ENABLE_SIGN(); if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s3, xZR); - if (la64_lbt) { + if (cpuext.lbt) { IFX(X_ALL) { MOV64xw(s3, c); if (rex.w) @@ -705,7 +705,7 @@ void emit_test32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, if (c >= 0 && c <= 4095) { ANDI(s3, s1, c); } else { - 
IFXA(X_ALL, la64_lbt) { } else MOV64xw(s3, c); + IFXA(X_ALL, cpuext.lbt) { } else MOV64xw(s3, c); AND(s3, s1, s3); } diff --git a/src/dynarec/la64/dynarec_la64_f0.c b/src/dynarec/la64/dynarec_la64_f0.c index 18c97589..14f14d50 100644 --- a/src/dynarec/la64/dynarec_la64_f0.c +++ b/src/dynarec/la64/dynarec_la64_f0.c @@ -261,7 +261,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 1: if (rex.w) { INST_NAME("LOCK CMPXCHG16B Gq, Eq"); - if (!la64_scq) { + if (!cpuext.scq) { static int warned = 0; PASS3(if (!warned) dynarec_log(LOG_INFO, "Warning, LOCK CMPXCHG16B is not well supported on LoongArch without SCQ and issues are expected.\n")); warned = 1; @@ -272,14 +272,14 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SETFLAGS(X_ZF, SF_SUBSET, NAT_FLAGS_NOFUSION); addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, LOCK_LOCK, 0, 0); UFLAG_IF { - if (la64_lbt) { + if (cpuext.lbt) { X64_SET_EFLAGS(xZR, X_ZF); } else { BSTRINS_D(xFlags, xZR, F_ZF, F_ZF); } } if (rex.w) { - if (la64_scq) { + if (cpuext.scq) { MARKLOCK; LL_D(x2, wback, 0); LD_D(x3, wback, 8); @@ -290,7 +290,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SC_Q(x5, xRCX, wback); BEQZ_MARKLOCK(x5); UFLAG_IF { - if (la64_lbt) { + if (cpuext.lbt) { ADDI_D(x5, xZR, -1); X64_SET_EFLAGS(x5, X_ZF); } else { @@ -320,7 +320,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ST_D(xRBX, wback, 0); ST_D(xRCX, wback, 8); UFLAG_IF { - if (la64_lbt) { + if (cpuext.lbt) { ADDI_D(x5, xZR, -1); X64_SET_EFLAGS(x5, X_ZF); } else { @@ -352,7 +352,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SC_D(x5, wback, 0); BEQZ_MARKLOCK(x5); UFLAG_IF { - if (la64_lbt) { + if (cpuext.lbt) { ADDI_D(x5, xZR, -1); X64_SET_EFLAGS(x5, X_ZF); } else { @@ -372,7 +372,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni BEQZ_MARKLOCK2(x6); ST_D(x4, wback, 0); UFLAG_IF { - if (la64_lbt) { + if (cpuext.lbt) { ADDI_D(x5, xZR, -1); X64_SET_EFLAGS(x5, X_ZF); } else { diff --git a/src/dynarec/la64/dynarec_la64_f30f.c b/src/dynarec/la64/dynarec_la64_f30f.c index df53bfb3..b9b028b7 100644 --- a/src/dynarec/la64/dynarec_la64_f30f.c +++ b/src/dynarec/la64/dynarec_la64_f30f.c @@ -421,7 +421,7 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int VPICKVE2GR_WU(gd, v1, 0); } IFX (X_ALL) { - if (la64_lbt) { + if (cpuext.lbt) { X64_SET_EFLAGS(xZR, X_ALL); BNEZ_MARK(gd); ADDI_D(x3, xZR, 1 << F_ZF); diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index 8dfa9970..30774ca3 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -1284,7 +1284,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int // Restore xFlags from LBT.eflags #define RESTORE_EFLAGS(s) \ do { \ - if (la64_lbt) { \ + if (cpuext.lbt) { \ CLEAR_FLAGS_(s); \ X64_GET_EFLAGS(s, X_ALL); \ OR(xFlags, xFlags, s); \ @@ -1294,7 +1294,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int // Spill xFlags to LBT.eflags #define SPILL_EFLAGS() \ do { \ - if (la64_lbt) { \ + if (cpuext.lbt) { \ X64_SET_EFLAGS(xFlags, X_ALL); \ } \ } while (0) diff --git a/src/dynarec/la64/la64_epilog.S b/src/dynarec/la64/la64_epilog.S index 0b1feb7c..ab6a80d6 100644 --- a/src/dynarec/la64/la64_epilog.S +++ b/src/dynarec/la64/la64_epilog.S @@ 
-28,8 +28,9 @@ la64_epilog: st.d $r29, $r4, (8 * 14) st.d $r30, $r4, (8 * 15) // restore xFlags from LBT.eflags - la.global $r12, la64_lbt + la.global $r12, cpuext ldptr.d $r12, $r12, 0 + andi $r12, $r12, 1 beqz $r12, 1f ori $r13, $r0, 0b100011010101 andn $r31, $r31, $r13 diff --git a/src/dynarec/la64/la64_lock.h b/src/dynarec/la64/la64_lock.h index 80cd80b2..c5175c2e 100644 --- a/src/dynarec/la64/la64_lock.h +++ b/src/dynarec/la64/la64_lock.h @@ -1,9 +1,9 @@ #ifndef __LA64_LOCK__H__ #define __LA64_LOCK__H__ #include <stdint.h> +#include "core_arch.h" -extern int la64_lamcas; -extern int la64_lam_bh; +extern cpu_ext_t cpuext; // Atomically store val at [p] if old [p] is ref. Return 0 if OK, 1 is not. p needs to be aligned extern int diff --git a/src/dynarec/la64/la64_prolog.S b/src/dynarec/la64/la64_prolog.S index 1e6263bb..fc6bf34c 100644 --- a/src/dynarec/la64/la64_prolog.S +++ b/src/dynarec/la64/la64_prolog.S @@ -6,7 +6,7 @@ .text .align 4 -.extern la64_lbt +.extern cpuext .global la64_prolog la64_prolog: @@ -53,8 +53,9 @@ la64_prolog: ld.d $r31, $r4, (8 * 16) // xFlags ld.d $r20, $r4, (8 * 17) // xRIP // spill xFlags to LBT.eflags - la.global $a6, la64_lbt + la.global $a6, cpuext ldptr.d $a6, $a6, 0 + andi $a6, $a6, 1 beqz $a6, 1f x86mtflag $r31, 0b111111 1: diff --git a/src/dynarec/native_lock.h b/src/dynarec/native_lock.h index 1ed26e8f..5d4514ec 100644 --- a/src/dynarec/native_lock.h +++ b/src/dynarec/native_lock.h @@ -96,13 +96,13 @@ #define native_lock_cas_d(A, B, C) la64_lock_cas_d(A, B, C) #define native_lock_cas_dd(A, B, C) la64_lock_cas_dd(A, B, C) #define native_lock_xchg_b(A, B) \ - la64_lam_bh ? la64_lock_xchg_b(A, B) : la64_lock_xchg_b_slow(A, B) + cpuext.lam_bh ? la64_lock_xchg_b(A, B) : la64_lock_xchg_b_slow(A, B) #define native_lock_read_b(A) tmpcas=*(uint8_t*)(A) #define native_lock_write_b(A, B) \ - la64_lamcas ? la64_lock_cas_b(A, tmpcas, B) : la64_lock_cas_b_slow(A, tmpcas, B) + cpuext.lamcas ? la64_lock_cas_b(A, tmpcas, B) : la64_lock_cas_b_slow(A, tmpcas, B) #define native_lock_read_h(A) tmpcas=*(uint16_t*)(A) #define native_lock_write_h(A, B) \ - la64_lamcas ? la64_lock_cas_h(A, tmpcas, B) : la64_lock_cas_h_slow(A, tmpcas, B) + cpuext.lamcas ? la64_lock_cas_h(A, tmpcas, B) : la64_lock_cas_h_slow(A, tmpcas, B) #define native_lock_read_d(A) tmpcas=*(uint32_t*)(A) #define native_lock_write_d(A, B) la64_lock_cas_d(A, tmpcas, B) #define native_lock_read_dd(A) tmpcas=*(uint64_t*)(A) diff --git a/src/dynarec/rv64/dynarec_rv64_00_0.c b/src/dynarec/rv64/dynarec_rv64_00_0.c index f518ab0f..d779e994 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_0.c +++ b/src/dynarec/rv64/dynarec_rv64_00_0.c @@ -176,17 +176,17 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x0F: switch (rep) { case 0: - if (rv64_vector) + if (cpuext.vector) retaddr = dynarec64_0F_vector(dyn, addr, ip, ninst, rex, ok, need_epilog); addr = retaddr ? retaddr : dynarec64_0F(dyn, addr, ip, ninst, rex, ok, need_epilog); break; case 1: - if (rv64_vector) + if (cpuext.vector) retaddr = dynarec64_F20F_vector(dyn, addr, ip, ninst, rex, ok, need_epilog); addr = retaddr ? retaddr : dynarec64_F20F(dyn, addr, ip, ninst, rex, ok, need_epilog); break; case 2: - if (rv64_vector) + if (cpuext.vector) retaddr = dynarec64_F30F_vector(dyn, addr, ip, ninst, rex, ok, need_epilog); addr = retaddr ? 
retaddr : dynarec64_F30F(dyn, addr, ip, ninst, rex, ok, need_epilog); break; diff --git a/src/dynarec/rv64/dynarec_rv64_00_1.c b/src/dynarec/rv64/dynarec_rv64_00_1.c index 3e4fd754..a56b1017 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_1.c +++ b/src/dynarec/rv64/dynarec_rv64_00_1.c @@ -174,12 +174,12 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } break; case 0x64: - if (rv64_vector) + if (cpuext.vector) retaddr = dynarec64_64_vector(dyn, addr, ip, ninst, rex, rep, _FS, ok, need_epilog); addr = retaddr ? retaddr : dynarec64_64(dyn, addr, ip, ninst, rex, rep, _FS, ok, need_epilog); break; case 0x65: - if (rv64_vector) + if (cpuext.vector) retaddr = dynarec64_64_vector(dyn, addr, ip, ninst, rex, rep, _GS, ok, need_epilog); addr = retaddr ? retaddr : dynarec64_64(dyn, addr, ip, ninst, rex, rep, _GS, ok, need_epilog); break; @@ -190,7 +190,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int if (rex.is32bits) addr = dynarec64_67_32(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); else { - if (rv64_vector) + if (cpuext.vector) retaddr = dynarec64_67_vector(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); addr = retaddr ? retaddr : dynarec64_67(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); } diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 40de22ce..32db4a8f 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -537,7 +537,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LH(x3, gback, gdoffset + 2 * (i * 2 + 0)); LH(x4, gback, gdoffset + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -560,7 +560,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -588,7 +588,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LB(x4, wback, fixedaddress + i * 2 + 1); MUL(x3, x3, x4); ADD(x3, x3, x7); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -661,7 +661,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LH(x3, gback, gdoffset + 2 * (i * 2 + 0)); LH(x4, gback, gdoffset + 2 * (i * 2 + 1)); SUBW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -684,7 +684,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); SUBW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -1292,7 +1292,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MOV64x(x6, -128); for (int i = 0; i < 4; ++i) { LH(x3, gback, gdoffset + i * 2); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -1309,7 +1309,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else for (int i = 0; i < 4; ++i) { LH(x3, wback, fixedaddress + i * 2); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -1665,7 +1665,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("PCMPEQB Gm,Em"); 
nextop = F8; GETGM(); - if (rv64_xtheadbb) { + if (cpuext.xtheadbb) { GETEM(x2, 0, 0); LD(x3, gback, gdoffset); LD(x4, wback, fixedaddress); @@ -2567,7 +2567,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEM(x2, 0, 1); LD(x1, wback, fixedaddress + 0); for (int i = 0; i < 8; i++) { - if (rv64_zbs) { + if (cpuext.zbs) { if (i == 0) { BEXTI(gd, x1, 63); } else { @@ -2582,7 +2582,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } } if (i != 0) { - if (rv64_zba) { + if (cpuext.zba) { SH1ADD(gd, gd, x6); } else { SLLI(gd, gd, 1); @@ -2600,7 +2600,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LBU(x3, gback, gdoffset + i); LBU(x4, wback, fixedaddress + i); SUB(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MAX(x3, x3, xZR); } else { NOT(x4, x3); @@ -2615,7 +2615,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETGM(); GETEM(x2, 0, 6); - MMX_LOOP_W(x3, x4, SUB(x3, x3, x4); if (rv64_zbb) { MAX(x3, x3, xZR); } else { + MMX_LOOP_W(x3, x4, SUB(x3, x3, x4); if (cpuext.zbb) { MAX(x3, x3, xZR); } else { NOT(x4, x3); SRAI(x4, x4, 63); AND(x3, x3, x4); } SH(x3, gback, gdoffset + i * 2);); @@ -2628,7 +2628,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni for (int i = 0; i < 8; ++i) { LBU(x3, gback, gdoffset + i); LBU(x4, wback, fixedaddress + i); - if (rv64_zbb) { + if (cpuext.zbb) { MINU(x3, x3, x4); } else { BLTU(x3, x4, 8); @@ -2657,7 +2657,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LBU(x3, gback, gdoffset + i); LBU(x4, wback, fixedaddress + i); ADD(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MINU(x3, x3, x5); } else { BLT(x3, x5, 8); @@ -2678,7 +2678,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LHU(x3, gback, gdoffset + i * 2); LHU(x4, wback, fixedaddress + i * 2); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MINU(x3, x3, x5); } else { BGE(x5, x3, 8); // tmp32s <= 65535? 
@@ -2695,7 +2695,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni for (int i = 0; i < 8; ++i) { LBU(x3, gback, gdoffset + i); LBU(x4, wback, fixedaddress + i); - if (rv64_zbb) { + if (cpuext.zbb) { MAXU(x3, x3, x4); } else { BLTU(x4, x3, 8); @@ -2711,7 +2711,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEM(x2, 0, 1); LD(x1, gback, gdoffset); LD(x3, wback, fixedaddress); - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(x1, x3, x1); } else { NOT(x1, x1); @@ -2740,7 +2740,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEM(x4, 0, 1); LBU(x1, wback, fixedaddress); ADDI(x2, xZR, 15); - if (rv64_zbb) { + if (cpuext.zbb) { MINU(x1, x1, x2); } else { BLTU(x1, x2, 4 + 4); @@ -2759,7 +2759,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEM(x4, 0, 1); LBU(x1, wback, fixedaddress); ADDI(x2, xZR, 31); - if (rv64_zbb) { + if (cpuext.zbb) { MINU(x1, x1, x2); } else { BLTU(x1, x2, 4 + 4); @@ -2834,7 +2834,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LB(x3, gback, gdoffset + i); LB(x4, wback, fixedaddress + i); SUBW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -2859,7 +2859,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LH(x3, gback, gdoffset + 2 * i); LH(x4, wback, fixedaddress + 2 * i); SUBW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -2879,7 +2879,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni for (int i = 0; i < 4; ++i) { LH(x3, gback, gdoffset + 2 * i); LH(x4, wback, fixedaddress + 2 * i); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x4); } else { BLT(x3, x4, 8); @@ -2911,7 +2911,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LB(x3, gback, gdoffset + i); LB(x4, wback, fixedaddress + i); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -2936,7 +2936,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LH(x3, gback, gdoffset + 2 * i); LH(x4, wback, fixedaddress + 2 * i); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -2953,7 +2953,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETGM(); GETEM(x1, 0, 6); - MMX_LOOP_WS(x3, x4, if (rv64_zbb) { MAX(x3, x3, x4); } else { + MMX_LOOP_WS(x3, x4, if (cpuext.zbb) { MAX(x3, x3, x4); } else { BGE(x3, x4, 8); MV(x3, x4); }); break; diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index b1569238..6fa4f633 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -101,7 +101,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, GETEX_vector(v1, 0, 0, VECTOR_SEW64); q0 = fpu_get_scratch(dyn); VSLIDEDOWN_VI(q0, v1, 1, VECTOR_UNMASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b01, x4, 1); VMERGE_VVM(v0, v0, q0); // implies VMASK } else { @@ -127,7 +127,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, if (MODREG) { ed = (nextop & 7) + (rex.b << 3); d0 = sse_get_reg_vector(dyn, ninst, x1, ed, 1, VECTOR_SEW64); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { 
VECTOR_LOAD_VMASK(0b01, x4, 1); VMERGE_VVM(v0, v0, v1); // implies VMASK } else { @@ -173,7 +173,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // no more scratches! - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { VWADDU_VX(v0, q0, xZR, VECTOR_UNMASKED); VWADDU_VX(v1, q1, xZR, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2); @@ -224,7 +224,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64); q0 = fpu_get_scratch(dyn); VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b01, x4, 1); VMERGE_VVM(v1, v1, q0); // implies VMASK } else { @@ -498,7 +498,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); GETGD; GETEX_vector(q0, 0, 0, VECTOR_SEW32); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8); VSRL_VI(v0, q0, 31, VECTOR_UNMASKED); // Force the element width to 4bit @@ -865,7 +865,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } else { SET_ELEMENT_WIDTH(x3, VECTOR_SEW32, 1); } - if (!rv64_xtheadvector) { + if (!cpuext.xtheadvector) { VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); } VMV_S_X(v0, ed); @@ -1144,7 +1144,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, q0 = fpu_get_scratch(dyn); d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); - if (rv64_xtheadvector) { // lack of vrgatherei16.vv + if (cpuext.xtheadvector) { // lack of vrgatherei16.vv q1 = fpu_get_scratch(dyn); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1); tmp64u0 = ((((uint64_t)u8 >> 2) & 3) << 32) | (u8 & 3); @@ -1229,7 +1229,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); GETEM_vector(q0, 0); SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8); VSRL_VI(v0, q0, 7, VECTOR_UNMASKED); // Force the element width to 1bit @@ -1240,7 +1240,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED); } VMV_X_S(gd, VMASK); - if (!rv64_xtheadvector) { ANDI(gd, gd, 0xff); } + if (!cpuext.xtheadvector) { ANDI(gd, gd, 0xff); } break; case 0xD8: case 0xD9: diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c index 68ed8f84..ce6ff3d7 100644 --- a/src/dynarec/rv64/dynarec_rv64_66.c +++ b/src/dynarec/rv64/dynarec_rv64_66.c @@ -129,7 +129,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x0F: switch (rep) { case 0: { - if (rv64_vector) + if (cpuext.vector) retaddr = dynarec64_660F_vector(dyn, addr, ip, ninst, rex, ok, need_epilog); addr = retaddr ? 
retaddr : dynarec64_660F(dyn, addr, ip, ninst, rex, ok, need_epilog); break; diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index 4ecf1d7d..a4c929dd 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -589,7 +589,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MOV64x(x6, -128); for (int i = 0; i < 8; ++i) { LH(x3, gback, gdoffset + i * 2); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -606,7 +606,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } else for (int i = 0; i < 8; ++i) { LH(x3, wback, fixedaddress + i * 2); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -1058,7 +1058,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PCMPEQB Gx,Ex"); nextop = F8; GETGX(); - if (rv64_xtheadbb) { + if (cpuext.xtheadbb) { GETEX(x2, 0, 8); SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4); TH_TSTNBZ(x3, x3);); } else { @@ -1374,7 +1374,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int wback = x3; } IFX (X_CF) { - if (rv64_zbs) { + if (cpuext.zbs) { if (rex.w) { BEXT_(x4, ed, gd); } else { @@ -1678,7 +1678,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LD(x1, wback, fixedaddress + 8); // high part LD(x2, wback, fixedaddress + 0); // low part, also destroyed wback(x2) for (int i = 0; i < 8; i++) { - if (rv64_zbs) { + if (cpuext.zbs) { if (i == 0) { BEXTI(gd, x1, 63); } else { @@ -1693,7 +1693,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } } if (i != 0) { - if (rv64_zba) { + if (cpuext.zba) { SH1ADD(gd, gd, x6); } else { SLLI(gd, gd, 1); @@ -1702,13 +1702,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } } for (int i = 0; i < 8; i++) { - if (rv64_zbs) { + if (cpuext.zbs) { BEXTI(x6, x2, 63 - i * 8); } else { SRLI(x6, x2, 63 - i * 8); ANDI(x6, x6, 1); } - if (rv64_zba) { + if (cpuext.zba) { SH1ADD(gd, gd, x6); } else { SLLI(gd, gd, 1); @@ -1725,7 +1725,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LBU(x3, gback, gdoffset + i); LBU(x4, wback, fixedaddress + i); SUB(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MAX(x3, x3, xZR); } else { NOT(x4, x3); @@ -1743,7 +1743,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SSE_LOOP_W( x3, x4, SUB(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MAX(x3, x3, xZR); } else { NOT(x4, x3); @@ -1759,7 +1759,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int for (int i = 0; i < 16; ++i) { LBU(x3, gback, gdoffset + i); LBU(x4, wback, fixedaddress + i); - if (rv64_zbb) { + if (cpuext.zbb) { MINU(x3, x3, x4); } else { BLTU(x3, x4, 8); @@ -1785,7 +1785,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LBU(x3, gback, gdoffset + i); LBU(x4, wback, fixedaddress + i); ADD(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MINU(x3, x3, x5); } else { BLT(x3, x5, 8); @@ -1806,7 +1806,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LHU(x3, gback, gdoffset + i * 2); LHU(x4, wback, fixedaddress + i * 2); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MINU(x3, x3, x5); } else { BGE(x5, x3, 8); // tmp32s <= 65535? 
@@ -1823,7 +1823,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int for (int i = 0; i < 16; ++i) { LBU(x3, gback, gdoffset + i); LBU(x4, wback, fixedaddress + i); - if (rv64_zbb) { + if (cpuext.zbb) { MAXU(x3, x3, x4); } else { BLTU(x4, x3, 8); @@ -1976,7 +1976,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LB(x3, gback, gdoffset + i); LB(x4, wback, fixedaddress + i); SUBW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -2001,7 +2001,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LH(x3, gback, gdoffset + 2 * i); LH(x4, wback, fixedaddress + 2 * i); SUBW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -2021,7 +2021,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int for (int i = 0; i < 8; ++i) { LH(x3, gback, gdoffset + 2 * i); LH(x4, wback, fixedaddress + 2 * i); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x4); } else { BLT(x3, x4, 8); @@ -2050,7 +2050,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LB(x3, gback, gdoffset + i); LB(x4, wback, fixedaddress + i); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -2075,7 +2075,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LH(x3, gback, gdoffset + 2 * i); LH(x4, wback, fixedaddress + 2 * i); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -2092,7 +2092,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0, 14); - SSE_LOOP_WS(x3, x4, if (rv64_zbb) { MAX(x3, x3, x4); } else { + SSE_LOOP_WS(x3, x4, if (cpuext.zbb) { MAX(x3, x3, x4); } else { BGE(x3, x4, 8); MV(x3, x4); }); break; diff --git a/src/dynarec/rv64/dynarec_rv64_660f38.c b/src/dynarec/rv64/dynarec_rv64_660f38.c index d15a5922..658a0531 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f38.c +++ b/src/dynarec/rv64/dynarec_rv64_660f38.c @@ -149,7 +149,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, LH(x3, gback, gdoffset + 2 * (i * 2 + 0)); LH(x4, gback, gdoffset + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -172,7 +172,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -200,7 +200,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, LB(x4, wback, fixedaddress + i * 2 + 1); MUL(x3, x3, x4); ADD(x3, x3, x7); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -500,7 +500,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, MOV64x(x5, 65535); for (int i = 0; i < 4; ++i) { LW(x3, gback, gdoffset + i * 4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, xZR); } else { @@ -517,7 +517,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, } else for (int i = 0; i < 4; ++i) { LW(x3, wback, fixedaddress + i * 4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, xZR); } else { @@ -605,7 +605,7 @@ uintptr_t 
dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 16; ++i) { LB(x3, gback, gdoffset + i); LB(x4, wback, fixedaddress + i); - if (rv64_zbb) + if (cpuext.zbb) MIN(x4, x3, x4); else BLT(x3, x4, 4 + 4); @@ -620,7 +620,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 4; ++i) { LW(x3, gback, gdoffset + i * 4); LW(x4, wback, fixedaddress + i * 4); - if (rv64_zbb) + if (cpuext.zbb) MIN(x4, x3, x4); else BLT(x3, x4, 4 + 4); @@ -635,7 +635,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 8; ++i) { LHU(x3, gback, gdoffset + i * 2); LHU(x4, wback, fixedaddress + i * 2); - if (rv64_zbb) + if (cpuext.zbb) MINU(x4, x3, x4); else BLTU(x3, x4, 4 + 4); @@ -650,7 +650,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 4; ++i) { LWU(x3, gback, gdoffset + i * 4); LWU(x4, wback, fixedaddress + i * 4); - if (rv64_zbb) + if (cpuext.zbb) MINU(x4, x3, x4); else BLTU(x3, x4, 4 + 4); @@ -665,7 +665,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 16; ++i) { LB(x3, gback, gdoffset + i); LB(x4, wback, fixedaddress + i); - if (rv64_zbb) + if (cpuext.zbb) MAX(x4, x3, x4); else BLT(x4, x3, 4 + 4); @@ -680,7 +680,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 4; ++i) { LW(x3, gback, gdoffset + i * 4); LW(x4, wback, fixedaddress + i * 4); - if (rv64_zbb) + if (cpuext.zbb) MAX(x4, x3, x4); else BLT(x4, x3, 4 + 4); @@ -695,7 +695,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 8; ++i) { LHU(x3, gback, gdoffset + i * 2); LHU(x4, wback, fixedaddress + i * 2); - if (rv64_zbb) + if (cpuext.zbb) MAXU(x4, x3, x4); else BLTU(x4, x3, 4 + 4); @@ -710,7 +710,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 4; ++i) { LWU(x3, gback, gdoffset + i * 4); LWU(x4, wback, fixedaddress + i * 4); - if (rv64_zbb) + if (cpuext.zbb) MAXU(x4, x3, x4); else BLTU(x4, x3, 4 + 4); @@ -825,10 +825,10 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 1, 0); LHU(x1, ed, fixedaddress); - if (rv64_zbb) { + if (cpuext.zbb) { REV8(x1, x1); SRLI(x1, x1, 48); - } else if (rv64_xtheadbb) { + } else if (cpuext.xtheadbb) { TH_REVW(x1, x1); SRLI(x1, x1, 16); } else { @@ -847,10 +847,10 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, GETGD; SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, NULL, 1, 0); - if (rv64_zbb) { + if (cpuext.zbb) { REV8(x1, gd); SRLI(x1, x1, 48); - } else if (rv64_xtheadbb) { + } else if (cpuext.xtheadbb) { TH_REVW(x1, gd); SRLI(x1, x1, 16); } else { diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c index 9f74f0bb..d4d2a5b3 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c @@ -92,7 +92,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); LD(x4, ed, fixedaddress); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v1 = fpu_get_scratch(dyn); VMV_S_X(v1, x4); VECTOR_LOAD_VMASK(0b01, x3, 1); @@ 
-150,7 +150,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64); q0 = fpu_get_scratch(dyn); VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b01, x4, 1); VMERGE_VVM(v0, v1, q0); // implies VMASK } else { @@ -449,32 +449,32 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); IFX (X_ZF) { VAND_VV(v0, q1, q0, VECTOR_UNMASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { // Force the mask element width to 32 vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL2, 1); } VMSGTU_VX(VMASK, v0, xZR, VECTOR_UNMASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1); } VMV_X_S(x4, VMASK); - if (!rv64_xtheadvector) ANDI(x4, x4, 0b11); + if (!cpuext.xtheadvector) ANDI(x4, x4, 0b11); BNEZ(x4, 8); ORI(xFlags, xFlags, 1 << F_ZF); } IFX (X_CF) { VXOR_VI(v0, q0, 0x1F, VECTOR_UNMASKED); VAND_VV(v0, q1, v0, VECTOR_UNMASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { // Force the mask element width to 32 vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL2, 1); } VMSGTU_VX(VMASK, v0, xZR, VECTOR_UNMASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1); } VMV_X_S(x4, VMASK); - if (!rv64_xtheadvector) ANDI(x4, x4, 0b11); + if (!cpuext.xtheadvector) ANDI(x4, x4, 0b11); BNEZ(x4, 8); ORI(xFlags, xFlags, 1 << F_CF); } @@ -502,7 +502,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x20: INST_NAME("PMOVSXBW Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -526,7 +526,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x21: INST_NAME("PMOVSXBD Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -555,7 +555,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x22: INST_NAME("PMOVSXBQ Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -585,7 +585,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x23: INST_NAME("PMOVSXWD Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -609,7 +609,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x24: INST_NAME("PMOVSXWQ Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -637,7 +637,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x25: INST_NAME("PMOVSXDQ Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -661,7 +661,7 @@ uintptr_t 
dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x28: INST_NAME("PMULDQ Gx, Ex"); nextop = F8; - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); GETGX_vector(v0, 1, VECTOR_SEW64); GETEX_vector(v1, 0, 0, VECTOR_SEW64); @@ -716,7 +716,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x30: INST_NAME("PMOVZXBW Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -740,7 +740,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x31: INST_NAME("PMOVZXBD Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -768,7 +768,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x32: INST_NAME("PMOVZXBQ Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -798,7 +798,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x33: INST_NAME("PMOVZXWD Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -822,7 +822,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x34: INST_NAME("PMOVZXWQ Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -850,7 +850,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x35: INST_NAME("PMOVZXDQ Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -951,7 +951,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); } else if (u8 > 16) { d0 = fpu_get_scratch(dyn); - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { // clear high bits before slidedown! vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 2); VXOR_VV(d0, d0, d0, VECTOR_UNMASKED); @@ -966,7 +966,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v1 = fpu_get_scratch(dyn); VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); VSLIDEUP_VI(v0, q0, 16 - u8, VECTOR_UNMASKED); - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { // clear high bits before slidedown! d0 = fpu_get_scratch(dyn); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 2); @@ -994,7 +994,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i ed = TO_NAT((nextop & 7) + (rex.b << 3)); u8 = F8; if (u8 & (rex.w ? 1 : 3)) { - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { ADDI(x4, xZR, (u8 & (rex.w ? 
1 : 3))); VEXT_X_V(ed, q0, x4); } else { @@ -1005,13 +1005,13 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } } else { VMV_X_S(ed, q0); - if (!rv64_xtheadvector && !rex.w) ZEROUP(ed); + if (!cpuext.xtheadvector && !rex.w) ZEROUP(ed); } } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 1); u8 = F8; if (u8 & (rex.w ? 1 : 3)) { - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { ADDI(x4, xZR, (u8 & (rex.w ? 1 : 3))); VEXT_X_V(x5, q0, x4); } else { @@ -1078,7 +1078,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); GETGD; GETEX_vector(q0, 0, 0, VECTOR_SEW64); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); ADDI(x4, xZR, 63); VSRL_VX(v0, q0, x4, VECTOR_UNMASKED); @@ -1208,7 +1208,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); VFNCVT_F_F_W(d0, d1, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1); - if (!rv64_xtheadvector) VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); + if (!cpuext.xtheadvector) VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); VMV_X_S(x4, d0); VMV_S_X(v0, x4); break; @@ -1350,7 +1350,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETEX_vector(q1, 0, 0, VECTOR_SEW16); d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); d1 = fpu_get_scratch(dyn); - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 2); // double the vl for slideup. VMV_V_V(d0, q0); VSLIDEUP_VI(d0, q1, 8, VECTOR_UNMASKED); // splice q0 and q1 here! @@ -1388,7 +1388,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETEX_vector(q1, 0, 0, VECTOR_SEW16); d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); d1 = fpu_get_scratch(dyn); - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 2); // double the vl for slideup. if (q0 == q1) { VMV_V_V(d0, q0); @@ -1452,7 +1452,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETEX_vector(q1, 0, 0, VECTOR_SEW32); d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); d1 = fpu_get_scratch(dyn); - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 2); // double the vl for slideup. VMV_V_V(d0, q0); VSLIDEUP_VI(d0, q1, 4, VECTOR_UNMASKED); // splice q0 and q1 here! 
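One detail worth calling out in these dynarec_rv64_660f_vector.c hunks: the vector-length gate changes unit along with the rename. rv64_vlen was compared in bits (>= 256), while cpuext.vlen is compared in bytes (>= 32); the same convention shows up in vector_vsetvli further down, where the shift loses its "+ 3" (rv64_vlen >> (3 + sew - vlmul) becomes cpuext.vlen >> (sew - vlmul)). Below is a minimal sketch of that equivalence, assuming the new field really is stored in bytes (the cpu_ext_t definition lives outside this section); vlen_bits is a hypothetical name, not a box64 identifier.

#include <stdio.h>

/* Sketch only, not box64 code: the old check compared the RVV VLEN in bits,
 * the new one compares it in bytes, so ">= 256" and ">= 32" gate the same
 * hardware capability. */
static int has_256bit_vector_regs(unsigned vlen_bits)
{
    unsigned vlen_bytes = vlen_bits >> 3;   /* VLEN = 256 bits -> 32 bytes        */
    return vlen_bytes >= 32;                /* same predicate as vlen_bits >= 256 */
}

int main(void)
{
    printf("VLEN=128 -> %d, VLEN=256 -> %d\n",
           has_256bit_vector_regs(128), has_256bit_vector_regs(256)); /* 0, 1 */
    return 0;
}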
@@ -1498,7 +1498,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64); q0 = fpu_get_scratch(dyn); VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b01, x4, 1); VMERGE_VVM(v0, v1, q0); // implies VMASK } else { @@ -1552,7 +1552,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGX_empty_vector(q0); v0 = fpu_get_scratch(dyn); tmp64u0 = F8; - if (rv64_xtheadvector) { // lack of vrgatherei16.vv + if (cpuext.xtheadvector) { // lack of vrgatherei16.vv v1 = fpu_get_scratch(dyn); if (tmp64u0 == 0) { VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); @@ -1714,7 +1714,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); } else { d0 = fpu_get_scratch(dyn); - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { // clear high bits before slidedown! vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 2); VXOR_VV(d0, d0, d0, VECTOR_UNMASKED); @@ -1794,7 +1794,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // no more scratches! VMV_V_V(v0, q0); if (q1 & 1) VMV_V_V(d1, q1); - VMV_V_I(VMASK, rv64_xtheadvector ? 1 : 0b0101); + VMV_V_I(VMASK, cpuext.xtheadvector ? 1 : 0b0101); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL2, 2); VSLIDEUP_VI(v0, (q1 & 1) ? d1 : q1, 2, VECTOR_UNMASKED); VCOMPRESS_VM(d0, v0, VMASK); @@ -1933,7 +1933,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VMV_V_V(v1, v0); } if (u8 & 1) { - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { ADDI(x5, xZR, 1); VEXT_X_V(x4, v0, x5); } else { @@ -2003,7 +2003,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (MODREG) { q1 = sse_get_reg_empty_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3)); VMV_X_S(x4, q0); - if (!rv64_xtheadvector) { + if (!cpuext.xtheadvector) { VXOR_VV(q1, q1, q1, VECTOR_UNMASKED); } VMV_S_X(q1, x4); @@ -2020,7 +2020,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETGD; GETEX_vector(q0, 0, 0, VECTOR_SEW8); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8); VSRL_VI(v0, q0, 7, VECTOR_UNMASKED); // Force the element width to 1bit @@ -2031,7 +2031,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); VMV_X_S(gd, VMASK); - if (!rv64_xtheadvector) { ZEXTH(gd, gd); } + if (!cpuext.xtheadvector) { ZEXTH(gd, gd); } break; case 0xD8: case 0xD9: @@ -2102,7 +2102,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETGX_vector(q0, 1, VECTOR_SEW8); GETEX_vector(q1, 0, 0, VECTOR_SEW8); - if (rv64_xtheadvector) { // lack of vaddu.vv + if (cpuext.xtheadvector) { // lack of vaddu.vv v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); VWADDU_VV(v0, q0, q1, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL2, 2); @@ -2162,7 +2162,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); GETGX_vector(q0, 1, VECTOR_SEW16); GETEX_vector(q1, 0, 0, VECTOR_SEW16); - if (rv64_xtheadvector) { // lack of vaddu.vv + if 
(cpuext.xtheadvector) { // lack of vaddu.vv v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); VWADDU_VV(v0, q0, q1, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2); @@ -2199,7 +2199,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGX_empty_vector(v0); d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); VMV_V_V(d0, v1); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5); ADDI(x4, xZR, 1); // RTZ FSRM(x4, x4); @@ -2335,7 +2335,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0xF4: INST_NAME("PMULUDQ Gx, Ex"); nextop = F8; - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); GETGX_vector(v0, 1, VECTOR_SEW64); GETEX_vector(v1, 0, 0, VECTOR_SEW64); diff --git a/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c index 4ab73160..6bbc746d 100644 --- a/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c @@ -59,7 +59,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, GETGX(); GETVX(); GETEX(x2, 0, 1); - if (rv64_xtheadmempair) { + if (cpuext.xtheadmempair) { ADD(x1, vback, vxoffset); TH_LDD(x3, x4, x1, 0); } else { @@ -67,7 +67,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, LD(x4, vback, vxoffset + 8); } LWU(x5, wback, fixedaddress); - if (rv64_xtheadmempair) { + if (cpuext.xtheadmempair) { ADDI(x1, gback, gdoffset); TH_SDD(x3, x4, x1, 0); } else { diff --git a/src/dynarec/rv64/dynarec_rv64_d9.c b/src/dynarec/rv64/dynarec_rv64_d9.c index a5fb073f..852bd0ad 100644 --- a/src/dynarec/rv64/dynarec_rv64_d9.c +++ b/src/dynarec/rv64/dynarec_rv64_d9.c @@ -133,7 +133,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ANDI(x5, x5, 7); // (emu->top + i)&7 } // load x2 with ST0 anyway, for sign extraction - if (rv64_zba) + if (cpuext.zba) SH3ADD(x1, x5, xEmu); else { SLLI(x5, x5, 3); diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c index df5c921d..cc6ecfdb 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_math.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c @@ -33,7 +33,7 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX (X_CF) { if (rex.w) { ZEXTW2(s5, s1); - if (rv64_zba) // lo + if (cpuext.zba) // lo ADDUW(s5, s2, s5); else { ZEXTW2(s4, s2); @@ -64,7 +64,7 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) } else { NOT(s5, s1); // s5 = ~res @@ -128,7 +128,7 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i IFX (X_CF) { if (rex.w) { ZEXTW2(s5, s1); - if (rv64_zba) // lo + if (cpuext.zba) // lo ADDUW(s5, s2, s5); else { ZEXTW2(s4, s2); @@ -167,7 +167,7 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i SDxw(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) } else { NOT(s2, s1); // s2 = ~res @@ -222,7 +222,7 @@ void emit_add16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SW(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s3, s1); // 
s3 = ~res & (op1 | op2) } else { NOT(s5, s1); // s5 = ~res @@ -280,7 +280,7 @@ void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i ADD(s1, s1, s2); IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) } else { NOT(s2, s1); // s4 = ~res @@ -343,7 +343,7 @@ void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, i ADDI(s1, s1, c); IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) } else { NOT(s2, s1); // s2 = ~res @@ -594,7 +594,7 @@ void emit_inc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) SB(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) } else { NOT(s2, s1); // s2 = ~res @@ -703,7 +703,7 @@ void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) } else { NOT(s2, s1); // s2 = ~res @@ -813,7 +813,7 @@ void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SH(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) } else { NOT(s2, s1); // s2 = ~res @@ -964,7 +964,7 @@ void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SW(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s4, s1); // s3 = ~res & (op1 | op2) } else { NOT(s2, s1); // s2 = ~res @@ -1288,7 +1288,7 @@ void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SW(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s4, s1); // s3 = ~res & (op1 | op2) } else { NOT(s2, s1); // s2 = ~res @@ -1340,7 +1340,7 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX (X_CF) { if (rex.w) { ZEXTW2(s5, s1); - if (rv64_zba) + if (cpuext.zba) ADDUW(s5, s2, s5); else { ZEXTW2(s4, s2); @@ -1377,7 +1377,7 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s4, s1); // s3 = ~res & (op1 | op2) } else { NOT(s3, s1); // s2 = ~res diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c index 1bba5289..69dd9a02 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c @@ -950,7 +950,7 @@ void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SET_DFNONE(); } - if (rv64_zbb) { + if (cpuext.zbb) { if (rex.w) { ROL(s1, s1, s2); } else { @@ -1004,7 +1004,7 @@ void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SET_DFNONE(); } - if (rv64_zbb) { + if (cpuext.zbb) { if (rex.w) { ROR(s1, s1, s2); } else { @@ -1093,9 +1093,9 @@ void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, } else IFXORNAT (X_ALL) { SET_DFNONE(); } - if (rv64_zbb) { + if (cpuext.zbb) { RORIxw(s1, s1, (rex.w ? 64 : 32) - c); - } else if (rv64_xtheadbb) { + } else if (cpuext.xtheadbb) { TH_SRRIxw(s1, s1, (rex.w ? 
64 : 32) - c); } else { SLLIxw(s3, s1, c); @@ -1179,9 +1179,9 @@ void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, } else IFXORNAT (X_ALL) { SET_DFNONE(); } - if (rv64_zbb) { + if (cpuext.zbb) { RORIxw(s1, s1, c); - } else if (rv64_xtheadbb) { + } else if (cpuext.xtheadbb) { TH_SRRIxw(s1, s1, c); } else { SRLIxw(s3, s1, c); @@ -1324,7 +1324,7 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin if (c == 1) SRLI(s4, s1, 15); } - if (rv64_zbb) { + if (cpuext.zbb) { RORIW(s1, s1, c); } else { SRLI(s5, s1, c); @@ -1582,7 +1582,7 @@ void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin if (c == 1) SRLI(s5, s1, 15); } - if (rv64_zbb) { + if (cpuext.zbb) { RORIW(s1, s1, 32 - c); } else { SLLI(s3, s1, c); diff --git a/src/dynarec/rv64/dynarec_rv64_f20f_vector.c b/src/dynarec/rv64/dynarec_rv64_f20f_vector.c index 686ee717..c141376f 100644 --- a/src/dynarec/rv64/dynarec_rv64_f20f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_f20f_vector.c @@ -54,7 +54,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i ed = (nextop & 7) + (rex.b << 3); v0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, VECTOR_SEW64); v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 0, VECTOR_SEW64); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b01, x4, 1); VMERGE_VVM(v0, v0, v1); // implies VMASK } else { @@ -66,7 +66,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd); addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0); LD(x4, ed, fixedaddress); - if (!rv64_xtheadvector) VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); + if (!cpuext.xtheadvector) VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); VMV_S_X(v0, x4); } break; @@ -79,7 +79,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (MODREG) { ed = (nextop & 7) + (rex.b << 3); d0 = sse_get_reg_vector(dyn, ninst, x1, ed, 1, VECTOR_SEW64); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b01, x4, 1); VMERGE_VVM(v0, v0, v1); // implies VMASK } else { @@ -125,7 +125,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i FCVTDW(v0, ed, RD_RNE); SET_ELEMENT_WIDTH(x3, VECTOR_SEW64, 1); } - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v1 = fpu_get_scratch(dyn); VFMV_S_F(v1, v0); VECTOR_LOAD_VMASK(0b01, x4, 1); @@ -231,7 +231,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i FMVDX(d0, xZR); VMFLT_VF(VMASK, v1, d0, VECTOR_UNMASKED); VFSGNJN_VV(d1, d1, d1, VECTOR_MASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b01, x4, 1); VMERGE_VVM(v0, v0, d1); // implies VMASK } else { @@ -304,14 +304,14 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (v1 & 1 || v0 == v1 + 1) { d1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); VMV_V_V(d1, v1); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VFNCVT_F_F_W(d0, d1, VECTOR_MASKED); VMERGE_VVM(v0, v0, d0); // implies VMASK } else { VFNCVT_F_F_W(v0, d1, VECTOR_MASKED); } } else { - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VFNCVT_F_F_W(d0, v1, VECTOR_MASKED); VMERGE_VVM(v0, v0, d0); // implies VMASK } else { @@ -370,7 +370,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i MARK; FMVD(d0, d1); MARK2; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VFMV_S_F(d0, 
d0); VECTOR_LOAD_VMASK(0b0001, x4, 1); VMERGE_VVM(v0, v0, d0); // implies VMASK @@ -430,7 +430,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i MARK; FMVD(d0, d1); MARK2; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VFMV_S_F(d0, d0); VECTOR_LOAD_VMASK(0b0001, x4, 1); VMERGE_VVM(v0, v0, d0); // implies VMASK @@ -541,7 +541,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i MARK; } NEG(x2, x2); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v0 = fpu_get_scratch(dyn); VMV_S_X(v0, x2); VECTOR_LOAD_VMASK(0b01, x4, 1); @@ -569,7 +569,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VFNCVT_X_F_W(d0, d1, VECTOR_UNMASKED); x87_restoreround(dyn, ninst, u8); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1); - if (!rv64_xtheadvector) VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); + if (!cpuext.xtheadvector) VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); VMV_X_S(x4, d0); VMV_S_X(v0, x4); break; diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c index 19a8b4b9..50e19353 100644 --- a/src/dynarec/rv64/dynarec_rv64_f30f.c +++ b/src/dynarec/rv64/dynarec_rv64_f30f.c @@ -466,7 +466,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MOV32w(gd, 0); B_NEXT_nocond; MARK; - if (rv64_zbb) { + if (cpuext.zbb) { CPOPxw(gd, ed); } else { if (rex.w) { diff --git a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c index 900831f7..fa511793 100644 --- a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c @@ -56,7 +56,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i ed = (nextop & 7) + (rex.b << 3); v0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, VECTOR_SEW32); v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 0, VECTOR_SEW32); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b0001, x4, 1); VMERGE_VVM(v0, v0, v1); // implies VMASK } else { @@ -81,7 +81,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (MODREG) { ed = (nextop & 7) + (rex.b << 3); d0 = sse_get_reg_vector(dyn, ninst, x1, ed, 1, VECTOR_SEW32); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b0001, x4, 1); VMERGE_VVM(v0, v0, v1); // implies VMASK } else { @@ -150,7 +150,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETED(0); FCVTSW(v0, ed, RD_RNE); } - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v1 = fpu_get_scratch(dyn); VFMV_S_F(v1, v0); VECTOR_LOAD_VMASK(0b0001, x4, 1); @@ -331,7 +331,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VECTOR_LOAD_VMASK(0b0001, x4, 1); VFWCVT_F_F_V(d0, v1, VECTOR_MASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VMERGE_VVM(v0, v0, d0); // implies VMASK } else { VMV_X_S(x4, d0); @@ -345,7 +345,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); GETEX_vector(v1, 0, 0, VECTOR_SEW32); GETGX_empty_vector(v0); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { ADDI(x4, xZR, 1); // RTZ FSRM(x4, x4); VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED); @@ -405,7 +405,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i MARK; FMVS(d0, d1); MARK2; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { 
VFMV_S_F(d0, d0); VECTOR_LOAD_VMASK(0b0001, x4, 1); VMERGE_VVM(v0, v0, d0); // implies VMASK @@ -465,7 +465,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i MARK; FMVS(d0, d1); MARK2; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VFMV_S_F(d0, d0); VECTOR_LOAD_VMASK(0b0001, x4, 1); VMERGE_VVM(v0, v0, d0); // implies VMASK @@ -530,7 +530,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0); LD(x4, ed, fixedaddress); } - if (!rv64_xtheadvector) VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); + if (!cpuext.xtheadvector) VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); VMV_S_X(v0, x4); break; case 0x7F: @@ -616,7 +616,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i MARK; } NEG(x2, x2); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v0 = fpu_get_scratch(dyn); VMV_S_X(v0, x2); VECTOR_LOAD_VMASK(0b0001, x4, 1); diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 79868e55..117045d0 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -86,10 +86,10 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, if (!(sib >> 6)) { ADD(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg)); if (!IS_GPR(ret)) SCRATCH_USAGE(1); - } else if (rv64_zba) { + } else if (cpuext.zba) { SHxADD(ret, TO_NAT(sib_reg), sib >> 6, TO_NAT(sib_reg2)); if (!IS_GPR(ret)) SCRATCH_USAGE(1); - } else if (rv64_xtheadba) { + } else if (cpuext.xtheadba) { TH_ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6); if (!IS_GPR(ret)) SCRATCH_USAGE(1); } else { @@ -160,10 +160,10 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, if (!(sib >> 6)) { ADD(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg)); if (!IS_GPR(ret)) SCRATCH_USAGE(1); - } else if (rv64_zba) { + } else if (cpuext.zba) { SHxADD(ret, TO_NAT(sib_reg), sib >> 6, TO_NAT(sib_reg2)); if (!IS_GPR(ret)) SCRATCH_USAGE(1); - } else if (rv64_xtheadba) { + } else if (cpuext.xtheadba) { TH_ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6); if (!IS_GPR(ret)) SCRATCH_USAGE(1); } else { @@ -183,10 +183,10 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, if (!(sib >> 6)) { ADD(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg)); if (!IS_GPR(ret)) SCRATCH_USAGE(1); - } else if (rv64_zba) { + } else if (cpuext.zba) { SHxADD(ret, TO_NAT(sib_reg), sib >> 6, TO_NAT(sib_reg2)); if (!IS_GPR(ret)) SCRATCH_USAGE(1); - } else if (rv64_xtheadba) { + } else if (cpuext.xtheadba) { TH_ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6); if (!IS_GPR(ret)) SCRATCH_USAGE(1); } else { @@ -587,7 +587,7 @@ void jump_to_epilog_fast(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst) static int indirect_lookup(dynarec_rv64_t* dyn, int ninst, int is32bits, int s1, int s2) { MAYUSE(dyn); - if (rv64_xtheadbb && rv64_xtheadmemidx) { + if (cpuext.xtheadbb && cpuext.xtheadmemidx) { if (!is32bits) { SRLI(s1, xRIP, 48); BNEZ_safe(s1, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block); @@ -1115,7 +1115,7 @@ void x87_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, in #endif ADDI(s3, s2, dyn->e.x87cache[i]); // unadjusted count, as it's relative to real top ANDI(s3, s3, 7); // (emu->top + st)&7 - if (rv64_zba) + if (cpuext.zba) SH3ADD(s1, s3, xEmu); else { SLLI(s1, s3, 3); @@ -1227,7 +1227,7 @@ static void x87_reflectcache(dynarec_rv64_t* dyn, int ninst, 
int s1, int s2, int if (dyn->e.x87cache[i] != -1) { ADDI(s3, s2, dyn->e.x87cache[i]); ANDI(s3, s3, 7); // (emu->top + i)&7 - if (rv64_zba) + if (cpuext.zba) SH3ADD(s1, s3, xEmu); else { SLLI(s1, s3, 3); @@ -1310,7 +1310,7 @@ int x87_get_cache(dynarec_rv64_t* dyn, int ninst, int populate, int s1, int s2, ADDI(s2, s2, a); ANDI(s2, s2, 7); } - if (rv64_zba) + if (cpuext.zba) SH3ADD(s1, s2, xEmu); else { SLLI(s2, s2, 3); @@ -1362,7 +1362,7 @@ void x87_refresh(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st) ADDI(s2, s2, a); ANDI(s2, s2, 7); // (emu->top + i)&7 } - if (rv64_zba) + if (cpuext.zba) SH3ADD(s1, s2, xEmu); else { SLLI(s2, s2, 3); @@ -1405,7 +1405,7 @@ void x87_forget(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st) ADDI(s2, s2, a); ANDI(s2, s2, 7); // (emu->top + i)&7 } - if (rv64_zba) + if (cpuext.zba) SH3ADD(s1, s2, xEmu); else { SLLI(s2, s2, 3); @@ -1449,7 +1449,7 @@ void x87_reget_st(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st) ADDI(s2, s2, a); AND(s2, s2, 7); } - if (rv64_zba) + if (cpuext.zba) SH3ADD(s1, s2, xEmu); else { SLLI(s2, s2, 3); @@ -1474,7 +1474,7 @@ void x87_reget_st(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st) int a = st - dyn->e.x87stack; ADDI(s2, s2, a); ANDI(s2, s2, 7); // (emu->top + i)&7 - if (rv64_zba) + if (cpuext.zba) SH3ADD(s1, s2, xEmu); else { SLLI(s2, s2, 3); @@ -1509,7 +1509,7 @@ void x87_free(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int st) } ANDI(s2, s2, 7); // (emu->top + i)&7 } - if (rv64_zba) + if (cpuext.zba) SH3ADD(s1, s2, xEmu); else { SLLI(s2, s2, 3); @@ -2057,7 +2057,7 @@ static void sse_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1) } for (int i = 0; i < 16; ++i) if (is_avx_zero(dyn, ninst, i)) { - if (rv64_xtheadmempair) { + if (cpuext.xtheadmempair) { ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i])); TH_SDD(xZR, xZR, s1, 0); } else { @@ -2091,7 +2091,7 @@ static void sse_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1) if (dyn->ymm_zero) for (int i = 0; i < 16; ++i) if (is_avx_zero(dyn, ninst, i)) { - if (rv64_xtheadmempair) { + if (cpuext.xtheadmempair) { ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i])); TH_SDD(xZR, xZR, s1, 0); } else { @@ -2104,7 +2104,7 @@ static void sse_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1) void sse_reflect_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a) { if (is_avx_zero(dyn, ninst, a)) { - if (rv64_xtheadmempair) { + if (cpuext.xtheadmempair) { ADDI(s1, xEmu, offsetof(x64emu_t, ymm[a])); TH_SDD(xZR, xZR, s1, 0); } else { @@ -2151,7 +2151,7 @@ void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07) else FSD(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); if (is_avx_zero(dyn, ninst, i)) { - if (rv64_xtheadmempair) { + if (cpuext.xtheadmempair) { ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i])); TH_SDD(xZR, xZR, s1, 0); } else { @@ -2201,7 +2201,7 @@ void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07) VSE_V(dyn->e.ssecache[i].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); } if (is_avx_zero(dyn, ninst, i)) { - if (rv64_xtheadmempair) { + if (cpuext.xtheadmempair) { ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i])); TH_SDD(xZR, xZR, s1, 0); } else { @@ -2487,7 +2487,7 @@ static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int } *s3_top += a; *s2_val = 0; - if (rv64_zba) + if (cpuext.zba) SH3ADD(s2, s3, xEmu); else { SLLI(s2, s3, 3); @@ -2563,7 +2563,7 @@ static void unloadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, i ANDI(s3, s3, 7); } *s3_top += a; - if 
(rv64_zba) + if (cpuext.zba) SH3ADD(s2, s3, xEmu); else { SLLI(s2, s3, 3); @@ -2868,7 +2868,7 @@ void emit_pf(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4) { MAYUSE(dyn); MAYUSE(ninst); - if (rv64_zbb) { + if (cpuext.zbb) { ANDI(s3, s1, 0xFF); CPOPW(s3, s3); } else { @@ -2963,11 +2963,11 @@ int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, f if (sew == VECTOR_SEWANY) sew = VECTOR_SEW8; uint32_t vl = (int)((float)(16 >> sew) * multiple); - uint32_t vtypei = (sew << (3 - !!rv64_xtheadvector)) | vlmul; + uint32_t vtypei = (sew << (3 - !!cpuext.xtheadvector)) | vlmul; if (dyn->inst_sew == VECTOR_SEWNA || dyn->inst_vl == 0 || dyn->inst_sew != sew || dyn->inst_vl != vl || dyn->inst_vlmul != vlmul) { - if (vl == (rv64_vlen >> (3 + sew - vlmul))) { + if (vl == (cpuext.vlen >> (sew - vlmul))) { VSETVLI(s1, xZR, vtypei); - } else if (vl <= 31 && !rv64_xtheadvector) { + } else if (vl <= 31 && !cpuext.xtheadvector) { VSETIVLI(xZR, vl, vtypei); } else { ADDI(s1, xZR, vl); @@ -3004,7 +3004,7 @@ void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int #if STEP > 1 uint8_t sew = dyn->inst_sew; uint8_t vlmul = dyn->inst_vlmul; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { if (sew == VECTOR_SEW64 && vlmul == VECTOR_LMUL1) { switch (imm) { case 0: @@ -3244,7 +3244,7 @@ void avx_purge_ymm(dynarec_rv64_t* dyn, int ninst, uint16_t mask, int s1) MESSAGE(LOG_NONE, "Purge YMM mask=%04x --------\n", mask); do_something = 1; } - if (rv64_xtheadmempair) { + if (cpuext.xtheadmempair) { ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i])); TH_SDD(xZR, xZR, s1, 0); } else { diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 4f923318..40857156 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -240,7 +240,7 @@ wback = TO_NAT(wback & 3); \ } \ if (wb2) { \ - if (rv64_xtheadbb) { \ + if (cpuext.xtheadbb) { \ TH_EXTU(i, wback, 15, 8); \ } else { \ SRLI(i, wback, wb2); \ @@ -269,7 +269,7 @@ wback = TO_NAT(wback & 3); \ } \ if (wb2) { \ - if (rv64_xtheadbb) { \ + if (cpuext.xtheadbb) { \ TH_EXTU(i, wback, 15, 8); \ } else { \ SRLI(i, wback, wb2); \ @@ -323,7 +323,7 @@ wback = TO_NAT(wback & 3); \ } \ if (wb2) { \ - if (rv64_xtheadbb) { \ + if (cpuext.xtheadbb) { \ TH_EXTU(i, wback, 15, 8); \ } else { \ MV(i, wback); \ @@ -354,7 +354,7 @@ } \ gd = i; \ if (gb2) { \ - if (rv64_xtheadbb) { \ + if (cpuext.xtheadbb) { \ TH_EXTU(gd, gb1, 15, 8); \ } else { \ SRLI(gd, gb1, 8); \ @@ -920,7 +920,7 @@ #define SET_FLAGS_NEZ(reg, F, scratch) \ do { \ - if (rv64_xtheadcondmov) { \ + if (cpuext.xtheadcondmov) { \ ORI(scratch, xFlags, 1 << F); \ TH_MVNEZ(xFlags, scratch, reg); \ } else { \ @@ -931,7 +931,7 @@ #define SET_FLAGS_EQZ(reg, F, scratch) \ do { \ - if (rv64_xtheadcondmov) { \ + if (cpuext.xtheadcondmov) { \ ORI(scratch, xFlags, 1 << F); \ TH_MVEQZ(xFlags, scratch, reg); \ } else { \ @@ -942,7 +942,7 @@ #define SET_FLAGS_LTZ(reg, F, scratch1, scratch2) \ do { \ - if (rv64_xtheadcondmov) { \ + if (cpuext.xtheadcondmov) { \ SLT(scratch1, reg, xZR); \ ORI(scratch2, xFlags, 1 << F); \ TH_MVNEZ(xFlags, scratch2, scratch1); \ @@ -1894,7 +1894,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } \ IFX (X_CF | X_PF | X_ZF | X_PEND) { \ MOV32w(s2, 0b01000101); \ - if (rv64_zbb) { \ + if (cpuext.zbb) { \ ANDN(xFlags, xFlags, s2); \ } else { \ NOT(s3, s2); \ @@ -1934,7 +1934,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, 
uintptr_t ip, ADDIW(reg, s, -1); #define FAST_8BIT_OPERATION(dst, src, s1, OP) \ - if (MODREG && (rv64_zbb || rv64_xtheadbb) && !dyn->insts[ninst].x64.gen_flags) { \ + if (MODREG && (cpuext.zbb || cpuext.xtheadbb) && !dyn->insts[ninst].x64.gen_flags) { \ if (rex.rex) { \ wb = TO_NAT((nextop & 7) + (rex.b << 3)); \ wb2 = 0; \ @@ -1950,13 +1950,13 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } \ if (src##2) { ANDI(s1, src, 0xf00); } \ SLLI(s1, (src##2 ? s1 : src), 64 - src##2 - 8); \ - if (rv64_zbb) { \ + if (cpuext.zbb) { \ RORI(dst, dst, 8 + dst##2); \ } else { \ TH_SRRI(dst, dst, 8 + dst##2); \ } \ OP; \ - if (rv64_zbb) { \ + if (cpuext.zbb) { \ RORI(dst, dst, 64 - 8 - dst##2); \ } else { \ TH_SRRI(dst, dst, 64 - 8 - dst##2); \ @@ -1969,17 +1969,17 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } #define FAST_16BIT_OPERATION(dst, src, s1, OP) \ - if (MODREG && (rv64_zbb || rv64_xtheadbb) && !dyn->insts[ninst].x64.gen_flags) { \ + if (MODREG && (cpuext.zbb || cpuext.xtheadbb) && !dyn->insts[ninst].x64.gen_flags) { \ gd = TO_NAT(((nextop & 0x38) >> 3) + (rex.r << 3)); \ ed = TO_NAT((nextop & 7) + (rex.b << 3)); \ SLLI(s1, src, 64 - 16); \ - if (rv64_zbb) { \ + if (cpuext.zbb) { \ RORI(dst, dst, 16); \ } else { \ TH_SRRI(dst, dst, 16); \ } \ OP; \ - if (rv64_zbb) { \ + if (cpuext.zbb) { \ RORI(dst, dst, 64 - 16); \ } else { \ TH_SRRI(dst, dst, 64 - 16); \ diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index f541e0a5..cb8d0c39 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -35,9 +35,9 @@ // ZERO the upper part, compatible to zba, xtheadbb, and rv64gc #define ZEXTW2(rd, rs1) \ do { \ - if (rv64_zba) { \ + if (cpuext.zba) { \ ZEXTW(rd, rs1); \ - } else if (rv64_xtheadbb) { \ + } else if (cpuext.xtheadbb) {\ TH_EXTU(rd, rs1, 31, 0); \ } else { \ SLLI(rd, rs1, 32); \ @@ -251,19 +251,19 @@ #define SGTU(rd, rs1, rs2) SLTU(rd, rs2, rs1); #define SLEU(rd, rs1, rs2) SGEU(rd, rs2, rs1); -#define MVEQ(rd, rs1, rs2, rs3) \ - if (rv64_xtheadcondmov && (rs2 == xZR || rs3 == xZR)) { \ - TH_MVEQZ(rd, rs1, ((rs2 == xZR) ? rs3 : rs2)); \ - } else { \ - BNE(rs2, rs3, 8); \ - MV(rd, rs1); \ +#define MVEQ(rd, rs1, rs2, rs3) \ + if (cpuext.xtheadcondmov && (rs2 == xZR || rs3 == xZR)) { \ + TH_MVEQZ(rd, rs1, ((rs2 == xZR) ? rs3 : rs2)); \ + } else { \ + BNE(rs2, rs3, 8); \ + MV(rd, rs1); \ } -#define MVNE(rd, rs1, rs2, rs3) \ - if (rv64_xtheadcondmov && (rs2 == xZR || rs3 == xZR)) { \ - TH_MVNEZ(rd, rs1, ((rs2 == xZR) ? rs3 : rs2)); \ - } else { \ - BEQ(rs2, rs3, 8); \ - MV(rd, rs1); \ +#define MVNE(rd, rs1, rs2, rs3) \ + if (cpuext.xtheadcondmov && (rs2 == xZR || rs3 == xZR)) { \ + TH_MVNEZ(rd, rs1, ((rs2 == xZR) ? 
rs3 : rs2)); \ + } else { \ + BEQ(rs2, rs3, 8); \ + MV(rd, rs1); \ } #define MVLT(rd, rs1, rs2, rs3) \ BGE(rs2, rs3, 8); \ @@ -411,36 +411,36 @@ // 4-bytes[rs1+imm12] = rs2 #define SW(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b010, 0b0100011)) -#define PUSH1(reg) \ - do { \ - if (rv64_xtheadmemidx && reg != xRSP) { \ - TH_SDIB(reg, xRSP, -8, 0); \ - } else { \ - SD(reg, xRSP, 0xFF8); \ - SUBI(xRSP, xRSP, 8); \ - } \ - } while (0) -#define POP1(reg) \ +#define PUSH1(reg) \ do { \ - if (rv64_xtheadmemidx && reg != xRSP) { \ - TH_LDIA(reg, xRSP, 8, 0); \ + if (cpuext.xtheadmemidx && reg != xRSP) { \ + TH_SDIB(reg, xRSP, -8, 0); \ } else { \ - LD(reg, xRSP, 0); \ - if (reg != xRSP) ADDI(xRSP, xRSP, 8); \ + SD(reg, xRSP, 0xFF8); \ + SUBI(xRSP, xRSP, 8); \ } \ } while (0) -#define PUSH1_32(reg) \ - do { \ - if (rv64_xtheadmemidx && reg != xRSP) { \ - TH_SWIB(reg, xRSP, -4, 0); \ - } else { \ - SW(reg, xRSP, 0xFFC); \ - SUBI(xRSP, xRSP, 4); \ - } \ +#define POP1(reg) \ + do { \ + if (cpuext.xtheadmemidx && reg != xRSP) { \ + TH_LDIA(reg, xRSP, 8, 0); \ + } else { \ + LD(reg, xRSP, 0); \ + if (reg != xRSP) ADDI(xRSP, xRSP, 8); \ + } \ + } while (0) +#define PUSH1_32(reg) \ + do { \ + if (cpuext.xtheadmemidx && reg != xRSP) { \ + TH_SWIB(reg, xRSP, -4, 0); \ + } else { \ + SW(reg, xRSP, 0xFFC); \ + SUBI(xRSP, xRSP, 4); \ + } \ } while (0) #define POP1_32(reg) \ do { \ - if (rv64_xtheadmemidx && reg != xRSP) { \ + if (cpuext.xtheadmemidx && reg != xRSP) { \ TH_LWUIA(reg, xRSP, 4, 0); \ } else { \ LWU(reg, xRSP, 0); \ @@ -461,19 +461,19 @@ PUSH1(reg); \ } -#define PUSH1_16(reg) \ - do { \ - if (rv64_xtheadmemidx && reg != xRSP) { \ - TH_SHIB(reg, xRSP, -2, 0); \ - } else { \ - SH(reg, xRSP, 0xFFE); \ - SUBI(xRSP, xRSP, 2); \ - } \ +#define PUSH1_16(reg) \ + do { \ + if (cpuext.xtheadmemidx && reg != xRSP) { \ + TH_SHIB(reg, xRSP, -2, 0); \ + } else { \ + SH(reg, xRSP, 0xFFE); \ + SUBI(xRSP, xRSP, 2); \ + } \ } while (0) #define POP1_16(reg) \ do { \ - if (rv64_xtheadmemidx && reg != xRSP) { \ + if (cpuext.xtheadmemidx && reg != xRSP) { \ TH_LHUIA(reg, xRSP, 2, 0); \ } else { \ LHU(reg, xRSP, 0); \ @@ -539,9 +539,9 @@ #define ADDSL(rd, rs1, rs2, imm2, scratch) \ if (!(imm2)) { \ ADD(rd, rs1, rs2); \ - } else if (rv64_zba) { \ + } else if (cpuext.zba) { \ SHxADD(rd, rs2, imm2, rs1); \ - } else if (rv64_xtheadba) { \ + } else if (cpuext.xtheadba) { \ TH_ADDSL(rd, rs1, rs2, imm2); \ } else { \ SLLI(scratch, rs2, imm2); \ @@ -894,12 +894,12 @@ #define CLZW(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, 0b0011011)) // Count leading zero bits #define CLZxw(rd, rs, x, s1, s2, s3) \ - if (rv64_zbb) { \ + if (cpuext.zbb) { \ if (x) \ CLZ(rd, rs); \ else \ CLZW(rd, rs); \ - } else if (rv64_xtheadbb) { \ + } else if (cpuext.xtheadbb) { \ if (x) { \ TH_FF1(rd, rs); \ } else { \ @@ -949,7 +949,7 @@ // BEWARE: You should take care of the all zeros situation yourself, // and clear the high 32bit when x is 1. 
#define CTZxw(rd, rs, x, s1, s2) \ - if (rv64_zbb) { \ + if (cpuext.zbb) { \ if (x) \ CTZ(rd, rs); \ else \ @@ -985,9 +985,9 @@ #define SEXTH_(rd, rs) EMIT(R_type(0b0110000, 0b00101, rs, 0b001, rd, 0b0010011)) // Sign-extend half-word #define SEXTH(rd, rs) \ - if (rv64_zbb) \ + if (cpuext.zbb) \ SEXTH_(rd, rs); \ - else if (rv64_xtheadbb) \ + else if (cpuext.xtheadbb) \ TH_EXT(rd, rs, 15, 0); \ else { \ SLLI(rd, rs, 48); \ @@ -997,9 +997,9 @@ #define ZEXTH_(rd, rs) EMIT(R_type(0b0000100, 0b00000, rs, 0b100, rd, 0b0111011)) // Zero-extend half-word #define ZEXTH(rd, rs) \ - if (rv64_zbb) \ + if (cpuext.zbb) \ ZEXTH_(rd, rs); \ - else if (rv64_xtheadbb) \ + else if (cpuext.xtheadbb) \ TH_EXTU(rd, rs, 15, 0); \ else { \ SLLI(rd, rs, 48); \ @@ -1047,12 +1047,12 @@ // Byte-reverse register, rd can be the same as rs or s1, but rs cannot be the same as s1. #define REV8xw(rd, rs, s1, s2, s3, s4) \ - if (rv64_zbb) { \ + if (cpuext.zbb) { \ REV8(rd, rs); \ if (!rex.w) { \ SRLI(rd, rd, 32); \ } \ - } else if (rv64_xtheadbb) { \ + } else if (cpuext.xtheadbb) { \ if (rex.w) { \ TH_REV(rd, rs); \ } else { \ @@ -1135,7 +1135,7 @@ // Single-bit Extract (Register) #define BEXT(rd, rs1, rs2, s0) \ - if (rv64_zbs) { \ + if (cpuext.zbs) { \ if (rex.w) { \ BEXT_(rd, rs1, rs2); \ } else { \ @@ -1150,9 +1150,9 @@ // Single-bit Extract (Immediate) #define BEXTI(rd, rs1, imm) \ - if (rv64_zbs) \ + if (cpuext.zbs) \ BEXTI_(rd, rs1, imm); \ - else if (rv64_xtheadbs) \ + else if (cpuext.xtheadbs) \ TH_TST(rd, rs1, imm); \ else { \ SRLIxw(rd, rs1, imm); \ @@ -1504,14 +1504,14 @@ // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#76-vector-indexed-instructions // Note: Make sure SEW in vtype is always the same as EEW, for xtheadvector compatibility! -#define VLUXEI8_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b000, vd, 0b0000111)) // ...001...........000.....0000111 -#define VLUXEI16_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b101, vd, 0b0000111)) // ...001...........101.....0000111 -#define VLUXEI32_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b110, vd, 0b0000111)) // ...001...........110.....0000111 -#define VLUXEI64_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b111, vd, 0b0000111)) // ...001...........111.....0000111 -#define VSUXEI8_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b000, vs3, 0b0100111)) // ...001...........000.....0100111 -#define VSUXEI16_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b101, vs3, 0b0100111)) // ...001...........101.....0100111 -#define VSUXEI32_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b110, vs3, 0b0100111)) // ...001...........110.....0100111 -#define VSUXEI64_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b111, vs3, 0b0100111)) // ...001...........111.....0100111 +#define VLUXEI8_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (cpuext.xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b000, vd, 0b0000111)) // ...001...........000.....0000111 +#define VLUXEI16_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (cpuext.xtheadvector ? 
0b0110 : 0b0010), vs2, rs1, 0b101, vd, 0b0000111)) // ...001...........101.....0000111 +#define VLUXEI32_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (cpuext.xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b110, vd, 0b0000111)) // ...001...........110.....0000111 +#define VLUXEI64_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (cpuext.xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b111, vd, 0b0000111)) // ...001...........111.....0000111 +#define VSUXEI8_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (cpuext.xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b000, vs3, 0b0100111)) // ...001...........000.....0100111 +#define VSUXEI16_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (cpuext.xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b101, vs3, 0b0100111)) // ...001...........101.....0100111 +#define VSUXEI32_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (cpuext.xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b110, vs3, 0b0100111)) // ...001...........110.....0100111 +#define VSUXEI64_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (cpuext.xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b111, vs3, 0b0100111)) // ...001...........111.....0100111 // Vector Strided Instructions (including segment part) // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#75-vector-strided-instructions @@ -1583,7 +1583,7 @@ #define VFSLIDE1UP_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001110...........101.....1010111 #define VFSLIDE1DOWN_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001111...........101.....1010111 -#define VFMV_S_F(vd, rs1) EMIT(I_type((rv64_xtheadvector ? 0b001101100000 : 0b010000100000), rs1, 0b101, vd, 0b1010111)) // 010000100000.....101.....1010111 +#define VFMV_S_F(vd, rs1) EMIT(I_type((cpuext.xtheadvector ? 0b001101100000 : 0b010000100000), rs1, 0b101, vd, 0b1010111)) // 010000100000.....101.....1010111 #define VFMV_V_F(vd, rs1) EMIT(I_type(0b010111100000, rs1, 0b101, vd, 0b1010111)) // 010111100000.....101.....1010111 #define VFMERGE_VFM(vd, vs2, rs1) EMIT(R_type(0b0101110, vs2, rs1, 0b101, vd, 0b1010111)) // 0101110..........101.....1010111 @@ -1629,7 +1629,7 @@ #define VFSGNJN_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 001001...........001.....1010111 #define VFSGNJX_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 001010...........001.....1010111 -#define VFMV_F_S(rd, vs2) EMIT(R_type((rv64_xtheadvector ? 0b0011001 : 0b0100001), vs2, 0b00000, 0b001, rd, 0b1010111)) // 0100001.....00000001.....1010111 +#define VFMV_F_S(rd, vs2) EMIT(R_type((cpuext.xtheadvector ? 0b0011001 : 0b0100001), vs2, 0b00000, 0b001, rd, 0b1010111)) // 0100001.....00000001.....1010111 #define VMFEQ_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0110000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 011000...........001.....1010111 #define VMFLE_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0110010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 011001...........001.....1010111 @@ -1646,29 +1646,29 @@ #define VFMSAC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1011100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101110...........001.....1010111 #define VFNMSAC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1011110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101111...........001.....1010111 -#define VFCVT_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 
0b1000100 : 0b0100100) | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010010......00000001.....1010111 -#define VFCVT_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00001, 0b001, vd, 0b1010111)) // 010010......00001001.....1010111 -#define VFCVT_F_XU_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00010, 0b001, vd, 0b1010111)) // 010010......00010001.....1010111 -#define VFCVT_F_X_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00011, 0b001, vd, 0b1010111)) // 010010......00011001.....1010111 -#define VFCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00110, 0b001, vd, 0b1010111)) // 010010......00110001.....1010111 -#define VFCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00111, 0b001, vd, 0b1010111)) // 010010......00111001.....1010111 -#define VFWCVT_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01000, 0b001, vd, 0b1010111)) // 010010......01000001.....1010111 -#define VFWCVT_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01001, 0b001, vd, 0b1010111)) // 010010......01001001.....1010111 -#define VFWCVT_F_XU_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01010, 0b001, vd, 0b1010111)) // 010010......01010001.....1010111 -#define VFWCVT_F_X_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01011, 0b001, vd, 0b1010111)) // 010010......01011001.....1010111 -#define VFWCVT_F_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01100, 0b001, vd, 0b1010111)) // 010010......01100001.....1010111 -#define VFWCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01110, 0b001, vd, 0b1010111)) // 010010......01110001.....1010111 -#define VFWCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01111, 0b001, vd, 0b1010111)) // 010010......01111001.....1010111 -#define VFNCVT_XU_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010010......10000001.....1010111 -#define VFNCVT_X_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10001, 0b001, vd, 0b1010111)) // 010010......10001001.....1010111 -#define VFNCVT_F_XU_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10010, 0b001, vd, 0b1010111)) // 010010......10010001.....1010111 -#define VFNCVT_F_X_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10011, 0b001, vd, 0b1010111)) // 010010......10011001.....1010111 -#define VFNCVT_F_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10100, 0b001, vd, 0b1010111)) // 010010......10100001.....1010111 -#define VFNCVT_ROD_F_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10101, 0b001, vd, 0b1010111)) // 010010......10101001.....1010111 -#define VFNCVT_RTZ_XU_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10110, 0b001, vd, 0b1010111)) // 010010......10110001.....1010111 -#define VFNCVT_RTZ_X_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 
0b1000100 : 0b0100100) | (vm), vs2, 0b10111, 0b001, vd, 0b1010111)) // 010010......10111001.....1010111 -#define VFSQRT_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000110 : 0b0100110) | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010011......00000001.....1010111 -#define VFCLASS_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000110 : 0b0100110) | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010011......10000001.....1010111 +#define VFCVT_XU_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010010......00000001.....1010111 +#define VFCVT_X_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00001, 0b001, vd, 0b1010111)) // 010010......00001001.....1010111 +#define VFCVT_F_XU_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00010, 0b001, vd, 0b1010111)) // 010010......00010001.....1010111 +#define VFCVT_F_X_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00011, 0b001, vd, 0b1010111)) // 010010......00011001.....1010111 +#define VFCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00110, 0b001, vd, 0b1010111)) // 010010......00110001.....1010111 +#define VFCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00111, 0b001, vd, 0b1010111)) // 010010......00111001.....1010111 +#define VFWCVT_XU_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01000, 0b001, vd, 0b1010111)) // 010010......01000001.....1010111 +#define VFWCVT_X_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01001, 0b001, vd, 0b1010111)) // 010010......01001001.....1010111 +#define VFWCVT_F_XU_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01010, 0b001, vd, 0b1010111)) // 010010......01010001.....1010111 +#define VFWCVT_F_X_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01011, 0b001, vd, 0b1010111)) // 010010......01011001.....1010111 +#define VFWCVT_F_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01100, 0b001, vd, 0b1010111)) // 010010......01100001.....1010111 +#define VFWCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01110, 0b001, vd, 0b1010111)) // 010010......01110001.....1010111 +#define VFWCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01111, 0b001, vd, 0b1010111)) // 010010......01111001.....1010111 +#define VFNCVT_XU_F_W(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010010......10000001.....1010111 +#define VFNCVT_X_F_W(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10001, 0b001, vd, 0b1010111)) // 010010......10001001.....1010111 +#define VFNCVT_F_XU_W(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10010, 0b001, vd, 0b1010111)) // 010010......10010001.....1010111 +#define VFNCVT_F_X_W(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10011, 0b001, vd, 0b1010111)) // 010010......10011001.....1010111 +#define VFNCVT_F_F_W(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 
0b1000100 : 0b0100100) | (vm), vs2, 0b10100, 0b001, vd, 0b1010111)) // 010010......10100001.....1010111 +#define VFNCVT_ROD_F_F_W(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10101, 0b001, vd, 0b1010111)) // 010010......10101001.....1010111 +#define VFNCVT_RTZ_XU_F_W(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10110, 0b001, vd, 0b1010111)) // 010010......10110001.....1010111 +#define VFNCVT_RTZ_X_F_W(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10111, 0b001, vd, 0b1010111)) // 010010......10111001.....1010111 +#define VFSQRT_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000110 : 0b0100110) | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010011......00000001.....1010111 +#define VFCLASS_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000110 : 0b0100110) | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010011......10000001.....1010111 #define VFRSQRT7_V(vd, vs2, vm) EMIT(R_type(0b0100110 | (vm), vs2, 0b00100, 0b001, vd, 0b1010111)) // 010011......00100001.....1010111 #define VFREC7_V(vd, vs2, vm) EMIT(R_type(0b0100110 | (vm), vs2, 0b00101, 0b001, vd, 0b1010111)) // 010011......00101001.....1010111 @@ -1700,13 +1700,13 @@ #define VSLIDEUP_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001110...........100.....1010111 #define VSLIDEDOWN_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001111...........100.....1010111 -#define VADC_VXM(vd, vs2, rs1) EMIT(R_type((0b0100000 | rv64_xtheadvector), vs2, rs1, 0b100, vd, 0b1010111)) // 0100000..........100.....1010111 -#define VMADC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100010, vs2, rs1, 0b100, vd, 0b1010111)) // 0100010..........100.....1010111 -#define VMADC_VX(vd, vs2, rs1) EMIT(R_type(0b0100011, vs2, rs1, 0b100, vd, 0b1010111)) // 0100011..........100.....1010111 -#define VSBC_VXM(vd, vs2, rs1) EMIT(R_type((0b0100100 | rv64_xtheadvector), vs2, rs1, 0b100, vd, 0b1010111)) // 0100100..........100.....1010111 -#define VMSBC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100110, vs2, rs1, 0b100, vd, 0b1010111)) // 0100110..........100.....1010111 -#define VMSBC_VX(vd, vs2, rs1) EMIT(R_type(0b0100111, vs2, rs1, 0b100, vd, 0b1010111)) // 0100111..........100.....1010111 -#define VMERGE_VXM(vd, vs2, rs1) EMIT(R_type(0b0101110, vs2, rs1, 0b100, vd, 0b1010111)) // 0101110..........100.....1010111 +#define VADC_VXM(vd, vs2, rs1) EMIT(R_type((0b0100000 | cpuext.xtheadvector), vs2, rs1, 0b100, vd, 0b1010111)) // 0100000..........100.....1010111 +#define VMADC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100010, vs2, rs1, 0b100, vd, 0b1010111)) // 0100010..........100.....1010111 +#define VMADC_VX(vd, vs2, rs1) EMIT(R_type(0b0100011, vs2, rs1, 0b100, vd, 0b1010111)) // 0100011..........100.....1010111 +#define VSBC_VXM(vd, vs2, rs1) EMIT(R_type((0b0100100 | cpuext.xtheadvector), vs2, rs1, 0b100, vd, 0b1010111)) // 0100100..........100.....1010111 +#define VMSBC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100110, vs2, rs1, 0b100, vd, 0b1010111)) // 0100110..........100.....1010111 +#define VMSBC_VX(vd, vs2, rs1) EMIT(R_type(0b0100111, vs2, rs1, 0b100, vd, 0b1010111)) // 0100111..........100.....1010111 +#define VMERGE_VXM(vd, vs2, rs1) EMIT(R_type(0b0101110, vs2, rs1, 0b100, vd, 0b1010111)) // 0101110..........100.....1010111 #define VMV_V_X(vd, rs1) EMIT(I_type(0b010111100000, rs1, 0b100, vd, 0b1010111)) // 010111100000.....100.....1010111 @@ -1746,13 +1746,13 @@ #define VRGATHER_VV(vd, vs2, 
vs1, vm) EMIT(R_type(0b0011000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001100...........000.....1010111 #define VRGATHEREI16_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001110...........000.....1010111 -#define VADC_VVM(vd, vs2, vs1) EMIT(R_type((0b0100000 | rv64_xtheadvector), vs2, vs1, 0b000, vd, 0b1010111)) // 0100000..........000.....1010111 -#define VMADC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100010, vs2, vs1, 0b000, vd, 0b1010111)) // 0100010..........000.....1010111 -#define VMADC_VV(vd, vs2, vs1) EMIT(R_type(0b0100011, vs2, vs1, 0b000, vd, 0b1010111)) // 0100011..........000.....1010111 -#define VSBC_VVM(vd, vs2, vs1) EMIT(R_type((0b0100100 | rv64_xtheadvector), vs2, vs1, 0b000, vd, 0b1010111)) // 0100100..........000.....1010111 -#define VMSBC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100110, vs2, vs1, 0b000, vd, 0b1010111)) // 0100110..........000.....1010111 -#define VMSBC_VV(vd, vs2, vs1) EMIT(R_type(0b0100111, vs2, vs1, 0b000, vd, 0b1010111)) // 0100111..........000.....1010111 -#define VMERGE_VVM(vd, vs2, vs1) EMIT(R_type(0b0101110, vs2, vs1, 0b000, vd, 0b1010111)) // 0101110..........000.....1010111 +#define VADC_VVM(vd, vs2, vs1) EMIT(R_type((0b0100000 | cpuext.xtheadvector), vs2, vs1, 0b000, vd, 0b1010111)) // 0100000..........000.....1010111 +#define VMADC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100010, vs2, vs1, 0b000, vd, 0b1010111)) // 0100010..........000.....1010111 +#define VMADC_VV(vd, vs2, vs1) EMIT(R_type(0b0100011, vs2, vs1, 0b000, vd, 0b1010111)) // 0100011..........000.....1010111 +#define VSBC_VVM(vd, vs2, vs1) EMIT(R_type((0b0100100 | cpuext.xtheadvector), vs2, vs1, 0b000, vd, 0b1010111)) // 0100100..........000.....1010111 +#define VMSBC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100110, vs2, vs1, 0b000, vd, 0b1010111)) // 0100110..........000.....1010111 +#define VMSBC_VV(vd, vs2, vs1) EMIT(R_type(0b0100111, vs2, vs1, 0b000, vd, 0b1010111)) // 0100111..........000.....1010111 +#define VMERGE_VVM(vd, vs2, vs1) EMIT(R_type(0b0101110, vs2, vs1, 0b000, vd, 0b1010111)) // 0101110..........000.....1010111 #define VMV_V_V(vd, vs1) EMIT(I_type(0b010111100000, vs1, 0b000, vd, 0b1010111)) // 010111100000.....000.....1010111 @@ -1789,10 +1789,10 @@ #define VSLIDEUP_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0011100 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001110...........011.....1010111 #define VSLIDEDOWN_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0011110 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001111...........011.....1010111 -#define VADC_VIM(vd, vs2, simm5) EMIT(R_type((0b0100000 | rv64_xtheadvector), vs2, simm5, 0b011, vd, 0b1010111)) // 0100000..........011.....1010111 -#define VMADC_VIM(vd, vs2, simm5) EMIT(R_type(0b0100010, vs2, simm5, 0b011, vd, 0b1010111)) // 0100010..........011.....1010111 -#define VMADC_VI(vd, vs2, simm5) EMIT(R_type(0b0100011, vs2, simm5, 0b011, vd, 0b1010111)) // 0100011..........011.....1010111 -#define VMERGE_VIM(vd, vs2, simm5) EMIT(R_type(0b0101110, vs2, simm5, 0b011, vd, 0b1010111)) // 0101110..........011.....1010111 +#define VADC_VIM(vd, vs2, simm5) EMIT(R_type((0b0100000 | cpuext.xtheadvector), vs2, simm5, 0b011, vd, 0b1010111)) // 0100000..........011.....1010111 +#define VMADC_VIM(vd, vs2, simm5) EMIT(R_type(0b0100010, vs2, simm5, 0b011, vd, 0b1010111)) // 0100010..........011.....1010111 +#define VMADC_VI(vd, vs2, simm5) EMIT(R_type(0b0100011, vs2, simm5, 0b011, vd, 0b1010111)) // 0100011..........011.....1010111 +#define VMERGE_VIM(vd, vs2, simm5) EMIT(R_type(0b0101110, vs2, simm5, 0b011, vd, 0b1010111)) // 
0101110..........011.....1010111 #define VMV_V_I(vd, simm5) EMIT(I_type(0b010111100000, simm5, 0b011, vd, 0b1010111)) // 010111100000.....011.....1010111 @@ -1836,10 +1836,10 @@ #define VASUBU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001010...........010.....1010111 // Warning: zero-extended on xtheadvector! -#define VMV_X_S(rd, vs2) EMIT(R_type((rv64_xtheadvector ? 0b0011001 : 0b0100001), vs2, 0b00000, 0b010, rd, 0b1010111)) // 0100001.....00000010.....1010111 +#define VMV_X_S(rd, vs2) EMIT(R_type((cpuext.xtheadvector ? 0b0011001 : 0b0100001), vs2, 0b00000, 0b010, rd, 0b1010111)) // 0100001.....00000010.....1010111 // Warning: xtheadvector only -#define VEXT_X_V(rd, vs2, rs1) EMIT(R_type((rv64_xtheadvector ? 0b0011001 : 0b0100001), vs2, rs1, 0b010, rd, 0b1010111)) +#define VEXT_X_V(rd, vs2, rs1) EMIT(R_type((cpuext.xtheadvector ? 0b0011001 : 0b0100001), vs2, rs1, 0b010, rd, 0b1010111)) // Vector Integer Extension Instructions // https://github.com/riscv/riscv-v-spec/blob/e49574c92b072fd4d71e6cb20f7e8154de5b83fe/v-spec.adoc#123-vector-integer-extension @@ -1861,14 +1861,14 @@ #define VMNOR_MM(vd, vs2, vs1) EMIT(R_type(0b0111101, vs2, vs1, 0b010, vd, 0b1010111)) // 0111101..........010.....1010111 #define VMXNOR_MM(vd, vs2, vs1) EMIT(R_type(0b0111111, vs2, vs1, 0b010, vd, 0b1010111)) // 0111111..........010.....1010111 -#define VMSBF_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00001, 0b010, vd, 0b1010111)) // 010100......00001010.....1010111 -#define VMSOF_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00010, 0b010, vd, 0b1010111)) // 010100......00010010.....1010111 -#define VMSIF_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00011, 0b010, vd, 0b1010111)) // 010100......00011010.....1010111 -#define VIOTA_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b10000, 0b010, vd, 0b1010111)) // 010100......10000010.....1010111 -#define VCPOP_M(rd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10000, 0b010, rd, 0b1010111)) // 010000......10000010.....1010111 -#define VFIRST_M(rd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10001, 0b010, rd, 0b1010111)) // 010000......10001010.....1010111 +#define VMSBF_M(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00001, 0b010, vd, 0b1010111)) // 010100......00001010.....1010111 +#define VMSOF_M(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00010, 0b010, vd, 0b1010111)) // 010100......00010010.....1010111 +#define VMSIF_M(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00011, 0b010, vd, 0b1010111)) // 010100......00011010.....1010111 +#define VIOTA_M(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b10000, 0b010, vd, 0b1010111)) // 010100......10000010.....1010111 +#define VCPOP_M(rd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10000, 0b010, rd, 0b1010111)) // 010000......10000010.....1010111 +#define VFIRST_M(rd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10001, 0b010, rd, 0b1010111)) // 010000......10001010.....1010111 -#define VID_V(vd, vm) EMIT(R_type((rv64_xtheadvector ? 
0b0101100 : 0b0101000) | (vm), 0b00000, 0b10001, 0b010, vd, 0b1010111)) // 010100.0000010001010.....1010111 +#define VID_V(vd, vm) EMIT(R_type((cpuext.xtheadvector ? 0b0101100 : 0b0101000) | (vm), 0b00000, 0b10001, 0b010, vd, 0b1010111)) // 010100.0000010001010.....1010111 #define VDIVU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1000000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100000...........010.....1010111 #define VDIV_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1000010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100001...........010.....1010111 @@ -1906,7 +1906,7 @@ #define VSLIDE1DOWN_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001111...........110.....1010111 // Warning, upper elements will be cleared in xtheadvector! -#define VMV_S_X(vd, rs1) EMIT(I_type((rv64_xtheadvector ? 0b001101100000 : 0b010000100000), rs1, 0b110, vd, 0b1010111)) +#define VMV_S_X(vd, rs1) EMIT(I_type((cpuext.xtheadvector ? 0b001101100000 : 0b010000100000), rs1, 0b110, vd, 0b1010111)) #define VDIVU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100000...........110.....1010111 #define VDIV_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100001...........110.....1010111 diff --git a/src/dynarec/rv64/rv64_printer.c b/src/dynarec/rv64/rv64_printer.c index f28b8b06..c6cc9e17 100644 --- a/src/dynarec/rv64/rv64_printer.c +++ b/src/dynarec/rv64/rv64_printer.c @@ -7,16 +7,6 @@ #include "rv64_printer.h" #include "debug.h" -extern int rv64_xtheadba; -extern int rv64_xtheadbb; -extern int rv64_xtheadbs; -extern int rv64_xtheadcondmov; -extern int rv64_xtheadmemidx; -extern int rv64_xtheadmempair; -extern int rv64_xtheadfmemidx; -extern int rv64_xtheadmac; -extern int rv64_xtheadfmv; - static const char gpr[32][9] = { "zero", "ra", @@ -189,7 +179,7 @@ const char* rv64_print(uint32_t opcode, uintptr_t addr) static char buff[200]; rv64_print_t a; - if (rv64_xtheadba || rv64_xtheadbb || rv64_xtheadbs || rv64_xtheadcondmov || rv64_xtheadmempair) { + if (cpuext.xtheadba || cpuext.xtheadbb || cpuext.xtheadbs || cpuext.xtheadcondmov || cpuext.xtheadmempair) { /**************** * Generated by https://github.com/ksco/riscv-opcodes/tree/box64_printer @@ -693,7 +683,7 @@ const char* rv64_print(uint32_t opcode, uintptr_t addr) } } - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { /* These are written by hand.... */ // rv_v, VSETVLI diff --git a/src/emu/x64runf0.c b/src/emu/x64runf0.c index d86dfce3..a25ce7f4 100644 --- a/src/emu/x64runf0.c +++ b/src/emu/x64runf0.c @@ -790,7 +790,7 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr) if (rex.w) { #if defined(__riscv) || defined(__loongarch64) #if defined(__loongarch64) - if (la64_scq) { + if (cpuext.scq) { do { native_lock_read_dq(&tmp64u, &tmp64u2, ED); if (R_RAX == tmp64u && R_RDX == tmp64u2) { diff --git a/src/include/core_arch.h b/src/include/core_arch.h new file mode 100644 index 00000000..f297bcb8 --- /dev/null +++ b/src/include/core_arch.h @@ -0,0 +1,50 @@ +#ifndef __CORE_ARCH_H__ +#define __CORE_ARCH_H__ +#include <stdint.h> + +#ifdef DYNAREC +typedef union cpu_ext_s { + struct { +#ifdef ARM64 + uint64_t atomics:1; // it's important this is the 1st bit + uint64_t asimd:1; + uint64_t aes:1; + uint64_t pmull:1; + uint64_t crc32:1; + uint64_t sha1:1; + uint64_t sha2:1; + uint64_t uscat:1; + uint64_t flagm:1; + uint64_t flagm2:1; + uint64_t frintts:1; + uint64_t afp:1; + uint64_t rndr:1; +#elif defined(RV64) + uint64_t vlen:8; // Not *8, 8 bits should be enough?
that's a 2048-bit vector + uint64_t zba:1; + uint64_t zbb:1; + uint64_t zbc:1; + uint64_t zbs:1; + uint64_t vector:1; // rvv 1.0 or xtheadvector + uint64_t xtheadvector:1; + uint64_t xtheadba:1; + uint64_t xtheadbb:1; + uint64_t xtheadbs:1; + uint64_t xtheadcondmov:1; + uint64_t xtheadmemidx:1; + uint64_t xtheadmempair:1; + uint64_t xtheadfmemidx:1; + uint64_t xtheadmac:1; + uint64_t xtheadfmv:1; +#elif defined(LA64) + uint64_t lbt:1; // it's important this stays the 1st bit + uint64_t lam_bh:1; + uint64_t lamcas:1; + uint64_t scq:1; +#endif + }; + uint64_t x; +} cpu_ext_t; +#endif + +#endif //__CORE_ARCH_H__ \ No newline at end of file diff --git a/src/include/debug.h b/src/include/debug.h index abd94bf7..57520cbd 100644 --- a/src/include/debug.h +++ b/src/include/debug.h @@ -4,6 +4,7 @@ #include <env.h> #include "os.h" +#include "core_arch.h" typedef struct box64context_s box64context_t; extern box64env_t box64env; @@ -14,42 +15,7 @@ extern int box64_rdtsc; extern uint8_t box64_rdtsc_shift; extern int box64_is32bits; #ifdef DYNAREC -#ifdef ARM64 -extern int arm64_asimd; -extern int arm64_aes; -extern int arm64_pmull; -extern int arm64_crc32; -extern int arm64_atomics; -extern int arm64_sha1; -extern int arm64_sha2; -extern int arm64_uscat; -extern int arm64_flagm; -extern int arm64_flagm2; -extern int arm64_frintts; -extern int arm64_rndr; -#elif defined(RV64) -extern int rv64_zba; -extern int rv64_zbb; -extern int rv64_zbc; -extern int rv64_zbs; -extern int rv64_vector; -extern int rv64_xtheadvector; // rvv 1.0 or xtheadvector -extern int rv64_vlen; -extern int rv64_xtheadba; -extern int rv64_xtheadbb; -extern int rv64_xtheadbs; -extern int rv64_xtheadcondmov; -extern int rv64_xtheadmemidx; -extern int rv64_xtheadmempair; -extern int rv64_xtheadfmemidx; -extern int rv64_xtheadmac; -extern int rv64_xtheadfmv; -#elif defined(LA64) -extern int la64_lbt; -extern int la64_lam_bh; -extern int la64_lamcas; -extern int la64_scq; -#endif +extern cpu_ext_t cpuext; #endif #ifdef HAVE_TRACE extern uintptr_t trace_start, trace_end; diff --git a/src/rv64detect.c b/src/rv64detect.c index 8ef0ad82..3cce199e 100644 --- a/src/rv64detect.c +++ b/src/rv64detect.c @@ -57,66 +57,66 @@ void RV64_Detect_Function() ADDI(A1, xZR, 1); TH_ADDSL(A0, A0, A1, 1); BR(xRA); - rv64_xtheadba - = rv64_xtheadbb - = rv64_xtheadbs - = rv64_xtheadcondmov - = rv64_xtheadmemidx - = rv64_xtheadmempair - = rv64_xtheadfmemidx - = rv64_xtheadmac - = rv64_xtheadfmv = Check(my_block); + cpuext.xtheadba + = cpuext.xtheadbb + = cpuext.xtheadbs + = cpuext.xtheadcondmov + = cpuext.xtheadmemidx + = cpuext.xtheadmempair + = cpuext.xtheadfmemidx + = cpuext.xtheadmac + = cpuext.xtheadfmv = Check(my_block); // Official extensions - if (!rv64_xtheadba) { + if (!cpuext.xtheadba) { // Test Zba with ADDUW block = (uint32_t*)my_block; ADDUW(A0, A0, A1); ADDI(A0, xZR, 42); BR(xRA); - rv64_zba = Check(my_block); + cpuext.zba = Check(my_block); // Test Zbb with ANDN block = (uint32_t*)my_block; ANDN(A0, A0, A1); ADDI(A0, xZR, 42); BR(xRA); - rv64_zbb = Check(my_block); + cpuext.zbb = Check(my_block); // Test Zbc with CLMUL block = (uint32_t*)my_block; CLMUL(A0, A0, A1); ADDI(A0, xZR, 42); BR(xRA); - rv64_zbc = Check(my_block); + cpuext.zbc = Check(my_block); // Test Zbs with BCLR block = (uint32_t*)my_block; BCLR(A0, A0, A1); ADDI(A0, xZR, 42); BR(xRA); - rv64_zbs = Check(my_block); + cpuext.zbs = Check(my_block); } block = (uint32_t*)my_block; CSRRS(xZR, xZR, 0xc22 /* vlenb */); ADDI(A0, xZR, 42); BR(xRA); - rv64_vector = Check(my_block); + cpuext.vector =
Check(my_block); - if (rv64_vector) { + if (cpuext.vector) { block = (uint32_t*)my_block; CSRRS(xZR, xZR, 0x00f /* vcsr */); // vcsr does not exists in xtheadvector ADDI(A0, xZR, 42); BR(xRA); - rv64_xtheadvector = !Check(my_block); + cpuext.xtheadvector = !Check(my_block); } - if (rv64_vector) { + if (cpuext.vector) { int vlenb = 0; asm volatile("csrr %0, 0xc22" : "=r"(vlenb)); - rv64_vlen = vlenb * 8; + cpuext.vlen = vlenb; if (vlenb < 16) { // we need vlen >= 128 - rv64_vector = 0; + cpuext.vector = 0; } } diff --git a/wine/wow64/wowbox64.c b/wine/wow64/wowbox64.c index 8a1f4bb8..02b25c05 100644 --- a/wine/wow64/wowbox64.c +++ b/wine/wow64/wowbox64.c @@ -24,6 +24,7 @@ #include "rbtree.h" #include "wine/compiler.h" #include "wine/debug.h" +#include "core_arch.h" uintptr_t box64_pagesize = 4096; @@ -85,19 +86,7 @@ int isRetX87Wrapper(wrapper_t fun) return 0; } -int arm64_asimd = 0; -int arm64_aes = 0; -int arm64_pmull = 0; -int arm64_crc32 = 0; -int arm64_atomics = 0; -int arm64_sha1 = 0; -int arm64_sha2 = 0; -int arm64_uscat = 0; -int arm64_flagm = 0; -int arm64_flagm2 = 0; -int arm64_frintts = 0; -int arm64_afp = 0; -int arm64_rndr = 0; +cpu_ext_t cpuext = {0}; static box64context_t box64_context; box64context_t* my_context = &box64_context; |
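A minimal standalone sketch (not part of the diff) of the pattern the new cpu_ext_t relies on: single-bit fields overlaid on a uint64_t, so each extension flag is set and tested by name while the whole flag set is still reachable through the raw word x. The demo type and the main()/printf harness are invented for illustration, and the claim that the first-declared field is bit 0 of x assumes the usual little-endian bit-field layout that the "it's important this is the 1st bit" comments count on.

#include <stdint.h>
#include <stdio.h>

// Demo-only type mirroring the shape of cpu_ext_t, with a trimmed field list.
typedef union demo_ext_s {
    struct {
        uint64_t atomics:1; // declared first, so bit 0 of x on the assumed bit-field layout
        uint64_t aes:1;
        uint64_t crc32:1;
    };
    uint64_t x; // the whole flag set as one word
} demo_ext_t;

int main(void)
{
    demo_ext_t ext = {0};   // same zero-init as `cpu_ext_t cpuext = {0};`

    ext.atomics = 1;        // flags are assigned by name, as rv64detect.c does
    ext.crc32 = 1;

    if (!ext.aes)           // and tested by name, as the emitter macros do
        printf("AES not detected\n");

    // The raw word exposes every flag at once; because `atomics` is the first
    // bit-field here, (x & 1) reaches it without naming the field.
    printf("x=%#llx atomics-bit=%llu\n",
           (unsigned long long)ext.x, (unsigned long long)(ext.x & 1));
    return 0;
}

Collapsing the per-extension ints into one value is also what lets debug.h and wowbox64.c declare or define a single cpuext symbol instead of the long per-architecture lists removed above.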