Diffstat (limited to 'src/dynarec/rv64')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_0.c | 6 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_1.c | 6 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 50 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f_vector.c | 18 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_66.c | 2 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 40 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f38.c | 34 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 86 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c | 4 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_d9.c | 2 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_math.c | 28 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_shift.c | 16 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f20f_vector.c | 22 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f.c | 2 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f_vector.c | 18 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 56 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 28 |
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 242 |
| -rw-r--r-- | src/dynarec/rv64/rv64_printer.c | 14 |
19 files changed, 332 insertions, 342 deletions
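The diff below is a mechanical rename: the per-feature globals (`rv64_zbb`, `rv64_zba`, `rv64_zbs`, `rv64_vector`, `rv64_xtheadvector`, `rv64_vlen`, ...) become fields of a single `cpuext` descriptor, and the `vlen` checks change from bit counts (`rv64_vlen >= 256`) to byte counts (`cpuext.vlen >= 32`). The definition of `cpuext` is not part of this diff; the sketch below only illustrates the shape implied by the call sites, with an assumed type name and assumed field types.

```c
#include <stdint.h>

/* Illustrative sketch only -- not the definition from the commit.
 * Field names come from the call sites in the hunks below; the
 * struct/typedef names and field types are assumptions. */
typedef struct cpu_ext_s {
    uint8_t  zba, zbb, zbs;                // standard bit-manip extensions
    uint8_t  vector;                       // an RVV-compatible vector unit
    uint8_t  xtheadba, xtheadbb;           // T-Head address/bit-manip
    uint8_t  xtheadcondmov;                // T-Head conditional move
    uint8_t  xtheadmempair, xtheadmemidx;  // T-Head paired/indexed memory ops
    uint8_t  xtheadvector;                 // T-Head 0.7.1-style vector
    uint32_t vlen;                         // vector register width, in bytes
} cpu_ext_t;

extern cpu_ext_t cpuext;

/* Guard rewrite as it appears throughout the diff:
 *   if (rv64_vlen >= 256)  ->  if (cpuext.vlen >= 32)
 * consistent with vlen switching from bits to bytes, which also explains
 * vector_vsetvli() dropping its "+ 3" in the shift. */
```

Call sites then test the fields directly, e.g. `if (cpuext.zbb) { ... } else { ... }`, exactly as in the hunks that follow.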
diff --git a/src/dynarec/rv64/dynarec_rv64_00_0.c b/src/dynarec/rv64/dynarec_rv64_00_0.c index f518ab0f..d779e994 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_0.c +++ b/src/dynarec/rv64/dynarec_rv64_00_0.c @@ -176,17 +176,17 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int case 0x0F: switch (rep) { case 0: - if (rv64_vector) + if (cpuext.vector) retaddr = dynarec64_0F_vector(dyn, addr, ip, ninst, rex, ok, need_epilog); addr = retaddr ? retaddr : dynarec64_0F(dyn, addr, ip, ninst, rex, ok, need_epilog); break; case 1: - if (rv64_vector) + if (cpuext.vector) retaddr = dynarec64_F20F_vector(dyn, addr, ip, ninst, rex, ok, need_epilog); addr = retaddr ? retaddr : dynarec64_F20F(dyn, addr, ip, ninst, rex, ok, need_epilog); break; case 2: - if (rv64_vector) + if (cpuext.vector) retaddr = dynarec64_F30F_vector(dyn, addr, ip, ninst, rex, ok, need_epilog); addr = retaddr ? retaddr : dynarec64_F30F(dyn, addr, ip, ninst, rex, ok, need_epilog); break; diff --git a/src/dynarec/rv64/dynarec_rv64_00_1.c b/src/dynarec/rv64/dynarec_rv64_00_1.c index 3e4fd754..a56b1017 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_1.c +++ b/src/dynarec/rv64/dynarec_rv64_00_1.c @@ -174,12 +174,12 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } break; case 0x64: - if (rv64_vector) + if (cpuext.vector) retaddr = dynarec64_64_vector(dyn, addr, ip, ninst, rex, rep, _FS, ok, need_epilog); addr = retaddr ? retaddr : dynarec64_64(dyn, addr, ip, ninst, rex, rep, _FS, ok, need_epilog); break; case 0x65: - if (rv64_vector) + if (cpuext.vector) retaddr = dynarec64_64_vector(dyn, addr, ip, ninst, rex, rep, _GS, ok, need_epilog); addr = retaddr ? retaddr : dynarec64_64(dyn, addr, ip, ninst, rex, rep, _GS, ok, need_epilog); break; @@ -190,7 +190,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int if (rex.is32bits) addr = dynarec64_67_32(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); else { - if (rv64_vector) + if (cpuext.vector) retaddr = dynarec64_67_vector(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); addr = retaddr ? 
retaddr : dynarec64_67(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); } diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index 40de22ce..32db4a8f 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -537,7 +537,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LH(x3, gback, gdoffset + 2 * (i * 2 + 0)); LH(x4, gback, gdoffset + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -560,7 +560,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -588,7 +588,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LB(x4, wback, fixedaddress + i * 2 + 1); MUL(x3, x3, x4); ADD(x3, x3, x7); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -661,7 +661,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LH(x3, gback, gdoffset + 2 * (i * 2 + 0)); LH(x4, gback, gdoffset + 2 * (i * 2 + 1)); SUBW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -684,7 +684,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); SUBW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -1292,7 +1292,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni MOV64x(x6, -128); for (int i = 0; i < 4; ++i) { LH(x3, gback, gdoffset + i * 2); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -1309,7 +1309,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else for (int i = 0; i < 4; ++i) { LH(x3, wback, fixedaddress + i * 2); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -1665,7 +1665,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("PCMPEQB Gm,Em"); nextop = F8; GETGM(); - if (rv64_xtheadbb) { + if (cpuext.xtheadbb) { GETEM(x2, 0, 0); LD(x3, gback, gdoffset); LD(x4, wback, fixedaddress); @@ -2567,7 +2567,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEM(x2, 0, 1); LD(x1, wback, fixedaddress + 0); for (int i = 0; i < 8; i++) { - if (rv64_zbs) { + if (cpuext.zbs) { if (i == 0) { BEXTI(gd, x1, 63); } else { @@ -2582,7 +2582,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } } if (i != 0) { - if (rv64_zba) { + if (cpuext.zba) { SH1ADD(gd, gd, x6); } else { SLLI(gd, gd, 1); @@ -2600,7 +2600,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LBU(x3, gback, gdoffset + i); LBU(x4, wback, fixedaddress + i); SUB(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MAX(x3, x3, xZR); } else { NOT(x4, x3); @@ -2615,7 +2615,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETGM(); GETEM(x2, 0, 6); - MMX_LOOP_W(x3, x4, SUB(x3, x3, x4); if (rv64_zbb) { MAX(x3, x3, xZR); } else { + MMX_LOOP_W(x3, x4, SUB(x3, x3, x4); if (cpuext.zbb) { MAX(x3, x3, xZR); } else { NOT(x4, x3); SRAI(x4, x4, 63); 
AND(x3, x3, x4); } SH(x3, gback, gdoffset + i * 2);); @@ -2628,7 +2628,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni for (int i = 0; i < 8; ++i) { LBU(x3, gback, gdoffset + i); LBU(x4, wback, fixedaddress + i); - if (rv64_zbb) { + if (cpuext.zbb) { MINU(x3, x3, x4); } else { BLTU(x3, x4, 8); @@ -2657,7 +2657,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LBU(x3, gback, gdoffset + i); LBU(x4, wback, fixedaddress + i); ADD(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MINU(x3, x3, x5); } else { BLT(x3, x5, 8); @@ -2678,7 +2678,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LHU(x3, gback, gdoffset + i * 2); LHU(x4, wback, fixedaddress + i * 2); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MINU(x3, x3, x5); } else { BGE(x5, x3, 8); // tmp32s <= 65535? @@ -2695,7 +2695,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni for (int i = 0; i < 8; ++i) { LBU(x3, gback, gdoffset + i); LBU(x4, wback, fixedaddress + i); - if (rv64_zbb) { + if (cpuext.zbb) { MAXU(x3, x3, x4); } else { BLTU(x4, x3, 8); @@ -2711,7 +2711,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEM(x2, 0, 1); LD(x1, gback, gdoffset); LD(x3, wback, fixedaddress); - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(x1, x3, x1); } else { NOT(x1, x1); @@ -2740,7 +2740,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEM(x4, 0, 1); LBU(x1, wback, fixedaddress); ADDI(x2, xZR, 15); - if (rv64_zbb) { + if (cpuext.zbb) { MINU(x1, x1, x2); } else { BLTU(x1, x2, 4 + 4); @@ -2759,7 +2759,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETEM(x4, 0, 1); LBU(x1, wback, fixedaddress); ADDI(x2, xZR, 31); - if (rv64_zbb) { + if (cpuext.zbb) { MINU(x1, x1, x2); } else { BLTU(x1, x2, 4 + 4); @@ -2834,7 +2834,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LB(x3, gback, gdoffset + i); LB(x4, wback, fixedaddress + i); SUBW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -2859,7 +2859,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LH(x3, gback, gdoffset + 2 * i); LH(x4, wback, fixedaddress + 2 * i); SUBW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -2879,7 +2879,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni for (int i = 0; i < 4; ++i) { LH(x3, gback, gdoffset + 2 * i); LH(x4, wback, fixedaddress + 2 * i); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x4); } else { BLT(x3, x4, 8); @@ -2911,7 +2911,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LB(x3, gback, gdoffset + i); LB(x4, wback, fixedaddress + i); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -2936,7 +2936,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni LH(x3, gback, gdoffset + 2 * i); LH(x4, wback, fixedaddress + 2 * i); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -2953,7 +2953,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETGM(); GETEM(x1, 0, 6); - MMX_LOOP_WS(x3, x4, if (rv64_zbb) { MAX(x3, x3, x4); } else { + MMX_LOOP_WS(x3, x4, if (cpuext.zbb) { MAX(x3, x3, x4); } 
else { BGE(x3, x4, 8); MV(x3, x4); }); break; diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index b1569238..6fa4f633 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -101,7 +101,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, GETEX_vector(v1, 0, 0, VECTOR_SEW64); q0 = fpu_get_scratch(dyn); VSLIDEDOWN_VI(q0, v1, 1, VECTOR_UNMASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b01, x4, 1); VMERGE_VVM(v0, v0, q0); // implies VMASK } else { @@ -127,7 +127,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, if (MODREG) { ed = (nextop & 7) + (rex.b << 3); d0 = sse_get_reg_vector(dyn, ninst, x1, ed, 1, VECTOR_SEW64); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b01, x4, 1); VMERGE_VVM(v0, v0, v1); // implies VMASK } else { @@ -173,7 +173,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // no more scratches! - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { VWADDU_VX(v0, q0, xZR, VECTOR_UNMASKED); VWADDU_VX(v1, q1, xZR, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2); @@ -224,7 +224,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64); q0 = fpu_get_scratch(dyn); VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b01, x4, 1); VMERGE_VVM(v1, v1, q0); // implies VMASK } else { @@ -498,7 +498,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); GETGD; GETEX_vector(q0, 0, 0, VECTOR_SEW32); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8); VSRL_VI(v0, q0, 31, VECTOR_UNMASKED); // Force the element width to 4bit @@ -865,7 +865,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } else { SET_ELEMENT_WIDTH(x3, VECTOR_SEW32, 1); } - if (!rv64_xtheadvector) { + if (!cpuext.xtheadvector) { VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); } VMV_S_X(v0, ed); @@ -1144,7 +1144,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, q0 = fpu_get_scratch(dyn); d0 = fpu_get_scratch(dyn); d1 = fpu_get_scratch(dyn); - if (rv64_xtheadvector) { // lack of vrgatherei16.vv + if (cpuext.xtheadvector) { // lack of vrgatherei16.vv q1 = fpu_get_scratch(dyn); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1); tmp64u0 = ((((uint64_t)u8 >> 2) & 3) << 32) | (u8 & 3); @@ -1229,7 +1229,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); GETEM_vector(q0, 0); SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8); VSRL_VI(v0, q0, 7, VECTOR_UNMASKED); // Force the element width to 1bit @@ -1240,7 +1240,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VMSLT_VX(VMASK, q0, xZR, VECTOR_UNMASKED); } VMV_X_S(gd, VMASK); - if (!rv64_xtheadvector) { ANDI(gd, gd, 0xff); } + if (!cpuext.xtheadvector) { ANDI(gd, gd, 0xff); } break; case 
0xD8: case 0xD9: diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c index 68ed8f84..ce6ff3d7 100644 --- a/src/dynarec/rv64/dynarec_rv64_66.c +++ b/src/dynarec/rv64/dynarec_rv64_66.c @@ -129,7 +129,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x0F: switch (rep) { case 0: { - if (rv64_vector) + if (cpuext.vector) retaddr = dynarec64_660F_vector(dyn, addr, ip, ninst, rex, ok, need_epilog); addr = retaddr ? retaddr : dynarec64_660F(dyn, addr, ip, ninst, rex, ok, need_epilog); break; diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index 4ecf1d7d..a4c929dd 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -589,7 +589,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MOV64x(x6, -128); for (int i = 0; i < 8; ++i) { LH(x3, gback, gdoffset + i * 2); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -606,7 +606,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } else for (int i = 0; i < 8; ++i) { LH(x3, wback, fixedaddress + i * 2); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -1058,7 +1058,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PCMPEQB Gx,Ex"); nextop = F8; GETGX(); - if (rv64_xtheadbb) { + if (cpuext.xtheadbb) { GETEX(x2, 0, 8); SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4); TH_TSTNBZ(x3, x3);); } else { @@ -1374,7 +1374,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int wback = x3; } IFX (X_CF) { - if (rv64_zbs) { + if (cpuext.zbs) { if (rex.w) { BEXT_(x4, ed, gd); } else { @@ -1678,7 +1678,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LD(x1, wback, fixedaddress + 8); // high part LD(x2, wback, fixedaddress + 0); // low part, also destroyed wback(x2) for (int i = 0; i < 8; i++) { - if (rv64_zbs) { + if (cpuext.zbs) { if (i == 0) { BEXTI(gd, x1, 63); } else { @@ -1693,7 +1693,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } } if (i != 0) { - if (rv64_zba) { + if (cpuext.zba) { SH1ADD(gd, gd, x6); } else { SLLI(gd, gd, 1); @@ -1702,13 +1702,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int } } for (int i = 0; i < 8; i++) { - if (rv64_zbs) { + if (cpuext.zbs) { BEXTI(x6, x2, 63 - i * 8); } else { SRLI(x6, x2, 63 - i * 8); ANDI(x6, x6, 1); } - if (rv64_zba) { + if (cpuext.zba) { SH1ADD(gd, gd, x6); } else { SLLI(gd, gd, 1); @@ -1725,7 +1725,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LBU(x3, gback, gdoffset + i); LBU(x4, wback, fixedaddress + i); SUB(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MAX(x3, x3, xZR); } else { NOT(x4, x3); @@ -1743,7 +1743,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SSE_LOOP_W( x3, x4, SUB(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MAX(x3, x3, xZR); } else { NOT(x4, x3); @@ -1759,7 +1759,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int for (int i = 0; i < 16; ++i) { LBU(x3, gback, gdoffset + i); LBU(x4, wback, fixedaddress + i); - if (rv64_zbb) { + if (cpuext.zbb) { MINU(x3, x3, x4); } else { BLTU(x3, x4, 8); @@ -1785,7 +1785,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LBU(x3, gback, gdoffset + i); LBU(x4, 
wback, fixedaddress + i); ADD(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MINU(x3, x3, x5); } else { BLT(x3, x5, 8); @@ -1806,7 +1806,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LHU(x3, gback, gdoffset + i * 2); LHU(x4, wback, fixedaddress + i * 2); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MINU(x3, x3, x5); } else { BGE(x5, x3, 8); // tmp32s <= 65535? @@ -1823,7 +1823,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int for (int i = 0; i < 16; ++i) { LBU(x3, gback, gdoffset + i); LBU(x4, wback, fixedaddress + i); - if (rv64_zbb) { + if (cpuext.zbb) { MAXU(x3, x3, x4); } else { BLTU(x4, x3, 8); @@ -1976,7 +1976,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LB(x3, gback, gdoffset + i); LB(x4, wback, fixedaddress + i); SUBW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -2001,7 +2001,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LH(x3, gback, gdoffset + 2 * i); LH(x4, wback, fixedaddress + 2 * i); SUBW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -2021,7 +2021,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int for (int i = 0; i < 8; ++i) { LH(x3, gback, gdoffset + 2 * i); LH(x4, wback, fixedaddress + 2 * i); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x4); } else { BLT(x3, x4, 8); @@ -2050,7 +2050,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LB(x3, gback, gdoffset + i); LB(x4, wback, fixedaddress + i); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -2075,7 +2075,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int LH(x3, gback, gdoffset + 2 * i); LH(x4, wback, fixedaddress + 2 * i); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -2092,7 +2092,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETGX(); GETEX(x2, 0, 14); - SSE_LOOP_WS(x3, x4, if (rv64_zbb) { MAX(x3, x3, x4); } else { + SSE_LOOP_WS(x3, x4, if (cpuext.zbb) { MAX(x3, x3, x4); } else { BGE(x3, x4, 8); MV(x3, x4); }); break; diff --git a/src/dynarec/rv64/dynarec_rv64_660f38.c b/src/dynarec/rv64/dynarec_rv64_660f38.c index d15a5922..658a0531 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f38.c +++ b/src/dynarec/rv64/dynarec_rv64_660f38.c @@ -149,7 +149,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, LH(x3, gback, gdoffset + 2 * (i * 2 + 0)); LH(x4, gback, gdoffset + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -172,7 +172,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, LH(x3, wback, fixedaddress + 2 * (i * 2 + 0)); LH(x4, wback, fixedaddress + 2 * (i * 2 + 1)); ADDW(x3, x3, x4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -200,7 +200,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, LB(x4, wback, fixedaddress + i * 2 + 1); MUL(x3, x3, x4); ADD(x3, x3, x7); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, x6); } else { @@ -500,7 +500,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, MOV64x(x5, 65535); for (int i = 0; i < 4; ++i) { 
LW(x3, gback, gdoffset + i * 4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, xZR); } else { @@ -517,7 +517,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, } else for (int i = 0; i < 4; ++i) { LW(x3, wback, fixedaddress + i * 4); - if (rv64_zbb) { + if (cpuext.zbb) { MIN(x3, x3, x5); MAX(x3, x3, xZR); } else { @@ -605,7 +605,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 16; ++i) { LB(x3, gback, gdoffset + i); LB(x4, wback, fixedaddress + i); - if (rv64_zbb) + if (cpuext.zbb) MIN(x4, x3, x4); else BLT(x3, x4, 4 + 4); @@ -620,7 +620,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 4; ++i) { LW(x3, gback, gdoffset + i * 4); LW(x4, wback, fixedaddress + i * 4); - if (rv64_zbb) + if (cpuext.zbb) MIN(x4, x3, x4); else BLT(x3, x4, 4 + 4); @@ -635,7 +635,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 8; ++i) { LHU(x3, gback, gdoffset + i * 2); LHU(x4, wback, fixedaddress + i * 2); - if (rv64_zbb) + if (cpuext.zbb) MINU(x4, x3, x4); else BLTU(x3, x4, 4 + 4); @@ -650,7 +650,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 4; ++i) { LWU(x3, gback, gdoffset + i * 4); LWU(x4, wback, fixedaddress + i * 4); - if (rv64_zbb) + if (cpuext.zbb) MINU(x4, x3, x4); else BLTU(x3, x4, 4 + 4); @@ -665,7 +665,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 16; ++i) { LB(x3, gback, gdoffset + i); LB(x4, wback, fixedaddress + i); - if (rv64_zbb) + if (cpuext.zbb) MAX(x4, x3, x4); else BLT(x4, x3, 4 + 4); @@ -680,7 +680,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 4; ++i) { LW(x3, gback, gdoffset + i * 4); LW(x4, wback, fixedaddress + i * 4); - if (rv64_zbb) + if (cpuext.zbb) MAX(x4, x3, x4); else BLT(x4, x3, 4 + 4); @@ -695,7 +695,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 8; ++i) { LHU(x3, gback, gdoffset + i * 2); LHU(x4, wback, fixedaddress + i * 2); - if (rv64_zbb) + if (cpuext.zbb) MAXU(x4, x3, x4); else BLTU(x4, x3, 4 + 4); @@ -710,7 +710,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, for (int i = 0; i < 4; ++i) { LWU(x3, gback, gdoffset + i * 4); LWU(x4, wback, fixedaddress + i * 4); - if (rv64_zbb) + if (cpuext.zbb) MAXU(x4, x3, x4); else BLTU(x4, x3, 4 + 4); @@ -825,10 +825,10 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 1, 0); LHU(x1, ed, fixedaddress); - if (rv64_zbb) { + if (cpuext.zbb) { REV8(x1, x1); SRLI(x1, x1, 48); - } else if (rv64_xtheadbb) { + } else if (cpuext.xtheadbb) { TH_REVW(x1, x1); SRLI(x1, x1, 16); } else { @@ -847,10 +847,10 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, GETGD; SMREAD(); addr = geted(dyn, addr, ninst, nextop, &wback, x3, x2, &fixedaddress, rex, NULL, 1, 0); - if (rv64_zbb) { + if (cpuext.zbb) { REV8(x1, gd); SRLI(x1, x1, 48); - } else if (rv64_xtheadbb) { + } else if (cpuext.xtheadbb) { TH_REVW(x1, gd); SRLI(x1, x1, 16); } else { diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c index 9f74f0bb..d4d2a5b3 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c +++ 
b/src/dynarec/rv64/dynarec_rv64_660f_vector.c @@ -92,7 +92,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0); LD(x4, ed, fixedaddress); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v1 = fpu_get_scratch(dyn); VMV_S_X(v1, x4); VECTOR_LOAD_VMASK(0b01, x3, 1); @@ -150,7 +150,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64); q0 = fpu_get_scratch(dyn); VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b01, x4, 1); VMERGE_VVM(v0, v1, q0); // implies VMASK } else { @@ -449,32 +449,32 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); IFX (X_ZF) { VAND_VV(v0, q1, q0, VECTOR_UNMASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { // Force the mask element width to 32 vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL2, 1); } VMSGTU_VX(VMASK, v0, xZR, VECTOR_UNMASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1); } VMV_X_S(x4, VMASK); - if (!rv64_xtheadvector) ANDI(x4, x4, 0b11); + if (!cpuext.xtheadvector) ANDI(x4, x4, 0b11); BNEZ(x4, 8); ORI(xFlags, xFlags, 1 << F_ZF); } IFX (X_CF) { VXOR_VI(v0, q0, 0x1F, VECTOR_UNMASKED); VAND_VV(v0, q1, v0, VECTOR_UNMASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { // Force the mask element width to 32 vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL2, 1); } VMSGTU_VX(VMASK, v0, xZR, VECTOR_UNMASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1); } VMV_X_S(x4, VMASK); - if (!rv64_xtheadvector) ANDI(x4, x4, 0b11); + if (!cpuext.xtheadvector) ANDI(x4, x4, 0b11); BNEZ(x4, 8); ORI(xFlags, xFlags, 1 << F_CF); } @@ -502,7 +502,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x20: INST_NAME("PMOVSXBW Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -526,7 +526,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x21: INST_NAME("PMOVSXBD Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -555,7 +555,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x22: INST_NAME("PMOVSXBQ Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -585,7 +585,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x23: INST_NAME("PMOVSXWD Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -609,7 +609,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x24: INST_NAME("PMOVSXWQ Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); 
GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -637,7 +637,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x25: INST_NAME("PMOVSXDQ Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -661,7 +661,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x28: INST_NAME("PMULDQ Gx, Ex"); nextop = F8; - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); GETGX_vector(v0, 1, VECTOR_SEW64); GETEX_vector(v1, 0, 0, VECTOR_SEW64); @@ -716,7 +716,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x30: INST_NAME("PMOVZXBW Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -740,7 +740,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x31: INST_NAME("PMOVZXBD Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -768,7 +768,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x32: INST_NAME("PMOVZXBQ Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -798,7 +798,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x33: INST_NAME("PMOVZXWD Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -822,7 +822,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x34: INST_NAME("PMOVZXWQ Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -850,7 +850,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0x35: INST_NAME("PMOVZXDQ Gx, Ex"); nextop = F8; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETEX_vector(q1, 0, 0, VECTOR_SEW8); GETGX_empty_vector(q0); @@ -951,7 +951,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); } else if (u8 > 16) { d0 = fpu_get_scratch(dyn); - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { // clear high bits before slidedown! vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 2); VXOR_VV(d0, d0, d0, VECTOR_UNMASKED); @@ -966,7 +966,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v1 = fpu_get_scratch(dyn); VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); VSLIDEUP_VI(v0, q0, 16 - u8, VECTOR_UNMASKED); - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { // clear high bits before slidedown! d0 = fpu_get_scratch(dyn); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 2); @@ -994,7 +994,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i ed = TO_NAT((nextop & 7) + (rex.b << 3)); u8 = F8; if (u8 & (rex.w ? 
1 : 3)) { - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { ADDI(x4, xZR, (u8 & (rex.w ? 1 : 3))); VEXT_X_V(ed, q0, x4); } else { @@ -1005,13 +1005,13 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } } else { VMV_X_S(ed, q0); - if (!rv64_xtheadvector && !rex.w) ZEROUP(ed); + if (!cpuext.xtheadvector && !rex.w) ZEROUP(ed); } } else { addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 1); u8 = F8; if (u8 & (rex.w ? 1 : 3)) { - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { ADDI(x4, xZR, (u8 & (rex.w ? 1 : 3))); VEXT_X_V(x5, q0, x4); } else { @@ -1078,7 +1078,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); GETGD; GETEX_vector(q0, 0, 0, VECTOR_SEW64); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); ADDI(x4, xZR, 63); VSRL_VX(v0, q0, x4, VECTOR_UNMASKED); @@ -1208,7 +1208,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); VFNCVT_F_F_W(d0, d1, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1); - if (!rv64_xtheadvector) VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); + if (!cpuext.xtheadvector) VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); VMV_X_S(x4, d0); VMV_S_X(v0, x4); break; @@ -1350,7 +1350,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETEX_vector(q1, 0, 0, VECTOR_SEW16); d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); d1 = fpu_get_scratch(dyn); - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 2); // double the vl for slideup. VMV_V_V(d0, q0); VSLIDEUP_VI(d0, q1, 8, VECTOR_UNMASKED); // splice q0 and q1 here! @@ -1388,7 +1388,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETEX_vector(q1, 0, 0, VECTOR_SEW16); d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); d1 = fpu_get_scratch(dyn); - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 2); // double the vl for slideup. if (q0 == q1) { VMV_V_V(d0, q0); @@ -1452,7 +1452,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETEX_vector(q1, 0, 0, VECTOR_SEW32); d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); d1 = fpu_get_scratch(dyn); - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 2); // double the vl for slideup. VMV_V_V(d0, q0); VSLIDEUP_VI(d0, q1, 4, VECTOR_UNMASKED); // splice q0 and q1 here! 
@@ -1498,7 +1498,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64); q0 = fpu_get_scratch(dyn); VSLIDE1DOWN_VX(q0, v0, xZR, VECTOR_UNMASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b01, x4, 1); VMERGE_VVM(v0, v1, q0); // implies VMASK } else { @@ -1552,7 +1552,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGX_empty_vector(q0); v0 = fpu_get_scratch(dyn); tmp64u0 = F8; - if (rv64_xtheadvector) { // lack of vrgatherei16.vv + if (cpuext.xtheadvector) { // lack of vrgatherei16.vv v1 = fpu_get_scratch(dyn); if (tmp64u0 == 0) { VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); @@ -1714,7 +1714,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); } else { d0 = fpu_get_scratch(dyn); - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { // clear high bits before slidedown! vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 2); VXOR_VV(d0, d0, d0, VECTOR_UNMASKED); @@ -1794,7 +1794,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // no more scratches! VMV_V_V(v0, q0); if (q1 & 1) VMV_V_V(d1, q1); - VMV_V_I(VMASK, rv64_xtheadvector ? 1 : 0b0101); + VMV_V_I(VMASK, cpuext.xtheadvector ? 1 : 0b0101); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL2, 2); VSLIDEUP_VI(v0, (q1 & 1) ? d1 : q1, 2, VECTOR_UNMASKED); VCOMPRESS_VM(d0, v0, VMASK); @@ -1933,7 +1933,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VMV_V_V(v1, v0); } if (u8 & 1) { - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { ADDI(x5, xZR, 1); VEXT_X_V(x4, v0, x5); } else { @@ -2003,7 +2003,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (MODREG) { q1 = sse_get_reg_empty_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3)); VMV_X_S(x4, q0); - if (!rv64_xtheadvector) { + if (!cpuext.xtheadvector) { VXOR_VV(q1, q1, q1, VECTOR_UNMASKED); } VMV_S_X(q1, x4); @@ -2020,7 +2020,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETGD; GETEX_vector(q0, 0, 0, VECTOR_SEW8); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8); VSRL_VI(v0, q0, 7, VECTOR_UNMASKED); // Force the element width to 1bit @@ -2031,7 +2031,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); VMV_X_S(gd, VMASK); - if (!rv64_xtheadvector) { ZEXTH(gd, gd); } + if (!cpuext.xtheadvector) { ZEXTH(gd, gd); } break; case 0xD8: case 0xD9: @@ -2102,7 +2102,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); GETGX_vector(q0, 1, VECTOR_SEW8); GETEX_vector(q1, 0, 0, VECTOR_SEW8); - if (rv64_xtheadvector) { // lack of vaddu.vv + if (cpuext.xtheadvector) { // lack of vaddu.vv v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); VWADDU_VV(v0, q0, q1, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL2, 2); @@ -2162,7 +2162,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); GETGX_vector(q0, 1, VECTOR_SEW16); GETEX_vector(q1, 0, 0, VECTOR_SEW16); - if (rv64_xtheadvector) { // lack of vaddu.vv + if 
(cpuext.xtheadvector) { // lack of vaddu.vv v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); VWADDU_VV(v0, q0, q1, VECTOR_UNMASKED); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2); @@ -2199,7 +2199,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGX_empty_vector(v0); d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); VMV_V_V(d0, v1); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 0.5); ADDI(x4, xZR, 1); // RTZ FSRM(x4, x4); @@ -2335,7 +2335,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i case 0xF4: INST_NAME("PMULUDQ Gx, Ex"); nextop = F8; - if (rv64_vlen >= 256) { + if (cpuext.vlen >= 32) { SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); GETGX_vector(v0, 1, VECTOR_SEW64); GETEX_vector(v1, 0, 0, VECTOR_SEW64); diff --git a/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c index 4ab73160..6bbc746d 100644 --- a/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c @@ -59,7 +59,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, GETGX(); GETVX(); GETEX(x2, 0, 1); - if (rv64_xtheadmempair) { + if (cpuext.xtheadmempair) { ADD(x1, vback, vxoffset); TH_LDD(x3, x4, x1, 0); } else { @@ -67,7 +67,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, LD(x4, vback, vxoffset + 8); } LWU(x5, wback, fixedaddress); - if (rv64_xtheadmempair) { + if (cpuext.xtheadmempair) { ADDI(x1, gback, gdoffset); TH_SDD(x3, x4, x1, 0); } else { diff --git a/src/dynarec/rv64/dynarec_rv64_d9.c b/src/dynarec/rv64/dynarec_rv64_d9.c index a5fb073f..852bd0ad 100644 --- a/src/dynarec/rv64/dynarec_rv64_d9.c +++ b/src/dynarec/rv64/dynarec_rv64_d9.c @@ -133,7 +133,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ANDI(x5, x5, 7); // (emu->top + i)&7 } // load x2 with ST0 anyway, for sign extraction - if (rv64_zba) + if (cpuext.zba) SH3ADD(x1, x5, xEmu); else { SLLI(x5, x5, 3); diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c index df5c921d..cc6ecfdb 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_math.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c @@ -33,7 +33,7 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX (X_CF) { if (rex.w) { ZEXTW2(s5, s1); - if (rv64_zba) // lo + if (cpuext.zba) // lo ADDUW(s5, s2, s5); else { ZEXTW2(s4, s2); @@ -64,7 +64,7 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) } else { NOT(s5, s1); // s5 = ~res @@ -128,7 +128,7 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i IFX (X_CF) { if (rex.w) { ZEXTW2(s5, s1); - if (rv64_zba) // lo + if (cpuext.zba) // lo ADDUW(s5, s2, s5); else { ZEXTW2(s4, s2); @@ -167,7 +167,7 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i SDxw(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) } else { NOT(s2, s1); // s2 = ~res @@ -222,7 +222,7 @@ void emit_add16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SW(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s3, s1); // 
s3 = ~res & (op1 | op2) } else { NOT(s5, s1); // s5 = ~res @@ -280,7 +280,7 @@ void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i ADD(s1, s1, s2); IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) } else { NOT(s2, s1); // s4 = ~res @@ -343,7 +343,7 @@ void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, i ADDI(s1, s1, c); IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) } else { NOT(s2, s1); // s2 = ~res @@ -594,7 +594,7 @@ void emit_inc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) SB(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) } else { NOT(s2, s1); // s2 = ~res @@ -703,7 +703,7 @@ void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) } else { NOT(s2, s1); // s2 = ~res @@ -813,7 +813,7 @@ void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SH(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2) } else { NOT(s2, s1); // s2 = ~res @@ -964,7 +964,7 @@ void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i SW(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s4, s1); // s3 = ~res & (op1 | op2) } else { NOT(s2, s1); // s2 = ~res @@ -1288,7 +1288,7 @@ void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, SW(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s4, s1); // s3 = ~res & (op1 | op2) } else { NOT(s2, s1); // s2 = ~res @@ -1340,7 +1340,7 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s IFX (X_CF) { if (rex.w) { ZEXTW2(s5, s1); - if (rv64_zba) + if (cpuext.zba) ADDUW(s5, s2, s5); else { ZEXTW2(s4, s2); @@ -1377,7 +1377,7 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SDxw(s1, xEmu, offsetof(x64emu_t, res)); } IFX (X_AF | X_OF) { - if (rv64_zbb) { + if (cpuext.zbb) { ANDN(s3, s4, s1); // s3 = ~res & (op1 | op2) } else { NOT(s3, s1); // s2 = ~res diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c index 1bba5289..69dd9a02 100644 --- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c +++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c @@ -950,7 +950,7 @@ void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SET_DFNONE(); } - if (rv64_zbb) { + if (cpuext.zbb) { if (rex.w) { ROL(s1, s1, s2); } else { @@ -1004,7 +1004,7 @@ void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s SET_DFNONE(); } - if (rv64_zbb) { + if (cpuext.zbb) { if (rex.w) { ROR(s1, s1, s2); } else { @@ -1093,9 +1093,9 @@ void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, } else IFXORNAT (X_ALL) { SET_DFNONE(); } - if (rv64_zbb) { + if (cpuext.zbb) { RORIxw(s1, s1, (rex.w ? 64 : 32) - c); - } else if (rv64_xtheadbb) { + } else if (cpuext.xtheadbb) { TH_SRRIxw(s1, s1, (rex.w ? 
64 : 32) - c); } else { SLLIxw(s3, s1, c); @@ -1179,9 +1179,9 @@ void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, } else IFXORNAT (X_ALL) { SET_DFNONE(); } - if (rv64_zbb) { + if (cpuext.zbb) { RORIxw(s1, s1, c); - } else if (rv64_xtheadbb) { + } else if (cpuext.xtheadbb) { TH_SRRIxw(s1, s1, c); } else { SRLIxw(s3, s1, c); @@ -1324,7 +1324,7 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin if (c == 1) SRLI(s4, s1, 15); } - if (rv64_zbb) { + if (cpuext.zbb) { RORIW(s1, s1, c); } else { SRLI(s5, s1, c); @@ -1582,7 +1582,7 @@ void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin if (c == 1) SRLI(s5, s1, 15); } - if (rv64_zbb) { + if (cpuext.zbb) { RORIW(s1, s1, 32 - c); } else { SLLI(s3, s1, c); diff --git a/src/dynarec/rv64/dynarec_rv64_f20f_vector.c b/src/dynarec/rv64/dynarec_rv64_f20f_vector.c index 686ee717..c141376f 100644 --- a/src/dynarec/rv64/dynarec_rv64_f20f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_f20f_vector.c @@ -54,7 +54,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i ed = (nextop & 7) + (rex.b << 3); v0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, VECTOR_SEW64); v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 0, VECTOR_SEW64); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b01, x4, 1); VMERGE_VVM(v0, v0, v1); // implies VMASK } else { @@ -66,7 +66,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i v0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd); addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0); LD(x4, ed, fixedaddress); - if (!rv64_xtheadvector) VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); + if (!cpuext.xtheadvector) VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); VMV_S_X(v0, x4); } break; @@ -79,7 +79,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (MODREG) { ed = (nextop & 7) + (rex.b << 3); d0 = sse_get_reg_vector(dyn, ninst, x1, ed, 1, VECTOR_SEW64); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b01, x4, 1); VMERGE_VVM(v0, v0, v1); // implies VMASK } else { @@ -125,7 +125,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i FCVTDW(v0, ed, RD_RNE); SET_ELEMENT_WIDTH(x3, VECTOR_SEW64, 1); } - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v1 = fpu_get_scratch(dyn); VFMV_S_F(v1, v0); VECTOR_LOAD_VMASK(0b01, x4, 1); @@ -231,7 +231,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i FMVDX(d0, xZR); VMFLT_VF(VMASK, v1, d0, VECTOR_UNMASKED); VFSGNJN_VV(d1, d1, d1, VECTOR_MASKED); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b01, x4, 1); VMERGE_VVM(v0, v0, d1); // implies VMASK } else { @@ -304,14 +304,14 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (v1 & 1 || v0 == v1 + 1) { d1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); VMV_V_V(d1, v1); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VFNCVT_F_F_W(d0, d1, VECTOR_MASKED); VMERGE_VVM(v0, v0, d0); // implies VMASK } else { VFNCVT_F_F_W(v0, d1, VECTOR_MASKED); } } else { - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VFNCVT_F_F_W(d0, v1, VECTOR_MASKED); VMERGE_VVM(v0, v0, d0); // implies VMASK } else { @@ -370,7 +370,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i MARK; FMVD(d0, d1); MARK2; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VFMV_S_F(d0, 
d0); VECTOR_LOAD_VMASK(0b0001, x4, 1); VMERGE_VVM(v0, v0, d0); // implies VMASK @@ -430,7 +430,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i MARK; FMVD(d0, d1); MARK2; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VFMV_S_F(d0, d0); VECTOR_LOAD_VMASK(0b0001, x4, 1); VMERGE_VVM(v0, v0, d0); // implies VMASK @@ -541,7 +541,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i MARK; } NEG(x2, x2); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v0 = fpu_get_scratch(dyn); VMV_S_X(v0, x2); VECTOR_LOAD_VMASK(0b01, x4, 1); @@ -569,7 +569,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VFNCVT_X_F_W(d0, d1, VECTOR_UNMASKED); x87_restoreround(dyn, ninst, u8); vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1); - if (!rv64_xtheadvector) VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); + if (!cpuext.xtheadvector) VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); VMV_X_S(x4, d0); VMV_S_X(v0, x4); break; diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c index 19a8b4b9..50e19353 100644 --- a/src/dynarec/rv64/dynarec_rv64_f30f.c +++ b/src/dynarec/rv64/dynarec_rv64_f30f.c @@ -466,7 +466,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MOV32w(gd, 0); B_NEXT_nocond; MARK; - if (rv64_zbb) { + if (cpuext.zbb) { CPOPxw(gd, ed); } else { if (rex.w) { diff --git a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c index 900831f7..fa511793 100644 --- a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c @@ -56,7 +56,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i ed = (nextop & 7) + (rex.b << 3); v0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, VECTOR_SEW32); v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 0, VECTOR_SEW32); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b0001, x4, 1); VMERGE_VVM(v0, v0, v1); // implies VMASK } else { @@ -81,7 +81,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i if (MODREG) { ed = (nextop & 7) + (rex.b << 3); d0 = sse_get_reg_vector(dyn, ninst, x1, ed, 1, VECTOR_SEW32); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VECTOR_LOAD_VMASK(0b0001, x4, 1); VMERGE_VVM(v0, v0, v1); // implies VMASK } else { @@ -150,7 +150,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETED(0); FCVTSW(v0, ed, RD_RNE); } - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v1 = fpu_get_scratch(dyn); VFMV_S_F(v1, v0); VECTOR_LOAD_VMASK(0b0001, x4, 1); @@ -331,7 +331,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VECTOR_LOAD_VMASK(0b0001, x4, 1); VFWCVT_F_F_V(d0, v1, VECTOR_MASKED); SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VMERGE_VVM(v0, v0, d0); // implies VMASK } else { VMV_X_S(x4, d0); @@ -345,7 +345,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); GETEX_vector(v1, 0, 0, VECTOR_SEW32); GETGX_empty_vector(v0); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { ADDI(x4, xZR, 1); // RTZ FSRM(x4, x4); VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED); @@ -405,7 +405,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i MARK; FMVS(d0, d1); MARK2; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { 
VFMV_S_F(d0, d0); VECTOR_LOAD_VMASK(0b0001, x4, 1); VMERGE_VVM(v0, v0, d0); // implies VMASK @@ -465,7 +465,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i MARK; FMVS(d0, d1); MARK2; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { VFMV_S_F(d0, d0); VECTOR_LOAD_VMASK(0b0001, x4, 1); VMERGE_VVM(v0, v0, d0); // implies VMASK @@ -530,7 +530,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0); LD(x4, ed, fixedaddress); } - if (!rv64_xtheadvector) VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); + if (!cpuext.xtheadvector) VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); VMV_S_X(v0, x4); break; case 0x7F: @@ -616,7 +616,7 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i MARK; } NEG(x2, x2); - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { v0 = fpu_get_scratch(dyn); VMV_S_X(v0, x2); VECTOR_LOAD_VMASK(0b0001, x4, 1); diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 79868e55..117045d0 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -86,10 +86,10 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, if (!(sib >> 6)) { ADD(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg)); if (!IS_GPR(ret)) SCRATCH_USAGE(1); - } else if (rv64_zba) { + } else if (cpuext.zba) { SHxADD(ret, TO_NAT(sib_reg), sib >> 6, TO_NAT(sib_reg2)); if (!IS_GPR(ret)) SCRATCH_USAGE(1); - } else if (rv64_xtheadba) { + } else if (cpuext.xtheadba) { TH_ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6); if (!IS_GPR(ret)) SCRATCH_USAGE(1); } else { @@ -160,10 +160,10 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, if (!(sib >> 6)) { ADD(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg)); if (!IS_GPR(ret)) SCRATCH_USAGE(1); - } else if (rv64_zba) { + } else if (cpuext.zba) { SHxADD(ret, TO_NAT(sib_reg), sib >> 6, TO_NAT(sib_reg2)); if (!IS_GPR(ret)) SCRATCH_USAGE(1); - } else if (rv64_xtheadba) { + } else if (cpuext.xtheadba) { TH_ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6); if (!IS_GPR(ret)) SCRATCH_USAGE(1); } else { @@ -183,10 +183,10 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, if (!(sib >> 6)) { ADD(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg)); if (!IS_GPR(ret)) SCRATCH_USAGE(1); - } else if (rv64_zba) { + } else if (cpuext.zba) { SHxADD(ret, TO_NAT(sib_reg), sib >> 6, TO_NAT(sib_reg2)); if (!IS_GPR(ret)) SCRATCH_USAGE(1); - } else if (rv64_xtheadba) { + } else if (cpuext.xtheadba) { TH_ADDSL(ret, TO_NAT(sib_reg2), TO_NAT(sib_reg), sib >> 6); if (!IS_GPR(ret)) SCRATCH_USAGE(1); } else { @@ -587,7 +587,7 @@ void jump_to_epilog_fast(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst) static int indirect_lookup(dynarec_rv64_t* dyn, int ninst, int is32bits, int s1, int s2) { MAYUSE(dyn); - if (rv64_xtheadbb && rv64_xtheadmemidx) { + if (cpuext.xtheadbb && cpuext.xtheadmemidx) { if (!is32bits) { SRLI(s1, xRIP, 48); BNEZ_safe(s1, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block); @@ -1115,7 +1115,7 @@ void x87_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, in #endif ADDI(s3, s2, dyn->e.x87cache[i]); // unadjusted count, as it's relative to real top ANDI(s3, s3, 7); // (emu->top + st)&7 - if (rv64_zba) + if (cpuext.zba) SH3ADD(s1, s3, xEmu); else { SLLI(s1, s3, 3); @@ -1227,7 +1227,7 @@ static void x87_reflectcache(dynarec_rv64_t* dyn, int ninst, 
int s1, int s2, int if (dyn->e.x87cache[i] != -1) { ADDI(s3, s2, dyn->e.x87cache[i]); ANDI(s3, s3, 7); // (emu->top + i)&7 - if (rv64_zba) + if (cpuext.zba) SH3ADD(s1, s3, xEmu); else { SLLI(s1, s3, 3); @@ -1310,7 +1310,7 @@ int x87_get_cache(dynarec_rv64_t* dyn, int ninst, int populate, int s1, int s2, ADDI(s2, s2, a); ANDI(s2, s2, 7); } - if (rv64_zba) + if (cpuext.zba) SH3ADD(s1, s2, xEmu); else { SLLI(s2, s2, 3); @@ -1362,7 +1362,7 @@ void x87_refresh(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st) ADDI(s2, s2, a); ANDI(s2, s2, 7); // (emu->top + i)&7 } - if (rv64_zba) + if (cpuext.zba) SH3ADD(s1, s2, xEmu); else { SLLI(s2, s2, 3); @@ -1405,7 +1405,7 @@ void x87_forget(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st) ADDI(s2, s2, a); ANDI(s2, s2, 7); // (emu->top + i)&7 } - if (rv64_zba) + if (cpuext.zba) SH3ADD(s1, s2, xEmu); else { SLLI(s2, s2, 3); @@ -1449,7 +1449,7 @@ void x87_reget_st(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st) ADDI(s2, s2, a); AND(s2, s2, 7); } - if (rv64_zba) + if (cpuext.zba) SH3ADD(s1, s2, xEmu); else { SLLI(s2, s2, 3); @@ -1474,7 +1474,7 @@ void x87_reget_st(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st) int a = st - dyn->e.x87stack; ADDI(s2, s2, a); ANDI(s2, s2, 7); // (emu->top + i)&7 - if (rv64_zba) + if (cpuext.zba) SH3ADD(s1, s2, xEmu); else { SLLI(s2, s2, 3); @@ -1509,7 +1509,7 @@ void x87_free(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int st) } ANDI(s2, s2, 7); // (emu->top + i)&7 } - if (rv64_zba) + if (cpuext.zba) SH3ADD(s1, s2, xEmu); else { SLLI(s2, s2, 3); @@ -2057,7 +2057,7 @@ static void sse_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1) } for (int i = 0; i < 16; ++i) if (is_avx_zero(dyn, ninst, i)) { - if (rv64_xtheadmempair) { + if (cpuext.xtheadmempair) { ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i])); TH_SDD(xZR, xZR, s1, 0); } else { @@ -2091,7 +2091,7 @@ static void sse_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1) if (dyn->ymm_zero) for (int i = 0; i < 16; ++i) if (is_avx_zero(dyn, ninst, i)) { - if (rv64_xtheadmempair) { + if (cpuext.xtheadmempair) { ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i])); TH_SDD(xZR, xZR, s1, 0); } else { @@ -2104,7 +2104,7 @@ static void sse_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1) void sse_reflect_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a) { if (is_avx_zero(dyn, ninst, a)) { - if (rv64_xtheadmempair) { + if (cpuext.xtheadmempair) { ADDI(s1, xEmu, offsetof(x64emu_t, ymm[a])); TH_SDD(xZR, xZR, s1, 0); } else { @@ -2151,7 +2151,7 @@ void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07) else FSD(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); if (is_avx_zero(dyn, ninst, i)) { - if (rv64_xtheadmempair) { + if (cpuext.xtheadmempair) { ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i])); TH_SDD(xZR, xZR, s1, 0); } else { @@ -2201,7 +2201,7 @@ void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07) VSE_V(dyn->e.ssecache[i].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); } if (is_avx_zero(dyn, ninst, i)) { - if (rv64_xtheadmempair) { + if (cpuext.xtheadmempair) { ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i])); TH_SDD(xZR, xZR, s1, 0); } else { @@ -2487,7 +2487,7 @@ static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int } *s3_top += a; *s2_val = 0; - if (rv64_zba) + if (cpuext.zba) SH3ADD(s2, s3, xEmu); else { SLLI(s2, s3, 3); @@ -2563,7 +2563,7 @@ static void unloadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, i ANDI(s3, s3, 7); } *s3_top += a; - if 
(rv64_zba) + if (cpuext.zba) SH3ADD(s2, s3, xEmu); else { SLLI(s2, s3, 3); @@ -2868,7 +2868,7 @@ void emit_pf(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4) { MAYUSE(dyn); MAYUSE(ninst); - if (rv64_zbb) { + if (cpuext.zbb) { ANDI(s3, s1, 0xFF); CPOPW(s3, s3); } else { @@ -2963,11 +2963,11 @@ int vector_vsetvli(dynarec_rv64_t* dyn, int ninst, int s1, int sew, int vlmul, f if (sew == VECTOR_SEWANY) sew = VECTOR_SEW8; uint32_t vl = (int)((float)(16 >> sew) * multiple); - uint32_t vtypei = (sew << (3 - !!rv64_xtheadvector)) | vlmul; + uint32_t vtypei = (sew << (3 - !!cpuext.xtheadvector)) | vlmul; if (dyn->inst_sew == VECTOR_SEWNA || dyn->inst_vl == 0 || dyn->inst_sew != sew || dyn->inst_vl != vl || dyn->inst_vlmul != vlmul) { - if (vl == (rv64_vlen >> (3 + sew - vlmul))) { + if (vl == (cpuext.vlen >> (sew - vlmul))) { VSETVLI(s1, xZR, vtypei); - } else if (vl <= 31 && !rv64_xtheadvector) { + } else if (vl <= 31 && !cpuext.xtheadvector) { VSETIVLI(xZR, vl, vtypei); } else { ADDI(s1, xZR, vl); @@ -3004,7 +3004,7 @@ void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int #if STEP > 1 uint8_t sew = dyn->inst_sew; uint8_t vlmul = dyn->inst_vlmul; - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { if (sew == VECTOR_SEW64 && vlmul == VECTOR_LMUL1) { switch (imm) { case 0: @@ -3244,7 +3244,7 @@ void avx_purge_ymm(dynarec_rv64_t* dyn, int ninst, uint16_t mask, int s1) MESSAGE(LOG_NONE, "Purge YMM mask=%04x --------\n", mask); do_something = 1; } - if (rv64_xtheadmempair) { + if (cpuext.xtheadmempair) { ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i])); TH_SDD(xZR, xZR, s1, 0); } else { diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 4f923318..40857156 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -240,7 +240,7 @@ wback = TO_NAT(wback & 3); \ } \ if (wb2) { \ - if (rv64_xtheadbb) { \ + if (cpuext.xtheadbb) { \ TH_EXTU(i, wback, 15, 8); \ } else { \ SRLI(i, wback, wb2); \ @@ -269,7 +269,7 @@ wback = TO_NAT(wback & 3); \ } \ if (wb2) { \ - if (rv64_xtheadbb) { \ + if (cpuext.xtheadbb) { \ TH_EXTU(i, wback, 15, 8); \ } else { \ SRLI(i, wback, wb2); \ @@ -323,7 +323,7 @@ wback = TO_NAT(wback & 3); \ } \ if (wb2) { \ - if (rv64_xtheadbb) { \ + if (cpuext.xtheadbb) { \ TH_EXTU(i, wback, 15, 8); \ } else { \ MV(i, wback); \ @@ -354,7 +354,7 @@ } \ gd = i; \ if (gb2) { \ - if (rv64_xtheadbb) { \ + if (cpuext.xtheadbb) { \ TH_EXTU(gd, gb1, 15, 8); \ } else { \ SRLI(gd, gb1, 8); \ @@ -920,7 +920,7 @@ #define SET_FLAGS_NEZ(reg, F, scratch) \ do { \ - if (rv64_xtheadcondmov) { \ + if (cpuext.xtheadcondmov) { \ ORI(scratch, xFlags, 1 << F); \ TH_MVNEZ(xFlags, scratch, reg); \ } else { \ @@ -931,7 +931,7 @@ #define SET_FLAGS_EQZ(reg, F, scratch) \ do { \ - if (rv64_xtheadcondmov) { \ + if (cpuext.xtheadcondmov) { \ ORI(scratch, xFlags, 1 << F); \ TH_MVEQZ(xFlags, scratch, reg); \ } else { \ @@ -942,7 +942,7 @@ #define SET_FLAGS_LTZ(reg, F, scratch1, scratch2) \ do { \ - if (rv64_xtheadcondmov) { \ + if (cpuext.xtheadcondmov) { \ SLT(scratch1, reg, xZR); \ ORI(scratch2, xFlags, 1 << F); \ TH_MVNEZ(xFlags, scratch2, scratch1); \ @@ -1894,7 +1894,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } \ IFX (X_CF | X_PF | X_ZF | X_PEND) { \ MOV32w(s2, 0b01000101); \ - if (rv64_zbb) { \ + if (cpuext.zbb) { \ ANDN(xFlags, xFlags, s2); \ } else { \ NOT(s3, s2); \ @@ -1934,7 +1934,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, 
uintptr_t ip, ADDIW(reg, s, -1); #define FAST_8BIT_OPERATION(dst, src, s1, OP) \ - if (MODREG && (rv64_zbb || rv64_xtheadbb) && !dyn->insts[ninst].x64.gen_flags) { \ + if (MODREG && (cpuext.zbb || cpuext.xtheadbb) && !dyn->insts[ninst].x64.gen_flags) { \ if (rex.rex) { \ wb = TO_NAT((nextop & 7) + (rex.b << 3)); \ wb2 = 0; \ @@ -1950,13 +1950,13 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } \ if (src##2) { ANDI(s1, src, 0xf00); } \ SLLI(s1, (src##2 ? s1 : src), 64 - src##2 - 8); \ - if (rv64_zbb) { \ + if (cpuext.zbb) { \ RORI(dst, dst, 8 + dst##2); \ } else { \ TH_SRRI(dst, dst, 8 + dst##2); \ } \ OP; \ - if (rv64_zbb) { \ + if (cpuext.zbb) { \ RORI(dst, dst, 64 - 8 - dst##2); \ } else { \ TH_SRRI(dst, dst, 64 - 8 - dst##2); \ @@ -1969,17 +1969,17 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } #define FAST_16BIT_OPERATION(dst, src, s1, OP) \ - if (MODREG && (rv64_zbb || rv64_xtheadbb) && !dyn->insts[ninst].x64.gen_flags) { \ + if (MODREG && (cpuext.zbb || cpuext.xtheadbb) && !dyn->insts[ninst].x64.gen_flags) { \ gd = TO_NAT(((nextop & 0x38) >> 3) + (rex.r << 3)); \ ed = TO_NAT((nextop & 7) + (rex.b << 3)); \ SLLI(s1, src, 64 - 16); \ - if (rv64_zbb) { \ + if (cpuext.zbb) { \ RORI(dst, dst, 16); \ } else { \ TH_SRRI(dst, dst, 16); \ } \ OP; \ - if (rv64_zbb) { \ + if (cpuext.zbb) { \ RORI(dst, dst, 64 - 16); \ } else { \ TH_SRRI(dst, dst, 64 - 16); \ diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index f541e0a5..cb8d0c39 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -35,9 +35,9 @@ // ZERO the upper part, compatible to zba, xtheadbb, and rv64gc #define ZEXTW2(rd, rs1) \ do { \ - if (rv64_zba) { \ + if (cpuext.zba) { \ ZEXTW(rd, rs1); \ - } else if (rv64_xtheadbb) { \ + } else if (cpuext.xtheadbb) {\ TH_EXTU(rd, rs1, 31, 0); \ } else { \ SLLI(rd, rs1, 32); \ @@ -251,19 +251,19 @@ #define SGTU(rd, rs1, rs2) SLTU(rd, rs2, rs1); #define SLEU(rd, rs1, rs2) SGEU(rd, rs2, rs1); -#define MVEQ(rd, rs1, rs2, rs3) \ - if (rv64_xtheadcondmov && (rs2 == xZR || rs3 == xZR)) { \ - TH_MVEQZ(rd, rs1, ((rs2 == xZR) ? rs3 : rs2)); \ - } else { \ - BNE(rs2, rs3, 8); \ - MV(rd, rs1); \ +#define MVEQ(rd, rs1, rs2, rs3) \ + if (cpuext.xtheadcondmov && (rs2 == xZR || rs3 == xZR)) { \ + TH_MVEQZ(rd, rs1, ((rs2 == xZR) ? rs3 : rs2)); \ + } else { \ + BNE(rs2, rs3, 8); \ + MV(rd, rs1); \ } -#define MVNE(rd, rs1, rs2, rs3) \ - if (rv64_xtheadcondmov && (rs2 == xZR || rs3 == xZR)) { \ - TH_MVNEZ(rd, rs1, ((rs2 == xZR) ? rs3 : rs2)); \ - } else { \ - BEQ(rs2, rs3, 8); \ - MV(rd, rs1); \ +#define MVNE(rd, rs1, rs2, rs3) \ + if (cpuext.xtheadcondmov && (rs2 == xZR || rs3 == xZR)) { \ + TH_MVNEZ(rd, rs1, ((rs2 == xZR) ? 
rs3 : rs2)); \ + } else { \ + BEQ(rs2, rs3, 8); \ + MV(rd, rs1); \ } #define MVLT(rd, rs1, rs2, rs3) \ BGE(rs2, rs3, 8); \ @@ -411,36 +411,36 @@ // 4-bytes[rs1+imm12] = rs2 #define SW(rs2, rs1, imm12) EMIT(S_type(imm12, rs2, rs1, 0b010, 0b0100011)) -#define PUSH1(reg) \ - do { \ - if (rv64_xtheadmemidx && reg != xRSP) { \ - TH_SDIB(reg, xRSP, -8, 0); \ - } else { \ - SD(reg, xRSP, 0xFF8); \ - SUBI(xRSP, xRSP, 8); \ - } \ - } while (0) -#define POP1(reg) \ +#define PUSH1(reg) \ do { \ - if (rv64_xtheadmemidx && reg != xRSP) { \ - TH_LDIA(reg, xRSP, 8, 0); \ + if (cpuext.xtheadmemidx && reg != xRSP) { \ + TH_SDIB(reg, xRSP, -8, 0); \ } else { \ - LD(reg, xRSP, 0); \ - if (reg != xRSP) ADDI(xRSP, xRSP, 8); \ + SD(reg, xRSP, 0xFF8); \ + SUBI(xRSP, xRSP, 8); \ } \ } while (0) -#define PUSH1_32(reg) \ - do { \ - if (rv64_xtheadmemidx && reg != xRSP) { \ - TH_SWIB(reg, xRSP, -4, 0); \ - } else { \ - SW(reg, xRSP, 0xFFC); \ - SUBI(xRSP, xRSP, 4); \ - } \ +#define POP1(reg) \ + do { \ + if (cpuext.xtheadmemidx && reg != xRSP) { \ + TH_LDIA(reg, xRSP, 8, 0); \ + } else { \ + LD(reg, xRSP, 0); \ + if (reg != xRSP) ADDI(xRSP, xRSP, 8); \ + } \ + } while (0) +#define PUSH1_32(reg) \ + do { \ + if (cpuext.xtheadmemidx && reg != xRSP) { \ + TH_SWIB(reg, xRSP, -4, 0); \ + } else { \ + SW(reg, xRSP, 0xFFC); \ + SUBI(xRSP, xRSP, 4); \ + } \ } while (0) #define POP1_32(reg) \ do { \ - if (rv64_xtheadmemidx && reg != xRSP) { \ + if (cpuext.xtheadmemidx && reg != xRSP) { \ TH_LWUIA(reg, xRSP, 4, 0); \ } else { \ LWU(reg, xRSP, 0); \ @@ -461,19 +461,19 @@ PUSH1(reg); \ } -#define PUSH1_16(reg) \ - do { \ - if (rv64_xtheadmemidx && reg != xRSP) { \ - TH_SHIB(reg, xRSP, -2, 0); \ - } else { \ - SH(reg, xRSP, 0xFFE); \ - SUBI(xRSP, xRSP, 2); \ - } \ +#define PUSH1_16(reg) \ + do { \ + if (cpuext.xtheadmemidx && reg != xRSP) { \ + TH_SHIB(reg, xRSP, -2, 0); \ + } else { \ + SH(reg, xRSP, 0xFFE); \ + SUBI(xRSP, xRSP, 2); \ + } \ } while (0) #define POP1_16(reg) \ do { \ - if (rv64_xtheadmemidx && reg != xRSP) { \ + if (cpuext.xtheadmemidx && reg != xRSP) { \ TH_LHUIA(reg, xRSP, 2, 0); \ } else { \ LHU(reg, xRSP, 0); \ @@ -539,9 +539,9 @@ #define ADDSL(rd, rs1, rs2, imm2, scratch) \ if (!(imm2)) { \ ADD(rd, rs1, rs2); \ - } else if (rv64_zba) { \ + } else if (cpuext.zba) { \ SHxADD(rd, rs2, imm2, rs1); \ - } else if (rv64_xtheadba) { \ + } else if (cpuext.xtheadba) { \ TH_ADDSL(rd, rs1, rs2, imm2); \ } else { \ SLLI(scratch, rs2, imm2); \ @@ -894,12 +894,12 @@ #define CLZW(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, 0b0011011)) // Count leading zero bits #define CLZxw(rd, rs, x, s1, s2, s3) \ - if (rv64_zbb) { \ + if (cpuext.zbb) { \ if (x) \ CLZ(rd, rs); \ else \ CLZW(rd, rs); \ - } else if (rv64_xtheadbb) { \ + } else if (cpuext.xtheadbb) { \ if (x) { \ TH_FF1(rd, rs); \ } else { \ @@ -949,7 +949,7 @@ // BEWARE: You should take care of the all zeros situation yourself, // and clear the high 32bit when x is 1. 
#define CTZxw(rd, rs, x, s1, s2) \ - if (rv64_zbb) { \ + if (cpuext.zbb) { \ if (x) \ CTZ(rd, rs); \ else \ @@ -985,9 +985,9 @@ #define SEXTH_(rd, rs) EMIT(R_type(0b0110000, 0b00101, rs, 0b001, rd, 0b0010011)) // Sign-extend half-word #define SEXTH(rd, rs) \ - if (rv64_zbb) \ + if (cpuext.zbb) \ SEXTH_(rd, rs); \ - else if (rv64_xtheadbb) \ + else if (cpuext.xtheadbb) \ TH_EXT(rd, rs, 15, 0); \ else { \ SLLI(rd, rs, 48); \ @@ -997,9 +997,9 @@ #define ZEXTH_(rd, rs) EMIT(R_type(0b0000100, 0b00000, rs, 0b100, rd, 0b0111011)) // Zero-extend half-word #define ZEXTH(rd, rs) \ - if (rv64_zbb) \ + if (cpuext.zbb) \ ZEXTH_(rd, rs); \ - else if (rv64_xtheadbb) \ + else if (cpuext.xtheadbb) \ TH_EXTU(rd, rs, 15, 0); \ else { \ SLLI(rd, rs, 48); \ @@ -1047,12 +1047,12 @@ // Byte-reverse register, rd can be the same as rs or s1, but rs cannot be the same as s1. #define REV8xw(rd, rs, s1, s2, s3, s4) \ - if (rv64_zbb) { \ + if (cpuext.zbb) { \ REV8(rd, rs); \ if (!rex.w) { \ SRLI(rd, rd, 32); \ } \ - } else if (rv64_xtheadbb) { \ + } else if (cpuext.xtheadbb) { \ if (rex.w) { \ TH_REV(rd, rs); \ } else { \ @@ -1135,7 +1135,7 @@ // Single-bit Extract (Register) #define BEXT(rd, rs1, rs2, s0) \ - if (rv64_zbs) { \ + if (cpuext.zbs) { \ if (rex.w) { \ BEXT_(rd, rs1, rs2); \ } else { \ @@ -1150,9 +1150,9 @@ // Single-bit Extract (Immediate) #define BEXTI(rd, rs1, imm) \ - if (rv64_zbs) \ + if (cpuext.zbs) \ BEXTI_(rd, rs1, imm); \ - else if (rv64_xtheadbs) \ + else if (cpuext.xtheadbs) \ TH_TST(rd, rs1, imm); \ else { \ SRLIxw(rd, rs1, imm); \ @@ -1504,14 +1504,14 @@ // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#76-vector-indexed-instructions // Note: Make sure SEW in vtype is always the same as EEW, for xtheadvector compatibility! -#define VLUXEI8_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b000, vd, 0b0000111)) // ...001...........000.....0000111 -#define VLUXEI16_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b101, vd, 0b0000111)) // ...001...........101.....0000111 -#define VLUXEI32_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b110, vd, 0b0000111)) // ...001...........110.....0000111 -#define VLUXEI64_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b111, vd, 0b0000111)) // ...001...........111.....0000111 -#define VSUXEI8_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b000, vs3, 0b0100111)) // ...001...........000.....0100111 -#define VSUXEI16_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b101, vs3, 0b0100111)) // ...001...........101.....0100111 -#define VSUXEI32_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b110, vs3, 0b0100111)) // ...001...........110.....0100111 -#define VSUXEI64_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (rv64_xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b111, vs3, 0b0100111)) // ...001...........111.....0100111 +#define VLUXEI8_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (cpuext.xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b000, vd, 0b0000111)) // ...001...........000.....0000111 +#define VLUXEI16_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (cpuext.xtheadvector ? 
0b0110 : 0b0010), vs2, rs1, 0b101, vd, 0b0000111)) // ...001...........101.....0000111 +#define VLUXEI32_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (cpuext.xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b110, vd, 0b0000111)) // ...001...........110.....0000111 +#define VLUXEI64_V(vd, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (cpuext.xtheadvector ? 0b0110 : 0b0010), vs2, rs1, 0b111, vd, 0b0000111)) // ...001...........111.....0000111 +#define VSUXEI8_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (cpuext.xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b000, vs3, 0b0100111)) // ...001...........000.....0100111 +#define VSUXEI16_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (cpuext.xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b101, vs3, 0b0100111)) // ...001...........101.....0100111 +#define VSUXEI32_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (cpuext.xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b110, vs3, 0b0100111)) // ...001...........110.....0100111 +#define VSUXEI64_V(vs3, vs2, rs1, vm, nf) EMIT(R_type(((nf) << 4) | (vm) | (cpuext.xtheadvector ? 0b1110 : 0b0010), vs2, rs1, 0b111, vs3, 0b0100111)) // ...001...........111.....0100111 // Vector Strided Instructions (including segment part) // https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#75-vector-strided-instructions @@ -1583,7 +1583,7 @@ #define VFSLIDE1UP_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001110...........101.....1010111 #define VFSLIDE1DOWN_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001111...........101.....1010111 -#define VFMV_S_F(vd, rs1) EMIT(I_type((rv64_xtheadvector ? 0b001101100000 : 0b010000100000), rs1, 0b101, vd, 0b1010111)) // 010000100000.....101.....1010111 +#define VFMV_S_F(vd, rs1) EMIT(I_type((cpuext.xtheadvector ? 0b001101100000 : 0b010000100000), rs1, 0b101, vd, 0b1010111)) // 010000100000.....101.....1010111 #define VFMV_V_F(vd, rs1) EMIT(I_type(0b010111100000, rs1, 0b101, vd, 0b1010111)) // 010111100000.....101.....1010111 #define VFMERGE_VFM(vd, vs2, rs1) EMIT(R_type(0b0101110, vs2, rs1, 0b101, vd, 0b1010111)) // 0101110..........101.....1010111 @@ -1629,7 +1629,7 @@ #define VFSGNJN_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 001001...........001.....1010111 #define VFSGNJX_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 001010...........001.....1010111 -#define VFMV_F_S(rd, vs2) EMIT(R_type((rv64_xtheadvector ? 0b0011001 : 0b0100001), vs2, 0b00000, 0b001, rd, 0b1010111)) // 0100001.....00000001.....1010111 +#define VFMV_F_S(rd, vs2) EMIT(R_type((cpuext.xtheadvector ? 0b0011001 : 0b0100001), vs2, 0b00000, 0b001, rd, 0b1010111)) // 0100001.....00000001.....1010111 #define VMFEQ_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0110000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 011000...........001.....1010111 #define VMFLE_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0110010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 011001...........001.....1010111 @@ -1646,29 +1646,29 @@ #define VFMSAC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1011100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101110...........001.....1010111 #define VFNMSAC_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1011110 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 101111...........001.....1010111 -#define VFCVT_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 
0b1000100 : 0b0100100) | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010010......00000001.....1010111 -#define VFCVT_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00001, 0b001, vd, 0b1010111)) // 010010......00001001.....1010111 -#define VFCVT_F_XU_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00010, 0b001, vd, 0b1010111)) // 010010......00010001.....1010111 -#define VFCVT_F_X_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00011, 0b001, vd, 0b1010111)) // 010010......00011001.....1010111 -#define VFCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00110, 0b001, vd, 0b1010111)) // 010010......00110001.....1010111 -#define VFCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00111, 0b001, vd, 0b1010111)) // 010010......00111001.....1010111 -#define VFWCVT_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01000, 0b001, vd, 0b1010111)) // 010010......01000001.....1010111 -#define VFWCVT_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01001, 0b001, vd, 0b1010111)) // 010010......01001001.....1010111 -#define VFWCVT_F_XU_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01010, 0b001, vd, 0b1010111)) // 010010......01010001.....1010111 -#define VFWCVT_F_X_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01011, 0b001, vd, 0b1010111)) // 010010......01011001.....1010111 -#define VFWCVT_F_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01100, 0b001, vd, 0b1010111)) // 010010......01100001.....1010111 -#define VFWCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01110, 0b001, vd, 0b1010111)) // 010010......01110001.....1010111 -#define VFWCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01111, 0b001, vd, 0b1010111)) // 010010......01111001.....1010111 -#define VFNCVT_XU_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010010......10000001.....1010111 -#define VFNCVT_X_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10001, 0b001, vd, 0b1010111)) // 010010......10001001.....1010111 -#define VFNCVT_F_XU_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10010, 0b001, vd, 0b1010111)) // 010010......10010001.....1010111 -#define VFNCVT_F_X_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10011, 0b001, vd, 0b1010111)) // 010010......10011001.....1010111 -#define VFNCVT_F_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10100, 0b001, vd, 0b1010111)) // 010010......10100001.....1010111 -#define VFNCVT_ROD_F_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10101, 0b001, vd, 0b1010111)) // 010010......10101001.....1010111 -#define VFNCVT_RTZ_XU_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10110, 0b001, vd, 0b1010111)) // 010010......10110001.....1010111 -#define VFNCVT_RTZ_X_F_W(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 
0b1000100 : 0b0100100) | (vm), vs2, 0b10111, 0b001, vd, 0b1010111)) // 010010......10111001.....1010111 -#define VFSQRT_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000110 : 0b0100110) | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010011......00000001.....1010111 -#define VFCLASS_V(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b1000110 : 0b0100110) | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010011......10000001.....1010111 +#define VFCVT_XU_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010010......00000001.....1010111 +#define VFCVT_X_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00001, 0b001, vd, 0b1010111)) // 010010......00001001.....1010111 +#define VFCVT_F_XU_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00010, 0b001, vd, 0b1010111)) // 010010......00010001.....1010111 +#define VFCVT_F_X_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00011, 0b001, vd, 0b1010111)) // 010010......00011001.....1010111 +#define VFCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00110, 0b001, vd, 0b1010111)) // 010010......00110001.....1010111 +#define VFCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b00111, 0b001, vd, 0b1010111)) // 010010......00111001.....1010111 +#define VFWCVT_XU_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01000, 0b001, vd, 0b1010111)) // 010010......01000001.....1010111 +#define VFWCVT_X_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01001, 0b001, vd, 0b1010111)) // 010010......01001001.....1010111 +#define VFWCVT_F_XU_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01010, 0b001, vd, 0b1010111)) // 010010......01010001.....1010111 +#define VFWCVT_F_X_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01011, 0b001, vd, 0b1010111)) // 010010......01011001.....1010111 +#define VFWCVT_F_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01100, 0b001, vd, 0b1010111)) // 010010......01100001.....1010111 +#define VFWCVT_RTZ_XU_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01110, 0b001, vd, 0b1010111)) // 010010......01110001.....1010111 +#define VFWCVT_RTZ_X_F_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b01111, 0b001, vd, 0b1010111)) // 010010......01111001.....1010111 +#define VFNCVT_XU_F_W(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010010......10000001.....1010111 +#define VFNCVT_X_F_W(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10001, 0b001, vd, 0b1010111)) // 010010......10001001.....1010111 +#define VFNCVT_F_XU_W(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10010, 0b001, vd, 0b1010111)) // 010010......10010001.....1010111 +#define VFNCVT_F_X_W(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10011, 0b001, vd, 0b1010111)) // 010010......10011001.....1010111 +#define VFNCVT_F_F_W(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 
0b1000100 : 0b0100100) | (vm), vs2, 0b10100, 0b001, vd, 0b1010111)) // 010010......10100001.....1010111 +#define VFNCVT_ROD_F_F_W(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10101, 0b001, vd, 0b1010111)) // 010010......10101001.....1010111 +#define VFNCVT_RTZ_XU_F_W(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10110, 0b001, vd, 0b1010111)) // 010010......10110001.....1010111 +#define VFNCVT_RTZ_X_F_W(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000100 : 0b0100100) | (vm), vs2, 0b10111, 0b001, vd, 0b1010111)) // 010010......10111001.....1010111 +#define VFSQRT_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000110 : 0b0100110) | (vm), vs2, 0b00000, 0b001, vd, 0b1010111)) // 010011......00000001.....1010111 +#define VFCLASS_V(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b1000110 : 0b0100110) | (vm), vs2, 0b10000, 0b001, vd, 0b1010111)) // 010011......10000001.....1010111 #define VFRSQRT7_V(vd, vs2, vm) EMIT(R_type(0b0100110 | (vm), vs2, 0b00100, 0b001, vd, 0b1010111)) // 010011......00100001.....1010111 #define VFREC7_V(vd, vs2, vm) EMIT(R_type(0b0100110 | (vm), vs2, 0b00101, 0b001, vd, 0b1010111)) // 010011......00101001.....1010111 @@ -1700,13 +1700,13 @@ #define VSLIDEUP_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001110...........100.....1010111 #define VSLIDEDOWN_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b100, vd, 0b1010111)) // 001111...........100.....1010111 -#define VADC_VXM(vd, vs2, rs1) EMIT(R_type((0b0100000 | rv64_xtheadvector), vs2, rs1, 0b100, vd, 0b1010111)) // 0100000..........100.....1010111 -#define VMADC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100010, vs2, rs1, 0b100, vd, 0b1010111)) // 0100010..........100.....1010111 -#define VMADC_VX(vd, vs2, rs1) EMIT(R_type(0b0100011, vs2, rs1, 0b100, vd, 0b1010111)) // 0100011..........100.....1010111 -#define VSBC_VXM(vd, vs2, rs1) EMIT(R_type((0b0100100 | rv64_xtheadvector), vs2, rs1, 0b100, vd, 0b1010111)) // 0100100..........100.....1010111 -#define VMSBC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100110, vs2, rs1, 0b100, vd, 0b1010111)) // 0100110..........100.....1010111 -#define VMSBC_VX(vd, vs2, rs1) EMIT(R_type(0b0100111, vs2, rs1, 0b100, vd, 0b1010111)) // 0100111..........100.....1010111 -#define VMERGE_VXM(vd, vs2, rs1) EMIT(R_type(0b0101110, vs2, rs1, 0b100, vd, 0b1010111)) // 0101110..........100.....1010111 +#define VADC_VXM(vd, vs2, rs1) EMIT(R_type((0b0100000 | cpuext.xtheadvector), vs2, rs1, 0b100, vd, 0b1010111)) // 0100000..........100.....1010111 +#define VMADC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100010, vs2, rs1, 0b100, vd, 0b1010111)) // 0100010..........100.....1010111 +#define VMADC_VX(vd, vs2, rs1) EMIT(R_type(0b0100011, vs2, rs1, 0b100, vd, 0b1010111)) // 0100011..........100.....1010111 +#define VSBC_VXM(vd, vs2, rs1) EMIT(R_type((0b0100100 | cpuext.xtheadvector), vs2, rs1, 0b100, vd, 0b1010111)) // 0100100..........100.....1010111 +#define VMSBC_VXM(vd, vs2, rs1) EMIT(R_type(0b0100110, vs2, rs1, 0b100, vd, 0b1010111)) // 0100110..........100.....1010111 +#define VMSBC_VX(vd, vs2, rs1) EMIT(R_type(0b0100111, vs2, rs1, 0b100, vd, 0b1010111)) // 0100111..........100.....1010111 +#define VMERGE_VXM(vd, vs2, rs1) EMIT(R_type(0b0101110, vs2, rs1, 0b100, vd, 0b1010111)) // 0101110..........100.....1010111 #define VMV_V_X(vd, rs1) EMIT(I_type(0b010111100000, rs1, 0b100, vd, 0b1010111)) // 010111100000.....100.....1010111 @@ -1746,13 +1746,13 @@ #define VRGATHER_VV(vd, vs2, 
vs1, vm) EMIT(R_type(0b0011000 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001100...........000.....1010111 #define VRGATHEREI16_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, vs1, 0b000, vd, 0b1010111)) // 001110...........000.....1010111 -#define VADC_VVM(vd, vs2, vs1) EMIT(R_type((0b0100000 | rv64_xtheadvector), vs2, vs1, 0b000, vd, 0b1010111)) // 0100000..........000.....1010111 -#define VMADC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100010, vs2, vs1, 0b000, vd, 0b1010111)) // 0100010..........000.....1010111 -#define VMADC_VV(vd, vs2, vs1) EMIT(R_type(0b0100011, vs2, vs1, 0b000, vd, 0b1010111)) // 0100011..........000.....1010111 -#define VSBC_VVM(vd, vs2, vs1) EMIT(R_type((0b0100100 | rv64_xtheadvector), vs2, vs1, 0b000, vd, 0b1010111)) // 0100100..........000.....1010111 -#define VMSBC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100110, vs2, vs1, 0b000, vd, 0b1010111)) // 0100110..........000.....1010111 -#define VMSBC_VV(vd, vs2, vs1) EMIT(R_type(0b0100111, vs2, vs1, 0b000, vd, 0b1010111)) // 0100111..........000.....1010111 -#define VMERGE_VVM(vd, vs2, vs1) EMIT(R_type(0b0101110, vs2, vs1, 0b000, vd, 0b1010111)) // 0101110..........000.....1010111 +#define VADC_VVM(vd, vs2, vs1) EMIT(R_type((0b0100000 | cpuext.xtheadvector), vs2, vs1, 0b000, vd, 0b1010111)) // 0100000..........000.....1010111 +#define VMADC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100010, vs2, vs1, 0b000, vd, 0b1010111)) // 0100010..........000.....1010111 +#define VMADC_VV(vd, vs2, vs1) EMIT(R_type(0b0100011, vs2, vs1, 0b000, vd, 0b1010111)) // 0100011..........000.....1010111 +#define VSBC_VVM(vd, vs2, vs1) EMIT(R_type((0b0100100 | cpuext.xtheadvector), vs2, vs1, 0b000, vd, 0b1010111)) // 0100100..........000.....1010111 +#define VMSBC_VVM(vd, vs2, vs1) EMIT(R_type(0b0100110, vs2, vs1, 0b000, vd, 0b1010111)) // 0100110..........000.....1010111 +#define VMSBC_VV(vd, vs2, vs1) EMIT(R_type(0b0100111, vs2, vs1, 0b000, vd, 0b1010111)) // 0100111..........000.....1010111 +#define VMERGE_VVM(vd, vs2, vs1) EMIT(R_type(0b0101110, vs2, vs1, 0b000, vd, 0b1010111)) // 0101110..........000.....1010111 #define VMV_V_V(vd, vs1) EMIT(I_type(0b010111100000, vs1, 0b000, vd, 0b1010111)) // 010111100000.....000.....1010111 @@ -1789,10 +1789,10 @@ #define VSLIDEUP_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0011100 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001110...........011.....1010111 #define VSLIDEDOWN_VI(vd, vs2, simm5, vm) EMIT(R_type(0b0011110 | (vm), vs2, simm5, 0b011, vd, 0b1010111)) // 001111...........011.....1010111 -#define VADC_VIM(vd, vs2, simm5) EMIT(R_type((0b0100000 | rv64_xtheadvector), vs2, simm5, 0b011, vd, 0b1010111)) // 0100000..........011.....1010111 -#define VMADC_VIM(vd, vs2, simm5) EMIT(R_type(0b0100010, vs2, simm5, 0b011, vd, 0b1010111)) // 0100010..........011.....1010111 -#define VMADC_VI(vd, vs2, simm5) EMIT(R_type(0b0100011, vs2, simm5, 0b011, vd, 0b1010111)) // 0100011..........011.....1010111 -#define VMERGE_VIM(vd, vs2, simm5) EMIT(R_type(0b0101110, vs2, simm5, 0b011, vd, 0b1010111)) // 0101110..........011.....1010111 +#define VADC_VIM(vd, vs2, simm5) EMIT(R_type((0b0100000 | cpuext.xtheadvector), vs2, simm5, 0b011, vd, 0b1010111)) // 0100000..........011.....1010111 +#define VMADC_VIM(vd, vs2, simm5) EMIT(R_type(0b0100010, vs2, simm5, 0b011, vd, 0b1010111)) // 0100010..........011.....1010111 +#define VMADC_VI(vd, vs2, simm5) EMIT(R_type(0b0100011, vs2, simm5, 0b011, vd, 0b1010111)) // 0100011..........011.....1010111 +#define VMERGE_VIM(vd, vs2, simm5) EMIT(R_type(0b0101110, vs2, simm5, 0b011, vd, 0b1010111)) // 
0101110..........011.....1010111 #define VMV_V_I(vd, simm5) EMIT(I_type(0b010111100000, simm5, 0b011, vd, 0b1010111)) // 010111100000.....011.....1010111 @@ -1836,10 +1836,10 @@ #define VASUBU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 001010...........010.....1010111 // Warning: zero-extended on xtheadvector! -#define VMV_X_S(rd, vs2) EMIT(R_type((rv64_xtheadvector ? 0b0011001 : 0b0100001), vs2, 0b00000, 0b010, rd, 0b1010111)) // 0100001.....00000010.....1010111 +#define VMV_X_S(rd, vs2) EMIT(R_type((cpuext.xtheadvector ? 0b0011001 : 0b0100001), vs2, 0b00000, 0b010, rd, 0b1010111)) // 0100001.....00000010.....1010111 // Warning: xtheadvector only -#define VEXT_X_V(rd, vs2, rs1) EMIT(R_type((rv64_xtheadvector ? 0b0011001 : 0b0100001), vs2, rs1, 0b010, rd, 0b1010111)) +#define VEXT_X_V(rd, vs2, rs1) EMIT(R_type((cpuext.xtheadvector ? 0b0011001 : 0b0100001), vs2, rs1, 0b010, rd, 0b1010111)) // Vector Integer Extension Instructions // https://github.com/riscv/riscv-v-spec/blob/e49574c92b072fd4d71e6cb20f7e8154de5b83fe/v-spec.adoc#123-vector-integer-extension @@ -1861,14 +1861,14 @@ #define VMNOR_MM(vd, vs2, vs1) EMIT(R_type(0b0111101, vs2, vs1, 0b010, vd, 0b1010111)) // 0111101..........010.....1010111 #define VMXNOR_MM(vd, vs2, vs1) EMIT(R_type(0b0111111, vs2, vs1, 0b010, vd, 0b1010111)) // 0111111..........010.....1010111 -#define VMSBF_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00001, 0b010, vd, 0b1010111)) // 010100......00001010.....1010111 -#define VMSOF_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00010, 0b010, vd, 0b1010111)) // 010100......00010010.....1010111 -#define VMSIF_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00011, 0b010, vd, 0b1010111)) // 010100......00011010.....1010111 -#define VIOTA_M(vd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b10000, 0b010, vd, 0b1010111)) // 010100......10000010.....1010111 -#define VCPOP_M(rd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10000, 0b010, rd, 0b1010111)) // 010000......10000010.....1010111 -#define VFIRST_M(rd, vs2, vm) EMIT(R_type((rv64_xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10001, 0b010, rd, 0b1010111)) // 010000......10001010.....1010111 +#define VMSBF_M(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00001, 0b010, vd, 0b1010111)) // 010100......00001010.....1010111 +#define VMSOF_M(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00010, 0b010, vd, 0b1010111)) // 010100......00010010.....1010111 +#define VMSIF_M(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b00011, 0b010, vd, 0b1010111)) // 010100......00011010.....1010111 +#define VIOTA_M(vd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b0101100 : 0b0101000) | (vm), vs2, 0b10000, 0b010, vd, 0b1010111)) // 010100......10000010.....1010111 +#define VCPOP_M(rd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10000, 0b010, rd, 0b1010111)) // 010000......10000010.....1010111 +#define VFIRST_M(rd, vs2, vm) EMIT(R_type((cpuext.xtheadvector ? 0b0101100 : 0b0100000) | (vm), vs2, 0b10001, 0b010, rd, 0b1010111)) // 010000......10001010.....1010111 -#define VID_V(vd, vm) EMIT(R_type((rv64_xtheadvector ? 
0b0101100 : 0b0101000) | (vm), 0b00000, 0b10001, 0b010, vd, 0b1010111)) // 010100.0000010001010.....1010111 +#define VID_V(vd, vm) EMIT(R_type((cpuext.xtheadvector ? 0b0101100 : 0b0101000) | (vm), 0b00000, 0b10001, 0b010, vd, 0b1010111)) // 010100.0000010001010.....1010111 #define VDIVU_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1000000 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100000...........010.....1010111 #define VDIV_VV(vd, vs2, vs1, vm) EMIT(R_type(0b1000010 | (vm), vs2, vs1, 0b010, vd, 0b1010111)) // 100001...........010.....1010111 @@ -1906,7 +1906,7 @@ #define VSLIDE1DOWN_VX(vd, vs2, rs1, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 001111...........110.....1010111 // Warning, upper elements will be cleared in xtheadvector! -#define VMV_S_X(vd, rs1) EMIT(I_type((rv64_xtheadvector ? 0b001101100000 : 0b010000100000), rs1, 0b110, vd, 0b1010111)) +#define VMV_S_X(vd, rs1) EMIT(I_type((cpuext.xtheadvector ? 0b001101100000 : 0b010000100000), rs1, 0b110, vd, 0b1010111)) #define VDIVU_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000000 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100000...........110.....1010111 #define VDIV_VX(vd, vs2, rs1, vm) EMIT(R_type(0b1000010 | (vm), vs2, rs1, 0b110, vd, 0b1010111)) // 100001...........110.....1010111 diff --git a/src/dynarec/rv64/rv64_printer.c b/src/dynarec/rv64/rv64_printer.c index f28b8b06..c6cc9e17 100644 --- a/src/dynarec/rv64/rv64_printer.c +++ b/src/dynarec/rv64/rv64_printer.c @@ -7,16 +7,6 @@ #include "rv64_printer.h" #include "debug.h" -extern int rv64_xtheadba; -extern int rv64_xtheadbb; -extern int rv64_xtheadbs; -extern int rv64_xtheadcondmov; -extern int rv64_xtheadmemidx; -extern int rv64_xtheadmempair; -extern int rv64_xtheadfmemidx; -extern int rv64_xtheadmac; -extern int rv64_xtheadfmv; - static const char gpr[32][9] = { "zero", "ra", @@ -189,7 +179,7 @@ const char* rv64_print(uint32_t opcode, uintptr_t addr) static char buff[200]; rv64_print_t a; - if (rv64_xtheadba || rv64_xtheadbb || rv64_xtheadbs || rv64_xtheadcondmov || rv64_xtheadmempair) { + if (cpuext.xtheadba || cpuext.xtheadbb || cpuext.xtheadbs || cpuext.xtheadcondmov || cpuext.xtheadmempair) { /**************** * Generated by https://github.com/ksco/riscv-opcodes/tree/box64_printer @@ -693,7 +683,7 @@ const char* rv64_print(uint32_t opcode, uintptr_t addr) } } - if (rv64_xtheadvector) { + if (cpuext.xtheadvector) { /* These are written by hand.... */ // rv_v, VSETVLI |
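Note on the rename (commentary, not part of the patch above): every scattered rv64_* capability global, including the extern int rv64_xthead* declarations dropped from rv64_printer.c, is replaced by a field of a single cpuext structure. The sketch below only illustrates what such a structure could look like, inferred from the call sites in this diff; the real definition lives outside the files shown here, so type names, field widths, and layout are assumptions. The one hunk that is more than a mechanical rename is in vector_vsetvli(), where the comparison changes from rv64_vlen >> (3 + sew - vlmul) to cpuext.vlen >> (sew - vlmul), which reads as the vector length now being tracked in bytes rather than bits.

#include <stdint.h>

/* Hypothetical consolidated capability struct (illustration only; field names
 * are taken from the cpuext.* call sites above, everything else is assumed). */
typedef struct cpu_ext_s {
    uint8_t zba, zbb, zbs;                    /* RISC-V bit-manipulation extensions    */
    uint8_t vector;                           /* an RVV-capable vector unit is present */
    uint8_t xtheadvector;                     /* T-Head vector 0.7.1 flavour           */
    uint8_t xtheadba, xtheadbb, xtheadbs;     /* T-Head bit-manipulation               */
    uint8_t xtheadcondmov;                    /* T-Head conditional moves              */
    uint8_t xtheadmemidx, xtheadmempair;      /* T-Head indexed / paired memory ops    */
    uint8_t xtheadfmemidx, xtheadmac, xtheadfmv; /* externs dropped from rv64_printer.c,
                                                    presumably moved here as well      */
    uint32_t vlen;                            /* vector register width; in bytes if the
                                                 vsetvli hunk is read as dropping a
                                                 bits-to-bytes shift                   */
} cpu_ext_t;

extern cpu_ext_t cpuext;                      /* single global replacing the rv64_* ints */

/* Old call sites:  if (rv64_zbb)    { ... }
 * New call sites:  if (cpuext.zbb)  { ... }  -- the emitted code is unchanged. */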