Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/la64/dynarec_la64_avx_0f.c      | 25
-rw-r--r--  src/dynarec/la64/dynarec_la64_avx_66_0f.c   | 22
-rw-r--r--  src/dynarec/la64/dynarec_la64_avx_66_0f3a.c | 87
-rw-r--r--  src/dynarec/la64/dynarec_la64_avx_f2_0f.c   | 11
-rw-r--r--  src/dynarec/la64/dynarec_la64_avx_f3_0f.c   | 11
-rw-r--r--  src/dynarec/la64/la64_emitter.h             |  2
6 files changed, 157 insertions(+), 1 deletion(-)
diff --git a/src/dynarec/la64/dynarec_la64_avx_0f.c b/src/dynarec/la64/dynarec_la64_avx_0f.c
index 8520045d..ce0bbefc 100644
--- a/src/dynarec/la64/dynarec_la64_avx_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_0f.c
@@ -383,6 +383,31 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in
                 VBITSEL_Vxy(v0, v0, d1, d0);
             }
             break;
+        case 0x5A:
+            INST_NAME("VCVTPS2PD Gx, Ex");
+            nextop = F8;
+            if (vex.l) {
+                GETEYx(v1, 0, 0);
+            } else {
+                GETEYSD(v1, 0, 0);
+            }
+            GETGYxy_empty(v0);
+            if (!BOX64ENV(dynarec_fastround)) {
+                u8 = sse_setround(dyn, ninst, x6, x4);
+            }
+            d0 = fpu_get_scratch(dyn);
+            d1 = fpu_get_scratch(dyn);
+            if (vex.l) {
+                XVFCVTH_D_S(d0, v1);
+                XVFCVTL_D_S(v0, v1);
+                XVPERMI_Q(v0, d0, XVPERMI_IMM_4_0(0, 2));
+            } else {
+                VFCVTL_D_S(v0, v1);
+            }
+            if (!BOX64ENV(dynarec_fastround)) {
+                x87_restoreround(dyn, ninst, u8);
+            }
+            break;
         case 0x5C:
             INST_NAME("VSUBPS Gx, Vx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index 63b2469e..6f86ee02 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -356,6 +356,28 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VBITSEL_Vxy(v0, v0, d1, d0);
             }
             break;
+        case 0x5A:
+            INST_NAME("VCVTPD2PS Gx, Ex");
+            nextop = F8;
+            GETEYxy(v1, 0, 0);
+            GETGYx_empty(v0);
+            if (!BOX64ENV(dynarec_fastround)) {
+                u8 = sse_setround(dyn, ninst, x6, x4);
+            }
+            d0 = fpu_get_scratch(dyn);
+            if (vex.l) {
+                XVXOR_V(d0, d0, d0);
+                XVFCVT_S_D(v0, d0, v1);
+                XVPERMI_D(v0, v0, 0b11011000);
+            } else {
+                VFCVT_S_D(d0, v1, v1);
+                XVPICKVE_D(v0, d0, 0);
+                YMM_UNMARK_UPPER_ZERO(v0);
+            }
+            if (!BOX64ENV(dynarec_fastround)) {
+                x87_restoreround(dyn, ninst, u8);
+            }
+            break;
         case 0x5C:
             INST_NAME("VSUBPD Gx, Vx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
index beba561f..3493c3b9 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
@@ -53,7 +53,14 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
     MAYUSE(s0);
     MAYUSE(j64);
     MAYUSE(cacheupd);
-
+#if STEP > 1
+    static const int8_t round_round[] = {
+        0x3, // round to nearest with ties to even
+        0x0, // round toward minus infinity
+        0x1, // round toward plus infinity
+        0x2  // round toward zero
+    };
+#endif
     rex_t rex = vex.rex;

     switch (opcode) {
@@ -166,6 +173,84 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
                 XVPERMI_Q(v0, d0, XVPERMI_IMM_4_0(0, 2 + index_lo));
             }
             break;
+        case 0x08:
+            INST_NAME("VROUNDPS Gx, Ex, Ib");
+            nextop = F8;
+            GETGY_empty_EY_xy(v0, v1, 1);
+            u8 = F8;
+            if (u8 & 4) {
+                u8 = sse_setround(dyn, ninst, x1, x2);
+                if (vex.l) {
+                    XVFRINT_S(v0, v1);
+                } else {
+                    VFRINT_S(v0, v1);
+                }
+                x87_restoreround(dyn, ninst, u8);
+            } else {
+                if (vex.l) {
+                    XVFRINTRRD_S(v0, v1, round_round[u8 & 3]);
+                } else {
+                    VFRINTRRD_S(v0, v1, round_round[u8 & 3]);
+                }
+            }
+            break;
+        case 0x09:
+            INST_NAME("VROUNDPD Gx, Ex, Ib");
+            nextop = F8;
+            GETGY_empty_EY_xy(v0, v1, 1);
+            u8 = F8;
+            if (u8 & 4) {
+                u8 = sse_setround(dyn, ninst, x1, x2);
+                if (vex.l) {
+                    XVFRINT_D(v0, v1);
+                } else {
+                    VFRINT_D(v0, v1);
+                }
+                x87_restoreround(dyn, ninst, u8);
+            } else {
+                if (vex.l) {
+                    XVFRINTRRD_D(v0, v1, round_round[u8 & 3]);
+                } else {
+                    VFRINTRRD_D(v0, v1, round_round[u8 & 3]);
+                }
+            }
+            break;
+        case 0x0A:
+            INST_NAME("VROUNDSS Gx, Vx, Ex, Ib");
+            nextop = F8;
+            GETEYSS(v2, 0, 1);
+            GETVYx(v1, 0);
+            GETGYx_empty(v0);
+            u8 = F8;
+            d0 = fpu_get_scratch(dyn);
+            if (u8 & 4) {
+                u8 = sse_setround(dyn, ninst, x1, x2);
+                VFRINT_S(d0, v2);
+                x87_restoreround(dyn, ninst, u8);
+            } else {
+                VFRINTRRD_S(d0, v2, round_round[u8 & 3]);
+            }
+            if (v0 != v1) VOR_V(v0, v1, v1);
+            VEXTRINS_W(v0, d0, 0);
+            break;
+        case 0x0B:
+            INST_NAME("VROUNDSD Gx, Vx, Ex, Ib");
+            nextop = F8;
+            GETEYSD(v2, 0, 1);
+            GETVYx(v1, 0);
+            GETGYx_empty(v0);
+            u8 = F8;
+            d0 = fpu_get_scratch(dyn);
+            if (u8 & 4) {
+                u8 = sse_setround(dyn, ninst, x1, x2);
+                VFRINT_D(d0, v2);
+                x87_restoreround(dyn, ninst, u8);
+            } else {
+                VFRINTRRD_D(d0, v2, round_round[u8 & 3]);
+            }
+            if (v0 != v1) VOR_V(v0, v1, v1);
+            VEXTRINS_D(v0, d0, 0);
+            break;
         case 0x0D:
             INST_NAME("VBLENDPD Gx, Vx, Ex, Ib");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
index afe0b086..0a713483 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c
@@ -172,6 +172,17 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_D(v0, d0, 0);
             break;
+        case 0x5A:
+            INST_NAME("VCVTSD2SS Gx, Vx, Ex");
+            nextop = F8;
+            GETEYSD(v2, 0, 0);
+            GETVYx(v1, 0);
+            GETGYx_empty(v0);
+            d0 = fpu_get_scratch(dyn);
+            FCVT_S_D(d0, v2);
+            if (v0 != v1) VOR_V(v0, v1, v1);
+            XVINSVE0_W(v0, d0, 0);
+            break;
         case 0x5C:
             INST_NAME("VSUBSD Gx, Vx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
index ebe56672..c635035f 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c
@@ -186,6 +186,17 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             if (v0 != v1) VOR_V(v0, v1, v1);
             VEXTRINS_W(v0, d0, 0);
             break;
+        case 0x5A:
+            INST_NAME("VCVTSS2SD Gx, Vx, Ex");
+            nextop = F8;
+            GETVYx(v1, 0);
+            GETEYSS(v2, 0, 0);
+            GETGYx_empty(v0);
+            d1 = fpu_get_scratch(dyn);
+            FCVT_D_S(d1, v2);
+            if (v0 != v1) VOR_V(v0, v1, v1);
+            VEXTRINS_D(v0, d1, 0);
+            break;
         case 0x5C:
             INST_NAME("VSUBSS Gx, Vx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 804a28dd..7552a1f9 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -1800,6 +1800,8 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define XVFRINTRZ_D(xd, xj)      EMIT(type_2R(0b0111011010011101011010, xj, xd))
 #define XVFRINTRNE_S(xd, xj)     EMIT(type_2R(0b0111011010011101011101, xj, xd))
 #define XVFRINTRNE_D(xd, xj)     EMIT(type_2R(0b0111011010011101011110, xj, xd))
+#define XVFRINTRRD_S(vd, vj, rm) EMIT(type_2RI4(0b011101101001110101, ((rm & 0b11) << 2) | 0b01, vj, vd))
+#define XVFRINTRRD_D(vd, vj, rm) EMIT(type_2RI4(0b011101101001110101, ((rm & 0b11) << 2) | 0b10, vj, vd))
 #define XVFCVTL_S_H(xd, xj)      EMIT(type_2R(0b0111011010011101111010, xj, xd))
 #define XVFCVTH_S_H(xd, xj)      EMIT(type_2R(0b0111011010011101111011, xj, xd))
 #define XVFCVTL_D_S(xd, xj)      EMIT(type_2R(0b0111011010011101111100, xj, xd))
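
The VCVTPS2PD case added to dynarec_la64_avx_0f.c splits the widening across three LASX ops: XVFCVTL_D_S widens the low two floats, XVFCVTH_D_S the high two, and XVPERMI_Q stacks the two converted pairs. The following is a minimal plain-C model of the ymm path, assuming the usual per-128-bit-lane LASX semantics; the function name and layout are ours, not box64's:

#include <stdio.h>

/* Hypothetical model of VCVTPS2PD ymm <- xmm as lowered above. */
static void vcvtps2pd_ymm(double dst[4], const float src[4])
{
    double lo[2], hi[2];

    lo[0] = (double)src[0]; /* XVFCVTL_D_S: low two floats of the lane */
    lo[1] = (double)src[1];
    hi[0] = (double)src[2]; /* XVFCVTH_D_S: high two floats of the lane */
    hi[1] = (double)src[3];

    dst[0] = lo[0]; /* XVPERMI_Q keeps the low result lane in place ... */
    dst[1] = lo[1];
    dst[2] = hi[0]; /* ... and places the converted high pair above it */
    dst[3] = hi[1];
}

int main(void)
{
    const float src[4] = { 1.5f, -2.25f, 3.0f, 0.125f };
    double dst[4];
    vcvtps2pd_ymm(dst, src);
    for (int i = 0; i < 4; ++i)
        printf("dst[%d] = %g\n", i, dst[i]);
    return 0;
}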
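
Going the other way, the vex.l path of VCVTPD2PS narrows per 128-bit lane: XVFCVT_S_D with a zeroed first operand leaves each lane's two narrowed floats at the bottom of its own lane, and XVPERMI_D with 0b11011000 (doubleword order 0, 2, 1, 3) compacts the two populated pairs into the low xmm half. A sketch of that data movement, under the same per-lane assumption as above (vcvtpd2ps_ymm is again a hypothetical name):

#include <stdio.h>

static void vcvtpd2ps_ymm(float dst[8], const double src[4])
{
    float lane[8] = { 0 };

    /* XVFCVT_S_D, zero first operand: each 128-bit lane narrows its two
     * doubles into its low two floats; the upper two floats stay zero. */
    lane[0] = (float)src[0];
    lane[1] = (float)src[1];
    lane[4] = (float)src[2];
    lane[5] = (float)src[3];

    /* XVPERMI_D 0b11011000 selects 64-bit chunks 0,2,1,3: the populated
     * float pairs form the low half, the zeroed pairs the upper half. */
    const int sel[4] = { 0, 2, 1, 3 };
    for (int i = 0; i < 4; ++i) {
        dst[2 * i]     = lane[2 * sel[i]];
        dst[2 * i + 1] = lane[2 * sel[i] + 1];
    }
}

int main(void)
{
    const double src[4] = { 1.5, -2.25, 3.0, 0.125 };
    float dst[8];
    vcvtpd2ps_ymm(dst, src);
    for (int i = 0; i < 8; ++i)
        printf("%g ", dst[i]); /* 1.5 -2.25 3 0.125 0 0 0 0 */
    printf("\n");
    return 0;
}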
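
In all four VROUND handlers, bit 2 of the immediate selects between the dynamic MXCSR rounding mode (sse_setround plus a plain VFRINT) and a static mode taken from bits 1:0, which round_round[] translates to LoongArch's rm encoding (nearest-even 0x3, down 0x0, up 0x1, truncate 0x2). Here is an illustrative C analogue of the VROUNDSS immediate handling using fenv.h, where modes[] plays the role round_round[] plays on the LoongArch side; it is a sketch of the semantics, not box64 code:

#include <fenv.h>
#include <math.h>
#include <stdio.h>

#pragma STDC FENV_ACCESS ON

static float roundss_model(float x, int imm8)
{
    if (imm8 & 4)
        return nearbyintf(x);        /* honor the current rounding mode */

    static const int modes[4] = { FE_TONEAREST, FE_DOWNWARD,
                                  FE_UPWARD, FE_TOWARDZERO };
    int saved = fegetround();
    fesetround(modes[imm8 & 3]);     /* static mode from imm8 bits 1:0 */
    float r = nearbyintf(x);
    fesetround(saved);               /* mirror x87_restoreround() */
    return r;
}

int main(void)
{
    printf("%g %g\n", roundss_model(2.5f, 0), roundss_model(2.5f, 3));   /* 2 2 */
    printf("%g %g\n", roundss_model(-1.5f, 1), roundss_model(-1.5f, 2)); /* -2 -1 */
    return 0;
}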
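
The two new XVFRINTRRD_S/_D emitter macros fold the fixed-mode XVFRINT* opcodes into one parameterized encoding: rm lands in bits [3:2] of the trailing nibble and the element size in bits [1:0] (0b01 single, 0b10 double). The standalone check below confirms this against the three fixed-mode opcodes visible in the hunk context; enc() is our assumption about how type_2R/type_2RI4 pack operands, and it cancels out of the comparisons:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed 22-bit opcode | vj | vd packing, used only so both sides of
 * each comparison go through the same function. */
static uint32_t enc(uint32_t op22, uint32_t vj, uint32_t vd)
{
    return (op22 << 10) | (vj << 5) | vd;
}

static uint32_t xvfrintrrd(uint32_t rm, uint32_t sz, uint32_t vj, uint32_t vd)
{
    /* 18-bit prefix from the new macros; rm in imm[3:2], size in imm[1:0]. */
    return enc((0b011101101001110101u << 4) | ((rm & 0b11) << 2) | sz, vj, vd);
}

int main(void)
{
    /* Fixed-mode opcodes copied from the la64_emitter.h context above. */
    assert(xvfrintrrd(3, 0b01, 1, 0) == enc(0b0111011010011101011101, 1, 0)); /* XVFRINTRNE_S */
    assert(xvfrintrrd(3, 0b10, 1, 0) == enc(0b0111011010011101011110, 1, 0)); /* XVFRINTRNE_D */
    assert(xvfrintrrd(2, 0b10, 1, 0) == enc(0b0111011010011101011010, 1, 0)); /* XVFRINTRZ_D  */
    puts("XVFRINTRRD folds the fixed-mode XVFRINT* encodings as expected");
    return 0;
}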