diff options
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c | 44 |
| -rw-r--r-- | src/dynarec/dynarec_native_functions.c | 17 |
2 files changed, 24 insertions, 37 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c index 549ec78d..7e1d0579 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c @@ -660,7 +660,7 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip break; case 0x44: - INST_NAME("PCLMULQDQ Gx, Vx, Ex, Ib"); + INST_NAME("VPCLMULQDQ Gx, Vx, Ex, Ib"); nextop = F8; if(arm64_pmull) { d0 = fpu_get_scratch(dyn, ninst); @@ -689,34 +689,24 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip } } } else { - for(int l=0; l<1+vex.l; ++l) { - if(!l) { - GETG; - sse_forget_reg(dyn, ninst, gd); - sse_reflect_reg(dyn, ninst, vex.v); - } - MOV32w(x1, gd); // gx - MOV32w(x2, vex.v); // vx - if(MODREG) { - if(!l) { - ed = (nextop&7)+(rex.b<<3); - sse_forget_reg(dyn, ninst, ed); - } - MOV32w(x3, ed); - } else { - if(!l) { - addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1); - if(ed!=x3) { - MOVx_REG(x3, ed); - } - } else { - ADDx_U12(x3, ed, 16); - } + GETG; + sse_forget_reg(dyn, ninst, gd); + sse_reflect_reg(dyn, ninst, vex.v); + MOV32w(x1, gd); // gx + MOV32w(x2, vex.v); // vx + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + sse_forget_reg(dyn, ninst, ed); + MOV32w(x3, ed); + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1); + if(ed!=x3) { + MOVx_REG(x3, ed); } - if(!l) u8 = F8; - MOV32w(x4, u8); - CALL_(l?native_pclmul_y:native_pclmul_x, -1, x3); } + u8 = F8; + MOV32w(x4, u8); + CALL_(vex.l?native_pclmul_y:native_pclmul_x, -1, x3); } if(!vex.l) YMM0(gd); break; diff --git a/src/dynarec/dynarec_native_functions.c b/src/dynarec/dynarec_native_functions.c index 2691e2cc..a97e22c4 100644 --- a/src/dynarec/dynarec_native_functions.c +++ b/src/dynarec/dynarec_native_functions.c @@ -485,14 +485,12 @@ void native_pclmul(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8) 
for (int i=0; i<64; ++i) if(GX->q[g]&(1LL<<i)) result ^= (op2<<i); - - GX->q[0] = result&0xffffffffffffffffLL; - GX->q[1] = (result>>64)&0xffffffffffffffffLL; + GX->u128 = result; } void native_pclmul_x(x64emu_t* emu, int gx, int vx, void* p, uint32_t u8) { - sse_regs_t *EX = ((uintptr_t)p<16)?((sse_regs_t*)p):&emu->xmm[(uintptr_t)p]; + sse_regs_t *EX = ((uintptr_t)p>15)?((sse_regs_t*)p):&emu->xmm[(uintptr_t)p]; sse_regs_t *GX = &emu->xmm[gx]; sse_regs_t *VX = &emu->xmm[vx]; int g = (u8&1)?1:0; @@ -503,13 +501,13 @@ void native_pclmul_x(x64emu_t* emu, int gx, int vx, void* p, uint32_t u8) if(VX->q[g]&(1LL<<i)) result ^= (op2<<i); - GX->q[0] = result&0xffffffffffffffffLL; - GX->q[1] = (result>>64)&0xffffffffffffffffLL; + GX->u128 = result; } void native_pclmul_y(x64emu_t* emu, int gy, int vy, void* p, uint32_t u8) { - - sse_regs_t *EY = ((uintptr_t)p<16)?((sse_regs_t*)p):&emu->ymm[(uintptr_t)p]; + //compute both low and high values + native_pclmul_x(emu, gy, vy, p, u8); + sse_regs_t *EY = ((uintptr_t)p>15)?((sse_regs_t*)(p+16)):&emu->ymm[(uintptr_t)p]; sse_regs_t *GY = &emu->ymm[gy]; sse_regs_t *VY = &emu->ymm[vy]; int g = (u8&1)?1:0; @@ -520,8 +518,7 @@ void native_pclmul_y(x64emu_t* emu, int gy, int vy, void* p, uint32_t u8) if(VY->q[g]&(1LL<<i)) result ^= (op2<<i); - GY->q[0] = result&0xffffffffffffffffLL; - GY->q[1] = (result>>64)&0xffffffffffffffffLL; + GY->u128 = result; } void native_clflush(x64emu_t* emu, void* p) |