about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c | 44
-rw-r--r-- src/dynarec/dynarec_native_functions.c | 17
2 files changed, 24 insertions, 37 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
index 549ec78d..7e1d0579 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
@@ -660,7 +660,7 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             break;
 
         case 0x44:
-            INST_NAME("PCLMULQDQ Gx, Vx, Ex, Ib");
+            INST_NAME("VPCLMULQDQ Gx, Vx, Ex, Ib");
             nextop = F8;
             if(arm64_pmull) {
                 d0 = fpu_get_scratch(dyn, ninst);
@@ -689,34 +689,24 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
                     }
                 }
             } else {
-                for(int l=0; l<1+vex.l; ++l) {
-                    if(!l) {
-                        GETG;
-                        sse_forget_reg(dyn, ninst, gd);
-                        sse_reflect_reg(dyn, ninst, vex.v);
-                    }
-                    MOV32w(x1, gd); // gx
-                    MOV32w(x2, vex.v); // vx
-                    if(MODREG) {
-                        if(!l) {
-                            ed = (nextop&7)+(rex.b<<3);
-                            sse_forget_reg(dyn, ninst, ed);
-                        }
-                        MOV32w(x3, ed);
-                    } else {
-                        if(!l) {
-                            addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1);
-                            if(ed!=x3) {
-                                MOVx_REG(x3, ed);
-                            }
-                        } else {
-                            ADDx_U12(x3, ed, 16);
-                        }
+                GETG;
+                sse_forget_reg(dyn, ninst, gd);
+                sse_reflect_reg(dyn, ninst, vex.v);
+                MOV32w(x1, gd); // gx
+                MOV32w(x2, vex.v); // vx
+                if(MODREG) {
+                    ed = (nextop&7)+(rex.b<<3);
+                    sse_forget_reg(dyn, ninst, ed);
+                    MOV32w(x3, ed);
+                } else {
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1);
+                    if(ed!=x3) {
+                        MOVx_REG(x3, ed);
                     }
-                    if(!l) u8 = F8;
-                    MOV32w(x4, u8);
-                    CALL_(l?native_pclmul_y:native_pclmul_x, -1, x3);
                 }
+                u8 = F8;
+                MOV32w(x4, u8);
+                CALL_(vex.l?native_pclmul_y:native_pclmul_x, -1, x3);
             }
             if(!vex.l) YMM0(gd);
             break;
diff --git a/src/dynarec/dynarec_native_functions.c b/src/dynarec/dynarec_native_functions.c
index 2691e2cc..a97e22c4 100644
--- a/src/dynarec/dynarec_native_functions.c
+++ b/src/dynarec/dynarec_native_functions.c
@@ -485,14 +485,12 @@ void native_pclmul(x64emu_t* emu, int gx, int ex, void* p, uint32_t u8)
     for (int i=0; i<64; ++i)
         if(GX->q[g]&(1LL<<i))
             result ^= (op2<<i);
-
-    GX->q[0] = result&0xffffffffffffffffLL;
-    GX->q[1] = (result>>64)&0xffffffffffffffffLL;
+    GX->u128 = result;
 }
 void native_pclmul_x(x64emu_t* emu, int gx, int vx, void* p, uint32_t u8)
 {
 
-    sse_regs_t *EX = ((uintptr_t)p<16)?((sse_regs_t*)p):&emu->xmm[(uintptr_t)p];
+    sse_regs_t *EX = ((uintptr_t)p>15)?((sse_regs_t*)p):&emu->xmm[(uintptr_t)p];
     sse_regs_t *GX = &emu->xmm[gx];
     sse_regs_t *VX = &emu->xmm[vx];
     int g = (u8&1)?1:0;
@@ -503,13 +501,13 @@ void native_pclmul_x(x64emu_t* emu, int gx, int vx, void* p, uint32_t u8)
         if(VX->q[g]&(1LL<<i))
             result ^= (op2<<i);
 
-    GX->q[0] = result&0xffffffffffffffffLL;
-    GX->q[1] = (result>>64)&0xffffffffffffffffLL;
+    GX->u128 = result;
 }
 void native_pclmul_y(x64emu_t* emu, int gy, int vy, void* p, uint32_t u8)
 {
-
-    sse_regs_t *EY = ((uintptr_t)p<16)?((sse_regs_t*)p):&emu->ymm[(uintptr_t)p];
+    //compute both low and high values
+    native_pclmul_x(emu, gy, vy, p, u8);
+    sse_regs_t *EY = ((uintptr_t)p>15)?((sse_regs_t*)(p+16)):&emu->ymm[(uintptr_t)p];
     sse_regs_t *GY = &emu->ymm[gy];
     sse_regs_t *VY = &emu->ymm[vy];
     int g = (u8&1)?1:0;
@@ -520,8 +518,7 @@ void native_pclmul_y(x64emu_t* emu, int gy, int vy, void* p, uint32_t u8)
         if(VY->q[g]&(1LL<<i))
             result ^= (op2<<i);
 
-    GY->q[0] = result&0xffffffffffffffffLL;
-    GY->q[1] = (result>>64)&0xffffffffffffffffLL;
+    GY->u128 = result;
 }
 
 void native_clflush(x64emu_t* emu, void* p)