about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: ptitSeb <sebastien.chev@gmail.com> 2024-05-28 19:55:15 +0200
committer: ptitSeb <sebastien.chev@gmail.com> 2024-05-28 19:55:15 +0200
commit: 503eb41939a915454aa5ca95626560ece19aab30 (patch)
tree: 550e6dfd037930eb969ab94e1e65296c00f9dfe5 /src
parent: a77db3c6a5d35140fbfb9ad717b4b36ba30f2972 (diff)
download: box64-503eb41939a915454aa5ca95626560ece19aab30.tar.gz
box64-503eb41939a915454aa5ca95626560ece19aab30.zip
[INTERPRETER] more and more avx/avx2 opcodes
Diffstat (limited to 'src')
-rw-r--r-- src/emu/x64runavx0f.c 16
-rw-r--r-- src/emu/x64runavx660f.c 144
-rw-r--r-- src/emu/x64runavx660f38.c 84
-rw-r--r-- src/emu/x64runavx660f3a.c 55
-rw-r--r-- src/emu/x64runavxf20f.c 19
-rw-r--r-- src/emu/x64runavxf30f.c 14
6 files changed, 321 insertions, 11 deletions
diff --git a/src/emu/x64runavx0f.c b/src/emu/x64runavx0f.c
index 1644836a..8283cb72 100644
--- a/src/emu/x64runavx0f.c
+++ b/src/emu/x64runavx0f.c
@@ -293,7 +293,21 @@ uintptr_t RunAVX_0F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 GY->u128 = 0;
             }
             break;
-
+        case 0x56:                      /* VORPS Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GX->u128 = VX->u128 | EX->u128;
+            GETGY;
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                GY->u128 = VY->u128 | EY->u128;
+            } else {
+                GY->u128 = 0;
+            }
+            break;
         case 0x57:                      /* XORPS Gx, Vx, Ex */
             nextop = F8;
             GETEX(0);
diff --git a/src/emu/x64runavx660f.c b/src/emu/x64runavx660f.c
index 1539fed6..6ae59163 100644
--- a/src/emu/x64runavx660f.c
+++ b/src/emu/x64runavx660f.c
@@ -214,6 +214,21 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 GY->u128 = 0;
             }
             break;
+        case 0x56:  /* VORPD Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GX->u128 = VX->u128 | EX->u128;
+            GETGY;
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                GY->u128 = VY->u128 | EY->u128;
+            } else {
+                GY->u128 = 0;
+            }
+            break;
 
         case 0x58:  /* VADDPD Gx, Vx, Ex */
             nextop = F8;
@@ -232,7 +247,36 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 GY->u128 = 0;
             }
             break;
-
+        case 0x59:  /* VMULPD Gx, Vx, Ex: Gx.d[i] = Vx.d[i] * Ex.d[i] for each double lane */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            for(int i=0; i<2; ++i) {
+                #ifndef NOALIGN
+                    // inf * 0 (either order, any sign) is forced to -NAN; every other product uses the host multiply
+                    if((isinf(VX->d[i]) && EX->d[i]==0.0) || (isinf(EX->d[i]) && VX->d[i]==0.0))
+                        GX->d[i] = -NAN;
+                    else
+                #endif
+                GX->d[i] = VX->d[i] * EX->d[i];
+            }
+            if(vex.l) {
+                GETEY;
+                GETVY;
+                for(int i=0; i<2; ++i) {
+                    #ifndef NOALIGN
+                        // same inf * 0 special case, applied to the Y operands
+                        if((isinf(VY->d[i]) && EY->d[i]==0.0) || (isinf(EY->d[i]) && VY->d[i]==0.0))
+                            GY->d[i] = -NAN;
+                        else
+                    #endif
+                    GY->d[i] = VY->d[i] * EY->d[i];
+                }
+            } else
+                GY->u128 = 0;   // vex.l clear: GY is zeroed instead of computed
+            break;
         case 0x5A:      /* VCVTPD2PS Gx, Ex */
             nextop = F8;
             GETEX(0);
@@ -392,6 +436,46 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
                 GY->u128 = 0;
             break;
 
+        case 0x63:  /* VPACKSSWB Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            GETEY;
+            GETVY;
+            if(GX==EX) {
+                eax1 = *EX;
+                EX = &eax1;
+            }
+            if(VX==EX) {
+                for(int i=0; i<8; ++i)
+                    GX->sb[i] = (EX->sw[i]<-128)?-128:((EX->sw[i]>127)?127:EX->sw[i]);
+                GX->q[1] = GX->q[0];
+            } else {
+                for(int i=0; i<8; ++i)
+                    GX->sb[i] = (VX->sw[i]<-128)?-128:((VX->sw[i]>127)?127:VX->sw[i]);
+                for(int i=0; i<8; ++i)
+                    GX->sb[8+i] = (EX->sw[i]<-128)?-128:((EX->sw[i]>127)?127:EX->sw[i]);
+            }
+            if(vex.l) {
+                if(GY==EY) {
+                    eay1 = *EY;
+                    EY = &eay1;
+                }
+                if(VY==EY) {
+                    for(int i=0; i<8; ++i)
+                        GY->sb[i] = (EY->sw[i]<-128)?-128:((EY->sw[i]>127)?127:EY->sw[i]);
+                    GY->q[1] = GY->q[0];
+                } else {
+                    for(int i=0; i<8; ++i)
+                        GY->sb[i] = (VY->sw[i]<-128)?-128:((VY->sw[i]>127)?127:VY->sw[i]);
+                    for(int i=0; i<8; ++i)
+                        GY->sb[8+i] = (EY->sw[i]<-128)?-128:((EY->sw[i]>127)?127:EY->sw[i]);
+                }
+            } else
+                GY->u128 = 0;
+            break;
         case 0x64:  /* VPCMPGTB Gx,Vx, Ex */
             nextop = F8;
             GETEX(0);
@@ -440,26 +524,74 @@ uintptr_t RunAVX_660F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             } else
                 GY->q[0] = GY->q[1] = 0;
             break;
+        case 0x67:  /* VPACKUSWB Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            GETEY;
+            GETVY;
+            if(GX==EX) {
+                eax1 = *EX;
+                EX = &eax1;
+            }
+            if(VX==EX) {
+                for(int i=0; i<8; ++i)
+                    GX->ub[i] = (EX->sw[i]<0)?0:((EX->sw[i]>0xff)?0xff:EX->sw[i]);
+                GX->q[1] = GX->q[0];
+            } else {
+                for(int i=0; i<8; ++i)
+                    GX->ub[i] = (VX->sw[i]<0)?0:((VX->sw[i]>0xff)?0xff:VX->sw[i]);
+                for(int i=0; i<8; ++i)
+                    GX->ub[8+i] = (EX->sw[i]<0)?0:((EX->sw[i]>0xff)?0xff:EX->sw[i]);
+            }
+            if(vex.l) {
+                if(GY==EY) {
+                    eay1 = *EY;
+                    EY = &eay1;
+                }
+                if(VY==EY) {
+                    for(int i=0; i<8; ++i)
+                        GY->ub[i] = (EY->sw[i]<0)?0:((EY->sw[i]>0xff)?0xff:EY->sw[i]);
+                    GY->q[1] = GY->q[0];
+                } else {
+                    for(int i=0; i<8; ++i)
+                        GY->ub[i] = (VY->sw[i]<0)?0:((VY->sw[i]>0xff)?0xff:VY->sw[i]);
+                    for(int i=0; i<8; ++i)
+                        GY->ub[8+i] = (EY->sw[i]<0)?0:((EY->sw[i]>0xff)?0xff:EY->sw[i]);
+                }
+            } else
+                GY->u128 = 0;
+            break;
 
-        case 0x6B:  /* VPACKSSDW Gx,Vx, Ex */
+        case 0x6B:  /* VPACKSSDW Gx, Vx, Ex */
             nextop = F8;
             GETEX(0);
             GETGX;
             GETVX;
             GETGY;
+            GETEY;
+            GETVY;
+            if(GX==EX) {
+                eax1 = *EX;
+                EX = &eax1;
+            }
             for(int i=0; i<4; ++i)
                 GX->sw[i] = (VX->sd[i]<-32768)?-32768:((VX->sd[i]>32767)?32767:VX->sd[i]);
-            if(GX==EX)
+            if(VX==EX)
                 GX->q[1] = GX->q[0];
             else
                 for(int i=0; i<4; ++i)
                     GX->sw[4+i] = (EX->sd[i]<-32768)?-32768:((EX->sd[i]>32767)?32767:EX->sd[i]);
             if(vex.l) {
-                GETEY;
-                GETVY;
+                if(GY==EY) {
+                    eay1 = *EY;
+                    EY = &eay1;
+                }
                 for(int i=0; i<4; ++i)
                     GY->sw[i] = (VY->sd[i]<-32768)?-32768:((VY->sd[i]>32767)?32767:VY->sd[i]);
-                if(GY==EY)
+                if(VY==EY)
                     GY->q[1] = GY->q[0];
                 else
                     for(int i=0; i<4; ++i)
diff --git a/src/emu/x64runavx660f38.c b/src/emu/x64runavx660f38.c
index 6f46c659..c2ef2965 100644
--- a/src/emu/x64runavx660f38.c
+++ b/src/emu/x64runavx660f38.c
@@ -191,6 +191,55 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             GY->u128 = EX->u128;
             break;
 
+        case 0x1C:  /* VPABSB Gx, Ex: absolute value of each signed byte of Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETGY;
+            for (int i=0; i<16; ++i) {
+                GX->ub[i] = abs(EX->sb[i]);     // result is stored through the unsigned byte view
+            }
+            if(vex.l) {
+                GETEY;
+                for (int i=0; i<16; ++i) {
+                    GY->ub[i] = abs(EY->sb[i]);
+                }
+            } else
+                GY->u128 = 0;   // vex.l clear: GY is zeroed instead of computed
+            break;
+        case 0x1D:  /* VPABSW Gx, Ex: absolute value of each signed word of Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETGY;
+            for (int i=0; i<8; ++i) {
+                GX->uw[i] = abs(EX->sw[i]);     // result is stored through the unsigned word view
+            }
+            if(vex.l) {
+                GETEY;
+                for (int i=0; i<8; ++i) {
+                    GY->uw[i] = abs(EY->sw[i]);
+                }
+            } else
+                GY->u128 = 0;   // vex.l clear: GY is zeroed instead of computed
+            break;
+        case 0x1E:  /* VPABSD Gx, Ex: absolute value of each signed dword of Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETGY;
+            for (int i=0; i<4; ++i) {
+                GX->ud[i] = (EX->sd[i]<0)?(0u-(uint32_t)EX->sd[i]):(uint32_t)EX->sd[i];   // not abs(): abs(INT_MIN) is undefined, this gives 0x80000000
+            }
+            if(vex.l) {
+                GETEY;
+                for (int i=0; i<4; ++i) {
+                    GY->ud[i] = (EY->sd[i]<0)?(0u-(uint32_t)EY->sd[i]):(uint32_t)EY->sd[i];   // same unsigned negate for the Y operand
+                }
+            } else
+                GY->u128 = 0;   // vex.l clear: GY is zeroed instead of computed
+            break;
+
         case 0x2A:  /* VMOVNTDQA Gx, Ex */
             nextop = F8;
             GETEX(0);
@@ -205,7 +254,40 @@ uintptr_t RunAVX_660F38(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             } else
                 GY->u128 = 0;
             break;
-
+        case 0x2B:  /* VPACKUSDW Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            GETEY;
+            GETVY;
+            if(GX==EX) {
+                eax1 = *EX;
+                EX = &eax1;
+            }
+            for(int i=0; i<4; ++i)
+                GX->uw[i] = (VX->sd[i]<0)?0:((VX->sd[i]>65535)?65535:VX->sd[i]);
+            if(VX==EX)
+                GX->q[1] = GX->q[0];
+            else
+                for(int i=0; i<4; ++i)
+                    GX->uw[i+4] = (EX->sd[i]<0)?0:((EX->sd[i]>65535)?65535:EX->sd[i]);
+            if(vex.l) {
+                if(GY==EY) {
+                    eay1 = *EY;
+                    EY = &eay1;
+                }
+                for(int i=0; i<4; ++i)
+                    GY->uw[i] = (VY->sd[i]<0)?0:((VY->sd[i]>65535)?65535:VY->sd[i]);
+                if(VY==EY)
+                    GY->q[1] = GY->q[0];
+                else
+                    for(int i=0; i<4; ++i)
+                        GY->uw[i+4] = (EY->sd[i]<0)?0:((EY->sd[i]>65535)?65535:EY->sd[i]);
+            } else
+                GY->u128 = 0;
+            break;
         case 0x2C:  /*VMASKMOVPS Gx, Vx, Ex */
             nextop = F8;
             GETEX(0);
diff --git a/src/emu/x64runavx660f3a.c b/src/emu/x64runavx660f3a.c
index 63125974..9016afa2 100644
--- a/src/emu/x64runavx660f3a.c
+++ b/src/emu/x64runavx660f3a.c
@@ -309,7 +309,60 @@ uintptr_t RunAVX_660F3A(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             } else
                 GY->u128 = 0;
             break;
-
+        case 0x42:  /* VMPSADBW Gx, Vx, Ex, Ib: 8 sums of absolute differences of 4-byte groups, selected by Ib */
+            nextop = F8;
+            GETEX(1);
+            GETGX; GETVX; GETGY; GETVY; GETEY;
+            if(GX==EX) {        // destination aliases Ex: read Ex from a copy, GX is written below
+                eax1 = *EX;
+                EX=&eax1;
+            }
+            if(GX==VX) {        // destination aliases Vx: read Vx from a copy as well
+                eay1 = *VX;
+                VX=&eay1;
+            }
+            tmp8u = F8;         // immediate byte, read after the operands are decoded
+            {
+                int src = tmp8u&3;          // bits 1:0: which 4-byte group of Ex is the reference
+                int dst = (tmp8u>>2)&1;     // bit 2: start at byte 0 or byte 4 of Vx
+                int b[11];                  // 8 windows of 4 bytes need 11 consecutive bytes
+                for (int i=0; i<11; ++i)
+                    b[i] = VX->ub[dst*4+i];
+                for(int i=0; i<8; ++i) {    // window i covers b[i..i+3]
+                    int tmp = abs(b[i+0]-EX->ub[src*4+0]);
+                    tmp += abs(b[i+1]-EX->ub[src*4+1]);
+                    tmp += abs(b[i+2]-EX->ub[src*4+2]);
+                    tmp += abs(b[i+3]-EX->ub[src*4+3]);
+                    GX->uw[i] = tmp;
+                }
+            }
+            if(vex.l) {
+                if(GY==EY) {    // eax1/eay1 are reused here: the X operands are no longer read
+                    eax1 = *EY;
+                    EY=&eax1;
+                }
+                if(GY==VY) {
+                    eay1 = *VY;
+                    VY=&eay1;
+                }
+                {
+                    int src = (tmp8u>>3)&3;     // bits 4:3: reference group for the Y operands
+                    int dst = (tmp8u>>5)&1;     // bit 5: start offset for the Y operands
+                    int b[11];
+                    for (int i=0; i<11; ++i)
+                        b[i] = VY->ub[dst*4+i];
+                    for(int i=0; i<8; ++i) {
+                        int tmp = abs(b[i+0]-EY->ub[src*4+0]);
+                        tmp += abs(b[i+1]-EY->ub[src*4+1]);
+                        tmp += abs(b[i+2]-EY->ub[src*4+2]);
+                        tmp += abs(b[i+3]-EY->ub[src*4+3]);
+                        GY->uw[i] = tmp;
+                    }
+                }
+            } else
+                GY->u128 = 0;   // vex.l clear: GY is zeroed instead of computed
+            break;
+                
         case 0x44:    /* VPCLMULQDQ Gx, Vx, Ex, imm8 */
             nextop = F8;
             GETGX;
diff --git a/src/emu/x64runavxf20f.c b/src/emu/x64runavxf20f.c
index a3006a7d..6208f9f9 100644
--- a/src/emu/x64runavxf20f.c
+++ b/src/emu/x64runavxf20f.c
@@ -195,7 +195,24 @@ uintptr_t RunAVX_F20F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             }
             GY->u128 = 0;
             break;
-
+        case 0x59:  /* VMULSD Gx, Vx, Ex: Gx.d[0] = Vx.d[0] * Ex.d[0], Gx.q[1] = Vx.q[1] */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            #ifndef NOALIGN
+                // mul generate a -NAN only if doing (+/-)inf * (+/-)0; test the sources VX/EX, GX is only the destination
+                if((isinf(VX->d[0]) && EX->d[0]==0.0) || (isinf(EX->d[0]) && VX->d[0]==0.0))
+                    GX->d[0] = -NAN;
+                else
+            #endif
+            GX->d[0] = VX->d[0] * EX->d[0];
+            if(GX!=VX) {
+                GX->q[1] = VX->q[1];    // upper quadword is taken from Vx; nothing to copy when Gx is Vx
+            }
+            GY->u128 = 0;               // GY is cleared regardless of vex.l
+            break;
         case 0x5A:  /* VCVTSD2SS Gx, Vx, Ex */
             nextop = F8;
             GETEX(0);
diff --git a/src/emu/x64runavxf30f.c b/src/emu/x64runavxf30f.c
index 8aa1506e..c8fd8b69 100644
--- a/src/emu/x64runavxf30f.c
+++ b/src/emu/x64runavxf30f.c
@@ -222,7 +222,19 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             }
             GY->q[0] = GY->q[1] = 0;
             break;
-
+        case 0x59:  /* VMULSS Gx, Vx, Ex */
+            nextop = F8;
+            GETEX(0);
+            GETGX;
+            GETVX;
+            GETGY;
+            GX->f[0] = VX->f[0] * EX->f[0];
+            if(GX!=VX) {
+                GX->ud[1] = VX->ud[1];
+                GX->q[1] = VX->q[1];
+            }
+            GY->q[0] = GY->q[1] = 0;
+            break;
         case 0x5A:  /* VCVTSS2SD Gx, Vx, Ex */
             nextop = F8;
             GETEX(0);