about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2023-10-30 16:38:15 +0100
committerptitSeb <sebastien.chev@gmail.com>2023-10-30 16:38:15 +0100
commitcb2a623faa78b02b93b402f665030f6b75cdf0a9 (patch)
tree6b376883dc43953b876c2fd8fa12fce94b29eb1e /src
parent7e80e7ba86c51487e71e95d52fb72f45c94030ba (diff)
downloadbox64-cb2a623faa78b02b93b402f665030f6b75cdf0a9.tar.gz
box64-cb2a623faa78b02b93b402f665030f6b75cdf0a9.zip
Added full SSE 4.2 support
Diffstat (limited to 'src')
-rw-r--r--src/emu/x64compstrings.c120
-rw-r--r--src/emu/x64compstrings.h13
-rw-r--r--src/emu/x64run660f.c60
-rw-r--r--src/emu/x64run670f.c1
-rw-r--r--src/emu/x64runf20f.c38
-rw-r--r--src/tools/my_cpuid.c3
-rw-r--r--src/wrapped/wrappedlibc.c2
7 files changed, 236 insertions, 1 deletions
diff --git a/src/emu/x64compstrings.c b/src/emu/x64compstrings.c
new file mode 100644
index 00000000..72781ebb
--- /dev/null
+++ b/src/emu/x64compstrings.c
@@ -0,0 +1,120 @@
+#include <stdint.h>
+
+#include "box64stack.h"
+#include "x64emu.h"
+#include "x64run_private.h"
+#include "x64emu_private.h"
+#include "x64compstrings.h"
+
+// Evaluate one entry of the SSE4.2 string-compare boolean matrix: element i of `reg`
+// against element j of `mem`.
+//  mem, lmem : second string and its number of valid elements
+//  reg, lreg : first string and its number of valid elements
+//  j, i      : element index into mem, and into reg
+//  imm8      : control byte. Bits [3:2] give the aggregation mode (00 equal any, 01 range,
+//              10 equal each, 11 equal ordered). Bits [1:0] pick the ub/uw/sb/sw view used by
+//              range compares; the other modes only look at bit 0 (byte or word).
+// An element at or past its string length is invalid and forces the result:
+//  reg invalid, mem invalid : 1 for equal each and equal ordered, 0 otherwise
+//  reg invalid, mem valid   : 1 for equal ordered, 0 otherwise
+//  reg valid,   mem invalid : 0
+// Returns 0 or 1.
+static int overrideIfDataInvalid(sse_regs_t* mem, int lmem, sse_regs_t* reg, int lreg, int j, int i, int imm8)
+{
+    const int mode = (imm8>>2)&3;
+    const int reg_valid = (i<lreg);
+    const int mem_valid = (j<lmem);
+
+    if(!reg_valid)
+        return mem_valid?(mode==0b11):(mode>=0b10);
+    if(!mem_valid)
+        return 0;
+
+    // both elements are valid: do the real comparison
+    if(mode!=0b01) {
+        // equality does not depend on signedness, so only the element width matters
+        if(imm8&1)
+            return (reg->uw[i] == mem->uw[j]);
+        return (reg->ub[i] == mem->ub[j]);
+    }
+
+    // range: an even i holds a lower bound (reg<=mem), an odd i an upper bound (reg>=mem)
+    switch(imm8&3) {
+        case 0b00:  // ub
+            return (i&1)?(reg->ub[i]>=mem->ub[j]):(reg->ub[i]<=mem->ub[j]);
+        case 0b01:  // uw
+            return (i&1)?(reg->uw[i]>=mem->uw[j]):(reg->uw[i]<=mem->uw[j]);
+        case 0b10:  // sb
+            return (i&1)?(reg->sb[i]>=mem->sb[j]):(reg->sb[i]<=mem->sb[j]);
+        case 0b11:  // sw
+            return (i&1)?(reg->sw[i]>=mem->sw[j]):(reg->sw[i]<=mem->sw[j]);
+    }
+    // not reachable, the switch above covers every value of imm8&3;
+    // present so that no path leaves this non-void function without a value
+    return 0;
+}
+
+// Core of the SSE4.2 string compares (used for PCMPESTRI/PCMPESTRM, and for the implicit-length
+// forms once their lengths have been measured). Builds IntRes1 from the per-element comparisons,
+// applies the polarity to obtain IntRes2, updates the emulated flags and returns IntRes2.
+//  emu       : emulator state whose flags get updated
+//  mem, lmem : second string and its length in elements
+//  reg, lreg : first string and its length in elements
+//  imm8      : control byte. Bit 0: 16 bytes (0) or 8 words (1). Bits [3:2]: aggregation
+//              (00 equal any, 01 range, 10 equal each, 11 equal ordered). Bits [5:4]: polarity
+//              (01 negate every bit, 11 negate only the bits of valid mem elements, else none).
+// A length counts by its absolute value, saturated to the number of packed elements.
+// Flags: CF = (IntRes2 != 0), ZF = (lmem < elements), SF = (lreg < elements), OF = IntRes2 bit 0,
+// AF and PF are cleared.
+uint32_t sse42_compare_string_explicit_len(x64emu_t* emu, sse_regs_t* mem, int lmem, sse_regs_t* reg, int lreg, uint8_t imm8)
+{
+    // get number of packed byte/word
+    const int n_packed = (imm8&1)?8:16;
+    const uint32_t all_ones = (1u<<n_packed)-1;
+    // saturate before negating: INT_MIN cannot be negated, and any magnitude above
+    // n_packed ends up as n_packed anyway
+    if(lreg<-n_packed || lreg>n_packed)
+        lreg = n_packed;
+    else if(lreg<0)
+        lreg = -lreg;
+    if(lmem<-n_packed || lmem>n_packed)
+        lmem = n_packed;
+    else if(lmem<0)
+        lmem = -lmem;
+    // aggregate to intres1, bit j describes element j of mem
+    uint32_t intres1 = 0;
+    switch((imm8>>2)&3) {
+        case 0b00:  // Equal any: mem[j] equals at least one element of reg
+            for(int j=0; j<n_packed; ++j)
+                for(int i=0; i<n_packed; ++i)
+                    intres1 |= (uint32_t)overrideIfDataInvalid(mem, lmem, reg, lreg, j, i, imm8)<<j;
+            break;
+        case 0b01:  // Range: mem[j] satisfies both bounds of at least one (reg[i], reg[i+1]) pair
+            for(int j=0; j<n_packed; ++j)
+                for(int i=0; i<n_packed; i+=2) {
+                    int lower = overrideIfDataInvalid(mem, lmem, reg, lreg, j, i, imm8);
+                    int upper = overrideIfDataInvalid(mem, lmem, reg, lreg, j, i+1, imm8);
+                    intres1 |= (uint32_t)(lower & upper)<<j;
+                }
+            break;
+        case 0b10:  // Equal each: element by element comparison
+            for(int i=0; i<n_packed; ++i)
+                intres1 |= (uint32_t)overrideIfDataInvalid(mem, lmem, reg, lreg, i, i, imm8)<<i;
+            break;
+        case 0b11:  // Equal ordered: bit j is cleared as soon as a reg[i] / mem[i+j] comparison fails
+            intres1 = all_ones;
+            for(int j=0; j<n_packed; ++j)
+                for(int i=0; i<n_packed-j; ++i)
+                    if(!overrideIfDataInvalid(mem, lmem, reg, lreg, i+j, i, imm8))
+                        intres1 &= ~(1u<<j);
+            break;
+    }
+    // build intres2
+    uint32_t intres2 = intres1;
+    switch((imm8>>4)&3) {
+        case 0b01:  // negative polarity: flip every bit
+            intres2 ^= all_ones;
+            break;  // must not fall through, or the masked flip below would be applied on top
+        case 0b11:  // masked negative polarity: flip only the bits of valid mem elements
+            intres2 ^= ((1u<<lmem)-1);
+            break;
+        default:    // 0b00 and 0b10: positive polarity, intres1 is used as is
+            break;
+    }
+    // and now set the flags
+    RESET_FLAGS(emu);
+    CONDITIONAL_SET_FLAG(intres2, F_CF);
+    CONDITIONAL_SET_FLAG(lmem<n_packed, F_ZF);
+    CONDITIONAL_SET_FLAG(lreg<n_packed, F_SF);
+    CONDITIONAL_SET_FLAG(intres2&1, F_OF);
+    CLEAR_FLAG(F_AF);
+    CLEAR_FLAG(F_PF);
+
+    return intres2;
+}
+
+// Implicit-length flavour of the SSE4.2 string compare: the length of each operand is the index
+// of its first zero element (bytes, or words when imm8 bit 0 is set), or the full element count
+// when no element is zero. The comparison itself is done by the explicit-length routine, whose
+// result is returned unchanged.
+uint32_t sse42_compare_string_implicit_len(x64emu_t* emu, sse_regs_t* mem, sse_regs_t* reg, uint8_t imm8)
+{
+    const int is_word = imm8&1;
+    const int n_packed = is_word?8:16;
+    int lmem;
+    int lreg;
+    // measure mem up to its terminator
+    for(lmem=0; lmem<n_packed; ++lmem)
+        if(!(is_word?mem->uw[lmem]:mem->ub[lmem]))
+            break;
+    // measure reg up to its terminator
+    for(lreg=0; lreg<n_packed; ++lreg)
+        if(!(is_word?reg->uw[lreg]:reg->ub[lreg]))
+            break;
+    return sse42_compare_string_explicit_len(emu, mem, lmem, reg, lreg, imm8);
+}
\ No newline at end of file
diff --git a/src/emu/x64compstrings.h b/src/emu/x64compstrings.h
new file mode 100644
index 00000000..b2a785b2
--- /dev/null
+++ b/src/emu/x64compstrings.h
@@ -0,0 +1,13 @@
+#ifndef __X64_CMPSTRINGS_H__
+#define __X64_CMPSTRINGS_H__
+
+#include <stdint.h>
+
+#include "regs.h"
+
+typedef struct x64emu_s x64emu_t;
+
+// Explicit-length SSE4.2 string compare. The definition in x64compstrings.c names a/la as
+// mem/lmem and b/lb as reg/lreg. Updates the emulated flags of emu and returns the resulting
+// bit mask (one bit per element; 16 bytes, or 8 words when imm8 bit 0 is set).
+uint32_t sse42_compare_string_explicit_len(x64emu_t* emu, sse_regs_t* a, int la, sse_regs_t* b, int lb, uint8_t imm8);
+// Implicit-length variant: each length is the index of the first zero element of the operand
+// (or the full element count when there is none), then the explicit-length routine is applied.
+uint32_t sse42_compare_string_implicit_len(x64emu_t* emu, sse_regs_t* a, sse_regs_t* b, uint8_t imm8);
+
+#endif //__X64_CMPSTRINGS_H__
\ No newline at end of file
diff --git a/src/emu/x64run660f.c b/src/emu/x64run660f.c
index 9b969c83..72180c0c 100644
--- a/src/emu/x64run660f.c
+++ b/src/emu/x64run660f.c
@@ -22,6 +22,7 @@
 #include "bridge.h"

 

 #include "modrm.h"

+#include "x64compstrings.h"

 

 static uint8_t ff_mult(uint8_t a, uint8_t b)

 {

@@ -1118,6 +1119,65 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 }

                 break;

 

+            case 0x60:  /* PCMPESTRM */

+                nextop = F8;

+                GETEX(1);

+                GETGX;

+                tmp8u = F8;

+                tmp32u = sse42_compare_string_explicit_len(emu, EX, R_EDX, GX, R_EAX, tmp8u);

+                if(tmp8u&0b1000000) {

+                    switch(tmp8u&1) {

+                        case 0: for(int i=0; i<16; ++i) GX->ub[i] = ((tmp32u>>i)&1)?0xff:0x00; break;

+                        case 1: for(int i=0; i<8; ++i) GX->uw[i] = ((tmp32u>>i)&1)?0xffff:0x0000; break;

+                    }

+                } else {

+                    GX->q[1] = GX->q[0] = 0;

+                    GX->uw[0] = tmp32u;

+                }

+                break;

+            case 0x61:  /* PCMPESTRI */

+                nextop = F8;

+                GETEX(1);

+                GETGX;

+                tmp8u = F8;

+                tmp32u = sse42_compare_string_explicit_len(emu, EX, R_EDX, GX, R_EAX, tmp8u);

+                if(!tmp32u)

+                    R_RCX = (tmp8u&1)?8:16;

+                else if(tmp8u&0b1000000)

+                    R_RCX = 31-__builtin_clz(tmp32u);

+                else

+                    R_RCX = __builtin_ffs(tmp32u) - 1;

+                break;

+            case 0x62:  /* PCMPISTRM */

+                nextop = F8;

+                GETEX(1);

+                GETGX;

+                tmp8u = F8;

+                tmp32u = sse42_compare_string_implicit_len(emu, EX, GX, tmp8u);

+                if(tmp8u&0b1000000) {

+                    switch(tmp8u&1) {

+                        case 0: for(int i=0; i<16; ++i) GX->ub[i] = ((tmp32u>>i)&1)?0xff:0x00; break;

+                        case 1: for(int i=0; i<8; ++i) GX->uw[i] = ((tmp32u>>i)&1)?0xffff:0x0000; break;

+                    }

+                } else {

+                    GX->q[1] = GX->q[0] = 0;

+                    GX->uw[0] = tmp32u;

+                }

+                break;

+            case 0x63:  /* PCMPISTRI */

+                nextop = F8;

+                GETEX(1);

+                GETGX;

+                tmp8u = F8;

+                tmp32u = sse42_compare_string_implicit_len(emu, EX, GX, tmp8u);

+                if(!tmp32u)

+                    R_RCX = (tmp8u&1)?8:16;

+                else if(tmp8u&0b1000000)

+                    R_RCX = 31-__builtin_clz(tmp32u);

+                else

+                    R_RCX = __builtin_ffs(tmp32u) - 1;

+                break;

+

             case 0xDF:      // AESKEYGENASSIST Gx, Ex, u8

                 nextop = F8;

                 GETEX(1);

diff --git a/src/emu/x64run670f.c b/src/emu/x64run670f.c
index 015b3712..2f94bd33 100644
--- a/src/emu/x64run670f.c
+++ b/src/emu/x64run670f.c
@@ -25,6 +25,7 @@
 #endif
 
 #include "modrm.h"
+#include "x64compstrings.h"
 
 #ifdef TEST_INTERPRETER
 uintptr_t Test670F(x64test_t *test, rex_t rex, int rep, uintptr_t addr)
diff --git a/src/emu/x64runf20f.c b/src/emu/x64runf20f.c
index d5ce598f..020a896a 100644
--- a/src/emu/x64runf20f.c
+++ b/src/emu/x64runf20f.c
@@ -156,6 +156,44 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
             GD->dword[1] = 0;

         }

         break;

+

+    case 0x38:  // more opcodes

+            opcode = F8;

+            switch(opcode) {

+

+                case 0xF0:  // CRC32 Gd, Eb

+                    nextop = F8;

+                    GETEB(0);

+                    GETGD;

+                    GD->dword[0] ^=  EB->byte[0];

+                    for (int i = 0; i < 8; i++) {

+                        if (GD->dword[0] & 1)

+                            GD->dword[0] = (GD->dword[0] >> 1) ^ 0x82f63b78;

+                        else

+                            GD->dword[0] = (GD->dword[0] >> 1);

+                    }

+                    GD->dword[1] = 0;

+                    break;

+                case 0xF1:  // CRC32 Gd, Ed

+                    nextop = F8;

+                    GETED(0);

+                    GETGD;

+                    for(int j=0; j<4*(rex.w+1); ++j) {

+                        GD->dword[0] ^=  ED->byte[j];

+                        for (int i = 0; i < 8; i++) {

+                            if (GD->dword[0] & 1)

+                                GD->dword[0] = (GD->dword[0] >> 1) ^ 0x82f63b78;

+                            else

+                                GD->dword[0] = (GD->dword[0] >> 1);

+                        }

+                    }

+                    GD->dword[1] = 0;

+                    break;

+

+                default:

+                    return 0;

+            }

+        break;

         

     case 0x51:  /* SQRTSD Gx, Ex */

         nextop = F8;

diff --git a/src/tools/my_cpuid.c b/src/tools/my_cpuid.c
index 6039f135..81399633 100644
--- a/src/tools/my_cpuid.c
+++ b/src/tools/my_cpuid.c
@@ -193,6 +193,7 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
                 R_EAX |= cpu<<24;
             }*/
             R_EDX =   1         // fpu 
+                    | 1<<2      // debugging extension
                     | 1<<4      // rdtsc
                     | 1<<8      // cmpxchg8
                     | 1<<11     // sep (sysenter & sysexit)
@@ -202,6 +203,7 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
                     | 1<<24     // fxsr (fxsave, fxrestore)
                     | 1<<25     // SSE
                     | 1<<26     // SSE2
+                    | 1<<28     // HT / Multi-core
                     ;
             R_ECX =   1<<0      // SSE3
                     | 1<<1      // PCLMULQDQ
@@ -209,6 +211,7 @@ void my_cpuid(x64emu_t* emu, uint32_t tmp32u)
                     | 1<<12     // fma
                     | 1<<13     // cx16 (cmpxchg16)
                     | 1<<19     // SSE4_1
+                    | 1<<20     // SSE4_2
                     | 1<<22     // MOVBE
                     | 1<<23     // POPCOUNT
                     | 1<<25     // aesni
diff --git a/src/wrapped/wrappedlibc.c b/src/wrapped/wrappedlibc.c
index fbbbef76..b9ed9c42 100644
--- a/src/wrapped/wrappedlibc.c
+++ b/src/wrapped/wrappedlibc.c
@@ -1562,7 +1562,7 @@ void CreateCPUInfoFile(int fd)
         P;
         sprintf(buff, "bogomips\t: %g\n", getBogoMips());
         P;
-        sprintf(buff, "flags\t\t: fpu cx8 sep ht cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fma fxsr cpuid pclmulqdq cx16 aes movbe pni sse4_1 lzcnt popcnt\n");
+        sprintf(buff, "flags\t\t: fpu cx8 sep ht cmov clflush mmx sse sse2 syscall tsc lahf_lm ssse3 ht tm lm fma fxsr cpuid pclmulqdq cx16 aes movbe pni sse4_1 sse4_2 lzcnt popcnt\n");
         P;
         sprintf(buff, "address sizes\t: 48 bits physical, 48 bits virtual\n");
         P;