about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2025-08-22 19:11:51 +0800
committer GitHub <noreply@github.com> 2025-08-22 13:11:51 +0200
commit 031d3def2d7d1c0f84e8b2575f8c9e2aee47bbfa (patch)
tree 7030ab927152bca2ff0a3e739e86f209577d45de /src
parent 7c32cb24a05bff89b4b9ed4461ecb9e34fdf3d1d (diff)
download box64-031d3def2d7d1c0f84e8b2575f8c9e2aee47bbfa.tar.gz
box64-031d3def2d7d1c0f84e8b2575f8c9e2aee47bbfa.zip
[RV64_DYNAREC] Added scalar SSE 66 0F 3A 60/61/62 opcodes (#2963)
* [RV64_DYNAREC] Added scalar SSE 66 0F 3A 60/61/62 opcodes

* [CI] Bump timeout
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f38.c 122
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f_vector.c 2
2 files changed, 123 insertions, 1 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_660f38.c b/src/dynarec/rv64/dynarec_rv64_660f38.c
index aca87f82..6ba960e7 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f38.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f38.c
@@ -1171,6 +1171,128 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode,
                     MOV32w(x4, u8);
                     CALL4(const_native_pclmul, -1, x1, x2, x3, x4);
                     break;
+                case 0x60:
+                    INST_NAME("PCMPESTRM Gx, Ex, Ib");
+                    SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION);
+                    nextop = F8;
+                    GETG;
+                    sse_forget_reg(dyn, ninst, x6, gd);
+                    ADDI(x3, xEmu, offsetof(x64emu_t, xmm[gd]));
+                    if (MODREG) {
+                        ed = (nextop & 7) + (rex.b << 3);
+                        sse_reflect_reg(dyn, ninst, x6, ed);
+                        ADDI(x1, xEmu, offsetof(x64emu_t, xmm[ed]));
+                        ed = x1;
+                    } else {
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1);
+                    }
+                    MV(x2, xRDX);
+                    MV(x4, xRAX);
+                    u8 = F8;
+                    ADDI(x5, xZR, u8);
+                    CALL6(const_sse42_compare_string_explicit_len, x1, ed, x2, x3, x4, x5, 0);
+                    if (u8 & 0b1000000) {
+                        switch (u8 & 1) {
+                            case 0b00:
+                                for (int i = 0; i < 16; ++i) {
+                                    SRLI(x3, x1, i);
+                                    ANDI(x3, x3, 1);
+                                    NEG(x3, x3);
+                                    SB(x3, xEmu, offsetof(x64emu_t, xmm[0]) + i);
+                                }
+                                break;
+                            case 0b01:
+                                for (int i = 0; i < 8; ++i) {
+                                    SRLI(x3, x1, i);
+                                    ANDI(x3, x3, 1);
+                                    NEG(x3, x3);
+                                    SH(x3, xEmu, offsetof(x64emu_t, xmm[0]) + i * 2);
+                                }
+                                break;
+                        }
+                    } else {
+                        SW(x1, xEmu, offsetof(x64emu_t, xmm[0]));
+                        SW(xZR, xEmu, offsetof(x64emu_t, xmm[0]) + 4);
+                        SD(xZR, xEmu, offsetof(x64emu_t, xmm[0]) + 8);
+                    }
+                    break;
+                case 0x61:
+                    INST_NAME("PCMPESTRI Gx, Ex, Ib");
+                    nextop = F8;
+                    GETG;
+                    u8 = geted_ib(dyn, addr, ninst, nextop);
+                    SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION);
+                    sse_reflect_reg(dyn, ninst, x6, gd);
+                    ADDI(x3, xEmu, offsetof(x64emu_t, xmm[gd]));
+                    if (MODREG) {
+                        ed = (nextop & 7) + (rex.b << 3);
+                        sse_reflect_reg(dyn, ninst, x6, ed);
+                        ADDI(x1, xEmu, offsetof(x64emu_t, xmm[ed]));
+                        ed = x1;
+                    } else {
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1);
+                    }
+                    ZEXTW2(x2, xRDX);
+                    ZEXTW2(x4, xRAX);
+                    u8 = F8;
+                    ADDI(x5, xZR, u8);
+                    CALL6(const_sse42_compare_string_explicit_len, x1, ed, x2, x3, x4, x5, 0);
+                    ZEROUP(x1);
+                    BNEZ_MARK(x1);
+                    MOV32w(xRCX, (u8 & 1) ? 8 : 16);
+                    B_NEXT_nocond;
+                    MARK;
+                    if (u8 & 0b1000000) {
+                        CLZxw(xRCX, x1, 0, x2, x3, x4);
+                        ADDI(x2, xZR, 31);
+                        SUB(xRCX, x2, xRCX);
+                    } else {
+                        CTZxw(xRCX, x1, 0, x2, x3);
+                    }
+                    break;
+                case 0x62:
+                    INST_NAME("PCMPISTRM Gx, Ex, Ib");
+                    SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION);
+                    nextop = F8;
+                    GETG;
+                    sse_forget_reg(dyn, ninst, x6, gd);
+                    ADDI(x2, xEmu, offsetof(x64emu_t, xmm[gd]));
+                    if (MODREG) {
+                        ed = (nextop & 7) + (rex.b << 3);
+                        sse_reflect_reg(dyn, ninst, x6, ed);
+                        ADDI(x1, xEmu, offsetof(x64emu_t, xmm[ed]));
+                        ed = x1;
+                    } else {
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1);
+                    }
+                    u8 = F8;
+                    ADDI(x3, xZR, u8);
+                    CALL4(const_sse42_compare_string_implicit_len, x1, ed, x2, x3, 0);
+                    if (u8 & 0b1000000) {
+                        switch (u8 & 1) {
+                            case 0b00:
+                                for (int i = 0; i < 16; ++i) {
+                                    SRLI(x3, x1, i);
+                                    ANDI(x3, x3, 1);
+                                    NEG(x3, x3);
+                                    SB(x3, xEmu, offsetof(x64emu_t, xmm[0]) + i);
+                                }
+                                break;
+                            case 0b01:
+                                for (int i = 0; i < 8; ++i) {
+                                    SRLI(x3, x1, i);
+                                    ANDI(x3, x3, 1);
+                                    NEG(x3, x3);
+                                    SH(x3, xEmu, offsetof(x64emu_t, xmm[0]) + i * 2);
+                                }
+                                break;
+                        }
+                    } else {
+                        SW(x1, xEmu, offsetof(x64emu_t, xmm[0]));
+                        SW(xZR, xEmu, offsetof(x64emu_t, xmm[0]) + 4);
+                        SD(xZR, xEmu, offsetof(x64emu_t, xmm[0]) + 8);
+                    }
+                    break;
                 case 0x63:
                     INST_NAME("PCMPISTRI Gx, Ex, Ib");
                     SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION);
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index d4d2a5b3..3b6f0b54 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -1066,7 +1066,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                     VMERGE_VVM(q0, v1, d0);
                     break;
                 case 0x44:
-                case 0x63:
+                case 0x60 ... 0x63:
                 case 0xDF: return 0;
                 default: DEFAULT_VECTOR;
             }