about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: Yang Liu <numbksco@gmail.com> 2024-07-10 15:45:27 +0800
committer: GitHub <noreply@github.com> 2024-07-10 09:45:27 +0200
commit: 7fa2371bbde733b5219a9f6f18b7e3ef83837166 (patch)
tree: e22a00d76c5dc81015a27f3ad7fc826eea80fecc /src
parent: 60c41cadba8179e23d6f8d6cb5aa8c79172960e9 (diff)
download: box64-7fa2371bbde733b5219a9f6f18b7e3ef83837166.tar.gz
download: box64-7fa2371bbde733b5219a9f6f18b7e3ef83837166.zip
[LA64_DYNAREC] Added more opcodes and a minor fix too (#1662)
* [LA64_DYNAREC] Added more opcodes and a minor fix too

* saturation is considered necessary
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_660f.c 47
-rw-r--r-- src/dynarec/la64/dynarec_la64_emit_math.c 2
-rw-r--r-- src/dynarec/la64/la64_emitter.h 49
3 files changed, 97 insertions, 1 deletions
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 92004615..32782814 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -555,6 +555,16 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(q1, 0, 0);
             VILVH_W(q0, q1, q0);
             break;
+        case 0x6B:
+            INST_NAME("PACKSSDW Gx,Ex");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEX(v1, 0, 0);
+            d0 = fpu_get_scratch(dyn);
+            VOR_V(d0, v1, v1);
+            VSSRANI_H_W(d0, v0, 0);
+            VOR_V(v0, d0, d0);
+            break;
         case 0x6C:
             INST_NAME("PUNPCKLQDQ Gx,Ex");
             nextop = F8;
@@ -925,6 +935,13 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(q0, 0, 0);
             VANDN_V(v0, v0, q0);
             break;
+        case 0xE0:
+            INST_NAME("PAVGB Gx, Ex");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEX(v1, 0, 0);
+            VAVGR_BU(v0, v0, v1);
+            break;
         case 0xE4:
             INST_NAME("PMULHUW Gx,Ex");
             nextop = F8;
@@ -979,6 +996,36 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(v1, 0, 0);
             VMULWEV_D_WU(v0, v0, v1);
             break;
+        case 0xF6:
+            INST_NAME("PSADBW Gx, Ex");
+            nextop = F8;
+            GETGX(q0, 1);
+            GETEX(q1, 0, 0);
+            d0 = fpu_get_scratch(dyn);
+            d1 = fpu_get_scratch(dyn);
+            VABSD_BU(q0, q0, q1);
+
+            // 8bit -> 16bit merge
+            VPICKEV_B(d0, q0, q0);
+            VPICKOD_B(d1, q0, q0);
+            VEXTH_HU_BU(d0, d0);
+            VEXTH_HU_BU(d1, d1);
+            VADD_H(q0, d0, d1);
+
+            // 16bit to 32bit merge
+            VPICKEV_H(d0, q0, q0);
+            VPICKOD_H(d1, q0, q0);
+            VEXTH_WU_HU(d0, d0);
+            VEXTH_WU_HU(d1, d1);
+            VADD_W(q0, d0, d1);
+
+            // 32bit to 64bit merge
+            VPICKEV_W(d0, q0, q0);
+            VPICKOD_W(d1, q0, q0);
+            VEXTH_DU_WU(d0, d0);
+            VEXTH_DU_WU(d1, d1);
+            VADD_D(q0, d0, d1);
+            break;
         case 0xF8:
             INST_NAME("PSUBB Gx,Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c
index f99c61ef..577ffc75 100644
--- a/src/dynarec/la64/dynarec_la64_emit_math.c
+++ b/src/dynarec/la64/dynarec_la64_emit_math.c
@@ -729,7 +729,7 @@ void emit_sbb8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
             X64_SBC_B(s1, s2);
         }
 
-        MV(s1, s3);
+        ANDI(s1, s3, 0xff);
         IFX (X_PEND)
             ST_B(s1, xEmu, offsetof(x64emu_t, res));
         return;
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 25d800fb..bbc500b9 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -181,6 +181,7 @@ f24-f31  fs0-fs7   Static registers                Callee
 #define type_2RI4(opc, imm4, rj, rd)     ((opc) << 14 | ((imm4)  & 0xF )  << 10 | (rj) << 5 | (rd))
 #define type_2RI5(opc, imm5, rj, rd)     ((opc) << 15 | ((imm5)  & 0x1F)  << 10 | (rj) << 5 | (rd))
 #define type_2RI6(opc, imm6, rj, rd)     ((opc) << 16 | ((imm6)  & 0x3F)  << 10 | (rj) << 5 | (rd))
+#define type_2RI7(opc, imm7, rj, rd)     ((opc) << 17 | ((imm7)  & 0x7F)  << 10 | (rj) << 5 | (rd))
 
 // tmp = GR[rj][31:0] + GR[rk][31:0]
 // Gr[rd] = SignExtend(tmp[31:0], GRLEN)
@@ -1211,6 +1212,14 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VSRAN_B_H(vd, vj, vk)       EMIT(type_3R(0b01110000111101101, vk, vj, vd))
 #define VSRAN_H_W(vd, vj, vk)       EMIT(type_3R(0b01110000111101110, vk, vj, vd))
 #define VSRAN_W_D(vd, vj, vk)       EMIT(type_3R(0b01110000111101111, vk, vj, vd))
+#define VSRLNI_B_H(vd, vj, imm4)    EMIT(type_2RI4(0b011100110100000001, imm4, vj, vd))
+#define VSRLNI_H_W(vd, vj, imm5)    EMIT(type_2RI5(0b01110011010000001, imm5, vj, vd))
+#define VSRLNI_W_D(vd, vj, imm6)    EMIT(type_2RI6(0b0111001101000001, imm6, vj, vd))
+#define VSRLNI_D_Q(vd, vj, imm7)    EMIT(type_2RI7(0b011100110100001, imm7, vj, vd))
+#define VSRANI_B_H(vd, vj, imm4)    EMIT(type_2RI4(0b011100110101100001, imm4, vj, vd))
+#define VSRANI_H_W(vd, vj, imm5)    EMIT(type_2RI5(0b01110011010110001, imm5, vj, vd))
+#define VSRANI_W_D(vd, vj, imm6)    EMIT(type_2RI6(0b0111001101011001, imm6, vj, vd))
+#define VSRANI_D_Q(vd, vj, imm7)    EMIT(type_2RI7(0b011100110101101, imm7, vj, vd))
 #define VSRLRN_B_H(vd, vj, vk)      EMIT(type_3R(0b01110000111110001, vk, vj, vd))
 #define VSRLRN_H_W(vd, vj, vk)      EMIT(type_3R(0b01110000111110010, vk, vj, vd))
 #define VSRLRN_W_D(vd, vj, vk)      EMIT(type_3R(0b01110000111110011, vk, vj, vd))
@@ -1229,6 +1238,22 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VSSRAN_BU_H(vd, vj, vk)     EMIT(type_3R(0b01110001000001101, vk, vj, vd))
 #define VSSRAN_HU_W(vd, vj, vk)     EMIT(type_3R(0b01110001000001110, vk, vj, vd))
 #define VSSRAN_WU_D(vd, vj, vk)     EMIT(type_3R(0b01110001000001111, vk, vj, vd))
+#define VSSRLNI_B_H(vd, vj, imm4)   EMIT(type_2RI4(0b011100110100100001, imm4, vj, vd))
+#define VSSRLNI_H_W(vd, vj, imm5)   EMIT(type_2RI5(0b01110011010010001, imm5, vj, vd))
+#define VSSRLNI_W_D(vd, vj, imm6)   EMIT(type_2RI6(0b0111001101001001, imm6, vj, vd))
+#define VSSRLNI_D_Q(vd, vj, imm7)   EMIT(type_2RI7(0b011100110100101, imm7, vj, vd))
+#define VSSRANI_B_H(vd, vj, imm4)   EMIT(type_2RI4(0b011100110110000001, imm4, vj, vd))
+#define VSSRANI_H_W(vd, vj, imm5)   EMIT(type_2RI5(0b01110011011000001, imm5, vj, vd))
+#define VSSRANI_W_D(vd, vj, imm6)   EMIT(type_2RI6(0b0111001101100001, imm6, vj, vd))
+#define VSSRANI_D_Q(vd, vj, imm7)   EMIT(type_2RI7(0b011100110110001, imm7, vj, vd))
+#define VSSRLNI_BU_H(vd, vj, imm4)  EMIT(type_2RI4(0b011100110100110001, imm4, vj, vd))
+#define VSSRLNI_HU_W(vd, vj, imm5)  EMIT(type_2RI5(0b01110011010011001, imm5, vj, vd))
+#define VSSRLNI_WU_D(vd, vj, imm6)  EMIT(type_2RI6(0b0111001101001101, imm6, vj, vd))
+#define VSSRLNI_DU_Q(vd, vj, imm7)  EMIT(type_2RI7(0b011100110100111, imm7, vj, vd))
+#define VSSRANI_BU_H(vd, vj, imm4)  EMIT(type_2RI4(0b011100110110010001, imm4, vj, vd))
+#define VSSRANI_HU_W(vd, vj, imm5)  EMIT(type_2RI5(0b01110011011001001, imm5, vj, vd))
+#define VSSRANI_WU_D(vd, vj, imm6)  EMIT(type_2RI6(0b0111001101100101, imm6, vj, vd))
+#define VSSRANI_DU_Q(vd, vj, imm7)  EMIT(type_2RI7(0b011100110110011, imm7, vj, vd))
 #define VSSRLRN_B_H(vd, vj, vk)     EMIT(type_3R(0b01110001000000001, vk, vj, vd))
 #define VSSRLRN_H_W(vd, vj, vk)     EMIT(type_3R(0b01110001000000010, vk, vj, vd))
 #define VSSRLRN_W_D(vd, vj, vk)     EMIT(type_3R(0b01110001000000011, vk, vj, vd))
@@ -1241,6 +1266,22 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VSSRARN_BU_H(vd, vj, vk)    EMIT(type_3R(0b01110001000010101, vk, vj, vd))
 #define VSSRARN_HU_W(vd, vj, vk)    EMIT(type_3R(0b01110001000010110, vk, vj, vd))
 #define VSSRARN_WU_D(vd, vj, vk)    EMIT(type_3R(0b01110001000010111, vk, vj, vd))
+#define VSSRLRNI_B_H(vd, vj, imm4)  EMIT(type_2RI4(0b011100110101000001, imm4, vj, vd))
+#define VSSRLRNI_H_W(vd, vj, imm5)  EMIT(type_2RI5(0b01110011010100001, imm5, vj, vd))
+#define VSSRLRNI_W_D(vd, vj, imm6)  EMIT(type_2RI6(0b0111001101010001, imm6, vj, vd))
+#define VSSRLRNI_D_Q(vd, vj, imm7)  EMIT(type_2RI7(0b011100110101001, imm7, vj, vd))
+#define VSSRARNI_B_H(vd, vj, imm4)  EMIT(type_2RI4(0b011100110110100001, imm4, vj, vd))
+#define VSSRARNI_H_W(vd, vj, imm5)  EMIT(type_2RI5(0b01110011011010001, imm5, vj, vd))
+#define VSSRARNI_W_D(vd, vj, imm6)  EMIT(type_2RI6(0b0111001101101001, imm6, vj, vd))
+#define VSSRARNI_D_Q(vd, vj, imm7)  EMIT(type_2RI7(0b011100110110101, imm7, vj, vd))
+#define VSSRLRNI_BU_H(vd, vj, imm4) EMIT(type_2RI4(0b011100110101010001, imm4, vj, vd))
+#define VSSRLRNI_HU_W(vd, vj, imm5) EMIT(type_2RI5(0b01110011010101001, imm5, vj, vd))
+#define VSSRLRNI_WU_D(vd, vj, imm6) EMIT(type_2RI6(0b0111001101010101, imm6, vj, vd))
+#define VSSRLRNI_DU_Q(vd, vj, imm7) EMIT(type_2RI7(0b011100110101011, imm7, vj, vd))
+#define VSSRARNI_BU_H(vd, vj, imm4) EMIT(type_2RI4(0b011100110110110001, imm4, vj, vd))
+#define VSSRARNI_HU_W(vd, vj, imm5) EMIT(type_2RI5(0b01110011011011001, imm5, vj, vd))
+#define VSSRARNI_WU_D(vd, vj, imm6) EMIT(type_2RI6(0b0111001101101101, imm6, vj, vd))
+#define VSSRARNI_DU_Q(vd, vj, imm7) EMIT(type_2RI7(0b011100110110111, imm7, vj, vd))
 #define VBITCLR_B(vd, vj, vk)       EMIT(type_3R(0b01110001000011000, vk, vj, vd))
 #define VBITCLR_H(vd, vj, vk)       EMIT(type_3R(0b01110001000011001, vk, vj, vd))
 #define VBITCLR_W(vd, vj, vk)       EMIT(type_3R(0b01110001000011010, vk, vj, vd))
@@ -1601,6 +1642,14 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VSAT_HU(vd, vj, imm4)        EMIT(type_2RI4(0b011100110010100001, imm4, vj, vd))
 #define VSAT_WU(vd, vj, imm5)        EMIT(type_2RI5(0b01110011001010001, imm5, vj, vd))
 #define VSAT_DU(vd, vj, imm6)        EMIT(type_2RI6(0b0111001100101001, imm6, vj, vd))
+#define VEXTH_H_B(vd, vj)            EMIT(type_2R(0b0111001010011110111000, vj, vd))
+#define VEXTH_W_H(vd, vj)            EMIT(type_2R(0b0111001010011110111001, vj, vd))
+#define VEXTH_D_W(vd, vj)            EMIT(type_2R(0b0111001010011110111010, vj, vd))
+#define VEXTH_Q_D(vd, vj)            EMIT(type_2R(0b0111001010011110111011, vj, vd))
+#define VEXTH_HU_BU(vd, vj)          EMIT(type_2R(0b0111001010011110111100, vj, vd))
+#define VEXTH_WU_HU(vd, vj)          EMIT(type_2R(0b0111001010011110111101, vj, vd))
+#define VEXTH_DU_WU(vd, vj)          EMIT(type_2R(0b0111001010011110111110, vj, vd))
+#define VEXTH_QU_DU(vd, vj)          EMIT(type_2R(0b0111001010011110111111, vj, vd))
 #define XVSIGNCOV_B(vd, vj, vk)      EMIT(type_3R(0b01110101001011100, vk, vj, vd))
 #define XVSIGNCOV_H(vd, vj, vk)      EMIT(type_3R(0b01110101001011101, vk, vj, vd))
 #define XVSIGNCOV_W(vd, vj, vk)      EMIT(type_3R(0b01110101001011110, vk, vj, vd))