about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author    ptitSeb <sebastien.chev@gmail.com>  2023-10-11 22:26:24 +0200
committer ptitSeb <sebastien.chev@gmail.com>  2023-10-11 22:26:24 +0200
commit    b77303931ee7c7fc2d5ac432d615d9a36830f3f4 (patch)
tree      3405545cba16d7fb3ef7b817682ea1cf1cf94984 /src
parent    2a3923a173b54ca5aac3002230930863e6ccdbe0 (diff)
download  box64-b77303931ee7c7fc2d5ac432d615d9a36830f3f4.tar.gz
          box64-b77303931ee7c7fc2d5ac432d615d9a36830f3f4.zip
[ARM64_DYNAREC] Small improvement to certain cases for 66 0F 70 opcode
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/arm64/arm64_emitter.h9
-rw-r--r--src/dynarec/arm64/dynarec_arm64_660f.c9
2 files changed, 18 insertions, 0 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 16cca694..094adbe0 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -951,6 +951,15 @@
 #define VSRI_16(Vd, Vn, shift)              EMIT(VSRI_vector(0, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd))
 #define VSRI_32(Vd, Vn, shift)              EMIT(VSRI_vector(0, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd))
 
+// REV64 (vector), AdvSIMD two-reg-misc encoding: reverses the order of the 8-, 16- or 32-bit elements (chosen by `size`) inside each 64-bit doubleword of Vn; Q=1 (the *Q variants) operates on the full 128-bit register, Q=0 on the low 64 bits. U is 0 for REV64.
+#define VREVx_vector(Q, U, size, o0, Rn, Rd)    ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 0b10000<<17 | (o0)<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
+#define VREV64_32(Vd, Vn)                   EMIT(VREVx_vector(0, 0, 0b10, 0, Vn, Vd))
+#define VREV64_16(Vd, Vn)                   EMIT(VREVx_vector(0, 0, 0b01, 0, Vn, Vd))
+#define VREV64_8(Vd, Vn)                    EMIT(VREVx_vector(0, 0, 0b00, 0, Vn, Vd))
+#define VREV64Q_32(Vd, Vn)                  EMIT(VREVx_vector(1, 0, 0b10, 0, Vn, Vd))
+#define VREV64Q_16(Vd, Vn)                  EMIT(VREVx_vector(1, 0, 0b01, 0, Vn, Vd))
+#define VREV64Q_8(Vd, Vn)                   EMIT(VREVx_vector(1, 0, 0b00, 0, Vn, Vd))
+
 // Integer MATH
 #define ADDSUB_vector(Q, U, size, Rm, Rn, Rd)   ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10000<<11 | 1<<10 | (Rn)<<5 | (Rd))
 #define VADDQ_8(Vd, Vn, Vm)                 EMIT(ADDSUB_vector(1, 0, 0b00, Vm, Vn, Vd))
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 6dc2f3c0..2d51440b 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -1615,6 +1615,15 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 } else if(u8==0xFF) {

                     // duplicate slot 3 to all spot

                     VDUPQ_32(v0, v1, 3);

+                } else if(u8==0x44) {

+                    // duplicate slots 0/1 (low 64-bit pair) to all spots

+                    VDUPQ_64(v0, v1, 0);

+                } else if(u8==0xEE) {

+                    // duplicate slots 2/3 (high 64-bit pair) to all spots

+                    VDUPQ_64(v0, v1, 1);

+                } else if(u8==0xB1) {

+                    // swap slots 0/1 and slots 2/3 (lane order 1,0,3,2)

+                    VREV64Q_32(v0, v1);

                 } else if(v0!=v1) {

                     VMOVeS(v0, 0, v1, (u8>>(0*2))&3);

                     VMOVeS(v0, 1, v1, (u8>>(1*2))&3);