author     ptitSeb <sebastien.chev@gmail.com>  2021-03-30 11:31:04 +0200
committer  ptitSeb <sebastien.chev@gmail.com>  2021-03-30 11:31:04 +0200
commit     0d59028eb2525cffcb05eb7bf72510aa712168f5 (patch)
tree       d181ae1c680e9bdebde676870d5af8962b98b9a0 /src
parent     7ae39b38a024ac9040b5329b87dc9e3e49ccbb93 (diff)
download   box64-0d59028eb2525cffcb05eb7bf72510aa712168f5.tar.gz
           box64-0d59028eb2525cffcb05eb7bf72510aa712168f5.zip
[DYNAREC] Optimized the 66 0F 70 opcode a bit
Diffstat (limited to 'src')
-rwxr-xr-xsrc/dynarec/arm64_emitter.h8
-rwxr-xr-xsrc/dynarec/dynarec_arm64_660f.c34
2 files changed, 24 insertions, 18 deletions
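
Opcode 66 0F 70 is PSHUFD: each 32-bit lane of the destination gets the
source lane selected by a 2-bit field of the immediate. As a reference for
the diff below, here is a minimal C sketch of those semantics (illustrative
only; pshufd_ref is an invented name, not box64 code):

#include <stdint.h>

/* Reference model of PSHUFD (66 0F 70 /r ib): destination lane i takes
   the source lane picked by bits [2i+1:2i] of the immediate. */
static void pshufd_ref(uint32_t dst[4], const uint32_t src[4], uint8_t imm8)
{
    for (int i = 0; i < 4; i++)
        dst[i] = src[(imm8 >> (i * 2)) & 3];
}
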
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 252b2e35..3767d4e4 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -770,10 +770,10 @@
 
 // VMOV
 #define VMOV_element(imm5, imm4, Rn, Rd)    (1<<30 | 1<<29 | 0b01110000<<21 | (imm5)<<16 | (imm4)<<11 | 1<<10 | (Rn)<<5 | (Rd))
-#define VMOVeB(Vd, i1, Vn, i2)              EMIT(VMOV_element(((i1)<<1) | 1, i2, Vn, Vd))
-#define VMOVeH(Vd, i1, Vn, i2)              EMIT(VMOV_element(((i1)<<2) | 2, i2<<1, Vn, Vd))
-#define VMOVeS(Vd, i1, Vn, i2)              EMIT(VMOV_element(((i1)<<3) | 4, i2<<2, Vn, Vd))
-#define VMOVeD(Vd, i1, Vn, i2)              EMIT(VMOV_element(((i1)<<4) | 8, i2<<3, Vn, Vd))
+#define VMOVeB(Vd, i1, Vn, i2)              EMIT(VMOV_element(((i1)<<1) | 1, (i2), Vn, Vd))
+#define VMOVeH(Vd, i1, Vn, i2)              EMIT(VMOV_element(((i1)<<2) | 2, (i2)<<1, Vn, Vd))
+#define VMOVeS(Vd, i1, Vn, i2)              EMIT(VMOV_element(((i1)<<3) | 4, (i2)<<2, Vn, Vd))
+#define VMOVeD(Vd, i1, Vn, i2)              EMIT(VMOV_element(((i1)<<4) | 8, (i2)<<3, Vn, Vd))
 
 #define VMOV_from(imm5, Rn, Rd)     (1<<30 | 0<<29 | 0b01110000<<21 | (imm5)<<16 | 0b0011<<11 | 1<<10 | (Rn)<<5 | (Rd))
 #define VMOVQBfrom(Vd, index, Wn)    EMIT(VMOV_from(((index)<<1) | 1, Wn, Vd))
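
The change above only parenthesizes the i2 macro argument before shifting
it, which matters when a caller passes a compound expression as the element
index. A standalone sketch of the precedence pitfall (hypothetical macros,
not the real emitter ones):

/* In C, << binds tighter than |, so an unparenthesized macro argument
   that contains | gets the shift applied to the wrong operand. */
#define IDX_BAD(i2)   (i2<<1)     /* IDX_BAD(1|2)  -> 1|(2<<1) == 5 */
#define IDX_GOOD(i2)  ((i2)<<1)   /* IDX_GOOD(1|2) -> (1|2)<<1 == 6 */
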
diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c
index 6fce1a6f..2a5d071a 100755
--- a/src/dynarec/dynarec_arm64_660f.c
+++ b/src/dynarec/dynarec_arm64_660f.c
@@ -50,6 +50,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
     uint8_t gd, ed;
     uint8_t wback, wb1;
     uint8_t eb1, eb2;
+    uint64_t tmp64u;
     int v0, v1;
     int q0, q1;
     int d0;
@@ -443,27 +444,32 @@
                         VMOVeD(v0, 0, v1, 1);
                         VMOVeD(v0, 1, v1, 0);
                     }
+                } else if(u8==0x00) {
+                    // duplicate lower 32 bits to all lanes
+                    if(v0!=v1) {
+                        VMOVeS(v0, 0, v1, 0);
+                    }
+                    VMOVeS(v0, 1, v1, 0);
+                    VMOVeD(v0, 1, v0, 0);
+                } else if(v0!=v1) {
+                    VMOVeS(v0, 0, v1, (u8>>(0*2))&3);
+                    VMOVeS(v0, 1, v1, (u8>>(1*2))&3);
+                    VMOVeS(v0, 2, v1, (u8>>(2*2))&3);
+                    VMOVeS(v0, 3, v1, (u8>>(3*2))&3);
                 } else {
-                    uint32_t swp[4] = {
+                    uint64_t swp[4] = {
                         (0)|(1<<8)|(2<<16)|(3<<24),
                         (4)|(5<<8)|(6<<16)|(7<<24),
                         (8)|(9<<8)|(10<<16)|(11<<24),
                         (12)|(13<<8)|(14<<16)|(15<<24)
                     };
                     d0 = fpu_get_scratch(dyn);
-                    if(v0==v1) {
-                        q1 = fpu_get_scratch(dyn);
-                        VMOVQ(q1, v1);
-                    } else
-                        q1 = v1;
-                    MOV32w(x2, swp[(u8>>(0*2))&3]);
-                    MOV32w(x3, swp[(u8>>(1*2))&3]);
-                    VMOVQSfrom(d0, 0, x2);
-                    VMOVQSfrom(d0, 1, x3);
-                    MOV32w(x2, swp[(u8>>(2*2))&3]);
-                    MOV32w(x3, swp[(u8>>(3*2))&3]);
-                    VMOVQSfrom(d0, 2, x2);
-                    VMOVQSfrom(d0, 3, x3);
+                    tmp64u = swp[(u8>>(0*2))&3] | (swp[(u8>>(1*2))&3]<<32);
+                    MOV64x(x2, tmp64u);
+                    VMOVQDfrom(d0, 0, x2);
+                    tmp64u = swp[(u8>>(2*2))&3] | (swp[(u8>>(3*2))&3]<<32);
+                    MOV64x(x3, tmp64u);
+                    VMOVQDfrom(d0, 1, x3);
                     VTBLQ1_8(v0, v1, d0);
                 }
             } else {
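
The dynarec hunk adds a u8==0x00 fast path (broadcast the low 32-bit lane)
and a direct four-VMOVeS path when v0!=v1, and streamlines the generic
VTBLQ1_8 fallback: the 16-byte lane-selection table is now built from two
64-bit constants (one MOV64x + VMOVQDfrom per half) instead of four 32-bit
ones, which is also why swp[] was widened to uint64_t so the <<32 shift
cannot overflow. A minimal C sketch of that table construction, mirroring
the diff's swp[] logic (pshufd_tbl_halves is an invented helper name):

#include <stdint.h>

/* Builds the two 64-bit halves of the TBL byte-index table for a given
   PSHUFD immediate, as the new code does at translation time. */
static void pshufd_tbl_halves(uint8_t u8, uint64_t *lo, uint64_t *hi)
{
    /* swp[n] holds the four byte indices of 32-bit source lane n. */
    static const uint64_t swp[4] = {
        (0) | (1 << 8) | (2 << 16) | (3 << 24),
        (4) | (5 << 8) | (6 << 16) | (7 << 24),
        (8) | (9 << 8) | (10 << 16) | (11 << 24),
        (12) | (13 << 8) | (14 << 16) | (15 << 24),
    };
    *lo = swp[(u8 >> 0) & 3] | (swp[(u8 >> 2) & 3] << 32);
    *hi = swp[(u8 >> 4) & 3] | (swp[(u8 >> 6) & 3] << 32);
}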