| author | ptitSeb <sebastien.chev@gmail.com> | 2021-03-30 11:31:04 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2021-03-30 11:31:04 +0200 |
| commit | 0d59028eb2525cffcb05eb7bf72510aa712168f5 (patch) | |
| tree | d181ae1c680e9bdebde676870d5af8962b98b9a0 /src | |
| parent | 7ae39b38a024ac9040b5329b87dc9e3e49ccbb93 (diff) | |
[DYNAREC] Optimized the 66 0F 70 opcode a bit
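For context, 66 0F 70 is PSHUFD Gx, Ex, Ib: each destination dword is the source dword selected by a 2-bit field of the immediate. A minimal reference sketch of those semantics in C (`pshufd_ref` is an illustrative name, not box64 code):

```c
#include <stdint.h>
#include <string.h>

/* pshufd_ref: reference semantics of PSHUFD dst, src, imm8 (66 0F 70).
   Destination dword i is source dword ((imm8 >> (2*i)) & 3). */
static void pshufd_ref(uint32_t dst[4], const uint32_t src[4], uint8_t imm8)
{
    uint32_t tmp[4];                       /* tolerate dst == src aliasing */
    for (int i = 0; i < 4; ++i)
        tmp[i] = src[(imm8 >> (2*i)) & 3];
    memcpy(dst, tmp, sizeof(tmp));
}
```

The diff below keeps the existing u8==0x4E fast path (swap the two 64-bit halves) and adds two more: u8==0x00 broadcasts dword 0 with element moves, and any other immediate with distinct source and destination registers becomes four VMOVeS element moves, leaving the TBL-based fallback for the v0==v1 case only, where its now-unneeded scratch copy is dropped.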
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/arm64_emitter.h | 8 |
| -rwxr-xr-x | src/dynarec/dynarec_arm64_660f.c | 34 |
2 files changed, 24 insertions, 18 deletions
```diff
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 252b2e35..3767d4e4 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -770,10 +770,10 @@
 
 // VMOV
 #define VMOV_element(imm5, imm4, Rn, Rd)    (1<<30 | 1<<29 | 0b01110000<<21 | (imm5)<<16 | (imm4)<<11 | 1<<10 | (Rn)<<5 | (Rd))
-#define VMOVeB(Vd, i1, Vn, i2)  EMIT(VMOV_element(((i1)<<1) | 1, i2, Vn, Vd))
-#define VMOVeH(Vd, i1, Vn, i2)  EMIT(VMOV_element(((i1)<<2) | 2, i2<<1, Vn, Vd))
-#define VMOVeS(Vd, i1, Vn, i2)  EMIT(VMOV_element(((i1)<<3) | 4, i2<<2, Vn, Vd))
-#define VMOVeD(Vd, i1, Vn, i2)  EMIT(VMOV_element(((i1)<<4) | 8, i2<<3, Vn, Vd))
+#define VMOVeB(Vd, i1, Vn, i2)  EMIT(VMOV_element(((i1)<<1) | 1, (i2), Vn, Vd))
+#define VMOVeH(Vd, i1, Vn, i2)  EMIT(VMOV_element(((i1)<<2) | 2, (i2)<<1, Vn, Vd))
+#define VMOVeS(Vd, i1, Vn, i2)  EMIT(VMOV_element(((i1)<<3) | 4, (i2)<<2, Vn, Vd))
+#define VMOVeD(Vd, i1, Vn, i2)  EMIT(VMOV_element(((i1)<<4) | 8, (i2)<<3, Vn, Vd))
 
 #define VMOV_from(imm5, Rn, Rd) (1<<30 | 0<<29 | 0b01110000<<21 | (imm5)<<16 | 0b0011<<11 | 1<<10 | (Rn)<<5 | (Rd))
 #define VMOVQBfrom(Vd, index, Wn)   EMIT(VMOV_from(((index)<<1) | 1, Wn, Vd))
diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c
index 6fce1a6f..2a5d071a 100755
--- a/src/dynarec/dynarec_arm64_660f.c
+++ b/src/dynarec/dynarec_arm64_660f.c
@@ -50,6 +50,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
     uint8_t gd, ed;
     uint8_t wback, wb1;
     uint8_t eb1, eb2;
+    uint64_t tmp64u;
     int v0, v1;
     int q0, q1;
     int d0;
@@ -443,27 +444,32 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     VMOVeD(v0, 0, v1, 1);
                     VMOVeD(v0, 1, v1, 0);
                 }
+            } else if(u8==0x00) {
+                // duplicate lower 32bits to all spots
+                if(v0!=v1) {
+                    VMOVeS(v0, 0, v1, 0);
+                }
+                VMOVeS(v0, 1, v1, 0);
+                VMOVeD(v0, 1, v0, 0);
+            } else if(v0!=v1) {
+                VMOVeS(v0, 0, v1, (u8>>(0*2))&3);
+                VMOVeS(v0, 1, v1, (u8>>(1*2))&3);
+                VMOVeS(v0, 2, v1, (u8>>(2*2))&3);
+                VMOVeS(v0, 3, v1, (u8>>(3*2))&3);
             } else {
-                uint32_t swp[4] = {
+                uint64_t swp[4] = {
                     (0)|(1<<8)|(2<<16)|(3<<24),
                     (4)|(5<<8)|(6<<16)|(7<<24),
                     (8)|(9<<8)|(10<<16)|(11<<24),
                     (12)|(13<<8)|(14<<16)|(15<<24)
                 };
                 d0 = fpu_get_scratch(dyn);
-                if(v0==v1) {
-                    q1 = fpu_get_scratch(dyn);
-                    VMOVQ(q1, v1);
-                } else
-                    q1 = v1;
-                MOV32w(x2, swp[(u8>>(0*2))&3]);
-                MOV32w(x3, swp[(u8>>(1*2))&3]);
-                VMOVQSfrom(d0, 0, x2);
-                VMOVQSfrom(d0, 1, x3);
-                MOV32w(x2, swp[(u8>>(2*2))&3]);
-                MOV32w(x3, swp[(u8>>(3*2))&3]);
-                VMOVQSfrom(d0, 2, x2);
-                VMOVQSfrom(d0, 3, x3);
+                tmp64u = swp[(u8>>(0*2))&3] | (swp[(u8>>(1*2))&3]<<32);
+                MOV64x(x2, tmp64u);
+                VMOVQDfrom(d0, 0, x2);
+                tmp64u = swp[(u8>>(2*2))&3] | (swp[(u8>>(3*2))&3]<<32);
+                MOV64x(x3, tmp64u);
+                VMOVQDfrom(d0, 1, x3);
                 VTBLQ1_8(v0, v1, d0);
             }
         } else {
```
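Two things are going on above. First, the arm64_emitter.h hunk is a prerequisite for the new element-move path: VMOVeS is now called with expressions such as `(u8>>(1*2))&3` as the `i2` argument, and without the added parentheses the expansion `i2<<2` would read `(u8>>2)&3<<2`; since `<<` binds tighter than `&` in C, that computes `(u8>>2)&12` and encodes the wrong element. Second, the generic fallback builds the 16-byte index vector consumed by VTBLQ1_8: `swp[k]` holds the four byte indices of source dword k, and the commit now packs them two per register with MOV64x/VMOVQDfrom instead of four MOV32w/VMOVQSfrom pairs. A standalone C sketch of that packing, using a hypothetical example immediate (0x1B, "reverse the dwords"):

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* swp[k] = byte indices of source dword k, packed little-endian,
       matching the table in the patched dynarec_arm64_660f.c. */
    const uint64_t swp[4] = {
        (0ULL)  | (1ULL<<8)  | (2ULL<<16)  | (3ULL<<24),
        (4ULL)  | (5ULL<<8)  | (6ULL<<16)  | (7ULL<<24),
        (8ULL)  | (9ULL<<8)  | (10ULL<<16) | (11ULL<<24),
        (12ULL) | (13ULL<<8) | (14ULL<<16) | (15ULL<<24),
    };
    uint8_t u8 = 0x1B;  /* hypothetical PSHUFD immediate for illustration */

    /* Same packing the commit emits with MOV64x + VMOVQDfrom:
       two 64-bit halves of the TBL index vector. */
    uint64_t lo = swp[(u8>>(0*2))&3] | (swp[(u8>>(1*2))&3] << 32);
    uint64_t hi = swp[(u8>>(2*2))&3] | (swp[(u8>>(3*2))&3] << 32);

    printf("TBL index vector: %016" PRIx64 " %016" PRIx64 "\n", lo, hi);
    return 0;
}
```

Widening `swp` to `uint64_t` is what makes `swp[...]<<32` well-defined here: shifting a 32-bit value by 32 is undefined behaviour in C, so the type change and the packing change go together.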