diff options
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/arm64_emitter.h | 8 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_f20f.c | 24 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_f30f.c | 87 |
3 files changed, 89 insertions, 30 deletions
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h index 8078169a..025f381f 100755 --- a/src/dynarec/arm64_emitter.h +++ b/src/dynarec/arm64_emitter.h @@ -517,10 +517,14 @@ // VLDR #define VMEM_gen(size, opc, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | 0b01<<24 | (opc)<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) +// imm14 must be 2-aligned +#define VLDR32_U12(Dt, Rn, imm14) EMIT(VMEM_gen(0b10, 0b01, ((uint32_t)((imm14)>>2))&0xfff, Rn, Dt)) // imm15 must be 3-aligned #define VLDR64_U12(Dt, Rn, imm15) EMIT(VMEM_gen(0b11, 0b01, ((uint32_t)((imm15)>>3))&0xfff, Rn, Dt)) // imm16 must be 4-aligned #define VLDR128_U12(Qt, Rn, imm16) EMIT(VMEM_gen(0b00, 0b11, ((uint32_t)((imm16)>>4))&0xfff, Rn, Qt)) +// (imm14) must be 2-aligned +#define VSTR32_U12(Dt, Rn, imm14) EMIT(VMEM_gen(0b10, 0b00, ((uint32_t)(imm14>>2))&0xfff, Rn, Dt)) // (imm15) must be 3-aligned #define VSTR64_U12(Dt, Rn, imm15) EMIT(VMEM_gen(0b11, 0b00, ((uint32_t)(imm15>>3))&0xfff, Rn, Dt)) // imm16 must be 4-aligned @@ -538,11 +542,15 @@ #define VMEM_REG_gen(size, opc, Rm, option, S, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | 0b10<<10 | (Rn)<<5 | (Rt)) +#define VLDR32_REG(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b10, 0b01, Rm, 0b011, 0, Rn, Dt)) +#define VLDR32_REG_LSL3(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b10, 0b01, Rm, 0b011, 1, Rn, Dt)) #define VLDR64_REG(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b11, 0b01, Rm, 0b011, 0, Rn, Dt)) #define VLDR64_REG_LSL3(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b11, 0b01, Rm, 0b011, 1, Rn, Dt)) #define VLDR128_REG(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b11, Rm, 0b011, 0, Rn, Dt)) #define VLDR128_REG_LSL4(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b11, Rm, 0b011, 1, Rn, Dt)) +#define VSTR32_REG(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b10, 0b00, Rm, 0b011, 0, Rn, Dt)) +#define VSTR32_REG_LSL3(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b10, 0b00, Rm, 0b011, 1, Rn, Dt)) #define VSTR64_REG(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b11, 0b00, Rm, 0b011, 0, Rn, Dt)) #define 
VSTR64_REG_LSL3(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b11, 0b00, Rm, 0b011, 1, Rn, Dt)) #define VSTR128_REG(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 0, Rn, Dt)) diff --git a/src/dynarec/dynarec_arm64_f20f.c b/src/dynarec/dynarec_arm64_f20f.c index 2c5ed5bc..23a23be4 100755 --- a/src/dynarec/dynarec_arm64_f20f.c +++ b/src/dynarec/dynarec_arm64_f20f.c @@ -39,7 +39,10 @@ } \ } -#define GETGX gd = ((nextop&0x38)>>3)+(rex.r<<3) +#define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) + +#define GETGX(a) gd = ((nextop&0x38)>>3)+(rex.r<<3); \ + a = sse_get_reg(dyn, ninst, x1, gd) uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog) { @@ -67,7 +70,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0x10: INST_NAME("MOVSD Gx, Ex"); nextop = F8; - GETGX; + GETG; if(MODREG) { ed = (nextop&7)+ (rex.b<<3); v0 = sse_get_reg(dyn, ninst, x1, gd); @@ -84,7 +87,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0x11: INST_NAME("MOVSD Ex, Gx"); nextop = F8; - GETGX; + GETG; v0 = sse_get_reg(dyn, ninst, x1, gd); if(MODREG) { ed = (nextop&7)+ (rex.b<<3); @@ -100,8 +103,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0x51: INST_NAME("SQRTSD Gx, Ex"); nextop = F8; - GETGX; - v0 = sse_get_reg(dyn, ninst, x1, gd); + GETGX(v0); d1 = fpu_get_scratch(dyn); GETEX(d0, 0); FSQRTD(d1, d0); @@ -111,8 +113,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0x58: INST_NAME("ADDSD Gx, Ex"); nextop = F8; - GETGX; - v0 = sse_get_reg(dyn, ninst, x1, gd); + GETGX(v0); d1 = fpu_get_scratch(dyn); GETEX(d0, 0); FADDD(d1, v0, d0); // the high part of the vector is erased... 
@@ -121,8 +122,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0x59: INST_NAME("MULSD Gx, Ex"); nextop = F8; - GETGX; - v0 = sse_get_reg(dyn, ninst, x1, gd); + GETGX(v0); d1 = fpu_get_scratch(dyn); GETEX(d0, 0); FMULD(d1, v0, d0); @@ -132,8 +132,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0x5C: INST_NAME("SUBSD Gx, Ex"); nextop = F8; - GETGX; - v0 = sse_get_reg(dyn, ninst, x1, gd); + GETGX(v0); d1 = fpu_get_scratch(dyn); GETEX(d0, 0); FSUBD(d1, v0, d0); @@ -143,8 +142,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0x5E: INST_NAME("DIVSD Gx, Ex"); nextop = F8; - GETGX; - v0 = sse_get_reg(dyn, ninst, x1, gd); + GETGX(v0); d1 = fpu_get_scratch(dyn); GETEX(d0, 0); FDIVD(d1, v0, d0); diff --git a/src/dynarec/dynarec_arm64_f30f.c b/src/dynarec/dynarec_arm64_f30f.c index 68b22031..62d5e965 100755 --- a/src/dynarec/dynarec_arm64_f30f.c +++ b/src/dynarec/dynarec_arm64_f30f.c @@ -22,24 +22,28 @@ #include "dynarec_arm64_functions.h" #include "dynarec_arm64_helper.h" -// Get Ex as a double, not a quad (warning, x2 and x3 may get used) -#define GETEX(a) \ - if((nextop&0xC0)==0xC0) { \ - a = sse_get_reg(dyn, ninst, x1, nextop&7); \ - } else { \ - parity = getedparity(dyn, ninst, addr, nextop, 3); \ - a = fpu_get_scratch_double(dyn); \ - if(parity) { \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 1023, 3); \ - VLDR_64(a, ed, fixedaddress); \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 4095-4, 0);\ - LDR_IMM9(x2, ed, fixedaddress+0); \ - LDR_IMM9(x3, ed, fixedaddress+4); \ - VMOVtoV_D(a, x2, x3); \ - } \ +// Get Ex as a single, not a quad (warning, x2 gets used) +#define GETEX(a, D) \ + if(MODREG) { \ + a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); \ + } else { \ + parity = getedparity(dyn, ninst, addr, nextop, 3, D); \ + a = fpu_get_scratch(dyn); \ + if(parity) { \ + addr = geted(dyn, 
addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, D); \ + VLDR32_U12(a, ed, fixedaddress); \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, D); \ + LDRw_U12(x2, ed, fixedaddress); \ + VMOVQSfrom(a, 0, x2); \ + } \ } +#define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) + +#define GETGX(a) gd = ((nextop&0x38)>>3)+(rex.r<<3); \ + a = sse_get_reg(dyn, ninst, x1, gd) + uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog) { uint8_t opcode = F8; @@ -50,7 +54,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n uint8_t eb1, eb2; int v0, v1; int q0, q1; - int d0; + int d0, d1; int s0; int fixedaddress; int parity; @@ -64,6 +68,55 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n switch(opcode) { + case 0x51: + INST_NAME("SQRTSS Gx, Ex"); + nextop = F8; + GETGX(v0); + d1 = fpu_get_scratch(dyn); + GETEX(d0, 0); + FSQRTS(d1, d0); + VMOVeS(v0, 0, d1, 0); + break; + + case 0x58: + INST_NAME("ADDSS Gx, Ex"); + nextop = F8; + GETGX(v0); + d1 = fpu_get_scratch(dyn); + GETEX(d0, 0); + FADDS(d1, v0, d0); // the high part of the vector is erased... + VMOVeS(v0, 0, d1, 0); + break; + case 0x59: + INST_NAME("MULSS Gx, Ex"); + nextop = F8; + GETGX(v0); + d1 = fpu_get_scratch(dyn); + GETEX(d0, 0); + FMULS(d1, v0, d0); + VMOVeS(v0, 0, d1, 0); + break; + + case 0x5C: + INST_NAME("SUBSS Gx, Ex"); + nextop = F8; + GETGX(v0); + d1 = fpu_get_scratch(dyn); + GETEX(d0, 0); + FSUBS(d1, v0, d0); + VMOVeS(v0, 0, d1, 0); + break; + + case 0x5E: + INST_NAME("DIVSS Gx, Ex"); + nextop = F8; + GETGX(v0); + d1 = fpu_get_scratch(dyn); + GETEX(d0, 0); + FDIVS(d1, v0, d0); + VMOVeS(v0, 0, d1, 0); + break; + default: DEFAULT; } |