From cdaf19899e9760e225a53ddf67e446d671e2fc8b Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Mon, 22 Mar 2021 20:09:39 +0100 Subject: [DYNAREC] ARMv8 NEON doesn't require alignment --- src/dynarec/dynarec_arm64_0f.c | 24 ++++++------------------ src/dynarec/dynarec_arm64_660f.c | 28 ++++++++++------------------ src/dynarec/dynarec_arm64_f20f.c | 17 ++++------------- src/dynarec/dynarec_arm64_f30f.c | 32 +++++--------------------------- 4 files changed, 25 insertions(+), 76 deletions(-) (limited to 'src') diff --git a/src/dynarec/dynarec_arm64_0f.c b/src/dynarec/dynarec_arm64_0f.c index d060818a..29298506 100755 --- a/src/dynarec/dynarec_arm64_0f.c +++ b/src/dynarec/dynarec_arm64_0f.c @@ -96,11 +96,8 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VMOVQ(v0, v1); } else { v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xff0<<3, 7, rex, 0, 0); - LDRx_U12(x2, ed, fixedaddress); - VMOVQDfrom(v0, 0, x2); - LDRx_U12(x2, ed, fixedaddress+8); - VMOVQDfrom(v0, 1, x2); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + VLDR128_U12(v0, ed, fixedaddress); // no alignment issue with ARMv8 NEON :) } break; case 0x11: @@ -113,11 +110,8 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v1 = sse_get_reg_empty(dyn, ninst, x1, ed); VMOVQ(v1, v0); } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xff0<<3, 7, rex, 0, 0); - VMOVQDto(x2, v0, 0); - STRx_U12(x2, ed, fixedaddress); - VMOVQDto(x2, v0, 1); - STRx_U12(x2, ed, fixedaddress+8); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + VSTR128_U12(v0, ed, fixedaddress); } break; @@ -182,15 +176,9 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(MODREG) { s0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); } else { - parity = getedparity(dyn, ninst, addr, 
nextop, 3, 0); s0 = fpu_get_scratch(dyn); - if(parity) { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - VLDR32_U12(s0, ed, fixedaddress); - } else { - GETED(0); - VMOVQSfrom(s0, 0, ed); - } + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); + VLDR32_U12(s0, ed, fixedaddress); } FCMPS(v0, s0); FCOMI(x1, x2); diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c index a168dd24..5b230443 100755 --- a/src/dynarec/dynarec_arm64_660f.c +++ b/src/dynarec/dynarec_arm64_660f.c @@ -23,13 +23,13 @@ #include "dynarec_arm64_helper.h" // Get EX as a quad -#define GETEX(a, D) \ - if(MODREG) { \ - a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, (1<<4)-1, rex, 0, D); \ - a = fpu_get_scratch(dyn); \ - VLDR128_U12(a, ed, fixedaddress); \ +#define GETEX(a, D) \ + if(MODREG) { \ + a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); \ + } else { \ + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, D); \ + a = fpu_get_scratch(dyn); \ + VLDR128_U12(a, ed, fixedaddress); \ } #define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) @@ -169,18 +169,16 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n ed = xRAX + (nextop&7) + (rex.b<<3); VMOVQDto(ed, v0, 0); } else { - VMOVQDto(x2, v0, 0); // to avoid Bus Error, using regular store addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - STRx_U12(x2, ed, fixedaddress); + VSTR64_U12(v0, ed, fixedaddress); } } else { if(MODREG) { ed = xRAX + (nextop&7) + (rex.b<<3); VMOVSto(ed, v0, 0); } else { - VMOVSto(x2, v0, 0); // to avoid Bus Error, using regular store addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - STRw_U12(x2, ed, fixedaddress); + VSTR32_U12(v0, ed, fixedaddress); } } break; @@ -335,18 +333,12 
@@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n nextop = F8; GETG; v0 = sse_get_reg(dyn, ninst, x1, gd); - parity = getedparity(dyn, ninst, addr, nextop, 7, 0); if(MODREG) { v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); FMOVD(v1, v0); } else { addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - if(parity) { - VSTR64_U12(v0, ed, fixedaddress); - } else { - VMOVQDto(x2, v0, 0); - STRx_U12(x2, ed, fixedaddress); - } + VSTR64_U12(v0, ed, fixedaddress); } break; diff --git a/src/dynarec/dynarec_arm64_f20f.c b/src/dynarec/dynarec_arm64_f20f.c index 71e0a049..95bca08a 100755 --- a/src/dynarec/dynarec_arm64_f20f.c +++ b/src/dynarec/dynarec_arm64_f20f.c @@ -27,16 +27,9 @@ if(MODREG) { \ a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); \ } else { \ - parity = getedparity(dyn, ninst, addr, nextop, 7, D); \ a = fpu_get_scratch(dyn); \ - if(parity) { \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, D); \ - VLDR64_U12(a, ed, fixedaddress); \ - } else { \ - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, D); \ - LDRx_U12(x2, ed, fixedaddress+0); \ - VMOVQDfrom(a, 0, x2); \ - } \ + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, D); \ + VLDR64_U12(a, ed, fixedaddress); \ } #define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) @@ -79,8 +72,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } else { v0 = sse_get_reg_empty(dyn, ninst, x1, gd); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - LDRx_U12(x2, ed, fixedaddress); - FMOVDx(v0, x2); // upper part reseted + VLDR64_U12(v0, ed, fixedaddress); // upper part reset } break; case 0x11: @@ -93,9 +85,8 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n d0 = sse_get_reg(dyn, ninst, x1, ed); VMOV(d0, v0); } else { - 
VMOVQDto(x2, v0, 0); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0); - STRx_U12(x2, ed, fixedaddress); + VSTR64_U12(v0, ed, fixedaddress); } break; diff --git a/src/dynarec/dynarec_arm64_f30f.c b/src/dynarec/dynarec_arm64_f30f.c index 93c32fbb..5b0352f7 100755 --- a/src/dynarec/dynarec_arm64_f30f.c +++ b/src/dynarec/dynarec_arm64_f30f.c @@ -27,15 +27,9 @@ if(MODREG) { \ a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3)); \ } else { \ - parity = getedparity(dyn, ninst, addr, nextop, 3, D); \ a = fpu_get_scratch(dyn); \ addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, D); \ - if(parity) { \ - VLDR32_U12(a, ed, fixedaddress); \ - } else { \ - LDRw_U12(x2, ed, fixedaddress); \ - VMOVQSfrom(a, 0, x2); \ - } \ + VLDR32_U12(a, ed, fixedaddress); \ } #define GETG gd = ((nextop&0x38)>>3)+(rex.r<<3) @@ -76,15 +70,8 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n VMOVeS(v0, 0, q0, 0); } else { v0 = sse_get_reg_empty(dyn, ninst, x1, gd); - parity = getedparity(dyn, ninst, addr, nextop, 3, 0); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - if(parity) { - VLDR32_U12(v0, ed, fixedaddress); - } else { - LDRw_U12(x2, ed, fixedaddress); // to avoid bus errors - VEORQ(v0, v0, v0); - VMOVQSfrom(v0, 0, x2); - } + VLDR32_U12(v0, ed, fixedaddress); } break; case 0x11: @@ -96,14 +83,8 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n q0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3)); VMOVeS(q0, 0, v0, 0); } else { - parity = getedparity(dyn, ninst, addr, nextop, 3, 0); addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0); - if(parity) { - VSTR32_U12(v0, ed, fixedaddress); - } else { - VMOVSto(x2, v0, 0); - STRw_U12(x2, ed, fixedaddress); - } + VSTR32_U12(v0, ed, fixedaddress); } break; @@ -202,11 +183,8 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, 
uintptr_t addr, uintptr_t ip, int n VMOVQ(v1, v0); } else { v0 = sse_get_reg(dyn, ninst, x1, gd); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xff0<<3, 7, rex, 0, 0); - VMOVQDto(x2, v0, 0); - STRx_U12(x2, ed, fixedaddress+0); - VMOVQDto(x2, v0, 1); - STRx_U12(x2, ed, fixedaddress+8); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); + VSTR128_U12(v0, ed, fixedaddress); } break; -- cgit 1.4.1