| author | ptitSeb <sebastien.chev@gmail.com> | 2023-10-16 20:20:26 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-10-16 20:20:26 +0200 |
| commit | f67d0709b77f292ef3c69de0be67f1f3a76474bb (patch) | |
| tree | 46cf31d769881396c46f5d5c3a4ec59fdec23f94 /src | |
| parent | 2e66d603d3b0e9243bb00642a38def375b3a1a94 (diff) | |
| download | box64-f67d0709b77f292ef3c69de0be67f1f3a76474bb.tar.gz box64-f67d0709b77f292ef3c69de0be67f1f3a76474bb.zip | |
[ARM64_DYNAREC] Added code generation using the ARMv8.1 Atomics extension for most LOCK-prefixed opcodes
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 192 |
|---|---|---|
| -rw-r--r-- | src/dynarec/arm64/arm64_printer.c | 50 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_00.c | 36 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_66f0.c | 194 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_f0.c | 576 |
5 files changed, 807 insertions, 241 deletions
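
All of the new fast paths in this diff are guarded by an `arm64_atomics` runtime flag, which is set during CPU detection elsewhere in the box64 tree (not part of this commit). As a hedged illustration only, this is how FEAT_LSE availability is typically probed on AArch64 Linux through the auxiliary vector; box64's actual detection code may differ:

```c
#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS (1UL << 8)    // bit defined by the kernel in asm/hwcap.h for AArch64
#endif

int main(void)
{
    // The kernel advertises FEAT_LSE (the ARMv8.1 atomic instructions) in AT_HWCAP.
    unsigned long hwcap = getauxval(AT_HWCAP);
    printf("LSE atomics: %s\n", (hwcap & HWCAP_ATOMICS) ? "available" : "not available");
    return 0;
}
```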
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 094adbe0..9b6cc2e2 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -427,6 +427,7 @@
 // Data Memory Barrier
 #define DMB_gen(CRm) (0b1101010100<<22 | 0b011<<16 | 0b0011<<12 | (CRm)<<8 | 1<<7 | 0b01<<5 | 0b11111)
 #define DMB_ISH() EMIT(DMB_gen(0b1011))
+#define DMB_SY() EMIT(DMB_gen(0b1111))
 
 // Break
 #define BRK_gen(imm16) (0b11010100<<24 | 0b001<<21 | (((imm16)&0xffff)<<5))
@@ -1938,4 +1939,195 @@
 #define PMULL_128(Rd, Rn, Rm) EMIT(PMULL_gen(0, 0b11, Rm, Rn, Rd))
 #define PMULL2_128(Rd, Rn, Rm) EMIT(PMULL_gen(1, 0b11, Rm, Rn, Rd))
 
+// Atomic extension
+#define ATOMIC_gen(size, A, R, Rs, opc, Rn, Rt) ((size)<<30 | 0b111<<27 | (A)<<23 | (R)<<22 | 1<<21 | (Rs)<<16 | (opc)<<12 | (Rn)<<5 | (Rt))
+// Atomic ADD
+#define LDADDxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b000, Rn, Rt))
+#define LDADDAxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 0, Rs, 0b000, Rn, Rt))
+#define LDADDALxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 1, Rs, 0b000, Rn, Rt))
+#define LDADDLxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 1, Rs, 0b000, Rn, Rt))
+#define STADDxw(Rs, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b000, Rn, 0b11111))
+#define STADDLxw(Rs, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 1, Rs, 0b000, Rn, 0b11111))
+#define LDADDB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 0, 0, Rs, 0b000, Rn, Rt))
+#define LDADDAB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 1, 0, Rs, 0b000, Rn, Rt))
+#define LDADDALB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 1, 1, Rs, 0b000, Rn, Rt))
+#define LDADDLB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 0, 1, Rs, 0b000, Rn, Rt))
+#define STADDB(Rs, Rn) EMIT(ATOMIC_gen(0b00, 0, 0, Rs, 0b000, Rn, 0b11111))
+#define STADDLB(Rs, Rn) EMIT(ATOMIC_gen(0b00, 0, 1, Rs, 0b000, Rn, 0b11111))
+#define LDADDH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b000, Rn, Rt))
+#define LDADDAH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 1, 0, Rs, 0b000, Rn, Rt))
+#define LDADDALH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 1, 1, Rs, 0b000, Rn, Rt))
+#define LDADDLH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b000, Rn, Rt))
+#define STADDH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b000, Rn, 0b11111))
+#define STADDLH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b000, Rn, 0b11111))
+// Atomic AND with complement (i.e. BIC)
+#define LDCLRxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b001, Rn, Rt))
+#define LDCLRAxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 0, Rs, 0b001, Rn, Rt))
+#define LDCLRALxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 1, Rs, 0b001, Rn, Rt))
+#define LDCLRLxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 1, Rs, 0b001, Rn, Rt))
+#define STCLRxw(Rs, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b001, Rn, 0b11111))
+#define STCLRLxw(Rs, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 1, Rs, 0b001, Rn, 0b11111))
+#define LDCLRB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 0, 0, Rs, 0b001, Rn, Rt))
+#define LDCLRAB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 1, 0, Rs, 0b001, Rn, Rt))
+#define LDCLRALB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 1, 1, Rs, 0b001, Rn, Rt))
+#define LDCLRLB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 0, 1, Rs, 0b001, Rn, Rt))
+#define STCLRB(Rs, Rn) EMIT(ATOMIC_gen(0b00, 0, 0, Rs, 0b001, Rn, 0b11111))
+#define STCLRLB(Rs, Rn) EMIT(ATOMIC_gen(0b00, 0, 1, Rs, 0b001, Rn, 0b11111))
+#define LDCLRH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b001, Rn, Rt))
+#define LDCLRAH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 1, 0, Rs, 0b001, Rn, Rt))
+#define LDCLRALH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 1, 1, Rs, 0b001, Rn, Rt))
+#define LDCLRLH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b001, Rn, Rt))
+#define STCLRH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b001, Rn, 0b11111))
+#define STCLRLH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b001, Rn, 0b11111))
+// Atomic EOR
+#define LDEORxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b010, Rn, Rt))
+#define LDEORAxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 0, Rs, 0b010, Rn, Rt))
+#define LDEORALxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 1, Rs, 0b010, Rn, Rt))
+#define LDEORLxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 1, Rs, 0b010, Rn, Rt))
+#define STEORxw(Rs, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b010, Rn, 0b11111))
+#define STEORLxw(Rs, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 1, Rs, 0b010, Rn, 0b11111))
+#define LDEORB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 0, 0, Rs, 0b010, Rn, Rt))
+#define LDEORAB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 1, 0, Rs, 0b010, Rn, Rt))
+#define LDEORALB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 1, 1, Rs, 0b010, Rn, Rt))
+#define LDEORLB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 0, 1, Rs, 0b010, Rn, Rt))
+#define STEORB(Rs, Rn) EMIT(ATOMIC_gen(0b00, 0, 0, Rs, 0b010, Rn, 0b11111))
+#define STEORLB(Rs, Rn) EMIT(ATOMIC_gen(0b00, 0, 1, Rs, 0b010, Rn, 0b11111))
+#define LDEORH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b010, Rn, Rt))
+#define LDEORAH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 1, 0, Rs, 0b010, Rn, Rt))
+#define LDEORALH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 1, 1, Rs, 0b010, Rn, Rt))
+#define LDEORLH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b010, Rn, Rt))
+#define STEORH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b010, Rn, 0b11111))
+#define STEORLH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b010, Rn, 0b11111))
+// Atomic OR
+#define LDSETxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b011, Rn, Rt))
+#define LDSETAxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 0, Rs, 0b011, Rn, Rt))
+#define LDSETALxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 1, Rs, 0b011, Rn, Rt))
+#define LDSETLxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 1, Rs, 0b011, Rn, Rt))
+#define STSETxw(Rs, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b011, Rn, 0b11111))
+#define STSETLxw(Rs, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 1, Rs, 0b011, Rn, 0b11111))
+#define LDSETB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 0, 0, Rs, 0b011, Rn, Rt))
+#define LDSETAB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 1, 0, Rs, 0b011, Rn, Rt))
+#define LDSETALB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 1, 1, Rs, 0b011, Rn, Rt))
+#define LDSETLB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 0, 1, Rs, 0b011, Rn, Rt))
+#define STSETB(Rs, Rn) EMIT(ATOMIC_gen(0b00, 0, 0, Rs, 0b011, Rn, 0b11111))
+#define STSETLB(Rs, Rn) EMIT(ATOMIC_gen(0b00, 0, 1, Rs, 0b011, Rn, 0b11111))
+#define LDSETH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b011, Rn, Rt))
+#define LDSETAH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 1, 0, Rs, 0b011, Rn, Rt))
+#define LDSETALH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 1, 1, Rs, 0b011, Rn, Rt))
+#define LDSETLH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b011, Rn, Rt))
+#define STSETH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b011, Rn, 0b11111))
+#define STSETLH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b011, Rn, 0b11111))
+// Atomic Signed Max
+#define LDSMAXxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b100, Rn, Rt))
+#define LDSMAXAxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 0, Rs, 0b100, Rn, Rt))
+#define LDSMAXALxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 1, Rs, 0b100, Rn, Rt))
+#define LDSMAXLxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 1, Rs, 0b100, Rn, Rt))
+#define STSMAXxw(Rs, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b100, Rn, 0b11111))
+#define STSMAXLxw(Rs, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 1, Rs, 0b100, Rn, 0b11111))
+#define LDSMAXB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 0, 0, Rs, 0b100, Rn, Rt))
+#define LDSMAXAB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 1, 0, Rs, 0b100, Rn, Rt))
+#define LDSMAXALB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 1, 1, Rs, 0b100, Rn, Rt))
+#define LDSMAXLB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 0, 1, Rs, 0b100, Rn, Rt))
+#define STSMAXB(Rs, Rn) EMIT(ATOMIC_gen(0b00, 0, 0, Rs, 0b100, Rn, 0b11111))
+#define STSMAXLB(Rs, Rn) EMIT(ATOMIC_gen(0b00, 0, 1, Rs, 0b100, Rn, 0b11111))
+#define LDSMAXH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b100, Rn, Rt))
+#define LDSMAXAH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 1, 0, Rs, 0b100, Rn, Rt))
+#define LDSMAXALH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 1, 1, Rs, 0b100, Rn, Rt))
+#define LDSMAXLH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b100, Rn, Rt))
+#define STSMAXH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b100, Rn, 0b11111))
+#define STSMAXLH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b100, Rn, 0b11111))
+// Atomic Signed Min
+#define LDSMINxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b101, Rn, Rt))
+#define LDSMINAxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 0, Rs, 0b101, Rn, Rt))
+#define LDSMINALxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 1, Rs, 0b101, Rn, Rt))
+#define LDSMINLxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 1, Rs, 0b101, Rn, Rt))
+#define STSMINxw(Rs, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b101, Rn, 0b11111))
+#define STSMINLxw(Rs, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 1, Rs, 0b101, Rn, 0b11111))
+#define LDSMINB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 0, 0, Rs, 0b101, Rn, Rt))
+#define LDSMINAB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 1, 0, Rs, 0b101, Rn, Rt))
+#define LDSMINALB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 1, 1, Rs, 0b101, Rn, Rt))
+#define LDSMINLB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 0, 1, Rs, 0b101, Rn, Rt))
+#define STSMINB(Rs, Rn) EMIT(ATOMIC_gen(0b00, 0, 0, Rs, 0b101, Rn, 0b11111))
+#define STSMINLB(Rs, Rn) EMIT(ATOMIC_gen(0b00, 0, 1, Rs, 0b101, Rn, 0b11111))
+#define LDSMINH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b101, Rn, Rt))
+#define LDSMINAH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 1, 0, Rs, 0b101, Rn, Rt))
+#define LDSMINALH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 1, 1, Rs, 0b101, Rn, Rt))
+#define LDSMINLH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b101, Rn, Rt))
+#define STSMINH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b101, Rn, 0b11111))
+#define STSMINLH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b101, Rn, 0b11111))
+// Atomic Unsigned Max
+#define LDUMAXxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b110, Rn, Rt))
+#define LDUMAXAxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 0, Rs, 0b110, Rn, Rt))
+#define LDUMAXALxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 1, Rs, 0b110, Rn, Rt))
+#define LDUMAXLxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 1, Rs, 0b110, Rn, Rt))
+#define STUMAXxw(Rs, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b110, Rn, 0b11111))
+#define STUMAXLxw(Rs, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 1, Rs, 0b110, Rn, 0b11111))
+#define LDUMAXB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 0, 0, Rs, 0b110, Rn, Rt))
+#define LDUMAXAB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 1, 0, Rs, 0b110, Rn, Rt))
+#define LDUMAXALB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 1, 1, Rs, 0b110, Rn, Rt))
+#define LDUMAXLB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 0, 1, Rs, 0b110, Rn, Rt))
+#define STUMAXB(Rs, Rn) EMIT(ATOMIC_gen(0b00, 0, 0, Rs, 0b110, Rn, 0b11111))
+#define STUMAXLB(Rs, Rn) EMIT(ATOMIC_gen(0b00, 0, 1, Rs, 0b110, Rn, 0b11111))
+#define LDUMAXH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b110, Rn, Rt))
+#define LDUMAXAH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 1, 0, Rs, 0b110, Rn, Rt))
+#define LDUMAXALH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 1, 1, Rs, 0b110, Rn, Rt))
+#define LDUMAXLH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b110, Rn, Rt))
+#define STUMAXH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b110, Rn, 0b11111))
+#define STUMAXLH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b110, Rn, 0b11111))
+// Atomic Unsigned Min
+#define LDUMINxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b111, Rn, Rt))
+#define LDUMINAxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 0, Rs, 0b111, Rn, Rt))
+#define LDUMINALxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 1, 1, Rs, 0b111, Rn, Rt))
+#define LDUMINLxw(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 1, Rs, 0b111, Rn, Rt))
+#define STUMINxw(Rs, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b111, Rn, 0b11111))
+#define STUMINLxw(Rs, Rn) EMIT(ATOMIC_gen(0b10+rex.w, 0, 1, Rs, 0b111, Rn, 0b11111))
+#define LDUMINB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 0, 0, Rs, 0b111, Rn, Rt))
+#define LDUMINAB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 1, 0, Rs, 0b111, Rn, Rt))
+#define LDUMINALB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 1, 1, Rs, 0b111, Rn, Rt))
+#define LDUMINLB(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b00, 0, 1, Rs, 0b111, Rn, Rt))
+#define STUMINB(Rs, Rn) EMIT(ATOMIC_gen(0b00, 0, 0, Rs, 0b111, Rn, 0b11111))
+#define STUMINLB(Rs, Rn) EMIT(ATOMIC_gen(0b00, 0, 1, Rs, 0b111, Rn, 0b11111))
+#define LDUMINH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b111, Rn, Rt))
+#define LDUMINAH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 1, 0, Rs, 0b111, Rn, Rt))
+#define LDUMINALH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 1, 1, Rs, 0b111, Rn, Rt))
+#define LDUMINLH(Rs, Rt, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b111, Rn, Rt))
+#define STUMINH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b111, Rn, 0b11111))
+#define STUMINLH(Rs, Rn) EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b111, Rn, 0b11111))
+
+#define SWAP_gen(size, A, R, Rs, Rn, Rt) ((size)<<30 | 0b111<<27 | (A)<<23 | (R)<<22 | 1<<21 | (Rs)<<16 | 1<<15 | (Rn)<<5 | (Rt))
+#define SWPxw(Rs, Rt, Rn) EMIT(SWAP_gen(0b10+rex.w, 0, 0, Rs, Rn, Rt))
+#define SWPAxw(Rs, Rt, Rn) EMIT(SWAP_gen(0b10+rex.w, 1, 0, Rs, Rn, Rt))
+#define SWPALxw(Rs, Rt, Rn) EMIT(SWAP_gen(0b10+rex.w, 1, 1, Rs, Rn, Rt))
+#define SWPLxw(Rs, Rt, Rn) EMIT(SWAP_gen(0b10+rex.w, 0, 1, Rs, Rn, Rt))
+#define SWPB(Rs, Rt, Rn) EMIT(SWAP_gen(0b00, 0, 0, Rs, Rn, Rt))
+#define SWPAB(Rs, Rt, Rn) EMIT(SWAP_gen(0b00, 1, 0, Rs, Rn, Rt))
+#define SWPALB(Rs, Rt, Rn) EMIT(SWAP_gen(0b00, 1, 1, Rs, Rn, Rt))
+#define SWPLB(Rs, Rt, Rn) EMIT(SWAP_gen(0b00, 0, 1, Rs, Rn, Rt))
+#define SWPH(Rs, Rt, Rn) EMIT(SWAP_gen(0b01, 0, 0, Rs, Rn, Rt))
+#define SWPAH(Rs, Rt, Rn) EMIT(SWAP_gen(0b01, 1, 0, Rs, Rn, Rt))
+#define SWPALH(Rs, Rt, Rn) EMIT(SWAP_gen(0b01, 1, 1, Rs, Rn, Rt))
+#define SWPLH(Rs, Rt, Rn) EMIT(SWAP_gen(0b01, 0, 1, Rs, Rn, Rt))
+
+#define CAS_gen(size, L, Rs, O0, Rn, Rt) ((size)<<30 | 0b001000<<24 | 1<<23 | (L)<<22 | 1<<21 | (Rs)<<16 | (O0)<<15 | 0b11111<<10 | (Rn)<<5 | (Rt))
+// Compare and Swap: compare Rs with [Rn], write Rt if same, return old [Rn] in Rs
+#define CASxw(Rs, Rt, Rn) EMIT(CAS_gen(0b10+rex.w, 0, Rs, 0, Rn, Rt))
+#define CASAxw(Rs, Rt, Rn) EMIT(CAS_gen(0b10+rex.w, 1, Rs, 0, Rn, Rt))
+#define CASALxw(Rs, Rt, Rn) EMIT(CAS_gen(0b10+rex.w, 1, Rs, 1, Rn, Rt))
+#define CASLxw(Rs, Rt, Rn) EMIT(CAS_gen(0b10+rex.w, 0, Rs, 1, Rn, Rt))
+#define CASB(Rs, Rt, Rn) EMIT(CAS_gen(0b00, 0, Rs, 0, Rn, Rt))
+#define CASAB(Rs, Rt, Rn) EMIT(CAS_gen(0b00, 1, Rs, 0, Rn, Rt))
+#define CASALB(Rs, Rt, Rn) EMIT(CAS_gen(0b00, 1, Rs, 1, Rn, Rt))
+#define CASLB(Rs, Rt, Rn) EMIT(CAS_gen(0b00, 0, Rs, 1, Rn, Rt))
+#define CASH(Rs, Rt, Rn) EMIT(CAS_gen(0b01, 0, Rs, 0, Rn, Rt))
+#define CASAH(Rs, Rt, Rn) EMIT(CAS_gen(0b01, 1, Rs, 0, Rn, Rt))
+#define CASALH(Rs, Rt, Rn) EMIT(CAS_gen(0b01, 1, Rs, 1, Rn, Rt))
+#define CASLH(Rs, Rt, Rn) EMIT(CAS_gen(0b01, 0, Rs, 1, Rn, Rt))
+
+#define CASP_gen(size, L, Rs, O0, Rn, Rt) ((size)<<30 | 0b001000<<24 | 0<<23 | (L)<<22 | 1<<21 | (Rs)<<16 | (O0)<<15 | 0b11111<<10 | (Rn)<<5 | (Rt))
+// Compare and Swap pair: compare Rs,Rs+1 with [Rn], write Rt,Rt+1 if same, return old [Rn] in Rs,Rs+1
+#define CASPxw(Rs, Rt, Rn) EMIT(CASP_gen(0b00+rex.w, 0, Rs, 0, Rn, Rt))
+#define CASPAxw(Rs, Rt, Rn) EMIT(CASP_gen(0b00+rex.w, 1, Rs, 0, Rn, Rt))
+#define CASPALxw(Rs, Rt, Rn) EMIT(CASP_gen(0b00+rex.w, 1, Rs, 1, Rn, Rt))
+#define CASPLxw(Rs, Rt, Rn) EMIT(CASP_gen(0b00+rex.w, 0, Rs, 1, Rn, Rt))
+
 #endif //__ARM64_EMITTER_H__
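
A quick way to sanity-check the `ATOMIC_gen` bit layout above is to rebuild one known encoding. The snippet below is not part of the patch; `atomic_gen()` is a hypothetical function rewrite of the macro, and `0x38E00041` is the architectural encoding of `ldaddalb w0, w1, [x2]` (size=00, A=1, R=1, opc=000). Binary literals are used as GNU C, as in box64 itself:

```c
#include <assert.h>
#include <stdint.h>

// Same bit layout as the patch's ATOMIC_gen macro, written as a function for clarity.
static uint32_t atomic_gen(uint32_t size, uint32_t A, uint32_t R,
                           uint32_t Rs, uint32_t opc, uint32_t Rn, uint32_t Rt)
{
    return size<<30 | 0b111<<27 | A<<23 | R<<22 | 1<<21 | Rs<<16 | opc<<12 | Rn<<5 | Rt;
}

int main(void)
{
    // LDADDALB w0, w1, [x2]: byte size, acquire+release, ADD opcode.
    assert(atomic_gen(0b00, 1, 1, /*Rs=*/0, 0b000, /*Rn=*/2, /*Rt=*/1) == 0x38E00041u);
    return 0;
}
```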
snprintf(buff, sizeof(buff), "CASP%s%s %s,%s, %s,%s, [%s]", a.o?"A":"", a.L?"L":"", sf?Xt[Rs]:Wt[Rs], sf?Xt[Rs+1]:Wt[Rs+1], sf?Xt[Rt]:Wt[Rt], sf?Xt[Rt+1]:Wt[Rt+1], XtSp[Rn]); + return buff; + } + // SWPxw + if(isMask(opcode, "1f111000AR1sssss100000nnnnnttttt", &a)) { + snprintf(buff, sizeof(buff), "SWP%s%s %s, %s, [%s]", a.A?"A":"", a.R?"L":"", sf?Xt[Rs]:Wt[Rs], sf?Xt[Rt]:Wt[Rt], XtSp[Rn]); + return buff; + } + // SWPxw + if(isMask(opcode, "0f111000AR1sssss100000nnnnnttttt", &a)) { + snprintf(buff, sizeof(buff), "CAS%s%s%s %s, %s, [%s]", a.A?"A":"", a.R?"L":"", sf?"H":"B", Xt[Rs], Xt[Rt], XtSp[Rn]); + return buff; + } + // LDXXXxw + if(isMask(opcode, "1f111000AR1sssss0ooo00nnnnnttttt", &a)) { + const char* ops[] = { "ADD", "CLR", "EOR", "SET", "SMAX, SMIN", "UMAX", "UMIN" }; + if((Rt == 0b11111) && !a.A) { + snprintf(buff, sizeof(buff), "ST%s%s %s, [%s]", ops[a.o], a.R?"L":"", sf?Xt[Rs]:Wt[Rs], XtSp[Rn]); + } else { + snprintf(buff, sizeof(buff), "LD%s%s%s %s, %s, [%s]", ops[a.o], a.A?"A":"", a.R?"L":"", sf?Xt[Rs]:Wt[Rs], sf?Xt[Rt]:Wt[Rt], XtSp[Rn]); + } + return buff; + } + // LDXXX B/H + if(isMask(opcode, "0f111000AR1sssss0ooo00nnnnnttttt", &a)) { + const char* ops[] = { "ADD", "CLR", "EOR", "SET", "SMAX, SMIN", "UMAX", "UMIN" }; + if((Rt == 0b11111) && !a.A) { + snprintf(buff, sizeof(buff), "ST%s%s%s %s, [%s]", ops[a.o], a.R?"L":"", sf?"H":"B", Xt[Rs], XtSp[Rn]); + } else { + snprintf(buff, sizeof(buff), "LD%s%s%s%s %s, %s, [%s]", ops[a.o], a.A?"A":"", a.R?"L":"", sf?"H":"B", Xt[Rs], Xt[Rt], XtSp[Rn]); + } + return buff; + } + snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode)); return buff; diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index d6777252..c42adeaf 100644 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -1055,13 +1055,17 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SMDMB(); GETGB(x4); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - MARKLOCK; - // do the swap with exclusive locking - LDAXRB(x1, ed); - // do the swap 4 -> strb(ed), 1 -> gd - STLXRB(x3, x4, ed); - CBNZx_MARKLOCK(x3); - SMDMB(); + if(arm64_atomics) { + SWPALB(x4, x1, ed); + } else { + MARKLOCK; + // do the swap with exclusive locking + LDAXRB(x1, ed); + // do the swap 4 -> strb(ed), 1 -> gd + STLXRB(x3, x4, ed); + CBNZx_MARKLOCK(x3); + SMDMB(); + } BFIx(gb1, x1, gb2, 8); } break; @@ -1080,13 +1084,21 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SMDMB(); TSTx_mask(ed, 1, 0, 1+rex.w); // mask=3 or 7 B_MARK(cNE); - MARKLOCK; - LDAXRxw(x1, ed); - STLXRxw(x3, gd, ed); - CBNZx_MARKLOCK(x3); - B_MARK2_nocond; + if(arm64_atomics) { + SWPALxw(gd, gd, ed); + B_NEXT_nocond; + } else { + MARKLOCK; + LDAXRxw(x1, ed); + STLXRxw(x3, gd, ed); + CBNZx_MARKLOCK(x3); + B_MARK2_nocond; + } MARK; LDRxw_U12(x1, ed, 0); + LDAXRB(x3, ed); + STLXRB(x3, gd, ed); + CBNZx_MARK(x3); STRxw_U12(gd, ed, 0); MARK2; SMDMB(); diff --git a/src/dynarec/arm64/dynarec_arm64_66f0.c b/src/dynarec/arm64/dynarec_arm64_66f0.c index fe32cf99..c5faa8da 100644 --- a/src/dynarec/arm64/dynarec_arm64_66f0.c +++ b/src/dynarec/arm64/dynarec_arm64_66f0.c @@ -61,13 +61,21 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n BFIx(ed, x6, 0, 16); } else { addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0); - MARKLOCK; - LDAXRH(x1, wback); - emit_or16(dyn, ninst, 
diff --git a/src/dynarec/arm64/dynarec_arm64_66f0.c b/src/dynarec/arm64/dynarec_arm64_66f0.c
index fe32cf99..c5faa8da 100644
--- a/src/dynarec/arm64/dynarec_arm64_66f0.c
+++ b/src/dynarec/arm64/dynarec_arm64_66f0.c
@@ -61,13 +61,21 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 BFIx(ed, x6, 0, 16);
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                MARKLOCK;
-                LDAXRH(x1, wback);
-                emit_or16(dyn, ninst, x1, x5, x3, x4);
-                STLXRH(x3, x1, wback);
-                CBNZx_MARKLOCK(x3);
+                if(arm64_atomics) {
+                    UFLAG_IF {
+                        LDSETALH(x5, x1, wback);
+                        emit_or16(dyn, ninst, x1, x5, x3, x4);
+                    } else {
+                        STSETLH(x5, wback);
+                    }
+                } else {
+                    MARKLOCK;
+                    LDAXRH(x1, wback);
+                    emit_or16(dyn, ninst, x1, x5, x3, x4);
+                    STLXRH(x3, x1, wback);
+                    CBNZx_MARKLOCK(x3);
+                }
             }
-            SMDMB();
             break;
 
         case 0x0F:
@@ -93,14 +101,18 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 TSTx_mask(wback, 1, 0, 0);    // mask=1
                 B_MARK3(cNE);
                 // Aligned version
-                MARKLOCK;
-                LDAXRH(x1, wback);
-                CMPSw_REG(x6, x1);
-                B_MARK(cNE);
-                // EAX == Ed
-                STLXRH(x4, gd, wback);
-                CBNZx_MARKLOCK(x4);
-                // done
+                if(arm64_atomics) {
+                    CASALH(x6, x1, wback);
+                } else {
+                    MARKLOCK;
+                    LDAXRH(x1, wback);
+                    CMPSw_REG(x6, x1);
+                    B_MARK(cNE);
+                    // EAX == Ed
+                    STLXRH(x4, gd, wback);
+                    CBNZx_MARKLOCK(x4);
+                    // done
+                }
                 B_MARK_nocond;
                 // Unaligned version
                 MARK3;
@@ -134,11 +146,15 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 BFIx(ed, x5, 0, 16);
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                MARKLOCK;
-                LDAXRH(x1, wback);
-                ADDxw_REG(x4, x1, x5);
-                STLXRH(x3, x4, wback);
-                CBNZx_MARKLOCK(x3);
+                if(arm64_atomics) {
+                    LDADDALH(x5, x1, wback);
+                } else {
+                    MARKLOCK;
+                    LDAXRH(x1, wback);
+                    ADDxw_REG(x4, x1, x5);
+                    STLXRH(x3, x4, wback);
+                    CBNZx_MARKLOCK(x3);
+                }
                 IFX(X_ALL|X_PEND) {
                     MOVxw_REG(x2, x1);
                     emit_add16(dyn, ninst, x2, x5, x3, x4);
@@ -199,11 +215,18 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     MOV32w(x5, i32);
                     TSTx_mask(wback, 1, 0, 0);    // mask=1
                     B_MARK(cNE);
-                    MARKLOCK;
-                    LDAXRH(x1, wback);
-                    emit_add16(dyn, ninst, x1, x5, x3, x4);
-                    STLXRH(x3, x1, wback);
-                    CBNZx_MARKLOCK(x3);
+                    if(arm64_atomics) {
+                        LDADDALH(x5, x1, wback);
+                        UFLAG_IF {
+                            emit_add16(dyn, ninst, x1, x5, x3, x4);
+                        }
+                    } else {
+                        MARKLOCK;
+                        LDAXRH(x1, wback);
+                        emit_add16(dyn, ninst, x1, x5, x3, x4);
+                        STLXRH(x3, x1, wback);
+                        CBNZx_MARKLOCK(x3);
+                    }
                     B_NEXT_nocond;
                     MARK;   // unaligned! also, not enough
                     LDRH_U12(x1, wback, 0);
@@ -229,11 +252,20 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?2:1);
                     if(opcode==0x81) i32 = F16S; else i32 = F8S;
                     MOV32w(x5, i32);
-                    MARKLOCK;
-                    LDAXRH(x1, wback);
-                    emit_or16(dyn, ninst, x1, x5, x3, x4);
-                    STLXRH(x3, x1, wback);
-                    CBNZx_MARKLOCK(x3);
+                    if(arm64_atomics) {
+                        UFLAG_IF {
+                            LDSETALH(x5, x1, wback);
+                            emit_or16(dyn, ninst, x1, x5, x3, x4);
+                        } else {
+                            STSETLH(x5, wback);
+                        }
+                    } else {
+                        MARKLOCK;
+                        LDAXRH(x1, wback);
+                        emit_or16(dyn, ninst, x1, x5, x3, x4);
+                        STLXRH(x3, x1, wback);
+                        CBNZx_MARKLOCK(x3);
+                    }
                 }
                 break;
             case 2: //ADC
@@ -293,12 +325,23 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 } else {
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?2:1);
                    if(opcode==0x81) i32 = F16S; else i32 = F8S;
-                    MOV32w(x5, i32);
-                    MARKLOCK;
-                    LDAXRH(x1, wback);
-                    emit_and16(dyn, ninst, x1, x5, x3, x4);
-                    STLXRH(x3, x1, wback);
-                    CBNZx_MARKLOCK(x3);
+                    if(arm64_atomics) {
+                        MOV32w(x5, ~i32);
+                        UFLAG_IF {
+                            LDCLRALH(x5, x1, wback);
+                            MVNw_REG(x5, x5);
+                            emit_and16(dyn, ninst, x1, x5, x3, x4);
+                        } else {
+                            STCLRLH(x5, wback);
+                        }
+                    } else {
+                        MOV32w(x5, i32);
+                        MARKLOCK;
+                        LDAXRH(x1, wback);
+                        emit_and16(dyn, ninst, x1, x5, x3, x4);
+                        STLXRH(x3, x1, wback);
+                        CBNZx_MARKLOCK(x3);
+                    }
                 }
                 break;
             case 5: //SUB
@@ -317,11 +360,21 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     MOV32w(x5, i32);
                     TSTx_mask(wback, 1, 0, 0);    // mask=1
                    B_MARK(cNE);
-                    MARKLOCK;
-                    LDAXRH(x1, wback);
-                    emit_sub16(dyn, ninst, x1, x5, x3, x4);
-                    STLXRH(x3, x1, wback);
-                    CBNZx_MARKLOCK(x3);
+                    if(arm64_atomics) {
+                        NEGw_REG(x4, x5);
+                        UFLAG_IF {
+                            LDADDALH(x4, x1, wback);
+                            emit_sub16(dyn, ninst, x1, x5, x3, x4);
+                        } else {
+                            STADDLH(x4, wback);
+                        }
+                    } else {
+                        MARKLOCK;
+                        LDAXRH(x1, wback);
+                        emit_sub16(dyn, ninst, x1, x5, x3, x4);
+                        STLXRH(x3, x1, wback);
+                        CBNZx_MARKLOCK(x3);
+                    }
                     B_NEXT_nocond;
                     MARK;   // unaligned! also, not enough
                     LDRH_U12(x1, wback, 0);
@@ -347,11 +400,20 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?2:1);
                     if(opcode==0x81) i32 = F16S; else i32 = F8S;
                     MOV32w(x5, i32);
-                    MARKLOCK;
-                    LDAXRH(x1, wback);
-                    emit_xor16(dyn, ninst, x1, x5, x3, x4);
-                    STLXRH(x3, x1, wback);
-                    CBNZx_MARKLOCK(x3);
+                    if(arm64_atomics) {
+                        UFLAG_IF {
+                            LDEORALH(x5, x1, wback);
+                            emit_xor16(dyn, ninst, x1, x5, x3, x4);
+                        } else {
+                            STEORLH(x5, wback);
+                        }
+                    } else {
+                        MARKLOCK;
+                        LDAXRH(x1, wback);
+                        emit_xor16(dyn, ninst, x1, x5, x3, x4);
+                        STLXRH(x3, x1, wback);
+                        CBNZx_MARKLOCK(x3);
+                    }
                 }
                 break;
             case 7: //CMP
@@ -387,13 +449,22 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     BFIx(ed, x6, 0, 16);
                 } else {
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                    MARKLOCK;
-                    LDAXRH(x1, wback);
-                    emit_inc16(dyn, ninst, x1, x3, x4);
-                    STLXRH(x3, x1, wback);
-                    CBNZx_MARKLOCK(x3);
+                    if(arm64_atomics) {
+                        MOV32w(x3, 1);
+                        UFLAG_IF {
+                            LDADDALH(x3, x1, wback);
+                            emit_inc16(dyn, ninst, x1, x3, x4);
+                        } else {
+                            STADDLH(x3, wback);
+                        }
+                    } else {
+                        MARKLOCK;
+                        LDAXRH(x1, wback);
+                        emit_inc16(dyn, ninst, x1, x3, x4);
+                        STLXRH(x3, x1, wback);
+                        CBNZx_MARKLOCK(x3);
+                    }
                 }
-                SMDMB();
                 break;
             case 1: //DEC Ew
                 INST_NAME("LOCK DEC Ew");
@@ -406,13 +477,22 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     BFIx(ed, x6, 0, 16);
                 } else {
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                    MARKLOCK;
-                    LDAXRH(x1, wback);
-                    emit_dec16(dyn, ninst, x1, x3, x4);
-                    STLXRH(x3, x1, wback);
-                    CBNZx_MARKLOCK(x3);
+                    if(arm64_atomics) {
+                        MOV32w(x3, -1);
+                        UFLAG_IF {
+                            LDADDALH(x3, x1, wback);
+                            emit_dec16(dyn, ninst, x1, x3, x4);
+                        } else {
+                            STADDLH(x3, wback);
+                        }
+                    } else {
+                        MARKLOCK;
+                        LDAXRH(x1, wback);
+                        emit_dec16(dyn, ninst, x1, x3, x4);
+                        STLXRH(x3, x1, wback);
+                        CBNZx_MARKLOCK(x3);
+                    }
                 }
-                SMDMB();
                 break;
             default:
                 DEFAULT;
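
Two patterns recur throughout the 66F0 hunks above. First, under `UFLAG_IF` the LD-form (`LDADDALH`, `LDSETALH`, ...) is emitted so the old value comes back in a register and the existing `emit_*16` helper can recompute the x86 flags; when flags are dead, the ST-form (`STADDLH`, `STSETLH`, ...) is used instead. Second, LSE has no atomic AND, only `LDCLR` (which stores `[Rn] & ~Rs`), hence the complemented immediate in `MOV32w(x5, ~i32)`. A minimal self-check of that identity, as a plain C model rather than box64 code:

```c
#include <assert.h>
#include <stdint.h>

// LDCLR stores old & ~Rs. Passing Rs = ~mask therefore yields old & mask,
// which is the x86 AND the patch needs to emulate.
static uint16_t and_via_ldclr(uint16_t old, uint16_t mask)
{
    uint16_t rs = (uint16_t)~mask;   // what MOV32w(x5, ~i32) prepares
    return old & (uint16_t)~rs;      // what the LDCLR/STCLR stores back
}

int main(void)
{
    for (uint32_t v = 0; v <= 0xFFFF; v += 0x0F0F)
        assert(and_via_ldclr((uint16_t)v, 0x0FF0) == ((uint16_t)v & 0x0FF0));
    return 0;
}
```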
diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c
index 9f43f6b8..b51c4f47 100644
--- a/src/dynarec/arm64/dynarec_arm64_f0.c
+++ b/src/dynarec/arm64/dynarec_arm64_f0.c
@@ -70,13 +70,19 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 BFIx(wback, x1, wb2*8, 8);
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                MARKLOCK;
-                LDAXRB(x1, wback);
-                emit_add8(dyn, ninst, x1, x2, x4, x5);
-                STLXRB(x4, x1, wback);
-                CBNZx_MARKLOCK(x4);
+                if(arm64_atomics) {
+                    LDADDALB(x2, x1, wback);
+                    UFLAG_IF {
+                        emit_add8(dyn, ninst, x1, x2, x4, x5);
+                    }
+                } else {
+                    MARKLOCK;
+                    LDAXRB(x1, wback);
+                    emit_add8(dyn, ninst, x1, x2, x4, x5);
+                    STLXRB(x4, x1, wback);
+                    CBNZx_MARKLOCK(x4);
+                }
             }
-            SMDMB();
             break;
 
         case 0x01:
@@ -89,13 +95,19 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 emit_add32(dyn, ninst, rex, ed, gd, x3, x4);
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                MARKLOCK;
-                LDAXRxw(x1, wback);
-                emit_add32(dyn, ninst, rex, x1, gd, x3, x4);
-                STLXRxw(x3, x1, wback);
-                CBNZx_MARKLOCK(x3);
+                if(arm64_atomics) {
+                    LDADDALxw(gd, x1, wback);
+                    UFLAG_IF {
+                        emit_add32(dyn, ninst, rex, x1, gd, x3, x4);
+                    }
+                } else {
+                    MARKLOCK;
+                    LDAXRxw(x1, wback);
+                    emit_add32(dyn, ninst, rex, x1, gd, x3, x4);
+                    STLXRxw(x3, x1, wback);
+                    CBNZx_MARKLOCK(x3);
+                }
             }
-            SMDMB();
             break;
 
         case 0x08:
@@ -118,13 +130,19 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 BFIx(wback, x1, wb2*8, 8);
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                MARKLOCK;
-                LDAXRB(x1, wback);
-                emit_or8(dyn, ninst, x1, x2, x4, x5);
-                STLXRB(x4, x1, wback);
-                CBNZx_MARKLOCK(x4);
+                if(arm64_atomics) {
+                    LDSETALB(x2, x1, wback);
+                    UFLAG_IF {
+                        emit_or8(dyn, ninst, x1, x2, x4, x5);
+                    }
+                } else {
+                    MARKLOCK;
+                    LDAXRB(x1, wback);
+                    emit_or8(dyn, ninst, x1, x2, x4, x5);
+                    STLXRB(x4, x1, wback);
+                    CBNZx_MARKLOCK(x4);
+                }
             }
-            SMDMB();
             break;
         case 0x09:
            INST_NAME("LOCK OR Ed, Gd");
@@ -137,13 +155,19 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 emit_or32(dyn, ninst, rex, ed, gd, x3, x4);
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                MARKLOCK;
-                LDAXRxw(x1, wback);
-                emit_or32(dyn, ninst, rex, x1, gd, x3, x4);
-                STLXRxw(x3, x1, wback);
-                CBNZx_MARKLOCK(x3);
+                if(arm64_atomics) {
+                    LDSETALxw(gd, x1, wback);
+                    UFLAG_IF {
+                        emit_or32(dyn, ninst, rex, x1, gd, x3, x4);
+                    }
+                } else {
+                    MARKLOCK;
+                    LDAXRxw(x1, wback);
+                    emit_or32(dyn, ninst, rex, x1, gd, x3, x4);
+                    STLXRxw(x3, x1, wback);
+                    CBNZx_MARKLOCK(x3);
+                }
             }
-            SMDMB();
             break;
 
         case 0x0F:
@@ -231,15 +255,19 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         B_NEXT_nocond;
                     } else {
                         addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                        MARKLOCK;
-                        LDAXRB(x2, wback);
-                        CMPSxw_REG(x6, x2);
-                        B_MARK(cNE);
-                        // EAX == Ed
-                        STLXRB(x4, gd, wback);
-                        CBNZx_MARKLOCK(x4);
-                        // done
-                        MARK;
+                        if(arm64_atomics) {
+                            CASALB(x6, x2, wback);
+                        } else {
+                            MARKLOCK;
+                            LDAXRB(x2, wback);
+                            CMPSxw_REG(x6, x2);
+                            B_MARK(cNE);
+                            // EAX == Ed
+                            STLXRB(x4, gd, wback);
+                            CBNZx_MARKLOCK(x4);
+                            // done
+                            MARK;
+                        }
                         UFLAG_IF {emit_cmp8(dyn, ninst, x6, x2, x3, x4, x5);}
                         BFIx(xRAX, x2, 0, 8);
                     }
@@ -256,7 +284,6 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     nextop = F8;
                     GETGD;
-                    SMDMB();
                     if(MODREG) {
                         ed = xRAX+(nextop&7)+(rex.b<<3);
                         wback = 0;
@@ -273,15 +300,27 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
                         B_MARK3(cNE);
                         // Aligned version
-                        MARKLOCK;
-                        LDAXRxw(x1, wback);
-                        CMPSxw_REG(xRAX, x1);
-                        B_MARK(cNE);
-                        // EAX == Ed
-                        STLXRxw(x4, gd, wback);
-                        CBNZx_MARKLOCK(x4);
-                        // done
-                        B_MARK_nocond;
+                        if(arm64_atomics) {
+                            UFLAG_IF {
+                                MOVxw_REG(x1, xRAX);
+                                CASALxw(x1, gd, wback);
+                                emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);
+                                MOVxw_REG(xRAX, x1);
+                            } else {
+                                CASALxw(xRAX, gd, wback);
+                            }
+                            B_NEXT_nocond;
+                        } else {
+                            MARKLOCK;
+                            LDAXRxw(x1, wback);
+                            CMPSxw_REG(xRAX, x1);
+                            B_MARK(cNE);
+                            // EAX == Ed
+                            STLXRxw(x4, gd, wback);
+                            CBNZx_MARKLOCK(x4);
+                            // done
+                            B_MARK_nocond;
+                        }
                         // Unaligned version
                         MARK3;
                         LDRxw_U12(x1, wback, 0);
@@ -292,12 +331,12 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         STLXRB(x4, gd, wback);
                         CBNZx_MARK3(x4);
                         STRxw_U12(gd, wback, 0);
+                        SMDMB();
                         MARK;   // Common part (and fallback for EAX != Ed)
                         UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);}
                         MOVxw_REG(xRAX, x1);    // upper par of RAX will be erase on 32bits, no mater what
                     }
-                    SMDMB();
                     break;
 
                 default:
                     DEFAULT;
@@ -358,7 +397,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 0xC1:
                     switch(rep) {
                         case 0:
-                            INST_NAME("LOCK XADD Gd, Ed");
+                            INST_NAME("LOCK XADD Ed, Gd");
                             SETFLAGS(X_ALL, SF_SET_PENDING);
                             nextop = F8;
                             GETGD;
@@ -373,12 +412,23 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
                                 TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
                                 B_MARK(cNE);    // unaligned
-                                MARKLOCK;
-                                LDAXRxw(x1, wback);
-                                ADDxw_REG(x4, x1, gd);
-                                STLXRxw(x3, x4, wback);
-                                CBNZx_MARKLOCK(x3);
-                                B_MARK2_nocond;
+                                if(arm64_atomics) {
+                                    UFLAG_IF {
+                                        MOVxw_REG(x3, gd);
+                                        LDADDALxw(x3, gd, wback);
+                                        emit_add32(dyn, ninst, rex, x3, gd, x4, x5);
+                                    } else {
+                                        LDADDALxw(gd, gd, wback);
+                                    }
+                                    B_NEXT_nocond;
+                                } else {
+                                    MARKLOCK;
+                                    LDAXRxw(x1, wback);
+                                    ADDxw_REG(x4, x1, gd);
+                                    STLXRxw(x3, x4, wback);
+                                    CBNZx_MARKLOCK(x3);
+                                    B_MARK2_nocond;
+                                }
                                 MARK;
                                 LDRxw_U12(x1, wback, 0);
                                 LDAXRB(x4, wback);
@@ -387,6 +437,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 STLXRB(x3, x4, wback);
                                 CBNZx_MARK(x3);
                                 STRxw_U12(x4, wback, 0);
+                                SMDMB();
                                 MARK2;
                                 IFX(X_ALL|X_PEND) {
                                     MOVxw_REG(x2, x1);
@@ -394,7 +445,6 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 }
                                 MOVxw_REG(gd, x1);
                             }
-                            SMDMB();
                             break;
                         default:
                             DEFAULT;
@@ -408,24 +458,44 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             SETFLAGS(X_ZF, SF_SUBSET);
                             nextop = F8;
                             addr = geted(dyn, addr, ninst, nextop, &wback, x1, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                            SMDMB();
-                            MARKLOCK;
-                            LDAXPxw(x2, x3, wback);
-                            CMPSxw_REG(xRAX, x2);
-                            B_MARK(cNE);    // EAX != Ed[0]
-                            CMPSxw_REG(xRDX, x3);
-                            B_MARK(cNE);    // EDX != Ed[1]
-                            STLXPxw(x4, xRBX, xRCX, wback);
-                            CBNZx_MARKLOCK(x4);
-                            MOV32w(x1, 1);
-                            B_MARK3_nocond;
-                            MARK;
-                            MOVxw_REG(xRAX, x2);
-                            MOVxw_REG(xRDX, x3);
-                            MOV32w(x1, 0);
+                            if(arm64_atomics) {
+                                MOVx_REG(x2, xRAX);
+                                MOVx_REG(x3, xRDX);
+                                MOVx_REG(x4, xRBX);
+                                MOVx_REG(x5, xRCX);
+                                CASPALxw(x2, x4, wback);
+                                UFLAG_IF {
+                                    CMPSxw_REG(x2, xRAX);
+                                    CSETw(x1, cEQ);
+                                    CMPSxw_REG(x3, xRDX);
+                                    CSETw(x2, cEQ);
+                                    ANDw_REG(x1, x1, x2);
+                                }
+                                B_MARK3_nocond;
+                            } else {
+                                MARKLOCK;
+                                LDAXPxw(x2, x3, wback);
+                                CMPSxw_REG(xRAX, x2);
+                                B_MARK(cNE);    // EAX != Ed[0]
+                                CMPSxw_REG(xRDX, x3);
+                                B_MARK(cNE);    // EDX != Ed[1]
+                                STLXPxw(x4, xRBX, xRCX, wback);
+                                CBNZx_MARKLOCK(x4);
+                                UFLAG_IF {
+                                    MOV32w(x1, 1);
+                                }
+                                B_MARK3_nocond;
+                                MARK;
+                                MOVxw_REG(xRAX, x2);
+                                MOVxw_REG(xRDX, x3);
+                                UFLAG_IF {
+                                    MOV32w(x1, 0);
+                                }
+                            }
                             MARK3;
-                            SMDMB();
-                            BFIw(xFlags, x1, F_ZF, 1);
+                            UFLAG_IF {
+                                BFIw(xFlags, x1, F_ZF, 1);
+                            }
                             break;
                         default:
                             DEFAULT;
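
`CASALB`/`CASALxw`/`CASPALxw` compare Rs (or the pair Rs,Rs+1) against memory, store Rt only on a match, and always return the previous memory value in Rs. That is close to x86 `CMPXCHG`, except x86 additionally sets ZF and, on mismatch, loads the accumulator from memory, which is what the `emit_cmp*`/`BFI` code wrapped around the CAS reproduces. A reference model of the x86 side (hypothetical helper, not box64 code):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// x86 CMPXCHG r/m32, r32: if (*mem == *eax) { *mem = src; ZF = 1; }
//                         else              { *eax = *mem; ZF = 0; }
// CASAL hands back the old memory value in one step; the translation then
// compares that old value with EAX to recover ZF.
static bool cmpxchg32_model(uint32_t *mem, uint32_t *eax, uint32_t src)
{
    uint32_t old = *mem;                   // what CASAL returns in Rs
    if (old == *eax) { *mem = src; return true; }
    *eax = old;
    return false;
}

int main(void)
{
    uint32_t mem = 5, eax = 5;
    printf("ZF=%d mem=%u\n", cmpxchg32_model(&mem, &eax, 9), mem);  // ZF=1 mem=9
    eax = 4;
    printf("ZF=%d eax=%u\n", cmpxchg32_model(&mem, &eax, 7), eax);  // ZF=0 eax=9
    return 0;
}
```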
@@ -490,19 +560,27 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
             GETGD;
-            SMDMB();
             if(MODREG) {
                 ed = xRAX+(nextop&7)+(rex.b<<3);
                 emit_and32(dyn, ninst, rex, ed, gd, x3, x4);
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                MARKLOCK;
-                LDAXRxw(x1, wback);
-                emit_and32(dyn, ninst, rex, x1, gd, x3, x4);
-                STLXRxw(x3, x1, wback);
-                CBNZx_MARKLOCK(x3);
+                if(arm64_atomics) {
+                    MVNxw_REG(x1, gd);
+                    UFLAG_IF {
+                        LDCLRALxw(x1, x1, wback);
+                        emit_and32(dyn, ninst, rex, x1, gd, x3, x4);
+                    } else {
+                        STCLRLxw(x1, wback);
+                    }
+                } else {
+                    MARKLOCK;
+                    LDAXRxw(x1, wback);
+                    emit_and32(dyn, ninst, rex, x1, gd, x3, x4);
+                    STLXRxw(x3, x1, wback);
+                    CBNZx_MARKLOCK(x3);
+                }
             }
-            SMDMB();
             break;
 
         case 0x29:
@@ -516,13 +594,22 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 emit_sub32(dyn, ninst, rex, ed, gd, x3, x4);
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                MARKLOCK;
-                LDAXRxw(x1, wback);
-                emit_sub32(dyn, ninst, rex, x1, gd, x3, x4);
-                STLXRxw(x3, x1, wback);
-                CBNZx_MARKLOCK(x3);
+                if(arm64_atomics) {
+                    NEGxw_REG(x1, gd);
+                    UFLAG_IF {
+                        LDADDALxw(x1, x1, wback);
+                        emit_sub32(dyn, ninst, rex, x1, gd, x3, x4);
+                    } else {
+                        STADDLxw(x1, wback);
+                    }
+                } else {
+                    MARKLOCK;
+                    LDAXRxw(x1, wback);
+                    emit_sub32(dyn, ninst, rex, x1, gd, x3, x4);
+                    STLXRxw(x3, x1, wback);
+                    CBNZx_MARKLOCK(x3);
+                }
             }
-            SMDMB();
             break;
 
         case 0x66:
@@ -545,11 +632,22 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1);
                         u8 = F8;
                         wb1 = 1;
-                        MARKLOCK;
-                        LDAXRB(x1, wback);
-                        emit_add8c(dyn, ninst, x1, u8, x2, x4);
-                        STLXRB(x3, x1, wback);
-                        CBNZx_MARKLOCK(x3);
+                        if(arm64_atomics) {
+                            MOV32w(x2, u8);
+                            UFLAG_IF {
+                                LDADDALB(x2, x1, wback);
+                                emit_add8(dyn, ninst, x1, x2, x3, x4);
+                            } else {
+                                STADDB(x2, wback);
+                            }
+
+                        } else {
+                            MARKLOCK;
+                            LDAXRB(x1, wback);
+                            emit_add8c(dyn, ninst, x1, u8, x2, x4);
+                            STLXRB(x3, x1, wback);
+                            CBNZx_MARKLOCK(x3);
+                        }
                     }
                     break;
                 case 1: //OR
@@ -565,11 +663,21 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1);
                         u8 = F8;
                         wb1 = 1;
-                        MARKLOCK;
-                        LDAXRB(x1, wback);
-                        emit_or8c(dyn, ninst, x1, u8, x2, x4);
-                        STLXRB(x3, x1, wback);
-                        CBNZx_MARKLOCK(x3);
+                        if(arm64_atomics) {
+                            MOV32w(x2, u8);
+                            UFLAG_IF {
+                                LDSETALB(x2, x1, wback);
+                                emit_or8(dyn, ninst, x1, x2, x3, x4);
+                            } else {
+                                STSETLB(x2, wback);
+                            }
+                        } else {
+                            MARKLOCK;
+                            LDAXRB(x1, wback);
+                            emit_or8c(dyn, ninst, x1, u8, x2, x4);
+                            STLXRB(x3, x1, wback);
+                            CBNZx_MARKLOCK(x3);
+                        }
                     }
                     break;
                 case 2: //ADC
@@ -627,11 +735,21 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                        addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1);
                         u8 = F8;
                         wb1 = 1;
-                        MARKLOCK;
-                        LDAXRB(x1, wback);
-                        emit_and8c(dyn, ninst, x1, u8, x2, x4);
-                        STLXRB(x3, x1, wback);
-                        CBNZx_MARKLOCK(x3);
+                        if(arm64_atomics) {
+                            MOV32w(x2, ~u8);
+                            UFLAG_IF {
+                                LDCLRALB(x2, x1, wback);
+                                emit_and8c(dyn, ninst, x1, u8, x2, x4);
+                            } else {
+                                STCLRLB(x2, wback);
+                            }
+                        } else {
+                            MARKLOCK;
+                            LDAXRB(x1, wback);
+                            emit_and8c(dyn, ninst, x1, u8, x2, x4);
+                            STLXRB(x3, x1, wback);
+                            CBNZx_MARKLOCK(x3);
+                        }
                     }
                     break;
                 case 5: //SUB
@@ -647,11 +765,21 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1);
                         u8 = F8;
                         wb1 = 1;
-                        MARKLOCK;
-                        LDAXRB(x1, wback);
-                        emit_sub8c(dyn, ninst, x1, u8, x2, x4, x3);
-                        STLXRB(x3, x1, wback);
-                        CBNZx_MARKLOCK(x3);
+                        if(arm64_atomics) {
+                            MOV32w(x2, -u8);
+                            UFLAG_IF {
+                                LDADDALB(x2, x1, wback);
+                                emit_sub8c(dyn, ninst, x1, u8, x2, x4, x3);
+                            } else {
+                                STADDLB(x2, wback);
+                            }
+                        } else {
+                            MARKLOCK;
+                            LDAXRB(x1, wback);
+                            emit_sub8c(dyn, ninst, x1, u8, x2, x4, x3);
+                            STLXRB(x3, x1, wback);
+                            CBNZx_MARKLOCK(x3);
+                        }
                     }
                     break;
                 case 6: //XOR
@@ -667,11 +795,21 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                        addr = geted(dyn, addr, ninst, nextop, &wback, x5, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 1);
                         u8 = F8;
                         wb1 = 1;
-                        MARKLOCK;
-                        LDAXRB(x1, wback);
-                        emit_xor8c(dyn, ninst, x1, u8, x2, x4);
-                        STLXRB(x3, x1, wback);
-                        CBNZx_MARKLOCK(x3);
+                        if(arm64_atomics) {
+                            MOV32w(x2, u8);
+                            UFLAG_IF {
+                                LDEORALB(x2, x1, wback);
+                                emit_xor8(dyn, ninst, x1, x2, x3, x4);
+                            } else {
+                                STEORLB(x2, wback);
+                            }
+                        } else {
+                            MARKLOCK;
+                            LDAXRB(x1, wback);
+                            emit_xor8c(dyn, ninst, x1, u8, x2, x4);
+                            STLXRB(x3, x1, wback);
+                            CBNZx_MARKLOCK(x3);
+                        }
                     }
                     break;
                 case 7: //CMP
@@ -694,7 +832,6 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0x81:
         case 0x83:
             nextop = F8;
-            SMDMB();
             switch((nextop>>3)&7) {
                 case 0: //ADD
                     if(opcode==0x81) {
@@ -713,12 +850,21 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         if(opcode==0x81) i64 = F32S; else i64 = F8S;
                         TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
                         B_MARK(cNE);
-                        MARKLOCK;
-                        LDAXRxw(x1, wback);
-                        emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
-                        STLXRxw(x3, x1, wback);
-                        CBNZx_MARKLOCK(x3);
-                        SMDMB();
+                        if(arm64_atomics) {
+                            MOV64xw(x3, i64);
+                            UFLAG_IF {
+                                LDADDALxw(x3, x1, wback);
+                                emit_add32(dyn, ninst, rex, x1, x3, x4, x5);
+                            } else {
+                                STADDLxw(x3, wback);
+                            }
+                        } else {
+                            MARKLOCK;
+                            LDAXRxw(x1, wback);
+                            emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
+                            STLXRxw(x3, x1, wback);
+                            CBNZx_MARKLOCK(x3);
+                        }
                         B_NEXT_nocond;
                         MARK;   // unaligned! also, not enough
                         LDRxw_U12(x1, wback, 0);
@@ -728,6 +874,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         STLXRB(x3, x1, wback);
                         CBNZx_MARK(x3);
                         STRxw_U12(x1, wback, 0);    // put the whole value
+                        SMDMB();
                     }
                     break;
                 case 1: //OR
@@ -742,11 +889,20 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
                         if(opcode==0x81) i64 = F32S; else i64 = F8S;
                         MOV64xw(x5, i64);
-                        MARKLOCK;
-                        LDAXRxw(x1, wback);
-                        emit_or32(dyn, ninst, rex, x1, x5, x3, x4);
-                        STLXRxw(x3, x1, wback);
-                        CBNZx_MARKLOCK(x3);
+                        if(arm64_atomics) {
+                            UFLAG_IF {
+                                LDSETALxw(x5, x1, wback);
+                                emit_or32(dyn, ninst, rex, x1, x5, x3, x4);
+                            } else {
+                                STSETLxw(x5, wback);
+                            }
+                        } else {
+                            MARKLOCK;
+                            LDAXRxw(x1, wback);
+                            emit_or32(dyn, ninst, rex, x1, x5, x3, x4);
+                            STLXRxw(x3, x1, wback);
+                            CBNZx_MARKLOCK(x3);
+                        }
                     }
                     break;
                 case 2: //ADC
@@ -800,12 +956,23 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     } else {
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
                         if(opcode==0x81) i64 = F32S; else i64 = F8S;
-                        MOV64xw(x5, i64);
-                        MARKLOCK;
-                        LDAXRxw(x1, wback);
-                        emit_and32(dyn, ninst, rex, x1, x5, x3, x4);
-                        STLXRxw(x3, x1, wback);
-                        CBNZx_MARKLOCK(x3);
+                        if(arm64_atomics) {
+                            MOV64xw(x5, ~i64);
+                            UFLAG_IF {
+                                LDCLRALxw(x5, x1, wback);
+                                MVNxw_REG(x5, x5);
+                                emit_and32(dyn, ninst, rex, x1, x5, x3, x4);
+                            } else {
+                                STCLRLxw(x5, wback);
+                            }
+                        } else {
+                            MOV64xw(x5, i64);
+                            MARKLOCK;
+                            LDAXRxw(x1, wback);
+                            emit_and32(dyn, ninst, rex, x1, x5, x3, x4);
+                            STLXRxw(x3, x1, wback);
+                            CBNZx_MARKLOCK(x3);
+                        }
                     }
                     break;
                 case 5: //SUB
@@ -821,12 +988,22 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         if(opcode==0x81) i64 = F32S; else i64 = F8S;
                         TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
                         B_MARK(cNE);
-                        MARKLOCK;
-                        LDAXRxw(x1, wback);
-                        emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
-                        STLXRxw(x3, x1, wback);
-                        CBNZx_MARKLOCK(x3);
-                        SMDMB();
+                        if(arm64_atomics) {
+                            MOV64xw(x5, -i64);
+                            UFLAG_IF {
+                                LDADDALxw(x5, x1, wback);
+                                NEGxw_REG(x5, x5);
+                                emit_sub32(dyn, ninst, rex, x1, x5, x3, x4);
+                            } else {
+                                STADDLxw(x5, wback);
+                            }
+                        } else {
+                            MARKLOCK;
+                            LDAXRxw(x1, wback);
+                            emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
+                            STLXRxw(x3, x1, wback);
+                            CBNZx_MARKLOCK(x3);
+                        }
                         B_NEXT_nocond;
                         MARK;   // unaligned! also, not enough
                         LDRxw_U12(x1, wback, 0);
@@ -836,6 +1013,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         STLXRB(x3, x1, wback);
                        CBNZx_MARK(x3);
                         STRxw_U12(x1, wback, 0);    // put the whole value
+                        SMDMB();
                     }
                     break;
                 case 6: //XOR
@@ -850,11 +1028,20 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
                         if(opcode==0x81) i64 = F32S; else i64 = F8S;
                         MOV64xw(x5, i64);
-                        MARKLOCK;
-                        LDAXRxw(x1, wback);
-                        emit_xor32(dyn, ninst, rex, x1, x5, x3, x4);
-                        STLXRxw(x3, x1, wback);
-                        CBNZx_MARKLOCK(x3);
+                        if(arm64_atomics) {
+                            UFLAG_IF {
+                                LDEORALxw(x5, x1, wback);
+                                emit_xor32(dyn, ninst, rex, x1, x5, x3, x4);
+                            } else {
+                                STEORLxw(x5, wback);
+                            }
+                        } else {
+                            MARKLOCK;
+                            LDAXRxw(x1, wback);
+                            emit_xor32(dyn, ninst, rex, x1, x5, x3, x4);
+                            STLXRxw(x3, x1, wback);
+                            CBNZx_MARKLOCK(x3);
+                        }
                     }
                     break;
                 case 7: //CMP
@@ -871,7 +1058,6 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     break;
             }
-            SMDMB();
            break;
 
         case 0x86:
@@ -897,13 +1083,17 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 SMDMB();
                 GETGB(x4);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                MARKLOCK;
-                // do the swap with exclusive locking
-                LDAXRB(x1, ed);
-                // do the swap 14 -> strb(ed), 1 -> gd
-                STLXRB(x3, x4, ed);
-                CBNZx_MARKLOCK(x3);
-                SMDMB();
+                if(arm64_atomics) {
+                    SWPALB(x4, x1, ed);
+                } else {
+                    MARKLOCK;
+                    // do the swap with exclusive locking
+                    LDAXRB(x1, ed);
+                    // do the swap 14 -> strb(ed), 1 -> gd
+                    STLXRB(x3, x4, ed);
+                    CBNZx_MARKLOCK(x3);
+                    SMDMB();
+                }
                 BFIx(gb1, x1, gb2, 8);
             }
             break;
@@ -922,13 +1112,21 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
                 TSTx_mask(ed, 1, 0, 1+rex.w);    // mask=3 or 7
                 B_MARK(cNE);
-                MARKLOCK;
-                LDAXRxw(x1, ed);
-                STLXRxw(x3, gd, ed);
-                CBNZx_MARKLOCK(x3);
-                B_MARK2_nocond;
+                if(arm64_atomics) {
+                    SWPALxw(gd, gd, ed);
+                    B_NEXT_nocond;
+                } else {
+                    MARKLOCK;
+                    LDAXRxw(x1, ed);
+                    STLXRxw(x3, gd, ed);
+                    CBNZx_MARKLOCK(x3);
+                    B_MARK2_nocond;
+                }
                 MARK;
                 LDRxw_U12(x1, ed, 0);
+                LDAXRB(x3, ed);
+                STLXRB(x3, gd, ed);
+                CBNZx_MARK(x3);
                 STRxw_U12(gd, ed, 0);
                 MARK2;
                 SMDMB();
@@ -986,11 +1184,20 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 } else {
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
                     MARKLOCK;
-                    LDAXRB(x1, wback);
-                    emit_inc8(dyn, ninst, x1, x3, x4);
-                    STLXRB(x3, x1, wback);
-                    CBNZx_MARKLOCK(x3);
-                    SMDMB();
+                    if(arm64_atomics) {
+                        MOV32w(x3, 1);
+                        UFLAG_IF {
+                            LDADDALB(x3, x1, wback);
+                            emit_inc8(dyn, ninst, x1, x3, x4);
+                        } else {
+                            STADDLB(x3, wback);
+                        }
+                    } else {
+                        LDAXRB(x1, wback);
+                        emit_inc8(dyn, ninst, x1, x3, x4);
+                        STLXRB(x3, x1, wback);
+                        CBNZx_MARKLOCK(x3);
+                    }
                 }
                 break;
             case 1: //DEC Ed
@@ -1003,12 +1210,21 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     EBBACK;
                 } else {
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                    MARKLOCK;
-                    LDAXRB(x1, wback);
-                    emit_dec8(dyn, ninst, x1, x3, x4);
-                    STLXRB(x3, x1, wback);
-                    CBNZx_MARKLOCK(x3);
-                    SMDMB();
+                    if(arm64_atomics) {
+                        MOV32w(x3, -1);
+                        UFLAG_IF {
+                            LDADDALB(x3, x1, wback);
+                            emit_dec8(dyn, ninst, x1, x3, x4);
+                        } else {
+                            STADDLB(x3, wback);
+                        }
+                    } else {
+                        MARKLOCK;
+                        LDAXRB(x1, wback);
+                        emit_dec8(dyn, ninst, x1, x3, x4);
+                        STLXRB(x3, x1, wback);
+                        CBNZx_MARKLOCK(x3);
+                    }
                 }
                 break;
             default:
@@ -1030,12 +1246,21 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
                     TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
                     B_MARK(cNE);    // unaligned
-                    MARKLOCK;
-                    LDAXRxw(x1, wback);
-                    emit_inc32(dyn, ninst, rex, x1, x3, x4);
-                    STLXRxw(x3, x1, wback);
-                    CBNZx_MARKLOCK(x3);
-                    SMDMB();
+                    if(arm64_atomics) {
+                        MOV32w(x3, 1);
+                        UFLAG_IF {
+                            LDADDALxw(x3, x1, wback);
+                            emit_inc32(dyn, ninst, rex, x1, x3, x4);
+                        } else {
+                            STADDLxw(x3, wback);
+                        }
+                    } else {
+                        MARKLOCK;
+                        LDAXRxw(x1, wback);
+                        emit_inc32(dyn, ninst, rex, x1, x3, x4);
+                        STLXRxw(x3, x1, wback);
+                        CBNZx_MARKLOCK(x3);
+                    }
                     B_NEXT_nocond;
                     MARK;
                     LDRxw_U12(x1, wback, 0);
@@ -1060,11 +1285,20 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
                     B_MARK(cNE);    // unaligned
                     MARKLOCK;
-                    LDAXRxw(x1, wback);
-                    emit_dec32(dyn, ninst, rex, x1, x3, x4);
-                    STLXRxw(x3, x1, wback);
-                    CBNZx_MARKLOCK(x3);
-                    SMDMB();
+                    if(arm64_atomics) {
+                        MOV32w(x3, -1);
+                        UFLAG_IF {
+                            LDADDALxw(x3, x1, wback);
+                            emit_dec32(dyn, ninst, rex, x1, x3, x4);
+                        } else {
+                            STADDLxw(x3, wback);
+                        }
+                    } else {
+                        LDAXRxw(x1, wback);
+                        emit_dec32(dyn, ninst, rex, x1, x3, x4);
+                        STLXRxw(x3, x1, wback);
+                        CBNZx_MARKLOCK(x3);
+                    }
                     B_NEXT_nocond;
                     MARK;
                     LDRxw_U12(x1, wback, 0);
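
On cores without FEAT_LSE, every hunk above keeps the pre-existing exclusive-monitor loop: `MARKLOCK; LDAXR...; <op>; STLXR...; CBNZ` back to `MARKLOCK`. Expressed as portable C11, that fallback has the shape of a compare-exchange retry loop (a sketch under that analogy, not box64 code):

```c
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

// Shape of the MARKLOCK / LDAXR / add / STLXR / CBNZ fallback: keep retrying
// until the exclusive store succeeds, then hand back the old value,
// just as LDADDAL does in a single instruction.
static uint32_t fetch_add_llsc_shape(_Atomic uint32_t *p, uint32_t v)
{
    uint32_t old = atomic_load_explicit(p, memory_order_relaxed);      // LDAXR
    while (!atomic_compare_exchange_weak_explicit(p, &old, old + v,    // STLXR
                                                  memory_order_seq_cst,
                                                  memory_order_relaxed))
        ;                                                              // CBNZ retry
    return old;
}

int main(void)
{
    _Atomic uint32_t x = 40;
    printf("old=%u new=%u\n", fetch_add_llsc_shape(&x, 2), (unsigned)atomic_load(&x));
    return 0;
}
```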