From 6fea5e5854646f891fe601bc58aa9e33c596f8f5 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Sun, 21 Mar 2021 09:00:51 +0100 Subject: [DYNAREC] Optimized prolog/epilog/next with ldp/stp --- src/dynarec/arm64_epilog.S | 51 ++++++++++++++++------------------------------ src/dynarec/arm64_next.S | 32 ++++++++++------------------- src/dynarec/arm64_prolog.S | 39 ++++++++++++----------------------- 3 files changed, 41 insertions(+), 81 deletions(-) (limited to 'src') diff --git a/src/dynarec/arm64_epilog.S b/src/dynarec/arm64_epilog.S index 1a3b3ddc..eef0d906 100755 --- a/src/dynarec/arm64_epilog.S +++ b/src/dynarec/arm64_epilog.S @@ -9,34 +9,21 @@ .global arm64_epilog arm64_epilog: //update register -> emu - str x10, [x0, (8 * 0)] - str x11, [x0, (8 * 1)] - str x12, [x0, (8 * 2)] - str x13, [x0, (8 * 3)] - str x14, [x0, (8 * 4)] - str x15, [x0, (8 * 5)] - str x16, [x0, (8 * 6)] - str x17, [x0, (8 * 7)] - str x18, [x0, (8 * 8)] - str x19, [x0, (8 * 9)] - str x20, [x0, (8 * 10)] - str x21, [x0, (8 * 11)] - str x22, [x0, (8 * 12)] - str x23, [x0, (8 * 13)] - str x24, [x0, (8 * 14)] - str x25, [x0, (8 * 15)] - str x26, [x0, (8 * 16)] - str x27, [x0, (8 * 17)] // put back reg value in emu, including EIP (so x27 must be EIP now) + stp x10, x11, [x0, (8 * 0)] + stp x12, x13, [x0, (8 * 2)] + stp x14, x15, [x0, (8 * 4)] + stp x16, x17, [x0, (8 * 6)] + stp x18, x19, [x0, (8 * 8)] + stp x20, x21, [x0, (8 * 10)] + stp x22, x23, [x0, (8 * 12)] + stp x24, x25, [x0, (8 * 14)] + stp x26, x27, [x0, (8 * 16)] // put back reg value in emu, including EIP (so x27 must be EIP now) //restore all used register //vpop {d8-d15} - ldr x19, [sp, (8 * 0)] - ldr x20, [sp, (8 * 1)] - ldr x21, [sp, (8 * 2)] - ldr x22, [sp, (8 * 3)] - ldr x23, [sp, (8 * 4)] - ldr x24, [sp, (8 * 5)] - ldr x25, [sp, (8 * 6)] - ldr x26, [sp, (8 * 7)] + ldp x19, x20, [sp, (8 * 0)] + ldp x21, x22, [sp, (8 * 2)] + ldp x23, x24, [sp, (8 * 4)] + ldp x25, x26, [sp, (8 * 6)] ldr x27, [sp, (8 * 8)] add sp, sp, (8 * 10) ldp lr, fp, [sp], 16 // saved lr @@ -48,14 +35,10 @@ arm64_epilog: arm64_epilog_fast: //restore all used register //vpop {d8-d15} - ldr x19, [sp, (8 * 0)] - ldr x20, [sp, (8 * 1)] - ldr x21, [sp, (8 * 2)] - ldr x22, [sp, (8 * 3)] - ldr x23, [sp, (8 * 4)] - ldr x24, [sp, (8 * 5)] - ldr x25, [sp, (8 * 6)] - ldr x26, [sp, (8 * 7)] + ldp x19, x20, [sp, (8 * 0)] + ldp x21, x22, [sp, (8 * 2)] + ldp x23, x24, [sp, (8 * 4)] + ldp x25, x26, [sp, (8 * 6)] ldr x27, [sp, (8 * 8)] add sp, sp, (8 * 10) ldp lr, fp, [sp], 16 // saved lr diff --git a/src/dynarec/arm64_next.S b/src/dynarec/arm64_next.S index 47dfe737..cd60dda5 100755 --- a/src/dynarec/arm64_next.S +++ b/src/dynarec/arm64_next.S @@ -12,33 +12,23 @@ arm64_next: // emu is r0 // IP address is r1 - sub sp, sp, (8 * 12) - str x0, [sp, (8 * 0)] - str x1, [sp, (8 * 1)] - str x10, [sp, (8 * 2)] - str x11, [sp, (8 * 3)] - str x12, [sp, (8 * 4)] - str x13, [sp, (8 * 5)] - str x14, [sp, (8 * 6)] - str x15, [sp, (8 * 7)] - str x16, [sp, (8 * 8)] - str x17, [sp, (8 * 9)] + sub sp, sp, (8 * 12) + stp x0, x1, [sp, (8 * 0)] + stp x10, x11, [sp, (8 * 2)] + stp x12, x13, [sp, (8 * 4)] + stp x14, x15, [sp, (8 * 6)] + stp x16, x17, [sp, (8 * 8)] str x18, [sp, (8 * 10)] // call the function bl LinkNext // preserve return value mov x3, x0 // pop regs - ldr x0, [sp, (8 * 0)] - ldr x1, [sp, (8 * 1)] - ldr x10, [sp, (8 * 2)] - ldr x11, [sp, (8 * 3)] - ldr x12, [sp, (8 * 4)] - ldr x13, [sp, (8 * 5)] - ldr x14, [sp, (8 * 6)] - ldr x15, [sp, (8 * 7)] - ldr x16, [sp, (8 * 8)] - ldr x17, [sp, (8 * 9)] + ldp x0, x1, [sp, (8 * 0)] + ldp x10, x11, [sp, (8 * 2)] + ldp x12, x13, [sp, (8 * 4)] + ldp x14, x15, [sp, (8 * 6)] + ldp x16, x17, [sp, (8 * 8)] ldr x18, [sp, (8 * 10)] add sp, sp, (8 * 12) // return offset is jump address diff --git a/src/dynarec/arm64_prolog.S b/src/dynarec/arm64_prolog.S index 679af0ce..a583c081 100755 --- a/src/dynarec/arm64_prolog.S +++ b/src/dynarec/arm64_prolog.S @@ -11,34 +11,21 @@ arm64_prolog: //save all 18 used register stp lr, fp, [sp, -16]! // save lr sub sp, sp, (8 * 10) - str x19, [sp, (8 * 0)] - str x20, [sp, (8 * 1)] - str x21, [sp, (8 * 2)] - str x22, [sp, (8 * 3)] - str x23, [sp, (8 * 4)] - str x24, [sp, (8 * 5)] - str x25, [sp, (8 * 6)] - str x26, [sp, (8 * 7)] + stp x19, x20, [sp, (8 * 0)] + stp x21, x22, [sp, (8 * 2)] + stp x23, x24, [sp, (8 * 4)] + stp x25, x26, [sp, (8 * 6)] str x27, [sp, (8 * 8)] //vpush {d8-d15} // save NEON regs? //setup emu -> register - ldr x10, [x0, (8 * 0)] - ldr x11, [x0, (8 * 1)] - ldr x12, [x0, (8 * 2)] - ldr x13, [x0, (8 * 3)] - ldr x14, [x0, (8 * 4)] - ldr x15, [x0, (8 * 5)] - ldr x16, [x0, (8 * 6)] - ldr x17, [x0, (8 * 7)] - ldr x18, [x0, (8 * 8)] - ldr x19, [x0, (8 * 9)] - ldr x20, [x0, (8 * 10)] - ldr x21, [x0, (8 * 11)] - ldr x22, [x0, (8 * 12)] - ldr x23, [x0, (8 * 13)] - ldr x24, [x0, (8 * 14)] - ldr x25, [x0, (8 * 15)] - ldr x26, [x0, (8 * 16)] - ldr x27, [x0, (8 * 17)] + ldp x10, x11, [x0, (8 * 0)] + ldp x12, x13, [x0, (8 * 2)] + ldp x14, x15, [x0, (8 * 4)] + ldp x16, x17, [x0, (8 * 6)] + ldp x18, x19, [x0, (8 * 8)] + ldp x20, x21, [x0, (8 * 10)] + ldp x22, x23, [x0, (8 * 12)] + ldp x24, x25, [x0, (8 * 14)] + ldp x26, x27, [x0, (8 * 16)] //jump to function br x1 -- cgit 1.4.1