diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2023-03-16 14:43:53 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-03-16 07:43:53 +0100 |
| commit | e579450af61a2b953b6309c5a3aad4a3d94a8393 (patch) | |
| tree | 061d1c76e4113ac1ba7d58ee1bc31953addb8e64 /src | |
| parent | 50e463917ca388d1fd7285ce0b12476963e97c0f (diff) | |
| download | box64-e579450af61a2b953b6309c5a3aad4a3d94a8393.tar.gz box64-e579450af61a2b953b6309c5a3aad4a3d94a8393.zip | |
[RV64_DYNAREC] Optimize MOV64 emitter (#572)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 65 | ||||
| -rw-r--r-- | src/include/bitutils.h | 7 | ||||
| -rw-r--r-- | src/tools/bitutils.c | 17 |
3 files changed, 48 insertions, 41 deletions
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 204b891d..02be87d5 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -5,6 +5,7 @@
 #include <errno.h>
 #include <assert.h>

+#include "bitutils.h"
 #include "debug.h"
 #include "box64context.h"
 #include "dynarec.h"
@@ -518,57 +519,39 @@ void fpu_popcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07)

 void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val)
 {
-    int32_t up=((val+0x800)>>12);
-    int32_t r = val&0xfff;
-    LUI(reg, up);
-    if(r) {
-        ADDI(reg, reg, r);
+    // Depending on val, the following insns are emitted.
+    // val == 0 -> ADDI
+    // lo12 != 0 && hi20 == 0 -> ADDI
+    // lo12 == 0 && hi20 != 0 -> LUI
+    // else -> LUI+ADDI
+    int32_t hi20 = (val+0x800)>>12 & 0xfffff;
+    int32_t lo12 = val&0xfff;
+
+    int src = xZR;
+    if (hi20) {
+        LUI(reg, hi20);
+        src = reg;
     }
+    if (lo12 || !hi20) ADDI(reg, src, lo12);
 }

 void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val)
 {
-    if(((val<<(64-12))>>(64-12))==val) {
-        // simple 12bit value
-        MOV_U12(reg, (val&0xfff));
-        return;
-    }
     if(((val<<32)>>32)==val) {
         // 32bits value
         rv64_move32(dyn, ninst, reg, val);
         return;
     }
-    if((val&0xffffffffLL)==val && (val&0x80000000)) {
-        // 32bits value, but with a sign bit
-        rv64_move32(dyn, ninst, reg, val);
-        ZEROUP(reg);
-        return;
-    }
-    //TODO: optimize that later
-    // Start with the upper 32bits
-    rv64_move32(dyn, ninst, reg, val>>32);
-    // now the lower part
-    uint32_t r = val&0xffffffff;
-    int s = 11;
-    if((r>>21)&0b11111111111) {
-        SLLI(reg, reg, s);
-        ORI(reg, reg, (r>>21)&0b11111111111);
-        s = 0;
-    }
-    s+=11;
-    if((r>>10)&0b11111111111) {
-        SLLI(reg, reg, s);
-        ORI(reg, reg, (r>>10)&0b11111111111);
-        s = 0;
-    }
-    s+=10;
-    if(r&0b1111111111) {
-        SLLI(reg, reg, s);
-        ORI(reg, reg, r&0b1111111111);
-        s=0;
-    }
-    if(s) {
-        SLLI(reg, reg, s);
+
+    int64_t lo12 = (val<<52)>>52;
+    int64_t hi52 = (val+0x800)>>12;
+    int shift = 12+TrailingZeros64((uint64_t)hi52);
+    hi52 = ((hi52>>(shift-12))<<shift)>>shift;
+    rv64_move64(dyn, ninst, reg, hi52);
+    SLLI(reg, reg, shift);
+
+    if (lo12) {
+        ADDI(reg, reg, lo12);
     }
 }

diff --git a/src/include/bitutils.h b/src/include/bitutils.h
new file mode 100644
index 00000000..075e8e0b
--- /dev/null
+++ b/src/include/bitutils.h
@@ -0,0 +1,7 @@
+#ifndef __BITUTILS_H_
+#define __BITUTILS_H_
+#include <stdint.h>
+
+int TrailingZeros64(uint64_t x);
+
+#endif //__BITUTILS_H_
diff --git a/src/tools/bitutils.c b/src/tools/bitutils.c
new file mode 100644
index 00000000..518085c2
--- /dev/null
+++ b/src/tools/bitutils.c
@@ -0,0 +1,17 @@
+#include <stdint.h>
+
+const uint64_t deBruijn64 = 0x03f79d71b4ca8b09;
+const uint8_t deBruijn64tab[64] = {
+    0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
+    62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
+    63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
+    54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
+};
+
+int TrailingZeros64(uint64_t x) {
+    if (x == 0) {
+        return 64;
+    }
+
+    return (int)deBruijn64tab[(x&-x)*deBruijn64>>(64-6)];
+}
```