diff options
| author | Christian Krinitsin <mail@krinitsin.com> | 2025-07-17 09:10:43 +0200 |
|---|---|---|
| committer | Christian Krinitsin <mail@krinitsin.com> | 2025-07-17 09:10:43 +0200 |
| commit | f2ec263023649e596c5076df32c2d328bc9393d2 (patch) | |
| tree | 5dd86caab46e552bd2e62bf9c4fb1a7504a44db4 /results/scraper/fex/2818 | |
| parent | 63d2e9d409831aa8582787234cae4741847504b7 (diff) | |
| download | qemu-analysis-main.tar.gz qemu-analysis-main.zip | |
Diffstat (limited to 'results/scraper/fex/2818')
| -rw-r--r-- | results/scraper/fex/2818 | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/results/scraper/fex/2818 b/results/scraper/fex/2818 new file mode 100644 index 000000000..04745f6db --- /dev/null +++ b/results/scraper/fex/2818 @@ -0,0 +1,55 @@ +PSAD has unnecessary moves +Can be seen in `OpSize/66_F6.asm` + +```asm +adr x0, #-0x4 (addr 0x7fffe2080844) +str x0, [x28, #184] +ldr q4, [x6, #240] +uabdl v5.8h, v24.8b, v4.8b +uabdl2 v4.8h, v24.16b, v4.16b +addv h5, v5.8h +addv h4, v4.8h +mov v0.16b, v5.16b +mov v0.d[1], v4.d[0] +mov v24.16b, v0.16b +ldr x0, pc+8 (addr 0x7fffe2080878) +blr x0 +``` + +The three moves at the end are unnecessary and can be implemented as follows. +```asm +adr x0, #-0x4 (addr 0x7fffe2080844) +str x0, [x28, #184] +ldr q4, [x6, #240] +uabdl v5.8h, v24.8b, v4.8b +uabdl2 v4.8h, v24.16b, v4.16b +addv v24.16b, v5.8h +addv h4, v4.8h +mov v24.d[1], v4.d[0] +ldr x0, pc+8 (addr 0x7fffe2080878) +blr x0 +``` + +IR: +``` + (%2) CodeBlock %3, %17 + (%3 i0) BeginBlock %2(Invalid) + (%4 i0) GuestOpcode #0x0 + %5(FPRFixed8) i128 = LoadRegister #0x0, #0x140, FPR, FPRFixed, u8:Tmp:Size + %6(GPRFixed2) i64 = LoadRegister #0x0, #0x18, GPR, GPRFixed, u8:Tmp:Size + (%7 i64) InlineConstant #0xf0 + %8(FPR0) i128 = LoadMem FPR, u8:Tmp:Size, %6(GPRFixed2) i64, %7(Invalid), #0x10, SXTX, #0x1 + %9(FPR1) i16v8 = VUABDL u8:Tmp:RegisterSize, u8:Tmp:ElementSize, %5(FPRFixed8) i128, %8(FPR0) i128 + %10(FPR0) i16v8 = VUABDL2 u8:Tmp:RegisterSize, u8:Tmp:ElementSize, %5(FPRFixed8) i128, %8(FPR0) i128 + %11(FPR1) i16v8 = VAddV u8:Tmp:RegisterSize, u8:Tmp:ElementSize, %9(FPR1) i16v8 + %12(FPR0) i16v8 = VAddV u8:Tmp:RegisterSize, u8:Tmp:ElementSize, %10(FPR0) i16v8 + %13(FPRFixed8) i64v2 = VInsElement u8:Tmp:RegisterSize, u8:Tmp:ElementSize, #0x1, #0x0, %11(FPR1) i16v8, %12(FPR0) i16v8 + (%14 i128) StoreRegister %13(FPRFixed8) i64v2, #0x0, #0x140, FPR, FPRFixed, u8:Tmp:Size + (%15 i64) InlineEntrypointOffset #0x9, u8:Tmp:RegisterSize + (%16 i64) ExitFunction %15(Invalid) + (%17 i0) EndBlock %2(Invalid) +``` + +This is because the first `VAddV` doesn't understand it can get stored in to the vector. Which means the `VInsertElement` does a mov+mov+mov. + +Care must be taken as usual that if the destination is to memory, that we don't turn the store in to multiple memory stores. \ No newline at end of file |