summary refs log tree commit diff stats
path: root/results/scraper/fex/2802
diff options
context:
space:
mode:
Diffstat (limited to 'results/scraper/fex/2802')
-rw-r--r--results/scraper/fex/280262
1 files changed, 62 insertions, 0 deletions
diff --git a/results/scraper/fex/2802 b/results/scraper/fex/2802
new file mode 100644
index 000000000..8bee7f580
--- /dev/null
+++ b/results/scraper/fex/2802
@@ -0,0 +1,62 @@
+CMPXCHG16B no longer produces optimal code
+This /used/ to produce optimal code before SRA was introduced, and maybe before some enums were rearranged.

+

+Test case:

+```asm

+%ifdef CONFIG

+{

+}

+%endif

+

+mov rsp, 0xe0001000

+cmpxchg16b [rsp]

+

+hlt

+```

+

+ASM for cmpxchg16b

+```asm

+0x00007f3ec2e80060  10ffffe0            adr x0, #-0x4 (addr 0x7f3ec2e8005c)

+0x00007f3ec2e80064  f9005f80            str x0, [x28, #184]

+; Moving SRA registers RDX:RAX in to element pair (expected).

+0x00007f3ec2e80068  aa0403f4            mov x20, x4

+0x00007f3ec2e8006c  aa0603f5            mov x21, x6

+; Moving SRA registers RCX:RBX in to element pair (Desired).

+0x00007f3ec2e80070  aa0703f6            mov x22, x7

+0x00007f3ec2e80074  aa0503f7            mov x23, x5

+; Moving *expected* in to temporaries. Because the ARM instruction overwrites these . (In IR Op)

+; We don't detect when Dst == Expected, nor have constraints to enforce it.

+0x00007f3ec2e80078  aa1403e2            mov x2, x20

+0x00007f3ec2e8007c  aa1503e3            mov x3, x21

+; Rock the caspal

+0x00007f3ec2e80080  4862fd16            caspal x2, x3, x22, x23, [x8]

+; Move the result back in to destination (In IR Op)

+0x00007f3ec2e80084  aa0203f4            mov x20, x2

+0x00007f3ec2e80088  aa0303f5            mov x21, x3

+; ExtractElementPair(0) - Ugh RA.

+0x00007f3ec2e8008c  aa1403f6            mov x22, x20

+; ExtractElementPair(1) - Ugh RA.

+0x00007f3ec2e80090  aa1503f4            mov x20, x21

+; Calculate if Memory == Expected

+0x00007f3ec2e80094  ca0402d5            eor x21, x22, x4

+0x00007f3ec2e80098  ca060297            eor x23, x20, x6

+0x00007f3ec2e8009c  aa1702b5            orr x21, x21, x23

+0x00007f3ec2e800a0  f10002bf            cmp x21, #0x0 (0)

+0x00007f3ec2e800a4  9a9f17f7            cset x23, eq

+; Set ZF to result

+0x00007f3ec2e800a8  390b1b97            strb w23, [x28, #710]

+; Skip block if ZF was expected.

+0x00007f3ec2e800ac  b4000075            cbz x21, #+0xc (addr 0x7f3ec2e800b8)

+; If ZF wasn't expected, RDX:RAX gets set to the memory that was loaded.

+; Need to be careful here, CMPXCHG8B must /not/ clear the upper bits if memory was expected.

+; Could generate more optimal code if we know the upper bits are already zero, operating in 32-bit mode, or 16B since upper bits will always be set

+0x00007f3ec2e800b0  aa1603e4            mov x4, x22

+0x00007f3ec2e800b4  aa1403e6            mov x6, x20

+; Tail block.

+0x00007f3ec2e800b8  58000040            ldr x0, pc+8 (addr 0x7f3ec2e800c0)

+0x00007f3ec2e800bc  d63f0000            blr x0

+0x00007f3ec2e800c0  d8d7f128            prfm plil1keep, pc-328156 (addr 0x7f3ec2e2fee4)

+0x00007f3ec2e800c4  00007f3e            udf #0x7f3e

+0x00007f3ec2e800c8  0001000a            unallocated (Unallocated)

+0x00007f3ec2e800cc  00000000            udf #0x0

+```
\ No newline at end of file