summary refs log tree commit diff stats
path: root/results/scraper/fex/3794
diff options
context:
space:
mode:
Diffstat (limited to 'results/scraper/fex/3794')
-rw-r--r--results/scraper/fex/3794158
1 files changed, 158 insertions, 0 deletions
diff --git a/results/scraper/fex/3794 b/results/scraper/fex/3794
new file mode 100644
index 000000000..90ab2e99b
--- /dev/null
+++ b/results/scraper/fex/3794
@@ -0,0 +1,158 @@
+AVX128: Some FMA instruction having extraneous moves
+Haven't looked at why these are occuring, maybe the backend implementation needs to check if destination isn't one of the incoming sources? Doesn't occur on the SVE side. The 231 variants are the most interesting here since they shouldn't need any redundant moves unless potentially one of the other sources overlaps the destination (In the case that a negation is needed).

+

+<details>

+  <summary>Negated FMA ops</summary>

+

+```json

+    "vfmsub231ps xmm0, xmm1, xmm2": {

+      "ExpectedInstructionCount": 5,

+      "Comment": [

+        "Map 2 0b01 0xba 128-bit"

+      ],

+      "ExpectedArm64ASM": [

+        "fneg v0.4s, v16.4s",

+        "fmla v0.4s, v17.4s, v18.4s",

+        "mov v16.16b, v0.16b",

+        "movi v2.2d, #0x0",

+        "str q2, [x28, #16]"

+      ]

+    },

+    "vfmsub231ps ymm0, ymm1, ymm2": {

+      "ExpectedInstructionCount": 10,

+      "Comment": [

+        "Map 2 0b01 0xba 256-bit"

+      ],

+      "ExpectedArm64ASM": [

+        "ldr q2, [x28, #16]",

+        "ldr q3, [x28, #32]",

+        "ldr q4, [x28, #48]",

+        "fneg v0.4s, v16.4s",

+        "fmla v0.4s, v17.4s, v18.4s",

+        "mov v16.16b, v0.16b",

+        "fneg v0.4s, v2.4s",

+        "fmla v0.4s, v3.4s, v4.4s",

+        "mov v2.16b, v0.16b",

+        "str q2, [x28, #16]"

+      ]

+    },

+    "vfmsub231pd xmm0, xmm1, xmm2": {

+      "ExpectedInstructionCount": 5,

+      "Comment": [

+        "Map 2 0b01 0xba 128-bit"

+      ],

+      "ExpectedArm64ASM": [

+        "fneg v0.2d, v16.2d",

+        "fmla v0.2d, v17.2d, v18.2d",

+        "mov v16.16b, v0.16b",

+        "movi v2.2d, #0x0",

+        "str q2, [x28, #16]"

+      ]

+    },

+    "vfmsub231pd ymm0, ymm1, ymm2": {

+      "ExpectedInstructionCount": 10,

+      "Comment": [

+        "Map 2 0b01 0xba 256-bit"

+      ],

+      "ExpectedArm64ASM": [

+        "ldr q2, [x28, #16]",

+        "ldr q3, [x28, #32]",

+        "ldr q4, [x28, #48]",

+        "fneg v0.2d, v16.2d",

+        "fmla v0.2d, v17.2d, v18.2d",

+        "mov v16.16b, v0.16b",

+        "fneg v0.2d, v2.2d",

+        "fmla v0.2d, v3.2d, v4.2d",

+        "mov v2.16b, v0.16b",

+        "str q2, [x28, #16]"

+      ]

+    },

+    "vfnmsub231ps xmm0, xmm1, xmm2": {

+      "ExpectedInstructionCount": 5,

+      "Comment": [

+        "Map 2 0b01 0xbe 128-bit"

+      ],

+      "ExpectedArm64ASM": [

+        "fneg v0.4s, v16.4s",

+        "fmls v0.4s, v17.4s, v18.4s",

+        "mov v16.16b, v0.16b",

+        "movi v2.2d, #0x0",

+        "str q2, [x28, #16]"

+      ]

+    },

+    "vfnmsub231ps ymm0, ymm1, ymm2": {

+      "ExpectedInstructionCount": 10,

+      "Comment": [

+        "Map 2 0b01 0xbe 256-bit"

+      ],

+      "ExpectedArm64ASM": [

+        "ldr q2, [x28, #16]",

+        "ldr q3, [x28, #32]",

+        "ldr q4, [x28, #48]",

+        "fneg v0.4s, v16.4s",

+        "fmls v0.4s, v17.4s, v18.4s",

+        "mov v16.16b, v0.16b",

+        "fneg v0.4s, v2.4s",

+        "fmls v0.4s, v3.4s, v4.4s",

+        "mov v2.16b, v0.16b",

+        "str q2, [x28, #16]"

+      ]

+    },

+   "vfnmsub231pd xmm0, xmm1, xmm2": {

+      "ExpectedInstructionCount": 5,

+      "Comment": [

+        "Map 2 0b01 0xbe 128-bit"

+      ],

+      "ExpectedArm64ASM": [

+        "fneg v0.2d, v16.2d",

+        "fmls v0.2d, v17.2d, v18.2d",

+        "mov v16.16b, v0.16b",

+        "movi v2.2d, #0x0",

+        "str q2, [x28, #16]"

+      ]

+    },

+    "vfnmsub231pd ymm0, ymm1, ymm2": {

+      "ExpectedInstructionCount": 10,

+      "Comment": [

+        "Map 2 0b01 0xbe 256-bit"

+      ],

+      "ExpectedArm64ASM": [

+        "ldr q2, [x28, #16]",

+        "ldr q3, [x28, #32]",

+        "ldr q4, [x28, #48]",

+        "fneg v0.2d, v16.2d",

+        "fmls v0.2d, v17.2d, v18.2d",

+        "mov v16.16b, v0.16b",

+        "fneg v0.2d, v2.2d",

+        "fmls v0.2d, v3.2d, v4.2d",

+        "mov v2.16b, v0.16b",

+        "str q2, [x28, #16]"

+      ]

+    },

+```

+</details>

+

+Additionally the addsubs have some extraneous moves as well due to the negations occuring, which should be able to be resolved. This happens on both the ASIMD and SVE sides.

+

+<details>

+  <summary>addsub/subadd ops</summary>

+

+```

+    "vfmaddsub231ps xmm0, xmm1, xmm2": {

+      "ExpectedInstructionCount": 7,

+      "Comment": [

+        "Map 2 0b01 0xb6 128-bit"

+      ],

+      "ExpectedArm64ASM": [

+        "ldr q2, [x28, #2384]",

+        "eor v2.16b, v16.16b, v2.16b",

+        "mov v0.16b, v2.16b",

+        "fmla v0.4s, v17.4s, v18.4s",

+        "mov v16.16b, v0.16b",

+        "movi v2.2d, #0x0",

+        "str q2, [x28, #16]"

+      ]

+    },

+... etc, etc

+```

+</details>