summary refs log tree commit diff stats
path: root/util/bufferiszero.c
diff options
context:
space:
mode:
author: Alexander Monakov <amonakov@ispras.ru> 2024-02-06 23:48:07 +0300
committer: Richard Henderson <richard.henderson@linaro.org> 2024-05-03 08:03:05 -0700
commit: 93a6085618f16fb2cd316d1e84f1a638b7e2d8ff (patch)
tree: 68e4b09262ecdf1029e21b95ac5009ba53212579 /util/bufferiszero.c
parent: cbe3d5264631aa193fd2705820cbde6c5a602abb (diff)
download: focaccia-qemu-93a6085618f16fb2cd316d1e84f1a638b7e2d8ff.tar.gz
focaccia-qemu-93a6085618f16fb2cd316d1e84f1a638b7e2d8ff.zip
util/bufferiszero: Remove useless prefetches
Use of prefetching in bufferiszero.c is quite questionable:

- prefetches are issued just a few CPU cycles before the corresponding
  line would be hit by demand loads;

- they are done for simple access patterns, i.e. where hardware
  prefetchers can perform better;

- they compete for load ports in loops that should be limited by load
  port throughput rather than ALU throughput.

Signed-off-by: Alexander Monakov <amonakov@ispras.ru>
Signed-off-by: Mikhail Romanov <mmromanov@ispras.ru>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20240206204809.9859-5-amonakov@ispras.ru>
Diffstat (limited to 'util/bufferiszero.c')
-rw-r--r-- util/bufferiszero.c 3
1 files changed, 0 insertions, 3 deletions
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 972f394cbd..00118d649e 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -50,7 +50,6 @@ static bool buffer_is_zero_integer(const void *buf, size_t len)
         const uint64_t *e = (uint64_t *)(((uintptr_t)buf + len) & -8);
 
         for (; p + 8 <= e; p += 8) {
-            __builtin_prefetch(p + 8);
             if (t) {
                 return false;
             }
@@ -80,7 +79,6 @@ buffer_zero_sse2(const void *buf, size_t len)
 
     /* Loop over 16-byte aligned blocks of 64.  */
     while (likely(p <= e)) {
-        __builtin_prefetch(p);
         t = _mm_cmpeq_epi8(t, zero);
         if (unlikely(_mm_movemask_epi8(t) != 0xFFFF)) {
             return false;
@@ -111,7 +109,6 @@ buffer_zero_avx2(const void *buf, size_t len)
 
     /* Loop over 32-byte aligned blocks of 128.  */
     while (p <= e) {
-        __builtin_prefetch(p);
         if (unlikely(!_mm256_testz_si256(t, t))) {
             return false;
         }