summary refs log tree commit diff stats
path: root/util/bufferiszero.c
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2020-04-02 20:18:25 +0100
committerPeter Maydell <peter.maydell@linaro.org>2020-04-02 20:18:25 +0100
commit5142ca078d1cbc0f77b0f385d28cdb3e504e62bd (patch)
tree1ff8a252bfe42cd48fb2f9b661adbbd6d0bcc786 /util/bufferiszero.c
parent2833ad487cfff7dc33703e4731b75facde1c561e (diff)
parent0dc0389fa5455bb264866701892ed06bc3eb06e4 (diff)
downloadfocaccia-qemu-5142ca078d1cbc0f77b0f385d28cdb3e504e62bd.tar.gz
focaccia-qemu-5142ca078d1cbc0f77b0f385d28cdb3e504e62bd.zip
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
Bugfixes for 5.0-rc2.

# gpg: Signature made Thu 02 Apr 2020 19:57:47 BST
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [full]
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream:
  xen: fixup RAM memory region initialization
  object-add: don't create return value if failed
  qmp: fix leak on callbacks that return both value and error
  migration: fix cleanup_bh leak on resume
  target/i386: do not set unsupported VMX secondary execution controls
  serial: Fix double migration data
  i386: hvf: Reset IRQ inhibition after moving RIP
  vl: fix broken IPA range for ARM -M virt with KVM enabled
  util/bufferiszero: improve avx2 accelerator
  util/bufferiszero: assign length_to_accel value for each accelerator case
  MAINTAINERS: Add an entry for the HVF accelerator
  softmmu: fix crash with invalid -M memory-backend=
  virtio-iommu: depend on PCI
  hw/isa/superio: Correct the license text
  hw/scsi/vmw_pvscsi: Remove assertion for kick after reset

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'util/bufferiszero.c')
-rw-r--r--util/bufferiszero.c27
1 files changed, 11 insertions, 16 deletions
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 663903553a..695bb4ce28 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -158,27 +158,19 @@ buffer_zero_avx2(const void *buf, size_t len)
     __m256i *p = (__m256i *)(((uintptr_t)buf + 5 * 32) & -32);
     __m256i *e = (__m256i *)(((uintptr_t)buf + len) & -32);
 
-    if (likely(p <= e)) {
-        /* Loop over 32-byte aligned blocks of 128.  */
-        do {
-            __builtin_prefetch(p);
-            if (unlikely(!_mm256_testz_si256(t, t))) {
-                return false;
-            }
-            t = p[-4] | p[-3] | p[-2] | p[-1];
-            p += 4;
-        } while (p <= e);
-    } else {
-        t |= _mm256_loadu_si256(buf + 32);
-        if (len <= 128) {
-            goto last2;
+    /* Loop over 32-byte aligned blocks of 128.  */
+    while (p <= e) {
+        __builtin_prefetch(p);
+        if (unlikely(!_mm256_testz_si256(t, t))) {
+            return false;
         }
-    }
+        t = p[-4] | p[-3] | p[-2] | p[-1];
+        p += 4;
+    } ;
 
     /* Finish the last block of 128 unaligned.  */
     t |= _mm256_loadu_si256(buf + len - 4 * 32);
     t |= _mm256_loadu_si256(buf + len - 3 * 32);
- last2:
     t |= _mm256_loadu_si256(buf + len - 2 * 32);
     t |= _mm256_loadu_si256(buf + len - 1 * 32);
 
@@ -254,13 +246,16 @@ static void init_accel(unsigned cache)
     bool (*fn)(const void *, size_t) = buffer_zero_int;
     if (cache & CACHE_SSE2) {
         fn = buffer_zero_sse2;
+        length_to_accel = 64;
     }
 #ifdef CONFIG_AVX2_OPT
     if (cache & CACHE_SSE4) {
         fn = buffer_zero_sse4;
+        length_to_accel = 64;
     }
     if (cache & CACHE_AVX2) {
         fn = buffer_zero_avx2;
+        length_to_accel = 128;
     }
 #endif
 #ifdef CONFIG_AVX512F_OPT