summary refs log tree commit diff stats
path: root/migration/ram.c
diff options
context:
space:
mode:
authorling xu <ling1.xu@intel.com>2022-11-16 23:29:22 +0800
committerJuan Quintela <quintela@redhat.com>2023-02-11 16:51:09 +0100
commit04ffce137b6d85ab4e7687e54e4dffcef0a9ab99 (patch)
tree857d2986fa10dc683585f56c742038b413f9ae5e /migration/ram.c
parente26470501271adf22e4f37d218c2164884ae96fb (diff)
downloadfocaccia-qemu-04ffce137b6d85ab4e7687e54e4dffcef0a9ab99.tar.gz
focaccia-qemu-04ffce137b6d85ab4e7687e54e4dffcef0a9ab99.zip
AVX512 support for xbzrle_encode_buffer
This commit is the same with [PATCH v6 1/2], and provides avx512 support for xbzrle_encode_buffer
function to accelerate xbzrle encoding speed. Runtime check of avx512
support and benchmark for this feature are added. Compared with C
version of xbzrle_encode_buffer function, avx512 version can achieve
50%-70% performance improvement on benchmarking. In addition, if dirty
data is randomly located in 4K page, the avx512 version can achieve
almost 140% performance gain.

Signed-off-by: ling xu <ling1.xu@intel.com>
Co-authored-by: Zhou Zhao <zhou.zhao@intel.com>
Co-authored-by: Jun Jin <jun.i.jin@intel.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Diffstat (limited to 'migration/ram.c')
-rw-r--r--migration/ram.c34
1 files changed, 31 insertions, 3 deletions
diff --git a/migration/ram.c b/migration/ram.c
index 0890816a30..18ac68b181 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -83,6 +83,34 @@
 /* 0x80 is reserved in migration.h start with 0x100 next */
 #define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
 
+int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int,
+     uint8_t *, int) = xbzrle_encode_buffer;
+#if defined(CONFIG_AVX512BW_OPT)
+#include "qemu/cpuid.h"
+static void __attribute__((constructor)) init_cpu_flag(void)
+{
+    unsigned max = __get_cpuid_max(0, NULL);
+    int a, b, c, d;
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);
+         /* We must check that AVX is not just available, but usable.  */
+        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
+            int bv;
+            __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
+            __cpuid_count(7, 0, a, b, c, d);
+           /* 0xe6:
+            *  XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15
+            *                    and ZMM16-ZMM31 state are enabled by OS)
+            *  XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS)
+            */
+            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512BW)) {
+                xbzrle_encode_buffer_func = xbzrle_encode_buffer_avx512;
+            }
+        }
+    }
+}
+#endif
+
 XBZRLECacheStats xbzrle_counters;
 
 /* used by the search for pages to send */
@@ -806,9 +834,9 @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss,
     memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
 
     /* XBZRLE encoding (if there is no overflow) */
-    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
-                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
-                                       TARGET_PAGE_SIZE);
+    encoded_len = xbzrle_encode_buffer_func(prev_cached_page, XBZRLE.current_buf,
+                                            TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
+                                            TARGET_PAGE_SIZE);
 
     /*
      * Update the cache contents, so that it corresponds to the data