17 files changed, 373 insertions, 70 deletions
diff --git a/hw/display/vga.c b/hw/display/vga.c
index 28f298b342..72181330b8 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -1483,6 +1483,8 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
 
     region_start = (s->start_addr * 4);
     region_end = region_start + (ram_addr_t)s->line_offset * height;
+    region_end += width * s->get_bpp(s) / 8; /* scanline length */
+    region_end -= s->line_offset;
     if (region_end > s->vbe_size) {
         /* wraps around (can happen with cirrus vbe modes) */
         region_start = 0;
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index ec174309db..65a9196c1a 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -106,10 +106,10 @@ struct USBRedirDevice {
     USBDevice dev;
     /* Properties */
     CharBackend cs;
+    bool enable_streams;
     uint8_t debug;
-    char *filter_str;
     int32_t bootindex;
-    bool enable_streams;
+    char *filter_str;
     /* Data passed from chardev the fd_read cb to the usbredirparser read cb */
     const uint8_t *read_buf;
     int read_buf_size;
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index f7a3972200..8c90a2e66e 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -50,8 +50,8 @@
  *   }
  */
 typedef struct {
-    /* Is the main loop waiting for a kick?  Accessed with atomic ops. */
-    bool need_kick;
+    /* Number of waiting AIO_WAIT_WHILE() callers. Accessed with atomic ops. */
+    unsigned num_waiters;
 } AioWait;
 
 /**
@@ -71,35 +71,34 @@ typedef struct {
  * wait on conditions between two IOThreads since that could lead to deadlock,
  * go via the main loop instead.
  */
-#define AIO_WAIT_WHILE(wait, ctx, cond) ({                  \
-    bool waited_ = false;                                   \
-    bool busy_ = true;                                      \
-    AioWait *wait_ = (wait);                                \
-    AioContext *ctx_ = (ctx);                               \
-    if (in_aio_context_home_thread(ctx_)) {                 \
-        while ((cond) || busy_) {                           \
-            busy_ = aio_poll(ctx_, (cond));                 \
-            waited_ |= !!(cond) | busy_;                    \
-        }                                                   \
-    } else {                                                \
-        assert(qemu_get_current_aio_context() ==            \
-               qemu_get_aio_context());                     \
-        assert(!wait_->need_kick);                          \
-        /* Set wait_->need_kick before evaluating cond.  */ \
-        atomic_mb_set(&wait_->need_kick, true);             \
-        while (busy_) {                                     \
-            if ((cond)) {                                   \
-                waited_ = busy_ = true;                     \
-                aio_context_release(ctx_);                  \
-                aio_poll(qemu_get_aio_context(), true);     \
-                aio_context_acquire(ctx_);                  \
-            } else {                                        \
-                busy_ = aio_poll(ctx_, false);              \
-                waited_ |= busy_;                           \
-            }                                               \
-        }                                                   \
-        atomic_set(&wait_->need_kick, false);               \
-    }                                                       \
+#define AIO_WAIT_WHILE(wait, ctx, cond) ({                         \
+    bool waited_ = false;                                          \
+    bool busy_ = true;                                             \
+    AioWait *wait_ = (wait);                                       \
+    AioContext *ctx_ = (ctx);                                      \
+    if (in_aio_context_home_thread(ctx_)) {                        \
+        while ((cond) || busy_) {                                  \
+            busy_ = aio_poll(ctx_, (cond));                        \
+            waited_ |= !!(cond) | busy_;                           \
+        }                                                          \
+    } else {                                                       \
+        assert(qemu_get_current_aio_context() ==                   \
+               qemu_get_aio_context());                            \
+        /* Increment wait_->num_waiters before evaluating cond. */ \
+        atomic_inc(&wait_->num_waiters);                           \
+        while (busy_) {                                            \
+            if ((cond)) {                                          \
+                waited_ = busy_ = true;                            \
+                aio_context_release(ctx_);                         \
+                aio_poll(qemu_get_aio_context(), true);            \
+                aio_context_acquire(ctx_);                         \
+            } else {                                               \
+                busy_ = aio_poll(ctx_, false);                     \
+                waited_ |= busy_;                                  \
+            }                                                      \
+        }                                                          \
+        atomic_dec(&wait_->num_waiters);                           \
+    }                                                              \
     waited_; })
 
 /**
diff --git a/include/qemu/log-for-trace.h b/include/qemu/log-for-trace.h
new file mode 100644
index 0000000000..2f0a5b080e
--- /dev/null
+++ b/include/qemu/log-for-trace.h
@@ -0,0 +1,35 @@
+/* log-for-trace.h: logging basics required by the trace.h generated
+ * by the log trace backend.
+ *
+ * This should not be included directly by any .c file: if you
+ * need to use the logging functions include "qemu/log.h".
+ *
+ * The purpose of splitting these parts out into their own header
+ * is to catch the easy mistake where a .c file includes trace.h
+ * but forgets to include qemu/log.h. Without this split, that
+ * would result in the .c file compiling fine when the default
+ * trace backend is in use but failing to compile with any other
+ * backend.
+ *
+ * This code is licensed under the GNU General Public License,
+ * version 2 or (at your option) any later version.
+ */
+
+#ifndef QEMU_LOG_FOR_TRACE_H
+#define QEMU_LOG_FOR_TRACE_H
+
+/* Private global variable, don't use */
+extern int qemu_loglevel;
+
+#define LOG_TRACE          (1 << 15)
+
+/* Returns true if a bit is set in the current loglevel mask */
+static inline bool qemu_loglevel_mask(int mask)
+{
+    return (qemu_loglevel & mask) != 0;
+}
+
+/* main logging function */
+int GCC_FMT_ATTR(1, 2) qemu_log(const char *fmt, ...);
+
+#endif
diff --git a/include/qemu/log.h b/include/qemu/log.h
index a50e994c21..ff92a8b86a 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -1,10 +1,11 @@
 #ifndef QEMU_LOG_H
 #define QEMU_LOG_H
 
+/* A small part of this API is split into its own header */
+#include "qemu/log-for-trace.h"
 
-/* Private global variables, don't use */
+/* Private global variable, don't use */
 extern FILE *qemu_logfile;
-extern int qemu_loglevel;
 
 /* 
  * The new API:
@@ -41,16 +42,9 @@ static inline bool qemu_log_separate(void)
 #define CPU_LOG_MMU        (1 << 12)
 #define CPU_LOG_TB_NOCHAIN (1 << 13)
 #define CPU_LOG_PAGE       (1 << 14)
-#define LOG_TRACE          (1 << 15)
+/* LOG_TRACE (1 << 15) is defined in log-for-trace.h */
 #define CPU_LOG_TB_OP_IND  (1 << 16)
 
-/* Returns true if a bit is set in the current loglevel mask
- */
-static inline bool qemu_loglevel_mask(int mask)
-{
-    return (qemu_loglevel & mask) != 0;
-}
-
 /* Lock output for a series of related logs.  Since this is not needed
  * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we
  * assume that qemu_loglevel_mask has already been tested, and that
@@ -69,10 +63,6 @@ static inline void qemu_log_unlock(void)
 
 /* Logging functions: */
 
-/* main logging function
- */
-int GCC_FMT_ATTR(1, 2) qemu_log(const char *fmt, ...);
-
 /* vfprintf-like logging function
  */
 static inline void GCC_FMT_ATTR(1, 0)
diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py
index a3a6315055..9d45c6ba4e 100755
--- a/scripts/simpletrace.py
+++ b/scripts/simpletrace.py
@@ -168,7 +168,7 @@ class Analyzer(object):
 def process(events, log, analyzer, read_header=True):
     """Invoke an analyzer on each event in a log."""
     if isinstance(events, str):
-        events = read_events(open(events, 'r'))
+        events = read_events(open(events, 'r'), events)
     if isinstance(log, str):
         log = open(log, 'rb')
 
@@ -199,7 +199,7 @@ def process(events, log, analyzer, read_header=True):
         fn_argcount = len(inspect.getargspec(fn)[0]) - 1
         if fn_argcount == event_argcount + 1:
             # Include timestamp as first argument
-            return lambda _, rec: fn(*((rec[1:2],) + rec[3:3 + event_argcount]))
+            return lambda _, rec: fn(*(rec[1:2] + rec[3:3 + event_argcount]))
         elif fn_argcount == event_argcount + 2:
             # Include timestamp and pid
             return lambda _, rec: fn(*rec[1:3 + event_argcount])
@@ -233,7 +233,7 @@ def run(analyzer):
                          '<trace-file>\n' % sys.argv[0])
         sys.exit(1)
 
-    events = read_events(open(sys.argv[1], 'r'))
+    events = read_events(open(sys.argv[1], 'r'), sys.argv[1])
     process(events, sys.argv[2], analyzer, read_header=read_header)
 
 if __name__ == '__main__':
diff --git a/scripts/tracetool.py b/scripts/tracetool.py
index c55a21518b..fe2b0771f2 100755
--- a/scripts/tracetool.py
+++ b/scripts/tracetool.py
@@ -142,7 +142,7 @@ def main(args):
     events = []
     for arg in args:
         with open(arg, "r") as fh:
-            events.extend(tracetool.read_events(fh))
+            events.extend(tracetool.read_events(fh, arg))
 
     try:
         tracetool.generate(events, arg_group, arg_format, arg_backends,
diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py
index 3646c2b9fc..b20fac34a3 100644
--- a/scripts/tracetool/__init__.py
+++ b/scripts/tracetool/__init__.py
@@ -41,6 +41,51 @@ def out(*lines, **kwargs):
     lines = [ l % kwargs for l in lines ]
     sys.stdout.writelines("\n".join(lines) + "\n")
 
+# We only want to allow standard C types or fixed sized
+# integer types. We don't want QEMU specific types
+# as we can't assume trace backends can resolve all the
+# typedefs
+ALLOWED_TYPES = [
+    "int",
+    "long",
+    "short",
+    "char",
+    "bool",
+    "unsigned",
+    "signed",
+    "float",
+    "double",
+    "int8_t",
+    "uint8_t",
+    "int16_t",
+    "uint16_t",
+    "int32_t",
+    "uint32_t",
+    "int64_t",
+    "uint64_t",
+    "void",
+    "size_t",
+    "ssize_t",
+    "uintptr_t",
+    "ptrdiff_t",
+    # Magic substitution is done by tracetool
+    "TCGv",
+]
+
+def validate_type(name):
+    bits = name.split(" ")
+    for bit in bits:
+        bit = re.sub("\*", "", bit)
+        if bit == "":
+            continue
+        if bit == "const":
+            continue
+        if bit not in ALLOWED_TYPES:
+            raise ValueError("Argument type '%s' is not in whitelist. "
+                             "Only standard C types and fixed size integer "
+                             "types should be used. struct, union, and "
+                             "other complex pointer types should be "
+                             "declared as 'void *'" % name)
 
 class Arguments:
     """Event arguments description."""
@@ -87,6 +132,7 @@ class Arguments:
             else:
                 arg_type, identifier = arg.rsplit(None, 1)
 
+            validate_type(arg_type)
             res.append((arg_type, identifier))
         return Arguments(res)
 
@@ -291,13 +337,15 @@ class Event(object):
                      self)
 
 
-def read_events(fobj):
+def read_events(fobj, fname):
     """Generate the output for the given (format, backends) pair.
 
     Parameters
     ----------
     fobj : file
         Event description file.
+    fname : str
+        Name of event file
 
     Returns a list of Event objects
     """
@@ -312,7 +360,7 @@ def read_events(fobj):
         try:
             event = Event.build(line)
         except ValueError as e:
-            arg0 = 'Error on line %d: %s' % (lineno, e.args[0])
+            arg0 = 'Error at %s:%d: %s' % (fname, lineno, e.args[0])
             e.args = (arg0,) + e.args[1:]
             raise
 
diff --git a/scripts/tracetool/backend/log.py b/scripts/tracetool/backend/log.py
index da86f6b882..78933d03ad 100644
--- a/scripts/tracetool/backend/log.py
+++ b/scripts/tracetool/backend/log.py
@@ -20,7 +20,7 @@ PUBLIC = True
 
 
 def generate_h_begin(events, group):
-    out('#include "qemu/log.h"',
+    out('#include "qemu/log-for-trace.h"',
         '')
 
 
@@ -35,14 +35,13 @@ def generate_h(event, group):
     else:
         cond = "trace_event_get_state(%s)" % ("TRACE_" + event.name.upper())
 
-    out('    if (%(cond)s) {',
+    out('    if (%(cond)s && qemu_loglevel_mask(LOG_TRACE)) {',
         '        struct timeval _now;',
         '        gettimeofday(&_now, NULL);',
-        '        qemu_log_mask(LOG_TRACE,',
-        '                      "%%d@%%zd.%%06zd:%(name)s " %(fmt)s "\\n",',
-        '                      getpid(),',
-        '                      (size_t)_now.tv_sec, (size_t)_now.tv_usec',
-        '                      %(argnames)s);',
+        '        qemu_log("%%d@%%zd.%%06zd:%(name)s " %(fmt)s "\\n",',
+        '                 getpid(),',
+        '                 (size_t)_now.tv_sec, (size_t)_now.tv_usec',
+        '                 %(argnames)s);',
         '    }',
         cond=cond,
         name=event.name,
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 2c04645cea..ec1efd3a3c 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -173,7 +173,32 @@
 #define L2_ITLB_4K_ASSOC       4
 #define L2_ITLB_4K_ENTRIES   512
 
-
+/* CPUID Leaf 0x14 constants: */
+#define INTEL_PT_MAX_SUBLEAF     0x1
+/*
+ * bit[00]: IA32_RTIT_CTL.CR3 filter can be set to 1 and IA32_RTIT_CR3_MATCH
+ *          MSR can be accessed;
+ * bit[01]: Support Configurable PSB and Cycle-Accurate Mode;
+ * bit[02]: Support IP Filtering, TraceStop filtering, and preservation
+ *          of Intel PT MSRs across warm reset;
+ * bit[03]: Support MTC timing packet and suppression of COFI-based packets;
+ */
+#define INTEL_PT_MINIMAL_EBX     0xf
+/*
+ * bit[00]: Tracing can be enabled with IA32_RTIT_CTL.ToPA = 1 and
+ *          IA32_RTIT_OUTPUT_BASE and IA32_RTIT_OUTPUT_MASK_PTRS MSRs can be
+ *          accessed;
+ * bit[01]: ToPA tables can hold any number of output entries, up to the
+ *          maximum allowed by the MaskOrTableOffset field of
+ *          IA32_RTIT_OUTPUT_MASK_PTRS;
+ * bit[02]: Support Single-Range Output scheme;
+ */
+#define INTEL_PT_MINIMAL_ECX     0x7
+#define INTEL_PT_ADDR_RANGES_NUM 0x2 /* Number of configurable address ranges */
+#define INTEL_PT_ADDR_RANGES_NUM_MASK 0x3
+#define INTEL_PT_MTC_BITMAP      (0x0249 << 16) /* Support ART(0,3,6,9) */
+#define INTEL_PT_CYCLE_BITMAP    0x1fff         /* Support 0,2^(0~11) */
+#define INTEL_PT_PSB_BITMAP      (0x003f << 16) /* Support 2K,4K,8K,16K,32K,64K */
 
 static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
                                      uint32_t vendor2, uint32_t vendor3)
@@ -359,6 +384,20 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
         .cpuid_eax = KVM_CPUID_FEATURES, .cpuid_reg = R_EAX,
         .tcg_features = TCG_KVM_FEATURES,
     },
+    [FEAT_KVM_HINTS] = {
+        .feat_names = {
+            "kvm-hint-dedicated", NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+            NULL, NULL, NULL, NULL,
+        },
+        .cpuid_eax = KVM_CPUID_FEATURES, .cpuid_reg = R_EDX,
+        .tcg_features = TCG_KVM_FEATURES,
+    },
     [FEAT_HYPERV_EAX] = {
         .feat_names = {
             NULL /* hv_msr_vp_runtime_access */, NULL /* hv_msr_time_refcount_access */,
@@ -428,7 +467,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
             NULL, NULL, "mpx", NULL,
             "avx512f", "avx512dq", "rdseed", "adx",
             "smap", "avx512ifma", "pcommit", "clflushopt",
-            "clwb", NULL, "avx512pf", "avx512er",
+            "clwb", "intel-pt", "avx512pf", "avx512er",
             "avx512cd", "sha-ni", "avx512bw", "avx512vl",
         },
         .cpuid_eax = 7,
@@ -3453,6 +3492,27 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         }
         break;
     }
+    case 0x14: {
+        /* Intel Processor Trace Enumeration */
+        *eax = 0;
+        *ebx = 0;
+        *ecx = 0;
+        *edx = 0;
+        if (!(env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) ||
+            !kvm_enabled()) {
+            break;
+        }
+
+        if (count == 0) {
+            *eax = INTEL_PT_MAX_SUBLEAF;
+            *ebx = INTEL_PT_MINIMAL_EBX;
+            *ecx = INTEL_PT_MINIMAL_ECX;
+        } else if (count == 1) {
+            *eax = INTEL_PT_MTC_BITMAP | INTEL_PT_ADDR_RANGES_NUM;
+            *ebx = INTEL_PT_PSB_BITMAP | INTEL_PT_CYCLE_BITMAP;
+        }
+        break;
+    }
     case 0x40000000:
         /*
          * CPUID code in kvm_arch_init_vcpu() ignores stuff
@@ -4083,6 +4143,34 @@ static int x86_cpu_filter_features(X86CPU *cpu)
         }
     }
 
+    if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) &&
+        kvm_enabled()) {
+        KVMState *s = CPU(cpu)->kvm_state;
+        uint32_t eax_0 = kvm_arch_get_supported_cpuid(s, 0x14, 0, R_EAX);
+        uint32_t ebx_0 = kvm_arch_get_supported_cpuid(s, 0x14, 0, R_EBX);
+        uint32_t ecx_0 = kvm_arch_get_supported_cpuid(s, 0x14, 0, R_ECX);
+        uint32_t eax_1 = kvm_arch_get_supported_cpuid(s, 0x14, 1, R_EAX);
+        uint32_t ebx_1 = kvm_arch_get_supported_cpuid(s, 0x14, 1, R_EBX);
+
+        if (!eax_0 ||
+           ((ebx_0 & INTEL_PT_MINIMAL_EBX) != INTEL_PT_MINIMAL_EBX) ||
+           ((ecx_0 & INTEL_PT_MINIMAL_ECX) != INTEL_PT_MINIMAL_ECX) ||
+           ((eax_1 & INTEL_PT_MTC_BITMAP) != INTEL_PT_MTC_BITMAP) ||
+           ((eax_1 & INTEL_PT_ADDR_RANGES_NUM_MASK) <
+                                           INTEL_PT_ADDR_RANGES_NUM) ||
+           ((ebx_1 & (INTEL_PT_PSB_BITMAP | INTEL_PT_CYCLE_BITMAP)) !=
+                (INTEL_PT_PSB_BITMAP | INTEL_PT_CYCLE_BITMAP))) {
+            /*
+             * Processor Trace capabilities aren't configurable, so if the
+             * host can't emulate the capabilities we report on
+             * cpu_x86_cpuid(), intel-pt can't be enabled on the current host.
+             */
+            env->features[FEAT_7_0_EBX] &= ~CPUID_7_0_EBX_INTEL_PT;
+            cpu->filtered_features[FEAT_7_0_EBX] |= CPUID_7_0_EBX_INTEL_PT;
+            rv = 1;
+        }
+    }
+
     return rv;
 }
 
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index faf39ec1ce..0c3f51445e 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -415,6 +415,21 @@ typedef enum X86Seg {
 #define MSR_MC0_ADDR                    0x402
 #define MSR_MC0_MISC                    0x403
 
+#define MSR_IA32_RTIT_OUTPUT_BASE       0x560
+#define MSR_IA32_RTIT_OUTPUT_MASK       0x561
+#define MSR_IA32_RTIT_CTL               0x570
+#define MSR_IA32_RTIT_STATUS            0x571
+#define MSR_IA32_RTIT_CR3_MATCH         0x572
+#define MSR_IA32_RTIT_ADDR0_A           0x580
+#define MSR_IA32_RTIT_ADDR0_B           0x581
+#define MSR_IA32_RTIT_ADDR1_A           0x582
+#define MSR_IA32_RTIT_ADDR1_B           0x583
+#define MSR_IA32_RTIT_ADDR2_A           0x584
+#define MSR_IA32_RTIT_ADDR2_B           0x585
+#define MSR_IA32_RTIT_ADDR3_A           0x586
+#define MSR_IA32_RTIT_ADDR3_B           0x587
+#define MAX_RTIT_ADDRS                  8
+
 #define MSR_EFER                        0xc0000080
 
 #define MSR_EFER_SCE   (1 << 0)
@@ -471,6 +486,7 @@ typedef enum FeatureWord {
     FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */
     FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */
     FEAT_KVM,           /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */
+    FEAT_KVM_HINTS,     /* CPUID[4000_0001].EDX */
     FEAT_HYPERV_EAX,    /* CPUID[4000_0003].EAX */
     FEAT_HYPERV_EBX,    /* CPUID[4000_0003].EBX */
     FEAT_HYPERV_EDX,    /* CPUID[4000_0003].EDX */
@@ -640,6 +656,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
 #define CPUID_7_0_EBX_PCOMMIT  (1U << 22) /* Persistent Commit */
 #define CPUID_7_0_EBX_CLFLUSHOPT (1U << 23) /* Flush a Cache Line Optimized */
 #define CPUID_7_0_EBX_CLWB     (1U << 24) /* Cache Line Write Back */
+#define CPUID_7_0_EBX_INTEL_PT (1U << 25) /* Intel Processor Trace */
 #define CPUID_7_0_EBX_AVX512PF (1U << 26) /* AVX-512 Prefetch */
 #define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */
 #define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */
@@ -666,6 +683,8 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
 #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation Single Precision */
 #define CPUID_7_0_EDX_SPEC_CTRL     (1U << 26) /* Speculation Control */
 
+#define KVM_HINTS_DEDICATED (1U << 0)
+
 #define CPUID_8000_0008_EBX_IBPB    (1U << 12) /* Indirect Branch Prediction Barrier */
 
 #define CPUID_XSAVE_XSAVEOPT   (1U << 0)
@@ -1153,6 +1172,13 @@ typedef struct CPUX86State {
     uint64_t msr_hv_stimer_config[HV_STIMER_COUNT];
     uint64_t msr_hv_stimer_count[HV_STIMER_COUNT];
 
+    uint64_t msr_rtit_ctrl;
+    uint64_t msr_rtit_status;
+    uint64_t msr_rtit_output_base;
+    uint64_t msr_rtit_output_mask;
+    uint64_t msr_rtit_cr3_match;
+    uint64_t msr_rtit_addrs[MAX_RTIT_ADDRS];
+
     /* exception/interrupt handling */
     int error_code;
     int exception_is_int;
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index ad4b159b28..d996cca68b 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -383,6 +383,9 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
         if (!kvm_irqchip_in_kernel()) {
             ret &= ~(1U << KVM_FEATURE_PV_UNHALT);
         }
+    } else if (function == KVM_CPUID_FEATURES && reg == R_EDX) {
+        ret |= KVM_HINTS_DEDICATED;
+        found = 1;
     }
 
     /* fallback for older kernels */
@@ -801,6 +804,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
         c = &cpuid_data.entries[cpuid_i++];
         c->function = KVM_CPUID_FEATURES | kvm_base;
         c->eax = env->features[FEAT_KVM];
+        c->edx = env->features[FEAT_KVM_HINTS];
     }
 
     cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
@@ -865,6 +869,29 @@ int kvm_arch_init_vcpu(CPUState *cs)
                 c = &cpuid_data.entries[cpuid_i++];
             }
             break;
+        case 0x14: {
+            uint32_t times;
+
+            c->function = i;
+            c->index = 0;
+            c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+            times = c->eax;
+
+            for (j = 1; j <= times; ++j) {
+                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+                    fprintf(stderr, "cpuid_data is full, no space for "
+                                "cpuid(eax:0x14,ecx:0x%x)\n", j);
+                    abort();
+                }
+                c = &cpuid_data.entries[cpuid_i++];
+                c->function = i;
+                c->index = j;
+                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
+            }
+            break;
+        }
         default:
             c->function = i;
             c->flags = 0;
@@ -1788,6 +1815,25 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
                 kvm_msr_entry_add(cpu, MSR_MTRRphysMask(i), mask);
             }
         }
+        if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) {
+            int addr_num = kvm_arch_get_supported_cpuid(kvm_state,
+                                                    0x14, 1, R_EAX) & 0x7;
+
+            kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CTL,
+                            env->msr_rtit_ctrl);
+            kvm_msr_entry_add(cpu, MSR_IA32_RTIT_STATUS,
+                            env->msr_rtit_status);
+            kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_BASE,
+                            env->msr_rtit_output_base);
+            kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_MASK,
+                            env->msr_rtit_output_mask);
+            kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CR3_MATCH,
+                            env->msr_rtit_cr3_match);
+            for (i = 0; i < addr_num; i++) {
+                kvm_msr_entry_add(cpu, MSR_IA32_RTIT_ADDR0_A + i,
+                            env->msr_rtit_addrs[i]);
+            }
+        }
 
         /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
          *       kvm_put_msr_feature_control. */
@@ -2101,6 +2147,20 @@ static int kvm_get_msrs(X86CPU *cpu)
         }
     }
 
+    if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) {
+        int addr_num =
+            kvm_arch_get_supported_cpuid(kvm_state, 0x14, 1, R_EAX) & 0x7;
+
+        kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CTL, 0);
+        kvm_msr_entry_add(cpu, MSR_IA32_RTIT_STATUS, 0);
+        kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_BASE, 0);
+        kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_MASK, 0);
+        kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CR3_MATCH, 0);
+        for (i = 0; i < addr_num; i++) {
+            kvm_msr_entry_add(cpu, MSR_IA32_RTIT_ADDR0_A + i, 0);
+        }
+    }
+
     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf);
     if (ret < 0) {
         return ret;
@@ -2341,6 +2401,24 @@ static int kvm_get_msrs(X86CPU *cpu)
         case MSR_IA32_SPEC_CTRL:
             env->spec_ctrl = msrs[i].data;
             break;
+        case MSR_IA32_RTIT_CTL:
+            env->msr_rtit_ctrl = msrs[i].data;
+            break;
+        case MSR_IA32_RTIT_STATUS:
+            env->msr_rtit_status = msrs[i].data;
+            break;
+        case MSR_IA32_RTIT_OUTPUT_BASE:
+            env->msr_rtit_output_base = msrs[i].data;
+            break;
+        case MSR_IA32_RTIT_OUTPUT_MASK:
+            env->msr_rtit_output_mask = msrs[i].data;
+            break;
+        case MSR_IA32_RTIT_CR3_MATCH:
+            env->msr_rtit_cr3_match = msrs[i].data;
+            break;
+        case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
+            env->msr_rtit_addrs[index - MSR_IA32_RTIT_ADDR0_A] = msrs[i].data;
+            break;
         }
     }
 
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 361c05aedf..c05fe6fb1a 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -837,6 +837,43 @@ static const VMStateDescription vmstate_spec_ctrl = {
     }
 };
 
+static bool intel_pt_enable_needed(void *opaque)
+{
+    X86CPU *cpu = opaque;
+    CPUX86State *env = &cpu->env;
+    int i;
+
+    if (env->msr_rtit_ctrl || env->msr_rtit_status ||
+        env->msr_rtit_output_base || env->msr_rtit_output_mask ||
+        env->msr_rtit_cr3_match) {
+        return true;
+    }
+
+    for (i = 0; i < MAX_RTIT_ADDRS; i++) {
+        if (env->msr_rtit_addrs[i]) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+static const VMStateDescription vmstate_msr_intel_pt = {
+    .name = "cpu/intel_pt",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = intel_pt_enable_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(env.msr_rtit_ctrl, X86CPU),
+        VMSTATE_UINT64(env.msr_rtit_status, X86CPU),
+        VMSTATE_UINT64(env.msr_rtit_output_base, X86CPU),
+        VMSTATE_UINT64(env.msr_rtit_output_mask, X86CPU),
+        VMSTATE_UINT64(env.msr_rtit_cr3_match, X86CPU),
+        VMSTATE_UINT64_ARRAY(env.msr_rtit_addrs, X86CPU, MAX_RTIT_ADDRS),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 VMStateDescription vmstate_x86_cpu = {
     .name = "cpu",
     .version_id = 12,
@@ -957,6 +994,7 @@ VMStateDescription vmstate_x86_cpu = {
 #endif
         &vmstate_spec_ctrl,
         &vmstate_mcg_ext_ctl,
+        &vmstate_msr_intel_pt,
         NULL
     }
 };
diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker
index 994a35a332..a00004319e 100644
--- a/tests/docker/dockerfiles/fedora.docker
+++ b/tests/docker/dockerfiles/fedora.docker
@@ -2,7 +2,7 @@ FROM fedora:27
 ENV PACKAGES \
     ccache gettext git tar PyYAML sparse flex bison python3 bzip2 hostname \
     glib2-devel pixman-devel zlib-devel SDL-devel libfdt-devel \
-    gcc gcc-c++ clang make perl which bc findutils libaio-devel \
+    gcc gcc-c++ llvm clang make perl which bc findutils libaio-devel \
     nettle-devel libasan libubsan \
     mingw32-pixman mingw32-glib2 mingw32-gmp mingw32-SDL mingw32-pkg-config \
     mingw32-gtk2 mingw32-gtk3 mingw32-gnutls mingw32-nettle mingw32-libtasn1 \
diff --git a/tests/docker/test-debug b/tests/docker/test-debug
index d020b06917..d3f9f70d01 100755
--- a/tests/docker/test-debug
+++ b/tests/docker/test-debug
@@ -1,6 +1,6 @@
 #!/bin/bash -e
 #
-# Compile and check with clang & --enable-debug.
+# Compile and check with clang & --enable-debug --enable-sanitizers.
 #
 # Copyright (c) 2016-2018 Red Hat Inc.
 #
@@ -19,8 +19,8 @@ requires clang asan
 cd "$BUILD_DIR"
 
 OPTS="--cxx=clang++ --cc=clang --host-cc=clang"
-OPTS="--enable-debug $OPTS"
+OPTS="--enable-debug --enable-sanitizers $OPTS"
 
 build_qemu $OPTS
-make $MAKEFLAGS check
+make $MAKEFLAGS V=1 check
 install_qemu
diff --git a/trace-events b/trace-events
index 89fcad0fd1..855b0ab240 100644
--- a/trace-events
+++ b/trace-events
@@ -68,9 +68,9 @@ memory_region_tb_read(int cpu_index, uint64_t addr, uint64_t value, unsigned siz
 memory_region_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
 memory_region_ram_device_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
 memory_region_ram_device_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
-flatview_new(FlatView *view, MemoryRegion *root) "%p (root %p)"
-flatview_destroy(FlatView *view, MemoryRegion *root) "%p (root %p)"
-flatview_destroy_rcu(FlatView *view, MemoryRegion *root) "%p (root %p)"
+flatview_new(void *view, void *root) "%p (root %p)"
+flatview_destroy(void *view, void *root) "%p (root %p)"
+flatview_destroy_rcu(void *view, void *root) "%p (root %p)"
 
 # gdbstub.c
 gdbstub_op_start(const char *device) "Starting gdbstub using device %s"
diff --git a/util/aio-wait.c b/util/aio-wait.c
index 975afddf4c..b8a8f86dba 100644
--- a/util/aio-wait.c
+++ b/util/aio-wait.c
@@ -34,7 +34,7 @@ static void dummy_bh_cb(void *opaque)
 void aio_wait_kick(AioWait *wait)
 {
     /* The barrier (or an atomic op) is in the caller.  */
-    if (atomic_read(&wait->need_kick)) {
+    if (atomic_read(&wait->num_waiters)) {
         aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
     }
 }