summary refs log tree commit diff stats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/block/block_int.h2
-rw-r--r--include/block/blockjob.h324
-rw-r--r--include/block/blockjob_int.h176
-rw-r--r--include/hw/i386/intel_iommu.h19
-rw-r--r--include/hw/mem/nvdimm.h2
-rw-r--r--include/hw/pci/pci.h1
-rw-r--r--include/hw/virtio/virtio-bus.h2
-rw-r--r--include/hw/virtio/virtio.h2
-rw-r--r--include/hw/xen/xen_backend.h34
-rw-r--r--include/hw/xen/xen_common.h17
-rw-r--r--include/qemu/iova-tree.h134
-rw-r--r--include/qemu/job.h562
-rw-r--r--include/standard-headers/asm-x86/kvm_para.h121
-rw-r--r--include/standard-headers/linux/virtio_balloon.h4
-rw-r--r--include/sysemu/kvm.h1
15 files changed, 924 insertions, 477 deletions
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 76b589da57..6c0927bce3 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1029,7 +1029,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                             BlockdevOnError on_target_error,
                             int creation_flags,
                             BlockCompletionFunc *cb, void *opaque,
-                            BlockJobTxn *txn, Error **errp);
+                            JobTxn *txn, Error **errp);
 
 void hmp_drive_add_node(Monitor *mon, const char *optstr);
 
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 0f56f723de..32c00b7dc0 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -26,13 +26,13 @@
 #ifndef BLOCKJOB_H
 #define BLOCKJOB_H
 
+#include "qemu/job.h"
 #include "block/block.h"
 #include "qemu/ratelimit.h"
 
 #define BLOCK_JOB_SLICE_TIME 100000000ULL /* ns */
 
 typedef struct BlockJobDriver BlockJobDriver;
-typedef struct BlockJobTxn BlockJobTxn;
 
 /**
  * BlockJob:
@@ -40,141 +40,40 @@ typedef struct BlockJobTxn BlockJobTxn;
  * Long-running operation on a BlockDriverState.
  */
 typedef struct BlockJob {
-    /** The job type, including the job vtable.  */
-    const BlockJobDriver *driver;
+    /** Data belonging to the generic Job infrastructure */
+    Job job;
 
     /** The block device on which the job is operating.  */
     BlockBackend *blk;
 
-    /**
-     * The ID of the block job. May be NULL for internal jobs.
-     */
-    char *id;
-
-    /**
-     * The coroutine that executes the job.  If not NULL, it is
-     * reentered when busy is false and the job is cancelled.
-     */
-    Coroutine *co;
-
-    /**
-     * Set to true if the job should cancel itself.  The flag must
-     * always be tested just before toggling the busy flag from false
-     * to true.  After a job has been cancelled, it should only yield
-     * if #aio_poll will ("sooner or later") reenter the coroutine.
-     */
-    bool cancelled;
-
-    /**
-     * Set to true if the job should abort immediately without waiting
-     * for data to be in sync.
-     */
-    bool force;
-
-    /**
-     * Counter for pause request. If non-zero, the block job is either paused,
-     * or if busy == true will pause itself as soon as possible.
-     */
-    int pause_count;
-
-    /**
-     * Set to true if the job is paused by user.  Can be unpaused with the
-     * block-job-resume QMP command.
-     */
-    bool user_paused;
-
-    /**
-     * Set to false by the job while the coroutine has yielded and may be
-     * re-entered by block_job_enter().  There may still be I/O or event loop
-     * activity pending.  Accessed under block_job_mutex (in blockjob.c).
-     */
-    bool busy;
-
-    /**
-     * Set to true by the job while it is in a quiescent state, where
-     * no I/O or event loop activity is pending.
-     */
-    bool paused;
-
-    /**
-     * Set to true when the job is ready to be completed.
-     */
-    bool ready;
-
-    /**
-     * Set to true when the job has deferred work to the main loop.
-     */
-    bool deferred_to_main_loop;
-
-    /** Element of the list of block jobs */
-    QLIST_ENTRY(BlockJob) job_list;
-
     /** Status that is published by the query-block-jobs QMP API */
     BlockDeviceIoStatus iostatus;
 
-    /** Offset that is published by the query-block-jobs QMP API */
-    int64_t offset;
-
-    /** Length that is published by the query-block-jobs QMP API */
-    int64_t len;
-
     /** Speed that was set with @block_job_set_speed.  */
     int64_t speed;
 
     /** Rate limiting data structure for implementing @speed. */
     RateLimit limit;
 
-    /** The completion function that will be called when the job completes.  */
-    BlockCompletionFunc *cb;
-
     /** Block other operations when block job is running */
     Error *blocker;
 
-    /** BlockDriverStates that are involved in this block job */
-    GSList *nodes;
-
-    /** The opaque value that is passed to the completion function.  */
-    void *opaque;
-
-    /** Reference count of the block job */
-    int refcnt;
-
-    /** True when job has reported completion by calling block_job_completed. */
-    bool completed;
-
-    /** ret code passed to block_job_completed. */
-    int ret;
-
-    /**
-     * Timer that is used by @block_job_sleep_ns. Accessed under
-     * block_job_mutex (in blockjob.c).
-     */
-    QEMUTimer sleep_timer;
+    /** Called when a cancelled job is finalised. */
+    Notifier finalize_cancelled_notifier;
 
-    /** Current state; See @BlockJobStatus for details. */
-    BlockJobStatus status;
+    /** Called when a successfully completed job is finalised. */
+    Notifier finalize_completed_notifier;
 
-    /** True if this job should automatically finalize itself */
-    bool auto_finalize;
+    /** Called when the job transitions to PENDING */
+    Notifier pending_notifier;
 
-    /** True if this job should automatically dismiss itself */
-    bool auto_dismiss;
+    /** Called when the job transitions to READY */
+    Notifier ready_notifier;
 
-    BlockJobTxn *txn;
-    QLIST_ENTRY(BlockJob) txn_list;
+    /** BlockDriverStates that are involved in this block job */
+    GSList *nodes;
 } BlockJob;
 
-typedef enum BlockJobCreateFlags {
-    /* Default behavior */
-    BLOCK_JOB_DEFAULT = 0x00,
-    /* BlockJob is not QMP-created and should not send QMP events */
-    BLOCK_JOB_INTERNAL = 0x01,
-    /* BlockJob requires manual finalize step */
-    BLOCK_JOB_MANUAL_FINALIZE = 0x02,
-    /* BlockJob requires manual dismiss step */
-    BLOCK_JOB_MANUAL_DISMISS = 0x04,
-} BlockJobCreateFlags;
-
 /**
  * block_job_next:
  * @job: A block job, or %NULL.
@@ -231,78 +130,6 @@ void block_job_remove_all_bdrv(BlockJob *job);
 void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
 
 /**
- * block_job_start:
- * @job: A job that has not yet been started.
- *
- * Begins execution of a block job.
- * Takes ownership of one reference to the job object.
- */
-void block_job_start(BlockJob *job);
-
-/**
- * block_job_cancel:
- * @job: The job to be canceled.
- * @force: Quit a job without waiting for data to be in sync.
- *
- * Asynchronously cancel the specified job.
- */
-void block_job_cancel(BlockJob *job, bool force);
-
-/**
- * block_job_complete:
- * @job: The job to be completed.
- * @errp: Error object.
- *
- * Asynchronously complete the specified job.
- */
-void block_job_complete(BlockJob *job, Error **errp);
-
-
-/**
- * block_job_finalize:
- * @job: The job to fully commit and finish.
- * @errp: Error object.
- *
- * For jobs that have finished their work and are pending
- * awaiting explicit acknowledgement to commit their work,
- * This will commit that work.
- *
- * FIXME: Make the below statement universally true:
- * For jobs that support the manual workflow mode, all graph
- * changes that occur as a result will occur after this command
- * and before a successful reply.
- */
-void block_job_finalize(BlockJob *job, Error **errp);
-
-/**
- * block_job_dismiss:
- * @job: The job to be dismissed.
- * @errp: Error object.
- *
- * Remove a concluded job from the query list.
- */
-void block_job_dismiss(BlockJob **job, Error **errp);
-
-/**
- * block_job_progress_update:
- * @job: The job that has made progress
- * @done: How much progress the job made
- *
- * Updates the progress counter of the job.
- */
-void block_job_progress_update(BlockJob *job, uint64_t done);
-
-/**
- * block_job_progress_set_remaining:
- * @job: The job whose expected progress end value is set
- * @remaining: Expected end value of the progress counter of the job
- *
- * Sets the expected end value of the progress counter of a job so that a
- * completion percentage can be calculated when the progress is updated.
- */
-void block_job_progress_set_remaining(BlockJob *job, uint64_t remaining);
-
-/**
  * block_job_query:
  * @job: The job to get information about.
  *
@@ -311,78 +138,6 @@ void block_job_progress_set_remaining(BlockJob *job, uint64_t remaining);
 BlockJobInfo *block_job_query(BlockJob *job, Error **errp);
 
 /**
- * block_job_user_pause:
- * @job: The job to be paused.
- *
- * Asynchronously pause the specified job.
- * Do not allow a resume until a matching call to block_job_user_resume.
- */
-void block_job_user_pause(BlockJob *job, Error **errp);
-
-/**
- * block_job_paused:
- * @job: The job to query.
- *
- * Returns true if the job is user-paused.
- */
-bool block_job_user_paused(BlockJob *job);
-
-/**
- * block_job_user_resume:
- * @job: The job to be resumed.
- *
- * Resume the specified job.
- * Must be paired with a preceding block_job_user_pause.
- */
-void block_job_user_resume(BlockJob *job, Error **errp);
-
-/**
- * block_job_user_cancel:
- * @job: The job to be cancelled.
- * @force: Quit a job without waiting for data to be in sync.
- *
- * Cancels the specified job, but may refuse to do so if the
- * operation isn't currently meaningful.
- */
-void block_job_user_cancel(BlockJob *job, bool force, Error **errp);
-
-/**
- * block_job_cancel_sync:
- * @job: The job to be canceled.
- *
- * Synchronously cancel the job.  The completion callback is called
- * before the function returns.  The job may actually complete
- * instead of canceling itself; the circumstances under which this
- * happens depend on the kind of job that is active.
- *
- * Returns the return value from the job if the job actually completed
- * during the call, or -ECANCELED if it was canceled.
- */
-int block_job_cancel_sync(BlockJob *job);
-
-/**
- * block_job_cancel_sync_all:
- *
- * Synchronously cancels all jobs using block_job_cancel_sync().
- */
-void block_job_cancel_sync_all(void);
-
-/**
- * block_job_complete_sync:
- * @job: The job to be completed.
- * @errp: Error object which may be set by block_job_complete(); this is not
- *        necessarily set on every error, the job return value has to be
- *        checked as well.
- *
- * Synchronously complete the job.  The completion callback is called before the
- * function returns, unless it is NULL (which is permissible when using this
- * function).
- *
- * Returns the return value from the job.
- */
-int block_job_complete_sync(BlockJob *job, Error **errp);
-
-/**
  * block_job_iostatus_reset:
  * @job: The job whose I/O status should be reset.
  *
@@ -392,59 +147,6 @@ int block_job_complete_sync(BlockJob *job, Error **errp);
 void block_job_iostatus_reset(BlockJob *job);
 
 /**
- * block_job_txn_new:
- *
- * Allocate and return a new block job transaction.  Jobs can be added to the
- * transaction using block_job_txn_add_job().
- *
- * The transaction is automatically freed when the last job completes or is
- * cancelled.
- *
- * All jobs in the transaction either complete successfully or fail/cancel as a
- * group.  Jobs wait for each other before completing.  Cancelling one job
- * cancels all jobs in the transaction.
- */
-BlockJobTxn *block_job_txn_new(void);
-
-/**
- * block_job_ref:
- *
- * Add a reference to BlockJob refcnt, it will be decreased with
- * block_job_unref, and then be freed if it comes to be the last
- * reference.
- */
-void block_job_ref(BlockJob *job);
-
-/**
- * block_job_unref:
- *
- * Release a reference that was previously acquired with block_job_ref
- * or block_job_create. If it's the last reference to the object, it will be
- * freed.
- */
-void block_job_unref(BlockJob *job);
-
-/**
- * block_job_txn_unref:
- *
- * Release a reference that was previously acquired with block_job_txn_add_job
- * or block_job_txn_new. If it's the last reference to the object, it will be
- * freed.
- */
-void block_job_txn_unref(BlockJobTxn *txn);
-
-/**
- * block_job_txn_add_job:
- * @txn: The transaction (may be NULL)
- * @job: Job to add to the transaction
- *
- * Add @job to the transaction.  The @job must not already be in a transaction.
- * The caller must call either block_job_txn_unref() or block_job_completed()
- * to release the reference that is automatically grabbed here.
- */
-void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job);
-
-/**
  * block_job_is_internal:
  * @job: The job to determine if it is user-visible or not.
  *
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index 62ec964d09..5cd50c6639 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -35,72 +35,8 @@
  * A class type for block job driver.
  */
 struct BlockJobDriver {
-    /** Derived BlockJob struct size */
-    size_t instance_size;
-
-    /** String describing the operation, part of query-block-jobs QMP API */
-    BlockJobType job_type;
-
-    /** Mandatory: Entrypoint for the Coroutine. */
-    CoroutineEntry *start;
-
-    /**
-     * Optional callback for job types whose completion must be triggered
-     * manually.
-     */
-    void (*complete)(BlockJob *job, Error **errp);
-
-    /**
-     * If the callback is not NULL, prepare will be invoked when all the jobs
-     * belonging to the same transaction complete; or upon this job's completion
-     * if it is not in a transaction.
-     *
-     * This callback will not be invoked if the job has already failed.
-     * If it fails, abort and then clean will be called.
-     */
-    int (*prepare)(BlockJob *job);
-
-    /**
-     * If the callback is not NULL, it will be invoked when all the jobs
-     * belonging to the same transaction complete; or upon this job's
-     * completion if it is not in a transaction. Skipped if NULL.
-     *
-     * All jobs will complete with a call to either .commit() or .abort() but
-     * never both.
-     */
-    void (*commit)(BlockJob *job);
-
-    /**
-     * If the callback is not NULL, it will be invoked when any job in the
-     * same transaction fails; or upon this job's failure (due to error or
-     * cancellation) if it is not in a transaction. Skipped if NULL.
-     *
-     * All jobs will complete with a call to either .commit() or .abort() but
-     * never both.
-     */
-    void (*abort)(BlockJob *job);
-
-    /**
-     * If the callback is not NULL, it will be invoked after a call to either
-     * .commit() or .abort(). Regardless of which callback is invoked after
-     * completion, .clean() will always be called, even if the job does not
-     * belong to a transaction group.
-     */
-    void (*clean)(BlockJob *job);
-
-    /**
-     * If the callback is not NULL, it will be invoked when the job transitions
-     * into the paused state.  Paused jobs must not perform any asynchronous
-     * I/O or event loop activity.  This callback is used to quiesce jobs.
-     */
-    void coroutine_fn (*pause)(BlockJob *job);
-
-    /**
-     * If the callback is not NULL, it will be invoked when the job transitions
-     * out of the paused state.  Any asynchronous I/O or event loop activity
-     * should be restarted from this callback.
-     */
-    void coroutine_fn (*resume)(BlockJob *job);
+    /** Generic JobDriver callbacks and settings */
+    JobDriver job_driver;
 
     /*
      * If the callback is not NULL, it will be invoked before the job is
@@ -113,6 +49,10 @@ struct BlockJobDriver {
      * If the callback is not NULL, it will be invoked when the job has to be
      * synchronously cancelled or completed; it should drain BlockDriverStates
      * as required to ensure progress.
+     *
+     * Block jobs must use the default implementation for job_driver.drain,
+     * which will in turn call this callback after doing generic block job
+     * stuff.
      */
     void (*drain)(BlockJob *job);
 };
@@ -126,8 +66,7 @@ struct BlockJobDriver {
  * @bs: The block
  * @perm, @shared_perm: Permissions to request for @bs
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @flags: Creation flags for the Block Job.
- *         See @BlockJobCreateFlags
+ * @flags: Creation flags for the Block Job. See @JobCreateFlags.
  * @cb: Completion function for the job.
  * @opaque: Opaque pointer value passed to @cb.
  * @errp: Error object.
@@ -142,28 +81,31 @@ struct BlockJobDriver {
  * called from a wrapper that is specific to the job type.
  */
 void *block_job_create(const char *job_id, const BlockJobDriver *driver,
-                       BlockJobTxn *txn, BlockDriverState *bs, uint64_t perm,
+                       JobTxn *txn, BlockDriverState *bs, uint64_t perm,
                        uint64_t shared_perm, int64_t speed, int flags,
                        BlockCompletionFunc *cb, void *opaque, Error **errp);
 
 /**
- * block_job_sleep_ns:
- * @job: The job that calls the function.
- * @ns: How many nanoseconds to stop for.
- *
- * Put the job to sleep (assuming that it wasn't canceled) for @ns
- * %QEMU_CLOCK_REALTIME nanoseconds.  Canceling the job will immediately
- * interrupt the wait.
+ * block_job_free:
+ * Callback to be used for JobDriver.free in all block jobs. Frees block job
+ * specific resources in @job.
  */
-void block_job_sleep_ns(BlockJob *job, int64_t ns);
+void block_job_free(Job *job);
 
 /**
- * block_job_yield:
- * @job: The job that calls the function.
- *
- * Yield the block job coroutine.
+ * block_job_user_resume:
+ * Callback to be used for JobDriver.user_resume in all block jobs. Resets the
+ * iostatus when the user resumes @job.
+ */
+void block_job_user_resume(Job *job);
+
+/**
+ * block_job_drain:
+ * Callback to be used for JobDriver.drain in all block jobs. Drains the main
+ * block node associated with the block jobs and calls BlockJobDriver.drain for
+ * job-specific actions.
  */
-void block_job_yield(BlockJob *job);
+void block_job_drain(Job *job);
 
 /**
  * block_job_ratelimit_get_delay:
@@ -174,57 +116,6 @@ void block_job_yield(BlockJob *job);
 int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n);
 
 /**
- * block_job_early_fail:
- * @bs: The block device.
- *
- * The block job could not be started, free it.
- */
-void block_job_early_fail(BlockJob *job);
-
-/**
- * block_job_completed:
- * @job: The job being completed.
- * @ret: The status code.
- *
- * Call the completion function that was registered at creation time, and
- * free @job.
- */
-void block_job_completed(BlockJob *job, int ret);
-
-/**
- * block_job_is_cancelled:
- * @job: The job being queried.
- *
- * Returns whether the job is scheduled for cancellation.
- */
-bool block_job_is_cancelled(BlockJob *job);
-
-/**
- * block_job_pause_point:
- * @job: The job that is ready to pause.
- *
- * Pause now if block_job_pause() has been called.  Block jobs that perform
- * lots of I/O must call this between requests so that the job can be paused.
- */
-void coroutine_fn block_job_pause_point(BlockJob *job);
-
-/**
- * block_job_enter:
- * @job: The job to enter.
- *
- * Continue the specified job by entering the coroutine.
- */
-void block_job_enter(BlockJob *job);
-
-/**
- * block_job_event_ready:
- * @job: The job which is now ready to be completed.
- *
- * Send a BLOCK_JOB_READY event for the specified job.
- */
-void block_job_event_ready(BlockJob *job);
-
-/**
  * block_job_error_action:
  * @job: The job to signal an error for.
  * @on_err: The error action setting.
@@ -237,23 +128,4 @@ void block_job_event_ready(BlockJob *job);
 BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
                                         int is_read, int error);
 
-typedef void BlockJobDeferToMainLoopFn(BlockJob *job, void *opaque);
-
-/**
- * block_job_defer_to_main_loop:
- * @job: The job
- * @fn: The function to run in the main loop
- * @opaque: The opaque value that is passed to @fn
- *
- * This function must be called by the main job coroutine just before it
- * returns.  @fn is executed in the main loop with the BlockDriverState
- * AioContext acquired.  Block jobs must call bdrv_unref(), bdrv_close(), and
- * anything that uses bdrv_drain_all() in the main loop.
- *
- * The @job AioContext is held while @fn executes.
- */
-void block_job_defer_to_main_loop(BlockJob *job,
-                                  BlockJobDeferToMainLoopFn *fn,
-                                  void *opaque);
-
 #endif
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 45ec8919b6..fbfedcb1c0 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -27,6 +27,7 @@
 #include "hw/i386/ioapic.h"
 #include "hw/pci/msi.h"
 #include "hw/sysbus.h"
+#include "qemu/iova-tree.h"
 
 #define TYPE_INTEL_IOMMU_DEVICE "intel-iommu"
 #define INTEL_IOMMU_DEVICE(obj) \
@@ -67,7 +68,6 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
 typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
 typedef struct VTDIrq VTDIrq;
 typedef struct VTD_MSIMessage VTD_MSIMessage;
-typedef struct IntelIOMMUNotifierNode IntelIOMMUNotifierNode;
 
 /* Context-Entry */
 struct VTDContextEntry {
@@ -93,6 +93,10 @@ struct VTDAddressSpace {
     MemoryRegion iommu_ir;      /* Interrupt region: 0xfeeXXXXX */
     IntelIOMMUState *iommu_state;
     VTDContextCacheEntry context_cache_entry;
+    QLIST_ENTRY(VTDAddressSpace) next;
+    /* Superset of notifier flags that this address space has */
+    IOMMUNotifierFlag notifier_flags;
+    IOVATree *iova_tree;          /* Traces mapped IOVA ranges */
 };
 
 struct VTDBus {
@@ -253,11 +257,6 @@ struct VTD_MSIMessage {
 /* When IR is enabled, all MSI/MSI-X data bits should be zero */
 #define VTD_IR_MSI_DATA          (0)
 
-struct IntelIOMMUNotifierNode {
-    VTDAddressSpace *vtd_as;
-    QLIST_ENTRY(IntelIOMMUNotifierNode) next;
-};
-
 /* The iommu (DMAR) device state struct */
 struct IntelIOMMUState {
     X86IOMMUState x86_iommu;
@@ -295,7 +294,7 @@ struct IntelIOMMUState {
     GHashTable *vtd_as_by_busptr;   /* VTDBus objects indexed by PCIBus* reference */
     VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */
     /* list of registered notifiers */
-    QLIST_HEAD(, IntelIOMMUNotifierNode) notifiers_list;
+    QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
 
     /* interrupt remapping */
     bool intr_enabled;              /* Whether guest enabled IR */
@@ -305,6 +304,12 @@ struct IntelIOMMUState {
     OnOffAuto intr_eim;             /* Toggle for EIM cabability */
     bool buggy_eim;                 /* Force buggy EIM unless eim=off */
     uint8_t aw_bits;                /* Host/IOVA address width (in bits) */
+
+    /*
+     * Protects IOMMU states in general.  Currently it protects the
+     * per-IOMMU IOTLB cache, and context entry cache in VTDAddressSpace.
+     */
+    QemuMutex iommu_lock;
 };
 
 /* Find the VTD Address space associated with the given bus pointer,
diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index 7fd87c4e1c..74c60332e1 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -48,7 +48,7 @@
 #define NVDIMM_GET_CLASS(obj) OBJECT_GET_CLASS(NVDIMMClass, (obj), \
                                                TYPE_NVDIMM)
 
-#define NVDIMM_LABLE_SIZE_PROP "label-size"
+#define NVDIMM_LABEL_SIZE_PROP "label-size"
 #define NVDIMM_UNARMED_PROP    "unarmed"
 
 struct NVDIMMDevice {
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index a9c3ee5aa2..990d6fcbde 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -101,6 +101,7 @@ extern bool pci_available;
 #define PCI_DEVICE_ID_REDHAT_PCIE_RP     0x000c
 #define PCI_DEVICE_ID_REDHAT_XHCI        0x000d
 #define PCI_DEVICE_ID_REDHAT_PCIE_BRIDGE 0x000e
+#define PCI_DEVICE_ID_REDHAT_MDPY        0x000f
 #define PCI_DEVICE_ID_REDHAT_QXL         0x0100
 
 #define FMT_PCIBUS                      PRIx64
diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h
index ced3d2d2b0..7fec9dc929 100644
--- a/include/hw/virtio/virtio-bus.h
+++ b/include/hw/virtio/virtio-bus.h
@@ -52,6 +52,8 @@ typedef struct VirtioBusClass {
     bool (*has_extra_state)(DeviceState *d);
     bool (*query_guest_notifiers)(DeviceState *d);
     int (*set_guest_notifiers)(DeviceState *d, int nvqs, bool assign);
+    int (*set_host_notifier_mr)(DeviceState *d, int n,
+                                MemoryRegion *mr, bool assign);
     void (*vmstate_change)(DeviceState *d, bool running);
     /*
      * Expose the features the transport layer supports before
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 098bdaaea3..9c1fa07d6d 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -239,6 +239,8 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align);
 void virtio_queue_notify(VirtIODevice *vdev, int n);
 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector);
+int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
+                                      MemoryRegion *mr, bool assign);
 int virtio_set_status(VirtIODevice *vdev, uint8_t val);
 void virtio_reset(void *opaque);
 void virtio_update_irq(VirtIODevice *vdev);
diff --git a/include/hw/xen/xen_backend.h b/include/hw/xen/xen_backend.h
index 3a27692407..9c17fdd85d 100644
--- a/include/hw/xen/xen_backend.h
+++ b/include/hw/xen/xen_backend.h
@@ -16,7 +16,6 @@
 /* variables */
 extern struct xs_handle *xenstore;
 extern const char *xen_protocol;
-extern bool xen_feature_grant_copy;
 extern DeviceState *xen_sysdev;
 extern BusState *xen_sysbus;
 
@@ -42,6 +41,39 @@ void xen_be_register_common(void);
 int xen_be_register(const char *type, struct XenDevOps *ops);
 int xen_be_set_state(struct XenDevice *xendev, enum xenbus_state state);
 int xen_be_bind_evtchn(struct XenDevice *xendev);
+void xen_be_set_max_grant_refs(struct XenDevice *xendev,
+                               unsigned int nr_refs);
+void *xen_be_map_grant_refs(struct XenDevice *xendev, uint32_t *refs,
+                            unsigned int nr_refs, int prot);
+void xen_be_unmap_grant_refs(struct XenDevice *xendev, void *ptr,
+                             unsigned int nr_refs);
+
+typedef struct XenGrantCopySegment {
+    union {
+        void *virt;
+        struct {
+            uint32_t ref;
+            off_t offset;
+        } foreign;
+    } source, dest;
+    size_t len;
+} XenGrantCopySegment;
+
+int xen_be_copy_grant_refs(struct XenDevice *xendev,
+                           bool to_domain, XenGrantCopySegment segs[],
+                           unsigned int nr_segs);
+
+static inline void *xen_be_map_grant_ref(struct XenDevice *xendev,
+                                         uint32_t ref, int prot)
+{
+    return xen_be_map_grant_refs(xendev, &ref, 1, prot);
+}
+
+static inline void xen_be_unmap_grant_ref(struct XenDevice *xendev,
+                                          void *ptr)
+{
+    return xen_be_unmap_grant_refs(xendev, ptr, 1);
+}
 
 /* actual backend drivers */
 extern struct XenDevOps xen_console_ops;      /* xen_console.c     */
diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h
index 5f1402b494..bbf207dcef 100644
--- a/include/hw/xen/xen_common.h
+++ b/include/hw/xen/xen_common.h
@@ -667,8 +667,21 @@ static inline int xen_domain_create(xc_interface *xc, uint32_t ssidref,
 
 #if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40800
 
-
-typedef void *xengnttab_grant_copy_segment_t;
+struct xengnttab_grant_copy_segment {
+    union xengnttab_copy_ptr {
+        void *virt;
+        struct {
+            uint32_t ref;
+            uint16_t offset;
+            uint16_t domid;
+        } foreign;
+    } source, dest;
+    uint16_t len;
+    uint16_t flags;
+    int16_t status;
+};
+
+typedef struct xengnttab_grant_copy_segment xengnttab_grant_copy_segment_t;
 
 static inline int xengnttab_grant_copy(xengnttab_handle *xgt, uint32_t count,
                                        xengnttab_grant_copy_segment_t *segs)
diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
new file mode 100644
index 0000000000..b061932097
--- /dev/null
+++ b/include/qemu/iova-tree.h
@@ -0,0 +1,134 @@
+/*
+ * An very simplified iova tree implementation based on GTree.
+ *
+ * Copyright 2018 Red Hat, Inc.
+ *
+ * Authors:
+ *  Peter Xu <peterx@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ */
+#ifndef IOVA_TREE_H
+#define IOVA_TREE_H
+
+/*
+ * Currently the iova tree will only allow to keep ranges
+ * information, and no extra user data is allowed for each element.  A
+ * benefit is that we can merge adjacent ranges internally within the
+ * tree.  It can save a lot of memory when the ranges are splitted but
+ * mostly continuous.
+ *
+ * Note that current implementation does not provide any thread
+ * protections.  Callers of the iova tree should be responsible
+ * for the thread safety issue.
+ */
+
+#include "qemu/osdep.h"
+#include "exec/memory.h"
+#include "exec/hwaddr.h"
+
+#define  IOVA_OK           (0)
+#define  IOVA_ERR_INVALID  (-1) /* Invalid parameters */
+#define  IOVA_ERR_OVERLAP  (-2) /* IOVA range overlapped */
+
+typedef struct IOVATree IOVATree;
+typedef struct DMAMap {
+    hwaddr iova;
+    hwaddr translated_addr;
+    hwaddr size;                /* Inclusive */
+    IOMMUAccessFlags perm;
+} QEMU_PACKED DMAMap;
+typedef gboolean (*iova_tree_iterator)(DMAMap *map);
+
+/**
+ * iova_tree_new:
+ *
+ * Create a new iova tree.
+ *
+ * Returns: the tree pointer when succeeded, or NULL if error.
+ */
+IOVATree *iova_tree_new(void);
+
+/**
+ * iova_tree_insert:
+ *
+ * @tree: the iova tree to insert
+ * @map: the mapping to insert
+ *
+ * Insert an iova range to the tree.  If there is overlapped
+ * ranges, IOVA_ERR_OVERLAP will be returned.
+ *
+ * Return: 0 if succeeded, or <0 if error.
+ */
+int iova_tree_insert(IOVATree *tree, DMAMap *map);
+
+/**
+ * iova_tree_remove:
+ *
+ * @tree: the iova tree to remove range from
+ * @map: the map range to remove
+ *
+ * Remove mappings from the tree that are covered by the map range
+ * provided.  The range does not need to be exactly what has inserted,
+ * all the mappings that are included in the provided range will be
+ * removed from the tree.  Here map->translated_addr is meaningless.
+ *
+ * Return: 0 if succeeded, or <0 if error.
+ */
+int iova_tree_remove(IOVATree *tree, DMAMap *map);
+
+/**
+ * iova_tree_find:
+ *
+ * @tree: the iova tree to search from
+ * @map: the mapping to search
+ *
+ * Search for a mapping in the iova tree that overlaps with the
+ * mapping range specified.  Only the first found mapping will be
+ * returned.
+ *
+ * Return: DMAMap pointer if found, or NULL if not found.  Note that
+ * the returned DMAMap pointer is maintained internally.  User should
+ * only read the content but never modify or free the content.  Also,
+ * user is responsible to make sure the pointer is valid (say, no
+ * concurrent deletion in progress).
+ */
+DMAMap *iova_tree_find(IOVATree *tree, DMAMap *map);
+
+/**
+ * iova_tree_find_address:
+ *
+ * @tree: the iova tree to search from
+ * @iova: the iova address to find
+ *
+ * Similar to iova_tree_find(), but it tries to find mapping with
+ * range iova=iova & size=0.
+ *
+ * Return: same as iova_tree_find().
+ */
+DMAMap *iova_tree_find_address(IOVATree *tree, hwaddr iova);
+
+/**
+ * iova_tree_foreach:
+ *
+ * @tree: the iova tree to iterate on
+ * @iterator: the interator for the mappings, return true to stop
+ *
+ * Iterate over the iova tree.
+ *
+ * Return: 1 if found any overlap, 0 if not, <0 if error.
+ */
+void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator);
+
+/**
+ * iova_tree_destroy:
+ *
+ * @tree: the iova tree to destroy
+ *
+ * Destroy an existing iova tree.
+ *
+ * Return: None.
+ */
+void iova_tree_destroy(IOVATree *tree);
+
+#endif
diff --git a/include/qemu/job.h b/include/qemu/job.h
new file mode 100644
index 0000000000..8c8badf75e
--- /dev/null
+++ b/include/qemu/job.h
@@ -0,0 +1,562 @@
+/*
+ * Declarations for background jobs
+ *
+ * Copyright (c) 2011 IBM Corp.
+ * Copyright (c) 2012, 2018 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef JOB_H
+#define JOB_H
+
+#include "qapi/qapi-types-block-core.h"
+#include "qemu/queue.h"
+#include "qemu/coroutine.h"
+#include "block/aio.h"
+
+typedef struct JobDriver JobDriver;
+typedef struct JobTxn JobTxn;
+
+
+/**
+ * Long-running operation.
+ */
+typedef struct Job {
+    /** The ID of the job. May be NULL for internal jobs. */
+    char *id;
+
+    /** The type of this job. */
+    const JobDriver *driver;
+
+    /** Reference count of the block job */
+    int refcnt;
+
+    /** Current state; See @JobStatus for details. */
+    JobStatus status;
+
+    /** AioContext to run the job coroutine in */
+    AioContext *aio_context;
+
+    /**
+     * The coroutine that executes the job.  If not NULL, it is reentered when
+     * busy is false and the job is cancelled.
+     */
+    Coroutine *co;
+
+    /**
+     * Timer that is used by @job_sleep_ns. Accessed under job_mutex (in
+     * job.c).
+     */
+    QEMUTimer sleep_timer;
+
+    /**
+     * Counter for pause request. If non-zero, the block job is either paused,
+     * or if busy == true will pause itself as soon as possible.
+     */
+    int pause_count;
+
+    /**
+     * Set to false by the job while the coroutine has yielded and may be
+     * re-entered by job_enter(). There may still be I/O or event loop activity
+     * pending. Accessed under block_job_mutex (in blockjob.c).
+     */
+    bool busy;
+
+    /**
+     * Set to true by the job while it is in a quiescent state, where
+     * no I/O or event loop activity is pending.
+     */
+    bool paused;
+
+    /**
+     * Set to true if the job is paused by user.  Can be unpaused with the
+     * block-job-resume QMP command.
+     */
+    bool user_paused;
+
+    /**
+     * Set to true if the job should cancel itself.  The flag must
+     * always be tested just before toggling the busy flag from false
+     * to true.  After a job has been cancelled, it should only yield
+     * if #aio_poll will ("sooner or later") reenter the coroutine.
+     */
+    bool cancelled;
+
+    /**
+     * Set to true if the job should abort immediately without waiting
+     * for data to be in sync.
+     */
+    bool force_cancel;
+
+    /** Set to true when the job has deferred work to the main loop. */
+    bool deferred_to_main_loop;
+
+    /** True if this job should automatically finalize itself */
+    bool auto_finalize;
+
+    /** True if this job should automatically dismiss itself */
+    bool auto_dismiss;
+
+    /**
+     * Current progress. The unit is arbitrary as long as the ratio between
+     * progress_current and progress_total represents the estimated percentage
+     * of work already done.
+     */
+    int64_t progress_current;
+
+    /** Estimated progress_current value at the completion of the job */
+    int64_t progress_total;
+
+    /** ret code passed to job_completed. */
+    int ret;
+
+    /** The completion function that will be called when the job completes.  */
+    BlockCompletionFunc *cb;
+
+    /** The opaque value that is passed to the completion function.  */
+    void *opaque;
+
+    /** Notifiers called when a cancelled job is finalised */
+    NotifierList on_finalize_cancelled;
+
+    /** Notifiers called when a successfully completed job is finalised */
+    NotifierList on_finalize_completed;
+
+    /** Notifiers called when the job transitions to PENDING */
+    NotifierList on_pending;
+
+    /** Notifiers called when the job transitions to READY */
+    NotifierList on_ready;
+
+    /** Element of the list of jobs */
+    QLIST_ENTRY(Job) job_list;
+
+    /** Transaction this job is part of */
+    JobTxn *txn;
+
+    /** Element of the list of jobs in a job transaction */
+    QLIST_ENTRY(Job) txn_list;
+} Job;
+
+/**
+ * Callbacks and other information about a Job driver.
+ */
+struct JobDriver {
+    /** Derived Job struct size */
+    size_t instance_size;
+
+    /** Enum describing the operation */
+    JobType job_type;
+
+    /** Mandatory: Entrypoint for the Coroutine. */
+    CoroutineEntry *start;
+
+    /**
+     * If the callback is not NULL, it will be invoked when the job transitions
+     * into the paused state.  Paused jobs must not perform any asynchronous
+     * I/O or event loop activity.  This callback is used to quiesce jobs.
+     */
+    void coroutine_fn (*pause)(Job *job);
+
+    /**
+     * If the callback is not NULL, it will be invoked when the job transitions
+     * out of the paused state.  Any asynchronous I/O or event loop activity
+     * should be restarted from this callback.
+     */
+    void coroutine_fn (*resume)(Job *job);
+
+    /**
+     * Called when the job is resumed by the user (i.e. user_paused becomes
+     * false). .user_resume is called before .resume.
+     */
+    void (*user_resume)(Job *job);
+
+    /**
+     * Optional callback for job types whose completion must be triggered
+     * manually.
+     */
+    void (*complete)(Job *job, Error **errp);
+
+    /*
+     * If the callback is not NULL, it will be invoked when the job has to be
+     * synchronously cancelled or completed; it should drain any activities
+     * as required to ensure progress.
+     */
+    void (*drain)(Job *job);
+
+    /**
+     * If the callback is not NULL, prepare will be invoked when all the jobs
+     * belonging to the same transaction complete; or upon this job's completion
+     * if it is not in a transaction.
+     *
+     * This callback will not be invoked if the job has already failed.
+     * If it fails, abort and then clean will be called.
+     */
+    int (*prepare)(Job *job);
+
+    /**
+     * If the callback is not NULL, it will be invoked when all the jobs
+     * belonging to the same transaction complete; or upon this job's
+     * completion if it is not in a transaction. Skipped if NULL.
+     *
+     * All jobs will complete with a call to either .commit() or .abort() but
+     * never both.
+     */
+    void (*commit)(Job *job);
+
+    /**
+     * If the callback is not NULL, it will be invoked when any job in the
+     * same transaction fails; or upon this job's failure (due to error or
+     * cancellation) if it is not in a transaction. Skipped if NULL.
+     *
+     * All jobs will complete with a call to either .commit() or .abort() but
+     * never both.
+     */
+    void (*abort)(Job *job);
+
+    /**
+     * If the callback is not NULL, it will be invoked after a call to either
+     * .commit() or .abort(). Regardless of which callback is invoked after
+     * completion, .clean() will always be called, even if the job does not
+     * belong to a transaction group.
+     */
+    void (*clean)(Job *job);
+
+
+    /** Called when the job is freed */
+    void (*free)(Job *job);
+};
+
+typedef enum JobCreateFlags {
+    /* Default behavior */
+    JOB_DEFAULT = 0x00,
+    /* Job is not QMP-created and should not send QMP events */
+    JOB_INTERNAL = 0x01,
+    /* Job requires manual finalize step */
+    JOB_MANUAL_FINALIZE = 0x02,
+    /* Job requires manual dismiss step */
+    JOB_MANUAL_DISMISS = 0x04,
+} JobCreateFlags;
+
+/**
+ * Allocate and return a new job transaction. Jobs can be added to the
+ * transaction using job_txn_add_job().
+ *
+ * The transaction is automatically freed when the last job completes or is
+ * cancelled.
+ *
+ * All jobs in the transaction either complete successfully or fail/cancel as a
+ * group.  Jobs wait for each other before completing.  Cancelling one job
+ * cancels all jobs in the transaction.
+ */
+JobTxn *job_txn_new(void);
+
+/**
+ * Release a reference that was previously acquired with job_txn_add_job or
+ * job_txn_new. If it's the last reference to the object, it will be freed.
+ */
+void job_txn_unref(JobTxn *txn);
+
+/**
+ * @txn: The transaction (may be NULL)
+ * @job: Job to add to the transaction
+ *
+ * Add @job to the transaction.  The @job must not already be in a transaction.
+ * The caller must call either job_txn_unref() or job_completed() to release
+ * the reference that is automatically grabbed here.
+ *
+ * If @txn is NULL, the function does nothing.
+ */
+void job_txn_add_job(JobTxn *txn, Job *job);
+
+/**
+ * Create a new long-running job and return it.
+ *
+ * @job_id: The id of the newly-created job, or %NULL for internal jobs
+ * @driver: The class object for the newly-created job.
+ * @txn: The transaction this job belongs to, if any. %NULL otherwise.
+ * @ctx: The AioContext to run the job coroutine in.
+ * @flags: Creation flags for the job. See @JobCreateFlags.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ */
+void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn,
+                 AioContext *ctx, int flags, BlockCompletionFunc *cb,
+                 void *opaque, Error **errp);
+
+/**
+ * Add a reference to Job refcnt, it will be decreased with job_unref, and then
+ * be freed if it comes to be the last reference.
+ */
+void job_ref(Job *job);
+
+/**
+ * Release a reference that was previously acquired with job_ref() or
+ * job_create(). If it's the last reference to the object, it will be freed.
+ */
+void job_unref(Job *job);
+
+/**
+ * @job: The job that has made progress
+ * @done: How much progress the job made since the last call
+ *
+ * Updates the progress counter of the job.
+ */
+void job_progress_update(Job *job, uint64_t done);
+
+/**
+ * @job: The job whose expected progress end value is set
+ * @remaining: Missing progress (on top of the current progress counter value)
+ *             until the new expected end value is reached
+ *
+ * Sets the expected end value of the progress counter of a job so that a
+ * completion percentage can be calculated when the progress is updated.
+ */
+void job_progress_set_remaining(Job *job, uint64_t remaining);
+
+/** To be called when a cancelled job is finalised. */
+void job_event_cancelled(Job *job);
+
+/** To be called when a successfully completed job is finalised. */
+void job_event_completed(Job *job);
+
+/**
+ * Conditionally enter the job coroutine if the job is ready to run, not
+ * already busy and fn() returns true. fn() is called while under the job_lock
+ * critical section.
+ */
+void job_enter_cond(Job *job, bool(*fn)(Job *job));
+
+/**
+ * @job: A job that has not yet been started.
+ *
+ * Begins execution of a job.
+ * Takes ownership of one reference to the job object.
+ */
+void job_start(Job *job);
+
+/**
+ * @job: The job to enter.
+ *
+ * Continue the specified job by entering the coroutine.
+ */
+void job_enter(Job *job);
+
+/**
+ * @job: The job that is ready to pause.
+ *
+ * Pause now if job_pause() has been called. Jobs that perform lots of I/O
+ * must call this between requests so that the job can be paused.
+ */
+void coroutine_fn job_pause_point(Job *job);
+
+/**
+ * @job: The job that calls the function.
+ *
+ * Yield the job coroutine.
+ */
+void job_yield(Job *job);
+
+/**
+ * @job: The job that calls the function.
+ * @ns: How many nanoseconds to stop for.
+ *
+ * Put the job to sleep (assuming that it wasn't canceled) for @ns
+ * %QEMU_CLOCK_REALTIME nanoseconds.  Canceling the job will immediately
+ * interrupt the wait.
+ */
+void coroutine_fn job_sleep_ns(Job *job, int64_t ns);
+
+
+/** Returns the JobType of a given Job. */
+JobType job_type(const Job *job);
+
+/** Returns the enum string for the JobType of a given Job. */
+const char *job_type_str(const Job *job);
+
+/** Returns true if the job should not be visible to the management layer. */
+bool job_is_internal(Job *job);
+
+/** Returns whether the job is scheduled for cancellation. */
+bool job_is_cancelled(Job *job);
+
+/** Returns whether the job is in a completed state. */
+bool job_is_completed(Job *job);
+
+/** Returns whether the job is ready to be completed. */
+bool job_is_ready(Job *job);
+
+/**
+ * Request @job to pause at the next pause point. Must be paired with
+ * job_resume(). If the job is supposed to be resumed by user action, call
+ * job_user_pause() instead.
+ */
+void job_pause(Job *job);
+
+/** Resumes a @job paused with job_pause. */
+void job_resume(Job *job);
+
+/**
+ * Asynchronously pause the specified @job.
+ * Do not allow a resume until a matching call to job_user_resume.
+ */
+void job_user_pause(Job *job, Error **errp);
+
+/** Returns true if the job is user-paused. */
+bool job_user_paused(Job *job);
+
+/**
+ * Resume the specified @job.
+ * Must be paired with a preceding job_user_pause.
+ */
+void job_user_resume(Job *job, Error **errp);
+
+/*
+ * Drain any activities as required to ensure progress. This can be called in a
+ * loop to synchronously complete a job.
+ */
+void job_drain(Job *job);
+
+/**
+ * Get the next element from the list of block jobs after @job, or the
+ * first one if @job is %NULL.
+ *
+ * Returns the requested job, or %NULL if there are no more jobs left.
+ */
+Job *job_next(Job *job);
+
+/**
+ * Get the job identified by @id (which must not be %NULL).
+ *
+ * Returns the requested job, or %NULL if it doesn't exist.
+ */
+Job *job_get(const char *id);
+
+/**
+ * Check whether the verb @verb can be applied to @job in its current state.
+ * Returns 0 if the verb can be applied; otherwise errp is set and -EPERM
+ * returned.
+ */
+int job_apply_verb(Job *job, JobVerb verb, Error **errp);
+
+/** The @job could not be started, free it. */
+void job_early_fail(Job *job);
+
+/** Moves the @job from RUNNING to READY */
+void job_transition_to_ready(Job *job);
+
+/**
+ * @job: The job being completed.
+ * @ret: The status code.
+ *
+ * Marks @job as completed. If @ret is non-zero, the job transaction it is part
+ * of is aborted. If @ret is zero, the job moves into the WAITING state. If it
+ * is the last job to complete in its transaction, all jobs in the transaction
+ * move from WAITING to PENDING.
+ */
+void job_completed(Job *job, int ret);
+
+/** Asynchronously complete the specified @job. */
+void job_complete(Job *job, Error **errp);
+
+/**
+ * Asynchronously cancel the specified @job. If @force is true, the job should
+ * be cancelled immediately without waiting for a consistent state.
+ */
+void job_cancel(Job *job, bool force);
+
+/**
+ * Cancels the specified job like job_cancel(), but may refuse to do so if the
+ * operation isn't meaningful in the current state of the job.
+ */
+void job_user_cancel(Job *job, bool force, Error **errp);
+
+/**
+ * Synchronously cancel the @job.  The completion callback is called
+ * before the function returns.  The job may actually complete
+ * instead of canceling itself; the circumstances under which this
+ * happens depend on the kind of job that is active.
+ *
+ * Returns the return value from the job if the job actually completed
+ * during the call, or -ECANCELED if it was canceled.
+ */
+int job_cancel_sync(Job *job);
+
+/** Synchronously cancels all jobs using job_cancel_sync(). */
+void job_cancel_sync_all(void);
+
+/**
+ * @job: The job to be completed.
+ * @errp: Error object which may be set by job_complete(); this is not
+ *        necessarily set on every error, the job return value has to be
+ *        checked as well.
+ *
+ * Synchronously complete the job.  The completion callback is called before the
+ * function returns, unless it is NULL (which is permissible when using this
+ * function).
+ *
+ * Returns the return value from the job.
+ */
+int job_complete_sync(Job *job, Error **errp);
+
+/**
+ * For a @job that has finished its work and is pending awaiting explicit
+ * acknowledgement to commit its work, this will commit that work.
+ *
+ * FIXME: Make the below statement universally true:
+ * For jobs that support the manual workflow mode, all graph changes that occur
+ * as a result will occur after this command and before a successful reply.
+ */
+void job_finalize(Job *job, Error **errp);
+
+/**
+ * Remove the concluded @job from the query list and resets the passed pointer
+ * to %NULL. Returns an error if the job is not actually concluded.
+ */
+void job_dismiss(Job **job, Error **errp);
+
+typedef void JobDeferToMainLoopFn(Job *job, void *opaque);
+
+/**
+ * @job: The job
+ * @fn: The function to run in the main loop
+ * @opaque: The opaque value that is passed to @fn
+ *
+ * This function must be called by the main job coroutine just before it
+ * returns.  @fn is executed in the main loop with the job AioContext acquired.
+ *
+ * Block jobs must call bdrv_unref(), bdrv_close(), and anything that uses
+ * bdrv_drain_all() in the main loop.
+ *
+ * The @job AioContext is held while @fn executes.
+ */
+void job_defer_to_main_loop(Job *job, JobDeferToMainLoopFn *fn, void *opaque);
+
+/**
+ * Synchronously finishes the given @job. If @finish is given, it is called to
+ * trigger completion or cancellation of the job.
+ *
+ * Returns 0 if the job is successfully completed, -ECANCELED if the job was
+ * cancelled before completing, and -errno in other error cases.
+ */
+int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp);
+
+#endif
diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h
new file mode 100644
index 0000000000..53a85ae3ed
--- /dev/null
+++ b/include/standard-headers/asm-x86/kvm_para.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_X86_KVM_PARA_H
+#define _ASM_X86_KVM_PARA_H
+
+#include "standard-headers/linux/types.h"
+
+/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
+ * should be used to determine that a VM is running under KVM.
+ */
+#define KVM_CPUID_SIGNATURE	0x40000000
+
+/* This CPUID returns two feature bitmaps in eax, edx. Before enabling
+ * a particular paravirtualization, the appropriate feature bit should
+ * be checked in eax. The performance hint feature bit should be checked
+ * in edx.
+ */
+#define KVM_CPUID_FEATURES	0x40000001
+#define KVM_FEATURE_CLOCKSOURCE		0
+#define KVM_FEATURE_NOP_IO_DELAY	1
+#define KVM_FEATURE_MMU_OP		2
+/* This indicates that the new set of kvmclock msrs
+ * are available. The use of 0x11 and 0x12 is deprecated
+ */
+#define KVM_FEATURE_CLOCKSOURCE2        3
+#define KVM_FEATURE_ASYNC_PF		4
+#define KVM_FEATURE_STEAL_TIME		5
+#define KVM_FEATURE_PV_EOI		6
+#define KVM_FEATURE_PV_UNHALT		7
+#define KVM_FEATURE_PV_TLB_FLUSH	9
+#define KVM_FEATURE_ASYNC_PF_VMEXIT	10
+
+#define KVM_HINTS_DEDICATED      0
+
+/* The last 8 bits are used to indicate how to interpret the flags field
+ * in pvclock structure. If no bits are set, all flags are ignored.
+ */
+#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT	24
+
+#define MSR_KVM_WALL_CLOCK  0x11
+#define MSR_KVM_SYSTEM_TIME 0x12
+
+#define KVM_MSR_ENABLED 1
+/* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */
+#define MSR_KVM_WALL_CLOCK_NEW  0x4b564d00
+#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
+#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
+#define MSR_KVM_STEAL_TIME  0x4b564d03
+#define MSR_KVM_PV_EOI_EN      0x4b564d04
+
+struct kvm_steal_time {
+	uint64_t steal;
+	uint32_t version;
+	uint32_t flags;
+	uint8_t  preempted;
+	uint8_t  uint8_t_pad[3];
+	uint32_t pad[11];
+};
+
+#define KVM_VCPU_PREEMPTED          (1 << 0)
+#define KVM_VCPU_FLUSH_TLB          (1 << 1)
+
+#define KVM_CLOCK_PAIRING_WALLCLOCK 0
+struct kvm_clock_pairing {
+	int64_t sec;
+	int64_t nsec;
+	uint64_t tsc;
+	uint32_t flags;
+	uint32_t pad[9];
+};
+
+#define KVM_STEAL_ALIGNMENT_BITS 5
+#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
+#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
+
+#define KVM_MAX_MMU_OP_BATCH           32
+
+#define KVM_ASYNC_PF_ENABLED			(1 << 0)
+#define KVM_ASYNC_PF_SEND_ALWAYS		(1 << 1)
+#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT	(1 << 2)
+
+/* Operations for KVM_HC_MMU_OP */
+#define KVM_MMU_OP_WRITE_PTE            1
+#define KVM_MMU_OP_FLUSH_TLB	        2
+#define KVM_MMU_OP_RELEASE_PT	        3
+
+/* Payload for KVM_HC_MMU_OP */
+struct kvm_mmu_op_header {
+	uint32_t op;
+	uint32_t pad;
+};
+
+struct kvm_mmu_op_write_pte {
+	struct kvm_mmu_op_header header;
+	uint64_t pte_phys;
+	uint64_t pte_val;
+};
+
+struct kvm_mmu_op_flush_tlb {
+	struct kvm_mmu_op_header header;
+};
+
+struct kvm_mmu_op_release_pt {
+	struct kvm_mmu_op_header header;
+	uint64_t pt_phys;
+};
+
+#define KVM_PV_REASON_PAGE_NOT_PRESENT 1
+#define KVM_PV_REASON_PAGE_READY 2
+
+struct kvm_vcpu_pv_apf_data {
+	uint32_t reason;
+	uint8_t pad[60];
+	uint32_t enabled;
+};
+
+#define KVM_PV_EOI_BIT 0
+#define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT)
+#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
+#define KVM_PV_EOI_DISABLED 0x0
+
+#endif /* _ASM_X86_KVM_PARA_H */
diff --git a/include/standard-headers/linux/virtio_balloon.h b/include/standard-headers/linux/virtio_balloon.h
index 7b0a41b8fc..e446805ae9 100644
--- a/include/standard-headers/linux/virtio_balloon.h
+++ b/include/standard-headers/linux/virtio_balloon.h
@@ -53,7 +53,9 @@ struct virtio_balloon_config {
 #define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
 #define VIRTIO_BALLOON_S_AVAIL    6   /* Available memory as in /proc */
 #define VIRTIO_BALLOON_S_CACHES   7   /* Disk caches */
-#define VIRTIO_BALLOON_S_NR       8
+#define VIRTIO_BALLOON_S_HTLB_PGALLOC  8  /* Hugetlb page allocations */
+#define VIRTIO_BALLOON_S_HTLB_PGFAIL   9  /* Hugetlb page allocation failures */
+#define VIRTIO_BALLOON_S_NR       10
 
 /*
  * Memory statistics structure.
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 23669c4d5a..0b64b8e067 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -22,7 +22,6 @@
 #ifdef NEED_CPU_H
 # ifdef CONFIG_KVM
 #  include <linux/kvm.h>
-#  include <linux/kvm_para.h>
 #  define CONFIG_KVM_IS_POSSIBLE
 # endif
 #else