summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS13
-rw-r--r--cpus.c8
-rw-r--r--docs/devel/migration.rst1
-rw-r--r--hw/mem/nvdimm.c1
-rw-r--r--hw/misc/ivshmem.c1
-rw-r--r--hw/pci-host/q35.c10
-rw-r--r--hw/scsi/Makefile.objs2
-rw-r--r--hw/scsi/emulation.c42
-rw-r--r--hw/scsi/lsi53c895a.c19
-rw-r--r--hw/scsi/scsi-disk.c92
-rw-r--r--hw/scsi/scsi-generic.c60
-rw-r--r--include/exec/memory.h25
-rw-r--r--include/hw/pci/pci_ids.h2
-rw-r--r--include/hw/scsi/emulation.h16
-rw-r--r--include/hw/scsi/scsi.h1
-rw-r--r--include/qemu/thread.h22
-rw-r--r--memory.c45
-rw-r--r--memory_mapping.c3
-rw-r--r--scripts/dump-guest-memory.py4
-rw-r--r--target/i386/cpu.c1
-rw-r--r--target/i386/cpu.h1
-rw-r--r--target/i386/hyperv-proto.h2
-rw-r--r--target/i386/kvm.c30
-rw-r--r--target/i386/seg_helper.c4
-rw-r--r--util/qemu-thread-posix.c44
25 files changed, 308 insertions, 141 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 0499e11593..0d68e4bc5e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -105,9 +105,9 @@ Guest CPU cores (TCG):
 ----------------------
 Overall
 L: qemu-devel@nongnu.org
-M: Paolo Bonzini <pbonzini@redhat.com>
 M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
 M: Richard Henderson <rth@twiddle.net>
+R: Paolo Bonzini <pbonzini@redhat.com>
 S: Maintained
 F: cpus.c
 F: exec.c
@@ -1141,7 +1141,8 @@ F: hw/pci-host/ppce500.c
 F: hw/net/fsl_etsec/
 
 Character devices
-M: Paolo Bonzini <pbonzini@redhat.com>
+M: Marc-André Lureau <marcandre.lureau@redhat.com>
+R: Paolo Bonzini <pbonzini@redhat.com>
 S: Odd Fixes
 F: hw/char/
 
@@ -1528,8 +1529,8 @@ T: git git://github.com/famz/qemu.git bitmaps
 T: git git://github.com/jnsnow/qemu.git bitmaps
 
 Character device backends
-M: Paolo Bonzini <pbonzini@redhat.com>
 M: Marc-André Lureau <marcandre.lureau@redhat.com>
+R: Paolo Bonzini <pbonzini@redhat.com>
 S: Maintained
 F: chardev/
 F: include/chardev/
@@ -1762,9 +1763,9 @@ F: tests/qmp-cmd-test.c
 T: git git://repo.or.cz/qemu/armbru.git qapi-next
 
 qtest
-M: Paolo Bonzini <pbonzini@redhat.com>
 M: Thomas Huth <thuth@redhat.com>
 M: Laurent Vivier <lvivier@redhat.com>
+R: Paolo Bonzini <pbonzini@redhat.com>
 S: Maintained
 F: qtest.c
 F: tests/libqtest.*
@@ -1871,7 +1872,6 @@ F: tests/test-io-*
 Sockets
 M: Daniel P. Berrange <berrange@redhat.com>
 M: Gerd Hoffmann <kraxel@redhat.com>
-M: Paolo Bonzini <pbonzini@redhat.com>
 S: Maintained
 F: include/qemu/sockets.h
 F: util/qemu-sockets.c
@@ -2058,13 +2058,12 @@ M: Ronnie Sahlberg <ronniesahlberg@gmail.com>
 M: Paolo Bonzini <pbonzini@redhat.com>
 M: Peter Lieven <pl@kamp.de>
 L: qemu-block@nongnu.org
-S: Supported
+S: Odd Fixes
 F: block/iscsi.c
 F: block/iscsi-opts.c
 
 Network Block Device (NBD)
 M: Eric Blake <eblake@redhat.com>
-M: Paolo Bonzini <pbonzini@redhat.com>
 L: qemu-block@nongnu.org
 S: Maintained
 F: block/nbd*
diff --git a/cpus.c b/cpus.c
index 3978f63d8f..a2b33ccb29 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1554,6 +1554,14 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
             atomic_mb_set(&cpu->exit_request, 0);
         }
 
+        if (use_icount && all_cpu_threads_idle()) {
+            /*
+             * When all cpus are sleeping (e.g. in WFI), to avoid a deadlock
+             * in the main_loop, wake it up in order to start the warp timer.
+             */
+            qemu_notify_event();
+        }
+
         qemu_tcg_rr_wait_io_event(cpu ? cpu : first_cpu);
         deal_with_unplugged_cpus();
     }
diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst
index 687570754d..e7658ab050 100644
--- a/docs/devel/migration.rst
+++ b/docs/devel/migration.rst
@@ -435,6 +435,7 @@ Examples of such memory API functions are:
   - memory_region_add_subregion()
   - memory_region_del_subregion()
   - memory_region_set_readonly()
+  - memory_region_set_nonvolatile()
   - memory_region_set_enabled()
   - memory_region_set_address()
   - memory_region_set_alias_offset()
diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index 49324f3fae..bf2adf5e16 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -116,6 +116,7 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
     nvdimm->nvdimm_mr = g_new(MemoryRegion, 1);
     memory_region_init_alias(nvdimm->nvdimm_mr, OBJECT(dimm),
                              "nvdimm-memory", mr, 0, pmem_size);
+    memory_region_set_nonvolatile(nvdimm->nvdimm_mr, true);
     nvdimm->nvdimm_mr->align = align;
 }
 
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index f88910e55c..ecfd10a29a 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -1279,6 +1279,7 @@ static void desugar_shm(IVShmemState *s)
     object_property_set_bool(obj, true, "share", &error_abort);
     object_property_add_child(OBJECT(s), "internal-shm-backend", obj,
                               &error_abort);
+    object_unref(obj);
     user_creatable_complete(obj, &error_abort);
     s->hostmem = MEMORY_BACKEND(obj);
 }
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 0c38a8dfd3..7b871b5734 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -629,7 +629,15 @@ static void mch_class_init(ObjectClass *klass, void *data)
     dc->desc = "Host bridge";
     dc->vmsd = &vmstate_mch;
     k->vendor_id = PCI_VENDOR_ID_INTEL;
-    k->device_id = PCI_DEVICE_ID_INTEL_Q35_MCH;
+    /*
+     * The 'q35' machine type implements an Intel Series 3 chipset,
+     * of which there are several variants. The key difference between
+     * the 82P35 MCH ('p35') and 82Q35 GMCH ('q35') variants is that
+     * the latter has an integrated graphics adapter. QEMU does not
+     * implement integrated graphics, so uses the PCI ID for the 82P35
+     * chipset.
+     */
+    k->device_id = PCI_DEVICE_ID_INTEL_P35_MCH;
     k->revision = MCH_HOST_BRIDGE_REVISION_DEFAULT;
     k->class_id = PCI_CLASS_BRIDGE_HOST;
     /*
diff --git a/hw/scsi/Makefile.objs b/hw/scsi/Makefile.objs
index 718b4c2a68..45167baeaf 100644
--- a/hw/scsi/Makefile.objs
+++ b/hw/scsi/Makefile.objs
@@ -1,4 +1,4 @@
-common-obj-y += scsi-disk.o
+common-obj-y += scsi-disk.o emulation.o
 common-obj-y += scsi-generic.o scsi-bus.o
 common-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o
 common-obj-$(CONFIG_MPTSAS_SCSI_PCI) += mptsas.o mptconfig.o mptendian.o
diff --git a/hw/scsi/emulation.c b/hw/scsi/emulation.c
new file mode 100644
index 0000000000..06d62f3c38
--- /dev/null
+++ b/hw/scsi/emulation.c
@@ -0,0 +1,42 @@
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/bswap.h"
+#include "hw/scsi/emulation.h"
+
+int scsi_emulate_block_limits(uint8_t *outbuf, const SCSIBlockLimits *bl)
+{
+    /* required VPD size with unmap support */
+    memset(outbuf, 0, 0x3c);
+
+    outbuf[0] = bl->wsnz; /* wsnz */
+
+    if (bl->max_io_sectors) {
+        /* optimal transfer length granularity.  This field and the optimal
+         * transfer length can't be greater than maximum transfer length.
+         */
+        stw_be_p(outbuf + 2, MIN(bl->min_io_size, bl->max_io_sectors));
+
+        /* maximum transfer length */
+        stl_be_p(outbuf + 4, bl->max_io_sectors);
+
+        /* optimal transfer length */
+        stl_be_p(outbuf + 8, MIN(bl->opt_io_size, bl->max_io_sectors));
+    } else {
+        stw_be_p(outbuf + 2, bl->min_io_size);
+        stl_be_p(outbuf + 8, bl->opt_io_size);
+    }
+
+    /* max unmap LBA count */
+    stl_be_p(outbuf + 16, bl->max_unmap_sectors);
+
+    /* max unmap descriptors */
+    stl_be_p(outbuf + 20, bl->max_unmap_descr);
+
+    /* optimal unmap granularity; alignment is zero */
+    stl_be_p(outbuf + 24, bl->unmap_sectors);
+
+    /* max write same size, make it the same as maximum transfer length */
+    stl_be_p(outbuf + 36, bl->max_io_sectors);
+
+    return 0x3c;
+}
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
index d1e6534311..3f207f607c 100644
--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -861,10 +861,11 @@ static void lsi_do_status(LSIState *s)
 
 static void lsi_do_msgin(LSIState *s)
 {
-    int len;
+    uint8_t len;
     trace_lsi_do_msgin(s->dbc, s->msg_len);
     s->sfbr = s->msg[0];
     len = s->msg_len;
+    assert(len > 0 && len <= LSI_MAX_MSGIN_LEN);
     if (len > s->dbc)
         len = s->dbc;
     pci_dma_write(PCI_DEVICE(s), s->dnad, s->msg, len);
@@ -1705,8 +1706,10 @@ static uint8_t lsi_reg_readb(LSIState *s, int offset)
         break;
     case 0x58: /* SBDL */
         /* Some drivers peek at the data bus during the MSG IN phase.  */
-        if ((s->sstat1 & PHASE_MASK) == PHASE_MI)
+        if ((s->sstat1 & PHASE_MASK) == PHASE_MI) {
+            assert(s->msg_len > 0);
             return s->msg[0];
+        }
         ret = 0;
         break;
     case 0x59: /* SBDL high */
@@ -2103,11 +2106,23 @@ static int lsi_pre_save(void *opaque)
     return 0;
 }
 
+static int lsi_post_load(void *opaque, int version_id)
+{
+    LSIState *s = opaque;
+
+    if (s->msg_len < 0 || s->msg_len > LSI_MAX_MSGIN_LEN) {
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
 static const VMStateDescription vmstate_lsi_scsi = {
     .name = "lsiscsi",
     .version_id = 0,
     .minimum_version_id = 0,
     .pre_save = lsi_pre_save,
+    .post_load = lsi_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_PCI_DEVICE(parent_obj, LSIState),
 
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index e2c5408aa2..6eb258d3f3 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -33,6 +33,7 @@ do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "hw/scsi/scsi.h"
+#include "hw/scsi/emulation.h"
 #include "scsi/constants.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/block-backend.h"
@@ -589,7 +590,7 @@ static uint8_t *scsi_get_buf(SCSIRequest *req)
     return (uint8_t *)r->iov.iov_base;
 }
 
-int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf)
+static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf)
 {
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
     uint8_t page_code = req->cmd.buf[2];
@@ -691,89 +692,36 @@ int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf)
     }
     case 0xb0: /* block limits */
     {
-        unsigned int unmap_sectors =
-            s->qdev.conf.discard_granularity / s->qdev.blocksize;
-        unsigned int min_io_size =
-            s->qdev.conf.min_io_size / s->qdev.blocksize;
-        unsigned int opt_io_size =
-            s->qdev.conf.opt_io_size / s->qdev.blocksize;
-        unsigned int max_unmap_sectors =
-            s->max_unmap_size / s->qdev.blocksize;
-        unsigned int max_io_sectors =
-            s->max_io_size / s->qdev.blocksize;
+        SCSIBlockLimits bl = {};
 
         if (s->qdev.type == TYPE_ROM) {
             DPRINTF("Inquiry (EVPD[%02X] not supported for CDROM\n",
                     page_code);
             return -1;
         }
+        bl.wsnz = 1;
+        bl.unmap_sectors =
+            s->qdev.conf.discard_granularity / s->qdev.blocksize;
+        bl.min_io_size =
+            s->qdev.conf.min_io_size / s->qdev.blocksize;
+        bl.opt_io_size =
+            s->qdev.conf.opt_io_size / s->qdev.blocksize;
+        bl.max_unmap_sectors =
+            s->max_unmap_size / s->qdev.blocksize;
+        bl.max_io_sectors =
+            s->max_io_size / s->qdev.blocksize;
+        /* 255 descriptors fit in 4 KiB with an 8-byte header */
+        bl.max_unmap_descr = 255;
+
         if (s->qdev.type == TYPE_DISK) {
             int max_transfer_blk = blk_get_max_transfer(s->qdev.conf.blk);
             int max_io_sectors_blk =
                 max_transfer_blk / s->qdev.blocksize;
 
-            max_io_sectors =
-                MIN_NON_ZERO(max_io_sectors_blk, max_io_sectors);
-
-            /* min_io_size and opt_io_size can't be greater than
-             * max_io_sectors */
-            if (min_io_size) {
-                min_io_size = MIN(min_io_size, max_io_sectors);
-            }
-            if (opt_io_size) {
-                opt_io_size = MIN(opt_io_size, max_io_sectors);
-            }
+            bl.max_io_sectors =
+                MIN_NON_ZERO(max_io_sectors_blk, bl.max_io_sectors);
         }
-        /* required VPD size with unmap support */
-        buflen = 0x40;
-        memset(outbuf + 4, 0, buflen - 4);
-
-        outbuf[4] = 0x1; /* wsnz */
-
-        /* optimal transfer length granularity */
-        outbuf[6] = (min_io_size >> 8) & 0xff;
-        outbuf[7] = min_io_size & 0xff;
-
-        /* maximum transfer length */
-        outbuf[8] = (max_io_sectors >> 24) & 0xff;
-        outbuf[9] = (max_io_sectors >> 16) & 0xff;
-        outbuf[10] = (max_io_sectors >> 8) & 0xff;
-        outbuf[11] = max_io_sectors & 0xff;
-
-        /* optimal transfer length */
-        outbuf[12] = (opt_io_size >> 24) & 0xff;
-        outbuf[13] = (opt_io_size >> 16) & 0xff;
-        outbuf[14] = (opt_io_size >> 8) & 0xff;
-        outbuf[15] = opt_io_size & 0xff;
-
-        /* max unmap LBA count, default is 1GB */
-        outbuf[20] = (max_unmap_sectors >> 24) & 0xff;
-        outbuf[21] = (max_unmap_sectors >> 16) & 0xff;
-        outbuf[22] = (max_unmap_sectors >> 8) & 0xff;
-        outbuf[23] = max_unmap_sectors & 0xff;
-
-        /* max unmap descriptors, 255 fit in 4 kb with an 8-byte header */
-        outbuf[24] = 0;
-        outbuf[25] = 0;
-        outbuf[26] = 0;
-        outbuf[27] = 255;
-
-        /* optimal unmap granularity */
-        outbuf[28] = (unmap_sectors >> 24) & 0xff;
-        outbuf[29] = (unmap_sectors >> 16) & 0xff;
-        outbuf[30] = (unmap_sectors >> 8) & 0xff;
-        outbuf[31] = unmap_sectors & 0xff;
-
-        /* max write same size */
-        outbuf[36] = 0;
-        outbuf[37] = 0;
-        outbuf[38] = 0;
-        outbuf[39] = 0;
-
-        outbuf[40] = (max_io_sectors >> 24) & 0xff;
-        outbuf[41] = (max_io_sectors >> 16) & 0xff;
-        outbuf[42] = (max_io_sectors >> 8) & 0xff;
-        outbuf[43] = max_io_sectors & 0xff;
+        buflen += scsi_emulate_block_limits(outbuf + buflen, &bl);
         break;
     }
     case 0xb1: /* block device characteristics */
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index d60c4d0fcf..7237b4162e 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -16,6 +16,7 @@
 #include "qemu-common.h"
 #include "qemu/error-report.h"
 #include "hw/scsi/scsi.h"
+#include "hw/scsi/emulation.h"
 #include "sysemu/block-backend.h"
 
 #ifdef __linux__
@@ -144,7 +145,7 @@ static int execute_command(BlockBackend *blk,
 
 static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s)
 {
-    uint8_t page, page_len;
+    uint8_t page, page_idx;
 
     /*
      *  EVPD set to zero returns the standard INQUIRY data.
@@ -181,7 +182,7 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s)
             /* Also take care of the opt xfer len. */
             stl_be_p(&r->buf[12],
                     MIN_NON_ZERO(max_transfer, ldl_be_p(&r->buf[12])));
-        } else if (page == 0x00 && s->needs_vpd_bl_emulation) {
+        } else if (s->needs_vpd_bl_emulation && page == 0x00) {
             /*
              * Now we're capable of supplying the VPD Block Limits
              * response if the hardware can't. Add it in the INQUIRY
@@ -190,17 +191,43 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s)
              *
              * This way, the guest kernel will be aware of the support
              * and will use it to proper setup the SCSI device.
+             *
+             * VPD page numbers must be sorted, so insert 0xb0 at the
+             * right place with an in-place insert.  After the initialization
+             * part of the for loop is executed, the device response is
+             * at r->buf[0] to r->buf[page_idx - 1].
              */
-            page_len = r->buf[3];
-            r->buf[page_len + 4] = 0xb0;
-            r->buf[3] = ++page_len;
+            for (page_idx = lduw_be_p(r->buf + 2) + 4;
+                 page_idx > 4 && r->buf[page_idx - 1] >= 0xb0;
+                 page_idx--) {
+                if (page_idx < r->buflen) {
+                    r->buf[page_idx] = r->buf[page_idx - 1];
+                }
+            }
+            r->buf[page_idx] = 0xb0;
+            stw_be_p(r->buf + 2, lduw_be_p(r->buf + 2) + 1);
         }
     }
 }
 
-static int scsi_emulate_block_limits(SCSIGenericReq *r)
+static int scsi_generic_emulate_block_limits(SCSIGenericReq *r, SCSIDevice *s)
 {
-    r->buflen = scsi_disk_emulate_vpd_page(&r->req, r->buf);
+    int len;
+    uint8_t buf[64];
+
+    SCSIBlockLimits bl = {
+        .max_io_sectors = blk_get_max_transfer(s->conf.blk) / s->blocksize
+    };
+
+    memset(r->buf, 0, r->buflen);
+    stb_p(buf, s->type);
+    stb_p(buf + 1, 0xb0);
+    len = scsi_emulate_block_limits(buf + 4, &bl);
+    assert(len <= sizeof(buf) - 4);
+    stw_be_p(buf + 2, len);
+
+    memcpy(r->buf, buf, MIN(r->buflen, len + 4));
+
     r->io_header.sb_len_wr = 0;
 
     /*
@@ -219,7 +246,6 @@ static void scsi_read_complete(void * opaque, int ret)
 {
     SCSIGenericReq *r = (SCSIGenericReq *)opaque;
     SCSIDevice *s = r->req.dev;
-    SCSISense sense;
     int len;
 
     assert(r->req.aiocb != NULL);
@@ -242,13 +268,15 @@ static void scsi_read_complete(void * opaque, int ret)
      * resulted in sense error but would need emulation.
      * In this case, emulate a valid VPD response.
      */
-    if (s->needs_vpd_bl_emulation) {
-        int is_vpd_bl = r->req.cmd.buf[0] == INQUIRY &&
-                         r->req.cmd.buf[1] & 0x01 &&
-                         r->req.cmd.buf[2] == 0xb0;
-
-        if (is_vpd_bl && sg_io_sense_from_errno(-ret, &r->io_header, &sense)) {
-            len = scsi_emulate_block_limits(r);
+    if (s->needs_vpd_bl_emulation && ret == 0 &&
+        (r->io_header.driver_status & SG_ERR_DRIVER_SENSE) &&
+        r->req.cmd.buf[0] == INQUIRY &&
+        (r->req.cmd.buf[1] & 0x01) &&
+        r->req.cmd.buf[2] == 0xb0) {
+        SCSISense sense =
+            scsi_parse_sense_buf(r->req.sense, r->io_header.sb_len_wr);
+        if (sense.key == ILLEGAL_REQUEST) {
+            len = scsi_generic_emulate_block_limits(r, s);
             /*
              * No need to let scsi_read_complete go on and handle an
              * INQUIRY VPD BL request we created manually.
@@ -527,7 +555,7 @@ static void scsi_generic_set_vpd_bl_emulation(SCSIDevice *s)
     }
 
     page_len = buf[3];
-    for (i = 4; i < page_len + 4; i++) {
+    for (i = 4; i < MIN(sizeof(buf), page_len + 4); i++) {
         if (buf[i] == 0xb0) {
             s->needs_vpd_bl_emulation = false;
             return;
diff --git a/include/exec/memory.h b/include/exec/memory.h
index d0c7f0d9e9..8e61450de3 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -355,6 +355,7 @@ struct MemoryRegion {
     bool ram;
     bool subpage;
     bool readonly; /* For RAM regions */
+    bool nonvolatile;
     bool rom_device;
     bool flush_coalesced_mmio;
     bool global_locking;
@@ -480,6 +481,7 @@ static inline FlatView *address_space_to_flatview(AddressSpace *as)
  * @offset_within_address_space: the address of the first byte of the section
  *     relative to the region's address space
  * @readonly: writes to this section are ignored
+ * @nonvolatile: this section is non-volatile
  */
 struct MemoryRegionSection {
     MemoryRegion *mr;
@@ -488,6 +490,7 @@ struct MemoryRegionSection {
     Int128 size;
     hwaddr offset_within_address_space;
     bool readonly;
+    bool nonvolatile;
 };
 
 /**
@@ -1170,6 +1173,17 @@ static inline bool memory_region_is_rom(MemoryRegion *mr)
     return mr->ram && mr->readonly;
 }
 
+/**
+ * memory_region_is_nonvolatile: check whether a memory region is non-volatile
+ *
+ * Returns %true if a memory region is non-volatile memory.
+ *
+ * @mr: the memory region being queried
+ */
+static inline bool memory_region_is_nonvolatile(MemoryRegion *mr)
+{
+    return mr->nonvolatile;
+}
 
 /**
  * memory_region_get_fd: Get a file descriptor backing a RAM memory region.
@@ -1342,6 +1356,17 @@ void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr,
 void memory_region_set_readonly(MemoryRegion *mr, bool readonly);
 
 /**
+ * memory_region_set_nonvolatile: Turn a memory region non-volatile
+ *
+ * Allows a memory region to be marked as non-volatile.
+ * Only useful on RAM regions.
+ *
+ * @mr: the region being updated.
+ * @nonvolatile: whether the region is to be non-volatile.
+ */
+void memory_region_set_nonvolatile(MemoryRegion *mr, bool nonvolatile);
+
+/**
  * memory_region_rom_device_set_romd: enable/disable ROMD mode
  *
  * Allows a ROM device (initialized with memory_region_init_rom_device() to
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index 63acc722a9..eeb33018ad 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -255,7 +255,7 @@
 #define PCI_DEVICE_ID_INTEL_82801I_EHCI2 0x293c
 #define PCI_DEVICE_ID_INTEL_82599_SFP_VF 0x10ed
 
-#define PCI_DEVICE_ID_INTEL_Q35_MCH      0x29c0
+#define PCI_DEVICE_ID_INTEL_P35_MCH      0x29c0
 
 #define PCI_VENDOR_ID_XEN                0x5853
 #define PCI_DEVICE_ID_XEN_PLATFORM       0x0001
diff --git a/include/hw/scsi/emulation.h b/include/hw/scsi/emulation.h
new file mode 100644
index 0000000000..09fba1ff39
--- /dev/null
+++ b/include/hw/scsi/emulation.h
@@ -0,0 +1,16 @@
+#ifndef HW_SCSI_EMULATION_H
+#define HW_SCSI_EMULATION_H 1
+
+typedef struct SCSIBlockLimits {
+    bool wsnz;
+    uint16_t min_io_size;
+    uint32_t max_unmap_descr;
+    uint32_t opt_io_size;
+    uint32_t max_unmap_sectors;
+    uint32_t unmap_sectors;
+    uint32_t max_io_sectors;
+} SCSIBlockLimits;
+
+int scsi_emulate_block_limits(uint8_t *outbuf, const SCSIBlockLimits *bl);
+
+#endif
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index ee3a4118fb..acef25faa4 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -189,7 +189,6 @@ void scsi_device_report_change(SCSIDevice *dev, SCSISense sense);
 void scsi_device_unit_attention_reported(SCSIDevice *dev);
 void scsi_generic_read_device_inquiry(SCSIDevice *dev);
 int scsi_device_get_sense(SCSIDevice *dev, uint8_t *buf, int len, bool fixed);
-int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf);
 int scsi_SG_IO_FROM_DEV(BlockBackend *blk, uint8_t *cmd, uint8_t cmd_size,
                         uint8_t *buf, uint8_t buf_size);
 SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int target, int lun);
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index b2661b6672..55d83a907c 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -162,7 +162,29 @@ void qemu_thread_exit(void *retval);
 void qemu_thread_naming(bool enable);
 
 struct Notifier;
+/**
+ * qemu_thread_atexit_add:
+ * @notifier: Notifier to add
+ *
+ * Add the specified notifier to a list which will be run via
+ * notifier_list_notify() when this thread exits (either by calling
+ * qemu_thread_exit() or by returning from its start_routine).
+ * The usual usage is that the caller passes a Notifier which is
+ * a per-thread variable; it can then use the callback to free
+ * other per-thread data.
+ *
+ * If the thread exits as part of the entire process exiting,
+ * it is unspecified whether notifiers are called or not.
+ */
 void qemu_thread_atexit_add(struct Notifier *notifier);
+/**
+ * qemu_thread_atexit_remove:
+ * @notifier: Notifier to remove
+ *
+ * Remove the specified notifier from the thread-exit notification
+ * list. It is not valid to try to remove a notifier which is not
+ * on the list.
+ */
 void qemu_thread_atexit_remove(struct Notifier *notifier);
 
 struct QemuSpin {
diff --git a/memory.c b/memory.c
index 51204aa079..d14c6dec1d 100644
--- a/memory.c
+++ b/memory.c
@@ -216,6 +216,7 @@ struct FlatRange {
     uint8_t dirty_log_mask;
     bool romd_mode;
     bool readonly;
+    bool nonvolatile;
 };
 
 #define FOR_EACH_FLAT_RANGE(var, view)          \
@@ -231,6 +232,7 @@ section_from_flat_range(FlatRange *fr, FlatView *fv)
         .size = fr->addr.size,
         .offset_within_address_space = int128_get64(fr->addr.start),
         .readonly = fr->readonly,
+        .nonvolatile = fr->nonvolatile,
     };
 }
 
@@ -240,7 +242,8 @@ static bool flatrange_equal(FlatRange *a, FlatRange *b)
         && addrrange_equal(a->addr, b->addr)
         && a->offset_in_region == b->offset_in_region
         && a->romd_mode == b->romd_mode
-        && a->readonly == b->readonly;
+        && a->readonly == b->readonly
+        && a->nonvolatile == b->nonvolatile;
 }
 
 static FlatView *flatview_new(MemoryRegion *mr_root)
@@ -312,7 +315,8 @@ static bool can_merge(FlatRange *r1, FlatRange *r2)
                      int128_make64(r2->offset_in_region))
         && r1->dirty_log_mask == r2->dirty_log_mask
         && r1->romd_mode == r2->romd_mode
-        && r1->readonly == r2->readonly;
+        && r1->readonly == r2->readonly
+        && r1->nonvolatile == r2->nonvolatile;
 }
 
 /* Attempt to simplify a view by merging adjacent ranges */
@@ -592,7 +596,8 @@ static void render_memory_region(FlatView *view,
                                  MemoryRegion *mr,
                                  Int128 base,
                                  AddrRange clip,
-                                 bool readonly)
+                                 bool readonly,
+                                 bool nonvolatile)
 {
     MemoryRegion *subregion;
     unsigned i;
@@ -608,6 +613,7 @@ static void render_memory_region(FlatView *view,
 
     int128_addto(&base, int128_make64(mr->addr));
     readonly |= mr->readonly;
+    nonvolatile |= mr->nonvolatile;
 
     tmp = addrrange_make(base, mr->size);
 
@@ -620,13 +626,15 @@ static void render_memory_region(FlatView *view,
     if (mr->alias) {
         int128_subfrom(&base, int128_make64(mr->alias->addr));
         int128_subfrom(&base, int128_make64(mr->alias_offset));
-        render_memory_region(view, mr->alias, base, clip, readonly);
+        render_memory_region(view, mr->alias, base, clip,
+                             readonly, nonvolatile);
         return;
     }
 
     /* Render subregions in priority order. */
     QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) {
-        render_memory_region(view, subregion, base, clip, readonly);
+        render_memory_region(view, subregion, base, clip,
+                             readonly, nonvolatile);
     }
 
     if (!mr->terminates) {
@@ -641,6 +649,7 @@ static void render_memory_region(FlatView *view,
     fr.dirty_log_mask = memory_region_get_dirty_log_mask(mr);
     fr.romd_mode = mr->romd_mode;
     fr.readonly = readonly;
+    fr.nonvolatile = nonvolatile;
 
     /* Render the region itself into any gaps left by the current view. */
     for (i = 0; i < view->nr && int128_nz(remain); ++i) {
@@ -726,7 +735,8 @@ static FlatView *generate_memory_topology(MemoryRegion *mr)
 
     if (mr) {
         render_memory_region(view, mr, int128_zero(),
-                             addrrange_make(int128_zero(), int128_2_64()), false);
+                             addrrange_make(int128_zero(), int128_2_64()),
+                             false, false);
     }
     flatview_simplify(view);
 
@@ -2039,6 +2049,16 @@ void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
     }
 }
 
+void memory_region_set_nonvolatile(MemoryRegion *mr, bool nonvolatile)
+{
+    if (mr->nonvolatile != nonvolatile) {
+        memory_region_transaction_begin();
+        mr->nonvolatile = nonvolatile;
+        memory_region_update_pending |= mr->enabled;
+        memory_region_transaction_commit();
+    }
+}
+
 void memory_region_rom_device_set_romd(MemoryRegion *mr, bool romd_mode)
 {
     if (mr->romd_mode != romd_mode) {
@@ -2489,6 +2509,7 @@ static MemoryRegionSection memory_region_find_rcu(MemoryRegion *mr,
     ret.size = range.size;
     ret.offset_within_address_space = int128_get64(range.start);
     ret.readonly = fr->readonly;
+    ret.nonvolatile = fr->nonvolatile;
     return ret;
 }
 
@@ -2839,10 +2860,11 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
             QTAILQ_INSERT_TAIL(alias_print_queue, ml, mrqueue);
         }
         mon_printf(f, TARGET_FMT_plx "-" TARGET_FMT_plx
-                   " (prio %d, %s): alias %s @%s " TARGET_FMT_plx
+                   " (prio %d, %s%s): alias %s @%s " TARGET_FMT_plx
                    "-" TARGET_FMT_plx "%s",
                    cur_start, cur_end,
                    mr->priority,
+                   mr->nonvolatile ? "nv-" : "",
                    memory_region_type((MemoryRegion *)mr),
                    memory_region_name(mr),
                    memory_region_name(mr->alias),
@@ -2854,9 +2876,10 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
         }
     } else {
         mon_printf(f,
-                   TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %s): %s%s",
+                   TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %s%s): %s%s",
                    cur_start, cur_end,
                    mr->priority,
+                   mr->nonvolatile ? "nv-" : "",
                    memory_region_type((MemoryRegion *)mr),
                    memory_region_name(mr),
                    mr->enabled ? "" : " [disabled]");
@@ -2941,19 +2964,21 @@ static void mtree_print_flatview(gpointer key, gpointer value,
         mr = range->mr;
         if (range->offset_in_region) {
             p(f, MTREE_INDENT TARGET_FMT_plx "-"
-              TARGET_FMT_plx " (prio %d, %s): %s @" TARGET_FMT_plx,
+              TARGET_FMT_plx " (prio %d, %s%s): %s @" TARGET_FMT_plx,
               int128_get64(range->addr.start),
               int128_get64(range->addr.start) + MR_SIZE(range->addr.size),
               mr->priority,
+              range->nonvolatile ? "nv-" : "",
               range->readonly ? "rom" : memory_region_type(mr),
               memory_region_name(mr),
               range->offset_in_region);
         } else {
             p(f, MTREE_INDENT TARGET_FMT_plx "-"
-              TARGET_FMT_plx " (prio %d, %s): %s",
+              TARGET_FMT_plx " (prio %d, %s%s): %s",
               int128_get64(range->addr.start),
               int128_get64(range->addr.start) + MR_SIZE(range->addr.size),
               mr->priority,
+              range->nonvolatile ? "nv-" : "",
               range->readonly ? "rom" : memory_region_type(mr),
               memory_region_name(mr));
         }
diff --git a/memory_mapping.c b/memory_mapping.c
index 775466f3a8..724dd0b417 100644
--- a/memory_mapping.c
+++ b/memory_mapping.c
@@ -206,7 +206,8 @@ static void guest_phys_blocks_region_add(MemoryListener *listener,
 
     /* we only care about RAM */
     if (!memory_region_is_ram(section->mr) ||
-        memory_region_is_ram_device(section->mr)) {
+        memory_region_is_ram_device(section->mr) ||
+        memory_region_is_nonvolatile(section->mr)) {
         return;
     }
 
diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py
index 5a857cebcf..198cd0fe40 100644
--- a/scripts/dump-guest-memory.py
+++ b/scripts/dump-guest-memory.py
@@ -417,7 +417,9 @@ def get_guest_phys_blocks():
         memory_region = flat_range["mr"].dereference()
 
         # we only care about RAM
-        if not memory_region["ram"]:
+        if (not memory_region["ram"] or
+            memory_region["ram_device"] or
+            memory_region["nonvolatile"]):
             continue
 
         section_size = int128_get64(flat_range["addr"]["size"])
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index af7e9f09cc..f81d35e1f9 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5732,6 +5732,7 @@ static Property x86_cpu_properties[] = {
     DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false),
     DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false),
     DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false),
+    DEFINE_PROP_BOOL("hv-evmcs", X86CPU, hyperv_evmcs, false),
     DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false),
     DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true),
     DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index ad0e0b4534..9c52d0cbeb 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1391,6 +1391,7 @@ struct X86CPU {
     bool hyperv_frequencies;
     bool hyperv_reenlightenment;
     bool hyperv_tlbflush;
+    bool hyperv_evmcs;
     bool hyperv_ipi;
     bool check_cpuid;
     bool enforce_cpuid;
diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h
index 8c572cd7c2..c0272b3a01 100644
--- a/target/i386/hyperv-proto.h
+++ b/target/i386/hyperv-proto.h
@@ -18,6 +18,7 @@
 #define HV_CPUID_FEATURES                     0x40000003
 #define HV_CPUID_ENLIGHTMENT_INFO             0x40000004
 #define HV_CPUID_IMPLEMENT_LIMITS             0x40000005
+#define HV_CPUID_NESTED_FEATURES              0x4000000A
 #define HV_CPUID_MIN                          0x40000005
 #define HV_CPUID_MAX                          0x4000ffff
 #define HV_HYPERVISOR_PRESENT_BIT             0x80000000
@@ -60,6 +61,7 @@
 #define HV_RELAXED_TIMING_RECOMMENDED       (1u << 5)
 #define HV_CLUSTER_IPI_RECOMMENDED          (1u << 10)
 #define HV_EX_PROCESSOR_MASKS_RECOMMENDED   (1u << 11)
+#define HV_ENLIGHTENED_VMCS_RECOMMENDED     (1u << 14)
 
 /*
  * Basic virtualized MSRs
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 796a049a0d..f524e7d929 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -869,6 +869,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
     uint32_t unused;
     struct kvm_cpuid_entry2 *c;
     uint32_t signature[3];
+    uint16_t evmcs_version;
     int kvm_base = KVM_CPUID_SIGNATURE;
     int r;
     Error *local_err = NULL;
@@ -912,7 +913,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
             memset(signature, 0, 12);
             memcpy(signature, cpu->hyperv_vendor_id, len);
         }
-        c->eax = HV_CPUID_MIN;
+        c->eax = cpu->hyperv_evmcs ?
+            HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS;
         c->ebx = signature[0];
         c->ecx = signature[1];
         c->edx = signature[2];
@@ -970,7 +972,16 @@ int kvm_arch_init_vcpu(CPUState *cs)
             c->eax |= HV_CLUSTER_IPI_RECOMMENDED;
             c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED;
         }
-
+        if (cpu->hyperv_evmcs) {
+            if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0,
+                                    (uintptr_t)&evmcs_version)) {
+                fprintf(stderr, "Hyper-V Enlightened VMCS "
+                        "(requested by 'hv-evmcs' cpu flag) "
+                        "is not supported by kernel\n");
+                return -ENOSYS;
+            }
+            c->eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED;
+        }
         c->ebx = cpu->hyperv_spinlock_attempts;
 
         c = &cpuid_data.entries[cpuid_i++];
@@ -981,6 +992,21 @@ int kvm_arch_init_vcpu(CPUState *cs)
 
         kvm_base = KVM_CPUID_SIGNATURE_NEXT;
         has_msr_hv_hypercall = true;
+
+        if (cpu->hyperv_evmcs) {
+            __u32 function;
+
+            /* Create zeroed 0x40000006..0x40000009 leaves */
+            for (function = HV_CPUID_IMPLEMENT_LIMITS + 1;
+                 function < HV_CPUID_NESTED_FEATURES; function++) {
+                c = &cpuid_data.entries[cpuid_i++];
+                c->function = function;
+            }
+
+            c = &cpuid_data.entries[cpuid_i++];
+            c->function = HV_CPUID_NESTED_FEATURES;
+            c->eax = evmcs_version;
+        }
     }
 
     if (cpu->expose_kvm) {
diff --git a/target/i386/seg_helper.c b/target/i386/seg_helper.c
index 33714bc6e1..63e265cb38 100644
--- a/target/i386/seg_helper.c
+++ b/target/i386/seg_helper.c
@@ -991,11 +991,11 @@ void helper_syscall(CPUX86State *env, int next_eip_addend)
         int code64;
 
         env->regs[R_ECX] = env->eip + next_eip_addend;
-        env->regs[11] = cpu_compute_eflags(env);
+        env->regs[11] = cpu_compute_eflags(env) & ~RF_MASK;
 
         code64 = env->hflags & HF_CS64_MASK;
 
-        env->eflags &= ~env->fmask;
+        env->eflags &= ~(env->fmask | RF_MASK);
         cpu_load_eflags(env, env->eflags, 0);
         cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
                            0, 0xffffffff,
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index dfa66ff2fb..865e476df5 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -443,42 +443,34 @@ void qemu_event_wait(QemuEvent *ev)
     }
 }
 
-static pthread_key_t exit_key;
-
-union NotifierThreadData {
-    void *ptr;
-    NotifierList list;
-};
-QEMU_BUILD_BUG_ON(sizeof(union NotifierThreadData) != sizeof(void *));
+static __thread NotifierList thread_exit;
 
+/*
+ * Note that in this implementation you can register a thread-exit
+ * notifier for the main thread, but it will never be called.
+ * This is OK because main thread exit can only happen when the
+ * entire process is exiting, and the API allows notifiers to not
+ * be called on process exit.
+ */
 void qemu_thread_atexit_add(Notifier *notifier)
 {
-    union NotifierThreadData ntd;
-    ntd.ptr = pthread_getspecific(exit_key);
-    notifier_list_add(&ntd.list, notifier);
-    pthread_setspecific(exit_key, ntd.ptr);
+    notifier_list_add(&thread_exit, notifier);
 }
 
 void qemu_thread_atexit_remove(Notifier *notifier)
 {
-    union NotifierThreadData ntd;
-    ntd.ptr = pthread_getspecific(exit_key);
     notifier_remove(notifier);
-    pthread_setspecific(exit_key, ntd.ptr);
-}
-
-static void qemu_thread_atexit_run(void *arg)
-{
-    union NotifierThreadData ntd = { .ptr = arg };
-    notifier_list_notify(&ntd.list, NULL);
 }
 
-static void __attribute__((constructor)) qemu_thread_atexit_init(void)
+static void qemu_thread_atexit_notify(void *arg)
 {
-    pthread_key_create(&exit_key, qemu_thread_atexit_run);
+    /*
+     * Called when a non-main thread exits (via qemu_thread_exit()
+     * or by returning from its start routine).
+     */
+    notifier_list_notify(&thread_exit, NULL);
 }
 
-
 typedef struct {
     void *(*start_routine)(void *);
     void *arg;
@@ -490,6 +482,7 @@ static void *qemu_thread_start(void *args)
     QemuThreadArgs *qemu_thread_args = args;
     void *(*start_routine)(void *) = qemu_thread_args->start_routine;
     void *arg = qemu_thread_args->arg;
+    void *r;
 
 #ifdef CONFIG_PTHREAD_SETNAME_NP
     /* Attempt to set the threads name; note that this is for debug, so
@@ -501,7 +494,10 @@ static void *qemu_thread_start(void *args)
 #endif
     g_free(qemu_thread_args->name);
     g_free(qemu_thread_args);
-    return start_routine(arg);
+    pthread_cleanup_push(qemu_thread_atexit_notify, NULL);
+    r = start_routine(arg);
+    pthread_cleanup_pop(1);
+    return r;
 }
 
 void qemu_thread_create(QemuThread *thread, const char *name,