68 files changed, 2183 insertions, 306 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 23db6f8408..dabbfccf9c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1181,6 +1181,7 @@ S: Supported
 F: hw/s390x/ipl.*
 F: pc-bios/s390-ccw/
 F: pc-bios/s390-ccw.img
+F: docs/devel/s390-dasd-ipl.txt
 T: git https://github.com/borntraeger/qemu.git s390-next
 L: qemu-s390x@nongnu.org
 
@@ -1445,6 +1446,7 @@ S: Supported
 F: hw/vfio/ccw.c
 F: hw/s390x/s390-ccw.c
 F: include/hw/s390x/s390-ccw.h
+F: include/hw/s390x/vfio-ccw.h
 T: git https://github.com/cohuck/qemu.git s390-next
 L: qemu-s390x@nongnu.org
 
diff --git a/bsd-user/main.c b/bsd-user/main.c
index a8c807e8df..6192e9d91e 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -905,7 +905,7 @@ int main(int argc, char **argv)
     /* init tcg before creating CPUs and to get qemu_host_page_size */
     tcg_exec_init(0);
 
-    cpu_type = parse_cpu_model(cpu_model);
+    cpu_type = parse_cpu_option(cpu_model);
     cpu = cpu_create(cpu_type);
     env = cpu->env_ptr;
 #if defined(TARGET_SPARC) || defined(TARGET_PPC)
diff --git a/docs/devel/s390-dasd-ipl.txt b/docs/devel/s390-dasd-ipl.txt
new file mode 100644
index 0000000000..9107e048e4
--- /dev/null
+++ b/docs/devel/s390-dasd-ipl.txt
@@ -0,0 +1,133 @@
+*****************************
+***** s390 hardware IPL *****
+*****************************
+
+The s390 hardware IPL process consists of the following steps.
+
+1. A READ IPL ccw is constructed in memory location 0x0.
+    This ccw, by definition, reads the IPL1 record which is located on the disk
+    at cylinder 0 track 0 record 1. Note that the chain flag is on in this ccw
+    so when it is complete another ccw will be fetched and executed from memory
+    location 0x08.
+
+2. Execute the Read IPL ccw at 0x00, thereby reading IPL1 data into 0x00.
+    IPL1 data is 24 bytes in length and consists of the following pieces of
+    information: [psw][read ccw][tic ccw]. When the machine executes the Read
+    IPL ccw it causes the 24 bytes of IPL1 to be read into memory starting at
+    location 0x0. Then the ccw program at 0x08 which consists of a read
+    ccw and a tic ccw is automatically executed because of the chain flag from
+    the original READ IPL ccw. The read ccw will read the IPL2 data into memory
+    and the TIC (Transfer In Channel) will transfer control to the channel
+    program contained in the IPL2 data. The TIC channel command is the
+    equivalent of a branch/jump/goto instruction for channel programs.
+    NOTE: The ccws in IPL1 are defined by the architecture to be format 0.
+
+3. Execute IPL2.
+    The TIC ccw instruction at the end of the IPL1 channel program will begin
+    the execution of the IPL2 channel program. IPL2 is stage-2 of the boot
+    process and will contain a larger channel program than IPL1. The point of
+    IPL2 is to find and load either the operating system or a small program that
+    loads the operating system from disk. At the end of this step all or some of
+    the real operating system is loaded into memory and we are ready to hand
+    control over to the guest operating system. At this point the guest
+    operating system is entirely responsible for loading any more data it might
+    need to function. NOTE: The IPL2 channel program might read data into memory
+    location 0 thereby overwriting the IPL1 psw and channel program. This is ok
+    as long as the data placed in location 0 contains a psw whose instruction
+    address points to the guest operating system code to execute at the end of
+    the IPL/boot process.
+    NOTE: The ccws in IPL2 are defined by the architecture to be format 0.
+
+4. Start executing the guest operating system.
+    The psw that was loaded into memory location 0 as part of the ipl process
+    should contain the needed flags for the operating system we have loaded. The
+    psw's instruction address will point to the location in memory where we want
+    to start executing the operating system. This psw is loaded (via LPSW
+    instruction) causing control to be passed to the operating system code.
+
+In a non-virtualized environment this process, handled entirely by the hardware,
+is kicked off by the user initiating a "Load" procedure from the hardware
+management console. This "Load" procedure crafts a special "Read IPL" ccw in
+memory location 0x0 that reads IPL1. It then executes this ccw thereby kicking
+off the reading of IPL1 data. Since the channel program from IPL1 will be
+written immediately after the special "Read IPL" ccw, the IPL1 channel program
+will be executed immediately (the special read ccw has the chaining bit turned
+on). The TIC at the end of the IPL1 channel program will cause the IPL2 channel
+program to be executed automatically. After this sequence completes the "Load"
+procedure then loads the psw from 0x0.
+
+**********************************************************
+***** How this all pertains to QEMU (and the kernel) *****
+**********************************************************
+
+In theory we should merely have to do the following to IPL/boot a guest
+operating system from a DASD device:
+
+1. Place a "Read IPL" ccw into memory location 0x0 with chaining bit on.
+2. Execute channel program at 0x0.
+3. LPSW 0x0.
+
+However, our emulation of the machine's channel program logic within the kernel
+is missing one key feature that is required for this process to work:
+non-prefetch of ccw data.
+
+When we start a channel program we pass the channel subsystem parameters via an
+ORB (Operation Request Block). One of those parameters is a prefetch bit. If the
+bit is on then the vfio-ccw kernel driver is allowed to read the entire channel
+program from guest memory before it starts executing it. This means that any
+channel commands that read additional channel commands will not work as expected
+because the newly read commands will only exist in guest memory and NOT within
+the kernel's channel subsystem memory. The kernel vfio-ccw driver currently
+requires this bit to be on for all channel programs. This is a problem because
+the IPL process consists of transferring control from the "Read IPL" ccw
+immediately to the IPL1 channel program that was read by "Read IPL".
+
+Not being able to turn off prefetch will also prevent the TIC at the end of the
+IPL1 channel program from transferring control to the IPL2 channel program.
+
+Lastly, in some cases (the zipl bootloader for example) the IPL2 program also
+transfers control to another channel program segment immediately after reading
+it from the disk. So we need to be able to handle this case.
+
+**************************
+***** What QEMU does *****
+**************************
+
+Since we are forced to live with prefetch we cannot use the very simple IPL
+procedure we defined in the preceding section. So we compensate by doing the
+following.
+
+1. Place "Read IPL" ccw into memory location 0x0, but turn off chaining bit.
+2. Execute "Read IPL" at 0x0.
+
+   So now IPL1's psw is at 0x0 and IPL1's channel program is at 0x08.
+
+3. Write a custom channel program that will seek to the IPL2 record and then
+   execute the READ and TIC ccws from IPL1.  Normally the seek is not required
+   because after reading the IPL1 record the disk is automatically positioned
+   to read the very next record which will be IPL2. But since we are not reading
+   both IPL1 and IPL2 as part of the same channel program we must manually set
+   the position.
+
+4. Grab the target address of the TIC instruction from the IPL1 channel program.
+   This address is where the IPL2 channel program starts.
+
+   Now IPL2 is loaded into memory somewhere, and we know the address.
+
+5. Execute the IPL2 channel program at the address obtained in step #4.
+
+   Because this channel program can be dynamic, we must use a special algorithm
+   that detects a READ immediately followed by a TIC and breaks the ccw chain
+   by turning off the chain bit in the READ ccw. When control is returned from
+   the kernel/hardware to the QEMU bios code we immediately issue another start
+   subchannel to execute the remaining TIC instruction. This causes the entire
+   channel program (starting from the TIC) and all needed data to be refetched
+   thereby stepping around the limitation that would otherwise prevent this
+   channel program from executing properly.
+
+   Now the operating system code is loaded somewhere in guest memory and the psw
+   in memory location 0x0 will point to entry code for the guest operating
+   system.
+
+6. LPSW 0x0.
+   LPSW transfers control to the guest operating system and we're done.
diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
index 7231c2d78f..b531cacd35 100644
--- a/docs/nvdimm.txt
+++ b/docs/nvdimm.txt
@@ -144,9 +144,25 @@ Guest Data Persistence
 ----------------------
 
 Though QEMU supports multiple types of vNVDIMM backends on Linux,
-currently the only one that can guarantee the guest write persistence
-is the device DAX on the real NVDIMM device (e.g., /dev/dax0.0), to
-which all guest access do not involve any host-side kernel cache.
+the only backends that can guarantee the guest write persistence are:
+
+A. DAX device (e.g., /dev/dax0.0), or
+B. DAX file (mounted with dax option)
+
+When using B (a file supporting direct mapping of persistent memory)
+as a backend, write persistence is guaranteed if the host kernel has
+support for the MAP_SYNC flag in the mmap system call (available
+since Linux 4.15 and on certain distro kernels) and additionally
+both 'pmem' and 'share' flags are set to 'on' on the backend.
+
+If these conditions are not satisfied, i.e. if either 'pmem' or 'share'
+is not set, if the backend file does not support DAX, or if MAP_SYNC
+is not supported by the host kernel, write persistence is not
+guaranteed after a system crash. For compatibility reasons, these
+conditions are ignored if not satisfied. Currently, no way is
+provided to test for them.
+For more details, please refer to the mmap(2) man page:
+http://man7.org/linux/man-pages/man2/mmap.2.html.
 
 When using other types of backends, it's suggested to set 'unarmed'
 option of '-device nvdimm' to 'on', which sets the unarmed flag of the
diff --git a/exec.c b/exec.c
index 2646207661..4e734770c2 100644
--- a/exec.c
+++ b/exec.c
@@ -983,14 +983,18 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
 #endif
 }
 
-const char *parse_cpu_model(const char *cpu_model)
+const char *parse_cpu_option(const char *cpu_option)
 {
     ObjectClass *oc;
     CPUClass *cc;
     gchar **model_pieces;
     const char *cpu_type;
 
-    model_pieces = g_strsplit(cpu_model, ",", 2);
+    model_pieces = g_strsplit(cpu_option, ",", 2);
+    if (!model_pieces[0]) {
+        error_report("-cpu option cannot be empty");
+        exit(1);
+    }
 
     oc = cpu_class_by_name(CPU_RESOLVING_TYPE, model_pieces[0]);
     if (oc == NULL) {
@@ -1688,7 +1692,7 @@ void ram_block_dump(Monitor *mon)
  * when we actually open and map them.  Iterate over the file
  * descriptors instead, and use qemu_fd_getpagesize().
  */
-static int find_max_supported_pagesize(Object *obj, void *opaque)
+static int find_min_backend_pagesize(Object *obj, void *opaque)
 {
     long *hpsize_min = opaque;
 
@@ -1704,7 +1708,27 @@ static int find_max_supported_pagesize(Object *obj, void *opaque)
     return 0;
 }
 
-long qemu_getrampagesize(void)
+static int find_max_backend_pagesize(Object *obj, void *opaque)
+{
+    long *hpsize_max = opaque;
+
+    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
+        HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+        long hpsize = host_memory_backend_pagesize(backend);
+
+        if (host_memory_backend_is_mapped(backend) && (hpsize > *hpsize_max)) {
+            *hpsize_max = hpsize;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * TODO: We assume right now that all mapped host memory backends are
+ * used as RAM, however some might be used for different purposes.
+ */
+long qemu_minrampagesize(void)
 {
     long hpsize = LONG_MAX;
     long mainrampagesize;
@@ -1724,7 +1748,7 @@ long qemu_getrampagesize(void)
      */
     memdev_root = object_resolve_path("/objects", NULL);
     if (memdev_root) {
-        object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
+        object_child_foreach(memdev_root, find_min_backend_pagesize, &hpsize);
     }
     if (hpsize == LONG_MAX) {
         /* No additional memory regions found ==> Report main RAM page size */
@@ -1747,8 +1771,24 @@ long qemu_getrampagesize(void)
 
     return hpsize;
 }
+
+long qemu_maxrampagesize(void)
+{
+    long pagesize = qemu_mempath_getpagesize(mem_path);
+    Object *memdev_root = object_resolve_path("/objects", NULL);
+
+    if (memdev_root) {
+        object_child_foreach(memdev_root, find_max_backend_pagesize,
+                             &pagesize);
+    }
+    return pagesize;
+}
 #else
-long qemu_getrampagesize(void)
+long qemu_minrampagesize(void)
+{
+    return getpagesize();
+}
+long qemu_maxrampagesize(void)
 {
     return getpagesize();
 }
@@ -1879,7 +1919,7 @@ static void *file_ram_alloc(RAMBlock *block,
     }
 
     area = qemu_ram_mmap(fd, memory, block->mr->align,
-                         block->flags & RAM_SHARED);
+                         block->flags & RAM_SHARED, block->flags & RAM_PMEM);
     if (area == MAP_FAILED) {
         error_setg_errno(errp, errno,
                          "unable to map backing store for guest RAM");
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ce2664a30b..16ba67f7a7 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1978,10 +1978,17 @@ static void machvirt_machine_init(void)
 }
 type_init(machvirt_machine_init);
 
+static void virt_machine_4_1_options(MachineClass *mc)
+{
+}
+DEFINE_VIRT_MACHINE_AS_LATEST(4, 1)
+
 static void virt_machine_4_0_options(MachineClass *mc)
 {
+    virt_machine_4_1_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
 }
-DEFINE_VIRT_MACHINE_AS_LATEST(4, 0)
+DEFINE_VIRT_MACHINE(4, 0)
 
 static void virt_machine_3_1_options(MachineClass *mc)
 {
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 743fef2898..5d046a43e3 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -24,6 +24,9 @@
 #include "hw/pci/pci.h"
 #include "hw/mem/nvdimm.h"
 
+GlobalProperty hw_compat_4_0[] = {};
+const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0);
+
 GlobalProperty hw_compat_3_1[] = {
     { "pcie-root-port", "x-speed", "2_5" },
     { "pcie-root-port", "x-width", "1" },
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 6eabdf9917..4a4e2c7fd4 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1601,6 +1601,8 @@ static void amdvi_class_init(ObjectClass *klass, void* data)
     dc_class->int_remap = amdvi_int_remap;
     /* Supported by the pc-q35-* machine types */
     dc->user_creatable = true;
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device";
 }
 
 static const TypeInfo amdvi = {
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 2558f48fe6..44b1231157 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3741,6 +3741,8 @@ static void vtd_class_init(ObjectClass *klass, void *data)
     x86_class->int_remap = vtd_int_remap;
     /* Supported by the pc-q35-* machine types */
     dc->user_creatable = true;
+    set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+    dc->desc = "Intel IOMMU (VT-d) DMA Remapping device";
 }
 
 static const TypeInfo vtd_info = {
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index f2c15bf1f2..d98b737b8f 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -115,6 +115,9 @@ struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
 /* Physical Address of PVH entry point read from kernel ELF NOTE */
 static size_t pvh_start_addr;
 
+GlobalProperty pc_compat_4_0[] = {};
+const size_t pc_compat_4_0_len = G_N_ELEMENTS(pc_compat_4_0);
+
 GlobalProperty pc_compat_3_1[] = {
     { "intel-iommu", "dma-drain", "off" },
     { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" },
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 8ad8e885c6..c07c4a5b38 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -428,13 +428,25 @@ static void pc_i440fx_machine_options(MachineClass *m)
     machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE);
 }
 
-static void pc_i440fx_4_0_machine_options(MachineClass *m)
+static void pc_i440fx_4_1_machine_options(MachineClass *m)
 {
     pc_i440fx_machine_options(m);
     m->alias = "pc";
     m->is_default = 1;
 }
 
+DEFINE_I440FX_MACHINE(v4_1, "pc-i440fx-4.1", NULL,
+                      pc_i440fx_4_1_machine_options);
+
+static void pc_i440fx_4_0_machine_options(MachineClass *m)
+{
+    pc_i440fx_4_1_machine_options(m);
+    m->alias = NULL;
+    m->is_default = 0;
+    compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len);
+    compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len);
+}
+
 DEFINE_I440FX_MACHINE(v4_0, "pc-i440fx-4.0", NULL,
                       pc_i440fx_4_0_machine_options);
 
@@ -911,6 +923,7 @@ static void isa_bridge_class_init(ObjectClass *klass, void *data)
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
     dc->desc        = "ISA bridge faked to support IGD PT";
+    set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
     k->vendor_id    = PCI_VENDOR_ID_INTEL;
     k->class_id     = PCI_CLASS_BRIDGE_ISA;
 };
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 372c6b73be..37dd350511 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -365,12 +365,23 @@ static void pc_q35_machine_options(MachineClass *m)
     m->max_cpus = 288;
 }
 
-static void pc_q35_4_0_machine_options(MachineClass *m)
+static void pc_q35_4_1_machine_options(MachineClass *m)
 {
     pc_q35_machine_options(m);
     m->alias = "q35";
 }
 
+DEFINE_Q35_MACHINE(v4_1, "pc-q35-4.1", NULL,
+                   pc_q35_4_1_machine_options);
+
+static void pc_q35_4_0_machine_options(MachineClass *m)
+{
+    pc_q35_4_1_machine_options(m);
+    m->alias = NULL;
+    compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len);
+    compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len);
+}
+
 DEFINE_Q35_MACHINE(v4_0, "pc-q35-4.0", NULL,
                    pc_q35_4_0_machine_options);
 
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index c56939a43b..2ef3ce4362 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4395,14 +4395,25 @@ static const TypeInfo spapr_machine_info = {
     type_init(spapr_machine_register_##suffix)
 
 /*
+ * pseries-4.1
+ */
+static void spapr_machine_4_1_class_options(MachineClass *mc)
+{
+    /* Defaults for the latest behaviour inherited from the base class */
+}
+
+DEFINE_SPAPR_MACHINE(4_1, "4.1", true);
+
+/*
  * pseries-4.0
  */
 static void spapr_machine_4_0_class_options(MachineClass *mc)
 {
-    /* Defaults for the latest behaviour inherited from the base class */
+    spapr_machine_4_1_class_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
 }
 
-DEFINE_SPAPR_MACHINE(4_0, "4.0", true);
+DEFINE_SPAPR_MACHINE(4_0, "4.0", false);
 
 /*
  * pseries-3.1
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index edc5ed0e0c..9b1c10baa6 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -347,7 +347,7 @@ static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr,
         warn_report("Many guests require at least 64kiB hpt-max-page-size");
     }
 
-    spapr_check_pagesize(spapr, qemu_getrampagesize(), errp);
+    spapr_check_pagesize(spapr, qemu_minrampagesize(), errp);
 }
 
 static bool spapr_pagesize_cb(void *opaque, uint32_t seg_pshift,
@@ -609,7 +609,7 @@ static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
         uint8_t mps;
 
         if (kvmppc_hpt_needs_host_contiguous_pages()) {
-            mps = ctz64(qemu_getrampagesize());
+            mps = ctz64(qemu_minrampagesize());
         } else {
             mps = 34; /* allow everything up to 16GiB, i.e. everything */
         }
diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index 51b272e190..d0cc06a05f 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -19,6 +19,7 @@
 #include "hw/loader.h"
 #include "hw/boards.h"
 #include "hw/s390x/virtio-ccw.h"
+#include "hw/s390x/vfio-ccw.h"
 #include "hw/s390x/css.h"
 #include "hw/s390x/ebcdic.h"
 #include "ipl.h"
@@ -303,16 +304,36 @@ static void s390_ipl_set_boot_menu(S390IPLState *ipl)
     ipl->qipl.boot_menu_timeout = cpu_to_be32(splash_time);
 }
 
-static CcwDevice *s390_get_ccw_device(DeviceState *dev_st)
+#define CCW_DEVTYPE_NONE        0x00
+#define CCW_DEVTYPE_VIRTIO      0x01
+#define CCW_DEVTYPE_VIRTIO_NET  0x02
+#define CCW_DEVTYPE_SCSI        0x03
+#define CCW_DEVTYPE_VFIO        0x04
+
+static CcwDevice *s390_get_ccw_device(DeviceState *dev_st, int *devtype)
 {
     CcwDevice *ccw_dev = NULL;
+    int tmp_dt = CCW_DEVTYPE_NONE;
 
     if (dev_st) {
+        VirtIONet *virtio_net_dev = (VirtIONet *)
+            object_dynamic_cast(OBJECT(dev_st), TYPE_VIRTIO_NET);
         VirtioCcwDevice *virtio_ccw_dev = (VirtioCcwDevice *)
             object_dynamic_cast(OBJECT(qdev_get_parent_bus(dev_st)->parent),
                                 TYPE_VIRTIO_CCW_DEVICE);
+        VFIOCCWDevice *vfio_ccw_dev = (VFIOCCWDevice *)
+            object_dynamic_cast(OBJECT(dev_st), TYPE_VFIO_CCW);
+
         if (virtio_ccw_dev) {
             ccw_dev = CCW_DEVICE(virtio_ccw_dev);
+            if (virtio_net_dev) {
+                tmp_dt = CCW_DEVTYPE_VIRTIO_NET;
+            } else {
+                tmp_dt = CCW_DEVTYPE_VIRTIO;
+            }
+        } else if (vfio_ccw_dev) {
+            ccw_dev = CCW_DEVICE(vfio_ccw_dev);
+            tmp_dt = CCW_DEVTYPE_VFIO;
         } else {
             SCSIDevice *sd = (SCSIDevice *)
                 object_dynamic_cast(OBJECT(dev_st),
@@ -325,9 +346,13 @@ static CcwDevice *s390_get_ccw_device(DeviceState *dev_st)
 
                 ccw_dev = (CcwDevice *)object_dynamic_cast(OBJECT(scsi_ccw),
                                                            TYPE_CCW_DEVICE);
+                tmp_dt = CCW_DEVTYPE_SCSI;
             }
         }
     }
+    if (devtype) {
+        *devtype = tmp_dt;
+    }
     return ccw_dev;
 }
 
@@ -335,20 +360,22 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl)
 {
     DeviceState *dev_st;
     CcwDevice *ccw_dev = NULL;
+    SCSIDevice *sd;
+    int devtype;
 
     dev_st = get_boot_device(0);
     if (dev_st) {
-        ccw_dev = s390_get_ccw_device(dev_st);
+        ccw_dev = s390_get_ccw_device(dev_st, &devtype);
     }
 
     /*
      * Currently allow IPL only from CCW devices.
      */
     if (ccw_dev) {
-        SCSIDevice *sd = (SCSIDevice *) object_dynamic_cast(OBJECT(dev_st),
-                                                            TYPE_SCSI_DEVICE);
-
-        if (sd) {
+        switch (devtype) {
+        case CCW_DEVTYPE_SCSI:
+            sd = (SCSIDevice *) object_dynamic_cast(OBJECT(dev_st),
+                                                           TYPE_SCSI_DEVICE);
             ipl->iplb.len = cpu_to_be32(S390_IPLB_MIN_QEMU_SCSI_LEN);
             ipl->iplb.blk0_len =
                 cpu_to_be32(S390_IPLB_MIN_QEMU_SCSI_LEN - S390_IPLB_HEADER_LEN);
@@ -358,20 +385,24 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl)
             ipl->iplb.scsi.channel = cpu_to_be16(sd->channel);
             ipl->iplb.scsi.devno = cpu_to_be16(ccw_dev->sch->devno);
             ipl->iplb.scsi.ssid = ccw_dev->sch->ssid & 3;
-        } else {
-            VirtIONet *vn = (VirtIONet *) object_dynamic_cast(OBJECT(dev_st),
-                                                              TYPE_VIRTIO_NET);
-
+            break;
+        case CCW_DEVTYPE_VFIO:
+            ipl->iplb.len = cpu_to_be32(S390_IPLB_MIN_CCW_LEN);
+            ipl->iplb.pbt = S390_IPL_TYPE_CCW;
+            ipl->iplb.ccw.devno = cpu_to_be16(ccw_dev->sch->devno);
+            ipl->iplb.ccw.ssid = ccw_dev->sch->ssid & 3;
+            break;
+        case CCW_DEVTYPE_VIRTIO_NET:
+            ipl->netboot = true;
+            /* Fall through to CCW_DEVTYPE_VIRTIO case */
+        case CCW_DEVTYPE_VIRTIO:
             ipl->iplb.len = cpu_to_be32(S390_IPLB_MIN_CCW_LEN);
             ipl->iplb.blk0_len =
                 cpu_to_be32(S390_IPLB_MIN_CCW_LEN - S390_IPLB_HEADER_LEN);
             ipl->iplb.pbt = S390_IPL_TYPE_CCW;
             ipl->iplb.ccw.devno = cpu_to_be16(ccw_dev->sch->devno);
             ipl->iplb.ccw.ssid = ccw_dev->sch->ssid & 3;
-
-            if (vn) {
-                ipl->netboot = true;
-            }
+            break;
         }
 
         if (!s390_ipl_set_loadparm(ipl->iplb.loadparm)) {
@@ -530,7 +561,7 @@ void s390_ipl_reset_request(CPUState *cs, enum s390_reset reset_type)
         !ipl->netboot &&
         ipl->iplb.pbt == S390_IPL_TYPE_CCW &&
         is_virtio_scsi_device(&ipl->iplb)) {
-        CcwDevice *ccw_dev = s390_get_ccw_device(get_boot_device(0));
+        CcwDevice *ccw_dev = s390_get_ccw_device(get_boot_device(0), NULL);
 
         if (ccw_dev &&
             cpu_to_be16(ccw_dev->sch->devno) == ipl->iplb.ccw.devno &&
diff --git a/hw/s390x/s390-ccw.c b/hw/s390x/s390-ccw.c
index cad91ee626..f5f025d1b6 100644
--- a/hw/s390x/s390-ccw.c
+++ b/hw/s390x/s390-ccw.c
@@ -124,6 +124,14 @@ static void s390_ccw_unrealize(S390CCWDevice *cdev, Error **errp)
     g_free(cdev->mdevid);
 }
 
+static void s390_ccw_instance_init(Object *obj)
+{
+    S390CCWDevice *dev = S390_CCW_DEVICE(obj);
+
+    device_add_bootindex_property(obj, &dev->bootindex, "bootindex",
+                                  "/disk@0,0", DEVICE(obj), NULL);
+}
+
 static void s390_ccw_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -137,6 +145,7 @@ static void s390_ccw_class_init(ObjectClass *klass, void *data)
 static const TypeInfo s390_ccw_info = {
     .name          = TYPE_S390_CCW,
     .parent        = TYPE_CCW_DEVICE,
+    .instance_init = s390_ccw_instance_init,
     .instance_size = sizeof(S390CCWDevice),
     .class_size    = sizeof(S390CCWDeviceClass),
     .class_init    = s390_ccw_class_init,
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index d11069b860..bbc6e8fa0b 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -15,6 +15,7 @@
 #include "cpu.h"
 #include "hw/boards.h"
 #include "exec/address-spaces.h"
+#include "exec/ram_addr.h"
 #include "hw/s390x/s390-virtio-hcall.h"
 #include "hw/s390x/sclp.h"
 #include "hw/s390x/s390_flic.h"
@@ -163,6 +164,7 @@ static void s390_memory_init(ram_addr_t mem_size)
     MemoryRegion *sysmem = get_system_memory();
     ram_addr_t chunk, offset = 0;
     unsigned int number = 0;
+    Error *local_err = NULL;
     gchar *name;
 
     /* allocate RAM for core */
@@ -182,6 +184,15 @@ static void s390_memory_init(ram_addr_t mem_size)
     }
     g_free(name);
 
+    /*
+     * Configure the maximum page size. As no memory devices were created
+     * yet, this is the page size of initial memory only.
+     */
+    s390_set_max_pagesize(qemu_maxrampagesize(), &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        exit(EXIT_FAILURE);
+    }
     /* Initialize storage key device */
     s390_skeys_init();
     /* Initialize storage attributes device */
@@ -253,6 +264,7 @@ static void ccw_init(MachineState *machine)
     DeviceState *dev;
 
     s390_sclp_init();
+    /* init memory + setup max page size. Required for the CPU model */
     s390_memory_init(machine->ram_size);
 
     /* init CPUs (incl. CPU model) early so s390_has_feature() works */
@@ -646,14 +658,26 @@ bool css_migration_enabled(void)
     }                                                                         \
     type_init(ccw_machine_register_##suffix)
 
+static void ccw_machine_4_1_instance_options(MachineState *machine)
+{
+}
+
+static void ccw_machine_4_1_class_options(MachineClass *mc)
+{
+}
+DEFINE_CCW_MACHINE(4_1, "4.1", true);
+
 static void ccw_machine_4_0_instance_options(MachineState *machine)
 {
+    ccw_machine_4_1_instance_options(machine);
 }
 
 static void ccw_machine_4_0_class_options(MachineClass *mc)
 {
+    ccw_machine_4_1_class_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
 }
-DEFINE_CCW_MACHINE(4_0, "4.0", true);
+DEFINE_CCW_MACHINE(4_0, "4.0", false);
 
 static void ccw_machine_3_1_instance_options(MachineState *machine)
 {
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index c44d13cc50..31dd3a2a87 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -21,12 +21,12 @@
 #include "hw/vfio/vfio.h"
 #include "hw/vfio/vfio-common.h"
 #include "hw/s390x/s390-ccw.h"
+#include "hw/s390x/vfio-ccw.h"
 #include "hw/s390x/ccw-device.h"
 #include "exec/address-spaces.h"
 #include "qemu/error-report.h"
 
-#define TYPE_VFIO_CCW "vfio-ccw"
-typedef struct VFIOCCWDevice {
+struct VFIOCCWDevice {
     S390CCWDevice cdev;
     VFIODevice vdev;
     uint64_t io_region_size;
@@ -35,7 +35,7 @@ typedef struct VFIOCCWDevice {
     EventNotifier io_notifier;
     bool force_orb_pfch;
     bool warned_orb_pfch;
-} VFIOCCWDevice;
+};
 
 static inline void warn_once_pfch(VFIOCCWDevice *vcdev, SubchDev *sch,
                                   const char *msg)
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
index 57fe758e54..96c0ad9d9b 100644
--- a/hw/vfio/spapr.c
+++ b/hw/vfio/spapr.c
@@ -148,7 +148,7 @@ int vfio_spapr_create_window(VFIOContainer *container,
     uint64_t pagesize = memory_region_iommu_get_min_page_size(iommu_mr);
     unsigned entries, bits_total, bits_per_level, max_levels;
     struct vfio_iommu_spapr_tce_create create = { .argsz = sizeof(create) };
-    long rampagesize = qemu_getrampagesize();
+    long rampagesize = qemu_minrampagesize();
 
     /*
      * The host might not support the guest supported IOMMU page size,
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 9ecd911c3e..139ad79390 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -73,7 +73,8 @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
 
 bool ramblock_is_pmem(RAMBlock *rb);
 
-long qemu_getrampagesize(void);
+long qemu_minrampagesize(void);
+long qemu_maxrampagesize(void);
 
 /**
  * qemu_ram_alloc_from_file,
diff --git a/include/hw/boards.h b/include/hw/boards.h
index e231860666..6f7916f88f 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -57,7 +57,6 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
 #define MACHINE_CLASS(klass) \
     OBJECT_CLASS_CHECK(MachineClass, (klass), TYPE_MACHINE)
 
-MachineClass *find_default_machine(void);
 extern MachineState *current_machine;
 
 void machine_run_board_init(MachineState *machine);
@@ -293,6 +292,9 @@ struct MachineState {
     } \
     type_init(machine_initfn##_register_types)
 
+extern GlobalProperty hw_compat_4_0[];
+extern const size_t hw_compat_4_0_len;
+
 extern GlobalProperty hw_compat_3_1[];
 extern const size_t hw_compat_3_1_len;
 
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index ca65ef18af..43df7230a2 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -293,6 +293,9 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t);
 int e820_get_num_entries(void);
 bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *);
 
+extern GlobalProperty pc_compat_4_0[];
+extern const size_t pc_compat_4_0_len;
+
 extern GlobalProperty pc_compat_3_1[];
 extern const size_t pc_compat_3_1_len;
 
diff --git a/include/hw/s390x/s390-ccw.h b/include/hw/s390x/s390-ccw.h
index 7d15a1a5d4..901d805d79 100644
--- a/include/hw/s390x/s390-ccw.h
+++ b/include/hw/s390x/s390-ccw.h
@@ -27,6 +27,7 @@ typedef struct S390CCWDevice {
     CcwDevice parent_obj;
     CssDevId hostid;
     char *mdevid;
+    int32_t bootindex;
 } S390CCWDevice;
 
 typedef struct S390CCWDeviceClass {
diff --git a/include/hw/s390x/vfio-ccw.h b/include/hw/s390x/vfio-ccw.h
new file mode 100644
index 0000000000..ee5250d0d7
--- /dev/null
+++ b/include/hw/s390x/vfio-ccw.h
@@ -0,0 +1,28 @@
+/*
+ * vfio based subchannel assignment support
+ *
+ * Copyright 2017, 2019 IBM Corp.
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
+ *            Pierre Morel <pmorel@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_VFIO_CCW_H
+#define HW_VFIO_CCW_H
+
+#include "hw/vfio/vfio-common.h"
+#include "hw/s390x/s390-ccw.h"
+#include "hw/s390x/ccw-device.h"
+
+#define TYPE_VFIO_CCW "vfio-ccw"
+#define VFIO_CCW(obj) \
+        OBJECT_CHECK(VFIOCCWDevice, (obj), TYPE_VFIO_CCW)
+
+/* Forward declaration only; the struct is not defined in this header. */
+typedef struct VFIOCCWDevice VFIOCCWDevice;
+
+#endif
diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
index ef04f0ed5b..eec98d82c1 100644
--- a/include/qemu/mmap-alloc.h
+++ b/include/qemu/mmap-alloc.h
@@ -7,7 +7,26 @@ size_t qemu_fd_getpagesize(int fd);
 
 size_t qemu_mempath_getpagesize(const char *mem_path);
 
-void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared);
+/**
+ * qemu_ram_mmap: mmap the specified file or device.
+ *
+ * Parameters:
+ *  @fd: the file or the device to mmap
+ *  @size: the number of bytes to be mmaped
+ *  @align: if not zero, specify the alignment of the starting mapping address;
+ *          otherwise, the alignment in use will be determined by QEMU.
+ *  @shared: map has RAM_SHARED flag.
+ *  @is_pmem: map has RAM_PMEM flag.
+ *
+ * Return:
+ *  On success, return a pointer to the mapped area.
+ *  On failure, return MAP_FAILED.
+ */
+void *qemu_ram_mmap(int fd,
+                    size_t size,
+                    size_t align,
+                    bool shared,
+                    bool is_pmem);
 
 void qemu_ram_munmap(int fd, void *ptr, size_t size);
 
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index e9bec3a5bc..08abcbd3fe 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -681,15 +681,15 @@ ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model);
 CPUState *cpu_create(const char *typename);
 
 /**
- * parse_cpu_model:
- * @cpu_model: The model string including optional parameters.
+ * parse_cpu_option:
+ * @cpu_option: The -cpu option including optional parameters.
  *
  * processes optional parameters and registers them as global properties
  *
  * Returns: type of CPU to create or prints error and terminates process
  *          if an error occurred.
  */
-const char *parse_cpu_model(const char *cpu_model);
+const char *parse_cpu_option(const char *cpu_option);
 
 /**
  * cpu_has_work:
diff --git a/linux-headers/asm-arm/mman.h b/linux-headers/asm-arm/mman.h
new file mode 100644
index 0000000000..41f99c573b
--- /dev/null
+++ b/linux-headers/asm-arm/mman.h
@@ -0,0 +1,4 @@
+#include <asm-generic/mman.h>
+
+#define arch_mmap_check(addr, len, flags) \
+	(((flags) & MAP_FIXED && (addr) < FIRST_USER_ADDRESS) ? -EINVAL : 0)
diff --git a/linux-headers/asm-arm64/mman.h b/linux-headers/asm-arm64/mman.h
new file mode 100644
index 0000000000..8eebf89f5a
--- /dev/null
+++ b/linux-headers/asm-arm64/mman.h
@@ -0,0 +1 @@
+#include <asm-generic/mman.h>
diff --git a/linux-headers/asm-generic/hugetlb_encode.h b/linux-headers/asm-generic/hugetlb_encode.h
new file mode 100644
index 0000000000..b0f8e87235
--- /dev/null
+++ b/linux-headers/asm-generic/hugetlb_encode.h
@@ -0,0 +1,36 @@
+#ifndef _ASM_GENERIC_HUGETLB_ENCODE_H_
+#define _ASM_GENERIC_HUGETLB_ENCODE_H_
+
+/*
+ * Several system calls take a flag to request "hugetlb" huge pages.
+ * Without further specification, these system calls will use the
+ * system's default huge page size.  If a system supports multiple
+ * huge page sizes, the desired huge page size can be specified in
+ * bits [26:31] of the flag arguments.  The value in these 6 bits
+ * will encode the log2 of the huge page size.
+ *
+ * The following definitions are associated with this huge page size
+ * encoding in flag arguments.  System call specific header files
+ * that use this encoding should include this file.  They can then
+ * provide definitions based on these with their own specific prefix.
+ * for example:
+ * #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
+ */
+
+#define HUGETLB_FLAG_ENCODE_SHIFT	26
+#define HUGETLB_FLAG_ENCODE_MASK	0x3f
+
+#define HUGETLB_FLAG_ENCODE_64KB	(16 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512KB	(19 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1MB		(20 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2MB		(21 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_8MB		(23 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16MB	(24 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_32MB	(25 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_256MB	(28 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512MB	(29 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1GB		(30 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2GB		(31 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16GB	(34 << HUGETLB_FLAG_ENCODE_SHIFT)
+
+#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */
diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h
new file mode 100644
index 0000000000..e7ee32861d
--- /dev/null
+++ b/linux-headers/asm-generic/mman-common.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_GENERIC_MMAN_COMMON_H
+#define __ASM_GENERIC_MMAN_COMMON_H
+
+/*
+ Author: Michael S. Tsirkin <mst@mellanox.co.il>, Mellanox Technologies Ltd.
+ Based on: asm-xxx/mman.h
+*/
+
+#define PROT_READ	0x1		/* page can be read */
+#define PROT_WRITE	0x2		/* page can be written */
+#define PROT_EXEC	0x4		/* page can be executed */
+#define PROT_SEM	0x8		/* page may be used for atomic ops */
+#define PROT_NONE	0x0		/* page can not be accessed */
+#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
+#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
+
+#define MAP_SHARED	0x01		/* Share changes */
+#define MAP_PRIVATE	0x02		/* Changes are private */
+#define MAP_SHARED_VALIDATE 0x03	/* share + validate extension flags */
+#define MAP_TYPE	0x0f		/* Mask for type of mapping */
+#define MAP_FIXED	0x10		/* Interpret addr exactly */
+#define MAP_ANONYMOUS	0x20		/* don't use a file */
+#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
+# define MAP_UNINITIALIZED 0x4000000	/* For anonymous mmap, memory could be uninitialized */
+#else
+# define MAP_UNINITIALIZED 0x0		/* Don't support this flag */
+#endif
+
+/* 0x0100 - 0x80000 flags are defined in asm-generic/mman.h */
+#define MAP_FIXED_NOREPLACE	0x100000	/* MAP_FIXED which doesn't unmap underlying mapping */
+
+/*
+ * Flags for mlock
+ */
+#define MLOCK_ONFAULT	0x01		/* Lock pages in range after they are faulted in, do not prefault */
+
+#define MS_ASYNC	1		/* sync memory asynchronously */
+#define MS_INVALIDATE	2		/* invalidate the caches */
+#define MS_SYNC		4		/* synchronous memory sync */
+
+#define MADV_NORMAL	0		/* no further special treatment */
+#define MADV_RANDOM	1		/* expect random page references */
+#define MADV_SEQUENTIAL	2		/* expect sequential page references */
+#define MADV_WILLNEED	3		/* will need these pages */
+#define MADV_DONTNEED	4		/* don't need these pages */
+
+/* common parameters: try to keep these consistent across architectures */
+#define MADV_FREE	8		/* free pages only if memory pressure */
+#define MADV_REMOVE	9		/* remove these pages & resources */
+#define MADV_DONTFORK	10		/* don't inherit across fork */
+#define MADV_DOFORK	11		/* do inherit across fork */
+#define MADV_HWPOISON	100		/* poison a page for testing */
+#define MADV_SOFT_OFFLINE 101		/* soft offline page for testing */
+
+#define MADV_MERGEABLE   12		/* KSM may merge identical pages */
+#define MADV_UNMERGEABLE 13		/* KSM may not merge identical pages */
+
+#define MADV_HUGEPAGE	14		/* Worth backing with hugepages */
+#define MADV_NOHUGEPAGE	15		/* Not worth backing with hugepages */
+
+#define MADV_DONTDUMP   16		/* Explicity exclude from the core dump,
+					   overrides the coredump filter bits */
+#define MADV_DODUMP	17		/* Clear the MADV_DONTDUMP flag */
+
+#define MADV_WIPEONFORK 18		/* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 19		/* Undo MADV_WIPEONFORK */
+
+/* compatibility flags */
+#define MAP_FILE	0
+
+#define PKEY_DISABLE_ACCESS	0x1
+#define PKEY_DISABLE_WRITE	0x2
+#define PKEY_ACCESS_MASK	(PKEY_DISABLE_ACCESS |\
+				 PKEY_DISABLE_WRITE)
+
+#endif /* __ASM_GENERIC_MMAN_COMMON_H */
diff --git a/linux-headers/asm-generic/mman.h b/linux-headers/asm-generic/mman.h
new file mode 100644
index 0000000000..653687d977
--- /dev/null
+++ b/linux-headers/asm-generic/mman.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_GENERIC_MMAN_H
+#define __ASM_GENERIC_MMAN_H
+
+#include <asm-generic/mman-common.h>
+
+#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
+#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
+#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
+#define MAP_LOCKED	0x2000		/* pages are locked */
+#define MAP_NORESERVE	0x4000		/* don't check for reservations */
+#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
+#define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
+#define MAP_SYNC	0x80000		/* perform synchronous page faults for the mapping */
+
+/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */
+
+#define MCL_CURRENT	1		/* lock all current mappings */
+#define MCL_FUTURE	2		/* lock all future mappings */
+#define MCL_ONFAULT	4		/* lock all pages that are faulted in */
+
+#endif /* __ASM_GENERIC_MMAN_H */
diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h
new file mode 100644
index 0000000000..3035ca499c
--- /dev/null
+++ b/linux-headers/asm-mips/mman.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1995, 1999, 2002 by Ralf Baechle
+ */
+#ifndef _ASM_MMAN_H
+#define _ASM_MMAN_H
+
+/*
+ * Protections are chosen from these bits, OR'd together.  The
+ * implementation does not necessarily support PROT_EXEC or PROT_WRITE
+ * without PROT_READ.  The only guarantees are that no writing will be
+ * allowed without PROT_WRITE and no access will be allowed for PROT_NONE.
+ */
+#define PROT_NONE	0x00		/* page can not be accessed */
+#define PROT_READ	0x01		/* page can be read */
+#define PROT_WRITE	0x02		/* page can be written */
+#define PROT_EXEC	0x04		/* page can be executed */
+/*			0x08		   reserved for PROT_EXEC_NOFLUSH */
+#define PROT_SEM	0x10		/* page may be used for atomic ops */
+#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
+#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
+
+/*
+ * Flags for mmap
+ */
+#define MAP_SHARED	0x001		/* Share changes */
+#define MAP_PRIVATE	0x002		/* Changes are private */
+#define MAP_SHARED_VALIDATE 0x003	/* share + validate extension flags */
+#define MAP_TYPE	0x00f		/* Mask for type of mapping */
+#define MAP_FIXED	0x010		/* Interpret addr exactly */
+
+/* not used by linux, but here to make sure we don't clash with ABI defines */
+#define MAP_RENAME	0x020		/* Assign page to file */
+#define MAP_AUTOGROW	0x040		/* File may grow by writing */
+#define MAP_LOCAL	0x080		/* Copy on fork/sproc */
+#define MAP_AUTORSRV	0x100		/* Logical swap reserved on demand */
+
+/* These are linux-specific */
+#define MAP_NORESERVE	0x0400		/* don't check for reservations */
+#define MAP_ANONYMOUS	0x0800		/* don't use a file */
+#define MAP_GROWSDOWN	0x1000		/* stack-like segment */
+#define MAP_DENYWRITE	0x2000		/* ETXTBSY */
+#define MAP_EXECUTABLE	0x4000		/* mark it as an executable */
+#define MAP_LOCKED	0x8000		/* pages are locked */
+#define MAP_POPULATE	0x10000		/* populate (prefault) pagetables */
+#define MAP_NONBLOCK	0x20000		/* do not block on IO */
+#define MAP_STACK	0x40000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x80000		/* create a huge page mapping */
+#define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED which doesn't unmap underlying mapping */
+
+/*
+ * Flags for msync
+ */
+#define MS_ASYNC	0x0001		/* sync memory asynchronously */
+#define MS_INVALIDATE	0x0002		/* invalidate mappings & caches */
+#define MS_SYNC		0x0004		/* synchronous memory sync */
+
+/*
+ * Flags for mlockall
+ */
+#define MCL_CURRENT	1		/* lock all current mappings */
+#define MCL_FUTURE	2		/* lock all future mappings */
+#define MCL_ONFAULT	4		/* lock all pages that are faulted in */
+
+/*
+ * Flags for mlock
+ */
+#define MLOCK_ONFAULT	0x01		/* Lock pages in range after they are faulted in, do not prefault */
+
+#define MADV_NORMAL	0		/* no further special treatment */
+#define MADV_RANDOM	1		/* expect random page references */
+#define MADV_SEQUENTIAL 2		/* expect sequential page references */
+#define MADV_WILLNEED	3		/* will need these pages */
+#define MADV_DONTNEED	4		/* don't need these pages */
+
+/* common parameters: try to keep these consistent across architectures */
+#define MADV_FREE	8		/* free pages only if memory pressure */
+#define MADV_REMOVE	9		/* remove these pages & resources */
+#define MADV_DONTFORK	10		/* don't inherit across fork */
+#define MADV_DOFORK	11		/* do inherit across fork */
+
+#define MADV_MERGEABLE	 12		/* KSM may merge identical pages */
+#define MADV_UNMERGEABLE 13		/* KSM may not merge identical pages */
+#define MADV_HWPOISON	 100		/* poison a page for testing */
+
+#define MADV_HUGEPAGE	14		/* Worth backing with hugepages */
+#define MADV_NOHUGEPAGE 15		/* Not worth backing with hugepages */
+
+#define MADV_DONTDUMP	16		/* Explicity exclude from the core dump,
+					   overrides the coredump filter bits */
+#define MADV_DODUMP	17		/* Clear the MADV_NODUMP flag */
+
+#define MADV_WIPEONFORK 18		/* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 19		/* Undo MADV_WIPEONFORK */
+
+/* compatibility flags */
+#define MAP_FILE	0
+
+#define PKEY_DISABLE_ACCESS	0x1
+#define PKEY_DISABLE_WRITE	0x2
+#define PKEY_ACCESS_MASK	(PKEY_DISABLE_ACCESS |\
+				 PKEY_DISABLE_WRITE)
+
+#endif /* _ASM_MMAN_H */
diff --git a/linux-headers/asm-powerpc/mman.h b/linux-headers/asm-powerpc/mman.h
new file mode 100644
index 0000000000..1c2b3fca05
--- /dev/null
+++ b/linux-headers/asm-powerpc/mman.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _ASM_POWERPC_MMAN_H
+#define _ASM_POWERPC_MMAN_H
+
+#include <asm-generic/mman-common.h>
+
+
+#define PROT_SAO	0x10		/* Strong Access Ordering */
+
+#define MAP_RENAME      MAP_ANONYMOUS   /* In SunOS terminology */
+#define MAP_NORESERVE   0x40            /* don't reserve swap pages */
+#define MAP_LOCKED	0x80
+
+#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
+#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
+#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
+
+#define MCL_CURRENT     0x2000          /* lock all currently mapped pages */
+#define MCL_FUTURE      0x4000          /* lock all additions to address space */
+#define MCL_ONFAULT	0x8000		/* lock all pages that are faulted in */
+
+#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
+#define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
+
+/* Override any generic PKEY permission defines */
+#define PKEY_DISABLE_EXECUTE   0x4
+#undef PKEY_ACCESS_MASK
+#define PKEY_ACCESS_MASK       (PKEY_DISABLE_ACCESS |\
+				PKEY_DISABLE_WRITE  |\
+				PKEY_DISABLE_EXECUTE)
+#endif /* _ASM_POWERPC_MMAN_H */
diff --git a/linux-headers/asm-s390/mman.h b/linux-headers/asm-s390/mman.h
new file mode 100644
index 0000000000..8eebf89f5a
--- /dev/null
+++ b/linux-headers/asm-s390/mman.h
@@ -0,0 +1 @@
+#include <asm-generic/mman.h>
diff --git a/linux-headers/asm-x86/mman.h b/linux-headers/asm-x86/mman.h
new file mode 100644
index 0000000000..d4a8d0424b
--- /dev/null
+++ b/linux-headers/asm-x86/mman.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_X86_MMAN_H
+#define _ASM_X86_MMAN_H
+
+#define MAP_32BIT	0x40		/* only give out 32bit addresses */
+
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+/*
+ * Take the 4 protection key bits out of the vma->vm_flags
+ * value and turn them in to the bits that we can put in
+ * to a pte.
+ *
+ * Only override these if Protection Keys are available
+ * (which is only on 64-bit).
+ */
+#define arch_vm_get_page_prot(vm_flags)	__pgprot(	\
+		((vm_flags) & VM_PKEY_BIT0 ? _PAGE_PKEY_BIT0 : 0) |	\
+		((vm_flags) & VM_PKEY_BIT1 ? _PAGE_PKEY_BIT1 : 0) |	\
+		((vm_flags) & VM_PKEY_BIT2 ? _PAGE_PKEY_BIT2 : 0) |	\
+		((vm_flags) & VM_PKEY_BIT3 ? _PAGE_PKEY_BIT3 : 0))
+
+#define arch_calc_vm_prot_bits(prot, key) (		\
+		((key) & 0x1 ? VM_PKEY_BIT0 : 0) |      \
+		((key) & 0x2 ? VM_PKEY_BIT1 : 0) |      \
+		((key) & 0x4 ? VM_PKEY_BIT2 : 0) |      \
+		((key) & 0x8 ? VM_PKEY_BIT3 : 0))
+#endif
+
+#include <asm-generic/mman.h>
+
+#endif /* _ASM_X86_MMAN_H */
diff --git a/linux-headers/linux/mman.h b/linux-headers/linux/mman.h
new file mode 100644
index 0000000000..3c44b6f480
--- /dev/null
+++ b/linux-headers/linux/mman.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_MMAN_H
+#define _LINUX_MMAN_H
+
+#include <asm/mman.h>
+#include <asm-generic/hugetlb_encode.h>
+
+#define MREMAP_MAYMOVE	1
+#define MREMAP_FIXED	2
+
+#define OVERCOMMIT_GUESS		0
+#define OVERCOMMIT_ALWAYS		1
+#define OVERCOMMIT_NEVER		2
+
+/*
+ * Huge page size encoding when MAP_HUGETLB is specified, and a huge page
+ * size other than the default is desired.  See hugetlb_encode.h.
+ * All known huge page size encodings are provided here.  It is the
+ * responsibility of the application to know which sizes are supported on
+ * the running system.  See mmap(2) man page for details.
+ */
+#define MAP_HUGE_SHIFT	HUGETLB_FLAG_ENCODE_SHIFT
+#define MAP_HUGE_MASK	HUGETLB_FLAG_ENCODE_MASK
+
+#define MAP_HUGE_64KB	HUGETLB_FLAG_ENCODE_64KB
+#define MAP_HUGE_512KB	HUGETLB_FLAG_ENCODE_512KB
+#define MAP_HUGE_1MB	HUGETLB_FLAG_ENCODE_1MB
+#define MAP_HUGE_2MB	HUGETLB_FLAG_ENCODE_2MB
+#define MAP_HUGE_8MB	HUGETLB_FLAG_ENCODE_8MB
+#define MAP_HUGE_16MB	HUGETLB_FLAG_ENCODE_16MB
+#define MAP_HUGE_32MB	HUGETLB_FLAG_ENCODE_32MB
+#define MAP_HUGE_256MB	HUGETLB_FLAG_ENCODE_256MB
+#define MAP_HUGE_512MB	HUGETLB_FLAG_ENCODE_512MB
+#define MAP_HUGE_1GB	HUGETLB_FLAG_ENCODE_1GB
+#define MAP_HUGE_2GB	HUGETLB_FLAG_ENCODE_2GB
+#define MAP_HUGE_16GB	HUGETLB_FLAG_ENCODE_16GB
+
+#endif /* _LINUX_MMAN_H */
diff --git a/linux-user/main.c b/linux-user/main.c
index 17387166ab..3d2230320b 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -662,7 +662,7 @@ int main(int argc, char **argv, char **envp)
     if (cpu_model == NULL) {
         cpu_model = cpu_get_model(get_elf_eflags(execfd));
     }
-    cpu_type = parse_cpu_model(cpu_model);
+    cpu_type = parse_cpu_option(cpu_model);
 
     /* init tcg before creating CPUs and to get qemu_host_page_size */
     tcg_exec_init(0);
diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img
index 450a076dc0..ba054828d3 100644
--- a/pc-bios/s390-ccw.img
+++ b/pc-bios/s390-ccw.img
Binary files differdiff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile
index 1eb316b02f..a048b6b077 100644
--- a/pc-bios/s390-ccw/Makefile
+++ b/pc-bios/s390-ccw/Makefile
@@ -10,7 +10,7 @@ $(call set-vpath, $(SRC_PATH)/pc-bios/s390-ccw)
 .PHONY : all clean build-all
 
 OBJECTS = start.o main.o bootmap.o jump2ipl.o sclp.o menu.o \
-	  virtio.o virtio-scsi.o virtio-blkdev.o libc.o
+	  virtio.o virtio-scsi.o virtio-blkdev.o libc.o cio.o dasd-ipl.o
 
 QEMU_CFLAGS := $(filter -W%, $(QEMU_CFLAGS))
 QEMU_CFLAGS += -ffreestanding -fno-delete-null-pointer-checks -msoft-float
diff --git a/pc-bios/s390-ccw/cio.c b/pc-bios/s390-ccw/cio.c
new file mode 100644
index 0000000000..339ec5fbe7
--- /dev/null
+++ b/pc-bios/s390-ccw/cio.c
@@ -0,0 +1,423 @@
+/*
+ * S390 Channel I/O
+ *
+ * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
+ * Copyright (c) 2019 IBM Corp.
+ *
+ * Author(s): Jason J. Herne <jjherne@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "libc.h"
+#include "s390-ccw.h"
+#include "s390-arch.h"
+#include "helper.h"
+#include "cio.h"
+
+static char chsc_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+
+static int __do_cio(SubChannelId schid, uint32_t ccw_addr, int fmt, Irb *irb);
+
+int enable_mss_facility(void)
+{
+    int ret;
+    ChscAreaSda *sda_area = (ChscAreaSda *) chsc_page;
+
+    memset(sda_area, 0, PAGE_SIZE); /* build the request in a zeroed page */
+    sda_area->request.length = 0x0400;
+    sda_area->request.code = 0x0031;
+    sda_area->operation_code = 0x2;
+
+    ret = chsc(sda_area);
+    if ((ret == 0) && (sda_area->response.code == 0x0001)) {
+        return 0; /* chsc succeeded and the response code is 0x0001 */
+    }
+    return -EIO; /* chsc failed or answered with another response code */
+}
+
+void enable_subchannel(SubChannelId schid)
+{
+    Schib schib;
+
+    stsch_err(schid, &schib); /* NOTE(review): any result is ignored here */
+    schib.pmcw.ena = 1; /* set the pmcw enable bit */
+    msch(schid, &schib); /* NOTE(review): any result is ignored here */
+}
+
+uint16_t cu_type(SubChannelId schid)
+{
+    Ccw1 sense_id_ccw = {}; /* zeroed: flags is only OR-ed into below */
+    SenseId sense_data;
+
+    sense_id_ccw.cmd_code = CCW_CMD_SENSE_ID;
+    sense_id_ccw.cda = ptr2u32(&sense_data);
+    sense_id_ccw.count = sizeof(sense_data);
+    sense_id_ccw.flags |= CCW_FLAG_SLI;
+
+    if (do_cio(schid, CU_TYPE_UNKNOWN, ptr2u32(&sense_id_ccw), CCW_FMT1)) {
+        panic("Failed to run SenseID CCw\n");
+    }
+
+    return sense_data.cu_type; /* cu_type field of the SenseID data read */
+}
+
+int basic_sense(SubChannelId schid, uint16_t cutype, void *sense_data,
+                 uint16_t data_size)
+{
+    Ccw1 senseCcw = {}; /* zeroed: flags is never assigned below */
+    Irb irb; /* filled by __do_cio() but not inspected here */
+
+    senseCcw.cmd_code = CCW_CMD_BASIC_SENSE;
+    senseCcw.cda = ptr2u32(sense_data);
+    senseCcw.count = data_size;
+
+    return __do_cio(schid, ptr2u32(&senseCcw), CCW_FMT1, &irb);
+}
+
+static bool irb_error(Irb *irb)
+{
+    if (irb->scsw.cstat) { /* any channel status bit set is an error */
+        return true;
+    } /* otherwise device status must be exactly device-end + channel-end */
+    return irb->scsw.dstat != (SCSW_DSTAT_DEVEND | SCSW_DSTAT_CHEND);
+}
+
+static void print_eckd_dasd_sense_data(SenseDataEckdDasd *sd)
+{
+    char msgline[512] = ""; /* must start empty: built up with strcat() */
+
+    if (sd->config_info & 0x8000) {
+        sclp_print("Eckd Dasd Sense Data (fmt 24-bytes):\n");
+    } else {
+        sclp_print("Eckd Dasd Sense Data (fmt 32-bytes):\n");
+    }
+
+    strcat(msgline, "    Sense Condition Flags :");
+    if (sd->common_status & SNS_STAT0_CMD_REJECT) {
+        strcat(msgline, " [Cmd-Reject]");
+    }
+    if (sd->common_status & SNS_STAT0_INTERVENTION_REQ) {
+        strcat(msgline, " [Intervention-Required]");
+    }
+    if (sd->common_status & SNS_STAT0_BUS_OUT_CHECK) {
+        strcat(msgline, " [Bus-Out-Parity-Check]");
+    }
+    if (sd->common_status & SNS_STAT0_EQUIPMENT_CHECK) {
+        strcat(msgline, " [Equipment-Check]");
+    }
+    if (sd->common_status & SNS_STAT0_DATA_CHECK) {
+        strcat(msgline, " [Data-Check]");
+    }
+    if (sd->common_status & SNS_STAT0_OVERRUN) {
+        strcat(msgline, " [Overrun]");
+    }
+    if (sd->common_status & SNS_STAT0_INCOMPL_DOMAIN) {
+        strcat(msgline, " [Incomplete-Domain]");
+    }
+
+    if (sd->status[0] & SNS_STAT1_PERM_ERR) {
+        strcat(msgline, " [Permanent-Error]");
+    }
+    if (sd->status[0] & SNS_STAT1_INV_TRACK_FORMAT) {
+        strcat(msgline, " [Invalid-Track-Fmt]");
+    }
+    if (sd->status[0] & SNS_STAT1_EOC) {
+        strcat(msgline, " [End-of-Cyl]");
+    }
+    if (sd->status[0] & SNS_STAT1_MESSAGE_TO_OPER) {
+        strcat(msgline, " [Operator-Msg]");
+    }
+    if (sd->status[0] & SNS_STAT1_NO_REC_FOUND) {
+        strcat(msgline, " [No-Record-Found]");
+    }
+    if (sd->status[0] & SNS_STAT1_FILE_PROTECTED) {
+        strcat(msgline, " [File-Protected]");
+    }
+    if (sd->status[0] & SNS_STAT1_WRITE_INHIBITED) {
+        strcat(msgline, " [Write-Inhibited]");
+    }
+    if (sd->status[0] & SNS_STAT1_IMPRECISE_END) {
+        strcat(msgline, " [Imprecise-Ending]");
+    }
+
+    if (sd->status[1] & SNS_STAT2_REQ_INH_WRITE) {
+        strcat(msgline, " [Req-Inhibit-Write]");
+    }
+    if (sd->status[1] & SNS_STAT2_CORRECTABLE) {
+        strcat(msgline, " [Correctable-Data-Check]");
+    }
+    if (sd->status[1] & SNS_STAT2_FIRST_LOG_ERR) {
+        strcat(msgline, " [First-Error-Log]");
+    }
+    if (sd->status[1] & SNS_STAT2_ENV_DATA_PRESENT) {
+        strcat(msgline, " [Env-Data-Present]");
+    }
+    if (sd->status[1] & SNS_STAT2_IMPRECISE_END) {
+        strcat(msgline, " [Imprecise-End]");
+    }
+    strcat(msgline, "\n");
+    sclp_print(msgline);
+
+    print_int("    Residual Count     =", sd->res_count);
+    print_int("    Phys Drive ID      =", sd->phys_drive_id);
+    print_int("    low cyl address    =", sd->low_cyl_addr);
+    print_int("    head addr & hi cyl =", sd->head_high_cyl_addr);
+    print_int("    format/message     =", sd->fmt_msg);
+    print_int("    fmt-dependent[0-7] =", sd->fmt_dependent_info[0]);
+    print_int("    fmt-dependent[8-15]=", sd->fmt_dependent_info[1]);
+    print_int("    prog action code   =", sd->program_action_code);
+    print_int("    Configuration info =", sd->config_info);
+    print_int("    mcode / hi-cyl     =", sd->mcode_hicyl);
+    print_int("    cyl & head addr [0]=", sd->cyl_head_addr[0]);
+    print_int("    cyl & head addr [1]=", sd->cyl_head_addr[1]);
+    print_int("    cyl & head addr [2]=", sd->cyl_head_addr[2]);
+}
+
+static void print_irb_err(Irb *irb)
+{
+    uint64_t this_ccw = *(uint64_t *)u32toptr(irb->scsw.cpa);
+    uint64_t prev_ccw = *(uint64_t *)u32toptr(irb->scsw.cpa - 8);
+    char msgline[256] = ""; /* must start empty: built up with strcat() */
+
+    sclp_print("Interrupt Response Block Data:\n");
+
+    strcat(msgline, "    Function Ctrl :");
+    if (irb->scsw.ctrl & SCSW_FCTL_START_FUNC) {
+        strcat(msgline, " [Start]");
+    }
+    if (irb->scsw.ctrl & SCSW_FCTL_HALT_FUNC) {
+        strcat(msgline, " [Halt]");
+    }
+    if (irb->scsw.ctrl & SCSW_FCTL_CLEAR_FUNC) {
+        strcat(msgline, " [Clear]");
+    }
+    strcat(msgline, "\n");
+    sclp_print(msgline);
+
+    msgline[0] = '\0';
+    strcat(msgline, "    Activity Ctrl :");
+    if (irb->scsw.ctrl & SCSW_ACTL_RESUME_PEND) {
+        strcat(msgline, " [Resume-Pending]");
+    }
+    if (irb->scsw.ctrl & SCSW_ACTL_START_PEND) {
+        strcat(msgline, " [Start-Pending]");
+    }
+    if (irb->scsw.ctrl & SCSW_ACTL_HALT_PEND) {
+        strcat(msgline, " [Halt-Pending]");
+    }
+    if (irb->scsw.ctrl & SCSW_ACTL_CLEAR_PEND) {
+        strcat(msgline, " [Clear-Pending]");
+    }
+    if (irb->scsw.ctrl & SCSW_ACTL_CH_ACTIVE) {
+        strcat(msgline, " [Channel-Active]");
+    }
+    if (irb->scsw.ctrl & SCSW_ACTL_DEV_ACTIVE) {
+        strcat(msgline, " [Device-Active]");
+    }
+    if (irb->scsw.ctrl & SCSW_ACTL_SUSPENDED) {
+        strcat(msgline, " [Suspended]");
+    }
+    strcat(msgline, "\n");
+    sclp_print(msgline);
+
+    msgline[0] = '\0';
+    strcat(msgline, "    Status Ctrl :");
+    if (irb->scsw.ctrl & SCSW_SCTL_ALERT) {
+        strcat(msgline, " [Alert]");
+    }
+    if (irb->scsw.ctrl & SCSW_SCTL_INTERMED) {
+        strcat(msgline, " [Intermediate]");
+    }
+    if (irb->scsw.ctrl & SCSW_SCTL_PRIMARY) {
+        strcat(msgline, " [Primary]");
+    }
+    if (irb->scsw.ctrl & SCSW_SCTL_SECONDARY) {
+        strcat(msgline, " [Secondary]");
+    }
+    if (irb->scsw.ctrl & SCSW_SCTL_STATUS_PEND) {
+        strcat(msgline, " [Status-Pending]");
+    }
+
+    strcat(msgline, "\n");
+    sclp_print(msgline);
+
+    msgline[0] = '\0';
+    strcat(msgline, "    Device Status :");
+    if (irb->scsw.dstat & SCSW_DSTAT_ATTN) {
+        strcat(msgline, " [Attention]");
+    }
+    if (irb->scsw.dstat & SCSW_DSTAT_STATMOD) {
+        strcat(msgline, " [Status-Modifier]");
+    }
+    if (irb->scsw.dstat & SCSW_DSTAT_CUEND) {
+        strcat(msgline, " [Ctrl-Unit-End]");
+    }
+    if (irb->scsw.dstat & SCSW_DSTAT_BUSY) {
+        strcat(msgline, " [Busy]");
+    }
+    if (irb->scsw.dstat & SCSW_DSTAT_CHEND) {
+        strcat(msgline, " [Channel-End]");
+    }
+    if (irb->scsw.dstat & SCSW_DSTAT_DEVEND) {
+        strcat(msgline, " [Device-End]");
+    }
+    if (irb->scsw.dstat & SCSW_DSTAT_UCHK) {
+        strcat(msgline, " [Unit-Check]");
+    }
+    if (irb->scsw.dstat & SCSW_DSTAT_UEXCP) {
+        strcat(msgline, " [Unit-Exception]");
+    }
+    strcat(msgline, "\n");
+    sclp_print(msgline);
+
+    msgline[0] = '\0';
+    strcat(msgline, "    Channel Status :");
+    if (irb->scsw.cstat & SCSW_CSTAT_PCINT) {
+        strcat(msgline, " [Program-Ctrl-Interruption]");
+    }
+    if (irb->scsw.cstat & SCSW_CSTAT_BADLEN) {
+        strcat(msgline, " [Incorrect-Length]");
+    }
+    if (irb->scsw.cstat & SCSW_CSTAT_PROGCHK) {
+        strcat(msgline, " [Program-Check]");
+    }
+    if (irb->scsw.cstat & SCSW_CSTAT_PROTCHK) {
+        strcat(msgline, " [Protection-Check]");
+    }
+    if (irb->scsw.cstat & SCSW_CSTAT_CHDCHK) {
+        strcat(msgline, " [Channel-Data-Check]");
+    }
+    if (irb->scsw.cstat & SCSW_CSTAT_CHCCHK) {
+        strcat(msgline, " [Channel-Ctrl-Check]");
+    }
+    if (irb->scsw.cstat & SCSW_CSTAT_ICCHK) {
+        strcat(msgline, " [Interface-Ctrl-Check]");
+    }
+    if (irb->scsw.cstat & SCSW_CSTAT_CHAINCHK) {
+        strcat(msgline, " [Chaining-Check]");
+    }
+    strcat(msgline, "\n");
+    sclp_print(msgline);
+
+    print_int("    cpa=", irb->scsw.cpa);
+    print_int("    prev_ccw=", prev_ccw);
+    print_int("    this_ccw=", this_ccw);
+}
+
+/*
+ * Handles executing ssch, tsch and returns the irb obtained from tsch.
+ * Returns 0 on success, -1 if ssch reported cc 1 or 2 (status pending or
+ * busy) and the caller should retry, otherwise the ssch/tsch condition code.
+ */
+static int __do_cio(SubChannelId schid, uint32_t ccw_addr, int fmt, Irb *irb)
+{
+    CmdOrb orb = {};
+    int rc;
+
+    IPL_assert(fmt == 0 || fmt == 1, "Invalid ccw format");
+
+    /* fmt 0: ccw_addr must fit in 24 bits with room for one whole ccw. */
+    if (fmt == 0) {
+        IPL_assert(ccw_addr <= 0xFFFFFF - 8, "Invalid ccw address");
+    }
+
+    orb.fmt = fmt;
+    orb.pfch = 1;  /* QEMU's cio implementation requires prefetch */
+    orb.c64 = 1;   /* QEMU's cio implementation requires 64-bit idaws */
+    orb.lpm = 0xFF; /* All paths allowed */
+    orb.cpa = ccw_addr;
+
+    rc = ssch(schid, &orb);
+    if (rc == 1 || rc == 2) {
+        /* Subchannel status pending or busy. Eat status and ask for retry. */
+        tsch(schid, irb);
+        return -1;
+    }
+    if (rc) {
+        print_int("ssch failed with cc=", rc);
+        return rc;
+    }
+
+    consume_io_int();
+
+    /* collect status */
+    rc = tsch(schid, irb);
+    if (rc) {
+        print_int("tsch failed with cc=", rc);
+    }
+
+    return rc;
+}
+
+/*
+ * Executes a channel program at a given subchannel. The request to run the
+ * channel program is sent to the subchannel, we then wait for the interrupt
+ * signaling completion of the I/O operation(s) performed by the channel
+ * program. Lastly we verify that the i/o operation completed without error and
+ * that the interrupt we received was for the subchannel used to run the
+ * channel program.
+ *
+ * Note: This function assumes it is running in an environment where no other
+ * cpus are generating or receiving I/O interrupts. So either run it in a
+ * single-cpu environment or make sure all other cpus are not doing I/O and
+ * have I/O interrupts masked off. We also assume that only one device is
+ * active (generating i/o interrupts).
+ *
+ * Returns 0 on success, the ssch/tsch cc if those fail, -1 on device error.
+ */
+int do_cio(SubChannelId schid, uint16_t cutype, uint32_t ccw_addr, int fmt)
+{
+    Irb irb = {};
+    SenseDataEckdDasd sd;
+    int rc, retries = 0;
+
+    while (true) {
+        rc = __do_cio(schid, ccw_addr, fmt, &irb);
+
+        if (rc == -1) {
+            retries++; /* NOTE(review): no retry limit on this path */
+            continue;
+        }
+        if (rc) {
+            /* ssch/tsch error. Message already reported by __do_cio */
+            break;
+        }
+
+        if (!irb_error(&irb)) {
+            break; /* clean completion: rc is 0 here */
+        }
+
+        /*
+         * Unexpected unit check, or interface-control-check. Use sense to
+         * clear (unit check only) then retry.
+         */
+        if ((unit_check(&irb) || iface_ctrl_check(&irb)) && retries <= 2) {
+            if (unit_check(&irb)) {
+                basic_sense(schid, cutype, &sd, sizeof(sd));
+            }
+            retries++;
+            continue;
+        }
+
+        sclp_print("cio device error\n");
+        print_int("  ssid  ", schid.ssid);
+        print_int("  cssid ", schid.cssid);
+        print_int("  sch_no", schid.sch_no);
+        print_int("  ctrl-unit type", cutype);
+        sclp_print("\n");
+        print_irb_err(&irb);
+        if (cutype == CU_TYPE_DASD_3990 || cutype == CU_TYPE_DASD_2107 ||
+            cutype == CU_TYPE_UNKNOWN) {
+            if (!basic_sense(schid, cutype, &sd, sizeof(sd))) {
+                print_eckd_dasd_sense_data(&sd);
+            }
+        }
+        rc = -1; /* device error: reported above, no further retries */
+        break;
+    }
+
+    return rc;
+}
diff --git a/pc-bios/s390-ccw/cio.h b/pc-bios/s390-ccw/cio.h
index 1a0795f645..aaa432dedd 100644
--- a/pc-bios/s390-ccw/cio.h
+++ b/pc-bios/s390-ccw/cio.h
@@ -17,35 +17,35 @@
  * path management control word
  */
 struct pmcw {
-    __u32 intparm;        /* interruption parameter */
-    __u32 qf      : 1;    /* qdio facility */
-    __u32 w       : 1;
-    __u32 isc     : 3;    /* interruption sublass */
-    __u32 res5    : 3;    /* reserved zeros */
-    __u32 ena     : 1;    /* enabled */
-    __u32 lm      : 2;    /* limit mode */
-    __u32 mme     : 2;    /* measurement-mode enable */
-    __u32 mp      : 1;    /* multipath mode */
-    __u32 tf      : 1;    /* timing facility */
-    __u32 dnv     : 1;    /* device number valid */
-    __u32 dev     : 16;   /* device number */
-    __u8  lpm;            /* logical path mask */
-    __u8  pnom;           /* path not operational mask */
-    __u8  lpum;           /* last path used mask */
-    __u8  pim;            /* path installed mask */
-    __u16 mbi;            /* measurement-block index */
-    __u8  pom;            /* path operational mask */
-    __u8  pam;            /* path available mask */
-    __u8  chpid[8];       /* CHPID 0-7 (if available) */
-    __u32 unused1 : 8;    /* reserved zeros */
-    __u32 st      : 3;    /* subchannel type */
-    __u32 unused2 : 18;   /* reserved zeros */
-    __u32 mbfc    : 1;    /* measurement block format control */
-    __u32 xmwme   : 1;    /* extended measurement word mode enable */
-    __u32 csense  : 1;    /* concurrent sense; can be enabled ...*/
-                /*  ... per MSCH, however, if facility */
-                /*  ... is not installed, this results */
-                /*  ... in an operand exception.       */
+    __u32 intparm;      /* interruption parameter */
+    __u32 qf:1;         /* qdio facility */
+    __u32 w:1;
+    __u32 isc:3;        /* interruption subclass */
+    __u32 res5:3;       /* reserved zeros */
+    __u32 ena:1;        /* enabled */
+    __u32 lm:2;         /* limit mode */
+    __u32 mme:2;        /* measurement-mode enable */
+    __u32 mp:1;         /* multipath mode */
+    __u32 tf:1;         /* timing facility */
+    __u32 dnv:1;        /* device number valid */
+    __u32 dev:16;       /* device number */
+    __u8  lpm;          /* logical path mask */
+    __u8  pnom;         /* path not operational mask */
+    __u8  lpum;         /* last path used mask */
+    __u8  pim;          /* path installed mask */
+    __u16 mbi;          /* measurement-block index */
+    __u8  pom;          /* path operational mask */
+    __u8  pam;          /* path available mask */
+    __u8  chpid[8];     /* CHPID 0-7 (if available) */
+    __u32 unused1:8;    /* reserved zeros */
+    __u32 st:3;         /* subchannel type */
+    __u32 unused2:18;   /* reserved zeros */
+    __u32 mbfc:1;       /* measurement block format control */
+    __u32 xmwme:1;      /* extended measurement word mode enable */
+    __u32 csense:1;     /* concurrent sense; can be enabled ...*/
+                        /*  ... per MSCH, however, if facility */
+                        /*  ... is not installed, this results */
+                        /*  ... in an operand exception.       */
 } __attribute__ ((packed));
 
 /* Target SCHIB configuration. */
@@ -70,35 +70,72 @@ struct scsw {
     __u16 count;
 } __attribute__ ((packed));
 
-#define SCSW_FCTL_CLEAR_FUNC 0x1000
-#define SCSW_FCTL_HALT_FUNC 0x2000
+/* Function Control */
 #define SCSW_FCTL_START_FUNC 0x4000
+#define SCSW_FCTL_HALT_FUNC 0x2000
+#define SCSW_FCTL_CLEAR_FUNC 0x1000
+
+/* Activity Control */
+#define SCSW_ACTL_RESUME_PEND   0x0800
+#define SCSW_ACTL_START_PEND    0x0400
+#define SCSW_ACTL_HALT_PEND     0x0200
+#define SCSW_ACTL_CLEAR_PEND    0x0100
+#define SCSW_ACTL_CH_ACTIVE     0x0080
+#define SCSW_ACTL_DEV_ACTIVE    0x0040
+#define SCSW_ACTL_SUSPENDED     0x0020
+
+/* Status Control */
+#define SCSW_SCTL_ALERT         0x0010
+#define SCSW_SCTL_INTERMED      0x0008
+#define SCSW_SCTL_PRIMARY       0x0004
+#define SCSW_SCTL_SECONDARY     0x0002
+#define SCSW_SCTL_STATUS_PEND   0x0001
+
+/* SCSW Device Status Flags */
+#define SCSW_DSTAT_ATTN     0x80
+#define SCSW_DSTAT_STATMOD  0x40
+#define SCSW_DSTAT_CUEND    0x20
+#define SCSW_DSTAT_BUSY     0x10
+#define SCSW_DSTAT_CHEND    0x08
+#define SCSW_DSTAT_DEVEND   0x04
+#define SCSW_DSTAT_UCHK     0x02
+#define SCSW_DSTAT_UEXCP    0x01
+
+/* SCSW Subchannel Status Flags */
+#define SCSW_CSTAT_PCINT    0x80
+#define SCSW_CSTAT_BADLEN   0x40
+#define SCSW_CSTAT_PROGCHK  0x20
+#define SCSW_CSTAT_PROTCHK  0x10
+#define SCSW_CSTAT_CHDCHK   0x08
+#define SCSW_CSTAT_CHCCHK   0x04
+#define SCSW_CSTAT_ICCHK    0x02
+#define SCSW_CSTAT_CHAINCHK 0x01
 
 /*
  * subchannel information block
  */
-struct schib {
+typedef struct schib {
     struct pmcw pmcw;     /* path management control word */
     struct scsw scsw;     /* subchannel status word */
     __u64 mba;            /* measurement block address */
     __u8 mda[4];          /* model dependent area */
-} __attribute__ ((packed,aligned(4)));
-
-struct subchannel_id {
-        __u32 cssid  : 8;
-        __u32        : 4;
-        __u32 m      : 1;
-        __u32 ssid   : 2;
-        __u32 one    : 1;
-        __u32 sch_no : 16;
-} __attribute__ ((packed, aligned(4)));
+} __attribute__ ((packed, aligned(4))) Schib;
+
+typedef struct subchannel_id {
+        __u32 cssid:8;
+        __u32:4;
+        __u32 m:1;
+        __u32 ssid:2;
+        __u32 one:1;
+        __u32 sch_no:16;
+} __attribute__ ((packed, aligned(4))) SubChannelId;
 
 struct chsc_header {
     __u16 length;
     __u16 code;
 } __attribute__((packed));
 
-struct chsc_area_sda {
+typedef struct chsc_area_sda {
     struct chsc_header request;
     __u8 reserved1:4;
     __u8 format:4;
@@ -111,29 +148,49 @@ struct chsc_area_sda {
     __u32 reserved5:4;
     __u32 format2:4;
     __u32 reserved6:24;
-} __attribute__((packed));
+} __attribute__((packed)) ChscAreaSda;
 
 /*
  * TPI info structure
  */
 struct tpi_info {
     struct subchannel_id schid;
-    __u32 intparm;         /* interruption parameter */
-    __u32 adapter_IO : 1;
-    __u32 reserved2  : 1;
-    __u32 isc        : 3;
-    __u32 reserved3  : 12;
-    __u32 int_type   : 3;
-    __u32 reserved4  : 12;
+    __u32 intparm;      /* interruption parameter */
+    __u32 adapter_IO:1;
+    __u32 reserved2:1;
+    __u32 isc:3;
+    __u32 reserved3:12;
+    __u32 int_type:3;
+    __u32 reserved4:12;
 } __attribute__ ((packed, aligned(4)));
 
-/* channel command word (type 1) */
-struct ccw1 {
+/* channel command word (format 0) */
+typedef struct ccw0 {
+    __u8 cmd_code;
+    __u32 cda:24;
+    __u32 chainData:1;
+    __u32 chain:1;
+    __u32 sli:1;
+    __u32 skip:1;
+    __u32 pci:1;
+    __u32 ida:1;
+    __u32 suspend:1;
+    __u32 mida:1;
+    __u8 reserved;
+    __u16 count;
+} __attribute__ ((packed, aligned(8))) Ccw0;
+
+/* channel command word (format 1) */
+typedef struct ccw1 {
     __u8 cmd_code;
     __u8 flags;
     __u16 count;
     __u32 cda;
-} __attribute__ ((packed, aligned(8)));
+} __attribute__ ((packed, aligned(8))) Ccw1;
+
+/* do_cio() CCW formats */
+#define CCW_FMT0                 0x00
+#define CCW_FMT1                 0x01
 
 #define CCW_FLAG_DC              0x80
 #define CCW_FLAG_CC              0x40
@@ -143,11 +200,14 @@ struct ccw1 {
 #define CCW_FLAG_IDA             0x04
 #define CCW_FLAG_SUSPEND         0x02
 
+/* Common CCW commands */
+#define CCW_CMD_READ_IPL         0x02
 #define CCW_CMD_NOOP             0x03
 #define CCW_CMD_BASIC_SENSE      0x04
 #define CCW_CMD_TIC              0x08
 #define CCW_CMD_SENSE_ID         0xe4
 
+/* Virtio CCW commands */
 #define CCW_CMD_SET_VQ           0x13
 #define CCW_CMD_VDEV_RESET       0x33
 #define CCW_CMD_READ_FEAT        0x12
@@ -159,10 +219,16 @@ struct ccw1 {
 #define CCW_CMD_SET_CONF_IND     0x53
 #define CCW_CMD_READ_VQ_CONF     0x32
 
+/* DASD CCW commands */
+#define CCW_CMD_DASD_READ             0x06
+#define CCW_CMD_DASD_SEEK             0x07
+#define CCW_CMD_DASD_SEARCH_ID_EQ     0x31
+#define CCW_CMD_DASD_READ_MT          0x86
+
 /*
  * Command-mode operation request block
  */
-struct cmd_orb {
+typedef struct cmd_orb {
     __u32 intparm;    /* interruption parameter */
     __u32 key:4;      /* flags, like key, suspend control, etc. */
     __u32 spnd:1;     /* suspend control */
@@ -182,7 +248,7 @@ struct cmd_orb {
     __u32 zero:6;     /* reserved zeros */
     __u32 orbx:1;     /* ORB extension control */
     __u32 cpa;    /* channel program address */
-}  __attribute__ ((packed, aligned(4)));
+}  __attribute__ ((packed, aligned(4))) CmdOrb;
 
 struct ciw {
     __u8 type;
@@ -190,10 +256,15 @@ struct ciw {
     __u16 count;
 };
 
+#define CU_TYPE_UNKNOWN         0x0000
+#define CU_TYPE_DASD_2107       0x2107
+#define CU_TYPE_VIRTIO          0x3832
+#define CU_TYPE_DASD_3990       0x3990
+
 /*
  * sense-id response buffer layout
  */
-struct senseid {
+typedef struct senseid {
     /* common part */
     __u8  reserved;   /* always 0x'FF' */
     __u16 cu_type;    /* control unit type */
@@ -203,15 +274,94 @@ struct senseid {
     __u8  unused;     /* padding byte */
     /* extended part */
     struct ciw ciw[62];
-}  __attribute__ ((packed, aligned(4)));
+}  __attribute__ ((packed, aligned(4))) SenseId;
+
+/*
+ * architected values for first sense byte - common_status. Bits 0-5 of this
+ * field are common to all device types.
+ */
+#define SNS_STAT0_CMD_REJECT         0x80
+#define SNS_STAT0_INTERVENTION_REQ   0x40
+#define SNS_STAT0_BUS_OUT_CHECK      0x20
+#define SNS_STAT0_EQUIPMENT_CHECK    0x10
+#define SNS_STAT0_DATA_CHECK         0x08
+#define SNS_STAT0_OVERRUN            0x04
+#define SNS_STAT0_INCOMPL_DOMAIN     0x01
+
+/* ECKD DASD status[0] byte */
+#define SNS_STAT1_PERM_ERR           0x80
+#define SNS_STAT1_INV_TRACK_FORMAT   0x40
+#define SNS_STAT1_EOC                0x20
+#define SNS_STAT1_MESSAGE_TO_OPER    0x10
+#define SNS_STAT1_NO_REC_FOUND       0x08
+#define SNS_STAT1_FILE_PROTECTED     0x04
+#define SNS_STAT1_WRITE_INHIBITED    0x02
+#define SNS_STAT1_IMPRECISE_END      0x01
+
+/* ECKD DASD status[1] byte */
+#define SNS_STAT2_REQ_INH_WRITE      0x80
+#define SNS_STAT2_CORRECTABLE        0x40
+#define SNS_STAT2_FIRST_LOG_ERR      0x20
+#define SNS_STAT2_ENV_DATA_PRESENT   0x10
+#define SNS_STAT2_IMPRECISE_END      0x04
+
+/* ECKD DASD 24-byte Sense fmt_msg codes */
+#define SENSE24_FMT_PROG_SYS    0x0
+#define SENSE24_FMT_EQUIPMENT   0x2
+#define SENSE24_FMT_CONTROLLER  0x3
+#define SENSE24_FMT_MISC        0xF
+
+/* basic sense response buffer layout */
+typedef struct SenseDataEckdDasd {
+    uint8_t common_status;
+    uint8_t status[2];
+    uint8_t res_count;
+    uint8_t phys_drive_id;
+    uint8_t low_cyl_addr;
+    uint8_t head_high_cyl_addr;
+    uint8_t fmt_msg;
+    uint64_t fmt_dependent_info[2];
+    uint8_t reserved;
+    uint8_t program_action_code;
+    uint16_t config_info;
+    uint8_t mcode_hicyl;
+    uint8_t cyl_head_addr[3];
+}  __attribute__ ((packed, aligned(4))) SenseDataEckdDasd;
+
+#define ECKD_SENSE24_GET_FMT(sd) (((sd)->fmt_msg & 0xF0) >> 4) /* hi nibble */
+#define ECKD_SENSE24_GET_MSG(sd) ((sd)->fmt_msg & 0x0F)        /* lo nibble */
+
+#define unit_check(irb)         ((irb)->scsw.dstat & SCSW_DSTAT_UCHK)
+#define iface_ctrl_check(irb)   ((irb)->scsw.cstat & SCSW_CSTAT_ICCHK)
 
 /* interruption response block */
-struct irb {
+typedef struct irb {
     struct scsw scsw;
     __u32 esw[5];
     __u32 ecw[8];
     __u32 emw[8];
-}  __attribute__ ((packed, aligned(4)));
+}  __attribute__ ((packed, aligned(4))) Irb;
+
+/* Used for SEEK ccw commands */
+typedef struct CcwSeekData {
+    uint16_t reserved;
+    uint16_t cyl;
+    uint16_t head;
+} __attribute__((packed)) CcwSeekData;
+
+/* Used for SEARCH ID ccw commands */
+typedef struct CcwSearchIdData {
+    uint16_t cyl;
+    uint16_t head;
+    uint8_t record;
+} __attribute__((packed)) CcwSearchIdData;
+
+int enable_mss_facility(void);
+void enable_subchannel(SubChannelId schid);
+uint16_t cu_type(SubChannelId schid);
+int basic_sense(SubChannelId schid, uint16_t cutype, void *sense_data,
+                 uint16_t data_size);
+int do_cio(SubChannelId schid, uint16_t cutype, uint32_t ccw_addr, int fmt);
 
 /*
  * Some S390 specific IO instructions as inline
diff --git a/pc-bios/s390-ccw/dasd-ipl.c b/pc-bios/s390-ccw/dasd-ipl.c
new file mode 100644
index 0000000000..0fc879bb8e
--- /dev/null
+++ b/pc-bios/s390-ccw/dasd-ipl.c
@@ -0,0 +1,235 @@
+/*
+ * S390 IPL (boot) from a real DASD device via vfio framework.
+ *
+ * Copyright (c) 2019 Jason J. Herne <jjherne@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "libc.h"
+#include "s390-ccw.h"
+#include "s390-arch.h"
+#include "dasd-ipl.h"
+#include "helper.h"
+
+static char prefix_page[PAGE_SIZE * 2]
+            __attribute__((__aligned__(PAGE_SIZE * 2)));
+
+static void enable_prefixing(void)
+{
+    memcpy(&prefix_page, lowcore, 4096); /* copy 4096 bytes from address 0 */
+    set_prefix(ptr2u32(&prefix_page));   /* hand the copy to set_prefix() */
+}
+
+static void disable_prefixing(void)
+{
+    set_prefix(0); /* undo enable_prefixing(): prefix value back to 0 */
+    /* Copy io interrupt info (12 bytes at offset 0xB8) back to low core */
+    memcpy((void *)&lowcore->subchannel_id, prefix_page + 0xB8, 12);
+}
+
+static bool is_read_tic_ccw_chain(Ccw0 *ccw)
+{
+    /* True for a command-chained READ (or READ MT) directly followed by TIC */
+    bool is_read = ccw->cmd_code == CCW_CMD_DASD_READ ||
+                   ccw->cmd_code == CCW_CMD_DASD_READ_MT;
+
+    return is_read && ccw->chain && ccw[1].cmd_code == CCW_CMD_TIC;
+}
+
+static bool dynamic_cp_fixup(uint32_t ccw_addr, uint32_t  *next_cpa)
+{
+    Ccw0 *cur_ccw = (Ccw0 *)(uint64_t)ccw_addr;
+    Ccw0 *tic_ccw;
+
+    while (true) {
+        /* Skip TIC back to the preceding ccw (it may lack the chain bit) */
+        if (cur_ccw->cmd_code == CCW_CMD_TIC &&
+            cur_ccw->cda == ptr2u32(cur_ccw) - 8) {
+            cur_ccw += 1;
+            continue;
+        }
+
+        if (!cur_ccw->chain) {
+            break; /* an unchained ccw ends the scan */
+        }
+        if (is_read_tic_ccw_chain(cur_ccw)) {
+            /*
+             * Breaking a chain of CCWs may alter the semantics or even the
+             * validity of a channel program. The heuristic implemented below
+             * seems to work well in practice for the channel programs
+             * generated by zipl.
+             */
+            tic_ccw = cur_ccw + 1;
+            *next_cpa = tic_ccw->cda; /* caller resumes at the TIC target */
+            cur_ccw->chain = 0;       /* the READ now ends this segment */
+            return true;
+        }
+        cur_ccw += 1;
+    }
+    return false; /* nothing was cut; *next_cpa is left untouched */
+}
+
+static int run_dynamic_ccw_program(SubChannelId schid, uint16_t cutype,
+                                   uint32_t cpa)
+{
+    uint32_t resume_cpa = 0;
+    bool more;
+    int rc;
+
+    /* Run the program piecewise: each pass executes one cut-off segment */
+    for (;;) {
+        more = dynamic_cp_fixup(cpa, &resume_cpa);
+
+        print_int("executing ccw chain at ", cpa);
+        enable_prefixing();
+        rc = do_cio(schid, cutype, cpa, CCW_FMT0);
+        disable_prefixing();
+
+        if (rc || !more) {
+            return rc;
+        }
+
+        cpa = resume_cpa; /* continue where the removed TIC pointed */
+    }
+}
+
+static void make_readipl(void)
+{
+    Ccw0 *ccwIplRead = (Ccw0 *)0x00;
+
+    /* Build the Read IPL ccw at address 0; zero it first to drop stale bits */
+    memset(ccwIplRead, 0, sizeof(Ccw0));
+    ccwIplRead->cmd_code = CCW_CMD_READ_IPL;
+    ccwIplRead->cda = 0x00; /* Read into address 0x00 in main memory */
+    ccwIplRead->count = 0x18; /* Read 0x18 bytes (chain flag stays off) */
+}
+
+static void run_readipl(SubChannelId schid, uint16_t cutype)
+{
+    if (do_cio(schid, cutype, 0x00, CCW_FMT0) != 0) { /* ccw sits at 0x00 */
+        panic("dasd-ipl: Failed to run Read IPL channel program\n");
+    }
+}
+
+/*
+ * The architecture states that IPL1 data should consist of a psw followed by
+ * format-0 READ and TIC CCWs. Let's sanity check.
+ */
+static void check_ipl1(void)
+{
+    const Ccw0 *ipl1 = (const Ccw0 *)0x08; /* [0] = READ ccw, [1] = TIC ccw */
+    bool valid = ipl1[0].cmd_code == CCW_CMD_DASD_READ &&
+                 ipl1[1].cmd_code == CCW_CMD_TIC;
+
+    if (!valid) {
+        panic("dasd-ipl: IPL1 data invalid. Is this disk really bootable?\n");
+    }
+}
+
+static void check_ipl2(uint32_t ipl2_addr)
+{
+    const Ccw0 *first_ccw = u32toptr(ipl2_addr);
+
+    if (!ipl2_addr) { /* address 0 is rejected as an IPL2 location */
+        panic("IPL2 address invalid. Is this disk really bootable?\n");
+    }
+    if (!first_ccw->cmd_code) { /* command code 0x00 is rejected as well */
+        panic("IPL2 ccw data invalid. Is this disk really bootable?\n");
+    }
+}
+
+static uint32_t read_ipl2_addr(void)
+{
+    const Ccw0 *ipl1_tic = (const Ccw0 *)0x10; /* TIC ccw of IPL1 */
+
+    return ipl1_tic->cda; /* its data address is used as the IPL2 address */
+}
+
+static void ipl1_fixup(void)
+{
+    Ccw0 *ccwSeek = (Ccw0 *) 0x08;
+    Ccw0 *ccwSearchID = (Ccw0 *) 0x10;
+    Ccw0 *ccwSearchTic = (Ccw0 *) 0x18;
+    Ccw0 *ccwRead = (Ccw0 *) 0x20; /* new home of the original IPL1 ccws */
+    CcwSeekData *seekData = (CcwSeekData *) 0x30; /* SEEK argument */
+    CcwSearchIdData *searchData = (CcwSearchIdData *) 0x38; /* SEARCH arg */
+
+    /* move IPL1 CCWs to make room for CCWs needed to locate record 2 */
+    memcpy(ccwRead, (void *)0x08, 16); /* 16 bytes = 2 ccws */
+
+    /* Disable chaining so we don't TIC to IPL2 channel program */
+    ccwRead->chain = 0x00;
+
+    ccwSeek->cmd_code = CCW_CMD_DASD_SEEK;
+    ccwSeek->cda = ptr2u32(seekData);
+    ccwSeek->chain = 1;
+    ccwSeek->count = sizeof(*seekData);
+    seekData->reserved = 0x00;
+    seekData->cyl = 0x00;
+    seekData->head = 0x00;
+
+    ccwSearchID->cmd_code = CCW_CMD_DASD_SEARCH_ID_EQ;
+    ccwSearchID->cda = ptr2u32(searchData);
+    ccwSearchID->chain = 1;
+    ccwSearchID->count = sizeof(*searchData);
+    searchData->cyl = 0;
+    searchData->head = 0;
+    searchData->record = 2; /* i.e. record 2 of cylinder 0, head 0 */
+
+    /* Go back to Search CCW if correct record not yet found */
+    ccwSearchTic->cmd_code = CCW_CMD_TIC; /* NOTE(review): flags not reset */
+    ccwSearchTic->cda = ptr2u32(ccwSearchID);
+}
+
+static void run_ipl1(SubChannelId schid, uint16_t cutype)
+{
+    const uint32_t ipl1_cpa = 0x08; /* first ccw of the fixed-up IPL1 */
+
+    if (do_cio(schid, cutype, ipl1_cpa, CCW_FMT0) != 0) {
+        panic("dasd-ipl: Failed to run IPL1 channel program\n");
+    }
+}
+
+static void run_ipl2(SubChannelId schid, uint16_t cutype, uint32_t addr)
+{
+    if (run_dynamic_ccw_program(schid, cutype, addr) != 0) { /* any error */
+        panic("dasd-ipl: Failed to run IPL2 channel program\n");
+    }
+}
+
+/*
+ * Limitations in vfio-ccw support complicate the IPL process. Details can
+ * be found in docs/devel/s390-dasd-ipl.txt
+ */
+void dasd_ipl(SubChannelId schid, uint16_t cutype)
+{
+    PSWLegacy *pswl = (PSWLegacy *) 0x00; /* the IPL PSW at address 0 */
+    uint32_t ipl2_addr;
+
+    /* Construct Read IPL CCW and run it to read IPL1 from boot disk */
+    make_readipl();
+    run_readipl(schid, cutype);
+    ipl2_addr = read_ipl2_addr(); /* save before ipl1_fixup() rewrites 0x10 */
+    check_ipl1();
+
+    /*
+     * Fixup IPL1 channel program to account for vfio-ccw limitations, then run
+     * it to read IPL2 channel program from boot disk.
+     */
+    ipl1_fixup();
+    run_ipl1(schid, cutype);
+    check_ipl2(ipl2_addr);
+
+    /*
+     * Run IPL2 channel program to read operating system code from boot disk
+     */
+    run_ipl2(schid, cutype, ipl2_addr);
+
+    /* Transfer control to the guest operating system */
+    pswl->mask |= PSW_MASK_EAMODE;   /* NOTE(review): bit 32 is lost in u32 */
+    pswl->addr |= PSW_MASK_BAMODE;   /* Force z-mode (sets bit 31 of addr) */
+    jump_to_low_kernel();
+}
diff --git a/pc-bios/s390-ccw/dasd-ipl.h b/pc-bios/s390-ccw/dasd-ipl.h
new file mode 100644
index 0000000000..c394828906
--- /dev/null
+++ b/pc-bios/s390-ccw/dasd-ipl.h
@@ -0,0 +1,16 @@
+/*
+ * S390 IPL (boot) from a real DASD device via vfio framework.
+ *
+ * Copyright (c) 2019 Jason J. Herne <jjherne@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef DASD_IPL_H
+#define DASD_IPL_H
+
+void dasd_ipl(SubChannelId schid, uint16_t cutype);
+
+#endif /* DASD_IPL_H */
diff --git a/pc-bios/s390-ccw/helper.h b/pc-bios/s390-ccw/helper.h
new file mode 100644
index 0000000000..78d5bc7442
--- /dev/null
+++ b/pc-bios/s390-ccw/helper.h
@@ -0,0 +1,31 @@
+/*
+ * Helper Functions
+ *
+ * Copyright (c) 2019 IBM Corp.
+ *
+ * Author(s): Jason J. Herne <jjherne@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef S390_CCW_HELPER_H
+#define S390_CCW_HELPER_H
+
+#include "s390-ccw.h"
+
+/* Avoids compiler warnings when casting a pointer to a u32 */
+static inline uint32_t ptr2u32(void *ptr)
+{
+    IPL_assert(!((uint64_t)ptr >> 32), "ptr2u32: ptr too large");
+    return (uint32_t)(uint64_t)ptr;
+}
+
+/* Widens n to 64 bits before the pointer cast, avoiding compiler warnings */
+static inline void *u32toptr(uint32_t n)
+{
+    return (void *)(uint64_t)n;
+}
+
+#endif
diff --git a/pc-bios/s390-ccw/libc.h b/pc-bios/s390-ccw/libc.h
index 818517ff5d..bcdc45732d 100644
--- a/pc-bios/s390-ccw/libc.h
+++ b/pc-bios/s390-ccw/libc.h
@@ -67,6 +67,17 @@ static inline size_t strlen(const char *str)
     return i;
 }
 
+static inline char *strcat(char *dest, const char *src)
+{
+    size_t i, src_len = strlen(src); /* measured once, not per iteration */
+    char *dest_end = dest + strlen(dest);
+
+    for (i = 0; i <= src_len; i++) { /* "<=" also copies the NUL */
+        dest_end[i] = src[i];
+    }
+    return dest;
+}
+
 static inline int isdigit(int c)
 {
     return (c >= '0') && (c <= '9');
diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c
index 544851d672..a69c73349e 100644
--- a/pc-bios/s390-ccw/main.c
+++ b/pc-bios/s390-ccw/main.c
@@ -9,21 +9,27 @@
  */
 
 #include "libc.h"
+#include "s390-arch.h"
 #include "s390-ccw.h"
+#include "cio.h"
 #include "virtio.h"
+#include "dasd-ipl.h"
 
 char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE)));
 static SubChannelId blk_schid = { .one = 1 };
-IplParameterBlock iplb __attribute__((__aligned__(PAGE_SIZE)));
 static char loadparm_str[LOADPARM_LEN + 1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 QemuIplParameters qipl;
+IplParameterBlock iplb __attribute__((__aligned__(PAGE_SIZE)));
+static bool have_iplb;
+static uint16_t cutype;
+LowCore const *lowcore; /* Yes, this *is* a pointer to address 0 */
 
 #define LOADPARM_PROMPT "PROMPT  "
 #define LOADPARM_EMPTY  "        "
 #define BOOT_MENU_FLAG_MASK (QIPL_FLAG_BM_OPTS_CMD | QIPL_FLAG_BM_OPTS_ZIPL)
 
 /*
- * Priniciples of Operations (SA22-7832-09) chapter 17 requires that
+ * Principles of Operations (SA22-7832-09) chapter 17 requires that
  * a subsystem-identification is at 184-187 and bytes 188-191 are zero
  * after list-directed-IPL and ccw-IPL.
  */
@@ -48,29 +54,64 @@ unsigned int get_loadparm_index(void)
     return atoui(loadparm_str);
 }
 
-static bool find_dev(Schib *schib, int dev_no)
+/*
+ * Find the subchannel connected to the given device (dev_no) and fill in the
+ * subchannel information block (schib) with the connected subchannel's info.
+ * NOTE: The global variable blk_schid is updated to contain the subchannel
+ * information.
+ *
+ * If the caller gives dev_no=-1 then the user did not specify a boot device.
+ * In this case we'll just use the first potentially bootable device we find.
+ */
+static bool find_subch(int dev_no)
 {
+    Schib schib;
     int i, r;
+    bool is_virtio;
 
     for (i = 0; i < 0x10000; i++) {
         blk_schid.sch_no = i;
-        r = stsch_err(blk_schid, schib);
+        r = stsch_err(blk_schid, &schib);
         if ((r == 3) || (r == -EIO)) {
             break;
         }
-        if (!schib->pmcw.dnv) {
-            continue;
-        }
-        if (!virtio_is_supported(blk_schid)) {
+        if (!schib.pmcw.dnv) {
             continue;
         }
-        /* Skip net devices since no IPLB is created and therefore no
-         * no network bootloader has been loaded
+
+        enable_subchannel(blk_schid);
+        cutype = cu_type(blk_schid);
+
+        /*
+         * Note: we always have to run virtio_is_supported() here to make
+         * sure that the vdev.senseid data gets pre-initialized correctly
          */
-        if (virtio_get_device_type() == VIRTIO_ID_NET && dev_no < 0) {
-            continue;
+        is_virtio = virtio_is_supported(blk_schid);
+
+        /* No specific devno given, just return 1st possibly bootable device */
+        if (dev_no < 0) {
+            switch (cutype) {
+            case CU_TYPE_VIRTIO:
+                if (is_virtio) {
+                    /*
+                     * Skip net devices since no IPLB is created and therefore
+                     * no network bootloader has been loaded
+                     */
+                    if (virtio_get_device_type() != VIRTIO_ID_NET) {
+                        return true;
+                    }
+                }
+                continue;
+            case CU_TYPE_DASD_3990:
+            case CU_TYPE_DASD_2107:
+                return true;
+            default:
+                continue;
+            }
         }
-        if ((dev_no < 0) || (schib->pmcw.dev == dev_no)) {
+
+        /* Caller asked for a specific devno */
+        if (schib.pmcw.dev == dev_no) {
             return true;
         }
     }
@@ -99,68 +140,88 @@ static void menu_setup(void)
     }
 }
 
-static void virtio_setup(void)
+/*
+ * Initialize the channel I/O subsystem so we can talk to our ipl/boot device.
+ */
+static void css_setup(void)
 {
-    Schib schib;
-    int ssid;
-    bool found = false;
-    uint16_t dev_no;
-    char ldp[] = "LOADPARM=[________]\n";
-    VDev *vdev = virtio_get_device();
-    QemuIplParameters *early_qipl = (QemuIplParameters *)QIPL_ADDRESS;
-
     /*
-     * We unconditionally enable mss support. In every sane configuration,
-     * this will succeed; and even if it doesn't, stsch_err() can deal
-     * with the consequences.
+     * Unconditionally enable mss support. In every sane configuration this
+     * will succeed; and even if it doesn't, stsch_err() can handle it.
      */
     enable_mss_facility();
+}
+
+/*
+ * Collect various pieces of information from the hypervisor/hardware that
+ * we'll use to determine exactly how we'll boot.
+ */
+static void boot_setup(void)
+{
+    char lpmsg[] = "LOADPARM=[________]\n";
 
     sclp_get_loadparm_ascii(loadparm_str);
-    memcpy(ldp + 10, loadparm_str, LOADPARM_LEN);
-    sclp_print(ldp);
+    memcpy(lpmsg + 10, loadparm_str, LOADPARM_LEN); /* fill the [________] */
+    sclp_print(lpmsg);
 
-    memcpy(&qipl, early_qipl, sizeof(QemuIplParameters));
+    have_iplb = store_iplb(&iplb); /* remembered for find_boot_device() */
+}
 
-    if (store_iplb(&iplb)) {
-        switch (iplb.pbt) {
-        case S390_IPL_TYPE_CCW:
-            dev_no = iplb.ccw.devno;
-            debug_print_int("device no. ", dev_no);
-            blk_schid.ssid = iplb.ccw.ssid & 0x3;
-            debug_print_int("ssid ", blk_schid.ssid);
-            found = find_dev(&schib, dev_no);
-            break;
-        case S390_IPL_TYPE_QEMU_SCSI:
-            vdev->scsi_device_selected = true;
-            vdev->selected_scsi_device.channel = iplb.scsi.channel;
-            vdev->selected_scsi_device.target = iplb.scsi.target;
-            vdev->selected_scsi_device.lun = iplb.scsi.lun;
-            blk_schid.ssid = iplb.scsi.ssid & 0x3;
-            found = find_dev(&schib, iplb.scsi.devno);
-            break;
-        default:
-            panic("List-directed IPL not supported yet!\n");
-        }
-        menu_setup();
-    } else {
+static void find_boot_device(void)
+{
+    VDev *vdev = virtio_get_device();
+    int ssid;
+    bool found;
+
+    if (!have_iplb) {
         for (ssid = 0; ssid < 0x3; ssid++) {
             blk_schid.ssid = ssid;
-            found = find_dev(&schib, -1);
+            found = find_subch(-1);
             if (found) {
-                break;
+                return;
             }
         }
+        panic("Could not find a suitable boot device (none specified)\n");
+    }
+
+    switch (iplb.pbt) {
+    case S390_IPL_TYPE_CCW:
+        debug_print_int("device no. ", iplb.ccw.devno);
+        blk_schid.ssid = iplb.ccw.ssid & 0x3;
+        debug_print_int("ssid ", blk_schid.ssid);
+        found = find_subch(iplb.ccw.devno);
+        break;
+    case S390_IPL_TYPE_QEMU_SCSI:
+        vdev->scsi_device_selected = true;
+        vdev->selected_scsi_device.channel = iplb.scsi.channel;
+        vdev->selected_scsi_device.target = iplb.scsi.target;
+        vdev->selected_scsi_device.lun = iplb.scsi.lun;
+        blk_schid.ssid = iplb.scsi.ssid & 0x3;
+        found = find_subch(iplb.scsi.devno);
+        break;
+    default:
+        panic("List-directed IPL not supported yet!\n");
     }
 
-    IPL_assert(found, "No virtio device found");
+    IPL_assert(found, "Boot device not found\n");
+}
+
+static void virtio_setup(void)
+{
+    VDev *vdev = virtio_get_device();
+    QemuIplParameters *early_qipl = (QemuIplParameters *)QIPL_ADDRESS;
+
+    memcpy(&qipl, early_qipl, sizeof(QemuIplParameters));
+
+    if (have_iplb) {
+        menu_setup();
+    }
 
     if (virtio_get_device_type() == VIRTIO_ID_NET) {
         sclp_print("Network boot device detected\n");
         vdev->netboot_start_addr = qipl.netboot_start_addr;
     } else {
         virtio_blk_setup_device(blk_schid);
-
         IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected");
     }
 }
@@ -168,9 +229,24 @@ static void virtio_setup(void)
 int main(void)
 {
     sclp_setup();
-    virtio_setup();
-
-    zipl_load(); /* no return */
+    css_setup();
+    boot_setup();
+    find_boot_device();
+    enable_subchannel(blk_schid);
+
+    switch (cutype) {
+    case CU_TYPE_DASD_3990:
+    case CU_TYPE_DASD_2107:
+        dasd_ipl(blk_schid, cutype); /* no return */
+        break;
+    case CU_TYPE_VIRTIO:
+        virtio_setup();
+        zipl_load(); /* no return */
+        break;
+    default:
+        print_int("Attempting to boot from unexpected device type", cutype);
+        panic("");
+    }
 
     panic("Failed to load OS from hard disk\n");
     return 0; /* make compiler happy */
diff --git a/pc-bios/s390-ccw/netboot.mak b/pc-bios/s390-ccw/netboot.mak
index 14e96b2aa6..5eefb7c289 100644
--- a/pc-bios/s390-ccw/netboot.mak
+++ b/pc-bios/s390-ccw/netboot.mak
@@ -1,7 +1,7 @@
 
 SLOF_DIR := $(SRC_PATH)/roms/SLOF
 
-NETOBJS := start.o sclp.o virtio.o virtio-net.o jump2ipl.o netmain.o \
+NETOBJS := start.o sclp.o cio.o virtio.o virtio-net.o jump2ipl.o netmain.o \
 	   libnet.a libc.a
 
 LIBC_INC := -nostdinc -I$(SLOF_DIR)/lib/libc/include
diff --git a/pc-bios/s390-ccw/netmain.c b/pc-bios/s390-ccw/netmain.c
index 0392131c27..f3542cb2cf 100644
--- a/pc-bios/s390-ccw/netmain.c
+++ b/pc-bios/s390-ccw/netmain.c
@@ -33,6 +33,7 @@
 #include <pxelinux.h>
 
 #include "s390-ccw.h"
+#include "cio.h"
 #include "virtio.h"
 
 #define DEFAULT_BOOT_RETRIES 10
@@ -475,6 +476,7 @@ static bool find_net_dev(Schib *schib, int dev_no)
         if (!schib->pmcw.dnv) {
             continue;
         }
+        enable_subchannel(net_schid);
         if (!virtio_is_supported(net_schid)) {
             continue;
         }
diff --git a/pc-bios/s390-ccw/s390-arch.h b/pc-bios/s390-ccw/s390-arch.h
new file mode 100644
index 0000000000..504fc7c2f0
--- /dev/null
+++ b/pc-bios/s390-ccw/s390-arch.h
@@ -0,0 +1,103 @@
+/*
+ * S390 Basic Architecture
+ *
+ * Copyright (c) 2019 Jason J. Herne <jjherne@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef S390_ARCH_H
+#define S390_ARCH_H
+
+typedef struct PSW {
+    uint64_t mask;
+    uint64_t addr;
+} __attribute__ ((aligned(8))) PSW;
+_Static_assert(sizeof(struct PSW) == 16, "PSW size incorrect");
+
+/* Older PSW format used by LPSW instruction */
+typedef struct PSWLegacy {
+    uint32_t mask;
+    uint32_t addr;
+} __attribute__ ((aligned(8))) PSWLegacy;
+_Static_assert(sizeof(struct PSWLegacy) == 8, "PSWLegacy size incorrect");
+
+/* s390 psw bit masks */
+#define PSW_MASK_IOINT      0x0200000000000000ULL
+#define PSW_MASK_WAIT       0x0002000000000000ULL
+#define PSW_MASK_EAMODE     0x0000000100000000ULL
+#define PSW_MASK_BAMODE     0x0000000080000000ULL
+#define PSW_MASK_ZMODE      (PSW_MASK_EAMODE | PSW_MASK_BAMODE)
+
+/* Low core mapping */
+typedef struct LowCore {
+    /* prefix area: defined by architecture */
+    PSWLegacy       ipl_psw;                  /* 0x000 */
+    uint32_t        ccw1[2];                  /* 0x008 */
+    uint32_t        ccw2[2];                  /* 0x010 */
+    uint8_t         pad1[0x80 - 0x18];        /* 0x018 */
+    uint32_t        ext_params;               /* 0x080 */
+    uint16_t        cpu_addr;                 /* 0x084 */
+    uint16_t        ext_int_code;             /* 0x086 */
+    uint16_t        svc_ilen;                 /* 0x088 */
+    uint16_t        svc_code;                 /* 0x08a */
+    uint16_t        pgm_ilen;                 /* 0x08c */
+    uint16_t        pgm_code;                 /* 0x08e */
+    uint32_t        data_exc_code;            /* 0x090 */
+    uint16_t        mon_class_num;            /* 0x094 */
+    uint16_t        per_perc_atmid;           /* 0x096 */
+    uint64_t        per_address;              /* 0x098 */
+    uint8_t         exc_access_id;            /* 0x0a0 */
+    uint8_t         per_access_id;            /* 0x0a1 */
+    uint8_t         op_access_id;             /* 0x0a2 */
+    uint8_t         ar_access_id;             /* 0x0a3 */
+    uint8_t         pad2[0xA8 - 0xA4];        /* 0x0a4 */
+    uint64_t        trans_exc_code;           /* 0x0a8 */
+    uint64_t        monitor_code;             /* 0x0b0 */
+    uint16_t        subchannel_id;            /* 0x0b8 */
+    uint16_t        subchannel_nr;            /* 0x0ba */
+    uint32_t        io_int_parm;              /* 0x0bc */
+    uint32_t        io_int_word;              /* 0x0c0 */
+    uint8_t         pad3[0xc8 - 0xc4];        /* 0x0c4 */
+    uint32_t        stfl_fac_list;            /* 0x0c8 */
+    uint8_t         pad4[0xe8 - 0xcc];        /* 0x0cc */
+    uint64_t        mcic;                     /* 0x0e8 */
+    uint8_t         pad5[0xf4 - 0xf0];        /* 0x0f0 */
+    uint32_t        external_damage_code;     /* 0x0f4 */
+    uint64_t        failing_storage_address;  /* 0x0f8 */
+    uint8_t         pad6[0x110 - 0x100];      /* 0x100 */
+    uint64_t        per_breaking_event_addr;  /* 0x110 */
+    uint8_t         pad7[0x120 - 0x118];      /* 0x118 */
+    PSW             restart_old_psw;          /* 0x120 */
+    PSW             external_old_psw;         /* 0x130 */
+    PSW             svc_old_psw;              /* 0x140 */
+    PSW             program_old_psw;          /* 0x150 */
+    PSW             mcck_old_psw;             /* 0x160 */
+    PSW             io_old_psw;               /* 0x170 */
+    uint8_t         pad8[0x1a0 - 0x180];      /* 0x180 */
+    PSW             restart_new_psw;          /* 0x1a0 */
+    PSW             external_new_psw;         /* 0x1b0 */
+    PSW             svc_new_psw;              /* 0x1c0 */
+    PSW             program_new_psw;          /* 0x1d0 */
+    PSW             mcck_new_psw;             /* 0x1e0 */
+    PSW             io_new_psw;               /* 0x1f0 */
+} __attribute__((packed, aligned(8192))) LowCore;
+
+extern LowCore const *lowcore;
+
+static inline void set_prefix(uint32_t address)
+{
+    asm volatile("spx %0" : : "m" (address) : "memory");
+}
+
+static inline uint32_t store_prefix(void)
+{
+    uint32_t address;
+
+    asm volatile("stpx %0" : "=m" (address));
+    return address;
+}
+
+#endif
diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h
index 9828aa233d..11bce7d73c 100644
--- a/pc-bios/s390-ccw/s390-ccw.h
+++ b/pc-bios/s390-ccw/s390-ccw.h
@@ -49,17 +49,10 @@ typedef unsigned long long __u64;
 #include "cio.h"
 #include "iplb.h"
 
-typedef struct irb Irb;
-typedef struct ccw1 Ccw1;
-typedef struct cmd_orb CmdOrb;
-typedef struct schib Schib;
-typedef struct chsc_area_sda ChscAreaSda;
-typedef struct senseid SenseId;
-typedef struct subchannel_id SubChannelId;
-
 /* start.s */
 void disabled_wait(void);
 void consume_sclp_int(void);
+void consume_io_int(void);
 
 /* main.c */
 void panic(const char *string);
@@ -80,7 +73,6 @@ unsigned long virtio_load_direct(ulong rec_list1, ulong rec_list2,
 bool virtio_is_supported(SubChannelId schid);
 void virtio_blk_setup_device(SubChannelId schid);
 int virtio_read(ulong sector, void *load_addr);
-int enable_mss_facility(void);
 u64 get_clock(void);
 ulong get_second(void);
 
diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S
index 5c22cb0849..aa8fceb19d 100644
--- a/pc-bios/s390-ccw/start.S
+++ b/pc-bios/s390-ccw/start.S
@@ -71,6 +71,26 @@ consume_sclp_int:
         larl %r1, enabled_wait_psw
         lpswe 0(%r1)
 
+/*
+ * void consume_io_int(void)
+ *
+ * eats one I/O interrupt
+ */
+        .globl consume_io_int
+consume_io_int:
+        /* enable I/O interrupts in cr6 */
+        stctg %c6,%c6,0(%r15)
+        oi    4(%r15), 0xff
+        lctlg %c6,%c6,0(%r15)
+        /* prepare i/o call handler */
+        larl  %r1, io_new_code
+        stg   %r1, 0x1f8
+        larl  %r1, io_new_mask
+        mvc   0x1f0(8),0(%r1)
+        /* load enabled wait PSW */
+        larl  %r1, enabled_wait_psw
+        lpswe 0(%r1)
+
 external_new_code:
         /* disable service interrupts in cr0 */
         stctg   %c0,%c0,0(%r15)
@@ -78,6 +98,13 @@ external_new_code:
         lctlg   %c0,%c0,0(%r15)
         br      %r14
 
+io_new_code:
+        /* disable I/O interrupts in cr6 */
+        stctg %c6,%c6,0(%r15)
+        ni    4(%r15), 0x00
+        lctlg %c6,%c6,0(%r15)
+        br    %r14
+
         .align  8
 disabled_wait_psw:
         .quad   0x0002000180000000,0x0000000000000000
@@ -85,3 +112,5 @@ enabled_wait_psw:
         .quad   0x0302000180000000,0x0000000000000000
 external_new_mask:
         .quad   0x0000000180000000
+io_new_mask:
+        .quad   0x0000000180000000
diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c
index cdb66f459e..fb40ca9828 100644
--- a/pc-bios/s390-ccw/virtio.c
+++ b/pc-bios/s390-ccw/virtio.c
@@ -10,9 +10,11 @@
 
 #include "libc.h"
 #include "s390-ccw.h"
+#include "cio.h"
 #include "virtio.h"
 #include "virtio-scsi.h"
 #include "bswap.h"
+#include "helper.h"
 
 #define VRING_WAIT_REPLY_TIMEOUT 30
 
@@ -20,8 +22,6 @@ static VRing block[VIRTIO_MAX_VQS];
 static char ring_area[VIRTIO_RING_SIZE * VIRTIO_MAX_VQS]
                      __attribute__((__aligned__(PAGE_SIZE)));
 
-static char chsc_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
-
 static VDev vdev = {
     .nr_vqs = 1,
     .vrings = block,
@@ -90,38 +90,19 @@ int drain_irqs(SubChannelId schid)
     }
 }
 
-static int run_ccw(VDev *vdev, int cmd, void *ptr, int len)
+static int run_ccw(VDev *vdev, int cmd, void *ptr, int len, bool sli)
 {
     Ccw1 ccw = {};
-    CmdOrb orb = {};
-    Schib schib;
-    int r;
-
-    /* start command processing */
-    stsch_err(vdev->schid, &schib);
-    /* enable the subchannel for IPL device */
-    schib.pmcw.ena = 1;
-    msch(vdev->schid, &schib);
-
-    /* start subchannel command */
-    orb.fmt = 1;
-    orb.cpa = (u32)(long)&ccw;
-    orb.lpm = 0x80;
 
     ccw.cmd_code = cmd;
     ccw.cda = (long)ptr;
     ccw.count = len;
 
-    r = ssch(vdev->schid, &orb);
-    /*
-     * XXX Wait until device is done processing the CCW. For now we can
-     *     assume that a simple tsch will have finished the CCW processing,
-     *     but the architecture allows for asynchronous operation
-     */
-    if (!r) {
-        r = drain_irqs(vdev->schid);
+    if (sli) {
+        ccw.flags |= CCW_FLAG_SLI;
     }
-    return r;
+
+    return do_cio(vdev->schid, vdev->senseid.cu_type, ptr2u32(&ccw), CCW_FMT1);
 }
 
 static void vring_init(VRing *vr, VqInfo *info)
@@ -263,7 +244,7 @@ void virtio_setup_ccw(VDev *vdev)
     vdev->config.blk.blk_size = 0; /* mark "illegal" - setup started... */
     vdev->guessed_disk_nature = VIRTIO_GDN_NONE;
 
-    run_ccw(vdev, CCW_CMD_VDEV_RESET, NULL, 0);
+    run_ccw(vdev, CCW_CMD_VDEV_RESET, NULL, 0, false);
 
     switch (vdev->senseid.cu_model) {
     case VIRTIO_ID_NET:
@@ -284,18 +265,19 @@ void virtio_setup_ccw(VDev *vdev)
     default:
         panic("Unsupported virtio device\n");
     }
-    IPL_assert(run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size) == 0,
-               "Could not get block device configuration");
+    IPL_assert(
+        run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0,
+       "Could not get block device configuration");
 
     /* Feature negotiation */
     for (i = 0; i < ARRAY_SIZE(vdev->guest_features); i++) {
         feats.features = 0;
         feats.index = i;
-        rc = run_ccw(vdev, CCW_CMD_READ_FEAT, &feats, sizeof(feats));
+        rc = run_ccw(vdev, CCW_CMD_READ_FEAT, &feats, sizeof(feats), false);
         IPL_assert(rc == 0, "Could not get features bits");
         vdev->guest_features[i] &= bswap32(feats.features);
         feats.features = bswap32(vdev->guest_features[i]);
-        rc = run_ccw(vdev, CCW_CMD_WRITE_FEAT, &feats, sizeof(feats));
+        rc = run_ccw(vdev, CCW_CMD_WRITE_FEAT, &feats, sizeof(feats), false);
         IPL_assert(rc == 0, "Could not set features bits");
     }
 
@@ -312,16 +294,17 @@ void virtio_setup_ccw(VDev *vdev)
         };
 
         IPL_assert(
-            run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config)) == 0,
+            run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false) == 0,
             "Could not get block device VQ configuration");
         info.num = config.num;
         vring_init(&vdev->vrings[i], &info);
         vdev->vrings[i].schid = vdev->schid;
-        IPL_assert(run_ccw(vdev, CCW_CMD_SET_VQ, &info, sizeof(info)) == 0,
-                   "Cannot set VQ info");
+        IPL_assert(
+            run_ccw(vdev, CCW_CMD_SET_VQ, &info, sizeof(info), false) == 0,
+            "Cannot set VQ info");
     }
     IPL_assert(
-        run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status)) == 0,
+        run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false) == 0,
         "Could not write status to host");
 }
 
@@ -329,8 +312,15 @@ bool virtio_is_supported(SubChannelId schid)
 {
     vdev.schid = schid;
     memset(&vdev.senseid, 0, sizeof(vdev.senseid));
-    /* run sense id command */
-    if (run_ccw(&vdev, CCW_CMD_SENSE_ID, &vdev.senseid, sizeof(vdev.senseid))) {
+
+    /*
+     * Run sense id command.
+     * The size of the senseid data differs between devices (notably,
+     * between virtio devices and dasds), so specify the largest possible
+     * size and suppress the incorrect length indication for smaller sizes.
+     */
+    if (run_ccw(&vdev, CCW_CMD_SENSE_ID, &vdev.senseid, sizeof(vdev.senseid),
+                true)) {
         return false;
     }
     if (vdev.senseid.cu_type == 0x3832) {
@@ -343,20 +333,3 @@ bool virtio_is_supported(SubChannelId schid)
     }
     return false;
 }
-
-int enable_mss_facility(void)
-{
-    int ret;
-    ChscAreaSda *sda_area = (ChscAreaSda *) chsc_page;
-
-    memset(sda_area, 0, PAGE_SIZE);
-    sda_area->request.length = 0x0400;
-    sda_area->request.code = 0x0031;
-    sda_area->operation_code = 0x2;
-
-    ret = chsc(sda_area);
-    if ((ret == 0) && (sda_area->response.code == 0x0001)) {
-        return 0;
-    }
-    return -EIO;
-}
diff --git a/pc-bios/s390-netboot.img b/pc-bios/s390-netboot.img
index 2c6886efb8..aa90fbccb1 100644
--- a/pc-bios/s390-netboot.img
+++ b/pc-bios/s390-netboot.img
Binary files differ
diff --git a/qemu-options.hx b/qemu-options.hx
index 08749a3391..bdc74c0620 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4233,6 +4233,11 @@ using the SNIA NVM programming model (e.g. Intel NVDIMM).
 If @option{pmem} is set to 'on', QEMU will take necessary operations to
 guarantee the persistence of its own writes to @option{mem-path}
 (e.g. in vNVDIMM label emulation and live migration).
+In addition, the backend file is mapped with the MAP_SYNC flag, which
+ensures that the file metadata of @option{mem-path} stays in sync in case
+of a host crash or a power failure. MAP_SYNC requires support from both
+the host kernel (Linux kernel 4.15 or later) and the filesystem holding
+@option{mem-path}, which must be mounted with the DAX option.
 
 @item -object memory-backend-ram,id=@var{id},merge=@var{on|off},dump=@var{on|off},share=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave}
 
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index a310a9072b..c3819d2b98 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -95,7 +95,7 @@ for arch in $ARCHLIST; do
 
     rm -rf "$output/linux-headers/asm-$arch"
     mkdir -p "$output/linux-headers/asm-$arch"
-    for header in kvm.h unistd.h bitsperlong.h; do
+    for header in kvm.h unistd.h bitsperlong.h mman.h; do
         cp "$tmpdir/include/asm/$header" "$output/linux-headers/asm-$arch"
     done
 
@@ -139,13 +139,13 @@ done
 rm -rf "$output/linux-headers/linux"
 mkdir -p "$output/linux-headers/linux"
 for header in kvm.h vfio.h vfio_ccw.h vhost.h \
-              psci.h psp-sev.h userfaultfd.h; do
+              psci.h psp-sev.h userfaultfd.h mman.h; do
     cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux"
 done
 
 rm -rf "$output/linux-headers/asm-generic"
 mkdir -p "$output/linux-headers/asm-generic"
-for header in unistd.h bitsperlong.h; do
+for header in unistd.h bitsperlong.h mman-common.h mman.h hugetlb_encode.h; do
     cp "$tmpdir/include/asm-generic/$header" "$output/linux-headers/asm-generic"
 done
 
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index e1687f7547..722c5514d4 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -2935,6 +2935,56 @@ static X86CPUDefinition builtin_x86_defs[] = {
         .model_id = "AMD EPYC Processor (with IBPB)",
         .cache_info = &epyc_cache_info,
     },
+    {
+        .name = "Dhyana",
+        .level = 0xd,
+        .vendor = CPUID_VENDOR_HYGON,
+        .family = 24,
+        .model = 0,
+        .stepping = 1,
+        .features[FEAT_1_EDX] =
+            CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH |
+            CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE |
+            CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE |
+            CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE |
+            CPUID_VME | CPUID_FP87,
+        .features[FEAT_1_ECX] =
+            CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX |
+            CPUID_EXT_XSAVE | CPUID_EXT_POPCNT |
+            CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 |
+            CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 |
+            CPUID_EXT_MONITOR | CPUID_EXT_SSE3,
+        .features[FEAT_8000_0001_EDX] =
+            CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB |
+            CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX |
+            CPUID_EXT2_SYSCALL,
+        .features[FEAT_8000_0001_ECX] =
+            CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH |
+            CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM |
+            CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM |
+            CPUID_EXT3_TOPOEXT,
+        .features[FEAT_8000_0008_EBX] =
+            CPUID_8000_0008_EBX_IBPB,
+        .features[FEAT_7_0_EBX] =
+            CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 |
+            CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED |
+            CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT,
+        /*
+         * Missing: XSAVES (not supported by some Linux versions,
+         * including v4.1 to v4.12).
+         * KVM doesn't yet expose any XSAVES state save component.
+         */
+        .features[FEAT_XSAVE] =
+            CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
+            CPUID_XSAVE_XGETBV1,
+        .features[FEAT_6_EAX] =
+            CPUID_6_EAX_ARAT,
+        .features[FEAT_SVM] =
+            CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE,
+        .xlevel = 0x8000001E,
+        .model_id = "Hygon Dhyana Processor",
+        .cache_info = &epyc_cache_info,
+    },
 };
 
 typedef struct PropValue {
@@ -4541,6 +4591,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
         break;
     case 0x8000001D:
         *eax = 0;
+        if (cpu->cache_info_passthrough) {
+            host_cpuid(index, count, eax, ebx, ecx, edx);
+            break;
+        }
         switch (count) {
         case 0: /* L1 dcache info */
             encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, cs,
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 828067bd1c..0128910661 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -726,6 +726,8 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
 
 #define CPUID_VENDOR_VIA   "CentaurHauls"
 
+#define CPUID_VENDOR_HYGON    "HygonGenuine"
+
 #define CPUID_MWAIT_IBE     (1U << 1) /* Interrupts can exit capability */
 #define CPUID_MWAIT_EMX     (1U << 0) /* enumeration supported */
 
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 9e86db0963..02e22e2017 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2164,7 +2164,7 @@ uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
      * or equal to the (logical) backing page size of guest RAM
      */
     kvm_get_smmu_info(&info, &error_fatal);
-    rampagesize = qemu_getrampagesize();
+    rampagesize = qemu_minrampagesize();
     best_page_shift = 0;
 
     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index 698dd9cb82..b58ef0a8ef 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -399,6 +399,13 @@ int s390_set_memory_limit(uint64_t new_limit, uint64_t *hw_limit)
     return 0;
 }
 
+void s390_set_max_pagesize(uint64_t pagesize, Error **errp)
+{
+    if (kvm_enabled()) {
+        kvm_s390_set_max_pagesize(pagesize, errp);
+    }
+}
+
 void s390_cmma_reset(void)
 {
     if (kvm_enabled()) {
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index d8990c405a..7305cacc7b 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -734,6 +734,7 @@ static inline void s390_do_cpu_load_normal(CPUState *cs, run_on_cpu_data arg)
 /* cpu.c */
 void s390_crypto_reset(void);
 int s390_set_memory_limit(uint64_t new_limit, uint64_t *hw_limit);
+void s390_set_max_pagesize(uint64_t pagesize, Error **errp);
 void s390_cmma_reset(void);
 void s390_enable_css_support(S390CPU *cpu);
 int s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch_id,
diff --git a/target/s390x/kvm-stub.c b/target/s390x/kvm-stub.c
index bf7795e47a..22b4514ca6 100644
--- a/target/s390x/kvm-stub.c
+++ b/target/s390x/kvm-stub.c
@@ -93,6 +93,10 @@ int kvm_s390_set_mem_limit(uint64_t new_limit, uint64_t *hw_limit)
     return 0;
 }
 
+void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp)
+{
+}
+
 void kvm_s390_crypto_reset(void)
 {
 }
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 2c6e35b5aa..7df7be4a1b 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -283,44 +283,37 @@ void kvm_s390_crypto_reset(void)
     }
 }
 
-static int kvm_s390_configure_mempath_backing(KVMState *s)
+void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp)
 {
-    size_t path_psize = qemu_getrampagesize();
-
-    if (path_psize == 4 * KiB) {
-        return 0;
+    if (pagesize == 4 * KiB) {
+        return;
     }
 
     if (!hpage_1m_allowed()) {
-        error_report("This QEMU machine does not support huge page "
-                     "mappings");
-        return -EINVAL;
+        error_setg(errp, "This QEMU machine does not support huge page "
+                   "mappings");
+        return;
     }
 
-    if (path_psize != 1 * MiB) {
-        error_report("Memory backing with 2G pages was specified, "
-                     "but KVM does not support this memory backing");
-        return -EINVAL;
+    if (pagesize != 1 * MiB) {
+        error_setg(errp, "Memory backing with 2G pages was specified, "
+                   "but KVM does not support this memory backing");
+        return;
     }
 
-    if (kvm_vm_enable_cap(s, KVM_CAP_S390_HPAGE_1M, 0)) {
-        error_report("Memory backing with 1M pages was specified, "
-                     "but KVM does not support this memory backing");
-        return -EINVAL;
+    if (kvm_vm_enable_cap(kvm_state, KVM_CAP_S390_HPAGE_1M, 0)) {
+        error_setg(errp, "Memory backing with 1M pages was specified, "
+                   "but KVM does not support this memory backing");
+        return;
     }
 
     cap_hpage_1m = 1;
-    return 0;
 }
 
 int kvm_arch_init(MachineState *ms, KVMState *s)
 {
     MachineClass *mc = MACHINE_GET_CLASS(ms);
 
-    if (kvm_s390_configure_mempath_backing(s)) {
-        return -EINVAL;
-    }
-
     mc->default_cpu_type = S390_CPU_TYPE_NAME("host");
     cap_sync_regs = kvm_check_extension(s, KVM_CAP_SYNC_REGS);
     cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF);
diff --git a/target/s390x/kvm_s390x.h b/target/s390x/kvm_s390x.h
index 6e52287da3..caf985955b 100644
--- a/target/s390x/kvm_s390x.h
+++ b/target/s390x/kvm_s390x.h
@@ -36,6 +36,7 @@ int kvm_s390_cmma_active(void);
 void kvm_s390_cmma_reset(void);
 void kvm_s390_reset_vcpu(S390CPU *cpu);
 int kvm_s390_set_mem_limit(uint64_t new_limit, uint64_t *hw_limit);
+void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp);
 void kvm_s390_crypto_reset(void);
 void kvm_s390_restart_interrupt(S390CPU *cpu);
 void kvm_s390_stop_interrupt(S390CPU *cpu);
diff --git a/tests/acceptance/empty_cpu_model.py b/tests/acceptance/empty_cpu_model.py
new file mode 100644
index 0000000000..3f4f663582
--- /dev/null
+++ b/tests/acceptance/empty_cpu_model.py
@@ -0,0 +1,19 @@
+# Check for crash when using empty -cpu option
+#
+# Copyright (c) 2019 Red Hat, Inc.
+#
+# Author:
+#  Eduardo Habkost <ehabkost@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+import subprocess
+from avocado_qemu import Test
+
+class EmptyCPUModel(Test):
+    def test(self):  # an empty '-cpu' argument must fail cleanly, not crash
+        cmd = [self.qemu_bin, '-S', '-display', 'none', '-machine', 'none', '-cpu', '']
+        r = subprocess.run(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+        self.assertEqual(r.returncode, 1, "QEMU exit code should be 1")
+        self.assertEqual(r.stdout, b'', "QEMU stdout should be empty")
+        self.assertNotEqual(r.stderr, b'', "QEMU stderr shouldn't be empty")
diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c
index c591748aaf..24852d4c7d 100644
--- a/tests/boot-serial-test.c
+++ b/tests/boot-serial-test.c
@@ -114,7 +114,7 @@ static testdef_t tests[] = {
     { "sparc", "SS-4", "", "MB86904" },
     { "sparc", "SS-600MP", "", "TMS390Z55" },
     { "sparc64", "sun4u", "", "UltraSPARC" },
-    { "s390x", "s390-ccw-virtio", "", "virtio device" },
+    { "s390x", "s390-ccw-virtio", "", "device" },
     { "m68k", "mcf5208evb", "", "TT", sizeof(kernel_mcf5208), kernel_mcf5208 },
     { "microblaze", "petalogix-s3adsp1800", "", "TT",
       sizeof(kernel_pls3adsp1800), kernel_pls3adsp1800 },
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index 8565885420..f7f177d0ea 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -10,6 +10,13 @@
  * later.  See the COPYING file in the top-level directory.
  */
 
+#ifdef CONFIG_LINUX
+#include <linux/mman.h>
+#else  /* !CONFIG_LINUX */
+#define MAP_SYNC              0x0
+#define MAP_SHARED_VALIDATE   0x0
+#endif /* CONFIG_LINUX */
+
 #include "qemu/osdep.h"
 #include "qemu/mmap-alloc.h"
 #include "qemu/host-utils.h"
@@ -75,9 +82,14 @@ size_t qemu_mempath_getpagesize(const char *mem_path)
     return getpagesize();
 }
 
-void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
+void *qemu_ram_mmap(int fd,
+                    size_t size,
+                    size_t align,
+                    bool shared,
+                    bool is_pmem)
 {
     int flags;
+    int map_sync_flags = 0;
     int guardfd;
     size_t offset;
     size_t pagesize;
@@ -128,9 +140,40 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
     flags = MAP_FIXED;
     flags |= fd == -1 ? MAP_ANONYMOUS : 0;
     flags |= shared ? MAP_SHARED : MAP_PRIVATE;
+    if (shared && is_pmem) {
+        map_sync_flags = MAP_SYNC | MAP_SHARED_VALIDATE;
+    }
+
     offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr;
 
-    ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE, flags, fd, 0);
+    ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
+               flags | map_sync_flags, fd, 0);
+
+    if (ptr == MAP_FAILED && map_sync_flags) {
+        if (errno == ENOTSUP) {
+            char *proc_link, *file_name;
+            int len;
+            proc_link = g_strdup_printf("/proc/self/fd/%d", fd);
+            file_name = g_malloc0(PATH_MAX);
+            len = readlink(proc_link, file_name, PATH_MAX - 1);
+            if (len < 0) {
+                len = 0;
+            }
+            file_name[len] = '\0';
+            fprintf(stderr, "Warning: requesting persistence across crashes "
+                    "for backend file %s failed. Proceeding without "
+                    "persistence, data might become corrupted in case of host "
+                    "crash.\n", file_name);
+            g_free(proc_link);
+            g_free(file_name);
+        }
+        /*
+         * If mmap() failed with MAP_SHARED_VALIDATE | MAP_SYNC, retry
+         * without these flags so that the mapping can still succeed.
+         */
+        ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
+                   flags, fd, 0);
+    }
 
     if (ptr == MAP_FAILED) {
         munmap(guardptr, total);
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 88dda9cd39..d97b1717d5 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -203,7 +203,7 @@ void *qemu_memalign(size_t alignment, size_t size)
 void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared)
 {
     size_t align = QEMU_VMALLOC_ALIGN;
-    void *ptr = qemu_ram_mmap(-1, size, align, shared);
+    void *ptr = qemu_ram_mmap(-1, size, align, shared, false);
 
     if (ptr == MAP_FAILED) {
         return NULL;
diff --git a/vl.c b/vl.c
index ff5dfb6fbc..4019a4387d 100644
--- a/vl.c
+++ b/vl.c
@@ -1465,45 +1465,34 @@ static int usb_parse(const char *cmdline)
 
 MachineState *current_machine;
 
-static MachineClass *find_machine(const char *name)
+static MachineClass *find_machine(const char *name, GSList *machines)
 {
-    GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false);
-    MachineClass *mc = NULL;
+    GSList *el;
 
     for (el = machines; el; el = el->next) {
-        MachineClass *temp = el->data;
+        MachineClass *mc = el->data;
 
-        if (!strcmp(temp->name, name)) {
-            mc = temp;
-            break;
-        }
-        if (temp->alias &&
-            !strcmp(temp->alias, name)) {
-            mc = temp;
-            break;
+        if (!strcmp(mc->name, name) || !g_strcmp0(mc->alias, name)) {
+            return mc;
         }
     }
 
-    g_slist_free(machines);
-    return mc;
+    return NULL;
 }
 
-MachineClass *find_default_machine(void)
+static MachineClass *find_default_machine(GSList *machines)
 {
-    GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false);
-    MachineClass *mc = NULL;
+    GSList *el;
 
     for (el = machines; el; el = el->next) {
-        MachineClass *temp = el->data;
+        MachineClass *mc = el->data;
 
-        if (temp->is_default) {
-            mc = temp;
-            break;
+        if (mc->is_default) {
+            return mc;
         }
     }
 
-    g_slist_free(machines);
-    return mc;
+    return NULL;
 }
 
 MachineInfoList *qmp_query_machines(Error **errp)
@@ -2585,22 +2574,12 @@ static gint machine_class_cmp(gconstpointer a, gconstpointer b)
                   object_class_get_name(OBJECT_CLASS(mc1)));
 }
 
- static MachineClass *machine_parse(const char *name)
+static MachineClass *machine_parse(const char *name, GSList *machines)
 {
-    MachineClass *mc = NULL;
-    GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false);
+    MachineClass *mc;
+    GSList *el;
 
-    if (name) {
-        mc = find_machine(name);
-    }
-    if (mc) {
-        g_slist_free(machines);
-        return mc;
-    }
-    if (name && !is_help_option(name)) {
-        error_report("unsupported machine type");
-        error_printf("Use -machine help to list supported machines\n");
-    } else {
+    if (is_help_option(name)) {
         printf("Supported machines are:\n");
         machines = g_slist_sort(machines, machine_class_cmp);
         for (el = machines; el; el = el->next) {
@@ -2612,10 +2591,16 @@ static gint machine_class_cmp(gconstpointer a, gconstpointer b)
                    mc->is_default ? " (default)" : "",
                    mc->deprecation_reason ? " (deprecated)" : "");
         }
+        exit(0);
     }
 
-    g_slist_free(machines);
-    exit(!name || !is_help_option(name));
+    mc = find_machine(name, machines);
+    if (!mc) {
+        error_report("unsupported machine type");
+        error_printf("Use -machine help to list supported machines\n");
+        exit(1);
+    }
+    return mc;
 }
 
 void qemu_add_exit_notifier(Notifier *notify)
@@ -2706,7 +2691,8 @@ static const QEMUOption *lookup_opt(int argc, char **argv,
 
 static MachineClass *select_machine(void)
 {
-    MachineClass *machine_class = find_default_machine();
+    GSList *machines = object_class_get_list(TYPE_MACHINE, false);
+    MachineClass *machine_class = find_default_machine(machines);
     const char *optarg;
     QemuOpts *opts;
     Location loc;
@@ -2718,7 +2704,7 @@ static MachineClass *select_machine(void)
 
     optarg = qemu_opt_get(opts, "type");
     if (optarg) {
-        machine_class = machine_parse(optarg);
+        machine_class = machine_parse(optarg, machines);
     }
 
     if (!machine_class) {
@@ -2728,6 +2714,7 @@ static MachineClass *select_machine(void)
     }
 
     loc_pop(&loc);
+    g_slist_free(machines);
     return machine_class;
 }
 
@@ -3002,7 +2989,7 @@ int main(int argc, char **argv, char **envp)
     const char *optarg;
     const char *loadvm = NULL;
     MachineClass *machine_class;
-    const char *cpu_model;
+    const char *cpu_option;
     const char *vga_model = NULL;
     const char *qtest_chrdev = NULL;
     const char *qtest_log = NULL;
@@ -3081,7 +3068,7 @@ int main(int argc, char **argv, char **envp)
     QLIST_INIT (&vm_change_state_head);
     os_setup_early_signal_handling();
 
-    cpu_model = NULL;
+    cpu_option = NULL;
     snapshot = 0;
 
     nb_nics = 0;
@@ -3133,7 +3120,7 @@ int main(int argc, char **argv, char **envp)
             switch(popt->index) {
             case QEMU_OPTION_cpu:
                 /* hw initialization will check this */
-                cpu_model = optarg;
+                cpu_option = optarg;
                 break;
             case QEMU_OPTION_hda:
             case QEMU_OPTION_hdb:
@@ -4050,8 +4037,8 @@ int main(int argc, char **argv, char **envp)
         qemu_set_hw_version(machine_class->hw_version);
     }
 
-    if (cpu_model && is_help_option(cpu_model)) {
-        list_cpus(cpu_model);
+    if (cpu_option && is_help_option(cpu_option)) {
+        list_cpus(cpu_option);
         exit(0);
     }
 
@@ -4299,9 +4286,9 @@ int main(int argc, char **argv, char **envp)
      * Global properties get set up by qdev_prop_register_global(),
      * called from user_register_global_props(), and certain option
      * desugaring.  Also in CPU feature desugaring (buried in
-     * parse_cpu_model()), which happens below this point, but may
+     * parse_cpu_option()), which happens below this point, but may
      * only target the CPU type, which can only be created after
-     * parse_cpu_model() returned the type.
+     * parse_cpu_option() returned the type.
      *
      * Machine compat properties: object_set_machine_compat_props().
      * Accelerator compat props: object_set_accelerator_compat_props(),
@@ -4465,8 +4452,8 @@ int main(int argc, char **argv, char **envp)
 
     /* parse features once if machine provides default cpu_type */
     current_machine->cpu_type = machine_class->default_cpu_type;
-    if (cpu_model) {
-        current_machine->cpu_type = parse_cpu_model(cpu_model);
+    if (cpu_option) {
+        current_machine->cpu_type = parse_cpu_option(cpu_option);
     }
     parse_numa_opts(current_machine);