diff options
68 files changed, 2183 insertions, 306 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 23db6f8408..dabbfccf9c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1181,6 +1181,7 @@ S: Supported F: hw/s390x/ipl.* F: pc-bios/s390-ccw/ F: pc-bios/s390-ccw.img +F: docs/devel/s390-dasd-ipl.txt T: git https://github.com/borntraeger/qemu.git s390-next L: qemu-s390x@nongnu.org @@ -1445,6 +1446,7 @@ S: Supported F: hw/vfio/ccw.c F: hw/s390x/s390-ccw.c F: include/hw/s390x/s390-ccw.h +F: include/hw/s390x/vfio-ccw.h T: git https://github.com/cohuck/qemu.git s390-next L: qemu-s390x@nongnu.org diff --git a/bsd-user/main.c b/bsd-user/main.c index a8c807e8df..6192e9d91e 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -905,7 +905,7 @@ int main(int argc, char **argv) /* init tcg before creating CPUs and to get qemu_host_page_size */ tcg_exec_init(0); - cpu_type = parse_cpu_model(cpu_model); + cpu_type = parse_cpu_option(cpu_model); cpu = cpu_create(cpu_type); env = cpu->env_ptr; #if defined(TARGET_SPARC) || defined(TARGET_PPC) diff --git a/docs/devel/s390-dasd-ipl.txt b/docs/devel/s390-dasd-ipl.txt new file mode 100644 index 0000000000..9107e048e4 --- /dev/null +++ b/docs/devel/s390-dasd-ipl.txt @@ -0,0 +1,133 @@ +***************************** +***** s390 hardware IPL ***** +***************************** + +The s390 hardware IPL process consists of the following steps. + +1. A READ IPL ccw is constructed in memory location 0x0. + This ccw, by definition, reads the IPL1 record which is located on the disk + at cylinder 0 track 0 record 1. Note that the chain flag is on in this ccw + so when it is complete another ccw will be fetched and executed from memory + location 0x08. + +2. Execute the Read IPL ccw at 0x00, thereby reading IPL1 data into 0x00. + IPL1 data is 24 bytes in length and consists of the following pieces of + information: [psw][read ccw][tic ccw]. When the machine executes the Read + IPL ccw it read the 24-bytes of IPL1 to be read into memory starting at + location 0x0. Then the ccw program at 0x08 which consists of a read + ccw and a tic ccw is automatically executed because of the chain flag from + the original READ IPL ccw. The read ccw will read the IPL2 data into memory + and the TIC (Transfer In Channel) will transfer control to the channel + program contained in the IPL2 data. The TIC channel command is the + equivalent of a branch/jump/goto instruction for channel programs. + NOTE: The ccws in IPL1 are defined by the architecture to be format 0. + +3. Execute IPL2. + The TIC ccw instruction at the end of the IPL1 channel program will begin + the execution of the IPL2 channel program. IPL2 is stage-2 of the boot + process and will contain a larger channel program than IPL1. The point of + IPL2 is to find and load either the operating system or a small program that + loads the operating system from disk. At the end of this step all or some of + the real operating system is loaded into memory and we are ready to hand + control over to the guest operating system. At this point the guest + operating system is entirely responsible for loading any more data it might + need to function. NOTE: The IPL2 channel program might read data into memory + location 0 thereby overwriting the IPL1 psw and channel program. This is ok + as long as the data placed in location 0 contains a psw whose instruction + address points to the guest operating system code to execute at the end of + the IPL/boot process. + NOTE: The ccws in IPL2 are defined by the architecture to be format 0. + +4. Start executing the guest operating system. + The psw that was loaded into memory location 0 as part of the ipl process + should contain the needed flags for the operating system we have loaded. The + psw's instruction address will point to the location in memory where we want + to start executing the operating system. This psw is loaded (via LPSW + instruction) causing control to be passed to the operating system code. + +In a non-virtualized environment this process, handled entirely by the hardware, +is kicked off by the user initiating a "Load" procedure from the hardware +management console. This "Load" procedure crafts a special "Read IPL" ccw in +memory location 0x0 that reads IPL1. It then executes this ccw thereby kicking +off the reading of IPL1 data. Since the channel program from IPL1 will be +written immediately after the special "Read IPL" ccw, the IPL1 channel program +will be executed immediately (the special read ccw has the chaining bit turned +on). The TIC at the end of the IPL1 channel program will cause the IPL2 channel +program to be executed automatically. After this sequence completes the "Load" +procedure then loads the psw from 0x0. + +********************************************************** +***** How this all pertains to QEMU (and the kernel) ***** +********************************************************** + +In theory we should merely have to do the following to IPL/boot a guest +operating system from a DASD device: + +1. Place a "Read IPL" ccw into memory location 0x0 with chaining bit on. +2. Execute channel program at 0x0. +3. LPSW 0x0. + +However, our emulation of the machine's channel program logic within the kernel +is missing one key feature that is required for this process to work: +non-prefetch of ccw data. + +When we start a channel program we pass the channel subsystem parameters via an +ORB (Operation Request Block). One of those parameters is a prefetch bit. If the +bit is on then the vfio-ccw kernel driver is allowed to read the entire channel +program from guest memory before it starts executing it. This means that any +channel commands that read additional channel commands will not work as expected +because the newly read commands will only exist in guest memory and NOT within +the kernel's channel subsystem memory. The kernel vfio-ccw driver currently +requires this bit to be on for all channel programs. This is a problem because +the IPL process consists of transferring control from the "Read IPL" ccw +immediately to the IPL1 channel program that was read by "Read IPL". + +Not being able to turn off prefetch will also prevent the TIC at the end of the +IPL1 channel program from transferring control to the IPL2 channel program. + +Lastly, in some cases (the zipl bootloader for example) the IPL2 program also +transfers control to another channel program segment immediately after reading +it from the disk. So we need to be able to handle this case. + +************************** +***** What QEMU does ***** +************************** + +Since we are forced to live with prefetch we cannot use the very simple IPL +procedure we defined in the preceding section. So we compensate by doing the +following. + +1. Place "Read IPL" ccw into memory location 0x0, but turn off chaining bit. +2. Execute "Read IPL" at 0x0. + + So now IPL1's psw is at 0x0 and IPL1's channel program is at 0x08. + +4. Write a custom channel program that will seek to the IPL2 record and then + execute the READ and TIC ccws from IPL1. Normally the seek is not required + because after reading the IPL1 record the disk is automatically positioned + to read the very next record which will be IPL2. But since we are not reading + both IPL1 and IPL2 as part of the same channel program we must manually set + the position. + +5. Grab the target address of the TIC instruction from the IPL1 channel program. + This address is where the IPL2 channel program starts. + + Now IPL2 is loaded into memory somewhere, and we know the address. + +6. Execute the IPL2 channel program at the address obtained in step #5. + + Because this channel program can be dynamic, we must use a special algorithm + that detects a READ immediately followed by a TIC and breaks the ccw chain + by turning off the chain bit in the READ ccw. When control is returned from + the kernel/hardware to the QEMU bios code we immediately issue another start + subchannel to execute the remaining TIC instruction. This causes the entire + channel program (starting from the TIC) and all needed data to be refetched + thereby stepping around the limitation that would otherwise prevent this + channel program from executing properly. + + Now the operating system code is loaded somewhere in guest memory and the psw + in memory location 0x0 will point to entry code for the guest operating + system. + +7. LPSW 0x0. + LPSW transfers control to the guest operating system and we're done. diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt index 7231c2d78f..b531cacd35 100644 --- a/docs/nvdimm.txt +++ b/docs/nvdimm.txt @@ -144,9 +144,25 @@ Guest Data Persistence ---------------------- Though QEMU supports multiple types of vNVDIMM backends on Linux, -currently the only one that can guarantee the guest write persistence -is the device DAX on the real NVDIMM device (e.g., /dev/dax0.0), to -which all guest access do not involve any host-side kernel cache. +the only backend that can guarantee the guest write persistence is: + +A. DAX device (e.g., /dev/dax0.0, ) or +B. DAX file(mounted with dax option) + +When using B (A file supporting direct mapping of persistent memory) +as a backend, write persistence is guaranteed if the host kernel has +support for the MAP_SYNC flag in the mmap system call (available +since Linux 4.15 and on certain distro kernels) and additionally +both 'pmem' and 'share' flags are set to 'on' on the backend. + +If these conditions are not satisfied i.e. if either 'pmem' or 'share' +are not set, if the backend file does not support DAX or if MAP_SYNC +is not supported by the host kernel, write persistence is not +guaranteed after a system crash. For compatibility reasons, these +conditions are ignored if not satisfied. Currently, no way is +provided to test for them. +For more details, please reference mmap(2) man page: +http://man7.org/linux/man-pages/man2/mmap.2.html. When using other types of backends, it's suggested to set 'unarmed' option of '-device nvdimm' to 'on', which sets the unarmed flag of the diff --git a/exec.c b/exec.c index 2646207661..4e734770c2 100644 --- a/exec.c +++ b/exec.c @@ -983,14 +983,18 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp) #endif } -const char *parse_cpu_model(const char *cpu_model) +const char *parse_cpu_option(const char *cpu_option) { ObjectClass *oc; CPUClass *cc; gchar **model_pieces; const char *cpu_type; - model_pieces = g_strsplit(cpu_model, ",", 2); + model_pieces = g_strsplit(cpu_option, ",", 2); + if (!model_pieces[0]) { + error_report("-cpu option cannot be empty"); + exit(1); + } oc = cpu_class_by_name(CPU_RESOLVING_TYPE, model_pieces[0]); if (oc == NULL) { @@ -1688,7 +1692,7 @@ void ram_block_dump(Monitor *mon) * when we actually open and map them. Iterate over the file * descriptors instead, and use qemu_fd_getpagesize(). */ -static int find_max_supported_pagesize(Object *obj, void *opaque) +static int find_min_backend_pagesize(Object *obj, void *opaque) { long *hpsize_min = opaque; @@ -1704,7 +1708,27 @@ static int find_max_supported_pagesize(Object *obj, void *opaque) return 0; } -long qemu_getrampagesize(void) +static int find_max_backend_pagesize(Object *obj, void *opaque) +{ + long *hpsize_max = opaque; + + if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + long hpsize = host_memory_backend_pagesize(backend); + + if (host_memory_backend_is_mapped(backend) && (hpsize > *hpsize_max)) { + *hpsize_max = hpsize; + } + } + + return 0; +} + +/* + * TODO: We assume right now that all mapped host memory backends are + * used as RAM, however some might be used for different purposes. + */ +long qemu_minrampagesize(void) { long hpsize = LONG_MAX; long mainrampagesize; @@ -1724,7 +1748,7 @@ long qemu_getrampagesize(void) */ memdev_root = object_resolve_path("/objects", NULL); if (memdev_root) { - object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize); + object_child_foreach(memdev_root, find_min_backend_pagesize, &hpsize); } if (hpsize == LONG_MAX) { /* No additional memory regions found ==> Report main RAM page size */ @@ -1747,8 +1771,24 @@ long qemu_getrampagesize(void) return hpsize; } + +long qemu_maxrampagesize(void) +{ + long pagesize = qemu_mempath_getpagesize(mem_path); + Object *memdev_root = object_resolve_path("/objects", NULL); + + if (memdev_root) { + object_child_foreach(memdev_root, find_max_backend_pagesize, + &pagesize); + } + return pagesize; +} #else -long qemu_getrampagesize(void) +long qemu_minrampagesize(void) +{ + return getpagesize(); +} +long qemu_maxrampagesize(void) { return getpagesize(); } @@ -1879,7 +1919,7 @@ static void *file_ram_alloc(RAMBlock *block, } area = qemu_ram_mmap(fd, memory, block->mr->align, - block->flags & RAM_SHARED); + block->flags & RAM_SHARED, block->flags & RAM_PMEM); if (area == MAP_FAILED) { error_setg_errno(errp, errno, "unable to map backing store for guest RAM"); diff --git a/hw/arm/virt.c b/hw/arm/virt.c index ce2664a30b..16ba67f7a7 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1978,10 +1978,17 @@ static void machvirt_machine_init(void) } type_init(machvirt_machine_init); +static void virt_machine_4_1_options(MachineClass *mc) +{ +} +DEFINE_VIRT_MACHINE_AS_LATEST(4, 1) + static void virt_machine_4_0_options(MachineClass *mc) { + virt_machine_4_1_options(mc); + compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); } -DEFINE_VIRT_MACHINE_AS_LATEST(4, 0) +DEFINE_VIRT_MACHINE(4, 0) static void virt_machine_3_1_options(MachineClass *mc) { diff --git a/hw/core/machine.c b/hw/core/machine.c index 743fef2898..5d046a43e3 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -24,6 +24,9 @@ #include "hw/pci/pci.h" #include "hw/mem/nvdimm.h" +GlobalProperty hw_compat_4_0[] = {}; +const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0); + GlobalProperty hw_compat_3_1[] = { { "pcie-root-port", "x-speed", "2_5" }, { "pcie-root-port", "x-width", "1" }, diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 6eabdf9917..4a4e2c7fd4 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1601,6 +1601,8 @@ static void amdvi_class_init(ObjectClass *klass, void* data) dc_class->int_remap = amdvi_int_remap; /* Supported by the pc-q35-* machine types */ dc->user_creatable = true; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device"; } static const TypeInfo amdvi = { diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 2558f48fe6..44b1231157 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3741,6 +3741,8 @@ static void vtd_class_init(ObjectClass *klass, void *data) x86_class->int_remap = vtd_int_remap; /* Supported by the pc-q35-* machine types */ dc->user_creatable = true; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->desc = "Intel IOMMU (VT-d) DMA Remapping device"; } static const TypeInfo vtd_info = { diff --git a/hw/i386/pc.c b/hw/i386/pc.c index f2c15bf1f2..d98b737b8f 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -115,6 +115,9 @@ struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; /* Physical Address of PVH entry point read from kernel ELF NOTE */ static size_t pvh_start_addr; +GlobalProperty pc_compat_4_0[] = {}; +const size_t pc_compat_4_0_len = G_N_ELEMENTS(pc_compat_4_0); + GlobalProperty pc_compat_3_1[] = { { "intel-iommu", "dma-drain", "off" }, { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 8ad8e885c6..c07c4a5b38 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -428,13 +428,25 @@ static void pc_i440fx_machine_options(MachineClass *m) machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); } -static void pc_i440fx_4_0_machine_options(MachineClass *m) +static void pc_i440fx_4_1_machine_options(MachineClass *m) { pc_i440fx_machine_options(m); m->alias = "pc"; m->is_default = 1; } +DEFINE_I440FX_MACHINE(v4_1, "pc-i440fx-4.1", NULL, + pc_i440fx_4_1_machine_options); + +static void pc_i440fx_4_0_machine_options(MachineClass *m) +{ + pc_i440fx_4_1_machine_options(m); + m->alias = NULL; + m->is_default = 0; + compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len); + compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len); +} + DEFINE_I440FX_MACHINE(v4_0, "pc-i440fx-4.0", NULL, pc_i440fx_4_0_machine_options); @@ -911,6 +923,7 @@ static void isa_bridge_class_init(ObjectClass *klass, void *data) PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); dc->desc = "ISA bridge faked to support IGD PT"; + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); k->vendor_id = PCI_VENDOR_ID_INTEL; k->class_id = PCI_CLASS_BRIDGE_ISA; }; diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 372c6b73be..37dd350511 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -365,12 +365,23 @@ static void pc_q35_machine_options(MachineClass *m) m->max_cpus = 288; } -static void pc_q35_4_0_machine_options(MachineClass *m) +static void pc_q35_4_1_machine_options(MachineClass *m) { pc_q35_machine_options(m); m->alias = "q35"; } +DEFINE_Q35_MACHINE(v4_1, "pc-q35-4.1", NULL, + pc_q35_4_1_machine_options); + +static void pc_q35_4_0_machine_options(MachineClass *m) +{ + pc_q35_4_1_machine_options(m); + m->alias = NULL; + compat_props_add(m->compat_props, hw_compat_4_0, hw_compat_4_0_len); + compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len); +} + DEFINE_Q35_MACHINE(v4_0, "pc-q35-4.0", NULL, pc_q35_4_0_machine_options); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index c56939a43b..2ef3ce4362 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -4395,14 +4395,25 @@ static const TypeInfo spapr_machine_info = { type_init(spapr_machine_register_##suffix) /* + * pseries-4.1 + */ +static void spapr_machine_4_1_class_options(MachineClass *mc) +{ + /* Defaults for the latest behaviour inherited from the base class */ +} + +DEFINE_SPAPR_MACHINE(4_1, "4.1", true); + +/* * pseries-4.0 */ static void spapr_machine_4_0_class_options(MachineClass *mc) { - /* Defaults for the latest behaviour inherited from the base class */ + spapr_machine_4_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); } -DEFINE_SPAPR_MACHINE(4_0, "4.0", true); +DEFINE_SPAPR_MACHINE(4_0, "4.0", false); /* * pseries-3.1 diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index edc5ed0e0c..9b1c10baa6 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -347,7 +347,7 @@ static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, warn_report("Many guests require at least 64kiB hpt-max-page-size"); } - spapr_check_pagesize(spapr, qemu_getrampagesize(), errp); + spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); } static bool spapr_pagesize_cb(void *opaque, uint32_t seg_pshift, @@ -609,7 +609,7 @@ static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr, uint8_t mps; if (kvmppc_hpt_needs_host_contiguous_pages()) { - mps = ctz64(qemu_getrampagesize()); + mps = ctz64(qemu_minrampagesize()); } else { mps = 34; /* allow everything up to 16GiB, i.e. everything */ } diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 51b272e190..d0cc06a05f 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -19,6 +19,7 @@ #include "hw/loader.h" #include "hw/boards.h" #include "hw/s390x/virtio-ccw.h" +#include "hw/s390x/vfio-ccw.h" #include "hw/s390x/css.h" #include "hw/s390x/ebcdic.h" #include "ipl.h" @@ -303,16 +304,36 @@ static void s390_ipl_set_boot_menu(S390IPLState *ipl) ipl->qipl.boot_menu_timeout = cpu_to_be32(splash_time); } -static CcwDevice *s390_get_ccw_device(DeviceState *dev_st) +#define CCW_DEVTYPE_NONE 0x00 +#define CCW_DEVTYPE_VIRTIO 0x01 +#define CCW_DEVTYPE_VIRTIO_NET 0x02 +#define CCW_DEVTYPE_SCSI 0x03 +#define CCW_DEVTYPE_VFIO 0x04 + +static CcwDevice *s390_get_ccw_device(DeviceState *dev_st, int *devtype) { CcwDevice *ccw_dev = NULL; + int tmp_dt = CCW_DEVTYPE_NONE; if (dev_st) { + VirtIONet *virtio_net_dev = (VirtIONet *) + object_dynamic_cast(OBJECT(dev_st), TYPE_VIRTIO_NET); VirtioCcwDevice *virtio_ccw_dev = (VirtioCcwDevice *) object_dynamic_cast(OBJECT(qdev_get_parent_bus(dev_st)->parent), TYPE_VIRTIO_CCW_DEVICE); + VFIOCCWDevice *vfio_ccw_dev = (VFIOCCWDevice *) + object_dynamic_cast(OBJECT(dev_st), TYPE_VFIO_CCW); + if (virtio_ccw_dev) { ccw_dev = CCW_DEVICE(virtio_ccw_dev); + if (virtio_net_dev) { + tmp_dt = CCW_DEVTYPE_VIRTIO_NET; + } else { + tmp_dt = CCW_DEVTYPE_VIRTIO; + } + } else if (vfio_ccw_dev) { + ccw_dev = CCW_DEVICE(vfio_ccw_dev); + tmp_dt = CCW_DEVTYPE_VFIO; } else { SCSIDevice *sd = (SCSIDevice *) object_dynamic_cast(OBJECT(dev_st), @@ -325,9 +346,13 @@ static CcwDevice *s390_get_ccw_device(DeviceState *dev_st) ccw_dev = (CcwDevice *)object_dynamic_cast(OBJECT(scsi_ccw), TYPE_CCW_DEVICE); + tmp_dt = CCW_DEVTYPE_SCSI; } } } + if (devtype) { + *devtype = tmp_dt; + } return ccw_dev; } @@ -335,20 +360,22 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl) { DeviceState *dev_st; CcwDevice *ccw_dev = NULL; + SCSIDevice *sd; + int devtype; dev_st = get_boot_device(0); if (dev_st) { - ccw_dev = s390_get_ccw_device(dev_st); + ccw_dev = s390_get_ccw_device(dev_st, &devtype); } /* * Currently allow IPL only from CCW devices. */ if (ccw_dev) { - SCSIDevice *sd = (SCSIDevice *) object_dynamic_cast(OBJECT(dev_st), - TYPE_SCSI_DEVICE); - - if (sd) { + switch (devtype) { + case CCW_DEVTYPE_SCSI: + sd = (SCSIDevice *) object_dynamic_cast(OBJECT(dev_st), + TYPE_SCSI_DEVICE); ipl->iplb.len = cpu_to_be32(S390_IPLB_MIN_QEMU_SCSI_LEN); ipl->iplb.blk0_len = cpu_to_be32(S390_IPLB_MIN_QEMU_SCSI_LEN - S390_IPLB_HEADER_LEN); @@ -358,20 +385,24 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl) ipl->iplb.scsi.channel = cpu_to_be16(sd->channel); ipl->iplb.scsi.devno = cpu_to_be16(ccw_dev->sch->devno); ipl->iplb.scsi.ssid = ccw_dev->sch->ssid & 3; - } else { - VirtIONet *vn = (VirtIONet *) object_dynamic_cast(OBJECT(dev_st), - TYPE_VIRTIO_NET); - + break; + case CCW_DEVTYPE_VFIO: + ipl->iplb.len = cpu_to_be32(S390_IPLB_MIN_CCW_LEN); + ipl->iplb.pbt = S390_IPL_TYPE_CCW; + ipl->iplb.ccw.devno = cpu_to_be16(ccw_dev->sch->devno); + ipl->iplb.ccw.ssid = ccw_dev->sch->ssid & 3; + break; + case CCW_DEVTYPE_VIRTIO_NET: + ipl->netboot = true; + /* Fall through to CCW_DEVTYPE_VIRTIO case */ + case CCW_DEVTYPE_VIRTIO: ipl->iplb.len = cpu_to_be32(S390_IPLB_MIN_CCW_LEN); ipl->iplb.blk0_len = cpu_to_be32(S390_IPLB_MIN_CCW_LEN - S390_IPLB_HEADER_LEN); ipl->iplb.pbt = S390_IPL_TYPE_CCW; ipl->iplb.ccw.devno = cpu_to_be16(ccw_dev->sch->devno); ipl->iplb.ccw.ssid = ccw_dev->sch->ssid & 3; - - if (vn) { - ipl->netboot = true; - } + break; } if (!s390_ipl_set_loadparm(ipl->iplb.loadparm)) { @@ -530,7 +561,7 @@ void s390_ipl_reset_request(CPUState *cs, enum s390_reset reset_type) !ipl->netboot && ipl->iplb.pbt == S390_IPL_TYPE_CCW && is_virtio_scsi_device(&ipl->iplb)) { - CcwDevice *ccw_dev = s390_get_ccw_device(get_boot_device(0)); + CcwDevice *ccw_dev = s390_get_ccw_device(get_boot_device(0), NULL); if (ccw_dev && cpu_to_be16(ccw_dev->sch->devno) == ipl->iplb.ccw.devno && diff --git a/hw/s390x/s390-ccw.c b/hw/s390x/s390-ccw.c index cad91ee626..f5f025d1b6 100644 --- a/hw/s390x/s390-ccw.c +++ b/hw/s390x/s390-ccw.c @@ -124,6 +124,14 @@ static void s390_ccw_unrealize(S390CCWDevice *cdev, Error **errp) g_free(cdev->mdevid); } +static void s390_ccw_instance_init(Object *obj) +{ + S390CCWDevice *dev = S390_CCW_DEVICE(obj); + + device_add_bootindex_property(obj, &dev->bootindex, "bootindex", + "/disk@0,0", DEVICE(obj), NULL); +} + static void s390_ccw_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -137,6 +145,7 @@ static void s390_ccw_class_init(ObjectClass *klass, void *data) static const TypeInfo s390_ccw_info = { .name = TYPE_S390_CCW, .parent = TYPE_CCW_DEVICE, + .instance_init = s390_ccw_instance_init, .instance_size = sizeof(S390CCWDevice), .class_size = sizeof(S390CCWDeviceClass), .class_init = s390_ccw_class_init, diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index d11069b860..bbc6e8fa0b 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -15,6 +15,7 @@ #include "cpu.h" #include "hw/boards.h" #include "exec/address-spaces.h" +#include "exec/ram_addr.h" #include "hw/s390x/s390-virtio-hcall.h" #include "hw/s390x/sclp.h" #include "hw/s390x/s390_flic.h" @@ -163,6 +164,7 @@ static void s390_memory_init(ram_addr_t mem_size) MemoryRegion *sysmem = get_system_memory(); ram_addr_t chunk, offset = 0; unsigned int number = 0; + Error *local_err = NULL; gchar *name; /* allocate RAM for core */ @@ -182,6 +184,15 @@ static void s390_memory_init(ram_addr_t mem_size) } g_free(name); + /* + * Configure the maximum page size. As no memory devices were created + * yet, this is the page size of initial memory only. + */ + s390_set_max_pagesize(qemu_maxrampagesize(), &local_err); + if (local_err) { + error_report_err(local_err); + exit(EXIT_FAILURE); + } /* Initialize storage key device */ s390_skeys_init(); /* Initialize storage attributes device */ @@ -253,6 +264,7 @@ static void ccw_init(MachineState *machine) DeviceState *dev; s390_sclp_init(); + /* init memory + setup max page size. Required for the CPU model */ s390_memory_init(machine->ram_size); /* init CPUs (incl. CPU model) early so s390_has_feature() works */ @@ -646,14 +658,26 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +static void ccw_machine_4_1_instance_options(MachineState *machine) +{ +} + +static void ccw_machine_4_1_class_options(MachineClass *mc) +{ +} +DEFINE_CCW_MACHINE(4_1, "4.1", true); + static void ccw_machine_4_0_instance_options(MachineState *machine) { + ccw_machine_4_1_instance_options(machine); } static void ccw_machine_4_0_class_options(MachineClass *mc) { + ccw_machine_4_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); } -DEFINE_CCW_MACHINE(4_0, "4.0", true); +DEFINE_CCW_MACHINE(4_0, "4.0", false); static void ccw_machine_3_1_instance_options(MachineState *machine) { diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c index c44d13cc50..31dd3a2a87 100644 --- a/hw/vfio/ccw.c +++ b/hw/vfio/ccw.c @@ -21,12 +21,12 @@ #include "hw/vfio/vfio.h" #include "hw/vfio/vfio-common.h" #include "hw/s390x/s390-ccw.h" +#include "hw/s390x/vfio-ccw.h" #include "hw/s390x/ccw-device.h" #include "exec/address-spaces.h" #include "qemu/error-report.h" -#define TYPE_VFIO_CCW "vfio-ccw" -typedef struct VFIOCCWDevice { +struct VFIOCCWDevice { S390CCWDevice cdev; VFIODevice vdev; uint64_t io_region_size; @@ -35,7 +35,7 @@ typedef struct VFIOCCWDevice { EventNotifier io_notifier; bool force_orb_pfch; bool warned_orb_pfch; -} VFIOCCWDevice; +}; static inline void warn_once_pfch(VFIOCCWDevice *vcdev, SubchDev *sch, const char *msg) diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index 57fe758e54..96c0ad9d9b 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -148,7 +148,7 @@ int vfio_spapr_create_window(VFIOContainer *container, uint64_t pagesize = memory_region_iommu_get_min_page_size(iommu_mr); unsigned entries, bits_total, bits_per_level, max_levels; struct vfio_iommu_spapr_tce_create create = { .argsz = sizeof(create) }; - long rampagesize = qemu_getrampagesize(); + long rampagesize = qemu_minrampagesize(); /* * The host might not support the guest supported IOMMU page size, diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 9ecd911c3e..139ad79390 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -73,7 +73,8 @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, bool ramblock_is_pmem(RAMBlock *rb); -long qemu_getrampagesize(void); +long qemu_minrampagesize(void); +long qemu_maxrampagesize(void); /** * qemu_ram_alloc_from_file, diff --git a/include/hw/boards.h b/include/hw/boards.h index e231860666..6f7916f88f 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -57,7 +57,6 @@ void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner, #define MACHINE_CLASS(klass) \ OBJECT_CLASS_CHECK(MachineClass, (klass), TYPE_MACHINE) -MachineClass *find_default_machine(void); extern MachineState *current_machine; void machine_run_board_init(MachineState *machine); @@ -293,6 +292,9 @@ struct MachineState { } \ type_init(machine_initfn##_register_types) +extern GlobalProperty hw_compat_4_0[]; +extern const size_t hw_compat_4_0_len; + extern GlobalProperty hw_compat_3_1[]; extern const size_t hw_compat_3_1_len; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index ca65ef18af..43df7230a2 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -293,6 +293,9 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t); int e820_get_num_entries(void); bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); +extern GlobalProperty pc_compat_4_0[]; +extern const size_t pc_compat_4_0_len; + extern GlobalProperty pc_compat_3_1[]; extern const size_t pc_compat_3_1_len; diff --git a/include/hw/s390x/s390-ccw.h b/include/hw/s390x/s390-ccw.h index 7d15a1a5d4..901d805d79 100644 --- a/include/hw/s390x/s390-ccw.h +++ b/include/hw/s390x/s390-ccw.h @@ -27,6 +27,7 @@ typedef struct S390CCWDevice { CcwDevice parent_obj; CssDevId hostid; char *mdevid; + int32_t bootindex; } S390CCWDevice; typedef struct S390CCWDeviceClass { diff --git a/include/hw/s390x/vfio-ccw.h b/include/hw/s390x/vfio-ccw.h new file mode 100644 index 0000000000..ee5250d0d7 --- /dev/null +++ b/include/hw/s390x/vfio-ccw.h @@ -0,0 +1,28 @@ +/* + * vfio based subchannel assignment support + * + * Copyright 2017, 2019 IBM Corp. + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> + * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> + * Pierre Morel <pmorel@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef HW_VFIO_CCW_H +#define HW_VFIO_CCW_H + +#include "hw/vfio/vfio-common.h" +#include "hw/s390x/s390-ccw.h" +#include "hw/s390x/ccw-device.h" + +#define TYPE_VFIO_CCW "vfio-ccw" +#define VFIO_CCW(obj) \ + OBJECT_CHECK(VFIOCCWDevice, (obj), TYPE_VFIO_CCW) + +#define TYPE_VFIO_CCW "vfio-ccw" +typedef struct VFIOCCWDevice VFIOCCWDevice; + +#endif diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h index ef04f0ed5b..eec98d82c1 100644 --- a/include/qemu/mmap-alloc.h +++ b/include/qemu/mmap-alloc.h @@ -7,7 +7,26 @@ size_t qemu_fd_getpagesize(int fd); size_t qemu_mempath_getpagesize(const char *mem_path); -void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared); +/** + * qemu_ram_mmap: mmap the specified file or device. + * + * Parameters: + * @fd: the file or the device to mmap + * @size: the number of bytes to be mmaped + * @align: if not zero, specify the alignment of the starting mapping address; + * otherwise, the alignment in use will be determined by QEMU. + * @shared: map has RAM_SHARED flag. + * @is_pmem: map has RAM_PMEM flag. + * + * Return: + * On success, return a pointer to the mapped area. + * On failure, return MAP_FAILED. + */ +void *qemu_ram_mmap(int fd, + size_t size, + size_t align, + bool shared, + bool is_pmem); void qemu_ram_munmap(int fd, void *ptr, size_t size); diff --git a/include/qom/cpu.h b/include/qom/cpu.h index e9bec3a5bc..08abcbd3fe 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -681,15 +681,15 @@ ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model); CPUState *cpu_create(const char *typename); /** - * parse_cpu_model: - * @cpu_model: The model string including optional parameters. + * parse_cpu_option: + * @cpu_option: The -cpu option including optional parameters. * * processes optional parameters and registers them as global properties * * Returns: type of CPU to create or prints error and terminates process * if an error occurred. */ -const char *parse_cpu_model(const char *cpu_model); +const char *parse_cpu_option(const char *cpu_option); /** * cpu_has_work: diff --git a/linux-headers/asm-arm/mman.h b/linux-headers/asm-arm/mman.h new file mode 100644 index 0000000000..41f99c573b --- /dev/null +++ b/linux-headers/asm-arm/mman.h @@ -0,0 +1,4 @@ +#include <asm-generic/mman.h> + +#define arch_mmap_check(addr, len, flags) \ + (((flags) & MAP_FIXED && (addr) < FIRST_USER_ADDRESS) ? -EINVAL : 0) diff --git a/linux-headers/asm-arm64/mman.h b/linux-headers/asm-arm64/mman.h new file mode 100644 index 0000000000..8eebf89f5a --- /dev/null +++ b/linux-headers/asm-arm64/mman.h @@ -0,0 +1 @@ +#include <asm-generic/mman.h> diff --git a/linux-headers/asm-generic/hugetlb_encode.h b/linux-headers/asm-generic/hugetlb_encode.h new file mode 100644 index 0000000000..b0f8e87235 --- /dev/null +++ b/linux-headers/asm-generic/hugetlb_encode.h @@ -0,0 +1,36 @@ +#ifndef _ASM_GENERIC_HUGETLB_ENCODE_H_ +#define _ASM_GENERIC_HUGETLB_ENCODE_H_ + +/* + * Several system calls take a flag to request "hugetlb" huge pages. + * Without further specification, these system calls will use the + * system's default huge page size. If a system supports multiple + * huge page sizes, the desired huge page size can be specified in + * bits [26:31] of the flag arguments. The value in these 6 bits + * will encode the log2 of the huge page size. + * + * The following definitions are associated with this huge page size + * encoding in flag arguments. System call specific header files + * that use this encoding should include this file. They can then + * provide definitions based on these with their own specific prefix. + * for example: + * #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT + */ + +#define HUGETLB_FLAG_ENCODE_SHIFT 26 +#define HUGETLB_FLAG_ENCODE_MASK 0x3f + +#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT) + +#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */ diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h new file mode 100644 index 0000000000..e7ee32861d --- /dev/null +++ b/linux-headers/asm-generic/mman-common.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_GENERIC_MMAN_COMMON_H +#define __ASM_GENERIC_MMAN_COMMON_H + +/* + Author: Michael S. Tsirkin <mst@mellanox.co.il>, Mellanox Technologies Ltd. + Based on: asm-xxx/mman.h +*/ + +#define PROT_READ 0x1 /* page can be read */ +#define PROT_WRITE 0x2 /* page can be written */ +#define PROT_EXEC 0x4 /* page can be executed */ +#define PROT_SEM 0x8 /* page may be used for atomic ops */ +#define PROT_NONE 0x0 /* page can not be accessed */ +#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ +#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ + +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ +#define MAP_TYPE 0x0f /* Mask for type of mapping */ +#define MAP_FIXED 0x10 /* Interpret addr exactly */ +#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED +# define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be uninitialized */ +#else +# define MAP_UNINITIALIZED 0x0 /* Don't support this flag */ +#endif + +/* 0x0100 - 0x80000 flags are defined in asm-generic/mman.h */ +#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ + +/* + * Flags for mlock + */ +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ + +#define MS_ASYNC 1 /* sync memory asynchronously */ +#define MS_INVALIDATE 2 /* invalidate the caches */ +#define MS_SYNC 4 /* synchronous memory sync */ + +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_DONTNEED 4 /* don't need these pages */ + +/* common parameters: try to keep these consistent across architectures */ +#define MADV_FREE 8 /* free pages only if memory pressure */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ +#define MADV_HWPOISON 100 /* poison a page for testing */ +#define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */ + +#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ +#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ + +#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ +#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ + +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */ + +#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ +#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ + +/* compatibility flags */ +#define MAP_FILE 0 + +#define PKEY_DISABLE_ACCESS 0x1 +#define PKEY_DISABLE_WRITE 0x2 +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ + PKEY_DISABLE_WRITE) + +#endif /* __ASM_GENERIC_MMAN_COMMON_H */ diff --git a/linux-headers/asm-generic/mman.h b/linux-headers/asm-generic/mman.h new file mode 100644 index 0000000000..653687d977 --- /dev/null +++ b/linux-headers/asm-generic/mman.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_GENERIC_MMAN_H +#define __ASM_GENERIC_MMAN_H + +#include <asm-generic/mman-common.h> + +#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ +#define MAP_LOCKED 0x2000 /* pages are locked */ +#define MAP_NORESERVE 0x4000 /* don't check for reservations */ +#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ +#define MAP_SYNC 0x80000 /* perform synchronous page faults for the mapping */ + +/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */ + +#define MCL_CURRENT 1 /* lock all current mappings */ +#define MCL_FUTURE 2 /* lock all future mappings */ +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ + +#endif /* __ASM_GENERIC_MMAN_H */ diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h new file mode 100644 index 0000000000..3035ca499c --- /dev/null +++ b/linux-headers/asm-mips/mman.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1995, 1999, 2002 by Ralf Baechle + */ +#ifndef _ASM_MMAN_H +#define _ASM_MMAN_H + +/* + * Protections are chosen from these bits, OR'd together. The + * implementation does not necessarily support PROT_EXEC or PROT_WRITE + * without PROT_READ. The only guarantees are that no writing will be + * allowed without PROT_WRITE and no access will be allowed for PROT_NONE. + */ +#define PROT_NONE 0x00 /* page can not be accessed */ +#define PROT_READ 0x01 /* page can be read */ +#define PROT_WRITE 0x02 /* page can be written */ +#define PROT_EXEC 0x04 /* page can be executed */ +/* 0x08 reserved for PROT_EXEC_NOFLUSH */ +#define PROT_SEM 0x10 /* page may be used for atomic ops */ +#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ +#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ + +/* + * Flags for mmap + */ +#define MAP_SHARED 0x001 /* Share changes */ +#define MAP_PRIVATE 0x002 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x003 /* share + validate extension flags */ +#define MAP_TYPE 0x00f /* Mask for type of mapping */ +#define MAP_FIXED 0x010 /* Interpret addr exactly */ + +/* not used by linux, but here to make sure we don't clash with ABI defines */ +#define MAP_RENAME 0x020 /* Assign page to file */ +#define MAP_AUTOGROW 0x040 /* File may grow by writing */ +#define MAP_LOCAL 0x080 /* Copy on fork/sproc */ +#define MAP_AUTORSRV 0x100 /* Logical swap reserved on demand */ + +/* These are linux-specific */ +#define MAP_NORESERVE 0x0400 /* don't check for reservations */ +#define MAP_ANONYMOUS 0x0800 /* don't use a file */ +#define MAP_GROWSDOWN 0x1000 /* stack-like segment */ +#define MAP_DENYWRITE 0x2000 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x4000 /* mark it as an executable */ +#define MAP_LOCKED 0x8000 /* pages are locked */ +#define MAP_POPULATE 0x10000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x20000 /* do not block on IO */ +#define MAP_STACK 0x40000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x80000 /* create a huge page mapping */ +#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ + +/* + * Flags for msync + */ +#define MS_ASYNC 0x0001 /* sync memory asynchronously */ +#define MS_INVALIDATE 0x0002 /* invalidate mappings & caches */ +#define MS_SYNC 0x0004 /* synchronous memory sync */ + +/* + * Flags for mlockall + */ +#define MCL_CURRENT 1 /* lock all current mappings */ +#define MCL_FUTURE 2 /* lock all future mappings */ +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ + +/* + * Flags for mlock + */ +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ + +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_DONTNEED 4 /* don't need these pages */ + +/* common parameters: try to keep these consistent across architectures */ +#define MADV_FREE 8 /* free pages only if memory pressure */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ + +#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ +#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ +#define MADV_HWPOISON 100 /* poison a page for testing */ + +#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ +#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ + +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + +#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ +#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ + +/* compatibility flags */ +#define MAP_FILE 0 + +#define PKEY_DISABLE_ACCESS 0x1 +#define PKEY_DISABLE_WRITE 0x2 +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ + PKEY_DISABLE_WRITE) + +#endif /* _ASM_MMAN_H */ diff --git a/linux-headers/asm-powerpc/mman.h b/linux-headers/asm-powerpc/mman.h new file mode 100644 index 0000000000..1c2b3fca05 --- /dev/null +++ b/linux-headers/asm-powerpc/mman.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _ASM_POWERPC_MMAN_H +#define _ASM_POWERPC_MMAN_H + +#include <asm-generic/mman-common.h> + + +#define PROT_SAO 0x10 /* Strong Access Ordering */ + +#define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */ +#define MAP_NORESERVE 0x40 /* don't reserve swap pages */ +#define MAP_LOCKED 0x80 + +#define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ + +#define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ +#define MCL_FUTURE 0x4000 /* lock all additions to address space */ +#define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ + +#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ +#define MAP_HUGETLB 0x40000 /* create a huge page mapping */ + +/* Override any generic PKEY permission defines */ +#define PKEY_DISABLE_EXECUTE 0x4 +#undef PKEY_ACCESS_MASK +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ + PKEY_DISABLE_WRITE |\ + PKEY_DISABLE_EXECUTE) +#endif /* _ASM_POWERPC_MMAN_H */ diff --git a/linux-headers/asm-s390/mman.h b/linux-headers/asm-s390/mman.h new file mode 100644 index 0000000000..8eebf89f5a --- /dev/null +++ b/linux-headers/asm-s390/mman.h @@ -0,0 +1 @@ +#include <asm-generic/mman.h> diff --git a/linux-headers/asm-x86/mman.h b/linux-headers/asm-x86/mman.h new file mode 100644 index 0000000000..d4a8d0424b --- /dev/null +++ b/linux-headers/asm-x86/mman.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_X86_MMAN_H +#define _ASM_X86_MMAN_H + +#define MAP_32BIT 0x40 /* only give out 32bit addresses */ + +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +/* + * Take the 4 protection key bits out of the vma->vm_flags + * value and turn them in to the bits that we can put in + * to a pte. + * + * Only override these if Protection Keys are available + * (which is only on 64-bit). + */ +#define arch_vm_get_page_prot(vm_flags) __pgprot( \ + ((vm_flags) & VM_PKEY_BIT0 ? _PAGE_PKEY_BIT0 : 0) | \ + ((vm_flags) & VM_PKEY_BIT1 ? _PAGE_PKEY_BIT1 : 0) | \ + ((vm_flags) & VM_PKEY_BIT2 ? _PAGE_PKEY_BIT2 : 0) | \ + ((vm_flags) & VM_PKEY_BIT3 ? _PAGE_PKEY_BIT3 : 0)) + +#define arch_calc_vm_prot_bits(prot, key) ( \ + ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \ + ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \ + ((key) & 0x4 ? VM_PKEY_BIT2 : 0) | \ + ((key) & 0x8 ? VM_PKEY_BIT3 : 0)) +#endif + +#include <asm-generic/mman.h> + +#endif /* _ASM_X86_MMAN_H */ diff --git a/linux-headers/linux/mman.h b/linux-headers/linux/mman.h new file mode 100644 index 0000000000..3c44b6f480 --- /dev/null +++ b/linux-headers/linux/mman.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_MMAN_H +#define _LINUX_MMAN_H + +#include <asm/mman.h> +#include <asm-generic/hugetlb_encode.h> + +#define MREMAP_MAYMOVE 1 +#define MREMAP_FIXED 2 + +#define OVERCOMMIT_GUESS 0 +#define OVERCOMMIT_ALWAYS 1 +#define OVERCOMMIT_NEVER 2 + +/* + * Huge page size encoding when MAP_HUGETLB is specified, and a huge page + * size other than the default is desired. See hugetlb_encode.h. + * All known huge page size encodings are provided here. It is the + * responsibility of the application to know which sizes are supported on + * the running system. See mmap(2) man page for details. + */ +#define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT +#define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK + +#define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB +#define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB +#define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB +#define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB +#define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB +#define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MAP_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB +#define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MAP_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB +#define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB +#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB +#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB + +#endif /* _LINUX_MMAN_H */ diff --git a/linux-user/main.c b/linux-user/main.c index 17387166ab..3d2230320b 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -662,7 +662,7 @@ int main(int argc, char **argv, char **envp) if (cpu_model == NULL) { cpu_model = cpu_get_model(get_elf_eflags(execfd)); } - cpu_type = parse_cpu_model(cpu_model); + cpu_type = parse_cpu_option(cpu_model); /* init tcg before creating CPUs and to get qemu_host_page_size */ tcg_exec_init(0); diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img index 450a076dc0..ba054828d3 100644 --- a/pc-bios/s390-ccw.img +++ b/pc-bios/s390-ccw.img Binary files differdiff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile index 1eb316b02f..a048b6b077 100644 --- a/pc-bios/s390-ccw/Makefile +++ b/pc-bios/s390-ccw/Makefile @@ -10,7 +10,7 @@ $(call set-vpath, $(SRC_PATH)/pc-bios/s390-ccw) .PHONY : all clean build-all OBJECTS = start.o main.o bootmap.o jump2ipl.o sclp.o menu.o \ - virtio.o virtio-scsi.o virtio-blkdev.o libc.o + virtio.o virtio-scsi.o virtio-blkdev.o libc.o cio.o dasd-ipl.o QEMU_CFLAGS := $(filter -W%, $(QEMU_CFLAGS)) QEMU_CFLAGS += -ffreestanding -fno-delete-null-pointer-checks -msoft-float diff --git a/pc-bios/s390-ccw/cio.c b/pc-bios/s390-ccw/cio.c new file mode 100644 index 0000000000..339ec5fbe7 --- /dev/null +++ b/pc-bios/s390-ccw/cio.c @@ -0,0 +1,423 @@ +/* + * S390 Channel I/O + * + * Copyright (c) 2013 Alexander Graf <agraf@suse.de> + * Copyright (c) 2019 IBM Corp. + * + * Author(s): Jason J. Herne <jjherne@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#include "libc.h" +#include "s390-ccw.h" +#include "s390-arch.h" +#include "helper.h" +#include "cio.h" + +static char chsc_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); + +static int __do_cio(SubChannelId schid, uint32_t ccw_addr, int fmt, Irb *irb); + +int enable_mss_facility(void) +{ + int ret; + ChscAreaSda *sda_area = (ChscAreaSda *) chsc_page; + + memset(sda_area, 0, PAGE_SIZE); + sda_area->request.length = 0x0400; + sda_area->request.code = 0x0031; + sda_area->operation_code = 0x2; + + ret = chsc(sda_area); + if ((ret == 0) && (sda_area->response.code == 0x0001)) { + return 0; + } + return -EIO; +} + +void enable_subchannel(SubChannelId schid) +{ + Schib schib; + + stsch_err(schid, &schib); + schib.pmcw.ena = 1; + msch(schid, &schib); +} + +uint16_t cu_type(SubChannelId schid) +{ + Ccw1 sense_id_ccw; + SenseId sense_data; + + sense_id_ccw.cmd_code = CCW_CMD_SENSE_ID; + sense_id_ccw.cda = ptr2u32(&sense_data); + sense_id_ccw.count = sizeof(sense_data); + sense_id_ccw.flags |= CCW_FLAG_SLI; + + if (do_cio(schid, CU_TYPE_UNKNOWN, ptr2u32(&sense_id_ccw), CCW_FMT1)) { + panic("Failed to run SenseID CCw\n"); + } + + return sense_data.cu_type; +} + +int basic_sense(SubChannelId schid, uint16_t cutype, void *sense_data, + uint16_t data_size) +{ + Ccw1 senseCcw; + Irb irb; + + senseCcw.cmd_code = CCW_CMD_BASIC_SENSE; + senseCcw.cda = ptr2u32(sense_data); + senseCcw.count = data_size; + + return __do_cio(schid, ptr2u32(&senseCcw), CCW_FMT1, &irb); +} + +static bool irb_error(Irb *irb) +{ + if (irb->scsw.cstat) { + return true; + } + return irb->scsw.dstat != (SCSW_DSTAT_DEVEND | SCSW_DSTAT_CHEND); +} + +static void print_eckd_dasd_sense_data(SenseDataEckdDasd *sd) +{ + char msgline[512]; + + if (sd->config_info & 0x8000) { + sclp_print("Eckd Dasd Sense Data (fmt 24-bytes):\n"); + } else { + sclp_print("Eckd Dasd Sense Data (fmt 32-bytes):\n"); + } + + strcat(msgline, " Sense Condition Flags :"); + if (sd->common_status & SNS_STAT0_CMD_REJECT) { + strcat(msgline, " [Cmd-Reject]"); + } + if (sd->common_status & SNS_STAT0_INTERVENTION_REQ) { + strcat(msgline, " [Intervention-Required]"); + } + if (sd->common_status & SNS_STAT0_BUS_OUT_CHECK) { + strcat(msgline, " [Bus-Out-Parity-Check]"); + } + if (sd->common_status & SNS_STAT0_EQUIPMENT_CHECK) { + strcat(msgline, " [Equipment-Check]"); + } + if (sd->common_status & SNS_STAT0_DATA_CHECK) { + strcat(msgline, " [Data-Check]"); + } + if (sd->common_status & SNS_STAT0_OVERRUN) { + strcat(msgline, " [Overrun]"); + } + if (sd->common_status & SNS_STAT0_INCOMPL_DOMAIN) { + strcat(msgline, " [Incomplete-Domain]"); + } + + if (sd->status[0] & SNS_STAT1_PERM_ERR) { + strcat(msgline, " [Permanent-Error]"); + } + if (sd->status[0] & SNS_STAT1_INV_TRACK_FORMAT) { + strcat(msgline, " [Invalid-Track-Fmt]"); + } + if (sd->status[0] & SNS_STAT1_EOC) { + strcat(msgline, " [End-of-Cyl]"); + } + if (sd->status[0] & SNS_STAT1_MESSAGE_TO_OPER) { + strcat(msgline, " [Operator-Msg]"); + } + if (sd->status[0] & SNS_STAT1_NO_REC_FOUND) { + strcat(msgline, " [No-Record-Found]"); + } + if (sd->status[0] & SNS_STAT1_FILE_PROTECTED) { + strcat(msgline, " [File-Protected]"); + } + if (sd->status[0] & SNS_STAT1_WRITE_INHIBITED) { + strcat(msgline, " [Write-Inhibited]"); + } + if (sd->status[0] & SNS_STAT1_IMPRECISE_END) { + strcat(msgline, " [Imprecise-Ending]"); + } + + if (sd->status[1] & SNS_STAT2_REQ_INH_WRITE) { + strcat(msgline, " [Req-Inhibit-Write]"); + } + if (sd->status[1] & SNS_STAT2_CORRECTABLE) { + strcat(msgline, " [Correctable-Data-Check]"); + } + if (sd->status[1] & SNS_STAT2_FIRST_LOG_ERR) { + strcat(msgline, " [First-Error-Log]"); + } + if (sd->status[1] & SNS_STAT2_ENV_DATA_PRESENT) { + strcat(msgline, " [Env-Data-Present]"); + } + if (sd->status[1] & SNS_STAT2_IMPRECISE_END) { + strcat(msgline, " [Imprecise-End]"); + } + strcat(msgline, "\n"); + sclp_print(msgline); + + print_int(" Residual Count =", sd->res_count); + print_int(" Phys Drive ID =", sd->phys_drive_id); + print_int(" low cyl address =", sd->low_cyl_addr); + print_int(" head addr & hi cyl =", sd->head_high_cyl_addr); + print_int(" format/message =", sd->fmt_msg); + print_int(" fmt-dependent[0-7] =", sd->fmt_dependent_info[0]); + print_int(" fmt-dependent[8-15]=", sd->fmt_dependent_info[1]); + print_int(" prog action code =", sd->program_action_code); + print_int(" Configuration info =", sd->config_info); + print_int(" mcode / hi-cyl =", sd->mcode_hicyl); + print_int(" cyl & head addr [0]=", sd->cyl_head_addr[0]); + print_int(" cyl & head addr [1]=", sd->cyl_head_addr[1]); + print_int(" cyl & head addr [2]=", sd->cyl_head_addr[2]); +} + +static void print_irb_err(Irb *irb) +{ + uint64_t this_ccw = *(uint64_t *)u32toptr(irb->scsw.cpa); + uint64_t prev_ccw = *(uint64_t *)u32toptr(irb->scsw.cpa - 8); + char msgline[256]; + + sclp_print("Interrupt Response Block Data:\n"); + + strcat(msgline, " Function Ctrl :"); + if (irb->scsw.ctrl & SCSW_FCTL_START_FUNC) { + strcat(msgline, " [Start]"); + } + if (irb->scsw.ctrl & SCSW_FCTL_HALT_FUNC) { + strcat(msgline, " [Halt]"); + } + if (irb->scsw.ctrl & SCSW_FCTL_CLEAR_FUNC) { + strcat(msgline, " [Clear]"); + } + strcat(msgline, "\n"); + sclp_print(msgline); + + msgline[0] = '\0'; + strcat(msgline, " Activity Ctrl :"); + if (irb->scsw.ctrl & SCSW_ACTL_RESUME_PEND) { + strcat(msgline, " [Resume-Pending]"); + } + if (irb->scsw.ctrl & SCSW_ACTL_START_PEND) { + strcat(msgline, " [Start-Pending]"); + } + if (irb->scsw.ctrl & SCSW_ACTL_HALT_PEND) { + strcat(msgline, " [Halt-Pending]"); + } + if (irb->scsw.ctrl & SCSW_ACTL_CLEAR_PEND) { + strcat(msgline, " [Clear-Pending]"); + } + if (irb->scsw.ctrl & SCSW_ACTL_CH_ACTIVE) { + strcat(msgline, " [Channel-Active]"); + } + if (irb->scsw.ctrl & SCSW_ACTL_DEV_ACTIVE) { + strcat(msgline, " [Device-Active]"); + } + if (irb->scsw.ctrl & SCSW_ACTL_SUSPENDED) { + strcat(msgline, " [Suspended]"); + } + strcat(msgline, "\n"); + sclp_print(msgline); + + msgline[0] = '\0'; + strcat(msgline, " Status Ctrl :"); + if (irb->scsw.ctrl & SCSW_SCTL_ALERT) { + strcat(msgline, " [Alert]"); + } + if (irb->scsw.ctrl & SCSW_SCTL_INTERMED) { + strcat(msgline, " [Intermediate]"); + } + if (irb->scsw.ctrl & SCSW_SCTL_PRIMARY) { + strcat(msgline, " [Primary]"); + } + if (irb->scsw.ctrl & SCSW_SCTL_SECONDARY) { + strcat(msgline, " [Secondary]"); + } + if (irb->scsw.ctrl & SCSW_SCTL_STATUS_PEND) { + strcat(msgline, " [Status-Pending]"); + } + + strcat(msgline, "\n"); + sclp_print(msgline); + + msgline[0] = '\0'; + strcat(msgline, " Device Status :"); + if (irb->scsw.dstat & SCSW_DSTAT_ATTN) { + strcat(msgline, " [Attention]"); + } + if (irb->scsw.dstat & SCSW_DSTAT_STATMOD) { + strcat(msgline, " [Status-Modifier]"); + } + if (irb->scsw.dstat & SCSW_DSTAT_CUEND) { + strcat(msgline, " [Ctrl-Unit-End]"); + } + if (irb->scsw.dstat & SCSW_DSTAT_BUSY) { + strcat(msgline, " [Busy]"); + } + if (irb->scsw.dstat & SCSW_DSTAT_CHEND) { + strcat(msgline, " [Channel-End]"); + } + if (irb->scsw.dstat & SCSW_DSTAT_DEVEND) { + strcat(msgline, " [Device-End]"); + } + if (irb->scsw.dstat & SCSW_DSTAT_UCHK) { + strcat(msgline, " [Unit-Check]"); + } + if (irb->scsw.dstat & SCSW_DSTAT_UEXCP) { + strcat(msgline, " [Unit-Exception]"); + } + strcat(msgline, "\n"); + sclp_print(msgline); + + msgline[0] = '\0'; + strcat(msgline, " Channel Status :"); + if (irb->scsw.cstat & SCSW_CSTAT_PCINT) { + strcat(msgline, " [Program-Ctrl-Interruption]"); + } + if (irb->scsw.cstat & SCSW_CSTAT_BADLEN) { + strcat(msgline, " [Incorrect-Length]"); + } + if (irb->scsw.cstat & SCSW_CSTAT_PROGCHK) { + strcat(msgline, " [Program-Check]"); + } + if (irb->scsw.cstat & SCSW_CSTAT_PROTCHK) { + strcat(msgline, " [Protection-Check]"); + } + if (irb->scsw.cstat & SCSW_CSTAT_CHDCHK) { + strcat(msgline, " [Channel-Data-Check]"); + } + if (irb->scsw.cstat & SCSW_CSTAT_CHCCHK) { + strcat(msgline, " [Channel-Ctrl-Check]"); + } + if (irb->scsw.cstat & SCSW_CSTAT_ICCHK) { + strcat(msgline, " [Interface-Ctrl-Check]"); + } + if (irb->scsw.cstat & SCSW_CSTAT_CHAINCHK) { + strcat(msgline, " [Chaining-Check]"); + } + strcat(msgline, "\n"); + sclp_print(msgline); + + print_int(" cpa=", irb->scsw.cpa); + print_int(" prev_ccw=", prev_ccw); + print_int(" this_ccw=", this_ccw); +} + +/* + * Handles executing ssch, tsch and returns the irb obtained from tsch. + * Returns 0 on success, -1 if unexpected status pending and we need to retry, + * otherwise returns condition code from ssch/tsch for error cases. + */ +static int __do_cio(SubChannelId schid, uint32_t ccw_addr, int fmt, Irb *irb) +{ + CmdOrb orb = {}; + int rc; + + IPL_assert(fmt == 0 || fmt == 1, "Invalid ccw format"); + + /* ccw_addr must be <= 24 bits and point to at least one whole ccw. */ + if (fmt == 0) { + IPL_assert(ccw_addr <= 0xFFFFFF - 8, "Invalid ccw address"); + } + + orb.fmt = fmt; + orb.pfch = 1; /* QEMU's cio implementation requires prefetch */ + orb.c64 = 1; /* QEMU's cio implementation requires 64-bit idaws */ + orb.lpm = 0xFF; /* All paths allowed */ + orb.cpa = ccw_addr; + + rc = ssch(schid, &orb); + if (rc == 1 || rc == 2) { + /* Subchannel status pending or busy. Eat status and ask for retry. */ + tsch(schid, irb); + return -1; + } + if (rc) { + print_int("ssch failed with cc=", rc); + return rc; + } + + consume_io_int(); + + /* collect status */ + rc = tsch(schid, irb); + if (rc) { + print_int("tsch failed with cc=", rc); + } + + return rc; +} + +/* + * Executes a channel program at a given subchannel. The request to run the + * channel program is sent to the subchannel, we then wait for the interrupt + * signaling completion of the I/O operation(s) performed by the channel + * program. Lastly we verify that the i/o operation completed without error and + * that the interrupt we received was for the subchannel used to run the + * channel program. + * + * Note: This function assumes it is running in an environment where no other + * cpus are generating or receiving I/O interrupts. So either run it in a + * single-cpu environment or make sure all other cpus are not doing I/O and + * have I/O interrupts masked off. We also assume that only one device is + * active (generating i/o interrupts). + * + * Returns non-zero on error. + */ +int do_cio(SubChannelId schid, uint16_t cutype, uint32_t ccw_addr, int fmt) +{ + Irb irb = {}; + SenseDataEckdDasd sd; + int rc, retries = 0; + + while (true) { + rc = __do_cio(schid, ccw_addr, fmt, &irb); + + if (rc == -1) { + retries++; + continue; + } + if (rc) { + /* ssch/tsch error. Message already reported by __do_cio */ + break; + } + + if (!irb_error(&irb)) { + break; + } + + /* + * Unexpected unit check, or interface-control-check. Use sense to + * clear (unit check only) then retry. + */ + if ((unit_check(&irb) || iface_ctrl_check(&irb)) && retries <= 2) { + if (unit_check(&irb)) { + basic_sense(schid, cutype, &sd, sizeof(sd)); + } + retries++; + continue; + } + + sclp_print("cio device error\n"); + print_int(" ssid ", schid.ssid); + print_int(" cssid ", schid.cssid); + print_int(" sch_no", schid.sch_no); + print_int(" ctrl-unit type", cutype); + sclp_print("\n"); + print_irb_err(&irb); + if (cutype == CU_TYPE_DASD_3990 || cutype == CU_TYPE_DASD_2107 || + cutype == CU_TYPE_UNKNOWN) { + if (!basic_sense(schid, cutype, &sd, sizeof(sd))) { + print_eckd_dasd_sense_data(&sd); + } + } + rc = -1; + break; + } + + return rc; +} diff --git a/pc-bios/s390-ccw/cio.h b/pc-bios/s390-ccw/cio.h index 1a0795f645..aaa432dedd 100644 --- a/pc-bios/s390-ccw/cio.h +++ b/pc-bios/s390-ccw/cio.h @@ -17,35 +17,35 @@ * path management control word */ struct pmcw { - __u32 intparm; /* interruption parameter */ - __u32 qf : 1; /* qdio facility */ - __u32 w : 1; - __u32 isc : 3; /* interruption sublass */ - __u32 res5 : 3; /* reserved zeros */ - __u32 ena : 1; /* enabled */ - __u32 lm : 2; /* limit mode */ - __u32 mme : 2; /* measurement-mode enable */ - __u32 mp : 1; /* multipath mode */ - __u32 tf : 1; /* timing facility */ - __u32 dnv : 1; /* device number valid */ - __u32 dev : 16; /* device number */ - __u8 lpm; /* logical path mask */ - __u8 pnom; /* path not operational mask */ - __u8 lpum; /* last path used mask */ - __u8 pim; /* path installed mask */ - __u16 mbi; /* measurement-block index */ - __u8 pom; /* path operational mask */ - __u8 pam; /* path available mask */ - __u8 chpid[8]; /* CHPID 0-7 (if available) */ - __u32 unused1 : 8; /* reserved zeros */ - __u32 st : 3; /* subchannel type */ - __u32 unused2 : 18; /* reserved zeros */ - __u32 mbfc : 1; /* measurement block format control */ - __u32 xmwme : 1; /* extended measurement word mode enable */ - __u32 csense : 1; /* concurrent sense; can be enabled ...*/ - /* ... per MSCH, however, if facility */ - /* ... is not installed, this results */ - /* ... in an operand exception. */ + __u32 intparm; /* interruption parameter */ + __u32 qf:1; /* qdio facility */ + __u32 w:1; + __u32 isc:3; /* interruption sublass */ + __u32 res5:3; /* reserved zeros */ + __u32 ena:1; /* enabled */ + __u32 lm:2; /* limit mode */ + __u32 mme:2; /* measurement-mode enable */ + __u32 mp:1; /* multipath mode */ + __u32 tf:1; /* timing facility */ + __u32 dnv:1; /* device number valid */ + __u32 dev:16; /* device number */ + __u8 lpm; /* logical path mask */ + __u8 pnom; /* path not operational mask */ + __u8 lpum; /* last path used mask */ + __u8 pim; /* path installed mask */ + __u16 mbi; /* measurement-block index */ + __u8 pom; /* path operational mask */ + __u8 pam; /* path available mask */ + __u8 chpid[8]; /* CHPID 0-7 (if available) */ + __u32 unused1:8; /* reserved zeros */ + __u32 st:3; /* subchannel type */ + __u32 unused2:18; /* reserved zeros */ + __u32 mbfc:1; /* measurement block format control */ + __u32 xmwme:1; /* extended measurement word mode enable */ + __u32 csense:1; /* concurrent sense; can be enabled ...*/ + /* ... per MSCH, however, if facility */ + /* ... is not installed, this results */ + /* ... in an operand exception. */ } __attribute__ ((packed)); /* Target SCHIB configuration. */ @@ -70,35 +70,72 @@ struct scsw { __u16 count; } __attribute__ ((packed)); -#define SCSW_FCTL_CLEAR_FUNC 0x1000 -#define SCSW_FCTL_HALT_FUNC 0x2000 +/* Function Control */ #define SCSW_FCTL_START_FUNC 0x4000 +#define SCSW_FCTL_HALT_FUNC 0x2000 +#define SCSW_FCTL_CLEAR_FUNC 0x1000 + +/* Activity Control */ +#define SCSW_ACTL_RESUME_PEND 0x0800 +#define SCSW_ACTL_START_PEND 0x0400 +#define SCSW_ACTL_HALT_PEND 0x0200 +#define SCSW_ACTL_CLEAR_PEND 0x0100 +#define SCSW_ACTL_CH_ACTIVE 0x0080 +#define SCSW_ACTL_DEV_ACTIVE 0x0040 +#define SCSW_ACTL_SUSPENDED 0x0020 + +/* Status Control */ +#define SCSW_SCTL_ALERT 0x0010 +#define SCSW_SCTL_INTERMED 0x0008 +#define SCSW_SCTL_PRIMARY 0x0004 +#define SCSW_SCTL_SECONDARY 0x0002 +#define SCSW_SCTL_STATUS_PEND 0x0001 + +/* SCSW Device Status Flags */ +#define SCSW_DSTAT_ATTN 0x80 +#define SCSW_DSTAT_STATMOD 0x40 +#define SCSW_DSTAT_CUEND 0x20 +#define SCSW_DSTAT_BUSY 0x10 +#define SCSW_DSTAT_CHEND 0x08 +#define SCSW_DSTAT_DEVEND 0x04 +#define SCSW_DSTAT_UCHK 0x02 +#define SCSW_DSTAT_UEXCP 0x01 + +/* SCSW Subchannel Status Flags */ +#define SCSW_CSTAT_PCINT 0x80 +#define SCSW_CSTAT_BADLEN 0x40 +#define SCSW_CSTAT_PROGCHK 0x20 +#define SCSW_CSTAT_PROTCHK 0x10 +#define SCSW_CSTAT_CHDCHK 0x08 +#define SCSW_CSTAT_CHCCHK 0x04 +#define SCSW_CSTAT_ICCHK 0x02 +#define SCSW_CSTAT_CHAINCHK 0x01 /* * subchannel information block */ -struct schib { +typedef struct schib { struct pmcw pmcw; /* path management control word */ struct scsw scsw; /* subchannel status word */ __u64 mba; /* measurement block address */ __u8 mda[4]; /* model dependent area */ -} __attribute__ ((packed,aligned(4))); - -struct subchannel_id { - __u32 cssid : 8; - __u32 : 4; - __u32 m : 1; - __u32 ssid : 2; - __u32 one : 1; - __u32 sch_no : 16; -} __attribute__ ((packed, aligned(4))); +} __attribute__ ((packed, aligned(4))) Schib; + +typedef struct subchannel_id { + __u32 cssid:8; + __u32:4; + __u32 m:1; + __u32 ssid:2; + __u32 one:1; + __u32 sch_no:16; +} __attribute__ ((packed, aligned(4))) SubChannelId; struct chsc_header { __u16 length; __u16 code; } __attribute__((packed)); -struct chsc_area_sda { +typedef struct chsc_area_sda { struct chsc_header request; __u8 reserved1:4; __u8 format:4; @@ -111,29 +148,49 @@ struct chsc_area_sda { __u32 reserved5:4; __u32 format2:4; __u32 reserved6:24; -} __attribute__((packed)); +} __attribute__((packed)) ChscAreaSda; /* * TPI info structure */ struct tpi_info { struct subchannel_id schid; - __u32 intparm; /* interruption parameter */ - __u32 adapter_IO : 1; - __u32 reserved2 : 1; - __u32 isc : 3; - __u32 reserved3 : 12; - __u32 int_type : 3; - __u32 reserved4 : 12; + __u32 intparm; /* interruption parameter */ + __u32 adapter_IO:1; + __u32 reserved2:1; + __u32 isc:3; + __u32 reserved3:12; + __u32 int_type:3; + __u32 reserved4:12; } __attribute__ ((packed, aligned(4))); -/* channel command word (type 1) */ -struct ccw1 { +/* channel command word (format 0) */ +typedef struct ccw0 { + __u8 cmd_code; + __u32 cda:24; + __u32 chainData:1; + __u32 chain:1; + __u32 sli:1; + __u32 skip:1; + __u32 pci:1; + __u32 ida:1; + __u32 suspend:1; + __u32 mida:1; + __u8 reserved; + __u16 count; +} __attribute__ ((packed, aligned(8))) Ccw0; + +/* channel command word (format 1) */ +typedef struct ccw1 { __u8 cmd_code; __u8 flags; __u16 count; __u32 cda; -} __attribute__ ((packed, aligned(8))); +} __attribute__ ((packed, aligned(8))) Ccw1; + +/* do_cio() CCW formats */ +#define CCW_FMT0 0x00 +#define CCW_FMT1 0x01 #define CCW_FLAG_DC 0x80 #define CCW_FLAG_CC 0x40 @@ -143,11 +200,14 @@ struct ccw1 { #define CCW_FLAG_IDA 0x04 #define CCW_FLAG_SUSPEND 0x02 +/* Common CCW commands */ +#define CCW_CMD_READ_IPL 0x02 #define CCW_CMD_NOOP 0x03 #define CCW_CMD_BASIC_SENSE 0x04 #define CCW_CMD_TIC 0x08 #define CCW_CMD_SENSE_ID 0xe4 +/* Virtio CCW commands */ #define CCW_CMD_SET_VQ 0x13 #define CCW_CMD_VDEV_RESET 0x33 #define CCW_CMD_READ_FEAT 0x12 @@ -159,10 +219,16 @@ struct ccw1 { #define CCW_CMD_SET_CONF_IND 0x53 #define CCW_CMD_READ_VQ_CONF 0x32 +/* DASD CCW commands */ +#define CCW_CMD_DASD_READ 0x06 +#define CCW_CMD_DASD_SEEK 0x07 +#define CCW_CMD_DASD_SEARCH_ID_EQ 0x31 +#define CCW_CMD_DASD_READ_MT 0x86 + /* * Command-mode operation request block */ -struct cmd_orb { +typedef struct cmd_orb { __u32 intparm; /* interruption parameter */ __u32 key:4; /* flags, like key, suspend control, etc. */ __u32 spnd:1; /* suspend control */ @@ -182,7 +248,7 @@ struct cmd_orb { __u32 zero:6; /* reserved zeros */ __u32 orbx:1; /* ORB extension control */ __u32 cpa; /* channel program address */ -} __attribute__ ((packed, aligned(4))); +} __attribute__ ((packed, aligned(4))) CmdOrb; struct ciw { __u8 type; @@ -190,10 +256,15 @@ struct ciw { __u16 count; }; +#define CU_TYPE_UNKNOWN 0x0000 +#define CU_TYPE_DASD_2107 0x2107 +#define CU_TYPE_VIRTIO 0x3832 +#define CU_TYPE_DASD_3990 0x3990 + /* * sense-id response buffer layout */ -struct senseid { +typedef struct senseid { /* common part */ __u8 reserved; /* always 0x'FF' */ __u16 cu_type; /* control unit type */ @@ -203,15 +274,94 @@ struct senseid { __u8 unused; /* padding byte */ /* extended part */ struct ciw ciw[62]; -} __attribute__ ((packed, aligned(4))); +} __attribute__ ((packed, aligned(4))) SenseId; + +/* + * architected values for first sense byte - common_status. Bits 0-5 of this + * field are common to all device types. + */ +#define SNS_STAT0_CMD_REJECT 0x80 +#define SNS_STAT0_INTERVENTION_REQ 0x40 +#define SNS_STAT0_BUS_OUT_CHECK 0x20 +#define SNS_STAT0_EQUIPMENT_CHECK 0x10 +#define SNS_STAT0_DATA_CHECK 0x08 +#define SNS_STAT0_OVERRUN 0x04 +#define SNS_STAT0_INCOMPL_DOMAIN 0x01 + +/* ECKD DASD status[0] byte */ +#define SNS_STAT1_PERM_ERR 0x80 +#define SNS_STAT1_INV_TRACK_FORMAT 0x40 +#define SNS_STAT1_EOC 0x20 +#define SNS_STAT1_MESSAGE_TO_OPER 0x10 +#define SNS_STAT1_NO_REC_FOUND 0x08 +#define SNS_STAT1_FILE_PROTECTED 0x04 +#define SNS_STAT1_WRITE_INHIBITED 0x02 +#define SNS_STAT1_IMPRECISE_END 0x01 + +/* ECKD DASD status[1] byte */ +#define SNS_STAT2_REQ_INH_WRITE 0x80 +#define SNS_STAT2_CORRECTABLE 0x40 +#define SNS_STAT2_FIRST_LOG_ERR 0x20 +#define SNS_STAT2_ENV_DATA_PRESENT 0x10 +#define SNS_STAT2_IMPRECISE_END 0x04 + +/* ECKD DASD 24-byte Sense fmt_msg codes */ +#define SENSE24_FMT_PROG_SYS 0x0 +#define SENSE24_FMT_EQUIPMENT 0x2 +#define SENSE24_FMT_CONTROLLER 0x3 +#define SENSE24_FMT_MISC 0xF + +/* basic sense response buffer layout */ +typedef struct SenseDataEckdDasd { + uint8_t common_status; + uint8_t status[2]; + uint8_t res_count; + uint8_t phys_drive_id; + uint8_t low_cyl_addr; + uint8_t head_high_cyl_addr; + uint8_t fmt_msg; + uint64_t fmt_dependent_info[2]; + uint8_t reserved; + uint8_t program_action_code; + uint16_t config_info; + uint8_t mcode_hicyl; + uint8_t cyl_head_addr[3]; +} __attribute__ ((packed, aligned(4))) SenseDataEckdDasd; + +#define ECKD_SENSE24_GET_FMT(sd) (sd->fmt_msg & 0xF0 >> 4) +#define ECKD_SENSE24_GET_MSG(sd) (sd->fmt_msg & 0x0F) + +#define unit_check(irb) ((irb)->scsw.dstat & SCSW_DSTAT_UCHK) +#define iface_ctrl_check(irb) ((irb)->scsw.cstat & SCSW_CSTAT_ICCHK) /* interruption response block */ -struct irb { +typedef struct irb { struct scsw scsw; __u32 esw[5]; __u32 ecw[8]; __u32 emw[8]; -} __attribute__ ((packed, aligned(4))); +} __attribute__ ((packed, aligned(4))) Irb; + +/* Used for SEEK ccw commands */ +typedef struct CcwSeekData { + uint16_t reserved; + uint16_t cyl; + uint16_t head; +} __attribute__((packed)) CcwSeekData; + +/* Used for SEARCH ID ccw commands */ +typedef struct CcwSearchIdData { + uint16_t cyl; + uint16_t head; + uint8_t record; +} __attribute__((packed)) CcwSearchIdData; + +int enable_mss_facility(void); +void enable_subchannel(SubChannelId schid); +uint16_t cu_type(SubChannelId schid); +int basic_sense(SubChannelId schid, uint16_t cutype, void *sense_data, + uint16_t data_size); +int do_cio(SubChannelId schid, uint16_t cutype, uint32_t ccw_addr, int fmt); /* * Some S390 specific IO instructions as inline diff --git a/pc-bios/s390-ccw/dasd-ipl.c b/pc-bios/s390-ccw/dasd-ipl.c new file mode 100644 index 0000000000..0fc879bb8e --- /dev/null +++ b/pc-bios/s390-ccw/dasd-ipl.c @@ -0,0 +1,235 @@ +/* + * S390 IPL (boot) from a real DASD device via vfio framework. + * + * Copyright (c) 2019 Jason J. Herne <jjherne@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#include "libc.h" +#include "s390-ccw.h" +#include "s390-arch.h" +#include "dasd-ipl.h" +#include "helper.h" + +static char prefix_page[PAGE_SIZE * 2] + __attribute__((__aligned__(PAGE_SIZE * 2))); + +static void enable_prefixing(void) +{ + memcpy(&prefix_page, lowcore, 4096); + set_prefix(ptr2u32(&prefix_page)); +} + +static void disable_prefixing(void) +{ + set_prefix(0); + /* Copy io interrupt info back to low core */ + memcpy((void *)&lowcore->subchannel_id, prefix_page + 0xB8, 12); +} + +static bool is_read_tic_ccw_chain(Ccw0 *ccw) +{ + Ccw0 *next_ccw = ccw + 1; + + return ((ccw->cmd_code == CCW_CMD_DASD_READ || + ccw->cmd_code == CCW_CMD_DASD_READ_MT) && + ccw->chain && next_ccw->cmd_code == CCW_CMD_TIC); +} + +static bool dynamic_cp_fixup(uint32_t ccw_addr, uint32_t *next_cpa) +{ + Ccw0 *cur_ccw = (Ccw0 *)(uint64_t)ccw_addr; + Ccw0 *tic_ccw; + + while (true) { + /* Skip over inline TIC (it might not have the chain bit on) */ + if (cur_ccw->cmd_code == CCW_CMD_TIC && + cur_ccw->cda == ptr2u32(cur_ccw) - 8) { + cur_ccw += 1; + continue; + } + + if (!cur_ccw->chain) { + break; + } + if (is_read_tic_ccw_chain(cur_ccw)) { + /* + * Breaking a chain of CCWs may alter the semantics or even the + * validity of a channel program. The heuristic implemented below + * seems to work well in practice for the channel programs + * generated by zipl. + */ + tic_ccw = cur_ccw + 1; + *next_cpa = tic_ccw->cda; + cur_ccw->chain = 0; + return true; + } + cur_ccw += 1; + } + return false; +} + +static int run_dynamic_ccw_program(SubChannelId schid, uint16_t cutype, + uint32_t cpa) +{ + bool has_next; + uint32_t next_cpa = 0; + int rc; + + do { + has_next = dynamic_cp_fixup(cpa, &next_cpa); + + print_int("executing ccw chain at ", cpa); + enable_prefixing(); + rc = do_cio(schid, cutype, cpa, CCW_FMT0); + disable_prefixing(); + + if (rc) { + break; + } + cpa = next_cpa; + } while (has_next); + + return rc; +} + +static void make_readipl(void) +{ + Ccw0 *ccwIplRead = (Ccw0 *)0x00; + + /* Create Read IPL ccw at address 0 */ + ccwIplRead->cmd_code = CCW_CMD_READ_IPL; + ccwIplRead->cda = 0x00; /* Read into address 0x00 in main memory */ + ccwIplRead->chain = 0; /* Chain flag */ + ccwIplRead->count = 0x18; /* Read 0x18 bytes of data */ +} + +static void run_readipl(SubChannelId schid, uint16_t cutype) +{ + if (do_cio(schid, cutype, 0x00, CCW_FMT0)) { + panic("dasd-ipl: Failed to run Read IPL channel program\n"); + } +} + +/* + * The architecture states that IPL1 data should consist of a psw followed by + * format-0 READ and TIC CCWs. Let's sanity check. + */ +static void check_ipl1(void) +{ + Ccw0 *ccwread = (Ccw0 *)0x08; + Ccw0 *ccwtic = (Ccw0 *)0x10; + + if (ccwread->cmd_code != CCW_CMD_DASD_READ || + ccwtic->cmd_code != CCW_CMD_TIC) { + panic("dasd-ipl: IPL1 data invalid. Is this disk really bootable?\n"); + } +} + +static void check_ipl2(uint32_t ipl2_addr) +{ + Ccw0 *ccw = u32toptr(ipl2_addr); + + if (ipl2_addr == 0x00) { + panic("IPL2 address invalid. Is this disk really bootable?\n"); + } + if (ccw->cmd_code == 0x00) { + panic("IPL2 ccw data invalid. Is this disk really bootable?\n"); + } +} + +static uint32_t read_ipl2_addr(void) +{ + Ccw0 *ccwtic = (Ccw0 *)0x10; + + return ccwtic->cda; +} + +static void ipl1_fixup(void) +{ + Ccw0 *ccwSeek = (Ccw0 *) 0x08; + Ccw0 *ccwSearchID = (Ccw0 *) 0x10; + Ccw0 *ccwSearchTic = (Ccw0 *) 0x18; + Ccw0 *ccwRead = (Ccw0 *) 0x20; + CcwSeekData *seekData = (CcwSeekData *) 0x30; + CcwSearchIdData *searchData = (CcwSearchIdData *) 0x38; + + /* move IPL1 CCWs to make room for CCWs needed to locate record 2 */ + memcpy(ccwRead, (void *)0x08, 16); + + /* Disable chaining so we don't TIC to IPL2 channel program */ + ccwRead->chain = 0x00; + + ccwSeek->cmd_code = CCW_CMD_DASD_SEEK; + ccwSeek->cda = ptr2u32(seekData); + ccwSeek->chain = 1; + ccwSeek->count = sizeof(*seekData); + seekData->reserved = 0x00; + seekData->cyl = 0x00; + seekData->head = 0x00; + + ccwSearchID->cmd_code = CCW_CMD_DASD_SEARCH_ID_EQ; + ccwSearchID->cda = ptr2u32(searchData); + ccwSearchID->chain = 1; + ccwSearchID->count = sizeof(*searchData); + searchData->cyl = 0; + searchData->head = 0; + searchData->record = 2; + + /* Go back to Search CCW if correct record not yet found */ + ccwSearchTic->cmd_code = CCW_CMD_TIC; + ccwSearchTic->cda = ptr2u32(ccwSearchID); +} + +static void run_ipl1(SubChannelId schid, uint16_t cutype) + { + uint32_t startAddr = 0x08; + + if (do_cio(schid, cutype, startAddr, CCW_FMT0)) { + panic("dasd-ipl: Failed to run IPL1 channel program\n"); + } +} + +static void run_ipl2(SubChannelId schid, uint16_t cutype, uint32_t addr) +{ + if (run_dynamic_ccw_program(schid, cutype, addr)) { + panic("dasd-ipl: Failed to run IPL2 channel program\n"); + } +} + +/* + * Limitations in vfio-ccw support complicate the IPL process. Details can + * be found in docs/devel/s390-dasd-ipl.txt + */ +void dasd_ipl(SubChannelId schid, uint16_t cutype) +{ + PSWLegacy *pswl = (PSWLegacy *) 0x00; + uint32_t ipl2_addr; + + /* Construct Read IPL CCW and run it to read IPL1 from boot disk */ + make_readipl(); + run_readipl(schid, cutype); + ipl2_addr = read_ipl2_addr(); + check_ipl1(); + + /* + * Fixup IPL1 channel program to account for vfio-ccw limitations, then run + * it to read IPL2 channel program from boot disk. + */ + ipl1_fixup(); + run_ipl1(schid, cutype); + check_ipl2(ipl2_addr); + + /* + * Run IPL2 channel program to read operating system code from boot disk + */ + run_ipl2(schid, cutype, ipl2_addr); + + /* Transfer control to the guest operating system */ + pswl->mask |= PSW_MASK_EAMODE; /* Force z-mode */ + pswl->addr |= PSW_MASK_BAMODE; /* ... */ + jump_to_low_kernel(); +} diff --git a/pc-bios/s390-ccw/dasd-ipl.h b/pc-bios/s390-ccw/dasd-ipl.h new file mode 100644 index 0000000000..c394828906 --- /dev/null +++ b/pc-bios/s390-ccw/dasd-ipl.h @@ -0,0 +1,16 @@ +/* + * S390 IPL (boot) from a real DASD device via vfio framework. + * + * Copyright (c) 2019 Jason J. Herne <jjherne@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef DASD_IPL_H +#define DASD_IPL_H + +void dasd_ipl(SubChannelId schid, uint16_t cutype); + +#endif /* DASD_IPL_H */ diff --git a/pc-bios/s390-ccw/helper.h b/pc-bios/s390-ccw/helper.h new file mode 100644 index 0000000000..78d5bc7442 --- /dev/null +++ b/pc-bios/s390-ccw/helper.h @@ -0,0 +1,31 @@ +/* + * Helper Functions + * + * Copyright (c) 2019 IBM Corp. + * + * Author(s): Jason J. Herne <jjherne@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef S390_CCW_HELPER_H +#define S390_CCW_HELPER_H + +#include "s390-ccw.h" + +/* Avoids compiler warnings when casting a pointer to a u32 */ +static inline uint32_t ptr2u32(void *ptr) +{ + IPL_assert((uint64_t)ptr <= 0xffffffff, "ptr2u32: ptr too large"); + return (uint32_t)(uint64_t)ptr; +} + +/* Avoids compiler warnings when casting a u32 to a pointer */ +static inline void *u32toptr(uint32_t n) +{ + return (void *)(uint64_t)n; +} + +#endif diff --git a/pc-bios/s390-ccw/libc.h b/pc-bios/s390-ccw/libc.h index 818517ff5d..bcdc45732d 100644 --- a/pc-bios/s390-ccw/libc.h +++ b/pc-bios/s390-ccw/libc.h @@ -67,6 +67,17 @@ static inline size_t strlen(const char *str) return i; } +static inline char *strcat(char *dest, const char *src) +{ + int i; + char *dest_end = dest + strlen(dest); + + for (i = 0; i <= strlen(src); i++) { + dest_end[i] = src[i]; + } + return dest; +} + static inline int isdigit(int c) { return (c >= '0') && (c <= '9'); diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c index 544851d672..a69c73349e 100644 --- a/pc-bios/s390-ccw/main.c +++ b/pc-bios/s390-ccw/main.c @@ -9,21 +9,27 @@ */ #include "libc.h" +#include "s390-arch.h" #include "s390-ccw.h" +#include "cio.h" #include "virtio.h" +#include "dasd-ipl.h" char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE))); static SubChannelId blk_schid = { .one = 1 }; -IplParameterBlock iplb __attribute__((__aligned__(PAGE_SIZE))); static char loadparm_str[LOADPARM_LEN + 1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; QemuIplParameters qipl; +IplParameterBlock iplb __attribute__((__aligned__(PAGE_SIZE))); +static bool have_iplb; +static uint16_t cutype; +LowCore const *lowcore; /* Yes, this *is* a pointer to address 0 */ #define LOADPARM_PROMPT "PROMPT " #define LOADPARM_EMPTY " " #define BOOT_MENU_FLAG_MASK (QIPL_FLAG_BM_OPTS_CMD | QIPL_FLAG_BM_OPTS_ZIPL) /* - * Priniciples of Operations (SA22-7832-09) chapter 17 requires that + * Principles of Operations (SA22-7832-09) chapter 17 requires that * a subsystem-identification is at 184-187 and bytes 188-191 are zero * after list-directed-IPL and ccw-IPL. */ @@ -48,29 +54,64 @@ unsigned int get_loadparm_index(void) return atoui(loadparm_str); } -static bool find_dev(Schib *schib, int dev_no) +/* + * Find the subchannel connected to the given device (dev_no) and fill in the + * subchannel information block (schib) with the connected subchannel's info. + * NOTE: The global variable blk_schid is updated to contain the subchannel + * information. + * + * If the caller gives dev_no=-1 then the user did not specify a boot device. + * In this case we'll just use the first potentially bootable device we find. + */ +static bool find_subch(int dev_no) { + Schib schib; int i, r; + bool is_virtio; for (i = 0; i < 0x10000; i++) { blk_schid.sch_no = i; - r = stsch_err(blk_schid, schib); + r = stsch_err(blk_schid, &schib); if ((r == 3) || (r == -EIO)) { break; } - if (!schib->pmcw.dnv) { - continue; - } - if (!virtio_is_supported(blk_schid)) { + if (!schib.pmcw.dnv) { continue; } - /* Skip net devices since no IPLB is created and therefore no - * no network bootloader has been loaded + + enable_subchannel(blk_schid); + cutype = cu_type(blk_schid); + + /* + * Note: we always have to run virtio_is_supported() here to make + * sure that the vdev.senseid data gets pre-initialized correctly */ - if (virtio_get_device_type() == VIRTIO_ID_NET && dev_no < 0) { - continue; + is_virtio = virtio_is_supported(blk_schid); + + /* No specific devno given, just return 1st possibly bootable device */ + if (dev_no < 0) { + switch (cutype) { + case CU_TYPE_VIRTIO: + if (is_virtio) { + /* + * Skip net devices since no IPLB is created and therefore + * no network bootloader has been loaded + */ + if (virtio_get_device_type() != VIRTIO_ID_NET) { + return true; + } + } + continue; + case CU_TYPE_DASD_3990: + case CU_TYPE_DASD_2107: + return true; + default: + continue; + } } - if ((dev_no < 0) || (schib->pmcw.dev == dev_no)) { + + /* Caller asked for a specific devno */ + if (schib.pmcw.dev == dev_no) { return true; } } @@ -99,68 +140,88 @@ static void menu_setup(void) } } -static void virtio_setup(void) +/* + * Initialize the channel I/O subsystem so we can talk to our ipl/boot device. + */ +static void css_setup(void) { - Schib schib; - int ssid; - bool found = false; - uint16_t dev_no; - char ldp[] = "LOADPARM=[________]\n"; - VDev *vdev = virtio_get_device(); - QemuIplParameters *early_qipl = (QemuIplParameters *)QIPL_ADDRESS; - /* - * We unconditionally enable mss support. In every sane configuration, - * this will succeed; and even if it doesn't, stsch_err() can deal - * with the consequences. + * Unconditionally enable mss support. In every sane configuration this + * will succeed; and even if it doesn't, stsch_err() can handle it. */ enable_mss_facility(); +} + +/* + * Collect various pieces of information from the hypervisor/hardware that + * we'll use to determine exactly how we'll boot. + */ +static void boot_setup(void) +{ + char lpmsg[] = "LOADPARM=[________]\n"; sclp_get_loadparm_ascii(loadparm_str); - memcpy(ldp + 10, loadparm_str, LOADPARM_LEN); - sclp_print(ldp); + memcpy(lpmsg + 10, loadparm_str, 8); + sclp_print(lpmsg); - memcpy(&qipl, early_qipl, sizeof(QemuIplParameters)); + have_iplb = store_iplb(&iplb); +} - if (store_iplb(&iplb)) { - switch (iplb.pbt) { - case S390_IPL_TYPE_CCW: - dev_no = iplb.ccw.devno; - debug_print_int("device no. ", dev_no); - blk_schid.ssid = iplb.ccw.ssid & 0x3; - debug_print_int("ssid ", blk_schid.ssid); - found = find_dev(&schib, dev_no); - break; - case S390_IPL_TYPE_QEMU_SCSI: - vdev->scsi_device_selected = true; - vdev->selected_scsi_device.channel = iplb.scsi.channel; - vdev->selected_scsi_device.target = iplb.scsi.target; - vdev->selected_scsi_device.lun = iplb.scsi.lun; - blk_schid.ssid = iplb.scsi.ssid & 0x3; - found = find_dev(&schib, iplb.scsi.devno); - break; - default: - panic("List-directed IPL not supported yet!\n"); - } - menu_setup(); - } else { +static void find_boot_device(void) +{ + VDev *vdev = virtio_get_device(); + int ssid; + bool found; + + if (!have_iplb) { for (ssid = 0; ssid < 0x3; ssid++) { blk_schid.ssid = ssid; - found = find_dev(&schib, -1); + found = find_subch(-1); if (found) { - break; + return; } } + panic("Could not find a suitable boot device (none specified)\n"); + } + + switch (iplb.pbt) { + case S390_IPL_TYPE_CCW: + debug_print_int("device no. ", iplb.ccw.devno); + blk_schid.ssid = iplb.ccw.ssid & 0x3; + debug_print_int("ssid ", blk_schid.ssid); + found = find_subch(iplb.ccw.devno); + break; + case S390_IPL_TYPE_QEMU_SCSI: + vdev->scsi_device_selected = true; + vdev->selected_scsi_device.channel = iplb.scsi.channel; + vdev->selected_scsi_device.target = iplb.scsi.target; + vdev->selected_scsi_device.lun = iplb.scsi.lun; + blk_schid.ssid = iplb.scsi.ssid & 0x3; + found = find_subch(iplb.scsi.devno); + break; + default: + panic("List-directed IPL not supported yet!\n"); } - IPL_assert(found, "No virtio device found"); + IPL_assert(found, "Boot device not found\n"); +} + +static void virtio_setup(void) +{ + VDev *vdev = virtio_get_device(); + QemuIplParameters *early_qipl = (QemuIplParameters *)QIPL_ADDRESS; + + memcpy(&qipl, early_qipl, sizeof(QemuIplParameters)); + + if (have_iplb) { + menu_setup(); + } if (virtio_get_device_type() == VIRTIO_ID_NET) { sclp_print("Network boot device detected\n"); vdev->netboot_start_addr = qipl.netboot_start_addr; } else { virtio_blk_setup_device(blk_schid); - IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); } } @@ -168,9 +229,24 @@ static void virtio_setup(void) int main(void) { sclp_setup(); - virtio_setup(); - - zipl_load(); /* no return */ + css_setup(); + boot_setup(); + find_boot_device(); + enable_subchannel(blk_schid); + + switch (cutype) { + case CU_TYPE_DASD_3990: + case CU_TYPE_DASD_2107: + dasd_ipl(blk_schid, cutype); /* no return */ + break; + case CU_TYPE_VIRTIO: + virtio_setup(); + zipl_load(); /* no return */ + break; + default: + print_int("Attempting to boot from unexpected device type", cutype); + panic(""); + } panic("Failed to load OS from hard disk\n"); return 0; /* make compiler happy */ diff --git a/pc-bios/s390-ccw/netboot.mak b/pc-bios/s390-ccw/netboot.mak index 14e96b2aa6..5eefb7c289 100644 --- a/pc-bios/s390-ccw/netboot.mak +++ b/pc-bios/s390-ccw/netboot.mak @@ -1,7 +1,7 @@ SLOF_DIR := $(SRC_PATH)/roms/SLOF -NETOBJS := start.o sclp.o virtio.o virtio-net.o jump2ipl.o netmain.o \ +NETOBJS := start.o sclp.o cio.o virtio.o virtio-net.o jump2ipl.o netmain.o \ libnet.a libc.a LIBC_INC := -nostdinc -I$(SLOF_DIR)/lib/libc/include diff --git a/pc-bios/s390-ccw/netmain.c b/pc-bios/s390-ccw/netmain.c index 0392131c27..f3542cb2cf 100644 --- a/pc-bios/s390-ccw/netmain.c +++ b/pc-bios/s390-ccw/netmain.c @@ -33,6 +33,7 @@ #include <pxelinux.h> #include "s390-ccw.h" +#include "cio.h" #include "virtio.h" #define DEFAULT_BOOT_RETRIES 10 @@ -475,6 +476,7 @@ static bool find_net_dev(Schib *schib, int dev_no) if (!schib->pmcw.dnv) { continue; } + enable_subchannel(net_schid); if (!virtio_is_supported(net_schid)) { continue; } diff --git a/pc-bios/s390-ccw/s390-arch.h b/pc-bios/s390-ccw/s390-arch.h new file mode 100644 index 0000000000..504fc7c2f0 --- /dev/null +++ b/pc-bios/s390-ccw/s390-arch.h @@ -0,0 +1,103 @@ +/* + * S390 Basic Architecture + * + * Copyright (c) 2019 Jason J. Herne <jjherne@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef S390_ARCH_H +#define S390_ARCH_H + +typedef struct PSW { + uint64_t mask; + uint64_t addr; +} __attribute__ ((aligned(8))) PSW; +_Static_assert(sizeof(struct PSW) == 16, "PSW size incorrect"); + +/* Older PSW format used by LPSW instruction */ +typedef struct PSWLegacy { + uint32_t mask; + uint32_t addr; +} __attribute__ ((aligned(8))) PSWLegacy; +_Static_assert(sizeof(struct PSWLegacy) == 8, "PSWLegacy size incorrect"); + +/* s390 psw bit masks */ +#define PSW_MASK_IOINT 0x0200000000000000ULL +#define PSW_MASK_WAIT 0x0002000000000000ULL +#define PSW_MASK_EAMODE 0x0000000100000000ULL +#define PSW_MASK_BAMODE 0x0000000080000000ULL +#define PSW_MASK_ZMODE (PSW_MASK_EAMODE | PSW_MASK_BAMODE) + +/* Low core mapping */ +typedef struct LowCore { + /* prefix area: defined by architecture */ + PSWLegacy ipl_psw; /* 0x000 */ + uint32_t ccw1[2]; /* 0x008 */ + uint32_t ccw2[2]; /* 0x010 */ + uint8_t pad1[0x80 - 0x18]; /* 0x018 */ + uint32_t ext_params; /* 0x080 */ + uint16_t cpu_addr; /* 0x084 */ + uint16_t ext_int_code; /* 0x086 */ + uint16_t svc_ilen; /* 0x088 */ + uint16_t svc_code; /* 0x08a */ + uint16_t pgm_ilen; /* 0x08c */ + uint16_t pgm_code; /* 0x08e */ + uint32_t data_exc_code; /* 0x090 */ + uint16_t mon_class_num; /* 0x094 */ + uint16_t per_perc_atmid; /* 0x096 */ + uint64_t per_address; /* 0x098 */ + uint8_t exc_access_id; /* 0x0a0 */ + uint8_t per_access_id; /* 0x0a1 */ + uint8_t op_access_id; /* 0x0a2 */ + uint8_t ar_access_id; /* 0x0a3 */ + uint8_t pad2[0xA8 - 0xA4]; /* 0x0a4 */ + uint64_t trans_exc_code; /* 0x0a8 */ + uint64_t monitor_code; /* 0x0b0 */ + uint16_t subchannel_id; /* 0x0b8 */ + uint16_t subchannel_nr; /* 0x0ba */ + uint32_t io_int_parm; /* 0x0bc */ + uint32_t io_int_word; /* 0x0c0 */ + uint8_t pad3[0xc8 - 0xc4]; /* 0x0c4 */ + uint32_t stfl_fac_list; /* 0x0c8 */ + uint8_t pad4[0xe8 - 0xcc]; /* 0x0cc */ + uint64_t mcic; /* 0x0e8 */ + uint8_t pad5[0xf4 - 0xf0]; /* 0x0f0 */ + uint32_t external_damage_code; /* 0x0f4 */ + uint64_t failing_storage_address; /* 0x0f8 */ + uint8_t pad6[0x110 - 0x100]; /* 0x100 */ + uint64_t per_breaking_event_addr; /* 0x110 */ + uint8_t pad7[0x120 - 0x118]; /* 0x118 */ + PSW restart_old_psw; /* 0x120 */ + PSW external_old_psw; /* 0x130 */ + PSW svc_old_psw; /* 0x140 */ + PSW program_old_psw; /* 0x150 */ + PSW mcck_old_psw; /* 0x160 */ + PSW io_old_psw; /* 0x170 */ + uint8_t pad8[0x1a0 - 0x180]; /* 0x180 */ + PSW restart_new_psw; /* 0x1a0 */ + PSW external_new_psw; /* 0x1b0 */ + PSW svc_new_psw; /* 0x1c0 */ + PSW program_new_psw; /* 0x1d0 */ + PSW mcck_new_psw; /* 0x1e0 */ + PSW io_new_psw; /* 0x1f0 */ +} __attribute__((packed, aligned(8192))) LowCore; + +extern LowCore const *lowcore; + +static inline void set_prefix(uint32_t address) +{ + asm volatile("spx %0" : : "m" (address) : "memory"); +} + +static inline uint32_t store_prefix(void) +{ + uint32_t address; + + asm volatile("stpx %0" : "=m" (address)); + return address; +} + +#endif diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h index 9828aa233d..11bce7d73c 100644 --- a/pc-bios/s390-ccw/s390-ccw.h +++ b/pc-bios/s390-ccw/s390-ccw.h @@ -49,17 +49,10 @@ typedef unsigned long long __u64; #include "cio.h" #include "iplb.h" -typedef struct irb Irb; -typedef struct ccw1 Ccw1; -typedef struct cmd_orb CmdOrb; -typedef struct schib Schib; -typedef struct chsc_area_sda ChscAreaSda; -typedef struct senseid SenseId; -typedef struct subchannel_id SubChannelId; - /* start.s */ void disabled_wait(void); void consume_sclp_int(void); +void consume_io_int(void); /* main.c */ void panic(const char *string); @@ -80,7 +73,6 @@ unsigned long virtio_load_direct(ulong rec_list1, ulong rec_list2, bool virtio_is_supported(SubChannelId schid); void virtio_blk_setup_device(SubChannelId schid); int virtio_read(ulong sector, void *load_addr); -int enable_mss_facility(void); u64 get_clock(void); ulong get_second(void); diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S index 5c22cb0849..aa8fceb19d 100644 --- a/pc-bios/s390-ccw/start.S +++ b/pc-bios/s390-ccw/start.S @@ -71,6 +71,26 @@ consume_sclp_int: larl %r1, enabled_wait_psw lpswe 0(%r1) +/* + * void consume_io_int(void) + * + * eats one I/O interrupt + */ + .globl consume_io_int +consume_io_int: + /* enable I/O interrupts in cr6 */ + stctg %c6,%c6,0(%r15) + oi 4(%r15), 0xff + lctlg %c6,%c6,0(%r15) + /* prepare i/o call handler */ + larl %r1, io_new_code + stg %r1, 0x1f8 + larl %r1, io_new_mask + mvc 0x1f0(8),0(%r1) + /* load enabled wait PSW */ + larl %r1, enabled_wait_psw + lpswe 0(%r1) + external_new_code: /* disable service interrupts in cr0 */ stctg %c0,%c0,0(%r15) @@ -78,6 +98,13 @@ external_new_code: lctlg %c0,%c0,0(%r15) br %r14 +io_new_code: + /* disable I/O interrupts in cr6 */ + stctg %c6,%c6,0(%r15) + ni 4(%r15), 0x00 + lctlg %c6,%c6,0(%r15) + br %r14 + .align 8 disabled_wait_psw: .quad 0x0002000180000000,0x0000000000000000 @@ -85,3 +112,5 @@ enabled_wait_psw: .quad 0x0302000180000000,0x0000000000000000 external_new_mask: .quad 0x0000000180000000 +io_new_mask: + .quad 0x0000000180000000 diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c index cdb66f459e..fb40ca9828 100644 --- a/pc-bios/s390-ccw/virtio.c +++ b/pc-bios/s390-ccw/virtio.c @@ -10,9 +10,11 @@ #include "libc.h" #include "s390-ccw.h" +#include "cio.h" #include "virtio.h" #include "virtio-scsi.h" #include "bswap.h" +#include "helper.h" #define VRING_WAIT_REPLY_TIMEOUT 30 @@ -20,8 +22,6 @@ static VRing block[VIRTIO_MAX_VQS]; static char ring_area[VIRTIO_RING_SIZE * VIRTIO_MAX_VQS] __attribute__((__aligned__(PAGE_SIZE))); -static char chsc_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); - static VDev vdev = { .nr_vqs = 1, .vrings = block, @@ -90,38 +90,19 @@ int drain_irqs(SubChannelId schid) } } -static int run_ccw(VDev *vdev, int cmd, void *ptr, int len) +static int run_ccw(VDev *vdev, int cmd, void *ptr, int len, bool sli) { Ccw1 ccw = {}; - CmdOrb orb = {}; - Schib schib; - int r; - - /* start command processing */ - stsch_err(vdev->schid, &schib); - /* enable the subchannel for IPL device */ - schib.pmcw.ena = 1; - msch(vdev->schid, &schib); - - /* start subchannel command */ - orb.fmt = 1; - orb.cpa = (u32)(long)&ccw; - orb.lpm = 0x80; ccw.cmd_code = cmd; ccw.cda = (long)ptr; ccw.count = len; - r = ssch(vdev->schid, &orb); - /* - * XXX Wait until device is done processing the CCW. For now we can - * assume that a simple tsch will have finished the CCW processing, - * but the architecture allows for asynchronous operation - */ - if (!r) { - r = drain_irqs(vdev->schid); + if (sli) { + ccw.flags |= CCW_FLAG_SLI; } - return r; + + return do_cio(vdev->schid, vdev->senseid.cu_type, ptr2u32(&ccw), CCW_FMT1); } static void vring_init(VRing *vr, VqInfo *info) @@ -263,7 +244,7 @@ void virtio_setup_ccw(VDev *vdev) vdev->config.blk.blk_size = 0; /* mark "illegal" - setup started... */ vdev->guessed_disk_nature = VIRTIO_GDN_NONE; - run_ccw(vdev, CCW_CMD_VDEV_RESET, NULL, 0); + run_ccw(vdev, CCW_CMD_VDEV_RESET, NULL, 0, false); switch (vdev->senseid.cu_model) { case VIRTIO_ID_NET: @@ -284,18 +265,19 @@ void virtio_setup_ccw(VDev *vdev) default: panic("Unsupported virtio device\n"); } - IPL_assert(run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size) == 0, - "Could not get block device configuration"); + IPL_assert( + run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0, + "Could not get block device configuration"); /* Feature negotiation */ for (i = 0; i < ARRAY_SIZE(vdev->guest_features); i++) { feats.features = 0; feats.index = i; - rc = run_ccw(vdev, CCW_CMD_READ_FEAT, &feats, sizeof(feats)); + rc = run_ccw(vdev, CCW_CMD_READ_FEAT, &feats, sizeof(feats), false); IPL_assert(rc == 0, "Could not get features bits"); vdev->guest_features[i] &= bswap32(feats.features); feats.features = bswap32(vdev->guest_features[i]); - rc = run_ccw(vdev, CCW_CMD_WRITE_FEAT, &feats, sizeof(feats)); + rc = run_ccw(vdev, CCW_CMD_WRITE_FEAT, &feats, sizeof(feats), false); IPL_assert(rc == 0, "Could not set features bits"); } @@ -312,16 +294,17 @@ void virtio_setup_ccw(VDev *vdev) }; IPL_assert( - run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config)) == 0, + run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false) == 0, "Could not get block device VQ configuration"); info.num = config.num; vring_init(&vdev->vrings[i], &info); vdev->vrings[i].schid = vdev->schid; - IPL_assert(run_ccw(vdev, CCW_CMD_SET_VQ, &info, sizeof(info)) == 0, - "Cannot set VQ info"); + IPL_assert( + run_ccw(vdev, CCW_CMD_SET_VQ, &info, sizeof(info), false) == 0, + "Cannot set VQ info"); } IPL_assert( - run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status)) == 0, + run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false) == 0, "Could not write status to host"); } @@ -329,8 +312,15 @@ bool virtio_is_supported(SubChannelId schid) { vdev.schid = schid; memset(&vdev.senseid, 0, sizeof(vdev.senseid)); - /* run sense id command */ - if (run_ccw(&vdev, CCW_CMD_SENSE_ID, &vdev.senseid, sizeof(vdev.senseid))) { + + /* + * Run sense id command. + * The size of the senseid data differs between devices (notably, + * between virtio devices and dasds), so specify the largest possible + * size and suppress the incorrect length indication for smaller sizes. + */ + if (run_ccw(&vdev, CCW_CMD_SENSE_ID, &vdev.senseid, sizeof(vdev.senseid), + true)) { return false; } if (vdev.senseid.cu_type == 0x3832) { @@ -343,20 +333,3 @@ bool virtio_is_supported(SubChannelId schid) } return false; } - -int enable_mss_facility(void) -{ - int ret; - ChscAreaSda *sda_area = (ChscAreaSda *) chsc_page; - - memset(sda_area, 0, PAGE_SIZE); - sda_area->request.length = 0x0400; - sda_area->request.code = 0x0031; - sda_area->operation_code = 0x2; - - ret = chsc(sda_area); - if ((ret == 0) && (sda_area->response.code == 0x0001)) { - return 0; - } - return -EIO; -} diff --git a/pc-bios/s390-netboot.img b/pc-bios/s390-netboot.img index 2c6886efb8..aa90fbccb1 100644 --- a/pc-bios/s390-netboot.img +++ b/pc-bios/s390-netboot.img Binary files differdiff --git a/qemu-options.hx b/qemu-options.hx index 08749a3391..bdc74c0620 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4233,6 +4233,11 @@ using the SNIA NVM programming model (e.g. Intel NVDIMM). If @option{pmem} is set to 'on', QEMU will take necessary operations to guarantee the persistence of its own writes to @option{mem-path} (e.g. in vNVDIMM label emulation and live migration). +Also, we will map the backend-file with MAP_SYNC flag, which ensures the +file metadata is in sync for @option{mem-path} in case of host crash +or a power failure. MAP_SYNC requires support from both the host kernel +(since Linux kernel 4.15) and the filesystem of @option{mem-path} mounted +with DAX option. @item -object memory-backend-ram,id=@var{id},merge=@var{on|off},dump=@var{on|off},share=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave} diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index a310a9072b..c3819d2b98 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -95,7 +95,7 @@ for arch in $ARCHLIST; do rm -rf "$output/linux-headers/asm-$arch" mkdir -p "$output/linux-headers/asm-$arch" - for header in kvm.h unistd.h bitsperlong.h; do + for header in kvm.h unistd.h bitsperlong.h mman.h; do cp "$tmpdir/include/asm/$header" "$output/linux-headers/asm-$arch" done @@ -139,13 +139,13 @@ done rm -rf "$output/linux-headers/linux" mkdir -p "$output/linux-headers/linux" for header in kvm.h vfio.h vfio_ccw.h vhost.h \ - psci.h psp-sev.h userfaultfd.h; do + psci.h psp-sev.h userfaultfd.h mman.h; do cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" done rm -rf "$output/linux-headers/asm-generic" mkdir -p "$output/linux-headers/asm-generic" -for header in unistd.h bitsperlong.h; do +for header in unistd.h bitsperlong.h mman-common.h mman.h hugetlb_encode.h; do cp "$tmpdir/include/asm-generic/$header" "$output/linux-headers/asm-generic" done diff --git a/target/i386/cpu.c b/target/i386/cpu.c index e1687f7547..722c5514d4 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -2935,6 +2935,56 @@ static X86CPUDefinition builtin_x86_defs[] = { .model_id = "AMD EPYC Processor (with IBPB)", .cache_info = &epyc_cache_info, }, + { + .name = "Dhyana", + .level = 0xd, + .vendor = CPUID_VENDOR_HYGON, + .family = 24, + .model = 0, + .stepping = 1, + .features[FEAT_1_EDX] = + CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | + CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | + CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | + CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | + CPUID_VME | CPUID_FP87, + .features[FEAT_1_ECX] = + CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | + CPUID_EXT_XSAVE | CPUID_EXT_POPCNT | + CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | + CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 | + CPUID_EXT_MONITOR | CPUID_EXT_SSE3, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | + CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | + CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | + CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | + CPUID_EXT3_TOPOEXT, + .features[FEAT_8000_0008_EBX] = + CPUID_8000_0008_EBX_IBPB, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED | + CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT, + /* + * Missing: XSAVES (not supported by some Linux versions, + * including v4.1 to v4.12). + * KVM doesn't yet expose any XSAVES state save component. + */ + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_SVM] = + CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, + .xlevel = 0x8000001E, + .model_id = "Hygon Dhyana Processor", + .cache_info = &epyc_cache_info, + }, }; typedef struct PropValue { @@ -4541,6 +4591,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0x8000001D: *eax = 0; + if (cpu->cache_info_passthrough) { + host_cpuid(index, count, eax, ebx, ecx, edx); + break; + } switch (count) { case 0: /* L1 dcache info */ encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, cs, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 828067bd1c..0128910661 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -726,6 +726,8 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_VENDOR_VIA "CentaurHauls" +#define CPUID_VENDOR_HYGON "HygonGenuine" + #define CPUID_MWAIT_IBE (1U << 1) /* Interrupts can exit capability */ #define CPUID_MWAIT_EMX (1U << 0) /* enumeration supported */ diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 9e86db0963..02e22e2017 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -2164,7 +2164,7 @@ uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift) * or equal to the (logical) backing page size of guest RAM */ kvm_get_smmu_info(&info, &error_fatal); - rampagesize = qemu_getrampagesize(); + rampagesize = qemu_minrampagesize(); best_page_shift = 0; for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c index 698dd9cb82..b58ef0a8ef 100644 --- a/target/s390x/cpu.c +++ b/target/s390x/cpu.c @@ -399,6 +399,13 @@ int s390_set_memory_limit(uint64_t new_limit, uint64_t *hw_limit) return 0; } +void s390_set_max_pagesize(uint64_t pagesize, Error **errp) +{ + if (kvm_enabled()) { + kvm_s390_set_max_pagesize(pagesize, errp); + } +} + void s390_cmma_reset(void) { if (kvm_enabled()) { diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index d8990c405a..7305cacc7b 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -734,6 +734,7 @@ static inline void s390_do_cpu_load_normal(CPUState *cs, run_on_cpu_data arg) /* cpu.c */ void s390_crypto_reset(void); int s390_set_memory_limit(uint64_t new_limit, uint64_t *hw_limit); +void s390_set_max_pagesize(uint64_t pagesize, Error **errp); void s390_cmma_reset(void); void s390_enable_css_support(S390CPU *cpu); int s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch_id, diff --git a/target/s390x/kvm-stub.c b/target/s390x/kvm-stub.c index bf7795e47a..22b4514ca6 100644 --- a/target/s390x/kvm-stub.c +++ b/target/s390x/kvm-stub.c @@ -93,6 +93,10 @@ int kvm_s390_set_mem_limit(uint64_t new_limit, uint64_t *hw_limit) return 0; } +void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp) +{ +} + void kvm_s390_crypto_reset(void) { } diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index 2c6e35b5aa..7df7be4a1b 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -283,44 +283,37 @@ void kvm_s390_crypto_reset(void) } } -static int kvm_s390_configure_mempath_backing(KVMState *s) +void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp) { - size_t path_psize = qemu_getrampagesize(); - - if (path_psize == 4 * KiB) { - return 0; + if (pagesize == 4 * KiB) { + return; } if (!hpage_1m_allowed()) { - error_report("This QEMU machine does not support huge page " - "mappings"); - return -EINVAL; + error_setg(errp, "This QEMU machine does not support huge page " + "mappings"); + return; } - if (path_psize != 1 * MiB) { - error_report("Memory backing with 2G pages was specified, " - "but KVM does not support this memory backing"); - return -EINVAL; + if (pagesize != 1 * MiB) { + error_setg(errp, "Memory backing with 2G pages was specified, " + "but KVM does not support this memory backing"); + return; } - if (kvm_vm_enable_cap(s, KVM_CAP_S390_HPAGE_1M, 0)) { - error_report("Memory backing with 1M pages was specified, " - "but KVM does not support this memory backing"); - return -EINVAL; + if (kvm_vm_enable_cap(kvm_state, KVM_CAP_S390_HPAGE_1M, 0)) { + error_setg(errp, "Memory backing with 1M pages was specified, " + "but KVM does not support this memory backing"); + return; } cap_hpage_1m = 1; - return 0; } int kvm_arch_init(MachineState *ms, KVMState *s) { MachineClass *mc = MACHINE_GET_CLASS(ms); - if (kvm_s390_configure_mempath_backing(s)) { - return -EINVAL; - } - mc->default_cpu_type = S390_CPU_TYPE_NAME("host"); cap_sync_regs = kvm_check_extension(s, KVM_CAP_SYNC_REGS); cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF); diff --git a/target/s390x/kvm_s390x.h b/target/s390x/kvm_s390x.h index 6e52287da3..caf985955b 100644 --- a/target/s390x/kvm_s390x.h +++ b/target/s390x/kvm_s390x.h @@ -36,6 +36,7 @@ int kvm_s390_cmma_active(void); void kvm_s390_cmma_reset(void); void kvm_s390_reset_vcpu(S390CPU *cpu); int kvm_s390_set_mem_limit(uint64_t new_limit, uint64_t *hw_limit); +void kvm_s390_set_max_pagesize(uint64_t pagesize, Error **errp); void kvm_s390_crypto_reset(void); void kvm_s390_restart_interrupt(S390CPU *cpu); void kvm_s390_stop_interrupt(S390CPU *cpu); diff --git a/tests/acceptance/empty_cpu_model.py b/tests/acceptance/empty_cpu_model.py new file mode 100644 index 0000000000..3f4f663582 --- /dev/null +++ b/tests/acceptance/empty_cpu_model.py @@ -0,0 +1,19 @@ +# Check for crash when using empty -cpu option +# +# Copyright (c) 2019 Red Hat, Inc. +# +# Author: +# Eduardo Habkost <ehabkost@redhat.com> +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. +import subprocess +from avocado_qemu import Test + +class EmptyCPUModel(Test): + def test(self): + cmd = [self.qemu_bin, '-S', '-display', 'none', '-machine', 'none', '-cpu', ''] + r = subprocess.run(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE) + self.assertEquals(r.returncode, 1, "QEMU exit code should be 1") + self.assertEquals(r.stdout, b'', "QEMU stdout should be empty") + self.assertNotEquals(r.stderr, b'', "QEMU stderr shouldn't be empty") diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c index c591748aaf..24852d4c7d 100644 --- a/tests/boot-serial-test.c +++ b/tests/boot-serial-test.c @@ -114,7 +114,7 @@ static testdef_t tests[] = { { "sparc", "SS-4", "", "MB86904" }, { "sparc", "SS-600MP", "", "TMS390Z55" }, { "sparc64", "sun4u", "", "UltraSPARC" }, - { "s390x", "s390-ccw-virtio", "", "virtio device" }, + { "s390x", "s390-ccw-virtio", "", "device" }, { "m68k", "mcf5208evb", "", "TT", sizeof(kernel_mcf5208), kernel_mcf5208 }, { "microblaze", "petalogix-s3adsp1800", "", "TT", sizeof(kernel_pls3adsp1800), kernel_pls3adsp1800 }, diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c index 8565885420..f7f177d0ea 100644 --- a/util/mmap-alloc.c +++ b/util/mmap-alloc.c @@ -10,6 +10,13 @@ * later. See the COPYING file in the top-level directory. */ +#ifdef CONFIG_LINUX +#include <linux/mman.h> +#else /* !CONFIG_LINUX */ +#define MAP_SYNC 0x0 +#define MAP_SHARED_VALIDATE 0x0 +#endif /* CONFIG_LINUX */ + #include "qemu/osdep.h" #include "qemu/mmap-alloc.h" #include "qemu/host-utils.h" @@ -75,9 +82,14 @@ size_t qemu_mempath_getpagesize(const char *mem_path) return getpagesize(); } -void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) +void *qemu_ram_mmap(int fd, + size_t size, + size_t align, + bool shared, + bool is_pmem) { int flags; + int map_sync_flags = 0; int guardfd; size_t offset; size_t pagesize; @@ -128,9 +140,40 @@ void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) flags = MAP_FIXED; flags |= fd == -1 ? MAP_ANONYMOUS : 0; flags |= shared ? MAP_SHARED : MAP_PRIVATE; + if (shared && is_pmem) { + map_sync_flags = MAP_SYNC | MAP_SHARED_VALIDATE; + } + offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr; - ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE, flags, fd, 0); + ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE, + flags | map_sync_flags, fd, 0); + + if (ptr == MAP_FAILED && map_sync_flags) { + if (errno == ENOTSUP) { + char *proc_link, *file_name; + int len; + proc_link = g_strdup_printf("/proc/self/fd/%d", fd); + file_name = g_malloc0(PATH_MAX); + len = readlink(proc_link, file_name, PATH_MAX - 1); + if (len < 0) { + len = 0; + } + file_name[len] = '\0'; + fprintf(stderr, "Warning: requesting persistence across crashes " + "for backend file %s failed. Proceeding without " + "persistence, data might become corrupted in case of host " + "crash.\n", file_name); + g_free(proc_link); + g_free(file_name); + } + /* + * if map failed with MAP_SHARED_VALIDATE | MAP_SYNC, + * we will remove these flags to handle compatibility. + */ + ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE, + flags, fd, 0); + } if (ptr == MAP_FAILED) { munmap(guardptr, total); diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 88dda9cd39..d97b1717d5 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -203,7 +203,7 @@ void *qemu_memalign(size_t alignment, size_t size) void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared) { size_t align = QEMU_VMALLOC_ALIGN; - void *ptr = qemu_ram_mmap(-1, size, align, shared); + void *ptr = qemu_ram_mmap(-1, size, align, shared, false); if (ptr == MAP_FAILED) { return NULL; diff --git a/vl.c b/vl.c index ff5dfb6fbc..4019a4387d 100644 --- a/vl.c +++ b/vl.c @@ -1465,45 +1465,34 @@ static int usb_parse(const char *cmdline) MachineState *current_machine; -static MachineClass *find_machine(const char *name) +static MachineClass *find_machine(const char *name, GSList *machines) { - GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); - MachineClass *mc = NULL; + GSList *el; for (el = machines; el; el = el->next) { - MachineClass *temp = el->data; + MachineClass *mc = el->data; - if (!strcmp(temp->name, name)) { - mc = temp; - break; - } - if (temp->alias && - !strcmp(temp->alias, name)) { - mc = temp; - break; + if (!strcmp(mc->name, name) || !g_strcmp0(mc->alias, name)) { + return mc; } } - g_slist_free(machines); - return mc; + return NULL; } -MachineClass *find_default_machine(void) +static MachineClass *find_default_machine(GSList *machines) { - GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); - MachineClass *mc = NULL; + GSList *el; for (el = machines; el; el = el->next) { - MachineClass *temp = el->data; + MachineClass *mc = el->data; - if (temp->is_default) { - mc = temp; - break; + if (mc->is_default) { + return mc; } } - g_slist_free(machines); - return mc; + return NULL; } MachineInfoList *qmp_query_machines(Error **errp) @@ -2585,22 +2574,12 @@ static gint machine_class_cmp(gconstpointer a, gconstpointer b) object_class_get_name(OBJECT_CLASS(mc1))); } - static MachineClass *machine_parse(const char *name) +static MachineClass *machine_parse(const char *name, GSList *machines) { - MachineClass *mc = NULL; - GSList *el, *machines = object_class_get_list(TYPE_MACHINE, false); + MachineClass *mc; + GSList *el; - if (name) { - mc = find_machine(name); - } - if (mc) { - g_slist_free(machines); - return mc; - } - if (name && !is_help_option(name)) { - error_report("unsupported machine type"); - error_printf("Use -machine help to list supported machines\n"); - } else { + if (is_help_option(name)) { printf("Supported machines are:\n"); machines = g_slist_sort(machines, machine_class_cmp); for (el = machines; el; el = el->next) { @@ -2612,10 +2591,16 @@ static gint machine_class_cmp(gconstpointer a, gconstpointer b) mc->is_default ? " (default)" : "", mc->deprecation_reason ? " (deprecated)" : ""); } + exit(0); } - g_slist_free(machines); - exit(!name || !is_help_option(name)); + mc = find_machine(name, machines); + if (!mc) { + error_report("unsupported machine type"); + error_printf("Use -machine help to list supported machines\n"); + exit(1); + } + return mc; } void qemu_add_exit_notifier(Notifier *notify) @@ -2706,7 +2691,8 @@ static const QEMUOption *lookup_opt(int argc, char **argv, static MachineClass *select_machine(void) { - MachineClass *machine_class = find_default_machine(); + GSList *machines = object_class_get_list(TYPE_MACHINE, false); + MachineClass *machine_class = find_default_machine(machines); const char *optarg; QemuOpts *opts; Location loc; @@ -2718,7 +2704,7 @@ static MachineClass *select_machine(void) optarg = qemu_opt_get(opts, "type"); if (optarg) { - machine_class = machine_parse(optarg); + machine_class = machine_parse(optarg, machines); } if (!machine_class) { @@ -2728,6 +2714,7 @@ static MachineClass *select_machine(void) } loc_pop(&loc); + g_slist_free(machines); return machine_class; } @@ -3002,7 +2989,7 @@ int main(int argc, char **argv, char **envp) const char *optarg; const char *loadvm = NULL; MachineClass *machine_class; - const char *cpu_model; + const char *cpu_option; const char *vga_model = NULL; const char *qtest_chrdev = NULL; const char *qtest_log = NULL; @@ -3081,7 +3068,7 @@ int main(int argc, char **argv, char **envp) QLIST_INIT (&vm_change_state_head); os_setup_early_signal_handling(); - cpu_model = NULL; + cpu_option = NULL; snapshot = 0; nb_nics = 0; @@ -3133,7 +3120,7 @@ int main(int argc, char **argv, char **envp) switch(popt->index) { case QEMU_OPTION_cpu: /* hw initialization will check this */ - cpu_model = optarg; + cpu_option = optarg; break; case QEMU_OPTION_hda: case QEMU_OPTION_hdb: @@ -4050,8 +4037,8 @@ int main(int argc, char **argv, char **envp) qemu_set_hw_version(machine_class->hw_version); } - if (cpu_model && is_help_option(cpu_model)) { - list_cpus(cpu_model); + if (cpu_option && is_help_option(cpu_option)) { + list_cpus(cpu_option); exit(0); } @@ -4299,9 +4286,9 @@ int main(int argc, char **argv, char **envp) * Global properties get set up by qdev_prop_register_global(), * called from user_register_global_props(), and certain option * desugaring. Also in CPU feature desugaring (buried in - * parse_cpu_model()), which happens below this point, but may + * parse_cpu_option()), which happens below this point, but may * only target the CPU type, which can only be created after - * parse_cpu_model() returned the type. + * parse_cpu_option() returned the type. * * Machine compat properties: object_set_machine_compat_props(). * Accelerator compat props: object_set_accelerator_compat_props(), @@ -4465,8 +4452,8 @@ int main(int argc, char **argv, char **envp) /* parse features once if machine provides default cpu_type */ current_machine->cpu_type = machine_class->default_cpu_type; - if (cpu_model) { - current_machine->cpu_type = parse_cpu_model(cpu_model); + if (cpu_option) { + current_machine->cpu_type = parse_cpu_option(cpu_option); } parse_numa_opts(current_machine); |