Diffstat (limited to 'hw')
 hw/arm/sbsa-ref.c               |  35
 hw/arm/virt.c                   |   2
 hw/core/machine.c               |  42
 hw/display/virtio-gpu-udmabuf.c |   3
 hw/display/virtio-gpu-virgl.c   |  43
 hw/display/virtio-gpu.c         |  99
 hw/hppa/machine.c               |  15
 hw/intc/pnv_xive.c              |  11
 hw/intc/pnv_xive2.c             |  44
 hw/intc/spapr_xive.c            |  16
 hw/intc/xive.c                  |  57
 hw/nvme/ctrl.c                  |  34
 hw/nvme/ns.c                    |  53
 hw/nvme/subsys.c                |   6
 hw/pci-host/pnv_phb4.c          |  14
 hw/ppc/meson.build              |   1
 hw/ppc/pnv.c                    |   3
 hw/ppc/ppc.c                    |   6
 hw/ppc/ppc440_bamboo.c          |  17
 hw/ppc/prep.c                   |  20
 hw/ppc/spapr.c                  |  18
 hw/ppc/spapr_caps.c             |  14
 hw/ppc/spapr_cpu_core.c         |   7
 hw/ppc/spapr_hcall.c            | 335
 hw/ppc/spapr_nested.c           | 395
 hw/remote/proxy.c               |   1
 hw/riscv/spike.c                |   2
 hw/riscv/virt.c                 |   2
28 files changed, 838 insertions(+), 457 deletions(-)
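A note before the patch body: the new MachineClass flag mc->cpu_cluster_has_numa_boundary, set by the Arm, RISC-V and sbsa boards below, gates the pairwise check added to hw/core/machine.c. The following is a rough standalone sketch of that check, not QEMU code: struct cpu_props stands in for CpuInstanceProperties, the has_* presence flags are omitted for brevity, and the real validate_cpu_cluster_to_numa_boundary() only warns via warn_report() rather than failing.

/*
 * Simplified stand-in for the cluster-to-NUMA boundary check:
 * two CPUs sharing a socket and cluster must not map to different
 * NUMA nodes, or Linux cannot build its scheduling domains.
 */
#include <stdbool.h>
#include <stdio.h>

struct cpu_props {
    int socket_id;
    int cluster_id;
    int node_id;
};

static bool cluster_numa_boundary_ok(const struct cpu_props *cpus, int len)
{
    for (int i = 0; i < len; i++) {
        for (int j = i + 1; j < len; j++) {
            if (cpus[i].socket_id == cpus[j].socket_id &&
                cpus[i].cluster_id == cpus[j].cluster_id &&
                cpus[i].node_id != cpus[j].node_id) {
                fprintf(stderr,
                        "CPU-%d and CPU-%d share socket-%d-cluster-%d but "
                        "sit on node-%d and node-%d\n",
                        i, j, cpus[i].socket_id, cpus[i].cluster_id,
                        cpus[i].node_id, cpus[j].node_id);
                return false;
            }
        }
    }
    return true;
}

int main(void)
{
    /* CPU 0 and CPU 1 share socket 0 / cluster 0 but differ in node. */
    struct cpu_props cpus[] = {
        { .socket_id = 0, .cluster_id = 0, .node_id = 0 },
        { .socket_id = 0, .cluster_id = 0, .node_id = 1 },
        { .socket_id = 0, .cluster_id = 1, .node_id = 1 },
    };
    return cluster_numa_boundary_ok(cpus, 3) ? 0 : 1;
}

Built as-is, the example exits non-zero because CPU-0 and CPU-1 share socket 0, cluster 0 yet land on nodes 0 and 1 — the topology the warn_report() in the real patch flags. The full patch follows.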
diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c index de21200ff9..b774d80291 100644 --- a/hw/arm/sbsa-ref.c +++ b/hw/arm/sbsa-ref.c @@ -65,6 +65,7 @@ enum { SBSA_CPUPERIPHS, SBSA_GIC_DIST, SBSA_GIC_REDIST, + SBSA_GIC_ITS, SBSA_SECURE_EC, SBSA_GWDT_WS0, SBSA_GWDT_REFRESH, @@ -108,6 +109,7 @@ static const MemMapEntry sbsa_ref_memmap[] = { [SBSA_CPUPERIPHS] = { 0x40000000, 0x00040000 }, [SBSA_GIC_DIST] = { 0x40060000, 0x00010000 }, [SBSA_GIC_REDIST] = { 0x40080000, 0x04000000 }, + [SBSA_GIC_ITS] = { 0x44081000, 0x00020000 }, [SBSA_SECURE_EC] = { 0x50000000, 0x00001000 }, [SBSA_GWDT_REFRESH] = { 0x50010000, 0x00001000 }, [SBSA_GWDT_CONTROL] = { 0x50011000, 0x00001000 }, @@ -181,8 +183,15 @@ static void sbsa_fdt_add_gic_node(SBSAMachineState *sms) 2, sbsa_ref_memmap[SBSA_GIC_REDIST].base, 2, sbsa_ref_memmap[SBSA_GIC_REDIST].size); + nodename = g_strdup_printf("/intc/its"); + qemu_fdt_add_subnode(sms->fdt, nodename); + qemu_fdt_setprop_sized_cells(sms->fdt, nodename, "reg", + 2, sbsa_ref_memmap[SBSA_GIC_ITS].base, + 2, sbsa_ref_memmap[SBSA_GIC_ITS].size); + g_free(nodename); } + /* * Firmware on this machine only uses ACPI table to load OS, these limited * device tree nodes are just to let firmware know the info which varies from @@ -219,7 +228,7 @@ static void create_fdt(SBSAMachineState *sms) * fw compatibility. */ qemu_fdt_setprop_cell(fdt, "/", "machine-version-major", 0); - qemu_fdt_setprop_cell(fdt, "/", "machine-version-minor", 1); + qemu_fdt_setprop_cell(fdt, "/", "machine-version-minor", 2); if (ms->numa_state->have_numa_distance) { int size = nb_numa_nodes * nb_numa_nodes * 3 * sizeof(uint32_t); @@ -409,7 +418,20 @@ static void create_secure_ram(SBSAMachineState *sms, memory_region_add_subregion(secure_sysmem, base, secram); } -static void create_gic(SBSAMachineState *sms) +static void create_its(SBSAMachineState *sms) +{ + const char *itsclass = its_class_name(); + DeviceState *dev; + + dev = qdev_new(itsclass); + + object_property_set_link(OBJECT(dev), "parent-gicv3", OBJECT(sms->gic), + &error_abort); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, sbsa_ref_memmap[SBSA_GIC_ITS].base); +} + +static void create_gic(SBSAMachineState *sms, MemoryRegion *mem) { unsigned int smp_cpus = MACHINE(sms)->smp.cpus; SysBusDevice *gicbusdev; @@ -436,6 +458,10 @@ static void create_gic(SBSAMachineState *sms) qdev_prop_set_uint32(sms->gic, "len-redist-region-count", 1); qdev_prop_set_uint32(sms->gic, "redist-region-count[0]", redist0_count); + object_property_set_link(OBJECT(sms->gic), "sysmem", + OBJECT(mem), &error_fatal); + qdev_prop_set_bit(sms->gic, "has-lpi", true); + gicbusdev = SYS_BUS_DEVICE(sms->gic); sysbus_realize_and_unref(gicbusdev, &error_fatal); sysbus_mmio_map(gicbusdev, 0, sbsa_ref_memmap[SBSA_GIC_DIST].base); @@ -482,6 +508,7 @@ static void create_gic(SBSAMachineState *sms) sysbus_connect_irq(gicbusdev, i + 3 * smp_cpus, qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); } + create_its(sms); } static void create_uart(const SBSAMachineState *sms, int uart, @@ -788,7 +815,7 @@ static void sbsa_ref_init(MachineState *machine) create_secure_ram(sms, secure_sysmem); - create_gic(sms); + create_gic(sms, sysmem); create_uart(sms, SBSA_UART, sysmem, serial_hd(0)); create_uart(sms, SBSA_SECURE_UART, secure_sysmem, serial_hd(1)); @@ -883,6 +910,8 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data) mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids; mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props; 
mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id; + /* platform instead of architectural choice */ + mc->cpu_cluster_has_numa_boundary = true; } static const TypeInfo sbsa_ref_info = { diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 9b9f7d9c68..3937e30477 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -3033,6 +3033,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) mc->smp_props.clusters_supported = true; mc->auto_enable_numa_with_memhp = true; mc->auto_enable_numa_with_memdev = true; + /* platform instead of architectural choice */ + mc->cpu_cluster_has_numa_boundary = true; mc->default_ram_id = "mach-virt.ram"; mc->default_nic = "virtio-net-pci"; diff --git a/hw/core/machine.c b/hw/core/machine.c index 1000406211..46f8f9a2b0 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -1262,6 +1262,45 @@ static void machine_numa_finish_cpu_init(MachineState *machine) g_string_free(s, true); } +static void validate_cpu_cluster_to_numa_boundary(MachineState *ms) +{ + MachineClass *mc = MACHINE_GET_CLASS(ms); + NumaState *state = ms->numa_state; + const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); + const CPUArchId *cpus = possible_cpus->cpus; + int i, j; + + if (state->num_nodes <= 1 || possible_cpus->len <= 1) { + return; + } + + /* + * The Linux scheduling domain can't be parsed when the multiple CPUs + * in one cluster have been associated with different NUMA nodes. However, + * it's fine to associate one NUMA node with CPUs in different clusters. + */ + for (i = 0; i < possible_cpus->len; i++) { + for (j = i + 1; j < possible_cpus->len; j++) { + if (cpus[i].props.has_socket_id && + cpus[i].props.has_cluster_id && + cpus[i].props.has_node_id && + cpus[j].props.has_socket_id && + cpus[j].props.has_cluster_id && + cpus[j].props.has_node_id && + cpus[i].props.socket_id == cpus[j].props.socket_id && + cpus[i].props.cluster_id == cpus[j].props.cluster_id && + cpus[i].props.node_id != cpus[j].props.node_id) { + warn_report("CPU-%d and CPU-%d in socket-%" PRId64 "-cluster-%" PRId64 + " have been associated with node-%" PRId64 " and node-%" PRId64 + " respectively. 
It can cause OSes like Linux to" + " misbehave", i, j, cpus[i].props.socket_id, + cpus[i].props.cluster_id, cpus[i].props.node_id, + cpus[j].props.node_id); + } + } + } +} + MemoryRegion *machine_consume_memdev(MachineState *machine, HostMemoryBackend *backend) { @@ -1355,6 +1394,9 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error * numa_complete_configuration(machine); if (machine->numa_state->num_nodes) { machine_numa_finish_cpu_init(machine); + if (machine_class->cpu_cluster_has_numa_boundary) { + validate_cpu_cluster_to_numa_boundary(machine); + } } } diff --git a/hw/display/virtio-gpu-udmabuf.c b/hw/display/virtio-gpu-udmabuf.c index 69e2cf0bd6..ef1a740de5 100644 --- a/hw/display/virtio-gpu-udmabuf.c +++ b/hw/display/virtio-gpu-udmabuf.c @@ -132,7 +132,8 @@ void virtio_gpu_init_udmabuf(struct virtio_gpu_simple_resource *res) void *pdata = NULL; res->dmabuf_fd = -1; - if (res->iov_cnt == 1) { + if (res->iov_cnt == 1 && + res->iov[0].iov_len < 4096) { pdata = res->iov[0].iov_base; } else { virtio_gpu_create_udmabuf(res); diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c index 1c47603d40..8bb7a2c21f 100644 --- a/hw/display/virtio-gpu-virgl.c +++ b/hw/display/virtio-gpu-virgl.c @@ -18,9 +18,17 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-gpu.h" +#include "ui/egl-helpers.h" + #include <virglrenderer.h> -static struct virgl_renderer_callbacks virtio_gpu_3d_cbs; +#if VIRGL_RENDERER_CALLBACKS_VERSION >= 4 +static void * +virgl_get_egl_display(G_GNUC_UNUSED void *cookie) +{ + return qemu_egl_display; +} +#endif static void virgl_cmd_create_resource_2d(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd) @@ -145,7 +153,6 @@ static void virgl_cmd_set_scanout(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd) { struct virtio_gpu_set_scanout ss; - struct virgl_renderer_resource_info info; int ret; VIRTIO_GPU_FILL_CMD(ss); @@ -160,10 +167,20 @@ static void virgl_cmd_set_scanout(VirtIOGPU *g, } g->parent_obj.enable = 1; - memset(&info, 0, sizeof(info)); - if (ss.resource_id && ss.r.width && ss.r.height) { + struct virgl_renderer_resource_info info; + void *d3d_tex2d = NULL; + +#ifdef HAVE_VIRGL_D3D_INFO_EXT + struct virgl_renderer_resource_info_ext ext; + memset(&ext, 0, sizeof(ext)); + ret = virgl_renderer_resource_get_info_ext(ss.resource_id, &ext); + info = ext.base; + d3d_tex2d = ext.d3d_tex2d; +#else + memset(&info, 0, sizeof(info)); ret = virgl_renderer_resource_get_info(ss.resource_id, &info); +#endif if (ret == -1) { qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n", @@ -178,7 +195,8 @@ static void virgl_cmd_set_scanout(VirtIOGPU *g, g->parent_obj.scanout[ss.scanout_id].con, info.tex_id, info.flags & VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP, info.width, info.height, - ss.r.x, ss.r.y, ss.r.width, ss.r.height); + ss.r.x, ss.r.y, ss.r.width, ss.r.height, + d3d_tex2d); } else { dpy_gfx_replace_surface( g->parent_obj.scanout[ss.scanout_id].con, NULL); @@ -607,8 +625,21 @@ void virtio_gpu_virgl_reset(VirtIOGPU *g) int virtio_gpu_virgl_init(VirtIOGPU *g) { int ret; + uint32_t flags = 0; + +#if VIRGL_RENDERER_CALLBACKS_VERSION >= 4 + if (qemu_egl_display) { + virtio_gpu_3d_cbs.version = 4; + virtio_gpu_3d_cbs.get_egl_display = virgl_get_egl_display; + } +#endif +#ifdef VIRGL_RENDERER_D3D11_SHARE_TEXTURE + if (qemu_egl_angle_d3d) { + flags |= VIRGL_RENDERER_D3D11_SHARE_TEXTURE; + } +#endif - ret = virgl_renderer_init(g, 0, &virtio_gpu_3d_cbs); + ret = virgl_renderer_init(g, flags, &virtio_gpu_3d_cbs); if (ret != 0) { 
error_report("virgl could not be initialized: %d", ret); return ret; diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 66cddd94d9..347e17d490 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -258,6 +258,16 @@ static uint32_t calc_image_hostmem(pixman_format_code_t pformat, return height * stride; } +#ifdef WIN32 +static void +win32_pixman_image_destroy(pixman_image_t *image, void *data) +{ + HANDLE handle = data; + + qemu_win32_map_free(pixman_image_get_data(image), handle, &error_warn); +} +#endif + static void virtio_gpu_resource_create_2d(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd) { @@ -304,12 +314,27 @@ static void virtio_gpu_resource_create_2d(VirtIOGPU *g, res->hostmem = calc_image_hostmem(pformat, c2d.width, c2d.height); if (res->hostmem + g->hostmem < g->conf_max_hostmem) { + void *bits = NULL; +#ifdef WIN32 + bits = qemu_win32_map_alloc(res->hostmem, &res->handle, &error_warn); + if (!bits) { + goto end; + } +#endif res->image = pixman_image_create_bits(pformat, c2d.width, c2d.height, - NULL, 0); + bits, res->hostmem / c2d.height); +#ifdef WIN32 + if (res->image) { + pixman_image_set_destroy_function(res->image, win32_pixman_image_destroy, res->handle); + } +#endif } +#ifdef WIN32 +end: +#endif if (!res->image) { qemu_log_mask(LOG_GUEST_ERROR, "%s: resource creation failed %d %d %d\n", @@ -438,11 +463,11 @@ static void virtio_gpu_transfer_to_host_2d(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd) { struct virtio_gpu_simple_resource *res; - int h; + int h, bpp; uint32_t src_offset, dst_offset, stride; - int bpp; pixman_format_code_t format; struct virtio_gpu_transfer_to_host_2d t2d; + void *img_data; VIRTIO_GPU_FILL_CMD(t2d); virtio_gpu_t2d_bswap(&t2d); @@ -471,23 +496,23 @@ static void virtio_gpu_transfer_to_host_2d(VirtIOGPU *g, format = pixman_image_get_format(res->image); bpp = DIV_ROUND_UP(PIXMAN_FORMAT_BPP(format), 8); stride = pixman_image_get_stride(res->image); + img_data = pixman_image_get_data(res->image); - if (t2d.offset || t2d.r.x || t2d.r.y || - t2d.r.width != pixman_image_get_width(res->image)) { - void *img_data = pixman_image_get_data(res->image); + if (t2d.r.x || t2d.r.width != pixman_image_get_width(res->image)) { for (h = 0; h < t2d.r.height; h++) { src_offset = t2d.offset + stride * h; dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp); iov_to_buf(res->iov, res->iov_cnt, src_offset, - (uint8_t *)img_data - + dst_offset, t2d.r.width * bpp); + (uint8_t *)img_data + dst_offset, + t2d.r.width * bpp); } } else { - iov_to_buf(res->iov, res->iov_cnt, 0, - pixman_image_get_data(res->image), - pixman_image_get_stride(res->image) - * pixman_image_get_height(res->image)); + src_offset = t2d.offset; + dst_offset = t2d.r.y * stride + t2d.r.x * bpp; + iov_to_buf(res->iov, res->iov_cnt, src_offset, + (uint8_t *)img_data + dst_offset, + stride * t2d.r.height); } } @@ -498,6 +523,8 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g, struct virtio_gpu_resource_flush rf; struct virtio_gpu_scanout *scanout; pixman_region16_t flush_region; + bool within_bounds = false; + bool update_submitted = false; int i; VIRTIO_GPU_FILL_CMD(rf); @@ -518,13 +545,28 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g, rf.r.x < scanout->x + scanout->width && rf.r.x + rf.r.width >= scanout->x && rf.r.y < scanout->y + scanout->height && - rf.r.y + rf.r.height >= scanout->y && - console_has_gl(scanout->con)) { - dpy_gl_update(scanout->con, 0, 0, scanout->width, - scanout->height); + rf.r.y + rf.r.height >= scanout->y) { + within_bounds = 
true; + + if (console_has_gl(scanout->con)) { + dpy_gl_update(scanout->con, 0, 0, scanout->width, + scanout->height); + update_submitted = true; + } } } - return; + + if (update_submitted) { + return; + } + if (!within_bounds) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: flush bounds outside scanouts" + " bounds for flush %d: %d %d %d %d\n", + __func__, rf.resource_id, rf.r.x, rf.r.y, + rf.r.width, rf.r.height); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; + return; + } } if (!res->blob && @@ -634,8 +676,10 @@ static void virtio_gpu_do_set_scanout(VirtIOGPU *g, if (console_has_gl(scanout->con)) { if (!virtio_gpu_update_dmabuf(g, scanout_id, res, fb, r)) { virtio_gpu_update_scanout(g, scanout_id, res, r); - return; + } else { + *error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; } + return; } data = res->blob; @@ -666,6 +710,9 @@ static void virtio_gpu_do_set_scanout(VirtIOGPU *g, *error = VIRTIO_GPU_RESP_ERR_UNSPEC; return; } +#ifdef WIN32 + qemu_displaysurface_win32_set_handle(scanout->ds, res->handle, fb->offset); +#endif pixman_image_unref(rect); dpy_gfx_replace_surface(g->parent_obj.scanout[scanout_id].con, @@ -1209,6 +1256,7 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size, struct virtio_gpu_simple_resource *res; struct virtio_gpu_scanout *scanout; uint32_t resource_id, pformat; + void *bits = NULL; int i; g->hostmem = 0; @@ -1233,15 +1281,23 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size, g_free(res); return -EINVAL; } + + res->hostmem = calc_image_hostmem(pformat, res->width, res->height); +#ifdef WIN32 + bits = qemu_win32_map_alloc(res->hostmem, &res->handle, &error_warn); + if (!bits) { + g_free(res); + return -EINVAL; + } +#endif res->image = pixman_image_create_bits(pformat, res->width, res->height, - NULL, 0); + bits, res->hostmem / res->height); if (!res->image) { g_free(res); return -EINVAL; } - res->hostmem = calc_image_hostmem(pformat, res->width, res->height); res->addrs = g_new(uint64_t, res->iov_cnt); res->iov = g_new(struct iovec, res->iov_cnt); @@ -1302,6 +1358,9 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size, if (!scanout->ds) { return -EINVAL; } +#ifdef WIN32 + qemu_displaysurface_win32_set_handle(scanout->ds, res->handle, 0); +#endif dpy_gfx_replace_surface(scanout->con, scanout->ds); dpy_gfx_update_full(scanout->con); diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c index b00a91ecfe..866e11d208 100644 --- a/hw/hppa/machine.c +++ b/hw/hppa/machine.c @@ -122,6 +122,7 @@ static FWCfgState *create_fw_cfg(MachineState *ms) { FWCfgState *fw_cfg; uint64_t val; + const char qemu_version[] = QEMU_VERSION; fw_cfg = fw_cfg_init_mem(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4); fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, ms->smp.cpus); @@ -147,6 +148,10 @@ static FWCfgState *create_fw_cfg(MachineState *ms) fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, ms->boot_config.order[0]); qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); + fw_cfg_add_file(fw_cfg, "/etc/qemu-version", + g_memdup(qemu_version, sizeof(qemu_version)), + sizeof(qemu_version)); + return fw_cfg; } @@ -418,10 +423,16 @@ static void hppa_machine_reset(MachineState *ms, ShutdownCause reason) /* Start all CPUs at the firmware entry point. * Monarch CPU will initialize firmware, secondary CPUs - * will enter a small idle look and wait for rendevouz. */ + * will enter a small idle loop and wait for rendevouz. 
*/ for (i = 0; i < smp_cpus; i++) { - cpu_set_pc(CPU(cpu[i]), firmware_entry); + CPUState *cs = CPU(cpu[i]); + + cpu_set_pc(cs, firmware_entry); + cpu[i]->env.psw = PSW_Q; cpu[i]->env.gr[5] = CPU_HPA + i * 0x1000; + + cs->exception_index = -1; + cs->halted = 0; } /* already initialized by machine_hppa_init()? */ diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c index 622f9d28b7..e536b3ec26 100644 --- a/hw/intc/pnv_xive.c +++ b/hw/intc/pnv_xive.c @@ -479,6 +479,16 @@ static int pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format, return count; } +static uint32_t pnv_xive_presenter_get_config(XivePresenter *xptr) +{ + uint32_t cfg = 0; + + /* TIMA GEN1 is all P9 knows */ + cfg |= XIVE_PRESENTER_GEN1_TIMA_OS; + + return cfg; +} + static uint8_t pnv_xive_get_block_id(XiveRouter *xrtr) { return pnv_xive_block_id(PNV_XIVE(xrtr)); @@ -1991,6 +2001,7 @@ static void pnv_xive_class_init(ObjectClass *klass, void *data) xnc->notify = pnv_xive_notify; xpc->match_nvt = pnv_xive_match_nvt; + xpc->get_config = pnv_xive_presenter_get_config; }; static const TypeInfo pnv_xive_info = { diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index ec1edeb385..ed438a20ed 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -501,6 +501,17 @@ static int pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format, return count; } +static uint32_t pnv_xive2_presenter_get_config(XivePresenter *xptr) +{ + PnvXive2 *xive = PNV_XIVE2(xptr); + uint32_t cfg = 0; + + if (xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS) { + cfg |= XIVE_PRESENTER_GEN1_TIMA_OS; + } + return cfg; +} + static uint8_t pnv_xive2_get_block_id(Xive2Router *xrtr) { return pnv_xive2_block_id(PNV_XIVE2(xrtr)); @@ -1645,17 +1656,6 @@ static const MemoryRegionOps pnv_xive2_ic_tm_indirect_ops = { /* * TIMA ops */ - -/* - * Special TIMA offsets to handle accesses in a POWER10 way. - * - * Only the CAM line updates done by the hypervisor should be handled - * specifically. - */ -#define HV_PAGE_OFFSET (XIVE_TM_HV_PAGE << TM_SHIFT) -#define HV_PUSH_OS_CTX_OFFSET (HV_PAGE_OFFSET | (TM_QW1_OS + TM_WORD2)) -#define HV_PULL_OS_CTX_OFFSET (HV_PAGE_OFFSET | TM_SPC_PULL_OS_CTX) - static void pnv_xive2_tm_write(void *opaque, hwaddr offset, uint64_t value, unsigned size) { @@ -1663,18 +1663,7 @@ static void pnv_xive2_tm_write(void *opaque, hwaddr offset, PnvXive2 *xive = pnv_xive2_tm_get_xive(cpu); XiveTCTX *tctx = XIVE_TCTX(pnv_cpu_state(cpu)->intc); XivePresenter *xptr = XIVE_PRESENTER(xive); - bool gen1_tima_os = - xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS; - - offset &= TM_ADDRESS_MASK; - - /* TODO: should we switch the TM ops table instead ? */ - if (!gen1_tima_os && offset == HV_PUSH_OS_CTX_OFFSET) { - xive2_tm_push_os_ctx(xptr, tctx, offset, value, size); - return; - } - /* Other TM ops are the same as XIVE1 */ xive_tctx_tm_write(xptr, tctx, offset, value, size); } @@ -1684,17 +1673,7 @@ static uint64_t pnv_xive2_tm_read(void *opaque, hwaddr offset, unsigned size) PnvXive2 *xive = pnv_xive2_tm_get_xive(cpu); XiveTCTX *tctx = XIVE_TCTX(pnv_cpu_state(cpu)->intc); XivePresenter *xptr = XIVE_PRESENTER(xive); - bool gen1_tima_os = - xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS; - - offset &= TM_ADDRESS_MASK; - - /* TODO: should we switch the TM ops table instead ? 
*/ - if (!gen1_tima_os && offset == HV_PULL_OS_CTX_OFFSET) { - return xive2_tm_pull_os_ctx(xptr, tctx, offset, size); - } - /* Other TM ops are the same as XIVE1 */ return xive_tctx_tm_read(xptr, tctx, offset, size); } @@ -1987,6 +1966,7 @@ static void pnv_xive2_class_init(ObjectClass *klass, void *data) xnc->notify = pnv_xive2_notify; xpc->match_nvt = pnv_xive2_match_nvt; + xpc->get_config = pnv_xive2_presenter_get_config; }; static const TypeInfo pnv_xive2_info = { diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c index dc641cc604..8bcab2846c 100644 --- a/hw/intc/spapr_xive.c +++ b/hw/intc/spapr_xive.c @@ -475,6 +475,21 @@ static int spapr_xive_match_nvt(XivePresenter *xptr, uint8_t format, return count; } +static uint32_t spapr_xive_presenter_get_config(XivePresenter *xptr) +{ + uint32_t cfg = 0; + + /* + * Let's claim GEN1 TIMA format. If running with KVM on P10, the + * correct answer is deep in the hardware and not accessible to + * us. But it shouldn't matter as it only affects the presenter + * as seen by a guest OS. + */ + cfg |= XIVE_PRESENTER_GEN1_TIMA_OS; + + return cfg; +} + static uint8_t spapr_xive_get_block_id(XiveRouter *xrtr) { return SPAPR_XIVE_BLOCK_ID; @@ -832,6 +847,7 @@ static void spapr_xive_class_init(ObjectClass *klass, void *data) sicc->post_load = spapr_xive_post_load; xpc->match_nvt = spapr_xive_match_nvt; + xpc->get_config = spapr_xive_presenter_get_config; xpc->in_kernel = spapr_xive_in_kernel_xptr; } diff --git a/hw/intc/xive.c b/hw/intc/xive.c index 5204c14b87..84c079b034 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -20,6 +20,7 @@ #include "monitor/monitor.h" #include "hw/irq.h" #include "hw/ppc/xive.h" +#include "hw/ppc/xive2.h" #include "hw/ppc/xive_regs.h" #include "trace.h" @@ -461,6 +462,13 @@ static void xive_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx, } } +static uint32_t xive_presenter_get_config(XivePresenter *xptr) +{ + XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr); + + return xpc->get_config(xptr); +} + /* * Define a mapping of "special" operations depending on the TIMA page * offset and the size of the operation. 
@@ -497,14 +505,47 @@ static const XiveTmOp xive_tm_operations[] = { { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, NULL, xive_tm_pull_pool_ctx }, }; -static const XiveTmOp *xive_tm_find_op(hwaddr offset, unsigned size, bool write) +static const XiveTmOp xive2_tm_operations[] = { + /* + * MMIOs below 2K : raw values and special operations without side + * effects + */ + { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, xive_tm_set_os_cppr, NULL }, + { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 4, xive2_tm_push_os_ctx, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, xive_tm_set_hv_cppr, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push, NULL }, + { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL, xive_tm_vt_poll }, + + /* MMIOs above 2K : special operations with side effects */ + { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, NULL, xive_tm_ack_os_reg }, + { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending, NULL }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 4, NULL, xive2_tm_pull_os_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 8, NULL, xive2_tm_pull_os_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG, 2, NULL, xive_tm_ack_hv_reg }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 4, NULL, xive_tm_pull_pool_ctx }, + { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, NULL, xive_tm_pull_pool_ctx }, +}; + +static const XiveTmOp *xive_tm_find_op(XivePresenter *xptr, hwaddr offset, + unsigned size, bool write) { uint8_t page_offset = (offset >> TM_SHIFT) & 0x3; uint32_t op_offset = offset & TM_ADDRESS_MASK; - int i; + const XiveTmOp *tm_ops; + int i, tm_ops_count; + uint32_t cfg; + + cfg = xive_presenter_get_config(xptr); + if (cfg & XIVE_PRESENTER_GEN1_TIMA_OS) { + tm_ops = xive_tm_operations; + tm_ops_count = ARRAY_SIZE(xive_tm_operations); + } else { + tm_ops = xive2_tm_operations; + tm_ops_count = ARRAY_SIZE(xive2_tm_operations); + } - for (i = 0; i < ARRAY_SIZE(xive_tm_operations); i++) { - const XiveTmOp *xto = &xive_tm_operations[i]; + for (i = 0; i < tm_ops_count; i++) { + const XiveTmOp *xto = &tm_ops[i]; /* Accesses done from a more privileged TIMA page is allowed */ if (xto->page_offset >= page_offset && @@ -535,7 +576,7 @@ void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, * First, check for special operations in the 2K region */ if (offset & TM_SPECIAL_OP) { - xto = xive_tm_find_op(offset, size, true); + xto = xive_tm_find_op(tctx->xptr, offset, size, true); if (!xto) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA " "@%"HWADDR_PRIx"\n", offset); @@ -548,7 +589,7 @@ void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, /* * Then, for special operations in the region below 2K. */ - xto = xive_tm_find_op(offset, size, true); + xto = xive_tm_find_op(tctx->xptr, offset, size, true); if (xto) { xto->write_handler(xptr, tctx, offset, value, size); return; @@ -574,7 +615,7 @@ uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, * First, check for special operations in the 2K region */ if (offset & TM_SPECIAL_OP) { - xto = xive_tm_find_op(offset, size, false); + xto = xive_tm_find_op(tctx->xptr, offset, size, false); if (!xto) { qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA" "@%"HWADDR_PRIx"\n", offset); @@ -587,7 +628,7 @@ uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset, /* * Then, for special operations in the region below 2K. 
*/ - xto = xive_tm_find_op(offset, size, false); + xto = xive_tm_find_op(tctx->xptr, offset, size, false); if (xto) { ret = xto->read_handler(xptr, tctx, offset, size); goto out; diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index fd917fcda1..355668bdf8 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -43,7 +43,14 @@ * subsys=<subsys_id> * -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\ * zoned=<true|false[optional]>, \ - * subsys=<subsys_id>,detached=<true|false[optional]> + * subsys=<subsys_id>,shared=<true|false[optional]>, \ + * detached=<true|false[optional]>, \ + * zoned.zone_size=<N[optional]>, \ + * zoned.zone_capacity=<N[optional]>, \ + * zoned.descr_ext_size=<N[optional]>, \ + * zoned.max_active=<N[optional]>, \ + * zoned.max_open=<N[optional]>, \ + * zoned.cross_read=<true|false[optional]> * * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. By default, the @@ -1748,6 +1755,7 @@ static void nvme_aio_err(NvmeRequest *req, int ret) case NVME_CMD_WRITE: case NVME_CMD_WRITE_ZEROES: case NVME_CMD_ZONE_APPEND: + case NVME_CMD_COPY: status = NVME_WRITE_FAULT; break; default: @@ -2847,6 +2855,25 @@ static void nvme_copy_source_range_parse(void *ranges, int idx, uint8_t format, } } +static inline uint16_t nvme_check_copy_mcl(NvmeNamespace *ns, + NvmeCopyAIOCB *iocb, uint16_t nr) +{ + uint32_t copy_len = 0; + + for (int idx = 0; idx < nr; idx++) { + uint32_t nlb; + nvme_copy_source_range_parse(iocb->ranges, idx, iocb->format, NULL, + &nlb, NULL, NULL, NULL); + copy_len += nlb + 1; + } + + if (copy_len > ns->id_ns.mcl) { + return NVME_CMD_SIZE_LIMIT | NVME_DNR; + } + + return NVME_SUCCESS; +} + static void nvme_copy_out_completed_cb(void *opaque, int ret) { NvmeCopyAIOCB *iocb = opaque; @@ -3159,6 +3186,11 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) } } + status = nvme_check_copy_mcl(ns, iocb, nr); + if (status) { + goto invalid; + } + iocb->req = req; iocb->ret = 0; iocb->nr = nr; diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 547c0b1543..44aba8f4d9 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -400,8 +400,9 @@ static bool nvme_ns_init_fdp(NvmeNamespace *ns, Error **errp) NvmeRuHandle *ruh; uint8_t lbafi = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas); g_autofree unsigned int *ruhids = NULL; - unsigned int *ruhid; - char *r, *p, *token; + unsigned int n, m, *ruhid; + const char *endptr, *token; + char *r, *p; uint16_t *ph; if (!ns->params.fdp.ruhs) { @@ -438,23 +439,55 @@ static bool nvme_ns_init_fdp(NvmeNamespace *ns, Error **errp) /* parse the placement handle identifiers */ while ((token = qemu_strsep(&p, ";")) != NULL) { - ns->fdp.nphs += 1; - if (ns->fdp.nphs > NVME_FDP_MAXPIDS || - ns->fdp.nphs == endgrp->fdp.nruh) { - error_setg(errp, "too many placement handles"); + if (qemu_strtoui(token, &endptr, 0, &n) < 0) { + error_setg(errp, "cannot parse reclaim unit handle identifier"); free(r); return false; } - if (qemu_strtoui(token, NULL, 0, ruhid++) < 0) { - error_setg(errp, "cannot parse reclaim unit handle identifier"); - free(r); - return false; + m = n; + + /* parse range */ + if (*endptr == '-') { + token = endptr + 1; + + if (qemu_strtoui(token, NULL, 0, &m) < 0) { + error_setg(errp, "cannot parse reclaim unit handle identifier"); + free(r); + return false; + } + + if (m < n) { + error_setg(errp, "invalid reclaim unit handle identifier range"); + free(r); + return false; + } + } + + for (; n <= m; n++) { + if (ns->fdp.nphs++ == endgrp->fdp.nruh) { + error_setg(errp, "too 
many placement handles"); + free(r); + return false; + } + + *ruhid++ = n; } } free(r); + /* verify that the ruhids are unique */ + for (unsigned int i = 0; i < ns->fdp.nphs; i++) { + for (unsigned int j = i + 1; j < ns->fdp.nphs; j++) { + if (ruhids[i] == ruhids[j]) { + error_setg(errp, "duplicate reclaim unit handle identifier: %u", + ruhids[i]); + return false; + } + } + } + ph = ns->fdp.phs = g_new(uint16_t, ns->fdp.nphs); ruhid = ruhids; diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c index 24ddec860e..d30bb8bfd5 100644 --- a/hw/nvme/subsys.c +++ b/hw/nvme/subsys.c @@ -158,8 +158,10 @@ static bool nvme_subsys_setup_fdp(NvmeSubsystem *subsys, Error **errp) endgrp->fdp.nrg = subsys->params.fdp.nrg; - if (!subsys->params.fdp.nruh) { - error_setg(errp, "fdp.nruh must be non-zero"); + if (!subsys->params.fdp.nruh || + subsys->params.fdp.nruh > NVME_FDP_MAXPIDS) { + error_setg(errp, "fdp.nruh must be non-zero and less than %u", + NVME_FDP_MAXPIDS); return false; } diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c index 542f9e2932..6232cbeee1 100644 --- a/hw/pci-host/pnv_phb4.c +++ b/hw/pci-host/pnv_phb4.c @@ -133,13 +133,13 @@ static void pnv_phb4_rc_config_write(PnvPHB4 *phb, unsigned off, PCIDevice *pdev; if (size != 4) { - phb_error(phb, "rc_config_write invalid size %d\n", size); + phb_error(phb, "rc_config_write invalid size %d", size); return; } pdev = pci_find_device(pci->bus, 0, 0); if (!pdev) { - phb_error(phb, "rc_config_write device not found\n"); + phb_error(phb, "rc_config_write device not found"); return; } @@ -155,13 +155,13 @@ static uint64_t pnv_phb4_rc_config_read(PnvPHB4 *phb, unsigned off, uint64_t val; if (size != 4) { - phb_error(phb, "rc_config_read invalid size %d\n", size); + phb_error(phb, "rc_config_read invalid size %d", size); return ~0ull; } pdev = pci_find_device(pci->bus, 0, 0); if (!pdev) { - phb_error(phb, "rc_config_read device not found\n"); + phb_error(phb, "rc_config_read device not found"); return ~0ull; } @@ -1039,19 +1039,19 @@ static void pnv_pec_stk_nest_xscom_write(void *opaque, hwaddr addr, if (phb->nest_regs[PEC_NEST_STK_BAR_EN] & (PEC_NEST_STK_BAR_EN_MMIO0 | PEC_NEST_STK_BAR_EN_MMIO1)) { - phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + phb_pec_error(pec, "Changing enabled BAR unsupported"); } phb->nest_regs[reg] = val & 0xffffffffff000000ull; break; case PEC_NEST_STK_PHB_REGS_BAR: if (phb->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_PHB) { - phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + phb_pec_error(pec, "Changing enabled BAR unsupported"); } phb->nest_regs[reg] = val & 0xffffffffffc00000ull; break; case PEC_NEST_STK_INT_BAR: if (phb->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_INT) { - phb_pec_error(pec, "Changing enabled BAR unsupported\n"); + phb_pec_error(pec, "Changing enabled BAR unsupported"); } phb->nest_regs[reg] = val & 0xfffffff000000000ull; break; diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build index c927337da0..a313d4b964 100644 --- a/hw/ppc/meson.build +++ b/hw/ppc/meson.build @@ -15,6 +15,7 @@ ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files( 'spapr_vio.c', 'spapr_events.c', 'spapr_hcall.c', + 'spapr_nested.c', 'spapr_iommu.c', 'spapr_rtas.c', 'spapr_pci.c', diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 590fc64b32..fc083173f3 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -799,7 +799,8 @@ static void pnv_init(MachineState *machine) DeviceState *dev; if (kvm_enabled()) { - error_report("The powernv machine does not work with KVM acceleration"); + 
error_report("machine %s does not support the KVM accelerator", + mc->name); exit(EXIT_FAILURE); } diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index 1b1220c423..82e4408c5c 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -1436,6 +1436,12 @@ int ppc_cpu_pir(PowerPCCPU *cpu) return env->spr_cb[SPR_PIR].default_value; } +int ppc_cpu_tir(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + return env->spr_cb[SPR_TIR].default_value; +} + PowerPCCPU *ppc_get_vcpu_by_pir(int pir) { CPUState *cs; diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c index f969fa3c29..f061b8cf3b 100644 --- a/hw/ppc/ppc440_bamboo.c +++ b/hw/ppc/ppc440_bamboo.c @@ -19,7 +19,6 @@ #include "hw/pci/pci.h" #include "hw/boards.h" #include "sysemu/kvm.h" -#include "kvm_ppc.h" #include "sysemu/device_tree.h" #include "hw/loader.h" #include "elf.h" @@ -97,16 +96,6 @@ static int bamboo_load_device_tree(MachineState *machine, fprintf(stderr, "couldn't set /chosen/bootargs\n"); } - /* - * Copy data from the host device tree into the guest. Since the guest can - * directly access the timebase without host involvement, we must expose - * the correct frequencies. - */ - if (kvm_enabled()) { - tb_freq = kvmppc_get_tbfreq(); - clock_freq = kvmppc_get_clockfreq(); - } - qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "clock-frequency", clock_freq); qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "timebase-frequency", @@ -175,6 +164,12 @@ static void bamboo_init(MachineState *machine) int success; int i; + if (kvm_enabled()) { + error_report("machine %s does not support the KVM accelerator", + MACHINE_GET_CLASS(machine)->name); + exit(EXIT_FAILURE); + } + cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); env = &cpu->env; diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index 33bf232f8b..d9231c7317 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -45,7 +45,6 @@ #include "trace.h" #include "elf.h" #include "qemu/units.h" -#include "kvm_ppc.h" /* SMP is not enabled, for now */ #define MAX_CPUS 1 @@ -245,6 +244,12 @@ static void ibm_40p_init(MachineState *machine) long kernel_size = 0, initrd_size = 0; char boot_device; + if (kvm_enabled()) { + error_report("machine %s does not support the KVM accelerator", + MACHINE_GET_CLASS(machine)->name); + exit(EXIT_FAILURE); + } + /* init CPU */ cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); env = &cpu->env; @@ -392,18 +397,7 @@ static void ibm_40p_init(MachineState *machine) fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_HEIGHT, graphic_height); fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_DEPTH, graphic_depth); - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_IS_KVM, kvm_enabled()); - if (kvm_enabled()) { - uint8_t *hypercall; - - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, kvmppc_get_tbfreq()); - hypercall = g_malloc(16); - kvmppc_get_hypercall(env, hypercall, 16); - fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16); - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid()); - } else { - fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, NANOSECONDS_PER_SECOND); - } + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, NANOSECONDS_PER_SECOND); fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, boot_device); qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index dcb7f1c70a..54dbfd7fe9 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -61,6 +61,7 @@ #include "hw/ppc/fdt.h" #include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_nested.h" #include "hw/ppc/spapr_vio.h" #include "hw/ppc/vof.h" #include "hw/qdev-properties.h" @@ -2524,10 +2525,19 @@ static void spapr_set_vsmt_mode(SpaprMachineState *spapr, Error **errp) int ret; 
unsigned int smp_threads = ms->smp.threads; - if (!kvm_enabled() && (smp_threads > 1)) { - error_setg(errp, "TCG cannot support more than 1 thread/core " - "on a pseries machine"); - return; + if (tcg_enabled()) { + if (smp_threads > 1 && + !ppc_type_check_compat(ms->cpu_type, CPU_POWERPC_LOGICAL_2_07, 0, + spapr->max_compat_pvr)) { + error_setg(errp, "TCG only supports SMT on POWER8 or newer CPUs"); + return; + } + + if (smp_threads > 8) { + error_setg(errp, "TCG cannot support more than 8 threads/core " + "on a pseries machine"); + return; + } } if (!is_power_of_2(smp_threads)) { error_setg(errp, "Cannot support %d threads/core on a pseries " diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index 3fd45a6dec..5a0755d34f 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -473,6 +473,20 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr, error_append_hint(errp, "Try appending -machine cap-nested-hv=off\n"); } + } else if (tcg_enabled()) { + MachineState *ms = MACHINE(spapr); + unsigned int smp_threads = ms->smp.threads; + + /* + * Nested-HV vCPU env state to L2, so SMT-shared SPR updates, for + * example, do not necessarily update the correct SPR value on sibling + * threads that are in a different guest/host context. + */ + if (smp_threads > 1) { + error_setg(errp, "TCG does not support nested-HV with SMT"); + error_append_hint(errp, "Try appending -machine cap-nested-hv=off " + "or use threads=1 with -smp\n"); + } } } diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 9b88dd549a..a4e3c2fadd 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -255,7 +255,7 @@ static void spapr_cpu_core_unrealize(DeviceState *dev) } static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, - SpaprCpuCore *sc, Error **errp) + SpaprCpuCore *sc, int thread_index, Error **errp) { CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); @@ -267,6 +267,9 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); kvmppc_set_papr(cpu); + env->spr_cb[SPR_PIR].default_value = cs->cpu_index; + env->spr_cb[SPR_TIR].default_value = thread_index; + /* Set time-base frequency to 512 MHz. vhyp must be set first. 
*/ cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); @@ -337,7 +340,7 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) for (i = 0; i < cc->nr_threads; i++) { sc->threads[i] = spapr_create_vcpu(sc, i, errp); if (!sc->threads[i] || - !spapr_realize_vcpu(sc->threads[i], spapr, sc, errp)) { + !spapr_realize_vcpu(sc->threads[i], spapr, sc, i, errp)) { spapr_cpu_core_unrealize(dev); return; } diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index b904755575..002ea0b7c1 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -13,6 +13,7 @@ #include "hw/ppc/ppc.h" #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_cpu_core.h" +#include "hw/ppc/spapr_nested.h" #include "mmu-hash64.h" #include "cpu-models.h" #include "trace.h" @@ -1498,349 +1499,17 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, } #ifdef CONFIG_TCG -#define PRTS_MASK 0x1f - -static target_ulong h_set_ptbl(PowerPCCPU *cpu, - SpaprMachineState *spapr, - target_ulong opcode, - target_ulong *args) -{ - target_ulong ptcr = args[0]; - - if (!spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) { - return H_FUNCTION; - } - - if ((ptcr & PRTS_MASK) + 12 - 4 > 12) { - return H_PARAMETER; - } - - spapr->nested_ptcr = ptcr; /* Save new partition table */ - - return H_SUCCESS; -} - -static target_ulong h_tlb_invalidate(PowerPCCPU *cpu, - SpaprMachineState *spapr, - target_ulong opcode, - target_ulong *args) -{ - /* - * The spapr virtual hypervisor nested HV implementation retains no L2 - * translation state except for TLB. And the TLB is always invalidated - * across L1<->L2 transitions, so nothing is required here. - */ - - return H_SUCCESS; -} - -static target_ulong h_copy_tofrom_guest(PowerPCCPU *cpu, - SpaprMachineState *spapr, - target_ulong opcode, - target_ulong *args) -{ - /* - * This HCALL is not required, L1 KVM will take a slow path and walk the - * page tables manually to do the data copy. - */ - return H_FUNCTION; -} - -/* - * When this handler returns, the environment is switched to the L2 guest - * and TCG begins running that. spapr_exit_nested() performs the switch from - * L2 back to L1 and returns from the H_ENTER_NESTED hcall. - */ -static target_ulong h_enter_nested(PowerPCCPU *cpu, - SpaprMachineState *spapr, - target_ulong opcode, - target_ulong *args) -{ - PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); - CPUState *cs = CPU(cpu); - CPUPPCState *env = &cpu->env; - SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); - target_ulong hv_ptr = args[0]; - target_ulong regs_ptr = args[1]; - target_ulong hdec, now = cpu_ppc_load_tbl(env); - target_ulong lpcr, lpcr_mask; - struct kvmppc_hv_guest_state *hvstate; - struct kvmppc_hv_guest_state hv_state; - struct kvmppc_pt_regs *regs; - hwaddr len; - - if (spapr->nested_ptcr == 0) { - return H_NOT_AVAILABLE; - } - - len = sizeof(*hvstate); - hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, false, - MEMTXATTRS_UNSPECIFIED); - if (len != sizeof(*hvstate)) { - address_space_unmap(CPU(cpu)->as, hvstate, len, 0, false); - return H_PARAMETER; - } - - memcpy(&hv_state, hvstate, len); - - address_space_unmap(CPU(cpu)->as, hvstate, len, len, false); - - /* - * We accept versions 1 and 2. Version 2 fields are unused because TCG - * does not implement DAWR*. 
- */ - if (hv_state.version > HV_GUEST_STATE_VERSION) { - return H_PARAMETER; - } - - spapr_cpu->nested_host_state = g_try_new(CPUPPCState, 1); - if (!spapr_cpu->nested_host_state) { - return H_NO_MEM; - } - - memcpy(spapr_cpu->nested_host_state, env, sizeof(CPUPPCState)); - - len = sizeof(*regs); - regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, false, - MEMTXATTRS_UNSPECIFIED); - if (!regs || len != sizeof(*regs)) { - address_space_unmap(CPU(cpu)->as, regs, len, 0, false); - g_free(spapr_cpu->nested_host_state); - return H_P2; - } - - len = sizeof(env->gpr); - assert(len == sizeof(regs->gpr)); - memcpy(env->gpr, regs->gpr, len); - - env->lr = regs->link; - env->ctr = regs->ctr; - cpu_write_xer(env, regs->xer); - ppc_set_cr(env, regs->ccr); - - env->msr = regs->msr; - env->nip = regs->nip; - - address_space_unmap(CPU(cpu)->as, regs, len, len, false); - - env->cfar = hv_state.cfar; - - assert(env->spr[SPR_LPIDR] == 0); - env->spr[SPR_LPIDR] = hv_state.lpid; - - lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER; - lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) | (hv_state.lpcr & lpcr_mask); - lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE; - lpcr &= ~LPCR_LPES0; - env->spr[SPR_LPCR] = lpcr & pcc->lpcr_mask; - - env->spr[SPR_PCR] = hv_state.pcr; - /* hv_state.amor is not used */ - env->spr[SPR_DPDES] = hv_state.dpdes; - env->spr[SPR_HFSCR] = hv_state.hfscr; - hdec = hv_state.hdec_expiry - now; - spapr_cpu->nested_tb_offset = hv_state.tb_offset; - /* TCG does not implement DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR SPRs*/ - env->spr[SPR_SRR0] = hv_state.srr0; - env->spr[SPR_SRR1] = hv_state.srr1; - env->spr[SPR_SPRG0] = hv_state.sprg[0]; - env->spr[SPR_SPRG1] = hv_state.sprg[1]; - env->spr[SPR_SPRG2] = hv_state.sprg[2]; - env->spr[SPR_SPRG3] = hv_state.sprg[3]; - env->spr[SPR_BOOKS_PID] = hv_state.pidr; - env->spr[SPR_PPR] = hv_state.ppr; - - cpu_ppc_hdecr_init(env); - cpu_ppc_store_hdecr(env, hdec); - - /* - * The hv_state.vcpu_token is not needed. It is used by the KVM - * implementation to remember which L2 vCPU last ran on which physical - * CPU so as to invalidate process scope translations if it is moved - * between physical CPUs. For now TLBs are always flushed on L1<->L2 - * transitions so this is not a problem. - * - * Could validate that the same vcpu_token does not attempt to run on - * different L1 vCPUs at the same time, but that would be a L1 KVM bug - * and it's not obviously worth a new data structure to do it. - */ - - env->tb_env->tb_offset += spapr_cpu->nested_tb_offset; - spapr_cpu->in_nested = true; - - hreg_compute_hflags(env); - ppc_maybe_interrupt(env); - tlb_flush(cs); - env->reserve_addr = -1; /* Reset the reservation */ - - /* - * The spapr hcall helper sets env->gpr[3] to the return value, but at - * this point the L1 is not returning from the hcall but rather we - * start running the L2, so r3 must not be clobbered, so return env->gpr[3] - * to leave it unchanged. 
- */ - return env->gpr[3]; -} - -void spapr_exit_nested(PowerPCCPU *cpu, int excp) -{ - CPUState *cs = CPU(cpu); - CPUPPCState *env = &cpu->env; - SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); - target_ulong r3_return = env->excp_vectors[excp]; /* hcall return value */ - target_ulong hv_ptr = spapr_cpu->nested_host_state->gpr[4]; - target_ulong regs_ptr = spapr_cpu->nested_host_state->gpr[5]; - struct kvmppc_hv_guest_state *hvstate; - struct kvmppc_pt_regs *regs; - hwaddr len; - - assert(spapr_cpu->in_nested); - - cpu_ppc_hdecr_exit(env); - - len = sizeof(*hvstate); - hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, true, - MEMTXATTRS_UNSPECIFIED); - if (len != sizeof(*hvstate)) { - address_space_unmap(CPU(cpu)->as, hvstate, len, 0, true); - r3_return = H_PARAMETER; - goto out_restore_l1; - } - - hvstate->cfar = env->cfar; - hvstate->lpcr = env->spr[SPR_LPCR]; - hvstate->pcr = env->spr[SPR_PCR]; - hvstate->dpdes = env->spr[SPR_DPDES]; - hvstate->hfscr = env->spr[SPR_HFSCR]; - - if (excp == POWERPC_EXCP_HDSI) { - hvstate->hdar = env->spr[SPR_HDAR]; - hvstate->hdsisr = env->spr[SPR_HDSISR]; - hvstate->asdr = env->spr[SPR_ASDR]; - } else if (excp == POWERPC_EXCP_HISI) { - hvstate->asdr = env->spr[SPR_ASDR]; - } - - /* HEIR should be implemented for HV mode and saved here. */ - hvstate->srr0 = env->spr[SPR_SRR0]; - hvstate->srr1 = env->spr[SPR_SRR1]; - hvstate->sprg[0] = env->spr[SPR_SPRG0]; - hvstate->sprg[1] = env->spr[SPR_SPRG1]; - hvstate->sprg[2] = env->spr[SPR_SPRG2]; - hvstate->sprg[3] = env->spr[SPR_SPRG3]; - hvstate->pidr = env->spr[SPR_BOOKS_PID]; - hvstate->ppr = env->spr[SPR_PPR]; - - /* Is it okay to specify write length larger than actual data written? */ - address_space_unmap(CPU(cpu)->as, hvstate, len, len, true); - - len = sizeof(*regs); - regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, true, - MEMTXATTRS_UNSPECIFIED); - if (!regs || len != sizeof(*regs)) { - address_space_unmap(CPU(cpu)->as, regs, len, 0, true); - r3_return = H_P2; - goto out_restore_l1; - } - - len = sizeof(env->gpr); - assert(len == sizeof(regs->gpr)); - memcpy(regs->gpr, env->gpr, len); - - regs->link = env->lr; - regs->ctr = env->ctr; - regs->xer = cpu_read_xer(env); - regs->ccr = ppc_get_cr(env); - - if (excp == POWERPC_EXCP_MCHECK || - excp == POWERPC_EXCP_RESET || - excp == POWERPC_EXCP_SYSCALL) { - regs->nip = env->spr[SPR_SRR0]; - regs->msr = env->spr[SPR_SRR1] & env->msr_mask; - } else { - regs->nip = env->spr[SPR_HSRR0]; - regs->msr = env->spr[SPR_HSRR1] & env->msr_mask; - } - - /* Is it okay to specify write length larger than actual data written? 
*/ - address_space_unmap(CPU(cpu)->as, regs, len, len, true); - -out_restore_l1: - memcpy(env->gpr, spapr_cpu->nested_host_state->gpr, sizeof(env->gpr)); - env->lr = spapr_cpu->nested_host_state->lr; - env->ctr = spapr_cpu->nested_host_state->ctr; - memcpy(env->crf, spapr_cpu->nested_host_state->crf, sizeof(env->crf)); - env->cfar = spapr_cpu->nested_host_state->cfar; - env->xer = spapr_cpu->nested_host_state->xer; - env->so = spapr_cpu->nested_host_state->so; - env->ov = spapr_cpu->nested_host_state->ov; - env->ov32 = spapr_cpu->nested_host_state->ov32; - env->ca32 = spapr_cpu->nested_host_state->ca32; - env->msr = spapr_cpu->nested_host_state->msr; - env->nip = spapr_cpu->nested_host_state->nip; - - assert(env->spr[SPR_LPIDR] != 0); - env->spr[SPR_LPCR] = spapr_cpu->nested_host_state->spr[SPR_LPCR]; - env->spr[SPR_LPIDR] = spapr_cpu->nested_host_state->spr[SPR_LPIDR]; - env->spr[SPR_PCR] = spapr_cpu->nested_host_state->spr[SPR_PCR]; - env->spr[SPR_DPDES] = 0; - env->spr[SPR_HFSCR] = spapr_cpu->nested_host_state->spr[SPR_HFSCR]; - env->spr[SPR_SRR0] = spapr_cpu->nested_host_state->spr[SPR_SRR0]; - env->spr[SPR_SRR1] = spapr_cpu->nested_host_state->spr[SPR_SRR1]; - env->spr[SPR_SPRG0] = spapr_cpu->nested_host_state->spr[SPR_SPRG0]; - env->spr[SPR_SPRG1] = spapr_cpu->nested_host_state->spr[SPR_SPRG1]; - env->spr[SPR_SPRG2] = spapr_cpu->nested_host_state->spr[SPR_SPRG2]; - env->spr[SPR_SPRG3] = spapr_cpu->nested_host_state->spr[SPR_SPRG3]; - env->spr[SPR_BOOKS_PID] = spapr_cpu->nested_host_state->spr[SPR_BOOKS_PID]; - env->spr[SPR_PPR] = spapr_cpu->nested_host_state->spr[SPR_PPR]; - - /* - * Return the interrupt vector address from H_ENTER_NESTED to the L1 - * (or error code). - */ - env->gpr[3] = r3_return; - - env->tb_env->tb_offset -= spapr_cpu->nested_tb_offset; - spapr_cpu->in_nested = false; - - hreg_compute_hflags(env); - ppc_maybe_interrupt(env); - tlb_flush(cs); - env->reserve_addr = -1; /* Reset the reservation */ - - g_free(spapr_cpu->nested_host_state); - spapr_cpu->nested_host_state = NULL; -} - -static void hypercall_register_nested(void) -{ - spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl); - spapr_register_hypercall(KVMPPC_H_ENTER_NESTED, h_enter_nested); - spapr_register_hypercall(KVMPPC_H_TLB_INVALIDATE, h_tlb_invalidate); - spapr_register_hypercall(KVMPPC_H_COPY_TOFROM_GUEST, h_copy_tofrom_guest); -} - static void hypercall_register_softmmu(void) { /* DO NOTHING */ } #else -void spapr_exit_nested(PowerPCCPU *cpu, int excp) -{ - g_assert_not_reached(); -} - static target_ulong h_softmmu(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong opcode, target_ulong *args) { g_assert_not_reached(); } -static void hypercall_register_nested(void) -{ - /* DO NOTHING */ -} - static void hypercall_register_softmmu(void) { /* hcall-pft */ @@ -1910,7 +1579,7 @@ static void hypercall_register_types(void) spapr_register_hypercall(KVMPPC_H_UPDATE_DT, h_update_dt); - hypercall_register_nested(); + spapr_register_nested(); } type_init(hypercall_register_types) diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c new file mode 100644 index 0000000000..121aa96ddc --- /dev/null +++ b/hw/ppc/spapr_nested.c @@ -0,0 +1,395 @@ +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "exec/exec-all.h" +#include "helper_regs.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_cpu_core.h" +#include "hw/ppc/spapr_nested.h" + +#ifdef CONFIG_TCG +#define PRTS_MASK 0x1f + +static target_ulong h_set_ptbl(PowerPCCPU *cpu, + SpaprMachineState *spapr, 
+ target_ulong opcode, + target_ulong *args) +{ + target_ulong ptcr = args[0]; + + if (!spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) { + return H_FUNCTION; + } + + if ((ptcr & PRTS_MASK) + 12 - 4 > 12) { + return H_PARAMETER; + } + + spapr->nested_ptcr = ptcr; /* Save new partition table */ + + return H_SUCCESS; +} + +static target_ulong h_tlb_invalidate(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + /* + * The spapr virtual hypervisor nested HV implementation retains no L2 + * translation state except for TLB. And the TLB is always invalidated + * across L1<->L2 transitions, so nothing is required here. + */ + + return H_SUCCESS; +} + +static target_ulong h_copy_tofrom_guest(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + /* + * This HCALL is not required, L1 KVM will take a slow path and walk the + * page tables manually to do the data copy. + */ + return H_FUNCTION; +} + +static void nested_save_state(struct nested_ppc_state *save, PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + memcpy(save->gpr, env->gpr, sizeof(save->gpr)); + + save->lr = env->lr; + save->ctr = env->ctr; + save->cfar = env->cfar; + save->msr = env->msr; + save->nip = env->nip; + + save->cr = ppc_get_cr(env); + save->xer = cpu_read_xer(env); + + save->lpcr = env->spr[SPR_LPCR]; + save->lpidr = env->spr[SPR_LPIDR]; + save->pcr = env->spr[SPR_PCR]; + save->dpdes = env->spr[SPR_DPDES]; + save->hfscr = env->spr[SPR_HFSCR]; + save->srr0 = env->spr[SPR_SRR0]; + save->srr1 = env->spr[SPR_SRR1]; + save->sprg0 = env->spr[SPR_SPRG0]; + save->sprg1 = env->spr[SPR_SPRG1]; + save->sprg2 = env->spr[SPR_SPRG2]; + save->sprg3 = env->spr[SPR_SPRG3]; + save->pidr = env->spr[SPR_BOOKS_PID]; + save->ppr = env->spr[SPR_PPR]; + + save->tb_offset = env->tb_env->tb_offset; +} + +static void nested_load_state(PowerPCCPU *cpu, struct nested_ppc_state *load) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + + memcpy(env->gpr, load->gpr, sizeof(env->gpr)); + + env->lr = load->lr; + env->ctr = load->ctr; + env->cfar = load->cfar; + env->msr = load->msr; + env->nip = load->nip; + + ppc_set_cr(env, load->cr); + cpu_write_xer(env, load->xer); + + env->spr[SPR_LPCR] = load->lpcr; + env->spr[SPR_LPIDR] = load->lpidr; + env->spr[SPR_PCR] = load->pcr; + env->spr[SPR_DPDES] = load->dpdes; + env->spr[SPR_HFSCR] = load->hfscr; + env->spr[SPR_SRR0] = load->srr0; + env->spr[SPR_SRR1] = load->srr1; + env->spr[SPR_SPRG0] = load->sprg0; + env->spr[SPR_SPRG1] = load->sprg1; + env->spr[SPR_SPRG2] = load->sprg2; + env->spr[SPR_SPRG3] = load->sprg3; + env->spr[SPR_BOOKS_PID] = load->pidr; + env->spr[SPR_PPR] = load->ppr; + + env->tb_env->tb_offset = load->tb_offset; + + /* + * MSR updated, compute hflags and possible interrupts. + */ + hreg_compute_hflags(env); + ppc_maybe_interrupt(env); + + /* + * Nested HV does not tag TLB entries between L1 and L2, so must + * flush on transition. + */ + tlb_flush(cs); + env->reserve_addr = -1; /* Reset the reservation */ +} + +/* + * When this handler returns, the environment is switched to the L2 guest + * and TCG begins running that. spapr_exit_nested() performs the switch from + * L2 back to L1 and returns from the H_ENTER_NESTED hcall. 
+ */ +static target_ulong h_enter_nested(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + CPUPPCState *env = &cpu->env; + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + struct nested_ppc_state l2_state; + target_ulong hv_ptr = args[0]; + target_ulong regs_ptr = args[1]; + target_ulong hdec, now = cpu_ppc_load_tbl(env); + target_ulong lpcr, lpcr_mask; + struct kvmppc_hv_guest_state *hvstate; + struct kvmppc_hv_guest_state hv_state; + struct kvmppc_pt_regs *regs; + hwaddr len; + + if (spapr->nested_ptcr == 0) { + return H_NOT_AVAILABLE; + } + + len = sizeof(*hvstate); + hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, false, + MEMTXATTRS_UNSPECIFIED); + if (len != sizeof(*hvstate)) { + address_space_unmap(CPU(cpu)->as, hvstate, len, 0, false); + return H_PARAMETER; + } + + memcpy(&hv_state, hvstate, len); + + address_space_unmap(CPU(cpu)->as, hvstate, len, len, false); + + /* + * We accept versions 1 and 2. Version 2 fields are unused because TCG + * does not implement DAWR*. + */ + if (hv_state.version > HV_GUEST_STATE_VERSION) { + return H_PARAMETER; + } + + if (hv_state.lpid == 0) { + return H_PARAMETER; + } + + spapr_cpu->nested_host_state = g_try_new(struct nested_ppc_state, 1); + if (!spapr_cpu->nested_host_state) { + return H_NO_MEM; + } + + assert(env->spr[SPR_LPIDR] == 0); + assert(env->spr[SPR_DPDES] == 0); + nested_save_state(spapr_cpu->nested_host_state, cpu); + + len = sizeof(*regs); + regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, false, + MEMTXATTRS_UNSPECIFIED); + if (!regs || len != sizeof(*regs)) { + address_space_unmap(CPU(cpu)->as, regs, len, 0, false); + g_free(spapr_cpu->nested_host_state); + return H_P2; + } + + len = sizeof(l2_state.gpr); + assert(len == sizeof(regs->gpr)); + memcpy(l2_state.gpr, regs->gpr, len); + + l2_state.lr = regs->link; + l2_state.ctr = regs->ctr; + l2_state.xer = regs->xer; + l2_state.cr = regs->ccr; + l2_state.msr = regs->msr; + l2_state.nip = regs->nip; + + address_space_unmap(CPU(cpu)->as, regs, len, len, false); + + l2_state.cfar = hv_state.cfar; + l2_state.lpidr = hv_state.lpid; + + lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER; + lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) | (hv_state.lpcr & lpcr_mask); + lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE; + lpcr &= ~LPCR_LPES0; + l2_state.lpcr = lpcr & pcc->lpcr_mask; + + l2_state.pcr = hv_state.pcr; + /* hv_state.amor is not used */ + l2_state.dpdes = hv_state.dpdes; + l2_state.hfscr = hv_state.hfscr; + /* TCG does not implement DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR SPRs*/ + l2_state.srr0 = hv_state.srr0; + l2_state.srr1 = hv_state.srr1; + l2_state.sprg0 = hv_state.sprg[0]; + l2_state.sprg1 = hv_state.sprg[1]; + l2_state.sprg2 = hv_state.sprg[2]; + l2_state.sprg3 = hv_state.sprg[3]; + l2_state.pidr = hv_state.pidr; + l2_state.ppr = hv_state.ppr; + l2_state.tb_offset = env->tb_env->tb_offset + hv_state.tb_offset; + + /* + * Switch to the nested guest environment and start the "hdec" timer. + */ + nested_load_state(cpu, &l2_state); + + hdec = hv_state.hdec_expiry - now; + cpu_ppc_hdecr_init(env); + cpu_ppc_store_hdecr(env, hdec); + + /* + * The hv_state.vcpu_token is not needed. It is used by the KVM + * implementation to remember which L2 vCPU last ran on which physical + * CPU so as to invalidate process scope translations if it is moved + * between physical CPUs. 
+     * For now, TLBs are always flushed on L1<->L2 transitions, so this is
+     * not a problem.
+     *
+     * We could validate that the same vcpu_token does not attempt to run
+     * on different L1 vCPUs at the same time, but that would be an L1 KVM
+     * bug, and it's not obviously worth a new data structure to do it.
+     */
+
+    spapr_cpu->in_nested = true;
+
+    /*
+     * The spapr hcall helper sets env->gpr[3] to the return value, but at
+     * this point the L1 is not returning from the hcall; rather, we begin
+     * running the L2. r3 must therefore not be clobbered, so return
+     * env->gpr[3] to leave it unchanged.
+     */
+    return env->gpr[3];
+}
+
+void spapr_exit_nested(PowerPCCPU *cpu, int excp)
+{
+    CPUPPCState *env = &cpu->env;
+    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+    struct nested_ppc_state l2_state;
+    target_ulong hv_ptr = spapr_cpu->nested_host_state->gpr[4];
+    target_ulong regs_ptr = spapr_cpu->nested_host_state->gpr[5];
+    target_ulong hsrr0, hsrr1, hdar, asdr, hdsisr;
+    struct kvmppc_hv_guest_state *hvstate;
+    struct kvmppc_pt_regs *regs;
+    hwaddr len;
+
+    assert(spapr_cpu->in_nested);
+
+    nested_save_state(&l2_state, cpu);
+    hsrr0 = env->spr[SPR_HSRR0];
+    hsrr1 = env->spr[SPR_HSRR1];
+    hdar = env->spr[SPR_HDAR];
+    hdsisr = env->spr[SPR_HDSISR];
+    asdr = env->spr[SPR_ASDR];
+
+    /*
+     * Switch back to the host environment (including in the error cases
+     * below).
+     */
+    assert(env->spr[SPR_LPIDR] != 0);
+    nested_load_state(cpu, spapr_cpu->nested_host_state);
+    env->gpr[3] = env->excp_vectors[excp]; /* hcall return value */
+
+    cpu_ppc_hdecr_exit(env);
+
+    spapr_cpu->in_nested = false;
+
+    g_free(spapr_cpu->nested_host_state);
+    spapr_cpu->nested_host_state = NULL;
+
+    len = sizeof(*hvstate);
+    hvstate = address_space_map(CPU(cpu)->as, hv_ptr, &len, true,
+                                MEMTXATTRS_UNSPECIFIED);
+    if (!hvstate || len != sizeof(*hvstate)) {
+        address_space_unmap(CPU(cpu)->as, hvstate, len, 0, true);
+        env->gpr[3] = H_PARAMETER;
+        return;
+    }
+
+    hvstate->cfar = l2_state.cfar;
+    hvstate->lpcr = l2_state.lpcr;
+    hvstate->pcr = l2_state.pcr;
+    hvstate->dpdes = l2_state.dpdes;
+    hvstate->hfscr = l2_state.hfscr;
+
+    if (excp == POWERPC_EXCP_HDSI) {
+        hvstate->hdar = hdar;
+        hvstate->hdsisr = hdsisr;
+        hvstate->asdr = asdr;
+    } else if (excp == POWERPC_EXCP_HISI) {
+        hvstate->asdr = asdr;
+    }
+
+    /* HEIR should be implemented for HV mode and saved here. */
+    hvstate->srr0 = l2_state.srr0;
+    hvstate->srr1 = l2_state.srr1;
+    hvstate->sprg[0] = l2_state.sprg0;
+    hvstate->sprg[1] = l2_state.sprg1;
+    hvstate->sprg[2] = l2_state.sprg2;
+    hvstate->sprg[3] = l2_state.sprg3;
+    hvstate->pidr = l2_state.pidr;
+    hvstate->ppr = l2_state.ppr;
+
+    /* Is it okay to specify write length larger than actual data written? */
+    address_space_unmap(CPU(cpu)->as, hvstate, len, len, true);
+
+    len = sizeof(*regs);
+    regs = address_space_map(CPU(cpu)->as, regs_ptr, &len, true,
+                             MEMTXATTRS_UNSPECIFIED);
+    if (!regs || len != sizeof(*regs)) {
+        address_space_unmap(CPU(cpu)->as, regs, len, 0, true);
+        env->gpr[3] = H_P2;
+        return;
+    }
+
+    len = sizeof(env->gpr);
+    assert(len == sizeof(regs->gpr));
+    memcpy(regs->gpr, l2_state.gpr, len);
+
+    regs->link = l2_state.lr;
+    regs->ctr = l2_state.ctr;
+    regs->xer = l2_state.xer;
+    regs->ccr = l2_state.cr;
+
+    if (excp == POWERPC_EXCP_MCHECK ||
+        excp == POWERPC_EXCP_RESET ||
+        excp == POWERPC_EXCP_SYSCALL) {
+        regs->nip = l2_state.srr0;
+        regs->msr = l2_state.srr1 & env->msr_mask;
+    } else {
+        regs->nip = hsrr0;
+        regs->msr = hsrr1 & env->msr_mask;
+    }
+
+    /*
+     * Is it okay to specify write length larger than actual data written?
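+     * It should be: the access length passed to address_space_unmap()
+     * only controls how much of the mapping is written back and marked
+     * dirty, so an over-large value is safe, merely conservative.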
+     */
+    address_space_unmap(CPU(cpu)->as, regs, len, len, true);
+}
+
+void spapr_register_nested(void)
+{
+    spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl);
+    spapr_register_hypercall(KVMPPC_H_ENTER_NESTED, h_enter_nested);
+    spapr_register_hypercall(KVMPPC_H_TLB_INVALIDATE, h_tlb_invalidate);
+    spapr_register_hypercall(KVMPPC_H_COPY_TOFROM_GUEST, h_copy_tofrom_guest);
+}
+#else
+void spapr_exit_nested(PowerPCCPU *cpu, int excp)
+{
+    g_assert_not_reached();
+}
+
+void spapr_register_nested(void)
+{
+    /* DO NOTHING */
+}
+#endif
diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c
index 1c7786b52c..2052d721e5 100644
--- a/hw/remote/proxy.c
+++ b/hw/remote/proxy.c
@@ -22,7 +22,6 @@
 #include "qom/object.h"
 #include "qemu/event_notifier.h"
 #include "sysemu/kvm.h"
-#include "util/event_notifier-posix.c"
 
 static void probe_pci_info(PCIDevice *dev, Error **errp);
 static void proxy_device_reset(DeviceState *dev);
diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
index 2c5546560a..81f7e53aed 100644
--- a/hw/riscv/spike.c
+++ b/hw/riscv/spike.c
@@ -354,6 +354,8 @@ static void spike_machine_class_init(ObjectClass *oc, void *data)
     mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props;
     mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id;
     mc->numa_mem_supported = true;
+    /* matching NUMA node boundaries is a platform, not architectural, choice */
+    mc->cpu_cluster_has_numa_boundary = true;
     mc->default_ram_id = "riscv.spike.ram";
     object_class_property_add_str(oc, "signature", NULL, spike_set_signature);
     object_class_property_set_description(oc, "signature",
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 95708d890e..ed4c27487e 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -1669,6 +1669,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
    mc->cpu_index_to_instance_props = riscv_numa_cpu_index_to_props;
    mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id;
    mc->numa_mem_supported = true;
+    /* matching NUMA node boundaries is a platform, not architectural, choice */
+    mc->cpu_cluster_has_numa_boundary = true;
    mc->default_ram_id = "riscv_virt_board.ram";
    assert(!mc->get_hotplug_handler);
    mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
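
For reference, the struct kvmppc_hv_guest_state consumed by h_enter_nested() and written back by spapr_exit_nested() mirrors KVM's nested-HV guest-state ABI and is defined in a header elsewhere in the tree. The sketch below is reconstructed purely from the fields the code above reads and writes; member order and widths are illustrative assumptions rather than the canonical layout, and the fields TCG ignores (DAWR*, CIABR, PURR, SPURR, IC, VTB, HEIR) are omitted:

    struct kvmppc_hv_guest_state {
        uint64_t version;       /* checked against HV_GUEST_STATE_VERSION */
        uint32_t lpid;          /* L2 LPID; rejected if zero */
        uint32_t vcpu_token;    /* unused by the TCG implementation */
        uint64_t lpcr;          /* only DPFD, ILE, AIL, LD, MER honoured */
        uint64_t pcr;
        uint64_t amor;          /* not used */
        uint64_t dpdes;
        uint64_t hfscr;
        int64_t  tb_offset;     /* added to the L1's timebase offset */
        uint64_t hdec_expiry;   /* programs the L2 hypervisor decrementer */
        uint64_t hdar, hdsisr;  /* written back on HDSI exits */
        uint64_t asdr;          /* written back on HDSI and HISI exits */
        uint64_t srr0, srr1;
        uint64_t sprg[4];
        uint64_t pidr;
        uint64_t cfar;
        uint64_t ppr;
    };

An L1 hypervisor is expected to fill this structure and a kvmppc_pt_regs with the L2 register state, then issue H_ENTER_NESTED with their guest real addresses in r4 and r5; on the next hypervisor-privileged interrupt taken from the L2, the hcall returns with both structures updated to reflect the L2 exit state.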