diff options
Diffstat (limited to 'hw')
79 files changed, 2720 insertions, 363 deletions
diff --git a/hw/arm/gumstix.c b/hw/arm/gumstix.c index d5de5409e1..9146269153 100644 --- a/hw/arm/gumstix.c +++ b/hw/arm/gumstix.c @@ -106,6 +106,7 @@ static void connex_class_init(ObjectClass *oc, void *data) mc->desc = "Gumstix Connex (PXA255)"; mc->init = connex_init; mc->ignore_memory_transaction_failures = true; + mc->deprecation_reason = "machine is old and unmaintained"; } static const TypeInfo connex_type = { @@ -121,6 +122,7 @@ static void verdex_class_init(ObjectClass *oc, void *data) mc->desc = "Gumstix Verdex Pro XL6P COMs (PXA270)"; mc->init = verdex_init; mc->ignore_memory_transaction_failures = true; + mc->deprecation_reason = "machine is old and unmaintained"; mc->default_cpu_type = ARM_CPU_TYPE_NAME("pxa270-c0"); } diff --git a/hw/arm/mainstone.c b/hw/arm/mainstone.c index d2e2e68aa3..3a6c22fddb 100644 --- a/hw/arm/mainstone.c +++ b/hw/arm/mainstone.c @@ -169,6 +169,7 @@ static void mainstone2_machine_init(MachineClass *mc) mc->init = mainstone_init; mc->ignore_memory_transaction_failures = true; mc->default_cpu_type = ARM_CPU_TYPE_NAME("pxa270-c5"); + mc->deprecation_reason = "machine is old and unmaintained"; } DEFINE_MACHINE("mainstone", mainstone2_machine_init) diff --git a/hw/arm/nseries.c b/hw/arm/nseries.c index 35deb74f65..35364312c7 100644 --- a/hw/arm/nseries.c +++ b/hw/arm/nseries.c @@ -1430,6 +1430,7 @@ static void n800_class_init(ObjectClass *oc, void *data) /* Actually two chips of 0x4000000 bytes each */ mc->default_ram_size = 0x08000000; mc->default_ram_id = "omap2.dram"; + mc->deprecation_reason = "machine is old and unmaintained"; machine_add_audiodev_property(mc); } @@ -1452,6 +1453,7 @@ static void n810_class_init(ObjectClass *oc, void *data) /* Actually two chips of 0x4000000 bytes each */ mc->default_ram_size = 0x08000000; mc->default_ram_id = "omap2.dram"; + mc->deprecation_reason = "machine is old and unmaintained"; machine_add_audiodev_property(mc); } diff --git a/hw/arm/palm.c b/hw/arm/palm.c index 8c4c831614..e04ac92eb7 100644 --- a/hw/arm/palm.c +++ b/hw/arm/palm.c @@ -309,6 +309,7 @@ static void palmte_machine_init(MachineClass *mc) mc->default_cpu_type = ARM_CPU_TYPE_NAME("ti925t"); mc->default_ram_size = 0x02000000; mc->default_ram_id = "omap1.dram"; + mc->deprecation_reason = "machine is old and unmaintained"; machine_add_audiodev_property(mc); } diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c index 643a02b180..62cd55ba91 100644 --- a/hw/arm/spitz.c +++ b/hw/arm/spitz.c @@ -1041,6 +1041,7 @@ static void spitz_common_class_init(ObjectClass *oc, void *data) mc->block_default_type = IF_IDE; mc->ignore_memory_transaction_failures = true; mc->init = spitz_common_init; + mc->deprecation_reason = "machine is old and unmaintained"; machine_add_audiodev_property(mc); } diff --git a/hw/arm/tosa.c b/hw/arm/tosa.c index 3ca2e4459c..5891f6064f 100644 --- a/hw/arm/tosa.c +++ b/hw/arm/tosa.c @@ -270,6 +270,7 @@ static void tosapda_machine_init(MachineClass *mc) mc->init = tosa_init; mc->block_default_type = IF_IDE; mc->ignore_memory_transaction_failures = true; + mc->deprecation_reason = "machine is old and unmaintained"; } DEFINE_MACHINE("tosa", tosapda_machine_init) diff --git a/hw/arm/z2.c b/hw/arm/z2.c index eb2ff8dbc8..fc5672e7ab 100644 --- a/hw/arm/z2.c +++ b/hw/arm/z2.c @@ -347,6 +347,7 @@ static void z2_machine_init(MachineClass *mc) mc->init = z2_init; mc->ignore_memory_transaction_failures = true; mc->default_cpu_type = ARM_CPU_TYPE_NAME("pxa270-c5"); + mc->deprecation_reason = "machine is old and unmaintained"; machine_add_audiodev_property(mc); } diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 738cb2ac36..92de315f17 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1682,6 +1682,7 @@ static bool apply_iothread_vq_mapping( /* Context: BQL held */ static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp) { + ERRP_GUARD(); VirtIODevice *vdev = VIRTIO_DEVICE(s); VirtIOBlkConf *conf = &s->conf; BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c index 0108fb11db..4bd9c70a83 100644 --- a/hw/core/cpu-common.c +++ b/hw/core/cpu-common.c @@ -22,14 +22,10 @@ #include "qapi/error.h" #include "hw/core/cpu.h" #include "sysemu/hw_accel.h" -#include "qemu/notify.h" #include "qemu/log.h" #include "qemu/main-loop.h" #include "exec/log.h" -#include "exec/cpu-common.h" #include "exec/gdbstub.h" -#include "qemu/error-report.h" -#include "qemu/qemu-print.h" #include "sysemu/tcg.h" #include "hw/boards.h" #include "hw/qdev-properties.h" diff --git a/hw/core/loader-fit.c b/hw/core/loader-fit.c index b7c7b3ba94..9f20007dbb 100644 --- a/hw/core/loader-fit.c +++ b/hw/core/loader-fit.c @@ -120,6 +120,7 @@ static int fit_load_kernel(const struct fit_loader *ldr, const void *itb, int cfg, void *opaque, hwaddr *pend, Error **errp) { + ERRP_GUARD(); const char *name; const void *data; const void *load_data; @@ -178,6 +179,7 @@ static int fit_load_fdt(const struct fit_loader *ldr, const void *itb, int cfg, void *opaque, const void *match_data, hwaddr kernel_end, Error **errp) { + ERRP_GUARD(); Error *err = NULL; const char *name; const void *data; diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c index 3860a50c3b..4b72009cd3 100644 --- a/hw/core/machine-qmp-cmds.c +++ b/hw/core/machine-qmp-cmds.c @@ -19,7 +19,6 @@ #include "qapi/qmp/qobject.h" #include "qapi/qobject-input-visitor.h" #include "qapi/type-helpers.h" -#include "qemu/main-loop.h" #include "qemu/uuid.h" #include "qom/qom-qobject.h" #include "sysemu/hostmem.h" diff --git a/hw/core/machine.c b/hw/core/machine.c index 4b89172d1c..37ede0e7d4 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -34,6 +34,7 @@ #include "audio/audio.h" GlobalProperty hw_compat_8_2[] = { + { "migration", "zero-page-detection", "legacy"}, { TYPE_VIRTIO_IOMMU_PCI, "granule", "4k" }, { TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "64" }, }; @@ -104,6 +105,7 @@ GlobalProperty hw_compat_5_2[] = { { "PIIX4_PM", "smm-compat", "on"}, { "virtio-blk-device", "report-discard-granularity", "off" }, { "virtio-net-pci-base", "vectors", "3"}, + { "nvme", "msix-exclusive-bar", "on"}, }; const size_t hw_compat_5_2_len = G_N_ELEMENTS(hw_compat_5_2); @@ -720,7 +722,7 @@ HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine) mc->possible_cpu_arch_ids(machine); for (i = 0; i < machine->possible_cpus->len; i++) { - Object *cpu; + CPUState *cpu; HotpluggableCPU *cpu_item = g_new0(typeof(*cpu_item), 1); cpu_item->type = g_strdup(machine->possible_cpus->cpus[i].type); @@ -730,7 +732,7 @@ HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine) cpu = machine->possible_cpus->cpus[i].cpu; if (cpu) { - cpu_item->qom_path = object_get_canonical_path(cpu); + cpu_item->qom_path = object_get_canonical_path(OBJECT(cpu)); } QAPI_LIST_PREPEND(head, cpu_item); } diff --git a/hw/core/numa.c b/hw/core/numa.c index 58a32f1564..f8ce332cfe 100644 --- a/hw/core/numa.c +++ b/hw/core/numa.c @@ -28,7 +28,6 @@ #include "sysemu/numa.h" #include "exec/cpu-common.h" #include "exec/ramlist.h" -#include "qemu/bitmap.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "qapi/opts-visitor.h" @@ -36,7 +35,6 @@ #include "sysemu/qtest.h" #include "hw/core/cpu.h" #include "hw/mem/pc-dimm.h" -#include "migration/vmstate.h" #include "hw/boards.h" #include "hw/mem/memory-device.h" #include "qemu/option.h" diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 28ce6162c7..d79d6f4b53 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -242,6 +242,7 @@ static void get_chr(Object *obj, Visitor *v, const char *name, void *opaque, static void set_chr(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { + ERRP_GUARD(); Property *prop = opaque; CharBackend *be = object_field_prop_ptr(obj, prop); Chardev *s; @@ -693,6 +694,16 @@ const PropertyInfo qdev_prop_granule_mode = { .set_default_value = qdev_propinfo_set_default_value_enum, }; +const PropertyInfo qdev_prop_zero_page_detection = { + .name = "ZeroPageDetection", + .description = "zero_page_detection values, " + "none,legacy,multifd", + .enum_table = &ZeroPageDetection_lookup, + .get = qdev_propinfo_get_enum, + .set = qdev_propinfo_set_enum, + .set_default_value = qdev_propinfo_set_default_value_enum, +}; + /* --- Reserved Region --- */ /* diff --git a/hw/display/ati.c b/hw/display/ati.c index 569b8f6165..8d2501bd82 100644 --- a/hw/display/ati.c +++ b/hw/display/ati.c @@ -991,7 +991,7 @@ static void ati_vga_realize(PCIDevice *dev, Error **errp) } vga_init(vga, OBJECT(s), pci_address_space(dev), pci_address_space_io(dev), true); - vga->con = graphic_console_init(DEVICE(s), 0, s->vga.hw_ops, &s->vga); + vga->con = graphic_console_init(DEVICE(s), 0, s->vga.hw_ops, vga); if (s->cursor_guest_mode) { vga->cursor_invalidate = ati_cursor_invalidate; vga->cursor_draw_line = ati_cursor_draw_line; diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 1c1ee230b3..78d5a4f164 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -600,6 +600,7 @@ static void virtio_unref_resource(pixman_image_t *image, void *data) static void virtio_gpu_update_scanout(VirtIOGPU *g, uint32_t scanout_id, struct virtio_gpu_simple_resource *res, + struct virtio_gpu_framebuffer *fb, struct virtio_gpu_rect *r) { struct virtio_gpu_simple_resource *ores; @@ -617,9 +618,10 @@ static void virtio_gpu_update_scanout(VirtIOGPU *g, scanout->y = r->y; scanout->width = r->width; scanout->height = r->height; + scanout->fb = *fb; } -static void virtio_gpu_do_set_scanout(VirtIOGPU *g, +static bool virtio_gpu_do_set_scanout(VirtIOGPU *g, uint32_t scanout_id, struct virtio_gpu_framebuffer *fb, struct virtio_gpu_simple_resource *res, @@ -645,7 +647,7 @@ static void virtio_gpu_do_set_scanout(VirtIOGPU *g, r->x, r->y, r->width, r->height, fb->width, fb->height); *error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; - return; + return false; } g->parent_obj.enable = 1; @@ -653,11 +655,12 @@ static void virtio_gpu_do_set_scanout(VirtIOGPU *g, if (res->blob) { if (console_has_gl(scanout->con)) { if (!virtio_gpu_update_dmabuf(g, scanout_id, res, fb, r)) { - virtio_gpu_update_scanout(g, scanout_id, res, r); + virtio_gpu_update_scanout(g, scanout_id, res, fb, r); } else { *error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; + return false; } - return; + return true; } data = res->blob; @@ -684,10 +687,6 @@ static void virtio_gpu_do_set_scanout(VirtIOGPU *g, /* realloc the surface ptr */ scanout->ds = qemu_create_displaysurface_pixman(rect); - if (!scanout->ds) { - *error = VIRTIO_GPU_RESP_ERR_UNSPEC; - return; - } #ifdef WIN32 qemu_displaysurface_win32_set_handle(scanout->ds, res->handle, fb->offset); #endif @@ -697,7 +696,8 @@ static void virtio_gpu_do_set_scanout(VirtIOGPU *g, scanout->ds); } - virtio_gpu_update_scanout(g, scanout_id, res, r); + virtio_gpu_update_scanout(g, scanout_id, res, fb, r); + return true; } static void virtio_gpu_set_scanout(VirtIOGPU *g, @@ -1168,7 +1168,8 @@ static void virtio_gpu_cursor_bh(void *opaque) static const VMStateDescription vmstate_virtio_gpu_scanout = { .name = "virtio-gpu-one-scanout", - .version_id = 1, + .version_id = 2, + .minimum_version_id = 1, .fields = (const VMStateField[]) { VMSTATE_UINT32(resource_id, struct virtio_gpu_scanout), VMSTATE_UINT32(width, struct virtio_gpu_scanout), @@ -1180,6 +1181,12 @@ static const VMStateDescription vmstate_virtio_gpu_scanout = { VMSTATE_UINT32(cursor.hot_y, struct virtio_gpu_scanout), VMSTATE_UINT32(cursor.pos.x, struct virtio_gpu_scanout), VMSTATE_UINT32(cursor.pos.y, struct virtio_gpu_scanout), + VMSTATE_UINT32_V(fb.format, struct virtio_gpu_scanout, 2), + VMSTATE_UINT32_V(fb.bytes_pp, struct virtio_gpu_scanout, 2), + VMSTATE_UINT32_V(fb.width, struct virtio_gpu_scanout, 2), + VMSTATE_UINT32_V(fb.height, struct virtio_gpu_scanout, 2), + VMSTATE_UINT32_V(fb.stride, struct virtio_gpu_scanout, 2), + VMSTATE_UINT32_V(fb.offset, struct virtio_gpu_scanout, 2), VMSTATE_END_OF_LIST() }, }; @@ -1351,6 +1358,7 @@ static int virtio_gpu_blob_save(QEMUFile *f, void *opaque, size_t size, if (!res->blob_size) { continue; } + assert(!res->image); qemu_put_be32(f, res->resource_id); qemu_put_be32(f, res->blob_size); qemu_put_be32(f, res->iov_cnt); @@ -1413,24 +1421,40 @@ static int virtio_gpu_post_load(void *opaque, int version_id) int i; for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { - /* FIXME: should take scanout.r.{x,y} into account */ scanout = &g->parent_obj.scanout[i]; if (!scanout->resource_id) { continue; } + res = virtio_gpu_find_resource(g, scanout->resource_id); if (!res) { return -EINVAL; } - scanout->ds = qemu_create_displaysurface_pixman(res->image); - if (!scanout->ds) { - return -EINVAL; - } + + if (scanout->fb.format != 0) { + uint32_t error = 0; + struct virtio_gpu_rect r = { + .x = scanout->x, + .y = scanout->y, + .width = scanout->width, + .height = scanout->height + }; + + if (!virtio_gpu_do_set_scanout(g, i, &scanout->fb, res, &r, &error)) { + return -EINVAL; + } + } else { + /* legacy v1 migration support */ + if (!res->image) { + return -EINVAL; + } + scanout->ds = qemu_create_displaysurface_pixman(res->image); #ifdef WIN32 - qemu_displaysurface_win32_set_handle(scanout->ds, res->handle, 0); + qemu_displaysurface_win32_set_handle(scanout->ds, res->handle, 0); #endif + dpy_gfx_replace_surface(scanout->con, scanout->ds); + } - dpy_gfx_replace_surface(scanout->con, scanout->ds); dpy_gfx_update_full(scanout->con); if (scanout->cursor.resource_id) { update_cursor(g, &scanout->cursor); diff --git a/hw/gpio/Kconfig b/hw/gpio/Kconfig index 712940b8e0..19c97cc823 100644 --- a/hw/gpio/Kconfig +++ b/hw/gpio/Kconfig @@ -19,3 +19,7 @@ config SIFIVE_GPIO config STM32L4X5_GPIO bool + +config PCF8574 + bool + depends on I2C diff --git a/hw/gpio/meson.build b/hw/gpio/meson.build index 3454b503ae..791e93a97b 100644 --- a/hw/gpio/meson.build +++ b/hw/gpio/meson.build @@ -16,3 +16,4 @@ system_ss.add(when: 'CONFIG_RASPI', if_true: files( system_ss.add(when: 'CONFIG_STM32L4X5_SOC', if_true: files('stm32l4x5_gpio.c')) system_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('aspeed_gpio.c')) system_ss.add(when: 'CONFIG_SIFIVE_GPIO', if_true: files('sifive_gpio.c')) +system_ss.add(when: 'CONFIG_PCF8574', if_true: files('pcf8574.c')) diff --git a/hw/gpio/pcf8574.c b/hw/gpio/pcf8574.c new file mode 100644 index 0000000000..d37909e2ad --- /dev/null +++ b/hw/gpio/pcf8574.c @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * NXP PCF8574 8-port I2C GPIO expansion chip. + * Copyright (c) 2024 KNS Group (YADRO). + * Written by Dmitrii Sharikhin <d.sharikhin@yadro.com> + */ + +#include "qemu/osdep.h" +#include "hw/i2c/i2c.h" +#include "hw/gpio/pcf8574.h" +#include "hw/irq.h" +#include "migration/vmstate.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "qom/object.h" + +/* + * PCF8574 and compatible chips incorporate quasi-bidirectional + * IO. Electrically it means that device sustain pull-up to line + * unless IO port is configured as output _and_ driven low. + * + * IO access is implemented as simple I2C single-byte read + * or write operation. So, to configure line to input user write 1 + * to corresponding bit. To configure line to output and drive it low + * user write 0 to corresponding bit. + * + * In essence, user can think of quasi-bidirectional IO as + * open-drain line, except presence of builtin rising edge acceleration + * embedded in PCF8574 IC + * + * PCF8574 has interrupt request line, which is being pulled down when + * port line state differs from last read. Port read operation clears + * state and INT line returns to high state via pullup. + */ + +OBJECT_DECLARE_SIMPLE_TYPE(PCF8574State, PCF8574) + +#define PORTS_COUNT (8) + +struct PCF8574State { + I2CSlave parent_obj; + uint8_t lastrq; /* Last requested state. If changed - assert irq */ + uint8_t input; /* external electrical line state */ + uint8_t output; /* Pull-up (1) or drive low (0) on bit */ + qemu_irq handler[PORTS_COUNT]; + qemu_irq intrq; /* External irq request */ +}; + +static void pcf8574_reset(DeviceState *dev) +{ + PCF8574State *s = PCF8574(dev); + s->lastrq = MAKE_64BIT_MASK(0, PORTS_COUNT); + s->input = MAKE_64BIT_MASK(0, PORTS_COUNT); + s->output = MAKE_64BIT_MASK(0, PORTS_COUNT); +} + +static inline uint8_t pcf8574_line_state(PCF8574State *s) +{ + /* we driving line low or external circuit does that */ + return s->input & s->output; +} + +static uint8_t pcf8574_rx(I2CSlave *i2c) +{ + PCF8574State *s = PCF8574(i2c); + uint8_t linestate = pcf8574_line_state(s); + if (s->lastrq != linestate) { + s->lastrq = linestate; + if (s->intrq) { + qemu_set_irq(s->intrq, 1); + } + } + return linestate; +} + +static int pcf8574_tx(I2CSlave *i2c, uint8_t data) +{ + PCF8574State *s = PCF8574(i2c); + uint8_t prev; + uint8_t diff; + uint8_t actual; + int line = 0; + + prev = pcf8574_line_state(s); + s->output = data; + actual = pcf8574_line_state(s); + + for (diff = (actual ^ prev); diff; diff &= ~(1 << line)) { + line = ctz32(diff); + if (s->handler[line]) { + qemu_set_irq(s->handler[line], (actual >> line) & 1); + } + } + + if (s->intrq) { + qemu_set_irq(s->intrq, actual == s->lastrq); + } + + return 0; +} + +static const VMStateDescription vmstate_pcf8574 = { + .name = "pcf8574", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_I2C_SLAVE(parent_obj, PCF8574State), + VMSTATE_UINT8(lastrq, PCF8574State), + VMSTATE_UINT8(input, PCF8574State), + VMSTATE_UINT8(output, PCF8574State), + VMSTATE_END_OF_LIST() + } +}; + +static void pcf8574_gpio_set(void *opaque, int line, int level) +{ + PCF8574State *s = (PCF8574State *) opaque; + assert(line >= 0 && line < ARRAY_SIZE(s->handler)); + + if (level) { + s->input |= (1 << line); + } else { + s->input &= ~(1 << line); + } + + if (pcf8574_line_state(s) != s->lastrq && s->intrq) { + qemu_set_irq(s->intrq, 0); + } +} + +static void pcf8574_realize(DeviceState *dev, Error **errp) +{ + PCF8574State *s = PCF8574(dev); + + qdev_init_gpio_in(dev, pcf8574_gpio_set, ARRAY_SIZE(s->handler)); + qdev_init_gpio_out(dev, s->handler, ARRAY_SIZE(s->handler)); + qdev_init_gpio_out_named(dev, &s->intrq, "nINT", 1); +} + +static void pcf8574_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + I2CSlaveClass *k = I2C_SLAVE_CLASS(klass); + + k->recv = pcf8574_rx; + k->send = pcf8574_tx; + dc->realize = pcf8574_realize; + dc->reset = pcf8574_reset; + dc->vmsd = &vmstate_pcf8574; +} + +static const TypeInfo pcf8574_infos[] = { + { + .name = TYPE_PCF8574, + .parent = TYPE_I2C_SLAVE, + .instance_size = sizeof(PCF8574State), + .class_init = pcf8574_class_init, + } +}; + +DEFINE_TYPES(pcf8574_infos); diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 1590352bb5..feb7a93083 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -699,7 +699,8 @@ void xen_load_linux(PCMachineState *pcms) assert(MACHINE(pcms)->kernel_filename != NULL); - fw_cfg = fw_cfg_init_io(FW_CFG_IO_BASE); + fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, + &address_space_memory); fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); rom_set_fw(fw_cfg); diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 807e09bcdb..ffbda48917 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -225,7 +225,7 @@ void x86_cpu_plug(HotplugHandler *hotplug_dev, } found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); - found_cpu->cpu = OBJECT(dev); + found_cpu->cpu = CPU(dev); out: error_propagate(errp, local_err); } diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci-internal.h index 7e63ea2310..7e63ea2310 100644 --- a/hw/ide/ahci_internal.h +++ b/hw/ide/ahci-internal.h diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index b8123bc73d..bfefad2965 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -37,7 +37,7 @@ #include "hw/ide/pci.h" #include "hw/ide/ahci-pci.h" #include "hw/ide/ahci-sysbus.h" -#include "ahci_internal.h" +#include "ahci-internal.h" #include "ide-internal.h" #include "trace.h" diff --git a/hw/ide/ich.c b/hw/ide/ich.c index 3ea793d790..9b909c87f3 100644 --- a/hw/ide/ich.c +++ b/hw/ide/ich.c @@ -70,7 +70,7 @@ #include "sysemu/dma.h" #include "hw/ide/pci.h" #include "hw/ide/ahci-pci.h" -#include "ahci_internal.h" +#include "ahci-internal.h" #define ICH9_MSI_CAP_OFFSET 0x80 #define ICH9_SATA_CAP_OFFSET 0xA8 diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c index 1e98d8bda5..efce112310 100644 --- a/hw/loongarch/virt.c +++ b/hw/loongarch/virt.c @@ -858,7 +858,7 @@ static void loongarch_init(MachineState *machine) for (i = 0; i < possible_cpus->len; i++) { cpu = cpu_create(machine->cpu_type); cpu->cpu_index = i; - machine->possible_cpus->cpus[i].cpu = OBJECT(cpu); + machine->possible_cpus->cpus[i].cpu = cpu; lacpu = LOONGARCH_CPU(cpu); lacpu->phy_id = machine->possible_cpus->cpus[i].arch_id; } diff --git a/hw/m68k/virt.c b/hw/m68k/virt.c index e2792ef46d..b8e5e102e6 100644 --- a/hw/m68k/virt.c +++ b/hw/m68k/virt.c @@ -239,9 +239,20 @@ static void virt_init(MachineState *machine) param_ptr = param_blob; BOOTINFO1(param_ptr, BI_MACHTYPE, MACH_VIRT); - BOOTINFO1(param_ptr, BI_FPUTYPE, FPU_68040); - BOOTINFO1(param_ptr, BI_MMUTYPE, MMU_68040); - BOOTINFO1(param_ptr, BI_CPUTYPE, CPU_68040); + if (m68k_feature(&cpu->env, M68K_FEATURE_M68020)) { + BOOTINFO1(param_ptr, BI_CPUTYPE, CPU_68020); + } else if (m68k_feature(&cpu->env, M68K_FEATURE_M68030)) { + BOOTINFO1(param_ptr, BI_MMUTYPE, MMU_68030); + BOOTINFO1(param_ptr, BI_CPUTYPE, CPU_68030); + } else if (m68k_feature(&cpu->env, M68K_FEATURE_M68040)) { + BOOTINFO1(param_ptr, BI_FPUTYPE, FPU_68040); + BOOTINFO1(param_ptr, BI_MMUTYPE, MMU_68040); + BOOTINFO1(param_ptr, BI_CPUTYPE, CPU_68040); + } else if (m68k_feature(&cpu->env, M68K_FEATURE_M68060)) { + BOOTINFO1(param_ptr, BI_FPUTYPE, FPU_68060); + BOOTINFO1(param_ptr, BI_MMUTYPE, MMU_68060); + BOOTINFO1(param_ptr, BI_CPUTYPE, CPU_68060); + } BOOTINFO2(param_ptr, BI_MEMCHUNK, 0, ram_size); BOOTINFO1(param_ptr, BI_VIRT_QEMU_VERSION, diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index a2fd0bc365..de49d1b8a8 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -832,6 +832,7 @@ static void ivshmem_write_config(PCIDevice *pdev, uint32_t address, static void ivshmem_common_realize(PCIDevice *dev, Error **errp) { + ERRP_GUARD(); IVShmemState *s = IVSHMEM_COMMON(dev); Error *err = NULL; uint8_t *pci_conf; diff --git a/hw/misc/macio/pmu.c b/hw/misc/macio/pmu.c index e9a90da88f..e40c51bf52 100644 --- a/hw/misc/macio/pmu.c +++ b/hw/misc/macio/pmu.c @@ -737,8 +737,7 @@ static void pmu_realize(DeviceState *dev, Error **errp) timer_mod(s->one_sec_timer, s->one_sec_target); if (s->has_adb) { - qbus_init(&s->adb_bus, sizeof(s->adb_bus), TYPE_ADB_BUS, - dev, "adb.0"); + qbus_init(adb_bus, sizeof(*adb_bus), TYPE_ADB_BUS, dev, "adb.0"); adb_register_autopoll_callback(adb_bus, pmu_adb_poll, s); } } diff --git a/hw/misc/pvpanic-pci.c b/hw/misc/pvpanic-pci.c index c01e4ce864..83be95d0d2 100644 --- a/hw/misc/pvpanic-pci.c +++ b/hw/misc/pvpanic-pci.c @@ -48,7 +48,7 @@ static void pvpanic_pci_realizefn(PCIDevice *dev, Error **errp) PVPanicPCIState *s = PVPANIC_PCI_DEVICE(dev); PVPanicState *ps = &s->pvpanic; - pvpanic_setup_io(&s->pvpanic, DEVICE(s), 2); + pvpanic_setup_io(ps, DEVICE(s), 2); pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &ps->mr); } diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index e324c02dd5..3ae2a184d5 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -123,14 +123,6 @@ e1000e_intmgr_timer_resume(E1000IntrDelayTimer *timer) } } -static void -e1000e_intmgr_timer_pause(E1000IntrDelayTimer *timer) -{ - if (timer->running) { - timer_del(timer->timer); - } -} - static inline void e1000e_intrmgr_stop_timer(E1000IntrDelayTimer *timer) { @@ -399,24 +391,6 @@ e1000e_intrmgr_resume(E1000ECore *core) } static void -e1000e_intrmgr_pause(E1000ECore *core) -{ - int i; - - e1000e_intmgr_timer_pause(&core->radv); - e1000e_intmgr_timer_pause(&core->rdtr); - e1000e_intmgr_timer_pause(&core->raid); - e1000e_intmgr_timer_pause(&core->tidv); - e1000e_intmgr_timer_pause(&core->tadv); - - e1000e_intmgr_timer_pause(&core->itr); - - for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { - e1000e_intmgr_timer_pause(&core->eitr[i]); - } -} - -static void e1000e_intrmgr_reset(E1000ECore *core) { int i; @@ -3334,12 +3308,6 @@ e1000e_core_read(E1000ECore *core, hwaddr addr, unsigned size) return 0; } -static inline void -e1000e_autoneg_pause(E1000ECore *core) -{ - timer_del(core->autoneg_timer); -} - static void e1000e_autoneg_resume(E1000ECore *core) { @@ -3351,22 +3319,6 @@ e1000e_autoneg_resume(E1000ECore *core) } } -static void -e1000e_vm_state_change(void *opaque, bool running, RunState state) -{ - E1000ECore *core = opaque; - - if (running) { - trace_e1000e_vm_state_running(); - e1000e_intrmgr_resume(core); - e1000e_autoneg_resume(core); - } else { - trace_e1000e_vm_state_stopped(); - e1000e_autoneg_pause(core); - e1000e_intrmgr_pause(core); - } -} - void e1000e_core_pci_realize(E1000ECore *core, const uint16_t *eeprom_templ, @@ -3379,9 +3331,6 @@ e1000e_core_pci_realize(E1000ECore *core, e1000e_autoneg_timer, core); e1000e_intrmgr_pci_realize(core); - core->vmstate = - qemu_add_vm_change_state_handler(e1000e_vm_state_change, core); - for (i = 0; i < E1000E_NUM_QUEUES; i++) { net_tx_pkt_init(&core->tx[i].tx_pkt, E1000E_MAX_TX_FRAGS); } @@ -3405,8 +3354,6 @@ e1000e_core_pci_uninit(E1000ECore *core) e1000e_intrmgr_pci_unint(core); - qemu_del_vm_change_state_handler(core->vmstate); - for (i = 0; i < E1000E_NUM_QUEUES; i++) { net_tx_pkt_uninit(core->tx[i].tx_pkt); } @@ -3576,5 +3523,12 @@ e1000e_core_post_load(E1000ECore *core) */ nc->link_down = (core->mac[STATUS] & E1000_STATUS_LU) == 0; + /* + * we need to restart intrmgr timers, as an older version of + * QEMU can have stopped them before migration + */ + e1000e_intrmgr_resume(core); + e1000e_autoneg_resume(core); + return 0; } diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h index 66b025cc43..01510ca78b 100644 --- a/hw/net/e1000e_core.h +++ b/hw/net/e1000e_core.h @@ -98,8 +98,6 @@ struct E1000Core { E1000IntrDelayTimer eitr[E1000E_MSIX_VEC_NUM]; - VMChangeStateEntry *vmstate; - uint32_t itr_guest_value; uint32_t eitr_guest_value[E1000E_MSIX_VEC_NUM]; diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index 2a7a11aa9e..bcd5f6cd9c 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -161,14 +161,6 @@ igb_intmgr_timer_resume(IGBIntrDelayTimer *timer) } static void -igb_intmgr_timer_pause(IGBIntrDelayTimer *timer) -{ - if (timer->running) { - timer_del(timer->timer); - } -} - -static void igb_intrmgr_on_msix_throttling_timer(void *opaque) { IGBIntrDelayTimer *timer = opaque; @@ -213,16 +205,6 @@ igb_intrmgr_resume(IGBCore *core) } static void -igb_intrmgr_pause(IGBCore *core) -{ - int i; - - for (i = 0; i < IGB_INTR_NUM; i++) { - igb_intmgr_timer_pause(&core->eitr[i]); - } -} - -static void igb_intrmgr_reset(IGBCore *core) { int i; @@ -4290,12 +4272,6 @@ igb_core_read(IGBCore *core, hwaddr addr, unsigned size) return 0; } -static inline void -igb_autoneg_pause(IGBCore *core) -{ - timer_del(core->autoneg_timer); -} - static void igb_autoneg_resume(IGBCore *core) { @@ -4307,22 +4283,6 @@ igb_autoneg_resume(IGBCore *core) } } -static void -igb_vm_state_change(void *opaque, bool running, RunState state) -{ - IGBCore *core = opaque; - - if (running) { - trace_e1000e_vm_state_running(); - igb_intrmgr_resume(core); - igb_autoneg_resume(core); - } else { - trace_e1000e_vm_state_stopped(); - igb_autoneg_pause(core); - igb_intrmgr_pause(core); - } -} - void igb_core_pci_realize(IGBCore *core, const uint16_t *eeprom_templ, @@ -4335,8 +4295,6 @@ igb_core_pci_realize(IGBCore *core, igb_autoneg_timer, core); igb_intrmgr_pci_realize(core); - core->vmstate = qemu_add_vm_change_state_handler(igb_vm_state_change, core); - for (i = 0; i < IGB_NUM_QUEUES; i++) { net_tx_pkt_init(&core->tx[i].tx_pkt, E1000E_MAX_TX_FRAGS); } @@ -4360,8 +4318,6 @@ igb_core_pci_uninit(IGBCore *core) igb_intrmgr_pci_unint(core); - qemu_del_vm_change_state_handler(core->vmstate); - for (i = 0; i < IGB_NUM_QUEUES; i++) { net_tx_pkt_uninit(core->tx[i].tx_pkt); } @@ -4586,5 +4542,12 @@ igb_core_post_load(IGBCore *core) */ nc->link_down = (core->mac[STATUS] & E1000_STATUS_LU) == 0; + /* + * we need to restart intrmgr timers, as an older version of + * QEMU can have stopped them before migration + */ + igb_intrmgr_resume(core); + igb_autoneg_resume(core); + return 0; } diff --git a/hw/net/igb_core.h b/hw/net/igb_core.h index bf8c46f26b..d70b54e318 100644 --- a/hw/net/igb_core.h +++ b/hw/net/igb_core.h @@ -90,8 +90,6 @@ struct IGBCore { IGBIntrDelayTimer eitr[IGB_INTR_NUM]; - VMChangeStateEntry *vmstate; - uint32_t eitr_guest_value[IGB_INTR_NUM]; uint8_t permanent_mac[ETH_ALEN]; diff --git a/hw/net/pcnet.c b/hw/net/pcnet.c index 494eab8479..ad675ab29d 100644 --- a/hw/net/pcnet.c +++ b/hw/net/pcnet.c @@ -632,7 +632,7 @@ static inline int ladr_match(PCNetState *s, const uint8_t *buf, int size) { struct qemu_ether_header *hdr = (void *)buf; if ((*(hdr->ether_dhost)&0x01) && - ((uint64_t *)&s->csr[8])[0] != 0LL) { + (s->csr[8] | s->csr[9] | s->csr[10] | s->csr[11]) != 0) { uint8_t ladr[8] = { s->csr[8] & 0xff, s->csr[8] >> 8, s->csr[9] & 0xff, s->csr[9] >> 8, diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 27055a4b8e..9959f1932b 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -42,6 +42,7 @@ #include "sysemu/sysemu.h" #include "trace.h" #include "monitor/qdev.h" +#include "monitor/monitor.h" #include "hw/pci/pci_device.h" #include "net_rx_pkt.h" #include "hw/virtio/vhost.h" @@ -1328,14 +1329,53 @@ static void virtio_net_detach_epbf_rss(VirtIONet *n) virtio_net_attach_ebpf_to_backend(n->nic, -1); } -static bool virtio_net_load_ebpf(VirtIONet *n) +static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp) { - if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) { - /* backend doesn't support steering ebpf */ - return false; + int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1}; + int ret = true; + int i = 0; + + ERRP_GUARD(); + + if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) { + error_setg(errp, + "Expected %d file descriptors but got %d", + EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds); + return false; + } + + for (i = 0; i < n->nr_ebpf_rss_fds; i++) { + fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp); + if (*errp) { + ret = false; + goto exit; + } + } + + ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3]); + +exit: + if (!ret || *errp) { + for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) { + close(fds[i]); + } } - return ebpf_rss_load(&n->ebpf_rss); + return ret; +} + +static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp) +{ + bool ret = false; + + if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) { + if (!(n->ebpf_rss_fds + && virtio_net_load_ebpf_fds(n, errp))) { + ret = ebpf_rss_load(&n->ebpf_rss); + } + } + + return ret; } static void virtio_net_unload_ebpf(VirtIONet *n) @@ -3784,7 +3824,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) net_rx_pkt_init(&n->rx_pkt); if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { - virtio_net_load_ebpf(n); + virtio_net_load_ebpf(n, errp); } } @@ -3946,6 +3986,8 @@ static Property virtio_net_properties[] = { VIRTIO_NET_F_RSS, false), DEFINE_PROP_BIT64("hash", VirtIONet, host_features, VIRTIO_NET_F_HASH_REPORT, false), + DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds, + ebpf_rss_fds, qdev_prop_string, char*), DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, VIRTIO_NET_F_RSC_EXT, false), DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 40159f39b8..c2b17de987 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -2855,7 +2855,7 @@ static inline uint16_t nvme_check_copy_mcl(NvmeNamespace *ns, uint32_t nlb; nvme_copy_source_range_parse(iocb->ranges, idx, iocb->format, NULL, &nlb, NULL, NULL, NULL); - copy_len += nlb + 1; + copy_len += nlb; } if (copy_len > ns->id_ns.mcl) { @@ -5642,6 +5642,10 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req) } QEMU_PACKED uuid = {}; struct { NvmeIdNsDescr hdr; + uint8_t v[NVME_NIDL_NGUID]; + } QEMU_PACKED nguid = {}; + struct { + NvmeIdNsDescr hdr; uint64_t v; } QEMU_PACKED eui64 = {}; struct { @@ -5668,6 +5672,14 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req) pos += sizeof(uuid); } + if (!nvme_nguid_is_null(&ns->params.nguid)) { + nguid.hdr.nidt = NVME_NIDT_NGUID; + nguid.hdr.nidl = NVME_NIDL_NGUID; + memcpy(nguid.v, ns->params.nguid.data, NVME_NIDL_NGUID); + memcpy(pos, &nguid, sizeof(nguid)); + pos += sizeof(nguid); + } + if (ns->params.eui64) { eui64.hdr.nidt = NVME_NIDT_EUI64; eui64.hdr.nidl = NVME_NIDL_EUI64; @@ -7794,6 +7806,11 @@ static bool nvme_check_params(NvmeCtrl *n, Error **errp) } if (n->pmr.dev) { + if (params->msix_exclusive_bar) { + error_setg(errp, "not enough BARs available to enable PMR"); + return false; + } + if (host_memory_backend_is_mapped(n->pmr.dev)) { error_setg(errp, "can't use already busy memdev: %s", object_get_canonical_path_component(OBJECT(n->pmr.dev))); @@ -7999,13 +8016,18 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev) memory_region_set_enabled(&n->pmr.dev->mr, false); } -static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs, - unsigned *msix_table_offset, - unsigned *msix_pba_offset) +static uint64_t nvme_mbar_size(unsigned total_queues, unsigned total_irqs, + unsigned *msix_table_offset, + unsigned *msix_pba_offset) { - uint64_t bar_size, msix_table_size, msix_pba_size; + uint64_t bar_size, msix_table_size; bar_size = sizeof(NvmeBar) + 2 * total_queues * NVME_DB_SIZE; + + if (total_irqs == 0) { + goto out; + } + bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB); if (msix_table_offset) { @@ -8020,11 +8042,10 @@ static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs, *msix_pba_offset = bar_size; } - msix_pba_size = QEMU_ALIGN_UP(total_irqs, 64) / 8; - bar_size += msix_pba_size; + bar_size += QEMU_ALIGN_UP(total_irqs, 64) / 8; - bar_size = pow2ceil(bar_size); - return bar_size; +out: + return pow2ceil(bar_size); } static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset) @@ -8032,7 +8053,7 @@ static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset) uint16_t vf_dev_id = n->params.use_intel_id ? PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME; NvmePriCtrlCap *cap = &n->pri_ctrl_cap; - uint64_t bar_size = nvme_bar_size(le16_to_cpu(cap->vqfrsm), + uint64_t bar_size = nvme_mbar_size(le16_to_cpu(cap->vqfrsm), le16_to_cpu(cap->vifrsm), NULL, NULL); @@ -8071,7 +8092,7 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) ERRP_GUARD(); uint8_t *pci_conf = pci_dev->config; uint64_t bar_size; - unsigned msix_table_offset, msix_pba_offset; + unsigned msix_table_offset = 0, msix_pba_offset = 0; int ret; pci_conf[PCI_INTERRUPT_PIN] = 1; @@ -8093,24 +8114,38 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) pcie_ari_init(pci_dev, 0x100); } - /* add one to max_ioqpairs to account for the admin queue pair */ - bar_size = nvme_bar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize, - &msix_table_offset, &msix_pba_offset); + if (n->params.msix_exclusive_bar && !pci_is_vf(pci_dev)) { + bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1, 0, NULL, NULL); + memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", + bar_size); + pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, &n->iomem); + ret = msix_init_exclusive_bar(pci_dev, n->params.msix_qsize, 4, errp); + } else { + assert(n->params.msix_qsize >= 1); - memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size); - memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", - msix_table_offset); - memory_region_add_subregion(&n->bar0, 0, &n->iomem); + /* add one to max_ioqpairs to account for the admin queue pair */ + bar_size = nvme_mbar_size(n->params.max_ioqpairs + 1, + n->params.msix_qsize, &msix_table_offset, + &msix_pba_offset); - if (pci_is_vf(pci_dev)) { - pcie_sriov_vf_register_bar(pci_dev, 0, &n->bar0); - } else { - pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | - PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0); + memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size); + memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", + msix_table_offset); + memory_region_add_subregion(&n->bar0, 0, &n->iomem); + + if (pci_is_vf(pci_dev)) { + pcie_sriov_vf_register_bar(pci_dev, 0, &n->bar0); + } else { + pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0); + } + + ret = msix_init(pci_dev, n->params.msix_qsize, + &n->bar0, 0, msix_table_offset, + &n->bar0, 0, msix_pba_offset, 0, errp); } - ret = msix_init(pci_dev, n->params.msix_qsize, - &n->bar0, 0, msix_table_offset, - &n->bar0, 0, msix_pba_offset, 0, errp); + if (ret == -ENOTSUP) { /* report that msix is not supported, but do not error out */ warn_report_err(*errp); @@ -8305,9 +8340,15 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) if (pci_is_vf(pci_dev)) { /* * VFs derive settings from the parent. PF's lifespan exceeds - * that of VF's, so it's safe to share params.serial. + * that of VF's. */ memcpy(&n->params, &pn->params, sizeof(NvmeParams)); + + /* + * Set PF's serial value to a new string memory to prevent 'serial' + * property object release of PF when a VF is removed from the system. + */ + n->params.serial = g_strdup(pn->params.serial); n->subsys = pn->subsys; } @@ -8408,6 +8449,8 @@ static Property nvme_props[] = { params.sriov_max_vi_per_vf, 0), DEFINE_PROP_UINT8("sriov_max_vq_per_vf", NvmeCtrl, params.sriov_max_vq_per_vf, 0), + DEFINE_PROP_BOOL("msix-exclusive-bar", NvmeCtrl, params.msix_exclusive_bar, + false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build index 1a6a2ca2f3..7d5caa53c2 100644 --- a/hw/nvme/meson.build +++ b/hw/nvme/meson.build @@ -1 +1 @@ -system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c')) +system_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c', 'nguid.c')) \ No newline at end of file diff --git a/hw/nvme/nguid.c b/hw/nvme/nguid.c new file mode 100644 index 0000000000..829832bd9f --- /dev/null +++ b/hw/nvme/nguid.c @@ -0,0 +1,187 @@ +/* + * QEMU NVMe NGUID functions + * + * Copyright 2024 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include "qemu/osdep.h" +#include "qapi/visitor.h" +#include "qemu/ctype.h" +#include "nvme.h" + +#define NGUID_SEPARATOR '-' + +#define NGUID_VALUE_AUTO "auto" + +#define NGUID_FMT \ + "%02hhx%02hhx%02hhx%02hhx" \ + "%02hhx%02hhx%02hhx%02hhx" \ + "%02hhx%02hhx%02hhx%02hhx" \ + "%02hhx%02hhx%02hhx%02hhx" + +#define NGUID_STR_LEN (2 * NGUID_LEN + 1) + +bool nvme_nguid_is_null(const NvmeNGUID *nguid) +{ + static NvmeNGUID null_nguid; + return memcmp(nguid, &null_nguid, sizeof(NvmeNGUID)) == 0; +} + +static void nvme_nguid_generate(NvmeNGUID *out) +{ + int i; + uint32_t x; + + QEMU_BUILD_BUG_ON((NGUID_LEN % sizeof(x)) != 0); + + for (i = 0; i < NGUID_LEN; i += sizeof(x)) { + x = g_random_int(); + memcpy(&out->data[i], &x, sizeof(x)); + } +} + +/* + * The Linux Kernel typically prints the NGUID of an NVMe namespace using the + * same format as the UUID. For instance: + * + * $ cat /sys/class/block/nvme0n1/nguid + * e9accd3b-8390-4e13-167c-f0593437f57d + * + * When there is no UUID but there is NGUID the Kernel will print the NGUID as + * wwid and it won't use the UUID format: + * + * $ cat /sys/class/block/nvme0n1/wwid + * eui.e9accd3b83904e13167cf0593437f57d + * + * The NGUID has different fields compared to the UUID, so the grouping used in + * the UUID format has no relation with the 3 fields of the NGUID. + * + * This implementation won't expect a strict format as the UUID one and instead + * it will admit any string of hexadecimal digits. Byte groups could be created + * using the '-' separator. The number of bytes needs to be exactly 16 and the + * separator '-' has to be exactly in a byte boundary. The following are + * examples of accepted formats for the NGUID string: + * + * nguid="e9accd3b-8390-4e13-167c-f0593437f57d" + * nguid="e9accd3b83904e13167cf0593437f57d" + * nguid="FEDCBA9876543210-ABCDEF-0123456789" + */ +static bool nvme_nguid_is_valid(const char *str) +{ + int i; + int digit_count = 0; + + for (i = 0; i < strlen(str); i++) { + const char c = str[i]; + if (qemu_isxdigit(c)) { + digit_count++; + continue; + } + if (c == NGUID_SEPARATOR) { + /* + * We need to make sure the separator is in a byte boundary, the + * string does not start with the separator and they are not back to + * back "--". + */ + if ((i > 0) && (str[i - 1] != NGUID_SEPARATOR) && + (digit_count % 2) == 0) { + continue; + } + } + return false; + } + /* + * The string should have the correct byte length and not finish with the + * separator + */ + return (digit_count == (2 * NGUID_LEN)) && (str[i - 1] != NGUID_SEPARATOR); +} + +static int nvme_nguid_parse(const char *str, NvmeNGUID *nguid) +{ + uint8_t *id = &nguid->data[0]; + int ret = 0; + int i; + const char *ptr = str; + + if (!nvme_nguid_is_valid(str)) { + return -1; + } + + for (i = 0; i < NGUID_LEN; i++) { + ret = sscanf(ptr, "%02hhx", &id[i]); + if (ret != 1) { + return -1; + } + ptr += 2; + if (*ptr == NGUID_SEPARATOR) { + ptr++; + } + } + + return 0; +} + +/* + * When converted back to string this implementation will use a raw hex number + * with no separators, for instance: + * + * "e9accd3b83904e13167cf0593437f57d" + */ +static void nvme_nguid_stringify(const NvmeNGUID *nguid, char *out) +{ + const uint8_t *id = &nguid->data[0]; + snprintf(out, NGUID_STR_LEN, NGUID_FMT, + id[0], id[1], id[2], id[3], id[4], id[5], id[6], id[7], + id[8], id[9], id[10], id[11], id[12], id[13], id[14], id[15]); +} + +static void get_nguid(Object *obj, Visitor *v, const char *name, void *opaque, + Error **errp) +{ + Property *prop = opaque; + NvmeNGUID *nguid = object_field_prop_ptr(obj, prop); + char buffer[NGUID_STR_LEN]; + char *p = buffer; + + nvme_nguid_stringify(nguid, buffer); + + visit_type_str(v, name, &p, errp); +} + +static void set_nguid(Object *obj, Visitor *v, const char *name, void *opaque, + Error **errp) +{ + Property *prop = opaque; + NvmeNGUID *nguid = object_field_prop_ptr(obj, prop); + char *str; + + if (!visit_type_str(v, name, &str, errp)) { + return; + } + + if (!strcmp(str, NGUID_VALUE_AUTO)) { + nvme_nguid_generate(nguid); + } else if (nvme_nguid_parse(str, nguid) < 0) { + error_set_from_qdev_prop_error(errp, EINVAL, obj, name, str); + } + g_free(str); +} + +const PropertyInfo qdev_prop_nguid = { + .name = "str", + .description = + "NGUID or \"" NGUID_VALUE_AUTO "\" for random value", + .get = get_nguid, + .set = set_nguid, +}; diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 0eabcf5cf5..ea8db175db 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -89,6 +89,7 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp) id_ns->mcl = cpu_to_le32(ns->params.mcl); id_ns->msrc = ns->params.msrc; id_ns->eui64 = cpu_to_be64(ns->params.eui64); + memcpy(&id_ns->nguid, &ns->params.nguid.data, sizeof(id_ns->nguid)); ds = 31 - clz32(ns->blkconf.logical_block_size); ms = ns->params.ms; @@ -797,6 +798,7 @@ static Property nvme_ns_props[] = { DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, true), DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0), DEFINE_PROP_UUID_NODEFAULT("uuid", NvmeNamespace, params.uuid), + DEFINE_PROP_NGUID_NODEFAULT("nguid", NvmeNamespace, params.nguid), DEFINE_PROP_UINT64("eui64", NvmeNamespace, params.eui64, 0), DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0), DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0), diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 5f2ae7b28b..bed8191bd5 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -171,13 +171,27 @@ static const uint8_t nvme_fdp_evf_shifts[FDP_EVT_MAX] = { [FDP_EVT_RUH_IMPLICIT_RU_CHANGE] = 33, }; +#define NGUID_LEN 16 + +typedef struct { + uint8_t data[NGUID_LEN]; +} NvmeNGUID; + +bool nvme_nguid_is_null(const NvmeNGUID *nguid); + +extern const PropertyInfo qdev_prop_nguid; + +#define DEFINE_PROP_NGUID_NODEFAULT(_name, _state, _field) \ + DEFINE_PROP(_name, _state, _field, qdev_prop_nguid, NvmeNGUID) + typedef struct NvmeNamespaceParams { - bool detached; - bool shared; - uint32_t nsid; - QemuUUID uuid; - uint64_t eui64; - bool eui64_default; + bool detached; + bool shared; + uint32_t nsid; + QemuUUID uuid; + NvmeNGUID nguid; + uint64_t eui64; + bool eui64_default; uint16_t ms; uint8_t mset; @@ -522,6 +536,7 @@ typedef struct NvmeParams { uint16_t sriov_vi_flexible; uint8_t sriov_max_vq_per_vf; uint8_t sriov_max_vi_per_vf; + bool msix_exclusive_bar; } NvmeParams; typedef struct NvmeCtrl { diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c index 62f96994eb..8a30da602c 100644 --- a/hw/pci-bridge/cxl_root_port.c +++ b/hw/pci-bridge/cxl_root_port.c @@ -175,7 +175,7 @@ static void cxl_rp_realize(DeviceState *dev, Error **errp) cxl_cstate->dvsec_offset = CXL_ROOT_PORT_DVSEC_OFFSET; cxl_cstate->pdev = pci_dev; - build_dvsecs(&crp->cxl_cstate); + build_dvsecs(cxl_cstate); cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate, TYPE_CXL_ROOT_PORT); diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 0b4817e144..4b2f0805c6 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -28,6 +28,7 @@ #include "hw/pci/pcie_regs.h" #include "hw/pci/pcie_port.h" #include "qemu/range.h" +#include "trace.h" //#define DEBUG_PCIE #ifdef DEBUG_PCIE @@ -45,6 +46,23 @@ static bool pcie_sltctl_powered_off(uint16_t sltctl) && (sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF; } +static const char *pcie_led_state_to_str(uint16_t value) +{ + switch (value) { + case PCI_EXP_SLTCTL_PWR_IND_ON: + case PCI_EXP_SLTCTL_ATTN_IND_ON: + return "on"; + case PCI_EXP_SLTCTL_PWR_IND_BLINK: + case PCI_EXP_SLTCTL_ATTN_IND_BLINK: + return "blink"; + case PCI_EXP_SLTCTL_PWR_IND_OFF: + case PCI_EXP_SLTCTL_ATTN_IND_OFF: + return "off"; + default: + return "invalid"; + } +} + /*************************************************************************** * pci express capability helper functions */ @@ -743,6 +761,28 @@ void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, uint16_t *slt_sta) *slt_sta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); } +static void find_child_fn(PCIBus *bus, PCIDevice *dev, void *opaque) +{ + PCIDevice **child = opaque; + + if (!*child) { + *child = dev; + } +} + +/* + * Returns the plugged device or first function of multifunction plugged device + */ +static PCIDevice *pcie_cap_slot_find_child(PCIDevice *dev) +{ + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); + PCIDevice *child = NULL; + + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), find_child_fn, &child); + + return child; +} + void pcie_cap_slot_write_config(PCIDevice *dev, uint16_t old_slt_ctl, uint16_t old_slt_sta, uint32_t addr, uint32_t val, int len) @@ -787,6 +827,22 @@ void pcie_cap_slot_write_config(PCIDevice *dev, sltsta); } + if (trace_event_get_state_backends(TRACE_PCIE_CAP_SLOT_WRITE_CONFIG)) { + DeviceState *parent = DEVICE(dev); + DeviceState *child = DEVICE(pcie_cap_slot_find_child(dev)); + + trace_pcie_cap_slot_write_config( + parent->canonical_path, + child ? child->canonical_path : "no-child", + (sltsta & PCI_EXP_SLTSTA_PDS) ? "present" : "not present", + pcie_led_state_to_str(old_slt_ctl & PCI_EXP_SLTCTL_PIC), + pcie_led_state_to_str(val & PCI_EXP_SLTCTL_PIC), + pcie_led_state_to_str(old_slt_ctl & PCI_EXP_SLTCTL_AIC), + pcie_led_state_to_str(val & PCI_EXP_SLTCTL_AIC), + (old_slt_ctl & PCI_EXP_SLTCTL_PWR_OFF) ? "off" : "on", + (val & PCI_EXP_SLTCTL_PWR_OFF) ? "off" : "on"); + } + /* * If the slot is populated, power indicator is off and power * controller is off, it is safe to detach the devices. diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c index d2a5eea69e..aac6f2d034 100644 --- a/hw/pci/shpc.c +++ b/hw/pci/shpc.c @@ -8,6 +8,7 @@ #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" #include "hw/pci/msi.h" +#include "trace.h" /* TODO: model power only and disabled slot states. */ /* TODO: handle SERR and wakeups */ @@ -123,6 +124,34 @@ #define SHPC_PCI_TO_IDX(pci_slot) ((pci_slot) - 1) #define SHPC_IDX_TO_PHYSICAL(slot) ((slot) + 1) +static const char *shpc_led_state_to_str(uint8_t value) +{ + switch (value) { + case SHPC_LED_ON: + return "on"; + case SHPC_LED_BLINK: + return "blink"; + case SHPC_LED_OFF: + return "off"; + default: + return "invalid"; + } +} + +static const char *shpc_slot_state_to_str(uint8_t value) +{ + switch (value) { + case SHPC_STATE_PWRONLY: + return "power-only"; + case SHPC_STATE_ENABLED: + return "enabled"; + case SHPC_STATE_DISABLED: + return "disabled"; + default: + return "invalid"; + } +} + static uint8_t shpc_get_status(SHPCDevice *shpc, int slot, uint16_t msk) { uint8_t *status = shpc->config + SHPC_SLOT_STATUS(slot); @@ -302,6 +331,23 @@ static void shpc_slot_command(PCIDevice *d, uint8_t target, shpc_set_status(shpc, slot, state, SHPC_SLOT_STATE_MASK); } + if (trace_event_get_state_backends(TRACE_SHPC_SLOT_COMMAND)) { + DeviceState *parent = DEVICE(d); + int pci_slot = SHPC_IDX_TO_PCI(slot); + DeviceState *child = + DEVICE(shpc->sec_bus->devices[PCI_DEVFN(pci_slot, 0)]); + + trace_shpc_slot_command( + parent->canonical_path, pci_slot, + child ? child->canonical_path : "no-child", + shpc_led_state_to_str(old_power), + shpc_led_state_to_str(power), + shpc_led_state_to_str(old_attn), + shpc_led_state_to_str(attn), + shpc_slot_state_to_str(old_state), + shpc_slot_state_to_str(state)); + } + if (!shpc_slot_is_off(old_state, old_power, old_attn) && shpc_slot_is_off(state, power, attn)) { diff --git a/hw/pci/trace-events b/hw/pci/trace-events index 42430869ce..19643aa8c6 100644 --- a/hw/pci/trace-events +++ b/hw/pci/trace-events @@ -16,3 +16,9 @@ msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d mask sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs" sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs" sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d" + +# pcie.c +pcie_cap_slot_write_config(const char *parent, const char *child, const char *pds, const char *old_pic, const char *new_pic, const char *old_aic, const char *new_aic, const char *old_power, const char *new_power) "%s > %s: pds: %s, pic: %s->%s, aic: %s->%s, power: %s->%s" + +# shpc.c +shpc_slot_command(const char *parent, int pci_slot, const char *child, const char *old_pic, const char *new_pic, const char *old_aic, const char *new_aic, const char *old_state, const char *new_state) "%s[%d] > %s: pic: %s->%s, aic: %s->%s, state: %s->%s" diff --git a/hw/ppc/mpc8544_guts.c b/hw/ppc/mpc8544_guts.c index a26e83d048..e3540b0281 100644 --- a/hw/ppc/mpc8544_guts.c +++ b/hw/ppc/mpc8544_guts.c @@ -71,8 +71,7 @@ static uint64_t mpc8544_guts_read(void *opaque, hwaddr addr, unsigned size) { uint32_t value = 0; - PowerPCCPU *cpu = POWERPC_CPU(current_cpu); - CPUPPCState *env = &cpu->env; + CPUPPCState *env = cpu_env(current_cpu); addr &= MPC8544_GUTS_MMIO_SIZE - 1; switch (addr) { diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 0b47b92baa..6e3a5ccdec 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -133,7 +133,7 @@ static int get_cpus_node(void *fdt) * device tree, used in XSCOM to address cores and in interrupt * servers. */ -static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt) +static int pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt) { PowerPCCPU *cpu = pc->threads[0]; CPUState *cs = CPU(cpu); @@ -141,32 +141,31 @@ static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt) int smt_threads = CPU_CORE(pc)->nr_threads; CPUPPCState *env = &cpu->env; PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); + PnvChipClass *pnv_cc = PNV_CHIP_GET_CLASS(chip); g_autofree uint32_t *servers_prop = g_new(uint32_t, smt_threads); int i; + uint32_t pir; uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), 0xffffffff, 0xffffffff}; uint32_t tbfreq = PNV_TIMEBASE_FREQ; uint32_t cpufreq = 1000000000; uint32_t page_sizes_prop[64]; size_t page_sizes_prop_size; - const uint8_t pa_features[] = { 24, 0, - 0xf6, 0x3f, 0xc7, 0xc0, 0x80, 0xf0, - 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, - 0x80, 0x00, 0x80, 0x00, 0x80, 0x00 }; int offset; char *nodename; int cpus_offset = get_cpus_node(fdt); - nodename = g_strdup_printf("%s@%x", dc->fw_name, pc->pir); + pir = pnv_cc->chip_pir(chip, pc->hwid, 0); + + nodename = g_strdup_printf("%s@%x", dc->fw_name, pir); offset = fdt_add_subnode(fdt, cpus_offset, nodename); _FDT(offset); g_free(nodename); _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", chip->chip_id))); - _FDT((fdt_setprop_cell(fdt, offset, "reg", pc->pir))); - _FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", pc->pir))); + _FDT((fdt_setprop_cell(fdt, offset, "reg", pir))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", pir))); _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu"))); _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR]))); @@ -236,20 +235,21 @@ static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt) page_sizes_prop, page_sizes_prop_size))); } - _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", - pa_features, sizeof(pa_features)))); - /* Build interrupt servers properties */ for (i = 0; i < smt_threads; i++) { - servers_prop[i] = cpu_to_be32(pc->pir + i); + servers_prop[i] = cpu_to_be32(pnv_cc->chip_pir(chip, pc->hwid, i)); } _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", servers_prop, sizeof(*servers_prop) * smt_threads))); + + return offset; } -static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t pir, +static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t hwid, uint32_t nr_threads) { + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + uint32_t pir = pcc->chip_pir(chip, hwid, 0); uint64_t addr = PNV_ICP_BASE(chip) | (pir << 12); char *name; const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp"; @@ -263,6 +263,7 @@ static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t pir, rsize = sizeof(uint64_t) * 2 * nr_threads; reg = g_malloc(rsize); for (i = 0; i < nr_threads; i++) { + /* We know P8 PIR is linear with thread id */ reg[i * 2] = cpu_to_be64(addr | ((pir + i) * 0x1000)); reg[i * 2 + 1] = cpu_to_be64(0x1000); } @@ -299,6 +300,17 @@ PnvChip *pnv_chip_add_phb(PnvChip *chip, PnvPHB *phb) return chip; } +/* + * Same as spapr pa_features_207 except pnv always enables CI largepages bit. + * HTM is always enabled because TCG does implement HTM, it's just a + * degenerate implementation. + */ +static const uint8_t pa_features_207[] = { 24, 0, + 0xf6, 0x3f, 0xc7, 0xc0, 0x00, 0xf0, + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00 }; + static void pnv_chip_power8_dt_populate(PnvChip *chip, void *fdt) { static const char compat[] = "ibm,power8-xscom\0ibm,xscom"; @@ -311,11 +323,15 @@ static void pnv_chip_power8_dt_populate(PnvChip *chip, void *fdt) for (i = 0; i < chip->nr_cores; i++) { PnvCore *pnv_core = chip->cores[i]; + int offset; + + offset = pnv_dt_core(chip, pnv_core, fdt); - pnv_dt_core(chip, pnv_core, fdt); + _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", + pa_features_207, sizeof(pa_features_207)))); /* Interrupt Control Presenters (ICP). One per core. */ - pnv_dt_icp(chip, fdt, pnv_core->pir, CPU_CORE(pnv_core)->nr_threads); + pnv_dt_icp(chip, fdt, pnv_core->hwid, CPU_CORE(pnv_core)->nr_threads); } if (chip->ram_size) { @@ -323,6 +339,35 @@ static void pnv_chip_power8_dt_populate(PnvChip *chip, void *fdt) } } +/* + * Same as spapr pa_features_300 except pnv always enables CI largepages bit. + */ +static const uint8_t pa_features_300[] = { 66, 0, + /* 0: MMU|FPU|SLB|RUN|DABR|NX, 1: CILRG|fri[nzpm]|DABRX|SPRG3|SLB0|PP110 */ + /* 2: VPM|DS205|PPR|DS202|DS206, 3: LSD|URG, 5: LE|CFAR|EB|LSQ */ + 0xf6, 0x3f, 0xc7, 0xc0, 0x00, 0xf0, /* 0 - 5 */ + /* 6: DS207 */ + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, /* 6 - 11 */ + /* 16: Vector */ + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 12 - 17 */ + /* 18: Vec. Scalar, 20: Vec. XOR, 22: HTM */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 18 - 23 */ + /* 24: Ext. Dec, 26: 64 bit ftrs, 28: PM ftrs */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 24 - 29 */ + /* 32: LE atomic, 34: EBB + ext EBB */ + 0x00, 0x00, 0x80, 0x00, 0xC0, 0x00, /* 30 - 35 */ + /* 40: Radix MMU */ + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 36 - 41 */ + /* 42: PM, 44: PC RA, 46: SC vec'd */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 42 - 47 */ + /* 48: SIMD, 50: QP BFP, 52: String */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 48 - 53 */ + /* 54: DecFP, 56: DecI, 58: SHA */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 54 - 59 */ + /* 60: NM atomic, 62: RNG */ + 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 60 - 65 */ +}; + static void pnv_chip_power9_dt_populate(PnvChip *chip, void *fdt) { static const char compat[] = "ibm,power9-xscom\0ibm,xscom"; @@ -335,8 +380,12 @@ static void pnv_chip_power9_dt_populate(PnvChip *chip, void *fdt) for (i = 0; i < chip->nr_cores; i++) { PnvCore *pnv_core = chip->cores[i]; + int offset; - pnv_dt_core(chip, pnv_core, fdt); + offset = pnv_dt_core(chip, pnv_core, fdt); + + _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", + pa_features_300, sizeof(pa_features_300)))); } if (chip->ram_size) { @@ -346,6 +395,40 @@ static void pnv_chip_power9_dt_populate(PnvChip *chip, void *fdt) pnv_dt_lpc(chip, fdt, 0, PNV9_LPCM_BASE(chip), PNV9_LPCM_SIZE); } +/* + * Same as spapr pa_features_31 except pnv always enables CI largepages bit, + * always disables copy/paste. + */ +static const uint8_t pa_features_31[] = { 74, 0, + /* 0: MMU|FPU|SLB|RUN|DABR|NX, 1: CILRG|fri[nzpm]|DABRX|SPRG3|SLB0|PP110 */ + /* 2: VPM|DS205|PPR|DS202|DS206, 3: LSD|URG, 5: LE|CFAR|EB|LSQ */ + 0xf6, 0x3f, 0xc7, 0xc0, 0x00, 0xf0, /* 0 - 5 */ + /* 6: DS207 */ + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, /* 6 - 11 */ + /* 16: Vector */ + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 12 - 17 */ + /* 18: Vec. Scalar, 20: Vec. XOR */ + 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 18 - 23 */ + /* 24: Ext. Dec, 26: 64 bit ftrs, 28: PM ftrs */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 24 - 29 */ + /* 32: LE atomic, 34: EBB + ext EBB */ + 0x00, 0x00, 0x80, 0x00, 0xC0, 0x00, /* 30 - 35 */ + /* 40: Radix MMU */ + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 36 - 41 */ + /* 42: PM, 44: PC RA, 46: SC vec'd */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 42 - 47 */ + /* 48: SIMD, 50: QP BFP, 52: String */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 48 - 53 */ + /* 54: DecFP, 56: DecI, 58: SHA */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 54 - 59 */ + /* 60: NM atomic, 62: RNG */ + 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 60 - 65 */ + /* 68: DEXCR[SBHE|IBRTPDUS|SRAPD|NPHIE|PHIE] */ + 0x00, 0x00, 0xce, 0x00, 0x00, 0x00, /* 66 - 71 */ + /* 72: [P]HASHST/[P]HASHCHK */ + 0x80, 0x00, /* 72 - 73 */ +}; + static void pnv_chip_power10_dt_populate(PnvChip *chip, void *fdt) { static const char compat[] = "ibm,power10-xscom\0ibm,xscom"; @@ -358,8 +441,12 @@ static void pnv_chip_power10_dt_populate(PnvChip *chip, void *fdt) for (i = 0; i < chip->nr_cores; i++) { PnvCore *pnv_core = chip->cores[i]; + int offset; + + offset = pnv_dt_core(chip, pnv_core, fdt); - pnv_dt_core(chip, pnv_core, fdt); + _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", + pa_features_31, sizeof(pa_features_31)))); } if (chip->ram_size) { @@ -995,9 +1082,10 @@ static void pnv_init(MachineState *machine) * 25:28 Core number * 29:31 Thread ID */ -static uint32_t pnv_chip_core_pir_p8(PnvChip *chip, uint32_t core_id) +static uint32_t pnv_chip_pir_p8(PnvChip *chip, uint32_t core_id, + uint32_t thread_id) { - return (chip->chip_id << 7) | (core_id << 3); + return (chip->chip_id << 7) | (core_id << 3) | thread_id; } static void pnv_chip_power8_intc_create(PnvChip *chip, PowerPCCPU *cpu, @@ -1049,14 +1137,37 @@ static void pnv_chip_power8_intc_print_info(PnvChip *chip, PowerPCCPU *cpu, * * We only care about the lower bits. uint32_t is fine for the moment. */ -static uint32_t pnv_chip_core_pir_p9(PnvChip *chip, uint32_t core_id) +static uint32_t pnv_chip_pir_p9(PnvChip *chip, uint32_t core_id, + uint32_t thread_id) { - return (chip->chip_id << 8) | (core_id << 2); + if (chip->nr_threads == 8) { + return (chip->chip_id << 8) | ((thread_id & 1) << 2) | (core_id << 3) | + (thread_id >> 1); + } else { + return (chip->chip_id << 8) | (core_id << 2) | thread_id; + } } -static uint32_t pnv_chip_core_pir_p10(PnvChip *chip, uint32_t core_id) +/* + * 0:48 Reserved - Read as zeroes + * 49:52 Node ID + * 53:55 Chip ID + * 56 Reserved - Read as zero + * 57:59 Quad ID + * 60 Core Chiplet Pair ID + * 61:63 Thread/Core Chiplet ID t0-t2 + * + * We only care about the lower bits. uint32_t is fine for the moment. + */ +static uint32_t pnv_chip_pir_p10(PnvChip *chip, uint32_t core_id, + uint32_t thread_id) { - return (chip->chip_id << 8) | (core_id << 2); + if (chip->nr_threads == 8) { + return (chip->chip_id << 8) | ((core_id / 4) << 4) | + ((core_id % 2) << 3) | thread_id; + } else { + return (chip->chip_id << 8) | (core_id << 2) | thread_id; + } } static void pnv_chip_power9_intc_create(PnvChip *chip, PowerPCCPU *cpu, @@ -1235,7 +1346,7 @@ static void pnv_chip_icp_realize(Pnv8Chip *chip8, Error **errp) int core_hwid = CPU_CORE(pnv_core)->core_id; for (j = 0; j < CPU_CORE(pnv_core)->nr_threads; j++) { - uint32_t pir = pcc->core_pir(chip, core_hwid) + j; + uint32_t pir = pcc->chip_pir(chip, core_hwid, j); PnvICPState *icp = PNV_ICP(xics_icp_get(chip8->xics, pir)); memory_region_add_subregion(&chip8->icp_mmio, pir << 12, @@ -1265,11 +1376,11 @@ static void pnv_chip_power8_realize(DeviceState *dev, Error **errp) } /* Processor Service Interface (PSI) Host Bridge */ - object_property_set_int(OBJECT(&chip8->psi), "bar", PNV_PSIHB_BASE(chip), + object_property_set_int(OBJECT(psi8), "bar", PNV_PSIHB_BASE(chip), &error_fatal); - object_property_set_link(OBJECT(&chip8->psi), ICS_PROP_XICS, + object_property_set_link(OBJECT(psi8), ICS_PROP_XICS, OBJECT(chip8->xics), &error_abort); - if (!qdev_realize(DEVICE(&chip8->psi), NULL, errp)) { + if (!qdev_realize(DEVICE(psi8), NULL, errp)) { return; } pnv_xscom_add_subregion(chip, PNV_XSCOM_PSIHB_BASE, @@ -1300,7 +1411,7 @@ static void pnv_chip_power8_realize(DeviceState *dev, Error **errp) } pnv_xscom_add_subregion(chip, PNV_XSCOM_OCC_BASE, &chip8->occ.xscom_regs); qdev_connect_gpio_out(DEVICE(&chip8->occ), 0, - qdev_get_gpio_in(DEVICE(&chip8->psi), PSIHB_IRQ_OCC)); + qdev_get_gpio_in(DEVICE(psi8), PSIHB_IRQ_OCC)); /* OCC SRAM model */ memory_region_add_subregion(get_system_memory(), PNV_OCC_SENSOR_BASE(chip), @@ -1348,7 +1459,7 @@ static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x221ef04980000000ull; /* P8 Murano DD2.1 */ k->cores_mask = POWER8E_CORE_MASK; k->num_phbs = 3; - k->core_pir = pnv_chip_core_pir_p8; + k->chip_pir = pnv_chip_pir_p8; k->intc_create = pnv_chip_power8_intc_create; k->intc_reset = pnv_chip_power8_intc_reset; k->intc_destroy = pnv_chip_power8_intc_destroy; @@ -1372,7 +1483,7 @@ static void pnv_chip_power8_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */ k->cores_mask = POWER8_CORE_MASK; k->num_phbs = 3; - k->core_pir = pnv_chip_core_pir_p8; + k->chip_pir = pnv_chip_pir_p8; k->intc_create = pnv_chip_power8_intc_create; k->intc_reset = pnv_chip_power8_intc_reset; k->intc_destroy = pnv_chip_power8_intc_destroy; @@ -1396,7 +1507,7 @@ static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x120d304980000000ull; /* P8 Naples DD1.0 */ k->cores_mask = POWER8_CORE_MASK; k->num_phbs = 4; - k->core_pir = pnv_chip_core_pir_p8; + k->chip_pir = pnv_chip_pir_p8; k->intc_create = pnv_chip_power8_intc_create; k->intc_reset = pnv_chip_power8_intc_reset; k->intc_destroy = pnv_chip_power8_intc_destroy; @@ -1553,12 +1664,12 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp) &chip9->xive.xscom_regs); /* Processor Service Interface (PSI) Host Bridge */ - object_property_set_int(OBJECT(&chip9->psi), "bar", PNV9_PSIHB_BASE(chip), + object_property_set_int(OBJECT(psi9), "bar", PNV9_PSIHB_BASE(chip), &error_fatal); /* This is the only device with 4k ESB pages */ - object_property_set_int(OBJECT(&chip9->psi), "shift", XIVE_ESB_4K, + object_property_set_int(OBJECT(psi9), "shift", XIVE_ESB_4K, &error_fatal); - if (!qdev_realize(DEVICE(&chip9->psi), NULL, errp)) { + if (!qdev_realize(DEVICE(psi9), NULL, errp)) { return; } pnv_xscom_add_subregion(chip, PNV9_XSCOM_PSIHB_BASE, @@ -1594,7 +1705,7 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp) } pnv_xscom_add_subregion(chip, PNV9_XSCOM_OCC_BASE, &chip9->occ.xscom_regs); qdev_connect_gpio_out(DEVICE(&chip9->occ), 0, qdev_get_gpio_in( - DEVICE(&chip9->psi), PSIHB9_IRQ_OCC)); + DEVICE(psi9), PSIHB9_IRQ_OCC)); /* OCC SRAM model */ memory_region_add_subregion(get_system_memory(), PNV9_OCC_SENSOR_BASE(chip), @@ -1609,7 +1720,7 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp) pnv_xscom_add_subregion(chip, PNV9_XSCOM_SBE_MBOX_BASE, &chip9->sbe.xscom_mbox_regs); qdev_connect_gpio_out(DEVICE(&chip9->sbe), 0, qdev_get_gpio_in( - DEVICE(&chip9->psi), PSIHB9_IRQ_PSU)); + DEVICE(psi9), PSIHB9_IRQ_PSU)); /* HOMER */ object_property_set_link(OBJECT(&chip9->homer), "chip", OBJECT(chip), @@ -1650,7 +1761,7 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp) PNV9_XSCOM_I2CM_SIZE, &chip9->i2c[i].xscom_regs); qdev_connect_gpio_out(DEVICE(&chip9->i2c[i]), 0, - qdev_get_gpio_in(DEVICE(&chip9->psi), + qdev_get_gpio_in(DEVICE(psi9), PSIHB9_IRQ_SBE_I2C)); } } @@ -1669,7 +1780,7 @@ static void pnv_chip_power9_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x220d104900008000ull; /* P9 Nimbus DD2.0 */ k->cores_mask = POWER9_CORE_MASK; - k->core_pir = pnv_chip_core_pir_p9; + k->chip_pir = pnv_chip_pir_p9; k->intc_create = pnv_chip_power9_intc_create; k->intc_reset = pnv_chip_power9_intc_reset; k->intc_destroy = pnv_chip_power9_intc_destroy; @@ -1981,7 +2092,7 @@ static void pnv_chip_power10_class_init(ObjectClass *klass, void *data) k->chip_cfam_id = 0x120da04900008000ull; /* P10 DD1.0 (with NX) */ k->cores_mask = POWER10_CORE_MASK; - k->core_pir = pnv_chip_core_pir_p10; + k->chip_pir = pnv_chip_pir_p10; k->intc_create = pnv_chip_power10_intc_create; k->intc_reset = pnv_chip_power10_intc_reset; k->intc_destroy = pnv_chip_power10_intc_destroy; @@ -2071,8 +2182,8 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp) chip->nr_threads, &error_fatal); object_property_set_int(OBJECT(pnv_core), CPU_CORE_PROP_CORE_ID, core_hwid, &error_fatal); - object_property_set_int(OBJECT(pnv_core), "pir", - pcc->core_pir(chip, core_hwid), &error_fatal); + object_property_set_int(OBJECT(pnv_core), "hwid", core_hwid, + &error_fatal); object_property_set_int(OBJECT(pnv_core), "hrmor", pnv->fw_load_addr, &error_fatal); object_property_set_link(OBJECT(pnv_core), "chip", OBJECT(chip), @@ -2412,8 +2523,7 @@ static void pnv_machine_set_hb(Object *obj, bool value, Error **errp) static void pnv_cpu_do_nmi_on_cpu(CPUState *cs, run_on_cpu_data arg) { - PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *env = &cpu->env; + CPUPPCState *env = cpu_env(cs); cpu_synchronize_state(cs); ppc_cpu_do_system_reset(cs); diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c index 8c7afe037f..f40ab721d6 100644 --- a/hw/ppc/pnv_core.c +++ b/hw/ppc/pnv_core.c @@ -226,7 +226,7 @@ static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp, int thread_index) { CPUPPCState *env = &cpu->env; - int core_pir; + int core_hwid; ppc_spr_t *pir = &env->spr_cb[SPR_PIR]; ppc_spr_t *tir = &env->spr_cb[SPR_TIR]; Error *local_err = NULL; @@ -242,10 +242,10 @@ static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp, return; } - core_pir = object_property_get_uint(OBJECT(pc), "pir", &error_abort); + core_hwid = object_property_get_uint(OBJECT(pc), "hwid", &error_abort); tir->default_value = thread_index; - pir->default_value = core_pir + thread_index; + pir->default_value = pcc->chip_pir(pc->chip, core_hwid, thread_index); /* Set time-base frequency to 512 MHz */ cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ); @@ -342,7 +342,7 @@ static void pnv_core_unrealize(DeviceState *dev) } static Property pnv_core_properties[] = { - DEFINE_PROP_UINT32("pir", PnvCore, pir, 0), + DEFINE_PROP_UINT32("hwid", PnvCore, hwid, 0), DEFINE_PROP_UINT64("hrmor", PnvCore, hrmor, 0), DEFINE_PROP_LINK("chip", PnvCore, chip, TYPE_PNV_CHIP, PnvChip *), DEFINE_PROP_END_OF_LIST(), diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c index 805b1d0c87..a17816d072 100644 --- a/hw/ppc/pnv_xscom.c +++ b/hw/ppc/pnv_xscom.c @@ -44,15 +44,12 @@ static void xscom_complete(CPUState *cs, uint64_t hmer_bits) * passed for the cpu, and no CPU completion is generated. */ if (cs) { - PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *env = &cpu->env; - /* * TODO: Need a CPU helper to set HMER, also handle generation * of HMIs */ cpu_synchronize_state(cs); - env->spr[SPR_HMER] |= hmer_bits; + cpu_env(cs)->spr[SPR_HMER] |= hmer_bits; } } diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index fadb8f5239..e6fa5580c0 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -633,6 +633,16 @@ void cpu_ppc_store_atbu (CPUPPCState *env, uint32_t value) ((uint64_t)value << 32) | tb); } +void cpu_ppc_increase_tb_by_offset(CPUPPCState *env, int64_t offset) +{ + env->tb_env->tb_offset += offset; +} + +void cpu_ppc_decrease_tb_by_offset(CPUPPCState *env, int64_t offset) +{ + env->tb_env->tb_offset -= offset; +} + uint64_t cpu_ppc_load_vtb(CPUPPCState *env) { ppc_tb_t *tb_env = env->tb_env; diff --git a/hw/ppc/ppce500_spin.c b/hw/ppc/ppce500_spin.c index bbce63e8a4..dfbe759481 100644 --- a/hw/ppc/ppce500_spin.c +++ b/hw/ppc/ppce500_spin.c @@ -90,8 +90,7 @@ static void mmubooke_create_initial_mapping(CPUPPCState *env, static void spin_kick(CPUState *cs, run_on_cpu_data data) { - PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *env = &cpu->env; + CPUPPCState *env = cpu_env(cs); SpinInfo *curspin = data.host_ptr; hwaddr map_size = 64 * MiB; hwaddr map_start; diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c index 7e34b6c5e0..d42b677898 100644 --- a/hw/ppc/sam460ex.c +++ b/hw/ppc/sam460ex.c @@ -33,6 +33,7 @@ #include "hw/char/serial.h" #include "hw/i2c/ppc4xx_i2c.h" #include "hw/i2c/smbus_eeprom.h" +#include "hw/ide/pci.h" #include "hw/usb/hcd-ehci.h" #include "hw/ppc/fdt.h" #include "hw/qdev-properties.h" @@ -449,15 +450,27 @@ static void sam460ex_init(MachineState *machine) /* PCI devices */ pci_create_simple(pci_bus, PCI_DEVFN(6, 0), "sm501"); - /* SoC has a single SATA port but we don't emulate that yet + /* + * SoC has a single SATA port but we don't emulate that * However, firmware and usual clients have driver for SiI311x - * so add one for convenience by default */ + * PCI SATA card so add one for convenience by default + */ if (defaults_enabled()) { - pci_create_simple(pci_bus, -1, "sii3112"); + PCIIDEState *s = PCI_IDE(pci_create_simple(pci_bus, -1, "sii3112")); + DriveInfo *di; + + di = drive_get_by_index(IF_IDE, 0); + if (di) { + ide_bus_create_drive(&s->bus[0], 0, di); + } + /* Use index 2 only if 1 does not exist, this allows -cdrom */ + di = drive_get_by_index(IF_IDE, 1) ?: drive_get_by_index(IF_IDE, 2); + if (di) { + ide_bus_create_drive(&s->bus[1], 0, di); + } } - /* SoC has 4 UARTs - * but board has only one wired and two are present in fdt */ + /* SoC has 4 UARTs but board has only one wired and two described in fdt */ if (serial_hd(0) != NULL) { serial_mm_init(get_system_memory(), 0x4ef600300, 0, qdev_get_gpio_in(uic[1], 1), @@ -531,6 +544,7 @@ static void sam460ex_machine_init(MachineClass *mc) { mc->desc = "aCube Sam460ex"; mc->init = sam460ex_init; + mc->block_default_type = IF_IDE; mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("460exb"); mc->default_ram_size = 512 * MiB; mc->default_ram_id = "ppc4xx.sdram"; diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 55263f0815..c417f9dd52 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -233,29 +233,40 @@ static void spapr_dt_pa_features(SpaprMachineState *spapr, PowerPCCPU *cpu, void *fdt, int offset) { + /* + * SSO (SAO) ordering is supported on KVM and thread=single hosts, + * but not MTTCG, so disable it. To advertise it, a cap would have + * to be added, or support implemented for MTTCG. + * + * Copy/paste is not supported by TCG, so it is not advertised. KVM + * can execute them but it has no accelerator drivers which are usable, + * so there isn't much need for it anyway. + */ + + /* These should be kept in sync with pnv */ uint8_t pa_features_206[] = { 6, 0, - 0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 }; + 0xf6, 0x1f, 0xc7, 0x00, 0x00, 0xc0 }; uint8_t pa_features_207[] = { 24, 0, - 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, + 0xf6, 0x1f, 0xc7, 0xc0, 0x00, 0xf0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00 }; uint8_t pa_features_300[] = { 66, 0, /* 0: MMU|FPU|SLB|RUN|DABR|NX, 1: fri[nzpm]|DABRX|SPRG3|SLB0|PP110 */ - /* 2: VPM|DS205|PPR|DS202|DS206, 3: LSD|URG, SSO, 5: LE|CFAR|EB|LSQ */ - 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, /* 0 - 5 */ + /* 2: VPM|DS205|PPR|DS202|DS206, 3: LSD|URG, 5: LE|CFAR|EB|LSQ */ + 0xf6, 0x1f, 0xc7, 0xc0, 0x00, 0xf0, /* 0 - 5 */ /* 6: DS207 */ 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, /* 6 - 11 */ /* 16: Vector */ 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 12 - 17 */ - /* 18: Vec. Scalar, 20: Vec. XOR, 22: HTM */ + /* 18: Vec. Scalar, 20: Vec. XOR */ 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 18 - 23 */ /* 24: Ext. Dec, 26: 64 bit ftrs, 28: PM ftrs */ 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 24 - 29 */ - /* 30: MMR, 32: LE atomic, 34: EBB + ext EBB */ - 0x80, 0x00, 0x80, 0x00, 0xC0, 0x00, /* 30 - 35 */ - /* 36: SPR SO, 38: Copy/Paste, 40: Radix MMU */ - 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 36 - 41 */ + /* 32: LE atomic, 34: EBB + ext EBB */ + 0x00, 0x00, 0x80, 0x00, 0xC0, 0x00, /* 30 - 35 */ + /* 40: Radix MMU */ + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 36 - 41 */ /* 42: PM, 44: PC RA, 46: SC vec'd */ 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 42 - 47 */ /* 48: SIMD, 50: QP BFP, 52: String */ @@ -265,6 +276,36 @@ static void spapr_dt_pa_features(SpaprMachineState *spapr, /* 60: NM atomic, 62: RNG */ 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 60 - 65 */ }; + /* 3.1 removes SAO, HTM support */ + uint8_t pa_features_31[] = { 74, 0, + /* 0: MMU|FPU|SLB|RUN|DABR|NX, 1: fri[nzpm]|DABRX|SPRG3|SLB0|PP110 */ + /* 2: VPM|DS205|PPR|DS202|DS206, 3: LSD|URG, 5: LE|CFAR|EB|LSQ */ + 0xf6, 0x1f, 0xc7, 0xc0, 0x00, 0xf0, /* 0 - 5 */ + /* 6: DS207 */ + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, /* 6 - 11 */ + /* 16: Vector */ + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 12 - 17 */ + /* 18: Vec. Scalar, 20: Vec. XOR */ + 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 18 - 23 */ + /* 24: Ext. Dec, 26: 64 bit ftrs, 28: PM ftrs */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 24 - 29 */ + /* 32: LE atomic, 34: EBB + ext EBB */ + 0x00, 0x00, 0x80, 0x00, 0xC0, 0x00, /* 30 - 35 */ + /* 40: Radix MMU */ + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 36 - 41 */ + /* 42: PM, 44: PC RA, 46: SC vec'd */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 42 - 47 */ + /* 48: SIMD, 50: QP BFP, 52: String */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 48 - 53 */ + /* 54: DecFP, 56: DecI, 58: SHA */ + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 54 - 59 */ + /* 60: NM atomic, 62: RNG */ + 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 60 - 65 */ + /* 68: DEXCR[SBHE|IBRTPDUS|SRAPD|NPHIE|PHIE] */ + 0x00, 0x00, 0xce, 0x00, 0x00, 0x00, /* 66 - 71 */ + /* 72: [P]HASHST/[P]HASHCHK */ + 0x80, 0x00, /* 72 - 73 */ + }; uint8_t *pa_features = NULL; size_t pa_size; @@ -280,6 +321,10 @@ static void spapr_dt_pa_features(SpaprMachineState *spapr, pa_features = pa_features_300; pa_size = sizeof(pa_features_300); } + if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_10, 0, cpu->compat_pvr)) { + pa_features = pa_features_31; + pa_size = sizeof(pa_features_31); + } if (!pa_features) { return; } @@ -1362,7 +1407,6 @@ void spapr_init_all_lpcrs(target_ulong value, target_ulong mask) } } - static bool spapr_get_pate(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu, target_ulong lpid, ppc_v3_pate_t *entry) { @@ -1375,33 +1419,16 @@ static bool spapr_get_pate(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu, /* Copy PATE1:GR into PATE0:HR */ entry->dw0 = spapr->patb_entry & PATE0_HR; entry->dw1 = spapr->patb_entry; - + return true; } else { - uint64_t patb, pats; - - assert(lpid != 0); - - patb = spapr->nested_ptcr & PTCR_PATB; - pats = spapr->nested_ptcr & PTCR_PATS; - - /* Check if partition table is properly aligned */ - if (patb & MAKE_64BIT_MASK(0, pats + 12)) { - return false; - } - - /* Calculate number of entries */ - pats = 1ull << (pats + 12 - 4); - if (pats <= lpid) { - return false; + if (spapr_nested_api(spapr) == NESTED_API_KVM_HV) { + return spapr_get_pate_nested_hv(spapr, cpu, lpid, entry); + } else if (spapr_nested_api(spapr) == NESTED_API_PAPR) { + return spapr_get_pate_nested_papr(spapr, cpu, lpid, entry); + } else { + g_assert_not_reached(); } - - /* Grab entry */ - patb += 16 * lpid; - entry->dw0 = ldq_phys(CPU(cpu)->as, patb); - entry->dw1 = ldq_phys(CPU(cpu)->as, patb + 8); } - - return true; } #define HPTE(_table, _i) (void *)(((uint64_t *)(_table)) + ((_i) * 2)) @@ -1689,6 +1716,7 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); + spapr_nested_reset(spapr); first_ppc_cpu = POWERPC_CPU(first_cpu); if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && @@ -2138,6 +2166,7 @@ static const VMStateDescription vmstate_spapr = { &vmstate_spapr_cap_fwnmi, &vmstate_spapr_fwnmi, &vmstate_spapr_cap_rpt_invalidate, + &vmstate_spapr_cap_nested_papr, NULL } }; @@ -3481,8 +3510,7 @@ static void spapr_machine_finalizefn(Object *obj) void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg) { SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *env = &cpu->env; + CPUPPCState *env = cpu_env(cs); cpu_synchronize_state(cs); /* If FWNMI is inactive, addr will be -1, which will deliver to 0x100 */ @@ -3979,7 +4007,6 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev) SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); SpaprCpuCore *core = SPAPR_CPU_CORE(OBJECT(dev)); CPUCore *cc = CPU_CORE(dev); - CPUState *cs; SpaprDrc *drc; CPUArchId *core_slot; int index; @@ -4013,7 +4040,7 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev) } } - core_slot->cpu = OBJECT(dev); + core_slot->cpu = CPU(dev); /* * Set compatibility mode to match the boot CPU, which was either set @@ -4029,7 +4056,7 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev) if (smc->pre_2_10_has_unused_icps) { for (i = 0; i < cc->nr_threads; i++) { - cs = CPU(core->threads[i]); + CPUState *cs = CPU(core->threads[i]); pre_2_10_vmstate_unregister_dummy_icp(cs->cpu_index); } } @@ -4704,6 +4731,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_WORKAROUND; smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */ smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF; + smc->default_caps.caps[SPAPR_CAP_NESTED_PAPR] = SPAPR_CAP_OFF; smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON; smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON; smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON; diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index e889244e52..0a15415a1d 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -194,8 +194,7 @@ static void cap_htm_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) static void cap_vsx_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { ERRP_GUARD(); - PowerPCCPU *cpu = POWERPC_CPU(first_cpu); - CPUPPCState *env = &cpu->env; + CPUPPCState *env = cpu_env(first_cpu); if (!val) { /* TODO: We don't support disabling vsx yet */ @@ -213,14 +212,12 @@ static void cap_vsx_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) static void cap_dfp_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) { ERRP_GUARD(); - PowerPCCPU *cpu = POWERPC_CPU(first_cpu); - CPUPPCState *env = &cpu->env; if (!val) { /* TODO: We don't support disabling dfp yet */ return; } - if (!(env->insns_flags2 & PPC2_DFP)) { + if (!(cpu_env(first_cpu)->insns_flags2 & PPC2_DFP)) { error_setg(errp, "DFP support not available"); error_append_hint(errp, "Try appending -machine cap-dfp=off\n"); } @@ -487,6 +484,50 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr, error_append_hint(errp, "Try appending -machine cap-nested-hv=off " "or use threads=1 with -smp\n"); } + if (spapr_nested_api(spapr) && + spapr_nested_api(spapr) != NESTED_API_KVM_HV) { + error_setg(errp, "Nested-HV APIs are mutually exclusive"); + error_append_hint(errp, "Please use either cap-nested-hv or " + "cap-nested-papr to proceed.\n"); + return; + } else { + spapr->nested.api = NESTED_API_KVM_HV; + } + } +} + +static void cap_nested_papr_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) +{ + ERRP_GUARD(); + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + CPUPPCState *env = &cpu->env; + + if (!val) { + /* capability disabled by default */ + return; + } + + if (tcg_enabled()) { + if (!(env->insns_flags2 & PPC2_ISA300)) { + error_setg(errp, "Nested-PAPR only supported on POWER9 and later"); + error_append_hint(errp, + "Try appending -machine cap-nested-papr=off\n"); + return; + } + if (spapr_nested_api(spapr) && + spapr_nested_api(spapr) != NESTED_API_PAPR) { + error_setg(errp, "Nested-HV APIs are mutually exclusive"); + error_append_hint(errp, "Please use either cap-nested-hv or " + "cap-nested-papr to proceed.\n"); + return; + } else { + spapr->nested.api = NESTED_API_PAPR; + } + } else if (kvm_enabled()) { + error_setg(errp, "KVM implementation does not support Nested-PAPR"); + error_append_hint(errp, + "Try appending -machine cap-nested-papr=off\n"); } } @@ -735,6 +776,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = { .type = "bool", .apply = cap_nested_kvm_hv_apply, }, + [SPAPR_CAP_NESTED_PAPR] = { + .name = "nested-papr", + .description = "Allow Nested HV (PAPR API)", + .index = SPAPR_CAP_NESTED_PAPR, + .get = spapr_cap_get_bool, + .set = spapr_cap_set_bool, + .type = "bool", + .apply = cap_nested_papr_apply, + }, [SPAPR_CAP_LARGE_DECREMENTER] = { .name = "large-decr", .description = "Allow Large Decrementer", @@ -919,6 +969,7 @@ SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC); SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS); SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE); SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV); +SPAPR_CAP_MIG_STATE(nested_papr, SPAPR_CAP_NESTED_PAPR); SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER); SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST); SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI); diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 40b7c52f7f..e7c9edd033 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -39,9 +39,13 @@ static void spapr_reset_vcpu(PowerPCCPU *cpu) /* * "PowerPC Processor binding to IEEE 1275" defines the initial MSR state - * as 32bit (MSR_SF=0) in "8.2.1. Initial Register Values". + * as 32bit (MSR_SF=0) with MSR_ME=1 and MSR_FP=1 in "8.2.1. Initial + * Register Values". This can also be found in "LoPAPR 1.1" "C.9.2.1 + * Initial Register Values". */ env->msr &= ~(1ULL << MSR_SF); + env->msr |= (1ULL << MSR_ME) | (1ULL << MSR_FP); + env->spr[SPR_HIOR] = 0; lpcr = env->spr[SPR_LPCR]; @@ -394,10 +398,8 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), DEFINE_SPAPR_CPU_CORE_TYPE("power8e_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8nvl_v1.0"), - DEFINE_SPAPR_CPU_CORE_TYPE("power9_v1.0"), DEFINE_SPAPR_CPU_CORE_TYPE("power9_v2.0"), DEFINE_SPAPR_CPU_CORE_TYPE("power9_v2.2"), - DEFINE_SPAPR_CPU_CORE_TYPE("power10_v1.0"), DEFINE_SPAPR_CPU_CORE_TYPE("power10_v2.0"), #ifdef CONFIG_KVM DEFINE_SPAPR_CPU_CORE_TYPE("host"), diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 75c2d12978..5e1d020e3d 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -1525,6 +1525,28 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn) *slot = fn; } +void spapr_unregister_hypercall(target_ulong opcode) +{ + spapr_hcall_fn *slot; + + if (opcode <= MAX_HCALL_OPCODE) { + assert((opcode & 0x3) == 0); + + slot = &papr_hypercall_table[opcode / 4]; + } else if (opcode >= SVM_HCALL_BASE && opcode <= SVM_HCALL_MAX) { + /* we only have SVM-related hcall numbers assigned in multiples of 4 */ + assert((opcode & 0x3) == 0); + + slot = &svm_hypercall_table[(opcode - SVM_HCALL_BASE) / 4]; + } else { + assert((opcode >= KVMPPC_HCALL_BASE) && (opcode <= KVMPPC_HCALL_MAX)); + + slot = &kvmppc_hypercall_table[opcode - KVMPPC_HCALL_BASE]; + } + + *slot = NULL; +} + target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, target_ulong *args) { @@ -1638,8 +1660,6 @@ static void hypercall_register_types(void) spapr_register_hypercall(KVMPPC_H_CAS, h_client_architecture_support); spapr_register_hypercall(KVMPPC_H_UPDATE_DT, h_update_dt); - - spapr_register_nested(); } type_init(hypercall_register_types) diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c index 121aa96ddc..936659b4c0 100644 --- a/hw/ppc/spapr_nested.c +++ b/hw/ppc/spapr_nested.c @@ -6,8 +6,85 @@ #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_cpu_core.h" #include "hw/ppc/spapr_nested.h" +#include "mmu-book3s-v3.h" +#include "cpu-models.h" +#include "qemu/log.h" + +void spapr_nested_reset(SpaprMachineState *spapr) +{ + if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) { + spapr_unregister_nested_hv(); + spapr_register_nested_hv(); + } else if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_PAPR)) { + spapr->nested.capabilities_set = false; + spapr_unregister_nested_papr(); + spapr_register_nested_papr(); + spapr_nested_gsb_init(); + } else { + spapr->nested.api = 0; + } +} + +uint8_t spapr_nested_api(SpaprMachineState *spapr) +{ + return spapr->nested.api; +} #ifdef CONFIG_TCG + +bool spapr_get_pate_nested_hv(SpaprMachineState *spapr, PowerPCCPU *cpu, + target_ulong lpid, ppc_v3_pate_t *entry) +{ + uint64_t patb, pats; + + assert(lpid != 0); + + patb = spapr->nested.ptcr & PTCR_PATB; + pats = spapr->nested.ptcr & PTCR_PATS; + + /* Check if partition table is properly aligned */ + if (patb & MAKE_64BIT_MASK(0, pats + 12)) { + return false; + } + + /* Calculate number of entries */ + pats = 1ull << (pats + 12 - 4); + if (pats <= lpid) { + return false; + } + + /* Grab entry */ + patb += 16 * lpid; + entry->dw0 = ldq_phys(CPU(cpu)->as, patb); + entry->dw1 = ldq_phys(CPU(cpu)->as, patb + 8); + return true; +} + +static +SpaprMachineStateNestedGuest *spapr_get_nested_guest(SpaprMachineState *spapr, + target_ulong guestid) +{ + SpaprMachineStateNestedGuest *guest; + + guest = g_hash_table_lookup(spapr->nested.guests, GINT_TO_POINTER(guestid)); + return guest; +} + +bool spapr_get_pate_nested_papr(SpaprMachineState *spapr, PowerPCCPU *cpu, + target_ulong lpid, ppc_v3_pate_t *entry) +{ + SpaprMachineStateNestedGuest *guest; + assert(lpid != 0); + guest = spapr_get_nested_guest(spapr, lpid); + if (!guest) { + return false; + } + + entry->dw0 = guest->parttbl[0]; + entry->dw1 = guest->parttbl[1]; + return true; +} + #define PRTS_MASK 0x1f static target_ulong h_set_ptbl(PowerPCCPU *cpu, @@ -25,7 +102,7 @@ static target_ulong h_set_ptbl(PowerPCCPU *cpu, return H_PARAMETER; } - spapr->nested_ptcr = ptcr; /* Save new partition table */ + spapr->nested.ptcr = ptcr; /* Save new partition table */ return H_SUCCESS; } @@ -59,6 +136,7 @@ static target_ulong h_copy_tofrom_guest(PowerPCCPU *cpu, static void nested_save_state(struct nested_ppc_state *save, PowerPCCPU *cpu) { CPUPPCState *env = &cpu->env; + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); memcpy(save->gpr, env->gpr, sizeof(save->gpr)); @@ -85,13 +163,79 @@ static void nested_save_state(struct nested_ppc_state *save, PowerPCCPU *cpu) save->pidr = env->spr[SPR_BOOKS_PID]; save->ppr = env->spr[SPR_PPR]; - save->tb_offset = env->tb_env->tb_offset; + if (spapr_nested_api(spapr) == NESTED_API_PAPR) { + save->amor = env->spr[SPR_AMOR]; + save->dawr0 = env->spr[SPR_DAWR0]; + save->dawrx0 = env->spr[SPR_DAWRX0]; + save->ciabr = env->spr[SPR_CIABR]; + save->purr = env->spr[SPR_PURR]; + save->spurr = env->spr[SPR_SPURR]; + save->ic = env->spr[SPR_IC]; + save->vtb = env->spr[SPR_VTB]; + save->hdar = env->spr[SPR_HDAR]; + save->hdsisr = env->spr[SPR_HDSISR]; + save->heir = env->spr[SPR_HEIR]; + save->asdr = env->spr[SPR_ASDR]; + save->dawr1 = env->spr[SPR_DAWR1]; + save->dawrx1 = env->spr[SPR_DAWRX1]; + save->dexcr = env->spr[SPR_DEXCR]; + save->hdexcr = env->spr[SPR_HDEXCR]; + save->hashkeyr = env->spr[SPR_HASHKEYR]; + save->hashpkeyr = env->spr[SPR_HASHPKEYR]; + memcpy(save->vsr, env->vsr, sizeof(save->vsr)); + save->ebbhr = env->spr[SPR_EBBHR]; + save->tar = env->spr[SPR_TAR]; + save->ebbrr = env->spr[SPR_EBBRR]; + save->bescr = env->spr[SPR_BESCR]; + save->iamr = env->spr[SPR_IAMR]; + save->amr = env->spr[SPR_AMR]; + save->uamor = env->spr[SPR_UAMOR]; + save->dscr = env->spr[SPR_DSCR]; + save->fscr = env->spr[SPR_FSCR]; + save->pspb = env->spr[SPR_PSPB]; + save->ctrl = env->spr[SPR_CTRL]; + save->vrsave = env->spr[SPR_VRSAVE]; + save->dar = env->spr[SPR_DAR]; + save->dsisr = env->spr[SPR_DSISR]; + save->pmc1 = env->spr[SPR_POWER_PMC1]; + save->pmc2 = env->spr[SPR_POWER_PMC2]; + save->pmc3 = env->spr[SPR_POWER_PMC3]; + save->pmc4 = env->spr[SPR_POWER_PMC4]; + save->pmc5 = env->spr[SPR_POWER_PMC5]; + save->pmc6 = env->spr[SPR_POWER_PMC6]; + save->mmcr0 = env->spr[SPR_POWER_MMCR0]; + save->mmcr1 = env->spr[SPR_POWER_MMCR1]; + save->mmcr2 = env->spr[SPR_POWER_MMCR2]; + save->mmcra = env->spr[SPR_POWER_MMCRA]; + save->sdar = env->spr[SPR_POWER_SDAR]; + save->siar = env->spr[SPR_POWER_SIAR]; + save->sier = env->spr[SPR_POWER_SIER]; + save->vscr = ppc_get_vscr(env); + save->fpscr = env->fpscr; + } else if (spapr_nested_api(spapr) == NESTED_API_KVM_HV) { + save->tb_offset = env->tb_env->tb_offset; + } +} + +static void nested_post_load_state(CPUPPCState *env, CPUState *cs) +{ + /* + * compute hflags and possible interrupts. + */ + hreg_compute_hflags(env); + ppc_maybe_interrupt(env); + /* + * Nested HV does not tag TLB entries between L1 and L2, so must + * flush on transition. + */ + tlb_flush(cs); + env->reserve_addr = -1; /* Reset the reservation */ } static void nested_load_state(PowerPCCPU *cpu, struct nested_ppc_state *load) { - CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); memcpy(env->gpr, load->gpr, sizeof(env->gpr)); @@ -118,20 +262,58 @@ static void nested_load_state(PowerPCCPU *cpu, struct nested_ppc_state *load) env->spr[SPR_BOOKS_PID] = load->pidr; env->spr[SPR_PPR] = load->ppr; - env->tb_env->tb_offset = load->tb_offset; - - /* - * MSR updated, compute hflags and possible interrupts. - */ - hreg_compute_hflags(env); - ppc_maybe_interrupt(env); - - /* - * Nested HV does not tag TLB entries between L1 and L2, so must - * flush on transition. - */ - tlb_flush(cs); - env->reserve_addr = -1; /* Reset the reservation */ + if (spapr_nested_api(spapr) == NESTED_API_PAPR) { + env->spr[SPR_AMOR] = load->amor; + env->spr[SPR_DAWR0] = load->dawr0; + env->spr[SPR_DAWRX0] = load->dawrx0; + env->spr[SPR_CIABR] = load->ciabr; + env->spr[SPR_PURR] = load->purr; + env->spr[SPR_SPURR] = load->purr; + env->spr[SPR_IC] = load->ic; + env->spr[SPR_VTB] = load->vtb; + env->spr[SPR_HDAR] = load->hdar; + env->spr[SPR_HDSISR] = load->hdsisr; + env->spr[SPR_HEIR] = load->heir; + env->spr[SPR_ASDR] = load->asdr; + env->spr[SPR_DAWR1] = load->dawr1; + env->spr[SPR_DAWRX1] = load->dawrx1; + env->spr[SPR_DEXCR] = load->dexcr; + env->spr[SPR_HDEXCR] = load->hdexcr; + env->spr[SPR_HASHKEYR] = load->hashkeyr; + env->spr[SPR_HASHPKEYR] = load->hashpkeyr; + memcpy(env->vsr, load->vsr, sizeof(env->vsr)); + env->spr[SPR_EBBHR] = load->ebbhr; + env->spr[SPR_TAR] = load->tar; + env->spr[SPR_EBBRR] = load->ebbrr; + env->spr[SPR_BESCR] = load->bescr; + env->spr[SPR_IAMR] = load->iamr; + env->spr[SPR_AMR] = load->amr; + env->spr[SPR_UAMOR] = load->uamor; + env->spr[SPR_DSCR] = load->dscr; + env->spr[SPR_FSCR] = load->fscr; + env->spr[SPR_PSPB] = load->pspb; + env->spr[SPR_CTRL] = load->ctrl; + env->spr[SPR_VRSAVE] = load->vrsave; + env->spr[SPR_DAR] = load->dar; + env->spr[SPR_DSISR] = load->dsisr; + env->spr[SPR_POWER_PMC1] = load->pmc1; + env->spr[SPR_POWER_PMC2] = load->pmc2; + env->spr[SPR_POWER_PMC3] = load->pmc3; + env->spr[SPR_POWER_PMC4] = load->pmc4; + env->spr[SPR_POWER_PMC5] = load->pmc5; + env->spr[SPR_POWER_PMC6] = load->pmc6; + env->spr[SPR_POWER_MMCR0] = load->mmcr0; + env->spr[SPR_POWER_MMCR1] = load->mmcr1; + env->spr[SPR_POWER_MMCR2] = load->mmcr2; + env->spr[SPR_POWER_MMCRA] = load->mmcra; + env->spr[SPR_POWER_SDAR] = load->sdar; + env->spr[SPR_POWER_SIAR] = load->siar; + env->spr[SPR_POWER_SIER] = load->sier; + ppc_store_vscr(env, load->vscr); + ppc_store_fpscr(env, load->fpscr); + } else if (spapr_nested_api(spapr) == NESTED_API_KVM_HV) { + env->tb_env->tb_offset = load->tb_offset; + } } /* @@ -146,6 +328,7 @@ static target_ulong h_enter_nested(PowerPCCPU *cpu, { PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); struct nested_ppc_state l2_state; target_ulong hv_ptr = args[0]; @@ -157,7 +340,7 @@ static target_ulong h_enter_nested(PowerPCCPU *cpu, struct kvmppc_pt_regs *regs; hwaddr len; - if (spapr->nested_ptcr == 0) { + if (spapr->nested.ptcr == 0) { return H_NOT_AVAILABLE; } @@ -244,6 +427,7 @@ static target_ulong h_enter_nested(PowerPCCPU *cpu, * Switch to the nested guest environment and start the "hdec" timer. */ nested_load_state(cpu, &l2_state); + nested_post_load_state(env, cs); hdec = hv_state.hdec_expiry - now; cpu_ppc_hdecr_init(env); @@ -272,9 +456,10 @@ static target_ulong h_enter_nested(PowerPCCPU *cpu, return env->gpr[3]; } -void spapr_exit_nested(PowerPCCPU *cpu, int excp) +static void spapr_exit_nested_hv(PowerPCCPU *cpu, int excp) { CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); struct nested_ppc_state l2_state; target_ulong hv_ptr = spapr_cpu->nested_host_state->gpr[4]; @@ -284,8 +469,6 @@ void spapr_exit_nested(PowerPCCPU *cpu, int excp) struct kvmppc_pt_regs *regs; hwaddr len; - assert(spapr_cpu->in_nested); - nested_save_state(&l2_state, cpu); hsrr0 = env->spr[SPR_HSRR0]; hsrr1 = env->spr[SPR_HSRR1]; @@ -298,6 +481,7 @@ void spapr_exit_nested(PowerPCCPU *cpu, int excp) */ assert(env->spr[SPR_LPIDR] != 0); nested_load_state(cpu, spapr_cpu->nested_host_state); + nested_post_load_state(env, cs); env->gpr[3] = env->excp_vectors[excp]; /* hcall return value */ cpu_ppc_hdecr_exit(env); @@ -375,21 +559,1347 @@ void spapr_exit_nested(PowerPCCPU *cpu, int excp) address_space_unmap(CPU(cpu)->as, regs, len, len, true); } -void spapr_register_nested(void) +static bool spapr_nested_vcpu_check(SpaprMachineStateNestedGuest *guest, + target_ulong vcpuid, bool inoutbuf) +{ + struct SpaprMachineStateNestedGuestVcpu *vcpu; + /* + * Perform sanity checks for the provided vcpuid of a guest. + * For now, ensure its valid, allocated and enabled for use. + */ + + if (vcpuid >= PAPR_NESTED_GUEST_VCPU_MAX) { + return false; + } + + if (!(vcpuid < guest->nr_vcpus)) { + return false; + } + + vcpu = &guest->vcpus[vcpuid]; + if (!vcpu->enabled) { + return false; + } + + if (!inoutbuf) { + return true; + } + + /* Check to see if the in/out buffers are registered */ + if (vcpu->runbufin.addr && vcpu->runbufout.addr) { + return true; + } + + return false; +} + +static void *get_vcpu_state_ptr(SpaprMachineStateNestedGuest *guest, + target_ulong vcpuid) +{ + assert(spapr_nested_vcpu_check(guest, vcpuid, false)); + return &guest->vcpus[vcpuid].state; +} + +static void *get_vcpu_ptr(SpaprMachineStateNestedGuest *guest, + target_ulong vcpuid) +{ + assert(spapr_nested_vcpu_check(guest, vcpuid, false)); + return &guest->vcpus[vcpuid]; +} + +static void *get_guest_ptr(SpaprMachineStateNestedGuest *guest, + target_ulong vcpuid) +{ + return guest; /* for GSBE_NESTED */ +} + +/* + * set=1 means the L1 is trying to set some state + * set=0 means the L1 is trying to get some state + */ +static void copy_state_8to8(void *a, void *b, bool set) +{ + /* set takes from the Big endian element_buf and sets internal buffer */ + + if (set) { + *(uint64_t *)a = be64_to_cpu(*(uint64_t *)b); + } else { + *(uint64_t *)b = cpu_to_be64(*(uint64_t *)a); + } +} + +static void copy_state_4to4(void *a, void *b, bool set) +{ + if (set) { + *(uint32_t *)a = be32_to_cpu(*(uint32_t *)b); + } else { + *(uint32_t *)b = cpu_to_be32(*((uint32_t *)a)); + } +} + +static void copy_state_16to16(void *a, void *b, bool set) +{ + uint64_t *src, *dst; + + if (set) { + src = b; + dst = a; + + dst[1] = be64_to_cpu(src[0]); + dst[0] = be64_to_cpu(src[1]); + } else { + src = a; + dst = b; + + dst[1] = cpu_to_be64(src[0]); + dst[0] = cpu_to_be64(src[1]); + } +} + +static void copy_state_4to8(void *a, void *b, bool set) +{ + if (set) { + *(uint64_t *)a = (uint64_t) be32_to_cpu(*(uint32_t *)b); + } else { + *(uint32_t *)b = cpu_to_be32((uint32_t) (*((uint64_t *)a))); + } +} + +static void copy_state_pagetbl(void *a, void *b, bool set) +{ + uint64_t *pagetbl; + uint64_t *buf; /* 3 double words */ + uint64_t rts; + + assert(set); + + pagetbl = a; + buf = b; + + *pagetbl = be64_to_cpu(buf[0]); + /* as per ISA section 6.7.6.1 */ + *pagetbl |= PATE0_HR; /* Host Radix bit is 1 */ + + /* RTS */ + rts = be64_to_cpu(buf[1]); + assert(rts == 52); + rts = rts - 31; /* since radix tree size = 2^(RTS+31) */ + *pagetbl |= ((rts & 0x7) << 5); /* RTS2 is bit 56:58 */ + *pagetbl |= (((rts >> 3) & 0x3) << 61); /* RTS1 is bit 1:2 */ + + /* RPDS {Size = 2^(RPDS+3) , RPDS >=5} */ + *pagetbl |= 63 - clz64(be64_to_cpu(buf[2])) - 3; +} + +static void copy_state_proctbl(void *a, void *b, bool set) +{ + uint64_t *proctbl; + uint64_t *buf; /* 2 double words */ + + assert(set); + + proctbl = a; + buf = b; + /* PRTB: Process Table Base */ + *proctbl = be64_to_cpu(buf[0]); + /* PRTS: Process Table Size = 2^(12+PRTS) */ + if (be64_to_cpu(buf[1]) == (1ULL << 12)) { + *proctbl |= 0; + } else if (be64_to_cpu(buf[1]) == (1ULL << 24)) { + *proctbl |= 12; + } else { + g_assert_not_reached(); + } +} + +static void copy_state_runbuf(void *a, void *b, bool set) +{ + uint64_t *buf; /* 2 double words */ + struct SpaprMachineStateNestedGuestVcpuRunBuf *runbuf; + + assert(set); + + runbuf = a; + buf = b; + + runbuf->addr = be64_to_cpu(buf[0]); + assert(runbuf->addr); + + /* per spec */ + assert(be64_to_cpu(buf[1]) <= 16384); + + /* + * This will also hit in the input buffer but should be fine for + * now. If not we can split this function. + */ + assert(be64_to_cpu(buf[1]) >= VCPU_OUT_BUF_MIN_SZ); + + runbuf->size = be64_to_cpu(buf[1]); +} + +/* tell the L1 how big we want the output vcpu run buffer */ +static void out_buf_min_size(void *a, void *b, bool set) +{ + uint64_t *buf; /* 1 double word */ + + assert(!set); + + buf = b; + + buf[0] = cpu_to_be64(VCPU_OUT_BUF_MIN_SZ); +} + +static void copy_logical_pvr(void *a, void *b, bool set) +{ + SpaprMachineStateNestedGuest *guest; + uint32_t *buf; /* 1 word */ + uint32_t *pvr_logical_ptr; + uint32_t pvr_logical; + target_ulong pcr = 0; + + pvr_logical_ptr = a; + buf = b; + + if (!set) { + buf[0] = cpu_to_be32(*pvr_logical_ptr); + return; + } + + pvr_logical = be32_to_cpu(buf[0]); + + *pvr_logical_ptr = pvr_logical; + + if (*pvr_logical_ptr) { + switch (*pvr_logical_ptr) { + case CPU_POWERPC_LOGICAL_3_10: + pcr = PCR_COMPAT_3_10 | PCR_COMPAT_3_00; + break; + case CPU_POWERPC_LOGICAL_3_00: + pcr = PCR_COMPAT_3_00; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "Could not set PCR for LPVR=0x%08x\n", + *pvr_logical_ptr); + return; + } + } + + guest = container_of(pvr_logical_ptr, + struct SpaprMachineStateNestedGuest, + pvr_logical); + for (int i = 0; i < guest->nr_vcpus; i++) { + guest->vcpus[i].state.pcr = ~pcr | HVMASK_PCR; + } +} + +static void copy_tb_offset(void *a, void *b, bool set) +{ + SpaprMachineStateNestedGuest *guest; + uint64_t *buf; /* 1 double word */ + uint64_t *tb_offset_ptr; + uint64_t tb_offset; + + tb_offset_ptr = a; + buf = b; + + if (!set) { + buf[0] = cpu_to_be64(*tb_offset_ptr); + return; + } + + tb_offset = be64_to_cpu(buf[0]); + /* need to copy this to the individual tb_offset for each vcpu */ + guest = container_of(tb_offset_ptr, + struct SpaprMachineStateNestedGuest, + tb_offset); + for (int i = 0; i < guest->nr_vcpus; i++) { + guest->vcpus[i].tb_offset = tb_offset; + } +} + +static void copy_state_hdecr(void *a, void *b, bool set) +{ + uint64_t *buf; /* 1 double word */ + uint64_t *hdecr_expiry_tb; + + hdecr_expiry_tb = a; + buf = b; + + if (!set) { + buf[0] = cpu_to_be64(*hdecr_expiry_tb); + return; + } + + *hdecr_expiry_tb = be64_to_cpu(buf[0]); +} + +struct guest_state_element_type guest_state_element_types[] = { + GUEST_STATE_ELEMENT_NOP(GSB_HV_VCPU_IGNORED_ID, 0), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR0, gpr[0]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR1, gpr[1]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR2, gpr[2]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR3, gpr[3]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR4, gpr[4]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR5, gpr[5]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR6, gpr[6]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR7, gpr[7]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR8, gpr[8]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR9, gpr[9]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR10, gpr[10]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR11, gpr[11]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR12, gpr[12]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR13, gpr[13]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR14, gpr[14]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR15, gpr[15]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR16, gpr[16]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR17, gpr[17]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR18, gpr[18]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR19, gpr[19]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR20, gpr[20]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR21, gpr[21]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR22, gpr[22]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR23, gpr[23]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR24, gpr[24]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR25, gpr[25]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR26, gpr[26]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR27, gpr[27]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR28, gpr[28]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR29, gpr[29]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR30, gpr[30]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_GPR31, gpr[31]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_NIA, nip), + GSE_ENV_DWM(GSB_VCPU_SPR_MSR, msr, HVMASK_MSR), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_CTR, ctr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_LR, lr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_XER, xer), + GUEST_STATE_ELEMENT_ENV_WW(GSB_VCPU_SPR_CR, cr), + GUEST_STATE_ELEMENT_NOP_DW(GSB_VCPU_SPR_MMCR3), + GUEST_STATE_ELEMENT_NOP_DW(GSB_VCPU_SPR_SIER2), + GUEST_STATE_ELEMENT_NOP_DW(GSB_VCPU_SPR_SIER3), + GUEST_STATE_ELEMENT_NOP_W(GSB_VCPU_SPR_WORT), + GSE_ENV_DWM(GSB_VCPU_SPR_LPCR, lpcr, HVMASK_LPCR), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_AMOR, amor), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_HFSCR, hfscr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DAWR0, dawr0), + GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_DAWRX0, dawrx0), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_CIABR, ciabr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_PURR, purr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPURR, spurr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_IC, ic), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_VTB, vtb), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_HDAR, hdar), + GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_HDSISR, hdsisr), + GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_HEIR, heir), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_ASDR, asdr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SRR0, srr0), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SRR1, srr1), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPRG0, sprg0), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPRG1, sprg1), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPRG2, sprg2), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SPRG3, sprg3), + GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PIDR, pidr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_CFAR, cfar), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_PPR, ppr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DAWR1, dawr1), + GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_DAWRX1, dawrx1), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DEXCR, dexcr), + GSE_ENV_DWM(GSB_VCPU_SPR_HDEXCR, hdexcr, HVMASK_HDEXCR), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_HASHKEYR, hashkeyr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_HASHPKEYR, hashpkeyr), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR0, vsr[0]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR1, vsr[1]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR2, vsr[2]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR3, vsr[3]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR4, vsr[4]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR5, vsr[5]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR6, vsr[6]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR7, vsr[7]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR8, vsr[8]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR9, vsr[9]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR10, vsr[10]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR11, vsr[11]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR12, vsr[12]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR13, vsr[13]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR14, vsr[14]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR15, vsr[15]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR16, vsr[16]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR17, vsr[17]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR18, vsr[18]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR19, vsr[19]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR20, vsr[20]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR21, vsr[21]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR22, vsr[22]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR23, vsr[23]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR24, vsr[24]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR25, vsr[25]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR26, vsr[26]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR27, vsr[27]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR28, vsr[28]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR29, vsr[29]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR30, vsr[30]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR31, vsr[31]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR32, vsr[32]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR33, vsr[33]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR34, vsr[34]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR35, vsr[35]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR36, vsr[36]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR37, vsr[37]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR38, vsr[38]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR39, vsr[39]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR40, vsr[40]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR41, vsr[41]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR42, vsr[42]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR43, vsr[43]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR44, vsr[44]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR45, vsr[45]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR46, vsr[46]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR47, vsr[47]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR48, vsr[48]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR49, vsr[49]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR50, vsr[50]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR51, vsr[51]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR52, vsr[52]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR53, vsr[53]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR54, vsr[54]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR55, vsr[55]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR56, vsr[56]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR57, vsr[57]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR58, vsr[58]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR59, vsr[59]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR60, vsr[60]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR61, vsr[61]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR62, vsr[62]), + GUEST_STATE_ELEMENT_ENV_QW(GSB_VCPU_SPR_VSR63, vsr[63]), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_EBBHR, ebbhr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_TAR, tar), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_EBBRR, ebbrr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_BESCR, bescr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_IAMR, iamr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_AMR, amr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_UAMOR, uamor), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DSCR, dscr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_FSCR, fscr), + GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PSPB, pspb), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_CTRL, ctrl), + GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_VRSAVE, vrsave), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_DAR, dar), + GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_DSISR, dsisr), + GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC1, pmc1), + GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC2, pmc2), + GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC3, pmc3), + GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC4, pmc4), + GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC5, pmc5), + GUEST_STATE_ELEMENT_ENV_W(GSB_VCPU_SPR_PMC6, pmc6), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_MMCR0, mmcr0), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_MMCR1, mmcr1), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_MMCR2, mmcr2), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_MMCRA, mmcra), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SDAR , sdar), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SIAR , siar), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_SIER , sier), + GUEST_STATE_ELEMENT_ENV_WW(GSB_VCPU_SPR_VSCR, vscr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_SPR_FPSCR, fpscr), + GUEST_STATE_ELEMENT_ENV_DW(GSB_VCPU_DEC_EXPIRE_TB, dec_expiry_tb), + GSBE_NESTED(GSB_PART_SCOPED_PAGETBL, 0x18, parttbl[0], copy_state_pagetbl), + GSBE_NESTED(GSB_PROCESS_TBL, 0x10, parttbl[1], copy_state_proctbl), + GSBE_NESTED(GSB_VCPU_LPVR, 0x4, pvr_logical, copy_logical_pvr), + GSBE_NESTED_MSK(GSB_TB_OFFSET, 0x8, tb_offset, copy_tb_offset, + HVMASK_TB_OFFSET), + GSBE_NESTED_VCPU(GSB_VCPU_IN_BUFFER, 0x10, runbufin, copy_state_runbuf), + GSBE_NESTED_VCPU(GSB_VCPU_OUT_BUFFER, 0x10, runbufout, copy_state_runbuf), + GSBE_NESTED_VCPU(GSB_VCPU_OUT_BUF_MIN_SZ, 0x8, runbufout, out_buf_min_size), + GSBE_NESTED_VCPU(GSB_VCPU_HDEC_EXPIRY_TB, 0x8, hdecr_expiry_tb, + copy_state_hdecr) +}; + +void spapr_nested_gsb_init(void) +{ + struct guest_state_element_type *type; + + /* Init the guest state elements lookup table, flags for now */ + for (int i = 0; i < ARRAY_SIZE(guest_state_element_types); i++) { + type = &guest_state_element_types[i]; + + assert(type->id <= GSB_LAST); + if (type->id >= GSB_VCPU_SPR_HDAR) + /* 0xf000 - 0xf005 Thread + RO */ + type->flags = GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY; + else if (type->id >= GSB_VCPU_IN_BUFFER) + /* 0x0c00 - 0xf000 Thread + RW */ + type->flags = 0; + else if (type->id >= GSB_VCPU_LPVR) + /* 0x0003 - 0x0bff Guest + RW */ + type->flags = GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE; + else if (type->id >= GSB_HV_VCPU_STATE_SIZE) + /* 0x0001 - 0x0002 Guest + RO */ + type->flags = GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY | + GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE; + } +} + +static struct guest_state_element *guest_state_element_next( + struct guest_state_element *element, + int64_t *len, + int64_t *num_elements) +{ + uint16_t size; + + /* size is of element->value[] only. Not whole guest_state_element */ + size = be16_to_cpu(element->size); + + if (len) { + *len -= size + offsetof(struct guest_state_element, value); + } + + if (num_elements) { + *num_elements -= 1; + } + + return (struct guest_state_element *)(element->value + size); +} + +static +struct guest_state_element_type *guest_state_element_type_find(uint16_t id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(guest_state_element_types); i++) + if (id == guest_state_element_types[i].id) { + return &guest_state_element_types[i]; + } + + return NULL; +} + +static void log_element(struct guest_state_element *element, + struct guest_state_request *gsr) +{ + qemu_log_mask(LOG_GUEST_ERROR, "h_guest_%s_state id:0x%04x size:0x%04x", + gsr->flags & GUEST_STATE_REQUEST_SET ? "set" : "get", + be16_to_cpu(element->id), be16_to_cpu(element->size)); + qemu_log_mask(LOG_GUEST_ERROR, "buf:0x%016"PRIx64" ...\n", + be64_to_cpu(*(uint64_t *)element->value)); +} + +static bool guest_state_request_check(struct guest_state_request *gsr) +{ + int64_t num_elements, len = gsr->len; + struct guest_state_buffer *gsb = gsr->gsb; + struct guest_state_element *element; + struct guest_state_element_type *type; + uint16_t id, size; + + /* gsb->num_elements = 0 == 32 bits long */ + assert(len >= 4); + + num_elements = be32_to_cpu(gsb->num_elements); + element = gsb->elements; + len -= sizeof(gsb->num_elements); + + /* Walk the buffer to validate the length */ + while (num_elements) { + + id = be16_to_cpu(element->id); + size = be16_to_cpu(element->size); + + if (false) { + log_element(element, gsr); + } + /* buffer size too small */ + if (len < 0) { + return false; + } + + type = guest_state_element_type_find(id); + if (!type) { + qemu_log_mask(LOG_GUEST_ERROR, "Element ID %04x unknown\n", id); + log_element(element, gsr); + return false; + } + + if (id == GSB_HV_VCPU_IGNORED_ID) { + goto next_element; + } + + if (size != type->size) { + qemu_log_mask(LOG_GUEST_ERROR, "Size mismatch. Element ID:%04x." + "Size Exp:%i Got:%i\n", id, type->size, size); + log_element(element, gsr); + return false; + } + + if ((type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY) && + (gsr->flags & GUEST_STATE_REQUEST_SET)) { + qemu_log_mask(LOG_GUEST_ERROR, "Trying to set a read-only Element " + "ID:%04x.\n", id); + return false; + } + + if (type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE) { + /* guest wide element type */ + if (!(gsr->flags & GUEST_STATE_REQUEST_GUEST_WIDE)) { + qemu_log_mask(LOG_GUEST_ERROR, "trying to set a guest wide " + "Element ID:%04x.\n", id); + return false; + } + } else { + /* thread wide element type */ + if (gsr->flags & GUEST_STATE_REQUEST_GUEST_WIDE) { + qemu_log_mask(LOG_GUEST_ERROR, "trying to set a thread wide " + "Element ID:%04x.\n", id); + return false; + } + } +next_element: + element = guest_state_element_next(element, &len, &num_elements); + + } + return true; +} + +static bool is_gsr_invalid(struct guest_state_request *gsr, + struct guest_state_element *element, + struct guest_state_element_type *type) +{ + if ((gsr->flags & GUEST_STATE_REQUEST_SET) && + (*(uint64_t *)(element->value) & ~(type->mask))) { + log_element(element, gsr); + qemu_log_mask(LOG_GUEST_ERROR, "L1 can't set reserved bits " + "(allowed mask: 0x%08"PRIx64")\n", type->mask); + return true; + } + return false; +} + +static target_ulong h_guest_get_capabilities(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + target_ulong flags = args[0]; + + if (flags) { /* don't handle any flags capabilities for now */ + return H_PARAMETER; + } + + /* P10 capabilities */ + if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_10, 0, + spapr->max_compat_pvr)) { + env->gpr[4] |= H_GUEST_CAPABILITIES_P10_MODE; + } + + /* P9 capabilities */ + if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, + spapr->max_compat_pvr)) { + env->gpr[4] |= H_GUEST_CAPABILITIES_P9_MODE; + } + + return H_SUCCESS; +} + +static target_ulong h_guest_set_capabilities(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + target_ulong flags = args[0]; + target_ulong capabilities = args[1]; + env->gpr[4] = 0; + + if (flags) { /* don't handle any flags capabilities for now */ + return H_PARAMETER; + } + + if (capabilities & H_GUEST_CAPABILITIES_COPY_MEM) { + env->gpr[4] = 1; + return H_P2; /* isn't supported */ + } + + /* + * If there are no capabilities configured, set the R5 to the index of + * the first supported Power Processor Mode + */ + if (!capabilities) { + env->gpr[4] = 1; + + /* set R5 to the first supported Power Processor Mode */ + if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_10, 0, + spapr->max_compat_pvr)) { + env->gpr[5] = H_GUEST_CAP_P10_MODE_BMAP; + } else if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, + spapr->max_compat_pvr)) { + env->gpr[5] = H_GUEST_CAP_P9_MODE_BMAP; + } + + return H_P2; + } + + /* + * If an invalid capability is set, R5 should contain the index of the + * invalid capability bit + */ + if (capabilities & ~H_GUEST_CAP_VALID_MASK) { + env->gpr[4] = 1; + + /* Set R5 to the index of the invalid capability */ + env->gpr[5] = 63 - ctz64(capabilities); + + return H_P2; + } + + if (!spapr->nested.capabilities_set) { + spapr->nested.capabilities_set = true; + spapr->nested.pvr_base = env->spr[SPR_PVR]; + return H_SUCCESS; + } else { + return H_STATE; + } +} + +static void +destroy_guest_helper(gpointer value) +{ + struct SpaprMachineStateNestedGuest *guest = value; + g_free(guest->vcpus); + g_free(guest); +} + +static target_ulong h_guest_create(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + target_ulong flags = args[0]; + target_ulong continue_token = args[1]; + uint64_t guestid; + int nguests = 0; + struct SpaprMachineStateNestedGuest *guest; + + if (flags) { /* don't handle any flags for now */ + return H_UNSUPPORTED_FLAG; + } + + if (continue_token != -1) { + return H_P2; + } + + if (!spapr->nested.capabilities_set) { + return H_STATE; + } + + if (!spapr->nested.guests) { + spapr->nested.guests = g_hash_table_new_full(NULL, + NULL, + NULL, + destroy_guest_helper); + } + + nguests = g_hash_table_size(spapr->nested.guests); + + if (nguests == PAPR_NESTED_GUEST_MAX) { + return H_NO_MEM; + } + + /* Lookup for available guestid */ + for (guestid = 1; guestid < PAPR_NESTED_GUEST_MAX; guestid++) { + if (!(g_hash_table_lookup(spapr->nested.guests, + GINT_TO_POINTER(guestid)))) { + break; + } + } + + if (guestid == PAPR_NESTED_GUEST_MAX) { + return H_NO_MEM; + } + + guest = g_try_new0(struct SpaprMachineStateNestedGuest, 1); + if (!guest) { + return H_NO_MEM; + } + + guest->pvr_logical = spapr->nested.pvr_base; + g_hash_table_insert(spapr->nested.guests, GINT_TO_POINTER(guestid), guest); + env->gpr[4] = guestid; + + return H_SUCCESS; +} + +static target_ulong h_guest_delete(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong guestid = args[1]; + struct SpaprMachineStateNestedGuest *guest; + + /* + * handle flag deleteAllGuests, if set: + * guestid is ignored and all guests are deleted + * + */ + if (flags & ~H_GUEST_DELETE_ALL_FLAG) { + return H_UNSUPPORTED_FLAG; /* other flag bits reserved */ + } else if (flags & H_GUEST_DELETE_ALL_FLAG) { + g_hash_table_destroy(spapr->nested.guests); + return H_SUCCESS; + } + + guest = g_hash_table_lookup(spapr->nested.guests, GINT_TO_POINTER(guestid)); + if (!guest) { + return H_P2; + } + + g_hash_table_remove(spapr->nested.guests, GINT_TO_POINTER(guestid)); + + return H_SUCCESS; +} + +static target_ulong h_guest_create_vcpu(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong guestid = args[1]; + target_ulong vcpuid = args[2]; + SpaprMachineStateNestedGuest *guest; + + if (flags) { /* don't handle any flags for now */ + return H_UNSUPPORTED_FLAG; + } + + guest = spapr_get_nested_guest(spapr, guestid); + if (!guest) { + return H_P2; + } + + if (vcpuid < guest->nr_vcpus) { + qemu_log_mask(LOG_UNIMP, "vcpuid " TARGET_FMT_ld " already in use.", + vcpuid); + return H_IN_USE; + } + /* linear vcpuid allocation only */ + assert(vcpuid == guest->nr_vcpus); + + if (guest->nr_vcpus >= PAPR_NESTED_GUEST_VCPU_MAX) { + return H_P3; + } + + SpaprMachineStateNestedGuestVcpu *vcpus, *curr_vcpu; + vcpus = g_try_renew(struct SpaprMachineStateNestedGuestVcpu, + guest->vcpus, + guest->nr_vcpus + 1); + if (!vcpus) { + return H_NO_MEM; + } + guest->vcpus = vcpus; + curr_vcpu = &vcpus[guest->nr_vcpus]; + memset(curr_vcpu, 0, sizeof(SpaprMachineStateNestedGuestVcpu)); + + curr_vcpu->enabled = true; + guest->nr_vcpus++; + + return H_SUCCESS; +} + +static target_ulong getset_state(SpaprMachineStateNestedGuest *guest, + uint64_t vcpuid, + struct guest_state_request *gsr) +{ + void *ptr; + uint16_t id; + struct guest_state_element *element; + struct guest_state_element_type *type; + int64_t lenleft, num_elements; + + lenleft = gsr->len; + + if (!guest_state_request_check(gsr)) { + return H_P3; + } + + num_elements = be32_to_cpu(gsr->gsb->num_elements); + element = gsr->gsb->elements; + /* Process the elements */ + while (num_elements) { + type = NULL; + /* log_element(element, gsr); */ + + id = be16_to_cpu(element->id); + if (id == GSB_HV_VCPU_IGNORED_ID) { + goto next_element; + } + + type = guest_state_element_type_find(id); + assert(type); + + /* Get pointer to guest data to get/set */ + if (type->location && type->copy) { + ptr = type->location(guest, vcpuid); + assert(ptr); + if (!~(type->mask) && is_gsr_invalid(gsr, element, type)) { + return H_INVALID_ELEMENT_VALUE; + } + type->copy(ptr + type->offset, element->value, + gsr->flags & GUEST_STATE_REQUEST_SET ? true : false); + } + +next_element: + element = guest_state_element_next(element, &lenleft, &num_elements); + } + + return H_SUCCESS; +} + +static target_ulong map_and_getset_state(PowerPCCPU *cpu, + SpaprMachineStateNestedGuest *guest, + uint64_t vcpuid, + struct guest_state_request *gsr) +{ + target_ulong rc; + int64_t len; + bool is_write; + + len = gsr->len; + /* only get_state would require write access to the provided buffer */ + is_write = (gsr->flags & GUEST_STATE_REQUEST_SET) ? false : true; + gsr->gsb = address_space_map(CPU(cpu)->as, gsr->buf, (uint64_t *)&len, + is_write, MEMTXATTRS_UNSPECIFIED); + if (!gsr->gsb) { + rc = H_P3; + goto out1; + } + + if (len != gsr->len) { + rc = H_P3; + goto out1; + } + + rc = getset_state(guest, vcpuid, gsr); + +out1: + address_space_unmap(CPU(cpu)->as, gsr->gsb, len, is_write, len); + return rc; +} + +static target_ulong h_guest_getset_state(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong *args, + bool set) +{ + target_ulong flags = args[0]; + target_ulong lpid = args[1]; + target_ulong vcpuid = args[2]; + target_ulong buf = args[3]; + target_ulong buflen = args[4]; + struct guest_state_request gsr; + SpaprMachineStateNestedGuest *guest; + + guest = spapr_get_nested_guest(spapr, lpid); + if (!guest) { + return H_P2; + } + gsr.buf = buf; + assert(buflen <= GSB_MAX_BUF_SIZE); + gsr.len = buflen; + gsr.flags = 0; + if (flags & H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE) { + gsr.flags |= GUEST_STATE_REQUEST_GUEST_WIDE; + } + if (flags & !H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE) { + return H_PARAMETER; /* flag not supported yet */ + } + + if (set) { + gsr.flags |= GUEST_STATE_REQUEST_SET; + } + return map_and_getset_state(cpu, guest, vcpuid, &gsr); +} + +static target_ulong h_guest_set_state(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + return h_guest_getset_state(cpu, spapr, args, true); +} + +static target_ulong h_guest_get_state(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + return h_guest_getset_state(cpu, spapr, args, false); +} + +static void exit_nested_store_l2(PowerPCCPU *cpu, int excp, + SpaprMachineStateNestedGuestVcpu *vcpu) +{ + CPUPPCState *env = &cpu->env; + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + target_ulong now, hdar, hdsisr, asdr; + + assert(sizeof(env->gpr) == sizeof(vcpu->state.gpr)); /* sanity check */ + + now = cpu_ppc_load_tbl(env); /* L2 timebase */ + now -= vcpu->tb_offset; /* L1 timebase */ + vcpu->state.dec_expiry_tb = now - cpu_ppc_load_decr(env); + cpu_ppc_store_decr(env, spapr_cpu->nested_host_state->dec_expiry_tb - now); + /* backup hdar, hdsisr, asdr if reqd later below */ + hdar = vcpu->state.hdar; + hdsisr = vcpu->state.hdsisr; + asdr = vcpu->state.asdr; + + nested_save_state(&vcpu->state, cpu); + + if (excp == POWERPC_EXCP_MCHECK || + excp == POWERPC_EXCP_RESET || + excp == POWERPC_EXCP_SYSCALL) { + vcpu->state.nip = env->spr[SPR_SRR0]; + vcpu->state.msr = env->spr[SPR_SRR1] & env->msr_mask; + } else { + vcpu->state.nip = env->spr[SPR_HSRR0]; + vcpu->state.msr = env->spr[SPR_HSRR1] & env->msr_mask; + } + + /* hdar, hdsisr, asdr should be retained unless certain exceptions */ + if ((excp != POWERPC_EXCP_HDSI) && (excp != POWERPC_EXCP_HISI)) { + vcpu->state.asdr = asdr; + } else if (excp != POWERPC_EXCP_HDSI) { + vcpu->state.hdar = hdar; + vcpu->state.hdsisr = hdsisr; + } +} + +static int get_exit_ids(uint64_t srr0, uint16_t ids[16]) +{ + int nr; + + switch (srr0) { + case 0xc00: + nr = 10; + ids[0] = GSB_VCPU_GPR3; + ids[1] = GSB_VCPU_GPR4; + ids[2] = GSB_VCPU_GPR5; + ids[3] = GSB_VCPU_GPR6; + ids[4] = GSB_VCPU_GPR7; + ids[5] = GSB_VCPU_GPR8; + ids[6] = GSB_VCPU_GPR9; + ids[7] = GSB_VCPU_GPR10; + ids[8] = GSB_VCPU_GPR11; + ids[9] = GSB_VCPU_GPR12; + break; + case 0xe00: + nr = 5; + ids[0] = GSB_VCPU_SPR_HDAR; + ids[1] = GSB_VCPU_SPR_HDSISR; + ids[2] = GSB_VCPU_SPR_ASDR; + ids[3] = GSB_VCPU_SPR_NIA; + ids[4] = GSB_VCPU_SPR_MSR; + break; + case 0xe20: + nr = 4; + ids[0] = GSB_VCPU_SPR_HDAR; + ids[1] = GSB_VCPU_SPR_ASDR; + ids[2] = GSB_VCPU_SPR_NIA; + ids[3] = GSB_VCPU_SPR_MSR; + break; + case 0xe40: + nr = 3; + ids[0] = GSB_VCPU_SPR_HEIR; + ids[1] = GSB_VCPU_SPR_NIA; + ids[2] = GSB_VCPU_SPR_MSR; + break; + case 0xf80: + nr = 3; + ids[0] = GSB_VCPU_SPR_HFSCR; + ids[1] = GSB_VCPU_SPR_NIA; + ids[2] = GSB_VCPU_SPR_MSR; + break; + default: + nr = 0; + break; + } + + return nr; +} + +static void exit_process_output_buffer(PowerPCCPU *cpu, + SpaprMachineStateNestedGuest *guest, + target_ulong vcpuid, + target_ulong *r3) +{ + SpaprMachineStateNestedGuestVcpu *vcpu = &guest->vcpus[vcpuid]; + struct guest_state_request gsr; + struct guest_state_buffer *gsb; + struct guest_state_element *element; + struct guest_state_element_type *type; + int exit_id_count = 0; + uint16_t exit_cause_ids[16]; + hwaddr len; + + len = vcpu->runbufout.size; + gsb = address_space_map(CPU(cpu)->as, vcpu->runbufout.addr, &len, true, + MEMTXATTRS_UNSPECIFIED); + if (!gsb || len != vcpu->runbufout.size) { + address_space_unmap(CPU(cpu)->as, gsb, len, true, len); + *r3 = H_P2; + return; + } + + exit_id_count = get_exit_ids(*r3, exit_cause_ids); + + /* Create a buffer of elements to send back */ + gsb->num_elements = cpu_to_be32(exit_id_count); + element = gsb->elements; + for (int i = 0; i < exit_id_count; i++) { + type = guest_state_element_type_find(exit_cause_ids[i]); + assert(type); + element->id = cpu_to_be16(exit_cause_ids[i]); + element->size = cpu_to_be16(type->size); + element = guest_state_element_next(element, NULL, NULL); + } + gsr.gsb = gsb; + gsr.len = VCPU_OUT_BUF_MIN_SZ; + gsr.flags = 0; /* get + never guest wide */ + getset_state(guest, vcpuid, &gsr); + + address_space_unmap(CPU(cpu)->as, gsb, len, true, len); + return; +} + +static +void spapr_exit_nested_papr(SpaprMachineState *spapr, PowerPCCPU *cpu, int excp) +{ + CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + target_ulong r3_return = env->excp_vectors[excp]; /* hcall return value */ + target_ulong lpid = 0, vcpuid = 0; + struct SpaprMachineStateNestedGuestVcpu *vcpu = NULL; + struct SpaprMachineStateNestedGuest *guest = NULL; + + lpid = spapr_cpu->nested_host_state->gpr[5]; + vcpuid = spapr_cpu->nested_host_state->gpr[6]; + guest = spapr_get_nested_guest(spapr, lpid); + assert(guest); + spapr_nested_vcpu_check(guest, vcpuid, false); + vcpu = &guest->vcpus[vcpuid]; + + exit_nested_store_l2(cpu, excp, vcpu); + /* do the output buffer for run_vcpu*/ + exit_process_output_buffer(cpu, guest, vcpuid, &r3_return); + + assert(env->spr[SPR_LPIDR] != 0); + nested_load_state(cpu, spapr_cpu->nested_host_state); + cpu_ppc_decrease_tb_by_offset(env, vcpu->tb_offset); + env->gpr[3] = H_SUCCESS; + env->gpr[4] = r3_return; + nested_post_load_state(env, cs); + cpu_ppc_hdecr_exit(env); + + spapr_cpu->in_nested = false; + g_free(spapr_cpu->nested_host_state); + spapr_cpu->nested_host_state = NULL; +} + +void spapr_exit_nested(PowerPCCPU *cpu, int excp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + assert(spapr_cpu->in_nested); + if (spapr_nested_api(spapr) == NESTED_API_KVM_HV) { + spapr_exit_nested_hv(cpu, excp); + } else if (spapr_nested_api(spapr) == NESTED_API_PAPR) { + spapr_exit_nested_papr(spapr, cpu, excp); + } else { + g_assert_not_reached(); + } +} + +static void nested_papr_load_l2(PowerPCCPU *cpu, + CPUPPCState *env, + SpaprMachineStateNestedGuestVcpu *vcpu, + target_ulong now) +{ + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + target_ulong lpcr, lpcr_mask, hdec; + lpcr_mask = LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER; + + assert(vcpu); + assert(sizeof(env->gpr) == sizeof(vcpu->state.gpr)); + nested_load_state(cpu, &vcpu->state); + lpcr = (env->spr[SPR_LPCR] & ~lpcr_mask) | + (vcpu->state.lpcr & lpcr_mask); + lpcr |= LPCR_HR | LPCR_UPRT | LPCR_GTSE | LPCR_HVICE | LPCR_HDICE; + lpcr &= ~LPCR_LPES0; + env->spr[SPR_LPCR] = lpcr & pcc->lpcr_mask; + + hdec = vcpu->hdecr_expiry_tb - now; + cpu_ppc_store_decr(env, vcpu->state.dec_expiry_tb - now); + cpu_ppc_hdecr_init(env); + cpu_ppc_store_hdecr(env, hdec); + + cpu_ppc_increase_tb_by_offset(env, vcpu->tb_offset); +} + +static void nested_papr_run_vcpu(PowerPCCPU *cpu, + uint64_t lpid, + SpaprMachineStateNestedGuestVcpu *vcpu) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + target_ulong now = cpu_ppc_load_tbl(env); + + assert(env->spr[SPR_LPIDR] == 0); + assert(spapr->nested.api); /* ensure API version is initialized */ + spapr_cpu->nested_host_state = g_try_new(struct nested_ppc_state, 1); + assert(spapr_cpu->nested_host_state); + nested_save_state(spapr_cpu->nested_host_state, cpu); + spapr_cpu->nested_host_state->dec_expiry_tb = now - cpu_ppc_load_decr(env); + nested_papr_load_l2(cpu, env, vcpu, now); + env->spr[SPR_LPIDR] = lpid; /* post load l2 */ + + spapr_cpu->in_nested = true; + nested_post_load_state(env, cs); +} + +static target_ulong h_guest_run_vcpu(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + target_ulong flags = args[0]; + target_ulong lpid = args[1]; + target_ulong vcpuid = args[2]; + struct SpaprMachineStateNestedGuestVcpu *vcpu; + struct guest_state_request gsr; + SpaprMachineStateNestedGuest *guest; + target_ulong rc; + + if (flags) /* don't handle any flags for now */ + return H_PARAMETER; + + guest = spapr_get_nested_guest(spapr, lpid); + if (!guest) { + return H_P2; + } + if (!spapr_nested_vcpu_check(guest, vcpuid, true)) { + return H_P3; + } + + if (guest->parttbl[0] == 0) { + /* At least need a partition scoped radix tree */ + return H_NOT_AVAILABLE; + } + + vcpu = &guest->vcpus[vcpuid]; + + /* Read run_vcpu input buffer to update state */ + gsr.buf = vcpu->runbufin.addr; + gsr.len = vcpu->runbufin.size; + gsr.flags = GUEST_STATE_REQUEST_SET; /* Thread wide + writing */ + rc = map_and_getset_state(cpu, guest, vcpuid, &gsr); + if (rc == H_SUCCESS) { + nested_papr_run_vcpu(cpu, lpid, vcpu); + } else { + env->gpr[3] = rc; + } + return env->gpr[3]; +} + +void spapr_register_nested_hv(void) { spapr_register_hypercall(KVMPPC_H_SET_PARTITION_TABLE, h_set_ptbl); spapr_register_hypercall(KVMPPC_H_ENTER_NESTED, h_enter_nested); spapr_register_hypercall(KVMPPC_H_TLB_INVALIDATE, h_tlb_invalidate); spapr_register_hypercall(KVMPPC_H_COPY_TOFROM_GUEST, h_copy_tofrom_guest); } + +void spapr_unregister_nested_hv(void) +{ + spapr_unregister_hypercall(KVMPPC_H_SET_PARTITION_TABLE); + spapr_unregister_hypercall(KVMPPC_H_ENTER_NESTED); + spapr_unregister_hypercall(KVMPPC_H_TLB_INVALIDATE); + spapr_unregister_hypercall(KVMPPC_H_COPY_TOFROM_GUEST); +} + +void spapr_register_nested_papr(void) +{ + spapr_register_hypercall(H_GUEST_GET_CAPABILITIES, + h_guest_get_capabilities); + spapr_register_hypercall(H_GUEST_SET_CAPABILITIES, + h_guest_set_capabilities); + spapr_register_hypercall(H_GUEST_CREATE, h_guest_create); + spapr_register_hypercall(H_GUEST_DELETE, h_guest_delete); + spapr_register_hypercall(H_GUEST_CREATE_VCPU, h_guest_create_vcpu); + spapr_register_hypercall(H_GUEST_SET_STATE, h_guest_set_state); + spapr_register_hypercall(H_GUEST_GET_STATE, h_guest_get_state); + spapr_register_hypercall(H_GUEST_RUN_VCPU, h_guest_run_vcpu); +} + +void spapr_unregister_nested_papr(void) +{ + spapr_unregister_hypercall(H_GUEST_GET_CAPABILITIES); + spapr_unregister_hypercall(H_GUEST_SET_CAPABILITIES); + spapr_unregister_hypercall(H_GUEST_CREATE); + spapr_unregister_hypercall(H_GUEST_DELETE); + spapr_unregister_hypercall(H_GUEST_CREATE_VCPU); + spapr_unregister_hypercall(H_GUEST_SET_STATE); + spapr_unregister_hypercall(H_GUEST_GET_STATE); + spapr_unregister_hypercall(H_GUEST_RUN_VCPU); +} + #else void spapr_exit_nested(PowerPCCPU *cpu, int excp) { g_assert_not_reached(); } -void spapr_register_nested(void) +void spapr_register_nested_hv(void) { /* DO NOTHING */ } + +void spapr_unregister_nested_hv(void) +{ + /* DO NOTHING */ +} + +bool spapr_get_pate_nested_hv(SpaprMachineState *spapr, PowerPCCPU *cpu, + target_ulong lpid, ppc_v3_pate_t *entry) +{ + return false; +} + +bool spapr_get_pate_nested_papr(SpaprMachineState *spapr, PowerPCCPU *cpu, + target_ulong lpid, ppc_v3_pate_t *entry) +{ + return false; +} + +void spapr_register_nested_papr(void) +{ + /* DO NOTHING */ +} + +void spapr_unregister_nested_papr(void) +{ + /* DO NOTHING */ +} + +void spapr_nested_gsb_init(void) +{ + /* DO NOTHING */ +} + #endif diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 62804cc228..b1dcb3857f 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -312,12 +312,12 @@ static void ccw_init(MachineState *machine) static void s390_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { + ERRP_GUARD(); MachineState *ms = MACHINE(hotplug_dev); S390CPU *cpu = S390_CPU(dev); - ERRP_GUARD(); g_assert(!ms->possible_cpus->cpus[cpu->env.core_id].cpu); - ms->possible_cpus->cpus[cpu->env.core_id].cpu = OBJECT(dev); + ms->possible_cpus->cpus[cpu->env.core_id].cpu = CPU(dev); if (s390_has_topology()) { s390_topology_setup_cpu(ms, cpu, errp); diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 58a00336c2..ae26bc19a4 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -220,6 +220,7 @@ static int vhost_scsi_set_workers(VHostSCSICommon *vsc, bool per_virtqueue) static void vhost_scsi_realize(DeviceState *dev, Error **errp) { + ERRP_GUARD(); VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); VHostSCSICommon *vsc = VHOST_SCSI_COMMON(dev); Error *err = NULL; diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c index eda9b58a21..cff6d5abaf 100644 --- a/hw/sparc64/sun4u.c +++ b/hw/sparc64/sun4u.c @@ -360,8 +360,13 @@ static void ebus_realize(PCIDevice *pci_dev, Error **errp) pci_dev->config[0x09] = 0x00; // programming i/f pci_dev->config[0x0D] = 0x0a; // latency_timer - memory_region_init_alias(&s->bar0, OBJECT(s), "bar0", - pci_address_space_io(pci_dev), 0, 0x1000000); + /* + * BAR0 is accessed by OpenBSD but not for ebus device access: allow any + * memory access to this region to succeed which allows the OpenBSD kernel + * to boot. + */ + memory_region_init_io(&s->bar0, OBJECT(s), &unassigned_io_ops, s, + "bar0", 0x1000000); pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar0); memory_region_init_alias(&s->bar1, OBJECT(s), "bar1", pci_address_space_io(pci_dev), 0, 0x8000); diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index e157aa1ff7..7c4caa5938 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -155,6 +155,7 @@ static void vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev, static void vfio_ap_realize(DeviceState *dev, Error **errp) { + ERRP_GUARD(); int ret; Error *err = NULL; VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev); diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 059bfdc07a..011ceaab89 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -39,7 +39,6 @@ #include "sysemu/runstate.h" #include "trace.h" #include "qapi/error.h" -#include "migration/migration.h" #include "migration/misc.h" #include "migration/blocker.h" #include "migration/qemu-file.h" @@ -128,7 +127,7 @@ int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp) error_setg(&multiple_devices_migration_blocker, "Multiple VFIO devices migration is supported only if all of " "them support P2P migration"); - ret = migrate_add_blocker(&multiple_devices_migration_blocker, errp); + ret = migrate_add_blocker_normal(&multiple_devices_migration_blocker, errp); return ret; } @@ -150,14 +149,8 @@ bool vfio_viommu_preset(VFIODevice *vbasedev) static void vfio_set_migration_error(int err) { - MigrationState *ms = migrate_get_current(); - - if (migration_is_setup_or_active(ms->state)) { - WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) { - if (ms->to_dst_file) { - qemu_file_set_error(ms->to_dst_file, err); - } - } + if (migration_is_setup_or_active()) { + migration_file_set_error(err); } } @@ -180,10 +173,8 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev) static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer) { VFIODevice *vbasedev; - MigrationState *ms = migrate_get_current(); - if (ms->state != MIGRATION_STATUS_ACTIVE && - ms->state != MIGRATION_STATUS_DEVICE) { + if (!migration_is_active() && !migration_is_device()) { return false; } @@ -225,7 +216,7 @@ vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer) { VFIODevice *vbasedev; - if (!migration_is_active(migrate_get_current())) { + if (!migration_is_active()) { return false; } diff --git a/hw/vfio/container.c b/hw/vfio/container.c index bd25b9fbad..77bdec276e 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -32,7 +32,6 @@ #include "sysemu/reset.h" #include "trace.h" #include "qapi/error.h" -#include "migration/migration.h" #include "pci.h" VFIOGroupList vfio_group_list = @@ -621,10 +620,15 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, goto free_container_exit; } + ret = vfio_cpr_register_container(bcontainer, errp); + if (ret) { + goto free_container_exit; + } + ret = vfio_ram_block_discard_disable(container, true); if (ret) { error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken"); - goto free_container_exit; + goto unregister_container_exit; } assert(bcontainer->ops->setup); @@ -667,6 +671,9 @@ listener_release_exit: enable_discards_exit: vfio_ram_block_discard_disable(container, false); +unregister_container_exit: + vfio_cpr_unregister_container(bcontainer); + free_container_exit: g_free(container); @@ -710,6 +717,7 @@ static void vfio_disconnect_container(VFIOGroup *group) vfio_container_destroy(bcontainer); trace_vfio_disconnect_container(container->fd); + vfio_cpr_unregister_container(bcontainer); close(container->fd); g_free(container); @@ -719,6 +727,7 @@ static void vfio_disconnect_container(VFIOGroup *group) static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp) { + ERRP_GUARD(); VFIOGroup *group; char path[32]; struct vfio_group_status status = { .argsz = sizeof(status) }; diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c new file mode 100644 index 0000000000..392c2dd95d --- /dev/null +++ b/hw/vfio/cpr.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021-2024 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/vfio/vfio-common.h" +#include "migration/misc.h" +#include "qapi/error.h" +#include "sysemu/runstate.h" + +static int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, + MigrationEvent *e, Error **errp) +{ + if (e->type == MIG_EVENT_PRECOPY_SETUP && + !runstate_check(RUN_STATE_SUSPENDED) && !vm_get_suspended()) { + + error_setg(errp, + "VFIO device only supports cpr-reboot for runstate suspended"); + + return -1; + } + return 0; +} + +int vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp) +{ + migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier, + vfio_cpr_reboot_notifier, + MIG_MODE_CPR_REBOOT); + return 0; +} + +void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer) +{ + migration_remove_notifier(&bcontainer->cpr_reboot_notifier); +} diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c index 6789870802..47b4096c05 100644 --- a/hw/vfio/helpers.c +++ b/hw/vfio/helpers.c @@ -110,6 +110,7 @@ static const char *index_to_str(VFIODevice *vbasedev, int index) int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex, int action, int fd, Error **errp) { + ERRP_GUARD(); struct vfio_irq_set *irq_set; int argsz, ret = 0; const char *name; @@ -613,6 +614,7 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) int vfio_device_get_name(VFIODevice *vbasedev, Error **errp) { + ERRP_GUARD(); struct stat st; if (vbasedev->fd < 0) { @@ -644,6 +646,7 @@ int vfio_device_get_name(VFIODevice *vbasedev, Error **errp) void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) { + ERRP_GUARD(); int fd = monitor_fd_param(monitor_cur(), str, errp); if (fd < 0) { diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 7baf49e6ee..bafddb8f5a 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -412,6 +412,11 @@ found_container: goto err_listener_register; } + ret = vfio_cpr_register_container(bcontainer, errp); + if (ret) { + goto err_listener_register; + } + /* * TODO: examine RAM_BLOCK_DISCARD stuff, should we do group level * for discarding incompatibility check as well? @@ -462,6 +467,7 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) iommufd_cdev_ram_block_discard_disable(false); } + vfio_cpr_unregister_container(bcontainer); iommufd_cdev_detach_container(vbasedev, container); iommufd_cdev_container_destroy(container); vfio_put_address_space(space); diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build index bb98493b53..bba776f75c 100644 --- a/hw/vfio/meson.build +++ b/hw/vfio/meson.build @@ -5,6 +5,7 @@ vfio_ss.add(files( 'container-base.c', 'container.c', 'migration.c', + 'cpr.c', )) vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c')) vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files( diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c index 50140eda87..1149c6b374 100644 --- a/hw/vfio/migration.c +++ b/hw/vfio/migration.c @@ -17,14 +17,12 @@ #include "sysemu/runstate.h" #include "hw/vfio/vfio-common.h" -#include "migration/migration.h" -#include "migration/options.h" +#include "migration/misc.h" #include "migration/savevm.h" #include "migration/vmstate.h" #include "migration/qemu-file.h" #include "migration/register.h" #include "migration/blocker.h" -#include "migration/misc.h" #include "qapi/error.h" #include "exec/ramlist.h" #include "exec/ram_addr.h" @@ -505,6 +503,12 @@ static bool vfio_is_active_iterate(void *opaque) return vfio_device_state_is_precopy(vbasedev); } +/* + * Note about migration rate limiting: VFIO migration buffer size is currently + * limited to 1MB, so there is no need to check if migration rate exceeded (as + * in the worst case it will exceed by 1MB). However, if the buffer size is + * later changed to a bigger value, migration rate should be enforced here. + */ static int vfio_save_iterate(QEMUFile *f, void *opaque) { VFIODevice *vbasedev = opaque; @@ -529,11 +533,7 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque) trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size, migration->precopy_dirty_size); - /* - * A VFIO device's pre-copy dirty_bytes is not guaranteed to reach zero. - * Return 1 so following handlers will not be potentially blocked. - */ - return 1; + return !migration->precopy_init_size && !migration->precopy_dirty_size; } static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) @@ -713,9 +713,7 @@ static void vfio_vmstate_change_prepare(void *opaque, bool running, * Migration should be aborted in this case, but vm_state_notify() * currently does not support reporting failures. */ - if (migrate_get_current()->to_dst_file) { - qemu_file_set_error(migrate_get_current()->to_dst_file, ret); - } + migration_file_set_error(ret); } trace_vfio_vmstate_change_prepare(vbasedev->name, running, @@ -745,9 +743,7 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state) * Migration should be aborted in this case, but vm_state_notify() * currently does not support reporting failures. */ - if (migrate_get_current()->to_dst_file) { - qemu_file_set_error(migrate_get_current()->to_dst_file, ret); - } + migration_file_set_error(ret); } trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), @@ -889,7 +885,7 @@ static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) vbasedev->migration_blocker = error_copy(err); error_free(err); - return migrate_add_blocker(&vbasedev->migration_blocker, errp); + return migrate_add_blocker_normal(&vbasedev->migration_blocker, errp); } /* ---------------------------------------------------------------------- */ diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index 84b1a7b948..496fd1ee86 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -1538,6 +1538,7 @@ static bool is_valid_std_cap_offset(uint8_t pos) static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) { + ERRP_GUARD(); PCIDevice *pdev = &vdev->pdev; int ret, pos; bool c8_conflict = false, d4_conflict = false; @@ -1630,6 +1631,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) #define VMD_SHADOW_CAP_LEN 24 static int vfio_add_vmd_shadow_cap(VFIOPCIDevice *vdev, Error **errp) { + ERRP_GUARD(); uint8_t membar_phys[16]; int ret, pos = 0xE8; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index a1522a011a..64780d1b79 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2136,6 +2136,7 @@ static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos) static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) { + ERRP_GUARD(); PCIDevice *pdev = &vdev->pdev; uint8_t cap_id, next, size; int ret; @@ -2942,6 +2943,7 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) static void vfio_realize(PCIDevice *pdev, Error **errp) { + ERRP_GUARD(); VFIOPCIDevice *vdev = VFIO_PCI(pdev); VFIODevice *vbasedev = &vdev->vbasedev; char *tmp, *subsys; diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index a8d9b7da63..dcd2365fb3 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -576,6 +576,7 @@ static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp) */ static void vfio_platform_realize(DeviceState *dev, Error **errp) { + ERRP_GUARD(); VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev); SysBusDevice *sbdev = SYS_BUS_DEVICE(dev); VFIODevice *vbasedev = &vdev->vbasedev; diff --git a/hw/virtio/vhost-user-scmi.c b/hw/virtio/vhost-user-scmi.c index 918bb7dcf7..300847e672 100644 --- a/hw/virtio/vhost-user-scmi.c +++ b/hw/virtio/vhost-user-scmi.c @@ -56,9 +56,9 @@ static int vu_scmi_start(VirtIODevice *vdev) goto err_host_notifiers; } - vhost_ack_features(&scmi->vhost_dev, feature_bits, vdev->guest_features); + vhost_ack_features(vhost_dev, feature_bits, vdev->guest_features); - ret = vhost_dev_start(&scmi->vhost_dev, vdev, true); + ret = vhost_dev_start(vhost_dev, vdev, true); if (ret < 0) { error_report("Error starting vhost-user-scmi: %d", ret); goto err_guest_notifiers; @@ -71,7 +71,7 @@ static int vu_scmi_start(VirtIODevice *vdev) * enabling/disabling irqfd. */ for (i = 0; i < scmi->vhost_dev.nvqs; i++) { - vhost_virtqueue_mask(&scmi->vhost_dev, vdev, i, false); + vhost_virtqueue_mask(vhost_dev, vdev, i, false); } return 0; diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 9d654efd3d..cdf9af4a4b 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -26,7 +26,6 @@ #include "qemu/sockets.h" #include "sysemu/runstate.h" #include "sysemu/cryptodev.h" -#include "migration/migration.h" #include "migration/postcopy-ram.h" #include "trace.h" #include "exec/ramblock.h" diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index d5ca0b5a10..3d4a5a97f4 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -121,6 +121,7 @@ static const VMStateDescription vmstate_virtio_vhost_vsock = { static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) { + ERRP_GUARD(); VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(dev); VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostVSock *vsock = VHOST_VSOCK(dev); diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 2c9ac79468..2e4e040db8 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -2199,6 +2199,7 @@ int vhost_check_device_state(struct vhost_dev *dev, Error **errp) int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp) { + ERRP_GUARD(); /* Maximum chunk size in which to transfer the state */ const size_t chunk_size = 1 * 1024 * 1024; g_autofree void *transfer_buf = NULL; @@ -2291,6 +2292,7 @@ fail: int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp) { + ERRP_GUARD(); size_t transfer_buf_size = 0; g_autofree void *transfer_buf = NULL; g_autoptr(GError) g_err = NULL; diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 89f853fa9e..609e39a821 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -31,8 +31,6 @@ #include "trace.h" #include "qemu/error-report.h" #include "migration/misc.h" -#include "migration/migration.h" -#include "migration/options.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index e42ac6e7f9..eaaf86402c 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -2095,7 +2095,7 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; uint8_t *config; uint32_t size; - VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtIODevice *vdev = virtio_bus_get_device(bus); /* * Virtio capabilities present without diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c index 4f956d048e..7f59080ba7 100644 --- a/hw/xen/xen-mapcache.c +++ b/hw/xen/xen-mapcache.c @@ -476,11 +476,37 @@ static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer) g_free(entry); } -void xen_invalidate_map_cache_entry(uint8_t *buffer) +typedef struct XenMapCacheData { + Coroutine *co; + uint8_t *buffer; +} XenMapCacheData; + +static void xen_invalidate_map_cache_entry_bh(void *opaque) { + XenMapCacheData *data = opaque; + mapcache_lock(); - xen_invalidate_map_cache_entry_unlocked(buffer); + xen_invalidate_map_cache_entry_unlocked(data->buffer); mapcache_unlock(); + + aio_co_wake(data->co); +} + +void coroutine_mixed_fn xen_invalidate_map_cache_entry(uint8_t *buffer) +{ + if (qemu_in_coroutine()) { + XenMapCacheData data = { + .co = qemu_coroutine_self(), + .buffer = buffer, + }; + aio_bh_schedule_oneshot(qemu_get_current_aio_context(), + xen_invalidate_map_cache_entry_bh, &data); + qemu_coroutine_yield(); + } else { + mapcache_lock(); + xen_invalidate_map_cache_entry_unlocked(buffer); + mapcache_unlock(); + } } void xen_invalidate_map_cache(void) diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index a8edabdabc..3635d1b39f 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -711,7 +711,7 @@ static void xen_pt_destroy(PCIDevice *d) { uint8_t intx; int rc; - if (machine_irq && !xen_host_pci_device_closed(&s->real_device)) { + if (machine_irq && !xen_host_pci_device_closed(host_dev)) { intx = xen_pt_pci_intx(s); rc = xc_domain_unbind_pt_irq(xen_xc, xen_domid, machine_irq, PT_IRQ_TYPE_PCI, @@ -760,8 +760,8 @@ static void xen_pt_destroy(PCIDevice *d) { memory_listener_unregister(&s->io_listener); s->listener_set = false; } - if (!xen_host_pci_device_closed(&s->real_device)) { - xen_host_pci_device_put(&s->real_device); + if (!xen_host_pci_device_closed(host_dev)) { + xen_host_pci_device_put(host_dev); } } /* init */ diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index ba4cd78238..3edaeab1e3 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -292,7 +292,10 @@ static int xen_pt_header_type_reg_init(XenPCIPassthroughState *s, uint32_t *data) { /* read PCI_HEADER_TYPE */ - *data = reg->init_val | 0x80; + *data = reg->init_val; + if ((PCI_DEVICE(s)->cap_present & QEMU_PCI_CAP_MULTIFUNCTION)) { + *data |= PCI_HEADER_TYPE_MULTI_FUNCTION; + } return 0; } @@ -677,7 +680,7 @@ static XenPTRegInfo xen_pt_emu_reg_header0[] = { .size = 1, .init_val = 0x00, .ro_mask = 0xFF, - .emu_mask = 0x00, + .emu_mask = PCI_HEADER_TYPE_MULTI_FUNCTION, .init = xen_pt_header_type_reg_init, .u.b.read = xen_pt_byte_reg_read, .u.b.write = xen_pt_byte_reg_write, |