diff options
Diffstat (limited to 'hw')
66 files changed, 2617 insertions, 515 deletions
diff --git a/hw/arm/allwinner-h3.c b/hw/arm/allwinner-h3.c index bfce3c8d92..69d0ad6f50 100644 --- a/hw/arm/allwinner-h3.c +++ b/hw/arm/allwinner-h3.c @@ -54,6 +54,8 @@ const hwaddr allwinner_h3_memmap[] = { [AW_H3_DEV_UART2] = 0x01c28800, [AW_H3_DEV_UART3] = 0x01c28c00, [AW_H3_DEV_TWI0] = 0x01c2ac00, + [AW_H3_DEV_TWI1] = 0x01c2b000, + [AW_H3_DEV_TWI2] = 0x01c2b400, [AW_H3_DEV_EMAC] = 0x01c30000, [AW_H3_DEV_DRAMCOM] = 0x01c62000, [AW_H3_DEV_DRAMCTL] = 0x01c63000, @@ -64,6 +66,7 @@ const hwaddr allwinner_h3_memmap[] = { [AW_H3_DEV_GIC_VCPU] = 0x01c86000, [AW_H3_DEV_RTC] = 0x01f00000, [AW_H3_DEV_CPUCFG] = 0x01f01c00, + [AW_H3_DEV_R_TWI] = 0x01f02400, [AW_H3_DEV_SDRAM] = 0x40000000 }; @@ -107,8 +110,6 @@ struct AwH3Unimplemented { { "uart1", 0x01c28400, 1 * KiB }, { "uart2", 0x01c28800, 1 * KiB }, { "uart3", 0x01c28c00, 1 * KiB }, - { "twi1", 0x01c2b000, 1 * KiB }, - { "twi2", 0x01c2b400, 1 * KiB }, { "scr", 0x01c2c400, 1 * KiB }, { "gpu", 0x01c40000, 64 * KiB }, { "hstmr", 0x01c60000, 4 * KiB }, @@ -123,7 +124,6 @@ struct AwH3Unimplemented { { "r_prcm", 0x01f01400, 1 * KiB }, { "r_twd", 0x01f01800, 1 * KiB }, { "r_cir-rx", 0x01f02000, 1 * KiB }, - { "r_twi", 0x01f02400, 1 * KiB }, { "r_uart", 0x01f02800, 1 * KiB }, { "r_pio", 0x01f02c00, 1 * KiB }, { "r_pwm", 0x01f03800, 1 * KiB }, @@ -151,8 +151,11 @@ enum { AW_H3_GIC_SPI_UART2 = 2, AW_H3_GIC_SPI_UART3 = 3, AW_H3_GIC_SPI_TWI0 = 6, + AW_H3_GIC_SPI_TWI1 = 7, + AW_H3_GIC_SPI_TWI2 = 8, AW_H3_GIC_SPI_TIMER0 = 18, AW_H3_GIC_SPI_TIMER1 = 19, + AW_H3_GIC_SPI_R_TWI = 44, AW_H3_GIC_SPI_MMC0 = 60, AW_H3_GIC_SPI_EHCI0 = 72, AW_H3_GIC_SPI_OHCI0 = 73, @@ -227,7 +230,10 @@ static void allwinner_h3_init(Object *obj) object_initialize_child(obj, "rtc", &s->rtc, TYPE_AW_RTC_SUN6I); - object_initialize_child(obj, "twi0", &s->i2c0, TYPE_AW_I2C); + object_initialize_child(obj, "twi0", &s->i2c0, TYPE_AW_I2C_SUN6I); + object_initialize_child(obj, "twi1", &s->i2c1, TYPE_AW_I2C_SUN6I); + object_initialize_child(obj, "twi2", &s->i2c2, TYPE_AW_I2C_SUN6I); + object_initialize_child(obj, "r_twi", &s->r_twi, TYPE_AW_I2C_SUN6I); } static void allwinner_h3_realize(DeviceState *dev, Error **errp) @@ -432,6 +438,21 @@ static void allwinner_h3_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c0), 0, qdev_get_gpio_in(DEVICE(&s->gic), AW_H3_GIC_SPI_TWI0)); + sysbus_realize(SYS_BUS_DEVICE(&s->i2c1), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(&s->i2c1), 0, s->memmap[AW_H3_DEV_TWI1]); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c1), 0, + qdev_get_gpio_in(DEVICE(&s->gic), AW_H3_GIC_SPI_TWI1)); + + sysbus_realize(SYS_BUS_DEVICE(&s->i2c2), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(&s->i2c2), 0, s->memmap[AW_H3_DEV_TWI2]); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c2), 0, + qdev_get_gpio_in(DEVICE(&s->gic), AW_H3_GIC_SPI_TWI2)); + + sysbus_realize(SYS_BUS_DEVICE(&s->r_twi), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(&s->r_twi), 0, s->memmap[AW_H3_DEV_R_TWI]); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->r_twi), 0, + qdev_get_gpio_in(DEVICE(&s->gic), AW_H3_GIC_SPI_R_TWI)); + /* Unimplemented devices */ for (i = 0; i < ARRAY_SIZE(unimplemented); i++) { create_unimplemented_device(unimplemented[i].device_name, diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c index 27dda58338..86601cb1a5 100644 --- a/hw/arm/aspeed.c +++ b/hw/arm/aspeed.c @@ -241,12 +241,9 @@ static void aspeed_reset_secondary(ARMCPU *cpu, cpu_set_pc(cs, info->smp_loader_start); } -#define FIRMWARE_ADDR 0x0 - -static void write_boot_rom(DriveInfo *dinfo, hwaddr addr, size_t rom_size, +static void write_boot_rom(BlockBackend *blk, hwaddr addr, size_t rom_size, Error **errp) { - BlockBackend *blk = blk_by_legacy_dinfo(dinfo); g_autofree void *storage = NULL; int64_t size; @@ -272,6 +269,22 @@ static void write_boot_rom(DriveInfo *dinfo, hwaddr addr, size_t rom_size, rom_add_blob_fixed("aspeed.boot_rom", storage, rom_size, addr); } +/* + * Create a ROM and copy the flash contents at the expected address + * (0x0). Boots faster than execute-in-place. + */ +static void aspeed_install_boot_rom(AspeedSoCState *soc, BlockBackend *blk, + uint64_t rom_size) +{ + MemoryRegion *boot_rom = g_new(MemoryRegion, 1); + + memory_region_init_rom(boot_rom, NULL, "aspeed.boot_rom", rom_size, + &error_abort); + memory_region_add_subregion_overlap(&soc->spi_boot_container, 0, + boot_rom, 1); + write_boot_rom(blk, ASPEED_SOC_SPI_BOOT_ADDR, rom_size, &error_abort); +} + void aspeed_board_init_flashes(AspeedSMCState *s, const char *flashtype, unsigned int count, int unit0) { @@ -293,7 +306,7 @@ void aspeed_board_init_flashes(AspeedSMCState *s, const char *flashtype, qdev_realize_and_unref(dev, BUS(s->spi), &error_fatal); cs_line = qdev_get_gpio_in_named(dev, SSI_GPIO_CS, 0); - sysbus_connect_irq(SYS_BUS_DEVICE(s), i + 1, cs_line); + qdev_connect_gpio_out_named(DEVICE(s), "cs", i, cs_line); } } @@ -332,7 +345,6 @@ static void aspeed_machine_init(MachineState *machine) AspeedMachineState *bmc = ASPEED_MACHINE(machine); AspeedMachineClass *amc = ASPEED_MACHINE_GET_CLASS(machine); AspeedSoCClass *sc; - DriveInfo *drive0 = drive_get(IF_MTD, 0, 0); int i; NICInfo *nd = &nd_table[0]; @@ -382,32 +394,6 @@ static void aspeed_machine_init(MachineState *machine) bmc->spi_model ? bmc->spi_model : amc->spi_model, 1, amc->num_cs); - /* Install first FMC flash content as a boot rom. */ - if (drive0) { - AspeedSMCFlash *fl = &bmc->soc.fmc.flashes[0]; - MemoryRegion *boot_rom = g_new(MemoryRegion, 1); - uint64_t size = memory_region_size(&fl->mmio); - - /* - * create a ROM region using the default mapping window size of - * the flash module. The window size is 64MB for the AST2400 - * SoC and 128MB for the AST2500 SoC, which is twice as big as - * needed by the flash modules of the Aspeed machines. - */ - if (ASPEED_MACHINE(machine)->mmio_exec) { - memory_region_init_alias(boot_rom, NULL, "aspeed.boot_rom", - &fl->mmio, 0, size); - memory_region_add_subregion(get_system_memory(), FIRMWARE_ADDR, - boot_rom); - } else { - memory_region_init_rom(boot_rom, NULL, "aspeed.boot_rom", - size, &error_abort); - memory_region_add_subregion(get_system_memory(), FIRMWARE_ADDR, - boot_rom); - write_boot_rom(drive0, FIRMWARE_ADDR, size, &error_abort); - } - } - if (machine->kernel_filename && sc->num_cpus > 1) { /* With no u-boot we must set up a boot stub for the secondary CPU */ MemoryRegion *smpboot = g_new(MemoryRegion, 1); @@ -438,6 +424,16 @@ static void aspeed_machine_init(MachineState *machine) drive_get(IF_SD, 0, bmc->soc.sdhci.num_slots)); } + if (!bmc->mmio_exec) { + DriveInfo *mtd0 = drive_get(IF_MTD, 0, 0); + + if (mtd0) { + uint64_t rom_size = memory_region_size(&bmc->soc.spi_boot); + aspeed_install_boot_rom(&bmc->soc, blk_by_legacy_dinfo(mtd0), + rom_size); + } + } + arm_load_kernel(ARM_CPU(first_cpu), machine, &aspeed_board_binfo); } @@ -521,6 +517,15 @@ static void ast2600_evb_i2c_init(AspeedMachineState *bmc) TYPE_TMP105, 0x4d); } +static void yosemitev2_bmc_i2c_init(AspeedMachineState *bmc) +{ + AspeedSoCState *soc = &bmc->soc; + + at24c_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 4), 0x51, 128 * KiB); + at24c_eeprom_init_rom(aspeed_i2c_get_bus(&soc->i2c, 8), 0x51, 128 * KiB, + yosemitev2_bmc_fruid, yosemitev2_bmc_fruid_len); +} + static void romulus_bmc_i2c_init(AspeedMachineState *bmc) { AspeedSoCState *soc = &bmc->soc; @@ -530,6 +535,15 @@ static void romulus_bmc_i2c_init(AspeedMachineState *bmc) i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 11), "ds1338", 0x32); } +static void tiogapass_bmc_i2c_init(AspeedMachineState *bmc) +{ + AspeedSoCState *soc = &bmc->soc; + + at24c_eeprom_init(aspeed_i2c_get_bus(&soc->i2c, 4), 0x54, 128 * KiB); + at24c_eeprom_init_rom(aspeed_i2c_get_bus(&soc->i2c, 6), 0x54, 128 * KiB, + tiogapass_bmc_fruid, tiogapass_bmc_fruid_len); +} + static void create_pca9552(AspeedSoCState *soc, int bus_id, int addr) { i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, bus_id), @@ -840,42 +854,46 @@ static void fuji_bmc_i2c_init(AspeedMachineState *bmc) i2c_slave_create_simple(i2c[17], TYPE_LM75, 0x4c); i2c_slave_create_simple(i2c[17], TYPE_LM75, 0x4d); - at24c_eeprom_init(i2c[19], 0x52, 64 * KiB); - at24c_eeprom_init(i2c[20], 0x50, 2 * KiB); - at24c_eeprom_init(i2c[22], 0x52, 2 * KiB); + /* + * EEPROM 24c64 size is 64Kbits or 8 Kbytes + * 24c02 size is 2Kbits or 256 bytes + */ + at24c_eeprom_init(i2c[19], 0x52, 8 * KiB); + at24c_eeprom_init(i2c[20], 0x50, 256); + at24c_eeprom_init(i2c[22], 0x52, 256); i2c_slave_create_simple(i2c[3], TYPE_LM75, 0x48); i2c_slave_create_simple(i2c[3], TYPE_LM75, 0x49); i2c_slave_create_simple(i2c[3], TYPE_LM75, 0x4a); i2c_slave_create_simple(i2c[3], TYPE_TMP422, 0x4c); - at24c_eeprom_init(i2c[8], 0x51, 64 * KiB); + at24c_eeprom_init(i2c[8], 0x51, 8 * KiB); i2c_slave_create_simple(i2c[8], TYPE_LM75, 0x4a); i2c_slave_create_simple(i2c[50], TYPE_LM75, 0x4c); - at24c_eeprom_init(i2c[50], 0x52, 64 * KiB); + at24c_eeprom_init(i2c[50], 0x52, 8 * KiB); i2c_slave_create_simple(i2c[51], TYPE_TMP75, 0x48); i2c_slave_create_simple(i2c[52], TYPE_TMP75, 0x49); i2c_slave_create_simple(i2c[59], TYPE_TMP75, 0x48); i2c_slave_create_simple(i2c[60], TYPE_TMP75, 0x49); - at24c_eeprom_init(i2c[65], 0x53, 64 * KiB); + at24c_eeprom_init(i2c[65], 0x53, 8 * KiB); i2c_slave_create_simple(i2c[66], TYPE_TMP75, 0x49); i2c_slave_create_simple(i2c[66], TYPE_TMP75, 0x48); - at24c_eeprom_init(i2c[68], 0x52, 64 * KiB); - at24c_eeprom_init(i2c[69], 0x52, 64 * KiB); - at24c_eeprom_init(i2c[70], 0x52, 64 * KiB); - at24c_eeprom_init(i2c[71], 0x52, 64 * KiB); + at24c_eeprom_init(i2c[68], 0x52, 8 * KiB); + at24c_eeprom_init(i2c[69], 0x52, 8 * KiB); + at24c_eeprom_init(i2c[70], 0x52, 8 * KiB); + at24c_eeprom_init(i2c[71], 0x52, 8 * KiB); - at24c_eeprom_init(i2c[73], 0x53, 64 * KiB); + at24c_eeprom_init(i2c[73], 0x53, 8 * KiB); i2c_slave_create_simple(i2c[74], TYPE_TMP75, 0x49); i2c_slave_create_simple(i2c[74], TYPE_TMP75, 0x48); - at24c_eeprom_init(i2c[76], 0x52, 64 * KiB); - at24c_eeprom_init(i2c[77], 0x52, 64 * KiB); - at24c_eeprom_init(i2c[78], 0x52, 64 * KiB); - at24c_eeprom_init(i2c[79], 0x52, 64 * KiB); - at24c_eeprom_init(i2c[28], 0x50, 2 * KiB); + at24c_eeprom_init(i2c[76], 0x52, 8 * KiB); + at24c_eeprom_init(i2c[77], 0x52, 8 * KiB); + at24c_eeprom_init(i2c[78], 0x52, 8 * KiB); + at24c_eeprom_init(i2c[79], 0x52, 8 * KiB); + at24c_eeprom_init(i2c[28], 0x50, 256); for (int i = 0; i < 8; i++) { at24c_eeprom_init(i2c[81 + i * 8], 0x56, 64 * KiB); @@ -1174,6 +1192,24 @@ static void aspeed_machine_ast2500_evb_class_init(ObjectClass *oc, void *data) aspeed_soc_num_cpus(amc->soc_name); }; +static void aspeed_machine_yosemitev2_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); + + mc->desc = "Facebook YosemiteV2 BMC (ARM1176)"; + amc->soc_name = "ast2500-a1"; + amc->hw_strap1 = AST2500_EVB_HW_STRAP1; + amc->hw_strap2 = 0; + amc->fmc_model = "n25q256a"; + amc->spi_model = "mx25l25635e"; + amc->num_cs = 2; + amc->i2c_init = yosemitev2_bmc_i2c_init; + mc->default_ram_size = 512 * MiB; + mc->default_cpus = mc->min_cpus = mc->max_cpus = + aspeed_soc_num_cpus(amc->soc_name); +}; + static void aspeed_machine_romulus_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -1191,6 +1227,25 @@ static void aspeed_machine_romulus_class_init(ObjectClass *oc, void *data) aspeed_soc_num_cpus(amc->soc_name); }; +static void aspeed_machine_tiogapass_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc); + + mc->desc = "Facebook Tiogapass BMC (ARM1176)"; + amc->soc_name = "ast2500-a1"; + amc->hw_strap1 = AST2500_EVB_HW_STRAP1; + amc->hw_strap2 = 0; + amc->fmc_model = "n25q256a"; + amc->spi_model = "mx25l25635e"; + amc->num_cs = 2; + amc->i2c_init = tiogapass_bmc_i2c_init; + mc->default_ram_size = 1 * GiB; + mc->default_cpus = mc->min_cpus = mc->max_cpus = + aspeed_soc_num_cpus(amc->soc_name); + aspeed_soc_num_cpus(amc->soc_name); +}; + static void aspeed_machine_sonorapass_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -1563,10 +1618,18 @@ static const TypeInfo aspeed_machine_types[] = { .parent = TYPE_ASPEED_MACHINE, .class_init = aspeed_machine_ast2600_evb_class_init, }, { + .name = MACHINE_TYPE_NAME("yosemitev2-bmc"), + .parent = TYPE_ASPEED_MACHINE, + .class_init = aspeed_machine_yosemitev2_class_init, + }, { .name = MACHINE_TYPE_NAME("tacoma-bmc"), .parent = TYPE_ASPEED_MACHINE, .class_init = aspeed_machine_tacoma_class_init, }, { + .name = MACHINE_TYPE_NAME("tiogapass-bmc"), + .parent = TYPE_ASPEED_MACHINE, + .class_init = aspeed_machine_tiogapass_class_init, + }, { .name = MACHINE_TYPE_NAME("g220a-bmc"), .parent = TYPE_ASPEED_MACHINE, .class_init = aspeed_machine_g220a_class_init, diff --git a/hw/arm/aspeed_ast2600.c b/hw/arm/aspeed_ast2600.c index bb2769df04..1bf1246148 100644 --- a/hw/arm/aspeed_ast2600.c +++ b/hw/arm/aspeed_ast2600.c @@ -21,6 +21,7 @@ #define ASPEED_SOC_DPMCU_SIZE 0x00040000 static const hwaddr aspeed_soc_ast2600_memmap[] = { + [ASPEED_DEV_SPI_BOOT] = ASPEED_SOC_SPI_BOOT_ADDR, [ASPEED_DEV_SRAM] = 0x10000000, [ASPEED_DEV_DPMCU] = 0x18000000, /* 0x16000000 0x17FFFFFF : AHB BUS do LPC Bus bridge */ @@ -282,6 +283,12 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp) qemu_irq irq; g_autofree char *sram_name = NULL; + /* Default boot region (SPI memory or ROMs) */ + memory_region_init(&s->spi_boot_container, OBJECT(s), + "aspeed.spi_boot_container", 0x10000000); + memory_region_add_subregion(s->memory, sc->memmap[ASPEED_DEV_SPI_BOOT], + &s->spi_boot_container); + /* IO space */ aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->iomem), "aspeed.io", sc->memmap[ASPEED_DEV_IOMEM], @@ -431,6 +438,12 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->fmc), 0, aspeed_soc_get_irq(s, ASPEED_DEV_FMC)); + /* Set up an alias on the FMC CE0 region (boot default) */ + MemoryRegion *fmc0_mmio = &s->fmc.flashes[0].mmio; + memory_region_init_alias(&s->spi_boot, OBJECT(s), "aspeed.spi_boot", + fmc0_mmio, 0, memory_region_size(fmc0_mmio)); + memory_region_add_subregion(&s->spi_boot_container, 0x0, &s->spi_boot); + /* SPI */ for (i = 0; i < sc->spis_num; i++) { object_property_set_link(OBJECT(&s->spi[i]), "dram", diff --git a/hw/arm/aspeed_eeprom.c b/hw/arm/aspeed_eeprom.c index 04463acc9d..2fb2d5dbb7 100644 --- a/hw/arm/aspeed_eeprom.c +++ b/hw/arm/aspeed_eeprom.c @@ -6,6 +6,27 @@ #include "aspeed_eeprom.h" +/* Tiogapass BMC FRU */ +const uint8_t tiogapass_bmc_fruid[] = { + 0x01, 0x00, 0x00, 0x01, 0x0d, 0x00, 0x00, 0xf1, 0x01, 0x0c, 0x00, 0x36, + 0xe6, 0xd0, 0xc6, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xd2, 0x42, 0x4d, + 0x43, 0x20, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x20, 0x4d, 0x6f, + 0x64, 0x75, 0x6c, 0x65, 0xcd, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, + 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xce, 0x58, 0x58, 0x58, 0x58, 0x58, + 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc3, 0x31, 0x2e, + 0x30, 0xc9, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xd2, + 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, + 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc1, 0x39, 0x01, 0x0c, 0x00, 0xc6, + 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xd2, 0x54, 0x69, 0x6f, 0x67, 0x61, + 0x20, 0x50, 0x61, 0x73, 0x73, 0x20, 0x53, 0x69, 0x6e, 0x67, 0x6c, 0x65, + 0x32, 0xce, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, + 0x58, 0x58, 0x58, 0x58, 0xc4, 0x58, 0x58, 0x58, 0x32, 0xcd, 0x58, 0x58, + 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc7, + 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc3, 0x31, 0x2e, 0x30, 0xc9, + 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc8, 0x43, 0x6f, + 0x6e, 0x66, 0x69, 0x67, 0x20, 0x41, 0xc1, 0x45, +}; + const uint8_t fby35_nic_fruid[] = { 0x01, 0x00, 0x00, 0x01, 0x0f, 0x20, 0x00, 0xcf, 0x01, 0x0e, 0x19, 0xd7, 0x5e, 0xcf, 0xc8, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xdd, @@ -77,6 +98,30 @@ const uint8_t fby35_bmc_fruid[] = { 0x6e, 0x66, 0x69, 0x67, 0x20, 0x41, 0xc1, 0x45, }; +/* Yosemite V2 BMC FRU */ +const uint8_t yosemitev2_bmc_fruid[] = { + 0x01, 0x00, 0x00, 0x01, 0x0d, 0x00, 0x00, 0xf1, 0x01, 0x0c, 0x00, 0x36, + 0xe6, 0xd0, 0xc6, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xd2, 0x42, 0x4d, + 0x43, 0x20, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x20, 0x4d, 0x6f, + 0x64, 0x75, 0x6c, 0x65, 0xcd, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, + 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xce, 0x58, 0x58, 0x58, 0x58, 0x58, + 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc3, 0x31, 0x2e, + 0x30, 0xc9, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xd2, + 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, + 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc1, 0x39, 0x01, 0x0c, 0x00, 0xc6, + 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xd2, 0x59, 0x6f, 0x73, 0x65, 0x6d, + 0x69, 0x74, 0x65, 0x20, 0x56, 0x32, 0x2e, 0x30, 0x20, 0x45, 0x56, 0x54, + 0x32, 0xce, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, + 0x58, 0x58, 0x58, 0x58, 0xc4, 0x45, 0x56, 0x54, 0x32, 0xcd, 0x58, 0x58, + 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc7, + 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc3, 0x31, 0x2e, 0x30, 0xc9, + 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0x58, 0xc8, 0x43, 0x6f, + 0x6e, 0x66, 0x69, 0x67, 0x20, 0x41, 0xc1, 0x45, +}; + +const size_t tiogapass_bmc_fruid_len = sizeof(tiogapass_bmc_fruid); const size_t fby35_nic_fruid_len = sizeof(fby35_nic_fruid); const size_t fby35_bb_fruid_len = sizeof(fby35_bb_fruid); const size_t fby35_bmc_fruid_len = sizeof(fby35_bmc_fruid); + +const size_t yosemitev2_bmc_fruid_len = sizeof(yosemitev2_bmc_fruid); diff --git a/hw/arm/aspeed_eeprom.h b/hw/arm/aspeed_eeprom.h index a0f848fa6e..86db6f0479 100644 --- a/hw/arm/aspeed_eeprom.h +++ b/hw/arm/aspeed_eeprom.h @@ -9,6 +9,9 @@ #include "qemu/osdep.h" +extern const uint8_t tiogapass_bmc_fruid[]; +extern const size_t tiogapass_bmc_fruid_len; + extern const uint8_t fby35_nic_fruid[]; extern const uint8_t fby35_bb_fruid[]; extern const uint8_t fby35_bmc_fruid[]; @@ -16,4 +19,7 @@ extern const size_t fby35_nic_fruid_len; extern const size_t fby35_bb_fruid_len; extern const size_t fby35_bmc_fruid_len; +extern const uint8_t yosemitev2_bmc_fruid[]; +extern const size_t yosemitev2_bmc_fruid_len; + #endif diff --git a/hw/arm/aspeed_soc.c b/hw/arm/aspeed_soc.c index e884d6badc..bf22258de9 100644 --- a/hw/arm/aspeed_soc.c +++ b/hw/arm/aspeed_soc.c @@ -25,6 +25,7 @@ #define ASPEED_SOC_IOMEM_SIZE 0x00200000 static const hwaddr aspeed_soc_ast2400_memmap[] = { + [ASPEED_DEV_SPI_BOOT] = ASPEED_SOC_SPI_BOOT_ADDR, [ASPEED_DEV_IOMEM] = 0x1E600000, [ASPEED_DEV_FMC] = 0x1E620000, [ASPEED_DEV_SPI1] = 0x1E630000, @@ -59,6 +60,7 @@ static const hwaddr aspeed_soc_ast2400_memmap[] = { }; static const hwaddr aspeed_soc_ast2500_memmap[] = { + [ASPEED_DEV_SPI_BOOT] = ASPEED_SOC_SPI_BOOT_ADDR, [ASPEED_DEV_IOMEM] = 0x1E600000, [ASPEED_DEV_FMC] = 0x1E620000, [ASPEED_DEV_SPI1] = 0x1E630000, @@ -245,6 +247,12 @@ static void aspeed_soc_realize(DeviceState *dev, Error **errp) Error *err = NULL; g_autofree char *sram_name = NULL; + /* Default boot region (SPI memory or ROMs) */ + memory_region_init(&s->spi_boot_container, OBJECT(s), + "aspeed.spi_boot_container", 0x10000000); + memory_region_add_subregion(s->memory, sc->memmap[ASPEED_DEV_SPI_BOOT], + &s->spi_boot_container); + /* IO space */ aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->iomem), "aspeed.io", sc->memmap[ASPEED_DEV_IOMEM], @@ -354,6 +362,12 @@ static void aspeed_soc_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->fmc), 0, aspeed_soc_get_irq(s, ASPEED_DEV_FMC)); + /* Set up an alias on the FMC CE0 region (boot default) */ + MemoryRegion *fmc0_mmio = &s->fmc.flashes[0].mmio; + memory_region_init_alias(&s->spi_boot, OBJECT(s), "aspeed.spi_boot", + fmc0_mmio, 0, memory_region_size(fmc0_mmio)); + memory_region_add_subregion(&s->spi_boot_container, 0x0, &s->spi_boot); + /* SPI */ for (i = 0; i < sc->spis_num; i++) { if (!sysbus_realize(SYS_BUS_DEVICE(&s->spi[i]), errp)) { diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 1e021c4a34..50e5141116 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -926,6 +926,12 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base, return -1; } size = len; + + /* Unpack the image if it is a EFI zboot image */ + if (unpack_efi_zboot_image(&buffer, &size) < 0) { + g_free(buffer); + return -1; + } } /* check the arm64 magic header value -- very old kernels may not have it */ diff --git a/hw/arm/fby35.c b/hw/arm/fby35.c index 90c04bbc33..f4600c290b 100644 --- a/hw/arm/fby35.c +++ b/hw/arm/fby35.c @@ -100,13 +100,7 @@ static void fby35_bmc_init(Fby35State *s) MemoryRegion *boot_rom = g_new(MemoryRegion, 1); uint64_t size = memory_region_size(&fl->mmio); - if (s->mmio_exec) { - memory_region_init_alias(boot_rom, NULL, "aspeed.boot_rom", - &fl->mmio, 0, size); - memory_region_add_subregion(&s->bmc_memory, FBY35_BMC_FIRMWARE_ADDR, - boot_rom); - } else { - + if (!s->mmio_exec) { memory_region_init_rom(boot_rom, NULL, "aspeed.boot_rom", size, &error_abort); memory_region_add_subregion(&s->bmc_memory, FBY35_BMC_FIRMWARE_ADDR, diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c index 0a5a60ca1e..e7f1c1f219 100644 --- a/hw/arm/smmu-common.c +++ b/hw/arm/smmu-common.c @@ -467,20 +467,6 @@ IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid) return NULL; } -/* Unmap the whole notifier's range */ -static void smmu_unmap_notifier_range(IOMMUNotifier *n) -{ - IOMMUTLBEvent event; - - event.type = IOMMU_NOTIFIER_UNMAP; - event.entry.target_as = &address_space_memory; - event.entry.iova = n->start; - event.entry.perm = IOMMU_NONE; - event.entry.addr_mask = n->end - n->start; - - memory_region_notify_iommu_one(n, &event); -} - /* Unmap all notifiers attached to @mr */ static void smmu_inv_notifiers_mr(IOMMUMemoryRegion *mr) { @@ -488,7 +474,7 @@ static void smmu_inv_notifiers_mr(IOMMUMemoryRegion *mr) trace_smmu_inv_notifiers_mr(mr->parent_obj.name); IOMMU_NOTIFIER_FOREACH(n, mr) { - smmu_unmap_notifier_range(n); + memory_region_unmap_iommu_notifier_range(n); } } diff --git a/hw/core/loader.c b/hw/core/loader.c index 173f8f67f6..cd53235fed 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -857,6 +857,97 @@ ssize_t load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz) return bytes; } +/* The PE/COFF MS-DOS stub magic number */ +#define EFI_PE_MSDOS_MAGIC "MZ" + +/* + * The Linux header magic number for a EFI PE/COFF + * image targetting an unspecified architecture. + */ +#define EFI_PE_LINUX_MAGIC "\xcd\x23\x82\x81" + +/* + * Bootable Linux kernel images may be packaged as EFI zboot images, which are + * self-decompressing executables when loaded via EFI. The compressed payload + * can also be extracted from the image and decompressed by a non-EFI loader. + * + * The de facto specification for this format is at the following URL: + * + * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/firmware/efi/libstub/zboot-header.S + * + * This definition is based on Linux upstream commit 29636a5ce87beba. + */ +struct linux_efi_zboot_header { + uint8_t msdos_magic[2]; /* PE/COFF 'MZ' magic number */ + uint8_t reserved0[2]; + uint8_t zimg[4]; /* "zimg" for Linux EFI zboot images */ + uint32_t payload_offset; /* LE offset to compressed payload */ + uint32_t payload_size; /* LE size of the compressed payload */ + uint8_t reserved1[8]; + char compression_type[32]; /* Compression type, NUL terminated */ + uint8_t linux_magic[4]; /* Linux header magic */ + uint32_t pe_header_offset; /* LE offset to the PE header */ +}; + +/* + * Check whether *buffer points to a Linux EFI zboot image in memory. + * + * If it does, attempt to decompress it to a new buffer, and free the old one. + * If any of this fails, return an error to the caller. + * + * If the image is not a Linux EFI zboot image, do nothing and return success. + */ +ssize_t unpack_efi_zboot_image(uint8_t **buffer, int *size) +{ + const struct linux_efi_zboot_header *header; + uint8_t *data = NULL; + int ploff, plsize; + ssize_t bytes; + + /* ignore if this is too small to be a EFI zboot image */ + if (*size < sizeof(*header)) { + return 0; + } + + header = (struct linux_efi_zboot_header *)*buffer; + + /* ignore if this is not a Linux EFI zboot image */ + if (memcmp(&header->msdos_magic, EFI_PE_MSDOS_MAGIC, 2) != 0 || + memcmp(&header->zimg, "zimg", 4) != 0 || + memcmp(&header->linux_magic, EFI_PE_LINUX_MAGIC, 4) != 0) { + return 0; + } + + if (strcmp(header->compression_type, "gzip") != 0) { + fprintf(stderr, + "unable to handle EFI zboot image with \"%.*s\" compression\n", + (int)sizeof(header->compression_type) - 1, + header->compression_type); + return -1; + } + + ploff = ldl_le_p(&header->payload_offset); + plsize = ldl_le_p(&header->payload_size); + + if (ploff < 0 || plsize < 0 || ploff + plsize > *size) { + fprintf(stderr, "unable to handle corrupt EFI zboot image\n"); + return -1; + } + + data = g_malloc(LOAD_IMAGE_MAX_GUNZIP_BYTES); + bytes = gunzip(data, LOAD_IMAGE_MAX_GUNZIP_BYTES, *buffer + ploff, plsize); + if (bytes < 0) { + fprintf(stderr, "failed to decompress EFI zboot image\n"); + g_free(data); + return -1; + } + + g_free(*buffer); + *buffer = g_realloc(data, bytes); + *size = bytes; + return bytes; +} + /* * Functions for reboot-persistent memory regions. * - used for vga bios and option roms. diff --git a/hw/core/machine.c b/hw/core/machine.c index f29e700ee4..1cf6822e06 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -47,6 +47,8 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); GlobalProperty hw_compat_7_1[] = { { "virtio-device", "queue_reset", "false" }, { "virtio-rng-pci", "vectors", "0" }, + { "virtio-rng-pci-transitional", "vectors", "0" }, + { "virtio-rng-pci-non-transitional", "vectors", "0" }, }; const size_t hw_compat_7_1_len = G_N_ELEMENTS(hw_compat_7_1); diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c index 83ce7a8270..4c5e88aaf5 100644 --- a/hw/cxl/cxl-device-utils.c +++ b/hw/cxl/cxl-device-utils.c @@ -267,5 +267,5 @@ void cxl_device_register_init_common(CXLDeviceState *cxl_dstate) cxl_device_cap_init(cxl_dstate, MEMORY_DEVICE, 0x4000); memdev_reg_init_common(cxl_dstate); - assert(cxl_initialize_mailbox(cxl_dstate) == 0); + cxl_initialize_mailbox(cxl_dstate); } diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index bc1bb18844..206e04a4b8 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -12,8 +12,11 @@ #include "hw/pci/pci.h" #include "qemu/cutils.h" #include "qemu/log.h" +#include "qemu/units.h" #include "qemu/uuid.h" +#define CXL_CAPACITY_MULTIPLIER (256 * MiB) + /* * How to add a new command, example. The command set FOO, with cmd BAR. * 1. Add the command set and cmd to the enum. @@ -138,7 +141,7 @@ static ret_code cmd_firmware_update_get_info(struct cxl_cmd *cmd, } QEMU_PACKED *fw_info; QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50); - if (cxl_dstate->pmem_size < (256 << 20)) { + if (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) { return CXL_MBOX_INTERNAL_ERROR; } @@ -190,7 +193,11 @@ static ret_code cmd_timestamp_set(struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } -static QemuUUID cel_uuid; +/* CXL 3.0 8.2.9.5.2.1 Command Effects Log (CEL) */ +static const QemuUUID cel_uuid = { + .data = UUID(0x0da9c0b5, 0xbf41, 0x4b78, 0x8f, 0x79, + 0x96, 0xb1, 0x62, 0x3b, 0x3f, 0x17) +}; /* 8.2.9.4.1 */ static ret_code cmd_logs_get_supported(struct cxl_cmd *cmd, @@ -283,7 +290,7 @@ static ret_code cmd_identify_memory_device(struct cxl_cmd *cmd, CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d); uint64_t size = cxl_dstate->pmem_size; - if (!QEMU_IS_ALIGNED(size, 256 << 20)) { + if (!QEMU_IS_ALIGNED(size, CXL_CAPACITY_MULTIPLIER)) { return CXL_MBOX_INTERNAL_ERROR; } @@ -293,8 +300,8 @@ static ret_code cmd_identify_memory_device(struct cxl_cmd *cmd, /* PMEM only */ snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0); - id->total_capacity = size / (256 << 20); - id->persistent_capacity = size / (256 << 20); + id->total_capacity = size / CXL_CAPACITY_MULTIPLIER; + id->persistent_capacity = size / CXL_CAPACITY_MULTIPLIER; id->lsa_size = cvc->get_lsa_size(ct3d); *len = sizeof(*id); @@ -314,14 +321,14 @@ static ret_code cmd_ccls_get_partition_info(struct cxl_cmd *cmd, QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20); uint64_t size = cxl_dstate->pmem_size; - if (!QEMU_IS_ALIGNED(size, 256 << 20)) { + if (!QEMU_IS_ALIGNED(size, CXL_CAPACITY_MULTIPLIER)) { return CXL_MBOX_INTERNAL_ERROR; } /* PMEM only */ part_info->active_vmem = 0; part_info->next_vmem = 0; - part_info->active_pmem = size / (256 << 20); + part_info->active_pmem = size / CXL_CAPACITY_MULTIPLIER; part_info->next_pmem = 0; *len = sizeof(*part_info); @@ -455,11 +462,8 @@ void cxl_process_mailbox(CXLDeviceState *cxl_dstate) DOORBELL, 0); } -int cxl_initialize_mailbox(CXLDeviceState *cxl_dstate) +void cxl_initialize_mailbox(CXLDeviceState *cxl_dstate) { - /* CXL 2.0: Table 169 Get Supported Logs Log Entry */ - const char *cel_uuidstr = "0da9c0b5-bf41-4b78-8f79-96b1623b3f17"; - for (int set = 0; set < 256; set++) { for (int cmd = 0; cmd < 256; cmd++) { if (cxl_cmd_set[set][cmd].handler) { @@ -473,6 +477,4 @@ int cxl_initialize_mailbox(CXLDeviceState *cxl_dstate) } } } - - return qemu_uuid_parse(cel_uuidstr, &cel_uuid); } diff --git a/hw/i2c/allwinner-i2c.c b/hw/i2c/allwinner-i2c.c index a435965836..f24c3ac6f0 100644 --- a/hw/i2c/allwinner-i2c.c +++ b/hw/i2c/allwinner-i2c.c @@ -357,10 +357,16 @@ static void allwinner_i2c_write(void *opaque, hwaddr offset, s->stat = STAT_FROM_STA(STAT_IDLE); s->cntr &= ~TWI_CNTR_M_STP; } - if ((s->cntr & TWI_CNTR_INT_FLAG) == 0) { - /* Interrupt flag cleared */ + + if (!s->irq_clear_inverted && !(s->cntr & TWI_CNTR_INT_FLAG)) { + /* Write 0 to clear this flag */ + qemu_irq_lower(s->irq); + } else if (s->irq_clear_inverted && (s->cntr & TWI_CNTR_INT_FLAG)) { + /* Write 1 to clear this flag */ + s->cntr &= ~TWI_CNTR_INT_FLAG; qemu_irq_lower(s->irq); } + if ((s->cntr & TWI_CNTR_A_ACK) == 0) { if (STAT_TO_STA(s->stat) == STAT_M_DATA_RX_ACK) { s->stat = STAT_FROM_STA(STAT_M_DATA_RX_NACK); @@ -451,9 +457,25 @@ static const TypeInfo allwinner_i2c_type_info = { .class_init = allwinner_i2c_class_init, }; +static void allwinner_i2c_sun6i_init(Object *obj) +{ + AWI2CState *s = AW_I2C(obj); + + s->irq_clear_inverted = true; +} + +static const TypeInfo allwinner_i2c_sun6i_type_info = { + .name = TYPE_AW_I2C_SUN6I, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(AWI2CState), + .instance_init = allwinner_i2c_sun6i_init, + .class_init = allwinner_i2c_class_init, +}; + static void allwinner_i2c_register_types(void) { type_register_static(&allwinner_i2c_type_info); + type_register_static(&allwinner_i2c_sun6i_type_info); } type_init(allwinner_i2c_register_types) diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c index c166fd20fa..1f071a3811 100644 --- a/hw/i2c/aspeed_i2c.c +++ b/hw/i2c/aspeed_i2c.c @@ -550,6 +550,8 @@ static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus *bus, uint64_t value) } SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_STOP_CMD, 0); aspeed_i2c_set_state(bus, I2CD_IDLE); + + i2c_schedule_pending_master(bus->bus); } if (aspeed_i2c_bus_pkt_mode_en(bus)) { diff --git a/hw/i2c/core.c b/hw/i2c/core.c index d4ba8146bf..bed594fe59 100644 --- a/hw/i2c/core.c +++ b/hw/i2c/core.c @@ -185,22 +185,39 @@ int i2c_start_transfer(I2CBus *bus, uint8_t address, bool is_recv) void i2c_bus_master(I2CBus *bus, QEMUBH *bh) { - if (i2c_bus_busy(bus)) { - I2CPendingMaster *node = g_new(struct I2CPendingMaster, 1); - node->bh = bh; + I2CPendingMaster *node = g_new(struct I2CPendingMaster, 1); + node->bh = bh; + + QSIMPLEQ_INSERT_TAIL(&bus->pending_masters, node, entry); +} + +void i2c_schedule_pending_master(I2CBus *bus) +{ + I2CPendingMaster *node; - QSIMPLEQ_INSERT_TAIL(&bus->pending_masters, node, entry); + if (i2c_bus_busy(bus)) { + /* someone is already controlling the bus; wait for it to release it */ + return; + } + if (QSIMPLEQ_EMPTY(&bus->pending_masters)) { return; } - bus->bh = bh; + node = QSIMPLEQ_FIRST(&bus->pending_masters); + bus->bh = node->bh; + + QSIMPLEQ_REMOVE_HEAD(&bus->pending_masters, entry); + g_free(node); + qemu_bh_schedule(bus->bh); } void i2c_bus_release(I2CBus *bus) { bus->bh = NULL; + + i2c_schedule_pending_master(bus); } int i2c_start_recv(I2CBus *bus, uint8_t address) @@ -234,16 +251,6 @@ void i2c_end_transfer(I2CBus *bus) g_free(node); } bus->broadcast = false; - - if (!QSIMPLEQ_EMPTY(&bus->pending_masters)) { - I2CPendingMaster *node = QSIMPLEQ_FIRST(&bus->pending_masters); - bus->bh = node->bh; - - QSIMPLEQ_REMOVE_HEAD(&bus->pending_masters, entry); - g_free(node); - - qemu_bh_schedule(bus->bh); - } } int i2c_send(I2CBus *bus, uint8_t data) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index d27921fd8f..b19fb4259e 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1514,7 +1514,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(pkg, aml_eisaid("PNP0A03")); aml_append(dev, aml_name_decl("_CID", pkg)); aml_append(dev, aml_name_decl("_ADR", aml_int(0))); - aml_append(dev, aml_name_decl("_UID", aml_int(bus_num))); build_cxl_osc_method(dev); } else if (pci_bus_is_express(bus)) { aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 98a5c304a7..faade7def8 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -1530,13 +1530,17 @@ static int vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as, return vtd_page_walk(s, ce, addr, addr + size, &info, vtd_as->pasid); } -static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as) +static int vtd_address_space_sync(VTDAddressSpace *vtd_as) { int ret; VTDContextEntry ce; IOMMUNotifier *n; - if (!(vtd_as->iommu.iommu_notify_flags & IOMMU_NOTIFIER_IOTLB_EVENTS)) { + /* If no MAP notifier registered, we simply invalidate all the cache */ + if (!vtd_as_has_map_notifier(vtd_as)) { + IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) { + memory_region_unmap_iommu_notifier_range(n); + } return 0; } @@ -2000,7 +2004,7 @@ static void vtd_iommu_replay_all(IntelIOMMUState *s) VTDAddressSpace *vtd_as; QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) { - vtd_sync_shadow_page_table(vtd_as); + vtd_address_space_sync(vtd_as); } } @@ -2082,7 +2086,7 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s, * framework will skip MAP notifications if that * happened. */ - vtd_sync_shadow_page_table(vtd_as); + vtd_address_space_sync(vtd_as); } } } @@ -2140,7 +2144,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as->devfn, &ce) && domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) { - vtd_sync_shadow_page_table(vtd_as); + vtd_address_space_sync(vtd_as); } } } @@ -3179,6 +3183,7 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, { VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); IntelIOMMUState *s = vtd_as->iommu_state; + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); /* TODO: add support for VFIO and vhost users */ if (s->snoop_control) { @@ -3186,6 +3191,20 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu, "Snoop Control with vhost or VFIO is not supported"); return -ENOTSUP; } + if (!s->caching_mode && (new & IOMMU_NOTIFIER_MAP)) { + error_setg_errno(errp, ENOTSUP, + "device %02x.%02x.%x requires caching mode", + pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn), + PCI_FUNC(vtd_as->devfn)); + return -ENOTSUP; + } + if (!x86_iommu->dt_supported && (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP)) { + error_setg_errno(errp, ENOTSUP, + "device %02x.%02x.%x requires device IOTLB mode", + pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn), + PCI_FUNC(vtd_as->devfn)); + return -ENOTSUP; + } /* Update per-address-space notifier flags */ vtd_as->notifier_flags = new; @@ -3831,7 +3850,7 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) .domain_id = vtd_get_domain_id(s, &ce, vtd_as->pasid), }; - vtd_page_walk(s, &ce, 0, ~0ULL, &info, vtd_as->pasid); + vtd_page_walk(s, &ce, n->start, n->end, &info, vtd_as->pasid); } } else { trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn), diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index 68c22016d2..3d606a20b4 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -328,7 +328,7 @@ static void microvm_memory_init(MicrovmMachineState *mms) rom_set_fw(fw_cfg); if (machine->kernel_filename != NULL) { - x86_load_linux(x86ms, fw_cfg, 0, true, false); + x86_load_linux(x86ms, fw_cfg, 0, true); } if (mms->option_roms) { @@ -376,8 +376,7 @@ static void microvm_fix_kernel_cmdline(MachineState *machine) MicrovmMachineState *mms = MICROVM_MACHINE(machine); BusState *bus; BusChild *kid; - char *cmdline, *existing_cmdline; - size_t len; + char *cmdline; /* * Find MMIO transports with attached devices, and add them to the kernel @@ -386,8 +385,7 @@ static void microvm_fix_kernel_cmdline(MachineState *machine) * Yes, this is a hack, but one that heavily improves the UX without * introducing any significant issues. */ - existing_cmdline = fw_cfg_read_bytes_ptr(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA); - cmdline = g_strdup(existing_cmdline); + cmdline = g_strdup(machine->kernel_cmdline); bus = sysbus_get_default(); QTAILQ_FOREACH(kid, &bus->children, sibling) { DeviceState *dev = kid->child; @@ -411,12 +409,9 @@ static void microvm_fix_kernel_cmdline(MachineState *machine) } } - len = strlen(cmdline); - if (len > VIRTIO_CMDLINE_TOTAL_MAX_LEN + strlen(existing_cmdline)) { - fprintf(stderr, "qemu: virtio mmio cmdline too large, skipping\n"); - } else { - memcpy(existing_cmdline, cmdline, len + 1); - } + fw_cfg_modify_i32(x86ms->fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(cmdline) + 1); + fw_cfg_modify_string(x86ms->fw_cfg, FW_CFG_CMDLINE_DATA, cmdline); + g_free(cmdline); } diff --git a/hw/i386/pc.c b/hw/i386/pc.c index fd17ce7a94..7bebea57e3 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -810,7 +810,7 @@ void xen_load_linux(PCMachineState *pcms) rom_set_fw(fw_cfg); x86_load_linux(x86ms, fw_cfg, pcmc->acpi_data_size, - pcmc->pvh_enabled, pcmc->legacy_no_rng_seed); + pcmc->pvh_enabled); for (i = 0; i < nb_option_roms; i++) { assert(!strcmp(option_rom[i].name, "linuxboot.bin") || !strcmp(option_rom[i].name, "linuxboot_dma.bin") || @@ -1130,7 +1130,7 @@ void pc_memory_init(PCMachineState *pcms, if (linux_boot) { x86_load_linux(x86ms, fw_cfg, pcmc->acpi_data_size, - pcmc->pvh_enabled, pcmc->legacy_no_rng_seed); + pcmc->pvh_enabled); } for (i = 0; i < nb_option_roms; i++) { diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 126b6c11df..4bf15f9c1f 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -422,6 +422,7 @@ static void pc_xen_hvm_init(MachineState *machine) } pc_xen_hvm_init_pci(machine); + xen_igd_reserve_slot(pcms->bus); pci_create_simple(pcms->bus, -1, "xen-platform"); } #endif @@ -477,9 +478,7 @@ DEFINE_I440FX_MACHINE(v7_2, "pc-i440fx-7.2", NULL, static void pc_i440fx_7_1_machine_options(MachineClass *m) { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_7_2_machine_options(m); - pcmc->legacy_no_rng_seed = true; compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len); compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 09004f3f1f..797ba347fd 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -395,9 +395,7 @@ DEFINE_Q35_MACHINE(v7_2, "pc-q35-7.2", NULL, static void pc_q35_7_1_machine_options(MachineClass *m) { - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_7_2_machine_options(m); - pcmc->legacy_no_rng_seed = true; compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len); compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len); } diff --git a/hw/i386/x86.c b/hw/i386/x86.c index a56b10b2fb..a88a126123 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -26,7 +26,6 @@ #include "qemu/cutils.h" #include "qemu/units.h" #include "qemu/datadir.h" -#include "qemu/guest-random.h" #include "qapi/error.h" #include "qapi/qapi-visit-common.h" #include "qapi/clone-visitor.h" @@ -36,7 +35,6 @@ #include "sysemu/whpx.h" #include "sysemu/numa.h" #include "sysemu/replay.h" -#include "sysemu/reset.h" #include "sysemu/sysemu.h" #include "sysemu/cpu-timers.h" #include "sysemu/xen.h" @@ -49,7 +47,6 @@ #include "hw/intc/i8259.h" #include "hw/rtc/mc146818rtc.h" #include "target/i386/sev.h" -#include "hw/i386/microvm.h" #include "hw/acpi/cpu_hotplug.h" #include "hw/irq.h" @@ -675,12 +672,12 @@ DeviceState *ioapic_init_secondary(GSIState *gsi_state) return dev; } -typedef struct SetupData { +struct setup_data { uint64_t next; uint32_t type; uint32_t len; uint8_t data[]; -} __attribute__((packed)) SetupData; +} __attribute__((packed)); /* @@ -787,35 +784,10 @@ static bool load_elfboot(const char *kernel_filename, return true; } -typedef struct SetupDataFixup { - void *pos; - hwaddr orig_val, new_val; - uint32_t addr; -} SetupDataFixup; - -static void fixup_setup_data(void *opaque) -{ - SetupDataFixup *fixup = opaque; - stq_p(fixup->pos, fixup->new_val); -} - -static void reset_setup_data(void *opaque) -{ - SetupDataFixup *fixup = opaque; - stq_p(fixup->pos, fixup->orig_val); -} - -static void reset_rng_seed(void *opaque) -{ - SetupData *setup_data = opaque; - qemu_guest_getrandom_nofail(setup_data->data, le32_to_cpu(setup_data->len)); -} - void x86_load_linux(X86MachineState *x86ms, FWCfgState *fw_cfg, int acpi_data_size, - bool pvh_enabled, - bool legacy_no_rng_seed) + bool pvh_enabled) { bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; uint16_t protocol; @@ -823,26 +795,19 @@ void x86_load_linux(X86MachineState *x86ms, int dtb_size, setup_data_offset; uint32_t initrd_max; uint8_t header[8192], *setup, *kernel; - hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0, first_setup_data = 0; + hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; FILE *f; char *vmode; MachineState *machine = MACHINE(x86ms); - SetupData *setup_data; + struct setup_data *setup_data; const char *kernel_filename = machine->kernel_filename; const char *initrd_filename = machine->initrd_filename; const char *dtb_filename = machine->dtb; - char *kernel_cmdline; + const char *kernel_cmdline = machine->kernel_cmdline; SevKernelLoaderContext sev_load_ctx = {}; - enum { RNG_SEED_LENGTH = 32 }; - /* - * Add the NUL terminator, some padding for the microvm cmdline fiddling - * hack, and then align to 16 bytes as a paranoia measure - */ - cmdline_size = (strlen(machine->kernel_cmdline) + 1 + - VIRTIO_CMDLINE_TOTAL_MAX_LEN + 16) & ~15; - /* Make a copy, since we might append arbitrary bytes to it later. */ - kernel_cmdline = g_strndup(machine->kernel_cmdline, cmdline_size); + /* Align to 16 bytes as a paranoia measure */ + cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; /* load the kernel header */ f = fopen(kernel_filename, "rb"); @@ -983,6 +948,12 @@ void x86_load_linux(X86MachineState *x86ms, initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; } + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); + sev_load_ctx.cmdline_data = (char *)kernel_cmdline; + sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1; + if (protocol >= 0x202) { stl_p(header + 0x228, cmdline_addr); } else { @@ -1109,45 +1080,20 @@ void x86_load_linux(X86MachineState *x86ms, exit(1); } - setup_data_offset = cmdline_size; - cmdline_size += sizeof(SetupData) + dtb_size; - kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size); - setup_data = (void *)kernel_cmdline + setup_data_offset; - setup_data->next = cpu_to_le64(first_setup_data); - first_setup_data = cmdline_addr + setup_data_offset; + setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); + kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; + kernel = g_realloc(kernel, kernel_size); + + stq_p(header + 0x250, prot_addr + setup_data_offset); + + setup_data = (struct setup_data *)(kernel + setup_data_offset); + setup_data->next = 0; setup_data->type = cpu_to_le32(SETUP_DTB); setup_data->len = cpu_to_le32(dtb_size); - load_image_size(dtb_filename, setup_data->data, dtb_size); - } - if (!legacy_no_rng_seed && protocol >= 0x209) { - setup_data_offset = cmdline_size; - cmdline_size += sizeof(SetupData) + RNG_SEED_LENGTH; - kernel_cmdline = g_realloc(kernel_cmdline, cmdline_size); - setup_data = (void *)kernel_cmdline + setup_data_offset; - setup_data->next = cpu_to_le64(first_setup_data); - first_setup_data = cmdline_addr + setup_data_offset; - setup_data->type = cpu_to_le32(SETUP_RNG_SEED); - setup_data->len = cpu_to_le32(RNG_SEED_LENGTH); - qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH); - qemu_register_reset_nosnapshotload(reset_rng_seed, setup_data); - fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_KERNEL_DATA, reset_rng_seed, NULL, - setup_data, kernel, kernel_size, true); - } else { - fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); + load_image_size(dtb_filename, setup_data->data, dtb_size); } - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, cmdline_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline, cmdline_size); - sev_load_ctx.cmdline_data = (char *)kernel_cmdline; - sev_load_ctx.cmdline_size = cmdline_size; - - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); - sev_load_ctx.kernel_data = (char *)kernel; - sev_load_ctx.kernel_size = kernel_size; - /* * If we're starting an encrypted VM, it will be OVMF based, which uses the * efi stub for booting and doesn't require any values to be placed in the @@ -1155,21 +1101,17 @@ void x86_load_linux(X86MachineState *x86ms, * kernel on the other side of the fw_cfg interface matches the hash of the * file the user passed in. */ - if (!sev_enabled() && first_setup_data) { - SetupDataFixup *fixup = g_malloc(sizeof(*fixup)); - + if (!sev_enabled()) { memcpy(setup, header, MIN(sizeof(header), setup_size)); - /* Offset 0x250 is a pointer to the first setup_data link. */ - fixup->pos = setup + 0x250; - fixup->orig_val = ldq_p(fixup->pos); - fixup->new_val = first_setup_data; - fixup->addr = cpu_to_le32(real_addr); - fw_cfg_add_bytes_callback(fw_cfg, FW_CFG_SETUP_ADDR, fixup_setup_data, NULL, - fixup, &fixup->addr, sizeof(fixup->addr), true); - qemu_register_reset(reset_setup_data, fixup); - } else { - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); } + + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); + sev_load_ctx.kernel_data = (char *)kernel; + sev_load_ctx.kernel_size = kernel_size; + + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); sev_load_ctx.setup_data = (char *)setup; diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c index eee04643cb..b466a6abaf 100644 --- a/hw/intc/riscv_aclint.c +++ b/hw/intc/riscv_aclint.c @@ -130,7 +130,7 @@ static uint64_t riscv_aclint_mtimer_read(void *opaque, hwaddr addr, addr < (mtimer->timecmp_base + (mtimer->num_harts << 3))) { size_t hartid = mtimer->hartid_base + ((addr - mtimer->timecmp_base) >> 3); - CPUState *cpu = qemu_get_cpu(hartid); + CPUState *cpu = cpu_by_arch_id(hartid); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; if (!env) { qemu_log_mask(LOG_GUEST_ERROR, @@ -173,7 +173,7 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, addr < (mtimer->timecmp_base + (mtimer->num_harts << 3))) { size_t hartid = mtimer->hartid_base + ((addr - mtimer->timecmp_base) >> 3); - CPUState *cpu = qemu_get_cpu(hartid); + CPUState *cpu = cpu_by_arch_id(hartid); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; if (!env) { qemu_log_mask(LOG_GUEST_ERROR, @@ -231,7 +231,7 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, /* Check if timer interrupt is triggered for each hart. */ for (i = 0; i < mtimer->num_harts; i++) { - CPUState *cpu = qemu_get_cpu(mtimer->hartid_base + i); + CPUState *cpu = cpu_by_arch_id(mtimer->hartid_base + i); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; if (!env) { continue; @@ -292,7 +292,7 @@ static void riscv_aclint_mtimer_realize(DeviceState *dev, Error **errp) s->timecmp = g_new0(uint64_t, s->num_harts); /* Claim timer interrupt bits */ for (i = 0; i < s->num_harts; i++) { - RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(s->hartid_base + i)); + RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(s->hartid_base + i)); if (riscv_cpu_claim_interrupts(cpu, MIP_MTIP) < 0) { error_report("MTIP already claimed"); exit(1); @@ -372,7 +372,7 @@ DeviceState *riscv_aclint_mtimer_create(hwaddr addr, hwaddr size, sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); for (i = 0; i < num_harts; i++) { - CPUState *cpu = qemu_get_cpu(hartid_base + i); + CPUState *cpu = cpu_by_arch_id(hartid_base + i); RISCVCPU *rvcpu = RISCV_CPU(cpu); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; riscv_aclint_mtimer_callback *cb = @@ -407,7 +407,7 @@ static uint64_t riscv_aclint_swi_read(void *opaque, hwaddr addr, if (addr < (swi->num_harts << 2)) { size_t hartid = swi->hartid_base + (addr >> 2); - CPUState *cpu = qemu_get_cpu(hartid); + CPUState *cpu = cpu_by_arch_id(hartid); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; if (!env) { qemu_log_mask(LOG_GUEST_ERROR, @@ -430,7 +430,7 @@ static void riscv_aclint_swi_write(void *opaque, hwaddr addr, uint64_t value, if (addr < (swi->num_harts << 2)) { size_t hartid = swi->hartid_base + (addr >> 2); - CPUState *cpu = qemu_get_cpu(hartid); + CPUState *cpu = cpu_by_arch_id(hartid); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; if (!env) { qemu_log_mask(LOG_GUEST_ERROR, @@ -545,7 +545,7 @@ DeviceState *riscv_aclint_swi_create(hwaddr addr, uint32_t hartid_base, sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); for (i = 0; i < num_harts; i++) { - CPUState *cpu = qemu_get_cpu(hartid_base + i); + CPUState *cpu = cpu_by_arch_id(hartid_base + i); RISCVCPU *rvcpu = RISCV_CPU(cpu); qdev_connect_gpio_out(dev, i, diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c index cfd007e629..cd7efc4ad4 100644 --- a/hw/intc/riscv_aplic.c +++ b/hw/intc/riscv_aplic.c @@ -833,7 +833,7 @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp) /* Claim the CPU interrupt to be triggered by this APLIC */ for (i = 0; i < aplic->num_harts; i++) { - RISCVCPU *cpu = RISCV_CPU(qemu_get_cpu(aplic->hartid_base + i)); + RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(aplic->hartid_base + i)); if (riscv_cpu_claim_interrupts(cpu, (aplic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) { error_report("%s already claimed", @@ -966,7 +966,7 @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size, if (!msimode) { for (i = 0; i < num_harts; i++) { - CPUState *cpu = qemu_get_cpu(hartid_base + i); + CPUState *cpu = cpu_by_arch_id(hartid_base + i); qdev_connect_gpio_out_named(dev, NULL, i, qdev_get_gpio_in(DEVICE(cpu), diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c index 4d4d5b50ca..fea3385b51 100644 --- a/hw/intc/riscv_imsic.c +++ b/hw/intc/riscv_imsic.c @@ -316,8 +316,8 @@ static const MemoryRegionOps riscv_imsic_ops = { static void riscv_imsic_realize(DeviceState *dev, Error **errp) { RISCVIMSICState *imsic = RISCV_IMSIC(dev); - RISCVCPU *rcpu = RISCV_CPU(qemu_get_cpu(imsic->hartid)); - CPUState *cpu = qemu_get_cpu(imsic->hartid); + RISCVCPU *rcpu = RISCV_CPU(cpu_by_arch_id(imsic->hartid)); + CPUState *cpu = cpu_by_arch_id(imsic->hartid); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; imsic->num_eistate = imsic->num_pages * imsic->num_irqs; @@ -413,7 +413,7 @@ DeviceState *riscv_imsic_create(hwaddr addr, uint32_t hartid, bool mmode, uint32_t num_pages, uint32_t num_ids) { DeviceState *dev = qdev_new(TYPE_RISCV_IMSIC); - CPUState *cpu = qemu_get_cpu(hartid); + CPUState *cpu = cpu_by_arch_id(hartid); uint32_t i; assert(!(addr & (IMSIC_MMIO_PAGE_SZ - 1))); diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c index f551296a0e..6cb2472d33 100644 --- a/hw/loongarch/acpi-build.c +++ b/hw/loongarch/acpi-build.c @@ -260,6 +260,7 @@ build_la_ged_aml(Aml *dsdt, MachineState *machine) AML_SYSTEM_MEMORY, VIRT_GED_MEM_ADDR); } + acpi_dsdt_add_power_button(dsdt); } static void build_pci_device_aml(Aml *scope, LoongArchMachineState *lams) @@ -271,7 +272,7 @@ static void build_pci_device_aml(Aml *scope, LoongArchMachineState *lams) .pio.size = VIRT_PCI_IO_SIZE, .ecam.base = VIRT_PCI_CFG_BASE, .ecam.size = VIRT_PCI_CFG_SIZE, - .irq = PCH_PIC_IRQ_OFFSET + VIRT_DEVICE_IRQS, + .irq = VIRT_GSI_BASE + VIRT_DEVICE_IRQS, .bus = lams->pci_bus, }; diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c index 66be925068..38ef7cc49f 100644 --- a/hw/loongarch/virt.c +++ b/hw/loongarch/virt.c @@ -316,6 +316,14 @@ static void virt_machine_done(Notifier *notifier, void *data) loongarch_acpi_setup(lams); } +static void virt_powerdown_req(Notifier *notifier, void *opaque) +{ + LoongArchMachineState *s = container_of(notifier, + LoongArchMachineState, powerdown_notifier); + + acpi_send_event(s->acpi_ged, ACPI_POWER_DOWN_STATUS); +} + struct memmap_entry { uint64_t address; uint64_t length; @@ -432,7 +440,7 @@ static DeviceState *create_acpi_ged(DeviceState *pch_pic, LoongArchMachineState sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, VIRT_GED_REG_ADDR); sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, - qdev_get_gpio_in(pch_pic, VIRT_SCI_IRQ - PCH_PIC_IRQ_OFFSET)); + qdev_get_gpio_in(pch_pic, VIRT_SCI_IRQ - VIRT_GSI_BASE)); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); return dev; } @@ -452,7 +460,7 @@ static DeviceState *create_platform_bus(DeviceState *pch_pic) sysbus = SYS_BUS_DEVICE(dev); for (i = 0; i < VIRT_PLATFORM_BUS_NUM_IRQS; i++) { - irq = VIRT_PLATFORM_BUS_IRQ - PCH_PIC_IRQ_OFFSET + i; + irq = VIRT_PLATFORM_BUS_IRQ - VIRT_GSI_BASE + i; sysbus_connect_irq(sysbus, i, qdev_get_gpio_in(pch_pic, irq)); } @@ -509,7 +517,7 @@ static void loongarch_devices_init(DeviceState *pch_pic, LoongArchMachineState * serial_mm_init(get_system_memory(), VIRT_UART_BASE, 0, qdev_get_gpio_in(pch_pic, - VIRT_UART_IRQ - PCH_PIC_IRQ_OFFSET), + VIRT_UART_IRQ - VIRT_GSI_BASE), 115200, serial_hd(0), DEVICE_LITTLE_ENDIAN); fdt_add_uart_node(lams); @@ -531,7 +539,7 @@ static void loongarch_devices_init(DeviceState *pch_pic, LoongArchMachineState * create_unimplemented_device("pci-dma-cfg", 0x1001041c, 0x4); sysbus_create_simple("ls7a_rtc", VIRT_RTC_REG_BASE, qdev_get_gpio_in(pch_pic, - VIRT_RTC_IRQ - PCH_PIC_IRQ_OFFSET)); + VIRT_RTC_IRQ - VIRT_GSI_BASE)); fdt_add_rtc_node(lams); pm_mem = g_new(MemoryRegion, 1); @@ -859,6 +867,10 @@ static void loongarch_init(MachineState *machine) VIRT_PLATFORM_BUS_IRQ); lams->machine_done.notify = virt_machine_done; qemu_add_machine_init_done_notifier(&lams->machine_done); + /* connect powerdown request */ + lams->powerdown_notifier.notify = virt_powerdown_req; + qemu_register_powerdown_notifier(&lams->powerdown_notifier); + fdt_add_pcie_node(lams); /* * Since lowmem region starts from 0 and Linux kernel legacy start address diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index dae4fd89ca..217a5e639b 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -401,14 +401,13 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) MemoryRegion *mr = ®s->component_registers; uint8_t *pci_conf = pci_dev->config; unsigned short msix_num = 1; - int i; + int i, rc; if (!cxl_setup_memory(ct3d, errp)) { return; } pci_config_set_prog_interface(pci_conf, 0x10); - pci_config_set_class(pci_conf, PCI_CLASS_MEMORY_CXL); pcie_endpoint_cap_init(pci_dev, 0x80); if (ct3d->sn != UI64_NULL) { @@ -438,7 +437,10 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) &ct3d->cxl_dstate.device_registers); /* MSI(-X) Initailization */ - msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL); + rc = msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL); + if (rc) { + goto err_address_space_free; + } for (i = 0; i < msix_num; i++) { msix_vector_use(pci_dev, i); } @@ -450,6 +452,11 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table; cxl_cstate->cdat.private = ct3d; cxl_doe_cdat_init(cxl_cstate, errp); + return; + +err_address_space_free: + address_space_destroy(&ct3d->hostmem_as); + return; } static void ct3_exit(PCIDevice *pci_dev) @@ -619,7 +626,7 @@ static void ct3_class_init(ObjectClass *oc, void *data) pc->realize = ct3_realize; pc->exit = ct3_exit; - pc->class_id = PCI_CLASS_STORAGE_EXPRESS; + pc->class_id = PCI_CLASS_MEMORY_CXL; pc->vendor_id = PCI_VENDOR_ID_INTEL; pc->device_id = 0xd93; /* LVF for now */ pc->revision = 1; diff --git a/hw/misc/i2c-echo.c b/hw/misc/i2c-echo.c new file mode 100644 index 0000000000..5705ab5d73 --- /dev/null +++ b/hw/misc/i2c-echo.c @@ -0,0 +1,156 @@ +#include "qemu/osdep.h" +#include "qemu/timer.h" +#include "qemu/main-loop.h" +#include "block/aio.h" +#include "hw/i2c/i2c.h" + +#define TYPE_I2C_ECHO "i2c-echo" +OBJECT_DECLARE_SIMPLE_TYPE(I2CEchoState, I2C_ECHO) + +enum i2c_echo_state { + I2C_ECHO_STATE_IDLE, + I2C_ECHO_STATE_START_SEND, + I2C_ECHO_STATE_ACK, +}; + +typedef struct I2CEchoState { + I2CSlave parent_obj; + + I2CBus *bus; + + enum i2c_echo_state state; + QEMUBH *bh; + + unsigned int pos; + uint8_t data[3]; +} I2CEchoState; + +static void i2c_echo_bh(void *opaque) +{ + I2CEchoState *state = opaque; + + switch (state->state) { + case I2C_ECHO_STATE_IDLE: + return; + + case I2C_ECHO_STATE_START_SEND: + if (i2c_start_send_async(state->bus, state->data[0])) { + goto release_bus; + } + + state->pos++; + state->state = I2C_ECHO_STATE_ACK; + return; + + case I2C_ECHO_STATE_ACK: + if (state->pos > 2) { + break; + } + + if (i2c_send_async(state->bus, state->data[state->pos++])) { + break; + } + + return; + } + + + i2c_end_transfer(state->bus); +release_bus: + i2c_bus_release(state->bus); + + state->state = I2C_ECHO_STATE_IDLE; +} + +static int i2c_echo_event(I2CSlave *s, enum i2c_event event) +{ + I2CEchoState *state = I2C_ECHO(s); + + switch (event) { + case I2C_START_RECV: + state->pos = 0; + + break; + + case I2C_START_SEND: + state->pos = 0; + + break; + + case I2C_FINISH: + state->pos = 0; + state->state = I2C_ECHO_STATE_START_SEND; + i2c_bus_master(state->bus, state->bh); + + break; + + case I2C_NACK: + break; + + default: + return -1; + } + + return 0; +} + +static uint8_t i2c_echo_recv(I2CSlave *s) +{ + I2CEchoState *state = I2C_ECHO(s); + + if (state->pos > 2) { + return 0xff; + } + + return state->data[state->pos++]; +} + +static int i2c_echo_send(I2CSlave *s, uint8_t data) +{ + I2CEchoState *state = I2C_ECHO(s); + + if (state->pos > 2) { + return -1; + } + + state->data[state->pos++] = data; + + return 0; +} + +static void i2c_echo_realize(DeviceState *dev, Error **errp) +{ + I2CEchoState *state = I2C_ECHO(dev); + BusState *bus = qdev_get_parent_bus(dev); + + state->bus = I2C_BUS(bus); + state->bh = qemu_bh_new(i2c_echo_bh, state); + + return; +} + +static void i2c_echo_class_init(ObjectClass *oc, void *data) +{ + I2CSlaveClass *sc = I2C_SLAVE_CLASS(oc); + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = i2c_echo_realize; + + sc->event = i2c_echo_event; + sc->recv = i2c_echo_recv; + sc->send = i2c_echo_send; +} + +static const TypeInfo i2c_echo = { + .name = TYPE_I2C_ECHO, + .parent = TYPE_I2C_SLAVE, + .instance_size = sizeof(I2CEchoState), + .class_init = i2c_echo_class_init, +}; + +static void register_types(void) +{ + type_register_static(&i2c_echo); +} + +type_init(register_types); diff --git a/hw/misc/meson.build b/hw/misc/meson.build index fe869b98ca..a40245ad44 100644 --- a/hw/misc/meson.build +++ b/hw/misc/meson.build @@ -128,6 +128,8 @@ softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_rng.c')) softmmu_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_ahb_apb_pnp.c')) +softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c')) + specific_ss.add(when: 'CONFIG_AVR_POWER', if_true: files('avr_power.c')) specific_ss.add(when: 'CONFIG_MAC_VIA', if_true: files('mac_via.c')) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 3ae909041a..09d5c7a664 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -820,6 +820,21 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, features |= (1ULL << VIRTIO_NET_F_MTU); } + /* + * Since GUEST_ANNOUNCE is emulated the feature bit could be set without + * enabled. This happens in the vDPA case. + * + * Make sure the feature set is not incoherent, as the driver could refuse + * to start. + * + * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes, + * helping guest to notify the new location with vDPA devices that does not + * support it. + */ + if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) { + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE); + } + return features; } diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index f25cc2c235..49c1210fce 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -238,6 +238,8 @@ static const bool nvme_feature_support[NVME_FID_MAX] = { [NVME_TIMESTAMP] = true, [NVME_HOST_BEHAVIOR_SUPPORT] = true, [NVME_COMMAND_SET_PROFILE] = true, + [NVME_FDP_MODE] = true, + [NVME_FDP_EVENTS] = true, }; static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { @@ -249,6 +251,8 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { [NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE, [NVME_HOST_BEHAVIOR_SUPPORT] = NVME_FEAT_CAP_CHANGE, [NVME_COMMAND_SET_PROFILE] = NVME_FEAT_CAP_CHANGE, + [NVME_FDP_MODE] = NVME_FEAT_CAP_CHANGE, + [NVME_FDP_EVENTS] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS, }; static const uint32_t nvme_cse_acs[256] = { @@ -266,6 +270,8 @@ static const uint32_t nvme_cse_acs[256] = { [NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, + [NVME_ADM_CMD_DIRECTIVE_RECV] = NVME_CMD_EFF_CSUPP, + [NVME_ADM_CMD_DIRECTIVE_SEND] = NVME_CMD_EFF_CSUPP, }; static const uint32_t nvme_cse_iocs_none[256]; @@ -279,6 +285,8 @@ static const uint32_t nvme_cse_iocs_nvm[256] = { [NVME_CMD_VERIFY] = NVME_CMD_EFF_CSUPP, [NVME_CMD_COPY] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, [NVME_CMD_COMPARE] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_IO_MGMT_RECV] = NVME_CMD_EFF_CSUPP, + [NVME_CMD_IO_MGMT_SEND] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, }; static const uint32_t nvme_cse_iocs_zoned[256] = { @@ -297,12 +305,66 @@ static const uint32_t nvme_cse_iocs_zoned[256] = { static void nvme_process_sq(void *opaque); static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst); +static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n); static uint16_t nvme_sqid(NvmeRequest *req) { return le16_to_cpu(req->sq->sqid); } +static inline uint16_t nvme_make_pid(NvmeNamespace *ns, uint16_t rg, + uint16_t ph) +{ + uint16_t rgif = ns->endgrp->fdp.rgif; + + if (!rgif) { + return ph; + } + + return (rg << (16 - rgif)) | ph; +} + +static inline bool nvme_ph_valid(NvmeNamespace *ns, uint16_t ph) +{ + return ph < ns->fdp.nphs; +} + +static inline bool nvme_rg_valid(NvmeEnduranceGroup *endgrp, uint16_t rg) +{ + return rg < endgrp->fdp.nrg; +} + +static inline uint16_t nvme_pid2ph(NvmeNamespace *ns, uint16_t pid) +{ + uint16_t rgif = ns->endgrp->fdp.rgif; + + if (!rgif) { + return pid; + } + + return pid & ((1 << (15 - rgif)) - 1); +} + +static inline uint16_t nvme_pid2rg(NvmeNamespace *ns, uint16_t pid) +{ + uint16_t rgif = ns->endgrp->fdp.rgif; + + if (!rgif) { + return 0; + } + + return pid >> (16 - rgif); +} + +static inline bool nvme_parse_pid(NvmeNamespace *ns, uint16_t pid, + uint16_t *ph, uint16_t *rg) +{ + *rg = nvme_pid2rg(ns, pid); + *ph = nvme_pid2ph(ns, pid); + + return nvme_ph_valid(ns, *ph) && nvme_rg_valid(ns->endgrp, *rg); +} + static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone, NvmeZoneState state) { @@ -376,6 +438,69 @@ static uint16_t nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn) return nvme_zns_check_resources(ns, act, opn, 0); } +static NvmeFdpEvent *nvme_fdp_alloc_event(NvmeCtrl *n, NvmeFdpEventBuffer *ebuf) +{ + NvmeFdpEvent *ret = NULL; + bool is_full = ebuf->next == ebuf->start && ebuf->nelems; + + ret = &ebuf->events[ebuf->next++]; + if (unlikely(ebuf->next == NVME_FDP_MAX_EVENTS)) { + ebuf->next = 0; + } + if (is_full) { + ebuf->start = ebuf->next; + } else { + ebuf->nelems++; + } + + memset(ret, 0, sizeof(NvmeFdpEvent)); + ret->timestamp = nvme_get_timestamp(n); + + return ret; +} + +static inline int log_event(NvmeRuHandle *ruh, uint8_t event_type) +{ + return (ruh->event_filter >> nvme_fdp_evf_shifts[event_type]) & 0x1; +} + +static bool nvme_update_ruh(NvmeCtrl *n, NvmeNamespace *ns, uint16_t pid) +{ + NvmeEnduranceGroup *endgrp = ns->endgrp; + NvmeRuHandle *ruh; + NvmeReclaimUnit *ru; + NvmeFdpEvent *e = NULL; + uint16_t ph, rg, ruhid; + + if (!nvme_parse_pid(ns, pid, &ph, &rg)) { + return false; + } + + ruhid = ns->fdp.phs[ph]; + + ruh = &endgrp->fdp.ruhs[ruhid]; + ru = &ruh->rus[rg]; + + if (ru->ruamw) { + if (log_event(ruh, FDP_EVT_RU_NOT_FULLY_WRITTEN)) { + e = nvme_fdp_alloc_event(n, &endgrp->fdp.host_events); + e->type = FDP_EVT_RU_NOT_FULLY_WRITTEN; + e->flags = FDPEF_PIV | FDPEF_NSIDV | FDPEF_LV; + e->pid = cpu_to_le16(pid); + e->nsid = cpu_to_le32(ns->params.nsid); + e->rgid = cpu_to_le16(rg); + e->ruhid = cpu_to_le16(ruhid); + } + + /* log (eventual) GC overhead of prematurely swapping the RU */ + nvme_fdp_stat_inc(&endgrp->fdp.mbmw, nvme_l2b(ns, ru->ruamw)); + } + + ru->ruamw = ruh->ruamw; + + return true; +} + static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr) { hwaddr hi, lo; @@ -3320,6 +3445,41 @@ invalid: return status | NVME_DNR; } +static void nvme_do_write_fdp(NvmeCtrl *n, NvmeRequest *req, uint64_t slba, + uint32_t nlb) +{ + NvmeNamespace *ns = req->ns; + NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; + uint64_t data_size = nvme_l2b(ns, nlb); + uint32_t dw12 = le32_to_cpu(req->cmd.cdw12); + uint8_t dtype = (dw12 >> 20) & 0xf; + uint16_t pid = le16_to_cpu(rw->dspec); + uint16_t ph, rg, ruhid; + NvmeReclaimUnit *ru; + + if (dtype != NVME_DIRECTIVE_DATA_PLACEMENT || + !nvme_parse_pid(ns, pid, &ph, &rg)) { + ph = 0; + rg = 0; + } + + ruhid = ns->fdp.phs[ph]; + ru = &ns->endgrp->fdp.ruhs[ruhid].rus[rg]; + + nvme_fdp_stat_inc(&ns->endgrp->fdp.hbmw, data_size); + nvme_fdp_stat_inc(&ns->endgrp->fdp.mbmw, data_size); + + while (nlb) { + if (nlb < ru->ruamw) { + ru->ruamw -= nlb; + break; + } + + nlb -= ru->ruamw; + nvme_update_ruh(n, ns, pid); + } +} + static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, bool wrz) { @@ -3429,6 +3589,8 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, if (!(zone->d.za & NVME_ZA_ZRWA_VALID)) { zone->w_ptr += nlb; } + } else if (ns->endgrp && ns->endgrp->fdp.enabled) { + nvme_do_write_fdp(n, req, slba, nlb); } data_offset = nvme_l2b(ns, slba); @@ -4086,6 +4248,126 @@ static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req) return status; } +static uint16_t nvme_io_mgmt_recv_ruhs(NvmeCtrl *n, NvmeRequest *req, + size_t len) +{ + NvmeNamespace *ns = req->ns; + NvmeEnduranceGroup *endgrp; + NvmeRuhStatus *hdr; + NvmeRuhStatusDescr *ruhsd; + unsigned int nruhsd; + uint16_t rg, ph, *ruhid; + size_t trans_len; + g_autofree uint8_t *buf = NULL; + + if (!n->subsys) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (ns->params.nsid == 0 || ns->params.nsid == 0xffffffff) { + return NVME_INVALID_NSID | NVME_DNR; + } + + if (!n->subsys->endgrp.fdp.enabled) { + return NVME_FDP_DISABLED | NVME_DNR; + } + + endgrp = ns->endgrp; + + nruhsd = ns->fdp.nphs * endgrp->fdp.nrg; + trans_len = sizeof(NvmeRuhStatus) + nruhsd * sizeof(NvmeRuhStatusDescr); + buf = g_malloc(trans_len); + + trans_len = MIN(trans_len, len); + + hdr = (NvmeRuhStatus *)buf; + ruhsd = (NvmeRuhStatusDescr *)(buf + sizeof(NvmeRuhStatus)); + + hdr->nruhsd = cpu_to_le16(nruhsd); + + ruhid = ns->fdp.phs; + + for (ph = 0; ph < ns->fdp.nphs; ph++, ruhid++) { + NvmeRuHandle *ruh = &endgrp->fdp.ruhs[*ruhid]; + + for (rg = 0; rg < endgrp->fdp.nrg; rg++, ruhsd++) { + uint16_t pid = nvme_make_pid(ns, rg, ph); + + ruhsd->pid = cpu_to_le16(pid); + ruhsd->ruhid = *ruhid; + ruhsd->earutr = 0; + ruhsd->ruamw = cpu_to_le64(ruh->rus[rg].ruamw); + } + } + + return nvme_c2h(n, buf, trans_len, req); +} + +static uint16_t nvme_io_mgmt_recv(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeCmd *cmd = &req->cmd; + uint32_t cdw10 = le32_to_cpu(cmd->cdw10); + uint32_t numd = le32_to_cpu(cmd->cdw11); + uint8_t mo = (cdw10 & 0xff); + size_t len = (numd + 1) << 2; + + switch (mo) { + case NVME_IOMR_MO_NOP: + return 0; + case NVME_IOMR_MO_RUH_STATUS: + return nvme_io_mgmt_recv_ruhs(n, req, len); + default: + return NVME_INVALID_FIELD | NVME_DNR; + }; +} + +static uint16_t nvme_io_mgmt_send_ruh_update(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeCmd *cmd = &req->cmd; + NvmeNamespace *ns = req->ns; + uint32_t cdw10 = le32_to_cpu(cmd->cdw10); + uint16_t ret = NVME_SUCCESS; + uint32_t npid = (cdw10 >> 1) + 1; + unsigned int i = 0; + g_autofree uint16_t *pids = NULL; + uint32_t maxnpid = n->subsys->endgrp.fdp.nrg * n->subsys->endgrp.fdp.nruh; + + if (unlikely(npid >= MIN(NVME_FDP_MAXPIDS, maxnpid))) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + pids = g_new(uint16_t, npid); + + ret = nvme_h2c(n, pids, npid * sizeof(uint16_t), req); + if (ret) { + return ret; + } + + for (; i < npid; i++) { + if (!nvme_update_ruh(n, ns, pids[i])) { + return NVME_INVALID_FIELD | NVME_DNR; + } + } + + return ret; +} + +static uint16_t nvme_io_mgmt_send(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeCmd *cmd = &req->cmd; + uint32_t cdw10 = le32_to_cpu(cmd->cdw10); + uint8_t mo = (cdw10 & 0xff); + + switch (mo) { + case NVME_IOMS_MO_NOP: + return 0; + case NVME_IOMS_MO_RUH_UPDATE: + return nvme_io_mgmt_send_ruh_update(n, req); + default: + return NVME_INVALID_FIELD | NVME_DNR; + }; +} + static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) { NvmeNamespace *ns; @@ -4162,6 +4444,10 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) return nvme_zone_mgmt_send(n, req); case NVME_CMD_ZONE_MGMT_RECV: return nvme_zone_mgmt_recv(n, req); + case NVME_CMD_IO_MGMT_RECV: + return nvme_io_mgmt_recv(n, req); + case NVME_CMD_IO_MGMT_SEND: + return nvme_io_mgmt_send(n, req); default: assert(false); } @@ -4386,8 +4672,8 @@ static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats) { BlockAcctStats *s = blk_get_stats(ns->blkconf.blk); - stats->units_read += s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS; - stats->units_written += s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS; + stats->units_read += s->nr_bytes[BLOCK_ACCT_READ]; + stats->units_written += s->nr_bytes[BLOCK_ACCT_WRITE]; stats->read_commands += s->nr_ops[BLOCK_ACCT_READ]; stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE]; } @@ -4401,6 +4687,7 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, uint32_t trans_len; NvmeNamespace *ns; time_t current_ms; + uint64_t u_read, u_written; if (off >= sizeof(smart)) { return NVME_INVALID_FIELD | NVME_DNR; @@ -4427,10 +4714,11 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, trans_len = MIN(sizeof(smart) - off, buf_len); smart.critical_warning = n->smart_critical_warning; - smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_read, - 1000)); - smart.data_units_written[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_written, - 1000)); + u_read = DIV_ROUND_UP(stats.units_read >> BDRV_SECTOR_BITS, 1000); + u_written = DIV_ROUND_UP(stats.units_written >> BDRV_SECTOR_BITS, 1000); + + smart.data_units_read[0] = cpu_to_le64(u_read); + smart.data_units_written[0] = cpu_to_le64(u_written); smart.host_read_commands[0] = cpu_to_le64(stats.read_commands); smart.host_write_commands[0] = cpu_to_le64(stats.write_commands); @@ -4452,6 +4740,48 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, return nvme_c2h(n, (uint8_t *) &smart + off, trans_len, req); } +static uint16_t nvme_endgrp_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + uint32_t dw11 = le32_to_cpu(req->cmd.cdw11); + uint16_t endgrpid = (dw11 >> 16) & 0xffff; + struct nvme_stats stats = {}; + NvmeEndGrpLog info = {}; + int i; + + if (!n->subsys || endgrpid != 0x1) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (off >= sizeof(info)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { + NvmeNamespace *ns = nvme_subsys_ns(n->subsys, i); + if (!ns) { + continue; + } + + nvme_set_blk_stats(ns, &stats); + } + + info.data_units_read[0] = + cpu_to_le64(DIV_ROUND_UP(stats.units_read / 1000000000, 1000000000)); + info.data_units_written[0] = + cpu_to_le64(DIV_ROUND_UP(stats.units_written / 1000000000, 1000000000)); + info.media_units_written[0] = + cpu_to_le64(DIV_ROUND_UP(stats.units_written / 1000000000, 1000000000)); + + info.host_read_commands[0] = cpu_to_le64(stats.read_commands); + info.host_write_commands[0] = cpu_to_le64(stats.write_commands); + + buf_len = MIN(sizeof(info) - off, buf_len); + + return nvme_c2h(n, (uint8_t *)&info + off, buf_len, req); +} + + static uint16_t nvme_fw_log_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off, NvmeRequest *req) { @@ -4577,6 +4907,207 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len, return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req); } +static size_t sizeof_fdp_conf_descr(size_t nruh, size_t vss) +{ + size_t entry_siz = sizeof(NvmeFdpDescrHdr) + nruh * sizeof(NvmeRuhDescr) + + vss; + return ROUND_UP(entry_siz, 8); +} + +static uint16_t nvme_fdp_confs(NvmeCtrl *n, uint32_t endgrpid, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + uint32_t log_size, trans_len; + g_autofree uint8_t *buf = NULL; + NvmeFdpDescrHdr *hdr; + NvmeRuhDescr *ruhd; + NvmeEnduranceGroup *endgrp; + NvmeFdpConfsHdr *log; + size_t nruh, fdp_descr_size; + int i; + + if (endgrpid != 1 || !n->subsys) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + endgrp = &n->subsys->endgrp; + + if (endgrp->fdp.enabled) { + nruh = endgrp->fdp.nruh; + } else { + nruh = 1; + } + + fdp_descr_size = sizeof_fdp_conf_descr(nruh, FDPVSS); + log_size = sizeof(NvmeFdpConfsHdr) + fdp_descr_size; + + if (off >= log_size) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + trans_len = MIN(log_size - off, buf_len); + + buf = g_malloc0(log_size); + log = (NvmeFdpConfsHdr *)buf; + hdr = (NvmeFdpDescrHdr *)(log + 1); + ruhd = (NvmeRuhDescr *)(buf + sizeof(*log) + sizeof(*hdr)); + + log->num_confs = cpu_to_le16(0); + log->size = cpu_to_le32(log_size); + + hdr->descr_size = cpu_to_le16(fdp_descr_size); + if (endgrp->fdp.enabled) { + hdr->fdpa = FIELD_DP8(hdr->fdpa, FDPA, VALID, 1); + hdr->fdpa = FIELD_DP8(hdr->fdpa, FDPA, RGIF, endgrp->fdp.rgif); + hdr->nrg = cpu_to_le16(endgrp->fdp.nrg); + hdr->nruh = cpu_to_le16(endgrp->fdp.nruh); + hdr->maxpids = cpu_to_le16(NVME_FDP_MAXPIDS - 1); + hdr->nnss = cpu_to_le32(NVME_MAX_NAMESPACES); + hdr->runs = cpu_to_le64(endgrp->fdp.runs); + + for (i = 0; i < nruh; i++) { + ruhd->ruht = NVME_RUHT_INITIALLY_ISOLATED; + ruhd++; + } + } else { + /* 1 bit for RUH in PIF -> 2 RUHs max. */ + hdr->nrg = cpu_to_le16(1); + hdr->nruh = cpu_to_le16(1); + hdr->maxpids = cpu_to_le16(NVME_FDP_MAXPIDS - 1); + hdr->nnss = cpu_to_le32(1); + hdr->runs = cpu_to_le64(96 * MiB); + + ruhd->ruht = NVME_RUHT_INITIALLY_ISOLATED; + } + + return nvme_c2h(n, (uint8_t *)buf + off, trans_len, req); +} + +static uint16_t nvme_fdp_ruh_usage(NvmeCtrl *n, uint32_t endgrpid, + uint32_t dw10, uint32_t dw12, + uint32_t buf_len, uint64_t off, + NvmeRequest *req) +{ + NvmeRuHandle *ruh; + NvmeRuhuLog *hdr; + NvmeRuhuDescr *ruhud; + NvmeEnduranceGroup *endgrp; + g_autofree uint8_t *buf = NULL; + uint32_t log_size, trans_len; + uint16_t i; + + if (endgrpid != 1 || !n->subsys) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + endgrp = &n->subsys->endgrp; + + if (!endgrp->fdp.enabled) { + return NVME_FDP_DISABLED | NVME_DNR; + } + + log_size = sizeof(NvmeRuhuLog) + endgrp->fdp.nruh * sizeof(NvmeRuhuDescr); + + if (off >= log_size) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + trans_len = MIN(log_size - off, buf_len); + + buf = g_malloc0(log_size); + hdr = (NvmeRuhuLog *)buf; + ruhud = (NvmeRuhuDescr *)(hdr + 1); + + ruh = endgrp->fdp.ruhs; + hdr->nruh = cpu_to_le16(endgrp->fdp.nruh); + + for (i = 0; i < endgrp->fdp.nruh; i++, ruhud++, ruh++) { + ruhud->ruha = ruh->ruha; + } + + return nvme_c2h(n, (uint8_t *)buf + off, trans_len, req); +} + +static uint16_t nvme_fdp_stats(NvmeCtrl *n, uint32_t endgrpid, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + NvmeEnduranceGroup *endgrp; + NvmeFdpStatsLog log = {}; + uint32_t trans_len; + + if (off >= sizeof(NvmeFdpStatsLog)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (endgrpid != 1 || !n->subsys) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (!n->subsys->endgrp.fdp.enabled) { + return NVME_FDP_DISABLED | NVME_DNR; + } + + endgrp = &n->subsys->endgrp; + + trans_len = MIN(sizeof(log) - off, buf_len); + + /* spec value is 128 bit, we only use 64 bit */ + log.hbmw[0] = cpu_to_le64(endgrp->fdp.hbmw); + log.mbmw[0] = cpu_to_le64(endgrp->fdp.mbmw); + log.mbe[0] = cpu_to_le64(endgrp->fdp.mbe); + + return nvme_c2h(n, (uint8_t *)&log + off, trans_len, req); +} + +static uint16_t nvme_fdp_events(NvmeCtrl *n, uint32_t endgrpid, + uint32_t buf_len, uint64_t off, + NvmeRequest *req) +{ + NvmeEnduranceGroup *endgrp; + NvmeCmd *cmd = &req->cmd; + bool host_events = (cmd->cdw10 >> 8) & 0x1; + uint32_t log_size, trans_len; + NvmeFdpEventBuffer *ebuf; + g_autofree NvmeFdpEventsLog *elog = NULL; + NvmeFdpEvent *event; + + if (endgrpid != 1 || !n->subsys) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + endgrp = &n->subsys->endgrp; + + if (!endgrp->fdp.enabled) { + return NVME_FDP_DISABLED | NVME_DNR; + } + + if (host_events) { + ebuf = &endgrp->fdp.host_events; + } else { + ebuf = &endgrp->fdp.ctrl_events; + } + + log_size = sizeof(NvmeFdpEventsLog) + ebuf->nelems * sizeof(NvmeFdpEvent); + trans_len = MIN(log_size - off, buf_len); + elog = g_malloc0(log_size); + elog->num_events = cpu_to_le32(ebuf->nelems); + event = (NvmeFdpEvent *)(elog + 1); + + if (ebuf->nelems && ebuf->start == ebuf->next) { + unsigned int nelems = (NVME_FDP_MAX_EVENTS - ebuf->start); + /* wrap over, copy [start;NVME_FDP_MAX_EVENTS[ and [0; next[ */ + memcpy(event, &ebuf->events[ebuf->start], + sizeof(NvmeFdpEvent) * nelems); + memcpy(event + nelems, ebuf->events, + sizeof(NvmeFdpEvent) * ebuf->next); + } else if (ebuf->start < ebuf->next) { + memcpy(event, &ebuf->events[ebuf->start], + sizeof(NvmeFdpEvent) * (ebuf->next - ebuf->start)); + } + + return nvme_c2h(n, (uint8_t *)elog + off, trans_len, req); +} + static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) { NvmeCmd *cmd = &req->cmd; @@ -4589,13 +5120,14 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) uint8_t lsp = (dw10 >> 8) & 0xf; uint8_t rae = (dw10 >> 15) & 0x1; uint8_t csi = le32_to_cpu(cmd->cdw14) >> 24; - uint32_t numdl, numdu; + uint32_t numdl, numdu, lspi; uint64_t off, lpol, lpou; size_t len; uint16_t status; numdl = (dw10 >> 16); numdu = (dw11 & 0xffff); + lspi = (dw11 >> 16); lpol = dw12; lpou = dw13; @@ -4624,6 +5156,16 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req) return nvme_changed_nslist(n, rae, len, off, req); case NVME_LOG_CMD_EFFECTS: return nvme_cmd_effects(n, csi, len, off, req); + case NVME_LOG_ENDGRP: + return nvme_endgrp_info(n, rae, len, off, req); + case NVME_LOG_FDP_CONFS: + return nvme_fdp_confs(n, lspi, len, off, req); + case NVME_LOG_FDP_RUH_USAGE: + return nvme_fdp_ruh_usage(n, lspi, dw10, dw12, len, off, req); + case NVME_LOG_FDP_STATS: + return nvme_fdp_stats(n, lspi, len, off, req); + case NVME_LOG_FDP_EVENTS: + return nvme_fdp_events(n, lspi, len, off, req); default: trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid); return NVME_INVALID_FIELD | NVME_DNR; @@ -5210,6 +5752,84 @@ static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeRequest *req) return nvme_c2h(n, (uint8_t *)×tamp, sizeof(timestamp), req); } +static int nvme_get_feature_fdp(NvmeCtrl *n, uint32_t endgrpid, + uint32_t *result) +{ + *result = 0; + + if (!n->subsys || !n->subsys->endgrp.fdp.enabled) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + *result = FIELD_DP16(0, FEAT_FDP, FDPE, 1); + *result = FIELD_DP16(*result, FEAT_FDP, CONF_NDX, 0); + + return NVME_SUCCESS; +} + +static uint16_t nvme_get_feature_fdp_events(NvmeCtrl *n, NvmeNamespace *ns, + NvmeRequest *req, uint32_t *result) +{ + NvmeCmd *cmd = &req->cmd; + uint32_t cdw11 = le32_to_cpu(cmd->cdw11); + uint16_t ph = cdw11 & 0xffff; + uint8_t noet = (cdw11 >> 16) & 0xff; + uint16_t ruhid, ret; + uint32_t nentries = 0; + uint8_t s_events_ndx = 0; + size_t s_events_siz = sizeof(NvmeFdpEventDescr) * noet; + g_autofree NvmeFdpEventDescr *s_events = g_malloc0(s_events_siz); + NvmeRuHandle *ruh; + NvmeFdpEventDescr *s_event; + + if (!n->subsys || !n->subsys->endgrp.fdp.enabled) { + return NVME_FDP_DISABLED | NVME_DNR; + } + + if (!nvme_ph_valid(ns, ph)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ruhid = ns->fdp.phs[ph]; + ruh = &n->subsys->endgrp.fdp.ruhs[ruhid]; + + assert(ruh); + + if (unlikely(noet == 0)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + for (uint8_t event_type = 0; event_type < FDP_EVT_MAX; event_type++) { + uint8_t shift = nvme_fdp_evf_shifts[event_type]; + if (!shift && event_type) { + /* + * only first entry (event_type == 0) has a shift value of 0 + * other entries are simply unpopulated. + */ + continue; + } + + nentries++; + + s_event = &s_events[s_events_ndx]; + s_event->evt = event_type; + s_event->evta = (ruh->event_filter >> shift) & 0x1; + + /* break if all `noet` entries are filled */ + if ((++s_events_ndx) == noet) { + break; + } + } + + ret = nvme_c2h(n, s_events, s_events_siz, req); + if (ret) { + return ret; + } + + *result = nentries; + return NVME_SUCCESS; +} + static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) { NvmeCmd *cmd = &req->cmd; @@ -5222,6 +5842,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) uint16_t iv; NvmeNamespace *ns; int i; + uint16_t endgrpid = 0, ret = NVME_SUCCESS; static const uint32_t nvme_feature_default[NVME_FID_MAX] = { [NVME_ARBITRATION] = NVME_ARB_AB_NOLIMIT, @@ -5319,6 +5940,33 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) case NVME_HOST_BEHAVIOR_SUPPORT: return nvme_c2h(n, (uint8_t *)&n->features.hbs, sizeof(n->features.hbs), req); + case NVME_FDP_MODE: + endgrpid = dw11 & 0xff; + + if (endgrpid != 0x1) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ret = nvme_get_feature_fdp(n, endgrpid, &result); + if (ret) { + return ret; + } + goto out; + case NVME_FDP_EVENTS: + if (!nvme_nsid_valid(n, nsid)) { + return NVME_INVALID_NSID | NVME_DNR; + } + + ns = nvme_ns(n, nsid); + if (unlikely(!ns)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ret = nvme_get_feature_fdp_events(n, ns, req, &result); + if (ret) { + return ret; + } + goto out; default: break; } @@ -5352,6 +6000,20 @@ defaults: result |= NVME_INTVC_NOCOALESCING; } break; + case NVME_FDP_MODE: + endgrpid = dw11 & 0xff; + + if (endgrpid != 0x1) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ret = nvme_get_feature_fdp(n, endgrpid, &result); + if (ret) { + return ret; + } + goto out; + + break; default: result = nvme_feature_default[fid]; break; @@ -5359,7 +6021,7 @@ defaults: out: req->cqe.result = cpu_to_le32(result); - return NVME_SUCCESS; + return ret; } static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req) @@ -5377,6 +6039,51 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req) return NVME_SUCCESS; } +static uint16_t nvme_set_feature_fdp_events(NvmeCtrl *n, NvmeNamespace *ns, + NvmeRequest *req) +{ + NvmeCmd *cmd = &req->cmd; + uint32_t cdw11 = le32_to_cpu(cmd->cdw11); + uint16_t ph = cdw11 & 0xffff; + uint8_t noet = (cdw11 >> 16) & 0xff; + uint16_t ret, ruhid; + uint8_t enable = le32_to_cpu(cmd->cdw12) & 0x1; + uint8_t event_mask = 0; + unsigned int i; + g_autofree uint8_t *events = g_malloc0(noet); + NvmeRuHandle *ruh = NULL; + + assert(ns); + + if (!n->subsys || !n->subsys->endgrp.fdp.enabled) { + return NVME_FDP_DISABLED | NVME_DNR; + } + + if (!nvme_ph_valid(ns, ph)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ruhid = ns->fdp.phs[ph]; + ruh = &n->subsys->endgrp.fdp.ruhs[ruhid]; + + ret = nvme_h2c(n, events, noet, req); + if (ret) { + return ret; + } + + for (i = 0; i < noet; i++) { + event_mask |= (1 << nvme_fdp_evf_shifts[events[i]]); + } + + if (enable) { + ruh->event_filter |= event_mask; + } else { + ruh->event_filter = ruh->event_filter & ~event_mask; + } + + return NVME_SUCCESS; +} + static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) { NvmeNamespace *ns = NULL; @@ -5536,6 +6243,11 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) return NVME_CMD_SET_CMB_REJECTED | NVME_DNR; } break; + case NVME_FDP_MODE: + /* spec: abort with cmd seq err if there's one or more NS' in endgrp */ + return NVME_CMD_SEQ_ERROR | NVME_DNR; + case NVME_FDP_EVENTS: + return nvme_set_feature_fdp_events(n, ns, req); default: return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR; } @@ -6104,6 +6816,61 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req) return NVME_SUCCESS; } +static uint16_t nvme_directive_send(NvmeCtrl *n, NvmeRequest *req) +{ + return NVME_INVALID_FIELD | NVME_DNR; +} + +static uint16_t nvme_directive_receive(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeNamespace *ns; + uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); + uint32_t dw11 = le32_to_cpu(req->cmd.cdw11); + uint32_t nsid = le32_to_cpu(req->cmd.nsid); + uint8_t doper, dtype; + uint32_t numd, trans_len; + NvmeDirectiveIdentify id = { + .supported = 1 << NVME_DIRECTIVE_IDENTIFY, + .enabled = 1 << NVME_DIRECTIVE_IDENTIFY, + }; + + numd = dw10 + 1; + doper = dw11 & 0xff; + dtype = (dw11 >> 8) & 0xff; + + trans_len = MIN(sizeof(NvmeDirectiveIdentify), numd << 2); + + if (nsid == NVME_NSID_BROADCAST || dtype != NVME_DIRECTIVE_IDENTIFY || + doper != NVME_DIRECTIVE_RETURN_PARAMS) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + ns = nvme_ns(n, nsid); + if (!ns) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + switch (dtype) { + case NVME_DIRECTIVE_IDENTIFY: + switch (doper) { + case NVME_DIRECTIVE_RETURN_PARAMS: + if (ns->endgrp->fdp.enabled) { + id.supported |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT; + id.enabled |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT; + id.persistent |= 1 << NVME_DIRECTIVE_DATA_PLACEMENT; + } + + return nvme_c2h(n, (uint8_t *)&id, trans_len, req); + + default: + return NVME_INVALID_FIELD | NVME_DNR; + } + + default: + return NVME_INVALID_FIELD; + } +} + static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) { trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode, @@ -6152,6 +6919,10 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) return nvme_dbbuf_config(n, req); case NVME_ADM_CMD_FORMAT_NVM: return nvme_format(n, req); + case NVME_ADM_CMD_DIRECTIVE_SEND: + return nvme_directive_send(n, req); + case NVME_ADM_CMD_DIRECTIVE_RECV: + return nvme_directive_receive(n, req); default: assert(false); } @@ -7380,6 +8151,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) uint8_t *pci_conf = pci_dev->config; uint64_t cap = ldq_le_p(&n->bar.cap); NvmeSecCtrlEntry *sctrl = nvme_sctrl(n); + uint32_t ctratt; id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); @@ -7390,7 +8162,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->cntlid = cpu_to_le16(n->cntlid); id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR); - id->ctratt |= cpu_to_le32(NVME_CTRATT_ELBAS); + ctratt = NVME_CTRATT_ELBAS; id->rab = 6; @@ -7407,7 +8179,8 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->mdts = n->params.mdts; id->ver = cpu_to_le32(NVME_SPEC_VER); id->oacs = - cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT | NVME_OACS_DBBUF); + cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT | NVME_OACS_DBBUF | + NVME_OACS_DIRECTIVES); id->cntrltype = 0x1; /* @@ -7457,8 +8230,17 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) if (n->subsys) { id->cmic |= NVME_CMIC_MULTI_CTRL; + ctratt |= NVME_CTRATT_ENDGRPS; + + id->endgidmax = cpu_to_le16(0x1); + + if (n->subsys->endgrp.fdp.enabled) { + ctratt |= NVME_CTRATT_FDPS; + } } + id->ctratt = cpu_to_le32(ctratt); + NVME_CAP_SET_MQES(cap, 0x7ff); NVME_CAP_SET_CQR(cap, 1); NVME_CAP_SET_TO(cap, 0xf); diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 62a1f97be0..cfac960dcf 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -14,8 +14,10 @@ #include "qemu/osdep.h" #include "qemu/units.h" +#include "qemu/cutils.h" #include "qemu/error-report.h" #include "qapi/error.h" +#include "qemu/bitops.h" #include "sysemu/sysemu.h" #include "sysemu/block-backend.h" @@ -377,6 +379,130 @@ static void nvme_zoned_ns_shutdown(NvmeNamespace *ns) assert(ns->nr_open_zones == 0); } +static NvmeRuHandle *nvme_find_ruh_by_attr(NvmeEnduranceGroup *endgrp, + uint8_t ruha, uint16_t *ruhid) +{ + for (uint16_t i = 0; i < endgrp->fdp.nruh; i++) { + NvmeRuHandle *ruh = &endgrp->fdp.ruhs[i]; + + if (ruh->ruha == ruha) { + *ruhid = i; + return ruh; + } + } + + return NULL; +} + +static bool nvme_ns_init_fdp(NvmeNamespace *ns, Error **errp) +{ + NvmeEnduranceGroup *endgrp = ns->endgrp; + NvmeRuHandle *ruh; + uint8_t lbafi = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas); + unsigned int *ruhid, *ruhids; + char *r, *p, *token; + uint16_t *ph; + + if (!ns->params.fdp.ruhs) { + ns->fdp.nphs = 1; + ph = ns->fdp.phs = g_new(uint16_t, 1); + + ruh = nvme_find_ruh_by_attr(endgrp, NVME_RUHA_CTRL, ph); + if (!ruh) { + ruh = nvme_find_ruh_by_attr(endgrp, NVME_RUHA_UNUSED, ph); + if (!ruh) { + error_setg(errp, "no unused reclaim unit handles left"); + return false; + } + + ruh->ruha = NVME_RUHA_CTRL; + ruh->lbafi = lbafi; + ruh->ruamw = endgrp->fdp.runs >> ns->lbaf.ds; + + for (uint16_t rg = 0; rg < endgrp->fdp.nrg; rg++) { + ruh->rus[rg].ruamw = ruh->ruamw; + } + } else if (ruh->lbafi != lbafi) { + error_setg(errp, "lba format index of controller assigned " + "reclaim unit handle does not match namespace lba " + "format index"); + return false; + } + + return true; + } + + ruhid = ruhids = g_new0(unsigned int, endgrp->fdp.nruh); + r = p = strdup(ns->params.fdp.ruhs); + + /* parse the placement handle identifiers */ + while ((token = qemu_strsep(&p, ";")) != NULL) { + ns->fdp.nphs += 1; + if (ns->fdp.nphs > NVME_FDP_MAXPIDS || + ns->fdp.nphs == endgrp->fdp.nruh) { + error_setg(errp, "too many placement handles"); + free(r); + return false; + } + + if (qemu_strtoui(token, NULL, 0, ruhid++) < 0) { + error_setg(errp, "cannot parse reclaim unit handle identifier"); + free(r); + return false; + } + } + + free(r); + + ph = ns->fdp.phs = g_new(uint16_t, ns->fdp.nphs); + + ruhid = ruhids; + + /* verify the identifiers */ + for (unsigned int i = 0; i < ns->fdp.nphs; i++, ruhid++, ph++) { + if (*ruhid >= endgrp->fdp.nruh) { + error_setg(errp, "invalid reclaim unit handle identifier"); + return false; + } + + ruh = &endgrp->fdp.ruhs[*ruhid]; + + switch (ruh->ruha) { + case NVME_RUHA_UNUSED: + ruh->ruha = NVME_RUHA_HOST; + ruh->lbafi = lbafi; + ruh->ruamw = endgrp->fdp.runs >> ns->lbaf.ds; + + for (uint16_t rg = 0; rg < endgrp->fdp.nrg; rg++) { + ruh->rus[rg].ruamw = ruh->ruamw; + } + + break; + + case NVME_RUHA_HOST: + if (ruh->lbafi != lbafi) { + error_setg(errp, "lba format index of host assigned" + "reclaim unit handle does not match namespace " + "lba format index"); + return false; + } + + break; + + case NVME_RUHA_CTRL: + error_setg(errp, "reclaim unit handle is controller assigned"); + return false; + + default: + abort(); + } + + *ph = *ruhid; + } + + return true; +} + static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) { unsigned int pi_size; @@ -417,6 +543,11 @@ static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp) return -1; } + if (ns->params.zoned && ns->endgrp && ns->endgrp->fdp.enabled) { + error_setg(errp, "cannot be a zoned- in an FDP configuration"); + return -1; + } + if (ns->params.zoned) { if (ns->params.max_active_zones) { if (ns->params.max_open_zones > ns->params.max_active_zones) { @@ -502,6 +633,12 @@ int nvme_ns_setup(NvmeNamespace *ns, Error **errp) nvme_ns_init_zoned(ns); } + if (ns->endgrp && ns->endgrp->fdp.enabled) { + if (!nvme_ns_init_fdp(ns, errp)) { + return -1; + } + } + return 0; } @@ -525,6 +662,10 @@ void nvme_ns_cleanup(NvmeNamespace *ns) g_free(ns->zone_array); g_free(ns->zd_extensions); } + + if (ns->endgrp && ns->endgrp->fdp.enabled) { + g_free(ns->fdp.phs); + } } static void nvme_ns_unrealize(DeviceState *dev) @@ -561,6 +702,8 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) if (!qdev_set_parent_bus(dev, &subsys->bus.parent_bus, errp)) { return; } + ns->subsys = subsys; + ns->endgrp = &subsys->endgrp; } if (nvme_ns_setup(ns, errp)) { @@ -591,6 +734,8 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) if (subsys) { subsys->namespaces[nsid] = ns; + ns->id_ns.endgid = cpu_to_le16(0x1); + if (ns->params.detached) { return; } @@ -606,6 +751,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) return; } + } nvme_attach_ns(n, ns); @@ -644,6 +790,7 @@ static Property nvme_ns_props[] = { DEFINE_PROP_SIZE("zoned.zrwafg", NvmeNamespace, params.zrwafg, -1), DEFINE_PROP_BOOL("eui64-default", NvmeNamespace, params.eui64_default, false), + DEFINE_PROP_STRING("fdp.ruhs", NvmeNamespace, params.fdp.ruhs), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 16da27a69b..209e8f5b4c 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -27,6 +27,8 @@ #define NVME_MAX_CONTROLLERS 256 #define NVME_MAX_NAMESPACES 256 #define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000) +#define NVME_FDP_MAX_EVENTS 63 +#define NVME_FDP_MAXPIDS 128 QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1); @@ -45,17 +47,68 @@ typedef struct NvmeBus { OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS) #define SUBSYS_SLOT_RSVD (void *)0xFFFF +typedef struct NvmeReclaimUnit { + uint64_t ruamw; +} NvmeReclaimUnit; + +typedef struct NvmeRuHandle { + uint8_t ruht; + uint8_t ruha; + uint64_t event_filter; + uint8_t lbafi; + uint64_t ruamw; + + /* reclaim units indexed by reclaim group */ + NvmeReclaimUnit *rus; +} NvmeRuHandle; + +typedef struct NvmeFdpEventBuffer { + NvmeFdpEvent events[NVME_FDP_MAX_EVENTS]; + unsigned int nelems; + unsigned int start; + unsigned int next; +} NvmeFdpEventBuffer; + +typedef struct NvmeEnduranceGroup { + uint8_t event_conf; + + struct { + NvmeFdpEventBuffer host_events, ctrl_events; + + uint16_t nruh; + uint16_t nrg; + uint8_t rgif; + uint64_t runs; + + uint64_t hbmw; + uint64_t mbmw; + uint64_t mbe; + + bool enabled; + + NvmeRuHandle *ruhs; + } fdp; +} NvmeEnduranceGroup; + typedef struct NvmeSubsystem { DeviceState parent_obj; NvmeBus bus; uint8_t subnqn[256]; char *serial; - NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS]; - NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; + NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS]; + NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; + NvmeEnduranceGroup endgrp; struct { char *nqn; + + struct { + bool enabled; + uint64_t runs; + uint16_t nruh; + uint32_t nrg; + } fdp; } params; } NvmeSubsystem; @@ -96,6 +149,21 @@ typedef struct NvmeZone { QTAILQ_ENTRY(NvmeZone) entry; } NvmeZone; +#define FDP_EVT_MAX 0xff +#define NVME_FDP_MAX_NS_RUHS 32u +#define FDPVSS 0 + +static const uint8_t nvme_fdp_evf_shifts[FDP_EVT_MAX] = { + /* Host events */ + [FDP_EVT_RU_NOT_FULLY_WRITTEN] = 0, + [FDP_EVT_RU_ATL_EXCEEDED] = 1, + [FDP_EVT_CTRL_RESET_RUH] = 2, + [FDP_EVT_INVALID_PID] = 3, + /* CTRL events */ + [FDP_EVT_MEDIA_REALLOC] = 32, + [FDP_EVT_RUH_IMPLICIT_RU_CHANGE] = 33, +}; + typedef struct NvmeNamespaceParams { bool detached; bool shared; @@ -125,6 +193,10 @@ typedef struct NvmeNamespaceParams { uint32_t numzrwa; uint64_t zrwas; uint64_t zrwafg; + + struct { + char *ruhs; + } fdp; } NvmeNamespaceParams; typedef struct NvmeNamespace { @@ -167,10 +239,18 @@ typedef struct NvmeNamespace { int32_t nr_active_zones; NvmeNamespaceParams params; + NvmeSubsystem *subsys; + NvmeEnduranceGroup *endgrp; struct { uint32_t err_rec; } features; + + struct { + uint16_t nphs; + /* reclaim unit handle identifiers indexed by placement handle */ + uint16_t *phs; + } fdp; } NvmeNamespace; static inline uint32_t nvme_nsid(NvmeNamespace *ns) @@ -274,6 +354,12 @@ static inline void nvme_aor_dec_active(NvmeNamespace *ns) assert(ns->nr_active_zones >= 0); } +static inline void nvme_fdp_stat_inc(uint64_t *a, uint64_t b) +{ + uint64_t ret = *a + b; + *a = ret < *a ? UINT64_MAX : ret; +} + void nvme_ns_init_format(NvmeNamespace *ns); int nvme_ns_setup(NvmeNamespace *ns, Error **errp); void nvme_ns_drain(NvmeNamespace *ns); @@ -340,7 +426,9 @@ static inline const char *nvme_adm_opc_str(uint8_t opc) case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES"; case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ"; case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT"; + case NVME_ADM_CMD_DIRECTIVE_SEND: return "NVME_ADM_CMD_DIRECTIVE_SEND"; case NVME_ADM_CMD_VIRT_MNGMT: return "NVME_ADM_CMD_VIRT_MNGMT"; + case NVME_ADM_CMD_DIRECTIVE_RECV: return "NVME_ADM_CMD_DIRECTIVE_RECV"; case NVME_ADM_CMD_DBBUF_CONFIG: return "NVME_ADM_CMD_DBBUF_CONFIG"; case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM"; default: return "NVME_ADM_CMD_UNKNOWN"; diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c index 9d2643678b..24ddec860e 100644 --- a/hw/nvme/subsys.c +++ b/hw/nvme/subsys.c @@ -7,10 +7,13 @@ */ #include "qemu/osdep.h" +#include "qemu/units.h" #include "qapi/error.h" #include "nvme.h" +#define NVME_DEFAULT_RU_SIZE (96 * MiB) + static int nvme_subsys_reserve_cntlids(NvmeCtrl *n, int start, int num) { NvmeSubsystem *subsys = n->subsys; @@ -109,13 +112,95 @@ void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n) n->cntlid = -1; } -static void nvme_subsys_setup(NvmeSubsystem *subsys) +static bool nvme_calc_rgif(uint16_t nruh, uint16_t nrg, uint8_t *rgif) +{ + uint16_t val; + unsigned int i; + + if (unlikely(nrg == 1)) { + /* PIDRG_NORGI scenario, all of pid is used for PHID */ + *rgif = 0; + return true; + } + + val = nrg; + i = 0; + while (val) { + val >>= 1; + i++; + } + *rgif = i; + + /* ensure remaining bits suffice to represent number of phids in a RG */ + if (unlikely((UINT16_MAX >> i) < nruh)) { + *rgif = 0; + return false; + } + + return true; +} + +static bool nvme_subsys_setup_fdp(NvmeSubsystem *subsys, Error **errp) +{ + NvmeEnduranceGroup *endgrp = &subsys->endgrp; + + if (!subsys->params.fdp.runs) { + error_setg(errp, "fdp.runs must be non-zero"); + return false; + } + + endgrp->fdp.runs = subsys->params.fdp.runs; + + if (!subsys->params.fdp.nrg) { + error_setg(errp, "fdp.nrg must be non-zero"); + return false; + } + + endgrp->fdp.nrg = subsys->params.fdp.nrg; + + if (!subsys->params.fdp.nruh) { + error_setg(errp, "fdp.nruh must be non-zero"); + return false; + } + + endgrp->fdp.nruh = subsys->params.fdp.nruh; + + if (!nvme_calc_rgif(endgrp->fdp.nruh, endgrp->fdp.nrg, &endgrp->fdp.rgif)) { + error_setg(errp, + "cannot derive a valid rgif (nruh %"PRIu16" nrg %"PRIu32")", + endgrp->fdp.nruh, endgrp->fdp.nrg); + return false; + } + + endgrp->fdp.ruhs = g_new(NvmeRuHandle, endgrp->fdp.nruh); + + for (uint16_t ruhid = 0; ruhid < endgrp->fdp.nruh; ruhid++) { + endgrp->fdp.ruhs[ruhid] = (NvmeRuHandle) { + .ruht = NVME_RUHT_INITIALLY_ISOLATED, + .ruha = NVME_RUHA_UNUSED, + }; + + endgrp->fdp.ruhs[ruhid].rus = g_new(NvmeReclaimUnit, endgrp->fdp.nrg); + } + + endgrp->fdp.enabled = true; + + return true; +} + +static bool nvme_subsys_setup(NvmeSubsystem *subsys, Error **errp) { const char *nqn = subsys->params.nqn ? subsys->params.nqn : subsys->parent_obj.id; snprintf((char *)subsys->subnqn, sizeof(subsys->subnqn), "nqn.2019-08.org.qemu:%s", nqn); + + if (subsys->params.fdp.enabled && !nvme_subsys_setup_fdp(subsys, errp)) { + return false; + } + + return true; } static void nvme_subsys_realize(DeviceState *dev, Error **errp) @@ -124,11 +209,16 @@ static void nvme_subsys_realize(DeviceState *dev, Error **errp) qbus_init(&subsys->bus, sizeof(NvmeBus), TYPE_NVME_BUS, dev, dev->id); - nvme_subsys_setup(subsys); + nvme_subsys_setup(subsys, errp); } static Property nvme_subsystem_props[] = { DEFINE_PROP_STRING("nqn", NvmeSubsystem, params.nqn), + DEFINE_PROP_BOOL("fdp", NvmeSubsystem, params.fdp.enabled, false), + DEFINE_PROP_SIZE("fdp.runs", NvmeSubsystem, params.fdp.runs, + NVME_DEFAULT_RU_SIZE), + DEFINE_PROP_UINT32("fdp.nrg", NvmeSubsystem, params.fdp.nrg, 1), + DEFINE_PROP_UINT16("fdp.nruh", NvmeSubsystem, params.fdp.nruh, 0), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index b16f2260b4..7f7837e1a2 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -117,6 +117,7 @@ pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", sl pci_nvme_zoned_zrwa_implicit_flush(uint64_t zslba, uint32_t nlb) "zslba 0x%"PRIx64" nlb %"PRIu32"" pci_nvme_pci_reset(void) "PCI Function Level Reset" pci_nvme_virt_mngmt(uint16_t cid, uint16_t act, uint16_t cntlid, const char* rt, uint16_t nr) "cid %"PRIu16", act=0x%"PRIx16", ctrlid=%"PRIu16" %s nr=%"PRIu16"" +pci_nvme_fdp_ruh_change(uint16_t rgid, uint16_t ruhid) "change RU on RUH rgid=%"PRIu16", ruhid=%"PRIu16"" # error conditions pci_nvme_err_mdts(size_t len) "len %zu" diff --git a/hw/nvram/eeprom_at24c.c b/hw/nvram/eeprom_at24c.c index 3328c32814..613c4929e3 100644 --- a/hw/nvram/eeprom_at24c.c +++ b/hw/nvram/eeprom_at24c.c @@ -41,6 +41,13 @@ struct EEPROMState { uint16_t cur; /* total size in bytes */ uint32_t rsize; + /* + * address byte number + * for 24c01, 24c02 size <= 256 byte, use only 1 byte + * otherwise size > 256, use 2 byte + */ + uint8_t asize; + bool writable; /* cells changed since last START? */ bool changed; @@ -91,7 +98,11 @@ uint8_t at24c_eeprom_recv(I2CSlave *s) EEPROMState *ee = AT24C_EE(s); uint8_t ret; - if (ee->haveaddr == 1) { + /* + * If got the byte address but not completely with address size + * will return the invalid value + */ + if (ee->haveaddr > 0 && ee->haveaddr < ee->asize) { return 0xff; } @@ -108,11 +119,11 @@ int at24c_eeprom_send(I2CSlave *s, uint8_t data) { EEPROMState *ee = AT24C_EE(s); - if (ee->haveaddr < 2) { + if (ee->haveaddr < ee->asize) { ee->cur <<= 8; ee->cur |= data; ee->haveaddr++; - if (ee->haveaddr == 2) { + if (ee->haveaddr == ee->asize) { ee->cur %= ee->rsize; DPRINTK("Set pointer %04x\n", ee->cur); } @@ -199,6 +210,18 @@ static void at24c_eeprom_realize(DeviceState *dev, Error **errp) } DPRINTK("Reset read backing file\n"); } + + /* + * If address size didn't define with property set + * value is 0 as default, setting it by Rom size detecting. + */ + if (ee->asize == 0) { + if (ee->rsize <= 256) { + ee->asize = 1; + } else { + ee->asize = 2; + } + } } static @@ -213,6 +236,7 @@ void at24c_eeprom_reset(DeviceState *state) static Property at24c_eeprom_props[] = { DEFINE_PROP_UINT32("rom-size", EEPROMState, rsize, 0), + DEFINE_PROP_UINT8("address-size", EEPROMState, asize, 0), DEFINE_PROP_BOOL("writable", EEPROMState, writable, true), DEFINE_PROP_DRIVE("drive", EEPROMState, blk), DEFINE_PROP_END_OF_LIST() diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c index 432754eda4..29a5bef1d5 100644 --- a/hw/nvram/fw_cfg.c +++ b/hw/nvram/fw_cfg.c @@ -693,12 +693,12 @@ static const VMStateDescription vmstate_fw_cfg = { } }; -void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key, - FWCfgCallback select_cb, - FWCfgWriteCallback write_cb, - void *callback_opaque, - void *data, size_t len, - bool read_only) +static void fw_cfg_add_bytes_callback(FWCfgState *s, uint16_t key, + FWCfgCallback select_cb, + FWCfgWriteCallback write_cb, + void *callback_opaque, + void *data, size_t len, + bool read_only) { int arch = !!(key & FW_CFG_ARCH_LOCAL); @@ -741,15 +741,6 @@ void fw_cfg_add_bytes(FWCfgState *s, uint16_t key, void *data, size_t len) fw_cfg_add_bytes_callback(s, key, NULL, NULL, NULL, data, len, true); } -void *fw_cfg_read_bytes_ptr(FWCfgState *s, uint16_t key) -{ - int arch = !!(key & FW_CFG_ARCH_LOCAL); - - key &= FW_CFG_ENTRY_MASK; - assert(key < fw_cfg_max_entry(s)); - return s->entries[arch][key].data; -} - void fw_cfg_add_string(FWCfgState *s, uint16_t key, const char *value) { size_t sz = strlen(value) + 1; diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c index 3d4e6b59cd..54f507318f 100644 --- a/hw/pci-bridge/cxl_downstream.c +++ b/hw/pci-bridge/cxl_downstream.c @@ -15,7 +15,7 @@ #include "hw/pci/pcie_port.h" #include "qapi/error.h" -typedef struct CXLDownStreamPort { +typedef struct CXLDownstreamPort { /*< private >*/ PCIESlot parent_obj; diff --git a/hw/pci-host/pnv_phb.c b/hw/pci-host/pnv_phb.c index c62b08538a..82332d7a05 100644 --- a/hw/pci-host/pnv_phb.c +++ b/hw/pci-host/pnv_phb.c @@ -62,6 +62,15 @@ static bool pnv_parent_fixup(Object *parent, BusState *parent_bus, return true; } +static Object *pnv_phb_user_get_parent(PnvChip *chip, PnvPHB *phb, Error **errp) +{ + if (phb->version == 3) { + return OBJECT(pnv_chip_add_phb(chip, phb)); + } else { + return OBJECT(pnv_pec_add_phb(chip, phb, errp)); + } +} + /* * User created devices won't have the initial setup that default * devices have. This setup consists of assigning a parent device @@ -79,7 +88,7 @@ static bool pnv_phb_user_device_init(PnvPHB *phb, Error **errp) return false; } - parent = pnv_chip_add_phb(chip, phb, errp); + parent = pnv_phb_user_get_parent(chip, phb, errp); if (!parent) { return false; } diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c index 43267a428f..3b2850f7a3 100644 --- a/hw/pci-host/pnv_phb4_pec.c +++ b/hw/pci-host/pnv_phb4_pec.c @@ -112,9 +112,50 @@ static const MemoryRegionOps pnv_pec_pci_xscom_ops = { .endianness = DEVICE_BIG_ENDIAN, }; -static void pnv_pec_default_phb_realize(PnvPhb4PecState *pec, - int stack_no, - Error **errp) +PnvPhb4PecState *pnv_pec_add_phb(PnvChip *chip, PnvPHB *phb, Error **errp) +{ + PnvPhb4PecState *pecs = NULL; + int chip_id = phb->chip_id; + int index = phb->phb_id; + int i, j; + + if (phb->version == 4) { + Pnv9Chip *chip9 = PNV9_CHIP(chip); + + pecs = chip9->pecs; + } else if (phb->version == 5) { + Pnv10Chip *chip10 = PNV10_CHIP(chip); + + pecs = chip10->pecs; + } else { + g_assert_not_reached(); + } + + for (i = 0; i < chip->num_pecs; i++) { + /* + * For each PEC, check the amount of phbs it supports + * and see if the given phb4 index matches an index. + */ + PnvPhb4PecState *pec = &pecs[i]; + + for (j = 0; j < pec->num_phbs; j++) { + if (index == pnv_phb4_pec_get_phb_id(pec, j)) { + pec->phbs[j] = phb; + phb->pec = pec; + return pec; + } + } + } + error_setg(errp, + "pnv-phb4 chip-id %d index %d didn't match any existing PEC", + chip_id, index); + + return NULL; +} + +static PnvPHB *pnv_pec_default_phb_realize(PnvPhb4PecState *pec, + int stack_no, + Error **errp) { PnvPHB *phb = PNV_PHB(qdev_new(TYPE_PNV_PHB)); int phb_id = pnv_phb4_pec_get_phb_id(pec, stack_no); @@ -128,8 +169,9 @@ static void pnv_pec_default_phb_realize(PnvPhb4PecState *pec, &error_fatal); if (!sysbus_realize(SYS_BUS_DEVICE(phb), errp)) { - return; + return NULL; } + return phb; } static void pnv_pec_realize(DeviceState *dev, Error **errp) @@ -148,8 +190,9 @@ static void pnv_pec_realize(DeviceState *dev, Error **errp) /* Create PHBs if running with defaults */ if (defaults_enabled()) { + g_assert(pec->num_phbs <= MAX_PHBS_PER_PEC); for (i = 0; i < pec->num_phbs; i++) { - pnv_pec_default_phb_realize(pec, i, errp); + pec->phbs[i] = pnv_pec_default_phb_realize(pec, i, errp); } } @@ -197,9 +240,12 @@ static int pnv_pec_dt_xscom(PnvXScomInterface *dev, void *fdt, pecc->compat_size))); for (i = 0; i < pec->num_phbs; i++) { - int phb_id = pnv_phb4_pec_get_phb_id(pec, i); int stk_offset; + if (!pec->phbs[i]) { + continue; + } + name = g_strdup_printf("stack@%x", i); stk_offset = fdt_add_subnode(fdt, offset, name); _FDT(stk_offset); @@ -207,7 +253,8 @@ static int pnv_pec_dt_xscom(PnvXScomInterface *dev, void *fdt, _FDT((fdt_setprop(fdt, stk_offset, "compatible", pecc->stk_compat, pecc->stk_compat_size))); _FDT((fdt_setprop_cell(fdt, stk_offset, "reg", i))); - _FDT((fdt_setprop_cell(fdt, stk_offset, "ibm,phb-index", phb_id))); + _FDT((fdt_setprop_cell(fdt, stk_offset, "ibm,phb-index", + pec->phbs[i]->phb_id))); } return 0; diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 10c980b9f5..034fe49e9a 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -282,9 +282,13 @@ static void pci_change_irq_level(PCIDevice *pci_dev, int irq_num, int change) { PCIBus *bus; for (;;) { + int dev_irq = irq_num; bus = pci_get_bus(pci_dev); assert(bus->map_irq); irq_num = bus->map_irq(pci_dev, irq_num); + trace_pci_route_irq(dev_irq, DEVICE(pci_dev)->canonical_path, irq_num, + pci_bus_is_root(bus) ? "root-complex" + : DEVICE(bus->parent_dev)->canonical_path); if (bus->set_irq) break; pci_dev = bus->parent_dev; @@ -1617,8 +1621,12 @@ PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin) PCIBus *bus; do { + int dev_irq = pin; bus = pci_get_bus(dev); pin = bus->map_irq(dev, pin); + trace_pci_route_irq(dev_irq, DEVICE(dev)->canonical_path, pin, + pci_bus_is_root(bus) ? "root-complex" + : DEVICE(bus->parent_dev)->canonical_path); dev = bus->parent_dev; } while (dev); diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 924fdabd15..b8c24cf45f 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -39,6 +39,11 @@ #define PCIE_DEV_PRINTF(dev, fmt, ...) \ PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__) +static bool pcie_sltctl_powered_off(uint16_t sltctl) +{ + return (sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_OFF + && (sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF; +} /*************************************************************************** * pci express capability helper functions @@ -373,8 +378,8 @@ void pcie_cap_slot_enable_power(PCIDevice *dev) uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP); if (sltcap & PCI_EXP_SLTCAP_PCP) { - pci_set_word_by_mask(exp_cap + PCI_EXP_SLTCTL, - PCI_EXP_SLTCTL_PCC, PCI_EXP_SLTCTL_PWR_ON); + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_PCC); } } @@ -395,6 +400,7 @@ static void pcie_cap_update_power(PCIDevice *hotplug_dev) if (sltcap & PCI_EXP_SLTCAP_PCP) { power = (sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_ON; + /* Don't we need to check also (sltctl & PCI_EXP_SLTCTL_PIC) ? */ } pci_for_each_device(sec_bus, pci_bus_num(sec_bus), @@ -579,8 +585,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, return; } - if (((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF) && - ((sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_OFF)) { + if (pcie_sltctl_powered_off(sltctl)) { /* slot is powered off -> unplug without round-trip to the guest */ pcie_cap_slot_do_unplug(hotplug_pdev); hotplug_event_notify(hotplug_pdev); @@ -634,8 +639,8 @@ void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s) PCI_EXP_SLTCTL_PIC | PCI_EXP_SLTCTL_AIC); pci_word_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCTL, - PCI_EXP_SLTCTL_PIC_OFF | - PCI_EXP_SLTCTL_AIC_OFF); + PCI_EXP_SLTCTL_PWR_IND_OFF | + PCI_EXP_SLTCTL_ATTN_IND_OFF); pci_word_test_and_set_mask(dev->wmask + pos + PCI_EXP_SLTCTL, PCI_EXP_SLTCTL_PIC | PCI_EXP_SLTCTL_AIC | @@ -679,7 +684,8 @@ void pcie_cap_slot_reset(PCIDevice *dev) PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE); pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, - PCI_EXP_SLTCTL_AIC_OFF); + PCI_EXP_SLTCTL_PWR_IND_OFF | + PCI_EXP_SLTCTL_ATTN_IND_OFF); if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) { /* Downstream ports enforce device number 0. */ @@ -694,7 +700,8 @@ void pcie_cap_slot_reset(PCIDevice *dev) PCI_EXP_SLTCTL_PCC); } - pic = populated ? PCI_EXP_SLTCTL_PIC_ON : PCI_EXP_SLTCTL_PIC_OFF; + pic = populated ? + PCI_EXP_SLTCTL_PWR_IND_ON : PCI_EXP_SLTCTL_PWR_IND_OFF; pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, pic); } @@ -769,10 +776,9 @@ void pcie_cap_slot_write_config(PCIDevice *dev, * this is a work around for guests that overwrite * control of powered off slots before powering them on. */ - if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) && - (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF && - (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) || - (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) { + if ((sltsta & PCI_EXP_SLTSTA_PDS) && pcie_sltctl_powered_off(val) && + !pcie_sltctl_powered_off(old_slt_ctl)) + { pcie_cap_slot_do_unplug(dev); } pcie_cap_update_power(dev); diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c index fca7f6691a..e7bc7192f1 100644 --- a/hw/pci/shpc.c +++ b/hw/pci/shpc.c @@ -123,10 +123,13 @@ #define SHPC_PCI_TO_IDX(pci_slot) ((pci_slot) - 1) #define SHPC_IDX_TO_PHYSICAL(slot) ((slot) + 1) -static uint16_t shpc_get_status(SHPCDevice *shpc, int slot, uint16_t msk) +static uint8_t shpc_get_status(SHPCDevice *shpc, int slot, uint16_t msk) { uint8_t *status = shpc->config + SHPC_SLOT_STATUS(slot); - return (pci_get_word(status) & msk) >> ctz32(msk); + uint16_t result = (pci_get_word(status) & msk) >> ctz32(msk); + + assert(result <= UINT8_MAX); + return result; } static void shpc_set_status(SHPCDevice *shpc, @@ -223,6 +226,7 @@ void shpc_reset(PCIDevice *d) SHPC_SLOT_STATUS_PRSNT_MASK); shpc_set_status(shpc, i, SHPC_LED_OFF, SHPC_SLOT_PWR_LED_MASK); } + shpc_set_status(shpc, i, SHPC_LED_OFF, SHPC_SLOT_ATTN_LED_MASK); shpc_set_status(shpc, i, 0, SHPC_SLOT_STATUS_66); } shpc_set_sec_bus_speed(shpc, SHPC_SEC_BUS_33); @@ -254,60 +258,66 @@ static void shpc_free_devices_in_slot(SHPCDevice *shpc, int slot) } } -static void shpc_slot_command(SHPCDevice *shpc, uint8_t target, +static bool shpc_slot_is_off(uint8_t state, uint8_t power, uint8_t attn) +{ + return state == SHPC_STATE_DISABLED && power == SHPC_LED_OFF; +} + +static void shpc_slot_command(PCIDevice *d, uint8_t target, uint8_t state, uint8_t power, uint8_t attn) { - uint8_t current_state; + SHPCDevice *shpc = d->shpc; int slot = SHPC_LOGICAL_TO_IDX(target); + uint8_t old_state = shpc_get_status(shpc, slot, SHPC_SLOT_STATE_MASK); + uint8_t old_power = shpc_get_status(shpc, slot, SHPC_SLOT_PWR_LED_MASK); + uint8_t old_attn = shpc_get_status(shpc, slot, SHPC_SLOT_ATTN_LED_MASK); + if (target < SHPC_CMD_TRGT_MIN || slot >= shpc->nslots) { shpc_invalid_command(shpc); return; } - current_state = shpc_get_status(shpc, slot, SHPC_SLOT_STATE_MASK); - if (current_state == SHPC_STATE_ENABLED && state == SHPC_STATE_PWRONLY) { + + if (old_state == SHPC_STATE_ENABLED && state == SHPC_STATE_PWRONLY) { shpc_invalid_command(shpc); return; } - switch (power) { - case SHPC_LED_NO: - break; - default: + if (power == SHPC_LED_NO) { + power = old_power; + } else { /* TODO: send event to monitor */ shpc_set_status(shpc, slot, power, SHPC_SLOT_PWR_LED_MASK); } - switch (attn) { - case SHPC_LED_NO: - break; - default: + + if (attn == SHPC_LED_NO) { + attn = old_attn; + } else { /* TODO: send event to monitor */ shpc_set_status(shpc, slot, attn, SHPC_SLOT_ATTN_LED_MASK); } - if ((current_state == SHPC_STATE_DISABLED && state == SHPC_STATE_PWRONLY) || - (current_state == SHPC_STATE_DISABLED && state == SHPC_STATE_ENABLED)) { - shpc_set_status(shpc, slot, state, SHPC_SLOT_STATE_MASK); - } else if ((current_state == SHPC_STATE_ENABLED || - current_state == SHPC_STATE_PWRONLY) && - state == SHPC_STATE_DISABLED) { + if (state == SHPC_STATE_NO) { + state = old_state; + } else { shpc_set_status(shpc, slot, state, SHPC_SLOT_STATE_MASK); - power = shpc_get_status(shpc, slot, SHPC_SLOT_PWR_LED_MASK); - /* TODO: track what monitor requested. */ - /* Look at LED to figure out whether it's ok to remove the device. */ - if (power == SHPC_LED_OFF) { - shpc_free_devices_in_slot(shpc, slot); - shpc_set_status(shpc, slot, 1, SHPC_SLOT_STATUS_MRL_OPEN); - shpc_set_status(shpc, slot, SHPC_SLOT_STATUS_PRSNT_EMPTY, - SHPC_SLOT_STATUS_PRSNT_MASK); - shpc->config[SHPC_SLOT_EVENT_LATCH(slot)] |= - SHPC_SLOT_EVENT_MRL | - SHPC_SLOT_EVENT_PRESENCE; - } + } + + if (!shpc_slot_is_off(old_state, old_power, old_attn) && + shpc_slot_is_off(state, power, attn)) + { + shpc_free_devices_in_slot(shpc, slot); + shpc_set_status(shpc, slot, 1, SHPC_SLOT_STATUS_MRL_OPEN); + shpc_set_status(shpc, slot, SHPC_SLOT_STATUS_PRSNT_EMPTY, + SHPC_SLOT_STATUS_PRSNT_MASK); + shpc->config[SHPC_SLOT_EVENT_LATCH(slot)] |= + SHPC_SLOT_EVENT_MRL | + SHPC_SLOT_EVENT_PRESENCE; } } -static void shpc_command(SHPCDevice *shpc) +static void shpc_command(PCIDevice *d) { + SHPCDevice *shpc = d->shpc; uint8_t code = pci_get_byte(shpc->config + SHPC_CMD_CODE); uint8_t speed; uint8_t target; @@ -328,7 +338,7 @@ static void shpc_command(SHPCDevice *shpc) state = (code & SHPC_SLOT_STATE_MASK) >> SHPC_SLOT_STATE_SHIFT; power = (code & SHPC_SLOT_PWR_LED_MASK) >> SHPC_SLOT_PWR_LED_SHIFT; attn = (code & SHPC_SLOT_ATTN_LED_MASK) >> SHPC_SLOT_ATTN_LED_SHIFT; - shpc_slot_command(shpc, target, state, power, attn); + shpc_slot_command(d, target, state, power, attn); break; case 0x40 ... 0x47: speed = code & SHPC_SEC_BUS_MASK; @@ -346,10 +356,10 @@ static void shpc_command(SHPCDevice *shpc) } for (i = 0; i < shpc->nslots; ++i) { if (!(shpc_get_status(shpc, i, SHPC_SLOT_STATUS_MRL_OPEN))) { - shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN, + shpc_slot_command(d, i + SHPC_CMD_TRGT_MIN, SHPC_STATE_PWRONLY, SHPC_LED_ON, SHPC_LED_NO); } else { - shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN, + shpc_slot_command(d, i + SHPC_CMD_TRGT_MIN, SHPC_STATE_NO, SHPC_LED_OFF, SHPC_LED_NO); } } @@ -367,10 +377,10 @@ static void shpc_command(SHPCDevice *shpc) } for (i = 0; i < shpc->nslots; ++i) { if (!(shpc_get_status(shpc, i, SHPC_SLOT_STATUS_MRL_OPEN))) { - shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN, + shpc_slot_command(d, i + SHPC_CMD_TRGT_MIN, SHPC_STATE_ENABLED, SHPC_LED_ON, SHPC_LED_NO); } else { - shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN, + shpc_slot_command(d, i + SHPC_CMD_TRGT_MIN, SHPC_STATE_NO, SHPC_LED_OFF, SHPC_LED_NO); } } @@ -402,7 +412,7 @@ static void shpc_write(PCIDevice *d, unsigned addr, uint64_t val, int l) shpc->config[a] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */ } if (ranges_overlap(addr, l, SHPC_CMD_CODE, 2)) { - shpc_command(shpc); + shpc_command(d); } shpc_interrupt_update(d); } @@ -486,8 +496,9 @@ static const MemoryRegionOps shpc_mmio_ops = { .max_access_size = 4, }, }; -static void shpc_device_plug_common(PCIDevice *affected_dev, int *slot, - SHPCDevice *shpc, Error **errp) + +static bool shpc_device_get_slot(PCIDevice *affected_dev, int *slot, + SHPCDevice *shpc, Error **errp) { int pci_slot = PCI_SLOT(affected_dev->devfn); *slot = SHPC_PCI_TO_IDX(pci_slot); @@ -497,21 +508,20 @@ static void shpc_device_plug_common(PCIDevice *affected_dev, int *slot, "controller. Valid slots are between %d and %d.", pci_slot, SHPC_IDX_TO_PCI(0), SHPC_IDX_TO_PCI(shpc->nslots) - 1); - return; + return false; } + + return true; } void shpc_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - Error *local_err = NULL; PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); SHPCDevice *shpc = pci_hotplug_dev->shpc; int slot; - shpc_device_plug_common(PCI_DEVICE(dev), &slot, shpc, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (!shpc_device_get_slot(PCI_DEVICE(dev), &slot, shpc, errp)) { return; } @@ -553,16 +563,13 @@ void shpc_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, void shpc_device_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - Error *local_err = NULL; PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); SHPCDevice *shpc = pci_hotplug_dev->shpc; uint8_t state; uint8_t led; int slot; - shpc_device_plug_common(PCI_DEVICE(dev), &slot, shpc, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (!shpc_device_get_slot(PCI_DEVICE(dev), &slot, shpc, errp)) { return; } diff --git a/hw/pci/trace-events b/hw/pci/trace-events index aaf46bc92d..42430869ce 100644 --- a/hw/pci/trace-events +++ b/hw/pci/trace-events @@ -3,6 +3,7 @@ # pci.c pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 +pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s" # pci_host.c pci_cfg_read(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, unsigned offs, unsigned val) "%s %02x:%02x.%x @0x%x -> 0x%x" diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 44b1fbbc93..11cb48af2f 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -284,73 +284,19 @@ static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t pir, g_free(reg); } -static PnvPhb4PecState *pnv_phb4_get_pec(PnvChip *chip, PnvPHB4 *phb, - Error **errp) -{ - PnvPHB *phb_base = phb->phb_base; - PnvPhb4PecState *pecs = NULL; - int chip_id = phb->chip_id; - int index = phb->phb_id; - int i, j; - - if (phb_base->version == 4) { - Pnv9Chip *chip9 = PNV9_CHIP(chip); - - pecs = chip9->pecs; - } else if (phb_base->version == 5) { - Pnv10Chip *chip10 = PNV10_CHIP(chip); - - pecs = chip10->pecs; - } else { - g_assert_not_reached(); - } - - for (i = 0; i < chip->num_pecs; i++) { - /* - * For each PEC, check the amount of phbs it supports - * and see if the given phb4 index matches an index. - */ - PnvPhb4PecState *pec = &pecs[i]; - - for (j = 0; j < pec->num_phbs; j++) { - if (index == pnv_phb4_pec_get_phb_id(pec, j)) { - return pec; - } - } - } - error_setg(errp, - "pnv-phb4 chip-id %d index %d didn't match any existing PEC", - chip_id, index); - - return NULL; -} - /* - * Adds a PnvPHB to the chip. Returns the parent obj of the - * PHB which varies with each version (phb version 3 is parented - * by the chip, version 4 and 5 are parented by the PEC - * device). - * - * TODO: for version 3 we're still parenting the PHB with the - * chip. We should parent with a (so far not implemented) - * PHB3 PEC device. + * Adds a PnvPHB to the chip on P8. + * Implemented here, like for defaults PHBs */ -Object *pnv_chip_add_phb(PnvChip *chip, PnvPHB *phb, Error **errp) +PnvChip *pnv_chip_add_phb(PnvChip *chip, PnvPHB *phb) { - if (phb->version == 3) { - Pnv8Chip *chip8 = PNV8_CHIP(chip); - - phb->chip = chip; - - chip8->phbs[chip8->num_phbs] = phb; - chip8->num_phbs++; - - return OBJECT(chip); - } + Pnv8Chip *chip8 = PNV8_CHIP(chip); - phb->pec = pnv_phb4_get_pec(chip, PNV_PHB4(phb->backend), errp); + phb->chip = chip; - return OBJECT(phb->pec); + chip8->phbs[chip8->num_phbs] = phb; + chip8->num_phbs++; + return chip; } static void pnv_chip_power8_dt_populate(PnvChip *chip, void *fdt) diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig index 4550b3b938..6528ebfa3a 100644 --- a/hw/riscv/Kconfig +++ b/hw/riscv/Kconfig @@ -44,6 +44,7 @@ config RISCV_VIRT select VIRTIO_MMIO select FW_CFG_DMA select PLATFORM_BUS + select ACPI config SHAKTI_C bool diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build index ab6cae57ea..2f7ee81be3 100644 --- a/hw/riscv/meson.build +++ b/hw/riscv/meson.build @@ -9,5 +9,6 @@ riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c')) riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c')) riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c')) riscv_ss.add(when: 'CONFIG_MICROCHIP_PFSOC', if_true: files('microchip_pfsoc.c')) +riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c')) hw_arch += {'riscv': riscv_ss} diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c index ad3bb35b34..35a335b8d0 100644 --- a/hw/riscv/sifive_u.c +++ b/hw/riscv/sifive_u.c @@ -99,7 +99,7 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry *memmap, MachineState *ms = MACHINE(s); uint64_t mem_size = ms->ram_size; void *fdt; - int cpu, fdt_size; + int cpu; uint32_t *cells; char *nodename; uint32_t plic_phandle, prci_phandle, gpio_phandle, phandle = 1; @@ -112,18 +112,10 @@ static void create_fdt(SiFiveUState *s, const MemMapEntry *memmap, "sifive,plic-1.0.0", "riscv,plic0" }; - if (ms->dtb) { - fdt = ms->fdt = load_device_tree(ms->dtb, &fdt_size); - if (!fdt) { - error_report("load_device_tree() failed"); - exit(1); - } - } else { - fdt = ms->fdt = create_device_tree(&fdt_size); - if (!fdt) { - error_report("create_device_tree() failed"); - exit(1); - } + fdt = ms->fdt = create_device_tree(&s->fdt_size); + if (!fdt) { + error_report("create_device_tree() failed"); + exit(1); } qemu_fdt_setprop_string(fdt, "/", "model", "SiFive HiFive Unleashed A00"); @@ -560,8 +552,16 @@ static void sifive_u_machine_init(MachineState *machine) qdev_connect_gpio_out(DEVICE(&(s->soc.gpio)), 10, qemu_allocate_irq(sifive_u_machine_reset, NULL, 0)); - /* create device tree */ - create_fdt(s, memmap, riscv_is_32bit(&s->soc.u_cpus)); + /* load/create device tree */ + if (machine->dtb) { + machine->fdt = load_device_tree(machine->dtb, &s->fdt_size); + if (!machine->fdt) { + error_report("load_device_tree() failed"); + exit(1); + } + } else { + create_fdt(s, memmap, riscv_is_32bit(&s->soc.u_cpus)); + } if (s->start_in_flash) { /* diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c new file mode 100644 index 0000000000..82da0a238c --- /dev/null +++ b/hw/riscv/virt-acpi-build.c @@ -0,0 +1,416 @@ +/* + * Support for generating ACPI tables and passing them to Guests + * + * RISC-V virt ACPI generation + * + * Copyright (C) 2008-2010 Kevin O'Connor <kevin@koconnor.net> + * Copyright (C) 2006 Fabrice Bellard + * Copyright (C) 2013 Red Hat Inc + * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD. + * Copyright (C) 2021-2023 Ventana Micro Systems Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "hw/acpi/acpi-defs.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/utils.h" +#include "qapi/error.h" +#include "sysemu/reset.h" +#include "migration/vmstate.h" +#include "hw/riscv/virt.h" +#include "hw/riscv/numa.h" +#include "hw/intc/riscv_aclint.h" + +#define ACPI_BUILD_TABLE_SIZE 0x20000 + +typedef struct AcpiBuildState { + /* Copy of table in RAM (for patching) */ + MemoryRegion *table_mr; + MemoryRegion *rsdp_mr; + MemoryRegion *linker_mr; + /* Is table patched? */ + bool patched; +} AcpiBuildState; + +static void acpi_align_size(GArray *blob, unsigned align) +{ + /* + * Align size to multiple of given size. This reduces the chance + * we need to change size in the future (breaking cross version migration). + */ + g_array_set_size(blob, ROUND_UP(acpi_data_len(blob), align)); +} + +static void riscv_acpi_madt_add_rintc(uint32_t uid, + const CPUArchIdList *arch_ids, + GArray *entry) +{ + uint64_t hart_id = arch_ids->cpus[uid].arch_id; + + build_append_int_noprefix(entry, 0x18, 1); /* Type */ + build_append_int_noprefix(entry, 20, 1); /* Length */ + build_append_int_noprefix(entry, 1, 1); /* Version */ + build_append_int_noprefix(entry, 0, 1); /* Reserved */ + build_append_int_noprefix(entry, 0x1, 4); /* Flags */ + build_append_int_noprefix(entry, hart_id, 8); /* Hart ID */ + build_append_int_noprefix(entry, uid, 4); /* ACPI Processor UID */ +} + +static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s) +{ + MachineClass *mc = MACHINE_GET_CLASS(s); + MachineState *ms = MACHINE(s); + const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms); + + for (int i = 0; i < arch_ids->len; i++) { + Aml *dev; + GArray *madt_buf = g_array_new(0, 1, 1); + + dev = aml_device("C%.03X", i); + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007"))); + aml_append(dev, aml_name_decl("_UID", + aml_int(arch_ids->cpus[i].arch_id))); + + /* build _MAT object */ + riscv_acpi_madt_add_rintc(i, arch_ids, madt_buf); + aml_append(dev, aml_name_decl("_MAT", + aml_buffer(madt_buf->len, + (uint8_t *)madt_buf->data))); + g_array_free(madt_buf, true); + + aml_append(scope, dev); + } +} + +static void acpi_dsdt_add_fw_cfg(Aml *scope, const MemMapEntry *fw_cfg_memmap) +{ + Aml *dev = aml_device("FWCF"); + aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0002"))); + + /* device present, functioning, decoding, not shown in UI */ + aml_append(dev, aml_name_decl("_STA", aml_int(0xB))); + aml_append(dev, aml_name_decl("_CCA", aml_int(1))); + + Aml *crs = aml_resource_template(); + aml_append(crs, aml_memory32_fixed(fw_cfg_memmap->base, + fw_cfg_memmap->size, AML_READ_WRITE)); + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); +} + +/* RHCT Node[N] starts at offset 56 */ +#define RHCT_NODE_ARRAY_OFFSET 56 + +/* + * ACPI spec, Revision 6.5+ + * 5.2.36 RISC-V Hart Capabilities Table (RHCT) + * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/16 + * https://drive.google.com/file/d/1nP3nFiH4jkPMp6COOxP6123DCZKR-tia/view + */ +static void build_rhct(GArray *table_data, + BIOSLinker *linker, + RISCVVirtState *s) +{ + MachineClass *mc = MACHINE_GET_CLASS(s); + MachineState *ms = MACHINE(s); + const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms); + size_t len, aligned_len; + uint32_t isa_offset, num_rhct_nodes; + RISCVCPU *cpu; + char *isa; + + AcpiTable table = { .sig = "RHCT", .rev = 1, .oem_id = s->oem_id, + .oem_table_id = s->oem_table_id }; + + acpi_table_begin(&table, table_data); + + build_append_int_noprefix(table_data, 0x0, 4); /* Reserved */ + + /* Time Base Frequency */ + build_append_int_noprefix(table_data, + RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, 8); + + /* ISA + N hart info */ + num_rhct_nodes = 1 + ms->smp.cpus; + + /* Number of RHCT nodes*/ + build_append_int_noprefix(table_data, num_rhct_nodes, 4); + + /* Offset to the RHCT node array */ + build_append_int_noprefix(table_data, RHCT_NODE_ARRAY_OFFSET, 4); + + /* ISA String Node */ + isa_offset = table_data->len - table.table_offset; + build_append_int_noprefix(table_data, 0, 2); /* Type 0 */ + + cpu = &s->soc[0].harts[0]; + isa = riscv_isa_string(cpu); + len = 8 + strlen(isa) + 1; + aligned_len = (len % 2) ? (len + 1) : len; + + build_append_int_noprefix(table_data, aligned_len, 2); /* Length */ + build_append_int_noprefix(table_data, 0x1, 2); /* Revision */ + + /* ISA string length including NUL */ + build_append_int_noprefix(table_data, strlen(isa) + 1, 2); + g_array_append_vals(table_data, isa, strlen(isa) + 1); /* ISA string */ + + if (aligned_len != len) { + build_append_int_noprefix(table_data, 0x0, 1); /* Optional Padding */ + } + + /* Hart Info Node */ + for (int i = 0; i < arch_ids->len; i++) { + build_append_int_noprefix(table_data, 0xFFFF, 2); /* Type */ + build_append_int_noprefix(table_data, 16, 2); /* Length */ + build_append_int_noprefix(table_data, 0x1, 2); /* Revision */ + build_append_int_noprefix(table_data, 1, 2); /* Number of offsets */ + build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */ + build_append_int_noprefix(table_data, isa_offset, 4); /* Offsets[0] */ + } + + acpi_table_end(linker, &table); +} + +/* FADT */ +static void build_fadt_rev6(GArray *table_data, + BIOSLinker *linker, + RISCVVirtState *s, + unsigned dsdt_tbl_offset) +{ + AcpiFadtData fadt = { + .rev = 6, + .minor_ver = 5, + .flags = 1 << ACPI_FADT_F_HW_REDUCED_ACPI, + .xdsdt_tbl_offset = &dsdt_tbl_offset, + }; + + build_fadt(table_data, linker, &fadt, s->oem_id, s->oem_table_id); +} + +/* DSDT */ +static void build_dsdt(GArray *table_data, + BIOSLinker *linker, + RISCVVirtState *s) +{ + Aml *scope, *dsdt; + const MemMapEntry *memmap = s->memmap; + AcpiTable table = { .sig = "DSDT", .rev = 2, .oem_id = s->oem_id, + .oem_table_id = s->oem_table_id }; + + + acpi_table_begin(&table, table_data); + dsdt = init_aml_allocator(); + + /* + * When booting the VM with UEFI, UEFI takes ownership of the RTC hardware. + * While UEFI can use libfdt to disable the RTC device node in the DTB that + * it passes to the OS, it cannot modify AML. Therefore, we won't generate + * the RTC ACPI device at all when using UEFI. + */ + scope = aml_scope("\\_SB"); + acpi_dsdt_add_cpus(scope, s); + + acpi_dsdt_add_fw_cfg(scope, &memmap[VIRT_FW_CFG]); + + aml_append(dsdt, scope); + + /* copy AML table into ACPI tables blob and patch header there */ + g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len); + + acpi_table_end(linker, &table); + free_aml_allocator(); +} + +/* + * ACPI spec, Revision 6.5+ + * 5.2.12 Multiple APIC Description Table (MADT) + * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/15 + * https://drive.google.com/file/d/1R6k4MshhN3WTT-hwqAquu5nX6xSEqK2l/view + */ +static void build_madt(GArray *table_data, + BIOSLinker *linker, + RISCVVirtState *s) +{ + MachineClass *mc = MACHINE_GET_CLASS(s); + MachineState *ms = MACHINE(s); + const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms); + + AcpiTable table = { .sig = "APIC", .rev = 6, .oem_id = s->oem_id, + .oem_table_id = s->oem_table_id }; + + acpi_table_begin(&table, table_data); + /* Local Interrupt Controller Address */ + build_append_int_noprefix(table_data, 0, 4); + build_append_int_noprefix(table_data, 0, 4); /* MADT Flags */ + + /* RISC-V Local INTC structures per HART */ + for (int i = 0; i < arch_ids->len; i++) { + riscv_acpi_madt_add_rintc(i, arch_ids, table_data); + } + + acpi_table_end(linker, &table); +} + +static void virt_acpi_build(RISCVVirtState *s, AcpiBuildTables *tables) +{ + GArray *table_offsets; + unsigned dsdt, xsdt; + GArray *tables_blob = tables->table_data; + + table_offsets = g_array_new(false, true, + sizeof(uint32_t)); + + bios_linker_loader_alloc(tables->linker, + ACPI_BUILD_TABLE_FILE, tables_blob, + 64, false); + + /* DSDT is pointed to by FADT */ + dsdt = tables_blob->len; + build_dsdt(tables_blob, tables->linker, s); + + /* FADT and others pointed to by XSDT */ + acpi_add_table(table_offsets, tables_blob); + build_fadt_rev6(tables_blob, tables->linker, s, dsdt); + + acpi_add_table(table_offsets, tables_blob); + build_madt(tables_blob, tables->linker, s); + + acpi_add_table(table_offsets, tables_blob); + build_rhct(tables_blob, tables->linker, s); + + /* XSDT is pointed to by RSDP */ + xsdt = tables_blob->len; + build_xsdt(tables_blob, tables->linker, table_offsets, s->oem_id, + s->oem_table_id); + + /* RSDP is in FSEG memory, so allocate it separately */ + { + AcpiRsdpData rsdp_data = { + .revision = 2, + .oem_id = s->oem_id, + .xsdt_tbl_offset = &xsdt, + .rsdt_tbl_offset = NULL, + }; + build_rsdp(tables->rsdp, tables->linker, &rsdp_data); + } + + /* + * The align size is 128, warn if 64k is not enough therefore + * the align size could be resized. + */ + if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) { + warn_report("ACPI table size %u exceeds %d bytes," + " migration may not work", + tables_blob->len, ACPI_BUILD_TABLE_SIZE / 2); + error_printf("Try removing some objects."); + } + + acpi_align_size(tables_blob, ACPI_BUILD_TABLE_SIZE); + + /* Clean up memory that's no longer used */ + g_array_free(table_offsets, true); +} + +static void acpi_ram_update(MemoryRegion *mr, GArray *data) +{ + uint32_t size = acpi_data_len(data); + + /* + * Make sure RAM size is correct - in case it got changed + * e.g. by migration + */ + memory_region_ram_resize(mr, size, &error_abort); + + memcpy(memory_region_get_ram_ptr(mr), data->data, size); + memory_region_set_dirty(mr, 0, size); +} + +static void virt_acpi_build_update(void *build_opaque) +{ + AcpiBuildState *build_state = build_opaque; + AcpiBuildTables tables; + + /* No state to update or already patched? Nothing to do. */ + if (!build_state || build_state->patched) { + return; + } + + build_state->patched = true; + + acpi_build_tables_init(&tables); + + virt_acpi_build(RISCV_VIRT_MACHINE(qdev_get_machine()), &tables); + + acpi_ram_update(build_state->table_mr, tables.table_data); + acpi_ram_update(build_state->rsdp_mr, tables.rsdp); + acpi_ram_update(build_state->linker_mr, tables.linker->cmd_blob); + + acpi_build_tables_cleanup(&tables, true); +} + +static void virt_acpi_build_reset(void *build_opaque) +{ + AcpiBuildState *build_state = build_opaque; + build_state->patched = false; +} + +static const VMStateDescription vmstate_virt_acpi_build = { + .name = "virt_acpi_build", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_BOOL(patched, AcpiBuildState), + VMSTATE_END_OF_LIST() + }, +}; + +void virt_acpi_setup(RISCVVirtState *s) +{ + AcpiBuildTables tables; + AcpiBuildState *build_state; + + build_state = g_malloc0(sizeof *build_state); + + acpi_build_tables_init(&tables); + virt_acpi_build(s, &tables); + + /* Now expose it all to Guest */ + build_state->table_mr = acpi_add_rom_blob(virt_acpi_build_update, + build_state, tables.table_data, + ACPI_BUILD_TABLE_FILE); + assert(build_state->table_mr != NULL); + + build_state->linker_mr = acpi_add_rom_blob(virt_acpi_build_update, + build_state, + tables.linker->cmd_blob, + ACPI_BUILD_LOADER_FILE); + + build_state->rsdp_mr = acpi_add_rom_blob(virt_acpi_build_update, + build_state, tables.rsdp, + ACPI_BUILD_RSDP_FILE); + + qemu_register_reset(virt_acpi_build_reset, build_state); + virt_acpi_build_reset(build_state); + vmstate_register(NULL, 0, &vmstate_virt_acpi_build, build_state); + + /* + * Clean up tables but don't free the memory: we track it + * in build_state. + */ + acpi_build_tables_cleanup(&tables, false); +} diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index 86c4adc0c9..4e3efbee16 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -49,6 +49,8 @@ #include "hw/pci/pci.h" #include "hw/pci-host/gpex.h" #include "hw/display/ramfb.h" +#include "hw/acpi/aml-build.h" +#include "qapi/qapi-visit-common.h" /* * The virt machine physical address space used by some of the devices @@ -228,26 +230,41 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket, int cpu; uint32_t cpu_phandle; MachineState *ms = MACHINE(s); - char *name, *cpu_name, *core_name, *intc_name; + char *name, *cpu_name, *core_name, *intc_name, *sv_name; bool is_32_bit = riscv_is_32bit(&s->soc[0]); + uint8_t satp_mode_max; for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) { + RISCVCPU *cpu_ptr = &s->soc[socket].harts[cpu]; + cpu_phandle = (*phandle)++; cpu_name = g_strdup_printf("/cpus/cpu@%d", s->soc[socket].hartid_base + cpu); qemu_fdt_add_subnode(ms->fdt, cpu_name); - if (riscv_feature(&s->soc[socket].harts[cpu].env, - RISCV_FEATURE_MMU)) { - qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", - (is_32_bit) ? "riscv,sv32" : "riscv,sv48"); - } else { - qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", - "riscv,none"); - } - name = riscv_isa_string(&s->soc[socket].harts[cpu]); + + satp_mode_max = satp_mode_max_from_map( + s->soc[socket].harts[cpu].cfg.satp_mode.map); + sv_name = g_strdup_printf("riscv,%s", + satp_mode_str(satp_mode_max, is_32_bit)); + qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name); + g_free(sv_name); + + + name = riscv_isa_string(cpu_ptr); qemu_fdt_setprop_string(ms->fdt, cpu_name, "riscv,isa", name); g_free(name); + + if (cpu_ptr->cfg.ext_icbom) { + qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cbom-block-size", + cpu_ptr->cfg.cbom_blocksize); + } + + if (cpu_ptr->cfg.ext_icboz) { + qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cboz-block-size", + cpu_ptr->cfg.cboz_blocksize); + } + qemu_fdt_setprop_string(ms->fdt, cpu_name, "compatible", "riscv"); qemu_fdt_setprop_string(ms->fdt, cpu_name, "status", "okay"); qemu_fdt_setprop_cell(ms->fdt, cpu_name, "reg", @@ -1008,18 +1025,10 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap) uint32_t irq_pcie_phandle = 1, irq_virtio_phandle = 1; uint8_t rng_seed[32]; - if (ms->dtb) { - ms->fdt = load_device_tree(ms->dtb, &s->fdt_size); - if (!ms->fdt) { - error_report("load_device_tree() failed"); - exit(1); - } - } else { - ms->fdt = create_device_tree(&s->fdt_size); - if (!ms->fdt) { - error_report("create_device_tree() failed"); - exit(1); - } + ms->fdt = create_device_tree(&s->fdt_size); + if (!ms->fdt) { + error_report("create_device_tree() failed"); + exit(1); } qemu_fdt_setprop_string(ms->fdt, "/", "model", "riscv-virtio,qemu"); @@ -1314,6 +1323,10 @@ static void virt_machine_done(Notifier *notifier, void *data) if (kvm_enabled()) { riscv_setup_direct_kernel(kernel_entry, fdt_load_addr); } + + if (virt_is_acpi_enabled(s)) { + virt_acpi_setup(s); + } } static void virt_machine_init(MachineState *machine) @@ -1449,6 +1462,8 @@ static void virt_machine_init(MachineState *machine) ROUND_UP(virt_high_pcie_memmap.base, virt_high_pcie_memmap.size); } + s->memmap = virt_memmap; + /* register system main memory (actual RAM) */ memory_region_add_subregion(system_memory, memmap[VIRT_DRAM].base, machine->ram); @@ -1504,8 +1519,16 @@ static void virt_machine_init(MachineState *machine) } virt_flash_map(s, system_memory); - /* create device tree */ - create_fdt(s, memmap); + /* load/create device tree */ + if (machine->dtb) { + machine->fdt = load_device_tree(machine->dtb, &s->fdt_size); + if (!machine->fdt) { + error_report("load_device_tree() failed"); + exit(1); + } + } else { + create_fdt(s, memmap); + } s->machine_done.notify = virt_machine_done; qemu_add_machine_init_done_notifier(&s->machine_done); @@ -1513,6 +1536,11 @@ static void virt_machine_init(MachineState *machine) static void virt_machine_instance_init(Object *obj) { + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + + s->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); + s->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); + s->acpi = ON_OFF_AUTO_AUTO; } static char *virt_get_aia_guests(Object *obj, Error **errp) @@ -1587,6 +1615,28 @@ static void virt_set_aclint(Object *obj, bool value, Error **errp) s->have_aclint = value; } +bool virt_is_acpi_enabled(RISCVVirtState *s) +{ + return s->acpi != ON_OFF_AUTO_OFF; +} + +static void virt_get_acpi(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + OnOffAuto acpi = s->acpi; + + visit_type_OnOffAuto(v, name, &acpi, errp); +} + +static void virt_set_acpi(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + RISCVVirtState *s = RISCV_VIRT_MACHINE(obj); + + visit_type_OnOffAuto(v, name, &s->acpi, errp); +} + static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, DeviceState *dev) { @@ -1658,6 +1708,11 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) sprintf(str, "Set number of guest MMIO pages for AIA IMSIC. Valid value " "should be between 0 and %d.", VIRT_IRQCHIP_MAX_GUESTS); object_class_property_set_description(oc, "aia-guests", str); + object_class_property_add(oc, "acpi", "OnOffAuto", + virt_get_acpi, virt_set_acpi, + NULL, NULL); + object_class_property_set_description(oc, "acpi", + "Enable ACPI"); } static const TypeInfo virt_machine_typeinfo = { diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index 4869566cf5..d2007e70fb 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -750,14 +750,16 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) t->core_count = (ms->smp.cores > 255) ? 0xFF : ms->smp.cores; t->core_enabled = t->core_count; - t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores); - t->thread_count = (ms->smp.threads > 255) ? 0xFF : ms->smp.threads; - t->thread_count2 = cpu_to_le16(ms->smp.threads); t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */ t->processor_family2 = cpu_to_le16(0x01); /* Other */ + if (tbl_len == SMBIOS_TYPE_4_LEN_V30) { + t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores); + t->thread_count2 = cpu_to_le16(ms->smp.threads); + } + SMBIOS_BUILD_TABLE_POST; smbios_type4_count++; } diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c index 22df4be528..7281169322 100644 --- a/hw/ssi/aspeed_smc.c +++ b/hw/ssi/aspeed_smc.c @@ -1134,10 +1134,7 @@ static void aspeed_smc_realize(DeviceState *dev, Error **errp) /* Setup cs_lines for peripherals */ s->cs_lines = g_new0(qemu_irq, asc->cs_num_max); - - for (i = 0; i < asc->cs_num_max; ++i) { - sysbus_init_irq(sbd, &s->cs_lines[i]); - } + qdev_init_gpio_out_named(DEVICE(s), s->cs_lines, "cs", asc->cs_num_max); /* The memory region for the controller registers */ memory_region_init_io(&s->mmio, OBJECT(s), &aspeed_smc_ops, s, diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c index 214d6a0501..6998094233 100644 --- a/hw/timer/hpet.c +++ b/hw/timer/hpet.c @@ -353,6 +353,16 @@ static const VMStateDescription vmstate_hpet = { } }; +static void hpet_arm(HPETTimer *t, uint64_t ticks) +{ + if (ticks < ns_to_ticks(INT64_MAX / 2)) { + timer_mod(t->qemu_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ticks_to_ns(ticks)); + } else { + timer_del(t->qemu_timer); + } +} + /* * timer expiration callback */ @@ -375,13 +385,11 @@ static void hpet_timer(void *opaque) } } diff = hpet_calculate_diff(t, cur_tick); - timer_mod(t->qemu_timer, - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + (int64_t)ticks_to_ns(diff)); + hpet_arm(t, diff); } else if (t->config & HPET_TN_32BIT && !timer_is_periodic(t)) { if (t->wrap_flag) { diff = hpet_calculate_diff(t, cur_tick); - timer_mod(t->qemu_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + - (int64_t)ticks_to_ns(diff)); + hpet_arm(t, diff); t->wrap_flag = 0; } } @@ -408,8 +416,7 @@ static void hpet_set_timer(HPETTimer *t) t->wrap_flag = 1; } } - timer_mod(t->qemu_timer, - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + (int64_t)ticks_to_ns(diff)); + hpet_arm(t, diff); } static void hpet_del_timer(HPETTimer *t) diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 4307296358..515ccf870d 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -522,7 +522,7 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, size_t vhost_svq_poll(VhostShadowVirtqueue *svq) { int64_t start_us = g_get_monotonic_time(); - uint32_t len; + uint32_t len = 0; do { if (vhost_svq_more_used(svq)) { diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c index fe3da32c74..d6927b610a 100644 --- a/hw/virtio/vhost-user-gpio.c +++ b/hw/virtio/vhost-user-gpio.c @@ -16,6 +16,7 @@ #include "trace.h" #define REALIZE_CONNECTION_RETRIES 3 +#define VHOST_NVQS 2 /* Features required from VirtIO */ static const int feature_bits[] = { @@ -208,8 +209,7 @@ static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserGPIO *gpio) { virtio_delete_queue(gpio->command_vq); virtio_delete_queue(gpio->interrupt_vq); - g_free(gpio->vhost_dev.vqs); - gpio->vhost_dev.vqs = NULL; + g_free(gpio->vhost_vqs); virtio_cleanup(vdev); vhost_user_cleanup(&gpio->vhost_user); } @@ -229,6 +229,9 @@ static int vu_gpio_connect(DeviceState *dev, Error **errp) vhost_dev_set_config_notifier(vhost_dev, &gpio_ops); gpio->vhost_user.supports_config = true; + gpio->vhost_dev.nvqs = VHOST_NVQS; + gpio->vhost_dev.vqs = gpio->vhost_vqs; + ret = vhost_dev_init(vhost_dev, &gpio->vhost_user, VHOST_BACKEND_TYPE_USER, 0, errp); if (ret < 0) { @@ -347,10 +350,9 @@ static void vu_gpio_device_realize(DeviceState *dev, Error **errp) virtio_init(vdev, VIRTIO_ID_GPIO, sizeof(gpio->config)); - gpio->vhost_dev.nvqs = 2; gpio->command_vq = virtio_add_queue(vdev, 256, vu_gpio_handle_output); gpio->interrupt_vq = virtio_add_queue(vdev, 256, vu_gpio_handle_output); - gpio->vhost_dev.vqs = g_new0(struct vhost_virtqueue, gpio->vhost_dev.nvqs); + gpio->vhost_vqs = g_new0(struct vhost_virtqueue, VHOST_NVQS); gpio->connected = false; diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c index dc5c828ba6..60eaf0d95b 100644 --- a/hw/virtio/vhost-user-i2c.c +++ b/hw/virtio/vhost-user-i2c.c @@ -143,8 +143,6 @@ static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserI2C *i2c) vhost_user_cleanup(&i2c->vhost_user); virtio_delete_queue(i2c->vq); virtio_cleanup(vdev); - g_free(i2c->vhost_dev.vqs); - i2c->vhost_dev.vqs = NULL; } static int vu_i2c_connect(DeviceState *dev) @@ -228,6 +226,7 @@ static void vu_i2c_device_realize(DeviceState *dev, Error **errp) ret = vhost_dev_init(&i2c->vhost_dev, &i2c->vhost_user, VHOST_BACKEND_TYPE_USER, 0, errp); if (ret < 0) { + g_free(i2c->vhost_dev.vqs); do_vhost_user_cleanup(vdev, i2c); } @@ -239,10 +238,12 @@ static void vu_i2c_device_unrealize(DeviceState *dev) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserI2C *i2c = VHOST_USER_I2C(dev); + struct vhost_virtqueue *vhost_vqs = i2c->vhost_dev.vqs; /* This will stop vhost backend if appropriate. */ vu_i2c_set_status(vdev, 0); vhost_dev_cleanup(&i2c->vhost_dev); + g_free(vhost_vqs); do_vhost_user_cleanup(vdev, i2c); } diff --git a/hw/virtio/vhost-user-rng.c b/hw/virtio/vhost-user-rng.c index 201a39e220..efc54cd3fb 100644 --- a/hw/virtio/vhost-user-rng.c +++ b/hw/virtio/vhost-user-rng.c @@ -229,6 +229,7 @@ static void vu_rng_device_realize(DeviceState *dev, Error **errp) return; vhost_dev_init_failed: + g_free(rng->vhost_dev.vqs); virtio_delete_queue(rng->req_vq); virtio_add_queue_failed: virtio_cleanup(vdev); @@ -239,12 +240,12 @@ static void vu_rng_device_unrealize(DeviceState *dev) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserRNG *rng = VHOST_USER_RNG(dev); + struct vhost_virtqueue *vhost_vqs = rng->vhost_dev.vqs; vu_rng_set_status(vdev, 0); vhost_dev_cleanup(&rng->vhost_dev); - g_free(rng->vhost_dev.vqs); - rng->vhost_dev.vqs = NULL; + g_free(vhost_vqs); virtio_delete_queue(rng->req_vq); virtio_cleanup(vdev); vhost_user_cleanup(&rng->vhost_user); diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index e68daa35d4..8968541514 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -40,7 +40,7 @@ #define VHOST_MEMORY_BASELINE_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 -#define VHOST_USER_SLAVE_MAX_FDS 8 +#define VHOST_USER_BACKEND_MAX_FDS 8 /* * Set maximum number of RAM slots supported to @@ -71,12 +71,12 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_RARP = 2, VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, VHOST_USER_PROTOCOL_F_NET_MTU = 4, - VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, + VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5, VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, VHOST_USER_PROTOCOL_F_CONFIG = 9, - VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, + VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10, VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, @@ -110,7 +110,7 @@ typedef enum VhostUserRequest { VHOST_USER_SET_VRING_ENABLE = 18, VHOST_USER_SEND_RARP = 19, VHOST_USER_NET_SET_MTU = 20, - VHOST_USER_SET_SLAVE_REQ_FD = 21, + VHOST_USER_SET_BACKEND_REQ_FD = 21, VHOST_USER_IOTLB_MSG = 22, VHOST_USER_SET_VRING_ENDIAN = 23, VHOST_USER_GET_CONFIG = 24, @@ -134,11 +134,11 @@ typedef enum VhostUserRequest { } VhostUserRequest; typedef enum VhostUserSlaveRequest { - VHOST_USER_SLAVE_NONE = 0, - VHOST_USER_SLAVE_IOTLB_MSG = 1, - VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, - VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, - VHOST_USER_SLAVE_MAX + VHOST_USER_BACKEND_NONE = 0, + VHOST_USER_BACKEND_IOTLB_MSG = 1, + VHOST_USER_BACKEND_CONFIG_CHANGE_MSG = 2, + VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG = 3, + VHOST_USER_BACKEND_MAX } VhostUserSlaveRequest; typedef struct VhostUserMemoryRegion { @@ -1638,13 +1638,13 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition, } switch (hdr.request) { - case VHOST_USER_SLAVE_IOTLB_MSG: + case VHOST_USER_BACKEND_IOTLB_MSG: ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); break; - case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG : + case VHOST_USER_BACKEND_CONFIG_CHANGE_MSG: ret = vhost_user_slave_handle_config_change(dev); break; - case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: + case VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG: ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area, fd ? fd[0] : -1); break; @@ -1696,7 +1696,7 @@ fdcleanup: static int vhost_setup_slave_channel(struct vhost_dev *dev) { VhostUserMsg msg = { - .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD, + .hdr.request = VHOST_USER_SET_BACKEND_REQ_FD, .hdr.flags = VHOST_USER_VERSION, }; struct vhost_user *u = dev->opaque; @@ -1707,7 +1707,7 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) QIOChannel *ioc; if (!virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { + VHOST_USER_PROTOCOL_F_BACKEND_REQ)) { return 0; } @@ -2065,7 +2065,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && !(virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_SLAVE_REQ) && + VHOST_USER_PROTOCOL_F_BACKEND_REQ) && virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK))) { error_setg(errp, "IOMMU support requires reply-ack and " diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 542e003101..df3a1e92ac 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -689,26 +689,11 @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev, return ret; } -static void vhost_vdpa_reset_svq(struct vhost_vdpa *v) -{ - if (!v->shadow_vqs_enabled) { - return; - } - - for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { - VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); - vhost_svq_stop(svq); - } -} - static int vhost_vdpa_reset_device(struct vhost_dev *dev) { - struct vhost_vdpa *v = dev->opaque; int ret; uint8_t status = 0; - vhost_vdpa_reset_svq(v); - ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); trace_vhost_vdpa_reset_device(dev, status); return ret; @@ -1100,6 +1085,8 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); + + vhost_svq_stop(svq); vhost_vdpa_svq_unmap_rings(dev, svq); event_notifier_cleanup(&svq->hdev_kick); diff --git a/hw/virtio/virtio-qmp.c b/hw/virtio/virtio-qmp.c index e4d4bece2d..b70148aba9 100644 --- a/hw/virtio/virtio-qmp.c +++ b/hw/virtio/virtio-qmp.c @@ -42,12 +42,12 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_RARP = 2, VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, VHOST_USER_PROTOCOL_F_NET_MTU = 4, - VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, + VHOST_USER_PROTOCOL_F_BACKEND_REQ = 5, VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, VHOST_USER_PROTOCOL_F_CONFIG = 9, - VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, + VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD = 10, VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, @@ -101,8 +101,8 @@ static const qmp_virtio_feature_map_t vhost_user_protocol_map[] = { "supported"), FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_NET_MTU, \ "VHOST_USER_PROTOCOL_F_NET_MTU: Expose host MTU to guest supported"), - FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_SLAVE_REQ, \ - "VHOST_USER_PROTOCOL_F_SLAVE_REQ: Socket fd for back-end initiated " + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_BACKEND_REQ, \ + "VHOST_USER_PROTOCOL_F_BACKEND_REQ: Socket fd for back-end initiated " "requests supported"), FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CROSS_ENDIAN, \ "VHOST_USER_PROTOCOL_F_CROSS_ENDIAN: Endianness of VQs for legacy " @@ -116,8 +116,8 @@ static const qmp_virtio_feature_map_t vhost_user_protocol_map[] = { FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_CONFIG, \ "VHOST_USER_PROTOCOL_F_CONFIG: Vhost-user messaging for virtio " "device configuration space supported"), - FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD, \ - "VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD: Slave fd communication " + FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD, \ + "VHOST_USER_PROTOCOL_F_BACKEND_SEND_FD: Slave fd communication " "channel supported"), FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_HOST_NOTIFIER, \ "VHOST_USER_PROTOCOL_F_HOST_NOTIFIER: Host notifiers for specified " diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index 8db0532632..85c93cffcf 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -57,6 +57,7 @@ #include <sys/ioctl.h> #include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" #include "hw/xen/xen.h" @@ -780,15 +781,6 @@ static void xen_pt_realize(PCIDevice *d, Error **errp) s->hostaddr.bus, s->hostaddr.slot, s->hostaddr.function, s->dev.devfn); - xen_host_pci_device_get(&s->real_device, - s->hostaddr.domain, s->hostaddr.bus, - s->hostaddr.slot, s->hostaddr.function, - errp); - if (*errp) { - error_append_hint(errp, "Failed to \"open\" the real pci device"); - return; - } - s->is_virtfn = s->real_device.is_virtfn; if (s->is_virtfn) { XEN_PT_LOG(d, "%04x:%02x:%02x.%d is a SR-IOV Virtual Function\n", @@ -803,8 +795,10 @@ static void xen_pt_realize(PCIDevice *d, Error **errp) s->io_listener = xen_pt_io_listener; /* Setup VGA bios for passthrough GFX */ - if ((s->real_device.domain == 0) && (s->real_device.bus == 0) && - (s->real_device.dev == 2) && (s->real_device.func == 0)) { + if ((s->real_device.domain == XEN_PCI_IGD_DOMAIN) && + (s->real_device.bus == XEN_PCI_IGD_BUS) && + (s->real_device.dev == XEN_PCI_IGD_DEV) && + (s->real_device.func == XEN_PCI_IGD_FN)) { if (!is_igd_vga_passthrough(&s->real_device)) { error_setg(errp, "Need to enable igd-passthru if you're trying" " to passthrough IGD GFX"); @@ -950,11 +944,58 @@ static void xen_pci_passthrough_instance_init(Object *obj) PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS; } +void xen_igd_reserve_slot(PCIBus *pci_bus) +{ + if (!xen_igd_gfx_pt_enabled()) { + return; + } + + XEN_PT_LOG(0, "Reserving PCI slot 2 for IGD\n"); + pci_bus->slot_reserved_mask |= XEN_PCI_IGD_SLOT_MASK; +} + +static void xen_igd_clear_slot(DeviceState *qdev, Error **errp) +{ + ERRP_GUARD(); + PCIDevice *pci_dev = (PCIDevice *)qdev; + XenPCIPassthroughState *s = XEN_PT_DEVICE(pci_dev); + XenPTDeviceClass *xpdc = XEN_PT_DEVICE_GET_CLASS(s); + PCIBus *pci_bus = pci_get_bus(pci_dev); + + xen_host_pci_device_get(&s->real_device, + s->hostaddr.domain, s->hostaddr.bus, + s->hostaddr.slot, s->hostaddr.function, + errp); + if (*errp) { + error_append_hint(errp, "Failed to \"open\" the real pci device"); + return; + } + + if (!(pci_bus->slot_reserved_mask & XEN_PCI_IGD_SLOT_MASK)) { + xpdc->pci_qdev_realize(qdev, errp); + return; + } + + if (is_igd_vga_passthrough(&s->real_device) && + s->real_device.domain == XEN_PCI_IGD_DOMAIN && + s->real_device.bus == XEN_PCI_IGD_BUS && + s->real_device.dev == XEN_PCI_IGD_DEV && + s->real_device.func == XEN_PCI_IGD_FN && + s->real_device.vendor_id == PCI_VENDOR_ID_INTEL) { + pci_bus->slot_reserved_mask &= ~XEN_PCI_IGD_SLOT_MASK; + XEN_PT_LOG(pci_dev, "Intel IGD found, using slot 2\n"); + } + xpdc->pci_qdev_realize(qdev, errp); +} + static void xen_pci_passthrough_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + XenPTDeviceClass *xpdc = XEN_PT_DEVICE_CLASS(klass); + xpdc->pci_qdev_realize = dc->realize; + dc->realize = xen_igd_clear_slot; k->realize = xen_pt_realize; k->exit = xen_pt_unregister_device; k->config_read = xen_pt_pci_read_config; @@ -977,6 +1018,7 @@ static const TypeInfo xen_pci_passthrough_info = { .instance_size = sizeof(XenPCIPassthroughState), .instance_finalize = xen_pci_passthrough_finalize, .class_init = xen_pci_passthrough_class_init, + .class_size = sizeof(XenPTDeviceClass), .instance_init = xen_pci_passthrough_instance_init, .interfaces = (InterfaceInfo[]) { { INTERFACE_CONVENTIONAL_PCI_DEVICE }, diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h index cf10fc7bbf..e184699740 100644 --- a/hw/xen/xen_pt.h +++ b/hw/xen/xen_pt.h @@ -40,7 +40,20 @@ typedef struct XenPTReg XenPTReg; #define TYPE_XEN_PT_DEVICE "xen-pci-passthrough" OBJECT_DECLARE_SIMPLE_TYPE(XenPCIPassthroughState, XEN_PT_DEVICE) +#define XEN_PT_DEVICE_CLASS(klass) \ + OBJECT_CLASS_CHECK(XenPTDeviceClass, klass, TYPE_XEN_PT_DEVICE) +#define XEN_PT_DEVICE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(XenPTDeviceClass, obj, TYPE_XEN_PT_DEVICE) + +typedef void (*XenPTQdevRealize)(DeviceState *qdev, Error **errp); + +typedef struct XenPTDeviceClass { + PCIDeviceClass parent_class; + XenPTQdevRealize pci_qdev_realize; +} XenPTDeviceClass; + uint32_t igd_read_opregion(XenPCIPassthroughState *s); +void xen_igd_reserve_slot(PCIBus *pci_bus); void igd_write_opregion(XenPCIPassthroughState *s, uint32_t val); void xen_igd_passthrough_isa_bridge_create(XenPCIPassthroughState *s, XenHostPCIDevice *dev); @@ -75,6 +88,13 @@ typedef int (*xen_pt_conf_byte_read) #define XEN_PCI_INTEL_OPREGION 0xfc +#define XEN_PCI_IGD_DOMAIN 0 +#define XEN_PCI_IGD_BUS 0 +#define XEN_PCI_IGD_DEV 2 +#define XEN_PCI_IGD_FN 0 +#define XEN_PCI_IGD_SLOT_MASK \ + (1UL << PCI_SLOT(PCI_DEVFN(XEN_PCI_IGD_DEV, XEN_PCI_IGD_FN))) + typedef enum { XEN_PT_GRP_TYPE_HARDWIRED = 0, /* 0 Hardwired reg group */ XEN_PT_GRP_TYPE_EMU, /* emul reg group */ diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c index cde898b744..8b9b554352 100644 --- a/hw/xen/xen_pt_config_init.c +++ b/hw/xen/xen_pt_config_init.c @@ -1924,7 +1924,7 @@ static void xen_pt_config_reg_init(XenPCIPassthroughState *s, if (reg->init) { uint32_t host_mask, size_mask; unsigned int offset; - uint32_t val; + uint32_t val = 0; /* initialize emulate register */ rc = reg->init(s, reg_entry->reg, diff --git a/hw/xen/xen_pt_stub.c b/hw/xen/xen_pt_stub.c index 2d8cac8d54..5c108446a8 100644 --- a/hw/xen/xen_pt_stub.c +++ b/hw/xen/xen_pt_stub.c @@ -20,3 +20,7 @@ void xen_igd_gfx_pt_set(bool value, Error **errp) error_setg(errp, "Xen PCI passthrough support not built in"); } } + +void xen_igd_reserve_slot(PCIBus *pci_bus) +{ +} |