72 files changed, 3793 insertions, 1417 deletions
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c index d50239e0e2..ba997f6cfe 100644 --- a/accel/tcg/tcg-accel-ops-mttcg.c +++ b/accel/tcg/tcg-accel-ops-mttcg.c @@ -70,6 +70,8 @@ static void *mttcg_cpu_thread_fn(void *arg) assert(tcg_enabled()); g_assert(!icount_enabled()); + tcg_cpu_init_cflags(cpu, current_machine->smp.max_cpus > 1); + rcu_register_thread(); force_rcu.notifier.notify = mttcg_force_rcu; force_rcu.cpu = cpu; @@ -139,9 +141,6 @@ void mttcg_start_vcpu_thread(CPUState *cpu) { char thread_name[VCPU_THREAD_NAME_SIZE]; - g_assert(tcg_enabled()); - tcg_cpu_init_cflags(cpu, current_machine->smp.max_cpus > 1); - cpu->thread = g_new0(QemuThread, 1); cpu->halt_cond = g_malloc0(sizeof(QemuCond)); qemu_cond_init(cpu->halt_cond); diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c index 1a72149f0e..cc8adc2380 100644 --- a/accel/tcg/tcg-accel-ops-rr.c +++ b/accel/tcg/tcg-accel-ops-rr.c @@ -152,7 +152,9 @@ static void *rr_cpu_thread_fn(void *arg) Notifier force_rcu; CPUState *cpu = arg; - assert(tcg_enabled()); + g_assert(tcg_enabled()); + tcg_cpu_init_cflags(cpu, false); + rcu_register_thread(); force_rcu.notify = rr_force_rcu; rcu_add_force_rcu_notifier(&force_rcu); @@ -275,9 +277,6 @@ void rr_start_vcpu_thread(CPUState *cpu) static QemuCond *single_tcg_halt_cond; static QemuThread *single_tcg_cpu_thread; - g_assert(tcg_enabled()); - tcg_cpu_init_cflags(cpu, false); - if (!single_tcg_cpu_thread) { cpu->thread = g_new0(QemuThread, 1); cpu->halt_cond = g_new0(QemuCond, 1); diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c index 684dc5a137..786d90c08f 100644 --- a/accel/tcg/tcg-accel-ops.c +++ b/accel/tcg/tcg-accel-ops.c @@ -97,16 +97,17 @@ static void tcg_accel_ops_init(AccelOpsClass *ops) ops->create_vcpu_thread = mttcg_start_vcpu_thread; ops->kick_vcpu_thread = mttcg_kick_vcpu_thread; ops->handle_interrupt = tcg_handle_interrupt; - } else if (icount_enabled()) { - ops->create_vcpu_thread = rr_start_vcpu_thread; - ops->kick_vcpu_thread = rr_kick_vcpu_thread; - ops->handle_interrupt = icount_handle_interrupt; - ops->get_virtual_clock = icount_get; - ops->get_elapsed_ticks = icount_get; } else { ops->create_vcpu_thread = rr_start_vcpu_thread; ops->kick_vcpu_thread = rr_kick_vcpu_thread; - ops->handle_interrupt = tcg_handle_interrupt; + + if (icount_enabled()) { + ops->handle_interrupt = icount_handle_interrupt; + ops->get_virtual_clock = icount_get; + ops->get_elapsed_ticks = icount_get; + } else { + ops->handle_interrupt = tcg_handle_interrupt; + } } } diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst index b5acb2a9c1..aba253304e 100644 --- a/docs/system/devices/nvme.rst +++ b/docs/system/devices/nvme.rst @@ -239,3 +239,85 @@ The virtual namespace device supports DIF- and DIX-based protection information to ``1`` to transfer protection information as the first eight bytes of metadata. Otherwise, the protection information is transferred as the last eight bytes. + +Virtualization Enhancements and SR-IOV (Experimental Support) +------------------------------------------------------------- + +The ``nvme`` device supports Single Root I/O Virtualization and Sharing +along with Virtualization Enhancements. The controller has to be linked to +an NVM Subsystem device (``nvme-subsys``) for use with SR-IOV. 
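For example, a controller can be linked to a subsystem as follows (the
``id`` and ``serial`` values here are illustrative):

.. code-block:: console

   -device nvme-subsys,id=subsys0
   -device nvme,serial=deadbeef,subsys=subsys0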
+ +A number of parameters are present (**please note, that they may be +subject to change**): + +``sriov_max_vfs`` (default: ``0``) + Indicates the maximum number of PCIe virtual functions supported + by the controller. Specifying a non-zero value enables reporting of both + SR-IOV and ARI (Alternative Routing-ID Interpretation) capabilities + by the NVMe device. Virtual function controllers will not report SR-IOV. + +``sriov_vq_flexible`` + Indicates the total number of flexible queue resources assignable to all + the secondary controllers. Implicitly sets the number of primary + controller's private resources to ``(max_ioqpairs - sriov_vq_flexible)``. + +``sriov_vi_flexible`` + Indicates the total number of flexible interrupt resources assignable to + all the secondary controllers. Implicitly sets the number of primary + controller's private resources to ``(msix_qsize - sriov_vi_flexible)``. + +``sriov_max_vi_per_vf`` (default: ``0``) + Indicates the maximum number of virtual interrupt resources assignable + to a secondary controller. The default ``0`` resolves to + ``(sriov_vi_flexible / sriov_max_vfs)`` + +``sriov_max_vq_per_vf`` (default: ``0``) + Indicates the maximum number of virtual queue resources assignable to + a secondary controller. The default ``0`` resolves to + ``(sriov_vq_flexible / sriov_max_vfs)`` + +The simplest possible invocation enables the capability to set up one VF +controller and assign an admin queue, an IO queue, and a MSI-X interrupt. + +.. code-block:: console + + -device nvme-subsys,id=subsys0 + -device nvme,serial=deadbeef,subsys=subsys0,sriov_max_vfs=1, + sriov_vq_flexible=2,sriov_vi_flexible=1 + +The minimum steps required to configure a functional NVMe secondary +controller are: + + * unbind flexible resources from the primary controller + +.. code-block:: console + + nvme virt-mgmt /dev/nvme0 -c 0 -r 1 -a 1 -n 0 + nvme virt-mgmt /dev/nvme0 -c 0 -r 0 -a 1 -n 0 + + * perform a Function Level Reset on the primary controller to actually + release the resources + +.. code-block:: console + + echo 1 > /sys/bus/pci/devices/0000:01:00.0/reset + + * enable VF + +.. code-block:: console + + echo 1 > /sys/bus/pci/devices/0000:01:00.0/sriov_numvfs + + * assign the flexible resources to the VF and set it ONLINE + +.. code-block:: console + + nvme virt-mgmt /dev/nvme0 -c 1 -r 1 -a 8 -n 1 + nvme virt-mgmt /dev/nvme0 -c 1 -r 0 -a 8 -n 2 + nvme virt-mgmt /dev/nvme0 -c 1 -r 0 -a 9 -n 0 + + * bind the NVMe driver to the VF + +.. code-block:: console + + echo 0000:01:00.1 > /sys/bus/pci/drivers/nvme/bind \ No newline at end of file diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index bf65bbea49..84d75e6b84 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -192,8 +192,12 @@ static bool acpi_pcihp_pc_no_hotplug(AcpiPciHpState *s, PCIDevice *dev) * ACPI doesn't allow hotplug of bridge devices. Don't allow * hot-unplug of bridge devices unless they were added by hotplug * (and so, not described by acpi). + * + * Don't allow hot-unplug of SR-IOV Virtual Functions, as they + * will be removed implicitly, when Physical Function is unplugged. 
*/ - return (pc->is_bridge && !dev->qdev.hotplugged) || !dc->hotpluggable; + return (pc->is_bridge && !dev->qdev.hotplugged) || !dc->hotpluggable || + pci_is_vf(dev); } static void acpi_pcihp_eject_slot(AcpiPciHpState *s, unsigned bsel, unsigned slots) diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c index 98dc185acd..a06f7c1b62 100644 --- a/hw/arm/aspeed.c +++ b/hw/arm/aspeed.c @@ -519,10 +519,6 @@ static void ast2500_evb_i2c_init(AspeedMachineState *bmc) /* The AST2500 EVB expects a LM75 but a TMP105 is compatible */ i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 7), TYPE_TMP105, 0x4d); - - /* The AST2500 EVB does not have an RTC. Let's pretend that one is - * plugged on the I2C bus header */ - i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 11), "ds1338", 0x32); } static void ast2600_evb_i2c_init(AspeedMachineState *bmc) @@ -1401,6 +1397,18 @@ static void aspeed_minibmc_machine_init(MachineState *machine) AST1030_INTERNAL_FLASH_SIZE); } +static void ast1030_evb_i2c_init(AspeedMachineState *bmc) +{ + AspeedSoCState *soc = &bmc->soc; + + /* U10 24C08 connects to SDA/SCL Groupt 1 by default */ + uint8_t *eeprom_buf = g_malloc0(32 * 1024); + smbus_eeprom_init_one(aspeed_i2c_get_bus(&soc->i2c, 0), 0x50, eeprom_buf); + + /* U11 LM75 connects to SDA/SCL Group 2 by default */ + i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, 1), "tmp105", 0x4d); +} + static void aspeed_minibmc_machine_ast1030_evb_class_init(ObjectClass *oc, void *data) { @@ -1412,6 +1420,7 @@ static void aspeed_minibmc_machine_ast1030_evb_class_init(ObjectClass *oc, amc->hw_strap1 = 0; amc->hw_strap2 = 0; mc->init = aspeed_minibmc_machine_init; + amc->i2c_init = ast1030_evb_i2c_init; mc->default_ram_size = 0; mc->default_cpus = mc->min_cpus = mc->max_cpus = 1; amc->fmc_model = "sst25vf032b"; diff --git a/hw/arm/aspeed_ast10x0.c b/hw/arm/aspeed_ast10x0.c index d534541684..5df480a21f 100644 --- a/hw/arm/aspeed_ast10x0.c +++ b/hw/arm/aspeed_ast10x0.c @@ -114,6 +114,9 @@ static void aspeed_soc_ast1030_init(Object *obj) object_property_add_alias(obj, "hw-strap1", OBJECT(&s->scu), "hw-strap1"); object_property_add_alias(obj, "hw-strap2", OBJECT(&s->scu), "hw-strap2"); + snprintf(typename, sizeof(typename), "aspeed.i2c-%s", socname); + object_initialize_child(obj, "i2c", &s->i2c, typename); + snprintf(typename, sizeof(typename), "aspeed.timer-%s", socname); object_initialize_child(obj, "timerctrl", &s->timerctrl, typename); @@ -188,6 +191,21 @@ static void aspeed_soc_ast1030_realize(DeviceState *dev_soc, Error **errp) } sysbus_mmio_map(SYS_BUS_DEVICE(&s->scu), 0, sc->memmap[ASPEED_DEV_SCU]); + /* I2C */ + + object_property_set_link(OBJECT(&s->i2c), "dram", OBJECT(&s->sram), + &error_abort); + if (!sysbus_realize(SYS_BUS_DEVICE(&s->i2c), errp)) { + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->i2c), 0, sc->memmap[ASPEED_DEV_I2C]); + for (i = 0; i < ASPEED_I2C_GET_CLASS(&s->i2c)->num_busses; i++) { + qemu_irq irq = qdev_get_gpio_in(DEVICE(&s->armv7m), + sc->irqmap[ASPEED_DEV_I2C] + i); + /* The AST1030 I2C controller has one IRQ per bus. 
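Unlike the AST2400/2500, where all busses share the controller-level IRQ.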
*/ + sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c.busses[i]), 0, irq); + } + /* LPC */ if (!sysbus_realize(SYS_BUS_DEVICE(&s->lpc), errp)) { return; diff --git a/hw/i2c/aspeed_i2c.c b/hw/i2c/aspeed_i2c.c index 03a4f5a910..37ae1f2e04 100644 --- a/hw/i2c/aspeed_i2c.c +++ b/hw/i2c/aspeed_i2c.c @@ -21,6 +21,7 @@ #include "qemu/osdep.h" #include "hw/sysbus.h" #include "migration/vmstate.h" +#include "qemu/cutils.h" #include "qemu/log.h" #include "qemu/module.h" #include "qemu/error-report.h" @@ -28,196 +29,86 @@ #include "hw/i2c/aspeed_i2c.h" #include "hw/irq.h" #include "hw/qdev-properties.h" +#include "hw/registerfields.h" #include "trace.h" -/* I2C Global Register */ - -#define I2C_CTRL_STATUS 0x00 /* Device Interrupt Status */ -#define I2C_CTRL_ASSIGN 0x08 /* Device Interrupt Target - Assignment */ -#define I2C_CTRL_GLOBAL 0x0C /* Global Control Register */ -#define I2C_CTRL_SRAM_EN BIT(0) - -/* I2C Device (Bus) Register */ - -#define I2CD_FUN_CTRL_REG 0x00 /* I2CD Function Control */ -#define I2CD_POOL_PAGE_SEL(x) (((x) >> 20) & 0x7) /* AST2400 */ -#define I2CD_M_SDA_LOCK_EN (0x1 << 16) -#define I2CD_MULTI_MASTER_DIS (0x1 << 15) -#define I2CD_M_SCL_DRIVE_EN (0x1 << 14) -#define I2CD_MSB_STS (0x1 << 9) -#define I2CD_SDA_DRIVE_1T_EN (0x1 << 8) -#define I2CD_M_SDA_DRIVE_1T_EN (0x1 << 7) -#define I2CD_M_HIGH_SPEED_EN (0x1 << 6) -#define I2CD_DEF_ADDR_EN (0x1 << 5) -#define I2CD_DEF_ALERT_EN (0x1 << 4) -#define I2CD_DEF_ARP_EN (0x1 << 3) -#define I2CD_DEF_GCALL_EN (0x1 << 2) -#define I2CD_SLAVE_EN (0x1 << 1) -#define I2CD_MASTER_EN (0x1) - -#define I2CD_AC_TIMING_REG1 0x04 /* Clock and AC Timing Control #1 */ -#define I2CD_AC_TIMING_REG2 0x08 /* Clock and AC Timing Control #1 */ -#define I2CD_INTR_CTRL_REG 0x0c /* I2CD Interrupt Control */ -#define I2CD_INTR_STS_REG 0x10 /* I2CD Interrupt Status */ - -#define I2CD_INTR_SLAVE_ADDR_MATCH (0x1 << 31) /* 0: addr1 1: addr2 */ -#define I2CD_INTR_SLAVE_ADDR_RX_PENDING (0x1 << 30) -/* bits[19-16] Reserved */ - -/* All bits below are cleared by writing 1 */ -#define I2CD_INTR_SLAVE_INACTIVE_TIMEOUT (0x1 << 15) -#define I2CD_INTR_SDA_DL_TIMEOUT (0x1 << 14) -#define I2CD_INTR_BUS_RECOVER_DONE (0x1 << 13) -#define I2CD_INTR_SMBUS_ALERT (0x1 << 12) /* Bus [0-3] only */ -#define I2CD_INTR_SMBUS_ARP_ADDR (0x1 << 11) /* Removed */ -#define I2CD_INTR_SMBUS_DEV_ALERT_ADDR (0x1 << 10) /* Removed */ -#define I2CD_INTR_SMBUS_DEF_ADDR (0x1 << 9) /* Removed */ -#define I2CD_INTR_GCALL_ADDR (0x1 << 8) /* Removed */ -#define I2CD_INTR_SLAVE_ADDR_RX_MATCH (0x1 << 7) /* use RX_DONE */ -#define I2CD_INTR_SCL_TIMEOUT (0x1 << 6) -#define I2CD_INTR_ABNORMAL (0x1 << 5) -#define I2CD_INTR_NORMAL_STOP (0x1 << 4) -#define I2CD_INTR_ARBIT_LOSS (0x1 << 3) -#define I2CD_INTR_RX_DONE (0x1 << 2) -#define I2CD_INTR_TX_NAK (0x1 << 1) -#define I2CD_INTR_TX_ACK (0x1 << 0) - -#define I2CD_CMD_REG 0x14 /* I2CD Command/Status */ -#define I2CD_SDA_OE (0x1 << 28) -#define I2CD_SDA_O (0x1 << 27) -#define I2CD_SCL_OE (0x1 << 26) -#define I2CD_SCL_O (0x1 << 25) -#define I2CD_TX_TIMING (0x1 << 24) -#define I2CD_TX_STATUS (0x1 << 23) - -#define I2CD_TX_STATE_SHIFT 19 /* Tx State Machine */ -#define I2CD_TX_STATE_MASK 0xf -#define I2CD_IDLE 0x0 -#define I2CD_MACTIVE 0x8 -#define I2CD_MSTART 0x9 -#define I2CD_MSTARTR 0xa -#define I2CD_MSTOP 0xb -#define I2CD_MTXD 0xc -#define I2CD_MRXACK 0xd -#define I2CD_MRXD 0xe -#define I2CD_MTXACK 0xf -#define I2CD_SWAIT 0x1 -#define I2CD_SRXD 0x4 -#define I2CD_STXACK 0x5 -#define I2CD_STXD 0x6 -#define I2CD_SRXACK 0x7 -#define I2CD_RECOVER 0x3 - -#define 
I2CD_SCL_LINE_STS (0x1 << 18) -#define I2CD_SDA_LINE_STS (0x1 << 17) -#define I2CD_BUS_BUSY_STS (0x1 << 16) -#define I2CD_SDA_OE_OUT_DIR (0x1 << 15) -#define I2CD_SDA_O_OUT_DIR (0x1 << 14) -#define I2CD_SCL_OE_OUT_DIR (0x1 << 13) -#define I2CD_SCL_O_OUT_DIR (0x1 << 12) -#define I2CD_BUS_RECOVER_CMD_EN (0x1 << 11) -#define I2CD_S_ALT_EN (0x1 << 10) - -/* Command Bit */ -#define I2CD_RX_DMA_ENABLE (0x1 << 9) -#define I2CD_TX_DMA_ENABLE (0x1 << 8) -#define I2CD_RX_BUFF_ENABLE (0x1 << 7) -#define I2CD_TX_BUFF_ENABLE (0x1 << 6) -#define I2CD_M_STOP_CMD (0x1 << 5) -#define I2CD_M_S_RX_CMD_LAST (0x1 << 4) -#define I2CD_M_RX_CMD (0x1 << 3) -#define I2CD_S_TX_CMD (0x1 << 2) -#define I2CD_M_TX_CMD (0x1 << 1) -#define I2CD_M_START_CMD (0x1) - -#define I2CD_DEV_ADDR_REG 0x18 /* Slave Device Address */ -#define I2CD_POOL_CTRL_REG 0x1c /* Pool Buffer Control */ -#define I2CD_POOL_RX_COUNT(x) (((x) >> 24) & 0xff) -#define I2CD_POOL_RX_SIZE(x) ((((x) >> 16) & 0xff) + 1) -#define I2CD_POOL_TX_COUNT(x) ((((x) >> 8) & 0xff) + 1) -#define I2CD_POOL_OFFSET(x) (((x) & 0x3f) << 2) /* AST2400 */ -#define I2CD_BYTE_BUF_REG 0x20 /* Transmit/Receive Byte Buffer */ -#define I2CD_BYTE_BUF_TX_SHIFT 0 -#define I2CD_BYTE_BUF_TX_MASK 0xff -#define I2CD_BYTE_BUF_RX_SHIFT 8 -#define I2CD_BYTE_BUF_RX_MASK 0xff -#define I2CD_DMA_ADDR 0x24 /* DMA Buffer Address */ -#define I2CD_DMA_LEN 0x28 /* DMA Transfer Length < 4KB */ - -static inline bool aspeed_i2c_bus_is_master(AspeedI2CBus *bus) -{ - return bus->ctrl & I2CD_MASTER_EN; -} - -static inline bool aspeed_i2c_bus_is_enabled(AspeedI2CBus *bus) -{ - return bus->ctrl & (I2CD_MASTER_EN | I2CD_SLAVE_EN); -} +/* Enable SLAVE_ADDR_RX_MATCH always */ +#define R_I2CD_INTR_STS_ALWAYS_ENABLE R_I2CD_INTR_STS_SLAVE_ADDR_RX_MATCH_MASK static inline void aspeed_i2c_bus_raise_interrupt(AspeedI2CBus *bus) { AspeedI2CClass *aic = ASPEED_I2C_GET_CLASS(bus->controller); + uint32_t reg_intr_sts = aspeed_i2c_bus_intr_sts_offset(bus); + uint32_t intr_ctrl_reg = aspeed_i2c_bus_intr_ctrl_offset(bus); + uint32_t intr_ctrl_mask = bus->regs[intr_ctrl_reg] | + R_I2CD_INTR_STS_ALWAYS_ENABLE; + bool raise_irq; + + if (trace_event_get_state_backends(TRACE_ASPEED_I2C_BUS_RAISE_INTERRUPT)) { + g_autofree char *buf = g_strdup_printf("%s%s%s%s%s%s%s", + aspeed_i2c_bus_pkt_mode_en(bus) && + ARRAY_FIELD_EX32(bus->regs, I2CM_INTR_STS, PKT_CMD_DONE) ? + "pktdone|" : "", + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_intr_sts, TX_NAK) ? + "nak|" : "", + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_intr_sts, TX_ACK) ? + "ack|" : "", + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_intr_sts, RX_DONE) ? + "done|" : "", + ARRAY_FIELD_EX32(bus->regs, I2CD_INTR_STS, SLAVE_ADDR_RX_MATCH) ? + "slave-match|" : "", + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_intr_sts, NORMAL_STOP) ? + "normal|" : "", + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_intr_sts, ABNORMAL) ? + "abnormal" : ""); + + trace_aspeed_i2c_bus_raise_interrupt(bus->regs[reg_intr_sts], buf); + } - trace_aspeed_i2c_bus_raise_interrupt(bus->intr_status, - bus->intr_status & I2CD_INTR_TX_NAK ? "nak|" : "", - bus->intr_status & I2CD_INTR_TX_ACK ? "ack|" : "", - bus->intr_status & I2CD_INTR_RX_DONE ? "done|" : "", - bus->intr_status & I2CD_INTR_NORMAL_STOP ? "normal|" : "", - bus->intr_status & I2CD_INTR_ABNORMAL ? 
"abnormal" : ""); + raise_irq = bus->regs[reg_intr_sts] & intr_ctrl_mask ; - bus->intr_status &= bus->intr_ctrl; - if (bus->intr_status) { + /* In packet mode we don't mask off INTR_STS */ + if (!aspeed_i2c_bus_pkt_mode_en(bus)) { + bus->regs[reg_intr_sts] &= intr_ctrl_mask; + } + + if (raise_irq) { bus->controller->intr_status |= 1 << bus->id; qemu_irq_raise(aic->bus_get_irq(bus)); } } -static uint64_t aspeed_i2c_bus_read(void *opaque, hwaddr offset, - unsigned size) +static uint64_t aspeed_i2c_bus_old_read(AspeedI2CBus *bus, hwaddr offset, + unsigned size) { - AspeedI2CBus *bus = opaque; AspeedI2CClass *aic = ASPEED_I2C_GET_CLASS(bus->controller); - uint64_t value = -1; + uint64_t value = bus->regs[offset / sizeof(*bus->regs)]; switch (offset) { - case I2CD_FUN_CTRL_REG: - value = bus->ctrl; + case A_I2CD_FUN_CTRL: + case A_I2CD_AC_TIMING1: + case A_I2CD_AC_TIMING2: + case A_I2CD_INTR_CTRL: + case A_I2CD_INTR_STS: + case A_I2CD_DEV_ADDR: + case A_I2CD_POOL_CTRL: + case A_I2CD_BYTE_BUF: + /* Value is already set, don't do anything. */ break; - case I2CD_AC_TIMING_REG1: - value = bus->timing[0]; + case A_I2CD_CMD: + value = SHARED_FIELD_DP32(value, BUS_BUSY_STS, i2c_bus_busy(bus->bus)); break; - case I2CD_AC_TIMING_REG2: - value = bus->timing[1]; - break; - case I2CD_INTR_CTRL_REG: - value = bus->intr_ctrl; - break; - case I2CD_INTR_STS_REG: - value = bus->intr_status; - break; - case I2CD_POOL_CTRL_REG: - value = bus->pool_ctrl; - break; - case I2CD_BYTE_BUF_REG: - value = bus->buf; - break; - case I2CD_CMD_REG: - value = bus->cmd | (i2c_bus_busy(bus->bus) << 16); - break; - case I2CD_DMA_ADDR: + case A_I2CD_DMA_ADDR: if (!aic->has_dma) { qemu_log_mask(LOG_GUEST_ERROR, "%s: No DMA support\n", __func__); - break; + value = -1; } - value = bus->dma_addr; break; - case I2CD_DMA_LEN: + case A_I2CD_DMA_LEN: if (!aic->has_dma) { qemu_log_mask(LOG_GUEST_ERROR, "%s: No DMA support\n", __func__); - break; + value = -1; } - value = bus->dma_len; break; default: @@ -231,32 +122,86 @@ static uint64_t aspeed_i2c_bus_read(void *opaque, hwaddr offset, return value; } +static uint64_t aspeed_i2c_bus_new_read(AspeedI2CBus *bus, hwaddr offset, + unsigned size) +{ + uint64_t value = bus->regs[offset / sizeof(*bus->regs)]; + + switch (offset) { + case A_I2CC_FUN_CTRL: + case A_I2CC_AC_TIMING: + case A_I2CC_POOL_CTRL: + case A_I2CM_INTR_CTRL: + case A_I2CM_INTR_STS: + case A_I2CC_MS_TXRX_BYTE_BUF: + case A_I2CM_DMA_LEN: + case A_I2CM_DMA_TX_ADDR: + case A_I2CM_DMA_RX_ADDR: + case A_I2CM_DMA_LEN_STS: + case A_I2CC_DMA_ADDR: + case A_I2CC_DMA_LEN: + /* Value is already set, don't do anything. 
*/ + break; + case A_I2CM_CMD: + value = SHARED_FIELD_DP32(value, BUS_BUSY_STS, i2c_bus_busy(bus->bus)); + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%" HWADDR_PRIx "\n", __func__, offset); + value = -1; + break; + } + + trace_aspeed_i2c_bus_read(bus->id, offset, size, value); + return value; +} + +static uint64_t aspeed_i2c_bus_read(void *opaque, hwaddr offset, + unsigned size) +{ + AspeedI2CBus *bus = opaque; + if (aspeed_i2c_is_new_mode(bus->controller)) { + return aspeed_i2c_bus_new_read(bus, offset, size); + } + return aspeed_i2c_bus_old_read(bus, offset, size); +} + static void aspeed_i2c_set_state(AspeedI2CBus *bus, uint8_t state) { - bus->cmd &= ~(I2CD_TX_STATE_MASK << I2CD_TX_STATE_SHIFT); - bus->cmd |= (state & I2CD_TX_STATE_MASK) << I2CD_TX_STATE_SHIFT; + if (aspeed_i2c_is_new_mode(bus->controller)) { + SHARED_ARRAY_FIELD_DP32(bus->regs, R_I2CC_MS_TXRX_BYTE_BUF, TX_STATE, + state); + } else { + SHARED_ARRAY_FIELD_DP32(bus->regs, R_I2CD_CMD, TX_STATE, state); + } } static uint8_t aspeed_i2c_get_state(AspeedI2CBus *bus) { - return (bus->cmd >> I2CD_TX_STATE_SHIFT) & I2CD_TX_STATE_MASK; + if (aspeed_i2c_is_new_mode(bus->controller)) { + return SHARED_ARRAY_FIELD_EX32(bus->regs, R_I2CC_MS_TXRX_BYTE_BUF, + TX_STATE); + } + return SHARED_ARRAY_FIELD_EX32(bus->regs, R_I2CD_CMD, TX_STATE); } static int aspeed_i2c_dma_read(AspeedI2CBus *bus, uint8_t *data) { MemTxResult result; AspeedI2CState *s = bus->controller; + uint32_t reg_dma_addr = aspeed_i2c_bus_dma_addr_offset(bus); + uint32_t reg_dma_len = aspeed_i2c_bus_dma_len_offset(bus); - result = address_space_read(&s->dram_as, bus->dma_addr, + result = address_space_read(&s->dram_as, bus->regs[reg_dma_addr], MEMTXATTRS_UNSPECIFIED, data, 1); if (result != MEMTX_OK) { qemu_log_mask(LOG_GUEST_ERROR, "%s: DRAM read failed @%08x\n", - __func__, bus->dma_addr); + __func__, bus->regs[reg_dma_addr]); return -1; } - bus->dma_addr++; - bus->dma_len--; + bus->regs[reg_dma_addr]++; + bus->regs[reg_dma_len]--; return 0; } @@ -265,34 +210,51 @@ static int aspeed_i2c_bus_send(AspeedI2CBus *bus, uint8_t pool_start) AspeedI2CClass *aic = ASPEED_I2C_GET_CLASS(bus->controller); int ret = -1; int i; - - if (bus->cmd & I2CD_TX_BUFF_ENABLE) { - for (i = pool_start; i < I2CD_POOL_TX_COUNT(bus->pool_ctrl); i++) { + uint32_t reg_cmd = aspeed_i2c_bus_cmd_offset(bus); + uint32_t reg_pool_ctrl = aspeed_i2c_bus_pool_ctrl_offset(bus); + uint32_t reg_byte_buf = aspeed_i2c_bus_byte_buf_offset(bus); + uint32_t reg_dma_len = aspeed_i2c_bus_dma_len_offset(bus); + int pool_tx_count = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl, + TX_COUNT); + + if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_BUFF_EN)) { + for (i = pool_start; i < pool_tx_count; i++) { uint8_t *pool_base = aic->bus_pool_base(bus); - trace_aspeed_i2c_bus_send("BUF", i + 1, - I2CD_POOL_TX_COUNT(bus->pool_ctrl), + trace_aspeed_i2c_bus_send("BUF", i + 1, pool_tx_count, pool_base[i]); ret = i2c_send(bus->bus, pool_base[i]); if (ret) { break; } } - bus->cmd &= ~I2CD_TX_BUFF_ENABLE; - } else if (bus->cmd & I2CD_TX_DMA_ENABLE) { - while (bus->dma_len) { + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, TX_BUFF_EN, 0); + } else if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_DMA_EN)) { + /* In new mode, clear how many bytes we TXed */ + if (aspeed_i2c_is_new_mode(bus->controller)) { + ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN_STS, TX_LEN, 0); + } + while (bus->regs[reg_dma_len]) { uint8_t data; aspeed_i2c_dma_read(bus, &data); - trace_aspeed_i2c_bus_send("DMA", bus->dma_len, 
bus->dma_len, data); + trace_aspeed_i2c_bus_send("DMA", bus->regs[reg_dma_len], + bus->regs[reg_dma_len], data); ret = i2c_send(bus->bus, data); if (ret) { break; } + /* In new mode, keep track of how many bytes we TXed */ + if (aspeed_i2c_is_new_mode(bus->controller)) { + ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN_STS, TX_LEN, + ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN_STS, + TX_LEN) + 1); + } } - bus->cmd &= ~I2CD_TX_DMA_ENABLE; + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, TX_DMA_EN, 0); } else { - trace_aspeed_i2c_bus_send("BYTE", pool_start, 1, bus->buf); - ret = i2c_send(bus->bus, bus->buf); + trace_aspeed_i2c_bus_send("BYTE", pool_start, 1, + bus->regs[reg_byte_buf]); + ret = i2c_send(bus->bus, bus->regs[reg_byte_buf]); } return ret; @@ -304,74 +266,100 @@ static void aspeed_i2c_bus_recv(AspeedI2CBus *bus) AspeedI2CClass *aic = ASPEED_I2C_GET_CLASS(s); uint8_t data; int i; - - if (bus->cmd & I2CD_RX_BUFF_ENABLE) { + uint32_t reg_cmd = aspeed_i2c_bus_cmd_offset(bus); + uint32_t reg_pool_ctrl = aspeed_i2c_bus_pool_ctrl_offset(bus); + uint32_t reg_byte_buf = aspeed_i2c_bus_byte_buf_offset(bus); + uint32_t reg_dma_len = aspeed_i2c_bus_dma_len_offset(bus); + uint32_t reg_dma_addr = aspeed_i2c_bus_dma_addr_offset(bus); + int pool_rx_count = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl, + RX_COUNT); + + if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, RX_BUFF_EN)) { uint8_t *pool_base = aic->bus_pool_base(bus); - for (i = 0; i < I2CD_POOL_RX_SIZE(bus->pool_ctrl); i++) { + for (i = 0; i < pool_rx_count; i++) { pool_base[i] = i2c_recv(bus->bus); - trace_aspeed_i2c_bus_recv("BUF", i + 1, - I2CD_POOL_RX_SIZE(bus->pool_ctrl), + trace_aspeed_i2c_bus_recv("BUF", i + 1, pool_rx_count, pool_base[i]); } /* Update RX count */ - bus->pool_ctrl &= ~(0xff << 24); - bus->pool_ctrl |= (i & 0xff) << 24; - bus->cmd &= ~I2CD_RX_BUFF_ENABLE; - } else if (bus->cmd & I2CD_RX_DMA_ENABLE) { + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_pool_ctrl, RX_COUNT, i & 0xff); + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, RX_BUFF_EN, 0); + } else if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, RX_DMA_EN)) { uint8_t data; + /* In new mode, clear how many bytes we RXed */ + if (aspeed_i2c_is_new_mode(bus->controller)) { + ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN_STS, RX_LEN, 0); + } - while (bus->dma_len) { + while (bus->regs[reg_dma_len]) { MemTxResult result; data = i2c_recv(bus->bus); - trace_aspeed_i2c_bus_recv("DMA", bus->dma_len, bus->dma_len, data); - result = address_space_write(&s->dram_as, bus->dma_addr, + trace_aspeed_i2c_bus_recv("DMA", bus->regs[reg_dma_len], + bus->regs[reg_dma_len], data); + result = address_space_write(&s->dram_as, bus->regs[reg_dma_addr], MEMTXATTRS_UNSPECIFIED, &data, 1); if (result != MEMTX_OK) { qemu_log_mask(LOG_GUEST_ERROR, "%s: DRAM write failed @%08x\n", - __func__, bus->dma_addr); + __func__, bus->regs[reg_dma_addr]); return; } - bus->dma_addr++; - bus->dma_len--; + bus->regs[reg_dma_addr]++; + bus->regs[reg_dma_len]--; + /* In new mode, keep track of how many bytes we RXed */ + if (aspeed_i2c_is_new_mode(bus->controller)) { + ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN_STS, RX_LEN, + ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN_STS, + RX_LEN) + 1); + } } - bus->cmd &= ~I2CD_RX_DMA_ENABLE; + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, RX_DMA_EN, 0); } else { data = i2c_recv(bus->bus); - trace_aspeed_i2c_bus_recv("BYTE", 1, 1, bus->buf); - bus->buf = (data & I2CD_BYTE_BUF_RX_MASK) << I2CD_BYTE_BUF_RX_SHIFT; + trace_aspeed_i2c_bus_recv("BYTE", 1, 1, bus->regs[reg_byte_buf]); + 
SHARED_ARRAY_FIELD_DP32(bus->regs, reg_byte_buf, RX_BUF, data); } } static void aspeed_i2c_handle_rx_cmd(AspeedI2CBus *bus) { + uint32_t reg_cmd = aspeed_i2c_bus_cmd_offset(bus); + uint32_t reg_intr_sts = aspeed_i2c_bus_intr_sts_offset(bus); + aspeed_i2c_set_state(bus, I2CD_MRXD); aspeed_i2c_bus_recv(bus); - bus->intr_status |= I2CD_INTR_RX_DONE; - if (bus->cmd & I2CD_M_S_RX_CMD_LAST) { + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_intr_sts, RX_DONE, 1); + if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, M_S_RX_CMD_LAST)) { i2c_nack(bus->bus); } - bus->cmd &= ~(I2CD_M_RX_CMD | I2CD_M_S_RX_CMD_LAST); + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_RX_CMD, 0); + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_S_RX_CMD_LAST, 0); aspeed_i2c_set_state(bus, I2CD_MACTIVE); } static uint8_t aspeed_i2c_get_addr(AspeedI2CBus *bus) { AspeedI2CClass *aic = ASPEED_I2C_GET_CLASS(bus->controller); + uint32_t reg_byte_buf = aspeed_i2c_bus_byte_buf_offset(bus); + uint32_t reg_cmd = aspeed_i2c_bus_cmd_offset(bus); - if (bus->cmd & I2CD_TX_BUFF_ENABLE) { + if (aspeed_i2c_bus_pkt_mode_en(bus)) { + return (ARRAY_FIELD_EX32(bus->regs, I2CM_CMD, PKT_DEV_ADDR) << 1) | + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, M_RX_CMD); + } + if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_BUFF_EN)) { uint8_t *pool_base = aic->bus_pool_base(bus); return pool_base[0]; - } else if (bus->cmd & I2CD_TX_DMA_ENABLE) { + } else if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_DMA_EN)) { uint8_t data; aspeed_i2c_dma_read(bus, &data); return data; } else { - return bus->buf; + return bus->regs[reg_byte_buf]; } } @@ -379,7 +367,11 @@ static bool aspeed_i2c_check_sram(AspeedI2CBus *bus) { AspeedI2CState *s = bus->controller; AspeedI2CClass *aic = ASPEED_I2C_GET_CLASS(s); - + uint32_t reg_cmd = aspeed_i2c_bus_cmd_offset(bus); + bool dma_en = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, RX_DMA_EN) || + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_DMA_EN) || + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, RX_BUFF_EN) || + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_BUFF_EN); if (!aic->check_sram) { return true; } @@ -388,9 +380,7 @@ static bool aspeed_i2c_check_sram(AspeedI2CBus *bus) * AST2500: SRAM must be enabled before using the Buffer Pool or * DMA mode. */ - if (!(s->ctrl_global & I2C_CTRL_SRAM_EN) && - (bus->cmd & (I2CD_RX_DMA_ENABLE | I2CD_TX_DMA_ENABLE | - I2CD_RX_BUFF_ENABLE | I2CD_TX_BUFF_ENABLE))) { + if (!FIELD_EX32(s->ctrl_global, I2C_CTRL_GLOBAL, SRAM_EN) && dma_en) { qemu_log_mask(LOG_GUEST_ERROR, "%s: SRAM is not enabled\n", __func__); return false; } @@ -402,27 +392,31 @@ static void aspeed_i2c_bus_cmd_dump(AspeedI2CBus *bus) { g_autofree char *cmd_flags = NULL; uint32_t count; - - if (bus->cmd & (I2CD_RX_BUFF_ENABLE | I2CD_RX_BUFF_ENABLE)) { - count = I2CD_POOL_TX_COUNT(bus->pool_ctrl); - } else if (bus->cmd & (I2CD_RX_DMA_ENABLE | I2CD_RX_DMA_ENABLE)) { - count = bus->dma_len; + uint32_t reg_cmd = aspeed_i2c_bus_cmd_offset(bus); + uint32_t reg_pool_ctrl = aspeed_i2c_bus_pool_ctrl_offset(bus); + uint32_t reg_intr_sts = aspeed_i2c_bus_intr_sts_offset(bus); + uint32_t reg_dma_len = aspeed_i2c_bus_dma_len_offset(bus); + if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, RX_BUFF_EN)) { + count = SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl, TX_COUNT); + } else if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, RX_DMA_EN)) { + count = bus->regs[reg_dma_len]; } else { /* BYTE mode */ count = 1; } cmd_flags = g_strdup_printf("%s%s%s%s%s%s%s%s%s", - bus->cmd & I2CD_M_START_CMD ? "start|" : "", - bus->cmd & I2CD_RX_DMA_ENABLE ? 
"rxdma|" : "", - bus->cmd & I2CD_TX_DMA_ENABLE ? "txdma|" : "", - bus->cmd & I2CD_RX_BUFF_ENABLE ? "rxbuf|" : "", - bus->cmd & I2CD_TX_BUFF_ENABLE ? "txbuf|" : "", - bus->cmd & I2CD_M_TX_CMD ? "tx|" : "", - bus->cmd & I2CD_M_RX_CMD ? "rx|" : "", - bus->cmd & I2CD_M_S_RX_CMD_LAST ? "last|" : "", - bus->cmd & I2CD_M_STOP_CMD ? "stop" : ""); - - trace_aspeed_i2c_bus_cmd(bus->cmd, cmd_flags, count, bus->intr_status); + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, M_START_CMD) ? "start|" : "", + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, RX_DMA_EN) ? "rxdma|" : "", + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_DMA_EN) ? "txdma|" : "", + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, RX_BUFF_EN) ? "rxbuf|" : "", + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_BUFF_EN) ? "txbuf|" : "", + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, M_TX_CMD) ? "tx|" : "", + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, M_RX_CMD) ? "rx|" : "", + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, M_S_RX_CMD_LAST) ? "last|" : "", + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, M_STOP_CMD) ? "stop|" : ""); + + trace_aspeed_i2c_bus_cmd(bus->regs[reg_cmd], cmd_flags, count, + bus->regs[reg_intr_sts]); } /* @@ -432,9 +426,10 @@ static void aspeed_i2c_bus_cmd_dump(AspeedI2CBus *bus) static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus *bus, uint64_t value) { uint8_t pool_start = 0; - - bus->cmd &= ~0xFFFF; - bus->cmd |= value & 0xFFFF; + uint32_t reg_intr_sts = aspeed_i2c_bus_intr_sts_offset(bus); + uint32_t reg_cmd = aspeed_i2c_bus_cmd_offset(bus); + uint32_t reg_pool_ctrl = aspeed_i2c_bus_pool_ctrl_offset(bus); + uint32_t reg_dma_len = aspeed_i2c_bus_dma_len_offset(bus); if (!aspeed_i2c_check_sram(bus)) { return; @@ -444,7 +439,7 @@ static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus *bus, uint64_t value) aspeed_i2c_bus_cmd_dump(bus); } - if (bus->cmd & I2CD_M_START_CMD) { + if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, M_START_CMD)) { uint8_t state = aspeed_i2c_get_state(bus) & I2CD_MACTIVE ? I2CD_MSTARTR : I2CD_MSTART; uint8_t addr; @@ -452,24 +447,30 @@ static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus *bus, uint64_t value) aspeed_i2c_set_state(bus, state); addr = aspeed_i2c_get_addr(bus); - if (i2c_start_transfer(bus->bus, extract32(addr, 1, 7), extract32(addr, 0, 1))) { - bus->intr_status |= I2CD_INTR_TX_NAK; + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_intr_sts, TX_NAK, 1); + if (aspeed_i2c_bus_pkt_mode_en(bus)) { + ARRAY_FIELD_DP32(bus->regs, I2CM_INTR_STS, PKT_CMD_FAIL, 1); + } } else { - bus->intr_status |= I2CD_INTR_TX_ACK; + /* START doesn't set TX_ACK in packet mode */ + if (!aspeed_i2c_bus_pkt_mode_en(bus)) { + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_intr_sts, TX_ACK, 1); + } } - bus->cmd &= ~I2CD_M_START_CMD; + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_START_CMD, 0); /* * The START command is also a TX command, as the slave * address is sent on the bus. Drop the TX flag if nothing * else needs to be sent in this sequence. 
*/ - if (bus->cmd & I2CD_TX_BUFF_ENABLE) { - if (I2CD_POOL_TX_COUNT(bus->pool_ctrl) == 1) { - bus->cmd &= ~I2CD_M_TX_CMD; + if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_BUFF_EN)) { + if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_pool_ctrl, TX_COUNT) + == 1) { + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_TX_CMD, 0); } else { /* * Increase the start index in the TX pool buffer to @@ -477,105 +478,267 @@ static void aspeed_i2c_bus_handle_cmd(AspeedI2CBus *bus, uint64_t value) */ pool_start++; } - } else if (bus->cmd & I2CD_TX_DMA_ENABLE) { - if (bus->dma_len == 0) { - bus->cmd &= ~I2CD_M_TX_CMD; + } else if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, TX_DMA_EN)) { + if (bus->regs[reg_dma_len] == 0) { + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_TX_CMD, 0); } } else { - bus->cmd &= ~I2CD_M_TX_CMD; + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_TX_CMD, 0); } /* No slave found */ if (!i2c_bus_busy(bus->bus)) { + if (aspeed_i2c_bus_pkt_mode_en(bus)) { + ARRAY_FIELD_DP32(bus->regs, I2CM_INTR_STS, PKT_CMD_FAIL, 1); + ARRAY_FIELD_DP32(bus->regs, I2CM_INTR_STS, PKT_CMD_DONE, 1); + } return; } aspeed_i2c_set_state(bus, I2CD_MACTIVE); } - if (bus->cmd & I2CD_M_TX_CMD) { + if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, M_TX_CMD)) { aspeed_i2c_set_state(bus, I2CD_MTXD); if (aspeed_i2c_bus_send(bus, pool_start)) { - bus->intr_status |= (I2CD_INTR_TX_NAK); + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_intr_sts, TX_NAK, 1); i2c_end_transfer(bus->bus); } else { - bus->intr_status |= I2CD_INTR_TX_ACK; + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_intr_sts, TX_ACK, 1); } - bus->cmd &= ~I2CD_M_TX_CMD; + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_TX_CMD, 0); aspeed_i2c_set_state(bus, I2CD_MACTIVE); } - if ((bus->cmd & (I2CD_M_RX_CMD | I2CD_M_S_RX_CMD_LAST)) && - !(bus->intr_status & I2CD_INTR_RX_DONE)) { + if ((SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, M_RX_CMD) || + SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, M_S_RX_CMD_LAST)) && + !SHARED_ARRAY_FIELD_EX32(bus->regs, reg_intr_sts, RX_DONE)) { aspeed_i2c_handle_rx_cmd(bus); } - if (bus->cmd & I2CD_M_STOP_CMD) { + if (SHARED_ARRAY_FIELD_EX32(bus->regs, reg_cmd, M_STOP_CMD)) { if (!(aspeed_i2c_get_state(bus) & I2CD_MACTIVE)) { qemu_log_mask(LOG_GUEST_ERROR, "%s: abnormal stop\n", __func__); - bus->intr_status |= I2CD_INTR_ABNORMAL; + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_intr_sts, ABNORMAL, 1); + if (aspeed_i2c_bus_pkt_mode_en(bus)) { + ARRAY_FIELD_DP32(bus->regs, I2CM_INTR_STS, PKT_CMD_FAIL, 1); + } } else { aspeed_i2c_set_state(bus, I2CD_MSTOP); i2c_end_transfer(bus->bus); - bus->intr_status |= I2CD_INTR_NORMAL_STOP; + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_intr_sts, NORMAL_STOP, 1); } - bus->cmd &= ~I2CD_M_STOP_CMD; + SHARED_ARRAY_FIELD_DP32(bus->regs, reg_cmd, M_STOP_CMD, 0); aspeed_i2c_set_state(bus, I2CD_IDLE); } + + if (aspeed_i2c_bus_pkt_mode_en(bus)) { + ARRAY_FIELD_DP32(bus->regs, I2CM_INTR_STS, PKT_CMD_DONE, 1); + } } -static void aspeed_i2c_bus_write(void *opaque, hwaddr offset, - uint64_t value, unsigned size) +static void aspeed_i2c_bus_new_write(AspeedI2CBus *bus, hwaddr offset, + uint64_t value, unsigned size) { - AspeedI2CBus *bus = opaque; AspeedI2CClass *aic = ASPEED_I2C_GET_CLASS(bus->controller); bool handle_rx; + bool w1t; trace_aspeed_i2c_bus_write(bus->id, offset, size, value); switch (offset) { - case I2CD_FUN_CTRL_REG: - if (value & I2CD_SLAVE_EN) { + case A_I2CC_FUN_CTRL: + if (SHARED_FIELD_EX32(value, SLAVE_EN)) { qemu_log_mask(LOG_UNIMP, "%s: slave mode not implemented\n", __func__); break; } - bus->ctrl = value & 0x0071C3FF; + 
bus->regs[R_I2CD_FUN_CTRL] = value & 0x007dc3ff; + break; + case A_I2CC_AC_TIMING: + bus->regs[R_I2CC_AC_TIMING] = value & 0x1ffff0ff; break; - case I2CD_AC_TIMING_REG1: - bus->timing[0] = value & 0xFFFFF0F; + case A_I2CC_MS_TXRX_BYTE_BUF: + SHARED_ARRAY_FIELD_DP32(bus->regs, R_I2CC_MS_TXRX_BYTE_BUF, TX_BUF, + value); break; - case I2CD_AC_TIMING_REG2: - bus->timing[1] = value & 0x7; + case A_I2CC_POOL_CTRL: + bus->regs[R_I2CC_POOL_CTRL] &= ~0xffffff; + bus->regs[R_I2CC_POOL_CTRL] |= (value & 0xffffff); break; - case I2CD_INTR_CTRL_REG: - bus->intr_ctrl = value & 0x7FFF; + case A_I2CM_INTR_CTRL: + bus->regs[R_I2CM_INTR_CTRL] = value & 0x0007f07f; break; - case I2CD_INTR_STS_REG: - handle_rx = (bus->intr_status & I2CD_INTR_RX_DONE) && - (value & I2CD_INTR_RX_DONE); - bus->intr_status &= ~(value & 0x7FFF); - if (!bus->intr_status) { + case A_I2CM_INTR_STS: + handle_rx = SHARED_ARRAY_FIELD_EX32(bus->regs, R_I2CM_INTR_STS, RX_DONE) + && SHARED_FIELD_EX32(value, RX_DONE); + + /* In packet mode, clearing PKT_CMD_DONE clears other interrupts. */ + if (aspeed_i2c_bus_pkt_mode_en(bus) && + FIELD_EX32(value, I2CM_INTR_STS, PKT_CMD_DONE)) { + bus->regs[R_I2CM_INTR_STS] &= 0xf0001000; + if (!bus->regs[R_I2CM_INTR_STS]) { + bus->controller->intr_status &= ~(1 << bus->id); + qemu_irq_lower(aic->bus_get_irq(bus)); + } + break; + } + bus->regs[R_I2CM_INTR_STS] &= ~(value & 0xf007f07f); + if (!bus->regs[R_I2CM_INTR_STS]) { bus->controller->intr_status &= ~(1 << bus->id); qemu_irq_lower(aic->bus_get_irq(bus)); } - if (handle_rx && (bus->cmd & (I2CD_M_RX_CMD | I2CD_M_S_RX_CMD_LAST))) { + if (handle_rx && (SHARED_ARRAY_FIELD_EX32(bus->regs, R_I2CM_CMD, + M_RX_CMD) || + SHARED_ARRAY_FIELD_EX32(bus->regs, R_I2CM_CMD, + M_S_RX_CMD_LAST))) { aspeed_i2c_handle_rx_cmd(bus); aspeed_i2c_bus_raise_interrupt(bus); } break; - case I2CD_DEV_ADDR_REG: - qemu_log_mask(LOG_UNIMP, "%s: slave mode not implemented\n", + case A_I2CM_CMD: + if (!aspeed_i2c_bus_is_enabled(bus)) { + break; + } + + if (!aspeed_i2c_bus_is_master(bus)) { + qemu_log_mask(LOG_UNIMP, "%s: slave mode not implemented\n", + __func__); + break; + } + + if (!aic->has_dma && + (SHARED_FIELD_EX32(value, RX_DMA_EN) || + SHARED_FIELD_EX32(value, TX_DMA_EN))) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: No DMA support\n", __func__); + break; + } + + if (bus->regs[R_I2CM_INTR_STS] & 0xffff0000) { + qemu_log_mask(LOG_UNIMP, "%s: Packet mode is not implemented\n", + __func__); + break; + } + + value &= 0xff0ffbfb; + if (ARRAY_FIELD_EX32(bus->regs, I2CM_CMD, W1_CTRL)) { + bus->regs[R_I2CM_CMD] |= value; + } else { + bus->regs[R_I2CM_CMD] = value; + } + + aspeed_i2c_bus_handle_cmd(bus, value); + aspeed_i2c_bus_raise_interrupt(bus); + break; + case A_I2CM_DMA_TX_ADDR: + bus->regs[R_I2CM_DMA_TX_ADDR] = FIELD_EX32(value, I2CM_DMA_TX_ADDR, + ADDR); + bus->regs[R_I2CC_DMA_ADDR] = FIELD_EX32(value, I2CM_DMA_TX_ADDR, ADDR); + bus->regs[R_I2CC_DMA_LEN] = ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, + TX_BUF_LEN) + 1; + break; + case A_I2CM_DMA_RX_ADDR: + bus->regs[R_I2CM_DMA_RX_ADDR] = FIELD_EX32(value, I2CM_DMA_RX_ADDR, + ADDR); + bus->regs[R_I2CC_DMA_ADDR] = FIELD_EX32(value, I2CM_DMA_RX_ADDR, ADDR); + bus->regs[R_I2CC_DMA_LEN] = ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, + RX_BUF_LEN) + 1; + break; + case A_I2CM_DMA_LEN: + w1t = ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN_W1T) || + ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN_W1T); + /* If none of the w1t bits are set, just write to the reg as normal. 
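* Otherwise, each W1T bit gates the update of its corresponding length field,
* so RX_BUF_LEN and TX_BUF_LEN can be rewritten independently of each other.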
*/ + if (!w1t) { + bus->regs[R_I2CM_DMA_LEN] = value; + break; + } + if (ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN_W1T)) { + ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN, RX_BUF_LEN, + FIELD_EX32(value, I2CM_DMA_LEN, RX_BUF_LEN)); + } + if (ARRAY_FIELD_EX32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN_W1T)) { + ARRAY_FIELD_DP32(bus->regs, I2CM_DMA_LEN, TX_BUF_LEN, + FIELD_EX32(value, I2CM_DMA_LEN, TX_BUF_LEN)); + } + break; + case A_I2CM_DMA_LEN_STS: + /* Writes clear to 0 */ + bus->regs[R_I2CM_DMA_LEN_STS] = 0; + break; + case A_I2CC_DMA_ADDR: + case A_I2CC_DMA_LEN: + /* RO */ + break; + case A_I2CS_DMA_LEN_STS: + case A_I2CS_DMA_TX_ADDR: + case A_I2CS_DMA_RX_ADDR: + case A_I2CS_DEV_ADDR: + case A_I2CS_INTR_CTRL: + case A_I2CS_INTR_STS: + case A_I2CS_CMD: + case A_I2CS_DMA_LEN: + qemu_log_mask(LOG_UNIMP, "%s: Slave mode is not implemented\n", __func__); break; - case I2CD_POOL_CTRL_REG: - bus->pool_ctrl &= ~0xffffff; - bus->pool_ctrl |= (value & 0xffffff); + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" HWADDR_PRIx "\n", + __func__, offset); + } +} + +static void aspeed_i2c_bus_old_write(AspeedI2CBus *bus, hwaddr offset, + uint64_t value, unsigned size) +{ + AspeedI2CClass *aic = ASPEED_I2C_GET_CLASS(bus->controller); + bool handle_rx; + + trace_aspeed_i2c_bus_write(bus->id, offset, size, value); + + switch (offset) { + case A_I2CD_FUN_CTRL: + if (SHARED_FIELD_EX32(value, SLAVE_EN)) { + qemu_log_mask(LOG_UNIMP, "%s: slave mode not implemented\n", + __func__); + break; + } + bus->regs[R_I2CD_FUN_CTRL] = value & 0x0071C3FF; + break; + case A_I2CD_AC_TIMING1: + bus->regs[R_I2CD_AC_TIMING1] = value & 0xFFFFF0F; + break; + case A_I2CD_AC_TIMING2: + bus->regs[R_I2CD_AC_TIMING2] = value & 0x7; + break; + case A_I2CD_INTR_CTRL: + bus->regs[R_I2CD_INTR_CTRL] = value & 0x7FFF; + break; + case A_I2CD_INTR_STS: + handle_rx = SHARED_ARRAY_FIELD_EX32(bus->regs, R_I2CD_INTR_STS, RX_DONE) + && SHARED_FIELD_EX32(value, RX_DONE); + bus->regs[R_I2CD_INTR_STS] &= ~(value & 0x7FFF); + if (!bus->regs[R_I2CD_INTR_STS]) { + bus->controller->intr_status &= ~(1 << bus->id); + qemu_irq_lower(aic->bus_get_irq(bus)); + } + if (handle_rx && (SHARED_ARRAY_FIELD_EX32(bus->regs, R_I2CD_CMD, + M_RX_CMD) || + SHARED_ARRAY_FIELD_EX32(bus->regs, R_I2CD_CMD, + M_S_RX_CMD_LAST))) { + aspeed_i2c_handle_rx_cmd(bus); + aspeed_i2c_bus_raise_interrupt(bus); + } + break; + case A_I2CD_DEV_ADDR: + bus->regs[R_I2CD_DEV_ADDR] = value; + break; + case A_I2CD_POOL_CTRL: + bus->regs[R_I2CD_POOL_CTRL] &= ~0xffffff; + bus->regs[R_I2CD_POOL_CTRL] |= (value & 0xffffff); break; - case I2CD_BYTE_BUF_REG: - bus->buf = (value & I2CD_BYTE_BUF_TX_MASK) << I2CD_BYTE_BUF_TX_SHIFT; + case A_I2CD_BYTE_BUF: + SHARED_ARRAY_FIELD_DP32(bus->regs, R_I2CD_BYTE_BUF, TX_BUF, value); break; - case I2CD_CMD_REG: + case A_I2CD_CMD: if (!aspeed_i2c_bus_is_enabled(bus)) { break; } @@ -587,31 +750,35 @@ static void aspeed_i2c_bus_write(void *opaque, hwaddr offset, } if (!aic->has_dma && - value & (I2CD_RX_DMA_ENABLE | I2CD_TX_DMA_ENABLE)) { + (SHARED_FIELD_EX32(value, RX_DMA_EN) || + SHARED_FIELD_EX32(value, TX_DMA_EN))) { qemu_log_mask(LOG_GUEST_ERROR, "%s: No DMA support\n", __func__); break; } + bus->regs[R_I2CD_CMD] &= ~0xFFFF; + bus->regs[R_I2CD_CMD] |= value & 0xFFFF; + aspeed_i2c_bus_handle_cmd(bus, value); aspeed_i2c_bus_raise_interrupt(bus); break; - case I2CD_DMA_ADDR: + case A_I2CD_DMA_ADDR: if (!aic->has_dma) { qemu_log_mask(LOG_GUEST_ERROR, "%s: No DMA support\n", __func__); break; } - bus->dma_addr = value & 0x3ffffffc; + 
bus->regs[R_I2CD_DMA_ADDR] = value & 0x3ffffffc; break; - case I2CD_DMA_LEN: + case A_I2CD_DMA_LEN: if (!aic->has_dma) { qemu_log_mask(LOG_GUEST_ERROR, "%s: No DMA support\n", __func__); break; } - bus->dma_len = value & 0xfff; - if (!bus->dma_len) { + bus->regs[R_I2CD_DMA_LEN] = value & 0xfff; + if (!bus->regs[R_I2CD_DMA_LEN]) { qemu_log_mask(LOG_UNIMP, "%s: invalid DMA length\n", __func__); } break; @@ -622,16 +789,34 @@ static void aspeed_i2c_bus_write(void *opaque, hwaddr offset, } } +static void aspeed_i2c_bus_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + AspeedI2CBus *bus = opaque; + if (aspeed_i2c_is_new_mode(bus->controller)) { + aspeed_i2c_bus_new_write(bus, offset, value, size); + } else { + aspeed_i2c_bus_old_write(bus, offset, value, size); + } +} + static uint64_t aspeed_i2c_ctrl_read(void *opaque, hwaddr offset, unsigned size) { AspeedI2CState *s = opaque; switch (offset) { - case I2C_CTRL_STATUS: + case A_I2C_CTRL_STATUS: return s->intr_status; - case I2C_CTRL_GLOBAL: + case A_I2C_CTRL_GLOBAL: return s->ctrl_global; + case A_I2C_CTRL_NEW_CLK_DIVIDER: + if (aspeed_i2c_is_new_mode(s)) { + return s->new_clk_divider; + } + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" HWADDR_PRIx "\n", + __func__, offset); + break; default: qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" HWADDR_PRIx "\n", __func__, offset); @@ -647,10 +832,18 @@ static void aspeed_i2c_ctrl_write(void *opaque, hwaddr offset, AspeedI2CState *s = opaque; switch (offset) { - case I2C_CTRL_GLOBAL: + case A_I2C_CTRL_GLOBAL: s->ctrl_global = value; break; - case I2C_CTRL_STATUS: + case A_I2C_CTRL_NEW_CLK_DIVIDER: + if (aspeed_i2c_is_new_mode(s)) { + s->new_clk_divider = value; + } else { + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" HWADDR_PRIx + "\n", __func__, offset); + } + break; + case A_I2C_CTRL_STATUS: default: qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" HWADDR_PRIx "\n", __func__, offset); @@ -707,19 +900,10 @@ static const MemoryRegionOps aspeed_i2c_pool_ops = { static const VMStateDescription aspeed_i2c_bus_vmstate = { .name = TYPE_ASPEED_I2C, - .version_id = 3, - .minimum_version_id = 3, + .version_id = 5, + .minimum_version_id = 5, .fields = (VMStateField[]) { - VMSTATE_UINT8(id, AspeedI2CBus), - VMSTATE_UINT32(ctrl, AspeedI2CBus), - VMSTATE_UINT32_ARRAY(timing, AspeedI2CBus, 2), - VMSTATE_UINT32(intr_ctrl, AspeedI2CBus), - VMSTATE_UINT32(intr_status, AspeedI2CBus), - VMSTATE_UINT32(cmd, AspeedI2CBus), - VMSTATE_UINT32(buf, AspeedI2CBus), - VMSTATE_UINT32(pool_ctrl, AspeedI2CBus), - VMSTATE_UINT32(dma_addr, AspeedI2CBus), - VMSTATE_UINT32(dma_len, AspeedI2CBus), + VMSTATE_UINT32_ARRAY(regs, AspeedI2CBus, ASPEED_I2C_NEW_NUM_REG), VMSTATE_END_OF_LIST() } }; @@ -856,12 +1040,7 @@ static void aspeed_i2c_bus_reset(DeviceState *dev) { AspeedI2CBus *s = ASPEED_I2C_BUS(dev); - s->intr_ctrl = 0; - s->intr_status = 0; - s->cmd = 0; - s->buf = 0; - s->dma_addr = 0; - s->dma_len = 0; + memset(s->regs, 0, sizeof(s->regs)); i2c_end_transfer(s->bus); } @@ -919,9 +1098,10 @@ static qemu_irq aspeed_2400_i2c_bus_get_irq(AspeedI2CBus *bus) static uint8_t *aspeed_2400_i2c_bus_pool_base(AspeedI2CBus *bus) { uint8_t *pool_page = - &bus->controller->pool[I2CD_POOL_PAGE_SEL(bus->ctrl) * 0x100]; + &bus->controller->pool[ARRAY_FIELD_EX32(bus->regs, I2CD_FUN_CTRL, + POOL_PAGE_SEL) * 0x100]; - return &pool_page[I2CD_POOL_OFFSET(bus->pool_ctrl)]; + return &pool_page[ARRAY_FIELD_EX32(bus->regs, I2CD_POOL_CTRL, OFFSET)]; } static void aspeed_2400_i2c_class_init(ObjectClass *klass, void 
*data) @@ -1013,6 +1193,29 @@ static const TypeInfo aspeed_2600_i2c_info = { .class_init = aspeed_2600_i2c_class_init, }; +static void aspeed_1030_i2c_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + AspeedI2CClass *aic = ASPEED_I2C_CLASS(klass); + + dc->desc = "ASPEED 1030 I2C Controller"; + + aic->num_busses = 14; + aic->reg_size = 0x80; + aic->gap = -1; /* no gap */ + aic->bus_get_irq = aspeed_2600_i2c_bus_get_irq; + aic->pool_size = 0x200; + aic->pool_base = 0xC00; + aic->bus_pool_base = aspeed_2600_i2c_bus_pool_base; + aic->has_dma = true; +} + +static const TypeInfo aspeed_1030_i2c_info = { + .name = TYPE_ASPEED_1030_I2C, + .parent = TYPE_ASPEED_I2C, + .class_init = aspeed_1030_i2c_class_init, +}; + static void aspeed_i2c_register_types(void) { type_register_static(&aspeed_i2c_bus_info); @@ -1020,6 +1223,7 @@ static void aspeed_i2c_register_types(void) type_register_static(&aspeed_2400_i2c_info); type_register_static(&aspeed_2500_i2c_info); type_register_static(&aspeed_2600_i2c_info); + type_register_static(&aspeed_1030_i2c_info); } type_init(aspeed_i2c_register_types) diff --git a/hw/i2c/trace-events b/hw/i2c/trace-events index 7d8907c1ee..209275ed2d 100644 --- a/hw/i2c/trace-events +++ b/hw/i2c/trace-events @@ -9,7 +9,7 @@ i2c_recv(uint8_t address, uint8_t data) "recv(addr:0x%02x) data:0x%02x" # aspeed_i2c.c aspeed_i2c_bus_cmd(uint32_t cmd, const char *cmd_flags, uint32_t count, uint32_t intr_status) "handling cmd=0x%x %s count=%d intr=0x%x" -aspeed_i2c_bus_raise_interrupt(uint32_t intr_status, const char *str1, const char *str2, const char *str3, const char *str4, const char *str5) "handled intr=0x%x %s%s%s%s%s" +aspeed_i2c_bus_raise_interrupt(uint32_t intr_status, const char *s) "handled intr=0x%x %s" aspeed_i2c_bus_read(uint32_t busid, uint64_t offset, unsigned size, uint64_t value) "bus[%d]: To 0x%" PRIx64 " of size %u: 0x%" PRIx64 aspeed_i2c_bus_write(uint32_t busid, uint64_t offset, unsigned size, uint64_t value) "bus[%d]: To 0x%" PRIx64 " of size %u: 0x%" PRIx64 aspeed_i2c_bus_send(const char *mode, int i, int count, uint8_t byte) "%s send %d/%d 0x%02x" diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c index a39e070e82..f31c53c28d 100644 --- a/hw/intc/pnv_xive2.c +++ b/hw/intc/pnv_xive2.c @@ -1574,6 +1574,12 @@ static const MemoryRegionOps pnv_xive2_ic_sync_ops = { * When the TM direct pages of the IC controller are accessed, the * target HW thread is deduced from the page offset. 
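* For example, assuming 4K TM pages (ic_shift = 12), an access at offset
* 0x3000 on chip 1 resolves to PIR (1 << 8) | (0x3000 >> 12) = 0x103.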
*/ +static uint32_t pnv_xive2_ic_tm_get_pir(PnvXive2 *xive, hwaddr offset) +{ + /* On P10, the node ID shift in the PIR register is 8 bits */ + return xive->chip->chip_id << 8 | offset >> xive->ic_shift; +} + static XiveTCTX *pnv_xive2_get_indirect_tctx(PnvXive2 *xive, uint32_t pir) { PnvChip *chip = xive->chip; @@ -1596,10 +1602,12 @@ static uint64_t pnv_xive2_ic_tm_indirect_read(void *opaque, hwaddr offset, unsigned size) { PnvXive2 *xive = PNV_XIVE2(opaque); - uint32_t pir = offset >> xive->ic_shift; - XiveTCTX *tctx = pnv_xive2_get_indirect_tctx(xive, pir); + uint32_t pir; + XiveTCTX *tctx; uint64_t val = -1; + pir = pnv_xive2_ic_tm_get_pir(xive, offset); + tctx = pnv_xive2_get_indirect_tctx(xive, pir); if (tctx) { val = xive_tctx_tm_read(NULL, tctx, offset, size); } @@ -1611,9 +1619,11 @@ static void pnv_xive2_ic_tm_indirect_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) { PnvXive2 *xive = PNV_XIVE2(opaque); - uint32_t pir = offset >> xive->ic_shift; - XiveTCTX *tctx = pnv_xive2_get_indirect_tctx(xive, pir); + uint32_t pir; + XiveTCTX *tctx; + pir = pnv_xive2_ic_tm_get_pir(xive, offset); + tctx = pnv_xive2_get_indirect_tctx(xive, pir); if (tctx) { xive_tctx_tm_write(NULL, tctx, offset, val, size); } diff --git a/hw/misc/aspeed_hace.c b/hw/misc/aspeed_hace.c index 4b5997e18f..731234b78c 100644 --- a/hw/misc/aspeed_hace.c +++ b/hw/misc/aspeed_hace.c @@ -340,12 +340,12 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data, if ((data & HASH_HMAC_MASK)) { qemu_log_mask(LOG_UNIMP, - "%s: HMAC engine command mode %"PRIx64" not implemented", + "%s: HMAC engine command mode %"PRIx64" not implemented\n", __func__, (data & HASH_HMAC_MASK) >> 8); } if (data & BIT(1)) { qemu_log_mask(LOG_UNIMP, - "%s: Cascaded mode not implemented", + "%s: Cascaded mode not implemented\n", __func__); } algo = hash_algo_lookup(data); diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 1e6e0fcad9..d349b3e426 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -35,6 +35,11 @@ * mdts=<N[optional]>,vsl=<N[optional]>, \ * zoned.zasl=<N[optional]>, \ * zoned.auto_transition=<on|off[optional]>, \ + * sriov_max_vfs=<N[optional]> \ + * sriov_vq_flexible=<N[optional]> \ + * sriov_vi_flexible=<N[optional]> \ + * sriov_max_vi_per_vf=<N[optional]> \ + * sriov_max_vq_per_vf=<N[optional]> \ * subsys=<subsys_id> * -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\ * zoned=<true|false[optional]>, \ @@ -106,6 +111,35 @@ * transitioned to zone state closed for resource management purposes. * Defaults to 'on'. * + * - `sriov_max_vfs` + * Indicates the maximum number of PCIe virtual functions supported + * by the controller. The default value is 0. Specifying a non-zero value + * enables reporting of both SR-IOV and ARI capabilities by the NVMe device. + * Virtual function controllers will not report SR-IOV capability. + * + * NOTE: Single Root I/O Virtualization support is experimental. + * All the related parameters may be subject to change. + * + * - `sriov_vq_flexible` + * Indicates the total number of flexible queue resources assignable to all + * the secondary controllers. Implicitly sets the number of primary + * controller's private resources to `(max_ioqpairs - sriov_vq_flexible)`. + * + * - `sriov_vi_flexible` + * Indicates the total number of flexible interrupt resources assignable to + * all the secondary controllers. Implicitly sets the number of primary + * controller's private resources to `(msix_qsize - sriov_vi_flexible)`. 
+ * + * - `sriov_max_vi_per_vf` + * Indicates the maximum number of virtual interrupt resources assignable + * to a secondary controller. The default 0 resolves to + * `(sriov_vi_flexible / sriov_max_vfs)`. + * + * - `sriov_max_vq_per_vf` + * Indicates the maximum number of virtual queue resources assignable to + * a secondary controller. The default 0 resolves to + * `(sriov_vq_flexible / sriov_max_vfs)`. + * * nvme namespace device parameters * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * - `shared` @@ -154,12 +188,14 @@ #include "qemu/error-report.h" #include "qemu/log.h" #include "qemu/units.h" +#include "qemu/range.h" #include "qapi/error.h" #include "qapi/visitor.h" #include "sysemu/sysemu.h" #include "sysemu/block-backend.h" #include "sysemu/hostmem.h" #include "hw/pci/msix.h" +#include "hw/pci/pcie_sriov.h" #include "migration/vmstate.h" #include "nvme.h" @@ -176,6 +212,10 @@ #define NVME_TEMPERATURE_CRITICAL 0x175 #define NVME_NUM_FW_SLOTS 1 #define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB) +#define NVME_MAX_VFS 127 +#define NVME_VF_RES_GRANULARITY 1 +#define NVME_VF_OFFSET 0x1 +#define NVME_VF_STRIDE 1 #define NVME_GUEST_ERR(trace, fmt, ...) \ do { \ @@ -223,6 +263,7 @@ static const uint32_t nvme_cse_acs[256] = { [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_NS_ATTACHMENT] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC, + [NVME_ADM_CMD_VIRT_MNGMT] = NVME_CMD_EFF_CSUPP, [NVME_ADM_CMD_FORMAT_NVM] = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC, }; @@ -254,6 +295,7 @@ static const uint32_t nvme_cse_iocs_zoned[256] = { }; static void nvme_process_sq(void *opaque); +static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst); static uint16_t nvme_sqid(NvmeRequest *req) { @@ -437,12 +479,12 @@ static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid) static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) { - return sqid < n->params.max_ioqpairs + 1 && n->sq[sqid] != NULL ? 0 : -1; + return sqid < n->conf_ioqpairs + 1 && n->sq[sqid] != NULL ? 0 : -1; } static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid) { - return cqid < n->params.max_ioqpairs + 1 && n->cq[cqid] != NULL ? 0 : -1; + return cqid < n->conf_ioqpairs + 1 && n->cq[cqid] != NULL ? 
0 : -1; } static void nvme_inc_cq_tail(NvmeCQueue *cq) @@ -808,10 +850,6 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, NvmeSg *sg, uint8_t type = NVME_SGL_TYPE(segment[i].type); switch (type) { - case NVME_SGL_DESCR_TYPE_BIT_BUCKET: - if (cmd->opcode == NVME_CMD_WRITE) { - continue; - } case NVME_SGL_DESCR_TYPE_DATA_BLOCK: break; case NVME_SGL_DESCR_TYPE_SEGMENT: @@ -844,10 +882,6 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, NvmeSg *sg, trans_len = MIN(*len, dlen); - if (type == NVME_SGL_DESCR_TYPE_BIT_BUCKET) { - goto next; - } - addr = le64_to_cpu(segment[i].addr); if (UINT64_MAX - addr < dlen) { @@ -859,7 +893,6 @@ static uint16_t nvme_map_sgl_data(NvmeCtrl *n, NvmeSg *sg, return status; } -next: *len -= trans_len; } @@ -917,8 +950,7 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl, seg_len = le32_to_cpu(sgld->len); /* check the length of the (Last) Segment descriptor */ - if ((!seg_len || seg_len & 0xf) && - (NVME_SGL_TYPE(sgld->type) != NVME_SGL_DESCR_TYPE_BIT_BUCKET)) { + if (!seg_len || seg_len & 0xf) { return NVME_INVALID_SGL_SEG_DESCR | NVME_DNR; } @@ -956,26 +988,20 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl, last_sgld = &segment[nsgld - 1]; /* - * If the segment ends with a Data Block or Bit Bucket Descriptor Type, - * then we are done. + * If the segment ends with a Data Block, then we are done. */ - switch (NVME_SGL_TYPE(last_sgld->type)) { - case NVME_SGL_DESCR_TYPE_DATA_BLOCK: - case NVME_SGL_DESCR_TYPE_BIT_BUCKET: + if (NVME_SGL_TYPE(last_sgld->type) == NVME_SGL_DESCR_TYPE_DATA_BLOCK) { status = nvme_map_sgl_data(n, sg, segment, nsgld, &len, cmd); if (status) { goto unmap; } goto out; - - default: - break; } /* - * If the last descriptor was not a Data Block or Bit Bucket, then the - * current segment must not be a Last Segment. + * If the last descriptor was not a Data Block, then the current + * segment must not be a Last Segment. 
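* In other words, a Last Segment must terminate in a Data Block descriptor;
* anything else is rejected as an invalid segment descriptor below.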
*/ if (NVME_SGL_TYPE(sgld->type) == NVME_SGL_DESCR_TYPE_LAST_SEGMENT) { status = NVME_INVALID_SGL_SEG_DESCR | NVME_DNR; @@ -4284,8 +4310,7 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_err_invalid_create_sq_cqid(cqid); return NVME_INVALID_CQID | NVME_DNR; } - if (unlikely(!sqid || sqid > n->params.max_ioqpairs || - n->sq[sqid] != NULL)) { + if (unlikely(!sqid || sqid > n->conf_ioqpairs || n->sq[sqid] != NULL)) { trace_pci_nvme_err_invalid_create_sq_sqid(sqid); return NVME_INVALID_QID | NVME_DNR; } @@ -4637,8 +4662,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_create_cq(prp1, cqid, vector, qsize, qflags, NVME_CQ_FLAGS_IEN(qflags) != 0); - if (unlikely(!cqid || cqid > n->params.max_ioqpairs || - n->cq[cqid] != NULL)) { + if (unlikely(!cqid || cqid > n->conf_ioqpairs || n->cq[cqid] != NULL)) { trace_pci_nvme_err_invalid_create_cq_cqid(cqid); return NVME_INVALID_QID | NVME_DNR; } @@ -4654,7 +4678,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_err_invalid_create_cq_vector(vector); return NVME_INVALID_IRQ_VECTOR | NVME_DNR; } - if (unlikely(vector >= n->params.msix_qsize)) { + if (unlikely(vector >= n->conf_msix_qsize)) { trace_pci_nvme_err_invalid_create_cq_vector(vector); return NVME_INVALID_IRQ_VECTOR | NVME_DNR; } @@ -4793,6 +4817,37 @@ static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req, return nvme_c2h(n, (uint8_t *)list, sizeof(list), req); } +static uint16_t nvme_identify_pri_ctrl_cap(NvmeCtrl *n, NvmeRequest *req) +{ + trace_pci_nvme_identify_pri_ctrl_cap(le16_to_cpu(n->pri_ctrl_cap.cntlid)); + + return nvme_c2h(n, (uint8_t *)&n->pri_ctrl_cap, + sizeof(NvmePriCtrlCap), req); +} + +static uint16_t nvme_identify_sec_ctrl_list(NvmeCtrl *n, NvmeRequest *req) +{ + NvmeIdentify *c = (NvmeIdentify *)&req->cmd; + uint16_t pri_ctrl_id = le16_to_cpu(n->pri_ctrl_cap.cntlid); + uint16_t min_id = le16_to_cpu(c->ctrlid); + uint8_t num_sec_ctrl = n->sec_ctrl_list.numcntl; + NvmeSecCtrlList list = {0}; + uint8_t i; + + for (i = 0; i < num_sec_ctrl; i++) { + if (n->sec_ctrl_list.sec[i].scid >= min_id) { + list.numcntl = num_sec_ctrl - i; + memcpy(&list.sec, n->sec_ctrl_list.sec + i, + list.numcntl * sizeof(NvmeSecCtrlEntry)); + break; + } + } + + trace_pci_nvme_identify_sec_ctrl_list(pri_ctrl_id, list.numcntl); + + return nvme_c2h(n, (uint8_t *)&list, sizeof(list), req); +} + static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req, bool active) { @@ -5009,6 +5064,10 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req) return nvme_identify_ctrl_list(n, req, true); case NVME_ID_CNS_CTRL_LIST: return nvme_identify_ctrl_list(n, req, false); + case NVME_ID_CNS_PRIMARY_CTRL_CAP: + return nvme_identify_pri_ctrl_cap(n, req); + case NVME_ID_CNS_SECONDARY_CTRL_LIST: + return nvme_identify_sec_ctrl_list(n, req); case NVME_ID_CNS_CS_NS: return nvme_identify_ns_csi(n, req, true); case NVME_ID_CNS_CS_NS_PRESENT: @@ -5217,13 +5276,12 @@ defaults: break; case NVME_NUMBER_OF_QUEUES: - result = (n->params.max_ioqpairs - 1) | - ((n->params.max_ioqpairs - 1) << 16); + result = (n->conf_ioqpairs - 1) | ((n->conf_ioqpairs - 1) << 16); trace_pci_nvme_getfeat_numq(result); break; case NVME_INTERRUPT_VECTOR_CONF: iv = dw11 & 0xffff; - if (iv >= n->params.max_ioqpairs + 1) { + if (iv >= n->conf_ioqpairs + 1) { return NVME_INVALID_FIELD | NVME_DNR; } @@ -5379,10 +5437,10 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) trace_pci_nvme_setfeat_numq((dw11 & 0xffff) + 1, 
((dw11 >> 16) & 0xffff) + 1, - n->params.max_ioqpairs, - n->params.max_ioqpairs); - req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) | - ((n->params.max_ioqpairs - 1) << 16)); + n->conf_ioqpairs, + n->conf_ioqpairs); + req->cqe.result = cpu_to_le32((n->conf_ioqpairs - 1) | + ((n->conf_ioqpairs - 1) << 16)); break; case NVME_ASYNCHRONOUS_EVENT_CONF: n->features.async_config = dw11; @@ -5769,6 +5827,167 @@ out: return status; } +static void nvme_get_virt_res_num(NvmeCtrl *n, uint8_t rt, int *num_total, + int *num_prim, int *num_sec) +{ + *num_total = le32_to_cpu(rt ? + n->pri_ctrl_cap.vifrt : n->pri_ctrl_cap.vqfrt); + *num_prim = le16_to_cpu(rt ? + n->pri_ctrl_cap.virfap : n->pri_ctrl_cap.vqrfap); + *num_sec = le16_to_cpu(rt ? n->pri_ctrl_cap.virfa : n->pri_ctrl_cap.vqrfa); +} + +static uint16_t nvme_assign_virt_res_to_prim(NvmeCtrl *n, NvmeRequest *req, + uint16_t cntlid, uint8_t rt, + int nr) +{ + int num_total, num_prim, num_sec; + + if (cntlid != n->cntlid) { + return NVME_INVALID_CTRL_ID | NVME_DNR; + } + + nvme_get_virt_res_num(n, rt, &num_total, &num_prim, &num_sec); + + if (nr > num_total) { + return NVME_INVALID_NUM_RESOURCES | NVME_DNR; + } + + if (nr > num_total - num_sec) { + return NVME_INVALID_RESOURCE_ID | NVME_DNR; + } + + if (rt) { + n->next_pri_ctrl_cap.virfap = cpu_to_le16(nr); + } else { + n->next_pri_ctrl_cap.vqrfap = cpu_to_le16(nr); + } + + req->cqe.result = cpu_to_le32(nr); + return req->status; +} + +static void nvme_update_virt_res(NvmeCtrl *n, NvmeSecCtrlEntry *sctrl, + uint8_t rt, int nr) +{ + int prev_nr, prev_total; + + if (rt) { + prev_nr = le16_to_cpu(sctrl->nvi); + prev_total = le32_to_cpu(n->pri_ctrl_cap.virfa); + sctrl->nvi = cpu_to_le16(nr); + n->pri_ctrl_cap.virfa = cpu_to_le32(prev_total + nr - prev_nr); + } else { + prev_nr = le16_to_cpu(sctrl->nvq); + prev_total = le32_to_cpu(n->pri_ctrl_cap.vqrfa); + sctrl->nvq = cpu_to_le16(nr); + n->pri_ctrl_cap.vqrfa = cpu_to_le32(prev_total + nr - prev_nr); + } +} + +static uint16_t nvme_assign_virt_res_to_sec(NvmeCtrl *n, NvmeRequest *req, + uint16_t cntlid, uint8_t rt, int nr) +{ + int num_total, num_prim, num_sec, num_free, diff, limit; + NvmeSecCtrlEntry *sctrl; + + sctrl = nvme_sctrl_for_cntlid(n, cntlid); + if (!sctrl) { + return NVME_INVALID_CTRL_ID | NVME_DNR; + } + + if (sctrl->scs) { + return NVME_INVALID_SEC_CTRL_STATE | NVME_DNR; + } + + limit = le16_to_cpu(rt ? n->pri_ctrl_cap.vifrsm : n->pri_ctrl_cap.vqfrsm); + if (nr > limit) { + return NVME_INVALID_NUM_RESOURCES | NVME_DNR; + } + + nvme_get_virt_res_num(n, rt, &num_total, &num_prim, &num_sec); + num_free = num_total - num_prim - num_sec; + diff = nr - le16_to_cpu(rt ? 
sctrl->nvi : sctrl->nvq); + + if (diff > num_free) { + return NVME_INVALID_RESOURCE_ID | NVME_DNR; + } + + nvme_update_virt_res(n, sctrl, rt, nr); + req->cqe.result = cpu_to_le32(nr); + + return req->status; +} + +static uint16_t nvme_virt_set_state(NvmeCtrl *n, uint16_t cntlid, bool online) +{ + NvmeCtrl *sn = NULL; + NvmeSecCtrlEntry *sctrl; + int vf_index; + + sctrl = nvme_sctrl_for_cntlid(n, cntlid); + if (!sctrl) { + return NVME_INVALID_CTRL_ID | NVME_DNR; + } + + if (!pci_is_vf(&n->parent_obj)) { + vf_index = le16_to_cpu(sctrl->vfn) - 1; + sn = NVME(pcie_sriov_get_vf_at_index(&n->parent_obj, vf_index)); + } + + if (online) { + if (!sctrl->nvi || (le16_to_cpu(sctrl->nvq) < 2) || !sn) { + return NVME_INVALID_SEC_CTRL_STATE | NVME_DNR; + } + + if (!sctrl->scs) { + sctrl->scs = 0x1; + nvme_ctrl_reset(sn, NVME_RESET_FUNCTION); + } + } else { + nvme_update_virt_res(n, sctrl, NVME_VIRT_RES_INTERRUPT, 0); + nvme_update_virt_res(n, sctrl, NVME_VIRT_RES_QUEUE, 0); + + if (sctrl->scs) { + sctrl->scs = 0x0; + if (sn) { + nvme_ctrl_reset(sn, NVME_RESET_FUNCTION); + } + } + } + + return NVME_SUCCESS; +} + +static uint16_t nvme_virt_mngmt(NvmeCtrl *n, NvmeRequest *req) +{ + uint32_t dw10 = le32_to_cpu(req->cmd.cdw10); + uint32_t dw11 = le32_to_cpu(req->cmd.cdw11); + uint8_t act = dw10 & 0xf; + uint8_t rt = (dw10 >> 8) & 0x7; + uint16_t cntlid = (dw10 >> 16) & 0xffff; + int nr = dw11 & 0xffff; + + trace_pci_nvme_virt_mngmt(nvme_cid(req), act, cntlid, rt ? "VI" : "VQ", nr); + + if (rt != NVME_VIRT_RES_QUEUE && rt != NVME_VIRT_RES_INTERRUPT) { + return NVME_INVALID_RESOURCE_ID | NVME_DNR; + } + + switch (act) { + case NVME_VIRT_MNGMT_ACTION_SEC_ASSIGN: + return nvme_assign_virt_res_to_sec(n, req, cntlid, rt, nr); + case NVME_VIRT_MNGMT_ACTION_PRM_ALLOC: + return nvme_assign_virt_res_to_prim(n, req, cntlid, rt, nr); + case NVME_VIRT_MNGMT_ACTION_SEC_ONLINE: + return nvme_virt_set_state(n, cntlid, true); + case NVME_VIRT_MNGMT_ACTION_SEC_OFFLINE: + return nvme_virt_set_state(n, cntlid, false); + default: + return NVME_INVALID_FIELD | NVME_DNR; + } +} + static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) { trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode, @@ -5811,6 +6030,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req) return nvme_aer(n, req); case NVME_ADM_CMD_NS_ATTACHMENT: return nvme_ns_attachment(n, req); + case NVME_ADM_CMD_VIRT_MNGMT: + return nvme_virt_mngmt(n, req); case NVME_ADM_CMD_FORMAT_NVM: return nvme_format(n, req); default: @@ -5857,8 +6078,48 @@ static void nvme_process_sq(void *opaque) } } -static void nvme_ctrl_reset(NvmeCtrl *n) +static void nvme_update_msixcap_ts(PCIDevice *pci_dev, uint32_t table_size) { + uint8_t *config; + + if (!msix_present(pci_dev)) { + return; + } + + assert(table_size > 0 && table_size <= pci_dev->msix_entries_nr); + + config = pci_dev->config + pci_dev->msix_cap; + pci_set_word_by_mask(config + PCI_MSIX_FLAGS, PCI_MSIX_FLAGS_QSIZE, + table_size - 1); +} + +static void nvme_activate_virt_res(NvmeCtrl *n) +{ + PCIDevice *pci_dev = &n->parent_obj; + NvmePriCtrlCap *cap = &n->pri_ctrl_cap; + NvmeSecCtrlEntry *sctrl; + + /* -1 to account for the admin queue */ + if (pci_is_vf(pci_dev)) { + sctrl = nvme_sctrl(n); + cap->vqprt = sctrl->nvq; + cap->viprt = sctrl->nvi; + n->conf_ioqpairs = sctrl->nvq ? le16_to_cpu(sctrl->nvq) - 1 : 0; + n->conf_msix_qsize = sctrl->nvi ? 
le16_to_cpu(sctrl->nvi) : 1; + } else { + cap->vqrfap = n->next_pri_ctrl_cap.vqrfap; + cap->virfap = n->next_pri_ctrl_cap.virfap; + n->conf_ioqpairs = le16_to_cpu(cap->vqprt) + + le16_to_cpu(cap->vqrfap) - 1; + n->conf_msix_qsize = le16_to_cpu(cap->viprt) + + le16_to_cpu(cap->virfap); + } +} + +static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst) +{ + PCIDevice *pci_dev = &n->parent_obj; + NvmeSecCtrlEntry *sctrl; NvmeNamespace *ns; int i; @@ -5888,9 +6149,41 @@ static void nvme_ctrl_reset(NvmeCtrl *n) g_free(event); } + if (n->params.sriov_max_vfs) { + if (!pci_is_vf(pci_dev)) { + for (i = 0; i < n->sec_ctrl_list.numcntl; i++) { + sctrl = &n->sec_ctrl_list.sec[i]; + nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false); + } + + if (rst != NVME_RESET_CONTROLLER) { + pcie_sriov_pf_disable_vfs(pci_dev); + } + } + + if (rst != NVME_RESET_CONTROLLER) { + nvme_activate_virt_res(n); + } + } + n->aer_queued = 0; + n->aer_mask = 0; n->outstanding_aers = 0; n->qs_created = false; + + nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize); + + if (pci_is_vf(pci_dev)) { + sctrl = nvme_sctrl(n); + + stl_le_p(&n->bar.csts, sctrl->scs ? 0 : NVME_CSTS_FAILED); + } else { + stl_le_p(&n->bar.csts, 0); + } + + stl_le_p(&n->bar.intms, 0); + stl_le_p(&n->bar.intmc, 0); + stl_le_p(&n->bar.cc, 0); } static void nvme_ctrl_shutdown(NvmeCtrl *n) @@ -5936,7 +6229,15 @@ static int nvme_start_ctrl(NvmeCtrl *n) uint64_t acq = ldq_le_p(&n->bar.acq); uint32_t page_bits = NVME_CC_MPS(cc) + 12; uint32_t page_size = 1 << page_bits; + NvmeSecCtrlEntry *sctrl = nvme_sctrl(n); + if (pci_is_vf(&n->parent_obj) && !sctrl->scs) { + trace_pci_nvme_err_startfail_virt_state(le16_to_cpu(sctrl->nvi), + le16_to_cpu(sctrl->nvq), + sctrl->scs ? "ONLINE" : + "OFFLINE"); + return -1; + } if (unlikely(n->cq[0])) { trace_pci_nvme_err_startfail_cq(); return -1; @@ -6017,8 +6318,6 @@ static int nvme_start_ctrl(NvmeCtrl *n) nvme_set_timestamp(n, 0ULL); - QTAILQ_INIT(&n->aer_queue); - nvme_select_iocs(n); return 0; @@ -6096,20 +6395,21 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, nvme_irq_check(n); break; case NVME_REG_CC: + stl_le_p(&n->bar.cc, data); + trace_pci_nvme_mmio_cfg(data & 0xffffffff); - /* Windows first sends data, then sends enable bit */ - if (!NVME_CC_EN(data) && !NVME_CC_EN(cc) && - !NVME_CC_SHN(data) && !NVME_CC_SHN(cc)) - { - cc = data; + if (NVME_CC_SHN(data) && !(NVME_CC_SHN(cc))) { + trace_pci_nvme_mmio_shutdown_set(); + nvme_ctrl_shutdown(n); + csts &= ~(CSTS_SHST_MASK << CSTS_SHST_SHIFT); + csts |= NVME_CSTS_SHST_COMPLETE; + } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(cc)) { + trace_pci_nvme_mmio_shutdown_cleared(); + csts &= ~(CSTS_SHST_MASK << CSTS_SHST_SHIFT); } if (NVME_CC_EN(data) && !NVME_CC_EN(cc)) { - cc = data; - - /* flush CC since nvme_start_ctrl() needs the value */ - stl_le_p(&n->bar.cc, cc); if (unlikely(nvme_start_ctrl(n))) { trace_pci_nvme_err_startfail(); csts = NVME_CSTS_FAILED; @@ -6119,23 +6419,11 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, } } else if (!NVME_CC_EN(data) && NVME_CC_EN(cc)) { trace_pci_nvme_mmio_stopped(); - nvme_ctrl_reset(n); - cc = 0; - csts &= ~NVME_CSTS_READY; - } + nvme_ctrl_reset(n, NVME_RESET_CONTROLLER); - if (NVME_CC_SHN(data) && !(NVME_CC_SHN(cc))) { - trace_pci_nvme_mmio_shutdown_set(); - nvme_ctrl_shutdown(n); - cc = data; - csts |= NVME_CSTS_SHST_COMPLETE; - } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(cc)) { - trace_pci_nvme_mmio_shutdown_cleared(); - csts &= ~NVME_CSTS_SHST_COMPLETE; - cc = data; + break; } 
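The rewritten NVME_REG_CC handler above latches the freshly written value first and only then acts on bit transitions, comparing data against the previously latched cc; the old deferred-store workaround for guests that write the shutdown and enable bits in separate steps is no longer needed. The edge-detection idiom, as a standalone sketch (cc is the previously latched register value, data the value just written):

    bool was_enabled = NVME_CC_EN(cc);
    bool now_enabled = NVME_CC_EN(data);

    if (now_enabled && !was_enabled) {
        /* 0 -> 1 transition: start the controller */
    } else if (!now_enabled && was_enabled) {
        /* 1 -> 0 transition: controller reset */
    }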
- stl_le_p(&n->bar.cc, cc); stl_le_p(&n->bar.csts, csts); break; @@ -6319,6 +6607,12 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) return 0; } + if (pci_is_vf(&n->parent_obj) && !nvme_sctrl(n)->scs && + addr != NVME_REG_CSTS) { + trace_pci_nvme_err_ignored_mmio_vf_offline(addr, size); + return 0; + } + /* * When PMRWBM bit 1 is set then read from * from PMRSTS should ensure prior writes @@ -6468,6 +6762,12 @@ static void nvme_mmio_write(void *opaque, hwaddr addr, uint64_t data, trace_pci_nvme_mmio_write(addr, data, size); + if (pci_is_vf(&n->parent_obj) && !nvme_sctrl(n)->scs && + addr != NVME_REG_CSTS) { + trace_pci_nvme_err_ignored_mmio_vf_offline(addr, size); + return; + } + if (addr < sizeof(n->bar)) { nvme_write_bar(n, addr, data, size); } else { @@ -6569,19 +6869,140 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp) error_setg(errp, "vsl must be non-zero"); return; } + + if (params->sriov_max_vfs) { + if (!n->subsys) { + error_setg(errp, "subsystem is required for the use of SR-IOV"); + return; + } + + if (params->sriov_max_vfs > NVME_MAX_VFS) { + error_setg(errp, "sriov_max_vfs must be between 0 and %d", + NVME_MAX_VFS); + return; + } + + if (params->cmb_size_mb) { + error_setg(errp, "CMB is not supported with SR-IOV"); + return; + } + + if (n->pmr.dev) { + error_setg(errp, "PMR is not supported with SR-IOV"); + return; + } + + if (!params->sriov_vq_flexible || !params->sriov_vi_flexible) { + error_setg(errp, "both sriov_vq_flexible and sriov_vi_flexible" + " must be set for the use of SR-IOV"); + return; + } + + if (params->sriov_vq_flexible < params->sriov_max_vfs * 2) { + error_setg(errp, "sriov_vq_flexible must be greater than or equal" + " to %d (sriov_max_vfs * 2)", params->sriov_max_vfs * 2); + return; + } + + if (params->max_ioqpairs < params->sriov_vq_flexible + 2) { + error_setg(errp, "(max_ioqpairs - sriov_vq_flexible) must be" + " greater than or equal to 2"); + return; + } + + if (params->sriov_vi_flexible < params->sriov_max_vfs) { + error_setg(errp, "sriov_vi_flexible must be greater than or equal" + " to %d (sriov_max_vfs)", params->sriov_max_vfs); + return; + } + + if (params->msix_qsize < params->sriov_vi_flexible + 1) { + error_setg(errp, "(msix_qsize - sriov_vi_flexible) must be" + " greater than or equal to 1"); + return; + } + + if (params->sriov_max_vi_per_vf && + (params->sriov_max_vi_per_vf - 1) % NVME_VF_RES_GRANULARITY) { + error_setg(errp, "sriov_max_vi_per_vf must meet:" + " (sriov_max_vi_per_vf - 1) %% %d == 0 and" + " sriov_max_vi_per_vf >= 1", NVME_VF_RES_GRANULARITY); + return; + } + + if (params->sriov_max_vq_per_vf && + (params->sriov_max_vq_per_vf < 2 || + (params->sriov_max_vq_per_vf - 1) % NVME_VF_RES_GRANULARITY)) { + error_setg(errp, "sriov_max_vq_per_vf must meet:" + " (sriov_max_vq_per_vf - 1) %% %d == 0 and" + " sriov_max_vq_per_vf >= 2", NVME_VF_RES_GRANULARITY); + return; + } + } } static void nvme_init_state(NvmeCtrl *n) { - /* add one to max_ioqpairs to account for the admin queue pair */ - n->reg_size = pow2ceil(sizeof(NvmeBar) + - 2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE); + NvmePriCtrlCap *cap = &n->pri_ctrl_cap; + NvmeSecCtrlList *list = &n->sec_ctrl_list; + NvmeSecCtrlEntry *sctrl; + uint8_t max_vfs; + int i; + + if (pci_is_vf(&n->parent_obj)) { + sctrl = nvme_sctrl(n); + max_vfs = 0; + n->conf_ioqpairs = sctrl->nvq ? le16_to_cpu(sctrl->nvq) - 1 : 0; + n->conf_msix_qsize = sctrl->nvi ? 
le16_to_cpu(sctrl->nvi) : 1; + } else { + max_vfs = n->params.sriov_max_vfs; + n->conf_ioqpairs = n->params.max_ioqpairs; + n->conf_msix_qsize = n->params.msix_qsize; + } + n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1); n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1); n->temperature = NVME_TEMPERATURE; n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING; n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1); + QTAILQ_INIT(&n->aer_queue); + + list->numcntl = cpu_to_le16(max_vfs); + for (i = 0; i < max_vfs; i++) { + sctrl = &list->sec[i]; + sctrl->pcid = cpu_to_le16(n->cntlid); + sctrl->vfn = cpu_to_le16(i + 1); + } + + cap->cntlid = cpu_to_le16(n->cntlid); + cap->crt = NVME_CRT_VQ | NVME_CRT_VI; + + if (pci_is_vf(&n->parent_obj)) { + cap->vqprt = cpu_to_le16(1 + n->conf_ioqpairs); + } else { + cap->vqprt = cpu_to_le16(1 + n->params.max_ioqpairs - + n->params.sriov_vq_flexible); + cap->vqfrt = cpu_to_le32(n->params.sriov_vq_flexible); + cap->vqrfap = cap->vqfrt; + cap->vqgran = cpu_to_le16(NVME_VF_RES_GRANULARITY); + cap->vqfrsm = n->params.sriov_max_vq_per_vf ? + cpu_to_le16(n->params.sriov_max_vq_per_vf) : + cap->vqfrt / MAX(max_vfs, 1); + } + + if (pci_is_vf(&n->parent_obj)) { + cap->viprt = cpu_to_le16(n->conf_msix_qsize); + } else { + cap->viprt = cpu_to_le16(n->params.msix_qsize - + n->params.sriov_vi_flexible); + cap->vifrt = cpu_to_le32(n->params.sriov_vi_flexible); + cap->virfap = cap->vifrt; + cap->vigran = cpu_to_le16(NVME_VF_RES_GRANULARITY); + cap->vifrsm = n->params.sriov_max_vi_per_vf ? + cpu_to_le16(n->params.sriov_max_vi_per_vf) : + cap->vifrt / MAX(max_vfs, 1); + } } static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) @@ -6626,10 +7047,77 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev) memory_region_set_enabled(&n->pmr.dev->mr, false); } +static uint64_t nvme_bar_size(unsigned total_queues, unsigned total_irqs, + unsigned *msix_table_offset, + unsigned *msix_pba_offset) +{ + uint64_t bar_size, msix_table_size, msix_pba_size; + + bar_size = sizeof(NvmeBar) + 2 * total_queues * NVME_DB_SIZE; + bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB); + + if (msix_table_offset) { + *msix_table_offset = bar_size; + } + + msix_table_size = PCI_MSIX_ENTRY_SIZE * total_irqs; + bar_size += msix_table_size; + bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB); + + if (msix_pba_offset) { + *msix_pba_offset = bar_size; + } + + msix_pba_size = QEMU_ALIGN_UP(total_irqs, 64) / 8; + bar_size += msix_pba_size; + + bar_size = pow2ceil(bar_size); + return bar_size; +} + +static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset) +{ + uint16_t vf_dev_id = n->params.use_intel_id ? 
+ PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME; + NvmePriCtrlCap *cap = &n->pri_ctrl_cap; + uint64_t bar_size = nvme_bar_size(le16_to_cpu(cap->vqfrsm), + le16_to_cpu(cap->vifrsm), + NULL, NULL); + + pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id, + n->params.sriov_max_vfs, n->params.sriov_max_vfs, + NVME_VF_OFFSET, NVME_VF_STRIDE); + + pcie_sriov_pf_init_vf_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, bar_size); +} + +static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset) +{ + Error *err = NULL; + int ret; + + ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, offset, + PCI_PM_SIZEOF, &err); + if (err) { + error_report_err(err); + return ret; + } + + pci_set_word(pci_dev->config + offset + PCI_PM_PMC, + PCI_PM_CAP_VER_1_2); + pci_set_word(pci_dev->config + offset + PCI_PM_CTRL, + PCI_PM_CTRL_NO_SOFT_RESET); + pci_set_word(pci_dev->wmask + offset + PCI_PM_CTRL, + PCI_PM_CTRL_STATE_MASK); + + return 0; +} + static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) { uint8_t *pci_conf = pci_dev->config; - uint64_t bar_size, msix_table_size, msix_pba_size; + uint64_t bar_size; unsigned msix_table_offset, msix_pba_offset; int ret; @@ -6640,34 +7128,35 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) if (n->params.use_intel_id) { pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL); - pci_config_set_device_id(pci_conf, 0x5845); + pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_NVME); } else { pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_REDHAT); pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_REDHAT_NVME); } pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS); + nvme_add_pm_capability(pci_dev, 0x60); pcie_endpoint_cap_init(pci_dev, 0x80); + pcie_cap_flr_init(pci_dev); + if (n->params.sriov_max_vfs) { + pcie_ari_init(pci_dev, 0x100, 1); + } - bar_size = QEMU_ALIGN_UP(n->reg_size, 4 * KiB); - msix_table_offset = bar_size; - msix_table_size = PCI_MSIX_ENTRY_SIZE * n->params.msix_qsize; - - bar_size += msix_table_size; - bar_size = QEMU_ALIGN_UP(bar_size, 4 * KiB); - msix_pba_offset = bar_size; - msix_pba_size = QEMU_ALIGN_UP(n->params.msix_qsize, 64) / 8; - - bar_size += msix_pba_size; - bar_size = pow2ceil(bar_size); + /* add one to max_ioqpairs to account for the admin queue pair */ + bar_size = nvme_bar_size(n->params.max_ioqpairs + 1, n->params.msix_qsize, + &msix_table_offset, &msix_pba_offset); memory_region_init(&n->bar0, OBJECT(n), "nvme-bar0", bar_size); memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", - n->reg_size); + msix_table_offset); memory_region_add_subregion(&n->bar0, 0, &n->iomem); - pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | - PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0); + if (pci_is_vf(pci_dev)) { + pcie_sriov_vf_register_bar(pci_dev, 0, &n->bar0); + } else { + pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0); + } ret = msix_init(pci_dev, n->params.msix_qsize, &n->bar0, 0, msix_table_offset, &n->bar0, 0, msix_pba_offset, 0, &err); @@ -6680,6 +7169,8 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) } } + nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize); + if (n->params.cmb_size_mb) { nvme_init_cmb(n, pci_dev); } @@ -6688,6 +7179,10 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) nvme_init_pmr(n, pci_dev); } + if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) { + nvme_init_sriov(n, pci_dev, 0x120); + } + return 
0; }

@@ -6709,6 +7204,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
 NvmeIdCtrl *id = &n->id_ctrl;
 uint8_t *pci_conf = pci_dev->config;
 uint64_t cap = ldq_le_p(&n->bar.cap);
+ NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
 id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
 id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
@@ -6775,8 +7271,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
 id->vwc = NVME_VWC_NSID_BROADCAST_SUPPORT | NVME_VWC_PRESENT;
 id->ocfs = cpu_to_le16(NVME_OCFS_COPY_FORMAT_0 | NVME_OCFS_COPY_FORMAT_1);
- id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN |
- NVME_CTRL_SGLS_BITBUCKET);
+ id->sgls = cpu_to_le32(NVME_CTRL_SGLS_SUPPORT_NO_ALIGN);
 nvme_init_subnqn(n);
@@ -6801,6 +7296,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
 stl_le_p(&n->bar.vs, NVME_SPEC_VER);
 n->bar.intmc = n->bar.intms = 0;
+
+ if (pci_is_vf(&n->parent_obj) && !sctrl->scs) {
+ stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
+ }
 }
 static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
@@ -6838,6 +7337,16 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
 NvmeCtrl *n = NVME(pci_dev);
 NvmeNamespace *ns;
 Error *local_err = NULL;
+ NvmeCtrl *pn = NVME(pcie_sriov_get_pf(pci_dev));
+
+ if (pci_is_vf(pci_dev)) {
+ /*
+ * VFs derive settings from the parent. The PF's lifespan exceeds
+ * that of the VFs, so it's safe to share params.serial.
+ */
+ memcpy(&n->params, &pn->params, sizeof(NvmeParams));
+ n->subsys = pn->subsys;
+ }
 nvme_check_constraints(n, &local_err);
 if (local_err) {
@@ -6848,15 +7357,14 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
 qbus_init(&n->bus, sizeof(NvmeBus), TYPE_NVME_BUS, &pci_dev->qdev,
 n->parent_obj.qdev.id);
- nvme_init_state(n);
- if (nvme_init_pci(n, pci_dev, errp)) {
- return;
- }
- if (nvme_init_subsys(n, errp)) {
 error_propagate(errp, local_err);
 return;
 }
+ nvme_init_state(n);
+ if (nvme_init_pci(n, pci_dev, errp)) {
+ return;
+ }
 nvme_init_ctrl(n, pci_dev);
 /* setup a namespace if the controller drive property was given */
@@ -6878,7 +7386,7 @@ static void nvme_exit(PCIDevice *pci_dev)
 NvmeNamespace *ns;
 int i;
- nvme_ctrl_reset(n);
+ nvme_ctrl_reset(n, NVME_RESET_FUNCTION);
 if (n->subsys) {
 for (i = 1; i <= NVME_MAX_NAMESPACES; i++) {
@@ -6902,6 +7410,11 @@ static void nvme_exit(PCIDevice *pci_dev)
 if (n->pmr.dev) {
 host_memory_backend_set_mapped(n->pmr.dev, false);
 }
+
+ if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) {
+ pcie_sriov_pf_exit(pci_dev);
+ }
+
 msix_uninit(pci_dev, &n->bar0, &n->bar0);
 memory_region_del_subregion(&n->bar0, &n->iomem);
 }
@@ -6926,6 +7439,15 @@ static Property nvme_props[] = {
 DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0),
 DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
 params.auto_transition_zones, true),
+ DEFINE_PROP_UINT8("sriov_max_vfs", NvmeCtrl, params.sriov_max_vfs, 0),
+ DEFINE_PROP_UINT16("sriov_vq_flexible", NvmeCtrl,
+ params.sriov_vq_flexible, 0),
+ DEFINE_PROP_UINT16("sriov_vi_flexible", NvmeCtrl,
+ params.sriov_vi_flexible, 0),
+ DEFINE_PROP_UINT8("sriov_max_vi_per_vf", NvmeCtrl,
+ params.sriov_max_vi_per_vf, 0),
+ DEFINE_PROP_UINT8("sriov_max_vq_per_vf", NvmeCtrl,
+ params.sriov_max_vq_per_vf, 0),
 DEFINE_PROP_END_OF_LIST(),
 };
@@ -6971,6 +7493,47 @@ static void nvme_set_smart_warning(Object *obj, Visitor *v, const char *name,
 }
 }
+static void nvme_pci_reset(DeviceState *qdev)
+{
+ PCIDevice *pci_dev = PCI_DEVICE(qdev);
+ NvmeCtrl *n = NVME(pci_dev);
+
+ trace_pci_nvme_pci_reset();
+ nvme_ctrl_reset(n, 
NVME_RESET_FUNCTION); +} + +static void nvme_sriov_pre_write_ctrl(PCIDevice *dev, uint32_t address, + uint32_t val, int len) +{ + NvmeCtrl *n = NVME(dev); + NvmeSecCtrlEntry *sctrl; + uint16_t sriov_cap = dev->exp.sriov_cap; + uint32_t off = address - sriov_cap; + int i, num_vfs; + + if (!sriov_cap) { + return; + } + + if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) { + if (!(val & PCI_SRIOV_CTRL_VFE)) { + num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); + for (i = 0; i < num_vfs; i++) { + sctrl = &n->sec_ctrl_list.sec[i]; + nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false); + } + } + } +} + +static void nvme_pci_write_config(PCIDevice *dev, uint32_t address, + uint32_t val, int len) +{ + nvme_sriov_pre_write_ctrl(dev, address, val, len); + pci_default_write_config(dev, address, val, len); + pcie_cap_flr_write_config(dev, address, val, len); +} + static const VMStateDescription nvme_vmstate = { .name = "nvme", .unmigratable = 1, @@ -6982,6 +7545,7 @@ static void nvme_class_init(ObjectClass *oc, void *data) PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); pc->realize = nvme_realize; + pc->config_write = nvme_pci_write_config; pc->exit = nvme_exit; pc->class_id = PCI_CLASS_STORAGE_EXPRESS; pc->revision = 2; @@ -6990,6 +7554,7 @@ static void nvme_class_init(ObjectClass *oc, void *data) dc->desc = "Non-Volatile Memory Express"; device_class_set_props(dc, nvme_props); dc->vmsd = &nvme_vmstate; + dc->reset = nvme_pci_reset; } static void nvme_instance_init(Object *obj) diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 1b9c9d1156..870c3ca1a2 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -597,7 +597,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp) for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) { NvmeCtrl *ctrl = subsys->ctrls[i]; - if (ctrl) { + if (ctrl && ctrl != SUBSYS_SLOT_RSVD) { nvme_attach_ns(ctrl, ns); } } diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index e41771604f..99437d39bb 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -24,7 +24,7 @@ #include "block/nvme.h" -#define NVME_MAX_CONTROLLERS 32 +#define NVME_MAX_CONTROLLERS 256 #define NVME_MAX_NAMESPACES 256 #define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000) @@ -43,6 +43,7 @@ typedef struct NvmeBus { #define TYPE_NVME_SUBSYS "nvme-subsys" #define NVME_SUBSYS(obj) \ OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS) +#define SUBSYS_SLOT_RSVD (void *)0xFFFF typedef struct NvmeSubsystem { DeviceState parent_obj; @@ -68,6 +69,10 @@ static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys, return NULL; } + if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) { + return NULL; + } + return subsys->ctrls[cntlid]; } @@ -335,6 +340,7 @@ static inline const char *nvme_adm_opc_str(uint8_t opc) case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES"; case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ"; case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT"; + case NVME_ADM_CMD_VIRT_MNGMT: return "NVME_ADM_CMD_VIRT_MNGMT"; case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM"; default: return "NVME_ADM_CMD_UNKNOWN"; } @@ -406,6 +412,11 @@ typedef struct NvmeParams { uint8_t zasl; bool auto_transition_zones; bool legacy_cmb; + uint8_t sriov_max_vfs; + uint16_t sriov_vq_flexible; + uint16_t sriov_vi_flexible; + uint8_t sriov_max_vq_per_vf; + uint8_t sriov_max_vi_per_vf; } NvmeParams; typedef struct NvmeCtrl { @@ -423,7 +434,6 @@ typedef struct NvmeCtrl { uint16_t max_prp_ents; uint16_t cqe_size; uint16_t sqe_size; - uint32_t reg_size; uint32_t max_q_ents; 
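With reg_size gone from NvmeCtrl, the BAR 0 layout is recomputed on demand by nvme_bar_size() in hw/nvme/ctrl.c above. A worked example, assuming the usual property defaults max_ioqpairs=64 and msix_qsize=65, and NVME_DB_SIZE of 4 and PCI_MSIX_ENTRY_SIZE of 16 as elsewhere in QEMU (the call is nvme_bar_size(65, 65, &tbl, &pba)):

    uint64_t bar = 4096 + 2 * 65 * 4;    /* NvmeBar + doorbells = 4616  */
    bar = QEMU_ALIGN_UP(bar, 4 * KiB);   /* MSI-X table offset  = 8192  */
    bar += 16 * 65;                      /* MSI-X table         = 9232  */
    bar = QEMU_ALIGN_UP(bar, 4 * KiB);   /* PBA offset          = 12288 */
    bar += QEMU_ALIGN_UP(65, 64) / 8;    /* PBA                 = 12304 */
    bar = pow2ceil(bar);                 /* registered BAR size = 16384 */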
uint8_t outstanding_aers; uint32_t irq_status; @@ -433,6 +443,8 @@ typedef struct NvmeCtrl { uint64_t starttime_ms; uint16_t temperature; uint8_t smart_critical_warning; + uint32_t conf_msix_qsize; + uint32_t conf_ioqpairs; struct { MemoryRegion mem; @@ -477,8 +489,20 @@ typedef struct NvmeCtrl { uint32_t async_config; NvmeHostBehaviorSupport hbs; } features; + + NvmePriCtrlCap pri_ctrl_cap; + NvmeSecCtrlList sec_ctrl_list; + struct { + uint16_t vqrfap; + uint16_t virfap; + } next_pri_ctrl_cap; /* These override pri_ctrl_cap after reset */ } NvmeCtrl; +typedef enum NvmeResetType { + NVME_RESET_FUNCTION = 0, + NVME_RESET_CONTROLLER = 1, +} NvmeResetType; + static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid) { if (!nsid || nsid > NVME_MAX_NAMESPACES) { @@ -511,6 +535,33 @@ static inline uint16_t nvme_cid(NvmeRequest *req) return le16_to_cpu(req->cqe.cid); } +static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n) +{ + PCIDevice *pci_dev = &n->parent_obj; + NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev)); + + if (pci_is_vf(pci_dev)) { + return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)]; + } + + return NULL; +} + +static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n, + uint16_t cntlid) +{ + NvmeSecCtrlList *list = &n->sec_ctrl_list; + uint8_t i; + + for (i = 0; i < list->numcntl; i++) { + if (le16_to_cpu(list->sec[i].scid) == cntlid) { + return &list->sec[i]; + } + } + + return NULL; +} + void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns); uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len, NvmeTxDirection dir, NvmeRequest *req); diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c index 691a90d209..9d2643678b 100644 --- a/hw/nvme/subsys.c +++ b/hw/nvme/subsys.c @@ -11,20 +11,71 @@ #include "nvme.h" -int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) +static int nvme_subsys_reserve_cntlids(NvmeCtrl *n, int start, int num) { NvmeSubsystem *subsys = n->subsys; - int cntlid, nsid; + NvmeSecCtrlList *list = &n->sec_ctrl_list; + NvmeSecCtrlEntry *sctrl; + int i, cnt = 0; + + for (i = start; i < ARRAY_SIZE(subsys->ctrls) && cnt < num; i++) { + if (!subsys->ctrls[i]) { + sctrl = &list->sec[cnt]; + sctrl->scid = cpu_to_le16(i); + subsys->ctrls[i] = SUBSYS_SLOT_RSVD; + cnt++; + } + } + + return cnt; +} - for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) { - if (!subsys->ctrls[cntlid]) { - break; +static void nvme_subsys_unreserve_cntlids(NvmeCtrl *n) +{ + NvmeSubsystem *subsys = n->subsys; + NvmeSecCtrlList *list = &n->sec_ctrl_list; + NvmeSecCtrlEntry *sctrl; + int i, cntlid; + + for (i = 0; i < n->params.sriov_max_vfs; i++) { + sctrl = &list->sec[i]; + cntlid = le16_to_cpu(sctrl->scid); + + if (cntlid) { + assert(subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD); + subsys->ctrls[cntlid] = NULL; + sctrl->scid = 0; } } +} - if (cntlid == ARRAY_SIZE(subsys->ctrls)) { - error_setg(errp, "no more free controller id"); - return -1; +int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) +{ + NvmeSubsystem *subsys = n->subsys; + NvmeSecCtrlEntry *sctrl = nvme_sctrl(n); + int cntlid, nsid, num_rsvd, num_vfs = n->params.sriov_max_vfs; + + if (pci_is_vf(&n->parent_obj)) { + cntlid = le16_to_cpu(sctrl->scid); + } else { + for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) { + if (!subsys->ctrls[cntlid]) { + break; + } + } + + if (cntlid == ARRAY_SIZE(subsys->ctrls)) { + error_setg(errp, "no more free controller id"); + return -1; + } + + num_rsvd = nvme_subsys_reserve_cntlids(n, cntlid + 1, num_vfs); + if (num_rsvd != num_vfs) { + 
nvme_subsys_unreserve_cntlids(n); + error_setg(errp, + "no more free controller ids for secondary controllers"); + return -1; + } } if (!subsys->serial) { @@ -48,7 +99,13 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n) { - subsys->ctrls[n->cntlid] = NULL; + if (pci_is_vf(&n->parent_obj)) { + subsys->ctrls[n->cntlid] = SUBSYS_SLOT_RSVD; + } else { + subsys->ctrls[n->cntlid] = NULL; + nvme_subsys_unreserve_cntlids(n); + } + n->cntlid = -1; } diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events index ff1b458969..065e1c891d 100644 --- a/hw/nvme/trace-events +++ b/hw/nvme/trace-events @@ -56,6 +56,8 @@ pci_nvme_identify_ctrl(void) "identify controller" pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8"" pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32"" pci_nvme_identify_ctrl_list(uint8_t cns, uint16_t cntid) "cns 0x%"PRIx8" cntid %"PRIu16"" +pci_nvme_identify_pri_ctrl_cap(uint16_t cntlid) "identify primary controller capabilities cntlid=%"PRIu16"" +pci_nvme_identify_sec_ctrl_list(uint16_t cntlid, uint8_t numcntl) "identify secondary controller list cntlid=%"PRIu16" numcntl=%"PRIu8"" pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8"" pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32"" pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8"" @@ -108,6 +110,8 @@ pci_nvme_zd_extension_set(uint32_t zone_idx) "set descriptor extension for zone_ pci_nvme_clear_ns_close(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Closed state" pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Empty state" pci_nvme_zoned_zrwa_implicit_flush(uint64_t zslba, uint32_t nlb) "zslba 0x%"PRIx64" nlb %"PRIu32"" +pci_nvme_pci_reset(void) "PCI Function Level Reset" +pci_nvme_virt_mngmt(uint16_t cid, uint16_t act, uint16_t cntlid, const char* rt, uint16_t nr) "cid %"PRIu16", act=0x%"PRIx16", ctrlid=%"PRIu16" %s nr=%"PRIu16"" # error conditions pci_nvme_err_mdts(size_t len) "len %zu" @@ -177,7 +181,9 @@ pci_nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the pci_nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" pci_nvme_err_startfail_zasl_too_small(uint32_t zasl, uint32_t pagesz) "nvme_start_ctrl failed because zone append size limit %"PRIu32" is too small, needs to be >= %"PRIu32"" pci_nvme_err_startfail(void) "setting controller enable bit failed" +pci_nvme_err_startfail_virt_state(uint16_t vq, uint16_t vi, const char *state) "nvme_start_ctrl failed due to ctrl state: vi=%u vq=%u %s" pci_nvme_err_invalid_mgmt_action(uint8_t action) "action=0x%"PRIx8"" +pci_nvme_err_ignored_mmio_vf_offline(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d" # undefined behavior pci_nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c index 3f03467dde..26ac9b7123 100644 --- a/hw/pci-host/pnv_phb3.c +++ b/hw/pci-host/pnv_phb3.c @@ -1088,10 +1088,10 @@ static const char *pnv_phb3_root_bus_path(PCIHostState *host_bridge, } static Property pnv_phb3_properties[] = { - DEFINE_PROP_UINT32("index", PnvPHB3, phb_id, 0), - DEFINE_PROP_UINT32("chip-id", PnvPHB3, chip_id, 0), - DEFINE_PROP_LINK("chip", PnvPHB3, chip, TYPE_PNV_CHIP, PnvChip *), - DEFINE_PROP_END_OF_LIST(), + 
DEFINE_PROP_UINT32("index", PnvPHB3, phb_id, 0), + DEFINE_PROP_UINT32("chip-id", PnvPHB3, chip_id, 0), + DEFINE_PROP_LINK("chip", PnvPHB3, chip, TYPE_PNV_CHIP, PnvChip *), + DEFINE_PROP_END_OF_LIST(), }; static void pnv_phb3_class_init(ObjectClass *klass, void *data) diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c index 13ba9e45d8..6594016121 100644 --- a/hw/pci-host/pnv_phb4.c +++ b/hw/pci-host/pnv_phb4.c @@ -1692,11 +1692,11 @@ static void pnv_phb4_xive_notify(XiveNotifier *xf, uint32_t srcno, } static Property pnv_phb4_properties[] = { - DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0), - DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0), - DEFINE_PROP_LINK("pec", PnvPHB4, pec, TYPE_PNV_PHB4_PEC, - PnvPhb4PecState *), - DEFINE_PROP_END_OF_LIST(), + DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0), + DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0), + DEFINE_PROP_LINK("pec", PnvPHB4, pec, TYPE_PNV_PHB4_PEC, + PnvPhb4PecState *), + DEFINE_PROP_END_OF_LIST(), }; static void pnv_phb4_class_init(ObjectClass *klass, void *data) diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c index 61bc0b503e..8b7e823fa5 100644 --- a/hw/pci-host/pnv_phb4_pec.c +++ b/hw/pci-host/pnv_phb4_pec.c @@ -215,11 +215,11 @@ static int pnv_pec_dt_xscom(PnvXScomInterface *dev, void *fdt, } static Property pnv_pec_properties[] = { - DEFINE_PROP_UINT32("index", PnvPhb4PecState, index, 0), - DEFINE_PROP_UINT32("chip-id", PnvPhb4PecState, chip_id, 0), - DEFINE_PROP_LINK("chip", PnvPhb4PecState, chip, TYPE_PNV_CHIP, - PnvChip *), - DEFINE_PROP_END_OF_LIST(), + DEFINE_PROP_UINT32("index", PnvPhb4PecState, index, 0), + DEFINE_PROP_UINT32("chip-id", PnvPhb4PecState, chip_id, 0), + DEFINE_PROP_LINK("chip", PnvPhb4PecState, chip, TYPE_PNV_CHIP, + PnvChip *), + DEFINE_PROP_END_OF_LIST(), }; static uint32_t pnv_pec_xscom_pci_base(PnvPhb4PecState *pec) diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c index 993e3ba955..a1ecf6dd1c 100644 --- a/hw/ppc/ppc440_uc.c +++ b/hw/ppc/ppc440_uc.c @@ -1180,6 +1180,14 @@ static void dcr_write_pcie(void *opaque, int dcrn, uint32_t val) case PEGPL_CFGMSK: s->cfg_mask = val; size = ~(val & 0xfffffffe) + 1; + /* + * Firmware sets this register to E0000001. Why we are not sure, + * but the current guess is anything above PCIE_MMCFG_SIZE_MAX is + * ignored. 
+ */ + if (size > PCIE_MMCFG_SIZE_MAX) { + size = PCIE_MMCFG_SIZE_MAX; + } pcie_host_mmcfg_update(PCIE_HOST_BRIDGE(s), val & 1, s->cfg_base, size); break; case PEGPL_MSGBAH: diff --git a/include/block/nvme.h b/include/block/nvme.h index 3737351cc8..373c70b5ca 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -595,6 +595,7 @@ enum NvmeAdminCommands { NVME_ADM_CMD_ACTIVATE_FW = 0x10, NVME_ADM_CMD_DOWNLOAD_FW = 0x11, NVME_ADM_CMD_NS_ATTACHMENT = 0x15, + NVME_ADM_CMD_VIRT_MNGMT = 0x1c, NVME_ADM_CMD_FORMAT_NVM = 0x80, NVME_ADM_CMD_SECURITY_SEND = 0x81, NVME_ADM_CMD_SECURITY_RECV = 0x82, @@ -899,6 +900,10 @@ enum NvmeStatusCodes { NVME_NS_PRIVATE = 0x0119, NVME_NS_NOT_ATTACHED = 0x011a, NVME_NS_CTRL_LIST_INVALID = 0x011c, + NVME_INVALID_CTRL_ID = 0x011f, + NVME_INVALID_SEC_CTRL_STATE = 0x0120, + NVME_INVALID_NUM_RESOURCES = 0x0121, + NVME_INVALID_RESOURCE_ID = 0x0122, NVME_CONFLICTING_ATTRS = 0x0180, NVME_INVALID_PROT_INFO = 0x0181, NVME_WRITE_TO_RO = 0x0182, @@ -1033,6 +1038,8 @@ enum NvmeIdCns { NVME_ID_CNS_NS_PRESENT = 0x11, NVME_ID_CNS_NS_ATTACHED_CTRL_LIST = 0x12, NVME_ID_CNS_CTRL_LIST = 0x13, + NVME_ID_CNS_PRIMARY_CTRL_CAP = 0x14, + NVME_ID_CNS_SECONDARY_CTRL_LIST = 0x15, NVME_ID_CNS_CS_NS_PRESENT_LIST = 0x1a, NVME_ID_CNS_CS_NS_PRESENT = 0x1b, NVME_ID_CNS_IO_COMMAND_SET = 0x1c, @@ -1553,6 +1560,61 @@ typedef enum NvmeZoneState { NVME_ZONE_STATE_OFFLINE = 0x0f, } NvmeZoneState; +typedef struct QEMU_PACKED NvmePriCtrlCap { + uint16_t cntlid; + uint16_t portid; + uint8_t crt; + uint8_t rsvd5[27]; + uint32_t vqfrt; + uint32_t vqrfa; + uint16_t vqrfap; + uint16_t vqprt; + uint16_t vqfrsm; + uint16_t vqgran; + uint8_t rsvd48[16]; + uint32_t vifrt; + uint32_t virfa; + uint16_t virfap; + uint16_t viprt; + uint16_t vifrsm; + uint16_t vigran; + uint8_t rsvd80[4016]; +} NvmePriCtrlCap; + +typedef enum NvmePriCtrlCapCrt { + NVME_CRT_VQ = 1 << 0, + NVME_CRT_VI = 1 << 1, +} NvmePriCtrlCapCrt; + +typedef struct QEMU_PACKED NvmeSecCtrlEntry { + uint16_t scid; + uint16_t pcid; + uint8_t scs; + uint8_t rsvd5[3]; + uint16_t vfn; + uint16_t nvq; + uint16_t nvi; + uint8_t rsvd14[18]; +} NvmeSecCtrlEntry; + +typedef struct QEMU_PACKED NvmeSecCtrlList { + uint8_t numcntl; + uint8_t rsvd1[31]; + NvmeSecCtrlEntry sec[127]; +} NvmeSecCtrlList; + +typedef enum NvmeVirtMngmtAction { + NVME_VIRT_MNGMT_ACTION_PRM_ALLOC = 0x01, + NVME_VIRT_MNGMT_ACTION_SEC_OFFLINE = 0x07, + NVME_VIRT_MNGMT_ACTION_SEC_ASSIGN = 0x08, + NVME_VIRT_MNGMT_ACTION_SEC_ONLINE = 0x09, +} NvmeVirtMngmtAction; + +typedef enum NvmeVirtualResourceType { + NVME_VIRT_RES_QUEUE = 0x00, + NVME_VIRT_RES_INTERRUPT = 0x01, +} NvmeVirtualResourceType; + static inline void _nvme_check_size(void) { QEMU_BUILD_BUG_ON(sizeof(NvmeBar) != 4096); @@ -1588,5 +1650,8 @@ static inline void _nvme_check_size(void) QEMU_BUILD_BUG_ON(sizeof(NvmeIdNsDescr) != 4); QEMU_BUILD_BUG_ON(sizeof(NvmeZoneDescr) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeDifTuple) != 16); + QEMU_BUILD_BUG_ON(sizeof(NvmePriCtrlCap) != 4096); + QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlEntry) != 32); + QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlList) != 4096); } #endif diff --git a/include/hw/i2c/aspeed_i2c.h b/include/hw/i2c/aspeed_i2c.h index 4b9be09274..1398befc10 100644 --- a/include/hw/i2c/aspeed_i2c.h +++ b/include/hw/i2c/aspeed_i2c.h @@ -23,16 +23,196 @@ #include "hw/i2c/i2c.h" #include "hw/sysbus.h" +#include "hw/registerfields.h" #include "qom/object.h" #define TYPE_ASPEED_I2C "aspeed.i2c" #define TYPE_ASPEED_2400_I2C TYPE_ASPEED_I2C "-ast2400" #define TYPE_ASPEED_2500_I2C TYPE_ASPEED_I2C 
"-ast2500" #define TYPE_ASPEED_2600_I2C TYPE_ASPEED_I2C "-ast2600" +#define TYPE_ASPEED_1030_I2C TYPE_ASPEED_I2C "-ast1030" OBJECT_DECLARE_TYPE(AspeedI2CState, AspeedI2CClass, ASPEED_I2C) #define ASPEED_I2C_NR_BUSSES 16 #define ASPEED_I2C_MAX_POOL_SIZE 0x800 +#define ASPEED_I2C_OLD_NUM_REG 11 +#define ASPEED_I2C_NEW_NUM_REG 22 + +/* Tx State Machine */ +#define I2CD_TX_STATE_MASK 0xf +#define I2CD_IDLE 0x0 +#define I2CD_MACTIVE 0x8 +#define I2CD_MSTART 0x9 +#define I2CD_MSTARTR 0xa +#define I2CD_MSTOP 0xb +#define I2CD_MTXD 0xc +#define I2CD_MRXACK 0xd +#define I2CD_MRXD 0xe +#define I2CD_MTXACK 0xf +#define I2CD_SWAIT 0x1 +#define I2CD_SRXD 0x4 +#define I2CD_STXACK 0x5 +#define I2CD_STXD 0x6 +#define I2CD_SRXACK 0x7 +#define I2CD_RECOVER 0x3 + +/* I2C Global Register */ +REG32(I2C_CTRL_STATUS, 0x0) /* Device Interrupt Status */ +REG32(I2C_CTRL_ASSIGN, 0x8) /* Device Interrupt Target Assignment */ +REG32(I2C_CTRL_GLOBAL, 0xC) /* Global Control Register */ + FIELD(I2C_CTRL_GLOBAL, REG_MODE, 2, 1) + FIELD(I2C_CTRL_GLOBAL, SRAM_EN, 0, 1) +REG32(I2C_CTRL_NEW_CLK_DIVIDER, 0x10) /* New mode clock divider */ + +/* I2C Old Mode Device (Bus) Register */ +REG32(I2CD_FUN_CTRL, 0x0) /* I2CD Function Control */ + FIELD(I2CD_FUN_CTRL, POOL_PAGE_SEL, 20, 3) /* AST2400 */ + SHARED_FIELD(M_SDA_LOCK_EN, 16, 1) + SHARED_FIELD(MULTI_MASTER_DIS, 15, 1) + SHARED_FIELD(M_SCL_DRIVE_EN, 14, 1) + SHARED_FIELD(MSB_STS, 9, 1) + SHARED_FIELD(SDA_DRIVE_IT_EN, 8, 1) + SHARED_FIELD(M_SDA_DRIVE_IT_EN, 7, 1) + SHARED_FIELD(M_HIGH_SPEED_EN, 6, 1) + SHARED_FIELD(DEF_ADDR_EN, 5, 1) + SHARED_FIELD(DEF_ALERT_EN, 4, 1) + SHARED_FIELD(DEF_ARP_EN, 3, 1) + SHARED_FIELD(DEF_GCALL_EN, 2, 1) + SHARED_FIELD(SLAVE_EN, 1, 1) + SHARED_FIELD(MASTER_EN, 0, 1) +REG32(I2CD_AC_TIMING1, 0x04) /* Clock and AC Timing Control #1 */ +REG32(I2CD_AC_TIMING2, 0x08) /* Clock and AC Timing Control #2 */ +REG32(I2CD_INTR_CTRL, 0x0C) /* I2CD Interrupt Control */ +REG32(I2CD_INTR_STS, 0x10) /* I2CD Interrupt Status */ + SHARED_FIELD(SLAVE_ADDR_MATCH, 31, 1) /* 0: addr1 1: addr2 */ + SHARED_FIELD(SLAVE_ADDR_RX_PENDING, 29, 1) + SHARED_FIELD(SLAVE_INACTIVE_TIMEOUT, 15, 1) + SHARED_FIELD(SDA_DL_TIMEOUT, 14, 1) + SHARED_FIELD(BUS_RECOVER_DONE, 13, 1) + SHARED_FIELD(SMBUS_ALERT, 12, 1) /* Bus [0-3] only */ + FIELD(I2CD_INTR_STS, SMBUS_ARP_ADDR, 11, 1) /* Removed */ + FIELD(I2CD_INTR_STS, SMBUS_DEV_ALERT_ADDR, 10, 1) /* Removed */ + FIELD(I2CD_INTR_STS, SMBUS_DEF_ADDR, 9, 1) /* Removed */ + FIELD(I2CD_INTR_STS, GCALL_ADDR, 8, 1) /* Removed */ + FIELD(I2CD_INTR_STS, SLAVE_ADDR_RX_MATCH, 7, 1) /* use RX_DONE */ + SHARED_FIELD(SCL_TIMEOUT, 6, 1) + SHARED_FIELD(ABNORMAL, 5, 1) + SHARED_FIELD(NORMAL_STOP, 4, 1) + SHARED_FIELD(ARBIT_LOSS, 3, 1) + SHARED_FIELD(RX_DONE, 2, 1) + SHARED_FIELD(TX_NAK, 1, 1) + SHARED_FIELD(TX_ACK, 0, 1) +REG32(I2CD_CMD, 0x14) /* I2CD Command/Status */ + SHARED_FIELD(SDA_OE, 28, 1) + SHARED_FIELD(SDA_O, 27, 1) + SHARED_FIELD(SCL_OE, 26, 1) + SHARED_FIELD(SCL_O, 25, 1) + SHARED_FIELD(TX_TIMING, 23, 2) + SHARED_FIELD(TX_STATE, 19, 4) + SHARED_FIELD(SCL_LINE_STS, 18, 1) + SHARED_FIELD(SDA_LINE_STS, 17, 1) + SHARED_FIELD(BUS_BUSY_STS, 16, 1) + SHARED_FIELD(SDA_OE_OUT_DIR, 15, 1) + SHARED_FIELD(SDA_O_OUT_DIR, 14, 1) + SHARED_FIELD(SCL_OE_OUT_DIR, 13, 1) + SHARED_FIELD(SCL_O_OUT_DIR, 12, 1) + SHARED_FIELD(BUS_RECOVER_CMD_EN, 11, 1) + SHARED_FIELD(S_ALT_EN, 10, 1) + /* Command Bits */ + SHARED_FIELD(RX_DMA_EN, 9, 1) + SHARED_FIELD(TX_DMA_EN, 8, 1) + SHARED_FIELD(RX_BUFF_EN, 7, 1) + SHARED_FIELD(TX_BUFF_EN, 6, 1) + SHARED_FIELD(M_STOP_CMD, 5, 1) + 
SHARED_FIELD(M_S_RX_CMD_LAST, 4, 1) + SHARED_FIELD(M_RX_CMD, 3, 1) + SHARED_FIELD(S_TX_CMD, 2, 1) + SHARED_FIELD(M_TX_CMD, 1, 1) + SHARED_FIELD(M_START_CMD, 0, 1) +REG32(I2CD_DEV_ADDR, 0x18) /* Slave Device Address */ +REG32(I2CD_POOL_CTRL, 0x1C) /* Pool Buffer Control */ + SHARED_FIELD(RX_COUNT, 24, 5) + SHARED_FIELD(RX_SIZE, 16, 5) + SHARED_FIELD(TX_COUNT, 9, 5) + FIELD(I2CD_POOL_CTRL, OFFSET, 2, 6) /* AST2400 */ +REG32(I2CD_BYTE_BUF, 0x20) /* Transmit/Receive Byte Buffer */ + SHARED_FIELD(RX_BUF, 8, 8) + SHARED_FIELD(TX_BUF, 0, 8) +REG32(I2CD_DMA_ADDR, 0x24) /* DMA Buffer Address */ +REG32(I2CD_DMA_LEN, 0x28) /* DMA Transfer Length < 4KB */ + +/* I2C New Mode Device (Bus) Register */ +REG32(I2CC_FUN_CTRL, 0x0) + FIELD(I2CC_FUN_CTRL, RB_EARLY_DONE_EN, 22, 1) + FIELD(I2CC_FUN_CTRL, DMA_DIS_AUTO_RECOVER, 21, 1) + FIELD(I2CC_FUN_CTRL, S_SAVE_ADDR, 20, 1) + FIELD(I2CC_FUN_CTRL, M_PKT_RETRY_CNT, 18, 2) + /* 17:0 shared with I2CD_FUN_CTRL[17:0] */ +REG32(I2CC_AC_TIMING, 0x04) +REG32(I2CC_MS_TXRX_BYTE_BUF, 0x08) + /* 31:16 shared with I2CD_CMD[31:16] */ + /* 15:0 shared with I2CD_BYTE_BUF[15:0] */ +REG32(I2CC_POOL_CTRL, 0x0c) + /* 31:0 shared with I2CD_POOL_CTRL[31:0] */ +REG32(I2CM_INTR_CTRL, 0x10) +REG32(I2CM_INTR_STS, 0x14) + FIELD(I2CM_INTR_STS, PKT_STATE, 28, 4) + FIELD(I2CM_INTR_STS, PKT_CMD_TIMEOUT, 18, 1) + FIELD(I2CM_INTR_STS, PKT_CMD_FAIL, 17, 1) + FIELD(I2CM_INTR_STS, PKT_CMD_DONE, 16, 1) + FIELD(I2CM_INTR_STS, BUS_RECOVER_FAIL, 15, 1) + /* 14:0 shared with I2CD_INTR_STS[14:0] */ +REG32(I2CM_CMD, 0x18) + FIELD(I2CM_CMD, W1_CTRL, 31, 1) + FIELD(I2CM_CMD, PKT_DEV_ADDR, 24, 7) + FIELD(I2CM_CMD, HS_MASTER_MODE_LSB, 17, 3) + FIELD(I2CM_CMD, PKT_OP_EN, 16, 1) + /* 15:0 shared with I2CD_CMD[15:0] */ +REG32(I2CM_DMA_LEN, 0x1c) + FIELD(I2CM_DMA_LEN, RX_BUF_LEN_W1T, 31, 1) + FIELD(I2CM_DMA_LEN, RX_BUF_LEN, 16, 11) + FIELD(I2CM_DMA_LEN, TX_BUF_LEN_W1T, 15, 1) + FIELD(I2CM_DMA_LEN, TX_BUF_LEN, 0, 11) +REG32(I2CS_INTR_CTRL, 0x20) +REG32(I2CS_INTR_STS, 0x24) + /* 31:29 shared with I2CD_INTR_STS[31:29] */ + FIELD(I2CS_INTR_STS, SLAVE_PARKING_STS, 24, 2) + FIELD(I2CS_INTR_STS, SLAVE_ADDR3_NAK, 22, 1) + FIELD(I2CS_INTR_STS, SLAVE_ADDR2_NAK, 21, 1) + FIELD(I2CS_INTR_STS, SLAVE_ADDR1_NAK, 20, 1) + FIELD(I2CS_INTR_STS, SLAVE_ADDR_INDICATOR, 18, 2) + FIELD(I2CS_INTR_STS, PKT_CMD_FAIL, 17, 1) + FIELD(I2CS_INTR_STS, PKT_CMD_DONE, 16, 1) + /* 14:0 shared with I2CD_INTR_STS[14:0] */ +REG32(I2CS_CMD, 0x28) + FIELD(I2CS_CMD, W1_CTRL, 31, 1) + FIELD(I2CS_CMD, PKT_MODE_ACTIVE_ADDR, 17, 2) + FIELD(I2CS_CMD, PKT_MODE_EN, 16, 1) + FIELD(I2CS_CMD, AUTO_NAK_INACTIVE_ADDR, 15, 1) + FIELD(I2CS_CMD, AUTO_NAK_ACTIVE_ADDR, 14, 1) + /* 13:0 shared with I2CD_CMD[13:0] */ +REG32(I2CS_DMA_LEN, 0x2c) + FIELD(I2CS_DMA_LEN, RX_BUF_LEN_W1T, 31, 1) + FIELD(I2CS_DMA_LEN, RX_BUF_LEN, 16, 11) + FIELD(I2CS_DMA_LEN, TX_BUF_LEN_W1T, 15, 1) + FIELD(I2CS_DMA_LEN, TX_BUF_LEN, 0, 11) +REG32(I2CM_DMA_TX_ADDR, 0x30) + FIELD(I2CM_DMA_TX_ADDR, ADDR, 0, 31) +REG32(I2CM_DMA_RX_ADDR, 0x34) + FIELD(I2CM_DMA_RX_ADDR, ADDR, 0, 31) +REG32(I2CS_DMA_TX_ADDR, 0x38) + FIELD(I2CS_DMA_TX_ADDR, ADDR, 0, 31) +REG32(I2CS_DMA_RX_ADDR, 0x3c) + FIELD(I2CS_DMA_RX_ADDR, ADDR, 0, 31) +REG32(I2CS_DEV_ADDR, 0x40) +REG32(I2CM_DMA_LEN_STS, 0x48) + FIELD(I2CM_DMA_LEN_STS, RX_LEN, 16, 13) + FIELD(I2CM_DMA_LEN_STS, TX_LEN, 0, 13) +REG32(I2CS_DMA_LEN_STS, 0x4c) + FIELD(I2CS_DMA_LEN_STS, RX_LEN, 16, 13) + FIELD(I2CS_DMA_LEN_STS, TX_LEN, 0, 13) +REG32(I2CC_DMA_ADDR, 0x50) +REG32(I2CC_DMA_LEN, 0x54) struct AspeedI2CState; @@ -49,15 +229,7 @@ struct AspeedI2CBus { uint8_t id; 
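The SHARED_FIELD definitions above let the model address the same bit positions through whichever register file is active, old-mode I2CD_* or new-mode I2CM_*/I2CS_*/I2CC_*. A usage sketch, assuming the SHARED_ARRAY_FIELD_DP32 helper added to include/hw/registerfields.h later in this series and the regs[] array plus offset helpers defined just below (the function name is illustrative):

    /* Raise TX_ACK in the mode-appropriate interrupt status register;
     * bit 0 is shared between I2CD_INTR_STS and I2CM_INTR_STS. */
    static void aspeed_i2c_bus_raise_tx_ack(AspeedI2CBus *bus)
    {
        SHARED_ARRAY_FIELD_DP32(bus->regs,
                                aspeed_i2c_bus_intr_sts_offset(bus),
                                TX_ACK, 1);
    }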
qemu_irq irq; - uint32_t ctrl; - uint32_t timing[2]; - uint32_t intr_ctrl; - uint32_t intr_status; - uint32_t cmd; - uint32_t buf; - uint32_t pool_ctrl; - uint32_t dma_addr; - uint32_t dma_len; + uint32_t regs[ASPEED_I2C_NEW_NUM_REG]; }; struct AspeedI2CState { @@ -68,6 +240,7 @@ struct AspeedI2CState { uint32_t intr_status; uint32_t ctrl_global; + uint32_t new_clk_divider; MemoryRegion pool_iomem; uint8_t pool[ASPEED_I2C_MAX_POOL_SIZE]; @@ -93,6 +266,104 @@ struct AspeedI2CClass { }; +static inline bool aspeed_i2c_is_new_mode(AspeedI2CState *s) +{ + return FIELD_EX32(s->ctrl_global, I2C_CTRL_GLOBAL, REG_MODE); +} + +static inline bool aspeed_i2c_bus_pkt_mode_en(AspeedI2CBus *bus) +{ + if (aspeed_i2c_is_new_mode(bus->controller)) { + return ARRAY_FIELD_EX32(bus->regs, I2CM_CMD, PKT_OP_EN); + } + return false; +} + +static inline uint32_t aspeed_i2c_bus_ctrl_offset(AspeedI2CBus *bus) +{ + if (aspeed_i2c_is_new_mode(bus->controller)) { + return R_I2CC_FUN_CTRL; + } + return R_I2CD_FUN_CTRL; +} + +static inline uint32_t aspeed_i2c_bus_cmd_offset(AspeedI2CBus *bus) +{ + if (aspeed_i2c_is_new_mode(bus->controller)) { + return R_I2CM_CMD; + } + return R_I2CD_CMD; +} + +static inline uint32_t aspeed_i2c_bus_dev_addr_offset(AspeedI2CBus *bus) +{ + if (aspeed_i2c_is_new_mode(bus->controller)) { + return R_I2CS_DEV_ADDR; + } + return R_I2CD_DEV_ADDR; +} + +static inline uint32_t aspeed_i2c_bus_intr_ctrl_offset(AspeedI2CBus *bus) +{ + if (aspeed_i2c_is_new_mode(bus->controller)) { + return R_I2CM_INTR_CTRL; + } + return R_I2CD_INTR_CTRL; +} + +static inline uint32_t aspeed_i2c_bus_intr_sts_offset(AspeedI2CBus *bus) +{ + if (aspeed_i2c_is_new_mode(bus->controller)) { + return R_I2CM_INTR_STS; + } + return R_I2CD_INTR_STS; +} + +static inline uint32_t aspeed_i2c_bus_pool_ctrl_offset(AspeedI2CBus *bus) +{ + if (aspeed_i2c_is_new_mode(bus->controller)) { + return R_I2CC_POOL_CTRL; + } + return R_I2CD_POOL_CTRL; +} + +static inline uint32_t aspeed_i2c_bus_byte_buf_offset(AspeedI2CBus *bus) +{ + if (aspeed_i2c_is_new_mode(bus->controller)) { + return R_I2CC_MS_TXRX_BYTE_BUF; + } + return R_I2CD_BYTE_BUF; +} + +static inline uint32_t aspeed_i2c_bus_dma_len_offset(AspeedI2CBus *bus) +{ + if (aspeed_i2c_is_new_mode(bus->controller)) { + return R_I2CC_DMA_LEN; + } + return R_I2CD_DMA_LEN; +} + +static inline uint32_t aspeed_i2c_bus_dma_addr_offset(AspeedI2CBus *bus) +{ + if (aspeed_i2c_is_new_mode(bus->controller)) { + return R_I2CC_DMA_ADDR; + } + return R_I2CD_DMA_ADDR; +} + +static inline bool aspeed_i2c_bus_is_master(AspeedI2CBus *bus) +{ + return SHARED_ARRAY_FIELD_EX32(bus->regs, aspeed_i2c_bus_ctrl_offset(bus), + MASTER_EN); +} + +static inline bool aspeed_i2c_bus_is_enabled(AspeedI2CBus *bus) +{ + uint32_t ctrl_reg = aspeed_i2c_bus_ctrl_offset(bus); + return SHARED_ARRAY_FIELD_EX32(bus->regs, ctrl_reg, MASTER_EN) || + SHARED_ARRAY_FIELD_EX32(bus->regs, ctrl_reg, SLAVE_EN); +} + I2CBus *aspeed_i2c_get_bus(AspeedI2CState *s, int busnr); #endif /* ASPEED_I2C_H */ diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index 898083b86f..d5ddea558b 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -238,6 +238,7 @@ #define PCI_DEVICE_ID_INTEL_82801BA_11 0x244e #define PCI_DEVICE_ID_INTEL_82801D 0x24CD #define PCI_DEVICE_ID_INTEL_ESB_9 0x25ab +#define PCI_DEVICE_ID_INTEL_NVME 0x5845 #define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000 #define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010 #define PCI_DEVICE_ID_INTEL_82371SB_2 0x7020 diff --git a/include/hw/registerfields.h 
b/include/hw/registerfields.h index 3a88e135d0..1330ca77de 100644 --- a/include/hw/registerfields.h +++ b/include/hw/registerfields.h @@ -154,4 +154,74 @@ #define ARRAY_FIELD_DP64(regs, reg, field, val) \ (regs)[R_ ## reg] = FIELD_DP64((regs)[R_ ## reg], reg, field, val); + +/* + * These macros can be used for defining and extracting fields that have the + * same bit position across multiple registers. + */ + +/* Define shared SHIFT, LENGTH, and MASK constants */ +#define SHARED_FIELD(name, shift, length) \ + enum { name ## _ ## SHIFT = (shift)}; \ + enum { name ## _ ## LENGTH = (length)}; \ + enum { name ## _ ## MASK = MAKE_64BIT_MASK(shift, length)}; + +/* Extract a shared field */ +#define SHARED_FIELD_EX8(storage, field) \ + extract8((storage), field ## _SHIFT, field ## _LENGTH) + +#define SHARED_FIELD_EX16(storage, field) \ + extract16((storage), field ## _SHIFT, field ## _LENGTH) + +#define SHARED_FIELD_EX32(storage, field) \ + extract32((storage), field ## _SHIFT, field ## _LENGTH) + +#define SHARED_FIELD_EX64(storage, field) \ + extract64((storage), field ## _SHIFT, field ## _LENGTH) + +/* Extract a shared field from a register array */ +#define SHARED_ARRAY_FIELD_EX32(regs, offset, field) \ + SHARED_FIELD_EX32((regs)[(offset)], field) +#define SHARED_ARRAY_FIELD_EX64(regs, offset, field) \ + SHARED_FIELD_EX64((regs)[(offset)], field) + +/* Deposit a shared field */ +#define SHARED_FIELD_DP8(storage, field, val) ({ \ + struct { \ + unsigned int v:field ## _LENGTH; \ + } _v = { .v = val }; \ + uint8_t _d; \ + _d = deposit32((storage), field ## _SHIFT, field ## _LENGTH, _v.v); \ + _d; }) + +#define SHARED_FIELD_DP16(storage, field, val) ({ \ + struct { \ + unsigned int v:field ## _LENGTH; \ + } _v = { .v = val }; \ + uint16_t _d; \ + _d = deposit32((storage), field ## _SHIFT, field ## _LENGTH, _v.v); \ + _d; }) + +#define SHARED_FIELD_DP32(storage, field, val) ({ \ + struct { \ + unsigned int v:field ## _LENGTH; \ + } _v = { .v = val }; \ + uint32_t _d; \ + _d = deposit32((storage), field ## _SHIFT, field ## _LENGTH, _v.v); \ + _d; }) + +#define SHARED_FIELD_DP64(storage, field, val) ({ \ + struct { \ + uint64_t v:field ## _LENGTH; \ + } _v = { .v = val }; \ + uint64_t _d; \ + _d = deposit64((storage), field ## _SHIFT, field ## _LENGTH, _v.v); \ + _d; }) + +/* Deposit a shared field to a register array */ +#define SHARED_ARRAY_FIELD_DP32(regs, offset, field, val) \ + (regs)[(offset)] = SHARED_FIELD_DP32((regs)[(offset)], field, val); +#define SHARED_ARRAY_FIELD_DP64(regs, offset, field, val) \ + (regs)[(offset)] = SHARED_FIELD_DP64((regs)[(offset)], field, val); + #endif diff --git a/include/io/channel-null.h b/include/io/channel-null.h new file mode 100644 index 0000000000..f6d54e63cf --- /dev/null +++ b/include/io/channel-null.h @@ -0,0 +1,55 @@ +/* + * QEMU I/O channels null driver + * + * Copyright (c) 2022 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#ifndef QIO_CHANNEL_NULL_H +#define QIO_CHANNEL_NULL_H + +#include "io/channel.h" +#include "qom/object.h" + +#define TYPE_QIO_CHANNEL_NULL "qio-channel-null" +OBJECT_DECLARE_SIMPLE_TYPE(QIOChannelNull, QIO_CHANNEL_NULL) + + +/** + * QIOChannelNull: + * + * The QIOChannelNull object provides a channel implementation + * that discards all writes and returns EOF for all reads. + */ + +struct QIOChannelNull { + QIOChannel parent; + bool closed; +}; + + +/** + * qio_channel_null_new: + * + * Create a new IO channel object that discards all writes + * and returns EOF for all reads. + * + * Returns: the new channel object + */ +QIOChannelNull * +qio_channel_null_new(void); + +#endif /* QIO_CHANNEL_NULL_H */ diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index f19bd29105..bc743f5e32 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -32,6 +32,7 @@ #include "qemu/compiler.h" #include "qemu/bswap.h" +#include "qemu/int128.h" #ifdef CONFIG_INT128 static inline void mulu64(uint64_t *plow, uint64_t *phigh, @@ -849,4 +850,6 @@ static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1, #endif } +Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor); +Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor); #endif diff --git a/include/qemu/int128.h b/include/qemu/int128.h index ef71f56e3f..d2b76ca6ac 100644 --- a/include/qemu/int128.h +++ b/include/qemu/int128.h @@ -128,11 +128,21 @@ static inline bool int128_ge(Int128 a, Int128 b) return a >= b; } +static inline bool int128_uge(Int128 a, Int128 b) +{ + return ((__uint128_t)a) >= ((__uint128_t)b); +} + static inline bool int128_lt(Int128 a, Int128 b) { return a < b; } +static inline bool int128_ult(Int128 a, Int128 b) +{ + return (__uint128_t)a < (__uint128_t)b; +} + static inline bool int128_le(Int128 a, Int128 b) { return a <= b; @@ -177,6 +187,15 @@ static inline Int128 bswap128(Int128 a) #endif } +static inline int clz128(Int128 a) +{ + if (a >> 64) { + return __builtin_clzll(a >> 64); + } else { + return (a) ? __builtin_clzll((uint64_t)a) + 64 : 128; + } +} + static inline Int128 int128_divu(Int128 a, Int128 b) { return (__uint128_t)a / (__uint128_t)b; @@ -373,11 +392,21 @@ static inline bool int128_ge(Int128 a, Int128 b) return a.hi > b.hi || (a.hi == b.hi && a.lo >= b.lo); } +static inline bool int128_uge(Int128 a, Int128 b) +{ + return (uint64_t)a.hi > (uint64_t)b.hi || (a.hi == b.hi && a.lo >= b.lo); +} + static inline bool int128_lt(Int128 a, Int128 b) { return !int128_ge(a, b); } +static inline bool int128_ult(Int128 a, Int128 b) +{ + return !int128_uge(a, b); +} + static inline bool int128_le(Int128 a, Int128 b) { return int128_ge(b, a); @@ -418,6 +447,15 @@ static inline Int128 bswap128(Int128 a) return int128_make128(bswap64(a.hi), bswap64(a.lo)); } +static inline int clz128(Int128 a) +{ + if (a.hi) { + return __builtin_clzll(a.hi); + } else { + return (a.lo) ? __builtin_clzll(a.lo) + 64 : 128; + } +} + Int128 int128_divu(Int128, Int128); Int128 int128_remu(Int128, Int128); Int128 int128_divs(Int128, Int128); diff --git a/io/channel-null.c b/io/channel-null.c new file mode 100644 index 0000000000..75e3781507 --- /dev/null +++ b/io/channel-null.c @@ -0,0 +1,237 @@ +/* + * QEMU I/O channels null driver + * + * Copyright (c) 2022 Red Hat, Inc.
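A few illustrative assertions (not part of the patch) pin down the semantics of the new Int128 helpers; this assumes the existing int128_make128() constructor, which takes (lo, hi):

    g_assert(clz128(int128_make128(0, 0)) == 128);  /* all-zero input */
    g_assert(clz128(int128_make128(1, 0)) == 127);  /* only bit 0 set */
    g_assert(clz128(int128_make128(0, 1)) == 63);   /* only bit 64 set */

    /* ult/uge compare as unsigned, so all-ones is the maximum value */
    g_assert(int128_ult(int128_make128(0, 0),
                        int128_make128(UINT64_MAX, UINT64_MAX)));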
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#include "qemu/osdep.h" +#include "io/channel-null.h" +#include "io/channel-watch.h" +#include "qapi/error.h" +#include "trace.h" +#include "qemu/iov.h" + +typedef struct QIOChannelNullSource QIOChannelNullSource; +struct QIOChannelNullSource { + GSource parent; + QIOChannel *ioc; + GIOCondition condition; +}; + + +QIOChannelNull * +qio_channel_null_new(void) +{ + QIOChannelNull *ioc; + + ioc = QIO_CHANNEL_NULL(object_new(TYPE_QIO_CHANNEL_NULL)); + + trace_qio_channel_null_new(ioc); + + return ioc; +} + + +static void +qio_channel_null_init(Object *obj) +{ + QIOChannelNull *ioc = QIO_CHANNEL_NULL(obj); + ioc->closed = false; +} + + +static ssize_t +qio_channel_null_readv(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int **fds G_GNUC_UNUSED, + size_t *nfds G_GNUC_UNUSED, + Error **errp) +{ + QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc); + + if (nioc->closed) { + error_setg_errno(errp, EINVAL, + "Channel is closed"); + return -1; + } + + return 0; +} + + +static ssize_t +qio_channel_null_writev(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds G_GNUC_UNUSED, + size_t nfds G_GNUC_UNUSED, + int flags G_GNUC_UNUSED, + Error **errp) +{ + QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc); + + if (nioc->closed) { + error_setg_errno(errp, EINVAL, + "Channel is closed"); + return -1; + } + + return iov_size(iov, niov); +} + + +static int +qio_channel_null_set_blocking(QIOChannel *ioc G_GNUC_UNUSED, + bool enabled G_GNUC_UNUSED, + Error **errp G_GNUC_UNUSED) +{ + return 0; +} + + +static off_t +qio_channel_null_seek(QIOChannel *ioc G_GNUC_UNUSED, + off_t offset G_GNUC_UNUSED, + int whence G_GNUC_UNUSED, + Error **errp G_GNUC_UNUSED) +{ + return 0; +} + + +static int +qio_channel_null_close(QIOChannel *ioc, + Error **errp G_GNUC_UNUSED) +{ + QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc); + + nioc->closed = true; + return 0; +} + + +static void +qio_channel_null_set_aio_fd_handler(QIOChannel *ioc G_GNUC_UNUSED, + AioContext *ctx G_GNUC_UNUSED, + IOHandler *io_read G_GNUC_UNUSED, + IOHandler *io_write G_GNUC_UNUSED, + void *opaque G_GNUC_UNUSED) +{ +} + + +static gboolean +qio_channel_null_source_prepare(GSource *source G_GNUC_UNUSED, + gint *timeout) +{ + *timeout = -1; + + return TRUE; +} + + +static gboolean +qio_channel_null_source_check(GSource *source G_GNUC_UNUSED) +{ + return TRUE; +} + + +static gboolean +qio_channel_null_source_dispatch(GSource *source, + GSourceFunc callback, + gpointer user_data) +{ + QIOChannelFunc func = (QIOChannelFunc)callback; + QIOChannelNullSource *ssource = (QIOChannelNullSource *)source; + + return (*func)(ssource->ioc, + ssource->condition, + user_data); +} + + +static void +qio_channel_null_source_finalize(GSource *source) +{ + QIOChannelNullSource *ssource = (QIOChannelNullSource *)source; + + object_unref(OBJECT(ssource->ioc)); +} + + +GSourceFuncs 
qio_channel_null_source_funcs = { + qio_channel_null_source_prepare, + qio_channel_null_source_check, + qio_channel_null_source_dispatch, + qio_channel_null_source_finalize +}; + + +static GSource * +qio_channel_null_create_watch(QIOChannel *ioc, + GIOCondition condition) +{ + GSource *source; + QIOChannelNullSource *ssource; + + source = g_source_new(&qio_channel_null_source_funcs, + sizeof(QIOChannelNullSource)); + ssource = (QIOChannelNullSource *)source; + + ssource->ioc = ioc; + object_ref(OBJECT(ioc)); + + ssource->condition = condition; + + return source; +} + + +static void +qio_channel_null_class_init(ObjectClass *klass, + void *class_data G_GNUC_UNUSED) +{ + QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass); + + ioc_klass->io_writev = qio_channel_null_writev; + ioc_klass->io_readv = qio_channel_null_readv; + ioc_klass->io_set_blocking = qio_channel_null_set_blocking; + ioc_klass->io_seek = qio_channel_null_seek; + ioc_klass->io_close = qio_channel_null_close; + ioc_klass->io_create_watch = qio_channel_null_create_watch; + ioc_klass->io_set_aio_fd_handler = qio_channel_null_set_aio_fd_handler; +} + + +static const TypeInfo qio_channel_null_info = { + .parent = TYPE_QIO_CHANNEL, + .name = TYPE_QIO_CHANNEL_NULL, + .instance_size = sizeof(QIOChannelNull), + .instance_init = qio_channel_null_init, + .class_init = qio_channel_null_class_init, +}; + + +static void +qio_channel_null_register_types(void) +{ + type_register_static(&qio_channel_null_info); +} + +type_init(qio_channel_null_register_types); diff --git a/io/channel-socket.c b/io/channel-socket.c index dc9c165de1..4466bb1cd4 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -578,11 +578,17 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, memcpy(CMSG_DATA(cmsg), fds, fdsize); } -#ifdef QEMU_MSG_ZEROCOPY if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { +#ifdef QEMU_MSG_ZEROCOPY sflags = MSG_ZEROCOPY; - } +#else + /* + * We expect QIOChannel class entry point to have + * blocked this code path already + */ + g_assert_not_reached(); #endif + } retry: ret = sendmsg(sioc->fd, &msg, sflags); @@ -592,21 +598,24 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, return QIO_CHANNEL_ERR_BLOCK; case EINTR: goto retry; -#ifdef QEMU_MSG_ZEROCOPY case ENOBUFS: - if (sflags & MSG_ZEROCOPY) { + if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { error_setg_errno(errp, errno, "Process can't lock enough memory for using MSG_ZEROCOPY"); return -1; } break; -#endif } error_setg_errno(errp, errno, "Unable to write to socket"); return -1; } + + if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { + sioc->zero_copy_queued++; + } + return ret; } #else /* WIN32 */ diff --git a/io/meson.build b/io/meson.build index bbcd3c53a4..283b9b2bdb 100644 --- a/io/meson.build +++ b/io/meson.build @@ -3,6 +3,7 @@ io_ss.add(files( 'channel-buffer.c', 'channel-command.c', 'channel-file.c', + 'channel-null.c', 'channel-socket.c', 'channel-tls.c', 'channel-util.c', diff --git a/io/trace-events b/io/trace-events index c5e814eb44..3cc5cf1efd 100644 --- a/io/trace-events +++ b/io/trace-events @@ -10,6 +10,9 @@ qio_task_thread_result(void *task) "Task thread result task=%p" qio_task_thread_source_attach(void *task, void *source) "Task thread source attach task=%p source=%p" qio_task_thread_source_cancel(void *task, void *source) "Task thread source cancel task=%p source=%p" +# channel-null.c +qio_channel_null_new(void *ioc) "Null new ioc=%p" + # channel-socket.c qio_channel_socket_new(void *ioc) "Socket new ioc=%p" qio_channel_socket_new_fd(void 
*ioc, int fd) "Socket new ioc=%p fd=%d" diff --git a/migration/block.c b/migration/block.c index 077a413325..823453c977 100644 --- a/migration/block.c +++ b/migration/block.c @@ -756,8 +756,8 @@ static int block_save_setup(QEMUFile *f, void *opaque) static int block_save_iterate(QEMUFile *f, void *opaque) { int ret; - int64_t last_ftell = qemu_ftell(f); - int64_t delta_ftell; + int64_t last_bytes = qemu_file_total_transferred(f); + int64_t delta_bytes; trace_migration_block_save("iterate", block_mig_state.submitted, block_mig_state.transferred); @@ -809,10 +809,10 @@ static int block_save_iterate(QEMUFile *f, void *opaque) } qemu_put_be64(f, BLK_MIG_FLAG_EOS); - delta_ftell = qemu_ftell(f) - last_ftell; - if (delta_ftell > 0) { + delta_bytes = qemu_file_total_transferred(f) - last_bytes; + if (delta_bytes > 0) { return 1; - } else if (delta_ftell < 0) { + } else if (delta_bytes < 0) { return -1; } else { return 0; diff --git a/migration/channel-block.c b/migration/channel-block.c new file mode 100644 index 0000000000..c55c8c93ce --- /dev/null +++ b/migration/channel-block.c @@ -0,0 +1,195 @@ +/* + * QEMU I/O channels block driver + * + * Copyright (c) 2022 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
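The rename from qemu_ftell() makes the accounting idiom in block_save_iterate() read naturally; the pattern, in sketch form, where f is the outgoing migration file:

    int64_t before = qemu_file_total_transferred(f);
    /* ... queue dirty sectors on f ... */
    int64_t delta = qemu_file_total_transferred(f) - before;
    /* > 0: progress this iteration; == 0: idle; < 0: defensive error path */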
+ * + */ + +#include "qemu/osdep.h" +#include "migration/channel-block.h" +#include "qapi/error.h" +#include "block/block.h" +#include "trace.h" + +QIOChannelBlock * +qio_channel_block_new(BlockDriverState *bs) +{ + QIOChannelBlock *ioc; + + ioc = QIO_CHANNEL_BLOCK(object_new(TYPE_QIO_CHANNEL_BLOCK)); + + bdrv_ref(bs); + ioc->bs = bs; + + return ioc; +} + + +static void +qio_channel_block_finalize(Object *obj) +{ + QIOChannelBlock *ioc = QIO_CHANNEL_BLOCK(obj); + + g_clear_pointer(&ioc->bs, bdrv_unref); +} + + +static ssize_t +qio_channel_block_readv(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int **fds, + size_t *nfds, + Error **errp) +{ + QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc); + QEMUIOVector qiov; + int ret; + + qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov); + ret = bdrv_readv_vmstate(bioc->bs, &qiov, bioc->offset); + if (ret < 0) { + return ret; + } + + bioc->offset += qiov.size; + return qiov.size; +} + + +static ssize_t +qio_channel_block_writev(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, + size_t nfds, + int flags, + Error **errp) +{ + QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc); + QEMUIOVector qiov; + int ret; + + qemu_iovec_init_external(&qiov, (struct iovec *)iov, niov); + ret = bdrv_writev_vmstate(bioc->bs, &qiov, bioc->offset); + if (ret < 0) { + return ret; + } + + bioc->offset += qiov.size; + return qiov.size; +} + + +static int +qio_channel_block_set_blocking(QIOChannel *ioc, + bool enabled, + Error **errp) +{ + if (!enabled) { + error_setg(errp, "Non-blocking mode not supported for block devices"); + return -1; + } + return 0; +} + + +static off_t +qio_channel_block_seek(QIOChannel *ioc, + off_t offset, + int whence, + Error **errp) +{ + QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc); + + switch (whence) { + case SEEK_SET: + bioc->offset = offset; + break; + case SEEK_CUR: + bioc->offset += offset; + break; + case SEEK_END: + error_setg(errp, "Size of VMstate region is unknown"); + return (off_t)-1; + default: + g_assert_not_reached(); + } + + return bioc->offset; +} + + +static int +qio_channel_block_close(QIOChannel *ioc, + Error **errp) +{ + QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc); + int rv = bdrv_flush(bioc->bs); + + if (rv < 0) { + error_setg_errno(errp, -rv, + "Unable to flush VMState"); + return -1; + } + + g_clear_pointer(&bioc->bs, bdrv_unref); + bioc->offset = 0; + + return 0; +} + + +static void +qio_channel_block_set_aio_fd_handler(QIOChannel *ioc, + AioContext *ctx, + IOHandler *io_read, + IOHandler *io_write, + void *opaque) +{ + /* XXX anything we can do here?
*/ +} + + +static void +qio_channel_block_class_init(ObjectClass *klass, + void *class_data G_GNUC_UNUSED) +{ + QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass); + + ioc_klass->io_writev = qio_channel_block_writev; + ioc_klass->io_readv = qio_channel_block_readv; + ioc_klass->io_set_blocking = qio_channel_block_set_blocking; + ioc_klass->io_seek = qio_channel_block_seek; + ioc_klass->io_close = qio_channel_block_close; + ioc_klass->io_set_aio_fd_handler = qio_channel_block_set_aio_fd_handler; +} + +static const TypeInfo qio_channel_block_info = { + .parent = TYPE_QIO_CHANNEL, + .name = TYPE_QIO_CHANNEL_BLOCK, + .instance_size = sizeof(QIOChannelBlock), + .instance_finalize = qio_channel_block_finalize, + .class_init = qio_channel_block_class_init, +}; + +static void +qio_channel_block_register_types(void) +{ + type_register_static(&qio_channel_block_info); +} + +type_init(qio_channel_block_register_types); diff --git a/migration/channel-block.h b/migration/channel-block.h new file mode 100644 index 0000000000..31673824e6 --- /dev/null +++ b/migration/channel-block.h @@ -0,0 +1,59 @@ +/* + * QEMU I/O channels block driver + * + * Copyright (c) 2022 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#ifndef QIO_CHANNEL_BLOCK_H +#define QIO_CHANNEL_BLOCK_H + +#include "io/channel.h" +#include "qom/object.h" + +#define TYPE_QIO_CHANNEL_BLOCK "qio-channel-block" +OBJECT_DECLARE_SIMPLE_TYPE(QIOChannelBlock, QIO_CHANNEL_BLOCK) + + +/** + * QIOChannelBlock: + * + * The QIOChannelBlock object provides a channel implementation + * that is able to perform I/O on the BlockDriverState objects + * to the VMState region. 
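A sketch of how the type is wired up, mirroring the qemu_fopen_bdrv() conversion later in this series:

    QIOChannelBlock *bioc = qio_channel_block_new(bs);      /* takes a bdrv ref */
    QEMUFile *f = qemu_file_new_output(QIO_CHANNEL(bioc));
    /* qemu_put_*() now lands in the vmstate area of bs; bioc->offset
     * tracks the position, and qemu_fclose() flushes via bdrv_flush() */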
+ */ + +struct QIOChannelBlock { + QIOChannel parent; + BlockDriverState *bs; + off_t offset; +}; + + +/** + * qio_channel_block_new: + * @bs: the block driver state + * + * Create a new IO channel object that can perform + * I/O on a BlockDriverState object to the VMState + * region + * + * Returns: the new channel object + */ +QIOChannelBlock * +qio_channel_block_new(BlockDriverState *bs); + +#endif /* QIO_CHANNEL_BLOCK_H */ diff --git a/migration/channel.c b/migration/channel.c index a162d00fea..90087d8986 100644 --- a/migration/channel.c +++ b/migration/channel.c @@ -14,7 +14,7 @@ #include "channel.h" #include "tls.h" #include "migration.h" -#include "qemu-file-channel.h" +#include "qemu-file.h" #include "trace.h" #include "qapi/error.h" #include "io/channel-tls.h" @@ -85,7 +85,7 @@ void migration_channel_connect(MigrationState *s, return; } } else { - QEMUFile *f = qemu_fopen_channel_output(ioc); + QEMUFile *f = qemu_file_new_output(ioc); migration_ioc_register_yank(ioc); diff --git a/migration/colo.c b/migration/colo.c index 5f7071b3cd..2b71722fd6 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -14,7 +14,6 @@ #include "sysemu/sysemu.h" #include "qapi/error.h" #include "qapi/qapi-commands-migration.h" -#include "qemu-file-channel.h" #include "migration.h" #include "qemu-file.h" #include "savevm.h" @@ -559,7 +558,7 @@ static void colo_process_checkpoint(MigrationState *s) goto out; } bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE); - fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc)); + fb = qemu_file_new_output(QIO_CHANNEL(bioc)); object_unref(OBJECT(bioc)); qemu_mutex_lock_iothread(); @@ -873,7 +872,7 @@ void *colo_process_incoming_thread(void *opaque) colo_incoming_start_dirty_log(); bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE); - fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc)); + fb = qemu_file_new_input(QIO_CHANNEL(bioc)); object_unref(OBJECT(bioc)); qemu_mutex_lock_iothread(); diff --git a/migration/meson.build b/migration/meson.build index 6880b61b10..690487cf1a 100644 --- a/migration/meson.build +++ b/migration/meson.build @@ -4,7 +4,6 @@ migration_files = files( 'xbzrle.c', 'vmstate-types.c', 'vmstate.c', - 'qemu-file-channel.c', 'qemu-file.c', 'yank_functions.c', ) @@ -13,6 +12,7 @@ softmmu_ss.add(migration_files) softmmu_ss.add(files( 'block-dirty-bitmap.c', 'channel.c', + 'channel-block.c', 'colo-failover.c', 'colo.c', 'exec.c', diff --git a/migration/migration.c b/migration/migration.c index 31739b2af9..78f5057373 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -30,7 +30,6 @@ #include "migration/misc.h" #include "migration.h" #include "savevm.h" -#include "qemu-file-channel.h" #include "qemu-file.h" #include "migration/vmstate.h" #include "block/block.h" @@ -163,7 +162,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, MIGRATION_CAPABILITY_COMPRESS, MIGRATION_CAPABILITY_XBZRLE, MIGRATION_CAPABILITY_X_COLO, - MIGRATION_CAPABILITY_VALIDATE_UUID); + MIGRATION_CAPABILITY_VALIDATE_UUID, + MIGRATION_CAPABILITY_ZERO_COPY_SEND); /* When we add fault tolerance, we could have several migrations at once. 
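Note the ownership convention the conversions above rely on: qemu_file_new_output() takes its own reference on the channel, so callers such as colo_process_checkpoint() can drop theirs immediately (sketch):

    QIOChannelBuffer *bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
    QEMUFile *fb = qemu_file_new_output(QIO_CHANNEL(bioc));
    object_unref(OBJECT(bioc));   /* fb now holds the only reference */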
For now we don't need to add @@ -722,7 +722,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) if (!mis->from_src_file) { /* The first connection (multifd may have multiple) */ - QEMUFile *f = qemu_fopen_channel_input(ioc); + QEMUFile *f = qemu_file_new_input(ioc); if (!migration_incoming_setup(f, errp)) { return; @@ -910,10 +910,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) params->multifd_zlib_level = s->parameters.multifd_zlib_level; params->has_multifd_zstd_level = true; params->multifd_zstd_level = s->parameters.multifd_zstd_level; -#ifdef CONFIG_LINUX - params->has_zero_copy_send = true; - params->zero_copy_send = s->parameters.zero_copy_send; -#endif params->has_xbzrle_cache_size = true; params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; params->has_max_postcopy_bandwidth = true; @@ -1275,6 +1271,24 @@ static bool migrate_caps_check(bool *cap_list, } } +#ifdef CONFIG_LINUX + if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && + (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || + migrate_use_compression() || + migrate_use_tls())) { + error_setg(errp, + "Zero copy only available for non-compressed non-TLS multifd migration"); + return false; + } +#else + if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { + error_setg(errp, + "Zero copy currently only available on Linux"); + return false; + } +#endif + + /* incoming side only */ if (runstate_check(RUN_STATE_INMIGRATE) && !migrate_multi_channels_is_allowed() && @@ -1497,16 +1511,6 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); return false; } -#ifdef CONFIG_LINUX - if (params->zero_copy_send && - (!migrate_use_multifd() || - params->multifd_compression != MULTIFD_COMPRESSION_NONE || - (params->tls_creds && *params->tls_creds))) { - error_setg(errp, - "Zero copy only available for non-compressed non-TLS multifd migration"); - return false; - } -#endif return true; } @@ -1580,11 +1584,6 @@ static void migrate_params_test_apply(MigrateSetParameters *params, if (params->has_multifd_compression) { dest->multifd_compression = params->multifd_compression; } -#ifdef CONFIG_LINUX - if (params->has_zero_copy_send) { - dest->zero_copy_send = params->zero_copy_send; - } -#endif if (params->has_xbzrle_cache_size) { dest->xbzrle_cache_size = params->xbzrle_cache_size; } @@ -1697,11 +1696,6 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) if (params->has_multifd_compression) { s->parameters.multifd_compression = params->multifd_compression; } -#ifdef CONFIG_LINUX - if (params->has_zero_copy_send) { - s->parameters.zero_copy_send = params->zero_copy_send; - } -#endif if (params->has_xbzrle_cache_size) { s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; xbzrle_cache_resize(params->xbzrle_cache_size, errp); @@ -2593,7 +2587,7 @@ bool migrate_use_zero_copy_send(void) s = migrate_get_current(); - return s->parameters.zero_copy_send; + return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; } #endif @@ -3081,7 +3075,7 @@ static int postcopy_start(MigrationState *ms) */ bioc = qio_channel_buffer_new(4096); qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer"); - fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc)); + fb = qemu_file_new_output(QIO_CHANNEL(bioc)); object_unref(OBJECT(bioc)); /* @@ -3544,7 +3538,8 @@ static MigThrError migration_detect_error(MigrationState *s) /* How many bytes have we transferred since the beginning of the 
migration */ static uint64_t migration_total_bytes(MigrationState *s) { - return qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes; + return qemu_file_total_transferred(s->to_dst_file) + + ram_counters.multifd_bytes; } static void migration_calculate_complete(MigrationState *s) @@ -3970,7 +3965,7 @@ static void *bg_migration_thread(void *opaque) */ s->bioc = qio_channel_buffer_new(512 * 1024); qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer"); - fb = qemu_fopen_channel_output(QIO_CHANNEL(s->bioc)); + fb = qemu_file_new_output(QIO_CHANNEL(s->bioc)); object_unref(OBJECT(s->bioc)); update_iteration_initial_status(s); @@ -4249,10 +4244,6 @@ static Property migration_properties[] = { DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, parameters.multifd_zstd_level, DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), -#ifdef CONFIG_LINUX - DEFINE_PROP_BOOL("zero_copy_send", MigrationState, - parameters.zero_copy_send, false), -#endif DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, parameters.xbzrle_cache_size, DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), @@ -4290,6 +4281,10 @@ static Property migration_properties[] = { DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), DEFINE_PROP_MIG_CAP("x-background-snapshot", MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), +#ifdef CONFIG_LINUX + DEFINE_PROP_MIG_CAP("x-zero-copy-send", + MIGRATION_CAPABILITY_ZERO_COPY_SEND), +#endif DEFINE_PROP_END_OF_LIST(), }; @@ -4350,9 +4345,6 @@ static void migration_instance_init(Object *obj) params->has_multifd_compression = true; params->has_multifd_zlib_level = true; params->has_multifd_zstd_level = true; -#ifdef CONFIG_LINUX - params->has_zero_copy_send = true; -#endif params->has_xbzrle_cache_size = true; params->has_max_postcopy_bandwidth = true; params->has_max_cpu_throttle = true; diff --git a/migration/multifd.c b/migration/multifd.c index 9282ab6aa4..684c014c86 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -435,7 +435,7 @@ static int multifd_send_pages(QEMUFile *f) p->pages = pages; transferred = ((uint64_t) pages->num) * qemu_target_page_size() + p->packet_len; - qemu_file_update_transfer(f, transferred); + qemu_file_acct_rate_limit(f, transferred); ram_counters.multifd_bytes += transferred; ram_counters.transferred += transferred; qemu_mutex_unlock(&p->mutex); @@ -610,7 +610,7 @@ int multifd_send_sync_main(QEMUFile *f) p->packet_num = multifd_send_state->packet_num++; p->flags |= MULTIFD_FLAG_SYNC; p->pending_job++; - qemu_file_update_transfer(f, p->packet_len); + qemu_file_acct_rate_limit(f, p->packet_len); ram_counters.multifd_bytes += p->packet_len; ram_counters.transferred += p->packet_len; qemu_mutex_unlock(&p->mutex); diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c deleted file mode 100644 index bb5a5752df..0000000000 --- a/migration/qemu-file-channel.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - * QEMUFile backend for QIOChannel objects - * - * Copyright (c) 2015-2016 Red Hat, Inc - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions 
of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "qemu-file-channel.h" -#include "qemu-file.h" -#include "io/channel-socket.h" -#include "io/channel-tls.h" -#include "qemu/iov.h" -#include "qemu/yank.h" -#include "yank_functions.h" - - -static ssize_t channel_writev_buffer(void *opaque, - struct iovec *iov, - int iovcnt, - int64_t pos, - Error **errp) -{ - QIOChannel *ioc = QIO_CHANNEL(opaque); - ssize_t done = 0; - struct iovec *local_iov = g_new(struct iovec, iovcnt); - struct iovec *local_iov_head = local_iov; - unsigned int nlocal_iov = iovcnt; - - nlocal_iov = iov_copy(local_iov, nlocal_iov, - iov, iovcnt, - 0, iov_size(iov, iovcnt)); - - while (nlocal_iov > 0) { - ssize_t len; - len = qio_channel_writev(ioc, local_iov, nlocal_iov, errp); - if (len == QIO_CHANNEL_ERR_BLOCK) { - if (qemu_in_coroutine()) { - qio_channel_yield(ioc, G_IO_OUT); - } else { - qio_channel_wait(ioc, G_IO_OUT); - } - continue; - } - if (len < 0) { - done = -EIO; - goto cleanup; - } - - iov_discard_front(&local_iov, &nlocal_iov, len); - done += len; - } - - cleanup: - g_free(local_iov_head); - return done; -} - - -static ssize_t channel_get_buffer(void *opaque, - uint8_t *buf, - int64_t pos, - size_t size, - Error **errp) -{ - QIOChannel *ioc = QIO_CHANNEL(opaque); - ssize_t ret; - - do { - ret = qio_channel_read(ioc, (char *)buf, size, errp); - if (ret < 0) { - if (ret == QIO_CHANNEL_ERR_BLOCK) { - if (qemu_in_coroutine()) { - qio_channel_yield(ioc, G_IO_IN); - } else { - qio_channel_wait(ioc, G_IO_IN); - } - } else { - return -EIO; - } - } - } while (ret == QIO_CHANNEL_ERR_BLOCK); - - return ret; -} - - -static int channel_close(void *opaque, Error **errp) -{ - int ret; - QIOChannel *ioc = QIO_CHANNEL(opaque); - ret = qio_channel_close(ioc, errp); - object_unref(OBJECT(ioc)); - return ret; -} - - -static int channel_shutdown(void *opaque, - bool rd, - bool wr, - Error **errp) -{ - QIOChannel *ioc = QIO_CHANNEL(opaque); - - if (qio_channel_has_feature(ioc, - QIO_CHANNEL_FEATURE_SHUTDOWN)) { - QIOChannelShutdown mode; - if (rd && wr) { - mode = QIO_CHANNEL_SHUTDOWN_BOTH; - } else if (rd) { - mode = QIO_CHANNEL_SHUTDOWN_READ; - } else { - mode = QIO_CHANNEL_SHUTDOWN_WRITE; - } - if (qio_channel_shutdown(ioc, mode, errp) < 0) { - return -EIO; - } - } - return 0; -} - - -static int channel_set_blocking(void *opaque, - bool enabled, - Error **errp) -{ - QIOChannel *ioc = QIO_CHANNEL(opaque); - - if (qio_channel_set_blocking(ioc, enabled, errp) < 0) { - return -1; - } - return 0; -} - -static QEMUFile *channel_get_input_return_path(void *opaque) -{ - QIOChannel *ioc = QIO_CHANNEL(opaque); - - return qemu_fopen_channel_output(ioc); -} - -static QEMUFile *channel_get_output_return_path(void *opaque) -{ - QIOChannel *ioc = QIO_CHANNEL(opaque); - - return qemu_fopen_channel_input(ioc); -} - -static const QEMUFileOps channel_input_ops = { - .get_buffer = channel_get_buffer, - .close = channel_close, - .shut_down = channel_shutdown, - .set_blocking = channel_set_blocking, - .get_return_path = channel_get_input_return_path, -}; - 
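The ops tables above and below disappear entirely; after this series a QEMUFile is constructed directly from a channel, with the direction made explicit (sketch):

    QEMUFile *in  = qemu_file_new_input(ioc);    /* replaces qemu_fopen_channel_input() */
    QEMUFile *out = qemu_file_new_output(ioc);   /* replaces qemu_fopen_channel_output() */

The two return-path helpers above likewise collapse into a single wrapper that reuses the same channel with the direction flipped, as the qemu-file.c rewrite below shows.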
- -static const QEMUFileOps channel_output_ops = { - .writev_buffer = channel_writev_buffer, - .close = channel_close, - .shut_down = channel_shutdown, - .set_blocking = channel_set_blocking, - .get_return_path = channel_get_output_return_path, -}; - - -QEMUFile *qemu_fopen_channel_input(QIOChannel *ioc) -{ - object_ref(OBJECT(ioc)); - return qemu_fopen_ops(ioc, &channel_input_ops, true); -} - -QEMUFile *qemu_fopen_channel_output(QIOChannel *ioc) -{ - object_ref(OBJECT(ioc)); - return qemu_fopen_ops(ioc, &channel_output_ops, true); -} diff --git a/migration/qemu-file-channel.h b/migration/qemu-file-channel.h deleted file mode 100644 index 0028a09eb6..0000000000 --- a/migration/qemu-file-channel.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * QEMUFile backend for QIOChannel objects - * - * Copyright (c) 2015-2016 Red Hat, Inc - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#ifndef QEMU_FILE_CHANNEL_H -#define QEMU_FILE_CHANNEL_H - -#include "io/channel.h" - -QEMUFile *qemu_fopen_channel_input(QIOChannel *ioc); -QEMUFile *qemu_fopen_channel_output(QIOChannel *ioc); -#endif diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 1479cddad9..1e80d496b7 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -35,15 +35,24 @@ #define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64) struct QEMUFile { - const QEMUFileOps *ops; const QEMUFileHooks *hooks; - void *opaque; + QIOChannel *ioc; + bool is_writable; - int64_t bytes_xfer; - int64_t xfer_limit; + /* + * Maximum amount of data in bytes to transfer during one + * rate limiting time window + */ + int64_t rate_limit_max; + /* + * Total amount of data in bytes queued for transfer + * during this rate limiting time window + */ + int64_t rate_limit_used; + + /* The sum of bytes transferred on the wire */ + int64_t total_transferred; - int64_t pos; /* start of buffer when writing, end of buffer - when reading */ int buf_index; int buf_size; /* 0 when writing */ uint8_t buf[IO_BUF_SIZE]; @@ -56,23 +65,28 @@ struct QEMUFile { Error *last_error_obj; /* has the file has been shutdown */ bool shutdown; - /* Whether opaque points to a QIOChannel */ - bool has_ioc; }; /* * Stop a file from being read/written - not all backing files can do this * typically only sockets can. 
+ * + * TODO: convert to propagate Error objects instead of squashing + * to a fixed errno value */ int qemu_file_shutdown(QEMUFile *f) { - int ret; + int ret = 0; f->shutdown = true; - if (!f->ops->shut_down) { + if (!qio_channel_has_feature(f->ioc, + QIO_CHANNEL_FEATURE_SHUTDOWN)) { return -ENOSYS; } - ret = f->ops->shut_down(f->opaque, true, true, NULL); + + if (qio_channel_shutdown(f->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL) < 0) { + ret = -EIO; + } if (!f->last_error) { qemu_file_set_error(f, -EIO); @@ -80,18 +94,6 @@ int qemu_file_shutdown(QEMUFile *f) return ret; } -/* - * Result: QEMUFile* for a 'return path' for comms in the opposite direction - * NULL if not available - */ -QEMUFile *qemu_file_get_return_path(QEMUFile *f) -{ - if (!f->ops->get_return_path) { - return NULL; - } - return f->ops->get_return_path(f->opaque); -} - bool qemu_file_mode_is_not_valid(const char *mode) { if (mode == NULL || @@ -104,18 +106,37 @@ bool qemu_file_mode_is_not_valid(const char *mode) return false; } -QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc) +static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable) { QEMUFile *f; f = g_new0(QEMUFile, 1); - f->opaque = opaque; - f->ops = ops; - f->has_ioc = has_ioc; + object_ref(ioc); + f->ioc = ioc; + f->is_writable = is_writable; + return f; } +/* + * Result: QEMUFile* for a 'return path' for comms in the opposite direction + * NULL if not available + */ +QEMUFile *qemu_file_get_return_path(QEMUFile *f) +{ + return qemu_file_new_impl(f->ioc, !f->is_writable); +} + +QEMUFile *qemu_file_new_output(QIOChannel *ioc) +{ + return qemu_file_new_impl(ioc, true); +} + +QEMUFile *qemu_file_new_input(QIOChannel *ioc) +{ + return qemu_file_new_impl(ioc, false); +} void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks) { @@ -174,7 +195,7 @@ void qemu_file_set_error(QEMUFile *f, int ret) bool qemu_file_is_writable(QEMUFile *f) { - return f->ops->writev_buffer; + return f->is_writable; } static void qemu_iovec_release_ram(QEMUFile *f) @@ -212,6 +233,7 @@ static void qemu_iovec_release_ram(QEMUFile *f) memset(f->may_free, 0, sizeof(f->may_free)); } + /** * Flushes QEMUFile buffer * @@ -220,10 +242,6 @@ static void qemu_iovec_release_ram(QEMUFile *f) */ void qemu_fflush(QEMUFile *f) { - ssize_t ret = 0; - ssize_t expect = 0; - Error *local_error = NULL; - if (!qemu_file_is_writable(f)) { return; } @@ -232,22 +250,18 @@ void qemu_fflush(QEMUFile *f) return; } if (f->iovcnt > 0) { - expect = iov_size(f->iov, f->iovcnt); - ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos, - &local_error); + Error *local_error = NULL; + if (qio_channel_writev_all(f->ioc, + f->iov, f->iovcnt, + &local_error) < 0) { + qemu_file_set_error_obj(f, -EIO, local_error); + } else { + f->total_transferred += iov_size(f->iov, f->iovcnt); + } qemu_iovec_release_ram(f); } - if (ret >= 0) { - f->pos += ret; - } - /* We expect the QEMUFile write impl to send the full - * data set we requested, so sanity check that. - */ - if (ret != expect) { - qemu_file_set_error_obj(f, ret < 0 ? 
ret : -EIO, local_error); - } f->buf_index = 0; f->iovcnt = 0; } @@ -257,7 +271,7 @@ void ram_control_before_iterate(QEMUFile *f, uint64_t flags) int ret = 0; if (f->hooks && f->hooks->before_ram_iterate) { - ret = f->hooks->before_ram_iterate(f, f->opaque, flags, NULL); + ret = f->hooks->before_ram_iterate(f, flags, NULL); if (ret < 0) { qemu_file_set_error(f, ret); } @@ -269,7 +283,7 @@ void ram_control_after_iterate(QEMUFile *f, uint64_t flags) int ret = 0; if (f->hooks && f->hooks->after_ram_iterate) { - ret = f->hooks->after_ram_iterate(f, f->opaque, flags, NULL); + ret = f->hooks->after_ram_iterate(f, flags, NULL); if (ret < 0) { qemu_file_set_error(f, ret); } @@ -281,7 +295,7 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data) int ret = -EINVAL; if (f->hooks && f->hooks->hook_ram_load) { - ret = f->hooks->hook_ram_load(f, f->opaque, flags, data); + ret = f->hooks->hook_ram_load(f, flags, data); if (ret < 0) { qemu_file_set_error(f, ret); } @@ -301,16 +315,16 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, uint64_t *bytes_sent) { if (f->hooks && f->hooks->save_page) { - int ret = f->hooks->save_page(f, f->opaque, block_offset, + int ret = f->hooks->save_page(f, block_offset, offset, size, bytes_sent); if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { - f->bytes_xfer += size; + f->rate_limit_used += size; } if (ret != RAM_SAVE_CONTROL_DELAYED && ret != RAM_SAVE_CONTROL_NOT_SUPP) { if (bytes_sent && *bytes_sent > 0) { - qemu_update_position(f, *bytes_sent); + qemu_file_credit_transfer(f, *bytes_sent); } else if (ret < 0) { qemu_file_set_error(f, ret); } @@ -349,11 +363,25 @@ static ssize_t qemu_fill_buffer(QEMUFile *f) return 0; } - len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos, - IO_BUF_SIZE - pending, &local_error); + do { + len = qio_channel_read(f->ioc, + (char *)f->buf + pending, + IO_BUF_SIZE - pending, + &local_error); + if (len == QIO_CHANNEL_ERR_BLOCK) { + if (qemu_in_coroutine()) { + qio_channel_yield(f->ioc, G_IO_IN); + } else { + qio_channel_wait(f->ioc, G_IO_IN); + } + } else if (len < 0) { + len = -EIO; + } + } while (len == QIO_CHANNEL_ERR_BLOCK); + if (len > 0) { f->buf_size += len; - f->pos += len; + f->total_transferred += len; } else if (len == 0) { qemu_file_set_error_obj(f, -EIO, local_error); } else if (len != -EAGAIN) { @@ -365,9 +393,9 @@ static ssize_t qemu_fill_buffer(QEMUFile *f) return len; } -void qemu_update_position(QEMUFile *f, size_t size) +void qemu_file_credit_transfer(QEMUFile *f, size_t size) { - f->pos += size; + f->total_transferred += size; } /** Closes the file @@ -380,16 +408,16 @@ void qemu_update_position(QEMUFile *f, size_t size) */ int qemu_fclose(QEMUFile *f) { - int ret; + int ret, ret2; qemu_fflush(f); ret = qemu_file_get_error(f); - if (f->ops->close) { - int ret2 = f->ops->close(f->opaque, NULL); - if (ret >= 0) { - ret = ret2; - } + ret2 = qio_channel_close(f->ioc, NULL); + if (ret >= 0) { + ret = ret2; } + g_clear_pointer(&f->ioc, object_unref); + /* If any error was spotted before closing, we should report it * instead of the close() return value. 
*/ @@ -457,7 +485,7 @@ void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size, return; } - f->bytes_xfer += size; + f->rate_limit_used += size; add_to_iovec(f, buf, size, may_free); } @@ -475,7 +503,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size) l = size; } memcpy(f->buf + f->buf_index, buf, l); - f->bytes_xfer += l; + f->rate_limit_used += l; add_buf_to_iovec(f, l); if (qemu_file_get_error(f)) { break; @@ -492,7 +520,7 @@ void qemu_put_byte(QEMUFile *f, int v) } f->buf[f->buf_index] = v; - f->bytes_xfer++; + f->rate_limit_used++; add_buf_to_iovec(f, 1); } @@ -648,9 +676,9 @@ int qemu_get_byte(QEMUFile *f) return result; } -int64_t qemu_ftell_fast(QEMUFile *f) +int64_t qemu_file_total_transferred_fast(QEMUFile *f) { - int64_t ret = f->pos; + int64_t ret = f->total_transferred; int i; for (i = 0; i < f->iovcnt; i++) { @@ -660,10 +688,10 @@ int64_t qemu_ftell_fast(QEMUFile *f) return ret; } -int64_t qemu_ftell(QEMUFile *f) +int64_t qemu_file_total_transferred(QEMUFile *f) { qemu_fflush(f); - return f->pos; + return f->total_transferred; } int qemu_file_rate_limit(QEMUFile *f) @@ -674,7 +702,7 @@ int qemu_file_rate_limit(QEMUFile *f) if (qemu_file_get_error(f)) { return 1; } - if (f->xfer_limit > 0 && f->bytes_xfer > f->xfer_limit) { + if (f->rate_limit_max > 0 && f->rate_limit_used > f->rate_limit_max) { return 1; } return 0; @@ -682,22 +710,22 @@ int qemu_file_rate_limit(QEMUFile *f) int64_t qemu_file_get_rate_limit(QEMUFile *f) { - return f->xfer_limit; + return f->rate_limit_max; } void qemu_file_set_rate_limit(QEMUFile *f, int64_t limit) { - f->xfer_limit = limit; + f->rate_limit_max = limit; } void qemu_file_reset_rate_limit(QEMUFile *f) { - f->bytes_xfer = 0; + f->rate_limit_used = 0; } -void qemu_file_update_transfer(QEMUFile *f, int64_t len) +void qemu_file_acct_rate_limit(QEMUFile *f, int64_t len) { - f->bytes_xfer += len; + f->rate_limit_used += len; } void qemu_put_be16(QEMUFile *f, unsigned int v) @@ -851,19 +879,18 @@ void qemu_put_counted_string(QEMUFile *f, const char *str) */ void qemu_file_set_blocking(QEMUFile *f, bool block) { - if (f->ops->set_blocking) { - f->ops->set_blocking(f->opaque, block, NULL); - } + qio_channel_set_blocking(f->ioc, block, NULL); } /* - * Return the ioc object if it's a migration channel. Note: it can return NULL - * for callers passing in a non-migration qemufile. E.g. see qemu_fopen_bdrv() - * and its usage in e.g. load_snapshot(). So we need to check against NULL - * before using it. If without the check, migration_incoming_state_destroy() - * could fail for load_snapshot(). + * qemu_file_get_ioc: + * + * Get the ioc object for the file, without incrementing + * the reference count. + * + * Returns: the ioc object */ QIOChannel *qemu_file_get_ioc(QEMUFile *file) { - return file->has_ioc ? QIO_CHANNEL(file->opaque) : NULL; + return file->ioc; } diff --git a/migration/qemu-file.h b/migration/qemu-file.h index 3f36d4dc8c..96e72d8bd8 100644 --- a/migration/qemu-file.h +++ b/migration/qemu-file.h @@ -29,47 +29,12 @@ #include "exec/cpu-common.h" #include "io/channel.h" -/* Read a chunk of data from a file at the given position. The pos argument - * can be ignored if the file is only be used for streaming. The number of - * bytes actually read should be returned. - */ -typedef ssize_t (QEMUFileGetBufferFunc)(void *opaque, uint8_t *buf, - int64_t pos, size_t size, - Error **errp); - -/* Close a file - * - * Return negative error number on error, 0 or positive value on success. 
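Taken together, the renamed fields give the rate-limit cycle an obvious shape; a sketch of one window, where save_some_state() is hypothetical:

    qemu_file_set_rate_limit(f, max_bytes_per_window);   /* sets rate_limit_max */
    while (!qemu_file_rate_limit(f)) {
        save_some_state(f);    /* qemu_put_*() calls grow rate_limit_used */
    }
    qemu_file_reset_rate_limit(f);   /* new window: rate_limit_used back to 0 */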
- * - * The meaning of return value on success depends on the specific back-end being - * used. - */ -typedef int (QEMUFileCloseFunc)(void *opaque, Error **errp); - -/* Called to return the OS file descriptor associated to the QEMUFile. - */ -typedef int (QEMUFileGetFD)(void *opaque); - -/* Called to change the blocking mode of the file - */ -typedef int (QEMUFileSetBlocking)(void *opaque, bool enabled, Error **errp); - -/* - * This function writes an iovec to file. The handler must write all - * of the data or return a negative errno value. - */ -typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov, - int iovcnt, int64_t pos, - Error **errp); - /* * This function provides hooks around different * stages of RAM migration. - * 'opaque' is the backend specific data in QEMUFile * 'data' is call specific data associated with the 'flags' value */ -typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags, - void *data); +typedef int (QEMURamHookFunc)(QEMUFile *f, uint64_t flags, void *data); /* * Constants used by ram_control_* hooks @@ -84,34 +49,11 @@ typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags, * This function allows override of where the RAM page * is saved (such as RDMA, for example.) */ -typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque, - ram_addr_t block_offset, - ram_addr_t offset, - size_t size, - uint64_t *bytes_sent); - -/* - * Return a QEMUFile for comms in the opposite direction - */ -typedef QEMUFile *(QEMURetPathFunc)(void *opaque); - -/* - * Stop any read or write (depending on flags) on the underlying - * transport on the QEMUFile. - * Existing blocking reads/writes must be woken - * Returns 0 on success, -err on error - */ -typedef int (QEMUFileShutdownFunc)(void *opaque, bool rd, bool wr, - Error **errp); - -typedef struct QEMUFileOps { - QEMUFileGetBufferFunc *get_buffer; - QEMUFileCloseFunc *close; - QEMUFileSetBlocking *set_blocking; - QEMUFileWritevBufferFunc *writev_buffer; - QEMURetPathFunc *get_return_path; - QEMUFileShutdownFunc *shut_down; -} QEMUFileOps; +typedef size_t (QEMURamSaveFunc)(QEMUFile *f, + ram_addr_t block_offset, + ram_addr_t offset, + size_t size, + uint64_t *bytes_sent); typedef struct QEMUFileHooks { QEMURamHookFunc *before_ram_iterate; @@ -120,12 +62,41 @@ typedef struct QEMUFileHooks { QEMURamSaveFunc *save_page; } QEMUFileHooks; -QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc); +QEMUFile *qemu_file_new_input(QIOChannel *ioc); +QEMUFile *qemu_file_new_output(QIOChannel *ioc); void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks); -int qemu_get_fd(QEMUFile *f); int qemu_fclose(QEMUFile *f); -int64_t qemu_ftell(QEMUFile *f); -int64_t qemu_ftell_fast(QEMUFile *f); + +/* + * qemu_file_total_transferred: + * + * Report the total number of bytes transferred with + * this file. + * + * For writable files, any pending buffers will be + * flushed, so the reported value will be equal to + * the number of bytes transferred on the wire. + * + * For readable files, the reported value will be + * equal to the number of bytes transferred on the + * wire. + * + * Returns: the total bytes transferred + */ +int64_t qemu_file_total_transferred(QEMUFile *f); + +/* + * qemu_file_total_transferred_fast: + * + * As qemu_file_total_transferred except for writable + * files, where no flush is performed and the reported + * amount will include the size of any queued buffers, + * on top of the amount actually transferred. 
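In sketch form, assuming f has queued but unflushed output:

    int64_t estimate = qemu_file_total_transferred_fast(f); /* wire + queued, no flush */
    int64_t wire = qemu_file_total_transferred(f);          /* flushes, then reports */
    /* after the flush the two agree */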
+ * + * Returns: the total bytes transferred and queued */ +int64_t qemu_file_total_transferred_fast(QEMUFile *f); + /* * put_buffer without copying the buffer. * The buffer should be available till it is sent asynchronously. @@ -150,9 +121,23 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); */ int qemu_peek_byte(QEMUFile *f, int offset); void qemu_file_skip(QEMUFile *f, int size); -void qemu_update_position(QEMUFile *f, size_t size); +/* + * qemu_file_credit_transfer: + * + * Report on a number of bytes that have been transferred + * out of band from the main file object I/O methods. This + * accounting information tracks the total migration traffic. + */ +void qemu_file_credit_transfer(QEMUFile *f, size_t size); void qemu_file_reset_rate_limit(QEMUFile *f); -void qemu_file_update_transfer(QEMUFile *f, int64_t len); +/* + * qemu_file_acct_rate_limit: + * + * Report on a number of bytes that have been transferred + * out of band from the main file object I/O methods, and + * need to be applied to the rate limiting calculations + */ +void qemu_file_acct_rate_limit(QEMUFile *f, int64_t len); void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate); int64_t qemu_file_get_rate_limit(QEMUFile *f); int qemu_file_get_error_obj(QEMUFile *f, Error **errp); diff --git a/migration/ram.c b/migration/ram.c index 5f5e37f64d..01f9cc1d72 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -32,6 +32,7 @@ #include "qemu/bitmap.h" #include "qemu/madvise.h" #include "qemu/main-loop.h" +#include "io/channel-null.h" #include "xbzrle.h" #include "ram.h" #include "migration.h" @@ -457,8 +458,6 @@ static QemuThread *compress_threads; */ static QemuMutex comp_done_lock; static QemuCond comp_done_cond; -/* The empty QEMUFileOps will be used by file in CompressParam */ -static const QEMUFileOps empty_ops = { }; static QEMUFile *decomp_file; static DecompressParam *decomp_param; @@ -569,7 +568,8 @@ static int compress_threads_save_setup(void) /* comp_param[i].file is just used as a dummy buffer to save data, * set its ops to empty. */ - comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops, false); + comp_param[i].file = qemu_file_new_output( + QIO_CHANNEL(qio_channel_null_new())); comp_param[i].done = true; comp_param[i].quit = false; qemu_mutex_init(&comp_param[i].mutex); @@ -2300,7 +2300,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) } else { ram_counters.normal += pages; ram_transferred_add(size); - qemu_update_position(f, size); + qemu_file_credit_transfer(f, size); } } diff --git a/migration/rdma.c b/migration/rdma.c index 672d1958a9..94a55dd95b 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -21,7 +21,6 @@ #include "migration.h" #include "qemu-file.h" #include "ram.h" -#include "qemu-file-channel.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "qemu/module.h" @@ -1371,30 +1370,6 @@ const char *print_wrid(int wrid) } /* - * RDMA requires memory registration (mlock/pinning), but this is not good for - * overcommitment. - * - * In preparation for the future where LRU information or workload-specific - * writable writable working set memory access behavior is available to QEMU - * it would be nice to have in place the ability to UN-register/UN-pin - * particular memory regions from the RDMA hardware when it is determine that - * those regions of memory will likely not be accessed again in the near future.
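The ram.c hunk above is the first user of the new null channel: the compression threads need a QEMUFile that accounts for bytes but discards them, which is now spelled (sketch):

    QEMUFile *scratch =
        qemu_file_new_output(QIO_CHANNEL(qio_channel_null_new()));
    /* writes succeed and are counted; reads would report EOF */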
- * - * While we do not yet have such information right now, the following - * compile-time option allows us to perform a non-optimized version of this - * behavior. - * - * By uncommenting this option, you will cause *all* RDMA transfers to be - * unregistered immediately after the transfer completes on both sides of the - * connection. This has no effect in 'rdma-pin-all' mode, only regular mode. - * - * This will have a terrible impact on migration performance, so until future - * workload information or LRU information is available, do not attempt to use - * this feature except for basic testing. - */ -/* #define RDMA_UNREGISTRATION_EXAMPLE */ - -/* * Perform a non-optimized memory unregistration after every transfer * for demonstration purposes, only if pin-all is not requested. * @@ -1487,34 +1462,6 @@ static uint64_t qemu_rdma_make_wrid(uint64_t wr_id, uint64_t index, } /* - * Set bit for unregistration in the next iteration. - * We cannot transmit right here, but will unpin later. - */ -static void qemu_rdma_signal_unregister(RDMAContext *rdma, uint64_t index, - uint64_t chunk, uint64_t wr_id) -{ - if (rdma->unregistrations[rdma->unregister_next] != 0) { - error_report("rdma migration: queue is full"); - } else { - RDMALocalBlock *block = &(rdma->local_ram_blocks.block[index]); - - if (!test_and_set_bit(chunk, block->unregister_bitmap)) { - trace_qemu_rdma_signal_unregister_append(chunk, - rdma->unregister_next); - - rdma->unregistrations[rdma->unregister_next++] = - qemu_rdma_make_wrid(wr_id, index, chunk); - - if (rdma->unregister_next == RDMA_SIGNALED_SEND_MAX) { - rdma->unregister_next = 0; - } - } else { - trace_qemu_rdma_signal_unregister_already(chunk); - } - } -} - -/* * Consult the connection manager to see a work request * (of any kind) has completed. * Return the work request ID that completed. @@ -1571,18 +1518,6 @@ static uint64_t qemu_rdma_poll(RDMAContext *rdma, struct ibv_cq *cq, if (rdma->nb_sent > 0) { rdma->nb_sent--; } - - if (!rdma->pin_all) { - /* - * FYI: If one wanted to signal a specific chunk to be unregistered - * using LRU or workload-specific information, this is the function - * you would call to do so. That chunk would then get asynchronously - * unregistered later. - */ -#ifdef RDMA_UNREGISTRATION_EXAMPLE - qemu_rdma_signal_unregister(rdma, index, chunk, wc.wr_id); -#endif - } } else { trace_qemu_rdma_poll_other(print_wrid(wr_id), wr_id, rdma->nb_sent); } @@ -2137,11 +2072,6 @@ retry: chunk_end = ram_chunk_end(block, chunk + chunks); - if (!rdma->pin_all) { -#ifdef RDMA_UNREGISTRATION_EXAMPLE - qemu_rdma_unregister_waiting(rdma); -#endif - } while (test_bit(chunk, block->transit_bitmap)) { (void)count; @@ -3278,33 +3208,17 @@ qio_channel_rdma_shutdown(QIOChannel *ioc, * Offset is an offset to be added to block_offset and used * to also lookup the corresponding RAMBlock. * - * @size > 0 : - * Initiate an transfer this size. - * - * @size == 0 : - * A 'hint' or 'advice' that means that we wish to speculatively - * and asynchronously unregister this memory. In this case, there is no - * guarantee that the unregister will actually happen, for example, - * if the memory is being actively transmitted. Additionally, the memory - * may be re-registered at any future time if a write within the same - * chunk was requested again, even if you attempted to unregister it - * here. - * - * @size < 0 : TODO, not yet supported - * Unregister the memory NOW. 
This means that the caller does not - * expect there to be any future RDMA transfers and we just want to clean - * things up. This is used in case the upper layer owns the memory and - * cannot wait for qemu_fclose() to occur. + * @size : Number of bytes to transfer * * @bytes_sent : User-specificed pointer to indicate how many bytes were * sent. Usually, this will not be more than a few bytes of * the protocol because most transfers are sent asynchronously. */ -static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque, +static size_t qemu_rdma_save_page(QEMUFile *f, ram_addr_t block_offset, ram_addr_t offset, size_t size, uint64_t *bytes_sent) { - QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque); + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f)); RDMAContext *rdma; int ret; @@ -3323,61 +3237,27 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque, qemu_fflush(f); - if (size > 0) { - /* - * Add this page to the current 'chunk'. If the chunk - * is full, or the page doesn't belong to the current chunk, - * an actual RDMA write will occur and a new chunk will be formed. - */ - ret = qemu_rdma_write(f, rdma, block_offset, offset, size); - if (ret < 0) { - error_report("rdma migration: write error! %d", ret); - goto err; - } - - /* - * We always return 1 bytes because the RDMA - * protocol is completely asynchronous. We do not yet know - * whether an identified chunk is zero or not because we're - * waiting for other pages to potentially be merged with - * the current chunk. So, we have to call qemu_update_position() - * later on when the actual write occurs. - */ - if (bytes_sent) { - *bytes_sent = 1; - } - } else { - uint64_t index, chunk; - - /* TODO: Change QEMUFileOps prototype to be signed: size_t => long - if (size < 0) { - ret = qemu_rdma_drain_cq(f, rdma); - if (ret < 0) { - fprintf(stderr, "rdma: failed to synchronously drain" - " completion queue before unregistration.\n"); - goto err; - } - } - */ - - ret = qemu_rdma_search_ram_block(rdma, block_offset, - offset, size, &index, &chunk); - - if (ret) { - error_report("ram block search failed"); - goto err; - } - - qemu_rdma_signal_unregister(rdma, index, chunk, 0); + /* + * Add this page to the current 'chunk'. If the chunk + * is full, or the page doesn't belong to the current chunk, + * an actual RDMA write will occur and a new chunk will be formed. + */ + ret = qemu_rdma_write(f, rdma, block_offset, offset, size); + if (ret < 0) { + error_report("rdma migration: write error! %d", ret); + goto err; + } - /* - * TODO: Synchronous, guaranteed unregistration (should not occur during - * fast-path). Otherwise, unregisters will process on the next call to - * qemu_rdma_drain_cq() - if (size < 0) { - qemu_rdma_unregister_waiting(rdma); - } - */ + /* + * We always return 1 bytes because the RDMA + * protocol is completely asynchronous. We do not yet know + * whether an identified chunk is zero or not because we're + * waiting for other pages to potentially be merged with + * the current chunk. So, we have to call qemu_update_position() + * later on when the actual write occurs. 
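One nit on the retained comment above: qemu_update_position() is renamed to qemu_file_credit_transfer() by this same series, so the deferred accounting it describes now looks like (sketch; bytes_on_wire is illustrative):

    /* once the RDMA write actually completes on the wire */
    qemu_file_credit_transfer(f, bytes_on_wire);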
+ */ + if (bytes_sent) { + *bytes_sent = 1; } /* @@ -3950,14 +3830,15 @@ rdma_block_notification_handle(QIOChannelRDMA *rioc, const char *name) return 0; } -static int rdma_load_hook(QEMUFile *f, void *opaque, uint64_t flags, void *data) +static int rdma_load_hook(QEMUFile *f, uint64_t flags, void *data) { + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f)); switch (flags) { case RAM_CONTROL_BLOCK_REG: - return rdma_block_notification_handle(opaque, data); + return rdma_block_notification_handle(rioc, data); case RAM_CONTROL_HOOK: - return qemu_rdma_registration_handle(f, opaque); + return qemu_rdma_registration_handle(f, rioc); default: /* Shouldn't be called with any other values */ @@ -3965,10 +3846,10 @@ static int rdma_load_hook(QEMUFile *f, void *opaque, uint64_t flags, void *data) } } -static int qemu_rdma_registration_start(QEMUFile *f, void *opaque, +static int qemu_rdma_registration_start(QEMUFile *f, uint64_t flags, void *data) { - QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque); + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f)); RDMAContext *rdma; RCU_READ_LOCK_GUARD(); @@ -3994,10 +3875,10 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque, * Inform dest that dynamic registrations are done for now. * First, flush writes, if any. */ -static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque, +static int qemu_rdma_registration_stop(QEMUFile *f, uint64_t flags, void *data) { - QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque); + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f)); RDMAContext *rdma; RDMAControlHeader head = { .len = 0, .repeat = 1 }; int ret = 0; @@ -4170,12 +4051,12 @@ static QEMUFile *qemu_fopen_rdma(RDMAContext *rdma, const char *mode) rioc = QIO_CHANNEL_RDMA(object_new(TYPE_QIO_CHANNEL_RDMA)); if (mode[0] == 'w') { - rioc->file = qemu_fopen_channel_output(QIO_CHANNEL(rioc)); + rioc->file = qemu_file_new_output(QIO_CHANNEL(rioc)); rioc->rdmaout = rdma; rioc->rdmain = rdma->return_path; qemu_file_set_hooks(rioc->file, &rdma_write_hooks); } else { - rioc->file = qemu_fopen_channel_input(QIO_CHANNEL(rioc)); + rioc->file = qemu_file_new_input(QIO_CHANNEL(rioc)); rioc->rdmain = rdma; rioc->rdmaout = rdma->return_path; qemu_file_set_hooks(rioc->file, &rdma_read_hooks); diff --git a/migration/savevm.c b/migration/savevm.c index d9076897b8..e8a1b96fcd 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -35,8 +35,8 @@ #include "migration/misc.h" #include "migration/register.h" #include "migration/global_state.h" +#include "migration/channel-block.h" #include "ram.h" -#include "qemu-file-channel.h" #include "qemu-file.h" #include "savevm.h" #include "postcopy-ram.h" @@ -130,48 +130,13 @@ static struct mig_cmd_args { /***********************************************************/ /* savevm/loadvm support */ -static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt, - int64_t pos, Error **errp) -{ - int ret; - QEMUIOVector qiov; - - qemu_iovec_init_external(&qiov, iov, iovcnt); - ret = bdrv_writev_vmstate(opaque, &qiov, pos); - if (ret < 0) { - return ret; - } - - return qiov.size; -} - -static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos, - size_t size, Error **errp) -{ - return bdrv_load_vmstate(opaque, buf, pos, size); -} - -static int bdrv_fclose(void *opaque, Error **errp) -{ - return bdrv_flush(opaque); -} - -static const QEMUFileOps bdrv_read_ops = { - .get_buffer = block_get_buffer, - .close = bdrv_fclose -}; - -static const QEMUFileOps bdrv_write_ops = { - 
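The rdma.c hunks above all make the same move: instead of threading a void *opaque through the QEMUFile hook table, each hook now recovers its channel from the QEMUFile itself. A minimal sketch of the resulting hook shape; my_rdma_hook() and its body comment are illustrative, and only qemu_file_get_ioc() and QIO_CHANNEL_RDMA() are taken from the patch:

    /* Hypothetical hook: state is derived from the file, not passed in. */
    static int my_rdma_hook(QEMUFile *f, uint64_t flags, void *data)
    {
        /* The channel backing f is known to be an RDMA channel here. */
        QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));

        /* ... use rioc->rdmain / rioc->rdmaout as the real hooks do ... */
        return 0;
    }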
diff --git a/migration/savevm.c b/migration/savevm.c
index d9076897b8..e8a1b96fcd 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -35,8 +35,8 @@
 #include "migration/misc.h"
 #include "migration/register.h"
 #include "migration/global_state.h"
+#include "migration/channel-block.h"
 #include "ram.h"
-#include "qemu-file-channel.h"
 #include "qemu-file.h"
 #include "savevm.h"
 #include "postcopy-ram.h"
@@ -130,48 +130,13 @@ static struct mig_cmd_args {
 /***********************************************************/
 /* savevm/loadvm support */
 
-static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt,
-                                   int64_t pos, Error **errp)
-{
-    int ret;
-    QEMUIOVector qiov;
-
-    qemu_iovec_init_external(&qiov, iov, iovcnt);
-    ret = bdrv_writev_vmstate(opaque, &qiov, pos);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return qiov.size;
-}
-
-static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
-                                size_t size, Error **errp)
-{
-    return bdrv_load_vmstate(opaque, buf, pos, size);
-}
-
-static int bdrv_fclose(void *opaque, Error **errp)
-{
-    return bdrv_flush(opaque);
-}
-
-static const QEMUFileOps bdrv_read_ops = {
-    .get_buffer = block_get_buffer,
-    .close      = bdrv_fclose
-};
-
-static const QEMUFileOps bdrv_write_ops = {
-    .writev_buffer = block_writev_buffer,
-    .close         = bdrv_fclose
-};
-
 static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
 {
     if (is_writable) {
-        return qemu_fopen_ops(bs, &bdrv_write_ops, false);
+        return qemu_file_new_output(QIO_CHANNEL(qio_channel_block_new(bs)));
+    } else {
+        return qemu_file_new_input(QIO_CHANNEL(qio_channel_block_new(bs)));
     }
-    return qemu_fopen_ops(bs, &bdrv_read_ops, false);
 }
 
@@ -916,9 +881,9 @@ static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se,
 {
     int64_t old_offset, size;
 
-    old_offset = qemu_ftell_fast(f);
+    old_offset = qemu_file_total_transferred_fast(f);
     se->ops->save_state(f, se->opaque);
-    size = qemu_ftell_fast(f) - old_offset;
+    size = qemu_file_total_transferred_fast(f) - old_offset;
 
     if (vmdesc) {
         json_writer_int64(vmdesc, "size", size);
@@ -2193,7 +2158,7 @@ static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
     bioc->usage += length;
     trace_loadvm_handle_cmd_packaged_received(ret);
 
-    QEMUFile *packf = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
+    QEMUFile *packf = qemu_file_new_input(QIO_CHANNEL(bioc));
 
     ret = qemu_loadvm_state_main(packf, mis);
     trace_loadvm_handle_cmd_packaged_main(ret);
@@ -2887,7 +2852,7 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
         goto the_end;
     }
     ret = qemu_savevm_state(f, errp);
-    vm_state_size = qemu_ftell(f);
+    vm_state_size = qemu_file_total_transferred(f);
     ret2 = qemu_fclose(f);
     if (ret < 0) {
         goto the_end;
@@ -2951,7 +2916,7 @@ void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live,
         goto the_end;
     }
     qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-save-state");
-    f = qemu_fopen_channel_output(QIO_CHANNEL(ioc));
+    f = qemu_file_new_output(QIO_CHANNEL(ioc));
     object_unref(OBJECT(ioc));
     ret = qemu_save_device_state(f);
     if (ret < 0 || qemu_fclose(f) < 0) {
@@ -2998,7 +2963,7 @@ void qmp_xen_load_devices_state(const char *filename, Error **errp)
         return;
     }
     qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-load-state");
-    f = qemu_fopen_channel_input(QIO_CHANNEL(ioc));
+    f = qemu_file_new_input(QIO_CHANNEL(ioc));
     object_unref(OBJECT(ioc));
     ret = qemu_loadvm_state(f);
diff --git a/migration/vmstate.c b/migration/vmstate.c
index 36ae8b9e19..924494bda3 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -360,7 +360,7 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd,
             void *curr_elem = first_elem + size * i;
 
             vmsd_desc_field_start(vmsd, vmdesc_loop, field, i, n_elems);
-            old_offset = qemu_ftell_fast(f);
+            old_offset = qemu_file_total_transferred_fast(f);
             if (field->flags & VMS_ARRAY_OF_POINTER) {
                 assert(curr_elem);
                 curr_elem = *(void **)curr_elem;
@@ -390,7 +390,8 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd,
                 return ret;
             }
 
-            written_bytes = qemu_ftell_fast(f) - old_offset;
+            written_bytes = qemu_file_total_transferred_fast(f) -
+                            old_offset;
             vmsd_desc_field_end(vmsd, vmdesc_loop, field, written_bytes, i);
 
             /* Compressed arrays only care about the first element */
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 47a27326ee..ca98df0495 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1311,12 +1311,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
         p->has_multifd_zstd_level = true;
         visit_type_uint8(v, param, &p->multifd_zstd_level, &err);
         break;
-#ifdef CONFIG_LINUX
-    case MIGRATION_PARAMETER_ZERO_COPY_SEND:
-        p->has_zero_copy_send = true;
-        visit_type_bool(v, param, &p->zero_copy_send, &err);
-        break;
-#endif
     case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE:
         p->has_xbzrle_cache_size = true;
         if (!visit_type_size(v, param, &cache_size, &err)) {
diff --git a/qapi/migration.json b/qapi/migration.json
index 6130cd9fae..7102e474a6 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -461,6 +461,13 @@
 #                       procedure starts. The VM RAM is saved with running VM.
 #                       (since 6.0)
 #
+# @zero-copy-send: Controls behavior on sending memory pages on migration.
+#                  When true, enables a zero-copy mechanism for sending
+#                  memory pages, if host supports it.
+#                  Requires that QEMU be permitted to use locked memory
+#                  for guest RAM pages.
+#                  (since 7.1)
+#
 # Features:
 # @unstable: Members @x-colo and @x-ignore-shared are experimental.
 #
@@ -474,7 +481,8 @@
            'block', 'return-path', 'pause-before-switchover', 'multifd',
            'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
            { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
-           'validate-uuid', 'background-snapshot'] }
+           'validate-uuid', 'background-snapshot',
+           'zero-copy-send'] }
 
 ##
 # @MigrationCapabilityStatus:
@@ -738,12 +746,6 @@
 #                      will consume more CPU.
 #                      Defaults to 1. (Since 5.0)
 #
-# @zero-copy-send: Controls behavior on sending memory pages on migration.
-#                  When true, enables a zero-copy mechanism for sending
-#                  memory pages, if host supports it.
-#                  Requires that QEMU be permitted to use locked memory
-#                  for guest RAM pages.
-#                  Defaults to false. (Since 7.1)
 #
 # @block-bitmap-mapping: Maps block nodes and bitmaps on them to
 #                        aliases for the purpose of dirty bitmap migration. Such
@@ -784,7 +786,6 @@
            'xbzrle-cache-size', 'max-postcopy-bandwidth',
            'max-cpu-throttle', 'multifd-compression',
            'multifd-zlib-level' ,'multifd-zstd-level',
-           { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'},
            'block-bitmap-mapping' ] }
 
 ##
@@ -911,13 +912,6 @@
 #                      will consume more CPU.
 #                      Defaults to 1. (Since 5.0)
 #
-# @zero-copy-send: Controls behavior on sending memory pages on migration.
-#                  When true, enables a zero-copy mechanism for sending
-#                  memory pages, if host supports it.
-#                  Requires that QEMU be permitted to use locked memory
-#                  for guest RAM pages.
-#                  Defaults to false. (Since 7.1)
-#
 # @block-bitmap-mapping: Maps block nodes and bitmaps on them to
 #                        aliases for the purpose of dirty bitmap migration. Such
 #                        aliases may for example be the corresponding names on the
@@ -972,7 +966,6 @@
             '*multifd-compression': 'MultiFDCompression',
             '*multifd-zlib-level': 'uint8',
             '*multifd-zstd-level': 'uint8',
-            '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' },
             '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
 
 ##
@@ -1119,13 +1112,6 @@
 #                      will consume more CPU.
 #                      Defaults to 1. (Since 5.0)
 #
-# @zero-copy-send: Controls behavior on sending memory pages on migration.
-#                  When true, enables a zero-copy mechanism for sending
-#                  memory pages, if host supports it.
-#                  Requires that QEMU be permitted to use locked memory
-#                  for guest RAM pages.
-#                  Defaults to false. (Since 7.1)
-#
 # @block-bitmap-mapping: Maps block nodes and bitmaps on them to
 #                        aliases for the purpose of dirty bitmap migration. Such
 #                        aliases may for example be the corresponding names on the
@@ -1178,7 +1164,6 @@
             '*multifd-compression': 'MultiFDCompression',
             '*multifd-zlib-level': 'uint8',
             '*multifd-zstd-level': 'uint8',
-            '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' },
             '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
 
 ##
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index fb16be57a6..dc3c3e5f2e 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -669,7 +669,7 @@ void tcg_iommu_init_notifier_list(CPUState *cpu)
 
 /* Called from RCU critical section */
 MemoryRegionSection *
-address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
+address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr,
                                   hwaddr *xlat, hwaddr *plen,
                                   MemTxAttrs attrs, int *prot)
 {
@@ -678,6 +678,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
     IOMMUMemoryRegionClass *imrc;
     IOMMUTLBEntry iotlb;
     int iommu_idx;
+    hwaddr addr = orig_addr;
     AddressSpaceDispatch *d =
         qatomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
 
@@ -722,6 +723,16 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
     return section;
 
 translate_fail:
+    /*
+     * We should be given a page-aligned address -- certainly
+     * tlb_set_page_with_attrs() does so.  The page offset of xlat
+     * is used to index sections[], and PHYS_SECTION_UNASSIGNED = 0.
+     * The page portion of xlat will be logged by memory_region_access_valid()
+     * when this memory access is rejected, so use the original untranslated
+     * physical address.
+     */
+    assert((orig_addr & ~TARGET_PAGE_MASK) == 0);
+    *xlat = orig_addr;
     return &d->map.sections[PHYS_SECTION_UNASSIGNED];
 }
 
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
index 5d70e34dd5..05b992ff73 100644
--- a/target/avr/cpu.c
+++ b/target/avr/cpu.c
@@ -214,7 +214,6 @@ static void avr_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = avr_cpu_has_work;
     cc->dump_state = avr_cpu_dump_state;
     cc->set_pc = avr_cpu_set_pc;
-    cc->memory_rw_debug = avr_cpu_memory_rw_debug;
     dc->vmsd = &vms_avr_cpu;
     cc->sysemu_ops = &avr_sysemu_ops;
     cc->disas_set_info = avr_cpu_disas_set_info;
diff --git a/target/avr/cpu.h b/target/avr/cpu.h
index d304f33301..96419c0c2b 100644
--- a/target/avr/cpu.h
+++ b/target/avr/cpu.h
@@ -184,8 +184,6 @@ void avr_cpu_tcg_init(void);
 
 void avr_cpu_list(void);
 int cpu_avr_exec(CPUState *cpu);
-int avr_cpu_memory_rw_debug(CPUState *cs, vaddr address, uint8_t *buf,
-                            int len, bool is_write);
 
 enum {
     TB_FLAGS_FULL_ACCESS = 1,
diff --git a/target/avr/helper.c b/target/avr/helper.c
index c27f702901..db76452f9a 100644
--- a/target/avr/helper.c
+++ b/target/avr/helper.c
@@ -93,12 +93,6 @@ void avr_cpu_do_interrupt(CPUState *cs)
     cs->exception_index = -1;
 }
 
-int avr_cpu_memory_rw_debug(CPUState *cs, vaddr addr, uint8_t *buf,
-                            int len, bool is_write)
-{
-    return cpu_memory_rw_debug(cs, addr, buf, len, is_write);
-}
-
 hwaddr avr_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
 {
     return addr; /* I assume 1:1 address correspondence */
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 0f891afa04..c16cb8dbe7 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -7186,6 +7186,9 @@ static void ppc_cpu_reset(DeviceState *dev)
         }
         pmu_update_summaries(env);
     }
+
+    /* clean any pending stop state */
+    env->resume_as_sreset = 0;
 #endif
     hreg_compute_hflags(env);
     env->reserve_addr = (target_ulong)-1ULL;
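The savevm.c and vmstate.c hunks above are mostly a mechanical rename, qemu_ftell*() becoming qemu_file_total_transferred*(), but the pattern they preserve is worth spelling out: a section's size is obtained by sampling the transfer counter before and after the handler runs. A sketch under that assumption; save_my_state() is a hypothetical handler, and the counter API is the one introduced above:

    /* Measure how many bytes one save handler adds to the stream. */
    static int64_t measure_section(QEMUFile *f, void *opaque)
    {
        int64_t before = qemu_file_total_transferred_fast(f);

        save_my_state(f, opaque);   /* hypothetical save handler */

        /* Counter delta = bytes this section contributed. */
        return qemu_file_total_transferred_fast(f) - before;
    }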
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index fed0ce420a..7ab6beadad 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -464,7 +464,7 @@ static void do_fpscr_check_status(CPUPPCState *env, uintptr_t raddr)
     }
     cs->exception_index = POWERPC_EXCP_PROGRAM;
     env->error_code = error | POWERPC_EXCP_FP;
-    env->fpscr |= error ? FP_FEX : 0;
+    env->fpscr |= FP_FEX;
     /* Deferred floating-point exception after target FPSCR update */
     if (fp_exceptions_enabled(env)) {
         raise_exception_err_ra(env, cs->exception_index,
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 6233e28d85..d627cfe6ed 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -175,6 +175,14 @@ DEF_HELPER_FLAGS_3(VMULOSW, TCG_CALL_NO_RWG, void, avr, avr, avr)
 DEF_HELPER_FLAGS_3(VMULOUB, TCG_CALL_NO_RWG, void, avr, avr, avr)
 DEF_HELPER_FLAGS_3(VMULOUH, TCG_CALL_NO_RWG, void, avr, avr, avr)
 DEF_HELPER_FLAGS_3(VMULOUW, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VDIVSQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VDIVUQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VDIVESD, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VDIVEUD, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VDIVESQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VDIVEUQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VMODSQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_3(VMODUQ, TCG_CALL_NO_RWG, void, avr, avr, avr)
 DEF_HELPER_FLAGS_3(vslo, TCG_CALL_NO_RWG, void, avr, avr, avr)
 DEF_HELPER_FLAGS_3(vsro, TCG_CALL_NO_RWG, void, avr, avr, avr)
 DEF_HELPER_FLAGS_3(vsrv, TCG_CALL_NO_RWG, void, avr, avr, avr)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 18a94fa3b5..6ea48d5163 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -786,3 +786,26 @@ XVF64GERPP      111011 ... -- .... 0 ..... 00111010 ..-  @XX3_at xa=%xx_xa_pair
 XVF64GERPN      111011 ... -- .... 0 ..... 10111010 ..-  @XX3_at xa=%xx_xa_pair
 XVF64GERNP      111011 ... -- .... 0 ..... 01111010 ..-  @XX3_at xa=%xx_xa_pair
 XVF64GERNN      111011 ... -- .... 0 ..... 11111010 ..-  @XX3_at xa=%xx_xa_pair
+
+## Vector Division Instructions
+
+VDIVSW          000100 ..... ..... ..... 00110001011    @VX
+VDIVUW          000100 ..... ..... ..... 00010001011    @VX
+VDIVSD          000100 ..... ..... ..... 00111001011    @VX
+VDIVUD          000100 ..... ..... ..... 00011001011    @VX
+VDIVSQ          000100 ..... ..... ..... 00100001011    @VX
+VDIVUQ          000100 ..... ..... ..... 00000001011    @VX
+
+VDIVESW         000100 ..... ..... ..... 01110001011    @VX
+VDIVEUW         000100 ..... ..... ..... 01010001011    @VX
+VDIVESD         000100 ..... ..... ..... 01111001011    @VX
+VDIVEUD         000100 ..... ..... ..... 01011001011    @VX
+VDIVESQ         000100 ..... ..... ..... 01100001011    @VX
+VDIVEUQ         000100 ..... ..... ..... 01000001011    @VX
+
+VMODSW          000100 ..... ..... ..... 11110001011    @VX
+VMODUW          000100 ..... ..... ..... 11010001011    @VX
+VMODSD          000100 ..... ..... ..... 11111001011    @VX
+VMODUD          000100 ..... ..... ..... 11011001011    @VX
+VMODSQ          000100 ..... ..... ..... 11100001011    @VX
+VMODUQ          000100 ..... ..... ..... 11000001011    @VX
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 105b626d1b..3ae03f73d3 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -789,7 +789,7 @@ static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask)
     int64_t psum = 0;
     for (int i = 0; i < 8; i++, mask >>= 1) {
         if (mask & 1) {
-            psum += sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4);
+            psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4);
         }
     }
     return psum;
@@ -811,7 +811,8 @@ static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask)
     int64_t psum = 0;
     for (int i = 0; i < 2; i++, mask >>= 1) {
         if (mask & 1) {
-            psum += sextract32(a, 16 * i, 16) * sextract32(b, 16 * i, 16);
+            psum += (int64_t)sextract32(a, 16 * i, 16) *
+                    sextract32(b, 16 * i, 16);
         }
     }
     return psum;
@@ -1162,6 +1163,112 @@ void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
     *t = tmp;
 }
 
+void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+    Int128 neg1 = int128_makes64(-1);
+    Int128 int128_min = int128_make128(0, INT64_MIN);
+    if (likely(int128_nz(b->s128) &&
+              (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
+        t->s128 = int128_divs(a->s128, b->s128);
+    } else {
+        t->s128 = a->s128; /* Undefined behavior */
+    }
+}
+
+void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+    if (int128_nz(b->s128)) {
+        t->s128 = int128_divu(a->s128, b->s128);
+    } else {
+        t->s128 = a->s128; /* Undefined behavior */
+    }
+}
+
+void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+    int i;
+    int64_t high;
+    uint64_t low;
+    for (i = 0; i < 2; i++) {
+        high = a->s64[i];
+        low = 0;
+        if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) {
+            t->s64[i] = a->s64[i]; /* Undefined behavior */
+        } else {
+            divs128(&low, &high, b->s64[i]);
+            t->s64[i] = low;
+        }
+    }
+}
+
+void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+    int i;
+    uint64_t high, low;
+    for (i = 0; i < 2; i++) {
+        high = a->u64[i];
+        low = 0;
+        if (unlikely(!b->u64[i])) {
+            t->u64[i] = a->u64[i]; /* Undefined behavior */
+        } else {
+            divu128(&low, &high, b->u64[i]);
+            t->u64[i] = low;
+        }
+    }
+}
+
+void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+    Int128 high, low;
+    Int128 int128_min = int128_make128(0, INT64_MIN);
+    Int128 neg1 = int128_makes64(-1);
+
+    high = a->s128;
+    low = int128_zero();
+    if (unlikely(!int128_nz(b->s128) ||
+                 (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) {
+        t->s128 = a->s128; /* Undefined behavior */
+    } else {
+        divs256(&low, &high, b->s128);
+        t->s128 = low;
+    }
+}
+
+void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+    Int128 high, low;
+
+    high = a->s128;
+    low = int128_zero();
+    if (unlikely(!int128_nz(b->s128))) {
+        t->s128 = a->s128; /* Undefined behavior */
+    } else {
+        divu256(&low, &high, b->s128);
+        t->s128 = low;
+    }
+}
+
+void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+    Int128 neg1 = int128_makes64(-1);
+    Int128 int128_min = int128_make128(0, INT64_MIN);
+    if (likely(int128_nz(b->s128) &&
+              (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
+        t->s128 = int128_rems(a->s128, b->s128);
+    } else {
+        t->s128 = int128_zero(); /* Undefined behavior */
+    }
+}
+
+void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
+{
+    if (likely(int128_nz(b->s128))) {
+        t->s128 = int128_remu(a->s128, b->s128);
+    } else {
+        t->s128 = int128_zero(); /* Undefined behavior */
+    }
+}
+
 void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
 {
     ppc_avr_t result;
@@ -1307,14 +1414,13 @@ XXGENPCV(XXGENPCVDM, 8)
 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
 #define VBPERMD_INDEX(i) (i)
 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
-#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
 #else
 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
 #define VBPERMD_INDEX(i) (1 - i)
 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
-#define EXTRACT_BIT(avr, i, index) \
-        (extract64((avr)->u64[1 - i], 63 - index, 1))
 #endif
+#define EXTRACT_BIT(avr, i, index) \
+        (extract64((avr)->VsrD(i), 63 - index, 1))
 
 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 {
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index d7524c3204..0b563bed37 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -3238,6 +3238,157 @@ TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64)
 TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64)
 TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64)
 
+static bool do_vdiv_vmod(DisasContext *ctx, arg_VX *a, const int vece,
+                         void (*func_32)(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b),
+                         void (*func_64)(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b))
+{
+    const GVecGen3 op = {
+        .fni4 = func_32,
+        .fni8 = func_64,
+        .vece = vece
+    };
+
+    REQUIRE_VECTOR(ctx);
+
+    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
+                   avr_full_offset(a->vrb), 16, 16, &op);
+
+    return true;
+}
+
+#define DIVU32(NAME, DIV)                                               \
+static void NAME(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)                    \
+{                                                                       \
+    TCGv_i32 zero = tcg_constant_i32(0);                                \
+    TCGv_i32 one = tcg_constant_i32(1);                                 \
+    tcg_gen_movcond_i32(TCG_COND_EQ, b, b, zero, one, b);               \
+    DIV(t, a, b);                                                       \
+}
+
+#define DIVS32(NAME, DIV)                                               \
+static void NAME(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)                    \
+{                                                                       \
+    TCGv_i32 t0 = tcg_temp_new_i32();                                   \
+    TCGv_i32 t1 = tcg_temp_new_i32();                                   \
+    tcg_gen_setcondi_i32(TCG_COND_EQ, t0, a, INT32_MIN);                \
+    tcg_gen_setcondi_i32(TCG_COND_EQ, t1, b, -1);                       \
+    tcg_gen_and_i32(t0, t0, t1);                                        \
+    tcg_gen_setcondi_i32(TCG_COND_EQ, t1, b, 0);                        \
+    tcg_gen_or_i32(t0, t0, t1);                                         \
+    tcg_gen_movi_i32(t1, 0);                                            \
+    tcg_gen_movcond_i32(TCG_COND_NE, b, t0, t1, t0, b);                 \
+    DIV(t, a, b);                                                       \
+    tcg_temp_free_i32(t0);                                              \
+    tcg_temp_free_i32(t1);                                              \
+}
+
+#define DIVU64(NAME, DIV)                                               \
+static void NAME(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)                    \
+{                                                                       \
+    TCGv_i64 zero = tcg_constant_i64(0);                                \
+    TCGv_i64 one = tcg_constant_i64(1);                                 \
+    tcg_gen_movcond_i64(TCG_COND_EQ, b, b, zero, one, b);               \
+    DIV(t, a, b);                                                       \
+}
+
+#define DIVS64(NAME, DIV)                                               \
+static void NAME(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)                    \
+{                                                                       \
+    TCGv_i64 t0 = tcg_temp_new_i64();                                   \
+    TCGv_i64 t1 = tcg_temp_new_i64();                                   \
+    tcg_gen_setcondi_i64(TCG_COND_EQ, t0, a, INT64_MIN);                \
+    tcg_gen_setcondi_i64(TCG_COND_EQ, t1, b, -1);                       \
+    tcg_gen_and_i64(t0, t0, t1);                                        \
+    tcg_gen_setcondi_i64(TCG_COND_EQ, t1, b, 0);                        \
+    tcg_gen_or_i64(t0, t0, t1);                                         \
+    tcg_gen_movi_i64(t1, 0);                                            \
+    tcg_gen_movcond_i64(TCG_COND_NE, b, t0, t1, t0, b);                 \
+    DIV(t, a, b);                                                       \
+    tcg_temp_free_i64(t0);                                              \
+    tcg_temp_free_i64(t1);                                              \
+}
+
+DIVS32(do_divsw, tcg_gen_div_i32)
+DIVU32(do_divuw, tcg_gen_divu_i32)
+DIVS64(do_divsd, tcg_gen_div_i64)
+DIVU64(do_divud, tcg_gen_divu_i64)
+
+TRANS_FLAGS2(ISA310, VDIVSW, do_vdiv_vmod, MO_32, do_divsw, NULL)
+TRANS_FLAGS2(ISA310, VDIVUW, do_vdiv_vmod, MO_32, do_divuw, NULL)
+TRANS_FLAGS2(ISA310, VDIVSD, do_vdiv_vmod, MO_64, NULL, do_divsd)
+TRANS_FLAGS2(ISA310, VDIVUD, do_vdiv_vmod, MO_64, NULL, do_divud)
+TRANS_FLAGS2(ISA310, VDIVSQ, do_vx_helper, gen_helper_VDIVSQ)
+TRANS_FLAGS2(ISA310, VDIVUQ, do_vx_helper, gen_helper_VDIVUQ)
+
+static void do_dives_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i64 val1, val2;
+
+    val1 = tcg_temp_new_i64();
+    val2 = tcg_temp_new_i64();
+
+    tcg_gen_ext_i32_i64(val1, a);
+    tcg_gen_ext_i32_i64(val2, b);
+
+    /* (a << 32)/b */
+    tcg_gen_shli_i64(val1, val1, 32);
+    tcg_gen_div_i64(val1, val1, val2);
+
+    /* if quotient doesn't fit in 32 bits the result is undefined */
+    tcg_gen_extrl_i64_i32(t, val1);
+
+    tcg_temp_free_i64(val1);
+    tcg_temp_free_i64(val2);
+}
+
+static void do_diveu_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
+{
+    TCGv_i64 val1, val2;
+
+    val1 = tcg_temp_new_i64();
+    val2 = tcg_temp_new_i64();
+
+    tcg_gen_extu_i32_i64(val1, a);
+    tcg_gen_extu_i32_i64(val2, b);
+
+    /* (a << 32)/b */
+    tcg_gen_shli_i64(val1, val1, 32);
+    tcg_gen_divu_i64(val1, val1, val2);
+
+    /* if quotient doesn't fit in 32 bits the result is undefined */
+    tcg_gen_extrl_i64_i32(t, val1);
+
+    tcg_temp_free_i64(val1);
+    tcg_temp_free_i64(val2);
+}
+
+DIVS32(do_divesw, do_dives_i32)
+DIVU32(do_diveuw, do_diveu_i32)
+
+DIVS32(do_modsw, tcg_gen_rem_i32)
+DIVU32(do_moduw, tcg_gen_remu_i32)
+DIVS64(do_modsd, tcg_gen_rem_i64)
+DIVU64(do_modud, tcg_gen_remu_i64)
+
+TRANS_FLAGS2(ISA310, VDIVESW, do_vdiv_vmod, MO_32, do_divesw, NULL)
+TRANS_FLAGS2(ISA310, VDIVEUW, do_vdiv_vmod, MO_32, do_diveuw, NULL)
+TRANS_FLAGS2(ISA310, VDIVESD, do_vx_helper, gen_helper_VDIVESD)
+TRANS_FLAGS2(ISA310, VDIVEUD, do_vx_helper, gen_helper_VDIVEUD)
+TRANS_FLAGS2(ISA310, VDIVESQ, do_vx_helper, gen_helper_VDIVESQ)
+TRANS_FLAGS2(ISA310, VDIVEUQ, do_vx_helper, gen_helper_VDIVEUQ)
+
+TRANS_FLAGS2(ISA310, VMODSW, do_vdiv_vmod, MO_32, do_modsw , NULL)
+TRANS_FLAGS2(ISA310, VMODUW, do_vdiv_vmod, MO_32, do_moduw, NULL)
+TRANS_FLAGS2(ISA310, VMODSD, do_vdiv_vmod, MO_64, NULL, do_modsd)
+TRANS_FLAGS2(ISA310, VMODUD, do_vdiv_vmod, MO_64, NULL, do_modud)
+TRANS_FLAGS2(ISA310, VMODSQ, do_vx_helper, gen_helper_VMODSQ)
+TRANS_FLAGS2(ISA310, VMODUQ, do_vx_helper, gen_helper_VMODUQ)
+
+#undef DIVS32
+#undef DIVU32
+#undef DIVS64
+#undef DIVU64
+
 #undef GEN_VR_LDX
 #undef GEN_VR_STX
 #undef GEN_VR_LVE
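Both the out-of-line helpers in int_helper.c and the inline DIVS32/DIVU32/DIVS64/DIVU64 sequences above route the architecturally undefined inputs (a zero divisor, and INT_MIN divided by -1 in the signed forms) through a well-defined path so the host never takes a divide trap. A plain-C model of what one signed 32-bit lane computes; this illustrates the guard and is not code from the patch:

    #include <limits.h>
    #include <stdint.h>

    static int32_t vdivsw_lane(int32_t a, int32_t b)
    {
        /*
         * For the boundary cases the ISA leaves the result undefined;
         * forcing the divisor to 1 makes a / b come out as a, which is
         * also what the movcond sequence in DIVS32 above arranges.
         */
        if (b == 0 || (a == INT32_MIN && b == -1)) {
            b = 1;
        }
        return a / b;
    }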
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index de4483e43b..1cbd047ab3 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -371,6 +371,8 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
 #define MULHWU  XO31( 11)
 #define DIVW    XO31(491)
 #define DIVWU   XO31(459)
+#define MODSW   XO31(779)
+#define MODUW   XO31(267)
 #define CMP     XO31(  0)
 #define CMPL    XO31( 32)
 #define LHBRX   XO31(790)
@@ -403,6 +405,8 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
 #define MULHDU  XO31(  9)
 #define DIVD    XO31(489)
 #define DIVDU   XO31(457)
+#define MODSD   XO31(777)
+#define MODUD   XO31(265)
 
 #define LBZX    XO31( 87)
 #define LHZX    XO31(279)
@@ -2806,6 +2810,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
         break;
 
+    case INDEX_op_rem_i32:
+        tcg_out32(s, MODSW | TAB(args[0], args[1], args[2]));
+        break;
+
+    case INDEX_op_remu_i32:
+        tcg_out32(s, MODUW | TAB(args[0], args[1], args[2]));
+        break;
+
     case INDEX_op_shl_i32:
         if (const_args[2]) {
            /* Limit immediate shift count lest we create an illegal insn.  */
@@ -2947,6 +2959,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_divu_i64:
         tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
         break;
+    case INDEX_op_rem_i64:
+        tcg_out32(s, MODSD | TAB(args[0], args[1], args[2]));
+        break;
+    case INDEX_op_remu_i64:
+        tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
+        break;
 
     case INDEX_op_qemu_ld_i32:
         tcg_out_qemu_ld(s, args, false);
@@ -3722,6 +3740,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 
     case INDEX_op_div_i32:
     case INDEX_op_divu_i32:
+    case INDEX_op_rem_i32:
+    case INDEX_op_remu_i32:
     case INDEX_op_nand_i32:
     case INDEX_op_nor_i32:
     case INDEX_op_muluh_i32:
@@ -3732,6 +3752,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_nor_i64:
     case INDEX_op_div_i64:
     case INDEX_op_divu_i64:
+    case INDEX_op_rem_i64:
+    case INDEX_op_remu_i64:
     case INDEX_op_mulsh_i64:
     case INDEX_op_muluh_i64:
         return C_O1_I2(r, r, r);
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index e6cf72503f..b5cd225cfa 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -83,7 +83,7 @@ extern bool have_vsx;
 
 /* optional instructions */
 #define TCG_TARGET_HAS_div_i32          1
-#define TCG_TARGET_HAS_rem_i32          0
+#define TCG_TARGET_HAS_rem_i32          have_isa_3_00
 #define TCG_TARGET_HAS_rot_i32          1
 #define TCG_TARGET_HAS_ext8s_i32        1
 #define TCG_TARGET_HAS_ext16s_i32       1
@@ -117,7 +117,7 @@ extern bool have_vsx;
 #define TCG_TARGET_HAS_extrl_i64_i32    0
 #define TCG_TARGET_HAS_extrh_i64_i32    0
 #define TCG_TARGET_HAS_div_i64          1
-#define TCG_TARGET_HAS_rem_i64          0
+#define TCG_TARGET_HAS_rem_i64          have_isa_3_00
 #define TCG_TARGET_HAS_rot_i64          1
 #define TCG_TARGET_HAS_ext8s_i64        1
 #define TCG_TARGET_HAS_ext16s_i64       1
diff --git a/tests/avocado/boot_linux_console.py b/tests/avocado/boot_linux_console.py
index 45a2ceda22..6b1533c17c 100644
--- a/tests/avocado/boot_linux_console.py
+++ b/tests/avocado/boot_linux_console.py
@@ -1043,49 +1043,6 @@ class BootLinuxConsole(LinuxKernelTest):
         self.vm.add_args('-dtb', self.workdir + '/day16/vexpress-v2p-ca9.dtb')
         self.do_test_advcal_2018('16', tar_hash, 'winter.zImage')
 
-    def test_arm_ast2400_palmetto_openbmc_v2_9_0(self):
-        """
-        :avocado: tags=arch:arm
-        :avocado: tags=machine:palmetto-bmc
-        """
-
-        image_url = ('https://github.com/openbmc/openbmc/releases/download/2.9.0/'
-                     'obmc-phosphor-image-palmetto.static.mtd')
-        image_hash = ('3e13bbbc28e424865dc42f35ad672b10f2e82cdb11846bb28fa625b48beafd0d')
-        image_path = self.fetch_asset(image_url, asset_hash=image_hash,
-                                      algorithm='sha256')
-
-        self.do_test_arm_aspeed(image_path)
-
-    def test_arm_ast2500_romulus_openbmc_v2_9_0(self):
-        """
-        :avocado: tags=arch:arm
-        :avocado: tags=machine:romulus-bmc
-        """
-
-        image_url = ('https://github.com/openbmc/openbmc/releases/download/2.9.0/'
-                     'obmc-phosphor-image-romulus.static.mtd')
-        image_hash = ('820341076803f1955bc31e647a512c79f9add4f5233d0697678bab4604c7bb25')
-        image_path = self.fetch_asset(image_url, asset_hash=image_hash,
-                                      algorithm='sha256')
-
-        self.do_test_arm_aspeed(image_path)
-
-    def do_test_arm_aspeed(self, image):
-        self.vm.set_console()
-        self.vm.add_args('-drive', 'file=' + image + ',if=mtd,format=raw',
-                         '-net', 'nic')
-        self.vm.launch()
-
-        self.wait_for_console_pattern("U-Boot 2016.07")
-        self.wait_for_console_pattern("## Loading kernel from FIT Image at 20080000")
-        self.wait_for_console_pattern("Starting kernel ...")
-        self.wait_for_console_pattern("Booting Linux on physical CPU 0x0")
-        self.wait_for_console_pattern(
-            "aspeed-smc 1e620000.spi: read control register: 203b0641")
-        self.wait_for_console_pattern("ftgmac100 1e660000.ethernet eth0: irq ")
-        self.wait_for_console_pattern("systemd[1]: Set hostname to")
-
     def test_arm_ast2600_debian(self):
         """
         :avocado: tags=arch:arm
diff --git a/tests/avocado/machine_aspeed.py b/tests/avocado/machine_aspeed.py
index 33090af199..3b8f784a57 100644
--- a/tests/avocado/machine_aspeed.py
+++ b/tests/avocado/machine_aspeed.py
@@ -5,8 +5,11 @@
 # This work is licensed under the terms of the GNU GPL, version 2 or
 # later. See the COPYING file in the top-level directory.
 
+import time
+
 from avocado_qemu import QemuSystemTest
 from avocado_qemu import wait_for_console_pattern
+from avocado_qemu import exec_command
 from avocado_qemu import exec_command_and_wait_for_pattern
 from avocado.utils import archive
 
@@ -34,3 +37,136 @@ class AST1030Machine(QemuSystemTest):
         wait_for_console_pattern(self, "Booting Zephyr OS")
         exec_command_and_wait_for_pattern(self, "help",
                                           "Available commands")
+
+class AST2x00Machine(QemuSystemTest):
+
+    def wait_for_console_pattern(self, success_message, vm=None):
+        wait_for_console_pattern(self, success_message,
+                                 failure_message='Kernel panic - not syncing',
+                                 vm=vm)
+
+    def do_test_arm_aspeed(self, image):
+        self.vm.set_console()
+        self.vm.add_args('-drive', 'file=' + image + ',if=mtd,format=raw',
+                         '-net', 'nic')
+        self.vm.launch()
+
+        self.wait_for_console_pattern("U-Boot 2016.07")
+        self.wait_for_console_pattern("## Loading kernel from FIT Image at 20080000")
+        self.wait_for_console_pattern("Starting kernel ...")
+        self.wait_for_console_pattern("Booting Linux on physical CPU 0x0")
+        wait_for_console_pattern(self,
+                "aspeed-smc 1e620000.spi: read control register: 203b0641")
+        self.wait_for_console_pattern("ftgmac100 1e660000.ethernet eth0: irq ")
+        self.wait_for_console_pattern("systemd[1]: Set hostname to")
+
+    def test_arm_ast2400_palmetto_openbmc_v2_9_0(self):
+        """
+        :avocado: tags=arch:arm
+        :avocado: tags=machine:palmetto-bmc
+        """
+
+        image_url = ('https://github.com/openbmc/openbmc/releases/download/2.9.0/'
+                     'obmc-phosphor-image-palmetto.static.mtd')
+        image_hash = ('3e13bbbc28e424865dc42f35ad672b10f2e82cdb11846bb28fa625b48beafd0d')
+        image_path = self.fetch_asset(image_url, asset_hash=image_hash,
+                                      algorithm='sha256')
+
+        self.do_test_arm_aspeed(image_path)
+
+    def test_arm_ast2500_romulus_openbmc_v2_9_0(self):
+        """
+        :avocado: tags=arch:arm
+        :avocado: tags=machine:romulus-bmc
+        """
+
+        image_url = ('https://github.com/openbmc/openbmc/releases/download/2.9.0/'
+                     'obmc-phosphor-image-romulus.static.mtd')
+        image_hash = ('820341076803f1955bc31e647a512c79f9add4f5233d0697678bab4604c7bb25')
+        image_path = self.fetch_asset(image_url, asset_hash=image_hash,
+                                      algorithm='sha256')
+
+        self.do_test_arm_aspeed(image_path)
+
+    def do_test_arm_aspeed_buildroot_start(self, image, cpu_id):
+        self.vm.set_console()
+        self.vm.add_args('-drive', 'file=' + image + ',if=mtd,format=raw',
+                         '-net', 'nic', '-net', 'user')
+        self.vm.launch()
+
+        self.wait_for_console_pattern('U-Boot 2019.04')
+        self.wait_for_console_pattern('## Loading kernel from FIT Image')
+        self.wait_for_console_pattern('Starting kernel ...')
+        self.wait_for_console_pattern('Booting Linux on physical CPU ' + cpu_id)
+        self.wait_for_console_pattern('lease of 10.0.2.15')
+        self.wait_for_console_pattern('Aspeed EVB')
+        exec_command(self, 'root')
+        time.sleep(0.1)
+
+    def do_test_arm_aspeed_buildroot_poweroff(self):
+        exec_command_and_wait_for_pattern(self, 'poweroff',
+                                          'reboot: System halted')
+
+    def test_arm_ast2500_evb_buildroot(self):
+        """
+        :avocado: tags=arch:arm
+        :avocado: tags=machine:ast2500-evb
+        """
+
+        image_url = ('https://github.com/legoater/qemu-aspeed-boot/raw/master/'
+                     'images/ast2500-evb/buildroot-2022.05/flash.img')
+        image_hash = ('549db6e9d8cdaf4367af21c36385a68bb465779c18b5e37094fc7343decccd3f')
+        image_path = self.fetch_asset(image_url, asset_hash=image_hash,
+                                      algorithm='sha256')
+
+        self.vm.add_args('-device',
+                         'tmp105,bus=aspeed.i2c.bus.3,address=0x4d,id=tmp-test')
+        self.do_test_arm_aspeed_buildroot_start(image_path, '0x0')
+
+        exec_command_and_wait_for_pattern(self,
+             'echo lm75 0x4d > /sys/class/i2c-dev/i2c-3/device/new_device',
+             'i2c i2c-3: new_device: Instantiated device lm75 at 0x4d')
+        exec_command_and_wait_for_pattern(self,
+                             'cat /sys/class/hwmon/hwmon1/temp1_input', '0')
+        self.vm.command('qom-set', path='/machine/peripheral/tmp-test',
+                        property='temperature', value=18000)
+        exec_command_and_wait_for_pattern(self,
+                             'cat /sys/class/hwmon/hwmon1/temp1_input', '18000')
+
+        self.do_test_arm_aspeed_buildroot_poweroff()
+
+    def test_arm_ast2600_evb_buildroot(self):
+        """
+        :avocado: tags=arch:arm
+        :avocado: tags=machine:ast2600-evb
+        """
+
+        image_url = ('https://github.com/legoater/qemu-aspeed-boot/raw/master/'
+                     'images/ast2600-evb/buildroot-2022.05/flash.img')
+        image_hash = ('6cc9e7d128fd4fa1fd01c883af67593cae8072c3239a0b8b6ace857f3538a92d')
+        image_path = self.fetch_asset(image_url, asset_hash=image_hash,
+                                      algorithm='sha256')
+
+        self.vm.add_args('-device',
+                         'tmp105,bus=aspeed.i2c.bus.3,address=0x4d,id=tmp-test')
+        self.vm.add_args('-device',
+                         'ds1338,bus=aspeed.i2c.bus.3,address=0x32')
+        self.do_test_arm_aspeed_buildroot_start(image_path, '0xf00')
+
+        exec_command_and_wait_for_pattern(self,
+             'echo lm75 0x4d > /sys/class/i2c-dev/i2c-3/device/new_device',
+             'i2c i2c-3: new_device: Instantiated device lm75 at 0x4d')
+        exec_command_and_wait_for_pattern(self,
+                             'cat /sys/class/hwmon/hwmon0/temp1_input', '0')
+        self.vm.command('qom-set', path='/machine/peripheral/tmp-test',
+                        property='temperature', value=18000)
+        exec_command_and_wait_for_pattern(self,
+                             'cat /sys/class/hwmon/hwmon0/temp1_input', '18000')
+
+        exec_command_and_wait_for_pattern(self,
+             'echo ds1307 0x32 > /sys/class/i2c-dev/i2c-3/device/new_device',
+             'i2c i2c-3: new_device: Instantiated device ds1307 at 0x32')
+        year = time.strftime("%Y")
+        exec_command_and_wait_for_pattern(self, 'hwclock -f /dev/rtc1', year)
+
+        self.do_test_arm_aspeed_buildroot_poweroff()
diff --git a/tests/qtest/aspeed_smc-test.c b/tests/qtest/aspeed_smc-test.c
index ec233315e6..b1e682db65 100644
--- a/tests/qtest/aspeed_smc-test.c
+++ b/tests/qtest/aspeed_smc-test.c
@@ -135,6 +135,9 @@ static void flash_reset(void)
     spi_ctrl_start_user();
     writeb(ASPEED_FLASH_BASE, RESET_ENABLE);
     writeb(ASPEED_FLASH_BASE, RESET_MEMORY);
+    writeb(ASPEED_FLASH_BASE, WREN);
+    writeb(ASPEED_FLASH_BASE, BULK_ERASE);
+    writeb(ASPEED_FLASH_BASE, WRDI);
     spi_ctrl_stop_user();
 
     spi_conf_remove(CONF_ENABLE_W0);
@@ -195,21 +198,41 @@ static void test_erase_sector(void)
 
     spi_conf(CONF_ENABLE_W0);
 
+    /*
+     * Previous page should be full of 0xffs after backend is
+     * initialized
+     */
+    read_page(some_page_addr - FLASH_PAGE_SIZE, page);
+    for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
+        g_assert_cmphex(page[i], ==, 0xffffffff);
+    }
+
     spi_ctrl_start_user();
-    writeb(ASPEED_FLASH_BASE, WREN);
     writeb(ASPEED_FLASH_BASE, EN_4BYTE_ADDR);
-    writeb(ASPEED_FLASH_BASE, ERASE_SECTOR);
+    writeb(ASPEED_FLASH_BASE, WREN);
+    writeb(ASPEED_FLASH_BASE, PP);
     writel(ASPEED_FLASH_BASE, make_be32(some_page_addr));
+
+    /* Fill the page with its own addresses */
+    for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
+        writel(ASPEED_FLASH_BASE, make_be32(some_page_addr + i * 4));
+    }
     spi_ctrl_stop_user();
 
-    /* Previous page should be full of zeroes as backend is not
-     * initialized */
-    read_page(some_page_addr - FLASH_PAGE_SIZE, page);
+    /* Check the page is correctly written */
+    read_page(some_page_addr, page);
     for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
-        g_assert_cmphex(page[i], ==, 0x0);
+        g_assert_cmphex(page[i], ==, some_page_addr + i * 4);
     }
 
-    /* But this one was erased */
+    spi_ctrl_start_user();
+    writeb(ASPEED_FLASH_BASE, WREN);
+    writeb(ASPEED_FLASH_BASE, EN_4BYTE_ADDR);
+    writeb(ASPEED_FLASH_BASE, ERASE_SECTOR);
+    writel(ASPEED_FLASH_BASE, make_be32(some_page_addr));
+    spi_ctrl_stop_user();
+
+    /* Check the page is erased */
     read_page(some_page_addr, page);
     for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
         g_assert_cmphex(page[i], ==, 0xffffffff);
@@ -226,11 +249,31 @@ static void test_erase_all(void)
 
     spi_conf(CONF_ENABLE_W0);
 
-    /* Check some random page. Should be full of zeroes as backend is
-     * not initialized */
+    /*
+     * Previous page should be full of 0xffs after backend is
+     * initialized
+     */
+    read_page(some_page_addr - FLASH_PAGE_SIZE, page);
+    for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
+        g_assert_cmphex(page[i], ==, 0xffffffff);
+    }
+
+    spi_ctrl_start_user();
+    writeb(ASPEED_FLASH_BASE, EN_4BYTE_ADDR);
+    writeb(ASPEED_FLASH_BASE, WREN);
+    writeb(ASPEED_FLASH_BASE, PP);
+    writel(ASPEED_FLASH_BASE, make_be32(some_page_addr));
+
+    /* Fill the page with its own addresses */
+    for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
+        writel(ASPEED_FLASH_BASE, make_be32(some_page_addr + i * 4));
+    }
+    spi_ctrl_stop_user();
+
+    /* Check the page is correctly written */
     read_page(some_page_addr, page);
     for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
-        g_assert_cmphex(page[i], ==, 0x0);
+        g_assert_cmphex(page[i], ==, some_page_addr + i * 4);
     }
 
     spi_ctrl_start_user();
@@ -238,7 +281,7 @@ static void test_erase_all(void)
     writeb(ASPEED_FLASH_BASE, BULK_ERASE);
     spi_ctrl_stop_user();
 
-    /* Recheck that some random page */
+    /* Check the page is erased */
     read_page(some_page_addr, page);
     for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
         g_assert_cmphex(page[i], ==, 0xffffffff);
@@ -299,6 +342,14 @@ static void test_read_page_mem(void)
     spi_conf(CONF_ENABLE_W0);
     spi_ctrl_start_user();
     writeb(ASPEED_FLASH_BASE, EN_4BYTE_ADDR);
+    writeb(ASPEED_FLASH_BASE, WREN);
+    writeb(ASPEED_FLASH_BASE, PP);
+    writel(ASPEED_FLASH_BASE, make_be32(my_page_addr));
+
+    /* Fill the page with its own addresses */
+    for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
+        writel(ASPEED_FLASH_BASE, make_be32(my_page_addr + i * 4));
+    }
     spi_ctrl_stop_user();
     spi_conf_remove(CONF_ENABLE_W0);
 
@@ -417,6 +468,7 @@ int main(int argc, char **argv)
     qtest_add_func("/ast2400/smc/write_page_mem", test_write_page_mem);
     qtest_add_func("/ast2400/smc/read_status_reg", test_read_status_reg);
 
+    flash_reset();
     ret = g_test_run();
 
     qtest_quit(global_qtest);
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index 287b367ec3..b497a41378 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -88,6 +88,7 @@ if have_block
     'test-io-channel-file': ['io-channel-helpers.c', io],
     'test-io-channel-command': ['io-channel-helpers.c', io],
     'test-io-channel-buffer': ['io-channel-helpers.c', io],
+    'test-io-channel-null': [io],
     'test-crypto-ivgen': [io],
     'test-crypto-afsplit': [io],
     'test-crypto-block': [io],
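Turning TCG_TARGET_HAS_rem_i32/_i64 into have_isa_3_00 above means the backend emits the POWER9 modsw/modsd family directly where the hardware has it; on older cores TCG's generic layer keeps synthesizing remainder from division, as it did before this change. The fallback is equivalent to the following identity (an illustration of the generic expansion, not backend code from the patch):

    /* Remainder recovered from division: r = a - (a / b) * b. */
    static int64_t rem_via_div(int64_t a, int64_t b)
    {
        int64_t q = a / b;   /* one hardware divide */
        return a - q * b;    /* multiply and subtract give the remainder */
    }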
diff --git a/tests/unit/test-io-channel-null.c b/tests/unit/test-io-channel-null.c
new file mode 100644
index 0000000000..b3aab17ccc
--- /dev/null
+++ b/tests/unit/test-io-channel-null.c
@@ -0,0 +1,95 @@
+/*
+ * QEMU I/O channel null test
+ *
+ * Copyright (c) 2022 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "io/channel-null.h"
+#include "qapi/error.h"
+
+static gboolean test_io_channel_watch(QIOChannel *ioc,
+                                      GIOCondition condition,
+                                      gpointer opaque)
+{
+    GIOCondition *gotcond = opaque;
+    *gotcond = condition;
+    return G_SOURCE_REMOVE;
+}
+
+static void test_io_channel_null_io(void)
+{
+    g_autoptr(QIOChannelNull) null = qio_channel_null_new();
+    char buf[1024];
+    GIOCondition gotcond = 0;
+    Error *local_err = NULL;
+
+    g_assert(qio_channel_write(QIO_CHANNEL(null),
+                               "Hello World", 11,
+                               &error_abort) == 11);
+
+    g_assert(qio_channel_read(QIO_CHANNEL(null),
+                              buf, sizeof(buf),
+                              &error_abort) == 0);
+
+    qio_channel_add_watch(QIO_CHANNEL(null),
+                          G_IO_IN,
+                          test_io_channel_watch,
+                          &gotcond,
+                          NULL);
+
+    g_main_context_iteration(NULL, false);
+
+    g_assert(gotcond == G_IO_IN);
+
+    qio_channel_add_watch(QIO_CHANNEL(null),
+                          G_IO_IN | G_IO_OUT,
+                          test_io_channel_watch,
+                          &gotcond,
+                          NULL);
+
+    g_main_context_iteration(NULL, false);
+
+    g_assert(gotcond == (G_IO_IN | G_IO_OUT));
+
+    qio_channel_close(QIO_CHANNEL(null), &error_abort);
+
+    g_assert(qio_channel_write(QIO_CHANNEL(null),
+                               "Hello World", 11,
+                               &local_err) == -1);
+    g_assert_nonnull(local_err);
+
+    g_clear_pointer(&local_err, error_free);
+
+    g_assert(qio_channel_read(QIO_CHANNEL(null),
+                              buf, sizeof(buf),
+                              &local_err) == -1);
+    g_assert_nonnull(local_err);
+
+    g_clear_pointer(&local_err, error_free);
+}
+
+int main(int argc, char **argv)
+{
+    module_call_init(MODULE_INIT_QOM);
+
+    g_test_init(&argc, &argv, NULL);
+
+    g_test_add_func("/io/channel/null/io", test_io_channel_null_io);
+
+    return g_test_run();
+}
diff --git a/tests/unit/test-vmstate.c b/tests/unit/test-vmstate.c
index 6a417bb102..72077b5780 100644
--- a/tests/unit/test-vmstate.c
+++ b/tests/unit/test-vmstate.c
@@ -28,7 +28,6 @@
 #include "migration/vmstate.h"
 #include "migration/qemu-file-types.h"
 #include "../migration/qemu-file.h"
-#include "../migration/qemu-file-channel.h"
 #include "../migration/savevm.h"
 #include "qemu/coroutine.h"
 #include "qemu/module.h"
@@ -52,9 +51,9 @@ static QEMUFile *open_test_file(bool write)
     }
     ioc = QIO_CHANNEL(qio_channel_file_new_fd(fd));
     if (write) {
-        f = qemu_fopen_channel_output(ioc);
+        f = qemu_file_new_output(ioc);
     } else {
-        f = qemu_fopen_channel_input(ioc);
+        f = qemu_file_new_input(ioc);
     }
     object_unref(OBJECT(ioc));
     return f;
diff --git a/util/cacheflush.c b/util/cacheflush.c
index 4b57186d89..2c2c73e085 100644
--- a/util/cacheflush.c
+++ b/util/cacheflush.c
@@ -1,5 +1,5 @@
 /*
- * Flush the host cpu caches.
+ * Info about, and flushing of, the host cpu caches.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
@@ -9,8 +9,218 @@
 #include "qemu/cacheflush.h"
 #include "qemu/cacheinfo.h"
 #include "qemu/bitops.h"
+#include "qemu/host-utils.h"
+#include "qemu/atomic.h"
 
+int qemu_icache_linesize = 0;
+int qemu_icache_linesize_log;
+int qemu_dcache_linesize = 0;
+int qemu_dcache_linesize_log;
+
+/*
+ * Operating system specific cache detection mechanisms.
+ */
+
+#if defined(_WIN32)
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
+    DWORD size = 0;
+    BOOL success;
+    size_t i, n;
+
+    /*
+     * Check for the required buffer size first.  Note that if the zero
+     * size we use for the probe results in success, then there is no
+     * data available; fail in that case.
+     */
+    success = GetLogicalProcessorInformation(0, &size);
+    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+        return;
+    }
+
+    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
+    if (!GetLogicalProcessorInformation(buf, &size)) {
+        goto fail;
+    }
+
+    for (i = 0; i < n; i++) {
+        if (buf[i].Relationship == RelationCache
+            && buf[i].Cache.Level == 1) {
+            switch (buf[i].Cache.Type) {
+            case CacheUnified:
+                *isize = *dsize = buf[i].Cache.LineSize;
+                break;
+            case CacheInstruction:
+                *isize = buf[i].Cache.LineSize;
+                break;
+            case CacheData:
+                *dsize = buf[i].Cache.LineSize;
+                break;
+            default:
+                break;
+            }
+        }
+    }
+ fail:
+    g_free(buf);
+}
+
+#elif defined(CONFIG_DARWIN)
+# include <sys/sysctl.h>
+static void sys_cache_info(int *isize, int *dsize)
+{
+    /* There's only a single sysctl for both I/D cache line sizes. */
+    long size;
+    size_t len = sizeof(size);
+    if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) {
+        *isize = *dsize = size;
+    }
+}
+#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+# include <sys/sysctl.h>
+static void sys_cache_info(int *isize, int *dsize)
+{
+    /* There's only a single sysctl for both I/D cache line sizes. */
+    int size;
+    size_t len = sizeof(size);
+    if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) {
+        *isize = *dsize = size;
+    }
+}
+#else
+/* POSIX */
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+# ifdef _SC_LEVEL1_ICACHE_LINESIZE
+    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+    if (tmp_isize > 0) {
+        *isize = tmp_isize;
+    }
+# endif
+# ifdef _SC_LEVEL1_DCACHE_LINESIZE
+    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+    if (tmp_dsize > 0) {
+        *dsize = tmp_dsize;
+    }
+# endif
+}
+#endif /* sys_cache_info */
+
+
+/*
+ * Architecture (+ OS) specific cache detection mechanisms.
+ */
+
+#if defined(__powerpc__)
+static bool have_coherent_icache;
+#endif
+
+#if defined(__aarch64__) && !defined(CONFIG_DARWIN)
+/* Apple does not expose CTR_EL0, so we must use system interfaces. */
+static uint64_t save_ctr_el0;
+static void arch_cache_info(int *isize, int *dsize)
+{
+    uint64_t ctr;
+
+    /*
+     * The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
+     * but (at least under Linux) these are marked protected by the
+     * kernel.  However, CTR_EL0 contains the minimum linesize in the
+     * entire hierarchy, and is used by userspace cache flushing.
+     *
+     * We will also use this value in flush_idcache_range.
+     */
+    asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
+    save_ctr_el0 = ctr;
+
+    if (*isize == 0 || *dsize == 0) {
+        if (*isize == 0) {
+            *isize = 4 << (ctr & 0xf);
+        }
+        if (*dsize == 0) {
+            *dsize = 4 << ((ctr >> 16) & 0xf);
+        }
+    }
+}
+
+#elif defined(_ARCH_PPC) && defined(__linux__)
+# include "elf.h"
+
+static void arch_cache_info(int *isize, int *dsize)
+{
+    if (*isize == 0) {
+        *isize = qemu_getauxval(AT_ICACHEBSIZE);
+    }
+    if (*dsize == 0) {
+        *dsize = qemu_getauxval(AT_DCACHEBSIZE);
+    }
+    have_coherent_icache = qemu_getauxval(AT_HWCAP) & PPC_FEATURE_ICACHE_SNOOP;
+}
+
+#else
+static void arch_cache_info(int *isize, int *dsize) { }
+#endif /* arch_cache_info */
+
+/*
+ * ... and if all else fails ...
+ */
+
+static void fallback_cache_info(int *isize, int *dsize)
+{
+    /* If we can only find one of the two, assume they're the same. */
+    if (*isize) {
+        if (*dsize) {
+            /* Success! */
+        } else {
+            *dsize = *isize;
+        }
+    } else if (*dsize) {
+        *isize = *dsize;
+    } else {
+#if defined(_ARCH_PPC)
+        /*
+         * For PPC, we're going to use the cache sizes computed for
+         * flush_idcache_range.  Which means that we must use the
+         * architecture minimum.
+         */
+        *isize = *dsize = 16;
+#else
+        /* Otherwise, 64 bytes is not uncommon. */
+        *isize = *dsize = 64;
+#endif
+    }
+}
+
+static void __attribute__((constructor)) init_cache_info(void)
+{
+    int isize = 0, dsize = 0;
+
+    sys_cache_info(&isize, &dsize);
+    arch_cache_info(&isize, &dsize);
+    fallback_cache_info(&isize, &dsize);
+
+    assert((isize & (isize - 1)) == 0);
+    assert((dsize & (dsize - 1)) == 0);
+
+    qemu_icache_linesize = isize;
+    qemu_icache_linesize_log = ctz32(isize);
+    qemu_dcache_linesize = dsize;
+    qemu_dcache_linesize_log = ctz32(dsize);
+
+    qatomic64_init();
+}
+
+
+/*
+ * Architecture (+ OS) specific cache flushing mechanisms.
+ */
+
 #if defined(__i386__) || defined(__x86_64__) || defined(__s390__)
 
 /* Caches are coherent and do not require flushing; symbol inline. */
@@ -29,17 +239,6 @@ void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 #else
 
 /*
- * TODO: unify this with cacheinfo.c.
- * We want to save the whole contents of CTR_EL0, so that we
- * have more than the linesize, but also IDC and DIC.
- */
-static uint64_t save_ctr_el0;
-static void __attribute__((constructor)) init_ctr_el0(void)
-{
-    asm volatile("mrs\t%0, ctr_el0" : "=r"(save_ctr_el0));
-}
-
-/*
  * This is a copy of gcc's __aarch64_sync_cache_range, modified
  * to fit this three-operand interface.
  */
@@ -48,8 +247,8 @@ void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
     const unsigned CTR_IDC = 1u << 28;
     const unsigned CTR_DIC = 1u << 29;
     const uint64_t ctr_el0 = save_ctr_el0;
-    const uintptr_t icache_lsize = 4 << extract64(ctr_el0, 0, 4);
-    const uintptr_t dcache_lsize = 4 << extract64(ctr_el0, 16, 4);
+    const uintptr_t icache_lsize = qemu_icache_linesize;
+    const uintptr_t dcache_lsize = qemu_dcache_linesize;
     uintptr_t p;
 
     /*
@@ -104,8 +303,24 @@ void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
 {
     uintptr_t p, b, e;
-    size_t dsize = qemu_dcache_linesize;
-    size_t isize = qemu_icache_linesize;
+    size_t dsize, isize;
+
+    /*
+     * Some processors have coherent caches and support a simplified
+     * flushing procedure.  See
+     *   POWER9 UM, 4.6.2.2 Instruction Cache Block Invalidate (icbi)
+     *   https://ibm.ent.box.com/s/tmklq90ze7aj8f4n32er1mu3sy9u8k3k
+     */
+    if (have_coherent_icache) {
+        asm volatile ("sync\n\t"
+                      "icbi 0,%0\n\t"
+                      "isync"
+                      : : "r"(rx) : "memory");
+        return;
+    }
+
+    dsize = qemu_dcache_linesize;
+    isize = qemu_icache_linesize;
 
     b = rw & ~(dsize - 1);
     e = (rw + len + dsize - 1) & ~(dsize - 1);
diff --git a/util/cacheinfo.c b/util/cacheinfo.c
deleted file mode 100644
index ab1644d490..0000000000
--- a/util/cacheinfo.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * cacheinfo.c - helpers to query the host about its caches
- *
- * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
- * License: GNU GPL, version 2 or later.
- *   See the COPYING file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "qemu/host-utils.h"
-#include "qemu/atomic.h"
-#include "qemu/cacheinfo.h"
-
-int qemu_icache_linesize = 0;
-int qemu_icache_linesize_log;
-int qemu_dcache_linesize = 0;
-int qemu_dcache_linesize_log;
-
-/*
- * Operating system specific detection mechanisms.
- */
-
-#if defined(_WIN32)
-
-static void sys_cache_info(int *isize, int *dsize)
-{
-    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
-    DWORD size = 0;
-    BOOL success;
-    size_t i, n;
-
-    /* Check for the required buffer size first.  Note that if the zero
-       size we use for the probe results in success, then there is no
-       data available; fail in that case.  */
-    success = GetLogicalProcessorInformation(0, &size);
-    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
-        return;
-    }
-
-    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
-    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
-    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
-    if (!GetLogicalProcessorInformation(buf, &size)) {
-        goto fail;
-    }
-
-    for (i = 0; i < n; i++) {
-        if (buf[i].Relationship == RelationCache
-            && buf[i].Cache.Level == 1) {
-            switch (buf[i].Cache.Type) {
-            case CacheUnified:
-                *isize = *dsize = buf[i].Cache.LineSize;
-                break;
-            case CacheInstruction:
-                *isize = buf[i].Cache.LineSize;
-                break;
-            case CacheData:
-                *dsize = buf[i].Cache.LineSize;
-                break;
-            default:
-                break;
-            }
-        }
-    }
- fail:
-    g_free(buf);
-}
-
-#elif defined(__APPLE__)
-# include <sys/sysctl.h>
-static void sys_cache_info(int *isize, int *dsize)
-{
-    /* There's only a single sysctl for both I/D cache line sizes. */
-    long size;
-    size_t len = sizeof(size);
-    if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) {
-        *isize = *dsize = size;
-    }
-}
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-# include <sys/sysctl.h>
-static void sys_cache_info(int *isize, int *dsize)
-{
-    /* There's only a single sysctl for both I/D cache line sizes. */
-    int size;
-    size_t len = sizeof(size);
-    if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) {
-        *isize = *dsize = size;
-    }
-}
-#else
-/* POSIX */
-
-static void sys_cache_info(int *isize, int *dsize)
-{
-# ifdef _SC_LEVEL1_ICACHE_LINESIZE
-    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
-    if (tmp_isize > 0) {
-        *isize = tmp_isize;
-    }
-# endif
-# ifdef _SC_LEVEL1_DCACHE_LINESIZE
-    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
-    if (tmp_dsize > 0) {
-        *dsize = tmp_dsize;
-    }
-# endif
-}
-#endif /* sys_cache_info */
-
-/*
- * Architecture (+ OS) specific detection mechanisms.
- */
-
-#if defined(__aarch64__)
-
-static void arch_cache_info(int *isize, int *dsize)
-{
-    if (*isize == 0 || *dsize == 0) {
-        uint64_t ctr;
-
-        /* The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
-           but (at least under Linux) these are marked protected by the
-           kernel.  However, CTR_EL0 contains the minimum linesize in the
-           entire hierarchy, and is used by userspace cache flushing.  */
-        asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
-        if (*isize == 0) {
-            *isize = 4 << (ctr & 0xf);
-        }
-        if (*dsize == 0) {
-            *dsize = 4 << ((ctr >> 16) & 0xf);
-        }
-    }
-}
-
-#elif defined(_ARCH_PPC) && defined(__linux__)
-# include "elf.h"
-
-static void arch_cache_info(int *isize, int *dsize)
-{
-    if (*isize == 0) {
-        *isize = qemu_getauxval(AT_ICACHEBSIZE);
-    }
-    if (*dsize == 0) {
-        *dsize = qemu_getauxval(AT_DCACHEBSIZE);
-    }
-}
-
-#else
-static void arch_cache_info(int *isize, int *dsize) { }
-#endif /* arch_cache_info */
-
-/*
- * ... and if all else fails ...
- */
-
-static void fallback_cache_info(int *isize, int *dsize)
-{
-    /* If we can only find one of the two, assume they're the same. */
-    if (*isize) {
-        if (*dsize) {
-            /* Success! */
-        } else {
-            *dsize = *isize;
-        }
-    } else if (*dsize) {
-        *isize = *dsize;
-    } else {
-#if defined(_ARCH_PPC)
-        /*
-         * For PPC, we're going to use the cache sizes computed for
-         * flush_idcache_range.  Which means that we must use the
-         * architecture minimum.
-         */
-        *isize = *dsize = 16;
-#else
-        /* Otherwise, 64 bytes is not uncommon. */
-        *isize = *dsize = 64;
-#endif
-    }
-}
-
-static void __attribute__((constructor)) init_cache_info(void)
-{
-    int isize = 0, dsize = 0;
-
-    sys_cache_info(&isize, &dsize);
-    arch_cache_info(&isize, &dsize);
-    fallback_cache_info(&isize, &dsize);
-
-    assert((isize & (isize - 1)) == 0);
-    assert((dsize & (dsize - 1)) == 0);
-
-    qemu_icache_linesize = isize;
-    qemu_icache_linesize_log = ctz32(isize);
-    qemu_dcache_linesize = dsize;
-    qemu_dcache_linesize_log = ctz32(dsize);
-
-    qatomic64_init();
-}
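The merged cacheflush.c above records the whole CTR_EL0 value once at startup and reuses it both for line sizes and for the IDC/DIC flush shortcuts. The two line-size fields it decodes are log2 word counts, which is why the code computes 4 << field. A standalone illustration of that decode, with the register value taken as a plain parameter rather than read via mrs as the patch does:

    #include <stdint.h>

    /* Decode minimum I/D cache line sizes (in bytes) from aarch64 CTR_EL0. */
    static void decode_ctr_el0(uint64_t ctr, int *isize, int *dsize)
    {
        *isize = 4 << (ctr & 0xf);          /* IminLine: log2 of words */
        *dsize = 4 << ((ctr >> 16) & 0xf);  /* DminLine: log2 of words */
    }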
+
+/*
+ * Unsigned 256-by-128 division.
+ * Returns the remainder via r.
+ * Returns the lower 128 bits of the quotient.
+ * Needs a normalized divisor (most significant bit set to 1).
+ *
+ * Adapted from include/qemu/host-utils.h udiv_qrnnd,
+ * from the GNU Multi Precision Library - longlong.h __udiv_qrnnd
+ * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
+ *
+ * Licensed under the GPLv2/LGPLv3
+ */
+static Int128 udiv256_qrnnd(Int128 *r, Int128 n1, Int128 n0, Int128 d)
+{
+    Int128 d0, d1, q0, q1, r1, r0, m;
+    uint64_t mp0, mp1;
+
+    d0 = int128_make64(int128_getlo(d));
+    d1 = int128_make64(int128_gethi(d));
+
+    r1 = int128_remu(n1, d1);
+    q1 = int128_divu(n1, d1);
+    mp0 = int128_getlo(q1);
+    mp1 = int128_gethi(q1);
+    mulu128(&mp0, &mp1, int128_getlo(d0));
+    m = int128_make128(mp0, mp1);
+    r1 = int128_make128(int128_gethi(n0), int128_getlo(r1));
+    if (int128_ult(r1, m)) {
+        q1 = int128_sub(q1, int128_one());
+        r1 = int128_add(r1, d);
+        if (int128_uge(r1, d)) {
+            if (int128_ult(r1, m)) {
+                q1 = int128_sub(q1, int128_one());
+                r1 = int128_add(r1, d);
+            }
+        }
+    }
+    r1 = int128_sub(r1, m);
+
+    r0 = int128_remu(r1, d1);
+    q0 = int128_divu(r1, d1);
+    mp0 = int128_getlo(q0);
+    mp1 = int128_gethi(q0);
+    mulu128(&mp0, &mp1, int128_getlo(d0));
+    m = int128_make128(mp0, mp1);
+    r0 = int128_make128(int128_getlo(n0), int128_getlo(r0));
+    if (int128_ult(r0, m)) {
+        q0 = int128_sub(q0, int128_one());
+        r0 = int128_add(r0, d);
+        if (int128_uge(r0, d)) {
+            if (int128_ult(r0, m)) {
+                q0 = int128_sub(q0, int128_one());
+                r0 = int128_add(r0, d);
+            }
+        }
+    }
+    r0 = int128_sub(r0, m);
+
+    *r = r0;
+    return int128_or(int128_lshift(q1, 64), q0);
+}
+
+/*
+ * Unsigned 256-by-128 division.
+ * Returns the quotient via plow and phigh.
+ * Returns the remainder via the function return value.
+ */
+Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor)
+{
+    Int128 dhi = *phigh;
+    Int128 dlo = *plow;
+    Int128 rem, dhighest;
+    int sh;
+
+    if (!int128_nz(divisor) || !int128_nz(dhi)) {
+        *plow = int128_divu(dlo, divisor);
+        *phigh = int128_zero();
+        return int128_remu(dlo, divisor);
+    } else {
+        sh = clz128(divisor);
+
+        if (int128_ult(dhi, divisor)) {
+            if (sh != 0) {
+                /* normalize the divisor, shifting the dividend accordingly */
+                divisor = int128_lshift(divisor, sh);
+                dhi = int128_or(int128_lshift(dhi, sh),
+                                int128_urshift(dlo, (128 - sh)));
+                dlo = int128_lshift(dlo, sh);
+            }
+
+            *phigh = int128_zero();
+            *plow = udiv256_qrnnd(&rem, dhi, dlo, divisor);
+        } else {
+            if (sh != 0) {
+                /* normalize the divisor, shifting the dividend accordingly */
+                divisor = int128_lshift(divisor, sh);
+                dhighest = int128_rshift(dhi, (128 - sh));
+                dhi = int128_or(int128_lshift(dhi, sh),
+                                int128_urshift(dlo, (128 - sh)));
+                dlo = int128_lshift(dlo, sh);
+
+                *phigh = udiv256_qrnnd(&dhi, dhighest, dhi, divisor);
+            } else {
+                /*
+                 * dhi >= divisor
+                 * Since the MSB of divisor is set (sh == 0),
+                 * (dhi - divisor) < divisor
+                 *
+                 * Thus, the high part of the quotient is 1, and we can
+                 * calculate the low part with a single call to udiv_qrnnd
+                 * after subtracting divisor from dhi
+                 */
+                dhi = int128_sub(dhi, divisor);
+                *phigh = int128_one();
+            }
+
+            *plow = udiv256_qrnnd(&rem, dhi, dlo, divisor);
+        }
+
+        /*
+         * since the dividend/divisor might have been normalized,
+         * the remainder might also have to be shifted back
+         */
+        rem = int128_urshift(rem, sh);
+        return rem;
+    }
+}
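Note that divu256() works in place: on entry *plow/*phigh hold the 256-bit
dividend, on return they hold the 256-bit quotient, and the remainder comes
back as the return value. A minimal usage sketch follows, assuming the
prototypes land in include/qemu/host-utils.h (the header change is not part of
this hunk); here 2^128 divided by 3 yields the quotient 0x5555...5555 with
remainder 1.

#include "qemu/osdep.h"
#include "qemu/int128.h"
#include "qemu/host-utils.h"    /* assumed home of the divu256() prototype */

static void divu256_example(void)
{
    Int128 lo = int128_zero();  /* low 128 bits of the dividend */
    Int128 hi = int128_one();   /* high 128 bits: dividend = 2^128 */
    Int128 rem = divu256(&lo, &hi, int128_make64(3));

    /* floor(2^128 / 3) = 0x5555...5555, remainder 1 */
    g_assert(int128_eq(hi, int128_zero()));
    g_assert(int128_eq(lo, int128_make128(0x5555555555555555ULL,
                                          0x5555555555555555ULL)));
    g_assert(int128_eq(rem, int128_one()));
}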
+
+/*
+ * Signed 256-by-128 division.
+ * Returns the quotient via plow and phigh.
+ * Also returns the remainder via the function return value.
+ */
+Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor)
+{
+    bool neg_quotient = false, neg_remainder = false;
+    Int128 unsig_hi = *phigh, unsig_lo = *plow;
+    Int128 rem;
+
+    if (!int128_nonneg(*phigh)) {
+        neg_quotient = !neg_quotient;
+        neg_remainder = !neg_remainder;
+
+        if (!int128_nz(unsig_lo)) {
+            unsig_hi = int128_neg(unsig_hi);
+        } else {
+            unsig_hi = int128_not(unsig_hi);
+            unsig_lo = int128_neg(unsig_lo);
+        }
+    }
+
+    if (!int128_nonneg(divisor)) {
+        neg_quotient = !neg_quotient;
+
+        divisor = int128_neg(divisor);
+    }
+
+    rem = divu256(&unsig_lo, &unsig_hi, divisor);
+
+    if (neg_quotient) {
+        if (!int128_nz(unsig_lo)) {
+            *phigh = int128_neg(unsig_hi);
+            *plow = int128_zero();
+        } else {
+            *phigh = int128_not(unsig_hi);
+            *plow = int128_neg(unsig_lo);
+        }
+    } else {
+        *phigh = unsig_hi;
+        *plow = unsig_lo;
+    }
+
+    if (neg_remainder) {
+        return int128_neg(rem);
+    } else {
+        return rem;
+    }
+}
diff --git a/util/meson.build b/util/meson.build
index 8f16018cd4..4939b0b91c 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -27,7 +27,7 @@ util_ss.add(files('envlist.c', 'path.c', 'module.c'))
 util_ss.add(files('host-utils.c'))
 util_ss.add(files('bitmap.c', 'bitops.c'))
 util_ss.add(files('fifo8.c'))
-util_ss.add(files('cacheinfo.c', 'cacheflush.c'))
+util_ss.add(files('cacheflush.c'))
 util_ss.add(files('error.c', 'error-report.c'))
 util_ss.add(files('qemu-print.c'))
 util_ss.add(files('id.c'))
diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index a670a57881..6a0de33dd2 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -261,6 +261,9 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
     }
 
     QLIST_FOREACH(timer_list, &clock->timerlists, list) {
+        if (!qatomic_read(&timer_list->active_timers)) {
+            continue;
+        }
         qemu_mutex_lock(&timer_list->active_timers_lock);
         ts = timer_list->active_timers;
         /* Skip all external timers */
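The qemu-timer.c hunk is a small scalability fix: qemu_clock_deadline_ns_all()
used to take every timer list's lock even when the list had no active timers.
The added qatomic_read() check now skips empty lists without locking. The read
is deliberately racy; a timer armed right after the check is not missed
overall, since arming a timer notifies the event loop, which recomputes
deadlines. The general shape of this check-then-lock pattern is sketched below;
LockedList, Node and node_expire() are hypothetical stand-ins, not QEMU APIs.

/* Stand-alone sketch of the check-then-lock pattern above. Illustrative only. */
#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/thread.h"

typedef struct Node {
    int64_t expire_time;
} Node;

static int64_t node_expire(Node *head)
{
    return head->expire_time;       /* hypothetical accessor */
}

typedef struct {
    QemuMutex lock;
    Node *head;     /* written under lock, read with qatomic_read() */
} LockedList;

static int64_t min_deadline(LockedList *lists, size_t n)
{
    int64_t deadline = INT64_MAX;

    for (size_t i = 0; i < n; i++) {
        /* Cheap, racy emptiness test: skip the lock entirely for empty lists. */
        if (!qatomic_read(&lists[i].head)) {
            continue;
        }
        qemu_mutex_lock(&lists[i].lock);
        if (lists[i].head) {        /* re-check now that we hold the lock */
            deadline = MIN(deadline, node_expire(lists[i].head));
        }
        qemu_mutex_unlock(&lists[i].lock);
    }
    return deadline;
}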