Diffstat
38 files changed, 2700 insertions, 147 deletions
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 0cd0c14c2a..aebadeaa03 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -1766,9 +1766,9 @@ static bool same_stat_id(const struct stat *a, const struct stat *b) static void coroutine_fn v9fs_walk(void *opaque) { - int name_idx; + int name_idx, nwalked; g_autofree V9fsQID *qids = NULL; - int i, err = 0; + int i, err = 0, any_err = 0; V9fsPath dpath, path; P9ARRAY_REF(V9fsPath) pathes = NULL; uint16_t nwnames; @@ -1834,54 +1834,61 @@ static void coroutine_fn v9fs_walk(void *opaque) * driver code altogether inside the following block. */ v9fs_co_run_in_worker({ + nwalked = 0; if (v9fs_request_cancelled(pdu)) { - err = -EINTR; + any_err |= err = -EINTR; break; } err = s->ops->lstat(&s->ctx, &dpath, &fidst); if (err < 0) { - err = -errno; + any_err |= err = -errno; break; } stbuf = fidst; - for (name_idx = 0; name_idx < nwnames; name_idx++) { + for (; nwalked < nwnames; nwalked++) { if (v9fs_request_cancelled(pdu)) { - err = -EINTR; + any_err |= err = -EINTR; break; } if (!same_stat_id(&pdu->s->root_st, &stbuf) || - strcmp("..", wnames[name_idx].data)) + strcmp("..", wnames[nwalked].data)) { err = s->ops->name_to_path(&s->ctx, &dpath, - wnames[name_idx].data, - &pathes[name_idx]); + wnames[nwalked].data, + &pathes[nwalked]); if (err < 0) { - err = -errno; + any_err |= err = -errno; break; } if (v9fs_request_cancelled(pdu)) { - err = -EINTR; + any_err |= err = -EINTR; break; } - err = s->ops->lstat(&s->ctx, &pathes[name_idx], &stbuf); + err = s->ops->lstat(&s->ctx, &pathes[nwalked], &stbuf); if (err < 0) { - err = -errno; + any_err |= err = -errno; break; } - stbufs[name_idx] = stbuf; - v9fs_path_copy(&dpath, &pathes[name_idx]); + stbufs[nwalked] = stbuf; + v9fs_path_copy(&dpath, &pathes[nwalked]); } } }); /* * Handle all the rest of this Twalk request on main thread ... + * + * NOTE: -EINTR is an exception where we deviate from the protocol spec + * and simply send a (R)Lerror response instead of bothering to assemble + * a (truncated) Rwalk response; because -EINTR is always the result of a + * Tflush request, so the client would no longer wait for a response in + * this case anyway. */ - if (err < 0) { + if ((err < 0 && !nwalked) || err == -EINTR) { goto out; } - err = stat_to_qid(pdu, &fidst, &qid); - if (err < 0) { + any_err |= err = stat_to_qid(pdu, &fidst, &qid); + if (err < 0 && !nwalked) { goto out; } stbuf = fidst; @@ -1890,20 +1897,29 @@ static void coroutine_fn v9fs_walk(void *opaque) v9fs_path_copy(&dpath, &fidp->path); v9fs_path_copy(&path, &fidp->path); - for (name_idx = 0; name_idx < nwnames; name_idx++) { + for (name_idx = 0; name_idx < nwalked; name_idx++) { if (!same_stat_id(&pdu->s->root_st, &stbuf) || strcmp("..", wnames[name_idx].data)) { stbuf = stbufs[name_idx]; - err = stat_to_qid(pdu, &stbuf, &qid); + any_err |= err = stat_to_qid(pdu, &stbuf, &qid); if (err < 0) { - goto out; + break; } v9fs_path_copy(&path, &pathes[name_idx]); v9fs_path_copy(&dpath, &path); } memcpy(&qids[name_idx], &qid, sizeof(qid)); } + if (any_err < 0) { + if (!name_idx) { + /* don't send any QIDs, send Rlerror instead */ + goto out; + } else { + /* send QIDs (not Rlerror), but fid MUST remain unaffected */ + goto send_qids; + } + } if (fid == newfid) { if (fidp->fid_type != P9_FID_NONE) { err = -EINVAL; @@ -1921,8 +1937,9 @@ static void coroutine_fn v9fs_walk(void *opaque) newfidp->uid = fidp->uid; v9fs_path_copy(&newfidp->path, &path); } - err = v9fs_walk_marshal(pdu, nwnames, qids); - trace_v9fs_walk_return(pdu->tag, pdu->id, nwnames, qids); +send_qids: + err = v9fs_walk_marshal(pdu, name_idx, qids); + trace_v9fs_walk_return(pdu->tag, pdu->id, name_idx, qids); out: put_fid(pdu, fidp); if (newfidp) {
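The hunks above switch Twalk to the 9p2000 partial-walk rule: an error is reported to the client only when the very first path component fails; otherwise the response is a shortened Rwalk carrying the QIDs that were successfully walked. A minimal sketch of the resulting response selection, assuming hypothetical send_rlerror()/send_rwalk() helpers (not QEMU functions):

    /* nwalked: components successfully walked; any_err: sticky error record */
    if (err == -EINTR) {
        send_rlerror(EINTR);        /* Tflush in flight, client no longer waits */
    } else if (any_err < 0 && nwalked == 0) {
        send_rlerror(-any_err);     /* the first component already failed */
    } else {
        send_rwalk(nwalked, qids);  /* full or partial success; on partial
                                       success the fid stays unaffected */
    }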
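The qdev.c hunk introduces the unplug-blocker API that the vfio-user server object below relies on. A minimal usage sketch (the device pointer and reason text are illustrative):

    Error *blocker = NULL;

    error_setg(&blocker, "dev0 is in use by a vfio-user client");
    qdev_add_unplug_blocker(dev, blocker);

    /* later, in the hot-unplug path: */
    if (qdev_unplug_blocked(dev, errp)) {
        return;    /* errp now carries a copy of the blocker reason */
    }

    /* once the blocker no longer applies: */
    qdev_del_unplug_blocker(dev, blocker);
    error_free(blocker);    /* the list stores the caller's pointer, so the
                               caller frees it after deleting the blocker */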
diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c index efa14908d8..483d8eb13f 100644 --- a/hw/cxl/cxl-host.c +++ b/hw/cxl/cxl-host.c @@ -129,8 +129,9 @@ static bool cxl_hdm_find_target(uint32_t *cache_mem, hwaddr addr, static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr) { - CXLComponentState *hb_cstate; + CXLComponentState *hb_cstate, *usp_cstate; PCIHostState *hb; + CXLUpstreamPort *usp; int rb_index; uint32_t *cache_mem; uint8_t target; @@ -164,8 +165,46 @@ static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr) } d = pci_bridge_get_sec_bus(PCI_BRIDGE(rp))->devices[0]; + if (!d) { + return NULL; + } + + if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) { + return d; + } + + /* + * Could also be a switch. Note only one level of switching currently + * supported. + */ + if (!object_dynamic_cast(OBJECT(d), TYPE_CXL_USP)) { + return NULL; + } + usp = CXL_USP(d); + + usp_cstate = cxl_usp_to_cstate(usp); + if (!usp_cstate) { + return NULL; + } + + cache_mem = usp_cstate->crb.cache_mem_registers; + + target_found = cxl_hdm_find_target(cache_mem, addr, &target); + if (!target_found) { + return NULL; + } + + d = pcie_find_port_by_pn(&PCI_BRIDGE(d)->sec_bus, target); + if (!d) { + return NULL; + } + + d = pci_bridge_get_sec_bus(PCI_BRIDGE(d))->devices[0]; + if (!d) { + return NULL; + } - if (!d || !object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) { + if (!object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) { return NULL; }

diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index 790cec333c..a29f191aa8 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -69,16 +69,17 @@ static void virtio_gpu_notify_event(VirtIOGPUBase *g, uint32_t event_type) virtio_notify_config(&g->parent_obj); } -static int virtio_gpu_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info) +static void virtio_gpu_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info) { VirtIOGPUBase *g = opaque; if (idx >= g->conf.max_outputs) { - return -1; + return; } g->req_state[idx].x = info->xoff; g->req_state[idx].y = info->yoff; + g->req_state[idx].refresh_rate = info->refresh_rate; g->req_state[idx].width = info->width; g->req_state[idx].height = info->height; g->req_state[idx].width_mm = info->width_mm; @@ -92,7 +93,7 @@ static int virtio_gpu_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info) /* send event to guest */ virtio_gpu_notify_event(g, VIRTIO_GPU_EVENT_DISPLAY); - return 0; + return; } static void

diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index cd4a56056f..20cc703dcc 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -217,6 +217,7 @@ virtio_gpu_generate_edid(VirtIOGPU *g, int scanout, .height_mm = b->req_state[scanout].height_mm, .prefx = b->req_state[scanout].width, .prefy = b->req_state[scanout].height, + .refresh_rate = b->req_state[scanout].refresh_rate, }; edid->size = cpu_to_le32(sizeof(edid->edid)); @@ -514,6 +515,9 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g, for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { scanout = &g->parent_obj.scanout[i]; if (scanout->resource_id == res->resource_id && + rf.r.x >= scanout->x && rf.r.y >= scanout->y && + rf.r.x + rf.r.width <= scanout->x + scanout->width && + rf.r.y + rf.r.height <= scanout->y + scanout->height && console_has_gl(scanout->con)) { dpy_gl_update(scanout->con, 0, 0, scanout->width, scanout->height);

diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c index c206b5da38..4dcb34c4a7 100644 --- a/hw/display/virtio-vga.c +++ b/hw/display/virtio-vga.c @@ -47,15 +47,14 @@
static void virtio_vga_base_text_update(void *opaque, console_ch_t *chardata) } } -static int virtio_vga_base_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info) +static void virtio_vga_base_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info) { VirtIOVGABase *vvga = opaque; VirtIOGPUBase *g = vvga->vgpu; if (g->hw_ops->ui_info) { - return g->hw_ops->ui_info(g, idx, info); + g->hw_ops->ui_info(g, idx, info); } - return -1; } static void virtio_vga_base_gl_block(void *opaque, bool block)

diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c index cea10fe3c7..50857cd97a 100644 --- a/hw/display/xenfb.c +++ b/hw/display/xenfb.c @@ -777,16 +777,24 @@ static void xenfb_update(void *opaque) xenfb->up_fullscreen = 0; } -static void xenfb_update_interval(void *opaque, uint64_t interval) +static void xenfb_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info) { struct XenFB *xenfb = opaque; + uint32_t refresh_rate; if (xenfb->feature_update) { #ifdef XENFB_TYPE_REFRESH_PERIOD if (xenfb_queue_full(xenfb)) { return; } - xenfb_send_refresh_period(xenfb, interval); + + refresh_rate = info->refresh_rate; + if (!refresh_rate) { + refresh_rate = 75; + } + + /* T = 1 / f = 1 [s*Hz] / f = 1000*1000 [ms*mHz] / f */ + xenfb_send_refresh_period(xenfb, 1000 * 1000 / refresh_rate); #endif } } @@ -983,5 +991,5 @@ struct XenDevOps xen_framebuffer_ops = { static const GraphicHwOps xenfb_ops = { .invalidate = xenfb_invalidate, .gfx_update = xenfb_update, - .update_interval = xenfb_update_interval, + .ui_info = xenfb_ui_info, };
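Per the unit annotation in the xenfb hunk above (period in milliseconds, rate in millihertz), a quick worked example of the conversion:

    /* 60 Hz display => info->refresh_rate == 60000 (mHz) */
    uint32_t period_ms = 1000 * 1000 / 60000;    /* ~16 ms between refreshes */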
diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c new file mode 100644 index 0000000000..a361e519d0 --- /dev/null +++ b/hw/pci-bridge/cxl_downstream.c @@ -0,0 +1,249 @@ +/* + * Emulated CXL Switch Downstream Port + * + * Copyright (c) 2022 Huawei Technologies. + * + * Based on xio3130_downstream.c + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "hw/pci/msi.h" +#include "hw/pci/pcie.h" +#include "hw/pci/pcie_port.h" +#include "qapi/error.h" + +typedef struct CXLDownStreamPort { + /*< private >*/ + PCIESlot parent_obj; + + /*< public >*/ + CXLComponentState cxl_cstate; +} CXLDownstreamPort; + +#define TYPE_CXL_DSP "cxl-downstream" +DECLARE_INSTANCE_CHECKER(CXLDownstreamPort, CXL_DSP, TYPE_CXL_DSP) + +#define CXL_DOWNSTREAM_PORT_MSI_OFFSET 0x70 +#define CXL_DOWNSTREAM_PORT_MSI_NR_VECTOR 1 +#define CXL_DOWNSTREAM_PORT_EXP_OFFSET 0x90 +#define CXL_DOWNSTREAM_PORT_AER_OFFSET 0x100 +#define CXL_DOWNSTREAM_PORT_DVSEC_OFFSET \ + (CXL_DOWNSTREAM_PORT_AER_OFFSET + PCI_ERR_SIZEOF) + +static void latch_registers(CXLDownstreamPort *dsp) +{ + uint32_t *reg_state = dsp->cxl_cstate.crb.cache_mem_registers; + uint32_t *write_msk = dsp->cxl_cstate.crb.cache_mem_regs_write_mask; + + cxl_component_register_init_common(reg_state, write_msk, + CXL2_DOWNSTREAM_PORT); +} + +/* TODO: Look at sharing this code across all CXL port types */ +static void cxl_dsp_dvsec_write_config(PCIDevice *dev, uint32_t addr, + uint32_t val, int len) +{ + CXLDownstreamPort *dsp = CXL_DSP(dev); + CXLComponentState *cxl_cstate = &dsp->cxl_cstate; + + if (range_contains(&cxl_cstate->dvsecs[EXTENSIONS_PORT_DVSEC], addr)) { + uint8_t *reg = &dev->config[addr]; + addr -= cxl_cstate->dvsecs[EXTENSIONS_PORT_DVSEC].lob; + if (addr == PORT_CONTROL_OFFSET) { + if (pci_get_word(reg) & PORT_CONTROL_UNMASK_SBR) { + /* unmask SBR */ + qemu_log_mask(LOG_UNIMP, "SBR mask control is not supported\n"); + } + if (pci_get_word(reg) & PORT_CONTROL_ALT_MEMID_EN) { + /* Alt Memory & ID Space Enable */ + qemu_log_mask(LOG_UNIMP, + "Alt Memory & ID space is not supported\n"); + + } + } + } +} + +static void cxl_dsp_config_write(PCIDevice *d, uint32_t address, + uint32_t val, int len) +{ + uint16_t slt_ctl, slt_sta; + + pcie_cap_slot_get(d, &slt_ctl, &slt_sta); + pci_bridge_write_config(d, address, val, len); + pcie_cap_flr_write_config(d, address, val, len); + pcie_cap_slot_write_config(d, slt_ctl, slt_sta, address, val, len); + pcie_aer_write_config(d, address, val, len); + + cxl_dsp_dvsec_write_config(d, address, val, len); +} + +static void cxl_dsp_reset(DeviceState *qdev) +{ + PCIDevice *d = PCI_DEVICE(qdev); + CXLDownstreamPort *dsp = CXL_DSP(qdev); + + pcie_cap_deverr_reset(d); + pcie_cap_slot_reset(d); + pcie_cap_arifwd_reset(d); + pci_bridge_reset(qdev); + + latch_registers(dsp); +} + +static void build_dvsecs(CXLComponentState *cxl) +{ + uint8_t *dvsec; + + dvsec = (uint8_t *)&(CXLDVSECPortExtensions){ 0 }; + cxl_component_create_dvsec(cxl, CXL2_DOWNSTREAM_PORT, + EXTENSIONS_PORT_DVSEC_LENGTH, + EXTENSIONS_PORT_DVSEC, + EXTENSIONS_PORT_DVSEC_REVID, dvsec); + + dvsec = (uint8_t *)&(CXLDVSECPortFlexBus){ + .cap = 0x27, /* Cache, IO, Mem, non-MLD */ + .ctrl = 0x02, /* IO always enabled */ + .status = 0x26, /* same */ + .rcvd_mod_ts_data_phase1 = 0xef, /* WTF?
*/ + }; + cxl_component_create_dvsec(cxl, CXL2_DOWNSTREAM_PORT, + PCIE_FLEXBUS_PORT_DVSEC_LENGTH_2_0, + PCIE_FLEXBUS_PORT_DVSEC, + PCIE_FLEXBUS_PORT_DVSEC_REVID_2_0, dvsec); + + dvsec = (uint8_t *)&(CXLDVSECPortGPF){ + .rsvd = 0, + .phase1_ctrl = 1, /* 1μs timeout */ + .phase2_ctrl = 1, /* 1μs timeout */ + }; + cxl_component_create_dvsec(cxl, CXL2_DOWNSTREAM_PORT, + GPF_PORT_DVSEC_LENGTH, GPF_PORT_DVSEC, + GPF_PORT_DVSEC_REVID, dvsec); + + dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){ + .rsvd = 0, + .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX, + .reg0_base_hi = 0, + }; + cxl_component_create_dvsec(cxl, CXL2_DOWNSTREAM_PORT, + REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC, + REG_LOC_DVSEC_REVID, dvsec); +} + +static void cxl_dsp_realize(PCIDevice *d, Error **errp) +{ + PCIEPort *p = PCIE_PORT(d); + PCIESlot *s = PCIE_SLOT(d); + CXLDownstreamPort *dsp = CXL_DSP(d); + CXLComponentState *cxl_cstate = &dsp->cxl_cstate; + ComponentRegisters *cregs = &cxl_cstate->crb; + MemoryRegion *component_bar = &cregs->component_registers; + int rc; + + pci_bridge_initfn(d, TYPE_PCIE_BUS); + pcie_port_init_reg(d); + + rc = msi_init(d, CXL_DOWNSTREAM_PORT_MSI_OFFSET, + CXL_DOWNSTREAM_PORT_MSI_NR_VECTOR, + true, true, errp); + if (rc) { + assert(rc == -ENOTSUP); + goto err_bridge; + } + + rc = pcie_cap_init(d, CXL_DOWNSTREAM_PORT_EXP_OFFSET, + PCI_EXP_TYPE_DOWNSTREAM, p->port, + errp); + if (rc < 0) { + goto err_msi; + } + + pcie_cap_flr_init(d); + pcie_cap_deverr_init(d); + pcie_cap_slot_init(d, s); + pcie_cap_arifwd_init(d); + + pcie_chassis_create(s->chassis); + rc = pcie_chassis_add_slot(s); + if (rc < 0) { + error_setg(errp, "Can't add chassis slot, error %d", rc); + goto err_pcie_cap; + } + + rc = pcie_aer_init(d, PCI_ERR_VER, CXL_DOWNSTREAM_PORT_AER_OFFSET, + PCI_ERR_SIZEOF, errp); + if (rc < 0) { + goto err_chassis; + } + + cxl_cstate->dvsec_offset = CXL_DOWNSTREAM_PORT_DVSEC_OFFSET; + cxl_cstate->pdev = d; + build_dvsecs(cxl_cstate); + cxl_component_register_block_init(OBJECT(d), cxl_cstate, TYPE_CXL_DSP); + pci_register_bar(d, CXL_COMPONENT_REG_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, + component_bar); + + return; + + err_chassis: + pcie_chassis_del_slot(s); + err_pcie_cap: + pcie_cap_exit(d); + err_msi: + msi_uninit(d); + err_bridge: + pci_bridge_exitfn(d); +} + +static void cxl_dsp_exitfn(PCIDevice *d) +{ + PCIESlot *s = PCIE_SLOT(d); + + pcie_aer_exit(d); + pcie_chassis_del_slot(s); + pcie_cap_exit(d); + msi_uninit(d); + pci_bridge_exitfn(d); +} + +static void cxl_dsp_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PCIDeviceClass *k = PCI_DEVICE_CLASS(oc); + + k->is_bridge = true; + k->config_write = cxl_dsp_config_write; + k->realize = cxl_dsp_realize; + k->exit = cxl_dsp_exitfn; + k->vendor_id = 0x19e5; /* Huawei */ + k->device_id = 0xa129; /* Emulated CXL Switch Downstream Port */ + k->revision = 0; + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + dc->desc = "CXL Switch Downstream Port"; + dc->reset = cxl_dsp_reset; +} + +static const TypeInfo cxl_dsp_info = { + .name = TYPE_CXL_DSP, + .instance_size = sizeof(CXLDownstreamPort), + .parent = TYPE_PCIE_SLOT, + .class_init = cxl_dsp_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { INTERFACE_CXL_DEVICE }, + { } + }, +}; + +static void cxl_dsp_register_type(void) +{ + type_register_static(&cxl_dsp_info); +} + +type_init(cxl_dsp_register_type); diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c new file mode 100644 
index 0000000000..a83a3e81e4 --- /dev/null +++ b/hw/pci-bridge/cxl_upstream.c @@ -0,0 +1,216 @@ +/* + * Emulated CXL Switch Upstream Port + * + * Copyright (c) 2022 Huawei Technologies. + * + * Based on xio3130_upstream.c + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "hw/pci/msi.h" +#include "hw/pci/pcie.h" +#include "hw/pci/pcie_port.h" + +#define CXL_UPSTREAM_PORT_MSI_NR_VECTOR 1 + +#define CXL_UPSTREAM_PORT_MSI_OFFSET 0x70 +#define CXL_UPSTREAM_PORT_PCIE_CAP_OFFSET 0x90 +#define CXL_UPSTREAM_PORT_AER_OFFSET 0x100 +#define CXL_UPSTREAM_PORT_DVSEC_OFFSET \ + (CXL_UPSTREAM_PORT_AER_OFFSET + PCI_ERR_SIZEOF) + +typedef struct CXLUpstreamPort { + /*< private >*/ + PCIEPort parent_obj; + + /*< public >*/ + CXLComponentState cxl_cstate; +} CXLUpstreamPort; + +CXLComponentState *cxl_usp_to_cstate(CXLUpstreamPort *usp) +{ + return &usp->cxl_cstate; +} + +static void cxl_usp_dvsec_write_config(PCIDevice *dev, uint32_t addr, + uint32_t val, int len) +{ + CXLUpstreamPort *usp = CXL_USP(dev); + + if (range_contains(&usp->cxl_cstate.dvsecs[EXTENSIONS_PORT_DVSEC], addr)) { + uint8_t *reg = &dev->config[addr]; + addr -= usp->cxl_cstate.dvsecs[EXTENSIONS_PORT_DVSEC].lob; + if (addr == PORT_CONTROL_OFFSET) { + if (pci_get_word(reg) & PORT_CONTROL_UNMASK_SBR) { + /* unmask SBR */ + qemu_log_mask(LOG_UNIMP, "SBR mask control is not supported\n"); + } + if (pci_get_word(reg) & PORT_CONTROL_ALT_MEMID_EN) { + /* Alt Memory & ID Space Enable */ + qemu_log_mask(LOG_UNIMP, + "Alt Memory & ID space is not supported\n"); + } + } + } +} + +static void cxl_usp_write_config(PCIDevice *d, uint32_t address, + uint32_t val, int len) +{ + pci_bridge_write_config(d, address, val, len); + pcie_cap_flr_write_config(d, address, val, len); + pcie_aer_write_config(d, address, val, len); + + cxl_usp_dvsec_write_config(d, address, val, len); +} + +static void latch_registers(CXLUpstreamPort *usp) +{ + uint32_t *reg_state = usp->cxl_cstate.crb.cache_mem_registers; + uint32_t *write_msk = usp->cxl_cstate.crb.cache_mem_regs_write_mask; + + cxl_component_register_init_common(reg_state, write_msk, + CXL2_UPSTREAM_PORT); + ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8); +} + +static void cxl_usp_reset(DeviceState *qdev) +{ + PCIDevice *d = PCI_DEVICE(qdev); + CXLUpstreamPort *usp = CXL_USP(qdev); + + pci_bridge_reset(qdev); + pcie_cap_deverr_reset(d); + latch_registers(usp); +} + +static void build_dvsecs(CXLComponentState *cxl) +{ + uint8_t *dvsec; + + dvsec = (uint8_t *)&(CXLDVSECPortExtensions){ + .status = 0x1, /* Port Power Management Init Complete */ + }; + cxl_component_create_dvsec(cxl, CXL2_UPSTREAM_PORT, + EXTENSIONS_PORT_DVSEC_LENGTH, + EXTENSIONS_PORT_DVSEC, + EXTENSIONS_PORT_DVSEC_REVID, dvsec); + dvsec = (uint8_t *)&(CXLDVSECPortFlexBus){ + .cap = 0x27, /* Cache, IO, Mem, non-MLD */ + .ctrl = 0x27, /* Cache, IO, Mem */ + .status = 0x26, /* same */ + .rcvd_mod_ts_data_phase1 = 0xef, /* WTF? 
*/ + }; + cxl_component_create_dvsec(cxl, CXL2_UPSTREAM_PORT, + PCIE_FLEXBUS_PORT_DVSEC_LENGTH_2_0, + PCIE_FLEXBUS_PORT_DVSEC, + PCIE_FLEXBUS_PORT_DVSEC_REVID_2_0, dvsec); + + dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){ + .rsvd = 0, + .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX, + .reg0_base_hi = 0, + }; + cxl_component_create_dvsec(cxl, CXL2_UPSTREAM_PORT, + REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC, + REG_LOC_DVSEC_REVID, dvsec); +} + +static void cxl_usp_realize(PCIDevice *d, Error **errp) +{ + PCIEPort *p = PCIE_PORT(d); + CXLUpstreamPort *usp = CXL_USP(d); + CXLComponentState *cxl_cstate = &usp->cxl_cstate; + ComponentRegisters *cregs = &cxl_cstate->crb; + MemoryRegion *component_bar = &cregs->component_registers; + int rc; + + pci_bridge_initfn(d, TYPE_PCIE_BUS); + pcie_port_init_reg(d); + + rc = msi_init(d, CXL_UPSTREAM_PORT_MSI_OFFSET, + CXL_UPSTREAM_PORT_MSI_NR_VECTOR, true, true, errp); + if (rc) { + assert(rc == -ENOTSUP); + goto err_bridge; + } + + rc = pcie_cap_init(d, CXL_UPSTREAM_PORT_PCIE_CAP_OFFSET, + PCI_EXP_TYPE_UPSTREAM, p->port, errp); + if (rc < 0) { + goto err_msi; + } + + pcie_cap_flr_init(d); + pcie_cap_deverr_init(d); + rc = pcie_aer_init(d, PCI_ERR_VER, CXL_UPSTREAM_PORT_AER_OFFSET, + PCI_ERR_SIZEOF, errp); + if (rc) { + goto err_cap; + } + + cxl_cstate->dvsec_offset = CXL_UPSTREAM_PORT_DVSEC_OFFSET; + cxl_cstate->pdev = d; + build_dvsecs(cxl_cstate); + cxl_component_register_block_init(OBJECT(d), cxl_cstate, TYPE_CXL_USP); + pci_register_bar(d, CXL_COMPONENT_REG_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, + component_bar); + + return; + +err_cap: + pcie_cap_exit(d); +err_msi: + msi_uninit(d); +err_bridge: + pci_bridge_exitfn(d); +} + +static void cxl_usp_exitfn(PCIDevice *d) +{ + pcie_aer_exit(d); + pcie_cap_exit(d); + msi_uninit(d); + pci_bridge_exitfn(d); +} + +static void cxl_upstream_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PCIDeviceClass *k = PCI_DEVICE_CLASS(oc); + + k->is_bridge = true; + k->config_write = cxl_usp_write_config; + k->realize = cxl_usp_realize; + k->exit = cxl_usp_exitfn; + k->vendor_id = 0x19e5; /* Huawei */ + k->device_id = 0xa128; /* Emulated CXL Switch Upstream Port */ + k->revision = 0; + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + dc->desc = "CXL Switch Upstream Port"; + dc->reset = cxl_usp_reset; +} + +static const TypeInfo cxl_usp_info = { + .name = TYPE_CXL_USP, + .parent = TYPE_PCIE_PORT, + .instance_size = sizeof(CXLUpstreamPort), + .class_init = cxl_upstream_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { INTERFACE_CXL_DEVICE }, + { } + }, +}; + +static void cxl_usp_register_type(void) +{ + type_register_static(&cxl_usp_info); +} + +type_init(cxl_usp_register_type); diff --git a/hw/pci-bridge/meson.build b/hw/pci-bridge/meson.build index fdbe2e07c5..243ceeda50 100644 --- a/hw/pci-bridge/meson.build +++ b/hw/pci-bridge/meson.build @@ -6,7 +6,7 @@ pci_ss.add(when: 'CONFIG_PCIE_PORT', if_true: files('pcie_root_port.c', 'gen_pci pci_ss.add(when: 'CONFIG_PXB', if_true: files('pci_expander_bridge.c'), if_false: files('pci_expander_bridge_stubs.c')) pci_ss.add(when: 'CONFIG_XIO3130', if_true: files('xio3130_upstream.c', 'xio3130_downstream.c')) -pci_ss.add(when: 'CONFIG_CXL', if_true: files('cxl_root_port.c')) +pci_ss.add(when: 'CONFIG_CXL', if_true: files('cxl_root_port.c', 'cxl_upstream.c', 'cxl_downstream.c')) # NewWorld PowerMac pci_ss.add(when: 'CONFIG_DEC_PCI', if_true: files('dec.c')) diff --git 
a/hw/pci-host/q35.c b/hw/pci-host/q35.c index ab5a47aff5..20da121374 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -379,7 +379,8 @@ static void mch_update_smram(MCHPCIState *mch) memory_region_set_enabled(&mch->high_smram, false); } - if (pd->config[MCH_HOST_BRIDGE_ESMRAMC] & MCH_HOST_BRIDGE_ESMRAMC_T_EN) { + if ((pd->config[MCH_HOST_BRIDGE_ESMRAMC] & MCH_HOST_BRIDGE_ESMRAMC_T_EN) && + (pd->config[MCH_HOST_BRIDGE_SMRAM] & SMRAM_G_SMRAME)) { switch (pd->config[MCH_HOST_BRIDGE_ESMRAMC] & MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK) { case MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_1MB:

diff --git a/hw/pci/msi.c b/hw/pci/msi.c index 47d2b0f33c..5c471b9616 100644 --- a/hw/pci/msi.c +++ b/hw/pci/msi.c @@ -134,7 +134,7 @@ void msi_set_message(PCIDevice *dev, MSIMessage msg) pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data); } -MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector) +static MSIMessage msi_prepare_message(PCIDevice *dev, unsigned int vector) { uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; @@ -159,6 +159,11 @@ MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector) return msg; } +MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector) +{ + return dev->msi_prepare_message(dev, vector); +} + bool msi_enabled(const PCIDevice *dev) { return msi_present(dev) && @@ -241,6 +246,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset, 0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors)); } + dev->msi_prepare_message = msi_prepare_message; + return 0; } @@ -256,6 +263,7 @@ void msi_uninit(struct PCIDevice *dev) cap_size = msi_cap_sizeof(flags); pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size); dev->cap_present &= ~QEMU_PCI_CAP_MSI; + dev->msi_prepare_message = NULL; MSI_DEV_PRINTF(dev, "uninit\n"); } @@ -307,6 +315,39 @@ bool msi_is_masked(const PCIDevice *dev, unsigned int vector) return mask & (1U << vector); } +void msi_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp) +{ + ERRP_GUARD(); + uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); + bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; + uint32_t irq_state, vector_mask, pending; + + if (vector > PCI_MSI_VECTORS_MAX) { + error_setg(errp, "msi: vector %d not allocated. max vector is %d", + vector, PCI_MSI_VECTORS_MAX); + return; + } + + vector_mask = (1U << vector); + + irq_state = pci_get_long(dev->config + msi_mask_off(dev, msi64bit)); + + if (mask) { + irq_state |= vector_mask; + } else { + irq_state &= ~vector_mask; + } + + pci_set_long(dev->config + msi_mask_off(dev, msi64bit), irq_state); + + pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit)); + if (!mask && (pending & vector_mask)) { + pending &= ~vector_mask; + pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending); + msi_notify(dev, vector); + } +} + void msi_notify(PCIDevice *dev, unsigned int vector) { uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); @@ -334,11 +375,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector) void msi_send_message(PCIDevice *dev, MSIMessage msg) { - MemTxAttrs attrs = {}; - - attrs.requester_id = pci_requester_id(dev); - address_space_stl_le(&dev->bus_master_as, msg.address, msg.data, - attrs, NULL); + dev->msi_trigger(dev, msg); } /* Normally called by pci_default_write_config(). */
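The msi.c hunks above replace the hard-coded address_space_stl_le() delivery with per-device msi_trigger and msi_prepare_message hooks, which is what lets the vfio-user server below interpose on MSI without touching device models. A minimal sketch of such an override (the my_* names are illustrative, mirroring vfu_object_msi_prepare_msg()/vfu_object_msi_trigger() further down):

    static MSIMessage my_prepare_message(PCIDevice *dev, unsigned int vector)
    {
        /* no guest address space here: encode just the vector number */
        MSIMessage msg = { .address = 0, .data = vector };
        return msg;
    }

    static void my_trigger(PCIDevice *dev, MSIMessage msg)
    {
        /* deliver msg.data (the vector) out of band, e.g. over a socket,
           instead of writing to dev->bus_master_as */
    }

    /* installed after realize, overriding the defaults set by the PCI core: */
    dev->msi_prepare_message = my_prepare_message;
    dev->msi_trigger = my_trigger;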
diff --git a/hw/pci/msix.c b/hw/pci/msix.c index ae9331cd0b..1e381a9813 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -31,7 +31,7 @@ #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8) #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8) -MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) +static MSIMessage msix_prepare_message(PCIDevice *dev, unsigned vector) { uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE; MSIMessage msg; @@ -41,6 +41,11 @@ return msg; } +MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) +{ + return dev->msix_prepare_message(dev, vector); +} + /* * Special API for POWER to configure the vectors through * a side channel. Should never be used by devices. @@ -131,6 +136,31 @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) } } +void msix_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp) +{ + ERRP_GUARD(); + unsigned offset; + bool was_masked; + + if (vector > dev->msix_entries_nr) { + error_setg(errp, "msix: vector %d not allocated. max vector is %d", + vector, dev->msix_entries_nr); + return; + } + + offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; + + was_masked = msix_is_masked(dev, vector); + + if (mask) { + dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; + } else { + dev->msix_table[offset] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; + } + + msix_handle_mask_update(dev, vector, was_masked); +} + static bool msix_masked(PCIDevice *dev) { return dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK; @@ -344,6 +374,8 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries, "msix-pba", pba_size); memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio); + dev->msix_prepare_message = msix_prepare_message; + return 0; } @@ -429,6 +461,7 @@ void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar) g_free(dev->msix_entry_used); dev->msix_entry_used = NULL; dev->cap_present &= ~QEMU_PCI_CAP_MSIX; + dev->msix_prepare_message = NULL; } void msix_uninit_exclusive_bar(PCIDevice *dev)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 6e7015329c..2f450f6a72 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -317,6 +317,15 @@ void pci_device_deassert_intx(PCIDevice *dev) } } +static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg) +{ + MemTxAttrs attrs = {}; + + attrs.requester_id = pci_requester_id(dev); + address_space_stl_le(&dev->bus_master_as, msg.address, msg.data, + attrs, NULL); +} + static void pci_reset_regions(PCIDevice *dev) { int r; @@ -1212,6 +1221,8 @@ static void pci_qdev_unrealize(DeviceState *dev) pci_device_deassert_intx(pci_dev); do_pci_unregister_device(pci_dev); + + pci_dev->msi_trigger = NULL; } void pci_register_bar(PCIDevice *pci_dev, int region_num, @@ -2251,6 +2262,8 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) } pci_set_power(pci_dev, true); + + pci_dev->msi_trigger = pci_msi_trigger; } PCIDevice *pci_new_multifunction(int devfn, bool multifunction,

diff --git a/hw/remote/Kconfig b/hw/remote/Kconfig index 08c16e235f..2d6b4f4cf4 100644 --- a/hw/remote/Kconfig +++ b/hw/remote/Kconfig @@ -2,3 +2,7 @@ config MULTIPROCESS bool depends on PCI && PCI_EXPRESS && KVM select REMOTE_PCIHOST + +config VFIO_USER_SERVER + bool + depends on MULTIPROCESS

diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c new file mode 100644 index 0000000000..fd723d91f3 --- /dev/null +++ b/hw/remote/iommu.c @@ -0,0 +1,131 @@ +/** + *
IOMMU for remote device + * + * Copyright © 2022 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" + +#include "hw/remote/iommu.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pci.h" +#include "exec/memory.h" +#include "exec/address-spaces.h" +#include "trace.h" + +/** + * IOMMU for TYPE_REMOTE_MACHINE - manages DMA address space isolation + * for remote machine. It is used by TYPE_VFIO_USER_SERVER. + * + * - Each TYPE_VFIO_USER_SERVER instance handles one PCIDevice on a PCIBus. + * There is one RemoteIommu per PCIBus, so the RemoteIommu tracks multiple + * PCIDevices by maintaining a ->elem_by_devfn mapping. + * + * - memory_region_init_iommu() is not used because vfio-user MemoryRegions + * will be added to the elem->mr container instead. This is more natural + * than implementing the IOMMUMemoryRegionClass APIs since vfio-user + * provides something that is close to a full-fledged MemoryRegion and + * not like an IOMMU mapping. + * + * - When a device is hot unplugged, the elem->mr reference is dropped so + * all vfio-user MemoryRegions associated with this vfio-user server are + * destroyed. + */ + +static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus, + void *opaque, int devfn) +{ + RemoteIommu *iommu = opaque; + RemoteIommuElem *elem = NULL; + + qemu_mutex_lock(&iommu->lock); + + elem = g_hash_table_lookup(iommu->elem_by_devfn, INT2VOIDP(devfn)); + + if (!elem) { + elem = g_malloc0(sizeof(RemoteIommuElem)); + g_hash_table_insert(iommu->elem_by_devfn, INT2VOIDP(devfn), elem); + } + + if (!elem->mr) { + elem->mr = MEMORY_REGION(object_new(TYPE_MEMORY_REGION)); + memory_region_set_size(elem->mr, UINT64_MAX); + address_space_init(&elem->as, elem->mr, NULL); + } + + qemu_mutex_unlock(&iommu->lock); + + return &elem->as; +} + +void remote_iommu_unplug_dev(PCIDevice *pci_dev) +{ + AddressSpace *as = pci_device_iommu_address_space(pci_dev); + RemoteIommuElem *elem = NULL; + + if (as == &address_space_memory) { + return; + } + + elem = container_of(as, RemoteIommuElem, as); + + address_space_destroy(&elem->as); + + object_unref(elem->mr); + + elem->mr = NULL; +} + +static void remote_iommu_init(Object *obj) +{ + RemoteIommu *iommu = REMOTE_IOMMU(obj); + + iommu->elem_by_devfn = g_hash_table_new_full(NULL, NULL, NULL, g_free); + + qemu_mutex_init(&iommu->lock); +} + +static void remote_iommu_finalize(Object *obj) +{ + RemoteIommu *iommu = REMOTE_IOMMU(obj); + + qemu_mutex_destroy(&iommu->lock); + + g_hash_table_destroy(iommu->elem_by_devfn); + + iommu->elem_by_devfn = NULL; +} + +void remote_iommu_setup(PCIBus *pci_bus) +{ + RemoteIommu *iommu = NULL; + + g_assert(pci_bus); + + iommu = REMOTE_IOMMU(object_new(TYPE_REMOTE_IOMMU)); + + pci_setup_iommu(pci_bus, remote_iommu_find_add_as, iommu); + + object_property_add_child(OBJECT(pci_bus), "remote-iommu", OBJECT(iommu)); + + object_unref(OBJECT(iommu)); +} + +static const TypeInfo remote_iommu_info = { + .name = TYPE_REMOTE_IOMMU, + .parent = TYPE_OBJECT, + .instance_size = sizeof(RemoteIommu), + .instance_init = remote_iommu_init, + .instance_finalize = remote_iommu_finalize, +}; + +static void remote_iommu_register_types(void) +{ + type_register_static(&remote_iommu_info); +} + +type_init(remote_iommu_register_types) diff --git a/hw/remote/machine.c b/hw/remote/machine.c index 92d71d47bb..75d550daae 100644 --- a/hw/remote/machine.c +++ b/hw/remote/machine.c @@ -20,6 +20,11 @@ #include 
"qapi/error.h" #include "hw/pci/pci_host.h" #include "hw/remote/iohub.h" +#include "hw/remote/iommu.h" +#include "hw/qdev-core.h" +#include "hw/remote/iommu.h" +#include "hw/remote/vfio-user-obj.h" +#include "hw/pci/msi.h" static void remote_machine_init(MachineState *machine) { @@ -49,25 +54,102 @@ static void remote_machine_init(MachineState *machine) pci_host = PCI_HOST_BRIDGE(rem_host); - remote_iohub_init(&s->iohub); + if (s->vfio_user) { + remote_iommu_setup(pci_host->bus); - pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq, - &s->iohub, REMOTE_IOHUB_NB_PIRQS); + msi_nonbroken = true; + + vfu_object_set_bus_irq(pci_host->bus); + } else { + remote_iohub_init(&s->iohub); + + pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq, + &s->iohub, REMOTE_IOHUB_NB_PIRQS); + } + + qbus_set_hotplug_handler(BUS(pci_host->bus), OBJECT(s)); +} + +static bool remote_machine_get_vfio_user(Object *obj, Error **errp) +{ + RemoteMachineState *s = REMOTE_MACHINE(obj); + + return s->vfio_user; +} + +static void remote_machine_set_vfio_user(Object *obj, bool value, Error **errp) +{ + RemoteMachineState *s = REMOTE_MACHINE(obj); + + if (phase_check(PHASE_MACHINE_CREATED)) { + error_setg(errp, "Error enabling vfio-user - machine already created"); + return; + } + + s->vfio_user = value; +} + +static bool remote_machine_get_auto_shutdown(Object *obj, Error **errp) +{ + RemoteMachineState *s = REMOTE_MACHINE(obj); + + return s->auto_shutdown; +} + +static void remote_machine_set_auto_shutdown(Object *obj, bool value, + Error **errp) +{ + RemoteMachineState *s = REMOTE_MACHINE(obj); + + s->auto_shutdown = value; +} + +static void remote_machine_instance_init(Object *obj) +{ + RemoteMachineState *s = REMOTE_MACHINE(obj); + + s->auto_shutdown = true; +} + +static void remote_machine_dev_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + qdev_unrealize(dev); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + remote_iommu_unplug_dev(PCI_DEVICE(dev)); + } } static void remote_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); mc->init = remote_machine_init; mc->desc = "Experimental remote machine"; + + hc->unplug = remote_machine_dev_unplug_cb; + + object_class_property_add_bool(oc, "vfio-user", + remote_machine_get_vfio_user, + remote_machine_set_vfio_user); + + object_class_property_add_bool(oc, "auto-shutdown", + remote_machine_get_auto_shutdown, + remote_machine_set_auto_shutdown); } static const TypeInfo remote_machine = { .name = TYPE_REMOTE_MACHINE, .parent = TYPE_MACHINE, .instance_size = sizeof(RemoteMachineState), + .instance_init = remote_machine_instance_init, .class_init = remote_machine_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + } }; static void remote_machine_register_types(void) diff --git a/hw/remote/meson.build b/hw/remote/meson.build index e6a5574242..ab25c04906 100644 --- a/hw/remote/meson.build +++ b/hw/remote/meson.build @@ -6,6 +6,10 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('message.c')) remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c')) remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c')) remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iohub.c')) +remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iommu.c')) +remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: files('vfio-user-obj.c')) + 
+remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: libvfio_user_dep) specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('memory.c')) specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy-memory-listener.c'))

diff --git a/hw/remote/trace-events b/hw/remote/trace-events index 0b23974f90..c167b3c7a5 100644 --- a/hw/remote/trace-events +++ b/hw/remote/trace-events @@ -2,3 +2,14 @@ mpqemu_send_io_error(int cmd, int size, int nfds) "send command %d size %d, %d file descriptors to remote process" mpqemu_recv_io_error(int cmd, int size, int nfds) "failed to receive %d size %d, %d file descriptors to remote process" + +# vfio-user-obj.c +vfu_prop(const char *prop, const char *val) "vfu: setting %s as %s" +vfu_cfg_read(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u -> 0x%x" +vfu_cfg_write(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u <- 0x%x" +vfu_dma_register(uint64_t gpa, size_t len) "vfu: registering GPA 0x%"PRIx64", %zu bytes" +vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 0x%"PRIx64"" +vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr 0x%"PRIx64" size 0x%"PRIx64"" +vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR address 0x%"PRIx64"" +vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR address 0x%"PRIx64"" +vfu_interrupt(int pirq) "vfu: sending interrupt to device - PIRQ %d"

diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c new file mode 100644 index 0000000000..c6cc53acf2 --- /dev/null +++ b/hw/remote/vfio-user-obj.c @@ -0,0 +1,958 @@ +/** + * QEMU vfio-user-server server object + * + * Copyright © 2022 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * + * See the COPYING file in the top-level directory. + * + */ + +/** + * Usage: add options: + * -machine x-remote,vfio-user=on,auto-shutdown=on + * -device <PCI-device>,id=<pci-dev-id> + * -object x-vfio-user-server,id=<id>,type=unix,path=<socket-path>, + * device=<pci-dev-id> + * + * Note that the x-vfio-user-server object must be used with the x-remote + * machine only. Only PCI devices are supported for now. + * + * type - SocketAddress type - only "unix" is supported at present. Required + * option + * + * path - named unix socket; it will be created by the server. Required + * option + * + * device - id of a device on the server. Required option; only PCI devices + * are supported at present. + * + * notes - x-vfio-user-server may block IO and the monitor during the + * initialization phase. + */ + +#include "qemu/osdep.h" + +#include "qom/object.h" +#include "qom/object_interfaces.h" +#include "qemu/error-report.h" +#include "trace.h" +#include "sysemu/runstate.h" +#include "hw/boards.h" +#include "hw/remote/machine.h" +#include "qapi/error.h" +#include "qapi/qapi-visit-sockets.h" +#include "qapi/qapi-events-misc.h" +#include "qemu/notify.h" +#include "qemu/thread.h" +#include "qemu/main-loop.h" +#include "sysemu/sysemu.h" +#include "libvfio-user.h" +#include "hw/qdev-core.h" +#include "hw/pci/pci.h" +#include "qemu/timer.h" +#include "exec/memory.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" +#include "hw/remote/vfio-user-obj.h" + +#define TYPE_VFU_OBJECT "x-vfio-user-server" +OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT) + +/** + * VFU_OBJECT_ERROR - reports an error message. If auto_shutdown + * is set, it aborts the machine on error. Otherwise, it logs an + * error message without aborting.
+ */ +#define VFU_OBJECT_ERROR(o, fmt, ...) \ + { \ + if (vfu_object_auto_shutdown()) { \ + error_setg(&error_abort, (fmt), ## __VA_ARGS__); \ + } else { \ + error_report((fmt), ## __VA_ARGS__); \ + } \ + } \ + +struct VfuObjectClass { + ObjectClass parent_class; + + unsigned int nr_devs; +}; + +struct VfuObject { + /* private */ + Object parent; + + SocketAddress *socket; + + char *device; + + Error *err; + + Notifier machine_done; + + vfu_ctx_t *vfu_ctx; + + PCIDevice *pci_dev; + + Error *unplug_blocker; + + int vfu_poll_fd; + + MSITriggerFunc *default_msi_trigger; + MSIPrepareMessageFunc *default_msi_prepare_message; + MSIxPrepareMessageFunc *default_msix_prepare_message; +}; + +static void vfu_object_init_ctx(VfuObject *o, Error **errp); + +static bool vfu_object_auto_shutdown(void) +{ + bool auto_shutdown = true; + Error *local_err = NULL; + + if (!current_machine) { + return auto_shutdown; + } + + auto_shutdown = object_property_get_bool(OBJECT(current_machine), + "auto-shutdown", + &local_err); + + /* + * local_err would be set if no such property exists - safe to ignore. + * Unlikely scenario as auto-shutdown is always defined for + * TYPE_REMOTE_MACHINE, and TYPE_VFU_OBJECT only works with + * TYPE_REMOTE_MACHINE + */ + if (local_err) { + auto_shutdown = true; + error_free(local_err); + } + + return auto_shutdown; +} + +static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + VfuObject *o = VFU_OBJECT(obj); + + if (o->vfu_ctx) { + error_setg(errp, "vfu: Unable to set socket property - server busy"); + return; + } + + qapi_free_SocketAddress(o->socket); + + o->socket = NULL; + + visit_type_SocketAddress(v, name, &o->socket, errp); + + if (o->socket->type != SOCKET_ADDRESS_TYPE_UNIX) { + error_setg(errp, "vfu: Unsupported socket type - %s", + SocketAddressType_str(o->socket->type)); + qapi_free_SocketAddress(o->socket); + o->socket = NULL; + return; + } + + trace_vfu_prop("socket", o->socket->u.q_unix.path); + + vfu_object_init_ctx(o, errp); +} + +static void vfu_object_set_device(Object *obj, const char *str, Error **errp) +{ + VfuObject *o = VFU_OBJECT(obj); + + if (o->vfu_ctx) { + error_setg(errp, "vfu: Unable to set device property - server busy"); + return; + } + + g_free(o->device); + + o->device = g_strdup(str); + + trace_vfu_prop("device", str); + + vfu_object_init_ctx(o, errp); +} + +static void vfu_object_ctx_run(void *opaque) +{ + VfuObject *o = opaque; + const char *vfu_id; + char *vfu_path, *pci_dev_path; + int ret = -1; + + while (ret != 0) { + ret = vfu_run_ctx(o->vfu_ctx); + if (ret < 0) { + if (errno == EINTR) { + continue; + } else if (errno == ENOTCONN) { + vfu_id = object_get_canonical_path_component(OBJECT(o)); + vfu_path = object_get_canonical_path(OBJECT(o)); + g_assert(o->pci_dev); + pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev)); + /* o->device is a required property and is non-NULL here */ + g_assert(o->device); + qapi_event_send_vfu_client_hangup(vfu_id, vfu_path, + o->device, pci_dev_path); + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); + o->vfu_poll_fd = -1; + object_unparent(OBJECT(o)); + g_free(vfu_path); + g_free(pci_dev_path); + break; + } else { + VFU_OBJECT_ERROR(o, "vfu: Failed to run device %s - %s", + o->device, strerror(errno)); + break; + } + } + } +} + +static void vfu_object_attach_ctx(void *opaque) +{ + VfuObject *o = opaque; + GPollFD pfds[1]; + int ret; + + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); + + pfds[0].fd = o->vfu_poll_fd; + pfds[0].events 
= G_IO_IN | G_IO_HUP | G_IO_ERR; + +retry_attach: + ret = vfu_attach_ctx(o->vfu_ctx); + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) { + /** + * vfu_object_attach_ctx can block QEMU's main loop + * during attach - the monitor and other IO + * could be unresponsive during this time. + */ + (void)qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS); + goto retry_attach; + } else if (ret < 0) { + VFU_OBJECT_ERROR(o, "vfu: Failed to attach device %s to context - %s", + o->device, strerror(errno)); + return; + } + + o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx); + if (o->vfu_poll_fd < 0) { + VFU_OBJECT_ERROR(o, "vfu: Failed to get poll fd %s", o->device); + return; + } + + qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o); +} + +static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf, + size_t count, loff_t offset, + const bool is_write) +{ + VfuObject *o = vfu_get_private(vfu_ctx); + uint32_t pci_access_width = sizeof(uint32_t); + size_t bytes = count; + uint32_t val = 0; + char *ptr = buf; + int len; + + /* + * Writes to the BAR registers would trigger an update to the + * global Memory and IO AddressSpaces. But the remote device + * never uses the global AddressSpaces, therefore overlapping + * memory regions are not a problem + */ + while (bytes > 0) { + len = (bytes > pci_access_width) ? pci_access_width : bytes; + if (is_write) { + memcpy(&val, ptr, len); + pci_host_config_write_common(o->pci_dev, offset, + pci_config_size(o->pci_dev), + val, len); + trace_vfu_cfg_write(offset, val); + } else { + val = pci_host_config_read_common(o->pci_dev, offset, + pci_config_size(o->pci_dev), len); + memcpy(ptr, &val, len); + trace_vfu_cfg_read(offset, val); + } + offset += len; + ptr += len; + bytes -= len; + } + + return count; +} + +static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) +{ + VfuObject *o = vfu_get_private(vfu_ctx); + AddressSpace *dma_as = NULL; + MemoryRegion *subregion = NULL; + g_autofree char *name = NULL; + struct iovec *iov = &info->iova; + + if (!info->vaddr) { + return; + } + + name = g_strdup_printf("mem-%s-%"PRIx64"", o->device, + (uint64_t)info->vaddr); + + subregion = g_new0(MemoryRegion, 1); + + memory_region_init_ram_ptr(subregion, NULL, name, + iov->iov_len, info->vaddr); + + dma_as = pci_device_iommu_address_space(o->pci_dev); + + memory_region_add_subregion(dma_as->root, (hwaddr)iov->iov_base, subregion); + + trace_vfu_dma_register((uint64_t)iov->iov_base, iov->iov_len); +} + +static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) +{ + VfuObject *o = vfu_get_private(vfu_ctx); + AddressSpace *dma_as = NULL; + MemoryRegion *mr = NULL; + ram_addr_t offset; + + mr = memory_region_from_host(info->vaddr, &offset); + if (!mr) { + return; + } + + dma_as = pci_device_iommu_address_space(o->pci_dev); + + memory_region_del_subregion(dma_as->root, mr); + + object_unparent((OBJECT(mr))); + + trace_vfu_dma_unregister((uint64_t)info->iova.iov_base); +} + +static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset, + hwaddr size, const bool is_write) +{ + uint8_t *ptr = buf; + bool release_lock = false; + uint8_t *ram_ptr = NULL; + MemTxResult result; + int access_size; + uint64_t val; + + if (memory_access_is_direct(mr, is_write)) { + /** + * Some devices expose a PCI expansion ROM, which could be buffer + * based as compared to other regions which are primarily based on + * MemoryRegionOps. memory_region_find() would already check + * for buffer overflow, we don't need to repeat it here. 
+ */ + ram_ptr = memory_region_get_ram_ptr(mr); + + if (is_write) { + memcpy((ram_ptr + offset), buf, size); + } else { + memcpy(buf, (ram_ptr + offset), size); + } + + return 0; + } + + while (size) { + /** + * The read/write logic used below is similar to the ones in + * flatview_read/write_continue() + */ + release_lock = prepare_mmio_access(mr); + + access_size = memory_access_size(mr, size, offset); + + if (is_write) { + val = ldn_he_p(ptr, access_size); + + result = memory_region_dispatch_write(mr, offset, val, + size_memop(access_size), + MEMTXATTRS_UNSPECIFIED); + } else { + result = memory_region_dispatch_read(mr, offset, &val, + size_memop(access_size), + MEMTXATTRS_UNSPECIFIED); + + stn_he_p(ptr, access_size, val); + } + + if (release_lock) { + qemu_mutex_unlock_iothread(); + release_lock = false; + } + + if (result != MEMTX_OK) { + return -1; + } + + size -= access_size; + ptr += access_size; + offset += access_size; + } + + return 0; +} + +static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar, + hwaddr bar_offset, char * const buf, + hwaddr len, const bool is_write) +{ + MemoryRegionSection section = { 0 }; + uint8_t *ptr = (uint8_t *)buf; + MemoryRegion *section_mr = NULL; + uint64_t section_size; + hwaddr section_offset; + hwaddr size = 0; + + while (len) { + section = memory_region_find(pci_dev->io_regions[pci_bar].memory, + bar_offset, len); + + if (!section.mr) { + warn_report("vfu: invalid address 0x%"PRIx64"", bar_offset); + return size; + } + + section_mr = section.mr; + section_offset = section.offset_within_region; + section_size = int128_get64(section.size); + + if (is_write && section_mr->readonly) { + warn_report("vfu: attempting to write to readonly region in " + "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]", + pci_bar, bar_offset, + (bar_offset + section_size)); + memory_region_unref(section_mr); + return size; + } + + if (vfu_object_mr_rw(section_mr, ptr, section_offset, + section_size, is_write)) { + warn_report("vfu: failed to %s " + "[0x%"PRIx64" - 0x%"PRIx64"] in bar %d", + is_write ? "write to" : "read from", bar_offset, + (bar_offset + section_size), pci_bar); + memory_region_unref(section_mr); + return size; + } + + size += section_size; + bar_offset += section_size; + ptr += section_size; + len -= section_size; + + memory_region_unref(section_mr); + } + + return size; +} + +/** + * VFU_OBJECT_BAR_HANDLER - macro for defining handlers for PCI BARs. 
+ * + * To create handler for BAR number 2, VFU_OBJECT_BAR_HANDLER(2) would + * define vfu_object_bar2_handler + */ +#define VFU_OBJECT_BAR_HANDLER(BAR_NO) \ + static ssize_t vfu_object_bar##BAR_NO##_handler(vfu_ctx_t *vfu_ctx, \ + char * const buf, size_t count, \ + loff_t offset, const bool is_write) \ + { \ + VfuObject *o = vfu_get_private(vfu_ctx); \ + PCIDevice *pci_dev = o->pci_dev; \ + \ + return vfu_object_bar_rw(pci_dev, BAR_NO, offset, \ + buf, count, is_write); \ + } \ + +VFU_OBJECT_BAR_HANDLER(0) +VFU_OBJECT_BAR_HANDLER(1) +VFU_OBJECT_BAR_HANDLER(2) +VFU_OBJECT_BAR_HANDLER(3) +VFU_OBJECT_BAR_HANDLER(4) +VFU_OBJECT_BAR_HANDLER(5) +VFU_OBJECT_BAR_HANDLER(6) + +static vfu_region_access_cb_t *vfu_object_bar_handlers[PCI_NUM_REGIONS] = { + &vfu_object_bar0_handler, + &vfu_object_bar1_handler, + &vfu_object_bar2_handler, + &vfu_object_bar3_handler, + &vfu_object_bar4_handler, + &vfu_object_bar5_handler, + &vfu_object_bar6_handler, +}; + +/** + * vfu_object_register_bars - Identify active BAR regions of pdev and setup + * callbacks to handle read/write accesses + */ +static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev) +{ + int flags = VFU_REGION_FLAG_RW; + int i; + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + if (!pdev->io_regions[i].size) { + continue; + } + + if ((i == VFU_PCI_DEV_ROM_REGION_IDX) || + pdev->io_regions[i].memory->readonly) { + flags &= ~VFU_REGION_FLAG_WRITE; + } + + vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX + i, + (size_t)pdev->io_regions[i].size, + vfu_object_bar_handlers[i], + flags, NULL, 0, -1, 0); + + trace_vfu_bar_register(i, pdev->io_regions[i].addr, + pdev->io_regions[i].size); + } +} + +static int vfu_object_map_irq(PCIDevice *pci_dev, int intx) +{ + int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), + pci_dev->devfn); + + return pci_bdf; +} + +static void vfu_object_set_irq(void *opaque, int pirq, int level) +{ + PCIBus *pci_bus = opaque; + PCIDevice *pci_dev = NULL; + vfu_ctx_t *vfu_ctx = NULL; + int pci_bus_num, devfn; + + if (level) { + pci_bus_num = PCI_BUS_NUM(pirq); + devfn = PCI_BDF_TO_DEVFN(pirq); + + /* + * pci_find_device() performs at O(1) if the device is attached + * to the root PCI bus. 
Whereas, if the device is attached to a + * secondary PCI bus (such as when a root port is involved), + * finding the parent PCI bus could take O(n) + */ + pci_dev = pci_find_device(pci_bus, pci_bus_num, devfn); + + vfu_ctx = pci_dev->irq_opaque; + + g_assert(vfu_ctx); + + vfu_irq_trigger(vfu_ctx, 0); + } +} + +static MSIMessage vfu_object_msi_prepare_msg(PCIDevice *pci_dev, + unsigned int vector) +{ + MSIMessage msg; + + msg.address = 0; + msg.data = vector; + + return msg; +} + +static void vfu_object_msi_trigger(PCIDevice *pci_dev, MSIMessage msg) +{ + vfu_ctx_t *vfu_ctx = pci_dev->irq_opaque; + + vfu_irq_trigger(vfu_ctx, msg.data); +} + +static void vfu_object_setup_msi_cbs(VfuObject *o) +{ + o->default_msi_trigger = o->pci_dev->msi_trigger; + o->default_msi_prepare_message = o->pci_dev->msi_prepare_message; + o->default_msix_prepare_message = o->pci_dev->msix_prepare_message; + + o->pci_dev->msi_trigger = vfu_object_msi_trigger; + o->pci_dev->msi_prepare_message = vfu_object_msi_prepare_msg; + o->pci_dev->msix_prepare_message = vfu_object_msi_prepare_msg; +} + +static void vfu_object_restore_msi_cbs(VfuObject *o) +{ + o->pci_dev->msi_trigger = o->default_msi_trigger; + o->pci_dev->msi_prepare_message = o->default_msi_prepare_message; + o->pci_dev->msix_prepare_message = o->default_msix_prepare_message; +} + +static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start, + uint32_t count, bool mask) +{ + VfuObject *o = vfu_get_private(vfu_ctx); + Error *err = NULL; + uint32_t vector; + + for (vector = start; vector < count; vector++) { + msix_set_mask(o->pci_dev, vector, mask, &err); + if (err) { + VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device, + error_get_pretty(err)); + error_free(err); + err = NULL; + } + } +} + +static void vfu_msi_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start, + uint32_t count, bool mask) +{ + VfuObject *o = vfu_get_private(vfu_ctx); + Error *err = NULL; + uint32_t vector; + + for (vector = start; vector < count; vector++) { + msi_set_mask(o->pci_dev, vector, mask, &err); + if (err) { + VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device, + error_get_pretty(err)); + error_free(err); + err = NULL; + } + } +} + +static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev) +{ + vfu_ctx_t *vfu_ctx = o->vfu_ctx; + int ret; + + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1); + if (ret < 0) { + return ret; + } + + if (msix_nr_vectors_allocated(pci_dev)) { + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ, + msix_nr_vectors_allocated(pci_dev)); + vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSIX_IRQ, + &vfu_msix_irq_state); + } else if (msi_nr_vectors_allocated(pci_dev)) { + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ, + msi_nr_vectors_allocated(pci_dev)); + vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSI_IRQ, + &vfu_msi_irq_state); + } + + if (ret < 0) { + return ret; + } + + vfu_object_setup_msi_cbs(o); + + pci_dev->irq_opaque = vfu_ctx; + + return 0; +} + +void vfu_object_set_bus_irq(PCIBus *pci_bus) +{ + int bus_num = pci_bus_num(pci_bus); + int max_bdf = PCI_BUILD_BDF(bus_num, PCI_DEVFN_MAX - 1); + + pci_bus_irqs(pci_bus, vfu_object_set_irq, vfu_object_map_irq, pci_bus, + max_bdf); +} + +static int vfu_object_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t type) +{ + VfuObject *o = vfu_get_private(vfu_ctx); + + /* vfu_object_ctx_run() handles lost connection */ + if (type == VFU_RESET_LOST_CONN) { + return 0; + } + + qdev_reset_all(DEVICE(o->pci_dev)); + + return 0; +} + +/* + * TYPE_VFU_OBJECT depends on the availability of 
the 'socket' and 'device'
+ * properties. It also depends on devices instantiated in QEMU. These
+ * dependencies are not available during the instance_init phase of this
+ * object's life-cycle. As such, the server is initialized after the
+ * machine is set up. machine_init_done_notifier notifies TYPE_VFU_OBJECT
+ * when the machine is set up, and the dependencies are available.
+ */
+static void vfu_object_machine_done(Notifier *notifier, void *data)
+{
+    VfuObject *o = container_of(notifier, VfuObject, machine_done);
+    Error *err = NULL;
+
+    vfu_object_init_ctx(o, &err);
+
+    if (err) {
+        error_propagate(&error_abort, err);
+    }
+}
+
+/**
+ * vfu_object_init_ctx: Create and initialize the libvfio-user context. Add
+ * an unplug blocker for the associated PCI device. Set up an FD handler
+ * to process incoming messages on the context's socket.
+ *
+ * The socket and device properties are mandatory, and this function
+ * will not create the context without them - the setters for these
+ * properties should call this function when the property is set. The
+ * machine should also be ready when this function is invoked - this is
+ * because QEMU objects are initialized before devices, and the
+ * associated PCI device wouldn't be available at object
+ * initialization time. Until these conditions are satisfied, this
+ * function returns early without performing any task.
+ */
+static void vfu_object_init_ctx(VfuObject *o, Error **errp)
+{
+    ERRP_GUARD();
+    DeviceState *dev = NULL;
+    vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL;
+    int ret;
+
+    if (o->vfu_ctx || !o->socket || !o->device ||
+        !phase_check(PHASE_MACHINE_READY)) {
+        return;
+    }
+
+    if (o->err) {
+        error_propagate(errp, o->err);
+        o->err = NULL;
+        return;
+    }
+
+    o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path,
+                                LIBVFIO_USER_FLAG_ATTACH_NB,
+                                o, VFU_DEV_TYPE_PCI);
+    if (o->vfu_ctx == NULL) {
+        error_setg(errp, "vfu: Failed to create context - %s", strerror(errno));
+        return;
+    }
+
+    dev = qdev_find_recursive(sysbus_get_default(), o->device);
+    if (dev == NULL) {
+        error_setg(errp, "vfu: Device %s not found", o->device);
+        goto fail;
+    }
+
+    if (!object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+        error_setg(errp, "vfu: %s not a PCI device", o->device);
+        goto fail;
+    }
+
+    o->pci_dev = PCI_DEVICE(dev);
+
+    object_ref(OBJECT(o->pci_dev));
+
+    if (pci_is_express(o->pci_dev)) {
+        pci_type = VFU_PCI_TYPE_EXPRESS;
+    }
+
+    ret = vfu_pci_init(o->vfu_ctx, pci_type, PCI_HEADER_TYPE_NORMAL, 0);
+    if (ret < 0) {
+        error_setg(errp,
+                   "vfu: Failed to attach PCI device %s to context - %s",
+                   o->device, strerror(errno));
+        goto fail;
+    }
+
+    error_setg(&o->unplug_blocker,
+               "vfu: %s for %s must be deleted before unplugging",
+               TYPE_VFU_OBJECT, o->device);
+    qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
+
+    ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX,
+                           pci_config_size(o->pci_dev), &vfu_object_cfg_access,
+                           VFU_REGION_FLAG_RW | VFU_REGION_FLAG_ALWAYS_CB,
+                           NULL, 0, -1, 0);
+    if (ret < 0) {
+        error_setg(errp,
+                   "vfu: Failed to setup config space handlers for %s - %s",
+                   o->device, strerror(errno));
+        goto fail;
+    }
+
+    ret = vfu_setup_device_dma(o->vfu_ctx, &dma_register, &dma_unregister);
+    if (ret < 0) {
+        error_setg(errp, "vfu: Failed to setup DMA handlers for %s",
+                   o->device);
+        goto fail;
+    }
+
+    vfu_object_register_bars(o->vfu_ctx, o->pci_dev);
+
+    ret = vfu_object_setup_irqs(o, o->pci_dev);
+    if (ret < 0) {
+        error_setg(errp, "vfu: Failed to setup interrupts for %s",
o->device);
+        goto fail;
+    }
+
+    ret = vfu_setup_device_reset_cb(o->vfu_ctx, &vfu_object_device_reset);
+    if (ret < 0) {
+        error_setg(errp, "vfu: Failed to setup reset callback");
+        goto fail;
+    }
+
+    ret = vfu_realize_ctx(o->vfu_ctx);
+    if (ret < 0) {
+        error_setg(errp, "vfu: Failed to realize device %s - %s",
+                   o->device, strerror(errno));
+        goto fail;
+    }
+
+    o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
+    if (o->vfu_poll_fd < 0) {
+        error_setg(errp, "vfu: Failed to get poll fd %s", o->device);
+        goto fail;
+    }
+
+    qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o);
+
+    return;
+
+fail:
+    vfu_destroy_ctx(o->vfu_ctx);
+    if (o->unplug_blocker && o->pci_dev) {
+        qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
+        error_free(o->unplug_blocker);
+        o->unplug_blocker = NULL;
+    }
+    if (o->pci_dev) {
+        vfu_object_restore_msi_cbs(o);
+        o->pci_dev->irq_opaque = NULL;
+        object_unref(OBJECT(o->pci_dev));
+        o->pci_dev = NULL;
+    }
+    o->vfu_ctx = NULL;
+}
+
+static void vfu_object_init(Object *obj)
+{
+    VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
+    VfuObject *o = VFU_OBJECT(obj);
+
+    k->nr_devs++;
+
+    if (!object_dynamic_cast(OBJECT(current_machine), TYPE_REMOTE_MACHINE)) {
+        error_setg(&o->err, "vfu: %s only compatible with %s machine",
+                   TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE);
+        return;
+    }
+
+    if (!phase_check(PHASE_MACHINE_READY)) {
+        o->machine_done.notify = vfu_object_machine_done;
+        qemu_add_machine_init_done_notifier(&o->machine_done);
+    }
+
+    o->vfu_poll_fd = -1;
+}
+
+static void vfu_object_finalize(Object *obj)
+{
+    VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
+    VfuObject *o = VFU_OBJECT(obj);
+
+    k->nr_devs--;
+
+    qapi_free_SocketAddress(o->socket);
+
+    o->socket = NULL;
+
+    if (o->vfu_poll_fd != -1) {
+        qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
+        o->vfu_poll_fd = -1;
+    }
+
+    if (o->vfu_ctx) {
+        vfu_destroy_ctx(o->vfu_ctx);
+        o->vfu_ctx = NULL;
+    }
+
+    g_free(o->device);
+
+    o->device = NULL;
+
+    if (o->unplug_blocker && o->pci_dev) {
+        qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
+        error_free(o->unplug_blocker);
+        o->unplug_blocker = NULL;
+    }
+
+    if (o->pci_dev) {
+        vfu_object_restore_msi_cbs(o);
+        o->pci_dev->irq_opaque = NULL;
+        object_unref(OBJECT(o->pci_dev));
+        o->pci_dev = NULL;
+    }
+
+    if (!k->nr_devs && vfu_object_auto_shutdown()) {
+        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+    }
+
+    if (o->machine_done.notify) {
+        qemu_remove_machine_init_done_notifier(&o->machine_done);
+        o->machine_done.notify = NULL;
+    }
+}
+
+static void vfu_object_class_init(ObjectClass *klass, void *data)
+{
+    VfuObjectClass *k = VFU_OBJECT_CLASS(klass);
+
+    k->nr_devs = 0;
+
+    object_class_property_add(klass, "socket", "SocketAddress", NULL,
+                              vfu_object_set_socket, NULL, NULL);
+    object_class_property_set_description(klass, "socket",
+                                          "SocketAddress "
+                                          "(ex: type=unix,path=/tmp/sock). 
" + "Only UNIX is presently supported"); + object_class_property_add_str(klass, "device", NULL, + vfu_object_set_device); + object_class_property_set_description(klass, "device", + "device ID - only PCI devices " + "are presently supported"); +} + +static const TypeInfo vfu_object_info = { + .name = TYPE_VFU_OBJECT, + .parent = TYPE_OBJECT, + .instance_size = sizeof(VfuObject), + .instance_init = vfu_object_init, + .instance_finalize = vfu_object_finalize, + .class_size = sizeof(VfuObjectClass), + .class_init = vfu_object_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static void vfu_register_types(void) +{ + type_register_static(&vfu_object_info); +} + +type_init(vfu_register_types); diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index 15b458527e..e33e5207ab 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -249,12 +249,11 @@ static int virtio_ccw_set_vqs(SubchDev *sch, VqInfoBlock *info, return 0; } -static void virtio_ccw_reset_virtio(VirtioCcwDevice *dev, VirtIODevice *vdev) +static void virtio_ccw_reset_virtio(VirtioCcwDevice *dev) { CcwDevice *ccw_dev = CCW_DEVICE(dev); - virtio_ccw_stop_ioeventfd(dev); - virtio_reset(vdev); + virtio_bus_reset(&dev->bus); if (dev->indicators) { release_indicator(&dev->routes.adapter, dev->indicators); dev->indicators = NULL; @@ -359,7 +358,7 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) ret = virtio_ccw_handle_set_vq(sch, ccw, check_len, dev->revision < 1); break; case CCW_CMD_VDEV_RESET: - virtio_ccw_reset_virtio(dev, vdev); + virtio_ccw_reset_virtio(dev); ret = 0; break; case CCW_CMD_READ_FEAT: @@ -536,7 +535,7 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) } if (virtio_set_status(vdev, status) == 0) { if (vdev->status == 0) { - virtio_ccw_reset_virtio(dev, vdev); + virtio_ccw_reset_virtio(dev); } if (status & VIRTIO_CONFIG_S_DRIVER_OK) { virtio_ccw_start_ioeventfd(dev); @@ -921,10 +920,9 @@ static void virtio_ccw_notify(DeviceState *d, uint16_t vector) static void virtio_ccw_reset(DeviceState *d) { VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); - VirtIODevice *vdev = virtio_bus_get_device(&dev->bus); VirtIOCCWDeviceClass *vdc = VIRTIO_CCW_DEVICE_GET_CLASS(dev); - virtio_ccw_reset_virtio(dev, vdev); + virtio_ccw_reset_virtio(dev); if (vdc->parent_reset) { vdc->parent_reset(d); } diff --git a/hw/usb/Kconfig b/hw/usb/Kconfig index 53f8283ffd..ce4f433976 100644 --- a/hw/usb/Kconfig +++ b/hw/usb/Kconfig @@ -119,6 +119,11 @@ config USB_U2F default y depends on USB +config USB_CANOKEY + bool + default y + depends on USB + config IMX_USBPHY bool default y diff --git a/hw/usb/canokey.c b/hw/usb/canokey.c new file mode 100644 index 0000000000..4a08b1cbd7 --- /dev/null +++ b/hw/usb/canokey.c @@ -0,0 +1,313 @@ +/* + * CanoKey QEMU device implementation. + * + * Copyright (c) 2021-2022 Canokeys.org <contact@canokeys.org> + * Written by Hongren (Zenithal) Zheng <i@zenithal.me> + * + * This code is licensed under the Apache-2.0. 
+ */
+
+#include "qemu/osdep.h"
+#include <canokey-qemu.h>
+
+#include "qemu/module.h"
+#include "qapi/error.h"
+#include "hw/usb.h"
+#include "hw/qdev-properties.h"
+#include "trace.h"
+#include "desc.h"
+#include "canokey.h"
+
+#define CANOKEY_EP_IN(ep) ((ep) & 0x7F)
+
+#define CANOKEY_VENDOR_NUM 0x20a0
+#define CANOKEY_PRODUCT_NUM 0x42d2
+
+/*
+ * placeholder, canokey-qemu implements its own usb desc
+ * Namely we do not use usb_desc_handle_control
+ */
+enum {
+    STR_MANUFACTURER = 1,
+    STR_PRODUCT,
+    STR_SERIALNUMBER
+};
+
+static const USBDescStrings desc_strings = {
+    [STR_MANUFACTURER] = "canokeys.org",
+    [STR_PRODUCT] = "CanoKey QEMU",
+    [STR_SERIALNUMBER] = "0"
+};
+
+static const USBDescDevice desc_device_canokey = {
+    .bcdUSB = 0x0,
+    .bMaxPacketSize0 = 16,
+    .bNumConfigurations = 0,
+    .confs = NULL,
+};
+
+static const USBDesc desc_canokey = {
+    .id = {
+        .idVendor = CANOKEY_VENDOR_NUM,
+        .idProduct = CANOKEY_PRODUCT_NUM,
+        .bcdDevice = 0x0100,
+        .iManufacturer = STR_MANUFACTURER,
+        .iProduct = STR_PRODUCT,
+        .iSerialNumber = STR_SERIALNUMBER,
+    },
+    .full = &desc_device_canokey,
+    .high = &desc_device_canokey,
+    .str = desc_strings,
+};
+
+
+/*
+ * libcanokey-qemu.so side functions
+ * All functions are called from canokey_emu_device_loop
+ */
+int canokey_emu_stall_ep(void *base, uint8_t ep)
+{
+    trace_canokey_emu_stall_ep(ep);
+    CanoKeyState *key = base;
+    uint8_t ep_in = CANOKEY_EP_IN(ep); /* INTR IN has ep 129 */
+    key->ep_in_size[ep_in] = 0;
+    key->ep_in_state[ep_in] = CANOKEY_EP_IN_STALL;
+    return 0;
+}
+
+int canokey_emu_set_address(void *base, uint8_t addr)
+{
+    trace_canokey_emu_set_address(addr);
+    CanoKeyState *key = base;
+    key->dev.addr = addr;
+    return 0;
+}
+
+int canokey_emu_prepare_receive(
+    void *base, uint8_t ep, uint8_t *pbuf, uint16_t size)
+{
+    trace_canokey_emu_prepare_receive(ep, size);
+    CanoKeyState *key = base;
+    key->ep_out[ep] = pbuf;
+    key->ep_out_size[ep] = size;
+    return 0;
+}
+
+int canokey_emu_transmit(
+    void *base, uint8_t ep, const uint8_t *pbuf, uint16_t size)
+{
+    trace_canokey_emu_transmit(ep, size);
+    CanoKeyState *key = base;
+    uint8_t ep_in = CANOKEY_EP_IN(ep); /* INTR IN has ep 129 */
+    memcpy(key->ep_in[ep_in] + key->ep_in_size[ep_in],
+           pbuf, size);
+    key->ep_in_size[ep_in] += size;
+    key->ep_in_state[ep_in] = CANOKEY_EP_IN_READY;
+    /*
+     * ready for more data in device loop
+     *
+     * Note: this is a quirk for CanoKey CTAPHID,
+     * because it calls emu_transmit multiple times in one device_loop,
+     * but without data_in it would get stuck in device_loop.
+     * This has no side effect for CCID, as that is strictly an
+     * OUT-then-IN transfer. However, it does have a side effect
+     * for Control transfers.
+     */
+    if (ep_in != 0) {
+        canokey_emu_data_in(ep_in);
+    }
+    return 0;
+}
+
+uint32_t canokey_emu_get_rx_data_size(void *base, uint8_t ep)
+{
+    CanoKeyState *key = base;
+    return key->ep_out_size[ep];
+}
+
+/*
+ * QEMU side functions
+ */
+static void canokey_handle_reset(USBDevice *dev)
+{
+    trace_canokey_handle_reset();
+    CanoKeyState *key = CANOKEY(dev);
+    for (int i = 0; i != CANOKEY_EP_NUM; ++i) {
+        key->ep_in_state[i] = CANOKEY_EP_IN_WAIT;
+        key->ep_in_pos[i] = 0;
+        key->ep_in_size[i] = 0;
+    }
+    canokey_emu_reset();
+}
+
+static void canokey_handle_control(USBDevice *dev, USBPacket *p,
+    int request, int value, int index, int length, uint8_t *data)
+{
+    trace_canokey_handle_control_setup(request, value, index, length);
+    CanoKeyState *key = CANOKEY(dev);
+
+    canokey_emu_setup(request, value, index, length);
+
+    uint32_t dir_in = request & DeviceRequest;
+    if
(!dir_in) { + /* OUT */ + trace_canokey_handle_control_out(); + if (key->ep_out[0] != NULL) { + memcpy(key->ep_out[0], data, length); + } + canokey_emu_data_out(p->ep->nr, data); + } + + canokey_emu_device_loop(); + + /* IN */ + switch (key->ep_in_state[0]) { + case CANOKEY_EP_IN_WAIT: + p->status = USB_RET_NAK; + break; + case CANOKEY_EP_IN_STALL: + p->status = USB_RET_STALL; + break; + case CANOKEY_EP_IN_READY: + memcpy(data, key->ep_in[0], key->ep_in_size[0]); + p->actual_length = key->ep_in_size[0]; + trace_canokey_handle_control_in(p->actual_length); + /* reset state */ + key->ep_in_state[0] = CANOKEY_EP_IN_WAIT; + key->ep_in_size[0] = 0; + key->ep_in_pos[0] = 0; + break; + } +} + +static void canokey_handle_data(USBDevice *dev, USBPacket *p) +{ + CanoKeyState *key = CANOKEY(dev); + + uint8_t ep_in = CANOKEY_EP_IN(p->ep->nr); + uint8_t ep_out = p->ep->nr; + uint32_t in_len; + uint32_t out_pos; + uint32_t out_len; + switch (p->pid) { + case USB_TOKEN_OUT: + trace_canokey_handle_data_out(ep_out, p->iov.size); + usb_packet_copy(p, key->ep_out_buffer[ep_out], p->iov.size); + out_pos = 0; + while (out_pos != p->iov.size) { + /* + * key->ep_out[ep_out] set by prepare_receive + * to be a buffer inside libcanokey-qemu.so + * key->ep_out_size[ep_out] set by prepare_receive + * to be the buffer length + */ + out_len = MIN(p->iov.size - out_pos, key->ep_out_size[ep_out]); + memcpy(key->ep_out[ep_out], + key->ep_out_buffer[ep_out] + out_pos, out_len); + out_pos += out_len; + /* update ep_out_size to actual len */ + key->ep_out_size[ep_out] = out_len; + canokey_emu_data_out(ep_out, NULL); + } + break; + case USB_TOKEN_IN: + if (key->ep_in_pos[ep_in] == 0) { /* first time IN */ + canokey_emu_data_in(ep_in); + canokey_emu_device_loop(); /* may call transmit multiple times */ + } + switch (key->ep_in_state[ep_in]) { + case CANOKEY_EP_IN_WAIT: + /* NAK for early INTR IN */ + p->status = USB_RET_NAK; + break; + case CANOKEY_EP_IN_STALL: + p->status = USB_RET_STALL; + break; + case CANOKEY_EP_IN_READY: + /* submit part of ep_in buffer to USBPacket */ + in_len = MIN(key->ep_in_size[ep_in] - key->ep_in_pos[ep_in], + p->iov.size); + usb_packet_copy(p, + key->ep_in[ep_in] + key->ep_in_pos[ep_in], in_len); + key->ep_in_pos[ep_in] += in_len; + /* reset state if all data submitted */ + if (key->ep_in_pos[ep_in] == key->ep_in_size[ep_in]) { + key->ep_in_state[ep_in] = CANOKEY_EP_IN_WAIT; + key->ep_in_size[ep_in] = 0; + key->ep_in_pos[ep_in] = 0; + } + trace_canokey_handle_data_in(ep_in, in_len); + break; + } + break; + default: + p->status = USB_RET_STALL; + break; + } +} + +static void canokey_realize(USBDevice *base, Error **errp) +{ + trace_canokey_realize(); + CanoKeyState *key = CANOKEY(base); + + if (key->file == NULL) { + error_setg(errp, "You must provide file=/path/to/canokey-file"); + return; + } + + usb_desc_init(base); + + for (int i = 0; i != CANOKEY_EP_NUM; ++i) { + key->ep_in_state[i] = CANOKEY_EP_IN_WAIT; + key->ep_in_size[i] = 0; + key->ep_in_pos[i] = 0; + } + + if (canokey_emu_init(key, key->file)) { + error_setg(errp, "canokey can not create or read %s", key->file); + return; + } +} + +static void canokey_unrealize(USBDevice *base) +{ + trace_canokey_unrealize(); +} + +static Property canokey_properties[] = { + DEFINE_PROP_STRING("file", CanoKeyState, file), + DEFINE_PROP_END_OF_LIST(), +}; + +static void canokey_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + USBDeviceClass *uc = USB_DEVICE_CLASS(klass); + + uc->product_desc = "CanoKey QEMU"; + 
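    /*
     * Editor's note, not part of the patch: with the "file" property
     * registered via device_class_set_props() below, the device can be
     * instantiated along these lines (hypothetical invocation, inferred
     * from the property definitions in this file):
     *
     *     qemu-system-x86_64 ... -usb \
     *         -device canokey,file=/path/to/canokey-file
     */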
uc->usb_desc = &desc_canokey; + uc->handle_reset = canokey_handle_reset; + uc->handle_control = canokey_handle_control; + uc->handle_data = canokey_handle_data; + uc->handle_attach = usb_desc_attach; + uc->realize = canokey_realize; + uc->unrealize = canokey_unrealize; + dc->desc = "CanoKey QEMU"; + device_class_set_props(dc, canokey_properties); + set_bit(DEVICE_CATEGORY_MISC, dc->categories); +} + +static const TypeInfo canokey_info = { + .name = TYPE_CANOKEY, + .parent = TYPE_USB_DEVICE, + .instance_size = sizeof(CanoKeyState), + .class_init = canokey_class_init +}; + +static void canokey_register_types(void) +{ + type_register_static(&canokey_info); +} + +type_init(canokey_register_types) diff --git a/hw/usb/canokey.h b/hw/usb/canokey.h new file mode 100644 index 0000000000..24cf304203 --- /dev/null +++ b/hw/usb/canokey.h @@ -0,0 +1,69 @@ +/* + * CanoKey QEMU device header. + * + * Copyright (c) 2021-2022 Canokeys.org <contact@canokeys.org> + * Written by Hongren (Zenithal) Zheng <i@zenithal.me> + * + * This code is licensed under the Apache-2.0. + */ + +#ifndef CANOKEY_H +#define CANOKEY_H + +#include "hw/qdev-core.h" + +#define TYPE_CANOKEY "canokey" +#define CANOKEY(obj) \ + OBJECT_CHECK(CanoKeyState, (obj), TYPE_CANOKEY) + +/* + * State of Canokey (i.e. hw/canokey.c) + */ + +/* CTRL INTR BULK */ +#define CANOKEY_EP_NUM 3 +/* BULK/INTR IN can be up to 1352 bytes, e.g. get key info */ +#define CANOKEY_EP_IN_BUFFER_SIZE 2048 +/* BULK OUT can be up to 270 bytes, e.g. PIV import cert */ +#define CANOKEY_EP_OUT_BUFFER_SIZE 512 + +typedef enum { + CANOKEY_EP_IN_WAIT, + CANOKEY_EP_IN_READY, + CANOKEY_EP_IN_STALL +} CanoKeyEPState; + +typedef struct CanoKeyState { + USBDevice dev; + + /* IN packets from canokey device loop */ + uint8_t ep_in[CANOKEY_EP_NUM][CANOKEY_EP_IN_BUFFER_SIZE]; + /* + * See canokey_emu_transmit + * + * For large INTR IN, receive multiple data from canokey device loop + * in this case ep_in_size would increase with every call + */ + uint32_t ep_in_size[CANOKEY_EP_NUM]; + /* + * Used in canokey_handle_data + * for IN larger than p->iov.size, we would do multiple handle_data() + * + * The difference between ep_in_pos and ep_in_size: + * We first increase ep_in_size to fill ep_in buffer in device_loop, + * then use ep_in_pos to submit data from ep_in buffer in handle_data + */ + uint32_t ep_in_pos[CANOKEY_EP_NUM]; + CanoKeyEPState ep_in_state[CANOKEY_EP_NUM]; + + /* OUT pointer to canokey recv buffer */ + uint8_t *ep_out[CANOKEY_EP_NUM]; + uint32_t ep_out_size[CANOKEY_EP_NUM]; + /* For large BULK OUT, multiple write to ep_out is needed */ + uint8_t ep_out_buffer[CANOKEY_EP_NUM][CANOKEY_EP_OUT_BUFFER_SIZE]; + + /* Properties */ + char *file; /* canokey-file */ +} CanoKeyState; + +#endif /* CANOKEY_H */ diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c index 33a8a377bd..d4da8dcb8d 100644 --- a/hw/usb/hcd-ehci.c +++ b/hw/usb/hcd-ehci.c @@ -2011,7 +2011,10 @@ static int ehci_state_writeback(EHCIQueue *q) ehci_trace_qtd(q, NLPTR_GET(p->qtdaddr), (EHCIqtd *) &q->qh.next_qtd); qtd = (uint32_t *) &q->qh.next_qtd; addr = NLPTR_GET(p->qtdaddr); - put_dwords(q->ehci, addr + 2 * sizeof(uint32_t), qtd + 2, 2); + /* First write back the offset */ + put_dwords(q->ehci, addr + 3 * sizeof(uint32_t), qtd + 3, 1); + /* Then write back the token, clearing the 'active' bit */ + put_dwords(q->ehci, addr + 2 * sizeof(uint32_t), qtd + 2, 1); ehci_free_packet(p); /* diff --git a/hw/usb/meson.build b/hw/usb/meson.build index de853d780d..793df42e21 100644 --- a/hw/usb/meson.build +++ 
b/hw/usb/meson.build @@ -63,6 +63,11 @@ if u2f.found() softmmu_ss.add(when: 'CONFIG_USB_U2F', if_true: [u2f, files('u2f-emulated.c')]) endif +# CanoKey +if canokey.found() + softmmu_ss.add(when: 'CONFIG_USB_CANOKEY', if_true: [canokey, files('canokey.c')]) +endif + # usb redirect if usbredir.found() usbredir_ss = ss.source_set() diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c index fd7df599bc..1bd30efc3e 100644 --- a/hw/usb/redirect.c +++ b/hw/usb/redirect.c @@ -1280,7 +1280,8 @@ static void usbredir_create_parser(USBRedirDevice *dev) } #endif - if (runstate_check(RUN_STATE_INMIGRATE)) { + if (runstate_check(RUN_STATE_INMIGRATE) || + runstate_check(RUN_STATE_PRELAUNCH)) { flags |= usbredirparser_fl_no_hello; } usbredirparser_init(dev->parser, VERSION, caps, USB_REDIR_CAPS_SIZE, diff --git a/hw/usb/trace-events b/hw/usb/trace-events index 9773cb5330..914ca71668 100644 --- a/hw/usb/trace-events +++ b/hw/usb/trace-events @@ -345,3 +345,19 @@ usb_serial_set_baud(int bus, int addr, int baud) "dev %d:%u baud rate %d" usb_serial_set_data(int bus, int addr, int parity, int data, int stop) "dev %d:%u parity %c, data bits %d, stop bits %d" usb_serial_set_flow_control(int bus, int addr, int index) "dev %d:%u flow control %d" usb_serial_set_xonxoff(int bus, int addr, uint8_t xon, uint8_t xoff) "dev %d:%u xon 0x%x xoff 0x%x" + +# canokey.c +canokey_emu_stall_ep(uint8_t ep) "ep %d" +canokey_emu_set_address(uint8_t addr) "addr %d" +canokey_emu_prepare_receive(uint8_t ep, uint16_t size) "ep %d size %d" +canokey_emu_transmit(uint8_t ep, uint16_t size) "ep %d size %d" +canokey_thread_start(void) +canokey_thread_stop(void) +canokey_handle_reset(void) +canokey_handle_control_setup(int request, int value, int index, int length) "request 0x%04X value 0x%04X index 0x%04X length 0x%04X" +canokey_handle_control_out(void) +canokey_handle_control_in(int actual_len) "len %d" +canokey_handle_data_out(uint8_t ep_out, uint32_t out_len) "ep %d len %d" +canokey_handle_data_in(uint8_t ep_in, uint32_t in_len) "ep %d len %d" +canokey_realize(void) +canokey_unrealize(void) diff --git a/hw/vfio/display.c b/hw/vfio/display.c index 89bc90508f..78f4d82c1c 100644 --- a/hw/vfio/display.c +++ b/hw/vfio/display.c @@ -106,14 +106,14 @@ err: return; } -static int vfio_display_edid_ui_info(void *opaque, uint32_t idx, - QemuUIInfo *info) +static void vfio_display_edid_ui_info(void *opaque, uint32_t idx, + QemuUIInfo *info) { VFIOPCIDevice *vdev = opaque; VFIODisplay *dpy = vdev->dpy; if (!dpy->edid_regs) { - return 0; + return; } if (info->width && info->height) { @@ -121,8 +121,6 @@ static int vfio_display_edid_ui_info(void *opaque, uint32_t idx, } else { vfio_display_edid_update(vdev, false, 0, 0); } - - return 0; } static void vfio_display_edid_init(VFIOPCIDevice *vdev) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index ab8e095b73..20af2e7ebd 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -124,6 +124,7 @@ virtio_iommu_remap(const char *name, uint64_t virt_start, uint64_t virt_end, uin virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "mr=%s old_mask=0x%"PRIx64" new_mask=0x%"PRIx64 virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s" virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s" +virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" # virtio-mem.c virtio_mem_send_response(uint16_t type) "type=%" PRIu16 diff --git 
a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 0594178224..4b9be26e84 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1525,7 +1525,7 @@ static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u, { VhostUserHostNotifier *n = NULL; if (idx >= u->notifiers->len) { - g_ptr_array_set_size(u->notifiers, idx); + g_ptr_array_set_size(u->notifiers, idx + 1); } n = g_ptr_array_index(u->notifiers, idx); diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index dd3263df56..6c41fa13e3 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -886,6 +886,10 @@ static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log) err_vq: for (; i >= 0; --i) { idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i); + addr = virtio_queue_get_desc_addr(dev->vdev, idx); + if (!addr) { + continue; + } vhost_virtqueue_set_addr(dev, dev->vqs + i, idx, dev->log_enabled); } diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index d7ec023adf..896feb37a1 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -104,6 +104,7 @@ void virtio_bus_reset(VirtioBusState *bus) VirtIODevice *vdev = virtio_bus_get_device(bus); DPRINTF("%s: reset device.\n", BUS(bus)->name); + virtio_bus_stop_ioeventfd(bus); if (vdev != NULL) { virtio_reset(vdev); } diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index c3829e7498..c1243c3f93 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -83,7 +83,8 @@ virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto, struct iovec *iov, unsigned int out_num) { VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); - CryptoDevBackendSymSessionInfo info; + CryptoDevBackendSessionInfo info; + CryptoDevBackendSymSessionInfo *sym_info; int64_t session_id; int queue_index; uint32_t op_type; @@ -92,11 +93,13 @@ virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto, memset(&info, 0, sizeof(info)); op_type = ldl_le_p(&sess_req->op_type); - info.op_type = op_type; info.op_code = opcode; + sym_info = &info.u.sym_sess_info; + sym_info->op_type = op_type; + if (op_type == VIRTIO_CRYPTO_SYM_OP_CIPHER) { - ret = virtio_crypto_cipher_session_helper(vdev, &info, + ret = virtio_crypto_cipher_session_helper(vdev, sym_info, &sess_req->u.cipher.para, &iov, &out_num); if (ret < 0) { @@ -105,47 +108,47 @@ virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto, } else if (op_type == VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) { size_t s; /* cipher part */ - ret = virtio_crypto_cipher_session_helper(vdev, &info, + ret = virtio_crypto_cipher_session_helper(vdev, sym_info, &sess_req->u.chain.para.cipher_param, &iov, &out_num); if (ret < 0) { goto err; } /* hash part */ - info.alg_chain_order = ldl_le_p( + sym_info->alg_chain_order = ldl_le_p( &sess_req->u.chain.para.alg_chain_order); - info.add_len = ldl_le_p(&sess_req->u.chain.para.aad_len); - info.hash_mode = ldl_le_p(&sess_req->u.chain.para.hash_mode); - if (info.hash_mode == VIRTIO_CRYPTO_SYM_HASH_MODE_AUTH) { - info.hash_alg = ldl_le_p(&sess_req->u.chain.para.u.mac_param.algo); - info.auth_key_len = ldl_le_p( + sym_info->add_len = ldl_le_p(&sess_req->u.chain.para.aad_len); + sym_info->hash_mode = ldl_le_p(&sess_req->u.chain.para.hash_mode); + if (sym_info->hash_mode == VIRTIO_CRYPTO_SYM_HASH_MODE_AUTH) { + sym_info->hash_alg = + ldl_le_p(&sess_req->u.chain.para.u.mac_param.algo); + sym_info->auth_key_len = ldl_le_p( &sess_req->u.chain.para.u.mac_param.auth_key_len); - info.hash_result_len = ldl_le_p( + sym_info->hash_result_len = ldl_le_p( 
&sess_req->u.chain.para.u.mac_param.hash_result_len); - if (info.auth_key_len > vcrypto->conf.max_auth_key_len) { + if (sym_info->auth_key_len > vcrypto->conf.max_auth_key_len) { error_report("virtio-crypto length of auth key is too big: %u", - info.auth_key_len); + sym_info->auth_key_len); ret = -VIRTIO_CRYPTO_ERR; goto err; } /* get auth key */ - if (info.auth_key_len > 0) { - DPRINTF("auth_keylen=%" PRIu32 "\n", info.auth_key_len); - info.auth_key = g_malloc(info.auth_key_len); - s = iov_to_buf(iov, out_num, 0, info.auth_key, - info.auth_key_len); - if (unlikely(s != info.auth_key_len)) { + if (sym_info->auth_key_len > 0) { + sym_info->auth_key = g_malloc(sym_info->auth_key_len); + s = iov_to_buf(iov, out_num, 0, sym_info->auth_key, + sym_info->auth_key_len); + if (unlikely(s != sym_info->auth_key_len)) { virtio_error(vdev, "virtio-crypto authenticated key incorrect"); ret = -EFAULT; goto err; } - iov_discard_front(&iov, &out_num, info.auth_key_len); + iov_discard_front(&iov, &out_num, sym_info->auth_key_len); } - } else if (info.hash_mode == VIRTIO_CRYPTO_SYM_HASH_MODE_PLAIN) { - info.hash_alg = ldl_le_p( + } else if (sym_info->hash_mode == VIRTIO_CRYPTO_SYM_HASH_MODE_PLAIN) { + sym_info->hash_alg = ldl_le_p( &sess_req->u.chain.para.u.hash_param.algo); - info.hash_result_len = ldl_le_p( + sym_info->hash_result_len = ldl_le_p( &sess_req->u.chain.para.u.hash_param.hash_result_len); } else { /* VIRTIO_CRYPTO_SYM_HASH_MODE_NESTED */ @@ -161,13 +164,10 @@ virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto, } queue_index = virtio_crypto_vq2q(queue_id); - session_id = cryptodev_backend_sym_create_session( + session_id = cryptodev_backend_create_session( vcrypto->cryptodev, &info, queue_index, &local_err); if (session_id >= 0) { - DPRINTF("create session_id=%" PRIu64 " successfully\n", - session_id); - ret = session_id; } else { if (local_err) { @@ -177,11 +177,78 @@ virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto, } err: - g_free(info.cipher_key); - g_free(info.auth_key); + g_free(sym_info->cipher_key); + g_free(sym_info->auth_key); return ret; } +static int64_t +virtio_crypto_create_asym_session(VirtIOCrypto *vcrypto, + struct virtio_crypto_akcipher_create_session_req *sess_req, + uint32_t queue_id, uint32_t opcode, + struct iovec *iov, unsigned int out_num) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); + CryptoDevBackendSessionInfo info = {0}; + CryptoDevBackendAsymSessionInfo *asym_info; + int64_t session_id; + int queue_index; + uint32_t algo, keytype, keylen; + g_autofree uint8_t *key = NULL; + Error *local_err = NULL; + + algo = ldl_le_p(&sess_req->para.algo); + keytype = ldl_le_p(&sess_req->para.keytype); + keylen = ldl_le_p(&sess_req->para.keylen); + + if ((keytype != VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PUBLIC) + && (keytype != VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PRIVATE)) { + error_report("unsupported asym keytype: %d", keytype); + return -VIRTIO_CRYPTO_NOTSUPP; + } + + if (keylen) { + key = g_malloc(keylen); + if (iov_to_buf(iov, out_num, 0, key, keylen) != keylen) { + virtio_error(vdev, "virtio-crypto asym key incorrect"); + return -EFAULT; + } + iov_discard_front(&iov, &out_num, keylen); + } + + info.op_code = opcode; + asym_info = &info.u.asym_sess_info; + asym_info->algo = algo; + asym_info->keytype = keytype; + asym_info->keylen = keylen; + asym_info->key = key; + switch (asym_info->algo) { + case VIRTIO_CRYPTO_AKCIPHER_RSA: + asym_info->u.rsa.padding_algo = + ldl_le_p(&sess_req->para.u.rsa.padding_algo); + asym_info->u.rsa.hash_algo = + 
ldl_le_p(&sess_req->para.u.rsa.hash_algo); + break; + + /* TODO DSA&ECDSA handling */ + + default: + return -VIRTIO_CRYPTO_ERR; + } + + queue_index = virtio_crypto_vq2q(queue_id); + session_id = cryptodev_backend_create_session(vcrypto->cryptodev, &info, + queue_index, &local_err); + if (session_id < 0) { + if (local_err) { + error_report_err(local_err); + } + return -VIRTIO_CRYPTO_ERR; + } + + return session_id; +} + static uint8_t virtio_crypto_handle_close_session(VirtIOCrypto *vcrypto, struct virtio_crypto_destroy_session_req *close_sess_req, @@ -195,7 +262,7 @@ virtio_crypto_handle_close_session(VirtIOCrypto *vcrypto, session_id = ldq_le_p(&close_sess_req->session_id); DPRINTF("close session, id=%" PRIu64 "\n", session_id); - ret = cryptodev_backend_sym_close_session( + ret = cryptodev_backend_close_session( vcrypto->cryptodev, session_id, queue_id, &local_err); if (ret == 0) { status = VIRTIO_CRYPTO_OK; @@ -260,13 +327,22 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) opcode = ldl_le_p(&ctrl.header.opcode); queue_id = ldl_le_p(&ctrl.header.queue_id); + memset(&input, 0, sizeof(input)); switch (opcode) { case VIRTIO_CRYPTO_CIPHER_CREATE_SESSION: - memset(&input, 0, sizeof(input)); session_id = virtio_crypto_create_sym_session(vcrypto, &ctrl.u.sym_create_session, queue_id, opcode, out_iov, out_num); + goto check_session; + + case VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION: + session_id = virtio_crypto_create_asym_session(vcrypto, + &ctrl.u.akcipher_create_session, + queue_id, opcode, + out_iov, out_num); + +check_session: /* Serious errors, need to reset virtio crypto device */ if (session_id == -EFAULT) { virtqueue_detach_element(vq, elem, 0); @@ -290,10 +366,12 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) virtqueue_push(vq, elem, sizeof(input)); virtio_notify(vdev, vq); break; + case VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION: case VIRTIO_CRYPTO_HASH_DESTROY_SESSION: case VIRTIO_CRYPTO_MAC_DESTROY_SESSION: case VIRTIO_CRYPTO_AEAD_DESTROY_SESSION: + case VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION: status = virtio_crypto_handle_close_session(vcrypto, &ctrl.u.destroy_session, queue_id); /* The status only occupy one byte, we can directly use it */ @@ -311,7 +389,6 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) case VIRTIO_CRYPTO_AEAD_CREATE_SESSION: default: error_report("virtio-crypto unsupported ctrl opcode: %d", opcode); - memset(&input, 0, sizeof(input)); stl_le_p(&input.status, VIRTIO_CRYPTO_NOTSUPP); s = iov_from_buf(in_iov, in_num, 0, &input, sizeof(input)); if (unlikely(s != sizeof(input))) { @@ -339,28 +416,39 @@ static void virtio_crypto_init_request(VirtIOCrypto *vcrypto, VirtQueue *vq, req->in_num = 0; req->in_len = 0; req->flags = CRYPTODEV_BACKEND_ALG__MAX; - req->u.sym_op_info = NULL; + memset(&req->op_info, 0x00, sizeof(req->op_info)); } static void virtio_crypto_free_request(VirtIOCryptoReq *req) { - if (req) { - if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) { - size_t max_len; - CryptoDevBackendSymOpInfo *op_info = req->u.sym_op_info; - - max_len = op_info->iv_len + - op_info->aad_len + - op_info->src_len + - op_info->dst_len + - op_info->digest_result_len; - - /* Zeroize and free request data structure */ - memset(op_info, 0, sizeof(*op_info) + max_len); + if (!req) { + return; + } + + if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) { + size_t max_len; + CryptoDevBackendSymOpInfo *op_info = req->op_info.u.sym_op_info; + + max_len = op_info->iv_len + + op_info->aad_len + + op_info->src_len + + 
op_info->dst_len + + op_info->digest_result_len; + + /* Zeroize and free request data structure */ + memset(op_info, 0, sizeof(*op_info) + max_len); + g_free(op_info); + } else if (req->flags == CRYPTODEV_BACKEND_ALG_ASYM) { + CryptoDevBackendAsymOpInfo *op_info = req->op_info.u.asym_op_info; + if (op_info) { + g_free(op_info->src); + g_free(op_info->dst); + memset(op_info, 0, sizeof(*op_info)); g_free(op_info); } - g_free(req); } + + g_free(req); } static void @@ -397,6 +485,35 @@ virtio_crypto_sym_input_data_helper(VirtIODevice *vdev, } } +static void +virtio_crypto_akcipher_input_data_helper(VirtIODevice *vdev, + VirtIOCryptoReq *req, int32_t status, + CryptoDevBackendAsymOpInfo *asym_op_info) +{ + size_t s, len; + + if (status != VIRTIO_CRYPTO_OK) { + return; + } + + len = asym_op_info->dst_len; + if (!len) { + return; + } + + s = iov_from_buf(req->in_iov, req->in_num, 0, asym_op_info->dst, len); + if (s != len) { + virtio_error(vdev, "virtio-crypto asym dest data incorrect"); + return; + } + + iov_discard_front(&req->in_iov, &req->in_num, len); + + /* For akcipher, dst_len may be changed after operation */ + req->in_len = sizeof(struct virtio_crypto_inhdr) + asym_op_info->dst_len; +} + + static void virtio_crypto_req_complete(VirtIOCryptoReq *req, uint8_t status) { VirtIOCrypto *vcrypto = req->vcrypto; @@ -404,7 +521,10 @@ static void virtio_crypto_req_complete(VirtIOCryptoReq *req, uint8_t status) if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) { virtio_crypto_sym_input_data_helper(vdev, req, status, - req->u.sym_op_info); + req->op_info.u.sym_op_info); + } else if (req->flags == CRYPTODEV_BACKEND_ALG_ASYM) { + virtio_crypto_akcipher_input_data_helper(vdev, req, status, + req->op_info.u.asym_op_info); } stb_p(&req->in->status, status); virtqueue_push(req->vq, &req->elem, req->in_len); @@ -543,42 +663,101 @@ err: static int virtio_crypto_handle_sym_req(VirtIOCrypto *vcrypto, struct virtio_crypto_sym_data_req *req, - CryptoDevBackendSymOpInfo **sym_op_info, + CryptoDevBackendOpInfo *op_info, struct iovec *iov, unsigned int out_num) { VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); + CryptoDevBackendSymOpInfo *sym_op_info; uint32_t op_type; - CryptoDevBackendSymOpInfo *op_info; op_type = ldl_le_p(&req->op_type); - if (op_type == VIRTIO_CRYPTO_SYM_OP_CIPHER) { - op_info = virtio_crypto_sym_op_helper(vdev, &req->u.cipher.para, + sym_op_info = virtio_crypto_sym_op_helper(vdev, &req->u.cipher.para, NULL, iov, out_num); - if (!op_info) { + if (!sym_op_info) { return -EFAULT; } - op_info->op_type = op_type; } else if (op_type == VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) { - op_info = virtio_crypto_sym_op_helper(vdev, NULL, + sym_op_info = virtio_crypto_sym_op_helper(vdev, NULL, &req->u.chain.para, iov, out_num); - if (!op_info) { + if (!sym_op_info) { return -EFAULT; } - op_info->op_type = op_type; } else { /* VIRTIO_CRYPTO_SYM_OP_NONE */ error_report("virtio-crypto unsupported cipher type"); return -VIRTIO_CRYPTO_NOTSUPP; } - *sym_op_info = op_info; + sym_op_info->op_type = op_type; + op_info->u.sym_op_info = sym_op_info; return 0; } static int +virtio_crypto_handle_asym_req(VirtIOCrypto *vcrypto, + struct virtio_crypto_akcipher_data_req *req, + CryptoDevBackendOpInfo *op_info, + struct iovec *iov, unsigned int out_num) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); + CryptoDevBackendAsymOpInfo *asym_op_info; + uint32_t src_len; + uint32_t dst_len; + uint32_t len; + uint8_t *src = NULL; + uint8_t *dst = NULL; + + asym_op_info = g_malloc0(sizeof(CryptoDevBackendAsymOpInfo)); + src_len = 
ldl_le_p(&req->para.src_data_len); + dst_len = ldl_le_p(&req->para.dst_data_len); + + if (src_len > 0) { + src = g_malloc0(src_len); + len = iov_to_buf(iov, out_num, 0, src, src_len); + if (unlikely(len != src_len)) { + virtio_error(vdev, "virtio-crypto asym src data incorrect" + "expected %u, actual %u", src_len, len); + goto err; + } + + iov_discard_front(&iov, &out_num, src_len); + } + + if (dst_len > 0) { + dst = g_malloc0(dst_len); + + if (op_info->op_code == VIRTIO_CRYPTO_AKCIPHER_VERIFY) { + len = iov_to_buf(iov, out_num, 0, dst, dst_len); + if (unlikely(len != dst_len)) { + virtio_error(vdev, "virtio-crypto asym dst data incorrect" + "expected %u, actual %u", dst_len, len); + goto err; + } + + iov_discard_front(&iov, &out_num, dst_len); + } + } + + asym_op_info->src_len = src_len; + asym_op_info->dst_len = dst_len; + asym_op_info->src = src; + asym_op_info->dst = dst; + op_info->u.asym_op_info = asym_op_info; + + return 0; + + err: + g_free(asym_op_info); + g_free(src); + g_free(dst); + + return -EFAULT; +} + +static int virtio_crypto_handle_request(VirtIOCryptoReq *request) { VirtIOCrypto *vcrypto = request->vcrypto; @@ -595,8 +774,7 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request) unsigned out_num; uint32_t opcode; uint8_t status = VIRTIO_CRYPTO_ERR; - uint64_t session_id; - CryptoDevBackendSymOpInfo *sym_op_info = NULL; + CryptoDevBackendOpInfo *op_info = &request->op_info; Error *local_err = NULL; if (elem->out_num < 1 || elem->in_num < 1) { @@ -639,15 +817,28 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request) request->in_iov = in_iov; opcode = ldl_le_p(&req.header.opcode); - session_id = ldq_le_p(&req.header.session_id); + op_info->session_id = ldq_le_p(&req.header.session_id); + op_info->op_code = opcode; switch (opcode) { case VIRTIO_CRYPTO_CIPHER_ENCRYPT: case VIRTIO_CRYPTO_CIPHER_DECRYPT: + op_info->algtype = request->flags = CRYPTODEV_BACKEND_ALG_SYM; ret = virtio_crypto_handle_sym_req(vcrypto, - &req.u.sym_req, - &sym_op_info, + &req.u.sym_req, op_info, + out_iov, out_num); + goto check_result; + + case VIRTIO_CRYPTO_AKCIPHER_ENCRYPT: + case VIRTIO_CRYPTO_AKCIPHER_DECRYPT: + case VIRTIO_CRYPTO_AKCIPHER_SIGN: + case VIRTIO_CRYPTO_AKCIPHER_VERIFY: + op_info->algtype = request->flags = CRYPTODEV_BACKEND_ALG_ASYM; + ret = virtio_crypto_handle_asym_req(vcrypto, + &req.u.akcipher_req, op_info, out_iov, out_num); + +check_result: /* Serious errors, need to reset virtio crypto device */ if (ret == -EFAULT) { return -1; @@ -655,11 +846,8 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request) virtio_crypto_req_complete(request, VIRTIO_CRYPTO_NOTSUPP); virtio_crypto_free_request(request); } else { - sym_op_info->session_id = session_id; /* Set request's parameter */ - request->flags = CRYPTODEV_BACKEND_ALG_SYM; - request->u.sym_op_info = sym_op_info; ret = cryptodev_backend_crypto_operation(vcrypto->cryptodev, request, queue_index, &local_err); if (ret < 0) { @@ -674,6 +862,7 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request) virtio_crypto_free_request(request); } break; + case VIRTIO_CRYPTO_HASH: case VIRTIO_CRYPTO_MAC: case VIRTIO_CRYPTO_AEAD_ENCRYPT: @@ -779,6 +968,7 @@ static void virtio_crypto_init_config(VirtIODevice *vdev) vcrypto->conf.mac_algo_l = vcrypto->conf.cryptodev->conf.mac_algo_l; vcrypto->conf.mac_algo_h = vcrypto->conf.cryptodev->conf.mac_algo_h; vcrypto->conf.aead_algo = vcrypto->conf.cryptodev->conf.aead_algo; + vcrypto->conf.akcipher_algo = vcrypto->conf.cryptodev->conf.akcipher_algo; vcrypto->conf.max_cipher_key_len = 
vcrypto->conf.cryptodev->conf.max_cipher_key_len; vcrypto->conf.max_auth_key_len = @@ -891,6 +1081,7 @@ static void virtio_crypto_get_config(VirtIODevice *vdev, uint8_t *config) stl_le_p(&crypto_cfg.max_cipher_key_len, c->conf.max_cipher_key_len); stl_le_p(&crypto_cfg.max_auth_key_len, c->conf.max_auth_key_len); stq_le_p(&crypto_cfg.max_size, c->conf.max_size); + stl_le_p(&crypto_cfg.akcipher_algo, c->conf.akcipher_algo); memcpy(config, &crypto_cfg, c->config_size); } diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 2597e166f9..7c122ab957 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -69,6 +69,77 @@ static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev) return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); } +static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev) +{ + uint32_t sid; + bool bypassed; + VirtIOIOMMU *s = sdev->viommu; + VirtIOIOMMUEndpoint *ep; + + sid = virtio_iommu_get_bdf(sdev); + + qemu_rec_mutex_lock(&s->mutex); + /* need to check bypass before system reset */ + if (!s->endpoints) { + bypassed = s->config.bypass; + goto unlock; + } + + ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); + if (!ep || !ep->domain) { + bypassed = s->config.bypass; + } else { + bypassed = ep->domain->bypass; + } + +unlock: + qemu_rec_mutex_unlock(&s->mutex); + return bypassed; +} + +/* Return whether the device is using IOMMU translation. */ +static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev) +{ + bool use_remapping; + + assert(sdev); + + use_remapping = !virtio_iommu_device_bypassed(sdev); + + trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus), + PCI_SLOT(sdev->devfn), + PCI_FUNC(sdev->devfn), + use_remapping); + + /* Turn off first then on the other */ + if (use_remapping) { + memory_region_set_enabled(&sdev->bypass_mr, false); + memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true); + } else { + memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false); + memory_region_set_enabled(&sdev->bypass_mr, true); + } + + return use_remapping; +} + +static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s) +{ + GHashTableIter iter; + IOMMUPciBus *iommu_pci_bus; + int i; + + g_hash_table_iter_init(&iter, s->as_by_busptr); + while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) { + for (i = 0; i < PCI_DEVFN_MAX; i++) { + if (!iommu_pci_bus->pbdev[i]) { + continue; + } + virtio_iommu_switch_address_space(iommu_pci_bus->pbdev[i]); + } + } +} + /** * The bus number is used for lookup when SID based operations occur. 
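 *
 * (Editor's sketch, not part of the patch: the address-space switch
 * introduced above boils down to flipping which of the two overlapping
 * sub-regions of the per-device root container is enabled, e.g. for
 * the bypass-to-translated transition:
 *
 *     memory_region_set_enabled(&sdev->bypass_mr, false);
 *     memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true);
 *
 * so exactly one of the two views backs sdev->root at any time.)
 *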
* In that case we lazily populate the IOMMUPciBus array from the bus hash
@@ -213,6 +284,7 @@ static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value,
 static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
 {
     VirtIOIOMMUDomain *domain = ep->domain;
+    IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
 
     if (!ep->domain) {
         return;
@@ -221,6 +293,7 @@ static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep)
                                    ep->iommu_mr);
     QLIST_REMOVE(ep, next);
     ep->domain = NULL;
+    virtio_iommu_switch_address_space(sdev);
 }
 
 static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
@@ -323,12 +396,39 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque,
 
         trace_virtio_iommu_init_iommu_mr(name);
 
+        memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX);
+        address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU);
+
+        /*
+         * Build the IOMMU-disabled container with aliases to the
+         * shared MRs. Note that aliasing to a shared memory region
+         * helps the memory API detect identical FlatViews, so devices
+         * can share the same FlatView when in bypass mode (either by
+         * not configuring the virtio-iommu driver or with "iommu=pt").
+         * This greatly reduces the total number of FlatViews in the
+         * system and hence makes the VM run faster.
+         */
+        memory_region_init_alias(&sdev->bypass_mr, OBJECT(s),
+                                 "system", get_system_memory(), 0,
+                                 memory_region_size(get_system_memory()));
+
         memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr),
                                  TYPE_VIRTIO_IOMMU_MEMORY_REGION,
                                  OBJECT(s), name,
                                  UINT64_MAX);
-        address_space_init(&sdev->as,
-                           MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU);
+
+        /*
+         * Hook both containers under the root container; we switch
+         * between the iommu and bypass MRs by enabling/disabling the
+         * corresponding sub-containers.
+         */
+        memory_region_add_subregion_overlap(&sdev->root, 0,
+                                            MEMORY_REGION(&sdev->iommu_mr),
+                                            0);
+        memory_region_add_subregion_overlap(&sdev->root, 0,
+                                            &sdev->bypass_mr, 0);
+
+        virtio_iommu_switch_address_space(sdev);
         g_free(name);
     }
     return &sdev->as;
@@ -342,6 +442,7 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
     uint32_t flags = le32_to_cpu(req->flags);
     VirtIOIOMMUDomain *domain;
     VirtIOIOMMUEndpoint *ep;
+    IOMMUDevice *sdev;
 
     trace_virtio_iommu_attach(domain_id, ep_id);
 
@@ -375,6 +476,8 @@ static int virtio_iommu_attach(VirtIOIOMMU *s,
 
     QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next);
     ep->domain = domain;
+    sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr);
+    virtio_iommu_switch_address_space(sdev);
 
     /* Replay domain mappings on the associated memory region */
     g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb,
@@ -642,7 +745,7 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
             tail.status = VIRTIO_IOMMU_S_DEVERR;
             goto out;
         }
-        qemu_mutex_lock(&s->mutex);
+        qemu_rec_mutex_lock(&s->mutex);
         switch (head.type) {
         case VIRTIO_IOMMU_T_ATTACH:
             tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt);
@@ -671,7 +774,7 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
         default:
             tail.status = VIRTIO_IOMMU_S_UNSUPP;
         }
-        qemu_mutex_unlock(&s->mutex);
+        qemu_rec_mutex_unlock(&s->mutex);
 
 out:
         sz = iov_from_buf(elem->in_sg, elem->in_num, 0,
@@ -759,9 +862,13 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
     sid = virtio_iommu_get_bdf(sdev);
 
     trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
-    qemu_mutex_lock(&s->mutex);
+
qemu_rec_mutex_lock(&s->mutex); ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); + + if (bypass_allowed) + assert(ep && ep->domain && !ep->domain->bypass); + if (!ep) { if (!bypass_allowed) { error_report_once("%s sid=%d is not known!!", __func__, sid); @@ -843,7 +950,7 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid); unlock: - qemu_mutex_unlock(&s->mutex); + qemu_rec_mutex_unlock(&s->mutex); return entry; } @@ -887,6 +994,7 @@ static void virtio_iommu_set_config(VirtIODevice *vdev, return; } dev_config->bypass = in_config->bypass; + virtio_iommu_switch_address_space_all(dev); } trace_virtio_iommu_set_config(in_config->bypass); @@ -931,7 +1039,7 @@ static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n) sid = virtio_iommu_get_bdf(sdev); - qemu_mutex_lock(&s->mutex); + qemu_rec_mutex_lock(&s->mutex); if (!s->endpoints) { goto unlock; @@ -945,7 +1053,7 @@ static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n) g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr); unlock: - qemu_mutex_unlock(&s->mutex); + qemu_rec_mutex_unlock(&s->mutex); } static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr, @@ -1026,6 +1134,8 @@ static void virtio_iommu_system_reset(void *opaque) * system reset */ s->config.bypass = s->boot_bypass; + virtio_iommu_switch_address_space_all(s); + } static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) @@ -1041,6 +1151,11 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) virtio_iommu_handle_command); s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL); + /* + * config.bypass is needed to get initial address space early, such as + * in vfio realize + */ + s->config.bypass = s->boot_bypass; s->config.page_size_mask = TARGET_PAGE_MASK; s->config.input_range.end = UINT64_MAX; s->config.domain_range.end = UINT32_MAX; @@ -1056,7 +1171,7 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE); virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG); - qemu_mutex_init(&s->mutex); + qemu_rec_mutex_init(&s->mutex); s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free); @@ -1084,6 +1199,8 @@ static void virtio_iommu_device_unrealize(DeviceState *dev) g_tree_destroy(s->endpoints); } + qemu_rec_mutex_destroy(&s->mutex); + virtio_delete_queue(s->req_vq); virtio_delete_queue(s->event_vq); virtio_cleanup(vdev); diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 688eccda94..d240efef97 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -72,12 +72,12 @@ static void virtio_mmio_soft_reset(VirtIOMMIOProxy *proxy) { int i; - if (proxy->legacy) { - return; - } + virtio_bus_reset(&proxy->bus); - for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { - proxy->vqs[i].enabled = 0; + if (!proxy->legacy) { + for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { + proxy->vqs[i].enabled = 0; + } } } @@ -376,7 +376,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, return; } if (value == 0) { - virtio_reset(vdev); + virtio_mmio_soft_reset(proxy); } else { virtio_queue_set_addr(vdev, vdev->queue_sel, value << proxy->guest_page_shift); @@ -432,7 +432,6 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, } if (vdev->status == 0) { - virtio_reset(vdev); virtio_mmio_soft_reset(proxy); } break; @@ -627,8 +626,8 @@ static void 
virtio_mmio_reset(DeviceState *d)
     VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
     int i;
 
-    virtio_mmio_stop_ioeventfd(proxy);
-    virtio_bus_reset(&proxy->bus);
+    virtio_mmio_soft_reset(proxy);
+
     proxy->host_features_sel = 0;
     proxy->guest_features_sel = 0;
     proxy->guest_page_shift = 0;
@@ -637,7 +636,6 @@ static void virtio_mmio_reset(DeviceState *d)
         proxy->guest_features[0] = proxy->guest_features[1] = 0;
 
         for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
-            proxy->vqs[i].enabled = 0;
             proxy->vqs[i].num = 0;
             proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0;
             proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0;
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 0566ad7d00..45327f0b31 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1945,7 +1945,6 @@ static void virtio_pci_reset(DeviceState *qdev)
     PCIDevice *dev = PCI_DEVICE(qdev);
     int i;
 
-    virtio_pci_stop_ioeventfd(proxy);
     virtio_bus_reset(bus);
     msix_unuse_all_vectors(&proxy->pci_dev);
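/*
 * Editor's sketch, not part of the diffs above: the common reset path
 * that the virtio-ccw, virtio-mmio and virtio-pci hunks now rely on.
 * As the hw/virtio/virtio-bus.c hunk shows, virtio_bus_reset() stops
 * ioeventfd itself before resetting the device, so the transports no
 * longer need their own *_stop_ioeventfd() calls first:
 */
void virtio_bus_reset(VirtioBusState *bus)
{
    VirtIODevice *vdev = virtio_bus_get_device(bus);

    virtio_bus_stop_ioeventfd(bus);    /* quiesce host notifiers first */
    if (vdev != NULL) {
        virtio_reset(vdev);            /* then reset device state */
    }
}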
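/*
 * Editor's sketch, not part of the diffs above: why the
 * hw/virtio/vhost-user.c hunk grows the notifier array to idx + 1.
 * g_ptr_array_set_size(a, n) makes indices 0..n-1 valid, so sizing the
 * array to idx would leave index idx itself out of range. A
 * self-contained illustration against plain GLib:
 */
#include <glib.h>

int main(void)
{
    GPtrArray *notifiers = g_ptr_array_new();
    guint idx = 3;

    if (idx >= notifiers->len) {
        /* len becomes idx + 1; the new slots are NULL-filled */
        g_ptr_array_set_size(notifiers, idx + 1);
    }
    g_assert(notifiers->len == idx + 1);
    g_assert(g_ptr_array_index(notifiers, idx) == NULL);

    g_ptr_array_free(notifiers, TRUE);
    return 0;
}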