diff options
Diffstat (limited to '')
| -rw-r--r-- | hw/Kconfig | 1 | ||||
| -rw-r--r-- | hw/block/Kconfig | 5 | ||||
| -rw-r--r-- | hw/block/meson.build | 1 | ||||
| -rw-r--r-- | hw/block/nvme-dif.h | 63 | ||||
| -rw-r--r-- | hw/block/nvme-ns.h | 229 | ||||
| -rw-r--r-- | hw/block/nvme-subsys.h | 59 | ||||
| -rw-r--r-- | hw/block/nvme.h | 266 | ||||
| -rw-r--r-- | hw/block/trace-events | 206 | ||||
| -rw-r--r-- | hw/block/vhost-user-blk.c | 85 | ||||
| -rw-r--r-- | hw/meson.build | 1 | ||||
| -rw-r--r-- | hw/nvme/Kconfig | 4 | ||||
| -rw-r--r-- | hw/nvme/ctrl.c (renamed from hw/block/nvme.c) | 298 | ||||
| -rw-r--r-- | hw/nvme/dif.c (renamed from hw/block/nvme-dif.c) | 57 | ||||
| -rw-r--r-- | hw/nvme/meson.build | 1 | ||||
| -rw-r--r-- | hw/nvme/ns.c (renamed from hw/block/nvme-ns.c) | 106 | ||||
| -rw-r--r-- | hw/nvme/nvme.h | 547 | ||||
| -rw-r--r-- | hw/nvme/subsys.c (renamed from hw/block/nvme-subsys.c) | 12 | ||||
| -rw-r--r-- | hw/nvme/trace-events | 204 | ||||
| -rw-r--r-- | hw/nvme/trace.h | 1 | ||||
| -rw-r--r-- | hw/ppc/meson.build | 3 | ||||
| -rw-r--r-- | hw/ppc/pnv.c | 2 | ||||
| -rw-r--r-- | hw/ppc/spapr.c | 21 | ||||
| -rw-r--r-- | hw/ppc/spapr_caps.c | 59 | ||||
| -rw-r--r-- | hw/ppc/spapr_hcall.c | 634 | ||||
| -rw-r--r-- | hw/ppc/spapr_softmmu.c | 627 | ||||
| -rw-r--r-- | hw/virtio/vhost-user.c | 5 | ||||
| -rw-r--r-- | hw/virtio/virtio-bus.c | 5 |
27 files changed, 1766 insertions, 1736 deletions
diff --git a/hw/Kconfig b/hw/Kconfig index aa10357adf..805860f564 100644 --- a/hw/Kconfig +++ b/hw/Kconfig @@ -21,6 +21,7 @@ source mem/Kconfig source misc/Kconfig source net/Kconfig source nubus/Kconfig +source nvme/Kconfig source nvram/Kconfig source pci-bridge/Kconfig source pci-host/Kconfig diff --git a/hw/block/Kconfig b/hw/block/Kconfig index 4fcd152166..295441e64a 100644 --- a/hw/block/Kconfig +++ b/hw/block/Kconfig @@ -25,11 +25,6 @@ config ONENAND config TC58128 bool -config NVME_PCI - bool - default y if PCI_DEVICES - depends on PCI - config VIRTIO_BLK bool default y diff --git a/hw/block/meson.build b/hw/block/meson.build index 5b4a7699f9..8b0de54db1 100644 --- a/hw/block/meson.build +++ b/hw/block/meson.build @@ -13,7 +13,6 @@ softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c')) softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c')) softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c')) softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c')) -softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('nvme.c', 'nvme-ns.c', 'nvme-subsys.c', 'nvme-dif.c')) specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk.c')) diff --git a/hw/block/nvme-dif.h b/hw/block/nvme-dif.h deleted file mode 100644 index 524faffbd7..0000000000 --- a/hw/block/nvme-dif.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * QEMU NVM Express End-to-End Data Protection support - * - * Copyright (c) 2021 Samsung Electronics Co., Ltd. - * - * Authors: - * Klaus Jensen <k.jensen@samsung.com> - * Gollu Appalanaidu <anaidu.gollu@samsung.com> - */ - -#ifndef HW_NVME_DIF_H -#define HW_NVME_DIF_H - -/* from Linux kernel (crypto/crct10dif_common.c) */ -static const uint16_t t10_dif_crc_table[256] = { - 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, - 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, - 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, - 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, - 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, - 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, - 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, - 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, - 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, - 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, - 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, - 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, - 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, - 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, - 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, - 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, - 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, - 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, - 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, - 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, - 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, - 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, - 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, - 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, - 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, - 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, - 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, - 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, - 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, - 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, - 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, - 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 -}; - -uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba, - uint32_t reftag); -uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, - uint64_t slba); -void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t apptag, - uint32_t reftag); -uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, - uint8_t *mbuf, size_t mlen, uint16_t ctrl, - uint64_t slba, uint16_t apptag, - uint16_t appmask, uint32_t reftag); -uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req); - -#endif /* HW_NVME_DIF_H */ diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h deleted file mode 100644 index fb0a41f912..0000000000 --- a/hw/block/nvme-ns.h +++ /dev/null @@ -1,229 +0,0 @@ -/* - * QEMU NVM Express Virtual Namespace - * - * Copyright (c) 2019 CNEX Labs - * Copyright (c) 2020 Samsung Electronics - * - * Authors: - * Klaus Jensen <k.jensen@samsung.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See the - * COPYING file in the top-level directory. - * - */ - -#ifndef NVME_NS_H -#define NVME_NS_H - -#include "qemu/uuid.h" - -#define TYPE_NVME_NS "nvme-ns" -#define NVME_NS(obj) \ - OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS) - -typedef struct NvmeZone { - NvmeZoneDescr d; - uint64_t w_ptr; - QTAILQ_ENTRY(NvmeZone) entry; -} NvmeZone; - -typedef struct NvmeNamespaceParams { - bool detached; - bool shared; - uint32_t nsid; - QemuUUID uuid; - - uint16_t ms; - uint8_t mset; - uint8_t pi; - uint8_t pil; - - uint16_t mssrl; - uint32_t mcl; - uint8_t msrc; - - bool zoned; - bool cross_zone_read; - uint64_t zone_size_bs; - uint64_t zone_cap_bs; - uint32_t max_active_zones; - uint32_t max_open_zones; - uint32_t zd_extension_size; -} NvmeNamespaceParams; - -typedef struct NvmeNamespace { - DeviceState parent_obj; - BlockConf blkconf; - int32_t bootindex; - int64_t size; - int64_t mdata_offset; - NvmeIdNs id_ns; - const uint32_t *iocs; - uint8_t csi; - uint16_t status; - int attached; - - QTAILQ_ENTRY(NvmeNamespace) entry; - - NvmeIdNsZoned *id_ns_zoned; - NvmeZone *zone_array; - QTAILQ_HEAD(, NvmeZone) exp_open_zones; - QTAILQ_HEAD(, NvmeZone) imp_open_zones; - QTAILQ_HEAD(, NvmeZone) closed_zones; - QTAILQ_HEAD(, NvmeZone) full_zones; - uint32_t num_zones; - uint64_t zone_size; - uint64_t zone_capacity; - uint32_t zone_size_log2; - uint8_t *zd_extensions; - int32_t nr_open_zones; - int32_t nr_active_zones; - - NvmeNamespaceParams params; - - struct { - uint32_t err_rec; - } features; -} NvmeNamespace; - -static inline uint16_t nvme_ns_status(NvmeNamespace *ns) -{ - return ns->status; -} - -static inline uint32_t nvme_nsid(NvmeNamespace *ns) -{ - if (ns) { - return ns->params.nsid; - } - - return 0; -} - -static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns) -{ - NvmeIdNs *id_ns = &ns->id_ns; - return &id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; -} - -static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns) -{ - return nvme_ns_lbaf(ns)->ds; -} - -/* convert an LBA to the equivalent in bytes */ -static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba) -{ - return lba << nvme_ns_lbads(ns); -} - -static inline size_t nvme_lsize(NvmeNamespace *ns) -{ - return 1 << nvme_ns_lbads(ns); -} - -static inline uint16_t nvme_msize(NvmeNamespace *ns) -{ - return nvme_ns_lbaf(ns)->ms; -} - -static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba) -{ - return nvme_msize(ns) * lba; -} - -static inline bool nvme_ns_ext(NvmeNamespace *ns) -{ - return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas); -} - -/* calculate the number of LBAs that the namespace can accomodate */ -static inline uint64_t nvme_ns_nlbas(NvmeNamespace *ns) -{ - if (nvme_msize(ns)) { - return ns->size / (nvme_lsize(ns) + nvme_msize(ns)); - } - return ns->size >> nvme_ns_lbads(ns); -} - -typedef struct NvmeCtrl NvmeCtrl; - -static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone) -{ - return zone->d.zs >> 4; -} - -static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state) -{ - zone->d.zs = state << 4; -} - -static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone) -{ - return zone->d.zslba + ns->zone_size; -} - -static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone) -{ - return zone->d.zslba + zone->d.zcap; -} - -static inline bool nvme_wp_is_valid(NvmeZone *zone) -{ - uint8_t st = nvme_get_zone_state(zone); - - return st != NVME_ZONE_STATE_FULL && - st != NVME_ZONE_STATE_READ_ONLY && - st != NVME_ZONE_STATE_OFFLINE; -} - -static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns, - uint32_t zone_idx) -{ - return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size]; -} - -static inline void nvme_aor_inc_open(NvmeNamespace *ns) -{ - assert(ns->nr_open_zones >= 0); - if (ns->params.max_open_zones) { - ns->nr_open_zones++; - assert(ns->nr_open_zones <= ns->params.max_open_zones); - } -} - -static inline void nvme_aor_dec_open(NvmeNamespace *ns) -{ - if (ns->params.max_open_zones) { - assert(ns->nr_open_zones > 0); - ns->nr_open_zones--; - } - assert(ns->nr_open_zones >= 0); -} - -static inline void nvme_aor_inc_active(NvmeNamespace *ns) -{ - assert(ns->nr_active_zones >= 0); - if (ns->params.max_active_zones) { - ns->nr_active_zones++; - assert(ns->nr_active_zones <= ns->params.max_active_zones); - } -} - -static inline void nvme_aor_dec_active(NvmeNamespace *ns) -{ - if (ns->params.max_active_zones) { - assert(ns->nr_active_zones > 0); - ns->nr_active_zones--; - assert(ns->nr_active_zones >= ns->nr_open_zones); - } - assert(ns->nr_active_zones >= 0); -} - -void nvme_ns_init_format(NvmeNamespace *ns); -int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp); -void nvme_ns_drain(NvmeNamespace *ns); -void nvme_ns_shutdown(NvmeNamespace *ns); -void nvme_ns_cleanup(NvmeNamespace *ns); - -#endif /* NVME_NS_H */ diff --git a/hw/block/nvme-subsys.h b/hw/block/nvme-subsys.h deleted file mode 100644 index 7d7ef5f7f1..0000000000 --- a/hw/block/nvme-subsys.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * QEMU NVM Express Subsystem: nvme-subsys - * - * Copyright (c) 2021 Minwoo Im <minwoo.im.dev@gmail.com> - * - * This code is licensed under the GNU GPL v2. Refer COPYING. - */ - -#ifndef NVME_SUBSYS_H -#define NVME_SUBSYS_H - -#define TYPE_NVME_SUBSYS "nvme-subsys" -#define NVME_SUBSYS(obj) \ - OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS) - -#define NVME_SUBSYS_MAX_CTRLS 32 -#define NVME_MAX_NAMESPACES 256 - -typedef struct NvmeCtrl NvmeCtrl; -typedef struct NvmeNamespace NvmeNamespace; -typedef struct NvmeSubsystem { - DeviceState parent_obj; - uint8_t subnqn[256]; - - NvmeCtrl *ctrls[NVME_SUBSYS_MAX_CTRLS]; - /* Allocated namespaces for this subsystem */ - NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; - - struct { - char *nqn; - } params; -} NvmeSubsystem; - -int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp); - -static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys, - uint32_t cntlid) -{ - if (!subsys || cntlid >= NVME_SUBSYS_MAX_CTRLS) { - return NULL; - } - - return subsys->ctrls[cntlid]; -} - -/* - * Return allocated namespace of the specified nsid in the subsystem. - */ -static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys, - uint32_t nsid) -{ - if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) { - return NULL; - } - - return subsys->namespaces[nsid]; -} - -#endif /* NVME_SUBSYS_H */ diff --git a/hw/block/nvme.h b/hw/block/nvme.h deleted file mode 100644 index 5d05ec368f..0000000000 --- a/hw/block/nvme.h +++ /dev/null @@ -1,266 +0,0 @@ -#ifndef HW_NVME_H -#define HW_NVME_H - -#include "block/nvme.h" -#include "hw/pci/pci.h" -#include "nvme-subsys.h" -#include "nvme-ns.h" - -#define NVME_DEFAULT_ZONE_SIZE (128 * MiB) -#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB) - -typedef struct NvmeParams { - char *serial; - uint32_t num_queues; /* deprecated since 5.1 */ - uint32_t max_ioqpairs; - uint16_t msix_qsize; - uint32_t cmb_size_mb; - uint8_t aerl; - uint32_t aer_max_queued; - uint8_t mdts; - uint8_t vsl; - bool use_intel_id; - uint8_t zasl; - bool legacy_cmb; -} NvmeParams; - -typedef struct NvmeAsyncEvent { - QTAILQ_ENTRY(NvmeAsyncEvent) entry; - NvmeAerResult result; -} NvmeAsyncEvent; - -enum { - NVME_SG_ALLOC = 1 << 0, - NVME_SG_DMA = 1 << 1, -}; - -typedef struct NvmeSg { - int flags; - - union { - QEMUSGList qsg; - QEMUIOVector iov; - }; -} NvmeSg; - -typedef struct NvmeRequest { - struct NvmeSQueue *sq; - struct NvmeNamespace *ns; - BlockAIOCB *aiocb; - uint16_t status; - void *opaque; - NvmeCqe cqe; - NvmeCmd cmd; - BlockAcctCookie acct; - NvmeSg sg; - QTAILQ_ENTRY(NvmeRequest)entry; -} NvmeRequest; - -typedef struct NvmeBounceContext { - NvmeRequest *req; - - struct { - QEMUIOVector iov; - uint8_t *bounce; - } data, mdata; -} NvmeBounceContext; - -static inline const char *nvme_adm_opc_str(uint8_t opc) -{ - switch (opc) { - case NVME_ADM_CMD_DELETE_SQ: return "NVME_ADM_CMD_DELETE_SQ"; - case NVME_ADM_CMD_CREATE_SQ: return "NVME_ADM_CMD_CREATE_SQ"; - case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE"; - case NVME_ADM_CMD_DELETE_CQ: return "NVME_ADM_CMD_DELETE_CQ"; - case NVME_ADM_CMD_CREATE_CQ: return "NVME_ADM_CMD_CREATE_CQ"; - case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY"; - case NVME_ADM_CMD_ABORT: return "NVME_ADM_CMD_ABORT"; - case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES"; - case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES"; - case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ"; - case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT"; - case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM"; - default: return "NVME_ADM_CMD_UNKNOWN"; - } -} - -static inline const char *nvme_io_opc_str(uint8_t opc) -{ - switch (opc) { - case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH"; - case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE"; - case NVME_CMD_READ: return "NVME_NVM_CMD_READ"; - case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE"; - case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES"; - case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM"; - case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY"; - case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY"; - case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND"; - case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV"; - case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND"; - default: return "NVME_NVM_CMD_UNKNOWN"; - } -} - -typedef struct NvmeSQueue { - struct NvmeCtrl *ctrl; - uint16_t sqid; - uint16_t cqid; - uint32_t head; - uint32_t tail; - uint32_t size; - uint64_t dma_addr; - QEMUTimer *timer; - NvmeRequest *io_req; - QTAILQ_HEAD(, NvmeRequest) req_list; - QTAILQ_HEAD(, NvmeRequest) out_req_list; - QTAILQ_ENTRY(NvmeSQueue) entry; -} NvmeSQueue; - -typedef struct NvmeCQueue { - struct NvmeCtrl *ctrl; - uint8_t phase; - uint16_t cqid; - uint16_t irq_enabled; - uint32_t head; - uint32_t tail; - uint32_t vector; - uint32_t size; - uint64_t dma_addr; - QEMUTimer *timer; - QTAILQ_HEAD(, NvmeSQueue) sq_list; - QTAILQ_HEAD(, NvmeRequest) req_list; -} NvmeCQueue; - -#define TYPE_NVME_BUS "nvme-bus" -#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS) - -typedef struct NvmeBus { - BusState parent_bus; -} NvmeBus; - -#define TYPE_NVME "nvme" -#define NVME(obj) \ - OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) - -typedef struct NvmeFeatureVal { - struct { - uint16_t temp_thresh_hi; - uint16_t temp_thresh_low; - }; - uint32_t async_config; -} NvmeFeatureVal; - -typedef struct NvmeCtrl { - PCIDevice parent_obj; - MemoryRegion bar0; - MemoryRegion iomem; - NvmeBar bar; - NvmeParams params; - NvmeBus bus; - - uint16_t cntlid; - bool qs_created; - uint32_t page_size; - uint16_t page_bits; - uint16_t max_prp_ents; - uint16_t cqe_size; - uint16_t sqe_size; - uint32_t reg_size; - uint32_t num_namespaces; - uint32_t max_q_ents; - uint8_t outstanding_aers; - uint32_t irq_status; - uint64_t host_timestamp; /* Timestamp sent by the host */ - uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ - uint64_t starttime_ms; - uint16_t temperature; - uint8_t smart_critical_warning; - - struct { - MemoryRegion mem; - uint8_t *buf; - bool cmse; - hwaddr cba; - } cmb; - - struct { - HostMemoryBackend *dev; - bool cmse; - hwaddr cba; - } pmr; - - uint8_t aer_mask; - NvmeRequest **aer_reqs; - QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue; - int aer_queued; - - uint32_t dmrsl; - - /* Namespace ID is started with 1 so bitmap should be 1-based */ -#define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1) - DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE); - - NvmeSubsystem *subsys; - - NvmeNamespace namespace; - /* - * Attached namespaces to this controller. If subsys is not given, all - * namespaces in this list will always be attached. - */ - NvmeNamespace *namespaces[NVME_MAX_NAMESPACES]; - NvmeSQueue **sq; - NvmeCQueue **cq; - NvmeSQueue admin_sq; - NvmeCQueue admin_cq; - NvmeIdCtrl id_ctrl; - NvmeFeatureVal features; -} NvmeCtrl; - -static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid) -{ - if (!nsid || nsid > n->num_namespaces) { - return NULL; - } - - return n->namespaces[nsid - 1]; -} - -static inline NvmeCQueue *nvme_cq(NvmeRequest *req) -{ - NvmeSQueue *sq = req->sq; - NvmeCtrl *n = sq->ctrl; - - return n->cq[sq->cqid]; -} - -static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req) -{ - NvmeSQueue *sq = req->sq; - return sq->ctrl; -} - -static inline uint16_t nvme_cid(NvmeRequest *req) -{ - if (!req) { - return 0xffff; - } - - return le16_to_cpu(req->cqe.cid); -} - -typedef enum NvmeTxDirection { - NVME_TX_DIRECTION_TO_DEVICE = 0, - NVME_TX_DIRECTION_FROM_DEVICE = 1, -} NvmeTxDirection; - -void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns); -uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeTxDirection dir, NvmeRequest *req); -uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, - NvmeTxDirection dir, NvmeRequest *req); -void nvme_rw_complete_cb(void *opaque, int ret); -uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, - NvmeCmd *cmd); - -#endif /* HW_NVME_H */ diff --git a/hw/block/trace-events b/hw/block/trace-events index fa12e3a67a..646917d045 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -49,212 +49,6 @@ virtio_blk_submit_multireq(void *vdev, void *mrb, int start, int num_reqs, uint6 hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d" hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans) "blk %p CHS %u %u %u trans %d" -# nvme.c -# nvme traces for successful events -pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" -pci_nvme_irq_pin(void) "pulsing IRQ pin" -pci_nvme_irq_masked(void) "IRQ is masked" -pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" -pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" -pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" -pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d" -pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64"" -pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" -pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" -pci_nvme_flush(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" -pci_nvme_format(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" -pci_nvme_format_ns(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" -pci_nvme_format_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" -pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" -pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" -pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_misc_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_dif_rw(uint8_t pract, uint8_t prinfo) "pract 0x%"PRIx8" prinfo 0x%"PRIx8"" -pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_dif_rw_mdata_out_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_dif_rw_check_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_dif_pract_generate_dif(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_dif_check(uint8_t prinfo, uint16_t chksum_len) "prinfo 0x%"PRIx8" chksum_len %"PRIu16"" -pci_nvme_dif_prchk_disabled(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_dif_prchk_guard(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16"" -pci_nvme_dif_prchk_apptag(uint16_t apptag, uint16_t elbat, uint16_t elbatm) "apptag 0x%"PRIx16" elbat 0x%"PRIx16" elbatm 0x%"PRIx16"" -pci_nvme_dif_prchk_reftag(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32"" -pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8"" -pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" -pci_nvme_copy_in_complete(uint16_t cid) "cid %"PRIu16"" -pci_nvme_copy_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_verify(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" -pci_nvme_verify_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_verify_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" -pci_nvme_rw_complete_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d" -pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32"" -pci_nvme_dsm_deallocate(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32"" -pci_nvme_dsm_single_range_limit_exceeded(uint32_t nlb, uint32_t dmrsl) "nlb %"PRIu32" dmrsl %"PRIu32"" -pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" -pci_nvme_compare_data_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_compare_mdata_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16"" -pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64"" -pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" -pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" -pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" -pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" -pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16"" -pci_nvme_identify(uint16_t cid, uint8_t cns, uint16_t ctrlid, uint8_t csi) "cid %"PRIu16" cns 0x%"PRIx8" ctrlid %"PRIu16" csi 0x%"PRIx8"" -pci_nvme_identify_ctrl(void) "identify controller" -pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8"" -pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32"" -pci_nvme_identify_ns_attached_list(uint16_t cntid) "cntid=%"PRIu16"" -pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8"" -pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32"" -pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8"" -pci_nvme_identify_cmd_set(void) "identify i/o command set" -pci_nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32"" -pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64"" -pci_nvme_getfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t sel, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" sel 0x%"PRIx8" cdw11 0x%"PRIx32"" -pci_nvme_setfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t save, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" save 0x%"PRIx8" cdw11 0x%"PRIx32"" -pci_nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" -pci_nvme_getfeat_numq(int result) "get feature number of queues, result=%d" -pci_nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" -pci_nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" -pci_nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" -pci_nvme_process_aers(int queued) "queued %d" -pci_nvme_aer(uint16_t cid) "cid %"PRIu16"" -pci_nvme_aer_aerl_exceeded(void) "aerl exceeded" -pci_nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8"" -pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" -pci_nvme_ns_attachment(uint16_t cid, uint8_t sel) "cid %"PRIu16", sel=0x%"PRIx8"" -pci_nvme_ns_attachment_attach(uint16_t cntlid, uint32_t nsid) "cntlid=0x%"PRIx16", nsid=0x%"PRIx32"" -pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" -pci_nvme_enqueue_event_noqueue(int queued) "queued %d" -pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8"" -pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" -pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16"" -pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d" -pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d" -pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16"" -pci_nvme_mmio_doorbell_sq(uint16_t sqid, uint16_t new_tail) "sqid %"PRIu16" new_tail %"PRIu16"" -pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" -pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" -pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" -pci_nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" -pci_nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" -pci_nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" -pci_nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" -pci_nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" -pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded" -pci_nvme_mmio_stopped(void) "cleared controller enable bit" -pci_nvme_mmio_shutdown_set(void) "shutdown bit set" -pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" -pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_reset_zone(uint64_t slba, uint32_t zone_idx, int all) "reset zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_offline_zone(uint64_t slba, uint32_t zone_idx, int all) "offline zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" -pci_nvme_set_descriptor_extension(uint64_t slba, uint32_t zone_idx) "set zone descriptor extension, slba=%"PRIu64", idx=%"PRIu32"" -pci_nvme_zd_extension_set(uint32_t zone_idx) "set descriptor extension for zone_idx=%"PRIu32"" -pci_nvme_clear_ns_close(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Closed state" -pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Empty state" - -# nvme traces for error conditions -pci_nvme_err_mdts(size_t len) "len %zu" -pci_nvme_err_zasl(size_t len) "len %zu" -pci_nvme_err_req_status(uint16_t cid, uint32_t nsid, uint16_t status, uint8_t opc) "cid %"PRIu16" nsid %"PRIu32" status 0x%"PRIx16" opc 0x%"PRIx8"" -pci_nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64"" -pci_nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64"" -pci_nvme_err_cfs(void) "controller fatal status" -pci_nvme_err_aio(uint16_t cid, const char *errname, uint16_t status) "cid %"PRIu16" err '%s' status 0x%"PRIx16"" -pci_nvme_err_copy_invalid_format(uint8_t format) "format 0x%"PRIx8"" -pci_nvme_err_invalid_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" -pci_nvme_err_invalid_num_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" -pci_nvme_err_invalid_sgl_excess_length(uint32_t residual) "residual %"PRIu32"" -pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" -pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is not page aligned: 0x%"PRIx64"" -pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" -pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8"" -pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" -pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" -pci_nvme_err_invalid_log_page_offset(uint64_t ofs, uint64_t size) "must be <= %"PRIu64", got %"PRIu64"" -pci_nvme_err_cmb_invalid_cba(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64"" -pci_nvme_err_cmb_not_enabled(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64"" -pci_nvme_err_unaligned_zone_cmd(uint8_t action, uint64_t slba, uint64_t zslba) "unaligned zone op 0x%"PRIx32", got slba=%"PRIu64", zslba=%"PRIu64"" -pci_nvme_err_invalid_zone_state_transition(uint8_t action, uint64_t slba, uint8_t attrs) "action=0x%"PRIx8", slba=%"PRIu64", attrs=0x%"PRIx32"" -pci_nvme_err_write_not_at_wp(uint64_t slba, uint64_t zone, uint64_t wp) "writing at slba=%"PRIu64", zone=%"PRIu64", but wp=%"PRIu64"" -pci_nvme_err_append_not_at_start(uint64_t slba, uint64_t zone) "appending at slba=%"PRIu64", but zone=%"PRIu64"" -pci_nvme_err_zone_is_full(uint64_t zslba) "zslba 0x%"PRIx64"" -pci_nvme_err_zone_is_read_only(uint64_t zslba) "zslba 0x%"PRIx64"" -pci_nvme_err_zone_is_offline(uint64_t zslba) "zslba 0x%"PRIx64"" -pci_nvme_err_zone_boundary(uint64_t slba, uint32_t nlb, uint64_t zcap) "lba 0x%"PRIx64" nlb %"PRIu32" zcap 0x%"PRIx64"" -pci_nvme_err_zone_invalid_write(uint64_t slba, uint64_t wp) "lba 0x%"PRIx64" wp 0x%"PRIx64"" -pci_nvme_err_zone_write_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16"" -pci_nvme_err_zone_read_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16"" -pci_nvme_err_insuff_active_res(uint32_t max_active) "max_active=%"PRIu32" zone limit exceeded" -pci_nvme_err_insuff_open_res(uint32_t max_open) "max_open=%"PRIu32" zone limit exceeded" -pci_nvme_err_zd_extension_map_error(uint32_t zone_idx) "can't map descriptor extension for zone_idx=%"PRIu32"" -pci_nvme_err_invalid_iocsci(uint32_t idx) "unsupported command set combination index %"PRIu32"" -pci_nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16"" -pci_nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" -pci_nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" -pci_nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16"" -pci_nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" -pci_nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" -pci_nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" -pci_nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16"" -pci_nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" -pci_nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" -pci_nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" -pci_nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" -pci_nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" -pci_nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" -pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" -pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" -pci_nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16"" -pci_nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" -pci_nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" -pci_nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" -pci_nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null" -pci_nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" -pci_nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" -pci_nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" -pci_nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" -pci_nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" -pci_nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" -pci_nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u" -pci_nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" -pci_nvme_err_startfail_css(uint8_t css) "nvme_start_ctrl failed because invalid command set selected:%u" -pci_nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" -pci_nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" -pci_nvme_err_startfail_zasl_too_small(uint32_t zasl, uint32_t pagesz) "nvme_start_ctrl failed because zone append size limit %"PRIu32" is too small, needs to be >= %"PRIu32"" -pci_nvme_err_startfail(void) "setting controller enable bit failed" -pci_nvme_err_invalid_mgmt_action(uint8_t action) "action=0x%"PRIx8"" - -# Traces for undefined behavior -pci_nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" -pci_nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" -pci_nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" -pci_nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status" -pci_nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" -pci_nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" -pci_nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" -pci_nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" -pci_nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored" -pci_nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored" -pci_nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored" -pci_nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored" -pci_nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" -pci_nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" -pci_nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" -pci_nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" -pci_nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" -pci_nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" -pci_nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring" -pci_nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" -pci_nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" -pci_nvme_ub_unknown_css_value(void) "unknown value in cc.css field" - # xen-block.c xen_block_realize(const char *type, uint32_t disk, uint32_t partition) "%s d%up%u" xen_block_connect(const char *type, uint32_t disk, uint32_t partition) "%s d%up%u" diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index f5e9682703..c6210fad0c 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -47,9 +47,13 @@ static const int user_feature_bits[] = { VIRTIO_RING_F_INDIRECT_DESC, VIRTIO_RING_F_EVENT_IDX, VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_F_RING_PACKED, + VIRTIO_F_IOMMU_PLATFORM, VHOST_INVALID_FEATURE_BIT }; +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event); + static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config) { VHostUserBlk *s = VHOST_USER_BLK(vdev); @@ -309,7 +313,7 @@ static void vhost_user_blk_reset(VirtIODevice *vdev) vhost_dev_free_inflight(s->inflight); } -static int vhost_user_blk_connect(DeviceState *dev) +static int vhost_user_blk_connect(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserBlk *s = VHOST_USER_BLK(vdev); @@ -320,6 +324,7 @@ static int vhost_user_blk_connect(DeviceState *dev) } s->connected = true; + s->dev.num_queues = s->num_queues; s->dev.nvqs = s->num_queues; s->dev.vqs = s->vhost_vqs; s->dev.vq_index = 0; @@ -329,8 +334,7 @@ static int vhost_user_blk_connect(DeviceState *dev) ret = vhost_dev_init(&s->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0); if (ret < 0) { - error_report("vhost-user-blk: vhost initialization failed: %s", - strerror(-ret)); + error_setg_errno(errp, -ret, "vhost initialization failed"); return ret; } @@ -338,8 +342,7 @@ static int vhost_user_blk_connect(DeviceState *dev) if (virtio_device_started(vdev, vdev->status)) { ret = vhost_user_blk_start(vdev); if (ret < 0) { - error_report("vhost-user-blk: vhost start failed: %s", - strerror(-ret)); + error_setg_errno(errp, -ret, "vhost start failed"); return ret; } } @@ -362,19 +365,6 @@ static void vhost_user_blk_disconnect(DeviceState *dev) vhost_dev_cleanup(&s->dev); } -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, - bool realized); - -static void vhost_user_blk_event_realize(void *opaque, QEMUChrEvent event) -{ - vhost_user_blk_event(opaque, event, false); -} - -static void vhost_user_blk_event_oper(void *opaque, QEMUChrEvent event) -{ - vhost_user_blk_event(opaque, event, true); -} - static void vhost_user_blk_chr_closed_bh(void *opaque) { DeviceState *dev = opaque; @@ -382,36 +372,27 @@ static void vhost_user_blk_chr_closed_bh(void *opaque) VHostUserBlk *s = VHOST_USER_BLK(vdev); vhost_user_blk_disconnect(dev); - qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, - vhost_user_blk_event_oper, NULL, opaque, NULL, true); + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, + NULL, opaque, NULL, true); } -static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, - bool realized) +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) { DeviceState *dev = opaque; VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserBlk *s = VHOST_USER_BLK(vdev); + Error *local_err = NULL; switch (event) { case CHR_EVENT_OPENED: - if (vhost_user_blk_connect(dev) < 0) { + if (vhost_user_blk_connect(dev, &local_err) < 0) { + error_report_err(local_err); qemu_chr_fe_disconnect(&s->chardev); return; } break; case CHR_EVENT_CLOSED: - /* - * Closing the connection should happen differently on device - * initialization and operation stages. - * On initalization, we want to re-start vhost_dev initialization - * from the very beginning right away when the connection is closed, - * so we clean up vhost_dev on each connection closing. - * On operation, we want to postpone vhost_dev cleanup to let the - * other code perform its own cleanup sequence using vhost_dev data - * (e.g. vhost_dev_set_log). - */ - if (realized && !runstate_check(RUN_STATE_SHUTDOWN)) { + if (!runstate_check(RUN_STATE_SHUTDOWN)) { /* * A close event may happen during a read/write, but vhost * code assumes the vhost_dev remains setup, so delay the @@ -431,8 +412,6 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event, * knowing its type (in this case vhost-user). */ s->dev.started = false; - } else { - vhost_user_blk_disconnect(dev); } break; case CHR_EVENT_BREAK: @@ -447,11 +426,10 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserBlk *s = VHOST_USER_BLK(vdev); - Error *err = NULL; int i, ret; if (!s->chardev.chr) { - error_setg(errp, "vhost-user-blk: chardev is mandatory"); + error_setg(errp, "chardev is mandatory"); return; } @@ -459,16 +437,16 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) s->num_queues = 1; } if (!s->num_queues || s->num_queues > VIRTIO_QUEUE_MAX) { - error_setg(errp, "vhost-user-blk: invalid number of IO queues"); + error_setg(errp, "invalid number of IO queues"); return; } if (!s->queue_size) { - error_setg(errp, "vhost-user-blk: queue size must be non-zero"); + error_setg(errp, "queue size must be non-zero"); return; } if (s->queue_size > VIRTQUEUE_MAX_SIZE) { - error_setg(errp, "vhost-user-blk: queue size must not exceed %d", + error_setg(errp, "queue size must not exceed %d", VIRTQUEUE_MAX_SIZE); return; } @@ -490,34 +468,31 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues); s->connected = false; - qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, - vhost_user_blk_event_realize, NULL, (void *)dev, - NULL, true); - -reconnect: - if (qemu_chr_fe_wait_connected(&s->chardev, &err) < 0) { - error_report_err(err); + if (qemu_chr_fe_wait_connected(&s->chardev, errp) < 0) { goto virtio_err; } - /* check whether vhost_user_blk_connect() failed or not */ - if (!s->connected) { - goto reconnect; + if (vhost_user_blk_connect(dev, errp) < 0) { + qemu_chr_fe_disconnect(&s->chardev); + goto virtio_err; } + assert(s->connected); ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, sizeof(struct virtio_blk_config)); if (ret < 0) { - error_report("vhost-user-blk: get block config failed"); - goto reconnect; + error_setg(errp, "vhost-user-blk: get block config failed"); + goto vhost_err; } - /* we're fully initialized, now we can operate, so change the handler */ + /* we're fully initialized, now we can operate, so add the handler */ qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, - vhost_user_blk_event_oper, NULL, (void *)dev, + vhost_user_blk_event, NULL, (void *)dev, NULL, true); return; +vhost_err: + vhost_dev_cleanup(&s->dev); virtio_err: g_free(s->vhost_vqs); s->vhost_vqs = NULL; diff --git a/hw/meson.build b/hw/meson.build index 6bdbae0e81..ba0601e36e 100644 --- a/hw/meson.build +++ b/hw/meson.build @@ -21,6 +21,7 @@ subdir('mem') subdir('misc') subdir('net') subdir('nubus') +subdir('nvme') subdir('nvram') subdir('pci') subdir('pci-bridge') diff --git a/hw/nvme/Kconfig b/hw/nvme/Kconfig new file mode 100644 index 0000000000..8ac90942e5 --- /dev/null +++ b/hw/nvme/Kconfig @@ -0,0 +1,4 @@ +config NVME_PCI + bool + default y if PCI_DEVICES + depends on PCI diff --git a/hw/block/nvme.c b/hw/nvme/ctrl.c index 5fe082ec34..0bcaf7192f 100644 --- a/hw/block/nvme.c +++ b/hw/nvme/ctrl.c @@ -12,10 +12,19 @@ * Reference Specs: http://www.nvmexpress.org, 1.4, 1.3, 1.2, 1.1, 1.0e * * https://nvmexpress.org/developers/nvme-specification/ - */ - -/** - * Usage: add options: + * + * + * Notes on coding style + * --------------------- + * While QEMU coding style prefers lowercase hexadecimals in constants, the + * NVMe subsystem use thes format from the NVMe specifications in the comments + * (i.e. 'h' suffix instead of '0x' prefix). + * + * Usage + * ----- + * See docs/system/nvme.rst for extensive documentation. + * + * Add options: * -drive file=<file>,if=none,id=<drive_id> * -device nvme-subsys,id=<subsys_id>,nqn=<nqn_id> * -device nvme,serial=<serial>,id=<bus_name>, \ @@ -135,26 +144,20 @@ */ #include "qemu/osdep.h" -#include "qemu/units.h" +#include "qemu/cutils.h" #include "qemu/error-report.h" -#include "hw/block/block.h" -#include "hw/pci/msix.h" -#include "hw/pci/pci.h" -#include "hw/qdev-properties.h" -#include "migration/vmstate.h" -#include "sysemu/sysemu.h" +#include "qemu/log.h" +#include "qemu/units.h" #include "qapi/error.h" #include "qapi/visitor.h" -#include "sysemu/hostmem.h" +#include "sysemu/sysemu.h" #include "sysemu/block-backend.h" -#include "exec/memory.h" -#include "qemu/log.h" -#include "qemu/module.h" -#include "qemu/cutils.h" -#include "trace.h" +#include "sysemu/hostmem.h" +#include "hw/pci/msix.h" +#include "migration/vmstate.h" + #include "nvme.h" -#include "nvme-ns.h" -#include "nvme-dif.h" +#include "trace.h" #define NVME_MAX_IOQPAIRS 0xffff #define NVME_DB_SIZE 4 @@ -165,6 +168,7 @@ #define NVME_TEMPERATURE_WARNING 0x157 #define NVME_TEMPERATURE_CRITICAL 0x175 #define NVME_NUM_FW_SLOTS 1 +#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB) #define NVME_GUEST_ERR(trace, fmt, ...) \ do { \ @@ -185,6 +189,7 @@ static const bool nvme_feature_support[NVME_FID_MAX] = { [NVME_WRITE_ATOMICITY] = true, [NVME_ASYNCHRONOUS_EVENT_CONF] = true, [NVME_TIMESTAMP] = true, + [NVME_COMMAND_SET_PROFILE] = true, }; static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { @@ -194,6 +199,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = { [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE, [NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE, [NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE, + [NVME_COMMAND_SET_PROFILE] = NVME_FEAT_CAP_CHANGE, }; static const uint32_t nvme_cse_acs[256] = { @@ -387,7 +393,8 @@ static int nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, int size) static bool nvme_nsid_valid(NvmeCtrl *n, uint32_t nsid) { - return nsid && (nsid == NVME_NSID_BROADCAST || nsid <= n->num_namespaces); + return nsid && + (nsid == NVME_NSID_BROADCAST || nsid <= NVME_MAX_NAMESPACES); } static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) @@ -511,9 +518,7 @@ static void nvme_sg_split(NvmeSg *sg, NvmeNamespace *ns, NvmeSg *data, NvmeSg *mdata) { NvmeSg *dst = data; - size_t size = nvme_lsize(ns); - size_t msize = nvme_msize(ns); - uint32_t trans_len, count = size; + uint32_t trans_len, count = ns->lbasz; uint64_t offset = 0; bool dma = sg->flags & NVME_SG_DMA; size_t sge_len; @@ -545,7 +550,7 @@ static void nvme_sg_split(NvmeSg *sg, NvmeNamespace *ns, NvmeSg *data, if (count == 0) { dst = (dst == data) ? mdata : data; - count = (dst == data) ? size : msize; + count = (dst == data) ? ns->lbasz : ns->lbaf.ms; } if (sge_len == offset) { @@ -574,7 +579,7 @@ static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr, } static uint16_t nvme_map_addr_pmr(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr, - size_t len) + size_t len) { if (!len) { return NVME_SUCCESS; @@ -1004,7 +1009,7 @@ static uint16_t nvme_map_data(NvmeCtrl *n, uint32_t nlb, NvmeRequest *req) uint16_t status; if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) && - (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) { + (ctrl & NVME_RW_PRINFO_PRACT && ns->lbaf.ms == 8)) { goto out; } @@ -1187,12 +1192,9 @@ uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, uint16_t ctrl = le16_to_cpu(rw->control); if (nvme_ns_ext(ns) && - !(ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8)) { - size_t lsize = nvme_lsize(ns); - size_t msize = nvme_msize(ns); - - return nvme_tx_interleaved(n, &req->sg, ptr, len, lsize, msize, 0, - dir); + !(ctrl & NVME_RW_PRINFO_PRACT && ns->lbaf.ms == 8)) { + return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbasz, + ns->lbaf.ms, 0, dir); } return nvme_tx(n, &req->sg, ptr, len, dir); @@ -1205,11 +1207,8 @@ uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, uint16_t status; if (nvme_ns_ext(ns)) { - size_t lsize = nvme_lsize(ns); - size_t msize = nvme_msize(ns); - - return nvme_tx_interleaved(n, &req->sg, ptr, len, msize, lsize, lsize, - dir); + return nvme_tx_interleaved(n, &req->sg, ptr, len, ns->lbaf.ms, + ns->lbasz, ns->lbasz, dir); } nvme_sg_unmap(&req->sg); @@ -1426,6 +1425,7 @@ static inline uint16_t nvme_check_bounds(NvmeNamespace *ns, uint64_t slba, uint64_t nsze = le64_to_cpu(ns->id_ns.nsze); if (unlikely(UINT64_MAX - slba < nlb || slba + nlb > nsze)) { + trace_pci_nvme_err_invalid_lba_range(slba, nlb, nsze); return NVME_LBA_RANGE | NVME_DNR; } @@ -1682,8 +1682,12 @@ static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns) } } -static uint16_t __nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone, - bool implicit) +enum { + NVME_ZRM_AUTO = 1 << 0, +}; + +static uint16_t nvme_zrm_open_flags(NvmeNamespace *ns, NvmeZone *zone, + int flags) { int act = 0; uint16_t status; @@ -1707,7 +1711,7 @@ static uint16_t __nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone, nvme_aor_inc_open(ns); - if (implicit) { + if (flags & NVME_ZRM_AUTO) { nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_IMPLICITLY_OPEN); return NVME_SUCCESS; } @@ -1715,7 +1719,7 @@ static uint16_t __nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone, /* fallthrough */ case NVME_ZONE_STATE_IMPLICITLY_OPEN: - if (implicit) { + if (flags & NVME_ZRM_AUTO) { return NVME_SUCCESS; } @@ -1733,16 +1737,16 @@ static uint16_t __nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone, static inline uint16_t nvme_zrm_auto(NvmeNamespace *ns, NvmeZone *zone) { - return __nvme_zrm_open(ns, zone, true); + return nvme_zrm_open_flags(ns, zone, NVME_ZRM_AUTO); } static inline uint16_t nvme_zrm_open(NvmeNamespace *ns, NvmeZone *zone) { - return __nvme_zrm_open(ns, zone, false); + return nvme_zrm_open_flags(ns, zone, 0); } -static void __nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone, - uint32_t nlb) +static void nvme_advance_zone_wp(NvmeNamespace *ns, NvmeZone *zone, + uint32_t nlb) { zone->d.wp += nlb; @@ -1762,7 +1766,7 @@ static void nvme_finalize_zoned_write(NvmeNamespace *ns, NvmeRequest *req) nlb = le16_to_cpu(rw->nlb) + 1; zone = nvme_get_zone_by_slba(ns, slba); - __nvme_advance_zone_wp(ns, zone, nlb); + nvme_advance_zone_wp(ns, zone, nlb); } static inline bool nvme_is_write(NvmeRequest *req) @@ -1832,11 +1836,11 @@ static void nvme_rw_cb(void *opaque, int ret) goto out; } - if (nvme_msize(ns)) { + if (ns->lbaf.ms) { NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1; - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); + uint64_t offset = nvme_moff(ns, slba); if (req->cmd.opcode == NVME_CMD_WRITE_ZEROES) { size_t mlen = nvme_m2b(ns, nlb); @@ -2002,7 +2006,7 @@ static void nvme_verify_mdata_in_cb(void *opaque, int ret) uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = le16_to_cpu(rw->nlb) + 1; size_t mlen = nvme_m2b(ns, nlb); - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); + uint64_t offset = nvme_moff(ns, slba); BlockBackend *blk = ns->blkconf.blk; trace_pci_nvme_verify_mdata_in_cb(nvme_cid(req), blk_name(blk)); @@ -2104,8 +2108,8 @@ static void nvme_aio_zone_reset_cb(void *opaque, int ret) goto out; } - if (nvme_msize(ns)) { - int64_t offset = ns->mdata_offset + nvme_m2b(ns, zone->d.zslba); + if (ns->lbaf.ms) { + int64_t offset = nvme_moff(ns, zone->d.zslba); blk_aio_pwrite_zeroes(ns->blkconf.blk, offset, nvme_m2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP, @@ -2151,7 +2155,7 @@ out: uint64_t sdlba = le64_to_cpu(copy->sdlba); NvmeZone *zone = nvme_get_zone_by_slba(ns, sdlba); - __nvme_advance_zone_wp(ns, zone, ctx->nlb); + nvme_advance_zone_wp(ns, zone, ctx->nlb); } g_free(ctx->bounce); @@ -2173,10 +2177,10 @@ static void nvme_copy_cb(void *opaque, int ret) goto out; } - if (nvme_msize(ns)) { + if (ns->lbaf.ms) { NvmeCopyCmd *copy = (NvmeCopyCmd *)&req->cmd; uint64_t sdlba = le64_to_cpu(copy->sdlba); - int64_t offset = ns->mdata_offset + nvme_m2b(ns, sdlba); + int64_t offset = nvme_moff(ns, sdlba); qemu_iovec_reset(&req->sg.iov); qemu_iovec_add(&req->sg.iov, ctx->mbounce, nvme_m2b(ns, ctx->nlb)); @@ -2268,7 +2272,6 @@ static void nvme_copy_in_complete(NvmeRequest *req) status = nvme_check_bounds(ns, sdlba, ctx->nlb); if (status) { - trace_pci_nvme_err_invalid_lba_range(sdlba, ctx->nlb, ns->id_ns.nsze); goto invalid; } @@ -2369,10 +2372,19 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) uint32_t reftag = le32_to_cpu(rw->reftag); struct nvme_compare_ctx *ctx = req->opaque; g_autofree uint8_t *buf = NULL; + BlockBackend *blk = ns->blkconf.blk; + BlockAcctCookie *acct = &req->acct; + BlockAcctStats *stats = blk_get_stats(blk); uint16_t status = NVME_SUCCESS; trace_pci_nvme_compare_mdata_cb(nvme_cid(req)); + if (ret) { + block_acct_failed(stats, acct); + nvme_aio_err(req, ret); + goto out; + } + buf = g_malloc(ctx->mdata.iov.size); status = nvme_bounce_mdata(n, buf, ctx->mdata.iov.size, @@ -2387,7 +2399,6 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) uint8_t *bufp; uint8_t *mbufp = ctx->mdata.bounce; uint8_t *end = mbufp + ctx->mdata.iov.size; - size_t msize = nvme_msize(ns); int16_t pil = 0; status = nvme_dif_check(ns, ctx->data.bounce, ctx->data.iov.size, @@ -2403,11 +2414,11 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) * tuple. */ if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = nvme_msize(ns) - sizeof(NvmeDifTuple); + pil = ns->lbaf.ms - sizeof(NvmeDifTuple); } - for (bufp = buf; mbufp < end; bufp += msize, mbufp += msize) { - if (memcmp(bufp + pil, mbufp + pil, msize - pil)) { + for (bufp = buf; mbufp < end; bufp += ns->lbaf.ms, mbufp += ns->lbaf.ms) { + if (memcmp(bufp + pil, mbufp + pil, ns->lbaf.ms - pil)) { req->status = NVME_CMP_FAILURE; goto out; } @@ -2421,6 +2432,8 @@ static void nvme_compare_mdata_cb(void *opaque, int ret) goto out; } + block_acct_done(stats, acct); + out: qemu_iovec_destroy(&ctx->data.iov); g_free(ctx->data.bounce); @@ -2468,12 +2481,12 @@ static void nvme_compare_data_cb(void *opaque, int ret) goto out; } - if (nvme_msize(ns)) { + if (ns->lbaf.ms) { NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = le16_to_cpu(rw->nlb) + 1; size_t mlen = nvme_m2b(ns, nlb); - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); + uint64_t offset = nvme_moff(ns, slba); ctx->mdata.bounce = g_malloc(mlen); @@ -2530,8 +2543,6 @@ static uint16_t nvme_dsm(NvmeCtrl *n, NvmeRequest *req) uint32_t nlb = le32_to_cpu(range[i].nlb); if (nvme_check_bounds(ns, slba, nlb)) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, - ns->id_ns.nsze); continue; } @@ -2604,7 +2615,6 @@ static uint16_t nvme_verify(NvmeCtrl *n, NvmeRequest *req) status = nvme_check_bounds(ns, slba, nlb); if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); return status; } @@ -2689,7 +2699,6 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) status = nvme_check_bounds(ns, slba, _nlb); if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, _nlb, ns->id_ns.nsze); goto out; } @@ -2716,7 +2725,7 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) } bounce = bouncep = g_malloc(nvme_l2b(ns, nlb)); - if (nvme_msize(ns)) { + if (ns->lbaf.ms) { mbounce = mbouncep = g_malloc(nvme_m2b(ns, nlb)); } @@ -2752,9 +2761,9 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) bouncep += len; - if (nvme_msize(ns)) { + if (ns->lbaf.ms) { len = nvme_m2b(ns, nlb); - offset = ns->mdata_offset + nvme_m2b(ns, slba); + offset = nvme_moff(ns, slba); in_ctx = g_new(struct nvme_copy_in_ctx, 1); in_ctx->req = req; @@ -2818,7 +2827,6 @@ static uint16_t nvme_compare(NvmeCtrl *n, NvmeRequest *req) status = nvme_check_bounds(ns, slba, nlb); if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); return status; } @@ -2875,7 +2883,7 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req) /* 1-initialize; see comment in nvme_dsm */ *num_flushes = 1; - for (int i = 1; i <= n->num_namespaces; i++) { + for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; @@ -2923,7 +2931,7 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { bool pract = ctrl & NVME_RW_PRINFO_PRACT; - if (pract && nvme_msize(ns) == 8) { + if (pract && ns->lbaf.ms == 8) { mapped_size = data_size; } } @@ -2938,7 +2946,6 @@ static uint16_t nvme_read(NvmeCtrl *n, NvmeRequest *req) status = nvme_check_bounds(ns, slba, nlb); if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); goto invalid; } @@ -3000,7 +3007,7 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, if (NVME_ID_NS_DPS_TYPE(ns->id_ns.dps)) { bool pract = ctrl & NVME_RW_PRINFO_PRACT; - if (pract && nvme_msize(ns) == 8) { + if (pract && ns->lbaf.ms == 8) { mapped_size -= nvme_m2b(ns, nlb); } } @@ -3018,7 +3025,6 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest *req, bool append, status = nvme_check_bounds(ns, slba, nlb); if (status) { - trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); goto invalid; } @@ -3595,8 +3601,8 @@ static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, NvmeRequest *req) static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) { + NvmeNamespace *ns; uint32_t nsid = le32_to_cpu(req->cmd.nsid); - uint16_t status; trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req), req->cmd.opcode, nvme_io_opc_str(req->cmd.opcode)); @@ -3607,18 +3613,18 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) /* * In the base NVM command set, Flush may apply to all namespaces - * (indicated by NSID being set to 0xFFFFFFFF). But if that feature is used + * (indicated by NSID being set to FFFFFFFFh). But if that feature is used * along with TP 4056 (Namespace Types), it may be pretty screwed up. * - * If NSID is indeed set to 0xFFFFFFFF, we simply cannot associate the + * If NSID is indeed set to FFFFFFFFh, we simply cannot associate the * opcode with a specific command since we cannot determine a unique I/O - * command set. Opcode 0x0 could have any other meaning than something + * command set. Opcode 0h could have any other meaning than something * equivalent to flushing and say it DOES have completely different - * semantics in some other command set - does an NSID of 0xFFFFFFFF then + * semantics in some other command set - does an NSID of FFFFFFFFh then * mean "for all namespaces, apply whatever command set specific command - * that uses the 0x0 opcode?" Or does it mean "for all namespaces, apply - * whatever command that uses the 0x0 opcode if, and only if, it allows - * NSID to be 0xFFFFFFFF"? + * that uses the 0h opcode?" Or does it mean "for all namespaces, apply + * whatever command that uses the 0h opcode if, and only if, it allows NSID + * to be FFFFFFFFh"? * * Anyway (and luckily), for now, we do not care about this since the * device only supports namespace types that includes the NVM Flush command @@ -3628,21 +3634,22 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req) return nvme_flush(n, req); } - req->ns = nvme_ns(n, nsid); - if (unlikely(!req->ns)) { + ns = nvme_ns(n, nsid); + if (unlikely(!ns)) { return NVME_INVALID_FIELD | NVME_DNR; } - if (!(req->ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { + if (!(ns->iocs[req->cmd.opcode] & NVME_CMD_EFF_CSUPP)) { trace_pci_nvme_err_invalid_opc(req->cmd.opcode); return NVME_INVALID_OPCODE | NVME_DNR; } - status = nvme_ns_status(req->ns); - if (unlikely(status)) { - return status; + if (ns->status) { + return ns->status; } + req->ns = ns; + switch (req->cmd.opcode) { case NVME_CMD_WRITE_ZEROES: return nvme_write_zeroes(n, req); @@ -3844,7 +3851,7 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, } else { int i; - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; @@ -3934,7 +3941,7 @@ static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, NVME_CHANGED_NSID_SIZE) { /* * If more than 1024 namespaces, the first entry in the log page should - * be set to 0xffffffff and the others to 0 as spec. + * be set to FFFFFFFFh and the others to 0 as spec. */ if (i == ARRAY_SIZE(nslist)) { memset(nslist, 0x0, sizeof(nslist)); @@ -4332,7 +4339,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, trace_pci_nvme_identify_nslist(min_nsid); /* - * Both 0xffffffff (NVME_NSID_BROADCAST) and 0xfffffffe are invalid values + * Both FFFFFFFFh (NVME_NSID_BROADCAST) and FFFFFFFFEh are invalid values * since the Active Namespace ID List should return namespaces with ids * *higher* than the NSID specified in the command. This is also specified * in the spec (NVM Express v1.3d, Section 5.15.4). @@ -4341,7 +4348,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req, return NVME_INVALID_NSID | NVME_DNR; } - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { if (!active) { @@ -4379,7 +4386,7 @@ static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req, trace_pci_nvme_identify_nslist_csi(min_nsid, c->csi); /* - * Same as in nvme_identify_nslist(), 0xffffffff/0xfffffffe are invalid. + * Same as in nvme_identify_nslist(), FFFFFFFFh/FFFFFFFFEh are invalid. */ if (min_nsid >= NVME_NSID_BROADCAST - 1) { return NVME_INVALID_NSID | NVME_DNR; @@ -4389,7 +4396,7 @@ static uint16_t nvme_identify_nslist_csi(NvmeCtrl *n, NvmeRequest *req, return NVME_INVALID_FIELD | NVME_DNR; } - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { if (!active) { @@ -4446,7 +4453,7 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req) /* * Because the NGUID and EUI64 fields are 0 in the Identify Namespace data - * structure, a Namespace UUID (nidt = 0x3) must be reported in the + * structure, a Namespace UUID (nidt = 3h) must be reported in the * Namespace Identification Descriptor. Add the namespace UUID here. */ ns_descrs->uuid.hdr.nidt = NVME_NIDT_UUID; @@ -4595,7 +4602,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) /* * The Reservation Notification Mask and Reservation Persistence * features require a status code of Invalid Field in Command when - * NSID is 0xFFFFFFFF. Since the device does not support those + * NSID is FFFFFFFFh. Since the device does not support those * features we can always return Invalid Namespace or Format as we * should do for all other features. */ @@ -4655,7 +4662,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) goto out; case NVME_VOLATILE_WRITE_CACHE: result = 0; - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; @@ -4707,9 +4714,6 @@ defaults: result |= NVME_INTVC_NOCOALESCING; } break; - case NVME_COMMAND_SET_PROFILE: - result = 0; - break; default: result = nvme_feature_default[fid]; break; @@ -4805,7 +4809,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) break; case NVME_ERROR_RECOVERY: if (nsid == NVME_NSID_BROADCAST) { - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { @@ -4826,7 +4830,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) } break; case NVME_VOLATILE_WRITE_CACHE: - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; @@ -4847,15 +4851,15 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) } /* - * NVMe v1.3, Section 5.21.1.7: 0xffff is not an allowed value for NCQR + * NVMe v1.3, Section 5.21.1.7: FFFFh is not an allowed value for NCQR * and NSQR. */ if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) { return NVME_INVALID_FIELD | NVME_DNR; } - trace_pci_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, - ((dw11 >> 16) & 0xFFFF) + 1, + trace_pci_nvme_setfeat_numq((dw11 & 0xffff) + 1, + ((dw11 >> 16) & 0xffff) + 1, n->params.max_ioqpairs, n->params.max_ioqpairs); req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) | @@ -4912,7 +4916,25 @@ static void nvme_update_dmrsl(NvmeCtrl *n) } } -static void __nvme_select_ns_iocs(NvmeCtrl *n, NvmeNamespace *ns); +static void nvme_select_iocs_ns(NvmeCtrl *n, NvmeNamespace *ns) +{ + ns->iocs = nvme_cse_iocs_none; + switch (ns->csi) { + case NVME_CSI_NVM: + if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) { + ns->iocs = nvme_cse_iocs_nvm; + } + break; + case NVME_CSI_ZONED: + if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) { + ns->iocs = nvme_cse_iocs_zoned; + } else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) { + ns->iocs = nvme_cse_iocs_nvm; + } + break; + } +} + static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) { NvmeNamespace *ns; @@ -4963,13 +4985,13 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req) } nvme_attach_ns(ctrl, ns); - __nvme_select_ns_iocs(ctrl, ns); + nvme_select_iocs_ns(ctrl, ns); } else { if (!nvme_ns(ctrl, nsid)) { return NVME_NS_NOT_ATTACHED | NVME_DNR; } - ctrl->namespaces[nsid - 1] = NULL; + ctrl->namespaces[nsid] = NULL; ns->attached--; nvme_update_dmrsl(ctrl); @@ -5101,7 +5123,7 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req) req->status = status; } } else { - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; @@ -5212,7 +5234,7 @@ static void nvme_ctrl_reset(NvmeCtrl *n) NvmeNamespace *ns; int i; - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; @@ -5254,7 +5276,7 @@ static void nvme_ctrl_shutdown(NvmeCtrl *n) memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); } - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; @@ -5264,37 +5286,18 @@ static void nvme_ctrl_shutdown(NvmeCtrl *n) } } -static void __nvme_select_ns_iocs(NvmeCtrl *n, NvmeNamespace *ns) -{ - ns->iocs = nvme_cse_iocs_none; - switch (ns->csi) { - case NVME_CSI_NVM: - if (NVME_CC_CSS(n->bar.cc) != NVME_CC_CSS_ADMIN_ONLY) { - ns->iocs = nvme_cse_iocs_nvm; - } - break; - case NVME_CSI_ZONED: - if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_CSI) { - ns->iocs = nvme_cse_iocs_zoned; - } else if (NVME_CC_CSS(n->bar.cc) == NVME_CC_CSS_NVM) { - ns->iocs = nvme_cse_iocs_nvm; - } - break; - } -} - -static void nvme_select_ns_iocs(NvmeCtrl *n) +static void nvme_select_iocs(NvmeCtrl *n) { NvmeNamespace *ns; int i; - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; } - __nvme_select_ns_iocs(n, ns); + nvme_select_iocs_ns(n, ns); } } @@ -5396,7 +5399,7 @@ static int nvme_start_ctrl(NvmeCtrl *n) QTAILQ_INIT(&n->aer_queue); - nvme_select_ns_iocs(n); + nvme_select_iocs(n); return 0; } @@ -5493,7 +5496,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, n->bar.cc = data; } break; - case 0x1C: /* CSTS */ + case 0x1c: /* CSTS */ if (data & (1 << 4)) { NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ssreset_w1c_unsupported, "attempted to W1C CSTS.NSSRO" @@ -5505,7 +5508,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, } break; case 0x20: /* NSSR */ - if (data == 0x4E564D65) { + if (data == 0x4e564d65) { trace_pci_nvme_ub_mmiowr_ssreset_unsupported(); } else { /* The spec says that writes of other values have no effect */ @@ -5575,11 +5578,11 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, n->bar.cmbmsc = (n->bar.cmbmsc & 0xffffffff) | (data << 32); return; - case 0xE00: /* PMRCAP */ + case 0xe00: /* PMRCAP */ NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrcap_readonly, "invalid write to PMRCAP register, ignored"); return; - case 0xE04: /* PMRCTL */ + case 0xe04: /* PMRCTL */ n->bar.pmrctl = data; if (NVME_PMRCTL_EN(data)) { memory_region_set_enabled(&n->pmr.dev->mr, true); @@ -5590,19 +5593,19 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, n->pmr.cmse = false; } return; - case 0xE08: /* PMRSTS */ + case 0xe08: /* PMRSTS */ NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly, "invalid write to PMRSTS register, ignored"); return; - case 0xE0C: /* PMREBS */ + case 0xe0C: /* PMREBS */ NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrebs_readonly, "invalid write to PMREBS register, ignored"); return; - case 0xE10: /* PMRSWTP */ + case 0xe10: /* PMRSWTP */ NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrswtp_readonly, "invalid write to PMRSWTP register, ignored"); return; - case 0xE14: /* PMRMSCL */ + case 0xe14: /* PMRMSCL */ if (!NVME_CAP_PMRS(n->bar.cap)) { return; } @@ -5622,7 +5625,7 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, } return; - case 0xE18: /* PMRMSCU */ + case 0xe18: /* PMRMSCU */ if (!NVME_CAP_PMRS(n->bar.cap)) { return; } @@ -5664,7 +5667,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) * from PMRSTS should ensure prior writes * made it to persistent media */ - if (addr == 0xE08 && + if (addr == 0xe08 && (NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) { memory_region_msync(&n->pmr.dev->mr, 0, n->pmr.dev->size); } @@ -5915,7 +5918,6 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp) static void nvme_init_state(NvmeCtrl *n) { - n->num_namespaces = NVME_MAX_NAMESPACES; /* add one to max_ioqpairs to account for the admin queue pair */ n->reg_size = pow2ceil(sizeof(NvmeBar) + 2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE); @@ -6096,7 +6098,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->sqes = (0x6 << 4) | 0x6; id->cqes = (0x4 << 4) | 0x4; - id->nn = cpu_to_le32(n->num_namespaces); + id->nn = cpu_to_le32(NVME_MAX_NAMESPACES); id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP | NVME_ONCS_FEATURES | NVME_ONCS_DSM | NVME_ONCS_COMPARE | NVME_ONCS_COPY); @@ -6161,7 +6163,7 @@ void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns) uint32_t nsid = ns->params.nsid; assert(nsid && nsid <= NVME_MAX_NAMESPACES); - n->namespaces[nsid - 1] = ns; + n->namespaces[nsid] = ns; ns->attached++; n->dmrsl = MIN_NON_ZERO(n->dmrsl, @@ -6215,7 +6217,7 @@ static void nvme_exit(PCIDevice *pci_dev) nvme_ctrl_reset(n); - for (i = 1; i <= n->num_namespaces; i++) { + for (i = 1; i <= NVME_MAX_NAMESPACES; i++) { ns = nvme_ns(n, i); if (!ns) { continue; diff --git a/hw/block/nvme-dif.c b/hw/nvme/dif.c index 81b0a4cb13..88efcbe9bd 100644 --- a/hw/block/nvme-dif.c +++ b/hw/nvme/dif.c @@ -9,13 +9,11 @@ */ #include "qemu/osdep.h" -#include "hw/block/block.h" -#include "sysemu/dma.h" -#include "sysemu/block-backend.h" #include "qapi/error.h" -#include "trace.h" +#include "sysemu/block-backend.h" + #include "nvme.h" -#include "nvme-dif.h" +#include "trace.h" uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba, uint32_t reftag) @@ -46,20 +44,18 @@ void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, uint32_t reftag) { uint8_t *end = buf + len; - size_t lsize = nvme_lsize(ns); - size_t msize = nvme_msize(ns); int16_t pil = 0; if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = nvme_msize(ns) - sizeof(NvmeDifTuple); + pil = ns->lbaf.ms - sizeof(NvmeDifTuple); } - trace_pci_nvme_dif_pract_generate_dif(len, lsize, lsize + pil, apptag, - reftag); + trace_pci_nvme_dif_pract_generate_dif(len, ns->lbasz, ns->lbasz + pil, + apptag, reftag); - for (; buf < end; buf += lsize, mbuf += msize) { + for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) { NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); - uint16_t crc = crc_t10dif(0x0, buf, lsize); + uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz); if (pil) { crc = crc_t10dif(crc, mbuf, pil); @@ -100,7 +96,7 @@ static uint16_t nvme_dif_prchk(NvmeNamespace *ns, NvmeDifTuple *dif, } if (ctrl & NVME_RW_PRINFO_PRCHK_GUARD) { - uint16_t crc = crc_t10dif(0x0, buf, nvme_lsize(ns)); + uint16_t crc = crc_t10dif(0x0, buf, ns->lbasz); if (pil) { crc = crc_t10dif(crc, mbuf, pil); @@ -139,8 +135,6 @@ uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, uint16_t appmask, uint32_t reftag) { uint8_t *end = buf + len; - size_t lsize = nvme_lsize(ns); - size_t msize = nvme_msize(ns); int16_t pil = 0; uint16_t status; @@ -150,12 +144,12 @@ uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, } if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = nvme_msize(ns) - sizeof(NvmeDifTuple); + pil = ns->lbaf.ms - sizeof(NvmeDifTuple); } - trace_pci_nvme_dif_check(NVME_RW_PRINFO(ctrl), lsize + pil); + trace_pci_nvme_dif_check(NVME_RW_PRINFO(ctrl), ns->lbasz + pil); - for (; buf < end; buf += lsize, mbuf += msize) { + for (; buf < end; buf += ns->lbasz, mbuf += ns->lbaf.ms) { NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); status = nvme_dif_prchk(ns, dif, buf, mbuf, pil, ctrl, apptag, @@ -178,20 +172,18 @@ uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, BlockBackend *blk = ns->blkconf.blk; BlockDriverState *bs = blk_bs(blk); - size_t msize = nvme_msize(ns); - size_t lsize = nvme_lsize(ns); int64_t moffset = 0, offset = nvme_l2b(ns, slba); uint8_t *mbufp, *end; bool zeroed; int16_t pil = 0; - int64_t bytes = (mlen / msize) * lsize; + int64_t bytes = (mlen / ns->lbaf.ms) << ns->lbaf.ds; int64_t pnum = 0; Error *err = NULL; if (!(ns->id_ns.dps & NVME_ID_NS_DPS_FIRST_EIGHT)) { - pil = nvme_msize(ns) - sizeof(NvmeDifTuple); + pil = ns->lbaf.ms - sizeof(NvmeDifTuple); } do { @@ -213,15 +205,15 @@ uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, if (zeroed) { mbufp = mbuf + moffset; - mlen = (pnum / lsize) * msize; + mlen = (pnum >> ns->lbaf.ds) * ns->lbaf.ms; end = mbufp + mlen; - for (; mbufp < end; mbufp += msize) { + for (; mbufp < end; mbufp += ns->lbaf.ms) { memset(mbufp + pil, 0xff, sizeof(NvmeDifTuple)); } } - moffset += (pnum / lsize) * msize; + moffset += (pnum >> ns->lbaf.ds) * ns->lbaf.ms; offset += pnum; } while (pnum != bytes); @@ -291,7 +283,7 @@ static void nvme_dif_rw_check_cb(void *opaque, int ret) goto out; } - if (ctrl & NVME_RW_PRINFO_PRACT && nvme_msize(ns) == 8) { + if (ctrl & NVME_RW_PRINFO_PRACT && ns->lbaf.ms == 8) { goto out; } @@ -314,7 +306,7 @@ static void nvme_dif_rw_mdata_in_cb(void *opaque, int ret) uint64_t slba = le64_to_cpu(rw->slba); uint32_t nlb = le16_to_cpu(rw->nlb) + 1; size_t mlen = nvme_m2b(ns, nlb); - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); + uint64_t offset = nvme_moff(ns, slba); BlockBackend *blk = ns->blkconf.blk; trace_pci_nvme_dif_rw_mdata_in_cb(nvme_cid(req), blk_name(blk)); @@ -343,7 +335,7 @@ static void nvme_dif_rw_mdata_out_cb(void *opaque, int ret) NvmeNamespace *ns = req->ns; NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd; uint64_t slba = le64_to_cpu(rw->slba); - uint64_t offset = ns->mdata_offset + nvme_m2b(ns, slba); + uint64_t offset = nvme_moff(ns, slba); BlockBackend *blk = ns->blkconf.blk; trace_pci_nvme_dif_rw_mdata_out_cb(nvme_cid(req), blk_name(blk)); @@ -395,8 +387,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) if (pract) { uint8_t *mbuf, *end; - size_t msize = nvme_msize(ns); - int16_t pil = msize - sizeof(NvmeDifTuple); + int16_t pil = ns->lbaf.ms - sizeof(NvmeDifTuple); status = nvme_check_prinfo(ns, ctrl, slba, reftag); if (status) { @@ -417,7 +408,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) pil = 0; } - for (; mbuf < end; mbuf += msize) { + for (; mbuf < end; mbuf += ns->lbaf.ms) { NvmeDifTuple *dif = (NvmeDifTuple *)(mbuf + pil); dif->apptag = cpu_to_be16(apptag); @@ -436,7 +427,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) return NVME_NO_COMPLETE; } - if (nvme_ns_ext(ns) && !(pract && nvme_msize(ns) == 8)) { + if (nvme_ns_ext(ns) && !(pract && ns->lbaf.ms == 8)) { mapped_len += mlen; } @@ -470,7 +461,7 @@ uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req) qemu_iovec_init(&ctx->mdata.iov, 1); qemu_iovec_add(&ctx->mdata.iov, ctx->mdata.bounce, mlen); - if (!(pract && nvme_msize(ns) == 8)) { + if (!(pract && ns->lbaf.ms == 8)) { status = nvme_bounce_mdata(n, ctx->mdata.bounce, ctx->mdata.iov.size, NVME_TX_DIRECTION_TO_DEVICE, req); if (status) { diff --git a/hw/nvme/meson.build b/hw/nvme/meson.build new file mode 100644 index 0000000000..3cf40046ee --- /dev/null +++ b/hw/nvme/meson.build @@ -0,0 +1 @@ +softmmu_ss.add(when: 'CONFIG_NVME_PCI', if_true: files('ctrl.c', 'dif.c', 'ns.c', 'subsys.c')) diff --git a/hw/block/nvme-ns.c b/hw/nvme/ns.c index 7bb618f182..992e5a13f5 100644 --- a/hw/block/nvme-ns.c +++ b/hw/nvme/ns.c @@ -14,23 +14,16 @@ #include "qemu/osdep.h" #include "qemu/units.h" -#include "qemu/cutils.h" -#include "qemu/log.h" #include "qemu/error-report.h" -#include "hw/block/block.h" -#include "hw/pci/pci.h" +#include "qapi/error.h" #include "sysemu/sysemu.h" #include "sysemu/block-backend.h" -#include "qapi/error.h" - -#include "hw/qdev-properties.h" -#include "hw/qdev-core.h" -#include "trace.h" #include "nvme.h" -#include "nvme-ns.h" +#include "trace.h" #define MIN_DISCARD_GRANULARITY (4 * KiB) +#define NVME_DEFAULT_ZONE_SIZE (128 * MiB) void nvme_ns_init_format(NvmeNamespace *ns) { @@ -38,7 +31,10 @@ void nvme_ns_init_format(NvmeNamespace *ns) BlockDriverInfo bdi; int npdg, nlbas, ret; - nlbas = nvme_ns_nlbas(ns); + ns->lbaf = id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; + ns->lbasz = 1 << ns->lbaf.ds; + + nlbas = ns->size / (ns->lbasz + ns->lbaf.ms); id_ns->nsze = cpu_to_le64(nlbas); @@ -46,13 +42,13 @@ void nvme_ns_init_format(NvmeNamespace *ns) id_ns->ncap = id_ns->nsze; id_ns->nuse = id_ns->ncap; - ns->mdata_offset = nvme_l2b(ns, nlbas); + ns->moff = (int64_t)nlbas << ns->lbaf.ds; - npdg = ns->blkconf.discard_granularity / nvme_lsize(ns); + npdg = ns->blkconf.discard_granularity / ns->lbasz; ret = bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi); if (ret >= 0 && bdi.cluster_size > ns->blkconf.discard_granularity) { - npdg = bdi.cluster_size / nvme_lsize(ns); + npdg = bdi.cluster_size / ns->lbasz; } id_ns->npda = id_ns->npdg = npdg - 1; @@ -170,7 +166,6 @@ static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp) static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) { uint64_t zone_size, zone_cap; - uint32_t lbasz = nvme_lsize(ns); /* Make sure that the values of ZNS properties are sane */ if (ns->params.zone_size_bs) { @@ -188,14 +183,14 @@ static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) "zone size %"PRIu64"B", zone_cap, zone_size); return -1; } - if (zone_size < lbasz) { + if (zone_size < ns->lbasz) { error_setg(errp, "zone size %"PRIu64"B too small, " - "must be at least %"PRIu32"B", zone_size, lbasz); + "must be at least %zuB", zone_size, ns->lbasz); return -1; } - if (zone_cap < lbasz) { + if (zone_cap < ns->lbasz) { error_setg(errp, "zone capacity %"PRIu64"B too small, " - "must be at least %"PRIu32"B", zone_cap, lbasz); + "must be at least %zuB", zone_cap, ns->lbasz); return -1; } @@ -203,9 +198,9 @@ static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) * Save the main zone geometry values to avoid * calculating them later again. */ - ns->zone_size = zone_size / lbasz; - ns->zone_capacity = zone_cap / lbasz; - ns->num_zones = nvme_ns_nlbas(ns) / ns->zone_size; + ns->zone_size = zone_size / ns->lbasz; + ns->zone_capacity = zone_cap / ns->lbasz; + ns->num_zones = le64_to_cpu(ns->id_ns.nsze) / ns->zone_size; /* Do a few more sanity checks of ZNS properties */ if (!ns->num_zones) { @@ -215,43 +210,6 @@ static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp) return -1; } - if (ns->params.max_open_zones > ns->num_zones) { - error_setg(errp, - "max_open_zones value %u exceeds the number of zones %u", - ns->params.max_open_zones, ns->num_zones); - return -1; - } - if (ns->params.max_active_zones > ns->num_zones) { - error_setg(errp, - "max_active_zones value %u exceeds the number of zones %u", - ns->params.max_active_zones, ns->num_zones); - return -1; - } - - if (ns->params.max_active_zones) { - if (ns->params.max_open_zones > ns->params.max_active_zones) { - error_setg(errp, "max_open_zones (%u) exceeds max_active_zones (%u)", - ns->params.max_open_zones, ns->params.max_active_zones); - return -1; - } - - if (!ns->params.max_open_zones) { - ns->params.max_open_zones = ns->params.max_active_zones; - } - } - - if (ns->params.zd_extension_size) { - if (ns->params.zd_extension_size & 0x3f) { - error_setg(errp, - "zone descriptor extension size must be a multiple of 64B"); - return -1; - } - if ((ns->params.zd_extension_size >> 6) > 0xff) { - error_setg(errp, "zone descriptor extension size is too large"); - return -1; - } - } - return 0; } @@ -303,7 +261,7 @@ static void nvme_ns_init_zoned(NvmeNamespace *ns) id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned)); - /* MAR/MOR are zeroes-based, 0xffffffff means no limit */ + /* MAR/MOR are zeroes-based, FFFFFFFFFh means no limit */ id_ns_z->mar = cpu_to_le32(ns->params.max_active_zones - 1); id_ns_z->mor = cpu_to_le32(ns->params.max_open_zones - 1); id_ns_z->zoc = 0; @@ -421,6 +379,34 @@ static int nvme_ns_check_constraints(NvmeCtrl *n, NvmeNamespace *ns, } } + if (ns->params.zoned) { + if (ns->params.max_active_zones) { + if (ns->params.max_open_zones > ns->params.max_active_zones) { + error_setg(errp, "max_open_zones (%u) exceeds " + "max_active_zones (%u)", ns->params.max_open_zones, + ns->params.max_active_zones); + return -1; + } + + if (!ns->params.max_open_zones) { + ns->params.max_open_zones = ns->params.max_active_zones; + } + } + + if (ns->params.zd_extension_size) { + if (ns->params.zd_extension_size & 0x3f) { + error_setg(errp, "zone descriptor extension size must be a " + "multiple of 64B"); + return -1; + } + if ((ns->params.zd_extension_size >> 6) > 0xff) { + error_setg(errp, + "zone descriptor extension size is too large"); + return -1; + } + } + } + return 0; } diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h new file mode 100644 index 0000000000..81a35cda14 --- /dev/null +++ b/hw/nvme/nvme.h @@ -0,0 +1,547 @@ +/* + * QEMU NVM Express + * + * Copyright (c) 2012 Intel Corporation + * Copyright (c) 2021 Minwoo Im + * Copyright (c) 2021 Samsung Electronics Co., Ltd. + * + * Authors: + * Keith Busch <kbusch@kernel.org> + * Klaus Jensen <k.jensen@samsung.com> + * Gollu Appalanaidu <anaidu.gollu@samsung.com> + * Dmitry Fomichev <dmitry.fomichev@wdc.com> + * Minwoo Im <minwoo.im.dev@gmail.com> + * + * This code is licensed under the GNU GPL v2 or later. + */ + +#ifndef HW_NVME_INTERNAL_H +#define HW_NVME_INTERNAL_H + +#include "qemu/uuid.h" +#include "hw/pci/pci.h" +#include "hw/block/block.h" + +#include "block/nvme.h" + +#define NVME_MAX_CONTROLLERS 32 +#define NVME_MAX_NAMESPACES 256 + +typedef struct NvmeCtrl NvmeCtrl; +typedef struct NvmeNamespace NvmeNamespace; + +#define TYPE_NVME_SUBSYS "nvme-subsys" +#define NVME_SUBSYS(obj) \ + OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS) + +typedef struct NvmeSubsystem { + DeviceState parent_obj; + uint8_t subnqn[256]; + + NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS]; + NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; + + struct { + char *nqn; + } params; +} NvmeSubsystem; + +int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp); + +static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys, + uint32_t cntlid) +{ + if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) { + return NULL; + } + + return subsys->ctrls[cntlid]; +} + +static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys, + uint32_t nsid) +{ + if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) { + return NULL; + } + + return subsys->namespaces[nsid]; +} + +#define TYPE_NVME_NS "nvme-ns" +#define NVME_NS(obj) \ + OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS) + +typedef struct NvmeZone { + NvmeZoneDescr d; + uint64_t w_ptr; + QTAILQ_ENTRY(NvmeZone) entry; +} NvmeZone; + +typedef struct NvmeNamespaceParams { + bool detached; + bool shared; + uint32_t nsid; + QemuUUID uuid; + + uint16_t ms; + uint8_t mset; + uint8_t pi; + uint8_t pil; + + uint16_t mssrl; + uint32_t mcl; + uint8_t msrc; + + bool zoned; + bool cross_zone_read; + uint64_t zone_size_bs; + uint64_t zone_cap_bs; + uint32_t max_active_zones; + uint32_t max_open_zones; + uint32_t zd_extension_size; +} NvmeNamespaceParams; + +typedef struct NvmeNamespace { + DeviceState parent_obj; + BlockConf blkconf; + int32_t bootindex; + int64_t size; + int64_t moff; + NvmeIdNs id_ns; + NvmeLBAF lbaf; + size_t lbasz; + const uint32_t *iocs; + uint8_t csi; + uint16_t status; + int attached; + + QTAILQ_ENTRY(NvmeNamespace) entry; + + NvmeIdNsZoned *id_ns_zoned; + NvmeZone *zone_array; + QTAILQ_HEAD(, NvmeZone) exp_open_zones; + QTAILQ_HEAD(, NvmeZone) imp_open_zones; + QTAILQ_HEAD(, NvmeZone) closed_zones; + QTAILQ_HEAD(, NvmeZone) full_zones; + uint32_t num_zones; + uint64_t zone_size; + uint64_t zone_capacity; + uint32_t zone_size_log2; + uint8_t *zd_extensions; + int32_t nr_open_zones; + int32_t nr_active_zones; + + NvmeNamespaceParams params; + + struct { + uint32_t err_rec; + } features; +} NvmeNamespace; + +static inline uint32_t nvme_nsid(NvmeNamespace *ns) +{ + if (ns) { + return ns->params.nsid; + } + + return 0; +} + +static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba) +{ + return lba << ns->lbaf.ds; +} + +static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba) +{ + return ns->lbaf.ms * lba; +} + +static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba) +{ + return ns->moff + nvme_m2b(ns, lba); +} + +static inline bool nvme_ns_ext(NvmeNamespace *ns) +{ + return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas); +} + +static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone) +{ + return zone->d.zs >> 4; +} + +static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state) +{ + zone->d.zs = state << 4; +} + +static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone) +{ + return zone->d.zslba + ns->zone_size; +} + +static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone) +{ + return zone->d.zslba + zone->d.zcap; +} + +static inline bool nvme_wp_is_valid(NvmeZone *zone) +{ + uint8_t st = nvme_get_zone_state(zone); + + return st != NVME_ZONE_STATE_FULL && + st != NVME_ZONE_STATE_READ_ONLY && + st != NVME_ZONE_STATE_OFFLINE; +} + +static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns, + uint32_t zone_idx) +{ + return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size]; +} + +static inline void nvme_aor_inc_open(NvmeNamespace *ns) +{ + assert(ns->nr_open_zones >= 0); + if (ns->params.max_open_zones) { + ns->nr_open_zones++; + assert(ns->nr_open_zones <= ns->params.max_open_zones); + } +} + +static inline void nvme_aor_dec_open(NvmeNamespace *ns) +{ + if (ns->params.max_open_zones) { + assert(ns->nr_open_zones > 0); + ns->nr_open_zones--; + } + assert(ns->nr_open_zones >= 0); +} + +static inline void nvme_aor_inc_active(NvmeNamespace *ns) +{ + assert(ns->nr_active_zones >= 0); + if (ns->params.max_active_zones) { + ns->nr_active_zones++; + assert(ns->nr_active_zones <= ns->params.max_active_zones); + } +} + +static inline void nvme_aor_dec_active(NvmeNamespace *ns) +{ + if (ns->params.max_active_zones) { + assert(ns->nr_active_zones > 0); + ns->nr_active_zones--; + assert(ns->nr_active_zones >= ns->nr_open_zones); + } + assert(ns->nr_active_zones >= 0); +} + +void nvme_ns_init_format(NvmeNamespace *ns); +int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp); +void nvme_ns_drain(NvmeNamespace *ns); +void nvme_ns_shutdown(NvmeNamespace *ns); +void nvme_ns_cleanup(NvmeNamespace *ns); + +typedef struct NvmeAsyncEvent { + QTAILQ_ENTRY(NvmeAsyncEvent) entry; + NvmeAerResult result; +} NvmeAsyncEvent; + +enum { + NVME_SG_ALLOC = 1 << 0, + NVME_SG_DMA = 1 << 1, +}; + +typedef struct NvmeSg { + int flags; + + union { + QEMUSGList qsg; + QEMUIOVector iov; + }; +} NvmeSg; + +typedef enum NvmeTxDirection { + NVME_TX_DIRECTION_TO_DEVICE = 0, + NVME_TX_DIRECTION_FROM_DEVICE = 1, +} NvmeTxDirection; + +typedef struct NvmeRequest { + struct NvmeSQueue *sq; + struct NvmeNamespace *ns; + BlockAIOCB *aiocb; + uint16_t status; + void *opaque; + NvmeCqe cqe; + NvmeCmd cmd; + BlockAcctCookie acct; + NvmeSg sg; + QTAILQ_ENTRY(NvmeRequest)entry; +} NvmeRequest; + +typedef struct NvmeBounceContext { + NvmeRequest *req; + + struct { + QEMUIOVector iov; + uint8_t *bounce; + } data, mdata; +} NvmeBounceContext; + +static inline const char *nvme_adm_opc_str(uint8_t opc) +{ + switch (opc) { + case NVME_ADM_CMD_DELETE_SQ: return "NVME_ADM_CMD_DELETE_SQ"; + case NVME_ADM_CMD_CREATE_SQ: return "NVME_ADM_CMD_CREATE_SQ"; + case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE"; + case NVME_ADM_CMD_DELETE_CQ: return "NVME_ADM_CMD_DELETE_CQ"; + case NVME_ADM_CMD_CREATE_CQ: return "NVME_ADM_CMD_CREATE_CQ"; + case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY"; + case NVME_ADM_CMD_ABORT: return "NVME_ADM_CMD_ABORT"; + case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES"; + case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES"; + case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ"; + case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT"; + case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM"; + default: return "NVME_ADM_CMD_UNKNOWN"; + } +} + +static inline const char *nvme_io_opc_str(uint8_t opc) +{ + switch (opc) { + case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH"; + case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE"; + case NVME_CMD_READ: return "NVME_NVM_CMD_READ"; + case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE"; + case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES"; + case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM"; + case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY"; + case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY"; + case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND"; + case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV"; + case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND"; + default: return "NVME_NVM_CMD_UNKNOWN"; + } +} + +typedef struct NvmeSQueue { + struct NvmeCtrl *ctrl; + uint16_t sqid; + uint16_t cqid; + uint32_t head; + uint32_t tail; + uint32_t size; + uint64_t dma_addr; + QEMUTimer *timer; + NvmeRequest *io_req; + QTAILQ_HEAD(, NvmeRequest) req_list; + QTAILQ_HEAD(, NvmeRequest) out_req_list; + QTAILQ_ENTRY(NvmeSQueue) entry; +} NvmeSQueue; + +typedef struct NvmeCQueue { + struct NvmeCtrl *ctrl; + uint8_t phase; + uint16_t cqid; + uint16_t irq_enabled; + uint32_t head; + uint32_t tail; + uint32_t vector; + uint32_t size; + uint64_t dma_addr; + QEMUTimer *timer; + QTAILQ_HEAD(, NvmeSQueue) sq_list; + QTAILQ_HEAD(, NvmeRequest) req_list; +} NvmeCQueue; + +#define TYPE_NVME_BUS "nvme-bus" +#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS) + +typedef struct NvmeBus { + BusState parent_bus; +} NvmeBus; + +#define TYPE_NVME "nvme" +#define NVME(obj) \ + OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) + +typedef struct NvmeParams { + char *serial; + uint32_t num_queues; /* deprecated since 5.1 */ + uint32_t max_ioqpairs; + uint16_t msix_qsize; + uint32_t cmb_size_mb; + uint8_t aerl; + uint32_t aer_max_queued; + uint8_t mdts; + uint8_t vsl; + bool use_intel_id; + uint8_t zasl; + bool legacy_cmb; +} NvmeParams; + +typedef struct NvmeCtrl { + PCIDevice parent_obj; + MemoryRegion bar0; + MemoryRegion iomem; + NvmeBar bar; + NvmeParams params; + NvmeBus bus; + + uint16_t cntlid; + bool qs_created; + uint32_t page_size; + uint16_t page_bits; + uint16_t max_prp_ents; + uint16_t cqe_size; + uint16_t sqe_size; + uint32_t reg_size; + uint32_t max_q_ents; + uint8_t outstanding_aers; + uint32_t irq_status; + uint64_t host_timestamp; /* Timestamp sent by the host */ + uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ + uint64_t starttime_ms; + uint16_t temperature; + uint8_t smart_critical_warning; + + struct { + MemoryRegion mem; + uint8_t *buf; + bool cmse; + hwaddr cba; + } cmb; + + struct { + HostMemoryBackend *dev; + bool cmse; + hwaddr cba; + } pmr; + + uint8_t aer_mask; + NvmeRequest **aer_reqs; + QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue; + int aer_queued; + + uint32_t dmrsl; + + /* Namespace ID is started with 1 so bitmap should be 1-based */ +#define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1) + DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE); + + NvmeSubsystem *subsys; + + NvmeNamespace namespace; + NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; + NvmeSQueue **sq; + NvmeCQueue **cq; + NvmeSQueue admin_sq; + NvmeCQueue admin_cq; + NvmeIdCtrl id_ctrl; + + struct { + struct { + uint16_t temp_thresh_hi; + uint16_t temp_thresh_low; + }; + uint32_t async_config; + } features; +} NvmeCtrl; + +static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid) +{ + if (!nsid || nsid > NVME_MAX_NAMESPACES) { + return NULL; + } + + return n->namespaces[nsid]; +} + +static inline NvmeCQueue *nvme_cq(NvmeRequest *req) +{ + NvmeSQueue *sq = req->sq; + NvmeCtrl *n = sq->ctrl; + + return n->cq[sq->cqid]; +} + +static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req) +{ + NvmeSQueue *sq = req->sq; + return sq->ctrl; +} + +static inline uint16_t nvme_cid(NvmeRequest *req) +{ + if (!req) { + return 0xffff; + } + + return le16_to_cpu(req->cqe.cid); +} + +void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns); +uint16_t nvme_bounce_data(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req); +uint16_t nvme_bounce_mdata(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + NvmeTxDirection dir, NvmeRequest *req); +void nvme_rw_complete_cb(void *opaque, int ret); +uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len, + NvmeCmd *cmd); + +/* from Linux kernel (crypto/crct10dif_common.c) */ +static const uint16_t t10_dif_crc_table[256] = { + 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, + 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, + 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, + 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, + 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, + 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, + 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, + 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, + 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, + 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, + 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, + 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, + 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, + 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, + 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, + 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, + 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, + 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, + 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, + 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, + 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, + 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, + 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, + 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, + 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, + 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, + 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, + 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, + 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, + 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, + 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, + 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 +}; + +uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint16_t ctrl, uint64_t slba, + uint32_t reftag); +uint16_t nvme_dif_mangle_mdata(NvmeNamespace *ns, uint8_t *mbuf, size_t mlen, + uint64_t slba); +void nvme_dif_pract_generate_dif(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint16_t apptag, + uint32_t reftag); +uint16_t nvme_dif_check(NvmeNamespace *ns, uint8_t *buf, size_t len, + uint8_t *mbuf, size_t mlen, uint16_t ctrl, + uint64_t slba, uint16_t apptag, + uint16_t appmask, uint32_t reftag); +uint16_t nvme_dif_rw(NvmeCtrl *n, NvmeRequest *req); + + +#endif /* HW_NVME_INTERNAL_H */ diff --git a/hw/block/nvme-subsys.c b/hw/nvme/subsys.c index 9604c19117..192223d17c 100644 --- a/hw/block/nvme-subsys.c +++ b/hw/nvme/subsys.c @@ -6,20 +6,10 @@ * This code is licensed under the GNU GPL v2. Refer COPYING. */ -#include "qemu/units.h" #include "qemu/osdep.h" -#include "qemu/uuid.h" -#include "qemu/iov.h" -#include "qemu/cutils.h" #include "qapi/error.h" -#include "hw/qdev-properties.h" -#include "hw/qdev-core.h" -#include "hw/block/block.h" -#include "block/aio.h" -#include "block/accounting.h" -#include "hw/pci/pci.h" + #include "nvme.h" -#include "nvme-subsys.h" int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) { diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events new file mode 100644 index 0000000000..ea33d0ccc3 --- /dev/null +++ b/hw/nvme/trace-events @@ -0,0 +1,204 @@ +# successful events +pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" +pci_nvme_irq_pin(void) "pulsing IRQ pin" +pci_nvme_irq_masked(void) "IRQ is masked" +pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" +pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" +pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64"" +pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d" +pci_nvme_map_sgl(uint8_t typ, uint64_t len) "type 0x%"PRIx8" len %"PRIu64"" +pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" +pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode, const char *opname) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8" opname '%s'" +pci_nvme_flush(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" +pci_nvme_format(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" +pci_nvme_format_ns(uint16_t cid, uint32_t nsid, uint8_t lbaf, uint8_t mset, uint8_t pi, uint8_t pil) "cid %"PRIu16" nsid %"PRIu32" lbaf %"PRIu8" mset %"PRIu8" pi %"PRIu8" pil %"PRIu8"" +pci_nvme_format_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32"" +pci_nvme_read(uint16_t cid, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" +pci_nvme_write(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" opname '%s' nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64"" +pci_nvme_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_misc_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw(uint8_t pract, uint8_t prinfo) "pract 0x%"PRIx8" prinfo 0x%"PRIx8"" +pci_nvme_dif_rw_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw_mdata_out_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_dif_rw_check_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_pract_generate_dif(size_t len, size_t lba_size, size_t chksum_len, uint16_t apptag, uint32_t reftag) "len %zu lba_size %zu chksum_len %zu apptag 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_check(uint8_t prinfo, uint16_t chksum_len) "prinfo 0x%"PRIx8" chksum_len %"PRIu16"" +pci_nvme_dif_prchk_disabled(uint16_t apptag, uint32_t reftag) "apptag 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_dif_prchk_guard(uint16_t guard, uint16_t crc) "guard 0x%"PRIx16" crc 0x%"PRIx16"" +pci_nvme_dif_prchk_apptag(uint16_t apptag, uint16_t elbat, uint16_t elbatm) "apptag 0x%"PRIx16" elbat 0x%"PRIx16" elbatm 0x%"PRIx16"" +pci_nvme_dif_prchk_reftag(uint32_t reftag, uint32_t elbrt) "reftag 0x%"PRIx32" elbrt 0x%"PRIx32"" +pci_nvme_copy(uint16_t cid, uint32_t nsid, uint16_t nr, uint8_t format) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu16" format 0x%"PRIx8"" +pci_nvme_copy_source_range(uint64_t slba, uint32_t nlb) "slba 0x%"PRIx64" nlb %"PRIu32"" +pci_nvme_copy_in_complete(uint16_t cid) "cid %"PRIu16"" +pci_nvme_copy_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_verify(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" +pci_nvme_verify_mdata_in_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_verify_cb(uint16_t cid, uint8_t prinfo, uint16_t apptag, uint16_t appmask, uint32_t reftag) "cid %"PRIu16" prinfo 0x%"PRIx8" apptag 0x%"PRIx16" appmask 0x%"PRIx16" reftag 0x%"PRIx32"" +pci_nvme_rw_complete_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_block_status(int64_t offset, int64_t bytes, int64_t pnum, int ret, bool zeroed) "offset %"PRId64" bytes %"PRId64" pnum %"PRId64" ret 0x%x zeroed %d" +pci_nvme_dsm(uint16_t cid, uint32_t nsid, uint32_t nr, uint32_t attr) "cid %"PRIu16" nsid %"PRIu32" nr %"PRIu32" attr 0x%"PRIx32"" +pci_nvme_dsm_deallocate(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32"" +pci_nvme_dsm_single_range_limit_exceeded(uint32_t nlb, uint32_t dmrsl) "nlb %"PRIu32" dmrsl %"PRIu32"" +pci_nvme_compare(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba 0x%"PRIx64" nlb %"PRIu32"" +pci_nvme_compare_data_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_compare_mdata_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16"" +pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64"" +pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'" +pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" +pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" +pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" +pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16"" +pci_nvme_identify(uint16_t cid, uint8_t cns, uint16_t ctrlid, uint8_t csi) "cid %"PRIu16" cns 0x%"PRIx8" ctrlid %"PRIu16" csi 0x%"PRIx8"" +pci_nvme_identify_ctrl(void) "identify controller" +pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8"" +pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32"" +pci_nvme_identify_ns_attached_list(uint16_t cntid) "cntid=%"PRIu16"" +pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8"" +pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32"" +pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8"" +pci_nvme_identify_cmd_set(void) "identify i/o command set" +pci_nvme_identify_ns_descr_list(uint32_t ns) "nsid %"PRIu32"" +pci_nvme_get_log(uint16_t cid, uint8_t lid, uint8_t lsp, uint8_t rae, uint32_t len, uint64_t off) "cid %"PRIu16" lid 0x%"PRIx8" lsp 0x%"PRIx8" rae 0x%"PRIx8" len %"PRIu32" off %"PRIu64"" +pci_nvme_getfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t sel, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" sel 0x%"PRIx8" cdw11 0x%"PRIx32"" +pci_nvme_setfeat(uint16_t cid, uint32_t nsid, uint8_t fid, uint8_t save, uint32_t cdw11) "cid %"PRIu16" nsid 0x%"PRIx32" fid 0x%"PRIx8" save 0x%"PRIx8" cdw11 0x%"PRIx32"" +pci_nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" +pci_nvme_getfeat_numq(int result) "get feature number of queues, result=%d" +pci_nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" +pci_nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" +pci_nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" +pci_nvme_process_aers(int queued) "queued %d" +pci_nvme_aer(uint16_t cid) "cid %"PRIu16"" +pci_nvme_aer_aerl_exceeded(void) "aerl exceeded" +pci_nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8"" +pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" +pci_nvme_ns_attachment(uint16_t cid, uint8_t sel) "cid %"PRIu16", sel=0x%"PRIx8"" +pci_nvme_ns_attachment_attach(uint16_t cntlid, uint32_t nsid) "cntlid=0x%"PRIx16", nsid=0x%"PRIx32"" +pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" +pci_nvme_enqueue_event_noqueue(int queued) "queued %d" +pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8"" +pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" +pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16"" +pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d" +pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d" +pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" new_head %"PRIu16"" +pci_nvme_mmio_doorbell_sq(uint16_t sqid, uint16_t new_tail) "sqid %"PRIu16" new_tail %"PRIu16"" +pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" +pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" +pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" +pci_nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" +pci_nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" +pci_nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" +pci_nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" +pci_nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" +pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded" +pci_nvme_mmio_stopped(void) "cleared controller enable bit" +pci_nvme_mmio_shutdown_set(void) "shutdown bit set" +pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" +pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_reset_zone(uint64_t slba, uint32_t zone_idx, int all) "reset zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_offline_zone(uint64_t slba, uint32_t zone_idx, int all) "offline zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32"" +pci_nvme_set_descriptor_extension(uint64_t slba, uint32_t zone_idx) "set zone descriptor extension, slba=%"PRIu64", idx=%"PRIu32"" +pci_nvme_zd_extension_set(uint32_t zone_idx) "set descriptor extension for zone_idx=%"PRIu32"" +pci_nvme_clear_ns_close(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Closed state" +pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Empty state" + +# error conditions +pci_nvme_err_mdts(size_t len) "len %zu" +pci_nvme_err_zasl(size_t len) "len %zu" +pci_nvme_err_req_status(uint16_t cid, uint32_t nsid, uint16_t status, uint8_t opc) "cid %"PRIu16" nsid %"PRIu32" status 0x%"PRIx16" opc 0x%"PRIx8"" +pci_nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64"" +pci_nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64"" +pci_nvme_err_cfs(void) "controller fatal status" +pci_nvme_err_aio(uint16_t cid, const char *errname, uint16_t status) "cid %"PRIu16" err '%s' status 0x%"PRIx16"" +pci_nvme_err_copy_invalid_format(uint8_t format) "format 0x%"PRIx8"" +pci_nvme_err_invalid_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" +pci_nvme_err_invalid_num_sgld(uint16_t cid, uint8_t typ) "cid %"PRIu16" type 0x%"PRIx8"" +pci_nvme_err_invalid_sgl_excess_length(uint32_t residual) "residual %"PRIu32"" +pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" +pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is not page aligned: 0x%"PRIx64"" +pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" +pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8"" +pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" +pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" +pci_nvme_err_invalid_log_page_offset(uint64_t ofs, uint64_t size) "must be <= %"PRIu64", got %"PRIu64"" +pci_nvme_err_cmb_invalid_cba(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64"" +pci_nvme_err_cmb_not_enabled(uint64_t cmbmsc) "cmbmsc 0x%"PRIx64"" +pci_nvme_err_unaligned_zone_cmd(uint8_t action, uint64_t slba, uint64_t zslba) "unaligned zone op 0x%"PRIx32", got slba=%"PRIu64", zslba=%"PRIu64"" +pci_nvme_err_invalid_zone_state_transition(uint8_t action, uint64_t slba, uint8_t attrs) "action=0x%"PRIx8", slba=%"PRIu64", attrs=0x%"PRIx32"" +pci_nvme_err_write_not_at_wp(uint64_t slba, uint64_t zone, uint64_t wp) "writing at slba=%"PRIu64", zone=%"PRIu64", but wp=%"PRIu64"" +pci_nvme_err_append_not_at_start(uint64_t slba, uint64_t zone) "appending at slba=%"PRIu64", but zone=%"PRIu64"" +pci_nvme_err_zone_is_full(uint64_t zslba) "zslba 0x%"PRIx64"" +pci_nvme_err_zone_is_read_only(uint64_t zslba) "zslba 0x%"PRIx64"" +pci_nvme_err_zone_is_offline(uint64_t zslba) "zslba 0x%"PRIx64"" +pci_nvme_err_zone_boundary(uint64_t slba, uint32_t nlb, uint64_t zcap) "lba 0x%"PRIx64" nlb %"PRIu32" zcap 0x%"PRIx64"" +pci_nvme_err_zone_invalid_write(uint64_t slba, uint64_t wp) "lba 0x%"PRIx64" wp 0x%"PRIx64"" +pci_nvme_err_zone_write_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16"" +pci_nvme_err_zone_read_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16"" +pci_nvme_err_insuff_active_res(uint32_t max_active) "max_active=%"PRIu32" zone limit exceeded" +pci_nvme_err_insuff_open_res(uint32_t max_open) "max_open=%"PRIu32" zone limit exceeded" +pci_nvme_err_zd_extension_map_error(uint32_t zone_idx) "can't map descriptor extension for zone_idx=%"PRIu32"" +pci_nvme_err_invalid_iocsci(uint32_t idx) "unsupported command set combination index %"PRIu32"" +pci_nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16"" +pci_nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" +pci_nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" +pci_nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" +pci_nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16"" +pci_nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" +pci_nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" +pci_nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" +pci_nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" +pci_nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" +pci_nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" +pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" +pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" +pci_nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16"" +pci_nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" +pci_nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" +pci_nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" +pci_nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null" +pci_nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" +pci_nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" +pci_nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" +pci_nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" +pci_nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u" +pci_nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_css(uint8_t css) "nvme_start_ctrl failed because invalid command set selected:%u" +pci_nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" +pci_nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" +pci_nvme_err_startfail_zasl_too_small(uint32_t zasl, uint32_t pagesz) "nvme_start_ctrl failed because zone append size limit %"PRIu32" is too small, needs to be >= %"PRIu32"" +pci_nvme_err_startfail(void) "setting controller enable bit failed" +pci_nvme_err_invalid_mgmt_action(uint8_t action) "action=0x%"PRIx8"" + +# undefined behavior +pci_nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" +pci_nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" +pci_nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" +pci_nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status" +pci_nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" +pci_nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" +pci_nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" +pci_nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" +pci_nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored" +pci_nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored" +pci_nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored" +pci_nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored" +pci_nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" +pci_nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" +pci_nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" +pci_nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" +pci_nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" +pci_nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" +pci_nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring" +pci_nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" +pci_nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" +pci_nvme_ub_unknown_css_value(void) "unknown value in cc.css field" diff --git a/hw/nvme/trace.h b/hw/nvme/trace.h new file mode 100644 index 0000000000..b398ea107f --- /dev/null +++ b/hw/nvme/trace.h @@ -0,0 +1 @@ +#include "trace/trace-hw_nvme.h" diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build index 86d6f379d1..597d974dd4 100644 --- a/hw/ppc/meson.build +++ b/hw/ppc/meson.build @@ -29,6 +29,9 @@ ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files( 'spapr_numa.c', 'pef.c', )) +ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_TCG'], if_true: files( + 'spapr_softmmu.c', +)) ppc_ss.add(when: 'CONFIG_SPAPR_RNG', if_true: files('spapr_rng.c')) ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_LINUX'], if_true: files( 'spapr_pci_vfio.c', diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index ffe01977cd..d16dd2d080 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -196,7 +196,7 @@ static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt) _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); - if (env->spr_cb[SPR_PURR].oea_read) { + if (ppc_has_spr(cpu, SPR_PURR)) { _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0))); } diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 8f40319aee..c23bcc4490 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -703,10 +703,10 @@ static void spapr_dt_cpu(CPUState *cs, void *fdt, int offset, _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); - if (env->spr_cb[SPR_PURR].oea_read) { + if (ppc_has_spr(cpu, SPR_PURR)) { _FDT((fdt_setprop_cell(fdt, offset, "ibm,purr", 1))); } - if (env->spr_cb[SPR_SPURR].oea_read) { + if (ppc_has_spr(cpu, SPR_PURR)) { _FDT((fdt_setprop_cell(fdt, offset, "ibm,spurr", 1))); } @@ -979,6 +979,7 @@ static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt, */ val[1] = SPAPR_OV5_XIVE_LEGACY; /* XICS */ val[3] = 0x00; /* Hash */ + spapr_check_mmu_mode(false); } else if (kvm_enabled()) { if (kvmppc_has_cap_mmu_radix() && kvmppc_has_cap_mmu_hash_v3()) { val[3] = 0x80; /* OV5_MMU_BOTH */ @@ -1556,6 +1557,22 @@ void spapr_setup_hpt(SpaprMachineState *spapr) } } +void spapr_check_mmu_mode(bool guest_radix) +{ + if (guest_radix) { + if (kvm_enabled() && !kvmppc_has_cap_mmu_radix()) { + error_report("Guest requested unavailable MMU mode (radix)."); + exit(EXIT_FAILURE); + } + } else { + if (kvm_enabled() && kvmppc_has_cap_mmu_radix() + && !kvmppc_has_cap_mmu_hash_v3()) { + error_report("Guest requested unavailable MMU mode (hash)."); + exit(EXIT_FAILURE); + } + } +} + static void spapr_machine_reset(MachineState *machine) { SpaprMachineState *spapr = SPAPR_MACHINE(machine); diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c index 9ea7ddd1e9..d0c419b392 100644 --- a/hw/ppc/spapr_caps.c +++ b/hw/ppc/spapr_caps.c @@ -371,6 +371,65 @@ static bool spapr_pagesize_cb(void *opaque, uint32_t seg_pshift, return true; } +static void ppc_hash64_filter_pagesizes(PowerPCCPU *cpu, + bool (*cb)(void *, uint32_t, uint32_t), + void *opaque) +{ + PPCHash64Options *opts = cpu->hash64_opts; + int i; + int n = 0; + bool ci_largepage = false; + + assert(opts); + + n = 0; + for (i = 0; i < ARRAY_SIZE(opts->sps); i++) { + PPCHash64SegmentPageSizes *sps = &opts->sps[i]; + int j; + int m = 0; + + assert(n <= i); + + if (!sps->page_shift) { + break; + } + + for (j = 0; j < ARRAY_SIZE(sps->enc); j++) { + PPCHash64PageSize *ps = &sps->enc[j]; + + assert(m <= j); + if (!ps->page_shift) { + break; + } + + if (cb(opaque, sps->page_shift, ps->page_shift)) { + if (ps->page_shift >= 16) { + ci_largepage = true; + } + sps->enc[m++] = *ps; + } + } + + /* Clear rest of the row */ + for (j = m; j < ARRAY_SIZE(sps->enc); j++) { + memset(&sps->enc[j], 0, sizeof(sps->enc[j])); + } + + if (m) { + n++; + } + } + + /* Clear the rest of the table */ + for (i = n; i < ARRAY_SIZE(opts->sps); i++) { + memset(&opts->sps[i], 0, sizeof(opts->sps[i])); + } + + if (!ci_largepage) { + opts->flags &= ~PPC_HASH64_CI_LARGEPAGE; + } +} + static void cap_hpt_maxpagesize_cpu_apply(SpaprMachineState *spapr, PowerPCCPU *cpu, uint8_t val, Error **errp) diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 16c719c3de..f25014afda 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -20,24 +20,7 @@ #include "mmu-book3s-v3.h" #include "hw/mem/memory-device.h" -static bool has_spr(PowerPCCPU *cpu, int spr) -{ - /* We can test whether the SPR is defined by checking for a valid name */ - return cpu->env.spr_cb[spr].name != NULL; -} - -static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex) -{ - /* - * hash value/pteg group index is normalized by HPT mask - */ - if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) { - return false; - } - return true; -} - -static bool is_ram_address(SpaprMachineState *spapr, hwaddr addr) +bool is_ram_address(SpaprMachineState *spapr, hwaddr addr) { MachineState *machine = MACHINE(spapr); DeviceMemoryState *dms = machine->device_memory; @@ -53,355 +36,6 @@ static bool is_ram_address(SpaprMachineState *spapr, hwaddr addr) return false; } -static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr, - target_ulong opcode, target_ulong *args) -{ - target_ulong flags = args[0]; - target_ulong ptex = args[1]; - target_ulong pteh = args[2]; - target_ulong ptel = args[3]; - unsigned apshift; - target_ulong raddr; - target_ulong slot; - const ppc_hash_pte64_t *hptes; - - apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel); - if (!apshift) { - /* Bad page size encoding */ - return H_PARAMETER; - } - - raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1); - - if (is_ram_address(spapr, raddr)) { - /* Regular RAM - should have WIMG=0010 */ - if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) { - return H_PARAMETER; - } - } else { - target_ulong wimg_flags; - /* Looks like an IO address */ - /* FIXME: What WIMG combinations could be sensible for IO? - * For now we allow WIMG=010x, but are there others? */ - /* FIXME: Should we check against registered IO addresses? */ - wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M)); - - if (wimg_flags != HPTE64_R_I && - wimg_flags != (HPTE64_R_I | HPTE64_R_M)) { - return H_PARAMETER; - } - } - - pteh &= ~0x60ULL; - - if (!valid_ptex(cpu, ptex)) { - return H_PARAMETER; - } - - slot = ptex & 7ULL; - ptex = ptex & ~7ULL; - - if (likely((flags & H_EXACT) == 0)) { - hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); - for (slot = 0; slot < 8; slot++) { - if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) { - break; - } - } - ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); - if (slot == 8) { - return H_PTEG_FULL; - } - } else { - hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1); - if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) { - ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1); - return H_PTEG_FULL; - } - ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); - } - - spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel); - - args[0] = ptex + slot; - return H_SUCCESS; -} - -typedef enum { - REMOVE_SUCCESS = 0, - REMOVE_NOT_FOUND = 1, - REMOVE_PARM = 2, - REMOVE_HW = 3, -} RemoveResult; - -static RemoveResult remove_hpte(PowerPCCPU *cpu - , target_ulong ptex, - target_ulong avpn, - target_ulong flags, - target_ulong *vp, target_ulong *rp) -{ - const ppc_hash_pte64_t *hptes; - target_ulong v, r; - - if (!valid_ptex(cpu, ptex)) { - return REMOVE_PARM; - } - - hptes = ppc_hash64_map_hptes(cpu, ptex, 1); - v = ppc_hash64_hpte0(cpu, hptes, 0); - r = ppc_hash64_hpte1(cpu, hptes, 0); - ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); - - if ((v & HPTE64_V_VALID) == 0 || - ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) || - ((flags & H_ANDCOND) && (v & avpn) != 0)) { - return REMOVE_NOT_FOUND; - } - *vp = v; - *rp = r; - spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0); - ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); - return REMOVE_SUCCESS; -} - -static target_ulong h_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, - target_ulong opcode, target_ulong *args) -{ - CPUPPCState *env = &cpu->env; - target_ulong flags = args[0]; - target_ulong ptex = args[1]; - target_ulong avpn = args[2]; - RemoveResult ret; - - ret = remove_hpte(cpu, ptex, avpn, flags, - &args[0], &args[1]); - - switch (ret) { - case REMOVE_SUCCESS: - check_tlb_flush(env, true); - return H_SUCCESS; - - case REMOVE_NOT_FOUND: - return H_NOT_FOUND; - - case REMOVE_PARM: - return H_PARAMETER; - - case REMOVE_HW: - return H_HARDWARE; - } - - g_assert_not_reached(); -} - -#define H_BULK_REMOVE_TYPE 0xc000000000000000ULL -#define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL -#define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL -#define H_BULK_REMOVE_END 0xc000000000000000ULL -#define H_BULK_REMOVE_CODE 0x3000000000000000ULL -#define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL -#define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL -#define H_BULK_REMOVE_PARM 0x2000000000000000ULL -#define H_BULK_REMOVE_HW 0x3000000000000000ULL -#define H_BULK_REMOVE_RC 0x0c00000000000000ULL -#define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL -#define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL -#define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL -#define H_BULK_REMOVE_AVPN 0x0200000000000000ULL -#define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL - -#define H_BULK_REMOVE_MAX_BATCH 4 - -static target_ulong h_bulk_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, - target_ulong opcode, target_ulong *args) -{ - CPUPPCState *env = &cpu->env; - int i; - target_ulong rc = H_SUCCESS; - - for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { - target_ulong *tsh = &args[i*2]; - target_ulong tsl = args[i*2 + 1]; - target_ulong v, r, ret; - - if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) { - break; - } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) { - return H_PARAMETER; - } - - *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS; - *tsh |= H_BULK_REMOVE_RESPONSE; - - if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) { - *tsh |= H_BULK_REMOVE_PARM; - return H_PARAMETER; - } - - ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl, - (*tsh & H_BULK_REMOVE_FLAGS) >> 26, - &v, &r); - - *tsh |= ret << 60; - - switch (ret) { - case REMOVE_SUCCESS: - *tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 43; - break; - - case REMOVE_PARM: - rc = H_PARAMETER; - goto exit; - - case REMOVE_HW: - rc = H_HARDWARE; - goto exit; - } - } - exit: - check_tlb_flush(env, true); - - return rc; -} - -static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr, - target_ulong opcode, target_ulong *args) -{ - CPUPPCState *env = &cpu->env; - target_ulong flags = args[0]; - target_ulong ptex = args[1]; - target_ulong avpn = args[2]; - const ppc_hash_pte64_t *hptes; - target_ulong v, r; - - if (!valid_ptex(cpu, ptex)) { - return H_PARAMETER; - } - - hptes = ppc_hash64_map_hptes(cpu, ptex, 1); - v = ppc_hash64_hpte0(cpu, hptes, 0); - r = ppc_hash64_hpte1(cpu, hptes, 0); - ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); - - if ((v & HPTE64_V_VALID) == 0 || - ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) { - return H_NOT_FOUND; - } - - r &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N | - HPTE64_R_KEY_HI | HPTE64_R_KEY_LO); - r |= (flags << 55) & HPTE64_R_PP0; - r |= (flags << 48) & HPTE64_R_KEY_HI; - r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO); - spapr_store_hpte(cpu, ptex, - (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0); - ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); - /* Flush the tlb */ - check_tlb_flush(env, true); - /* Don't need a memory barrier, due to qemu's global lock */ - spapr_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r); - return H_SUCCESS; -} - -static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr, - target_ulong opcode, target_ulong *args) -{ - target_ulong flags = args[0]; - target_ulong ptex = args[1]; - int i, ridx, n_entries = 1; - const ppc_hash_pte64_t *hptes; - - if (!valid_ptex(cpu, ptex)) { - return H_PARAMETER; - } - - if (flags & H_READ_4) { - /* Clear the two low order bits */ - ptex &= ~(3ULL); - n_entries = 4; - } - - hptes = ppc_hash64_map_hptes(cpu, ptex, n_entries); - for (i = 0, ridx = 0; i < n_entries; i++) { - args[ridx++] = ppc_hash64_hpte0(cpu, hptes, i); - args[ridx++] = ppc_hash64_hpte1(cpu, hptes, i); - } - ppc_hash64_unmap_hptes(cpu, hptes, ptex, n_entries); - - return H_SUCCESS; -} - -struct SpaprPendingHpt { - /* These fields are read-only after initialization */ - int shift; - QemuThread thread; - - /* These fields are protected by the BQL */ - bool complete; - - /* These fields are private to the preparation thread if - * !complete, otherwise protected by the BQL */ - int ret; - void *hpt; -}; - -static void free_pending_hpt(SpaprPendingHpt *pending) -{ - if (pending->hpt) { - qemu_vfree(pending->hpt); - } - - g_free(pending); -} - -static void *hpt_prepare_thread(void *opaque) -{ - SpaprPendingHpt *pending = opaque; - size_t size = 1ULL << pending->shift; - - pending->hpt = qemu_try_memalign(size, size); - if (pending->hpt) { - memset(pending->hpt, 0, size); - pending->ret = H_SUCCESS; - } else { - pending->ret = H_NO_MEM; - } - - qemu_mutex_lock_iothread(); - - if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) { - /* Ready to go */ - pending->complete = true; - } else { - /* We've been cancelled, clean ourselves up */ - free_pending_hpt(pending); - } - - qemu_mutex_unlock_iothread(); - return NULL; -} - -/* Must be called with BQL held */ -static void cancel_hpt_prepare(SpaprMachineState *spapr) -{ - SpaprPendingHpt *pending = spapr->pending_hpt; - - /* Let the thread know it's cancelled */ - spapr->pending_hpt = NULL; - - if (!pending) { - /* Nothing to do */ - return; - } - - if (!pending->complete) { - /* thread will clean itself up */ - return; - } - - free_pending_hpt(pending); -} - /* Convert a return code from the KVM ioctl()s implementing resize HPT * into a PAPR hypercall return code */ static target_ulong resize_hpt_convert_rc(int ret) @@ -447,7 +81,6 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu, { target_ulong flags = args[0]; int shift = args[1]; - SpaprPendingHpt *pending = spapr->pending_hpt; uint64_t current_ram_size; int rc; @@ -484,182 +117,11 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu, return resize_hpt_convert_rc(rc); } - if (pending) { - /* something already in progress */ - if (pending->shift == shift) { - /* and it's suitable */ - if (pending->complete) { - return pending->ret; - } else { - return H_LONG_BUSY_ORDER_100_MSEC; - } - } - - /* not suitable, cancel and replace */ - cancel_hpt_prepare(spapr); - } - - if (!shift) { - /* nothing to do */ - return H_SUCCESS; - } - - /* start new prepare */ - - pending = g_new0(SpaprPendingHpt, 1); - pending->shift = shift; - pending->ret = H_HARDWARE; - - qemu_thread_create(&pending->thread, "sPAPR HPT prepare", - hpt_prepare_thread, pending, QEMU_THREAD_DETACHED); - - spapr->pending_hpt = pending; - - /* In theory we could estimate the time more accurately based on - * the new size, but there's not much point */ - return H_LONG_BUSY_ORDER_100_MSEC; -} - -static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot) -{ - uint8_t *addr = htab; - - addr += pteg * HASH_PTEG_SIZE_64; - addr += slot * HASH_PTE_SIZE_64; - return ldq_p(addr); -} - -static void new_hpte_store(void *htab, uint64_t pteg, int slot, - uint64_t pte0, uint64_t pte1) -{ - uint8_t *addr = htab; - - addr += pteg * HASH_PTEG_SIZE_64; - addr += slot * HASH_PTE_SIZE_64; - - stq_p(addr, pte0); - stq_p(addr + HASH_PTE_SIZE_64 / 2, pte1); -} - -static int rehash_hpte(PowerPCCPU *cpu, - const ppc_hash_pte64_t *hptes, - void *old_hpt, uint64_t oldsize, - void *new_hpt, uint64_t newsize, - uint64_t pteg, int slot) -{ - uint64_t old_hash_mask = (oldsize >> 7) - 1; - uint64_t new_hash_mask = (newsize >> 7) - 1; - target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot); - target_ulong pte1; - uint64_t avpn; - unsigned base_pg_shift; - uint64_t hash, new_pteg, replace_pte0; - - if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) { - return H_SUCCESS; - } - - pte1 = ppc_hash64_hpte1(cpu, hptes, slot); - - base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1); - assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */ - avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23); - - if (pte0 & HPTE64_V_SECONDARY) { - pteg = ~pteg; - } - - if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) { - uint64_t offset, vsid; - - /* We only have 28 - 23 bits of offset in avpn */ - offset = (avpn & 0x1f) << 23; - vsid = avpn >> 5; - /* We can find more bits from the pteg value */ - if (base_pg_shift < 23) { - offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift; - } - - hash = vsid ^ (offset >> base_pg_shift); - } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) { - uint64_t offset, vsid; - - /* We only have 40 - 23 bits of seg_off in avpn */ - offset = (avpn & 0x1ffff) << 23; - vsid = avpn >> 17; - if (base_pg_shift < 23) { - offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) - << base_pg_shift; - } - - hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift); - } else { - error_report("rehash_pte: Bad segment size in HPTE"); + if (kvm_enabled()) { return H_HARDWARE; } - new_pteg = hash & new_hash_mask; - if (pte0 & HPTE64_V_SECONDARY) { - assert(~pteg == (hash & old_hash_mask)); - new_pteg = ~new_pteg; - } else { - assert(pteg == (hash & old_hash_mask)); - } - assert((oldsize != newsize) || (pteg == new_pteg)); - replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot); - /* - * Strictly speaking, we don't need all these tests, since we only - * ever rehash bolted HPTEs. We might in future handle non-bolted - * HPTEs, though so make the logic correct for those cases as - * well. - */ - if (replace_pte0 & HPTE64_V_VALID) { - assert(newsize < oldsize); - if (replace_pte0 & HPTE64_V_BOLTED) { - if (pte0 & HPTE64_V_BOLTED) { - /* Bolted collision, nothing we can do */ - return H_PTEG_FULL; - } else { - /* Discard this hpte */ - return H_SUCCESS; - } - } - } - - new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1); - return H_SUCCESS; -} - -static int rehash_hpt(PowerPCCPU *cpu, - void *old_hpt, uint64_t oldsize, - void *new_hpt, uint64_t newsize) -{ - uint64_t n_ptegs = oldsize >> 7; - uint64_t pteg; - int slot; - int rc; - - for (pteg = 0; pteg < n_ptegs; pteg++) { - hwaddr ptex = pteg * HPTES_PER_GROUP; - const ppc_hash_pte64_t *hptes - = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); - - if (!hptes) { - return H_HARDWARE; - } - - for (slot = 0; slot < HPTES_PER_GROUP; slot++) { - rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize, - pteg, slot); - if (rc != H_SUCCESS) { - ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); - return rc; - } - } - ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); - } - - return H_SUCCESS; + return softmmu_resize_hpt_prepare(cpu, spapr, shift); } static void do_push_sregs_to_kvm_pr(CPUState *cs, run_on_cpu_data data) @@ -675,7 +137,7 @@ static void do_push_sregs_to_kvm_pr(CPUState *cs, run_on_cpu_data data) } } -static void push_sregs_to_kvm_pr(SpaprMachineState *spapr) +void push_sregs_to_kvm_pr(SpaprMachineState *spapr) { CPUState *cs; @@ -700,9 +162,7 @@ static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu, { target_ulong flags = args[0]; target_ulong shift = args[1]; - SpaprPendingHpt *pending = spapr->pending_hpt; int rc; - size_t newsize; if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) { return H_AUTHORITY; @@ -725,42 +185,14 @@ static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu, return rc; } - if (flags != 0) { - return H_PARAMETER; - } - - if (!pending || (pending->shift != shift)) { - /* no matching prepare */ - return H_CLOSED; - } - - if (!pending->complete) { - /* prepare has not completed */ - return H_BUSY; + if (kvm_enabled()) { + return H_HARDWARE; } - /* Shouldn't have got past PREPARE without an HPT */ - g_assert(spapr->htab_shift); - - newsize = 1ULL << pending->shift; - rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr), - pending->hpt, newsize); - if (rc == H_SUCCESS) { - qemu_vfree(spapr->htab); - spapr->htab = pending->hpt; - spapr->htab_shift = pending->shift; - - push_sregs_to_kvm_pr(spapr); - - pending->hpt = NULL; /* so it's not free()d */ - } + return softmmu_resize_hpt_commit(cpu, spapr, flags, shift); +} - /* Clean up */ - spapr->pending_hpt = NULL; - free_pending_hpt(pending); - return rc; -} static target_ulong h_set_sprg0(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong opcode, target_ulong *args) @@ -774,12 +206,12 @@ static target_ulong h_set_sprg0(PowerPCCPU *cpu, SpaprMachineState *spapr, static target_ulong h_set_dabr(PowerPCCPU *cpu, SpaprMachineState *spapr, target_ulong opcode, target_ulong *args) { - if (!has_spr(cpu, SPR_DABR)) { + if (!ppc_has_spr(cpu, SPR_DABR)) { return H_HARDWARE; /* DABR register not available */ } cpu_synchronize_state(CPU(cpu)); - if (has_spr(cpu, SPR_DABRX)) { + if (ppc_has_spr(cpu, SPR_DABRX)) { cpu->env.spr[SPR_DABRX] = 0x3; /* Use Problem and Privileged state */ } else if (!(args[0] & 0x4)) { /* Breakpoint Translation set? */ return H_RESERVED_DABR; @@ -794,7 +226,7 @@ static target_ulong h_set_xdabr(PowerPCCPU *cpu, SpaprMachineState *spapr, { target_ulong dabrx = args[1]; - if (!has_spr(cpu, SPR_DABR) || !has_spr(cpu, SPR_DABRX)) { + if (!ppc_has_spr(cpu, SPR_DABR) || !ppc_has_spr(cpu, SPR_DABRX)) { return H_HARDWARE; } @@ -1760,18 +1192,8 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu, spapr_ovec_intersect(spapr->ov5_cas, spapr->ov5, ov5_guest); spapr_ovec_cleanup(ov5_guest); - if (guest_radix) { - if (kvm_enabled() && !kvmppc_has_cap_mmu_radix()) { - error_report("Guest requested unavailable MMU mode (radix)."); - exit(EXIT_FAILURE); - } - } else { - if (kvm_enabled() && kvmppc_has_cap_mmu_radix() - && !kvmppc_has_cap_mmu_hash_v3()) { - error_report("Guest requested unavailable MMU mode (hash)."); - exit(EXIT_FAILURE); - } - } + spapr_check_mmu_mode(guest_radix); + spapr->cas_pre_isa3_guest = !spapr_ovec_test(ov1_guest, OV1_PPC_3_00); spapr_ovec_cleanup(ov1_guest); @@ -2023,16 +1445,34 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, return H_FUNCTION; } -static void hypercall_register_types(void) +#ifndef CONFIG_TCG +static target_ulong h_softmmu(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + g_assert_not_reached(); +} + +static void hypercall_register_softmmu(void) { /* hcall-pft */ - spapr_register_hypercall(H_ENTER, h_enter); - spapr_register_hypercall(H_REMOVE, h_remove); - spapr_register_hypercall(H_PROTECT, h_protect); - spapr_register_hypercall(H_READ, h_read); + spapr_register_hypercall(H_ENTER, h_softmmu); + spapr_register_hypercall(H_REMOVE, h_softmmu); + spapr_register_hypercall(H_PROTECT, h_softmmu); + spapr_register_hypercall(H_READ, h_softmmu); /* hcall-bulk */ - spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove); + spapr_register_hypercall(H_BULK_REMOVE, h_softmmu); +} +#else +static void hypercall_register_softmmu(void) +{ + /* DO NOTHING */ +} +#endif + +static void hypercall_register_types(void) +{ + hypercall_register_softmmu(); /* hcall-hpt-resize */ spapr_register_hypercall(H_RESIZE_HPT_PREPARE, h_resize_hpt_prepare); diff --git a/hw/ppc/spapr_softmmu.c b/hw/ppc/spapr_softmmu.c new file mode 100644 index 0000000000..6c6b86dd3c --- /dev/null +++ b/hw/ppc/spapr_softmmu.c @@ -0,0 +1,627 @@ +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "qapi/error.h" +#include "sysemu/hw_accel.h" +#include "sysemu/runstate.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" +#include "qemu/module.h" +#include "qemu/error-report.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "helper_regs.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_cpu_core.h" +#include "mmu-hash64.h" +#include "cpu-models.h" +#include "trace.h" +#include "kvm_ppc.h" +#include "hw/ppc/fdt.h" +#include "hw/ppc/spapr_ovec.h" +#include "mmu-book3s-v3.h" +#include "hw/mem/memory-device.h" + +static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex) +{ + /* + * hash value/pteg group index is normalized by HPT mask + */ + if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) { + return false; + } + return true; +} + +static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + target_ulong pteh = args[2]; + target_ulong ptel = args[3]; + unsigned apshift; + target_ulong raddr; + target_ulong slot; + const ppc_hash_pte64_t *hptes; + + apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel); + if (!apshift) { + /* Bad page size encoding */ + return H_PARAMETER; + } + + raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1); + + if (is_ram_address(spapr, raddr)) { + /* Regular RAM - should have WIMG=0010 */ + if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) { + return H_PARAMETER; + } + } else { + target_ulong wimg_flags; + /* Looks like an IO address */ + /* FIXME: What WIMG combinations could be sensible for IO? + * For now we allow WIMG=010x, but are there others? */ + /* FIXME: Should we check against registered IO addresses? */ + wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M)); + + if (wimg_flags != HPTE64_R_I && + wimg_flags != (HPTE64_R_I | HPTE64_R_M)) { + return H_PARAMETER; + } + } + + pteh &= ~0x60ULL; + + if (!valid_ptex(cpu, ptex)) { + return H_PARAMETER; + } + + slot = ptex & 7ULL; + ptex = ptex & ~7ULL; + + if (likely((flags & H_EXACT) == 0)) { + hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); + for (slot = 0; slot < 8; slot++) { + if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) { + break; + } + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + if (slot == 8) { + return H_PTEG_FULL; + } + } else { + hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1); + if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) { + ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1); + return H_PTEG_FULL; + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); + } + + spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel); + + args[0] = ptex + slot; + return H_SUCCESS; +} + +typedef enum { + REMOVE_SUCCESS = 0, + REMOVE_NOT_FOUND = 1, + REMOVE_PARM = 2, + REMOVE_HW = 3, +} RemoveResult; + +static RemoveResult remove_hpte(PowerPCCPU *cpu + , target_ulong ptex, + target_ulong avpn, + target_ulong flags, + target_ulong *vp, target_ulong *rp) +{ + const ppc_hash_pte64_t *hptes; + target_ulong v, r; + + if (!valid_ptex(cpu, ptex)) { + return REMOVE_PARM; + } + + hptes = ppc_hash64_map_hptes(cpu, ptex, 1); + v = ppc_hash64_hpte0(cpu, hptes, 0); + r = ppc_hash64_hpte1(cpu, hptes, 0); + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); + + if ((v & HPTE64_V_VALID) == 0 || + ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) || + ((flags & H_ANDCOND) && (v & avpn) != 0)) { + return REMOVE_NOT_FOUND; + } + *vp = v; + *rp = r; + spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0); + ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); + return REMOVE_SUCCESS; +} + +static target_ulong h_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + target_ulong avpn = args[2]; + RemoveResult ret; + + ret = remove_hpte(cpu, ptex, avpn, flags, + &args[0], &args[1]); + + switch (ret) { + case REMOVE_SUCCESS: + check_tlb_flush(env, true); + return H_SUCCESS; + + case REMOVE_NOT_FOUND: + return H_NOT_FOUND; + + case REMOVE_PARM: + return H_PARAMETER; + + case REMOVE_HW: + return H_HARDWARE; + } + + g_assert_not_reached(); +} + +#define H_BULK_REMOVE_TYPE 0xc000000000000000ULL +#define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL +#define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL +#define H_BULK_REMOVE_END 0xc000000000000000ULL +#define H_BULK_REMOVE_CODE 0x3000000000000000ULL +#define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL +#define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL +#define H_BULK_REMOVE_PARM 0x2000000000000000ULL +#define H_BULK_REMOVE_HW 0x3000000000000000ULL +#define H_BULK_REMOVE_RC 0x0c00000000000000ULL +#define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL +#define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL +#define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL +#define H_BULK_REMOVE_AVPN 0x0200000000000000ULL +#define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL + +#define H_BULK_REMOVE_MAX_BATCH 4 + +static target_ulong h_bulk_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + int i; + target_ulong rc = H_SUCCESS; + + for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { + target_ulong *tsh = &args[i*2]; + target_ulong tsl = args[i*2 + 1]; + target_ulong v, r, ret; + + if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) { + break; + } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) { + return H_PARAMETER; + } + + *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS; + *tsh |= H_BULK_REMOVE_RESPONSE; + + if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) { + *tsh |= H_BULK_REMOVE_PARM; + return H_PARAMETER; + } + + ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl, + (*tsh & H_BULK_REMOVE_FLAGS) >> 26, + &v, &r); + + *tsh |= ret << 60; + + switch (ret) { + case REMOVE_SUCCESS: + *tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 43; + break; + + case REMOVE_PARM: + rc = H_PARAMETER; + goto exit; + + case REMOVE_HW: + rc = H_HARDWARE; + goto exit; + } + } + exit: + check_tlb_flush(env, true); + + return rc; +} + +static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + target_ulong avpn = args[2]; + const ppc_hash_pte64_t *hptes; + target_ulong v, r; + + if (!valid_ptex(cpu, ptex)) { + return H_PARAMETER; + } + + hptes = ppc_hash64_map_hptes(cpu, ptex, 1); + v = ppc_hash64_hpte0(cpu, hptes, 0); + r = ppc_hash64_hpte1(cpu, hptes, 0); + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); + + if ((v & HPTE64_V_VALID) == 0 || + ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) { + return H_NOT_FOUND; + } + + r &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N | + HPTE64_R_KEY_HI | HPTE64_R_KEY_LO); + r |= (flags << 55) & HPTE64_R_PP0; + r |= (flags << 48) & HPTE64_R_KEY_HI; + r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO); + spapr_store_hpte(cpu, ptex, + (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0); + ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); + /* Flush the tlb */ + check_tlb_flush(env, true); + /* Don't need a memory barrier, due to qemu's global lock */ + spapr_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r); + return H_SUCCESS; +} + +static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + int i, ridx, n_entries = 1; + const ppc_hash_pte64_t *hptes; + + if (!valid_ptex(cpu, ptex)) { + return H_PARAMETER; + } + + if (flags & H_READ_4) { + /* Clear the two low order bits */ + ptex &= ~(3ULL); + n_entries = 4; + } + + hptes = ppc_hash64_map_hptes(cpu, ptex, n_entries); + for (i = 0, ridx = 0; i < n_entries; i++) { + args[ridx++] = ppc_hash64_hpte0(cpu, hptes, i); + args[ridx++] = ppc_hash64_hpte1(cpu, hptes, i); + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, n_entries); + + return H_SUCCESS; +} + +struct SpaprPendingHpt { + /* These fields are read-only after initialization */ + int shift; + QemuThread thread; + + /* These fields are protected by the BQL */ + bool complete; + + /* These fields are private to the preparation thread if + * !complete, otherwise protected by the BQL */ + int ret; + void *hpt; +}; + +static void free_pending_hpt(SpaprPendingHpt *pending) +{ + if (pending->hpt) { + qemu_vfree(pending->hpt); + } + + g_free(pending); +} + +static void *hpt_prepare_thread(void *opaque) +{ + SpaprPendingHpt *pending = opaque; + size_t size = 1ULL << pending->shift; + + pending->hpt = qemu_try_memalign(size, size); + if (pending->hpt) { + memset(pending->hpt, 0, size); + pending->ret = H_SUCCESS; + } else { + pending->ret = H_NO_MEM; + } + + qemu_mutex_lock_iothread(); + + if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) { + /* Ready to go */ + pending->complete = true; + } else { + /* We've been cancelled, clean ourselves up */ + free_pending_hpt(pending); + } + + qemu_mutex_unlock_iothread(); + return NULL; +} + +/* Must be called with BQL held */ +static void cancel_hpt_prepare(SpaprMachineState *spapr) +{ + SpaprPendingHpt *pending = spapr->pending_hpt; + + /* Let the thread know it's cancelled */ + spapr->pending_hpt = NULL; + + if (!pending) { + /* Nothing to do */ + return; + } + + if (!pending->complete) { + /* thread will clean itself up */ + return; + } + + free_pending_hpt(pending); +} + +target_ulong softmmu_resize_hpt_prepare(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong shift) +{ + SpaprPendingHpt *pending = spapr->pending_hpt; + + if (pending) { + /* something already in progress */ + if (pending->shift == shift) { + /* and it's suitable */ + if (pending->complete) { + return pending->ret; + } else { + return H_LONG_BUSY_ORDER_100_MSEC; + } + } + + /* not suitable, cancel and replace */ + cancel_hpt_prepare(spapr); + } + + if (!shift) { + /* nothing to do */ + return H_SUCCESS; + } + + /* start new prepare */ + + pending = g_new0(SpaprPendingHpt, 1); + pending->shift = shift; + pending->ret = H_HARDWARE; + + qemu_thread_create(&pending->thread, "sPAPR HPT prepare", + hpt_prepare_thread, pending, QEMU_THREAD_DETACHED); + + spapr->pending_hpt = pending; + + /* In theory we could estimate the time more accurately based on + * the new size, but there's not much point */ + return H_LONG_BUSY_ORDER_100_MSEC; +} + +static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot) +{ + uint8_t *addr = htab; + + addr += pteg * HASH_PTEG_SIZE_64; + addr += slot * HASH_PTE_SIZE_64; + return ldq_p(addr); +} + +static void new_hpte_store(void *htab, uint64_t pteg, int slot, + uint64_t pte0, uint64_t pte1) +{ + uint8_t *addr = htab; + + addr += pteg * HASH_PTEG_SIZE_64; + addr += slot * HASH_PTE_SIZE_64; + + stq_p(addr, pte0); + stq_p(addr + HASH_PTE_SIZE_64 / 2, pte1); +} + +static int rehash_hpte(PowerPCCPU *cpu, + const ppc_hash_pte64_t *hptes, + void *old_hpt, uint64_t oldsize, + void *new_hpt, uint64_t newsize, + uint64_t pteg, int slot) +{ + uint64_t old_hash_mask = (oldsize >> 7) - 1; + uint64_t new_hash_mask = (newsize >> 7) - 1; + target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot); + target_ulong pte1; + uint64_t avpn; + unsigned base_pg_shift; + uint64_t hash, new_pteg, replace_pte0; + + if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) { + return H_SUCCESS; + } + + pte1 = ppc_hash64_hpte1(cpu, hptes, slot); + + base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1); + assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */ + avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23); + + if (pte0 & HPTE64_V_SECONDARY) { + pteg = ~pteg; + } + + if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) { + uint64_t offset, vsid; + + /* We only have 28 - 23 bits of offset in avpn */ + offset = (avpn & 0x1f) << 23; + vsid = avpn >> 5; + /* We can find more bits from the pteg value */ + if (base_pg_shift < 23) { + offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift; + } + + hash = vsid ^ (offset >> base_pg_shift); + } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) { + uint64_t offset, vsid; + + /* We only have 40 - 23 bits of seg_off in avpn */ + offset = (avpn & 0x1ffff) << 23; + vsid = avpn >> 17; + if (base_pg_shift < 23) { + offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) + << base_pg_shift; + } + + hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift); + } else { + error_report("rehash_pte: Bad segment size in HPTE"); + return H_HARDWARE; + } + + new_pteg = hash & new_hash_mask; + if (pte0 & HPTE64_V_SECONDARY) { + assert(~pteg == (hash & old_hash_mask)); + new_pteg = ~new_pteg; + } else { + assert(pteg == (hash & old_hash_mask)); + } + assert((oldsize != newsize) || (pteg == new_pteg)); + replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot); + /* + * Strictly speaking, we don't need all these tests, since we only + * ever rehash bolted HPTEs. We might in future handle non-bolted + * HPTEs, though so make the logic correct for those cases as + * well. + */ + if (replace_pte0 & HPTE64_V_VALID) { + assert(newsize < oldsize); + if (replace_pte0 & HPTE64_V_BOLTED) { + if (pte0 & HPTE64_V_BOLTED) { + /* Bolted collision, nothing we can do */ + return H_PTEG_FULL; + } else { + /* Discard this hpte */ + return H_SUCCESS; + } + } + } + + new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1); + return H_SUCCESS; +} + +static int rehash_hpt(PowerPCCPU *cpu, + void *old_hpt, uint64_t oldsize, + void *new_hpt, uint64_t newsize) +{ + uint64_t n_ptegs = oldsize >> 7; + uint64_t pteg; + int slot; + int rc; + + for (pteg = 0; pteg < n_ptegs; pteg++) { + hwaddr ptex = pteg * HPTES_PER_GROUP; + const ppc_hash_pte64_t *hptes + = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); + + if (!hptes) { + return H_HARDWARE; + } + + for (slot = 0; slot < HPTES_PER_GROUP; slot++) { + rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize, + pteg, slot); + if (rc != H_SUCCESS) { + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + return rc; + } + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + } + + return H_SUCCESS; +} + +target_ulong softmmu_resize_hpt_commit(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong flags, + target_ulong shift) +{ + SpaprPendingHpt *pending = spapr->pending_hpt; + int rc; + size_t newsize; + + if (flags != 0) { + return H_PARAMETER; + } + + if (!pending || (pending->shift != shift)) { + /* no matching prepare */ + return H_CLOSED; + } + + if (!pending->complete) { + /* prepare has not completed */ + return H_BUSY; + } + + /* Shouldn't have got past PREPARE without an HPT */ + g_assert(spapr->htab_shift); + + newsize = 1ULL << pending->shift; + rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr), + pending->hpt, newsize); + if (rc == H_SUCCESS) { + qemu_vfree(spapr->htab); + spapr->htab = pending->hpt; + spapr->htab_shift = pending->shift; + + push_sregs_to_kvm_pr(spapr); + + pending->hpt = NULL; /* so it's not free()d */ + } + + /* Clean up */ + spapr->pending_hpt = NULL; + free_pending_hpt(pending); + + return rc; +} + +static void hypercall_register_types(void) +{ + /* hcall-pft */ + spapr_register_hypercall(H_ENTER, h_enter); + spapr_register_hypercall(H_REMOVE, h_remove); + spapr_register_hypercall(H_PROTECT, h_protect); + spapr_register_hypercall(H_READ, h_read); + + /* hcall-bulk */ + spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove); + +} + +type_init(hypercall_register_types) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index ded0c10453..ee57abe045 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1909,6 +1909,11 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque) return err; } } + if (dev->num_queues && dev->max_queues < dev->num_queues) { + error_report("The maximum number of queues supported by the " + "backend is %" PRIu64, dev->max_queues); + return -EINVAL; + } if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && !(virtio_has_feature(dev->protocol_features, diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index d6332d45c3..859978d248 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -69,6 +69,11 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) return; } + if (has_iommu && !virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) { + error_setg(errp, "iommu_platform=true is not supported by the device"); + return; + } + if (klass->device_plugged != NULL) { klass->device_plugged(qbus->parent, &local_err); } |