96 files changed, 6110 insertions, 888 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index bf2366815b..00562f924f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1026,6 +1026,16 @@ S: Maintained F: hw/ssi/xlnx-versal-ospi.c F: include/hw/ssi/xlnx-versal-ospi.h +Xilinx Versal CFI +M: Francisco Iglesias <francisco.iglesias@amd.com> +S: Maintained +F: hw/misc/xlnx-cfi-if.c +F: include/hw/misc/xlnx-cfi-if.h +F: hw/misc/xlnx-versal-cfu.c +F: include/hw/misc/xlnx-versal-cfu.h +F: hw/misc/xlnx-versal-cframe-reg.c +F: include/hw/misc/xlnx-versal-cframe-reg.h + STM32F100 M: Alexandre Iooss <erdnaxe@crans.org> L: qemu-arm@nongnu.org diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 2ba7521695..ff1578bb32 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -3763,6 +3763,7 @@ static void kvm_accel_instance_init(Object *obj) /* KVM dirty ring is by default off */ s->kvm_dirty_ring_size = 0; s->kvm_dirty_ring_with_bitmap = false; + s->kvm_eager_split_size = 0; s->notify_vmexit = NOTIFY_VMEXIT_OPTION_RUN; s->notify_window = 0; s->xen_version = 0; diff --git a/block.c b/block.c index cfb7e08895..8da89aaa62 100644 --- a/block.c +++ b/block.c @@ -415,7 +415,7 @@ BlockDriverState *bdrv_new(void) for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { QLIST_INIT(&bs->op_blockers[i]); } - qemu_co_mutex_init(&bs->reqs_lock); + qemu_mutex_init(&bs->reqs_lock); qemu_mutex_init(&bs->dirty_bitmap_mutex); bs->refcnt = 1; bs->aio_context = qemu_get_aio_context(); @@ -661,8 +661,10 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, blk = blk_co_new_open(filename, NULL, options, BDRV_O_RDWR | BDRV_O_RESIZE, errp); if (!blk) { - error_prepend(errp, "Protocol driver '%s' does not support image " - "creation, and opening the image failed: ", + error_prepend(errp, "Protocol driver '%s' does not support creating " + "new images, so an existing image must be selected as " + "the target; however, opening the given target as an " + "existing image failed: ", drv->format_name); return -EINVAL; } @@ -5476,6 +5478,8 @@ static void bdrv_delete(BlockDriverState *bs) bdrv_close(bs); + qemu_mutex_destroy(&bs->reqs_lock); + g_free(bs); } diff --git a/block/copy-before-write.c b/block/copy-before-write.c index b866e42271..9a0e2b69d9 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -503,7 +503,7 @@ static void cbw_close(BlockDriverState *bs) s->bcs = NULL; } -BlockDriver bdrv_cbw_filter = { +static BlockDriver bdrv_cbw_filter = { .format_name = "copy-before-write", .instance_size = sizeof(BDRVCopyBeforeWriteState), diff --git a/block/io.c b/block/io.c index 19edab5d5a..ba23a9bcd3 100644 --- a/block/io.c +++ b/block/io.c @@ -591,10 +591,16 @@ static void coroutine_fn tracked_request_end(BdrvTrackedRequest *req) qatomic_dec(&req->bs->serialising_in_flight); } - qemu_co_mutex_lock(&req->bs->reqs_lock); + qemu_mutex_lock(&req->bs->reqs_lock); QLIST_REMOVE(req, list); + qemu_mutex_unlock(&req->bs->reqs_lock); + + /* + * At this point qemu_co_queue_wait(&req->wait_queue, ...) won't be called + * anymore because the request has been removed from the list, so it's safe + * to restart the queue outside reqs_lock to minimize the critical section. 
+ */ qemu_co_queue_restart_all(&req->wait_queue); - qemu_co_mutex_unlock(&req->bs->reqs_lock); } /** @@ -621,9 +627,9 @@ static void coroutine_fn tracked_request_begin(BdrvTrackedRequest *req, qemu_co_queue_init(&req->wait_queue); - qemu_co_mutex_lock(&bs->reqs_lock); + qemu_mutex_lock(&bs->reqs_lock); QLIST_INSERT_HEAD(&bs->tracked_requests, req, list); - qemu_co_mutex_unlock(&bs->reqs_lock); + qemu_mutex_unlock(&bs->reqs_lock); } static bool tracked_request_overlaps(BdrvTrackedRequest *req, @@ -787,9 +793,9 @@ bdrv_wait_serialising_requests(BdrvTrackedRequest *self) return; } - qemu_co_mutex_lock(&bs->reqs_lock); + qemu_mutex_lock(&bs->reqs_lock); bdrv_wait_serialising_requests_locked(self); - qemu_co_mutex_unlock(&bs->reqs_lock); + qemu_mutex_unlock(&bs->reqs_lock); } void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req, @@ -797,12 +803,12 @@ void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req, { IO_CODE(); - qemu_co_mutex_lock(&req->bs->reqs_lock); + qemu_mutex_lock(&req->bs->reqs_lock); tracked_request_set_serialising(req, align); bdrv_wait_serialising_requests_locked(req); - qemu_co_mutex_unlock(&req->bs->reqs_lock); + qemu_mutex_unlock(&req->bs->reqs_lock); } int bdrv_check_qiov_request(int64_t offset, int64_t bytes, @@ -2996,7 +3002,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) goto early_exit; } - qemu_co_mutex_lock(&bs->reqs_lock); + qemu_mutex_lock(&bs->reqs_lock); current_gen = qatomic_read(&bs->write_gen); /* Wait until any previous flushes are completed */ @@ -3006,7 +3012,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) /* Flushes reach this point in nondecreasing current_gen order. */ bs->active_flush_req = true; - qemu_co_mutex_unlock(&bs->reqs_lock); + qemu_mutex_unlock(&bs->reqs_lock); /* Write back all layers by calling one driver function */ if (bs->drv->bdrv_co_flush) { @@ -3094,11 +3100,11 @@ out: bs->flushed_gen = current_gen; } - qemu_co_mutex_lock(&bs->reqs_lock); + qemu_mutex_lock(&bs->reqs_lock); bs->active_flush_req = false; /* Return value is ignored - it's ok if wait queue is empty */ qemu_co_queue_next(&bs->flush_queue); - qemu_co_mutex_unlock(&bs->reqs_lock); + qemu_mutex_unlock(&bs->reqs_lock); early_exit: bdrv_dec_in_flight(bs); diff --git a/block/iscsi.c b/block/iscsi.c index 34f97ab646..5640c8b565 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -1058,6 +1058,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, return NULL; } + /* Must use malloc(): this is freed via scsi_free_scsi_task() */ acb->task = malloc(sizeof(struct scsi_task)); if (acb->task == NULL) { error_report("iSCSI: Failed to allocate task for scsi command. 
%s", diff --git a/block/meson.build b/block/meson.build index 529fc172c6..f351b9d0d3 100644 --- a/block/meson.build +++ b/block/meson.build @@ -4,41 +4,41 @@ block_ss.add(files( 'aio_task.c', 'amend.c', 'backup.c', - 'copy-before-write.c', 'blkdebug.c', 'blklogwrites.c', 'blkverify.c', 'block-backend.c', 'block-copy.c', - 'graph-lock.c', 'commit.c', + 'copy-before-write.c', 'copy-on-read.c', - 'preallocate.c', - 'progress_meter.c', 'create.c', 'crypto.c', 'dirty-bitmap.c', 'filter-compress.c', + 'graph-lock.c', 'io.c', 'mirror.c', 'nbd.c', 'null.c', 'plug.c', + 'preallocate.c', + 'progress_meter.c', 'qapi.c', + 'qcow2.c', 'qcow2-bitmap.c', 'qcow2-cache.c', 'qcow2-cluster.c', 'qcow2-refcount.c', 'qcow2-snapshot.c', 'qcow2-threads.c', - 'qcow2.c', 'quorum.c', 'raw-format.c', 'reqlist.c', 'snapshot.c', 'snapshot-access.c', - 'throttle-groups.c', 'throttle.c', + 'throttle-groups.c', 'write-threshold.c', ), zstd, zlib, gnutls) diff --git a/block/preallocate.c b/block/preallocate.c index 4d82125036..3d0f621003 100644 --- a/block/preallocate.c +++ b/block/preallocate.c @@ -535,7 +535,7 @@ static void preallocate_child_perm(BlockDriverState *bs, BdrvChild *c, } } -BlockDriver bdrv_preallocate_filter = { +static BlockDriver bdrv_preallocate_filter = { .format_name = "preallocate", .instance_size = sizeof(BDRVPreallocateState), diff --git a/block/qapi.c b/block/qapi.c index f34f95e0ef..1cbb0935ff 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -48,7 +48,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, { ImageInfo **p_image_info; ImageInfo *backing_info; - BlockDriverState *bs0, *backing; + BlockDriverState *backing; BlockDeviceInfo *info; ERRP_GUARD(); @@ -145,7 +145,6 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, info->write_threshold = bdrv_write_threshold_get(bs); - bs0 = bs; p_image_info = &info->image; info->backing_file_depth = 0; @@ -153,7 +152,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, * Skip automatically inserted nodes that the user isn't aware of for * query-block (blk != NULL), but not for query-named-block-nodes */ - bdrv_query_image_info(bs0, p_image_info, flat, blk != NULL, errp); + bdrv_query_image_info(bs, p_image_info, flat, blk != NULL, errp); if (*errp) { qapi_free_BlockDeviceInfo(info); return NULL; @@ -310,33 +309,6 @@ out: } /** - * bdrv_query_block_node_info: - * @bs: block node to examine - * @p_info: location to store node information - * @errp: location to store error information - * - * Store image information about @bs in @p_info. - * - * @p_info will be set only on success. On error, store error in @errp. 
- */ -void bdrv_query_block_node_info(BlockDriverState *bs, - BlockNodeInfo **p_info, - Error **errp) -{ - BlockNodeInfo *info; - ERRP_GUARD(); - - info = g_new0(BlockNodeInfo, 1); - bdrv_do_query_node_info(bs, info, errp); - if (*errp) { - qapi_free_BlockNodeInfo(info); - return; - } - - *p_info = info; -} - -/** * bdrv_query_image_info: * @bs: block node to examine * @p_info: location to store image information diff --git a/block/snapshot-access.c b/block/snapshot-access.c index 67ea339da9..8d4e8932b8 100644 --- a/block/snapshot-access.c +++ b/block/snapshot-access.c @@ -108,7 +108,7 @@ static void snapshot_access_child_perm(BlockDriverState *bs, BdrvChild *c, *nshared = BLK_PERM_ALL; } -BlockDriver bdrv_snapshot_access_drv = { +static BlockDriver bdrv_snapshot_access_drv = { .format_name = "snapshot-access", .bdrv_open = snapshot_access_open, diff --git a/block/vmdk.c b/block/vmdk.c index 70066c2b01..58ce290e9c 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1207,7 +1207,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, bs, &child_of_bds, extent_role, false, &local_err); g_free(extent_path); - if (local_err) { + if (!extent_file) { error_propagate(errp, local_err); ret = -EINVAL; goto out; diff --git a/block/vpc.c b/block/vpc.c index 3810a601a3..ceb87dd3d8 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -510,7 +510,7 @@ get_image_offset(BlockDriverState *bs, uint64_t offset, bool write, int *err) miss sparse read optimization, but it's not a problem in terms of correctness. */ if (write && (s->last_bitmap_offset != bitmap_offset)) { - uint8_t bitmap[s->bitmap_size]; + g_autofree uint8_t *bitmap = g_malloc(s->bitmap_size); int r; s->last_bitmap_offset = bitmap_offset; @@ -558,7 +558,7 @@ alloc_block(BlockDriverState *bs, int64_t offset) int64_t bat_offset; uint32_t index, bat_value; int ret; - uint8_t bitmap[s->bitmap_size]; + g_autofree uint8_t *bitmap = g_malloc(s->bitmap_size); /* Check if sector_num is valid */ if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) { diff --git a/crypto/aes.c b/crypto/aes.c index 836d7d5c0b..df4362ac60 100644 --- a/crypto/aes.c +++ b/crypto/aes.c @@ -272,7 +272,7 @@ AES_Td3[x] = Si[x].[09, 0d, 0b, 0e]; AES_Td4[x] = Si[x].[01, 01, 01, 01]; */ -static const uint32_t AES_Te0[256] = { +const uint32_t AES_Te0[256] = { 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U, 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU, @@ -607,7 +607,7 @@ static const uint32_t AES_Te4[256] = { 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U, }; -static const uint32_t AES_Td0[256] = { +const uint32_t AES_Td0[256] = { 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U, 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U, diff --git a/crypto/sm4.c b/crypto/sm4.c index 9f0cd452c7..2987306cf7 100644 --- a/crypto/sm4.c +++ b/crypto/sm4.c @@ -47,3 +47,13 @@ uint8_t const sm4_sbox[] = { 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48, }; +uint32_t const sm4_ck[] = { + 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269, + 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9, + 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249, + 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9, + 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229, + 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299, + 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209, + 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279 +}; diff --git a/docs/system/arm/cpu-features.rst 
b/docs/system/arm/cpu-features.rst index 6bb88a40c7..a5fb929243 100644 --- a/docs/system/arm/cpu-features.rst +++ b/docs/system/arm/cpu-features.rst @@ -210,15 +210,20 @@ TCG VCPU Features TCG VCPU features are CPU features that are specific to TCG. Below is the list of TCG VCPU features and their descriptions. +``pauth`` + Enable or disable ``FEAT_PAuth`` entirely. + ``pauth-impdef`` - When ``FEAT_Pauth`` is enabled, either the *impdef* (Implementation - Defined) algorithm is enabled or the *architected* QARMA algorithm - is enabled. By default the impdef algorithm is disabled, and QARMA - is enabled. - - The architected QARMA algorithm has good cryptographic properties, - but can be quite slow to emulate. The impdef algorithm used by QEMU - is non-cryptographic but significantly faster. + When ``pauth`` is enabled, select the QEMU implementation defined algorithm. + +``pauth-qarma3`` + When ``pauth`` is enabled, select the architected QARMA3 algorithm. + +Without either ``pauth-impdef`` or ``pauth-qarma3`` enabled, +the architected QARMA5 algorithm is used. The architected QARMA5 +and QARMA3 algorithms have good cryptographic properties, but can +be quite slow to emulate. The impdef algorithm used by QEMU is +non-cryptographic but significantly faster. SVE CPU Properties ================== diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst index 2e6a7c8961..3df936fc35 100644 --- a/docs/system/arm/emulation.rst +++ b/docs/system/arm/emulation.rst @@ -28,12 +28,15 @@ the following architecture extensions: - FEAT_DotProd (Advanced SIMD dot product instructions) - FEAT_DoubleFault (Double Fault Extension) - FEAT_E0PD (Preventing EL0 access to halves of address maps) +- FEAT_EPAC (Enhanced pointer authentication) - FEAT_ETS (Enhanced Translation Synchronization) - FEAT_EVT (Enhanced Virtualization Traps) - FEAT_FCMA (Floating-point complex number instructions) - FEAT_FGT (Fine-Grained Traps) - FEAT_FHM (Floating-point half-precision multiplication instructions) - FEAT_FP16 (Half-precision floating-point data processing) +- FEAT_FPAC (Faulting on AUT* instructions) +- FEAT_FPACCOMBINE (Faulting on combined pointer authentication instructions) - FEAT_FRINTTS (Floating-point to integer instructions) - FEAT_FlagM (Flag manipulation instructions v2) - FEAT_FlagM2 (Enhancements to flag manipulation instructions) @@ -57,10 +60,14 @@ the following architecture extensions: - FEAT_MTE (Memory Tagging Extension) - FEAT_MTE2 (Memory Tagging Extension) - FEAT_MTE3 (MTE Asymmetric Fault Handling) +- FEAT_PACIMP (Pointer authentication - IMPLEMENTATION DEFINED algorithm) +- FEAT_PACQARMA3 (Pointer authentication - QARMA3 algorithm) +- FEAT_PACQARMA5 (Pointer authentication - QARMA5 algorithm) - FEAT_PAN (Privileged access never) - FEAT_PAN2 (AT S1E1R and AT S1E1W instruction variants affected by PSTATE.PAN) - FEAT_PAN3 (Support for SCTLR_ELx.EPAN) - FEAT_PAuth (Pointer authentication) +- FEAT_PAuth2 (Enhancements to pointer authentication) - FEAT_PMULL (PMULL, PMULL2 instructions) - FEAT_PMUv3p1 (PMU Extensions v3.1) - FEAT_PMUv3p4 (PMU Extensions v3.4) @@ -85,6 +92,7 @@ the following architecture extensions: - FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions) - FEAT_SPECRES (Speculation restriction instructions) - FEAT_SSBS (Speculative Store Bypass Safe) +- FEAT_TIDCP1 (EL0 use of IMPLEMENTATION DEFINED functionality) - FEAT_TLBIOS (TLB invalidate instructions in Outer Shareable domain) - FEAT_TLBIRANGE (TLB invalidate range instructions) - FEAT_TTCNP
(Translation table Common not private translations) diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst index 51cdac6841..e1697ac8f4 100644 --- a/docs/system/arm/virt.rst +++ b/docs/system/arm/virt.rst @@ -58,6 +58,7 @@ Supported guest CPU types: - ``cortex-a57`` (64-bit) - ``cortex-a72`` (64-bit) - ``cortex-a76`` (64-bit) +- ``cortex-a710`` (64-bit) - ``a64fx`` (64-bit) - ``host`` (with KVM only) - ``neoverse-n1`` (64-bit) diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst index 15aeddc6d8..ca5a2773cf 100644 --- a/docs/tools/qemu-img.rst +++ b/docs/tools/qemu-img.rst @@ -106,7 +106,11 @@ by the used format or see the format descriptions below for details. .. option:: -c - Indicates that target image must be compressed (qcow format only). + Indicates that target image must be compressed (qcow/qcow2 and vmdk with + streamOptimized subformat only). + + For qcow2, the compression algorithm can be specified with the ``-o + compression_type=...`` option (see below). .. option:: -h @@ -776,7 +780,7 @@ Supported image file formats: QEMU image format, the most versatile format. Use it to have smaller images (useful if your filesystem does not supports holes, for example - on Windows), optional AES encryption, zlib based compression and + on Windows), optional AES encryption, zlib or zstd based compression and support of multiple VM snapshots. Supported options: @@ -794,6 +798,17 @@ Supported image file formats: ``backing_fmt`` Image format of the base image + ``compression_type`` + This option configures which compression algorithm will be used for + compressed clusters on the image. Note that setting this option doesn't yet + cause the image to actually receive compressed writes. It is most commonly + used with the ``-c`` option of ``qemu-img convert``, but can also be used + with the ``compress`` filter driver or backup block jobs with compression + enabled. + + Valid values are ``zlib`` and ``zstd``. For images that use + ``compat=0.10``, only ``zlib`` compression is available. + ``encryption`` If this option is set to ``on``, the image is encrypted with 128-bit AES-CBC. 
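Two brief usage sketches for the documentation changes above; the documented flags are taken from the diffed docs, while the machine, CPU model, image names, and size are illustrative. For the TCG pointer-authentication properties (assuming the ``virt`` board and the ``max`` CPU, which typically exposes these properties under TCG):

    qemu-system-aarch64 -M virt -cpu max                    # architected QARMA5 (default)
    qemu-system-aarch64 -M virt -cpu max,pauth-impdef=on    # QEMU's faster impdef algorithm
    qemu-system-aarch64 -M virt -cpu max,pauth-qarma3=on    # architected QARMA3
    qemu-system-aarch64 -M virt -cpu max,pauth=off          # disable FEAT_PAuth entirely

For the qcow2 ``compression_type`` option:

    qemu-img create -f qcow2 -o compression_type=zstd disk.qcow2 16G
    qemu-img convert -c -O qcow2 -o compression_type=zstd input.raw output.qcow2

As the docs above note, ``create`` only records the compression type in the image; compressed data is actually written when ``-c`` is passed to ``convert`` (or via the ``compress`` filter driver or backup block jobs with compression enabled).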
diff --git a/hw/arm/virt.c b/hw/arm/virt.c index a13c658bbf..8ad78b23c2 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -211,6 +211,7 @@ static const char *valid_cpus[] = { ARM_CPU_TYPE_NAME("cortex-a55"), ARM_CPU_TYPE_NAME("cortex-a72"), ARM_CPU_TYPE_NAME("cortex-a76"), + ARM_CPU_TYPE_NAME("cortex-a710"), ARM_CPU_TYPE_NAME("a64fx"), ARM_CPU_TYPE_NAME("neoverse-n1"), ARM_CPU_TYPE_NAME("neoverse-v1"), diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c index 60bf5fe657..fa556d8764 100644 --- a/hw/arm/xlnx-versal.c +++ b/hw/arm/xlnx-versal.c @@ -27,7 +27,7 @@ #define XLNX_VERSAL_RCPU_TYPE ARM_CPU_TYPE_NAME("cortex-r5f") #define GEM_REVISION 0x40070106 -#define VERSAL_NUM_PMC_APB_IRQS 3 +#define VERSAL_NUM_PMC_APB_IRQS 18 #define NUM_OSPI_IRQ_LINES 3 static void versal_create_apu_cpus(Versal *s) @@ -341,6 +341,7 @@ static void versal_create_pmc_apb_irq_orgate(Versal *s, qemu_irq *pic) * - RTC * - BBRAM * - PMC SLCR + * - CFRAME regs (input 3 - 17 to the orgate) */ object_initialize_child(OBJECT(s), "pmc-apb-irq-orgate", &s->pmc.apb_irq_orgate, TYPE_OR_IRQ); @@ -570,6 +571,157 @@ static void versal_create_ospi(Versal *s, qemu_irq *pic) qdev_connect_gpio_out(orgate, 0, pic[VERSAL_OSPI_IRQ]); } +static void versal_create_cfu(Versal *s, qemu_irq *pic) +{ + SysBusDevice *sbd; + DeviceState *dev; + int i; + const struct { + uint64_t reg_base; + uint64_t fdri_base; + } cframe_addr[] = { + { MM_PMC_CFRAME0_REG, MM_PMC_CFRAME0_FDRI }, + { MM_PMC_CFRAME1_REG, MM_PMC_CFRAME1_FDRI }, + { MM_PMC_CFRAME2_REG, MM_PMC_CFRAME2_FDRI }, + { MM_PMC_CFRAME3_REG, MM_PMC_CFRAME3_FDRI }, + { MM_PMC_CFRAME4_REG, MM_PMC_CFRAME4_FDRI }, + { MM_PMC_CFRAME5_REG, MM_PMC_CFRAME5_FDRI }, + { MM_PMC_CFRAME6_REG, MM_PMC_CFRAME6_FDRI }, + { MM_PMC_CFRAME7_REG, MM_PMC_CFRAME7_FDRI }, + { MM_PMC_CFRAME8_REG, MM_PMC_CFRAME8_FDRI }, + { MM_PMC_CFRAME9_REG, MM_PMC_CFRAME9_FDRI }, + { MM_PMC_CFRAME10_REG, MM_PMC_CFRAME10_FDRI }, + { MM_PMC_CFRAME11_REG, MM_PMC_CFRAME11_FDRI }, + { MM_PMC_CFRAME12_REG, MM_PMC_CFRAME12_FDRI }, + { MM_PMC_CFRAME13_REG, MM_PMC_CFRAME13_FDRI }, + { MM_PMC_CFRAME14_REG, MM_PMC_CFRAME14_FDRI }, + }; + const struct { + uint32_t blktype0_frames; + uint32_t blktype1_frames; + uint32_t blktype2_frames; + uint32_t blktype3_frames; + uint32_t blktype4_frames; + uint32_t blktype5_frames; + uint32_t blktype6_frames; + } cframe_cfg[] = { + [0] = { 34111, 3528, 12800, 11, 5, 1, 1 }, + [1] = { 38498, 3841, 15361, 13, 7, 3, 1 }, + [2] = { 38498, 3841, 15361, 13, 7, 3, 1 }, + [3] = { 38498, 3841, 15361, 13, 7, 3, 1 }, + }; + + /* CFU FDRO */ + object_initialize_child(OBJECT(s), "cfu-fdro", &s->pmc.cfu_fdro, + TYPE_XLNX_VERSAL_CFU_FDRO); + sbd = SYS_BUS_DEVICE(&s->pmc.cfu_fdro); + + sysbus_realize(sbd, &error_fatal); + memory_region_add_subregion(&s->mr_ps, MM_PMC_CFU_FDRO, + sysbus_mmio_get_region(sbd, 0)); + + /* CFRAME REG */ + for (i = 0; i < ARRAY_SIZE(s->pmc.cframe); i++) { + g_autofree char *name = g_strdup_printf("cframe%d", i); + + object_initialize_child(OBJECT(s), name, &s->pmc.cframe[i], + TYPE_XLNX_VERSAL_CFRAME_REG); + + sbd = SYS_BUS_DEVICE(&s->pmc.cframe[i]); + dev = DEVICE(&s->pmc.cframe[i]); + + if (i < ARRAY_SIZE(cframe_cfg)) { + object_property_set_int(OBJECT(dev), "blktype0-frames", + cframe_cfg[i].blktype0_frames, + &error_abort); + object_property_set_int(OBJECT(dev), "blktype1-frames", + cframe_cfg[i].blktype1_frames, + &error_abort); + object_property_set_int(OBJECT(dev), "blktype2-frames", + cframe_cfg[i].blktype2_frames, + &error_abort); + object_property_set_int(OBJECT(dev), 
"blktype3-frames", + cframe_cfg[i].blktype3_frames, + &error_abort); + object_property_set_int(OBJECT(dev), "blktype4-frames", + cframe_cfg[i].blktype4_frames, + &error_abort); + object_property_set_int(OBJECT(dev), "blktype5-frames", + cframe_cfg[i].blktype5_frames, + &error_abort); + object_property_set_int(OBJECT(dev), "blktype6-frames", + cframe_cfg[i].blktype6_frames, + &error_abort); + } + object_property_set_link(OBJECT(dev), "cfu-fdro", + OBJECT(&s->pmc.cfu_fdro), &error_fatal); + + sysbus_realize(SYS_BUS_DEVICE(dev), &error_fatal); + + memory_region_add_subregion(&s->mr_ps, cframe_addr[i].reg_base, + sysbus_mmio_get_region(sbd, 0)); + memory_region_add_subregion(&s->mr_ps, cframe_addr[i].fdri_base, + sysbus_mmio_get_region(sbd, 1)); + sysbus_connect_irq(sbd, 0, + qdev_get_gpio_in(DEVICE(&s->pmc.apb_irq_orgate), + 3 + i)); + } + + /* CFRAME BCAST */ + object_initialize_child(OBJECT(s), "cframe_bcast", &s->pmc.cframe_bcast, + TYPE_XLNX_VERSAL_CFRAME_BCAST_REG); + + sbd = SYS_BUS_DEVICE(&s->pmc.cframe_bcast); + dev = DEVICE(&s->pmc.cframe_bcast); + + for (i = 0; i < ARRAY_SIZE(s->pmc.cframe); i++) { + g_autofree char *propname = g_strdup_printf("cframe%d", i); + object_property_set_link(OBJECT(dev), propname, + OBJECT(&s->pmc.cframe[i]), &error_fatal); + } + + sysbus_realize(sbd, &error_fatal); + + memory_region_add_subregion(&s->mr_ps, MM_PMC_CFRAME_BCAST_REG, + sysbus_mmio_get_region(sbd, 0)); + memory_region_add_subregion(&s->mr_ps, MM_PMC_CFRAME_BCAST_FDRI, + sysbus_mmio_get_region(sbd, 1)); + + /* CFU APB */ + object_initialize_child(OBJECT(s), "cfu-apb", &s->pmc.cfu_apb, + TYPE_XLNX_VERSAL_CFU_APB); + sbd = SYS_BUS_DEVICE(&s->pmc.cfu_apb); + dev = DEVICE(&s->pmc.cfu_apb); + + for (i = 0; i < ARRAY_SIZE(s->pmc.cframe); i++) { + g_autofree char *propname = g_strdup_printf("cframe%d", i); + object_property_set_link(OBJECT(dev), propname, + OBJECT(&s->pmc.cframe[i]), &error_fatal); + } + + sysbus_realize(sbd, &error_fatal); + memory_region_add_subregion(&s->mr_ps, MM_PMC_CFU_APB, + sysbus_mmio_get_region(sbd, 0)); + memory_region_add_subregion(&s->mr_ps, MM_PMC_CFU_STREAM, + sysbus_mmio_get_region(sbd, 1)); + memory_region_add_subregion(&s->mr_ps, MM_PMC_CFU_STREAM_2, + sysbus_mmio_get_region(sbd, 2)); + sysbus_connect_irq(sbd, 0, pic[VERSAL_CFU_IRQ_0]); + + /* CFU SFR */ + object_initialize_child(OBJECT(s), "cfu-sfr", &s->pmc.cfu_sfr, + TYPE_XLNX_VERSAL_CFU_SFR); + + sbd = SYS_BUS_DEVICE(&s->pmc.cfu_sfr); + + object_property_set_link(OBJECT(&s->pmc.cfu_sfr), + "cfu", OBJECT(&s->pmc.cfu_apb), &error_abort); + + sysbus_realize(sbd, &error_fatal); + memory_region_add_subregion(&s->mr_ps, MM_PMC_CFU_SFR, + sysbus_mmio_get_region(sbd, 0)); +} + static void versal_create_crl(Versal *s, qemu_irq *pic) { SysBusDevice *sbd; @@ -763,6 +915,7 @@ static void versal_realize(DeviceState *dev, Error **errp) versal_create_pmc_iou_slcr(s, pic); versal_create_ospi(s, pic); versal_create_crl(s, pic); + versal_create_cfu(s, pic); versal_map_ddr(s); versal_unimp(s); diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c index 37d3ccc76b..40de6b8b77 100644 --- a/hw/char/riscv_htif.c +++ b/hw/char/riscv_htif.c @@ -30,6 +30,7 @@ #include "qemu/timer.h" #include "qemu/error-report.h" #include "exec/address-spaces.h" +#include "exec/tswap.h" #include "sysemu/dma.h" #define RISCV_DEBUG_HTIF 0 @@ -209,11 +210,11 @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written) } else { uint64_t syscall[8]; cpu_physical_memory_read(payload, syscall, sizeof(syscall)); - if (syscall[0] == 
PK_SYS_WRITE && - syscall[1] == HTIF_DEV_CONSOLE && - syscall[3] == HTIF_CONSOLE_CMD_PUTC) { + if (tswap64(syscall[0]) == PK_SYS_WRITE && + tswap64(syscall[1]) == HTIF_DEV_CONSOLE && + tswap64(syscall[3]) == HTIF_CONSOLE_CMD_PUTC) { uint8_t ch; - cpu_physical_memory_read(syscall[2], &ch, 1); + cpu_physical_memory_read(tswap64(syscall[2]), &ch, 1); qemu_chr_fe_write(&s->chr, &ch, 1); resp = 0x100 | (uint8_t)payload; } else { @@ -232,7 +233,8 @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written) s->tohost = 0; /* clear to indicate we read */ return; } else if (cmd == HTIF_CONSOLE_CMD_PUTC) { - qemu_chr_fe_write(&s->chr, (uint8_t *)&payload, 1); + uint8_t ch = (uint8_t)payload; + qemu_chr_fe_write(&s->chr, &ch, 1); resp = 0x100 | (uint8_t)payload; } else { qemu_log("HTIF device %d: unknown command\n", device); diff --git a/hw/intc/arm_gicv3_its.c b/hw/intc/arm_gicv3_its.c index 43dfd7a35c..5f552b4d37 100644 --- a/hw/intc/arm_gicv3_its.c +++ b/hw/intc/arm_gicv3_its.c @@ -330,23 +330,20 @@ static MemTxResult get_vte(GICv3ITSState *s, uint32_t vpeid, VTEntry *vte) if (entry_addr == -1) { /* No L2 table entry, i.e. no valid VTE, or a memory error */ vte->valid = false; - goto out; + trace_gicv3_its_vte_read_fault(vpeid); + return MEMTX_OK; } vteval = address_space_ldq_le(as, entry_addr, MEMTXATTRS_UNSPECIFIED, &res); if (res != MEMTX_OK) { - goto out; + trace_gicv3_its_vte_read_fault(vpeid); + return res; } vte->valid = FIELD_EX64(vteval, VTE, VALID); vte->vptsize = FIELD_EX64(vteval, VTE, VPTSIZE); vte->vptaddr = FIELD_EX64(vteval, VTE, VPTADDR); vte->rdbase = FIELD_EX64(vteval, VTE, RDBASE); -out: - if (res != MEMTX_OK) { - trace_gicv3_its_vte_read_fault(vpeid); - } else { - trace_gicv3_its_vte_read(vpeid, vte->valid, vte->vptsize, - vte->vptaddr, vte->rdbase); - } + trace_gicv3_its_vte_read(vpeid, vte->valid, vte->vptsize, + vte->vptaddr, vte->rdbase); return res; } diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c index b466a6abaf..25cf7a5d9d 100644 --- a/hw/intc/riscv_aclint.c +++ b/hw/intc/riscv_aclint.c @@ -64,13 +64,13 @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer, uint64_t next; uint64_t diff; - uint64_t rtc_r = cpu_riscv_read_rtc(mtimer); + uint64_t rtc = cpu_riscv_read_rtc(mtimer); /* Compute the relative hartid w.r.t the socket */ hartid = hartid - mtimer->hartid_base; mtimer->timecmp[hartid] = value; - if (mtimer->timecmp[hartid] <= rtc_r) { + if (mtimer->timecmp[hartid] <= rtc) { /* * If we're setting an MTIMECMP value in the "past", * immediately raise the timer interrupt @@ -81,7 +81,7 @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer, /* otherwise, set up the future timer interrupt */ qemu_irq_lower(mtimer->timer_irqs[hartid]); - diff = mtimer->timecmp[hartid] - rtc_r; + diff = mtimer->timecmp[hartid] - rtc; /* back to ns (note args switched in muldiv64) */ uint64_t ns_diff = muldiv64(diff, NANOSECONDS_PER_SECOND, timebase_freq); @@ -208,11 +208,12 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, return; } else if (addr == mtimer->time_base || addr == mtimer->time_base + 4) { uint64_t rtc_r = cpu_riscv_read_rtc_raw(mtimer->timebase_freq); + uint64_t rtc = cpu_riscv_read_rtc(mtimer); if (addr == mtimer->time_base) { if (size == 4) { /* time_lo for RV32/RV64 */ - mtimer->time_delta = ((rtc_r & ~0xFFFFFFFFULL) | value) - rtc_r; + mtimer->time_delta = ((rtc & ~0xFFFFFFFFULL) | value) - rtc_r; } else { /* time for RV64 */ mtimer->time_delta = value - rtc_r; @@ -220,7 
+221,7 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, } else { if (size == 4) { /* time_hi for RV32/RV64 */ - mtimer->time_delta = (value << 32 | (rtc_r & 0xFFFFFFFF)) - rtc_r; + mtimer->time_delta = (value << 32 | (rtc & 0xFFFFFFFF)) - rtc_r; } else { qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid time_hi write: %08x", diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c index 4bdc6a5d1a..99aae8ccbe 100644 --- a/hw/intc/riscv_aplic.c +++ b/hw/intc/riscv_aplic.c @@ -31,6 +31,8 @@ #include "hw/irq.h" #include "target/riscv/cpu.h" #include "sysemu/sysemu.h" +#include "sysemu/kvm.h" +#include "kvm_riscv.h" #include "migration/vmstate.h" #define APLIC_MAX_IDC (1UL << 14) @@ -148,6 +150,15 @@ #define APLIC_IDC_CLAIMI 0x1c +/* + * KVM AIA only supports APLIC MSI, fallback to QEMU emulation if we want to use + * APLIC Wired. + */ +static bool is_kvm_aia(bool msimode) +{ + return kvm_irqchip_in_kernel() && msimode; +} + static uint32_t riscv_aplic_read_input_word(RISCVAPLICState *aplic, uint32_t word) { @@ -801,29 +812,35 @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp) uint32_t i; RISCVAPLICState *aplic = RISCV_APLIC(dev); - aplic->bitfield_words = (aplic->num_irqs + 31) >> 5; - aplic->sourcecfg = g_new0(uint32_t, aplic->num_irqs); - aplic->state = g_new0(uint32_t, aplic->num_irqs); - aplic->target = g_new0(uint32_t, aplic->num_irqs); - if (!aplic->msimode) { - for (i = 0; i < aplic->num_irqs; i++) { - aplic->target[i] = 1; + if (!is_kvm_aia(aplic->msimode)) { + aplic->bitfield_words = (aplic->num_irqs + 31) >> 5; + aplic->sourcecfg = g_new0(uint32_t, aplic->num_irqs); + aplic->state = g_new0(uint32_t, aplic->num_irqs); + aplic->target = g_new0(uint32_t, aplic->num_irqs); + if (!aplic->msimode) { + for (i = 0; i < aplic->num_irqs; i++) { + aplic->target[i] = 1; + } } - } - aplic->idelivery = g_new0(uint32_t, aplic->num_harts); - aplic->iforce = g_new0(uint32_t, aplic->num_harts); - aplic->ithreshold = g_new0(uint32_t, aplic->num_harts); + aplic->idelivery = g_new0(uint32_t, aplic->num_harts); + aplic->iforce = g_new0(uint32_t, aplic->num_harts); + aplic->ithreshold = g_new0(uint32_t, aplic->num_harts); - memory_region_init_io(&aplic->mmio, OBJECT(dev), &riscv_aplic_ops, aplic, - TYPE_RISCV_APLIC, aplic->aperture_size); - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &aplic->mmio); + memory_region_init_io(&aplic->mmio, OBJECT(dev), &riscv_aplic_ops, + aplic, TYPE_RISCV_APLIC, aplic->aperture_size); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &aplic->mmio); + } /* * Only root APLICs have hardware IRQ lines. All non-root APLICs * have IRQ lines delegated by their parent APLIC. 
*/ if (!aplic->parent) { - qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs); + if (kvm_enabled() && is_kvm_aia(aplic->msimode)) { + qdev_init_gpio_in(dev, riscv_kvm_aplic_request, aplic->num_irqs); + } else { + qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs); + } } /* Create output IRQ lines for non-MSI mode */ @@ -958,7 +975,10 @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size, qdev_prop_set_bit(dev, "mmode", mmode); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); + + if (!is_kvm_aia(msimode)) { + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); + } if (parent) { riscv_aplic_add_child(parent, dev); diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c index fea3385b51..760dbddcf7 100644 --- a/hw/intc/riscv_imsic.c +++ b/hw/intc/riscv_imsic.c @@ -32,6 +32,7 @@ #include "target/riscv/cpu.h" #include "target/riscv/cpu_bits.h" #include "sysemu/sysemu.h" +#include "sysemu/kvm.h" #include "migration/vmstate.h" #define IMSIC_MMIO_PAGE_LE 0x00 @@ -283,6 +284,20 @@ static void riscv_imsic_write(void *opaque, hwaddr addr, uint64_t value, goto err; } +#if defined(CONFIG_KVM) + if (kvm_irqchip_in_kernel()) { + struct kvm_msi msi; + + msi.address_lo = extract64(imsic->mmio.addr + addr, 0, 32); + msi.address_hi = extract64(imsic->mmio.addr + addr, 32, 32); + msi.data = le32_to_cpu(value); + + kvm_vm_ioctl(kvm_state, KVM_SIGNAL_MSI, &msi); + + return; + } +#endif + /* Writes only supported for MSI little-endian registers */ page = addr >> IMSIC_MMIO_PAGE_SHIFT; if ((addr & (IMSIC_MMIO_PAGE_SZ - 1)) == IMSIC_MMIO_PAGE_LE) { @@ -320,10 +335,12 @@ static void riscv_imsic_realize(DeviceState *dev, Error **errp) CPUState *cpu = cpu_by_arch_id(imsic->hartid); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; - imsic->num_eistate = imsic->num_pages * imsic->num_irqs; - imsic->eidelivery = g_new0(uint32_t, imsic->num_pages); - imsic->eithreshold = g_new0(uint32_t, imsic->num_pages); - imsic->eistate = g_new0(uint32_t, imsic->num_eistate); + if (!kvm_irqchip_in_kernel()) { + imsic->num_eistate = imsic->num_pages * imsic->num_irqs; + imsic->eidelivery = g_new0(uint32_t, imsic->num_pages); + imsic->eithreshold = g_new0(uint32_t, imsic->num_pages); + imsic->eistate = g_new0(uint32_t, imsic->num_eistate); + } memory_region_init_io(&imsic->mmio, OBJECT(dev), &riscv_imsic_ops, imsic, TYPE_RISCV_IMSIC, diff --git a/hw/misc/meson.build b/hw/misc/meson.build index d9a370c1de..88ecab8392 100644 --- a/hw/misc/meson.build +++ b/hw/misc/meson.build @@ -98,6 +98,9 @@ specific_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files('xlnx-versal-crl.c')) system_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: files( 'xlnx-versal-xramc.c', 'xlnx-versal-pmc-iou-slcr.c', + 'xlnx-versal-cfu.c', + 'xlnx-cfi-if.c', + 'xlnx-versal-cframe-reg.c', )) system_ss.add(when: 'CONFIG_STM32F2XX_SYSCFG', if_true: files('stm32f2xx_syscfg.c')) system_ss.add(when: 'CONFIG_STM32F4XX_SYSCFG', if_true: files('stm32f4xx_syscfg.c')) diff --git a/hw/misc/xlnx-cfi-if.c b/hw/misc/xlnx-cfi-if.c new file mode 100644 index 0000000000..c45f05c4aa --- /dev/null +++ b/hw/misc/xlnx-cfi-if.c @@ -0,0 +1,34 @@ +/* + * Xilinx CFI interface + * + * Copyright (C) 2023, Advanced Micro Devices, Inc. 
+ * + * Written by Francisco Iglesias <francisco.iglesias@amd.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include "qemu/osdep.h" +#include "hw/misc/xlnx-cfi-if.h" + +void xlnx_cfi_transfer_packet(XlnxCfiIf *cfi_if, XlnxCfiPacket *pkt) +{ + XlnxCfiIfClass *xcic = XLNX_CFI_IF_GET_CLASS(cfi_if); + + if (xcic->cfi_transfer_packet) { + xcic->cfi_transfer_packet(cfi_if, pkt); + } +} + +static const TypeInfo xlnx_cfi_if_info = { + .name = TYPE_XLNX_CFI_IF, + .parent = TYPE_INTERFACE, + .class_size = sizeof(XlnxCfiIfClass), +}; + +static void xlnx_cfi_if_register_types(void) +{ + type_register_static(&xlnx_cfi_if_info); +} + +type_init(xlnx_cfi_if_register_types) + diff --git a/hw/misc/xlnx-versal-cframe-reg.c b/hw/misc/xlnx-versal-cframe-reg.c new file mode 100644 index 0000000000..8e8ec0715a --- /dev/null +++ b/hw/misc/xlnx-versal-cframe-reg.c @@ -0,0 +1,858 @@ +/* + * QEMU model of the Configuration Frame Control module + * + * Copyright (C) 2023, Advanced Micro Devices, Inc. + * + * Written by Francisco Iglesias <francisco.iglesias@amd.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/sysbus.h" +#include "hw/register.h" +#include "hw/registerfields.h" +#include "qemu/bitops.h" +#include "qemu/log.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "hw/qdev-properties.h" +#include "migration/vmstate.h" +#include "hw/irq.h" +#include "hw/misc/xlnx-versal-cframe-reg.h" + +#ifndef XLNX_VERSAL_CFRAME_REG_ERR_DEBUG +#define XLNX_VERSAL_CFRAME_REG_ERR_DEBUG 0 +#endif + +#define KEYHOLE_STREAM_4K (4 * KiB) +#define N_WORDS_128BIT 4 + +#define MAX_BLOCKTYPE 6 +#define MAX_BLOCKTYPE_FRAMES 0xFFFFF + +enum { + CFRAME_CMD_WCFG = 1, + CFRAME_CMD_ROWON = 2, + CFRAME_CMD_ROWOFF = 3, + CFRAME_CMD_RCFG = 4, + CFRAME_CMD_DLPARK = 5, +}; + +static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data) +{ + guint ua = GPOINTER_TO_UINT(a); + guint ub = GPOINTER_TO_UINT(b); + return (ua > ub) - (ua < ub); +} + +static void cfrm_imr_update_irq(XlnxVersalCFrameReg *s) +{ + bool pending = s->regs[R_CFRM_ISR0] & ~s->regs[R_CFRM_IMR0]; + qemu_set_irq(s->irq_cfrm_imr, pending); +} + +static void cfrm_isr_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque); + cfrm_imr_update_irq(s); +} + +static uint64_t cfrm_ier_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque); + + s->regs[R_CFRM_IMR0] &= ~s->regs[R_CFRM_IER0]; + s->regs[R_CFRM_IER0] = 0; + cfrm_imr_update_irq(s); + return 0; +} + +static uint64_t cfrm_idr_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque); + + s->regs[R_CFRM_IMR0] |= s->regs[R_CFRM_IDR0]; + s->regs[R_CFRM_IDR0] = 0; + cfrm_imr_update_irq(s); + return 0; +} + +static uint64_t cfrm_itr_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque); + + s->regs[R_CFRM_ISR0] |= s->regs[R_CFRM_ITR0]; + s->regs[R_CFRM_ITR0] = 0; + cfrm_imr_update_irq(s); + return 0; +} + +static void cframe_incr_far(XlnxVersalCFrameReg *s) +{ + uint32_t faddr = ARRAY_FIELD_EX32(s->regs, FAR0, FRAME_ADDR); + uint32_t blktype = ARRAY_FIELD_EX32(s->regs, FAR0, BLOCKTYPE); + + assert(blktype <= MAX_BLOCKTYPE); + + faddr++; + if (faddr > s->cfg.blktype_num_frames[blktype]) { + /* Restart from 0 and increment block type */ + faddr = 0; + blktype++; + + assert(blktype <= MAX_BLOCKTYPE); + + ARRAY_FIELD_DP32(s->regs, FAR0, BLOCKTYPE, 
blktype); + } + + ARRAY_FIELD_DP32(s->regs, FAR0, FRAME_ADDR, faddr); +} + +static void cfrm_fdri_post_write(RegisterInfo *reg, uint64_t val) +{ + XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque); + + if (s->row_configured && s->rowon && s->wcfg) { + + if (fifo32_num_free(&s->new_f_data) >= N_WORDS_128BIT) { + fifo32_push(&s->new_f_data, s->regs[R_FDRI0]); + fifo32_push(&s->new_f_data, s->regs[R_FDRI1]); + fifo32_push(&s->new_f_data, s->regs[R_FDRI2]); + fifo32_push(&s->new_f_data, s->regs[R_FDRI3]); + } + + if (fifo32_is_full(&s->new_f_data)) { + uint32_t addr = extract32(s->regs[R_FAR0], 0, 23); + XlnxCFrame *f = g_new(XlnxCFrame, 1); + + for (int i = 0; i < FRAME_NUM_WORDS; i++) { + f->data[i] = fifo32_pop(&s->new_f_data); + } + + g_tree_replace(s->cframes, GUINT_TO_POINTER(addr), f); + + cframe_incr_far(s); + + fifo32_reset(&s->new_f_data); + } + } +} + +static void cfrm_readout_frames(XlnxVersalCFrameReg *s, uint32_t start_addr, + uint32_t end_addr) +{ + /* + * NB: when our minimum glib version is at least 2.68 we can improve the + * performance of the cframe traversal by using g_tree_lookup_node and + * g_tree_node_next (instead of calling g_tree_lookup for finding each + * cframe). + */ + for (uint32_t addr = start_addr; addr < end_addr; addr++) { + XlnxCFrame *f = g_tree_lookup(s->cframes, GUINT_TO_POINTER(addr)); + + /* Transmit the data if a frame was found */ + if (f) { + for (int i = 0; i < FRAME_NUM_WORDS; i += 4) { + XlnxCfiPacket pkt = {}; + + pkt.data[0] = f->data[i]; + pkt.data[1] = f->data[i + 1]; + pkt.data[2] = f->data[i + 2]; + pkt.data[3] = f->data[i + 3]; + + if (s->cfg.cfu_fdro) { + xlnx_cfi_transfer_packet(s->cfg.cfu_fdro, &pkt); + } + } + } + } +} + +static void cfrm_frcnt_post_write(RegisterInfo *reg, uint64_t val) +{ + XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque); + + if (s->row_configured && s->rowon && s->rcfg) { + uint32_t start_addr = extract32(s->regs[R_FAR0], 0, 23); + uint32_t end_addr = start_addr + s->regs[R_FRCNT0] / FRAME_NUM_QWORDS; + + cfrm_readout_frames(s, start_addr, end_addr); + } +} + +static void cfrm_cmd_post_write(RegisterInfo *reg, uint64_t val) +{ + XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque); + + if (s->row_configured) { + uint8_t cmd = ARRAY_FIELD_EX32(s->regs, CMD0, CMD); + + switch (cmd) { + case CFRAME_CMD_WCFG: + s->wcfg = true; + break; + case CFRAME_CMD_ROWON: + s->rowon = true; + break; + case CFRAME_CMD_ROWOFF: + s->rowon = false; + break; + case CFRAME_CMD_RCFG: + s->rcfg = true; + break; + case CFRAME_CMD_DLPARK: + s->wcfg = false; + s->rcfg = false; + break; + default: + break; + }; + } +} + +static uint64_t cfrm_last_frame_bot_post_read(RegisterInfo *reg, + uint64_t val64) +{ + XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque); + uint64_t val = 0; + + switch (reg->access->addr) { + case A_LAST_FRAME_BOT0: + val = FIELD_DP32(val, LAST_FRAME_BOT0, BLOCKTYPE1_LAST_FRAME_LSB, + s->cfg.blktype_num_frames[1]); + val = FIELD_DP32(val, LAST_FRAME_BOT0, BLOCKTYPE0_LAST_FRAME, + s->cfg.blktype_num_frames[0]); + break; + case A_LAST_FRAME_BOT1: + val = FIELD_DP32(val, LAST_FRAME_BOT1, BLOCKTYPE3_LAST_FRAME_LSB, + s->cfg.blktype_num_frames[3]); + val = FIELD_DP32(val, LAST_FRAME_BOT1, BLOCKTYPE2_LAST_FRAME, + s->cfg.blktype_num_frames[2]); + val = FIELD_DP32(val, LAST_FRAME_BOT1, BLOCKTYPE1_LAST_FRAME_MSB, + (s->cfg.blktype_num_frames[1] >> 12)); + break; + case A_LAST_FRAME_BOT2: + val = FIELD_DP32(val, LAST_FRAME_BOT2, BLOCKTYPE3_LAST_FRAME_MSB, + (s->cfg.blktype_num_frames[3] >> 
4)); + break; + case A_LAST_FRAME_BOT3: + default: + break; + } + + return val; +} + +static uint64_t cfrm_last_frame_top_post_read(RegisterInfo *reg, + uint64_t val64) +{ + XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque); + uint64_t val = 0; + + switch (reg->access->addr) { + case A_LAST_FRAME_TOP0: + val = FIELD_DP32(val, LAST_FRAME_TOP0, BLOCKTYPE5_LAST_FRAME_LSB, + s->cfg.blktype_num_frames[5]); + val = FIELD_DP32(val, LAST_FRAME_TOP0, BLOCKTYPE4_LAST_FRAME, + s->cfg.blktype_num_frames[4]); + break; + case A_LAST_FRAME_TOP1: + val = FIELD_DP32(val, LAST_FRAME_TOP1, BLOCKTYPE6_LAST_FRAME, + s->cfg.blktype_num_frames[6]); + val = FIELD_DP32(val, LAST_FRAME_TOP1, BLOCKTYPE5_LAST_FRAME_MSB, + (s->cfg.blktype_num_frames[5] >> 12)); + break; + case A_LAST_FRAME_TOP2: + case A_LAST_FRAME_BOT3: + default: + break; + } + + return val; +} + +static void cfrm_far_sfr_post_write(RegisterInfo *reg, uint64_t val) +{ + XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(reg->opaque); + + if (s->row_configured && s->rowon && s->rcfg) { + uint32_t start_addr = extract32(s->regs[R_FAR_SFR0], 0, 23); + + /* Readback 1 frame */ + cfrm_readout_frames(s, start_addr, start_addr + 1); + } +} + +static const RegisterAccessInfo cframe_reg_regs_info[] = { + { .name = "CRC0", .addr = A_CRC0, + .rsvd = 0x00000000, + },{ .name = "CRC1", .addr = A_CRC0, + .rsvd = 0xffffffff, + },{ .name = "CRC2", .addr = A_CRC0, + .rsvd = 0xffffffff, + },{ .name = "CRC3", .addr = A_CRC0, + .rsvd = 0xffffffff, + },{ .name = "FAR0", .addr = A_FAR0, + .rsvd = 0xfe000000, + },{ .name = "FAR1", .addr = A_FAR1, + .rsvd = 0xffffffff, + },{ .name = "FAR2", .addr = A_FAR2, + .rsvd = 0xffffffff, + },{ .name = "FAR3", .addr = A_FAR3, + .rsvd = 0xffffffff, + },{ .name = "FAR_SFR0", .addr = A_FAR_SFR0, + .rsvd = 0xff800000, + },{ .name = "FAR_SFR1", .addr = A_FAR_SFR1, + .rsvd = 0xffffffff, + },{ .name = "FAR_SFR2", .addr = A_FAR_SFR2, + .rsvd = 0xffffffff, + },{ .name = "FAR_SFR3", .addr = A_FAR_SFR3, + .rsvd = 0xffffffff, + .post_write = cfrm_far_sfr_post_write, + },{ .name = "FDRI0", .addr = A_FDRI0, + },{ .name = "FDRI1", .addr = A_FDRI1, + },{ .name = "FDRI2", .addr = A_FDRI2, + },{ .name = "FDRI3", .addr = A_FDRI3, + .post_write = cfrm_fdri_post_write, + },{ .name = "FRCNT0", .addr = A_FRCNT0, + .rsvd = 0x00000000, + },{ .name = "FRCNT1", .addr = A_FRCNT1, + .rsvd = 0xffffffff, + },{ .name = "FRCNT2", .addr = A_FRCNT2, + .rsvd = 0xffffffff, + },{ .name = "FRCNT3", .addr = A_FRCNT3, + .rsvd = 0xffffffff, + .post_write = cfrm_frcnt_post_write + },{ .name = "CMD0", .addr = A_CMD0, + .rsvd = 0xffffffe0, + },{ .name = "CMD1", .addr = A_CMD1, + .rsvd = 0xffffffff, + },{ .name = "CMD2", .addr = A_CMD2, + .rsvd = 0xffffffff, + },{ .name = "CMD3", .addr = A_CMD3, + .rsvd = 0xffffffff, + .post_write = cfrm_cmd_post_write + },{ .name = "CR_MASK0", .addr = A_CR_MASK0, + .rsvd = 0x00000000, + },{ .name = "CR_MASK1", .addr = A_CR_MASK1, + .rsvd = 0x00000000, + },{ .name = "CR_MASK2", .addr = A_CR_MASK2, + .rsvd = 0x00000000, + },{ .name = "CR_MASK3", .addr = A_CR_MASK3, + .rsvd = 0xffffffff, + },{ .name = "CTL0", .addr = A_CTL0, + .rsvd = 0xfffffff8, + },{ .name = "CTL1", .addr = A_CTL1, + .rsvd = 0xffffffff, + },{ .name = "CTL2", .addr = A_CTL2, + .rsvd = 0xffffffff, + },{ .name = "CTL3", .addr = A_CTL3, + .rsvd = 0xffffffff, + },{ .name = "CFRM_ISR0", .addr = A_CFRM_ISR0, + .rsvd = 0xffc04000, + .w1c = 0x3bfff, + },{ .name = "CFRM_ISR1", .addr = A_CFRM_ISR1, + .rsvd = 0xffffffff, + },{ .name = "CFRM_ISR2", .addr = A_CFRM_ISR2, + .rsvd = 
0xffffffff, + },{ .name = "CFRM_ISR3", .addr = A_CFRM_ISR3, + .rsvd = 0xffffffff, + .post_write = cfrm_isr_postw, + },{ .name = "CFRM_IMR0", .addr = A_CFRM_IMR0, + .rsvd = 0xffc04000, + .ro = 0xfffff, + .reset = 0x3bfff, + },{ .name = "CFRM_IMR1", .addr = A_CFRM_IMR1, + .rsvd = 0xffffffff, + },{ .name = "CFRM_IMR2", .addr = A_CFRM_IMR2, + .rsvd = 0xffffffff, + },{ .name = "CFRM_IMR3", .addr = A_CFRM_IMR3, + .rsvd = 0xffffffff, + },{ .name = "CFRM_IER0", .addr = A_CFRM_IER0, + .rsvd = 0xffc04000, + },{ .name = "CFRM_IER1", .addr = A_CFRM_IER1, + .rsvd = 0xffffffff, + },{ .name = "CFRM_IER2", .addr = A_CFRM_IER2, + .rsvd = 0xffffffff, + },{ .name = "CFRM_IER3", .addr = A_CFRM_IER3, + .rsvd = 0xffffffff, + .pre_write = cfrm_ier_prew, + },{ .name = "CFRM_IDR0", .addr = A_CFRM_IDR0, + .rsvd = 0xffc04000, + },{ .name = "CFRM_IDR1", .addr = A_CFRM_IDR1, + .rsvd = 0xffffffff, + },{ .name = "CFRM_IDR2", .addr = A_CFRM_IDR2, + .rsvd = 0xffffffff, + },{ .name = "CFRM_IDR3", .addr = A_CFRM_IDR3, + .rsvd = 0xffffffff, + .pre_write = cfrm_idr_prew, + },{ .name = "CFRM_ITR0", .addr = A_CFRM_ITR0, + .rsvd = 0xffc04000, + },{ .name = "CFRM_ITR1", .addr = A_CFRM_ITR1, + .rsvd = 0xffffffff, + },{ .name = "CFRM_ITR2", .addr = A_CFRM_ITR2, + .rsvd = 0xffffffff, + },{ .name = "CFRM_ITR3", .addr = A_CFRM_ITR3, + .rsvd = 0xffffffff, + .pre_write = cfrm_itr_prew, + },{ .name = "SEU_SYNDRM00", .addr = A_SEU_SYNDRM00, + },{ .name = "SEU_SYNDRM01", .addr = A_SEU_SYNDRM01, + },{ .name = "SEU_SYNDRM02", .addr = A_SEU_SYNDRM02, + },{ .name = "SEU_SYNDRM03", .addr = A_SEU_SYNDRM03, + },{ .name = "SEU_SYNDRM10", .addr = A_SEU_SYNDRM10, + },{ .name = "SEU_SYNDRM11", .addr = A_SEU_SYNDRM11, + },{ .name = "SEU_SYNDRM12", .addr = A_SEU_SYNDRM12, + },{ .name = "SEU_SYNDRM13", .addr = A_SEU_SYNDRM13, + },{ .name = "SEU_SYNDRM20", .addr = A_SEU_SYNDRM20, + },{ .name = "SEU_SYNDRM21", .addr = A_SEU_SYNDRM21, + },{ .name = "SEU_SYNDRM22", .addr = A_SEU_SYNDRM22, + },{ .name = "SEU_SYNDRM23", .addr = A_SEU_SYNDRM23, + },{ .name = "SEU_SYNDRM30", .addr = A_SEU_SYNDRM30, + },{ .name = "SEU_SYNDRM31", .addr = A_SEU_SYNDRM31, + },{ .name = "SEU_SYNDRM32", .addr = A_SEU_SYNDRM32, + },{ .name = "SEU_SYNDRM33", .addr = A_SEU_SYNDRM33, + },{ .name = "SEU_VIRTUAL_SYNDRM0", .addr = A_SEU_VIRTUAL_SYNDRM0, + },{ .name = "SEU_VIRTUAL_SYNDRM1", .addr = A_SEU_VIRTUAL_SYNDRM1, + },{ .name = "SEU_VIRTUAL_SYNDRM2", .addr = A_SEU_VIRTUAL_SYNDRM2, + },{ .name = "SEU_VIRTUAL_SYNDRM3", .addr = A_SEU_VIRTUAL_SYNDRM3, + },{ .name = "SEU_CRC0", .addr = A_SEU_CRC0, + },{ .name = "SEU_CRC1", .addr = A_SEU_CRC1, + },{ .name = "SEU_CRC2", .addr = A_SEU_CRC2, + },{ .name = "SEU_CRC3", .addr = A_SEU_CRC3, + },{ .name = "CFRAME_FAR_BOT0", .addr = A_CFRAME_FAR_BOT0, + },{ .name = "CFRAME_FAR_BOT1", .addr = A_CFRAME_FAR_BOT1, + },{ .name = "CFRAME_FAR_BOT2", .addr = A_CFRAME_FAR_BOT2, + },{ .name = "CFRAME_FAR_BOT3", .addr = A_CFRAME_FAR_BOT3, + },{ .name = "CFRAME_FAR_TOP0", .addr = A_CFRAME_FAR_TOP0, + },{ .name = "CFRAME_FAR_TOP1", .addr = A_CFRAME_FAR_TOP1, + },{ .name = "CFRAME_FAR_TOP2", .addr = A_CFRAME_FAR_TOP2, + },{ .name = "CFRAME_FAR_TOP3", .addr = A_CFRAME_FAR_TOP3, + },{ .name = "LAST_FRAME_BOT0", .addr = A_LAST_FRAME_BOT0, + .ro = 0xffffffff, + .post_read = cfrm_last_frame_bot_post_read, + },{ .name = "LAST_FRAME_BOT1", .addr = A_LAST_FRAME_BOT1, + .ro = 0xffffffff, + .post_read = cfrm_last_frame_bot_post_read, + },{ .name = "LAST_FRAME_BOT2", .addr = A_LAST_FRAME_BOT2, + .ro = 0xffffffff, + .post_read = cfrm_last_frame_bot_post_read, + },{ .name = 
"LAST_FRAME_BOT3", .addr = A_LAST_FRAME_BOT3, + .ro = 0xffffffff, + .post_read = cfrm_last_frame_bot_post_read, + },{ .name = "LAST_FRAME_TOP0", .addr = A_LAST_FRAME_TOP0, + .ro = 0xffffffff, + .post_read = cfrm_last_frame_top_post_read, + },{ .name = "LAST_FRAME_TOP1", .addr = A_LAST_FRAME_TOP1, + .ro = 0xffffffff, + .post_read = cfrm_last_frame_top_post_read, + },{ .name = "LAST_FRAME_TOP2", .addr = A_LAST_FRAME_TOP2, + .ro = 0xffffffff, + .post_read = cfrm_last_frame_top_post_read, + },{ .name = "LAST_FRAME_TOP3", .addr = A_LAST_FRAME_TOP3, + .ro = 0xffffffff, + .post_read = cfrm_last_frame_top_post_read, + } +}; + +static void cframe_reg_cfi_transfer_packet(XlnxCfiIf *cfi_if, + XlnxCfiPacket *pkt) +{ + XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(cfi_if); + uint64_t we = MAKE_64BIT_MASK(0, 4 * 8); + + if (!s->row_configured) { + return; + } + + switch (pkt->reg_addr) { + case CFRAME_FAR: + s->regs[R_FAR0] = pkt->data[0]; + break; + case CFRAME_SFR: + s->regs[R_FAR_SFR0] = pkt->data[0]; + register_write(&s->regs_info[R_FAR_SFR3], 0, + we, object_get_typename(OBJECT(s)), + XLNX_VERSAL_CFRAME_REG_ERR_DEBUG); + break; + case CFRAME_FDRI: + s->regs[R_FDRI0] = pkt->data[0]; + s->regs[R_FDRI1] = pkt->data[1]; + s->regs[R_FDRI2] = pkt->data[2]; + register_write(&s->regs_info[R_FDRI3], pkt->data[3], + we, object_get_typename(OBJECT(s)), + XLNX_VERSAL_CFRAME_REG_ERR_DEBUG); + break; + case CFRAME_CMD: + ARRAY_FIELD_DP32(s->regs, CMD0, CMD, pkt->data[0]); + + register_write(&s->regs_info[R_CMD3], 0, + we, object_get_typename(OBJECT(s)), + XLNX_VERSAL_CFRAME_REG_ERR_DEBUG); + break; + default: + break; + } +} + +static uint64_t cframe_reg_fdri_read(void *opaque, hwaddr addr, unsigned size) +{ + qemu_log_mask(LOG_GUEST_ERROR, "%s: Unsupported read from addr=%" + HWADDR_PRIx "\n", __func__, addr); + return 0; +} + +static void cframe_reg_fdri_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(opaque); + uint32_t wfifo[WFIFO_SZ]; + + if (update_wfifo(addr, value, s->wfifo, wfifo)) { + uint64_t we = MAKE_64BIT_MASK(0, 4 * 8); + + s->regs[R_FDRI0] = wfifo[0]; + s->regs[R_FDRI1] = wfifo[1]; + s->regs[R_FDRI2] = wfifo[2]; + register_write(&s->regs_info[R_FDRI3], wfifo[3], + we, object_get_typename(OBJECT(s)), + XLNX_VERSAL_CFRAME_REG_ERR_DEBUG); + } +} + +static void cframe_reg_reset_enter(Object *obj, ResetType type) +{ + XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(obj); + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) { + register_reset(&s->regs_info[i]); + } + memset(s->wfifo, 0, WFIFO_SZ * sizeof(uint32_t)); + fifo32_reset(&s->new_f_data); + + if (g_tree_nnodes(s->cframes)) { + /* + * Take a reference so when g_tree_destroy() unrefs it we keep the + * GTree and only destroy its contents. NB: when our minimum + * glib version is at least 2.70 we could use g_tree_remove_all(). 
+ */ + g_tree_ref(s->cframes); + g_tree_destroy(s->cframes); + } +} + +static void cframe_reg_reset_hold(Object *obj) +{ + XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(obj); + + cfrm_imr_update_irq(s); +} + +static const MemoryRegionOps cframe_reg_ops = { + .read = register_read_memory, + .write = register_write_memory, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static const MemoryRegionOps cframe_reg_fdri_ops = { + .read = cframe_reg_fdri_read, + .write = cframe_reg_fdri_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static uint64_t cframes_bcast_reg_read(void *opaque, hwaddr addr, unsigned size) +{ + qemu_log_mask(LOG_GUEST_ERROR, "%s: Unsupported read from addr=%" + HWADDR_PRIx "\n", __func__, addr); + return 0; +} + +static void cframes_bcast_write(XlnxVersalCFrameBcastReg *s, uint8_t reg_addr, + uint32_t *wfifo) +{ + XlnxCfiPacket pkt = { + .reg_addr = reg_addr, + .data[0] = wfifo[0], + .data[1] = wfifo[1], + .data[2] = wfifo[2], + .data[3] = wfifo[3] + }; + + for (int i = 0; i < ARRAY_SIZE(s->cfg.cframe); i++) { + if (s->cfg.cframe[i]) { + xlnx_cfi_transfer_packet(s->cfg.cframe[i], &pkt); + } + } +} + +static void cframes_bcast_reg_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + XlnxVersalCFrameBcastReg *s = XLNX_VERSAL_CFRAME_BCAST_REG(opaque); + uint32_t wfifo[WFIFO_SZ]; + + if (update_wfifo(addr, value, s->wfifo, wfifo)) { + uint8_t reg_addr = extract32(addr, 4, 6); + + cframes_bcast_write(s, reg_addr, wfifo); + } +} + +static uint64_t cframes_bcast_fdri_read(void *opaque, hwaddr addr, + unsigned size) +{ + qemu_log_mask(LOG_GUEST_ERROR, "%s: Unsupported read from addr=%" + HWADDR_PRIx "\n", __func__, addr); + return 0; +} + +static void cframes_bcast_fdri_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + XlnxVersalCFrameBcastReg *s = XLNX_VERSAL_CFRAME_BCAST_REG(opaque); + uint32_t wfifo[WFIFO_SZ]; + + if (update_wfifo(addr, value, s->wfifo, wfifo)) { + cframes_bcast_write(s, CFRAME_FDRI, wfifo); + } +} + +static const MemoryRegionOps cframes_bcast_reg_reg_ops = { + .read = cframes_bcast_reg_read, + .write = cframes_bcast_reg_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static const MemoryRegionOps cframes_bcast_reg_fdri_ops = { + .read = cframes_bcast_fdri_read, + .write = cframes_bcast_fdri_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void cframe_reg_realize(DeviceState *dev, Error **errp) +{ + XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(dev); + + for (int i = 0; i < ARRAY_SIZE(s->cfg.blktype_num_frames); i++) { + if (s->cfg.blktype_num_frames[i] > MAX_BLOCKTYPE_FRAMES) { + error_setg(errp, + "blktype-frames%d > 0xFFFFF (max frame per block)", + i); + return; + } + if (s->cfg.blktype_num_frames[i]) { + s->row_configured = true; + } + } +} + +static void cframe_reg_init(Object *obj) +{ + XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + RegisterInfoArray *reg_array; + + memory_region_init(&s->iomem, obj, TYPE_XLNX_VERSAL_CFRAME_REG, + CFRAME_REG_R_MAX * 4); + reg_array = + register_init_block32(DEVICE(obj), cframe_reg_regs_info, + ARRAY_SIZE(cframe_reg_regs_info), + s->regs_info, s->regs, + &cframe_reg_ops, + XLNX_VERSAL_CFRAME_REG_ERR_DEBUG, + CFRAME_REG_R_MAX * 4); + 
memory_region_add_subregion(&s->iomem, + 0x0, + ®_array->mem); + sysbus_init_mmio(sbd, &s->iomem); + memory_region_init_io(&s->iomem_fdri, obj, &cframe_reg_fdri_ops, s, + TYPE_XLNX_VERSAL_CFRAME_REG "-fdri", + KEYHOLE_STREAM_4K); + sysbus_init_mmio(sbd, &s->iomem_fdri); + sysbus_init_irq(sbd, &s->irq_cfrm_imr); + + s->cframes = g_tree_new_full((GCompareDataFunc)int_cmp, NULL, + NULL, (GDestroyNotify)g_free); + fifo32_create(&s->new_f_data, FRAME_NUM_WORDS); +} + +static const VMStateDescription vmstate_cframe = { + .name = "cframe", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(data, XlnxCFrame, FRAME_NUM_WORDS), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_cframe_reg = { + .name = TYPE_XLNX_VERSAL_CFRAME_REG, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(wfifo, XlnxVersalCFrameReg, 4), + VMSTATE_UINT32_ARRAY(regs, XlnxVersalCFrameReg, CFRAME_REG_R_MAX), + VMSTATE_BOOL(rowon, XlnxVersalCFrameReg), + VMSTATE_BOOL(wcfg, XlnxVersalCFrameReg), + VMSTATE_BOOL(rcfg, XlnxVersalCFrameReg), + VMSTATE_GTREE_DIRECT_KEY_V(cframes, XlnxVersalCFrameReg, 1, + &vmstate_cframe, XlnxCFrame), + VMSTATE_FIFO32(new_f_data, XlnxVersalCFrameReg), + VMSTATE_END_OF_LIST(), + } +}; + +static Property cframe_regs_props[] = { + DEFINE_PROP_LINK("cfu-fdro", XlnxVersalCFrameReg, cfg.cfu_fdro, + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_UINT32("blktype0-frames", XlnxVersalCFrameReg, + cfg.blktype_num_frames[0], 0), + DEFINE_PROP_UINT32("blktype1-frames", XlnxVersalCFrameReg, + cfg.blktype_num_frames[1], 0), + DEFINE_PROP_UINT32("blktype2-frames", XlnxVersalCFrameReg, + cfg.blktype_num_frames[2], 0), + DEFINE_PROP_UINT32("blktype3-frames", XlnxVersalCFrameReg, + cfg.blktype_num_frames[3], 0), + DEFINE_PROP_UINT32("blktype4-frames", XlnxVersalCFrameReg, + cfg.blktype_num_frames[4], 0), + DEFINE_PROP_UINT32("blktype5-frames", XlnxVersalCFrameReg, + cfg.blktype_num_frames[5], 0), + DEFINE_PROP_UINT32("blktype6-frames", XlnxVersalCFrameReg, + cfg.blktype_num_frames[6], 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void cframe_bcast_reg_init(Object *obj) +{ + XlnxVersalCFrameBcastReg *s = XLNX_VERSAL_CFRAME_BCAST_REG(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + memory_region_init_io(&s->iomem_reg, obj, &cframes_bcast_reg_reg_ops, s, + TYPE_XLNX_VERSAL_CFRAME_BCAST_REG, KEYHOLE_STREAM_4K); + memory_region_init_io(&s->iomem_fdri, obj, &cframes_bcast_reg_fdri_ops, s, + TYPE_XLNX_VERSAL_CFRAME_BCAST_REG "-fdri", + KEYHOLE_STREAM_4K); + sysbus_init_mmio(sbd, &s->iomem_reg); + sysbus_init_mmio(sbd, &s->iomem_fdri); +} + +static void cframe_bcast_reg_reset_enter(Object *obj, ResetType type) +{ + XlnxVersalCFrameBcastReg *s = XLNX_VERSAL_CFRAME_BCAST_REG(obj); + + memset(s->wfifo, 0, WFIFO_SZ * sizeof(uint32_t)); +} + +static const VMStateDescription vmstate_cframe_bcast_reg = { + .name = TYPE_XLNX_VERSAL_CFRAME_BCAST_REG, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(wfifo, XlnxVersalCFrameBcastReg, 4), + VMSTATE_END_OF_LIST(), + } +}; + +static Property cframe_bcast_regs_props[] = { + DEFINE_PROP_LINK("cframe0", XlnxVersalCFrameBcastReg, cfg.cframe[0], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe1", XlnxVersalCFrameBcastReg, cfg.cframe[1], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe2", XlnxVersalCFrameBcastReg, cfg.cframe[2], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe3", 
XlnxVersalCFrameBcastReg, cfg.cframe[3], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe4", XlnxVersalCFrameBcastReg, cfg.cframe[4], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe5", XlnxVersalCFrameBcastReg, cfg.cframe[5], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe6", XlnxVersalCFrameBcastReg, cfg.cframe[6], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe7", XlnxVersalCFrameBcastReg, cfg.cframe[7], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe8", XlnxVersalCFrameBcastReg, cfg.cframe[8], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe9", XlnxVersalCFrameBcastReg, cfg.cframe[9], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe10", XlnxVersalCFrameBcastReg, cfg.cframe[10], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe11", XlnxVersalCFrameBcastReg, cfg.cframe[11], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe12", XlnxVersalCFrameBcastReg, cfg.cframe[12], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe13", XlnxVersalCFrameBcastReg, cfg.cframe[13], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe14", XlnxVersalCFrameBcastReg, cfg.cframe[14], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void cframe_reg_class_init(ObjectClass *klass, void *data) +{ + ResettableClass *rc = RESETTABLE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + XlnxCfiIfClass *xcic = XLNX_CFI_IF_CLASS(klass); + + dc->vmsd = &vmstate_cframe_reg; + dc->realize = cframe_reg_realize; + rc->phases.enter = cframe_reg_reset_enter; + rc->phases.hold = cframe_reg_reset_hold; + device_class_set_props(dc, cframe_regs_props); + xcic->cfi_transfer_packet = cframe_reg_cfi_transfer_packet; +} + +static void cframe_bcast_reg_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); + + dc->vmsd = &vmstate_cframe_bcast_reg; + device_class_set_props(dc, cframe_bcast_regs_props); + rc->phases.enter = cframe_bcast_reg_reset_enter; +} + +static const TypeInfo cframe_reg_info = { + .name = TYPE_XLNX_VERSAL_CFRAME_REG, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(XlnxVersalCFrameReg), + .class_init = cframe_reg_class_init, + .instance_init = cframe_reg_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_XLNX_CFI_IF }, + { } + } +}; + +static const TypeInfo cframe_bcast_reg_info = { + .name = TYPE_XLNX_VERSAL_CFRAME_BCAST_REG, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(XlnxVersalCFrameBcastReg), + .class_init = cframe_bcast_reg_class_init, + .instance_init = cframe_bcast_reg_init, +}; + +static void cframe_reg_register_types(void) +{ + type_register_static(&cframe_reg_info); + type_register_static(&cframe_bcast_reg_info); +} + +type_init(cframe_reg_register_types) diff --git a/hw/misc/xlnx-versal-cfu.c b/hw/misc/xlnx-versal-cfu.c new file mode 100644 index 0000000000..8e588ac1d8 --- /dev/null +++ b/hw/misc/xlnx-versal-cfu.c @@ -0,0 +1,563 @@ +/* + * QEMU model of the CFU Configuration Unit. + * + * Copyright (C) 2023, Advanced Micro Devices, Inc. + * + * Written by Edgar E. 
Iglesias <edgar.iglesias@gmail.com>, + * Sai Pavan Boddu <sai.pavan.boddu@amd.com>, + * Francisco Iglesias <francisco.iglesias@amd.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/sysbus.h" +#include "hw/register.h" +#include "hw/irq.h" +#include "qemu/bitops.h" +#include "qemu/log.h" +#include "qemu/units.h" +#include "migration/vmstate.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "hw/misc/xlnx-versal-cfu.h" + +#ifndef XLNX_VERSAL_CFU_APB_ERR_DEBUG +#define XLNX_VERSAL_CFU_APB_ERR_DEBUG 0 +#endif + +#define KEYHOLE_STREAM_4K (4 * KiB) +#define KEYHOLE_STREAM_256K (256 * KiB) +#define CFRAME_BROADCAST_ROW 0x1F + +bool update_wfifo(hwaddr addr, uint64_t value, + uint32_t *wfifo, uint32_t *wfifo_ret) +{ + unsigned int idx = extract32(addr, 2, 2); + + wfifo[idx] = value; + + if (idx == 3) { + memcpy(wfifo_ret, wfifo, WFIFO_SZ * sizeof(uint32_t)); + memset(wfifo, 0, WFIFO_SZ * sizeof(uint32_t)); + return true; + } + + return false; +} + +static void cfu_imr_update_irq(XlnxVersalCFUAPB *s) +{ + bool pending = s->regs[R_CFU_ISR] & ~s->regs[R_CFU_IMR]; + qemu_set_irq(s->irq_cfu_imr, pending); +} + +static void cfu_isr_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalCFUAPB *s = XLNX_VERSAL_CFU_APB(reg->opaque); + cfu_imr_update_irq(s); +} + +static uint64_t cfu_ier_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalCFUAPB *s = XLNX_VERSAL_CFU_APB(reg->opaque); + uint32_t val = val64; + + s->regs[R_CFU_IMR] &= ~val; + cfu_imr_update_irq(s); + return 0; +} + +static uint64_t cfu_idr_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalCFUAPB *s = XLNX_VERSAL_CFU_APB(reg->opaque); + uint32_t val = val64; + + s->regs[R_CFU_IMR] |= val; + cfu_imr_update_irq(s); + return 0; +} + +static uint64_t cfu_itr_prew(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalCFUAPB *s = XLNX_VERSAL_CFU_APB(reg->opaque); + uint32_t val = val64; + + s->regs[R_CFU_ISR] |= val; + cfu_imr_update_irq(s); + return 0; +} + +static void cfu_fgcr_postw(RegisterInfo *reg, uint64_t val64) +{ + XlnxVersalCFUAPB *s = XLNX_VERSAL_CFU_APB(reg->opaque); + uint32_t val = (uint32_t)val64; + + /* Do a scan. It always looks good. 
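No real scan-clear is modelled; SCAN_CLEAR_PASS and SCAN_CLEAR_DONE are simply reported as set.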
*/ + if (FIELD_EX32(val, CFU_FGCR, SC_HBC_TRIGGER)) { + ARRAY_FIELD_DP32(s->regs, CFU_STATUS, SCAN_CLEAR_PASS, 1); + ARRAY_FIELD_DP32(s->regs, CFU_STATUS, SCAN_CLEAR_DONE, 1); + } +} + +static const RegisterAccessInfo cfu_apb_regs_info[] = { + { .name = "CFU_ISR", .addr = A_CFU_ISR, + .rsvd = 0xfffffc00, + .w1c = 0x3ff, + .post_write = cfu_isr_postw, + },{ .name = "CFU_IMR", .addr = A_CFU_IMR, + .reset = 0x3ff, + .rsvd = 0xfffffc00, + .ro = 0x3ff, + },{ .name = "CFU_IER", .addr = A_CFU_IER, + .rsvd = 0xfffffc00, + .pre_write = cfu_ier_prew, + },{ .name = "CFU_IDR", .addr = A_CFU_IDR, + .rsvd = 0xfffffc00, + .pre_write = cfu_idr_prew, + },{ .name = "CFU_ITR", .addr = A_CFU_ITR, + .rsvd = 0xfffffc00, + .pre_write = cfu_itr_prew, + },{ .name = "CFU_PROTECT", .addr = A_CFU_PROTECT, + .reset = 0x1, + },{ .name = "CFU_FGCR", .addr = A_CFU_FGCR, + .rsvd = 0xffff8000, + .post_write = cfu_fgcr_postw, + },{ .name = "CFU_CTL", .addr = A_CFU_CTL, + .rsvd = 0xffff0000, + },{ .name = "CFU_CRAM_RW", .addr = A_CFU_CRAM_RW, + .reset = 0x401f7d9, + .rsvd = 0xf8000000, + },{ .name = "CFU_MASK", .addr = A_CFU_MASK, + },{ .name = "CFU_CRC_EXPECT", .addr = A_CFU_CRC_EXPECT, + },{ .name = "CFU_CFRAME_LEFT_T0", .addr = A_CFU_CFRAME_LEFT_T0, + .rsvd = 0xfff00000, + },{ .name = "CFU_CFRAME_LEFT_T1", .addr = A_CFU_CFRAME_LEFT_T1, + .rsvd = 0xfff00000, + },{ .name = "CFU_CFRAME_LEFT_T2", .addr = A_CFU_CFRAME_LEFT_T2, + .rsvd = 0xfff00000, + },{ .name = "CFU_ROW_RANGE", .addr = A_CFU_ROW_RANGE, + .rsvd = 0xffffffc0, + .ro = 0x3f, + },{ .name = "CFU_STATUS", .addr = A_CFU_STATUS, + .rsvd = 0x80000000, + .ro = 0x7fffffff, + },{ .name = "CFU_INTERNAL_STATUS", .addr = A_CFU_INTERNAL_STATUS, + .rsvd = 0xff800000, + .ro = 0x7fffff, + },{ .name = "CFU_QWORD_CNT", .addr = A_CFU_QWORD_CNT, + .ro = 0xffffffff, + },{ .name = "CFU_CRC_LIVE", .addr = A_CFU_CRC_LIVE, + .ro = 0xffffffff, + },{ .name = "CFU_PENDING_READ_CNT", .addr = A_CFU_PENDING_READ_CNT, + .rsvd = 0xfe000000, + .ro = 0x1ffffff, + },{ .name = "CFU_FDRI_CNT", .addr = A_CFU_FDRI_CNT, + .ro = 0xffffffff, + },{ .name = "CFU_ECO1", .addr = A_CFU_ECO1, + },{ .name = "CFU_ECO2", .addr = A_CFU_ECO2, + } +}; + +static void cfu_apb_reset(DeviceState *dev) +{ + XlnxVersalCFUAPB *s = XLNX_VERSAL_CFU_APB(dev); + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(s->regs_info); ++i) { + register_reset(&s->regs_info[i]); + } + memset(s->wfifo, 0, WFIFO_SZ * sizeof(uint32_t)); + + s->regs[R_CFU_STATUS] |= R_CFU_STATUS_HC_COMPLETE_MASK; + cfu_imr_update_irq(s); +} + +static const MemoryRegionOps cfu_apb_ops = { + .read = register_read_memory, + .write = register_write_memory, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void cfu_transfer_cfi_packet(XlnxVersalCFUAPB *s, uint8_t row_addr, + XlnxCfiPacket *pkt) +{ + if (row_addr == CFRAME_BROADCAST_ROW) { + for (int i = 0; i < ARRAY_SIZE(s->cfg.cframe); i++) { + if (s->cfg.cframe[i]) { + xlnx_cfi_transfer_packet(s->cfg.cframe[i], pkt); + } + } + } else { + assert(row_addr < ARRAY_SIZE(s->cfg.cframe)); + + if (s->cfg.cframe[row_addr]) { + xlnx_cfi_transfer_packet(s->cfg.cframe[row_addr], pkt); + } + } +} + +static uint64_t cfu_stream_read(void *opaque, hwaddr addr, unsigned size) +{ + qemu_log_mask(LOG_GUEST_ERROR, "%s: Unsupported read from addr=%" + HWADDR_PRIx "\n", __func__, addr); + return 0; +} + +static void cfu_stream_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + XlnxVersalCFUAPB *s = XLNX_VERSAL_CFU_APB(opaque); + uint32_t 
wfifo[WFIFO_SZ]; + + if (update_wfifo(addr, value, s->wfifo, wfifo)) { + uint8_t packet_type, row_addr, reg_addr; + + packet_type = extract32(wfifo[0], 24, 8); + row_addr = extract32(wfifo[0], 16, 5); + reg_addr = extract32(wfifo[0], 8, 6); + + /* Compressed bitstreams are not supported yet. */ + if (ARRAY_FIELD_EX32(s->regs, CFU_CTL, DECOMPRESS) == 0) { + if (s->regs[R_CFU_FDRI_CNT]) { + XlnxCfiPacket pkt = { + .reg_addr = CFRAME_FDRI, + .data[0] = wfifo[0], + .data[1] = wfifo[1], + .data[2] = wfifo[2], + .data[3] = wfifo[3] + }; + + cfu_transfer_cfi_packet(s, s->fdri_row_addr, &pkt); + + s->regs[R_CFU_FDRI_CNT]--; + + } else if (packet_type == PACKET_TYPE_CFU && + reg_addr == CFRAME_FDRI) { + + /* Load R_CFU_FDRI_CNT, must be a multiple of 25 */ + s->regs[R_CFU_FDRI_CNT] = wfifo[1]; + + /* Store target row_addr */ + s->fdri_row_addr = row_addr; + + if (wfifo[1] % 25 != 0) { + qemu_log_mask(LOG_GUEST_ERROR, + "CFU FDRI_CNT is not loaded with " + "a multiple of 25\n"); + } + + } else if (packet_type == PACKET_TYPE_CFRAME) { + XlnxCfiPacket pkt = { + .reg_addr = reg_addr, + .data[0] = wfifo[1], + .data[1] = wfifo[2], + .data[2] = wfifo[3], + }; + cfu_transfer_cfi_packet(s, row_addr, &pkt); + } + } + } +} + +static uint64_t cfu_sfr_read(void *opaque, hwaddr addr, unsigned size) +{ + qemu_log_mask(LOG_GUEST_ERROR, "%s: Unsupported read from addr=%" + HWADDR_PRIx "\n", __func__, addr); + return 0; +} + +static void cfu_sfr_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + XlnxVersalCFUSFR *s = XLNX_VERSAL_CFU_SFR(opaque); + uint32_t wfifo[WFIFO_SZ]; + + if (update_wfifo(addr, value, s->wfifo, wfifo)) { + uint8_t row_addr = extract32(wfifo[0], 23, 5); + uint32_t frame_addr = extract32(wfifo[0], 0, 23); + XlnxCfiPacket pkt = { .reg_addr = CFRAME_SFR, + .data[0] = frame_addr }; + + if (s->cfg.cfu) { + cfu_transfer_cfi_packet(s->cfg.cfu, row_addr, &pkt); + } + } +} + +static uint64_t cfu_fdro_read(void *opaque, hwaddr addr, unsigned size) +{ + XlnxVersalCFUFDRO *s = XLNX_VERSAL_CFU_FDRO(opaque); + uint64_t ret = 0; + + if (!fifo32_is_empty(&s->fdro_data)) { + ret = fifo32_pop(&s->fdro_data); + } + + return ret; +} + +static void cfu_fdro_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + qemu_log_mask(LOG_GUEST_ERROR, "%s: Unsupported write to addr=%" + HWADDR_PRIx "\n", __func__, addr); +} + +static const MemoryRegionOps cfu_stream_ops = { + .read = cfu_stream_read, + .write = cfu_stream_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + }, +}; + +static const MemoryRegionOps cfu_sfr_ops = { + .read = cfu_sfr_read, + .write = cfu_sfr_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static const MemoryRegionOps cfu_fdro_ops = { + .read = cfu_fdro_read, + .write = cfu_fdro_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void cfu_apb_init(Object *obj) +{ + XlnxVersalCFUAPB *s = XLNX_VERSAL_CFU_APB(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + RegisterInfoArray *reg_array; + unsigned int i; + char *name; + + memory_region_init(&s->iomem, obj, TYPE_XLNX_VERSAL_CFU_APB, R_MAX * 4); + reg_array = + register_init_block32(DEVICE(obj), cfu_apb_regs_info, + ARRAY_SIZE(cfu_apb_regs_info), + s->regs_info, s->regs, + &cfu_apb_ops, + XLNX_VERSAL_CFU_APB_ERR_DEBUG, + R_MAX * 4); + memory_region_add_subregion(&s->iomem, + 0x0, + &reg_array->mem); + 
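/* + * MMIO region 0 is the APB register block; regions 1..NUM_STREAM are + * the keyhole stream windows created in the loop below. + */ + 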
sysbus_init_mmio(sbd, &s->iomem); + for (i = 0; i < NUM_STREAM; i++) { + name = g_strdup_printf(TYPE_XLNX_VERSAL_CFU_APB "-stream%d", i); + memory_region_init_io(&s->iomem_stream[i], obj, &cfu_stream_ops, s, + name, i == 0 ? KEYHOLE_STREAM_4K : + KEYHOLE_STREAM_256K); + sysbus_init_mmio(sbd, &s->iomem_stream[i]); + g_free(name); + } + sysbus_init_irq(sbd, &s->irq_cfu_imr); +} + +static void cfu_sfr_init(Object *obj) +{ + XlnxVersalCFUSFR *s = XLNX_VERSAL_CFU_SFR(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + memory_region_init_io(&s->iomem_sfr, obj, &cfu_sfr_ops, s, + TYPE_XLNX_VERSAL_CFU_SFR, KEYHOLE_STREAM_4K); + sysbus_init_mmio(sbd, &s->iomem_sfr); +} + +static void cfu_sfr_reset_enter(Object *obj, ResetType type) +{ + XlnxVersalCFUSFR *s = XLNX_VERSAL_CFU_SFR(obj); + + memset(s->wfifo, 0, WFIFO_SZ * sizeof(uint32_t)); +} + +static void cfu_fdro_init(Object *obj) +{ + XlnxVersalCFUFDRO *s = XLNX_VERSAL_CFU_FDRO(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + memory_region_init_io(&s->iomem_fdro, obj, &cfu_fdro_ops, s, + TYPE_XLNX_VERSAL_CFU_FDRO, KEYHOLE_STREAM_4K); + sysbus_init_mmio(sbd, &s->iomem_fdro); + fifo32_create(&s->fdro_data, 8 * KiB / sizeof(uint32_t)); +} + +static void cfu_fdro_reset_enter(Object *obj, ResetType type) +{ + XlnxVersalCFUFDRO *s = XLNX_VERSAL_CFU_FDRO(obj); + + fifo32_reset(&s->fdro_data); +} + +static void cfu_fdro_cfi_transfer_packet(XlnxCfiIf *cfi_if, XlnxCfiPacket *pkt) +{ + XlnxVersalCFUFDRO *s = XLNX_VERSAL_CFU_FDRO(cfi_if); + + if (fifo32_num_free(&s->fdro_data) >= ARRAY_SIZE(pkt->data)) { + for (int i = 0; i < ARRAY_SIZE(pkt->data); i++) { + fifo32_push(&s->fdro_data, pkt->data[i]); + } + } else { + /* It is a guest programming error to overflow the read FIFO. */ + qemu_log_mask(LOG_GUEST_ERROR, + "CFU_FDRO: CFI data dropped due to full read fifo\n"); + } +} + +static Property cfu_props[] = { + DEFINE_PROP_LINK("cframe0", XlnxVersalCFUAPB, cfg.cframe[0], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe1", XlnxVersalCFUAPB, cfg.cframe[1], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe2", XlnxVersalCFUAPB, cfg.cframe[2], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe3", XlnxVersalCFUAPB, cfg.cframe[3], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe4", XlnxVersalCFUAPB, cfg.cframe[4], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe5", XlnxVersalCFUAPB, cfg.cframe[5], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe6", XlnxVersalCFUAPB, cfg.cframe[6], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe7", XlnxVersalCFUAPB, cfg.cframe[7], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe8", XlnxVersalCFUAPB, cfg.cframe[8], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe9", XlnxVersalCFUAPB, cfg.cframe[9], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe10", XlnxVersalCFUAPB, cfg.cframe[10], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe11", XlnxVersalCFUAPB, cfg.cframe[11], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe12", XlnxVersalCFUAPB, cfg.cframe[12], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe13", XlnxVersalCFUAPB, cfg.cframe[13], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_LINK("cframe14", XlnxVersalCFUAPB, cfg.cframe[14], + TYPE_XLNX_CFI_IF, XlnxCfiIf *), + DEFINE_PROP_END_OF_LIST(), +}; + +static Property cfu_sfr_props[] = { + DEFINE_PROP_LINK("cfu", XlnxVersalCFUSFR, cfg.cfu, + TYPE_XLNX_VERSAL_CFU_APB, XlnxVersalCFUAPB *), + DEFINE_PROP_END_OF_LIST(), +}; + +static const 
VMStateDescription vmstate_cfu_apb = { + .name = TYPE_XLNX_VERSAL_CFU_APB, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(wfifo, XlnxVersalCFUAPB, 4), + VMSTATE_UINT32_ARRAY(regs, XlnxVersalCFUAPB, R_MAX), + VMSTATE_UINT8(fdri_row_addr, XlnxVersalCFUAPB), + VMSTATE_END_OF_LIST(), + } +}; + +static const VMStateDescription vmstate_cfu_fdro = { + .name = TYPE_XLNX_VERSAL_CFU_FDRO, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_FIFO32(fdro_data, XlnxVersalCFUFDRO), + VMSTATE_END_OF_LIST(), + } +}; + +static const VMStateDescription vmstate_cfu_sfr = { + .name = TYPE_XLNX_VERSAL_CFU_SFR, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(wfifo, XlnxVersalCFUSFR, 4), + VMSTATE_END_OF_LIST(), + } +}; + +static void cfu_apb_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = cfu_apb_reset; + dc->vmsd = &vmstate_cfu_apb; + device_class_set_props(dc, cfu_props); +} + +static void cfu_fdro_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); + XlnxCfiIfClass *xcic = XLNX_CFI_IF_CLASS(klass); + + dc->vmsd = &vmstate_cfu_fdro; + xcic->cfi_transfer_packet = cfu_fdro_cfi_transfer_packet; + rc->phases.enter = cfu_fdro_reset_enter; +} + +static void cfu_sfr_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); + + device_class_set_props(dc, cfu_sfr_props); + dc->vmsd = &vmstate_cfu_sfr; + rc->phases.enter = cfu_sfr_reset_enter; +} + +static const TypeInfo cfu_apb_info = { + .name = TYPE_XLNX_VERSAL_CFU_APB, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(XlnxVersalCFUAPB), + .class_init = cfu_apb_class_init, + .instance_init = cfu_apb_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_XLNX_CFI_IF }, + { } + } +}; + +static const TypeInfo cfu_fdro_info = { + .name = TYPE_XLNX_VERSAL_CFU_FDRO, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(XlnxVersalCFUFDRO), + .class_init = cfu_fdro_class_init, + .instance_init = cfu_fdro_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_XLNX_CFI_IF }, + { } + } +}; + +static const TypeInfo cfu_sfr_info = { + .name = TYPE_XLNX_VERSAL_CFU_SFR, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(XlnxVersalCFUSFR), + .class_init = cfu_sfr_class_init, + .instance_init = cfu_sfr_init, +}; + +static void cfu_apb_register_types(void) +{ + type_register_static(&cfu_apb_info); + type_register_static(&cfu_fdro_info); + type_register_static(&cfu_sfr_info); +} + +type_init(cfu_apb_register_types) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index a5ac3ab777..5edc1d98d2 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -35,6 +35,7 @@ #include "hw/riscv/virt.h" #include "hw/riscv/boot.h" #include "hw/riscv/numa.h" +#include "kvm_riscv.h" #include "hw/intc/riscv_aclint.h" #include "hw/intc/riscv_aplic.h" #include "hw/intc/riscv_imsic.h" @@ -75,6 +76,12 @@ #error "Can't accommodate all IMSIC groups in address space" #endif +/* KVM AIA only supports APLIC MSI. APLIC Wired is always emulated by QEMU. 
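With KVM AIA, the in-kernel irqchip provides a single APLIC plus IMSICs for all sockets, so the M-mode interrupt controllers are not created and only the S-mode nodes are put into the device tree.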
*/ +static bool virt_use_kvm_aia(RISCVVirtState *s) +{ + return kvm_irqchip_in_kernel() && s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC; +} + static const MemMapEntry virt_memmap[] = { [VIRT_DEBUG] = { 0x0, 0x100 }, [VIRT_MROM] = { 0x1000, 0xf000 }, @@ -516,79 +523,28 @@ static uint32_t imsic_num_bits(uint32_t count) return ret; } -static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, - uint32_t *phandle, uint32_t *intc_phandles, - uint32_t *msi_m_phandle, uint32_t *msi_s_phandle) +static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr, + uint32_t *intc_phandles, uint32_t msi_phandle, + bool m_mode, uint32_t imsic_guest_bits) { int cpu, socket; char *imsic_name; MachineState *ms = MACHINE(s); int socket_count = riscv_socket_count(ms); - uint32_t imsic_max_hart_per_socket, imsic_guest_bits; + uint32_t imsic_max_hart_per_socket; uint32_t *imsic_cells, *imsic_regs, imsic_addr, imsic_size; - *msi_m_phandle = (*phandle)++; - *msi_s_phandle = (*phandle)++; imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2); imsic_regs = g_new0(uint32_t, socket_count * 4); - /* M-level IMSIC node */ for (cpu = 0; cpu < ms->smp.cpus; cpu++) { imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); - imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT); - } - imsic_max_hart_per_socket = 0; - for (socket = 0; socket < socket_count; socket++) { - imsic_addr = memmap[VIRT_IMSIC_M].base + - socket * VIRT_IMSIC_GROUP_MAX_SIZE; - imsic_size = IMSIC_HART_SIZE(0) * s->soc[socket].num_harts; - imsic_regs[socket * 4 + 0] = 0; - imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr); - imsic_regs[socket * 4 + 2] = 0; - imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size); - if (imsic_max_hart_per_socket < s->soc[socket].num_harts) { - imsic_max_hart_per_socket = s->soc[socket].num_harts; - } - } - imsic_name = g_strdup_printf("/soc/imsics@%lx", - (unsigned long)memmap[VIRT_IMSIC_M].base); - qemu_fdt_add_subnode(ms->fdt, imsic_name); - qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", - "riscv,imsics"); - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells", - FDT_IMSIC_INT_CELLS); - qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", - NULL, 0); - qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", - NULL, 0); - qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended", - imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2); - qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs, - socket_count * sizeof(uint32_t) * 4); - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids", - VIRT_IRQCHIP_NUM_MSIS); - if (socket_count > 1) { - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits", - imsic_num_bits(imsic_max_hart_per_socket)); - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits", - imsic_num_bits(socket_count)); - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift", - IMSIC_MMIO_GROUP_MIN_SHIFT); + imsic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? 
IRQ_M_EXT : IRQ_S_EXT); } - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", *msi_m_phandle); - g_free(imsic_name); - - /* S-level IMSIC node */ - for (cpu = 0; cpu < ms->smp.cpus; cpu++) { - imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); - imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT); - } - imsic_guest_bits = imsic_num_bits(s->aia_guests + 1); imsic_max_hart_per_socket = 0; for (socket = 0; socket < socket_count; socket++) { - imsic_addr = memmap[VIRT_IMSIC_S].base + - socket * VIRT_IMSIC_GROUP_MAX_SIZE; + imsic_addr = base_addr + socket * VIRT_IMSIC_GROUP_MAX_SIZE; imsic_size = IMSIC_HART_SIZE(imsic_guest_bits) * s->soc[socket].num_harts; imsic_regs[socket * 4 + 0] = 0; @@ -599,119 +555,151 @@ static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, imsic_max_hart_per_socket = s->soc[socket].num_harts; } } - imsic_name = g_strdup_printf("/soc/imsics@%lx", - (unsigned long)memmap[VIRT_IMSIC_S].base); + + imsic_name = g_strdup_printf("/soc/imsics@%lx", (unsigned long)base_addr); qemu_fdt_add_subnode(ms->fdt, imsic_name); - qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", - "riscv,imsics"); + qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", "riscv,imsics"); qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells", - FDT_IMSIC_INT_CELLS); - qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", - NULL, 0); - qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", - NULL, 0); + FDT_IMSIC_INT_CELLS); + qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", NULL, 0); qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended", - imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2); + imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2); qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs, - socket_count * sizeof(uint32_t) * 4); + socket_count * sizeof(uint32_t) * 4); qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids", - VIRT_IRQCHIP_NUM_MSIS); + VIRT_IRQCHIP_NUM_MSIS); + if (imsic_guest_bits) { qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,guest-index-bits", - imsic_guest_bits); + imsic_guest_bits); } + if (socket_count > 1) { qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits", - imsic_num_bits(imsic_max_hart_per_socket)); + imsic_num_bits(imsic_max_hart_per_socket)); qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits", - imsic_num_bits(socket_count)); + imsic_num_bits(socket_count)); qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift", - IMSIC_MMIO_GROUP_MIN_SHIFT); + IMSIC_MMIO_GROUP_MIN_SHIFT); } - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", *msi_s_phandle); - g_free(imsic_name); + qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle); + g_free(imsic_name); g_free(imsic_regs); g_free(imsic_cells); } -static void create_fdt_socket_aplic(RISCVVirtState *s, - const MemMapEntry *memmap, int socket, - uint32_t msi_m_phandle, - uint32_t msi_s_phandle, - uint32_t *phandle, - uint32_t *intc_phandles, - uint32_t *aplic_phandles) +static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, + uint32_t *phandle, uint32_t *intc_phandles, + uint32_t *msi_m_phandle, uint32_t *msi_s_phandle) +{ + *msi_m_phandle = (*phandle)++; + *msi_s_phandle = (*phandle)++; + + if (!kvm_enabled()) { + /* M-level IMSIC node */ + create_fdt_one_imsic(s, memmap[VIRT_IMSIC_M].base, intc_phandles, + *msi_m_phandle, true, 0); + } + + /* S-level IMSIC node */ + create_fdt_one_imsic(s, 
memmap[VIRT_IMSIC_S].base, intc_phandles, + *msi_s_phandle, false, + imsic_num_bits(s->aia_guests + 1)); + +} + +static void create_fdt_one_aplic(RISCVVirtState *s, int socket, + unsigned long aplic_addr, uint32_t aplic_size, + uint32_t msi_phandle, + uint32_t *intc_phandles, + uint32_t aplic_phandle, + uint32_t aplic_child_phandle, + bool m_mode, int num_harts) { int cpu; char *aplic_name; uint32_t *aplic_cells; - unsigned long aplic_addr; MachineState *ms = MACHINE(s); - uint32_t aplic_m_phandle, aplic_s_phandle; - aplic_m_phandle = (*phandle)++; - aplic_s_phandle = (*phandle)++; - aplic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); + aplic_cells = g_new0(uint32_t, num_harts * 2); - /* M-level APLIC node */ - for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { + for (cpu = 0; cpu < num_harts; cpu++) { aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); - aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT); + aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT); } - aplic_addr = memmap[VIRT_APLIC_M].base + - (memmap[VIRT_APLIC_M].size * socket); + aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); qemu_fdt_add_subnode(ms->fdt, aplic_name); qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic"); qemu_fdt_setprop_cell(ms->fdt, aplic_name, - "#interrupt-cells", FDT_APLIC_INT_CELLS); + "#interrupt-cells", FDT_APLIC_INT_CELLS); qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0); + if (s->aia_type == VIRT_AIA_TYPE_APLIC) { qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended", - aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2); + aplic_cells, num_harts * sizeof(uint32_t) * 2); } else { - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", - msi_m_phandle); + qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle); } + qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg", - 0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_M].size); + 0x0, aplic_addr, 0x0, aplic_size); qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources", - VIRT_IRQCHIP_NUM_SOURCES); - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children", - aplic_s_phandle); - qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate", - aplic_s_phandle, 0x1, VIRT_IRQCHIP_NUM_SOURCES); + VIRT_IRQCHIP_NUM_SOURCES); + + if (aplic_child_phandle) { + qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children", + aplic_child_phandle); + qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate", + aplic_child_phandle, 0x1, + VIRT_IRQCHIP_NUM_SOURCES); + } + riscv_socket_fdt_write_id(ms, aplic_name, socket); - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_m_phandle); + qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_phandle); + g_free(aplic_name); + g_free(aplic_cells); +} - /* S-level APLIC node */ - for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { - aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); - aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT); +static void create_fdt_socket_aplic(RISCVVirtState *s, + const MemMapEntry *memmap, int socket, + uint32_t msi_m_phandle, + uint32_t msi_s_phandle, + uint32_t *phandle, + uint32_t *intc_phandles, + uint32_t *aplic_phandles, + int num_harts) +{ + char *aplic_name; + unsigned long aplic_addr; + MachineState *ms = MACHINE(s); + uint32_t aplic_m_phandle, aplic_s_phandle; + + aplic_m_phandle = (*phandle)++; + aplic_s_phandle = (*phandle)++; + + if (!kvm_enabled()) { + /* M-level APLIC node */ + aplic_addr = memmap[VIRT_APLIC_M].base + + 
(memmap[VIRT_APLIC_M].size * socket); + create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_M].size, + msi_m_phandle, intc_phandles, + aplic_m_phandle, aplic_s_phandle, + true, num_harts); } + + /* S-level APLIC node */ aplic_addr = memmap[VIRT_APLIC_S].base + (memmap[VIRT_APLIC_S].size * socket); + create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_S].size, + msi_s_phandle, intc_phandles, + aplic_s_phandle, 0, + false, num_harts); + aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); - qemu_fdt_add_subnode(ms->fdt, aplic_name); - qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic"); - qemu_fdt_setprop_cell(ms->fdt, aplic_name, - "#interrupt-cells", FDT_APLIC_INT_CELLS); - qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0); - if (s->aia_type == VIRT_AIA_TYPE_APLIC) { - qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended", - aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2); - } else { - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", - msi_s_phandle); - } - qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg", - 0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_S].size); - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources", - VIRT_IRQCHIP_NUM_SOURCES); - riscv_socket_fdt_write_id(ms, aplic_name, socket); - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_s_phandle); if (!socket) { platform_bus_add_all_fdt_nodes(ms->fdt, aplic_name, @@ -722,7 +710,6 @@ static void create_fdt_socket_aplic(RISCVVirtState *s, g_free(aplic_name); - g_free(aplic_cells); aplic_phandles[socket] = aplic_s_phandle; } @@ -732,7 +719,7 @@ static void create_fdt_pmu(RISCVVirtState *s) MachineState *ms = MACHINE(s); RISCVCPU hart = s->soc[0].harts[0]; - pmu_name = g_strdup_printf("/soc/pmu"); + pmu_name = g_strdup_printf("/pmu"); qemu_fdt_add_subnode(ms->fdt, pmu_name); qemu_fdt_setprop_string(ms->fdt, pmu_name, "compatible", "riscv,pmu"); riscv_pmu_generate_fdt_node(ms->fdt, hart.cfg.pmu_num, pmu_name); @@ -794,34 +781,51 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, *msi_pcie_phandle = msi_s_phandle; } - phandle_pos = ms->smp.cpus; - for (socket = (socket_count - 1); socket >= 0; socket--) { - phandle_pos -= s->soc[socket].num_harts; - - if (s->aia_type == VIRT_AIA_TYPE_NONE) { - create_fdt_socket_plic(s, memmap, socket, phandle, - &intc_phandles[phandle_pos], xplic_phandles); - } else { - create_fdt_socket_aplic(s, memmap, socket, - msi_m_phandle, msi_s_phandle, phandle, - &intc_phandles[phandle_pos], xplic_phandles); + /* KVM AIA only has one APLIC instance */ + if (kvm_enabled() && virt_use_kvm_aia(s)) { + create_fdt_socket_aplic(s, memmap, 0, + msi_m_phandle, msi_s_phandle, phandle, + &intc_phandles[0], xplic_phandles, + ms->smp.cpus); + } else { + phandle_pos = ms->smp.cpus; + for (socket = (socket_count - 1); socket >= 0; socket--) { + phandle_pos -= s->soc[socket].num_harts; + + if (s->aia_type == VIRT_AIA_TYPE_NONE) { + create_fdt_socket_plic(s, memmap, socket, phandle, + &intc_phandles[phandle_pos], + xplic_phandles); + } else { + create_fdt_socket_aplic(s, memmap, socket, + msi_m_phandle, msi_s_phandle, phandle, + &intc_phandles[phandle_pos], + xplic_phandles, + s->soc[socket].num_harts); + } } } g_free(intc_phandles); - for (socket = 0; socket < socket_count; socket++) { - if (socket == 0) { - *irq_mmio_phandle = xplic_phandles[socket]; - *irq_virtio_phandle = xplic_phandles[socket]; - *irq_pcie_phandle = xplic_phandles[socket]; - } - if (socket == 1) { - 
*irq_virtio_phandle = xplic_phandles[socket]; - *irq_pcie_phandle = xplic_phandles[socket]; - } - if (socket == 2) { - *irq_pcie_phandle = xplic_phandles[socket]; + if (kvm_enabled() && virt_use_kvm_aia(s)) { + *irq_mmio_phandle = xplic_phandles[0]; + *irq_virtio_phandle = xplic_phandles[0]; + *irq_pcie_phandle = xplic_phandles[0]; + } else { + for (socket = 0; socket < socket_count; socket++) { + if (socket == 0) { + *irq_mmio_phandle = xplic_phandles[socket]; + *irq_virtio_phandle = xplic_phandles[socket]; + *irq_pcie_phandle = xplic_phandles[socket]; + } + if (socket == 1) { + *irq_virtio_phandle = xplic_phandles[socket]; + *irq_pcie_phandle = xplic_phandles[socket]; + } + if (socket == 2) { + *irq_pcie_phandle = xplic_phandles[socket]; + } } } @@ -1163,16 +1167,20 @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests, int i; hwaddr addr; uint32_t guest_bits; - DeviceState *aplic_m; - bool msimode = (aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) ? true : false; + DeviceState *aplic_s = NULL; + DeviceState *aplic_m = NULL; + bool msimode = aia_type == VIRT_AIA_TYPE_APLIC_IMSIC; if (msimode) { - /* Per-socket M-level IMSICs */ - addr = memmap[VIRT_IMSIC_M].base + socket * VIRT_IMSIC_GROUP_MAX_SIZE; - for (i = 0; i < hart_count; i++) { - riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0), - base_hartid + i, true, 1, - VIRT_IRQCHIP_NUM_MSIS); + if (!kvm_enabled()) { + /* Per-socket M-level IMSICs */ + addr = memmap[VIRT_IMSIC_M].base + + socket * VIRT_IMSIC_GROUP_MAX_SIZE; + for (i = 0; i < hart_count; i++) { + riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0), + base_hartid + i, true, 1, + VIRT_IRQCHIP_NUM_MSIS); + } } /* Per-socket S-level IMSICs */ @@ -1185,29 +1193,29 @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests, } } - /* Per-socket M-level APLIC */ - aplic_m = riscv_aplic_create( - memmap[VIRT_APLIC_M].base + socket * memmap[VIRT_APLIC_M].size, - memmap[VIRT_APLIC_M].size, - (msimode) ? 0 : base_hartid, - (msimode) ? 0 : hart_count, - VIRT_IRQCHIP_NUM_SOURCES, - VIRT_IRQCHIP_NUM_PRIO_BITS, - msimode, true, NULL); - - if (aplic_m) { - /* Per-socket S-level APLIC */ - riscv_aplic_create( - memmap[VIRT_APLIC_S].base + socket * memmap[VIRT_APLIC_S].size, - memmap[VIRT_APLIC_S].size, - (msimode) ? 0 : base_hartid, - (msimode) ? 0 : hart_count, - VIRT_IRQCHIP_NUM_SOURCES, - VIRT_IRQCHIP_NUM_PRIO_BITS, - msimode, false, aplic_m); + if (!kvm_enabled()) { + /* Per-socket M-level APLIC */ + aplic_m = riscv_aplic_create(memmap[VIRT_APLIC_M].base + + socket * memmap[VIRT_APLIC_M].size, + memmap[VIRT_APLIC_M].size, + (msimode) ? 0 : base_hartid, + (msimode) ? 0 : hart_count, + VIRT_IRQCHIP_NUM_SOURCES, + VIRT_IRQCHIP_NUM_PRIO_BITS, + msimode, true, NULL); } - return aplic_m; + /* Per-socket S-level APLIC */ + aplic_s = riscv_aplic_create(memmap[VIRT_APLIC_S].base + + socket * memmap[VIRT_APLIC_S].size, + memmap[VIRT_APLIC_S].size, + (msimode) ? 0 : base_hartid, + (msimode) ? 0 : hart_count, + VIRT_IRQCHIP_NUM_SOURCES, + VIRT_IRQCHIP_NUM_PRIO_BITS, + msimode, false, aplic_m); + + return kvm_enabled() ? 
aplic_s : aplic_m; } static void create_platform_bus(RISCVVirtState *s, DeviceState *irqchip) @@ -1453,6 +1461,14 @@ static void virt_machine_init(MachineState *machine) } } + if (kvm_enabled() && virt_use_kvm_aia(s)) { + kvm_riscv_aia_create(machine, IMSIC_MMIO_GROUP_MIN_SHIFT, + VIRT_IRQCHIP_NUM_SOURCES, VIRT_IRQCHIP_NUM_MSIS, + memmap[VIRT_APLIC_S].base, + memmap[VIRT_IMSIC_S].base, + s->aia_guests); + } + if (riscv_is_32bit(&s->soc[0])) { #if HOST_LONG_BITS == 64 /* limit RAM size in a 32-bit system */ diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 309038fd46..969c25f4cf 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2825,8 +2825,9 @@ static int virtio_device_put(QEMUFile *f, void *opaque, size_t size, } /* A wrapper for use as a VMState .get function */ -static int virtio_device_get(QEMUFile *f, void *opaque, size_t size, - const VMStateField *field) +static int coroutine_mixed_fn +virtio_device_get(QEMUFile *f, void *opaque, size_t size, + const VMStateField *field) { VirtIODevice *vdev = VIRTIO_DEVICE(opaque); DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev)); @@ -2853,6 +2854,39 @@ static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) return bad ? -1 : 0; } +typedef struct VirtioSetFeaturesNocheckData { + Coroutine *co; + VirtIODevice *vdev; + uint64_t val; + int ret; +} VirtioSetFeaturesNocheckData; + +static void virtio_set_features_nocheck_bh(void *opaque) +{ + VirtioSetFeaturesNocheckData *data = opaque; + + data->ret = virtio_set_features_nocheck(data->vdev, data->val); + aio_co_wake(data->co); +} + +static int coroutine_mixed_fn +virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val) +{ + if (qemu_in_coroutine()) { + VirtioSetFeaturesNocheckData data = { + .co = qemu_coroutine_self(), + .vdev = vdev, + .val = val, + }; + aio_bh_schedule_oneshot(qemu_get_current_aio_context(), + virtio_set_features_nocheck_bh, &data); + qemu_coroutine_yield(); + return data.ret; + } else { + return virtio_set_features_nocheck(vdev, val); + } +} + int virtio_set_features(VirtIODevice *vdev, uint64_t val) { int ret; @@ -2906,7 +2940,8 @@ size_t virtio_get_config_size(const VirtIOConfigSizeParams *params, return config_size; } -int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) +int coroutine_mixed_fn +virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) { int i, ret; int32_t config_len; @@ -3023,14 +3058,14 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) * host_features. */ uint64_t features64 = vdev->guest_features; - if (virtio_set_features_nocheck(vdev, features64) < 0) { + if (virtio_set_features_nocheck_maybe_co(vdev, features64) < 0) { error_report("Features 0x%" PRIx64 " unsupported. " "Allowed features: 0x%" PRIx64, features64, vdev->host_features); return -1; } } else { - if (virtio_set_features_nocheck(vdev, features) < 0) { + if (virtio_set_features_nocheck_maybe_co(vdev, features) < 0) { error_report("Features 0x%x unsupported. " "Allowed features: 0x%" PRIx64, features, vdev->host_features); diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index e09d277328..85be256c09 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -1231,7 +1231,7 @@ struct BlockDriverState { unsigned int write_gen; /* Current data generation */ /* Protected by reqs_lock. 
*/ - CoMutex reqs_lock; + QemuMutex reqs_lock; QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; CoQueue flush_queue; /* Serializing flush queue */ bool active_flush_req; /* Flush request in flight? */ diff --git a/include/block/qapi.h b/include/block/qapi.h index 18d48ddb70..8663971c58 100644 --- a/include/block/qapi.h +++ b/include/block/qapi.h @@ -36,9 +36,6 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, int bdrv_query_snapshot_info_list(BlockDriverState *bs, SnapshotInfoList **p_list, Error **errp); -void bdrv_query_block_node_info(BlockDriverState *bs, - BlockNodeInfo **p_info, - Error **errp); void bdrv_query_image_info(BlockDriverState *bs, ImageInfo **p_info, bool flat, diff --git a/include/crypto/aes.h b/include/crypto/aes.h index 709d4d226b..381f24c902 100644 --- a/include/crypto/aes.h +++ b/include/crypto/aes.h @@ -30,4 +30,11 @@ void AES_decrypt(const unsigned char *in, unsigned char *out, extern const uint8_t AES_sbox[256]; extern const uint8_t AES_isbox[256]; +/* +AES_Te0[x] = S [x].[02, 01, 01, 03]; +AES_Td0[x] = Si[x].[0e, 09, 0d, 0b]; +*/ + +extern const uint32_t AES_Te0[256], AES_Td0[256]; + #endif diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h index 9bd3ebc62e..382b26d922 100644 --- a/include/crypto/sm4.h +++ b/include/crypto/sm4.h @@ -2,5 +2,14 @@ #define QEMU_SM4_H extern const uint8_t sm4_sbox[256]; +extern const uint32_t sm4_ck[32]; + +static inline uint32_t sm4_subword(uint32_t word) +{ + return sm4_sbox[word & 0xff] | + sm4_sbox[(word >> 8) & 0xff] << 8 | + sm4_sbox[(word >> 16) & 0xff] << 16 | + sm4_sbox[(word >> 24) & 0xff] << 24; +} #endif diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h index 39ee31185c..7b419f88c2 100644 --- a/include/hw/arm/xlnx-versal.h +++ b/include/hw/arm/xlnx-versal.h @@ -32,6 +32,8 @@ #include "hw/misc/xlnx-versal-crl.h" #include "hw/misc/xlnx-versal-pmc-iou-slcr.h" #include "hw/net/xlnx-versal-canfd.h" +#include "hw/misc/xlnx-versal-cfu.h" +#include "hw/misc/xlnx-versal-cframe-reg.h" #define TYPE_XLNX_VERSAL "xlnx-versal" OBJECT_DECLARE_SIMPLE_TYPE(Versal, XLNX_VERSAL) @@ -46,6 +48,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(Versal, XLNX_VERSAL) #define XLNX_VERSAL_NR_IRQS 192 #define XLNX_VERSAL_NR_CANFD 2 #define XLNX_VERSAL_CANFD_REF_CLK (24 * 1000 * 1000) +#define XLNX_VERSAL_NR_CFRAME 15 struct Versal { /*< private >*/ @@ -117,6 +120,11 @@ struct Versal { XlnxEFuse efuse; XlnxVersalEFuseCtrl efuse_ctrl; XlnxVersalEFuseCache efuse_cache; + XlnxVersalCFUAPB cfu_apb; + XlnxVersalCFUFDRO cfu_fdro; + XlnxVersalCFUSFR cfu_sfr; + XlnxVersalCFrameReg cframe[XLNX_VERSAL_NR_CFRAME]; + XlnxVersalCFrameBcastReg cframe_bcast; OrIRQState apb_irq_orgate; } pmc; @@ -147,6 +155,7 @@ struct Versal { #define VERSAL_GEM1_WAKE_IRQ_0 59 #define VERSAL_ADMA_IRQ_0 60 #define VERSAL_XRAM_IRQ_0 79 +#define VERSAL_CFU_IRQ_0 120 #define VERSAL_PMC_APB_IRQ 121 #define VERSAL_OSPI_IRQ 124 #define VERSAL_SD0_IRQ_0 126 @@ -240,6 +249,82 @@ struct Versal { #define MM_PMC_EFUSE_CACHE 0xf1250000 #define MM_PMC_EFUSE_CACHE_SIZE 0x00C00 +#define MM_PMC_CFU_APB 0xf12b0000 +#define MM_PMC_CFU_APB_SIZE 0x10000 +#define MM_PMC_CFU_STREAM 0xf12c0000 +#define MM_PMC_CFU_STREAM_SIZE 0x1000 +#define MM_PMC_CFU_SFR 0xf12c1000 +#define MM_PMC_CFU_SFR_SIZE 0x1000 +#define MM_PMC_CFU_FDRO 0xf12c2000 +#define MM_PMC_CFU_FDRO_SIZE 0x1000 +#define MM_PMC_CFU_STREAM_2 0xf1f80000 +#define MM_PMC_CFU_STREAM_2_SIZE 0x40000 + +#define MM_PMC_CFRAME0_REG 0xf12d0000 +#define MM_PMC_CFRAME0_REG_SIZE 0x1000 +#define MM_PMC_CFRAME0_FDRI 0xf12d1000 
+#define MM_PMC_CFRAME0_FDRI_SIZE 0x1000 +#define MM_PMC_CFRAME1_REG 0xf12d2000 +#define MM_PMC_CFRAME1_REG_SIZE 0x1000 +#define MM_PMC_CFRAME1_FDRI 0xf12d3000 +#define MM_PMC_CFRAME1_FDRI_SIZE 0x1000 +#define MM_PMC_CFRAME2_REG 0xf12d4000 +#define MM_PMC_CFRAME2_REG_SIZE 0x1000 +#define MM_PMC_CFRAME2_FDRI 0xf12d5000 +#define MM_PMC_CFRAME2_FDRI_SIZE 0x1000 +#define MM_PMC_CFRAME3_REG 0xf12d6000 +#define MM_PMC_CFRAME3_REG_SIZE 0x1000 +#define MM_PMC_CFRAME3_FDRI 0xf12d7000 +#define MM_PMC_CFRAME3_FDRI_SIZE 0x1000 +#define MM_PMC_CFRAME4_REG 0xf12d8000 +#define MM_PMC_CFRAME4_REG_SIZE 0x1000 +#define MM_PMC_CFRAME4_FDRI 0xf12d9000 +#define MM_PMC_CFRAME4_FDRI_SIZE 0x1000 +#define MM_PMC_CFRAME5_REG 0xf12da000 +#define MM_PMC_CFRAME5_REG_SIZE 0x1000 +#define MM_PMC_CFRAME5_FDRI 0xf12db000 +#define MM_PMC_CFRAME5_FDRI_SIZE 0x1000 +#define MM_PMC_CFRAME6_REG 0xf12dc000 +#define MM_PMC_CFRAME6_REG_SIZE 0x1000 +#define MM_PMC_CFRAME6_FDRI 0xf12dd000 +#define MM_PMC_CFRAME6_FDRI_SIZE 0x1000 +#define MM_PMC_CFRAME7_REG 0xf12de000 +#define MM_PMC_CFRAME7_REG_SIZE 0x1000 +#define MM_PMC_CFRAME7_FDRI 0xf12df000 +#define MM_PMC_CFRAME7_FDRI_SIZE 0x1000 +#define MM_PMC_CFRAME8_REG 0xf12e0000 +#define MM_PMC_CFRAME8_REG_SIZE 0x1000 +#define MM_PMC_CFRAME8_FDRI 0xf12e1000 +#define MM_PMC_CFRAME8_FDRI_SIZE 0x1000 +#define MM_PMC_CFRAME9_REG 0xf12e2000 +#define MM_PMC_CFRAME9_REG_SIZE 0x1000 +#define MM_PMC_CFRAME9_FDRI 0xf12e3000 +#define MM_PMC_CFRAME9_FDRI_SIZE 0x1000 +#define MM_PMC_CFRAME10_REG 0xf12e4000 +#define MM_PMC_CFRAME10_REG_SIZE 0x1000 +#define MM_PMC_CFRAME10_FDRI 0xf12e5000 +#define MM_PMC_CFRAME10_FDRI_SIZE 0x1000 +#define MM_PMC_CFRAME11_REG 0xf12e6000 +#define MM_PMC_CFRAME11_REG_SIZE 0x1000 +#define MM_PMC_CFRAME11_FDRI 0xf12e7000 +#define MM_PMC_CFRAME11_FDRI_SIZE 0x1000 +#define MM_PMC_CFRAME12_REG 0xf12e8000 +#define MM_PMC_CFRAME12_REG_SIZE 0x1000 +#define MM_PMC_CFRAME12_FDRI 0xf12e9000 +#define MM_PMC_CFRAME12_FDRI_SIZE 0x1000 +#define MM_PMC_CFRAME13_REG 0xf12ea000 +#define MM_PMC_CFRAME13_REG_SIZE 0x1000 +#define MM_PMC_CFRAME13_FDRI 0xf12eb000 +#define MM_PMC_CFRAME13_FDRI_SIZE 0x1000 +#define MM_PMC_CFRAME14_REG 0xf12ec000 +#define MM_PMC_CFRAME14_REG_SIZE 0x1000 +#define MM_PMC_CFRAME14_FDRI 0xf12ed000 +#define MM_PMC_CFRAME14_FDRI_SIZE 0x1000 +#define MM_PMC_CFRAME_BCAST_REG 0xf12ee000 +#define MM_PMC_CFRAME_BCAST_REG_SIZE 0x1000 +#define MM_PMC_CFRAME_BCAST_FDRI 0xf12ef000 +#define MM_PMC_CFRAME_BCAST_FDRI_SIZE 0x1000 + #define MM_PMC_CRP 0xf1260000U #define MM_PMC_CRP_SIZE 0x10000 #define MM_PMC_RTC 0xf12a0000 diff --git a/include/hw/misc/xlnx-cfi-if.h b/include/hw/misc/xlnx-cfi-if.h new file mode 100644 index 0000000000..f9bd12292d --- /dev/null +++ b/include/hw/misc/xlnx-cfi-if.h @@ -0,0 +1,59 @@ +/* + * Xilinx CFI interface + * + * Copyright (C) 2023, Advanced Micro Devices, Inc. 
+ * + * Written by Francisco Iglesias <francisco.iglesias@amd.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef XLNX_CFI_IF_H +#define XLNX_CFI_IF_H 1 + +#include "qemu/help-texts.h" +#include "hw/hw.h" +#include "qom/object.h" + +#define TYPE_XLNX_CFI_IF "xlnx-cfi-if" +typedef struct XlnxCfiIfClass XlnxCfiIfClass; +DECLARE_CLASS_CHECKERS(XlnxCfiIfClass, XLNX_CFI_IF, TYPE_XLNX_CFI_IF) + +#define XLNX_CFI_IF(obj) \ + INTERFACE_CHECK(XlnxCfiIf, (obj), TYPE_XLNX_CFI_IF) + +typedef enum { + PACKET_TYPE_CFU = 0x52, + PACKET_TYPE_CFRAME = 0xA1, +} xlnx_cfi_packet_type; + +typedef enum { + CFRAME_FAR = 1, + CFRAME_SFR = 2, + CFRAME_FDRI = 4, + CFRAME_CMD = 6, +} xlnx_cfi_reg_addr; + +typedef struct XlnxCfiPacket { + uint8_t reg_addr; + uint32_t data[4]; +} XlnxCfiPacket; + +typedef struct XlnxCfiIf { + Object parent_obj; +} XlnxCfiIf; + +typedef struct XlnxCfiIfClass { + InterfaceClass parent; + + void (*cfi_transfer_packet)(XlnxCfiIf *cfi_if, XlnxCfiPacket *pkt); +} XlnxCfiIfClass; + +/** + * Transfer an XlnxCfiPacket. + * + * @cfi_if: the object implementing this interface + * @pkt: a pointer to the XlnxCfiPacket to transfer + */ +void xlnx_cfi_transfer_packet(XlnxCfiIf *cfi_if, XlnxCfiPacket *pkt); + +#endif /* XLNX_CFI_IF_H */ diff --git a/include/hw/misc/xlnx-versal-cframe-reg.h b/include/hw/misc/xlnx-versal-cframe-reg.h new file mode 100644 index 0000000000..a14fbd7fe4 --- /dev/null +++ b/include/hw/misc/xlnx-versal-cframe-reg.h @@ -0,0 +1,303 @@ +/* + * QEMU model of the Configuration Frame Control module + * + * Copyright (C) 2023, Advanced Micro Devices, Inc. + * + * Written by Francisco Iglesias <francisco.iglesias@amd.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * References: + * [1] Versal ACAP Technical Reference Manual, + * https://www.xilinx.com/support/documentation/architecture-manuals/am011-versal-acap-trm.pdf + * + * [2] Versal ACAP Register Reference, + * https://www.xilinx.com/htmldocs/registers/am012/am012-versal-register-reference.html + */ +#ifndef HW_MISC_XLNX_VERSAL_CFRAME_REG_H +#define HW_MISC_XLNX_VERSAL_CFRAME_REG_H + +#include "hw/sysbus.h" +#include "hw/register.h" +#include "hw/misc/xlnx-cfi-if.h" +#include "hw/misc/xlnx-versal-cfu.h" +#include "qemu/fifo32.h" + +#define TYPE_XLNX_VERSAL_CFRAME_REG "xlnx,cframe-reg" +OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCFrameReg, XLNX_VERSAL_CFRAME_REG) + +#define TYPE_XLNX_VERSAL_CFRAME_BCAST_REG "xlnx.cframe-bcast-reg" +OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCFrameBcastReg, + XLNX_VERSAL_CFRAME_BCAST_REG) + +/* + * The registers in this module are 128 bits wide, but they can be + * written and read through four sequential 32-bit accesses + * (address[3:2] = 0, 1, 2, 3). 
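+ * + * For example, a 128-bit FAR update is written as four 32-bit accesses + * to FAR0..FAR3, lowest word first; the model collects the words and + * acts on the full 128-bit value once the last word (address[3:2] == 3) + * arrives (see update_wfifo()). 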
+ */ +REG32(CRC0, 0x0) + FIELD(CRC, CRC, 0, 32) +REG32(CRC1, 0x4) +REG32(CRC2, 0x8) +REG32(CRC3, 0xc) +REG32(FAR0, 0x10) + FIELD(FAR0, SEGMENT, 23, 2) + FIELD(FAR0, BLOCKTYPE, 20, 3) + FIELD(FAR0, FRAME_ADDR, 0, 20) +REG32(FAR1, 0x14) +REG32(FAR2, 0x18) +REG32(FAR3, 0x1c) +REG32(FAR_SFR0, 0x20) + FIELD(FAR_SFR0, BLOCKTYPE, 20, 3) + FIELD(FAR_SFR0, FRAME_ADDR, 0, 20) +REG32(FAR_SFR1, 0x24) +REG32(FAR_SFR2, 0x28) +REG32(FAR_SFR3, 0x2c) +REG32(FDRI0, 0x40) +REG32(FDRI1, 0x44) +REG32(FDRI2, 0x48) +REG32(FDRI3, 0x4c) +REG32(FRCNT0, 0x50) + FIELD(FRCNT0, FRCNT, 0, 32) +REG32(FRCNT1, 0x54) +REG32(FRCNT2, 0x58) +REG32(FRCNT3, 0x5c) +REG32(CMD0, 0x60) + FIELD(CMD0, CMD, 0, 5) +REG32(CMD1, 0x64) +REG32(CMD2, 0x68) +REG32(CMD3, 0x6c) +REG32(CR_MASK0, 0x70) +REG32(CR_MASK1, 0x74) +REG32(CR_MASK2, 0x78) +REG32(CR_MASK3, 0x7c) +REG32(CTL0, 0x80) + FIELD(CTL, PER_FRAME_CRC, 0, 1) +REG32(CTL1, 0x84) +REG32(CTL2, 0x88) +REG32(CTL3, 0x8c) +REG32(CFRM_ISR0, 0x150) + FIELD(CFRM_ISR0, READ_BROADCAST_ERROR, 21, 1) + FIELD(CFRM_ISR0, CMD_MISSING_ERROR, 20, 1) + FIELD(CFRM_ISR0, RW_ROWOFF_ERROR, 19, 1) + FIELD(CFRM_ISR0, READ_REG_ADDR_ERROR, 18, 1) + FIELD(CFRM_ISR0, READ_BLK_TYPE_ERROR, 17, 1) + FIELD(CFRM_ISR0, READ_FRAME_ADDR_ERROR, 16, 1) + FIELD(CFRM_ISR0, WRITE_REG_ADDR_ERROR, 15, 1) + FIELD(CFRM_ISR0, WRITE_BLK_TYPE_ERROR, 13, 1) + FIELD(CFRM_ISR0, WRITE_FRAME_ADDR_ERROR, 12, 1) + FIELD(CFRM_ISR0, MFW_OVERRUN_ERROR, 11, 1) + FIELD(CFRM_ISR0, FAR_FIFO_UNDERFLOW, 10, 1) + FIELD(CFRM_ISR0, FAR_FIFO_OVERFLOW, 9, 1) + FIELD(CFRM_ISR0, PER_FRAME_SEQ_ERROR, 8, 1) + FIELD(CFRM_ISR0, CRC_ERROR, 7, 1) + FIELD(CFRM_ISR0, WRITE_OVERRUN_ERROR, 6, 1) + FIELD(CFRM_ISR0, READ_OVERRUN_ERROR, 5, 1) + FIELD(CFRM_ISR0, CMD_INTERRUPT_ERROR, 4, 1) + FIELD(CFRM_ISR0, WRITE_INTERRUPT_ERROR, 3, 1) + FIELD(CFRM_ISR0, READ_INTERRUPT_ERROR, 2, 1) + FIELD(CFRM_ISR0, SEU_CRC_ERROR, 1, 1) + FIELD(CFRM_ISR0, SEU_ECC_ERROR, 0, 1) +REG32(CFRM_ISR1, 0x154) +REG32(CFRM_ISR2, 0x158) +REG32(CFRM_ISR3, 0x15c) +REG32(CFRM_IMR0, 0x160) + FIELD(CFRM_IMR0, READ_BROADCAST_ERROR, 21, 1) + FIELD(CFRM_IMR0, CMD_MISSING_ERROR, 20, 1) + FIELD(CFRM_IMR0, RW_ROWOFF_ERROR, 19, 1) + FIELD(CFRM_IMR0, READ_REG_ADDR_ERROR, 18, 1) + FIELD(CFRM_IMR0, READ_BLK_TYPE_ERROR, 17, 1) + FIELD(CFRM_IMR0, READ_FRAME_ADDR_ERROR, 16, 1) + FIELD(CFRM_IMR0, WRITE_REG_ADDR_ERROR, 15, 1) + FIELD(CFRM_IMR0, WRITE_BLK_TYPE_ERROR, 13, 1) + FIELD(CFRM_IMR0, WRITE_FRAME_ADDR_ERROR, 12, 1) + FIELD(CFRM_IMR0, MFW_OVERRUN_ERROR, 11, 1) + FIELD(CFRM_IMR0, FAR_FIFO_UNDERFLOW, 10, 1) + FIELD(CFRM_IMR0, FAR_FIFO_OVERFLOW, 9, 1) + FIELD(CFRM_IMR0, PER_FRAME_SEQ_ERROR, 8, 1) + FIELD(CFRM_IMR0, CRC_ERROR, 7, 1) + FIELD(CFRM_IMR0, WRITE_OVERRUN_ERROR, 6, 1) + FIELD(CFRM_IMR0, READ_OVERRUN_ERROR, 5, 1) + FIELD(CFRM_IMR0, CMD_INTERRUPT_ERROR, 4, 1) + FIELD(CFRM_IMR0, WRITE_INTERRUPT_ERROR, 3, 1) + FIELD(CFRM_IMR0, READ_INTERRUPT_ERROR, 2, 1) + FIELD(CFRM_IMR0, SEU_CRC_ERROR, 1, 1) + FIELD(CFRM_IMR0, SEU_ECC_ERROR, 0, 1) +REG32(CFRM_IMR1, 0x164) +REG32(CFRM_IMR2, 0x168) +REG32(CFRM_IMR3, 0x16c) +REG32(CFRM_IER0, 0x170) + FIELD(CFRM_IER0, READ_BROADCAST_ERROR, 21, 1) + FIELD(CFRM_IER0, CMD_MISSING_ERROR, 20, 1) + FIELD(CFRM_IER0, RW_ROWOFF_ERROR, 19, 1) + FIELD(CFRM_IER0, READ_REG_ADDR_ERROR, 18, 1) + FIELD(CFRM_IER0, READ_BLK_TYPE_ERROR, 17, 1) + FIELD(CFRM_IER0, READ_FRAME_ADDR_ERROR, 16, 1) + FIELD(CFRM_IER0, WRITE_REG_ADDR_ERROR, 15, 1) + FIELD(CFRM_IER0, WRITE_BLK_TYPE_ERROR, 13, 1) + FIELD(CFRM_IER0, WRITE_FRAME_ADDR_ERROR, 12, 1) + FIELD(CFRM_IER0, MFW_OVERRUN_ERROR, 11, 1) + 
FIELD(CFRM_IER0, FAR_FIFO_UNDERFLOW, 10, 1) + FIELD(CFRM_IER0, FAR_FIFO_OVERFLOW, 9, 1) + FIELD(CFRM_IER0, PER_FRAME_SEQ_ERROR, 8, 1) + FIELD(CFRM_IER0, CRC_ERROR, 7, 1) + FIELD(CFRM_IER0, WRITE_OVERRUN_ERROR, 6, 1) + FIELD(CFRM_IER0, READ_OVERRUN_ERROR, 5, 1) + FIELD(CFRM_IER0, CMD_INTERRUPT_ERROR, 4, 1) + FIELD(CFRM_IER0, WRITE_INTERRUPT_ERROR, 3, 1) + FIELD(CFRM_IER0, READ_INTERRUPT_ERROR, 2, 1) + FIELD(CFRM_IER0, SEU_CRC_ERROR, 1, 1) + FIELD(CFRM_IER0, SEU_ECC_ERROR, 0, 1) +REG32(CFRM_IER1, 0x174) +REG32(CFRM_IER2, 0x178) +REG32(CFRM_IER3, 0x17c) +REG32(CFRM_IDR0, 0x180) + FIELD(CFRM_IDR0, READ_BROADCAST_ERROR, 21, 1) + FIELD(CFRM_IDR0, CMD_MISSING_ERROR, 20, 1) + FIELD(CFRM_IDR0, RW_ROWOFF_ERROR, 19, 1) + FIELD(CFRM_IDR0, READ_REG_ADDR_ERROR, 18, 1) + FIELD(CFRM_IDR0, READ_BLK_TYPE_ERROR, 17, 1) + FIELD(CFRM_IDR0, READ_FRAME_ADDR_ERROR, 16, 1) + FIELD(CFRM_IDR0, WRITE_REG_ADDR_ERROR, 15, 1) + FIELD(CFRM_IDR0, WRITE_BLK_TYPE_ERROR, 13, 1) + FIELD(CFRM_IDR0, WRITE_FRAME_ADDR_ERROR, 12, 1) + FIELD(CFRM_IDR0, MFW_OVERRUN_ERROR, 11, 1) + FIELD(CFRM_IDR0, FAR_FIFO_UNDERFLOW, 10, 1) + FIELD(CFRM_IDR0, FAR_FIFO_OVERFLOW, 9, 1) + FIELD(CFRM_IDR0, PER_FRAME_SEQ_ERROR, 8, 1) + FIELD(CFRM_IDR0, CRC_ERROR, 7, 1) + FIELD(CFRM_IDR0, WRITE_OVERRUN_ERROR, 6, 1) + FIELD(CFRM_IDR0, READ_OVERRUN_ERROR, 5, 1) + FIELD(CFRM_IDR0, CMD_INTERRUPT_ERROR, 4, 1) + FIELD(CFRM_IDR0, WRITE_INTERRUPT_ERROR, 3, 1) + FIELD(CFRM_IDR0, READ_INTERRUPT_ERROR, 2, 1) + FIELD(CFRM_IDR0, SEU_CRC_ERROR, 1, 1) + FIELD(CFRM_IDR0, SEU_ECC_ERROR, 0, 1) +REG32(CFRM_IDR1, 0x184) +REG32(CFRM_IDR2, 0x188) +REG32(CFRM_IDR3, 0x18c) +REG32(CFRM_ITR0, 0x190) + FIELD(CFRM_ITR0, READ_BROADCAST_ERROR, 21, 1) + FIELD(CFRM_ITR0, CMD_MISSING_ERROR, 20, 1) + FIELD(CFRM_ITR0, RW_ROWOFF_ERROR, 19, 1) + FIELD(CFRM_ITR0, READ_REG_ADDR_ERROR, 18, 1) + FIELD(CFRM_ITR0, READ_BLK_TYPE_ERROR, 17, 1) + FIELD(CFRM_ITR0, READ_FRAME_ADDR_ERROR, 16, 1) + FIELD(CFRM_ITR0, WRITE_REG_ADDR_ERROR, 15, 1) + FIELD(CFRM_ITR0, WRITE_BLK_TYPE_ERROR, 13, 1) + FIELD(CFRM_ITR0, WRITE_FRAME_ADDR_ERROR, 12, 1) + FIELD(CFRM_ITR0, MFW_OVERRUN_ERROR, 11, 1) + FIELD(CFRM_ITR0, FAR_FIFO_UNDERFLOW, 10, 1) + FIELD(CFRM_ITR0, FAR_FIFO_OVERFLOW, 9, 1) + FIELD(CFRM_ITR0, PER_FRAME_SEQ_ERROR, 8, 1) + FIELD(CFRM_ITR0, CRC_ERROR, 7, 1) + FIELD(CFRM_ITR0, WRITE_OVERRUN_ERROR, 6, 1) + FIELD(CFRM_ITR0, READ_OVERRUN_ERROR, 5, 1) + FIELD(CFRM_ITR0, CMD_INTERRUPT_ERROR, 4, 1) + FIELD(CFRM_ITR0, WRITE_INTERRUPT_ERROR, 3, 1) + FIELD(CFRM_ITR0, READ_INTERRUPT_ERROR, 2, 1) + FIELD(CFRM_ITR0, SEU_CRC_ERROR, 1, 1) + FIELD(CFRM_ITR0, SEU_ECC_ERROR, 0, 1) +REG32(CFRM_ITR1, 0x194) +REG32(CFRM_ITR2, 0x198) +REG32(CFRM_ITR3, 0x19c) +REG32(SEU_SYNDRM00, 0x1a0) +REG32(SEU_SYNDRM01, 0x1a4) +REG32(SEU_SYNDRM02, 0x1a8) +REG32(SEU_SYNDRM03, 0x1ac) +REG32(SEU_SYNDRM10, 0x1b0) +REG32(SEU_SYNDRM11, 0x1b4) +REG32(SEU_SYNDRM12, 0x1b8) +REG32(SEU_SYNDRM13, 0x1bc) +REG32(SEU_SYNDRM20, 0x1c0) +REG32(SEU_SYNDRM21, 0x1c4) +REG32(SEU_SYNDRM22, 0x1c8) +REG32(SEU_SYNDRM23, 0x1cc) +REG32(SEU_SYNDRM30, 0x1d0) +REG32(SEU_SYNDRM31, 0x1d4) +REG32(SEU_SYNDRM32, 0x1d8) +REG32(SEU_SYNDRM33, 0x1dc) +REG32(SEU_VIRTUAL_SYNDRM0, 0x1e0) +REG32(SEU_VIRTUAL_SYNDRM1, 0x1e4) +REG32(SEU_VIRTUAL_SYNDRM2, 0x1e8) +REG32(SEU_VIRTUAL_SYNDRM3, 0x1ec) +REG32(SEU_CRC0, 0x1f0) +REG32(SEU_CRC1, 0x1f4) +REG32(SEU_CRC2, 0x1f8) +REG32(SEU_CRC3, 0x1fc) +REG32(CFRAME_FAR_BOT0, 0x200) +REG32(CFRAME_FAR_BOT1, 0x204) +REG32(CFRAME_FAR_BOT2, 0x208) +REG32(CFRAME_FAR_BOT3, 0x20c) +REG32(CFRAME_FAR_TOP0, 0x210) +REG32(CFRAME_FAR_TOP1, 0x214) 
+REG32(CFRAME_FAR_TOP2, 0x218) +REG32(CFRAME_FAR_TOP3, 0x21c) +REG32(LAST_FRAME_BOT0, 0x220) + FIELD(LAST_FRAME_BOT0, BLOCKTYPE1_LAST_FRAME_LSB, 20, 12) + FIELD(LAST_FRAME_BOT0, BLOCKTYPE0_LAST_FRAME, 0, 20) +REG32(LAST_FRAME_BOT1, 0x224) + FIELD(LAST_FRAME_BOT1, BLOCKTYPE3_LAST_FRAME_LSB, 28, 4) + FIELD(LAST_FRAME_BOT1, BLOCKTYPE2_LAST_FRAME, 8, 20) + FIELD(LAST_FRAME_BOT1, BLOCKTYPE1_LAST_FRAME_MSB, 0, 8) +REG32(LAST_FRAME_BOT2, 0x228) + FIELD(LAST_FRAME_BOT2, BLOCKTYPE3_LAST_FRAME_MSB, 0, 16) +REG32(LAST_FRAME_BOT3, 0x22c) +REG32(LAST_FRAME_TOP0, 0x230) + FIELD(LAST_FRAME_TOP0, BLOCKTYPE5_LAST_FRAME_LSB, 20, 12) + FIELD(LAST_FRAME_TOP0, BLOCKTYPE4_LAST_FRAME, 0, 20) +REG32(LAST_FRAME_TOP1, 0x234) + FIELD(LAST_FRAME_TOP1, BLOCKTYPE6_LAST_FRAME, 8, 20) + FIELD(LAST_FRAME_TOP1, BLOCKTYPE5_LAST_FRAME_MSB, 0, 8) +REG32(LAST_FRAME_TOP2, 0x238) +REG32(LAST_FRAME_TOP3, 0x23c) + +#define CFRAME_REG_R_MAX (R_LAST_FRAME_TOP3 + 1) + +#define FRAME_NUM_QWORDS 25 +#define FRAME_NUM_WORDS (FRAME_NUM_QWORDS * 4) /* 25 * 128 bits */ + +typedef struct XlnxCFrame { + uint32_t data[FRAME_NUM_WORDS]; +} XlnxCFrame; + +struct XlnxVersalCFrameReg { + SysBusDevice parent_obj; + MemoryRegion iomem; + MemoryRegion iomem_fdri; + qemu_irq irq_cfrm_imr; + + /* 128-bit wfifo. */ + uint32_t wfifo[WFIFO_SZ]; + + uint32_t regs[CFRAME_REG_R_MAX]; + RegisterInfo regs_info[CFRAME_REG_R_MAX]; + + bool rowon; + bool wcfg; + bool rcfg; + + GTree *cframes; + Fifo32 new_f_data; + + struct { + XlnxCfiIf *cfu_fdro; + uint32_t blktype_num_frames[7]; + } cfg; + bool row_configured; +}; + +struct XlnxVersalCFrameBcastReg { + SysBusDevice parent_obj; + MemoryRegion iomem_reg; + MemoryRegion iomem_fdri; + + /* 128-bit wfifo. */ + uint32_t wfifo[WFIFO_SZ]; + + struct { + XlnxCfiIf *cframe[15]; + } cfg; +}; + +#endif diff --git a/include/hw/misc/xlnx-versal-cfu.h b/include/hw/misc/xlnx-versal-cfu.h new file mode 100644 index 0000000000..86fb841053 --- /dev/null +++ b/include/hw/misc/xlnx-versal-cfu.h @@ -0,0 +1,258 @@ +/* + * QEMU model of the CFU Configuration Unit. + * + * Copyright (C) 2023, Advanced Micro Devices, Inc. 
+ * + * Written by Francisco Iglesias <francisco.iglesias@amd.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * References: + * [1] Versal ACAP Technical Reference Manual, + * https://www.xilinx.com/support/documentation/architecture-manuals/am011-versal-acap-trm.pdf + * + * [2] Versal ACAP Register Reference, + * https://www.xilinx.com/htmldocs/registers/am012/am012-versal-register-reference.html + */ +#ifndef HW_MISC_XLNX_VERSAL_CFU_APB_H +#define HW_MISC_XLNX_VERSAL_CFU_APB_H + +#include "hw/sysbus.h" +#include "hw/register.h" +#include "hw/misc/xlnx-cfi-if.h" +#include "qemu/fifo32.h" + +#define TYPE_XLNX_VERSAL_CFU_APB "xlnx,versal-cfu-apb" +OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCFUAPB, XLNX_VERSAL_CFU_APB) + +#define TYPE_XLNX_VERSAL_CFU_FDRO "xlnx,versal-cfu-fdro" +OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCFUFDRO, XLNX_VERSAL_CFU_FDRO) + +#define TYPE_XLNX_VERSAL_CFU_SFR "xlnx,versal-cfu-sfr" +OBJECT_DECLARE_SIMPLE_TYPE(XlnxVersalCFUSFR, XLNX_VERSAL_CFU_SFR) + +REG32(CFU_ISR, 0x0) + FIELD(CFU_ISR, USR_GTS_EVENT, 9, 1) + FIELD(CFU_ISR, USR_GSR_EVENT, 8, 1) + FIELD(CFU_ISR, SLVERR, 7, 1) + FIELD(CFU_ISR, DECOMP_ERROR, 6, 1) + FIELD(CFU_ISR, BAD_CFI_PACKET, 5, 1) + FIELD(CFU_ISR, AXI_ALIGN_ERROR, 4, 1) + FIELD(CFU_ISR, CFI_ROW_ERROR, 3, 1) + FIELD(CFU_ISR, CRC32_ERROR, 2, 1) + FIELD(CFU_ISR, CRC8_ERROR, 1, 1) + FIELD(CFU_ISR, SEU_ENDOFCALIB, 0, 1) +REG32(CFU_IMR, 0x4) + FIELD(CFU_IMR, USR_GTS_EVENT, 9, 1) + FIELD(CFU_IMR, USR_GSR_EVENT, 8, 1) + FIELD(CFU_IMR, SLVERR, 7, 1) + FIELD(CFU_IMR, DECOMP_ERROR, 6, 1) + FIELD(CFU_IMR, BAD_CFI_PACKET, 5, 1) + FIELD(CFU_IMR, AXI_ALIGN_ERROR, 4, 1) + FIELD(CFU_IMR, CFI_ROW_ERROR, 3, 1) + FIELD(CFU_IMR, CRC32_ERROR, 2, 1) + FIELD(CFU_IMR, CRC8_ERROR, 1, 1) + FIELD(CFU_IMR, SEU_ENDOFCALIB, 0, 1) +REG32(CFU_IER, 0x8) + FIELD(CFU_IER, USR_GTS_EVENT, 9, 1) + FIELD(CFU_IER, USR_GSR_EVENT, 8, 1) + FIELD(CFU_IER, SLVERR, 7, 1) + FIELD(CFU_IER, DECOMP_ERROR, 6, 1) + FIELD(CFU_IER, BAD_CFI_PACKET, 5, 1) + FIELD(CFU_IER, AXI_ALIGN_ERROR, 4, 1) + FIELD(CFU_IER, CFI_ROW_ERROR, 3, 1) + FIELD(CFU_IER, CRC32_ERROR, 2, 1) + FIELD(CFU_IER, CRC8_ERROR, 1, 1) + FIELD(CFU_IER, SEU_ENDOFCALIB, 0, 1) +REG32(CFU_IDR, 0xc) + FIELD(CFU_IDR, USR_GTS_EVENT, 9, 1) + FIELD(CFU_IDR, USR_GSR_EVENT, 8, 1) + FIELD(CFU_IDR, SLVERR, 7, 1) + FIELD(CFU_IDR, DECOMP_ERROR, 6, 1) + FIELD(CFU_IDR, BAD_CFI_PACKET, 5, 1) + FIELD(CFU_IDR, AXI_ALIGN_ERROR, 4, 1) + FIELD(CFU_IDR, CFI_ROW_ERROR, 3, 1) + FIELD(CFU_IDR, CRC32_ERROR, 2, 1) + FIELD(CFU_IDR, CRC8_ERROR, 1, 1) + FIELD(CFU_IDR, SEU_ENDOFCALIB, 0, 1) +REG32(CFU_ITR, 0x10) + FIELD(CFU_ITR, USR_GTS_EVENT, 9, 1) + FIELD(CFU_ITR, USR_GSR_EVENT, 8, 1) + FIELD(CFU_ITR, SLVERR, 7, 1) + FIELD(CFU_ITR, DECOMP_ERROR, 6, 1) + FIELD(CFU_ITR, BAD_CFI_PACKET, 5, 1) + FIELD(CFU_ITR, AXI_ALIGN_ERROR, 4, 1) + FIELD(CFU_ITR, CFI_ROW_ERROR, 3, 1) + FIELD(CFU_ITR, CRC32_ERROR, 2, 1) + FIELD(CFU_ITR, CRC8_ERROR, 1, 1) + FIELD(CFU_ITR, SEU_ENDOFCALIB, 0, 1) +REG32(CFU_PROTECT, 0x14) + FIELD(CFU_PROTECT, ACTIVE, 0, 1) +REG32(CFU_FGCR, 0x18) + FIELD(CFU_FGCR, GCLK_CAL, 14, 1) + FIELD(CFU_FGCR, SC_HBC_TRIGGER, 13, 1) + FIELD(CFU_FGCR, GLOW, 12, 1) + FIELD(CFU_FGCR, GPWRDWN, 11, 1) + FIELD(CFU_FGCR, GCAP, 10, 1) + FIELD(CFU_FGCR, GSCWE, 9, 1) + FIELD(CFU_FGCR, GHIGH_B, 8, 1) + FIELD(CFU_FGCR, GMC_B, 7, 1) + FIELD(CFU_FGCR, GWE, 6, 1) + FIELD(CFU_FGCR, GRESTORE, 5, 1) + FIELD(CFU_FGCR, GTS_CFG_B, 4, 1) + FIELD(CFU_FGCR, GLUTMASK, 3, 1) + FIELD(CFU_FGCR, EN_GLOBS_B, 2, 1) + FIELD(CFU_FGCR, EOS, 1, 1) + FIELD(CFU_FGCR, INIT_COMPLETE, 0, 1) +REG32(CFU_CTL, 
0x1c) + FIELD(CFU_CTL, GSR_GSC, 15, 1) + FIELD(CFU_CTL, SLVERR_EN, 14, 1) + FIELD(CFU_CTL, CRC32_RESET, 13, 1) + FIELD(CFU_CTL, AXI_ERROR_EN, 12, 1) + FIELD(CFU_CTL, FLUSH_AXI, 11, 1) + FIELD(CFU_CTL, SSI_PER_SLR_PR, 10, 1) + FIELD(CFU_CTL, GCAP_CLK_EN, 9, 1) + FIELD(CFU_CTL, STATUS_SYNC_DISABLE, 8, 1) + FIELD(CFU_CTL, IGNORE_CFI_ERROR, 7, 1) + FIELD(CFU_CTL, CFRAME_DISABLE, 6, 1) + FIELD(CFU_CTL, QWORD_CNT_RESET, 5, 1) + FIELD(CFU_CTL, CRC8_DISABLE, 4, 1) + FIELD(CFU_CTL, CRC32_CHECK, 3, 1) + FIELD(CFU_CTL, DECOMPRESS, 2, 1) + FIELD(CFU_CTL, SEU_GO, 1, 1) + FIELD(CFU_CTL, CFI_LOCAL_RESET, 0, 1) +REG32(CFU_CRAM_RW, 0x20) + FIELD(CFU_CRAM_RW, RFIFO_AFULL_DEPTH, 18, 9) + FIELD(CFU_CRAM_RW, RD_WAVE_CNT_LEFT, 12, 6) + FIELD(CFU_CRAM_RW, RD_WAVE_CNT, 6, 6) + FIELD(CFU_CRAM_RW, WR_WAVE_CNT, 0, 6) +REG32(CFU_MASK, 0x28) +REG32(CFU_CRC_EXPECT, 0x2c) +REG32(CFU_CFRAME_LEFT_T0, 0x60) + FIELD(CFU_CFRAME_LEFT_T0, NUM, 0, 20) +REG32(CFU_CFRAME_LEFT_T1, 0x64) + FIELD(CFU_CFRAME_LEFT_T1, NUM, 0, 20) +REG32(CFU_CFRAME_LEFT_T2, 0x68) + FIELD(CFU_CFRAME_LEFT_T2, NUM, 0, 20) +REG32(CFU_ROW_RANGE, 0x6c) + FIELD(CFU_ROW_RANGE, HALF_FSR, 5, 1) + FIELD(CFU_ROW_RANGE, NUM, 0, 5) +REG32(CFU_STATUS, 0x100) + FIELD(CFU_STATUS, SEU_WRITE_ERROR, 30, 1) + FIELD(CFU_STATUS, FRCNT_ERROR, 29, 1) + FIELD(CFU_STATUS, RSVD_ERROR, 28, 1) + FIELD(CFU_STATUS, FDRO_ERROR, 27, 1) + FIELD(CFU_STATUS, FDRI_ERROR, 26, 1) + FIELD(CFU_STATUS, FDRI_READ_ERROR, 25, 1) + FIELD(CFU_STATUS, READ_FDRI_ERROR, 24, 1) + FIELD(CFU_STATUS, READ_SFR_ERROR, 23, 1) + FIELD(CFU_STATUS, READ_STREAM_ERROR, 22, 1) + FIELD(CFU_STATUS, UNKNOWN_STREAM_PKT, 21, 1) + FIELD(CFU_STATUS, USR_GTS, 20, 1) + FIELD(CFU_STATUS, USR_GSR, 19, 1) + FIELD(CFU_STATUS, AXI_BAD_WSTRB, 18, 1) + FIELD(CFU_STATUS, AXI_BAD_AR_SIZE, 17, 1) + FIELD(CFU_STATUS, AXI_BAD_AW_SIZE, 16, 1) + FIELD(CFU_STATUS, AXI_BAD_ARADDR, 15, 1) + FIELD(CFU_STATUS, AXI_BAD_AWADDR, 14, 1) + FIELD(CFU_STATUS, SCAN_CLEAR_PASS, 13, 1) + FIELD(CFU_STATUS, HC_SEC_ERROR, 12, 1) + FIELD(CFU_STATUS, GHIGH_B_ISHIGH, 11, 1) + FIELD(CFU_STATUS, GHIGH_B_ISLOW, 10, 1) + FIELD(CFU_STATUS, GMC_B_ISHIGH, 9, 1) + FIELD(CFU_STATUS, GMC_B_ISLOW, 8, 1) + FIELD(CFU_STATUS, GPWRDWN_B_ISHIGH, 7, 1) + FIELD(CFU_STATUS, CFI_SEU_CRC_ERROR, 6, 1) + FIELD(CFU_STATUS, CFI_SEU_ECC_ERROR, 5, 1) + FIELD(CFU_STATUS, CFI_SEU_HEARTBEAT, 4, 1) + FIELD(CFU_STATUS, SCAN_CLEAR_DONE, 3, 1) + FIELD(CFU_STATUS, HC_COMPLETE, 2, 1) + FIELD(CFU_STATUS, CFI_CFRAME_BUSY, 1, 1) + FIELD(CFU_STATUS, CFU_STREAM_BUSY, 0, 1) +REG32(CFU_INTERNAL_STATUS, 0x104) + FIELD(CFU_INTERNAL_STATUS, SSI_EOS, 22, 1) + FIELD(CFU_INTERNAL_STATUS, SSI_GWE, 21, 1) + FIELD(CFU_INTERNAL_STATUS, RFIFO_EMPTY, 20, 1) + FIELD(CFU_INTERNAL_STATUS, RFIFO_FULL, 19, 1) + FIELD(CFU_INTERNAL_STATUS, SEL_SFR, 18, 1) + FIELD(CFU_INTERNAL_STATUS, STREAM_CFRAME, 17, 1) + FIELD(CFU_INTERNAL_STATUS, FDRI_PHASE, 16, 1) + FIELD(CFU_INTERNAL_STATUS, CFI_PIPE_EN, 15, 1) + FIELD(CFU_INTERNAL_STATUS, AWFIFO_DCNT, 10, 5) + FIELD(CFU_INTERNAL_STATUS, WFIFO_DCNT, 5, 5) + FIELD(CFU_INTERNAL_STATUS, REPAIR_BUSY, 4, 1) + FIELD(CFU_INTERNAL_STATUS, TRIMU_BUSY, 3, 1) + FIELD(CFU_INTERNAL_STATUS, TRIMB_BUSY, 2, 1) + FIELD(CFU_INTERNAL_STATUS, HCLEANR_BUSY, 1, 1) + FIELD(CFU_INTERNAL_STATUS, HCLEAN_BUSY, 0, 1) +REG32(CFU_QWORD_CNT, 0x108) +REG32(CFU_CRC_LIVE, 0x10c) +REG32(CFU_PENDING_READ_CNT, 0x110) + FIELD(CFU_PENDING_READ_CNT, NUM, 0, 25) +REG32(CFU_FDRI_CNT, 0x114) +REG32(CFU_ECO1, 0x118) +REG32(CFU_ECO2, 0x11c) + +#define R_MAX (R_CFU_ECO2 + 1) + +#define NUM_STREAM 2 +#define WFIFO_SZ 4 + 
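+/*
+ * Note that WFIFO_SZ is in units of 32-bit words: each wfifo below buffers
+ * one 128-bit CFI word, assembled from 4 sequential 32-bit writes (see
+ * update_wfifo() at the end of this header).
+ */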
+struct XlnxVersalCFUAPB { + SysBusDevice parent_obj; + MemoryRegion iomem; + MemoryRegion iomem_stream[NUM_STREAM]; + qemu_irq irq_cfu_imr; + + /* 128-bit wfifo. */ + uint32_t wfifo[WFIFO_SZ]; + + uint32_t regs[R_MAX]; + RegisterInfo regs_info[R_MAX]; + + uint8_t fdri_row_addr; + + struct { + XlnxCfiIf *cframe[15]; + } cfg; +}; + + +struct XlnxVersalCFUFDRO { + SysBusDevice parent_obj; + MemoryRegion iomem_fdro; + + Fifo32 fdro_data; +}; + +struct XlnxVersalCFUSFR { + SysBusDevice parent_obj; + MemoryRegion iomem_sfr; + + /* 128-bit wfifo. */ + uint32_t wfifo[WFIFO_SZ]; + + struct { + XlnxVersalCFUAPB *cfu; + } cfg; +}; + +/** + * This is a helper function for updating a CFI data write fifo, an array of 4 + * uint32_t and 128 bits of data that are allowed to be written through 4 + * sequential 32 bit accesses. After the last index has been written into the + * write fifo (wfifo), the data is copied to and returned in a secondary fifo + * provided to the function (wfifo_ret), and the write fifo is cleared + * (zeroized). + * + * @addr: the address used when calculating the wfifo array index to update + * @value: the value to write into the wfifo array + * @wfifo: the wfifo to update + * @wfifo_ret: will return the wfifo data when all 128 bits have been written + * + * @return: true if all 128 bits have been updated. + */ +bool update_wfifo(hwaddr addr, uint64_t value, + uint32_t *wfifo, uint32_t *wfifo_ret); + +#endif diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index d1b8abe08d..e4db910339 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -41,9 +41,11 @@ typedef struct VMStateField VMStateField; */ struct VMStateInfo { const char *name; - int (*get)(QEMUFile *f, void *pv, size_t size, const VMStateField *field); - int (*put)(QEMUFile *f, void *pv, size_t size, const VMStateField *field, - JSONWriter *vmdesc); + int coroutine_mixed_fn (*get)(QEMUFile *f, void *pv, size_t size, + const VMStateField *field); + int coroutine_mixed_fn (*put)(QEMUFile *f, void *pv, size_t size, + const VMStateField *field, + JSONWriter *vmdesc); }; enum VMStateFlags { diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index 511b42bde5..a5b9122cb8 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -116,6 +116,7 @@ struct KVMState uint64_t kvm_dirty_ring_bytes; /* Size of the per-vcpu dirty ring */ uint32_t kvm_dirty_ring_size; /* Number of dirty GFNs per ring */ bool kvm_dirty_ring_with_bitmap; + uint64_t kvm_eager_split_size; /* Eager Page Splitting chunk size */ struct KVMDirtyRingReaper reaper; NotifyVmexitOption notify_vmexit; uint32_t notify_window; diff --git a/linux-user/riscv/signal.c b/linux-user/riscv/signal.c index eaa168199a..f989f7f51f 100644 --- a/linux-user/riscv/signal.c +++ b/linux-user/riscv/signal.c @@ -38,8 +38,8 @@ struct target_sigcontext { }; /* cf. riscv-linux:arch/riscv/include/uapi/asm/ptrace.h */
struct target_ucontext { - unsigned long uc_flags; - struct target_ucontext *uc_link; + abi_ulong uc_flags; + abi_ptr uc_link; target_stack_t uc_stack; target_sigset_t uc_sigmask; uint8_t __unused[1024 / 8 - sizeof(target_sigset_t)]; diff --git a/linux-user/syscall.c b/linux-user/syscall.c index dac0641bab..3521a2d70b 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -8793,6 +8793,10 @@ static int do_getdents64(abi_long dirfd, abi_long arg2, abi_long count) #define RISCV_HWPROBE_KEY_IMA_EXT_0 4 #define RISCV_HWPROBE_IMA_FD (1 << 0) #define RISCV_HWPROBE_IMA_C (1 << 1) +#define RISCV_HWPROBE_IMA_V (1 << 2) +#define RISCV_HWPROBE_EXT_ZBA (1 << 3) +#define RISCV_HWPROBE_EXT_ZBB (1 << 4) +#define RISCV_HWPROBE_EXT_ZBS (1 << 5) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) @@ -8840,7 +8844,15 @@ static void risc_hwprobe_fill_pairs(CPURISCVState *env, riscv_has_ext(env, RVD) ? RISCV_HWPROBE_IMA_FD : 0; value |= riscv_has_ext(env, RVC) ? - RISCV_HWPROBE_IMA_C : pair->value; + RISCV_HWPROBE_IMA_C : 0; + value |= riscv_has_ext(env, RVV) ? + RISCV_HWPROBE_IMA_V : 0; + value |= cfg->ext_zba ? + RISCV_HWPROBE_EXT_ZBA : 0; + value |= cfg->ext_zbb ? + RISCV_HWPROBE_EXT_ZBB : 0; + value |= cfg->ext_zbs ? + RISCV_HWPROBE_EXT_ZBS : 0; __put_user(value, &pair->value); break; case RISCV_HWPROBE_KEY_CPUPERF_0: diff --git a/qemu-img.c b/qemu-img.c index 27f48051b0..0756dbb835 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -3468,8 +3468,8 @@ static int img_snapshot(int argc, char **argv) ret = bdrv_snapshot_create(bs, &sn); if (ret) { - error_report("Could not create snapshot '%s': %d (%s)", - snapshot_name, ret, strerror(-ret)); + error_report("Could not create snapshot '%s': %s", + snapshot_name, strerror(-ret)); } break; diff --git a/qemu-options.hx b/qemu-options.hx index 56efe3d153..6be621c232 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -186,6 +186,7 @@ DEF("accel", HAS_ARG, QEMU_OPTION_accel, " split-wx=on|off (enable TCG split w^x mapping)\n" " tb-size=n (TCG translation block cache size)\n" " dirty-ring-size=n (KVM dirty ring GFN count, default 0)\n" + " eager-split-size=n (KVM Eager Page Split chunk size, default 0, disabled. ARM only)\n" " notify-vmexit=run|internal-error|disable,notify-window=n (enable notify VM exit and set notify window, x86 only)\n" " thread=single|multi (enable multi-threaded TCG)\n", QEMU_ARCH_ALL) SRST @@ -244,6 +245,20 @@ SRST is disabled (dirty-ring-size=0). When enabled, KVM will instead record dirty pages in a bitmap. + + ``eager-split-size=n`` + KVM implements dirty page logging at the PAGE_SIZE granularity and + enabling dirty-logging on a huge-page requires breaking it into + PAGE_SIZE pages in the first place. KVM on ARM does this splitting + lazily by default. There are performance benefits in doing huge-page + split eagerly, especially in situations where TLBI costs associated + with break-before-make sequences are considerable, or when guest + workloads are read-intensive. The size here specifies how many pages + to break at a time and needs to be a valid block size: 1GB/2MB/4KB for + a 4KB PAGE_SIZE, 32MB/16KB for a 16KB PAGE_SIZE, and 512MB/64KB for a + 64KB PAGE_SIZE. Be wary of specifying a larger size, as it will have + an impact on memory consumption. By default, this feature is disabled + (eager-split-size=0).
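+
+        For example, on a host using a 4KB PAGE_SIZE (where 2MB is a valid
+        block size), eager splitting in 2MB chunks can be requested with::
+
+            qemu-system-aarch64 -accel kvm,eager-split-size=2M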
+ ``notify-vmexit=run|internal-error|disable,notify-window=n`` Enables or disables notify VM exit support on x86 host and specify the corresponding notify window to trigger the VM exit if enabled. diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c index c8fa524002..b53d5efe13 100644 --- a/target/arm/arm-qmp-cmds.c +++ b/target/arm/arm-qmp-cmds.c @@ -95,7 +95,7 @@ static const char *cpu_model_advertised_features[] = { "sve640", "sve768", "sve896", "sve1024", "sve1152", "sve1280", "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048", "kvm-no-adjvtime", "kvm-steal-time", - "pauth", "pauth-impdef", + "pauth", "pauth-impdef", "pauth-qarma3", NULL }; diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 0bb0585441..b9e09a702d 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -243,6 +243,10 @@ static void arm_cpu_reset_hold(Object *obj) SCTLR_EnDA | SCTLR_EnDB); /* Trap on btype=3 for PACIxSP. */ env->cp15.sctlr_el[1] |= SCTLR_BT0; + /* Trap on implementation defined registers. */ + if (cpu_isar_feature(aa64_tidcp1, cpu)) { + env->cp15.sctlr_el[1] |= SCTLR_TIDCP; + } /* and to the FP/Neon instructions */ env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1, CPACR_EL1, FPEN, 3); diff --git a/target/arm/cpu.h b/target/arm/cpu.h index d50cd91858..f2e3dc49a6 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -1033,6 +1033,7 @@ struct ArchCPU { uint32_t dbgdevid1; uint64_t id_aa64isar0; uint64_t id_aa64isar1; + uint64_t id_aa64isar2; uint64_t id_aa64pfr0; uint64_t id_aa64pfr1; uint64_t id_aa64mmfr0; @@ -1071,6 +1072,7 @@ struct ArchCPU { */ bool prop_pauth; bool prop_pauth_impdef; + bool prop_pauth_qarma3; bool prop_lpa2; /* DCZ blocksize, in log_2(words), ie low 4 bits of DCZID_EL0 */ @@ -3795,28 +3797,59 @@ static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0; } +/* + * These are the values from APA/API/APA3. + * In general these must be compared '>=', per the normal Arm ARM + * treatment of fields in ID registers. + */ +typedef enum { + PauthFeat_None = 0, + PauthFeat_1 = 1, + PauthFeat_EPAC = 2, + PauthFeat_2 = 3, + PauthFeat_FPAC = 4, + PauthFeat_FPACCOMBINED = 5, +} ARMPauthFeature; + +static inline ARMPauthFeature +isar_feature_pauth_feature(const ARMISARegisters *id) +{ + /* + * Architecturally, only one of {APA,API,APA3} may be active (non-zero) + * and the other two must be zero. Thus we may avoid conditionals. + */ + return (FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) | + FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, API) | + FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, APA3)); +} + static inline bool isar_feature_aa64_pauth(const ARMISARegisters *id) { /* * Return true if any form of pauth is enabled, as this * predicate controls migration of the 128-bit keys. */ - return (id->id_aa64isar1 & - (FIELD_DP64(0, ID_AA64ISAR1, APA, 0xf) | - FIELD_DP64(0, ID_AA64ISAR1, API, 0xf) | - FIELD_DP64(0, ID_AA64ISAR1, GPA, 0xf) | - FIELD_DP64(0, ID_AA64ISAR1, GPI, 0xf))) != 0; + return isar_feature_pauth_feature(id) != PauthFeat_None; } -static inline bool isar_feature_aa64_pauth_arch(const ARMISARegisters *id) +static inline bool isar_feature_aa64_pauth_qarma5(const ARMISARegisters *id) { /* - * Return true if pauth is enabled with the architected QARMA algorithm. - * QEMU will always set APA+GPA to the same value. + * Return true if pauth is enabled with the architected QARMA5 algorithm. + * QEMU will always enable or disable both APA and GPA. 
*/ return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, APA) != 0; } +static inline bool isar_feature_aa64_pauth_qarma3(const ARMISARegisters *id) +{ + /* + * Return true if pauth is enabled with the architected QARMA3 algorithm. + * QEMU will always enable or disable both APA3 and GPA3. + */ + return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, APA3) != 0; +} + static inline bool isar_feature_aa64_tlbirange(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TLB) == 2; @@ -3939,6 +3972,11 @@ static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0; } +static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0; +} + static inline bool isar_feature_aa64_uao(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64mmfr2, ID_AA64MMFR2, UAO) != 0; } diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 96158093cc..f3d87e001f 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -473,43 +473,80 @@ void aarch64_add_sme_properties(Object *obj) void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp) { - int arch_val = 0, impdef_val = 0; - uint64_t t; + ARMPauthFeature features = cpu_isar_feature(pauth_feature, cpu); + uint64_t isar1, isar2; + + /* + * These properties enable or disable Pauth as a whole, or change + * the pauth algorithm, but do not change the set of features that + * are present. We have saved a copy of those features above and + * will now place it into the field that chooses the algorithm. + * + * Begin by disabling all fields. + */ + isar1 = cpu->isar.id_aa64isar1; + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, 0); + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 0); + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, 0); + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 0); + + isar2 = cpu->isar.id_aa64isar2; + isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, 0); + isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 0); - /* Exit early if PAuth is enabled, and fall through to disable it */ - if ((kvm_enabled() || hvf_enabled()) && cpu->prop_pauth) { - if (!cpu_isar_feature(aa64_pauth, cpu)) { - error_setg(errp, "'pauth' feature not supported by %s on this host", - kvm_enabled() ? "KVM" : "hvf"); + if (kvm_enabled() || hvf_enabled()) { + /* + * Exit early if PAuth is enabled and fall through to disable it. + * The algorithm selection properties are not present. + */ + if (cpu->prop_pauth) { + if (features == 0) { + error_setg(errp, "'pauth' feature not supported by " + "%s on this host", current_accel_name()); + } + return; + } + } else { + /* Pauth properties are only present when the model supports it. */ + if (features == 0) { + assert(!cpu->prop_pauth); + return; } - return; - } + if (cpu->prop_pauth) { + if (cpu->prop_pauth_impdef && cpu->prop_pauth_qarma3) { + error_setg(errp, + "cannot enable both pauth-impdef and pauth-qarma3"); + return; + } - /* TODO: Handle HaveEnhancedPAC, HaveEnhancedPAC2, HaveFPAC.
*/ - if (cpu->prop_pauth) { - if (cpu->prop_pauth_impdef) { - impdef_val = 1; - } else { - arch_val = 1; + if (cpu->prop_pauth_impdef) { + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, API, features); + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPI, 1); + } else if (cpu->prop_pauth_qarma3) { + isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, features); + isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 1); + } else { + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, APA, features); + isar1 = FIELD_DP64(isar1, ID_AA64ISAR1, GPA, 1); + } + } else if (cpu->prop_pauth_impdef || cpu->prop_pauth_qarma3) { + error_setg(errp, "cannot enable pauth-impdef or " + "pauth-qarma3 without pauth"); + error_append_hint(errp, "Add pauth=on to the CPU property list.\n"); } - } else if (cpu->prop_pauth_impdef) { - error_setg(errp, "cannot enable pauth-impdef without pauth"); - error_append_hint(errp, "Add pauth=on to the CPU property list.\n"); } - t = cpu->isar.id_aa64isar1; - t = FIELD_DP64(t, ID_AA64ISAR1, APA, arch_val); - t = FIELD_DP64(t, ID_AA64ISAR1, GPA, arch_val); - t = FIELD_DP64(t, ID_AA64ISAR1, API, impdef_val); - t = FIELD_DP64(t, ID_AA64ISAR1, GPI, impdef_val); - cpu->isar.id_aa64isar1 = t; + cpu->isar.id_aa64isar1 = isar1; + cpu->isar.id_aa64isar2 = isar2; } static Property arm_cpu_pauth_property = DEFINE_PROP_BOOL("pauth", ARMCPU, prop_pauth, true); static Property arm_cpu_pauth_impdef_property = DEFINE_PROP_BOOL("pauth-impdef", ARMCPU, prop_pauth_impdef, false); +static Property arm_cpu_pauth_qarma3_property = + DEFINE_PROP_BOOL("pauth-qarma3", ARMCPU, prop_pauth_qarma3, false); void aarch64_add_pauth_properties(Object *obj) { @@ -529,6 +566,7 @@ void aarch64_add_pauth_properties(Object *obj) cpu->prop_pauth = cpu_isar_feature(aa64_pauth, cpu); } else { qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_impdef_property); + qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_qarma3_property); } } diff --git a/target/arm/helper.c b/target/arm/helper.c index e3f5a7d2bd..3b22596eab 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -8435,11 +8435,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, .resetvalue = cpu->isar.id_aa64isar1 }, - { .name = "ID_AA64ISAR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + { .name = "ID_AA64ISAR2_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = 0 }, + .resetvalue = cpu->isar.id_aa64isar2 }, { .name = "ID_AA64ISAR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, @@ -8682,16 +8682,25 @@ void register_cp_regs_for_features(ARMCPU *cpu) }; modify_arm_cp_regs(v8_idregs, v8_user_idregs); #endif - /* RVBAR_EL1 is only implemented if EL1 is the highest EL */ + /* + * RVBAR_EL1 and RMR_EL1 only implemented if EL1 is the highest EL. + * TODO: For RMR, a write with bit 1 set should do something with + * cpu_reset(). In the meantime, "the bit is strictly a request", + * so we are in spec just ignoring writes. 
+ */ if (!arm_feature(env, ARM_FEATURE_EL3) && !arm_feature(env, ARM_FEATURE_EL2)) { - ARMCPRegInfo rvbar = { - .name = "RVBAR_EL1", .state = ARM_CP_STATE_BOTH, - .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1, - .access = PL1_R, - .fieldoffset = offsetof(CPUARMState, cp15.rvbar), + ARMCPRegInfo el1_reset_regs[] = { + { .name = "RVBAR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1, + .access = PL1_R, + .fieldoffset = offsetof(CPUARMState, cp15.rvbar) }, + { .name = "RMR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_CONST, + .resetvalue = arm_feature(env, ARM_FEATURE_AARCH64) } }; - define_one_arm_cp_reg(cpu, &rvbar); + define_arm_cp_regs(cpu, el1_reset_regs); } define_arm_cp_regs(cpu, v8_idregs); define_arm_cp_regs(cpu, v8_cp_reginfo); @@ -8775,22 +8784,25 @@ void register_cp_regs_for_features(ARMCPU *cpu) if (cpu_isar_feature(aa64_sel2, cpu)) { define_arm_cp_regs(cpu, el2_sec_cp_reginfo); } - /* RVBAR_EL2 is only implemented if EL2 is the highest EL */ + /* + * RVBAR_EL2 and RMR_EL2 only implemented if EL2 is the highest EL. + * See commentary near RMR_EL1. + */ if (!arm_feature(env, ARM_FEATURE_EL3)) { - ARMCPRegInfo rvbar[] = { - { - .name = "RVBAR_EL2", .state = ARM_CP_STATE_AA64, - .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 1, - .access = PL2_R, - .fieldoffset = offsetof(CPUARMState, cp15.rvbar), - }, - { .name = "RVBAR", .type = ARM_CP_ALIAS, - .cp = 15, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1, - .access = PL2_R, - .fieldoffset = offsetof(CPUARMState, cp15.rvbar), - }, + static const ARMCPRegInfo el2_reset_regs[] = { + { .name = "RVBAR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 1, + .access = PL2_R, + .fieldoffset = offsetof(CPUARMState, cp15.rvbar) }, + { .name = "RVBAR", .type = ARM_CP_ALIAS, + .cp = 15, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 1, + .access = PL2_R, + .fieldoffset = offsetof(CPUARMState, cp15.rvbar) }, + { .name = "RMR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 2, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 1 }, }; - define_arm_cp_regs(cpu, rvbar); + define_arm_cp_regs(cpu, el2_reset_regs); } } @@ -8801,8 +8813,14 @@ void register_cp_regs_for_features(ARMCPU *cpu) { .name = "RVBAR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 12, .crm = 0, .opc2 = 1, .access = PL3_R, - .fieldoffset = offsetof(CPUARMState, cp15.rvbar), - }, + .fieldoffset = offsetof(CPUARMState, cp15.rvbar), }, + { .name = "RMR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 12, .crm = 0, .opc2 = 2, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 1 }, + { .name = "RMR", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 12, .crm = 0, .opc2 = 2, + .access = PL3_RW, .type = ARM_CP_CONST, + .resetvalue = arm_feature(env, ARM_FEATURE_AARCH64) }, { .name = "SCTLR_EL3", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 6, .crn = 1, .crm = 0, .opc2 = 0, .access = PL3_RW, diff --git a/target/arm/helper.h b/target/arm/helper.h index 95e32a697a..2b02733305 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -81,6 +81,8 @@ DEF_HELPER_FLAGS_2(check_bxj_trap, TCG_CALL_NO_WG, void, env, i32) DEF_HELPER_4(access_check_cp_reg, cptr, env, i32, i32, i32) DEF_HELPER_FLAGS_2(lookup_cp_reg, TCG_CALL_NO_RWG_SE, cptr, env, i32) +DEF_HELPER_FLAGS_2(tidcp_el0, TCG_CALL_NO_WG, void, env, i32) +DEF_HELPER_FLAGS_2(tidcp_el1, TCG_CALL_NO_WG, 
void, env, i32) DEF_HELPER_3(set_cp_reg, void, env, cptr, i32) DEF_HELPER_2(get_cp_reg, i32, env, cptr) DEF_HELPER_3(set_cp_reg64, void, env, cptr, i64) diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index 486f90be1d..546c0e817f 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -847,6 +847,7 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) { HV_SYS_REG_ID_AA64DFR1_EL1, &host_isar.id_aa64dfr1 }, { HV_SYS_REG_ID_AA64ISAR0_EL1, &host_isar.id_aa64isar0 }, { HV_SYS_REG_ID_AA64ISAR1_EL1, &host_isar.id_aa64isar1 }, + /* Add ID_AA64ISAR2_EL1 here when HVF supports it */ { HV_SYS_REG_ID_AA64MMFR0_EL1, &host_isar.id_aa64mmfr0 }, { HV_SYS_REG_ID_AA64MMFR1_EL1, &host_isar.id_aa64mmfr1 }, { HV_SYS_REG_ID_AA64MMFR2_EL1, &host_isar.id_aa64mmfr2 }, diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 23aeb09949..b66b936a95 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -30,6 +30,7 @@ #include "exec/address-spaces.h" #include "hw/boards.h" #include "hw/irq.h" +#include "qapi/visitor.h" #include "qemu/log.h" const KVMCapabilityInfo kvm_arch_required_capabilities[] = { @@ -287,6 +288,26 @@ int kvm_arch_init(MachineState *ms, KVMState *s) } } + if (s->kvm_eager_split_size) { + uint32_t sizes; + + sizes = kvm_vm_check_extension(s, KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES); + if (!sizes) { + s->kvm_eager_split_size = 0; + warn_report("Eager Page Split support not available"); + } else if (!(s->kvm_eager_split_size & sizes)) { + error_report("Eager Page Split requested chunk size not valid"); + ret = -EINVAL; + } else { + ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE, 0, + s->kvm_eager_split_size); + if (ret < 0) { + error_report("Enabling of Eager Page Split failed: %s", + strerror(-ret)); + } + } + } + kvm_arm_init_debug(s); return ret; @@ -1069,6 +1090,46 @@ bool kvm_arch_cpu_check_are_resettable(void) return true; } +static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + uint64_t value = s->kvm_eager_split_size; + + visit_type_size(v, name, &value, errp); +} + +static void kvm_arch_set_eager_split_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + uint64_t value; + + if (s->fd != -1) { + error_setg(errp, "Unable to set eager-split-size after KVM has been initialized"); + return; + } + + if (!visit_type_size(v, name, &value, errp)) { + return; + } + + if (value && !is_power_of_2(value)) { + error_setg(errp, "eager-split-size must be a power of two"); + return; + } + + s->kvm_eager_split_size = value; +} + void kvm_arch_accel_class_init(ObjectClass *oc) { + object_class_property_add(oc, "eager-split-size", "size", + kvm_arch_get_eager_split_size, + kvm_arch_set_eager_split_size, NULL, NULL); + + object_class_property_set_description(oc, "eager-split-size", + "Eager Page Split chunk size for hugepages.
(default: 0, disabled)"); } diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index 4d904a1d11..5e95c496bb 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -304,6 +304,8 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) ARM64_SYS_REG(3, 0, 0, 6, 0)); err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1, ARM64_SYS_REG(3, 0, 0, 6, 1)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar2, + ARM64_SYS_REG(3, 0, 0, 6, 2)); err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0, ARM64_SYS_REG(3, 0, 0, 7, 0)); err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1, @@ -672,6 +674,7 @@ typedef struct CPRegStateLevel { */ static const CPRegStateLevel non_runtime_cpregs[] = { { KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE }, + { KVM_REG_ARM_PTIMER_CNT, KVM_PUT_FULL_STATE }, }; int kvm_arm_cpreg_level(uint64_t regidx) diff --git a/target/arm/syndrome.h b/target/arm/syndrome.h index 62254d0e51..8a6b8f8162 100644 --- a/target/arm/syndrome.h +++ b/target/arm/syndrome.h @@ -49,6 +49,7 @@ enum arm_exception_class { EC_SYSTEMREGISTERTRAP = 0x18, EC_SVEACCESSTRAP = 0x19, EC_ERETTRAP = 0x1a, + EC_PACFAIL = 0x1c, EC_SMETRAP = 0x1d, EC_GPC = 0x1e, EC_INSNABORT = 0x20, @@ -232,6 +233,12 @@ static inline uint32_t syn_smetrap(SMEExceptionType etype, bool is_16bit) | (is_16bit ? 0 : ARM_EL_IL) | etype; } +static inline uint32_t syn_pacfail(bool data, int keynumber) +{ + int error_code = (data << 1) | keynumber; + return (EC_PACFAIL << ARM_EL_EC_SHIFT) | ARM_EL_IL | error_code; +} + static inline uint32_t syn_pactrap(void) { return EC_PACTRAP << ARM_EL_EC_SHIFT; diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c index 0f8972950d..7264ab5ead 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -745,6 +745,217 @@ static void aarch64_neoverse_v1_initfn(Object *obj) aarch64_add_sve_properties(obj); } +static const ARMCPRegInfo cortex_a710_cp_reginfo[] = { + { .name = "CPUACTLR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUACTLR2_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUACTLR3_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUACTLR4_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 3, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUECTLR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 4, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUECTLR2_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 1, .opc2 = 5, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUPPMCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 4, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPWRCTLR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 2, .opc2 = 7, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "ATCR_EL1", 
.state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 7, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUACTLR5_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 8, .opc2 = 0, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUACTLR6_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 8, .opc2 = 1, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "CPUACTLR7_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 15, .crm = 8, .opc2 = 2, + .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, + .accessfn = access_actlr_w }, + { .name = "ATCR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 15, .crm = 7, .opc2 = 0, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "AVTCR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 15, .crm = 7, .opc2 = 1, + .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 0, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR2_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 1, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR4_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 4, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR5_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 5, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPPMCR6_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 2, .opc2 = 6, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUACTLR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 4, .opc2 = 0, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "ATCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 7, .opc2 = 0, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPSELR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 0, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPCR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 1, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPOR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 2, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPMR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 3, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPOR2_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 4, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPMR2_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 5, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "CPUPFR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 8, .opc2 = 6, + .access = PL3_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, + + /* + * Stub RAMINDEX, as we don't actually 
implement caches, BTB, + * or anything else with cpu internal memory. + * "Read" zeros into the IDATA* and DDATA* output registers. + */ + { .name = "RAMINDEX_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 6, .crn = 15, .crm = 0, .opc2 = 0, + .access = PL3_W, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IDATA0_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 0, .opc2 = 0, + .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IDATA1_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 0, .opc2 = 1, + .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "IDATA2_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 0, .opc2 = 2, + .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "DDATA0_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 1, .opc2 = 0, + .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "DDATA1_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 1, .opc2 = 1, + .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "DDATA2_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 15, .crm = 1, .opc2 = 2, + .access = PL3_R, .type = ARM_CP_CONST, .resetvalue = 0 }, +}; + +static void aarch64_a710_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a710"; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_AARCH64); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_EL2); + set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); + + /* Ordered by Section B.4: AArch64 registers */ + cpu->midr = 0x412FD471; /* r2p1 */ + cpu->revidr = 0; + cpu->isar.id_pfr0 = 0x21110131; + cpu->isar.id_pfr1 = 0x00010000; /* GIC filled in later */ + cpu->isar.id_dfr0 = 0x16011099; + cpu->id_afr0 = 0; + cpu->isar.id_mmfr0 = 0x10201105; + cpu->isar.id_mmfr1 = 0x40000000; + cpu->isar.id_mmfr2 = 0x01260000; + cpu->isar.id_mmfr3 = 0x02122211; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; + cpu->isar.id_isar3 = 0x01112131; + cpu->isar.id_isar4 = 0x00010142; + cpu->isar.id_isar5 = 0x11011121; /* with Crypto */ + cpu->isar.id_mmfr4 = 0x21021110; + cpu->isar.id_isar6 = 0x01111111; + cpu->isar.mvfr0 = 0x10110222; + cpu->isar.mvfr1 = 0x13211111; + cpu->isar.mvfr2 = 0x00000043; + cpu->isar.id_pfr2 = 0x00000011; + cpu->isar.id_aa64pfr0 = 0x1201111120111112ull; /* GIC filled in later */ + cpu->isar.id_aa64pfr1 = 0x0000000000000221ull; + cpu->isar.id_aa64zfr0 = 0x0000110100110021ull; /* with Crypto */ + cpu->isar.id_aa64dfr0 = 0x000011f010305611ull; + cpu->isar.id_aa64dfr1 = 0; + cpu->id_aa64afr0 = 0; + cpu->id_aa64afr1 = 0; + cpu->isar.id_aa64isar0 = 0x0221111110212120ull; /* with Crypto */ + cpu->isar.id_aa64isar1 = 0x0010111101211032ull; + cpu->isar.id_aa64mmfr0 = 0x0000022200101122ull; + cpu->isar.id_aa64mmfr1 = 0x0000000010212122ull; + cpu->isar.id_aa64mmfr2 = 0x1221011110101011ull; + cpu->clidr = 0x0000001482000023ull; + cpu->gm_blocksize = 4; + cpu->ctr = 0x000000049444c004ull; + cpu->dcz_blocksize = 4; + /* TODO FEAT_MPAM: mpamidr_el1 = 0x0000_0001_0006_003f */ + + /* Section B.5.2: PMCR_EL0 */ + cpu->isar.reset_pmcr_el0 = 0xa000; /* with 20 counters */ + + /* Section B.6.7: ICH_VTR_EL2 */ + 
cpu->gic_num_lrs = 4; + cpu->gic_vpribits = 5; + cpu->gic_vprebits = 5; + cpu->gic_pribits = 5; + + /* Section 14: Scalable Vector Extensions support */ + cpu->sve_vq.supported = 1 << 0; /* 128bit */ + + /* + * The cortex-a710 TRM does not list CCSIDR values. The layout of + * the caches are in text in Table 7-1, Table 8-1, and Table 9-1. + * + * L1: 4-way set associative 64-byte line size, total either 32K or 64K. + * L2: 8-way set associative 64 byte line size, total either 256K or 512K. + */ + cpu->ccsidr[0] = make_ccsidr64(4, 64, 64 * KiB); /* L1 dcache */ + cpu->ccsidr[1] = cpu->ccsidr[0]; /* L1 icache */ + cpu->ccsidr[2] = make_ccsidr64(8, 64, 512 * KiB); /* L2 cache */ + + /* FIXME: Not documented -- copied from neoverse-v1 */ + cpu->reset_sctlr = 0x30c50838; + + define_arm_cp_regs(cpu, cortex_a710_cp_reginfo); + + aarch64_add_pauth_properties(obj); + aarch64_add_sve_properties(obj); +} + /* * -cpu max: a CPU with as many features enabled as our emulation supports. * The version of '-cpu max' for qemu-system-arm is defined in cpu32.c; @@ -803,6 +1014,8 @@ void aarch64_max_tcg_initfn(Object *obj) t = cpu->isar.id_aa64isar1; t = FIELD_DP64(t, ID_AA64ISAR1, DPB, 2); /* FEAT_DPB2 */ + t = FIELD_DP64(t, ID_AA64ISAR1, APA, PauthFeat_FPACCOMBINED); + t = FIELD_DP64(t, ID_AA64ISAR1, API, 1); t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 1); /* FEAT_JSCVT */ t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1); /* FEAT_FCMA */ t = FIELD_DP64(t, ID_AA64ISAR1, LRCPC, 2); /* FEAT_LRCPC2 */ @@ -858,6 +1071,7 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* FEAT_XNX */ t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 1); /* FEAT_ETS */ t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */ + t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */ cpu->isar.id_aa64mmfr1 = t; t = cpu->isar.id_aa64mmfr2; @@ -934,6 +1148,7 @@ static const ARMCPUInfo aarch64_cpus[] = { { .name = "cortex-a55", .initfn = aarch64_a55_initfn }, { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, { .name = "cortex-a76", .initfn = aarch64_a76_initfn }, + { .name = "cortex-a710", .initfn = aarch64_a710_initfn }, { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, { .name = "neoverse-n1", .initfn = aarch64_neoverse_n1_initfn }, { .name = "neoverse-v1", .initfn = aarch64_neoverse_v1_initfn }, diff --git a/target/arm/tcg/crypto_helper.c b/target/arm/tcg/crypto_helper.c index fdd70abbfd..7cadd61e12 100644 --- a/target/arm/tcg/crypto_helper.c +++ b/target/arm/tcg/crypto_helper.c @@ -614,10 +614,7 @@ static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) CR_ST_WORD(d, (i + 3) % 4) ^ CR_ST_WORD(n, i); - t = sm4_sbox[t & 0xff] | - sm4_sbox[(t >> 8) & 0xff] << 8 | - sm4_sbox[(t >> 16) & 0xff] << 16 | - sm4_sbox[(t >> 24) & 0xff] << 24; + t = sm4_subword(t); CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ rol32(t, 24); @@ -651,10 +648,7 @@ static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) CR_ST_WORD(d, (i + 3) % 4) ^ CR_ST_WORD(m, i); - t = sm4_sbox[t & 0xff] | - sm4_sbox[(t >> 8) & 0xff] << 8 | - sm4_sbox[(t >> 16) & 0xff] << 16 | - sm4_sbox[(t >> 24) & 0xff] << 24; + t = sm4_subword(t); CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23); } diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h index 3d5957c11f..57cfd68569 100644 --- a/target/arm/tcg/helper-a64.h +++ b/target/arm/tcg/helper-a64.h @@ -90,9 +90,13 @@ DEF_HELPER_FLAGS_3(pacda, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(pacdb, TCG_CALL_NO_WG, i64, env, i64, i64) 
DEF_HELPER_FLAGS_3(pacga, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(autia, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(autia_combined, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(autib, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(autib_combined, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(autda, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(autda_combined, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_3(autdb, TCG_CALL_NO_WG, i64, env, i64, i64) +DEF_HELPER_FLAGS_3(autdb_combined, TCG_CALL_NO_WG, i64, env, i64, i64) DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64) DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64) diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c index 3baf8004f6..403f8b09d3 100644 --- a/target/arm/tcg/op_helper.c +++ b/target/arm/tcg/op_helper.c @@ -764,6 +764,39 @@ const void *HELPER(lookup_cp_reg)(CPUARMState *env, uint32_t key) return ri; } +/* + * Test for HCR_EL2.TIDCP at EL1. + * Since implementation defined registers are rare, and within QEMU + * most of them are no-op, do not waste HFLAGS space for this and + * always use a helper. + */ +void HELPER(tidcp_el1)(CPUARMState *env, uint32_t syndrome) +{ + if (arm_hcr_el2_eff(env) & HCR_TIDCP) { + raise_exception_ra(env, EXCP_UDEF, syndrome, 2, GETPC()); + } +} + +/* + * Similarly, for FEAT_TIDCP1 at EL0. + * We have already checked for the presence of the feature. + */ +void HELPER(tidcp_el0)(CPUARMState *env, uint32_t syndrome) +{ + /* See arm_sctlr(), but we also need the sctlr el. */ + ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0); + int target_el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1; + + /* + * The bit is not valid unless the target el is aa64, but since the + * bit test is simpler perform that first and check validity after. + */ + if ((env->cp15.sctlr_el[target_el] & SCTLR_TIDCP) + && arm_el_is_aa64(env, target_el)) { + raise_exception_ra(env, EXCP_UDEF, syndrome, target_el, GETPC()); + } +} + void HELPER(set_cp_reg)(CPUARMState *env, const void *rip, uint32_t value) { const ARMCPRegInfo *ri = rip; diff --git a/target/arm/tcg/pauth_helper.c b/target/arm/tcg/pauth_helper.c index 62af569341..4da2962ad5 100644 --- a/target/arm/tcg/pauth_helper.c +++ b/target/arm/tcg/pauth_helper.c @@ -96,6 +96,21 @@ static uint64_t pac_sub(uint64_t i) return o; } +static uint64_t pac_sub1(uint64_t i) +{ + static const uint8_t sub1[16] = { + 0xa, 0xd, 0xe, 0x6, 0xf, 0x7, 0x3, 0x5, + 0x9, 0x8, 0x0, 0xc, 0xb, 0x1, 0x2, 0x4, + }; + uint64_t o = 0; + int b; + + for (b = 0; b < 64; b += 4) { + o |= (uint64_t)sub1[(i >> b) & 0xf] << b; + } + return o; +} + static uint64_t pac_inv_sub(uint64_t i) { static const uint8_t inv_sub[16] = { @@ -209,7 +224,7 @@ static uint64_t tweak_inv_shuffle(uint64_t i) } static uint64_t pauth_computepac_architected(uint64_t data, uint64_t modifier, - ARMPACKey key) + ARMPACKey key, bool isqarma3) { static const uint64_t RC[5] = { 0x0000000000000000ull, @@ -219,6 +234,7 @@ static uint64_t pauth_computepac_architected(uint64_t data, uint64_t modifier, 0x452821E638D01377ull, }; const uint64_t alpha = 0xC0AC29B7C97C50DDull; + int iterations = isqarma3 ? 2 : 4; /* * Note that in the ARM pseudocode, key0 contains bits <127:64> * and key1 contains bits <63:0> of the 128-bit key. 
@@ -231,7 +247,7 @@ static uint64_t pauth_computepac_architected(uint64_t data, uint64_t modifier, runningmod = modifier; workingval = data ^ key0; - for (i = 0; i <= 4; ++i) { + for (i = 0; i <= iterations; ++i) { roundkey = key1 ^ runningmod; workingval ^= roundkey; workingval ^= RC[i]; @@ -239,32 +255,48 @@ static uint64_t pauth_computepac_architected(uint64_t data, uint64_t modifier, workingval = pac_cell_shuffle(workingval); workingval = pac_mult(workingval); } - workingval = pac_sub(workingval); + if (isqarma3) { + workingval = pac_sub1(workingval); + } else { + workingval = pac_sub(workingval); + } runningmod = tweak_shuffle(runningmod); } roundkey = modk0 ^ runningmod; workingval ^= roundkey; workingval = pac_cell_shuffle(workingval); workingval = pac_mult(workingval); - workingval = pac_sub(workingval); + if (isqarma3) { + workingval = pac_sub1(workingval); + } else { + workingval = pac_sub(workingval); + } workingval = pac_cell_shuffle(workingval); workingval = pac_mult(workingval); workingval ^= key1; workingval = pac_cell_inv_shuffle(workingval); - workingval = pac_inv_sub(workingval); + if (isqarma3) { + workingval = pac_sub1(workingval); + } else { + workingval = pac_inv_sub(workingval); + } workingval = pac_mult(workingval); workingval = pac_cell_inv_shuffle(workingval); workingval ^= key0; workingval ^= runningmod; - for (i = 0; i <= 4; ++i) { - workingval = pac_inv_sub(workingval); - if (i < 4) { + for (i = 0; i <= iterations; ++i) { + if (isqarma3) { + workingval = pac_sub1(workingval); + } else { + workingval = pac_inv_sub(workingval); + } + if (i < iterations) { workingval = pac_mult(workingval); workingval = pac_cell_inv_shuffle(workingval); } runningmod = tweak_inv_shuffle(runningmod); roundkey = key1 ^ runningmod; - workingval ^= RC[4 - i]; + workingval ^= RC[iterations - i]; workingval ^= roundkey; workingval ^= alpha; } @@ -282,8 +314,10 @@ static uint64_t pauth_computepac_impdef(uint64_t data, uint64_t modifier, static uint64_t pauth_computepac(CPUARMState *env, uint64_t data, uint64_t modifier, ARMPACKey key) { - if (cpu_isar_feature(aa64_pauth_arch, env_archcpu(env))) { - return pauth_computepac_architected(data, modifier, key); + if (cpu_isar_feature(aa64_pauth_qarma5, env_archcpu(env))) { + return pauth_computepac_architected(data, modifier, key, false); + } else if (cpu_isar_feature(aa64_pauth_qarma3, env_archcpu(env))) { + return pauth_computepac_architected(data, modifier, key, true); } else { return pauth_computepac_impdef(data, modifier, key); } @@ -292,8 +326,10 @@ static uint64_t pauth_computepac(CPUARMState *env, uint64_t data, static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier, ARMPACKey *key, bool data) { + ARMCPU *cpu = env_archcpu(env); ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env); ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx, data, false); + ARMPauthFeature pauth_feature = cpu_isar_feature(pauth_feature, cpu); uint64_t pac, ext_ptr, ext, test; int bot_bit, top_bit; @@ -317,17 +353,26 @@ static uint64_t pauth_addpac(CPUARMState *env, uint64_t ptr, uint64_t modifier, */ test = sextract64(ptr, bot_bit, top_bit - bot_bit); if (test != 0 && test != -1) { - /* - * Note that our top_bit is one greater than the pseudocode's - * version, hence "- 2" here. 
- */ - pac ^= MAKE_64BIT_MASK(top_bit - 2, 1); + if (pauth_feature >= PauthFeat_2) { + /* No action required */ + } else if (pauth_feature == PauthFeat_EPAC) { + pac = 0; + } else { + /* + * Note that our top_bit is one greater than the pseudocode's + * version, hence "- 2" here. + */ + pac ^= MAKE_64BIT_MASK(top_bit - 2, 1); + } } /* * Preserve the determination between upper and lower at bit 55, * and insert pointer authentication code. */ + if (pauth_feature >= PauthFeat_2) { + pac ^= ptr; + } if (param.tbi) { ptr &= ~MAKE_64BIT_MASK(bot_bit, 55 - bot_bit + 1); pac &= MAKE_64BIT_MASK(bot_bit, 54 - bot_bit + 1); @@ -351,21 +396,46 @@ static uint64_t pauth_original_ptr(uint64_t ptr, ARMVAParameters param) } } +static G_NORETURN +void pauth_fail_exception(CPUARMState *env, bool data, + int keynumber, uintptr_t ra) +{ + raise_exception_ra(env, EXCP_UDEF, syn_pacfail(data, keynumber), + exception_target_el(env), ra); +} + static uint64_t pauth_auth(CPUARMState *env, uint64_t ptr, uint64_t modifier, - ARMPACKey *key, bool data, int keynumber) + ARMPACKey *key, bool data, int keynumber, + uintptr_t ra, bool is_combined) { + ARMCPU *cpu = env_archcpu(env); ARMMMUIdx mmu_idx = arm_stage1_mmu_idx(env); ARMVAParameters param = aa64_va_parameters(env, ptr, mmu_idx, data, false); + ARMPauthFeature pauth_feature = cpu_isar_feature(pauth_feature, cpu); int bot_bit, top_bit; - uint64_t pac, orig_ptr, test; + uint64_t pac, orig_ptr, cmp_mask; orig_ptr = pauth_original_ptr(ptr, param); pac = pauth_computepac(env, orig_ptr, modifier, *key); bot_bit = 64 - param.tsz; top_bit = 64 - 8 * param.tbi; - test = (pac ^ ptr) & ~MAKE_64BIT_MASK(55, 1); - if (unlikely(extract64(test, bot_bit, top_bit - bot_bit))) { + cmp_mask = MAKE_64BIT_MASK(bot_bit, top_bit - bot_bit); + cmp_mask &= ~MAKE_64BIT_MASK(55, 1); + + if (pauth_feature >= PauthFeat_2) { + ARMPauthFeature fault_feature = + is_combined ? 
PauthFeat_FPACCOMBINED : PauthFeat_FPAC; + uint64_t result = ptr ^ (pac & cmp_mask); + + if (pauth_feature >= fault_feature + && ((result ^ sextract64(result, 55, 1)) & cmp_mask)) { + pauth_fail_exception(env, data, keynumber, ra); + } + return result; + } + + if ((pac ^ ptr) & cmp_mask) { int error_code = (keynumber << 1) | (keynumber ^ 1); if (param.tbi) { return deposit64(orig_ptr, 53, 2, error_code); @@ -466,44 +536,88 @@ uint64_t HELPER(pacga)(CPUARMState *env, uint64_t x, uint64_t y) return pac & 0xffffffff00000000ull; } -uint64_t HELPER(autia)(CPUARMState *env, uint64_t x, uint64_t y) +static uint64_t pauth_autia(CPUARMState *env, uint64_t x, uint64_t y, + uintptr_t ra, bool is_combined) { int el = arm_current_el(env); if (!pauth_key_enabled(env, el, SCTLR_EnIA)) { return x; } - pauth_check_trap(env, el, GETPC()); - return pauth_auth(env, x, y, &env->keys.apia, false, 0); + pauth_check_trap(env, el, ra); + return pauth_auth(env, x, y, &env->keys.apia, false, 0, ra, is_combined); } -uint64_t HELPER(autib)(CPUARMState *env, uint64_t x, uint64_t y) +uint64_t HELPER(autia)(CPUARMState *env, uint64_t x, uint64_t y) +{ + return pauth_autia(env, x, y, GETPC(), false); +} + +uint64_t HELPER(autia_combined)(CPUARMState *env, uint64_t x, uint64_t y) +{ + return pauth_autia(env, x, y, GETPC(), true); +} + +static uint64_t pauth_autib(CPUARMState *env, uint64_t x, uint64_t y, + uintptr_t ra, bool is_combined) { int el = arm_current_el(env); if (!pauth_key_enabled(env, el, SCTLR_EnIB)) { return x; } - pauth_check_trap(env, el, GETPC()); - return pauth_auth(env, x, y, &env->keys.apib, false, 1); + pauth_check_trap(env, el, ra); + return pauth_auth(env, x, y, &env->keys.apib, false, 1, ra, is_combined); } -uint64_t HELPER(autda)(CPUARMState *env, uint64_t x, uint64_t y) +uint64_t HELPER(autib)(CPUARMState *env, uint64_t x, uint64_t y) +{ + return pauth_autib(env, x, y, GETPC(), false); +} + +uint64_t HELPER(autib_combined)(CPUARMState *env, uint64_t x, uint64_t y) +{ + return pauth_autib(env, x, y, GETPC(), true); +} + +static uint64_t pauth_autda(CPUARMState *env, uint64_t x, uint64_t y, + uintptr_t ra, bool is_combined) { int el = arm_current_el(env); if (!pauth_key_enabled(env, el, SCTLR_EnDA)) { return x; } - pauth_check_trap(env, el, GETPC()); - return pauth_auth(env, x, y, &env->keys.apda, true, 0); + pauth_check_trap(env, el, ra); + return pauth_auth(env, x, y, &env->keys.apda, true, 0, ra, is_combined); } -uint64_t HELPER(autdb)(CPUARMState *env, uint64_t x, uint64_t y) +uint64_t HELPER(autda)(CPUARMState *env, uint64_t x, uint64_t y) +{ + return pauth_autda(env, x, y, GETPC(), false); +} + +uint64_t HELPER(autda_combined)(CPUARMState *env, uint64_t x, uint64_t y) +{ + return pauth_autda(env, x, y, GETPC(), true); +} + +static uint64_t pauth_autdb(CPUARMState *env, uint64_t x, uint64_t y, + uintptr_t ra, bool is_combined) { int el = arm_current_el(env); if (!pauth_key_enabled(env, el, SCTLR_EnDB)) { return x; } - pauth_check_trap(env, el, GETPC()); - return pauth_auth(env, x, y, &env->keys.apdb, true, 1); + pauth_check_trap(env, el, ra); + return pauth_auth(env, x, y, &env->keys.apdb, true, 1, ra, is_combined); +} + +uint64_t HELPER(autdb)(CPUARMState *env, uint64_t x, uint64_t y) +{ + return pauth_autdb(env, x, y, GETPC(), false); +} + +uint64_t HELPER(autdb_combined)(CPUARMState *env, uint64_t x, uint64_t y) +{ + return pauth_autdb(env, x, y, GETPC(), true); } uint64_t HELPER(xpaci)(CPUARMState *env, uint64_t a) diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c 
index 0b77c92437..1b6fbb61e2 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -1530,9 +1530,9 @@ static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst, truedst = tcg_temp_new_i64(); if (use_key_a) { - gen_helper_autia(truedst, cpu_env, dst, modifier); + gen_helper_autia_combined(truedst, cpu_env, dst, modifier); } else { - gen_helper_autib(truedst, cpu_env, dst, modifier); + gen_helper_autib_combined(truedst, cpu_env, dst, modifier); } return truedst; } @@ -2154,6 +2154,25 @@ static void handle_sys(DisasContext *s, bool isread, bool need_exit_tb = false; TCGv_ptr tcg_ri = NULL; TCGv_i64 tcg_rt; + uint32_t syndrome; + + if (crn == 11 || crn == 15) { + /* + * Check for TIDCP trap, which must take precedence over + * the UNDEF for "no such register" etc. + */ + syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); + switch (s->current_el) { + case 0: + if (dc_isar_feature(aa64_tidcp1, s)) { + gen_helper_tidcp_el0(cpu_env, tcg_constant_i32(syndrome)); + } + break; + case 1: + gen_helper_tidcp_el1(cpu_env, tcg_constant_i32(syndrome)); + break; + } + } if (!ri) { /* Unknown register; this might be a guest error or a QEMU @@ -2176,8 +2195,6 @@ static void handle_sys(DisasContext *s, bool isread, /* Emit code to perform further access permissions checks at * runtime; this may result in an exception. */ - uint32_t syndrome; - syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); gen_a64_update_pc(s, 0); tcg_ri = tcg_temp_new_ptr(); @@ -3020,37 +3037,17 @@ static bool trans_STGP(DisasContext *s, arg_ldstpair *a) tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); } - if (!s->ata) { - /* - * TODO: We could rely on the stores below, at least for - * system mode, if we arrange to add MO_ALIGN_16. - */ - gen_helper_stg_stub(cpu_env, dirty_addr); - } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { - gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr); - } else { - gen_helper_stg(cpu_env, dirty_addr, dirty_addr); - } - - mop = finalize_memop(s, MO_64); - clean_addr = gen_mte_checkN(s, dirty_addr, true, false, 2 << MO_64, mop); - + clean_addr = clean_data_tbi(s, dirty_addr); tcg_rt = cpu_reg(s, a->rt); tcg_rt2 = cpu_reg(s, a->rt2); /* - * STGP is defined as two 8-byte memory operations and one tag operation. - * We implement it as one single 16-byte memory operation for convenience. - * Rebuild mop as for STP. - * TODO: The atomicity with LSE2 is stronger than required. - * Need a form of MO_ATOM_WITHIN16_PAIR that never requires - * 16-byte atomicity. + * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE, + * and one tag operation. We implement it as one single aligned 16-byte + * memory operation for convenience. Note that the alignment ensures + * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store. */ - mop = MO_128; - if (s->align_mem) { - mop |= MO_ALIGN_8; - } - mop = finalize_memop_pair(s, mop); + mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR); tmp = tcg_temp_new_i128(); if (s->be_data == MO_LE) { @@ -3060,6 +3057,15 @@ static bool trans_STGP(DisasContext *s, arg_ldstpair *a) } tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); + /* Perform the tag store, if tag access enabled. 
*/ + if (s->ata) { + if (tb_cflags(s->base.tb) & CF_PARALLEL) { + gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr); + } else { + gen_helper_stg(cpu_env, dirty_addr, dirty_addr); + } + } + op_addr_ldstpair_post(s, a, dirty_addr, offset); return true; } @@ -3352,11 +3358,11 @@ static bool trans_LDRA(DisasContext *s, arg_LDRA *a) if (s->pauth_active) { if (!a->m) { - gen_helper_autda(dirty_addr, cpu_env, dirty_addr, - tcg_constant_i64(0)); + gen_helper_autda_combined(dirty_addr, cpu_env, dirty_addr, + tcg_constant_i64(0)); } else { - gen_helper_autdb(dirty_addr, cpu_env, dirty_addr, - tcg_constant_i64(0)); + gen_helper_autdb_combined(dirty_addr, cpu_env, dirty_addr, + tcg_constant_i64(0)); } } diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c index 38ad8dd4bd..976b704200 100644 --- a/target/arm/tcg/translate.c +++ b/target/arm/tcg/translate.c @@ -4538,6 +4538,20 @@ void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); } +static bool aa32_cpreg_encoding_in_impdef_space(uint8_t crn, uint8_t crm) +{ + static const uint16_t mask[3] = { + 0b0000000111100111, /* crn == 9, crm == {c0-c2, c5-c8} */ + 0b0000000100010011, /* crn == 10, crm == {c0, c1, c4, c8} */ + 0b1000000111111111, /* crn == 11, crm == {c0-c8, c15} */ + }; + + if (crn >= 9 && crn <= 11) { + return (mask[crn - 9] >> crm) & 1; + } + return false; +} + static void do_coproc_insn(DisasContext *s, int cpnum, int is64, int opc1, int crn, int crm, int opc2, bool isread, int rt, int rt2) @@ -4619,6 +4633,25 @@ static void do_coproc_insn(DisasContext *s, int cpnum, int is64, } } + if (cpnum == 15 && aa32_cpreg_encoding_in_impdef_space(crn, crm)) { + /* + * Check for TIDCP trap, which must take precedence over the UNDEF + * for "no such register" etc. It shares precedence with HSTR, + * but raises the same exception, so order doesn't matter. 
+ */ + switch (s->current_el) { + case 0: + if (arm_dc_feature(s, ARM_FEATURE_AARCH64) + && dc_isar_feature(aa64_tidcp1, s)) { + gen_helper_tidcp_el0(cpu_env, tcg_constant_i32(syndrome)); + } + break; + case 1: + gen_helper_tidcp_el1(cpu_env, tcg_constant_i32(syndrome)); + break; + } + } + if (!ri) { /* * Unknown register; this might be a guest error or a QEMU diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 6b93b04453..f227c7664e 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -87,7 +87,9 @@ static const struct isa_ext_data isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zicond, PRIV_VERSION_1_12_0, ext_zicond), ISA_EXT_DATA_ENTRY(zicsr, PRIV_VERSION_1_10_0, ext_icsr), ISA_EXT_DATA_ENTRY(zifencei, PRIV_VERSION_1_10_0, ext_ifencei), + ISA_EXT_DATA_ENTRY(zihintntl, PRIV_VERSION_1_10_0, ext_zihintntl), ISA_EXT_DATA_ENTRY(zihintpause, PRIV_VERSION_1_10_0, ext_zihintpause), + ISA_EXT_DATA_ENTRY(zmmul, PRIV_VERSION_1_12_0, ext_zmmul), ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs), ISA_EXT_DATA_ENTRY(zfa, PRIV_VERSION_1_12_0, ext_zfa), ISA_EXT_DATA_ENTRY(zfbfmin, PRIV_VERSION_1_12_0, ext_zfbfmin), @@ -119,6 +121,8 @@ static const struct isa_ext_data isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zksed, PRIV_VERSION_1_12_0, ext_zksed), ISA_EXT_DATA_ENTRY(zksh, PRIV_VERSION_1_12_0, ext_zksh), ISA_EXT_DATA_ENTRY(zkt, PRIV_VERSION_1_12_0, ext_zkt), + ISA_EXT_DATA_ENTRY(zvbb, PRIV_VERSION_1_12_0, ext_zvbb), + ISA_EXT_DATA_ENTRY(zvbc, PRIV_VERSION_1_12_0, ext_zvbc), ISA_EXT_DATA_ENTRY(zve32f, PRIV_VERSION_1_10_0, ext_zve32f), ISA_EXT_DATA_ENTRY(zve64f, PRIV_VERSION_1_10_0, ext_zve64f), ISA_EXT_DATA_ENTRY(zve64d, PRIV_VERSION_1_10_0, ext_zve64d), @@ -126,9 +130,16 @@ static const struct isa_ext_data isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma), ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh), ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin), + ISA_EXT_DATA_ENTRY(zvkg, PRIV_VERSION_1_12_0, ext_zvkg), + ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned), + ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha), + ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb), + ISA_EXT_DATA_ENTRY(zvksed, PRIV_VERSION_1_12_0, ext_zvksed), + ISA_EXT_DATA_ENTRY(zvksh, PRIV_VERSION_1_12_0, ext_zvksh), ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia), + ISA_EXT_DATA_ENTRY(smepmp, PRIV_VERSION_1_12_0, epmp), ISA_EXT_DATA_ENTRY(smstateen, PRIV_VERSION_1_12_0, ext_smstateen), ISA_EXT_DATA_ENTRY(ssaia, PRIV_VERSION_1_12_0, ext_ssaia), ISA_EXT_DATA_ENTRY(sscofpmf, PRIV_VERSION_1_12_0, ext_sscofpmf), @@ -298,6 +309,17 @@ static uint8_t satp_mode_from_str(const char *satp_mode_str) uint8_t satp_mode_max_from_map(uint32_t map) { + /* + * 'map = 0' will make us return (31 - 32), which C will + * happily overflow to UINT_MAX. There's no good result to + * return if 'map = 0' (e.g. returning 0 will be ambiguous + * with the result for 'map = 1'). + * + * Assert out if map = 0. Callers will have to deal with + * it outside of this function. + */ + g_assert(map > 0); + /* map here has at least one bit set, so no problem with clz */ return 31 - __builtin_clz(map); } @@ -875,9 +897,9 @@ static void riscv_cpu_reset_hold(Object *obj) env->two_stage_lookup = false; env->menvcfg = (cpu->cfg.ext_svpbmt ? MENVCFG_PBMTE : 0) | - (cpu->cfg.ext_svadu ? MENVCFG_HADE : 0); + (cpu->cfg.ext_svadu ? 
MENVCFG_ADUE : 0); env->henvcfg = (cpu->cfg.ext_svpbmt ? HENVCFG_PBMTE : 0) | - (cpu->cfg.ext_svadu ? HENVCFG_HADE : 0); + (cpu->cfg.ext_svadu ? HENVCFG_ADUE : 0); /* Initialized default priorities of local interrupts. */ for (i = 0; i < ARRAY_SIZE(env->miprio); i++) { @@ -904,7 +926,7 @@ static void riscv_cpu_reset_hold(Object *obj) #ifndef CONFIG_USER_ONLY if (cpu->cfg.debug) { - riscv_trigger_init(env); + riscv_trigger_reset_hold(env); } if (kvm_enabled()) { @@ -1269,6 +1291,25 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } + /* + * In principle Zve*x would also suffice here, were they supported + * in qemu + */ + if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkg || cpu->cfg.ext_zvkned || + cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksed || cpu->cfg.ext_zvksh) && + !cpu->cfg.ext_zve32f) { + error_setg(errp, + "Vector crypto extensions require V or Zve* extensions"); + return; + } + + if ((cpu->cfg.ext_zvbc || cpu->cfg.ext_zvknhb) && !cpu->cfg.ext_zve64f) { + error_setg( + errp, + "Zvbc and Zvknhb extensions require V or Zve64{f,d} extensions"); + return; + } + if (cpu->cfg.ext_zk) { cpu->cfg.ext_zkn = true; cpu->cfg.ext_zkr = true; @@ -1303,9 +1344,15 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp) { bool rv32 = riscv_cpu_mxl(&cpu->env) == MXL_RV32; - uint8_t satp_mode_map_max; - uint8_t satp_mode_supported_max = - satp_mode_max_from_map(cpu->cfg.satp_mode.supported); + uint8_t satp_mode_map_max, satp_mode_supported_max; + + /* The CPU wants the OS to decide which satp mode to use */ + if (cpu->cfg.satp_mode.supported == 0) { + return; + } + + satp_mode_supported_max = + satp_mode_max_from_map(cpu->cfg.satp_mode.supported); if (cpu->cfg.satp_mode.map == 0) { if (cpu->cfg.satp_mode.init == 0) { @@ -1395,6 +1442,11 @@ static void riscv_cpu_realize_tcg(DeviceState *dev, Error **errp) CPURISCVState *env = &cpu->env; Error *local_err = NULL; + if (object_dynamic_cast(OBJECT(dev), TYPE_RISCV_CPU_HOST)) { + error_setg(errp, "'host' CPU is not compatible with TCG acceleration"); + return; + } + riscv_cpu_validate_misa_mxl(cpu, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); @@ -1473,6 +1525,12 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) riscv_cpu_register_gdb_regs_for_features(cs); +#ifndef CONFIG_USER_ONLY + if (cpu->cfg.debug) { + riscv_trigger_realize(&cpu->env); + } +#endif + qemu_init_vcpu(cs); cpu_reset(cs); @@ -1756,6 +1814,7 @@ static Property riscv_cpu_extensions[] = { DEFINE_PROP_BOOL("sscofpmf", RISCVCPU, cfg.ext_sscofpmf, false), DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true), DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true), + DEFINE_PROP_BOOL("Zihintntl", RISCVCPU, cfg.ext_zihintntl, true), DEFINE_PROP_BOOL("Zihintpause", RISCVCPU, cfg.ext_zihintpause, true), DEFINE_PROP_BOOL("Zawrs", RISCVCPU, cfg.ext_zawrs, true), DEFINE_PROP_BOOL("Zfa", RISCVCPU, cfg.ext_zfa, true), @@ -1816,6 +1875,7 @@ static Property riscv_cpu_extensions[] = { DEFINE_PROP_BOOL("zcf", RISCVCPU, cfg.ext_zcf, false), DEFINE_PROP_BOOL("zcmp", RISCVCPU, cfg.ext_zcmp, false), DEFINE_PROP_BOOL("zcmt", RISCVCPU, cfg.ext_zcmt, false), + DEFINE_PROP_BOOL("zicond", RISCVCPU, cfg.ext_zicond, false), /* Vendor-specific custom extensions */ DEFINE_PROP_BOOL("xtheadba", RISCVCPU, cfg.ext_xtheadba, false), @@ -1832,7 +1892,6 @@ static Property riscv_cpu_extensions[] = { DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, 
false), /* These are experimental so mark with 'x-' */ - DEFINE_PROP_BOOL("x-zicond", RISCVCPU, cfg.ext_zicond, false), /* ePMP 0.9.3 */ DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false), @@ -1846,6 +1905,16 @@ static Property riscv_cpu_extensions[] = { DEFINE_PROP_BOOL("x-zvfbfmin", RISCVCPU, cfg.ext_zvfbfmin, false), DEFINE_PROP_BOOL("x-zvfbfwma", RISCVCPU, cfg.ext_zvfbfwma, false), + /* Vector cryptography extensions */ + DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false), + DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false), + DEFINE_PROP_BOOL("x-zvkg", RISCVCPU, cfg.ext_zvkg, false), + DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false), + DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false), + DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false), + DEFINE_PROP_BOOL("x-zvksed", RISCVCPU, cfg.ext_zvksed, false), + DEFINE_PROP_BOOL("x-zvksh", RISCVCPU, cfg.ext_zvksh, false), + DEFINE_PROP_END_OF_LIST(), }; diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index 31a8d80990..3d6ffaabc7 100644 --- a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -745,12 +745,12 @@ typedef enum RISCVException { #define MENVCFG_CBIE (3UL << 4) #define MENVCFG_CBCFE BIT(6) #define MENVCFG_CBZE BIT(7) -#define MENVCFG_HADE (1ULL << 61) +#define MENVCFG_ADUE (1ULL << 61) #define MENVCFG_PBMTE (1ULL << 62) #define MENVCFG_STCE (1ULL << 63) /* For RV32 */ -#define MENVCFGH_HADE BIT(29) +#define MENVCFGH_ADUE BIT(29) #define MENVCFGH_PBMTE BIT(30) #define MENVCFGH_STCE BIT(31) @@ -763,12 +763,12 @@ typedef enum RISCVException { #define HENVCFG_CBIE MENVCFG_CBIE #define HENVCFG_CBCFE MENVCFG_CBCFE #define HENVCFG_CBZE MENVCFG_CBZE -#define HENVCFG_HADE MENVCFG_HADE +#define HENVCFG_ADUE MENVCFG_ADUE #define HENVCFG_PBMTE MENVCFG_PBMTE #define HENVCFG_STCE MENVCFG_STCE /* For RV32 */ -#define HENVCFGH_HADE MENVCFGH_HADE +#define HENVCFGH_ADUE MENVCFGH_ADUE #define HENVCFGH_PBMTE MENVCFGH_PBMTE #define HENVCFGH_STCE MENVCFGH_STCE diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h index 2bd9510ba3..0e6a0f245c 100644 --- a/target/riscv/cpu_cfg.h +++ b/target/riscv/cpu_cfg.h @@ -66,6 +66,7 @@ struct RISCVCPUConfig { bool ext_icbom; bool ext_icboz; bool ext_zicond; + bool ext_zihintntl; bool ext_zihintpause; bool ext_smstateen; bool ext_sstc; @@ -85,6 +86,14 @@ struct RISCVCPUConfig { bool ext_zve32f; bool ext_zve64f; bool ext_zve64d; + bool ext_zvbb; + bool ext_zvbc; + bool ext_zvkg; + bool ext_zvkned; + bool ext_zvknha; + bool ext_zvknhb; + bool ext_zvksed; + bool ext_zvksh; bool ext_zmmul; bool ext_zvfbfmin; bool ext_zvfbfwma; diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index 9f611d89bb..3a02079290 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -861,11 +861,11 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, } bool pbmte = env->menvcfg & MENVCFG_PBMTE; - bool hade = env->menvcfg & MENVCFG_HADE; + bool adue = env->menvcfg & MENVCFG_ADUE; if (first_stage && two_stage && env->virt_enabled) { pbmte = pbmte && (env->henvcfg & HENVCFG_PBMTE); - hade = hade && (env->henvcfg & HENVCFG_HADE); + adue = adue && (env->henvcfg & HENVCFG_ADUE); } int ptshift = (levels - 1) * ptidxbits; @@ -1026,7 +1026,7 @@ restart: /* Page table updates need to be atomic with MTTCG enabled */ if (updated_pte != pte && !is_debug) { - if (!hade) { + if (!adue) { return TRANSLATE_FAIL; } diff --git a/target/riscv/crypto_helper.c b/target/riscv/crypto_helper.c index 99d85a6188..bb084e00ef 100644 --- 
a/target/riscv/crypto_helper.c +++ b/target/riscv/crypto_helper.c @@ -25,29 +25,6 @@ #include "crypto/aes-round.h" #include "crypto/sm4.h" -#define AES_XTIME(a) \ - ((a << 1) ^ ((a & 0x80) ? 0x1b : 0)) - -#define AES_GFMUL(a, b) (( \ - (((b) & 0x1) ? (a) : 0) ^ \ - (((b) & 0x2) ? AES_XTIME(a) : 0) ^ \ - (((b) & 0x4) ? AES_XTIME(AES_XTIME(a)) : 0) ^ \ - (((b) & 0x8) ? AES_XTIME(AES_XTIME(AES_XTIME(a))) : 0)) & 0xFF) - -static inline uint32_t aes_mixcolumn_byte(uint8_t x, bool fwd) -{ - uint32_t u; - - if (fwd) { - u = (AES_GFMUL(x, 3) << 24) | (x << 16) | (x << 8) | - (AES_GFMUL(x, 2) << 0); - } else { - u = (AES_GFMUL(x, 0xb) << 24) | (AES_GFMUL(x, 0xd) << 16) | - (AES_GFMUL(x, 0x9) << 8) | (AES_GFMUL(x, 0xe) << 0); - } - return u; -} - #define sext32_xlen(x) (target_ulong)(int32_t)(x) static inline target_ulong aes32_operation(target_ulong shamt, @@ -55,23 +32,20 @@ static inline target_ulong aes32_operation(target_ulong shamt, bool enc, bool mix) { uint8_t si = rs2 >> shamt; - uint8_t so; uint32_t mixed; target_ulong res; if (enc) { - so = AES_sbox[si]; if (mix) { - mixed = aes_mixcolumn_byte(so, true); + mixed = be32_to_cpu(AES_Te0[si]); } else { - mixed = so; + mixed = AES_sbox[si]; } } else { - so = AES_isbox[si]; if (mix) { - mixed = aes_mixcolumn_byte(so, false); + mixed = be32_to_cpu(AES_Td0[si]); } else { - mixed = so; + mixed = AES_isbox[si]; } } mixed = rol32(mixed, shamt); @@ -174,24 +148,17 @@ target_ulong HELPER(aes64ks1i)(target_ulong rs1, target_ulong rnum) uint8_t enc_rnum = rnum; uint32_t temp = (RS1 >> 32) & 0xFFFFFFFF; - uint8_t rcon_ = 0; - target_ulong result; + AESState t, rc = {}; if (enc_rnum != 0xA) { temp = ror32(temp, 8); /* Rotate right by 8 */ - rcon_ = round_consts[enc_rnum]; + rc.w[0] = rc.w[1] = round_consts[enc_rnum]; } - temp = ((uint32_t)AES_sbox[(temp >> 24) & 0xFF] << 24) | - ((uint32_t)AES_sbox[(temp >> 16) & 0xFF] << 16) | - ((uint32_t)AES_sbox[(temp >> 8) & 0xFF] << 8) | - ((uint32_t)AES_sbox[(temp >> 0) & 0xFF] << 0); - - temp ^= rcon_; + t.w[0] = t.w[1] = t.w[2] = t.w[3] = temp; + aesenc_SB_SR_AK(&t, &t, &rc, false); - result = ((uint64_t)temp << 32) | temp; - - return result; + return t.d[0]; } target_ulong HELPER(aes64im)(target_ulong rs1) diff --git a/target/riscv/csr.c b/target/riscv/csr.c index ca95ae1527..85a31dc420 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -1684,7 +1684,7 @@ static int rmw_iprio(target_ulong xlen, static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val, target_ulong new_val, target_ulong wr_mask) { - bool virt; + bool virt, isel_reserved; uint8_t *iprio; int ret = -EINVAL; target_ulong priv, isel, vgein; @@ -1694,6 +1694,7 @@ static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val, /* Decode register details from CSR number */ virt = false; + isel_reserved = false; switch (csrno) { case CSR_MIREG: iprio = env->miprio; @@ -1738,11 +1739,13 @@ static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val, riscv_cpu_mxl_bits(env)), val, new_val, wr_mask); } + } else { + isel_reserved = true; } done: if (ret) { - return (env->virt_enabled && virt) ? + return (env->virt_enabled && virt && !isel_reserved) ? 
RISCV_EXCP_VIRT_INSTRUCTION_FAULT : RISCV_EXCP_ILLEGAL_INST; } return RISCV_EXCP_NONE; @@ -1833,8 +1836,11 @@ static RISCVException write_mcountinhibit(CPURISCVState *env, int csrno, { int cidx; PMUCTRState *counter; + RISCVCPU *cpu = env_archcpu(env); - env->mcountinhibit = val; + /* WARL register - disable unavailable counters; TM bit is always 0 */ + env->mcountinhibit = + val & (cpu->pmu_avail_ctrs | COUNTEREN_CY | COUNTEREN_IR); /* Check if any other counter is also monitoring cycles/instructions */ for (cidx = 0; cidx < RV_MAX_MHPMCOUNTERS; cidx++) { @@ -1857,7 +1863,11 @@ static RISCVException read_mcounteren(CPURISCVState *env, int csrno, static RISCVException write_mcounteren(CPURISCVState *env, int csrno, target_ulong val) { - env->mcounteren = val; + RISCVCPU *cpu = env_archcpu(env); + + /* WARL register - disable unavailable counters */ + env->mcounteren = val & (cpu->pmu_avail_ctrs | COUNTEREN_CY | COUNTEREN_TM | + COUNTEREN_IR); return RISCV_EXCP_NONE; } @@ -1950,7 +1960,7 @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno, if (riscv_cpu_mxl(env) == MXL_RV64) { mask |= (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) | (cfg->ext_sstc ? MENVCFG_STCE : 0) | - (cfg->ext_svadu ? MENVCFG_HADE : 0); + (cfg->ext_svadu ? MENVCFG_ADUE : 0); } env->menvcfg = (env->menvcfg & ~mask) | (val & mask); @@ -1970,7 +1980,7 @@ static RISCVException write_menvcfgh(CPURISCVState *env, int csrno, const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) | (cfg->ext_sstc ? MENVCFG_STCE : 0) | - (cfg->ext_svadu ? MENVCFG_HADE : 0); + (cfg->ext_svadu ? MENVCFG_ADUE : 0); uint64_t valh = (uint64_t)val << 32; env->menvcfg = (env->menvcfg & ~mask) | (valh & mask); @@ -2022,7 +2032,7 @@ static RISCVException read_henvcfg(CPURISCVState *env, int csrno, * henvcfg.stce is read_only 0 when menvcfg.stce = 0 * henvcfg.hade is read_only 0 when menvcfg.hade = 0 */ - *val = env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE) | + *val = env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE) | env->menvcfg); return RISCV_EXCP_NONE; } @@ -2039,7 +2049,7 @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, } if (riscv_cpu_mxl(env) == MXL_RV64) { - mask |= env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE); + mask |= env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE); } env->henvcfg = (env->henvcfg & ~mask) | (val & mask); @@ -2057,7 +2067,7 @@ static RISCVException read_henvcfgh(CPURISCVState *env, int csrno, return ret; } - *val = (env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE) | + *val = (env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE) | env->menvcfg)) >> 32; return RISCV_EXCP_NONE; } @@ -2066,7 +2076,7 @@ static RISCVException write_henvcfgh(CPURISCVState *env, int csrno, target_ulong val) { uint64_t mask = env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | - HENVCFG_HADE); + HENVCFG_ADUE); uint64_t valh = (uint64_t)val << 32; RISCVException ret; @@ -3907,21 +3917,27 @@ static RISCVException riscv_csrrw_do64(CPURISCVState *env, int csrno, target_ulong write_mask) { RISCVException ret; - target_ulong old_value; + target_ulong old_value = 0; /* execute combined read/write operation if it exists */ if (csr_ops[csrno].op) { return csr_ops[csrno].op(env, csrno, ret_value, new_value, write_mask); } - /* if no accessor exists then return failure */ - if (!csr_ops[csrno].read) { - return RISCV_EXCP_ILLEGAL_INST; - } - /* read old value */ - ret = csr_ops[csrno].read(env, csrno, &old_value); 
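As context for the guarded read this hunk introduces: the RISC-V privileged spec requires that a CSRRW whose destination is x0 skip the CSR read entirely, because some CSRs have read side effects. A minimal standalone C sketch of that rule follows (toy names throughout, not QEMU's actual csr_ops machinery):

#include <stdint.h>
#include <stdio.h>

static uint32_t toy_csr = 1;          /* toy CSR with a read side effect */
static uint32_t reads_seen = 0;

static uint32_t toy_csr_read(void)
{
    reads_seen++;                     /* stand-in for a real side effect */
    return toy_csr;
}

/* Sketch of csrrw: old_out == NULL models rd = x0, so the read is skipped. */
static void toy_csrrw(uint32_t *old_out, uint32_t new_val)
{
    if (old_out) {
        *old_out = toy_csr_read();
    }
    toy_csr = new_val;                /* the write still happens */
}

int main(void)
{
    toy_csrrw(NULL, 42);              /* csrw: write-only, no read */
    printf("reads_seen = %u\n", reads_seen);   /* prints 0 */
    return 0;
}

This mirrors the new ret_value check below: the read path, including its failure return, only runs when the caller actually wants the old value.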
- if (ret != RISCV_EXCP_NONE) { - return ret; + /* + * ret_value == NULL means that rd=x0 and we're coming from helper_csrw() + * and we can't throw side effects caused by CSR reads. + */ + if (ret_value) { + /* if no accessor exists then return failure */ + if (!csr_ops[csrno].read) { + return RISCV_EXCP_ILLEGAL_INST; + } + /* read old value */ + ret = csr_ops[csrno].read(env, csrno, &old_value); + if (ret != RISCV_EXCP_NONE) { + return ret; + } } /* write value if writable and write mask set, otherwise drop writes */ diff --git a/target/riscv/debug.c b/target/riscv/debug.c index 211f5921b6..4945d1a1f2 100644 --- a/target/riscv/debug.c +++ b/target/riscv/debug.c @@ -903,7 +903,17 @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp) return false; } -void riscv_trigger_init(CPURISCVState *env) +void riscv_trigger_realize(CPURISCVState *env) +{ + int i; + + for (i = 0; i < RV_MAX_TRIGGERS; i++) { + env->itrigger_timer[i] = timer_new_ns(QEMU_CLOCK_VIRTUAL, + riscv_itrigger_timer_cb, env); + } +} + +void riscv_trigger_reset_hold(CPURISCVState *env) { target_ulong tdata1 = build_tdata1(env, TRIGGER_TYPE_AD_MATCH, 0, 0); int i; @@ -928,7 +938,6 @@ void riscv_trigger_init(CPURISCVState *env) env->tdata3[i] = 0; env->cpu_breakpoint[i] = NULL; env->cpu_watchpoint[i] = NULL; - env->itrigger_timer[i] = timer_new_ns(QEMU_CLOCK_VIRTUAL, - riscv_itrigger_timer_cb, env); + timer_del(env->itrigger_timer[i]); } } diff --git a/target/riscv/debug.h b/target/riscv/debug.h index c471748d5a..5794aa6ee5 100644 --- a/target/riscv/debug.h +++ b/target/riscv/debug.h @@ -143,7 +143,8 @@ void riscv_cpu_debug_excp_handler(CPUState *cs); bool riscv_cpu_debug_check_breakpoint(CPUState *cs); bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp); -void riscv_trigger_init(CPURISCVState *env); +void riscv_trigger_realize(CPURISCVState *env); +void riscv_trigger_reset_hold(CPURISCVState *env); bool riscv_itrigger_enabled(CPURISCVState *env); void riscv_itrigger_update_priv(CPURISCVState *env); diff --git a/target/riscv/helper.h b/target/riscv/helper.h index c95adaf08a..8a63523851 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -1182,3 +1182,101 @@ DEF_HELPER_5(vfwcvtbf16_f_f_v, void, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwmaccbf16_vv, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwmaccbf16_vf, void, ptr, ptr, i64, ptr, env, i32) + +/* Vector crypto functions */ +DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vclmulh_vv, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vclmulh_vx, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vror_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vror_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vror_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vror_vv_d, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vror_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vror_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vror_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vror_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vrol_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrol_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrol_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrol_vv_d, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vrol_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrol_vx_h, void, ptr, ptr, tl, ptr, env, i32) 
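For reference, the per-element operation behind the vclmul/vclmulh helpers declared above can be modelled in plain C. This is a standalone sketch of carry-less multiplication as the Zvbc extension defines it, not the code QEMU generates:

#include <stdint.h>

/*
 * Carry-less multiply of two 64-bit operands: XOR of shifted partial
 * products instead of addition. vclmul yields the low 64 bits of the
 * 128-bit product, vclmulh the high 64 bits.
 */
static void clmul64(uint64_t a, uint64_t b, uint64_t *lo, uint64_t *hi)
{
    uint64_t l = 0, h = 0;
    int i;

    for (i = 0; i < 64; i++) {
        if ((b >> i) & 1) {
            l ^= a << i;
            if (i != 0) {
                h ^= a >> (64 - i);   /* bits shifted past bit 63 */
            }
        }
    }
    *lo = l;                          /* vclmul result */
    *hi = h;                          /* vclmulh result */
}

This is also why the translation checks added in trans_rvvk.c.inc below require s->sew == MO_64: the operation is only defined on 64-bit elements.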
+DEF_HELPER_6(vrol_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrol_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_5(vrev8_v_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vrev8_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vrev8_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vrev8_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vbrev8_v_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vbrev8_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vbrev8_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vbrev8_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vbrev_v_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vbrev_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vbrev_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vbrev_v_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_5(vclz_v_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vclz_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vclz_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vclz_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vctz_v_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vctz_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vctz_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vctz_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vcpop_v_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vcpop_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vcpop_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vcpop_v_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vwsll_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsll_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsll_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsll_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsll_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsll_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vandn_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vandn_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vandn_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vandn_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vandn_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vandn_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vandn_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vandn_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_2(egs_check, void, i32, env) + +DEF_HELPER_4(vaesef_vv, void, ptr, ptr, env, i32) +DEF_HELPER_4(vaesef_vs, void, ptr, ptr, env, i32) +DEF_HELPER_4(vaesdf_vv, void, ptr, ptr, env, i32) +DEF_HELPER_4(vaesdf_vs, void, ptr, ptr, env, i32) +DEF_HELPER_4(vaesem_vv, void, ptr, ptr, env, i32) +DEF_HELPER_4(vaesem_vs, void, ptr, ptr, env, i32) +DEF_HELPER_4(vaesdm_vv, void, ptr, ptr, env, i32) +DEF_HELPER_4(vaesdm_vs, void, ptr, ptr, env, i32) +DEF_HELPER_4(vaesz_vs, void, ptr, ptr, env, i32) +DEF_HELPER_5(vaeskf1_vi, void, ptr, ptr, i32, env, i32) +DEF_HELPER_5(vaeskf2_vi, void, ptr, ptr, i32, env, i32) + +DEF_HELPER_5(vsha2ms_vv, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vsha2ch32_vv, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vsha2ch64_vv, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vsha2cl32_vv, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vsha2cl64_vv, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_5(vsm3me_vv, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vsm3c_vi, void, ptr, ptr, i32, env, i32) + +DEF_HELPER_5(vghsh_vv, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_4(vgmul_vv, void, ptr, ptr, env, i32) + +DEF_HELPER_5(vsm4k_vi, void, ptr, ptr, i32, env, i32) +DEF_HELPER_4(vsm4r_vv, void, ptr, 
ptr, env, i32) +DEF_HELPER_4(vsm4r_vs, void, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index e341fa9213..33597fe2bb 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -37,6 +37,7 @@ %imm_u 12:s20 !function=ex_shift_12 %imm_bs 30:2 !function=ex_shift_3 %imm_rnum 20:4 +%imm_z6 26:1 15:5 # Argument sets: &empty @@ -74,6 +75,7 @@ @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd @r2 ....... ..... ..... ... ..... ....... &r2 %rs1 %rd +@r2_vm_1 ...... . ..... ..... ... ..... ....... &rmr vm=1 %rs2 %rd @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd @r2_vm ...... vm:1 ..... ..... ... ..... ....... &rmr %rs2 %rd @r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd @@ -82,6 +84,7 @@ @r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd @r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd @r_vm_0 ...... . ..... ..... ... ..... ....... &rmrr vm=0 %rs2 %rs1 %rd +@r2_zimm6 ..... . vm:1 ..... ..... ... ..... ....... &rmrr %rs2 rs1=%imm_z6 %rd @r2_zimm11 . zimm:11 ..... ... ..... ....... %rs1 %rd @r2_zimm10 .. zimm:10 ..... ... ..... ....... %rs1 %rd @r2_s ....... ..... ..... ... ..... ....... %rs2 %rs1 @@ -946,3 +949,58 @@ vfwcvtbf16_f_f_v 010010 . ..... 01101 001 ..... 1010111 @r2_vm # *** Zvfbfwma Standard Extension *** vfwmaccbf16_vv 111011 . ..... ..... 001 ..... 1010111 @r_vm vfwmaccbf16_vf 111011 . ..... ..... 101 ..... 1010111 @r_vm + +# *** Zvbc vector crypto extension *** +vclmul_vv 001100 . ..... ..... 010 ..... 1010111 @r_vm +vclmul_vx 001100 . ..... ..... 110 ..... 1010111 @r_vm +vclmulh_vv 001101 . ..... ..... 010 ..... 1010111 @r_vm +vclmulh_vx 001101 . ..... ..... 110 ..... 1010111 @r_vm + +# *** Zvbb vector crypto extension *** +vrol_vv 010101 . ..... ..... 000 ..... 1010111 @r_vm +vrol_vx 010101 . ..... ..... 100 ..... 1010111 @r_vm +vror_vv 010100 . ..... ..... 000 ..... 1010111 @r_vm +vror_vx 010100 . ..... ..... 100 ..... 1010111 @r_vm +vror_vi 01010. . ..... ..... 011 ..... 1010111 @r2_zimm6 +vbrev8_v 010010 . ..... 01000 010 ..... 1010111 @r2_vm +vrev8_v 010010 . ..... 01001 010 ..... 1010111 @r2_vm +vandn_vv 000001 . ..... ..... 000 ..... 1010111 @r_vm +vandn_vx 000001 . ..... ..... 100 ..... 1010111 @r_vm +vbrev_v 010010 . ..... 01010 010 ..... 1010111 @r2_vm +vclz_v 010010 . ..... 01100 010 ..... 1010111 @r2_vm +vctz_v 010010 . ..... 01101 010 ..... 1010111 @r2_vm +vcpop_v 010010 . ..... 01110 010 ..... 1010111 @r2_vm +vwsll_vv 110101 . ..... ..... 000 ..... 1010111 @r_vm +vwsll_vx 110101 . ..... ..... 100 ..... 1010111 @r_vm +vwsll_vi 110101 . ..... ..... 011 ..... 1010111 @r_vm + +# *** Zvkned vector crypto extension *** +vaesef_vv 101000 1 ..... 00011 010 ..... 1110111 @r2_vm_1 +vaesef_vs 101001 1 ..... 00011 010 ..... 1110111 @r2_vm_1 +vaesdf_vv 101000 1 ..... 00001 010 ..... 1110111 @r2_vm_1 +vaesdf_vs 101001 1 ..... 00001 010 ..... 1110111 @r2_vm_1 +vaesem_vv 101000 1 ..... 00010 010 ..... 1110111 @r2_vm_1 +vaesem_vs 101001 1 ..... 00010 010 ..... 1110111 @r2_vm_1 +vaesdm_vv 101000 1 ..... 00000 010 ..... 1110111 @r2_vm_1 +vaesdm_vs 101001 1 ..... 00000 010 ..... 1110111 @r2_vm_1 +vaesz_vs 101001 1 ..... 00111 010 ..... 1110111 @r2_vm_1 +vaeskf1_vi 100010 1 ..... ..... 010 ..... 1110111 @r_vm_1 +vaeskf2_vi 101010 1 ..... ..... 010 ..... 1110111 @r_vm_1 + +# *** Zvknh vector crypto extension *** +vsha2ms_vv 101101 1 ..... ..... 010 ..... 
1110111 @r_vm_1 +vsha2ch_vv 101110 1 ..... ..... 010 ..... 1110111 @r_vm_1 +vsha2cl_vv 101111 1 ..... ..... 010 ..... 1110111 @r_vm_1 + +# *** Zvksh vector crypto extension *** +vsm3me_vv 100000 1 ..... ..... 010 ..... 1110111 @r_vm_1 +vsm3c_vi 101011 1 ..... ..... 010 ..... 1110111 @r_vm_1 + +# *** Zvkg vector crypto extension *** +vghsh_vv 101100 1 ..... ..... 010 ..... 1110111 @r_vm_1 +vgmul_vv 101000 1 ..... 10001 010 ..... 1110111 @r2_vm_1 + +# *** Zvksed vector crypto extension *** +vsm4k_vi 100001 1 ..... ..... 010 ..... 1110111 @r_vm_1 +vsm4r_vv 101000 1 ..... 10000 010 ..... 1110111 @r2_vm_1 +vsm4r_vs 101001 1 ..... 10000 010 ..... 1110111 @r2_vm_1 diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 6ab63f4442..63404f61fc 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -617,7 +617,6 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data, TCGv_i32 desc; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); @@ -786,7 +785,6 @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2, TCGv_i32 desc; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); @@ -893,7 +891,6 @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, TCGv_i32 desc; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); @@ -1034,7 +1031,6 @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data, TCGv_i32 desc; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); @@ -1187,11 +1183,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, gen_helper_gvec_4_ptr *fn) { TCGLabel *over = gen_new_label(); - if (!opivv_check(s, a)) { - return false; - } - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { @@ -1223,6 +1215,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ }; \ + if (!opivv_check(s, a)) { \ + return false; \ + } \ return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ } @@ -1241,7 +1236,6 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, uint32_t data = 0; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); @@ -1282,10 +1276,6 @@ static inline bool do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn, gen_helper_opivx *fn) { - if (!opivx_check(s, a)) { - return false; - } - if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { TCGv_i64 src1 = tcg_temp_new_i64(); @@ -1307,6 +1297,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ }; \ + if (!opivx_check(s, a)) { \ + return false; \ + } \ return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ } @@ -1405,7 +1398,6 @@ 
static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, uint32_t data = 0; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); @@ -1439,10 +1431,6 @@ static inline bool do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn, gen_helper_opivx *fn, imm_mode_t imm_mode) { - if (!opivx_check(s, a)) { - return false; - } - if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s)); @@ -1460,6 +1448,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ }; \ + if (!opivx_check(s, a)) { \ + return false; \ + } \ return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, \ fns[s->sew], IMM_MODE); \ } @@ -1492,7 +1483,6 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, if (checkfn(s, a)) { uint32_t data = 0; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); data = FIELD_DP32(data, VDATA, VM, a->vm); @@ -1536,30 +1526,24 @@ static bool opivx_widen_check(DisasContext *s, arg_rmrr *a) vext_check_ds(s, a->rd, a->rs2, a->vm); } -static bool do_opivx_widen(DisasContext *s, arg_rmrr *a, - gen_helper_opivx *fn) -{ - if (opivx_widen_check(s, a)) { - return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); - } - return false; -} - -#define GEN_OPIVX_WIDEN_TRANS(NAME) \ -static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ -{ \ - static gen_helper_opivx * const fns[3] = { \ - gen_helper_##NAME##_b, \ - gen_helper_##NAME##_h, \ - gen_helper_##NAME##_w \ - }; \ - return do_opivx_widen(s, a, fns[s->sew]); \ +#define GEN_OPIVX_WIDEN_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s); \ + } \ + return false; \ } -GEN_OPIVX_WIDEN_TRANS(vwaddu_vx) -GEN_OPIVX_WIDEN_TRANS(vwadd_vx) -GEN_OPIVX_WIDEN_TRANS(vwsubu_vx) -GEN_OPIVX_WIDEN_TRANS(vwsub_vx) +GEN_OPIVX_WIDEN_TRANS(vwaddu_vx, opivx_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwadd_vx, opivx_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwsubu_vx, opivx_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwsub_vx, opivx_widen_check) /* WIDEN OPIVV with WIDEN */ static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a) @@ -1575,7 +1559,6 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, if (opiwv_widen_check(s, a)) { uint32_t data = 0; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); data = FIELD_DP32(data, VDATA, VM, a->vm); @@ -1643,38 +1626,39 @@ GEN_OPIWX_WIDEN_TRANS(vwadd_wx) GEN_OPIWX_WIDEN_TRANS(vwsubu_wx) GEN_OPIWX_WIDEN_TRANS(vwsub_wx) +static bool opivv_trans(uint32_t vd, uint32_t vs1, uint32_t vs2, uint32_t vm, + gen_helper_gvec_4_ptr *fn, DisasContext *s) +{ + uint32_t data = 0; + TCGLabel *over = gen_new_label(); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); + + data = FIELD_DP32(data, VDATA, VM, vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, 
s->cfg_vta_all_1s); + data = FIELD_DP32(data, VDATA, VMA, s->vma); + tcg_gen_gvec_4_ptr(vreg_ofs(s, vd), vreg_ofs(s, 0), vreg_ofs(s, vs1), + vreg_ofs(s, vs2), cpu_env, s->cfg_ptr->vlen / 8, + s->cfg_ptr->vlen / 8, data, fn); + mark_vs_dirty(s); + gen_set_label(over); + return true; +} + /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ /* OPIVV without GVEC IR */ -#define GEN_OPIVV_TRANS(NAME, CHECK) \ -static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ -{ \ - if (CHECK(s, a)) { \ - uint32_t data = 0; \ - static gen_helper_gvec_4_ptr * const fns[4] = { \ - gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ - gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ - }; \ - TCGLabel *over = gen_new_label(); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ - tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ - \ - data = FIELD_DP32(data, VDATA, VM, a->vm); \ - data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ - data = FIELD_DP32(data, VDATA, VTA, s->vta); \ - data = \ - FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ - data = FIELD_DP32(data, VDATA, VMA, s->vma); \ - tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ - vreg_ofs(s, a->rs1), \ - vreg_ofs(s, a->rs2), cpu_env, \ - s->cfg_ptr->vlen / 8, \ - s->cfg_ptr->vlen / 8, data, \ - fns[s->sew]); \ - mark_vs_dirty(s); \ - gen_set_label(over); \ - return true; \ - } \ - return false; \ +#define GEN_OPIVV_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + static gen_helper_gvec_4_ptr * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + return opivv_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ + } \ + return false; \ } /* @@ -1783,10 +1767,6 @@ static inline bool do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn, gen_helper_opivx *fn) { - if (!opivx_check(s, a)) { - return false; - } - if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { TCGv_i32 src1 = tcg_temp_new_i32(); @@ -1808,7 +1788,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ }; \ - \ + if (!opivx_check(s, a)) { \ + return false; \ + } \ return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ } @@ -1840,7 +1822,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ gen_helper_##NAME##_w, \ }; \ TCGLabel *over = gen_new_label(); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ @@ -2010,9 +1991,9 @@ GEN_OPIVX_TRANS(vrem_vx, opivx_check) GEN_OPIVV_WIDEN_TRANS(vwmul_vv, opivv_widen_check) GEN_OPIVV_WIDEN_TRANS(vwmulu_vv, opivv_widen_check) GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check) -GEN_OPIVX_WIDEN_TRANS(vwmul_vx) -GEN_OPIVX_WIDEN_TRANS(vwmulu_vx) -GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx) +GEN_OPIVX_WIDEN_TRANS(vwmul_vx, opivx_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmulu_vx, opivx_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx, opivx_widen_check) /* Vector Single-Width Integer Multiply-Add Instructions */ GEN_OPIVV_TRANS(vmacc_vv, opivv_check) @@ -2028,10 +2009,10 @@ GEN_OPIVX_TRANS(vnmsub_vx, opivx_check) GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check) GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check) GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check) -GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx) -GEN_OPIVX_WIDEN_TRANS(vwmacc_vx) 
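A side note on the many tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over) deletions throughout this file: the remaining unsigned vstart >= vl branch already covers the vl == 0 case, as the updated comment in trans_vfmv_s_f below also states. A plain-C model of the kept guard (not TCG code) makes the subsumption explicit:

#include <assert.h>
#include <stdint.h>

/*
 * Model of the single guard kept in the translated code: the vector body
 * is skipped whenever no element would be processed.
 */
static int vector_body_runs(uint64_t vstart, uint64_t vl)
{
    /* Unsigned vstart >= vl subsumes vl == 0, since vstart >= 0 always. */
    return vstart < vl;
}

int main(void)
{
    assert(!vector_body_runs(0, 0));  /* vl == 0: skipped, old extra test redundant */
    assert(!vector_body_runs(4, 4));  /* vstart == vl: skipped */
    assert(vector_body_runs(0, 4));   /* normal case: body executes */
    return 0;
}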
-GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx) -GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx) +GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_widen_check) /* Vector Integer Merge and Move Instructions */ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) @@ -2052,7 +2033,6 @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, }; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), @@ -2076,7 +2056,6 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) vext_check_ss(s, a->rd, 0, 1)) { TCGv s1; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); s1 = get_gpr(s, a->rs1, EXT_SIGN); @@ -2138,7 +2117,6 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, }; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); s1 = tcg_constant_i64(simm); @@ -2286,7 +2264,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ }; \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ @@ -2321,7 +2298,6 @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, TCGv_i64 t1; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); @@ -2406,7 +2382,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ }; \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);\ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ @@ -2481,7 +2456,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ }; \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ @@ -2599,7 +2573,6 @@ static bool do_opfv(DisasContext *s, arg_rmr *a, uint32_t data = 0; TCGLabel *over = gen_new_label(); gen_set_rm_chkfrm(s, rm); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); data = FIELD_DP32(data, VDATA, VM, a->vm); @@ -2711,7 +2684,6 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) gen_helper_vmv_v_x_d, }; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); t1 = tcg_temp_new_i64(); @@ -2790,7 +2762,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ }; \ TCGLabel *over = gen_new_label(); \ gen_set_rm_chkfrm(s, FRM); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ @@ -2842,7 +2813,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ }; \ TCGLabel *over = 
gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ @@ -2910,7 +2880,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ }; \ TCGLabel *over = gen_new_label(); \ gen_set_rm_chkfrm(s, FRM); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ @@ -2960,7 +2929,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ }; \ TCGLabel *over = gen_new_label(); \ gen_set_rm_chkfrm(s, FRM); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ @@ -3051,7 +3019,6 @@ static bool trans_##NAME(DisasContext *s, arg_r *a) \ uint32_t data = 0; \ gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \ TCGLabel *over = gen_new_label(); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -3222,7 +3189,6 @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) require_vm(a->vm, a->rd)) { uint32_t data = 0; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); data = FIELD_DP32(data, VDATA, VM, a->vm); @@ -3409,7 +3375,6 @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a) TCGv s1; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); t1 = tcg_temp_new_i64(); @@ -3466,8 +3431,7 @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a) TCGv_i64 t1; TCGLabel *over = gen_new_label(); - /* if vl == 0 or vstart >= vl, skip vector register write back */ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + /* if vstart >= vl, skip vector register write back */ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); /* NaN-box f[rs1] */ @@ -3718,7 +3682,6 @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq) uint32_t data = 0; gen_helper_gvec_3_ptr *fn; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); static gen_helper_gvec_3_ptr * const fns[6][4] = { diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc new file mode 100644 index 0000000000..c00c70dfc6 --- /dev/null +++ b/target/riscv/insn_trans/trans_rvvk.c.inc @@ -0,0 +1,606 @@ +/* + * RISC-V translation routines for the vector crypto extension. + * + * Copyright (C) 2023 SiFive, Inc. + * Written by Codethink Ltd and SiFive. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +/* + * Zvbc + */ + +#define GEN_VV_MASKED_TRANS(NAME, CHECK) \ + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ + { \ + if (CHECK(s, a)) { \ + return opivv_trans(a->rd, a->rs1, a->rs2, a->vm, \ + gen_helper_##NAME, s); \ + } \ + return false; \ + } + +static bool vclmul_vv_check(DisasContext *s, arg_rmrr *a) +{ + return opivv_check(s, a) && + s->cfg_ptr->ext_zvbc == true && + s->sew == MO_64; +} + +GEN_VV_MASKED_TRANS(vclmul_vv, vclmul_vv_check) +GEN_VV_MASKED_TRANS(vclmulh_vv, vclmul_vv_check) + +#define GEN_VX_MASKED_TRANS(NAME, CHECK) \ + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ + { \ + if (CHECK(s, a)) { \ + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, \ + gen_helper_##NAME, s); \ + } \ + return false; \ + } + +static bool vclmul_vx_check(DisasContext *s, arg_rmrr *a) +{ + return opivx_check(s, a) && + s->cfg_ptr->ext_zvbc == true && + s->sew == MO_64; +} + +GEN_VX_MASKED_TRANS(vclmul_vx, vclmul_vx_check) +GEN_VX_MASKED_TRANS(vclmulh_vx, vclmul_vx_check) + +/* + * Zvbb + */ + +#define GEN_OPIVI_GVEC_TRANS_CHECK(NAME, IMM_MODE, OPIVX, SUF, CHECK) \ + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ + { \ + if (CHECK(s, a)) { \ + static gen_helper_opivx *const fns[4] = { \ + gen_helper_##OPIVX##_b, \ + gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, \ + gen_helper_##OPIVX##_d, \ + }; \ + return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew], \ + IMM_MODE); \ + } \ + return false; \ + } + +#define GEN_OPIVV_GVEC_TRANS_CHECK(NAME, SUF, CHECK) \ + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ + { \ + if (CHECK(s, a)) { \ + static gen_helper_gvec_4_ptr *const fns[4] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ + } \ + return false; \ + } + +#define GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(NAME, SUF, CHECK) \ + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ + { \ + if (CHECK(s, a)) { \ + static gen_helper_opivx *const fns[4] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, \ + fns[s->sew]); \ + } \ + return false; \ + } + +static bool zvbb_vv_check(DisasContext *s, arg_rmrr *a) +{ + return opivv_check(s, a) && s->cfg_ptr->ext_zvbb == true; +} + +static bool zvbb_vx_check(DisasContext *s, arg_rmrr *a) +{ + return opivx_check(s, a) && s->cfg_ptr->ext_zvbb == true; +} + +/* vrol.v[vx] */ +GEN_OPIVV_GVEC_TRANS_CHECK(vrol_vv, rotlv, zvbb_vv_check) +GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vrol_vx, rotls, zvbb_vx_check) + +/* vror.v[vxi] */ +GEN_OPIVV_GVEC_TRANS_CHECK(vror_vv, rotrv, zvbb_vv_check) +GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vror_vx, rotrs, zvbb_vx_check) +GEN_OPIVI_GVEC_TRANS_CHECK(vror_vi, IMM_TRUNC_SEW, vror_vx, rotri, zvbb_vx_check) + +#define GEN_OPIVX_GVEC_TRANS_CHECK(NAME, SUF, CHECK) \ + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ + { \ + if (CHECK(s, a)) { \ + static gen_helper_opivx *const fns[4] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ + } \ + return false; \ + } + +/* vandn.v[vx] */ +GEN_OPIVV_GVEC_TRANS_CHECK(vandn_vv, andc, zvbb_vv_check) +GEN_OPIVX_GVEC_TRANS_CHECK(vandn_vx, andcs, zvbb_vx_check) + +#define GEN_OPIV_TRANS(NAME, CHECK) \ + static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ + { \ 
+ if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr *const fns[4] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + TCGLabel *over = gen_new_label(); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), cpu_env, \ + s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, \ + data, fns[s->sew]); \ + mark_vs_dirty(s); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ + } + +static bool zvbb_opiv_check(DisasContext *s, arg_rmr *a) +{ + return s->cfg_ptr->ext_zvbb == true && + require_rvv(s) && + vext_check_isa_ill(s) && + vext_check_ss(s, a->rd, a->rs2, a->vm); +} + +GEN_OPIV_TRANS(vbrev8_v, zvbb_opiv_check) +GEN_OPIV_TRANS(vrev8_v, zvbb_opiv_check) +GEN_OPIV_TRANS(vbrev_v, zvbb_opiv_check) +GEN_OPIV_TRANS(vclz_v, zvbb_opiv_check) +GEN_OPIV_TRANS(vctz_v, zvbb_opiv_check) +GEN_OPIV_TRANS(vcpop_v, zvbb_opiv_check) + +static bool vwsll_vv_check(DisasContext *s, arg_rmrr *a) +{ + return s->cfg_ptr->ext_zvbb && opivv_widen_check(s, a); +} + +static bool vwsll_vx_check(DisasContext *s, arg_rmrr *a) +{ + return s->cfg_ptr->ext_zvbb && opivx_widen_check(s, a); +} + +/* OPIVI without GVEC IR */ +#define GEN_OPIVI_WIDEN_TRANS(NAME, IMM_MODE, OPIVX, CHECK) \ + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ + { \ + if (CHECK(s, a)) { \ + static gen_helper_opivx *const fns[3] = { \ + gen_helper_##OPIVX##_b, \ + gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, \ + }; \ + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s, \ + IMM_MODE); \ + } \ + return false; \ + } + +GEN_OPIVV_WIDEN_TRANS(vwsll_vv, vwsll_vv_check) +GEN_OPIVX_WIDEN_TRANS(vwsll_vx, vwsll_vx_check) +GEN_OPIVI_WIDEN_TRANS(vwsll_vi, IMM_ZX, vwsll_vx, vwsll_vx_check) + +/* + * Zvkned + */ + +#define ZVKNED_EGS 4 + +#define GEN_V_UNMASKED_TRANS(NAME, CHECK, EGS) \ + static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \ + { \ + if (CHECK(s, a)) { \ + TCGv_ptr rd_v, rs2_v; \ + TCGv_i32 desc, egs; \ + uint32_t data = 0; \ + TCGLabel *over = gen_new_label(); \ + \ + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \ + /* save opcode for unwinding in case we throw an exception */ \ + decode_save_opc(s); \ + egs = tcg_constant_i32(EGS); \ + gen_helper_egs_check(egs, cpu_env); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ + } \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + rd_v = tcg_temp_new_ptr(); \ + rs2_v = tcg_temp_new_ptr(); \ + desc = tcg_constant_i32( \ + simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); \ + tcg_gen_addi_ptr(rd_v, cpu_env, vreg_ofs(s, a->rd)); \ + tcg_gen_addi_ptr(rs2_v, cpu_env, vreg_ofs(s, a->rs2)); \ + gen_helper_##NAME(rd_v, rs2_v, cpu_env, desc); \ + mark_vs_dirty(s); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ + } + +static bool vaes_check_vv(DisasContext *s, arg_rmr *a) +{ + int egw_bytes = ZVKNED_EGS << s->sew; + return s->cfg_ptr->ext_zvkned == true && + 
require_rvv(s) && + vext_check_isa_ill(s) && + MAXSZ(s) >= egw_bytes && + require_align(a->rd, s->lmul) && + require_align(a->rs2, s->lmul) && + s->sew == MO_32; +} + +static bool vaes_check_overlap(DisasContext *s, int vd, int vs2) +{ + int8_t op_size = s->lmul <= 0 ? 1 : 1 << s->lmul; + return !is_overlapped(vd, op_size, vs2, 1); +} + +static bool vaes_check_vs(DisasContext *s, arg_rmr *a) +{ + int egw_bytes = ZVKNED_EGS << s->sew; + return vaes_check_overlap(s, a->rd, a->rs2) && + MAXSZ(s) >= egw_bytes && + s->cfg_ptr->ext_zvkned == true && + require_rvv(s) && + vext_check_isa_ill(s) && + require_align(a->rd, s->lmul) && + s->sew == MO_32; +} + +GEN_V_UNMASKED_TRANS(vaesef_vv, vaes_check_vv, ZVKNED_EGS) +GEN_V_UNMASKED_TRANS(vaesef_vs, vaes_check_vs, ZVKNED_EGS) +GEN_V_UNMASKED_TRANS(vaesdf_vv, vaes_check_vv, ZVKNED_EGS) +GEN_V_UNMASKED_TRANS(vaesdf_vs, vaes_check_vs, ZVKNED_EGS) +GEN_V_UNMASKED_TRANS(vaesdm_vv, vaes_check_vv, ZVKNED_EGS) +GEN_V_UNMASKED_TRANS(vaesdm_vs, vaes_check_vs, ZVKNED_EGS) +GEN_V_UNMASKED_TRANS(vaesz_vs, vaes_check_vs, ZVKNED_EGS) +GEN_V_UNMASKED_TRANS(vaesem_vv, vaes_check_vv, ZVKNED_EGS) +GEN_V_UNMASKED_TRANS(vaesem_vs, vaes_check_vs, ZVKNED_EGS) + +#define GEN_VI_UNMASKED_TRANS(NAME, CHECK, EGS) \ + static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \ + { \ + if (CHECK(s, a)) { \ + TCGv_ptr rd_v, rs2_v; \ + TCGv_i32 uimm_v, desc, egs; \ + uint32_t data = 0; \ + TCGLabel *over = gen_new_label(); \ + \ + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \ + /* save opcode for unwinding in case we throw an exception */ \ + decode_save_opc(s); \ + egs = tcg_constant_i32(EGS); \ + gen_helper_egs_check(egs, cpu_env); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ + } \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + \ + rd_v = tcg_temp_new_ptr(); \ + rs2_v = tcg_temp_new_ptr(); \ + uimm_v = tcg_constant_i32(a->rs1); \ + desc = tcg_constant_i32( \ + simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); \ + tcg_gen_addi_ptr(rd_v, cpu_env, vreg_ofs(s, a->rd)); \ + tcg_gen_addi_ptr(rs2_v, cpu_env, vreg_ofs(s, a->rs2)); \ + gen_helper_##NAME(rd_v, rs2_v, uimm_v, cpu_env, desc); \ + mark_vs_dirty(s); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ + } + +static bool vaeskf1_check(DisasContext *s, arg_vaeskf1_vi *a) +{ + int egw_bytes = ZVKNED_EGS << s->sew; + return s->cfg_ptr->ext_zvkned == true && + require_rvv(s) && + vext_check_isa_ill(s) && + MAXSZ(s) >= egw_bytes && + s->sew == MO_32 && + require_align(a->rd, s->lmul) && + require_align(a->rs2, s->lmul); +} + +static bool vaeskf2_check(DisasContext *s, arg_vaeskf2_vi *a) +{ + int egw_bytes = ZVKNED_EGS << s->sew; + return s->cfg_ptr->ext_zvkned == true && + require_rvv(s) && + vext_check_isa_ill(s) && + MAXSZ(s) >= egw_bytes && + s->sew == MO_32 && + require_align(a->rd, s->lmul) && + require_align(a->rs2, s->lmul); +} + +GEN_VI_UNMASKED_TRANS(vaeskf1_vi, vaeskf1_check, ZVKNED_EGS) +GEN_VI_UNMASKED_TRANS(vaeskf2_vi, vaeskf2_check, ZVKNED_EGS) + +/* + * Zvknh + */ + +#define ZVKNH_EGS 4 + +#define GEN_VV_UNMASKED_TRANS(NAME, CHECK, EGS) \ + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ + { \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + TCGLabel *over = gen_new_label(); \ + TCGv_i32 egs; \ + \ + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { 
\ + /* save opcode for unwinding in case we throw an exception */ \ + decode_save_opc(s); \ + egs = tcg_constant_i32(EGS); \ + gen_helper_egs_check(egs, cpu_env); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ + } \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + \ + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), cpu_env, \ + s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, \ + data, gen_helper_##NAME); \ + \ + mark_vs_dirty(s); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ + } + +static bool vsha_check_sew(DisasContext *s) +{ + return (s->cfg_ptr->ext_zvknha == true && s->sew == MO_32) || + (s->cfg_ptr->ext_zvknhb == true && + (s->sew == MO_32 || s->sew == MO_64)); +} + +static bool vsha_check(DisasContext *s, arg_rmrr *a) +{ + int egw_bytes = ZVKNH_EGS << s->sew; + int mult = 1 << MAX(s->lmul, 0); + return opivv_check(s, a) && + vsha_check_sew(s) && + MAXSZ(s) >= egw_bytes && + !is_overlapped(a->rd, mult, a->rs1, mult) && + !is_overlapped(a->rd, mult, a->rs2, mult) && + s->lmul >= 0; +} + +GEN_VV_UNMASKED_TRANS(vsha2ms_vv, vsha_check, ZVKNH_EGS) + +static bool trans_vsha2cl_vv(DisasContext *s, arg_rmrr *a) +{ + if (vsha_check(s, a)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(); + TCGv_i32 egs; + + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { + /* save opcode for unwinding in case we throw an exception */ + decode_save_opc(s); + egs = tcg_constant_i32(ZVKNH_EGS); + gen_helper_egs_check(egs, cpu_env); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); + } + + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); + data = FIELD_DP32(data, VDATA, VMA, s->vma); + + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), + vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, + s->cfg_ptr->vlen / 8, data, + s->sew == MO_32 ? + gen_helper_vsha2cl32_vv : gen_helper_vsha2cl64_vv); + + mark_vs_dirty(s); + gen_set_label(over); + return true; + } + return false; +} + +static bool trans_vsha2ch_vv(DisasContext *s, arg_rmrr *a) +{ + if (vsha_check(s, a)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(); + TCGv_i32 egs; + + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { + /* save opcode for unwinding in case we throw an exception */ + decode_save_opc(s); + egs = tcg_constant_i32(ZVKNH_EGS); + gen_helper_egs_check(egs, cpu_env); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); + } + + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); + data = FIELD_DP32(data, VDATA, VMA, s->vma); + + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), + vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, + s->cfg_ptr->vlen / 8, data, + s->sew == MO_32 ? 
+ gen_helper_vsha2ch32_vv : gen_helper_vsha2ch64_vv); + + mark_vs_dirty(s); + gen_set_label(over); + return true; + } + return false; +} + +/* + * Zvksh + */ + +#define ZVKSH_EGS 8 + +static inline bool vsm3_check(DisasContext *s, arg_rmrr *a) +{ + int egw_bytes = ZVKSH_EGS << s->sew; + int mult = 1 << MAX(s->lmul, 0); + return s->cfg_ptr->ext_zvksh == true && + require_rvv(s) && + vext_check_isa_ill(s) && + !is_overlapped(a->rd, mult, a->rs2, mult) && + MAXSZ(s) >= egw_bytes && + s->sew == MO_32; +} + +static inline bool vsm3me_check(DisasContext *s, arg_rmrr *a) +{ + return vsm3_check(s, a) && vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm); +} + +static inline bool vsm3c_check(DisasContext *s, arg_rmrr *a) +{ + return vsm3_check(s, a) && vext_check_ss(s, a->rd, a->rs2, a->vm); +} + +GEN_VV_UNMASKED_TRANS(vsm3me_vv, vsm3me_check, ZVKSH_EGS) +GEN_VI_UNMASKED_TRANS(vsm3c_vi, vsm3c_check, ZVKSH_EGS) + +/* + * Zvkg + */ + +#define ZVKG_EGS 4 + +static bool vgmul_check(DisasContext *s, arg_rmr *a) +{ + int egw_bytes = ZVKG_EGS << s->sew; + return s->cfg_ptr->ext_zvkg == true && + vext_check_isa_ill(s) && + require_rvv(s) && + MAXSZ(s) >= egw_bytes && + vext_check_ss(s, a->rd, a->rs2, a->vm) && + s->sew == MO_32; +} + +GEN_V_UNMASKED_TRANS(vgmul_vv, vgmul_check, ZVKG_EGS) + +static bool vghsh_check(DisasContext *s, arg_rmrr *a) +{ + int egw_bytes = ZVKG_EGS << s->sew; + return s->cfg_ptr->ext_zvkg == true && + opivv_check(s, a) && + MAXSZ(s) >= egw_bytes && + s->sew == MO_32; +} + +GEN_VV_UNMASKED_TRANS(vghsh_vv, vghsh_check, ZVKG_EGS) + +/* + * Zvksed + */ + +#define ZVKSED_EGS 4 + +static bool zvksed_check(DisasContext *s) +{ + int egw_bytes = ZVKSED_EGS << s->sew; + return s->cfg_ptr->ext_zvksed == true && + require_rvv(s) && + vext_check_isa_ill(s) && + MAXSZ(s) >= egw_bytes && + s->sew == MO_32; +} + +static bool vsm4k_vi_check(DisasContext *s, arg_rmrr *a) +{ + return zvksed_check(s) && + require_align(a->rd, s->lmul) && + require_align(a->rs2, s->lmul); +} + +GEN_VI_UNMASKED_TRANS(vsm4k_vi, vsm4k_vi_check, ZVKSED_EGS) + +static bool vsm4r_vv_check(DisasContext *s, arg_rmr *a) +{ + return zvksed_check(s) && + require_align(a->rd, s->lmul) && + require_align(a->rs2, s->lmul); +} + +GEN_V_UNMASKED_TRANS(vsm4r_vv, vsm4r_vv_check, ZVKSED_EGS) + +static bool vsm4r_vs_check(DisasContext *s, arg_rmr *a) +{ + return zvksed_check(s) && + !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs2, 1) && + require_align(a->rd, s->lmul); +} + +GEN_V_UNMASKED_TRANS(vsm4r_vs, vsm4r_vs_check, ZVKSED_EGS) diff --git a/target/riscv/insn_trans/trans_rvzfa.c.inc b/target/riscv/insn_trans/trans_rvzfa.c.inc index 2c715af3e5..0fdd2698f6 100644 --- a/target/riscv/insn_trans/trans_rvzfa.c.inc +++ b/target/riscv/insn_trans/trans_rvzfa.c.inc @@ -470,7 +470,7 @@ bool trans_fleq_d(DisasContext *ctx, arg_fleq_d *a) TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); - gen_helper_fltq_s(dest, cpu_env, src1, src2); + gen_helper_fleq_d(dest, cpu_env, src1, src2); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -485,7 +485,7 @@ bool trans_fltq_d(DisasContext *ctx, arg_fltq_d *a) TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); - gen_helper_fltq_s(dest, cpu_env, src1, src2); + gen_helper_fltq_d(dest, cpu_env, src1, src2); gen_set_gpr(ctx, a->rd, dest); return true; } diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c index dbcf26f27d..c01cfb03f4 100644 --- a/target/riscv/kvm.c +++ b/target/riscv/kvm.c @@ -36,6 +36,7 @@ #include "exec/address-spaces.h" #include 
"hw/boards.h" #include "hw/irq.h" +#include "hw/intc/riscv_imsic.h" #include "qemu/log.h" #include "hw/loader.h" #include "kvm_riscv.h" @@ -43,6 +44,12 @@ #include "chardev/char-fe.h" #include "migration/migration.h" #include "sysemu/runstate.h" +#include "hw/riscv/numa.h" + +void riscv_kvm_aplic_request(void *opaque, int irq, int level) +{ + kvm_set_irq(kvm_state, irq, !!level); +} static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type, uint64_t idx) @@ -926,7 +933,15 @@ int kvm_arch_init(MachineState *ms, KVMState *s) int kvm_arch_irqchip_create(KVMState *s) { - return 0; + if (kvm_kernel_irqchip_split()) { + error_report("-machine kernel_irqchip=split is not supported on RISC-V."); + exit(1); + } + + /* + * We can create the VAIA using the newer device control API. + */ + return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL); } int kvm_arch_process_async_events(CPUState *cs) @@ -1027,6 +1042,190 @@ bool kvm_arch_cpu_check_are_resettable(void) return true; } +static int aia_mode; + +static const char *kvm_aia_mode_str(uint64_t mode) +{ + switch (mode) { + case KVM_DEV_RISCV_AIA_MODE_EMUL: + return "emul"; + case KVM_DEV_RISCV_AIA_MODE_HWACCEL: + return "hwaccel"; + case KVM_DEV_RISCV_AIA_MODE_AUTO: + default: + return "auto"; + }; +} + +static char *riscv_get_kvm_aia(Object *obj, Error **errp) +{ + return g_strdup(kvm_aia_mode_str(aia_mode)); +} + +static void riscv_set_kvm_aia(Object *obj, const char *val, Error **errp) +{ + if (!strcmp(val, "emul")) { + aia_mode = KVM_DEV_RISCV_AIA_MODE_EMUL; + } else if (!strcmp(val, "hwaccel")) { + aia_mode = KVM_DEV_RISCV_AIA_MODE_HWACCEL; + } else if (!strcmp(val, "auto")) { + aia_mode = KVM_DEV_RISCV_AIA_MODE_AUTO; + } else { + error_setg(errp, "Invalid KVM AIA mode"); + error_append_hint(errp, "Valid values are emul, hwaccel, and auto.\n"); + } +} + void kvm_arch_accel_class_init(ObjectClass *oc) { + object_class_property_add_str(oc, "riscv-aia", riscv_get_kvm_aia, + riscv_set_kvm_aia); + object_class_property_set_description(oc, "riscv-aia", + "Set KVM AIA mode. Valid values are " + "emul, hwaccel, and auto. 
Default " + "is auto."); + object_property_set_default_str(object_class_property_find(oc, "riscv-aia"), + "auto"); +} + +void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift, + uint64_t aia_irq_num, uint64_t aia_msi_num, + uint64_t aplic_base, uint64_t imsic_base, + uint64_t guest_num) +{ + int ret, i; + int aia_fd = -1; + uint64_t default_aia_mode; + uint64_t socket_count = riscv_socket_count(machine); + uint64_t max_hart_per_socket = 0; + uint64_t socket, base_hart, hart_count, socket_imsic_base, imsic_addr; + uint64_t socket_bits, hart_bits, guest_bits; + + aia_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_RISCV_AIA, false); + + if (aia_fd < 0) { + error_report("Unable to create in-kernel irqchip"); + exit(1); + } + + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, + KVM_DEV_RISCV_AIA_CONFIG_MODE, + &default_aia_mode, false, NULL); + if (ret < 0) { + error_report("KVM AIA: failed to get current KVM AIA mode"); + exit(1); + } + qemu_log("KVM AIA: default mode is %s\n", + kvm_aia_mode_str(default_aia_mode)); + + if (default_aia_mode != aia_mode) { + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, + KVM_DEV_RISCV_AIA_CONFIG_MODE, + &aia_mode, true, NULL); + if (ret < 0) + warn_report("KVM AIA: failed to set KVM AIA mode"); + else + qemu_log("KVM AIA: set current mode to %s\n", + kvm_aia_mode_str(aia_mode)); + } + + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, + KVM_DEV_RISCV_AIA_CONFIG_SRCS, + &aia_irq_num, true, NULL); + if (ret < 0) { + error_report("KVM AIA: failed to set number of input irq lines"); + exit(1); + } + + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, + KVM_DEV_RISCV_AIA_CONFIG_IDS, + &aia_msi_num, true, NULL); + if (ret < 0) { + error_report("KVM AIA: failed to set number of msi"); + exit(1); + } + + socket_bits = find_last_bit(&socket_count, BITS_PER_LONG) + 1; + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, + KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS, + &socket_bits, true, NULL); + if (ret < 0) { + error_report("KVM AIA: failed to set group_bits"); + exit(1); + } + + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, + KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT, + &group_shift, true, NULL); + if (ret < 0) { + error_report("KVM AIA: failed to set group_shift"); + exit(1); + } + + guest_bits = guest_num == 0 ? 
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                            KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS,
+                            &guest_bits, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to set guest_bits");
+        exit(1);
+    }
+
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR,
+                            KVM_DEV_RISCV_AIA_ADDR_APLIC,
+                            &aplic_base, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to set the base address of APLIC");
+        exit(1);
+    }
+
+    for (socket = 0; socket < socket_count; socket++) {
+        socket_imsic_base = imsic_base + socket * (1U << group_shift);
+        hart_count = riscv_socket_hart_count(machine, socket);
+        base_hart = riscv_socket_first_hartid(machine, socket);
+
+        if (max_hart_per_socket < hart_count) {
+            max_hart_per_socket = hart_count;
+        }
+
+        for (i = 0; i < hart_count; i++) {
+            imsic_addr = socket_imsic_base + i * IMSIC_HART_SIZE(guest_bits);
+            ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR,
+                                    KVM_DEV_RISCV_AIA_ADDR_IMSIC(i + base_hart),
+                                    &imsic_addr, true, NULL);
+            if (ret < 0) {
+                error_report("KVM AIA: failed to set the IMSIC address for hart %d", i);
+                exit(1);
+            }
+        }
+    }
+
+    hart_bits = find_last_bit(&max_hart_per_socket, BITS_PER_LONG) + 1;
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
+                            KVM_DEV_RISCV_AIA_CONFIG_HART_BITS,
+                            &hart_bits, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to set hart_bits");
+        exit(1);
+    }
+
+    if (kvm_has_gsi_routing()) {
+        for (uint64_t idx = 0; idx < aia_irq_num + 1; ++idx) {
+            /* KVM AIA only has one APLIC instance */
+            kvm_irqchip_add_irq_route(kvm_state, idx, 0, idx);
+        }
+        kvm_gsi_routing_allowed = true;
+        kvm_irqchip_commit_routes(kvm_state);
+    }
+
+    ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CTRL,
+                            KVM_DEV_RISCV_AIA_CTRL_INIT,
+                            NULL, true, NULL);
+    if (ret < 0) {
+        error_report("KVM AIA: failed to initialize");
+        exit(1);
+    }
+
+    kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled();
+}
diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h
index e3ba935808..de8c209ebc 100644
--- a/target/riscv/kvm_riscv.h
+++ b/target/riscv/kvm_riscv.h
@@ -22,5 +22,10 @@ void kvm_riscv_init_user_properties(Object *cpu_obj);
 void kvm_riscv_reset_vcpu(RISCVCPU *cpu);
 void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level);
+void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift,
+                          uint64_t aia_irq_num, uint64_t aia_msi_num,
+                          uint64_t aplic_base, uint64_t imsic_base,
+                          uint64_t guest_num);
+void riscv_kvm_aplic_request(void *opaque, int irq, int level);
 #endif
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
index 7f56c5f88d..660078bda1 100644
--- a/target/riscv/meson.build
+++ b/target/riscv/meson.build
@@ -16,11 +16,13 @@ riscv_ss.add(files(
   'gdbstub.c',
   'op_helper.c',
   'vector_helper.c',
+  'vector_internals.c',
   'bitmanip_helper.c',
   'translate.c',
   'm128_helper.c',
   'crypto_helper.c',
-  'zce_helper.c'
+  'zce_helper.c',
+  'vcrypto_helper.c'
 ))
 riscv_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c'), if_false: files('kvm-stub.c'))
diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c
index 9d8db493e6..5e60c26031 100644
--- a/target/riscv/pmp.c
+++ b/target/riscv/pmp.c
@@ -44,6 +44,10 @@ static inline uint8_t pmp_get_a_field(uint8_t cfg)
  */
 static inline int pmp_is_locked(CPURISCVState *env, uint32_t pmp_index)
 {
+    /* mseccfg.RLB is set */
+    if (MSECCFG_RLB_ISSET(env)) {
+        return 0;
+    }
     if (env->pmp_state.pmp[pmp_index].cfg_reg & PMP_LOCK) {
         return 1;
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 697df1be9e..7dbf173adb 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -1094,6 +1094,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc) #include "insn_trans/trans_rvzfa.c.inc" #include "insn_trans/trans_rvzfh.c.inc" #include "insn_trans/trans_rvk.c.inc" +#include "insn_trans/trans_rvvk.c.inc" #include "insn_trans/trans_privileged.c.inc" #include "insn_trans/trans_svinval.c.inc" #include "insn_trans/trans_rvbf16.c.inc" diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c new file mode 100644 index 0000000000..e2d719b13b --- /dev/null +++ b/target/riscv/vcrypto_helper.c @@ -0,0 +1,970 @@ +/* + * RISC-V Vector Crypto Extension Helpers for QEMU. + * + * Copyright (C) 2023 SiFive, Inc. + * Written by Codethink Ltd and SiFive. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/host-utils.h" +#include "qemu/bitops.h" +#include "qemu/bswap.h" +#include "cpu.h" +#include "crypto/aes.h" +#include "crypto/aes-round.h" +#include "crypto/sm4.h" +#include "exec/memop.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "internals.h" +#include "vector_internals.h" + +static uint64_t clmul64(uint64_t y, uint64_t x) +{ + uint64_t result = 0; + for (int j = 63; j >= 0; j--) { + if ((y >> j) & 1) { + result ^= (x << j); + } + } + return result; +} + +static uint64_t clmulh64(uint64_t y, uint64_t x) +{ + uint64_t result = 0; + for (int j = 63; j >= 1; j--) { + if ((y >> j) & 1) { + result ^= (x >> (64 - j)); + } + } + return result; +} + +RVVCALL(OPIVV2, vclmul_vv, OP_UUU_D, H8, H8, H8, clmul64) +GEN_VEXT_VV(vclmul_vv, 8) +RVVCALL(OPIVX2, vclmul_vx, OP_UUU_D, H8, H8, clmul64) +GEN_VEXT_VX(vclmul_vx, 8) +RVVCALL(OPIVV2, vclmulh_vv, OP_UUU_D, H8, H8, H8, clmulh64) +GEN_VEXT_VV(vclmulh_vv, 8) +RVVCALL(OPIVX2, vclmulh_vx, OP_UUU_D, H8, H8, clmulh64) +GEN_VEXT_VX(vclmulh_vx, 8) + +RVVCALL(OPIVV2, vror_vv_b, OP_UUU_B, H1, H1, H1, ror8) +RVVCALL(OPIVV2, vror_vv_h, OP_UUU_H, H2, H2, H2, ror16) +RVVCALL(OPIVV2, vror_vv_w, OP_UUU_W, H4, H4, H4, ror32) +RVVCALL(OPIVV2, vror_vv_d, OP_UUU_D, H8, H8, H8, ror64) +GEN_VEXT_VV(vror_vv_b, 1) +GEN_VEXT_VV(vror_vv_h, 2) +GEN_VEXT_VV(vror_vv_w, 4) +GEN_VEXT_VV(vror_vv_d, 8) + +RVVCALL(OPIVX2, vror_vx_b, OP_UUU_B, H1, H1, ror8) +RVVCALL(OPIVX2, vror_vx_h, OP_UUU_H, H2, H2, ror16) +RVVCALL(OPIVX2, vror_vx_w, OP_UUU_W, H4, H4, ror32) +RVVCALL(OPIVX2, vror_vx_d, OP_UUU_D, H8, H8, ror64) +GEN_VEXT_VX(vror_vx_b, 1) +GEN_VEXT_VX(vror_vx_h, 2) +GEN_VEXT_VX(vror_vx_w, 4) +GEN_VEXT_VX(vror_vx_d, 8) + +RVVCALL(OPIVV2, vrol_vv_b, OP_UUU_B, H1, H1, H1, rol8) +RVVCALL(OPIVV2, vrol_vv_h, OP_UUU_H, H2, H2, H2, rol16) +RVVCALL(OPIVV2, vrol_vv_w, OP_UUU_W, H4, H4, H4, rol32) +RVVCALL(OPIVV2, vrol_vv_d, OP_UUU_D, H8, H8, H8, rol64) +GEN_VEXT_VV(vrol_vv_b, 1) +GEN_VEXT_VV(vrol_vv_h, 2) +GEN_VEXT_VV(vrol_vv_w, 4) +GEN_VEXT_VV(vrol_vv_d, 8) + +RVVCALL(OPIVX2, vrol_vx_b, OP_UUU_B, H1, H1, rol8) 
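The two vclmul helpers near the top of vcrypto_helper.c compute a carry-less (GF(2) polynomial) product: clmul64() returns the low 64 bits of the 128-bit product and clmulh64() the high 64 bits. A self-contained sanity check of the low half, mirroring the helper above (clmul_lo is an illustrative name):

    #include <assert.h>
    #include <stdint.h>

    /* Low 64 bits of the 128-bit carry-less product (as in clmul64 above). */
    static uint64_t clmul_lo(uint64_t y, uint64_t x)
    {
        uint64_t r = 0;

        for (int j = 63; j >= 0; j--) {
            if ((y >> j) & 1) {
                r ^= x << j;
            }
        }
        return r;
    }

    int main(void)
    {
        /* (x + 1)^2 = x^2 + 1 over GF(2): the middle terms cancel, no carries */
        assert(clmul_lo(0x3, 0x3) == 0x5);
        /* contrast with integer multiply, where 3 * 3 == 9 */
        return 0;
    }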
+RVVCALL(OPIVX2, vrol_vx_h, OP_UUU_H, H2, H2, rol16) +RVVCALL(OPIVX2, vrol_vx_w, OP_UUU_W, H4, H4, rol32) +RVVCALL(OPIVX2, vrol_vx_d, OP_UUU_D, H8, H8, rol64) +GEN_VEXT_VX(vrol_vx_b, 1) +GEN_VEXT_VX(vrol_vx_h, 2) +GEN_VEXT_VX(vrol_vx_w, 4) +GEN_VEXT_VX(vrol_vx_d, 8) + +static uint64_t brev8(uint64_t val) +{ + val = ((val & 0x5555555555555555ull) << 1) | + ((val & 0xAAAAAAAAAAAAAAAAull) >> 1); + val = ((val & 0x3333333333333333ull) << 2) | + ((val & 0xCCCCCCCCCCCCCCCCull) >> 2); + val = ((val & 0x0F0F0F0F0F0F0F0Full) << 4) | + ((val & 0xF0F0F0F0F0F0F0F0ull) >> 4); + + return val; +} + +RVVCALL(OPIVV1, vbrev8_v_b, OP_UU_B, H1, H1, brev8) +RVVCALL(OPIVV1, vbrev8_v_h, OP_UU_H, H2, H2, brev8) +RVVCALL(OPIVV1, vbrev8_v_w, OP_UU_W, H4, H4, brev8) +RVVCALL(OPIVV1, vbrev8_v_d, OP_UU_D, H8, H8, brev8) +GEN_VEXT_V(vbrev8_v_b, 1) +GEN_VEXT_V(vbrev8_v_h, 2) +GEN_VEXT_V(vbrev8_v_w, 4) +GEN_VEXT_V(vbrev8_v_d, 8) + +#define DO_IDENTITY(a) (a) +RVVCALL(OPIVV1, vrev8_v_b, OP_UU_B, H1, H1, DO_IDENTITY) +RVVCALL(OPIVV1, vrev8_v_h, OP_UU_H, H2, H2, bswap16) +RVVCALL(OPIVV1, vrev8_v_w, OP_UU_W, H4, H4, bswap32) +RVVCALL(OPIVV1, vrev8_v_d, OP_UU_D, H8, H8, bswap64) +GEN_VEXT_V(vrev8_v_b, 1) +GEN_VEXT_V(vrev8_v_h, 2) +GEN_VEXT_V(vrev8_v_w, 4) +GEN_VEXT_V(vrev8_v_d, 8) + +#define DO_ANDN(a, b) ((a) & ~(b)) +RVVCALL(OPIVV2, vandn_vv_b, OP_UUU_B, H1, H1, H1, DO_ANDN) +RVVCALL(OPIVV2, vandn_vv_h, OP_UUU_H, H2, H2, H2, DO_ANDN) +RVVCALL(OPIVV2, vandn_vv_w, OP_UUU_W, H4, H4, H4, DO_ANDN) +RVVCALL(OPIVV2, vandn_vv_d, OP_UUU_D, H8, H8, H8, DO_ANDN) +GEN_VEXT_VV(vandn_vv_b, 1) +GEN_VEXT_VV(vandn_vv_h, 2) +GEN_VEXT_VV(vandn_vv_w, 4) +GEN_VEXT_VV(vandn_vv_d, 8) + +RVVCALL(OPIVX2, vandn_vx_b, OP_UUU_B, H1, H1, DO_ANDN) +RVVCALL(OPIVX2, vandn_vx_h, OP_UUU_H, H2, H2, DO_ANDN) +RVVCALL(OPIVX2, vandn_vx_w, OP_UUU_W, H4, H4, DO_ANDN) +RVVCALL(OPIVX2, vandn_vx_d, OP_UUU_D, H8, H8, DO_ANDN) +GEN_VEXT_VX(vandn_vx_b, 1) +GEN_VEXT_VX(vandn_vx_h, 2) +GEN_VEXT_VX(vandn_vx_w, 4) +GEN_VEXT_VX(vandn_vx_d, 8) + +RVVCALL(OPIVV1, vbrev_v_b, OP_UU_B, H1, H1, revbit8) +RVVCALL(OPIVV1, vbrev_v_h, OP_UU_H, H2, H2, revbit16) +RVVCALL(OPIVV1, vbrev_v_w, OP_UU_W, H4, H4, revbit32) +RVVCALL(OPIVV1, vbrev_v_d, OP_UU_D, H8, H8, revbit64) +GEN_VEXT_V(vbrev_v_b, 1) +GEN_VEXT_V(vbrev_v_h, 2) +GEN_VEXT_V(vbrev_v_w, 4) +GEN_VEXT_V(vbrev_v_d, 8) + +RVVCALL(OPIVV1, vclz_v_b, OP_UU_B, H1, H1, clz8) +RVVCALL(OPIVV1, vclz_v_h, OP_UU_H, H2, H2, clz16) +RVVCALL(OPIVV1, vclz_v_w, OP_UU_W, H4, H4, clz32) +RVVCALL(OPIVV1, vclz_v_d, OP_UU_D, H8, H8, clz64) +GEN_VEXT_V(vclz_v_b, 1) +GEN_VEXT_V(vclz_v_h, 2) +GEN_VEXT_V(vclz_v_w, 4) +GEN_VEXT_V(vclz_v_d, 8) + +RVVCALL(OPIVV1, vctz_v_b, OP_UU_B, H1, H1, ctz8) +RVVCALL(OPIVV1, vctz_v_h, OP_UU_H, H2, H2, ctz16) +RVVCALL(OPIVV1, vctz_v_w, OP_UU_W, H4, H4, ctz32) +RVVCALL(OPIVV1, vctz_v_d, OP_UU_D, H8, H8, ctz64) +GEN_VEXT_V(vctz_v_b, 1) +GEN_VEXT_V(vctz_v_h, 2) +GEN_VEXT_V(vctz_v_w, 4) +GEN_VEXT_V(vctz_v_d, 8) + +RVVCALL(OPIVV1, vcpop_v_b, OP_UU_B, H1, H1, ctpop8) +RVVCALL(OPIVV1, vcpop_v_h, OP_UU_H, H2, H2, ctpop16) +RVVCALL(OPIVV1, vcpop_v_w, OP_UU_W, H4, H4, ctpop32) +RVVCALL(OPIVV1, vcpop_v_d, OP_UU_D, H8, H8, ctpop64) +GEN_VEXT_V(vcpop_v_b, 1) +GEN_VEXT_V(vcpop_v_h, 2) +GEN_VEXT_V(vcpop_v_w, 4) +GEN_VEXT_V(vcpop_v_d, 8) + +#define DO_SLL(N, M) (N << (M & (sizeof(N) * 8 - 1))) +RVVCALL(OPIVV2, vwsll_vv_b, WOP_UUU_B, H2, H1, H1, DO_SLL) +RVVCALL(OPIVV2, vwsll_vv_h, WOP_UUU_H, H4, H2, H2, DO_SLL) +RVVCALL(OPIVV2, vwsll_vv_w, WOP_UUU_W, H8, H4, H4, DO_SLL) +GEN_VEXT_VV(vwsll_vv_b, 2) +GEN_VEXT_VV(vwsll_vv_h, 4) 
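The brev8() helper defined earlier in this file reverses the bit order inside each byte of a 64-bit lane using three masked swap steps (adjacent bits, adjacent bit pairs, then nibbles). A naive per-bit reference of the same operation, usable to cross-check the SWAR version (brev8_ref is an illustrative name):

    #include <assert.h>
    #include <stdint.h>

    /* Reference: reverse the 8 bits of every byte in a 64-bit lane. */
    static uint64_t brev8_ref(uint64_t v)
    {
        uint64_t r = 0;

        for (int byte = 0; byte < 8; byte++) {
            for (int bit = 0; bit < 8; bit++) {
                if (v & (1ull << (byte * 8 + bit))) {
                    r |= 1ull << (byte * 8 + (7 - bit));
                }
            }
        }
        return r;
    }

    int main(void)
    {
        assert(brev8_ref(0x0102030405060708ull) == 0x8040C020A060E010ull);
        return 0;
    }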
+GEN_VEXT_VV(vwsll_vv_w, 8) + +RVVCALL(OPIVX2, vwsll_vx_b, WOP_UUU_B, H2, H1, DO_SLL) +RVVCALL(OPIVX2, vwsll_vx_h, WOP_UUU_H, H4, H2, DO_SLL) +RVVCALL(OPIVX2, vwsll_vx_w, WOP_UUU_W, H8, H4, DO_SLL) +GEN_VEXT_VX(vwsll_vx_b, 2) +GEN_VEXT_VX(vwsll_vx_h, 4) +GEN_VEXT_VX(vwsll_vx_w, 8) + +void HELPER(egs_check)(uint32_t egs, CPURISCVState *env) +{ + uint32_t vl = env->vl; + uint32_t vstart = env->vstart; + + if (vl % egs != 0 || vstart % egs != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + } +} + +static inline void xor_round_key(AESState *round_state, AESState *round_key) +{ + round_state->v = round_state->v ^ round_key->v; +} + +#define GEN_ZVKNED_HELPER_VV(NAME, ...) \ + void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \ + uint32_t desc) \ + { \ + uint32_t vl = env->vl; \ + uint32_t total_elems = vext_get_total_elems(env, desc, 4); \ + uint32_t vta = vext_vta(desc); \ + \ + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { \ + AESState round_key; \ + round_key.d[0] = *((uint64_t *)vs2 + H8(i * 2 + 0)); \ + round_key.d[1] = *((uint64_t *)vs2 + H8(i * 2 + 1)); \ + AESState round_state; \ + round_state.d[0] = *((uint64_t *)vd + H8(i * 2 + 0)); \ + round_state.d[1] = *((uint64_t *)vd + H8(i * 2 + 1)); \ + __VA_ARGS__; \ + *((uint64_t *)vd + H8(i * 2 + 0)) = round_state.d[0]; \ + *((uint64_t *)vd + H8(i * 2 + 1)) = round_state.d[1]; \ + } \ + env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); \ + } + +#define GEN_ZVKNED_HELPER_VS(NAME, ...) \ + void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \ + uint32_t desc) \ + { \ + uint32_t vl = env->vl; \ + uint32_t total_elems = vext_get_total_elems(env, desc, 4); \ + uint32_t vta = vext_vta(desc); \ + \ + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { \ + AESState round_key; \ + round_key.d[0] = *((uint64_t *)vs2 + H8(0)); \ + round_key.d[1] = *((uint64_t *)vs2 + H8(1)); \ + AESState round_state; \ + round_state.d[0] = *((uint64_t *)vd + H8(i * 2 + 0)); \ + round_state.d[1] = *((uint64_t *)vd + H8(i * 2 + 1)); \ + __VA_ARGS__; \ + *((uint64_t *)vd + H8(i * 2 + 0)) = round_state.d[0]; \ + *((uint64_t *)vd + H8(i * 2 + 1)) = round_state.d[1]; \ + } \ + env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); \ + } + +GEN_ZVKNED_HELPER_VV(vaesef_vv, aesenc_SB_SR_AK(&round_state, + &round_state, + &round_key, + false);) +GEN_ZVKNED_HELPER_VS(vaesef_vs, aesenc_SB_SR_AK(&round_state, + &round_state, + &round_key, + false);) +GEN_ZVKNED_HELPER_VV(vaesdf_vv, aesdec_ISB_ISR_AK(&round_state, + &round_state, + &round_key, + false);) +GEN_ZVKNED_HELPER_VS(vaesdf_vs, aesdec_ISB_ISR_AK(&round_state, + &round_state, + &round_key, + false);) +GEN_ZVKNED_HELPER_VV(vaesem_vv, aesenc_SB_SR_MC_AK(&round_state, + &round_state, + &round_key, + false);) +GEN_ZVKNED_HELPER_VS(vaesem_vs, aesenc_SB_SR_MC_AK(&round_state, + &round_state, + &round_key, + false);) +GEN_ZVKNED_HELPER_VV(vaesdm_vv, aesdec_ISB_ISR_AK_IMC(&round_state, + &round_state, + &round_key, + false);) +GEN_ZVKNED_HELPER_VS(vaesdm_vs, aesdec_ISB_ISR_AK_IMC(&round_state, + &round_state, + &round_key, + false);) +GEN_ZVKNED_HELPER_VS(vaesz_vs, xor_round_key(&round_state, &round_key);) + +void HELPER(vaeskf1_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, + CPURISCVState *env, uint32_t desc) +{ + uint32_t *vd = vd_vptr; + uint32_t *vs2 = vs2_vptr; + uint32_t vl = env->vl; + uint32_t total_elems = vext_get_total_elems(env, desc, 4); + uint32_t 
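Both GEN_ZVKNED_HELPER macros above walk the vector in element groups: with SEW=32 and EGS=4, each group of four elements is one 128-bit AES state, so the loop runs env->vl / 4 times. The only difference between the VV and VS shapes is where the round key comes from. A reduced sketch of the vaesz.vs case, with vstart, masking and tail handling omitted (EltGroup and vaesz_vs_sketch are illustrative names):

    #include <stdint.h>

    /* One EGS=4 element group of SEW=32 elements, i.e. a 128-bit block. */
    typedef struct {
        uint64_t d[2];
    } EltGroup;

    /*
     * vaesz.vs shape: the single key group vs2[0] is XORed into every
     * state group of vd; the .vv forms of the other vaes* ops index
     * vs2 by g as well.
     */
    static void vaesz_vs_sketch(EltGroup *vd, const EltGroup *vs2, uint32_t vl)
    {
        for (uint32_t g = 0; g < vl / 4; g++) {
            vd[g].d[0] ^= vs2[0].d[0];
            vd[g].d[1] ^= vs2[0].d[1];
        }
    }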
vta = vext_vta(desc); + + uimm &= 0b1111; + if (uimm > 10 || uimm == 0) { + uimm ^= 0b1000; + } + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + uint32_t rk[8], tmp; + static const uint32_t rcon[] = { + 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, + 0x00000020, 0x00000040, 0x00000080, 0x0000001B, 0x00000036, + }; + + rk[0] = vs2[i * 4 + H4(0)]; + rk[1] = vs2[i * 4 + H4(1)]; + rk[2] = vs2[i * 4 + H4(2)]; + rk[3] = vs2[i * 4 + H4(3)]; + tmp = ror32(rk[3], 8); + + rk[4] = rk[0] ^ (((uint32_t)AES_sbox[(tmp >> 24) & 0xff] << 24) | + ((uint32_t)AES_sbox[(tmp >> 16) & 0xff] << 16) | + ((uint32_t)AES_sbox[(tmp >> 8) & 0xff] << 8) | + ((uint32_t)AES_sbox[(tmp >> 0) & 0xff] << 0)) + ^ rcon[uimm - 1]; + rk[5] = rk[1] ^ rk[4]; + rk[6] = rk[2] ^ rk[5]; + rk[7] = rk[3] ^ rk[6]; + + vd[i * 4 + H4(0)] = rk[4]; + vd[i * 4 + H4(1)] = rk[5]; + vd[i * 4 + H4(2)] = rk[6]; + vd[i * 4 + H4(3)] = rk[7]; + } + env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); +} + +void HELPER(vaeskf2_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, + CPURISCVState *env, uint32_t desc) +{ + uint32_t *vd = vd_vptr; + uint32_t *vs2 = vs2_vptr; + uint32_t vl = env->vl; + uint32_t total_elems = vext_get_total_elems(env, desc, 4); + uint32_t vta = vext_vta(desc); + + uimm &= 0b1111; + if (uimm > 14 || uimm < 2) { + uimm ^= 0b1000; + } + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + uint32_t rk[12], tmp; + static const uint32_t rcon[] = { + 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, + 0x00000020, 0x00000040, 0x00000080, 0x0000001B, 0x00000036, + }; + + rk[0] = vd[i * 4 + H4(0)]; + rk[1] = vd[i * 4 + H4(1)]; + rk[2] = vd[i * 4 + H4(2)]; + rk[3] = vd[i * 4 + H4(3)]; + rk[4] = vs2[i * 4 + H4(0)]; + rk[5] = vs2[i * 4 + H4(1)]; + rk[6] = vs2[i * 4 + H4(2)]; + rk[7] = vs2[i * 4 + H4(3)]; + + if (uimm % 2 == 0) { + tmp = ror32(rk[7], 8); + rk[8] = rk[0] ^ (((uint32_t)AES_sbox[(tmp >> 24) & 0xff] << 24) | + ((uint32_t)AES_sbox[(tmp >> 16) & 0xff] << 16) | + ((uint32_t)AES_sbox[(tmp >> 8) & 0xff] << 8) | + ((uint32_t)AES_sbox[(tmp >> 0) & 0xff] << 0)) + ^ rcon[(uimm - 1) / 2]; + } else { + rk[8] = rk[0] ^ (((uint32_t)AES_sbox[(rk[7] >> 24) & 0xff] << 24) | + ((uint32_t)AES_sbox[(rk[7] >> 16) & 0xff] << 16) | + ((uint32_t)AES_sbox[(rk[7] >> 8) & 0xff] << 8) | + ((uint32_t)AES_sbox[(rk[7] >> 0) & 0xff] << 0)); + } + rk[9] = rk[1] ^ rk[8]; + rk[10] = rk[2] ^ rk[9]; + rk[11] = rk[3] ^ rk[10]; + + vd[i * 4 + H4(0)] = rk[8]; + vd[i * 4 + H4(1)] = rk[9]; + vd[i * 4 + H4(2)] = rk[10]; + vd[i * 4 + H4(3)] = rk[11]; + } + env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); +} + +static inline uint32_t sig0_sha256(uint32_t x) +{ + return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); +} + +static inline uint32_t sig1_sha256(uint32_t x) +{ + return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); +} + +static inline uint64_t sig0_sha512(uint64_t x) +{ + return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); +} + +static inline uint64_t sig1_sha512(uint64_t x) +{ + return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); +} + +static inline void vsha2ms_e32(uint32_t *vd, uint32_t *vs1, uint32_t *vs2) +{ + uint32_t res[4]; + res[0] = sig1_sha256(vs1[H4(2)]) + vs2[H4(1)] + sig0_sha256(vd[H4(1)]) + + vd[H4(0)]; + res[1] = sig1_sha256(vs1[H4(3)]) + vs2[H4(2)] + sig0_sha256(vd[H4(2)]) + + vd[H4(1)]; + res[2] = + sig1_sha256(res[0]) + vs2[H4(3)] + sig0_sha256(vd[H4(3)]) + vd[H4(2)]; + res[3] = + sig1_sha256(res[1]) + vs1[H4(0)] + 
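sig0_sha256()/sig1_sha256() above are the small sigma functions of FIPS 180-4, and vsha2ms applies the standard message-schedule recurrence W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16] four words at a time. One scalar step of that recurrence, as a sketch (ror32_, sig0, sig1 and sched_step are illustrative local names):

    #include <stdint.h>

    static inline uint32_t ror32_(uint32_t x, unsigned n)
    {
        return (x >> n) | (x << (32 - n));
    }

    static inline uint32_t sig0(uint32_t x) /* sigma0, FIPS 180-4 */
    {
        return ror32_(x, 7) ^ ror32_(x, 18) ^ (x >> 3);
    }

    static inline uint32_t sig1(uint32_t x) /* sigma1 */
    {
        return ror32_(x, 17) ^ ror32_(x, 19) ^ (x >> 10);
    }

    /* One step of the SHA-256 message schedule, W[t] for t >= 16. */
    static uint32_t sched_step(const uint32_t *W, int t)
    {
        return sig1(W[t - 2]) + W[t - 7] + sig0(W[t - 15]) + W[t - 16];
    }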
sig0_sha256(vs2[H4(0)]) + vd[H4(3)]; + vd[H4(3)] = res[3]; + vd[H4(2)] = res[2]; + vd[H4(1)] = res[1]; + vd[H4(0)] = res[0]; +} + +static inline void vsha2ms_e64(uint64_t *vd, uint64_t *vs1, uint64_t *vs2) +{ + uint64_t res[4]; + res[0] = sig1_sha512(vs1[2]) + vs2[1] + sig0_sha512(vd[1]) + vd[0]; + res[1] = sig1_sha512(vs1[3]) + vs2[2] + sig0_sha512(vd[2]) + vd[1]; + res[2] = sig1_sha512(res[0]) + vs2[3] + sig0_sha512(vd[3]) + vd[2]; + res[3] = sig1_sha512(res[1]) + vs1[0] + sig0_sha512(vs2[0]) + vd[3]; + vd[3] = res[3]; + vd[2] = res[2]; + vd[1] = res[1]; + vd[0] = res[0]; +} + +void HELPER(vsha2ms_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + uint32_t sew = FIELD_EX64(env->vtype, VTYPE, VSEW); + uint32_t esz = sew == MO_32 ? 4 : 8; + uint32_t total_elems; + uint32_t vta = vext_vta(desc); + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + if (sew == MO_32) { + vsha2ms_e32(((uint32_t *)vd) + i * 4, ((uint32_t *)vs1) + i * 4, + ((uint32_t *)vs2) + i * 4); + } else { + /* If not 32 then SEW should be 64 */ + vsha2ms_e64(((uint64_t *)vd) + i * 4, ((uint64_t *)vs1) + i * 4, + ((uint64_t *)vs2) + i * 4); + } + } + /* set tail elements to 1s */ + total_elems = vext_get_total_elems(env, desc, esz); + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); + env->vstart = 0; +} + +static inline uint64_t sum0_64(uint64_t x) +{ + return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39); +} + +static inline uint32_t sum0_32(uint32_t x) +{ + return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22); +} + +static inline uint64_t sum1_64(uint64_t x) +{ + return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41); +} + +static inline uint32_t sum1_32(uint32_t x) +{ + return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25); +} + +#define ch(x, y, z) ((x & y) ^ ((~x) & z)) + +#define maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z)) + +static void vsha2c_64(uint64_t *vs2, uint64_t *vd, uint64_t *vs1) +{ + uint64_t a = vs2[3], b = vs2[2], e = vs2[1], f = vs2[0]; + uint64_t c = vd[3], d = vd[2], g = vd[1], h = vd[0]; + uint64_t W0 = vs1[0], W1 = vs1[1]; + uint64_t T1 = h + sum1_64(e) + ch(e, f, g) + W0; + uint64_t T2 = sum0_64(a) + maj(a, b, c); + + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + T1 = h + sum1_64(e) + ch(e, f, g) + W1; + T2 = sum0_64(a) + maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + vd[0] = f; + vd[1] = e; + vd[2] = b; + vd[3] = a; +} + +static void vsha2c_32(uint32_t *vs2, uint32_t *vd, uint32_t *vs1) +{ + uint32_t a = vs2[H4(3)], b = vs2[H4(2)], e = vs2[H4(1)], f = vs2[H4(0)]; + uint32_t c = vd[H4(3)], d = vd[H4(2)], g = vd[H4(1)], h = vd[H4(0)]; + uint32_t W0 = vs1[H4(0)], W1 = vs1[H4(1)]; + uint32_t T1 = h + sum1_32(e) + ch(e, f, g) + W0; + uint32_t T2 = sum0_32(a) + maj(a, b, c); + + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + T1 = h + sum1_32(e) + ch(e, f, g) + W1; + T2 = sum0_32(a) + maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + vd[H4(0)] = f; + vd[H4(1)] = e; + vd[H4(2)] = b; + vd[H4(3)] = a; +} + +void HELPER(vsha2ch32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + const uint32_t esz = 4; + uint32_t total_elems; + uint32_t vta = vext_vta(desc); + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i, + ((uint32_t *)vs1) + 4 * i + 2); + } + + /* set tail elements to 1s */ + total_elems 
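vsha2c_32()/vsha2c_64() above perform two SHA-2 compression rounds per element group using the ch/maj and sum0/sum1 functions; as in the instruction definition, the round constant K[t] is expected to be pre-added into the schedule words by software. For comparison, one classic scalar SHA-256 round, as a sketch with w already including K[t] (sha256_round is an illustrative name):

    #include <stdint.h>

    #define CH(x, y, z)  (((x) & (y)) ^ (~(x) & (z)))
    #define MAJ(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))

    static inline uint32_t ror32_(uint32_t x, unsigned n)
    {
        return (x >> n) | (x << (32 - n));
    }

    /* One SHA-256 compression round on state a..h with schedule word w. */
    static void sha256_round(uint32_t s[8], uint32_t w)
    {
        uint32_t S1 = ror32_(s[4], 6) ^ ror32_(s[4], 11) ^ ror32_(s[4], 25);
        uint32_t T1 = s[7] + S1 + CH(s[4], s[5], s[6]) + w;
        uint32_t S0 = ror32_(s[0], 2) ^ ror32_(s[0], 13) ^ ror32_(s[0], 22);
        uint32_t T2 = S0 + MAJ(s[0], s[1], s[2]);

        s[7] = s[6]; s[6] = s[5]; s[5] = s[4]; s[4] = s[3] + T1;
        s[3] = s[2]; s[2] = s[1]; s[1] = s[0]; s[0] = T1 + T2;
    }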
= vext_get_total_elems(env, desc, esz); + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); + env->vstart = 0; +} + +void HELPER(vsha2ch64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + const uint32_t esz = 8; + uint32_t total_elems; + uint32_t vta = vext_vta(desc); + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i, + ((uint64_t *)vs1) + 4 * i + 2); + } + + /* set tail elements to 1s */ + total_elems = vext_get_total_elems(env, desc, esz); + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); + env->vstart = 0; +} + +void HELPER(vsha2cl32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + const uint32_t esz = 4; + uint32_t total_elems; + uint32_t vta = vext_vta(desc); + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i, + (((uint32_t *)vs1) + 4 * i)); + } + + /* set tail elements to 1s */ + total_elems = vext_get_total_elems(env, desc, esz); + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); + env->vstart = 0; +} + +void HELPER(vsha2cl64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + uint32_t esz = 8; + uint32_t total_elems; + uint32_t vta = vext_vta(desc); + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i, + (((uint64_t *)vs1) + 4 * i)); + } + + /* set tail elements to 1s */ + total_elems = vext_get_total_elems(env, desc, esz); + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); + env->vstart = 0; +} + +static inline uint32_t p1(uint32_t x) +{ + return x ^ rol32(x, 15) ^ rol32(x, 23); +} + +static inline uint32_t zvksh_w(uint32_t m16, uint32_t m9, uint32_t m3, + uint32_t m13, uint32_t m6) +{ + return p1(m16 ^ m9 ^ rol32(m3, 15)) ^ rol32(m13, 7) ^ m6; +} + +void HELPER(vsm3me_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr, + CPURISCVState *env, uint32_t desc) +{ + uint32_t esz = memop_size(FIELD_EX64(env->vtype, VTYPE, VSEW)); + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + uint32_t vta = vext_vta(desc); + uint32_t *vd = vd_vptr; + uint32_t *vs1 = vs1_vptr; + uint32_t *vs2 = vs2_vptr; + + for (int i = env->vstart / 8; i < env->vl / 8; i++) { + uint32_t w[24]; + for (int j = 0; j < 8; j++) { + w[j] = bswap32(vs1[H4((i * 8) + j)]); + w[j + 8] = bswap32(vs2[H4((i * 8) + j)]); + } + for (int j = 0; j < 8; j++) { + w[j + 16] = + zvksh_w(w[j], w[j + 7], w[j + 13], w[j + 3], w[j + 10]); + } + for (int j = 0; j < 8; j++) { + vd[(i * 8) + j] = bswap32(w[H4(j + 16)]); + } + } + vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz); + env->vstart = 0; +} + +static inline uint32_t ff1(uint32_t x, uint32_t y, uint32_t z) +{ + return x ^ y ^ z; +} + +static inline uint32_t ff2(uint32_t x, uint32_t y, uint32_t z) +{ + return (x & y) | (x & z) | (y & z); +} + +static inline uint32_t ff_j(uint32_t x, uint32_t y, uint32_t z, uint32_t j) +{ + return (j <= 15) ? ff1(x, y, z) : ff2(x, y, z); +} + +static inline uint32_t gg1(uint32_t x, uint32_t y, uint32_t z) +{ + return x ^ y ^ z; +} + +static inline uint32_t gg2(uint32_t x, uint32_t y, uint32_t z) +{ + return (x & y) | (~x & z); +} + +static inline uint32_t gg_j(uint32_t x, uint32_t y, uint32_t z, uint32_t j) +{ + return (j <= 15) ? gg1(x, y, z) : gg2(x, y, z); +} + +static inline uint32_t t_j(uint32_t j) +{ + return (j <= 15) ? 
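zvksh_w() above is the SM3 message expansion of GB/T 32905-2016 built on the P1 permutation; vsm3me_vv applies it eight words per element group. Written with absolute schedule indices instead of the five m* parameters, one expansion step looks like this sketch (rol32_, p1 and sm3_w are illustrative local names):

    #include <stdint.h>

    static inline uint32_t rol32_(uint32_t x, unsigned n)
    {
        return (x << n) | (x >> (32 - n));
    }

    static inline uint32_t p1(uint32_t x) /* SM3 permutation P1 */
    {
        return x ^ rol32_(x, 15) ^ rol32_(x, 23);
    }

    /* SM3 message expansion, W[j] for j >= 16. */
    static uint32_t sm3_w(const uint32_t *W, int j)
    {
        return p1(W[j - 16] ^ W[j - 9] ^ rol32_(W[j - 3], 15)) ^
               rol32_(W[j - 13], 7) ^ W[j - 6];
    }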
0x79cc4519 : 0x7a879d8a; +} + +static inline uint32_t p_0(uint32_t x) +{ + return x ^ rol32(x, 9) ^ rol32(x, 17); +} + +static void sm3c(uint32_t *vd, uint32_t *vs1, uint32_t *vs2, uint32_t uimm) +{ + uint32_t x0, x1; + uint32_t j; + uint32_t ss1, ss2, tt1, tt2; + x0 = vs2[0] ^ vs2[4]; + x1 = vs2[1] ^ vs2[5]; + j = 2 * uimm; + ss1 = rol32(rol32(vs1[0], 12) + vs1[4] + rol32(t_j(j), j % 32), 7); + ss2 = ss1 ^ rol32(vs1[0], 12); + tt1 = ff_j(vs1[0], vs1[1], vs1[2], j) + vs1[3] + ss2 + x0; + tt2 = gg_j(vs1[4], vs1[5], vs1[6], j) + vs1[7] + ss1 + vs2[0]; + vs1[3] = vs1[2]; + vd[3] = rol32(vs1[1], 9); + vs1[1] = vs1[0]; + vd[1] = tt1; + vs1[7] = vs1[6]; + vd[7] = rol32(vs1[5], 19); + vs1[5] = vs1[4]; + vd[5] = p_0(tt2); + j = 2 * uimm + 1; + ss1 = rol32(rol32(vd[1], 12) + vd[5] + rol32(t_j(j), j % 32), 7); + ss2 = ss1 ^ rol32(vd[1], 12); + tt1 = ff_j(vd[1], vs1[1], vd[3], j) + vs1[3] + ss2 + x1; + tt2 = gg_j(vd[5], vs1[5], vd[7], j) + vs1[7] + ss1 + vs2[1]; + vd[2] = rol32(vs1[1], 9); + vd[0] = tt1; + vd[6] = rol32(vs1[5], 19); + vd[4] = p_0(tt2); +} + +void HELPER(vsm3c_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, + CPURISCVState *env, uint32_t desc) +{ + uint32_t esz = memop_size(FIELD_EX64(env->vtype, VTYPE, VSEW)); + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + uint32_t vta = vext_vta(desc); + uint32_t *vd = vd_vptr; + uint32_t *vs2 = vs2_vptr; + uint32_t v1[8], v2[8], v3[8]; + + for (int i = env->vstart / 8; i < env->vl / 8; i++) { + for (int k = 0; k < 8; k++) { + v2[k] = bswap32(vd[H4(i * 8 + k)]); + v3[k] = bswap32(vs2[H4(i * 8 + k)]); + } + sm3c(v1, v2, v3, uimm); + for (int k = 0; k < 8; k++) { + vd[i * 8 + k] = bswap32(v1[H4(k)]); + } + } + vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz); + env->vstart = 0; +} + +void HELPER(vghsh_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr, + CPURISCVState *env, uint32_t desc) +{ + uint64_t *vd = vd_vptr; + uint64_t *vs1 = vs1_vptr; + uint64_t *vs2 = vs2_vptr; + uint32_t vta = vext_vta(desc); + uint32_t total_elems = vext_get_total_elems(env, desc, 4); + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + uint64_t Y[2] = {vd[i * 2 + 0], vd[i * 2 + 1]}; + uint64_t H[2] = {brev8(vs2[i * 2 + 0]), brev8(vs2[i * 2 + 1])}; + uint64_t X[2] = {vs1[i * 2 + 0], vs1[i * 2 + 1]}; + uint64_t Z[2] = {0, 0}; + + uint64_t S[2] = {brev8(Y[0] ^ X[0]), brev8(Y[1] ^ X[1])}; + + for (int j = 0; j < 128; j++) { + if ((S[j / 64] >> (j % 64)) & 1) { + Z[0] ^= H[0]; + Z[1] ^= H[1]; + } + bool reduce = ((H[1] >> 63) & 1); + H[1] = H[1] << 1 | H[0] >> 63; + H[0] = H[0] << 1; + if (reduce) { + H[0] ^= 0x87; + } + } + + vd[i * 2 + 0] = brev8(Z[0]); + vd[i * 2 + 1] = brev8(Z[1]); + } + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4); + env->vstart = 0; +} + +void HELPER(vgmul_vv)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env, + uint32_t desc) +{ + uint64_t *vd = vd_vptr; + uint64_t *vs2 = vs2_vptr; + uint32_t vta = vext_vta(desc); + uint32_t total_elems = vext_get_total_elems(env, desc, 4); + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + uint64_t Y[2] = {brev8(vd[i * 2 + 0]), brev8(vd[i * 2 + 1])}; + uint64_t H[2] = {brev8(vs2[i * 2 + 0]), brev8(vs2[i * 2 + 1])}; + uint64_t Z[2] = {0, 0}; + + for (int j = 0; j < 128; j++) { + if ((Y[j / 64] >> (j % 64)) & 1) { + Z[0] ^= H[0]; + Z[1] ^= H[1]; + } + bool reduce = ((H[1] >> 63) & 1); + H[1] = H[1] << 1 | H[0] >> 63; + H[0] = H[0] << 1; + if (reduce) { + H[0] ^= 0x87; + } + } + + vd[i * 2 + 0] = brev8(Z[0]); + vd[i 
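Stripped of the brev8() calls (which adapt GHASH's reflected bit order), the inner loops of vghsh_vv and vgmul_vv are a shift-and-add multiply in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1, with the overflow bit folded back in as the constant 0x87. A standalone sketch of that core (gf128_mul is an illustrative name; 128-bit values are {lo, hi} uint64_t pairs with bit 0 = x^0):

    #include <stdint.h>

    /* z = a * h in GF(2^128) mod x^128 + x^7 + x^2 + x + 1 */
    static void gf128_mul(uint64_t z[2], const uint64_t a[2],
                          const uint64_t h0[2])
    {
        uint64_t h[2] = { h0[0], h0[1] };

        z[0] = z[1] = 0;
        for (int j = 0; j < 128; j++) {
            if ((a[j / 64] >> (j % 64)) & 1) {
                z[0] ^= h[0];
                z[1] ^= h[1];
            }
            /* h *= x, folding x^128 back in as x^7 + x^2 + x + 1 (0x87) */
            int reduce = (h[1] >> 63) & 1;
            h[1] = (h[1] << 1) | (h[0] >> 63);
            h[0] <<= 1;
            if (reduce) {
                h[0] ^= 0x87;
            }
        }
    }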
* 2 + 1] = brev8(Z[1]); + } + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4); + env->vstart = 0; +} + +void HELPER(vsm4k_vi)(void *vd, void *vs2, uint32_t uimm5, CPURISCVState *env, + uint32_t desc) +{ + const uint32_t egs = 4; + uint32_t rnd = uimm5 & 0x7; + uint32_t group_start = env->vstart / egs; + uint32_t group_end = env->vl / egs; + uint32_t esz = sizeof(uint32_t); + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + + for (uint32_t i = group_start; i < group_end; ++i) { + uint32_t vstart = i * egs; + uint32_t vend = (i + 1) * egs; + uint32_t rk[4] = {0}; + uint32_t tmp[8] = {0}; + + for (uint32_t j = vstart; j < vend; ++j) { + rk[j - vstart] = *((uint32_t *)vs2 + H4(j)); + } + + for (uint32_t j = 0; j < egs; ++j) { + tmp[j] = rk[j]; + } + + for (uint32_t j = 0; j < egs; ++j) { + uint32_t b, s; + b = tmp[j + 1] ^ tmp[j + 2] ^ tmp[j + 3] ^ sm4_ck[rnd * 4 + j]; + + s = sm4_subword(b); + + tmp[j + 4] = tmp[j] ^ (s ^ rol32(s, 13) ^ rol32(s, 23)); + } + + for (uint32_t j = vstart; j < vend; ++j) { + *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)]; + } + } + + env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz); +} + +static void do_sm4_round(uint32_t *rk, uint32_t *buf) +{ + const uint32_t egs = 4; + uint32_t s, b; + + for (uint32_t j = egs; j < egs * 2; ++j) { + b = buf[j - 3] ^ buf[j - 2] ^ buf[j - 1] ^ rk[j - 4]; + + s = sm4_subword(b); + + buf[j] = buf[j - 4] ^ (s ^ rol32(s, 2) ^ rol32(s, 10) ^ rol32(s, 18) ^ + rol32(s, 24)); + } +} + +void HELPER(vsm4r_vv)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) +{ + const uint32_t egs = 4; + uint32_t group_start = env->vstart / egs; + uint32_t group_end = env->vl / egs; + uint32_t esz = sizeof(uint32_t); + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + + for (uint32_t i = group_start; i < group_end; ++i) { + uint32_t vstart = i * egs; + uint32_t vend = (i + 1) * egs; + uint32_t rk[4] = {0}; + uint32_t tmp[8] = {0}; + + for (uint32_t j = vstart; j < vend; ++j) { + rk[j - vstart] = *((uint32_t *)vs2 + H4(j)); + } + + for (uint32_t j = vstart; j < vend; ++j) { + tmp[j - vstart] = *((uint32_t *)vd + H4(j)); + } + + do_sm4_round(rk, tmp); + + for (uint32_t j = vstart; j < vend; ++j) { + *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)]; + } + } + + env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz); +} + +void HELPER(vsm4r_vs)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) +{ + const uint32_t egs = 4; + uint32_t group_start = env->vstart / egs; + uint32_t group_end = env->vl / egs; + uint32_t esz = sizeof(uint32_t); + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + + for (uint32_t i = group_start; i < group_end; ++i) { + uint32_t vstart = i * egs; + uint32_t vend = (i + 1) * egs; + uint32_t rk[4] = {0}; + uint32_t tmp[8] = {0}; + + for (uint32_t j = 0; j < egs; ++j) { + rk[j] = *((uint32_t *)vs2 + H4(j)); + } + + for (uint32_t j = vstart; j < vend; ++j) { + tmp[j - vstart] = *((uint32_t *)vd + H4(j)); + } + + do_sm4_round(rk, tmp); + + for (uint32_t j = vstart; j < vend; ++j) { + *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)]; + } + } + + env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz); +} diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index bc9e151aa9..3fb05cc3d6 100644 --- 
a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -27,6 +27,7 @@ #include "fpu/softfloat.h" #include "tcg/tcg-gvec-desc.h" #include "internals.h" +#include "vector_internals.h" #include <math.h> target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, @@ -74,68 +75,6 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, } /* - * Note that vector data is stored in host-endian 64-bit chunks, - * so addressing units smaller than that needs a host-endian fixup. - */ -#if HOST_BIG_ENDIAN -#define H1(x) ((x) ^ 7) -#define H1_2(x) ((x) ^ 6) -#define H1_4(x) ((x) ^ 4) -#define H2(x) ((x) ^ 3) -#define H4(x) ((x) ^ 1) -#define H8(x) ((x)) -#else -#define H1(x) (x) -#define H1_2(x) (x) -#define H1_4(x) (x) -#define H2(x) (x) -#define H4(x) (x) -#define H8(x) (x) -#endif - -static inline uint32_t vext_nf(uint32_t desc) -{ - return FIELD_EX32(simd_data(desc), VDATA, NF); -} - -static inline uint32_t vext_vm(uint32_t desc) -{ - return FIELD_EX32(simd_data(desc), VDATA, VM); -} - -/* - * Encode LMUL to lmul as following: - * LMUL vlmul lmul - * 1 000 0 - * 2 001 1 - * 4 010 2 - * 8 011 3 - * - 100 - - * 1/8 101 -3 - * 1/4 110 -2 - * 1/2 111 -1 - */ -static inline int32_t vext_lmul(uint32_t desc) -{ - return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); -} - -static inline uint32_t vext_vta(uint32_t desc) -{ - return FIELD_EX32(simd_data(desc), VDATA, VTA); -} - -static inline uint32_t vext_vma(uint32_t desc) -{ - return FIELD_EX32(simd_data(desc), VDATA, VMA); -} - -static inline uint32_t vext_vta_all_1s(uint32_t desc) -{ - return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); -} - -/* * Get the maximum number of elements can be operated. * * log2_esz: log2 of element size in bytes. @@ -153,21 +92,6 @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) return scale < 0 ? vlenb >> -scale : vlenb << scale; } -/* - * Get number of total elements, including prestart, body and tail elements. - * Note that when LMUL < 1, the tail includes the elements past VLMAX that - * are held in the same vector register. - */ -static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, - uint32_t esz) -{ - uint32_t vlenb = simd_maxsz(desc); - uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); - int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 : - ctzl(esz) - ctzl(sew) + vext_lmul(desc); - return (vlenb << emul) / esz; -} - static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr) { return (addr & ~env->cur_pmmask) | env->cur_pmbase; @@ -200,20 +124,6 @@ static void probe_pages(CPURISCVState *env, target_ulong addr, } } -/* set agnostic elements to 1s */ -static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, - uint32_t tot) -{ - if (is_agnostic == 0) { - /* policy undisturbed */ - return; - } - if (tot - cnt == 0) { - return; - } - memset(base + cnt, -1, tot - cnt); -} - static inline void vext_set_elem_mask(void *v0, int index, uint8_t value) { @@ -223,18 +133,6 @@ static inline void vext_set_elem_mask(void *v0, int index, ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); } -/* - * Earlier designs (pre-0.9) had a varying number of bits - * per mask value (MLEN). In the 0.9 design, MLEN=1. 
- * (Section 4.5) - */ -static inline int vext_elem_mask(void *v0, int index) -{ - int idx = index / 64; - int pos = index % 64; - return (((uint64_t *)v0)[idx] >> pos) & 1; -} - /* elements operations for load and store */ typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr, uint32_t idx, void *vd, uintptr_t retaddr); @@ -584,7 +482,7 @@ vext_ldff(void *vd, void *v0, target_ulong base, cpu_mmu_index(env, false)); if (host) { #ifdef CONFIG_USER_ONLY - if (page_check_range(addr, offset, PAGE_READ)) { + if (!page_check_range(addr, offset, PAGE_READ)) { vl = i; goto ProbeSuccess; } @@ -729,25 +627,15 @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) * Vector Integer Arithmetic Instructions */ -/* expand macro args before macro */ -#define RVVCALL(macro, ...) macro(__VA_ARGS__) - /* (TD, T1, T2, TX1, TX2) */ #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t -#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t -#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t -#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t -#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t -#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t -#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t -#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t @@ -764,16 +652,6 @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t -/* operation of two vector elements */ -typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); - -#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ -static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ -{ \ - TX1 s1 = *((T1 *)vs1 + HS1(i)); \ - TX2 s2 = *((T2 *)vs2 + HS2(i)); \ - *((TD *)vd + HD(i)) = OP(s2, s1); \ -} #define DO_SUB(N, M) (N - M) #define DO_RSUB(N, M) (M - N) @@ -786,40 +664,6 @@ RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) -static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, - CPURISCVState *env, uint32_t desc, - opivv2_fn *fn, uint32_t esz) -{ - uint32_t vm = vext_vm(desc); - uint32_t vl = env->vl; - uint32_t total_elems = vext_get_total_elems(env, desc, esz); - uint32_t vta = vext_vta(desc); - uint32_t vma = vext_vma(desc); - uint32_t i; - - for (i = env->vstart; i < vl; i++) { - if (!vm && !vext_elem_mask(v0, i)) { - /* set masked-off elements to 1s */ - vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); - continue; - } - fn(vd, vs1, vs2, i); - } - env->vstart = 0; - /* set tail elements to 1s */ - vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); -} - -/* generate the helpers for OPIVV */ -#define GEN_VEXT_VV(NAME, ESZ) \ -void HELPER(NAME)(void *vd, void *v0, void *vs1, \ - void 
*vs2, CPURISCVState *env, \ - uint32_t desc) \ -{ \ - do_vext_vv(vd, v0, vs1, vs2, env, desc, \ - do_##NAME, ESZ); \ -} - GEN_VEXT_VV(vadd_vv_b, 1) GEN_VEXT_VV(vadd_vv_h, 2) GEN_VEXT_VV(vadd_vv_w, 4) @@ -829,18 +673,6 @@ GEN_VEXT_VV(vsub_vv_h, 2) GEN_VEXT_VV(vsub_vv_w, 4) GEN_VEXT_VV(vsub_vv_d, 8) -typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); - -/* - * (T1)s1 gives the real operator type. - * (TX1)(T1)s1 expands the operator type of widen or narrow operations. - */ -#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ -static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ -{ \ - TX2 s2 = *((T2 *)vs2 + HS2(i)); \ - *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ -} RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) @@ -855,40 +687,6 @@ RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) -static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, - CPURISCVState *env, uint32_t desc, - opivx2_fn fn, uint32_t esz) -{ - uint32_t vm = vext_vm(desc); - uint32_t vl = env->vl; - uint32_t total_elems = vext_get_total_elems(env, desc, esz); - uint32_t vta = vext_vta(desc); - uint32_t vma = vext_vma(desc); - uint32_t i; - - for (i = env->vstart; i < vl; i++) { - if (!vm && !vext_elem_mask(v0, i)) { - /* set masked-off elements to 1s */ - vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); - continue; - } - fn(vd, s1, vs2, i); - } - env->vstart = 0; - /* set tail elements to 1s */ - vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); -} - -/* generate the helpers for OPIVX */ -#define GEN_VEXT_VX(NAME, ESZ) \ -void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ - void *vs2, CPURISCVState *env, \ - uint32_t desc) \ -{ \ - do_vext_vx(vd, v0, s1, vs2, env, desc, \ - do_##NAME, ESZ); \ -} - GEN_VEXT_VX(vadd_vx_b, 1) GEN_VEXT_VX(vadd_vx_h, 2) GEN_VEXT_VX(vadd_vx_w, 4) @@ -3637,11 +3435,6 @@ GEN_VEXT_VF(vfwnmsac_vf_h, 4) GEN_VEXT_VF(vfwnmsac_vf_w, 8) /* Vector Floating-Point Square-Root Instruction */ -/* (TD, T2, TX2) */ -#define OP_UU_H uint16_t, uint16_t, uint16_t -#define OP_UU_W uint32_t, uint32_t, uint32_t -#define OP_UU_D uint64_t, uint64_t, uint64_t - #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, void *vs2, int i, \ CPURISCVState *env) \ @@ -4338,40 +4131,6 @@ GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) /* Vector Floating-Point Classify Instruction */ -#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ -static void do_##NAME(void *vd, void *vs2, int i) \ -{ \ - TX2 s2 = *((T2 *)vs2 + HS2(i)); \ - *((TD *)vd + HD(i)) = OP(s2); \ -} - -#define GEN_VEXT_V(NAME, ESZ) \ -void HELPER(NAME)(void *vd, void *v0, void *vs2, \ - CPURISCVState *env, uint32_t desc) \ -{ \ - uint32_t vm = vext_vm(desc); \ - uint32_t vl = env->vl; \ - uint32_t total_elems = \ - vext_get_total_elems(env, desc, ESZ); \ - uint32_t vta = vext_vta(desc); \ - uint32_t vma = vext_vma(desc); \ - uint32_t i; \ - \ - for (i = env->vstart; i < vl; i++) { \ - if (!vm && !vext_elem_mask(v0, i)) { \ - /* set masked-off elements to 1s */ \ - vext_set_elems_1s(vd, vma, i * ESZ, \ - (i + 1) * ESZ); \ - continue; \ - } \ - do_##NAME(vd, vs2, i); \ - } \ - env->vstart = 0; \ - /* set tail elements to 1s */ \ - vext_set_elems_1s(vd, vta, vl * ESZ, \ - total_elems * ESZ); \ -} - target_ulong fclass_h(uint64_t frs1) { float16 f = frs1; diff 
--git a/target/riscv/vector_internals.c b/target/riscv/vector_internals.c new file mode 100644 index 0000000000..9cf5c17cde --- /dev/null +++ b/target/riscv/vector_internals.c @@ -0,0 +1,81 @@ +/* + * RISC-V Vector Extension Internals + * + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "vector_internals.h" + +/* set agnostic elements to 1s */ +void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, + uint32_t tot) +{ + if (is_agnostic == 0) { + /* policy undisturbed */ + return; + } + if (tot - cnt == 0) { + return ; + } + memset(base + cnt, -1, tot - cnt); +} + +void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, + CPURISCVState *env, uint32_t desc, + opivv2_fn *fn, uint32_t esz) +{ + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + uint32_t vta = vext_vta(desc); + uint32_t vma = vext_vma(desc); + uint32_t i; + + for (i = env->vstart; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, i)) { + /* set masked-off elements to 1s */ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); + continue; + } + fn(vd, vs1, vs2, i); + } + env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); +} + +void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, + CPURISCVState *env, uint32_t desc, + opivx2_fn fn, uint32_t esz) +{ + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + uint32_t vta = vext_vta(desc); + uint32_t vma = vext_vma(desc); + uint32_t i; + + for (i = env->vstart; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, i)) { + /* set masked-off elements to 1s */ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); + continue; + } + fn(vd, s1, vs2, i); + } + env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); +} diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h new file mode 100644 index 0000000000..8133111e5f --- /dev/null +++ b/target/riscv/vector_internals.h @@ -0,0 +1,228 @@ +/* + * RISC-V Vector Extension Internals + * + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h
new file mode 100644
index 0000000000..8133111e5f
--- /dev/null
+++ b/target/riscv/vector_internals.h
@@ -0,0 +1,228 @@
+/*
+ * RISC-V Vector Extension Internals
+ *
+ * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TARGET_RISCV_VECTOR_INTERNALS_H
+#define TARGET_RISCV_VECTOR_INTERNALS_H
+
+#include "qemu/osdep.h"
+#include "qemu/bitops.h"
+#include "cpu.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "internals.h"
+
+static inline uint32_t vext_nf(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, NF);
+}
+
+/*
+ * Note that vector data is stored in host-endian 64-bit chunks,
+ * so addressing units smaller than that needs a host-endian fixup.
+ */
+#if HOST_BIG_ENDIAN
+#define H1(x)   ((x) ^ 7)
+#define H1_2(x) ((x) ^ 6)
+#define H1_4(x) ((x) ^ 4)
+#define H2(x)   ((x) ^ 3)
+#define H4(x)   ((x) ^ 1)
+#define H8(x)   ((x))
+#else
+#define H1(x)   (x)
+#define H1_2(x) (x)
+#define H1_4(x) (x)
+#define H2(x)   (x)
+#define H4(x)   (x)
+#define H8(x)   (x)
+#endif
+
+/*
+ * Encode LMUL to lmul as follows:
+ *     LMUL    vlmul    lmul
+ *      1       000       0
+ *      2       001       1
+ *      4       010       2
+ *      8       011       3
+ *      -       100       -
+ *     1/8      101      -3
+ *     1/4      110      -2
+ *     1/2      111      -1
+ */
+static inline int32_t vext_lmul(uint32_t desc)
+{
+    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
+}
+
+static inline uint32_t vext_vm(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, VM);
+}
+
+static inline uint32_t vext_vma(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, VMA);
+}
+
+static inline uint32_t vext_vta(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, VTA);
+}
+
+static inline uint32_t vext_vta_all_1s(uint32_t desc)
+{
+    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
+}
+
+/*
+ * Earlier designs (pre-0.9) had a varying number of bits
+ * per mask value (MLEN). In the 0.9 design, MLEN=1.
+ * (Section 4.5)
+ */
+static inline int vext_elem_mask(void *v0, int index)
+{
+    int idx = index / 64;
+    int pos = index % 64;
+    return (((uint64_t *)v0)[idx] >> pos) & 1;
+}
+
+/*
+ * Get number of total elements, including prestart, body and tail elements.
+ * Note that when LMUL < 1, the tail includes the elements past VLMAX that
+ * are held in the same vector register.
+ */
+static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
+                                            uint32_t esz)
+{
+    uint32_t vlenb = simd_maxsz(desc);
+    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
+    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
+                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
+    return (vlenb << emul) / esz;
+}
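A worked example of the LMUL < 1 note above, recomputed standalone (the numbers are hypothetical, not from the patch, and GCC/Clang's __builtin_ctz stands in for ctzl):

#include <stdio.h>

int main(void)
{
    unsigned vlenb = 16;   /* VLEN = 128 bits */
    unsigned sew = 1;      /* SEW = 8 bits, i.e. 1 byte */
    int lmul = -1;         /* LMUL = 1/2 */
    unsigned esz = 1;      /* 1-byte elements */

    int emul = __builtin_ctz(esz) - __builtin_ctz(sew) + lmul;
    if (emul < 0) {
        emul = 0;          /* the same clamp vext_get_total_elems() applies */
    }
    unsigned total = (vlenb << emul) / esz;   /* 16 */
    unsigned vlmax = vlenb / 2 / esz;         /* 8, for LMUL = 1/2 */

    printf("total_elems=%u, VLMAX=%u\n", total, vlmax);
    return 0;
}

With LMUL = 1/2 only 8 elements can ever be body elements, yet the register still holds 16, so elements 8..15 are counted as tail and are subject to the tail-agnostic fill.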
+
+/* set agnostic elements to 1s */
+void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
+                       uint32_t tot);
+
+/* expand macro args before macro */
+#define RVVCALL(macro, ...) macro(__VA_ARGS__)
+
+/* (TD, T2, TX2) */
+#define OP_UU_B uint8_t, uint8_t, uint8_t
+#define OP_UU_H uint16_t, uint16_t, uint16_t
+#define OP_UU_W uint32_t, uint32_t, uint32_t
+#define OP_UU_D uint64_t, uint64_t, uint64_t
+
+/* (TD, T1, T2, TX1, TX2) */
+#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
+#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
+#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
+#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
+
+#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
+static void do_##NAME(void *vd, void *vs2, int i)     \
+{                                                     \
+    TX2 s2 = *((T2 *)vs2 + HS2(i));                   \
+    *((TD *)vd + HD(i)) = OP(s2);                     \
+}
+
+#define GEN_VEXT_V(NAME, ESZ)                          \
+void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
+                  CPURISCVState *env, uint32_t desc)   \
+{                                                      \
+    uint32_t vm = vext_vm(desc);                       \
+    uint32_t vl = env->vl;                             \
+    uint32_t total_elems =                             \
+        vext_get_total_elems(env, desc, ESZ);          \
+    uint32_t vta = vext_vta(desc);                     \
+    uint32_t vma = vext_vma(desc);                     \
+    uint32_t i;                                        \
+                                                       \
+    for (i = env->vstart; i < vl; i++) {               \
+        if (!vm && !vext_elem_mask(v0, i)) {           \
+            /* set masked-off elements to 1s */        \
+            vext_set_elems_1s(vd, vma, i * ESZ,        \
+                              (i + 1) * ESZ);          \
+            continue;                                  \
+        }                                              \
+        do_##NAME(vd, vs2, i);                         \
+    }                                                  \
+    env->vstart = 0;                                   \
+    /* set tail elements to 1s */                      \
+    vext_set_elems_1s(vd, vta, vl * ESZ,               \
+                      total_elems * ESZ);              \
+}
+
+/* operation of two vector elements */
+typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
+
+#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
+static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
+{                                                               \
+    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
+    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
+    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
+}
+
+void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
+                CPURISCVState *env, uint32_t desc,
+                opivv2_fn *fn, uint32_t esz);
+
+/* generate the helpers for OPIVV */
+#define GEN_VEXT_VV(NAME, ESZ)                    \
+void HELPER(NAME)(void *vd, void *v0, void *vs1,  \
+                  void *vs2, CPURISCVState *env,  \
+                  uint32_t desc)                  \
+{                                                 \
+    do_vext_vv(vd, v0, vs1, vs2, env, desc,       \
+               do_##NAME, ESZ);                   \
+}
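For readers tracing the macro plumbing, this is roughly what one helper pair expands to. It assumes vector_helper.c's OP_SSS_B (int8_t in all five type slots) and DO_ADD ((N) + (M)), and that QEMU's HELPER(vadd_vv_b) mangles to helper_vadd_vv_b; it is an illustration of the expansion, not code from the patch:

/*
 * Sketch: expansion of RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1,
 * DO_ADD) followed by GEN_VEXT_VV(vadd_vv_b, 1).
 */
static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
{
    int8_t s1 = *((int8_t *)vs1 + H1(i));
    int8_t s2 = *((int8_t *)vs2 + H1(i));
    *((int8_t *)vd + H1(i)) = s2 + s1;     /* DO_ADD(s2, s1) */
}

void helper_vadd_vv_b(void *vd, void *v0, void *vs1, void *vs2,
                      CPURISCVState *env, uint32_t desc)
{
    /* All masking/tail policy lives in the shared loop. */
    do_vext_vv(vd, v0, vs1, vs2, env, desc, do_vadd_vv_b, 1);
}

The per-instruction macro contributes only the three-line element operation; everything else is the shared do_vext_vv() loop, which is what made moving it into vector_internals.c a pure deduplication.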
+
+typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
+
+/*
+ * (T1)s1 gives the real operand type.
+ * (TX1)(T1)s1 expands the operand type for widen or narrow operations.
+ */
+#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
+static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
+{                                                                   \
+    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
+    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
+}
+
+void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
+                CPURISCVState *env, uint32_t desc,
+                opivx2_fn fn, uint32_t esz);
+
+/* generate the helpers for OPIVX */
+#define GEN_VEXT_VX(NAME, ESZ)                            \
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
+                  void *vs2, CPURISCVState *env,          \
+                  uint32_t desc)                          \
+{                                                         \
+    do_vext_vx(vd, v0, s1, vs2, env, desc,                \
+               do_##NAME, ESZ);                           \
+}
+
+/* Three of the widening macros: */
+/* (TD, T1, T2, TX1, TX2) */
+#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
+#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
+#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
+
+#endif /* TARGET_RISCV_VECTOR_INTERNALS_H */
diff --git a/tests/qemu-iotests/080.out b/tests/qemu-iotests/080.out
index 45ab01db8e..d8acb3e723 100644
--- a/tests/qemu-iotests/080.out
+++ b/tests/qemu-iotests/080.out
@@ -33,7 +33,7 @@ qemu-io: can't open device TEST_DIR/t.qcow2: Snapshot table offset invalid
 == Hitting snapshot table size limit ==
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
-qemu-img: Could not create snapshot 'test': -27 (File too large)
+qemu-img: Could not create snapshot 'test': File too large
 read 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
@@ -56,8 +56,8 @@ qemu-io: can't open device TEST_DIR/t.qcow2: Backing file name too long
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-qemu-img: Could not create snapshot 'test': -27 (File too large)
-qemu-img: Could not create snapshot 'test': -11 (Resource temporarily unavailable)
+qemu-img: Could not create snapshot 'test': File too large
+qemu-img: Could not create snapshot 'test': Resource temporarily unavailable
 == Invalid snapshot L1 table offset ==
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
diff --git a/tests/qemu-iotests/109.out b/tests/qemu-iotests/109.out
index e29280015e..2611d6a40f 100644
--- a/tests/qemu-iotests/109.out
+++ b/tests/qemu-iotests/109.out
@@ -44,6 +44,8 @@ read 512/512 bytes at offset 0
 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}}
 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}}
 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}}
 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}}
@@ -94,6 +96,8 @@ read 512/512
bytes at offset 0 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 197120, "offset": 197120, "speed": 0, "type": "mirror"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} @@ -144,6 +148,8 @@ read 512/512 bytes at offset 0 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} @@ -194,6 +200,8 @@ read 512/512 bytes at offset 0 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", 
"data": {"status": "concluded", "id": "src"}} @@ -244,6 +252,8 @@ read 512/512 bytes at offset 0 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 65536, "offset": 65536, "speed": 0, "type": "mirror"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} @@ -294,6 +304,8 @@ read 512/512 bytes at offset 0 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} @@ -343,6 +355,8 @@ read 512/512 bytes at offset 0 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} {"timestamp": 
{"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} @@ -392,6 +406,8 @@ read 512/512 bytes at offset 0 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 31457280, "offset": 31457280, "speed": 0, "type": "mirror"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} @@ -441,6 +457,8 @@ read 512/512 bytes at offset 0 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} @@ -490,6 +508,8 @@ read 512/512 bytes at offset 0 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": 
"src", "len": 2048, "offset": 2048, "speed": 0, "type": "mirror"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} @@ -519,6 +539,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} @@ -541,6 +563,8 @@ Images are identical. {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "src"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} diff --git a/tests/qemu-iotests/112.out b/tests/qemu-iotests/112.out index dd3cc4383c..ebf426febc 100644 --- a/tests/qemu-iotests/112.out +++ b/tests/qemu-iotests/112.out @@ -32,7 +32,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 refcount bits: 1 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -qemu-img: Could not create snapshot 'foo': -22 (Invalid argument) +qemu-img: Could not create snapshot 'foo': Invalid argument Leaked cluster 6 refcount=1 reference=0 1 leaked clusters were found on the image. 
@@ -44,7 +44,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 refcount bits: 2 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -qemu-img: Could not create snapshot 'baz': -22 (Invalid argument) +qemu-img: Could not create snapshot 'baz': Invalid argument Leaked cluster 7 refcount=1 reference=0 1 leaked clusters were found on the image. @@ -75,7 +75,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 refcount bits: 64 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -qemu-img: Could not create snapshot 'foo': -22 (Invalid argument) +qemu-img: Could not create snapshot 'foo': Invalid argument Leaked cluster 5 refcount=18446744073709551615 reference=1 Leaked cluster 6 refcount=1 reference=0 diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185 index 8b1143dc16..2ae0a85bbf 100755 --- a/tests/qemu-iotests/185 +++ b/tests/qemu-iotests/185 @@ -354,6 +354,8 @@ wait_for_job_and_quit() { QEMU_EVENTS= # Ignore all JOB_STATUS_CHANGE events that came before SHUTDOWN _wait_event $h 'JOB_STATUS_CHANGE' # standby _wait_event $h 'JOB_STATUS_CHANGE' # ready + _wait_event $h 'JOB_STATUS_CHANGE' # standby + _wait_event $h 'JOB_STATUS_CHANGE' # ready _wait_event $h 'JOB_STATUS_CHANGE' # aborting # Filter the offset (depends on when exactly `quit` was issued) _wait_event $h 'BLOCK_JOB_CANCELLED' \ diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out index 70e8dd6c87..7292c26bae 100644 --- a/tests/qemu-iotests/185.out +++ b/tests/qemu-iotests/185.out @@ -137,6 +137,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "mirror"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "mirror"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "mirror"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "mirror"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "mirror", "len": 33554432, "offset": (filtered), "speed": 0, "type": "mirror"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "mirror"}} @@ -160,6 +162,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "commit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "commit"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "commit"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, 
"event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "commit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "commit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "commit", "len": 33554432, "offset": (filtered), "speed": 0, "type": "commit"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "commit"}} diff --git a/tests/qemu-iotests/244.out b/tests/qemu-iotests/244.out index 5e03add054..4815a489b0 100644 --- a/tests/qemu-iotests/244.out +++ b/tests/qemu-iotests/244.out @@ -41,7 +41,7 @@ write failed: Operation not supported No errors were found on the image. Take an internal snapshot: -qemu-img: Could not create snapshot 'test': -95 (Operation not supported) +qemu-img: Could not create snapshot 'test': Operation not supported No errors were found on the image. === Standalone image with external data file (efficient) === diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c index 3fc33fc24d..a8a4c668ad 100644 --- a/tests/qtest/arm-cpu-features.c +++ b/tests/qtest/arm-cpu-features.c @@ -417,12 +417,22 @@ static void pauth_tests_default(QTestState *qts, const char *cpu_type) { assert_has_feature_enabled(qts, cpu_type, "pauth"); assert_has_feature_disabled(qts, cpu_type, "pauth-impdef"); + assert_has_feature_disabled(qts, cpu_type, "pauth-qarma3"); assert_set_feature(qts, cpu_type, "pauth", false); assert_set_feature(qts, cpu_type, "pauth", true); assert_set_feature(qts, cpu_type, "pauth-impdef", true); assert_set_feature(qts, cpu_type, "pauth-impdef", false); - assert_error(qts, cpu_type, "cannot enable pauth-impdef without pauth", + assert_set_feature(qts, cpu_type, "pauth-qarma3", true); + assert_set_feature(qts, cpu_type, "pauth-qarma3", false); + assert_error(qts, cpu_type, + "cannot enable pauth-impdef or pauth-qarma3 without pauth", "{ 'pauth': false, 'pauth-impdef': true }"); + assert_error(qts, cpu_type, + "cannot enable pauth-impdef or pauth-qarma3 without pauth", + "{ 'pauth': false, 'pauth-qarma3': true }"); + assert_error(qts, cpu_type, + "cannot enable both pauth-impdef and pauth-qarma3", + "{ 'pauth': true, 'pauth-impdef': true, 'pauth-qarma3': true }"); } static void test_query_cpu_model_expansion(const void *data) diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target index b77bbd9b3c..2efacf9a5a 100644 --- a/tests/tcg/aarch64/Makefile.target +++ b/tests/tcg/aarch64/Makefile.target @@ -42,7 +42,11 @@ endif ifneq ($(CROSS_CC_HAS_ARMV8_3),) AARCH64_TESTS += pauth-1 pauth-2 pauth-4 pauth-5 pauth-%: CFLAGS += -march=armv8.3-a -run-pauth-%: QEMU_OPTS += -cpu max +run-pauth-1: QEMU_OPTS += -cpu max +run-pauth-2: QEMU_OPTS += -cpu max +# Choose a cpu with FEAT_Pauth but without FEAT_FPAC for pauth-[45]. +run-pauth-4: QEMU_OPTS += -cpu neoverse-v1 +run-pauth-5: QEMU_OPTS += -cpu neoverse-v1 endif # BTI Tests diff --git a/tests/tcg/aarch64/pauth-2.c b/tests/tcg/aarch64/pauth-2.c index 978652ede3..89ffdbf1df 100644 --- a/tests/tcg/aarch64/pauth-2.c +++ b/tests/tcg/aarch64/pauth-2.c @@ -1,5 +1,22 @@ #include <stdint.h> +#include <signal.h> +#include <stdlib.h> #include <assert.h> +#include "pauth.h" + + +static void sigill(int sig, siginfo_t *info, void *vuc) +{ + ucontext_t *uc = vuc; + uint64_t test; + + /* There is only one insn below that is allowed to fault. 
diff --git a/tests/tcg/aarch64/pauth-2.c b/tests/tcg/aarch64/pauth-2.c
index 978652ede3..89ffdbf1df 100644
--- a/tests/tcg/aarch64/pauth-2.c
+++ b/tests/tcg/aarch64/pauth-2.c
@@ -1,5 +1,22 @@
 #include <stdint.h>
+#include <signal.h>
+#include <stdlib.h>
 #include <assert.h>
+#include "pauth.h"
+
+
+static void sigill(int sig, siginfo_t *info, void *vuc)
+{
+    ucontext_t *uc = vuc;
+    uint64_t test;
+
+    /* There is only one insn below that is allowed to fault. */
+    asm volatile("adr %0, auth2_insn" : "=r"(test));
+    assert(test == uc->uc_mcontext.pc);
+    exit(0);
+}
+
+static int pac_feature;
 
 void do_test(uint64_t value)
 {
@@ -27,31 +44,52 @@ void do_test(uint64_t value)
      * An invalid salt usually fails authorization, but again there
      * is a chance of choosing another salt that works.
      * Iterate until we find another salt which does fail.
+     *
+     * With FEAT_FPAC, this will SIGILL instead of producing a result.
      */
     for (salt2 = salt1 + 1; ; salt2++) {
-        asm volatile("autda %0, %2" : "=r"(decode) : "0"(encode), "r"(salt2));
+        asm volatile("auth2_insn: autda %0, %2"
+                     : "=r"(decode) : "0"(encode), "r"(salt2));
         if (decode != value) {
             break;
         }
     }
 
+    assert(pac_feature < 4); /* No FEAT_FPAC */
+
     /* The VA bits, bit 55, and the TBI bits, should be unchanged. */
     assert(((decode ^ value) & 0xff80ffffffffffffull) == 0);
 
     /*
-     * Bits [54:53] are an error indicator based on the key used;
-     * the DA key above is keynumber 0, so error == 0b01. Otherwise
-     * bit 55 of the original is sign-extended into the rest of the auth.
+     * Without FEAT_PAuth2, bits [54:53] are an error indicator based on
+     * the key used; the DA key above is keynumber 0, so error == 0b01.
+     * Otherwise, bit 55 of the original is sign-extended into the rest
+     * of the auth.
      */
-    if ((value >> 55) & 1) {
-        assert(((decode >> 48) & 0xff) == 0b10111111);
-    } else {
-        assert(((decode >> 48) & 0xff) == 0b00100000);
+    if (pac_feature < 3) {
+        if ((value >> 55) & 1) {
+            assert(((decode >> 48) & 0xff) == 0b10111111);
+        } else {
+            assert(((decode >> 48) & 0xff) == 0b00100000);
+        }
     }
 }
 
 int main()
 {
+    static const struct sigaction sa = {
+        .sa_sigaction = sigill,
+        .sa_flags = SA_SIGINFO
+    };
+
+    pac_feature = get_pac_feature();
+    assert(pac_feature != 0);
+
+    if (pac_feature >= 4) {
+        /* FEAT_FPAC */
+        sigaction(SIGILL, &sa, NULL);
+    }
+
     do_test(0);
     do_test(0xda004acedeadbeefull);
     return 0;
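The numeric thresholds in pauth-2.c (pac_feature < 3, pac_feature >= 4) follow the architectural encoding of the PAC feature ladder in ID_AA64ISAR1.{APA,API} and ID_AA64ISAR2.APA3. A reference table as an enum (the names are mine; the values are as given in the Arm ARM):

/* PAC feature levels as encoded in the APA/API/APA3 ID-register fields. */
enum {
    PAC_NONE        = 0,  /* no pointer authentication */
    PAC_PAUTH       = 1,  /* FEAT_PAuth */
    PAC_EPAC        = 2,  /* FEAT_EPAC */
    PAC_PAUTH2      = 3,  /* FEAT_PAuth2: no [54:53] error indicator */
    PAC_FPAC        = 4,  /* FEAT_FPAC: failed authentication faults */
    PAC_FPACCOMBINE = 5,  /* FEAT_FPACCOMBINE */
};

So "< 3" selects implementations whose failed authentication poisons bits [54:53], and ">= 4" selects those that raise a fault instead, which is why the test installs a SIGILL handler in that case.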
diff --git a/tests/tcg/aarch64/pauth-4.c b/tests/tcg/aarch64/pauth-4.c
index 24a639e36c..b254f413af 100644
--- a/tests/tcg/aarch64/pauth-4.c
+++ b/tests/tcg/aarch64/pauth-4.c
@@ -2,14 +2,24 @@
 #include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include "pauth.h"
 
 #define TESTS 1000
 
 int main()
 {
+    char base[TESTS];
     int i, count = 0;
     float perc;
-    void *base = malloc(TESTS);
+    int pac_feature = get_pac_feature();
+
+    /*
+     * Exit if no PAuth or FEAT_FPAC, which will SIGILL on AUTIA failure
+     * rather than return an error for us to check below.
+     */
+    if (pac_feature == 0 || pac_feature >= 4) {
+        return 0;
+    }
 
     for (i = 0; i < TESTS; i++) {
         uintptr_t in, x, y;
@@ -17,7 +27,7 @@ int main()
         in = i + (uintptr_t) base;
 
         asm("mov %0, %[in]\n\t"
-            "pacia %0, sp\n\t" /* sigill if pauth not supported */
+            "pacia %0, sp\n\t"
             "eor %0, %0, #4\n\t" /* corrupt single bit */
             "mov %1, %0\n\t"
             "autia %1, sp\n\t" /* validate corrupted pointer */
@@ -36,10 +46,10 @@ int main()
         if (x != y) {
             count++;
         }
-    }
+
     perc = (float) count / (float) TESTS;
-    printf("Checks Passed: %0.2f%%", perc * 100.0);
+    printf("Checks Passed: %0.2f%%\n", perc * 100.0);
     assert(perc > 0.95);
     return 0;
 }
diff --git a/tests/tcg/aarch64/pauth-5.c b/tests/tcg/aarch64/pauth-5.c
index 67c257918b..ed8d5a926b 100644
--- a/tests/tcg/aarch64/pauth-5.c
+++ b/tests/tcg/aarch64/pauth-5.c
@@ -1,4 +1,5 @@
 #include <assert.h>
+#include "pauth.h"
 
 static int x;
 
@@ -6,6 +7,15 @@ int main()
 {
     int *p0 = &x, *p1, *p2, *p3;
     unsigned long salt = 0;
+    int pac_feature = get_pac_feature();
+
+    /*
+     * Exit if no PAuth or FEAT_FPAC, which will SIGILL on AUTDA failure
+     * rather than return an error for us to check below.
+     */
+    if (pac_feature == 0 || pac_feature >= 4) {
+        return 0;
+    }
 
     /*
      * With TBI enabled and a 48-bit VA, there are 7 bits of auth, and so
diff --git a/tests/tcg/aarch64/pauth.h b/tests/tcg/aarch64/pauth.h
new file mode 100644
index 0000000000..543b234437
--- /dev/null
+++ b/tests/tcg/aarch64/pauth.h
@@ -0,0 +1,23 @@
+/*
+ * Helper for pauth test case
+ *
+ * Copyright (c) 2023 Linaro Ltd
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <assert.h>
+#include <sys/auxv.h>
+
+static int get_pac_feature(void)
+{
+    unsigned long isar1, isar2;
+
+    assert(getauxval(AT_HWCAP) & HWCAP_CPUID);
+
+    asm("mrs %0, id_aa64isar1_el1" : "=r"(isar1));
+    asm("mrs %0, S3_0_C0_C6_2" : "=r"(isar2)); /* id_aa64isar2_el1 */
+
+    return ((isar1 >> 4) & 0xf)   /* APA */
+         | ((isar1 >> 8) & 0xf)   /* API */
+         | ((isar2 >> 12) & 0xf); /* APA3 */
+}
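get_pac_feature() ORs the three ID-register fields together, which evidently relies on at most one of APA/API/APA3 being nonzero on a given implementation (each describes a different PAC algorithm). A minimal caller in the style of the tests above (illustrative only; not part of the patch):

#include <stdio.h>
#include "pauth.h"

int main(void)
{
    /* Requires HWCAP_CPUID, i.e. the kernel's mrs emulation, per pauth.h. */
    int pac_feature = get_pac_feature();

    if (pac_feature == 0) {
        printf("no PAuth; skipping\n");
        return 0;
    }
    if (pac_feature >= 4) {
        printf("FEAT_FPAC: failed auth faults; error-bit checks invalid\n");
        return 0;
    }
    printf("PAC feature level %d; error-bit checks are valid\n", pac_feature);
    return 0;
}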