diff options
| author | Richard Henderson <richard.henderson@linaro.org> | 2024-05-31 11:10:10 -0700 |
|---|---|---|
| committer | Richard Henderson <richard.henderson@linaro.org> | 2024-05-31 11:10:10 -0700 |
| commit | 74abb45dac6979e7ff76172b7f0a24e869405184 (patch) | |
| tree | 5c9cd353f6a3dfbf4a24da0a45817c70cf6715dd /target/arm/tcg/vec_helper.c | |
| parent | 3b2fe44bb7f605f179e5e7feb2c13c2eb3abbb80 (diff) | |
| parent | 3c3c233677d4f2fe5f35c5d6d6e9b53df48054f4 (diff) | |
| download | focaccia-qemu-74abb45dac6979e7ff76172b7f0a24e869405184.tar.gz focaccia-qemu-74abb45dac6979e7ff76172b7f0a24e869405184.zip | |
Merge tag 'pull-target-arm-20240531' of https://git.linaro.org/people/pmaydell/qemu-arm into staging
target-arm: * hw/intc/arm_gic: Fix set pending of PPIs * hw/intc/arm_gic: Fix writes to GICD_ITARGETSRn * xilinx_zynq: Add cache controller * xilinx_zynq: Support up to two CPU cores * tests/avocado: update sbsa-ref firmware * sbsa-ref: move to Neoverse-N2 as default * More decodetree conversion of A64 ASIMD insns * docs/system/target-arm: Re-alphabetize board list * Implement FEAT WFxT and enable for '-cpu max' * hw/usb/hcd-ohci: Fix #1510, #303: pid not IN or OUT # -----BEGIN PGP SIGNATURE----- # # iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmZZvHgZHHBldGVyLm1h # eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3uArEACZgk0hqKtRcEzwdJi7w7ax # ta/Iyl7AA+ngmh0qcE8QX8rzZhcGcKhsaQ8dNESMIBqVi1fS0hmNrIUWhXqmvNmZ # 07WJvQx7Ki9YNX02frjkRZTwWozsbW8uoaXgnngFK93PNh/IoQBRP5T/LIZ5t3d7 # 7I/O/tnS/LZrL6wtP4EbRIEvZ4dfJe3X+uSCHSF8iOYrJLrZCsy/ItJqzY6Y0f96 # iUoOfXjrYH2hM9VkJGHIGy1r9nYRkCxXREQh7ahw/z6mv0nIB1YTS1eR0dH9D1yM # afdby8iPN7k+f3en+2dHfyPjani4vPd1/k9mgLnQtVLOHrdw2APs1Q59YwYhunhe # ZC0Fcp6jBSkcI6LHRY0bRtY0U3SBPrfkSD5sJrNH1obnsSvizeSU3uCq1QmKRCRY # FuARmE77ywY8CURiqfwPSrC/ecSnamueIQNKNPZVQ5ve3dbokp/Gr1eJgcq80ovK # wIKmNhJq60qBcj2zQ1aw1PP3+zvbZ/rl2j0abGbxBH3Kkp9AvALDiLRMciazVWph # vbx7e1Y90Zrs3ap1AAUFUyWexYPNvZWmSGOaWv6Wdt+1Yf/YDW9wrwjVd3eRG9rM # vgNMrccysBUNDpS4s0KSbqLy9AsjqAa41SiKipWFBekUyQFboNpTNfDNCspIPj9m # dnI4fyXkVmSCYFiW2akmjg== # =Jy5P # -----END PGP SIGNATURE----- # gpg: Signature made Fri 31 May 2024 05:03:04 AM PDT # gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE # gpg: issuer "peter.maydell@linaro.org" # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full] # gpg: aka "Peter Maydell <pmaydell@gmail.com>" [full] # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full] # gpg: aka "Peter Maydell <peter@archaic.org.uk>" [unknown] * tag 'pull-target-arm-20240531' of https://git.linaro.org/people/pmaydell/qemu-arm: (43 commits) hw/usb/hcd-ohci: Fix #1510, #303: pid not IN or OUT target/arm: Implement FEAT WFxT and enable for '-cpu max' accel/tcg: Make TCGCPUOps::cpu_exec_halt return bool for whether to halt docs/system/target-arm: Re-alphabetize board list target/arm: Disable SVE extensions when SVE is disabled target/arm: Convert FCSEL to decodetree target/arm: Convert FMADD, FMSUB, FNMADD, FNMSUB to decodetree target/arm: Convert SQDMULH, SQRDMULH to decodetree target/arm: Tidy SQDMULH, SQRDMULH (vector) target/arm: Convert MLA, MLS to decodetree target/arm: Convert MUL, PMUL to decodetree target/arm: Convert SABA, SABD, UABA, UABD to decodetree target/arm: Convert SMAX, SMIN, UMAX, UMIN to decodetree target/arm: Convert SRHADD, URHADD to decodetree target/arm: Convert SRHADD, URHADD to gvec target/arm: Convert SHSUB, UHSUB to decodetree target/arm: Convert SHSUB, UHSUB to gvec target/arm: Convert SHADD, UHADD to decodetree target/arm: Convert SHADD, UHADD to gvec target/arm: Use TCG_COND_TSTNE in gen_cmtst_vec ... Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'target/arm/tcg/vec_helper.c')
| -rw-r--r-- | target/arm/tcg/vec_helper.c | 128 |
1 files changed, 128 insertions, 0 deletions
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c index 56fea14edb..b05922b425 100644 --- a/target/arm/tcg/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -311,6 +311,38 @@ void HELPER(neon_sqrdmulh_h)(void *vd, void *vn, void *vm, clear_tail(d, opr_sz, simd_maxsz(desc)); } +void HELPER(neon_sqdmulh_idx_h)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + int idx = simd_data(desc); + int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx); + + for (i = 0; i < opr_sz / 2; i += 16 / 2) { + int16_t mm = m[i]; + for (j = 0; j < 16 / 2; ++j) { + d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, false, vq); + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqrdmulh_idx_h)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + int idx = simd_data(desc); + int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx); + + for (i = 0; i < opr_sz / 2; i += 16 / 2) { + int16_t mm = m[i]; + for (j = 0; j < 16 / 2; ++j) { + d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, true, vq); + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + void HELPER(sve2_sqrdmlah_h)(void *vd, void *vn, void *vm, void *va, uint32_t desc) { @@ -474,6 +506,38 @@ void HELPER(neon_sqrdmulh_s)(void *vd, void *vn, void *vm, clear_tail(d, opr_sz, simd_maxsz(desc)); } +void HELPER(neon_sqdmulh_idx_s)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + int idx = simd_data(desc); + int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx); + + for (i = 0; i < opr_sz / 4; i += 16 / 4) { + int32_t mm = m[i]; + for (j = 0; j < 16 / 4; ++j) { + d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, false, vq); + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqrdmulh_idx_s)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + int idx = simd_data(desc); + int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx); + + for (i = 0; i < opr_sz / 4; i += 16 / 4) { + int32_t mm = m[i]; + for (j = 0; j < 16 / 4; ++j) { + d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, true, vq); + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + void HELPER(sve2_sqrdmlah_s)(void *vd, void *vn, void *vm, void *va, uint32_t desc) { @@ -1555,6 +1619,14 @@ DO_SAT(gvec_sqsub_b, int, int8_t, int8_t, -, INT8_MIN, INT8_MAX) DO_SAT(gvec_sqsub_h, int, int16_t, int16_t, -, INT16_MIN, INT16_MAX) DO_SAT(gvec_sqsub_s, int64_t, int32_t, int32_t, -, INT32_MIN, INT32_MAX) +DO_SAT(gvec_usqadd_b, int, uint8_t, int8_t, +, 0, UINT8_MAX) +DO_SAT(gvec_usqadd_h, int, uint16_t, int16_t, +, 0, UINT16_MAX) +DO_SAT(gvec_usqadd_s, int64_t, uint32_t, int32_t, +, 0, UINT32_MAX) + +DO_SAT(gvec_suqadd_b, int, int8_t, uint8_t, +, INT8_MIN, INT8_MAX) +DO_SAT(gvec_suqadd_h, int, int16_t, uint16_t, +, INT16_MIN, INT16_MAX) +DO_SAT(gvec_suqadd_s, int64_t, int32_t, uint32_t, +, INT32_MIN, INT32_MAX) + #undef DO_SAT void HELPER(gvec_uqadd_d)(void *vd, void *vq, void *vn, @@ -1645,6 +1717,62 @@ void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn, clear_tail(d, oprsz, simd_maxsz(desc)); } +void HELPER(gvec_usqadd_d)(void *vd, void *vq, void *vn, + void *vm, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + bool q = false; + + for (i = 0; i < oprsz / 8; i++) { + uint64_t nn = n[i]; + int64_t mm = m[i]; + uint64_t dd = nn + mm; + + if (mm < 0) { + if (nn < (uint64_t)-mm) { + dd = 0; + q = true; + } + } else { + if (dd < nn) { + dd = UINT64_MAX; + q = true; + } + } + d[i] = dd; + } + if (q) { + uint32_t *qc = vq; + qc[0] = 1; + } + clear_tail(d, oprsz, simd_maxsz(desc)); +} + +void HELPER(gvec_suqadd_d)(void *vd, void *vq, void *vn, + void *vm, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + bool q = false; + + for (i = 0; i < oprsz / 8; i++) { + int64_t nn = n[i]; + uint64_t mm = m[i]; + int64_t dd = nn + mm; + + if (mm > (uint64_t)(INT64_MAX - nn)) { + dd = INT64_MAX; + q = true; + } + d[i] = dd; + } + if (q) { + uint32_t *qc = vq; + qc[0] = 1; + } + clear_tail(d, oprsz, simd_maxsz(desc)); +} #define DO_SRA(NAME, TYPE) \ void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ |