summary refs log tree commit diff stats
path: root/target/arm/tcg/vec_helper.c
diff options
context:
space:
mode:
authorRichard Henderson <richard.henderson@linaro.org>2024-05-31 11:10:10 -0700
committerRichard Henderson <richard.henderson@linaro.org>2024-05-31 11:10:10 -0700
commit74abb45dac6979e7ff76172b7f0a24e869405184 (patch)
tree5c9cd353f6a3dfbf4a24da0a45817c70cf6715dd /target/arm/tcg/vec_helper.c
parent3b2fe44bb7f605f179e5e7feb2c13c2eb3abbb80 (diff)
parent3c3c233677d4f2fe5f35c5d6d6e9b53df48054f4 (diff)
downloadfocaccia-qemu-74abb45dac6979e7ff76172b7f0a24e869405184.tar.gz
focaccia-qemu-74abb45dac6979e7ff76172b7f0a24e869405184.zip
Merge tag 'pull-target-arm-20240531' of https://git.linaro.org/people/pmaydell/qemu-arm into staging
target-arm:
 * hw/intc/arm_gic: Fix set pending of PPIs
 * hw/intc/arm_gic: Fix writes to GICD_ITARGETSRn
 * xilinx_zynq: Add cache controller
 * xilinx_zynq: Support up to two CPU cores
 * tests/avocado: update sbsa-ref firmware
 * sbsa-ref: move to Neoverse-N2 as default
 * More decodetree conversion of A64 ASIMD insns
 * docs/system/target-arm: Re-alphabetize board list
 * Implement FEAT WFxT and enable for '-cpu max'
 * hw/usb/hcd-ohci: Fix #1510, #303: pid not IN or OUT

# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmZZvHgZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3uArEACZgk0hqKtRcEzwdJi7w7ax
# ta/Iyl7AA+ngmh0qcE8QX8rzZhcGcKhsaQ8dNESMIBqVi1fS0hmNrIUWhXqmvNmZ
# 07WJvQx7Ki9YNX02frjkRZTwWozsbW8uoaXgnngFK93PNh/IoQBRP5T/LIZ5t3d7
# 7I/O/tnS/LZrL6wtP4EbRIEvZ4dfJe3X+uSCHSF8iOYrJLrZCsy/ItJqzY6Y0f96
# iUoOfXjrYH2hM9VkJGHIGy1r9nYRkCxXREQh7ahw/z6mv0nIB1YTS1eR0dH9D1yM
# afdby8iPN7k+f3en+2dHfyPjani4vPd1/k9mgLnQtVLOHrdw2APs1Q59YwYhunhe
# ZC0Fcp6jBSkcI6LHRY0bRtY0U3SBPrfkSD5sJrNH1obnsSvizeSU3uCq1QmKRCRY
# FuARmE77ywY8CURiqfwPSrC/ecSnamueIQNKNPZVQ5ve3dbokp/Gr1eJgcq80ovK
# wIKmNhJq60qBcj2zQ1aw1PP3+zvbZ/rl2j0abGbxBH3Kkp9AvALDiLRMciazVWph
# vbx7e1Y90Zrs3ap1AAUFUyWexYPNvZWmSGOaWv6Wdt+1Yf/YDW9wrwjVd3eRG9rM
# vgNMrccysBUNDpS4s0KSbqLy9AsjqAa41SiKipWFBekUyQFboNpTNfDNCspIPj9m
# dnI4fyXkVmSCYFiW2akmjg==
# =Jy5P
# -----END PGP SIGNATURE-----
# gpg: Signature made Fri 31 May 2024 05:03:04 AM PDT
# gpg:                using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg:                issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full]
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>" [full]
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full]
# gpg:                 aka "Peter Maydell <peter@archaic.org.uk>" [unknown]

* tag 'pull-target-arm-20240531' of https://git.linaro.org/people/pmaydell/qemu-arm: (43 commits)
  hw/usb/hcd-ohci: Fix #1510, #303: pid not IN or OUT
  target/arm: Implement FEAT WFxT and enable for '-cpu max'
  accel/tcg: Make TCGCPUOps::cpu_exec_halt return bool for whether to halt
  docs/system/target-arm: Re-alphabetize board list
  target/arm: Disable SVE extensions when SVE is disabled
  target/arm: Convert FCSEL to decodetree
  target/arm: Convert FMADD, FMSUB, FNMADD, FNMSUB to decodetree
  target/arm: Convert SQDMULH, SQRDMULH to decodetree
  target/arm: Tidy SQDMULH, SQRDMULH (vector)
  target/arm: Convert MLA, MLS to decodetree
  target/arm: Convert MUL, PMUL to decodetree
  target/arm: Convert SABA, SABD, UABA, UABD to decodetree
  target/arm: Convert SMAX, SMIN, UMAX, UMIN to decodetree
  target/arm: Convert SRHADD, URHADD to decodetree
  target/arm: Convert SRHADD, URHADD to gvec
  target/arm: Convert SHSUB, UHSUB to decodetree
  target/arm: Convert SHSUB, UHSUB to gvec
  target/arm: Convert SHADD, UHADD to decodetree
  target/arm: Convert SHADD, UHADD to gvec
  target/arm: Use TCG_COND_TSTNE in gen_cmtst_vec
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'target/arm/tcg/vec_helper.c')
-rw-r--r--target/arm/tcg/vec_helper.c128
1 files changed, 128 insertions, 0 deletions
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 56fea14edb..b05922b425 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -311,6 +311,38 @@ void HELPER(neon_sqrdmulh_h)(void *vd, void *vn, void *vm,
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
 
+void HELPER(neon_sqdmulh_idx_h)(void *vd, void *vn, void *vm,
+                                void *vq, uint32_t desc)
+{
+    intptr_t i, j, opr_sz = simd_oprsz(desc);
+    int idx = simd_data(desc);
+    int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx);
+
+    for (i = 0; i < opr_sz / 2; i += 16 / 2) {
+        int16_t mm = m[i];
+        for (j = 0; j < 16 / 2; ++j) {
+            d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, false, vq);
+        }
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqrdmulh_idx_h)(void *vd, void *vn, void *vm,
+                                 void *vq, uint32_t desc)
+{
+    intptr_t i, j, opr_sz = simd_oprsz(desc);
+    int idx = simd_data(desc);
+    int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx);
+
+    for (i = 0; i < opr_sz / 2; i += 16 / 2) {
+        int16_t mm = m[i];
+        for (j = 0; j < 16 / 2; ++j) {
+            d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, true, vq);
+        }
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
 void HELPER(sve2_sqrdmlah_h)(void *vd, void *vn, void *vm,
                              void *va, uint32_t desc)
 {
@@ -474,6 +506,38 @@ void HELPER(neon_sqrdmulh_s)(void *vd, void *vn, void *vm,
     clear_tail(d, opr_sz, simd_maxsz(desc));
 }
 
+void HELPER(neon_sqdmulh_idx_s)(void *vd, void *vn, void *vm,
+                                void *vq, uint32_t desc)
+{
+    intptr_t i, j, opr_sz = simd_oprsz(desc);
+    int idx = simd_data(desc);
+    int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx);
+
+    for (i = 0; i < opr_sz / 4; i += 16 / 4) {
+        int32_t mm = m[i];
+        for (j = 0; j < 16 / 4; ++j) {
+            d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, false, vq);
+        }
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqrdmulh_idx_s)(void *vd, void *vn, void *vm,
+                                 void *vq, uint32_t desc)
+{
+    intptr_t i, j, opr_sz = simd_oprsz(desc);
+    int idx = simd_data(desc);
+    int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx);
+
+    for (i = 0; i < opr_sz / 4; i += 16 / 4) {
+        int32_t mm = m[i];
+        for (j = 0; j < 16 / 4; ++j) {
+            d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, true, vq);
+        }
+    }
+    clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
 void HELPER(sve2_sqrdmlah_s)(void *vd, void *vn, void *vm,
                              void *va, uint32_t desc)
 {
@@ -1555,6 +1619,14 @@ DO_SAT(gvec_sqsub_b, int, int8_t, int8_t, -, INT8_MIN, INT8_MAX)
 DO_SAT(gvec_sqsub_h, int, int16_t, int16_t, -, INT16_MIN, INT16_MAX)
 DO_SAT(gvec_sqsub_s, int64_t, int32_t, int32_t, -, INT32_MIN, INT32_MAX)
 
+DO_SAT(gvec_usqadd_b, int, uint8_t, int8_t, +, 0, UINT8_MAX)
+DO_SAT(gvec_usqadd_h, int, uint16_t, int16_t, +, 0, UINT16_MAX)
+DO_SAT(gvec_usqadd_s, int64_t, uint32_t, int32_t, +, 0, UINT32_MAX)
+
+DO_SAT(gvec_suqadd_b, int, int8_t, uint8_t, +, INT8_MIN, INT8_MAX)
+DO_SAT(gvec_suqadd_h, int, int16_t, uint16_t, +, INT16_MIN, INT16_MAX)
+DO_SAT(gvec_suqadd_s, int64_t, int32_t, uint32_t, +, INT32_MIN, INT32_MAX)
+
 #undef DO_SAT
 
 void HELPER(gvec_uqadd_d)(void *vd, void *vq, void *vn,
@@ -1645,6 +1717,62 @@ void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn,
     clear_tail(d, oprsz, simd_maxsz(desc));
 }
 
+void HELPER(gvec_usqadd_d)(void *vd, void *vq, void *vn,
+                           void *vm, uint32_t desc)
+{
+    intptr_t i, oprsz = simd_oprsz(desc);
+    uint64_t *d = vd, *n = vn, *m = vm;
+    bool q = false;
+
+    for (i = 0; i < oprsz / 8; i++) {
+        uint64_t nn = n[i];
+        int64_t mm = m[i];
+        uint64_t dd = nn + mm;
+
+        if (mm < 0) {
+            if (nn < (uint64_t)-mm) {
+                dd = 0;
+                q = true;
+            }
+        } else {
+            if (dd < nn) {
+                dd = UINT64_MAX;
+                q = true;
+            }
+        }
+        d[i] = dd;
+    }
+    if (q) {
+        uint32_t *qc = vq;
+        qc[0] = 1;
+    }
+    clear_tail(d, oprsz, simd_maxsz(desc));
+}
+
+void HELPER(gvec_suqadd_d)(void *vd, void *vq, void *vn,
+                           void *vm, uint32_t desc)
+{
+    intptr_t i, oprsz = simd_oprsz(desc);
+    uint64_t *d = vd, *n = vn, *m = vm;
+    bool q = false;
+
+    for (i = 0; i < oprsz / 8; i++) {
+        int64_t nn = n[i];
+        uint64_t mm = m[i];
+        int64_t dd = nn + mm;
+
+        if (mm > (uint64_t)(INT64_MAX - nn)) {
+            dd = INT64_MAX;
+            q = true;
+        }
+        d[i] = dd;
+    }
+    if (q) {
+        uint32_t *qc = vq;
+        qc[0] = 1;
+    }
+    clear_tail(d, oprsz, simd_maxsz(desc));
+}
 
 #define DO_SRA(NAME, TYPE)                              \
 void HELPER(NAME)(void *vd, void *vn, uint32_t desc)    \