summary refs log tree commit diff stats
path: root/target/arm/tcg/vec_internal.h
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2023-02-27 14:46:00 +0000
committerPeter Maydell <peter.maydell@linaro.org>2023-02-27 14:46:00 +0000
commite1f9f73ba15e0356ce1aa3317d7bd294f587ab58 (patch)
treed12f488cfe0f67ba5f073f005e851dd4c480ceae /target/arm/tcg/vec_internal.h
parent3db629f03e8caf39526cd0415dac16a6a6484107 (diff)
parente844f0c5d0bd2c4d8d3c1622eb2a88586c9c4677 (diff)
downloadfocaccia-qemu-e1f9f73ba15e0356ce1aa3317d7bd294f587ab58.tar.gz
focaccia-qemu-e1f9f73ba15e0356ce1aa3317d7bd294f587ab58.zip
Merge tag 'pull-target-arm-20230227' of https://git.linaro.org/people/pmaydell/qemu-arm into staging
target-arm queue:
 * Various code cleanups
 * More refactoring working towards allowing a build
   without CONFIG_TCG

# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmP8ty0ZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3na0EACToAEGC4/iFigdKD7ZwG3F
# FvoDcMRRSdElcSo7BTDrFBBOH5/BYhorUq+mVpPvEYADXNaPOCmXWieSJpu68sJC
# VpVLPMhGS8lTsT16C2vB/4Lh4t8pJgs7aej90nqKk2rGgKw4ZNwMS+7Eg6n2lKf/
# V27+O+drJxgYzO6feveuKtIQXsHkx4//DNOCDPLLvrrOk+1NWnyPyT/UDxV/emyr
# KLBbeXqcNhPkn7xZtvM7WARSHZcqhEPBkIAJG2H9HE4imxNm8d8ADZjEMbfE9ZNE
# MDanpM6BYYDWw4y2A8J5QmbiLu3znH8RWmWHww1v6UQ7qyBCLx+HyEGKipGd3Eoe
# 48hi/ktsAJUb1lRrk9gOJ+NsokGINzI5urFOReUh1q6+5us0Q0VpwjyVvhi8REy3
# 5gOMDC7O2zH+bLN08kseDXfc7vR9wLrIHqMloMgJzpjG5KcL67nVCPHcOwxe0sfn
# 0SYWUY0UFNSYgEGBG6JfM6LiM1lRREzlw6YnnaJ+GUf/jdIUbMV6PKpL34TGLeQ3
# xEWrKV0+PMoWHwN0Pdo1tMXm7mc/9H27Mf7hB5k0Hp3dfQ7nIdkfnFA2YEUSxIQt
# OXYsKLTJmO/4XIAYCHhIOncPTmM6KWNQajDJMIuEdYYV67Xb88EIv5Hg8q6tS/mN
# uuQfun3Z2UbAtGvzN5Yx1w==
# =K0Vo
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 27 Feb 2023 13:59:09 GMT
# gpg:                using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg:                issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# gpg:                 aka "Peter Maydell <peter@archaic.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83  15CF 3C25 25ED 1436 0CDE

* tag 'pull-target-arm-20230227' of https://git.linaro.org/people/pmaydell/qemu-arm: (25 commits)
  hw: Replace qemu_or_irq typedef by OrIRQState
  hw/or-irq: Declare QOM macros using OBJECT_DECLARE_SIMPLE_TYPE()
  hw/irq: Declare QOM macros using OBJECT_DECLARE_SIMPLE_TYPE()
  iothread: Remove unused IOThreadClass / IOTHREAD_CLASS
  hw/arm/musicpal: Remove unused dummy MemoryRegion
  hw/intc/armv7m_nvic: Use QOM cast CPU() macro
  hw/timer/cmsdk-apb-timer: Remove unused 'qdev-properties.h' header
  hw/char/cmsdk-apb-uart: Open-code cmsdk_apb_uart_create()
  hw/char/xilinx_uartlite: Open-code xilinx_uartlite_create()
  hw/char/xilinx_uartlite: Expose XILINX_UARTLITE QOM type
  hw/char/pl011: Open-code pl011_luminary_create()
  hw/char/pl011: Un-inline pl011_create()
  hw/gpio/max7310: Simplify max7310_realize()
  tests/avocado: add machine:none tag to version.py
  cpu-defs.h: Expose CPUTLBEntryFull to non-TCG code
  target/arm: Don't access TCG code when debugging with KVM
  target/arm: Move regime_using_lpae_format into internal.h
  target/arm: Move hflags code into the tcg directory
  target/arm: Wrap arm_rebuild_hflags calls with tcg_enabled
  target/arm: Move psci.c into the tcg directory
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target/arm/tcg/vec_internal.h')
-rw-r--r--target/arm/tcg/vec_internal.h246
1 files changed, 246 insertions, 0 deletions
diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h
new file mode 100644
index 0000000000..1f4ed80ff7
--- /dev/null
+++ b/target/arm/tcg/vec_internal.h
@@ -0,0 +1,246 @@
+/*
+ * ARM AdvSIMD / SVE Vector Helpers
+ *
+ * Copyright (c) 2020 Linaro
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TARGET_ARM_VEC_INTERNAL_H
+#define TARGET_ARM_VEC_INTERNAL_H
+
+/*
+ * Note that vector data is stored in host-endian 64-bit chunks,
+ * so addressing units smaller than that needs a host-endian fixup.
+ *
+ * The H<N> macros are used when indexing an array of elements of size N.
+ *
+ * The H1_<N> macros are used when performing byte arithmetic and then
+ * casting the final pointer to a type of size N.
+ */
+#if HOST_BIG_ENDIAN
+#define H1(x)   ((x) ^ 7)
+#define H1_2(x) ((x) ^ 6)
+#define H1_4(x) ((x) ^ 4)
+#define H2(x)   ((x) ^ 3)
+#define H4(x)   ((x) ^ 1)
+#else
+#define H1(x)   (x)
+#define H1_2(x) (x)
+#define H1_4(x) (x)
+#define H2(x)   (x)
+#define H4(x)   (x)
+#endif
+/*
+ * Access to 64-bit elements isn't host-endian dependent; we provide H8
+ * and H1_8 so that when a function is being generated from a macro we
+ * can pass these rather than an empty macro argument, for clarity.
+ */
+#define H8(x)   (x)
+#define H1_8(x) (x)
+
+/*
+ * Expand active predicate bits to bytes, for byte elements.
+ */
+extern const uint64_t expand_pred_b_data[256];
+static inline uint64_t expand_pred_b(uint8_t byte)
+{
+    return expand_pred_b_data[byte];
+}
+
+/* Similarly for half-word elements. */
+extern const uint64_t expand_pred_h_data[0x55 + 1];
+static inline uint64_t expand_pred_h(uint8_t byte)
+{
+    return expand_pred_h_data[byte & 0x55];
+}
+
+static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
+{
+    uint64_t *d = vd + opr_sz;
+    uintptr_t i;
+
+    for (i = opr_sz; i < max_sz; i += 8) {
+        *d++ = 0;
+    }
+}
+
+static inline int32_t do_sqrshl_bhs(int32_t src, int32_t shift, int bits,
+                                    bool round, uint32_t *sat)
+{
+    if (shift <= -bits) {
+        /* Rounding the sign bit always produces 0. */
+        if (round) {
+            return 0;
+        }
+        return src >> 31;
+    } else if (shift < 0) {
+        if (round) {
+            src >>= -shift - 1;
+            return (src >> 1) + (src & 1);
+        }
+        return src >> -shift;
+    } else if (shift < bits) {
+        int32_t val = src << shift;
+        if (bits == 32) {
+            if (!sat || val >> shift == src) {
+                return val;
+            }
+        } else {
+            int32_t extval = sextract32(val, 0, bits);
+            if (!sat || val == extval) {
+                return extval;
+            }
+        }
+    } else if (!sat || src == 0) {
+        return 0;
+    }
+
+    *sat = 1;
+    return (1u << (bits - 1)) - (src >= 0);
+}
+
+static inline uint32_t do_uqrshl_bhs(uint32_t src, int32_t shift, int bits,
+                                     bool round, uint32_t *sat)
+{
+    if (shift <= -(bits + round)) {
+        return 0;
+    } else if (shift < 0) {
+        if (round) {
+            src >>= -shift - 1;
+            return (src >> 1) + (src & 1);
+        }
+        return src >> -shift;
+    } else if (shift < bits) {
+        uint32_t val = src << shift;
+        if (bits == 32) {
+            if (!sat || val >> shift == src) {
+                return val;
+            }
+        } else {
+            uint32_t extval = extract32(val, 0, bits);
+            if (!sat || val == extval) {
+                return extval;
+            }
+        }
+    } else if (!sat || src == 0) {
+        return 0;
+    }
+
+    *sat = 1;
+    return MAKE_64BIT_MASK(0, bits);
+}
+
+static inline int32_t do_suqrshl_bhs(int32_t src, int32_t shift, int bits,
+                                     bool round, uint32_t *sat)
+{
+    if (sat && src < 0) {
+        *sat = 1;
+        return 0;
+    }
+    return do_uqrshl_bhs(src, shift, bits, round, sat);
+}
+
+static inline int64_t do_sqrshl_d(int64_t src, int64_t shift,
+                                  bool round, uint32_t *sat)
+{
+    if (shift <= -64) {
+        /* Rounding the sign bit always produces 0. */
+        if (round) {
+            return 0;
+        }
+        return src >> 63;
+    } else if (shift < 0) {
+        if (round) {
+            src >>= -shift - 1;
+            return (src >> 1) + (src & 1);
+        }
+        return src >> -shift;
+    } else if (shift < 64) {
+        int64_t val = src << shift;
+        if (!sat || val >> shift == src) {
+            return val;
+        }
+    } else if (!sat || src == 0) {
+        return 0;
+    }
+
+    *sat = 1;
+    return src < 0 ? INT64_MIN : INT64_MAX;
+}
+
+static inline uint64_t do_uqrshl_d(uint64_t src, int64_t shift,
+                                   bool round, uint32_t *sat)
+{
+    if (shift <= -(64 + round)) {
+        return 0;
+    } else if (shift < 0) {
+        if (round) {
+            src >>= -shift - 1;
+            return (src >> 1) + (src & 1);
+        }
+        return src >> -shift;
+    } else if (shift < 64) {
+        uint64_t val = src << shift;
+        if (!sat || val >> shift == src) {
+            return val;
+        }
+    } else if (!sat || src == 0) {
+        return 0;
+    }
+
+    *sat = 1;
+    return UINT64_MAX;
+}
+
+static inline int64_t do_suqrshl_d(int64_t src, int64_t shift,
+                                   bool round, uint32_t *sat)
+{
+    if (sat && src < 0) {
+        *sat = 1;
+        return 0;
+    }
+    return do_uqrshl_d(src, shift, round, sat);
+}
+
+int8_t do_sqrdmlah_b(int8_t, int8_t, int8_t, bool, bool);
+int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *);
+int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *);
+int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool);
+
+/*
+ * 8 x 8 -> 16 vector polynomial multiply where the inputs are
+ * in the low 8 bits of each 16-bit element
+*/
+uint64_t pmull_h(uint64_t op1, uint64_t op2);
+/*
+ * 16 x 16 -> 32 vector polynomial multiply where the inputs are
+ * in the low 16 bits of each 32-bit element
+ */
+uint64_t pmull_w(uint64_t op1, uint64_t op2);
+
+/**
+ * bfdotadd:
+ * @sum: addend
+ * @e1, @e2: multiplicand vectors
+ *
+ * BFloat16 2-way dot product of @e1 & @e2, accumulating with @sum.
+ * The @e1 and @e2 operands correspond to the 32-bit source vector
+ * slots and contain two Bfloat16 values each.
+ *
+ * Corresponds to the ARM pseudocode function BFDotAdd.
+ */
+float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2);
+
+#endif /* TARGET_ARM_VEC_INTERNAL_H */