| field | value | date |
|---|---|---|
| author | Peter Maydell <peter.maydell@linaro.org> | 2023-02-27 14:46:00 +0000 |
| committer | Peter Maydell <peter.maydell@linaro.org> | 2023-02-27 14:46:00 +0000 |
| commit | e1f9f73ba15e0356ce1aa3317d7bd294f587ab58 (patch) | |
| tree | d12f488cfe0f67ba5f073f005e851dd4c480ceae /target/arm/tcg/vec_internal.h | |
| parent | 3db629f03e8caf39526cd0415dac16a6a6484107 (diff) | |
| parent | e844f0c5d0bd2c4d8d3c1622eb2a88586c9c4677 (diff) | |
| download | focaccia-qemu-e1f9f73ba15e0356ce1aa3317d7bd294f587ab58.tar.gz focaccia-qemu-e1f9f73ba15e0356ce1aa3317d7bd294f587ab58.zip | |
Merge tag 'pull-target-arm-20230227' of https://git.linaro.org/people/pmaydell/qemu-arm into staging
target-arm queue:
 * Various code cleanups
 * More refactoring working towards allowing a build without CONFIG_TCG

# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmP8ty0ZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3na0EACToAEGC4/iFigdKD7ZwG3F
# FvoDcMRRSdElcSo7BTDrFBBOH5/BYhorUq+mVpPvEYADXNaPOCmXWieSJpu68sJC
# VpVLPMhGS8lTsT16C2vB/4Lh4t8pJgs7aej90nqKk2rGgKw4ZNwMS+7Eg6n2lKf/
# V27+O+drJxgYzO6feveuKtIQXsHkx4//DNOCDPLLvrrOk+1NWnyPyT/UDxV/emyr
# KLBbeXqcNhPkn7xZtvM7WARSHZcqhEPBkIAJG2H9HE4imxNm8d8ADZjEMbfE9ZNE
# MDanpM6BYYDWw4y2A8J5QmbiLu3znH8RWmWHww1v6UQ7qyBCLx+HyEGKipGd3Eoe
# 48hi/ktsAJUb1lRrk9gOJ+NsokGINzI5urFOReUh1q6+5us0Q0VpwjyVvhi8REy3
# 5gOMDC7O2zH+bLN08kseDXfc7vR9wLrIHqMloMgJzpjG5KcL67nVCPHcOwxe0sfn
# 0SYWUY0UFNSYgEGBG6JfM6LiM1lRREzlw6YnnaJ+GUf/jdIUbMV6PKpL34TGLeQ3
# xEWrKV0+PMoWHwN0Pdo1tMXm7mc/9H27Mf7hB5k0Hp3dfQ7nIdkfnFA2YEUSxIQt
# OXYsKLTJmO/4XIAYCHhIOncPTmM6KWNQajDJMIuEdYYV67Xb88EIv5Hg8q6tS/mN
# uuQfun3Z2UbAtGvzN5Yx1w==
# =K0Vo
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 27 Feb 2023 13:59:09 GMT
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# gpg: aka "Peter Maydell <peter@archaic.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE

* tag 'pull-target-arm-20230227' of https://git.linaro.org/people/pmaydell/qemu-arm: (25 commits)
  hw: Replace qemu_or_irq typedef by OrIRQState
  hw/or-irq: Declare QOM macros using OBJECT_DECLARE_SIMPLE_TYPE()
  hw/irq: Declare QOM macros using OBJECT_DECLARE_SIMPLE_TYPE()
  iothread: Remove unused IOThreadClass / IOTHREAD_CLASS
  hw/arm/musicpal: Remove unused dummy MemoryRegion
  hw/intc/armv7m_nvic: Use QOM cast CPU() macro
  hw/timer/cmsdk-apb-timer: Remove unused 'qdev-properties.h' header
  hw/char/cmsdk-apb-uart: Open-code cmsdk_apb_uart_create()
  hw/char/xilinx_uartlite: Open-code xilinx_uartlite_create()
  hw/char/xilinx_uartlite: Expose XILINX_UARTLITE QOM type
  hw/char/pl011: Open-code pl011_luminary_create()
  hw/char/pl011: Un-inline pl011_create()
  hw/gpio/max7310: Simplify max7310_realize()
  tests/avocado: add machine:none tag to version.py
  cpu-defs.h: Expose CPUTLBEntryFull to non-TCG code
  target/arm: Don't access TCG code when debugging with KVM
  target/arm: Move regime_using_lpae_format into internal.h
  target/arm: Move hflags code into the tcg directory
  target/arm: Wrap arm_rebuild_hflags calls with tcg_enabled
  target/arm: Move psci.c into the tcg directory
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target/arm/tcg/vec_internal.h')
| file mode | file | lines |
|---|---|---|
| -rw-r--r-- | target/arm/tcg/vec_internal.h | 246 |
1 file changed, 246 insertions, 0 deletions
diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h
new file mode 100644
index 0000000000..1f4ed80ff7
--- /dev/null
+++ b/target/arm/tcg/vec_internal.h
@@ -0,0 +1,246 @@
+/*
+ * ARM AdvSIMD / SVE Vector Helpers
+ *
+ * Copyright (c) 2020 Linaro
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TARGET_ARM_VEC_INTERNAL_H
+#define TARGET_ARM_VEC_INTERNAL_H
+
+/*
+ * Note that vector data is stored in host-endian 64-bit chunks,
+ * so addressing units smaller than that needs a host-endian fixup.
+ *
+ * The H<N> macros are used when indexing an array of elements of size N.
+ *
+ * The H1_<N> macros are used when performing byte arithmetic and then
+ * casting the final pointer to a type of size N.
+ */
+#if HOST_BIG_ENDIAN
+#define H1(x)   ((x) ^ 7)
+#define H1_2(x) ((x) ^ 6)
+#define H1_4(x) ((x) ^ 4)
+#define H2(x)   ((x) ^ 3)
+#define H4(x)   ((x) ^ 1)
+#else
+#define H1(x)   (x)
+#define H1_2(x) (x)
+#define H1_4(x) (x)
+#define H2(x)   (x)
+#define H4(x)   (x)
+#endif
+/*
+ * Access to 64-bit elements isn't host-endian dependent; we provide H8
+ * and H1_8 so that when a function is being generated from a macro we
+ * can pass these rather than an empty macro argument, for clarity.
+ */
+#define H8(x)   (x)
+#define H1_8(x) (x)
+
+/*
+ * Expand active predicate bits to bytes, for byte elements.
+ */
+extern const uint64_t expand_pred_b_data[256];
+static inline uint64_t expand_pred_b(uint8_t byte)
+{
+    return expand_pred_b_data[byte];
+}
+
+/* Similarly for half-word elements. */
+extern const uint64_t expand_pred_h_data[0x55 + 1];
+static inline uint64_t expand_pred_h(uint8_t byte)
+{
+    return expand_pred_h_data[byte & 0x55];
+}
+
+static inline void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
+{
+    uint64_t *d = vd + opr_sz;
+    uintptr_t i;
+
+    for (i = opr_sz; i < max_sz; i += 8) {
+        *d++ = 0;
+    }
+}
+
+static inline int32_t do_sqrshl_bhs(int32_t src, int32_t shift, int bits,
+                                    bool round, uint32_t *sat)
+{
+    if (shift <= -bits) {
+        /* Rounding the sign bit always produces 0. */
+        if (round) {
+            return 0;
+        }
+        return src >> 31;
+    } else if (shift < 0) {
+        if (round) {
+            src >>= -shift - 1;
+            return (src >> 1) + (src & 1);
+        }
+        return src >> -shift;
+    } else if (shift < bits) {
+        int32_t val = src << shift;
+        if (bits == 32) {
+            if (!sat || val >> shift == src) {
+                return val;
+            }
+        } else {
+            int32_t extval = sextract32(val, 0, bits);
+            if (!sat || val == extval) {
+                return extval;
+            }
+        }
+    } else if (!sat || src == 0) {
+        return 0;
+    }
+
+    *sat = 1;
+    return (1u << (bits - 1)) - (src >= 0);
+}
+
+static inline uint32_t do_uqrshl_bhs(uint32_t src, int32_t shift, int bits,
+                                     bool round, uint32_t *sat)
+{
+    if (shift <= -(bits + round)) {
+        return 0;
+    } else if (shift < 0) {
+        if (round) {
+            src >>= -shift - 1;
+            return (src >> 1) + (src & 1);
+        }
+        return src >> -shift;
+    } else if (shift < bits) {
+        uint32_t val = src << shift;
+        if (bits == 32) {
+            if (!sat || val >> shift == src) {
+                return val;
+            }
+        } else {
+            uint32_t extval = extract32(val, 0, bits);
+            if (!sat || val == extval) {
+                return extval;
+            }
+        }
+    } else if (!sat || src == 0) {
+        return 0;
+    }
+
+    *sat = 1;
+    return MAKE_64BIT_MASK(0, bits);
+}
+
+static inline int32_t do_suqrshl_bhs(int32_t src, int32_t shift, int bits,
+                                     bool round, uint32_t *sat)
+{
+    if (sat && src < 0) {
+        *sat = 1;
+        return 0;
+    }
+    return do_uqrshl_bhs(src, shift, bits, round, sat);
+}
+
+static inline int64_t do_sqrshl_d(int64_t src, int64_t shift,
+                                  bool round, uint32_t *sat)
+{
+    if (shift <= -64) {
+        /* Rounding the sign bit always produces 0. */
+        if (round) {
+            return 0;
+        }
+        return src >> 63;
+    } else if (shift < 0) {
+        if (round) {
+            src >>= -shift - 1;
+            return (src >> 1) + (src & 1);
+        }
+        return src >> -shift;
+    } else if (shift < 64) {
+        int64_t val = src << shift;
+        if (!sat || val >> shift == src) {
+            return val;
+        }
+    } else if (!sat || src == 0) {
+        return 0;
+    }
+
+    *sat = 1;
+    return src < 0 ? INT64_MIN : INT64_MAX;
+}
+
+static inline uint64_t do_uqrshl_d(uint64_t src, int64_t shift,
+                                   bool round, uint32_t *sat)
+{
+    if (shift <= -(64 + round)) {
+        return 0;
+    } else if (shift < 0) {
+        if (round) {
+            src >>= -shift - 1;
+            return (src >> 1) + (src & 1);
+        }
+        return src >> -shift;
+    } else if (shift < 64) {
+        uint64_t val = src << shift;
+        if (!sat || val >> shift == src) {
+            return val;
+        }
+    } else if (!sat || src == 0) {
+        return 0;
+    }
+
+    *sat = 1;
+    return UINT64_MAX;
+}
+
+static inline int64_t do_suqrshl_d(int64_t src, int64_t shift,
+                                   bool round, uint32_t *sat)
+{
+    if (sat && src < 0) {
+        *sat = 1;
+        return 0;
+    }
+    return do_uqrshl_d(src, shift, round, sat);
+}
+
+int8_t do_sqrdmlah_b(int8_t, int8_t, int8_t, bool, bool);
+int16_t do_sqrdmlah_h(int16_t, int16_t, int16_t, bool, bool, uint32_t *);
+int32_t do_sqrdmlah_s(int32_t, int32_t, int32_t, bool, bool, uint32_t *);
+int64_t do_sqrdmlah_d(int64_t, int64_t, int64_t, bool, bool);
+
+/*
+ * 8 x 8 -> 16 vector polynomial multiply where the inputs are
+ * in the low 8 bits of each 16-bit element
+*/
+uint64_t pmull_h(uint64_t op1, uint64_t op2);
+/*
+ * 16 x 16 -> 32 vector polynomial multiply where the inputs are
+ * in the low 16 bits of each 32-bit element
+ */
+uint64_t pmull_w(uint64_t op1, uint64_t op2);
+
+/**
+ * bfdotadd:
+ * @sum: addend
+ * @e1, @e2: multiplicand vectors
+ *
+ * BFloat16 2-way dot product of @e1 & @e2, accumulating with @sum.
+ * The @e1 and @e2 operands correspond to the 32-bit source vector
+ * slots and contain two Bfloat16 values each.
+ *
+ * Corresponds to the ARM pseudocode function BFDotAdd.
+ */
+float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2);
+
+#endif /* TARGET_ARM_VEC_INTERNAL_H */
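As a quick illustration of the host-endian fixup that the H1() macro in the new header implements, here is a minimal standalone C sketch. It is not part of the patch: the `main()` harness and the sample value are invented for demonstration, and the compiler-provided `__BYTE_ORDER__` macro stands in for QEMU's `HOST_BIG_ENDIAN`.

```c
/*
 * Minimal standalone sketch (not part of the patch) showing why H1()
 * is needed: vector data is stored as host-endian 64-bit chunks, so
 * "byte element i" of a chunk lives at byte offset i on a little-endian
 * host but at offset i ^ 7 on a big-endian host.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Same shape as the vec_internal.h definitions, keyed off host byte order. */
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define H1(x) ((x) ^ 7)
#else
#define H1(x) (x)
#endif

int main(void)
{
    /* One 64-bit chunk whose byte elements are 0x00, 0x11, ..., 0x77. */
    uint64_t chunk = 0x7766554433221100ull;
    uint8_t bytes[8];

    memcpy(bytes, &chunk, sizeof(chunk));   /* host-endian byte view */

    for (int i = 0; i < 8; i++) {
        /* H1(i) makes element i come out the same on either host. */
        printf("element %d = 0x%02x\n", i, bytes[H1(i)]);
    }
    return 0;
}
```

On either host byte order the loop prints elements 0 through 7 as 0x00, 0x11, ..., 0x77, which is why the helpers above index sub-64-bit elements through H1()/H2()/H4() rather than using the raw element index.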