diff options
| author | Peter Maydell <peter.maydell@linaro.org> | 2021-05-25 16:17:06 +0100 |
|---|---|---|
| committer | Peter Maydell <peter.maydell@linaro.org> | 2021-05-25 16:17:06 +0100 |
| commit | 92f8c6fef13b31ba222c4d20ad8afd2b79c4c28e (patch) | |
| tree | e205a3fc166810e1c27b2b5a614a92c6e975d545 /target/arm/neon_helper.c | |
| parent | 0dab1d36f55c3ed649bb8e4c74b9269ef3a63049 (diff) | |
| parent | f8680aaa6e5bfc6022b75157c23db7d2ea98ab11 (diff) | |
| download | focaccia-qemu-92f8c6fef13b31ba222c4d20ad8afd2b79c4c28e.tar.gz focaccia-qemu-92f8c6fef13b31ba222c4d20ad8afd2b79c4c28e.zip | |
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20210525' into staging
target-arm queue: * Implement SVE2 emulation * Implement integer matrix multiply accumulate * Implement FEAT_TLBIOS * Implement FEAT_TLBRANGE * disas/libvixl: Protect C system header for C++ compiler * Use correct SP in M-profile exception return * AN524, AN547: Correct modelling of internal SRAMs * hw/intc/arm_gicv3_cpuif: Fix EOIR write access check logic * hw/arm/smmuv3: Another range invalidation fix # gpg: Signature made Tue 25 May 2021 16:02:25 BST # gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE # gpg: issuer "peter.maydell@linaro.org" # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate] # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate] # Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE * remotes/pmaydell/tags/pull-target-arm-20210525: (114 commits) target/arm: Enable SVE2 and related extensions linux-user/aarch64: Enable hwcap bits for sve2 and related extensions target/arm: Implement integer matrix multiply accumulate target/arm: Implement aarch32 VSUDOT, VUSDOT target/arm: Split decode of VSDOT and VUDOT target/arm: Split out do_neon_ddda target/arm: Fix decode for VDOT (indexed) target/arm: Remove unused fpst from VDOT_scalar target/arm: Split out do_neon_ddda_fpst target/arm: Implement aarch64 SUDOT, USDOT target/arm: Implement SVE2 fp multiply-add long target/arm: Move endian adjustment macros to vec_internal.h target/arm: Implement SVE2 bitwise shift immediate target/arm: Implement 128-bit ZIP, UZP, TRN target/arm: Implement SVE2 LD1RO target/arm: Tidy do_ldrq target/arm: Share table of sve load functions target/arm: Implement SVE2 FLOGB target/arm: Implement SVE2 FCVTXNT, FCVTX target/arm: Implement SVE2 FCVTLT ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target/arm/neon_helper.c')
| -rw-r--r-- | target/arm/neon_helper.c | 519 |
1 files changed, 89 insertions, 430 deletions
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c index b637265691..338b9189d5 100644 --- a/target/arm/neon_helper.c +++ b/target/arm/neon_helper.c @@ -11,6 +11,7 @@ #include "cpu.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" +#include "vec_internal.h" #define SIGNBIT (uint32_t)0x80000000 #define SIGNBIT64 ((uint64_t)1 << 63) @@ -576,496 +577,154 @@ NEON_POP(pmax_s16, neon_s16, 2) NEON_POP(pmax_u16, neon_u16, 2) #undef NEON_FN -#define NEON_FN(dest, src1, src2) do { \ - int8_t tmp; \ - tmp = (int8_t)src2; \ - if (tmp >= (ssize_t)sizeof(src1) * 8 || \ - tmp <= -(ssize_t)sizeof(src1) * 8) { \ - dest = 0; \ - } else if (tmp < 0) { \ - dest = src1 >> -tmp; \ - } else { \ - dest = src1 << tmp; \ - }} while (0) +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, false, NULL)) NEON_VOP(shl_u16, neon_u16, 2) #undef NEON_FN -#define NEON_FN(dest, src1, src2) do { \ - int8_t tmp; \ - tmp = (int8_t)src2; \ - if (tmp >= (ssize_t)sizeof(src1) * 8) { \ - dest = 0; \ - } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ - dest = src1 >> (sizeof(src1) * 8 - 1); \ - } else if (tmp < 0) { \ - dest = src1 >> -tmp; \ - } else { \ - dest = src1 << tmp; \ - }} while (0) +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, false, NULL)) NEON_VOP(shl_s16, neon_s16, 2) #undef NEON_FN -#define NEON_FN(dest, src1, src2) do { \ - int8_t tmp; \ - tmp = (int8_t)src2; \ - if ((tmp >= (ssize_t)sizeof(src1) * 8) \ - || (tmp <= -(ssize_t)sizeof(src1) * 8)) { \ - dest = 0; \ - } else if (tmp < 0) { \ - dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ - } else { \ - dest = src1 << tmp; \ - }} while (0) +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, true, NULL)) NEON_VOP(rshl_s8, neon_s8, 4) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, true, NULL)) NEON_VOP(rshl_s16, neon_s16, 2) #undef NEON_FN -/* The addition of the rounding constant may overflow, so we use an - * intermediate 64 bit accumulator. */ -uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop) +uint32_t HELPER(neon_rshl_s32)(uint32_t val, uint32_t shift) { - int32_t dest; - int32_t val = (int32_t)valop; - int8_t shift = (int8_t)shiftop; - if ((shift >= 32) || (shift <= -32)) { - dest = 0; - } else if (shift < 0) { - int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); - dest = big_dest >> -shift; - } else { - dest = val << shift; - } - return dest; + return do_sqrshl_bhs(val, (int8_t)shift, 32, true, NULL); } -/* Handling addition overflow with 64 bit input values is more - * tricky than with 32 bit values. */ -uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) -{ - int8_t shift = (int8_t)shiftop; - int64_t val = valop; - if ((shift >= 64) || (shift <= -64)) { - val = 0; - } else if (shift < 0) { - val >>= (-shift - 1); - if (val == INT64_MAX) { - /* In this case, it means that the rounding constant is 1, - * and the addition would overflow. Return the actual - * result directly. */ - val = 0x4000000000000000LL; - } else { - val++; - val >>= 1; - } - } else { - val <<= shift; - } - return val; +uint64_t HELPER(neon_rshl_s64)(uint64_t val, uint64_t shift) +{ + return do_sqrshl_d(val, (int8_t)shift, true, NULL); } -#define NEON_FN(dest, src1, src2) do { \ - int8_t tmp; \ - tmp = (int8_t)src2; \ - if (tmp >= (ssize_t)sizeof(src1) * 8 || \ - tmp < -(ssize_t)sizeof(src1) * 8) { \ - dest = 0; \ - } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ - dest = src1 >> (-tmp - 1); \ - } else if (tmp < 0) { \ - dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ - } else { \ - dest = src1 << tmp; \ - }} while (0) +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, true, NULL)) NEON_VOP(rshl_u8, neon_u8, 4) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, true, NULL)) NEON_VOP(rshl_u16, neon_u16, 2) #undef NEON_FN -/* The addition of the rounding constant may overflow, so we use an - * intermediate 64 bit accumulator. */ -uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop) +uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shift) { - uint32_t dest; - int8_t shift = (int8_t)shiftop; - if (shift >= 32 || shift < -32) { - dest = 0; - } else if (shift == -32) { - dest = val >> 31; - } else if (shift < 0) { - uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); - dest = big_dest >> -shift; - } else { - dest = val << shift; - } - return dest; + return do_uqrshl_bhs(val, (int8_t)shift, 32, true, NULL); } -/* Handling addition overflow with 64 bit input values is more - * tricky than with 32 bit values. */ -uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) -{ - int8_t shift = (uint8_t)shiftop; - if (shift >= 64 || shift < -64) { - val = 0; - } else if (shift == -64) { - /* Rounding a 1-bit result just preserves that bit. */ - val >>= 63; - } else if (shift < 0) { - val >>= (-shift - 1); - if (val == UINT64_MAX) { - /* In this case, it means that the rounding constant is 1, - * and the addition would overflow. Return the actual - * result directly. */ - val = 0x8000000000000000ULL; - } else { - val++; - val >>= 1; - } - } else { - val <<= shift; - } - return val; +uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shift) +{ + return do_uqrshl_d(val, (int8_t)shift, true, NULL); } -#define NEON_FN(dest, src1, src2) do { \ - int8_t tmp; \ - tmp = (int8_t)src2; \ - if (tmp >= (ssize_t)sizeof(src1) * 8) { \ - if (src1) { \ - SET_QC(); \ - dest = ~0; \ - } else { \ - dest = 0; \ - } \ - } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ - dest = 0; \ - } else if (tmp < 0) { \ - dest = src1 >> -tmp; \ - } else { \ - dest = src1 << tmp; \ - if ((dest >> tmp) != src1) { \ - SET_QC(); \ - dest = ~0; \ - } \ - }} while (0) +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc)) NEON_VOP_ENV(qshl_u8, neon_u8, 4) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc)) NEON_VOP_ENV(qshl_u16, neon_u16, 2) -NEON_VOP_ENV(qshl_u32, neon_u32, 1) #undef NEON_FN -uint64_t HELPER(neon_qshl_u64)(CPUARMState *env, uint64_t val, uint64_t shiftop) -{ - int8_t shift = (int8_t)shiftop; - if (shift >= 64) { - if (val) { - val = ~(uint64_t)0; - SET_QC(); - } - } else if (shift <= -64) { - val = 0; - } else if (shift < 0) { - val >>= -shift; - } else { - uint64_t tmp = val; - val <<= shift; - if ((val >> shift) != tmp) { - SET_QC(); - val = ~(uint64_t)0; - } - } - return val; +uint32_t HELPER(neon_qshl_u32)(CPUARMState *env, uint32_t val, uint32_t shift) +{ + return do_uqrshl_bhs(val, (int8_t)shift, 32, false, env->vfp.qc); } -#define NEON_FN(dest, src1, src2) do { \ - int8_t tmp; \ - tmp = (int8_t)src2; \ - if (tmp >= (ssize_t)sizeof(src1) * 8) { \ - if (src1) { \ - SET_QC(); \ - dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ - if (src1 > 0) { \ - dest--; \ - } \ - } else { \ - dest = src1; \ - } \ - } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ - dest = src1 >> 31; \ - } else if (tmp < 0) { \ - dest = src1 >> -tmp; \ - } else { \ - dest = src1 << tmp; \ - if ((dest >> tmp) != src1) { \ - SET_QC(); \ - dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ - if (src1 > 0) { \ - dest--; \ - } \ - } \ - }} while (0) +uint64_t HELPER(neon_qshl_u64)(CPUARMState *env, uint64_t val, uint64_t shift) +{ + return do_uqrshl_d(val, (int8_t)shift, false, env->vfp.qc); +} + +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc)) NEON_VOP_ENV(qshl_s8, neon_s8, 4) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc)) NEON_VOP_ENV(qshl_s16, neon_s16, 2) -NEON_VOP_ENV(qshl_s32, neon_s32, 1) #undef NEON_FN -uint64_t HELPER(neon_qshl_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop) -{ - int8_t shift = (uint8_t)shiftop; - int64_t val = valop; - if (shift >= 64) { - if (val) { - SET_QC(); - val = (val >> 63) ^ ~SIGNBIT64; - } - } else if (shift <= -64) { - val >>= 63; - } else if (shift < 0) { - val >>= -shift; - } else { - int64_t tmp = val; - val <<= shift; - if ((val >> shift) != tmp) { - SET_QC(); - val = (tmp >> 63) ^ ~SIGNBIT64; - } - } - return val; +uint32_t HELPER(neon_qshl_s32)(CPUARMState *env, uint32_t val, uint32_t shift) +{ + return do_sqrshl_bhs(val, (int8_t)shift, 32, false, env->vfp.qc); } -#define NEON_FN(dest, src1, src2) do { \ - if (src1 & (1 << (sizeof(src1) * 8 - 1))) { \ - SET_QC(); \ - dest = 0; \ - } else { \ - int8_t tmp; \ - tmp = (int8_t)src2; \ - if (tmp >= (ssize_t)sizeof(src1) * 8) { \ - if (src1) { \ - SET_QC(); \ - dest = ~0; \ - } else { \ - dest = 0; \ - } \ - } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ - dest = 0; \ - } else if (tmp < 0) { \ - dest = src1 >> -tmp; \ - } else { \ - dest = src1 << tmp; \ - if ((dest >> tmp) != src1) { \ - SET_QC(); \ - dest = ~0; \ - } \ - } \ - }} while (0) -NEON_VOP_ENV(qshlu_s8, neon_u8, 4) -NEON_VOP_ENV(qshlu_s16, neon_u16, 2) +uint64_t HELPER(neon_qshl_s64)(CPUARMState *env, uint64_t val, uint64_t shift) +{ + return do_sqrshl_d(val, (int8_t)shift, false, env->vfp.qc); +} + +#define NEON_FN(dest, src1, src2) \ + (dest = do_suqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc)) +NEON_VOP_ENV(qshlu_s8, neon_s8, 4) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_suqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc)) +NEON_VOP_ENV(qshlu_s16, neon_s16, 2) #undef NEON_FN -uint32_t HELPER(neon_qshlu_s32)(CPUARMState *env, uint32_t valop, uint32_t shiftop) +uint32_t HELPER(neon_qshlu_s32)(CPUARMState *env, uint32_t val, uint32_t shift) { - if ((int32_t)valop < 0) { - SET_QC(); - return 0; - } - return helper_neon_qshl_u32(env, valop, shiftop); + return do_suqrshl_bhs(val, (int8_t)shift, 32, false, env->vfp.qc); } -uint64_t HELPER(neon_qshlu_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop) +uint64_t HELPER(neon_qshlu_s64)(CPUARMState *env, uint64_t val, uint64_t shift) { - if ((int64_t)valop < 0) { - SET_QC(); - return 0; - } - return helper_neon_qshl_u64(env, valop, shiftop); + return do_suqrshl_d(val, (int8_t)shift, false, env->vfp.qc); } -#define NEON_FN(dest, src1, src2) do { \ - int8_t tmp; \ - tmp = (int8_t)src2; \ - if (tmp >= (ssize_t)sizeof(src1) * 8) { \ - if (src1) { \ - SET_QC(); \ - dest = ~0; \ - } else { \ - dest = 0; \ - } \ - } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \ - dest = 0; \ - } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ - dest = src1 >> (sizeof(src1) * 8 - 1); \ - } else if (tmp < 0) { \ - dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ - } else { \ - dest = src1 << tmp; \ - if ((dest >> tmp) != src1) { \ - SET_QC(); \ - dest = ~0; \ - } \ - }} while (0) +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, true, env->vfp.qc)) NEON_VOP_ENV(qrshl_u8, neon_u8, 4) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, true, env->vfp.qc)) NEON_VOP_ENV(qrshl_u16, neon_u16, 2) #undef NEON_FN -/* The addition of the rounding constant may overflow, so we use an - * intermediate 64 bit accumulator. */ -uint32_t HELPER(neon_qrshl_u32)(CPUARMState *env, uint32_t val, uint32_t shiftop) +uint32_t HELPER(neon_qrshl_u32)(CPUARMState *env, uint32_t val, uint32_t shift) { - uint32_t dest; - int8_t shift = (int8_t)shiftop; - if (shift >= 32) { - if (val) { - SET_QC(); - dest = ~0; - } else { - dest = 0; - } - } else if (shift < -32) { - dest = 0; - } else if (shift == -32) { - dest = val >> 31; - } else if (shift < 0) { - uint64_t big_dest = ((uint64_t)val + (1 << (-1 - shift))); - dest = big_dest >> -shift; - } else { - dest = val << shift; - if ((dest >> shift) != val) { - SET_QC(); - dest = ~0; - } - } - return dest; + return do_uqrshl_bhs(val, (int8_t)shift, 32, true, env->vfp.qc); } -/* Handling addition overflow with 64 bit input values is more - * tricky than with 32 bit values. */ -uint64_t HELPER(neon_qrshl_u64)(CPUARMState *env, uint64_t val, uint64_t shiftop) -{ - int8_t shift = (int8_t)shiftop; - if (shift >= 64) { - if (val) { - SET_QC(); - val = ~0; - } - } else if (shift < -64) { - val = 0; - } else if (shift == -64) { - val >>= 63; - } else if (shift < 0) { - val >>= (-shift - 1); - if (val == UINT64_MAX) { - /* In this case, it means that the rounding constant is 1, - * and the addition would overflow. Return the actual - * result directly. */ - val = 0x8000000000000000ULL; - } else { - val++; - val >>= 1; - } - } else { \ - uint64_t tmp = val; - val <<= shift; - if ((val >> shift) != tmp) { - SET_QC(); - val = ~0; - } - } - return val; +uint64_t HELPER(neon_qrshl_u64)(CPUARMState *env, uint64_t val, uint64_t shift) +{ + return do_uqrshl_d(val, (int8_t)shift, true, env->vfp.qc); } -#define NEON_FN(dest, src1, src2) do { \ - int8_t tmp; \ - tmp = (int8_t)src2; \ - if (tmp >= (ssize_t)sizeof(src1) * 8) { \ - if (src1) { \ - SET_QC(); \ - dest = (typeof(dest))(1 << (sizeof(src1) * 8 - 1)); \ - if (src1 > 0) { \ - dest--; \ - } \ - } else { \ - dest = 0; \ - } \ - } else if (tmp <= -(ssize_t)sizeof(src1) * 8) { \ - dest = 0; \ - } else if (tmp < 0) { \ - dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ - } else { \ - dest = src1 << tmp; \ - if ((dest >> tmp) != src1) { \ - SET_QC(); \ - dest = (uint32_t)(1 << (sizeof(src1) * 8 - 1)); \ - if (src1 > 0) { \ - dest--; \ - } \ - } \ - }} while (0) +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, true, env->vfp.qc)) NEON_VOP_ENV(qrshl_s8, neon_s8, 4) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, true, env->vfp.qc)) NEON_VOP_ENV(qrshl_s16, neon_s16, 2) #undef NEON_FN -/* The addition of the rounding constant may overflow, so we use an - * intermediate 64 bit accumulator. */ -uint32_t HELPER(neon_qrshl_s32)(CPUARMState *env, uint32_t valop, uint32_t shiftop) +uint32_t HELPER(neon_qrshl_s32)(CPUARMState *env, uint32_t val, uint32_t shift) { - int32_t dest; - int32_t val = (int32_t)valop; - int8_t shift = (int8_t)shiftop; - if (shift >= 32) { - if (val) { - SET_QC(); - dest = (val >> 31) ^ ~SIGNBIT; - } else { - dest = 0; - } - } else if (shift <= -32) { - dest = 0; - } else if (shift < 0) { - int64_t big_dest = ((int64_t)val + (1 << (-1 - shift))); - dest = big_dest >> -shift; - } else { - dest = val << shift; - if ((dest >> shift) != val) { - SET_QC(); - dest = (val >> 31) ^ ~SIGNBIT; - } - } - return dest; + return do_sqrshl_bhs(val, (int8_t)shift, 32, true, env->vfp.qc); } -/* Handling addition overflow with 64 bit input values is more - * tricky than with 32 bit values. */ -uint64_t HELPER(neon_qrshl_s64)(CPUARMState *env, uint64_t valop, uint64_t shiftop) -{ - int8_t shift = (uint8_t)shiftop; - int64_t val = valop; - - if (shift >= 64) { - if (val) { - SET_QC(); - val = (val >> 63) ^ ~SIGNBIT64; - } - } else if (shift <= -64) { - val = 0; - } else if (shift < 0) { - val >>= (-shift - 1); - if (val == INT64_MAX) { - /* In this case, it means that the rounding constant is 1, - * and the addition would overflow. Return the actual - * result directly. */ - val = 0x4000000000000000ULL; - } else { - val++; - val >>= 1; - } - } else { - int64_t tmp = val; - val <<= shift; - if ((val >> shift) != tmp) { - SET_QC(); - val = (tmp >> 63) ^ ~SIGNBIT64; - } - } - return val; +uint64_t HELPER(neon_qrshl_s64)(CPUARMState *env, uint64_t val, uint64_t shift) +{ + return do_sqrshl_d(val, (int8_t)shift, true, env->vfp.qc); } uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b) |