diff options
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/docker/Makefile.include | 39 | ||||
| -rwxr-xr-x | tests/docker/dockerfiles/debian-microblaze-cross.d/build-toolchain.sh | 88 | ||||
| -rwxr-xr-x | tests/docker/dockerfiles/debian-nios2-cross.d/build-toolchain.sh | 87 | ||||
| -rw-r--r-- | tests/docker/dockerfiles/debian-toolchain.docker | 36 | ||||
| -rw-r--r-- | tests/plugin/insn.c | 37 | ||||
| -rwxr-xr-x | tests/tcg/configure.sh | 26 | ||||
| -rw-r--r-- | tests/tcg/hexagon/hvx_histogram.c | 88 | ||||
| -rw-r--r-- | tests/tcg/hexagon/hvx_histogram_input.h | 717 | ||||
| -rw-r--r-- | tests/tcg/hexagon/hvx_histogram_row.S | 294 | ||||
| -rw-r--r-- | tests/tcg/hexagon/hvx_histogram_row.h | 24 | ||||
| -rw-r--r-- | tests/tcg/hexagon/hvx_misc.c | 469 | ||||
| -rw-r--r-- | tests/tcg/hexagon/scatter_gather.c | 1011 | ||||
| -rw-r--r-- | tests/tcg/hexagon/vector_add_int.c | 61 | ||||
| -rw-r--r-- | tests/tcg/multiarch/Makefile.target | 13 | ||||
| -rw-r--r-- | tests/tcg/multiarch/gdbstub/test-thread-breakpoint.py | 60 | ||||
| -rw-r--r-- | tests/tcg/nios2/Makefile.target | 11 | ||||
| -rw-r--r-- | tests/tcg/sh4/Makefile.target | 6 | ||||
| -rwxr-xr-x | tests/vm/openbsd | 7 |
18 files changed, 3054 insertions, 20 deletions
diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include index b9d4094c2e..7a63a3b7f7 100644 --- a/tests/docker/Makefile.include +++ b/tests/docker/Makefile.include @@ -11,8 +11,10 @@ HOST_ARCH = $(if $(ARCH),$(ARCH),$(shell uname -m)) DOCKER_SUFFIX := .docker DOCKER_FILES_DIR := $(SRC_PATH)/tests/docker/dockerfiles # we don't run tests on intermediate images (used as base by another image) -DOCKER_PARTIAL_IMAGES := debian10 debian11 debian-bootstrap empty -DOCKER_IMAGES := $(sort $(notdir $(basename $(wildcard $(DOCKER_FILES_DIR)/*.docker)))) +DOCKER_PARTIAL_IMAGES := debian10 debian11 +# we don't directly build virtual images (they are used to build other images) +DOCKER_VIRTUAL_IMAGES := debian-bootstrap debian-toolchain empty +DOCKER_IMAGES := $(sort $(filter-out $(DOCKER_VIRTUAL_IMAGES), $(notdir $(basename $(wildcard $(DOCKER_FILES_DIR)/*.docker))))) DOCKER_TARGETS := $(patsubst %,docker-image-%,$(DOCKER_IMAGES)) # Use a global constant ccache directory to speed up repetitive builds DOCKER_CCACHE_DIR := $$HOME/.cache/qemu-docker-ccache @@ -171,10 +173,39 @@ docker-image-debian-hexagon-cross: $(DOCKER_FILES_DIR)/debian-hexagon-cross.dock qemu/debian-hexagon-cross --add-current-user, \ "PREPARE", "debian-hexagon-cross")) +debian-toolchain-run = \ + $(if $(NOCACHE), \ + $(call quiet-command, \ + $(DOCKER_SCRIPT) build -t qemu/$1 -f $< \ + $(if $V,,--quiet) --no-cache \ + --registry $(DOCKER_REGISTRY) --extra-files \ + $(DOCKER_FILES_DIR)/$1.d/build-toolchain.sh, \ + "BUILD", $1), \ + $(call quiet-command, \ + $(DOCKER_SCRIPT) fetch $(if $V,,--quiet) \ + qemu/$1 $(DOCKER_REGISTRY), \ + "FETCH", $1) \ + $(call quiet-command, \ + $(DOCKER_SCRIPT) update $(if $V,,--quiet) \ + qemu/$1 \ + $(if $(NOUSER),,--add-current-user), \ + "PREPARE", $1)) +debian-toolchain = $(call debian-toolchain-run,$(patsubst docker-image-%,%,$1)) + +docker-image-debian-microblaze-cross: $(DOCKER_FILES_DIR)/debian-toolchain.docker \ + 
$(DOCKER_FILES_DIR)/debian-microblaze-cross.d/build-toolchain.sh + $(call debian-toolchain, $@) + +docker-image-debian-nios2-cross: $(DOCKER_FILES_DIR)/debian-toolchain.docker \ + $(DOCKER_FILES_DIR)/debian-nios2-cross.d/build-toolchain.sh + $(call debian-toolchain, $@) + # Specialist build images, sometimes very limited tools docker-image-debian-tricore-cross: docker-image-debian10 docker-image-debian-all-test-cross: docker-image-debian10 docker-image-debian-arm64-test-cross: docker-image-debian11 +docker-image-debian-microblaze-cross: docker-image-debian10 +docker-image-debian-nios2-cross: docker-image-debian10 docker-image-debian-powerpc-test-cross: docker-image-debian11 # These images may be good enough for building tests but not for test builds @@ -183,6 +214,8 @@ DOCKER_PARTIAL_IMAGES += debian-arm64-test-cross DOCKER_PARTIAL_IMAGES += debian-powerpc-test-cross DOCKER_PARTIAL_IMAGES += debian-hppa-cross DOCKER_PARTIAL_IMAGES += debian-m68k-cross debian-mips64-cross +DOCKER_PARTIAL_IMAGES += debian-microblaze-cross +DOCKER_PARTIAL_IMAGES += debian-nios2-cross DOCKER_PARTIAL_IMAGES += debian-sh4-cross debian-sparc64-cross DOCKER_PARTIAL_IMAGES += debian-tricore-cross DOCKER_PARTIAL_IMAGES += debian-xtensa-cross @@ -195,7 +228,7 @@ DOCKER_PARTIAL_IMAGES += fedora-cris-cross # packages. 
# Expand all the pre-requistes for each docker image and test combination -$(foreach i,$(filter-out $(DOCKER_PARTIAL_IMAGES),$(DOCKER_IMAGES)), \ +$(foreach i,$(filter-out $(DOCKER_PARTIAL_IMAGES) $(DOCKER_VIRTUAL_IMAGES),$(DOCKER_IMAGES)), \ $(foreach t,$(DOCKER_TESTS), \ $(eval .PHONY: docker-$t@$i) \ $(eval docker-$t@$i: docker-image-$i docker-run-$t@$i) \ diff --git a/tests/docker/dockerfiles/debian-microblaze-cross.d/build-toolchain.sh b/tests/docker/dockerfiles/debian-microblaze-cross.d/build-toolchain.sh new file mode 100755 index 0000000000..23ec0aa9a7 --- /dev/null +++ b/tests/docker/dockerfiles/debian-microblaze-cross.d/build-toolchain.sh @@ -0,0 +1,88 @@ +#!/bin/bash + +set -e + +TARGET=microblaze-linux-musl +LINUX_ARCH=microblaze + +J=$(expr $(nproc) / 2) +TOOLCHAIN_INSTALL=/usr/local +TOOLCHAIN_BIN=${TOOLCHAIN_INSTALL}/bin +CROSS_SYSROOT=${TOOLCHAIN_INSTALL}/$TARGET/sys-root + +export PATH=${TOOLCHAIN_BIN}:$PATH + +# +# Grab all of the source for the toolchain bootstrap. +# + +wget https://ftp.gnu.org/gnu/binutils/binutils-2.37.tar.xz +wget https://ftp.gnu.org/gnu/gcc/gcc-11.2.0/gcc-11.2.0.tar.xz +wget https://www.musl-libc.org/releases/musl-1.2.2.tar.gz +wget https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.70.tar.xz + +tar axf binutils-2.37.tar.xz +tar axf gcc-11.2.0.tar.xz +tar axf musl-1.2.2.tar.gz +tar axf linux-5.10.70.tar.xz + +mv binutils-2.37 src-binu +mv gcc-11.2.0 src-gcc +mv musl-1.2.2 src-musl +mv linux-5.10.70 src-linux + +mkdir -p bld-hdr bld-binu bld-gcc bld-musl +mkdir -p ${CROSS_SYSROOT}/usr/include + +# +# Install kernel headers +# + +cd src-linux +make headers_install ARCH=${LINUX_ARCH} INSTALL_HDR_PATH=${CROSS_SYSROOT}/usr +cd .. + +# +# Build binutils +# + +cd bld-binu +../src-binu/configure --disable-werror \ + --prefix=${TOOLCHAIN_INSTALL} --with-sysroot --target=${TARGET} +make -j${J} +make install +cd .. + +# +# Build gcc, just the compiler so far. 
+# + +cd bld-gcc +../src-gcc/configure --disable-werror --disable-shared \ + --prefix=${TOOLCHAIN_INSTALL} --with-sysroot --target=${TARGET} \ + --enable-languages=c --disable-libssp --disable-libsanitizer \ + --disable-libatomic --disable-libgomp --disable-libquadmath +make -j${J} all-gcc +make install-gcc +cd .. + +# +# Build musl. +# We won't go through the extra step of building shared libraries +# because we don't actually use them in QEMU docker testing. +# + +cd bld-musl +../src-musl/configure --prefix=/usr --host=${TARGET} --disable-shared +make -j${J} +make install DESTDIR=${CROSS_SYSROOT} +cd .. + +# +# Go back and build the compiler runtime +# + +cd bld-gcc +make -j${J} +make install +cd .. diff --git a/tests/docker/dockerfiles/debian-nios2-cross.d/build-toolchain.sh b/tests/docker/dockerfiles/debian-nios2-cross.d/build-toolchain.sh new file mode 100755 index 0000000000..ba3c9d8aff --- /dev/null +++ b/tests/docker/dockerfiles/debian-nios2-cross.d/build-toolchain.sh @@ -0,0 +1,87 @@ +#!/bin/bash + +set -e + +TARGET=nios2-linux-gnu +LINUX_ARCH=nios2 + +J=$(expr $(nproc) / 2) +TOOLCHAIN_INSTALL=/usr/local +TOOLCHAIN_BIN=${TOOLCHAIN_INSTALL}/bin +CROSS_SYSROOT=${TOOLCHAIN_INSTALL}/$TARGET/sys-root + +export PATH=${TOOLCHAIN_BIN}:$PATH + +# +# Grab all of the source for the toolchain bootstrap. 
+# + +wget https://ftp.gnu.org/gnu/binutils/binutils-2.37.tar.xz +wget https://ftp.gnu.org/gnu/gcc/gcc-11.2.0/gcc-11.2.0.tar.xz +wget https://ftp.gnu.org/gnu/glibc/glibc-2.34.tar.xz +wget https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.70.tar.xz + +tar axf binutils-2.37.tar.xz +tar axf gcc-11.2.0.tar.xz +tar axf glibc-2.34.tar.xz +tar axf linux-5.10.70.tar.xz + +mv binutils-2.37 src-binu +mv gcc-11.2.0 src-gcc +mv glibc-2.34 src-glibc +mv linux-5.10.70 src-linux + +mkdir -p bld-hdr bld-binu bld-gcc bld-glibc +mkdir -p ${CROSS_SYSROOT}/usr/include + +# +# Install kernel and glibc headers +# + +cd src-linux +make headers_install ARCH=${LINUX_ARCH} INSTALL_HDR_PATH=${CROSS_SYSROOT}/usr +cd .. + +cd bld-hdr +../src-glibc/configure --prefix=/usr --host=${TARGET} +make install-headers DESTDIR=${CROSS_SYSROOT} +touch ${CROSS_SYSROOT}/usr/include/gnu/stubs.h +cd .. + +# +# Build binutils +# + +cd bld-binu +../src-binu/configure --disable-werror \ + --prefix=${TOOLCHAIN_INSTALL} --with-sysroot --target=${TARGET} +make -j${J} +make install +cd .. + +# +# Build gcc, without shared libraries, because we do not yet +# have a shared libc against which to link. +# + +cd bld-gcc +../src-gcc/configure --disable-werror --disable-shared \ + --prefix=${TOOLCHAIN_INSTALL} --with-sysroot --target=${TARGET} \ + --enable-languages=c --disable-libssp --disable-libsanitizer \ + --disable-libatomic --disable-libgomp --disable-libquadmath +make -j${J} +make install +cd .. + +# +# Build glibc +# There are a few random things that use c++ but we didn't build that +# cross-compiler. We can get away without them. Disable CXX so that +# glibc doesn't try to use the host c++ compiler. +# + +cd bld-glibc +CXX=false ../src-glibc/configure --prefix=/usr --host=${TARGET} +make -j${J} +make install DESTDIR=${CROSS_SYSROOT} +cd .. 
diff --git a/tests/docker/dockerfiles/debian-toolchain.docker b/tests/docker/dockerfiles/debian-toolchain.docker new file mode 100644 index 0000000000..738d808aa6 --- /dev/null +++ b/tests/docker/dockerfiles/debian-toolchain.docker @@ -0,0 +1,36 @@ +# +# Docker toolchain cross-compiler +# +# This dockerfile is used for building a cross-compiler toolchain. +# The script for building the toolchain is supplied via extra-files. +# +FROM qemu/debian10 + +# Install build utilities for building gcc and glibc. +# ??? The build-dep isn't working, missing a number of +# minimal build dependencies, e.g. libmpc. + +RUN apt update && \ + DEBIAN_FRONTEND=noninteractive apt install -yy eatmydata && \ + DEBIAN_FRONTEND=noninteractive eatmydata \ + apt install -y --no-install-recommends \ + bison \ + flex \ + gawk \ + libmpc-dev \ + libmpfr-dev \ + rsync \ + texinfo \ + wget && \ + DEBIAN_FRONTEND=noninteractive eatmydata \ + apt build-dep -yy --arch-only gcc glibc + +ADD build-toolchain.sh /root/build-toolchain.sh + +RUN cd /root && ./build-toolchain.sh + +# Throw away the extra toolchain build deps, the downloaded source, +# and the build trees by restoring the original debian10 image, +# then copying the built toolchain from stage 0. 
+FROM qemu/debian10 +COPY --from=0 /usr/local /usr/local diff --git a/tests/plugin/insn.c b/tests/plugin/insn.c index 0f6a1938c1..d229fdc001 100644 --- a/tests/plugin/insn.c +++ b/tests/plugin/insn.c @@ -18,6 +18,8 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; static uint64_t insn_count; static bool do_inline; +static bool do_size; +static GArray *sizes; static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata) { @@ -49,13 +51,35 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, GUINT_TO_POINTER(vaddr)); } + + if (do_size) { + size_t sz = qemu_plugin_insn_size(insn); + if (sz >= sizes->len) { /* index sz must be a real element */ + g_array_set_size(sizes, sz + 1); + } + unsigned long *cnt = &g_array_index(sizes, unsigned long, sz); + (*cnt)++; + } } } static void plugin_exit(qemu_plugin_id_t id, void *p) { - g_autofree gchar *out = g_strdup_printf("insns: %" PRIu64 "\n", insn_count); - qemu_plugin_outs(out); + g_autoptr(GString) out = g_string_new(NULL); + + if (do_size) { + int i; + for (i = 0; i < sizes->len; i++) { + unsigned long *cnt = &g_array_index(sizes, unsigned long, i); + if (*cnt) { + g_string_append_printf(out, + "len %d bytes: %lu insns\n", i, *cnt); + } + } + } else { + g_string_append_printf(out, "insns: %" PRIu64 "\n", insn_count); + } + qemu_plugin_outs(out->str); } QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, @@ -70,12 +94,21 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, fprintf(stderr, "boolean argument parsing failed: %s\n", opt); return -1; } + } else if (g_strcmp0(tokens[0], "sizes") == 0) { + if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_size)) { + fprintf(stderr, "boolean argument parsing failed: %s\n", opt); + return -1; + } } else { fprintf(stderr, "option parsing failed: %s\n", opt); return -1; } } + if (do_size) { + sizes = g_array_new(true, true, sizeof(unsigned long)); + } + 
qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); return 0; diff --git a/tests/tcg/configure.sh b/tests/tcg/configure.sh index 1f985ccfc0..9b76f58258 100755 --- a/tests/tcg/configure.sh +++ b/tests/tcg/configure.sh @@ -46,7 +46,7 @@ fi : ${cross_cc_aarch64="aarch64-linux-gnu-gcc"} : ${cross_cc_aarch64_be="$cross_cc_aarch64"} : ${cross_cc_cflags_aarch64_be="-mbig-endian"} -: $(cross_cc_alpha="alpha-linux-gnu-gcc") +: ${cross_cc_alpha="alpha-linux-gnu-gcc"} : ${cross_cc_arm="arm-linux-gnueabihf-gcc"} : ${cross_cc_cflags_armeb="-mbig-endian"} : ${cross_cc_hexagon="hexagon-unknown-linux-musl-clang"} @@ -55,17 +55,19 @@ fi : ${cross_cc_i386="i686-linux-gnu-gcc"} : ${cross_cc_cflags_i386="-m32"} : ${cross_cc_m68k="m68k-linux-gnu-gcc"} -: $(cross_cc_mips64el="mips64el-linux-gnuabi64-gcc") -: $(cross_cc_mips64="mips64-linux-gnuabi64-gcc") -: $(cross_cc_mipsel="mipsel-linux-gnu-gcc") -: $(cross_cc_mips="mips-linux-gnu-gcc") +: ${cross_cc_microblaze="microblaze-linux-musl-gcc"} +: ${cross_cc_mips64el="mips64el-linux-gnuabi64-gcc"} +: ${cross_cc_mips64="mips64-linux-gnuabi64-gcc"} +: ${cross_cc_mipsel="mipsel-linux-gnu-gcc"} +: ${cross_cc_mips="mips-linux-gnu-gcc"} +: ${cross_cc_nios2="nios2-linux-gnu-gcc"} : ${cross_cc_ppc="powerpc-linux-gnu-gcc"} : ${cross_cc_cflags_ppc="-m32"} : ${cross_cc_ppc64="powerpc64-linux-gnu-gcc"} : ${cross_cc_ppc64le="powerpc64le-linux-gnu-gcc"} -: $(cross_cc_riscv64="riscv64-linux-gnu-gcc") +: ${cross_cc_riscv64="riscv64-linux-gnu-gcc"} : ${cross_cc_s390x="s390x-linux-gnu-gcc"} -: $(cross_cc_sh4="sh4-linux-gnu-gcc") +: ${cross_cc_sh4="sh4-linux-gnu-gcc"} : ${cross_cc_cflags_sparc="-m32 -mv8plus -mcpu=ultrasparc"} : ${cross_cc_sparc64="sparc64-linux-gnu-gcc"} : ${cross_cc_cflags_sparc64="-m64 -mcpu=ultrasparc"} @@ -133,6 +135,11 @@ for target in $target_list; do container_image=debian-m68k-cross container_cross_cc=m68k-linux-gnu-gcc ;; + microblaze-*) + container_hosts=x86_64 + 
container_image=debian-microblaze-cross + container_cross_cc=microblaze-linux-musl-gcc + ;; mips64el-*) container_hosts=x86_64 container_image=debian-mips64el-cross @@ -153,6 +160,11 @@ for target in $target_list; do container_image=debian-mips-cross container_cross_cc=mips-linux-gnu-gcc ;; + nios2-*) + container_hosts=x86_64 + container_image=debian-nios2-cross + container_cross_cc=nios2-linux-gnu-gcc + ;; ppc-*|ppc64abi32-*) container_hosts=x86_64 container_image=debian-powerpc-test-cross diff --git a/tests/tcg/hexagon/hvx_histogram.c b/tests/tcg/hexagon/hvx_histogram.c new file mode 100644 index 0000000000..43377a9abb --- /dev/null +++ b/tests/tcg/hexagon/hvx_histogram.c @@ -0,0 +1,88 @@ +/* + * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include "hvx_histogram_row.h" + +const int vector_len = 128; +const int width = 275; +const int height = 20; +const int stride = (width + vector_len - 1) & -vector_len; + +int err; + +static uint8_t input[height][stride] __attribute__((aligned(128))) = { +#include "hvx_histogram_input.h" +}; + +static int result[256] __attribute__((aligned(128))); +static int expect[256] __attribute__((aligned(128))); + +static void check(void) +{ + for (int i = 0; i < 256; i++) { + int res = result[i]; + int exp = expect[i]; + if (res != exp) { + printf("ERROR at %3d: 0x%04x != 0x%04x\n", + i, res, exp); + err++; + } + } +} + +static void ref_histogram(uint8_t *src, int stride, int width, int height, + int *hist) +{ + for (int i = 0; i < 256; i++) { + hist[i] = 0; + } + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + hist[src[i * stride + j]]++; + } + } +} + +static void hvx_histogram(uint8_t *src, int stride, int width, int height, + int *hist) +{ + int n = 8192 / width; + + for (int i = 0; i < 256; i++) { + hist[i] = 0; + } + + for (int i = 0; i < height; i += n) { + int k = height - i > n ? n : height - i; + hvx_histogram_row(src, stride, width, k, hist); + src += n * stride; + } +} + +int main() +{ + ref_histogram(&input[0][0], stride, width, height, expect); + hvx_histogram(&input[0][0], stride, width, height, result); + check(); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} diff --git a/tests/tcg/hexagon/hvx_histogram_input.h b/tests/tcg/hexagon/hvx_histogram_input.h new file mode 100644 index 0000000000..2f9109255e --- /dev/null +++ b/tests/tcg/hexagon/hvx_histogram_input.h @@ -0,0 +1,717 @@ +/* + * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + + { 0x26, 0x32, 0x2e, 0x2e, 0x2d, 0x2c, 0x2d, 0x2d, + 0x2c, 0x2e, 0x31, 0x33, 0x36, 0x39, 0x3b, 0x3f, + 0x42, 0x46, 0x4a, 0x4c, 0x51, 0x53, 0x53, 0x54, + 0x56, 0x57, 0x58, 0x57, 0x56, 0x52, 0x51, 0x4f, + 0x4c, 0x49, 0x47, 0x42, 0x3e, 0x3b, 0x38, 0x35, + 0x33, 0x30, 0x2e, 0x2c, 0x2b, 0x2a, 0x2a, 0x28, + 0x28, 0x27, 0x27, 0x28, 0x29, 0x2a, 0x2c, 0x2e, + 0x2f, 0x33, 0x36, 0x38, 0x3c, 0x3d, 0x40, 0x42, + 0x43, 0x42, 0x43, 0x44, 0x43, 0x41, 0x40, 0x3b, + 0x3b, 0x3a, 0x38, 0x35, 0x32, 0x2f, 0x2c, 0x29, + 0x27, 0x26, 0x23, 0x21, 0x1e, 0x1c, 0x1a, 0x19, + 0x17, 0x15, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, + 0x0f, 0x0e, 0x0f, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, + 0x0c, 0x0d, 0x0e, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0c, 0x0c, 0x0d, 0x0c, 0x0f, 0x0e, 0x0f, 0x0f, + 0x0f, 0x10, 0x11, 0x12, 0x14, 0x16, 0x17, 0x19, + 0x1c, 0x1d, 0x21, 0x25, 0x27, 0x29, 0x2b, 0x2f, + 0x31, 0x33, 0x36, 0x38, 0x39, 0x3a, 0x3b, 0x3c, + 0x3c, 0x3d, 0x3e, 0x3e, 0x3c, 0x3b, 0x3a, 0x39, + 0x39, 0x3a, 0x3a, 0x3a, 0x3a, 0x3c, 0x3e, 0x43, + 0x47, 0x4a, 0x4d, 0x51, 0x51, 0x54, 0x56, 0x56, + 0x57, 0x56, 0x53, 0x4f, 0x4b, 0x47, 0x43, 0x41, + 0x3e, 0x3c, 0x3a, 0x37, 0x36, 0x33, 0x32, 0x34, + 0x34, 0x34, 0x34, 0x35, 0x36, 0x39, 0x3d, 0x3d, + 0x3f, 0x40, 0x40, 0x40, 0x40, 0x3e, 0x40, 0x40, + 0x42, 0x44, 0x47, 0x48, 0x4b, 0x4e, 0x56, 0x5c, + 0x62, 0x68, 0x6f, 0x73, 
0x76, 0x79, 0x7a, 0x7c, + 0x7e, 0x7c, 0x78, 0x72, 0x6e, 0x69, 0x65, 0x60, + 0x5b, 0x56, 0x52, 0x4d, 0x4a, 0x48, 0x47, 0x46, + 0x44, 0x43, 0x42, 0x41, 0x41, 0x41, 0x40, 0x40, + 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3b, 0x38, 0x37, + 0x36, 0x35, 0x36, 0x35, 0x36, 0x37, 0x38, 0x3c, + 0x3d, 0x3f, 0x42, 0x44, 0x46, 0x48, 0x4b, 0x4c, + 0x4e, 0x4e, 0x4d, 0x4c, 0x4a, 0x48, 0x49, 0x49, + 0x4b, 0x4d, 0x4e, }, + { 0x23, 0x2d, 0x29, 0x29, 0x28, 0x28, 0x29, 0x29, + 0x28, 0x2b, 0x2d, 0x2f, 0x32, 0x34, 0x36, 0x3a, + 0x3d, 0x41, 0x44, 0x47, 0x4a, 0x4c, 0x4e, 0x4e, + 0x50, 0x51, 0x51, 0x51, 0x4f, 0x4c, 0x4b, 0x48, + 0x46, 0x44, 0x40, 0x3d, 0x39, 0x36, 0x34, 0x30, + 0x2f, 0x2d, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, + 0x25, 0x24, 0x24, 0x24, 0x26, 0x28, 0x28, 0x2a, + 0x2b, 0x2e, 0x32, 0x34, 0x37, 0x39, 0x3b, 0x3c, + 0x3d, 0x3d, 0x3e, 0x3e, 0x3e, 0x3c, 0x3b, 0x38, + 0x37, 0x35, 0x33, 0x30, 0x2e, 0x2b, 0x27, 0x25, + 0x24, 0x21, 0x20, 0x1d, 0x1b, 0x1a, 0x18, 0x16, + 0x15, 0x14, 0x13, 0x12, 0x10, 0x11, 0x10, 0x0e, + 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0c, 0x0c, 0x0b, + 0x0b, 0x0b, 0x0c, 0x0b, 0x0b, 0x09, 0x0a, 0x0b, + 0x0b, 0x0a, 0x0a, 0x0c, 0x0c, 0x0c, 0x0d, 0x0e, + 0x0e, 0x0f, 0x0f, 0x11, 0x12, 0x15, 0x15, 0x17, + 0x1a, 0x1c, 0x1f, 0x22, 0x25, 0x26, 0x29, 0x2a, + 0x2d, 0x30, 0x33, 0x34, 0x35, 0x35, 0x37, 0x37, + 0x39, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x36, 0x37, + 0x35, 0x36, 0x35, 0x35, 0x36, 0x37, 0x3a, 0x3e, + 0x40, 0x43, 0x48, 0x49, 0x4b, 0x4c, 0x4d, 0x4e, + 0x4f, 0x4f, 0x4c, 0x48, 0x45, 0x41, 0x3e, 0x3b, + 0x3a, 0x37, 0x36, 0x33, 0x32, 0x31, 0x30, 0x31, + 0x32, 0x31, 0x31, 0x31, 0x31, 0x34, 0x37, 0x38, + 0x3a, 0x3b, 0x3b, 0x3b, 0x3c, 0x3b, 0x3d, 0x3e, + 0x3f, 0x40, 0x43, 0x44, 0x47, 0x4b, 0x4f, 0x56, + 0x5a, 0x60, 0x66, 0x69, 0x6a, 0x6e, 0x71, 0x72, + 0x73, 0x72, 0x6d, 0x69, 0x66, 0x60, 0x5c, 0x59, + 0x54, 0x50, 0x4d, 0x48, 0x46, 0x44, 0x44, 0x43, + 0x42, 0x41, 0x41, 0x40, 0x3f, 0x3f, 0x3e, 0x3d, + 0x3d, 0x3d, 0x3c, 0x3a, 0x39, 0x38, 0x35, 0x35, + 0x34, 0x34, 0x35, 0x34, 0x35, 0x36, 0x39, 0x3c, 
+ 0x3d, 0x3e, 0x41, 0x43, 0x44, 0x46, 0x48, 0x49, + 0x4a, 0x49, 0x48, 0x47, 0x45, 0x43, 0x43, 0x44, + 0x45, 0x47, 0x48, }, + { 0x23, 0x2d, 0x2a, 0x2a, 0x29, 0x29, 0x2a, 0x2a, + 0x29, 0x2c, 0x2d, 0x2f, 0x32, 0x34, 0x36, 0x3a, + 0x3d, 0x40, 0x44, 0x48, 0x4a, 0x4c, 0x4e, 0x4e, + 0x50, 0x51, 0x51, 0x51, 0x4f, 0x4c, 0x4b, 0x48, + 0x46, 0x44, 0x40, 0x3d, 0x39, 0x36, 0x34, 0x30, + 0x2f, 0x2d, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, + 0x25, 0x24, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a, + 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x39, 0x3b, 0x3c, + 0x3d, 0x3e, 0x3e, 0x3d, 0x3e, 0x3c, 0x3c, 0x3a, + 0x37, 0x35, 0x33, 0x30, 0x2f, 0x2b, 0x28, 0x26, + 0x24, 0x21, 0x20, 0x1e, 0x1c, 0x1b, 0x18, 0x17, + 0x16, 0x14, 0x13, 0x12, 0x10, 0x10, 0x0f, 0x0e, + 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0c, + 0x0b, 0x0b, 0x0c, 0x0c, 0x0c, 0x0b, 0x0b, 0x0c, + 0x0c, 0x0b, 0x0b, 0x0c, 0x0d, 0x0c, 0x0e, 0x0e, + 0x0e, 0x0f, 0x11, 0x11, 0x13, 0x14, 0x16, 0x18, + 0x1a, 0x1d, 0x1f, 0x22, 0x25, 0x26, 0x29, 0x2b, + 0x2d, 0x31, 0x33, 0x34, 0x36, 0x37, 0x38, 0x38, + 0x39, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x37, 0x37, + 0x35, 0x36, 0x35, 0x36, 0x35, 0x38, 0x3a, 0x3e, + 0x40, 0x41, 0x45, 0x47, 0x49, 0x4a, 0x4c, 0x4d, + 0x4e, 0x4d, 0x4a, 0x47, 0x44, 0x40, 0x3d, 0x3b, + 0x39, 0x37, 0x34, 0x34, 0x32, 0x31, 0x31, 0x33, + 0x32, 0x31, 0x32, 0x33, 0x32, 0x36, 0x38, 0x39, + 0x3b, 0x3c, 0x3c, 0x3c, 0x3d, 0x3d, 0x3e, 0x3e, + 0x41, 0x42, 0x43, 0x45, 0x48, 0x4c, 0x50, 0x56, + 0x5b, 0x5f, 0x62, 0x67, 0x69, 0x6c, 0x6e, 0x6e, + 0x70, 0x6f, 0x6b, 0x67, 0x63, 0x5e, 0x5b, 0x58, + 0x54, 0x51, 0x4e, 0x4a, 0x48, 0x46, 0x46, 0x46, + 0x45, 0x46, 0x44, 0x43, 0x44, 0x43, 0x42, 0x42, + 0x41, 0x40, 0x3f, 0x3e, 0x3c, 0x3b, 0x3a, 0x39, + 0x39, 0x39, 0x38, 0x37, 0x37, 0x3a, 0x3e, 0x40, + 0x42, 0x43, 0x47, 0x47, 0x48, 0x4a, 0x4b, 0x4c, + 0x4c, 0x4b, 0x4a, 0x48, 0x46, 0x44, 0x43, 0x45, + 0x45, 0x46, 0x47, }, + { 0x21, 0x2b, 0x28, 0x28, 0x28, 0x28, 0x29, 0x29, + 0x28, 0x2a, 0x2d, 0x30, 0x32, 0x34, 0x37, 0x3a, + 0x3c, 0x40, 0x44, 0x48, 0x4a, 0x4c, 0x4e, 0x4e, 
+ 0x50, 0x51, 0x52, 0x51, 0x4f, 0x4b, 0x4b, 0x48, + 0x45, 0x43, 0x3f, 0x3c, 0x39, 0x36, 0x33, 0x30, + 0x2f, 0x2d, 0x2b, 0x2a, 0x28, 0x27, 0x26, 0x25, + 0x24, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2d, 0x31, 0x34, 0x37, 0x39, 0x3b, 0x3c, + 0x3d, 0x3e, 0x3e, 0x3e, 0x3e, 0x3d, 0x3c, 0x3a, + 0x37, 0x35, 0x33, 0x30, 0x2f, 0x2b, 0x28, 0x26, + 0x25, 0x21, 0x20, 0x1e, 0x1c, 0x19, 0x19, 0x18, + 0x17, 0x15, 0x15, 0x12, 0x11, 0x11, 0x11, 0x0f, + 0x0e, 0x0e, 0x0e, 0x0e, 0x0d, 0x0d, 0x0d, 0x0c, + 0x0c, 0x0c, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0c, 0x0c, 0x0c, 0x0c, 0x0e, 0x0e, 0x0f, 0x0f, + 0x0f, 0x10, 0x11, 0x13, 0x13, 0x15, 0x16, 0x18, + 0x1a, 0x1c, 0x1f, 0x22, 0x25, 0x28, 0x29, 0x2d, + 0x2f, 0x32, 0x34, 0x35, 0x36, 0x37, 0x38, 0x38, + 0x39, 0x3a, 0x39, 0x39, 0x37, 0x36, 0x37, 0x36, + 0x35, 0x35, 0x37, 0x35, 0x36, 0x37, 0x3a, 0x3d, + 0x3e, 0x41, 0x43, 0x46, 0x46, 0x47, 0x48, 0x49, + 0x4a, 0x49, 0x47, 0x45, 0x42, 0x3f, 0x3d, 0x3b, + 0x3a, 0x38, 0x36, 0x34, 0x32, 0x32, 0x32, 0x32, + 0x32, 0x31, 0x33, 0x32, 0x34, 0x37, 0x38, 0x38, + 0x3a, 0x3b, 0x3d, 0x3d, 0x3d, 0x3e, 0x3f, 0x41, + 0x42, 0x44, 0x44, 0x46, 0x49, 0x4d, 0x50, 0x54, + 0x58, 0x5c, 0x61, 0x63, 0x65, 0x69, 0x6a, 0x6c, + 0x6d, 0x6c, 0x68, 0x64, 0x61, 0x5c, 0x59, 0x57, + 0x53, 0x51, 0x4f, 0x4c, 0x4a, 0x48, 0x48, 0x49, + 0x49, 0x48, 0x48, 0x48, 0x47, 0x47, 0x46, 0x46, + 0x45, 0x44, 0x42, 0x41, 0x3f, 0x3e, 0x3c, 0x3c, + 0x3c, 0x3d, 0x3c, 0x3c, 0x3c, 0x3e, 0x41, 0x43, + 0x46, 0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4e, + 0x4e, 0x4d, 0x4b, 0x49, 0x47, 0x44, 0x44, 0x45, + 0x45, 0x45, 0x46, }, + { 0x22, 0x2b, 0x27, 0x27, 0x27, 0x27, 0x28, 0x28, + 0x28, 0x2a, 0x2c, 0x2f, 0x30, 0x34, 0x37, 0x3b, + 0x3d, 0x41, 0x45, 0x48, 0x4a, 0x4c, 0x4e, 0x4e, + 0x50, 0x51, 0x52, 0x51, 0x4f, 0x4b, 0x4b, 0x47, + 0x45, 0x43, 0x3f, 0x3c, 0x39, 0x36, 0x33, 0x30, + 0x2f, 0x2d, 0x2b, 0x2a, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2e, 0x31, 0x34, 0x37, 0x39, 0x3a, 0x3b, + 0x3d, 0x3e, 0x3e, 
0x3f, 0x3f, 0x3d, 0x3c, 0x3a, + 0x38, 0x36, 0x34, 0x31, 0x2e, 0x2c, 0x29, 0x26, + 0x25, 0x22, 0x20, 0x1e, 0x1c, 0x1a, 0x19, 0x18, + 0x16, 0x15, 0x14, 0x12, 0x10, 0x11, 0x11, 0x0f, + 0x0e, 0x0e, 0x0e, 0x0e, 0x0d, 0x0c, 0x0d, 0x0c, + 0x0c, 0x0c, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0c, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0f, 0x0f, + 0x0f, 0x10, 0x11, 0x13, 0x13, 0x15, 0x15, 0x18, + 0x19, 0x1d, 0x1f, 0x21, 0x24, 0x27, 0x2a, 0x2c, + 0x30, 0x33, 0x35, 0x36, 0x37, 0x38, 0x39, 0x39, + 0x3a, 0x3a, 0x39, 0x39, 0x37, 0x36, 0x37, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x39, 0x3a, + 0x3d, 0x3e, 0x41, 0x43, 0x43, 0x45, 0x46, 0x46, + 0x47, 0x46, 0x44, 0x42, 0x40, 0x3d, 0x3a, 0x39, + 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x32, 0x32, + 0x32, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x3c, 0x3c, 0x3e, 0x3e, 0x3e, 0x41, 0x43, + 0x44, 0x45, 0x46, 0x48, 0x49, 0x4c, 0x51, 0x54, + 0x56, 0x5a, 0x5f, 0x61, 0x63, 0x65, 0x67, 0x69, + 0x6a, 0x69, 0x67, 0x61, 0x5f, 0x5b, 0x58, 0x56, + 0x54, 0x51, 0x50, 0x4e, 0x4c, 0x4a, 0x4b, 0x4c, + 0x4c, 0x4b, 0x4b, 0x4b, 0x4b, 0x49, 0x4a, 0x49, + 0x49, 0x48, 0x46, 0x44, 0x42, 0x41, 0x40, 0x3f, + 0x3f, 0x40, 0x40, 0x40, 0x40, 0x42, 0x46, 0x49, + 0x4b, 0x4c, 0x4f, 0x4f, 0x50, 0x52, 0x51, 0x51, + 0x50, 0x4f, 0x4c, 0x4a, 0x48, 0x46, 0x45, 0x44, + 0x44, 0x45, 0x46, }, + { 0x21, 0x2a, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, + 0x27, 0x29, 0x2d, 0x2f, 0x31, 0x34, 0x37, 0x3b, + 0x3e, 0x41, 0x45, 0x48, 0x4a, 0x4c, 0x4e, 0x4e, + 0x50, 0x51, 0x52, 0x51, 0x4f, 0x4b, 0x4b, 0x48, + 0x45, 0x43, 0x3f, 0x3c, 0x39, 0x36, 0x33, 0x2f, + 0x2f, 0x2d, 0x2a, 0x2a, 0x27, 0x26, 0x25, 0x24, + 0x22, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2f, 0x31, 0x34, 0x37, 0x39, 0x3a, 0x3c, + 0x3d, 0x3e, 0x3f, 0x40, 0x3f, 0x3d, 0x3d, 0x3a, + 0x38, 0x36, 0x34, 0x31, 0x2e, 0x2c, 0x29, 0x26, + 0x25, 0x22, 0x21, 0x1f, 0x1d, 0x1b, 0x19, 0x18, + 0x16, 0x14, 0x14, 0x13, 0x11, 0x11, 0x11, 0x0f, + 0x0f, 0x0f, 0x0e, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0c, 0x0b, 0x0b, 0x0b, 0x0b, 
0x0c, + 0x0c, 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f, 0x0f, + 0x0f, 0x10, 0x13, 0x13, 0x14, 0x15, 0x17, 0x19, + 0x1a, 0x1d, 0x1f, 0x22, 0x25, 0x27, 0x2a, 0x2e, + 0x31, 0x33, 0x35, 0x38, 0x39, 0x3a, 0x3b, 0x3b, + 0x3c, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x38, 0x37, + 0x36, 0x36, 0x37, 0x36, 0x37, 0x38, 0x38, 0x3a, + 0x3b, 0x3e, 0x40, 0x40, 0x41, 0x42, 0x43, 0x42, + 0x43, 0x42, 0x40, 0x40, 0x3f, 0x3c, 0x3b, 0x39, + 0x38, 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x33, + 0x32, 0x32, 0x34, 0x35, 0x35, 0x36, 0x39, 0x39, + 0x3a, 0x3c, 0x3c, 0x3f, 0x40, 0x41, 0x43, 0x45, + 0x45, 0x47, 0x48, 0x4a, 0x4b, 0x4d, 0x50, 0x53, + 0x56, 0x59, 0x5c, 0x5f, 0x60, 0x65, 0x64, 0x66, + 0x68, 0x66, 0x64, 0x61, 0x5e, 0x5a, 0x59, 0x56, + 0x54, 0x52, 0x51, 0x50, 0x4e, 0x4c, 0x4d, 0x4f, + 0x4f, 0x4f, 0x50, 0x50, 0x4f, 0x4f, 0x4e, 0x4d, + 0x4c, 0x4b, 0x49, 0x47, 0x45, 0x44, 0x43, 0x43, + 0x42, 0x43, 0x44, 0x44, 0x46, 0x47, 0x49, 0x4d, + 0x4f, 0x51, 0x53, 0x54, 0x53, 0x54, 0x54, 0x53, + 0x53, 0x51, 0x4e, 0x4b, 0x4a, 0x47, 0x45, 0x44, + 0x44, 0x45, 0x46, }, + { 0x20, 0x28, 0x26, 0x26, 0x25, 0x24, 0x27, 0x27, + 0x27, 0x29, 0x2c, 0x2e, 0x31, 0x34, 0x37, 0x3b, + 0x3e, 0x41, 0x45, 0x48, 0x4a, 0x4c, 0x4e, 0x4e, + 0x50, 0x51, 0x52, 0x51, 0x4f, 0x4b, 0x4a, 0x49, + 0x45, 0x43, 0x3f, 0x3c, 0x3a, 0x36, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2e, 0x31, 0x34, 0x37, 0x39, 0x3b, 0x3c, + 0x3d, 0x3e, 0x3f, 0x40, 0x3e, 0x3d, 0x3d, 0x3a, + 0x38, 0x36, 0x34, 0x31, 0x2f, 0x2c, 0x29, 0x27, + 0x25, 0x21, 0x21, 0x1f, 0x1c, 0x1d, 0x19, 0x18, + 0x16, 0x15, 0x15, 0x13, 0x12, 0x11, 0x11, 0x0f, + 0x0f, 0x0e, 0x0f, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0f, 0x10, + 0x10, 0x10, 0x12, 0x13, 0x15, 0x16, 0x18, 0x1a, + 0x1c, 0x1d, 0x20, 0x22, 0x25, 0x27, 0x2a, 0x2e, + 0x30, 0x34, 0x38, 0x39, 0x3a, 0x3b, 0x3b, 0x3b, + 0x3c, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, + 0x36, 0x36, 
0x38, 0x37, 0x37, 0x37, 0x38, 0x3a, + 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x40, 0x40, + 0x42, 0x40, 0x3f, 0x3e, 0x3d, 0x3b, 0x3a, 0x39, + 0x37, 0x36, 0x36, 0x35, 0x34, 0x34, 0x33, 0x33, + 0x33, 0x34, 0x35, 0x35, 0x35, 0x36, 0x38, 0x39, + 0x3a, 0x3b, 0x3d, 0x3f, 0x42, 0x43, 0x45, 0x45, + 0x46, 0x48, 0x49, 0x4b, 0x4b, 0x4d, 0x50, 0x53, + 0x56, 0x57, 0x5a, 0x5c, 0x5e, 0x61, 0x63, 0x65, + 0x66, 0x64, 0x62, 0x5f, 0x5c, 0x59, 0x58, 0x56, + 0x55, 0x54, 0x52, 0x51, 0x50, 0x51, 0x51, 0x52, + 0x52, 0x52, 0x52, 0x52, 0x51, 0x51, 0x51, 0x50, + 0x4f, 0x4e, 0x4c, 0x4a, 0x47, 0x46, 0x45, 0x45, + 0x45, 0x46, 0x46, 0x46, 0x4a, 0x4c, 0x4d, 0x52, + 0x54, 0x56, 0x58, 0x58, 0x56, 0x57, 0x57, 0x56, + 0x55, 0x53, 0x50, 0x4d, 0x49, 0x45, 0x44, 0x44, + 0x43, 0x44, 0x45, }, + { 0x1f, 0x27, 0x24, 0x23, 0x25, 0x24, 0x25, 0x26, + 0x26, 0x28, 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x3a, + 0x3d, 0x41, 0x45, 0x48, 0x4b, 0x4d, 0x4f, 0x4e, + 0x50, 0x51, 0x52, 0x50, 0x4f, 0x4b, 0x4a, 0x49, + 0x45, 0x43, 0x3f, 0x3c, 0x3a, 0x36, 0x33, 0x30, + 0x2f, 0x2d, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x25, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2f, 0x32, 0x34, 0x37, 0x39, 0x3b, 0x3c, + 0x3e, 0x3f, 0x3f, 0x40, 0x3e, 0x3d, 0x3c, 0x3a, + 0x38, 0x36, 0x34, 0x31, 0x30, 0x2c, 0x29, 0x28, + 0x25, 0x23, 0x22, 0x1f, 0x1c, 0x1c, 0x18, 0x18, + 0x16, 0x14, 0x14, 0x13, 0x11, 0x11, 0x11, 0x0f, + 0x0f, 0x0e, 0x0f, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, + 0x0c, 0x0c, 0x0b, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0d, 0x0e, 0x0e, 0x0f, 0x0d, 0x0f, 0x10, 0x10, + 0x10, 0x11, 0x13, 0x14, 0x15, 0x16, 0x19, 0x1a, + 0x1c, 0x1f, 0x20, 0x23, 0x26, 0x28, 0x2a, 0x2e, + 0x31, 0x35, 0x38, 0x39, 0x3a, 0x3c, 0x3d, 0x3d, + 0x3e, 0x3e, 0x3d, 0x3c, 0x3a, 0x3a, 0x39, 0x39, + 0x38, 0x37, 0x38, 0x38, 0x37, 0x38, 0x39, 0x3a, + 0x3c, 0x3c, 0x3d, 0x3e, 0x3f, 0x3f, 0x40, 0x3f, + 0x41, 0x40, 0x3e, 0x3e, 0x3d, 0x3b, 0x3b, 0x39, + 0x37, 0x37, 0x35, 0x36, 0x34, 0x34, 0x34, 0x35, + 0x35, 0x34, 0x34, 0x35, 0x35, 0x37, 0x38, 0x39, + 0x3a, 0x3c, 0x3f, 0x3f, 0x43, 0x43, 
0x45, 0x47, + 0x48, 0x48, 0x4a, 0x4b, 0x4e, 0x4d, 0x51, 0x53, + 0x56, 0x58, 0x59, 0x5b, 0x5d, 0x60, 0x62, 0x63, + 0x64, 0x63, 0x61, 0x5e, 0x5c, 0x5a, 0x57, 0x56, + 0x55, 0x54, 0x53, 0x52, 0x51, 0x51, 0x52, 0x52, + 0x54, 0x54, 0x55, 0x55, 0x55, 0x54, 0x54, 0x53, + 0x52, 0x50, 0x4e, 0x4d, 0x4b, 0x4a, 0x48, 0x48, + 0x48, 0x48, 0x4a, 0x4b, 0x4d, 0x4f, 0x52, 0x55, + 0x58, 0x5a, 0x5b, 0x5b, 0x5b, 0x5b, 0x5a, 0x59, + 0x58, 0x55, 0x51, 0x4e, 0x4a, 0x46, 0x45, 0x44, + 0x44, 0x44, 0x44, }, + { 0x1e, 0x26, 0x23, 0x23, 0x25, 0x24, 0x25, 0x26, + 0x26, 0x28, 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x3a, + 0x3e, 0x42, 0x45, 0x48, 0x4b, 0x4d, 0x4f, 0x4f, + 0x50, 0x51, 0x52, 0x50, 0x4f, 0x4b, 0x4a, 0x48, + 0x46, 0x44, 0x3f, 0x3b, 0x39, 0x36, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2f, 0x32, 0x34, 0x37, 0x39, 0x3b, 0x3d, + 0x3e, 0x3f, 0x41, 0x41, 0x40, 0x3e, 0x3d, 0x3b, + 0x38, 0x37, 0x34, 0x32, 0x30, 0x2c, 0x2a, 0x27, + 0x26, 0x23, 0x22, 0x20, 0x1d, 0x1b, 0x1a, 0x19, + 0x17, 0x15, 0x15, 0x13, 0x12, 0x12, 0x11, 0x0f, + 0x11, 0x0f, 0x0e, 0x0e, 0x0d, 0x0d, 0x0d, 0x0c, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0e, 0x0e, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x11, + 0x11, 0x13, 0x16, 0x15, 0x15, 0x18, 0x1a, 0x1b, + 0x1d, 0x20, 0x22, 0x24, 0x27, 0x29, 0x2c, 0x30, + 0x33, 0x37, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3e, + 0x40, 0x40, 0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3a, + 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3b, 0x3d, + 0x3d, 0x3f, 0x40, 0x40, 0x3f, 0x41, 0x41, 0x41, + 0x41, 0x41, 0x40, 0x40, 0x3f, 0x3e, 0x3c, 0x3b, + 0x3a, 0x39, 0x37, 0x36, 0x36, 0x35, 0x35, 0x36, + 0x36, 0x35, 0x35, 0x36, 0x36, 0x38, 0x39, 0x39, + 0x3b, 0x3c, 0x3e, 0x40, 0x41, 0x43, 0x45, 0x47, + 0x48, 0x48, 0x4b, 0x4c, 0x4d, 0x4f, 0x51, 0x53, + 0x56, 0x56, 0x59, 0x5b, 0x5d, 0x5f, 0x61, 0x62, + 0x63, 0x63, 0x61, 0x5e, 0x5c, 0x5a, 0x59, 0x57, + 0x56, 0x54, 0x54, 0x53, 0x52, 0x53, 0x53, 0x55, + 0x56, 0x56, 0x57, 0x57, 0x57, 0x57, 0x56, 0x56, + 0x55, 
0x53, 0x51, 0x4f, 0x4d, 0x4b, 0x49, 0x4b, + 0x4b, 0x4c, 0x4d, 0x4e, 0x51, 0x53, 0x55, 0x58, + 0x5b, 0x5c, 0x60, 0x60, 0x5f, 0x5e, 0x5d, 0x5c, + 0x5a, 0x57, 0x53, 0x4f, 0x4b, 0x46, 0x45, 0x44, + 0x44, 0x44, 0x44, }, + { 0x1d, 0x25, 0x22, 0x22, 0x23, 0x23, 0x24, 0x25, + 0x25, 0x28, 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x3a, + 0x3e, 0x42, 0x45, 0x48, 0x4b, 0x4d, 0x4f, 0x4f, + 0x50, 0x51, 0x52, 0x50, 0x4f, 0x4b, 0x4a, 0x47, + 0x45, 0x43, 0x3f, 0x3c, 0x38, 0x35, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2b, 0x2f, 0x32, 0x34, 0x37, 0x39, 0x3c, 0x3d, + 0x3e, 0x3f, 0x40, 0x41, 0x40, 0x3e, 0x3d, 0x3b, + 0x39, 0x36, 0x34, 0x32, 0x30, 0x2d, 0x2a, 0x26, + 0x26, 0x24, 0x22, 0x1f, 0x1d, 0x1c, 0x1a, 0x19, + 0x18, 0x16, 0x15, 0x14, 0x12, 0x12, 0x12, 0x10, + 0x10, 0x0f, 0x0e, 0x10, 0x0e, 0x0e, 0x0d, 0x0c, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0d, 0x0e, + 0x0f, 0x0f, 0x0f, 0x10, 0x11, 0x11, 0x11, 0x12, + 0x13, 0x14, 0x16, 0x16, 0x18, 0x1a, 0x1b, 0x1c, + 0x1e, 0x21, 0x23, 0x25, 0x28, 0x2a, 0x2e, 0x32, + 0x34, 0x38, 0x3a, 0x3c, 0x3d, 0x3f, 0x40, 0x42, + 0x43, 0x43, 0x43, 0x42, 0x40, 0x3e, 0x3e, 0x3c, + 0x3b, 0x3b, 0x3c, 0x3a, 0x3b, 0x3b, 0x3e, 0x3e, + 0x40, 0x3f, 0x41, 0x41, 0x41, 0x42, 0x42, 0x43, + 0x42, 0x41, 0x41, 0x41, 0x40, 0x3e, 0x3d, 0x3c, + 0x3b, 0x3a, 0x39, 0x37, 0x36, 0x35, 0x36, 0x37, + 0x35, 0x36, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, + 0x3b, 0x3d, 0x3e, 0x40, 0x41, 0x41, 0x44, 0x46, + 0x48, 0x48, 0x4a, 0x4c, 0x4d, 0x4f, 0x51, 0x53, + 0x55, 0x57, 0x59, 0x5a, 0x5b, 0x5e, 0x5f, 0x61, + 0x62, 0x61, 0x60, 0x5e, 0x5c, 0x5a, 0x59, 0x58, + 0x56, 0x55, 0x54, 0x53, 0x53, 0x54, 0x54, 0x55, + 0x57, 0x57, 0x58, 0x59, 0x5a, 0x58, 0x59, 0x58, + 0x57, 0x55, 0x53, 0x52, 0x4f, 0x4e, 0x4d, 0x4d, + 0x4d, 0x4f, 0x51, 0x50, 0x54, 0x56, 0x59, 0x5c, + 0x5f, 0x61, 0x64, 0x64, 0x63, 0x61, 0x5e, 0x5e, + 0x5c, 0x59, 0x54, 0x50, 0x4c, 0x46, 0x45, 0x44, + 0x44, 0x44, 0x44, }, + { 0x1c, 0x24, 0x21, 0x21, 0x21, 0x22, 0x23, 0x23, + 0x25, 
0x27, 0x2a, 0x2e, 0x31, 0x33, 0x37, 0x3b, + 0x3e, 0x42, 0x45, 0x48, 0x4b, 0x4c, 0x50, 0x4f, + 0x50, 0x51, 0x52, 0x50, 0x4e, 0x4b, 0x4a, 0x49, + 0x45, 0x42, 0x3f, 0x3c, 0x38, 0x35, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2b, 0x2f, 0x32, 0x34, 0x38, 0x39, 0x3c, 0x3d, + 0x3e, 0x3e, 0x40, 0x41, 0x40, 0x3e, 0x3c, 0x3a, + 0x39, 0x37, 0x35, 0x33, 0x30, 0x2d, 0x2b, 0x28, + 0x26, 0x23, 0x23, 0x20, 0x1e, 0x1b, 0x19, 0x19, + 0x17, 0x16, 0x15, 0x14, 0x12, 0x12, 0x11, 0x10, + 0x0f, 0x0e, 0x0e, 0x10, 0x0e, 0x0d, 0x0c, 0x0c, + 0x0c, 0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0d, 0x0e, + 0x0f, 0x0f, 0x0f, 0x10, 0x11, 0x11, 0x12, 0x14, + 0x14, 0x14, 0x16, 0x18, 0x19, 0x1b, 0x1c, 0x1e, + 0x20, 0x23, 0x26, 0x27, 0x29, 0x2c, 0x2f, 0x33, + 0x36, 0x38, 0x3b, 0x3e, 0x3e, 0x42, 0x43, 0x46, + 0x46, 0x46, 0x46, 0x44, 0x42, 0x41, 0x3f, 0x3e, + 0x3d, 0x3d, 0x3e, 0x3d, 0x3d, 0x3e, 0x3e, 0x40, + 0x40, 0x40, 0x43, 0x43, 0x42, 0x43, 0x45, 0x43, + 0x43, 0x43, 0x42, 0x42, 0x41, 0x40, 0x40, 0x3e, + 0x3c, 0x3a, 0x3a, 0x38, 0x36, 0x36, 0x36, 0x36, + 0x37, 0x37, 0x36, 0x38, 0x38, 0x39, 0x3b, 0x3b, + 0x3e, 0x3e, 0x3e, 0x40, 0x41, 0x43, 0x45, 0x46, + 0x46, 0x49, 0x4c, 0x4c, 0x4d, 0x4f, 0x51, 0x54, + 0x56, 0x57, 0x58, 0x5a, 0x5c, 0x5e, 0x60, 0x60, + 0x61, 0x61, 0x60, 0x5f, 0x5c, 0x5a, 0x59, 0x58, + 0x57, 0x57, 0x55, 0x54, 0x53, 0x55, 0x55, 0x58, + 0x58, 0x59, 0x5a, 0x5a, 0x5a, 0x5b, 0x5b, 0x5b, + 0x5a, 0x59, 0x56, 0x54, 0x53, 0x4e, 0x4e, 0x50, + 0x50, 0x51, 0x52, 0x52, 0x57, 0x59, 0x5d, 0x60, + 0x63, 0x63, 0x66, 0x66, 0x66, 0x64, 0x63, 0x61, + 0x60, 0x5b, 0x55, 0x51, 0x4d, 0x48, 0x45, 0x44, + 0x43, 0x43, 0x43, }, + { 0x1b, 0x23, 0x20, 0x21, 0x22, 0x22, 0x23, 0x24, + 0x26, 0x27, 0x2a, 0x2e, 0x31, 0x33, 0x37, 0x3b, + 0x3d, 0x42, 0x46, 0x49, 0x4a, 0x4c, 0x4f, 0x4f, + 0x50, 0x50, 0x52, 0x50, 0x4e, 0x4b, 0x4b, 0x49, + 0x45, 0x42, 0x3e, 0x3c, 0x38, 0x35, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 
0x27, 0x29, 0x2a, + 0x2c, 0x2f, 0x32, 0x35, 0x38, 0x3a, 0x3c, 0x3d, + 0x3e, 0x3e, 0x40, 0x41, 0x40, 0x3f, 0x3d, 0x3b, + 0x3a, 0x38, 0x36, 0x33, 0x30, 0x2d, 0x2b, 0x29, + 0x27, 0x24, 0x24, 0x21, 0x1e, 0x1c, 0x1b, 0x1a, + 0x18, 0x17, 0x16, 0x15, 0x13, 0x12, 0x10, 0x0f, + 0x10, 0x0f, 0x0e, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0f, 0x0e, 0x0f, + 0x10, 0x11, 0x11, 0x12, 0x13, 0x13, 0x14, 0x15, + 0x15, 0x16, 0x17, 0x1a, 0x1b, 0x1d, 0x1e, 0x20, + 0x21, 0x25, 0x27, 0x29, 0x2b, 0x2d, 0x31, 0x35, + 0x37, 0x39, 0x3c, 0x3f, 0x40, 0x43, 0x46, 0x47, + 0x4a, 0x49, 0x48, 0x46, 0x45, 0x43, 0x42, 0x41, + 0x3f, 0x40, 0x3f, 0x3f, 0x40, 0x3f, 0x41, 0x43, + 0x43, 0x43, 0x44, 0x45, 0x45, 0x45, 0x45, 0x45, + 0x45, 0x45, 0x44, 0x43, 0x43, 0x42, 0x42, 0x40, + 0x3e, 0x3d, 0x3c, 0x39, 0x38, 0x38, 0x38, 0x38, + 0x38, 0x36, 0x38, 0x39, 0x39, 0x3a, 0x3c, 0x3d, + 0x3e, 0x3e, 0x3f, 0x41, 0x42, 0x42, 0x43, 0x45, + 0x46, 0x49, 0x4b, 0x4d, 0x4f, 0x50, 0x53, 0x54, + 0x57, 0x58, 0x5a, 0x5c, 0x5b, 0x5e, 0x60, 0x61, + 0x60, 0x60, 0x5f, 0x5f, 0x5d, 0x5b, 0x5b, 0x59, + 0x58, 0x57, 0x56, 0x55, 0x55, 0x55, 0x57, 0x59, + 0x5b, 0x5b, 0x5d, 0x5c, 0x5c, 0x5e, 0x5e, 0x5e, + 0x5d, 0x5b, 0x59, 0x56, 0x54, 0x51, 0x51, 0x51, + 0x52, 0x55, 0x56, 0x56, 0x5a, 0x5d, 0x5f, 0x63, + 0x66, 0x68, 0x6b, 0x6b, 0x68, 0x67, 0x66, 0x64, + 0x61, 0x5d, 0x57, 0x52, 0x4f, 0x49, 0x46, 0x45, + 0x43, 0x43, 0x43, }, + { 0x1a, 0x22, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, + 0x26, 0x27, 0x2a, 0x2d, 0x31, 0x33, 0x37, 0x3b, + 0x3d, 0x41, 0x46, 0x49, 0x4a, 0x4d, 0x4f, 0x4f, + 0x50, 0x51, 0x52, 0x50, 0x4e, 0x4b, 0x4b, 0x48, + 0x44, 0x42, 0x3e, 0x3c, 0x39, 0x35, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2d, 0x2f, 0x32, 0x35, 0x39, 0x3a, 0x3c, 0x3d, + 0x3e, 0x3f, 0x40, 0x41, 0x40, 0x3f, 0x3e, 0x3c, + 0x3a, 0x38, 0x36, 0x33, 0x31, 0x2d, 0x2c, 0x29, + 0x27, 0x26, 0x24, 0x21, 0x1f, 0x1d, 0x1c, 0x1a, + 0x19, 0x18, 0x16, 0x15, 0x14, 0x13, 0x12, 0x10, + 
0x11, 0x10, 0x0f, 0x0f, 0x0f, 0x0e, 0x0e, 0x0e, + 0x0f, 0x0f, 0x0e, 0x0e, 0x0e, 0x0f, 0x0f, 0x10, + 0x11, 0x12, 0x12, 0x13, 0x15, 0x15, 0x16, 0x16, + 0x17, 0x18, 0x1a, 0x1b, 0x1c, 0x1e, 0x1f, 0x21, + 0x22, 0x25, 0x27, 0x2a, 0x2c, 0x2e, 0x33, 0x36, + 0x39, 0x3a, 0x3d, 0x40, 0x41, 0x45, 0x47, 0x4a, + 0x4c, 0x4d, 0x4c, 0x4a, 0x48, 0x45, 0x44, 0x41, + 0x42, 0x42, 0x42, 0x42, 0x42, 0x43, 0x43, 0x44, + 0x45, 0x47, 0x47, 0x48, 0x47, 0x48, 0x47, 0x47, + 0x48, 0x48, 0x46, 0x46, 0x46, 0x43, 0x43, 0x41, + 0x3f, 0x3e, 0x3b, 0x39, 0x38, 0x37, 0x37, 0x37, + 0x38, 0x38, 0x37, 0x39, 0x39, 0x3a, 0x3c, 0x3e, + 0x3e, 0x3f, 0x3f, 0x3f, 0x42, 0x43, 0x43, 0x45, + 0x47, 0x48, 0x4b, 0x4c, 0x4e, 0x50, 0x51, 0x54, + 0x56, 0x58, 0x5a, 0x5c, 0x5c, 0x5f, 0x5f, 0x5f, + 0x61, 0x60, 0x5f, 0x5f, 0x5e, 0x5b, 0x5c, 0x5b, + 0x59, 0x59, 0x57, 0x56, 0x55, 0x56, 0x57, 0x59, + 0x5a, 0x5b, 0x5c, 0x5c, 0x5d, 0x5e, 0x5e, 0x5d, + 0x5e, 0x5c, 0x5a, 0x57, 0x55, 0x52, 0x51, 0x52, + 0x53, 0x55, 0x57, 0x58, 0x5c, 0x5e, 0x61, 0x65, + 0x69, 0x6b, 0x6c, 0x6b, 0x6a, 0x69, 0x67, 0x64, + 0x61, 0x5d, 0x59, 0x53, 0x4d, 0x48, 0x46, 0x45, + 0x44, 0x44, 0x43, }, + { 0x1a, 0x21, 0x1e, 0x1f, 0x20, 0x21, 0x23, 0x24, + 0x25, 0x28, 0x2a, 0x2e, 0x31, 0x33, 0x37, 0x3b, + 0x3e, 0x41, 0x46, 0x49, 0x4b, 0x4d, 0x4f, 0x4e, + 0x50, 0x51, 0x51, 0x50, 0x4e, 0x4b, 0x4a, 0x48, + 0x44, 0x42, 0x3e, 0x3c, 0x39, 0x35, 0x32, 0x30, + 0x2f, 0x2d, 0x29, 0x27, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x26, 0x27, 0x29, 0x2a, + 0x2c, 0x2f, 0x32, 0x35, 0x38, 0x3b, 0x3c, 0x3e, + 0x3f, 0x3f, 0x40, 0x41, 0x40, 0x3f, 0x3e, 0x3c, + 0x3a, 0x39, 0x36, 0x34, 0x31, 0x2d, 0x2c, 0x29, + 0x27, 0x26, 0x24, 0x21, 0x1f, 0x1d, 0x1c, 0x1a, + 0x19, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x10, + 0x11, 0x10, 0x0f, 0x0f, 0x0f, 0x0e, 0x0e, 0x0e, + 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0f, 0x0f, 0x10, + 0x11, 0x13, 0x14, 0x14, 0x15, 0x16, 0x17, 0x19, + 0x19, 0x1a, 0x1c, 0x1d, 0x1e, 0x20, 0x22, 0x24, + 0x25, 0x27, 0x29, 0x2c, 0x2e, 0x31, 0x35, 0x38, + 0x3a, 0x3d, 0x41, 0x42, 
0x45, 0x48, 0x4c, 0x4e, + 0x4f, 0x4f, 0x4f, 0x4d, 0x4b, 0x49, 0x47, 0x47, + 0x46, 0x45, 0x45, 0x45, 0x44, 0x44, 0x46, 0x47, + 0x48, 0x49, 0x4b, 0x4b, 0x4a, 0x4b, 0x4b, 0x4a, + 0x4b, 0x4a, 0x49, 0x49, 0x48, 0x46, 0x46, 0x44, + 0x42, 0x41, 0x3d, 0x3b, 0x3a, 0x38, 0x38, 0x38, + 0x37, 0x37, 0x39, 0x38, 0x3a, 0x3a, 0x3c, 0x3c, + 0x3e, 0x40, 0x40, 0x41, 0x43, 0x43, 0x45, 0x46, + 0x48, 0x49, 0x4b, 0x4e, 0x4f, 0x50, 0x53, 0x55, + 0x57, 0x59, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, + 0x60, 0x60, 0x5f, 0x5f, 0x5e, 0x5c, 0x5b, 0x5a, + 0x59, 0x58, 0x57, 0x57, 0x56, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0x5b, 0x5c, 0x5c, 0x5d, 0x5e, 0x5d, + 0x5c, 0x5b, 0x58, 0x57, 0x54, 0x52, 0x52, 0x53, + 0x54, 0x57, 0x58, 0x58, 0x5b, 0x5e, 0x62, 0x65, + 0x69, 0x6b, 0x6d, 0x6c, 0x6a, 0x69, 0x67, 0x64, + 0x62, 0x5e, 0x59, 0x54, 0x4d, 0x48, 0x47, 0x46, + 0x45, 0x45, 0x44, }, + { 0x1a, 0x21, 0x1e, 0x1f, 0x20, 0x21, 0x23, 0x24, + 0x25, 0x28, 0x2a, 0x2e, 0x31, 0x34, 0x37, 0x3b, + 0x3e, 0x42, 0x47, 0x49, 0x4b, 0x4d, 0x4f, 0x4f, + 0x50, 0x51, 0x51, 0x50, 0x50, 0x4c, 0x4a, 0x47, + 0x44, 0x42, 0x3e, 0x3c, 0x39, 0x35, 0x32, 0x31, + 0x2f, 0x2d, 0x29, 0x27, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x25, 0x25, 0x26, 0x27, 0x29, 0x2b, + 0x2c, 0x2f, 0x33, 0x35, 0x38, 0x3a, 0x3c, 0x3e, + 0x40, 0x40, 0x41, 0x42, 0x41, 0x3f, 0x3f, 0x3d, + 0x3b, 0x39, 0x36, 0x33, 0x32, 0x2e, 0x2d, 0x2a, + 0x27, 0x26, 0x25, 0x22, 0x1f, 0x1d, 0x1c, 0x1b, + 0x19, 0x17, 0x17, 0x16, 0x15, 0x14, 0x12, 0x11, + 0x11, 0x11, 0x10, 0x10, 0x0f, 0x0f, 0x0f, 0x0f, + 0x0f, 0x0f, 0x10, 0x11, 0x10, 0x11, 0x11, 0x12, + 0x11, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1b, + 0x1c, 0x1c, 0x1e, 0x20, 0x21, 0x22, 0x23, 0x25, + 0x27, 0x2a, 0x2c, 0x2f, 0x31, 0x35, 0x38, 0x3b, + 0x3d, 0x40, 0x44, 0x47, 0x49, 0x4c, 0x4f, 0x51, + 0x53, 0x53, 0x53, 0x51, 0x50, 0x4e, 0x4c, 0x4b, + 0x4a, 0x49, 0x49, 0x49, 0x49, 0x4a, 0x4a, 0x4d, + 0x4e, 0x4e, 0x4f, 0x50, 0x4f, 0x50, 0x51, 0x50, + 0x50, 0x4e, 0x4d, 0x4c, 0x4b, 0x48, 0x48, 0x47, + 0x44, 0x42, 0x3f, 0x3d, 0x3b, 0x3a, 0x39, 0x39, 
+ 0x39, 0x38, 0x39, 0x3b, 0x3a, 0x3c, 0x3e, 0x3d, + 0x40, 0x40, 0x40, 0x42, 0x42, 0x42, 0x45, 0x46, + 0x47, 0x49, 0x4c, 0x4e, 0x50, 0x50, 0x53, 0x56, + 0x58, 0x59, 0x5d, 0x5d, 0x5e, 0x60, 0x61, 0x61, + 0x62, 0x61, 0x60, 0x60, 0x5e, 0x5d, 0x5d, 0x5b, + 0x57, 0x58, 0x56, 0x55, 0x55, 0x56, 0x56, 0x59, + 0x59, 0x58, 0x5a, 0x5a, 0x5a, 0x5c, 0x5c, 0x5c, + 0x5b, 0x5b, 0x58, 0x57, 0x54, 0x53, 0x52, 0x53, + 0x54, 0x57, 0x58, 0x59, 0x5c, 0x5f, 0x63, 0x67, + 0x6b, 0x6d, 0x6e, 0x6e, 0x6b, 0x6a, 0x68, 0x64, + 0x62, 0x5e, 0x58, 0x53, 0x4f, 0x49, 0x47, 0x46, + 0x45, 0x45, 0x44, }, + { 0x19, 0x20, 0x1e, 0x1e, 0x1f, 0x20, 0x22, 0x23, + 0x25, 0x27, 0x2a, 0x2e, 0x31, 0x34, 0x37, 0x3a, + 0x3e, 0x41, 0x46, 0x49, 0x4a, 0x4d, 0x4f, 0x4e, + 0x50, 0x51, 0x51, 0x4f, 0x4f, 0x4d, 0x49, 0x47, + 0x44, 0x42, 0x3e, 0x3c, 0x39, 0x36, 0x32, 0x31, + 0x2f, 0x2d, 0x29, 0x27, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x25, 0x25, 0x26, 0x28, 0x29, 0x2b, + 0x2c, 0x2f, 0x33, 0x35, 0x38, 0x3a, 0x3c, 0x3e, + 0x3f, 0x3f, 0x41, 0x42, 0x41, 0x3f, 0x3f, 0x3d, + 0x3c, 0x39, 0x36, 0x33, 0x32, 0x2e, 0x2d, 0x2a, + 0x27, 0x26, 0x25, 0x22, 0x1f, 0x1e, 0x1d, 0x1b, + 0x1a, 0x17, 0x17, 0x17, 0x14, 0x14, 0x12, 0x11, + 0x11, 0x12, 0x11, 0x11, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x11, 0x11, 0x11, 0x12, 0x13, 0x14, + 0x14, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1c, 0x1e, + 0x1e, 0x1f, 0x22, 0x23, 0x23, 0x24, 0x25, 0x27, + 0x2a, 0x2d, 0x2f, 0x31, 0x35, 0x38, 0x3a, 0x3e, + 0x41, 0x44, 0x48, 0x4b, 0x4d, 0x51, 0x53, 0x55, + 0x57, 0x57, 0x56, 0x55, 0x54, 0x52, 0x52, 0x50, + 0x4e, 0x50, 0x4e, 0x4d, 0x4d, 0x4d, 0x4f, 0x51, + 0x51, 0x52, 0x54, 0x55, 0x55, 0x55, 0x57, 0x55, + 0x54, 0x53, 0x52, 0x4e, 0x4d, 0x4b, 0x4a, 0x49, + 0x46, 0x44, 0x41, 0x3f, 0x3d, 0x3b, 0x3a, 0x3a, + 0x39, 0x39, 0x39, 0x39, 0x3a, 0x3b, 0x3d, 0x3e, + 0x3f, 0x40, 0x41, 0x42, 0x44, 0x44, 0x45, 0x47, + 0x49, 0x49, 0x4a, 0x4d, 0x50, 0x51, 0x53, 0x57, + 0x5a, 0x5b, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x62, + 0x63, 0x62, 0x60, 0x60, 0x5e, 0x5c, 0x5c, 0x59, + 0x58, 0x56, 0x55, 
0x55, 0x55, 0x55, 0x55, 0x54, + 0x56, 0x56, 0x57, 0x58, 0x58, 0x59, 0x5a, 0x59, + 0x58, 0x57, 0x56, 0x55, 0x54, 0x52, 0x53, 0x53, + 0x53, 0x56, 0x57, 0x59, 0x5b, 0x5e, 0x62, 0x66, + 0x6a, 0x6c, 0x6d, 0x6e, 0x6b, 0x69, 0x67, 0x64, + 0x61, 0x5d, 0x58, 0x54, 0x50, 0x4a, 0x47, 0x46, + 0x45, 0x45, 0x44, }, + { 0x1a, 0x21, 0x1e, 0x1f, 0x1f, 0x20, 0x22, 0x23, + 0x25, 0x27, 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x3b, + 0x3d, 0x42, 0x45, 0x49, 0x4a, 0x4d, 0x4e, 0x4e, + 0x51, 0x52, 0x50, 0x4f, 0x4f, 0x4c, 0x49, 0x48, + 0x45, 0x42, 0x3e, 0x3b, 0x39, 0x36, 0x32, 0x32, + 0x2f, 0x2c, 0x2a, 0x28, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x25, 0x28, 0x29, 0x2b, + 0x2d, 0x2f, 0x33, 0x35, 0x38, 0x3a, 0x3c, 0x3e, + 0x3f, 0x3f, 0x41, 0x42, 0x41, 0x3f, 0x3e, 0x3c, + 0x3c, 0x3a, 0x37, 0x33, 0x32, 0x2f, 0x2d, 0x2b, + 0x28, 0x26, 0x25, 0x22, 0x20, 0x1e, 0x1d, 0x1b, + 0x1a, 0x17, 0x17, 0x16, 0x14, 0x14, 0x12, 0x11, + 0x12, 0x11, 0x11, 0x11, 0x11, 0x10, 0x10, 0x10, + 0x10, 0x11, 0x12, 0x12, 0x12, 0x13, 0x14, 0x14, + 0x16, 0x18, 0x19, 0x1a, 0x1b, 0x1d, 0x1e, 0x1f, + 0x21, 0x22, 0x23, 0x25, 0x26, 0x26, 0x28, 0x2a, + 0x2c, 0x2e, 0x32, 0x34, 0x39, 0x39, 0x3d, 0x41, + 0x45, 0x47, 0x4c, 0x4e, 0x51, 0x54, 0x56, 0x58, + 0x5b, 0x5c, 0x5a, 0x59, 0x58, 0x56, 0x55, 0x53, + 0x53, 0x52, 0x52, 0x51, 0x52, 0x52, 0x53, 0x55, + 0x57, 0x58, 0x5a, 0x5a, 0x59, 0x5b, 0x59, 0x59, + 0x58, 0x57, 0x55, 0x53, 0x51, 0x4e, 0x4c, 0x4a, + 0x48, 0x46, 0x43, 0x40, 0x3e, 0x3c, 0x3b, 0x3b, + 0x38, 0x39, 0x38, 0x39, 0x3a, 0x3d, 0x3d, 0x3e, + 0x3f, 0x40, 0x41, 0x43, 0x44, 0x45, 0x46, 0x48, + 0x4a, 0x4b, 0x4d, 0x4e, 0x50, 0x52, 0x54, 0x56, + 0x59, 0x5c, 0x5e, 0x5f, 0x60, 0x62, 0x62, 0x63, + 0x63, 0x63, 0x61, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b, + 0x59, 0x56, 0x56, 0x55, 0x54, 0x53, 0x53, 0x54, + 0x55, 0x54, 0x55, 0x55, 0x55, 0x57, 0x58, 0x57, + 0x57, 0x56, 0x55, 0x54, 0x54, 0x52, 0x52, 0x53, + 0x54, 0x55, 0x57, 0x58, 0x5b, 0x5e, 0x62, 0x65, + 0x69, 0x6b, 0x6d, 0x6e, 0x6a, 0x69, 0x67, 0x63, + 0x61, 0x5d, 0x58, 0x54, 0x4f, 0x4b, 0x48, 
0x47, + 0x46, 0x45, 0x45, }, + { 0x1a, 0x21, 0x1e, 0x1f, 0x1f, 0x20, 0x22, 0x23, + 0x25, 0x27, 0x2b, 0x2d, 0x31, 0x34, 0x37, 0x3b, + 0x3d, 0x42, 0x45, 0x48, 0x4c, 0x4e, 0x4e, 0x4f, + 0x51, 0x52, 0x50, 0x50, 0x4f, 0x4c, 0x4a, 0x48, + 0x45, 0x42, 0x3f, 0x3b, 0x39, 0x36, 0x32, 0x31, + 0x2f, 0x2c, 0x2a, 0x28, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x28, 0x29, 0x2b, + 0x2d, 0x30, 0x33, 0x36, 0x39, 0x3b, 0x3d, 0x3f, + 0x3f, 0x40, 0x42, 0x43, 0x42, 0x40, 0x3e, 0x3c, + 0x3c, 0x3a, 0x37, 0x34, 0x32, 0x2f, 0x2d, 0x2c, + 0x2a, 0x27, 0x26, 0x23, 0x20, 0x1e, 0x1d, 0x1c, + 0x1a, 0x18, 0x18, 0x17, 0x15, 0x16, 0x14, 0x12, + 0x12, 0x12, 0x12, 0x12, 0x12, 0x11, 0x11, 0x12, + 0x12, 0x12, 0x13, 0x14, 0x14, 0x14, 0x15, 0x16, + 0x17, 0x19, 0x1b, 0x1c, 0x1e, 0x20, 0x20, 0x22, + 0x24, 0x25, 0x26, 0x27, 0x28, 0x2a, 0x2c, 0x2c, + 0x2f, 0x32, 0x35, 0x37, 0x3b, 0x3c, 0x41, 0x45, + 0x48, 0x4c, 0x50, 0x52, 0x54, 0x57, 0x5a, 0x5c, + 0x5f, 0x5f, 0x5f, 0x5d, 0x5c, 0x5b, 0x5a, 0x58, + 0x57, 0x57, 0x57, 0x56, 0x56, 0x57, 0x57, 0x5a, + 0x5c, 0x5e, 0x5f, 0x61, 0x5f, 0x5f, 0x5f, 0x5e, + 0x5d, 0x5c, 0x5a, 0x57, 0x55, 0x52, 0x4f, 0x4e, + 0x4a, 0x47, 0x46, 0x42, 0x41, 0x3e, 0x3d, 0x3c, + 0x3b, 0x3a, 0x39, 0x39, 0x3b, 0x3c, 0x3d, 0x3f, + 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x49, 0x49, + 0x4b, 0x4c, 0x4e, 0x4f, 0x51, 0x54, 0x57, 0x58, + 0x5b, 0x5d, 0x61, 0x61, 0x61, 0x63, 0x65, 0x65, + 0x64, 0x64, 0x62, 0x61, 0x60, 0x5e, 0x5d, 0x5c, + 0x59, 0x58, 0x56, 0x54, 0x53, 0x53, 0x53, 0x54, + 0x54, 0x53, 0x53, 0x54, 0x54, 0x54, 0x55, 0x55, + 0x56, 0x55, 0x54, 0x53, 0x53, 0x52, 0x52, 0x53, + 0x55, 0x56, 0x57, 0x58, 0x5b, 0x5e, 0x62, 0x66, + 0x69, 0x6b, 0x6d, 0x6d, 0x6b, 0x69, 0x67, 0x64, + 0x61, 0x5d, 0x58, 0x55, 0x50, 0x4b, 0x48, 0x47, + 0x46, 0x46, 0x46, }, + { 0x1a, 0x20, 0x1e, 0x1f, 0x1f, 0x21, 0x22, 0x23, + 0x25, 0x27, 0x2b, 0x2d, 0x31, 0x34, 0x37, 0x3b, + 0x3d, 0x42, 0x45, 0x48, 0x4c, 0x4e, 0x4f, 0x4f, + 0x51, 0x52, 0x51, 0x50, 0x4e, 0x4b, 0x4a, 0x48, + 0x45, 0x42, 0x3f, 0x3b, 0x38, 0x36, 0x32, 
0x31, + 0x2f, 0x2c, 0x2a, 0x28, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x28, 0x29, 0x2b, + 0x2e, 0x30, 0x33, 0x36, 0x39, 0x3b, 0x3d, 0x3f, + 0x3f, 0x40, 0x41, 0x42, 0x41, 0x40, 0x3e, 0x3c, + 0x3c, 0x3a, 0x37, 0x34, 0x33, 0x30, 0x2e, 0x2b, + 0x29, 0x26, 0x24, 0x24, 0x20, 0x1f, 0x1d, 0x1d, + 0x1a, 0x19, 0x17, 0x16, 0x16, 0x16, 0x16, 0x14, + 0x13, 0x12, 0x13, 0x13, 0x13, 0x12, 0x12, 0x13, + 0x13, 0x14, 0x15, 0x15, 0x14, 0x15, 0x16, 0x18, + 0x19, 0x1b, 0x1c, 0x1e, 0x20, 0x21, 0x22, 0x24, + 0x27, 0x28, 0x29, 0x2a, 0x2c, 0x2c, 0x2d, 0x2f, + 0x32, 0x35, 0x37, 0x3a, 0x3c, 0x3e, 0x44, 0x48, + 0x4c, 0x50, 0x54, 0x56, 0x58, 0x5b, 0x5e, 0x60, + 0x61, 0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5e, + 0x5c, 0x5c, 0x5b, 0x5a, 0x5a, 0x5b, 0x5c, 0x5e, + 0x60, 0x63, 0x64, 0x65, 0x63, 0x62, 0x63, 0x63, + 0x61, 0x60, 0x5e, 0x5b, 0x58, 0x55, 0x51, 0x4f, + 0x4c, 0x4a, 0x47, 0x44, 0x42, 0x41, 0x3e, 0x3c, + 0x3b, 0x3a, 0x3a, 0x3b, 0x3b, 0x3c, 0x3e, 0x3f, + 0x40, 0x42, 0x43, 0x45, 0x46, 0x47, 0x49, 0x4a, + 0x4c, 0x4c, 0x4f, 0x51, 0x52, 0x55, 0x58, 0x5b, + 0x5c, 0x5f, 0x61, 0x62, 0x63, 0x64, 0x64, 0x65, + 0x66, 0x65, 0x63, 0x62, 0x5f, 0x5e, 0x5e, 0x5c, + 0x5b, 0x58, 0x56, 0x55, 0x54, 0x53, 0x52, 0x53, + 0x52, 0x52, 0x52, 0x52, 0x52, 0x53, 0x55, 0x55, + 0x55, 0x53, 0x53, 0x53, 0x52, 0x51, 0x52, 0x52, + 0x55, 0x55, 0x58, 0x58, 0x5b, 0x5d, 0x61, 0x65, + 0x68, 0x6a, 0x6c, 0x6b, 0x69, 0x68, 0x67, 0x64, + 0x61, 0x5e, 0x58, 0x54, 0x4f, 0x4b, 0x49, 0x48, + 0x47, 0x46, 0x45, }, + { 0x19, 0x20, 0x1d, 0x1f, 0x1f, 0x20, 0x23, 0x23, + 0x25, 0x27, 0x2b, 0x2d, 0x31, 0x34, 0x37, 0x3b, + 0x3d, 0x42, 0x45, 0x48, 0x4c, 0x4e, 0x4f, 0x4f, + 0x51, 0x52, 0x51, 0x50, 0x4e, 0x4b, 0x4a, 0x48, + 0x44, 0x42, 0x3f, 0x3a, 0x38, 0x36, 0x32, 0x30, + 0x2f, 0x2c, 0x2a, 0x28, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2b, + 0x2e, 0x30, 0x34, 0x36, 0x39, 0x3b, 0x3d, 0x3f, + 0x3f, 0x40, 0x41, 0x42, 0x41, 0x40, 0x3e, 0x3c, + 0x3c, 0x3a, 0x37, 0x34, 0x33, 0x30, 0x2e, 0x2b, + 0x29, 0x27, 
0x25, 0x24, 0x21, 0x1f, 0x1e, 0x1c, + 0x1b, 0x19, 0x17, 0x16, 0x16, 0x16, 0x16, 0x14, + 0x13, 0x12, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, + 0x13, 0x14, 0x15, 0x14, 0x14, 0x14, 0x17, 0x19, + 0x1a, 0x1c, 0x1e, 0x20, 0x21, 0x23, 0x24, 0x26, + 0x29, 0x29, 0x2b, 0x2c, 0x2d, 0x2e, 0x30, 0x31, + 0x34, 0x38, 0x3b, 0x3c, 0x3f, 0x42, 0x47, 0x4c, + 0x50, 0x54, 0x57, 0x5b, 0x5c, 0x5e, 0x62, 0x63, + 0x66, 0x66, 0x66, 0x65, 0x64, 0x63, 0x61, 0x62, + 0x60, 0x60, 0x5f, 0x5e, 0x5e, 0x5f, 0x60, 0x62, + 0x65, 0x67, 0x69, 0x6a, 0x69, 0x68, 0x69, 0x67, + 0x66, 0x64, 0x62, 0x5f, 0x5c, 0x58, 0x54, 0x51, + 0x4e, 0x4b, 0x49, 0x45, 0x43, 0x41, 0x40, 0x3e, + 0x3c, 0x3a, 0x3b, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x41, 0x42, 0x44, 0x46, 0x46, 0x48, 0x49, 0x4b, + 0x4d, 0x50, 0x51, 0x53, 0x55, 0x57, 0x58, 0x5c, + 0x5f, 0x60, 0x63, 0x64, 0x64, 0x65, 0x66, 0x66, + 0x66, 0x65, 0x65, 0x63, 0x61, 0x5f, 0x5e, 0x5c, + 0x5a, 0x58, 0x56, 0x55, 0x54, 0x53, 0x52, 0x52, + 0x53, 0x52, 0x52, 0x52, 0x52, 0x53, 0x53, 0x53, + 0x54, 0x53, 0x53, 0x52, 0x53, 0x51, 0x53, 0x53, + 0x55, 0x57, 0x58, 0x59, 0x5b, 0x5d, 0x62, 0x64, + 0x68, 0x6a, 0x6c, 0x6b, 0x69, 0x68, 0x67, 0x64, + 0x61, 0x5d, 0x57, 0x54, 0x50, 0x4a, 0x48, 0x47, + 0x46, 0x45, 0x45, }, diff --git a/tests/tcg/hexagon/hvx_histogram_row.S b/tests/tcg/hexagon/hvx_histogram_row.S new file mode 100644 index 0000000000..5e42c33145 --- /dev/null +++ b/tests/tcg/hexagon/hvx_histogram_row.S @@ -0,0 +1,294 @@ +/* + * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + + +/* + * void hvx_histogram_row(uint8_t *src, => r0 + * int stride, => r1 + * int width, => r2 + * int height, => r3 + * int *hist => r4) + */ + .text + .p2align 2 + .global hvx_histogram_row + .type hvx_histogram_row, @function +hvx_histogram_row: + { r2 = lsr(r2, #7) /* size / VLEN */ + r5 = and(r2, #127) /* size % VLEN */ + v1 = #0 + v0 = #0 + } + /* + * Step 1: Clean the whole vector register file + */ + { v3:2 = v1:0 + v5:4 = v1:0 + p0 = cmp.gt(r2, #0) /* P0 = (width / VLEN > 0) */ + p1 = cmp.eq(r5, #0) /* P1 = (width % VLEN == 0) */ + } + { q0 = vsetq(r5) + v7:6 = v1:0 + } + { v9:8 = v1:0 + v11:10 = v1:0 + } + { v13:12 = v1:0 + v15:14 = v1:0 + } + { v17:16 = v1:0 + v19:18 = v1:0 + } + { v21:20 = v1:0 + v23:22 = v1:0 + } + { v25:24 = v1:0 + v27:26 = v1:0 + } + { v29:28 = v1:0 + v31:30 = v1:0 + r10 = add(r0, r1) /* R10 = &src[2 * stride] */ + loop1(.outerloop, r3) + } + + /* + * Step 2: vhist + */ + .falign +.outerloop: + { if (!p0) jump .loopend + loop0(.innerloop, r2) + } + + .falign +.innerloop: + { v12.tmp = vmem(R0++#1) + vhist + }:endloop0 + + .falign +.loopend: + if (p1) jump .skip /* if (width % VLEN == 0) done with current row */ + { v13.tmp = vmem(r0 + #0) + vhist(q0) + } + + .falign +.skip: + { r0 = r10 /* R0 = &src[(i + 1) * stride] */ + r10 = add(r10, r1) /* R10 = &src[(i + 2) * stride] */ + }:endloop1 + + + /* + * Step 3: Sum up the data + */ + { v0.h = vshuff(v0.h) + r10 = ##0x00010001 + } + v1.h = vshuff(v1.h) + { V2.h = vshuff(v2.h) + v0.w = vdmpy(v0.h, r10.h):sat + } + { v3.h = vshuff(v3.h) + v1.w = vdmpy(v1.h, r10.h):sat + } + { v4.h = vshuff(V4.h) + v2.w = vdmpy(v2.h, r10.h):sat + } + { v5.h = vshuff(v5.h) + v3.w = vdmpy(v3.h, r10.h):sat + } + { v6.h = vshuff(v6.h) + v4.w = vdmpy(v4.h, r10.h):sat + } + { v7.h = vshuff(v7.h) + v5.w = 
vdmpy(v5.h, r10.h):sat + } + { v8.h = vshuff(V8.h) + v6.w = vdmpy(v6.h, r10.h):sat + } + { v9.h = vshuff(V9.h) + v7.w = vdmpy(v7.h, r10.h):sat + } + { v10.h = vshuff(v10.h) + v8.w = vdmpy(v8.h, r10.h):sat + } + { v11.h = vshuff(v11.h) + v9.w = vdmpy(v9.h, r10.h):sat + } + { v12.h = vshuff(v12.h) + v10.w = vdmpy(v10.h, r10.h):sat + } + { v13.h = vshuff(V13.h) + v11.w = vdmpy(v11.h, r10.h):sat + } + { v14.h = vshuff(v14.h) + v12.w = vdmpy(v12.h, r10.h):sat + } + { v15.h = vshuff(v15.h) + v13.w = vdmpy(v13.h, r10.h):sat + } + { v16.h = vshuff(v16.h) + v14.w = vdmpy(v14.h, r10.h):sat + } + { v17.h = vshuff(v17.h) + v15.w = vdmpy(v15.h, r10.h):sat + } + { v18.h = vshuff(v18.h) + v16.w = vdmpy(v16.h, r10.h):sat + } + { v19.h = vshuff(v19.h) + v17.w = vdmpy(v17.h, r10.h):sat + } + { v20.h = vshuff(v20.h) + v18.W = vdmpy(v18.h, r10.h):sat + } + { v21.h = vshuff(v21.h) + v19.w = vdmpy(v19.h, r10.h):sat + } + { v22.h = vshuff(v22.h) + v20.w = vdmpy(v20.h, r10.h):sat + } + { v23.h = vshuff(v23.h) + v21.w = vdmpy(v21.h, r10.h):sat + } + { v24.h = vshuff(v24.h) + v22.w = vdmpy(v22.h, r10.h):sat + } + { v25.h = vshuff(v25.h) + v23.w = vdmpy(v23.h, r10.h):sat + } + { v26.h = vshuff(v26.h) + v24.w = vdmpy(v24.h, r10.h):sat + } + { v27.h = vshuff(V27.h) + v25.w = vdmpy(v25.h, r10.h):sat + } + { v28.h = vshuff(v28.h) + v26.w = vdmpy(v26.h, r10.h):sat + } + { v29.h = vshuff(v29.h) + v27.w = vdmpy(v27.h, r10.h):sat + } + { v30.h = vshuff(v30.h) + v28.w = vdmpy(v28.h, r10.h):sat + } + { v31.h = vshuff(v31.h) + v29.w = vdmpy(v29.h, r10.h):sat + r28 = #32 + } + { vshuff(v1, v0, r28) + v30.w = vdmpy(v30.h, r10.h):sat + } + { vshuff(v3, v2, r28) + v31.w = vdmpy(v31.h, r10.h):sat + } + { vshuff(v5, v4, r28) + v0.w = vadd(v1.w, v0.w) + v2.w = vadd(v3.w, v2.w) + } + { vshuff(v7, v6, r28) + r7 = #64 + } + { vshuff(v9, v8, r28) + v4.w = vadd(v5.w, v4.w) + v6.w = vadd(v7.w, v6.w) + } + vshuff(v11, v10, r28) + { vshuff(v13, v12, r28) + v8.w = vadd(v9.w, v8.w) + v10.w = vadd(v11.w, v10.w) + } + 
vshuff(v15, v14, r28) + { vshuff(v17, v16, r28) + v12.w = vadd(v13.w, v12.w) + v14.w = vadd(v15.w, v14.w) + } + vshuff(v19, v18, r28) + { vshuff(v21, v20, r28) + v16.w = vadd(v17.w, v16.w) + v18.w = vadd(v19.w, v18.w) + } + vshuff(v23, v22, r28) + { vshuff(v25, v24, r28) + v20.w = vadd(v21.w, v20.w) + v22.w = vadd(v23.w, v22.w) + } + vshuff(v27, v26, r28) + { vshuff(v29, v28, r28) + v24.w = vadd(v25.w, v24.w) + v26.w = vadd(v27.w, v26.w) + } + vshuff(v31, v30, r28) + { v28.w = vadd(v29.w, v28.w) + vshuff(v2, v0, r7) + } + { v30.w = vadd(v31.w, v30.w) + vshuff(v6, v4, r7) + v0.w = vadd(v0.w, v2.w) + } + { vshuff(v10, v8, r7) + v1.tmp = vmem(r4 + #0) /* update hist[0-31] */ + v0.w = vadd(v0.w, v1.w) + vmem(r4++#1) = v0.new + } + { vshuff(v14, v12, r7) + v4.w = vadd(v4.w, v6.w) + v8.w = vadd(v8.w, v10.w) + } + { vshuff(v18, v16, r7) + v1.tmp = vmem(r4 + #0) /* update hist[32-63] */ + v4.w = vadd(v4.w, v1.w) + vmem(r4++#1) = v4.new + } + { vshuff(v22, v20, r7) + v12.w = vadd(v12.w, v14.w) + V16.w = vadd(v16.w, v18.w) + } + { vshuff(v26, v24, r7) + v1.tmp = vmem(r4 + #0) /* update hist[64-95] */ + v8.w = vadd(v8.w, v1.w) + vmem(r4++#1) = v8.new + } + { vshuff(v30, v28, r7) + v1.tmp = vmem(r4 + #0) /* update hist[96-127] */ + v12.w = vadd(v12.w, v1.w) + vmem(r4++#1) = v12.new + } + + { v20.w = vadd(v20.w, v22.w) + v1.tmp = vmem(r4 + #0) /* update hist[128-159] */ + v16.w = vadd(v16.w, v1.w) + vmem(r4++#1) = v16.new + } + { v24.w = vadd(v24.w, v26.w) + v1.tmp = vmem(r4 + #0) /* update hist[160-191] */ + v20.w = vadd(v20.w, v1.w) + vmem(r4++#1) = v20.new + } + { v28.w = vadd(v28.w, v30.w) + v1.tmp = vmem(r4 + #0) /* update hist[192-223] */ + v24.w = vadd(v24.w, v1.w) + vmem(r4++#1) = v24.new + } + { v1.tmp = vmem(r4 + #0) /* update hist[224-255] */ + v28.w = vadd(v28.w, v1.w) + vmem(r4++#1) = v28.new + } + jumpr r31 + .size hvx_histogram_row, .-hvx_histogram_row diff --git a/tests/tcg/hexagon/hvx_histogram_row.h b/tests/tcg/hexagon/hvx_histogram_row.h new file mode 100644 
index 0000000000..6a4531a92d --- /dev/null +++ b/tests/tcg/hexagon/hvx_histogram_row.h @@ -0,0 +1,24 @@ +/* + * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef HVX_HISTOGRAM_ROW_H +#define HVX_HISTOGRAM_ROW_H + +void hvx_histogram_row(uint8_t *src, int stride, int width, int height, + int *hist); + +#endif diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c new file mode 100644 index 0000000000..312bb98b41 --- /dev/null +++ b/tests/tcg/hexagon/hvx_misc.c @@ -0,0 +1,469 @@ +/* + * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <string.h> + +int err; + +static void __check(int line, int i, int j, uint64_t result, uint64_t expect) +{ + if (result != expect) { + printf("ERROR at line %d: [%d][%d] 0x%016llx != 0x%016llx\n", + line, i, j, result, expect); + err++; + } +} + +#define check(RES, EXP) __check(__LINE__, RES, EXP) + +#define MAX_VEC_SIZE_BYTES 128 + +typedef union { + uint64_t ud[MAX_VEC_SIZE_BYTES / 8]; + int64_t d[MAX_VEC_SIZE_BYTES / 8]; + uint32_t uw[MAX_VEC_SIZE_BYTES / 4]; + int32_t w[MAX_VEC_SIZE_BYTES / 4]; + uint16_t uh[MAX_VEC_SIZE_BYTES / 2]; + int16_t h[MAX_VEC_SIZE_BYTES / 2]; + uint8_t ub[MAX_VEC_SIZE_BYTES / 1]; + int8_t b[MAX_VEC_SIZE_BYTES / 1]; +} MMVector; + +#define BUFSIZE 16 +#define OUTSIZE 16 +#define MASKMOD 3 + +MMVector buffer0[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES))); +MMVector buffer1[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES))); +MMVector mask[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES))); +MMVector output[OUTSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES))); +MMVector expect[OUTSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES))); + +#define CHECK_OUTPUT_FUNC(FIELD, FIELDSZ) \ +static void check_output_##FIELD(int line, size_t num_vectors) \ +{ \ + for (int i = 0; i < num_vectors; i++) { \ + for (int j = 0; j < MAX_VEC_SIZE_BYTES / FIELDSZ; j++) { \ + __check(line, i, j, output[i].FIELD[j], expect[i].FIELD[j]); \ + } \ + } \ +} + +CHECK_OUTPUT_FUNC(d, 8) +CHECK_OUTPUT_FUNC(w, 4) +CHECK_OUTPUT_FUNC(h, 2) +CHECK_OUTPUT_FUNC(b, 1) + +static void init_buffers(void) +{ + int counter0 = 0; + int counter1 = 17; + for (int i = 0; i < BUFSIZE; i++) { + for (int j = 0; j < MAX_VEC_SIZE_BYTES; j++) { + buffer0[i].b[j] = counter0++; + buffer1[i].b[j] = counter1++; + } + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + mask[i].w[j] = (i + j % MASKMOD == 0) ? 
0 : 1; + } + } +} + +static void test_load_tmp(void) +{ + void *p0 = buffer0; + void *p1 = buffer1; + void *pout = output; + + for (int i = 0; i < BUFSIZE; i++) { + /* + * Load into v12 as .tmp, then use it in the next packet + * Should get the new value within the same packet and + * the old value in the next packet + */ + asm("v3 = vmem(%0 + #0)\n\t" + "r1 = #1\n\t" + "v12 = vsplat(r1)\n\t" + "{\n\t" + " v12.tmp = vmem(%1 + #0)\n\t" + " v4.w = vadd(v12.w, v3.w)\n\t" + "}\n\t" + "v4.w = vadd(v4.w, v12.w)\n\t" + "vmem(%2 + #0) = v4\n\t" + : : "r"(p0), "r"(p1), "r"(pout) + : "r1", "v12", "v3", "v4", "v6", "memory"); + p0 += sizeof(MMVector); + p1 += sizeof(MMVector); + pout += sizeof(MMVector); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + expect[i].w[j] = buffer0[i].w[j] + buffer1[i].w[j] + 1; + } + } + + check_output_w(__LINE__, BUFSIZE); +} + +static void test_load_cur(void) +{ + void *p0 = buffer0; + void *pout = output; + + for (int i = 0; i < BUFSIZE; i++) { + asm("{\n\t" + " v2.cur = vmem(%0 + #0)\n\t" + " vmem(%1 + #0) = v2\n\t" + "}\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + p0 += sizeof(MMVector); + pout += sizeof(MMVector); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + expect[i].uw[j] = buffer0[i].uw[j]; + } + } + + check_output_w(__LINE__, BUFSIZE); +} + +static void test_load_aligned(void) +{ + /* Aligned loads ignore the low bits of the address */ + void *p0 = buffer0; + void *pout = output; + const size_t offset = 13; + + p0 += offset; /* Create an unaligned address */ + asm("v2 = vmem(%0 + #0)\n\t" + "vmem(%1 + #0) = v2\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + + expect[0] = buffer0[0]; + + check_output_w(__LINE__, 1); +} + +static void test_load_unaligned(void) +{ + void *p0 = buffer0; + void *pout = output; + const size_t offset = 12; + + p0 += offset; /* Create an unaligned address */ + asm("v2 = vmemu(%0 + #0)\n\t" + "vmem(%1 + #0) = v2\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + + memcpy(expect, 
&buffer0[0].ub[offset], sizeof(MMVector)); + + check_output_w(__LINE__, 1); +} + +static void test_store_aligned(void) +{ + /* Aligned stores ignore the low bits of the address */ + void *p0 = buffer0; + void *pout = output; + const size_t offset = 13; + + pout += offset; /* Create an unaligned address */ + asm("v2 = vmem(%0 + #0)\n\t" + "vmem(%1 + #0) = v2\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + + expect[0] = buffer0[0]; + + check_output_w(__LINE__, 1); +} + +static void test_store_unaligned(void) +{ + void *p0 = buffer0; + void *pout = output; + const size_t offset = 12; + + pout += offset; /* Create an unaligned address */ + asm("v2 = vmem(%0 + #0)\n\t" + "vmemu(%1 + #0) = v2\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + + memcpy(expect, buffer0, 2 * sizeof(MMVector)); + memcpy(&expect[0].ub[offset], buffer0, sizeof(MMVector)); + + check_output_w(__LINE__, 2); +} + +static void test_masked_store(bool invert) +{ + void *p0 = buffer0; + void *pmask = mask; + void *pout = output; + + memset(expect, 0xff, sizeof(expect)); + memset(output, 0xff, sizeof(expect)); + + for (int i = 0; i < BUFSIZE; i++) { + if (invert) { + asm("r4 = #0\n\t" + "v4 = vsplat(r4)\n\t" + "v5 = vmem(%0 + #0)\n\t" + "q0 = vcmp.eq(v4.w, v5.w)\n\t" + "v5 = vmem(%1)\n\t" + "if (!q0) vmem(%2) = v5\n\t" /* Inverted test */ + : : "r"(pmask), "r"(p0), "r"(pout) + : "r4", "v4", "v5", "q0", "memory"); + } else { + asm("r4 = #0\n\t" + "v4 = vsplat(r4)\n\t" + "v5 = vmem(%0 + #0)\n\t" + "q0 = vcmp.eq(v4.w, v5.w)\n\t" + "v5 = vmem(%1)\n\t" + "if (q0) vmem(%2) = v5\n\t" /* Non-inverted test */ + : : "r"(pmask), "r"(p0), "r"(pout) + : "r4", "v4", "v5", "q0", "memory"); + } + p0 += sizeof(MMVector); + pmask += sizeof(MMVector); + pout += sizeof(MMVector); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + if (invert) { + if (i + j % MASKMOD != 0) { + expect[i].w[j] = buffer0[i].w[j]; + } + } else { + if (i + j % MASKMOD == 0) { + expect[i].w[j] = buffer0[i].w[j]; + } + } + } + } + + 
check_output_w(__LINE__, BUFSIZE); +} + +static void test_new_value_store(void) +{ + void *p0 = buffer0; + void *pout = output; + + asm("{\n\t" + " v2 = vmem(%0 + #0)\n\t" + " vmem(%1 + #0) = v2.new\n\t" + "}\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + + expect[0] = buffer0[0]; + + check_output_w(__LINE__, 1); +} + +static void test_max_temps(void) +{ + void *p0 = buffer0; + void *pout = output; + + /* The asm writes v0-v4, so all five are declared as clobbers */ + asm("v0 = vmem(%0 + #0)\n\t" + "v1 = vmem(%0 + #1)\n\t" + "v2 = vmem(%0 + #2)\n\t" + "v3 = vmem(%0 + #3)\n\t" + "v4 = vmem(%0 + #4)\n\t" + "{\n\t" + " v1:0.w = vadd(v3:2.w, v1:0.w)\n\t" + " v2.b = vshuffe(v3.b, v2.b)\n\t" + " v3.w = vadd(v1.w, v4.w)\n\t" + " v4.tmp = vmem(%0 + #5)\n\t" + "}\n\t" + "vmem(%1 + #0) = v0\n\t" + "vmem(%1 + #1) = v1\n\t" + "vmem(%1 + #2) = v2\n\t" + "vmem(%1 + #3) = v3\n\t" + "vmem(%1 + #4) = v4\n\t" + : : "r"(p0), "r"(pout) + : "v0", "v1", "v2", "v3", "v4", "memory"); + + /* The first two vectors come from the vadd-pair instruction */ + for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) { + expect[0].w[i] = buffer0[0].w[i] + buffer0[2].w[i]; + expect[1].w[i] = buffer0[1].w[i] + buffer0[3].w[i]; + } + /* The third vector comes from the vshuffe instruction */ + for (int i = 0; i < MAX_VEC_SIZE_BYTES / 2; i++) { + expect[2].uh[i] = (buffer0[2].uh[i] & 0xff) | + (buffer0[3].uh[i] & 0xff) << 8; + } + /* The fourth vector comes from the vadd-single instruction */ + for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) { + expect[3].w[i] = buffer0[1].w[i] + buffer0[5].w[i]; + } + /* + * The fifth vector comes from the load to v4 + * make sure the .tmp is dropped + */ + expect[4] = buffer0[4]; + + check_output_b(__LINE__, 5); +} + +#define VEC_OP1(ASM, EL, IN, OUT) \ + asm("v2 = vmem(%0 + #0)\n\t" \ + "v2" #EL " = " #ASM "(v2" #EL ")\n\t" \ + "vmem(%1 + #0) = v2\n\t" \ + : : "r"(IN), "r"(OUT) : "v2", "memory") + +#define VEC_OP2(ASM, EL, IN0, IN1, OUT) \ + asm("v2 = vmem(%0 + #0)\n\t" \ + "v3 = vmem(%1 + #0)\n\t" \ + "v2" #EL " = " #ASM "(v2" #EL ", v3" #EL ")\n\t" \ + "vmem(%2 + #0) 
= v2\n\t" \ + : : "r"(IN0), "r"(IN1), "r"(OUT) : "v2", "v3", "memory") + +#define TEST_VEC_OP1(NAME, ASM, EL, FIELD, FIELDSZ, OP) \ +static void test_##NAME(void) \ +{ \ + void *pin = buffer0; \ + void *pout = output; \ + for (int i = 0; i < BUFSIZE; i++) { \ + VEC_OP1(ASM, EL, pin, pout); \ + pin += sizeof(MMVector); \ + pout += sizeof(MMVector); \ + } \ + for (int i = 0; i < BUFSIZE; i++) { \ + for (int j = 0; j < MAX_VEC_SIZE_BYTES / FIELDSZ; j++) { \ + expect[i].FIELD[j] = OP buffer0[i].FIELD[j]; \ + } \ + } \ + check_output_##FIELD(__LINE__, BUFSIZE); \ +} + +#define TEST_VEC_OP2(NAME, ASM, EL, FIELD, FIELDSZ, OP) \ +static void test_##NAME(void) \ +{ \ + void *p0 = buffer0; \ + void *p1 = buffer1; \ + void *pout = output; \ + for (int i = 0; i < BUFSIZE; i++) { \ + VEC_OP2(ASM, EL, p0, p1, pout); \ + p0 += sizeof(MMVector); \ + p1 += sizeof(MMVector); \ + pout += sizeof(MMVector); \ + } \ + for (int i = 0; i < BUFSIZE; i++) { \ + for (int j = 0; j < MAX_VEC_SIZE_BYTES / FIELDSZ; j++) { \ + expect[i].FIELD[j] = buffer0[i].FIELD[j] OP buffer1[i].FIELD[j]; \ + } \ + } \ + check_output_##FIELD(__LINE__, BUFSIZE); \ +} + +#define THRESHOLD 31 + +#define PRED_OP2(ASM, IN0, IN1, OUT, INV) \ + asm("r4 = #%3\n\t" \ + "v1.b = vsplat(r4)\n\t" \ + "v2 = vmem(%0 + #0)\n\t" \ + "q0 = vcmp.gt(v2.b, v1.b)\n\t" \ + "v3 = vmem(%1 + #0)\n\t" \ + "q1 = vcmp.gt(v3.b, v1.b)\n\t" \ + "q2 = " #ASM "(q0, " INV "q1)\n\t" \ + "r4 = #0xff\n\t" \ + "v1.b = vsplat(r4)\n\t" \ + "if (q2) vmem(%2 + #0) = v1\n\t" \ + : : "r"(IN0), "r"(IN1), "r"(OUT), "i"(THRESHOLD) \ + : "r4", "v1", "v2", "v3", "q0", "q1", "q2", "memory") + +#define TEST_PRED_OP2(NAME, ASM, OP, INV) \ +static void test_##NAME(bool invert) \ +{ \ + void *p0 = buffer0; \ + void *p1 = buffer1; \ + void *pout = output; \ + memset(output, 0, sizeof(expect)); \ + for (int i = 0; i < BUFSIZE; i++) { \ + PRED_OP2(ASM, p0, p1, pout, INV); \ + p0 += sizeof(MMVector); \ + p1 += sizeof(MMVector); \ + pout += sizeof(MMVector); \ + } \ + 
for (int i = 0; i < BUFSIZE; i++) { \ + for (int j = 0; j < MAX_VEC_SIZE_BYTES; j++) { \ + bool p0 = (buffer0[i].b[j] > THRESHOLD); \ + bool p1 = (buffer1[i].b[j] > THRESHOLD); \ + if (invert) { \ + expect[i].b[j] = (p0 OP !p1) ? 0xff : 0x00; \ + } else { \ + expect[i].b[j] = (p0 OP p1) ? 0xff : 0x00; \ + } \ + } \ + } \ + check_output_b(__LINE__, BUFSIZE); \ +} + +TEST_VEC_OP2(vadd_w, vadd, .w, w, 4, +) +TEST_VEC_OP2(vadd_h, vadd, .h, h, 2, +) +TEST_VEC_OP2(vadd_b, vadd, .b, b, 1, +) +TEST_VEC_OP2(vsub_w, vsub, .w, w, 4, -) +TEST_VEC_OP2(vsub_h, vsub, .h, h, 2, -) +TEST_VEC_OP2(vsub_b, vsub, .b, b, 1, -) +TEST_VEC_OP2(vxor, vxor, , d, 8, ^) +TEST_VEC_OP2(vand, vand, , d, 8, &) +TEST_VEC_OP2(vor, vor, , d, 8, |) +TEST_VEC_OP1(vnot, vnot, , d, 8, ~) + +TEST_PRED_OP2(pred_or, or, |, "") +TEST_PRED_OP2(pred_or_n, or, |, "!") +TEST_PRED_OP2(pred_and, and, &, "") +TEST_PRED_OP2(pred_and_n, and, &, "!") +TEST_PRED_OP2(pred_xor, xor, ^, "") + +int main() +{ + init_buffers(); + + test_load_tmp(); + test_load_cur(); + test_load_aligned(); + test_load_unaligned(); + test_store_aligned(); + test_store_unaligned(); + test_masked_store(false); + test_masked_store(true); + test_new_value_store(); + test_max_temps(); + + test_vadd_w(); + test_vadd_h(); + test_vadd_b(); + test_vsub_w(); + test_vsub_h(); + test_vsub_b(); + test_vxor(); + test_vand(); + test_vor(); + test_vnot(); + + test_pred_or(false); + test_pred_or_n(true); + test_pred_and(false); + test_pred_and_n(true); + test_pred_xor(false); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} diff --git a/tests/tcg/hexagon/scatter_gather.c b/tests/tcg/hexagon/scatter_gather.c new file mode 100644 index 0000000000..b93eb18133 --- /dev/null +++ b/tests/tcg/hexagon/scatter_gather.c @@ -0,0 +1,1011 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * This example tests the HVX scatter/gather instructions + * + * See section 5.13 of the V68 HVX Programmer's Reference + * + * There are 3 main classes of operations + * _16 16-bit elements and 16-bit offsets + * _32 32-bit elements and 32-bit offsets + * _16_32 16-bit elements and 32-bit offsets + * + * There are also masked and accumulate versions + */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <inttypes.h> + +typedef long HVX_Vector __attribute__((__vector_size__(128))) + __attribute__((aligned(128))); +typedef long HVX_VectorPair __attribute__((__vector_size__(256))) + __attribute__((aligned(128))); +typedef long HVX_VectorPred __attribute__((__vector_size__(128))) + __attribute__((aligned(128))); + +#define VSCATTER_16(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermh_128B((int)BASE, RGN, OFF, VALS) +#define VSCATTER_16_MASKED(MASK, BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermhq_128B(MASK, (int)BASE, RGN, OFF, VALS) +#define VSCATTER_32(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermw_128B((int)BASE, RGN, OFF, VALS) +#define VSCATTER_32_MASKED(MASK, BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermwq_128B(MASK, (int)BASE, RGN, OFF, VALS) +#define VSCATTER_16_32(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermhw_128B((int)BASE, 
RGN, OFF, VALS) +#define VSCATTER_16_32_MASKED(MASK, BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermhwq_128B(MASK, (int)BASE, RGN, OFF, VALS) +#define VSCATTER_16_ACC(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermh_add_128B((int)BASE, RGN, OFF, VALS) +#define VSCATTER_32_ACC(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermw_add_128B((int)BASE, RGN, OFF, VALS) +#define VSCATTER_16_32_ACC(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermhw_add_128B((int)BASE, RGN, OFF, VALS) + +#define VGATHER_16(DSTADDR, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermh_128B(DSTADDR, (int)BASE, RGN, OFF) +#define VGATHER_16_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermhq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF) +#define VGATHER_32(DSTADDR, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermw_128B(DSTADDR, (int)BASE, RGN, OFF) +#define VGATHER_32_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermwq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF) +#define VGATHER_16_32(DSTADDR, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermhw_128B(DSTADDR, (int)BASE, RGN, OFF) +#define VGATHER_16_32_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermhwq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF) + +#define VSHUFF_H(V) \ + __builtin_HEXAGON_V6_vshuffh_128B(V) +#define VSPLAT_H(X) \ + __builtin_HEXAGON_V6_lvsplath_128B(X) +#define VAND_VAL(PRED, VAL) \ + __builtin_HEXAGON_V6_vandvrt_128B(PRED, VAL) +#define VDEAL_H(V) \ + __builtin_HEXAGON_V6_vdealh_128B(V) + +int err; + +/* define the number of rows/cols in a square matrix */ +#define MATRIX_SIZE 64 + +/* define the size of the scatter buffer */ +#define SCATTER_BUFFER_SIZE (MATRIX_SIZE * MATRIX_SIZE) + +/* fake vtcm - put buffers together and force alignment */ +static struct { + unsigned short vscatter16[SCATTER_BUFFER_SIZE]; + unsigned short vgather16[MATRIX_SIZE]; + unsigned int vscatter32[SCATTER_BUFFER_SIZE]; + unsigned int vgather32[MATRIX_SIZE]; + 
unsigned short vscatter16_32[SCATTER_BUFFER_SIZE]; + unsigned short vgather16_32[MATRIX_SIZE]; +} vtcm __attribute__((aligned(0x10000))); + +/* declare the arrays of reference values */ +unsigned short vscatter16_ref[SCATTER_BUFFER_SIZE]; +unsigned short vgather16_ref[MATRIX_SIZE]; +unsigned int vscatter32_ref[SCATTER_BUFFER_SIZE]; +unsigned int vgather32_ref[MATRIX_SIZE]; +unsigned short vscatter16_32_ref[SCATTER_BUFFER_SIZE]; +unsigned short vgather16_32_ref[MATRIX_SIZE]; + +/* declare the arrays of offsets */ +unsigned short half_offsets[MATRIX_SIZE]; +unsigned int word_offsets[MATRIX_SIZE]; + +/* declare the arrays of values */ +unsigned short half_values[MATRIX_SIZE]; +unsigned short half_values_acc[MATRIX_SIZE]; +unsigned short half_values_masked[MATRIX_SIZE]; +unsigned int word_values[MATRIX_SIZE]; +unsigned int word_values_acc[MATRIX_SIZE]; +unsigned int word_values_masked[MATRIX_SIZE]; + +/* declare the arrays of predicates */ +unsigned short half_predicates[MATRIX_SIZE]; +unsigned int word_predicates[MATRIX_SIZE]; + +/* make this big enough for all the intrinsics */ +const size_t region_len = sizeof(vtcm); + +/* optionally add sync instructions */ +#define SYNC_VECTOR 1 + +static void sync_scatter(void *addr) +{ +#if SYNC_VECTOR + /* + * Do the scatter release followed by a dummy load to complete the + * synchronization. Normally the dummy load would be deferred as + * long as possible to minimize stalls. + */ + asm volatile("vmem(%0 + #0):scatter_release\n" : : "r"(addr)); + /* use volatile to force the load */ + volatile HVX_Vector vDummy = *(HVX_Vector *)addr; vDummy = vDummy; +#endif +} + +static void sync_gather(void *addr) +{ +#if SYNC_VECTOR + /* use volatile to force the load */ + volatile HVX_Vector vDummy = *(HVX_Vector *)addr; vDummy = vDummy; +#endif +} + +/* optionally print the results */ +#define PRINT_DATA 0 + +#define FILL_CHAR '.' 
+ +/* fill vtcm scratch with FILL_CHAR */ +void prefill_vtcm_scratch(void) +{ + memset(&vtcm, FILL_CHAR, sizeof(vtcm)); +} + +/* create byte offsets to be a diagonal of the matrix with 16 bit elements */ +void create_offsets_values_preds_16(void) +{ + unsigned short half_element = 0; + unsigned short half_element_masked = 0; + char letter = 'A'; + char letter_masked = '@'; + + for (int i = 0; i < MATRIX_SIZE; i++) { + half_offsets[i] = i * (2 * MATRIX_SIZE + 2); + + half_element = 0; + half_element_masked = 0; + for (int j = 0; j < 2; j++) { + half_element |= letter << j * 8; + half_element_masked |= letter_masked << j * 8; + } + + half_values[i] = half_element; + half_values_acc[i] = ((i % 10) << 8) + (i % 10); + half_values_masked[i] = half_element_masked; + + letter++; + /* reset to 'A' */ + if (letter == 'M') { + letter = 'A'; + } + + half_predicates[i] = (i % 3 == 0 || i % 5 == 0) ? ~0 : 0; + } +} + +/* create byte offsets to be a diagonal of the matrix with 32 bit elements */ +void create_offsets_values_preds_32(void) +{ + unsigned int word_element = 0; + unsigned int word_element_masked = 0; + char letter = 'A'; + char letter_masked = '&'; + + for (int i = 0; i < MATRIX_SIZE; i++) { + word_offsets[i] = i * (4 * MATRIX_SIZE + 4); + + word_element = 0; + word_element_masked = 0; + for (int j = 0; j < 4; j++) { + word_element |= letter << j * 8; + word_element_masked |= letter_masked << j * 8; + } + + word_values[i] = word_element; + word_values_acc[i] = ((i % 10) << 8) + (i % 10); + word_values_masked[i] = word_element_masked; + + letter++; + /* reset to 'A' */ + if (letter == 'M') { + letter = 'A'; + } + + word_predicates[i] = (i % 4 == 0 || i % 7 == 0) ? 
~0 : 0; + } +} + +/* + * create byte offsets to be a diagonal of the matrix with 16 bit elements + * and 32 bit offsets + */ +void create_offsets_values_preds_16_32(void) +{ + unsigned short half_element = 0; + unsigned short half_element_masked = 0; + char letter = 'D'; + char letter_masked = '$'; + + for (int i = 0; i < MATRIX_SIZE; i++) { + word_offsets[i] = i * (2 * MATRIX_SIZE + 2); + + half_element = 0; + half_element_masked = 0; + for (int j = 0; j < 2; j++) { + half_element |= letter << j * 8; + half_element_masked |= letter_masked << j * 8; + } + + half_values[i] = half_element; + half_values_acc[i] = ((i % 10) << 8) + (i % 10); + half_values_masked[i] = half_element_masked; + + letter++; + /* reset to 'D' */ + if (letter == 'P') { + letter = 'D'; + } + + half_predicates[i] = (i % 2 == 0 || i % 13 == 0) ? ~0 : 0; + } +} + +/* scatter the 16 bit elements using intrinsics */ +void vector_scatter_16(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector values = *(HVX_Vector *)half_values; + + VSCATTER_16(&vtcm.vscatter16, region_len, offsets, values); + + sync_scatter(vtcm.vscatter16); +} + +/* scatter-accumulate the 16 bit elements using intrinsics */ +void vector_scatter_16_acc(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector values = *(HVX_Vector *)half_values_acc; + + VSCATTER_16_ACC(&vtcm.vscatter16, region_len, offsets, values); + + sync_scatter(vtcm.vscatter16); +} + +/* masked scatter of the 16 bit elements using intrinsics */ +void vector_scatter_16_masked(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector values = *(HVX_Vector *)half_values_masked; + HVX_Vector pred_reg = *(HVX_Vector *)half_predicates; + HVX_VectorPred preds = VAND_VAL(pred_reg, ~0); + + VSCATTER_16_MASKED(preds, &vtcm.vscatter16, region_len, offsets, values); + + 
sync_scatter(vtcm.vscatter16); +} + +/* scatter the 32 bit elements using intrinsics */ +void vector_scatter_32(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector valueslo = *(HVX_Vector *)word_values; + HVX_Vector valueshi = *(HVX_Vector *)&word_values[MATRIX_SIZE / 2]; + + VSCATTER_32(&vtcm.vscatter32, region_len, offsetslo, valueslo); + VSCATTER_32(&vtcm.vscatter32, region_len, offsetshi, valueshi); + + sync_scatter(vtcm.vscatter32); +} + +/* scatter-acc the 32 bit elements using intrinsics */ +void vector_scatter_32_acc(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector valueslo = *(HVX_Vector *)word_values_acc; + HVX_Vector valueshi = *(HVX_Vector *)&word_values_acc[MATRIX_SIZE / 2]; + + VSCATTER_32_ACC(&vtcm.vscatter32, region_len, offsetslo, valueslo); + VSCATTER_32_ACC(&vtcm.vscatter32, region_len, offsetshi, valueshi); + + sync_scatter(vtcm.vscatter32); +} + +/* scatter the 32 bit elements using intrinsics */ +void vector_scatter_32_masked(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector valueslo = *(HVX_Vector *)word_values_masked; + HVX_Vector valueshi = *(HVX_Vector *)&word_values_masked[MATRIX_SIZE / 2]; + HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates; + HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2]; + HVX_VectorPred predslo = VAND_VAL(pred_reglo, ~0); + HVX_VectorPred predshi = VAND_VAL(pred_reghi, ~0); + + VSCATTER_32_MASKED(predslo, &vtcm.vscatter32, region_len, offsetslo, + valueslo); + VSCATTER_32_MASKED(predshi, &vtcm.vscatter32, region_len, offsetshi, + valueshi); + + 
sync_scatter(vtcm.vscatter32); +} + +/* scatter the 16 bit elements with 32 bit offsets using intrinsics */ +void vector_scatter_16_32(void) +{ + HVX_VectorPair offsets; + HVX_Vector values; + + /* get the word offsets in a vector pair */ + offsets = *(HVX_VectorPair *)word_offsets; + + /* these values need to be shuffled for the scatter */ + values = *(HVX_Vector *)half_values; + values = VSHUFF_H(values); + + VSCATTER_16_32(&vtcm.vscatter16_32, region_len, offsets, values); + + sync_scatter(vtcm.vscatter16_32); +} + +/* scatter-acc the 16 bit elements with 32 bit offsets using intrinsics */ +void vector_scatter_16_32_acc(void) +{ + HVX_VectorPair offsets; + HVX_Vector values; + + /* get the word offsets in a vector pair */ + offsets = *(HVX_VectorPair *)word_offsets; + + /* these values need to be shuffled for the scatter */ + values = *(HVX_Vector *)half_values_acc; + values = VSHUFF_H(values); + + VSCATTER_16_32_ACC(&vtcm.vscatter16_32, region_len, offsets, values); + + sync_scatter(vtcm.vscatter16_32); +} + +/* masked scatter the 16 bit elements with 32 bit offsets using intrinsics */ +void vector_scatter_16_32_masked(void) +{ + HVX_VectorPair offsets; + HVX_Vector values; + HVX_Vector pred_reg; + + /* get the word offsets in a vector pair */ + offsets = *(HVX_VectorPair *)word_offsets; + + /* these values need to be shuffled for the scatter */ + values = *(HVX_Vector *)half_values_masked; + values = VSHUFF_H(values); + + pred_reg = *(HVX_Vector *)half_predicates; + pred_reg = VSHUFF_H(pred_reg); + HVX_VectorPred preds = VAND_VAL(pred_reg, ~0); + + VSCATTER_16_32_MASKED(preds, &vtcm.vscatter16_32, region_len, offsets, + values); + + sync_scatter(vtcm.vscatter16_32); +} + +/* gather the elements from the scatter16 buffer */ +void vector_gather_16(void) +{ + HVX_Vector *vgather = (HVX_Vector *)&vtcm.vgather16; + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + + VGATHER_16(vgather, &vtcm.vscatter16, region_len, offsets); + + sync_gather(vgather); +} + +static 
unsigned short gather_16_masked_init(void) +{ + char letter = '?'; + return letter | (letter << 8); +} + +void vector_gather_16_masked(void) +{ + HVX_Vector *vgather = (HVX_Vector *)&vtcm.vgather16; + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector pred_reg = *(HVX_Vector *)half_predicates; + HVX_VectorPred preds = VAND_VAL(pred_reg, ~0); + + *vgather = VSPLAT_H(gather_16_masked_init()); + VGATHER_16_MASKED(vgather, preds, &vtcm.vscatter16, region_len, offsets); + + sync_gather(vgather); +} + +/* gather the elements from the scatter32 buffer */ +void vector_gather_32(void) +{ + HVX_Vector *vgatherlo = (HVX_Vector *)&vtcm.vgather32; + HVX_Vector *vgatherhi = + (HVX_Vector *)((int)&vtcm.vgather32 + (MATRIX_SIZE * 2)); + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + + VGATHER_32(vgatherlo, &vtcm.vscatter32, region_len, offsetslo); + VGATHER_32(vgatherhi, &vtcm.vscatter32, region_len, offsetshi); + + sync_gather(vgatherhi); +} + +static unsigned int gather_32_masked_init(void) +{ + char letter = '?'; + return letter | (letter << 8) | (letter << 16) | (letter << 24); +} + +void vector_gather_32_masked(void) +{ + HVX_Vector *vgatherlo = (HVX_Vector *)&vtcm.vgather32; + HVX_Vector *vgatherhi = + (HVX_Vector *)((int)&vtcm.vgather32 + (MATRIX_SIZE * 2)); + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates; + HVX_VectorPred predslo = VAND_VAL(pred_reglo, ~0); + HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2]; + HVX_VectorPred predshi = VAND_VAL(pred_reghi, ~0); + + *vgatherlo = VSPLAT_H(gather_32_masked_init()); + *vgatherhi = VSPLAT_H(gather_32_masked_init()); + VGATHER_32_MASKED(vgatherlo, predslo, &vtcm.vscatter32, region_len, + offsetslo); + VGATHER_32_MASKED(vgatherhi, predshi, &vtcm.vscatter32, region_len, + offsetshi); + 
+ sync_gather(vgatherlo); + sync_gather(vgatherhi); +} + +/* gather the elements from the scatter16_32 buffer */ +void vector_gather_16_32(void) +{ + HVX_Vector *vgather; + HVX_VectorPair offsets; + HVX_Vector values; + + /* get the vtcm address to gather from */ + vgather = (HVX_Vector *)&vtcm.vgather16_32; + + /* get the word offsets in a vector pair */ + offsets = *(HVX_VectorPair *)word_offsets; + + VGATHER_16_32(vgather, &vtcm.vscatter16_32, region_len, offsets); + + /* deal the elements to get the order back */ + values = *(HVX_Vector *)vgather; + values = VDEAL_H(values); + + /* write it back to vtcm address */ + *(HVX_Vector *)vgather = values; +} + +void vector_gather_16_32_masked(void) +{ + HVX_Vector *vgather; + HVX_VectorPair offsets; + HVX_Vector pred_reg; + HVX_VectorPred preds; + HVX_Vector values; + + /* get the vtcm address to gather from */ + vgather = (HVX_Vector *)&vtcm.vgather16_32; + + /* get the word offsets in a vector pair */ + offsets = *(HVX_VectorPair *)word_offsets; + pred_reg = *(HVX_Vector *)half_predicates; + pred_reg = VSHUFF_H(pred_reg); + preds = VAND_VAL(pred_reg, ~0); + + *vgather = VSPLAT_H(gather_16_masked_init()); + VGATHER_16_32_MASKED(vgather, preds, &vtcm.vscatter16_32, region_len, + offsets); + + /* deal the elements to get the order back */ + values = *(HVX_Vector *)vgather; + values = VDEAL_H(values); + + /* write it back to vtcm address */ + *(HVX_Vector *)vgather = values; +} + +static void check_buffer(const char *name, void *c, void *r, size_t size) +{ + char *check = (char *)c; + char *ref = (char *)r; + for (int i = 0; i < size; i++) { + if (check[i] != ref[i]) { + printf("ERROR %s [%d]: 0x%x (%c) != 0x%x (%c)\n", name, i, + check[i], check[i], ref[i], ref[i]); + err++; + } + } +} + +/* + * These scalar functions are the C equivalents of the vector functions that + * use HVX + */ + +/* scatter the 16 bit elements using C */ +void scalar_scatter_16(unsigned short *vscatter16) +{ + for (int i = 0; i < MATRIX_SIZE; 
++i) { + vscatter16[half_offsets[i] / 2] = half_values[i]; + } +} + +void check_scatter_16() +{ + memset(vscatter16_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16(vscatter16_ref); + check_buffer(__func__, vtcm.vscatter16, vscatter16_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* scatter the 16 bit elements using C */ +void scalar_scatter_16_acc(unsigned short *vscatter16) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16[half_offsets[i] / 2] += half_values_acc[i]; + } +} + +void check_scatter_16_acc() +{ + memset(vscatter16_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16(vscatter16_ref); + scalar_scatter_16_acc(vscatter16_ref); + check_buffer(__func__, vtcm.vscatter16, vscatter16_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* scatter the 16 bit elements using C */ +void scalar_scatter_16_masked(unsigned short *vscatter16) +{ + for (int i = 0; i < MATRIX_SIZE; i++) { + if (half_predicates[i]) { + vscatter16[half_offsets[i] / 2] = half_values_masked[i]; + } + } + +} + +void check_scatter_16_masked() +{ + memset(vscatter16_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16(vscatter16_ref); + scalar_scatter_16_acc(vscatter16_ref); + scalar_scatter_16_masked(vscatter16_ref); + check_buffer(__func__, vtcm.vscatter16, vscatter16_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_32(unsigned int *vscatter32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter32[word_offsets[i] / 4] = word_values[i]; + } +} + +void check_scatter_32() +{ + memset(vscatter32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); + scalar_scatter_32(vscatter32_ref); + check_buffer(__func__, vtcm.vscatter32, vscatter32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_32_acc(unsigned int 
*vscatter32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter32[word_offsets[i] / 4] += word_values_acc[i]; + } +} + +void check_scatter_32_acc() +{ + memset(vscatter32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); + scalar_scatter_32(vscatter32_ref); + scalar_scatter_32_acc(vscatter32_ref); + check_buffer(__func__, vtcm.vscatter32, vscatter32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); +} + +/* masked scatter of the 32 bit elements using C */ +void scalar_scatter_32_masked(unsigned int *vscatter32) +{ + for (int i = 0; i < MATRIX_SIZE; i++) { + if (word_predicates[i]) { + vscatter32[word_offsets[i] / 4] = word_values_masked[i]; + } + } +} + +void check_scatter_32_masked() +{ + memset(vscatter32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); + scalar_scatter_32(vscatter32_ref); + scalar_scatter_32_acc(vscatter32_ref); + scalar_scatter_32_masked(vscatter32_ref); + check_buffer(__func__, vtcm.vscatter32, vscatter32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); +} + +/* scatter the 16 bit elements with 32 bit offsets using C */ +void scalar_scatter_16_32(unsigned short *vscatter16_32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16_32[word_offsets[i] / 2] = half_values[i]; + } +} + +void check_scatter_16_32() +{ + memset(vscatter16_32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16_32(vscatter16_32_ref); + check_buffer(__func__, vtcm.vscatter16_32, vscatter16_32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* scatter-accumulate the 16 bit elements with 32 bit offsets using C */ +void scalar_scatter_16_32_acc(unsigned short *vscatter16_32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16_32[word_offsets[i] / 2] += half_values_acc[i]; + } +} + +void check_scatter_16_32_acc() +{ + memset(vscatter16_32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16_32(vscatter16_32_ref); + scalar_scatter_16_32_acc(vscatter16_32_ref); + check_buffer(__func__, vtcm.vscatter16_32, 
vscatter16_32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +void scalar_scatter_16_32_masked(unsigned short *vscatter16_32) +{ + for (int i = 0; i < MATRIX_SIZE; i++) { + if (half_predicates[i]) { + vscatter16_32[word_offsets[i] / 2] = half_values_masked[i]; + } + } +} + +void check_scatter_16_32_masked() +{ + memset(vscatter16_32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16_32(vscatter16_32_ref); + scalar_scatter_16_32_acc(vscatter16_32_ref); + scalar_scatter_16_32_masked(vscatter16_32_ref); + check_buffer(__func__, vtcm.vscatter16_32, vscatter16_32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* gather the elements from the scatter buffer using C */ +void scalar_gather_16(unsigned short *vgather16) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vgather16[i] = vtcm.vscatter16[half_offsets[i] / 2]; + } +} + +void check_gather_16() +{ + memset(vgather16_ref, 0, MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_16(vgather16_ref); + check_buffer(__func__, vtcm.vgather16, vgather16_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +void scalar_gather_16_masked(unsigned short *vgather16) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + if (half_predicates[i]) { + vgather16[i] = vtcm.vscatter16[half_offsets[i] / 2]; + } + } +} + +void check_gather_16_masked() +{ + memset(vgather16_ref, gather_16_masked_init(), + MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_16_masked(vgather16_ref); + check_buffer(__func__, vtcm.vgather16, vgather16_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +/* gather the elements from the scatter buffer using C */ +void scalar_gather_32(unsigned int *vgather32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vgather32[i] = vtcm.vscatter32[word_offsets[i] / 4]; + } +} + +void check_gather_32(void) +{ + memset(vgather32_ref, 0, MATRIX_SIZE * sizeof(unsigned int)); + scalar_gather_32(vgather32_ref); + check_buffer(__func__, vtcm.vgather32, vgather32_ref, + MATRIX_SIZE 
* sizeof(unsigned int)); +} + +void scalar_gather_32_masked(unsigned int *vgather32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + if (word_predicates[i]) { + vgather32[i] = vtcm.vscatter32[word_offsets[i] / 4]; + } + } +} + + +void check_gather_32_masked(void) +{ + memset(vgather32_ref, gather_32_masked_init(), + MATRIX_SIZE * sizeof(unsigned int)); + scalar_gather_32_masked(vgather32_ref); + check_buffer(__func__, vtcm.vgather32, + vgather32_ref, MATRIX_SIZE * sizeof(unsigned int)); +} + +/* gather the elements from the scatter buffer using C */ +void scalar_gather_16_32(unsigned short *vgather16_32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vgather16_32[i] = vtcm.vscatter16_32[word_offsets[i] / 2]; + } +} + +void check_gather_16_32(void) +{ + memset(vgather16_32_ref, 0, MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_16_32(vgather16_32_ref); + check_buffer(__func__, vtcm.vgather16_32, vgather16_32_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +void scalar_gather_16_32_masked(unsigned short *vgather16_32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + if (half_predicates[i]) { + vgather16_32[i] = vtcm.vscatter16_32[word_offsets[i] / 2]; + } + } + +} + +void check_gather_16_32_masked(void) +{ + memset(vgather16_32_ref, gather_16_masked_init(), + MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_16_32_masked(vgather16_32_ref); + check_buffer(__func__, vtcm.vgather16_32, vgather16_32_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +/* print scatter16 buffer */ +void print_scatter16_buffer(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 16 bit scatter buffer"); + + for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) { + if ((i % MATRIX_SIZE) == 0) { + printf("\n"); + } + for (int j = 0; j < 2; j++) { + printf("%c", (char)((vtcm.vscatter16[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +/* print the gather 16 buffer */ +void print_gather_result_16(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 16 bit gather 
result\n"); + + for (int i = 0; i < MATRIX_SIZE; i++) { + for (int j = 0; j < 2; j++) { + printf("%c", (char)((vtcm.vgather16[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +/* print the scatter32 buffer */ +void print_scatter32_buffer(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 32 bit scatter buffer"); + + for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) { + if ((i % MATRIX_SIZE) == 0) { + printf("\n"); + } + for (int j = 0; j < 4; j++) { + printf("%c", (char)((vtcm.vscatter32[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +/* print the gather 32 buffer */ +void print_gather_result_32(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 32 bit gather result\n"); + + for (int i = 0; i < MATRIX_SIZE; i++) { + for (int j = 0; j < 4; j++) { + printf("%c", (char)((vtcm.vgather32[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +/* print the scatter16_32 buffer */ +void print_scatter16_32_buffer(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 16_32 bit scatter buffer"); + + for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) { + if ((i % MATRIX_SIZE) == 0) { + printf("\n"); + } + for (int j = 0; j < 2; j++) { + printf("%c", + (unsigned char)((vtcm.vscatter16_32[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +/* print the gather 16_32 buffer */ +void print_gather_result_16_32(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 16_32 bit gather result\n"); + + for (int i = 0; i < MATRIX_SIZE; i++) { + for (int j = 0; j < 2; j++) { + printf("%c", + (unsigned char)((vtcm.vgather16_32[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +int main() +{ + prefill_vtcm_scratch(); + + /* 16 bit elements with 16 bit offsets */ + create_offsets_values_preds_16(); + + vector_scatter_16(); + print_scatter16_buffer(); + check_scatter_16(); + + vector_gather_16(); + print_gather_result_16(); + check_gather_16(); + + vector_gather_16_masked(); + 
print_gather_result_16(); + check_gather_16_masked(); + + vector_scatter_16_acc(); + print_scatter16_buffer(); + check_scatter_16_acc(); + + vector_scatter_16_masked(); + print_scatter16_buffer(); + check_scatter_16_masked(); + + /* 32 bit elements with 32 bit offsets */ + create_offsets_values_preds_32(); + + vector_scatter_32(); + print_scatter32_buffer(); + check_scatter_32(); + + vector_gather_32(); + print_gather_result_32(); + check_gather_32(); + + vector_gather_32_masked(); + print_gather_result_32(); + check_gather_32_masked(); + + vector_scatter_32_acc(); + print_scatter32_buffer(); + check_scatter_32_acc(); + + vector_scatter_32_masked(); + print_scatter32_buffer(); + check_scatter_32_masked(); + + /* 16 bit elements with 32 bit offsets */ + create_offsets_values_preds_16_32(); + + vector_scatter_16_32(); + print_scatter16_32_buffer(); + check_scatter_16_32(); + + vector_gather_16_32(); + print_gather_result_16_32(); + check_gather_16_32(); + + vector_gather_16_32_masked(); + print_gather_result_16_32(); + check_gather_16_32_masked(); + + vector_scatter_16_32_acc(); + print_scatter16_32_buffer(); + check_scatter_16_32_acc(); + + vector_scatter_16_32_masked(); + print_scatter16_32_buffer(); + check_scatter_16_32_masked(); + + puts(err ? "FAIL" : "PASS"); + return err; +} diff --git a/tests/tcg/hexagon/vector_add_int.c b/tests/tcg/hexagon/vector_add_int.c new file mode 100644 index 0000000000..d6010ea14b --- /dev/null +++ b/tests/tcg/hexagon/vector_add_int.c @@ -0,0 +1,61 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> + +int gA[401]; +int gB[401]; +int gC[401]; + +void vector_add_int() +{ + int i; + for (i = 0; i < 400; i++) { + gA[i] = gB[i] + gC[i]; + } +} + +int main() +{ + int error = 0; + int i; + for (i = 0; i < 400; i++) { + gB[i] = i * 2; + gC[i] = i * 3; + } + gA[400] = 17; + vector_add_int(); + for (i = 0; i < 400; i++) { + if (gA[i] != i * 5) { + error++; + printf("ERROR: gB[%d] = %d\t", i, gB[i]); + printf("gC[%d] = %d\t", i, gC[i]); + printf("gA[%d] = %d\n", i, gA[i]); + } + } + if (gA[400] != 17) { + error++; + printf("ERROR: Overran the buffer\n"); + } + if (!error) { + printf("PASS\n"); + return 0; + } else { + printf("FAIL\n"); + return 1; + } +} diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target index 6ccb592aac..a83efb4a9d 100644 --- a/tests/tcg/multiarch/Makefile.target +++ b/tests/tcg/multiarch/Makefile.target @@ -16,7 +16,6 @@ MULTIARCH_SRCS += $(notdir $(wildcard $(MULTIARCH_SRC)/linux/*.c)) endif MULTIARCH_TESTS = $(MULTIARCH_SRCS:.c=) -$(info SRCS=${MULTIARCH_SRCS} and ${MULTIARCH_TESTS}) # # The following are any additional rules needed to build things # @@ -61,8 +60,6 @@ run-gdbstub-sha1: sha1 --bin $< --test $(MULTIARCH_SRC)/gdbstub/sha1.py, \ "basic gdbstub support") -EXTRA_RUNS += run-gdbstub-sha1 - run-gdbstub-qxfer-auxv-read: sha1 $(call run-test, $@, $(GDB_SCRIPT) \ --gdb $(HAVE_GDB_BIN) \ @@ -70,11 +67,19 @@ run-gdbstub-qxfer-auxv-read: sha1 --bin $< --test $(MULTIARCH_SRC)/gdbstub/test-qxfer-auxv-read.py, \ "basic gdbstub qXfer:auxv:read support") +run-gdbstub-thread-breakpoint: testthread 
+ $(call run-test, $@, $(GDB_SCRIPT) \ + --gdb $(HAVE_GDB_BIN) \ + --qemu $(QEMU) --qargs "$(QEMU_OPTS)" \ + --bin $< --test $(MULTIARCH_SRC)/gdbstub/test-thread-breakpoint.py, \ + "hitting a breakpoint on non-main thread") + else run-gdbstub-%: $(call skip-test, "gdbstub test $*", "need working gdb") endif -EXTRA_RUNS += run-gdbstub-sha1 run-gdbstub-qxfer-auxv-read +EXTRA_RUNS += run-gdbstub-sha1 run-gdbstub-qxfer-auxv-read \ + run-gdbstub-thread-breakpoint # ARM Compatible Semi Hosting Tests # diff --git a/tests/tcg/multiarch/gdbstub/test-thread-breakpoint.py b/tests/tcg/multiarch/gdbstub/test-thread-breakpoint.py new file mode 100644 index 0000000000..798d508bc7 --- /dev/null +++ b/tests/tcg/multiarch/gdbstub/test-thread-breakpoint.py @@ -0,0 +1,60 @@ +from __future__ import print_function +# +# Test auxiliary vector is loaded via gdbstub +# +# This is launched via tests/guest-debug/run-test.py +# + +import gdb +import sys + +failcount = 0 + +def report(cond, msg): + "Report success/fail of test" + if cond: + print ("PASS: %s" % (msg)) + else: + print ("FAIL: %s" % (msg)) + global failcount + failcount += 1 + +def run_test(): + "Run through the tests one by one" + + sym, ok = gdb.lookup_symbol("thread1_func") + gdb.execute("b thread1_func") + gdb.execute("c") + + frame = gdb.selected_frame() + report(str(frame.function()) == "thread1_func", "break @ %s"%frame) + +# +# This runs as the script it sourced (via -x, via run-test.py) +# +try: + inferior = gdb.selected_inferior() + arch = inferior.architecture() + print("ATTACHED: %s" % arch.name()) +except (gdb.error, AttributeError): + print("SKIPPING (not connected)", file=sys.stderr) + exit(0) + +if gdb.parse_and_eval('$pc') == 0: + print("SKIP: PC not set") + exit(0) + +try: + # These are not very useful in scripts + gdb.execute("set pagination off") + gdb.execute("set confirm off") + + # Run the actual tests + run_test() +except (gdb.error): + print ("GDB Exception: %s" % (sys.exc_info()[0])) + failcount += 1 + 
pass + +print("All tests complete: %d failures" % failcount) +exit(failcount) diff --git a/tests/tcg/nios2/Makefile.target b/tests/tcg/nios2/Makefile.target new file mode 100644 index 0000000000..b38e2352b7 --- /dev/null +++ b/tests/tcg/nios2/Makefile.target @@ -0,0 +1,11 @@ +# nios2 specific test tweaks + +# Currently nios2 signal handling is broken +run-signals: signals + $(call skip-test, $<, "BROKEN") +run-plugin-signals-with-%: + $(call skip-test, $<, "BROKEN") +run-linux-test: linux-test + $(call skip-test, $<, "BROKEN") +run-plugin-linux-test-with-%: + $(call skip-test, $<, "BROKEN") diff --git a/tests/tcg/sh4/Makefile.target b/tests/tcg/sh4/Makefile.target index 47c39a44b6..0e96aeff16 100644 --- a/tests/tcg/sh4/Makefile.target +++ b/tests/tcg/sh4/Makefile.target @@ -12,3 +12,9 @@ run-signals: signals $(call skip-test, $<, "BROKEN") run-plugin-signals-with-%: $(call skip-test, $<, "BROKEN") + +# This test is currently broken: https://gitlab.com/qemu-project/qemu/-/issues/704 +run-linux-test: linux-test + $(call skip-test, $<, "BROKEN") +run-plugin-linux-test-with-%: + $(call skip-test, $<, "BROKEN") diff --git a/tests/vm/openbsd b/tests/vm/openbsd index c4c78a80f1..337fe7c303 100755 --- a/tests/vm/openbsd +++ b/tests/vm/openbsd @@ -22,8 +22,8 @@ class OpenBSDVM(basevm.BaseVM): name = "openbsd" arch = "x86_64" - link = "https://cdn.openbsd.org/pub/OpenBSD/6.9/amd64/install69.iso" - csum = "140d26548aec680e34bb5f82295414228e7f61e4f5e7951af066014fda2d6e43" + link = "https://cdn.openbsd.org/pub/OpenBSD/7.0/amd64/install70.iso" + csum = "1882f9a23c9800e5dba3dbd2cf0126f552605c915433ef4c5bb672610a4ca3a4" size = "20G" pkgs = [ # tools @@ -95,10 +95,9 @@ class OpenBSDVM(basevm.BaseVM): self.console_wait_send("Terminal type", "xterm\n") self.console_wait_send("System hostname", "openbsd\n") self.console_wait_send("Which network interface", "vio0\n") - self.console_wait_send("IPv4 address", "dhcp\n") + self.console_wait_send("IPv4 address", "autoconf\n") 
self.console_wait_send("IPv6 address", "none\n") self.console_wait_send("Which network interface", "done\n") - self.console_wait_send("DNS domain name", "localnet\n") self.console_wait("Password for root account") self.console_send("%s\n" % self._config["root_pass"]) self.console_wait("Password for root account") |