From 1fd286385c31e42a60db0a298c01e1c8ec290e3e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 23 May 2019 15:09:49 +0200 Subject: s390x/tcg: Implement VECTOR FIND ANY ELEMENT EQUAL Complicated stuff. Provide two different helpers for CC and !CC handling. We might want to add more helpers later. zero_search() and match_index() are courtesy of Richard H. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/vec_string_helper.c | 154 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 target/s390x/vec_string_helper.c (limited to 'target/s390x/vec_string_helper.c') diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c new file mode 100644 index 0000000000..56dc89c824 --- /dev/null +++ b/target/s390x/vec_string_helper.c @@ -0,0 +1,154 @@ +/* + * QEMU TCG support -- s390x vector string instruction support + * + * Copyright (C) 2019 Red Hat Inc + * + * Authors: + * David Hildenbrand + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "cpu.h" +#include "internal.h" +#include "vec.h" +#include "tcg/tcg.h" +#include "tcg/tcg-gvec-desc.h" +#include "exec/helper-proto.h" + +/* + * Returns a bit set in the MSB of each element that is zero, + * as defined by the mask. + */ +static inline uint64_t zero_search(uint64_t a, uint64_t mask) +{ + return ~(((a & mask) + mask) | a | mask); +} + +/* + * Returns the byte offset for the first match, or 16 for no match. + */ +static inline int match_index(uint64_t c0, uint64_t c1) +{ + return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3; +} + +/* + * Returns the number of bits composing one element. + */ +static uint8_t get_element_bits(uint8_t es) +{ + return (1 << es) * BITS_PER_BYTE; +} + +/* + * Returns the bitmask for a single element. 
+ */ +static uint64_t get_single_element_mask(uint8_t es) +{ + return -1ull >> (64 - get_element_bits(es)); +} + +/* + * Returns the bitmask for a single element (excluding the MSB). + */ +static uint64_t get_single_element_lsbs_mask(uint8_t es) +{ + return -1ull >> (65 - get_element_bits(es)); +} + +/* + * Returns the bitmasks for multiple elements (excluding the MSBs). + */ +static uint64_t get_element_lsbs_mask(uint8_t es) +{ + return dup_const(es, get_single_element_lsbs_mask(es)); +} + +static int vfae(void *v1, const void *v2, const void *v3, bool in, + bool rt, bool zs, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + const int bits = get_element_bits(es); + uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1; + uint64_t first_zero = 16; + uint64_t first_equal; + int i; + + a0 = s390_vec_read_element64(v2, 0); + a1 = s390_vec_read_element64(v2, 1); + b0 = s390_vec_read_element64(v3, 0); + b1 = s390_vec_read_element64(v3, 1); + e0 = 0; + e1 = 0; + /* compare against equality with every other element */ + for (i = 0; i < 64; i += bits) { + t0 = rol64(b0, i); + t1 = rol64(b1, i); + e0 |= zero_search(a0 ^ t0, mask); + e0 |= zero_search(a0 ^ t1, mask); + e1 |= zero_search(a1 ^ t0, mask); + e1 |= zero_search(a1 ^ t1, mask); + } + /* invert the result if requested - invert only the MSBs */ + if (in) { + e0 = ~e0 & ~mask; + e1 = ~e1 & ~mask; + } + first_equal = match_index(e0, e1); + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + if (rt) { + e0 = (e0 >> (bits - 1)) * get_single_element_mask(es); + e1 = (e1 >> (bits - 1)) * get_single_element_mask(es); + s390_vec_write_element64(v1, 0, e0); + s390_vec_write_element64(v1, 1, e1); + } else { + s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); + s390_vec_write_element64(v1, 1, 0); + } + + if (first_zero == 16 && first_equal == 16) { + return 3; /* no match */ + } else if (first_zero == 16) { + return 1; /* matching elements, 
no match for zero */ + } else if (first_equal < first_zero) { + return 2; /* matching elements before match for zero */ + } + return 0; /* match for zero */ +} + +#define DEF_VFAE_HELPER(BITS) \ +void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + const bool in = extract32(simd_data(desc), 3, 1); \ + const bool rt = extract32(simd_data(desc), 2, 1); \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ +} +DEF_VFAE_HELPER(8) +DEF_VFAE_HELPER(16) +DEF_VFAE_HELPER(32) + +#define DEF_VFAE_CC_HELPER(BITS) \ +void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool in = extract32(simd_data(desc), 3, 1); \ + const bool rt = extract32(simd_data(desc), 2, 1); \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ +} +DEF_VFAE_CC_HELPER(8) +DEF_VFAE_CC_HELPER(16) +DEF_VFAE_CC_HELPER(32) -- cgit 1.4.1 From 8c0e1e58ce45ab1317bed817a9821b0286f926a2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 24 May 2019 11:25:58 +0200 Subject: s390x/tcg: Implement VECTOR FIND ELEMENT EQUAL Core logic courtesy of Richard H. 
Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/helper.h | 6 +++++ target/s390x/insn-data.def | 2 ++ target/s390x/translate_vx.inc.c | 31 ++++++++++++++++++++++ target/s390x/vec_string_helper.c | 57 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 96 insertions(+) (limited to 'target/s390x/vec_string_helper.c') diff --git a/target/s390x/helper.h b/target/s390x/helper.h index c45328cf73..a1b169b666 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -218,6 +218,12 @@ DEF_HELPER_FLAGS_4(gvec_vfae32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_5(gvec_vfae_cc8, void, ptr, cptr, cptr, env, i32) DEF_HELPER_5(gvec_vfae_cc16, void, ptr, cptr, cptr, env, i32) DEF_HELPER_5(gvec_vfae_cc32, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vfee8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vfee16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vfee32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_5(gvec_vfee_cc8, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfee_cc16, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfee_cc32, void, ptr, cptr, cptr, env, i32) #ifndef CONFIG_USER_ONLY DEF_HELPER_3(servc, i32, env, i64, i64) diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def index 070ce2a471..d8907ef6a5 100644 --- a/target/s390x/insn-data.def +++ b/target/s390x/insn-data.def @@ -1195,6 +1195,8 @@ /* VECTOR FIND ANY ELEMENT EQUAL */ F(0xe782, VFAE, VRR_b, V, 0, 0, 0, 0, vfae, 0, IF_VEC) +/* VECTOR FIND ELEMENT EQUAL */ + F(0xe780, VFEE, VRR_b, V, 0, 0, 0, 0, vfee, 0, IF_VEC) #ifndef CONFIG_USER_ONLY /* COMPARE AND SWAP AND PURGE */ diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c index ebd7a877f1..b25afbc011 100644 --- a/target/s390x/translate_vx.inc.c +++ b/target/s390x/translate_vx.inc.c @@ -2383,3 +2383,34 @@ static DisasJumpType op_vfae(DisasContext *s, DisasOps *o) } return 
DISAS_NEXT; } + +static DisasJumpType op_vfee(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s->fields, m4); + const uint8_t m5 = get_field(s->fields, m5); + static gen_helper_gvec_3 * const g[3] = { + gen_helper_gvec_vfee8, + gen_helper_gvec_vfee16, + gen_helper_gvec_vfee32, + }; + static gen_helper_gvec_3_ptr * const g_cc[3] = { + gen_helper_gvec_vfee_cc8, + gen_helper_gvec_vfee_cc16, + gen_helper_gvec_vfee_cc32, + }; + + if (es > ES_32 || m5 & ~0x3) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m5, 0, 1)) { + gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), cpu_env, m5, g_cc[es]); + set_cc_static(s); + } else { + gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), m5, g[es]); + } + return DISAS_NEXT; +} diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c index 56dc89c824..05ad99e173 100644 --- a/target/s390x/vec_string_helper.c +++ b/target/s390x/vec_string_helper.c @@ -152,3 +152,60 @@ void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3, \ DEF_VFAE_CC_HELPER(8) DEF_VFAE_CC_HELPER(16) DEF_VFAE_CC_HELPER(32) + +static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + uint64_t a0, a1, b0, b1, e0, e1, z0, z1; + uint64_t first_zero = 16; + uint64_t first_equal; + + a0 = s390_vec_read_element64(v2, 0); + a1 = s390_vec_read_element64(v2, 1); + b0 = s390_vec_read_element64(v3, 0); + b1 = s390_vec_read_element64(v3, 1); + e0 = zero_search(a0 ^ b0, mask); + e1 = zero_search(a1 ^ b1, mask); + first_equal = match_index(e0, e1); + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); + s390_vec_write_element64(v1, 1, 0); + if (first_zero == 16 && first_equal == 16) { + 
return 3; /* no match */ + } else if (first_zero == 16) { + return 1; /* matching elements, no match for zero */ + } else if (first_equal < first_zero) { + return 2; /* matching elements before match for zero */ + } + return 0; /* match for zero */ +} + +#define DEF_VFEE_HELPER(BITS) \ +void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + vfee(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFEE_HELPER(8) +DEF_VFEE_HELPER(16) +DEF_VFEE_HELPER(32) + +#define DEF_VFEE_CC_HELPER(BITS) \ +void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFEE_CC_HELPER(8) +DEF_VFEE_CC_HELPER(16) +DEF_VFEE_CC_HELPER(32) -- cgit 1.4.1 From 074e99b3b5552b297f76c820ea55c724209bb6d1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 24 May 2019 11:26:45 +0200 Subject: s390x/tcg: Implement VECTOR FIND ELEMENT NOT EQUAL Similar to VECTOR FIND ELEMENT EQUAL. Core logic courtesy of Richard H. Add s390_vec_read_element() that can deal with element sizes. 
Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/helper.h | 6 ++++ target/s390x/insn-data.def | 2 ++ target/s390x/translate_vx.inc.c | 31 +++++++++++++++++ target/s390x/vec.h | 19 +++++++++++ target/s390x/vec_string_helper.c | 74 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 132 insertions(+) (limited to 'target/s390x/vec_string_helper.c') diff --git a/target/s390x/helper.h b/target/s390x/helper.h index a1b169b666..fb50b404db 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -224,6 +224,12 @@ DEF_HELPER_FLAGS_4(gvec_vfee32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_5(gvec_vfee_cc8, void, ptr, cptr, cptr, env, i32) DEF_HELPER_5(gvec_vfee_cc16, void, ptr, cptr, cptr, env, i32) DEF_HELPER_5(gvec_vfee_cc32, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vfene8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vfene16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vfene32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_5(gvec_vfene_cc8, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfene_cc16, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfene_cc32, void, ptr, cptr, cptr, env, i32) #ifndef CONFIG_USER_ONLY DEF_HELPER_3(servc, i32, env, i64, i64) diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def index d8907ef6a5..d03c1ee0b3 100644 --- a/target/s390x/insn-data.def +++ b/target/s390x/insn-data.def @@ -1197,6 +1197,8 @@ F(0xe782, VFAE, VRR_b, V, 0, 0, 0, 0, vfae, 0, IF_VEC) /* VECTOR FIND ELEMENT EQUAL */ F(0xe780, VFEE, VRR_b, V, 0, 0, 0, 0, vfee, 0, IF_VEC) +/* VECTOR FIND ELEMENT NOT EQUAL */ + F(0xe781, VFENE, VRR_b, V, 0, 0, 0, 0, vfene, 0, IF_VEC) #ifndef CONFIG_USER_ONLY /* COMPARE AND SWAP AND PURGE */ diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c index b25afbc011..1ad0b62517 100644 --- a/target/s390x/translate_vx.inc.c +++ b/target/s390x/translate_vx.inc.c @@ 
-2414,3 +2414,34 @@ static DisasJumpType op_vfee(DisasContext *s, DisasOps *o) } return DISAS_NEXT; } + +static DisasJumpType op_vfene(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s->fields, m4); + const uint8_t m5 = get_field(s->fields, m5); + static gen_helper_gvec_3 * const g[3] = { + gen_helper_gvec_vfene8, + gen_helper_gvec_vfene16, + gen_helper_gvec_vfene32, + }; + static gen_helper_gvec_3_ptr * const g_cc[3] = { + gen_helper_gvec_vfene_cc8, + gen_helper_gvec_vfene_cc16, + gen_helper_gvec_vfene_cc32, + }; + + if (es > ES_32 || m5 & ~0x3) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m5, 0, 1)) { + gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), cpu_env, m5, g_cc[es]); + set_cc_static(s); + } else { + gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), m5, g[es]); + } + return DISAS_NEXT; +} diff --git a/target/s390x/vec.h b/target/s390x/vec.h index 3313fb43ee..affc62874c 100644 --- a/target/s390x/vec.h +++ b/target/s390x/vec.h @@ -12,6 +12,8 @@ #ifndef S390X_VEC_H #define S390X_VEC_H +#include "tcg/tcg.h" + typedef union S390Vector { uint64_t doubleword[2]; uint32_t word[4]; @@ -70,6 +72,23 @@ static inline uint64_t s390_vec_read_element64(const S390Vector *v, uint8_t enr) return v->doubleword[enr]; } +static inline uint64_t s390_vec_read_element(const S390Vector *v, uint8_t enr, + uint8_t es) +{ + switch (es) { + case MO_8: + return s390_vec_read_element8(v, enr); + case MO_16: + return s390_vec_read_element16(v, enr); + case MO_32: + return s390_vec_read_element32(v, enr); + case MO_64: + return s390_vec_read_element64(v, enr); + default: + g_assert_not_reached(); + } +} + static inline void s390_vec_write_element8(S390Vector *v, uint8_t enr, uint8_t data) { diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c index 05ad99e173..0ee3470112 100644 --- 
a/target/s390x/vec_string_helper.c +++ b/target/s390x/vec_string_helper.c @@ -27,6 +27,15 @@ static inline uint64_t zero_search(uint64_t a, uint64_t mask) return ~(((a & mask) + mask) | a | mask); } +/* + * Returns a bit set in the MSB of each element that is not zero, + * as defined by the mask. + */ +static inline uint64_t nonzero_search(uint64_t a, uint64_t mask) +{ + return (((a & mask) + mask) | a) & ~mask; +} + /* * Returns the byte offset for the first match, or 16 for no match. */ @@ -209,3 +218,68 @@ void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \ DEF_VFEE_CC_HELPER(8) DEF_VFEE_CC_HELPER(16) DEF_VFEE_CC_HELPER(32) + +static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + uint64_t a0, a1, b0, b1, e0, e1, z0, z1; + uint64_t first_zero = 16; + uint64_t first_inequal; + bool smaller = false; + + a0 = s390_vec_read_element64(v2, 0); + a1 = s390_vec_read_element64(v2, 1); + b0 = s390_vec_read_element64(v3, 0); + b1 = s390_vec_read_element64(v3, 1); + e0 = nonzero_search(a0 ^ b0, mask); + e1 = nonzero_search(a1 ^ b1, mask); + first_inequal = match_index(e0, e1); + + /* identify the smaller element */ + if (first_inequal < 16) { + uint8_t enr = first_inequal / (1 << es); + uint32_t a = s390_vec_read_element(v2, enr, es); + uint32_t b = s390_vec_read_element(v3, enr, es); + + smaller = a < b; + } + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero)); + s390_vec_write_element64(v1, 1, 0); + if (first_zero == 16 && first_inequal == 16) { + return 3; + } else if (first_zero < first_inequal) { + return 0; + } + return smaller ? 
1 : 2; +} + +#define DEF_VFENE_HELPER(BITS) \ +void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + vfene(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFENE_HELPER(8) +DEF_VFENE_HELPER(16) +DEF_VFENE_HELPER(32) + +#define DEF_VFENE_CC_HELPER(BITS) \ +void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFENE_CC_HELPER(8) +DEF_VFENE_CC_HELPER(16) +DEF_VFENE_CC_HELPER(32) -- cgit 1.4.1 From be6324c6b73478f181bba4920de2ef6af317482b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 24 May 2019 11:27:56 +0200 Subject: s390x/tcg: Implement VECTOR ISOLATE STRING Logic mostly courtesy of Richard H. Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/helper.h | 6 ++++++ target/s390x/insn-data.def | 2 ++ target/s390x/translate_vx.inc.c | 34 ++++++++++++++++++++++++++++++ target/s390x/vec_string_helper.c | 45 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 87 insertions(+) (limited to 'target/s390x/vec_string_helper.c') diff --git a/target/s390x/helper.h b/target/s390x/helper.h index fb50b404db..1f9f0b463b 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -230,6 +230,12 @@ DEF_HELPER_FLAGS_4(gvec_vfene32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_5(gvec_vfene_cc8, void, ptr, cptr, cptr, env, i32) DEF_HELPER_5(gvec_vfene_cc16, void, ptr, cptr, cptr, env, i32) DEF_HELPER_5(gvec_vfene_cc32, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_3(gvec_vistr8, TCG_CALL_NO_RWG, void, ptr, cptr, i32) +DEF_HELPER_FLAGS_3(gvec_vistr16, TCG_CALL_NO_RWG, void, ptr, cptr, i32) +DEF_HELPER_FLAGS_3(gvec_vistr32, TCG_CALL_NO_RWG, void, ptr, cptr, i32) +DEF_HELPER_4(gvec_vistr_cc8, void, ptr, cptr, env, i32) +DEF_HELPER_4(gvec_vistr_cc16, 
void, ptr, cptr, env, i32) +DEF_HELPER_4(gvec_vistr_cc32, void, ptr, cptr, env, i32) #ifndef CONFIG_USER_ONLY DEF_HELPER_3(servc, i32, env, i64, i64) diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def index d03c1ee0b3..b4a6b59608 100644 --- a/target/s390x/insn-data.def +++ b/target/s390x/insn-data.def @@ -1199,6 +1199,8 @@ F(0xe780, VFEE, VRR_b, V, 0, 0, 0, 0, vfee, 0, IF_VEC) /* VECTOR FIND ELEMENT NOT EQUAL */ F(0xe781, VFENE, VRR_b, V, 0, 0, 0, 0, vfene, 0, IF_VEC) +/* VECTOR ISOLATE STRING */ + F(0xe75c, VISTR, VRR_a, V, 0, 0, 0, 0, vistr, 0, IF_VEC) #ifndef CONFIG_USER_ONLY /* COMPARE AND SWAP AND PURGE */ diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c index 1ad0b62517..08a62eab52 100644 --- a/target/s390x/translate_vx.inc.c +++ b/target/s390x/translate_vx.inc.c @@ -188,6 +188,9 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr, #define gen_gvec_2s(v1, v2, c, gen) \ tcg_gen_gvec_2s(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ 16, 16, c, gen) +#define gen_gvec_2_ool(v1, v2, data, fn) \ + tcg_gen_gvec_2_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + 16, 16, data, fn) #define gen_gvec_2i_ool(v1, v2, c, data, fn) \ tcg_gen_gvec_2i_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ c, 16, 16, data, fn) @@ -2445,3 +2448,34 @@ static DisasJumpType op_vfene(DisasContext *s, DisasOps *o) } return DISAS_NEXT; } + +static DisasJumpType op_vistr(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s->fields, m4); + const uint8_t m5 = get_field(s->fields, m5); + static gen_helper_gvec_2 * const g[3] = { + gen_helper_gvec_vistr8, + gen_helper_gvec_vistr16, + gen_helper_gvec_vistr32, + }; + static gen_helper_gvec_2_ptr * const g_cc[3] = { + gen_helper_gvec_vistr_cc8, + gen_helper_gvec_vistr_cc16, + gen_helper_gvec_vistr_cc32, + }; + + if (es > ES_32 || m5 & ~0x1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m5, 0, 
1)) { + gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + cpu_env, 0, g_cc[es]); + set_cc_static(s); + } else { + gen_gvec_2_ool(get_field(s->fields, v1), get_field(s->fields, v2), 0, + g[es]); + } + return DISAS_NEXT; +} diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c index 0ee3470112..6bafa23bd7 100644 --- a/target/s390x/vec_string_helper.c +++ b/target/s390x/vec_string_helper.c @@ -283,3 +283,48 @@ void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3, \ DEF_VFENE_CC_HELPER(8) DEF_VFENE_CC_HELPER(16) DEF_VFENE_CC_HELPER(32) + +static int vistr(void *v1, const void *v2, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + uint64_t a0 = s390_vec_read_element64(v2, 0); + uint64_t a1 = s390_vec_read_element64(v2, 1); + uint64_t z; + int cc = 3; + + z = zero_search(a0, mask); + if (z) { + a0 &= ~(-1ull >> clz64(z)); + a1 = 0; + cc = 0; + } else { + z = zero_search(a1, mask); + if (z) { + a1 &= ~(-1ull >> clz64(z)); + cc = 0; + } + } + + s390_vec_write_element64(v1, 0, a0); + s390_vec_write_element64(v1, 1, a1); + return cc; +} + +#define DEF_VISTR_HELPER(BITS) \ +void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc) \ +{ \ + vistr(v1, v2, MO_##BITS); \ +} +DEF_VISTR_HELPER(8) +DEF_VISTR_HELPER(16) +DEF_VISTR_HELPER(32) + +#define DEF_VISTR_CC_HELPER(BITS) \ +void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \ + uint32_t desc) \ +{ \ + env->cc_op = vistr(v1, v2, MO_##BITS); \ +} +DEF_VISTR_CC_HELPER(8) +DEF_VISTR_CC_HELPER(16) +DEF_VISTR_CC_HELPER(32) -- cgit 1.4.1 From 13b0228f77ffa14a0f82bd8a9d0fd5859b0d6a7d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 24 May 2019 11:23:49 +0200 Subject: s390x/tcg: Implement VECTOR STRING RANGE COMPARE Unfortunately, there is no easy way to avoid looping over all elements in v2. Provide specialized variants for !cc,!rt/!cc,rt/cc,!rt/cc,rt and all element types. 
Especially for different values of rt, the compiler might be able to optimize the code a lot. Add s390_vec_write_element(). Reviewed-by: Richard Henderson Signed-off-by: David Hildenbrand --- target/s390x/helper.h | 12 ++++ target/s390x/insn-data.def | 2 + target/s390x/translate_vx.inc.c | 59 ++++++++++++++++ target/s390x/vec.h | 21 ++++++ target/s390x/vec_string_helper.c | 143 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 237 insertions(+) (limited to 'target/s390x/vec_string_helper.c') diff --git a/target/s390x/helper.h b/target/s390x/helper.h index 1f9f0b463b..5db67779d3 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -236,6 +236,18 @@ DEF_HELPER_FLAGS_3(gvec_vistr32, TCG_CALL_NO_RWG, void, ptr, cptr, i32) DEF_HELPER_4(gvec_vistr_cc8, void, ptr, cptr, env, i32) DEF_HELPER_4(gvec_vistr_cc16, void, ptr, cptr, env, i32) DEF_HELPER_4(gvec_vistr_cc32, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_vstrc8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_5(gvec_vstrc16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_5(gvec_vstrc32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_5(gvec_vstrc_rt8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_5(gvec_vstrc_rt16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_5(gvec_vstrc_rt32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32) +DEF_HELPER_6(gvec_vstrc_cc8, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrc_cc16, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32) #ifndef CONFIG_USER_ONLY DEF_HELPER_3(servc, i32, env, i64, i64) diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def 
index b4a6b59608..a2969fab58 100644 --- a/target/s390x/insn-data.def +++ b/target/s390x/insn-data.def @@ -1201,6 +1201,8 @@ F(0xe781, VFENE, VRR_b, V, 0, 0, 0, 0, vfene, 0, IF_VEC) /* VECTOR ISOLATE STRING */ F(0xe75c, VISTR, VRR_a, V, 0, 0, 0, 0, vistr, 0, IF_VEC) +/* VECTOR STRING RANGE COMPARE */ + F(0xe78a, VSTRC, VRR_d, V, 0, 0, 0, 0, vstrc, 0, IF_VEC) #ifndef CONFIG_USER_ONLY /* COMPARE AND SWAP AND PURGE */ diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c index 08a62eab52..f26ffa2895 100644 --- a/target/s390x/translate_vx.inc.c +++ b/target/s390x/translate_vx.inc.c @@ -217,6 +217,10 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr, tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ vec_full_reg_offset(v3), vec_full_reg_offset(v4), \ 16, 16, data, fn) +#define gen_gvec_4_ptr(v1, v2, v3, v4, ptr, data, fn) \ + tcg_gen_gvec_4_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), vec_full_reg_offset(v4), \ + ptr, 16, 16, data, fn) #define gen_gvec_dup_i64(es, v1, c) \ tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c) #define gen_gvec_mov(v1, v2) \ @@ -2479,3 +2483,58 @@ static DisasJumpType op_vistr(DisasContext *s, DisasOps *o) } return DISAS_NEXT; } + +static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s->fields, m5); + const uint8_t m6 = get_field(s->fields, m6); + static gen_helper_gvec_4 * const g[3] = { + gen_helper_gvec_vstrc8, + gen_helper_gvec_vstrc16, + gen_helper_gvec_vstrc32, + }; + static gen_helper_gvec_4 * const g_rt[3] = { + gen_helper_gvec_vstrc_rt8, + gen_helper_gvec_vstrc_rt16, + gen_helper_gvec_vstrc_rt32, + }; + static gen_helper_gvec_4_ptr * const g_cc[3] = { + gen_helper_gvec_vstrc_cc8, + gen_helper_gvec_vstrc_cc16, + gen_helper_gvec_vstrc_cc32, + }; + static gen_helper_gvec_4_ptr * const g_cc_rt[3] = { + gen_helper_gvec_vstrc_cc_rt8, + gen_helper_gvec_vstrc_cc_rt16, + 
gen_helper_gvec_vstrc_cc_rt32, + }; + + if (es > ES_32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m6, 0, 1)) { + if (extract32(m6, 2, 1)) { + gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), get_field(s->fields, v4), + cpu_env, m6, g_cc_rt[es]); + } else { + gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), get_field(s->fields, v4), + cpu_env, m6, g_cc[es]); + } + set_cc_static(s); + } else { + if (extract32(m6, 2, 1)) { + gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), get_field(s->fields, v4), + m6, g_rt[es]); + } else { + gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), get_field(s->fields, v4), + m6, g[es]); + } + } + return DISAS_NEXT; +} diff --git a/target/s390x/vec.h b/target/s390x/vec.h index affc62874c..a6e361869b 100644 --- a/target/s390x/vec.h +++ b/target/s390x/vec.h @@ -117,4 +117,25 @@ static inline void s390_vec_write_element64(S390Vector *v, uint8_t enr, v->doubleword[enr] = data; } +static inline void s390_vec_write_element(S390Vector *v, uint8_t enr, + uint8_t es, uint64_t data) +{ + switch (es) { + case MO_8: + s390_vec_write_element8(v, enr, data); + break; + case MO_16: + s390_vec_write_element16(v, enr, data); + break; + case MO_32: + s390_vec_write_element32(v, enr, data); + break; + case MO_64: + s390_vec_write_element64(v, enr, data); + break; + default: + g_assert_not_reached(); + } +} + #endif /* S390X_VEC_H */ diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c index 6bafa23bd7..c516c0ceeb 100644 --- a/target/s390x/vec_string_helper.c +++ b/target/s390x/vec_string_helper.c @@ -328,3 +328,146 @@ void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \ DEF_VISTR_CC_HELPER(8) DEF_VISTR_CC_HELPER(16) DEF_VISTR_CC_HELPER(32) + +static bool 
element_compare(uint32_t data, uint32_t l, uint8_t c) +{ + const bool equal = extract32(c, 7, 1); + const bool lower = extract32(c, 6, 1); + const bool higher = extract32(c, 5, 1); + + if (data < l) { + return lower; + } else if (data > l) { + return higher; + } + return equal; +} + +static int vstrc(void *v1, const void *v2, const void *v3, const void *v4, + bool in, bool rt, bool zs, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + uint64_t a0 = s390_vec_read_element64(v2, 0); + uint64_t a1 = s390_vec_read_element64(v2, 1); + int first_zero = 16, first_match = 16; + S390Vector rt_result = {}; + uint64_t z0, z1; + int i, j; + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + for (i = 0; i < 16 / (1 << es); i++) { + const uint32_t data = s390_vec_read_element(v2, i, es); + const int cur_byte = i * (1 << es); + bool any_match = false; + + /* if we don't need a bit vector, we can stop early */ + if (cur_byte == first_zero && !rt) { + break; + } + + for (j = 0; j < 16 / (1 << es); j += 2) { + const uint32_t l1 = s390_vec_read_element(v3, j, es); + const uint32_t l2 = s390_vec_read_element(v3, j + 1, es); + /* we are only interested in the highest byte of each element */ + const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es)); + const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es)); + + if (element_compare(data, l1, c1) && + element_compare(data, l2, c2)) { + any_match = true; + break; + } + } + /* invert the result if requested */ + any_match = in ^ any_match; + + if (any_match) { + /* indicate bit vector if requested */ + if (rt) { + const uint64_t val = -1ull; + + first_match = MIN(cur_byte, first_match); + s390_vec_write_element(&rt_result, i, es, val); + } else { + /* stop on the first match */ + first_match = cur_byte; + break; + } + } + } + + if (rt) { + *(S390Vector *)v1 = rt_result; + } else { + s390_vec_write_element64(v1, 0, MIN(first_match, first_zero)); 
+ s390_vec_write_element64(v1, 1, 0); + } + + if (first_zero == 16 && first_match == 16) { + return 3; /* no match */ + } else if (first_zero == 16) { + return 1; /* matching elements, no match for zero */ + } else if (first_match < first_zero) { + return 2; /* matching elements before match for zero */ + } + return 0; /* match for zero */ +} + +#define DEF_VSTRC_HELPER(BITS) \ +void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + const bool in = extract32(simd_data(desc), 3, 1); \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \ +} +DEF_VSTRC_HELPER(8) +DEF_VSTRC_HELPER(16) +DEF_VSTRC_HELPER(32) + +#define DEF_VSTRC_RT_HELPER(BITS) \ +void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + const bool in = extract32(simd_data(desc), 3, 1); \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \ +} +DEF_VSTRC_RT_HELPER(8) +DEF_VSTRC_RT_HELPER(16) +DEF_VSTRC_RT_HELPER(32) + +#define DEF_VSTRC_CC_HELPER(BITS) \ +void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, CPUS390XState *env, \ + uint32_t desc) \ +{ \ + const bool in = extract32(simd_data(desc), 3, 1); \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \ +} +DEF_VSTRC_CC_HELPER(8) +DEF_VSTRC_CC_HELPER(16) +DEF_VSTRC_CC_HELPER(32) + +#define DEF_VSTRC_CC_RT_HELPER(BITS) \ +void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, CPUS390XState *env, \ + uint32_t desc) \ +{ \ + const bool in = extract32(simd_data(desc), 3, 1); \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \ +} +DEF_VSTRC_CC_RT_HELPER(8) +DEF_VSTRC_CC_RT_HELPER(16) +DEF_VSTRC_CC_RT_HELPER(32) 
-- cgit 1.4.1