diff options
Diffstat (limited to 'target/ppc/int_helper.c')
| -rw-r--r-- | target/ppc/int_helper.c | 66 |
1 files changed, 30 insertions, 36 deletions
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 96cdb3c7e3..6fd00684a5 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -25,6 +25,7 @@ #include "exec/helper-proto.h" #include "crypto/aes.h" #include "crypto/aes-round.h" +#include "crypto/clmul.h" #include "fpu/softfloat.h" #include "qapi/error.h" #include "qemu/guest-random.h" @@ -1424,46 +1425,39 @@ void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) #undef VBPERMQ_INDEX #undef VBPERMQ_DW -#define PMSUM(name, srcfld, trgfld, trgtyp) \ -void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ -{ \ - int i, j; \ - trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \ - \ - VECTOR_FOR_INORDER_I(i, srcfld) { \ - prod[i] = 0; \ - for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \ - if (a->srcfld[i] & (1ull << j)) { \ - prod[i] ^= ((trgtyp)b->srcfld[i] << j); \ - } \ - } \ - } \ - \ - VECTOR_FOR_INORDER_I(i, trgfld) { \ - r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \ - } \ -} - -PMSUM(vpmsumb, u8, u16, uint16_t) -PMSUM(vpmsumh, u16, u32, uint32_t) -PMSUM(vpmsumw, u32, u64, uint64_t) +/* + * There is no carry across the two doublewords, so their order does + * not matter. Nor is there partial overlap between registers. + */ +void helper_vpmsumb(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + for (int i = 0; i < 2; ++i) { + uint64_t aa = a->u64[i], bb = b->u64[i]; + r->u64[i] = clmul_8x4_even(aa, bb) ^ clmul_8x4_odd(aa, bb); + } +} -void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +void helper_vpmsumh(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { - int i, j; - Int128 tmp, prod[2] = {int128_zero(), int128_zero()}; - - for (j = 0; j < 64; j++) { - for (i = 0; i < ARRAY_SIZE(r->u64); i++) { - if (a->VsrD(i) & (1ull << j)) { - tmp = int128_make64(b->VsrD(i)); - tmp = int128_lshift(tmp, j); - prod[i] = int128_xor(prod[i], tmp); - } - } + for (int i = 0; i < 2; ++i) { + uint64_t aa = a->u64[i], bb = b->u64[i]; + r->u64[i] = clmul_16x2_even(aa, bb) ^ clmul_16x2_odd(aa, bb); + } +} + +void helper_vpmsumw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + for (int i = 0; i < 2; ++i) { + uint64_t aa = a->u64[i], bb = b->u64[i]; + r->u64[i] = clmul_32(aa, bb) ^ clmul_32(aa >> 32, bb >> 32); } +} - r->s128 = int128_xor(prod[0], prod[1]); +void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + Int128 e = clmul_64(a->u64[0], b->u64[0]); + Int128 o = clmul_64(a->u64[1], b->u64[1]); + r->s128 = int128_xor(e, o); } #if HOST_BIG_ENDIAN |