diff options
Diffstat (limited to 'target/sparc/vis_helper.c')
| -rw-r--r-- | target/sparc/vis_helper.c | 189 |
1 files changed, 47 insertions, 142 deletions
diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c index 7763b16c24..e15c6bb34e 100644 --- a/target/sparc/vis_helper.c +++ b/target/sparc/vis_helper.c @@ -44,6 +44,7 @@ target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) #if HOST_BIG_ENDIAN #define VIS_B64(n) b[7 - (n)] +#define VIS_SB64(n) sb[7 - (n)] #define VIS_W64(n) w[3 - (n)] #define VIS_SW64(n) sw[3 - (n)] #define VIS_L64(n) l[1 - (n)] @@ -51,6 +52,7 @@ target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) #define VIS_W32(n) w[1 - (n)] #else #define VIS_B64(n) b[n] +#define VIS_SB64(n) sb[n] #define VIS_W64(n) w[n] #define VIS_SW64(n) sw[n] #define VIS_L64(n) l[n] @@ -60,6 +62,7 @@ target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) typedef union { uint8_t b[8]; + int8_t sb[8]; uint16_t w[4]; int16_t sw[4]; uint32_t l[2]; @@ -74,94 +77,60 @@ typedef union { float32 f; } VIS32; -uint64_t helper_fpmerge(uint64_t src1, uint64_t src2) +uint64_t helper_fpmerge(uint32_t src1, uint32_t src2) { - VIS64 s, d; + VIS32 s1, s2; + VIS64 d; - s.ll = src1; - d.ll = src2; + s1.l = src1; + s2.l = src2; + d.ll = 0; - /* Reverse calculation order to handle overlap */ - d.VIS_B64(7) = s.VIS_B64(3); - d.VIS_B64(6) = d.VIS_B64(3); - d.VIS_B64(5) = s.VIS_B64(2); - d.VIS_B64(4) = d.VIS_B64(2); - d.VIS_B64(3) = s.VIS_B64(1); - d.VIS_B64(2) = d.VIS_B64(1); - d.VIS_B64(1) = s.VIS_B64(0); - /* d.VIS_B64(0) = d.VIS_B64(0); */ + d.VIS_B64(7) = s1.VIS_B32(3); + d.VIS_B64(6) = s2.VIS_B32(3); + d.VIS_B64(5) = s1.VIS_B32(2); + d.VIS_B64(4) = s2.VIS_B32(2); + d.VIS_B64(3) = s1.VIS_B32(1); + d.VIS_B64(2) = s2.VIS_B32(1); + d.VIS_B64(1) = s1.VIS_B32(0); + d.VIS_B64(0) = s2.VIS_B32(0); return d.ll; } -uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2) +static inline int do_ms16b(int x, int y) { - VIS64 s, d; - uint32_t tmp; - - s.ll = src1; - d.ll = src2; - -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_W64(r) = tmp >> 8; - - PMUL(0); - PMUL(1); - PMUL(2); - PMUL(3); -#undef PMUL - - return d.ll; + return ((x * y) + 0x80) >> 8; } -uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2) +uint64_t helper_fmul8x16(uint32_t src1, uint64_t src2) { - VIS64 s, d; - uint32_t tmp; + VIS64 d; + VIS32 s; - s.ll = src1; + s.l = src1; d.ll = src2; -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_W64(r) = tmp >> 8; - - PMUL(0); - PMUL(1); - PMUL(2); - PMUL(3); -#undef PMUL + d.VIS_W64(0) = do_ms16b(s.VIS_B32(0), d.VIS_SW64(0)); + d.VIS_W64(1) = do_ms16b(s.VIS_B32(1), d.VIS_SW64(1)); + d.VIS_W64(2) = do_ms16b(s.VIS_B32(2), d.VIS_SW64(2)); + d.VIS_W64(3) = do_ms16b(s.VIS_B32(3), d.VIS_SW64(3)); return d.ll; } -uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2) +uint64_t helper_fmul8x16a(uint32_t src1, int32_t src2) { - VIS64 s, d; - uint32_t tmp; - - s.ll = src1; - d.ll = src2; + VIS32 s; + VIS64 d; -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_W64(r) = tmp >> 8; + s.l = src1; + d.ll = 0; - PMUL(0); - PMUL(1); - PMUL(2); - PMUL(3); -#undef PMUL + d.VIS_W64(0) = do_ms16b(s.VIS_B32(0), src2); + d.VIS_W64(1) = do_ms16b(s.VIS_B32(1), src2); + d.VIS_W64(2) = do_ms16b(s.VIS_B32(2), src2); + d.VIS_W64(3) = do_ms16b(s.VIS_B32(3), src2); return d.ll; } @@ -169,23 +138,14 @@ uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2) uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2) { VIS64 s, d; - uint32_t tmp; s.ll = src1; d.ll = src2; -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_W64(r) = tmp >> 8; - - PMUL(0); - PMUL(1); - PMUL(2); - PMUL(3); -#undef PMUL + d.VIS_W64(0) = do_ms16b(s.VIS_SB64(1), d.VIS_SW64(0)); + d.VIS_W64(1) = do_ms16b(s.VIS_SB64(3), d.VIS_SW64(1)); + d.VIS_W64(2) = do_ms16b(s.VIS_SB64(5), d.VIS_SW64(2)); + d.VIS_W64(3) = do_ms16b(s.VIS_SB64(7), d.VIS_SW64(3)); return d.ll; } @@ -193,80 +153,25 @@ uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2) uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2) { VIS64 s, d; - uint32_t tmp; s.ll = src1; d.ll = src2; -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_W64(r) = tmp >> 8; - - PMUL(0); - PMUL(1); - PMUL(2); - PMUL(3); -#undef PMUL - - return d.ll; -} - -uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2) -{ - VIS64 s, d; - uint32_t tmp; - - s.ll = src1; - d.ll = src2; - -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_L64(r) = tmp; - - /* Reverse calculation order to handle overlap */ - PMUL(1); - PMUL(0); -#undef PMUL + d.VIS_W64(0) = do_ms16b(s.VIS_B64(0), d.VIS_SW64(0)); + d.VIS_W64(1) = do_ms16b(s.VIS_B64(2), d.VIS_SW64(1)); + d.VIS_W64(2) = do_ms16b(s.VIS_B64(4), d.VIS_SW64(2)); + d.VIS_W64(3) = do_ms16b(s.VIS_B64(6), d.VIS_SW64(3)); return d.ll; } -uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2) -{ - VIS64 s, d; - uint32_t tmp; - - s.ll = src1; - d.ll = src2; - -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_L64(r) = tmp; - - /* Reverse calculation order to handle overlap */ - PMUL(1); - PMUL(0); -#undef PMUL - - return d.ll; -} - -uint64_t helper_fexpand(uint64_t src1, uint64_t src2) +uint64_t helper_fexpand(uint32_t src2) { VIS32 s; VIS64 d; - s.l = (uint32_t)src1; - d.ll = src2; + s.l = src2; + d.ll = 0; d.VIS_W64(0) = s.VIS_B32(0) << 4; d.VIS_W64(1) = s.VIS_B32(1) << 4; d.VIS_W64(2) = s.VIS_B32(2) << 4; |