1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
// Build with: gcc -O0 -g -msha -msse4.2 test28.c -o test28
// Add -m32 to build the 32-bit version.
#include <inttypes.h>
#include <string.h>
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
#include <pmmintrin.h>
#include <immintrin.h>
/* 16-byte vector types built with the GCC/Clang vector_size extension. */
typedef unsigned char u8x16 __attribute__ ((vector_size (16)));
typedef unsigned short u16x8 __attribute__ ((vector_size (16)));
typedef unsigned int u32x4 __attribute__ ((vector_size (16)));
/*
 * unsigned long long, not unsigned long: with -m32 `unsigned long` is only
 * 32 bits wide, which would turn this into four 32-bit lanes instead of the
 * two 64-bit lanes the name promises.
 */
typedef unsigned long long u64x2 __attribute__ ((vector_size (16)));
typedef float f32x4 __attribute__ ((vector_size (16)));
typedef double d64x2 __attribute__ ((vector_size (16)));

/*
 * One 128-bit value viewed through every lane layout used by the tests.
 * All members overlay the same 16 bytes.
 */
typedef union {
    __m128i mm;   /* integer SSE register type */
    __m128 mf;    /* packed-float SSE register type */
    __m128d md;   /* packed-double SSE register type */
    u8x16 u8;     /* 16 x 8-bit lanes */
    u16x8 u16;    /* 8 x 16-bit lanes */
    u32x4 u32;    /* 4 x 32-bit lanes */
    u64x2 u64;    /* 2 x 64-bit lanes */
    f32x4 f32;    /* 4 x float lanes */
    d64x2 d64;    /* 2 x double lanes */
} v128;
/* Input operand tables: four 32-bit words each, loaded lane-by-lane by main(). */
static const uint32_t A[4] = { 0x0, 0x0, 0x0, 0x0 };              /* all zero */
static const uint32_t B[4] = { 0x1, 0x2, 0x3, 0x4 };              /* small distinct values */
static const uint32_t C[4] = { 0x80000000, 0x1234567, 0x0, 0xffffffff }; /* top bit, mixed, zero, all ones */
/*
 * Write the four 32-bit lanes of `a` to stdout as zero-padded 8-digit hex,
 * lane 3 first and lane 0 last, separated by '-'. No trailing newline.
 */
static void print_u32(v128 a) {
    const unsigned int lane3 = a.u32[3];
    const unsigned int lane2 = a.u32[2];
    const unsigned int lane1 = a.u32[1];
    const unsigned int lane0 = a.u32[0];

    printf("%08x-%08x-%08x-%08x", lane3, lane2, lane1, lane0);
}
/*
 * Print one line of the form "op(a ,b) => res", with each vector rendered
 * by print_u32().
 *
 * a, b : the two input operands
 * op   : operation name printed before the opening parenthesis
 * res  : the result vector
 */
static void print_u32_res(v128 a, v128 b, const char* op, v128 res) {
    printf("%s(", op);
    print_u32(a);
    fputs(" ,", stdout);
    print_u32(b);
    fputs(") => ", stdout);
    print_u32(res);
    putchar('\n');
}
/*
 * Print one line of the form "op(a ,b, 0xIB) => res" for an operation that
 * takes two vectors plus an 8-bit immediate. Vectors are rendered by
 * print_u32(); the immediate is printed in hex without padding.
 *
 * a, b : the two vector operands
 * op   : operation name printed before the opening parenthesis
 * ib   : the immediate byte passed to the operation
 * res  : the result vector
 */
static void print_u32_u8_res(v128 a, v128 b, const char* op, uint8_t ib, v128 res) {
    printf("%s(", op);
    print_u32(a);
    fputs(" ,", stdout);
    print_u32(b);
    printf(", 0x%x) => ", ib);
    print_u32(res);
    putchar('\n');
}
/*
 * Print one line of the form "op(a ,b ,c) => res" for a three-operand
 * operation, with each vector rendered by print_u32().
 *
 * a, b, c : the three input operands
 * op      : operation name printed before the opening parenthesis
 * res     : the result vector
 */
static void print_3u32_res(v128 a, v128 b, v128 c, const char* op, v128 res) {
    printf("%s(", op);
    print_u32(a);
    fputs(" ,", stdout);
    print_u32(b);
    fputs(" ,", stdout);
    print_u32(c);
    fputs(") => ", stdout);
    print_u32(res);
    putchar('\n');
}
/*
 * Test driver: runs each `_mm_<op>_epu32` intrinsic listed below over
 * pairs/triples of the constant tables A, B and C and prints the operands
 * together with the result, one line per invocation.
 *
 * argc/argv are accepted but not used. Always returns 0.
 */
int main(int argc, const char** argv)
{
    (void)argc;
    (void)argv;

    printf("test SHA Ext\n");
    v128 a, b, c, d;

/* Copy the four 32-bit words of array SRC into the u32 lanes of DST. */
#define LOAD(DST, SRC) \
    do { \
        (DST).u32[0] = (SRC)[0]; \
        (DST).u32[1] = (SRC)[1]; \
        (DST).u32[2] = (SRC)[2]; \
        (DST).u32[3] = (SRC)[3]; \
    } while (0)

/*
 * Two-operand intrinsic with an immediate: c = _mm_<OP>_epu32(a, b, IMM).
 * IMM must stay a literal so the intrinsic sees a compile-time constant.
 */
#define GO2I_(SRC_A, SRC_B, OP, IMM) \
    do { \
        LOAD(a, SRC_A); \
        LOAD(b, SRC_B); \
        c.mm = _mm_##OP##_epu32(a.mm, b.mm, IMM); \
        print_u32_u8_res(a, b, #OP, IMM, c); \
    } while (0)

/* Two-operand intrinsic: c = _mm_<OP>_epu32(a, b). */
#define GO2(SRC_A, SRC_B, OP) \
    do { \
        LOAD(a, SRC_A); \
        LOAD(b, SRC_B); \
        c.mm = _mm_##OP##_epu32(a.mm, b.mm); \
        print_u32_res(a, b, #OP, c); \
    } while (0)

/* Three-operand intrinsic: d = _mm_<OP>_epu32(a, b, c). */
#define GO3(SRC_A, SRC_B, SRC_C, OP) \
    do { \
        LOAD(a, SRC_A); \
        LOAD(b, SRC_B); \
        LOAD(c, SRC_C); \
        d.mm = _mm_##OP##_epu32(a.mm, b.mm, c.mm); \
        print_3u32_res(a, b, c, #OP, d); \
    } while (0)

/* Run the immediate form once for each immediate value 0..3. */
#define GO2I(SRC_A, SRC_B, OP) \
    do { \
        GO2I_(SRC_A, SRC_B, OP, 0x00); \
        GO2I_(SRC_A, SRC_B, OP, 0x01); \
        GO2I_(SRC_A, SRC_B, OP, 0x02); \
        GO2I_(SRC_A, SRC_B, OP, 0x03); \
    } while (0)

    GO2I(A, B, sha1rnds4);
    GO2I(A, C, sha1rnds4);
    GO2I(B, C, sha1rnds4);

    GO2(A, B, sha1msg1);
    GO2(A, C, sha1msg1);
    GO2(B, C, sha1msg1);

    GO2(A, B, sha1msg2);
    GO2(A, C, sha1msg2);
    GO2(B, C, sha1msg2);

    GO2(A, B, sha1nexte);
    GO2(A, C, sha1nexte);
    GO2(B, C, sha1nexte);

    GO2(A, B, sha256msg1);
    GO2(A, C, sha256msg1);
    GO2(B, C, sha256msg1);

    GO2(A, B, sha256msg2);
    GO2(A, C, sha256msg2);
    GO2(B, C, sha256msg2);

    GO3(A, B, C, sha256rnds2);
    GO3(B, A, C, sha256rnds2);
    GO3(C, B, A, sha256rnds2);

    return 0;
}
|