Commit | Line | Data |
---|---|---|
5ba3f43e A |
1 | #ifdef T_NAMESPACE |
2 | #undef T_NAMESPACE | |
3 | #endif | |
4 | ||
5 | #include <darwintest.h> | |
6 | #include <unistd.h> | |
7 | #include <signal.h> | |
8 | #include <sys/time.h> | |
9 | #include <sys/mman.h> | |
10 | #include <immintrin.h> | |
11 | #include <mach/mach.h> | |
12 | #include <stdio.h> | |
13 | #include <string.h> | |
14 | #include <err.h> | |
15 | #include <i386/cpu_capabilities.h> | |
16 | ||
17 | T_GLOBAL_META( | |
18 | T_META_NAMESPACE("xnu.intel"), | |
19 | T_META_CHECK_LEAKS(false) | |
20 | ); | |
21 | ||
22 | #define NORMAL_RUN_TIME (10) | |
23 | #define LONG_RUN_TIME (10*60) | |
24 | #define TIMEOUT_OVERHEAD (10) | |
25 | ||
26 | volatile boolean_t checking = true; | |
27 | char vec_str_buf[8196]; | |
28 | char karray_str_buf[1024]; | |
29 | ||
30 | /* | |
31 | * ymm defines/globals/prototypes | |
32 | */ | |
33 | #define STOP_COOKIE_256 0x01234567 | |
34 | #if defined(__x86_64__) | |
35 | #define YMM_MAX 16 | |
36 | #define X86_AVX_STATE_T x86_avx_state64_t | |
37 | #define X86_AVX_STATE_COUNT x86_AVX_STATE64_COUNT | |
38 | #define X86_AVX_STATE_FLAVOR x86_AVX_STATE64 | |
39 | #define MCONTEXT_SIZE_256 sizeof(struct __darwin_mcontext_avx64) | |
40 | #else | |
41 | #define YMM_MAX 8 | |
42 | #define X86_AVX_STATE_T x86_avx_state32_t | |
43 | #define X86_AVX_STATE_COUNT x86_AVX_STATE32_COUNT | |
44 | #define X86_AVX_STATE_FLAVOR x86_AVX_STATE32 | |
45 | #define MCONTEXT_SIZE_256 sizeof(struct __darwin_mcontext_avx32) | |
46 | #endif | |
47 | #define VECTOR256 __m256 | |
48 | #define VEC256ALIGN __attribute ((aligned(32))) | |
49 | static inline void populate_ymm(void); | |
50 | static inline void check_ymm(void); | |
51 | VECTOR256 vec256array0[YMM_MAX] VEC256ALIGN; | |
52 | VECTOR256 vec256array1[YMM_MAX] VEC256ALIGN; | |
53 | VECTOR256 vec256array2[YMM_MAX] VEC256ALIGN; | |
54 | VECTOR256 vec256array3[YMM_MAX] VEC256ALIGN; | |
55 | ||
56 | /* | |
57 | * zmm defines/globals/prototypes | |
58 | */ | |
59 | #define STOP_COOKIE_512 0x0123456789abcdefULL | |
60 | #if defined(__x86_64__) | |
61 | #define ZMM_MAX 32 | |
62 | #define X86_AVX512_STATE_T x86_avx512_state64_t | |
63 | #define X86_AVX512_STATE_COUNT x86_AVX512_STATE64_COUNT | |
64 | #define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE64 | |
65 | #define MCONTEXT_SIZE_512 sizeof(struct __darwin_mcontext_avx512_64) | |
66 | #else | |
67 | #define ZMM_MAX 8 | |
68 | #define X86_AVX512_STATE_T x86_avx512_state32_t | |
69 | #define X86_AVX512_STATE_COUNT x86_AVX512_STATE32_COUNT | |
70 | #define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE32 | |
71 | #define MCONTEXT_SIZE_512 sizeof(struct __darwin_mcontext_avx512_32) | |
72 | #endif | |
73 | #define VECTOR512 __m512 | |
74 | #define VEC512ALIGN __attribute ((aligned(64))) | |
75 | #define OPMASK uint64_t | |
76 | #define KARRAY_MAX 8 | |
77 | static inline void populate_zmm(void); | |
78 | static inline void populate_opmask(void); | |
79 | static inline void check_zmm(void); | |
80 | VECTOR512 vec512array0[ZMM_MAX] VEC512ALIGN; | |
81 | VECTOR512 vec512array1[ZMM_MAX] VEC512ALIGN; | |
82 | VECTOR512 vec512array2[ZMM_MAX] VEC512ALIGN; | |
83 | VECTOR512 vec512array3[ZMM_MAX] VEC512ALIGN; | |
84 | OPMASK karray0[8]; | |
85 | OPMASK karray1[8]; | |
86 | OPMASK karray2[8]; | |
87 | OPMASK karray3[8]; | |
88 | ||
89 | ||
90 | /* | |
91 | * Common functions | |
92 | */ | |
93 | ||
/*
 * Byte-wise memcmp that the compiler is unlikely to replace with an
 * optimized (vectorized) intrinsic — the test must not touch vector
 * registers while comparing register snapshots.
 * Returns 0 when equal, else the difference of the first mismatching
 * bytes (as unsigned chars), matching memcmp() semantics.
 */
int
memcmp_unoptimized(const void *s1, const void *s2, size_t n) {
	const unsigned char *a = s1;
	const unsigned char *b = s2;

	for (size_t i = 0; i < n; i++) {
		if (a[i] != b[i]) {
			return (a[i] - b[i]);
		}
	}
	return (0);
}
105 | ||
/*
 * Install `handler` as the SIGALRM sigaction and arm a one-shot
 * ITIMER_REAL timer for `seconds` seconds.  SA_RESTART so interrupted
 * syscalls resume; it_interval is zero so the timer does not reload.
 * NOTE(review): `.sa_mask = 0` assumes sigset_t is an integer type
 * (true on Darwin); sigemptyset() would be the portable form — confirm
 * if this file is ever built elsewhere.
 */
void
start_timer(int seconds, void (*handler)(int, siginfo_t *, void *)) {
	struct sigaction sigalrm_action = {
		.sa_sigaction = handler,
		.sa_flags = SA_RESTART,
		.sa_mask = 0
	};
	struct itimerval timer = {
		.it_value.tv_sec = seconds,
		.it_value.tv_usec = 0,
		.it_interval.tv_sec = 0,
		.it_interval.tv_usec = 0
	};
	T_QUIET; T_WITH_ERRNO;
	T_ASSERT_NE(sigaction(SIGALRM, &sigalrm_action, NULL), -1, NULL);
	T_QUIET; T_WITH_ERRNO;
	T_ASSERT_NE(setitimer(ITIMER_REAL, &timer, NULL), -1, NULL);
}
124 | ||
125 | void | |
126 | require_avx(void) { | |
127 | if((_get_cpu_capabilities() & kHasAVX1_0) != kHasAVX1_0) { | |
128 | T_SKIP("AVX not supported on this system"); | |
129 | } | |
130 | } | |
131 | ||
132 | void | |
133 | require_avx512(void) { | |
134 | if((_get_cpu_capabilities() & kHasAVX512F) != kHasAVX512F) { | |
135 | T_SKIP("AVX-512 not supported on this system"); | |
136 | } | |
137 | } | |
138 | ||
139 | /* | |
140 | * ymm functions | |
141 | */ | |
142 | ||
/*
 * Dump the live ymm registers into the caller's 32-byte-aligned array:
 * ymm0..ymm7 on i386, ymm0..ymm15 on x86_64, with register number ==
 * array index so later element-wise compares line up.  Statement order
 * is significant; do not reorder or insert code that could clobber a
 * ymm register mid-sequence.
 */
static inline void
store_ymm(VECTOR256 *vec256array) {
	int i = 0;
	__asm__ volatile("vmovaps %%ymm0, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm1, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm2, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm3, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm4, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm5, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm6, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm7, %0" :"=m" (vec256array[i]));
#if defined(__x86_64__)
	i++;__asm__ volatile("vmovaps %%ymm8, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm9, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm10, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm11, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm12, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm13, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm14, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm15, %0" :"=m" (vec256array[i]));
#endif
}
165 | ||
/*
 * Load every ymm register with a recognizable pattern (the pid in the
 * middle dwords, distinct markers in dwords 0 and 7 per group of four
 * registers), then snapshot the result into vec256array0 as the
 * reference copy for later integrity checks.
 */
static inline void
populate_ymm(void) {
	int j;
	uint32_t p[8] VEC256ALIGN;

	for (j = 0; j < (int) (sizeof(p)/sizeof(p[0])); j++)
		p[j] = getpid();

	p[0] = 0x22222222;
	p[7] = 0x77777777;
	__asm__ volatile("vmovaps %0, %%ymm0" :: "m" (*(__m256i*)p) : "ymm0");
	__asm__ volatile("vmovaps %0, %%ymm1" :: "m" (*(__m256i*)p) : "ymm1");
	__asm__ volatile("vmovaps %0, %%ymm2" :: "m" (*(__m256i*)p) : "ymm2");
	__asm__ volatile("vmovaps %0, %%ymm3" :: "m" (*(__m256i*)p) : "ymm3");

	p[0] = 0x44444444;
	p[7] = 0xEEEEEEEE;
	__asm__ volatile("vmovaps %0, %%ymm4" :: "m" (*(__m256i*)p) : "ymm4");
	__asm__ volatile("vmovaps %0, %%ymm5" :: "m" (*(__m256i*)p) : "ymm5");
	__asm__ volatile("vmovaps %0, %%ymm6" :: "m" (*(__m256i*)p) : "ymm6");
	__asm__ volatile("vmovaps %0, %%ymm7" :: "m" (*(__m256i*)p) : "ymm7");

#if defined(__x86_64__)
	p[0] = 0x88888888;
	p[7] = 0xAAAAAAAA;
	__asm__ volatile("vmovaps %0, %%ymm8" :: "m" (*(__m256i*)p) : "ymm8");
	__asm__ volatile("vmovaps %0, %%ymm9" :: "m" (*(__m256i*)p) : "ymm9");
	__asm__ volatile("vmovaps %0, %%ymm10" :: "m" (*(__m256i*)p) : "ymm10");
	__asm__ volatile("vmovaps %0, %%ymm11" :: "m" (*(__m256i*)p) : "ymm11");

	p[0] = 0xBBBBBBBB;
	p[7] = 0xCCCCCCCC;
	__asm__ volatile("vmovaps %0, %%ymm12" :: "m" (*(__m256i*)p) : "ymm12");
	__asm__ volatile("vmovaps %0, %%ymm13" :: "m" (*(__m256i*)p) : "ymm13");
	__asm__ volatile("vmovaps %0, %%ymm14" :: "m" (*(__m256i*)p) : "ymm14");
	__asm__ volatile("vmovaps %0, %%ymm15" :: "m" (*(__m256i*)p) : "ymm15");
#endif

	/* Keep the reference snapshot of what we just loaded. */
	store_ymm(vec256array0);
}
206 | ||
207 | void | |
208 | vec256_to_string(VECTOR256 *vec, char *buf) { | |
209 | unsigned int vec_idx = 0; | |
210 | unsigned int buf_idx = 0; | |
211 | int ret = 0; | |
212 | ||
213 | for (vec_idx = 0; vec_idx < YMM_MAX; vec_idx++) { | |
214 | uint64_t a[4]; | |
215 | bcopy(&vec[vec_idx], &a[0], sizeof(a)); | |
216 | ret = sprintf( | |
217 | buf + buf_idx, | |
218 | "0x%016llx:%016llx:%016llx:%016llx\n", | |
219 | a[0], a[1], a[2], a[3] | |
220 | ); | |
221 | T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()"); | |
222 | buf_idx += ret; | |
223 | } | |
224 | } | |
225 | ||
226 | void | |
227 | assert_ymm_eq(void *a, void *b, int c) { | |
228 | if(memcmp_unoptimized(a, b, c)) { | |
229 | vec256_to_string(a, vec_str_buf); | |
230 | T_LOG("Compare failed, vector A:\n%s", vec_str_buf); | |
231 | vec256_to_string(b, vec_str_buf); | |
232 | T_LOG("Compare failed, vector B:\n%s", vec_str_buf); | |
233 | T_ASSERT_FAIL("vectors not equal"); | |
234 | } | |
235 | } | |
236 | ||
/*
 * One iteration of the soak loop: snapshot the live ymm registers and
 * compare against the reference in vec256array0.  Once the SIGALRM
 * handler has planted STOP_COOKIE_256, bail out without asserting —
 * p[0] aliases the low dword of vec256array1[7] (ymm7's xmm half).
 */
void
check_ymm(void) {
	uint32_t *p = (uint32_t *) &vec256array1[7];
	store_ymm(vec256array1);
	if (p[0] == STOP_COOKIE_256) {
		return;
	}
	assert_ymm_eq(vec256array0, vec256array1, sizeof(vec256array0));
}
246 | ||
247 | static void | |
248 | copy_ymm_state_to_vector(X86_AVX_STATE_T *sp, VECTOR256 *vp) { | |
249 | int i; | |
250 | struct __darwin_xmm_reg *xmm = &sp->__fpu_xmm0; | |
251 | struct __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0; | |
252 | ||
253 | for (i = 0; i < YMM_MAX; i++ ) { | |
254 | bcopy(&xmm[i], &vp[i], sizeof(*xmm)); | |
255 | bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh)); | |
256 | } | |
257 | } | |
258 | ||
/*
 * SIGALRM handler for the ymm soak test.  Verifies the AVX state saved
 * in the signal context matches what populate_ymm() installed, then
 * plants STOP_COOKIE_256 in the context's xmm7/ymmh7 so that:
 *  (a) the main loop's check_ymm() sees the cookie and stops asserting,
 *  (b) the caller can confirm sigreturn pushed the edited state back.
 */
static void
ymm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
{
	ucontext_t *contextp = (ucontext_t *) ctx;
	mcontext_t mcontext = contextp->uc_mcontext;
	X86_AVX_STATE_T *avx_state = (X86_AVX_STATE_T *) &mcontext->__fs;
	uint32_t *xp = (uint32_t *) &avx_state->__fpu_xmm7;
	uint32_t *yp = (uint32_t *) &avx_state->__fpu_ymmh7;

	T_LOG("Got SIGALRM");

	/* Check for AVX state */
	T_QUIET;
	T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_256, "check context size");

	/* Check that the state in the context is what's set and expected */
	copy_ymm_state_to_vector(avx_state, vec256array3);
	assert_ymm_eq(vec256array3, vec256array0, sizeof(vec256array1));

	/* Change the context and break the main loop */
	xp[0] = STOP_COOKIE_256;
	yp[0] = STOP_COOKIE_256;
	checking = FALSE;
}
283 | ||
284 | void | |
285 | ymm_integrity(int time) { | |
286 | mach_msg_type_number_t avx_count = X86_AVX_STATE_COUNT; | |
287 | kern_return_t kret; | |
288 | X86_AVX_STATE_T avx_state, avx_state2; | |
289 | mach_port_t ts = mach_thread_self(); | |
290 | ||
291 | bzero(&avx_state, sizeof(avx_state)); | |
292 | bzero(&avx_state2, sizeof(avx_state)); | |
293 | ||
294 | kret = thread_get_state( | |
295 | ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count | |
296 | ); | |
297 | ||
298 | store_ymm(vec256array2); | |
299 | ||
300 | T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()"); | |
301 | vec256_to_string(vec256array2, vec_str_buf); | |
302 | T_LOG("Initial state:\n%s", vec_str_buf); | |
303 | ||
304 | copy_ymm_state_to_vector(&avx_state, vec256array1); | |
305 | assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array1)); | |
306 | ||
307 | populate_ymm(); | |
308 | ||
309 | kret = thread_get_state( | |
310 | ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count | |
311 | ); | |
312 | ||
313 | store_ymm(vec256array2); | |
314 | ||
315 | T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()"); | |
316 | vec256_to_string(vec256array2, vec_str_buf); | |
317 | T_LOG("Populated state:\n%s", vec_str_buf); | |
318 | ||
319 | copy_ymm_state_to_vector(&avx_state2, vec256array1); | |
320 | assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array0)); | |
321 | ||
322 | T_LOG("Running for %ds…", time); | |
323 | start_timer(time, ymm_sigalrm_handler); | |
324 | ||
325 | /* re-populate because printing mucks up XMMs */ | |
326 | populate_ymm(); | |
327 | ||
328 | /* Check state until timer fires */ | |
329 | while(checking) { | |
330 | check_ymm(); | |
331 | } | |
332 | ||
333 | /* Check that the sig handler changed out AVX state */ | |
334 | store_ymm(vec256array1); | |
335 | ||
336 | uint32_t *p = (uint32_t *) &vec256array1[7]; | |
337 | if (p[0] != STOP_COOKIE_256 || | |
338 | p[4] != STOP_COOKIE_256) { | |
339 | vec256_to_string(vec256array1, vec_str_buf); | |
340 | T_ASSERT_FAIL("sigreturn failed to stick"); | |
341 | T_LOG("State:\n%s", vec_str_buf); | |
342 | } | |
343 | ||
344 | T_LOG("Ran for %ds", time); | |
345 | T_PASS("No ymm register corruption occurred"); | |
346 | } | |
347 | ||
348 | /* | |
349 | * zmm functions | |
350 | */ | |
351 | ||
/*
 * Dump the eight AVX-512 opmask registers k0-k7 into the caller's
 * array, one kmovq per register, index == register number.
 */
static inline void
store_opmask(OPMASK k[]) {
	__asm__ volatile("kmovq %%k0, %0" :"=m" (k[0]));
	__asm__ volatile("kmovq %%k1, %0" :"=m" (k[1]));
	__asm__ volatile("kmovq %%k2, %0" :"=m" (k[2]));
	__asm__ volatile("kmovq %%k3, %0" :"=m" (k[3]));
	__asm__ volatile("kmovq %%k4, %0" :"=m" (k[4]));
	__asm__ volatile("kmovq %%k5, %0" :"=m" (k[5]));
	__asm__ volatile("kmovq %%k6, %0" :"=m" (k[6]));
	__asm__ volatile("kmovq %%k7, %0" :"=m" (k[7]));
}
363 | ||
/*
 * Dump the live zmm registers into the caller's 64-byte-aligned array:
 * zmm0..zmm7 on i386, zmm0..zmm31 on x86_64, register number == array
 * index.  Statement order is significant; do not insert code that
 * could clobber a zmm register mid-sequence.
 */
static inline void
store_zmm(VECTOR512 *vecarray) {
	int i = 0;
	__asm__ volatile("vmovaps %%zmm0, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm1, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm2, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm3, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm4, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm5, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm6, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm7, %0" :"=m" (vecarray[i]));
#if defined(__x86_64__)
	i++;__asm__ volatile("vmovaps %%zmm8, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm9, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm10, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm11, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm12, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm13, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm14, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm15, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm16, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm17, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm18, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm19, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm20, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm21, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm22, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm23, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm24, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm25, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm26, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm27, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm28, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm29, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm30, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm31, %0" :"=m" (vecarray[i]));
#endif
}
402 | ||
/*
 * Load k0-k7 with pid-tagged patterns (pid in the high dword,
 * 0x11111111 * register-number in the low dword), then snapshot them
 * into karray0 as the reference copy.
 */
static inline void
populate_opmask(void) {
	uint64_t k[8];

	for (int j = 0; j < 8; j++)
		k[j] = ((uint64_t) getpid() << 32) + (0x11111111 * j);

	__asm__ volatile("kmovq %0, %%k0" : :"m" (k[0]));
	__asm__ volatile("kmovq %0, %%k1" : :"m" (k[1]));
	__asm__ volatile("kmovq %0, %%k2" : :"m" (k[2]));
	__asm__ volatile("kmovq %0, %%k3" : :"m" (k[3]));
	__asm__ volatile("kmovq %0, %%k4" : :"m" (k[4]));
	__asm__ volatile("kmovq %0, %%k5" : :"m" (k[5]));
	__asm__ volatile("kmovq %0, %%k6" : :"m" (k[6]));
	__asm__ volatile("kmovq %0, %%k7" : :"m" (k[7]));

	store_opmask(karray0);
}
421 | ||
422 | static inline void | |
423 | populate_zmm(void) { | |
424 | int j; | |
425 | uint64_t p[8] VEC512ALIGN; | |
426 | ||
427 | for (j = 0; j < (int) (sizeof(p)/sizeof(p[0])); j++) | |
428 | p[j] = ((uint64_t) getpid() << 32) + getpid(); | |
429 | ||
430 | p[0] = 0x0000000000000000ULL; | |
431 | p[2] = 0x4444444444444444ULL; | |
432 | p[4] = 0x8888888888888888ULL; | |
433 | p[7] = 0xCCCCCCCCCCCCCCCCULL; | |
434 | __asm__ volatile("vmovaps %0, %%zmm0" :: "m" (*(__m256i*)p) ); | |
435 | __asm__ volatile("vmovaps %0, %%zmm1" :: "m" (*(__m512i*)p) ); | |
436 | __asm__ volatile("vmovaps %0, %%zmm2" :: "m" (*(__m512i*)p) ); | |
437 | __asm__ volatile("vmovaps %0, %%zmm3" :: "m" (*(__m512i*)p) ); | |
438 | __asm__ volatile("vmovaps %0, %%zmm4" :: "m" (*(__m512i*)p) ); | |
439 | __asm__ volatile("vmovaps %0, %%zmm5" :: "m" (*(__m512i*)p) ); | |
440 | __asm__ volatile("vmovaps %0, %%zmm6" :: "m" (*(__m512i*)p) ); | |
441 | __asm__ volatile("vmovaps %0, %%zmm7" :: "m" (*(__m512i*)p) ); | |
442 | ||
443 | #if defined(__x86_64__) | |
444 | p[0] = 0x1111111111111111ULL; | |
445 | p[2] = 0x5555555555555555ULL; | |
446 | p[4] = 0x9999999999999999ULL; | |
447 | p[7] = 0xDDDDDDDDDDDDDDDDULL; | |
448 | __asm__ volatile("vmovaps %0, %%zmm8" :: "m" (*(__m512i*)p) ); | |
449 | __asm__ volatile("vmovaps %0, %%zmm9" :: "m" (*(__m512i*)p) ); | |
450 | __asm__ volatile("vmovaps %0, %%zmm10" :: "m" (*(__m512i*)p) ); | |
451 | __asm__ volatile("vmovaps %0, %%zmm11" :: "m" (*(__m512i*)p) ); | |
452 | __asm__ volatile("vmovaps %0, %%zmm12" :: "m" (*(__m512i*)p) ); | |
453 | __asm__ volatile("vmovaps %0, %%zmm13" :: "m" (*(__m512i*)p) ); | |
454 | __asm__ volatile("vmovaps %0, %%zmm14" :: "m" (*(__m512i*)p) ); | |
455 | __asm__ volatile("vmovaps %0, %%zmm15" :: "m" (*(__m512i*)p) ); | |
456 | ||
457 | p[0] = 0x2222222222222222ULL; | |
458 | p[2] = 0x6666666666666666ULL; | |
459 | p[4] = 0xAAAAAAAAAAAAAAAAULL; | |
460 | p[7] = 0xEEEEEEEEEEEEEEEEULL; | |
461 | __asm__ volatile("vmovaps %0, %%zmm16" :: "m" (*(__m512i*)p) ); | |
462 | __asm__ volatile("vmovaps %0, %%zmm17" :: "m" (*(__m512i*)p) ); | |
463 | __asm__ volatile("vmovaps %0, %%zmm18" :: "m" (*(__m512i*)p) ); | |
464 | __asm__ volatile("vmovaps %0, %%zmm19" :: "m" (*(__m512i*)p) ); | |
465 | __asm__ volatile("vmovaps %0, %%zmm20" :: "m" (*(__m512i*)p) ); | |
466 | __asm__ volatile("vmovaps %0, %%zmm21" :: "m" (*(__m512i*)p) ); | |
467 | __asm__ volatile("vmovaps %0, %%zmm22" :: "m" (*(__m512i*)p) ); | |
468 | __asm__ volatile("vmovaps %0, %%zmm23" :: "m" (*(__m512i*)p) ); | |
469 | ||
470 | p[0] = 0x3333333333333333ULL; | |
471 | p[2] = 0x7777777777777777ULL; | |
472 | p[4] = 0xBBBBBBBBBBBBBBBBULL; | |
473 | p[7] = 0xFFFFFFFFFFFFFFFFULL; | |
474 | __asm__ volatile("vmovaps %0, %%zmm24" :: "m" (*(__m512i*)p) ); | |
475 | __asm__ volatile("vmovaps %0, %%zmm25" :: "m" (*(__m512i*)p) ); | |
476 | __asm__ volatile("vmovaps %0, %%zmm26" :: "m" (*(__m512i*)p) ); | |
477 | __asm__ volatile("vmovaps %0, %%zmm27" :: "m" (*(__m512i*)p) ); | |
478 | __asm__ volatile("vmovaps %0, %%zmm28" :: "m" (*(__m512i*)p) ); | |
479 | __asm__ volatile("vmovaps %0, %%zmm29" :: "m" (*(__m512i*)p) ); | |
480 | __asm__ volatile("vmovaps %0, %%zmm30" :: "m" (*(__m512i*)p) ); | |
481 | __asm__ volatile("vmovaps %0, %%zmm31" :: "m" (*(__m512i*)p) ); | |
482 | #endif | |
483 | ||
484 | store_zmm(vec512array0); | |
485 | } | |
486 | ||
487 | void | |
488 | vec512_to_string(VECTOR512 *vec, char *buf) { | |
489 | unsigned int vec_idx = 0; | |
490 | unsigned int buf_idx = 0; | |
491 | int ret = 0; | |
492 | ||
493 | for (vec_idx = 0; vec_idx < ZMM_MAX; vec_idx++) { | |
494 | uint64_t a[8]; | |
495 | bcopy(&vec[vec_idx], &a[0], sizeof(a)); | |
496 | ret = sprintf( | |
497 | buf + buf_idx, | |
498 | "0x%016llx:%016llx:%016llx:%016llx:" | |
499 | "%016llx:%016llx:%016llx:%016llx%s", | |
500 | a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], | |
501 | vec_idx < ZMM_MAX - 1 ? "\n" : "" | |
502 | ); | |
503 | T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()"); | |
504 | buf_idx += ret; | |
505 | } | |
506 | } | |
507 | ||
508 | void | |
509 | opmask_to_string(OPMASK *karray, char *buf) { | |
510 | unsigned int karray_idx = 0; | |
511 | unsigned int buf_idx = 0; | |
512 | int ret = 0; | |
513 | ||
514 | for(karray_idx = 0; karray_idx < KARRAY_MAX; karray_idx++) { | |
515 | ret = sprintf( | |
516 | buf + buf_idx, | |
517 | "k%d: 0x%016llx%s", | |
518 | karray_idx, karray[karray_idx], | |
519 | karray_idx < KARRAY_MAX ? "\n" : "" | |
520 | ); | |
521 | T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()"); | |
522 | buf_idx += ret; | |
523 | } | |
524 | } | |
525 | ||
526 | static void | |
527 | assert_zmm_eq(void *a, void *b, int c) { | |
528 | if(memcmp_unoptimized(a, b, c)) { | |
529 | vec512_to_string(a, vec_str_buf); | |
530 | T_LOG("Compare failed, vector A:\n%s", vec_str_buf); | |
531 | vec512_to_string(b, vec_str_buf); | |
532 | T_LOG("Compare failed, vector B:\n%s", vec_str_buf); | |
533 | T_ASSERT_FAIL("Vectors not equal"); | |
534 | } | |
535 | } | |
536 | ||
537 | static void | |
538 | assert_opmask_eq(OPMASK *a, OPMASK *b) { | |
539 | for (int i = 0; i < KARRAY_MAX; i++) { | |
540 | if (a[i] != b[i]) { | |
541 | opmask_to_string(a, karray_str_buf); | |
542 | T_LOG("Compare failed, opmask A:\n%s", karray_str_buf); | |
543 | opmask_to_string(b, karray_str_buf); | |
544 | T_LOG("Compare failed, opmask B:\n%s", karray_str_buf); | |
545 | T_ASSERT_FAIL("opmasks not equal"); | |
546 | } | |
547 | } | |
548 | } | |
549 | ||
/*
 * One iteration of the soak loop: snapshot live opmask and zmm
 * registers and compare against the reference copies.  Once the
 * SIGALRM handler has planted STOP_COOKIE_512, bail out without
 * asserting — p[0] aliases the low qword of vec512array1[7] (zmm7).
 */
void
check_zmm(void) {
	uint64_t *p = (uint64_t *) &vec512array1[7];
	store_opmask(karray1);
	store_zmm(vec512array1);
	if (p[0] == STOP_COOKIE_512) {
		return;
	}

	assert_zmm_eq(vec512array0, vec512array1, sizeof(vec512array0));
	assert_opmask_eq(karray0, karray1);
}
562 | ||
563 | static void copy_state_to_opmask(X86_AVX512_STATE_T *sp, OPMASK *op) { | |
564 | OPMASK *k = (OPMASK *) &sp->__fpu_k0; | |
565 | for (int i = 0; i < KARRAY_MAX; i++) { | |
566 | bcopy(&k[i], &op[i], sizeof(*op)); | |
567 | } | |
568 | } | |
569 | ||
/*
 * Reassemble full 512-bit vectors from the piecewise thread-state
 * layout: for the low register bank, bits 0-127 come from the xmm
 * fields, 128-255 from ymmh, 256-511 from zmmh; on x86_64 the upper
 * bank (zmm16-31) is stored as whole 512-bit registers.
 */
static void copy_zmm_state_to_vector(X86_AVX512_STATE_T *sp, VECTOR512 *vp) {
	int i;
	struct __darwin_xmm_reg *xmm = &sp->__fpu_xmm0;
	struct __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0;
	struct __darwin_ymm_reg *zmmh = &sp->__fpu_zmmh0;
#if defined(__x86_64__)
	struct __darwin_zmm_reg *zmm = &sp->__fpu_zmm16;

	for (i = 0; i < ZMM_MAX/2; i++ ) {
		/* pieces land at byte offsets 0, 16 and 32 of vp[i] */
		bcopy(&xmm[i], &vp[i], sizeof(*xmm));
		bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
		bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
		bcopy(&zmm[i], &vp[(ZMM_MAX/2)+i], sizeof(*zmm));
	}
#else
	for (i = 0; i < ZMM_MAX; i++ ) {
		bcopy(&xmm[i], &vp[i], sizeof(*xmm));
		bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
		bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
	}
#endif
}
592 | ||
/*
 * SIGALRM handler for the zmm soak test.  Verifies the AVX-512 state
 * saved in the signal context matches what populate_zmm() and
 * populate_opmask() installed, then plants STOP_COOKIE_512 in the
 * context's xmm7/ymmh7/zmmh7 and k7 so that:
 *  (a) the main loop's check_zmm() sees the cookie and stops asserting,
 *  (b) the caller can confirm sigreturn pushed the edited state back.
 */
static void
zmm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
{
	ucontext_t *contextp = (ucontext_t *) ctx;
	mcontext_t mcontext = contextp->uc_mcontext;
	X86_AVX512_STATE_T *avx_state = (X86_AVX512_STATE_T *) &mcontext->__fs;
	uint64_t *xp = (uint64_t *) &avx_state->__fpu_xmm7;
	uint64_t *yp = (uint64_t *) &avx_state->__fpu_ymmh7;
	uint64_t *zp = (uint64_t *) &avx_state->__fpu_zmmh7;
	uint64_t *kp = (uint64_t *) &avx_state->__fpu_k0;

	/* Check for AVX512 state */
	T_QUIET;
	T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_512, "check context size");

	/* Check that the state in the context is what's set and expected */
	copy_zmm_state_to_vector(avx_state, vec512array3);
	assert_zmm_eq(vec512array3, vec512array0, sizeof(vec512array1));
	copy_state_to_opmask(avx_state, karray3);
	assert_opmask_eq(karray3, karray0);

	/* Change the context and break the main loop */
	xp[0] = STOP_COOKIE_512;
	yp[0] = STOP_COOKIE_512;
	zp[0] = STOP_COOKIE_512;
	kp[7] = STOP_COOKIE_512;
	checking = FALSE;
}
621 | ||
622 | void | |
623 | zmm_integrity(int time) { | |
624 | mach_msg_type_number_t avx_count = X86_AVX512_STATE_COUNT; | |
625 | kern_return_t kret; | |
626 | X86_AVX512_STATE_T avx_state, avx_state2; | |
627 | mach_port_t ts = mach_thread_self(); | |
628 | ||
629 | bzero(&avx_state, sizeof(avx_state)); | |
630 | bzero(&avx_state2, sizeof(avx_state)); | |
631 | ||
632 | store_zmm(vec512array2); | |
633 | store_opmask(karray2); | |
634 | ||
635 | kret = thread_get_state( | |
636 | ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count | |
637 | ); | |
638 | ||
639 | T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()"); | |
640 | vec512_to_string(vec512array2, vec_str_buf); | |
641 | opmask_to_string(karray2, karray_str_buf); | |
642 | T_LOG("Initial state:\n%s\n%s", vec_str_buf, karray_str_buf); | |
643 | ||
644 | copy_zmm_state_to_vector(&avx_state, vec512array1); | |
645 | assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1)); | |
646 | copy_state_to_opmask(&avx_state, karray1); | |
647 | assert_opmask_eq(karray2, karray1); | |
648 | ||
649 | populate_zmm(); | |
650 | populate_opmask(); | |
651 | ||
652 | kret = thread_get_state( | |
653 | ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count | |
654 | ); | |
655 | ||
656 | store_zmm(vec512array2); | |
657 | store_opmask(karray2); | |
658 | ||
659 | T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()"); | |
660 | vec512_to_string(vec512array2, vec_str_buf); | |
661 | opmask_to_string(karray2, karray_str_buf); | |
662 | T_LOG("Populated state:\n%s\n%s", vec_str_buf, karray_str_buf); | |
663 | ||
664 | copy_zmm_state_to_vector(&avx_state2, vec512array1); | |
665 | assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1)); | |
666 | copy_state_to_opmask(&avx_state2, karray1); | |
667 | assert_opmask_eq(karray2, karray1); | |
668 | ||
669 | T_LOG("Running for %ds…", time); | |
670 | start_timer(time, zmm_sigalrm_handler); | |
671 | ||
672 | /* re-populate because printing mucks up XMMs */ | |
673 | populate_zmm(); | |
674 | populate_opmask(); | |
675 | ||
676 | /* Check state until timer fires */ | |
677 | while(checking) { | |
678 | check_zmm(); | |
679 | } | |
680 | ||
681 | /* Check that the sig handler changed our AVX state */ | |
682 | store_zmm(vec512array1); | |
683 | store_opmask(karray1); | |
684 | ||
685 | uint64_t *p = (uint64_t *) &vec512array1[7]; | |
686 | if (p[0] != STOP_COOKIE_512 || | |
687 | p[2] != STOP_COOKIE_512 || | |
688 | p[4] != STOP_COOKIE_512 || | |
689 | karray1[7] != STOP_COOKIE_512) { | |
690 | vec512_to_string(vec512array1, vec_str_buf); | |
691 | opmask_to_string(karray1, karray_str_buf); | |
692 | T_ASSERT_FAIL("sigreturn failed to stick"); | |
693 | T_LOG("State:\n%s\n%s", vec_str_buf, karray_str_buf); | |
694 | } | |
695 | ||
696 | T_LOG("Ran for %ds", time); | |
697 | T_PASS("No zmm register corruption occurred"); | |
698 | } | |
699 | ||
700 | /* | |
701 | * Main test declarations | |
702 | */ | |
/* Quick variant: runs NORMAL_RUN_TIME, timeout sized to match. */
T_DECL(ymm_integrity,
	"Quick soak test to verify that AVX "
	"register state is maintained correctly",
	T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD)) {
	require_avx();
	ymm_integrity(NORMAL_RUN_TIME);
}
710 | ||
/* Stress variant: runs LONG_RUN_TIME; disabled by default. */
T_DECL(ymm_integrity_stress,
	"Extended soak test to verify that AVX "
	"register state is maintained correctly",
	T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD),
	T_META_ENABLED(false)) {
	require_avx();
	ymm_integrity(LONG_RUN_TIME);
}
719 | ||
720 | T_DECL(zmm_integrity, | |
721 | "Quick soak test to verify that AVX-512 " | |
722 | "register state is maintained correctly", | |
723 | T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD)) { | |
724 | require_avx512(); | |
725 | zmm_integrity(NORMAL_RUN_TIME); | |
726 | } | |
727 | ||
728 | T_DECL(zmm_integrity_stress, | |
729 | "Extended soak test to verify that AVX-512 " | |
730 | "register state is maintained correctly", | |
731 | T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD), | |
732 | T_META_ENABLED(false)) { | |
733 | require_avx512(); | |
734 | zmm_integrity(LONG_RUN_TIME); | |
735 | } | |
736 |