#ifdef T_NAMESPACE
#undef T_NAMESPACE
#endif

#include <darwintest.h>
#include <unistd.h>
#include <signal.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <immintrin.h>
#include <mach/mach.h>
#include <stdio.h>
#include <string.h>
#include <err.h>
#include <i386/cpu_capabilities.h>

T_GLOBAL_META(
    T_META_NAMESPACE("xnu.intel"),
    T_META_CHECK_LEAKS(false)
);

#define NORMAL_RUN_TIME  (10)
#define LONG_RUN_TIME    (10*60)
#define TIMEOUT_OVERHEAD (10)

volatile boolean_t checking = true;
char vec_str_buf[8196];
char karray_str_buf[1024];

/*
 * ymm defines/globals/prototypes
 */
#define STOP_COOKIE_256 0x01234567
#if defined(__x86_64__)
#define YMM_MAX 16
#define X86_AVX_STATE_T x86_avx_state64_t
#define X86_AVX_STATE_COUNT x86_AVX_STATE64_COUNT
#define X86_AVX_STATE_FLAVOR x86_AVX_STATE64
#define MCONTEXT_SIZE_256 sizeof(struct __darwin_mcontext_avx64)
#else
#define YMM_MAX 8
#define X86_AVX_STATE_T x86_avx_state32_t
#define X86_AVX_STATE_COUNT x86_AVX_STATE32_COUNT
#define X86_AVX_STATE_FLAVOR x86_AVX_STATE32
#define MCONTEXT_SIZE_256 sizeof(struct __darwin_mcontext_avx32)
#endif
#define VECTOR256 __m256
#define VEC256ALIGN __attribute ((aligned(32)))
static inline void populate_ymm(void);
static inline void check_ymm(void);
VECTOR256 vec256array0[YMM_MAX] VEC256ALIGN;
VECTOR256 vec256array1[YMM_MAX] VEC256ALIGN;
VECTOR256 vec256array2[YMM_MAX] VEC256ALIGN;
VECTOR256 vec256array3[YMM_MAX] VEC256ALIGN;

/*
 * zmm defines/globals/prototypes
 */
#define STOP_COOKIE_512 0x0123456789abcdefULL
#if defined(__x86_64__)
#define ZMM_MAX 32
#define X86_AVX512_STATE_T x86_avx512_state64_t
#define X86_AVX512_STATE_COUNT x86_AVX512_STATE64_COUNT
#define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE64
#define MCONTEXT_SIZE_512 sizeof(struct __darwin_mcontext_avx512_64)
#else
#define ZMM_MAX 8
#define X86_AVX512_STATE_T x86_avx512_state32_t
#define X86_AVX512_STATE_COUNT x86_AVX512_STATE32_COUNT
#define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE32
#define MCONTEXT_SIZE_512 sizeof(struct __darwin_mcontext_avx512_32)
#endif
#define VECTOR512 __m512
#define VEC512ALIGN __attribute ((aligned(64)))
#define OPMASK uint64_t
#define KARRAY_MAX 8
static inline void populate_zmm(void);
static inline void populate_opmask(void);
static inline void check_zmm(void);
VECTOR512 vec512array0[ZMM_MAX] VEC512ALIGN;
VECTOR512 vec512array1[ZMM_MAX] VEC512ALIGN;
VECTOR512 vec512array2[ZMM_MAX] VEC512ALIGN;
VECTOR512 vec512array3[ZMM_MAX] VEC512ALIGN;
OPMASK karray0[8];
OPMASK karray1[8];
OPMASK karray2[8];
OPMASK karray3[8];


/*
 * Common functions
 */

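/*
 * Byte-wise memcmp, kept deliberately simple so the comparison itself is
 * unlikely to be compiled into vector instructions that would disturb the
 * register state under test.
 */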
int
memcmp_unoptimized(const void *s1, const void *s2, size_t n) {
    if (n != 0) {
        const unsigned char *p1 = s1, *p2 = s2;
        do {
            if (*p1++ != *p2++)
                return (*--p1 - *--p2);
        } while (--n != 0);
    }
    return (0);
}

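/*
 * Arm a one-shot ITIMER_REAL for 'seconds' and install 'handler' as the
 * SIGALRM handler.
 */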
void
start_timer(int seconds, void (*handler)(int, siginfo_t *, void *)) {
    struct sigaction sigalrm_action = {
        .sa_sigaction = handler,
        .sa_flags = SA_RESTART,
        .sa_mask = 0
    };
    struct itimerval timer = {
        .it_value.tv_sec = seconds,
        .it_value.tv_usec = 0,
        .it_interval.tv_sec = 0,
        .it_interval.tv_usec = 0
    };
    T_QUIET; T_WITH_ERRNO;
    T_ASSERT_NE(sigaction(SIGALRM, &sigalrm_action, NULL), -1, NULL);
    T_QUIET; T_WITH_ERRNO;
    T_ASSERT_NE(setitimer(ITIMER_REAL, &timer, NULL), -1, NULL);
}

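/*
 * Skip the test unless the CPU reports the required AVX / AVX-512 feature
 * bits.
 */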
void
require_avx(void) {
    if ((_get_cpu_capabilities() & kHasAVX1_0) != kHasAVX1_0) {
        T_SKIP("AVX not supported on this system");
    }
}

void
require_avx512(void) {
    if ((_get_cpu_capabilities() & kHasAVX512F) != kHasAVX512F) {
        T_SKIP("AVX-512 not supported on this system");
    }
}

/*
 * ymm functions
 */

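/*
 * Save ymm0-ymm7 (and ymm8-ymm15 on x86_64) into the given 32-byte-aligned
 * array.
 */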
static inline void
store_ymm(VECTOR256 *vec256array) {
    int i = 0;
    __asm__ volatile("vmovaps %%ymm0, %0" :"=m" (vec256array[i]));
    i++;__asm__ volatile("vmovaps %%ymm1, %0" :"=m" (vec256array[i]));
    i++;__asm__ volatile("vmovaps %%ymm2, %0" :"=m" (vec256array[i]));
    i++;__asm__ volatile("vmovaps %%ymm3, %0" :"=m" (vec256array[i]));
    i++;__asm__ volatile("vmovaps %%ymm4, %0" :"=m" (vec256array[i]));
    i++;__asm__ volatile("vmovaps %%ymm5, %0" :"=m" (vec256array[i]));
    i++;__asm__ volatile("vmovaps %%ymm6, %0" :"=m" (vec256array[i]));
    i++;__asm__ volatile("vmovaps %%ymm7, %0" :"=m" (vec256array[i]));
#if defined(__x86_64__)
    i++;__asm__ volatile("vmovaps %%ymm8, %0" :"=m" (vec256array[i]));
    i++;__asm__ volatile("vmovaps %%ymm9, %0" :"=m" (vec256array[i]));
    i++;__asm__ volatile("vmovaps %%ymm10, %0" :"=m" (vec256array[i]));
    i++;__asm__ volatile("vmovaps %%ymm11, %0" :"=m" (vec256array[i]));
    i++;__asm__ volatile("vmovaps %%ymm12, %0" :"=m" (vec256array[i]));
    i++;__asm__ volatile("vmovaps %%ymm13, %0" :"=m" (vec256array[i]));
    i++;__asm__ volatile("vmovaps %%ymm14, %0" :"=m" (vec256array[i]));
    i++;__asm__ volatile("vmovaps %%ymm15, %0" :"=m" (vec256array[i]));
#endif
}

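/*
 * Load every ymm register with a recognizable pattern built from getpid()
 * plus fixed marker words, then snapshot the live registers into
 * vec256array0 as the reference copy.
 */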
static inline void
populate_ymm(void) {
    int j;
    uint32_t p[8] VEC256ALIGN;

    for (j = 0; j < (int) (sizeof(p)/sizeof(p[0])); j++)
        p[j] = getpid();

    p[0] = 0x22222222;
    p[7] = 0x77777777;
    __asm__ volatile("vmovaps %0, %%ymm0" :: "m" (*(__m256i*)p) : "ymm0");
    __asm__ volatile("vmovaps %0, %%ymm1" :: "m" (*(__m256i*)p) : "ymm1");
    __asm__ volatile("vmovaps %0, %%ymm2" :: "m" (*(__m256i*)p) : "ymm2");
    __asm__ volatile("vmovaps %0, %%ymm3" :: "m" (*(__m256i*)p) : "ymm3");

    p[0] = 0x44444444;
    p[7] = 0xEEEEEEEE;
    __asm__ volatile("vmovaps %0, %%ymm4" :: "m" (*(__m256i*)p) : "ymm4");
    __asm__ volatile("vmovaps %0, %%ymm5" :: "m" (*(__m256i*)p) : "ymm5");
    __asm__ volatile("vmovaps %0, %%ymm6" :: "m" (*(__m256i*)p) : "ymm6");
    __asm__ volatile("vmovaps %0, %%ymm7" :: "m" (*(__m256i*)p) : "ymm7");

#if defined(__x86_64__)
    p[0] = 0x88888888;
    p[7] = 0xAAAAAAAA;
    __asm__ volatile("vmovaps %0, %%ymm8" :: "m" (*(__m256i*)p) : "ymm8");
    __asm__ volatile("vmovaps %0, %%ymm9" :: "m" (*(__m256i*)p) : "ymm9");
    __asm__ volatile("vmovaps %0, %%ymm10" :: "m" (*(__m256i*)p) : "ymm10");
    __asm__ volatile("vmovaps %0, %%ymm11" :: "m" (*(__m256i*)p) : "ymm11");

    p[0] = 0xBBBBBBBB;
    p[7] = 0xCCCCCCCC;
    __asm__ volatile("vmovaps %0, %%ymm12" :: "m" (*(__m256i*)p) : "ymm12");
    __asm__ volatile("vmovaps %0, %%ymm13" :: "m" (*(__m256i*)p) : "ymm13");
    __asm__ volatile("vmovaps %0, %%ymm14" :: "m" (*(__m256i*)p) : "ymm14");
    __asm__ volatile("vmovaps %0, %%ymm15" :: "m" (*(__m256i*)p) : "ymm15");
#endif

    store_ymm(vec256array0);
}

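/*
 * Format each 256-bit register image as four 64-bit hex words, one register
 * per line, appending to buf.
 */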
void
vec256_to_string(VECTOR256 *vec, char *buf) {
    unsigned int vec_idx = 0;
    unsigned int buf_idx = 0;
    int ret = 0;

    for (vec_idx = 0; vec_idx < YMM_MAX; vec_idx++) {
        uint64_t a[4];
        bcopy(&vec[vec_idx], &a[0], sizeof(a));
        ret = sprintf(
            buf + buf_idx,
            "0x%016llx:%016llx:%016llx:%016llx\n",
            a[0], a[1], a[2], a[3]
        );
        T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
        buf_idx += ret;
    }
}

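/*
 * Compare two ymm register dumps byte for byte; on mismatch, log both dumps
 * and fail the test.
 */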
void
assert_ymm_eq(void *a, void *b, int c) {
    if (memcmp_unoptimized(a, b, c)) {
        vec256_to_string(a, vec_str_buf);
        T_LOG("Compare failed, vector A:\n%s", vec_str_buf);
        vec256_to_string(b, vec_str_buf);
        T_LOG("Compare failed, vector B:\n%s", vec_str_buf);
        T_ASSERT_FAIL("vectors not equal");
    }
}

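/*
 * Re-read the live ymm registers. Return early once the signal handler has
 * planted STOP_COOKIE_256 in ymm7; otherwise assert the registers still
 * match the reference values written by populate_ymm().
 */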
void
check_ymm(void) {
    uint32_t *p = (uint32_t *) &vec256array1[7];
    store_ymm(vec256array1);
    if (p[0] == STOP_COOKIE_256) {
        return;
    }
    assert_ymm_eq(vec256array0, vec256array1, sizeof(vec256array0));
}

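/*
 * Reassemble full 256-bit ymm values from the xmm (low 128 bits) and ymmh
 * (high 128 bits) halves of the AVX thread state.
 */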
static void
copy_ymm_state_to_vector(X86_AVX_STATE_T *sp, VECTOR256 *vp) {
    int i;
    struct __darwin_xmm_reg *xmm = &sp->__fpu_xmm0;
    struct __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0;

    for (i = 0; i < YMM_MAX; i++) {
        bcopy(&xmm[i], &vp[i], sizeof(*xmm));
        bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
    }
}

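/*
 * SIGALRM handler: verify that the AVX state delivered in the signal context
 * matches what the test wrote, then patch xmm7/ymmh7 with the stop cookie
 * and clear 'checking' so the modified state is restored on sigreturn and
 * the main loop can observe it.
 */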
static void
ymm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
{
    ucontext_t *contextp = (ucontext_t *) ctx;
    mcontext_t mcontext = contextp->uc_mcontext;
    X86_AVX_STATE_T *avx_state = (X86_AVX_STATE_T *) &mcontext->__fs;
    uint32_t *xp = (uint32_t *) &avx_state->__fpu_xmm7;
    uint32_t *yp = (uint32_t *) &avx_state->__fpu_ymmh7;

    T_LOG("Got SIGALRM");

    /* Check for AVX state */
    T_QUIET;
    T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_256, "check context size");

    /* Check that the state in the context is what's set and expected */
    copy_ymm_state_to_vector(avx_state, vec256array3);
    assert_ymm_eq(vec256array3, vec256array0, sizeof(vec256array1));

    /* Change the context and break the main loop */
    xp[0] = STOP_COOKIE_256;
    yp[0] = STOP_COOKIE_256;
    checking = FALSE;
}

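/*
 * Main ymm test body: cross-check thread_get_state() against the live
 * registers before and after populating them, spin in check_ymm() until the
 * SIGALRM handler stops the loop, then verify that the cookie written by the
 * handler survived sigreturn.
 */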
void
ymm_integrity(int time) {
    mach_msg_type_number_t avx_count = X86_AVX_STATE_COUNT;
    kern_return_t kret;
    X86_AVX_STATE_T avx_state, avx_state2;
    mach_port_t ts = mach_thread_self();

    bzero(&avx_state, sizeof(avx_state));
    bzero(&avx_state2, sizeof(avx_state2));

    kret = thread_get_state(
        ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count
    );

    store_ymm(vec256array2);

    T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
    vec256_to_string(vec256array2, vec_str_buf);
    T_LOG("Initial state:\n%s", vec_str_buf);

    copy_ymm_state_to_vector(&avx_state, vec256array1);
    assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array1));

    populate_ymm();

    kret = thread_get_state(
        ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count
    );

    store_ymm(vec256array2);

    T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
    vec256_to_string(vec256array2, vec_str_buf);
    T_LOG("Populated state:\n%s", vec_str_buf);

    copy_ymm_state_to_vector(&avx_state2, vec256array1);
    assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array0));

    T_LOG("Running for %ds…", time);
    start_timer(time, ymm_sigalrm_handler);

    /* re-populate because printing mucks up XMMs */
    populate_ymm();

    /* Check state until timer fires */
    while (checking) {
        check_ymm();
    }

    /* Check that the sig handler changed our AVX state */
    store_ymm(vec256array1);

    uint32_t *p = (uint32_t *) &vec256array1[7];
    if (p[0] != STOP_COOKIE_256 ||
        p[4] != STOP_COOKIE_256) {
        vec256_to_string(vec256array1, vec_str_buf);
        T_LOG("State:\n%s", vec_str_buf);
        T_ASSERT_FAIL("sigreturn failed to stick");
    }

    T_LOG("Ran for %ds", time);
    T_PASS("No ymm register corruption occurred");
}

/*
 * zmm functions
 */

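/*
 * Save opmask registers k0-k7 into an 8-entry array.
 */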
static inline void
store_opmask(OPMASK k[]) {
    __asm__ volatile("kmovq %%k0, %0" :"=m" (k[0]));
    __asm__ volatile("kmovq %%k1, %0" :"=m" (k[1]));
    __asm__ volatile("kmovq %%k2, %0" :"=m" (k[2]));
    __asm__ volatile("kmovq %%k3, %0" :"=m" (k[3]));
    __asm__ volatile("kmovq %%k4, %0" :"=m" (k[4]));
    __asm__ volatile("kmovq %%k5, %0" :"=m" (k[5]));
    __asm__ volatile("kmovq %%k6, %0" :"=m" (k[6]));
    __asm__ volatile("kmovq %%k7, %0" :"=m" (k[7]));
}

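/*
 * Save zmm0-zmm7 (and zmm8-zmm31 on x86_64) into the given 64-byte-aligned
 * array.
 */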
static inline void
store_zmm(VECTOR512 *vecarray) {
    int i = 0;
    __asm__ volatile("vmovaps %%zmm0, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm1, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm2, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm3, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm4, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm5, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm6, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm7, %0" :"=m" (vecarray[i]));
#if defined(__x86_64__)
    i++;__asm__ volatile("vmovaps %%zmm8, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm9, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm10, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm11, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm12, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm13, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm14, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm15, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm16, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm17, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm18, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm19, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm20, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm21, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm22, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm23, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm24, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm25, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm26, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm27, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm28, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm29, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm30, %0" :"=m" (vecarray[i]));
    i++;__asm__ volatile("vmovaps %%zmm31, %0" :"=m" (vecarray[i]));
#endif
}

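/*
 * Load each opmask register with a value derived from getpid() and the
 * register index, then snapshot the result into karray0 as the reference
 * copy.
 */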
static inline void
populate_opmask(void) {
    uint64_t k[8];

    for (int j = 0; j < 8; j++)
        k[j] = ((uint64_t) getpid() << 32) + (0x11111111 * j);

    __asm__ volatile("kmovq %0, %%k0" : :"m" (k[0]));
    __asm__ volatile("kmovq %0, %%k1" : :"m" (k[1]));
    __asm__ volatile("kmovq %0, %%k2" : :"m" (k[2]));
    __asm__ volatile("kmovq %0, %%k3" : :"m" (k[3]));
    __asm__ volatile("kmovq %0, %%k4" : :"m" (k[4]));
    __asm__ volatile("kmovq %0, %%k5" : :"m" (k[5]));
    __asm__ volatile("kmovq %0, %%k6" : :"m" (k[6]));
    __asm__ volatile("kmovq %0, %%k7" : :"m" (k[7]));

    store_opmask(karray0);
}

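/*
 * Load every zmm register with a distinctive 512-bit pattern derived from
 * getpid() plus fixed marker words, then snapshot the live registers into
 * vec512array0 as the reference copy.
 */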
static inline void
populate_zmm(void) {
    int j;
    uint64_t p[8] VEC512ALIGN;

    for (j = 0; j < (int) (sizeof(p)/sizeof(p[0])); j++)
        p[j] = ((uint64_t) getpid() << 32) + getpid();

    p[0] = 0x0000000000000000ULL;
    p[2] = 0x4444444444444444ULL;
    p[4] = 0x8888888888888888ULL;
    p[7] = 0xCCCCCCCCCCCCCCCCULL;
    __asm__ volatile("vmovaps %0, %%zmm0" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm1" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm2" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm3" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm4" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm5" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm6" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm7" :: "m" (*(__m512i*)p));

#if defined(__x86_64__)
    p[0] = 0x1111111111111111ULL;
    p[2] = 0x5555555555555555ULL;
    p[4] = 0x9999999999999999ULL;
    p[7] = 0xDDDDDDDDDDDDDDDDULL;
    __asm__ volatile("vmovaps %0, %%zmm8" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm9" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm10" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm11" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm12" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm13" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm14" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm15" :: "m" (*(__m512i*)p));

    p[0] = 0x2222222222222222ULL;
    p[2] = 0x6666666666666666ULL;
    p[4] = 0xAAAAAAAAAAAAAAAAULL;
    p[7] = 0xEEEEEEEEEEEEEEEEULL;
    __asm__ volatile("vmovaps %0, %%zmm16" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm17" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm18" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm19" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm20" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm21" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm22" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm23" :: "m" (*(__m512i*)p));

    p[0] = 0x3333333333333333ULL;
    p[2] = 0x7777777777777777ULL;
    p[4] = 0xBBBBBBBBBBBBBBBBULL;
    p[7] = 0xFFFFFFFFFFFFFFFFULL;
    __asm__ volatile("vmovaps %0, %%zmm24" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm25" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm26" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm27" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm28" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm29" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm30" :: "m" (*(__m512i*)p));
    __asm__ volatile("vmovaps %0, %%zmm31" :: "m" (*(__m512i*)p));
#endif

    store_zmm(vec512array0);
}

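/*
 * Format each 512-bit register image as eight 64-bit hex words, one register
 * per line, appending to buf.
 */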
void
vec512_to_string(VECTOR512 *vec, char *buf) {
    unsigned int vec_idx = 0;
    unsigned int buf_idx = 0;
    int ret = 0;

    for (vec_idx = 0; vec_idx < ZMM_MAX; vec_idx++) {
        uint64_t a[8];
        bcopy(&vec[vec_idx], &a[0], sizeof(a));
        ret = sprintf(
            buf + buf_idx,
            "0x%016llx:%016llx:%016llx:%016llx:"
            "%016llx:%016llx:%016llx:%016llx%s",
            a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7],
            vec_idx < ZMM_MAX - 1 ? "\n" : ""
        );
        T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
        buf_idx += ret;
    }
}

void
opmask_to_string(OPMASK *karray, char *buf) {
    unsigned int karray_idx = 0;
    unsigned int buf_idx = 0;
    int ret = 0;

    for (karray_idx = 0; karray_idx < KARRAY_MAX; karray_idx++) {
        ret = sprintf(
            buf + buf_idx,
            "k%d: 0x%016llx%s",
            karray_idx, karray[karray_idx],
            karray_idx < KARRAY_MAX - 1 ? "\n" : ""
        );
        T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
        buf_idx += ret;
    }
}

static void
assert_zmm_eq(void *a, void *b, int c) {
    if (memcmp_unoptimized(a, b, c)) {
        vec512_to_string(a, vec_str_buf);
        T_LOG("Compare failed, vector A:\n%s", vec_str_buf);
        vec512_to_string(b, vec_str_buf);
        T_LOG("Compare failed, vector B:\n%s", vec_str_buf);
        T_ASSERT_FAIL("Vectors not equal");
    }
}

static void
assert_opmask_eq(OPMASK *a, OPMASK *b) {
    for (int i = 0; i < KARRAY_MAX; i++) {
        if (a[i] != b[i]) {
            opmask_to_string(a, karray_str_buf);
            T_LOG("Compare failed, opmask A:\n%s", karray_str_buf);
            opmask_to_string(b, karray_str_buf);
            T_LOG("Compare failed, opmask B:\n%s", karray_str_buf);
            T_ASSERT_FAIL("opmasks not equal");
        }
    }
}

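/*
 * Re-read the live zmm and opmask registers. Return early once the signal
 * handler has planted STOP_COOKIE_512 in zmm7; otherwise assert both register
 * sets still match the reference values.
 */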
void
check_zmm(void) {
    uint64_t *p = (uint64_t *) &vec512array1[7];
    store_opmask(karray1);
    store_zmm(vec512array1);
    if (p[0] == STOP_COOKIE_512) {
        return;
    }

    assert_zmm_eq(vec512array0, vec512array1, sizeof(vec512array0));
    assert_opmask_eq(karray0, karray1);
}

static void copy_state_to_opmask(X86_AVX512_STATE_T *sp, OPMASK *op) {
    OPMASK *k = (OPMASK *) &sp->__fpu_k0;
    for (int i = 0; i < KARRAY_MAX; i++) {
        bcopy(&k[i], &op[i], sizeof(*op));
    }
}

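/*
 * Rebuild full 512-bit zmm values from the AVX-512 thread state: zmm0-zmm15
 * are split across the xmm/ymmh/zmmh fields, while on x86_64 zmm16-zmm31 are
 * stored whole in the zmm fields.
 */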
static void copy_zmm_state_to_vector(X86_AVX512_STATE_T *sp, VECTOR512 *vp) {
    int i;
    struct __darwin_xmm_reg *xmm = &sp->__fpu_xmm0;
    struct __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0;
    struct __darwin_ymm_reg *zmmh = &sp->__fpu_zmmh0;
#if defined(__x86_64__)
    struct __darwin_zmm_reg *zmm = &sp->__fpu_zmm16;

    for (i = 0; i < ZMM_MAX/2; i++) {
        bcopy(&xmm[i], &vp[i], sizeof(*xmm));
        bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
        bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
        bcopy(&zmm[i], &vp[(ZMM_MAX/2)+i], sizeof(*zmm));
    }
#else
    for (i = 0; i < ZMM_MAX; i++) {
        bcopy(&xmm[i], &vp[i], sizeof(*xmm));
        bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
        bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
    }
#endif
}

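/*
 * SIGALRM handler: verify that the AVX-512 state delivered in the signal
 * context matches what the test wrote (including the opmask registers), then
 * patch xmm7/ymmh7/zmmh7 and k7 with the stop cookie and clear 'checking' so
 * the modified state is restored on sigreturn.
 */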
static void
zmm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
{
    ucontext_t *contextp = (ucontext_t *) ctx;
    mcontext_t mcontext = contextp->uc_mcontext;
    X86_AVX512_STATE_T *avx_state = (X86_AVX512_STATE_T *) &mcontext->__fs;
    uint64_t *xp = (uint64_t *) &avx_state->__fpu_xmm7;
    uint64_t *yp = (uint64_t *) &avx_state->__fpu_ymmh7;
    uint64_t *zp = (uint64_t *) &avx_state->__fpu_zmmh7;
    uint64_t *kp = (uint64_t *) &avx_state->__fpu_k0;

    /* Check for AVX512 state */
    T_QUIET;
    T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_512, "check context size");

    /* Check that the state in the context is what's set and expected */
    copy_zmm_state_to_vector(avx_state, vec512array3);
    assert_zmm_eq(vec512array3, vec512array0, sizeof(vec512array1));
    copy_state_to_opmask(avx_state, karray3);
    assert_opmask_eq(karray3, karray0);

    /* Change the context and break the main loop */
    xp[0] = STOP_COOKIE_512;
    yp[0] = STOP_COOKIE_512;
    zp[0] = STOP_COOKIE_512;
    kp[7] = STOP_COOKIE_512;
    checking = FALSE;
}

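/*
 * Main zmm test body: the AVX-512 analogue of ymm_integrity(), additionally
 * covering the k0-k7 opmask registers.
 */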
void
zmm_integrity(int time) {
    mach_msg_type_number_t avx_count = X86_AVX512_STATE_COUNT;
    kern_return_t kret;
    X86_AVX512_STATE_T avx_state, avx_state2;
    mach_port_t ts = mach_thread_self();

    bzero(&avx_state, sizeof(avx_state));
    bzero(&avx_state2, sizeof(avx_state2));

    store_zmm(vec512array2);
    store_opmask(karray2);

    kret = thread_get_state(
        ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count
    );

    T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
    vec512_to_string(vec512array2, vec_str_buf);
    opmask_to_string(karray2, karray_str_buf);
    T_LOG("Initial state:\n%s\n%s", vec_str_buf, karray_str_buf);

    copy_zmm_state_to_vector(&avx_state, vec512array1);
    assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1));
    copy_state_to_opmask(&avx_state, karray1);
    assert_opmask_eq(karray2, karray1);

    populate_zmm();
    populate_opmask();

    kret = thread_get_state(
        ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count
    );

    store_zmm(vec512array2);
    store_opmask(karray2);

    T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
    vec512_to_string(vec512array2, vec_str_buf);
    opmask_to_string(karray2, karray_str_buf);
    T_LOG("Populated state:\n%s\n%s", vec_str_buf, karray_str_buf);

    copy_zmm_state_to_vector(&avx_state2, vec512array1);
    assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1));
    copy_state_to_opmask(&avx_state2, karray1);
    assert_opmask_eq(karray2, karray1);

    T_LOG("Running for %ds…", time);
    start_timer(time, zmm_sigalrm_handler);

    /* re-populate because printing mucks up XMMs */
    populate_zmm();
    populate_opmask();

    /* Check state until timer fires */
    while (checking) {
        check_zmm();
    }

    /* Check that the sig handler changed our AVX state */
    store_zmm(vec512array1);
    store_opmask(karray1);

    uint64_t *p = (uint64_t *) &vec512array1[7];
    if (p[0] != STOP_COOKIE_512 ||
        p[2] != STOP_COOKIE_512 ||
        p[4] != STOP_COOKIE_512 ||
        karray1[7] != STOP_COOKIE_512) {
        vec512_to_string(vec512array1, vec_str_buf);
        opmask_to_string(karray1, karray_str_buf);
        T_LOG("State:\n%s\n%s", vec_str_buf, karray_str_buf);
        T_ASSERT_FAIL("sigreturn failed to stick");
    }

    T_LOG("Ran for %ds", time);
    T_PASS("No zmm register corruption occurred");
}

/*
 * Main test declarations
 */
T_DECL(ymm_integrity,
    "Quick soak test to verify that AVX "
    "register state is maintained correctly",
    T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD)) {
    require_avx();
    ymm_integrity(NORMAL_RUN_TIME);
}

T_DECL(ymm_integrity_stress,
    "Extended soak test to verify that AVX "
    "register state is maintained correctly",
    T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD),
    T_META_ENABLED(false)) {
    require_avx();
    ymm_integrity(LONG_RUN_TIME);
}

T_DECL(zmm_integrity,
    "Quick soak test to verify that AVX-512 "
    "register state is maintained correctly",
    T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD)) {
    require_avx512();
    zmm_integrity(NORMAL_RUN_TIME);
}

T_DECL(zmm_integrity_stress,
    "Extended soak test to verify that AVX-512 "
    "register state is maintained correctly",
    T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD),
    T_META_ENABLED(false)) {
    require_avx512();
    zmm_integrity(LONG_RUN_TIME);
}