#ifdef T_NAMESPACE
#undef T_NAMESPACE
#endif

#include <darwintest.h>
#include <unistd.h>
#include <signal.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <immintrin.h>
#include <mach/mach.h>
#include <stdio.h>
#include <string.h>
#include <err.h>
#include <i386/cpu_capabilities.h>

T_GLOBAL_META(
	T_META_NAMESPACE("xnu.intel"),
	T_META_CHECK_LEAKS(false),
	T_META_RUN_CONCURRENTLY(true)
	);

#define NORMAL_RUN_TIME (10)
#define LONG_RUN_TIME (10*60)
#define TIMEOUT_OVERHEAD (10)

volatile boolean_t checking = true;
char vec_str_buf[8196];
char karray_str_buf[1024];

/*
 * ymm defines/globals/prototypes
 */
#define STOP_COOKIE_256 0x01234567
#if defined(__x86_64__)
#define YMM_MAX 16
#define X86_AVX_STATE_T x86_avx_state64_t
#define X86_AVX_STATE_COUNT x86_AVX_STATE64_COUNT
#define X86_AVX_STATE_FLAVOR x86_AVX_STATE64
#define MCONTEXT_SIZE_256 sizeof(struct __darwin_mcontext_avx64)
#else
#define YMM_MAX 8
#define X86_AVX_STATE_T x86_avx_state32_t
#define X86_AVX_STATE_COUNT x86_AVX_STATE32_COUNT
#define X86_AVX_STATE_FLAVOR x86_AVX_STATE32
#define MCONTEXT_SIZE_256 sizeof(struct __darwin_mcontext_avx32)
#endif
#define VECTOR256 __m256
#define VEC256ALIGN __attribute ((aligned(32)))
static inline void populate_ymm(void);
static inline void check_ymm(void);
VECTOR256 vec256array0[YMM_MAX] VEC256ALIGN;
VECTOR256 vec256array1[YMM_MAX] VEC256ALIGN;
VECTOR256 vec256array2[YMM_MAX] VEC256ALIGN;
VECTOR256 vec256array3[YMM_MAX] VEC256ALIGN;

/*
 * zmm defines/globals/prototypes
 */
#define STOP_COOKIE_512 0x0123456789abcdefULL
#if defined(__x86_64__)
#define ZMM_MAX 32
#define X86_AVX512_STATE_T x86_avx512_state64_t
#define X86_AVX512_STATE_COUNT x86_AVX512_STATE64_COUNT
#define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE64
#define MCONTEXT_SIZE_512 sizeof(struct __darwin_mcontext_avx512_64)
#else
#define ZMM_MAX 8
#define X86_AVX512_STATE_T x86_avx512_state32_t
#define X86_AVX512_STATE_COUNT x86_AVX512_STATE32_COUNT
#define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE32
#define MCONTEXT_SIZE_512 sizeof(struct __darwin_mcontext_avx512_32)
#endif
#define VECTOR512 __m512
#define VEC512ALIGN __attribute ((aligned(64)))
#define OPMASK uint64_t
#define KARRAY_MAX 8
static inline void populate_zmm(void);
static inline void populate_opmask(void);
static inline void check_zmm(void);
VECTOR512 vec512array0[ZMM_MAX] VEC512ALIGN;
VECTOR512 vec512array1[ZMM_MAX] VEC512ALIGN;
VECTOR512 vec512array2[ZMM_MAX] VEC512ALIGN;
VECTOR512 vec512array3[ZMM_MAX] VEC512ALIGN;
OPMASK karray0[8];
OPMASK karray1[8];
OPMASK karray2[8];
OPMASK karray3[8];

kern_return_t _thread_get_state_avx(thread_t thread, int flavor, thread_state_t state,
    mach_msg_type_number_t *state_count);
kern_return_t _thread_get_state_avx512(thread_t thread, int flavor, thread_state_t state,
    mach_msg_type_number_t *state_count);

/*
 * Common functions
 */

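/*
 * Byte-wise memcmp kept deliberately unoptimized so that the comparison
 * itself does not use the vector registers whose contents are under test.
 */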
int
memcmp_unoptimized(const void *s1, const void *s2, size_t n)
{
	if (n != 0) {
		const unsigned char *p1 = s1, *p2 = s2;
		do {
			if (*p1++ != *p2++) {
				return *--p1 - *--p2;
			}
		} while (--n != 0);
	}
	return 0;
}

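/*
 * Arm a one-shot ITIMER_REAL for `seconds` seconds and install `handler`
 * as the SIGALRM handler.
 */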
void
start_timer(int seconds, void (*handler)(int, siginfo_t *, void *))
{
	struct sigaction sigalrm_action = {
		.sa_sigaction = handler,
		.sa_flags = SA_RESTART,
		.sa_mask = 0
	};
	struct itimerval timer = {
		.it_value.tv_sec = seconds,
		.it_value.tv_usec = 0,
		.it_interval.tv_sec = 0,
		.it_interval.tv_usec = 0
	};
	T_QUIET; T_WITH_ERRNO;
	T_ASSERT_NE(sigaction(SIGALRM, &sigalrm_action, NULL), -1, NULL);
	T_QUIET; T_WITH_ERRNO;
	T_ASSERT_NE(setitimer(ITIMER_REAL, &timer, NULL), -1, NULL);
}

void
require_avx(void)
{
	if ((_get_cpu_capabilities() & kHasAVX1_0) != kHasAVX1_0) {
		T_SKIP("AVX not supported on this system");
	}
}

void
require_avx512(void)
{
	if ((_get_cpu_capabilities() & kHasAVX512F) != kHasAVX512F) {
		T_SKIP("AVX-512 not supported on this system");
	}
}

/*
 * ymm functions
 */

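/*
 * Copy the live contents of ymm0..ymm(YMM_MAX-1) into the supplied
 * 32-byte-aligned array.
 */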
static inline void
store_ymm(VECTOR256 *vec256array)
{
	int i = 0;
	__asm__ volatile ("vmovaps %%ymm0, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps %%ymm1, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps %%ymm2, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps %%ymm3, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps %%ymm4, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps %%ymm5, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps %%ymm6, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps %%ymm7, %0" :"=m" (vec256array[i]));
#if defined(__x86_64__)
	i++; __asm__ volatile ("vmovaps %%ymm8, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps %%ymm9, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps %%ymm10, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps %%ymm11, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps %%ymm12, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps %%ymm13, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps %%ymm14, %0" :"=m" (vec256array[i]));
	i++; __asm__ volatile ("vmovaps %%ymm15, %0" :"=m" (vec256array[i]));
#endif
}

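/*
 * Load ymm0..ymm(YMM_MAX-1) from the supplied array, undoing any changes
 * made since the matching store_ymm().
 */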
static inline void
restore_ymm(VECTOR256 *vec256array)
{
	VECTOR256 *p = vec256array;

	__asm__ volatile ("vmovaps %0, %%ymm0" :: "m" (*(__m256i*)p) : "ymm0"); p++;
	__asm__ volatile ("vmovaps %0, %%ymm1" :: "m" (*(__m256i*)p) : "ymm1"); p++;
	__asm__ volatile ("vmovaps %0, %%ymm2" :: "m" (*(__m256i*)p) : "ymm2"); p++;
	__asm__ volatile ("vmovaps %0, %%ymm3" :: "m" (*(__m256i*)p) : "ymm3"); p++;
	__asm__ volatile ("vmovaps %0, %%ymm4" :: "m" (*(__m256i*)p) : "ymm4"); p++;
	__asm__ volatile ("vmovaps %0, %%ymm5" :: "m" (*(__m256i*)p) : "ymm5"); p++;
	__asm__ volatile ("vmovaps %0, %%ymm6" :: "m" (*(__m256i*)p) : "ymm6"); p++;
	__asm__ volatile ("vmovaps %0, %%ymm7" :: "m" (*(__m256i*)p) : "ymm7");

#if defined(__x86_64__)
	++p; __asm__ volatile ("vmovaps %0, %%ymm8" :: "m" (*(__m256i*)p) : "ymm8"); p++;
	__asm__ volatile ("vmovaps %0, %%ymm9" :: "m" (*(__m256i*)p) : "ymm9"); p++;
	__asm__ volatile ("vmovaps %0, %%ymm10" :: "m" (*(__m256i*)p) : "ymm10"); p++;
	__asm__ volatile ("vmovaps %0, %%ymm11" :: "m" (*(__m256i*)p) : "ymm11"); p++;
	__asm__ volatile ("vmovaps %0, %%ymm12" :: "m" (*(__m256i*)p) : "ymm12"); p++;
	__asm__ volatile ("vmovaps %0, %%ymm13" :: "m" (*(__m256i*)p) : "ymm13"); p++;
	__asm__ volatile ("vmovaps %0, %%ymm14" :: "m" (*(__m256i*)p) : "ymm14"); p++;
	__asm__ volatile ("vmovaps %0, %%ymm15" :: "m" (*(__m256i*)p) : "ymm15");
#endif
}

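/*
 * Fill every YMM register with a recognizable pattern (the pid plus distinct
 * marker words per register group), then snapshot the result into
 * vec256array0 as the reference state.
 */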
static inline void
populate_ymm(void)
{
	int j;
	uint32_t p[8] VEC256ALIGN;

	for (j = 0; j < (int) (sizeof(p) / sizeof(p[0])); j++) {
		p[j] = getpid();
	}

	p[0] = 0x22222222;
	p[7] = 0x77777777;
	__asm__ volatile ("vmovaps %0, %%ymm0" :: "m" (*(__m256i*)p) : "ymm0");
	__asm__ volatile ("vmovaps %0, %%ymm1" :: "m" (*(__m256i*)p) : "ymm1");
	__asm__ volatile ("vmovaps %0, %%ymm2" :: "m" (*(__m256i*)p) : "ymm2");
	__asm__ volatile ("vmovaps %0, %%ymm3" :: "m" (*(__m256i*)p) : "ymm3");

	p[0] = 0x44444444;
	p[7] = 0xEEEEEEEE;
	__asm__ volatile ("vmovaps %0, %%ymm4" :: "m" (*(__m256i*)p) : "ymm4");
	__asm__ volatile ("vmovaps %0, %%ymm5" :: "m" (*(__m256i*)p) : "ymm5");
	__asm__ volatile ("vmovaps %0, %%ymm6" :: "m" (*(__m256i*)p) : "ymm6");
	__asm__ volatile ("vmovaps %0, %%ymm7" :: "m" (*(__m256i*)p) : "ymm7");

#if defined(__x86_64__)
	p[0] = 0x88888888;
	p[7] = 0xAAAAAAAA;
	__asm__ volatile ("vmovaps %0, %%ymm8" :: "m" (*(__m256i*)p) : "ymm8");
	__asm__ volatile ("vmovaps %0, %%ymm9" :: "m" (*(__m256i*)p) : "ymm9");
	__asm__ volatile ("vmovaps %0, %%ymm10" :: "m" (*(__m256i*)p) : "ymm10");
	__asm__ volatile ("vmovaps %0, %%ymm11" :: "m" (*(__m256i*)p) : "ymm11");

	p[0] = 0xBBBBBBBB;
	p[7] = 0xCCCCCCCC;
	__asm__ volatile ("vmovaps %0, %%ymm12" :: "m" (*(__m256i*)p) : "ymm12");
	__asm__ volatile ("vmovaps %0, %%ymm13" :: "m" (*(__m256i*)p) : "ymm13");
	__asm__ volatile ("vmovaps %0, %%ymm14" :: "m" (*(__m256i*)p) : "ymm14");
	__asm__ volatile ("vmovaps %0, %%ymm15" :: "m" (*(__m256i*)p) : "ymm15");
#endif

	store_ymm(vec256array0);
}

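/*
 * Format the YMM_MAX 256-bit values in `vec` as one hex line per register
 * into `buf` for logging.
 */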
void
vec256_to_string(VECTOR256 *vec, char *buf)
{
	unsigned int vec_idx = 0;
	unsigned int buf_idx = 0;
	int ret = 0;

	for (vec_idx = 0; vec_idx < YMM_MAX; vec_idx++) {
		uint64_t a[4];
		bcopy(&vec[vec_idx], &a[0], sizeof(a));
		ret = sprintf(
			buf + buf_idx,
			"0x%016llx:%016llx:%016llx:%016llx\n",
			a[0], a[1], a[2], a[3]
			);
		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
		buf_idx += ret;
	}
}

void
assert_ymm_eq(void *a, void *b, int c)
{
	if (memcmp_unoptimized(a, b, c)) {
		vec256_to_string(a, vec_str_buf);
		T_LOG("Compare failed, vector A:\n%s", vec_str_buf);
		vec256_to_string(b, vec_str_buf);
		T_LOG("Compare failed, vector B:\n%s", vec_str_buf);
		T_ASSERT_FAIL("vectors not equal");
	}
}

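/*
 * Snapshot the current YMM state and compare it against the reference in
 * vec256array0; returns quietly once the signal handler has planted
 * STOP_COOKIE_256 in ymm7.
 */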
void
check_ymm(void)
{
	uint32_t *p = (uint32_t *) &vec256array1[7];
	store_ymm(vec256array1);
	if (p[0] == STOP_COOKIE_256) {
		return;
	}
	assert_ymm_eq(vec256array0, vec256array1, sizeof(vec256array0));
}

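/*
 * Reassemble full 256-bit values from the split xmm (low half) and ymmh
 * (high half) registers in the Mach AVX thread state.
 */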
static void
copy_ymm_state_to_vector(X86_AVX_STATE_T *sp, VECTOR256 *vp)
{
	int i;
	struct __darwin_xmm_reg *xmm = &sp->__fpu_xmm0;
	struct __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0;

	for (i = 0; i < YMM_MAX; i++) {
		bcopy(&xmm[i], &vp[i], sizeof(*xmm));
		bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
	}
}

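/*
 * SIGALRM handler: verify that the AVX state delivered in the signal context
 * matches the populated reference state, then plant STOP_COOKIE_256 in the
 * context so that, after sigreturn, the main loop sees a changed ymm7 and
 * stops.
 */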
static void
ymm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
{
	ucontext_t *contextp = (ucontext_t *) ctx;
	mcontext_t mcontext = contextp->uc_mcontext;
	X86_AVX_STATE_T *avx_state = (X86_AVX_STATE_T *) &mcontext->__fs;
	uint32_t *xp = (uint32_t *) &avx_state->__fpu_xmm7;
	uint32_t *yp = (uint32_t *) &avx_state->__fpu_ymmh7;

	T_LOG("Got SIGALRM");

	/* Check for AVX state */
	T_QUIET;
	T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_256, "check context size");

	/* Check that the state in the context is what's set and expected */
	copy_ymm_state_to_vector(avx_state, vec256array3);
	assert_ymm_eq(vec256array3, vec256array0, sizeof(vec256array1));

	/* Change the context and break the main loop */
	xp[0] = STOP_COOKIE_256;
	yp[0] = STOP_COOKIE_256;
	checking = FALSE;
}

kern_return_t
_thread_get_state_avx(
	thread_t thread,
	int flavor,
	thread_state_t state,            /* pointer to OUT array */
	mach_msg_type_number_t *state_count)    /* IN/OUT */
{
	kern_return_t rv;
	VECTOR256 ymms[YMM_MAX];

	/*
	 * We must save and restore the YMMs across thread_get_state() because
	 * code in thread_get_state changes at least one xmm register AFTER the
	 * thread_get_state has saved the state in userspace. While it's still
	 * possible for something to muck with %xmms BEFORE making the mach
	 * system call (and rendering this save/restore useless), that does not
	 * currently occur, and since we depend on the avx state saved in the
	 * thread_get_state to be the same as that manually copied from YMMs after
	 * thread_get_state returns, we have to go through these machinations.
	 */
	store_ymm(ymms);

	rv = thread_get_state(thread, flavor, state, state_count);

	restore_ymm(ymms);

	return rv;
}

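/*
 * Core AVX soak test: confirm that thread_get_state() reports the same YMM
 * contents the thread actually holds, both for the initial state and a
 * freshly populated one, then spin checking the registers for `time` seconds
 * until the SIGALRM handler stops the loop.
 */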
void
ymm_integrity(int time)
{
	mach_msg_type_number_t avx_count = X86_AVX_STATE_COUNT;
	kern_return_t kret;
	X86_AVX_STATE_T avx_state, avx_state2;
	mach_port_t ts = mach_thread_self();

	bzero(&avx_state, sizeof(avx_state));
	bzero(&avx_state2, sizeof(avx_state));

	kret = _thread_get_state_avx(
		ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count
		);

	store_ymm(vec256array2);

	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
	vec256_to_string(vec256array2, vec_str_buf);
	T_LOG("Initial state:\n%s", vec_str_buf);

	copy_ymm_state_to_vector(&avx_state, vec256array1);
	assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array1));

	populate_ymm();

	kret = _thread_get_state_avx(
		ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count
		);

	store_ymm(vec256array2);

	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
	vec256_to_string(vec256array2, vec_str_buf);
	T_LOG("Populated state:\n%s", vec_str_buf);

	copy_ymm_state_to_vector(&avx_state2, vec256array1);
	assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array0));

	T_LOG("Running for %ds…", time);
	start_timer(time, ymm_sigalrm_handler);

	/* re-populate because printing mucks up XMMs */
	populate_ymm();

	/* Check state until timer fires */
	while (checking) {
		check_ymm();
	}

	/* Check that the sig handler changed our AVX state */
	store_ymm(vec256array1);

	uint32_t *p = (uint32_t *) &vec256array1[7];
	if (p[0] != STOP_COOKIE_256 ||
	    p[4] != STOP_COOKIE_256) {
		vec256_to_string(vec256array1, vec_str_buf);
		T_LOG("State:\n%s", vec_str_buf);
		T_ASSERT_FAIL("sigreturn failed to stick");
	}

	T_LOG("Ran for %ds", time);
	T_PASS("No ymm register corruption occurred");
}

/*
 * zmm functions
 */

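/*
 * Copy the eight AVX-512 opmask registers k0..k7 into the supplied array.
 */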
static inline void
store_opmask(OPMASK k[])
{
	__asm__ volatile ("kmovq %%k0, %0" :"=m" (k[0]));
	__asm__ volatile ("kmovq %%k1, %0" :"=m" (k[1]));
	__asm__ volatile ("kmovq %%k2, %0" :"=m" (k[2]));
	__asm__ volatile ("kmovq %%k3, %0" :"=m" (k[3]));
	__asm__ volatile ("kmovq %%k4, %0" :"=m" (k[4]));
	__asm__ volatile ("kmovq %%k5, %0" :"=m" (k[5]));
	__asm__ volatile ("kmovq %%k6, %0" :"=m" (k[6]));
	__asm__ volatile ("kmovq %%k7, %0" :"=m" (k[7]));
}

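/*
 * store_zmm()/restore_zmm() save and reload zmm0..zmm(ZMM_MAX-1) to and from
 * a 64-byte-aligned array, mirroring the ymm helpers above.
 */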
static inline void
store_zmm(VECTOR512 *vecarray)
{
	int i = 0;
	__asm__ volatile ("vmovaps %%zmm0, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm1, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm2, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm3, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm4, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm5, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm6, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm7, %0" :"=m" (vecarray[i]));
#if defined(__x86_64__)
	i++; __asm__ volatile ("vmovaps %%zmm8, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm9, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm10, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm11, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm12, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm13, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm14, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm15, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm16, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm17, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm18, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm19, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm20, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm21, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm22, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm23, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm24, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm25, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm26, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm27, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm28, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm29, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm30, %0" :"=m" (vecarray[i]));
	i++; __asm__ volatile ("vmovaps %%zmm31, %0" :"=m" (vecarray[i]));
#endif
}

static inline void
restore_zmm(VECTOR512 *vecarray)
{
	VECTOR512 *p = vecarray;

	__asm__ volatile ("vmovaps %0, %%zmm0" :: "m" (*(__m512i*)p) : "zmm0"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm1" :: "m" (*(__m512i*)p) : "zmm1"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm2" :: "m" (*(__m512i*)p) : "zmm2"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm3" :: "m" (*(__m512i*)p) : "zmm3"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm4" :: "m" (*(__m512i*)p) : "zmm4"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm5" :: "m" (*(__m512i*)p) : "zmm5"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm6" :: "m" (*(__m512i*)p) : "zmm6"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm7" :: "m" (*(__m512i*)p) : "zmm7");

#if defined(__x86_64__)
	++p; __asm__ volatile ("vmovaps %0, %%zmm8" :: "m" (*(__m512i*)p) : "zmm8"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm9" :: "m" (*(__m512i*)p) : "zmm9"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm10" :: "m" (*(__m512i*)p) : "zmm10"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm11" :: "m" (*(__m512i*)p) : "zmm11"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm12" :: "m" (*(__m512i*)p) : "zmm12"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm13" :: "m" (*(__m512i*)p) : "zmm13"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm14" :: "m" (*(__m512i*)p) : "zmm14"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm15" :: "m" (*(__m512i*)p) : "zmm15"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm16" :: "m" (*(__m512i*)p) : "zmm16"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm17" :: "m" (*(__m512i*)p) : "zmm17"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm18" :: "m" (*(__m512i*)p) : "zmm18"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm19" :: "m" (*(__m512i*)p) : "zmm19"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm20" :: "m" (*(__m512i*)p) : "zmm20"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm21" :: "m" (*(__m512i*)p) : "zmm21"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm22" :: "m" (*(__m512i*)p) : "zmm22"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm23" :: "m" (*(__m512i*)p) : "zmm23"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm24" :: "m" (*(__m512i*)p) : "zmm24"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm25" :: "m" (*(__m512i*)p) : "zmm25"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm26" :: "m" (*(__m512i*)p) : "zmm26"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm27" :: "m" (*(__m512i*)p) : "zmm27"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm28" :: "m" (*(__m512i*)p) : "zmm28"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm29" :: "m" (*(__m512i*)p) : "zmm29"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm30" :: "m" (*(__m512i*)p) : "zmm30"); p++;
	__asm__ volatile ("vmovaps %0, %%zmm31" :: "m" (*(__m512i*)p) : "zmm31");
#endif
}

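/*
 * Load each opmask register with a distinct value derived from the pid,
 * then snapshot the result into karray0 as the reference state.
 */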
static inline void
populate_opmask(void)
{
	uint64_t k[8];

	for (int j = 0; j < 8; j++) {
		k[j] = ((uint64_t) getpid() << 32) + (0x11111111 * j);
	}

	__asm__ volatile ("kmovq %0, %%k0" : :"m" (k[0]));
	__asm__ volatile ("kmovq %0, %%k1" : :"m" (k[1]));
	__asm__ volatile ("kmovq %0, %%k2" : :"m" (k[2]));
	__asm__ volatile ("kmovq %0, %%k3" : :"m" (k[3]));
	__asm__ volatile ("kmovq %0, %%k4" : :"m" (k[4]));
	__asm__ volatile ("kmovq %0, %%k5" : :"m" (k[5]));
	__asm__ volatile ("kmovq %0, %%k6" : :"m" (k[6]));
	__asm__ volatile ("kmovq %0, %%k7" : :"m" (k[7]));

	store_opmask(karray0);
}

kern_return_t
_thread_get_state_avx512(
	thread_t thread,
	int flavor,
	thread_state_t state,            /* pointer to OUT array */
	mach_msg_type_number_t *state_count)    /* IN/OUT */
{
	kern_return_t rv;
	VECTOR512 zmms[ZMM_MAX];

	/*
	 * We must save and restore the ZMMs across thread_get_state() because
	 * code in thread_get_state changes at least one xmm register AFTER the
	 * thread_get_state has saved the state in userspace. While it's still
	 * possible for something to muck with %XMMs BEFORE making the mach
	 * system call (and rendering this save/restore useless), that does not
	 * currently occur, and since we depend on the avx512 state saved in the
	 * thread_get_state to be the same as that manually copied from ZMMs after
	 * thread_get_state returns, we have to go through these machinations.
	 */
	store_zmm(zmms);

	rv = thread_get_state(thread, flavor, state, state_count);

	restore_zmm(zmms);

	return rv;
}

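/*
 * Fill every ZMM register with a recognizable pattern (pid-derived words plus
 * distinct markers per register group), then snapshot the result into
 * vec512array0 as the reference state.
 */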
static inline void
populate_zmm(void)
{
	int j;
	uint64_t p[8] VEC512ALIGN;

	for (j = 0; j < (int) (sizeof(p) / sizeof(p[0])); j++) {
		p[j] = ((uint64_t) getpid() << 32) + getpid();
	}

	p[0] = 0x0000000000000000ULL;
	p[2] = 0x4444444444444444ULL;
	p[4] = 0x8888888888888888ULL;
	p[7] = 0xCCCCCCCCCCCCCCCCULL;
	__asm__ volatile ("vmovaps %0, %%zmm0" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm1" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm2" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm3" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm4" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm5" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm6" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm7" :: "m" (*(__m512i*)p));

#if defined(__x86_64__)
	p[0] = 0x1111111111111111ULL;
	p[2] = 0x5555555555555555ULL;
	p[4] = 0x9999999999999999ULL;
	p[7] = 0xDDDDDDDDDDDDDDDDULL;
	__asm__ volatile ("vmovaps %0, %%zmm8" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm9" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm10" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm11" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm12" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm13" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm14" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm15" :: "m" (*(__m512i*)p));

	p[0] = 0x2222222222222222ULL;
	p[2] = 0x6666666666666666ULL;
	p[4] = 0xAAAAAAAAAAAAAAAAULL;
	p[7] = 0xEEEEEEEEEEEEEEEEULL;
	__asm__ volatile ("vmovaps %0, %%zmm16" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm17" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm18" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm19" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm20" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm21" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm22" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm23" :: "m" (*(__m512i*)p));

	p[0] = 0x3333333333333333ULL;
	p[2] = 0x7777777777777777ULL;
	p[4] = 0xBBBBBBBBBBBBBBBBULL;
	p[7] = 0xFFFFFFFFFFFFFFFFULL;
	__asm__ volatile ("vmovaps %0, %%zmm24" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm25" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm26" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm27" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm28" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm29" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm30" :: "m" (*(__m512i*)p));
	__asm__ volatile ("vmovaps %0, %%zmm31" :: "m" (*(__m512i*)p));
#endif

	store_zmm(vec512array0);
}

void
vec512_to_string(VECTOR512 *vec, char *buf)
{
	unsigned int vec_idx = 0;
	unsigned int buf_idx = 0;
	int ret = 0;

	for (vec_idx = 0; vec_idx < ZMM_MAX; vec_idx++) {
		uint64_t a[8];
		bcopy(&vec[vec_idx], &a[0], sizeof(a));
		ret = sprintf(
			buf + buf_idx,
			"0x%016llx:%016llx:%016llx:%016llx:"
			"%016llx:%016llx:%016llx:%016llx%s",
			a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7],
			vec_idx < ZMM_MAX - 1 ? "\n" : ""
			);
		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
		buf_idx += ret;
	}
}

void
opmask_to_string(OPMASK *karray, char *buf)
{
	unsigned int karray_idx = 0;
	unsigned int buf_idx = 0;
	int ret = 0;

	for (karray_idx = 0; karray_idx < KARRAY_MAX; karray_idx++) {
		ret = sprintf(
			buf + buf_idx,
			"k%d: 0x%016llx%s",
			karray_idx, karray[karray_idx],
			karray_idx < KARRAY_MAX - 1 ? "\n" : ""
			);
		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
		buf_idx += ret;
	}
}

static void
assert_zmm_eq(void *a, void *b, int c)
{
	if (memcmp_unoptimized(a, b, c)) {
		vec512_to_string(a, vec_str_buf);
		T_LOG("Compare failed, vector A:\n%s", vec_str_buf);
		vec512_to_string(b, vec_str_buf);
		T_LOG("Compare failed, vector B:\n%s", vec_str_buf);
		T_ASSERT_FAIL("Vectors not equal");
	}
}

static void
assert_opmask_eq(OPMASK *a, OPMASK *b)
{
	for (int i = 0; i < KARRAY_MAX; i++) {
		if (a[i] != b[i]) {
			opmask_to_string(a, karray_str_buf);
			T_LOG("Compare failed, opmask A:\n%s", karray_str_buf);
			opmask_to_string(b, karray_str_buf);
			T_LOG("Compare failed, opmask B:\n%s", karray_str_buf);
			T_ASSERT_FAIL("opmasks not equal");
		}
	}
}

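/*
 * Snapshot the current ZMM and opmask state and compare it against the
 * references in vec512array0/karray0; returns quietly once the signal
 * handler has planted STOP_COOKIE_512 in zmm7.
 */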
void
check_zmm(void)
{
	uint64_t *p = (uint64_t *) &vec512array1[7];
	store_opmask(karray1);
	store_zmm(vec512array1);
	if (p[0] == STOP_COOKIE_512) {
		return;
	}

	assert_zmm_eq(vec512array0, vec512array1, sizeof(vec512array0));
	assert_opmask_eq(karray0, karray1);
}

static void
copy_state_to_opmask(X86_AVX512_STATE_T *sp, OPMASK *op)
{
	OPMASK *k = (OPMASK *) &sp->__fpu_k0;
	for (int i = 0; i < KARRAY_MAX; i++) {
		bcopy(&k[i], &op[i], sizeof(*op));
	}
}

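/*
 * Reassemble full 512-bit values from the Mach AVX-512 thread state: the
 * xmm, ymmh and zmmh pieces for zmm0..15, plus the whole zmm16..31
 * registers on 64-bit.
 */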
static void
copy_zmm_state_to_vector(X86_AVX512_STATE_T *sp, VECTOR512 *vp)
{
	int i;
	struct __darwin_xmm_reg *xmm = &sp->__fpu_xmm0;
	struct __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0;
	struct __darwin_ymm_reg *zmmh = &sp->__fpu_zmmh0;
#if defined(__x86_64__)
	struct __darwin_zmm_reg *zmm = &sp->__fpu_zmm16;

	for (i = 0; i < ZMM_MAX / 2; i++) {
		bcopy(&xmm[i], &vp[i], sizeof(*xmm));
		bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
		bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
		bcopy(&zmm[i], &vp[(ZMM_MAX / 2) + i], sizeof(*zmm));
	}
#else
	for (i = 0; i < ZMM_MAX; i++) {
		bcopy(&xmm[i], &vp[i], sizeof(*xmm));
		bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
		bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
	}
#endif
}

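/*
 * SIGALRM handler: verify that the AVX-512 state delivered in the signal
 * context matches the populated reference state, then plant STOP_COOKIE_512
 * in the context (zmm7 pieces and k7) so the main loop can stop after
 * sigreturn.
 */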
static void
zmm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
{
	ucontext_t *contextp = (ucontext_t *) ctx;
	mcontext_t mcontext = contextp->uc_mcontext;
	X86_AVX512_STATE_T *avx_state = (X86_AVX512_STATE_T *) &mcontext->__fs;
	uint64_t *xp = (uint64_t *) &avx_state->__fpu_xmm7;
	uint64_t *yp = (uint64_t *) &avx_state->__fpu_ymmh7;
	uint64_t *zp = (uint64_t *) &avx_state->__fpu_zmmh7;
	uint64_t *kp = (uint64_t *) &avx_state->__fpu_k0;

	/* Check for AVX512 state */
	T_QUIET;
	T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_512, "check context size");

	/* Check that the state in the context is what's set and expected */
	copy_zmm_state_to_vector(avx_state, vec512array3);
	assert_zmm_eq(vec512array3, vec512array0, sizeof(vec512array1));
	copy_state_to_opmask(avx_state, karray3);
	assert_opmask_eq(karray3, karray0);

	/* Change the context and break the main loop */
	xp[0] = STOP_COOKIE_512;
	yp[0] = STOP_COOKIE_512;
	zp[0] = STOP_COOKIE_512;
	kp[7] = STOP_COOKIE_512;
	checking = FALSE;
}

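/*
 * Core AVX-512 soak test: confirm that thread_get_state() reports the same
 * ZMM and opmask contents the thread actually holds, both for the initial
 * state and a freshly populated one, then spin checking the registers for
 * `time` seconds until the SIGALRM handler stops the loop.
 */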
void
zmm_integrity(int time)
{
	mach_msg_type_number_t avx_count = X86_AVX512_STATE_COUNT;
	kern_return_t kret;
	X86_AVX512_STATE_T avx_state, avx_state2;
	mach_port_t ts = mach_thread_self();

	bzero(&avx_state, sizeof(avx_state));
	bzero(&avx_state2, sizeof(avx_state));

	store_zmm(vec512array2);
	store_opmask(karray2);

	kret = _thread_get_state_avx512(
		ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count
		);

	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
	vec512_to_string(vec512array2, vec_str_buf);
	opmask_to_string(karray2, karray_str_buf);
	T_LOG("Initial state:\n%s\n%s", vec_str_buf, karray_str_buf);

	copy_zmm_state_to_vector(&avx_state, vec512array1);
	assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1));
	copy_state_to_opmask(&avx_state, karray1);
	assert_opmask_eq(karray2, karray1);

	populate_zmm();
	populate_opmask();

	kret = _thread_get_state_avx512(
		ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count
		);

	store_zmm(vec512array2);
	store_opmask(karray2);

	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
	vec512_to_string(vec512array2, vec_str_buf);
	opmask_to_string(karray2, karray_str_buf);
	T_LOG("Populated state:\n%s\n%s", vec_str_buf, karray_str_buf);

	copy_zmm_state_to_vector(&avx_state2, vec512array1);
	assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1));
	copy_state_to_opmask(&avx_state2, karray1);
	assert_opmask_eq(karray2, karray1);

	T_LOG("Running for %ds…", time);
	start_timer(time, zmm_sigalrm_handler);

	/* re-populate because printing mucks up XMMs */
	populate_zmm();
	populate_opmask();

	/* Check state until timer fires */
	while (checking) {
		check_zmm();
	}

	/* Check that the sig handler changed our AVX state */
	store_zmm(vec512array1);
	store_opmask(karray1);

	uint64_t *p = (uint64_t *) &vec512array1[7];
	if (p[0] != STOP_COOKIE_512 ||
	    p[2] != STOP_COOKIE_512 ||
	    p[4] != STOP_COOKIE_512 ||
	    karray1[7] != STOP_COOKIE_512) {
		vec512_to_string(vec512array1, vec_str_buf);
		opmask_to_string(karray1, karray_str_buf);
		T_LOG("State:\n%s\n%s", vec_str_buf, karray_str_buf);
		T_ASSERT_FAIL("sigreturn failed to stick");
	}

	T_LOG("Ran for %ds", time);
	T_PASS("No zmm register corruption occurred");
}

/*
 * Main test declarations
 */
T_DECL(ymm_integrity,
    "Quick soak test to verify that AVX "
    "register state is maintained correctly",
    T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD)) {
	require_avx();
	ymm_integrity(NORMAL_RUN_TIME);
}

T_DECL(ymm_integrity_stress,
    "Extended soak test to verify that AVX "
    "register state is maintained correctly",
    T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD),
    T_META_ENABLED(false)) {
	require_avx();
	ymm_integrity(LONG_RUN_TIME);
}

T_DECL(zmm_integrity,
    "Quick soak test to verify that AVX-512 "
    "register state is maintained correctly",
    T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD)) {
	require_avx512();
	zmm_integrity(NORMAL_RUN_TIME);
}

T_DECL(zmm_integrity_stress,
    "Extended soak test to verify that AVX-512 "
    "register state is maintained correctly",
    T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD),
    T_META_ENABLED(false)) {
	require_avx512();
	zmm_integrity(LONG_RUN_TIME);
}