#include <darwintest.h>
#include <immintrin.h>
#include <mach/mach.h>
#include <i386/cpu_capabilities.h>

T_META_NAMESPACE("xnu.intel"),
T_META_CHECK_LEAKS(false),
T_META_RUN_CONCURRENTLY(true)

#define NORMAL_RUN_TIME  (10)
#define LONG_RUN_TIME    (10*60)
#define TIMEOUT_OVERHEAD (10)

volatile boolean_t checking = true;
char vec_str_buf[8196];
char karray_str_buf[1024];

/*
 * ymm defines/globals/prototypes
 */
#define STOP_COOKIE_256 0x01234567
#if defined(__x86_64__)
#define X86_AVX_STATE_T x86_avx_state64_t
#define X86_AVX_STATE_COUNT x86_AVX_STATE64_COUNT
#define X86_AVX_STATE_FLAVOR x86_AVX_STATE64
#define MCONTEXT_SIZE_256 sizeof(struct __darwin_mcontext_avx64)
#else
#define X86_AVX_STATE_T x86_avx_state32_t
#define X86_AVX_STATE_COUNT x86_AVX_STATE32_COUNT
#define X86_AVX_STATE_FLAVOR x86_AVX_STATE32
#define MCONTEXT_SIZE_256 sizeof(struct __darwin_mcontext_avx32)
#endif
#define VECTOR256 __m256
#define VEC256ALIGN __attribute ((aligned(32)))
static inline void populate_ymm(void);
static inline void check_ymm(void);
VECTOR256 vec256array0[YMM_MAX] VEC256ALIGN;
VECTOR256 vec256array1[YMM_MAX] VEC256ALIGN;
VECTOR256 vec256array2[YMM_MAX] VEC256ALIGN;
VECTOR256 vec256array3[YMM_MAX] VEC256ALIGN;

/*
 * zmm defines/globals/prototypes
 */
#define STOP_COOKIE_512 0x0123456789abcdefULL
#if defined(__x86_64__)
#define X86_AVX512_STATE_T x86_avx512_state64_t
#define X86_AVX512_STATE_COUNT x86_AVX512_STATE64_COUNT
#define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE64
#define MCONTEXT_SIZE_512 sizeof(struct __darwin_mcontext_avx512_64)
#else
#define X86_AVX512_STATE_T x86_avx512_state32_t
#define X86_AVX512_STATE_COUNT x86_AVX512_STATE32_COUNT
#define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE32
#define MCONTEXT_SIZE_512 sizeof(struct __darwin_mcontext_avx512_32)
#endif
#define VECTOR512 __m512
#define VEC512ALIGN __attribute ((aligned(64)))
#define OPMASK uint64_t

static inline void populate_zmm(void);
static inline void populate_opmask(void);
static inline void check_zmm(void);
VECTOR512 vec512array0[ZMM_MAX] VEC512ALIGN;
VECTOR512 vec512array1[ZMM_MAX] VEC512ALIGN;
VECTOR512 vec512array2[ZMM_MAX] VEC512ALIGN;
VECTOR512 vec512array3[ZMM_MAX] VEC512ALIGN;

kern_return_t
_thread_get_state_avx(thread_t thread, int flavor, thread_state_t state,
    mach_msg_type_number_t *state_count);
kern_return_t
_thread_get_state_avx512(thread_t thread, int flavor, thread_state_t state,
    mach_msg_type_number_t *state_count);
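
/*
 * Byte-wise compare helper. A hand-rolled loop is used instead of memcmp(),
 * presumably so that the comparison itself does not run vectorized library
 * code that could disturb the register state under test.
 */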
memcmp_unoptimized(const void *s1, const void *s2, size_t n)
    const unsigned char *p1 = s1, *p2 = s2;
        if (*p1++ != *p2++) {
            return *--p1 - *--p2;
start_timer(int seconds, void (*handler)(int, siginfo_t *, void *))
    struct sigaction sigalrm_action = {
        .sa_sigaction = handler,
        .sa_flags = SA_RESTART,

    struct itimerval timer = {
        .it_value.tv_sec = seconds,
        .it_value.tv_usec = 0,
        .it_interval.tv_sec = 0,
        .it_interval.tv_usec = 0

    T_QUIET; T_WITH_ERRNO;
    T_ASSERT_NE(sigaction(SIGALRM, &sigalrm_action, NULL), -1, NULL);
    T_QUIET; T_WITH_ERRNO;
    T_ASSERT_NE(setitimer(ITIMER_REAL, &timer, NULL), -1, NULL);
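
/* Skip the tests when the CPU does not advertise the required AVX / AVX-512 support. */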
    if ((_get_cpu_capabilities() & kHasAVX1_0) != kHasAVX1_0) {
        T_SKIP("AVX not supported on this system");

    if ((_get_cpu_capabilities() & kHasAVX512F) != kHasAVX512F) {
        T_SKIP("AVX-512 not supported on this system");
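
/* store_ymm(): dump the live %ymm registers into the caller-supplied array. */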
store_ymm(VECTOR256 *vec256array)
    __asm__ volatile ("vmovaps %%ymm0, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm1, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm2, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm3, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm4, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm5, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm6, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm7, %0" :"=m" (vec256array[i]));
#if defined(__x86_64__)
    i++; __asm__ volatile ("vmovaps %%ymm8, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm9, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm10, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm11, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm12, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm13, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm14, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm15, %0" :"=m" (vec256array[i]));
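
/* restore_ymm(): reload the %ymm registers from the caller-supplied array. */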
restore_ymm(VECTOR256 *vec256array)
    VECTOR256 *p = vec256array;

    __asm__ volatile ("vmovaps %0, %%ymm0" :: "m" (*(__m256i *)p) : "ymm0"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm1" :: "m" (*(__m256i *)p) : "ymm1"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm2" :: "m" (*(__m256i *)p) : "ymm2"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm3" :: "m" (*(__m256i *)p) : "ymm3"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm4" :: "m" (*(__m256i *)p) : "ymm4"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm5" :: "m" (*(__m256i *)p) : "ymm5"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm6" :: "m" (*(__m256i *)p) : "ymm6"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm7" :: "m" (*(__m256i *)p) : "ymm7");

#if defined(__x86_64__)
    ++p; __asm__ volatile ("vmovaps %0, %%ymm8" :: "m" (*(__m256i *)p) : "ymm8"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm9" :: "m" (*(__m256i *)p) : "ymm9"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm10" :: "m" (*(__m256i *)p) : "ymm10"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm11" :: "m" (*(__m256i *)p) : "ymm11"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm12" :: "m" (*(__m256i *)p) : "ymm12"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm13" :: "m" (*(__m256i *)p) : "ymm13"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm14" :: "m" (*(__m256i *)p) : "ymm14"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm15" :: "m" (*(__m256i *)p) : "ymm15");
    uint32_t p[8] VEC256ALIGN;

    for (j = 0; j < (int) (sizeof(p) / sizeof(p[0])); j++) {

    __asm__ volatile ("vmovaps %0, %%ymm0" :: "m" (*(__m256i *)p) : "ymm0");
    __asm__ volatile ("vmovaps %0, %%ymm1" :: "m" (*(__m256i *)p) : "ymm1");
    __asm__ volatile ("vmovaps %0, %%ymm2" :: "m" (*(__m256i *)p) : "ymm2");
    __asm__ volatile ("vmovaps %0, %%ymm3" :: "m" (*(__m256i *)p) : "ymm3");

    __asm__ volatile ("vmovaps %0, %%ymm4" :: "m" (*(__m256i *)p) : "ymm4");
    __asm__ volatile ("vmovaps %0, %%ymm5" :: "m" (*(__m256i *)p) : "ymm5");
    __asm__ volatile ("vmovaps %0, %%ymm6" :: "m" (*(__m256i *)p) : "ymm6");
    __asm__ volatile ("vmovaps %0, %%ymm7" :: "m" (*(__m256i *)p) : "ymm7");

#if defined(__x86_64__)
    __asm__ volatile ("vmovaps %0, %%ymm8" :: "m" (*(__m256i *)p) : "ymm8");
    __asm__ volatile ("vmovaps %0, %%ymm9" :: "m" (*(__m256i *)p) : "ymm9");
    __asm__ volatile ("vmovaps %0, %%ymm10" :: "m" (*(__m256i *)p) : "ymm10");
    __asm__ volatile ("vmovaps %0, %%ymm11" :: "m" (*(__m256i *)p) : "ymm11");

    __asm__ volatile ("vmovaps %0, %%ymm12" :: "m" (*(__m256i *)p) : "ymm12");
    __asm__ volatile ("vmovaps %0, %%ymm13" :: "m" (*(__m256i *)p) : "ymm13");
    __asm__ volatile ("vmovaps %0, %%ymm14" :: "m" (*(__m256i *)p) : "ymm14");
    __asm__ volatile ("vmovaps %0, %%ymm15" :: "m" (*(__m256i *)p) : "ymm15");

    store_ymm(vec256array0);
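
/* vec256_to_string(): format the YMM_MAX saved vectors into buf for logging. */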
vec256_to_string(VECTOR256 *vec, char *buf)
    unsigned int vec_idx = 0;
    unsigned int buf_idx = 0;

    for (vec_idx = 0; vec_idx < YMM_MAX; vec_idx++) {
        bcopy(&vec[vec_idx], &a[0], sizeof(a));
            "0x%016llx:%016llx:%016llx:%016llx\n",
            a[0], a[1], a[2], a[3]
        T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
assert_ymm_eq(void *a, void *b, int c)
    if (memcmp_unoptimized(a, b, c)) {
        vec256_to_string(a, vec_str_buf);
        T_LOG("Compare failed, vector A:\n%s", vec_str_buf);
        vec256_to_string(b, vec_str_buf);
        T_LOG("Compare failed, vector B:\n%s", vec_str_buf);
        T_ASSERT_FAIL("vectors not equal");
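
    /*
     * Body of check_ymm(): snapshot the live %ymm registers and, unless the
     * signal handler has already planted the stop cookie in %ymm7, assert
     * that they still match the values originally populated.
     */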
    uint32_t *p = (uint32_t *) &vec256array1[7];
    store_ymm(vec256array1);
    if (p[0] == STOP_COOKIE_256) {

    assert_ymm_eq(vec256array0, vec256array1, sizeof(vec256array0));
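
/*
 * copy_ymm_state_to_vector(): reassemble 256-bit vectors from the AVX thread
 * state, which stores each YMM register as a low XMM half (__fpu_xmmN) plus
 * a high half (__fpu_ymmhN).
 */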
copy_ymm_state_to_vector(X86_AVX_STATE_T *sp, VECTOR256 *vp)
    struct __darwin_xmm_reg *xmm = &sp->__fpu_xmm0;
    struct __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0;

    for (i = 0; i < YMM_MAX; i++) {
        bcopy(&xmm[i], &vp[i], sizeof(*xmm));
        bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
ymm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
    ucontext_t *contextp = (ucontext_t *) ctx;
    mcontext_t mcontext = contextp->uc_mcontext;
    X86_AVX_STATE_T *avx_state = (X86_AVX_STATE_T *) &mcontext->__fs;
    uint32_t *xp = (uint32_t *) &avx_state->__fpu_xmm7;
    uint32_t *yp = (uint32_t *) &avx_state->__fpu_ymmh7;

    T_LOG("Got SIGALRM");

    /* Check for AVX state */
    T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_256, "check context size");

    /* Check that the state in the context is what's set and expected */
    copy_ymm_state_to_vector(avx_state, vec256array3);
    assert_ymm_eq(vec256array3, vec256array0, sizeof(vec256array1));

    /* Change the context and break the main loop */
    xp[0] = STOP_COOKIE_256;
    yp[0] = STOP_COOKIE_256;
_thread_get_state_avx(
    thread_state_t state,                   /* pointer to OUT array */
    mach_msg_type_number_t *state_count)    /*IN/OUT*/
    VECTOR256 ymms[YMM_MAX];
    /*
     * We must save and restore the YMMs across thread_get_state() because
     * code in thread_get_state() changes at least one xmm register AFTER
     * thread_get_state() has saved the state in userspace. While it's still
     * possible for something to muck with the %xmm registers BEFORE making
     * the mach system call (which would render this save/restore useless),
     * that does not currently occur, and since we depend on the AVX state
     * saved by thread_get_state() matching what we manually copy from the
     * YMMs after thread_get_state() returns, we have to go through these
     * machinations.
     */
    rv = thread_get_state(thread, flavor, state, state_count);
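
/*
 * ymm_integrity(): snapshot the live %ymm registers and compare them against
 * what thread_get_state() reports, both before and after populating them with
 * a test pattern; then spin, re-checking the registers, until the SIGALRM
 * handler rewrites part of the AVX state in the interrupted context and the
 * change is observed after sigreturn.
 */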
ymm_integrity(int time)
    mach_msg_type_number_t avx_count = X86_AVX_STATE_COUNT;
    X86_AVX_STATE_T avx_state, avx_state2;
    mach_port_t ts = mach_thread_self();

    bzero(&avx_state, sizeof(avx_state));
    bzero(&avx_state2, sizeof(avx_state2));
    kret = _thread_get_state_avx(
        ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count

    store_ymm(vec256array2);

    T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
    vec256_to_string(vec256array2, vec_str_buf);
    T_LOG("Initial state:\n%s", vec_str_buf);

    copy_ymm_state_to_vector(&avx_state, vec256array1);
    assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array1));

    kret = _thread_get_state_avx(
        ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count

    store_ymm(vec256array2);

    T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
    vec256_to_string(vec256array2, vec_str_buf);
    T_LOG("Populated state:\n%s", vec_str_buf);

    copy_ymm_state_to_vector(&avx_state2, vec256array1);
    assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array0));

    T_LOG("Running for %ds…", time);
    start_timer(time, ymm_sigalrm_handler);

    /* re-populate because printing mucks up XMMs */

    /* Check state until timer fires */
    /* Check that the sig handler changed our AVX state */
    store_ymm(vec256array1);

    uint32_t *p = (uint32_t *) &vec256array1[7];
    if (p[0] != STOP_COOKIE_256 ||
        p[4] != STOP_COOKIE_256) {
        vec256_to_string(vec256array1, vec_str_buf);
        T_LOG("State:\n%s", vec_str_buf);
        T_ASSERT_FAIL("sigreturn failed to stick");

    T_LOG("Ran for %ds", time);
    T_PASS("No ymm register corruption occurred");
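
/* store_opmask(): dump the AVX-512 opmask registers %k0-%k7 into k[]. */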
store_opmask(OPMASK k[])
    __asm__ volatile ("kmovq %%k0, %0" :"=m" (k[0]));
    __asm__ volatile ("kmovq %%k1, %0" :"=m" (k[1]));
    __asm__ volatile ("kmovq %%k2, %0" :"=m" (k[2]));
    __asm__ volatile ("kmovq %%k3, %0" :"=m" (k[3]));
    __asm__ volatile ("kmovq %%k4, %0" :"=m" (k[4]));
    __asm__ volatile ("kmovq %%k5, %0" :"=m" (k[5]));
    __asm__ volatile ("kmovq %%k6, %0" :"=m" (k[6]));
    __asm__ volatile ("kmovq %%k7, %0" :"=m" (k[7]));
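
/* store_zmm(): dump the live %zmm registers into the caller-supplied array. */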
store_zmm(VECTOR512 *vecarray)
    __asm__ volatile ("vmovaps %%zmm0, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm1, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm2, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm3, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm4, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm5, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm6, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm7, %0" :"=m" (vecarray[i]));
#if defined(__x86_64__)
    i++; __asm__ volatile ("vmovaps %%zmm8, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm9, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm10, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm11, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm12, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm13, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm14, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm15, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm16, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm17, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm18, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm19, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm20, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm21, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm22, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm23, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm24, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm25, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm26, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm27, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm28, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm29, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm30, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm31, %0" :"=m" (vecarray[i]));
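
/* restore_zmm(): reload the %zmm registers from the caller-supplied array. */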
restore_zmm(VECTOR512 *vecarray)
    VECTOR512 *p = vecarray;

    __asm__ volatile ("vmovaps %0, %%zmm0" :: "m" (*(__m512i *)p) : "zmm0"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm1" :: "m" (*(__m512i *)p) : "zmm1"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm2" :: "m" (*(__m512i *)p) : "zmm2"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm3" :: "m" (*(__m512i *)p) : "zmm3"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm4" :: "m" (*(__m512i *)p) : "zmm4"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm5" :: "m" (*(__m512i *)p) : "zmm5"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm6" :: "m" (*(__m512i *)p) : "zmm6"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm7" :: "m" (*(__m512i *)p) : "zmm7");

#if defined(__x86_64__)
    ++p; __asm__ volatile ("vmovaps %0, %%zmm8" :: "m" (*(__m512i *)p) : "zmm8"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm9" :: "m" (*(__m512i *)p) : "zmm9"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm10" :: "m" (*(__m512i *)p) : "zmm10"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm11" :: "m" (*(__m512i *)p) : "zmm11"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm12" :: "m" (*(__m512i *)p) : "zmm12"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm13" :: "m" (*(__m512i *)p) : "zmm13"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm14" :: "m" (*(__m512i *)p) : "zmm14"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm15" :: "m" (*(__m512i *)p) : "zmm15"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm16" :: "m" (*(__m512i *)p) : "zmm16"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm17" :: "m" (*(__m512i *)p) : "zmm17"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm18" :: "m" (*(__m512i *)p) : "zmm18"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm19" :: "m" (*(__m512i *)p) : "zmm19"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm20" :: "m" (*(__m512i *)p) : "zmm20"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm21" :: "m" (*(__m512i *)p) : "zmm21"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm22" :: "m" (*(__m512i *)p) : "zmm22"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm23" :: "m" (*(__m512i *)p) : "zmm23"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm24" :: "m" (*(__m512i *)p) : "zmm24"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm25" :: "m" (*(__m512i *)p) : "zmm25"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm26" :: "m" (*(__m512i *)p) : "zmm26"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm27" :: "m" (*(__m512i *)p) : "zmm27"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm28" :: "m" (*(__m512i *)p) : "zmm28"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm29" :: "m" (*(__m512i *)p) : "zmm29"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm30" :: "m" (*(__m512i *)p) : "zmm30"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm31" :: "m" (*(__m512i *)p) : "zmm31");
populate_opmask(void)
    for (int j = 0; j < 8; j++) {
        k[j] = ((uint64_t) getpid() << 32) + (0x11111111 * j);

    __asm__ volatile ("kmovq %0, %%k0" : :"m" (k[0]));
    __asm__ volatile ("kmovq %0, %%k1" : :"m" (k[1]));
    __asm__ volatile ("kmovq %0, %%k2" : :"m" (k[2]));
    __asm__ volatile ("kmovq %0, %%k3" : :"m" (k[3]));
    __asm__ volatile ("kmovq %0, %%k4" : :"m" (k[4]));
    __asm__ volatile ("kmovq %0, %%k5" : :"m" (k[5]));
    __asm__ volatile ("kmovq %0, %%k6" : :"m" (k[6]));
    __asm__ volatile ("kmovq %0, %%k7" : :"m" (k[7]));

    store_opmask(karray0);
_thread_get_state_avx512(
    thread_state_t state,                   /* pointer to OUT array */
    mach_msg_type_number_t *state_count)    /*IN/OUT*/
    VECTOR512 zmms[ZMM_MAX];
    /*
     * We must save and restore the ZMMs across thread_get_state() because
     * code in thread_get_state() changes at least one xmm register AFTER
     * thread_get_state() has saved the state in userspace. While it's still
     * possible for something to muck with the %xmm registers BEFORE making
     * the mach system call (which would render this save/restore useless),
     * that does not currently occur, and since we depend on the AVX-512 state
     * saved by thread_get_state() matching what we manually copy from the
     * ZMMs after thread_get_state() returns, we have to go through these
     * machinations.
     */
    rv = thread_get_state(thread, flavor, state, state_count);
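
    /*
     * Body of populate_zmm(): load each %zmm register with a distinctive
     * pid-derived pattern, then snapshot the result into vec512array0.
     */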
    uint64_t p[8] VEC512ALIGN;

    for (j = 0; j < (int) (sizeof(p) / sizeof(p[0])); j++) {
        p[j] = ((uint64_t) getpid() << 32) + getpid();

    p[0] = 0x0000000000000000ULL;
    p[2] = 0x4444444444444444ULL;
    p[4] = 0x8888888888888888ULL;
    p[7] = 0xCCCCCCCCCCCCCCCCULL;
    __asm__ volatile ("vmovaps %0, %%zmm0" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm1" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm2" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm3" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm4" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm5" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm6" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm7" :: "m" (*(__m512i *)p));

#if defined(__x86_64__)
    p[0] = 0x1111111111111111ULL;
    p[2] = 0x5555555555555555ULL;
    p[4] = 0x9999999999999999ULL;
    p[7] = 0xDDDDDDDDDDDDDDDDULL;
    __asm__ volatile ("vmovaps %0, %%zmm8" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm9" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm10" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm11" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm12" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm13" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm14" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm15" :: "m" (*(__m512i *)p));

    p[0] = 0x2222222222222222ULL;
    p[2] = 0x6666666666666666ULL;
    p[4] = 0xAAAAAAAAAAAAAAAAULL;
    p[7] = 0xEEEEEEEEEEEEEEEEULL;
    __asm__ volatile ("vmovaps %0, %%zmm16" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm17" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm18" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm19" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm20" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm21" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm22" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm23" :: "m" (*(__m512i *)p));

    p[0] = 0x3333333333333333ULL;
    p[2] = 0x7777777777777777ULL;
    p[4] = 0xBBBBBBBBBBBBBBBBULL;
    p[7] = 0xFFFFFFFFFFFFFFFFULL;
    __asm__ volatile ("vmovaps %0, %%zmm24" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm25" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm26" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm27" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm28" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm29" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm30" :: "m" (*(__m512i *)p));
    __asm__ volatile ("vmovaps %0, %%zmm31" :: "m" (*(__m512i *)p));

    store_zmm(vec512array0);
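
/* vec512_to_string() / opmask_to_string(): format saved vector and opmask state into a buffer for logging. */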
vec512_to_string(VECTOR512 *vec, char *buf)
    unsigned int vec_idx = 0;
    unsigned int buf_idx = 0;

    for (vec_idx = 0; vec_idx < ZMM_MAX; vec_idx++) {
        bcopy(&vec[vec_idx], &a[0], sizeof(a));
            "0x%016llx:%016llx:%016llx:%016llx:"
            "%016llx:%016llx:%016llx:%016llx%s",
            a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7],
            vec_idx < ZMM_MAX - 1 ? "\n" : ""
        T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");

opmask_to_string(OPMASK *karray, char *buf)
    unsigned int karray_idx = 0;
    unsigned int buf_idx = 0;

    for (karray_idx = 0; karray_idx < KARRAY_MAX; karray_idx++) {
            karray_idx, karray[karray_idx],
            karray_idx < KARRAY_MAX - 1 ? "\n" : ""
        T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
assert_zmm_eq(void *a, void *b, int c)
    if (memcmp_unoptimized(a, b, c)) {
        vec512_to_string(a, vec_str_buf);
        T_LOG("Compare failed, vector A:\n%s", vec_str_buf);
        vec512_to_string(b, vec_str_buf);
        T_LOG("Compare failed, vector B:\n%s", vec_str_buf);
        T_ASSERT_FAIL("Vectors not equal");

assert_opmask_eq(OPMASK *a, OPMASK *b)
    for (int i = 0; i < KARRAY_MAX; i++) {
            opmask_to_string(a, karray_str_buf);
            T_LOG("Compare failed, opmask A:\n%s", karray_str_buf);
            opmask_to_string(b, karray_str_buf);
            T_LOG("Compare failed, opmask B:\n%s", karray_str_buf);
            T_ASSERT_FAIL("opmasks not equal");
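
    /*
     * Body of check_zmm(): snapshot the live %zmm and %k registers and,
     * unless the signal handler has already planted the stop cookie in
     * %zmm7, assert that they still match the values originally populated.
     */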
    uint64_t *p = (uint64_t *) &vec512array1[7];
    store_opmask(karray1);
    store_zmm(vec512array1);
    if (p[0] == STOP_COOKIE_512) {

    assert_zmm_eq(vec512array0, vec512array1, sizeof(vec512array0));
    assert_opmask_eq(karray0, karray1);
copy_state_to_opmask(X86_AVX512_STATE_T *sp, OPMASK *op)
    OPMASK *k = (OPMASK *) &sp->__fpu_k0;
    for (int i = 0; i < KARRAY_MAX; i++) {
        bcopy(&k[i], &op[i], sizeof(*op));
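
/*
 * copy_zmm_state_to_vector(): reassemble 512-bit vectors from the AVX-512
 * thread state. For %zmm0-%zmm15 the state splits each register into
 * __fpu_xmmN (bits 0-127), __fpu_ymmhN (bits 128-255) and __fpu_zmmhN
 * (bits 256-511); on x86_64, %zmm16-%zmm31 are stored whole in __fpu_zmmN.
 */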
copy_zmm_state_to_vector(X86_AVX512_STATE_T *sp, VECTOR512 *vp)
    struct __darwin_xmm_reg *xmm = &sp->__fpu_xmm0;
    struct __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0;
    struct __darwin_ymm_reg *zmmh = &sp->__fpu_zmmh0;
#if defined(__x86_64__)
    struct __darwin_zmm_reg *zmm = &sp->__fpu_zmm16;

    for (i = 0; i < ZMM_MAX / 2; i++) {
        bcopy(&xmm[i], &vp[i], sizeof(*xmm));
        bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
        bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
        bcopy(&zmm[i], &vp[(ZMM_MAX / 2) + i], sizeof(*zmm));
#else
    for (i = 0; i < ZMM_MAX; i++) {
        bcopy(&xmm[i], &vp[i], sizeof(*xmm));
        bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
        bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
zmm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
    ucontext_t *contextp = (ucontext_t *) ctx;
    mcontext_t mcontext = contextp->uc_mcontext;
    X86_AVX512_STATE_T *avx_state = (X86_AVX512_STATE_T *) &mcontext->__fs;
    uint64_t *xp = (uint64_t *) &avx_state->__fpu_xmm7;
    uint64_t *yp = (uint64_t *) &avx_state->__fpu_ymmh7;
    uint64_t *zp = (uint64_t *) &avx_state->__fpu_zmmh7;
    uint64_t *kp = (uint64_t *) &avx_state->__fpu_k0;

    /* Check for AVX512 state */
    T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_512, "check context size");

    /* Check that the state in the context is what's set and expected */
    copy_zmm_state_to_vector(avx_state, vec512array3);
    assert_zmm_eq(vec512array3, vec512array0, sizeof(vec512array1));
    copy_state_to_opmask(avx_state, karray3);
    assert_opmask_eq(karray3, karray0);

    /* Change the context and break the main loop */
    xp[0] = STOP_COOKIE_512;
    yp[0] = STOP_COOKIE_512;
    zp[0] = STOP_COOKIE_512;
    kp[7] = STOP_COOKIE_512;
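
/*
 * zmm_integrity(): the AVX-512 analogue of ymm_integrity(), additionally
 * covering the %k opmask registers.
 */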
zmm_integrity(int time)
    mach_msg_type_number_t avx_count = X86_AVX512_STATE_COUNT;
    X86_AVX512_STATE_T avx_state, avx_state2;
    mach_port_t ts = mach_thread_self();

    bzero(&avx_state, sizeof(avx_state));
    bzero(&avx_state2, sizeof(avx_state2));
    store_zmm(vec512array2);
    store_opmask(karray2);

    kret = _thread_get_state_avx512(
        ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count

    T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
    vec512_to_string(vec512array2, vec_str_buf);
    opmask_to_string(karray2, karray_str_buf);
    T_LOG("Initial state:\n%s\n%s", vec_str_buf, karray_str_buf);

    copy_zmm_state_to_vector(&avx_state, vec512array1);
    assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1));
    copy_state_to_opmask(&avx_state, karray1);
    assert_opmask_eq(karray2, karray1);

    kret = _thread_get_state_avx512(
        ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count

    store_zmm(vec512array2);
    store_opmask(karray2);

    T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
    vec512_to_string(vec512array2, vec_str_buf);
    opmask_to_string(karray2, karray_str_buf);
    T_LOG("Populated state:\n%s\n%s", vec_str_buf, karray_str_buf);

    copy_zmm_state_to_vector(&avx_state2, vec512array1);
    assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1));
    copy_state_to_opmask(&avx_state2, karray1);
    assert_opmask_eq(karray2, karray1);

    T_LOG("Running for %ds…", time);
    start_timer(time, zmm_sigalrm_handler);

    /* re-populate because printing mucks up XMMs */

    /* Check state until timer fires */
    /* Check that the sig handler changed our AVX state */
    store_zmm(vec512array1);
    store_opmask(karray1);

    uint64_t *p = (uint64_t *) &vec512array1[7];
    if (p[0] != STOP_COOKIE_512 ||
        p[2] != STOP_COOKIE_512 ||
        p[4] != STOP_COOKIE_512 ||
        karray1[7] != STOP_COOKIE_512) {
        vec512_to_string(vec512array1, vec_str_buf);
        opmask_to_string(karray1, karray_str_buf);
        T_LOG("State:\n%s\n%s", vec_str_buf, karray_str_buf);
        T_ASSERT_FAIL("sigreturn failed to stick");
856 T_LOG("Ran for %ds", time
);
857 T_PASS("No zmm register corruption occurred");
/*
 * Main test declarations
 */
T_DECL(ymm_integrity,
    "Quick soak test to verify that AVX "
    "register state is maintained correctly",
    T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD)) {
    ymm_integrity(NORMAL_RUN_TIME);
}

T_DECL(ymm_integrity_stress,
    "Extended soak test to verify that AVX "
    "register state is maintained correctly",
    T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD),
    T_META_ENABLED(false)) {
    ymm_integrity(LONG_RUN_TIME);
}
T_DECL(zmm_integrity,
    "Quick soak test to verify that AVX-512 "
    "register state is maintained correctly",
    T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD)) {
    zmm_integrity(NORMAL_RUN_TIME);
}

T_DECL(zmm_integrity_stress,
    "Extended soak test to verify that AVX-512 "
    "register state is maintained correctly",
    T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD),
    T_META_ENABLED(false)) {
    zmm_integrity(LONG_RUN_TIME);
}