#include <darwintest.h>
/* headers needed by the code below (signals/timers, getpid, bcopy/bzero, sprintf) */
#include <signal.h>
#include <stdio.h>
#include <strings.h>
#include <sys/time.h>
#include <unistd.h>
#include <immintrin.h>
#include <mach/mach.h>
#include <i386/cpu_capabilities.h>
	T_META_NAMESPACE("xnu.intel"),
	T_META_CHECK_LEAKS(false)
#define NORMAL_RUN_TIME  (10)
#define LONG_RUN_TIME    (10*60)
#define TIMEOUT_OVERHEAD (10)
volatile boolean_t checking = true;
char vec_str_buf[8196];
char karray_str_buf[1024];
/*
 * ymm defines/globals/prototypes
 */
#define STOP_COOKIE_256 0x01234567
#if defined(__x86_64__)
#define X86_AVX_STATE_T x86_avx_state64_t
#define X86_AVX_STATE_COUNT x86_AVX_STATE64_COUNT
#define X86_AVX_STATE_FLAVOR x86_AVX_STATE64
#define MCONTEXT_SIZE_256 sizeof(struct __darwin_mcontext_avx64)
#else
#define X86_AVX_STATE_T x86_avx_state32_t
#define X86_AVX_STATE_COUNT x86_AVX_STATE32_COUNT
#define X86_AVX_STATE_FLAVOR x86_AVX_STATE32
#define MCONTEXT_SIZE_256 sizeof(struct __darwin_mcontext_avx32)
#endif
#define VECTOR256 __m256
#define VEC256ALIGN __attribute__ ((aligned(32)))
static inline void populate_ymm(void);
static inline void check_ymm(void);
VECTOR256 vec256array0[YMM_MAX] VEC256ALIGN;
VECTOR256 vec256array1[YMM_MAX] VEC256ALIGN;
VECTOR256 vec256array2[YMM_MAX] VEC256ALIGN;
VECTOR256 vec256array3[YMM_MAX] VEC256ALIGN;
/*
 * zmm defines/globals/prototypes
 */
#define STOP_COOKIE_512 0x0123456789abcdefULL
#if defined(__x86_64__)
#define X86_AVX512_STATE_T x86_avx512_state64_t
#define X86_AVX512_STATE_COUNT x86_AVX512_STATE64_COUNT
#define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE64
#define MCONTEXT_SIZE_512 sizeof(struct __darwin_mcontext_avx512_64)
#else
#define X86_AVX512_STATE_T x86_avx512_state32_t
#define X86_AVX512_STATE_COUNT x86_AVX512_STATE32_COUNT
#define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE32
#define MCONTEXT_SIZE_512 sizeof(struct __darwin_mcontext_avx512_32)
#endif
#define VECTOR512 __m512
#define VEC512ALIGN __attribute__ ((aligned(64)))
#define OPMASK uint64_t

static inline void populate_zmm(void);
static inline void populate_opmask(void);
static inline void check_zmm(void);
VECTOR512 vec512array0[ZMM_MAX] VEC512ALIGN;
VECTOR512 vec512array1[ZMM_MAX] VEC512ALIGN;
VECTOR512 vec512array2[ZMM_MAX] VEC512ALIGN;
VECTOR512 vec512array3[ZMM_MAX] VEC512ALIGN;
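/*
 * A hand-rolled bytewise compare is used instead of memcmp(), presumably so
 * the comparison cannot be lowered to vectorized library code that would
 * clobber the very registers this test is checking.
 */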
static int
memcmp_unoptimized(const void *s1, const void *s2, size_t n) {
	const unsigned char *p1 = s1, *p2 = s2;
	while (n-- != 0)
		if (*p1++ != *p2++)
			return (*--p1 - *--p2);
	return 0;
}
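/*
 * Arm a one-shot real-time timer: `handler' is installed for SIGALRM and the
 * ITIMER_REAL timer fires once after `seconds' seconds (it_interval is zero).
 */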
start_timer(int seconds, void (*handler)(int, siginfo_t *, void *)) {
	struct sigaction sigalrm_action = {
		.sa_sigaction = handler,
		.sa_flags = SA_RESTART,

	struct itimerval timer = {
		.it_value.tv_sec = seconds,
		.it_value.tv_usec = 0,
		.it_interval.tv_sec = 0,
		.it_interval.tv_usec = 0

	T_QUIET; T_WITH_ERRNO;
	T_ASSERT_NE(sigaction(SIGALRM, &sigalrm_action, NULL), -1, NULL);
	T_QUIET; T_WITH_ERRNO;
	T_ASSERT_NE(setitimer(ITIMER_REAL, &timer, NULL), -1, NULL);
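/*
 * Feature gating: the AVX and AVX-512 tests are skipped outright when the
 * capability bits reported by _get_cpu_capabilities() show the CPU does not
 * implement the corresponding instruction set.
 */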
	if((_get_cpu_capabilities() & kHasAVX1_0) != kHasAVX1_0) {
		T_SKIP("AVX not supported on this system");

require_avx512(void) {
	if((_get_cpu_capabilities() & kHasAVX512F) != kHasAVX512F) {
		T_SKIP("AVX-512 not supported on this system");
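/*
 * Spill the live %ymm register file to memory with vmovaps so it can later be
 * compared against the values the test wrote there.  Only ymm0-ymm7 exist in
 * 32-bit mode; ymm8-ymm15 are 64-bit only.
 */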
store_ymm(VECTOR256 *vec256array) {
	__asm__ volatile("vmovaps %%ymm0, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm1, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm2, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm3, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm4, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm5, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm6, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm7, %0" :"=m" (vec256array[i]));
#if defined(__x86_64__)
	i++;__asm__ volatile("vmovaps %%ymm8, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm9, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm10, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm11, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm12, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm13, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm14, %0" :"=m" (vec256array[i]));
	i++;__asm__ volatile("vmovaps %%ymm15, %0" :"=m" (vec256array[i]));
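/*
 * populate_ymm() broadcasts a recognizable test pattern into every ymm
 * register and records the same values via store_ymm(vec256array0).
 */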
	uint32_t p[8] VEC256ALIGN;

	for (j = 0; j < (int) (sizeof(p)/sizeof(p[0])); j++)

	__asm__ volatile("vmovaps %0, %%ymm0" :: "m" (*(__m256i*)p) : "ymm0");
	__asm__ volatile("vmovaps %0, %%ymm1" :: "m" (*(__m256i*)p) : "ymm1");
	__asm__ volatile("vmovaps %0, %%ymm2" :: "m" (*(__m256i*)p) : "ymm2");
	__asm__ volatile("vmovaps %0, %%ymm3" :: "m" (*(__m256i*)p) : "ymm3");

	__asm__ volatile("vmovaps %0, %%ymm4" :: "m" (*(__m256i*)p) : "ymm4");
	__asm__ volatile("vmovaps %0, %%ymm5" :: "m" (*(__m256i*)p) : "ymm5");
	__asm__ volatile("vmovaps %0, %%ymm6" :: "m" (*(__m256i*)p) : "ymm6");
	__asm__ volatile("vmovaps %0, %%ymm7" :: "m" (*(__m256i*)p) : "ymm7");

#if defined(__x86_64__)
	__asm__ volatile("vmovaps %0, %%ymm8" :: "m" (*(__m256i*)p) : "ymm8");
	__asm__ volatile("vmovaps %0, %%ymm9" :: "m" (*(__m256i*)p) : "ymm9");
	__asm__ volatile("vmovaps %0, %%ymm10" :: "m" (*(__m256i*)p) : "ymm10");
	__asm__ volatile("vmovaps %0, %%ymm11" :: "m" (*(__m256i*)p) : "ymm11");

	__asm__ volatile("vmovaps %0, %%ymm12" :: "m" (*(__m256i*)p) : "ymm12");
	__asm__ volatile("vmovaps %0, %%ymm13" :: "m" (*(__m256i*)p) : "ymm13");
	__asm__ volatile("vmovaps %0, %%ymm14" :: "m" (*(__m256i*)p) : "ymm14");
	__asm__ volatile("vmovaps %0, %%ymm15" :: "m" (*(__m256i*)p) : "ymm15");

	store_ymm(vec256array0);
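/*
 * Helpers that render a vector array as hex text and assert equality,
 * logging both operands before failing so a mismatch is debuggable.
 */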
vec256_to_string(VECTOR256 *vec, char *buf) {
	unsigned int vec_idx = 0;
	unsigned int buf_idx = 0;

	for (vec_idx = 0; vec_idx < YMM_MAX; vec_idx++) {
		bcopy(&vec[vec_idx], &a[0], sizeof(a));
			"0x%016llx:%016llx:%016llx:%016llx\n",
			a[0], a[1], a[2], a[3]
		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
assert_ymm_eq(void *a, void *b, int c) {
	if(memcmp_unoptimized(a, b, c)) {
		vec256_to_string(a, vec_str_buf);
		T_LOG("Compare failed, vector A:\n%s", vec_str_buf);
		vec256_to_string(b, vec_str_buf);
		T_LOG("Compare failed, vector B:\n%s", vec_str_buf);
		T_ASSERT_FAIL("vectors not equal");
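/*
 * check_ymm() re-captures the live registers and compares them with the
 * populated values; once the signal handler has planted STOP_COOKIE_256 the
 * state is intentionally different, so the check bails out instead.
 */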
	uint32_t *p = (uint32_t *) &vec256array1[7];
	store_ymm(vec256array1);
	if (p[0] == STOP_COOKIE_256) {

	assert_ymm_eq(vec256array0, vec256array1, sizeof(vec256array0));
copy_ymm_state_to_vector(X86_AVX_STATE_T *sp, VECTOR256 *vp) {
	struct __darwin_xmm_reg *xmm  = &sp->__fpu_xmm0;
	struct __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0;

	for (i = 0; i < YMM_MAX; i++ ) {
		bcopy(&xmm[i], &vp[i], sizeof(*xmm));
		bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
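/*
 * SIGALRM handler: verify that the AVX state captured in the signal context
 * matches what the test populated, then plant STOP_COOKIE_256 in xmm7/ymmh7
 * within the context.  sigreturn() must restore that modified state, which
 * the main loop then detects and verifies.
 */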
ymm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
	ucontext_t *contextp = (ucontext_t *) ctx;
	mcontext_t mcontext = contextp->uc_mcontext;
	X86_AVX_STATE_T *avx_state = (X86_AVX_STATE_T *) &mcontext->__fs;
	uint32_t *xp = (uint32_t *) &avx_state->__fpu_xmm7;
	uint32_t *yp = (uint32_t *) &avx_state->__fpu_ymmh7;

	T_LOG("Got SIGALRM");

	/* Check for AVX state */
	T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_256, "check context size");

	/* Check that the state in the context is what's set and expected */
	copy_ymm_state_to_vector(avx_state, vec256array3);
	assert_ymm_eq(vec256array3, vec256array0, sizeof(vec256array1));

	/* Change the context and break the main loop */
	xp[0] = STOP_COOKIE_256;
	yp[0] = STOP_COOKIE_256;
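/*
 * Main AVX soak loop: populate the registers, cross-check them against
 * thread_get_state(), then keep re-checking until the SIGALRM handler
 * rewrites part of the state, proving both that the kernel preserves the
 * registers and that sigreturn() applies handler modifications.
 */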
ymm_integrity(int time) {
	mach_msg_type_number_t avx_count = X86_AVX_STATE_COUNT;
	X86_AVX_STATE_T avx_state, avx_state2;
	mach_port_t ts = mach_thread_self();

	bzero(&avx_state, sizeof(avx_state));
	bzero(&avx_state2, sizeof(avx_state));

	kret = thread_get_state(
		ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count

	store_ymm(vec256array2);

	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
	vec256_to_string(vec256array2, vec_str_buf);
	T_LOG("Initial state:\n%s", vec_str_buf);

	copy_ymm_state_to_vector(&avx_state, vec256array1);
	assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array1));

	kret = thread_get_state(
		ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count

	store_ymm(vec256array2);

	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
	vec256_to_string(vec256array2, vec_str_buf);
	T_LOG("Populated state:\n%s", vec_str_buf);

	copy_ymm_state_to_vector(&avx_state2, vec256array1);
	assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array0));

	T_LOG("Running for %ds…", time);
	start_timer(time, ymm_sigalrm_handler);

	/* re-populate because printing mucks up XMMs */

	/* Check state until timer fires */

	/* Check that the sig handler changed our AVX state */
	store_ymm(vec256array1);

	uint32_t *p = (uint32_t *) &vec256array1[7];
	if (p[0] != STOP_COOKIE_256 ||
	    p[4] != STOP_COOKIE_256) {
		vec256_to_string(vec256array1, vec_str_buf);
		T_LOG("State:\n%s", vec_str_buf);
		T_ASSERT_FAIL("sigreturn failed to stick");

	T_LOG("Ran for %ds", time);
	T_PASS("No ymm register corruption occurred");
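/*
 * AVX-512 equivalents: store_opmask() spills the eight k-mask registers and
 * store_zmm() spills zmm0-zmm31 (zmm0-zmm7 in 32-bit mode) for later
 * comparison.
 */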
store_opmask(OPMASK k[]) {
	__asm__ volatile("kmovq %%k0, %0" :"=m" (k[0]));
	__asm__ volatile("kmovq %%k1, %0" :"=m" (k[1]));
	__asm__ volatile("kmovq %%k2, %0" :"=m" (k[2]));
	__asm__ volatile("kmovq %%k3, %0" :"=m" (k[3]));
	__asm__ volatile("kmovq %%k4, %0" :"=m" (k[4]));
	__asm__ volatile("kmovq %%k5, %0" :"=m" (k[5]));
	__asm__ volatile("kmovq %%k6, %0" :"=m" (k[6]));
	__asm__ volatile("kmovq %%k7, %0" :"=m" (k[7]));
store_zmm(VECTOR512 *vecarray) {
	__asm__ volatile("vmovaps %%zmm0, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm1, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm2, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm3, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm4, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm5, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm6, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm7, %0" :"=m" (vecarray[i]));
#if defined(__x86_64__)
	i++;__asm__ volatile("vmovaps %%zmm8, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm9, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm10, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm11, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm12, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm13, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm14, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm15, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm16, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm17, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm18, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm19, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm20, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm21, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm22, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm23, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm24, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm25, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm26, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm27, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm28, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm29, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm30, %0" :"=m" (vecarray[i]));
	i++;__asm__ volatile("vmovaps %%zmm31, %0" :"=m" (vecarray[i]));
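/*
 * Load each opmask register with a distinct pid-derived 64-bit value and
 * remember the same values in karray0 so check_zmm() can compare them.
 */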
populate_opmask(void) {

	for (int j = 0; j < 8; j++)
		k[j] = ((uint64_t) getpid() << 32) + (0x11111111 * j);

	__asm__ volatile("kmovq %0, %%k0" : :"m" (k[0]));
	__asm__ volatile("kmovq %0, %%k1" : :"m" (k[1]));
	__asm__ volatile("kmovq %0, %%k2" : :"m" (k[2]));
	__asm__ volatile("kmovq %0, %%k3" : :"m" (k[3]));
	__asm__ volatile("kmovq %0, %%k4" : :"m" (k[4]));
	__asm__ volatile("kmovq %0, %%k5" : :"m" (k[5]));
	__asm__ volatile("kmovq %0, %%k6" : :"m" (k[6]));
	__asm__ volatile("kmovq %0, %%k7" : :"m" (k[7]));

	store_opmask(karray0);
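/*
 * populate_zmm() fills zmm0-zmm31 in groups of eight, each group loaded from
 * a pid-derived pattern with a few distinctive constant lanes, and records
 * the values in vec512array0.
 */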
	uint64_t p[8] VEC512ALIGN;

	for (j = 0; j < (int) (sizeof(p)/sizeof(p[0])); j++)
		p[j] = ((uint64_t) getpid() << 32) + getpid();

	p[0] = 0x0000000000000000ULL;
	p[2] = 0x4444444444444444ULL;
	p[4] = 0x8888888888888888ULL;
	p[7] = 0xCCCCCCCCCCCCCCCCULL;
	__asm__ volatile("vmovaps %0, %%zmm0" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm1" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm2" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm3" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm4" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm5" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm6" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm7" :: "m" (*(__m512i*)p) );

#if defined(__x86_64__)
	p[0] = 0x1111111111111111ULL;
	p[2] = 0x5555555555555555ULL;
	p[4] = 0x9999999999999999ULL;
	p[7] = 0xDDDDDDDDDDDDDDDDULL;
	__asm__ volatile("vmovaps %0, %%zmm8" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm9" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm10" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm11" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm12" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm13" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm14" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm15" :: "m" (*(__m512i*)p) );

	p[0] = 0x2222222222222222ULL;
	p[2] = 0x6666666666666666ULL;
	p[4] = 0xAAAAAAAAAAAAAAAAULL;
	p[7] = 0xEEEEEEEEEEEEEEEEULL;
	__asm__ volatile("vmovaps %0, %%zmm16" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm17" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm18" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm19" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm20" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm21" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm22" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm23" :: "m" (*(__m512i*)p) );

	p[0] = 0x3333333333333333ULL;
	p[2] = 0x7777777777777777ULL;
	p[4] = 0xBBBBBBBBBBBBBBBBULL;
	p[7] = 0xFFFFFFFFFFFFFFFFULL;
	__asm__ volatile("vmovaps %0, %%zmm24" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm25" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm26" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm27" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm28" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm29" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm30" :: "m" (*(__m512i*)p) );
	__asm__ volatile("vmovaps %0, %%zmm31" :: "m" (*(__m512i*)p) );

	store_zmm(vec512array0);
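/*
 * Hex-dump helpers and equality assertions for the 512-bit vectors and the
 * opmask registers; both operands are logged when a comparison fails.
 */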
vec512_to_string(VECTOR512 *vec, char *buf) {
	unsigned int vec_idx = 0;
	unsigned int buf_idx = 0;

	for (vec_idx = 0; vec_idx < ZMM_MAX; vec_idx++) {
		bcopy(&vec[vec_idx], &a[0], sizeof(a));
			"0x%016llx:%016llx:%016llx:%016llx:"
			"%016llx:%016llx:%016llx:%016llx%s",
			a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7],
			vec_idx < ZMM_MAX - 1 ? "\n" : ""
		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
opmask_to_string(OPMASK *karray, char *buf) {
	unsigned int karray_idx = 0;
	unsigned int buf_idx = 0;

	for(karray_idx = 0; karray_idx < KARRAY_MAX; karray_idx++) {
			karray_idx, karray[karray_idx],
			karray_idx < KARRAY_MAX ? "\n" : ""
		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
assert_zmm_eq(void *a, void *b, int c) {
	if(memcmp_unoptimized(a, b, c)) {
		vec512_to_string(a, vec_str_buf);
		T_LOG("Compare failed, vector A:\n%s", vec_str_buf);
		vec512_to_string(b, vec_str_buf);
		T_LOG("Compare failed, vector B:\n%s", vec_str_buf);
		T_ASSERT_FAIL("Vectors not equal");
assert_opmask_eq(OPMASK *a, OPMASK *b) {
	for (int i = 0; i < KARRAY_MAX; i++) {
			opmask_to_string(a, karray_str_buf);
			T_LOG("Compare failed, opmask A:\n%s", karray_str_buf);
			opmask_to_string(b, karray_str_buf);
			T_LOG("Compare failed, opmask B:\n%s", karray_str_buf);
			T_ASSERT_FAIL("opmasks not equal");
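/*
 * check_zmm() re-captures the zmm and opmask registers and compares them
 * with the populated values, bailing out once the signal handler has planted
 * STOP_COOKIE_512.
 */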
	uint64_t *p = (uint64_t *) &vec512array1[7];
	store_opmask(karray1);
	store_zmm(vec512array1);
	if (p[0] == STOP_COOKIE_512) {

	assert_zmm_eq(vec512array0, vec512array1, sizeof(vec512array0));
	assert_opmask_eq(karray0, karray1);
static void copy_state_to_opmask(X86_AVX512_STATE_T *sp, OPMASK *op) {
	OPMASK *k = (OPMASK *) &sp->__fpu_k0;
	for (int i = 0; i < KARRAY_MAX; i++) {
		bcopy(&k[i], &op[i], sizeof(*op));

static void copy_zmm_state_to_vector(X86_AVX512_STATE_T *sp, VECTOR512 *vp) {
	struct __darwin_xmm_reg *xmm  = &sp->__fpu_xmm0;
	struct __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0;
	struct __darwin_ymm_reg *zmmh = &sp->__fpu_zmmh0;
#if defined(__x86_64__)
	struct __darwin_zmm_reg *zmm  = &sp->__fpu_zmm16;

	for (i = 0; i < ZMM_MAX/2; i++ ) {
		bcopy(&xmm[i], &vp[i], sizeof(*xmm));
		bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
		bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
		bcopy(&zmm[i], &vp[(ZMM_MAX/2)+i], sizeof(*zmm));

	for (i = 0; i < ZMM_MAX; i++ ) {
		bcopy(&xmm[i], &vp[i], sizeof(*xmm));
		bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
		bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
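/*
 * SIGALRM handler for the AVX-512 test: validate the zmm and opmask state
 * carried in the signal context, then plant STOP_COOKIE_512 in
 * xmm7/ymmh7/zmmh7 and k7 so the modified state must survive sigreturn().
 */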
zmm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
	ucontext_t *contextp = (ucontext_t *) ctx;
	mcontext_t mcontext = contextp->uc_mcontext;
	X86_AVX512_STATE_T *avx_state = (X86_AVX512_STATE_T *) &mcontext->__fs;
	uint64_t *xp = (uint64_t *) &avx_state->__fpu_xmm7;
	uint64_t *yp = (uint64_t *) &avx_state->__fpu_ymmh7;
	uint64_t *zp = (uint64_t *) &avx_state->__fpu_zmmh7;
	uint64_t *kp = (uint64_t *) &avx_state->__fpu_k0;

	/* Check for AVX512 state */
	T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_512, "check context size");

	/* Check that the state in the context is what's set and expected */
	copy_zmm_state_to_vector(avx_state, vec512array3);
	assert_zmm_eq(vec512array3, vec512array0, sizeof(vec512array1));
	copy_state_to_opmask(avx_state, karray3);
	assert_opmask_eq(karray3, karray0);

	/* Change the context and break the main loop */
	xp[0] = STOP_COOKIE_512;
	yp[0] = STOP_COOKIE_512;
	zp[0] = STOP_COOKIE_512;
	kp[7] = STOP_COOKIE_512;
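/*
 * Main AVX-512 soak loop, mirroring ymm_integrity() but additionally
 * covering the opmask registers.
 */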
zmm_integrity(int time) {
	mach_msg_type_number_t avx_count = X86_AVX512_STATE_COUNT;
	X86_AVX512_STATE_T avx_state, avx_state2;
	mach_port_t ts = mach_thread_self();

	bzero(&avx_state, sizeof(avx_state));
	bzero(&avx_state2, sizeof(avx_state));

	store_zmm(vec512array2);
	store_opmask(karray2);

	kret = thread_get_state(
		ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count

	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
	vec512_to_string(vec512array2, vec_str_buf);
	opmask_to_string(karray2, karray_str_buf);
	T_LOG("Initial state:\n%s\n%s", vec_str_buf, karray_str_buf);

	copy_zmm_state_to_vector(&avx_state, vec512array1);
	assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1));
	copy_state_to_opmask(&avx_state, karray1);
	assert_opmask_eq(karray2, karray1);

	kret = thread_get_state(
		ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count

	store_zmm(vec512array2);
	store_opmask(karray2);

	T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
	vec512_to_string(vec512array2, vec_str_buf);
	opmask_to_string(karray2, karray_str_buf);
	T_LOG("Populated state:\n%s\n%s", vec_str_buf, karray_str_buf);

	copy_zmm_state_to_vector(&avx_state2, vec512array1);
	assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1));
	copy_state_to_opmask(&avx_state2, karray1);
	assert_opmask_eq(karray2, karray1);

	T_LOG("Running for %ds…", time);
	start_timer(time, zmm_sigalrm_handler);

	/* re-populate because printing mucks up XMMs */

	/* Check state until timer fires */

	/* Check that the sig handler changed our AVX state */
	store_zmm(vec512array1);
	store_opmask(karray1);

	uint64_t *p = (uint64_t *) &vec512array1[7];
	if (p[0] != STOP_COOKIE_512 ||
	    p[2] != STOP_COOKIE_512 ||
	    p[4] != STOP_COOKIE_512 ||
	    karray1[7] != STOP_COOKIE_512) {
		vec512_to_string(vec512array1, vec_str_buf);
		opmask_to_string(karray1, karray_str_buf);
		T_LOG("State:\n%s\n%s", vec_str_buf, karray_str_buf);
		T_ASSERT_FAIL("sigreturn failed to stick");

	T_LOG("Ran for %ds", time);
	T_PASS("No zmm register corruption occurred");
/*
 * Main test declarations
 */
T_DECL(ymm_integrity,
    "Quick soak test to verify that AVX "
    "register state is maintained correctly",
    T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD)) {
	ymm_integrity(NORMAL_RUN_TIME);

T_DECL(ymm_integrity_stress,
    "Extended soak test to verify that AVX "
    "register state is maintained correctly",
    T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD),
    T_META_ENABLED(false)) {
	ymm_integrity(LONG_RUN_TIME);

T_DECL(zmm_integrity,
    "Quick soak test to verify that AVX-512 "
    "register state is maintained correctly",
    T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD)) {
	zmm_integrity(NORMAL_RUN_TIME);

T_DECL(zmm_integrity_stress,
    "Extended soak test to verify that AVX-512 "
    "register state is maintained correctly",
    T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD),
    T_META_ENABLED(false)) {
	zmm_integrity(LONG_RUN_TIME);