#ifdef T_NAMESPACE
#undef T_NAMESPACE
#endif

#include <darwintest.h>
#include <unistd.h>
#include <signal.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <immintrin.h>
#include <mach/mach.h>
#include <stdio.h>
#include <string.h>
#include <err.h>
#include <i386/cpu_capabilities.h>

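/*
 * These tests exercise the kernel's handling of per-thread AVX and AVX-512
 * register state: they fill the vector (and, for AVX-512, opmask) registers
 * with known patterns, then verify via thread_get_state() and the signal
 * handler's mcontext that the state survives context switches and signal
 * delivery intact.
 */
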
T_GLOBAL_META(
    T_META_NAMESPACE("xnu.intel"),
    T_META_CHECK_LEAKS(false),
    T_META_RUN_CONCURRENTLY(true)
    );

#define NORMAL_RUN_TIME  (10)
#define LONG_RUN_TIME    (10*60)
#define TIMEOUT_OVERHEAD (10)

volatile boolean_t checking = true;
char vec_str_buf[8196];
char karray_str_buf[1024];

/*
 * ymm defines/globals/prototypes
 */
#define STOP_COOKIE_256 0x01234567
#if defined(__x86_64__)
#define YMM_MAX 16
#define X86_AVX_STATE_T x86_avx_state64_t
#define X86_AVX_STATE_COUNT x86_AVX_STATE64_COUNT
#define X86_AVX_STATE_FLAVOR x86_AVX_STATE64
#define MCONTEXT_SIZE_256 sizeof(struct __darwin_mcontext_avx64)
#else
#define YMM_MAX 8
#define X86_AVX_STATE_T x86_avx_state32_t
#define X86_AVX_STATE_COUNT x86_AVX_STATE32_COUNT
#define X86_AVX_STATE_FLAVOR x86_AVX_STATE32
#define MCONTEXT_SIZE_256 sizeof(struct __darwin_mcontext_avx32)
#endif
#define VECTOR256 __m256
#define VEC256ALIGN __attribute__ ((aligned(32)))
static inline void populate_ymm(void);
static inline void check_ymm(void);
VECTOR256 vec256array0[YMM_MAX] VEC256ALIGN;
VECTOR256 vec256array1[YMM_MAX] VEC256ALIGN;
VECTOR256 vec256array2[YMM_MAX] VEC256ALIGN;
VECTOR256 vec256array3[YMM_MAX] VEC256ALIGN;

/*
 * zmm defines/globals/prototypes
 */
#define STOP_COOKIE_512 0x0123456789abcdefULL
#if defined(__x86_64__)
#define ZMM_MAX 32
#define X86_AVX512_STATE_T x86_avx512_state64_t
#define X86_AVX512_STATE_COUNT x86_AVX512_STATE64_COUNT
#define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE64
#define MCONTEXT_SIZE_512 sizeof(struct __darwin_mcontext_avx512_64)
#else
#define ZMM_MAX 8
#define X86_AVX512_STATE_T x86_avx512_state32_t
#define X86_AVX512_STATE_COUNT x86_AVX512_STATE32_COUNT
#define X86_AVX512_STATE_FLAVOR x86_AVX512_STATE32
#define MCONTEXT_SIZE_512 sizeof(struct __darwin_mcontext_avx512_32)
#endif
#define VECTOR512 __m512
#define VEC512ALIGN __attribute__ ((aligned(64)))
#define OPMASK uint64_t
#define KARRAY_MAX 8
static inline void populate_zmm(void);
static inline void populate_opmask(void);
static inline void check_zmm(void);
VECTOR512 vec512array0[ZMM_MAX] VEC512ALIGN;
VECTOR512 vec512array1[ZMM_MAX] VEC512ALIGN;
VECTOR512 vec512array2[ZMM_MAX] VEC512ALIGN;
VECTOR512 vec512array3[ZMM_MAX] VEC512ALIGN;
OPMASK karray0[KARRAY_MAX];
OPMASK karray1[KARRAY_MAX];
OPMASK karray2[KARRAY_MAX];
OPMASK karray3[KARRAY_MAX];

kern_return_t _thread_get_state_avx(thread_t thread, int flavor, thread_state_t state,
    mach_msg_type_number_t *state_count);
kern_return_t _thread_get_state_avx512(thread_t thread, int flavor, thread_state_t state,
    mach_msg_type_number_t *state_count);

/*
 * Common functions
 */

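/*
 * A byte-at-a-time memcmp(). The libc memcmp() may be vectorized, and a
 * vectorized comparison would clobber the very registers these tests are
 * trying to validate.
 */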
int
memcmp_unoptimized(const void *s1, const void *s2, size_t n)
{
    if (n != 0) {
        const unsigned char *p1 = s1, *p2 = s2;
        do {
            if (*p1++ != *p2++) {
                return *--p1 - *--p2;
            }
        } while (--n != 0);
    }
    return 0;
}

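/*
 * Arm a one-shot ITIMER_REAL so that SIGALRM is delivered after `seconds'
 * seconds, invoking `handler' to terminate the checking loop.
 */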
void
start_timer(int seconds, void (*handler)(int, siginfo_t *, void *))
{
    struct sigaction sigalrm_action = {
        .sa_sigaction = handler,
        .sa_flags = SA_RESTART,
        .sa_mask = 0
    };
    struct itimerval timer = {
        .it_value.tv_sec = seconds,
        .it_value.tv_usec = 0,
        .it_interval.tv_sec = 0,
        .it_interval.tv_usec = 0
    };
    T_QUIET; T_WITH_ERRNO;
    T_ASSERT_NE(sigaction(SIGALRM, &sigalrm_action, NULL), -1, NULL);
    T_QUIET; T_WITH_ERRNO;
    T_ASSERT_NE(setitimer(ITIMER_REAL, &timer, NULL), -1, NULL);
}

void
require_avx(void)
{
    if ((_get_cpu_capabilities() & kHasAVX1_0) != kHasAVX1_0) {
        T_SKIP("AVX not supported on this system");
    }
}

void
require_avx512(void)
{
    if ((_get_cpu_capabilities() & kHasAVX512F) != kHasAVX512F) {
        T_SKIP("AVX-512 not supported on this system");
    }
}

/*
 * ymm functions
 */

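/*
 * store_ymm()/restore_ymm() copy the live contents of ymm0-ymm7 (and
 * ymm8-ymm15 on x86_64) out to memory and back, one register per 32-byte
 * array slot.
 */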
static inline void
store_ymm(VECTOR256 *vec256array)
{
    int i = 0;
    __asm__ volatile ("vmovaps %%ymm0, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm1, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm2, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm3, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm4, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm5, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm6, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm7, %0" :"=m" (vec256array[i]));
#if defined(__x86_64__)
    i++; __asm__ volatile ("vmovaps %%ymm8, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm9, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm10, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm11, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm12, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm13, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm14, %0" :"=m" (vec256array[i]));
    i++; __asm__ volatile ("vmovaps %%ymm15, %0" :"=m" (vec256array[i]));
#endif
}

static inline void
restore_ymm(VECTOR256 *vec256array)
{
    VECTOR256 *p = vec256array;

    __asm__ volatile ("vmovaps %0, %%ymm0" :: "m" (*(__m256i*)p) : "ymm0"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm1" :: "m" (*(__m256i*)p) : "ymm1"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm2" :: "m" (*(__m256i*)p) : "ymm2"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm3" :: "m" (*(__m256i*)p) : "ymm3"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm4" :: "m" (*(__m256i*)p) : "ymm4"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm5" :: "m" (*(__m256i*)p) : "ymm5"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm6" :: "m" (*(__m256i*)p) : "ymm6"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm7" :: "m" (*(__m256i*)p) : "ymm7");

#if defined(__x86_64__)
    ++p; __asm__ volatile ("vmovaps %0, %%ymm8" :: "m" (*(__m256i*)p) : "ymm8"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm9" :: "m" (*(__m256i*)p) : "ymm9"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm10" :: "m" (*(__m256i*)p) : "ymm10"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm11" :: "m" (*(__m256i*)p) : "ymm11"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm12" :: "m" (*(__m256i*)p) : "ymm12"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm13" :: "m" (*(__m256i*)p) : "ymm13"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm14" :: "m" (*(__m256i*)p) : "ymm14"); p++;
    __asm__ volatile ("vmovaps %0, %%ymm15" :: "m" (*(__m256i*)p) : "ymm15");
#endif
}

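/*
 * Fill every YMM register with a recognizable pattern derived from the
 * current pid, then snapshot the result into vec256array0 as the
 * reference copy.
 */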
static inline void
populate_ymm(void)
{
    int j;
    uint32_t p[8] VEC256ALIGN;

    for (j = 0; j < (int) (sizeof(p) / sizeof(p[0])); j++) {
        p[j] = getpid();
    }

    p[0] = 0x22222222;
    p[7] = 0x77777777;
    __asm__ volatile ("vmovaps %0, %%ymm0" :: "m" (*(__m256i*)p) : "ymm0");
    __asm__ volatile ("vmovaps %0, %%ymm1" :: "m" (*(__m256i*)p) : "ymm1");
    __asm__ volatile ("vmovaps %0, %%ymm2" :: "m" (*(__m256i*)p) : "ymm2");
    __asm__ volatile ("vmovaps %0, %%ymm3" :: "m" (*(__m256i*)p) : "ymm3");

    p[0] = 0x44444444;
    p[7] = 0xEEEEEEEE;
    __asm__ volatile ("vmovaps %0, %%ymm4" :: "m" (*(__m256i*)p) : "ymm4");
    __asm__ volatile ("vmovaps %0, %%ymm5" :: "m" (*(__m256i*)p) : "ymm5");
    __asm__ volatile ("vmovaps %0, %%ymm6" :: "m" (*(__m256i*)p) : "ymm6");
    __asm__ volatile ("vmovaps %0, %%ymm7" :: "m" (*(__m256i*)p) : "ymm7");

#if defined(__x86_64__)
    p[0] = 0x88888888;
    p[7] = 0xAAAAAAAA;
    __asm__ volatile ("vmovaps %0, %%ymm8" :: "m" (*(__m256i*)p) : "ymm8");
    __asm__ volatile ("vmovaps %0, %%ymm9" :: "m" (*(__m256i*)p) : "ymm9");
    __asm__ volatile ("vmovaps %0, %%ymm10" :: "m" (*(__m256i*)p) : "ymm10");
    __asm__ volatile ("vmovaps %0, %%ymm11" :: "m" (*(__m256i*)p) : "ymm11");

    p[0] = 0xBBBBBBBB;
    p[7] = 0xCCCCCCCC;
    __asm__ volatile ("vmovaps %0, %%ymm12" :: "m" (*(__m256i*)p) : "ymm12");
    __asm__ volatile ("vmovaps %0, %%ymm13" :: "m" (*(__m256i*)p) : "ymm13");
    __asm__ volatile ("vmovaps %0, %%ymm14" :: "m" (*(__m256i*)p) : "ymm14");
    __asm__ volatile ("vmovaps %0, %%ymm15" :: "m" (*(__m256i*)p) : "ymm15");
#endif

    store_ymm(vec256array0);
}

void
vec256_to_string(VECTOR256 *vec, char *buf)
{
    unsigned int vec_idx = 0;
    unsigned int buf_idx = 0;
    int ret = 0;

    for (vec_idx = 0; vec_idx < YMM_MAX; vec_idx++) {
        uint64_t a[4];
        bcopy(&vec[vec_idx], &a[0], sizeof(a));
        ret = sprintf(
            buf + buf_idx,
            "0x%016llx:%016llx:%016llx:%016llx\n",
            a[0], a[1], a[2], a[3]
            );
        T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
        buf_idx += ret;
    }
}

void
assert_ymm_eq(void *a, void *b, int c)
{
    if (memcmp_unoptimized(a, b, c)) {
        vec256_to_string(a, vec_str_buf);
        T_LOG("Compare failed, vector A:\n%s", vec_str_buf);
        vec256_to_string(b, vec_str_buf);
        T_LOG("Compare failed, vector B:\n%s", vec_str_buf);
        T_ASSERT_FAIL("vectors not equal");
    }
}

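/*
 * Re-read the YMM registers and compare against the reference copy. If the
 * signal handler has already planted the stop cookie in ymm7, the registers
 * were changed deliberately and the comparison is skipped.
 */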
void
check_ymm(void)
{
    uint32_t *p = (uint32_t *) &vec256array1[7];
    store_ymm(vec256array1);
    if (p[0] == STOP_COOKIE_256) {
        return;
    }
    assert_ymm_eq(vec256array0, vec256array1, sizeof(vec256array0));
}

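/*
 * The AVX thread state stores each YMM register split in two: the low 128
 * bits in __fpu_xmmN and the high 128 bits in __fpu_ymmhN. Reassemble them
 * into whole 256-bit values for comparison.
 */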
static void
copy_ymm_state_to_vector(X86_AVX_STATE_T *sp, VECTOR256 *vp)
{
    int i;
    struct __darwin_xmm_reg *xmm = &sp->__fpu_xmm0;
    struct __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0;

    for (i = 0; i < YMM_MAX; i++) {
        bcopy(&xmm[i], &vp[i], sizeof(*xmm));
        bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
    }
}

static void
ymm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
{
    ucontext_t *contextp = (ucontext_t *) ctx;
    mcontext_t mcontext = contextp->uc_mcontext;
    X86_AVX_STATE_T *avx_state = (X86_AVX_STATE_T *) &mcontext->__fs;
    uint32_t *xp = (uint32_t *) &avx_state->__fpu_xmm7;
    uint32_t *yp = (uint32_t *) &avx_state->__fpu_ymmh7;

    T_LOG("Got SIGALRM");

    /* Check for AVX state */
    T_QUIET;
    T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_256, "check context size");

    /* Check that the state in the context is what's set and expected */
    copy_ymm_state_to_vector(avx_state, vec256array3);
    assert_ymm_eq(vec256array3, vec256array0, sizeof(vec256array1));

    /* Change the context and break the main loop */
    xp[0] = STOP_COOKIE_256;
    yp[0] = STOP_COOKIE_256;
    checking = FALSE;
}

kern_return_t
_thread_get_state_avx(
    thread_t thread,
    int flavor,
    thread_state_t state,                   /* pointer to OUT array */
    mach_msg_type_number_t *state_count)    /* IN/OUT */
{
    kern_return_t rv;
    VECTOR256 ymms[YMM_MAX];

    /*
     * We must save and restore the YMMs across thread_get_state() because
     * code in thread_get_state changes at least one xmm register AFTER
     * thread_get_state has saved the state in userspace. While it's still
     * possible for something to muck with the %xmms BEFORE making the mach
     * system call (rendering this save/restore useless), that does not
     * currently occur, and since we depend on the AVX state saved by
     * thread_get_state being the same as that manually copied from the YMMs
     * after thread_get_state returns, we have to go through these
     * machinations.
     */
    store_ymm(ymms);

    rv = thread_get_state(thread, flavor, state, state_count);

    restore_ymm(ymms);

    return rv;
}

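/*
 * Test flow: capture the initial AVX state via thread_get_state() and check
 * it against the live registers; populate known patterns and check again;
 * then spin in check_ymm() until the SIGALRM handler rewrites the
 * interrupted context, and finally verify that sigreturn applied the
 * modified state to the thread.
 */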
void
ymm_integrity(int time)
{
    mach_msg_type_number_t avx_count = X86_AVX_STATE_COUNT;
    kern_return_t kret;
    X86_AVX_STATE_T avx_state, avx_state2;
    mach_port_t ts = mach_thread_self();

    bzero(&avx_state, sizeof(avx_state));
    bzero(&avx_state2, sizeof(avx_state2));

    kret = _thread_get_state_avx(
        ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count
        );

    store_ymm(vec256array2);

    T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
    vec256_to_string(vec256array2, vec_str_buf);
    T_LOG("Initial state:\n%s", vec_str_buf);

    copy_ymm_state_to_vector(&avx_state, vec256array1);
    assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array1));

    populate_ymm();

    kret = _thread_get_state_avx(
        ts, X86_AVX_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count
        );

    store_ymm(vec256array2);

    T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
    vec256_to_string(vec256array2, vec_str_buf);
    T_LOG("Populated state:\n%s", vec_str_buf);

    copy_ymm_state_to_vector(&avx_state2, vec256array1);
    assert_ymm_eq(vec256array2, vec256array1, sizeof(vec256array0));

    T_LOG("Running for %ds…", time);
    start_timer(time, ymm_sigalrm_handler);

    /* re-populate because printing mucks up the XMMs */
    populate_ymm();

    /* Check state until the timer fires */
    while (checking) {
        check_ymm();
    }

    /* Check that the sig handler changed our AVX state */
    store_ymm(vec256array1);

    uint32_t *p = (uint32_t *) &vec256array1[7];
    if (p[0] != STOP_COOKIE_256 ||
        p[4] != STOP_COOKIE_256) {
        vec256_to_string(vec256array1, vec_str_buf);
        T_LOG("State:\n%s", vec_str_buf);
        T_ASSERT_FAIL("sigreturn failed to stick");
    }

    T_LOG("Ran for %ds", time);
    T_PASS("No ymm register corruption occurred");
}

/*
 * zmm functions
 */

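/*
 * Copy the eight AVX-512 opmask registers (k0-k7) out to memory via kmovq.
 */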
static inline void
store_opmask(OPMASK k[])
{
    __asm__ volatile ("kmovq %%k0, %0" :"=m" (k[0]));
    __asm__ volatile ("kmovq %%k1, %0" :"=m" (k[1]));
    __asm__ volatile ("kmovq %%k2, %0" :"=m" (k[2]));
    __asm__ volatile ("kmovq %%k3, %0" :"=m" (k[3]));
    __asm__ volatile ("kmovq %%k4, %0" :"=m" (k[4]));
    __asm__ volatile ("kmovq %%k5, %0" :"=m" (k[5]));
    __asm__ volatile ("kmovq %%k6, %0" :"=m" (k[6]));
    __asm__ volatile ("kmovq %%k7, %0" :"=m" (k[7]));
}

static inline void
store_zmm(VECTOR512 *vecarray)
{
    int i = 0;
    __asm__ volatile ("vmovaps %%zmm0, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm1, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm2, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm3, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm4, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm5, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm6, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm7, %0" :"=m" (vecarray[i]));
#if defined(__x86_64__)
    i++; __asm__ volatile ("vmovaps %%zmm8, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm9, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm10, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm11, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm12, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm13, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm14, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm15, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm16, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm17, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm18, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm19, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm20, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm21, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm22, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm23, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm24, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm25, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm26, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm27, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm28, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm29, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm30, %0" :"=m" (vecarray[i]));
    i++; __asm__ volatile ("vmovaps %%zmm31, %0" :"=m" (vecarray[i]));
#endif
}

static inline void
restore_zmm(VECTOR512 *vecarray)
{
    VECTOR512 *p = vecarray;

    __asm__ volatile ("vmovaps %0, %%zmm0" :: "m" (*(__m512i*)p) : "zmm0"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm1" :: "m" (*(__m512i*)p) : "zmm1"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm2" :: "m" (*(__m512i*)p) : "zmm2"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm3" :: "m" (*(__m512i*)p) : "zmm3"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm4" :: "m" (*(__m512i*)p) : "zmm4"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm5" :: "m" (*(__m512i*)p) : "zmm5"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm6" :: "m" (*(__m512i*)p) : "zmm6"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm7" :: "m" (*(__m512i*)p) : "zmm7");

#if defined(__x86_64__)
    ++p; __asm__ volatile ("vmovaps %0, %%zmm8" :: "m" (*(__m512i*)p) : "zmm8"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm9" :: "m" (*(__m512i*)p) : "zmm9"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm10" :: "m" (*(__m512i*)p) : "zmm10"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm11" :: "m" (*(__m512i*)p) : "zmm11"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm12" :: "m" (*(__m512i*)p) : "zmm12"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm13" :: "m" (*(__m512i*)p) : "zmm13"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm14" :: "m" (*(__m512i*)p) : "zmm14"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm15" :: "m" (*(__m512i*)p) : "zmm15"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm16" :: "m" (*(__m512i*)p) : "zmm16"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm17" :: "m" (*(__m512i*)p) : "zmm17"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm18" :: "m" (*(__m512i*)p) : "zmm18"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm19" :: "m" (*(__m512i*)p) : "zmm19"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm20" :: "m" (*(__m512i*)p) : "zmm20"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm21" :: "m" (*(__m512i*)p) : "zmm21"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm22" :: "m" (*(__m512i*)p) : "zmm22"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm23" :: "m" (*(__m512i*)p) : "zmm23"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm24" :: "m" (*(__m512i*)p) : "zmm24"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm25" :: "m" (*(__m512i*)p) : "zmm25"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm26" :: "m" (*(__m512i*)p) : "zmm26"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm27" :: "m" (*(__m512i*)p) : "zmm27"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm28" :: "m" (*(__m512i*)p) : "zmm28"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm29" :: "m" (*(__m512i*)p) : "zmm29"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm30" :: "m" (*(__m512i*)p) : "zmm30"); p++;
    __asm__ volatile ("vmovaps %0, %%zmm31" :: "m" (*(__m512i*)p) : "zmm31");
#endif
}

static inline void
populate_opmask(void)
{
    uint64_t k[8];

    for (int j = 0; j < 8; j++) {
        k[j] = ((uint64_t) getpid() << 32) + (0x11111111 * j);
    }

    __asm__ volatile ("kmovq %0, %%k0" : :"m" (k[0]));
    __asm__ volatile ("kmovq %0, %%k1" : :"m" (k[1]));
    __asm__ volatile ("kmovq %0, %%k2" : :"m" (k[2]));
    __asm__ volatile ("kmovq %0, %%k3" : :"m" (k[3]));
    __asm__ volatile ("kmovq %0, %%k4" : :"m" (k[4]));
    __asm__ volatile ("kmovq %0, %%k5" : :"m" (k[5]));
    __asm__ volatile ("kmovq %0, %%k6" : :"m" (k[6]));
    __asm__ volatile ("kmovq %0, %%k7" : :"m" (k[7]));

    store_opmask(karray0);
}

kern_return_t
_thread_get_state_avx512(
    thread_t thread,
    int flavor,
    thread_state_t state,                   /* pointer to OUT array */
    mach_msg_type_number_t *state_count)    /* IN/OUT */
{
    kern_return_t rv;
    VECTOR512 zmms[ZMM_MAX];

    /*
     * We must save and restore the ZMMs across thread_get_state() because
     * code in thread_get_state changes at least one xmm register AFTER
     * thread_get_state has saved the state in userspace. While it's still
     * possible for something to muck with the %XMMs BEFORE making the mach
     * system call (rendering this save/restore useless), that does not
     * currently occur, and since we depend on the AVX-512 state saved by
     * thread_get_state being the same as that manually copied from the ZMMs
     * after thread_get_state returns, we have to go through these
     * machinations.
     */
    store_zmm(zmms);

    rv = thread_get_state(thread, flavor, state, state_count);

    restore_zmm(zmms);

    return rv;
}

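/*
 * Fill every ZMM register with a recognizable pid-derived pattern, then
 * snapshot the result into vec512array0 as the reference copy.
 */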
static inline void
populate_zmm(void)
{
    int j;
    uint64_t p[8] VEC512ALIGN;

    for (j = 0; j < (int) (sizeof(p) / sizeof(p[0])); j++) {
        p[j] = ((uint64_t) getpid() << 32) + getpid();
    }

    p[0] = 0x0000000000000000ULL;
    p[2] = 0x4444444444444444ULL;
    p[4] = 0x8888888888888888ULL;
    p[7] = 0xCCCCCCCCCCCCCCCCULL;
    __asm__ volatile ("vmovaps %0, %%zmm0" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm1" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm2" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm3" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm4" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm5" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm6" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm7" :: "m" (*(__m512i*)p));

#if defined(__x86_64__)
    p[0] = 0x1111111111111111ULL;
    p[2] = 0x5555555555555555ULL;
    p[4] = 0x9999999999999999ULL;
    p[7] = 0xDDDDDDDDDDDDDDDDULL;
    __asm__ volatile ("vmovaps %0, %%zmm8" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm9" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm10" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm11" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm12" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm13" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm14" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm15" :: "m" (*(__m512i*)p));

    p[0] = 0x2222222222222222ULL;
    p[2] = 0x6666666666666666ULL;
    p[4] = 0xAAAAAAAAAAAAAAAAULL;
    p[7] = 0xEEEEEEEEEEEEEEEEULL;
    __asm__ volatile ("vmovaps %0, %%zmm16" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm17" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm18" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm19" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm20" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm21" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm22" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm23" :: "m" (*(__m512i*)p));

    p[0] = 0x3333333333333333ULL;
    p[2] = 0x7777777777777777ULL;
    p[4] = 0xBBBBBBBBBBBBBBBBULL;
    p[7] = 0xFFFFFFFFFFFFFFFFULL;
    __asm__ volatile ("vmovaps %0, %%zmm24" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm25" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm26" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm27" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm28" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm29" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm30" :: "m" (*(__m512i*)p));
    __asm__ volatile ("vmovaps %0, %%zmm31" :: "m" (*(__m512i*)p));
#endif

    store_zmm(vec512array0);
}

void
vec512_to_string(VECTOR512 *vec, char *buf)
{
    unsigned int vec_idx = 0;
    unsigned int buf_idx = 0;
    int ret = 0;

    for (vec_idx = 0; vec_idx < ZMM_MAX; vec_idx++) {
        uint64_t a[8];
        bcopy(&vec[vec_idx], &a[0], sizeof(a));
        ret = sprintf(
            buf + buf_idx,
            "0x%016llx:%016llx:%016llx:%016llx:"
            "%016llx:%016llx:%016llx:%016llx%s",
            a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7],
            vec_idx < ZMM_MAX - 1 ? "\n" : ""
            );
        T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
        buf_idx += ret;
    }
}

void
opmask_to_string(OPMASK *karray, char *buf)
{
    unsigned int karray_idx = 0;
    unsigned int buf_idx = 0;
    int ret = 0;

    for (karray_idx = 0; karray_idx < KARRAY_MAX; karray_idx++) {
        ret = sprintf(
            buf + buf_idx,
            "k%d: 0x%016llx%s",
            karray_idx, karray[karray_idx],
            karray_idx < KARRAY_MAX - 1 ? "\n" : ""
            );
        T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sprintf()");
        buf_idx += ret;
    }
}

static void
assert_zmm_eq(void *a, void *b, int c)
{
    if (memcmp_unoptimized(a, b, c)) {
        vec512_to_string(a, vec_str_buf);
        T_LOG("Compare failed, vector A:\n%s", vec_str_buf);
        vec512_to_string(b, vec_str_buf);
        T_LOG("Compare failed, vector B:\n%s", vec_str_buf);
        T_ASSERT_FAIL("Vectors not equal");
    }
}

static void
assert_opmask_eq(OPMASK *a, OPMASK *b)
{
    for (int i = 0; i < KARRAY_MAX; i++) {
        if (a[i] != b[i]) {
            opmask_to_string(a, karray_str_buf);
            T_LOG("Compare failed, opmask A:\n%s", karray_str_buf);
            opmask_to_string(b, karray_str_buf);
            T_LOG("Compare failed, opmask B:\n%s", karray_str_buf);
            T_ASSERT_FAIL("opmasks not equal");
        }
    }
}

void
check_zmm(void)
{
    uint64_t *p = (uint64_t *) &vec512array1[7];
    store_opmask(karray1);
    store_zmm(vec512array1);
    if (p[0] == STOP_COOKIE_512) {
        return;
    }

    assert_zmm_eq(vec512array0, vec512array1, sizeof(vec512array0));
    assert_opmask_eq(karray0, karray1);
}

static void
copy_state_to_opmask(X86_AVX512_STATE_T *sp, OPMASK *op)
{
    OPMASK *k = (OPMASK *) &sp->__fpu_k0;
    for (int i = 0; i < KARRAY_MAX; i++) {
        bcopy(&k[i], &op[i], sizeof(*op));
    }
}

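/*
 * The AVX-512 thread state stores zmm0-zmm15 split in three pieces (low 128
 * bits in __fpu_xmmN, next 128 in __fpu_ymmhN, high 256 in __fpu_zmmhN);
 * on x86_64, zmm16-zmm31 are stored whole in __fpu_zmmN. Reassemble
 * everything into whole 512-bit values for comparison.
 */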
static void
copy_zmm_state_to_vector(X86_AVX512_STATE_T *sp, VECTOR512 *vp)
{
    int i;
    struct __darwin_xmm_reg *xmm = &sp->__fpu_xmm0;
    struct __darwin_xmm_reg *ymmh = &sp->__fpu_ymmh0;
    struct __darwin_ymm_reg *zmmh = &sp->__fpu_zmmh0;
#if defined(__x86_64__)
    struct __darwin_zmm_reg *zmm = &sp->__fpu_zmm16;

    for (i = 0; i < ZMM_MAX / 2; i++) {
        bcopy(&xmm[i], &vp[i], sizeof(*xmm));
        bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
        bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
        bcopy(&zmm[i], &vp[(ZMM_MAX / 2) + i], sizeof(*zmm));
    }
#else
    for (i = 0; i < ZMM_MAX; i++) {
        bcopy(&xmm[i], &vp[i], sizeof(*xmm));
        bcopy(&ymmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*ymmh)), sizeof(*ymmh));
        bcopy(&zmmh[i], (void *) ((uint64_t)&vp[i] + sizeof(*zmmh)), sizeof(*zmmh));
    }
#endif
}

static void
zmm_sigalrm_handler(int signum __unused, siginfo_t *info __unused, void *ctx)
{
    ucontext_t *contextp = (ucontext_t *) ctx;
    mcontext_t mcontext = contextp->uc_mcontext;
    X86_AVX512_STATE_T *avx_state = (X86_AVX512_STATE_T *) &mcontext->__fs;
    uint64_t *xp = (uint64_t *) &avx_state->__fpu_xmm7;
    uint64_t *yp = (uint64_t *) &avx_state->__fpu_ymmh7;
    uint64_t *zp = (uint64_t *) &avx_state->__fpu_zmmh7;
    uint64_t *kp = (uint64_t *) &avx_state->__fpu_k0;

    /* Check for AVX512 state */
    T_QUIET;
    T_ASSERT_GE(contextp->uc_mcsize, MCONTEXT_SIZE_512, "check context size");

    /* Check that the state in the context is what's set and expected */
    copy_zmm_state_to_vector(avx_state, vec512array3);
    assert_zmm_eq(vec512array3, vec512array0, sizeof(vec512array1));
    copy_state_to_opmask(avx_state, karray3);
    assert_opmask_eq(karray3, karray0);

    /* Change the context and break the main loop */
    xp[0] = STOP_COOKIE_512;
    yp[0] = STOP_COOKIE_512;
    zp[0] = STOP_COOKIE_512;
    kp[7] = STOP_COOKIE_512;
    checking = FALSE;
}

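/*
 * Same flow as ymm_integrity(), extended to cover the full ZMM register
 * file and the k0-k7 opmask registers.
 */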
void
zmm_integrity(int time)
{
    mach_msg_type_number_t avx_count = X86_AVX512_STATE_COUNT;
    kern_return_t kret;
    X86_AVX512_STATE_T avx_state, avx_state2;
    mach_port_t ts = mach_thread_self();

    bzero(&avx_state, sizeof(avx_state));
    bzero(&avx_state2, sizeof(avx_state2));

    store_zmm(vec512array2);
    store_opmask(karray2);

    kret = _thread_get_state_avx512(
        ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state, &avx_count
        );

    T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
    vec512_to_string(vec512array2, vec_str_buf);
    opmask_to_string(karray2, karray_str_buf);
    T_LOG("Initial state:\n%s\n%s", vec_str_buf, karray_str_buf);

    copy_zmm_state_to_vector(&avx_state, vec512array1);
    assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1));
    copy_state_to_opmask(&avx_state, karray1);
    assert_opmask_eq(karray2, karray1);

    populate_zmm();
    populate_opmask();

    kret = _thread_get_state_avx512(
        ts, X86_AVX512_STATE_FLAVOR, (thread_state_t)&avx_state2, &avx_count
        );

    store_zmm(vec512array2);
    store_opmask(karray2);

    T_QUIET; T_ASSERT_MACH_SUCCESS(kret, "thread_get_state()");
    vec512_to_string(vec512array2, vec_str_buf);
    opmask_to_string(karray2, karray_str_buf);
    T_LOG("Populated state:\n%s\n%s", vec_str_buf, karray_str_buf);

    copy_zmm_state_to_vector(&avx_state2, vec512array1);
    assert_zmm_eq(vec512array2, vec512array1, sizeof(vec512array1));
    copy_state_to_opmask(&avx_state2, karray1);
    assert_opmask_eq(karray2, karray1);

    T_LOG("Running for %ds…", time);
    start_timer(time, zmm_sigalrm_handler);

    /* re-populate because printing mucks up the XMMs */
    populate_zmm();
    populate_opmask();

    /* Check state until the timer fires */
    while (checking) {
        check_zmm();
    }

    /* Check that the sig handler changed our AVX state */
    store_zmm(vec512array1);
    store_opmask(karray1);

    uint64_t *p = (uint64_t *) &vec512array1[7];
    if (p[0] != STOP_COOKIE_512 ||
        p[2] != STOP_COOKIE_512 ||
        p[4] != STOP_COOKIE_512 ||
        karray1[7] != STOP_COOKIE_512) {
        vec512_to_string(vec512array1, vec_str_buf);
        opmask_to_string(karray1, karray_str_buf);
        T_LOG("State:\n%s\n%s", vec_str_buf, karray_str_buf);
        T_ASSERT_FAIL("sigreturn failed to stick");
    }

    T_LOG("Ran for %ds", time);
    T_PASS("No zmm register corruption occurred");
}

/*
 * Main test declarations
 */
T_DECL(ymm_integrity,
    "Quick soak test to verify that AVX "
    "register state is maintained correctly",
    T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD)) {
    require_avx();
    ymm_integrity(NORMAL_RUN_TIME);
}

T_DECL(ymm_integrity_stress,
    "Extended soak test to verify that AVX "
    "register state is maintained correctly",
    T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD),
    T_META_ENABLED(false)) {
    require_avx();
    ymm_integrity(LONG_RUN_TIME);
}

T_DECL(zmm_integrity,
    "Quick soak test to verify that AVX-512 "
    "register state is maintained correctly",
    T_META_TIMEOUT(NORMAL_RUN_TIME + TIMEOUT_OVERHEAD)) {
    require_avx512();
    zmm_integrity(NORMAL_RUN_TIME);
}

T_DECL(zmm_integrity_stress,
    "Extended soak test to verify that AVX-512 "
    "register state is maintained correctly",
    T_META_TIMEOUT(LONG_RUN_TIME + TIMEOUT_OVERHEAD),
    T_META_ENABLED(false)) {
    require_avx512();
    zmm_integrity(LONG_RUN_TIME);
}