/*
 * Copyright (c) 2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <kern/assert.h>
#include <kern/monotonic.h>
#include <kern/thread.h>
#include <machine/atomic.h>
#include <machine/monotonic.h>
#include <mach/mach_traps.h>
#include <stdatomic.h>
#include <sys/errno.h>
bool mt_debug = false;
_Atomic uint64_t mt_pmis = 0;
_Atomic uint64_t mt_retrograde = 0;

#define MT_KDBG_INSTRS_CYCLES(CODE) \
	KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_INSTRS_CYCLES, CODE)

#define MT_KDBG_IC_CPU_CSWITCH MT_KDBG_INSTRS_CYCLES(1)

/*
 * Updating the thread counters takes place in the context switch path, so it
 * cannot introduce too much overhead. Thus, updating takes no locks, instead
 * updating a generation count to an odd value to indicate that it's in the
 * critical section and that readers should wait until the generation count
 * returns to an even value.
 *
 * Reading the counters also needs to not see any "torn" states of the
 * counters, where a few of the counters are from a previous state and the
 * rest are from the current state. For this reason, the reader redrives the
 * entire read operation if it sees mismatching generation counts at the
 * beginning and end of reading.
 */
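
/*
 * In outline (see mt_update_thread and mt_fixed_thread_counts below):
 *
 *   writer (context switch)          reader
 *   mth_gen++ (now odd)              do {
 *   add deltas to mth_counts[]           start = mth_gen (spin while odd)
 *   mth_gen++ (now even)                 copy mth_counts[]
 *                                        end = mth_gen
 *                                    } while (start != end)
 */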

#define MAXSPINS 100
#define MAXRETRIES 10

int
mt_fixed_thread_counts(thread_t thread, uint64_t *counts_out)
{
	uint64_t start_gen, end_gen;
	uint64_t spins = 0, retries = 0;
	uint64_t counts[MT_CORE_NFIXED];

	/*
	 * Try to read a thread's counter values by ensuring its gen count is
	 * even. If it's odd, it means that a thread is trying to update its
	 * counters.
	 *
	 * Spin until the gen count is even.
	 */
spin:
	start_gen = atomic_load_explicit(&thread->t_monotonic.mth_gen,
	    memory_order_acquire);
retry:
	if (start_gen & 1) {
		spins++;
		if (spins > MAXSPINS) {
			return EBUSY;
		}
		goto spin;
	}

	for (int i = 0; i < MT_CORE_NFIXED; i++) {
		counts[i] = thread->t_monotonic.mth_counts[i];
	}

	/*
	 * After reading the counters, check the gen count again. If it is
	 * different from the value that we started with, the thread raced
	 * writing its counters with us reading them. We need to redrive the
	 * entire operation.
	 *
	 * Go back to check if the value we just read was even and try to read
	 * again.
	 */
	end_gen = atomic_load_explicit(&thread->t_monotonic.mth_gen,
	    memory_order_acquire);
	if (end_gen != start_gen) {
		retries++;
		if (retries > MAXRETRIES) {
			return EAGAIN;
		}
		start_gen = end_gen;
		goto retry;
	}

	/*
	 * Only after getting a consistent snapshot of the counters should we
	 * write them into the provided buffer.
	 */
	for (int i = 0; i < MT_CORE_NFIXED; i++) {
		counts_out[i] = counts[i];
	}
	return 0;
}

static void mt_fixed_counts_internal(uint64_t *counts, uint64_t *counts_since);

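/*
 * mt_update_thread flushes the current CPU's accumulated counter deltas into
 * the given thread's counters, bracketing the update with the generation
 * count so that concurrent readers (mt_fixed_thread_counts) never see a torn
 * set of values. Must be called with interrupts disabled.
 */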
bool
mt_update_thread(thread_t thread)
{
	if (!mt_core_supported) {
		return false;
	}

	assert(ml_get_interrupts_enabled() == FALSE);

	uint64_t counts[MT_CORE_NFIXED], counts_since[MT_CORE_NFIXED];
	mt_fixed_counts_internal(counts, counts_since);

	/*
	 * Enter the update cycle by incrementing the gen count to be odd --
	 * this tells any readers to spin on the gen count, waiting for it to go
	 * even.
	 */
	__assert_only uint64_t enter_gen = atomic_fetch_add_explicit(
	    &thread->t_monotonic.mth_gen, 1, memory_order_release);
	/*
	 * Should not have pre-empted a modification to the counts.
	 */
	assert((enter_gen & 1) == 0);

	for (int i = 0; i < MT_CORE_NFIXED; i++) {
		thread->t_monotonic.mth_counts[i] += counts_since[i];
	}

	/*
	 * Exit the update by making the gen count even again. Readers check
	 * the gen count for equality, and will redrive the reads if the values
	 * before and after reading don't match.
	 */
	__assert_only uint64_t exit_gen = atomic_fetch_add_explicit(
	    &thread->t_monotonic.mth_gen, 1, memory_order_release);
	/*
	 * Make sure no other writers came through behind us.
	 */
	assert(exit_gen == (enter_gen + 1));

	return true;
}

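/*
 * mt_sched_update is called on context switch to credit the outgoing thread
 * with the counts it accumulated while on-core, and to emit the per-CPU
 * instructions/cycles tracepoint if it has been explicitly enabled.
 */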
void
mt_sched_update(thread_t thread)
{
	bool updated = mt_update_thread(thread);
	if (!updated) {
		return;
	}

	if (kdebug_debugid_explicitly_enabled(MT_KDBG_IC_CPU_CSWITCH)) {
		struct mt_cpu *mtc = mt_cur_cpu();

		KDBG_RELEASE(MT_KDBG_IC_CPU_CSWITCH,
#ifdef MT_CORE_INSTRS
		    mtc->mtc_counts[MT_CORE_INSTRS],
#else /* defined(MT_CORE_INSTRS) */
		    0,
#endif /* !defined(MT_CORE_INSTRS) */
		    mtc->mtc_counts[MT_CORE_CYCLES]);
	}
}

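/*
 * mt_fixed_task_counts sums the task's accumulated counts with the counts of
 * each of its live threads. The current thread is handled after the task
 * lock is dropped, since updating it requires disabling interrupts.
 */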
int
mt_fixed_task_counts(task_t task, uint64_t *counts_out)
{
	assert(task != TASK_NULL);
	assert(counts_out != NULL);

	uint64_t counts[MT_CORE_NFIXED];
	if (!mt_core_supported) {
		/* without fixed counters, report all zeroes to the caller */
		for (int i = 0; i < MT_CORE_NFIXED; i++) {
			counts_out[i] = 0;
		}
		return 0;
	}

	task_lock(task);

	for (int i = 0; i < MT_CORE_NFIXED; i++) {
		counts[i] = task->task_monotonic.mtk_counts[i];
	}

	uint64_t thread_counts[MT_CORE_NFIXED] = {};
	thread_t thread = THREAD_NULL;
	thread_t curthread = current_thread();
	bool needs_current = false;
	int r = 0;
	queue_iterate(&task->threads, thread, thread_t, task_threads) {
		/*
		 * Get the current thread's counters after doing this
		 * processing, without holding the task lock.
		 */
		if (thread == curthread) {
			needs_current = true;
			continue;
		} else {
			r = mt_fixed_thread_counts(thread, thread_counts);
			if (r) {
				goto error;
			}
		}

		for (int i = 0; i < MT_CORE_NFIXED; i++) {
			counts[i] += thread_counts[i];
		}
	}

	task_unlock(task);

	if (needs_current) {
		mt_cur_thread_fixed_counts(thread_counts);
	}

	for (int i = 0; i < MT_CORE_NFIXED; i++) {
		if (needs_current) {
			counts[i] += thread_counts[i];
		}
		counts_out[i] = counts[i];
	}
	return 0;

error:
	task_unlock(task);
	return r;
}

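/*
 * mt_mtc_update_count returns how many events counter ctr has recorded on
 * this CPU since the last snapshot and moves the snapshot forward. If the
 * hardware value went backwards (retrograde), the snapshot is resynchronized,
 * mt_retrograde is bumped, and 0 is returned.
 */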
uint64_t
mt_mtc_update_count(struct mt_cpu *mtc, unsigned int ctr)
{
	uint64_t snap = mt_core_snap(ctr);
	if (snap < mtc->mtc_snaps[ctr]) {
		if (mt_debug) {
			kprintf("monotonic: cpu %d: thread %#llx: "
			    "retrograde counter %u value: %llu, last read = %llu\n",
			    cpu_number(), thread_tid(current_thread()), ctr, snap,
			    mtc->mtc_snaps[ctr]);
		}
		(void)atomic_fetch_add_explicit(&mt_retrograde, 1,
		    memory_order_relaxed);
		mtc->mtc_snaps[ctr] = snap;
		return 0;
	}

	uint64_t count = snap - mtc->mtc_snaps[ctr];
	mtc->mtc_snaps[ctr] = snap;

	return count;
}

uint64_t
mt_cpu_update_count(cpu_data_t *cpu, unsigned int ctr)
{
	return mt_mtc_update_count(&cpu->cpu_monotonic, ctr);
}

static void
mt_fixed_counts_internal(uint64_t *counts, uint64_t *counts_since)
{
	assert(ml_get_interrupts_enabled() == FALSE);

	struct mt_cpu *mtc = mt_cur_cpu();
	assert(mtc != NULL);

	mt_mtc_update_fixed_counts(mtc, counts, counts_since);
}

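/*
 * mt_mtc_update_fixed_counts folds the latest hardware deltas into the
 * per-CPU accumulators. If counts is non-NULL, it receives the running
 * totals; if counts_since is non-NULL, it receives the change since the last
 * call that asked for counts_since.
 */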
void
mt_mtc_update_fixed_counts(struct mt_cpu *mtc, uint64_t *counts,
    uint64_t *counts_since)
{
	if (!mt_core_supported) {
		return;
	}

	for (int i = 0; i < MT_CORE_NFIXED; i++) {
		uint64_t last_delta;
		uint64_t count;

		last_delta = mt_mtc_update_count(mtc, i);
		count = mtc->mtc_counts[i] + last_delta;

		if (counts) {
			counts[i] = count;
		}
		if (counts_since) {
			assert(counts != NULL);
			counts_since[i] = count - mtc->mtc_counts_last[i];
			mtc->mtc_counts_last[i] = count;
		}

		mtc->mtc_counts[i] = count;
	}
}

void
mt_update_fixed_counts(void)
{
	assert(ml_get_interrupts_enabled() == FALSE);

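	/*
	 * Serialize the instruction stream (lfence on x86, isb on ARM) before
	 * sampling the counters, so preceding work is accounted for before the
	 * counters are read.
	 */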
#if defined(__x86_64__)
	__builtin_ia32_lfence();
#elif defined(__arm__) || defined(__arm64__)
	__builtin_arm_isb(ISB_SY);
#endif /* !defined(__x86_64__) && (defined(__arm__) || defined(__arm64__)) */

	mt_fixed_counts_internal(NULL, NULL);
}

void
mt_fixed_counts(uint64_t *counts)
{
#if defined(__x86_64__)
	__builtin_ia32_lfence();
#elif defined(__arm__) || defined(__arm64__)
	__builtin_arm_isb(ISB_SY);
#endif /* !defined(__x86_64__) && (defined(__arm__) || defined(__arm64__)) */

	int intrs_en = ml_set_interrupts_enabled(FALSE);
	mt_fixed_counts_internal(counts, NULL);
	ml_set_interrupts_enabled(intrs_en);
}

void
mt_cur_thread_fixed_counts(uint64_t *counts)
{
	if (!mt_core_supported) {
		for (int i = 0; i < MT_CORE_NFIXED; i++) {
			counts[i] = 0;
		}
		return;
	}

	thread_t curthread = current_thread();
	int intrs_en = ml_set_interrupts_enabled(FALSE);
	(void)mt_update_thread(curthread);
	for (int i = 0; i < MT_CORE_NFIXED; i++) {
		counts[i] = curthread->t_monotonic.mth_counts[i];
	}
	ml_set_interrupts_enabled(intrs_en);
}

void
mt_cur_task_fixed_counts(uint64_t *counts)
{
	task_t curtask = current_task();

	mt_fixed_task_counts(curtask, counts);
}

/* FIXME these should only update the counter that is being accessed */

uint64_t
mt_cur_thread_instrs(void)
{
#ifdef MT_CORE_INSTRS
	thread_t curthread = current_thread();
	boolean_t intrs_en;
	uint64_t count;

	if (!mt_core_supported) {
		return 0;
	}

	intrs_en = ml_set_interrupts_enabled(FALSE);
	(void)mt_update_thread(curthread);
	count = curthread->t_monotonic.mth_counts[MT_CORE_INSTRS];
	ml_set_interrupts_enabled(intrs_en);

	return count;
#else /* defined(MT_CORE_INSTRS) */
	return 0;
#endif /* !defined(MT_CORE_INSTRS) */
}

uint64_t
mt_cur_thread_cycles(void)
{
	thread_t curthread = current_thread();
	boolean_t intrs_en;
	uint64_t count;

	if (!mt_core_supported) {
		return 0;
	}

	intrs_en = ml_set_interrupts_enabled(FALSE);
	(void)mt_update_thread(curthread);
	count = curthread->t_monotonic.mth_counts[MT_CORE_CYCLES];
	ml_set_interrupts_enabled(intrs_en);

	return count;
}

uint64_t
mt_cur_cpu_instrs(void)
{
#ifdef MT_CORE_INSTRS
	uint64_t counts[MT_CORE_NFIXED];

	if (!mt_core_supported) {
		return 0;
	}

	mt_fixed_counts(counts);
	return counts[MT_CORE_INSTRS];
#else /* defined(MT_CORE_INSTRS) */
	return 0;
#endif /* !defined(MT_CORE_INSTRS) */
}

uint64_t
mt_cur_cpu_cycles(void)
{
	uint64_t counts[MT_CORE_NFIXED];

	if (!mt_core_supported) {
		return 0;
	}

	mt_fixed_counts(counts);
	return counts[MT_CORE_CYCLES];
}

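/*
 * mt_update_task folds a thread's counts into its owning task. The caller
 * must hold the task lock; mt_terminate_update uses this to preserve a
 * thread's counts when it exits.
 */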
void
mt_update_task(task_t task, thread_t thread)
{
	task_lock_assert_owned(task);

	if (!mt_core_supported) {
		return;
	}

	for (int i = 0; i < MT_CORE_NFIXED; i++) {
		task->task_monotonic.mtk_counts[i] += thread->t_monotonic.mth_counts[i];
	}
}

void
mt_terminate_update(task_t task, thread_t thread)
{
	mt_update_task(task, thread);
}

void
mt_perfcontrol(uint64_t *instrs, uint64_t *cycles)
{
	if (!mt_core_supported) {
		*instrs = 0;
		*cycles = 0;
		return;
	}

	struct mt_cpu *mtc = mt_cur_cpu();

	/*
	 * The performance controller queries the hardware directly, so provide
	 * the last snapshot we took for the core. This is the value from when
	 * we updated the thread counts.
	 */

#ifdef MT_CORE_INSTRS
	*instrs = mtc->mtc_snaps[MT_CORE_INSTRS];
#else /* defined(MT_CORE_INSTRS) */
	*instrs = 0;
#endif /* !defined(MT_CORE_INSTRS) */

	*cycles = mtc->mtc_snaps[MT_CORE_CYCLES];
}

void
mt_stackshot_thread(thread_t thread, uint64_t *instrs, uint64_t *cycles)
{
	assert(mt_core_supported);

#ifdef MT_CORE_INSTRS
	*instrs = thread->t_monotonic.mth_counts[MT_CORE_INSTRS];
#else /* defined(MT_CORE_INSTRS) */
	*instrs = 0;
#endif /* !defined(MT_CORE_INSTRS) */

	*cycles = thread->t_monotonic.mth_counts[MT_CORE_CYCLES];
}

void
mt_stackshot_task(task_t task, uint64_t *instrs, uint64_t *cycles)
{
	assert(mt_core_supported);

#ifdef MT_CORE_INSTRS
	*instrs = task->task_monotonic.mtk_counts[MT_CORE_INSTRS];
#else /* defined(MT_CORE_INSTRS) */
	*instrs = 0;
#endif /* !defined(MT_CORE_INSTRS) */

	*cycles = task->task_monotonic.mtk_counts[MT_CORE_CYCLES];
}