// Copyright (c) 2017-2020 Apple Computer, Inc. All rights reserved.

#include <darwintest.h>
#include <darwintest_utils.h>
#include <dispatch/dispatch.h>
#include <inttypes.h>
#include <ktrace/session.h>
#include <ktrace/private.h>
#include <sys/kdebug.h>
#include <sys/syscall.h>
#include <kperf/kpc.h>
#include <kperf/kperf.h>
#include <kperfdata/kpdecode.h>
#include <os/assumes.h>
#include <stdint.h>
#include <sys/sysctl.h>

#include "kperf_helpers.h"
#include "ktrace_helpers.h"

T_GLOBAL_META(
	T_META_NAMESPACE("xnu.ktrace"),
	T_META_CHECK_LEAKS(false),
	T_META_ASROOT(true));

#define MAX_CPUS 64
#define MAX_THREADS 64

volatile static bool running_threads = true;

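// Keep a CPU busy by spinning until the test's completion handler clears
// running_threads.  The semaphore signals the creator once the thread is up.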
static void *
spinning_thread(void *semp)
{
	T_QUIET;
	T_ASSERT_NOTNULL(semp, "semaphore passed to thread should not be NULL");
	dispatch_semaphore_signal(*(dispatch_semaphore_t *)semp);

	while (running_threads) {
		;
	}
	return NULL;
}

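// Raw kdebug event IDs (class DBG_PERF) emitted by kperf, plus the scheduler
// events used to track which threads are on-core.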
#define PERF_STK_KHDR UINT32_C(0x25020014)
#define PERF_STK_UHDR UINT32_C(0x25020018)
#define PERF_TMR_FIRE KDBG_EVENTID(DBG_PERF, 3, 0)
#define PERF_TMR_HNDLR KDBG_EVENTID(DBG_PERF, 3, 2)
#define PERF_TMR_PEND KDBG_EVENTID(DBG_PERF, 3, 3)
#define PERF_TMR_SKIP KDBG_EVENTID(DBG_PERF, 3, 4)
#define PERF_KPC_CONFIG KDBG_EVENTID(DBG_PERF, 6, 4)
#define PERF_KPC_REG KDBG_EVENTID(DBG_PERF, 6, 5)
#define PERF_KPC_REG32 KDBG_EVENTID(DBG_PERF, 6, 7)
#define PERF_INSTR_DATA KDBG_EVENTID(DBG_PERF, 1, 17)
#define PERF_EVENT KDBG_EVENTID(DBG_PERF, 0, 0)

#define SCHED_DISPATCH KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, MACH_DISPATCH)
#define SCHED_SWITCH KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, MACH_SCHED)
#define SCHED_HANDOFF KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, MACH_STACK_HANDOFF)
#define SCHED_IDLE KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, MACH_IDLE)

#define MP_CPUS_CALL UINT32_C(0x1900004)

#define DISPATCH_AFTER_EVENT UINT32_C(0xfefffffc)
#define TIMEOUT_SECS 10

#define TIMER_PERIOD_NS (1 * NSEC_PER_MSEC)

static void
start_tracing_with_timeout(ktrace_session_t s, unsigned int timeout_secs)
{
	// Only set the timeout after we've seen an event that was traced by us.
	// This helps set a reasonable timeout after we're guaranteed to get a
	// few events.
	dispatch_queue_t q = dispatch_get_global_queue(QOS_CLASS_USER_INITIATED, 0);

	ktrace_events_single(s, DISPATCH_AFTER_EVENT,
	    ^(__unused struct trace_point *tp)
	{
		T_LOG("arming timer to stop tracing after %d seconds", timeout_secs);
		dispatch_after(dispatch_time(DISPATCH_TIME_NOW,
		    timeout_secs * NSEC_PER_SEC), q, ^{
			T_LOG("ending tracing due to timeout");
			ktrace_end(s, 0);
		});
	});
	ktrace_set_collection_interval(s, 100);

	T_ASSERT_POSIX_ZERO(ktrace_start(s, q), "start ktrace");

	kdebug_trace(DISPATCH_AFTER_EVENT, 0, 0, 0, 0);
	T_LOG("trace point emitted");
}

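// Configure a single kperf action with the requested samplers and point
// timer 0 at it, firing every period_ns nanoseconds.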
static void
configure_kperf_timer_samplers(uint64_t period_ns, uint32_t samplers)
{
	T_SETUPBEGIN;

	(void)kperf_action_count_set(1);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1, samplers),
	    NULL);
	(void)kperf_timer_count_set(1);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(kperf_timer_period_set(0,
	    kperf_ns_to_ticks(period_ns)), NULL);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(kperf_timer_action_set(0, 1), NULL);

	T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");

	T_SETUPEND;
}

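// Convert a raw trace timestamp to seconds, using the session's timebase.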
static double
timestamp_secs(ktrace_session_t s, uint64_t timestamp)
{
	uint64_t ns = 0;
	T_QUIET;
	T_ASSERT_POSIX_ZERO(ktrace_convert_timestamp_to_nanoseconds(s, timestamp,
	    &ns), NULL);
	return (double)ns / NSEC_PER_SEC;
}

#pragma mark - timers

// Ensure that kperf is correctly sampling CPUs that are actively scheduling by
// bringing up threads and ensuring that threads on-core are sampled by each
// timer fire.

T_DECL(kperf_sample_active_cpus,
    "make sure that kperf samples all active CPUs")
{
	start_controlling_ktrace();

	T_SETUPBEGIN;

	int ncpus = dt_ncpu();
	T_QUIET;
	T_ASSERT_LT(ncpus, MAX_CPUS,
	    "only supports up to %d CPUs", MAX_CPUS);
	T_LOG("found %d CPUs", ncpus);

	int nthreads = ncpus - 1;
	T_QUIET;
	T_ASSERT_LT(nthreads, MAX_THREADS,
	    "only supports up to %d threads", MAX_THREADS);

	static pthread_t threads[MAX_THREADS];

	ktrace_session_t s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);

	__block uint64_t nfires = 0;
	__block uint64_t nsamples = 0;
	static uint64_t idle_tids[MAX_CPUS] = { 0 };
	__block double sum_saturation = 0;
	__block uint64_t last_nsamples = 0;

	// As a test debugging aid, take an additional argument that specifies the
	// number of fires to stop tracing after.  This also turns on additional
	// logging of scheduler trace events.
	int stopafter = 0;
	if (argc > 0) {
		stopafter = atoi(argv[0]);
		if (stopafter < 0) {
			T_ASSERT_FAIL("argument must be positive");
		}
	}

	static uint64_t first_timestamp = 0;
	static uint64_t last_timestamp = 0;
	ktrace_events_any(s, ^(struct trace_point *tp) {
		if (first_timestamp == 0) {
			first_timestamp = tp->timestamp;
		}
		last_timestamp = tp->timestamp;
	});

	ktrace_set_completion_handler(s, ^{
		T_LOG("stopping threads");

		running_threads = false;

		for (int i = 0; i < nthreads; i++) {
			T_QUIET;
			T_ASSERT_POSIX_ZERO(pthread_join(threads[i], NULL), NULL);
		}

		double saturation = sum_saturation / nfires * 100;

		T_LOG("over %.1f seconds, saw %" PRIu64 " timer fires, %" PRIu64
		    " samples, %g samples/fire, %.2f%% saturation",
		    timestamp_secs(s, last_timestamp - first_timestamp), nfires,
		    nsamples, (double)nsamples / (double)nfires, saturation);
		T_ASSERT_GT(saturation, 95.0,
		    "saw reasonable percentage of full samples");

		T_END;
	});

	// Track which threads are running on each CPU.
	static uint64_t tids_on_cpu[MAX_CPUS] = { 0 };
	void (^switch_cb)(struct trace_point *, const char *name) =
	    ^(struct trace_point *tp, const char *name) {
		uint64_t new_thread = tp->arg2;

		if (idle_tids[tp->cpuid] != new_thread) {
			tids_on_cpu[tp->cpuid] = new_thread;
		}

		if (stopafter) {
			T_LOG("%.7g: %s on %d: %llx", timestamp_secs(s, tp->timestamp),
			    name, tp->cpuid, tp->arg2);
		}
	};

	ktrace_events_single(s, SCHED_SWITCH, ^(struct trace_point *tp) {
		switch_cb(tp, "switch");
	});
	ktrace_events_single(s, SCHED_HANDOFF, ^(struct trace_point *tp) {
		switch_cb(tp, "hndoff");
	});

	// Determine the thread IDs of the idle threads on each CPU.
	ktrace_events_single(s, SCHED_IDLE, ^(struct trace_point *tp) {
		if (tp->debugid & DBG_FUNC_END) {
			return;
		}
		tids_on_cpu[tp->cpuid] = 0;
		idle_tids[tp->cpuid] = tp->threadid;
		if (stopafter) {
			T_LOG("%.7g: idle on %d: %llx", timestamp_secs(s, tp->timestamp),
			    tp->cpuid, tp->threadid);
		}
	});

	// On each timer fire, go through all the cores and mark any threads
	// that should be sampled.

	__block int last_fire_cpu = -1;
	static bool sample_missing[MAX_CPUS] = { false };
	static uint64_t tids_snap[MAX_CPUS] = { 0 };
	__block int nexpected = 0;
	__block int nextra = 0;
	__block int nidles = 0;

	ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) {
		T_QUIET; T_ASSERT_EQ((tp->debugid & DBG_FUNC_START), 0,
		    "no timer fire start events are allowed");
		int last_expected = nexpected;
		nfires++;

		nexpected = 0;
		for (int i = 0; i < ncpus; i++) {
			if (sample_missing[i]) {
				T_LOG("missed sample on CPU %d for thread %#llx from "
				    "timer on CPU %d (expected %d samples)",
				    tp->cpuid, tids_snap[i], last_fire_cpu, last_expected);
				sample_missing[i] = false;
			}

			if (tids_on_cpu[i] != 0) {
				tids_snap[i] = tids_on_cpu[i];
				sample_missing[i] = true;
				nexpected++;
			}
		}
		if (stopafter) {
			T_LOG("%.7g: FIRE on %d: %d extra, %d idles",
			    timestamp_secs(s, tp->timestamp), tp->cpuid, nextra, nidles);
		}

		if (nfires == 1) {
			return;
		}

		if (last_expected == 0) {
			sum_saturation += 1;
		} else {
			sum_saturation += (double)(nsamples - last_nsamples) /
			    last_expected;
		}
		last_nsamples = nsamples;
		nextra = 0;
		nidles = 0;

		T_QUIET;
		T_ASSERT_LT((int)tp->cpuid, ncpus, "timer fire should not occur on an IOP");
		last_fire_cpu = (int)tp->cpuid;

		if (stopafter && (uint64_t)stopafter == nfires) {
			ktrace_end(s, 1);
		}
	});

	// On the timer handler for each CPU, unset the missing sample bitmap.

	ktrace_events_single(s, PERF_TMR_HNDLR, ^(struct trace_point *tp) {
		nsamples++;
		if ((int)tp->cpuid > ncpus) {
			// Skip IOPs; they're not scheduling any relevant threads.
			return;
		}

		if (!sample_missing[tp->cpuid] && idle_tids[tp->cpuid] != 0) {
			T_LOG("sampled additional thread %llx on CPU %d", tp->threadid,
			    tp->cpuid);
			nextra++;
		}
		if (tp->threadid == idle_tids[tp->cpuid]) {
			T_LOG("sampled idle thread on CPU %d", tp->cpuid);
			nidles++;
		}
		sample_missing[tp->cpuid] = false;
	});

	configure_kperf_timer_samplers(TIMER_PERIOD_NS, KPERF_SAMPLER_KSTACK);

	T_SETUPEND;

	start_tracing_with_timeout(s, TIMEOUT_SECS);

	// Create threads to bring up all of the CPUs.

	dispatch_semaphore_t thread_spinning = dispatch_semaphore_create(0);

	for (int i = 0; i < nthreads; i++) {
		T_QUIET;
		T_ASSERT_POSIX_ZERO(
			pthread_create(&threads[i], NULL, &spinning_thread,
			&thread_spinning), NULL);
		dispatch_semaphore_wait(thread_spinning, DISPATCH_TIME_FOREVER);
	}

	T_LOG("spun up %d thread%s", nthreads, nthreads == 1 ? "" : "s");

	dispatch_main();
}

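// Number of timer fires to wait for before ending the trace in the tests
// below.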
#define FIRES_THRESHOLD (5000)

T_DECL(kperf_timer_fires_enough_times,
    "ensure the correct number of timers fire in a period of time")
{
	start_controlling_ktrace();

	dispatch_semaphore_t thread_spinning = dispatch_semaphore_create(0);

	ktrace_session_t s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);

	__block uint64_t nfires = 0;
	__block uint64_t first_fire_ns = 0;
	__block uint64_t last_fire_ns = 0;

	int ncpus = dt_ncpu();

	ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) {
		nfires++;
		if (first_fire_ns == 0) {
			ktrace_convert_timestamp_to_nanoseconds(s, tp->timestamp,
			    &first_fire_ns);
		}
		ktrace_convert_timestamp_to_nanoseconds(s, tp->timestamp,
		    &last_fire_ns);

		T_QUIET; T_ASSERT_LT((int)tp->cpuid, ncpus,
		    "timer fire should not occur on an IOP");
		if (nfires >= FIRES_THRESHOLD) {
			ktrace_end(s, 1);
		}
	});

	configure_kperf_timer_samplers(TIMER_PERIOD_NS, KPERF_SAMPLER_KSTACK);

	pthread_t thread;
	T_QUIET;
	T_ASSERT_POSIX_ZERO(pthread_create(&thread, NULL, &spinning_thread,
	    &thread_spinning), NULL);
	dispatch_semaphore_wait(thread_spinning, DISPATCH_TIME_FOREVER);

	ktrace_set_completion_handler(s, ^{
		running_threads = false;

		double duration_secs = (double)(last_fire_ns - first_fire_ns) /
		    NSEC_PER_SEC;
		T_LOG("stopping thread after %.2f seconds", duration_secs);

		T_QUIET; T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL);

		T_LOG("saw %" PRIu64 " timer fires (%g fires/second)", nfires,
		    (double)nfires / (double)duration_secs);
		double expected_nfires = duration_secs * NSEC_PER_SEC / TIMER_PERIOD_NS;
		T_LOG("expecting %g timer fires", expected_nfires);
		double nfires_seen_pct = expected_nfires / nfires * 100;
		T_ASSERT_GT(nfires_seen_pct, 95.0,
		    "saw reasonable number of missed timer fires");
		T_ASSERT_LT(nfires_seen_pct, 105.0,
		    "saw reasonable number of extra timer fires");

		T_END;
	});

	start_tracing_with_timeout(s, TIMEOUT_SECS);

	dispatch_main();
}

// kperf_timer_not_oversampling ensures that the profiling timer fires are
// spaced apart by the programmed timer period.  Otherwise, tools that rely on
// sample count as a proxy for CPU usage will over-estimate.

#define FIRE_PERIOD_THRESHOLD_NS \
	(TIMER_PERIOD_NS - (uint64_t)(TIMER_PERIOD_NS * 0.05))

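// A fixed-size circular queue: the struct cirq header is immediately followed
// in memory by nslots elements of slot_size bytes, and tail_slot points at the
// next slot to overwrite.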
struct cirq {
	unsigned int nslots;
	unsigned int tail_slot;
	unsigned int slot_size;
};

#define CIRQ_INIT(TYPE, NSLOTS) \
	(struct cirq){ \
		.nslots = NSLOTS, .tail_slot = 0, .slot_size = sizeof(TYPE), \
	}

static inline void *
cirq_get(struct cirq *cq, unsigned int i)
{
	return (char *)cq + sizeof(*cq) + (cq->slot_size * i);
}

static void *
cirq_top(void *vcq)
{
	struct cirq *cq = vcq;
	unsigned int tail_slot = cq->tail_slot;
	unsigned int top_slot = (tail_slot > 0 ? tail_slot : cq->nslots) - 1;
	return cirq_get(cq, top_slot);
}

static void *
cirq_push(void *vcq)
{
	struct cirq *cq = vcq;
	unsigned int tail_slot = cq->tail_slot;
	unsigned int next_slot = tail_slot == cq->nslots - 1 ? 0 : tail_slot + 1;
	cq->tail_slot = next_slot;
	return cirq_get(cq, tail_slot);
}

static void
cirq_for(void *vcq, void (^iter)(void *elt))
{
	struct cirq *cq = vcq;
	for (unsigned int i = cq->tail_slot; i < cq->nslots; i++) {
		iter(cirq_get(cq, i));
	}
	for (unsigned int i = 0; i < cq->tail_slot; i++) {
		iter(cirq_get(cq, i));
	}
}

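// Keep the last HISTORY_LEN timer and fire latencies per CPU, so they can be
// dumped when an unexpectedly short fire period is detected.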
#define HISTORY_LEN 5

struct instval {
	uint64_t iv_instant_ns;
	uint64_t iv_val;
};

struct cirq_instval {
	struct cirq cq;
	struct instval elts[HISTORY_LEN];
};

struct cirq_u64 {
	struct cirq cq;
	uint64_t elts[HISTORY_LEN];
};

struct cpu_oversample {
	struct cirq_instval timer_latencies;
	struct cirq_instval fire_latencies;
};

static void
cpu_oversample_log(struct cpu_oversample *cpu, unsigned int cpuid)
{
	T_LOG("CPU %d timer latencies:", cpuid);
	__block int i = -HISTORY_LEN;
	cirq_for(&cpu->timer_latencies, ^(void *viv) {
		struct instval *iv = viv;
		T_LOG("\t%llu timer latency %d: %llu", iv->iv_instant_ns, i,
		    iv->iv_val);
		i++;
	});

	T_LOG("CPU %d fire latencies:", cpuid);
	i = -HISTORY_LEN;
	cirq_for(&cpu->fire_latencies, ^(void *viv) {
		struct instval *iv = viv;
		T_LOG("\t%llu fire latency %d: %llu", iv->iv_instant_ns, i, iv->iv_val);
		i++;
	});
}

T_DECL(kperf_timer_not_oversampling,
    "ensure that time between fires is long enough")
{
	start_controlling_ktrace();

	ktrace_session_t s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	// Try not to perturb the system with more work.
	ktrace_set_collection_interval(s, 1000);
	__block uint64_t nfires = 0;
	__block uint64_t first_fire_ns = 0;
	__block uint64_t last_fire_ns = 0;
	__block unsigned int last_fire_cpuid = 0;

	int ncpus = dt_ncpu();
	T_QUIET; T_ASSERT_GT(ncpus, 0, "should see positive number of CPUs");

	struct cpu_oversample *per_cpu = calloc((unsigned int)ncpus,
	    sizeof(per_cpu[0]));
	T_QUIET; T_WITH_ERRNO;
	T_ASSERT_NOTNULL(per_cpu, "allocated timer latency tracking");
	for (int i = 0; i < ncpus; i++) {
		per_cpu[i].timer_latencies.cq = CIRQ_INIT(struct instval, HISTORY_LEN);
		per_cpu[i].fire_latencies.cq = CIRQ_INIT(struct instval, HISTORY_LEN);
	}

	__block bool in_stackshot = false;
	__block uint64_t last_stackshot_ns = 0;

	// Stackshots are the primary source of interrupt latency on the system.
	ktrace_events_single(s, KDBG_EVENTID(DBG_BSD, DBG_BSD_EXCP_SC,
	    SYS_stack_snapshot_with_config), ^(struct trace_point *tp) {
		bool start = tp->debugid & DBG_FUNC_START;
		uint64_t cur_ns = relns_from_abs(s, tp->timestamp);
		T_LOG("%llu: %s stackshot syscall from process %s",
		    cur_ns, start ? "start" : "finish", tp->command);
		in_stackshot = start;
		if (!start) {
			last_stackshot_ns = cur_ns;
		}
	});

	struct cirq_u64 *fire_times = calloc(1, sizeof(*fire_times));
	T_ASSERT_NOTNULL(fire_times, "allocated fire time tracking");
	fire_times->cq = CIRQ_INIT(uint64_t, HISTORY_LEN);

	// Track the decrementer's latency values to find any unexpectedly long
	// interrupt latencies that could affect the firing cadence.
	ktrace_events_single(s, MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0),
	    ^(struct trace_point *tp) {
		uint64_t cur_ns = relns_from_abs(s, tp->timestamp);
		uint64_t latency_ns = ns_from_abs(s, 0 - tp->arg1);
		struct instval *latency = cirq_push(&per_cpu[tp->cpuid].timer_latencies);
		latency->iv_instant_ns = cur_ns;
		latency->iv_val = latency_ns;
	});

	ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) {
		T_QUIET; T_ASSERT_LT((int)tp->cpuid, ncpus,
		    "timer fire should not occur on an IOP");

		nfires++;
		uint64_t cur_ns = relns_from_abs(s, tp->timestamp);
		uint64_t *fire_ns = cirq_push(fire_times);
		*fire_ns = cur_ns;

		struct cpu_oversample *cur_cpu = &per_cpu[tp->cpuid];
		struct instval *last_timer_latency = cirq_top(
			&cur_cpu->timer_latencies);
		uint64_t timer_latency_ns = last_timer_latency->iv_val;

		if (first_fire_ns == 0) {
			first_fire_ns = cur_ns;
		} else {
			struct cpu_oversample *last_cpu = &per_cpu[last_fire_cpuid];
			struct instval *last_latency = cirq_top(&last_cpu->fire_latencies);
			uint64_t last_fire_latency_ns = last_latency->iv_val;

			if (timer_latency_ns > TIMER_PERIOD_NS / 4) {
				T_LOG("%llu: long timer latency at fire: %llu", cur_ns,
				    timer_latency_ns);
			}

			// Long interrupt latencies will cause the timer to miss its fire
			// time and report a fire past when it should have, making the next
			// period too short.  Keep track of the latency as a leeway
			// adjustment for the subsequent fire.
			uint64_t fire_period_ns = cur_ns - last_fire_ns;
			uint64_t fire_period_adj_ns = fire_period_ns +
			    last_fire_latency_ns + timer_latency_ns;
			bool too_short = fire_period_adj_ns < FIRE_PERIOD_THRESHOLD_NS;
			if (too_short) {
				T_LOG("%llu: period of timer fire %llu is %llu + %llu + %llu = "
				    "%llu < %llu",
				    cur_ns, nfires, fire_period_ns, last_fire_latency_ns,
				    timer_latency_ns, fire_period_adj_ns,
				    FIRE_PERIOD_THRESHOLD_NS);

				T_LOG("short profile timer fired on CPU %d", tp->cpuid);
				cpu_oversample_log(cur_cpu, tp->cpuid);

				if (cur_cpu == last_cpu) {
					T_LOG("fired back-to-back on CPU %d", tp->cpuid);
				} else {
					T_LOG("previous profile timer fired on CPU %d",
					    last_fire_cpuid);
					cpu_oversample_log(last_cpu, last_fire_cpuid);
				}

				T_LOG("profile timer fires:");
				cirq_for(fire_times, ^(void *vu64) {
					T_LOG("\tfire: %llu", *(uint64_t *)vu64);
				});
				if (nfires < (unsigned int)ncpus) {
					T_LOG("ignoring timer fire %llu as context may be missing",
					    nfires);
				} else {
					if (in_stackshot) {
						T_LOG("skipping assertion because stackshot is "
						    "happening");
					} else if (last_stackshot_ns != 0 &&
					    cur_ns > last_stackshot_ns &&
					    cur_ns - last_stackshot_ns < TIMER_PERIOD_NS) {
						T_LOG("skipping assertion because stackshot happened "
						    "%" PRIu64 "ns ago",
						    cur_ns - last_stackshot_ns);
					} else {
						T_ASSERT_FAIL("profiling period is shorter than "
						    "expected with no stackshot interference");
					}
				}
			}

			struct instval *latency = cirq_push(&cur_cpu->fire_latencies);
			latency->iv_instant_ns = cur_ns;
			latency->iv_val = timer_latency_ns;

			// Snapshot this timer fire's interrupt latency, so the next one
			// can make an adjustment to the period.
			last_fire_latency_ns = timer_latency_ns;
		}
		last_fire_ns = cur_ns;
		last_fire_cpuid = tp->cpuid;

		if (nfires >= FIRES_THRESHOLD) {
			ktrace_end(s, 1);
		}
	});

	configure_kperf_timer_samplers(TIMER_PERIOD_NS, KPERF_SAMPLER_TINFO);

	ktrace_set_completion_handler(s, ^{
		double duration_secs = (double)(last_fire_ns - first_fire_ns) /
		    NSEC_PER_SEC;
		T_LOG("stopping trace after %.2f seconds", duration_secs);

		T_PASS("saw %" PRIu64 " timer fires (%g fires/second) without "
		    "oversampling", nfires, (double)nfires / (double)duration_secs);

		T_END;
	});

	start_tracing_with_timeout(s, 5);

	// Get all CPUs out of idle.
	uint64_t *counts = kpc_counterbuf_alloc();
	(void)kpc_get_cpu_counters(true, KPC_CLASS_CONFIGURABLE_MASK, NULL, counts);
	free(counts);

	dispatch_main();
}

T_DECL(kperf_timer_stress, "repeatedly enable and disable timers")
{
	start_controlling_ktrace();

	const int niters = 500;
	for (int i = 0; i < niters; i++) {
		configure_kperf_stacks_timer(-1, 1, true);
		T_QUIET;
		T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");
		usleep(2000);
		kperf_reset();
	}
	T_LOG("configured kperf with a timer %d times", niters);
}

#pragma mark - kdebug triggers

#define KDEBUG_TRIGGER_TIMEOUT_NS (10 * NSEC_PER_SEC)

#define NON_TRIGGER_CLASS UINT32_C(0xfd)
#define NON_TRIGGER_SUBCLASS UINT32_C(0xff)
#define NON_TRIGGER_CODE UINT32_C(0xff)

#define NON_TRIGGER_EVENT \
	(KDBG_EVENTID(NON_TRIGGER_CLASS, NON_TRIGGER_SUBCLASS, \
	NON_TRIGGER_CODE))

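// Configure a kdebug filter from filter_desc, emit each debugid in debugids,
// and expect a kernel and user stack sample for every one of them.  The
// NON_TRIGGER_EVENT ends tracing without triggering a sample.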
static void
expect_kdebug_trigger(const char *filter_desc, const uint32_t *debugids,
    unsigned int n_debugids)
{
	__block int missing_kernel_stacks = 0;
	__block int missing_user_stacks = 0;
	ktrace_session_t s;
	kperf_kdebug_filter_t filter;

	s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);

	ktrace_events_single(s, PERF_STK_KHDR, ^(struct trace_point *tp) {
		missing_kernel_stacks--;
		T_LOG("saw kernel stack with %" PRIu64 " frames, flags = %#"
		    PRIx64, tp->arg2, tp->arg1);
	});
	ktrace_events_single(s, PERF_STK_UHDR, ^(struct trace_point *tp) {
		missing_user_stacks--;
		T_LOG("saw user stack with %" PRIu64 " frames, flags = %#"
		    PRIx64, tp->arg2, tp->arg1);
	});

	for (unsigned int i = 0; i < n_debugids; i++) {
		ktrace_events_single(s, debugids[i], ^(struct trace_point *tp) {
			missing_kernel_stacks++;
			missing_user_stacks++;
			T_LOG("saw event with debugid 0x%" PRIx32, tp->debugid);
		});
	}

	ktrace_events_single(s, NON_TRIGGER_EVENT,
	    ^(__unused struct trace_point *tp)
	{
		ktrace_end(s, 0);
	});

	ktrace_set_completion_handler(s, ^{
		T_EXPECT_LE(missing_kernel_stacks, 0, NULL);
		T_EXPECT_LE(missing_user_stacks, 0, NULL);

		ktrace_session_destroy(s);
		T_END;
	});

	kperf_reset();

	(void)kperf_action_count_set(1);
	T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1,
	    KPERF_SAMPLER_KSTACK | KPERF_SAMPLER_USTACK), NULL);

	filter = kperf_kdebug_filter_create();
	T_ASSERT_NOTNULL(filter, NULL);

	T_ASSERT_POSIX_SUCCESS(kperf_kdebug_action_set(1), NULL);
	T_ASSERT_POSIX_SUCCESS(kperf_kdebug_filter_add_desc(filter, filter_desc),
	    NULL);
	T_ASSERT_POSIX_SUCCESS(kperf_kdebug_filter_set(filter), NULL);
	kperf_kdebug_filter_destroy(filter);

	T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), NULL);

	T_ASSERT_POSIX_ZERO(ktrace_start(s, dispatch_get_main_queue()), NULL);

	// Trace the triggering events.

	for (unsigned int i = 0; i < n_debugids; i++) {
		T_ASSERT_POSIX_SUCCESS(kdebug_trace(debugids[i], 0, 0, 0, 0), NULL);
	}

	T_ASSERT_POSIX_SUCCESS(kdebug_trace(NON_TRIGGER_EVENT, 0, 0, 0, 0), NULL);

	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, KDEBUG_TRIGGER_TIMEOUT_NS),
	    dispatch_get_main_queue(), ^(void)
	{
		ktrace_end(s, 1);
	});
}

#define TRIGGER_CLASS UINT32_C(0xfe)
#define TRIGGER_CLASS_END UINT32_C(0xfd)
#define TRIGGER_SUBCLASS UINT32_C(0xff)
#define TRIGGER_CODE UINT32_C(0)
#define TRIGGER_DEBUGID \
	(KDBG_EVENTID(TRIGGER_CLASS, TRIGGER_SUBCLASS, TRIGGER_CODE))

T_DECL(kperf_kdebug_trigger_classes,
    "test that kdebug trigger samples on classes")
{
	start_controlling_ktrace();

	const uint32_t class_debugids[] = {
		KDBG_EVENTID(TRIGGER_CLASS, 1, 1),
		KDBG_EVENTID(TRIGGER_CLASS, 2, 1),
		KDBG_EVENTID(TRIGGER_CLASS_END, 1, 1) | DBG_FUNC_END,
		KDBG_EVENTID(TRIGGER_CLASS_END, 2, 1) | DBG_FUNC_END,
	};

	expect_kdebug_trigger("C0xfe,C0xfdr", class_debugids,
	    sizeof(class_debugids) / sizeof(class_debugids[0]));
	dispatch_main();
}

T_DECL(kperf_kdebug_trigger_subclasses,
    "test that kdebug trigger samples on subclasses")
{
	start_controlling_ktrace();

	const uint32_t subclass_debugids[] = {
		KDBG_EVENTID(TRIGGER_CLASS, TRIGGER_SUBCLASS, 0),
		KDBG_EVENTID(TRIGGER_CLASS, TRIGGER_SUBCLASS, 1),
		KDBG_EVENTID(TRIGGER_CLASS_END, TRIGGER_SUBCLASS, 0) | DBG_FUNC_END,
		KDBG_EVENTID(TRIGGER_CLASS_END, TRIGGER_SUBCLASS, 1) | DBG_FUNC_END
	};

	expect_kdebug_trigger("S0xfeff,S0xfdffr", subclass_debugids,
	    sizeof(subclass_debugids) / sizeof(subclass_debugids[0]));
	dispatch_main();
}

T_DECL(kperf_kdebug_trigger_debugids,
    "test that kdebug trigger samples on debugids")
{
	start_controlling_ktrace();

	const uint32_t debugids[] = {
		TRIGGER_DEBUGID
	};

	expect_kdebug_trigger("D0xfeff0000", debugids,
	    sizeof(debugids) / sizeof(debugids[0]));
	dispatch_main();
}

// TODO Set a single function specifier filter, and expect not to trigger on
// all events from that class.

static void
reset_kperf(void)
{
	(void)kperf_reset();
}

T_DECL(kperf_kdbg_callstacks,
    "test that the kdbg_callstacks samples on syscalls")
{
	start_controlling_ktrace();

	ktrace_session_t s;
	__block bool saw_user_stack = false;

	s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);

	// Make sure BSD events are traced in order to trigger samples on syscalls.
	ktrace_events_class(s, DBG_BSD, ^void (__unused struct trace_point *tp) {});

	ktrace_events_single(s, PERF_STK_UHDR, ^(__unused struct trace_point *tp) {
		saw_user_stack = true;
		ktrace_end(s, 1);
	});

	ktrace_set_completion_handler(s, ^{
		ktrace_session_destroy(s);

		T_EXPECT_TRUE(saw_user_stack,
		    "saw user stack after configuring kdbg_callstacks");
		T_END;
	});

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
	T_ASSERT_POSIX_SUCCESS(kperf_kdbg_callstacks_set(1), NULL);
#pragma clang diagnostic pop
	T_ATEND(reset_kperf);

	T_ASSERT_POSIX_ZERO(ktrace_start(s, dispatch_get_main_queue()), NULL);

	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 10 * NSEC_PER_SEC),
	    dispatch_get_main_queue(), ^(void) {
		ktrace_end(s, 1);
	});

	dispatch_main();
}

896 | ||
f427ee49 | 897 | #pragma mark - PET |
39037602 A |
898 | |
899 | #define STACKS_WAIT_DURATION_NS (3 * NSEC_PER_SEC) | |
900 | ||
901 | static void | |
f427ee49 | 902 | expect_stacks_traced(void (^setup)(ktrace_session_t s), void (^complete)(void)) |
39037602 | 903 | { |
5ba3f43e A |
904 | ktrace_session_t s; |
905 | ||
906 | s = ktrace_session_create(); | |
f427ee49 A |
907 | T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create"); |
908 | ktrace_set_collection_interval(s, 100); | |
909 | if (setup) { | |
910 | setup(s); | |
911 | } | |
5ba3f43e A |
912 | |
913 | __block unsigned int user_stacks = 0; | |
914 | __block unsigned int kernel_stacks = 0; | |
915 | ||
916 | ktrace_events_single(s, PERF_STK_UHDR, ^(__unused struct trace_point *tp) { | |
0a7de745 A |
917 | user_stacks++; |
918 | }); | |
5ba3f43e | 919 | ktrace_events_single(s, PERF_STK_KHDR, ^(__unused struct trace_point *tp) { |
0a7de745 A |
920 | kernel_stacks++; |
921 | }); | |
5ba3f43e A |
922 | |
923 | ktrace_set_completion_handler(s, ^(void) { | |
0a7de745 A |
924 | ktrace_session_destroy(s); |
925 | T_EXPECT_GT(user_stacks, 0U, NULL); | |
926 | T_EXPECT_GT(kernel_stacks, 0U, NULL); | |
f427ee49 | 927 | complete(); |
0a7de745 | 928 | }); |
5ba3f43e A |
929 | |
930 | T_QUIET; T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), NULL); | |
931 | ||
932 | T_ASSERT_POSIX_ZERO(ktrace_start(s, dispatch_get_main_queue()), NULL); | |
933 | ||
934 | dispatch_after(dispatch_time(DISPATCH_TIME_NOW, STACKS_WAIT_DURATION_NS), | |
0a7de745 A |
935 | dispatch_get_main_queue(), ^(void) |
936 | { | |
937 | kperf_reset(); | |
938 | ktrace_end(s, 0); | |
939 | }); | |
39037602 A |
940 | } |
941 | ||
f427ee49 | 942 | T_DECL(kperf_pet, "test that PET mode samples kernel and user stacks") |
39037602 | 943 | { |
cb323159 A |
944 | start_controlling_ktrace(); |
945 | ||
f427ee49 | 946 | configure_kperf_stacks_timer(-1, 10, false); |
5ba3f43e | 947 | T_ASSERT_POSIX_SUCCESS(kperf_timer_pet_set(0), NULL); |
39037602 | 948 | |
f427ee49 | 949 | expect_stacks_traced(NULL, ^(void) { |
0a7de745 A |
950 | T_END; |
951 | }); | |
39037602 | 952 | |
5ba3f43e | 953 | dispatch_main(); |
39037602 A |
954 | } |
955 | ||
f427ee49 | 956 | T_DECL(kperf_lightweight_pet, |
0a7de745 A |
957 | "test that lightweight PET mode samples kernel and user stacks", |
958 | T_META_ASROOT(true)) | |
39037602 | 959 | { |
cb323159 A |
960 | start_controlling_ktrace(); |
961 | ||
5ba3f43e | 962 | int set = 1; |
39037602 | 963 | |
f427ee49 | 964 | configure_kperf_stacks_timer(-1, 10, false); |
5ba3f43e | 965 | T_ASSERT_POSIX_SUCCESS(sysctlbyname("kperf.lightweight_pet", NULL, NULL, |
0a7de745 | 966 | &set, sizeof(set)), NULL); |
5ba3f43e | 967 | T_ASSERT_POSIX_SUCCESS(kperf_timer_pet_set(0), NULL); |
39037602 | 968 | |
f427ee49 A |
969 | __block uint64_t nfires = 0; |
970 | ||
971 | expect_stacks_traced(^(ktrace_session_t s) { | |
972 | ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) { | |
973 | nfires++; | |
974 | T_QUIET; | |
975 | T_ASSERT_EQ(tp->arg1, (uint64_t)0, | |
976 | "timer fire should have timer ID of 0"); | |
977 | T_QUIET; | |
978 | T_ASSERT_EQ(tp->arg2, (uint64_t)1, | |
979 | "timer fire should have PET bit set"); | |
980 | }); | |
981 | }, ^(void) { | |
982 | T_ASSERT_GT(nfires, (uint64_t)0, "timer fired at least once"); | |
0a7de745 A |
983 | T_END; |
984 | }); | |
39037602 | 985 | |
5ba3f43e | 986 | dispatch_main(); |
39037602 | 987 | } |
0a7de745 | 988 | |
f427ee49 | 989 | T_DECL(kperf_pet_stress, "repeatedly enable and disable PET mode") |
0a7de745 | 990 | { |
cb323159 A |
991 | start_controlling_ktrace(); |
992 | ||
f427ee49 A |
993 | const int niters = 500; |
994 | for (int i = 0; i < niters; i++) { | |
995 | configure_kperf_stacks_timer(-1, 1, true); | |
0a7de745 | 996 | T_QUIET; T_ASSERT_POSIX_SUCCESS(kperf_timer_pet_set(0), NULL); |
f427ee49 A |
997 | T_QUIET; |
998 | T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling"); | |
999 | usleep(2000); | |
0a7de745 A |
1000 | kperf_reset(); |
1001 | } | |
0a7de745 | 1002 | |
f427ee49 | 1003 | T_PASS("configured kperf PET %d times", niters); |
0a7de745 | 1004 | } |
cb323159 | 1005 | |
f427ee49 A |
1006 | #pragma mark - PMCs |
1007 | ||
1008 | T_DECL(kperf_pmc_config_only, | |
1009 | "shouldn't show PMC config events unless requested") | |
cb323159 A |
1010 | { |
1011 | start_controlling_ktrace(); | |
1012 | ||
1013 | __block bool saw_kpc_config = false; | |
1014 | __block bool saw_kpc_reg = false; | |
1015 | ||
1016 | ktrace_session_t s = ktrace_session_create(); | |
f427ee49 A |
1017 | T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create"); |
1018 | ktrace_set_collection_interval(s, 100); | |
cb323159 | 1019 | |
cb323159 A |
1020 | ktrace_events_single(s, PERF_KPC_CONFIG, |
1021 | ^(__unused struct trace_point *tp) { | |
1022 | saw_kpc_config = true; | |
1023 | }); | |
1024 | ktrace_events_single(s, PERF_KPC_REG, | |
1025 | ^(__unused struct trace_point *tp) { | |
1026 | saw_kpc_reg = true; | |
1027 | }); | |
1028 | ktrace_events_single(s, PERF_KPC_REG32, | |
1029 | ^(__unused struct trace_point *tp) { | |
1030 | saw_kpc_reg = true; | |
1031 | }); | |
1032 | ||
1033 | ktrace_set_completion_handler(s, ^{ | |
1034 | ktrace_session_destroy(s); | |
1035 | T_EXPECT_FALSE(saw_kpc_config, | |
1036 | "should see no KPC configs without sampler enabled"); | |
1037 | T_EXPECT_FALSE(saw_kpc_reg, | |
1038 | "should see no KPC registers without sampler enabled"); | |
1039 | T_END; | |
1040 | }); | |
1041 | ||
1042 | uint32_t nconfigs = kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK); | |
1043 | uint64_t *config = calloc(nconfigs, sizeof(*config)); | |
1044 | config[0] = 0x02; | |
1045 | int ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, config); | |
1046 | T_ASSERT_POSIX_SUCCESS(ret, "configured kpc"); | |
1047 | T_QUIET; | |
1048 | T_ASSERT_POSIX_SUCCESS(kpc_set_counting(KPC_CLASS_CONFIGURABLE_MASK), | |
1049 | "kpc_set_counting"); | |
1050 | ||
1051 | (void)kperf_action_count_set(1); | |
1052 | T_ATEND(reset_kperf); | |
1053 | T_QUIET; | |
1054 | T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1, KPERF_SAMPLER_PMC_CPU), | |
1055 | NULL); | |
1056 | ||
1057 | (void)kperf_timer_count_set(1); | |
1058 | T_QUIET; | |
1059 | T_ASSERT_POSIX_SUCCESS(kperf_timer_period_set(0, | |
1060 | kperf_ns_to_ticks(TIMER_PERIOD_NS)), NULL); | |
1061 | T_QUIET; | |
1062 | T_ASSERT_POSIX_SUCCESS(kperf_timer_action_set(0, 1), NULL); | |
1063 | ||
1064 | T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling"); | |
1065 | ||
1066 | T_ASSERT_POSIX_ZERO(ktrace_start(s, dispatch_get_main_queue()), NULL); | |
1067 | ||
1068 | dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 10 * NSEC_PER_SEC), | |
1069 | dispatch_get_main_queue(), ^(void) { | |
1070 | ktrace_end(s, 1); | |
1071 | }); | |
1072 | ||
1073 | dispatch_main(); | |
1074 | } | |
1075 | ||
1076 | static void | |
1077 | skip_if_monotonic_unsupported(void) | |
1078 | { | |
1079 | int r; | |
1080 | int supported = 0; | |
1081 | size_t supported_size = sizeof(supported); | |
1082 | ||
1083 | r = sysctlbyname("kern.monotonic.supported", &supported, &supported_size, | |
1084 | NULL, 0); | |
1085 | if (r < 0) { | |
1086 | T_WITH_ERRNO; | |
1087 | T_SKIP("could not find \"kern.monotonic.supported\" sysctl"); | |
1088 | } | |
1089 | ||
1090 | if (!supported) { | |
1091 | T_SKIP("monotonic is not supported on this platform"); | |
1092 | } | |
1093 | } | |
1094 | ||
1095 | #define INSTRS_CYCLES_UPPER 500 | |
1096 | #define INSTRS_CYCLES_LOWER 50 | |
1097 | ||
f427ee49 A |
1098 | T_DECL(kperf_sample_instrs_cycles, |
1099 | "ensure instructions and cycles are sampled") | |
cb323159 A |
1100 | { |
1101 | skip_if_monotonic_unsupported(); | |
1102 | ||
1103 | start_controlling_ktrace(); | |
1104 | ||
1105 | ktrace_session_t sess = ktrace_session_create(); | |
f427ee49 A |
1106 | T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(sess, "ktrace_session_create"); |
1107 | ktrace_set_collection_interval(sess, 100); | |
cb323159 A |
1108 | |
1109 | __block uint64_t ninstrs_cycles = 0; | |
1110 | __block uint64_t nzeroes = 0; | |
1111 | ktrace_events_single(sess, PERF_INSTR_DATA, | |
1112 | ^(__unused struct trace_point *tp) { | |
1113 | ninstrs_cycles++; | |
1114 | if (tp->arg1 == 0) { | |
1115 | T_LOG("%llx (%s)\n", tp->threadid, tp->command); | |
1116 | nzeroes++; | |
1117 | } | |
1118 | if (ninstrs_cycles >= INSTRS_CYCLES_UPPER) { | |
1119 | ktrace_end(sess, 1); | |
1120 | } | |
1121 | }); | |
1122 | ||
1123 | ktrace_set_collection_interval(sess, 200); | |
1124 | ||
1125 | ktrace_set_completion_handler(sess, ^{ | |
1126 | T_EXPECT_GE(ninstrs_cycles, (uint64_t)INSTRS_CYCLES_LOWER, | |
1127 | "saw enough instructions and cycles events"); | |
1128 | T_EXPECT_EQ(nzeroes, UINT64_C(0), | |
1129 | "saw no events with 0 instructions"); | |
1130 | T_END; | |
1131 | }); | |
1132 | ||
1133 | (void)kperf_action_count_set(1); | |
1134 | T_ATEND(reset_kperf); | |
1135 | T_QUIET; | |
1136 | T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1, | |
1137 | KPERF_SAMPLER_TH_INSTRS_CYCLES), NULL); | |
1138 | ||
1139 | (void)kperf_timer_count_set(1); | |
1140 | T_QUIET; | |
1141 | T_ASSERT_POSIX_SUCCESS(kperf_timer_period_set(0, | |
1142 | kperf_ns_to_ticks(TIMER_PERIOD_NS)), NULL); | |
1143 | T_QUIET; | |
1144 | T_ASSERT_POSIX_SUCCESS(kperf_timer_action_set(0, 1), NULL); | |
1145 | ||
1146 | T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling"); | |
1147 | ||
1148 | T_ASSERT_POSIX_ZERO(ktrace_start(sess, dispatch_get_main_queue()), | |
1149 | NULL); | |
1150 | ||
1151 | dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 10 * NSEC_PER_SEC), | |
1152 | dispatch_get_main_queue(), ^(void) { | |
1153 | ktrace_end(sess, 1); | |
1154 | }); | |
1155 | ||
1156 | dispatch_main(); | |
1157 | } |