/*
 * Copyright (c) 2011-2018 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Profile Every Thread (PET) provides a profile of all threads on the system
 * when a timer fires. PET supports the "record waiting threads" mode in
 * Instruments, and used to be called All Thread States (ATS). New tools should
 * adopt the lightweight PET mode, which provides the same information, but with
 * much less overhead.
 *
 * When traditional (non-lightweight) PET is active, a migrating timer call
 * causes the PET thread to wake up. The timer handler also issues a broadcast
 * IPI to the other CPUs, to provide a (somewhat) synchronized set of on-core
 * samples. This is provided for backwards-compatibility with clients that
 * expect on-core samples, from when PET's timer was based on the on-core
 * timers. Because PET sampling can take on the order of milliseconds, the PET
 * thread only enters a new timer deadline after it finishes sampling. This
 * perturbs the timer cadence by the duration of PET sampling, but it leaves
 * the system free to work on non-profiling tasks for the duration of the
 * timer period.
 *
 * Lightweight PET samples the system less intrusively than normal PET
 * mode. Instead of iterating tasks and threads on each sample, it increments
 * a global generation count, `kppet_gencount`, which is checked as threads are
 * context switched on-core. If the thread's local generation count is older
 * than the global generation, the thread samples itself.
 *
 *              |  |
 * thread A     +--+---------|
 *              |  |
 * thread B     |--+---------------|
 *              |  |
 * thread C     |  |          |-------------------------------------
 *              |  |          |
 * thread D     |  |          |     |-------------------------------
 *              |  |          |     |
 *              +--+----------+-----+--------------------------------> time
 *                 |          |     |
 *                 |          +-----+--- threads sampled when they come on-core
 *                 |                     in kperf_pet_switch_context
 *                 |
 *                 +--- PET timer fire, sample on-core threads A and B,
 *                      increment kppet_gencount
 */
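
/*
 * A minimal sketch of the lightweight PET protocol described above. The real
 * code lives in kppet_on_cpu() below and in the PET timer handler;
 * `pet_timer_fired` and `pet_switch_hook` are illustrative names here, not
 * kernel interfaces:
 *
 *	static void
 *	pet_timer_fired(void)
 *	{
 *		// Publish a new generation; threads catch up lazily.
 *		os_atomic_inc(&kppet_gencount, relaxed);
 *	}
 *
 *	static void
 *	pet_switch_hook(thread_t thread)
 *	{
 *		// As `thread` switches on-core, sample it if it hasn't seen
 *		// the current generation.
 *		if (thread->kperf_pet_gen !=
 *		    os_atomic_load(&kppet_gencount, relaxed)) {
 *			// sample the thread and record the new generation
 *		}
 *	}
 */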

#include <mach/mach_types.h>
#include <sys/errno.h>

#include <kperf/kperf.h>
#include <kperf/buffer.h>
#include <kperf/sample.h>
#include <kperf/context.h>
#include <kperf/action.h>
#include <kperf/pet.h>
#include <kperf/kptimer.h>

#include <kern/task.h>
#include <kern/kalloc.h>
#if defined(__x86_64__)
#include <i386/mp.h>
#endif /* defined(__x86_64__) */

static LCK_MTX_DECLARE(kppet_mtx, &kperf_lck_grp);

static struct {
	unsigned int g_actionid;
	/*
	 * The idle rate controls how many sampling periods to skip if a thread
	 * is idle.
	 */
	uint32_t g_idle_rate;
	bool g_setup:1;
	bool g_lightweight:1;
	struct kperf_sample *g_sample;

	thread_t g_sample_thread;

	/*
	 * Used by the PET thread to manage which threads and tasks to sample.
	 */
	thread_t *g_threads;
	unsigned int g_nthreads;
	size_t g_threads_size;

	task_t *g_tasks;
	unsigned int g_ntasks;
	size_t g_tasks_size;
} kppet = {
	.g_actionid = 0,
	.g_idle_rate = KPERF_PET_DEFAULT_IDLE_RATE,
};

bool kppet_lightweight_active = false;
_Atomic uint32_t kppet_gencount = 0;

static uint64_t kppet_sample_tasks(uint32_t idle_rate);
static void kppet_thread(void * param, wait_result_t wr);

static void
kppet_lock_assert_owned(void)
{
	lck_mtx_assert(&kppet_mtx, LCK_MTX_ASSERT_OWNED);
}

static void
kppet_lock(void)
{
	lck_mtx_lock(&kppet_mtx);
}

static void
kppet_unlock(void)
{
	lck_mtx_unlock(&kppet_mtx);
}

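/*
 * The on-CPU hook for lightweight PET, invoked from the context-switch path
 * with interrupts disabled. If the incoming thread hasn't seen the current
 * `kppet_gencount` generation, sample it as it comes on-core.
 */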
void
kppet_on_cpu(thread_t thread, thread_continue_t continuation,
    uintptr_t *starting_fp)
{
	assert(thread != NULL);
	assert(ml_get_interrupts_enabled() == FALSE);

	uint32_t actionid = kppet.g_actionid;
	if (actionid == 0) {
		return;
	}

	if (thread->kperf_pet_gen != atomic_load(&kppet_gencount)) {
		BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_START,
		    atomic_load_explicit(&kppet_gencount,
		    memory_order_relaxed), thread->kperf_pet_gen);

		task_t task = get_threadtask(thread);
		struct kperf_context ctx = {
			.cur_thread = thread,
			.cur_task = task,
			.cur_pid = task_pid(task),
			.starting_fp = starting_fp,
		};
		/*
		 * Use a per-CPU interrupt buffer, since this is only called
		 * while interrupts are disabled, from the scheduler.
		 */
		struct kperf_sample *sample = kperf_intr_sample_buffer();
		if (!sample) {
			BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END, 1);
			return;
		}

		unsigned int flags = SAMPLE_FLAG_NON_INTERRUPT | SAMPLE_FLAG_PEND_USER;
		if (continuation != NULL) {
			flags |= SAMPLE_FLAG_CONTINUATION;
		}
		kperf_sample(sample, &ctx, actionid, flags);

		BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END);
	} else {
		BUF_VERB(PERF_PET_SAMPLE_THREAD,
		    os_atomic_load(&kppet_gencount, relaxed), thread->kperf_pet_gen);
	}
}

#pragma mark - state transitions

/*
 * Lazily initialize PET. The PET thread never exits once PET has been used.
 */
static void
kppet_setup(void)
{
	if (kppet.g_setup) {
		return;
	}

	kern_return_t kr = kernel_thread_start(kppet_thread, NULL,
	    &kppet.g_sample_thread);
	if (kr != KERN_SUCCESS) {
		panic("kperf: failed to create PET thread %d", kr);
	}

	thread_set_thread_name(kppet.g_sample_thread, "kperf-pet-sampling");
	kppet.g_setup = true;
}

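/*
 * Configure PET to sample with action `actionid`, or tear it down if
 * `actionid` is 0, freeing the task, thread, and sample buffers.
 */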
void
kppet_config(unsigned int actionid)
{
	/*
	 * Resetting kperf shouldn't get the PET thread started.
	 */
	if (actionid == 0 && !kppet.g_setup) {
		return;
	}

	kppet_setup();

	kppet_lock();

	kppet.g_actionid = actionid;

	if (actionid > 0) {
		if (!kppet.g_sample) {
			kppet.g_sample = kalloc_tag(sizeof(*kppet.g_sample),
			    VM_KERN_MEMORY_DIAG);
		}
	} else {
		if (kppet.g_tasks) {
			assert(kppet.g_tasks_size != 0);
			kfree(kppet.g_tasks, kppet.g_tasks_size);
			kppet.g_tasks = NULL;
			kppet.g_tasks_size = 0;
			kppet.g_ntasks = 0;
		}
		if (kppet.g_threads) {
			assert(kppet.g_threads_size != 0);
			kfree(kppet.g_threads, kppet.g_threads_size);
			kppet.g_threads = NULL;
			kppet.g_threads_size = 0;
			kppet.g_nthreads = 0;
		}
		if (kppet.g_sample != NULL) {
			kfree(kppet.g_sample, sizeof(*kppet.g_sample));
			kppet.g_sample = NULL;
		}
	}

	kppet_unlock();
}

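/*
 * Return PET to its default, inactive state.
 */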
void
kppet_reset(void)
{
	kppet_config(0);
	kppet_set_idle_rate(KPERF_PET_DEFAULT_IDLE_RATE);
	kppet_set_lightweight_pet(0);
}

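/*
 * Wake the PET thread to start a round of sampling; called when the PET timer
 * fires.
 */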
void
kppet_wake_thread(void)
{
	thread_wakeup(&kppet);
}

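/*
 * The main loop of the PET thread: sleep until an action is configured, sample
 * every task on the system, then hand the duration of sampling to the timer
 * subsystem so the next deadline starts after sampling finishes.
 */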
__attribute__((noreturn))
static void
kppet_thread(void * __unused param, wait_result_t __unused wr)
{
	kppet_lock();

	for (;;) {
		BUF_INFO(PERF_PET_IDLE);

		do {
			(void)lck_mtx_sleep(&kppet_mtx, LCK_SLEEP_DEFAULT, &kppet,
			    THREAD_UNINT);
		} while (kppet.g_actionid == 0);

		BUF_INFO(PERF_PET_RUN);

		uint64_t sampledur_abs = kppet_sample_tasks(kppet.g_idle_rate);

		kptimer_pet_enter(sampledur_abs);
	}
}

#pragma mark - sampling

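/*
 * Sample a single thread of a task, eliding the callstacks of idle threads
 * except on every `idle_rate`-th sample.
 */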
static void
kppet_sample_thread(int pid, task_t task, thread_t thread, uint32_t idle_rate)
{
	kppet_lock_assert_owned();

	uint32_t sample_flags = SAMPLE_FLAG_IDLE_THREADS |
	    SAMPLE_FLAG_THREAD_ONLY;

	BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_START);

	struct kperf_context ctx = {
		.cur_thread = thread,
		.cur_task = task,
		.cur_pid = pid,
	};

	boolean_t thread_dirty = kperf_thread_get_dirty(thread);

	/*
	 * Always clean a dirty thread. For a thread that stayed clean (it
	 * hasn't run since the last sample), only capture its callstack on
	 * every `idle_rate`-th sample; record an empty callstack otherwise.
	 */
	if (thread_dirty) {
		kperf_thread_set_dirty(thread, FALSE);
	} else if ((thread->kperf_pet_cnt % idle_rate) != 0) {
		sample_flags |= SAMPLE_FLAG_EMPTY_CALLSTACK;
	}
	thread->kperf_pet_cnt++;

	kperf_sample(kppet.g_sample, &ctx, kppet.g_actionid, sample_flags);
	kperf_sample_user(&kppet.g_sample->usample, &ctx, kppet.g_actionid,
	    sample_flags);

	BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END);
}

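/*
 * Store and reference all of `task`'s threads, so they can be sampled without
 * holding the task lock. The task must be suspended, so the sampled threads
 * form an atomic snapshot.
 */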
static kern_return_t
kppet_threads_prepare(task_t task)
{
	kppet_lock_assert_owned();

	vm_size_t threads_size_needed;

	for (;;) {
		task_lock(task);

		if (!task->active) {
			task_unlock(task);
			return KERN_FAILURE;
		}

		/*
		 * With the task locked, figure out if enough space has been
		 * allocated to contain all of the thread references.
		 */
		threads_size_needed = task->thread_count * sizeof(thread_t);
		if (threads_size_needed <= kppet.g_threads_size) {
			break;
		}

		/*
		 * Otherwise, allocate more and try again.
		 */
		task_unlock(task);

		if (kppet.g_threads_size != 0) {
			kfree(kppet.g_threads, kppet.g_threads_size);
		}

		assert(threads_size_needed > 0);
		kppet.g_threads_size = threads_size_needed;

		kppet.g_threads = kalloc_tag(kppet.g_threads_size, VM_KERN_MEMORY_DIAG);
		if (kppet.g_threads == NULL) {
			kppet.g_threads_size = 0;
			return KERN_RESOURCE_SHORTAGE;
		}
	}

	thread_t thread;
	kppet.g_nthreads = 0;
	queue_iterate(&(task->threads), thread, thread_t, task_threads) {
		thread_reference_internal(thread);
		kppet.g_threads[kppet.g_nthreads++] = thread;
	}

	task_unlock(task);

	return (kppet.g_nthreads > 0) ? KERN_SUCCESS : KERN_FAILURE;
}

/*
 * Sample a `task`, using `idle_rate` to control whether idle threads need to
 * be re-sampled.
 *
 * The task must be referenced.
 */
static void
kppet_sample_task(task_t task, uint32_t idle_rate)
{
	kppet_lock_assert_owned();
	assert(task != kernel_task);
	if (task == kernel_task) {
		return;
	}

	BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_START);

	int pid = task_pid(task);
	if (kperf_action_has_task(kppet.g_actionid)) {
		struct kperf_context ctx = {
			.cur_task = task,
			.cur_pid = pid,
		};

		kperf_sample(kppet.g_sample, &ctx, kppet.g_actionid,
		    SAMPLE_FLAG_TASK_ONLY);
	}

	if (!kperf_action_has_thread(kppet.g_actionid)) {
		BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END);
		return;
	}

	/*
	 * Suspend the task to see an atomic snapshot of all its threads. This
	 * is expensive and disruptive.
	 */
	kern_return_t kr = task_suspend_internal(task);
	if (kr != KERN_SUCCESS) {
		BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END, 1);
		return;
	}

	kr = kppet_threads_prepare(task);
	if (kr != KERN_SUCCESS) {
		BUF_INFO(PERF_PET_ERROR, ERR_THREAD, kr);
		goto out;
	}

	for (unsigned int i = 0; i < kppet.g_nthreads; i++) {
		thread_t thread = kppet.g_threads[i];
		assert(thread != THREAD_NULL);

		kppet_sample_thread(pid, task, thread, idle_rate);

		thread_deallocate(kppet.g_threads[i]);
	}

out:
	task_resume_internal(task);

	BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END, kppet.g_nthreads);
}

/*
 * Store and reference all tasks on the system, so they can be safely inspected
 * outside the `tasks_threads_lock`.
 */
static kern_return_t
kppet_tasks_prepare(void)
{
	kppet_lock_assert_owned();

	vm_size_t size_needed = 0;

	for (;;) {
		lck_mtx_lock(&tasks_threads_lock);

		/*
		 * With the lock held, break out of the lock/unlock loop if
		 * there's enough space to store all the tasks.
		 */
		size_needed = tasks_count * sizeof(task_t);
		if (size_needed <= kppet.g_tasks_size) {
			break;
		}

		/*
		 * Otherwise, allocate more memory outside of the lock.
		 */
		lck_mtx_unlock(&tasks_threads_lock);

		if (size_needed > kppet.g_tasks_size) {
			if (kppet.g_tasks_size != 0) {
				kfree(kppet.g_tasks, kppet.g_tasks_size);
			}

			assert(size_needed > 0);
			kppet.g_tasks_size = size_needed;

			kppet.g_tasks = kalloc_tag(kppet.g_tasks_size, VM_KERN_MEMORY_DIAG);
			if (!kppet.g_tasks) {
				kppet.g_tasks_size = 0;
				return KERN_RESOURCE_SHORTAGE;
			}
		}
	}

	task_t task = TASK_NULL;
	kppet.g_ntasks = 0;
	queue_iterate(&tasks, task, task_t, tasks) {
		bool eligible_task = task != kernel_task;
		if (eligible_task) {
			task_reference_internal(task);
			kppet.g_tasks[kppet.g_ntasks++] = task;
		}
	}

	lck_mtx_unlock(&tasks_threads_lock);

	return KERN_SUCCESS;
}

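/*
 * Sample every eligible task on the system, returning how long sampling took
 * in absolute-time units, so the PET timer can be re-armed relative to the end
 * of sampling.
 */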
static uint64_t
kppet_sample_tasks(uint32_t idle_rate)
{
	kppet_lock_assert_owned();
	assert(kppet.g_actionid > 0);

	uint64_t start_abs = mach_absolute_time();

	BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_START);

	kern_return_t kr = kppet_tasks_prepare();
	if (kr != KERN_SUCCESS) {
		BUF_INFO(PERF_PET_ERROR, ERR_TASK, kr);
		BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_END);
		return mach_absolute_time() - start_abs;
	}

	for (unsigned int i = 0; i < kppet.g_ntasks; i++) {
		task_t task = kppet.g_tasks[i];
		assert(task != TASK_NULL);
		kppet_sample_task(task, idle_rate);
		task_deallocate(task);
		kppet.g_tasks[i] = TASK_NULL;
	}

	BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_END, kppet.g_ntasks);
	kppet.g_ntasks = 0;
	return mach_absolute_time() - start_abs;
}

#pragma mark - sysctl accessors

int
kppet_get_idle_rate(void)
{
	return kppet.g_idle_rate;
}

int
kppet_set_idle_rate(int new_idle_rate)
{
	kppet.g_idle_rate = new_idle_rate;
	return 0;
}

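/*
 * Recompute whether the lightweight PET context-switch hook should be active
 * and tell kperf to update its on-CPU callbacks to match.
 */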
void
kppet_lightweight_active_update(void)
{
	kppet_lightweight_active = (kperf_is_sampling() && kppet.g_lightweight);
	kperf_on_cpu_update();
}

int
kppet_get_lightweight_pet(void)
{
	return kppet.g_lightweight;
}

int
kppet_set_lightweight_pet(int on)
{
	if (kperf_is_sampling()) {
		return EBUSY;
	}

	kppet.g_lightweight = (on == 1);
	kppet_lightweight_active_update();
	return 0;
}