/*
 * Copyright (c) 2011-2016 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* all thread states code */
#include <mach/mach_types.h>
#include <sys/errno.h>

#include <kperf/kperf.h>
#include <kperf/buffer.h>
#include <kperf/sample.h>
#include <kperf/context.h>
#include <kperf/action.h>
#include <kperf/pet.h>
#include <kperf/kperf_timer.h>

#include <kern/task.h>
#include <kern/kalloc.h>
/* action ID to call for each sample
 *
 * Address is used as the sync point for waiting.
 */
static unsigned int pet_action_id = 0;

static lck_mtx_t *pet_lock;
static boolean_t pet_initted = FALSE;
static boolean_t pet_running = FALSE;

/* number of callstack samples to skip for idle threads */
static uint32_t pet_idle_rate = KPERF_PET_DEFAULT_IDLE_RATE;
/*
 * Lightweight PET mode samples the system less-intrusively than normal PET
 * mode.  Instead of iterating tasks and threads on each sample, it increments
 * a global generation count, kperf_pet_gen, which is checked as threads are
 * context switched on-core.  If the thread's local generation count is older
 * than the global generation, the thread samples itself.
 *
 *            |  |
 * thread A   +--+---------|
 *            |  |
 * thread B   |--+---------------|
 *            |  |
 * thread C   |  |          |-------------------------------------
 *            |  |          |
 * thread D   |  |          |     |-------------------------------
 *            |  |          |     |
 *            +--+---------+-----+--------------------------------> time
 *            |  |          |     |
 *               |          +-----+--- threads sampled when they come on-core in
 *               |                     kperf_pet_switch_context
 *               |
 *               +--- PET timer fire, sample on-core threads A and B,
 *                    increment kperf_pet_gen
 */
static boolean_t lightweight_pet = FALSE;
/*
 * Whether or not lightweight PET and sampling is active.
 */
boolean_t kperf_lightweight_pet_active = FALSE;

uint32_t kperf_pet_gen = 0;
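
/*
 * Illustration (a sketch, not part of the control flow in this file): the
 * real context-switch hook, kperf_pet_switch_context(), is defined elsewhere
 * in kperf.  In lightweight mode the per-sample work reduces to comparing the
 * thread's generation against the global one.  kperf_pet_fire_before() below
 * bumps kperf_pet_gen from the timer, and kperf_pet_on_cpu() below performs,
 * roughly:
 *
 *	if (thread->kperf_pet_gen != kperf_pet_gen) {
 *		// stale generation -- this thread has not been sampled
 *		// since the last timer fire, so sample it now
 *	}
 *
 * The thread's copy of the generation is presumably brought up to date as
 * part of taking the sample, so each thread samples itself at most once per
 * timer fire.
 */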
static struct kperf_sample *pet_sample;

/* thread lifecycle */

static kern_return_t pet_init(void);
static void pet_start(void);
static void pet_stop(void);

static void pet_thread_loop(void *param, wait_result_t wr);
static void pet_thread_idle(void);
static void pet_thread_work_unit(void);
/* listing things to sample */

static task_array_t pet_tasks = NULL;
static vm_size_t pet_tasks_size = 0;
static vm_size_t pet_tasks_count = 0;

static thread_array_t pet_threads = NULL;
static vm_size_t pet_threads_size = 0;
static vm_size_t pet_threads_count = 0;

static kern_return_t pet_tasks_prepare(void);
static kern_return_t pet_tasks_prepare_internal(void);

static kern_return_t pet_threads_prepare(task_t task);

static void pet_sample_all_tasks(uint32_t idle_rate);
static void pet_sample_task(task_t task, uint32_t idle_rate);
123 static void pet_sample_thread(int pid
, task_t task
, thread_t thread
,
/* functions called by other areas of kperf */

void
kperf_pet_fire_before(void)
{
	if (!pet_initted || !pet_running) {
		return;
	}

	if (lightweight_pet) {
		BUF_INFO(PERF_PET_SAMPLE);
		OSIncrementAtomic(&kperf_pet_gen);
	}
}
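
/*
 * kperf_pet_fire_after() finishes a timer fire: under lightweight PET it
 * re-arms the timer directly, since threads sample themselves as they come
 * on-core; under threaded PET it instead wakes the dedicated sampling thread
 * sleeping on &pet_action_id (the sync point noted at the top of this file).
 */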
void
kperf_pet_fire_after(void)
{
	if (!pet_initted || !pet_running) {
		return;
	}

	if (lightweight_pet) {
		kperf_timer_pet_rearm(0);
	} else {
		thread_wakeup(&pet_action_id);
	}
}
void
kperf_pet_on_cpu(thread_t thread, thread_continue_t continuation,
    uintptr_t *starting_fp)
{
	assert(thread != NULL);
	assert(ml_get_interrupts_enabled() == FALSE);

	uint32_t actionid = pet_action_id;
	if (actionid == 0) {
		return;
	}

	if (thread->kperf_pet_gen != kperf_pet_gen) {
		BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_START, kperf_pet_gen, thread->kperf_pet_gen);

		task_t task = get_threadtask(thread);
		struct kperf_context ctx = {
			.cur_thread = thread,
			.cur_task = task,
			.cur_pid = task_pid(task),
			.starting_fp = starting_fp,
		};
		/*
		 * Use a per-CPU interrupt buffer, since this is only called
		 * while interrupts are disabled, from the scheduler.
		 */
		struct kperf_sample *sample = kperf_intr_sample_buffer();
		if (!sample) {
			BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END, 1);
			return;
		}

		unsigned int flags = SAMPLE_FLAG_NON_INTERRUPT | SAMPLE_FLAG_PEND_USER;
		if (continuation != NULL) {
			flags |= SAMPLE_FLAG_CONTINUATION;
		}
		kperf_sample(sample, &ctx, actionid, flags);

		BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END);
	} else {
		BUF_VERB(PERF_PET_SAMPLE_THREAD, kperf_pet_gen, thread->kperf_pet_gen);
	}
}
void
kperf_pet_config(unsigned int action_id)
{
	if (action_id == 0 && !pet_initted) {
		return;
	}

	kern_return_t kr = pet_init();
	if (kr != KERN_SUCCESS) {
		return;
	}

	lck_mtx_lock(pet_lock);

	BUF_INFO(PERF_PET_THREAD, 3, action_id);

	if (action_id == 0) {
		pet_stop();
	} else {
		pet_start();
	}

	pet_action_id = action_id;

	lck_mtx_unlock(pet_lock);
}
/* handle resource allocation */

void
pet_start(void)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	if (pet_running) {
		return;
	}

	pet_sample = kalloc(sizeof(struct kperf_sample));
	if (!pet_sample) {
		return;
	}

	pet_running = TRUE;
}

void
pet_stop(void)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	if (!pet_initted) {
		return;
	}

	if (pet_tasks != NULL) {
		assert(pet_tasks_size != 0);
		kfree(pet_tasks, pet_tasks_size);

		pet_tasks = NULL;
		pet_tasks_size = 0;
		pet_tasks_count = 0;
	}

	if (pet_threads != NULL) {
		assert(pet_threads_size != 0);
		kfree(pet_threads, pet_threads_size);

		pet_threads = NULL;
		pet_threads_size = 0;
		pet_threads_count = 0;
	}

	if (pet_sample != NULL) {
		kfree(pet_sample, sizeof(struct kperf_sample));
		pet_sample = NULL;
	}

	pet_running = FALSE;
}
/*
 * Lazily initialize PET.  The PET thread never exits once PET has been used
 * once.
 */
static kern_return_t
pet_init(void)
{
	if (pet_initted) {
		return KERN_SUCCESS;
	}

	/* make the sync point */
	pet_lock = lck_mtx_alloc_init(&kperf_lck_grp, NULL);
	assert(pet_lock != NULL);

	/* create the thread */

	BUF_INFO(PERF_PET_THREAD, 0);
	thread_t t;
	kern_return_t kr = kernel_thread_start(pet_thread_loop, NULL, &t);
	if (kr != KERN_SUCCESS) {
		lck_mtx_free(pet_lock, &kperf_lck_grp);
		return kr;
	}

	thread_set_thread_name(t, "kperf sampling");
	/* let the thread hold the only reference */
	thread_deallocate(t);

	pet_initted = TRUE;

	return KERN_SUCCESS;
}
/* called by PET thread only */

static void
pet_thread_work_unit(void)
{
	pet_sample_all_tasks(pet_idle_rate);
}
static void
pet_thread_idle(void)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	do {
		(void)lck_mtx_sleep(pet_lock, LCK_SLEEP_DEFAULT, &pet_action_id,
		    THREAD_UNINT);
	} while (pet_action_id == 0);
}
__attribute__((noreturn))
static void
pet_thread_loop(void *param, wait_result_t wr)
{
#pragma unused(param, wr)
	uint64_t work_unit_ticks;

	BUF_INFO(PERF_PET_THREAD, 1);

	lck_mtx_lock(pet_lock);
	for (;;) {
		BUF_INFO(PERF_PET_IDLE);
		pet_thread_idle();

		BUF_INFO(PERF_PET_RUN);

		/* measure how long the work unit takes */
		work_unit_ticks = mach_absolute_time();
		pet_thread_work_unit();
		work_unit_ticks = mach_absolute_time() - work_unit_ticks;

		/* re-program the timer */
		kperf_timer_pet_rearm(work_unit_ticks);
	}
}
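
/*
 * Threaded PET control flow, as implemented in this file (summary):
 *
 *	kperf_pet_fire_after()                   PET timer fired
 *	  -> thread_wakeup(&pet_action_id)       wake the sampling thread
 *	pet_thread_loop()
 *	  -> pet_thread_idle()                   sleep until an action is set
 *	  -> pet_thread_work_unit()
 *	       -> pet_sample_all_tasks()         walk every task and thread
 *	  -> kperf_timer_pet_rearm(ticks)        re-arm, given time spent sampling
 */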
static void
pet_sample_thread(int pid, task_t task, thread_t thread, uint32_t idle_rate)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	uint32_t sample_flags = SAMPLE_FLAG_IDLE_THREADS |
	    SAMPLE_FLAG_THREAD_ONLY;

	BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_START);

	/* work out the context */
	struct kperf_context ctx = {
		.cur_thread = thread,
		.cur_task = task,
		.cur_pid = pid,
	};

	boolean_t thread_dirty = kperf_thread_get_dirty(thread);

	/*
	 * Clean a dirty thread and skip the callstack sample if the thread was
	 * not dirty and has skipped fewer than pet_idle_rate samples.
	 */
	if (thread_dirty) {
		kperf_thread_set_dirty(thread, FALSE);
	} else if ((thread->kperf_pet_cnt % idle_rate) != 0) {
		sample_flags |= SAMPLE_FLAG_EMPTY_CALLSTACK;
	}
	thread->kperf_pet_cnt++;

	kperf_sample(pet_sample, &ctx, pet_action_id, sample_flags);
	kperf_sample_user(&pet_sample->usample, &ctx, pet_action_id,
	    sample_flags);

	BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END);
}
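
/*
 * For example, with an idle rate of 10, a thread whose dirty flag is never
 * set has its callstack recorded on only every 10th PET sample; the other
 * nine samples are taken with SAMPLE_FLAG_EMPTY_CALLSTACK.
 */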
static kern_return_t
pet_threads_prepare(task_t task)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	vm_size_t threads_size_needed;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	for (;;) {
		task_lock(task);

		if (!task->active) {
			task_unlock(task);
			return KERN_FAILURE;
		}

		/* do we have the memory we need? */
		threads_size_needed = task->thread_count * sizeof(thread_t);
		if (threads_size_needed <= pet_threads_size) {
			break;
		}

		/* not enough memory, unlock the task and increase allocation */
		task_unlock(task);

		if (pet_threads_size != 0) {
			kfree(pet_threads, pet_threads_size);
		}

		assert(threads_size_needed > 0);
		pet_threads_size = threads_size_needed;

		pet_threads = kalloc(pet_threads_size);
		if (pet_threads == NULL) {
			pet_threads_size = 0;
			return KERN_RESOURCE_SHORTAGE;
		}
	}

	/* have memory and the task is locked and active */
	thread_t thread;
	pet_threads_count = 0;
	queue_iterate(&(task->threads), thread, thread_t, task_threads) {
		thread_reference_internal(thread);
		pet_threads[pet_threads_count++] = thread;
	}

	/* can unlock task now that threads are referenced */
	task_unlock(task);

	return (pet_threads_count == 0) ? KERN_FAILURE : KERN_SUCCESS;
}
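
/*
 * Note the retry pattern above: the thread array can only be sized while the
 * task is locked, but growing it requires unlocking the task, so the loop
 * unlocks, reallocates, and re-checks until the buffer is large enough for
 * the task's current thread count.
 */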
static void
pet_sample_task(task_t task, uint32_t idle_rate)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_START);

	int pid = task_pid(task);
	if (kperf_action_has_task(pet_action_id)) {
		struct kperf_context ctx = {
			.cur_task = task,
			.cur_pid = pid,
		};

		kperf_sample(pet_sample, &ctx, pet_action_id, SAMPLE_FLAG_TASK_ONLY);
	}

	if (!kperf_action_has_thread(pet_action_id)) {
		BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END);
		return;
	}

	kern_return_t kr = KERN_SUCCESS;

	/*
	 * Suspend the task to see an atomic snapshot of all its threads.  This
	 * is expensive and disruptive.
	 */
	bool needs_suspend = task != kernel_task;
	if (needs_suspend) {
		kr = task_suspend_internal(task);
		if (kr != KERN_SUCCESS) {
			BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END, 1);
			return;
		}
		needs_suspend = true;
	}

	kr = pet_threads_prepare(task);
	if (kr != KERN_SUCCESS) {
		BUF_INFO(PERF_PET_ERROR, ERR_THREAD, kr);
		goto out;
	}

	for (unsigned int i = 0; i < pet_threads_count; i++) {
		thread_t thread = pet_threads[i];
		assert(thread != THREAD_NULL);

		/*
		 * Do not sample the thread if it was on a CPU when the timer fired.
		 */
		int cpu = 0;
		for (cpu = 0; cpu < machine_info.logical_cpu_max; cpu++) {
			if (kperf_tid_on_cpus[cpu] == thread_tid(thread)) {
				break;
			}
		}

		/* the thread was not on a CPU */
		if (cpu == machine_info.logical_cpu_max) {
			pet_sample_thread(pid, task, thread, idle_rate);
		}

		thread_deallocate(pet_threads[i]);
	}

out:
	if (needs_suspend) {
		task_resume_internal(task);
	}

	BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END, pet_threads_count);
}
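
/*
 * Threads that were on a CPU when the PET timer fired are skipped above,
 * presumably because the on-core threads were already sampled from the timer
 * itself, so sampling them again here would count them twice.
 */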
static kern_return_t
pet_tasks_prepare_internal(void)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	vm_size_t tasks_size_needed = 0;

	for (;;) {
		lck_mtx_lock(&tasks_threads_lock);

		/* do we have the memory we need? */
		tasks_size_needed = tasks_count * sizeof(task_t);
		if (tasks_size_needed <= pet_tasks_size) {
			break;
		}

		/* unlock and allocate more memory */
		lck_mtx_unlock(&tasks_threads_lock);

		/* grow task array */
		if (tasks_size_needed > pet_tasks_size) {
			if (pet_tasks_size != 0) {
				kfree(pet_tasks, pet_tasks_size);
			}

			assert(tasks_size_needed > 0);
			pet_tasks_size = tasks_size_needed;

			pet_tasks = (task_array_t)kalloc(pet_tasks_size);
			if (pet_tasks == NULL) {
				pet_tasks_size = 0;
				return KERN_RESOURCE_SHORTAGE;
			}
		}
	}

	return KERN_SUCCESS;
}
static kern_return_t
pet_tasks_prepare(void)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	/* allocate space and take the tasks_threads_lock */
	kern_return_t kr = pet_tasks_prepare_internal();
	if (KERN_SUCCESS != kr) {
		return kr;
	}
	lck_mtx_assert(&tasks_threads_lock, LCK_MTX_ASSERT_OWNED);

	/* make sure the tasks are not deallocated after dropping the lock */
	task_t task;
	pet_tasks_count = 0;
	queue_iterate(&tasks, task, task_t, tasks) {
		if (task != kernel_task) {
			task_reference_internal(task);
			pet_tasks[pet_tasks_count++] = task;
		}
	}

	lck_mtx_unlock(&tasks_threads_lock);

	return KERN_SUCCESS;
}
static void
pet_sample_all_tasks(uint32_t idle_rate)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
	assert(pet_action_id > 0);

	BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_START);

	kern_return_t kr = pet_tasks_prepare();
	if (kr != KERN_SUCCESS) {
		BUF_INFO(PERF_PET_ERROR, ERR_TASK, kr);
		BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_END, 0);
		return;
	}

	for (unsigned int i = 0; i < pet_tasks_count; i++) {
		task_t task = pet_tasks[i];

		pet_sample_task(task, idle_rate);
	}

	for (unsigned int i = 0; i < pet_tasks_count; i++) {
		task_deallocate(pet_tasks[i]);
	}

	BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_END, pet_tasks_count);
}
/* support sysctls */

int
kperf_get_pet_idle_rate(void)
{
	return pet_idle_rate;
}

int
kperf_set_pet_idle_rate(int val)
{
	pet_idle_rate = val;

	return 0;
}

int
kperf_get_lightweight_pet(void)
{
	return lightweight_pet;
}

int
kperf_set_lightweight_pet(int val)
{
	if (kperf_sampling_status() == KPERF_SAMPLING_ON) {
		return EBUSY;
	}

	lightweight_pet = (val == 1);
	kperf_lightweight_pet_active_update();

	return 0;
}

void
kperf_lightweight_pet_active_update(void)
{
	kperf_lightweight_pet_active = (kperf_sampling_status() && lightweight_pet);
	kperf_on_cpu_update();
}