osfmk/kperf/pet.c

   1 /*
   2  * Copyright (c) 2011-2016 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 /* all thread states code */
  30 #include <mach/mach_types.h>
  31 #include <sys/errno.h>
  32
  33 #include <kperf/kperf.h>
  34 #include <kperf/buffer.h>
  35 #include <kperf/sample.h>
  36 #include <kperf/context.h>
  37 #include <kperf/action.h>
  38 #include <kperf/pet.h>
  39 #include <kperf/kperf_timer.h>
  40
  41 #include <kern/task.h>
  42 #include <kern/kalloc.h>
  43
/*
 * ID of the action to run for each PET sample.  A value of 0 means no action
 * is configured: kperf_pet_on_cpu returns early and the PET thread keeps
 * sleeping in pet_thread_idle.
 *
 * The address of this variable is also used as the event the PET thread
 * sleeps on (lck_mtx_sleep) and is woken through (thread_wakeup).
 */
static unsigned int pet_action_id = 0;

/* mutex allocated in pet_init; serializes configuration and PET sampling */
static lck_mtx_t *pet_lock;
/* TRUE once pet_init has created the lock and the PET thread */
static boolean_t pet_initted = FALSE;
/* TRUE between a successful pet_start and the next pet_stop */
static boolean_t pet_running = FALSE;

/*
 * Number of callstack samples to skip for idle threads.  Used as a modulus
 * against each thread's kperf_pet_cnt in pet_sample_thread.
 */
static uint32_t pet_idle_rate = KPERF_PET_DEFAULT_IDLE_RATE;
  56
/*
 * Lightweight PET mode samples the system less intrusively than normal PET
 * mode.  Instead of iterating tasks and threads on each sample, it increments
 * a global generation count, kperf_pet_gen, which is checked as threads are
 * context switched on-core.  If the thread's local generation count differs
 * from the global generation, the thread samples itself.
 *
 *            |  |
 * thread A   +--+---------|
 *            |  |
 * thread B   |--+---------------|
 *            |  |
 * thread C   |  |         |-------------------------------------
 *            |  |         |
 * thread D   |  |         |     |-------------------------------
 *            |  |         |     |
 *            +--+---------+-----+--------------------------------> time
 *               |         |     |
 *               |         +-----+--- threads sampled when they come on-core in
 *               |                    kperf_pet_on_cpu
 *               |
 *               +--- PET timer fire, sample on-core threads A and B,
 *                    increment kperf_pet_gen
 */
static boolean_t lightweight_pet = FALSE;

/*
 * Whether or not lightweight PET and sampling are both active.  Recomputed by
 * kperf_lightweight_pet_active_update.
 */
boolean_t kperf_lightweight_pet_active = FALSE;

/*
 * Global PET generation count, incremented atomically in
 * kperf_pet_fire_before when lightweight PET is enabled.
 */
uint32_t kperf_pet_gen = 0;
  89
/*
 * Sample buffer used by the PET thread's sampling functions.  Allocated in
 * pet_start and freed in pet_stop.
 */
static struct kperf_sample *pet_sample;

/* thread lifecycle */

static kern_return_t pet_init(void);
static void pet_start(void);
static void pet_stop(void);

/* PET thread-only */

static void pet_thread_loop(void *param, wait_result_t wr);
static void pet_thread_idle(void);
static void pet_thread_work_unit(void);

/* listing things to sample */

/*
 * Scratch array of referenced tasks.  pet_tasks_size is the allocation size
 * in bytes; pet_tasks_count is the number of valid entries.
 */
static task_array_t pet_tasks = NULL;
static vm_size_t pet_tasks_size = 0;
static vm_size_t pet_tasks_count = 0;

/*
 * Scratch array of referenced threads for the task currently being sampled.
 * pet_threads_size is the allocation size in bytes; pet_threads_count is the
 * number of valid entries.
 */
static thread_array_t pet_threads = NULL;
static vm_size_t pet_threads_size = 0;
static vm_size_t pet_threads_count = 0;

static kern_return_t pet_tasks_prepare(void);
static kern_return_t pet_tasks_prepare_internal(void);

static kern_return_t pet_threads_prepare(task_t task);

/* sampling */

static void pet_sample_all_tasks(uint32_t idle_rate);
static void pet_sample_task(task_t task, uint32_t idle_rate);
static void pet_sample_thread(int pid, task_t task, thread_t thread,
    uint32_t idle_rate);

/* functions called by other areas of kperf */
 125
 126 /* functions called by other areas of kperf */
 127
 128 void
 129 kperf_pet_fire_before(void)
 130 {
 131         if (!pet_initted || !pet_running) {
 132                 return;
 133         }
 134
 135         if (lightweight_pet) {
 136                 BUF_INFO(PERF_PET_SAMPLE);
 137                 OSIncrementAtomic(&kperf_pet_gen);
 138         }
 139 }
 140
 141 void
 142 kperf_pet_fire_after(void)
 143 {
 144         if (!pet_initted || !pet_running) {
 145                 return;
 146         }
 147
 148         if (lightweight_pet) {
 149                 kperf_timer_pet_rearm(0);
 150         } else {
 151                 thread_wakeup(&pet_action_id);
 152         }
 153 }
 154
 155 void
 156 kperf_pet_on_cpu(thread_t thread, thread_continue_t continuation,
 157     uintptr_t *starting_fp)
 158 {
 159         assert(thread != NULL);
 160         assert(ml_get_interrupts_enabled() == FALSE);
 161
 162         uint32_t actionid = pet_action_id;
 163         if (actionid == 0) {
 164                 return;
 165         }
 166
 167         if (thread->kperf_pet_gen != kperf_pet_gen) {
 168                 BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_START, kperf_pet_gen, thread->kperf_pet_gen);
 169
 170                 task_t task = get_threadtask(thread);
 171                 struct kperf_context ctx = {
 172                         .cur_thread = thread,
 173                         .cur_task = task,
 174                         .cur_pid = task_pid(task),
 175                         .starting_fp = starting_fp,
 176                 };
 177                 /*
 178                  * Use a per-CPU interrupt buffer, since this is only called
 179                  * while interrupts are disabled, from the scheduler.
 180                  */
 181                 struct kperf_sample *sample = kperf_intr_sample_buffer();
 182                 if (!sample) {
 183                         BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END, 1);
 184                         return;
 185                 }
 186
 187                 unsigned int flags = SAMPLE_FLAG_NON_INTERRUPT | SAMPLE_FLAG_PEND_USER;
 188                 if (continuation != NULL) {
 189                         flags |= SAMPLE_FLAG_CONTINUATION;
 190                 }
 191                 kperf_sample(sample, &ctx, actionid, flags);
 192
 193                 BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END);
 194         } else {
 195                 BUF_VERB(PERF_PET_SAMPLE_THREAD, kperf_pet_gen, thread->kperf_pet_gen);
 196         }
 197 }
 198
 199 void
 200 kperf_pet_config(unsigned int action_id)
 201 {
 202         if (action_id == 0 && !pet_initted) {
 203                 return;
 204         }
 205
 206         kern_return_t kr = pet_init();
 207         if (kr != KERN_SUCCESS) {
 208                 return;
 209         }
 210
 211         lck_mtx_lock(pet_lock);
 212
 213         BUF_INFO(PERF_PET_THREAD, 3, action_id);
 214
 215         if (action_id == 0) {
 216                 pet_stop();
 217         } else {
 218                 pet_start();
 219         }
 220
 221         pet_action_id = action_id;
 222
 223         lck_mtx_unlock(pet_lock);
 224 }
 225
 226 /* handle resource allocation */
 227
 228 void
 229 pet_start(void)
 230 {
 231         lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
 232
 233         if (pet_running) {
 234                 return;
 235         }
 236
 237         pet_sample = kalloc(sizeof(struct kperf_sample));
 238         if (!pet_sample) {
 239                 return;
 240         }
 241
 242         pet_running = TRUE;
 243 }
 244
 245 void
 246 pet_stop(void)
 247 {
 248         lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
 249
 250         if (!pet_initted) {
 251                 return;
 252         }
 253
 254         if (pet_tasks != NULL) {
 255                 assert(pet_tasks_size != 0);
 256                 kfree(pet_tasks, pet_tasks_size);
 257
 258                 pet_tasks = NULL;
 259                 pet_tasks_size = 0;
 260                 pet_tasks_count = 0;
 261         }
 262
 263         if (pet_threads != NULL) {
 264                 assert(pet_threads_size != 0);
 265                 kfree(pet_threads, pet_threads_size);
 266
 267                 pet_threads = NULL;
 268                 pet_threads_size = 0;
 269                 pet_threads_count = 0;
 270         }
 271
 272         if (pet_sample != NULL) {
 273                 kfree(pet_sample, sizeof(struct kperf_sample));
 274                 pet_sample = NULL;
 275         }
 276
 277         pet_running = FALSE;
 278 }
 279
 280 /*
 281  * Lazily initialize PET.  The PET thread never exits once PET has been used
 282  * once.
 283  */
 284 static kern_return_t
 285 pet_init(void)
 286 {
 287         if (pet_initted) {
 288                 return KERN_SUCCESS;
 289         }
 290
 291         /* make the sync point */
 292         pet_lock = lck_mtx_alloc_init(&kperf_lck_grp, NULL);
 293         assert(pet_lock != NULL);
 294
 295         /* create the thread */
 296
 297         BUF_INFO(PERF_PET_THREAD, 0);
 298         thread_t t;
 299         kern_return_t kr = kernel_thread_start(pet_thread_loop, NULL, &t);
 300         if (kr != KERN_SUCCESS) {
 301                 lck_mtx_free(pet_lock, &kperf_lck_grp);
 302                 return kr;
 303         }
 304
 305         thread_set_thread_name(t, "kperf sampling");
 306         /* let the thread hold the only reference */
 307         thread_deallocate(t);
 308
 309         pet_initted = TRUE;
 310
 311         return KERN_SUCCESS;
 312 }
 313
 314 /* called by PET thread only */
 315
 316 static void
 317 pet_thread_work_unit(void)
 318 {
 319         pet_sample_all_tasks(pet_idle_rate);
 320 }
 321
 322 static void
 323 pet_thread_idle(void)
 324 {
 325         lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
 326
 327         do {
 328                 (void)lck_mtx_sleep(pet_lock, LCK_SLEEP_DEFAULT, &pet_action_id,
 329                     THREAD_UNINT);
 330         } while (pet_action_id == 0);
 331 }
 332
 333 __attribute__((noreturn))
 334 static void
 335 pet_thread_loop(void *param, wait_result_t wr)
 336 {
 337 #pragma unused(param, wr)
 338         uint64_t work_unit_ticks;
 339
 340         BUF_INFO(PERF_PET_THREAD, 1);
 341
 342         lck_mtx_lock(pet_lock);
 343         for (;;) {
 344                 BUF_INFO(PERF_PET_IDLE);
 345                 pet_thread_idle();
 346
 347                 BUF_INFO(PERF_PET_RUN);
 348
 349                 /* measure how long the work unit takes */
 350                 work_unit_ticks = mach_absolute_time();
 351                 pet_thread_work_unit();
 352                 work_unit_ticks = mach_absolute_time() - work_unit_ticks;
 353
 354                 /* re-program the timer */
 355                 kperf_timer_pet_rearm(work_unit_ticks);
 356         }
 357 }
 358
 359 /* sampling */
 360
 361 static void
 362 pet_sample_thread(int pid, task_t task, thread_t thread, uint32_t idle_rate)
 363 {
 364         lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
 365
 366         uint32_t sample_flags = SAMPLE_FLAG_IDLE_THREADS | SAMPLE_FLAG_THREAD_ONLY;
 367
 368         BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_START);
 369
 370         /* work out the context */
 371         struct kperf_context ctx = {
 372                 .cur_thread = thread,
 373                 .cur_task = task,
 374                 .cur_pid = pid,
 375         };
 376
 377         boolean_t thread_dirty = kperf_thread_get_dirty(thread);
 378
 379         /*
 380          * Clean a dirty thread and skip callstack sample if the thread was not
 381          * dirty and thread has skipped less than pet_idle_rate samples.
 382          */
 383         if (thread_dirty) {
 384                 kperf_thread_set_dirty(thread, FALSE);
 385         } else if ((thread->kperf_pet_cnt % idle_rate) != 0) {
 386                 sample_flags |= SAMPLE_FLAG_EMPTY_CALLSTACK;
 387         }
 388         thread->kperf_pet_cnt++;
 389
 390         kperf_sample(pet_sample, &ctx, pet_action_id, sample_flags);
 391
 392         BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END);
 393 }
 394
 395 static kern_return_t
 396 pet_threads_prepare(task_t task)
 397 {
 398         lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
 399
 400         vm_size_t threads_size_needed;
 401
 402         if (task == TASK_NULL) {
 403                 return KERN_INVALID_ARGUMENT;
 404         }
 405
 406         for (;;) {
 407                 task_lock(task);
 408
 409                 if (!task->active) {
 410                         task_unlock(task);
 411
 412                         return KERN_FAILURE;
 413                 }
 414
 415                 /* do we have the memory we need? */
 416                 threads_size_needed = task->thread_count * sizeof(thread_t);
 417                 if (threads_size_needed <= pet_threads_size) {
 418                         break;
 419                 }
 420
 421                 /* not enough memory, unlock the task and increase allocation */
 422                 task_unlock(task);
 423
 424                 if (pet_threads_size != 0) {
 425                         kfree(pet_threads, pet_threads_size);
 426                 }
 427
 428                 assert(threads_size_needed > 0);
 429                 pet_threads_size = threads_size_needed;
 430
 431                 pet_threads = kalloc(pet_threads_size);
 432                 if (pet_threads == NULL) {
 433                         pet_threads_size = 0;
 434                         return KERN_RESOURCE_SHORTAGE;
 435                 }
 436         }
 437
 438         /* have memory and the task is locked and active */
 439         thread_t thread;
 440         pet_threads_count = 0;
 441         queue_iterate(&(task->threads), thread, thread_t, task_threads) {
 442                 thread_reference_internal(thread);
 443                 pet_threads[pet_threads_count++] = thread;
 444         }
 445
 446         /* can unlock task now that threads are referenced */
 447         task_unlock(task);
 448
 449         return (pet_threads_count == 0) ? KERN_FAILURE : KERN_SUCCESS;
 450 }
 451
 452 static void
 453 pet_sample_task(task_t task, uint32_t idle_rate)
 454 {
 455         lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
 456
 457         BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_START);
 458
 459         int pid = task_pid(task);
 460         if (kperf_action_has_task(pet_action_id)) {
 461                 struct kperf_context ctx = {
 462                         .cur_task = task,
 463                         .cur_pid = pid,
 464                 };
 465
 466                 kperf_sample(pet_sample, &ctx, pet_action_id, SAMPLE_FLAG_TASK_ONLY);
 467         }
 468
 469         if (!kperf_action_has_thread(pet_action_id)) {
 470                 BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END);
 471                 return;
 472         }
 473
 474         kern_return_t kr = KERN_SUCCESS;
 475
 476         /*
 477          * Suspend the task to see an atomic snapshot of all its threads.  This
 478          * is expensive, and disruptive.
 479          */
 480         bool needs_suspend = task != kernel_task;
 481         if (needs_suspend) {
 482                 kr = task_suspend_internal(task);
 483                 if (kr != KERN_SUCCESS) {
 484                         BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END, 1);
 485                         return;
 486                 }
 487                 needs_suspend = true;
 488         }
 489
 490         kr = pet_threads_prepare(task);
 491         if (kr != KERN_SUCCESS) {
 492                 BUF_INFO(PERF_PET_ERROR, ERR_THREAD, kr);
 493                 goto out;
 494         }
 495
 496         for (unsigned int i = 0; i < pet_threads_count; i++) {
 497                 thread_t thread = pet_threads[i];
 498                 assert(thread != THREAD_NULL);
 499
 500                 /*
 501                  * Do not sample the thread if it was on a CPU when the timer fired.
 502                  */
 503                 int cpu = 0;
 504                 for (cpu = 0; cpu < machine_info.logical_cpu_max; cpu++) {
 505                         if (kperf_tid_on_cpus[cpu] == thread_tid(thread)) {
 506                                 break;
 507                         }
 508                 }
 509
 510                 /* the thread was not on a CPU */
 511                 if (cpu == machine_info.logical_cpu_max) {
 512                         pet_sample_thread(pid, task, thread, idle_rate);
 513                 }
 514
 515                 thread_deallocate(pet_threads[i]);
 516         }
 517
 518 out:
 519         if (needs_suspend) {
 520                 task_resume_internal(task);
 521         }
 522
 523         BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END, pet_threads_count);
 524 }
 525
 526 static kern_return_t
 527 pet_tasks_prepare_internal(void)
 528 {
 529         lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
 530
 531         vm_size_t tasks_size_needed = 0;
 532
 533         for (;;) {
 534                 lck_mtx_lock(&tasks_threads_lock);
 535
 536                 /* do we have the memory we need? */
 537                 tasks_size_needed = tasks_count * sizeof(task_t);
 538                 if (tasks_size_needed <= pet_tasks_size) {
 539                         break;
 540                 }
 541
 542                 /* unlock and allocate more memory */
 543                 lck_mtx_unlock(&tasks_threads_lock);
 544
 545                 /* grow task array */
 546                 if (tasks_size_needed > pet_tasks_size) {
 547                         if (pet_tasks_size != 0) {
 548                                 kfree(pet_tasks, pet_tasks_size);
 549                         }
 550
 551                         assert(tasks_size_needed > 0);
 552                         pet_tasks_size = tasks_size_needed;
 553
 554                         pet_tasks = (task_array_t)kalloc(pet_tasks_size);
 555                         if (pet_tasks == NULL) {
 556                                 pet_tasks_size = 0;
 557                                 return KERN_RESOURCE_SHORTAGE;
 558                         }
 559                 }
 560         }
 561
 562         return KERN_SUCCESS;
 563 }
 564
 565 static kern_return_t
 566 pet_tasks_prepare(void)
 567 {
 568         lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
 569
 570         /* allocate space and take the tasks_threads_lock */
 571         kern_return_t kr = pet_tasks_prepare_internal();
 572         if (KERN_SUCCESS != kr) {
 573                 return kr;
 574         }
 575         lck_mtx_assert(&tasks_threads_lock, LCK_MTX_ASSERT_OWNED);
 576
 577         /* make sure the tasks are not deallocated after dropping the lock */
 578         task_t task;
 579         pet_tasks_count = 0;
 580         queue_iterate(&tasks, task, task_t, tasks) {
 581                 if (task != kernel_task) {
 582                         task_reference_internal(task);
 583                         pet_tasks[pet_tasks_count++] = task;
 584                 }
 585         }
 586
 587         lck_mtx_unlock(&tasks_threads_lock);
 588
 589         return KERN_SUCCESS;
 590 }
 591
 592 static void
 593 pet_sample_all_tasks(uint32_t idle_rate)
 594 {
 595         lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
 596         assert(pet_action_id > 0);
 597
 598         BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_START);
 599
 600         kern_return_t kr = pet_tasks_prepare();
 601         if (kr != KERN_SUCCESS) {
 602                 BUF_INFO(PERF_PET_ERROR, ERR_TASK, kr);
 603                 BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_END, 0);
 604                 return;
 605         }
 606
 607         for (unsigned int i = 0; i < pet_tasks_count; i++) {
 608                 task_t task = pet_tasks[i];
 609
 610                 pet_sample_task(task, idle_rate);
 611         }
 612
 613         for (unsigned int i = 0; i < pet_tasks_count; i++) {
 614                 task_deallocate(pet_tasks[i]);
 615         }
 616
 617         BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_END, pet_tasks_count);
 618 }
 619
 620 /* support sysctls */
 621
 622 int
 623 kperf_get_pet_idle_rate(void)
 624 {
 625         return pet_idle_rate;
 626 }
 627
 628 int
 629 kperf_set_pet_idle_rate(int val)
 630 {
 631         pet_idle_rate = val;
 632
 633         return 0;
 634 }
 635
 636 int
 637 kperf_get_lightweight_pet(void)
 638 {
 639         return lightweight_pet;
 640 }
 641
 642 int
 643 kperf_set_lightweight_pet(int val)
 644 {
 645         if (kperf_sampling_status() == KPERF_SAMPLING_ON) {
 646                 return EBUSY;
 647         }
 648
 649         lightweight_pet = (val == 1);
 650         kperf_lightweight_pet_active_update();
 651
 652         return 0;
 653 }
 654
 655 void
 656 kperf_lightweight_pet_active_update(void)
 657 {
 658         kperf_lightweight_pet_active = (kperf_sampling_status() && lightweight_pet);
 659         kperf_on_cpu_update();
 660 }