/*
 * Copyright (c) 2011-2016 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* all thread states code */
#include <mach/mach_types.h>
#include <sys/errno.h>

#include <kperf/kperf.h>
#include <kperf/buffer.h>
#include <kperf/sample.h>
#include <kperf/context.h>
#include <kperf/action.h>
#include <kperf/pet.h>
#include <kperf/kperf_timer.h>

#include <kern/task.h>
#include <kern/kalloc.h>
/* action ID to call for each sample
 *
 * Address is used as the sync point for waiting.
 */
static unsigned int pet_action_id = 0;

static lck_mtx_t *pet_lock;
static boolean_t pet_initted = FALSE;
static boolean_t pet_running = FALSE;

/* number of callstack samples to skip for idle threads */
static uint32_t pet_idle_rate = KPERF_PET_DEFAULT_IDLE_RATE;
/*
 * Lightweight PET mode samples the system less-intrusively than normal PET
 * mode.  Instead of iterating tasks and threads on each sample, it increments
 * a global generation count, kperf_pet_gen, which is checked as threads are
 * context switched on-core.  If the thread's local generation count is older
 * than the global generation, the thread samples itself.
 *
 *            |  |
 * thread A   +--+---------|
 *            |  |
 * thread B   |--+---------------|
 *            |  |
 * thread C   |  |          |-------------------------------------
 *            |  |          |
 * thread D   |  |          |     |-------------------------------
 *            |  |          |     |
 *            +--+---------+-----+--------------------------------> time
 *            |  |          |     |
 *               |          +-----+--- threads sampled when they come on-core in
 *               |                     kperf_pet_switch_context
 *               |
 *               +--- PET timer fire, sample on-core threads A and B,
 *                    increment kperf_pet_gen
 */
static boolean_t lightweight_pet = FALSE;
/*
 * Whether or not lightweight PET and sampling is active.
 */
boolean_t kperf_lightweight_pet_active = FALSE;

uint32_t kperf_pet_gen = 0;
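
/*
 * Illustration (a sketch, not part of the control flow in this file): the
 * real context-switch hook, kperf_pet_switch_context(), is defined elsewhere
 * in kperf.  In lightweight mode the per-sample work reduces to comparing the
 * thread's generation against the global one.  kperf_pet_fire_before() below
 * bumps kperf_pet_gen from the timer, and kperf_pet_on_cpu() below performs,
 * roughly:
 *
 *	if (thread->kperf_pet_gen != kperf_pet_gen) {
 *		// stale generation -- this thread has not been sampled
 *		// since the last timer fire, so sample it now
 *	}
 *
 * The thread's copy of the generation is presumably brought up to date as
 * part of taking the sample, so each thread samples itself at most once per
 * timer fire.
 */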
static struct kperf_sample *pet_sample;

/* thread lifecycle */

static kern_return_t pet_init(void);
static void pet_start(void);
static void pet_stop(void);

static void pet_thread_loop(void *param, wait_result_t wr);
static void pet_thread_idle(void);
static void pet_thread_work_unit(void);
/* listing things to sample */

static task_array_t pet_tasks = NULL;
static vm_size_t pet_tasks_size = 0;
static vm_size_t pet_tasks_count = 0;

static thread_array_t pet_threads = NULL;
static vm_size_t pet_threads_size = 0;
static vm_size_t pet_threads_count = 0;

static kern_return_t pet_tasks_prepare(void);
static kern_return_t pet_tasks_prepare_internal(void);

static kern_return_t pet_threads_prepare(task_t task);

static void pet_sample_all_tasks(uint32_t idle_rate);
static void pet_sample_task(task_t task, uint32_t idle_rate);
123 static void pet_sample_thread(int pid
, task_t task
, thread_t thread
,
/* functions called by other areas of kperf */

void
kperf_pet_fire_before(void)
{
	if (!pet_initted || !pet_running) {
		return;
	}

	if (lightweight_pet) {
		BUF_INFO(PERF_PET_SAMPLE);
		OSIncrementAtomic(&kperf_pet_gen);
	}
}
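
/*
 * kperf_pet_fire_after() finishes a timer fire: under lightweight PET it
 * re-arms the timer directly, since threads sample themselves as they come
 * on-core; under threaded PET it instead wakes the dedicated sampling thread
 * sleeping on &pet_action_id (the sync point noted at the top of this file).
 */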
void
kperf_pet_fire_after(void)
{
	if (!pet_initted || !pet_running) {
		return;
	}

	if (lightweight_pet) {
		kperf_timer_pet_rearm(0);
	} else {
		thread_wakeup(&pet_action_id);
	}
}
void
kperf_pet_on_cpu(thread_t thread, thread_continue_t continuation,
    uintptr_t *starting_fp)
{
	assert(thread != NULL);
	assert(ml_get_interrupts_enabled() == FALSE);

	uint32_t actionid = pet_action_id;
	if (actionid == 0) {
		return;
	}

	if (thread->kperf_pet_gen != kperf_pet_gen) {
		BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_START, kperf_pet_gen, thread->kperf_pet_gen);

		task_t task = get_threadtask(thread);
		struct kperf_context ctx = {
			.cur_thread = thread,
			.cur_task = task,
			.cur_pid = task_pid(task),
			.starting_fp = starting_fp,
		};
		/*
		 * Use a per-CPU interrupt buffer, since this is only called
		 * while interrupts are disabled, from the scheduler.
		 */
		struct kperf_sample *sample = kperf_intr_sample_buffer();
		if (!sample) {
			BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END, 1);
			return;
		}

		unsigned int flags = SAMPLE_FLAG_NON_INTERRUPT | SAMPLE_FLAG_PEND_USER;
		if (continuation != NULL) {
			flags |= SAMPLE_FLAG_CONTINUATION;
		}
		kperf_sample(sample, &ctx, actionid, flags);

		BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END);
	} else {
		BUF_VERB(PERF_PET_SAMPLE_THREAD, kperf_pet_gen, thread->kperf_pet_gen);
	}
}
void
kperf_pet_config(unsigned int action_id)
{
	if (action_id == 0 && !pet_initted) {
		return;
	}

	kern_return_t kr = pet_init();
	if (kr != KERN_SUCCESS) {
		return;
	}

	lck_mtx_lock(pet_lock);

	BUF_INFO(PERF_PET_THREAD, 3, action_id);

	if (action_id == 0) {
		pet_stop();
	} else {
		pet_start();
	}

	pet_action_id = action_id;

	lck_mtx_unlock(pet_lock);
}
/* handle resource allocation */

void
pet_start(void)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	if (pet_running) {
		return;
	}

	pet_sample = kalloc(sizeof(struct kperf_sample));
	if (!pet_sample) {
		return;
	}

	pet_running = TRUE;
}

void
pet_stop(void)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	if (!pet_initted) {
		return;
	}

	if (pet_tasks != NULL) {
		assert(pet_tasks_size != 0);
		kfree(pet_tasks, pet_tasks_size);

		pet_tasks = NULL;
		pet_tasks_size = 0;
		pet_tasks_count = 0;
	}

	if (pet_threads != NULL) {
		assert(pet_threads_size != 0);
		kfree(pet_threads, pet_threads_size);

		pet_threads = NULL;
		pet_threads_size = 0;
		pet_threads_count = 0;
	}

	if (pet_sample != NULL) {
		kfree(pet_sample, sizeof(struct kperf_sample));
		pet_sample = NULL;
	}

	pet_running = FALSE;
}
/*
 * Lazily initialize PET.  The PET thread never exits once PET has been used
 * once.
 */
static kern_return_t
pet_init(void)
{
	if (pet_initted) {
		return KERN_SUCCESS;
	}

	/* make the sync point */
	pet_lock = lck_mtx_alloc_init(&kperf_lck_grp, NULL);
	assert(pet_lock != NULL);

	/* create the thread */

	BUF_INFO(PERF_PET_THREAD, 0);
	thread_t t;
	kern_return_t kr = kernel_thread_start(pet_thread_loop, NULL, &t);
	if (kr != KERN_SUCCESS) {
		lck_mtx_free(pet_lock, &kperf_lck_grp);
		return kr;
	}

	thread_set_thread_name(t, "kperf sampling");
	/* let the thread hold the only reference */
	thread_deallocate(t);

	pet_initted = TRUE;

	return KERN_SUCCESS;
}
/* called by PET thread only */

static void
pet_thread_work_unit(void)
{
	pet_sample_all_tasks(pet_idle_rate);
}
static void
pet_thread_idle(void)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	do {
		(void)lck_mtx_sleep(pet_lock, LCK_SLEEP_DEFAULT, &pet_action_id,
		    THREAD_UNINT);
	} while (pet_action_id == 0);
}
__attribute__((noreturn))
static void
pet_thread_loop(void *param, wait_result_t wr)
{
#pragma unused(param, wr)
	uint64_t work_unit_ticks;

	BUF_INFO(PERF_PET_THREAD, 1);

	lck_mtx_lock(pet_lock);
	for (;;) {
		BUF_INFO(PERF_PET_IDLE);
		pet_thread_idle();

		BUF_INFO(PERF_PET_RUN);

		/* measure how long the work unit takes */
		work_unit_ticks = mach_absolute_time();
		pet_thread_work_unit();
		work_unit_ticks = mach_absolute_time() - work_unit_ticks;

		/* re-program the timer */
		kperf_timer_pet_rearm(work_unit_ticks);
	}
}
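
/*
 * Threaded PET control flow, as implemented in this file (summary):
 *
 *	kperf_pet_fire_after()                   PET timer fired
 *	  -> thread_wakeup(&pet_action_id)       wake the sampling thread
 *	pet_thread_loop()
 *	  -> pet_thread_idle()                   sleep until an action is set
 *	  -> pet_thread_work_unit()
 *	       -> pet_sample_all_tasks()         walk every task and thread
 *	  -> kperf_timer_pet_rearm(ticks)        re-arm, given time spent sampling
 */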
static void
pet_sample_thread(int pid, task_t task, thread_t thread, uint32_t idle_rate)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	uint32_t sample_flags = SAMPLE_FLAG_IDLE_THREADS |
	    SAMPLE_FLAG_THREAD_ONLY;

	BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_START);

	/* work out the context */
	struct kperf_context ctx = {
		.cur_thread = thread,
		.cur_task = task,
		.cur_pid = pid,
	};

	boolean_t thread_dirty = kperf_thread_get_dirty(thread);

	/*
	 * Clean a dirty thread and skip the callstack sample if the thread was
	 * not dirty and has skipped fewer than pet_idle_rate samples.
	 */
	if (thread_dirty) {
		kperf_thread_set_dirty(thread, FALSE);
	} else if ((thread->kperf_pet_cnt % idle_rate) != 0) {
		sample_flags |= SAMPLE_FLAG_EMPTY_CALLSTACK;
	}
	thread->kperf_pet_cnt++;

	kperf_sample(pet_sample, &ctx, pet_action_id, sample_flags);
	kperf_sample_user(&pet_sample->usample, &ctx, pet_action_id,
	    sample_flags);

	BUF_VERB(PERF_PET_SAMPLE_THREAD | DBG_FUNC_END);
}
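
/*
 * For example, with an idle rate of 10, a thread whose dirty flag is never
 * set has its callstack recorded on only every 10th PET sample; the other
 * nine samples are taken with SAMPLE_FLAG_EMPTY_CALLSTACK.
 */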
static kern_return_t
pet_threads_prepare(task_t task)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	vm_size_t threads_size_needed;

	if (task == TASK_NULL) {
		return KERN_INVALID_ARGUMENT;
	}

	for (;;) {
		task_lock(task);

		if (!task->active) {
			task_unlock(task);
			return KERN_FAILURE;
		}

		/* do we have the memory we need? */
		threads_size_needed = task->thread_count * sizeof(thread_t);
		if (threads_size_needed <= pet_threads_size) {
			break;
		}

		/* not enough memory, unlock the task and increase allocation */
		task_unlock(task);

		if (pet_threads_size != 0) {
			kfree(pet_threads, pet_threads_size);
		}

		assert(threads_size_needed > 0);
		pet_threads_size = threads_size_needed;

		pet_threads = kalloc(pet_threads_size);
		if (pet_threads == NULL) {
			pet_threads_size = 0;
			return KERN_RESOURCE_SHORTAGE;
		}
	}

	/* have memory and the task is locked and active */
	thread_t thread;
	pet_threads_count = 0;
	queue_iterate(&(task->threads), thread, thread_t, task_threads) {
		thread_reference_internal(thread);
		pet_threads[pet_threads_count++] = thread;
	}

	/* can unlock task now that threads are referenced */
	task_unlock(task);

	return (pet_threads_count == 0) ? KERN_FAILURE : KERN_SUCCESS;
}
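
/*
 * Note the retry pattern above: the thread array can only be sized while the
 * task is locked, but growing it requires unlocking the task, so the loop
 * unlocks, reallocates, and re-checks until the buffer is large enough for
 * the task's current thread count.
 */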
static void
pet_sample_task(task_t task, uint32_t idle_rate)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_START);

	int pid = task_pid(task);
	if (kperf_action_has_task(pet_action_id)) {
		struct kperf_context ctx = {
			.cur_task = task,
			.cur_pid = pid,
		};

		kperf_sample(pet_sample, &ctx, pet_action_id, SAMPLE_FLAG_TASK_ONLY);
	}

	if (!kperf_action_has_thread(pet_action_id)) {
		BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END);
		return;
	}

	kern_return_t kr = KERN_SUCCESS;

	/*
	 * Suspend the task to see an atomic snapshot of all its threads.  This
	 * is expensive and disruptive.
	 */
	bool needs_suspend = task != kernel_task;
	if (needs_suspend) {
		kr = task_suspend_internal(task);
		if (kr != KERN_SUCCESS) {
			BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END, 1);
			return;
		}
		needs_suspend = true;
	}

	kr = pet_threads_prepare(task);
	if (kr != KERN_SUCCESS) {
		BUF_INFO(PERF_PET_ERROR, ERR_THREAD, kr);
		goto out;
	}

	for (unsigned int i = 0; i < pet_threads_count; i++) {
		thread_t thread = pet_threads[i];
		assert(thread != THREAD_NULL);

		/*
		 * Do not sample the thread if it was on a CPU when the timer fired.
		 */
		int cpu = 0;
		for (cpu = 0; cpu < machine_info.logical_cpu_max; cpu++) {
			if (kperf_tid_on_cpus[cpu] == thread_tid(thread)) {
				break;
			}
		}

		/* the thread was not on a CPU */
		if (cpu == machine_info.logical_cpu_max) {
			pet_sample_thread(pid, task, thread, idle_rate);
		}

		thread_deallocate(pet_threads[i]);
	}

out:
	if (needs_suspend) {
		task_resume_internal(task);
	}

	BUF_VERB(PERF_PET_SAMPLE_TASK | DBG_FUNC_END, pet_threads_count);
}
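
/*
 * Threads that were on a CPU when the PET timer fired are skipped above,
 * presumably because the on-core threads were already sampled from the timer
 * itself, so sampling them again here would count them twice.
 */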
static kern_return_t
pet_tasks_prepare_internal(void)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	vm_size_t tasks_size_needed = 0;

	for (;;) {
		lck_mtx_lock(&tasks_threads_lock);

		/* do we have the memory we need? */
		tasks_size_needed = tasks_count * sizeof(task_t);
		if (tasks_size_needed <= pet_tasks_size) {
			break;
		}

		/* unlock and allocate more memory */
		lck_mtx_unlock(&tasks_threads_lock);

		/* grow task array */
		if (tasks_size_needed > pet_tasks_size) {
			if (pet_tasks_size != 0) {
				kfree(pet_tasks, pet_tasks_size);
			}

			assert(tasks_size_needed > 0);
			pet_tasks_size = tasks_size_needed;

			pet_tasks = (task_array_t)kalloc(pet_tasks_size);
			if (pet_tasks == NULL) {
				pet_tasks_size = 0;
				return KERN_RESOURCE_SHORTAGE;
			}
		}
	}

	return KERN_SUCCESS;
}
static kern_return_t
pet_tasks_prepare(void)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);

	/* allocate space and take the tasks_threads_lock */
	kern_return_t kr = pet_tasks_prepare_internal();
	if (KERN_SUCCESS != kr) {
		return kr;
	}
	lck_mtx_assert(&tasks_threads_lock, LCK_MTX_ASSERT_OWNED);

	/* make sure the tasks are not deallocated after dropping the lock */
	task_t task;
	pet_tasks_count = 0;
	queue_iterate(&tasks, task, task_t, tasks) {
		if (task != kernel_task) {
			task_reference_internal(task);
			pet_tasks[pet_tasks_count++] = task;
		}
	}

	lck_mtx_unlock(&tasks_threads_lock);

	return KERN_SUCCESS;
}
static void
pet_sample_all_tasks(uint32_t idle_rate)
{
	lck_mtx_assert(pet_lock, LCK_MTX_ASSERT_OWNED);
	assert(pet_action_id > 0);

	BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_START);

	kern_return_t kr = pet_tasks_prepare();
	if (kr != KERN_SUCCESS) {
		BUF_INFO(PERF_PET_ERROR, ERR_TASK, kr);
		BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_END, 0);
		return;
	}

	for (unsigned int i = 0; i < pet_tasks_count; i++) {
		task_t task = pet_tasks[i];

		pet_sample_task(task, idle_rate);
	}

	for (unsigned int i = 0; i < pet_tasks_count; i++) {
		task_deallocate(pet_tasks[i]);
	}

	BUF_INFO(PERF_PET_SAMPLE | DBG_FUNC_END, pet_tasks_count);
}
/* support sysctls */

int
kperf_get_pet_idle_rate(void)
{
	return pet_idle_rate;
}

int
kperf_set_pet_idle_rate(int val)
{
	pet_idle_rate = val;

	return 0;
}

int
kperf_get_lightweight_pet(void)
{
	return lightweight_pet;
}

int
kperf_set_lightweight_pet(int val)
{
	if (kperf_sampling_status() == KPERF_SAMPLING_ON) {
		return EBUSY;
	}

	lightweight_pet = (val == 1);
	kperf_lightweight_pet_active_update();

	return 0;
}

void
kperf_lightweight_pet_active_update(void)
{
	kperf_lightweight_pet_active = (kperf_sampling_status() && lightweight_pet);
	kperf_on_cpu_update();
}