/*
 * Copyright (c) 2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <kern/assert.h>
#include <kern/monotonic.h>
#include <kern/thread.h>
#include <machine/atomic.h>
#include <machine/monotonic.h>
#include <mach/mach_traps.h>
#include <stdatomic.h>
#include <sys/errno.h>

bool mt_debug = false;
_Atomic uint64_t mt_pmis = 0;
_Atomic uint64_t mt_retrograde = 0;

#define MT_KDBG_INSTRS_CYCLES(CODE) \
    KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_INSTRS_CYCLES, CODE)

#define MT_KDBG_IC_CPU_CSWITCH MT_KDBG_INSTRS_CYCLES(1)

/*
 * Updating the thread counters takes place in the context switch path, so it
 * cannot introduce too much overhead. Thus, updating takes no locks; instead,
 * the updater sets a generation count to an odd value to indicate that it is
 * in the critical section, and readers wait until the generation count
 * returns to an even value.
 *
 * Readers also must not observe a "torn" state of the counters, where some of
 * the counters are from a previous state and the rest are from the current
 * state. For this reason, the reader redrives the entire read operation if it
 * sees mismatching generation counts at the beginning and end of reading.
 */
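
/*
 * Schematically, the protocol used below looks like this (a simplified
 * sketch, not the exact code):
 *
 *   writer (context switch)          reader (mt_fixed_thread_counts)
 *   -----------------------          -------------------------------
 *   mth_gen++;   // now odd          do {
 *   update mth_counts[];                 gen = mth_gen;  // spin while odd
 *   mth_gen++;   // even again           copy mth_counts[];
 *                                    } while (mth_gen != gen);
 */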

#define MAXSPINS 100
#define MAXRETRIES 10

/*
 * Write the fixed counter values for the thread `thread` into `counts_out`.
 *
 * This function does not include the accumulated counter values since the
 * thread's last context switch or quantum expiration.
 */
int
mt_fixed_thread_counts(thread_t thread, uint64_t *counts_out)
{
    uint64_t start_gen, end_gen;
    uint64_t spins = 0, retries = 0;
    uint64_t counts[MT_CORE_NFIXED];

    /*
     * Try to read a thread's counter values by ensuring its gen count is
     * even. If it's odd, it means that a thread is trying to update its
     * counters.
     *
     * Spin until the gen count is even.
     */
spin:
    start_gen = atomic_load_explicit(&thread->t_monotonic.mth_gen,
        memory_order_acquire);
retry:
    if (start_gen & 1) {
        spins++;
        if (spins > MAXSPINS) {
            return EBUSY;
        }
        goto spin;
    }

    for (int i = 0; i < MT_CORE_NFIXED; i++) {
        counts[i] = thread->t_monotonic.mth_counts[i];
    }

    /*
     * After reading the counters, check the gen count again. If it is
     * different from the value that we started with, the thread raced
     * writing its counters with us reading them. We need to redrive the
     * entire operation.
     *
     * Go back to check if the value we just read was even and try to read
     * again.
     */
    end_gen = atomic_load_explicit(&thread->t_monotonic.mth_gen,
        memory_order_acquire);
    if (end_gen != start_gen) {
        retries++;
        if (retries > MAXRETRIES) {
            return EAGAIN;
        }
        start_gen = end_gen;
        goto retry;
    }

    /*
     * Only after getting a consistent snapshot of the counters should we
     * write them into the provided buffer.
     */
    for (int i = 0; i < MT_CORE_NFIXED; i++) {
        counts_out[i] = counts[i];
    }
    return 0;
}

static void mt_fixed_counts_internal(uint64_t *counts, uint64_t *counts_since);

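/*
 * Fold the counts accumulated on the current CPU since the last update into
 * `thread`'s counters, using the odd/even generation count to keep readers
 * from seeing a torn update. Must be called with interrupts disabled; returns
 * false if fixed counters are unsupported.
 */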
bool
mt_update_thread(thread_t thread)
{
    if (!mt_core_supported) {
        return false;
    }

    assert(ml_get_interrupts_enabled() == FALSE);

    uint64_t counts[MT_CORE_NFIXED], counts_since[MT_CORE_NFIXED];
    mt_fixed_counts_internal(counts, counts_since);

    /*
     * Enter the update cycle by incrementing the gen count to be odd --
     * this tells any readers to spin on the gen count, waiting for it to go
     * even.
     */
    __assert_only uint64_t enter_gen = atomic_fetch_add_explicit(
        &thread->t_monotonic.mth_gen, 1, memory_order_release);
    /*
     * Should not have pre-empted a modification to the counts.
     */
    assert((enter_gen & 1) == 0);

    for (int i = 0; i < MT_CORE_NFIXED; i++) {
        thread->t_monotonic.mth_counts[i] += counts_since[i];
    }

    /*
     * Exit the update by making the gen count even again. Readers check
     * the gen count for equality, and will redrive the reads if the values
     * before and after reading don't match.
     */
    __assert_only uint64_t exit_gen = atomic_fetch_add_explicit(
        &thread->t_monotonic.mth_gen, 1, memory_order_release);
    /*
     * Make sure no other writers came through behind us.
     */
    assert(exit_gen == (enter_gen + 1));

    return true;
}

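/*
 * Scheduler hook, called on context switch: accumulate the outgoing thread's
 * counters and, if the tracepoint is enabled, emit a kdebug event with the
 * current CPU's instruction and cycle counts.
 */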
void
mt_sched_update(thread_t thread)
{
    bool updated = mt_update_thread(thread);
    if (!updated) {
        return;
    }

    if (kdebug_debugid_explicitly_enabled(MT_KDBG_IC_CPU_CSWITCH)) {
        struct mt_cpu *mtc = mt_cur_cpu();

        KDBG_RELEASE(MT_KDBG_IC_CPU_CSWITCH,
#ifdef MT_CORE_INSTRS
            mtc->mtc_counts[MT_CORE_INSTRS],
#else /* defined(MT_CORE_INSTRS) */
            0,
#endif /* !defined(MT_CORE_INSTRS) */
            mtc->mtc_counts[MT_CORE_CYCLES]);
    }
}

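/*
 * Sum a task's accumulated counters with the counters of each of its threads,
 * writing the result into `counts_out`. The current thread is summed after
 * dropping the task lock, since updating it requires disabling interrupts.
 * Returns non-zero if fixed counters are unsupported or a thread's counters
 * could not be read consistently.
 */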
int
mt_fixed_task_counts(task_t task, uint64_t *counts_out)
{
    assert(task != TASK_NULL);
    assert(counts_out != NULL);

    if (!mt_core_supported) {
        memset(counts_out, 0, sizeof(*counts_out) * MT_CORE_NFIXED);
        return 1;
    }

    task_lock(task);

    uint64_t counts[MT_CORE_NFIXED] = { 0 };
    for (int i = 0; i < MT_CORE_NFIXED; i++) {
        counts[i] = task->task_monotonic.mtk_counts[i];
    }

    uint64_t thread_counts[MT_CORE_NFIXED] = { 0 };
    thread_t thread = THREAD_NULL;
    thread_t curthread = current_thread();
    bool needs_current = false;
    int r = 0;
    queue_iterate(&task->threads, thread, thread_t, task_threads) {
        /*
         * Get the current thread's counters after doing this
         * processing, without holding the task lock.
         */
        if (thread == curthread) {
            needs_current = true;
            continue;
        } else {
            r = mt_fixed_thread_counts(thread, thread_counts);
            if (r) {
                goto error;
            }
        }

        for (int i = 0; i < MT_CORE_NFIXED; i++) {
            counts[i] += thread_counts[i];
        }
    }

    task_unlock(task);

    if (needs_current) {
        mt_cur_thread_fixed_counts(thread_counts);
    }

    for (int i = 0; i < MT_CORE_NFIXED; i++) {
        if (needs_current) {
            counts[i] += thread_counts[i];
        }
        counts_out[i] = counts[i];
    }
    return 0;

error:
    task_unlock(task);
    return r;
}

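/*
 * Return the number of events that fixed counter `ctr` has counted since the
 * last snapshot, and record the new snapshot. If the hardware value is behind
 * the last snapshot, treat it as retrograde: bump mt_retrograde, resynchronize
 * the snapshot, and return 0.
 */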
uint64_t
mt_mtc_update_count(struct mt_cpu *mtc, unsigned int ctr)
{
    uint64_t snap = mt_core_snap(ctr);
    if (snap < mtc->mtc_snaps[ctr]) {
        if (mt_debug) {
            kprintf("monotonic: cpu %d: thread %#llx: "
                "retrograde counter %u value: %llu, last read = %llu\n",
                cpu_number(), thread_tid(current_thread()), ctr, snap,
                mtc->mtc_snaps[ctr]);
        }
        (void)atomic_fetch_add_explicit(&mt_retrograde, 1,
            memory_order_relaxed);
        mtc->mtc_snaps[ctr] = snap;
        return 0;
    }

    uint64_t count = snap - mtc->mtc_snaps[ctr];
    mtc->mtc_snaps[ctr] = snap;

    return count;
}

uint64_t
mt_cpu_update_count(cpu_data_t *cpu, unsigned int ctr)
{
    return mt_mtc_update_count(&cpu->cpu_monotonic, ctr);
}

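/*
 * Read the current CPU's fixed counters with interrupts disabled, reporting
 * the cumulative values in `counts` and the counts since the last update in
 * `counts_since`; NULL outputs are skipped.
 */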
static void
mt_fixed_counts_internal(uint64_t *counts, uint64_t *counts_since)
{
    assert(ml_get_interrupts_enabled() == FALSE);

    struct mt_cpu *mtc = mt_cur_cpu();
    assert(mtc != NULL);

    mt_mtc_update_fixed_counts(mtc, counts, counts_since);
}

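/*
 * Bring `mtc`'s cumulative counts up to date from the hardware. If `counts`
 * is non-NULL, it receives the new cumulative values; if `counts_since` is
 * non-NULL, it receives the change since the previous call that requested it,
 * and that baseline is advanced.
 */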
void
mt_mtc_update_fixed_counts(struct mt_cpu *mtc, uint64_t *counts,
    uint64_t *counts_since)
{
    if (!mt_core_supported) {
        return;
    }

    for (int i = 0; i < MT_CORE_NFIXED; i++) {
        uint64_t last_delta;
        uint64_t count;

        last_delta = mt_mtc_update_count(mtc, i);
        count = mtc->mtc_counts[i] + last_delta;

        if (counts) {
            counts[i] = count;
        }
        if (counts_since) {
            assert(counts != NULL);
            counts_since[i] = count - mtc->mtc_counts_last[i];
            mtc->mtc_counts_last[i] = count;
        }

        mtc->mtc_counts[i] = count;
    }
}

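/*
 * Refresh the current CPU's cumulative counts without reporting them to the
 * caller. A serializing barrier is issued first so the counter reads are not
 * reordered with earlier instructions; interrupts must already be disabled.
 */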
void
mt_update_fixed_counts(void)
{
    assert(ml_get_interrupts_enabled() == FALSE);

#if defined(__x86_64__)
    __builtin_ia32_lfence();
#elif defined(__arm__) || defined(__arm64__)
    __builtin_arm_isb(ISB_SY);
#endif /* !defined(__x86_64__) && (defined(__arm__) || defined(__arm64__)) */

    mt_fixed_counts_internal(NULL, NULL);
}

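/*
 * Write the current CPU's cumulative fixed counter values into `counts`,
 * issuing a serializing barrier and disabling interrupts around the read.
 */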
void
mt_fixed_counts(uint64_t *counts)
{
#if defined(__x86_64__)
    __builtin_ia32_lfence();
#elif defined(__arm__) || defined(__arm64__)
    __builtin_arm_isb(ISB_SY);
#endif /* !defined(__x86_64__) && (defined(__arm__) || defined(__arm64__)) */

    int intrs_en = ml_set_interrupts_enabled(FALSE);
    mt_fixed_counts_internal(counts, NULL);
    ml_set_interrupts_enabled(intrs_en);
}

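/*
 * Write the current thread's fixed counter values into `counts`, updating
 * them from the hardware first. If fixed counters are unsupported, the
 * buffer is zeroed.
 */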
void
mt_cur_thread_fixed_counts(uint64_t *counts)
{
    if (!mt_core_supported) {
        memset(counts, 0, sizeof(*counts) * MT_CORE_NFIXED);
        return;
    }

    thread_t curthread = current_thread();
    int intrs_en = ml_set_interrupts_enabled(FALSE);
    (void)mt_update_thread(curthread);
    for (int i = 0; i < MT_CORE_NFIXED; i++) {
        counts[i] = curthread->t_monotonic.mth_counts[i];
    }
    ml_set_interrupts_enabled(intrs_en);
}

void
mt_cur_task_fixed_counts(uint64_t *counts)
{
    task_t curtask = current_task();

    mt_fixed_task_counts(curtask, counts);
}

/* FIXME these should only update the counter that is being accessed */

uint64_t
mt_cur_thread_instrs(void)
{
#ifdef MT_CORE_INSTRS
    thread_t curthread = current_thread();
    boolean_t intrs_en;
    uint64_t count;

    if (!mt_core_supported) {
        return 0;
    }

    intrs_en = ml_set_interrupts_enabled(FALSE);
    (void)mt_update_thread(curthread);
    count = curthread->t_monotonic.mth_counts[MT_CORE_INSTRS];
    ml_set_interrupts_enabled(intrs_en);

    return count;
#else /* defined(MT_CORE_INSTRS) */
    return 0;
#endif /* !defined(MT_CORE_INSTRS) */
}

uint64_t
mt_cur_thread_cycles(void)
{
    thread_t curthread = current_thread();
    boolean_t intrs_en;
    uint64_t count;

    if (!mt_core_supported) {
        return 0;
    }

    intrs_en = ml_set_interrupts_enabled(FALSE);
    (void)mt_update_thread(curthread);
    count = curthread->t_monotonic.mth_counts[MT_CORE_CYCLES];
    ml_set_interrupts_enabled(intrs_en);

    return count;
}

uint64_t
mt_cur_cpu_instrs(void)
{
#ifdef MT_CORE_INSTRS
    uint64_t counts[MT_CORE_NFIXED];

    if (!mt_core_supported) {
        return 0;
    }

    mt_fixed_counts(counts);
    return counts[MT_CORE_INSTRS];
#else /* defined(MT_CORE_INSTRS) */
    return 0;
#endif /* !defined(MT_CORE_INSTRS) */
}

uint64_t
mt_cur_cpu_cycles(void)
{
    uint64_t counts[MT_CORE_NFIXED];

    if (!mt_core_supported) {
        return 0;
    }

    mt_fixed_counts(counts);
    return counts[MT_CORE_CYCLES];
}

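/*
 * Fold `thread`'s accumulated counter values into its owning task. The caller
 * must hold the task lock.
 */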
void
mt_update_task(task_t task, thread_t thread)
{
    task_lock_assert_owned(task);

    if (!mt_core_supported) {
        return;
    }

    for (int i = 0; i < MT_CORE_NFIXED; i++) {
        task->task_monotonic.mtk_counts[i] += thread->t_monotonic.mth_counts[i];
    }
}

void
mt_terminate_update(task_t task, thread_t thread)
{
    mt_update_task(task, thread);
}

void
mt_perfcontrol(uint64_t *instrs, uint64_t *cycles)
{
    if (!mt_core_supported) {
        *instrs = 0;
        *cycles = 0;
        return;
    }

    struct mt_cpu *mtc = mt_cur_cpu();

    /*
     * The performance controller queries the hardware directly, so provide the
     * last snapshot we took for the core. This is the value from when we
     * updated the thread counts.
     */

#ifdef MT_CORE_INSTRS
    *instrs = mtc->mtc_snaps[MT_CORE_INSTRS];
#else /* defined(MT_CORE_INSTRS) */
    *instrs = 0;
#endif /* !defined(MT_CORE_INSTRS) */

    *cycles = mtc->mtc_snaps[MT_CORE_CYCLES];
}

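/*
 * Report the instruction and cycle counts already accumulated for `thread`
 * without querying the hardware, for use by stackshot.
 */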
void
mt_stackshot_thread(thread_t thread, uint64_t *instrs, uint64_t *cycles)
{
    assert(mt_core_supported);

#ifdef MT_CORE_INSTRS
    *instrs = thread->t_monotonic.mth_counts[MT_CORE_INSTRS];
#else /* defined(MT_CORE_INSTRS) */
    *instrs = 0;
#endif /* !defined(MT_CORE_INSTRS) */

    *cycles = thread->t_monotonic.mth_counts[MT_CORE_CYCLES];
}

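/*
 * Report the instruction and cycle counts already accumulated for `task`,
 * again without querying the hardware.
 */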
void
mt_stackshot_task(task_t task, uint64_t *instrs, uint64_t *cycles)
{
    assert(mt_core_supported);

#ifdef MT_CORE_INSTRS
    *instrs = task->task_monotonic.mtk_counts[MT_CORE_INSTRS];
#else /* defined(MT_CORE_INSTRS) */
    *instrs = 0;
#endif /* !defined(MT_CORE_INSTRS) */

    *cycles = task->task_monotonic.mtk_counts[MT_CORE_CYCLES];
}

/*
 * Maintain reset values for the fixed instruction and cycle counters so
 * clients can be notified after a given number of those events occur. This is
 * only used by microstackshot.
 */

bool mt_microstackshots = false;
unsigned int mt_microstackshot_ctr = 0;
mt_pmi_fn mt_microstackshot_pmi_handler = NULL;
void *mt_microstackshot_ctx = NULL;
uint64_t mt_core_reset_values[MT_CORE_NFIXED] = { 0 };

#define MT_MIN_FIXED_PERIOD (10 * 1000 * 1000)

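/*
 * Start microstackshot sampling: arrange for `handler` to be called with
 * `ctx` each time fixed counter `ctr` counts another `period` events. Rejects
 * periods shorter than MT_MIN_FIXED_PERIOD and refuses to start if sampling
 * is already active; if the machine-dependent start fails, the handler state
 * is cleared and the error is returned.
 */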
int
mt_microstackshot_start(unsigned int ctr, uint64_t period, mt_pmi_fn handler,
    void *ctx)
{
    assert(ctr < MT_CORE_NFIXED);

    if (period < MT_MIN_FIXED_PERIOD) {
        return EINVAL;
    }
    if (mt_microstackshots) {
        return EBUSY;
    }

    mt_microstackshot_ctr = ctr;
    mt_microstackshot_pmi_handler = handler;
    mt_microstackshot_ctx = ctx;

    int error = mt_microstackshot_start_arch(period);
    if (error) {
        mt_microstackshot_ctr = 0;
        mt_microstackshot_pmi_handler = NULL;
        mt_microstackshot_ctx = NULL;
        return error;
    }

    mt_microstackshots = true;

    return 0;
}

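/*
 * Stop microstackshot sampling and clear the saved reset values.
 */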
int
mt_microstackshot_stop(void)
{
    mt_microstackshots = false;
    memset(mt_core_reset_values, 0, sizeof(mt_core_reset_values));

    return 0;
}