osfmk/i386/pmCPU.c

   1 /*
   2  * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 /*
  30  * CPU-specific power management support.
  31  *
  32  * Implements the "wrappers" to the KEXT.
  33  */
  34 #include <i386/asm.h>
  35 #include <i386/machine_cpu.h>
  36 #include <i386/mp.h>
  37 #include <i386/machine_routines.h>
  38 #include <i386/proc_reg.h>
  39 #include <i386/pmap.h>
  40 #include <i386/misc_protos.h>
  41 #include <kern/machine.h>
  42 #include <kern/pms.h>
  43 #include <kern/processor.h>
  44 #include <kern/timer_queue.h>
  45 #include <i386/cpu_threads.h>
  46 #include <i386/pmCPU.h>
  47 #include <i386/cpuid.h>
  48 #include <i386/rtclock_protos.h>
  49 #include <kern/sched_prim.h>
  50 #include <i386/lapic.h>
  51 #include <i386/pal_routines.h>
  52 #include <sys/kdebug.h>
  53 #include <i386/tsc.h>
  54
  55 #include <kern/sched_urgency.h>
  56
  57 extern int disableConsoleOutput;
  58
  59 #define DELAY_UNSET             0xFFFFFFFFFFFFFFFFULL
  60
  61 uint64_t cpu_itime_bins[CPU_ITIME_BINS] = {16 * NSEC_PER_USEC, 32 * NSEC_PER_USEC, 64 * NSEC_PER_USEC, 128 * NSEC_PER_USEC, 256 * NSEC_PER_USEC, 512 * NSEC_PER_USEC, 1024 * NSEC_PER_USEC, 2048 * NSEC_PER_USEC, 4096 * NSEC_PER_USEC, 8192 * NSEC_PER_USEC, 16384 * NSEC_PER_USEC, 32768 * NSEC_PER_USEC};
  62 uint64_t *cpu_rtime_bins = &cpu_itime_bins[0];
  63
  64 /*
  65  * The following is set when the KEXT loads and initializes.
  66  */
  67 pmDispatch_t    *pmDispatch     = NULL;
  68
  69 uint32_t                pmInitDone              = 0;
  70 static boolean_t        earlyTopology           = FALSE;
  71 static uint64_t         earlyMaxBusDelay        = DELAY_UNSET;
  72 static uint64_t         earlyMaxIntDelay        = DELAY_UNSET;
  73
  74 /*
  75  * Initialize the Cstate change code.
  76  */
  77 void
  78 power_management_init(void)
  79 {
  80         if (pmDispatch != NULL && pmDispatch->cstateInit != NULL) {
  81                 (*pmDispatch->cstateInit)();
  82         }
  83 }
  84
  85 static inline void
  86 machine_classify_interval(uint64_t interval, uint64_t *bins, uint64_t *binvals, uint32_t nbins)
  87 {
  88         uint32_t i;
  89         for (i = 0; i < nbins; i++) {
  90                 if (interval < binvals[i]) {
  91                         bins[i]++;
  92                         break;
  93                 }
  94         }
  95 }
  96
  97 uint64_t        idle_pending_timers_processed;
  98 uint32_t        idle_entry_timer_processing_hdeadline_threshold = 5000000;
  99
 100 /*
 101  * Called when the CPU is idle.  It calls into the power management kext
 102  * to determine the best way to idle the CPU.
 103  */
 104 void
 105 machine_idle(void)
 106 {
 107         cpu_data_t              *my_cpu         = current_cpu_datap();
 108         __unused uint32_t       cnum = my_cpu->cpu_number;
 109         uint64_t                ctime, rtime, itime;
 110 #if CST_DEMOTION_DEBUG
 111         processor_t             cproc = my_cpu->cpu_processor;
 112         uint64_t                cwakeups = my_cpu->cpu_wakeups_issued_total;
 113 #endif /* CST_DEMOTION_DEBUG */
 114         uint64_t esdeadline, ehdeadline;
 115         boolean_t do_process_pending_timers = FALSE;
 116
 117         ctime = mach_absolute_time();
 118         esdeadline = my_cpu->rtclock_timer.queue.earliest_soft_deadline;
 119         ehdeadline = my_cpu->rtclock_timer.deadline;
 120 /* Determine if pending timers exist */
 121         if ((ctime >= esdeadline) && (ctime < ehdeadline) &&
 122             ((ehdeadline - ctime) < idle_entry_timer_processing_hdeadline_threshold)) {
 123                 idle_pending_timers_processed++;
 124                 do_process_pending_timers = TRUE;
 125                 goto machine_idle_exit;
 126         } else {
 127                 TCOAL_DEBUG(0xCCCC0000, ctime, my_cpu->rtclock_timer.queue.earliest_soft_deadline, my_cpu->rtclock_timer.deadline, idle_pending_timers_processed, 0);
 128         }
 129
 130         my_cpu->lcpu.state = LCPU_IDLE;
 131         DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
 132         MARK_CPU_IDLE(cnum);
 133
 134         rtime = ctime - my_cpu->cpu_ixtime;
 135
 136         my_cpu->cpu_rtime_total += rtime;
 137         machine_classify_interval(rtime, &my_cpu->cpu_rtimes[0], &cpu_rtime_bins[0], CPU_RTIME_BINS);
 138 #if CST_DEMOTION_DEBUG
 139         uint32_t cl = 0, ch = 0;
 140         uint64_t c3res, c6res, c7res;
 141         rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
 142         c3res = ((uint64_t)ch << 32) | cl;
 143         rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
 144         c6res = ((uint64_t)ch << 32) | cl;
 145         rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
 146         c7res = ((uint64_t)ch << 32) | cl;
 147 #endif
 148
 149         if (pmInitDone) {
 150                 /*
 151                  * Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay()
 152                  * were called prior to the CPU PM kext being registered.  We do
 153                  * this here since we know at this point the values will be first
 154                  * used since idle is where the decisions using these values is made.
 155                  */
 156                 if (earlyMaxBusDelay != DELAY_UNSET) {
 157                         ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
 158                 }
 159                 if (earlyMaxIntDelay != DELAY_UNSET) {
 160                         ml_set_maxintdelay(earlyMaxIntDelay);
 161                 }
 162         }
 163
 164         if (pmInitDone
 165             && pmDispatch != NULL
 166             && pmDispatch->MachineIdle != NULL) {
 167                 (*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
 168         } else {
 169                 /*
 170                  * If no power management, re-enable interrupts and halt.
 171                  * This will keep the CPU from spinning through the scheduler
 172                  * and will allow at least some minimal power savings (but it
 173                  * cause problems in some MP configurations w.r.t. the APIC
 174                  * stopping during a GV3 transition).
 175                  */
 176                 pal_hlt();
 177                 /* Once woken, re-disable interrupts. */
 178                 pal_cli();
 179         }
 180
 181         /*
 182          * Mark the CPU as running again.
 183          */
 184         MARK_CPU_ACTIVE(cnum);
 185         DBGLOG(cpu_handle, cnum, MP_UNIDLE);
 186         my_cpu->lcpu.state = LCPU_RUN;
 187         uint64_t ixtime = my_cpu->cpu_ixtime = mach_absolute_time();
 188         itime = ixtime - ctime;
 189         my_cpu->cpu_idle_exits++;
 190         my_cpu->cpu_itime_total += itime;
 191         machine_classify_interval(itime, &my_cpu->cpu_itimes[0], &cpu_itime_bins[0], CPU_ITIME_BINS);
 192 #if CST_DEMOTION_DEBUG
 193         cl = ch = 0;
 194         rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
 195         c3res = (((uint64_t)ch << 32) | cl) - c3res;
 196         rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
 197         c6res = (((uint64_t)ch << 32) | cl) - c6res;
 198         rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
 199         c7res = (((uint64_t)ch << 32) | cl) - c7res;
 200
 201         uint64_t ndelta = itime - tmrCvt(c3res + c6res + c7res, tscFCvtt2n);
 202         KERNEL_DEBUG_CONSTANT(0xcead0000, ndelta, itime, c7res, c6res, c3res);
 203         if ((itime > 1000000) && (ndelta > 250000)) {
 204                 KERNEL_DEBUG_CONSTANT(0xceae0000, ndelta, itime, c7res, c6res, c3res);
 205         }
 206 #endif
 207
 208 machine_idle_exit:
 209         /*
 210          * Re-enable interrupts.
 211          */
 212
 213         pal_sti();
 214
 215         if (do_process_pending_timers) {
 216                 TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_START, ctime, esdeadline, ehdeadline, idle_pending_timers_processed, 0);
 217
 218                 /* Adjust to reflect that this isn't truly a package idle exit */
 219                 __sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
 220                 lapic_timer_swi(); /* Trigger software timer interrupt */
 221                 __sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);
 222
 223                 TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_END, ctime, esdeadline, idle_pending_timers_processed, 0, 0);
 224         }
 225 #if CST_DEMOTION_DEBUG
 226         uint64_t nwakeups = my_cpu->cpu_wakeups_issued_total;
 227
 228         if ((nwakeups == cwakeups) && (topoParms.nLThreadsPerPackage == my_cpu->lcpu.package->num_idle)) {
 229                 KERNEL_DEBUG_CONSTANT(0xceaa0000, cwakeups, 0, 0, 0, 0);
 230         }
 231 #endif
 232 }
 233
 234 /*
 235  * Called when the CPU is to be halted.  It will choose the best C-State
 236  * to be in.
 237  */
 238 void
 239 pmCPUHalt(uint32_t reason)
 240 {
 241         cpu_data_t  *cpup   = current_cpu_datap();
 242
 243         switch (reason) {
 244         case PM_HALT_DEBUG:
 245                 cpup->lcpu.state = LCPU_PAUSE;
 246                 pal_stop_cpu(FALSE);
 247                 break;
 248
 249         case PM_HALT_PANIC:
 250                 cpup->lcpu.state = LCPU_PAUSE;
 251                 pal_stop_cpu(TRUE);
 252                 break;
 253
 254         case PM_HALT_NORMAL:
 255         case PM_HALT_SLEEP:
 256         default:
 257                 pal_cli();
 258
 259                 if (pmInitDone
 260                     && pmDispatch != NULL
 261                     && pmDispatch->pmCPUHalt != NULL) {
 262                         /*
 263                          * Halt the CPU (and put it in a low power state.
 264                          */
 265                         (*pmDispatch->pmCPUHalt)();
 266
 267                         /*
 268                          * We've exited halt, so get the CPU schedulable again.
 269                          * - by calling the fast init routine for a slave, or
 270                          * - by returning if we're the master processor.
 271                          */
 272                         if (cpup->cpu_number != master_cpu) {
 273                                 i386_init_slave_fast();
 274                                 panic("init_slave_fast returned");
 275                         }
 276                 } else {
 277                         /*
 278                          * If no power managment and a processor is taken off-line,
 279                          * then invalidate the cache and halt it (it will not be able
 280                          * to be brought back on-line without resetting the CPU).
 281                          */
 282                         __asm__ volatile ("wbinvd");
 283                         cpup->lcpu.state = LCPU_HALT;
 284                         pal_stop_cpu(FALSE);
 285
 286                         panic("back from Halt");
 287                 }
 288
 289                 break;
 290         }
 291 }
 292
 293 void
 294 pmMarkAllCPUsOff(void)
 295 {
 296         if (pmInitDone
 297             && pmDispatch != NULL
 298             && pmDispatch->markAllCPUsOff != NULL) {
 299                 (*pmDispatch->markAllCPUsOff)();
 300         }
 301 }
 302
 303 static void
 304 pmInitComplete(void)
 305 {
 306         if (earlyTopology
 307             && pmDispatch != NULL
 308             && pmDispatch->pmCPUStateInit != NULL) {
 309                 (*pmDispatch->pmCPUStateInit)();
 310                 earlyTopology = FALSE;
 311         }
 312         pmInitDone = 1;
 313 }
 314
 315 x86_lcpu_t *
 316 pmGetLogicalCPU(int cpu)
 317 {
 318         return cpu_to_lcpu(cpu);
 319 }
 320
 321 x86_lcpu_t *
 322 pmGetMyLogicalCPU(void)
 323 {
 324         cpu_data_t  *cpup   = current_cpu_datap();
 325
 326         return &cpup->lcpu;
 327 }
 328
 329 static x86_core_t *
 330 pmGetCore(int cpu)
 331 {
 332         return cpu_to_core(cpu);
 333 }
 334
 335 static x86_core_t *
 336 pmGetMyCore(void)
 337 {
 338         cpu_data_t  *cpup   = current_cpu_datap();
 339
 340         return cpup->lcpu.core;
 341 }
 342
 343 static x86_die_t *
 344 pmGetDie(int cpu)
 345 {
 346         return cpu_to_die(cpu);
 347 }
 348
 349 static x86_die_t *
 350 pmGetMyDie(void)
 351 {
 352         cpu_data_t  *cpup   = current_cpu_datap();
 353
 354         return cpup->lcpu.die;
 355 }
 356
 357 static x86_pkg_t *
 358 pmGetPackage(int cpu)
 359 {
 360         return cpu_to_package(cpu);
 361 }
 362
 363 static x86_pkg_t *
 364 pmGetMyPackage(void)
 365 {
 366         cpu_data_t  *cpup   = current_cpu_datap();
 367
 368         return cpup->lcpu.package;
 369 }
 370
 371 static void
 372 pmLockCPUTopology(int lock)
 373 {
 374         if (lock) {
 375                 mp_safe_spin_lock(&x86_topo_lock);
 376         } else {
 377                 simple_unlock(&x86_topo_lock);
 378         }
 379 }
 380
 381 /*
 382  * Called to get the next deadline that has been set by the
 383  * power management code.
 384  * Note: a return of 0 from AICPM and this routine signifies
 385  * that no deadline is set.
 386  */
 387 uint64_t
 388 pmCPUGetDeadline(cpu_data_t *cpu)
 389 {
 390         uint64_t    deadline        = 0;
 391
 392         if (pmInitDone
 393             && pmDispatch != NULL
 394             && pmDispatch->GetDeadline != NULL) {
 395                 deadline = (*pmDispatch->GetDeadline)(&cpu->lcpu);
 396         }
 397
 398         return deadline;
 399 }
 400
 401 /*
 402  * Called to determine if the supplied deadline or the power management
 403  * deadline is sooner.  Returns which ever one is first.
 404  */
 405
 406 uint64_t
 407 pmCPUSetDeadline(cpu_data_t *cpu, uint64_t deadline)
 408 {
 409         if (pmInitDone
 410             && pmDispatch != NULL
 411             && pmDispatch->SetDeadline != NULL) {
 412                 deadline = (*pmDispatch->SetDeadline)(&cpu->lcpu, deadline);
 413         }
 414
 415         return deadline;
 416 }
 417
 418 /*
 419  * Called when a power management deadline expires.
 420  */
 421 void
 422 pmCPUDeadline(cpu_data_t *cpu)
 423 {
 424         if (pmInitDone
 425             && pmDispatch != NULL
 426             && pmDispatch->Deadline != NULL) {
 427                 (*pmDispatch->Deadline)(&cpu->lcpu);
 428         }
 429 }
 430
 431 /*
 432  * Called to get a CPU out of idle.
 433  */
 434 boolean_t
 435 pmCPUExitIdle(cpu_data_t *cpu)
 436 {
 437         boolean_t           do_ipi;
 438
 439         if (pmInitDone
 440             && pmDispatch != NULL
 441             && pmDispatch->exitIdle != NULL) {
 442                 do_ipi = (*pmDispatch->exitIdle)(&cpu->lcpu);
 443         } else {
 444                 do_ipi = TRUE;
 445         }
 446
 447         return do_ipi;
 448 }
 449
 450 kern_return_t
 451 pmCPUExitHalt(int cpu)
 452 {
 453         kern_return_t       rc      = KERN_INVALID_ARGUMENT;
 454
 455         if (pmInitDone
 456             && pmDispatch != NULL
 457             && pmDispatch->exitHalt != NULL) {
 458                 rc = pmDispatch->exitHalt(cpu_to_lcpu(cpu));
 459         }
 460
 461         return rc;
 462 }
 463
 464 kern_return_t
 465 pmCPUExitHaltToOff(int cpu)
 466 {
 467         kern_return_t       rc      = KERN_SUCCESS;
 468
 469         if (pmInitDone
 470             && pmDispatch != NULL
 471             && pmDispatch->exitHaltToOff != NULL) {
 472                 rc = pmDispatch->exitHaltToOff(cpu_to_lcpu(cpu));
 473         }
 474
 475         return rc;
 476 }
 477
 478 /*
 479  * Called to initialize the power management structures for the CPUs.
 480  */
 481 void
 482 pmCPUStateInit(void)
 483 {
 484         if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL) {
 485                 (*pmDispatch->pmCPUStateInit)();
 486         } else {
 487                 earlyTopology = TRUE;
 488         }
 489 }
 490
 491 /*
 492  * Called when a CPU is being restarted after being powered off (as in S3).
 493  */
 494 void
 495 pmCPUMarkRunning(cpu_data_t *cpu)
 496 {
 497         cpu_data_t  *cpup   = current_cpu_datap();
 498
 499         if (pmInitDone
 500             && pmDispatch != NULL
 501             && pmDispatch->markCPURunning != NULL) {
 502                 (*pmDispatch->markCPURunning)(&cpu->lcpu);
 503         } else {
 504                 cpup->lcpu.state = LCPU_RUN;
 505         }
 506 }
 507
 508 /*
 509  * Called to get/set CPU power management state.
 510  */
 511 int
 512 pmCPUControl(uint32_t cmd, void *datap)
 513 {
 514         int         rc      = -1;
 515
 516         if (pmDispatch != NULL
 517             && pmDispatch->pmCPUControl != NULL) {
 518                 rc = (*pmDispatch->pmCPUControl)(cmd, datap);
 519         }
 520
 521         return rc;
 522 }
 523
 524 /*
 525  * Called to save the timer state used by power management prior
 526  * to "sleeping".
 527  */
 528 void
 529 pmTimerSave(void)
 530 {
 531         if (pmDispatch != NULL
 532             && pmDispatch->pmTimerStateSave != NULL) {
 533                 (*pmDispatch->pmTimerStateSave)();
 534         }
 535 }
 536
 537 /*
 538  * Called to restore the timer state used by power management after
 539  * waking from "sleep".
 540  */
 541 void
 542 pmTimerRestore(void)
 543 {
 544         if (pmDispatch != NULL
 545             && pmDispatch->pmTimerStateRestore != NULL) {
 546                 (*pmDispatch->pmTimerStateRestore)();
 547         }
 548 }
 549
 550 /*
 551  * Set the worst-case time for the C4 to C2 transition.
 552  * No longer does anything.
 553  */
 554 void
 555 ml_set_maxsnoop(__unused uint32_t maxdelay)
 556 {
 557 }
 558
 559
 560 /*
 561  * Get the worst-case time for the C4 to C2 transition.  Returns nanoseconds.
 562  */
 563 unsigned
 564 ml_get_maxsnoop(void)
 565 {
 566         uint64_t    max_snoop       = 0;
 567
 568         if (pmInitDone
 569             && pmDispatch != NULL
 570             && pmDispatch->getMaxSnoop != NULL) {
 571                 max_snoop = pmDispatch->getMaxSnoop();
 572         }
 573
 574         return (unsigned)(max_snoop & 0xffffffff);
 575 }
 576
 577
 578 uint32_t
 579 ml_get_maxbusdelay(void)
 580 {
 581         uint64_t    max_delay       = 0;
 582
 583         if (pmInitDone
 584             && pmDispatch != NULL
 585             && pmDispatch->getMaxBusDelay != NULL) {
 586                 max_delay = pmDispatch->getMaxBusDelay();
 587         }
 588
 589         return (uint32_t)(max_delay & 0xffffffff);
 590 }
 591
 592 /*
 593  * Advertise a memory access latency tolerance of "mdelay" ns
 594  */
 595 void
 596 ml_set_maxbusdelay(uint32_t mdelay)
 597 {
 598         uint64_t    maxdelay        = mdelay;
 599
 600         if (pmDispatch != NULL
 601             && pmDispatch->setMaxBusDelay != NULL) {
 602                 earlyMaxBusDelay = DELAY_UNSET;
 603                 pmDispatch->setMaxBusDelay(maxdelay);
 604         } else {
 605                 earlyMaxBusDelay = maxdelay;
 606         }
 607 }
 608
 609 uint64_t
 610 ml_get_maxintdelay(void)
 611 {
 612         uint64_t    max_delay       = 0;
 613
 614         if (pmDispatch != NULL
 615             && pmDispatch->getMaxIntDelay != NULL) {
 616                 max_delay = pmDispatch->getMaxIntDelay();
 617         }
 618
 619         return max_delay;
 620 }
 621
 622 /*
 623  * Set the maximum delay allowed for an interrupt.
 624  */
 625 void
 626 ml_set_maxintdelay(uint64_t mdelay)
 627 {
 628         if (pmDispatch != NULL
 629             && pmDispatch->setMaxIntDelay != NULL) {
 630                 earlyMaxIntDelay = DELAY_UNSET;
 631                 pmDispatch->setMaxIntDelay(mdelay);
 632         } else {
 633                 earlyMaxIntDelay = mdelay;
 634         }
 635 }
 636
 637 boolean_t
 638 ml_get_interrupt_prewake_applicable()
 639 {
 640         boolean_t applicable = FALSE;
 641
 642         if (pmInitDone
 643             && pmDispatch != NULL
 644             && pmDispatch->pmInterruptPrewakeApplicable != NULL) {
 645                 applicable = pmDispatch->pmInterruptPrewakeApplicable();
 646         }
 647
 648         return applicable;
 649 }
 650
 651 /*
 652  * Put a CPU into "safe" mode with respect to power.
 653  *
 654  * Some systems cannot operate at a continuous "normal" speed without
 655  * exceeding the thermal design.  This is called per-CPU to place the
 656  * CPUs into a "safe" operating mode.
 657  */
 658 void
 659 pmSafeMode(x86_lcpu_t *lcpu, uint32_t flags)
 660 {
 661         if (pmDispatch != NULL
 662             && pmDispatch->pmCPUSafeMode != NULL) {
 663                 pmDispatch->pmCPUSafeMode(lcpu, flags);
 664         } else {
 665                 /*
 666                  * Do something reasonable if the KEXT isn't present.
 667                  *
 668                  * We only look at the PAUSE and RESUME flags.  The other flag(s)
 669                  * will not make any sense without the KEXT, so just ignore them.
 670                  *
 671                  * We set the CPU's state to indicate that it's halted.  If this
 672                  * is the CPU we're currently running on, then spin until the
 673                  * state becomes non-halted.
 674                  */
 675                 if (flags & PM_SAFE_FL_PAUSE) {
 676                         lcpu->state = LCPU_PAUSE;
 677                         if (lcpu == x86_lcpu()) {
 678                                 while (lcpu->state == LCPU_PAUSE) {
 679                                         cpu_pause();
 680                                 }
 681                         }
 682                 }
 683
 684                 /*
 685                  * Clear the halted flag for the specified CPU, that will
 686                  * get it out of it's spin loop.
 687                  */
 688                 if (flags & PM_SAFE_FL_RESUME) {
 689                         lcpu->state = LCPU_RUN;
 690                 }
 691         }
 692 }
 693
 694 static uint32_t         saved_run_count = 0;
 695
 696 void
 697 machine_run_count(uint32_t count)
 698 {
 699         if (pmDispatch != NULL
 700             && pmDispatch->pmSetRunCount != NULL) {
 701                 pmDispatch->pmSetRunCount(count);
 702         } else {
 703                 saved_run_count = count;
 704         }
 705 }
 706
 707 processor_t
 708 machine_choose_processor(processor_set_t pset,
 709     processor_t preferred)
 710 {
 711         int         startCPU;
 712         int         endCPU;
 713         int         preferredCPU;
 714         int         chosenCPU;
 715
 716         if (!pmInitDone) {
 717                 return preferred;
 718         }
 719
 720         if (pset == NULL) {
 721                 startCPU = -1;
 722                 endCPU = -1;
 723         } else {
 724                 startCPU = pset->cpu_set_low;
 725                 endCPU = pset->cpu_set_hi;
 726         }
 727
 728         if (preferred == NULL) {
 729                 preferredCPU = -1;
 730         } else {
 731                 preferredCPU = preferred->cpu_id;
 732         }
 733
 734         if (pmDispatch != NULL
 735             && pmDispatch->pmChooseCPU != NULL) {
 736                 chosenCPU = pmDispatch->pmChooseCPU(startCPU, endCPU, preferredCPU);
 737
 738                 if (chosenCPU == -1) {
 739                         return NULL;
 740                 }
 741                 return cpu_datap(chosenCPU)->cpu_processor;
 742         }
 743
 744         return preferred;
 745 }
 746
 747 static int
 748 pmThreadGetUrgency(uint64_t *rt_period, uint64_t *rt_deadline)
 749 {
 750         thread_urgency_t urgency;
 751         uint64_t        arg1, arg2;
 752
 753         urgency = thread_get_urgency(THREAD_NULL, &arg1, &arg2);
 754
 755         if (urgency == THREAD_URGENCY_REAL_TIME) {
 756                 if (rt_period != NULL) {
 757                         *rt_period = arg1;
 758                 }
 759
 760                 if (rt_deadline != NULL) {
 761                         *rt_deadline = arg2;
 762                 }
 763         }
 764
 765         return (int)urgency;
 766 }
 767
 768 #if     DEBUG
 769 uint32_t        urgency_stats[64][THREAD_URGENCY_MAX];
 770 #endif
 771
 772 #define         URGENCY_NOTIFICATION_ASSERT_NS (5 * 1000 * 1000)
 773 uint64_t        urgency_notification_assert_abstime_threshold, urgency_notification_max_recorded;
 774
 775 void
 776 thread_tell_urgency(thread_urgency_t urgency,
 777     uint64_t rt_period,
 778     uint64_t rt_deadline,
 779     uint64_t sched_latency,
 780     thread_t nthread)
 781 {
 782         uint64_t        urgency_notification_time_start = 0, delta;
 783         boolean_t       urgency_assert = (urgency_notification_assert_abstime_threshold != 0);
 784         assert(get_preemption_level() > 0 || ml_get_interrupts_enabled() == FALSE);
 785 #if     DEBUG
 786         urgency_stats[cpu_number() % 64][urgency]++;
 787 #endif
 788         if (!pmInitDone
 789             || pmDispatch == NULL
 790             || pmDispatch->pmThreadTellUrgency == NULL) {
 791                 return;
 792         }
 793
 794         SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, sched_latency, 0);
 795
 796         if (__improbable((urgency_assert == TRUE))) {
 797                 urgency_notification_time_start = mach_absolute_time();
 798         }
 799
 800         current_cpu_datap()->cpu_nthread = nthread;
 801         pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline);
 802
 803         if (__improbable((urgency_assert == TRUE))) {
 804                 delta = mach_absolute_time() - urgency_notification_time_start;
 805
 806                 if (__improbable(delta > urgency_notification_max_recorded)) {
 807                         /* This is not synchronized, but it doesn't matter
 808                          * if we (rarely) miss an event, as it is statistically
 809                          * unlikely that it will never recur.
 810                          */
 811                         urgency_notification_max_recorded = delta;
 812
 813                         if (__improbable((delta > urgency_notification_assert_abstime_threshold) && !machine_timeout_suspended())) {
 814                                 panic("Urgency notification callout %p exceeded threshold, 0x%llx abstime units", pmDispatch->pmThreadTellUrgency, delta);
 815                         }
 816                 }
 817         }
 818
 819         SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
 820 }
 821
 822 void
 823 machine_thread_going_on_core(__unused thread_t      new_thread,
 824     __unused thread_urgency_t           urgency,
 825     __unused uint64_t      sched_latency,
 826     __unused uint64_t      same_pri_latency,
 827     __unused uint64_t      dispatch_time)
 828 {
 829 }
 830
 831 void
 832 machine_thread_going_off_core(thread_t old_thread, boolean_t thread_terminating,
 833     uint64_t last_dispatch, boolean_t thread_runnable)
 834 {
 835         if (!pmInitDone
 836             || pmDispatch == NULL
 837             || pmDispatch->pmThreadGoingOffCore == NULL) {
 838                 return;
 839         }
 840
 841         pmDispatch->pmThreadGoingOffCore(old_thread, thread_terminating,
 842             last_dispatch, thread_runnable);
 843 }
 844
 845 void
 846 machine_max_runnable_latency(__unused uint64_t bg_max_latency,
 847     __unused uint64_t default_max_latency,
 848     __unused uint64_t realtime_max_latency)
 849 {
 850 }
 851
 852 void
 853 machine_work_interval_notify(__unused thread_t thread,
 854     __unused struct kern_work_interval_args* kwi_args)
 855 {
 856 }
 857
 858
 859 void
 860 machine_switch_perfcontrol_context(__unused perfcontrol_event event,
 861     __unused uint64_t timestamp,
 862     __unused uint32_t flags,
 863     __unused uint64_t new_thread_same_pri_latency,
 864     __unused thread_t old,
 865     __unused thread_t new)
 866 {
 867 }
 868
 869 void
 870 machine_switch_perfcontrol_state_update(__unused perfcontrol_event event,
 871     __unused uint64_t timestamp,
 872     __unused uint32_t flags,
 873     __unused thread_t thread)
 874 {
 875 }
 876
 877 void
 878 active_rt_threads(boolean_t active)
 879 {
 880         if (!pmInitDone
 881             || pmDispatch == NULL
 882             || pmDispatch->pmActiveRTThreads == NULL) {
 883                 return;
 884         }
 885
 886         pmDispatch->pmActiveRTThreads(active);
 887 }
 888
 889 static uint32_t
 890 pmGetSavedRunCount(void)
 891 {
 892         return saved_run_count;
 893 }
 894
 895 /*
 896  * Returns the root of the package tree.
 897  */
 898 x86_pkg_t *
 899 pmGetPkgRoot(void)
 900 {
 901         return x86_pkgs;
 902 }
 903
 904 static boolean_t
 905 pmCPUGetHibernate(int cpu)
 906 {
 907         return cpu_datap(cpu)->cpu_hibernate;
 908 }
 909
 910 processor_t
 911 pmLCPUtoProcessor(int lcpu)
 912 {
 913         return cpu_datap(lcpu)->cpu_processor;
 914 }
 915
 916 static void
 917 pmReSyncDeadlines(int cpu)
 918 {
 919         static boolean_t    registered      = FALSE;
 920
 921         if (!registered) {
 922                 PM_interrupt_register(&timer_resync_deadlines);
 923                 registered = TRUE;
 924         }
 925
 926         if ((uint32_t)cpu == current_cpu_datap()->lcpu.cpu_num) {
 927                 timer_resync_deadlines();
 928         } else {
 929                 cpu_PM_interrupt(cpu);
 930         }
 931 }
 932
 933 static void
 934 pmSendIPI(int cpu)
 935 {
 936         lapic_send_ipi(cpu, LAPIC_PM_INTERRUPT);
 937 }
 938
 939 static void
 940 pmGetNanotimeInfo(pm_rtc_nanotime_t *rtc_nanotime)
 941 {
 942         /*
 943          * Make sure that nanotime didn't change while we were reading it.
 944          */
 945         do {
 946                 rtc_nanotime->generation = pal_rtc_nanotime_info.generation; /* must be first */
 947                 rtc_nanotime->tsc_base = pal_rtc_nanotime_info.tsc_base;
 948                 rtc_nanotime->ns_base = pal_rtc_nanotime_info.ns_base;
 949                 rtc_nanotime->scale = pal_rtc_nanotime_info.scale;
 950                 rtc_nanotime->shift = pal_rtc_nanotime_info.shift;
 951         } while (pal_rtc_nanotime_info.generation != 0
 952             && rtc_nanotime->generation != pal_rtc_nanotime_info.generation);
 953 }
 954
 955 uint32_t
 956 pmTimerQueueMigrate(int target_cpu)
 957 {
 958         /* Call the etimer code to do this. */
 959         return (target_cpu != cpu_number())
 960                ? timer_queue_migrate_cpu(target_cpu)
 961                : 0;
 962 }
 963
 964
 965 /*
 966  * Called by the power management kext to register itself and to get the
 967  * callbacks it might need into other kernel functions.  This interface
 968  * is versioned to allow for slight mis-matches between the kext and the
 969  * kernel.
 970  */
 971 void
 972 pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
 973     pmCallBacks_t *callbacks)
 974 {
 975         if (callbacks != NULL && version == PM_DISPATCH_VERSION) {
 976                 callbacks->setRTCPop            = setPop;
 977                 callbacks->resyncDeadlines      = pmReSyncDeadlines;
 978                 callbacks->initComplete         = pmInitComplete;
 979                 callbacks->GetLCPU              = pmGetLogicalCPU;
 980                 callbacks->GetCore              = pmGetCore;
 981                 callbacks->GetDie               = pmGetDie;
 982                 callbacks->GetPackage           = pmGetPackage;
 983                 callbacks->GetMyLCPU            = pmGetMyLogicalCPU;
 984                 callbacks->GetMyCore            = pmGetMyCore;
 985                 callbacks->GetMyDie             = pmGetMyDie;
 986                 callbacks->GetMyPackage         = pmGetMyPackage;
 987                 callbacks->GetPkgRoot           = pmGetPkgRoot;
 988                 callbacks->LockCPUTopology      = pmLockCPUTopology;
 989                 callbacks->GetHibernate         = pmCPUGetHibernate;
 990                 callbacks->LCPUtoProcessor      = pmLCPUtoProcessor;
 991                 callbacks->ThreadBind           = thread_bind;
 992                 callbacks->GetSavedRunCount     = pmGetSavedRunCount;
 993                 callbacks->GetNanotimeInfo      = pmGetNanotimeInfo;
 994                 callbacks->ThreadGetUrgency     = pmThreadGetUrgency;
 995                 callbacks->RTCClockAdjust       = rtc_clock_adjust;
 996                 callbacks->timerQueueMigrate    = pmTimerQueueMigrate;
 997                 callbacks->topoParms            = &topoParms;
 998                 callbacks->pmSendIPI            = pmSendIPI;
 999                 callbacks->InterruptPending     = lapic_is_interrupt_pending;
1000                 callbacks->IsInterrupting       = lapic_is_interrupting;
1001                 callbacks->InterruptStats       = lapic_interrupt_counts;
1002                 callbacks->DisableApicTimer     = lapic_disable_timer;
1003         } else {
1004                 panic("Version mis-match between Kernel and CPU PM");
1005         }
1006
1007         if (cpuFuncs != NULL) {
1008                 if (pmDispatch) {
1009                         panic("Attempt to re-register power management interface--AICPM present in xcpm mode? %p->%p", pmDispatch, cpuFuncs);
1010                 }
1011
1012                 pmDispatch = cpuFuncs;
1013
1014                 if (earlyTopology
1015                     && pmDispatch->pmCPUStateInit != NULL) {
1016                         (*pmDispatch->pmCPUStateInit)();
1017                         earlyTopology = FALSE;
1018                 }
1019
1020                 if (pmDispatch->pmIPIHandler != NULL) {
1021                         lapic_set_pm_func((i386_intr_func_t)pmDispatch->pmIPIHandler);
1022                 }
1023         }
1024 }
1025
1026 /*
1027  * Unregisters the power management functions from the kext.
1028  */
1029 void
1030 pmUnRegister(pmDispatch_t *cpuFuncs)
1031 {
1032         if (cpuFuncs != NULL && pmDispatch == cpuFuncs) {
1033                 pmDispatch = NULL;
1034         }
1035 }
1036
1037 void
1038 machine_track_platform_idle(boolean_t entry)
1039 {
1040         cpu_data_t              *my_cpu         = current_cpu_datap();
1041
1042         if (entry) {
1043                 (void)__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);
1044         } else {
1045                 uint32_t nidle = __sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
1046                 if (nidle == topoParms.nLThreadsPerPackage) {
1047                         my_cpu->lcpu.package->package_idle_exits++;
1048                 }
1049         }
1050 }