2 * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
30 * CPU-specific power management support.
32 * Implements the "wrappers" to the KEXT.
35 #include <i386/machine_cpu.h>
37 #include <i386/machine_routines.h>
38 #include <i386/proc_reg.h>
39 #include <i386/pmap.h>
40 #include <i386/misc_protos.h>
41 #include <kern/machine.h>
43 #include <kern/processor.h>
44 #include <kern/etimer.h>
45 #include <i386/cpu_threads.h>
46 #include <i386/pmCPU.h>
47 #include <i386/cpuid.h>
48 #include <i386/rtclock_protos.h>
49 #include <kern/sched_prim.h>
50 #include <i386/lapic.h>
51 #include <i386/pal_routines.h>
53 #include <sys/kdebug.h>
55 extern int disableConsoleOutput
;
57 #define DELAY_UNSET 0xFFFFFFFFFFFFFFFFULL
60 * The following is set when the KEXT loads and initializes.
62 pmDispatch_t
*pmDispatch
= NULL
;
64 static uint32_t pmInitDone
= 0;
65 static boolean_t earlyTopology
= FALSE
;
66 static uint64_t earlyMaxBusDelay
= DELAY_UNSET
;
67 static uint64_t earlyMaxIntDelay
= DELAY_UNSET
;
70 * Initialize the Cstate change code.
73 power_management_init(void)
75 if (pmDispatch
!= NULL
&& pmDispatch
->cstateInit
!= NULL
)
76 (*pmDispatch
->cstateInit
)();
80 * Called when the CPU is idle. It calls into the power management kext
81 * to determine the best way to idle the CPU.
86 cpu_data_t
*my_cpu
= current_cpu_datap();
91 my_cpu
->lcpu
.state
= LCPU_IDLE
;
92 DBGLOG(cpu_handle
, cpu_number(), MP_IDLE
);
93 MARK_CPU_IDLE(cpu_number());
97 * Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay()
98 * were called prior to the CPU PM kext being registered. We do
99 * this here since we know at this point the values will be first
100 * used since idle is where the decisions using these values are made.
102 if (earlyMaxBusDelay
!= DELAY_UNSET
)
103 ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay
& 0xFFFFFFFF));
105 if (earlyMaxIntDelay
!= DELAY_UNSET
)
106 ml_set_maxintdelay(earlyMaxIntDelay
);
110 && pmDispatch
!= NULL
111 && pmDispatch
->MachineIdle
!= NULL
)
112 (*pmDispatch
->MachineIdle
)(0x7FFFFFFFFFFFFFFFULL
);
115 * If no power management, re-enable interrupts and halt.
116 * This will keep the CPU from spinning through the scheduler
117 * and will allow at least some minimal power savings (but it
118 * may cause problems in some MP configurations w.r.t. the APIC
119 * stopping during a GV3 transition).
123 /* Once woken, re-disable interrupts. */
128 * Mark the CPU as running again.
130 MARK_CPU_ACTIVE(cpu_number());
131 DBGLOG(cpu_handle
, cpu_number(), MP_UNIDLE
);
132 my_cpu
->lcpu
.state
= LCPU_RUN
;
135 * Re-enable interrupts.
142 * Called when the CPU is to be halted. It will choose the best C-State
146 pmCPUHalt(uint32_t reason
)
148 cpu_data_t
*cpup
= current_cpu_datap();
152 cpup
->lcpu
.state
= LCPU_PAUSE
;
157 cpup
->lcpu
.state
= LCPU_PAUSE
;
166 && pmDispatch
!= NULL
167 && pmDispatch
->pmCPUHalt
!= NULL
) {
169 * Halt the CPU (and put it in a low power state).
171 (*pmDispatch
->pmCPUHalt
)();
174 * We've exited halt, so get the CPU schedulable again.
176 i386_init_slave_fast();
178 panic("init_slave_fast returned");
182 * If no power management and a processor is taken off-line,
183 * then invalidate the cache and halt it (it will not be able
184 * to be brought back on-line without resetting the CPU).
186 __asm__
volatile ("wbinvd");
187 cpup
->lcpu
.state
= LCPU_HALT
;
190 panic("back from Halt");
198 pmMarkAllCPUsOff(void)
201 && pmDispatch
!= NULL
202 && pmDispatch
->markAllCPUsOff
!= NULL
)
203 (*pmDispatch
->markAllCPUsOff
)();
210 && pmDispatch
!= NULL
211 && pmDispatch
->pmCPUStateInit
!= NULL
) {
212 (*pmDispatch
->pmCPUStateInit
)();
213 earlyTopology
= FALSE
;
220 pmGetLogicalCPU(int cpu
)
222 return(cpu_to_lcpu(cpu
));
226 pmGetMyLogicalCPU(void)
228 cpu_data_t
*cpup
= current_cpu_datap();
236 return(cpu_to_core(cpu
));
242 cpu_data_t
*cpup
= current_cpu_datap();
244 return(cpup
->lcpu
.core
);
250 return(cpu_to_die(cpu
));
256 cpu_data_t
*cpup
= current_cpu_datap();
258 return(cpup
->lcpu
.die
);
262 pmGetPackage(int cpu
)
264 return(cpu_to_package(cpu
));
270 cpu_data_t
*cpup
= current_cpu_datap();
272 return(cpup
->lcpu
.package
);
276 pmLockCPUTopology(int lock
)
279 simple_lock(&x86_topo_lock
);
281 simple_unlock(&x86_topo_lock
);
286 * Called to get the next deadline that has been set by the
287 * power management code.
288 * Note: a return of 0 from AICPM and this routine signifies
289 * that no deadline is set.
292 pmCPUGetDeadline(cpu_data_t
*cpu
)
294 uint64_t deadline
= 0;
297 && pmDispatch
!= NULL
298 && pmDispatch
->GetDeadline
!= NULL
)
299 deadline
= (*pmDispatch
->GetDeadline
)(&cpu
->lcpu
);
305 * Called to determine if the supplied deadline or the power management
306 * deadline is sooner. Returns whichever one is first.
309 pmCPUSetDeadline(cpu_data_t
*cpu
, uint64_t deadline
)
312 && pmDispatch
!= NULL
313 && pmDispatch
->SetDeadline
!= NULL
)
314 deadline
= (*pmDispatch
->SetDeadline
)(&cpu
->lcpu
, deadline
);
320 * Called when a power management deadline expires.
323 pmCPUDeadline(cpu_data_t
*cpu
)
326 && pmDispatch
!= NULL
327 && pmDispatch
->Deadline
!= NULL
)
328 (*pmDispatch
->Deadline
)(&cpu
->lcpu
);
332 * Called to get a CPU out of idle.
335 pmCPUExitIdle(cpu_data_t
*cpu
)
340 && pmDispatch
!= NULL
341 && pmDispatch
->exitIdle
!= NULL
)
342 do_ipi
= (*pmDispatch
->exitIdle
)(&cpu
->lcpu
);
350 pmCPUExitHalt(int cpu
)
352 kern_return_t rc
= KERN_INVALID_ARGUMENT
;
355 && pmDispatch
!= NULL
356 && pmDispatch
->exitHalt
!= NULL
)
357 rc
= pmDispatch
->exitHalt(cpu_to_lcpu(cpu
));
363 pmCPUExitHaltToOff(int cpu
)
365 kern_return_t rc
= KERN_INVALID_ARGUMENT
;
368 && pmDispatch
!= NULL
369 && pmDispatch
->exitHaltToOff
!= NULL
)
370 rc
= pmDispatch
->exitHaltToOff(cpu_to_lcpu(cpu
));
376 * Called to initialize the power management structures for the CPUs.
381 if (pmDispatch
!= NULL
&& pmDispatch
->pmCPUStateInit
!= NULL
)
382 (*pmDispatch
->pmCPUStateInit
)();
384 earlyTopology
= TRUE
;
388 * Called when a CPU is being restarted after being powered off (as in S3).
391 pmCPUMarkRunning(cpu_data_t
*cpu
)
393 cpu_data_t
*cpup
= current_cpu_datap();
396 && pmDispatch
!= NULL
397 && pmDispatch
->markCPURunning
!= NULL
)
398 (*pmDispatch
->markCPURunning
)(&cpu
->lcpu
);
400 cpup
->lcpu
.state
= LCPU_RUN
;
404 * Called to get/set CPU power management state.
407 pmCPUControl(uint32_t cmd
, void *datap
)
411 if (pmDispatch
!= NULL
412 && pmDispatch
->pmCPUControl
!= NULL
)
413 rc
= (*pmDispatch
->pmCPUControl
)(cmd
, datap
);
419 * Called to save the timer state used by power management prior
425 if (pmDispatch
!= NULL
426 && pmDispatch
->pmTimerStateSave
!= NULL
)
427 (*pmDispatch
->pmTimerStateSave
)();
431 * Called to restore the timer state used by power management after
432 * waking from "sleep".
437 if (pmDispatch
!= NULL
438 && pmDispatch
->pmTimerStateRestore
!= NULL
)
439 (*pmDispatch
->pmTimerStateRestore
)();
443 * Set the worst-case time for the C4 to C2 transition.
444 * No longer does anything.
447 ml_set_maxsnoop(__unused
uint32_t maxdelay
)
453 * Get the worst-case time for the C4 to C2 transition. Returns nanoseconds.
456 ml_get_maxsnoop(void)
458 uint64_t max_snoop
= 0;
461 && pmDispatch
!= NULL
462 && pmDispatch
->getMaxSnoop
!= NULL
)
463 max_snoop
= pmDispatch
->getMaxSnoop();
465 return((unsigned)(max_snoop
& 0xffffffff));
470 ml_get_maxbusdelay(void)
472 uint64_t max_delay
= 0;
475 && pmDispatch
!= NULL
476 && pmDispatch
->getMaxBusDelay
!= NULL
)
477 max_delay
= pmDispatch
->getMaxBusDelay();
479 return((uint32_t)(max_delay
& 0xffffffff));
483 * Set the maximum delay time allowed for snoop on the bus.
485 * Note that this value will be compared to the amount of time that it takes
486 * to transition from a non-snooping power state (C4) to a snooping state (C2).
487 * If maxBusDelay is less than C4C2SnoopDelay,
488 * we will not enter the lowest power state.
491 ml_set_maxbusdelay(uint32_t mdelay
)
493 uint64_t maxdelay
= mdelay
;
495 if (pmDispatch
!= NULL
496 && pmDispatch
->setMaxBusDelay
!= NULL
) {
497 earlyMaxBusDelay
= DELAY_UNSET
;
498 pmDispatch
->setMaxBusDelay(maxdelay
);
500 earlyMaxBusDelay
= maxdelay
;
504 ml_get_maxintdelay(void)
506 uint64_t max_delay
= 0;
508 if (pmDispatch
!= NULL
509 && pmDispatch
->getMaxIntDelay
!= NULL
)
510 max_delay
= pmDispatch
->getMaxIntDelay();
516 * Set the maximum delay allowed for an interrupt.
519 ml_set_maxintdelay(uint64_t mdelay
)
521 if (pmDispatch
!= NULL
522 && pmDispatch
->setMaxIntDelay
!= NULL
) {
523 earlyMaxIntDelay
= DELAY_UNSET
;
524 pmDispatch
->setMaxIntDelay(mdelay
);
526 earlyMaxIntDelay
= mdelay
;
530 ml_get_interrupt_prewake_applicable()
532 boolean_t applicable
= FALSE
;
535 && pmDispatch
!= NULL
536 && pmDispatch
->pmInterruptPrewakeApplicable
!= NULL
)
537 applicable
= pmDispatch
->pmInterruptPrewakeApplicable();
543 * Put a CPU into "safe" mode with respect to power.
545 * Some systems cannot operate at a continuous "normal" speed without
546 * exceeding the thermal design. This is called per-CPU to place the
547 * CPUs into a "safe" operating mode.
550 pmSafeMode(x86_lcpu_t
*lcpu
, uint32_t flags
)
552 if (pmDispatch
!= NULL
553 && pmDispatch
->pmCPUSafeMode
!= NULL
)
554 pmDispatch
->pmCPUSafeMode(lcpu
, flags
);
557 * Do something reasonable if the KEXT isn't present.
559 * We only look at the PAUSE and RESUME flags. The other flag(s)
560 * will not make any sense without the KEXT, so just ignore them.
562 * We set the CPU's state to indicate that it's halted. If this
563 * is the CPU we're currently running on, then spin until the
564 * state becomes non-halted.
566 if (flags
& PM_SAFE_FL_PAUSE
) {
567 lcpu
->state
= LCPU_PAUSE
;
568 if (lcpu
== x86_lcpu()) {
569 while (lcpu
->state
== LCPU_PAUSE
)
575 * Clear the halted flag for the specified CPU; that will
576 * get it out of its spin loop.
578 if (flags
& PM_SAFE_FL_RESUME
) {
579 lcpu
->state
= LCPU_RUN
;
584 static uint32_t saved_run_count
= 0;
587 machine_run_count(uint32_t count
)
589 if (pmDispatch
!= NULL
590 && pmDispatch
->pmSetRunCount
!= NULL
)
591 pmDispatch
->pmSetRunCount(count
);
593 saved_run_count
= count
;
597 machine_processor_is_inactive(processor_t processor
)
599 int cpu
= processor
->cpu_id
;
601 if (pmDispatch
!= NULL
602 && pmDispatch
->pmIsCPUUnAvailable
!= NULL
)
603 return(pmDispatch
->pmIsCPUUnAvailable(cpu_to_lcpu(cpu
)));
609 machine_choose_processor(processor_set_t pset
,
610 processor_t preferred
)
624 startCPU
= pset
->cpu_set_low
;
625 endCPU
= pset
->cpu_set_hi
;
628 if (preferred
== NULL
)
631 preferredCPU
= preferred
->cpu_id
;
633 if (pmDispatch
!= NULL
634 && pmDispatch
->pmChooseCPU
!= NULL
) {
635 chosenCPU
= pmDispatch
->pmChooseCPU(startCPU
, endCPU
, preferredCPU
);
639 return(cpu_datap(chosenCPU
)->cpu_processor
);
646 pmThreadGetUrgency(uint64_t *rt_period
, uint64_t *rt_deadline
)
649 return(thread_get_urgency(rt_period
, rt_deadline
));
653 uint32_t urgency_stats
[64][THREAD_URGENCY_MAX
];
656 #define URGENCY_NOTIFICATION_ASSERT_NS (5 * 1000 * 1000)
657 uint64_t urgency_notification_assert_abstime_threshold
, urgency_notification_max_recorded
;
660 thread_tell_urgency(int urgency
,
662 uint64_t rt_deadline
)
664 uint64_t urgency_notification_time_start
, delta
;
665 boolean_t urgency_assert
= (urgency_notification_assert_abstime_threshold
!= 0);
666 assert(get_preemption_level() > 0 || ml_get_interrupts_enabled() == FALSE
);
668 urgency_stats
[cpu_number() % 64][urgency
]++;
671 || pmDispatch
== NULL
672 || pmDispatch
->pmThreadTellUrgency
== NULL
)
675 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED
,MACH_URGENCY
) | DBG_FUNC_START
, urgency
, rt_period
, (rt_deadline
>> 32), rt_deadline
, 0);
677 if (__improbable((urgency_assert
== TRUE
)))
678 urgency_notification_time_start
= mach_absolute_time();
680 pmDispatch
->pmThreadTellUrgency(urgency
, rt_period
, rt_deadline
);
682 if (__improbable((urgency_assert
== TRUE
))) {
683 delta
= mach_absolute_time() - urgency_notification_time_start
;
685 if (__improbable(delta
> urgency_notification_max_recorded
)) {
686 /* This is not synchronized, but it doesn't matter
687 * if we (rarely) miss an event, as it is statistically
688 * unlikely that it will never recur.
690 urgency_notification_max_recorded
= delta
;
692 if (__improbable((delta
> urgency_notification_assert_abstime_threshold
) && !machine_timeout_suspended()))
693 panic("Urgency notification callout %p exceeded threshold, 0x%llx abstime units", pmDispatch
->pmThreadTellUrgency
, delta
);
697 KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED
,MACH_URGENCY
) | DBG_FUNC_END
, urgency
, rt_period
, (rt_deadline
>> 32), rt_deadline
, 0);
701 active_rt_threads(boolean_t active
)
704 || pmDispatch
== NULL
705 || pmDispatch
->pmActiveRTThreads
== NULL
)
708 pmDispatch
->pmActiveRTThreads(active
);
712 pmGetSavedRunCount(void)
714 return(saved_run_count
);
718 * Returns the root of the package tree.
727 pmCPUGetHibernate(int cpu
)
729 return(cpu_datap(cpu
)->cpu_hibernate
);
733 pmLCPUtoProcessor(int lcpu
)
735 return(cpu_datap(lcpu
)->cpu_processor
);
739 pmReSyncDeadlines(int cpu
)
741 static boolean_t registered
= FALSE
;
744 PM_interrupt_register(&etimer_resync_deadlines
);
748 if ((uint32_t)cpu
== current_cpu_datap()->lcpu
.cpu_num
)
749 etimer_resync_deadlines();
751 cpu_PM_interrupt(cpu
);
757 lapic_send_ipi(cpu
, LAPIC_PM_INTERRUPT
);
761 pmGetNanotimeInfo(pm_rtc_nanotime_t
*rtc_nanotime
)
764 * Make sure that nanotime didn't change while we were reading it.
767 rtc_nanotime
->generation
= pal_rtc_nanotime_info
.generation
; /* must be first */
768 rtc_nanotime
->tsc_base
= pal_rtc_nanotime_info
.tsc_base
;
769 rtc_nanotime
->ns_base
= pal_rtc_nanotime_info
.ns_base
;
770 rtc_nanotime
->scale
= pal_rtc_nanotime_info
.scale
;
771 rtc_nanotime
->shift
= pal_rtc_nanotime_info
.shift
;
772 } while(pal_rtc_nanotime_info
.generation
!= 0
773 && rtc_nanotime
->generation
!= pal_rtc_nanotime_info
.generation
);
777 pmTimerQueueMigrate(int target_cpu
)
779 /* Call the etimer code to do this. */
780 return (target_cpu
!= cpu_number())
781 ? etimer_queue_migrate(target_cpu
)
787 * Called by the power management kext to register itself and to get the
788 * callbacks it might need into other kernel functions. This interface
789 * is versioned to allow for slight mis-matches between the kext and the
793 pmKextRegister(uint32_t version
, pmDispatch_t
*cpuFuncs
,
794 pmCallBacks_t
*callbacks
)
796 if (callbacks
!= NULL
&& version
== PM_DISPATCH_VERSION
) {
797 callbacks
->setRTCPop
= setPop
;
798 callbacks
->resyncDeadlines
= pmReSyncDeadlines
;
799 callbacks
->initComplete
= pmInitComplete
;
800 callbacks
->GetLCPU
= pmGetLogicalCPU
;
801 callbacks
->GetCore
= pmGetCore
;
802 callbacks
->GetDie
= pmGetDie
;
803 callbacks
->GetPackage
= pmGetPackage
;
804 callbacks
->GetMyLCPU
= pmGetMyLogicalCPU
;
805 callbacks
->GetMyCore
= pmGetMyCore
;
806 callbacks
->GetMyDie
= pmGetMyDie
;
807 callbacks
->GetMyPackage
= pmGetMyPackage
;
808 callbacks
->GetPkgRoot
= pmGetPkgRoot
;
809 callbacks
->LockCPUTopology
= pmLockCPUTopology
;
810 callbacks
->GetHibernate
= pmCPUGetHibernate
;
811 callbacks
->LCPUtoProcessor
= pmLCPUtoProcessor
;
812 callbacks
->ThreadBind
= thread_bind
;
813 callbacks
->GetSavedRunCount
= pmGetSavedRunCount
;
814 callbacks
->GetNanotimeInfo
= pmGetNanotimeInfo
;
815 callbacks
->ThreadGetUrgency
= pmThreadGetUrgency
;
816 callbacks
->RTCClockAdjust
= rtc_clock_adjust
;
817 callbacks
->timerQueueMigrate
= pmTimerQueueMigrate
;
818 callbacks
->topoParms
= &topoParms
;
819 callbacks
->pmSendIPI
= pmSendIPI
;
820 callbacks
->InterruptPending
= lapic_is_interrupt_pending
;
821 callbacks
->IsInterrupting
= lapic_is_interrupting
;
822 callbacks
->InterruptStats
= lapic_interrupt_counts
;
823 callbacks
->DisableApicTimer
= lapic_disable_timer
;
825 panic("Version mis-match between Kernel and CPU PM");
828 if (cpuFuncs
!= NULL
) {
829 pmDispatch
= cpuFuncs
;
832 && pmDispatch
->pmCPUStateInit
!= NULL
) {
833 (*pmDispatch
->pmCPUStateInit
)();
834 earlyTopology
= FALSE
;
837 if (pmDispatch
->pmIPIHandler
!= NULL
) {
838 lapic_set_pm_func((i386_intr_func_t
)pmDispatch
->pmIPIHandler
);
844 * Unregisters the power management functions from the kext.
847 pmUnRegister(pmDispatch_t
*cpuFuncs
)
849 if (cpuFuncs
!= NULL
&& pmDispatch
== cpuFuncs
) {
854 /******************************************************************************
856 * All of the following are deprecated interfaces and no longer used.
858 ******************************************************************************/
860 pmsControl(__unused
uint32_t request
, __unused user_addr_t reqaddr
,
861 __unused
uint32_t reqsize
)
863 return(KERN_SUCCESS
);
882 pmsRun(__unused
uint32_t nstep
)
887 pmsBuild(__unused pmsDef
*pd
, __unused
uint32_t pdsize
,
888 __unused pmsSetFunc_t
*functab
,
889 __unused
uint32_t platformData
, __unused pmsQueryFunc_t queryFunc
)
891 return(KERN_SUCCESS
);