/*
 * Copyright (c) 2004-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * CPU-specific power management support.
 *
 * Implements the "wrappers" to the KEXT.
 */
34 #include <i386/machine_routines.h>
35 #include <i386/machine_cpu.h>
36 #include <i386/misc_protos.h>
37 #include <i386/pmap.h>
40 #include <i386/proc_reg.h>
42 #include <kern/processor.h>
43 #include <i386/cpu_threads.h>
44 #include <i386/pmCPU.h>
45 #include <i386/cpuid.h>
46 #include <i386/rtclock.h>
48 extern int disableConsoleOutput
;
50 decl_simple_lock_data(,pm_init_lock
);
53 * The following is set when the KEXT loads and initializes.
55 pmDispatch_t
*pmDispatch
= NULL
;
58 * Current power management states (for use until KEXT is loaded).
60 static pmInitState_t pmInitState
;
62 static uint32_t pmInitDone
= 0;
65 * Nap control variables:
67 uint32_t forcenap
= 0; /* Force nap (fn) boot-arg controls */
70 * Do any initialization needed
75 static int initialized
= 0;
78 * Initialize some of the initial state to "uninitialized" until
79 * it gets set with something more useful. This allows the KEXT
80 * to determine if the initial value was actually set to something.
83 pmInitState
.PState
= -1;
84 pmInitState
.PLimit
= -1;
85 pmInitState
.maxBusDelay
= -1;
89 if (pmDispatch
!= NULL
&& pmDispatch
->pmsInit
!= NULL
)
90 (*pmDispatch
->pmsInit
)();
94 * Start the power management stepper on all processors
96 * All processors must be parked. This should be called when the hardware
97 * is ready to step. Probably only at boot and after wake from sleep.
103 if (pmDispatch
!= NULL
&& pmDispatch
->pmsStart
!= NULL
)
104 (*pmDispatch
->pmsStart
)();
108 * Park the stepper execution. This will force the stepper on this
109 * processor to abandon its current step and stop. No changes to the
110 * hardware state is made and any previous step is lost.
112 * This is used as the initial state at startup and when the step table
119 if (pmDispatch
!= NULL
&& pmDispatch
->pmsPark
!= NULL
)
120 (*pmDispatch
->pmsPark
)();
124 * Control the Power Management Stepper.
125 * Called from user state by the superuser.
126 * Interrupts disabled.
128 * This interface is deprecated and is now a no-op.
131 pmsControl(__unused
uint32_t request
, __unused user_addr_t reqaddr
,
132 __unused
uint32_t reqsize
)
134 return(KERN_SUCCESS
);
138 * Broadcast a change to all processors including ourselves.
140 * Interrupts disabled.
143 pmsRun(uint32_t nstep
)
145 if (pmDispatch
!= NULL
&& pmDispatch
->pmsRun
!= NULL
)
146 (*pmDispatch
->pmsRun
)(nstep
);
150 * Build the tables needed for the stepper. This includes both the step
151 * definitions and the step control table.
153 * We most absolutely need to be parked before this happens because we're
154 * going to change the table. We also have to be complte about checking
155 * for errors. A copy is always made because we don't want to be crippled
156 * by not being able to change the table or description formats.
158 * We pass in a table of external functions and the new stepper def uses
159 * the corresponding indexes rather than actual function addresses. This
160 * is done so that a proper table can be built with the control syscall.
161 * It can't supply addresses, so the index has to do. We internalize the
162 * table so our caller does not need to keep it. Note that passing in a 0
163 * will use the current function table. Also note that entry 0 is reserved
164 * and must be 0, we will check and fail the build.
166 * The platformData parameter is a 32-bit word of data that is passed unaltered
167 * to the set function.
169 * The queryFunc parameter is the address of a function that will return the
170 * current state of the platform. The format of the data returned is the same
171 * as the platform specific portions of pmsSetCmd, i.e., pmsXClk, pmsVoltage,
172 * and any part of pmsPowerID that is maintained by the platform hardware
173 * (an example would be the values of the gpios that correspond to pmsPowerID).
174 * The value should be constructed by querying hardware rather than returning
175 * a value cached by software. One of the intents of this function is to help
176 * recover lost or determine initial power states.
180 pmsBuild(pmsDef
*pd
, uint32_t pdsize
, pmsSetFunc_t
*functab
,
181 uint32_t platformData
, pmsQueryFunc_t queryFunc
)
183 kern_return_t rc
= 0;
185 if (pmDispatch
!= NULL
&& pmDispatch
->pmsBuild
!= NULL
)
186 rc
= (*pmDispatch
->pmsBuild
)(pd
, pdsize
, functab
,
187 platformData
, queryFunc
);
194 * Load a new ratio/VID table.
196 * Note that this interface is specific to the Intel SpeedStep implementation.
197 * It is expected that this will only be called once to override the default
198 * ratio/VID table when the platform starts.
200 * Normally, the table will need to be replaced at the same time that the
201 * stepper program proper is replaced, as the PState indices from an old
202 * program may no longer be valid. When replacing the default program this
203 * should not be a problem as any new table will have at least two PState
204 * entries and the default program only references P0 and P1.
207 pmsCPULoadVIDTable(uint16_t *tablep
, int nstates
)
209 if (pmDispatch
!= NULL
&& pmDispatch
->pmsCPULoadVIDTable
!= NULL
)
210 return((*pmDispatch
->pmsCPULoadVIDTable
)(tablep
, nstates
));
214 if (nstates
> MAX_PSTATES
)
215 return(KERN_FAILURE
);
217 for (i
= 0; i
< nstates
; i
+= 1)
218 pmInitState
.VIDTable
[i
] = tablep
[i
];
220 return(KERN_SUCCESS
);
224 * Set the (global) PState limit. CPUs will not be permitted to run at
225 * a lower (more performant) PState than this.
228 pmsCPUSetPStateLimit(uint32_t limit
)
230 if (pmDispatch
!= NULL
&& pmDispatch
->pmsCPUSetPStateLimit
!= NULL
)
231 return((*pmDispatch
->pmsCPUSetPStateLimit
)(limit
));
233 pmInitState
.PLimit
= limit
;
234 return(KERN_SUCCESS
);
238 * Initialize the Cstate change code.
241 power_management_init(void)
243 static boolean_t initialized
= FALSE
;
246 * Initialize the lock for the KEXT initialization.
249 simple_lock_init(&pm_init_lock
, 0);
253 if (pmDispatch
!= NULL
&& pmDispatch
->cstateInit
!= NULL
)
254 (*pmDispatch
->cstateInit
)();
258 * ACPI calls the following routine to set/update mwait hints. A table
259 * (possibly null) specifies the available Cstates and their hints, all
260 * other states are assumed to be invalid. ACPI may update available
261 * states to change the nap policy (for example, while AC power is
265 Cstate_table_set(Cstate_hint_t
*tablep
, unsigned int nstates
)
268 return(KERN_SUCCESS
);
270 if (pmDispatch
!= NULL
&& pmDispatch
->cstateTableSet
!= NULL
)
271 return((*pmDispatch
->cstateTableSet
)(tablep
, nstates
));
275 for (i
= 0; i
< nstates
; i
+= 1) {
276 pmInitState
.CStates
[i
].number
= tablep
[i
].number
;
277 pmInitState
.CStates
[i
].hint
= tablep
[i
].hint
;
280 pmInitState
.CStatesCount
= nstates
;
282 return(KERN_SUCCESS
);
286 * Called when the CPU is idle. It will choose the best C state to
290 machine_idle_cstate(boolean_t halted
)
293 && pmDispatch
!= NULL
294 && pmDispatch
->cstateMachineIdle
!= NULL
)
295 (*pmDispatch
->cstateMachineIdle
)(!halted
?
296 0x7FFFFFFFFFFFFFFFULL
: 0ULL);
299 * If no power managment and a processor is taken off-line,
300 * then invalidate the cache and halt it (it will not be able
301 * to be brought back on-line without resetting the CPU).
303 __asm__
volatile ( "wbinvd; hlt" );
306 * If no power management, re-enable interrupts and halt.
307 * This will keep the CPU from spinning through the scheduler
308 * and will allow at least some minimal power savings (but it
309 * may cause problems in some MP configurations w.r.t to the
310 * APIC stopping during a P-State transition).
312 __asm__
volatile ( "sti; hlt" );
317 * Called when the CPU is to be halted. It will choose the best C-State
321 pmCPUHalt(uint32_t reason
)
326 __asm__
volatile ("wbinvd; hlt");
330 __asm__
volatile ("cli; wbinvd; hlt");
335 __asm__
volatile ("cli");
338 && pmDispatch
!= NULL
339 && pmDispatch
->pmCPUHalt
!= NULL
) {
340 (*pmDispatch
->pmCPUHalt
)();
342 cpu_data_t
*cpup
= current_cpu_datap();
345 * If no power managment and a processor is taken off-line,
346 * then invalidate the cache and halt it (it will not be able
347 * to be brought back on-line without resetting the CPU).
349 __asm__
volatile ("wbinvd");
350 cpup
->lcpu
.halted
= TRUE
;
351 __asm__
volatile ( "wbinvd; hlt" );
358 * Called to initialize the power management structures for the CPUs.
363 if (pmDispatch
!= NULL
&& pmDispatch
->pmCPUStateInit
!= NULL
)
364 (*pmDispatch
->pmCPUStateInit
)();
374 pmGetLogicalCPU(int cpu
)
376 return(cpu_to_lcpu(cpu
));
380 pmGetMyLogicalCPU(void)
382 cpu_data_t
*cpup
= current_cpu_datap();
390 return(cpu_to_core(cpu
));
396 cpu_data_t
*cpup
= current_cpu_datap();
398 return(cpup
->lcpu
.core
);
402 pmGetPackage(int cpu
)
404 return(cpu_to_package(cpu
));
410 cpu_data_t
*cpup
= current_cpu_datap();
412 return(cpup
->lcpu
.core
->package
);
416 pmLockCPUTopology(int lock
)
419 simple_lock(&x86_topo_lock
);
421 simple_unlock(&x86_topo_lock
);
426 * Called to get the next deadline that has been set by the
427 * power management code.
430 pmCPUGetDeadline(cpu_data_t
*cpu
)
432 uint64_t deadline
= EndOfAllTime
;
435 && pmDispatch
!= NULL
436 && pmDispatch
->GetDeadline
!= NULL
)
437 deadline
= (*pmDispatch
->GetDeadline
)(&cpu
->lcpu
);
443 * Called to determine if the supplied deadline or the power management
444 * deadline is sooner. Returns which ever one is first.
447 pmCPUSetDeadline(cpu_data_t
*cpu
, uint64_t deadline
)
450 && pmDispatch
!= NULL
451 && pmDispatch
->SetDeadline
!= NULL
)
452 deadline
= (*pmDispatch
->SetDeadline
)(&cpu
->lcpu
, deadline
);
458 * Called when a power management deadline expires.
461 pmCPUDeadline(cpu_data_t
*cpu
)
464 && pmDispatch
!= NULL
465 && pmDispatch
->Deadline
!= NULL
)
466 (*pmDispatch
->Deadline
)(&cpu
->lcpu
);
470 * Called to get a CPU out of idle.
473 pmCPUExitIdle(cpu_data_t
*cpu
)
478 && pmDispatch
!= NULL
479 && pmDispatch
->exitIdle
!= NULL
)
480 do_ipi
= (*pmDispatch
->exitIdle
)(&cpu
->lcpu
);
488 * Called when a CPU is being restarted after being powered off (as in S3).
491 pmCPUMarkRunning(cpu_data_t
*cpu
)
494 && pmDispatch
!= NULL
495 && pmDispatch
->markCPURunning
!= NULL
)
496 (*pmDispatch
->markCPURunning
)(&cpu
->lcpu
);
500 * Called from the HPET interrupt handler to perform the
501 * necessary power management work.
504 pmHPETInterrupt(void)
507 && pmDispatch
!= NULL
508 && pmDispatch
->HPETInterrupt
!= NULL
)
509 (*pmDispatch
->HPETInterrupt
)();
513 * Called to get/set CPU power management state.
516 pmCPUControl(uint32_t cmd
, void *datap
)
520 if (pmDispatch
!= NULL
521 && pmDispatch
->pmCPUControl
!= NULL
)
522 rc
= (*pmDispatch
->pmCPUControl
)(cmd
, datap
);
528 * Set the worst-case time for the C4 to C2 transition.
529 * No longer does anything.
532 ml_set_maxsnoop(__unused
uint32_t maxdelay
)
538 * Get the worst-case time for the C4 to C2 transition. Returns nanoseconds.
541 ml_get_maxsnoop(void)
543 uint64_t max_snoop
= 0;
545 if (pmDispatch
!= NULL
546 && pmDispatch
->getMaxSnoop
!= NULL
)
547 max_snoop
= pmDispatch
->getMaxSnoop();
549 return((unsigned)(max_snoop
& 0xffffffff));
554 ml_get_maxbusdelay(void)
556 uint64_t max_delay
= 0;
558 if (pmDispatch
!= NULL
559 && pmDispatch
->getMaxBusDelay
!= NULL
)
560 max_delay
= pmDispatch
->getMaxBusDelay();
562 return((uint32_t)(max_delay
& 0xffffffff));
566 * Set the maximum delay time allowed for snoop on the bus.
568 * Note that this value will be compared to the amount of time that it takes
569 * to transition from a non-snooping power state (C4) to a snooping state (C2).
570 * If maxBusDelay is less than C4C2SnoopDelay,
571 * we will not enter the lowest power state.
574 ml_set_maxbusdelay(uint32_t mdelay
)
576 uint64_t maxdelay
= mdelay
;
578 if (pmDispatch
!= NULL
579 && pmDispatch
->setMaxBusDelay
!= NULL
)
580 pmDispatch
->setMaxBusDelay(maxdelay
);
582 pmInitState
.maxBusDelay
= maxdelay
;
586 * Put a CPU into "safe" mode with respect to power.
588 * Some systems cannot operate at a continuous "normal" speed without
589 * exceeding the thermal design. This is called per-CPU to place the
590 * CPUs into a "safe" operating mode.
593 pmSafeMode(x86_lcpu_t
*lcpu
, uint32_t flags
)
595 if (pmDispatch
!= NULL
596 && pmDispatch
->pmCPUSafeMode
!= NULL
)
597 pmDispatch
->pmCPUSafeMode(lcpu
, flags
);
600 * Do something reasonable if the KEXT isn't present.
602 * We only look at the PAUSE and RESUME flags. The other flag(s)
603 * will not make any sense without the KEXT, so just ignore them.
605 * We set the halted flag in the LCPU structure to indicate
606 * that this CPU isn't to do anything. If it's the CPU we're
607 * currently running on, then spin until the halted flag is
610 if (flags
& PM_SAFE_FL_PAUSE
) {
612 if (lcpu
== x86_lcpu()) {
619 * Clear the halted flag for the specified CPU, that will
620 * get it out of it's spin loop.
622 if (flags
& PM_SAFE_FL_RESUME
) {
623 lcpu
->halted
= FALSE
;
/*
 * pmGetPkgRoot: returns the root of the package tree.
 * (The function body is not present in this copy of the file; it is
 * referenced by pmKextRegister below.)
 */
638 pmCPUGetHibernate(int cpu
)
640 return(cpu_datap(cpu
)->cpu_hibernate
);
644 pmLCPUtoProcessor(int lcpu
)
646 return(cpu_datap(lcpu
)->cpu_processor
);
650 * Called by the power management kext to register itself and to get the
651 * callbacks it might need into other kernel functions. This interface
652 * is versioned to allow for slight mis-matches between the kext and the
656 pmKextRegister(uint32_t version
, pmDispatch_t
*cpuFuncs
,
657 pmCallBacks_t
*callbacks
)
659 if (callbacks
!= NULL
&& version
== PM_DISPATCH_VERSION
) {
660 callbacks
->InitState
= &pmInitState
;
661 callbacks
->setRTCPop
= setPop
;
662 callbacks
->resyncDeadlines
= etimer_resync_deadlines
;
663 callbacks
->initComplete
= pmInitComplete
;
664 callbacks
->GetLCPU
= pmGetLogicalCPU
;
665 callbacks
->GetCore
= pmGetCore
;
666 callbacks
->GetPackage
= pmGetPackage
;
667 callbacks
->GetMyLCPU
= pmGetMyLogicalCPU
;
668 callbacks
->GetMyCore
= pmGetMyCore
;
669 callbacks
->GetMyPackage
= pmGetMyPackage
;
670 callbacks
->CoresPerPkg
= cpuid_info()->cpuid_cores_per_package
;
671 callbacks
->GetPkgRoot
= pmGetPkgRoot
;
672 callbacks
->LockCPUTopology
= pmLockCPUTopology
;
673 callbacks
->GetHibernate
= pmCPUGetHibernate
;
674 callbacks
->LCPUtoProcessor
= pmLCPUtoProcessor
;
677 if (cpuFuncs
!= NULL
) {
678 pmDispatch
= cpuFuncs
;
683 * Unregisters the power management functions from the kext.
686 pmUnRegister(pmDispatch_t
*cpuFuncs
)
688 if (cpuFuncs
!= NULL
&& pmDispatch
== cpuFuncs
) {