X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/4452a7af2eac33dbad800bcc91f2399d62c18f53..cf7d32b81c573a0536dc4da4157f9c26f8d0bed3:/osfmk/i386/pmCPU.c diff --git a/osfmk/i386/pmCPU.c b/osfmk/i386/pmCPU.c index 63108e188..1f12073fb 100644 --- a/osfmk/i386/pmCPU.c +++ b/osfmk/i386/pmCPU.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004-2007 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -40,19 +40,10 @@ #include #include #include +#include #include #include #include -#if MACH_KDB -#include -#include -#include -#include -#include -#include -#include -#include -#endif extern int disableConsoleOutput; @@ -68,142 +59,137 @@ pmDispatch_t *pmDispatch = NULL; */ static pmInitState_t pmInitState; +static uint32_t pmInitDone = 0; + /* * Nap control variables: */ -uint32_t napCtl = 0; /* Defaults to neither napping - nor halting */ uint32_t forcenap = 0; /* Force nap (fn) boot-arg controls */ -uint32_t maxBusDelay = 0xFFFFFFFF; /* Maximum memory bus delay that - I/O devices can tolerate - before errors (nanoseconds) */ -uint32_t C4C2SnoopDelay = 0; /* C4 to C2 transition time - - time before a C4 system - can snoop (nanoseconds) */ - -/* - * We are being asked to set PState (sel). - */ -void -pmsCPUSet(uint32_t sel) -{ - if (pmDispatch != NULL && pmDispatch->pmsCPUSet != NULL) - (*pmDispatch->pmsCPUSet)(sel); - else - pmInitState.PState = sel; -} /* - * This code configures the initial step tables. It should be called after - * the timebase frequency is initialized. - * - * Note that this is not used in normal operation. It is strictly for - * debugging/testing purposes. + * Do any initialization needed */ void -pmsCPUConf(void) +pmsInit(void) { + static int initialized = 0; - if (pmDispatch != NULL && pmDispatch->pmsCPUConf != NULL) - (*pmDispatch->pmsCPUConf)(); -} - -/* - * Machine-dependent initialization. - */ -void -pmsCPUMachineInit(void) -{ /* * Initialize some of the initial state to "uninitialized" until * it gets set with something more useful. This allows the KEXT * to determine if the initial value was actually set to something. */ - pmInitState.PState = -1; - pmInitState.PLimit = -1; + if (!initialized) { + pmInitState.PState = -1; + pmInitState.PLimit = -1; + pmInitState.maxBusDelay = -1; + initialized = 1; + } - if (pmDispatch != NULL && pmDispatch->pmsCPUMachineInit != NULL) - (*pmDispatch->pmsCPUMachineInit)(); + if (pmDispatch != NULL && pmDispatch->pmsInit != NULL) + (*pmDispatch->pmsInit)(); } /* - * This function should be called once for each processor to force the - * processor to the correct initial voltage and frequency. + * Start the power management stepper on all processors + * + * All processors must be parked. This should be called when the hardware + * is ready to step. Probably only at boot and after wake from sleep. + * */ void -pmsCPUInit(void) +pmsStart(void) { - pmsCPUMachineInit(); - if (pmDispatch != NULL && pmDispatch->pmsCPUInit != NULL) - (*pmDispatch->pmsCPUInit)(); + if (pmDispatch != NULL && pmDispatch->pmsStart != NULL) + (*pmDispatch->pmsStart)(); } /* - * Broadcast a change to all processing including ourselves. + * Park the stepper execution. This will force the stepper on this + * processor to abandon its current step and stop. No changes to the + * hardware state is made and any previous step is lost. + * + * This is used as the initial state at startup and when the step table + * is being changed. + * */ void -pmsCPURun(uint32_t nstep) +pmsPark(void) { - if (pmDispatch != NULL && pmDispatch->pmsCPURun != NULL) - (*pmDispatch->pmsCPURun)(nstep); + if (pmDispatch != NULL && pmDispatch->pmsPark != NULL) + (*pmDispatch->pmsPark)(); } /* - * Return the current state of a core. - */ -uint32_t -pmsCPUQuery(void) -{ - if (pmDispatch != NULL && pmDispatch->pmsCPUQuery != NULL) - return((*pmDispatch->pmsCPUQuery)()); - - /* - * Return a non-sense value. - */ - return((~0) << 16); -} - -/* - * Return the current state of the package. + * Control the Power Management Stepper. + * Called from user state by the superuser. + * Interrupts disabled. + * + * This interface is deprecated and is now a no-op. */ -uint32_t -pmsCPUPackageQuery(void) +kern_return_t +pmsControl(__unused uint32_t request, __unused user_addr_t reqaddr, + __unused uint32_t reqsize) { - if (pmDispatch != NULL && pmDispatch->pmsCPUPackageQuery != NULL) - return((*pmDispatch->pmsCPUPackageQuery)()); - - /* - * Return a non-sense value. - */ - return((~0) << 16); + return(KERN_SUCCESS); } /* - * Force the CPU package to the lowest power level. This is a low-level - * interface meant to be called from the panic or debugger code to bring - * the CPU to a safe power level for unmanaged operation. + * Broadcast a change to all processors including ourselves. * - * Note that while this will bring an entire package to a safe level, it - * cannot affect other packages. As a general rule, this should be run on - * every code as part of entering the debugger or on the panic path. + * Interrupts disabled. */ void -pmsCPUYellowFlag(void) +pmsRun(uint32_t nstep) { - if (pmDispatch != NULL && pmDispatch->pmsCPUYellowFlag != NULL) - (*pmDispatch->pmsCPUYellowFlag)(); + if (pmDispatch != NULL && pmDispatch->pmsRun != NULL) + (*pmDispatch->pmsRun)(nstep); } /* - * Restore the CPU to the power state it was in before a yellow flag. + * Build the tables needed for the stepper. This includes both the step + * definitions and the step control table. + * + * We most absolutely need to be parked before this happens because we're + * going to change the table. We also have to be complte about checking + * for errors. A copy is always made because we don't want to be crippled + * by not being able to change the table or description formats. + * + * We pass in a table of external functions and the new stepper def uses + * the corresponding indexes rather than actual function addresses. This + * is done so that a proper table can be built with the control syscall. + * It can't supply addresses, so the index has to do. We internalize the + * table so our caller does not need to keep it. Note that passing in a 0 + * will use the current function table. Also note that entry 0 is reserved + * and must be 0, we will check and fail the build. + * + * The platformData parameter is a 32-bit word of data that is passed unaltered + * to the set function. + * + * The queryFunc parameter is the address of a function that will return the + * current state of the platform. The format of the data returned is the same + * as the platform specific portions of pmsSetCmd, i.e., pmsXClk, pmsVoltage, + * and any part of pmsPowerID that is maintained by the platform hardware + * (an example would be the values of the gpios that correspond to pmsPowerID). + * The value should be constructed by querying hardware rather than returning + * a value cached by software. One of the intents of this function is to help + * recover lost or determine initial power states. + * */ -void -pmsCPUGreenFlag(void) +kern_return_t +pmsBuild(pmsDef *pd, uint32_t pdsize, pmsSetFunc_t *functab, + uint32_t platformData, pmsQueryFunc_t queryFunc) { - if (pmDispatch != NULL && pmDispatch->pmsCPUGreenFlag != NULL) - (*pmDispatch->pmsCPUGreenFlag)(); + kern_return_t rc = 0; + + if (pmDispatch != NULL && pmDispatch->pmsBuild != NULL) + rc = (*pmDispatch->pmsBuild)(pd, pdsize, functab, + platformData, queryFunc); + + return(rc); } + /* * Load a new ratio/VID table. * @@ -254,45 +240,20 @@ pmsCPUSetPStateLimit(uint32_t limit) void power_management_init(void) { - uint32_t cpuModel; - uint32_t cpuFamily; - uint32_t xcpuid[4]; + static boolean_t initialized = FALSE; /* * Initialize the lock for the KEXT initialization. */ - simple_lock_init(&pm_init_lock, 0); - - /* - * XXX - * - * The following is a hack to disable power management on some systems - * until the KEXT is done. This is strictly temporary!!! - */ - do_cpuid(1, xcpuid); - cpuFamily = (xcpuid[eax] >> 8) & 0xf; - cpuModel = (xcpuid[eax] >> 4) & 0xf; - - if (cpuFamily != 0x6 || cpuModel < 0xe) - pmDispatch = NULL; + if (!initialized) { + simple_lock_init(&pm_init_lock, 0); + initialized = TRUE; + } if (pmDispatch != NULL && pmDispatch->cstateInit != NULL) (*pmDispatch->cstateInit)(); } -/* - * This function will update the system nap policy. It should be called - * whenever conditions change: when the system is ready to being napping - * and if something changes the rules (e.g. a sysctl altering the policy - * for debugging). - */ -void -machine_nap_policy(void) -{ - if (pmDispatch != NULL && pmDispatch->cstateNapPolicy != NULL) - napCtl = (*pmDispatch->cstateNapPolicy)(forcenap, napCtl); -} - /* * ACPI calls the following routine to set/update mwait hints. A table * (possibly null) specifies the available Cstates and their hints, all @@ -321,155 +282,411 @@ Cstate_table_set(Cstate_hint_t *tablep, unsigned int nstates) return(KERN_SUCCESS); } -static inline void -sti(void) { - __asm__ volatile ( "sti" : : : "memory"); -} - /* * Called when the CPU is idle. It will choose the best C state to * be in. */ void -machine_idle_cstate(void) +machine_idle_cstate(boolean_t halted) { - if (pmDispatch != NULL && pmDispatch->cstateMachineIdle != NULL) - (*pmDispatch->cstateMachineIdle)(napCtl); - else { - sti(); + if (pmInitDone + && pmDispatch != NULL + && pmDispatch->cstateMachineIdle != NULL) + (*pmDispatch->cstateMachineIdle)(!halted ? + 0x7FFFFFFFFFFFFFFFULL : 0ULL); + else if (halted) { + /* + * If no power managment and a processor is taken off-line, + * then invalidate the cache and halt it (it will not be able + * to be brought back on-line without resetting the CPU). + */ + __asm__ volatile ( "wbinvd; hlt" ); + } else { + /* + * If no power management, re-enable interrupts and halt. + * This will keep the CPU from spinning through the scheduler + * and will allow at least some minimal power savings (but it + * may cause problems in some MP configurations w.r.t to the + * APIC stopping during a P-State transition). + */ + __asm__ volatile ( "sti; hlt" ); + } +} + +/* + * Called when the CPU is to be halted. It will choose the best C-State + * to be in. + */ +void +pmCPUHalt(uint32_t reason) +{ + + switch (reason) { + case PM_HALT_DEBUG: + __asm__ volatile ("wbinvd; hlt"); + break; + + case PM_HALT_PANIC: + __asm__ volatile ("cli; wbinvd; hlt"); + break; + + case PM_HALT_NORMAL: + default: + __asm__ volatile ("cli"); + + if (pmInitDone + && pmDispatch != NULL + && pmDispatch->pmCPUHalt != NULL) { + (*pmDispatch->pmCPUHalt)(); + } else { + cpu_data_t *cpup = current_cpu_datap(); + + /* + * If no power managment and a processor is taken off-line, + * then invalidate the cache and halt it (it will not be able + * to be brought back on-line without resetting the CPU). + */ + __asm__ volatile ("wbinvd"); + cpup->lcpu.halted = TRUE; + __asm__ volatile ( "wbinvd; hlt" ); + } + break; } } -static pmStats_t * -pmsCPUStats(void) +/* + * Called to initialize the power management structures for the CPUs. + */ +void +pmCPUStateInit(void) +{ + if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL) + (*pmDispatch->pmCPUStateInit)(); +} + +static void +pmInitComplete(void) +{ + pmInitDone = 1; +} + +static x86_lcpu_t * +pmGetLogicalCPU(int cpu) +{ + return(cpu_to_lcpu(cpu)); +} + +static x86_lcpu_t * +pmGetMyLogicalCPU(void) +{ + cpu_data_t *cpup = current_cpu_datap(); + + return(&cpup->lcpu); +} + +static x86_core_t * +pmGetCore(int cpu) +{ + return(cpu_to_core(cpu)); +} + +static x86_core_t * +pmGetMyCore(void) +{ + cpu_data_t *cpup = current_cpu_datap(); + + return(cpup->lcpu.core); +} + +static x86_pkg_t * +pmGetPackage(int cpu) +{ + return(cpu_to_package(cpu)); +} + +static x86_pkg_t * +pmGetMyPackage(void) { - cpu_data_t *pp; + cpu_data_t *cpup = current_cpu_datap(); - pp = current_cpu_datap(); - return(&pp->cpu_pmStats); + return(cpup->lcpu.core->package); } -static pmsd * -pmsCPUStepperData(void) +static void +pmLockCPUTopology(int lock) +{ + if (lock) { + simple_lock(&x86_topo_lock); + } else { + simple_unlock(&x86_topo_lock); + } +} + +/* + * Called to get the next deadline that has been set by the + * power management code. + */ +uint64_t +pmCPUGetDeadline(cpu_data_t *cpu) { - cpu_data_t *pp; + uint64_t deadline = EndOfAllTime; - pp = current_cpu_datap(); - return(&pp->pms); + if (pmInitDone + && pmDispatch != NULL + && pmDispatch->GetDeadline != NULL) + deadline = (*pmDispatch->GetDeadline)(&cpu->lcpu); + + return(deadline); } -static uint64_t * -CPUHPETAddr(void) +/* + * Called to determine if the supplied deadline or the power management + * deadline is sooner. Returns which ever one is first. + */ +uint64_t +pmCPUSetDeadline(cpu_data_t *cpu, uint64_t deadline) { - cpu_data_t *pp; - pp = current_cpu_datap(); - return(pp->cpu_pmHpet); + if (pmInitDone + && pmDispatch != NULL + && pmDispatch->SetDeadline != NULL) + deadline = (*pmDispatch->SetDeadline)(&cpu->lcpu, deadline); + + return(deadline); } /* - * Called by the power management kext to register itself and to get the - * callbacks it might need into other power management functions. + * Called when a power management deadline expires. */ void -pmRegister(pmDispatch_t *cpuFuncs, pmCallBacks_t *callbacks) -{ - if (callbacks != NULL) { - callbacks->Park = pmsPark; - callbacks->Run = pmsRun; - callbacks->RunLocal = pmsRunLocal; - callbacks->SetStep = pmsSetStep; - callbacks->NapPolicy = machine_nap_policy; - callbacks->Build = pmsBuild; - callbacks->Stats = pmsCPUStats; - callbacks->StepperData = pmsCPUStepperData; - callbacks->HPETAddr = CPUHPETAddr; - callbacks->InitState = &pmInitState; - callbacks->resetPop = resetPop; - } +pmCPUDeadline(cpu_data_t *cpu) +{ + if (pmInitDone + && pmDispatch != NULL + && pmDispatch->Deadline != NULL) + (*pmDispatch->Deadline)(&cpu->lcpu); +} - if (cpuFuncs != NULL) - pmDispatch = cpuFuncs; +/* + * Called to get a CPU out of idle. + */ +boolean_t +pmCPUExitIdle(cpu_data_t *cpu) +{ + boolean_t do_ipi; + + if (pmInitDone + && pmDispatch != NULL + && pmDispatch->exitIdle != NULL) + do_ipi = (*pmDispatch->exitIdle)(&cpu->lcpu); + else + do_ipi = TRUE; + + return(do_ipi); } /* - * Unregisters the power management functions from the kext. + * Called when a CPU is being restarted after being powered off (as in S3). */ void -pmUnRegister(pmDispatch_t *cpuFuncs) +pmCPUMarkRunning(cpu_data_t *cpu) { - if (cpuFuncs != NULL && pmDispatch == cpuFuncs) - pmDispatch = NULL; + if (pmInitDone + && pmDispatch != NULL + && pmDispatch->markCPURunning != NULL) + (*pmDispatch->markCPURunning)(&cpu->lcpu); } -#if MACH_KDB /* - * XXX stubs for now + * Called from the HPET interrupt handler to perform the + * necessary power management work. */ void -db_cfg(__unused db_expr_t addr, - __unused int have_addr, - __unused db_expr_t count, - __unused char *modif) +pmHPETInterrupt(void) { - return; + if (pmInitDone + && pmDispatch != NULL + && pmDispatch->HPETInterrupt != NULL) + (*pmDispatch->HPETInterrupt)(); } -void -db_display_iokit(__unused db_expr_t addr, - __unused int have_addr, - __unused db_expr_t count, - __unused char *modif) +/* + * Called to get/set CPU power management state. + */ +int +pmCPUControl(uint32_t cmd, void *datap) { - return; + int rc = -1; + + if (pmDispatch != NULL + && pmDispatch->pmCPUControl != NULL) + rc = (*pmDispatch->pmCPUControl)(cmd, datap); + + return(rc); } +/* + * Set the worst-case time for the C4 to C2 transition. + * No longer does anything. + */ void -db_dtimers(__unused db_expr_t addr, - __unused int have_addr, - __unused db_expr_t count, - __unused char *modif) +ml_set_maxsnoop(__unused uint32_t maxdelay) { - return; } -void -db_intcnt(__unused db_expr_t addr, - __unused int have_addr, - __unused db_expr_t count, - __unused char *modif) + +/* + * Get the worst-case time for the C4 to C2 transition. Returns nanoseconds. + */ +unsigned +ml_get_maxsnoop(void) { - return; + uint64_t max_snoop = 0; + + if (pmDispatch != NULL + && pmDispatch->getMaxSnoop != NULL) + max_snoop = pmDispatch->getMaxSnoop(); + + return((unsigned)(max_snoop & 0xffffffff)); } + +uint32_t +ml_get_maxbusdelay(void) +{ + uint64_t max_delay = 0; + + if (pmDispatch != NULL + && pmDispatch->getMaxBusDelay != NULL) + max_delay = pmDispatch->getMaxBusDelay(); + + return((uint32_t)(max_delay & 0xffffffff)); +} + +/* + * Set the maximum delay time allowed for snoop on the bus. + * + * Note that this value will be compared to the amount of time that it takes + * to transition from a non-snooping power state (C4) to a snooping state (C2). + * If maxBusDelay is less than C4C2SnoopDelay, + * we will not enter the lowest power state. + */ void -db_nap(__unused db_expr_t addr, - __unused int have_addr, - __unused db_expr_t count, - __unused char *modif) +ml_set_maxbusdelay(uint32_t mdelay) { - return; + uint64_t maxdelay = mdelay; + + if (pmDispatch != NULL + && pmDispatch->setMaxBusDelay != NULL) + pmDispatch->setMaxBusDelay(maxdelay); + else + pmInitState.maxBusDelay = maxdelay; } +/* + * Put a CPU into "safe" mode with respect to power. + * + * Some systems cannot operate at a continuous "normal" speed without + * exceeding the thermal design. This is called per-CPU to place the + * CPUs into a "safe" operating mode. + */ void -db_pmgr(__unused db_expr_t addr, - __unused int have_addr, - __unused db_expr_t count, - __unused char *modif) +pmSafeMode(x86_lcpu_t *lcpu, uint32_t flags) +{ + if (pmDispatch != NULL + && pmDispatch->pmCPUSafeMode != NULL) + pmDispatch->pmCPUSafeMode(lcpu, flags); + else { + /* + * Do something reasonable if the KEXT isn't present. + * + * We only look at the PAUSE and RESUME flags. The other flag(s) + * will not make any sense without the KEXT, so just ignore them. + * + * We set the halted flag in the LCPU structure to indicate + * that this CPU isn't to do anything. If it's the CPU we're + * currently running on, then spin until the halted flag is + * reset. + */ + if (flags & PM_SAFE_FL_PAUSE) { + lcpu->halted = TRUE; + if (lcpu == x86_lcpu()) { + while (lcpu->halted) + cpu_pause(); + } + } + + /* + * Clear the halted flag for the specified CPU, that will + * get it out of it's spin loop. + */ + if (flags & PM_SAFE_FL_RESUME) { + lcpu->halted = FALSE; + } + } +} + +/* + * Returns the root of the package tree. + */ +static x86_pkg_t * +pmGetPkgRoot(void) +{ + return(x86_pkgs); +} + +static boolean_t +pmCPUGetHibernate(int cpu) { - return; + return(cpu_datap(cpu)->cpu_hibernate); } +static processor_t +pmLCPUtoProcessor(int lcpu) +{ + return(cpu_datap(lcpu)->cpu_processor); +} + +/* + * Called by the power management kext to register itself and to get the + * callbacks it might need into other kernel functions. This interface + * is versioned to allow for slight mis-matches between the kext and the + * kernel. + */ void -db_test(__unused db_expr_t addr, - __unused int have_addr, - __unused db_expr_t count, - __unused char *modif) +pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs, + pmCallBacks_t *callbacks) { - return; + if (callbacks != NULL && version == PM_DISPATCH_VERSION) { + callbacks->InitState = &pmInitState; + callbacks->setRTCPop = setPop; + callbacks->resyncDeadlines = etimer_resync_deadlines; + callbacks->initComplete= pmInitComplete; + callbacks->GetLCPU = pmGetLogicalCPU; + callbacks->GetCore = pmGetCore; + callbacks->GetPackage = pmGetPackage; + callbacks->GetMyLCPU = pmGetMyLogicalCPU; + callbacks->GetMyCore = pmGetMyCore; + callbacks->GetMyPackage= pmGetMyPackage; + callbacks->CoresPerPkg = cpuid_info()->cpuid_cores_per_package; + callbacks->GetPkgRoot = pmGetPkgRoot; + callbacks->LockCPUTopology = pmLockCPUTopology; + callbacks->GetHibernate = pmCPUGetHibernate; + callbacks->LCPUtoProcessor = pmLCPUtoProcessor; + } + + if (cpuFuncs != NULL) { + pmDispatch = cpuFuncs; + } } +/* + * Unregisters the power management functions from the kext. + */ void -db_getpmgr(__unused pmData_t *pmj) +pmUnRegister(pmDispatch_t *cpuFuncs) { + if (cpuFuncs != NULL && pmDispatch == cpuFuncs) { + pmDispatch = NULL; + } } -#endif +