/*
- * Copyright (c) 2004-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
*
* Implements the "wrappers" to the KEXT.
*/
-#include <i386/machine_routines.h>
-#include <i386/machine_cpu.h>
-#include <i386/misc_protos.h>
-#include <i386/pmap.h>
#include <i386/asm.h>
+#include <i386/machine_cpu.h>
#include <i386/mp.h>
+#include <i386/machine_routines.h>
#include <i386/proc_reg.h>
+#include <i386/pmap.h>
+#include <i386/misc_protos.h>
+#include <kern/machine.h>
#include <kern/pms.h>
#include <kern/processor.h>
+#include <kern/etimer.h>
+#include <i386/cpu_threads.h>
#include <i386/pmCPU.h>
#include <i386/cpuid.h>
-#include <i386/rtclock.h>
-#if MACH_KDB
-#include <i386/db_machdep.h>
-#include <ddb/db_aout.h>
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>
-#include <ddb/db_expr.h>
-#endif
+#include <i386/rtclock_protos.h>
+#include <kern/sched_prim.h>
+#include <i386/lapic.h>
+#include <i386/pal_routines.h>
+
+#include <sys/kdebug.h>
extern int disableConsoleOutput;
-decl_simple_lock_data(,pm_init_lock);
+#define DELAY_UNSET 0xFFFFFFFFFFFFFFFFULL
/*
* The following is set when the KEXT loads and initializes.
*/
pmDispatch_t *pmDispatch = NULL;
-/*
- * Current power management states (for use until KEXT is loaded).
- */
-static pmInitState_t pmInitState;
+static uint32_t pmInitDone = 0; /* set to 1 by pmInitComplete() */
+static boolean_t earlyTopology = FALSE; /* pmCPUStateInit() ran before a handler was available */
+static uint64_t earlyMaxBusDelay = DELAY_UNSET; /* value held by ml_set_maxbusdelay() until a handler exists */
+static uint64_t earlyMaxIntDelay = DELAY_UNSET; /* value held by ml_set_maxintdelay() until a handler exists */
/*
- * Nap control variables:
+ * Initialize the Cstate change code.
*/
-uint32_t napCtl = 0; /* Defaults to neither napping
- nor halting */
-uint32_t forcenap = 0; /* Force nap (fn) boot-arg controls */
-uint32_t maxBusDelay = 0xFFFFFFFF; /* Maximum memory bus delay that
- I/O devices can tolerate
- before errors (nanoseconds) */
-uint32_t C4C2SnoopDelay = 0; /* C4 to C2 transition time -
- time before a C4 system
- can snoop (nanoseconds) */
+void
+power_management_init(void)
+{
+    /* Nothing to initialize unless a dispatch table with a cstateInit entry is present. */
+    if (pmDispatch == NULL || pmDispatch->cstateInit == NULL) {
+        return;
+    }
+
+    pmDispatch->cstateInit();
+}
/*
- * We are being asked to set PState (sel).
+ * Called when the CPU is idle. It calls into the power management kext
+ * to determine the best way to idle the CPU.
*/
void
-pmsCPUSet(uint32_t sel)
+machine_idle(void)
{
- if (pmDispatch != NULL && pmDispatch->pmsCPUSet != NULL)
- (*pmDispatch->pmsCPUSet)(sel);
- else
- pmInitState.PState = sel;
+ cpu_data_t *my_cpu = current_cpu_datap();
+
+ /* No per-CPU data: skip the idle handling and only re-enable interrupts. */
+ if (my_cpu == NULL)
+ goto out;
+
+ /* Record that this logical CPU is idle before deciding how to idle it. */
+ my_cpu->lcpu.state = LCPU_IDLE;
+ DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
+ MARK_CPU_IDLE(cpu_number());
+
+ if (pmInitDone) {
+ /*
+ * Handle the case where ml_set_maxbusdelay() or ml_set_maxintdelay()
+ * was called prior to the CPU PM kext being registered. We do
+ * this here because this is where the values will first be used:
+ * idle is where the decisions using these values are made.
+ */
+ if (earlyMaxBusDelay != DELAY_UNSET)
+ ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
+
+ if (earlyMaxIntDelay != DELAY_UNSET)
+ ml_set_maxintdelay(earlyMaxIntDelay);
+ }
+
+ /*
+ * NOTE(review): the constant handed to MachineIdle is the largest
+ * positive signed 64-bit value; presumably it means "no limit" on
+ * the idle time -- confirm against the kext interface.
+ */
+ if (pmInitDone
+ && pmDispatch != NULL
+ && pmDispatch->MachineIdle != NULL)
+ (*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
+ else {
+ /*
+ * If no power management, re-enable interrupts and halt.
+ * This will keep the CPU from spinning through the scheduler
+ * and will allow at least some minimal power savings (but it
+ * may cause problems in some MP configurations w.r.t. the APIC
+ * stopping during a GV3 transition).
+ */
+ pal_hlt();
+
+ /* Once woken, re-disable interrupts. */
+ pal_cli();
+ }
+
+ /*
+ * Mark the CPU as running again.
+ */
+ MARK_CPU_ACTIVE(cpu_number());
+ DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE);
+ my_cpu->lcpu.state = LCPU_RUN;
+
+ /*
+ * Re-enable interrupts.
+ */
+ out:
+ pal_sti();
}
/*
- * This code configures the initial step tables. It should be called after
- * the timebase frequency is initialized.
- *
- * Note that this is not used in normal operation. It is strictly for
- * debugging/testing purposes.
+ * Called when the CPU is to be halted. It will choose the best C-State
+ * to be in.
*/
void
-pmsCPUConf(void)
+pmCPUHalt(uint32_t reason)
{
+ cpu_data_t *cpup = current_cpu_datap();
+
+ /*
+ * The debug and panic cases both mark this CPU as paused; they differ
+ * only in the flag passed to pal_stop_cpu().
+ */
+ switch (reason) {
+ case PM_HALT_DEBUG:
+ cpup->lcpu.state = LCPU_PAUSE;
+ pal_stop_cpu(FALSE);
+ break;
+
+ case PM_HALT_PANIC:
+ cpup->lcpu.state = LCPU_PAUSE;
+ pal_stop_cpu(TRUE);
+ break;
+
+ case PM_HALT_NORMAL:
+ default:
+ pal_cli();
+
+ if (pmInitDone
+ && pmDispatch != NULL
+ && pmDispatch->pmCPUHalt != NULL) {
+ /*
+ * Halt the CPU (and put it in a low power state).
+ */
+ (*pmDispatch->pmCPUHalt)();
+
+ /*
+ * We've exited halt, so get the CPU schedulable again.
+ */
+ i386_init_slave_fast();
+
+ panic("init_slave_fast returned");
+ } else
+ {
+ /*
+ * If no power management and a processor is taken off-line,
+ * then invalidate the cache and halt it (it will not be able
+ * to be brought back on-line without resetting the CPU).
+ */
+ __asm__ volatile ("wbinvd");
+ cpup->lcpu.state = LCPU_HALT;
+ pal_stop_cpu(FALSE);
+
+ panic("back from Halt");
+ }
+
+ break;
+ }
+}
+
+void
+pmMarkAllCPUsOff(void)
+{
+    /* Only forwarded once the kext has completed init and supplies a handler. */
+    if (!pmInitDone || pmDispatch == NULL || pmDispatch->markAllCPUsOff == NULL) {
+        return;
+    }
+
+    pmDispatch->markAllCPUsOff();
+}
- if (pmDispatch != NULL && pmDispatch->pmsCPUConf != NULL)
- (*pmDispatch->pmsCPUConf)();
+static void
+pmInitComplete(void)
+{
+    /* Replay a CPU state init request that was deferred by pmCPUStateInit(). */
+    if (earlyTopology) {
+        if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL) {
+            pmDispatch->pmCPUStateInit();
+            earlyTopology = FALSE;
+        }
+    }
+
+    /* Wrappers in this file that test pmInitDone may now call the kext. */
+    pmInitDone = 1;
+}
+
+static x86_lcpu_t *
+pmGetLogicalCPU(int cpu)
+{
+    /* Kext callback (GetLCPU): look up the logical CPU for a CPU number. */
+    x86_lcpu_t *found = cpu_to_lcpu(cpu);
+
+    return found;
+}
+
+static x86_lcpu_t *
+pmGetMyLogicalCPU(void)
+{
+    /* Kext callback (GetMyLCPU): logical CPU embedded in the current CPU's data. */
+    return &current_cpu_datap()->lcpu;
+}
+
+static x86_core_t *
+pmGetCore(int cpu)
+{
+    /* Kext callback (GetCore): look up the core for a CPU number. */
+    x86_core_t *found = cpu_to_core(cpu);
+
+    return found;
+}
+
+static x86_core_t *
+pmGetMyCore(void)
+{
+    /* Kext callback (GetMyCore): core recorded in the current CPU's logical CPU. */
+    return current_cpu_datap()->lcpu.core;
+}
+
+static x86_die_t *
+pmGetDie(int cpu)
+{
+    /* Kext callback (GetDie): look up the die for a CPU number. */
+    x86_die_t *found = cpu_to_die(cpu);
+
+    return found;
+}
+
+static x86_die_t *
+pmGetMyDie(void)
+{
+    /* Kext callback (GetMyDie): die recorded in the current CPU's logical CPU. */
+    return current_cpu_datap()->lcpu.die;
+}
+
+static x86_pkg_t *
+pmGetPackage(int cpu)
+{
+    /* Kext callback (GetPackage): look up the package for a CPU number. */
+    x86_pkg_t *found = cpu_to_package(cpu);
+
+    return found;
+}
+
+static x86_pkg_t *
+pmGetMyPackage(void)
+{
+    /* Kext callback (GetMyPackage): package recorded in the current CPU's logical CPU. */
+    return current_cpu_datap()->lcpu.package;
+}
+
+static void
+pmLockCPUTopology(int lock)
+{
+    /* Zero releases x86_topo_lock; any other value acquires it. */
+    if (lock == 0) {
+        simple_unlock(&x86_topo_lock);
+        return;
+    }
+
+    simple_lock(&x86_topo_lock);
}
/*
- * Machine-dependent initialization.
+ * Called to get the next deadline that has been set by the
+ * power management code.
+ * Note: a return of 0 from AICPM and this routine signifies
+ * that no deadline is set.
*/
-void
-pmsCPUMachineInit(void)
+uint64_t
+pmCPUGetDeadline(cpu_data_t *cpu)
{
- /*
- * Initialize some of the initial state to "uninitialized" until
- * it gets set with something more useful. This allows the KEXT
- * to determine if the initial value was actually set to something.
- */
- pmInitState.PState = -1;
- pmInitState.PLimit = -1;
+    /* Without an initialized kext handler, report 0 (no deadline set). */
+    if (!pmInitDone || pmDispatch == NULL || pmDispatch->GetDeadline == NULL) {
+        return 0;
+    }
- if (pmDispatch != NULL && pmDispatch->pmsCPUMachineInit != NULL)
- (*pmDispatch->pmsCPUMachineInit)();
+
+    return pmDispatch->GetDeadline(&cpu->lcpu);
}
/*
- * This function should be called once for each processor to force the
- * processor to the correct initial voltage and frequency.
+ * Called to determine if the supplied deadline or the power management
+ * deadline is sooner. Returns which ever one is first.
*/
-void
-pmsCPUInit(void)
+uint64_t
+pmCPUSetDeadline(cpu_data_t *cpu, uint64_t deadline)
{
- pmsCPUMachineInit();
- if (pmDispatch != NULL && pmDispatch->pmsCPUInit != NULL)
- (*pmDispatch->pmsCPUInit)();
+    /* Without an initialized kext handler the caller's deadline is returned unchanged. */
+    if (!pmInitDone || pmDispatch == NULL || pmDispatch->SetDeadline == NULL) {
+        return deadline;
+    }
+
+    /* Otherwise the handler's answer replaces the supplied deadline. */
+    return pmDispatch->SetDeadline(&cpu->lcpu, deadline);
}
/*
- * Broadcast a change to all processing including ourselves.
+ * Called when a power management deadline expires.
*/
void
-pmsCPURun(uint32_t nstep)
+pmCPUDeadline(cpu_data_t *cpu)
{
- if (pmDispatch != NULL && pmDispatch->pmsCPURun != NULL)
- (*pmDispatch->pmsCPURun)(nstep);
+    /* Silently ignored until the kext has completed init and supplies a handler. */
+    if (!pmInitDone || pmDispatch == NULL || pmDispatch->Deadline == NULL) {
+        return;
+    }
+
+    pmDispatch->Deadline(&cpu->lcpu);
}
/*
- * Return the current state of a core.
+ * Called to get a CPU out of idle.
*/
-uint32_t
-pmsCPUQuery(void)
+boolean_t
+pmCPUExitIdle(cpu_data_t *cpu)
{
- if (pmDispatch != NULL && pmDispatch->pmsCPUQuery != NULL)
- return((*pmDispatch->pmsCPUQuery)());
+    boolean_t need_ipi = TRUE;
- /*
- * Return a non-sense value.
- */
- return((~0) << 16);
+
+    /* TRUE is the answer whenever no initialized kext handler can be asked. */
+    if (pmInitDone && pmDispatch != NULL && pmDispatch->exitIdle != NULL) {
+        need_ipi = pmDispatch->exitIdle(&cpu->lcpu);
+    }
+
+    return need_ipi;
}
-/*
- * Return the current state of the package.
- */
-uint32_t
-pmsCPUPackageQuery(void)
+kern_return_t
+pmCPUExitHalt(int cpu)
{
- if (pmDispatch != NULL && pmDispatch->pmsCPUPackageQuery != NULL)
- return((*pmDispatch->pmsCPUPackageQuery)());
+    /* KERN_INVALID_ARGUMENT is returned when no initialized kext handler exists. */
+    if (!pmInitDone || pmDispatch == NULL || pmDispatch->exitHalt == NULL) {
+        return KERN_INVALID_ARGUMENT;
+    }
- /*
- * Return a non-sense value.
- */
- return((~0) << 16);
+
+    return pmDispatch->exitHalt(cpu_to_lcpu(cpu));
+}
+
+kern_return_t
+pmCPUExitHaltToOff(int cpu)
+{
+    /* KERN_INVALID_ARGUMENT is returned when no initialized kext handler exists. */
+    if (!pmInitDone || pmDispatch == NULL || pmDispatch->exitHaltToOff == NULL) {
+        return KERN_INVALID_ARGUMENT;
+    }
+
+    return pmDispatch->exitHaltToOff(cpu_to_lcpu(cpu));
}
/*
- * Force the CPU package to the lowest power level. This is a low-level
- * interface meant to be called from the panic or debugger code to bring
- * the CPU to a safe power level for unmanaged operation.
- *
- * Note that while this will bring an entire package to a safe level, it
- * cannot affect other packages. As a general rule, this should be run on
- * every code as part of entering the debugger or on the panic path.
+ * Called to initialize the power management structures for the CPUs.
*/
void
-pmsCPUYellowFlag(void)
+pmCPUStateInit(void)
{
- if (pmDispatch != NULL && pmDispatch->pmsCPUYellowFlag != NULL)
- (*pmDispatch->pmsCPUYellowFlag)();
+    /*
+     * No handler yet: note the request in earlyTopology so that it can be
+     * replayed later (pmInitComplete() and pmKextRegister() check the flag).
+     */
+    if (pmDispatch == NULL || pmDispatch->pmCPUStateInit == NULL) {
+        earlyTopology = TRUE;
+        return;
+    }
+
+    pmDispatch->pmCPUStateInit();
}
/*
- * Restore the CPU to the power state it was in before a yellow flag.
+ * Called when a CPU is being restarted after being powered off (as in S3).
*/
void
-pmsCPUGreenFlag(void)
+pmCPUMarkRunning(cpu_data_t *cpu)
{
- if (pmDispatch != NULL && pmDispatch->pmsCPUGreenFlag != NULL)
- (*pmDispatch->pmsCPUGreenFlag)();
+    /*
+     * Mark the logical CPU belonging to `cpu` as running.
+     *
+     * With an initialized kext the request is forwarded to its
+     * markCPURunning handler. Without one, the state is set directly.
+     * Both paths act on the CPU that was passed in; the fallback used to
+     * update the *current* CPU instead, which disagreed with the handler
+     * path whenever the two were different.
+     */
+    if (pmInitDone
+        && pmDispatch != NULL
+        && pmDispatch->markCPURunning != NULL) {
+        pmDispatch->markCPURunning(&cpu->lcpu);
+    } else {
+        cpu->lcpu.state = LCPU_RUN;
+    }
}
/*
- * Load a new ratio/VID table.
- *
- * Note that this interface is specific to the Intel SpeedStep implementation.
- * It is expected that this will only be called once to override the default
- * ratio/VID table when the platform starts.
- *
- * Normally, the table will need to be replaced at the same time that the
- * stepper program proper is replaced, as the PState indices from an old
- * program may no longer be valid. When replacing the default program this
- * should not be a problem as any new table will have at least two PState
- * entries and the default program only references P0 and P1.
+ * Called to get/set CPU power management state.
*/
-kern_return_t
-pmsCPULoadVIDTable(uint16_t *tablep, int nstates)
+int
+pmCPUControl(uint32_t cmd, void *datap)
{
- if (pmDispatch != NULL && pmDispatch->pmsCPULoadVIDTable != NULL)
- return((*pmDispatch->pmsCPULoadVIDTable)(tablep, nstates));
- else {
- int i;
+    /* -1 is returned when there is no handler to pass the command to. */
+    if (pmDispatch == NULL || pmDispatch->pmCPUControl == NULL) {
+        return -1;
+    }
- if (nstates > MAX_PSTATES)
- return(KERN_FAILURE);
+
+    return pmDispatch->pmCPUControl(cmd, datap);
- for (i = 0; i < nstates; i += 1)
- pmInitState.VIDTable[i] = tablep[i];
- }
- return(KERN_SUCCESS);
}
/*
- * Set the (global) PState limit. CPUs will not be permitted to run at
- * a lower (more performant) PState than this.
+ * Called to save the timer state used by power management prior
+ * to "sleeping".
*/
-kern_return_t
-pmsCPUSetPStateLimit(uint32_t limit)
+void
+pmTimerSave(void)
{
- if (pmDispatch != NULL && pmDispatch->pmsCPUSetPStateLimit != NULL)
- return((*pmDispatch->pmsCPUSetPStateLimit)(limit));
+    /* No-op unless a dispatch table with a pmTimerStateSave entry is present. */
+    if (pmDispatch == NULL || pmDispatch->pmTimerStateSave == NULL) {
+        return;
+    }
+
+    pmDispatch->pmTimerStateSave();
+}
- pmInitState.PLimit = limit;
- return(KERN_SUCCESS);
+/*
+ * Called to restore the timer state used by power management after
+ * waking from "sleep".
+ */
+void
+pmTimerRestore(void)
+{
+    /* No-op unless a dispatch table with a pmTimerStateRestore entry is present. */
+    if (pmDispatch == NULL || pmDispatch->pmTimerStateRestore == NULL) {
+        return;
+    }
+
+    pmDispatch->pmTimerStateRestore();
}
/*
- * Initialize the Cstate change code.
+ * Set the worst-case time for the C4 to C2 transition.
+ * No longer does anything.
*/
void
-power_management_init(void)
+ml_set_maxsnoop(__unused uint32_t maxdelay)
{
- uint32_t cpuModel;
- uint32_t cpuFamily;
- uint32_t xcpuid[4];
+ /* Intentionally empty: the supplied value is ignored. */
+}
- /*
- * Initialize the lock for the KEXT initialization.
- */
- simple_lock_init(&pm_init_lock, 0);
- /*
- * XXX
- *
- * The following is a hack to disable power management on some systems
- * until the KEXT is done. This is strictly temporary!!!
- */
- do_cpuid(1, xcpuid);
- cpuFamily = (xcpuid[eax] >> 8) & 0xf;
- cpuModel = (xcpuid[eax] >> 4) & 0xf;
+/*
+ * Get the worst-case time for the C4 to C2 transition. Returns nanoseconds.
+ */
+unsigned
+ml_get_maxsnoop(void)
+{
+    uint64_t snoop = 0;
- if (cpuFamily != 0x6 || cpuModel < 0xe)
- pmDispatch = NULL;
+
+    /* Stays 0 unless an initialized kext provides getMaxSnoop. */
+    if (pmInitDone && pmDispatch != NULL && pmDispatch->getMaxSnoop != NULL) {
+        snoop = pmDispatch->getMaxSnoop();
+    }
- if (pmDispatch != NULL && pmDispatch->cstateInit != NULL)
- (*pmDispatch->cstateInit)();
+
+    /* Only the low 32 bits are handed back to the caller. */
+    return (unsigned)(snoop & 0xffffffff);
+}
+
+
+uint32_t
+ml_get_maxbusdelay(void)
+{
+    uint64_t bus_delay = 0;
+
+    /* Stays 0 unless an initialized kext provides getMaxBusDelay. */
+    if (pmInitDone && pmDispatch != NULL && pmDispatch->getMaxBusDelay != NULL) {
+        bus_delay = pmDispatch->getMaxBusDelay();
+    }
+
+    /* Only the low 32 bits are handed back to the caller. */
+    return (uint32_t)(bus_delay & 0xffffffff);
}
/*
- * This function will update the system nap policy. It should be called
- * whenever conditions change: when the system is ready to being napping
- * and if something changes the rules (e.g. a sysctl altering the policy
- * for debugging).
+ * Set the maximum delay time allowed for snoop on the bus.
+ *
+ * Note that this value will be compared to the amount of time that it takes
+ * to transition from a non-snooping power state (C4) to a snooping state (C2).
+ * If maxBusDelay is less than C4C2SnoopDelay,
+ * we will not enter the lowest power state.
*/
void
-machine_nap_policy(void)
+ml_set_maxbusdelay(uint32_t mdelay)
{
- if (pmDispatch != NULL && pmDispatch->cstateNapPolicy != NULL)
- napCtl = (*pmDispatch->cstateNapPolicy)(forcenap, napCtl);
+    uint64_t widened = mdelay;
+
+    /* No handler yet: keep the value in earlyMaxBusDelay for later. */
+    if (pmDispatch == NULL || pmDispatch->setMaxBusDelay == NULL) {
+        earlyMaxBusDelay = widened;
+        return;
+    }
+
+    /* Handler available: clear the held value, then pass the delay on. */
+    earlyMaxBusDelay = DELAY_UNSET;
+    pmDispatch->setMaxBusDelay(widened);
}
-/*
- * ACPI calls the following routine to set/update mwait hints. A table
- * (possibly null) specifies the available Cstates and their hints, all
- * other states are assumed to be invalid. ACPI may update available
- * states to change the nap policy (for example, while AC power is
- * available).
- */
-kern_return_t
-Cstate_table_set(Cstate_hint_t *tablep, unsigned int nstates)
+uint64_t
+ml_get_maxintdelay(void)
{
- if (forcenap)
- return(KERN_SUCCESS);
+    /* 0 is returned when there is no handler to ask. */
+    if (pmDispatch == NULL || pmDispatch->getMaxIntDelay == NULL) {
+        return 0;
+    }
- if (pmDispatch != NULL && pmDispatch->cstateTableSet != NULL)
- return((*pmDispatch->cstateTableSet)(tablep, nstates));
- else {
- unsigned int i;
+
- for (i = 0; i < nstates; i += 1) {
- pmInitState.CStates[i].number = tablep[i].number;
- pmInitState.CStates[i].hint = tablep[i].hint;
- }
+    return pmDispatch->getMaxIntDelay();
+}
- pmInitState.CStatesCount = nstates;
- }
- return(KERN_SUCCESS);
+/*
+ * Set the maximum delay allowed for an interrupt.
+ */
+void
+ml_set_maxintdelay(uint64_t mdelay)
+{
+    /* No handler yet: keep the value in earlyMaxIntDelay for later. */
+    if (pmDispatch == NULL || pmDispatch->setMaxIntDelay == NULL) {
+        earlyMaxIntDelay = mdelay;
+        return;
+    }
+
+    /* Handler available: clear the held value, then pass the delay on. */
+    earlyMaxIntDelay = DELAY_UNSET;
+    pmDispatch->setMaxIntDelay(mdelay);
}
-static inline void
-sti(void) {
- __asm__ volatile ( "sti" : : : "memory");
+/*
+ * Ask the power management kext whether interrupt prewake is applicable.
+ *
+ * Returns FALSE when the kext has not completed initialization or does
+ * not provide a pmInterruptPrewakeApplicable handler; otherwise returns
+ * whatever the handler reports.
+ *
+ * The parameter list is spelled (void): an empty () in a C definition
+ * leaves the parameters unspecified rather than declaring that there
+ * are none, so calls with stray arguments would not be diagnosed.
+ */
+boolean_t
+ml_get_interrupt_prewake_applicable(void)
+{
+    boolean_t applicable = FALSE;
+
+    if (pmInitDone
+        && pmDispatch != NULL
+        && pmDispatch->pmInterruptPrewakeApplicable != NULL) {
+        applicable = pmDispatch->pmInterruptPrewakeApplicable();
+    }
+
+    return applicable;
}
/*
- * Called when the CPU is idle. It will choose the best C state to
- * be in.
+ * Put a CPU into "safe" mode with respect to power.
+ *
+ * Some systems cannot operate at a continuous "normal" speed without
+ * exceeding the thermal design. This is called per-CPU to place the
+ * CPUs into a "safe" operating mode.
*/
void
-machine_idle_cstate(void)
+pmSafeMode(x86_lcpu_t *lcpu, uint32_t flags)
{
- if (pmDispatch != NULL && pmDispatch->cstateMachineIdle != NULL)
- (*pmDispatch->cstateMachineIdle)(napCtl);
+ if (pmDispatch != NULL
+ && pmDispatch->pmCPUSafeMode != NULL)
+ pmDispatch->pmCPUSafeMode(lcpu, flags);
else {
- sti();
+ /*
+ * Do something reasonable if the KEXT isn't present.
+ *
+ * We only look at the PAUSE and RESUME flags. The other flag(s)
+ * will not make any sense without the KEXT, so just ignore them.
+ *
+ * We set the CPU's state to indicate that it's paused. If this
+ * is the CPU we're currently running on, then spin until the
+ * state is no longer LCPU_PAUSE.
+ */
+ if (flags & PM_SAFE_FL_PAUSE) {
+ lcpu->state = LCPU_PAUSE;
+ if (lcpu == x86_lcpu()) {
+ while (lcpu->state == LCPU_PAUSE)
+ cpu_pause();
+ }
+ }
+
+ /*
+ * Set the specified CPU's state back to LCPU_RUN; that will
+ * get it out of its spin loop.
+ */
+ if (flags & PM_SAFE_FL_RESUME) {
+ lcpu->state = LCPU_RUN;
+ }
}
}
-static pmStats_t *
-pmsCPUStats(void)
+static uint32_t saved_run_count = 0; /* last count given to machine_run_count() while no handler existed */
+
+void
+machine_run_count(uint32_t count)
+{
+    /* No handler: remember the count so pmGetSavedRunCount() can report it. */
+    if (pmDispatch == NULL || pmDispatch->pmSetRunCount == NULL) {
+        saved_run_count = count;
+        return;
+    }
+
+    pmDispatch->pmSetRunCount(count);
+}
+
+boolean_t
+machine_processor_is_inactive(processor_t processor)
{
- cpu_data_t *pp;
+    int cpu_id = processor->cpu_id;
- pp = current_cpu_datap();
- return(&pp->cpu_pmStats);
+
+    /* With no handler to consult, the processor is never reported inactive. */
+    if (pmDispatch == NULL || pmDispatch->pmIsCPUUnAvailable == NULL) {
+        return FALSE;
+    }
+
+    return pmDispatch->pmIsCPUUnAvailable(cpu_to_lcpu(cpu_id));
}
-static pmsd *
-pmsCPUStepperData(void)
+processor_t
+machine_choose_processor(processor_set_t pset,
+                         processor_t preferred)
{
- cpu_data_t *pp;
+    int first_cpu;
+    int last_cpu;
+    int wanted_cpu;
+    int picked_cpu;
+
+    /* Until the kext has completed init, honor the caller's preference. */
+    if (!pmInitDone) {
+        return preferred;
+    }
+
+    /* -1 is passed for every argument the caller did not specify. */
+    if (pset != NULL) {
+        first_cpu = pset->cpu_set_low;
+        last_cpu = pset->cpu_set_hi;
+    } else {
+        first_cpu = -1;
+        last_cpu = -1;
+    }
+    wanted_cpu = (preferred != NULL) ? preferred->cpu_id : -1;
+
+    /* No handler to ask: fall back to the caller's preference. */
+    if (pmDispatch == NULL || pmDispatch->pmChooseCPU == NULL) {
+        return preferred;
+    }
+
+    picked_cpu = pmDispatch->pmChooseCPU(first_cpu, last_cpu, wanted_cpu);
- pp = current_cpu_datap();
- return(&pp->pms);
+
+    /* A handler result of -1 is reported to the caller as NULL. */
+    if (picked_cpu == -1) {
+        return NULL;
+    }
+
+    return cpu_datap(picked_cpu)->cpu_processor;
}
-static uint64_t *
-CPUHPETAddr(void)
+static int
+pmThreadGetUrgency(uint64_t *rt_period, uint64_t *rt_deadline)
{
- cpu_data_t *pp;
- pp = current_cpu_datap();
- return(pp->cpu_pmHpet);
+    /* Kext callback (ThreadGetUrgency): thin forwarder to thread_get_urgency(). */
+    int urgency = thread_get_urgency(rt_period, rt_deadline);
+
+    return urgency;
}
-/*
- * Called by the power management kext to register itself and to get the
- * callbacks it might need into other power management functions.
- */
+#if DEBUG
+/* Per-urgency counters, bumped by thread_tell_urgency(); first index is cpu_number() % 64. */
+uint32_t urgency_stats[64][THREAD_URGENCY_MAX];
+#endif
+
+/* NOTE(review): not referenced in the visible part of this file; presumably used elsewhere to derive the threshold below -- confirm. */
+#define URGENCY_NOTIFICATION_ASSERT_NS (5 * 1000 * 1000)
+/*
+ * A threshold of 0 disables the callout timing in thread_tell_urgency();
+ * max_recorded holds the longest callout time observed so far.
+ */
+uint64_t urgency_notification_assert_abstime_threshold, urgency_notification_max_recorded;
+
void
-pmRegister(pmDispatch_t *cpuFuncs, pmCallBacks_t *callbacks)
-{
- if (callbacks != NULL) {
- callbacks->Park = pmsPark;
- callbacks->Run = pmsRun;
- callbacks->RunLocal = pmsRunLocal;
- callbacks->SetStep = pmsSetStep;
- callbacks->NapPolicy = machine_nap_policy;
- callbacks->Build = pmsBuild;
- callbacks->Stats = pmsCPUStats;
- callbacks->StepperData = pmsCPUStepperData;
- callbacks->HPETAddr = CPUHPETAddr;
- callbacks->InitState = &pmInitState;
- callbacks->resetPop = resetPop;
- }
+thread_tell_urgency(int urgency,
+                    uint64_t rt_period,
+                    uint64_t rt_deadline)
+{
+    /*
+     * Pass a thread urgency notification on to the power management kext.
+     *
+     * urgency     - urgency value; also used as an index into the DEBUG
+     *               statistics table.
+     * rt_period   - forwarded unchanged to the kext handler.
+     * rt_deadline - forwarded unchanged to the kext handler.
+     *
+     * Must be called with preemption or interrupts disabled (asserted).
+     * When urgency_notification_assert_abstime_threshold is non-zero the
+     * callout is timed, and the function panics if the callout exceeds
+     * that threshold while machine timeouts are not suspended.
+     */
+    uint64_t urgency_notification_time_start = 0; /* only meaningful when timing is enabled */
+    uint64_t delta;
+    boolean_t urgency_assert = (urgency_notification_assert_abstime_threshold != 0);
+
+    assert(get_preemption_level() > 0 || ml_get_interrupts_enabled() == FALSE);
+#if DEBUG
+    /* Never index outside the statistics table on an unexpected urgency value. */
+    if (urgency >= 0 && urgency < THREAD_URGENCY_MAX)
+        urgency_stats[cpu_number() % 64][urgency]++;
+#endif
+    if (!pmInitDone
+        || pmDispatch == NULL
+        || pmDispatch->pmThreadTellUrgency == NULL)
+        return;
- if (cpuFuncs != NULL)
- pmDispatch = cpuFuncs;
+
+    KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, (rt_deadline >> 32), rt_deadline, 0);
+
+    if (__improbable((urgency_assert == TRUE)))
+        urgency_notification_time_start = mach_absolute_time();
+
+    pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline);
+
+    if (__improbable((urgency_assert == TRUE))) {
+        delta = mach_absolute_time() - urgency_notification_time_start;
+
+        if (__improbable(delta > urgency_notification_max_recorded)) {
+            /* This is not synchronized, but it doesn't matter
+             * if we (rarely) miss an event, as it is statistically
+             * unlikely that it will never recur.
+             */
+            urgency_notification_max_recorded = delta;
+
+            if (__improbable((delta > urgency_notification_assert_abstime_threshold) && !machine_timeout_suspended()))
+                panic("Urgency notification callout %p exceeded threshold, 0x%llx abstime units", pmDispatch->pmThreadTellUrgency, delta);
+        }
+    }
+
+    KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, (rt_deadline >> 32), rt_deadline, 0);
+}
+
+void
+active_rt_threads(boolean_t active)
+{
+    /* Forwarded only when an initialized kext supplies pmActiveRTThreads. */
+    if (pmInitDone && pmDispatch != NULL && pmDispatch->pmActiveRTThreads != NULL) {
+        pmDispatch->pmActiveRTThreads(active);
+    }
+}
+
+static uint32_t
+pmGetSavedRunCount(void)
+{
+    /* Kext callback (GetSavedRunCount): the count machine_run_count() stored. */
+    uint32_t count = saved_run_count;
+
+    return count;
}
/*
- * Unregisters the power management functions from the kext.
+ * Returns the root of the package tree.
*/
-void
-pmUnRegister(pmDispatch_t *cpuFuncs)
+static x86_pkg_t *
+pmGetPkgRoot(void)
{
- if (cpuFuncs != NULL && pmDispatch == cpuFuncs)
- pmDispatch = NULL;
+    /* Kext callback (GetPkgRoot): hands out the global x86_pkgs pointer. */
+    x86_pkg_t *root = x86_pkgs;
+
+    return root;
}
-#if MACH_KDB
+static boolean_t
+pmCPUGetHibernate(int cpu)
+{
+    /* Kext callback (GetHibernate): read cpu_hibernate from the CPU's data. */
+    boolean_t hibernating = cpu_datap(cpu)->cpu_hibernate;
+
+    return hibernating;
+}
+
+static processor_t
+pmLCPUtoProcessor(int lcpu)
+{
+    /* Kext callback (LCPUtoProcessor): read cpu_processor from the CPU's data. */
+    processor_t proc = cpu_datap(lcpu)->cpu_processor;
+
+    return proc;
+}
+
+static void
+pmReSyncDeadlines(int cpu)
+{
+    static boolean_t registered = FALSE;
+
+    /* The first call installs etimer_resync_deadlines as the PM interrupt routine. */
+    if (registered == FALSE) {
+        PM_interrupt_register(&etimer_resync_deadlines);
+        registered = TRUE;
+    }
+
+    /* Another CPU is asked through a PM interrupt; the current CPU resyncs directly. */
+    if ((uint32_t)cpu != current_cpu_datap()->lcpu.cpu_num) {
+        cpu_PM_interrupt(cpu);
+        return;
+    }
+
+    etimer_resync_deadlines();
+}
+
+static void
+pmSendIPI(int cpu)
+{
+ /* Kext callback (pmSendIPI): forwards to lapic_send_ipi() with LAPIC_PM_INTERRUPT. */
+ lapic_send_ipi(cpu, LAPIC_PM_INTERRUPT);
+}
+
+static void
+pmGetNanotimeInfo(pm_rtc_nanotime_t *rtc_nanotime)
+{
+ /*
+ * Copy the fields of pal_rtc_nanotime_info into *rtc_nanotime.
+ *
+ * Make sure that nanotime didn't change while we were reading it:
+ * the copy is repeated while the shared generation is non-zero and
+ * differs from the generation captured at the start of the copy.
+ *
+ * NOTE(review): when the shared generation reads as 0 the loop exits
+ * without retrying -- confirm that this is the intended handling.
+ */
+ do {
+ rtc_nanotime->generation = pal_rtc_nanotime_info.generation; /* must be first */
+ rtc_nanotime->tsc_base = pal_rtc_nanotime_info.tsc_base;
+ rtc_nanotime->ns_base = pal_rtc_nanotime_info.ns_base;
+ rtc_nanotime->scale = pal_rtc_nanotime_info.scale;
+ rtc_nanotime->shift = pal_rtc_nanotime_info.shift;
+ } while(pal_rtc_nanotime_info.generation != 0
+ && rtc_nanotime->generation != pal_rtc_nanotime_info.generation);
+}
+
+static uint32_t
+pmTimerQueueMigrate(int target_cpu)
+{
+    /* Migrating to the CPU we are running on is skipped and reported as 0. */
+    if (target_cpu == cpu_number()) {
+        return 0;
+    }
+
+    /* Otherwise the etimer code does the work. */
+    return etimer_queue_migrate(target_cpu);
+}
+
+
/*
- * XXX stubs for now
+ * Called by the power management kext to register itself and to get the
+ * callbacks it might need into other kernel functions. This interface
+ * is versioned to allow for slight mis-matches between the kext and the
+ * kernel.
*/
void
-db_cfg(__unused db_expr_t addr,
- __unused int have_addr,
- __unused db_expr_t count,
- __unused char *modif)
+pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
+               pmCallBacks_t *callbacks)
{
- return;
+    /* A missing callback table or a version other than PM_DISPATCH_VERSION is fatal. */
+    if (callbacks == NULL || version != PM_DISPATCH_VERSION) {
+        panic("Version mis-match between Kernel and CPU PM");
+    } else {
+        /* Timer and initialization hooks. */
+        callbacks->setRTCPop = setPop;
+        callbacks->resyncDeadlines = pmReSyncDeadlines;
+        callbacks->initComplete = pmInitComplete;
+
+        /* Topology lookups. */
+        callbacks->GetLCPU = pmGetLogicalCPU;
+        callbacks->GetCore = pmGetCore;
+        callbacks->GetDie = pmGetDie;
+        callbacks->GetPackage = pmGetPackage;
+        callbacks->GetMyLCPU = pmGetMyLogicalCPU;
+        callbacks->GetMyCore = pmGetMyCore;
+        callbacks->GetMyDie = pmGetMyDie;
+        callbacks->GetMyPackage = pmGetMyPackage;
+        callbacks->GetPkgRoot = pmGetPkgRoot;
+        callbacks->LockCPUTopology = pmLockCPUTopology;
+
+        /* Everything else. */
+        callbacks->GetHibernate = pmCPUGetHibernate;
+        callbacks->LCPUtoProcessor = pmLCPUtoProcessor;
+        callbacks->ThreadBind = thread_bind;
+        callbacks->GetSavedRunCount = pmGetSavedRunCount;
+        callbacks->GetNanotimeInfo = pmGetNanotimeInfo;
+        callbacks->ThreadGetUrgency = pmThreadGetUrgency;
+        callbacks->RTCClockAdjust = rtc_clock_adjust;
+        callbacks->timerQueueMigrate = pmTimerQueueMigrate;
+        callbacks->topoParms = &topoParms;
+        callbacks->pmSendIPI = pmSendIPI;
+        callbacks->InterruptPending = lapic_is_interrupt_pending;
+        callbacks->IsInterrupting = lapic_is_interrupting;
+        callbacks->InterruptStats = lapic_interrupt_counts;
+        callbacks->DisableApicTimer = lapic_disable_timer;
+    }
+
+    /* Without a dispatch table there is nothing further to install. */
+    if (cpuFuncs == NULL) {
+        return;
+    }
+
+    pmDispatch = cpuFuncs;
+
+    /* Replay a CPU state init request that was deferred by pmCPUStateInit(). */
+    if (earlyTopology && pmDispatch->pmCPUStateInit != NULL) {
+        pmDispatch->pmCPUStateInit();
+        earlyTopology = FALSE;
+    }
+
+    /* Hand the kext's IPI handler, when it has one, to the local APIC code. */
+    if (pmDispatch->pmIPIHandler != NULL) {
+        lapic_set_pm_func((i386_intr_func_t)pmDispatch->pmIPIHandler);
+    }
}
+/*
+ * Unregisters the power management functions from the kext.
+ */
void
-db_display_iokit(__unused db_expr_t addr,
- __unused int have_addr,
- __unused db_expr_t count,
- __unused char *modif)
+pmUnRegister(pmDispatch_t *cpuFuncs)
{
- return;
+    /* Only the table that is currently installed may be removed. */
+    if (cpuFuncs == NULL || pmDispatch != cpuFuncs) {
+        return;
+    }
+
+    pmDispatch = NULL;
}
-void
-db_dtimers(__unused db_expr_t addr,
- __unused int have_addr,
- __unused db_expr_t count,
- __unused char *modif)
+/******************************************************************************
+ *
+ * All of the following are deprecated interfaces and no longer used.
+ *
+ ******************************************************************************/
+kern_return_t
+pmsControl(__unused uint32_t request, __unused user_addr_t reqaddr,
+           __unused uint32_t reqsize)
{
- return;
+    /* Deprecated interface: every argument is ignored and success is reported. */
+    return KERN_SUCCESS;
}
void
-db_intcnt(__unused db_expr_t addr,
- __unused int have_addr,
- __unused db_expr_t count,
- __unused char *modif)
+pmsInit(void)
{
- return;
+ /* Deprecated interface: intentionally does nothing. */
}
void
-db_nap(__unused db_expr_t addr,
- __unused int have_addr,
- __unused db_expr_t count,
- __unused char *modif)
+pmsStart(void)
{
- return;
+ /* Deprecated interface: intentionally does nothing. */
}
void
-db_pmgr(__unused db_expr_t addr,
- __unused int have_addr,
- __unused db_expr_t count,
- __unused char *modif)
+pmsPark(void)
{
- return;
+ /* Deprecated interface: intentionally does nothing. */
}
void
-db_test(__unused db_expr_t addr,
- __unused int have_addr,
- __unused db_expr_t count,
- __unused char *modif)
+pmsRun(__unused uint32_t nstep)
{
- return;
+ /* Deprecated interface: the argument is ignored and nothing is done. */
}
-void
-db_getpmgr(__unused pmData_t *pmj)
+kern_return_t
+pmsBuild(__unused pmsDef *pd, __unused uint32_t pdsize,
+         __unused pmsSetFunc_t *functab,
+         __unused uint32_t platformData, __unused pmsQueryFunc_t queryFunc)
{
+    /* Deprecated interface: every argument is ignored and success is reported. */
+    return KERN_SUCCESS;
}
-#endif