git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/i386/pmCPU.c
xnu-2422.90.20.tar.gz
[apple/xnu.git] / osfmk / i386 / pmCPU.c
index 8decbb9437375f69632d9fca8b7a38341c1cfec3..1efffe69c5cfa53deb63e579152eac81ae1c0f58 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (c) 2004-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  *
  * Implements the "wrappers" to the KEXT.
  */
  *
  * Implements the "wrappers" to the KEXT.
  */
-#include <kern/machine.h>
-#include <i386/machine_routines.h>
-#include <i386/machine_cpu.h>
-#include <i386/misc_protos.h>
-#include <i386/pmap.h>
 #include <i386/asm.h>
 #include <i386/asm.h>
+#include <i386/machine_cpu.h>
 #include <i386/mp.h>
 #include <i386/mp.h>
+#include <i386/machine_routines.h>
 #include <i386/proc_reg.h>
 #include <i386/proc_reg.h>
+#include <i386/pmap.h>
+#include <i386/misc_protos.h>
+#include <kern/machine.h>
 #include <kern/pms.h>
 #include <kern/processor.h>
 #include <kern/pms.h>
 #include <kern/processor.h>
+#include <kern/timer_queue.h>
 #include <i386/cpu_threads.h>
 #include <i386/pmCPU.h>
 #include <i386/cpuid.h>
 #include <i386/cpu_threads.h>
 #include <i386/pmCPU.h>
 #include <i386/cpuid.h>
-#include <i386/rtclock.h>
+#include <i386/rtclock_protos.h>
 #include <kern/sched_prim.h>
 #include <kern/sched_prim.h>
-
-/*
- * Kernel parameter determining whether threads are halted unconditionally
- * in the idle state.  This is the default behavior.
- * See machine_idle() for use.
- */
-int idlehalt                                   = 1;
+#include <i386/lapic.h>
+#include <i386/pal_routines.h>
+#include <sys/kdebug.h>
+#include <i386/tsc.h>
 
 extern int disableConsoleOutput;
 
 
 extern int disableConsoleOutput;
 
-decl_simple_lock_data(,pm_init_lock);
+#define DELAY_UNSET            0xFFFFFFFFFFFFFFFFULL
+
+uint64_t cpu_itime_bins[CPU_ITIME_BINS] = {16* NSEC_PER_USEC, 32* NSEC_PER_USEC, 64* NSEC_PER_USEC, 128* NSEC_PER_USEC, 256* NSEC_PER_USEC, 512* NSEC_PER_USEC, 1024* NSEC_PER_USEC, 2048* NSEC_PER_USEC, 4096* NSEC_PER_USEC, 8192* NSEC_PER_USEC, 16384* NSEC_PER_USEC, 32768* NSEC_PER_USEC};
+uint64_t *cpu_rtime_bins = &cpu_itime_bins[0];
 
 /*
  * The following is set when the KEXT loads and initializes.
  */
 pmDispatch_t   *pmDispatch     = NULL;
 
 
 /*
  * The following is set when the KEXT loads and initializes.
  */
 pmDispatch_t   *pmDispatch     = NULL;
 
-static uint32_t                pmInitDone      = 0;
-
+uint32_t               pmInitDone              = 0;
+static boolean_t       earlyTopology           = FALSE;
+static uint64_t                earlyMaxBusDelay        = DELAY_UNSET;
+static uint64_t                earlyMaxIntDelay        = DELAY_UNSET;
 
 /*
  * Initialize the Cstate change code.
 
 /*
  * Initialize the Cstate change code.
@@ -72,20 +75,23 @@ static uint32_t             pmInitDone      = 0;
 void
 power_management_init(void)
 {
 void
 power_management_init(void)
 {
-    static boolean_t   initialized     = FALSE;
-
-    /*
-     * Initialize the lock for the KEXT initialization.
-     */
-    if (!initialized) {
-       simple_lock_init(&pm_init_lock, 0);
-       initialized = TRUE;
-    }
-
     if (pmDispatch != NULL && pmDispatch->cstateInit != NULL)
        (*pmDispatch->cstateInit)();
 }
 
     if (pmDispatch != NULL && pmDispatch->cstateInit != NULL)
        (*pmDispatch->cstateInit)();
 }
 
+static inline void machine_classify_interval(uint64_t interval, uint64_t *bins, uint64_t *binvals, uint32_t nbins) {
+       uint32_t i;
+       for (i = 0; i < nbins; i++) {
+               if (interval < binvals[i]) {
+                       bins[i]++;
+                       break;
+               }
+       }
+}
+
+uint64_t       idle_pending_timers_processed;
+uint32_t       idle_entry_timer_processing_hdeadline_threshold = 5000000;
+
 /*
  * Called when the CPU is idle.  It calls into the power management kext
  * to determine the best way to idle the CPU.
 /*
  * Called when the CPU is idle.  It calls into the power management kext
  * to determine the best way to idle the CPU.
@@ -93,51 +99,128 @@ power_management_init(void)
 void
 machine_idle(void)
 {
 void
 machine_idle(void)
 {
-    cpu_data_t         *my_cpu         = current_cpu_datap();
-
-    if (my_cpu == NULL)
-       goto out;
-
-    /*
-     * If idlehalt isn't set, then don't do any power management related
-     * idle handling.
-     */
-    if (!idlehalt)
-       goto out;
+       cpu_data_t              *my_cpu         = current_cpu_datap();
+       __unused uint32_t       cnum = my_cpu->cpu_number;
+       uint64_t                ctime, rtime, itime;
+#if CST_DEMOTION_DEBUG
+       processor_t             cproc = my_cpu->cpu_processor;
+       uint64_t                cwakeups = PROCESSOR_DATA(cproc, wakeups_issued_total);
+#endif /* CST_DEMOTION_DEBUG */
+       uint64_t esdeadline, ehdeadline;
+       boolean_t do_process_pending_timers = FALSE;
+
+       ctime = mach_absolute_time();
+       esdeadline = my_cpu->rtclock_timer.queue.earliest_soft_deadline;
+       ehdeadline = my_cpu->rtclock_timer.deadline;
+/* Determine if pending timers exist */    
+       if ((ctime >= esdeadline) && (ctime < ehdeadline) &&
+           ((ehdeadline - ctime) < idle_entry_timer_processing_hdeadline_threshold)) {
+               idle_pending_timers_processed++;
+               do_process_pending_timers = TRUE;
+               goto machine_idle_exit;
+       } else {
+               TCOAL_DEBUG(0xCCCC0000, ctime, my_cpu->rtclock_timer.queue.earliest_soft_deadline, my_cpu->rtclock_timer.deadline, idle_pending_timers_processed, 0);
+       }
+    
+       my_cpu->lcpu.state = LCPU_IDLE;
+       DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
+       MARK_CPU_IDLE(cnum);
+
+       rtime = ctime - my_cpu->cpu_ixtime;
+
+       my_cpu->cpu_rtime_total += rtime;
+       machine_classify_interval(rtime, &my_cpu->cpu_rtimes[0], &cpu_rtime_bins[0], CPU_RTIME_BINS);
+#if CST_DEMOTION_DEBUG
+       uint32_t cl = 0, ch = 0;
+       uint64_t c3res, c6res, c7res;
+       rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
+       c3res = ((uint64_t)ch << 32) | cl;
+       rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
+       c6res = ((uint64_t)ch << 32) | cl;
+       rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
+       c7res = ((uint64_t)ch << 32) | cl;
+#endif
+
+       if (pmInitDone) {
+               /*
+                * Handle the case where ml_set_maxbusdelay() or ml_set_maxintdelay()
+                * was called prior to the CPU PM kext being registered.  We do
+                * this here because idle is where the decisions using these values
+                * are made, so this is the point at which they are first used.
+                */
+               if (earlyMaxBusDelay != DELAY_UNSET)
+                       ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
+               if (earlyMaxIntDelay != DELAY_UNSET)
+                       ml_set_maxintdelay(earlyMaxIntDelay);
+       }
 
 
-    my_cpu->lcpu.state = LCPU_IDLE;
-    my_cpu->lcpu.flags |= X86CORE_FL_IDLE;
-    DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
-    MARK_CPU_IDLE(cpu_number());
+       if (pmInitDone
+           && pmDispatch != NULL
+           && pmDispatch->MachineIdle != NULL)
+               (*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
+       else {
+               /*
+                * If no power management, re-enable interrupts and halt.
+                * This will keep the CPU from spinning through the scheduler
+                * and will allow at least some minimal power savings (but it
+                * can cause problems in some MP configurations w.r.t. the APIC
+                * stopping during a GV3 transition).
+                */
+               pal_hlt();
+               /* Once woken, re-disable interrupts. */
+               pal_cli();
+       }
 
 
-    if (pmInitDone
-       && pmDispatch != NULL
-       && pmDispatch->cstateMachineIdle != NULL)
-       (*pmDispatch->cstateMachineIdle)(0x7FFFFFFFFFFFFFFFULL);
-    else {
        /*
        /*
-        * If no power management, re-enable interrupts and halt.
-        * This will keep the CPU from spinning through the scheduler
-        * and will allow at least some minimal power savings (but it
-        * cause problems in some MP configurations w.r.t. the APIC
-        * stopping during a GV3 transition).
+        * Mark the CPU as running again.
         */
         */
-       __asm__ volatile ("sti; hlt");
-    }
+       MARK_CPU_ACTIVE(cnum);
+       DBGLOG(cpu_handle, cnum, MP_UNIDLE);
+       my_cpu->lcpu.state = LCPU_RUN;
+       uint64_t ixtime = my_cpu->cpu_ixtime = mach_absolute_time();
+       itime = ixtime - ctime;
+       my_cpu->cpu_idle_exits++;
+        my_cpu->cpu_itime_total += itime;
+       machine_classify_interval(itime, &my_cpu->cpu_itimes[0], &cpu_itime_bins[0], CPU_ITIME_BINS);
+#if CST_DEMOTION_DEBUG
+       cl = ch = 0;
+       rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch);
+       c3res = (((uint64_t)ch << 32) | cl) - c3res;
+       rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch);
+       c6res = (((uint64_t)ch << 32) | cl) - c6res;
+       rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch);
+       c7res = (((uint64_t)ch << 32) | cl) - c7res;
+
+       uint64_t ndelta = itime - tmrCvt(c3res + c6res + c7res, tscFCvtt2n);
+       KERNEL_DEBUG_CONSTANT(0xcead0000, ndelta, itime, c7res, c6res, c3res);
+       if ((itime > 1000000) && (ndelta > 250000))
+               KERNEL_DEBUG_CONSTANT(0xceae0000, ndelta, itime, c7res, c6res, c3res);
+#endif
+
+       machine_idle_exit:
+       /*
+        * Re-enable interrupts.
+        */
+
+       pal_sti();
 
 
-    /*
-     * Mark the CPU as running again.
-     */
-    MARK_CPU_ACTIVE(cpu_number());
-    DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE);
-    my_cpu->lcpu.flags &= ~(X86CORE_FL_IDLE | X86CORE_FL_WAKEUP);
-    my_cpu->lcpu.state = LCPU_RUN;
+       if (do_process_pending_timers) {
+               TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_START, ctime, esdeadline, ehdeadline, idle_pending_timers_processed, 0);
 
 
-    /*
-     * Re-enable interrupts.
-     */
-  out:
-    __asm__ volatile("sti");
+               /* Adjust to reflect that this isn't truly a package idle exit */
+               __sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
+               lapic_timer_swi(); /* Trigger software timer interrupt */
+               __sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);
+
+               TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_END, ctime, esdeadline, idle_pending_timers_processed, 0, 0);
+       }
+#if CST_DEMOTION_DEBUG
+       uint64_t nwakeups = PROCESSOR_DATA(cproc, wakeups_issued_total);
+
+       if ((nwakeups == cwakeups) && (topoParms.nLThreadsPerPackage == my_cpu->lcpu.package->num_idle)) {
+               KERNEL_DEBUG_CONSTANT(0xceaa0000, cwakeups, 0, 0, 0, 0);
+       }
+#endif    
 }
 
 /*
 }
 
 /*
@@ -152,17 +235,18 @@ pmCPUHalt(uint32_t reason)
     switch (reason) {
     case PM_HALT_DEBUG:
        cpup->lcpu.state = LCPU_PAUSE;
     switch (reason) {
     case PM_HALT_DEBUG:
        cpup->lcpu.state = LCPU_PAUSE;
-       __asm__ volatile ("wbinvd; hlt");
+       pal_stop_cpu(FALSE);
        break;
 
     case PM_HALT_PANIC:
        cpup->lcpu.state = LCPU_PAUSE;
        break;
 
     case PM_HALT_PANIC:
        cpup->lcpu.state = LCPU_PAUSE;
-       __asm__ volatile ("cli; wbinvd; hlt");
+       pal_stop_cpu(TRUE);
        break;
 
     case PM_HALT_NORMAL:
        break;
 
     case PM_HALT_NORMAL:
+    case PM_HALT_SLEEP:
     default:
     default:
-       __asm__ volatile ("cli");
+        pal_cli();
 
        if (pmInitDone
            && pmDispatch != NULL
 
        if (pmInitDone
            && pmDispatch != NULL
@@ -173,12 +257,16 @@ pmCPUHalt(uint32_t reason)
            (*pmDispatch->pmCPUHalt)();
 
            /*
            (*pmDispatch->pmCPUHalt)();
 
            /*
-            * We've exited halt, so get the the CPU schedulable again.
+            * We've exited halt, so get the CPU schedulable again.
+            * - by calling the fast init routine for a slave, or
+            * - by returning if we're the master processor.
             */
             */
-           i386_init_slave_fast();
-
-           panic("init_slave_fast returned");
-       } else {
+           if (cpup->cpu_number != master_cpu) {
+               i386_init_slave_fast();
+               panic("init_slave_fast returned");
+           }
+       } else
+       {
            /*
             * If no power management and a processor is taken off-line,
             * then invalidate the cache and halt it (it will not be able
            /*
             * If no power management and a processor is taken off-line,
             * then invalidate the cache and halt it (it will not be able
@@ -186,10 +274,11 @@ pmCPUHalt(uint32_t reason)
             */
            __asm__ volatile ("wbinvd");
            cpup->lcpu.state = LCPU_HALT;
             */
            __asm__ volatile ("wbinvd");
            cpup->lcpu.state = LCPU_HALT;
-           __asm__ volatile ( "wbinvd; hlt" );
+           pal_stop_cpu(FALSE);
 
            panic("back from Halt");
        }
 
            panic("back from Halt");
        }
+
        break;
     }
 }
        break;
     }
 }
@@ -206,16 +295,22 @@ pmMarkAllCPUsOff(void)
 static void
 pmInitComplete(void)
 {
 static void
 pmInitComplete(void)
 {
+    if (earlyTopology
+       && pmDispatch != NULL
+       && pmDispatch->pmCPUStateInit != NULL) {
+       (*pmDispatch->pmCPUStateInit)();
+       earlyTopology = FALSE;
+    }
     pmInitDone = 1;
 }
 
     pmInitDone = 1;
 }
 
-static x86_lcpu_t *
+x86_lcpu_t *
 pmGetLogicalCPU(int cpu)
 {
     return(cpu_to_lcpu(cpu));
 }
 
 pmGetLogicalCPU(int cpu)
 {
     return(cpu_to_lcpu(cpu));
 }
 
-static x86_lcpu_t *
+x86_lcpu_t *
 pmGetMyLogicalCPU(void)
 {
     cpu_data_t *cpup   = current_cpu_datap();
 pmGetMyLogicalCPU(void)
 {
     cpu_data_t *cpup   = current_cpu_datap();
@@ -278,11 +373,13 @@ pmLockCPUTopology(int lock)
 /*
  * Called to get the next deadline that has been set by the
  * power management code.
 /*
  * Called to get the next deadline that has been set by the
  * power management code.
+ * Note: a return of 0 from AICPM and this routine signifies
+ * that no deadline is set.
  */
 uint64_t
 pmCPUGetDeadline(cpu_data_t *cpu)
 {
  */
 uint64_t
 pmCPUGetDeadline(cpu_data_t *cpu)
 {
-    uint64_t   deadline        = EndOfAllTime;
+    uint64_t   deadline        = 0;
 
     if (pmInitDone
        && pmDispatch != NULL
 
     if (pmInitDone
        && pmDispatch != NULL
@@ -296,10 +393,11 @@ pmCPUGetDeadline(cpu_data_t *cpu)
  * Called to determine if the supplied deadline or the power management
  * deadline is sooner.  Returns whichever one is first.
  */
  * Called to determine if the supplied deadline or the power management
  * deadline is sooner.  Returns whichever one is first.
  */
+
 uint64_t
 pmCPUSetDeadline(cpu_data_t *cpu, uint64_t deadline)
 {
 uint64_t
 pmCPUSetDeadline(cpu_data_t *cpu, uint64_t deadline)
 {
-    if (pmInitDone
+   if (pmInitDone
        && pmDispatch != NULL
        && pmDispatch->SetDeadline != NULL)
        deadline = (*pmDispatch->SetDeadline)(&cpu->lcpu, deadline);
        && pmDispatch != NULL
        && pmDispatch->SetDeadline != NULL)
        deadline = (*pmDispatch->SetDeadline)(&cpu->lcpu, deadline);
@@ -327,7 +425,6 @@ pmCPUExitIdle(cpu_data_t *cpu)
 {
     boolean_t          do_ipi;
 
 {
     boolean_t          do_ipi;
 
-    cpu->lcpu.flags |= X86CORE_FL_WAKEUP;
     if (pmInitDone
        && pmDispatch != NULL
        && pmDispatch->exitIdle != NULL)
     if (pmInitDone
        && pmDispatch != NULL
        && pmDispatch->exitIdle != NULL)
@@ -335,9 +432,6 @@ pmCPUExitIdle(cpu_data_t *cpu)
     else
        do_ipi = TRUE;
 
     else
        do_ipi = TRUE;
 
-    if (do_ipi)
-       cpu->lcpu.flags &= ~X86CORE_FL_WAKEUP;
-
     return(do_ipi);
 }
 
     return(do_ipi);
 }
 
@@ -357,7 +451,7 @@ pmCPUExitHalt(int cpu)
 kern_return_t
 pmCPUExitHaltToOff(int cpu)
 {
 kern_return_t
 pmCPUExitHaltToOff(int cpu)
 {
-    kern_return_t      rc      = KERN_INVALID_ARGUMENT;
+    kern_return_t      rc      = KERN_SUCCESS;
 
     if (pmInitDone
        && pmDispatch != NULL
 
     if (pmInitDone
        && pmDispatch != NULL
@@ -375,6 +469,8 @@ pmCPUStateInit(void)
 {
     if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL)
        (*pmDispatch->pmCPUStateInit)();
 {
     if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL)
        (*pmDispatch->pmCPUStateInit)();
+    else
+       earlyTopology = TRUE;
 }
 
 /*
 }
 
 /*
@@ -450,7 +546,8 @@ ml_get_maxsnoop(void)
 {
     uint64_t   max_snoop       = 0;
 
 {
     uint64_t   max_snoop       = 0;
 
-    if (pmDispatch != NULL
+    if (pmInitDone
+       && pmDispatch != NULL
        && pmDispatch->getMaxSnoop != NULL)
        max_snoop = pmDispatch->getMaxSnoop();
 
        && pmDispatch->getMaxSnoop != NULL)
        max_snoop = pmDispatch->getMaxSnoop();
 
@@ -463,7 +560,8 @@ ml_get_maxbusdelay(void)
 {
     uint64_t   max_delay       = 0;
 
 {
     uint64_t   max_delay       = 0;
 
-    if (pmDispatch != NULL
+    if (pmInitDone
+       && pmDispatch != NULL
        && pmDispatch->getMaxBusDelay != NULL)
        max_delay = pmDispatch->getMaxBusDelay();
 
        && pmDispatch->getMaxBusDelay != NULL)
        max_delay = pmDispatch->getMaxBusDelay();
 
@@ -471,12 +569,7 @@ ml_get_maxbusdelay(void)
 }
 
 /*
 }
 
 /*
- * Set the maximum delay time allowed for snoop on the bus.
- *
- * Note that this value will be compared to the amount of time that it takes
- * to transition from a non-snooping power state (C4) to a snooping state (C2).
- * If maxBusDelay is less than C4C2SnoopDelay,
- * we will not enter the lowest power state.
+ * Advertise a memory access latency tolerance of "mdelay" ns
  */
 void
 ml_set_maxbusdelay(uint32_t mdelay)
  */
 void
 ml_set_maxbusdelay(uint32_t mdelay)
@@ -484,8 +577,11 @@ ml_set_maxbusdelay(uint32_t mdelay)
     uint64_t   maxdelay        = mdelay;
 
     if (pmDispatch != NULL
     uint64_t   maxdelay        = mdelay;
 
     if (pmDispatch != NULL
-       && pmDispatch->setMaxBusDelay != NULL)
+       && pmDispatch->setMaxBusDelay != NULL) {
+       earlyMaxBusDelay = DELAY_UNSET;
        pmDispatch->setMaxBusDelay(maxdelay);
        pmDispatch->setMaxBusDelay(maxdelay);
+    } else
+       earlyMaxBusDelay = maxdelay;
 }
 
 uint64_t
 }
 
 uint64_t
@@ -507,8 +603,24 @@ void
 ml_set_maxintdelay(uint64_t mdelay)
 {
     if (pmDispatch != NULL
 ml_set_maxintdelay(uint64_t mdelay)
 {
     if (pmDispatch != NULL
-       && pmDispatch->setMaxIntDelay != NULL)
+       && pmDispatch->setMaxIntDelay != NULL) {
+       earlyMaxIntDelay = DELAY_UNSET;
        pmDispatch->setMaxIntDelay(mdelay);
        pmDispatch->setMaxIntDelay(mdelay);
+    } else
+       earlyMaxIntDelay = mdelay;
+}
+
+boolean_t
+ml_get_interrupt_prewake_applicable()
+{
+    boolean_t applicable = FALSE;
+
+    if (pmInitDone 
+       && pmDispatch != NULL
+       && pmDispatch->pmInterruptPrewakeApplicable != NULL)
+       applicable = pmDispatch->pmInterruptPrewakeApplicable();
+
+    return applicable;
 }
 
 /*
 }
 
 /*
@@ -566,8 +678,10 @@ machine_run_count(uint32_t count)
 }
 
 boolean_t
 }
 
 boolean_t
-machine_cpu_is_inactive(int cpu)
+machine_processor_is_inactive(processor_t processor)
 {
 {
+    int                cpu = processor->cpu_id;
+
     if (pmDispatch != NULL
        && pmDispatch->pmIsCPUUnAvailable != NULL)
        return(pmDispatch->pmIsCPUUnAvailable(cpu_to_lcpu(cpu)));
     if (pmDispatch != NULL
        && pmDispatch->pmIsCPUUnAvailable != NULL)
        return(pmDispatch->pmIsCPUUnAvailable(cpu_to_lcpu(cpu)));
@@ -575,6 +689,125 @@ machine_cpu_is_inactive(int cpu)
        return(FALSE);
 }
 
        return(FALSE);
 }
 
+processor_t
+machine_choose_processor(processor_set_t pset,
+                        processor_t preferred)
+{
+    int                startCPU;
+    int                endCPU;
+    int                preferredCPU;
+    int                chosenCPU;
+
+    if (!pmInitDone)
+       return(preferred);
+
+    if (pset == NULL) {
+       startCPU = -1;
+       endCPU = -1;
+    } else {
+       startCPU = pset->cpu_set_low;
+       endCPU = pset->cpu_set_hi;
+    }
+
+    if (preferred == NULL)
+       preferredCPU = -1;
+    else
+       preferredCPU = preferred->cpu_id;
+
+    if (pmDispatch != NULL
+       && pmDispatch->pmChooseCPU != NULL) {
+       chosenCPU = pmDispatch->pmChooseCPU(startCPU, endCPU, preferredCPU);
+
+       if (chosenCPU == -1)
+           return(NULL);
+       return(cpu_datap(chosenCPU)->cpu_processor);
+    }
+
+    return(preferred);
+}
+
+static int
+pmThreadGetUrgency(uint64_t *rt_period, uint64_t *rt_deadline)
+{
+       int             urgency;
+       uint64_t        arg1, arg2;
+
+       urgency = thread_get_urgency(current_processor()->next_thread, &arg1, &arg2);
+
+       if (urgency == THREAD_URGENCY_REAL_TIME) {
+               if (rt_period != NULL)
+                       *rt_period = arg1;
+               
+               if (rt_deadline != NULL)
+                       *rt_deadline = arg2;
+       }
+
+       KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_GET_URGENCY), urgency, arg1, arg2, 0, 0);
+
+       return(urgency);
+}
+
+#if    DEBUG
+uint32_t       urgency_stats[64][THREAD_URGENCY_MAX];
+#endif
+
+#define                URGENCY_NOTIFICATION_ASSERT_NS (5 * 1000 * 1000)
+uint64_t       urgency_notification_assert_abstime_threshold, urgency_notification_max_recorded;
+
+void
+thread_tell_urgency(int urgency,
+    uint64_t rt_period,
+    uint64_t rt_deadline,
+    thread_t nthread)
+{
+       uint64_t        urgency_notification_time_start, delta;
+       boolean_t       urgency_assert = (urgency_notification_assert_abstime_threshold != 0);
+       assert(get_preemption_level() > 0 || ml_get_interrupts_enabled() == FALSE);
+#if    DEBUG
+       urgency_stats[cpu_number() % 64][urgency]++;
+#endif
+       if (!pmInitDone
+           || pmDispatch == NULL
+           || pmDispatch->pmThreadTellUrgency == NULL)
+               return;
+
+       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, 0, 0);
+
+       if (__improbable((urgency_assert == TRUE)))
+               urgency_notification_time_start = mach_absolute_time();
+
+       current_cpu_datap()->cpu_nthread = nthread;
+       pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline);
+
+       if (__improbable((urgency_assert == TRUE))) {
+               delta = mach_absolute_time() - urgency_notification_time_start;
+
+               if (__improbable(delta > urgency_notification_max_recorded)) {
+                       /* This is not synchronized, but it doesn't matter
+                        * if we (rarely) miss an event, as it is statistically
+                        * unlikely that it will never recur.
+                        */
+                       urgency_notification_max_recorded = delta;
+
+                       if (__improbable((delta > urgency_notification_assert_abstime_threshold) && !machine_timeout_suspended()))
+                               panic("Urgency notification callout %p exceeded threshold, 0x%llx abstime units", pmDispatch->pmThreadTellUrgency, delta);
+               }
+       }
+
+       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
+}
+
+void
+active_rt_threads(boolean_t active)
+{
+    if (!pmInitDone
+       || pmDispatch == NULL
+       || pmDispatch->pmActiveRTThreads == NULL)
+       return;
+
+    pmDispatch->pmActiveRTThreads(active);
+}
+
 static uint32_t
 pmGetSavedRunCount(void)
 {
 static uint32_t
 pmGetSavedRunCount(void)
 {
@@ -584,7 +817,7 @@ pmGetSavedRunCount(void)
 /*
  * Returns the root of the package tree.
  */
 /*
  * Returns the root of the package tree.
  */
-static x86_pkg_t *
+x86_pkg_t *
 pmGetPkgRoot(void)
 {
     return(x86_pkgs);
 pmGetPkgRoot(void)
 {
     return(x86_pkgs);
@@ -596,7 +829,7 @@ pmCPUGetHibernate(int cpu)
     return(cpu_datap(cpu)->cpu_hibernate);
 }
 
     return(cpu_datap(cpu)->cpu_hibernate);
 }
 
-static processor_t
+processor_t
 pmLCPUtoProcessor(int lcpu)
 {
     return(cpu_datap(lcpu)->cpu_processor);
 pmLCPUtoProcessor(int lcpu)
 {
     return(cpu_datap(lcpu)->cpu_processor);
@@ -608,16 +841,48 @@ pmReSyncDeadlines(int cpu)
     static boolean_t   registered      = FALSE;
 
     if (!registered) {
     static boolean_t   registered      = FALSE;
 
     if (!registered) {
-       PM_interrupt_register(&etimer_resync_deadlines);
+       PM_interrupt_register(&timer_resync_deadlines);
        registered = TRUE;
     }
 
     if ((uint32_t)cpu == current_cpu_datap()->lcpu.cpu_num)
        registered = TRUE;
     }
 
     if ((uint32_t)cpu == current_cpu_datap()->lcpu.cpu_num)
-       etimer_resync_deadlines();
+       timer_resync_deadlines();
     else
        cpu_PM_interrupt(cpu);
 }
 
     else
        cpu_PM_interrupt(cpu);
 }
 
+static void
+pmSendIPI(int cpu)
+{
+    lapic_send_ipi(cpu, LAPIC_PM_INTERRUPT);
+}
+
+static void
+pmGetNanotimeInfo(pm_rtc_nanotime_t *rtc_nanotime)
+{
+       /*
+        * Make sure that nanotime didn't change while we were reading it.
+        */
+       do {
+               rtc_nanotime->generation = pal_rtc_nanotime_info.generation; /* must be first */
+               rtc_nanotime->tsc_base = pal_rtc_nanotime_info.tsc_base;
+               rtc_nanotime->ns_base = pal_rtc_nanotime_info.ns_base;
+               rtc_nanotime->scale = pal_rtc_nanotime_info.scale;
+               rtc_nanotime->shift = pal_rtc_nanotime_info.shift;
+       } while(pal_rtc_nanotime_info.generation != 0
+               && rtc_nanotime->generation != pal_rtc_nanotime_info.generation);
+}
+
+uint32_t
+pmTimerQueueMigrate(int target_cpu)
+{
+    /* Call the timer queue code to do this. */
+    return (target_cpu != cpu_number())
+               ? timer_queue_migrate_cpu(target_cpu)
+               : 0;
+}
+
+
 /*
  * Called by the power management kext to register itself and to get the
  * callbacks it might need into other kernel functions.  This interface
 /*
  * Called by the power management kext to register itself and to get the
  * callbacks it might need into other kernel functions.  This interface
@@ -626,34 +891,57 @@ pmReSyncDeadlines(int cpu)
  */
 void
 pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
  */
 void
 pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
-              pmCallBacks_t *callbacks)
-{
-    if (callbacks != NULL && version == PM_DISPATCH_VERSION) {
-       callbacks->setRTCPop            = setPop;
-       callbacks->resyncDeadlines      = pmReSyncDeadlines;
-       callbacks->initComplete         = pmInitComplete;
-       callbacks->GetLCPU              = pmGetLogicalCPU;
-       callbacks->GetCore              = pmGetCore;
-       callbacks->GetDie               = pmGetDie;
-       callbacks->GetPackage           = pmGetPackage;
-       callbacks->GetMyLCPU            = pmGetMyLogicalCPU;
-       callbacks->GetMyCore            = pmGetMyCore;
-       callbacks->GetMyDie             = pmGetMyDie;
-       callbacks->GetMyPackage         = pmGetMyPackage;
-       callbacks->GetPkgRoot           = pmGetPkgRoot;
-       callbacks->LockCPUTopology      = pmLockCPUTopology;
-       callbacks->GetHibernate         = pmCPUGetHibernate;
-       callbacks->LCPUtoProcessor      = pmLCPUtoProcessor;
-       callbacks->ThreadBind           = thread_bind;
-       callbacks->GetSavedRunCount     = pmGetSavedRunCount;
-       callbacks->topoParms            = &topoParms;
-    } else {
-       panic("Version mis-match between Kernel and CPU PM");
-    }
+    pmCallBacks_t *callbacks)
+{
+       if (callbacks != NULL && version == PM_DISPATCH_VERSION) {
+               callbacks->setRTCPop            = setPop;
+               callbacks->resyncDeadlines      = pmReSyncDeadlines;
+               callbacks->initComplete         = pmInitComplete;
+               callbacks->GetLCPU              = pmGetLogicalCPU;
+               callbacks->GetCore              = pmGetCore;
+               callbacks->GetDie               = pmGetDie;
+               callbacks->GetPackage           = pmGetPackage;
+               callbacks->GetMyLCPU            = pmGetMyLogicalCPU;
+               callbacks->GetMyCore            = pmGetMyCore;
+               callbacks->GetMyDie             = pmGetMyDie;
+               callbacks->GetMyPackage         = pmGetMyPackage;
+               callbacks->GetPkgRoot           = pmGetPkgRoot;
+               callbacks->LockCPUTopology      = pmLockCPUTopology;
+               callbacks->GetHibernate         = pmCPUGetHibernate;
+               callbacks->LCPUtoProcessor      = pmLCPUtoProcessor;
+               callbacks->ThreadBind           = thread_bind;
+               callbacks->GetSavedRunCount     = pmGetSavedRunCount;
+               callbacks->GetNanotimeInfo      = pmGetNanotimeInfo;
+               callbacks->ThreadGetUrgency     = pmThreadGetUrgency;
+               callbacks->RTCClockAdjust       = rtc_clock_adjust;
+               callbacks->timerQueueMigrate    = pmTimerQueueMigrate;
+               callbacks->topoParms            = &topoParms;
+               callbacks->pmSendIPI            = pmSendIPI;
+               callbacks->InterruptPending     = lapic_is_interrupt_pending;
+               callbacks->IsInterrupting       = lapic_is_interrupting;
+               callbacks->InterruptStats       = lapic_interrupt_counts;
+               callbacks->DisableApicTimer     = lapic_disable_timer;
+       } else {
+               panic("Version mis-match between Kernel and CPU PM");
+       }
 
 
-    if (cpuFuncs != NULL) {
-       pmDispatch = cpuFuncs;
-    }
+       if (cpuFuncs != NULL) {
+               if (pmDispatch) {
+                       panic("Attempt to re-register power management interface--AICPM present in xcpm mode? %p->%p", pmDispatch, cpuFuncs);
+               }
+
+               pmDispatch = cpuFuncs;
+
+               if (earlyTopology
+                   && pmDispatch->pmCPUStateInit != NULL) {
+                       (*pmDispatch->pmCPUStateInit)();
+                       earlyTopology = FALSE;
+               }
+
+               if (pmDispatch->pmIPIHandler != NULL) {
+                       lapic_set_pm_func((i386_intr_func_t)pmDispatch->pmIPIHandler);
+               }
+       }
 }
 
 /*
 }
 
 /*
@@ -667,42 +955,16 @@ pmUnRegister(pmDispatch_t *cpuFuncs)
     }
 }
 
     }
 }
 
-/******************************************************************************
- *
- * All of the following are deprecated interfaces and no longer used.
- *
- ******************************************************************************/
-kern_return_t
-pmsControl(__unused uint32_t request, __unused user_addr_t reqaddr,
-          __unused uint32_t reqsize)
-{
-    return(KERN_SUCCESS);
-}
+void machine_track_platform_idle(boolean_t entry) {
+       cpu_data_t              *my_cpu         = current_cpu_datap();
 
 
-void
-pmsInit(void)
-{
-}
-
-void
-pmsStart(void)
-{
-}
-
-void
-pmsPark(void)
-{
-}
-
-void
-pmsRun(__unused uint32_t nstep)
-{
-}
-
-kern_return_t
-pmsBuild(__unused pmsDef *pd, __unused uint32_t pdsize,
-        __unused pmsSetFunc_t *functab,
-        __unused uint32_t platformData, __unused pmsQueryFunc_t queryFunc)
-{
-    return(KERN_SUCCESS);
+       if (entry) {
+               (void)__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1);
+       }
+       else {
+               uint32_t nidle = __sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1);
+               if (nidle == topoParms.nLThreadsPerPackage) {
+                       my_cpu->lcpu.package->package_idle_exits++;
+               }
+       }
 }
 }