xnu-2050.22.13.tar.gz

[apple/xnu.git] / osfmk / i386 / pmCPU.c
diff --git a/osfmk/i386/pmCPU.c b/osfmk/i386/pmCPU.c

index 56fe44b1722b903e98ddc86360c4685c7a5605e5..b22749df79e3ce64935e3ba068baf6815d4611f2 100644 (file)
--- a/osfmk/i386/pmCPU.c
+++ b/osfmk/i386/pmCPU.c
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2004-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2011 Apple Inc. All rights reserved.
   *
   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   * 
@@ -45,28 +45,26 @@
  #include <i386/cpu_threads.h>
  #include <i386/pmCPU.h>
  #include <i386/cpuid.h>
-#include <i386/rtclock.h>
+#include <i386/rtclock_protos.h>
  #include <kern/sched_prim.h>
  #include <i386/lapic.h>
+#include <i386/pal_routines.h>
  
-/*
- * Kernel parameter determining whether threads are halted unconditionally
- * in the idle state.  This is the default behavior.
- * See machine_idle() for use.
- */
-int idlehalt                                   = 1;
+#include <sys/kdebug.h>
  
  extern int disableConsoleOutput;
  
-decl_simple_lock_data(,pm_init_lock);
+#define DELAY_UNSET            0xFFFFFFFFFFFFFFFFULL
  
  /*
   * The following is set when the KEXT loads and initializes.
   */
  pmDispatch_t   *pmDispatch     = NULL;
  
-static uint32_t                pmInitDone      = 0;
-
+static uint32_t                pmInitDone              = 0;
+static boolean_t       earlyTopology           = FALSE;
+static uint64_t                earlyMaxBusDelay        = DELAY_UNSET;
+static uint64_t                earlyMaxIntDelay        = DELAY_UNSET;
  
  /*
   * Initialize the Cstate change code.
@@ -74,16 +72,6 @@ static uint32_t              pmInitDone      = 0;
  void
  power_management_init(void)
  {
-    static boolean_t   initialized     = FALSE;
-
-    /*
-     * Initialize the lock for the KEXT initialization.
-     */
-    if (!initialized) {
-       simple_lock_init(&pm_init_lock, 0);
-       initialized = TRUE;
-    }
-
      if (pmDispatch != NULL && pmDispatch->cstateInit != NULL)
         (*pmDispatch->cstateInit)();
  }
@@ -100,21 +88,28 @@ machine_idle(void)
      if (my_cpu == NULL)
         goto out;
  
-    /*
-     * If idlehalt isn't set, then don't do any power management related
-     * idle handling.
-     */
-    if (!idlehalt)
-       goto out;
-
      my_cpu->lcpu.state = LCPU_IDLE;
      DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
      MARK_CPU_IDLE(cpu_number());
  
+    if (pmInitDone) {
+       /*
+        * Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay()
+        * were called prior to the CPU PM kext being registered.  We do
+        * this here since we know at this point the values will be first
+        * used since idle is where the decisions using these values is made.
+        */
+       if (earlyMaxBusDelay != DELAY_UNSET)
+           ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF));
+
+       if (earlyMaxIntDelay != DELAY_UNSET)
+           ml_set_maxintdelay(earlyMaxIntDelay);
+    }
+
      if (pmInitDone
         && pmDispatch != NULL
-       && pmDispatch->cstateMachineIdle != NULL)
-       (*pmDispatch->cstateMachineIdle)(0x7FFFFFFFFFFFFFFFULL);
+       && pmDispatch->MachineIdle != NULL)
+       (*pmDispatch->MachineIdle)(0x7FFFFFFFFFFFFFFFULL);
      else {
         /*
          * If no power management, re-enable interrupts and halt.
@@ -123,7 +118,10 @@ machine_idle(void)
          * cause problems in some MP configurations w.r.t. the APIC
          * stopping during a GV3 transition).
          */
-       __asm__ volatile ("sti; hlt");
+       pal_hlt();
+
+       /* Once woken, re-disable interrupts. */
+       pal_cli();
      }
  
      /*
@@ -137,7 +135,7 @@ machine_idle(void)
       * Re-enable interrupts.
       */
    out:
-    __asm__ volatile("sti");
+    pal_sti();
  }
  
  /*
@@ -152,19 +150,19 @@ pmCPUHalt(uint32_t reason)
      switch (reason) {
      case PM_HALT_DEBUG:
         cpup->lcpu.state = LCPU_PAUSE;
-       __asm__ volatile ("wbinvd; hlt");
+       pal_stop_cpu(FALSE);
         break;
  
      case PM_HALT_PANIC:
         cpup->lcpu.state = LCPU_PAUSE;
-       __asm__ volatile ("cli; wbinvd; hlt");
+       pal_stop_cpu(TRUE);
         break;
  
      case PM_HALT_NORMAL:
      default:
-       __asm__ volatile ("cli");
+        pal_cli();
  
-    if (pmInitDone
+       if (pmInitDone
             && pmDispatch != NULL
             && pmDispatch->pmCPUHalt != NULL) {
             /*
@@ -178,7 +176,8 @@ pmCPUHalt(uint32_t reason)
             i386_init_slave_fast();
  
             panic("init_slave_fast returned");
-       } else {
+       } else
+       {
             /*
              * If no power managment and a processor is taken off-line,
              * then invalidate the cache and halt it (it will not be able
@@ -186,10 +185,11 @@ pmCPUHalt(uint32_t reason)
              */
             __asm__ volatile ("wbinvd");
             cpup->lcpu.state = LCPU_HALT;
-           __asm__ volatile ( "wbinvd; hlt" );
+           pal_stop_cpu(FALSE);
  
             panic("back from Halt");
         }
+
         break;
      }
  }
@@ -206,6 +206,13 @@ pmMarkAllCPUsOff(void)
  static void
  pmInitComplete(void)
  {
+    if (earlyTopology
+       && pmDispatch != NULL
+       && pmDispatch->pmCPUStateInit != NULL) {
+       (*pmDispatch->pmCPUStateInit)();
+       earlyTopology = FALSE;
+    }
+
      pmInitDone = 1;
  }
  
@@ -278,13 +285,15 @@ pmLockCPUTopology(int lock)
  /*
   * Called to get the next deadline that has been set by the
   * power management code.
+ * Note: a return of 0 from AICPM and this routine signifies
+ * that no deadline is set.
   */
  uint64_t
  pmCPUGetDeadline(cpu_data_t *cpu)
  {
-    uint64_t   deadline        = EndOfAllTime;
+    uint64_t   deadline        = 0;
  
-       if (pmInitDone
+    if (pmInitDone
         && pmDispatch != NULL
         && pmDispatch->GetDeadline != NULL)
         deadline = (*pmDispatch->GetDeadline)(&cpu->lcpu);
@@ -371,6 +380,8 @@ pmCPUStateInit(void)
  {
      if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL)
         (*pmDispatch->pmCPUStateInit)();
+    else
+       earlyTopology = TRUE;
  }
  
  /*
@@ -446,7 +457,8 @@ ml_get_maxsnoop(void)
  {
      uint64_t   max_snoop       = 0;
  
-    if (pmDispatch != NULL
+    if (pmInitDone
+       && pmDispatch != NULL
         && pmDispatch->getMaxSnoop != NULL)
         max_snoop = pmDispatch->getMaxSnoop();
  
@@ -459,7 +471,8 @@ ml_get_maxbusdelay(void)
  {
      uint64_t   max_delay       = 0;
  
-    if (pmDispatch != NULL
+    if (pmInitDone
+       && pmDispatch != NULL
         && pmDispatch->getMaxBusDelay != NULL)
         max_delay = pmDispatch->getMaxBusDelay();
  
@@ -480,8 +493,11 @@ ml_set_maxbusdelay(uint32_t mdelay)
      uint64_t   maxdelay        = mdelay;
  
      if (pmDispatch != NULL
-       && pmDispatch->setMaxBusDelay != NULL)
+       && pmDispatch->setMaxBusDelay != NULL) {
+       earlyMaxBusDelay = DELAY_UNSET;
         pmDispatch->setMaxBusDelay(maxdelay);
+    } else
+       earlyMaxBusDelay = maxdelay;
  }
  
  uint64_t
@@ -503,8 +519,24 @@ void
  ml_set_maxintdelay(uint64_t mdelay)
  {
      if (pmDispatch != NULL
-       && pmDispatch->setMaxIntDelay != NULL)
+       && pmDispatch->setMaxIntDelay != NULL) {
+       earlyMaxIntDelay = DELAY_UNSET;
         pmDispatch->setMaxIntDelay(mdelay);
+    } else
+       earlyMaxIntDelay = mdelay;
+}
+
+boolean_t
+ml_get_interrupt_prewake_applicable()
+{
+    boolean_t applicable = FALSE;
+
+    if (pmInitDone 
+       && pmDispatch != NULL
+       && pmDispatch->pmInterruptPrewakeApplicable != NULL)
+       applicable = pmDispatch->pmInterruptPrewakeApplicable();
+
+    return applicable;
  }
  
  /*
@@ -562,8 +594,10 @@ machine_run_count(uint32_t count)
  }
  
  boolean_t
-machine_cpu_is_inactive(int cpu)
+machine_processor_is_inactive(processor_t processor)
  {
+    int                cpu = processor->cpu_id;
+
      if (pmDispatch != NULL
         && pmDispatch->pmIsCPUUnAvailable != NULL)
         return(pmDispatch->pmIsCPUUnAvailable(cpu_to_lcpu(cpu)));
@@ -571,6 +605,109 @@ machine_cpu_is_inactive(int cpu)
         return(FALSE);
  }
  
+processor_t
+machine_choose_processor(processor_set_t pset,
+                        processor_t preferred)
+{
+    int                startCPU;
+    int                endCPU;
+    int                preferredCPU;
+    int                chosenCPU;
+
+    if (!pmInitDone)
+       return(preferred);
+
+    if (pset == NULL) {
+       startCPU = -1;
+       endCPU = -1;
+    } else {
+       startCPU = pset->cpu_set_low;
+       endCPU = pset->cpu_set_hi;
+    }
+
+    if (preferred == NULL)
+       preferredCPU = -1;
+    else
+       preferredCPU = preferred->cpu_id;
+
+    if (pmDispatch != NULL
+       && pmDispatch->pmChooseCPU != NULL) {
+       chosenCPU = pmDispatch->pmChooseCPU(startCPU, endCPU, preferredCPU);
+
+       if (chosenCPU == -1)
+           return(NULL);
+       return(cpu_datap(chosenCPU)->cpu_processor);
+    }
+
+    return(preferred);
+}
+
+static int
+pmThreadGetUrgency(uint64_t *rt_period, uint64_t *rt_deadline)
+{
+
+    return(thread_get_urgency(rt_period, rt_deadline));
+}
+
+#if    DEBUG
+uint32_t       urgency_stats[64][THREAD_URGENCY_MAX];
+#endif
+
+#define                URGENCY_NOTIFICATION_ASSERT_NS (5 * 1000 * 1000)
+uint64_t       urgency_notification_assert_abstime_threshold, urgency_notification_max_recorded;
+
+void
+thread_tell_urgency(int urgency,
+    uint64_t rt_period,
+    uint64_t rt_deadline)
+{
+       uint64_t        urgency_notification_time_start, delta;
+       boolean_t       urgency_assert = (urgency_notification_assert_abstime_threshold != 0);
+       assert(get_preemption_level() > 0 || ml_get_interrupts_enabled() == FALSE);
+#if    DEBUG
+       urgency_stats[cpu_number() % 64][urgency]++;
+#endif
+       if (!pmInitDone
+           || pmDispatch == NULL
+           || pmDispatch->pmThreadTellUrgency == NULL)
+               return;
+
+       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, (rt_deadline >> 32), rt_deadline, 0);
+
+       if (__improbable((urgency_assert == TRUE)))
+               urgency_notification_time_start = mach_absolute_time();
+
+       pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline);
+
+       if (__improbable((urgency_assert == TRUE))) {
+               delta = mach_absolute_time() - urgency_notification_time_start;
+
+               if (__improbable(delta > urgency_notification_max_recorded)) {
+                       /* This is not synchronized, but it doesn't matter
+                        * if we (rarely) miss an event, as it is statistically
+                        * unlikely that it will never recur.
+                        */
+                       urgency_notification_max_recorded = delta;
+
+                       if (__improbable((delta > urgency_notification_assert_abstime_threshold) && !machine_timeout_suspended()))
+                               panic("Urgency notification callout %p exceeded threshold, 0x%llx abstime units", pmDispatch->pmThreadTellUrgency, delta);
+               }
+       }
+
+       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, (rt_deadline >> 32), rt_deadline, 0);
+}
+
+void
+active_rt_threads(boolean_t active)
+{
+    if (!pmInitDone
+       || pmDispatch == NULL
+       || pmDispatch->pmActiveRTThreads == NULL)
+       return;
+
+    pmDispatch->pmActiveRTThreads(active);
+}
+
  static uint32_t
  pmGetSavedRunCount(void)
  {
@@ -620,12 +757,32 @@ pmSendIPI(int cpu)
      lapic_send_ipi(cpu, LAPIC_PM_INTERRUPT);
  }
  
-static rtc_nanotime_t *
-pmGetNanotimeInfo(void)
+static void
+pmGetNanotimeInfo(pm_rtc_nanotime_t *rtc_nanotime)
  {
-    return(&rtc_nanotime_info);
+       /*
+        * Make sure that nanotime didn't change while we were reading it.
+        */
+       do {
+               rtc_nanotime->generation = pal_rtc_nanotime_info.generation; /* must be first */
+               rtc_nanotime->tsc_base = pal_rtc_nanotime_info.tsc_base;
+               rtc_nanotime->ns_base = pal_rtc_nanotime_info.ns_base;
+               rtc_nanotime->scale = pal_rtc_nanotime_info.scale;
+               rtc_nanotime->shift = pal_rtc_nanotime_info.shift;
+       } while(pal_rtc_nanotime_info.generation != 0
+               && rtc_nanotime->generation != pal_rtc_nanotime_info.generation);
+}
+
+static uint32_t
+pmTimerQueueMigrate(int target_cpu)
+{
+    /* Call the etimer code to do this. */
+    return (target_cpu != cpu_number())
+               ? etimer_queue_migrate(target_cpu)
+               : 0;
  }
  
+
  /*
   * Called by the power management kext to register itself and to get the
   * callbacks it might need into other kernel functions.  This interface
@@ -654,9 +811,16 @@ pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
         callbacks->LCPUtoProcessor      = pmLCPUtoProcessor;
         callbacks->ThreadBind           = thread_bind;
         callbacks->GetSavedRunCount     = pmGetSavedRunCount;
-       callbacks->pmSendIPI            = pmSendIPI;
         callbacks->GetNanotimeInfo      = pmGetNanotimeInfo;
+       callbacks->ThreadGetUrgency     = pmThreadGetUrgency;
+       callbacks->RTCClockAdjust       = rtc_clock_adjust;
+       callbacks->timerQueueMigrate    = pmTimerQueueMigrate;
         callbacks->topoParms            = &topoParms;
+       callbacks->pmSendIPI            = pmSendIPI;
+       callbacks->InterruptPending     = lapic_is_interrupt_pending;
+       callbacks->IsInterrupting       = lapic_is_interrupting;
+       callbacks->InterruptStats       = lapic_interrupt_counts;
+       callbacks->DisableApicTimer     = lapic_disable_timer;
      } else {
         panic("Version mis-match between Kernel and CPU PM");
      }
@@ -664,6 +828,12 @@ pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
      if (cpuFuncs != NULL) {
         pmDispatch = cpuFuncs;
  
+       if (earlyTopology
+           && pmDispatch->pmCPUStateInit != NULL) {
+           (*pmDispatch->pmCPUStateInit)();
+           earlyTopology = FALSE;
+       }
+
         if (pmDispatch->pmIPIHandler != NULL) {
             lapic_set_pm_func((i386_intr_func_t)pmDispatch->pmIPIHandler);
         }