xnu-2782.1.97.tar.gz

[apple/xnu.git] / osfmk / kern / processor.c
diff --git a/osfmk/kern/processor.c b/osfmk/kern/processor.c

index 23a5496119f7ea63cce73147b87f3c0cd5b8bdc6..355b1b1dcb7bc859c004a21f4abdf9ad4bba417e 100644 (file)
--- a/osfmk/kern/processor.c
+++ b/osfmk/kern/processor.c
@@ -91,6 +91,7 @@ decl_simple_lock_data(static,pset_node_lock)
  queue_head_t                   tasks;
  queue_head_t                   terminated_tasks;       /* To be used ONLY for stackshot. */
  int                                            tasks_count;
+int                                            terminated_tasks_count;
  queue_head_t                   threads;
  int                                            threads_count;
  decl_lck_mtx_data(,tasks_threads_lock)
@@ -143,6 +144,8 @@ processor_init(
         int                                     cpu_id,
         processor_set_t         pset)
  {
+       spl_t           s;
+
         if (processor != master_processor) {
                 /* Scheduler state deferred until sched_init() */
                 SCHED(processor_init)(processor);
@@ -155,13 +158,17 @@ processor_init(
         processor->current_thmode = TH_MODE_NONE;
         processor->cpu_id = cpu_id;
         timer_call_setup(&processor->quantum_timer, thread_quantum_expire, processor);
+       processor->quantum_end = UINT64_MAX;
         processor->deadline = UINT64_MAX;
         processor->timeslice = 0;
-       processor->processor_meta = PROCESSOR_META_NULL;
+       processor->processor_primary = processor; /* no SMT relationship known at this point */
+       processor->processor_secondary = NULL;
+       processor->is_SMT = FALSE;
         processor->processor_self = IP_NULL;
         processor_data_init(processor);
         processor->processor_list = NULL;
  
+       s = splsched();
         pset_lock(pset);
         if (pset->cpu_set_count++ == 0)
                 pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
@@ -170,6 +177,7 @@ processor_init(
                 pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id: pset->cpu_set_hi;
         }
         pset_unlock(pset);
+       splx(s);
  
         simple_lock(&processor_list_lock);
         if (processor_list == NULL)
@@ -182,21 +190,26 @@ processor_init(
  }
  
  void
-processor_meta_init(
+processor_set_primary(
         processor_t             processor,
         processor_t             primary)
  {
-       processor_meta_t        pmeta = primary->processor_meta;
-
-       if (pmeta == PROCESSOR_META_NULL) {
-               pmeta = kalloc(sizeof (*pmeta));
-
-               queue_init(&pmeta->idle_queue);
-
-               pmeta->primary = primary;
+       assert(processor->processor_primary == primary || processor->processor_primary == processor);
+       /* Re-adjust primary point for this (possibly) secondary processor */
+       processor->processor_primary = primary;
+
+       assert(primary->processor_secondary == NULL || primary->processor_secondary == processor);
+       if (primary != processor) {
+               /* Link primary to secondary, assumes a 2-way SMT model
+                * We'll need to move to a queue if any future architecture
+                * requires otherwise.
+                */
+               assert(processor->processor_secondary == NULL);
+               primary->processor_secondary = processor;
+               /* Mark both processors as SMT siblings */
+               primary->is_SMT = TRUE;
+               processor->is_SMT = TRUE;
         }
-
-       processor->processor_meta = pmeta;
  }
  
  processor_set_t
@@ -216,6 +229,12 @@ processor_set_t
  pset_create(
         pset_node_t                     node)
  {
+#if defined(CONFIG_SCHED_MULTIQ)
+       /* multiq scheduler is not currently compatible with multiple psets */
+       if (sched_groups_enabled)
+               return processor_pset(master_processor);
+#endif /* defined(CONFIG_SCHED_MULTIQ) */
+
         processor_set_t         *prev, pset = kalloc(sizeof (*pset));
  
         if (pset != PROCESSOR_SET_NULL) {
@@ -250,11 +269,11 @@ pset_init(
  
         queue_init(&pset->active_queue);
         queue_init(&pset->idle_queue);
+       queue_init(&pset->idle_secondary_queue);
         pset->online_processor_count = 0;
-       pset_pri_init_hint(pset, PROCESSOR_NULL);
-       pset_count_init_hint(pset, PROCESSOR_NULL);
         pset->cpu_set_low = pset->cpu_set_hi = 0;
         pset->cpu_set_count = 0;
+       pset->pending_AST_cpu_mask = 0;
         pset_lock_init(pset);
         pset->pset_self = IP_NULL;
         pset->pset_name_self = IP_NULL;
@@ -333,8 +352,20 @@ processor_info(
         case PROCESSOR_CPU_LOAD_INFO:
         {
                 processor_cpu_load_info_t       cpu_load_info;
-               timer_data_t    idle_temp;
                 timer_t         idle_state;
+               uint64_t        idle_time_snapshot1, idle_time_snapshot2;
+               uint64_t        idle_time_tstamp1, idle_time_tstamp2;
+
+               /*
+                * We capture the accumulated idle time twice over
+                * the course of this function, as well as the timestamps
+                * when each were last updated. Since these are
+                * all done using non-atomic racy mechanisms, the
+                * most we can infer is whether values are stable.
+                * timer_grab() is the only function that can be
+                * used reliably on another processor's per-processor
+                * data.
+                */
  
                 if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT)
                         return (KERN_FAILURE);
@@ -354,17 +385,35 @@ processor_info(
                 }
  
                 idle_state = &PROCESSOR_DATA(processor, idle_state);
-               idle_temp = *idle_state;
-
-               if (PROCESSOR_DATA(processor, current_state) != idle_state ||
-                   timer_grab(&idle_temp) != timer_grab(idle_state)) {
+               idle_time_snapshot1 = timer_grab(idle_state);
+               idle_time_tstamp1 = idle_state->tstamp;
+
+               /*
+                * Idle processors are not continually updating their
+                * per-processor idle timer, so it may be extremely
+                * out of date, resulting in an over-representation
+                * of non-idle time between two measurement
+                * intervals by e.g. top(1). If we are non-idle, or
+                * have evidence that the timer is being updated
+                * concurrently, we consider its value up-to-date.
+                */
+               if (PROCESSOR_DATA(processor, current_state) != idle_state) {
                         cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
-                                                       (uint32_t)(timer_grab(&PROCESSOR_DATA(processor, idle_state)) / hz_tick_interval);
+                                                       (uint32_t)(idle_time_snapshot1 / hz_tick_interval);
+               } else if ((idle_time_snapshot1 != (idle_time_snapshot2 = timer_grab(idle_state))) ||
+                                  (idle_time_tstamp1 != (idle_time_tstamp2 = idle_state->tstamp))){
+                       /* Idle timer is being updated concurrently, second stamp is good enough */
+                       cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
+                                                       (uint32_t)(idle_time_snapshot2 / hz_tick_interval);
                 } else {
-                       timer_advance(&idle_temp, mach_absolute_time() - idle_temp.tstamp);
+                       /*
+                        * Idle timer may be very stale. Fortunately we have established
+                        * that idle_time_snapshot1 and idle_time_tstamp1 are unchanging
+                        */
+                       idle_time_snapshot1 += mach_absolute_time() - idle_time_tstamp1;
                                 
                         cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
-                               (uint32_t)(timer_grab(&idle_temp) / hz_tick_interval);
+                               (uint32_t)(idle_time_snapshot1 / hz_tick_interval);
                 }
  
                 cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0;
@@ -951,15 +1000,6 @@ processor_set_threads(
  {
      return KERN_FAILURE;
  }
-#elif defined(CONFIG_EMBEDDED)
-kern_return_t
-processor_set_threads(
-       __unused processor_set_t                pset,
-       __unused thread_array_t         *thread_list,
-       __unused mach_msg_type_number_t *count)
-{
-    return KERN_NOT_SUPPORTED;
-}
  #else
  kern_return_t
  processor_set_threads(