]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/kern/processor.c
xnu-2782.1.97.tar.gz
[apple/xnu.git] / osfmk / kern / processor.c
index 518891c79c9de2144f1babd19ea97f1a765c9fc7..355b1b1dcb7bc859c004a21f4abdf9ad4bba417e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -89,10 +89,12 @@ struct pset_node            pset_node0;
 decl_simple_lock_data(static,pset_node_lock)
 
 queue_head_t                   tasks;
+queue_head_t                   terminated_tasks;       /* To be used ONLY for stackshot. */
 int                                            tasks_count;
+int                                            terminated_tasks_count;
 queue_head_t                   threads;
 int                                            threads_count;
-decl_mutex_data(,tasks_threads_lock)
+decl_lck_mtx_data(,tasks_threads_lock)
 
 processor_t                            processor_list;
 unsigned int                   processor_count;
@@ -101,8 +103,9 @@ decl_simple_lock_data(,processor_list_lock)
 
 uint32_t                               processor_avail_count;
 
-processor_t    master_processor;
-int            master_cpu = 0;
+processor_t            master_processor;
+int                    master_cpu = 0;
+boolean_t              sched_stats_active = FALSE;
 
 /* Forwards */
 kern_return_t  processor_set_things(
@@ -119,8 +122,8 @@ processor_bootstrap(void)
 
        simple_lock_init(&pset_node_lock, 0);
 
-       mutex_init(&tasks_threads_lock, 0);
        queue_init(&tasks);
+       queue_init(&terminated_tasks);
        queue_init(&threads);
 
        simple_lock_init(&processor_list_lock, 0);
@@ -132,40 +135,83 @@ processor_bootstrap(void)
 
 /*
  *     Initialize the given processor for the cpu
- *     indicated by slot_num, and assign to the
+ *     indicated by cpu_id, and assign to the
  *     specified processor set.
  */
 void
 processor_init(
-       processor_t             p,
-       int                             slot_num,
-       processor_set_t pset)
+       processor_t                     processor,
+       int                                     cpu_id,
+       processor_set_t         pset)
 {
-       run_queue_init(&p->runq);
-
-       p->state = PROCESSOR_OFF_LINE;
-       p->active_thread = p->next_thread = p->idle_thread = THREAD_NULL;
-       p->processor_set = pset;
-       p->current_pri = MINPRI;
-       timer_call_setup(&p->quantum_timer, thread_quantum_expire, p);
-       p->deadline = UINT64_MAX;
-       p->timeslice = 0;
-       p->processor_self = IP_NULL;
-       simple_lock_init(&p->lock, 0);
-       processor_data_init(p);
-       PROCESSOR_DATA(p, slot_num) = slot_num;
-       p->processor_list = NULL;
+       spl_t           s;
+
+       if (processor != master_processor) {
+               /* Scheduler state deferred until sched_init() */
+               SCHED(processor_init)(processor);
+       }
+
+       processor->state = PROCESSOR_OFF_LINE;
+       processor->active_thread = processor->next_thread = processor->idle_thread = THREAD_NULL;
+       processor->processor_set = pset;
+       processor->current_pri = MINPRI;
+       processor->current_thmode = TH_MODE_NONE;
+       processor->cpu_id = cpu_id;
+       timer_call_setup(&processor->quantum_timer, thread_quantum_expire, processor);
+       processor->quantum_end = UINT64_MAX;
+       processor->deadline = UINT64_MAX;
+       processor->timeslice = 0;
+       processor->processor_primary = processor; /* no SMT relationship known at this point */
+       processor->processor_secondary = NULL;
+       processor->is_SMT = FALSE;
+       processor->processor_self = IP_NULL;
+       processor_data_init(processor);
+       processor->processor_list = NULL;
+
+       s = splsched();
+       pset_lock(pset);
+       if (pset->cpu_set_count++ == 0)
+               pset->cpu_set_low = pset->cpu_set_hi = cpu_id;
+       else {
+               pset->cpu_set_low = (cpu_id < pset->cpu_set_low)? cpu_id: pset->cpu_set_low;
+               pset->cpu_set_hi = (cpu_id > pset->cpu_set_hi)? cpu_id: pset->cpu_set_hi;
+       }
+       pset_unlock(pset);
+       splx(s);
 
        simple_lock(&processor_list_lock);
        if (processor_list == NULL)
-               processor_list = p;
+               processor_list = processor;
        else
-               processor_list_tail->processor_list = p;
-       processor_list_tail = p;
+               processor_list_tail->processor_list = processor;
+       processor_list_tail = processor;
        processor_count++;
        simple_unlock(&processor_list_lock);
 }
 
+void
+processor_set_primary(
+       processor_t             processor,
+       processor_t             primary)
+{
+       assert(processor->processor_primary == primary || processor->processor_primary == processor);
+       /* Re-adjust the primary pointer for this (possibly) secondary processor */
+       processor->processor_primary = primary;
+
+       assert(primary->processor_secondary == NULL || primary->processor_secondary == processor);
+       if (primary != processor) {
+               /* Link primary to secondary; this assumes a 2-way SMT model.
+                * We'll need to move to a queue if any future architecture
+                * requires otherwise.
+                */
+               assert(processor->processor_secondary == NULL);
+               primary->processor_secondary = processor;
+               /* Mark both processors as SMT siblings */
+               primary->is_SMT = TRUE;
+               processor->is_SMT = TRUE;
+       }
+}
+
 processor_set_t
 processor_pset(
        processor_t     processor)
@@ -183,6 +229,12 @@ processor_set_t
 pset_create(
        pset_node_t                     node)
 {
+#if defined(CONFIG_SCHED_MULTIQ)
+       /* multiq scheduler is not currently compatible with multiple psets */
+       if (sched_groups_enabled)
+               return processor_pset(master_processor);
+#endif /* defined(CONFIG_SCHED_MULTIQ) */
+
        processor_set_t         *prev, pset = kalloc(sizeof (*pset));
 
        if (pset != PROCESSOR_SET_NULL) {
@@ -210,11 +262,18 @@ pset_init(
        processor_set_t         pset,
        pset_node_t                     node)
 {
+       if (pset != &pset0) {
+               /* Scheduler state deferred until sched_init() */
+               SCHED(pset_init)(pset);
+       }
+
        queue_init(&pset->active_queue);
        queue_init(&pset->idle_queue);
-       pset->idle_count = 0;
-       pset->processor_count = 0;
-       pset->low_pri = PROCESSOR_NULL;
+       queue_init(&pset->idle_secondary_queue);
+       pset->online_processor_count = 0;
+       pset->cpu_set_low = pset->cpu_set_hi = 0;
+       pset->cpu_set_count = 0;
+       pset->pending_AST_cpu_mask = 0;
        pset_lock_init(pset);
        pset->pset_self = IP_NULL;
        pset->pset_name_self = IP_NULL;
@@ -253,13 +312,13 @@ processor_info(
        processor_info_t                info,
        mach_msg_type_number_t  *count)
 {
-       register int    slot_num, state;
+       register int    cpu_id, state;
        kern_return_t   result;
 
        if (processor == PROCESSOR_NULL)
                return (KERN_INVALID_ARGUMENT);
 
-       slot_num = PROCESSOR_DATA(processor, slot_num);
+       cpu_id = processor->cpu_id;
 
        switch (flavor) {
 
@@ -271,14 +330,14 @@ processor_info(
                        return (KERN_FAILURE);
 
                basic_info = (processor_basic_info_t) info;
-               basic_info->cpu_type = slot_type(slot_num);
-               basic_info->cpu_subtype = slot_subtype(slot_num);
+               basic_info->cpu_type = slot_type(cpu_id);
+               basic_info->cpu_subtype = slot_subtype(cpu_id);
                state = processor->state;
                if (state == PROCESSOR_OFF_LINE)
                        basic_info->running = FALSE;
                else
                        basic_info->running = TRUE;
-               basic_info->slot_num = slot_num;
+               basic_info->slot_num = cpu_id;
                if (processor == master_processor) 
                        basic_info->is_master = TRUE;
                else
@@ -292,18 +351,71 @@ processor_info(
 
        case PROCESSOR_CPU_LOAD_INFO:
        {
-               register processor_cpu_load_info_t      cpu_load_info;
-
-           if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT)
+               processor_cpu_load_info_t       cpu_load_info;
+               timer_t         idle_state;
+               uint64_t        idle_time_snapshot1, idle_time_snapshot2;
+               uint64_t        idle_time_tstamp1, idle_time_tstamp2;
+
+               /*
+                * We capture the accumulated idle time twice over
+                * the course of this function, as well as the timestamps
+                * when each was last updated. Since these are
+                * all done using non-atomic racy mechanisms, the
+                * most we can infer is whether values are stable.
+                * timer_grab() is the only function that can be
+                * used reliably on another processor's per-processor
+                * data.
+                */
+
+               if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT)
                        return (KERN_FAILURE);
 
-           cpu_load_info = (processor_cpu_load_info_t) info;
-               cpu_load_info->cpu_ticks[CPU_STATE_USER] =
-                                                       timer_grab(&PROCESSOR_DATA(processor, user_state)) / hz_tick_interval;
-               cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] =
-                                                       timer_grab(&PROCESSOR_DATA(processor, system_state)) / hz_tick_interval;
-               cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
-                                                       timer_grab(&PROCESSOR_DATA(processor, idle_state)) / hz_tick_interval;
+               cpu_load_info = (processor_cpu_load_info_t) info;
+               if (precise_user_kernel_time) {
+                       cpu_load_info->cpu_ticks[CPU_STATE_USER] =
+                                                       (uint32_t)(timer_grab(&PROCESSOR_DATA(processor, user_state)) / hz_tick_interval);
+                       cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] =
+                                                       (uint32_t)(timer_grab(&PROCESSOR_DATA(processor, system_state)) / hz_tick_interval);
+               } else {
+                       uint64_t tval = timer_grab(&PROCESSOR_DATA(processor, user_state)) +
+                               timer_grab(&PROCESSOR_DATA(processor, system_state));
+
+                       cpu_load_info->cpu_ticks[CPU_STATE_USER] = (uint32_t)(tval / hz_tick_interval);
+                       cpu_load_info->cpu_ticks[CPU_STATE_SYSTEM] = 0;
+               }
+
+               idle_state = &PROCESSOR_DATA(processor, idle_state);
+               idle_time_snapshot1 = timer_grab(idle_state);
+               idle_time_tstamp1 = idle_state->tstamp;
+
+               /*
+                * Idle processors are not continually updating their
+                * per-processor idle timer, so it may be extremely
+                * out of date, resulting in an over-representation
+                * of non-idle time between two measurement
+                * intervals by e.g. top(1). If we are non-idle, or
+                * have evidence that the timer is being updated
+                * concurrently, we consider its value up-to-date.
+                */
+               if (PROCESSOR_DATA(processor, current_state) != idle_state) {
+                       cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
+                                                       (uint32_t)(idle_time_snapshot1 / hz_tick_interval);
+               } else if ((idle_time_snapshot1 != (idle_time_snapshot2 = timer_grab(idle_state))) ||
+                                  (idle_time_tstamp1 != (idle_time_tstamp2 = idle_state->tstamp))){
+                       /* Idle timer is being updated concurrently, second stamp is good enough */
+                       cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
+                                                       (uint32_t)(idle_time_snapshot2 / hz_tick_interval);
+               } else {
+                       /*
+                        * Idle timer may be very stale. Fortunately we have established
+                        * that idle_time_snapshot1 and idle_time_tstamp1 are unchanging
+                        */
+                       idle_time_snapshot1 += mach_absolute_time() - idle_time_tstamp1;
+                               
+                       cpu_load_info->cpu_ticks[CPU_STATE_IDLE] =
+                               (uint32_t)(idle_time_snapshot1 / hz_tick_interval);
+               }
+
                cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0;
 
            *count = PROCESSOR_CPU_LOAD_INFO_COUNT;
@@ -313,7 +425,7 @@ processor_info(
        }
 
        default:
-           result = cpu_info(flavor, slot_num, info, count);
+           result = cpu_info(flavor, cpu_id, info, count);
            if (result == KERN_SUCCESS)
                        *host = &realhost;                 
 
@@ -339,7 +451,7 @@ processor_start(
                prev = thread_bind(processor);
                thread_block(THREAD_CONTINUE_NULL);
 
-               result = cpu_start(PROCESSOR_DATA(processor, slot_num));
+               result = cpu_start(processor->cpu_id);
 
                thread_bind(prev);
 
@@ -408,12 +520,11 @@ processor_start(
        if (processor->processor_self == IP_NULL)
                ipc_processor_init(processor);
 
-       result = cpu_start(PROCESSOR_DATA(processor, slot_num));
+       result = cpu_start(processor->cpu_id);
        if (result != KERN_SUCCESS) {
                s = splsched();
                pset_lock(pset);
                processor->state = PROCESSOR_OFF_LINE;
-               timer_call_shutdown(processor);
                pset_unlock(pset);
                splx(s);
 
@@ -444,7 +555,7 @@ processor_control(
        if (processor == PROCESSOR_NULL)
                return(KERN_INVALID_ARGUMENT);
 
-       return(cpu_control(PROCESSOR_DATA(processor, slot_num), info, count));
+       return(cpu_control(processor->cpu_id, info, count));
 }
            
 kern_return_t
@@ -470,6 +581,9 @@ processor_get_assignment(
 {
        int state;
 
+       if (processor == PROCESSOR_NULL)
+               return(KERN_INVALID_ARGUMENT);
+
        state = processor->state;
        if (state == PROCESSOR_SHUTDOWN || state == PROCESSOR_OFF_LINE)
                return(KERN_FAILURE);
@@ -712,7 +826,7 @@ processor_set_things(
        addr = NULL;
 
        for (;;) {
-               mutex_lock(&tasks_threads_lock);
+               lck_mtx_lock(&tasks_threads_lock);
 
                if (type == THING_TASK)
                        maxthings = tasks_count;
@@ -726,7 +840,7 @@ processor_set_things(
                        break;
 
                /* unlock and allocate more memory */
-               mutex_unlock(&tasks_threads_lock);
+               lck_mtx_unlock(&tasks_threads_lock);
 
                if (size != 0)
                        kfree(addr, size);
@@ -778,7 +892,7 @@ processor_set_things(
 
        }
                
-       mutex_unlock(&tasks_threads_lock);
+       lck_mtx_unlock(&tasks_threads_lock);
 
        if (actual < maxthings)
                size_needed = actual * sizeof (mach_port_t);