diff --git a/osfmk/kern/syscall_subr.c b/osfmk/kern/syscall_subr.c
index da2fe9e7403f27260bd07784c09f4147e4bf04d5..e944c79d94956f64150b36fb3fa67025a689a17e 100644 (file)
 #include <kern/spl.h>
 #include <kern/task.h>
 #include <kern/thread.h>
+#include <kern/policy_internal.h>
+
 #include <mach/policy.h>
 
 #include <kern/syscall_subr.h>
 #include <mach/mach_host_server.h>
 #include <mach/mach_syscalls.h>
-
+#include <sys/kdebug.h>
+#include <kern/ast.h>
 
 #ifdef MACH_BSD
 extern void workqueue_thread_yielded(void);
 extern sched_call_t workqueue_get_sched_callback(void);
 #endif /* MACH_BSD */
 
+extern wait_result_t thread_handoff_reason(thread_t thread, ast_t reason);
 
 /* Called from commpage to take a delayed preemption when exiting
  * the "Preemption Free Zone" (PFZ).
@@ -98,25 +102,18 @@ __unused   struct pfz_exit_args *args)
  *     swtch and swtch_pri both attempt to context switch (logic in
  *     thread_block no-ops the context switch if nothing would happen).
  *     A boolean is returned that indicates whether there is anything
- *     else runnable.
- *
- *     This boolean can be used by a thread waiting on a
- *     lock or condition:  If FALSE is returned, the thread is justified
- *     in becoming a resource hog by continuing to spin because there's
- *     nothing else useful that the processor could do.  If TRUE is
- *     returned, the thread should make one more check on the
- *     lock and then be a good citizen and really suspend.
+ *     else runnable.  That's no excuse to spin, though.
  */
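
The boolean contract reads most clearly from the caller's side. A minimal userspace sketch, assuming the swtch_pri() trap is exposed through <mach/mach_traps.h> as on macOS; `work_ready` is a hypothetical flag standing in for whatever condition is being polled:

#include <mach/mach_traps.h>    /* swtch_pri() trap prototype */
#include <stdatomic.h>

extern _Atomic int work_ready;  /* hypothetical polled condition */

void
poll_for_work(void)
{
	while (!atomic_load(&work_ready)) {
		/*
		 * TRUE: other threads are runnable, so keep yielding.
		 * FALSE: nothing else to run -- though per the comment
		 * above, that is still no excuse to spin indefinitely.
		 */
		(void)swtch_pri(0);
	}
}
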
 
 static void
 swtch_continue(void)
 {
-       register processor_t    myprocessor;
-    boolean_t                          result;
+       processor_t     myprocessor;
+       boolean_t       result;
 
-    disable_preemption();
+       disable_preemption();
        myprocessor = current_processor();
-       result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
+       result = SCHED(thread_should_yield)(myprocessor, current_thread());
        enable_preemption();
 
        thread_syscall_return(result);
@@ -127,12 +124,12 @@ boolean_t
 swtch(
        __unused struct swtch_args *args)
 {
-       register processor_t    myprocessor;
+       processor_t     myprocessor;
        boolean_t                               result;
 
        disable_preemption();
        myprocessor = current_processor();
-       if (SCHED(processor_queue_empty)(myprocessor) &&        rt_runq.count == 0) {
+       if (!SCHED(thread_should_yield)(myprocessor, current_thread())) {
                mp_enable_preemption();
 
                return (FALSE);
@@ -145,7 +142,7 @@ swtch(
 
        disable_preemption();
        myprocessor = current_processor();
-       result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
+       result = SCHED(thread_should_yield)(myprocessor, current_thread());
        enable_preemption();
 
        return (result);
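
The expression being replaced in each of these functions hints at what the new SCHED(thread_should_yield) hook subsumes. A sketch reconstructed purely from the removed lines; the real per-scheduler implementations live elsewhere in osfmk/kern:

/* Reconstruction for illustration only, not the actual xnu definition. */
static boolean_t
thread_should_yield_sketch(processor_t processor, __unused thread_t thread)
{
	/* Yield if anything is queued locally or on the realtime run queue. */
	return (!SCHED(processor_queue_empty)(processor) || rt_runq.count > 0);
}
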
@@ -154,14 +151,14 @@ swtch(
 static void
 swtch_pri_continue(void)
 {
-       register processor_t    myprocessor;
-    boolean_t                          result;
+       processor_t     myprocessor;
+       boolean_t       result;
 
        thread_depress_abort_internal(current_thread());
 
-    disable_preemption();
+       disable_preemption();
        myprocessor = current_processor();
-       result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
+       result = SCHED(thread_should_yield)(myprocessor, current_thread());
        mp_enable_preemption();
 
        thread_syscall_return(result);
@@ -172,12 +169,12 @@ boolean_t
 swtch_pri(
 __unused       struct swtch_pri_args *args)
 {
-       register processor_t    myprocessor;
+       processor_t     myprocessor;
        boolean_t                               result;
 
        disable_preemption();
        myprocessor = current_processor();
-       if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) {
+       if (!SCHED(thread_should_yield)(myprocessor, current_thread())) {
                mp_enable_preemption();
 
                return (FALSE);
@@ -194,44 +191,30 @@ __unused  struct swtch_pri_args *args)
 
        disable_preemption();
        myprocessor = current_processor();
-       result = !SCHED(processor_queue_empty)(myprocessor) || rt_runq.count > 0;
+       result = SCHED(thread_should_yield)(myprocessor, current_thread());
        enable_preemption();
 
        return (result);
 }
 
-static int
+static boolean_t
 thread_switch_disable_workqueue_sched_callback(void)
 {
        sched_call_t callback = workqueue_get_sched_callback();
-       thread_t self = current_thread();
-       if (!callback || self->sched_call != callback) {
-               return FALSE;
-       }
-       spl_t s = splsched();
-       thread_lock(self);
-       thread_sched_call(self, NULL);
-       thread_unlock(self);
-       splx(s);
-       return TRUE;
+       return thread_disable_sched_call(current_thread(), callback) != NULL;
 }
 
 static void
 thread_switch_enable_workqueue_sched_callback(void)
 {
        sched_call_t callback = workqueue_get_sched_callback();
-       thread_t self = current_thread();
-       spl_t s = splsched();
-       thread_lock(self);
-       thread_sched_call(self, callback);
-       thread_unlock(self);
-       splx(s);
+       thread_reenable_sched_call(current_thread(), callback);
 }
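
Both wrappers now delegate to shared helpers, and the deleted lines above show exactly what was factored out. A sketch of thread_disable_sched_call() reconstructed from them (the real definition lives elsewhere in the kernel); it returns the callback when it was armed and is now cleared, NULL otherwise, which is what the boolean_t wrapper tests:

sched_call_t
thread_disable_sched_call(thread_t thread, sched_call_t call)
{
	if (call != NULL) {
		spl_t s = splsched();
		thread_lock(thread);
		if (thread->sched_call == call) {
			/* Armed with the expected callback: clear it. */
			thread_sched_call(thread, NULL);
			thread_unlock(thread);
			splx(s);
			return call;
		}
		thread_unlock(thread);
		splx(s);
	}
	return NULL;
}
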
 
 static void
 thread_switch_continue(void)
 {
-       register thread_t       self = current_thread();
+       thread_t        self = current_thread();
        int                                     option = self->saved.swtch.option;
        boolean_t                       reenable_workq_callback = self->saved.swtch.reenable_workq_callback;
 
@@ -255,7 +238,8 @@ kern_return_t
 thread_switch(
        struct thread_switch_args *args)
 {
-       register thread_t               thread, self = current_thread();
+       thread_t                        thread = THREAD_NULL;
+       thread_t                        self = current_thread();
        mach_port_name_t                thread_name = args->thread_name;
        int                                             option = args->option;
        mach_msg_timeout_t              option_time = args->option_time;
@@ -303,11 +287,11 @@ thread_switch(
        /*
         * Translate the port name if supplied.
         */
-    if (thread_name != MACH_PORT_NULL) {
-               ipc_port_t                      port;
+       if (thread_name != MACH_PORT_NULL) {
+               ipc_port_t port;
 
                if (ipc_port_translate_send(self->task->itk_space,
-                                                                       thread_name, &port) == KERN_SUCCESS) {
+                                           thread_name, &port) == KERN_SUCCESS) {
                        ip_reference(port);
                        ip_unlock(port);
 
@@ -315,16 +299,11 @@ thread_switch(
                        ip_release(port);
 
                        if (thread == self) {
-                               (void)thread_deallocate_internal(thread);
+                               thread_deallocate(thread);
                                thread = THREAD_NULL;
                        }
                }
-               else
-                       thread = THREAD_NULL;
        }
-       else
-               thread = THREAD_NULL;
-
 
        if (option == SWITCH_OPTION_OSLOCK_DEPRESS || option == SWITCH_OPTION_OSLOCK_WAIT) {
                if (thread != THREAD_NULL) {
@@ -336,16 +315,18 @@ thread_switch(
                                 * a thread in another task)
                                 */
 
-                               (void)thread_deallocate_internal(thread);
+                               thread_deallocate(thread);
                                thread = THREAD_NULL;
                        } else {
                                /*
                                 * Attempt to kick the lock owner up to our same IO throttling tier.
                                 * If the thread is currently blocked in throttle_lowpri_io(),
                                 * it will immediately break out.
+                                *
+                                * TODO: SFI break out?
                                 */
                                int new_policy = proc_get_effective_thread_policy(self, TASK_POLICY_IO);
-                               
+
                                set_thread_iotier_override(thread, new_policy);
                        }
                }
@@ -355,59 +336,43 @@ thread_switch(
         * Try to handoff if supplied.
         */
        if (thread != THREAD_NULL) {
-               processor_t             processor;
-               spl_t                   s;
-
-               s = splsched();
-               thread_lock(thread);
-
-               /*
-                *      Check that the thread is not bound
-                *      to a different processor, and that realtime
-                *      is not involved.
-                *
-                *      Next, pull it off its run queue.  If it
-                *      doesn't come, it's not eligible.
-                */
-               processor = current_processor();
-               if (processor->current_pri < BASEPRI_RTQUEUES                   &&
-                       thread->sched_pri < BASEPRI_RTQUEUES                            &&
-                       (thread->bound_processor == PROCESSOR_NULL      ||
-                        thread->bound_processor == processor)                          &&
-                               thread_run_queue_remove(thread)                                                 ) {
-                       /*
-                        *      Hah, got it!!
-                        */
-                       thread_unlock(thread);
-
-                       (void)thread_deallocate_internal(thread);
+               spl_t s = splsched();
+
+               /* This may return a different thread if the target is pushing on something */
+               thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread);
+
+               KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED_THREAD_SWITCH)|DBG_FUNC_NONE,
+                                     thread_tid(thread), thread->state,
+                                     pulled_thread ? TRUE : FALSE, 0, 0);
+
+               if (pulled_thread != THREAD_NULL) {
+                       /* We can't be dropping the last ref here */
+                       thread_deallocate_safe(thread);
 
                        if (wait_option)
                                assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE,
-                                                                                                               option_time, scale_factor);
-                       else
-                       if (depress_option)
+                                                   option_time, scale_factor);
+                       else if (depress_option)
                                thread_depress_ms(option_time);
 
                        self->saved.swtch.option = option;
                        self->saved.swtch.reenable_workq_callback = reenable_workq_callback;
 
-                       thread_run(self, (thread_continue_t)thread_switch_continue, NULL, thread);
+                       thread_run(self, (thread_continue_t)thread_switch_continue, NULL, pulled_thread);
                        /* NOTREACHED */
+                       panic("returned from thread_run!");
                }
 
-               thread_unlock(thread);
                splx(s);
 
                thread_deallocate(thread);
        }
-               
+
        if (wait_option)
                assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE, option_time, scale_factor);
-       else
-       if (depress_option)
+       else if (depress_option)
                thread_depress_ms(option_time);
-         
+
        self->saved.swtch.option = option;
        self->saved.swtch.reenable_workq_callback = reenable_workq_callback;
 
@@ -422,6 +387,88 @@ thread_switch(
     return (KERN_SUCCESS);
 }
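
From userspace, this path is reached via the thread_switch() Mach trap. A minimal sketch of a directed yield, assuming the trap prototype from <mach/mach_traps.h> and the SWITCH_OPTION_* constants from <mach/thread_switch.h> as shipped on macOS:

#include <mach/mach.h>
#include <mach/thread_switch.h>
#include <pthread.h>

static void
yield_to(pthread_t peer)
{
	/*
	 * Directed yield: offer the CPU to `peer`, depressing our own
	 * priority for 1 ms. Returns KERN_SUCCESS even if no handoff
	 * actually occurred.
	 */
	(void)thread_switch(pthread_mach_thread_np(peer),
	    SWITCH_OPTION_DEPRESS, 1 /* ms */);
}
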
 
+/* Returns a +1 thread reference */
+thread_t
+port_name_to_thread_for_ulock(mach_port_name_t thread_name)
+{
+       thread_t thread = THREAD_NULL;
+       thread_t self = current_thread();
+
+       /*
+        * Translate the port name if supplied.
+        */
+       if (thread_name != MACH_PORT_NULL) {
+               ipc_port_t port;
+
+               if (ipc_port_translate_send(self->task->itk_space,
+                                           thread_name, &port) == KERN_SUCCESS) {
+                       ip_reference(port);
+                       ip_unlock(port);
+
+                       thread = convert_port_to_thread(port);
+                       ip_release(port);
+
+                       if (thread == THREAD_NULL) {
+                               return thread;
+                       }
+
+                       if ((thread == self) || (thread->task != self->task)) {
+                               thread_deallocate(thread);
+                               thread = THREAD_NULL;
+                       }
+               }
+       }
+
+       return thread;
+}
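
A hypothetical caller, to make the reference-counting contract concrete: the function returns either THREAD_NULL or a +1 reference to a different thread in the caller's own task, so the caller owes exactly one thread_deallocate():

static kern_return_t
example_owner_lookup(mach_port_name_t owner_name)
{
	thread_t owner = port_name_to_thread_for_ulock(owner_name);
	if (owner == THREAD_NULL) {
		/* Dead name, wrong task, or the caller itself. */
		return KERN_INVALID_ARGUMENT;
	}
	/* ... use `owner`, e.g. as a handoff target ... */
	thread_deallocate(owner);
	return KERN_SUCCESS;
}
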
+
+/* This function is called after an assert_wait(), therefore it must not
+ * cause another wait until after the thread_run() or thread_block()
+ *
+ * Consumes a ref on thread
+ */
+wait_result_t
+thread_handoff(thread_t thread)
+{
+       thread_t deallocate_thread = THREAD_NULL;
+       thread_t self = current_thread();
+
+       /*
+        * Try to handoff if supplied.
+        */
+       if (thread != THREAD_NULL) {
+               spl_t s = splsched();
+
+               thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread);
+
+               KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED_THREAD_SWITCH)|DBG_FUNC_NONE,
+                                     thread_tid(thread), thread->state,
+                                     pulled_thread ? TRUE : FALSE, 0, 0);
+
+               if (pulled_thread != THREAD_NULL) {
+                       /* We can't be dropping the last ref here */
+                       thread_deallocate_safe(thread);
+
+                       int result = thread_run(self, THREAD_CONTINUE_NULL, NULL, pulled_thread);
+
+                       splx(s);
+                       return result;
+               }
+
+               splx(s);
+
+               deallocate_thread = thread;
+               thread = THREAD_NULL;
+       }
+
+       int result = thread_block(THREAD_CONTINUE_NULL);
+       if (deallocate_thread != THREAD_NULL) {
+               thread_deallocate(deallocate_thread);
+       }
+
+       return result;
+}
+
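
Putting the stated contract together, a hypothetical caller shape (`lock_event` is an assumed wait event): assert_wait() first, then thread_handoff(), which consumes the reference whether or not the target could be pulled:

static wait_result_t
example_wait_with_handoff(event_t lock_event, thread_t owner /* +1 ref, consumed */)
{
	assert_wait(lock_event, THREAD_ABORTSAFE);

	/* Blocks; switches directly to `owner` when it can be pulled. */
	return thread_handoff(owner);
}
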
 /*
  * Depress thread's priority to lowest possible for the specified interval,
  * with a value of zero resulting in no timeout being scheduled.
@@ -430,7 +477,7 @@ void
 thread_depress_abstime(
        uint64_t                                interval)
 {
-       register thread_t               self = current_thread();
+       thread_t                self = current_thread();
        uint64_t                                deadline;
     spl_t                                      s;
 
@@ -440,7 +487,16 @@ thread_depress_abstime(
                processor_t             myprocessor = self->last_processor;
 
                self->sched_pri = DEPRESSPRI;
+
+               KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHANGE_PRIORITY),
+                                     (uintptr_t)thread_tid(self),
+                                     self->base_pri,
+                                     self->sched_pri,
+                                     0, /* eventually, 'reason' */
+                                     0);
+
                myprocessor->current_pri = self->sched_pri;
+               myprocessor->current_perfctl_class = thread_get_perfcontrol_class(self);
                self->sched_flags |= TH_SFLAG_DEPRESS;
 
                if (interval != 0) {
@@ -479,7 +535,7 @@ thread_depress_expire(
     thread_lock(thread);
        if (--thread->depress_timer_active == 0) {
                thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
-               SCHED(compute_priority)(thread, FALSE);
+               thread_recompute_sched_pri(thread, FALSE);
        }
     thread_unlock(thread);
     splx(s);
@@ -500,7 +556,7 @@ thread_depress_abort_internal(
        if (!(thread->sched_flags & TH_SFLAG_POLLDEPRESS)) {
                if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
                        thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;
-                       SCHED(compute_priority)(thread, FALSE);
+                       thread_recompute_sched_pri(thread, FALSE);
                        result = KERN_SUCCESS;
                }
 
@@ -535,7 +591,16 @@ thread_poll_yield(
                        thread_lock(self);
                        if (!(self->sched_flags & TH_SFLAG_DEPRESSED_MASK)) {
                                self->sched_pri = DEPRESSPRI;
+
+                               KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHANGE_PRIORITY),
+                                                     (uintptr_t)thread_tid(self),
+                                                     self->base_pri,
+                                                     self->sched_pri,
+                                                     0, /* eventually, 'reason' */
+                                                     0);
+
                                myprocessor->current_pri = self->sched_pri;
+                               myprocessor->current_perfctl_class = thread_get_perfcontrol_class(self);
                        }
                        self->computation_epoch = abstime;
                        self->computation_metered = 0;
@@ -544,10 +609,11 @@ thread_poll_yield(
                        abstime += (total_computation >> sched_poll_yield_shift);
                        if (!timer_call_enter(&self->depress_timer, abstime, TIMER_CALL_USER_CRITICAL))
                                self->depress_timer_active++;
-                       thread_unlock(self);
 
-                       if ((preempt = csw_check(myprocessor)) != AST_NONE)
+                       if ((preempt = csw_check(myprocessor, AST_NONE)) != AST_NONE)
                                ast_on(preempt);
+
+                       thread_unlock(self);
                }
        }
        splx(s);
@@ -562,7 +628,7 @@ thread_yield_internal(
 
        disable_preemption();
        myprocessor = current_processor();
-       if (SCHED(processor_queue_empty)(myprocessor) && rt_runq.count == 0) {
+       if (!SCHED(thread_should_yield)(myprocessor, current_thread())) {
                mp_enable_preemption();
 
                return;
@@ -576,3 +642,40 @@ thread_yield_internal(
        thread_depress_abort_internal(current_thread());
 }
 
+/*
+ * This yields to a possible non-urgent preemption pending on the current processor.
+ *
+ * This is useful when doing a long computation in the kernel without returning to userspace.
+ *
+ * As opposed to other yielding mechanisms, this does not drop the priority of the current thread.
+ */
+void
+thread_yield_to_preemption()
+{
+       /* 
+        * ast_pending() should ideally be called with interrupts disabled, but 
+        * the check here is fine because csw_check() will do the right thing.
+        */
+       ast_t *pending_ast = ast_pending();
+       ast_t ast = AST_NONE;
+       processor_t p;
+
+       if (*pending_ast & AST_PREEMPT) {
+               thread_t self = current_thread();
+
+               spl_t s = splsched();
+
+               p = current_processor();
+               thread_lock(self);
+               ast = csw_check(p, AST_YIELD);
+               ast_on(ast);
+               thread_unlock(self);
+
+               if (ast != AST_NONE) {
+                       (void)thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast);
+               }
+
+               splx(s);
+       }
+}
+
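
A hypothetical caller sketch: a long in-kernel computation that periodically donates the CPU to a pending non-urgent preemption, without the priority cost of the depress-based yields above:

static void
example_long_kernel_work(uint8_t *buf, size_t len)
{
	for (size_t i = 0; i < len; i++) {
		buf[i] ^= 0xFF;		/* stand-in for real work */

		/* Every 64K iterations, take any pending preemption. */
		if ((i & 0xFFFF) == 0xFFFF) {
			thread_yield_to_preemption();
		}
	}
}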