git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/kern/sync_sema.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / osfmk / kern / sync_sema.c
index 36f9dbb9adea986af43724b6421536c74d9b1289..dfa8d1153ae4a89e0e4910f0df7ff31b387deb03 100644 (file)
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- * 
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_COPYRIGHT@
- * 
+ *
  */
 /*
  *     File:   kern/sync_sema.c
 #include <ipc/ipc_port.h>
 #include <ipc/ipc_space.h>
 #include <kern/host.h>
-#include <kern/wait_queue.h>
+#include <kern/waitq.h>
 #include <kern/zalloc.h>
 #include <kern/mach_param.h>
 
+#include <libkern/OSAtomic.h>
+
 static unsigned int semaphore_event;
-#define SEMAPHORE_EVENT ((event64_t)&semaphore_event)
+#define SEMAPHORE_EVENT CAST_EVENT64_T(&semaphore_event)
 
-zone_t semaphore_zone;
-unsigned int semaphore_max = SEMAPHORE_MAX;
+ZONE_DECLARE(semaphore_zone, "semaphores", sizeof(struct semaphore), ZC_NONE);
+
+os_refgrp_decl(static, sema_refgrp, "semaphore", NULL);
 
 /* Forward declarations */
 
 
-kern_return_t 
+kern_return_t
 semaphore_wait_trap_internal(
-                               mach_port_name_t name,
-                               void (*caller_cont)(kern_return_t));
+       mach_port_name_t name,
+       void (*caller_cont)(kern_return_t));
 
-kern_return_t 
+kern_return_t
 semaphore_wait_signal_trap_internal(
-                               mach_port_name_t wait_name,
-                               mach_port_name_t signal_name,
-                               void (*caller_cont)(kern_return_t));
+       mach_port_name_t wait_name,
+       mach_port_name_t signal_name,
+       void (*caller_cont)(kern_return_t));
 
-kern_return_t 
+kern_return_t
 semaphore_timedwait_trap_internal(
-                               mach_port_name_t name,
-                               unsigned int sec,
-                               clock_res_t nsec,
-                               void (*caller_cont)(kern_return_t));
+       mach_port_name_t name,
+       unsigned int sec,
+       clock_res_t nsec,
+       void (*caller_cont)(kern_return_t));
 
-kern_return_t 
+kern_return_t
 semaphore_timedwait_signal_trap_internal(
-                               mach_port_name_t wait_name,
-                               mach_port_name_t signal_name,
-                               unsigned int sec,
-                               clock_res_t nsec,
-                               void (*caller_cont)(kern_return_t));
+       mach_port_name_t wait_name,
+       mach_port_name_t signal_name,
+       unsigned int sec,
+       clock_res_t nsec,
+       void (*caller_cont)(kern_return_t));
 
+kern_return_t
+semaphore_signal_internal_trap(mach_port_name_t sema_name);
 
 kern_return_t
 semaphore_signal_internal(
-                       semaphore_t             semaphore,
-                       thread_t                        thread,
-                       int                             options);
+       semaphore_t             semaphore,
+       thread_t                        thread,
+       int                             options);
 
 kern_return_t
 semaphore_convert_wait_result(
-                       int                             wait_result);
+       int                             wait_result);
 
 void
-semaphore_wait_continue(void);
+semaphore_wait_continue(void *arg __unused, wait_result_t wr);
 
-kern_return_t
+static kern_return_t
 semaphore_wait_internal(
-                       semaphore_t             wait_semaphore,
-                       semaphore_t             signal_semaphore,
-                       mach_timespec_t *wait_timep,
-                       void (*caller_cont)(kern_return_t));
-
-/*
- *     ROUTINE:        semaphore_init          [private]
- *
- *     Initialize the semaphore mechanisms.
- *     Right now, we only need to initialize the semaphore zone.
- */      
-void
-semaphore_init(void)
+       semaphore_t             wait_semaphore,
+       semaphore_t             signal_semaphore,
+       uint64_t                deadline,
+       int                             option,
+       void (*caller_cont)(kern_return_t));
+
+static __inline__ uint64_t
+semaphore_deadline(
+       unsigned int            sec,
+       clock_res_t                     nsec)
 {
-  semaphore_zone = zinit(sizeof(struct semaphore),
-                       semaphore_max * sizeof(struct semaphore),
-                       sizeof(struct semaphore),
-                       "semaphores");
+       uint64_t        abstime;
+
+       nanoseconds_to_absolutetime((uint64_t)sec *     NSEC_PER_SEC + nsec, &abstime);
+       clock_absolutetime_interval_to_deadline(abstime, &abstime);
+
+       return abstime;
 }
 
 /*
@@ -137,100 +141,85 @@ semaphore_init(void)
  */
 kern_return_t
 semaphore_create(
-       task_t                  task,
-       semaphore_t             *new_semaphore,
-       int                             policy,
-       int                             value)
+       task_t                  task,
+       semaphore_t             *new_semaphore,
+       int                     policy,
+       int                     value)
 {
-       semaphore_t              s = SEMAPHORE_NULL;
-
+       semaphore_t             s = SEMAPHORE_NULL;
+       kern_return_t           kret;
 
-
-       if (task == TASK_NULL || value < 0 || policy > SYNC_POLICY_MAX) {
-               *new_semaphore = SEMAPHORE_NULL;
+       *new_semaphore = SEMAPHORE_NULL;
+       if (task == TASK_NULL || value < 0 || policy > SYNC_POLICY_MAX || policy < 0) {
                return KERN_INVALID_ARGUMENT;
        }
 
-       s = (semaphore_t) zalloc (semaphore_zone);
+       s = (semaphore_t) zalloc(semaphore_zone);
 
        if (s == SEMAPHORE_NULL) {
-               *new_semaphore = SEMAPHORE_NULL;
-               return KERN_RESOURCE_SHORTAGE; 
+               return KERN_RESOURCE_SHORTAGE;
        }
 
-       wait_queue_init(&s->wait_queue, policy); /* also inits lock */
-       s->count = value;
-       s->ref_count = 1;
+       kret = waitq_init(&s->waitq, policy | SYNC_POLICY_DISABLE_IRQ); /* also inits lock */
+       if (kret != KERN_SUCCESS) {
+               zfree(semaphore_zone, s);
+               return kret;
+       }
 
        /*
-        *  Create and initialize the semaphore port
+        * Initialize the semaphore values.
         */
-       s->port = ipc_port_alloc_kernel();
-       if (s->port == IP_NULL) {       
-               /* This will deallocate the semaphore */        
-               semaphore_dereference(s);
-               *new_semaphore = SEMAPHORE_NULL;
-               return KERN_RESOURCE_SHORTAGE; 
-       }
-
-       ipc_kobject_set (s->port, (ipc_kobject_t) s, IKOT_SEMAPHORE);
+       s->port = IP_NULL;
+       os_ref_init(&s->ref_count, &sema_refgrp);
+       s->count = value;
+       s->active = TRUE;
+       s->owner = task;
 
        /*
         *  Associate the new semaphore with the task by adding
         *  the new semaphore to the task's semaphore list.
-        *
-        *  Associate the task with the new semaphore by having the
-        *  semaphores task pointer point to the owning task's structure.
         */
        task_lock(task);
+       /* Check for race with task_terminate */
+       if (!task->active) {
+               task_unlock(task);
+               zfree(semaphore_zone, s);
+               return KERN_INVALID_TASK;
+       }
        enqueue_head(&task->semaphore_list, (queue_entry_t) s);
        task->semaphores_owned++;
-       s->owner = task;
-       s->active = TRUE;
        task_unlock(task);
 
        *new_semaphore = s;
 
        return KERN_SUCCESS;
-}                
+}
 
 /*
- *     Routine:        semaphore_destroy
+ *     Routine:        semaphore_destroy_internal
  *
- *     Destroys a semaphore.  This call will only succeed if the
- *     specified task is the SAME task name specified at the semaphore's
- *     creation.
+ *     Disassociate a semaphore from its owning task, mark it inactive,
+ *     and set any waiting threads running with THREAD_RESTART.
  *
- *     All threads currently blocked on the semaphore are awoken.  These
- *     threads will return with the KERN_TERMINATED error.
+ *     Conditions:
+ *                     task is locked
+ *                     semaphore is locked
+ *                     semaphore is owned by the specified task
+ *     Returns:
+ *                     with semaphore unlocked
  */
-kern_return_t
-semaphore_destroy(
-       task_t                  task,
-       semaphore_t             semaphore)
+static void
+semaphore_destroy_internal(
+       task_t                  task,
+       semaphore_t             semaphore)
 {
-       int                             old_count;
-       spl_t                   spl_level;
+       int                     old_count;
 
-
-       if (task == TASK_NULL || semaphore == SEMAPHORE_NULL)
-               return KERN_INVALID_ARGUMENT;
-
-       /*
-        *  Disown semaphore
-        */
-       task_lock(task);
-       if (semaphore->owner != task) {
-               task_unlock(task);
-               return KERN_INVALID_ARGUMENT;
-       }
-       remqueue(&task->semaphore_list, (queue_entry_t) semaphore);
+       /* unlink semaphore from owning task */
+       assert(semaphore->owner == task);
+       remqueue((queue_entry_t) semaphore);
        semaphore->owner = TASK_NULL;
        task->semaphores_owned--;
-       task_unlock(task);
-
-       spl_level = splsched();
-       semaphore_lock(semaphore);
 
        /*
         *  Deactivate semaphore
@@ -239,44 +228,121 @@ semaphore_destroy(
        semaphore->active = FALSE;
 
        /*
-        *  Wakeup blocked threads  
+        *  Wakeup blocked threads
         */
        old_count = semaphore->count;
        semaphore->count = 0;
 
        if (old_count < 0) {
-               wait_queue_wakeup64_all_locked(&semaphore->wait_queue,
-                                            SEMAPHORE_EVENT,
-                                            THREAD_RESTART,
-                                            TRUE);             /* unlock? */
+               waitq_wakeup64_all_locked(&semaphore->waitq,
+                   SEMAPHORE_EVENT,
+                   THREAD_RESTART, NULL,
+                   WAITQ_ALL_PRIORITIES,
+                   WAITQ_UNLOCK);
+               /* waitq/semaphore is unlocked */
        } else {
                semaphore_unlock(semaphore);
        }
+}
+
+/*
+ *     Routine:        semaphore_destroy
+ *
+ *     Destroys a semaphore and consumes the caller's reference on the
+ *     semaphore.
+ */
+kern_return_t
+semaphore_destroy(
+       task_t                  task,
+       semaphore_t             semaphore)
+{
+       spl_t spl_level;
+
+       if (semaphore == SEMAPHORE_NULL) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       if (task == TASK_NULL) {
+               semaphore_dereference(semaphore);
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       task_lock(task);
+       spl_level = splsched();
+       semaphore_lock(semaphore);
+
+       if (semaphore->owner != task) {
+               semaphore_unlock(semaphore);
+               semaphore_dereference(semaphore);
+               splx(spl_level);
+               task_unlock(task);
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       semaphore_destroy_internal(task, semaphore);
+       /* semaphore unlocked */
+
        splx(spl_level);
+       task_unlock(task);
 
-       /*
-        *  Deallocate
-        *
-        *  Drop the semaphore reference, which in turn deallocates the
-        *  semaphore structure if the reference count goes to zero.
-        */
-       ipc_port_dealloc_kernel(semaphore->port);
        semaphore_dereference(semaphore);
        return KERN_SUCCESS;
 }
 
+/*
+ *     Routine:        semaphore_destroy_all
+ *
+ *     Destroy all the semaphores associated with a given task.
+ */
+#define SEMASPERSPL 20  /* max number of semaphores to destroy per spl hold */
+
+void
+semaphore_destroy_all(
+       task_t                  task)
+{
+       uint32_t count;
+       spl_t spl_level;
+
+       count = 0;
+       task_lock(task);
+       while (!queue_empty(&task->semaphore_list)) {
+               semaphore_t semaphore;
+
+               semaphore = (semaphore_t) queue_first(&task->semaphore_list);
+
+               if (count == 0) {
+                       spl_level = splsched();
+               }
+               semaphore_lock(semaphore);
+
+               semaphore_destroy_internal(task, semaphore);
+               /* semaphore unlocked */
+
+               /* throttle number of semaphores per interrupt disablement */
+               if (++count == SEMASPERSPL) {
+                       count = 0;
+                       splx(spl_level);
+               }
+       }
+       if (count != 0) {
+               splx(spl_level);
+       }
+
+       task_unlock(task);
+}
+
 /*
  *     Routine:        semaphore_signal_internal
  *
- *             Signals the semaphore as direct.  
+ *             Signals the semaphore as direct.
  *     Assumptions:
  *             Semaphore is locked.
  */
 kern_return_t
 semaphore_signal_internal(
-       semaphore_t             semaphore,
-       thread_t                thread,
-       int                             options)
+       semaphore_t             semaphore,
+       thread_t                thread,
+       int                             options)
 {
        kern_return_t kr;
        spl_t  spl_level;
@@ -292,51 +358,62 @@ semaphore_signal_internal(
 
        if (thread != THREAD_NULL) {
                if (semaphore->count < 0) {
-                       kr = wait_queue_wakeup64_thread_locked(
-                                       &semaphore->wait_queue,
-                                       SEMAPHORE_EVENT,
-                                       thread,
-                                       THREAD_AWAKENED,
-                                       TRUE);  /* unlock? */
+                       kr = waitq_wakeup64_thread_locked(
+                               &semaphore->waitq,
+                               SEMAPHORE_EVENT,
+                               thread,
+                               THREAD_AWAKENED,
+                               WAITQ_UNLOCK);
+                       /* waitq/semaphore is unlocked */
                } else {
-                       semaphore_unlock(semaphore);
                        kr = KERN_NOT_WAITING;
+                       semaphore_unlock(semaphore);
                }
                splx(spl_level);
                return kr;
-       } 
+       }
 
        if (options & SEMAPHORE_SIGNAL_ALL) {
                int old_count = semaphore->count;
 
+               kr = KERN_NOT_WAITING;
                if (old_count < 0) {
                        semaphore->count = 0;  /* always reset */
-                       kr = wait_queue_wakeup64_all_locked(
-                                       &semaphore->wait_queue,
-                                       SEMAPHORE_EVENT,
-                                       THREAD_AWAKENED,
-                                       TRUE);          /* unlock? */
+                       kr = waitq_wakeup64_all_locked(
+                               &semaphore->waitq,
+                               SEMAPHORE_EVENT,
+                               THREAD_AWAKENED, NULL,
+                               WAITQ_ALL_PRIORITIES,
+                               WAITQ_UNLOCK);
+                       /* waitq / semaphore is unlocked */
                } else {
-                       if (options & SEMAPHORE_SIGNAL_PREPOST)
+                       if (options & SEMAPHORE_SIGNAL_PREPOST) {
                                semaphore->count++;
-                       semaphore_unlock(semaphore);
+                       }
                        kr = KERN_SUCCESS;
+                       semaphore_unlock(semaphore);
                }
                splx(spl_level);
                return kr;
        }
-       
+
        if (semaphore->count < 0) {
-               if (wait_queue_wakeup64_one_locked(
-                                       &semaphore->wait_queue,
-                                       SEMAPHORE_EVENT,
-                                       THREAD_AWAKENED,
-                                       FALSE) == KERN_SUCCESS) {
+               waitq_options_t wq_option = (options & SEMAPHORE_THREAD_HANDOFF) ?
+                   WQ_OPTION_HANDOFF : WQ_OPTION_NONE;
+               kr = waitq_wakeup64_one_locked(
+                       &semaphore->waitq,
+                       SEMAPHORE_EVENT,
+                       THREAD_AWAKENED, NULL,
+                       WAITQ_ALL_PRIORITIES,
+                       WAITQ_KEEP_LOCKED,
+                       wq_option);
+               if (kr == KERN_SUCCESS) {
                        semaphore_unlock(semaphore);
                        splx(spl_level);
                        return KERN_SUCCESS;
-               } else
+               } else {
                        semaphore->count = 0;  /* all waiters gone */
+               }
        }
 
        if (options & SEMAPHORE_SIGNAL_PREPOST) {
@@ -358,19 +435,20 @@ semaphore_signal_internal(
  */
 kern_return_t
 semaphore_signal_thread(
-       semaphore_t     semaphore,
-       thread_t        thread)
+       semaphore_t     semaphore,
+       thread_t        thread)
 {
-       kern_return_t           ret;
+       kern_return_t           ret;
 
-       if (semaphore == SEMAPHORE_NULL)
+       if (semaphore == SEMAPHORE_NULL) {
                return KERN_INVALID_ARGUMENT;
+       }
 
        ret = semaphore_signal_internal(semaphore,
-                                       thread,
-                                       SEMAPHORE_OPTION_NONE);
+           thread,
+           SEMAPHORE_OPTION_NONE);
        return ret;
-}      
+}
 
 /*
  *     Routine:        semaphore_signal_thread_trap
@@ -383,27 +461,29 @@ semaphore_signal_thread_trap(
 {
        mach_port_name_t sema_name = args->signal_name;
        mach_port_name_t thread_name = args->thread_name;
-       semaphore_t     semaphore;
-       thread_t        thread;
-       kern_return_t   kr;
+       semaphore_t     semaphore;
+       thread_t        thread;
+       kern_return_t   kr;
 
-       /* 
+       /*
         * MACH_PORT_NULL is not an error. It means that we want to
         * select any one thread that is already waiting, but not to
         * pre-post the semaphore.
         */
        if (thread_name != MACH_PORT_NULL) {
-               thread = port_name_to_thread(thread_name);
-               if (thread == THREAD_NULL)
+               thread = port_name_to_thread(thread_name, PORT_TO_THREAD_NONE);
+               if (thread == THREAD_NULL) {
                        return KERN_INVALID_ARGUMENT;
-       } else
+               }
+       } else {
                thread = THREAD_NULL;
+       }
 
        kr = port_name_to_semaphore(sema_name, &semaphore);
        if (kr == KERN_SUCCESS) {
                kr = semaphore_signal_internal(semaphore,
-                               thread,
-                               SEMAPHORE_OPTION_NONE);
+                   thread,
+                   SEMAPHORE_OPTION_NONE);
                semaphore_dereference(semaphore);
        }
        if (thread != THREAD_NULL) {
@@ -427,18 +507,20 @@ semaphore_signal_thread_trap(
  */
 kern_return_t
 semaphore_signal(
-       semaphore_t             semaphore)
+       semaphore_t             semaphore)
 {
-       kern_return_t           kr;
+       kern_return_t           kr;
 
-       if (semaphore == SEMAPHORE_NULL)
+       if (semaphore == SEMAPHORE_NULL) {
                return KERN_INVALID_ARGUMENT;
+       }
 
        kr = semaphore_signal_internal(semaphore,
-                                      THREAD_NULL, 
-                                      SEMAPHORE_SIGNAL_PREPOST);
-       if (kr == KERN_NOT_WAITING)
+           THREAD_NULL,
+           SEMAPHORE_SIGNAL_PREPOST);
+       if (kr == KERN_NOT_WAITING) {
                return KERN_SUCCESS;
+       }
        return kr;
 }
 
@@ -452,17 +534,25 @@ semaphore_signal_trap(
        struct semaphore_signal_trap_args *args)
 {
        mach_port_name_t sema_name = args->signal_name;
-       semaphore_t     semaphore;
+
+       return semaphore_signal_internal_trap(sema_name);
+}
+
+kern_return_t
+semaphore_signal_internal_trap(mach_port_name_t sema_name)
+{
+       semaphore_t     semaphore;
        kern_return_t kr;
 
        kr = port_name_to_semaphore(sema_name, &semaphore);
        if (kr == KERN_SUCCESS) {
-               kr = semaphore_signal_internal(semaphore, 
-                               THREAD_NULL, 
-                               SEMAPHORE_SIGNAL_PREPOST);
+               kr = semaphore_signal_internal(semaphore,
+                   THREAD_NULL,
+                   SEMAPHORE_SIGNAL_PREPOST);
                semaphore_dereference(semaphore);
-               if (kr == KERN_NOT_WAITING)
+               if (kr == KERN_NOT_WAITING) {
                        kr = KERN_SUCCESS;
+               }
        }
        return kr;
 }
@@ -475,18 +565,20 @@ semaphore_signal_trap(
  */
 kern_return_t
 semaphore_signal_all(
-       semaphore_t             semaphore)
+       semaphore_t             semaphore)
 {
        kern_return_t kr;
 
-       if (semaphore == SEMAPHORE_NULL)
+       if (semaphore == SEMAPHORE_NULL) {
                return KERN_INVALID_ARGUMENT;
+       }
 
        kr = semaphore_signal_internal(semaphore,
-                                      THREAD_NULL, 
-                                      SEMAPHORE_SIGNAL_ALL);
-       if (kr == KERN_NOT_WAITING)
+           THREAD_NULL,
+           SEMAPHORE_SIGNAL_ALL);
+       if (kr == KERN_NOT_WAITING) {
                return KERN_SUCCESS;
+       }
        return kr;
 }
 
@@ -500,17 +592,18 @@ semaphore_signal_all_trap(
        struct semaphore_signal_all_trap_args *args)
 {
        mach_port_name_t sema_name = args->signal_name;
-       semaphore_t     semaphore;
+       semaphore_t     semaphore;
        kern_return_t kr;
 
        kr = port_name_to_semaphore(sema_name, &semaphore);
        if (kr == KERN_SUCCESS) {
                kr = semaphore_signal_internal(semaphore,
-                               THREAD_NULL, 
-                               SEMAPHORE_SIGNAL_ALL);
+                   THREAD_NULL,
+                   SEMAPHORE_SIGNAL_ALL);
                semaphore_dereference(semaphore);
-               if (kr == KERN_NOT_WAITING)
+               if (kr == KERN_NOT_WAITING) {
                        kr = KERN_SUCCESS;
+               }
        }
        return kr;
 }
@@ -531,7 +624,7 @@ semaphore_convert_wait_result(int wait_result)
 
        case THREAD_TIMED_OUT:
                return KERN_OPERATION_TIMED_OUT;
-               
+
        case THREAD_INTERRUPTED:
                return KERN_ABORTED;
 
@@ -551,19 +644,20 @@ semaphore_convert_wait_result(int wait_result)
  *     It returns directly to user space.
  */
 void
-semaphore_wait_continue(void)
+semaphore_wait_continue(void *arg __unused, wait_result_t wr)
 {
        thread_t self = current_thread();
-       int wait_result = self->wait_result;
        void (*caller_cont)(kern_return_t) = self->sth_continuation;
 
        assert(self->sth_waitsemaphore != SEMAPHORE_NULL);
        semaphore_dereference(self->sth_waitsemaphore);
-       if (self->sth_signalsemaphore != SEMAPHORE_NULL)
+       if (self->sth_signalsemaphore != SEMAPHORE_NULL) {
                semaphore_dereference(self->sth_signalsemaphore);
+       }
 
+       assert(self->handoff_thread == THREAD_NULL);
        assert(caller_cont != (void (*)(kern_return_t))0);
-       (*caller_cont)(semaphore_convert_wait_result(wait_result));
+       (*caller_cont)(semaphore_convert_wait_result(wr));
 }
 
 /*
@@ -577,59 +671,45 @@ semaphore_wait_continue(void)
  *             The reference
  *             A reference is held on the signal semaphore.
  */
-kern_return_t
+static kern_return_t
 semaphore_wait_internal(
-       semaphore_t             wait_semaphore,
-       semaphore_t             signal_semaphore,
-       mach_timespec_t         *wait_timep,
-       void                    (*caller_cont)(kern_return_t))
+       semaphore_t             wait_semaphore,
+       semaphore_t             signal_semaphore,
+       uint64_t                deadline,
+       int                             option,
+       void                    (*caller_cont)(kern_return_t))
 {
-       boolean_t                       nonblocking;
-       int                                     wait_result;
-       spl_t                           spl_level;
-       kern_return_t           kr = KERN_ALREADY_WAITING;
+       int                                     wait_result;
+       spl_t                           spl_level;
+       kern_return_t           kr = KERN_ALREADY_WAITING;
 
        spl_level = splsched();
        semaphore_lock(wait_semaphore);
-
-       /*
-        * Decide if we really have to wait.
-        */
-       nonblocking = (wait_timep != (mach_timespec_t *)0) ?
-                     (wait_timep->tv_sec == 0 && wait_timep->tv_nsec == 0) :
-                     FALSE;
+       thread_t self = current_thread();
+       thread_t handoff_thread = THREAD_NULL;
+       thread_handoff_option_t handoff_option = THREAD_HANDOFF_NONE;
+       int semaphore_signal_options = SEMAPHORE_SIGNAL_PREPOST;
 
        if (!wait_semaphore->active) {
                kr = KERN_TERMINATED;
        } else if (wait_semaphore->count > 0) {
                wait_semaphore->count--;
                kr = KERN_SUCCESS;
-       } else if (nonblocking) {
+       } else if (option & SEMAPHORE_TIMEOUT_NOBLOCK) {
                kr = KERN_OPERATION_TIMED_OUT;
        } else {
-               uint64_t        abstime;
-               thread_t        self = current_thread();
-
                wait_semaphore->count = -1;  /* we don't keep an actual count */
-               thread_lock(self);
-               
-               /*
-                * If it is a timed wait, calculate the wake up deadline.
-                */
-               if (wait_timep != (mach_timespec_t *)0) {
-                       nanoseconds_to_absolutetime((uint64_t)wait_timep->tv_sec *
-                                                                                       NSEC_PER_SEC + wait_timep->tv_nsec, &abstime);
-                       clock_absolutetime_interval_to_deadline(abstime, &abstime);
-               }
-               else
-                       abstime = 0;
 
-               (void)wait_queue_assert_wait64_locked(
-                                       &wait_semaphore->wait_queue,
-                                       SEMAPHORE_EVENT,
-                                       THREAD_ABORTSAFE, abstime,
-                                       self);
-               thread_unlock(self);
+               thread_set_pending_block_hint(self, kThreadWaitSemaphore);
+               (void)waitq_assert_wait64_locked(
+                       &wait_semaphore->waitq,
+                       SEMAPHORE_EVENT,
+                       THREAD_ABORTSAFE,
+                       TIMEOUT_URGENCY_USER_NORMAL,
+                       deadline, TIMEOUT_NO_LEEWAY,
+                       self);
+
+               semaphore_signal_options |= SEMAPHORE_THREAD_HANDOFF;
        }
        semaphore_unlock(wait_semaphore);
        splx(spl_level);
@@ -647,13 +727,13 @@ semaphore_wait_internal(
                 * our intention to wait above).
                 */
                signal_kr = semaphore_signal_internal(signal_semaphore,
-                                                     THREAD_NULL,
-                                                     SEMAPHORE_SIGNAL_PREPOST);
+                   THREAD_NULL, semaphore_signal_options);
 
-               if (signal_kr == KERN_NOT_WAITING)
+               if (signal_kr == KERN_NOT_WAITING) {
+                       assert(self->handoff_thread == THREAD_NULL);
                        signal_kr = KERN_SUCCESS;
-               else if (signal_kr == KERN_TERMINATED) {
-                       /* 
+               } else if (signal_kr == KERN_TERMINATED) {
+                       /*
                         * Uh!Oh!  The semaphore we were to signal died.
                         * We have to get ourselves out of the wait in
                         * case we get stuck here forever (it is assumed
@@ -666,42 +746,49 @@ semaphore_wait_internal(
                         * (most important) result.  Otherwise,
                         * return the KERN_TERMINATED status.
                         */
-                       thread_t self = current_thread();
-
+                       assert(self->handoff_thread == THREAD_NULL);
                        clear_wait(self, THREAD_INTERRUPTED);
                        kr = semaphore_convert_wait_result(self->wait_result);
-                       if (kr == KERN_ABORTED)
+                       if (kr == KERN_ABORTED) {
                                kr = KERN_TERMINATED;
+                       }
                }
        }
-       
+
        /*
         * If we had an error, or we didn't really need to wait we can
         * return now that we have signalled the signal semaphore.
         */
-       if (kr != KERN_ALREADY_WAITING)
+       if (kr != KERN_ALREADY_WAITING) {
+               assert(self->handoff_thread == THREAD_NULL);
                return kr;
+       }
 
+       if (self->handoff_thread) {
+               handoff_thread = self->handoff_thread;
+               self->handoff_thread = THREAD_NULL;
+               handoff_option = THREAD_HANDOFF_SETRUN_NEEDED;
+       }
        /*
         * Now, we can block.  If the caller supplied a continuation
         * pointer of his own for after the block, block with the
-        * appropriate semaphore continuation.  Thiswill gather the
+        * appropriate semaphore continuation.  This will gather the
         * semaphore results, release references on the semaphore(s),
         * and then call the caller's continuation.
         */
        if (caller_cont) {
-               thread_t self = current_thread();
-
                self->sth_continuation = caller_cont;
                self->sth_waitsemaphore = wait_semaphore;
                self->sth_signalsemaphore = signal_semaphore;
-               wait_result = thread_block((thread_continue_t)semaphore_wait_continue);
-       }
-       else {
-               wait_result = thread_block(THREAD_CONTINUE_NULL);
+
+               thread_handoff_parameter(handoff_thread, semaphore_wait_continue,
+                   NULL, handoff_option);
+       } else {
+               wait_result = thread_handoff_deallocate(handoff_thread, handoff_option);
        }
 
-       return (semaphore_convert_wait_result(wait_result));
+       assert(self->handoff_thread == THREAD_NULL);
+       return semaphore_convert_wait_result(wait_result);
 }
 
 
@@ -709,20 +796,49 @@ semaphore_wait_internal(
  *     Routine:        semaphore_wait
  *
  *     Traditional (non-continuation) interface presented to
- *     in-kernel clients to wait on a semaphore.
+ *      in-kernel clients to wait on a semaphore.
  */
 kern_return_t
 semaphore_wait(
-       semaphore_t             semaphore)
-{      
+       semaphore_t             semaphore)
+{
+       if (semaphore == SEMAPHORE_NULL) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       return semaphore_wait_internal(semaphore,
+                  SEMAPHORE_NULL,
+                  0ULL, SEMAPHORE_OPTION_NONE,
+                  (void (*)(kern_return_t))0);
+}
 
-       if (semaphore == SEMAPHORE_NULL)
+kern_return_t
+semaphore_wait_noblock(
+       semaphore_t             semaphore)
+{
+       if (semaphore == SEMAPHORE_NULL) {
                return KERN_INVALID_ARGUMENT;
+       }
 
-       return(semaphore_wait_internal(semaphore,
-                                      SEMAPHORE_NULL,
-                                      (mach_timespec_t *)0,
-                                      (void (*)(kern_return_t))0));
+       return semaphore_wait_internal(semaphore,
+                  SEMAPHORE_NULL,
+                  0ULL, SEMAPHORE_TIMEOUT_NOBLOCK,
+                  (void (*)(kern_return_t))0);
+}
+
+kern_return_t
+semaphore_wait_deadline(
+       semaphore_t             semaphore,
+       uint64_t                deadline)
+{
+       if (semaphore == SEMAPHORE_NULL) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       return semaphore_wait_internal(semaphore,
+                  SEMAPHORE_NULL,
+                  deadline, SEMAPHORE_OPTION_NONE,
+                  (void (*)(kern_return_t))0);
 }
 
 /*
@@ -736,25 +852,25 @@ kern_return_t
 semaphore_wait_trap(
        struct semaphore_wait_trap_args *args)
 {
-       return(semaphore_wait_trap_internal(args->wait_name, thread_syscall_return));
+       return semaphore_wait_trap_internal(args->wait_name, thread_syscall_return);
 }
 
 
 
 kern_return_t
 semaphore_wait_trap_internal(
-       mach_port_name_t name, 
+       mach_port_name_t name,
        void (*caller_cont)(kern_return_t))
-{      
-       semaphore_t     semaphore;
+{
+       semaphore_t     semaphore;
        kern_return_t kr;
 
        kr = port_name_to_semaphore(name, &semaphore);
        if (kr == KERN_SUCCESS) {
                kr = semaphore_wait_internal(semaphore,
-                               SEMAPHORE_NULL,
-                               (mach_timespec_t *)0,
-                               caller_cont);
+                   SEMAPHORE_NULL,
+                   0ULL, SEMAPHORE_OPTION_NONE,
+                   caller_cont);
                semaphore_dereference(semaphore);
        }
        return kr;
@@ -764,26 +880,36 @@ semaphore_wait_trap_internal(
  *     Routine:        semaphore_timedwait
  *
  *     Traditional (non-continuation) interface presented to
- *     in-kernel clients to wait on a semaphore with a timeout.
+ *      in-kernel clients to wait on a semaphore with a timeout.
  *
  *     A timeout of {0,0} is considered non-blocking.
  */
 kern_return_t
 semaphore_timedwait(
-       semaphore_t             semaphore,
-       mach_timespec_t         wait_time)
-{      
-       if (semaphore == SEMAPHORE_NULL)
+       semaphore_t             semaphore,
+       mach_timespec_t         wait_time)
+{
+       int                             option = SEMAPHORE_OPTION_NONE;
+       uint64_t                deadline = 0;
+
+       if (semaphore == SEMAPHORE_NULL) {
                return KERN_INVALID_ARGUMENT;
-       
-       if(BAD_MACH_TIMESPEC(&wait_time))
+       }
+
+       if (BAD_MACH_TIMESPEC(&wait_time)) {
                return KERN_INVALID_VALUE;
-       
-       return (semaphore_wait_internal(semaphore,
-                                       SEMAPHORE_NULL,
-                                       &wait_time,
-                                       (void(*)(kern_return_t))0));
-       
+       }
+
+       if (wait_time.tv_sec == 0 && wait_time.tv_nsec == 0) {
+               option = SEMAPHORE_TIMEOUT_NOBLOCK;
+       } else {
+               deadline = semaphore_deadline(wait_time.tv_sec, wait_time.tv_nsec);
+       }
+
+       return semaphore_wait_internal(semaphore,
+                  SEMAPHORE_NULL,
+                  deadline, option,
+                  (void (*)(kern_return_t))0);
 }
 
 /*
@@ -800,9 +926,8 @@ semaphore_timedwait(
 kern_return_t
 semaphore_timedwait_trap(
        struct semaphore_timedwait_trap_args *args)
-{      
-
-       return(semaphore_timedwait_trap_internal(args->wait_name, args->sec, args->nsec, thread_syscall_return));
+{
+       return semaphore_timedwait_trap_internal(args->wait_name, args->sec, args->nsec, thread_syscall_return);
 }
 
 
@@ -813,22 +938,31 @@ semaphore_timedwait_trap_internal(
        clock_res_t             nsec,
        void (*caller_cont)(kern_return_t))
 {
-
        semaphore_t semaphore;
        mach_timespec_t wait_time;
        kern_return_t kr;
 
        wait_time.tv_sec = sec;
        wait_time.tv_nsec = nsec;
-       if(BAD_MACH_TIMESPEC(&wait_time))
+       if (BAD_MACH_TIMESPEC(&wait_time)) {
                return KERN_INVALID_VALUE;
-       
+       }
+
        kr = port_name_to_semaphore(name, &semaphore);
        if (kr == KERN_SUCCESS) {
+               int                             option = SEMAPHORE_OPTION_NONE;
+               uint64_t                deadline = 0;
+
+               if (sec == 0 && nsec == 0) {
+                       option = SEMAPHORE_TIMEOUT_NOBLOCK;
+               } else {
+                       deadline = semaphore_deadline(sec, nsec);
+               }
+
                kr = semaphore_wait_internal(semaphore,
-                               SEMAPHORE_NULL,
-                               &wait_time,
-                               caller_cont);
+                   SEMAPHORE_NULL,
+                   deadline, option,
+                   caller_cont);
                semaphore_dereference(semaphore);
        }
        return kr;
@@ -844,29 +978,30 @@ semaphore_timedwait_trap_internal(
  */
 kern_return_t
 semaphore_wait_signal(
-       semaphore_t             wait_semaphore,
-       semaphore_t             signal_semaphore)
+       semaphore_t             wait_semaphore,
+       semaphore_t             signal_semaphore)
 {
-       if (wait_semaphore == SEMAPHORE_NULL)
+       if (wait_semaphore == SEMAPHORE_NULL) {
                return KERN_INVALID_ARGUMENT;
-       
-       return(semaphore_wait_internal(wait_semaphore,
-                                      signal_semaphore,
-                                      (mach_timespec_t *)0,
-                                      (void(*)(kern_return_t))0));
+       }
+
+       return semaphore_wait_internal(wait_semaphore,
+                  signal_semaphore,
+                  0ULL, SEMAPHORE_OPTION_NONE,
+                  (void (*)(kern_return_t))0);
 }
 
 /*
  *     Trap:   semaphore_wait_signal_trap
  *
  *     Atomically register a wait on a semaphore and THEN signal
- *     another.  This is the trap version from user space.  
+ *     another.  This is the trap version from user space.
  */
 kern_return_t
 semaphore_wait_signal_trap(
        struct semaphore_wait_signal_trap_args *args)
 {
-       return(semaphore_wait_signal_trap_internal(args->wait_name, args->signal_name, thread_syscall_return));
+       return semaphore_wait_signal_trap_internal(args->wait_name, args->signal_name, thread_syscall_return);
 }
 
 kern_return_t
@@ -884,9 +1019,9 @@ semaphore_wait_signal_trap_internal(
                kr = port_name_to_semaphore(wait_name, &wait_semaphore);
                if (kr == KERN_SUCCESS) {
                        kr = semaphore_wait_internal(wait_semaphore,
-                                       signal_semaphore,
-                                       (mach_timespec_t *)0,
-                                       caller_cont);
+                           signal_semaphore,
+                           0ULL, SEMAPHORE_OPTION_NONE,
+                           caller_cont);
                        semaphore_dereference(wait_semaphore);
                }
                semaphore_dereference(signal_semaphore);
@@ -906,33 +1041,44 @@ semaphore_wait_signal_trap_internal(
  */
 kern_return_t
 semaphore_timedwait_signal(
-       semaphore_t             wait_semaphore,
-       semaphore_t             signal_semaphore,
-       mach_timespec_t         wait_time)
+       semaphore_t             wait_semaphore,
+       semaphore_t             signal_semaphore,
+       mach_timespec_t         wait_time)
 {
-       if (wait_semaphore == SEMAPHORE_NULL)
+       int                             option = SEMAPHORE_OPTION_NONE;
+       uint64_t                deadline = 0;
+
+       if (wait_semaphore == SEMAPHORE_NULL) {
                return KERN_INVALID_ARGUMENT;
-       
-       if(BAD_MACH_TIMESPEC(&wait_time))
+       }
+
+       if (BAD_MACH_TIMESPEC(&wait_time)) {
                return KERN_INVALID_VALUE;
-       
-       return(semaphore_wait_internal(wait_semaphore,
-                                      signal_semaphore,
-                                      &wait_time,
-                                      (void(*)(kern_return_t))0));
+       }
+
+       if (wait_time.tv_sec == 0 && wait_time.tv_nsec == 0) {
+               option = SEMAPHORE_TIMEOUT_NOBLOCK;
+       } else {
+               deadline = semaphore_deadline(wait_time.tv_sec, wait_time.tv_nsec);
+       }
+
+       return semaphore_wait_internal(wait_semaphore,
+                  signal_semaphore,
+                  deadline, option,
+                  (void (*)(kern_return_t))0);
 }
 
 /*
  *     Trap:   semaphore_timedwait_signal_trap
  *
  *     Atomically register a timed wait on a semaphore and THEN signal
- *     another.  This is the trap version from user space.  
+ *     another.  This is the trap version from user space.
  */
 kern_return_t
 semaphore_timedwait_signal_trap(
        struct semaphore_timedwait_signal_trap_args *args)
 {
-       return(semaphore_timedwait_signal_trap_internal(args->wait_name, args->signal_name, args->sec, args->nsec, thread_syscall_return));
+       return semaphore_timedwait_signal_trap_internal(args->wait_name, args->signal_name, args->sec, args->nsec, thread_syscall_return);
 }
 
 kern_return_t
@@ -950,17 +1096,27 @@ semaphore_timedwait_signal_trap_internal(
 
        wait_time.tv_sec = sec;
        wait_time.tv_nsec = nsec;
-       if(BAD_MACH_TIMESPEC(&wait_time))
+       if (BAD_MACH_TIMESPEC(&wait_time)) {
                return KERN_INVALID_VALUE;
-       
+       }
+
        kr = port_name_to_semaphore(signal_name, &signal_semaphore);
        if (kr == KERN_SUCCESS) {
                kr = port_name_to_semaphore(wait_name, &wait_semaphore);
                if (kr == KERN_SUCCESS) {
+                       int                             option = SEMAPHORE_OPTION_NONE;
+                       uint64_t                deadline = 0;
+
+                       if (sec == 0 && nsec == 0) {
+                               option = SEMAPHORE_TIMEOUT_NOBLOCK;
+                       } else {
+                               deadline = semaphore_deadline(sec, nsec);
+                       }
+
                        kr = semaphore_wait_internal(wait_semaphore,
-                                       signal_semaphore,
-                                       &wait_time,
-                                       caller_cont);
+                           signal_semaphore,
+                           deadline, option,
+                           caller_cont);
                        semaphore_dereference(wait_semaphore);
                }
                semaphore_dereference(signal_semaphore);
@@ -977,17 +1133,9 @@ semaphore_timedwait_signal_trap_internal(
  */
 void
 semaphore_reference(
-       semaphore_t             semaphore)
+       semaphore_t             semaphore)
 {
-       spl_t                   spl_level;
-
-       spl_level = splsched();
-       semaphore_lock(semaphore);
-
-       semaphore->ref_count++;
-
-       semaphore_unlock(semaphore);
-       splx(spl_level);
+       os_ref_retain(&semaphore->ref_count);
 }
 
 /*
@@ -998,23 +1146,76 @@ semaphore_reference(
  */
 void
 semaphore_dereference(
-       semaphore_t             semaphore)
+       semaphore_t             semaphore)
 {
-       int                     ref_count;
-       spl_t                   spl_level;
+       uint32_t collisions;
+       spl_t spl_level;
+
+       if (semaphore == NULL) {
+               return;
+       }
+
+       if (os_ref_release(&semaphore->ref_count) > 0) {
+               return;
+       }
 
-       if (semaphore != NULL) {
-           spl_level = splsched();
-           semaphore_lock(semaphore);
+       /*
+        * Last ref, clean up the port [if any]
+        * associated with the semaphore, destroy
+        * it (if still active) and then free
+        * the semaphore.
+        */
+       ipc_port_t port = semaphore->port;
+
+       if (IP_VALID(port)) {
+               assert(!port->ip_srights);
+               ipc_port_dealloc_kernel(port);
+       }
 
-           ref_count = --(semaphore->ref_count);
+       /*
+        * Lock the semaphore to lock in the owner task reference.
+        * Then continue to try to lock the task (inverse order).
+        */
+       spl_level = splsched();
+       semaphore_lock(semaphore);
+       for (collisions = 0; semaphore->active; collisions++) {
+               task_t task = semaphore->owner;
+
+               assert(task != TASK_NULL);
+
+               if (task_lock_try(task)) {
+                       semaphore_destroy_internal(task, semaphore);
+                       /* semaphore unlocked */
+                       splx(spl_level);
+                       task_unlock(task);
+                       goto out;
+               }
+
+               /* failed to get out-of-order locks */
+               semaphore_unlock(semaphore);
+               splx(spl_level);
+               mutex_pause(collisions);
+               spl_level = splsched();
+               semaphore_lock(semaphore);
+       }
+       semaphore_unlock(semaphore);
+       splx(spl_level);
+
+out:
+       zfree(semaphore_zone, semaphore);
+}
+
+#define WAITQ_TO_SEMA(wq) ((semaphore_t) ((uintptr_t)(wq) - offsetof(struct semaphore, waitq)))
+void
+kdp_sema_find_owner(struct waitq * waitq, __assert_only event64_t event, thread_waitinfo_t * waitinfo)
+{
+       semaphore_t sem = WAITQ_TO_SEMA(waitq);
+       assert(event == SEMAPHORE_EVENT);
 
-           semaphore_unlock(semaphore);
-           splx(spl_level);
+       zone_require(semaphore_zone, sem);
 
-           if (ref_count == 0) {
-                       assert(wait_queue_empty(&semaphore->wait_queue));
-                       zfree(semaphore_zone, semaphore);
-           }
+       waitinfo->context = VM_KERNEL_UNSLIDE_OR_PERM(sem->port);
+       if (sem->owner) {
+               waitinfo->owner = pid_from_task(sem->owner);
        }
 }