]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/kern/wait_queue.c
xnu-1456.1.26.tar.gz
[apple/xnu.git] / osfmk / kern / wait_queue.c
index eef777359290480c5954f525203739a81b1894a7..a7a19a02433179eab61ed0716fe12c9a296292d7 100644 (file)
@@ -1,23 +1,29 @@
 /*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
  * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
  * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_FREE_COPYRIGHT@
 
 #include <kern/kern_types.h>
 #include <kern/simple_lock.h>
-#include <kern/kalloc.h>
+#include <kern/zalloc.h>
 #include <kern/queue.h>
 #include <kern/spl.h>
 #include <mach/sync_policy.h>
+#include <kern/mach_param.h>
 #include <kern/sched_prim.h>
 
 #include <kern/wait_queue.h>
+#include <vm/vm_kern.h>
+
+/* forward declarations */
+static boolean_t wait_queue_member_locked(
+                       wait_queue_t            wq,
+                       wait_queue_set_t        wq_set);
+
+static void wait_queues_init(void) __attribute__((section("__TEXT, initcode")));
+
+
+#define WAIT_QUEUE_MAX thread_max
+#define WAIT_QUEUE_SET_MAX task_max * 3
+#define WAIT_QUEUE_LINK_MAX PORT_MAX / 2 + (WAIT_QUEUE_MAX * WAIT_QUEUE_SET_MAX) / 64
+
+static zone_t _wait_queue_link_zone;
+static zone_t _wait_queue_set_zone;
+static zone_t _wait_queue_zone;
+
+/* see rdar://6737748&5561610; we need an unshadowed
+ * definition of a WaitQueueLink for debugging,
+ * but it needs to be used somewhere to wind up in
+ * the dSYM file. */
+volatile WaitQueueLink *unused_except_for_debugging;
+
+
+/*
+ *     Waiting protocols and implementation:
+ *
+ *     Each thread may be waiting for exactly one event; this event
+ *     is set using assert_wait().  That thread may be awakened either
+ *     by performing a thread_wakeup_prim() on its event,
+ *     or by directly waking that thread up with clear_wait().
+ *
+ *     The implementation of wait events uses a hash table.  Each
+ *     bucket is queue of threads having the same hash function
+ *     value; the chain for the queue (linked list) is the run queue
+ *     field.  [It is not possible to be waiting and runnable at the
+ *     same time.]
+ *
+ *     Locks on both the thread and on the hash buckets govern the
+ *     wait event field and the queue chain field.  Because wakeup
+ *     operations only have the event as an argument, the event hash
+ *     bucket must be locked before any thread.
+ *
+ *     Scheduling operations may also occur at interrupt level; therefore,
+ *     interrupts below splsched() must be prevented when holding
+ *     thread or hash bucket locks.
+ *
+ *     The wait event hash table declarations are as follows:
+ */
+
+struct wait_queue boot_wait_queue[1];
+__private_extern__ struct wait_queue *wait_queues = &boot_wait_queue[0];
+
+__private_extern__ uint32_t num_wait_queues = 1;
+
+static uint32_t
+compute_wait_hash_size(__unused unsigned cpu_count, __unused uint64_t memsize) {
+       uint32_t hsize = (uint32_t)round_page_64((thread_max / 11) * sizeof(struct wait_queue));
+       uint32_t bhsize;
+       
+       if (PE_parse_boot_argn("wqsize", &bhsize, sizeof(bhsize)))
+               hsize = bhsize;
+
+       return hsize;
+}
+
+static void
+wait_queues_init(void)
+{
+       uint32_t        i, whsize;
+       kern_return_t   kret;
+
+       whsize = compute_wait_hash_size(processor_avail_count, machine_info.max_mem);
+       num_wait_queues = (whsize / ((uint32_t)sizeof(struct wait_queue))) - 1;
+
+       kret = kernel_memory_allocate(kernel_map, (vm_offset_t *) &wait_queues, whsize, 0, KMA_KOBJECT|KMA_NOPAGEWAIT);
+
+       if (kret != KERN_SUCCESS || wait_queues == NULL)
+               panic("kernel_memory_allocate() failed to allocate wait queues, error: %d, whsize: 0x%x", kret, whsize);
+
+       for (i = 0; i < num_wait_queues; i++) {
+               wait_queue_init(&wait_queues[i], SYNC_POLICY_FIFO);
+       }
+}
+
+void
+wait_queue_bootstrap(void)
+{
+       wait_queues_init();
+       _wait_queue_zone = zinit(sizeof(struct wait_queue),
+                                     WAIT_QUEUE_MAX * sizeof(struct wait_queue),
+                                     sizeof(struct wait_queue),
+                                     "wait queues");
+       _wait_queue_set_zone = zinit(sizeof(struct wait_queue_set),
+                                     WAIT_QUEUE_SET_MAX * sizeof(struct wait_queue_set),
+                                     sizeof(struct wait_queue_set),
+                                     "wait queue sets");
+       _wait_queue_link_zone = zinit(sizeof(struct _wait_queue_link),
+                                     WAIT_QUEUE_LINK_MAX * sizeof(struct _wait_queue_link),
+                                     sizeof(struct _wait_queue_link),
+                                     "wait queue links");
+}
 
 /*
  *     Routine:        wait_queue_init
@@ -82,10 +192,11 @@ wait_queue_init(
        wait_queue_t wq,
        int policy)
 {
-       if (!((policy & SYNC_POLICY_ORDER_MASK) == SYNC_POLICY_FIFO))
+       /* only FIFO and LIFO for now */
+       if ((policy & SYNC_POLICY_FIXED_PRIORITY) != 0)
                return KERN_INVALID_ARGUMENT;
 
-       wq->wq_fifo = TRUE;
+       wq->wq_fifo = ((policy & SYNC_POLICY_REVERSED) == 0);
        wq->wq_type = _WAIT_QUEUE_inited;
        queue_init(&wq->wq_queue);
        hw_lock_init(&wq->wq_interlock);
@@ -110,11 +221,11 @@ wait_queue_alloc(
        wait_queue_t wq;
        kern_return_t ret;
 
-       wq = (wait_queue_t) kalloc(sizeof(struct wait_queue));
+       wq = (wait_queue_t) zalloc(_wait_queue_zone);
        if (wq != WAIT_QUEUE_NULL) {
                ret = wait_queue_init(wq, policy);
                if (ret != KERN_SUCCESS) {
-                       kfree((vm_offset_t)wq, sizeof(struct wait_queue));
+                       zfree(_wait_queue_zone, wq);
                        wq = WAIT_QUEUE_NULL;
                }
        }
@@ -136,7 +247,7 @@ wait_queue_free(
                return KERN_INVALID_ARGUMENT;
        if (!queue_empty(&wq->wq_queue))
                return KERN_FAILURE;
-       kfree((vm_offset_t)wq, sizeof(struct wait_queue));
+       zfree(_wait_queue_zone, wq);
        return KERN_SUCCESS;
 }
 
@@ -161,15 +272,15 @@ wait_queue_set_init(
 
        wqset->wqs_wait_queue.wq_type = _WAIT_QUEUE_SET_inited;
        if (policy & SYNC_POLICY_PREPOST)
-               wqset->wqs_wait_queue.wq_isprepost = TRUE;
+               wqset->wqs_wait_queue.wq_prepost = TRUE;
        else 
-               wqset->wqs_wait_queue.wq_isprepost = FALSE;
+               wqset->wqs_wait_queue.wq_prepost = FALSE;
        queue_init(&wqset->wqs_setlinks);
-       wqset->wqs_refcount = 0;
+       queue_init(&wqset->wqs_preposts);
        return KERN_SUCCESS;
 }
 
-/* legacy API */
+
 kern_return_t
 wait_queue_sub_init(
        wait_queue_set_t wqset,
@@ -178,6 +289,29 @@ wait_queue_sub_init(
        return wait_queue_set_init(wqset, policy);
 }
 
+kern_return_t
+wait_queue_sub_clearrefs(
+        wait_queue_set_t wq_set)
+{
+       wait_queue_link_t wql;
+       queue_t q;
+       spl_t s;
+
+       if (!wait_queue_is_set(wq_set))
+               return KERN_INVALID_ARGUMENT;
+
+       s = splsched();
+       wqs_lock(wq_set);
+       q = &wq_set->wqs_preposts;
+       while (!queue_empty(q)) {
+               queue_remove_first(q, wql, wait_queue_link_t, wql_preposts);
+               assert(!wql_is_preposted(wql));
+       }
+       wqs_unlock(wq_set);
+       splx(s);
+       return KERN_SUCCESS;
+}
+
 /*
  *     Routine:        wait_queue_set_alloc
  *     Purpose:
@@ -195,13 +329,13 @@ wait_queue_set_alloc(
 {
        wait_queue_set_t wq_set;
 
-       wq_set = (wait_queue_set_t) kalloc(sizeof(struct wait_queue_set));
+       wq_set = (wait_queue_set_t) zalloc(_wait_queue_set_zone);
        if (wq_set != WAIT_QUEUE_SET_NULL) {
                kern_return_t ret;
 
                ret = wait_queue_set_init(wq_set, policy);
                if (ret != KERN_SUCCESS) {
-                       kfree((vm_offset_t)wq_set, sizeof(struct wait_queue_set));
+                       zfree(_wait_queue_set_zone, wq_set);
                        wq_set = WAIT_QUEUE_SET_NULL;
                }
        }
@@ -225,22 +359,10 @@ wait_queue_set_free(
        if (!queue_empty(&wq_set->wqs_wait_queue.wq_queue))
                return KERN_FAILURE;
 
-       kfree((vm_offset_t)wq_set, sizeof(struct wait_queue_set));
+       zfree(_wait_queue_set_zone, wq_set);
        return KERN_SUCCESS;
 }
 
-kern_return_t
-wait_queue_sub_clearrefs(
-        wait_queue_set_t wq_set)
-{
-       if (!wait_queue_is_set(wq_set))
-               return KERN_INVALID_ARGUMENT;
-
-       wqs_lock(wq_set);
-       wq_set->wqs_refcount = 0;
-       wqs_unlock(wq_set);
-       return KERN_SUCCESS;
-}
 
 /*
  *     
@@ -254,9 +376,11 @@ unsigned int wait_queue_link_size(void) { return sizeof(WaitQueueLink); }
 
 /* declare a unique type for wait queue link structures */
 static unsigned int _wait_queue_link;
+static unsigned int _wait_queue_link_noalloc;
 static unsigned int _wait_queue_unlinked;
 
 #define WAIT_QUEUE_LINK ((void *)&_wait_queue_link)
+#define WAIT_QUEUE_LINK_NOALLOC ((void *)&_wait_queue_link_noalloc)
 #define WAIT_QUEUE_UNLINKED ((void *)&_wait_queue_unlinked)
 
 #define WAIT_QUEUE_ELEMENT_CHECK(wq, wqe) \
@@ -274,12 +398,14 @@ static unsigned int _wait_queue_unlinked;
                        (queue_t)(wql) : &(wql)->wql_setlinks)))
 
 #define WAIT_QUEUE_SET_LINK_CHECK(wqs, wql) \
-               WQASSERT((((wql)->wql_type == WAIT_QUEUE_LINK) && \
+               WQASSERT(((((wql)->wql_type == WAIT_QUEUE_LINK) || \
+                          ((wql)->wql_type == WAIT_QUEUE_LINK_NOALLOC)) && \
                        ((wql)->wql_setqueue == (wqs)) && \
-                       ((wql)->wql_queue->wq_type == _WAIT_QUEUE_inited) && \
+                       (((wql)->wql_queue->wq_type == _WAIT_QUEUE_inited) || \
+                        ((wql)->wql_queue->wq_type == _WAIT_QUEUE_SET_inited)) && \
                        (WQSNEXT((wqs), WQSPREV((wqs),(wql))) == (wql))), \
                        "wait queue set links corruption: wqs=%#x, wql=%#x", \
-                       (wqs), (wql))
+                        (wqs), (wql))
 
 #if defined(_WAIT_QUEUE_DEBUG_)
 
@@ -322,7 +448,7 @@ MACRO_END
  *             The wait queue is locked
  *             The set queue is just that, a set queue
  */
-__private_extern__ boolean_t
+static boolean_t
 wait_queue_member_locked(
        wait_queue_t wq,
        wait_queue_set_t wq_set)
@@ -338,7 +464,8 @@ wait_queue_member_locked(
        wq_element = (wait_queue_element_t) queue_first(q);
        while (!queue_end(q, (queue_entry_t)wq_element)) {
                WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
-               if ((wq_element->wqe_type == WAIT_QUEUE_LINK)) {
+               if ((wq_element->wqe_type == WAIT_QUEUE_LINK) ||
+                   (wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC)) {
                        wait_queue_link_t wql = (wait_queue_link_t)wq_element;
 
                        if (wql->wql_setqueue == wq_set)
@@ -380,16 +507,18 @@ wait_queue_member(
 
 
 /*
- *     Routine:        wait_queue_link_noalloc
+ *     Routine:        wait_queue_link_internal
  *     Purpose:
  *             Insert a set wait queue into a wait queue.  This
  *             requires us to link the two together using a wait_queue_link
- *             structure that we allocate.
+ *             structure that was provided.
  *     Conditions:
  *             The wait queue being inserted must be inited as a set queue
+ *             The wait_queue_link structure must already be properly typed
  */
+static 
 kern_return_t
-wait_queue_link_noalloc(
+wait_queue_link_internal(
        wait_queue_t wq,
        wait_queue_set_t wq_set,
        wait_queue_link_t wql)
@@ -398,13 +527,13 @@ wait_queue_link_noalloc(
        queue_t q;
        spl_t s;
 
-       if (!wait_queue_is_queue(wq) || !wait_queue_is_set(wq_set))
+       if (!wait_queue_is_valid(wq) || !wait_queue_is_set(wq_set))
                return KERN_INVALID_ARGUMENT;
 
        /*
-        * There are probably less threads and sets associated with
-        * the wait queue, then there are wait queues associated with
-        * the set.  So lets validate it that way.
+        * There are probably fewer threads and sets associated with
+        * the wait queue than there are wait queues associated with
+        * the set.  So let's validate it that way.
         */
        s = splsched();
        wait_queue_lock(wq);
@@ -412,7 +541,8 @@ wait_queue_link_noalloc(
        wq_element = (wait_queue_element_t) queue_first(q);
        while (!queue_end(q, (queue_entry_t)wq_element)) {
                WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
-               if (wq_element->wqe_type == WAIT_QUEUE_LINK &&
+               if ((wq_element->wqe_type == WAIT_QUEUE_LINK ||
+                    wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) &&
                    ((wait_queue_link_t)wq_element)->wql_setqueue == wq_set) {
                        wait_queue_unlock(wq);
                        splx(s);
@@ -429,11 +559,14 @@ wait_queue_link_noalloc(
 
        WAIT_QUEUE_SET_CHECK(wq_set);
 
+       assert(wql->wql_type == WAIT_QUEUE_LINK ||
+              wql->wql_type == WAIT_QUEUE_LINK_NOALLOC);
+
        wql->wql_queue = wq;
+       wql_clear_prepost(wql);
        queue_enter(&wq->wq_queue, wql, wait_queue_link_t, wql_links);
        wql->wql_setqueue = wq_set;
        queue_enter(&wq_set->wqs_setlinks, wql, wait_queue_link_t, wql_setlinks);
-       wql->wql_type = WAIT_QUEUE_LINK;
 
        wqs_unlock(wq_set);
        wait_queue_unlock(wq);
@@ -442,6 +575,25 @@ wait_queue_link_noalloc(
        return KERN_SUCCESS;
 }      
 
+/*
+ *     Routine:        wait_queue_link_noalloc
+ *     Purpose:
+ *             Insert a set wait queue into a wait queue.  This
+ *             requires us to link the two together using a wait_queue_link
+ *             structure that we allocate.
+ *     Conditions:
+ *             The wait queue being inserted must be inited as a set queue
+ */
+kern_return_t
+wait_queue_link_noalloc(
+       wait_queue_t wq,
+       wait_queue_set_t wq_set,
+       wait_queue_link_t wql)
+{
+       wql->wql_type = WAIT_QUEUE_LINK_NOALLOC;
+       return wait_queue_link_internal(wq, wq_set, wql);
+}
+
 /*
  *     Routine:        wait_queue_link
  *     Purpose:
@@ -459,20 +611,21 @@ wait_queue_link(
        wait_queue_link_t wql;
        kern_return_t ret;
 
-       wql = (wait_queue_link_t) kalloc(sizeof(struct wait_queue_link));
+       wql = (wait_queue_link_t) zalloc(_wait_queue_link_zone);
        if (wql == WAIT_QUEUE_LINK_NULL)
                return KERN_RESOURCE_SHORTAGE;
 
-       ret = wait_queue_link_noalloc(wq, wq_set, wql);
+       wql->wql_type = WAIT_QUEUE_LINK;
+       ret = wait_queue_link_internal(wq, wq_set, wql);
        if (ret != KERN_SUCCESS)
-               kfree((vm_offset_t)wql, sizeof(struct wait_queue_link));
+               zfree(_wait_queue_link_zone, wql);
 
        return ret;
 }      
 
 
 /*
- *     Routine:        wait_queue_unlink_nofree
+ *     Routine:        wait_queue_unlink_locked
  *     Purpose:
  *             Undo the linkage between a wait queue and a set.
  */
@@ -489,6 +642,10 @@ wait_queue_unlink_locked(
        queue_remove(&wq->wq_queue, wql, wait_queue_link_t, wql_links);
        wql->wql_setqueue = WAIT_QUEUE_SET_NULL;
        queue_remove(&wq_set->wqs_setlinks, wql, wait_queue_link_t, wql_setlinks);
+       if (wql_is_preposted(wql)) {
+               queue_t ppq = &wq_set->wqs_preposts;
+               queue_remove(ppq, wql, wait_queue_link_t, wql_preposts);
+       }
        wql->wql_type = WAIT_QUEUE_UNLINKED;
 
        WAIT_QUEUE_CHECK(wq);
@@ -513,7 +670,7 @@ wait_queue_unlink(
        queue_t q;
        spl_t s;
 
-       if (!wait_queue_is_queue(wq) || !wait_queue_is_set(wq_set)) {
+       if (!wait_queue_is_valid(wq) || !wait_queue_is_set(wq_set)) {
                return KERN_INVALID_ARGUMENT;
        }
        s = splsched();
@@ -523,16 +680,22 @@ wait_queue_unlink(
        wq_element = (wait_queue_element_t) queue_first(q);
        while (!queue_end(q, (queue_entry_t)wq_element)) {
                WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
-               if (wq_element->wqe_type == WAIT_QUEUE_LINK) {
+               if (wq_element->wqe_type == WAIT_QUEUE_LINK ||
+                   wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
+
                        wql = (wait_queue_link_t)wq_element;
                        
                        if (wql->wql_setqueue == wq_set) {
+                               boolean_t alloced;
+
+                               alloced = (wql->wql_type == WAIT_QUEUE_LINK);
                                wqs_lock(wq_set);
                                wait_queue_unlink_locked(wq, wq_set, wql);
                                wqs_unlock(wq_set);
                                wait_queue_unlock(wq);
                                splx(s);
-                               kfree((vm_offset_t)wql, sizeof(struct wait_queue_link));
+                               if (alloced)
+                                       zfree(_wait_queue_link_zone, wql);
                                return KERN_SUCCESS;
                        }
                }
@@ -544,65 +707,12 @@ wait_queue_unlink(
        return KERN_NOT_IN_SET;
 }      
 
-
-/*
- *     Routine:        wait_queue_unlinkall_nofree
- *     Purpose:
- *             Remove the linkage between a wait queue and all its
- *             sets. The caller is responsible for freeing
- *             the wait queue link structures.
- */
-
-kern_return_t
-wait_queue_unlinkall_nofree(
-       wait_queue_t wq)
-{
-       wait_queue_element_t wq_element;
-       wait_queue_element_t wq_next_element;
-       wait_queue_set_t wq_set;
-       wait_queue_link_t wql;
-       queue_head_t links_queue_head;
-       queue_t links = &links_queue_head;
-       queue_t q;
-       spl_t s;
-
-       if (!wait_queue_is_queue(wq)) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-       queue_init(links);
-
-       s = splsched();
-       wait_queue_lock(wq);
-
-       q = &wq->wq_queue;
-
-       wq_element = (wait_queue_element_t) queue_first(q);
-       while (!queue_end(q, (queue_entry_t)wq_element)) {
-               WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
-               wq_next_element = (wait_queue_element_t)
-                            queue_next((queue_t) wq_element);
-
-               if (wq_element->wqe_type == WAIT_QUEUE_LINK) {
-                       wql = (wait_queue_link_t)wq_element;
-                       wq_set = wql->wql_setqueue;
-                       wqs_lock(wq_set);
-                       wait_queue_unlink_locked(wq, wq_set, wql);
-                       wqs_unlock(wq_set);
-               }
-               wq_element = wq_next_element;
-       }
-       wait_queue_unlock(wq);
-       splx(s);
-       return(KERN_SUCCESS);
-}      
-
-
 /*
  *     Routine:        wait_queue_unlink_all
  *     Purpose:
- *             Remove the linkage between a wait queue and all its     sets.
- *             All the linkage structures are freed.
+ *             Remove the linkage between a wait queue and all its sets.
+ *             All the linkage structures that were allocated internally
+ *             are freed.  The others are the caller's responsibility.
  *     Conditions:
  *             Nothing of interest locked.
  */
@@ -620,7 +730,7 @@ wait_queue_unlink_all(
        queue_t q;
        spl_t s;
 
-       if (!wait_queue_is_queue(wq)) {
+       if (!wait_queue_is_valid(wq)) {
                return KERN_INVALID_ARGUMENT;
        }
 
@@ -633,17 +743,21 @@ wait_queue_unlink_all(
 
        wq_element = (wait_queue_element_t) queue_first(q);
        while (!queue_end(q, (queue_entry_t)wq_element)) {
+               boolean_t alloced;
+
                WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
                wq_next_element = (wait_queue_element_t)
                             queue_next((queue_t) wq_element);
 
-               if (wq_element->wqe_type == WAIT_QUEUE_LINK) {
+               alloced = (wq_element->wqe_type == WAIT_QUEUE_LINK);
+               if (alloced || wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
                        wql = (wait_queue_link_t)wq_element;
                        wq_set = wql->wql_setqueue;
                        wqs_lock(wq_set);
                        wait_queue_unlink_locked(wq, wq_set, wql);
                        wqs_unlock(wq_set);
-                       enqueue(links, &wql->wql_links);
+                       if (alloced)
+                               enqueue(links, &wql->wql_links);
                }
                wq_element = wq_next_element;
        }
@@ -652,69 +766,18 @@ wait_queue_unlink_all(
 
        while(!queue_empty(links)) {
                wql = (wait_queue_link_t) dequeue(links);
-               kfree((vm_offset_t) wql, sizeof(struct wait_queue_link));
+               zfree(_wait_queue_link_zone, wql);
        }
 
        return(KERN_SUCCESS);
 }      
 
-/*
- *     Routine:        wait_queue_set_unlink_all_nofree
- *     Purpose:
- *             Remove the linkage between a set wait queue and all its
- *             member wait queues. The link structures are not freed, nor
- *             returned. It is the caller's responsibility to track and free
- *             them.
- *     Conditions:
- *             The wait queue being must be a member set queue
- */
-kern_return_t
-wait_queue_set_unlink_all_nofree(
-       wait_queue_set_t wq_set)
-{
-       wait_queue_link_t wql;
-       wait_queue_t wq;
-       queue_t q;
-       kern_return_t kret;
-       spl_t s;
-
-       if (!wait_queue_is_set(wq_set)) {
-               return KERN_INVALID_ARGUMENT;
-       }
-
-retry:
-       s = splsched();
-       wqs_lock(wq_set);
-
-       q = &wq_set->wqs_setlinks;
-
-       wql = (wait_queue_link_t)queue_first(q);
-       while (!queue_end(q, (queue_entry_t)wql)) {
-               WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql);
-               wq = wql->wql_queue;
-               if (wait_queue_lock_try(wq)) {
-                       wait_queue_unlink_locked(wq, wq_set, wql);
-                       wait_queue_unlock(wq);
-                       wql = (wait_queue_link_t)queue_first(q);
-               } else {
-                       wqs_unlock(wq_set);
-                       splx(s);
-                       delay(1);
-                       goto retry;
-               }
-       }
-       wqs_unlock(wq_set);
-       splx(s);
-
-       return(KERN_SUCCESS);
-}      
-
 /* legacy interface naming */
 kern_return_t
 wait_subqueue_unlink_all(
        wait_queue_set_t        wq_set)
 {
-       return wait_queue_set_unlink_all_nofree(wq_set);
+       return wait_queue_set_unlink_all(wq_set);
 }
 
 
@@ -722,7 +785,8 @@ wait_subqueue_unlink_all(
  *     Routine:        wait_queue_set_unlink_all
  *     Purpose:
  *             Remove the linkage between a set wait queue and all its
- *             member wait queues. The link structures are freed.
+ *             member wait queues. The link structures are freed for those
+ *             links which were dynamically allocated.
  *     Conditions:
  *             The wait queue must be a set
  */
@@ -735,7 +799,6 @@ wait_queue_set_unlink_all(
        queue_t q;
        queue_head_t links_queue_head;
        queue_t links = &links_queue_head;
-       kern_return_t kret;
        spl_t s;
 
        if (!wait_queue_is_set(wq_set)) {
@@ -755,9 +818,13 @@ retry:
                WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql);
                wq = wql->wql_queue;
                if (wait_queue_lock_try(wq)) {
+                       boolean_t alloced;
+
+                       alloced = (wql->wql_type == WAIT_QUEUE_LINK);
                        wait_queue_unlink_locked(wq, wq_set, wql);
                        wait_queue_unlock(wq);
-                       enqueue(links, &wql->wql_links);
+                       if (alloced)
+                               enqueue(links, &wql->wql_links);
                        wql = (wait_queue_link_t)queue_first(q);
                } else {
                        wqs_unlock(wq_set);
@@ -771,59 +838,11 @@ retry:
 
        while (!queue_empty (links)) {
                wql = (wait_queue_link_t) dequeue(links);
-               kfree((vm_offset_t)wql, sizeof(struct wait_queue_link));
+               zfree(_wait_queue_link_zone, wql);
        }
        return(KERN_SUCCESS);
 }      
 
-
-/*
- *     Routine:        wait_queue_unlink_one
- *     Purpose:
- *             Find and unlink one set wait queue
- *     Conditions:
- *             Nothing of interest locked.
- */
-void
-wait_queue_unlink_one(
-       wait_queue_t wq,
-       wait_queue_set_t *wq_setp)
-{
-       wait_queue_element_t wq_element;
-       queue_t q;
-       spl_t s;
-
-       s = splsched();
-       wait_queue_lock(wq);
-
-       q = &wq->wq_queue;
-       
-       wq_element = (wait_queue_element_t) queue_first(q);
-       while (!queue_end(q, (queue_entry_t)wq_element)) {
-
-               if (wq_element->wqe_type == WAIT_QUEUE_LINK) {
-                       wait_queue_link_t wql = (wait_queue_link_t)wq_element;
-                       wait_queue_set_t wq_set = wql->wql_setqueue;
-                       
-                       wqs_lock(wq_set);
-                       wait_queue_unlink_locked(wq, wq_set, wql);
-                       wqs_unlock(wq_set);
-                       wait_queue_unlock(wq);
-                       splx(s);
-                       kfree((vm_offset_t)wql,sizeof(struct wait_queue_link));
-                       *wq_setp = wq_set;
-                       return;
-               }
-
-               wq_element = (wait_queue_element_t)
-                       queue_next((queue_t) wq_element);
-       }
-       wait_queue_unlock(wq);
-       splx(s);
-       *wq_setp = WAIT_QUEUE_SET_NULL;
-}
-
-
 /*
  *     Routine:        wait_queue_assert_wait64_locked
  *     Purpose:
@@ -840,6 +859,7 @@ wait_queue_assert_wait64_locked(
        wait_queue_t wq,
        event64_t event,
        wait_interrupt_t interruptible,
+       uint64_t deadline,
        thread_t thread)
 {
        wait_result_t wait_result;
@@ -850,7 +870,7 @@ wait_queue_assert_wait64_locked(
        if (wq->wq_type == _WAIT_QUEUE_SET_inited) {
                wait_queue_set_t wqs = (wait_queue_set_t)wq;
 
-               if (wqs->wqs_isprepost && wqs->wqs_refcount > 0)
+               if (event == NO_EVENT64 && wqs_is_preposted(wqs))
                        return(THREAD_AWAKENED);
        }
          
@@ -862,12 +882,19 @@ wait_queue_assert_wait64_locked(
         */
        wait_result = thread_mark_wait_locked(thread, interruptible);
        if (wait_result == THREAD_WAITING) {
-               if (thread->vm_privilege)
+               if (!wq->wq_fifo || thread->options & TH_OPT_VMPRIV)
                        enqueue_head(&wq->wq_queue, (queue_entry_t) thread);
                else
                        enqueue_tail(&wq->wq_queue, (queue_entry_t) thread);
+
                thread->wait_event = event;
                thread->wait_queue = wq;
+
+               if (deadline != 0) {
+                       if (!timer_call_enter(&thread->wait_timer, deadline))
+                               thread->wait_timer_active++;
+                       thread->wait_timer_is_set = TRUE;
+               }
        }
        return(wait_result);
 }
@@ -885,25 +912,23 @@ wait_result_t
 wait_queue_assert_wait(
        wait_queue_t wq,
        event_t event,
-       wait_interrupt_t interruptible)
+       wait_interrupt_t interruptible,
+       uint64_t deadline)
 {
        spl_t s;
        wait_result_t ret;
-       thread_t cur_thread = current_thread();
+       thread_t thread = current_thread();
 
        /* If it is an invalid wait queue, you can't wait on it */
-       if (!wait_queue_is_valid(wq)) {
-               thread_t thread = current_thread();
+       if (!wait_queue_is_valid(wq))
                return (thread->wait_result = THREAD_RESTART);
-       }
 
        s = splsched();
        wait_queue_lock(wq);
-       thread_lock(cur_thread);
-       ret = wait_queue_assert_wait64_locked(
-                               wq, (event64_t)((uint32_t)event),
-                               interruptible, cur_thread);
-       thread_unlock(cur_thread);
+       thread_lock(thread);
+       ret = wait_queue_assert_wait64_locked(wq, CAST_DOWN(event64_t,event),
+                                                                                       interruptible, deadline, thread);
+       thread_unlock(thread);
        wait_queue_unlock(wq);
        splx(s);
        return(ret);
@@ -921,29 +946,27 @@ wait_result_t
 wait_queue_assert_wait64(
        wait_queue_t wq,
        event64_t event,
-       wait_interrupt_t interruptible)
+       wait_interrupt_t interruptible,
+       uint64_t deadline)
 {
        spl_t s;
        wait_result_t ret;
-       thread_t cur_thread = current_thread();
+       thread_t thread = current_thread();
 
        /* If it is an invalid wait queue, you cant wait on it */
-       if (!wait_queue_is_valid(wq)) {
-               thread_t thread = current_thread();
+       if (!wait_queue_is_valid(wq))
                return (thread->wait_result = THREAD_RESTART);
-       }
 
        s = splsched();
        wait_queue_lock(wq);
-       thread_lock(cur_thread);
-       ret = wait_queue_assert_wait64_locked(wq, event, interruptible, cur_thread);
-       thread_unlock(cur_thread);
+       thread_lock(thread);
+       ret = wait_queue_assert_wait64_locked(wq, event, interruptible, deadline, thread);
+       thread_unlock(thread);
        wait_queue_unlock(wq);
        splx(s);
        return(ret);
 }
 
-
 /*
  *     Routine:        _wait_queue_select64_all
  *     Purpose:
@@ -978,27 +1001,24 @@ _wait_queue_select64_all(
                /*
                 * We may have to recurse if this is a compound wait queue.
                 */
-               if (wq_element->wqe_type == WAIT_QUEUE_LINK) {
+               if (wq_element->wqe_type == WAIT_QUEUE_LINK ||
+                   wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
                        wait_queue_link_t wql = (wait_queue_link_t)wq_element;
-                       wait_queue_t set_queue;
+                       wait_queue_set_t set_queue = wql->wql_setqueue;
 
                        /*
-                        * We have to check the set wait queue.
+                        * We have to check the set wait queue. If it is marked
+                        * as pre-post, and it is the "generic event" then mark
+                        * it pre-posted now (if not already).
                         */
-                       set_queue = (wait_queue_t)wql->wql_setqueue;
-                       wait_queue_lock(set_queue);
-                       if (set_queue->wq_isprepost) {
-                               wait_queue_set_t wqs = (wait_queue_set_t)set_queue;
-                               
-                               /*
-                                * Preposting is only for sets and wait queue
-                                * is the first element of set 
-                                */
-                               wqs->wqs_refcount++;
+                       wqs_lock(set_queue);
+                       if (event == NO_EVENT64 && set_queue->wqs_prepost && !wql_is_preposted(wql)) {
+                               queue_t ppq = &set_queue->wqs_preposts;
+                               queue_enter(ppq, wql, wait_queue_link_t, wql_preposts);
                        }
-                       if (! wait_queue_empty(set_queue)) 
-                               _wait_queue_select64_all(set_queue, event, wake_queue);
-                       wait_queue_unlock(set_queue);
+                       if (! wait_queue_empty(&set_queue->wqs_wait_queue)) 
+                               _wait_queue_select64_all(&set_queue->wqs_wait_queue, event, wake_queue);
+                       wqs_unlock(set_queue);
                } else {
                        
                        /*
@@ -1044,7 +1064,11 @@ wait_queue_wakeup64_all_locked(
        queue_t q = &wake_queue_head;
        kern_return_t res;
 
-       assert(wait_queue_held(wq));
+//     assert(wait_queue_held(wq));
+//     if(!wq->wq_interlock.lock_data) {               /* (BRINGUP */
+//             panic("wait_queue_wakeup64_all_locked: lock not held on %p\n", wq);     /* (BRINGUP) */
+//     }
+
        queue_init(q);
 
        /*
@@ -1062,7 +1086,7 @@ wait_queue_wakeup64_all_locked(
        res = KERN_NOT_WAITING;
        while (!queue_empty (q)) {
                thread_t thread = (thread_t) dequeue(q);
-               res = thread_go_locked(thread, result);
+               res = thread_go(thread, result);
                assert(res == KERN_SUCCESS);
                thread_unlock(thread);
        }
@@ -1096,8 +1120,11 @@ wait_queue_wakeup_all(
 
        s = splsched();
        wait_queue_lock(wq);
+//     if(!wq->wq_interlock.lock_data) {               /* (BRINGUP */
+//             panic("wait_queue_wakeup_all: we did not get the lock on %p\n", wq);    /* (BRINGUP) */
+//     }
        ret = wait_queue_wakeup64_all_locked(
-                               wq, (event64_t)((uint32_t)event),
+                               wq, CAST_DOWN(event64_t,event),
                                result, TRUE);
        /* lock released */
        splx(s);
@@ -1149,7 +1176,7 @@ wait_queue_wakeup64_all(
  *             a locked thread - if one found
  *     Note:
  *             This is where the sync policy of the wait queue comes
- *             into effect.  For now, we just assume FIFO.
+ *             into effect.  For now, we just assume FIFO/LIFO.
  */
 static thread_t
 _wait_queue_select64_one(
@@ -1161,8 +1188,6 @@ _wait_queue_select64_one(
        thread_t t = THREAD_NULL;
        queue_t q;
 
-       assert(wq->wq_fifo);
-
        q = &wq->wq_queue;
 
        wq_element = (wait_queue_element_t) queue_first(q);
@@ -1174,21 +1199,34 @@ _wait_queue_select64_one(
                /*
                 * We may have to recurse if this is a compound wait queue.
                 */
-               if (wq_element->wqe_type == WAIT_QUEUE_LINK) {
+               if (wq_element->wqe_type == WAIT_QUEUE_LINK ||
+                   wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
                        wait_queue_link_t wql = (wait_queue_link_t)wq_element;
-                       wait_queue_t set_queue;
+                       wait_queue_set_t set_queue = wql->wql_setqueue;
 
                        /*
-                        * We have to check the set wait queue.
+                        * We have to check the set wait queue. If the set
+                        * supports pre-posting, it isn't already preposted,
+                        * and we didn't find a thread in the set, then mark it.
+                        *
+                        * If we later find a thread, there may be a spurious
+                        * pre-post here on this set.  The wait side has to check
+                        * for that either pre- or post-wait.
                         */
-                       set_queue = (wait_queue_t)wql->wql_setqueue;
-                       wait_queue_lock(set_queue);
-                       if (! wait_queue_empty(set_queue)) {
-                               t = _wait_queue_select64_one(set_queue, event);
+                       wqs_lock(set_queue);
+                       if (! wait_queue_empty(&set_queue->wqs_wait_queue)) {
+                               t = _wait_queue_select64_one(&set_queue->wqs_wait_queue, event);
                        }
-                       wait_queue_unlock(set_queue);
-                       if (t != THREAD_NULL)
+                       if (t != THREAD_NULL) {
+                               wqs_unlock(set_queue);
                                return t;
+                       }
+                       if (event == NO_EVENT64 && set_queue->wqs_prepost && !wql_is_preposted(wql)) {
+                               queue_t ppq = &set_queue->wqs_preposts;
+                               queue_enter(ppq, wql, wait_queue_link_t, wql_preposts);
+                       }
+                       wqs_unlock(set_queue);
+
                } else {
                        
                        /*
@@ -1196,8 +1234,7 @@ _wait_queue_select64_one(
                         * the event we are posting to this queue, pull
                         * it off the queue and stick it in out wake_queue.
                         */
-                       thread_t t = (thread_t)wq_element;
-
+                       t = (thread_t)wq_element;
                        if (t->wait_event == event) {
                                thread_lock(t);
                                remqueue(q, (queue_entry_t) t);
@@ -1206,101 +1243,20 @@ _wait_queue_select64_one(
                                t->at_safe_point = FALSE;
                                return t;       /* still locked */
                        }
+
+                       t = THREAD_NULL;
                }
                wq_element = wqe_next;
        }
        return THREAD_NULL;
 }
 
-/*
- *     Routine:        wait_queue_peek64_locked
- *     Purpose:
- *             Select the best thread from a wait queue that meet the
- *             supplied criteria, but leave it on the queue it was
- *             found on.  The thread, and the actual wait_queue the
- *             thread was found on are identified.
- *     Conditions:
- *             at splsched
- *             wait queue locked
- *             possibly recursive
- *     Returns:
- *             a locked thread - if one found
- *             a locked waitq - the one the thread was found on
- *     Note:
- *             Both the waitq the thread was actually found on, and
- *             the supplied wait queue, are locked after this.
- */
-__private_extern__ void
-wait_queue_peek64_locked(
-       wait_queue_t wq,
-       event64_t event,
-       thread_t *tp,
-       wait_queue_t *wqp)
-{
-       wait_queue_element_t wq_element;
-       wait_queue_element_t wqe_next;
-       thread_t t;
-       queue_t q;
-
-       assert(wq->wq_fifo);
-
-       *tp = THREAD_NULL;
-
-       q = &wq->wq_queue;
-
-       wq_element = (wait_queue_element_t) queue_first(q);
-       while (!queue_end(q, (queue_entry_t)wq_element)) {
-               WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
-               wqe_next = (wait_queue_element_t)
-                              queue_next((queue_t) wq_element);
-
-               /*
-                * We may have to recurse if this is a compound wait queue.
-                */
-               if (wq_element->wqe_type == WAIT_QUEUE_LINK) {
-                       wait_queue_link_t wql = (wait_queue_link_t)wq_element;
-                       wait_queue_t set_queue;
-
-                       /*
-                        * We have to check the set wait queue.
-                        */
-                       set_queue = (wait_queue_t)wql->wql_setqueue;
-                       wait_queue_lock(set_queue);
-                       if (! wait_queue_empty(set_queue)) {
-                               wait_queue_peek64_locked(set_queue, event, tp, wqp);
-                       }
-                       if (*tp != THREAD_NULL) {
-                               if (*wqp != set_queue)
-                                       wait_queue_unlock(set_queue);
-                               return;  /* thread and its waitq locked */
-                       }
-
-                       wait_queue_unlock(set_queue);
-               } else {
-                       
-                       /*
-                        * Otherwise, its a thread.  If it is waiting on
-                        * the event we are posting to this queue, return
-                        * it locked, but leave it on the queue.
-                        */
-                       thread_t t = (thread_t)wq_element;
-
-                       if (t->wait_event == event) {
-                               thread_lock(t);
-                               *tp = t;
-                               *wqp = wq;
-                               return;
-                       }
-               }
-               wq_element = wqe_next;
-       }
-}
 
 /*
  *     Routine:        wait_queue_pull_thread_locked
  *     Purpose:
- *             Pull a thread that was previously "peeked" off the wait
- *             queue and (possibly) unlock the waitq.
+ *             Pull a thread off its wait queue and (possibly) unlock 
+ *             the waitq.
  *     Conditions:
  *             at splsched
  *             wait queue locked
@@ -1373,18 +1329,18 @@ _wait_queue_select64_thread(
                wqe_next = (wait_queue_element_t)
                               queue_next((queue_t) wq_element);
 
-               if (wq_element->wqe_type == WAIT_QUEUE_LINK) {
+               if (wq_element->wqe_type == WAIT_QUEUE_LINK ||
+                   wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
                        wait_queue_link_t wql = (wait_queue_link_t)wq_element;
-                       wait_queue_t set_queue;
+                       wait_queue_set_t set_queue = wql->wql_setqueue;
 
-                       set_queue = (wait_queue_t)wql->wql_setqueue;
-                       wait_queue_lock(set_queue);
-                       if (! wait_queue_empty(set_queue)) {
-                               res = _wait_queue_select64_thread(set_queue,
+                       wqs_lock(set_queue);
+                       if (! wait_queue_empty(&set_queue->wqs_wait_queue)) {
+                               res = _wait_queue_select64_thread(&set_queue->wqs_wait_queue,
                                                                event,
                                                                thread);
                        }
-                       wait_queue_unlock(set_queue);
+                       wqs_unlock(set_queue);
                        if (res == KERN_SUCCESS)
                                return KERN_SUCCESS;
                }
@@ -1419,13 +1375,12 @@ wait_queue_wakeup64_identity_locked(
 
        assert(wait_queue_held(wq));
 
-
        thread = _wait_queue_select64_one(wq, event);
        if (unlock)
                wait_queue_unlock(wq);
 
        if (thread) {
-               res = thread_go_locked(thread, result);
+               res = thread_go(thread, result);
                assert(res == KERN_SUCCESS);
        }
        return thread;  /* still locked if not NULL */
@@ -1464,7 +1419,7 @@ wait_queue_wakeup64_one_locked(
        if (thread) {
                kern_return_t res;
                
-               res = thread_go_locked(thread, result);
+               res = thread_go(thread, result);
                assert(res == KERN_SUCCESS);
                thread_unlock(thread);
                return res;
@@ -1499,13 +1454,13 @@ wait_queue_wakeup_one(
 
        s = splsched();
        wait_queue_lock(wq);
-       thread = _wait_queue_select64_one(wq, (event64_t)((uint32_t)event));
+       thread = _wait_queue_select64_one(wq, CAST_DOWN(event64_t,event));
        wait_queue_unlock(wq);
 
        if (thread) {
                kern_return_t res;
 
-               res = thread_go_locked(thread, result);
+               res = thread_go(thread, result);
                assert(res == KERN_SUCCESS);
                thread_unlock(thread);
                splx(s);
@@ -1547,7 +1502,7 @@ wait_queue_wakeup64_one(
        if (thread) {
                kern_return_t res;
 
-               res = thread_go_locked(thread, result);
+               res = thread_go(thread, result);
                assert(res == KERN_SUCCESS);
                thread_unlock(thread);
                splx(s);
@@ -1601,7 +1556,7 @@ wait_queue_wakeup64_thread_locked(
        if (res != KERN_SUCCESS)
                return KERN_NOT_WAITING;
 
-       res = thread_go_locked(thread, result);
+       res = thread_go(thread, result);
        assert(res == KERN_SUCCESS);
        thread_unlock(thread);
        return res;
@@ -1642,11 +1597,11 @@ wait_queue_wakeup_thread(
 
        s = splsched();
        wait_queue_lock(wq);
-       res = _wait_queue_select64_thread(wq, (event64_t)((uint32_t)event), thread);
+       res = _wait_queue_select64_thread(wq, CAST_DOWN(event64_t,event), thread);
        wait_queue_unlock(wq);
 
        if (res == KERN_SUCCESS) {
-               res = thread_go_locked(thread, result);
+               res = thread_go(thread, result);
                assert(res == KERN_SUCCESS);
                thread_unlock(thread);
                splx(s);
@@ -1695,7 +1650,7 @@ wait_queue_wakeup64_thread(
        wait_queue_unlock(wq);
 
        if (res == KERN_SUCCESS) {
-               res = thread_go_locked(thread, result);
+               res = thread_go(thread, result);
                assert(res == KERN_SUCCESS);
                thread_unlock(thread);
                splx(s);