diff --git a/osfmk/kern/wait_queue.c b/osfmk/kern/wait_queue.c
index 6763ac65c3429796a74e7be5c7c87b4f3fc83450..b51ff419fc49d33a56431e6f6c2ada5a7aa9a24f 100644
--- a/osfmk/kern/wait_queue.c
+++ b/osfmk/kern/wait_queue.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -82,8 +82,7 @@ static boolean_t wait_queue_member_locked(
                        wait_queue_t            wq,
                        wait_queue_set_t        wq_set);
 
-static void wait_queues_init(void) __attribute__((section("__TEXT, initcode")));
-
+static void wait_queues_init(void);
 
 #define WAIT_QUEUE_MAX thread_max
 #define WAIT_QUEUE_SET_MAX task_max * 3
@@ -128,16 +127,21 @@ volatile WaitQueueLink *unused_except_for_debugging;
 
 struct wait_queue boot_wait_queue[1];
 __private_extern__ struct wait_queue *wait_queues = &boot_wait_queue[0];
-
 __private_extern__ uint32_t num_wait_queues = 1;
 
+#define        P2ROUNDUP(x, align) (-(-((uint32_t)(x)) & -(align)))
+#define ROUNDDOWN(x,y) (((x)/(y))*(y))
+
 static uint32_t
-compute_wait_hash_size(__unused unsigned cpu_count, __unused uint64_t memsize) {
-       uint32_t hsize = (uint32_t)round_page_64((thread_max / 11) * sizeof(struct wait_queue));
-       uint32_t bhsize;
+compute_wait_hash_size(void)
+{
+       uint32_t hsize, queues;
        
-       if (PE_parse_boot_argn("wqsize", &bhsize, sizeof(bhsize)))
-               hsize = bhsize;
+       if (PE_parse_boot_argn("wqsize", &hsize, sizeof(hsize)))
+               return (hsize);
+
+       queues = thread_max / 11;
+       hsize = P2ROUNDUP(queues * sizeof(struct wait_queue), PAGE_SIZE);
 
        return hsize;
 }
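Worked through with illustrative numbers (thread_max, the per-queue size, and PAGE_SIZE below are assumptions, not real configuration values), the sizing reduces to: thread_max / 11 queues, times the structure size, rounded up to whole pages unless a wqsize boot-arg overrides it. A small standalone sketch of the two macros:

#include <stdint.h>
#include <stdio.h>

#define P2ROUNDUP(x, align) (-(-((uint32_t)(x)) & -(align)))	/* align must be a power of 2 */
#define ROUNDDOWN(x, y)     (((x)/(y))*(y))

int main(void)
{
	uint32_t page_size  = 4096;	/* assumed PAGE_SIZE */
	uint32_t thread_max = 2560;	/* assumed; really set by kernel configuration */
	uint32_t wq_size    = 60;	/* assumed sizeof(struct wait_queue) */

	uint32_t queues = thread_max / 11;			/* 232 with the values above */
	uint32_t hsize  = P2ROUNDUP(queues * wq_size, page_size);

	printf("%u queues -> %u bytes reserved (%u pages)\n", queues, hsize, hsize / page_size);
	printf("trimmed back to a whole number of queues: %u bytes\n", ROUNDDOWN(hsize, wq_size));
	return 0;
}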
@@ -145,13 +149,37 @@ compute_wait_hash_size(__unused unsigned cpu_count, __unused uint64_t memsize) {
 static void
 wait_queues_init(void)
 {
-       uint32_t        i, whsize;
+       uint32_t        i, whsize, qsz;
        kern_return_t   kret;
 
-       whsize = compute_wait_hash_size(processor_avail_count, machine_info.max_mem);
-       num_wait_queues = (whsize / ((uint32_t)sizeof(struct wait_queue))) - 1;
+       /*
+        * Determine the amount of memory we're willing to reserve for
+        * the waitqueue hash table
+        */
+       whsize = compute_wait_hash_size();
+
+       /* Determine the number of waitqueues we can fit. */
+       qsz = sizeof (struct wait_queue);
+       whsize = ROUNDDOWN(whsize, qsz);
+       num_wait_queues = whsize / qsz;
+
+       /*
+        * The hash algorithm requires that this be a power of 2, so we
+        * just mask off all the low-order bits.
+        */
+       for (i = 0; i < 31; i++) {
+               uint32_t bit = (1 << i);
+               if ((num_wait_queues & bit) == num_wait_queues)
+                       break;
+               num_wait_queues &= ~bit;
+       }
+       assert(num_wait_queues > 0);
+
+       /* Now determine how much memory we really need. */
+       whsize = P2ROUNDUP(num_wait_queues * qsz, PAGE_SIZE);
 
-       kret = kernel_memory_allocate(kernel_map, (vm_offset_t *) &wait_queues, whsize, 0, KMA_KOBJECT|KMA_NOPAGEWAIT);
+       kret = kernel_memory_allocate(kernel_map, (vm_offset_t *) &wait_queues,
+           whsize, 0, KMA_KOBJECT|KMA_NOPAGEWAIT);
 
        if (kret != KERN_SUCCESS || wait_queues == NULL)
                panic("kernel_memory_allocate() failed to allocate wait queues, error: %d, whsize: 0x%x", kret, whsize);
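The mask-off loop above is a round-down-to-the-nearest-power-of-two: it clears low-order bits until only the highest set bit remains. The same logic as a standalone sketch, with illustrative values in the asserts:

#include <assert.h>
#include <stdint.h>

/* Mirror of the loop in wait_queues_init(): clear low-order bits of a
 * nonzero value until a single (the highest) bit is left. */
static uint32_t
round_down_pow2(uint32_t n)
{
	uint32_t i;

	for (i = 0; i < 31; i++) {
		uint32_t bit = (1u << i);
		if ((n & bit) == n)	/* n is down to a single bit */
			break;
		n &= ~bit;
	}
	return n;
}

int main(void)
{
	assert(round_down_pow2(1)    == 1);
	assert(round_down_pow2(232)  == 128);	/* e.g. thread_max/11 with an assumed thread_max of 2560 */
	assert(round_down_pow2(4096) == 4096);
	return 0;
}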
@@ -203,6 +231,7 @@ wait_queue_init(
 
        wq->wq_fifo = ((policy & SYNC_POLICY_REVERSED) == 0);
        wq->wq_type = _WAIT_QUEUE_inited;
+       wq->wq_eventmask = 0;
        queue_init(&wq->wq_queue);
        hw_lock_init(&wq->wq_interlock);
        return KERN_SUCCESS;
@@ -445,6 +474,22 @@ MACRO_END
 
 #endif /* !_WAIT_QUEUE_DEBUG_ */
 
+/*
+ *     Routine:        wait_queue_global
+ *     Purpose:
+ *             Indicate if this wait queue is a global wait queue or not.
+ */
+static boolean_t
+wait_queue_global(
+       wait_queue_t wq)
+{
+       if ((wq >= wait_queues) && (wq <= (wait_queues + num_wait_queues))) {
+               return TRUE;
+       }
+       return FALSE;
+}
+
+
 /*
  *     Routine:        wait_queue_member_locked
  *     Purpose:
@@ -628,6 +673,25 @@ wait_queue_link(
        return ret;
 }      
 
+wait_queue_link_t
+wait_queue_link_allocate(void)
+{
+       wait_queue_link_t wql;
+
+       wql = zalloc(_wait_queue_link_zone); /* Can't fail */
+       bzero(wql, sizeof(*wql));
+       wql->wql_type = WAIT_QUEUE_UNLINKED;
+
+       return wql;
+}
+
+kern_return_t
+wait_queue_link_free(wait_queue_link_t wql) 
+{
+       zfree(_wait_queue_link_zone, wql);
+       return KERN_SUCCESS;
+}
+
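These two helpers let a caller pre-allocate a link outside any critical section and return it to the zone once it has been unlinked again. A rough usage sketch, assuming the wait_queue_link_noalloc() entry point declared in wait_queue.h; the example_ name is illustrative only:

/* Sketch only: pre-allocate a link, attach a wait queue to a set with the
 * no-allocation variant, and give the link back when done. */
static kern_return_t
example_link_then_unlink(wait_queue_t wq, wait_queue_set_t wq_set)
{
	wait_queue_link_t wql;
	kern_return_t kr;

	wql = wait_queue_link_allocate();		/* zalloc-backed; may block, cannot fail */

	kr = wait_queue_link_noalloc(wq, wq_set, wql);	/* consumes the pre-allocated link */
	if (kr != KERN_SUCCESS) {
		(void) wait_queue_link_free(wql);	/* never linked; just return it */
		return kr;
	}

	/* ... waits and wakeups happen while the link is in place ... */

	(void) wait_queue_set_unlink_one(wq_set, wql);	/* detach it again */
	return wait_queue_link_free(wql);
}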
 
 /*
  *     Routine:        wait_queue_unlink_locked
@@ -657,6 +721,60 @@ wait_queue_unlink_locked(
        WAIT_QUEUE_SET_CHECK(wq_set);
 }
 
+/*
+ *     Routine:        wait_queue_unlink_nofree
+ *     Purpose:
+ *             Remove the linkage between a wait queue and a set,
+ *             returning the linkage structure to the caller to
+ *             free later.
+ *     Conditions:
+ *		The wait queue being unlinked must be a member of the set queue
+ */
+kern_return_t
+wait_queue_unlink_nofree(
+       wait_queue_t wq,
+       wait_queue_set_t wq_set,
+       wait_queue_link_t *wqlp)
+{
+       wait_queue_element_t wq_element;
+       wait_queue_link_t wql;
+       queue_t q;
+       spl_t s;
+
+       if (!wait_queue_is_valid(wq) || !wait_queue_is_set(wq_set)) {
+               return KERN_INVALID_ARGUMENT;
+       }
+       s = splsched();
+       wait_queue_lock(wq);
+
+       q = &wq->wq_queue;
+       wq_element = (wait_queue_element_t) queue_first(q);
+       while (!queue_end(q, (queue_entry_t)wq_element)) {
+               WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
+               if (wq_element->wqe_type == WAIT_QUEUE_LINK ||
+                   wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
+
+                       wql = (wait_queue_link_t)wq_element;
+                       
+                       if (wql->wql_setqueue == wq_set) {
+
+                               wqs_lock(wq_set);
+                               wait_queue_unlink_locked(wq, wq_set, wql);
+                               wqs_unlock(wq_set);
+                               wait_queue_unlock(wq);
+                               splx(s);
+                               *wqlp = wql;
+                               return KERN_SUCCESS;
+                       }
+               }
+               wq_element = (wait_queue_element_t)
+                               queue_next((queue_t) wq_element);
+       }
+       wait_queue_unlock(wq);
+       splx(s);
+       return KERN_NOT_IN_SET;
+}      
+
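A rough sketch of the intended calling pattern for the _nofree unlink: do the unlink, then free the returned link once the unlink path and its splsched/lock window are behind us; the example_ name is illustrative only:

/* Sketch only: detach wq from wq_set and defer freeing the link. */
static void
example_unlink_deferred(wait_queue_t wq, wait_queue_set_t wq_set)
{
	wait_queue_link_t wql;

	if (wait_queue_unlink_nofree(wq, wq_set, &wql) == KERN_SUCCESS) {
		/* All locks were dropped inside the call; the link can now
		 * be freed at leisure. */
		(void) wait_queue_link_free(wql);
	}
}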
 /*
  *     Routine:        wait_queue_unlink
  *     Purpose:
@@ -713,36 +831,97 @@ wait_queue_unlink(
 }      
 
 /*
- *     Routine:        wait_queue_unlink_all
+ *     Routine:        wait_queue_unlink_all_nofree_locked
  *     Purpose:
  *             Remove the linkage between a wait queue and all its sets.
- *             All the linkage structures that were allocated internally
- *             are freed.  The others are the caller's responsibility.
+ *             All the linkage structures are returned to the caller for
+ *             later freeing.
  *     Conditions:
- *             Nothing of interest locked.
+ *             Wait queue locked.
  */
 
-kern_return_t
-wait_queue_unlink_all(
-       wait_queue_t wq)
+static void
+wait_queue_unlink_all_nofree_locked(
+       wait_queue_t wq,
+       queue_t links)
 {
        wait_queue_element_t wq_element;
        wait_queue_element_t wq_next_element;
        wait_queue_set_t wq_set;
        wait_queue_link_t wql;
-       queue_head_t links_queue_head;
-       queue_t links = &links_queue_head;
        queue_t q;
+
+       q = &wq->wq_queue;
+
+       wq_element = (wait_queue_element_t) queue_first(q);
+       while (!queue_end(q, (queue_entry_t)wq_element)) {
+
+               WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
+               wq_next_element = (wait_queue_element_t)
+                            queue_next((queue_t) wq_element);
+
+               if (wq_element->wqe_type == WAIT_QUEUE_LINK ||
+                   wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
+                       wql = (wait_queue_link_t)wq_element;
+                       wq_set = wql->wql_setqueue;
+                       wqs_lock(wq_set);
+                       wait_queue_unlink_locked(wq, wq_set, wql);
+                       wqs_unlock(wq_set);
+                       enqueue(links, &wql->wql_links);
+               }
+               wq_element = wq_next_element;
+       }
+}      
+
+/*
+ *     Routine:        wait_queue_unlink_all_nofree
+ *     Purpose:
+ *             Remove the linkage between a wait queue and all its sets.
+ *             All the linkage structures are returned to the caller for
+ *             later freeing.
+ *     Conditions:
+ *             Nothing of interest locked.
+ */
+
+kern_return_t
+wait_queue_unlink_all_nofree(
+       wait_queue_t wq,
+       queue_t links)
+{
        spl_t s;
 
        if (!wait_queue_is_valid(wq)) {
                return KERN_INVALID_ARGUMENT;
        }
 
-       queue_init(links);
-
        s = splsched();
        wait_queue_lock(wq);
+       wait_queue_unlink_all_nofree_locked(wq, links);
+       wait_queue_unlock(wq);
+       splx(s);
+
+       return(KERN_SUCCESS);
+}      
+
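The caller supplies an initialized links queue and frees whatever accumulates on it afterwards, mirroring the freeing loop that wait_queue_unlink_all() runs on its own local queue. A rough sketch of that pattern; the example_ name is illustrative only:

/* Sketch only: tear down all of wq's set memberships, then free the
 * collected links once no wait queue locks are held. */
static void
example_unlink_all_deferred(wait_queue_t wq)
{
	queue_head_t links_queue_head;
	queue_t links = &links_queue_head;
	wait_queue_link_t wql;

	queue_init(links);

	if (wait_queue_unlink_all_nofree(wq, links) != KERN_SUCCESS)
		return;

	while (!queue_empty(links)) {
		wql = (wait_queue_link_t) dequeue(links);
		(void) wait_queue_link_free(wql);
	}
}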
+/*
+ *     Routine:        wait_queue_unlink_all_locked
+ *     Purpose:
+ *             Remove the linkage between a locked wait queue and all its
+ *             sets and enqueue the allocated ones onto the links queue
+ *             provided.
+ *     Conditions:
+ *             Wait queue locked.
+ */
+static void
+wait_queue_unlink_all_locked(
+       wait_queue_t wq,
+       queue_t links)
+{
+       wait_queue_element_t wq_element;
+       wait_queue_element_t wq_next_element;
+       wait_queue_set_t wq_set;
+       wait_queue_link_t wql;
+       queue_t q;
 
        q = &wq->wq_queue;
 
@@ -766,6 +945,38 @@ wait_queue_unlink_all(
                }
                wq_element = wq_next_element;
        }
+
+}
+
+
+/*
+ *     Routine:        wait_queue_unlink_all
+ *     Purpose:
+ *             Remove the linkage between a wait queue and all its sets.
+ *             All the linkage structures that were allocated internally
+ *             are freed.  The others are the caller's responsibility.
+ *     Conditions:
+ *             Nothing of interest locked.
+ */
+
+kern_return_t
+wait_queue_unlink_all(
+       wait_queue_t wq)
+{
+       wait_queue_link_t wql;
+       queue_head_t links_queue_head;
+       queue_t links = &links_queue_head;
+       spl_t s;
+
+       if (!wait_queue_is_valid(wq)) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       queue_init(links);
+
+       s = splsched();
+       wait_queue_lock(wq);
+       wait_queue_unlink_all_locked(wq, links);
        wait_queue_unlock(wq);
        splx(s);
 
@@ -786,12 +997,70 @@ wait_subqueue_unlink_all(
 }
 
 
+/*
+ *     Routine:        wait_queue_set_unlink_all_nofree
+ *     Purpose:
+ *             Remove the linkage between a set wait queue and all its
+ *             member wait queues and all the sets it may be a member of.
+ *		The link structures are returned for later freeing by the
+ *             caller.
+ *     Conditions:
+ *             The wait queue must be a set
+ */
+kern_return_t
+wait_queue_set_unlink_all_nofree(
+       wait_queue_set_t wq_set,
+       queue_t         links)
+{
+       wait_queue_link_t wql;
+       wait_queue_t wq;
+       queue_t q;
+       spl_t s;
+
+       if (!wait_queue_is_set(wq_set)) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+retry:
+       s = splsched();
+       wqs_lock(wq_set);
+
+       /* remove the wait queues that are members of our set */
+       q = &wq_set->wqs_setlinks;
+
+       wql = (wait_queue_link_t)queue_first(q);
+       while (!queue_end(q, (queue_entry_t)wql)) {
+               WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql);
+               wq = wql->wql_queue;
+               if (wait_queue_lock_try(wq)) {
+                       wait_queue_unlink_locked(wq, wq_set, wql);
+                       wait_queue_unlock(wq);
+                       enqueue(links, &wql->wql_links);
+                       wql = (wait_queue_link_t)queue_first(q);
+               } else {
+                       wqs_unlock(wq_set);
+                       splx(s);
+                       delay(1);
+                       goto retry;
+               }
+       }
+
+       /* remove this set from sets it belongs to */
+       wait_queue_unlink_all_nofree_locked(&wq_set->wqs_wait_queue, links);
+
+       wqs_unlock(wq_set);
+       splx(s);
+
+       return(KERN_SUCCESS);
+}      
+
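Because this path takes the set lock first and then each member queue's lock, the reverse of the ordering used on the queue side, it cannot block on the member lock without risking deadlock; hence the try-lock, backoff, and full restart. A stripped-down sketch of that pattern, illustrative only:

/* Sketch only: acquire the set lock, then try-lock a member queue that is
 * normally locked first elsewhere; on failure drop everything and retry. */
static void
example_reverse_order_lock(wait_queue_set_t wq_set, wait_queue_t wq)
{
	spl_t s;

retry:
	s = splsched();
	wqs_lock(wq_set);

	if (!wait_queue_lock_try(wq)) {
		wqs_unlock(wq_set);
		splx(s);
		delay(1);		/* brief backoff, then start over */
		goto retry;
	}

	/* ... both locks held: safe to unlink ... */

	wait_queue_unlock(wq);
	wqs_unlock(wq_set);
	splx(s);
}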
 /*
  *     Routine:        wait_queue_set_unlink_all
  *     Purpose:
  *             Remove the linkage between a set wait queue and all its
- *             member wait queues. The link structures are freed for those
- *             links which were dynamically allocated.
+ *		member wait queues and all the sets it may be a member of.
+ *             The link structures are freed for those links which were
+ *             dynamically allocated.
  *     Conditions:
  *             The wait queue must be a set
  */
@@ -816,6 +1085,7 @@ retry:
        s = splsched();
        wqs_lock(wq_set);
 
+       /* remove the wait queues that are members of our set */
        q = &wq_set->wqs_setlinks;
 
        wql = (wait_queue_link_t)queue_first(q);
@@ -838,6 +1108,11 @@ retry:
                        goto retry;
                }
        }
+
+
+       /* remove this set from sets it belongs to */
+       wait_queue_unlink_all_locked(&wq_set->wqs_wait_queue, links);
+
        wqs_unlock(wq_set);
        splx(s);
 
@@ -848,6 +1123,48 @@ retry:
        return(KERN_SUCCESS);
 }      
 
+kern_return_t
+wait_queue_set_unlink_one(
+       wait_queue_set_t wq_set,
+       wait_queue_link_t wql)
+{
+       wait_queue_t wq;
+       spl_t s;
+
+       assert(wait_queue_is_set(wq_set));
+
+retry:
+       s = splsched();
+       wqs_lock(wq_set);
+
+       WAIT_QUEUE_SET_CHECK(wq_set);
+
+       /* Already unlinked, e.g. by selclearthread() */
+       if (wql->wql_type == WAIT_QUEUE_UNLINKED) {
+               goto out;
+       }
+
+       WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql);
+
+       /* On a wait queue, and we hold set queue lock ... */
+       wq = wql->wql_queue;
+       if (wait_queue_lock_try(wq)) {
+               wait_queue_unlink_locked(wq, wq_set, wql);
+               wait_queue_unlock(wq);
+       } else {
+               wqs_unlock(wq_set);
+               splx(s);
+               delay(1);
+               goto retry;
+       }
+
+out:
+       wqs_unlock(wq_set);
+       splx(s);
+
+       return KERN_SUCCESS;
+}
+
 /*
  *     Routine:        wait_queue_assert_wait64_locked
  *     Purpose:
@@ -864,10 +1181,13 @@ wait_queue_assert_wait64_locked(
        wait_queue_t wq,
        event64_t event,
        wait_interrupt_t interruptible,
+       wait_timeout_urgency_t urgency,
        uint64_t deadline,
+       uint64_t leeway,
        thread_t thread)
 {
        wait_result_t wait_result;
+       boolean_t realtime;
 
        if (!wait_queue_assert_possible(thread))
                panic("wait_queue_assert_wait64_locked");
@@ -878,7 +1198,17 @@ wait_queue_assert_wait64_locked(
                if (event == NO_EVENT64 && wqs_is_preposted(wqs))
                        return(THREAD_AWAKENED);
        }
-         
+
+       /*
+        * Realtime threads get priority for wait queue placements.
+        * This allows wait_queue_wakeup_one to prefer a waiting
+        * realtime thread, similar in principle to performing
+        * a wait_queue_wakeup_all and allowing scheduler prioritization
+        * to run the realtime thread, but without causing the
+        * lock contention of that scenario.
+        */
+       realtime = (thread->sched_pri >= BASEPRI_REALTIME);
+
        /*
         * This is the extent to which we currently take scheduling attributes
         * into account.  If the thread is vm privileged, we stick it at
@@ -887,7 +1217,9 @@ wait_queue_assert_wait64_locked(
         */
        wait_result = thread_mark_wait_locked(thread, interruptible);
        if (wait_result == THREAD_WAITING) {
-               if (!wq->wq_fifo || thread->options & TH_OPT_VMPRIV)
+               if (!wq->wq_fifo
+                       || (thread->options & TH_OPT_VMPRIV)
+                       || realtime)
                        enqueue_head(&wq->wq_queue, (queue_entry_t) thread);
                else
                        enqueue_tail(&wq->wq_queue, (queue_entry_t) thread);
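The placement decision reduces to a single predicate: head insertion for SYNC_POLICY_REVERSED queues and for waiters that the wakeup-one scan should find first. A condensed sketch of that choice, not the literal code path:

/* Sketch only: decide where an asserting thread lands on the wait queue.
 * Head insertion means the wakeup-one scan encounters it first. */
static boolean_t
example_enqueue_at_head(wait_queue_t wq, thread_t thread)
{
	if (!wq->wq_fifo)				/* SYNC_POLICY_REVERSED (LIFO) queue */
		return TRUE;
	if (thread->options & TH_OPT_VMPRIV)		/* VM-privileged thread */
		return TRUE;
	if (thread->sched_pri >= BASEPRI_REALTIME)	/* realtime waiter jumps ahead */
		return TRUE;
	return FALSE;					/* everyone else waits FIFO */
}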
@@ -896,10 +1228,16 @@ wait_queue_assert_wait64_locked(
                thread->wait_queue = wq;
 
                if (deadline != 0) {
-                       if (!timer_call_enter(&thread->wait_timer, deadline))
+
+                       if (!timer_call_enter_with_leeway(&thread->wait_timer, NULL,
+                               deadline, leeway, urgency, FALSE))
                                thread->wait_timer_active++;
                        thread->wait_timer_is_set = TRUE;
                }
+               if (wait_queue_global(wq)) {
+                       wq->wq_eventmask = wq->wq_eventmask | CAST_TO_EVENT_MASK(event);
+               }
+
        }
        return(wait_result);
 }
@@ -932,7 +1270,50 @@ wait_queue_assert_wait(
        wait_queue_lock(wq);
        thread_lock(thread);
        ret = wait_queue_assert_wait64_locked(wq, CAST_DOWN(event64_t,event),
-                                                                                       interruptible, deadline, thread);
+                                             interruptible, 
+                                             TIMEOUT_URGENCY_SYS_NORMAL, 
+                                             deadline, 0,
+                                             thread);
+       thread_unlock(thread);
+       wait_queue_unlock(wq);
+       splx(s);
+       return(ret);
+}
+
+/*
+ *     Routine:        wait_queue_assert_wait_with_leeway
+ *     Purpose:
+ *             Insert the current thread into the supplied wait queue
+ *             waiting for a particular event to be posted to that queue.
+ *             Deadline values are specified with urgency and leeway.
+ *
+ *     Conditions:
+ *             nothing of interest locked.
+ */
+wait_result_t
+wait_queue_assert_wait_with_leeway(
+       wait_queue_t wq,
+       event_t event,
+       wait_interrupt_t interruptible,
+       wait_timeout_urgency_t urgency,
+       uint64_t deadline,
+       uint64_t leeway)
+{
+       spl_t s;
+       wait_result_t ret;
+       thread_t thread = current_thread();
+
+       /* If it is an invalid wait queue, you can't wait on it */
+       if (!wait_queue_is_valid(wq))
+               return (thread->wait_result = THREAD_RESTART);
+
+       s = splsched();
+       wait_queue_lock(wq);
+       thread_lock(thread);
+       ret = wait_queue_assert_wait64_locked(wq, CAST_DOWN(event64_t,event),
+                                             interruptible, 
+                                             urgency, deadline, leeway,
+                                             thread);
        thread_unlock(thread);
        wait_queue_unlock(wq);
        splx(s);
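A rough sketch of how a caller might use the leeway-aware variant, assuming the standard clock_interval_to_deadline() and nanoseconds_to_absolutetime() helpers to build the absolute-time arguments; the interval, leeway, and example_ name are illustrative only:

/* Sketch only: wait up to ~100ms for "event", allowing the timer subsystem
 * to coalesce the wakeup within a 5ms leeway window. */
static wait_result_t
example_wait_with_leeway(wait_queue_t wq, event_t event)
{
	uint64_t deadline, leeway;
	wait_result_t ret;

	clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);
	nanoseconds_to_absolutetime(5 * NSEC_PER_MSEC, &leeway);

	ret = wait_queue_assert_wait_with_leeway(wq, event,
	    THREAD_UNINT, TIMEOUT_URGENCY_SYS_NORMAL,
	    deadline, leeway);

	/* On THREAD_WAITING the caller would block as usual, e.g. via
	 * thread_block(THREAD_CONTINUE_NULL). */
	return ret;
}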
@@ -965,7 +1346,48 @@ wait_queue_assert_wait64(
        s = splsched();
        wait_queue_lock(wq);
        thread_lock(thread);
-       ret = wait_queue_assert_wait64_locked(wq, event, interruptible, deadline, thread);
+       ret = wait_queue_assert_wait64_locked(wq, event, interruptible, 
+                                             TIMEOUT_URGENCY_SYS_NORMAL,
+                                             deadline, 0,
+                                             thread);
+       thread_unlock(thread);
+       wait_queue_unlock(wq);
+       splx(s);
+       return(ret);
+}
+
+/*
+ *     Routine:        wait_queue_assert_wait64_with_leeway
+ *     Purpose:
+ *             Insert the current thread into the supplied wait queue
+ *             waiting for a particular event to be posted to that queue.
+ *             Deadline values are specified with urgency and leeway.
+ *     Conditions:
+ *             nothing of interest locked.
+ */
+wait_result_t
+wait_queue_assert_wait64_with_leeway(
+       wait_queue_t wq,
+       event64_t event,
+       wait_interrupt_t interruptible,
+       wait_timeout_urgency_t urgency,
+       uint64_t deadline,
+       uint64_t leeway)
+{
+       spl_t s;
+       wait_result_t ret;
+       thread_t thread = current_thread();
+
+       /* If it is an invalid wait queue, you can't wait on it */
+       if (!wait_queue_is_valid(wq))
+               return (thread->wait_result = THREAD_RESTART);
+
+       s = splsched();
+       wait_queue_lock(wq);
+       thread_lock(thread);
+       ret = wait_queue_assert_wait64_locked(wq, event, interruptible, 
+                                             urgency, deadline, leeway,
+                                             thread);
        thread_unlock(thread);
        wait_queue_unlock(wq);
        splx(s);
@@ -993,8 +1415,18 @@ _wait_queue_select64_all(
 {
        wait_queue_element_t wq_element;
        wait_queue_element_t wqe_next;
+       unsigned long eventmask = 0;
+       boolean_t is_queue_global = FALSE;
        queue_t q;
 
+       is_queue_global = wait_queue_global(wq);
+       if (is_queue_global) {
+               eventmask = CAST_TO_EVENT_MASK(event);
+               if ((wq->wq_eventmask & eventmask) != eventmask) {
+                       return;
+               }
+               eventmask = 0;
+       }
        q = &wq->wq_queue;
 
        wq_element = (wait_queue_element_t) queue_first(q);
@@ -1031,20 +1463,30 @@ _wait_queue_select64_all(
                         * the event we are posting to this queue, pull
                         * it off the queue and stick it in our wake_queue.
                         */
-                       thread_t t = (thread_t)wq_element;
+                       thread_t t = (thread_t)(void *)wq_element;
 
                        if (t->wait_event == event) {
                                thread_lock(t);
-                               remqueue(q, (queue_entry_t) t);
+                               remqueue((queue_entry_t) t);
                                enqueue (wake_queue, (queue_entry_t) t);
                                t->wait_queue = WAIT_QUEUE_NULL;
                                t->wait_event = NO_EVENT64;
                                t->at_safe_point = FALSE;
                                /* returned locked */
+                       } else {
+                               if (is_queue_global) {
+                                       eventmask = eventmask | 
+                                               CAST_TO_EVENT_MASK(t->wait_event);
+                               }
                        }
                }
                wq_element = wqe_next;
        }
+       /* Update event mask if global wait queue */
+       if (is_queue_global) {
+               wq->wq_eventmask = eventmask;
+       }
+
 }
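For the global (hashed) queues, wq_eventmask is a coarse summary of the events that currently have waiters: CAST_TO_EVENT_MASK() (defined in wait_queue.h, not shown here) folds an event into a small bit pattern, the mask is ORed in on assert_wait, rebuilt during scans, and a miss lets a wakeup return without walking the queue at all. False positives only cost a scan; false negatives would lose wakeups, so the mask is rebuilt conservatively. A toy sketch of the idea, with a made-up fold standing in for the real macro:

/* Sketch only: fold_event() is a hypothetical stand-in for
 * CAST_TO_EVENT_MASK(); the real definition lives in wait_queue.h. */
static uint32_t
fold_event(event64_t event)
{
	return 1u << ((uint32_t)(event >> 6) & 31);	/* made-up fold */
}

/* Mirrors the early-out test in the select paths: if the summary mask
 * cannot contain this event, no waiter can be waiting on it. */
static boolean_t
example_may_have_waiter(uint32_t wq_eventmask, event64_t event)
{
	uint32_t bits = fold_event(event);

	return (wq_eventmask & bits) == bits;
}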
 
 /*
@@ -1090,7 +1532,7 @@ wait_queue_wakeup64_all_locked(
         */
        res = KERN_NOT_WAITING;
        while (!queue_empty (q)) {
-               thread_t thread = (thread_t) dequeue(q);
+               thread_t thread = (thread_t)(void *) dequeue(q);
                res = thread_go(thread, result);
                assert(res == KERN_SUCCESS);
                thread_unlock(thread);
@@ -1191,8 +1633,26 @@ _wait_queue_select64_one(
        wait_queue_element_t wq_element;
        wait_queue_element_t wqe_next;
        thread_t t = THREAD_NULL;
+       thread_t fifo_thread = THREAD_NULL;
+       boolean_t is_queue_fifo = TRUE;
+       boolean_t is_queue_global = FALSE;
+       boolean_t thread_imp_donor = FALSE;
+       boolean_t realtime = FALSE;
+       unsigned long eventmask = 0;
        queue_t q;
 
+       if (wait_queue_global(wq)) {
+               eventmask = CAST_TO_EVENT_MASK(event);
+               if ((wq->wq_eventmask & eventmask) != eventmask) {
+                       return THREAD_NULL;
+               }
+               eventmask = 0;
+               is_queue_global = TRUE;
+#if IMPORTANCE_INHERITANCE
+               is_queue_fifo = FALSE;
+#endif /* IMPORTANCE_INHERITANCE */
+       }
+
        q = &wq->wq_queue;
 
        wq_element = (wait_queue_element_t) queue_first(q);
@@ -1239,20 +1699,55 @@ _wait_queue_select64_one(
                         * the event we are posting to this queue, pull
                         * it off the queue and stick it in out wake_queue.
                         */
-                       t = (thread_t)wq_element;
+                       t = (thread_t)(void *)wq_element;
                        if (t->wait_event == event) {
-                               thread_lock(t);
-                               remqueue(q, (queue_entry_t) t);
-                               t->wait_queue = WAIT_QUEUE_NULL;
-                               t->wait_event = NO_EVENT64;
-                               t->at_safe_point = FALSE;
-                               return t;       /* still locked */
+                               if (fifo_thread == THREAD_NULL) {
+                                       fifo_thread = t;
+                               }
+#if IMPORTANCE_INHERITANCE
+                               /*
+                                * Checking the imp donor bit does not require the
+                                * thread lock or the task lock, since we hold the
+                                * wait queue lock and the thread cannot be removed
+                                * from the queue without acquiring it. The imp donor
+                                * bit may change after we read it, but it is fine to
+                                * wake a thread while someone drops an importance
+                                * assertion on that thread.
+                                */
+                               thread_imp_donor = task_is_importance_donor(t->task);
+#endif /* IMPORTANCE_INHERITANCE */
+                               realtime = (t->sched_pri >= BASEPRI_REALTIME);
+                               if (is_queue_fifo || thread_imp_donor || realtime || 
+                                               (t->options & TH_OPT_VMPRIV)) {
+                                       thread_lock(t);
+                                       remqueue((queue_entry_t) t);
+                                       t->wait_queue = WAIT_QUEUE_NULL;
+                                       t->wait_event = NO_EVENT64;
+                                       t->at_safe_point = FALSE;
+                                       return t;       /* still locked */
+                               }
+                       }
+                       if (is_queue_global) {
+                               eventmask = eventmask | CAST_TO_EVENT_MASK(t->wait_event);
                        }
-
                        t = THREAD_NULL;
                }
                wq_element = wqe_next;
        }
+
+       if (is_queue_global) {
+               wq->wq_eventmask = eventmask;
+       }
+#if IMPORTANCE_INHERITANCE
+       if (fifo_thread != THREAD_NULL) {
+               thread_lock(fifo_thread);
+               remqueue((queue_entry_t) fifo_thread);
+               fifo_thread->wait_queue = WAIT_QUEUE_NULL;
+               fifo_thread->wait_event = NO_EVENT64;
+               fifo_thread->at_safe_point = FALSE;
+               return fifo_thread;     /* still locked */
+       }
+#endif /* IMPORTANCE_INHERITANCE */
        return THREAD_NULL;
 }
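With importance inheritance enabled on a global queue, the scan no longer takes the first FIFO match unconditionally: a matching waiter is picked immediately only if it is an importance donor, realtime, or VM-privileged, and the first FIFO match is kept as the fifo_thread fallback. A condensed sketch of that per-waiter decision, not the literal code:

/* Sketch only: should this matching waiter be taken ahead of FIFO order? */
static boolean_t
example_pick_immediately(thread_t t, boolean_t is_queue_fifo)
{
	if (is_queue_fifo)
		return TRUE;			/* plain FIFO queue: first match wins */
#if IMPORTANCE_INHERITANCE
	if (task_is_importance_donor(t->task))
		return TRUE;			/* boosting task: do not hold it back */
#endif
	if (t->sched_pri >= BASEPRI_REALTIME)
		return TRUE;			/* realtime waiter */
	if (t->options & TH_OPT_VMPRIV)
		return TRUE;			/* VM-privileged thread */
	return FALSE;				/* remembered only as the FIFO fallback */
}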
 
@@ -1278,7 +1773,7 @@ wait_queue_pull_thread_locked(
 
        assert(thread->wait_queue == waitq);
 
-       remqueue(&waitq->wq_queue, (queue_entry_t)thread );
+       remqueue((queue_entry_t)thread );
        thread->wait_queue = WAIT_QUEUE_NULL;
        thread->wait_event = NO_EVENT64;
        thread->at_safe_point = FALSE;
@@ -1314,7 +1809,7 @@ _wait_queue_select64_thread(
 
        thread_lock(thread);
        if ((thread->wait_queue == wq) && (thread->wait_event == event)) {
-               remqueue(q, (queue_entry_t) thread);
+               remqueue((queue_entry_t) thread);
                thread->at_safe_point = FALSE;
                thread->wait_event = NO_EVENT64;
                thread->wait_queue = WAIT_QUEUE_NULL;
@@ -1448,7 +1943,8 @@ kern_return_t
 wait_queue_wakeup_one(
        wait_queue_t wq,
        event_t event,
-       wait_result_t result)
+       wait_result_t result,
+       int priority)
 {
        thread_t thread;
        spl_t s;
@@ -1465,6 +1961,14 @@ wait_queue_wakeup_one(
        if (thread) {
                kern_return_t res;
 
+               if (thread->sched_pri < priority) {
+                       if (priority <= MAXPRI) {
+                               set_sched_pri(thread, priority);
+
+                               thread->was_promoted_on_wakeup = 1;
+                               thread->sched_flags |= TH_SFLAG_PROMOTED;
+                       }
+               }
                res = thread_go(thread, result);
                assert(res == KERN_SUCCESS);
                thread_unlock(thread);
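The new priority argument lets the waker give the selected thread a priority floor for the handoff; since sched_pri is never negative, passing -1 keeps the old behaviour. A rough caller sketch; the chosen floor is illustrative only:

/* Sketch only: wake one waiter and promote it to at least MINPRI_KERNEL
 * for the handoff; pass -1 when no promotion is wanted. */
static void
example_wakeup_promoted(wait_queue_t wq, event_t event)
{
	(void) wait_queue_wakeup_one(wq, event, THREAD_AWAKENED, MINPRI_KERNEL);
}

static void
example_wakeup_plain(wait_queue_t wq, event_t event)
{
	(void) wait_queue_wakeup_one(wq, event, THREAD_AWAKENED, -1);
}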