apple/xnu.git (xnu-2050.48.11): osfmk/kern/wait_queue.c

diff --git a/osfmk/kern/wait_queue.c b/osfmk/kern/wait_queue.c
index a7a19a02433179eab61ed0716fe12c9a296292d7..6bcabf8c1cfcc12672e7e1a2da49e7f208e4b300 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -84,7 +84,6 @@ static boolean_t wait_queue_member_locked(
 
 static void wait_queues_init(void) __attribute__((section("__TEXT, initcode")));
 
-
 #define WAIT_QUEUE_MAX thread_max
 #define WAIT_QUEUE_SET_MAX task_max * 3
 #define WAIT_QUEUE_LINK_MAX PORT_MAX / 2 + (WAIT_QUEUE_MAX * WAIT_QUEUE_SET_MAX) / 64
@@ -128,16 +127,21 @@ volatile WaitQueueLink *unused_except_for_debugging;
 
 struct wait_queue boot_wait_queue[1];
 __private_extern__ struct wait_queue *wait_queues = &boot_wait_queue[0];
-
 __private_extern__ uint32_t num_wait_queues = 1;
 
+#define        P2ROUNDUP(x, align) (-(-((uint32_t)(x)) & -(align)))
+#define ROUNDDOWN(x,y) (((x)/(y))*(y))
+
 static uint32_t
-compute_wait_hash_size(__unused unsigned cpu_count, __unused uint64_t memsize) {
-       uint32_t hsize = (uint32_t)round_page_64((thread_max / 11) * sizeof(struct wait_queue));
-       uint32_t bhsize;
+compute_wait_hash_size(void)
+{
+       uint32_t hsize, queues;
        
-       if (PE_parse_boot_argn("wqsize", &bhsize, sizeof(bhsize)))
-               hsize = bhsize;
+       if (PE_parse_boot_argn("wqsize", &hsize, sizeof(hsize)))
+               return (hsize);
+
+       queues = thread_max / 11;
+       hsize = P2ROUNDUP(queues * sizeof(struct wait_queue), PAGE_SIZE);
 
        return hsize;
 }
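
Editor's note: the new P2ROUNDUP macro leans on two's-complement arithmetic; -(align) has all bits at or above a power-of-two alignment set, so negating x, masking, and negating again rounds x up to the next multiple of that alignment. A standalone sketch (not part of the commit; the PAGE_SIZE value and the sizeof(struct wait_queue) stand-in are illustrative assumptions) exercising both macros:

    /* Standalone sketch: worked example of the rounding macros above. */
    #include <stdint.h>
    #include <stdio.h>

    #define P2ROUNDUP(x, align) (-(-((uint32_t)(x)) & -(align)))
    #define ROUNDDOWN(x,y)      (((x)/(y))*(y))

    int main(void)
    {
            uint32_t page   = 4096;         /* assumed PAGE_SIZE */
            uint32_t qsz    = 120;          /* illustrative sizeof(struct wait_queue) */
            uint32_t queues = 5000;         /* stands in for thread_max / 11 */
            uint32_t bytes  = queues * qsz; /* 600000 */

            /* Round the table up to whole pages: 600000 -> 602112 (147 pages). */
            printf("%u\n", (unsigned)P2ROUNDUP(bytes, page));

            /* Round a byte budget down to whole queue entries:
             * 602112 -> 602040 (5017 entries of 120 bytes). */
            printf("%u\n", (unsigned)ROUNDDOWN(602112u, qsz));
            return 0;
    }
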
@@ -145,13 +149,37 @@ compute_wait_hash_size(__unused unsigned cpu_count, __unused uint64_t memsize) {
 static void
 wait_queues_init(void)
 {
-       uint32_t        i, whsize;
+       uint32_t        i, whsize, qsz;
        kern_return_t   kret;
 
-       whsize = compute_wait_hash_size(processor_avail_count, machine_info.max_mem);
-       num_wait_queues = (whsize / ((uint32_t)sizeof(struct wait_queue))) - 1;
+       /*
+        * Determine the amount of memory we're willing to reserve for
+        * the waitqueue hash table
+        */
+       whsize = compute_wait_hash_size();
 
-       kret = kernel_memory_allocate(kernel_map, (vm_offset_t *) &wait_queues, whsize, 0, KMA_KOBJECT|KMA_NOPAGEWAIT);
+       /* Determine the number of waitqueues we can fit. */
+       qsz = sizeof (struct wait_queue);
+       whsize = ROUNDDOWN(whsize, qsz);
+       num_wait_queues = whsize / qsz;
+
+       /*
+        * The hash algorithm requires that this be a power of 2, so we
+        * just mask off all the low-order bits.
+        */
+       for (i = 0; i < 31; i++) {
+               uint32_t bit = (1 << i);
+               if ((num_wait_queues & bit) == num_wait_queues)
+                       break;
+               num_wait_queues &= ~bit;
+       }
+       assert(num_wait_queues > 0);
+
+       /* Now determine how much memory we really need. */
+       whsize = P2ROUNDUP(num_wait_queues * qsz, PAGE_SIZE);
+
+       kret = kernel_memory_allocate(kernel_map, (vm_offset_t *) &wait_queues,
+           whsize, 0, KMA_KOBJECT|KMA_NOPAGEWAIT);
 
        if (kret != KERN_SUCCESS || wait_queues == NULL)
                panic("kernel_memory_allocate() failed to allocate wait queues, error: %d, whsize: 0x%x", kret, whsize);
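
Editor's note: the masking loop introduced above keeps only the highest set bit, i.e. it rounds num_wait_queues down to the largest power of two that fits in the reserved memory. A standalone sketch (not part of the commit) showing the same loop in isolation; for n > 0 it agrees with 1u << (31 - __builtin_clz(n)):

    #include <assert.h>
    #include <stdint.h>

    static uint32_t
    round_down_pow2(uint32_t n)
    {
            uint32_t i;

            for (i = 0; i < 31; i++) {
                    uint32_t bit = (1u << i);
                    if ((n & bit) == n)     /* only this bit remains: power of two */
                            break;
                    n &= ~bit;              /* mask off this low-order bit and retry */
            }
            return n;
    }

    int main(void)
    {
            assert(round_down_pow2(5017) == 4096);  /* 5017 queues -> 4096 buckets */
            assert(round_down_pow2(4096) == 4096);  /* already a power of two */
            assert(round_down_pow2(1) == 1);
            return 0;
    }
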
@@ -169,14 +197,19 @@ wait_queue_bootstrap(void)
                                      WAIT_QUEUE_MAX * sizeof(struct wait_queue),
                                      sizeof(struct wait_queue),
                                      "wait queues");
+       zone_change(_wait_queue_zone, Z_NOENCRYPT, TRUE);
+
        _wait_queue_set_zone = zinit(sizeof(struct wait_queue_set),
                                      WAIT_QUEUE_SET_MAX * sizeof(struct wait_queue_set),
                                      sizeof(struct wait_queue_set),
                                      "wait queue sets");
+       zone_change(_wait_queue_set_zone, Z_NOENCRYPT, TRUE);
+
        _wait_queue_link_zone = zinit(sizeof(struct _wait_queue_link),
                                      WAIT_QUEUE_LINK_MAX * sizeof(struct _wait_queue_link),
                                      sizeof(struct _wait_queue_link),
                                      "wait queue links");
+       zone_change(_wait_queue_link_zone, Z_NOENCRYPT, TRUE);
 }
 
 /*
@@ -623,6 +656,25 @@ wait_queue_link(
        return ret;
 }      
 
+wait_queue_link_t
+wait_queue_link_allocate(void)
+{
+       wait_queue_link_t wql;
+
+       wql = zalloc(_wait_queue_link_zone); /* Can't fail */
+       bzero(wql, sizeof(*wql));
+       wql->wql_type = WAIT_QUEUE_UNLINKED;
+
+       return wql;
+}
+
+kern_return_t
+wait_queue_link_free(wait_queue_link_t wql) 
+{
+       zfree(_wait_queue_link_zone, wql);
+       return KERN_SUCCESS;
+}
+
 
 /*
  *     Routine:        wait_queue_unlink_locked
@@ -652,6 +704,60 @@ wait_queue_unlink_locked(
        WAIT_QUEUE_SET_CHECK(wq_set);
 }
 
+/*
+ *     Routine:        wait_queue_unlink_nofree
+ *     Purpose:
+ *             Remove the linkage between a wait queue and a set,
+ *             returning the linkage structure to the caller to
+ *             free later.
+ *     Conditions:
+ *             The wait queue being unlinked must be a member of the set queue
+ */
+kern_return_t
+wait_queue_unlink_nofree(
+       wait_queue_t wq,
+       wait_queue_set_t wq_set,
+       wait_queue_link_t *wqlp)
+{
+       wait_queue_element_t wq_element;
+       wait_queue_link_t wql;
+       queue_t q;
+       spl_t s;
+
+       if (!wait_queue_is_valid(wq) || !wait_queue_is_set(wq_set)) {
+               return KERN_INVALID_ARGUMENT;
+       }
+       s = splsched();
+       wait_queue_lock(wq);
+
+       q = &wq->wq_queue;
+       wq_element = (wait_queue_element_t) queue_first(q);
+       while (!queue_end(q, (queue_entry_t)wq_element)) {
+               WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
+               if (wq_element->wqe_type == WAIT_QUEUE_LINK ||
+                   wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
+
+                       wql = (wait_queue_link_t)wq_element;
+                       
+                       if (wql->wql_setqueue == wq_set) {
+
+                               wqs_lock(wq_set);
+                               wait_queue_unlink_locked(wq, wq_set, wql);
+                               wqs_unlock(wq_set);
+                               wait_queue_unlock(wq);
+                               splx(s);
+                               *wqlp = wql;
+                               return KERN_SUCCESS;
+                       }
+               }
+               wq_element = (wait_queue_element_t)
+                               queue_next((queue_t) wq_element);
+       }
+       wait_queue_unlock(wq);
+       splx(s);
+       return KERN_NOT_IN_SET;
+}      
+
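
Editor's note: the _nofree variants and the link allocate/free entry points let a caller split unlinking from freeing: the link is detached under splsched and the wait-queue locks, and the zfree happens later with no locks held. A hedged sketch of the intended pairing (not from the commit; it assumes wait_queue_link_noalloc installs the pre-allocated link, and that wq and wq_set are valid and owned by the caller):

    static kern_return_t
    example_link_then_unlink(wait_queue_t wq, wait_queue_set_t wq_set)
    {
            wait_queue_link_t wql;
            kern_return_t kr;

            wql = wait_queue_link_allocate();               /* cannot fail */
            kr = wait_queue_link_noalloc(wq, wq_set, wql);  /* install caller-owned link */
            if (kr != KERN_SUCCESS) {
                    (void) wait_queue_link_free(wql);
                    return kr;
            }

            /* ... later: detach the link but defer the free ... */
            kr = wait_queue_unlink_nofree(wq, wq_set, &wql);
            if (kr == KERN_SUCCESS)
                    (void) wait_queue_link_free(wql);       /* no wait-queue locks held */
            return kr;
    }
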
 /*
  *     Routine:        wait_queue_unlink
  *     Purpose:
@@ -708,36 +814,97 @@ wait_queue_unlink(
 }      
 
 /*
- *     Routine:        wait_queue_unlink_all
+ *     Routine:        wait_queue_unlink_all_nofree_locked
  *     Purpose:
  *             Remove the linkage between a wait queue and all its sets.
- *             All the linkage structures that were allocated internally
- *             are freed.  The others are the caller's responsibility.
+ *             All the linkage structures are returned to the caller for
+ *             later freeing.
  *     Conditions:
- *             Nothing of interest locked.
+ *             Wait queue locked.
  */
 
-kern_return_t
-wait_queue_unlink_all(
-       wait_queue_t wq)
+static void
+wait_queue_unlink_all_nofree_locked(
+       wait_queue_t wq,
+       queue_t links)
 {
        wait_queue_element_t wq_element;
        wait_queue_element_t wq_next_element;
        wait_queue_set_t wq_set;
        wait_queue_link_t wql;
-       queue_head_t links_queue_head;
-       queue_t links = &links_queue_head;
        queue_t q;
+
+       q = &wq->wq_queue;
+
+       wq_element = (wait_queue_element_t) queue_first(q);
+       while (!queue_end(q, (queue_entry_t)wq_element)) {
+
+               WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element);
+               wq_next_element = (wait_queue_element_t)
+                            queue_next((queue_t) wq_element);
+
+               if (wq_element->wqe_type == WAIT_QUEUE_LINK ||
+                   wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) {
+                       wql = (wait_queue_link_t)wq_element;
+                       wq_set = wql->wql_setqueue;
+                       wqs_lock(wq_set);
+                       wait_queue_unlink_locked(wq, wq_set, wql);
+                       wqs_unlock(wq_set);
+                       enqueue(links, &wql->wql_links);
+               }
+               wq_element = wq_next_element;
+       }
+}      
+
+/*
+ *     Routine:        wait_queue_unlink_all_nofree
+ *     Purpose:
+ *             Remove the linkage between a wait queue and all its sets.
+ *             All the linkage structures are returned to the caller for
+ *             later freeing.
+ *     Conditions:
+ *             Nothing of interest locked.
+ */
+
+kern_return_t
+wait_queue_unlink_all_nofree(
+       wait_queue_t wq,
+       queue_t links)
+{
        spl_t s;
 
        if (!wait_queue_is_valid(wq)) {
                return KERN_INVALID_ARGUMENT;
        }
 
-       queue_init(links);
-
        s = splsched();
        wait_queue_lock(wq);
+       wait_queue_unlink_all_nofree_locked(wq, links);
+       wait_queue_unlock(wq);
+       splx(s);
+
+       return(KERN_SUCCESS);
+}      
+
+/*
+ *     Routine:        wait_queue_unlink_all_locked
+ *     Purpose:
+ *             Remove the linkage between a locked wait queue and all its
+ *             sets and enqueue the allocated ones onto the links queue
+ *             provided.
+ *     Conditions:
+ *             Wait queue locked.
+ */
+static void
+wait_queue_unlink_all_locked(
+       wait_queue_t wq,
+       queue_t links)
+{
+       wait_queue_element_t wq_element;
+       wait_queue_element_t wq_next_element;
+       wait_queue_set_t wq_set;
+       wait_queue_link_t wql;
+       queue_t q;
 
        q = &wq->wq_queue;
 
@@ -761,6 +928,38 @@ wait_queue_unlink_all(
                }
                wq_element = wq_next_element;
        }
+
+}
+
+
+/*
+ *     Routine:        wait_queue_unlink_all
+ *     Purpose:
+ *             Remove the linkage between a wait queue and all its sets.
+ *             All the linkage structures that were allocated internally
+ *             are freed.  The others are the caller's responsibility.
+ *     Conditions:
+ *             Nothing of interest locked.
+ */
+
+kern_return_t
+wait_queue_unlink_all(
+       wait_queue_t wq)
+{
+       wait_queue_link_t wql;
+       queue_head_t links_queue_head;
+       queue_t links = &links_queue_head;
+       spl_t s;
+
+       if (!wait_queue_is_valid(wq)) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       queue_init(links);
+
+       s = splsched();
+       wait_queue_lock(wq);
+       wait_queue_unlink_all_locked(wq, links);
        wait_queue_unlock(wq);
        splx(s);
 
@@ -781,12 +980,70 @@ wait_subqueue_unlink_all(
 }
 
 
+/*
+ *     Routine:        wait_queue_set_unlink_all_nofree
+ *     Purpose:
+ *             Remove the linkage between a set wait queue and all its
+ *             member wait queues and all the sets it may be a member of.
+ *             The link structures are returned for later freeing by the
+ *             caller.
+ *     Conditions:
+ *             The wait queue must be a set
+ */
+kern_return_t
+wait_queue_set_unlink_all_nofree(
+       wait_queue_set_t wq_set,
+       queue_t         links)
+{
+       wait_queue_link_t wql;
+       wait_queue_t wq;
+       queue_t q;
+       spl_t s;
+
+       if (!wait_queue_is_set(wq_set)) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
+retry:
+       s = splsched();
+       wqs_lock(wq_set);
+
+       /* remove the wait queues that are members of our set */
+       q = &wq_set->wqs_setlinks;
+
+       wql = (wait_queue_link_t)queue_first(q);
+       while (!queue_end(q, (queue_entry_t)wql)) {
+               WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql);
+               wq = wql->wql_queue;
+               if (wait_queue_lock_try(wq)) {
+                       wait_queue_unlink_locked(wq, wq_set, wql);
+                       wait_queue_unlock(wq);
+                       enqueue(links, &wql->wql_links);
+                       wql = (wait_queue_link_t)queue_first(q);
+               } else {
+                       wqs_unlock(wq_set);
+                       splx(s);
+                       delay(1);
+                       goto retry;
+               }
+       }
+
+       /* remove this set from sets it belongs to */
+       wait_queue_unlink_all_nofree_locked(&wq_set->wqs_wait_queue, links);
+
+       wqs_unlock(wq_set);
+       splx(s);
+
+       return(KERN_SUCCESS);
+}      
+
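
Editor's note: because the *_nofree routines only collect the link structures, the caller is expected to drain the queue it passed in and free each link afterwards, once wqs_lock and splsched have been released. A hedged sketch of that drain loop (not from the commit; wq_set is assumed to be a set queue owned by the caller):

    static void
    example_set_teardown(wait_queue_set_t wq_set)
    {
            queue_head_t links_head;
            queue_t links = &links_head;
            wait_queue_link_t wql;

            queue_init(links);
            if (wait_queue_set_unlink_all_nofree(wq_set, links) != KERN_SUCCESS)
                    return;

            /* Free the collected links with no wait-queue locks held. */
            while (!queue_empty(links)) {
                    wql = (wait_queue_link_t) dequeue(links);
                    (void) wait_queue_link_free(wql);
            }
    }
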
 /*
  *     Routine:        wait_queue_set_unlink_all
  *     Purpose:
  *             Remove the linkage between a set wait queue and all its
- *             member wait queues. The link structures are freed for those
- *             links which were dynamically allocated.
+ *             member wait queues and all the sets it may be a member of.
+ *             The link structures are freed for those links which were
+ *             dynamically allocated.
  *     Conditions:
  *             The wait queue must be a set
  */
@@ -811,6 +1068,7 @@ retry:
        s = splsched();
        wqs_lock(wq_set);
 
+       /* remove the wait queues that are members of our set */
        q = &wq_set->wqs_setlinks;
 
        wql = (wait_queue_link_t)queue_first(q);
@@ -833,6 +1091,11 @@ retry:
                        goto retry;
                }
        }
+
+
+       /* remove this set from sets it belongs to */
+       wait_queue_unlink_all_locked(&wq_set->wqs_wait_queue, links);
+
        wqs_unlock(wq_set);
        splx(s);
 
@@ -843,6 +1106,48 @@ retry:
        return(KERN_SUCCESS);
 }      
 
+kern_return_t
+wait_queue_set_unlink_one(
+       wait_queue_set_t wq_set,
+       wait_queue_link_t wql)
+{
+       wait_queue_t wq;
+       spl_t s;
+
+       assert(wait_queue_is_set(wq_set));
+
+retry:
+       s = splsched();
+       wqs_lock(wq_set);
+
+       WAIT_QUEUE_SET_CHECK(wq_set);
+
+       /* Already unlinked, e.g. by selclearthread() */
+       if (wql->wql_type == WAIT_QUEUE_UNLINKED) {
+               goto out;
+       }
+
+       WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql);
+
+       /* On a wait queue, and we hold set queue lock ... */
+       wq = wql->wql_queue;
+       if (wait_queue_lock_try(wq)) {
+               wait_queue_unlink_locked(wq, wq_set, wql);
+               wait_queue_unlock(wq);
+       } else {
+               wqs_unlock(wq_set);
+               splx(s);
+               delay(1);
+               goto retry;
+       }
+
+out:
+       wqs_unlock(wq_set);
+       splx(s);
+
+       return KERN_SUCCESS;
+}
+
 /*
  *     Routine:        wait_queue_assert_wait64_locked
  *     Purpose:
@@ -863,6 +1168,7 @@ wait_queue_assert_wait64_locked(
        thread_t thread)
 {
        wait_result_t wait_result;
+       boolean_t realtime;
 
        if (!wait_queue_assert_possible(thread))
                panic("wait_queue_assert_wait64_locked");
@@ -873,7 +1179,17 @@ wait_queue_assert_wait64_locked(
                if (event == NO_EVENT64 && wqs_is_preposted(wqs))
                        return(THREAD_AWAKENED);
        }
-         
+
+       /*
+        * Realtime threads get priority for wait queue placements.
+        * This allows wait_queue_wakeup_one to prefer a waiting
+        * realtime thread, similar in principle to performing
+        * a wait_queue_wakeup_all and allowing scheduler prioritization
+        * to run the realtime thread, but without causing the
+        * lock contention of that scenario.
+        */
+       realtime = (thread->sched_pri >= BASEPRI_REALTIME);
+
        /*
         * This is the extent to which we currently take scheduling attributes
         * into account.  If the thread is vm privileged, we stick it at
@@ -882,7 +1198,9 @@ wait_queue_assert_wait64_locked(
         */
        wait_result = thread_mark_wait_locked(thread, interruptible);
        if (wait_result == THREAD_WAITING) {
-               if (!wq->wq_fifo || thread->options & TH_OPT_VMPRIV)
+               if (!wq->wq_fifo
+                       || (thread->options & TH_OPT_VMPRIV)
+                       || realtime)
                        enqueue_head(&wq->wq_queue, (queue_entry_t) thread);
                else
                        enqueue_tail(&wq->wq_queue, (queue_entry_t) thread);
@@ -891,7 +1209,11 @@ wait_queue_assert_wait64_locked(
                thread->wait_queue = wq;
 
                if (deadline != 0) {
-                       if (!timer_call_enter(&thread->wait_timer, deadline))
+                       uint32_t flags;
+
+                       flags = realtime ? TIMER_CALL_CRITICAL : 0;
+
+                       if (!timer_call_enter(&thread->wait_timer, deadline, flags))
                                thread->wait_timer_active++;
                        thread->wait_timer_is_set = TRUE;
                }
@@ -1030,7 +1352,7 @@ _wait_queue_select64_all(
 
                        if (t->wait_event == event) {
                                thread_lock(t);
-                               remqueue(q, (queue_entry_t) t);
+                               remqueue((queue_entry_t) t);
                                enqueue (wake_queue, (queue_entry_t) t);
                                t->wait_queue = WAIT_QUEUE_NULL;
                                t->wait_event = NO_EVENT64;
@@ -1237,7 +1559,7 @@ _wait_queue_select64_one(
                        t = (thread_t)wq_element;
                        if (t->wait_event == event) {
                                thread_lock(t);
-                               remqueue(q, (queue_entry_t) t);
+                               remqueue((queue_entry_t) t);
                                t->wait_queue = WAIT_QUEUE_NULL;
                                t->wait_event = NO_EVENT64;
                                t->at_safe_point = FALSE;
@@ -1273,7 +1595,7 @@ wait_queue_pull_thread_locked(
 
        assert(thread->wait_queue == waitq);
 
-       remqueue(&waitq->wq_queue, (queue_entry_t)thread );
+       remqueue((queue_entry_t)thread );
        thread->wait_queue = WAIT_QUEUE_NULL;
        thread->wait_event = NO_EVENT64;
        thread->at_safe_point = FALSE;
@@ -1309,7 +1631,7 @@ _wait_queue_select64_thread(
 
        thread_lock(thread);
        if ((thread->wait_queue == wq) && (thread->wait_event == event)) {
-               remqueue(q, (queue_entry_t) thread);
+               remqueue((queue_entry_t) thread);
                thread->at_safe_point = FALSE;
                thread->wait_event = NO_EVENT64;
                thread->wait_queue = WAIT_QUEUE_NULL;
@@ -1443,7 +1765,8 @@ kern_return_t
 wait_queue_wakeup_one(
        wait_queue_t wq,
        event_t event,
-       wait_result_t result)
+       wait_result_t result,
+       int priority)
 {
        thread_t thread;
        spl_t s;
@@ -1460,6 +1783,14 @@ wait_queue_wakeup_one(
        if (thread) {
                kern_return_t res;
 
+               if (thread->sched_pri < priority) {
+                       if (priority <= MAXPRI) {
+                               set_sched_pri(thread, priority);
+
+                               thread->was_promoted_on_wakeup = 1;
+                               thread->sched_flags |= TH_SFLAG_PROMOTED;
+                       }
+               }
                res = thread_go(thread, result);
                assert(res == KERN_SUCCESS);
                thread_unlock(thread);
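
Editor's note: given the guard above (thread->sched_pri < priority, and priority <= MAXPRI), a caller that does not want a wakeup-time promotion can pass a priority that no thread's sched_pri can be below. A hedged sketch of both call styles (not from the commit; -1 as the "no promotion" value and BASEPRI_DEFAULT as the floor are illustrative assumptions that follow from the check above):

    static void
    example_wakeups(wait_queue_t wq, event_t event)
    {
            /* Wake one waiter; -1 never satisfies sched_pri < priority,
             * so the woken thread keeps its current priority. */
            (void) wait_queue_wakeup_one(wq, event, THREAD_AWAKENED, -1);

            /* Wake one waiter and ensure it runs no lower than BASEPRI_DEFAULT,
             * setting TH_SFLAG_PROMOTED as shown in the hunk above. */
            (void) wait_queue_wakeup_one(wq, event, THREAD_AWAKENED, BASEPRI_DEFAULT);
    }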