diff --git a/osfmk/kern/thread_call.c b/osfmk/kern/thread_call.c
index b69dcdd30efcae91807a5df3e261aeefecf72495..7d43919ae6cbb3610b92fa80cd5c518c7c1cd62c 100644
--- a/osfmk/kern/thread_call.c
+++ b/osfmk/kern/thread_call.c
 /*
- * Copyright (c) 1993-1995, 1999-2000 Apple Computer, Inc.
- * All rights reserved.
+ * Copyright (c) 1993-1995, 1999-2008 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
  * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
  * 
- * @APPLE_LICENSE_HEADER_END@
- */
-/*
- * Thread-based callout module.
- *
- * HISTORY
- *
- * 10 July 1999 (debo)
- *  Pulled into Mac OS X (microkernel).
- *
- * 3 July 1993 (debo)
- *     Created.
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
  
 #include <mach/mach_types.h>
+#include <mach/thread_act.h>
 
+#include <kern/kern_types.h>
+#include <kern/zalloc.h>
 #include <kern/sched_prim.h>
 #include <kern/clock.h>
 #include <kern/task.h>
 #include <kern/thread.h>
+#include <kern/wait_queue.h>
+
+#include <vm/vm_pageout.h>
 
 #include <kern/thread_call.h>
 #include <kern/call_entry.h>
-
 #include <kern/timer_call.h>
 
-#define internal_call_num      768
+#include <libkern/OSAtomic.h>
 
-#define thread_call_thread_min 4
+#include <sys/kdebug.h>
 
-static
-thread_call_data_t
-       internal_call_storage[internal_call_num];
 
-decl_simple_lock_data(static,thread_call_lock)
+static zone_t                  thread_call_zone;
+static struct wait_queue       daemon_wqueue;
 
-static
-timer_call_data_t
-       thread_call_delayed_timer;
+struct thread_call_group {
+       queue_head_t            pending_queue;
+       uint32_t                pending_count;
 
-static
-queue_head_t
-       internal_call_free_queue,
-       pending_call_queue, delayed_call_queue;
+       queue_head_t            delayed_queue;
+       uint32_t                delayed_count;
 
-static
-struct wait_queue
-       call_thread_idle_queue;
+       timer_call_data_t       delayed_timer;
+       timer_call_data_t       dealloc_timer;
 
-static
-thread_t
-       activate_thread;
+       struct wait_queue       idle_wqueue;
+       uint32_t                idle_count, active_count;
 
-static
-boolean_t
-       activate_thread_awake;
-
-static struct {
-       int             pending_num,
-                       pending_hiwat;
-       int             active_num,
-                       active_hiwat,
-                       active_lowat;
-       int             delayed_num,
-                       delayed_hiwat;
-       int             idle_thread_num;
-       int             thread_num,
-                       thread_hiwat,
-                       thread_lowat;
-} thread_calls;
+       integer_t               pri;
+       uint32_t                target_thread_count;
+       uint64_t                idle_timestamp;
 
-static boolean_t
-       thread_call_initialized = FALSE;
+       uint32_t                flags;
+       sched_call_t            sched_call;
+};
 
-static __inline__ thread_call_t
-       _internal_call_allocate(void);
+typedef struct thread_call_group       *thread_call_group_t;
 
-static __inline__ void
-_internal_call_release(
-       thread_call_t           call
-);
+#define TCG_PARALLEL           0x01
+#define TCG_DEALLOC_ACTIVE     0x02
 
-static __inline__ void
-_pending_call_enqueue(
-       thread_call_t           call
-),
-_pending_call_dequeue(
-       thread_call_t           call
-),
-_delayed_call_enqueue(
-       thread_call_t           call
-),
-_delayed_call_dequeue(
-       thread_call_t           call
-);
+#define THREAD_CALL_GROUP_COUNT                4
+#define THREAD_CALL_THREAD_MIN         4
+#define INTERNAL_CALL_COUNT            768
+#define THREAD_CALL_DEALLOC_INTERVAL_NS (5 * 1000 * 1000) /* 5 ms */
+#define THREAD_CALL_ADD_RATIO          4
+#define THREAD_CALL_MACH_FACTOR_CAP    3
+
+static struct thread_call_group        thread_call_groups[THREAD_CALL_GROUP_COUNT];
+static boolean_t               thread_call_daemon_awake;
+static thread_call_data_t      internal_call_storage[INTERNAL_CALL_COUNT];
+static queue_head_t            thread_call_internal_queue;
+static uint64_t                thread_call_dealloc_interval_abs;
+
+static __inline__ thread_call_t        _internal_call_allocate(void);
+static __inline__ void         _internal_call_release(thread_call_t call);
+static __inline__ boolean_t    _pending_call_enqueue(thread_call_t call, thread_call_group_t group);
+static __inline__ boolean_t    _delayed_call_enqueue(thread_call_t call, thread_call_group_t group, uint64_t deadline);
+static __inline__ boolean_t    _call_dequeue(thread_call_t call, thread_call_group_t group);
+static __inline__ void         thread_call_wake(thread_call_group_t group);
+static __inline__ void         _set_delayed_call_timer(thread_call_t call, thread_call_group_t group);
+static boolean_t               _remove_from_pending_queue(thread_call_func_t func, thread_call_param_t param0, boolean_t remove_all);
+static boolean_t               _remove_from_delayed_queue(thread_call_func_t func, thread_call_param_t param0, boolean_t remove_all);
+static void                    thread_call_daemon(void *arg);
+static void                    thread_call_thread(thread_call_group_t group, wait_result_t wres);
+extern void                    thread_call_delayed_timer(timer_call_param_t p0, timer_call_param_t p1);
+static void                    thread_call_dealloc_timer(timer_call_param_t p0, timer_call_param_t p1);
+static void                    thread_call_group_setup(thread_call_group_t group, thread_call_priority_t pri, uint32_t target_thread_count, boolean_t parallel);
+static void                    sched_call_thread(int type, thread_t thread);
+static void                    thread_call_start_deallocate_timer(thread_call_group_t group);
+static void                    thread_call_wait_locked(thread_call_t call);
+
+#define qe(x)          ((queue_entry_t)(x))
+#define TC(x)          ((thread_call_t)(x))
+
+
+lck_grp_t               thread_call_queues_lck_grp;
+lck_grp_t               thread_call_lck_grp;
+lck_attr_t              thread_call_lck_attr;
+lck_grp_attr_t          thread_call_lck_grp_attr;
+
+#if defined(__i386__) || defined(__x86_64__)
+lck_mtx_t              thread_call_lock_data;
+#else
+lck_spin_t             thread_call_lock_data;
+#endif
+
+
+#define thread_call_lock_spin()                        \
+       lck_mtx_lock_spin_always(&thread_call_lock_data)
+
+#define thread_call_unlock()                   \
+       lck_mtx_unlock_always(&thread_call_lock_data)
+
+
+static inline spl_t
+disable_ints_and_lock(void)
+{
+       spl_t s;
+
+       s = splsched();
+       thread_call_lock_spin();
+
+       return s;
+}
+
+static inline void 
+enable_ints_and_unlock(void)
+{
+       thread_call_unlock();
+       (void)spllo();
+}
+
+
+static inline boolean_t
+group_isparallel(thread_call_group_t group)
+{
+       return ((group->flags & TCG_PARALLEL) != 0);
+}
 
-static void __inline__
-_set_delayed_call_timer(
-       thread_call_t           call
-);
-                                       
 static boolean_t
-_remove_from_pending_queue(
-       thread_call_func_t      func,
-       thread_call_param_t     param0,
-       boolean_t                       remove_all
-),
-_remove_from_delayed_queue(
-       thread_call_func_t      func,
-       thread_call_param_t     param0,
-       boolean_t                       remove_all
-);
+thread_call_group_should_add_thread(thread_call_group_t group) 
+{
+       uint32_t thread_count;
 
-static __inline__ void
-       _call_thread_wake(void);
+       if (!group_isparallel(group)) {
+               if (group->pending_count > 0 && group->active_count == 0) {
+                       return TRUE;
+               }
 
-static void
-       _call_thread(void),
-       _activate_thread(void);
+               return FALSE;
+       }
+
+       if (group->pending_count > 0) {
+               if (group->idle_count > 0) {
+                       panic("Pending work, but threads are idle?");
+               }
+
+               thread_count = group->active_count;
+
+               /*
+                * Add a thread if either there are no threads,
+                * the group has fewer than its target number of
+                * threads, or the amount of work is large relative
+                * to the number of threads.  In the last case, pay attention
+                * to the total load on the system, and back off if
+                * it's high.
+                */
+               if ((thread_count == 0) ||
+                       (thread_count < group->target_thread_count) ||
+                       ((group->pending_count > THREAD_CALL_ADD_RATIO * thread_count) && 
+                        (sched_mach_factor < THREAD_CALL_MACH_FACTOR_CAP))) {
+                       return TRUE;
+               }
+       }
+                       
+       return FALSE;
+}
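
(For example, with THREAD_CALL_ADD_RATIO of 4, a parallel group already
running 3 threads grows beyond its target only once more than 12 calls
are pending and sched_mach_factor is below THREAD_CALL_MACH_FACTOR_CAP.)
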
+
+static inline integer_t
+thread_call_priority_to_sched_pri(thread_call_priority_t pri) 
+{
+       switch (pri) {
+       case THREAD_CALL_PRIORITY_HIGH:
+               return BASEPRI_PREEMPT;
+       case THREAD_CALL_PRIORITY_KERNEL:
+               return BASEPRI_KERNEL;
+       case THREAD_CALL_PRIORITY_USER:
+               return BASEPRI_DEFAULT;
+       case THREAD_CALL_PRIORITY_LOW:
+               return DEPRESSPRI;
+       default:
+               panic("Invalid priority.");
+       }
+
+       return 0;
+}
+
+/* Lock held */
+static inline thread_call_group_t
+thread_call_get_group(
+               thread_call_t call)
+{
+       thread_call_priority_t  pri = call->tc_pri;
+
+       assert(pri == THREAD_CALL_PRIORITY_LOW ||
+                       pri == THREAD_CALL_PRIORITY_USER ||
+                       pri == THREAD_CALL_PRIORITY_KERNEL ||
+                       pri == THREAD_CALL_PRIORITY_HIGH);
+
+       return &thread_call_groups[pri];
+}
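
(This indexing works because thread_call_groups is laid out so that each
thread_call_priority_t value is also its group's array index; see
thread_call_initialize() below.)
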
 
 static void
-_delayed_call_timer(
-       timer_call_param_t              p0,
-       timer_call_param_t              p1
-);
+thread_call_group_setup(
+               thread_call_group_t             group, 
+               thread_call_priority_t          pri,
+               uint32_t                        target_thread_count,
+               boolean_t                       parallel)
+{
+       queue_init(&group->pending_queue);
+       queue_init(&group->delayed_queue);
 
-#define qe(x)          ((queue_entry_t)(x))
-#define TC(x)          ((thread_call_t)(x))
+       timer_call_setup(&group->delayed_timer, thread_call_delayed_timer, group);
+       timer_call_setup(&group->dealloc_timer, thread_call_dealloc_timer, group);
+
+       wait_queue_init(&group->idle_wqueue, SYNC_POLICY_FIFO);
+
+       group->target_thread_count = target_thread_count;
+       group->pri = thread_call_priority_to_sched_pri(pri);
+
+       group->sched_call = sched_call_thread; 
+       if (parallel) {
+               group->flags |= TCG_PARALLEL;
+               group->sched_call = NULL;
+       } 
+}
 
 /*
- * Routine:    thread_call_initialize [public]
- *
- * Description:        Initialize this module, called
- *             early during system initialization.
- *
- * Preconditions:      None.
- *
- * Postconditions:     None.
+ * Simple wrapper for creating threads bound to 
+ * thread call groups.
  */
-
-void
-thread_call_initialize(void)
+static kern_return_t
+thread_call_thread_create(
+               thread_call_group_t             group)
 {
-    thread_call_t              call;
-       spl_t                           s;
+       thread_t thread;
+       kern_return_t result;
 
-    if (thread_call_initialized)
-       panic("thread_call_initialize");
-
-    simple_lock_init(&thread_call_lock, ETAP_MISC_TIMER);
+       result = kernel_thread_start_priority((thread_continue_t)thread_call_thread, group, group->pri, &thread);
+       if (result != KERN_SUCCESS) {
+               return result;
+       }
 
-       s = splsched();
-       simple_lock(&thread_call_lock);
+       if (group->pri < BASEPRI_PREEMPT) {
+               /*
+                * New-style threads don't get to run to completion
+                * in the kernel when higher-priority threads are
+                * available.
+                */
+               thread_set_eager_preempt(thread);
+       }
 
-    queue_init(&pending_call_queue);
-    queue_init(&delayed_call_queue);
+       thread_deallocate(thread);
+       return KERN_SUCCESS;
+}
 
-    queue_init(&internal_call_free_queue);
-    for (
-               call = internal_call_storage;
-                       call < &internal_call_storage[internal_call_num];
+/*
+ *     thread_call_initialize:
+ *
+ *     Initialize this module, called
+ *     early during system initialization.
+ */
+void
+thread_call_initialize(void)
+{
+       thread_call_t                   call;
+       kern_return_t                   result;
+       thread_t                        thread;
+       int                             i;
+
+       i = sizeof (thread_call_data_t);
+       thread_call_zone = zinit(i, 4096 * i, 16 * i, "thread_call");
+       zone_change(thread_call_zone, Z_CALLERACCT, FALSE);
+       zone_change(thread_call_zone, Z_NOENCRYPT, TRUE);
+
+       lck_attr_setdefault(&thread_call_lck_attr);
+       lck_grp_attr_setdefault(&thread_call_lck_grp_attr);
+       lck_grp_init(&thread_call_queues_lck_grp, "thread_call_queues", &thread_call_lck_grp_attr);
+       lck_grp_init(&thread_call_lck_grp, "thread_call", &thread_call_lck_grp_attr);
+
+#if defined(__i386__) || defined(__x86_64__)
+        lck_mtx_init(&thread_call_lock_data, &thread_call_lck_grp, &thread_call_lck_attr);
+#else
+        lck_spin_init(&thread_call_lock_data, &thread_call_lck_grp, &thread_call_lck_attr);
+#endif
+
+       nanotime_to_absolutetime(0, THREAD_CALL_DEALLOC_INTERVAL_NS, &thread_call_dealloc_interval_abs);
+       wait_queue_init(&daemon_wqueue, SYNC_POLICY_FIFO);
+
+       thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_LOW], THREAD_CALL_PRIORITY_LOW, 0, TRUE);
+       thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_USER], THREAD_CALL_PRIORITY_USER, 0, TRUE);
+       thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_KERNEL], THREAD_CALL_PRIORITY_KERNEL, 1, TRUE);
+       thread_call_group_setup(&thread_call_groups[THREAD_CALL_PRIORITY_HIGH], THREAD_CALL_PRIORITY_HIGH, THREAD_CALL_THREAD_MIN, FALSE);
+
+       disable_ints_and_lock();
+
+       queue_init(&thread_call_internal_queue);
+       for (
+                       call = internal_call_storage;
+                       call < &internal_call_storage[INTERNAL_CALL_COUNT];
                        call++) {
 
-               enqueue_tail(&internal_call_free_queue, qe(call));
-    }
-
-       timer_call_setup(&thread_call_delayed_timer, _delayed_call_timer, NULL);
+               enqueue_tail(&thread_call_internal_queue, qe(call));
+       }
 
-       wait_queue_init(&call_thread_idle_queue, SYNC_POLICY_FIFO);
-       thread_calls.thread_lowat = thread_call_thread_min;
+       thread_call_daemon_awake = TRUE;
 
-       activate_thread_awake = TRUE;
-    thread_call_initialized = TRUE;
+       enable_ints_and_unlock();
 
-       simple_unlock(&thread_call_lock);
-       splx(s);
+       result = kernel_thread_start_priority((thread_continue_t)thread_call_daemon, NULL, BASEPRI_PREEMPT + 1, &thread);
+       if (result != KERN_SUCCESS)
+               panic("thread_call_initialize");
 
-    activate_thread = kernel_thread_with_priority(
-                                                                               kernel_task, MAXPRI_KERNEL - 2,
-                                                                                               _activate_thread, TRUE, TRUE);
+       thread_deallocate(thread);
 }
 
 void
 thread_call_setup(
        thread_call_t                   call,
        thread_call_func_t              func,
-       thread_call_param_t             param0
-)
+       thread_call_param_t             param0)
 {
-       call_entry_setup(call, func, param0);
+       bzero(call, sizeof(*call));
+       call_entry_setup((call_entry_t)call, func, param0);
+       call->tc_pri = THREAD_CALL_PRIORITY_HIGH; /* Default priority */
 }
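
A minimal usage sketch of caller-owned storage (my_call, my_func and
my_init are illustrative, not part of this change): a client embeds a
thread_call_data_t, initializes it once with thread_call_setup(), and
enqueues it with thread_call_enter1().  Such a call runs at the default
THREAD_CALL_PRIORITY_HIGH; because the subsystem does not own the storage
(THREAD_CALL_ALLOC is not set), thread_call_cancel_wait() must not be
used on it.

    #include <kern/thread_call.h>

    static thread_call_data_t my_call;          /* caller-owned storage */

    static void
    my_func(thread_call_param_t p0, thread_call_param_t p1)
    {
            /* runs on a thread call thread in the HIGH group */
    }

    static void
    my_init(void *context)
    {
            thread_call_setup(&my_call, my_func, context);  /* once */
            thread_call_enter1(&my_call, NULL);             /* run "soon" */
    }
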
 
 /*
- * Routine:    _internal_call_allocate [private, inline]
+ *     _internal_call_allocate:
  *
- * Purpose:    Allocate an internal callout entry.
+ *     Allocate an internal callout entry.
  *
- * Preconditions:      thread_call_lock held.
- *
- * Postconditions:     None.
+ *     Called with thread_call_lock held.
  */
-
 static __inline__ thread_call_t
 _internal_call_allocate(void)
 {
     thread_call_t              call;
     
-    if (queue_empty(&internal_call_free_queue))
+    if (queue_empty(&thread_call_internal_queue))
        panic("_internal_call_allocate");
        
-    call = TC(dequeue_head(&internal_call_free_queue));
+    call = TC(dequeue_head(&thread_call_internal_queue));
     
     return (call);
 }
 
 /*
- * Routine:    _internal_call_release [private, inline]
+ *     _internal_call_release:
  *
- * Purpose:    Release an internal callout entry which
- *             is no longer pending (or delayed).
+ *     Release an internal callout entry which
+ *     is no longer pending (or delayed).
  *
- * Preconditions:      thread_call_lock held.
- *
- * Postconditions:     None.
+ *     Called with thread_call_lock held.
  */
-
-static __inline__
-void
+static __inline__ void
 _internal_call_release(
-    thread_call_t              call
-)
+    thread_call_t              call)
 {
     if (    call >= internal_call_storage                                              &&
-                   call < &internal_call_storage[internal_call_num]            )
-               enqueue_tail(&internal_call_free_queue, qe(call));
+                   call < &internal_call_storage[INTERNAL_CALL_COUNT]          )
+               enqueue_head(&thread_call_internal_queue, qe(call));
 }
 
 /*
- * Routine:    _pending_call_enqueue [private, inline]
+ *     _pending_call_enqueue:
  *
- * Purpose:    Place an entry at the end of the
- *             pending queue, to be executed soon.
+ *     Place an entry at the end of the
+ *     pending queue, to be executed soon.
  *
- * Preconditions:      thread_call_lock held.
+ *     Returns TRUE if the entry was already
+ *     on a queue.
  *
- * Postconditions:     None.
+ *     Called with thread_call_lock held.
  */
-
-static __inline__
-void
+static __inline__ boolean_t
 _pending_call_enqueue(
-    thread_call_t              call
-)
+    thread_call_t              call,
+       thread_call_group_t     group)
 {
-    enqueue_tail(&pending_call_queue, qe(call));
-       if (++thread_calls.pending_num > thread_calls.pending_hiwat)
-               thread_calls.pending_hiwat = thread_calls.pending_num;
+       queue_head_t            *old_queue;
 
-    call->state = PENDING;
-}
+       old_queue = call_entry_enqueue_tail(CE(call), &group->pending_queue);
 
-/*
- * Routine:    _pending_call_dequeue [private, inline]
- *
- * Purpose:    Remove an entry from the pending queue,
- *             effectively unscheduling it.
- *
- * Preconditions:      thread_call_lock held.
- *
- * Postconditions:     None.
- */
+       if (old_queue == NULL) {
+               call->tc_submit_count++;
+       }
 
-static __inline__
-void
-_pending_call_dequeue(
-    thread_call_t              call
-)
-{
-    (void)remque(qe(call));
-       thread_calls.pending_num--;
-    
-    call->state = IDLE;
+       group->pending_count++;
+
+       thread_call_wake(group);
+
+       return (old_queue != NULL);
 }
 
 /*
- * Routine:    _delayed_call_enqueue [private, inline]
+ *     _delayed_call_enqueue:
  *
- * Purpose:    Place an entry on the delayed queue,
- *             after existing entries with an earlier
- *             (or identical) deadline.
+ *     Place an entry on the delayed queue,
+ *     after existing entries with an earlier
+ *     (or identical) deadline.
  *
- * Preconditions:      thread_call_lock held.
+ *     Returns TRUE if the entry was already
+ *     on a queue.
  *
- * Postconditions:     None.
+ *     Called with thread_call_lock held.
  */
-
-static __inline__
-void
+static __inline__ boolean_t
 _delayed_call_enqueue(
-    thread_call_t              call
-)
+       thread_call_t           call,
+       thread_call_group_t     group,
+       uint64_t                deadline)
 {
-    thread_call_t              current;
-    
-    current = TC(queue_first(&delayed_call_queue));
-    
-    while (TRUE) {
-       if (    queue_end(&delayed_call_queue, qe(current))             ||
-                                       call->deadline < current->deadline                      ) {
-                       current = TC(queue_prev(qe(current)));
-                       break;
-               }
-           
-               current = TC(queue_next(qe(current)));
-    }
+       queue_head_t            *old_queue;
 
-    insque(qe(call), qe(current));
-       if (++thread_calls.delayed_num > thread_calls.delayed_hiwat)
-               thread_calls.delayed_hiwat = thread_calls.delayed_num;
-    
-    call->state = DELAYED;
+       old_queue = call_entry_enqueue_deadline(CE(call), &group->delayed_queue, deadline);
+
+       if (old_queue == &group->pending_queue)
+               group->pending_count--;
+       else if (old_queue == NULL) 
+               call->tc_submit_count++;
+
+       return (old_queue != NULL);
 }
 
 /*
- * Routine:    _delayed_call_dequeue [private, inline]
+ *     _call_dequeue:
  *
- * Purpose:    Remove an entry from the delayed queue,
- *             effectively unscheduling it.
+ *     Remove an entry from a queue.
  *
- * Preconditions:      thread_call_lock held.
+ *     Returns TRUE if the entry was on a queue.
  *
- * Postconditions:     None.
+ *     Called with thread_call_lock held.
  */
-
-static __inline__
-void
-_delayed_call_dequeue(
-    thread_call_t              call
-)
+static __inline__ boolean_t
+_call_dequeue(
+       thread_call_t           call,
+       thread_call_group_t     group)
 {
-    (void)remque(qe(call));
-       thread_calls.delayed_num--;
-    
-    call->state = IDLE;
+       queue_head_t            *old_queue;
+
+       old_queue = call_entry_dequeue(CE(call));
+
+       if (old_queue != NULL) {
+               call->tc_finish_count++;
+               if (old_queue == &group->pending_queue)
+                       group->pending_count--;
+       }
+
+       return (old_queue != NULL);
 }
 
 /*
- * Routine:    _set_delayed_call_timer [private]
+ *     _set_delayed_call_timer:
  *
- * Purpose:    Reset the timer so that it
- *             next expires when the entry is due.
+ *     Reset the timer so that it
+ *     next expires when the entry is due.
  *
- * Preconditions:      thread_call_lock held.
- *
- * Postconditions:     None.
+ *     Called with thread_call_lock held.
  */
-
 static __inline__ void
 _set_delayed_call_timer(
-    thread_call_t              call
-)
+    thread_call_t              call,
+       thread_call_group_t     group)
 {
-    timer_call_enter(&thread_call_delayed_timer, call->deadline);
+    timer_call_enter(&group->delayed_timer, call->tc_call.deadline, 0);
 }
 
 /*
- * Routine:    _remove_from_pending_queue [private]
+ *     _remove_from_pending_queue:
  *
- * Purpose:    Remove the first (or all) matching
- *             entries from the pending queue,
- *             effectively unscheduling them.
- *             Returns whether any matching entries
- *             were found.
+ *     Remove the first (or all) matching
+ *     entries from the pending queue.
  *
- * Preconditions:      thread_call_lock held.
+ *     Returns TRUE if any matching entries
+ *     were found.
  *
- * Postconditions:     None.
+ *     Called with thread_call_lock held.
  */
-
-static
-boolean_t
+static boolean_t
 _remove_from_pending_queue(
     thread_call_func_t         func,
     thread_call_param_t                param0,
-    boolean_t                          remove_all
-)
+    boolean_t                          remove_all)
 {
-       boolean_t                       call_removed = FALSE;
-       thread_call_t           call;
-    
-    call = TC(queue_first(&pending_call_queue));
-    
-    while (!queue_end(&pending_call_queue, qe(call))) {
-       if (    call->func == func                      &&
-                               call->param0 == param0                  ) {
+       boolean_t                               call_removed = FALSE;
+       thread_call_t                   call;
+       thread_call_group_t             group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH];
+
+       call = TC(queue_first(&group->pending_queue));
+
+       while (!queue_end(&group->pending_queue, qe(call))) {
+               if (call->tc_call.func == func &&
+                               call->tc_call.param0 == param0) {
                        thread_call_t   next = TC(queue_next(qe(call)));
-               
-                       _pending_call_dequeue(call);
+
+                       _call_dequeue(call, group);
 
                        _internal_call_release(call);
-           
+
                        call_removed = TRUE;
                        if (!remove_all)
                                break;
-               
+
                        call = next;
                }
                else    
                        call = TC(queue_next(qe(call)));
-    }
-    
-    return (call_removed);
+       }
+
+       return (call_removed);
 }
 
 /*
- * Routine:    _remove_from_delayed_queue [private]
+ *     _remove_from_delayed_queue:
  *
- * Purpose:    Remove the first (or all) matching
- *             entries from the delayed queue,
- *             effectively unscheduling them.
- *             Returns whether any matching entries
- *             were found.
+ *     Remove the first (or all) matching
+ *     entries from the delayed queue.
  *
- * Preconditions:      thread_call_lock held.
+ *     Returns TRUE if any matching entries
+ *     were found.
  *
- * Postconditions:     None.
+ *     Called with thread_call_lock held.
  */
-
-static
-boolean_t
+static boolean_t
 _remove_from_delayed_queue(
     thread_call_func_t         func,
     thread_call_param_t                param0,
-    boolean_t                          remove_all
-)
+    boolean_t                          remove_all)
 {
-    boolean_t                  call_removed = FALSE;
-    thread_call_t              call;
-    
-    call = TC(queue_first(&delayed_call_queue));
-    
-    while (!queue_end(&delayed_call_queue, qe(call))) {
-       if (    call->func == func                      &&
-                               call->param0 == param0                  ) {
+       boolean_t                       call_removed = FALSE;
+       thread_call_t                   call;
+       thread_call_group_t             group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH];
+
+       call = TC(queue_first(&group->delayed_queue));
+
+       while (!queue_end(&group->delayed_queue, qe(call))) {
+               if (call->tc_call.func == func  &&
+                               call->tc_call.param0 == param0) {
                        thread_call_t   next = TC(queue_next(qe(call)));
-               
-                       _delayed_call_dequeue(call);
-           
+
+                       _call_dequeue(call, group);
+
                        _internal_call_release(call);
-           
+
                        call_removed = TRUE;
                        if (!remove_all)
                                break;
-               
+
                        call = next;
                }
                else    
                        call = TC(queue_next(qe(call)));
-    }
-    
-    return (call_removed);
+       }
+
+       return (call_removed);
 }
 
+#ifndef        __LP64__
+
 /*
- * Routine:    thread_call_func [public]
- *
- * Purpose:    Schedule a function callout.
- *             Guarantees { function, argument }
- *             uniqueness if unique_call is TRUE.
+ *     thread_call_func:
  *
- * Preconditions:      Callable from an interrupt context
- *                                     below splsched.
+ *     Enqueue a function callout.
  *
- * Postconditions:     None.
+ *     Guarantees { function, argument }
+ *     uniqueness if unique_call is TRUE.
  */
-
 void
 thread_call_func(
     thread_call_func_t         func,
     thread_call_param_t                param,
-    boolean_t                          unique_call
-)
+    boolean_t                          unique_call)
 {
-    thread_call_t              call;
-    int                                        s;
-    
-    if (!thread_call_initialized)
-       panic("thread_call_func");
-       
-    s = splsched();
-    simple_lock(&thread_call_lock);
-    
-    call = TC(queue_first(&pending_call_queue));
-    
-       while (unique_call && !queue_end(&pending_call_queue, qe(call))) {
-       if (    call->func == func                      &&
-                               call->param0 == param                   ) {
+       thread_call_t           call;
+       thread_call_group_t     group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH];
+       spl_t                   s;
+
+       s = splsched();
+       thread_call_lock_spin();
+
+       call = TC(queue_first(&group->pending_queue));
+
+       while (unique_call && !queue_end(&group->pending_queue, qe(call))) {
+               if (call->tc_call.func == func && call->tc_call.param0 == param) {
                        break;
                }
-       
+
                call = TC(queue_next(qe(call)));
-    }
-    
-    if (!unique_call || queue_end(&pending_call_queue, qe(call))) {
+       }
+
+       if (!unique_call || queue_end(&group->pending_queue, qe(call))) {
                call = _internal_call_allocate();
-               call->func                      = func;
-               call->param0            = param;
-               call->param1            = 0;
-       
-               _pending_call_enqueue(call);
-               
-               if (thread_calls.active_num <= 0)
-                       _call_thread_wake();
-    }
+               call->tc_call.func      = func;
+               call->tc_call.param0    = param;
+               call->tc_call.param1    = NULL;
 
-       simple_unlock(&thread_call_lock);
-    splx(s);
+               _pending_call_enqueue(call, group);
+       }
+
+       thread_call_unlock();
+       splx(s);
 }
 
+#endif /* __LP64__ */
+
 /*
- * Routine:    thread_call_func_delayed [public]
+ *     thread_call_func_delayed:
  *
- * Purpose:    Schedule a function callout to
- *             occur at the stated time.
- *
- * Preconditions:      Callable from an interrupt context
- *                                     below splsched.
- *
- * Postconditions:     None.
+ *     Enqueue a function callout to
+ *     occur at the stated time.
  */
-
 void
 thread_call_func_delayed(
-    thread_call_func_t         func,
-    thread_call_param_t                param,
-    uint64_t                           deadline
-)
+               thread_call_func_t              func,
+               thread_call_param_t             param,
+               uint64_t                        deadline)
 {
-    thread_call_t              call;
-    int                                        s;
-    
-    if (!thread_call_initialized)
-       panic("thread_call_func_delayed");
+       thread_call_t           call;
+       thread_call_group_t     group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH];
+       spl_t                   s;
 
-    s = splsched();
-    simple_lock(&thread_call_lock);
-    
-    call = _internal_call_allocate();
-    call->func                 = func;
-    call->param0               = param;
-    call->param1               = 0;
-    call->deadline             = deadline;
-    
-    _delayed_call_enqueue(call);
-    
-    if (queue_first(&delayed_call_queue) == qe(call))
-       _set_delayed_call_timer(call);
-    
-    simple_unlock(&thread_call_lock);
-    splx(s);
+       s = splsched();
+       thread_call_lock_spin();
+
+       call = _internal_call_allocate();
+       call->tc_call.func      = func;
+       call->tc_call.param0    = param;
+       call->tc_call.param1    = 0;
+
+       _delayed_call_enqueue(call, group, deadline);
+
+       if (queue_first(&group->delayed_queue) == qe(call))
+               _set_delayed_call_timer(call, group);
+
+       thread_call_unlock();
+       splx(s);
 }
 
 /*
- * Routine:    thread_call_func_cancel [public]
+ *     thread_call_func_cancel:
  *
- * Purpose:    Unschedule a function callout.
- *             Removes one (or all)
- *             { function, argument }
- *             instance(s) from either (or both)
- *             the pending and the delayed queue,
- *             in that order.  Returns a boolean
- *             indicating whether any calls were
- *             cancelled.
+ *     Dequeue a function callout.
  *
- * Preconditions:      Callable from an interrupt context
- *                                     below splsched.
+ *     Removes one (or all) { function, argument }
+ *     instance(s) from either (or both)
+ *     the pending and the delayed queue,
+ *     in that order.
  *
- * Postconditions:     None.
+ *     Returns TRUE if any calls were cancelled.
  */
-
 boolean_t
 thread_call_func_cancel(
-    thread_call_func_t         func,
-    thread_call_param_t                param,
-    boolean_t                          cancel_all
-)
+               thread_call_func_t              func,
+               thread_call_param_t             param,
+               boolean_t                       cancel_all)
 {
-       boolean_t                       result;
-    int                                        s;
-    
-    s = splsched();
-    simple_lock(&thread_call_lock);
+       boolean_t       result;
+       spl_t           s;
+
+       s = splsched();
+       thread_call_lock_spin();
 
-    if (cancel_all)
+       if (cancel_all)
                result = _remove_from_pending_queue(func, param, cancel_all) |
-                                               _remove_from_delayed_queue(func, param, cancel_all);
+                       _remove_from_delayed_queue(func, param, cancel_all);
        else
                result = _remove_from_pending_queue(func, param, cancel_all) ||
-                                               _remove_from_delayed_queue(func, param, cancel_all);
-    
-    simple_unlock(&thread_call_lock);
-    splx(s);
+                       _remove_from_delayed_queue(func, param, cancel_all);
+
+       thread_call_unlock();
+       splx(s);
 
        return (result);
 }
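
A hedged sketch of this legacy, function-keyed interface
(thread_call_func itself is retained above only for non-LP64 kernels;
flush_handler and cache are placeholders): callers identify work by
{ function, argument } pair rather than by an explicit thread_call_t.

    static void
    flush_handler(thread_call_param_t cache, thread_call_param_t param1)
    {
            /* ... */
    }

    static void
    schedule_flush(void *cache)
    {
            /* unique_call == TRUE: at most one pending { func, param } instance */
            thread_call_func(flush_handler, cache, TRUE);
    }

    static void
    cancel_flush(void *cache)
    {
            /* remove every matching instance from the pending, then delayed, queue */
            (void) thread_call_func_cancel(flush_handler, cache, TRUE);
    }
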
 
 /*
- * Routine:    thread_call_allocate [public]
- *
- * Purpose:    Allocate an external callout
- *             entry.
- *
- * Preconditions:      None.
- *
- * Postconditions:     None.
+ * Allocate a thread call with a given priority.  Calls at importances
+ * other than THREAD_CALL_PRIORITY_HIGH run in threads with eager
+ * preemption enabled (i.e. they may be aggressively preempted by
+ * higher-priority threads which are not in the normal "urgent" bands).
  */
+thread_call_t
+thread_call_allocate_with_priority(
+               thread_call_func_t              func,
+               thread_call_param_t             param0,
+               thread_call_priority_t          pri)
+{
+       thread_call_t call;
+
+       if (pri > THREAD_CALL_PRIORITY_LOW) {
+               panic("Invalid pri: %d\n", pri);
+       }
+
+       call = thread_call_allocate(func, param0);
+       call->tc_pri = pri;
 
+       return call;
+}
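
A one-line sketch (my_func and ctx are placeholders); any value
numerically above THREAD_CALL_PRIORITY_LOW panics:

    thread_call_t tc = thread_call_allocate_with_priority(my_func, ctx,
                    THREAD_CALL_PRIORITY_USER);  /* BASEPRI_DEFAULT band */
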
+
+/*
+ *     thread_call_allocate:
+ *
+ *     Allocate a callout entry.
+ */
 thread_call_t
 thread_call_allocate(
-    thread_call_func_t         func,
-    thread_call_param_t                param0
-)
+               thread_call_func_t              func,
+               thread_call_param_t             param0)
 {
-    thread_call_t              call = (void *)kalloc(sizeof (thread_call_data_t));
-    
-    call->func                 = func;
-    call->param0               = param0;
-    call->state                        = IDLE;
-    
-    return (call);
+       thread_call_t   call = zalloc(thread_call_zone);
+
+       thread_call_setup(call, func, param0);
+       call->tc_refs = 1;
+       call->tc_flags = THREAD_CALL_ALLOC;
+
+       return (call);
 }
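
A lifecycle sketch for heap-allocated calls (my_func and ctx are
placeholders).  thread_call_allocate() sets THREAD_CALL_ALLOC, so these
calls are also eligible for thread_call_cancel_wait():

    static void my_func(thread_call_param_t p0, thread_call_param_t p1);

    static void
    example(void *ctx)
    {
            thread_call_t call;

            call = thread_call_allocate(my_func, ctx);  /* tc_refs == 1 */
            thread_call_enter1(call, NULL);             /* run "soon" */

            /* teardown: dequeue if still queued, then drop the reference */
            (void) thread_call_cancel(call);
            (void) thread_call_free(call);
    }

If the call is mid-execution at teardown, thread_call_cancel() returns
FALSE and the executing thread holds its own reference, so the zfree() is
deferred until thread_call_finish() drops the last reference.
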
 
 /*
- * Routine:    thread_call_free [public]
+ *     thread_call_free:
  *
- * Purpose:    Free an external callout
- *             entry.
- *
- * Preconditions:      None.
- *
- * Postconditions:     None.
+ *     Release a callout.  If the callout is currently
+ *     executing, it will be freed when all invocations
+ *     finish.
  */
-
 boolean_t
 thread_call_free(
-    thread_call_t              call
-)
+               thread_call_t           call)
 {
-    int                        s;
-    
-    s = splsched();
-    simple_lock(&thread_call_lock);
-    
-    if (call->state != IDLE) {
-       simple_unlock(&thread_call_lock);
+       spl_t   s;
+       int32_t refs;
+
+       s = splsched();
+       thread_call_lock_spin();
+
+       if (call->tc_call.queue != NULL) {
+               thread_call_unlock();
                splx(s);
 
                return (FALSE);
-    }
-    
-    simple_unlock(&thread_call_lock);
-    splx(s);
-    
-    kfree((vm_offset_t)call, sizeof (thread_call_data_t));
+       }
+
+       refs = --call->tc_refs;
+       if (refs < 0) {
+               panic("Refcount negative: %d\n", refs);
+       }       
+
+       thread_call_unlock();
+       splx(s);
+
+       if (refs == 0) {
+               zfree(thread_call_zone, call);
+       }
 
        return (TRUE);
 }
 
 /*
- * Routine:    thread_call_enter [public]
- *
- * Purpose:    Schedule an external callout 
- *             entry to occur "soon".  Returns a
- *             boolean indicating whether the call
- *             had been already scheduled.
+ *     thread_call_enter:
  *
- * Preconditions:      Callable from an interrupt context
- *                                     below splsched.
+ *     Enqueue a callout entry to occur "soon".
  *
- * Postconditions:     None.
+ *     Returns TRUE if the call was
+ *     already on a queue.
  */
-
 boolean_t
 thread_call_enter(
-    thread_call_t              call
-)
+               thread_call_t           call)
 {
        boolean_t               result = TRUE;
-    int                                s;
-    
-    s = splsched();
-    simple_lock(&thread_call_lock);
-    
-    if (call->state != PENDING) {
-               if (call->state == DELAYED)
-                       _delayed_call_dequeue(call);
-               else if (call->state == IDLE)
-                       result = FALSE;
+       thread_call_group_t     group;
+       spl_t                   s;
 
-       _pending_call_enqueue(call);
-               
-               if (thread_calls.active_num <= 0)
-                       _call_thread_wake();
+       group = thread_call_get_group(call);
+
+       s = splsched();
+       thread_call_lock_spin();
+
+       if (call->tc_call.queue != &group->pending_queue) {
+               result = _pending_call_enqueue(call, group);
        }
 
-       call->param1 = 0;
+       call->tc_call.param1 = 0;
 
-       simple_unlock(&thread_call_lock);
-    splx(s);
+       thread_call_unlock();
+       splx(s);
 
        return (result);
 }
 
 boolean_t
 thread_call_enter1(
-    thread_call_t                      call,
-    thread_call_param_t                param1
-)
+               thread_call_t                   call,
+               thread_call_param_t             param1)
 {
-       boolean_t                       result = TRUE;
-    int                                        s;
-    
-    s = splsched();
-    simple_lock(&thread_call_lock);
-    
-    if (call->state != PENDING) {
-               if (call->state == DELAYED)
-                       _delayed_call_dequeue(call);
-               else if (call->state == IDLE)
-                       result = FALSE;
+       boolean_t               result = TRUE;
+       thread_call_group_t     group;
+       spl_t                   s;
 
-       _pending_call_enqueue(call);
+       group = thread_call_get_group(call);
 
-               if (thread_calls.active_num <= 0)
-                       _call_thread_wake();
-    }
+       s = splsched();
+       thread_call_lock_spin();
+
+       if (call->tc_call.queue != &group->pending_queue) {
+               result = _pending_call_enqueue(call, group);
+       }
 
-       call->param1 = param1;
+       call->tc_call.param1 = param1;
 
-       simple_unlock(&thread_call_lock);
-    splx(s);
+       thread_call_unlock();
+       splx(s);
 
        return (result);
 }
 
 /*
- * Routine:    thread_call_enter_delayed [public]
- *
- * Purpose:    Schedule an external callout 
- *             entry to occur at the stated time.
- *             Returns a boolean indicating whether
- *             the call had been already scheduled.
+ *     thread_call_enter_delayed:
  *
- * Preconditions:      Callable from an interrupt context
- *                                     below splsched.
+ *     Enqueue a callout entry to occur
+ *     at the stated time.
  *
- * Postconditions:     None.
+ *     Returns TRUE if the call was
+ *     already on a queue.
  */
-
 boolean_t
 thread_call_enter_delayed(
-    thread_call_t              call,
-    uint64_t                   deadline
-)
+               thread_call_t           call,
+               uint64_t                        deadline)
 {
        boolean_t               result = TRUE;
-    int                                s;
-
-    s = splsched();
-    simple_lock(&thread_call_lock);
+       thread_call_group_t     group;
+       spl_t                   s;
+       
+       group = thread_call_get_group(call);
 
-       if (call->state == PENDING)
-               _pending_call_dequeue(call);
-       else if (call->state == DELAYED)
-               _delayed_call_dequeue(call);
-       else if (call->state == IDLE)
-               result = FALSE;
+       s = splsched();
+       thread_call_lock_spin();
 
-       call->param1    = 0;
-       call->deadline  = deadline;
+       result = _delayed_call_enqueue(call, group, deadline);
 
-       _delayed_call_enqueue(call);
+       if (queue_first(&group->delayed_queue) == qe(call))
+               _set_delayed_call_timer(call, group);
 
-       if (queue_first(&delayed_call_queue) == qe(call))
-               _set_delayed_call_timer(call);
+       call->tc_call.param1 = 0;
 
-    simple_unlock(&thread_call_lock);
-    splx(s);
+       thread_call_unlock();
+       splx(s);
 
        return (result);
 }
 
 boolean_t
 thread_call_enter1_delayed(
-    thread_call_t                      call,
-    thread_call_param_t                param1,
-    uint64_t                           deadline
-)
+               thread_call_t                   call,
+               thread_call_param_t             param1,
+               uint64_t                        deadline)
 {
-       boolean_t                       result = TRUE;
-    int                                        s;
+       boolean_t               result = TRUE;
+       thread_call_group_t     group;
+       spl_t                   s;
 
-    s = splsched();
-    simple_lock(&thread_call_lock);
+       group = thread_call_get_group(call);
 
-       if (call->state == PENDING)
-               _pending_call_dequeue(call);
-       else if (call->state == DELAYED)
-               _delayed_call_dequeue(call);
-       else if (call->state == IDLE)
-               result = FALSE;
+       s = splsched();
+       thread_call_lock_spin();
 
-       call->param1    = param1;
-       call->deadline  = deadline;
+       result = _delayed_call_enqueue(call, group, deadline);
 
-       _delayed_call_enqueue(call);
+       if (queue_first(&group->delayed_queue) == qe(call))
+               _set_delayed_call_timer(call, group);
 
-       if (queue_first(&delayed_call_queue) == qe(call))
-               _set_delayed_call_timer(call);
+       call->tc_call.param1 = param1;
 
-    simple_unlock(&thread_call_lock);
-    splx(s);
+       thread_call_unlock();
+       splx(s);
 
        return (result);
 }
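
Deadlines are absolute-time values.  A sketch of scheduling 100 ms out
using clock_interval_to_deadline() from osfmk/kern/clock.h (call and
param are assumed to exist):

    uint64_t deadline;

    clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);  /* now + 100 ms */
    thread_call_enter1_delayed(call, param, deadline);
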
 
 /*
- * Routine:    thread_call_cancel [public]
- *
- * Purpose:    Unschedule a callout entry.
- *             Returns a boolean indicating
- *             whether the call had actually
- *             been scheduled.
+ *     thread_call_cancel:
  *
- * Preconditions:      Callable from an interrupt context
- *                                     below splsched.
+ *     Dequeue a callout entry.
  *
- * Postconditions:     None.
+ *     Returns TRUE if the call was
+ *     on a queue.
  */
-
 boolean_t
 thread_call_cancel(
-    thread_call_t              call
-)
+               thread_call_t           call)
 {
-       boolean_t               result = TRUE;
-    int                                s;
-    
-    s = splsched();
-    simple_lock(&thread_call_lock);
-    
-    if (call->state == PENDING)
-       _pending_call_dequeue(call);
-    else if (call->state == DELAYED)
-       _delayed_call_dequeue(call);
-    else
-       result = FALSE;
-       
-    simple_unlock(&thread_call_lock);
-    splx(s);
+       boolean_t               result;
+       thread_call_group_t     group;
+       spl_t                   s;
+
+       group = thread_call_get_group(call);
+
+       s = splsched();
+       thread_call_lock_spin();
+
+       result = _call_dequeue(call, group);
+
+       thread_call_unlock();
+       splx(s);
 
        return (result);
 }
 
 /*
- * Routine:    thread_call_is_delayed [public]
- *
- * Purpose:    Returns a boolean indicating
- *             whether a call is currently scheduled
- *             to occur at a later time.  Optionally
- *             returns the expiration time.
+ * Cancel a thread call.  If it cannot be cancelled (i.e.
+ * is already in flight), waits for the most recent invocation
+ * to finish.  Note that if clients re-submit this thread call,
+ * it may still be pending or in flight when thread_call_cancel_wait
+ * returns, but all requests to execute this work item prior
+ * to the call to thread_call_cancel_wait will have finished.
+ */
+boolean_t
+thread_call_cancel_wait(
+               thread_call_t           call)
+{
+       boolean_t               result;
+       thread_call_group_t     group;
+
+       if ((call->tc_flags & THREAD_CALL_ALLOC) == 0) {
+               panic("%s: Can't wait on thread call whose storage I don't own.", __FUNCTION__);
+       }
+
+       group = thread_call_get_group(call);
+
+       (void) splsched();
+       thread_call_lock_spin();
+
+       result = _call_dequeue(call, group);
+       if (result == FALSE) {
+               thread_call_wait_locked(call);
+       }
+
+       thread_call_unlock();
+       (void) spllo();
+
+       return result;
+}
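
A teardown sketch for calls the subsystem owns (THREAD_CALL_ALLOC):
cancel-and-wait guarantees that every submission made before this point
has finished, after which the reference can be dropped safely.

    /* FALSE means the call was in flight and we waited for it to finish */
    (void) thread_call_cancel_wait(call);
    (void) thread_call_free(call);
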
+
+
+#ifndef        __LP64__
+
+/*
+ *     thread_call_is_delayed:
  *
- * Preconditions:      Callable from an interrupt context
- *                                     below splsched.
+ *     Returns TRUE if the call is
+ *     currently on a delayed queue.
  *
- * Postconditions:     None.
+ *     Optionally returns the expiration time.
  */
-
 boolean_t
 thread_call_is_delayed(
        thread_call_t           call,
        uint64_t                        *deadline)
 {
-       boolean_t               result = FALSE;
-       int                             s;
+       boolean_t                       result = FALSE;
+       thread_call_group_t             group;
+       spl_t                           s;
+
+       group = thread_call_get_group(call);
 
        s = splsched();
-       simple_lock(&thread_call_lock);
+       thread_call_lock_spin();
 
-       if (call->state == DELAYED) {
+       if (call->tc_call.queue == &group->delayed_queue) {
                if (deadline != NULL)
-                       *deadline = call->deadline;
+                       *deadline = call->tc_call.deadline;
                result = TRUE;
        }
 
-       simple_unlock(&thread_call_lock);
+       thread_call_unlock();
        splx(s);
 
        return (result);
 }
 
+#endif /* __LP64__ */
+
 /*
- * Routine:    _call_thread_wake [private, inline]
+ *     thread_call_wake:
  *
- * Purpose:    Wake a callout thread to service
- *             pending callout entries.  May wake
- *             the activate thread in order to
- *             create additional callout threads.
+ *     Wake a call thread to service
+ *     pending call entries.  May wake
+ *     the daemon thread in order to
+ *     create additional call threads.
  *
- * Preconditions:      thread_call_lock held.
+ *     Called with thread_call_lock held.
  *
- * Postconditions:     None.
+ *     For the high-priority group, wakeup/creation happens only when
+ *     no threads are running.
  */
-
-static __inline__
-void
-_call_thread_wake(void)
+static __inline__ void
+thread_call_wake(
+       thread_call_group_t             group)
 {
-       if (wait_queue_wakeup_one(
-                                       &call_thread_idle_queue, &call_thread_idle_queue,
-                                                                               THREAD_AWAKENED) == KERN_SUCCESS) {
-               thread_calls.idle_thread_num--;
-
-               if (++thread_calls.active_num > thread_calls.active_hiwat)
-                       thread_calls.active_hiwat = thread_calls.active_num;
-       }
-       else
-       if (!activate_thread_awake) {
-               clear_wait(activate_thread, THREAD_AWAKENED);
-               activate_thread_awake = TRUE;
+       /* 
+        * New behavior: use threads if you've got 'em.
+        * Traditional behavior: wake only if no threads running.
+        */
+       if (group_isparallel(group) || group->active_count == 0) {
+               if (wait_queue_wakeup_one(&group->idle_wqueue, NO_EVENT, THREAD_AWAKENED, -1) == KERN_SUCCESS) {
+                       group->idle_count--; group->active_count++;
+
+                       if (group->idle_count == 0) {
+                               timer_call_cancel(&group->dealloc_timer);
+                               group->flags &= TCG_DEALLOC_ACTIVE;
+                       }
+               } else {
+                       if (!thread_call_daemon_awake && thread_call_group_should_add_thread(group)) {
+                               thread_call_daemon_awake = TRUE;
+                               wait_queue_wakeup_one(&daemon_wqueue, NO_EVENT, THREAD_AWAKENED, -1);
+                       }
+               }
        }
 }
 
 /*
- * Routine:    call_thread_block [private]
- *
- * Purpose:    Hook via thread dispatch on
- *             the occasion of a callout blocking.
- *
- * Preconditions:      splsched.
+ *     sched_call_thread:
  *
- * Postconditions:     None.
+ *     Call out invoked by the scheduler.  Used only for high-priority
+ *     thread call group.
  */
-
-void
-call_thread_block(void)
+static void
+sched_call_thread(
+               int                             type,
+               __unused        thread_t                thread)
 {
-       simple_lock(&thread_call_lock);
+       thread_call_group_t             group;
 
-       if (--thread_calls.active_num < thread_calls.active_lowat)
-               thread_calls.active_lowat = thread_calls.active_num;
+       group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH]; /* XXX */
 
-       if (    thread_calls.active_num <= 0    &&
-                       thread_calls.pending_num > 0            )
-               _call_thread_wake();
+       thread_call_lock_spin();
 
-       simple_unlock(&thread_call_lock);
+       switch (type) {
+
+               case SCHED_CALL_BLOCK:
+                       --group->active_count;
+                       if (group->pending_count > 0)
+                               thread_call_wake(group);
+                       break;
+
+               case SCHED_CALL_UNBLOCK:
+                       group->active_count++;
+                       break;
+       }
+
+       thread_call_unlock();
 }
 
-/*
- * Routine:    call_thread_unblock [private]
- *
- * Purpose:    Hook via thread wakeup on
- *             the occasion of a callout unblocking.
- *
- * Preconditions:      splsched.
- *
- * Postconditions:     None.
+/* 
+ * Interrupts disabled, lock held; returns the same way. 
+ * Only called on thread calls whose storage we own.  Wakes up
+ * anyone who might be waiting on this work item and frees it
+ * if the client has so requested.
  */
-
-void
-call_thread_unblock(void)
+static void
+thread_call_finish(thread_call_t call)
 {
-       simple_lock(&thread_call_lock);
+       boolean_t dowake = FALSE;
+
+       call->tc_finish_count++;
+       call->tc_refs--;
+
+       if ((call->tc_flags & THREAD_CALL_WAIT) != 0) {
+               dowake = TRUE;
+               call->tc_flags &= ~THREAD_CALL_WAIT;
+
+               /* 
+                * Dropping lock here because the sched call for the 
+                * high-pri group can take the big lock from under
+                * a thread lock.
+                */
+               thread_call_unlock();
+               thread_wakeup((event_t)call);
+               thread_call_lock_spin();
+       }
 
-       if (++thread_calls.active_num > thread_calls.active_hiwat)
-               thread_calls.active_hiwat = thread_calls.active_num;
+       if (call->tc_refs == 0) {
+               if (dowake) {
+                       panic("Someone waiting on a thread call that is scheduled for free: %p\n", call->tc_call.func);
+               }
+
+               enable_ints_and_unlock();
+
+               zfree(thread_call_zone, call);
+
+               (void)disable_ints_and_lock();
+       }
 
-       simple_unlock(&thread_call_lock);
 }
 
 /*
- * Routine:    _call_thread [private]
- *
- * Purpose:    Executed by a callout thread.
- *
- * Preconditions:      None.
- *
- * Postconditions:     None.
+ *     thread_call_thread:
  */
-
-static
-void
-_call_thread_continue(void)
+static void
+thread_call_thread(
+               thread_call_group_t             group,
+               wait_result_t                   wres)
 {
-       thread_t                self = current_thread();
+       thread_t        self = current_thread();
+       boolean_t       canwait;
 
-    (void) splsched();
-    simple_lock(&thread_call_lock);
+       /*
+        * A wakeup with THREAD_INTERRUPTED indicates that 
+        * we should terminate.
+        */
+       if (wres == THREAD_INTERRUPTED) {
+               thread_terminate(self);
 
-       self->active_callout = TRUE;
+               /* NOTREACHED */
+               panic("thread_terminate() returned?");
+       }
 
-    while (thread_calls.pending_num > 0) {
+       (void)disable_ints_and_lock();
+
+       thread_sched_call(self, group->sched_call);
+
+       while (group->pending_count > 0) {
                thread_call_t                   call;
                thread_call_func_t              func;
                thread_call_param_t             param0, param1;
 
-               call = TC(dequeue_head(&pending_call_queue));
-               thread_calls.pending_num--;
+               call = TC(dequeue_head(&group->pending_queue));
+               group->pending_count--;
 
-               func = call->func;
-               param0 = call->param0;
-               param1 = call->param1;
-       
-               call->state = IDLE;
+               func = call->tc_call.func;
+               param0 = call->tc_call.param0;
+               param1 = call->tc_call.param1;
+
+               call->tc_call.queue = NULL;
 
                _internal_call_release(call);
 
-               simple_unlock(&thread_call_lock);
-               (void) spllo();
+               /*
+                * Can only do wakeups for thread calls whose storage
+                * we control.
+                */
+               if ((call->tc_flags & THREAD_CALL_ALLOC) != 0) {
+                       canwait = TRUE;
+                       call->tc_refs++;        /* Delay free until we're done */
+               } else
+                       canwait = FALSE;
+
+               enable_ints_and_unlock();
+
+               KERNEL_DEBUG_CONSTANT(
+                               MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_NONE,
+                               VM_KERNEL_UNSLIDE(func), param0, param1, 0, 0);
 
                (*func)(param0, param1);
 
-               (void)thread_funnel_set(self->funnel_lock, FALSE);
+               if (get_preemption_level() != 0) {
+                       int pl = get_preemption_level();
+                       panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
+                                       pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
+               }
 
-               (void) splsched();
-               simple_lock(&thread_call_lock);
-    }
+               (void)thread_funnel_set(self->funnel_lock, FALSE);              /* XXX */
 
-       self->active_callout = FALSE;
+               (void) disable_ints_and_lock();
+               
+               if (canwait) {
+			/* Drops our reference; frees the call if the client requested it */
+                       thread_call_finish(call);
+               }
+       }
 
-       if (--thread_calls.active_num < thread_calls.active_lowat)
-               thread_calls.active_lowat = thread_calls.active_num;
-       
-    if (thread_calls.idle_thread_num < thread_calls.thread_lowat) {
-               thread_calls.idle_thread_num++;
+       thread_sched_call(self, NULL);
+       group->active_count--;
 
-               wait_queue_assert_wait(
-                                       &call_thread_idle_queue, &call_thread_idle_queue,
-                                                                                                               THREAD_INTERRUPTIBLE);
-       
-               simple_unlock(&thread_call_lock);
-               (void) spllo();
+       if (group_isparallel(group)) {
+		/*
+		 * For the new (parallel) style of thread group, the thread
+		 * always blocks.  If we now exceed the target number of
+		 * threads, this is the first thread to go idle, and the
+		 * deallocation timer isn't already armed, start a timer to
+		 * deallocate a thread should the surplus persist.
+		 */
+               group->idle_count++;
 
-               thread_block(_call_thread_continue);
-               /* NOTREACHED */
-    }
-    
-    thread_calls.thread_num--;
-    
-    simple_unlock(&thread_call_lock);
-    (void) spllo();
-    
-    (void) thread_terminate(self->top_act);
-       /* NOTREACHED */
-}
+               if (group->idle_count == 1) {
+                       group->idle_timestamp = mach_absolute_time();
+               }   
 
-static
-void
-_call_thread(void)
-{
-       thread_t                                        self = current_thread();
+               if (((group->flags & TCG_DEALLOC_ACTIVE) == 0) &&
+                               ((group->active_count + group->idle_count) > group->target_thread_count)) {
+                       group->flags |= TCG_DEALLOC_ACTIVE;
+                       thread_call_start_deallocate_timer(group);
+               }   
+
+               /* Wait for more work (or termination) */
+               wres = wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_INTERRUPTIBLE, 0); 
+               if (wres != THREAD_WAITING) {
+                       panic("kcall worker unable to assert wait?");
+               }   
+
+               enable_ints_and_unlock();
+
+               thread_block_parameter((thread_continue_t)thread_call_thread, group);
+       } else {
+               if (group->idle_count < group->target_thread_count) {
+                       group->idle_count++;
 
-    stack_privilege(self);
+                       wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_UNINT, 0); /* Interrupted means to exit */
 
-    _call_thread_continue();
-    /* NOTREACHED */
+                       enable_ints_and_unlock();
+
+                       thread_block_parameter((thread_continue_t)thread_call_thread, group);
+                       /* NOTREACHED */
+               }
+       }
+
+       enable_ints_and_unlock();
+
+       thread_terminate(self);
+       /* NOTREACHED */
 }
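
For context, the worker loop above services invocations queued through the thread-call KPI. A hedged client-side sketch (thread_call_allocate(), thread_call_enter1(), and thread_call_free() are the established entry points; my_callout, ctx, and arg are illustrative names):

	static void
	my_callout(thread_call_param_t p0, thread_call_param_t p1)
	{
		/* Runs on a kcall worker thread; must return with the
		 * preemption level at zero, or the panic above fires. */
	}

	static void
	example_client(void *ctx, void *arg)	/* ctx/arg are illustrative */
	{
		thread_call_t tc;

		tc = thread_call_allocate(my_callout, (thread_call_param_t)ctx);
		thread_call_enter1(tc, (thread_call_param_t)arg);	/* queue one invocation */
		/* ... later, once cancelled or completed ... */
		thread_call_free(tc);
	}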
 
 /*
- * Routine:    _activate_thread [private]
- *
- * Purpose:    Executed by the activate thread.
- *
- * Preconditions:      None.
- *
- * Postconditions:     Never terminates.
+ *	thread_call_daemon: walk the list of groups, allocating
+ *	threads as needed (as determined by
+ *	thread_call_group_should_add_thread()).
  */
-
-static
-void
-_activate_thread_continue(void)
+static void
+thread_call_daemon_continue(__unused void *arg)
 {
-    (void) splsched();
-    simple_lock(&thread_call_lock);
-        
-       while (         thread_calls.active_num <= 0    &&
-                               thread_calls.pending_num > 0            ) {
+       int             i;
+       kern_return_t   kr;
+       thread_call_group_t group;
+
+       (void)disable_ints_and_lock();
+
+	/* Index zero is the high-priority group, so this services high-priority groups first. */
+       for (i = 0; i < THREAD_CALL_GROUP_COUNT; i++) {
+               group = &thread_call_groups[i];
+               while (thread_call_group_should_add_thread(group)) {
+                       group->active_count++;
+
+                       enable_ints_and_unlock();
+
+                       kr = thread_call_thread_create(group);
+                       if (kr != KERN_SUCCESS) {
+				/*
+				 * On failure, pause briefly and bail out of
+				 * this pass; we will retry the next time the
+				 * daemon is woken.
+				 */
+                               delay(10000); /* 10 ms */
+                               (void)disable_ints_and_lock();
+                               goto out;
+                       }
+
+                       (void)disable_ints_and_lock();
+               }
+       }
 
-               if (++thread_calls.active_num > thread_calls.active_hiwat)
-                       thread_calls.active_hiwat = thread_calls.active_num;
+out:
+       thread_call_daemon_awake = FALSE;
+       wait_queue_assert_wait(&daemon_wqueue, NO_EVENT, THREAD_UNINT, 0);
 
-               if (++thread_calls.thread_num > thread_calls.thread_hiwat)
-                       thread_calls.thread_hiwat = thread_calls.thread_num;
+       enable_ints_and_unlock();
 
-               simple_unlock(&thread_call_lock);
-               (void) spllo();
-       
-               (void) kernel_thread_with_priority(
-                                                                       kernel_task, MAXPRI_KERNEL - 1,
-                                                                                                       _call_thread, TRUE, TRUE);
-               (void) splsched();
-               simple_lock(&thread_call_lock);
-    }
-               
-    assert_wait(&activate_thread_awake, THREAD_INTERRUPTIBLE);
-       activate_thread_awake = FALSE;
-    
-    simple_unlock(&thread_call_lock);
-       (void) spllo();
-    
-       thread_block(_activate_thread_continue);
+       thread_block_parameter((thread_continue_t)thread_call_daemon_continue, NULL);
        /* NOTREACHED */
 }
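
thread_call_group_should_add_thread() is defined earlier in this file; as a rough approximation (an assumption, not the exact policy), it asks whether queued work exceeds what the group's current threads can absorb:

	/* Simplified sketch of the policy consulted above; the real
	 * function also caps the pool size and weighs idle threads. */
	static boolean_t
	should_add_thread_sketch(thread_call_group_t group)
	{
		return (group->pending_count > 0 &&
		    group->idle_count == 0 &&
		    group->active_count < group->target_thread_count);
	}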
 
-static
-void
-_activate_thread(void)
+static void
+thread_call_daemon(
+               __unused void    *arg)
 {
-       thread_t                self = current_thread();
+       thread_t        self = current_thread();
 
-       self->vm_privilege = TRUE;
+       self->options |= TH_OPT_VMPRIV;
        vm_page_free_reserve(2);        /* XXX */
-    stack_privilege(self);
-    
-    _activate_thread_continue();
-    /* NOTREACHED */
+
+       thread_call_daemon_continue(NULL);
+       /* NOTREACHED */
+}
+
+/*
+ * Schedule a timer to deallocate a worker thread if we have a surplus
+ * of threads (in excess of the group's target) and at least one thread
+ * stays idle for the entire interval.
+ */
+static void
+thread_call_start_deallocate_timer(
+               thread_call_group_t group)
+{
+	uint64_t deadline;
+	boolean_t onqueue;
+
+	assert(group->idle_count > 0);
+
+	group->flags |= TCG_DEALLOC_ACTIVE;
+	deadline = group->idle_timestamp + thread_call_dealloc_interval_abs;
+	onqueue = timer_call_enter(&group->dealloc_timer, deadline, 0);
+
+	if (onqueue) {
+		panic("Deallocate timer already active?");
+	}
 }
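
thread_call_dealloc_interval_abs is in mach_absolute_time() units; it would be derived once at init from a wall-clock interval, along these lines (the 5-second figure is illustrative, not taken from this diff):

	/* Assumed initialization (illustrative value, not from this diff). */
	uint64_t thread_call_dealloc_interval_abs;

	static void
	thread_call_init_dealloc_interval(void)
	{
		/* Convert a 5-second interval to mach_absolute_time() units. */
		nanoseconds_to_absolutetime(5ULL * NSEC_PER_SEC,
		    &thread_call_dealloc_interval_abs);
	}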
 
-static
 void
-_delayed_call_timer(
-       timer_call_param_t              p0,
-       timer_call_param_t              p1
+thread_call_delayed_timer(
+               timer_call_param_t              p0,
+               __unused timer_call_param_t     p1
 )
 {
-       uint64_t                        timestamp;
-    thread_call_t              call;
-       boolean_t                       new_pending = FALSE;
-    int                                        s;
+       thread_call_t                   call;
+       thread_call_group_t             group = p0;
+       uint64_t                                timestamp;
 
-    s = splsched();
-    simple_lock(&thread_call_lock);
+       thread_call_lock_spin();
 
-       clock_get_uptime(&timestamp);
-    
-    call = TC(queue_first(&delayed_call_queue));
-    
-    while (!queue_end(&delayed_call_queue, qe(call))) {
-       if (call->deadline <= timestamp) {
-                       _delayed_call_dequeue(call);
+       timestamp = mach_absolute_time();
+
+       call = TC(queue_first(&group->delayed_queue));
 
-                       _pending_call_enqueue(call);
-                       new_pending = TRUE;
+       while (!queue_end(&group->delayed_queue, qe(call))) {
+               if (call->tc_call.deadline <= timestamp) {
+                       _pending_call_enqueue(call, group);
                }
                else
                        break;
-           
-               call = TC(queue_first(&delayed_call_queue));
-    }
 
-       if (!queue_end(&delayed_call_queue, qe(call)))
-               _set_delayed_call_timer(call);
+               call = TC(queue_first(&group->delayed_queue));
+       }
+
+       if (!queue_end(&group->delayed_queue, qe(call)))
+               _set_delayed_call_timer(call, group);
+
+       thread_call_unlock();
+}
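
_set_delayed_call_timer() is defined earlier in the file; in effect it re-arms the group's timer for the earliest remaining deadline, roughly:

	/* Sketch of the re-arm performed above (the real helper is defined
	 * earlier in this file): program the group's timer for the
	 * earliest still-pending delayed deadline. */
	timer_call_enter(&group->delayed_timer, call->tc_call.deadline, 0);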
+
+/*
+ * Timer callback to tell a thread to terminate if
+ * we have an excess of threads and at least one has been
+ * idle for a long time.
+ */
+static void
+thread_call_dealloc_timer(
+               timer_call_param_t              p0,
+               __unused timer_call_param_t     p1)
+{
+       thread_call_group_t group = (thread_call_group_t)p0;
+       uint64_t now;
+       kern_return_t res;
+       boolean_t terminated = FALSE;
+       
+       thread_call_lock_spin();
+
+       now = mach_absolute_time();
+       if (group->idle_count > 0) {
+               if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) {
+                       terminated = TRUE;
+                       group->idle_count--;
+                       res = wait_queue_wakeup_one(&group->idle_wqueue, NO_EVENT, THREAD_INTERRUPTED, -1);
+                       if (res != KERN_SUCCESS) {
+                               panic("Unable to wake up idle thread for termination?");
+                       }
+		}
+	}
+
+       /*
+        * If we still have an excess of threads, schedule another
+        * invocation of this function.
+        */
+       if (group->idle_count > 0 && (group->idle_count + group->active_count > group->target_thread_count)) {
+		/*
+		 * If we just terminated a thread, push the next
+		 * deadline out by a full interval.
+		 */
+               if (terminated) {
+                       group->idle_timestamp = now;
+               }
+
+               thread_call_start_deallocate_timer(group);
+       } else {
+               group->flags &= ~TCG_DEALLOC_ACTIVE;
+       }
+
+       thread_call_unlock();
+}
+
+/*
+ * Wait for all invocations of a thread call requested before now to
+ * finish.  May only be invoked on thread calls whose storage we manage
+ * (THREAD_CALL_ALLOC).  Waits for the finish count to catch up to the
+ * submit count sampled at the start of the wait.
+ */
+static void
+thread_call_wait_locked(thread_call_t call)
+{
+       uint64_t submit_count;
+       wait_result_t res;
+
+       assert(call->tc_flags & THREAD_CALL_ALLOC);
+
+       submit_count = call->tc_submit_count;
+
+       while (call->tc_finish_count < submit_count) {
+               call->tc_flags |= THREAD_CALL_WAIT;
+
+               res = assert_wait(call, THREAD_UNINT);
+               if (res != THREAD_WAITING) {
+                       panic("Unable to assert wait?");
+               }
 
-    if (new_pending && thread_calls.active_num <= 0)
-               _call_thread_wake();
+               thread_call_unlock();
+               (void) spllo();
 
-    simple_unlock(&thread_call_lock);
-    splx(s);
+               res = thread_block(NULL);
+               if (res != THREAD_AWAKENED) {
+                       panic("Awoken with %d?", res);
+               }
+       
+               (void) splsched();
+               thread_call_lock_spin();
+       }
 }
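
This primitive backs the waiting flavor of cancellation: the caller samples tc_submit_count, then sleeps until tc_finish_count catches up. A hedged sketch of the cancel-and-wait path built on it (helper names as used elsewhere in this file; treat the exact shape as an assumption):

	/* If the call cannot be dequeued, an invocation is in flight;
	 * block until the finish count catches up.  Allocated calls only. */
	(void) disable_ints_and_lock();
	if (_call_dequeue(call, thread_call_get_group(call)) == FALSE)
		thread_call_wait_locked(call);
	enable_ints_and_unlock();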
+
+/*
+ * Determine whether a thread call is either on a queue or
+ * currently being executed.
+ */
+boolean_t
+thread_call_isactive(thread_call_t call) 
+{
+       boolean_t active;
+
+       disable_ints_and_lock();
+       active = (call->tc_submit_count > call->tc_finish_count);
+       enable_ints_and_unlock();
+
+       return active;
+}
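
A typical (illustrative) use is a safety check before tearing down state that a queued or in-flight invocation might still touch:

	/* Illustrative only: ensure nothing is queued or running before
	 * releasing resources the callout dereferences. */
	assert(!thread_call_isactive(tc));
	thread_call_free(tc);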
+