xnu-2050.48.11: apple/xnu.git / bsd/kern/pthread_synch.c
index 7a00399cc23cd26e444a9a958bbd04a946858f8f..d037ee0a1402234839c5e9c7754c9776403d3d13 100644
@@ -91,6 +91,7 @@
 #include <mach/port.h>
 #include <vm/vm_protos.h>
 #include <vm/vm_map.h> /* for current_map() */
+#include <vm/vm_fault.h>
 #include <mach/thread_act.h> /* for thread_resume */
 #include <machine/machine_routines.h>
 #if defined(__i386__)
 #define KERNEL_DEBUG1 KERNEL_DEBUG_CONSTANT1
 #endif
 
-
-#if defined(__ppc__) || defined(__ppc64__)
-#include <architecture/ppc/cframe.h>
-#endif
-
-
 lck_grp_attr_t   *pthread_lck_grp_attr;
 lck_grp_t    *pthread_lck_grp;
 lck_attr_t   *pthread_lck_attr;
 
-extern kern_return_t thread_getstatus(register thread_t act, int flavor,
-                       thread_state_t tstate, mach_msg_type_number_t *count);
-extern kern_return_t thread_setstatus(thread_t thread, int flavor,
-                       thread_state_t tstate, mach_msg_type_number_t count);
 extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
 extern kern_return_t mach_port_deallocate(ipc_space_t, mach_port_name_t);
 extern kern_return_t semaphore_signal_internal_trap(mach_port_name_t);
 
 extern void workqueue_thread_yielded(void);
 
-static int workqueue_additem(struct workqueue *wq, int prio, user_addr_t item, int affinity);
-static int workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item);
-static boolean_t workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t th,
-                                       user_addr_t oc_item, int oc_prio, int oc_affinity);
-static void wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
+#if defined(__i386__) || defined(__x86_64__)
+extern boolean_t is_useraddr64_canonical(uint64_t addr64);
+#endif
+
+static boolean_t workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t th, boolean_t force_oc,
+                                       boolean_t  overcommit, int oc_prio, int oc_affinity);
+
+static boolean_t workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, int priority);
+
+static void wq_runreq(proc_t p, boolean_t overcommit, uint32_t priority, thread_t th, struct threadlist *tl,
                       int reuse_thread, int wake_thread, int return_directly);
+
+static int setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, uint32_t priority, int reuse_thread, struct threadlist *tl);
+
 static void wq_unpark_continue(void);
 static void wq_unsuspend_continue(void);
-static int setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl);
-static boolean_t workqueue_addnewthread(struct workqueue *wq);
-static void workqueue_removethread(struct threadlist *tl);
+
+static boolean_t workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread);
+static void workqueue_removethread(struct threadlist *tl, int fromexit);
 static void workqueue_lock_spin(proc_t);
 static void workqueue_unlock(proc_t);
+
 int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc);
 int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);
 
@@ -156,6 +156,12 @@ int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);
 #define TRUNC_DOWN64(a,c)       ((((uint64_t)a)-(c)) & ((uint64_t)(-(c))))
 
 
+/* flag values for reuse field in the libc side _pthread_wqthread */
+#define        WQ_FLAG_THREAD_PRIOMASK         0x0000ffff
+#define        WQ_FLAG_THREAD_OVERCOMMIT       0x00010000      /* thread is with overcommit prio */
+#define        WQ_FLAG_THREAD_REUSE            0x00020000      /* thread is being reused */
+#define        WQ_FLAG_THREAD_NEWSPI           0x00040000      /* the call is with new SPIs */
+
 /*
 * Flags field passed to bsdthread_create and back in pthread_start 
 31  <---------------------------------> 0
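
The WQ_FLAG_* values defined in the hunk above are packed into the single "reuse" word the kernel hands back to libc's _pthread_wqthread. A minimal user-space sketch of unpacking that word (the struct and helper names are illustrative, not the actual libc code):

    #include <stdint.h>
    #include <stdbool.h>

    /* Hypothetical decode of the 'reuse' word; masks mirror the WQ_FLAG_* values above. */
    struct wq_start_flags {
        uint32_t priority;      /* WQ_FLAG_THREAD_PRIOMASK bits */
        bool     overcommit;    /* WQ_FLAG_THREAD_OVERCOMMIT */
        bool     reused;        /* WQ_FLAG_THREAD_REUSE */
        bool     new_spi;       /* WQ_FLAG_THREAD_NEWSPI */
    };

    static struct wq_start_flags
    wq_decode_start_flags(uint32_t reuse)
    {
        struct wq_start_flags f;
        f.priority   = reuse & 0x0000ffff;
        f.overcommit = (reuse & 0x00010000) != 0;
        f.reused     = (reuse & 0x00020000) != 0;
        f.new_spi    = (reuse & 0x00040000) != 0;
        return f;
    }
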
@@ -215,9 +221,7 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args  *uap, us
        isLP64 = IS_64BIT_PROCESS(p);
 
 
-#if defined(__ppc__)
-       stackaddr = 0xF0000000;
-#elif defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__)
        stackaddr = 0xB0000000;
 #else
 #error Need to define a stack address hint for this architecture
@@ -266,6 +270,22 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args  *uap, us
                th_stack = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
                th_pthread = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
                user_stacksize = th_stacksize;
+               
+              /*
+               * Pre-fault the first page of the new thread's stack and the page that will
+               * contain the pthread_t structure.
+               */      
+               vm_fault( vmap,
+                 vm_map_trunc_page(th_stack - PAGE_SIZE_64),
+                 VM_PROT_READ | VM_PROT_WRITE,
+                 FALSE, 
+                 THREAD_UNINT, NULL, 0);
+               
+               vm_fault( vmap,
+                 vm_map_trunc_page(th_pthread),
+                 VM_PROT_READ | VM_PROT_WRITE,
+                 FALSE, 
+                 THREAD_UNINT, NULL, 0);
        } else {
                th_stack = user_stack;
                user_stacksize = user_stack;
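
The two vm_fault() calls added above warm the top page of the new stack and the page that will hold the pthread_t before the thread first runs, so its first stores do not take a fault. A rough user-space analogue of the same idea, assuming stacksize and guardsize are page multiples (illustrative only, not the kernel path):

    #include <sys/mman.h>
    #include <string.h>
    #include <stddef.h>

    /* Allocate a thread stack with a guard region below it and a descriptor page
     * above it, then touch the pages that will be written first. */
    static void *alloc_prefaulted_stack(size_t stacksize, size_t guardsize, size_t pagesize)
    {
        size_t total = guardsize + stacksize + pagesize;
        char *base = mmap(NULL, total, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANON, -1, 0);
        if (base == MAP_FAILED)
            return NULL;

        mprotect(base, guardsize, PROT_NONE);            /* guard pages below the stack */

        char *stack_top = base + guardsize + stacksize;  /* stack grows down from here */
        memset(stack_top - pagesize, 0, pagesize);       /* pre-fault first stack page */
        memset(stack_top, 0, pagesize);                  /* pre-fault the descriptor page */
        return stack_top;
    }
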
@@ -275,31 +295,7 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args  *uap, us
 #endif
        }
        
-#if defined(__ppc__)
-       /*
-        * Set up PowerPC registers...
-        * internally they are always kept as 64 bit and
-        * since the register set is the same between 32 and 64bit modes
-        * we don't need 2 different methods for setting the state
-        */
-       {
-               ppc_thread_state64_t state64;
-               ppc_thread_state64_t *ts64 = &state64;
-
-               ts64->srr0 = (uint64_t)p->p_threadstart;
-               ts64->r1 = (uint64_t)(th_stack - C_ARGSAVE_LEN - C_RED_ZONE);
-               ts64->r3 = (uint64_t)th_pthread;
-               ts64->r4 = (uint64_t)(th_thport);
-               ts64->r5 = (uint64_t)user_func;
-               ts64->r6 = (uint64_t)user_funcarg;
-               ts64->r7 = (uint64_t)user_stacksize;
-               ts64->r8 = (uint64_t)uap->flags;
-
-               thread_set_wq_state64(th, (thread_state_t)ts64);
-
-               thread_set_cthreadself(th, (uint64_t)th_pthread, isLP64);
-       }
-#elif defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__)
        {
         /*
          * Set up i386 registers & function call.
@@ -338,6 +334,13 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args  *uap, us
                 */
                ts64->rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN);
 
+               /* Disallow setting non-canonical PC or stack */
+               if (!is_useraddr64_canonical(ts64->rsp) ||
+                   !is_useraddr64_canonical(ts64->rip)) {
+                       error = EINVAL;
+                       goto out;
+               }
+
                thread_set_wq_state64(th, (thread_state_t)ts64);
        }
        }
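
The new check rejects a non-canonical rip or rsp before the thread state is installed. A minimal sketch of the rule being enforced, assuming the standard x86-64 48-bit canonical-address definition (an illustration, not the kernel's is_useraddr64_canonical() implementation):

    #include <stdint.h>
    #include <stdbool.h>

    /* Canonical: bits 63..47 are copies of bit 47. */
    static bool addr64_is_canonical(uint64_t addr)
    {
        return (uint64_t)((int64_t)(addr << 16) >> 16) == addr;   /* sign-extend from bit 47 */
    }

    /* User-space addresses live in the lower half, so the top 17 bits are zero. */
    static bool useraddr64_is_canonical(uint64_t addr)
    {
        return (addr >> 47) == 0;
    }
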
@@ -348,8 +351,16 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args  *uap, us
        if ((flags & PTHREAD_START_SETSCHED) != 0) {
                thread_extended_policy_data_t    extinfo;
                thread_precedence_policy_data_t   precedinfo;
+#if CONFIG_EMBEDDED
+               int ret = 0;
+#endif /* CONFIG_EMBEDDED */
 
                importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
+#if CONFIG_EMBEDDED
+               /* sets the saved importance for an Apple iOS daemon if backgrounded; otherwise returns 0 */
+               ret = proc_setthread_saved_importance(th, importance);
+               if (ret == 0) {
+#endif /* CONFIG_EMBEDDED */
                policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
 
                if (policy == SCHED_OTHER)
@@ -361,6 +372,9 @@ bsdthread_create(__unused struct proc *p, struct bsdthread_create_args  *uap, us
 #define BASEPRI_DEFAULT 31
                precedinfo.importance = (importance - BASEPRI_DEFAULT);
                thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
+#if CONFIG_EMBEDDED
+               }
+#endif /* CONFIG_EMBEDDED */
        }
 
        kret = thread_resume(th);
@@ -453,26 +467,33 @@ uint32_t wq_stalled_window_usecs  = WQ_STALLED_WINDOW_USECS;
 uint32_t wq_reduce_pool_window_usecs   = WQ_REDUCE_POOL_WINDOW_USECS;
 uint32_t wq_max_timer_interval_usecs   = WQ_MAX_TIMER_INTERVAL_USECS;
 uint32_t wq_max_threads                        = WORKQUEUE_MAXTHREADS;
+uint32_t wq_max_constrained_threads    = WORKQUEUE_MAXTHREADS / 8;
 
 
-SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW,
+SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
           &wq_yielded_threshold, 0, "");
 
-SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW,
+SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
           &wq_yielded_window_usecs, 0, "");
 
-SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW,
+SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
           &wq_stalled_window_usecs, 0, "");
 
-SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW,
+SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
           &wq_reduce_pool_window_usecs, 0, "");
 
-SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW,
+SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
           &wq_max_timer_interval_usecs, 0, "");
 
-SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW,
+SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
           &wq_max_threads, 0, "");
 
+SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
+          &wq_max_constrained_threads, 0, "");
+
+
+static uint32_t wq_init_constrained_limit = 1;
+
 
 void
 workqueue_init_lock(proc_t p)
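
Like the existing knobs, the new constrained-pool limit is tunable at run time through sysctl. A small user-space probe, assuming a kernel with this change applied (the OID name follows from the SYSCTL_INT declaration above):

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t limit = 0;
        size_t len = sizeof(limit);

        if (sysctlbyname("kern.wq_max_constrained_threads", &limit, &len, NULL, 0) == 0)
            printf("constrained workqueue thread limit: %u\n", limit);
        else
            perror("sysctlbyname");
        return 0;
    }
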
@@ -519,7 +540,7 @@ workqueue_interval_timer_start(struct workqueue *wq)
 
        thread_call_enter_delayed(wq->wq_atimer_call, deadline);
 
-       KERNEL_DEBUG(0xefffd110, wq, wq->wq_itemcount, wq->wq_flags, wq->wq_timer_interval, 0);
+       KERNEL_DEBUG(0xefffd110, wq, wq->wq_reqcount, wq->wq_flags, wq->wq_timer_interval, 0);
 }
 
 
@@ -542,11 +563,9 @@ wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp)
         */
        lastblocked_ts = *lastblocked_tsp;
 
-#if defined(__ppc__)
-#else
        if ( !OSCompareAndSwap64((UInt64)lastblocked_ts, (UInt64)lastblocked_ts, lastblocked_tsp))
                return (TRUE);
-#endif
+
        if (lastblocked_ts >= cur_ts) {
                /*
                 * because the update of the timestamp when a thread blocks isn't
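
With the PPC special case gone, the self-compare OSCompareAndSwap64(x, x, p) above is kept as a 32-bit-safe atomic read of the 64-bit timestamp: if the exchange fails, the value changed mid-read and the thread is simply treated as busy. In C11 terms the read side is just an atomic load; a sketch, not the kernel code:

    #include <stdatomic.h>
    #include <stdint.h>

    /* Equivalent read of the last-blocked timestamp using a C11 atomic load. */
    static uint64_t read_lastblocked(_Atomic uint64_t *lastblocked_tsp)
    {
        return atomic_load_explicit(lastblocked_tsp, memory_order_relaxed);
    }
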
@@ -641,14 +660,14 @@ again:
                 * new work within our acceptable time interval because
                 * there were no idle threads left to schedule
                 */
-               if (wq->wq_itemcount) {
+               if (wq->wq_reqcount) {
                        uint32_t        priority;
                        uint32_t        affinity_tag;
                        uint32_t        i;
                        uint64_t        curtime;
 
                        for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
-                               if (wq->wq_list_bitmap & (1 << priority))
+                               if (wq->wq_requests[priority])
                                        break;
                        }
                        assert(priority < WORKQUEUE_NUMPRIOS);
@@ -682,27 +701,27 @@ again:
                                        }
                                }
                                if (add_thread == TRUE) {
-                                       retval = workqueue_addnewthread(wq);
+                                       retval = workqueue_addnewthread(wq, FALSE);
                                        break;
                                }
                        }
-                       if (wq->wq_itemcount) {
+                       if (wq->wq_reqcount) {
                                /*
                                 * as long as we have threads to schedule, and we successfully
                                 * scheduled new work, keep trying
                                 */
                                while (wq->wq_thidlecount && !(wq->wq_flags & WQ_EXITING)) {
                                        /*
-                                        * workqueue_run_nextitem is responsible for
+                                        * workqueue_run_nextreq is responsible for
                                         * dropping the workqueue lock in all cases
                                         */
-                                       retval = workqueue_run_nextitem(p, wq, THREAD_NULL, 0, 0, 0);
+                                       retval = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, FALSE, 0, 0);
                                        workqueue_lock_spin(p);
 
                                        if (retval == FALSE)
                                                break;
                                }
-                               if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_itemcount) {
+                               if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_reqcount) {
 
                                        if (wq->wq_thidlecount == 0 && retval == TRUE && add_thread == TRUE)
                                                goto again;
@@ -710,7 +729,7 @@ again:
                                        if (wq->wq_thidlecount == 0 || busycount)
                                                WQ_TIMER_NEEDED(wq, start_timer);
 
-                                       KERNEL_DEBUG(0xefffd108 | DBG_FUNC_NONE, wq, wq->wq_itemcount, wq->wq_thidlecount, busycount, 0);
+                                       KERNEL_DEBUG(0xefffd108 | DBG_FUNC_NONE, wq, wq->wq_reqcount, wq->wq_thidlecount, busycount, 0);
                                }
                        }
                }
@@ -745,12 +764,12 @@ workqueue_thread_yielded(void)
 
        p = current_proc();
 
-       if ((wq = p->p_wqptr) == NULL || wq->wq_itemcount == 0)
+       if ((wq = p->p_wqptr) == NULL || wq->wq_reqcount == 0)
                return;
        
        workqueue_lock_spin(p);
 
-       if (wq->wq_itemcount) {
+       if (wq->wq_reqcount) {
                uint64_t        curtime;
                uint64_t        elapsed;
                clock_sec_t     secs;
@@ -763,7 +782,7 @@ workqueue_thread_yielded(void)
                        workqueue_unlock(p);
                        return;
                }
-               KERNEL_DEBUG(0xefffd138 | DBG_FUNC_START, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 0, 0);
+               KERNEL_DEBUG(0xefffd138 | DBG_FUNC_START, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 0, 0);
 
                wq->wq_thread_yielded_count = 0;
 
@@ -774,16 +793,16 @@ workqueue_thread_yielded(void)
                if (secs == 0 && usecs < wq_yielded_window_usecs) {
 
                        if (wq->wq_thidlecount == 0) {
-                               workqueue_addnewthread(wq);
+                               workqueue_addnewthread(wq, TRUE);
                                /*
                                 * 'workqueue_addnewthread' drops the workqueue lock
                                 * when creating the new thread and then retakes it before
                                 * returning... this window allows other threads to process
-                                * work on the queue, so we need to recheck for available work
+                                * requests, so we need to recheck for available work
                                 * if none found, we just return...  the newly created thread
                                 * will eventually get used (if it hasn't already)...
                                 */
-                               if (wq->wq_itemcount == 0) {
+                               if (wq->wq_reqcount == 0) {
                                        workqueue_unlock(p);
                                        return;
                                }
@@ -791,9 +810,8 @@ workqueue_thread_yielded(void)
                        if (wq->wq_thidlecount) {
                                uint32_t        priority;
                                uint32_t        affinity = -1;
-                               user_addr_t     item;
-                               struct workitem *witem = NULL;
-                               struct workitemlist *wl = NULL;
+                               boolean_t       overcommit = FALSE;
+                               boolean_t       force_oc = FALSE;
                                struct uthread    *uth;
                                struct threadlist *tl;
 
@@ -802,38 +820,31 @@ workqueue_thread_yielded(void)
                                        affinity = tl->th_affinity_tag;
 
                                for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
-                                       if (wq->wq_list_bitmap & (1 << priority)) {
-                                               wl = (struct workitemlist *)&wq->wq_list[priority];
+                                       if (wq->wq_requests[priority])
                                                break;
-                                       }
                                }
-                               assert(wl != NULL);
-                               assert(!(TAILQ_EMPTY(&wl->wl_itemlist)));
-
-                               witem = TAILQ_FIRST(&wl->wl_itemlist);
-                               TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);
+                               assert(priority < WORKQUEUE_NUMPRIOS);
 
-                               if (TAILQ_EMPTY(&wl->wl_itemlist))
-                                       wq->wq_list_bitmap &= ~(1 << priority);
-                               wq->wq_itemcount--;
+                               wq->wq_reqcount--;
+                               wq->wq_requests[priority]--;
 
-                               item = witem->wi_item;
-                               witem->wi_item = (user_addr_t)0;
-                               witem->wi_affinity = 0;
+                               if (wq->wq_ocrequests[priority]) {
+                                       wq->wq_ocrequests[priority]--;
+                                       overcommit = TRUE;
+                               } else
+                                       force_oc = TRUE;
 
-                               TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);
-
-                               (void)workqueue_run_nextitem(p, wq, THREAD_NULL, item, priority, affinity);
+                               (void)workqueue_run_nextreq(p, wq, THREAD_NULL, force_oc, overcommit, priority, affinity);
                                /*
-                                * workqueue_run_nextitem is responsible for
+                                * workqueue_run_nextreq is responsible for
                                 * dropping the workqueue lock in all cases
                                 */
-                               KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 1, 0);
+                               KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 1, 0);
 
                                return;
                        }
                }
-               KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 2, 0);
+               KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 2, 0);
        }
        workqueue_unlock(p);
 }
@@ -876,15 +887,10 @@ workqueue_callback(int type, thread_t thread)
                         * since another thread would have to get scheduled and then block after we start down 
                         * this path), it's not a problem.  Either timestamp is adequate, so no need to retry
                         */
-#if defined(__ppc__)
-                       /*
-                        * this doesn't have to actually work reliablly for PPC, it just has to compile/link
-                        */
-                       *lastblocked_ptr = (UInt64)curtime;
-#else
+
                        OSCompareAndSwap64(*lastblocked_ptr, (UInt64)curtime, lastblocked_ptr);
-#endif
-                       if (wq->wq_itemcount)
+
+                       if (wq->wq_reqcount)
                                WQ_TIMER_NEEDED(wq, start_timer);
 
                        if (start_timer == TRUE)
@@ -913,17 +919,23 @@ workqueue_callback(int type, thread_t thread)
 
 
 static void
-workqueue_removethread(struct threadlist *tl)
+workqueue_removethread(struct threadlist *tl, int fromexit)
 {
        struct workqueue *wq;
        struct uthread * uth;
 
+       /*
+        * If fromexit is set, the call is from workqueue_exit(),
+        * so some of the cleanup is skipped.
+        */
        wq = tl->th_workq;
 
        TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
 
-       wq->wq_nthreads--;
-       wq->wq_thidlecount--;
+       if (fromexit == 0) {
+               wq->wq_nthreads--;
+               wq->wq_thidlecount--;
+       }
 
        /*
         * Clear the threadlist pointer in uthread so 
@@ -937,7 +949,10 @@ workqueue_removethread(struct threadlist *tl)
        if (uth != (struct uthread *)0) {
                uth->uu_threadlist = NULL;
        }
-       workqueue_unlock(wq->wq_proc);
+       if (fromexit == 0) {
+               /* during exit the lock is not held */
+               workqueue_unlock(wq->wq_proc);
+       }
 
        if ( (tl->th_flags & TH_LIST_SUSPENDED) ) {
                /*
@@ -946,7 +961,10 @@ workqueue_removethread(struct threadlist *tl)
                 * since we're not going to spin up through the
                 * normal exit path triggered from Libc
                 */
-               (void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize);
+               if (fromexit == 0) {
+                       /* vm map is already deallocated when this is called from exit */
+                       (void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize);
+               }
                (void)mach_port_deallocate(get_task_ipcspace(wq->wq_task), tl->th_thport);
 
                KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));
@@ -963,9 +981,13 @@ workqueue_removethread(struct threadlist *tl)
 }
 
 
-
+/*
+ * called with workq lock held
+ * dropped and retaken around thread creation
+ * return with workq lock held
+ */
 static boolean_t
-workqueue_addnewthread(struct workqueue *wq)
+workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread)
 {
        struct threadlist *tl;
        struct uthread  *uth;
@@ -975,8 +997,28 @@ workqueue_addnewthread(struct workqueue *wq)
        void            *sright;
        mach_vm_offset_t stackaddr;
 
-       if (wq->wq_nthreads >= wq_max_threads || wq->wq_nthreads >= (CONFIG_THREAD_MAX - 20))
+       if ((wq->wq_flags & WQ_EXITING) == WQ_EXITING)
                return (FALSE);
+
+       if (wq->wq_nthreads >= wq_max_threads || wq->wq_nthreads >= (CONFIG_THREAD_MAX - 20)) {
+               wq->wq_lflags |= WQL_EXCEEDED_TOTAL_THREAD_LIMIT;
+               return (FALSE);
+       }
+       wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;
+
+       if (oc_thread == FALSE && wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
+               /*
+                * if we're not creating this thread to service an overcommit request,
+                * then check the size of the constrained thread pool...  if we've already
+                * reached our max for threads scheduled from this pool, don't create a new
+                * one... the callers of this function are prepared for failure.
+                */
+               wq->wq_lflags |= WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
+               return (FALSE);
+       }
+       if (wq->wq_constrained_threads_scheduled < wq_max_constrained_threads)
+               wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
+
        wq->wq_nthreads++;
 
        p = wq->wq_proc;
@@ -990,9 +1032,7 @@ workqueue_addnewthread(struct workqueue *wq)
        tl = kalloc(sizeof(struct threadlist));
        bzero(tl, sizeof(struct threadlist));
 
-#if defined(__ppc__)
-       stackaddr = 0xF0000000;
-#elif defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__)
        stackaddr = 0xB0000000;
 #else
 #error Need to define a stack address hint for this architecture
@@ -1023,6 +1063,7 @@ workqueue_addnewthread(struct workqueue *wq)
        }
        if (kret != KERN_SUCCESS) {
                (void) thread_terminate(th);
+               thread_deallocate(th);
 
                kfree(tl, sizeof(struct threadlist));
                goto failed;
@@ -1043,16 +1084,11 @@ workqueue_addnewthread(struct workqueue *wq)
        tl->th_priority = WORKQUEUE_NUMPRIOS;
        tl->th_policy = -1;
 
-#if defined(__ppc__)
-       //ml_fp_setvalid(FALSE);
-       thread_set_cthreadself(th, (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE), IS_64BIT_PROCESS(p));
-#endif /* __ppc__ */
-
        uth = get_bsdthread_info(tl->th_thread);
-       uth->uu_threadlist = (void *)tl;
 
         workqueue_lock_spin(p);
        
+       uth->uu_threadlist = (void *)tl;
        TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);
 
        wq->wq_thidlecount++;
@@ -1076,17 +1112,31 @@ workq_open(struct proc *p, __unused struct workq_open_args  *uap, __unused int32
        int wq_size;
        char * ptr;
        char * nptr;
-       int j;
        uint32_t i;
        uint32_t num_cpus;
        int error = 0;
        boolean_t need_wakeup = FALSE;
-       struct workitem * witem;
-       struct workitemlist *wl;
+
 
        if ((p->p_lflag & P_LREGISTER) == 0)
                return(EINVAL);
 
+       num_cpus = ml_get_max_cpus();
+
+       if (wq_init_constrained_limit) {
+               uint32_t limit;
+               /*
+                * set up the limit for the constrained pool
+                * this is a virtual pool in that we don't
+                * maintain it on a separate idle and run list
+                */
+               limit = num_cpus * (WORKQUEUE_NUMPRIOS + 1);
+
+               if (limit > wq_max_constrained_threads)
+                       wq_max_constrained_threads = limit;
+
+               wq_init_constrained_limit = 0;
+       }
        workqueue_lock_spin(p);
 
        if (p->p_wqptr == NULL) {
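
The one-time sizing above scales the constrained pool with the CPU count and only raises the boot-time default, never lowers it. A worked sketch of the arithmetic (WORKQUEUE_NUMPRIOS is assumed to be 4, matching the importance/policy tables later in this patch):

    #include <stdint.h>

    #define WORKQUEUE_NUMPRIOS 4   /* assumption for this sketch */

    /* e.g. 8 CPUs -> 8 * (4 + 1) = 40 threads, used only if larger than the default. */
    static uint32_t constrained_limit(uint32_t num_cpus, uint32_t boot_default)
    {
        uint32_t limit = num_cpus * (WORKQUEUE_NUMPRIOS + 1);
        return (limit > boot_default) ? limit : boot_default;
    }
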
@@ -1107,13 +1157,11 @@ workq_open(struct proc *p, __unused struct workq_open_args  *uap, __unused int32
 
                workqueue_unlock(p);
 
-               num_cpus = ml_get_max_cpus();
-
                wq_size = sizeof(struct workqueue) +
-                       (num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint32_t)) +
+                       (num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint16_t)) +
                        (num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint32_t)) +
                        (num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint64_t)) +
-                       sizeof(uint64_t);
+                       sizeof(uint32_t) + sizeof(uint64_t);
 
                ptr = (char *)kalloc(wq_size);
                bzero(ptr, wq_size);
@@ -1125,25 +1173,20 @@ workq_open(struct proc *p, __unused struct workq_open_args  *uap, __unused int32
                wq->wq_task = current_task();
                wq->wq_map  = current_map();
 
-               for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
-                       wl = (struct workitemlist *)&wq->wq_list[i];
-                       TAILQ_INIT(&wl->wl_itemlist);
-                       TAILQ_INIT(&wl->wl_freelist);
-
-                       for (j = 0; j < WORKITEM_SIZE; j++) {
-                               witem = &wq->wq_array[(i*WORKITEM_SIZE) + j];
-                               TAILQ_INSERT_TAIL(&wl->wl_freelist, witem, wi_entry);
-                       }
+               for (i = 0; i < WORKQUEUE_NUMPRIOS; i++)
                        wq->wq_reqconc[i] = wq->wq_affinity_max;
-               }
+
                nptr = ptr + sizeof(struct workqueue);
 
                for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
-                       wq->wq_thactive_count[i] = (uint32_t *)nptr;
-                       nptr += (num_cpus * sizeof(uint32_t));
+                       wq->wq_thscheduled_count[i] = (uint16_t *)nptr;
+                       nptr += (num_cpus * sizeof(uint16_t));
                }
+               nptr += (sizeof(uint32_t) - 1);
+               nptr = (char *)((uintptr_t)nptr & ~(sizeof(uint32_t) - 1));
+
                for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
-                       wq->wq_thscheduled_count[i] = (uint32_t *)nptr;
+                       wq->wq_thactive_count[i] = (uint32_t *)nptr;
                        nptr += (num_cpus * sizeof(uint32_t));
                }
                /*
@@ -1153,7 +1196,7 @@ workq_open(struct proc *p, __unused struct workq_open_args  *uap, __unused int32
                 * the size for the allocation of the workqueue struct
                 */
                nptr += (sizeof(uint64_t) - 1);
-               nptr = (char *)((long)nptr & ~(sizeof(uint64_t) - 1));
+               nptr = (char *)((uintptr_t)nptr & ~(sizeof(uint64_t) - 1));
 
                for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
                        wq->wq_lastblocked_ts[i] = (uint64_t *)nptr;
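
Because the per-priority arrays are now carved out of one kalloc'd block with mixed element sizes (uint16_t, uint32_t, uint64_t), the cursor has to be realigned between them. The add-then-mask steps above are the usual round-up idiom; its generic form:

    #include <stdint.h>
    #include <stddef.h>

    /* Round a cursor up to the next 'align' boundary (align must be a power of two). */
    static char *align_up(char *p, size_t align)
    {
        uintptr_t u = (uintptr_t)p + (align - 1);
        return (char *)(u & ~(uintptr_t)(align - 1));
    }
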
@@ -1180,73 +1223,86 @@ out:
        return(error);
 }
 
+
 int
 workq_kernreturn(struct proc *p, struct workq_kernreturn_args  *uap, __unused int32_t *retval)
 {
-       user_addr_t item = uap->item;
-       int options     = uap->options;
-       int prio        = uap->prio;    /* should  be used to find the right workqueue */
-       int affinity    = uap->affinity;
-       int error       = 0;
-       thread_t th     = THREAD_NULL;
-       user_addr_t oc_item = 0;
         struct workqueue *wq;
+       int error       = 0;
 
        if ((p->p_lflag & P_LREGISTER) == 0)
                return(EINVAL);
 
-       /*
-        * affinity not yet hooked up on this path
-        */
-       affinity = -1;
+       switch (uap->options) {
 
-       switch (options) {
+               case WQOPS_QUEUE_NEWSPISUPP:
+                       break;
+
+               case WQOPS_QUEUE_REQTHREADS: {
+                       /*
+                        * for this operation, we re-purpose the affinity
+                        * argument as the number of threads to start
+                        */
+                       boolean_t overcommit = FALSE;
+                       int priority         = uap->prio;
+                       int reqcount         = uap->affinity;
 
-               case WQOPS_QUEUE_ADD: {
-                       
-                       if (prio & WORKQUEUE_OVERCOMMIT) {
-                               prio &= ~WORKQUEUE_OVERCOMMIT;
-                               oc_item = item;
+                       if (priority & WORKQUEUE_OVERCOMMIT) {
+                               priority &= ~WORKQUEUE_OVERCOMMIT;
+                               overcommit = TRUE;
+                       }
+                       if ((reqcount <= 0) || (priority < 0) || (priority >= WORKQUEUE_NUMPRIOS)) {
+                               error = EINVAL;
+                               break;
                        }
-                       if ((prio < 0) || (prio >= WORKQUEUE_NUMPRIOS))
-                               return (EINVAL);
+                        workqueue_lock_spin(p);
 
-                       workqueue_lock_spin(p);
+                        if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
+                                workqueue_unlock(p);
 
-                       if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
-                               workqueue_unlock(p);
-                               return (EINVAL);
-                       }
-                       if (wq->wq_thidlecount == 0 && (oc_item || (wq->wq_nthreads < wq->wq_affinity_max))) {
+                               error = EINVAL;
+                               break;
+                        }
+                       if (overcommit == FALSE) {
+                               wq->wq_reqcount += reqcount;
+                               wq->wq_requests[priority] += reqcount;
+                               
+                               KERNEL_DEBUG(0xefffd008 | DBG_FUNC_NONE, wq, priority, wq->wq_requests[priority], reqcount, 0);
+
+                               while (wq->wq_reqcount) {
+                                       if (workqueue_run_one(p, wq, overcommit, priority) == FALSE)
+                                               break;
+                               }
+                       } else {
+                               KERNEL_DEBUG(0xefffd13c | DBG_FUNC_NONE, wq, priority, wq->wq_requests[priority], reqcount, 0);
 
-                               workqueue_addnewthread(wq);
+                               while (reqcount) {
+                                       if (workqueue_run_one(p, wq, overcommit, priority) == FALSE)
+                                               break;
+                                       reqcount--;
+                               }
+                               if (reqcount) {
+                                       /*
+                                        * we need to delay starting some of the overcommit requests...
+                                        * we should only fail to create the overcommit threads if
+                                        * we're at the max thread limit... as existing threads
+                                        * return to the kernel, we'll notice the ocrequests
+                                        * and spin them back to user space as the overcommit variety
+                                        */
+                                       wq->wq_reqcount += reqcount;
+                                       wq->wq_requests[priority] += reqcount;
+                                       wq->wq_ocrequests[priority] += reqcount;
 
-                               if (wq->wq_thidlecount == 0)
-                                       oc_item = 0;
+                                       KERNEL_DEBUG(0xefffd140 | DBG_FUNC_NONE, wq, priority, wq->wq_requests[priority], reqcount, 0);
+                               }
                        }
-                       if (oc_item == 0)
-                               error = workqueue_additem(wq, prio, item, affinity);
+                       workqueue_unlock(p);
 
-                       KERNEL_DEBUG(0xefffd008 | DBG_FUNC_NONE, wq, prio, affinity, oc_item, 0);
                        }
                        break;
-               case WQOPS_QUEUE_REMOVE: {
 
-                       if ((prio < 0) || (prio >= WORKQUEUE_NUMPRIOS))
-                               return (EINVAL);
-
-                       workqueue_lock_spin(p);
-
-                       if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
-                               workqueue_unlock(p);
-                               return (EINVAL);
-                       }
-                       error = workqueue_removeitem(wq, prio, item);
-                       }
-                       break;
                case WQOPS_THREAD_RETURN: {
-
-                       th = current_thread();
+                       thread_t th = current_thread();
                        struct uthread *uth = get_bsdthread_info(th);
 
                        /* reset signal mask on the workqueue thread to default state */
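
The WQOPS_QUEUE_REQTHREADS handler above replaces the old per-item queue with plain per-priority request counters: constrained requests are recorded in full, while overcommit requests that cannot be started immediately are also remembered in wq_ocrequests[] so they are serviced ahead of normal requests later. A compact model of that bookkeeping (illustrative only, not the kernel code; NUMPRIOS is an assumption):

    #include <stdint.h>
    #include <stdbool.h>

    #define NUMPRIOS 4

    struct wq_model {
        uint32_t reqcount;               /* total outstanding requests */
        uint16_t requests[NUMPRIOS];     /* per-priority outstanding requests */
        uint16_t ocrequests[NUMPRIOS];   /* delayed overcommit requests */
    };

    static void queue_reqthreads(struct wq_model *wq, int priority, int reqcount,
                                 bool overcommit, int started_now)
    {
        if (!overcommit) {
            wq->reqcount += reqcount;
            wq->requests[priority] += reqcount;
        } else if (reqcount > started_now) {
            int delayed = reqcount - started_now;   /* overcommit threads we couldn't start yet */
            wq->reqcount += delayed;
            wq->requests[priority] += delayed;
            wq->ocrequests[priority] += delayed;
        }
    }
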
@@ -1255,78 +1311,51 @@ workq_kernreturn(struct proc *p, struct workq_kernreturn_args  *uap, __unused in
                                uth->uu_sigmask = ~workq_threadmask;
                                proc_unlock(p);
                        }
-
                        workqueue_lock_spin(p);
 
                        if ((wq = (struct workqueue *)p->p_wqptr) == NULL || (uth->uu_threadlist == NULL)) {
                                workqueue_unlock(p);
-                               return (EINVAL);
+
+                               error = EINVAL;
+                               break;
                        }
                        KERNEL_DEBUG(0xefffd004 | DBG_FUNC_END, wq, 0, 0, 0, 0);
-                       }
-                       break;
-               case WQOPS_THREAD_SETCONC: {
-
-                       if ((prio < 0) || (prio > WORKQUEUE_NUMPRIOS))
-                               return (EINVAL);
 
-                       workqueue_lock_spin(p);
-
-                       if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
-                               workqueue_unlock(p);
-                               return (EINVAL);
-                       }
+                       (void)workqueue_run_nextreq(p, wq, th, FALSE, FALSE, 0, -1);
                        /*
-                        * for this operation, we re-purpose the affinity
-                        * argument as the concurrency target
+                        * workqueue_run_nextreq is responsible for
+                        * dropping the workqueue lock in all cases
                         */
-                       if (prio < WORKQUEUE_NUMPRIOS)
-                               wq->wq_reqconc[prio] = affinity;
-                       else {
-                               for (prio = 0; prio < WORKQUEUE_NUMPRIOS; prio++)
-                                       wq->wq_reqconc[prio] = affinity;
-
-                       }
                        }
                        break;
+               
                default:
-                       return (EINVAL);
+                       error = EINVAL;
+                       break;
        }
-       (void)workqueue_run_nextitem(p, wq, th, oc_item, prio, affinity);
-       /*
-        * workqueue_run_nextitem is responsible for
-        * dropping the workqueue lock in all cases
-        */
        return (error);
-
 }
 
+/*
+ * Routine:    workqueue_mark_exiting
+ *
+ * Function:   Mark the work queue such that new threads will not be added to the
+ *             work queue after we return.  
+ *
+ * Conditions: Called against the current process.
+ */
 void
-workqueue_exit(struct proc *p)
+workqueue_mark_exiting(struct proc *p)
 {
        struct workqueue  * wq;
-       struct threadlist  * tl, *tlist;
-       struct uthread  *uth;
-       int wq_size = 0;
 
-       if (p->p_wqptr != NULL) {
+       wq = p->p_wqptr;
+       if (wq != NULL) {
 
-               KERNEL_DEBUG(0x900808c | DBG_FUNC_START, p->p_wqptr, 0, 0, 0, 0);
+               KERNEL_DEBUG(0x9008088 | DBG_FUNC_START, p->p_wqptr, 0, 0, 0, 0);
 
                workqueue_lock_spin(p);
 
-               wq = (struct workqueue *)p->p_wqptr;
-
-               if (wq == NULL) {
-                       workqueue_unlock(p);
-
-                       KERNEL_DEBUG(0x900808c | DBG_FUNC_END, 0, 0, 0, -1, 0);
-                       return;
-               }
-               wq_size = p->p_wqsize;
-               p->p_wqptr = NULL;
-               p->p_wqsize = 0;
-
                /*
                 * we now arm the timer in the callback function w/o holding the workq lock...
                 * we do this by setting  WQ_ATIMER_RUNNING via OSCompareAndSwap in order to 
@@ -1356,6 +1385,40 @@ workqueue_exit(struct proc *p)
                }
                workqueue_unlock(p);
 
+               KERNEL_DEBUG(0x9008088 | DBG_FUNC_END, 0, 0, 0, 0, 0);
+       }
+}
+
+/*
+ * Routine:    workqueue_exit
+ *
+ * Function:   clean up the work queue structure(s) now that there are no threads
+ *             left running inside the work queue (except possibly current_thread).
+ *
+ * Conditions: Called by the last thread in the process.
+ *             Called against current process.
+ */
+void
+workqueue_exit(struct proc *p)
+{
+       struct workqueue  * wq;
+       struct threadlist  * tl, *tlist;
+       struct uthread  *uth;
+       int wq_size = 0;
+
+       wq = (struct workqueue *)p->p_wqptr;
+       if (wq != NULL) {
+
+               KERNEL_DEBUG(0x900808c | DBG_FUNC_START, p->p_wqptr, 0, 0, 0, 0);
+
+               wq_size = p->p_wqsize;
+               p->p_wqptr = NULL;
+               p->p_wqsize = 0;
+
+               /*
+                * Clean up workqueue data structures for threads that exited and
+                * didn't get a chance to clean up after themselves.
+                */
                TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
 
                        thread_sched_call(tl->th_thread, NULL);
@@ -1374,21 +1437,7 @@ workqueue_exit(struct proc *p)
                        kfree(tl, sizeof(struct threadlist));
                }
                TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) {
-
-                       thread_sched_call(tl->th_thread, NULL);
-
-                       uth = get_bsdthread_info(tl->th_thread);
-                       if (uth != (struct uthread *)0) {
-                               uth->uu_threadlist = NULL;
-                       }
-                       TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
-
-                       /*
-                        * drop our last ref on the thread
-                        */
-                       thread_deallocate(tl->th_thread);
-
-                       kfree(tl, sizeof(struct threadlist));
+                       workqueue_removethread(tl, 1);
                }
                thread_call_free(wq->wq_atimer_call);
 
@@ -1398,101 +1447,83 @@ workqueue_exit(struct proc *p)
        }
 }
 
-static int 
-workqueue_additem(struct workqueue *wq, int prio, user_addr_t item, int affinity)
-{
-       struct workitem *witem;
-       struct workitemlist *wl;
-
-       wl = (struct workitemlist *)&wq->wq_list[prio];
-
-       if (TAILQ_EMPTY(&wl->wl_freelist))
-               return (ENOMEM);
-
-       witem = (struct workitem *)TAILQ_FIRST(&wl->wl_freelist);
-       TAILQ_REMOVE(&wl->wl_freelist, witem, wi_entry);
 
-       witem->wi_item = item;
-       witem->wi_affinity = affinity;
-       TAILQ_INSERT_TAIL(&wl->wl_itemlist, witem, wi_entry);
-
-       wq->wq_list_bitmap |= (1 << prio);
-
-       wq->wq_itemcount++;
+static int workqueue_importance[WORKQUEUE_NUMPRIOS] = 
+{
+       2, 0, -2, INT_MIN,
+};
 
-       return (0);
-}
+#define WORKQ_POLICY_TIMESHARE 1
 
-static int 
-workqueue_removeitem(struct workqueue *wq, int prio, user_addr_t item)
+static int workqueue_policy[WORKQUEUE_NUMPRIOS] = 
 {
-       struct workitem *witem;
-       struct workitemlist *wl;
-       int error = ESRCH;
+       WORKQ_POLICY_TIMESHARE, WORKQ_POLICY_TIMESHARE, WORKQ_POLICY_TIMESHARE, WORKQ_POLICY_TIMESHARE
+};
 
-       wl = (struct workitemlist *)&wq->wq_list[prio];
 
-       TAILQ_FOREACH(witem, &wl->wl_itemlist, wi_entry) {
-               if (witem->wi_item == item) {
-                       TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);
 
-                       if (TAILQ_EMPTY(&wl->wl_itemlist))
-                               wq->wq_list_bitmap &= ~(1 << prio);
-                       wq->wq_itemcount--;
-                       
-                       witem->wi_item = (user_addr_t)0;
-                       witem->wi_affinity = 0;
-                       TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);
+static boolean_t
+workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, int priority)
+{
+       boolean_t       ran_one;
 
-                       error = 0;
-                       break;
+       if (wq->wq_thidlecount == 0) {
+               if (overcommit == FALSE) {
+                       if (wq->wq_constrained_threads_scheduled < wq->wq_affinity_max)
+                               workqueue_addnewthread(wq, overcommit);
+               } else {
+                       workqueue_addnewthread(wq, overcommit);
+
+                       if (wq->wq_thidlecount == 0)
+                               return (FALSE);
                }
        }
-       return (error);
-}
+       ran_one = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, overcommit, priority, -1);
+       /*
+        * workqueue_run_nextreq is responsible for
+        * dropping the workqueue lock in all cases
+        */
+       workqueue_lock_spin(p);
 
-static int workqueue_importance[WORKQUEUE_NUMPRIOS] = 
-{
-       2, 0, -2,
-};
+       return (ran_one);
+}
 
-static int workqueue_policy[WORKQUEUE_NUMPRIOS] = 
-{
-       1, 1, 1,
-};
 
 
 /*
- * workqueue_run_nextitem:
+ * workqueue_run_nextreq:
  *   called with the workqueue lock held...
  *   responsible for dropping it in all cases
  */
 static boolean_t
-workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_addr_t oc_item, int oc_prio, int oc_affinity)
+workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t thread,
+                     boolean_t force_oc, boolean_t overcommit, int oc_prio, int oc_affinity)
 {
-       struct workitem *witem = NULL;
-       user_addr_t item = 0;
        thread_t th_to_run = THREAD_NULL;
        thread_t th_to_park = THREAD_NULL;
        int wake_thread = 0;
-       int reuse_thread = 1;
+       int reuse_thread = WQ_FLAG_THREAD_REUSE;
        uint32_t priority, orig_priority;
        uint32_t affinity_tag, orig_affinity_tag;
        uint32_t i, n;
-       uint32_t activecount;
        uint32_t busycount;
        uint32_t us_to_wait;
        struct threadlist *tl = NULL;
        struct threadlist *ttl = NULL;
        struct uthread *uth = NULL;
-       struct workitemlist *wl = NULL;
        boolean_t start_timer = FALSE;
        boolean_t adjust_counters = TRUE;
        uint64_t  curtime;
 
 
-       KERNEL_DEBUG(0xefffd000 | DBG_FUNC_START, wq, thread, wq->wq_thidlecount, wq->wq_itemcount, 0);
+       KERNEL_DEBUG(0xefffd000 | DBG_FUNC_START, wq, thread, wq->wq_thidlecount, wq->wq_reqcount, 0);
 
+       if (thread != THREAD_NULL) {
+               uth = get_bsdthread_info(thread);
+
+               if ( (tl = uth->uu_threadlist) == NULL)
+                       panic("wq thread with no threadlist ");
+       }
        /*
         * from here until we drop the workq lock
         * we can't be pre-empted since we hold 
@@ -1502,14 +1533,15 @@ workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_add
         * and these values are used to index the multi-dimensional
         * counter arrays in 'workqueue_callback'
         */
-       if (oc_item) {
+dispatch_overcommit:
+
+       if (overcommit == TRUE || force_oc == TRUE) {
                uint32_t min_scheduled = 0;
                uint32_t scheduled_count;
                uint32_t active_count;
                uint32_t t_affinity = 0;
 
                priority = oc_prio;
-               item = oc_item;
 
                if ((affinity_tag = oc_affinity) == (uint32_t)-1) {
                        for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) {
@@ -1534,27 +1566,55 @@ workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_add
                        }
                        affinity_tag = t_affinity;
                }
+               if (thread != THREAD_NULL) {
+                       th_to_run = thread;
+                       goto pick_up_work;
+               }
                goto grab_idle_thread;
        }
-       if (wq->wq_itemcount == 0) {
+       if (wq->wq_reqcount) {
+               for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
+                       if (wq->wq_requests[priority])
+                               break;
+               }
+               assert(priority < WORKQUEUE_NUMPRIOS);
+
+               if (wq->wq_ocrequests[priority] && (thread != THREAD_NULL || wq->wq_thidlecount)) {
+                       /*
+                        * handle delayed overcommit request...
+                        * they have priority over normal requests
+                        * within a given priority level
+                        */
+                       wq->wq_reqcount--;
+                       wq->wq_requests[priority]--;
+                       wq->wq_ocrequests[priority]--;
+
+                       oc_prio = priority;
+                       overcommit = TRUE;
+
+                       goto dispatch_overcommit;
+               }
+       }
+       /*
+        * if we get here, the work should be handled by a constrained thread
+        */
+       if (wq->wq_reqcount == 0 || wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
+               /*
+                * no work to do, or we're already at or over the scheduling limit for
+                * constrained threads...  just return or park the thread...
+                * do not start the timer for this condition... if we don't have any work,
+                * we'll check again when new work arrives... if we're over the limit, we need 1 or more
+                * constrained threads to return to the kernel before we can dispatch additional work
+                */
                if ((th_to_park = thread) == THREAD_NULL)
                        goto out_of_work;
-               goto parkit;
+               goto parkit;
        }
-       for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
-               if (wq->wq_list_bitmap & (1 << priority)) {
-                       wl = (struct workitemlist *)&wq->wq_list[priority];
-                       break;
-               }
-       }
-       assert(wl != NULL);
-       assert(!(TAILQ_EMPTY(&wl->wl_itemlist)));
 
        curtime = mach_absolute_time();
 
        if (thread != THREAD_NULL) {
-               uth = get_bsdthread_info(thread);
-               tl = uth->uu_threadlist;
+
                affinity_tag = tl->th_affinity_tag;
 
                /*
@@ -1564,6 +1624,10 @@ workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_add
                 * we're considering running work for
                 */
                if (affinity_tag < wq->wq_reqconc[priority]) {
+                       uint32_t  bcount = 0;
+                       uint32_t  acount = 0;
+                       uint32_t  tcount = 0;
+
                        /*
                         * we're a worker thread from the pool... currently we
                         * are considered 'active' which means we're counted
@@ -1571,56 +1635,84 @@ workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_add
                         * add up the active counts of all the priority levels
                         * up to and including the one we want to schedule
                         */
-                       for (activecount = 0, i = 0; i <= priority; i++) {
-                               uint32_t  acount;
+                       for (i = 0; i <= priority; i++) {
 
-                               acount = wq->wq_thactive_count[i][affinity_tag];
+                               tcount = wq->wq_thactive_count[i][affinity_tag];
+                               acount += tcount;
 
-                               if (acount == 0 && wq->wq_thscheduled_count[i][affinity_tag]) {
+                               if (tcount == 0 && wq->wq_thscheduled_count[i][affinity_tag]) {
                                        if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag]))
-                                               acount = 1;
+                                               bcount++;
                                }
-                               activecount += acount;
                        }
-                       if (activecount == 1) {
+                       if ((acount + bcount) == 1) {
                                /*
                                 * we're the only active thread associated with our
                                 * affinity group at this priority level and higher,
+                                * and there are no threads considered 'busy',
                                 * so pick up some work and keep going
                                 */
                                th_to_run = thread;
                                goto pick_up_work;
                        }
+                       if (wq->wq_reqconc[priority] == 1) {
+                               /*
+                                * we have at least one other active or busy thread running at this
+                                * priority level or higher and since we only have 
+                                * 1 affinity group to schedule against, no need
+                                * to try and find another... we can't start up another thread to
+                                * service the request and we already have the info
+                                * needed to determine if we need to start a timer or not
+                                */
+                               if (acount == 1) {
+                                       /*
+                                        * we're the only active thread, but we must have found
+                                        * at least 1 busy thread, so indicate that we need
+                                        * to start a timer
+                                        */
+                                       busycount = 1;
+                               } else
+                                       busycount = 0;
+
+                               affinity_tag = 1;
+                               goto cant_schedule;
+                       }
                }
                /*
                 * there's more than 1 thread running in this affinity group
                 * or the concurrency level has been cut back for this priority...
-                * lets continue on and look for an 'empty' group to run this
-                * work item in
+                * let's continue on and look for an 'empty' group to run this
+                * work request in
                 */
        }
        busycount = 0;
 
        for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) {
+               boolean_t       can_schedule;
+
                /*
                 * look for first affinity group that is currently not active
                 * i.e. no active threads at this priority level or higher
                 * and no threads that have run recently
                 */
-               for (activecount = 0, i = 0; i <= priority; i++) {
-                       if ((activecount = wq->wq_thactive_count[i][affinity_tag]))
+               for (i = 0; i <= priority; i++) {
+                       can_schedule = FALSE;
+
+                       if (wq->wq_thactive_count[i][affinity_tag])
                                break;
 
-                       if (wq->wq_thscheduled_count[i][affinity_tag]) {
-                               if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag])) {
-                                       busycount++;
-                                       break;
-                               }
+                       if (wq->wq_thscheduled_count[i][affinity_tag] &&
+                           wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag])) {
+                               busycount++;
+                               break;
                        }
+                       can_schedule = TRUE;
                }
-               if (activecount == 0 && busycount == 0)
+               if (can_schedule == TRUE)
                        break;
        }
+cant_schedule:
+
        if (affinity_tag >= wq->wq_reqconc[priority]) {
                /*
                 * we've already got at least 1 thread per
@@ -1632,7 +1724,7 @@ workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_add
                         * 'busy' state... make sure we start
                         * the timer because if they are the only
                         * threads keeping us from scheduling
-                        * this workitem, we won't get a callback
+                        * this work request, we won't get a callback
                         * to kick off the timer... we need to
                         * start it now...
                         */
@@ -1659,6 +1751,8 @@ workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_add
                th_to_run = thread;
                goto pick_up_work;
        }
+
+grab_idle_thread:
        if (wq->wq_thidlecount == 0) {
                /*
                 * we don't have a thread to schedule, but we have
@@ -1671,14 +1765,12 @@ workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_add
 
                goto no_thread_to_run;
        }
-
-grab_idle_thread:
        /*
         * we've got a candidate (affinity group with no currently
         * active threads) to start a new thread on...
         * we already know there is both work available
         * and an idle thread, so activate a thread and then
-        * fall into the code that pulls a new workitem...
+        * fall into the code that pulls a new work request...
         */
        TAILQ_FOREACH(ttl, &wq->wq_thidlelist, th_entry) {
                if (ttl->th_affinity_tag == affinity_tag || ttl->th_affinity_tag == (uint16_t)-1) {
@@ -1715,18 +1807,19 @@ grab_idle_thread:
        th_to_run = tl->th_thread;
 
 pick_up_work:
-       if (item == 0) {
-               witem = TAILQ_FIRST(&wl->wl_itemlist);
-               TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);
-
-               if (TAILQ_EMPTY(&wl->wl_itemlist))
-                       wq->wq_list_bitmap &= ~(1 << priority);
-               wq->wq_itemcount--;
-
-               item = witem->wi_item;
-               witem->wi_item = (user_addr_t)0;
-               witem->wi_affinity = 0;
-               TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);
+       if (overcommit == FALSE && force_oc == FALSE) {
+               wq->wq_reqcount--;
+               wq->wq_requests[priority]--;
+
+               if ( !(tl->th_flags & TH_LIST_CONSTRAINED)) {
+                       wq->wq_constrained_threads_scheduled++;
+                       tl->th_flags |= TH_LIST_CONSTRAINED;
+               }
+       } else {
+               if (tl->th_flags & TH_LIST_CONSTRAINED) {
+                       wq->wq_constrained_threads_scheduled--;
+                       tl->th_flags &= ~TH_LIST_CONSTRAINED;
+               }
        }
        orig_priority = tl->th_priority;
        orig_affinity_tag = tl->th_affinity_tag;
@@ -1770,22 +1863,43 @@ pick_up_work:
                thread_precedence_policy_data_t precedinfo;
                thread_extended_policy_data_t   extinfo;
                uint32_t        policy;
+#if CONFIG_EMBEDDED
+               int retval = 0;
 
+               /* sets the saved importance for an Apple iOS daemon if it is backgrounded; otherwise returns 0 */
+               retval = proc_setthread_saved_importance(th_to_run, workqueue_importance[priority]);
+               if (retval == 0) {
+#endif /* CONFIG_EMBEDDED */
                policy = workqueue_policy[priority];
                
                KERNEL_DEBUG(0xefffd120 | DBG_FUNC_START, wq, orig_priority, tl->th_policy, 0, 0);
 
-               if (tl->th_policy != policy) {
+               if ((orig_priority == WORKQUEUE_BG_PRIOQUEUE) || (priority == WORKQUEUE_BG_PRIOQUEUE)) {
+                       if (orig_priority == WORKQUEUE_BG_PRIOQUEUE) {
+                               /* remove the disk throttle; the importance will be reset in any case */
+                               proc_restore_workq_bgthreadpolicy(th_to_run);
+                       } 
 
+                       if (priority == WORKQUEUE_BG_PRIOQUEUE) {
+                               proc_apply_workq_bgthreadpolicy(th_to_run);
+                       }
+               }
+
+               if (tl->th_policy != policy) {
                        extinfo.timeshare = policy;
                        (void)thread_policy_set_internal(th_to_run, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);
 
                        tl->th_policy = policy;
                }
+
                 precedinfo.importance = workqueue_importance[priority];
                 (void)thread_policy_set_internal(th_to_run, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
 
+
                KERNEL_DEBUG(0xefffd120 | DBG_FUNC_END, wq,  priority, policy, 0, 0);
+#if CONFIG_EMBEDDED
+               }
+#endif /* CONFIG_EMBEDDED */
        }
        if (kdebug_enable) {
                int     lpri = -1;
@@ -1813,11 +1927,11 @@ pick_up_work:
                }
        }
        /*
-        * if current thread is reused for workitem, does not return via unix_syscall
+        * if the current thread is being reused for this work request, it does not return via unix_syscall
         */
-       wq_runitem(p, item, th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));
+       wq_runreq(p, overcommit, priority, th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));
        
-       KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(th_to_run), item, 1, 0);
+       KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(th_to_run), overcommit, 1, 0);
 
        return (TRUE);
 
@@ -1841,11 +1955,6 @@ parkit:
         * this is a workqueue thread with no more
         * work to do... park it for now
         */
-       uth = get_bsdthread_info(th_to_park);
-       tl = uth->uu_threadlist;
-       if (tl == 0) 
-               panic("wq thread with no threadlist ");
-       
        TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
        tl->th_flags &= ~TH_LIST_RUNNING;
 
@@ -1858,12 +1967,18 @@ parkit:
        wq->wq_thscheduled_count[tl->th_priority][tl->th_affinity_tag]--;
        wq->wq_threads_scheduled--;
 
+       if (tl->th_flags & TH_LIST_CONSTRAINED) {
+               wq->wq_constrained_threads_scheduled--;
+               wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
+               tl->th_flags &= ~TH_LIST_CONSTRAINED;
+       }
        if (wq->wq_thidlecount < 100)
                us_to_wait = wq_reduce_pool_window_usecs - (wq->wq_thidlecount * (wq_reduce_pool_window_usecs / 100));
        else
                us_to_wait = wq_reduce_pool_window_usecs / 100;
 
        wq->wq_thidlecount++;
+       wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;
 
        assert_wait_timeout((caddr_t)tl, (THREAD_INTERRUPTIBLE), us_to_wait, NSEC_PER_USEC);
 
@@ -1922,7 +2037,7 @@ normal_resume_to_user:
                         * queue... remove it from our domain...
                         * workqueue_removethread consumes the lock
                         */
-                       workqueue_removethread(tl);
+                       workqueue_removethread(tl, 0);
 
                        thread_bootstrap_return();
                }
@@ -1973,7 +2088,7 @@ wq_unpark_continue(void)
                        if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
                                /*
                                 * a normal wakeup of this thread occurred... no need 
-                                * for any synchronization with the timer and wq_runitem
+                                * for any synchronization with the timer and wq_runreq
                                 */
 normal_return_to_user:                 
                                thread_sched_call(th_to_unpark, workqueue_callback);
@@ -1994,7 +2109,7 @@ normal_return_to_user:
                                 *
                                 * workqueue_removethread consumes the lock
                                 */
-                               workqueue_removethread(tl);
+                               workqueue_removethread(tl, 0);
                                        
                                thread_exception_return();
                        }
@@ -2029,7 +2144,7 @@ normal_return_to_user:
 
 
 static void 
-wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
+wq_runreq(proc_t p, boolean_t overcommit, uint32_t priority, thread_t th, struct threadlist *tl,
           int reuse_thread, int wake_thread, int return_directly)
 {
        int ret = 0;
@@ -2037,7 +2152,7 @@ wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
 
        KERNEL_DEBUG1(0xefffd004 | DBG_FUNC_START, tl->th_workq, tl->th_priority, tl->th_affinity_tag, thread_tid(current_thread()), thread_tid(th));
 
-       ret = setup_wqthread(p, th, item, reuse_thread, tl);
+       ret = setup_wqthread(p, th, overcommit, priority, reuse_thread, tl);
 
        if (ret != 0)
                panic("setup_wqthread failed  %x\n", ret);
@@ -2047,7 +2162,7 @@ wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
 
                thread_exception_return();
 
-               panic("wq_runitem: thread_exception_return returned ...\n");
+               panic("wq_runreq: thread_exception_return returned ...\n");
        }
        if (wake_thread) {
                workqueue_lock_spin(p);
@@ -2080,34 +2195,18 @@ wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
        }
 }
 
+
 int
-setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl)
+setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, uint32_t priority, int reuse_thread, struct threadlist *tl)
 {
-#if defined(__ppc__)
-       /*
-        * Set up PowerPC registers...
-        * internally they are always kept as 64 bit and
-        * since the register set is the same between 32 and 64bit modes
-        * we don't need 2 different methods for setting the state
-        */
-       {
-               ppc_thread_state64_t state64;
-               ppc_thread_state64_t *ts64 = &state64;
-
-               ts64->srr0 = (uint64_t)p->p_wqthread;
-               ts64->r1 = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_ARGSAVE_LEN - C_RED_ZONE);
-               ts64->r3 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
-               ts64->r4 = (uint64_t)(tl->th_thport);
-               ts64->r5 = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
-               ts64->r6 = (uint64_t)item;
-               ts64->r7 = (uint64_t)reuse_thread;
-               ts64->r8 = (uint64_t)0;
-
-               if ((reuse_thread != 0) && (ts64->r3 == (uint64_t)0))
-                       panic("setup_wqthread: setting reuse thread with null pthread\n");
-               thread_set_wq_state64(th, (thread_state_t)ts64);
-       }
-#elif defined(__i386__) || defined(__x86_64__)
+       uint32_t flags = reuse_thread | WQ_FLAG_THREAD_NEWSPI;
+
+       if (overcommit == TRUE)
+               flags |= WQ_FLAG_THREAD_OVERCOMMIT;
+
+       flags |= priority;
+
+#if defined(__i386__) || defined(__x86_64__)
        int isLP64 = 0;
 
        isLP64 = IS_64BIT_PROCESS(p);
@@ -2122,16 +2221,14 @@ setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct
                ts->eax = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
                ts->ebx = (unsigned int)tl->th_thport;
                ts->ecx = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
-               ts->edx = (unsigned int)item;
-               ts->edi = (unsigned int)reuse_thread;
+               ts->edx = (unsigned int)0;
+               ts->edi = (unsigned int)flags;
                ts->esi = (unsigned int)0;
                /*
                 * set stack pointer
                 */
                ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_32_STK_ALIGN));
 
-               if ((reuse_thread != 0) && (ts->eax == (unsigned int)0))
-                       panic("setup_wqthread: setting reuse thread with null pthread\n");
                thread_set_wq_state32(th, (thread_state_t)ts);
 
        } else {
@@ -2142,8 +2239,8 @@ setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct
                ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
                ts64->rsi = (uint64_t)(tl->th_thport);
                ts64->rdx = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
-               ts64->rcx = (uint64_t)item;
-               ts64->r8 = (uint64_t)reuse_thread;
+               ts64->rcx = (uint64_t)0;
+               ts64->r8 = (uint64_t)flags;
                ts64->r9 = (uint64_t)0;
 
                /*
@@ -2151,8 +2248,6 @@ setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct
                 */
                ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_64_REDZONE_LEN);
 
-               if ((reuse_thread != 0) && (ts64->rdi == (uint64_t)0))
-                       panic("setup_wqthread: setting reuse thread with null pthread\n");
                thread_set_wq_state64(th, (thread_state_t)ts64);
        }
 #else
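
[editorial sketch] With this change the thread start state no longer carries a work item pointer (rcx/edx are now zero); instead a single flags word in r8/edi packs the priority into the low bits together with WQ_FLAG_THREAD_OVERCOMMIT, WQ_FLAG_THREAD_NEWSPI and the reuse indication. A minimal user-side decode sketch, using hypothetical mask names and assumed bit positions (the real libc _pthread_wqthread uses its own constants):

    #include <stdint.h>

    /* Hypothetical masks for illustration only; the kernel composes flags as
     * reuse_thread | WQ_FLAG_THREAD_NEWSPI | (overcommit ? WQ_FLAG_THREAD_OVERCOMMIT : 0) | priority. */
    #define EXAMPLE_PRIO_MASK        0x0000ffffu
    #define EXAMPLE_OVERCOMMIT_BIT   0x00010000u   /* assumed to correspond to WQ_FLAG_THREAD_OVERCOMMIT */
    #define EXAMPLE_REUSE_BIT        0x00020000u   /* assumed reuse indication */

    struct wq_start_args {
            uint32_t priority;
            int      overcommit;
            int      reused;
    };

    static void decode_wq_flags(uint32_t flags, struct wq_start_args *out)
    {
            out->priority   = flags & EXAMPLE_PRIO_MASK;
            out->overcommit = (flags & EXAMPLE_OVERCOMMIT_BIT) != 0;
            out->reused     = (flags & EXAMPLE_REUSE_BIT) != 0;
    }
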
@@ -2183,6 +2278,14 @@ fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
        pwqinfo->pwq_nthreads = wq->wq_nthreads;
        pwqinfo->pwq_runthreads = activecount;
        pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
+       pwqinfo->pwq_state = 0;
+
+       if (wq->wq_lflags & WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT)
+               pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
+
+       if (wq->wq_lflags & WQL_EXCEEDED_TOTAL_THREAD_LIMIT)
+               pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
+
 out:
        workqueue_unlock(p);
        return(error);
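
[editorial sketch] The two new pwq_state bits surface the constrained and total thread-limit conditions to userland. A minimal sketch of reading them through libproc, assuming the standard proc_pidinfo() interface and the PROC_PIDWORKQUEUEINFO flavor from <sys/proc_info.h>:

    #include <stdio.h>
    #include <unistd.h>
    #include <libproc.h>
    #include <sys/proc_info.h>

    int main(void)
    {
            struct proc_workqueueinfo wqinfo;
            int ret = proc_pidinfo(getpid(), PROC_PIDWORKQUEUEINFO, 0, &wqinfo, sizeof(wqinfo));

            if (ret != (int)sizeof(wqinfo)) {
                    perror("proc_pidinfo");
                    return 1;
            }
            printf("threads=%u running=%u blocked=%u\n",
                wqinfo.pwq_nthreads, wqinfo.pwq_runthreads, wqinfo.pwq_blockedthreads);

            if (wqinfo.pwq_state & WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT)
                    printf("constrained workqueue thread limit exceeded\n");
            if (wqinfo.pwq_state & WQ_EXCEEDED_TOTAL_THREAD_LIMIT)
                    printf("total workqueue thread limit exceeded\n");
            return 0;
    }
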
@@ -2308,5 +2411,6 @@ pthread_init(void)
        
        pth_global_hashinit();
        psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
+       psynch_zoneinit();
 #endif /* PSYNCH */
 }