/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
#define _PTHREAD_CONDATTR_T
#define _PTHREAD_COND_T
#define _PTHREAD_MUTEXATTR_T
#define _PTHREAD_MUTEX_T
#define _PTHREAD_RWLOCKATTR_T
#define _PTHREAD_RWLOCK_T

#undef pthread_mutexattr_t
#undef pthread_mutex_t
#undef pthread_condattr_t
#undef pthread_rwlockattr_t
#undef pthread_rwlock_t
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>
#include <sys/kdebug.h>
#include <sys/sysproto.h>
#include <sys/pthread_internal.h>
#include <sys/user.h>		/* for coredump */
#include <sys/proc_info.h>	/* for fill_procworkqueue */
#include <mach/mach_types.h>
#include <mach/vm_prot.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/kalloc.h>
#include <kern/sched_prim.h>	/* for thread_exception_return */
#include <kern/processor.h>
#include <kern/affinity.h>
#include <kern/assert.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_status.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
#include <vm/vm_protos.h>
#include <vm/vm_map.h>		/* for current_map() */
#include <vm/vm_fault.h>
#include <mach/thread_act.h>	/* for thread_resume */
#include <machine/machine_routines.h>

#include <i386/machine_routines.h>
#include <i386/eflags.h>
#include <i386/psl.h>
#include <i386/seg.h>

#include <libkern/OSAtomic.h>
#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
#define KERNEL_DEBUG1 KERNEL_DEBUG_CONSTANT1
lck_grp_attr_t	*pthread_lck_grp_attr;
lck_grp_t	*pthread_lck_grp;
lck_attr_t	*pthread_lck_attr;
extern kern_return_t thread_getstatus(register thread_t act, int flavor,
			thread_state_t tstate, mach_msg_type_number_t *count);
extern kern_return_t thread_setstatus(thread_t thread, int flavor,
			thread_state_t tstate, mach_msg_type_number_t count);
extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
extern kern_return_t mach_port_deallocate(ipc_space_t, mach_port_name_t);
extern kern_return_t semaphore_signal_internal_trap(mach_port_name_t);
extern void workqueue_thread_yielded(void);

static int workqueue_additem(struct workqueue *wq, int prio, user_addr_t item, int affinity);
static boolean_t workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t th,
		user_addr_t oc_item, int oc_prio, int oc_affinity);
static void wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
		int reuse_thread, int wake_thread, int return_directly);
static void wq_unpark_continue(void);
static void wq_unsuspend_continue(void);
static int setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl);
static boolean_t workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread);
static void workqueue_removethread(struct threadlist *tl, int fromexit);
static void workqueue_lock_spin(proc_t);
static void workqueue_unlock(proc_t);
int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc);
int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);
#define WQ_MAXPRI_MIN	0	/* low prio queue num */
#define WQ_MAXPRI_MAX	2	/* max prio queuenum */
#define WQ_PRI_NUM	3	/* number of prio work queues */

#define C_32_STK_ALIGN		16
#define C_64_STK_ALIGN		16
#define C_64_REDZONE_LEN	128
#define TRUNC_DOWN32(a,c)	((((uint32_t)a)-(c)) & ((uint32_t)(-(c))))
#define TRUNC_DOWN64(a,c)	((((uint64_t)a)-(c)) & ((uint64_t)(-(c))))
/*
 * Flags field passed to bsdthread_create and back in pthread_start
 * 31  <---------------------------------> 0
 * _________________________________________
 * | flags(8) | policy(8) | importance(16) |
 * -----------------------------------------
 */
void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags);
#define PTHREAD_START_CUSTOM		0x01000000
#define PTHREAD_START_SETSCHED		0x02000000
#define PTHREAD_START_DETACHED		0x04000000
#define PTHREAD_START_POLICY_BITSHIFT	16
#define PTHREAD_START_POLICY_MASK	0xff
#define PTHREAD_START_IMPORTANCE_MASK	0xffff

#define SCHED_OTHER	POLICY_TIMESHARE
#define SCHED_FIFO	POLICY_FIFO
#define SCHED_RR	POLICY_RR
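/*
 * bsdthread_create() is the kernel half of user-level pthread creation:
 * it creates a Mach thread in the calling task and, unless the caller
 * passed PTHREAD_START_CUSTOM, also maps a guard page, stack and pthread_t
 * area for it, before loading the register state and resuming the thread.
 */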
bsdthread_create(__unused struct proc *p, struct bsdthread_create_args *uap, user_addr_t *retval)
	mach_vm_offset_t stackaddr;
	mach_vm_size_t th_allocsize = 0;
	mach_vm_size_t user_stacksize;
	mach_vm_size_t th_stacksize;
	mach_vm_offset_t th_stackaddr;
	mach_vm_offset_t th_stack;
	mach_vm_offset_t th_pthread;
	mach_port_name_t th_thport;
	user_addr_t user_func = uap->func;
	user_addr_t user_funcarg = uap->func_arg;
	user_addr_t user_stack = uap->stack;
	user_addr_t user_pthread = uap->pthread;
	unsigned int flags = (unsigned int)uap->flags;
	vm_map_t vmap = current_map();
	task_t ctask = current_task();
	unsigned int policy, importance;
	if ((p->p_lflag & P_LREGISTER) == 0)

	KERNEL_DEBUG_CONSTANT(0x9000080 | DBG_FUNC_START, flags, 0, 0, 0, 0);

	isLP64 = IS_64BIT_PROCESS(p);

#if defined(__i386__) || defined(__x86_64__)
	stackaddr = 0xB0000000;
#error Need to define a stack address hint for this architecture

	kret = thread_create(ctask, &th);
	if (kret != KERN_SUCCESS)
	thread_reference(th);

	sright = (void *)convert_thread_to_port(th);
	th_thport = ipc_port_copyout_send(sright, get_task_ipcspace(ctask));

	if ((flags & PTHREAD_START_CUSTOM) == 0) {
		th_stacksize = (mach_vm_size_t)user_stack;	/* if not custom, 'stack' carries the requested stacksize */
		th_allocsize = th_stacksize + PTH_DEFAULT_GUARDSIZE + p->p_pthsize;

		kret = mach_vm_map(vmap, &stackaddr,
				VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL,
				0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
		if (kret != KERN_SUCCESS)
			kret = mach_vm_allocate(vmap,
					&stackaddr, th_allocsize,
					VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
		if (kret != KERN_SUCCESS) {

		KERNEL_DEBUG_CONSTANT(0x9000080 | DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);

		th_stackaddr = stackaddr;

		/*
		 * The guard page is at the lowest address
		 * The stack base is the highest address
		 */
		kret = mach_vm_protect(vmap, stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE);
		if (kret != KERN_SUCCESS) {

		th_stack = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
		th_pthread = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
		user_stacksize = th_stacksize;

		/*
		 * Pre-fault the first page of the new thread's stack and the page that will
		 * contain the pthread_t structure.
		 */
			vm_map_trunc_page(th_stack - PAGE_SIZE_64),
			VM_PROT_READ | VM_PROT_WRITE,
			THREAD_UNINT, NULL, 0);

			vm_map_trunc_page(th_pthread),
			VM_PROT_READ | VM_PROT_WRITE,
			THREAD_UNINT, NULL, 0);

		th_stack = user_stack;
		user_stacksize = user_stack;
		th_pthread = user_pthread;

		KERNEL_DEBUG_CONSTANT(0x9000080 | DBG_FUNC_NONE, 0, 0, 0, 3, 0);
#if defined(__i386__) || defined(__x86_64__)
	/*
	 * Set up i386 registers & function call.
	 */
		x86_thread_state32_t state;
		x86_thread_state32_t *ts = &state;

		ts->eip = (int)p->p_threadstart;
		ts->eax = (unsigned int)th_pthread;
		ts->ebx = (unsigned int)th_thport;
		ts->ecx = (unsigned int)user_func;
		ts->edx = (unsigned int)user_funcarg;
		ts->edi = (unsigned int)user_stacksize;
		ts->esi = (unsigned int)uap->flags;

		ts->esp = (int)((vm_offset_t)(th_stack - C_32_STK_ALIGN));

		thread_set_wq_state32(th, (thread_state_t)ts);

		x86_thread_state64_t state64;
		x86_thread_state64_t *ts64 = &state64;

		ts64->rip = (uint64_t)p->p_threadstart;
		ts64->rdi = (uint64_t)th_pthread;
		ts64->rsi = (uint64_t)(th_thport);
		ts64->rdx = (uint64_t)user_func;
		ts64->rcx = (uint64_t)user_funcarg;
		ts64->r8 = (uint64_t)user_stacksize;
		ts64->r9 = (uint64_t)uap->flags;

		/*
		 * set stack pointer aligned to 16 byte boundary
		 */
		ts64->rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN);

		thread_set_wq_state64(th, (thread_state_t)ts64);
#error bsdthread_create not defined for this architecture
	/* Set scheduling parameters if needed */
	if ((flags & PTHREAD_START_SETSCHED) != 0) {
		thread_extended_policy_data_t	extinfo;
		thread_precedence_policy_data_t	precedinfo;

		importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
		policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;

		if (policy == SCHED_OTHER)
			extinfo.timeshare = 1;
			extinfo.timeshare = 0;
		thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

#define BASEPRI_DEFAULT 31
		precedinfo.importance = (importance - BASEPRI_DEFAULT);
		thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);

	kret = thread_resume(th);
	if (kret != KERN_SUCCESS) {

	thread_deallocate(th);	/* drop the creator reference */

	KERNEL_DEBUG_CONSTANT(0x9000080 | DBG_FUNC_END, error, th_pthread, 0, 0, 0);

	*retval = th_pthread;

	(void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
	(void)mach_port_deallocate(get_task_ipcspace(ctask), th_thport);
	(void)thread_terminate(th);
	(void)thread_deallocate(th);
bsdthread_terminate(__unused struct proc *p, struct bsdthread_terminate_args *uap, __unused int32_t *retval)
	mach_vm_offset_t freeaddr;
	mach_vm_size_t freesize;
	mach_port_name_t kthport = (mach_port_name_t)uap->port;
	mach_port_name_t sem = (mach_port_name_t)uap->sem;

	freeaddr = (mach_vm_offset_t)uap->stackaddr;
	freesize = uap->freesize;

	KERNEL_DEBUG_CONSTANT(0x9000084 | DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0);

	if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
		kret = mach_vm_deallocate(current_map(), freeaddr, freesize);
		if (kret != KERN_SUCCESS) {

	(void) thread_terminate(current_thread());

	if (sem != MACH_PORT_NULL) {
		kret = semaphore_signal_internal_trap(sem);
		if (kret != KERN_SUCCESS) {

	if (kthport != MACH_PORT_NULL)
		mach_port_deallocate(get_task_ipcspace(current_task()), kthport);

	thread_exception_return();
	panic("bsdthread_terminate: still running\n");

	KERNEL_DEBUG_CONSTANT(0x9000084 | DBG_FUNC_END, 0, 0, 0, 0xff, 0);
bsdthread_register(struct proc *p, struct bsdthread_register_args *uap, __unused int32_t *retval)
	/* prevent multiple registrations */
	if ((p->p_lflag & P_LREGISTER) != 0)

	/* syscall randomizer test can pass bogus values */
	if (uap->pthsize > MAX_PTHREAD_SIZE) {

	p->p_threadstart = uap->threadstart;
	p->p_wqthread = uap->wqthread;
	p->p_pthsize = uap->pthsize;
	p->p_targconc = uap->targetconc_ptr;
	p->p_dispatchqueue_offset = uap->dispatchqueue_offset;
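/*
 * Workqueue tuning knobs; the defaults come from the WQ_* / WORKQUEUE_*
 * constants and each value is exported read-write via sysctl under kern.
 */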
uint32_t wq_yielded_threshold		= WQ_YIELDED_THRESHOLD;
uint32_t wq_yielded_window_usecs	= WQ_YIELDED_WINDOW_USECS;
uint32_t wq_stalled_window_usecs	= WQ_STALLED_WINDOW_USECS;
uint32_t wq_reduce_pool_window_usecs	= WQ_REDUCE_POOL_WINDOW_USECS;
uint32_t wq_max_timer_interval_usecs	= WQ_MAX_TIMER_INTERVAL_USECS;
uint32_t wq_max_threads			= WORKQUEUE_MAXTHREADS;
uint32_t wq_max_constrained_threads	= WORKQUEUE_MAXTHREADS / 8;

SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_yielded_threshold, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_yielded_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_stalled_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_reduce_pool_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_max_timer_interval_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_max_threads, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_max_constrained_threads, 0, "");

static uint32_t wq_init_constrained_limit = 1;
workqueue_init_lock(proc_t p)
	lck_spin_init(&p->p_wqlock, pthread_lck_grp, pthread_lck_attr);

	p->p_wqiniting = FALSE;

workqueue_destroy_lock(proc_t p)
	lck_spin_destroy(&p->p_wqlock, pthread_lck_grp);

workqueue_lock_spin(proc_t p)
	lck_spin_lock(&p->p_wqlock);

workqueue_unlock(proc_t p)
	lck_spin_unlock(&p->p_wqlock);
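/*
 * Arm the workqueue's "add" timer with a simple backoff: start at the
 * stalled-window interval and double on each re-arm, capped at
 * wq_max_timer_interval_usecs.
 */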
workqueue_interval_timer_start(struct workqueue *wq)
	if (wq->wq_timer_interval == 0)
		wq->wq_timer_interval = wq_stalled_window_usecs;
		wq->wq_timer_interval = wq->wq_timer_interval * 2;

	if (wq->wq_timer_interval > wq_max_timer_interval_usecs)
		wq->wq_timer_interval = wq_max_timer_interval_usecs;

	clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline);

	thread_call_enter_delayed(wq->wq_atimer_call, deadline);

	KERNEL_DEBUG(0xefffd110, wq, wq->wq_itemcount, wq->wq_flags, wq->wq_timer_interval, 0);
wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp)
	uint64_t lastblocked_ts;

	/*
	 * the timestamp is updated atomically w/o holding the workqueue lock
	 * so we need to do an atomic read of the 64 bits so that we don't see
	 * a mismatched pair of 32 bit reads... we accomplish this in an architecturally
	 * independent fashion by using OSCompareAndSwap64 to write back the
	 * value we grabbed... if it succeeds, then we have a good timestamp to
	 * evaluate... if it fails, we straddled grabbing the timestamp while it
	 * was being updated... treat a failed update as a busy thread since
	 * it implies we are about to see a really fresh timestamp anyway
	 */
	lastblocked_ts = *lastblocked_tsp;

	if ( !OSCompareAndSwap64((UInt64)lastblocked_ts, (UInt64)lastblocked_ts, lastblocked_tsp))

	if (lastblocked_ts >= cur_ts) {
		/*
		 * because the update of the timestamp when a thread blocks isn't
		 * serialized against us looking at it (i.e. we don't hold the workq lock)
		 * it's possible to have a timestamp that matches the current time or
		 * that even looks to be in the future relative to when we grabbed the current
		 * time... just treat this as a busy thread since it must have just blocked.
		 */

	elapsed = cur_ts - lastblocked_ts;

	absolutetime_to_microtime(elapsed, &secs, &usecs);

	if (secs == 0 && usecs < wq_stalled_window_usecs)
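/*
 * WQ_TIMER_NEEDED() atomically sets WQ_ATIMER_RUNNING (unless the queue is
 * exiting or a timer is already pending) and reports, via start_timer,
 * whether the caller won the race and must actually arm the timer.
 */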
#define WQ_TIMER_NEEDED(wq, start_timer) do {		\
	int oldflags = wq->wq_flags;			\
	if ( !(oldflags & (WQ_EXITING | WQ_ATIMER_RUNNING))) {	\
		if (OSCompareAndSwap(oldflags, oldflags | WQ_ATIMER_RUNNING, (UInt32 *)&wq->wq_flags)) \
			start_timer = TRUE;		\
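/*
 * workqueue_add_timer() is the thread_call handler armed above: it
 * re-checks for stalled affinity groups and queued work, adds worker
 * threads and dispatches items as needed, and re-arms itself if idle
 * threads or busy groups still block progress.
 */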
workqueue_add_timer(struct workqueue *wq, __unused int param1)
	boolean_t	start_timer = FALSE;
	boolean_t	add_thread;

	KERNEL_DEBUG(0xefffd108 | DBG_FUNC_START, wq, wq->wq_flags, wq->wq_nthreads, wq->wq_thidlecount, 0);

	workqueue_lock_spin(p);

	/*
	 * because workqueue_callback now runs w/o taking the workqueue lock
	 * we are unsynchronized w/r to a change in state of the running threads...
	 * to make sure we always evaluate that change, we allow it to start up
	 * a new timer if the current one is actively evaluating the state
	 * however, we do not need more than 2 timers fired up (1 active and 1 pending)
	 * and we certainly do not want 2 active timers evaluating the state
	 * simultaneously... so use WQL_ATIMER_BUSY to serialize the timers...
	 * note that WQL_ATIMER_BUSY is in a different flag word from WQ_ATIMER_RUNNING since
	 * it is always protected by the workq lock... WQ_ATIMER_RUNNING is evaluated
	 * and set atomically since the callback function needs to manipulate it
	 * w/o holding the workq lock...
	 *
	 * !WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY   ==   no pending timer, no active timer
	 * !WQ_ATIMER_RUNNING && WQL_ATIMER_BUSY    ==   no pending timer, 1 active timer
	 * WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY    ==   1 pending timer, no active timer
	 * WQ_ATIMER_RUNNING && WQL_ATIMER_BUSY     ==   1 pending timer, 1 active timer
	 */
	while (wq->wq_lflags & WQL_ATIMER_BUSY) {
		wq->wq_lflags |= WQL_ATIMER_WAITING;

		assert_wait((caddr_t)wq, (THREAD_UNINT));
		thread_block(THREAD_CONTINUE_NULL);
		workqueue_lock_spin(p);

	wq->wq_lflags |= WQL_ATIMER_BUSY;

	/*
	 * the workq lock will protect us from seeing WQ_EXITING change state, but we
	 * still need to update this atomically in case someone else tries to start
	 * the timer just as we're releasing it
	 */
	while ( !(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags & ~WQ_ATIMER_RUNNING), (UInt32 *)&wq->wq_flags)));

	if ( !(wq->wq_flags & WQ_EXITING)) {
		/*
		 * check to see if the stall frequency was beyond our tolerance
		 * or we have work on the queue, but haven't scheduled any
		 * new work within our acceptable time interval because
		 * there were no idle threads left to schedule
		 */
		if (wq->wq_itemcount) {
			uint32_t	affinity_tag;

			for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
				if (wq->wq_list_bitmap & (1 << priority))

			assert(priority < WORKQUEUE_NUMPRIOS);

			curtime = mach_absolute_time();

			for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) {
				/*
				 * if we have no idle threads, we can try to add them if needed
				 */
				if (wq->wq_thidlecount == 0)

				/*
				 * look for first affinity group that is currently not active
				 * i.e. no active threads at this priority level or higher
				 * and has not been active recently at this priority level or higher
				 */
				for (i = 0; i <= priority; i++) {
					if (wq->wq_thactive_count[i][affinity_tag]) {
					if (wq->wq_thscheduled_count[i][affinity_tag]) {
						if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag])) {

			if (add_thread == TRUE) {
				retval = workqueue_addnewthread(wq, FALSE);

		if (wq->wq_itemcount) {
			/*
			 * as long as we have threads to schedule, and we successfully
			 * scheduled new work, keep trying
			 */
			while (wq->wq_thidlecount && !(wq->wq_flags & WQ_EXITING)) {
				/*
				 * workqueue_run_nextitem is responsible for
				 * dropping the workqueue lock in all cases
				 */
				retval = workqueue_run_nextitem(p, wq, THREAD_NULL, 0, 0, 0);
				workqueue_lock_spin(p);

			if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_itemcount) {

				if (wq->wq_thidlecount == 0 && retval == TRUE && add_thread == TRUE)

				if (wq->wq_thidlecount == 0 || busycount)
					WQ_TIMER_NEEDED(wq, start_timer);

				KERNEL_DEBUG(0xefffd108 | DBG_FUNC_NONE, wq, wq->wq_itemcount, wq->wq_thidlecount, busycount, 0);

	if ( !(wq->wq_flags & WQ_ATIMER_RUNNING))
		wq->wq_timer_interval = 0;

	wq->wq_lflags &= ~WQL_ATIMER_BUSY;

	if ((wq->wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) {
		/*
		 * wakeup the thread hung up in workqueue_exit or workqueue_add_timer waiting for this timer
		 * to finish getting out of the way
		 */
		wq->wq_lflags &= ~WQL_ATIMER_WAITING;

	KERNEL_DEBUG(0xefffd108 | DBG_FUNC_END, wq, start_timer, wq->wq_nthreads, wq->wq_thidlecount, 0);

	if (start_timer == TRUE)
		workqueue_interval_timer_start(wq);
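/*
 * workqueue_thread_yielded() is called when a workqueue thread voluntarily
 * yields; once the yield count crosses wq_yielded_threshold within the
 * yielded window, it tries to push another queued item along (creating an
 * overcommit thread if the idle list is empty).
 */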
workqueue_thread_yielded(void)
	struct workqueue *wq;

	if ((wq = p->p_wqptr) == NULL || wq->wq_itemcount == 0)

	workqueue_lock_spin(p);

	if (wq->wq_itemcount) {

		if (wq->wq_thread_yielded_count++ == 0)
			wq->wq_thread_yielded_timestamp = mach_absolute_time();

		if (wq->wq_thread_yielded_count < wq_yielded_threshold) {

		KERNEL_DEBUG(0xefffd138 | DBG_FUNC_START, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 0, 0);

		wq->wq_thread_yielded_count = 0;

		curtime = mach_absolute_time();
		elapsed = curtime - wq->wq_thread_yielded_timestamp;
		absolutetime_to_microtime(elapsed, &secs, &usecs);

		if (secs == 0 && usecs < wq_yielded_window_usecs) {

			if (wq->wq_thidlecount == 0) {
				workqueue_addnewthread(wq, TRUE);
				/*
				 * 'workqueue_addnewthread' drops the workqueue lock
				 * when creating the new thread and then retakes it before
				 * returning... this window allows other threads to process
				 * work on the queue, so we need to recheck for available work
				 * if none found, we just return... the newly created thread
				 * will eventually get used (if it hasn't already)...
				 */
				if (wq->wq_itemcount == 0) {

			if (wq->wq_thidlecount) {
				uint32_t affinity = -1;
				struct workitem *witem = NULL;
				struct workitemlist *wl = NULL;
				struct threadlist *tl;

				uth = get_bsdthread_info(current_thread());
				if ((tl = uth->uu_threadlist))
					affinity = tl->th_affinity_tag;

				for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
					if (wq->wq_list_bitmap & (1 << priority)) {
						wl = (struct workitemlist *)&wq->wq_list[priority];

				assert(!(TAILQ_EMPTY(&wl->wl_itemlist)));

				witem = TAILQ_FIRST(&wl->wl_itemlist);
				TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);

				if (TAILQ_EMPTY(&wl->wl_itemlist))
					wq->wq_list_bitmap &= ~(1 << priority);

				item = witem->wi_item;
				witem->wi_item = (user_addr_t)0;
				witem->wi_affinity = 0;

				TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);

				(void)workqueue_run_nextitem(p, wq, THREAD_NULL, item, priority, affinity);
				/*
				 * workqueue_run_nextitem is responsible for
				 * dropping the workqueue lock in all cases
				 */
				KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 1, 0);

	KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 2, 0);
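/*
 * workqueue_callback() is the scheduler call attached to workqueue threads.
 * It maintains the per-priority/per-affinity active counts without taking
 * the workq lock: on BLOCK it decrements the count, records a last-blocked
 * timestamp and may arm the add timer; on UNBLOCK it increments the count.
 */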
workqueue_callback(int type, thread_t thread)
	struct threadlist *tl;
	struct workqueue *wq;

	uth = get_bsdthread_info(thread);
	tl = uth->uu_threadlist;

	case SCHED_CALL_BLOCK:
		uint32_t	old_activecount;

		old_activecount = OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]);

		if (old_activecount == 1) {
			boolean_t	start_timer = FALSE;
			UInt64		*lastblocked_ptr;

			/*
			 * we were the last active thread on this affinity set
			 * and we've got work to do
			 */
			lastblocked_ptr = (UInt64 *)&wq->wq_lastblocked_ts[tl->th_priority][tl->th_affinity_tag];
			curtime = mach_absolute_time();

			/*
			 * if we collide with another thread trying to update the last_blocked (really unlikely
			 * since another thread would have to get scheduled and then block after we start down
			 * this path), it's not a problem.  Either timestamp is adequate, so no need to retry
			 */
			OSCompareAndSwap64(*lastblocked_ptr, (UInt64)curtime, lastblocked_ptr);

			if (wq->wq_itemcount)
				WQ_TIMER_NEEDED(wq, start_timer);

			if (start_timer == TRUE)
				workqueue_interval_timer_start(wq);

		KERNEL_DEBUG1(0xefffd020 | DBG_FUNC_START, wq, old_activecount, tl->th_priority, tl->th_affinity_tag, thread_tid(thread));

	case SCHED_CALL_UNBLOCK:
		/*
		 * we cannot take the workqueue_lock here...
		 * an UNBLOCK can occur from a timer event which
		 * is run from an interrupt context... if the workqueue_lock
		 * is already held by this processor, we'll deadlock...
		 * the thread lock for the thread being UNBLOCKED
		 */
		OSAddAtomic(1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]);

		KERNEL_DEBUG1(0xefffd020 | DBG_FUNC_END, wq, wq->wq_threads_scheduled, tl->th_priority, tl->th_affinity_tag, thread_tid(thread));
workqueue_removethread(struct threadlist *tl, int fromexit)
	struct workqueue *wq;
	struct uthread * uth;

	/*
	 * If fromexit is set, the call is from workqueue_exit(),
	 * so some cleanups are to be avoided.
	 */
	TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);

		wq->wq_thidlecount--;

	/*
	 * Clear the threadlist pointer in uthread so
	 * blocked thread on wakeup for termination will
	 * not access the thread list as it is going to be
	 */
	thread_sched_call(tl->th_thread, NULL);

	uth = get_bsdthread_info(tl->th_thread);
	if (uth != (struct uthread *)0) {
		uth->uu_threadlist = NULL;

		/* during exit the lock is not held */
		workqueue_unlock(wq->wq_proc);

	if ( (tl->th_flags & TH_LIST_SUSPENDED) ) {
		/*
		 * thread was created, but never used...
		 * need to clean up the stack and port ourselves
		 * since we're not going to spin up through the
		 * normal exit path triggered from Libc
		 */
			/* vm map is already deallocated when this is called from exit */
			(void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize);

		(void)mach_port_deallocate(get_task_ipcspace(wq->wq_task), tl->th_thport);

		KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));

		KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));
	/*
	 * drop our ref on the thread
	 */
	thread_deallocate(tl->th_thread);

	kfree(tl, sizeof(struct threadlist));
/*
 * called with workq lock held
 * dropped and retaken around thread creation
 * return with workq lock held
 */
workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread)
	struct threadlist *tl;
	mach_vm_offset_t stackaddr;

	if ((wq->wq_flags & WQ_EXITING) == WQ_EXITING)

	if (wq->wq_nthreads >= wq_max_threads || wq->wq_nthreads >= (CONFIG_THREAD_MAX - 20)) {
		wq->wq_lflags |= WQL_EXCEEDED_TOTAL_THREAD_LIMIT;

	wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;

	if (oc_thread == FALSE && wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
		/*
		 * if we're not creating this thread to service an overcommit request,
		 * then check the size of the constrained thread pool... if we've already
		 * reached our max for threads scheduled from this pool, don't create a new
		 * one... the callers of this function are prepared for failure.
		 */
		wq->wq_lflags |= WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;

	if (wq->wq_constrained_threads_scheduled < wq_max_constrained_threads)
		wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;

	workqueue_unlock(p);

	kret = thread_create_workq(wq->wq_task, (thread_continue_t)wq_unsuspend_continue, &th);

	if (kret != KERN_SUCCESS)

	tl = kalloc(sizeof(struct threadlist));
	bzero(tl, sizeof(struct threadlist));

#if defined(__i386__) || defined(__x86_64__)
	stackaddr = 0xB0000000;
#error Need to define a stack address hint for this architecture

	tl->th_allocsize = PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE + p->p_pthsize;

	kret = mach_vm_map(wq->wq_map, &stackaddr,
			VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL,
			0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
			VM_INHERIT_DEFAULT);

	if (kret != KERN_SUCCESS) {
		kret = mach_vm_allocate(wq->wq_map,
				&stackaddr, tl->th_allocsize,
				VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);

	if (kret == KERN_SUCCESS) {
		/*
		 * The guard page is at the lowest address
		 * The stack base is the highest address
		 */
		kret = mach_vm_protect(wq->wq_map, stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE);

		if (kret != KERN_SUCCESS)
			(void) mach_vm_deallocate(wq->wq_map, stackaddr, tl->th_allocsize);

	if (kret != KERN_SUCCESS) {
		(void) thread_terminate(th);
		thread_deallocate(th);

		kfree(tl, sizeof(struct threadlist));

	thread_reference(th);

	sright = (void *) convert_thread_to_port(th);
	tl->th_thport = ipc_port_copyout_send(sright, get_task_ipcspace(wq->wq_task));

	thread_static_param(th, TRUE);

	tl->th_flags = TH_LIST_INITED | TH_LIST_SUSPENDED;

	tl->th_stackaddr = stackaddr;
	tl->th_affinity_tag = -1;
	tl->th_priority = WORKQUEUE_NUMPRIOS;

	uth = get_bsdthread_info(tl->th_thread);

	workqueue_lock_spin(p);

	uth->uu_threadlist = (void *)tl;
	TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);

	wq->wq_thidlecount++;

	KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_START, wq, wq->wq_nthreads, 0, thread_tid(current_thread()), thread_tid(tl->th_thread));

	workqueue_lock_spin(p);
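/*
 * workq_open() lazily creates the per-process workqueue: it sizes and
 * allocates the struct workqueue plus its per-cpu/per-priority counter and
 * timestamp arrays, initializes the item and thread lists, and allocates
 * the add timer thread_call, serializing against concurrent initializers
 * via p_wqiniting.
 */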
workq_open(struct proc *p, __unused struct workq_open_args *uap, __unused int32_t *retval)
	struct workqueue * wq;
	boolean_t need_wakeup = FALSE;
	struct workitem * witem;
	struct workitemlist *wl;

	if ((p->p_lflag & P_LREGISTER) == 0)

	num_cpus = ml_get_max_cpus();

	if (wq_init_constrained_limit) {
		/*
		 * set up the limit for the constrained pool
		 * this is a virtual pool in that we don't
		 * maintain it on a separate idle and run list
		 */
		limit = num_cpus * (WORKQUEUE_NUMPRIOS + 1);

		if (limit > wq_max_constrained_threads)
			wq_max_constrained_threads = limit;

		wq_init_constrained_limit = 0;

	workqueue_lock_spin(p);

	if (p->p_wqptr == NULL) {

		while (p->p_wqiniting == TRUE) {

			assert_wait((caddr_t)&p->p_wqiniting, THREAD_UNINT);
			workqueue_unlock(p);

			thread_block(THREAD_CONTINUE_NULL);

			workqueue_lock_spin(p);

		if (p->p_wqptr != NULL)

		p->p_wqiniting = TRUE;

		workqueue_unlock(p);

		wq_size = sizeof(struct workqueue) +
			(num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint32_t)) +
			(num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint32_t)) +
			(num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint64_t)) +

		ptr = (char *)kalloc(wq_size);
		bzero(ptr, wq_size);

		wq = (struct workqueue *)ptr;
		wq->wq_flags = WQ_LIST_INITED;
		wq->wq_affinity_max = num_cpus;
		wq->wq_task = current_task();
		wq->wq_map  = current_map();

		for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
			wl = (struct workitemlist *)&wq->wq_list[i];
			TAILQ_INIT(&wl->wl_itemlist);
			TAILQ_INIT(&wl->wl_freelist);

			for (j = 0; j < WORKITEM_SIZE; j++) {
				witem = &wq->wq_array[(i * WORKITEM_SIZE) + j];
				TAILQ_INSERT_TAIL(&wl->wl_freelist, witem, wi_entry);

			wq->wq_reqconc[i] = wq->wq_affinity_max;

		nptr = ptr + sizeof(struct workqueue);

		for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
			wq->wq_thactive_count[i] = (uint32_t *)nptr;
			nptr += (num_cpus * sizeof(uint32_t));

		for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
			wq->wq_thscheduled_count[i] = (uint32_t *)nptr;
			nptr += (num_cpus * sizeof(uint32_t));

		/*
		 * align nptr on a 64 bit boundary so that we can do nice
		 * atomic64 operations on the timestamps...
		 * note that we requested an extra uint64_t when calculating
		 * the size for the allocation of the workqueue struct
		 */
		nptr += (sizeof(uint64_t) - 1);
		nptr = (char *)((uintptr_t)nptr & ~(sizeof(uint64_t) - 1));

		for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
			wq->wq_lastblocked_ts[i] = (uint64_t *)nptr;
			nptr += (num_cpus * sizeof(uint64_t));

		TAILQ_INIT(&wq->wq_thrunlist);
		TAILQ_INIT(&wq->wq_thidlelist);

		wq->wq_atimer_call = thread_call_allocate((thread_call_func_t)workqueue_add_timer, (thread_call_param_t)wq);

		workqueue_lock_spin(p);

		p->p_wqptr = (void *)wq;
		p->p_wqsize = wq_size;

		p->p_wqiniting = FALSE;

	workqueue_unlock(p);

	if (need_wakeup == TRUE)
		wakeup(&p->p_wqiniting);
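/*
 * workq_kernreturn() is the workqueue syscall demultiplexer: WQOPS_QUEUE_ADD
 * enqueues a work item (spinning up a thread if none are idle),
 * WQOPS_THREAD_RETURN recycles the calling worker thread, and
 * WQOPS_THREAD_SETCONC re-purposes the affinity argument as a per-priority
 * concurrency target. All paths finish through workqueue_run_nextitem().
 */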
workq_kernreturn(struct proc *p, struct workq_kernreturn_args *uap, __unused int32_t *retval)
	user_addr_t item	= uap->item;
	int options		= uap->options;
	int prio		= uap->prio;	/* should be used to find the right workqueue */
	int affinity		= uap->affinity;
	thread_t th		= THREAD_NULL;
	user_addr_t oc_item	= 0;
	struct workqueue *wq;

	if ((p->p_lflag & P_LREGISTER) == 0)

	/*
	 * affinity not yet hooked up on this path
	 */

	case WQOPS_QUEUE_ADD: {

		if (prio & WORKQUEUE_OVERCOMMIT) {
			prio &= ~WORKQUEUE_OVERCOMMIT;

		if ((prio < 0) || (prio >= WORKQUEUE_NUMPRIOS))

		workqueue_lock_spin(p);

		if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
			workqueue_unlock(p);

		if (wq->wq_thidlecount == 0 && (oc_item || (wq->wq_constrained_threads_scheduled < wq->wq_affinity_max))) {

			workqueue_addnewthread(wq, oc_item ? TRUE : FALSE);

			if (wq->wq_thidlecount == 0)

		error = workqueue_additem(wq, prio, item, affinity);

		KERNEL_DEBUG(0xefffd008 | DBG_FUNC_NONE, wq, prio, affinity, oc_item, 0);

	case WQOPS_THREAD_RETURN: {

		th = current_thread();
		struct uthread *uth = get_bsdthread_info(th);

		/* reset signal mask on the workqueue thread to default state */
		if (uth->uu_sigmask != (sigset_t)(~workq_threadmask)) {
			uth->uu_sigmask = ~workq_threadmask;

		workqueue_lock_spin(p);

		if ((wq = (struct workqueue *)p->p_wqptr) == NULL || (uth->uu_threadlist == NULL)) {
			workqueue_unlock(p);

		KERNEL_DEBUG(0xefffd004 | DBG_FUNC_END, wq, 0, 0, 0, 0);

	case WQOPS_THREAD_SETCONC: {

		if ((prio < 0) || (prio > WORKQUEUE_NUMPRIOS))

		workqueue_lock_spin(p);

		if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
			workqueue_unlock(p);

		/*
		 * for this operation, we re-purpose the affinity
		 * argument as the concurrency target
		 */
		if (prio < WORKQUEUE_NUMPRIOS)
			wq->wq_reqconc[prio] = affinity;
			for (prio = 0; prio < WORKQUEUE_NUMPRIOS; prio++)
				wq->wq_reqconc[prio] = affinity;

	(void)workqueue_run_nextitem(p, wq, th, oc_item, prio, affinity);
	/*
	 * workqueue_run_nextitem is responsible for
	 * dropping the workqueue lock in all cases
	 */
/*
 * Routine:	workqueue_mark_exiting
 *
 * Function:	Mark the work queue such that new threads will not be added to the
 *		work queue after we return.
 *
 * Conditions:	Called against the current process.
 */
workqueue_mark_exiting(struct proc *p)
	struct workqueue  * wq;

	KERNEL_DEBUG(0x9008088 | DBG_FUNC_START, p->p_wqptr, 0, 0, 0, 0);

	workqueue_lock_spin(p);

	/*
	 * we now arm the timer in the callback function w/o holding the workq lock...
	 * we do this by setting WQ_ATIMER_RUNNING via OSCompareAndSwap in order to
	 * ensure only a single timer is running and to notice that WQ_EXITING has
	 * been set (we don't want to start a timer once WQ_EXITING is posted)
	 *
	 * so once we have successfully set WQ_EXITING, we cannot fire up a new timer...
	 * therefore no need to clear the timer state atomically from the flags
	 *
	 * since we always hold the workq lock when dropping WQ_ATIMER_RUNNING
	 * the check for and sleep until clear is protected
	 */
	while ( !(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags | WQ_EXITING), (UInt32 *)&wq->wq_flags)));

	if (wq->wq_flags & WQ_ATIMER_RUNNING) {
		if (thread_call_cancel(wq->wq_atimer_call) == TRUE)
			wq->wq_flags &= ~WQ_ATIMER_RUNNING;

	while ((wq->wq_flags & WQ_ATIMER_RUNNING) || (wq->wq_lflags & WQL_ATIMER_BUSY)) {

		assert_wait((caddr_t)wq, (THREAD_UNINT));
		workqueue_unlock(p);

		thread_block(THREAD_CONTINUE_NULL);

		workqueue_lock_spin(p);

	workqueue_unlock(p);

	KERNEL_DEBUG(0x9008088 | DBG_FUNC_END, 0, 0, 0, 0, 0);
/*
 * Routine:	workqueue_exit
 *
 * Function:	clean up the work queue structure(s) now that there are no threads
 *		left running inside the work queue (except possibly current_thread).
 *
 * Conditions:	Called by the last thread in the process.
 *		Called against current process.
 */
workqueue_exit(struct proc *p)
	struct workqueue  * wq;
	struct threadlist  * tl, *tlist;
	struct uthread	*uth;

	wq = (struct workqueue *)p->p_wqptr;

	KERNEL_DEBUG(0x900808c | DBG_FUNC_START, p->p_wqptr, 0, 0, 0, 0);

	wq_size = p->p_wqsize;

	/*
	 * Clean up workqueue data structures for threads that exited and
	 * didn't get a chance to clean up after themselves.
	 */
	TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {

		thread_sched_call(tl->th_thread, NULL);

		uth = get_bsdthread_info(tl->th_thread);
		if (uth != (struct uthread *)0) {
			uth->uu_threadlist = NULL;

		TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);

		/*
		 * drop our last ref on the thread
		 */
		thread_deallocate(tl->th_thread);

		kfree(tl, sizeof(struct threadlist));

	TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) {
		workqueue_removethread(tl, 1);

	thread_call_free(wq->wq_atimer_call);

	KERNEL_DEBUG(0x900808c | DBG_FUNC_END, 0, 0, 0, 0, 0);
workqueue_additem(struct workqueue *wq, int prio, user_addr_t item, int affinity)
	struct workitem	*witem;
	struct workitemlist *wl;

	wl = (struct workitemlist *)&wq->wq_list[prio];

	if (TAILQ_EMPTY(&wl->wl_freelist))

	witem = (struct workitem *)TAILQ_FIRST(&wl->wl_freelist);
	TAILQ_REMOVE(&wl->wl_freelist, witem, wi_entry);

	witem->wi_item = item;
	witem->wi_affinity = affinity;
	TAILQ_INSERT_TAIL(&wl->wl_itemlist, witem, wi_entry);

	wq->wq_list_bitmap |= (1 << prio);
static int workqueue_importance[WORKQUEUE_NUMPRIOS] =

#define WORKQ_POLICY_TIMESHARE 1

static int workqueue_policy[WORKQUEUE_NUMPRIOS] =
	WORKQ_POLICY_TIMESHARE, WORKQ_POLICY_TIMESHARE, WORKQ_POLICY_TIMESHARE, WORKQ_POLICY_TIMESHARE
/*
 * workqueue_run_nextitem:
 *   called with the workqueue lock held...
 *   responsible for dropping it in all cases
 */
workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_addr_t oc_item, int oc_prio, int oc_affinity)
	struct workitem *witem = NULL;
	user_addr_t item = 0;
	thread_t th_to_run = THREAD_NULL;
	thread_t th_to_park = THREAD_NULL;
	int wake_thread = 0;
	int reuse_thread = 1;
	uint32_t priority, orig_priority;
	uint32_t affinity_tag, orig_affinity_tag;
	uint32_t activecount;
	uint32_t us_to_wait;
	struct threadlist *tl = NULL;
	struct threadlist *ttl = NULL;
	struct uthread *uth = NULL;
	struct workitemlist *wl = NULL;
	boolean_t start_timer = FALSE;
	boolean_t adjust_counters = TRUE;

	KERNEL_DEBUG(0xefffd000 | DBG_FUNC_START, wq, thread, wq->wq_thidlecount, wq->wq_itemcount, 0);

	/*
	 * from here until we drop the workq lock
	 * we can't be pre-empted since we hold
	 * the lock in spin mode... this is important
	 * since we have to independently update the priority
	 * and affinity that the thread is associated with
	 * and these values are used to index the multi-dimensional
	 * counter arrays in 'workqueue_callback'
	 */
		uint32_t min_scheduled = 0;
		uint32_t scheduled_count;
		uint32_t active_count;
		uint32_t t_affinity = 0;

		if ((affinity_tag = oc_affinity) == (uint32_t)-1) {
			for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) {
				/*
				 * look for the affinity group with the least number of threads
				 */
				scheduled_count = 0;

				for (i = 0; i <= priority; i++) {
					scheduled_count += wq->wq_thscheduled_count[i][affinity_tag];
					active_count += wq->wq_thactive_count[i][affinity_tag];

				if (active_count == 0) {
					t_affinity = affinity_tag;

				if (affinity_tag == 0 || scheduled_count < min_scheduled) {
					min_scheduled = scheduled_count;
					t_affinity = affinity_tag;

			affinity_tag = t_affinity;

		goto grab_idle_thread;
	/*
	 * if we get here, the work should be handled by a constrained thread
	 */
	if (wq->wq_itemcount == 0 || wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
		/*
		 * no work to do, or we're already at or over the scheduling limit for
		 * constrained threads... just return or park the thread...
		 * do not start the timer for this condition... if we don't have any work,
		 * we'll check again when new work arrives... if we're over the limit, we need 1 or more
		 * constrained threads to return to the kernel before we can dispatch work from our queue
		 */
		if ((th_to_park = thread) == THREAD_NULL)

	for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
		if (wq->wq_list_bitmap & (1 << priority)) {
			wl = (struct workitemlist *)&wq->wq_list[priority];

	assert(!(TAILQ_EMPTY(&wl->wl_itemlist)));

	curtime = mach_absolute_time();

	if (thread != THREAD_NULL) {
		uth = get_bsdthread_info(thread);
		tl = uth->uu_threadlist;
		affinity_tag = tl->th_affinity_tag;

		/*
		 * check to see if the affinity group this thread is
		 * associated with is still within the bounds of the
		 * specified concurrency for the priority level
		 * we're considering running work for
		 */
		if (affinity_tag < wq->wq_reqconc[priority]) {
			/*
			 * we're a worker thread from the pool... currently we
			 * are considered 'active' which means we're counted
			 * in "wq_thactive_count"
			 * add up the active counts of all the priority levels
			 * up to and including the one we want to schedule
			 */
			for (activecount = 0, i = 0; i <= priority; i++) {

				acount = wq->wq_thactive_count[i][affinity_tag];

				if (acount == 0 && wq->wq_thscheduled_count[i][affinity_tag]) {
					if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag]))

				activecount += acount;

			if (activecount == 1) {
				/*
				 * we're the only active thread associated with our
				 * affinity group at this priority level and higher,
				 * so pick up some work and keep going
				 */
			/*
			 * there's more than 1 thread running in this affinity group
			 * or the concurrency level has been cut back for this priority...
			 * let's continue on and look for an 'empty' group to run this
			 */
	for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) {
		/*
		 * look for first affinity group that is currently not active
		 * i.e. no active threads at this priority level or higher
		 * and no threads that have run recently
		 */
		for (activecount = 0, i = 0; i <= priority; i++) {
			if ((activecount = wq->wq_thactive_count[i][affinity_tag]))

			if (wq->wq_thscheduled_count[i][affinity_tag]) {
				if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag])) {

		if (activecount == 0 && busycount == 0)

	if (affinity_tag >= wq->wq_reqconc[priority]) {
		/*
		 * we've already got at least 1 thread per
		 * affinity group in the active state...
		 */
			/*
			 * we found at least 1 thread in the
			 * 'busy' state... make sure we start
			 * the timer because if they are the only
			 * threads keeping us from scheduling
			 * this workitem, we won't get a callback
			 * to kick off the timer... we need to
			 */
			WQ_TIMER_NEEDED(wq, start_timer);

			KERNEL_DEBUG(0xefffd000 | DBG_FUNC_NONE, wq, busycount, start_timer, 0, 0);

		if (thread != THREAD_NULL) {
			/*
			 * go park this one for later
			 */
			th_to_park = thread;

	if (thread != THREAD_NULL) {
		/*
		 * we're overbooked on the affinity group this thread is
		 * currently associated with, but we have work to do
		 * and at least 1 idle processor, so we'll just retarget
		 * this thread to a new affinity group
		 */
	if (wq->wq_thidlecount == 0) {
		/*
		 * we don't have a thread to schedule, but we have
		 * work to do and at least 1 affinity group that
		 * doesn't currently have an active thread...
		 */
		WQ_TIMER_NEEDED(wq, start_timer);

		KERNEL_DEBUG(0xefffd118, wq, wq->wq_nthreads, start_timer, 0, 0);

		goto no_thread_to_run;

	/*
	 * we've got a candidate (affinity group with no currently
	 * active threads) to start a new thread on...
	 * we already know there is both work available
	 * and an idle thread, so activate a thread and then
	 * fall into the code that pulls a new workitem...
	 */
	TAILQ_FOREACH(ttl, &wq->wq_thidlelist, th_entry) {
		if (ttl->th_affinity_tag == affinity_tag || ttl->th_affinity_tag == (uint16_t)-1) {

			TAILQ_REMOVE(&wq->wq_thidlelist, ttl, th_entry);

		tl = TAILQ_FIRST(&wq->wq_thidlelist);
		TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);

	wq->wq_thidlecount--;

	TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);

	if ((tl->th_flags & TH_LIST_SUSPENDED) == TH_LIST_SUSPENDED) {
		tl->th_flags &= ~TH_LIST_SUSPENDED;

	} else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) {
		tl->th_flags &= ~TH_LIST_BLOCKED;

	tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY;

	wq->wq_threads_scheduled++;
	wq->wq_thscheduled_count[priority][affinity_tag]++;
	OSAddAtomic(1, &wq->wq_thactive_count[priority][affinity_tag]);

	adjust_counters = FALSE;
	th_to_run = tl->th_thread;

	witem = TAILQ_FIRST(&wl->wl_itemlist);
	TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);

	if (TAILQ_EMPTY(&wl->wl_itemlist))
		wq->wq_list_bitmap &= ~(1 << priority);

	item = witem->wi_item;
	witem->wi_item = (user_addr_t)0;
	witem->wi_affinity = 0;
	TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);

	if ( !(tl->th_flags & TH_LIST_CONSTRAINED)) {
		wq->wq_constrained_threads_scheduled++;
		tl->th_flags |= TH_LIST_CONSTRAINED;

	if (tl->th_flags & TH_LIST_CONSTRAINED) {
		wq->wq_constrained_threads_scheduled--;
		tl->th_flags &= ~TH_LIST_CONSTRAINED;

	orig_priority = tl->th_priority;
	orig_affinity_tag = tl->th_affinity_tag;

	tl->th_priority = priority;
	tl->th_affinity_tag = affinity_tag;

	if (adjust_counters == TRUE && (orig_priority != priority || orig_affinity_tag != affinity_tag)) {
		/*
		 * we need to adjust these counters based on this
		 * thread's new disposition w/r to affinity and priority
		 */
		OSAddAtomic(-1, &wq->wq_thactive_count[orig_priority][orig_affinity_tag]);
		OSAddAtomic(1, &wq->wq_thactive_count[priority][affinity_tag]);

		wq->wq_thscheduled_count[orig_priority][orig_affinity_tag]--;
		wq->wq_thscheduled_count[priority][affinity_tag]++;

	wq->wq_thread_yielded_count = 0;

	workqueue_unlock(p);

	if (orig_affinity_tag != affinity_tag) {
		/*
		 * this thread's affinity does not match the affinity group
		 * it's being placed on (it's either a brand new thread or
		 * we're retargeting an existing thread to a new group)...
		 * affinity tag of 0 means no affinity...
		 * but we want our tags to be 0 based because they
		 * are used to index arrays, so...
		 * keep it 0 based internally and bump by 1 when
		 * calling out to set it
		 */
		KERNEL_DEBUG(0xefffd114 | DBG_FUNC_START, wq, orig_affinity_tag, 0, 0, 0);

		(void)thread_affinity_set(th_to_run, affinity_tag + 1);

		KERNEL_DEBUG(0xefffd114 | DBG_FUNC_END, wq, affinity_tag, 0, 0, 0);

	if (orig_priority != priority) {
		thread_precedence_policy_data_t precedinfo;
		thread_extended_policy_data_t	extinfo;

		policy = workqueue_policy[priority];

		KERNEL_DEBUG(0xefffd120 | DBG_FUNC_START, wq, orig_priority, tl->th_policy, 0, 0);

		if ((orig_priority == WORKQUEUE_BG_PRIOQUEUE) || (priority == WORKQUEUE_BG_PRIOQUEUE)) {
			struct uthread *ut = NULL;

			ut = get_bsdthread_info(th_to_run);

			if (orig_priority == WORKQUEUE_BG_PRIOQUEUE) {
				/* remove the disk throttle, importance will be reset in any case */
#if !CONFIG_EMBEDDED
				proc_restore_workq_bgthreadpolicy(th_to_run);
#else /* !CONFIG_EMBEDDED */
				if ((ut->uu_flag & UT_BACKGROUND) != 0) {
					ut->uu_flag &= ~UT_BACKGROUND;
					ut->uu_iopol_disk = IOPOL_NORMAL;
#endif /* !CONFIG_EMBEDDED */

			if (priority == WORKQUEUE_BG_PRIOQUEUE) {
#if !CONFIG_EMBEDDED
				proc_apply_workq_bgthreadpolicy(th_to_run);
#else /* !CONFIG_EMBEDDED */
				if ((ut->uu_flag & UT_BACKGROUND) == 0) {
					/* set diskthrottling */
					ut->uu_flag |= UT_BACKGROUND;
					ut->uu_iopol_disk = IOPOL_THROTTLE;
#endif /* !CONFIG_EMBEDDED */

		if (tl->th_policy != policy) {
			extinfo.timeshare = policy;
			(void)thread_policy_set_internal(th_to_run, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

			tl->th_policy = policy;

		precedinfo.importance = workqueue_importance[priority];
		(void)thread_policy_set_internal(th_to_run, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);

		KERNEL_DEBUG(0xefffd120 | DBG_FUNC_END, wq, priority, policy, 0, 0);

	if (kdebug_enable) {

		uint32_t code = 0xefffd02c | DBG_FUNC_START;

		for (n = 0; n < WORKQUEUE_NUMPRIOS; n++) {
			for (i = 0; i < wq->wq_affinity_max; i++) {
				if (wq->wq_thactive_count[n][i]) {

					KERNEL_DEBUG(code, lpri, laffinity, wq->wq_thactive_count[lpri][laffinity], first, 0);

		KERNEL_DEBUG(0xefffd02c | DBG_FUNC_END, lpri, laffinity, wq->wq_thactive_count[lpri][laffinity], first, 0);

	/*
	 * if current thread is reused for workitem, does not return via unix_syscall
	 */
	wq_runitem(p, item, th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));

	KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(th_to_run), item, 1, 0);

	/*
	 * we have no work to do or we are fully booked
	 * w/r to running threads...
	 */
	workqueue_unlock(p);

		workqueue_interval_timer_start(wq);

	KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(thread), 0, 2, 0);

	/*
	 * this is a workqueue thread with no more
	 * work to do... park it for now
	 */
	uth = get_bsdthread_info(th_to_park);
	tl = uth->uu_threadlist;
		panic("wq thread with no threadlist ");

	TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
	tl->th_flags &= ~TH_LIST_RUNNING;

	tl->th_flags |= TH_LIST_BLOCKED;
	TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry);

	thread_sched_call(th_to_park, NULL);

	OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]);
	wq->wq_thscheduled_count[tl->th_priority][tl->th_affinity_tag]--;
	wq->wq_threads_scheduled--;

	if (tl->th_flags & TH_LIST_CONSTRAINED) {
		wq->wq_constrained_threads_scheduled--;
		wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
		tl->th_flags &= ~TH_LIST_CONSTRAINED;

	if (wq->wq_thidlecount < 100)
		us_to_wait = wq_reduce_pool_window_usecs - (wq->wq_thidlecount * (wq_reduce_pool_window_usecs / 100));
		us_to_wait = wq_reduce_pool_window_usecs / 100;

	wq->wq_thidlecount++;
	wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;

	assert_wait_timeout((caddr_t)tl, (THREAD_INTERRUPTIBLE), us_to_wait, NSEC_PER_USEC);

	workqueue_unlock(p);

		workqueue_interval_timer_start(wq);

	KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_START, wq, wq->wq_threads_scheduled, wq->wq_thidlecount, us_to_wait, thread_tid(th_to_park));
	KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(thread), 0, 3, 0);

	thread_block((thread_continue_t)wq_unpark_continue);
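/*
 * wq_unsuspend_continue() is the continuation a newly created (suspended)
 * workqueue thread runs on first resume: in the normal case it re-attaches
 * the scheduler callback and returns to user space; otherwise it handles
 * the thread having been aborted before or during dispatch.
 */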
wq_unsuspend_continue(void)
	struct uthread *uth = NULL;
	thread_t th_to_unsuspend;
	struct threadlist *tl;

	th_to_unsuspend = current_thread();
	uth = get_bsdthread_info(th_to_unsuspend);

	if (uth != NULL && (tl = uth->uu_threadlist) != NULL) {

		if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
			/*
			 * most likely a normal resume of this thread occurred...
			 * it's also possible that the thread was aborted after we
			 * finished setting it up so that it could be dispatched... if
			 * so, thread_bootstrap_return will notice the abort and put
			 * the thread on the path to self-destruction
			 */
normal_resume_to_user:
			thread_sched_call(th_to_unsuspend, workqueue_callback);

			thread_bootstrap_return();
		/*
		 * if we get here, it's because we've been resumed due to
		 * an abort of this thread (process is crashing)
		 */
		workqueue_lock_spin(p);

		if (tl->th_flags & TH_LIST_SUSPENDED) {
			/*
			 * thread has been aborted while still on our idle
			 * queue... remove it from our domain...
			 * workqueue_removethread consumes the lock
			 */
			workqueue_removethread(tl, 0);

			thread_bootstrap_return();

		while ((tl->th_flags & TH_LIST_BUSY)) {
			/*
			 * this thread was aborted after we started making
			 * it runnable, but before we finished dispatching it...
			 * we need to wait for that process to finish,
			 * and we need to ask for a wakeup instead of a
			 * thread_resume since the abort has already resumed us
			 */
			tl->th_flags |= TH_LIST_NEED_WAKEUP;

			assert_wait((caddr_t)tl, (THREAD_UNINT));

			workqueue_unlock(p);

			thread_block(THREAD_CONTINUE_NULL);

			workqueue_lock_spin(p);

		workqueue_unlock(p);
		/*
		 * we have finished setting up the thread's context...
		 * thread_bootstrap_return will take us through the abort path
		 * where the thread will self destruct
		 */
		goto normal_resume_to_user;

	thread_bootstrap_return();
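/*
 * wq_unpark_continue() is the continuation used when a parked idle thread
 * wakes up: a normal wakeup returns it to user space, a timeout with the
 * thread still on the idle list destroys it, and a race with an
 * in-progress dispatch waits for TH_LIST_BUSY to clear.
 */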
wq_unpark_continue(void)
	struct uthread *uth = NULL;
	struct threadlist *tl;
	thread_t th_to_unpark;

	th_to_unpark = current_thread();
	uth = get_bsdthread_info(th_to_unpark);

	if ((tl = uth->uu_threadlist) != NULL) {

		if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
			/*
			 * a normal wakeup of this thread occurred... no need
			 * for any synchronization with the timer and wq_runitem
			 */
normal_return_to_user:
			thread_sched_call(th_to_unpark, workqueue_callback);

			KERNEL_DEBUG(0xefffd018 | DBG_FUNC_END, tl->th_workq, 0, 0, 0, 0);

			thread_exception_return();

		workqueue_lock_spin(p);

		if ( !(tl->th_flags & TH_LIST_RUNNING)) {
			/*
			 * the timer popped us out and we've not
			 * been moved off of the idle list
			 * so we should now self-destruct
			 *
			 * workqueue_removethread consumes the lock
			 */
			workqueue_removethread(tl, 0);

			thread_exception_return();
		/*
		 * the timer woke us up, but we have already
		 * started to make this a runnable thread,
		 * but have not yet finished that process...
		 * so wait for the normal wakeup
		 */
		while ((tl->th_flags & TH_LIST_BUSY)) {

			assert_wait((caddr_t)tl, (THREAD_UNINT));

			workqueue_unlock(p);

			thread_block(THREAD_CONTINUE_NULL);

			workqueue_lock_spin(p);
		/*
		 * we have finished setting up the thread's context
		 * now we can return as if we got a normal wakeup
		 */
		workqueue_unlock(p);

		goto normal_return_to_user;

	thread_exception_return();
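/*
 * wq_runitem() hands one work item to a chosen thread: it loads the
 * thread's user-mode register state via setup_wqthread() and then either
 * returns directly to user space (when the current thread is being
 * reused), wakes the target thread, or resumes it.
 */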
wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
	   int reuse_thread, int wake_thread, int return_directly)
	boolean_t need_resume = FALSE;

	KERNEL_DEBUG1(0xefffd004 | DBG_FUNC_START, tl->th_workq, tl->th_priority, tl->th_affinity_tag, thread_tid(current_thread()), thread_tid(th));

	ret = setup_wqthread(p, th, item, reuse_thread, tl);

		panic("setup_wqthread failed  %x\n", ret);

	if (return_directly) {
		KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, tl->th_workq, 0, 0, 4, 0);

		thread_exception_return();

		panic("wq_runitem: thread_exception_return returned ...\n");

		workqueue_lock_spin(p);

		tl->th_flags &= ~TH_LIST_BUSY;

		workqueue_unlock(p);

		KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, tl->th_workq, 0, 0, thread_tid(current_thread()), thread_tid(th));

		workqueue_lock_spin(p);

		if (tl->th_flags & TH_LIST_NEED_WAKEUP)

		tl->th_flags &= ~(TH_LIST_BUSY | TH_LIST_NEED_WAKEUP);

		workqueue_unlock(p);

		/*
		 * need to do this outside of the workqueue spin lock
		 * since thread_resume locks the thread via a full mutex
		 */
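/*
 * setup_wqthread() loads the register state a workqueue thread starts
 * with: instruction pointer at p->p_wqthread, arguments describing the
 * pthread area, kernel port, stack, work item and reuse flag, and a stack
 * pointer aligned below the stack base.
 */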
setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl)
#if defined(__i386__) || defined(__x86_64__)
	isLP64 = IS_64BIT_PROCESS(p);
	/*
	 * Set up i386 registers & function call.
	 */
		x86_thread_state32_t state;
		x86_thread_state32_t *ts = &state;

		ts->eip = (int)p->p_wqthread;
		ts->eax = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
		ts->ebx = (unsigned int)tl->th_thport;
		ts->ecx = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
		ts->edx = (unsigned int)item;
		ts->edi = (unsigned int)reuse_thread;
		ts->esi = (unsigned int)0;

		ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_32_STK_ALIGN));

		if ((reuse_thread != 0) && (ts->eax == (unsigned int)0))
			panic("setup_wqthread: setting reuse thread with null pthread\n");
		thread_set_wq_state32(th, (thread_state_t)ts);

		x86_thread_state64_t state64;
		x86_thread_state64_t *ts64 = &state64;

		ts64->rip = (uint64_t)p->p_wqthread;
		ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
		ts64->rsi = (uint64_t)(tl->th_thport);
		ts64->rdx = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
		ts64->rcx = (uint64_t)item;
		ts64->r8 = (uint64_t)reuse_thread;
		ts64->r9 = (uint64_t)0;

		/*
		 * set stack pointer aligned to 16 byte boundary
		 */
		ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_64_REDZONE_LEN);

		if ((reuse_thread != 0) && (ts64->rdi == (uint64_t)0))
			panic("setup_wqthread: setting reuse thread with null pthread\n");
		thread_set_wq_state64(th, (thread_state_t)ts64);
#error setup_wqthread not defined for this architecture
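/*
 * fill_procworkqueue() snapshots workqueue statistics for proc_info:
 * total threads, currently active threads, blocked threads, and whether
 * either thread limit has been exceeded.
 */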
fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
	struct workqueue * wq;
	uint32_t pri, affinity;

	workqueue_lock_spin(p);
	if ((wq = p->p_wqptr) == NULL) {

	for (pri = 0; pri < WORKQUEUE_NUMPRIOS; pri++) {
		for (affinity = 0; affinity < wq->wq_affinity_max; affinity++)
			activecount += wq->wq_thactive_count[pri][affinity];

	pwqinfo->pwq_nthreads = wq->wq_nthreads;
	pwqinfo->pwq_runthreads = activecount;
	pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
	pwqinfo->pwq_state = 0;

	if (wq->wq_lflags & WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT)
		pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;

	if (wq->wq_lflags & WQL_EXCEEDED_TOTAL_THREAD_LIMIT)
		pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;

	workqueue_unlock(p);
/* Set target concurrency of one of the queues (0,1,2) to the specified value */
proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc)
	int32_t conc = targetconc;
	vm_map_t oldmap = VM_MAP_NULL;

	self = current_proc();
	if (self->p_pid != pid) {
		/* if not on self, hold a reference on the process */

	if ((addr = p->p_targconc) == (uint64_t)0) {

	if ((queuenum >= WQ_MAXPRI_MIN) && (queuenum <= WQ_MAXPRI_MAX)) {
		addr += (queuenum * sizeof(int32_t));

		oldmap = vm_map_switch(get_task_map(p->task));
		error = copyout(&conc, addr, sizeof(int32_t));

		(void)vm_map_switch(oldmap);

/* Set target concurrency on all the prio queues to the specified values */
proc_setalltargetconc(pid_t pid, int32_t * targetconcp)
	vm_map_t oldmap = VM_MAP_NULL;

	self = current_proc();
	if (self->p_pid != pid) {
		/* if not on self, hold a reference on the process */

	if ((addr = (uint64_t)p->p_targconc) == (uint64_t)0) {

	oldmap = vm_map_switch(get_task_map(p->task));

	error = copyout(targetconcp, addr, WQ_PRI_NUM * sizeof(int32_t));

	(void)vm_map_switch(oldmap);
int thread_selfid(__unused struct proc *p, __unused struct thread_selfid_args *uap, uint64_t *retval)
	thread_t thread = current_thread();
	*retval = thread_tid(thread);
	return KERN_SUCCESS;
	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);

	/*
	 * allocate the lock attribute for pthread synchronizers
	 */
	pthread_lck_attr = lck_attr_alloc_init();

	workqueue_init_lock((proc_t) get_bsdtask_info(kernel_task));

	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);

	pth_global_hashinit();
	psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);