/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
#define	_PTHREAD_CONDATTR_T
#define	_PTHREAD_COND_T
#define	_PTHREAD_MUTEXATTR_T
#define	_PTHREAD_MUTEX_T
#define	_PTHREAD_RWLOCKATTR_T
#define	_PTHREAD_RWLOCK_T

#undef pthread_mutexattr_t
#undef pthread_mutex_t
#undef pthread_condattr_t
#undef pthread_cond_t
#undef pthread_rwlockattr_t
#undef pthread_rwlock_t
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>
#include <sys/kdebug.h>
#include <sys/sysproto.h>
#include <sys/pthread_internal.h>
#include <sys/user.h>		/* for coredump */
#include <sys/proc_info.h>	/* for fill_procworkqueue */

#include <mach/mach_types.h>
#include <mach/vm_prot.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/kalloc.h>
#include <kern/sched_prim.h>	/* for thread_exception_return */
#include <kern/processor.h>
#include <kern/affinity.h>
#include <kern/assert.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_status.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
#include <vm/vm_protos.h>
#include <vm/vm_map.h>		/* for current_map() */
#include <vm/vm_fault.h>
#include <mach/thread_act.h>	/* for thread_resume */
#include <machine/machine_routines.h>
#if defined(__i386__) || defined(__x86_64__)
#include <i386/machine_routines.h>
#include <i386/eflags.h>
#include <i386/psl.h>
#include <i386/seg.h>
#endif

#include <libkern/OSAtomic.h>
#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
#define KERNEL_DEBUG1 KERNEL_DEBUG_CONSTANT1
lck_grp_attr_t	*pthread_lck_grp_attr;
lck_grp_t	*pthread_lck_grp;
lck_attr_t	*pthread_lck_attr;
extern kern_return_t thread_getstatus(register thread_t act, int flavor,
			thread_state_t tstate, mach_msg_type_number_t *count);
extern kern_return_t thread_setstatus(thread_t thread, int flavor,
			thread_state_t tstate, mach_msg_type_number_t count);
extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
extern kern_return_t mach_port_deallocate(ipc_space_t, mach_port_name_t);
extern kern_return_t semaphore_signal_internal_trap(mach_port_name_t);
extern void workqueue_thread_yielded(void);

static int workqueue_additem(struct workqueue *wq, int prio, user_addr_t item, int affinity);
static boolean_t workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t th,
					user_addr_t oc_item, int oc_prio, int oc_affinity);
static void wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
		       int reuse_thread, int wake_thread, int return_directly);
static void wq_unpark_continue(void);
static void wq_unsuspend_continue(void);
static int setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl);
static boolean_t workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread);
static void workqueue_removethread(struct threadlist *tl, int fromexit);
static void workqueue_lock_spin(proc_t);
static void workqueue_unlock(proc_t);

int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc);
int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);
#define WQ_MAXPRI_MIN	0	/* low prio queue num */
#define WQ_MAXPRI_MAX	2	/* max prio queuenum */
#define WQ_PRI_NUM	3	/* number of prio work queues */

#define C_32_STK_ALIGN		16
#define C_64_STK_ALIGN		16
#define C_64_REDZONE_LEN	128
#define TRUNC_DOWN32(a,c)	((((uint32_t)a)-(c)) & ((uint32_t)(-(c))))
#define TRUNC_DOWN64(a,c)	((((uint64_t)a)-(c)) & ((uint64_t)(-(c))))
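/*
 * Example (illustrative, not part of the original source): TRUNC_DOWN64(0x1007, 16)
 * evaluates to 0xff0 -- reserve 'c' bytes below 'a', then round the result down to a
 * 'c'-byte boundary.  TRUNC_DOWN32() behaves the same way on 32-bit addresses.
 */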
/*
 * Flags field passed to bsdthread_create and back in pthread_start
 * 31  <---------------------------------> 0
 * _________________________________________
 * | flags(8) | policy(8) | importance(16) |
 * -----------------------------------------
 */
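/*
 * Illustrative only (a sketch, not the libpthread implementation): given the
 * decode performed in bsdthread_create() below, user space would pack this
 * word roughly as
 *
 *	flags = PTHREAD_START_SETSCHED
 *	      | ((policy & PTHREAD_START_POLICY_MASK) << PTHREAD_START_POLICY_BITSHIFT)
 *	      | (importance & PTHREAD_START_IMPORTANCE_MASK);
 */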
void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags);
#define PTHREAD_START_CUSTOM	0x01000000
#define PTHREAD_START_SETSCHED	0x02000000
#define PTHREAD_START_DETACHED	0x04000000
#define PTHREAD_START_POLICY_BITSHIFT 16
#define PTHREAD_START_POLICY_MASK 0xff
#define PTHREAD_START_IMPORTANCE_MASK 0xffff

#define SCHED_OTHER	POLICY_TIMESHARE
#define SCHED_FIFO	POLICY_FIFO
#define SCHED_RR	POLICY_RR
int
bsdthread_create(__unused struct proc *p, struct bsdthread_create_args *uap, user_addr_t *retval)
{
	kern_return_t kret;
	void *sright;
	int error = 0;
	int allocated = 0;
	mach_vm_offset_t stackaddr;
	mach_vm_size_t th_allocsize = 0;
	mach_vm_size_t user_stacksize;
	mach_vm_size_t th_stacksize;
	mach_vm_offset_t th_stackaddr;
	mach_vm_offset_t th_stack;
	mach_vm_offset_t th_pthread;
	mach_port_name_t th_thport;
	thread_t th;
	user_addr_t user_func = uap->func;
	user_addr_t user_funcarg = uap->func_arg;
	user_addr_t user_stack = uap->stack;
	user_addr_t user_pthread = uap->pthread;
	unsigned int flags = (unsigned int)uap->flags;
	vm_map_t vmap = current_map();
	task_t ctask = current_task();
	unsigned int policy, importance;
	int isLP64 = 0;

	if ((p->p_lflag & P_LREGISTER) == 0)
		return(EINVAL);

	KERNEL_DEBUG_CONSTANT(0x9000080 | DBG_FUNC_START, flags, 0, 0, 0, 0);

	isLP64 = IS_64BIT_PROCESS(p);

#if defined(__i386__) || defined(__x86_64__)
	stackaddr = 0xB0000000;
#else
#error Need to define a stack address hint for this architecture
#endif

	kret = thread_create(ctask, &th);
	if (kret != KERN_SUCCESS)
		return(ENOMEM);
	thread_reference(th);

	sright = (void *)convert_thread_to_port(th);
	th_thport = ipc_port_copyout_send(sright, get_task_ipcspace(ctask));

	if ((flags & PTHREAD_START_CUSTOM) == 0) {
		th_stacksize = (mach_vm_size_t)user_stack;	/* if it is custom then it is stacksize */
		th_allocsize = th_stacksize + PTH_DEFAULT_GUARDSIZE + p->p_pthsize;

		kret = mach_vm_map(vmap, &stackaddr,
				th_allocsize,
				page_size - 1,
				VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL,
				0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
				VM_INHERIT_DEFAULT);
		if (kret != KERN_SUCCESS)
			kret = mach_vm_allocate(vmap,
					&stackaddr, th_allocsize,
					VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
		if (kret != KERN_SUCCESS) {
			error = ENOMEM;
			goto out;
		}
		KERNEL_DEBUG_CONSTANT(0x9000080 | DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);

		th_stackaddr = stackaddr;
		allocated = 1;
		/*
		 * The guard page is at the lowest address
		 * The stack base is the highest address
		 */
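		/*
		 * Illustrative layout of the region mapped above (th_allocsize =
		 * th_stacksize + PTH_DEFAULT_GUARDSIZE + p->p_pthsize), low to high:
		 *
		 *	stackaddr                            guard page(s), VM_PROT_NONE
		 *	stackaddr + PTH_DEFAULT_GUARDSIZE    thread stack, growing down toward the guard
		 *	stackaddr + PTH_DEFAULT_GUARDSIZE
		 *	          + th_stacksize             pthread_t structure (p_pthsize bytes)
		 *
		 * which is why th_stack and th_pthread below are both set to
		 * stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE.
		 */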
		kret = mach_vm_protect(vmap, stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE);

		if (kret != KERN_SUCCESS) {
			error = ENOMEM;
			goto out1;
		}
		th_stack = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
		th_pthread = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE);
		user_stacksize = th_stacksize;

		/*
		 * Pre-fault the first page of the new thread's stack and the page that will
		 * contain the pthread_t structure.
		 */
		vm_fault(vmap,
		  vm_map_trunc_page(th_stack - PAGE_SIZE_64),
		  VM_PROT_READ | VM_PROT_WRITE,
		  FALSE,
		  THREAD_UNINT, NULL, 0);

		vm_fault(vmap,
		  vm_map_trunc_page(th_pthread),
		  VM_PROT_READ | VM_PROT_WRITE,
		  FALSE,
		  THREAD_UNINT, NULL, 0);
	} else {
		th_stack = user_stack;
		user_stacksize = user_stack;
		th_pthread = user_pthread;

		KERNEL_DEBUG_CONSTANT(0x9000080 | DBG_FUNC_NONE, 0, 0, 0, 3, 0);
	}

#if defined(__i386__) || defined(__x86_64__)
	/*
	 * Set up i386 registers & function call.
	 */
	if (isLP64 == 0) {
		x86_thread_state32_t state;
		x86_thread_state32_t *ts = &state;

		ts->eip = (int)p->p_threadstart;
		ts->eax = (unsigned int)th_pthread;
		ts->ebx = (unsigned int)th_thport;
		ts->ecx = (unsigned int)user_func;
		ts->edx = (unsigned int)user_funcarg;
		ts->edi = (unsigned int)user_stacksize;
		ts->esi = (unsigned int)uap->flags;
		/*
		 * set stack pointer
		 */
		ts->esp = (int)((vm_offset_t)(th_stack - C_32_STK_ALIGN));

		thread_set_wq_state32(th, (thread_state_t)ts);

	} else {
		x86_thread_state64_t state64;
		x86_thread_state64_t *ts64 = &state64;

		ts64->rip = (uint64_t)p->p_threadstart;
		ts64->rdi = (uint64_t)th_pthread;
		ts64->rsi = (uint64_t)(th_thport);
		ts64->rdx = (uint64_t)user_func;
		ts64->rcx = (uint64_t)user_funcarg;
		ts64->r8 = (uint64_t)user_stacksize;
		ts64->r9 = (uint64_t)uap->flags;
		/*
		 * set stack pointer aligned to 16 byte boundary
		 */
		ts64->rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN);

		thread_set_wq_state64(th, (thread_state_t)ts64);
	}
#else
#error bsdthread_create not defined for this architecture
#endif
	/* Set scheduling parameters if needed */
	if ((flags & PTHREAD_START_SETSCHED) != 0) {
		thread_extended_policy_data_t extinfo;
		thread_precedence_policy_data_t precedinfo;

		importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
		policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;

		if (policy == SCHED_OTHER)
			extinfo.timeshare = 1;
		else
			extinfo.timeshare = 0;
		thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

#define BASEPRI_DEFAULT 31
		precedinfo.importance = (importance - BASEPRI_DEFAULT);
		thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
	}

	kret = thread_resume(th);
	if (kret != KERN_SUCCESS) {
		error = EINVAL;
		goto out1;
	}
	thread_deallocate(th);	/* drop the creator reference */

	KERNEL_DEBUG_CONSTANT(0x9000080 | DBG_FUNC_END, error, th_pthread, 0, 0, 0);

	*retval = th_pthread;

	return(0);

out1:
	if (allocated != 0)
		(void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
out:
	(void)mach_port_deallocate(get_task_ipcspace(ctask), th_thport);
	(void)thread_terminate(th);
	(void)thread_deallocate(th);
	return(error);
}
int
bsdthread_terminate(__unused struct proc *p, struct bsdthread_terminate_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t freeaddr;
	mach_vm_size_t freesize;
	kern_return_t kret;
	mach_port_name_t kthport = (mach_port_name_t)uap->port;
	mach_port_name_t sem = (mach_port_name_t)uap->sem;

	freeaddr = (mach_vm_offset_t)uap->stackaddr;
	freesize = uap->freesize;

	KERNEL_DEBUG_CONSTANT(0x9000084 | DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0);

	if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
		kret = mach_vm_deallocate(current_map(), freeaddr, freesize);
		if (kret != KERN_SUCCESS) {
			return(EINVAL);
		}
	}

	(void) thread_terminate(current_thread());
	if (sem != MACH_PORT_NULL) {
		kret = semaphore_signal_internal_trap(sem);
		if (kret != KERN_SUCCESS) {
			return(EINVAL);
		}
	}

	if (kthport != MACH_PORT_NULL)
		mach_port_deallocate(get_task_ipcspace(current_task()), kthport);
	thread_exception_return();
	panic("bsdthread_terminate: still running\n");

	KERNEL_DEBUG_CONSTANT(0x9000084 | DBG_FUNC_END, 0, 0, 0, 0xff, 0);

	return(0);
}
int
bsdthread_register(struct proc *p, struct bsdthread_register_args *uap, __unused int32_t *retval)
{
	/* prevent multiple registrations */
	if ((p->p_lflag & P_LREGISTER) != 0)
		return(EINVAL);
	/* syscall randomizer test can pass bogus values */
	if (uap->pthsize > MAX_PTHREAD_SIZE) {
		return(EINVAL);
	}
	p->p_threadstart = uap->threadstart;
	p->p_wqthread = uap->wqthread;
	p->p_pthsize = uap->pthsize;
	p->p_targconc = uap->targetconc_ptr;
	p->p_dispatchqueue_offset = uap->dispatchqueue_offset;
	proc_setregister(p);

	return(0);
}
uint32_t wq_yielded_threshold		= WQ_YIELDED_THRESHOLD;
uint32_t wq_yielded_window_usecs	= WQ_YIELDED_WINDOW_USECS;
uint32_t wq_stalled_window_usecs	= WQ_STALLED_WINDOW_USECS;
uint32_t wq_reduce_pool_window_usecs	= WQ_REDUCE_POOL_WINDOW_USECS;
uint32_t wq_max_timer_interval_usecs	= WQ_MAX_TIMER_INTERVAL_USECS;
uint32_t wq_max_threads			= WORKQUEUE_MAXTHREADS;
uint32_t wq_max_constrained_threads	= WORKQUEUE_MAXTHREADS / 8;
SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_yielded_threshold, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_yielded_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_stalled_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_reduce_pool_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_max_timer_interval_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_max_threads, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_max_constrained_threads, 0, "");
static uint32_t wq_init_constrained_limit = 1;
void
workqueue_init_lock(proc_t p)
{
	lck_spin_init(&p->p_wqlock, pthread_lck_grp, pthread_lck_attr);

	p->p_wqiniting = FALSE;
}

void
workqueue_destroy_lock(proc_t p)
{
	lck_spin_destroy(&p->p_wqlock, pthread_lck_grp);
}

static void
workqueue_lock_spin(proc_t p)
{
	lck_spin_lock(&p->p_wqlock);
}

static void
workqueue_unlock(proc_t p)
{
	lck_spin_unlock(&p->p_wqlock);
}
static void
workqueue_interval_timer_start(struct workqueue *wq)
{
	uint64_t deadline;

	if (wq->wq_timer_interval == 0)
		wq->wq_timer_interval = wq_stalled_window_usecs;
	else {
		wq->wq_timer_interval = wq->wq_timer_interval * 2;

		if (wq->wq_timer_interval > wq_max_timer_interval_usecs)
			wq->wq_timer_interval = wq_max_timer_interval_usecs;
	}
	clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline);

	thread_call_enter_delayed(wq->wq_atimer_call, deadline);

	KERNEL_DEBUG(0xefffd110, wq, wq->wq_itemcount, wq->wq_flags, wq->wq_timer_interval, 0);
}
static boolean_t
wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp)
{
	clock_sec_t	secs;
	clock_usec_t	usecs;
	uint64_t	lastblocked_ts;
	uint64_t	elapsed;

	/*
	 * the timestamp is updated atomically w/o holding the workqueue lock
	 * so we need to do an atomic read of the 64 bits so that we don't see
	 * a mismatched pair of 32 bit reads... we accomplish this in an architecturally
	 * independent fashion by using OSCompareAndSwap64 to write back the
	 * value we grabbed... if it succeeds, then we have a good timestamp to
	 * evaluate... if it fails, we straddled grabbing the timestamp while it
	 * was being updated... treat a failed update as a busy thread since
	 * it implies we are about to see a really fresh timestamp anyway
	 */
	lastblocked_ts = *lastblocked_tsp;

	if ( !OSCompareAndSwap64((UInt64)lastblocked_ts, (UInt64)lastblocked_ts, lastblocked_tsp))
		return (TRUE);

	if (lastblocked_ts >= cur_ts) {
		/*
		 * because the update of the timestamp when a thread blocks isn't
		 * serialized against us looking at it (i.e. we don't hold the workq lock)
		 * it's possible to have a timestamp that matches the current time or
		 * that even looks to be in the future relative to when we grabbed the current
		 * time... just treat this as a busy thread since it must have just blocked.
		 */
		return (TRUE);
	}
	elapsed = cur_ts - lastblocked_ts;

	absolutetime_to_microtime(elapsed, &secs, &usecs);

	if (secs == 0 && usecs < wq_stalled_window_usecs)
		return (TRUE);
	return (FALSE);
}
#define WQ_TIMER_NEEDED(wq, start_timer) do {		\
	int oldflags = wq->wq_flags;			\
							\
	if ( !(oldflags & (WQ_EXITING | WQ_ATIMER_RUNNING))) {	\
		if (OSCompareAndSwap(oldflags, oldflags | WQ_ATIMER_RUNNING, (UInt32 *)&wq->wq_flags)) \
			start_timer = TRUE;		\
	}						\
} while (0)
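/*
 * Typical usage of WQ_TIMER_NEEDED() in this file (see workqueue_callback and
 * workqueue_add_timer below): the caller keeps a local start_timer flag and
 * arms the delayed thread call only once it is safe to do so:
 *
 *	boolean_t start_timer = FALSE;
 *	...
 *	WQ_TIMER_NEEDED(wq, start_timer);
 *	...
 *	if (start_timer == TRUE)
 *		workqueue_interval_timer_start(wq);
 */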
static void
workqueue_add_timer(struct workqueue *wq, __unused int param1)
{
	proc_t		p;
	boolean_t	start_timer = FALSE;
	boolean_t	retval;
	boolean_t	add_thread;
	uint32_t	busycount;

	KERNEL_DEBUG(0xefffd108 | DBG_FUNC_START, wq, wq->wq_flags, wq->wq_nthreads, wq->wq_thidlecount, 0);

	p = wq->wq_proc;

	workqueue_lock_spin(p);

	/*
	 * because workqueue_callback now runs w/o taking the workqueue lock
	 * we are unsynchronized w/r to a change in state of the running threads...
	 * to make sure we always evaluate that change, we allow it to start up
	 * a new timer if the current one is actively evaluating the state
	 * however, we do not need more than 2 timers fired up (1 active and 1 pending)
	 * and we certainly do not want 2 active timers evaluating the state
	 * simultaneously... so use WQL_ATIMER_BUSY to serialize the timers...
	 * note that WQL_ATIMER_BUSY is in a different flag word from WQ_ATIMER_RUNNING since
	 * it is always protected by the workq lock... WQ_ATIMER_RUNNING is evaluated
	 * and set atomically since the callback function needs to manipulate it
	 * w/o holding the workq lock...
	 *
	 * !WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY   ==   no pending timer, no active timer
	 * !WQ_ATIMER_RUNNING && WQL_ATIMER_BUSY    ==   no pending timer, 1 active timer
	 * WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY    ==   1 pending timer, no active timer
	 * WQ_ATIMER_RUNNING && WQL_ATIMER_BUSY     ==   1 pending timer, 1 active timer
	 */
	while (wq->wq_lflags & WQL_ATIMER_BUSY) {
		wq->wq_lflags |= WQL_ATIMER_WAITING;

		assert_wait((caddr_t)wq, (THREAD_UNINT));
		workqueue_unlock(p);

		thread_block(THREAD_CONTINUE_NULL);

		workqueue_lock_spin(p);
	}
	wq->wq_lflags |= WQL_ATIMER_BUSY;

	/*
	 * the workq lock will protect us from seeing WQ_EXITING change state, but we
	 * still need to update this atomically in case someone else tries to start
	 * the timer just as we're releasing it
	 */
	while ( !(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags & ~WQ_ATIMER_RUNNING), (UInt32 *)&wq->wq_flags)));

again:
	retval = TRUE;
	add_thread = FALSE;
	busycount = 0;

	if ( !(wq->wq_flags & WQ_EXITING)) {
		/*
		 * check to see if the stall frequency was beyond our tolerance
		 * or we have work on the queue, but haven't scheduled any
		 * new work within our acceptable time interval because
		 * there were no idle threads left to schedule
		 */
		if (wq->wq_itemcount) {
			uint32_t	priority;
			uint32_t	affinity_tag;
			uint32_t	i;
			uint64_t	curtime;

			for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
				if (wq->wq_list_bitmap & (1 << priority))
					break;
			}
			assert(priority < WORKQUEUE_NUMPRIOS);

			curtime = mach_absolute_time();

			for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) {
				/*
				 * if we have no idle threads, we can try to add them if needed
				 */
				if (wq->wq_thidlecount == 0)
					add_thread = TRUE;

				/*
				 * look for first affinity group that is currently not active
				 * i.e. no active threads at this priority level or higher
				 * and has not been active recently at this priority level or higher
				 */
				for (i = 0; i <= priority; i++) {
					if (wq->wq_thactive_count[i][affinity_tag]) {
						add_thread = FALSE;
						break;
					}
					if (wq->wq_thscheduled_count[i][affinity_tag]) {
						if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag])) {
							add_thread = FALSE;
							busycount++;
							break;
						}
					}
				}
				if (add_thread == TRUE) {
					retval = workqueue_addnewthread(wq, FALSE);
					break;
				}
			}
			if (wq->wq_itemcount) {
				/*
				 * as long as we have threads to schedule, and we successfully
				 * scheduled new work, keep trying
				 */
				while (wq->wq_thidlecount && !(wq->wq_flags & WQ_EXITING)) {
					/*
					 * workqueue_run_nextitem is responsible for
					 * dropping the workqueue lock in all cases
					 */
					retval = workqueue_run_nextitem(p, wq, THREAD_NULL, 0, 0, 0);
					workqueue_lock_spin(p);

					if (retval == FALSE)
						break;
				}
				if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_itemcount) {

					if (wq->wq_thidlecount == 0 && retval == TRUE && add_thread == TRUE)
						goto again;

					if (wq->wq_thidlecount == 0 || busycount)
						WQ_TIMER_NEEDED(wq, start_timer);

					KERNEL_DEBUG(0xefffd108 | DBG_FUNC_NONE, wq, wq->wq_itemcount, wq->wq_thidlecount, busycount, 0);
				}
			}
		}
	}
	if ( !(wq->wq_flags & WQ_ATIMER_RUNNING))
		wq->wq_timer_interval = 0;

	wq->wq_lflags &= ~WQL_ATIMER_BUSY;

	if ((wq->wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) {
		/*
		 * wakeup the thread hung up in workqueue_exit or workqueue_add_timer waiting for this timer
		 * to finish getting out of the way
		 */
		wq->wq_lflags &= ~WQL_ATIMER_WAITING;
		wakeup(wq);
	}
	KERNEL_DEBUG(0xefffd108 | DBG_FUNC_END, wq, start_timer, wq->wq_nthreads, wq->wq_thidlecount, 0);

	workqueue_unlock(p);

	if (start_timer == TRUE)
		workqueue_interval_timer_start(wq);
}
void
workqueue_thread_yielded(void)
{
	struct workqueue *wq;
	proc_t p;

	p = current_proc();

	if ((wq = p->p_wqptr) == NULL || wq->wq_itemcount == 0)
		return;

	workqueue_lock_spin(p);

	if (wq->wq_itemcount) {
		uint64_t	curtime;
		uint64_t	elapsed;
		clock_sec_t	secs;
		clock_usec_t	usecs;

		if (wq->wq_thread_yielded_count++ == 0)
			wq->wq_thread_yielded_timestamp = mach_absolute_time();

		if (wq->wq_thread_yielded_count < wq_yielded_threshold) {
			workqueue_unlock(p);
			return;
		}
		KERNEL_DEBUG(0xefffd138 | DBG_FUNC_START, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 0, 0);

		wq->wq_thread_yielded_count = 0;

		curtime = mach_absolute_time();
		elapsed = curtime - wq->wq_thread_yielded_timestamp;
		absolutetime_to_microtime(elapsed, &secs, &usecs);

		if (secs == 0 && usecs < wq_yielded_window_usecs) {

			if (wq->wq_thidlecount == 0) {
				workqueue_addnewthread(wq, TRUE);
				/*
				 * 'workqueue_addnewthread' drops the workqueue lock
				 * when creating the new thread and then retakes it before
				 * returning... this window allows other threads to process
				 * work on the queue, so we need to recheck for available work
				 * if none found, we just return...  the newly created thread
				 * will eventually get used (if it hasn't already)...
				 */
				if (wq->wq_itemcount == 0) {
					workqueue_unlock(p);
					return;
				}
			}
			if (wq->wq_thidlecount) {
				uint32_t	priority;
				uint32_t	affinity = -1;
				user_addr_t	item;
				struct workitem *witem = NULL;
				struct workitemlist *wl = NULL;
				struct uthread *uth;
				struct threadlist *tl;

				uth = get_bsdthread_info(current_thread());
				if ((tl = uth->uu_threadlist))
					affinity = tl->th_affinity_tag;

				for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
					if (wq->wq_list_bitmap & (1 << priority)) {
						wl = (struct workitemlist *)&wq->wq_list[priority];
						break;
					}
				}
				assert(wl != NULL);
				assert(!(TAILQ_EMPTY(&wl->wl_itemlist)));

				witem = TAILQ_FIRST(&wl->wl_itemlist);
				TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);

				if (TAILQ_EMPTY(&wl->wl_itemlist))
					wq->wq_list_bitmap &= ~(1 << priority);
				wq->wq_itemcount--;

				item = witem->wi_item;
				witem->wi_item = (user_addr_t)0;
				witem->wi_affinity = 0;

				TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);

				(void)workqueue_run_nextitem(p, wq, THREAD_NULL, item, priority, affinity);
				/*
				 * workqueue_run_nextitem is responsible for
				 * dropping the workqueue lock in all cases
				 */
				KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 1, 0);

				return;
			}
		}
		KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_itemcount, 2, 0);
	}
	workqueue_unlock(p);
}
static void
workqueue_callback(int type, thread_t thread)
{
	struct uthread    *uth;
	struct threadlist *tl;
	struct workqueue  *wq;

	uth = get_bsdthread_info(thread);
	tl = uth->uu_threadlist;
	wq = tl->th_workq;

	switch (type) {

	case SCHED_CALL_BLOCK: {
		uint32_t	old_activecount;

		old_activecount = OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]);

		if (old_activecount == 1) {
			boolean_t	start_timer = FALSE;
			uint64_t	curtime;
			UInt64		*lastblocked_ptr;

			/*
			 * we were the last active thread on this affinity set
			 * and we've got work to do
			 */
			lastblocked_ptr = (UInt64 *)&wq->wq_lastblocked_ts[tl->th_priority][tl->th_affinity_tag];
			curtime = mach_absolute_time();

			/*
			 * if we collide with another thread trying to update the last_blocked (really unlikely
			 * since another thread would have to get scheduled and then block after we start down
			 * this path), it's not a problem.  Either timestamp is adequate, so no need to retry
			 */
			OSCompareAndSwap64(*lastblocked_ptr, (UInt64)curtime, lastblocked_ptr);

			if (wq->wq_itemcount)
				WQ_TIMER_NEEDED(wq, start_timer);

			if (start_timer == TRUE)
				workqueue_interval_timer_start(wq);
		}
		KERNEL_DEBUG1(0xefffd020 | DBG_FUNC_START, wq, old_activecount, tl->th_priority, tl->th_affinity_tag, thread_tid(thread));
		}
		break;

	case SCHED_CALL_UNBLOCK:
		/*
		 * we cannot take the workqueue_lock here...
		 * an UNBLOCK can occur from a timer event which
		 * is run from an interrupt context... if the workqueue_lock
		 * is already held by this processor, we'll deadlock...
		 * the thread lock for the thread being UNBLOCKED
		 * is also held
		 */
		OSAddAtomic(1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]);

		KERNEL_DEBUG1(0xefffd020 | DBG_FUNC_END, wq, wq->wq_threads_scheduled, tl->th_priority, tl->th_affinity_tag, thread_tid(thread));

		break;
	}
}
static void
workqueue_removethread(struct threadlist *tl, int fromexit)
{
	struct workqueue *wq;
	struct uthread * uth;

	/*
	 * If fromexit is set, the call is from workqueue_exit(),
	 * so some cleanups are to be avoided.
	 */
	wq = tl->th_workq;

	TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);

	if (fromexit == 0) {
		wq->wq_nthreads--;
		wq->wq_thidlecount--;
	}

	/*
	 * Clear the threadlist pointer in uthread so
	 * blocked thread on wakeup for termination will
	 * not access the thread list as it is going to be
	 * freed.
	 */
	thread_sched_call(tl->th_thread, NULL);

	uth = get_bsdthread_info(tl->th_thread);
	if (uth != (struct uthread *)0) {
		uth->uu_threadlist = NULL;
	}
	if (fromexit == 0) {
		/* during exit the lock is not held */
		workqueue_unlock(wq->wq_proc);
	}

	if ( (tl->th_flags & TH_LIST_SUSPENDED) ) {
		/*
		 * thread was created, but never used...
		 * need to clean up the stack and port ourselves
		 * since we're not going to spin up through the
		 * normal exit path triggered from Libc
		 */
		if (fromexit == 0) {
			/* vm map is already deallocated when this is called from exit */
			(void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize);
		}
		(void)mach_port_deallocate(get_task_ipcspace(wq->wq_task), tl->th_thport);

		KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));
	} else {

		KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));
	}
	/*
	 * drop our ref on the thread
	 */
	thread_deallocate(tl->th_thread);

	kfree(tl, sizeof(struct threadlist));
}

/*
 * called with workq lock held
 * dropped and retaken around thread creation
 * return with workq lock held
 */
static boolean_t
workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread)
{
	struct threadlist *tl;
	struct uthread	*uth;
	kern_return_t	kret;
	thread_t	th;
	proc_t		p;
	void		*sright;
	mach_vm_offset_t stackaddr;

	if (wq->wq_nthreads >= wq_max_threads || wq->wq_nthreads >= (CONFIG_THREAD_MAX - 20)) {
		wq->wq_lflags |= WQL_EXCEEDED_TOTAL_THREAD_LIMIT;
		return (FALSE);
	}
	wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;

	if (oc_thread == FALSE && wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
		/*
		 * if we're not creating this thread to service an overcommit request,
		 * then check the size of the constrained thread pool...  if we've already
		 * reached our max for threads scheduled from this pool, don't create a new
		 * one... the callers of this function are prepared for failure.
		 */
		wq->wq_lflags |= WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
		return (FALSE);
	}
	if (wq->wq_constrained_threads_scheduled < wq_max_constrained_threads)
		wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;

	wq->wq_nthreads++;

	p = wq->wq_proc;
	workqueue_unlock(p);

	kret = thread_create_workq(wq->wq_task, (thread_continue_t)wq_unsuspend_continue, &th);

	if (kret != KERN_SUCCESS)
		goto failed;

	tl = kalloc(sizeof(struct threadlist));
	bzero(tl, sizeof(struct threadlist));

#if defined(__i386__) || defined(__x86_64__)
	stackaddr = 0xB0000000;
#else
#error Need to define a stack address hint for this architecture
#endif
	tl->th_allocsize = PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE + p->p_pthsize;

	kret = mach_vm_map(wq->wq_map, &stackaddr,
			tl->th_allocsize,
			page_size - 1,
			VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE, NULL,
			0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
			VM_INHERIT_DEFAULT);

	if (kret != KERN_SUCCESS) {
		kret = mach_vm_allocate(wq->wq_map,
					&stackaddr, tl->th_allocsize,
					VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
	}
	if (kret == KERN_SUCCESS) {
		/*
		 * The guard page is at the lowest address
		 * The stack base is the highest address
		 */
		kret = mach_vm_protect(wq->wq_map, stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE);

		if (kret != KERN_SUCCESS)
			(void) mach_vm_deallocate(wq->wq_map, stackaddr, tl->th_allocsize);
	}
	if (kret != KERN_SUCCESS) {
		(void) thread_terminate(th);
		thread_deallocate(th);

		kfree(tl, sizeof(struct threadlist));
		goto failed;
	}
	thread_reference(th);

	sright = (void *) convert_thread_to_port(th);
	tl->th_thport = ipc_port_copyout_send(sright, get_task_ipcspace(wq->wq_task));

	thread_static_param(th, TRUE);

	tl->th_flags = TH_LIST_INITED | TH_LIST_SUSPENDED;

	tl->th_thread = th;
	tl->th_workq = wq;
	tl->th_stackaddr = stackaddr;
	tl->th_affinity_tag = -1;
	tl->th_priority = WORKQUEUE_NUMPRIOS;

	uth = get_bsdthread_info(tl->th_thread);

	workqueue_lock_spin(p);

	uth->uu_threadlist = (void *)tl;
	TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);

	wq->wq_thidlecount++;

	KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_START, wq, wq->wq_nthreads, 0, thread_tid(current_thread()), thread_tid(tl->th_thread));

	return (TRUE);

failed:
	workqueue_lock_spin(p);
	wq->wq_nthreads--;

	return (FALSE);
}
int
workq_open(struct proc *p, __unused struct workq_open_args *uap, __unused int32_t *retval)
{
	struct workqueue * wq;
	int wq_size;
	char * ptr;
	char * nptr;
	int j;
	uint32_t i;
	uint32_t num_cpus;
	int error = 0;
	boolean_t need_wakeup = FALSE;
	struct workitem * witem;
	struct workitemlist *wl;

	if ((p->p_lflag & P_LREGISTER) == 0)
		return(EINVAL);

	num_cpus = ml_get_max_cpus();

	if (wq_init_constrained_limit) {
		uint32_t limit;
		/*
		 * set up the limit for the constrained pool
		 * this is a virtual pool in that we don't
		 * maintain it on a separate idle and run list
		 */
		limit = num_cpus * (WORKQUEUE_NUMPRIOS + 1);

		if (limit > wq_max_constrained_threads)
			wq_max_constrained_threads = limit;

		wq_init_constrained_limit = 0;
	}
	workqueue_lock_spin(p);

	if (p->p_wqptr == NULL) {

		while (p->p_wqiniting == TRUE) {

			assert_wait((caddr_t)&p->p_wqiniting, THREAD_UNINT);
			workqueue_unlock(p);

			thread_block(THREAD_CONTINUE_NULL);

			workqueue_lock_spin(p);
		}
		if (p->p_wqptr != NULL)
			goto out;

		p->p_wqiniting = TRUE;

		workqueue_unlock(p);

		wq_size = sizeof(struct workqueue) +
			(num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint32_t)) +
			(num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint32_t)) +
			(num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint64_t)) +
			sizeof(uint64_t);

		ptr = (char *)kalloc(wq_size);
		bzero(ptr, wq_size);

		wq = (struct workqueue *)ptr;
		wq->wq_flags = WQ_LIST_INITED;
		wq->wq_proc = p;
		wq->wq_affinity_max = num_cpus;
		wq->wq_task = current_task();
		wq->wq_map  = current_map();

		for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
			wl = (struct workitemlist *)&wq->wq_list[i];
			TAILQ_INIT(&wl->wl_itemlist);
			TAILQ_INIT(&wl->wl_freelist);

			for (j = 0; j < WORKITEM_SIZE; j++) {
				witem = &wq->wq_array[(i * WORKITEM_SIZE) + j];
				TAILQ_INSERT_TAIL(&wl->wl_freelist, witem, wi_entry);
			}
			wq->wq_reqconc[i] = wq->wq_affinity_max;
		}
		nptr = ptr + sizeof(struct workqueue);

		for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
			wq->wq_thactive_count[i] = (uint32_t *)nptr;
			nptr += (num_cpus * sizeof(uint32_t));
		}
		for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
			wq->wq_thscheduled_count[i] = (uint32_t *)nptr;
			nptr += (num_cpus * sizeof(uint32_t));
		}
		/*
		 * align nptr on a 64 bit boundary so that we can do nice
		 * atomic64 operations on the timestamps...
		 * note that we requested an extra uint64_t when calculating
		 * the size for the allocation of the workqueue struct
		 */
		nptr += (sizeof(uint64_t) - 1);
		nptr = (char *)((uintptr_t)nptr & ~(sizeof(uint64_t) - 1));

		for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) {
			wq->wq_lastblocked_ts[i] = (uint64_t *)nptr;
			nptr += (num_cpus * sizeof(uint64_t));
		}
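		/*
		 * Resulting layout of the single kalloc'd block (illustrative,
		 * matching the wq_size computation above):
		 *
		 *	struct workqueue
		 *	wq_thactive_count[0..WORKQUEUE_NUMPRIOS-1]	num_cpus * uint32_t each
		 *	wq_thscheduled_count[0..WORKQUEUE_NUMPRIOS-1]	num_cpus * uint32_t each
		 *	(padding up to an 8-byte boundary)
		 *	wq_lastblocked_ts[0..WORKQUEUE_NUMPRIOS-1]	num_cpus * uint64_t each
		 */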
		TAILQ_INIT(&wq->wq_thrunlist);
		TAILQ_INIT(&wq->wq_thidlelist);

		wq->wq_atimer_call = thread_call_allocate((thread_call_func_t)workqueue_add_timer, (thread_call_param_t)wq);

		workqueue_lock_spin(p);

		p->p_wqptr = (void *)wq;
		p->p_wqsize = wq_size;

		p->p_wqiniting = FALSE;
		need_wakeup = TRUE;
	}
out:
	workqueue_unlock(p);

	if (need_wakeup == TRUE)
		wakeup(&p->p_wqiniting);
	return(error);
}
int
workq_kernreturn(struct proc *p, struct workq_kernreturn_args *uap, __unused int32_t *retval)
{
	user_addr_t item = uap->item;
	int options	= uap->options;
	int prio	= uap->prio;	/* should be used to find the right workqueue */
	int affinity	= uap->affinity;
	int error	= 0;
	thread_t th	= THREAD_NULL;
	user_addr_t oc_item = 0;
	struct workqueue *wq;

	if ((p->p_lflag & P_LREGISTER) == 0)
		return(EINVAL);

	/*
	 * affinity not yet hooked up on this path
	 */
	affinity = -1;

	switch (options) {

		case WQOPS_QUEUE_ADD: {

			if (prio & WORKQUEUE_OVERCOMMIT) {
				prio &= ~WORKQUEUE_OVERCOMMIT;
				oc_item = item;
			}
			if ((prio < 0) || (prio >= WORKQUEUE_NUMPRIOS))
				return (EINVAL);

			workqueue_lock_spin(p);

			if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
				workqueue_unlock(p);
				return (EINVAL);
			}
			if (wq->wq_thidlecount == 0 && (oc_item || (wq->wq_constrained_threads_scheduled < wq->wq_affinity_max))) {

				workqueue_addnewthread(wq, oc_item ? TRUE : FALSE);

				if (wq->wq_thidlecount == 0)
					oc_item = 0;
			}
			if (oc_item == 0)
				error = workqueue_additem(wq, prio, item, affinity);

			KERNEL_DEBUG(0xefffd008 | DBG_FUNC_NONE, wq, prio, affinity, oc_item, 0);
			}
			break;
		case WQOPS_THREAD_RETURN: {

			th = current_thread();
			struct uthread *uth = get_bsdthread_info(th);

			/* reset signal mask on the workqueue thread to default state */
			if (uth->uu_sigmask != (sigset_t)(~workq_threadmask)) {
				proc_lock(p);
				uth->uu_sigmask = ~workq_threadmask;
				proc_unlock(p);
			}

			workqueue_lock_spin(p);

			if ((wq = (struct workqueue *)p->p_wqptr) == NULL || (uth->uu_threadlist == NULL)) {
				workqueue_unlock(p);
				return (EINVAL);
			}
			KERNEL_DEBUG(0xefffd004 | DBG_FUNC_END, wq, 0, 0, 0, 0);
			}
			break;
		case WQOPS_THREAD_SETCONC: {

			if ((prio < 0) || (prio > WORKQUEUE_NUMPRIOS))
				return (EINVAL);

			workqueue_lock_spin(p);

			if ((wq = (struct workqueue *)p->p_wqptr) == NULL) {
				workqueue_unlock(p);
				return (EINVAL);
			}
			/*
			 * for this operation, we re-purpose the affinity
			 * argument as the concurrency target
			 */
			if (prio < WORKQUEUE_NUMPRIOS)
				wq->wq_reqconc[prio] = affinity;
			else {
				for (prio = 0; prio < WORKQUEUE_NUMPRIOS; prio++)
					wq->wq_reqconc[prio] = affinity;
			}
			}
			break;
		default:
			return (EINVAL);
	}
	(void)workqueue_run_nextitem(p, wq, th, oc_item, prio, affinity);
	/*
	 * workqueue_run_nextitem is responsible for
	 * dropping the workqueue lock in all cases
	 */
	return (error);
}
void
workqueue_exit(struct proc *p)
{
	struct workqueue  * wq;
	struct threadlist  * tl, *tlist;
	struct uthread	*uth;
	int wq_size = 0;

	if (p->p_wqptr != NULL) {

		KERNEL_DEBUG(0x900808c | DBG_FUNC_START, p->p_wqptr, 0, 0, 0, 0);

		workqueue_lock_spin(p);

		wq = (struct workqueue *)p->p_wqptr;

		if (wq == NULL) {
			workqueue_unlock(p);

			KERNEL_DEBUG(0x900808c | DBG_FUNC_END, 0, 0, 0, -1, 0);
			return;
		}
		wq_size = p->p_wqsize;
		p->p_wqptr = NULL;
		p->p_wqsize = 0;

		/*
		 * we now arm the timer in the callback function w/o holding the workq lock...
		 * we do this by setting WQ_ATIMER_RUNNING via OSCompareAndSwap in order to
		 * ensure only a single timer is running and to notice that WQ_EXITING has
		 * been set (we don't want to start a timer once WQ_EXITING is posted)
		 *
		 * so once we have successfully set WQ_EXITING, we cannot fire up a new timer...
		 * therefore no need to clear the timer state atomically from the flags
		 *
		 * since we always hold the workq lock when dropping WQ_ATIMER_RUNNING
		 * the check for and sleep until clear is protected
		 */
		while ( !(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags | WQ_EXITING), (UInt32 *)&wq->wq_flags)));

		if (wq->wq_flags & WQ_ATIMER_RUNNING) {
			if (thread_call_cancel(wq->wq_atimer_call) == TRUE)
				wq->wq_flags &= ~WQ_ATIMER_RUNNING;
		}
		while ((wq->wq_flags & WQ_ATIMER_RUNNING) || (wq->wq_lflags & WQL_ATIMER_BUSY)) {

			assert_wait((caddr_t)wq, (THREAD_UNINT));
			workqueue_unlock(p);

			thread_block(THREAD_CONTINUE_NULL);

			workqueue_lock_spin(p);
		}
		workqueue_unlock(p);

		TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {

			thread_sched_call(tl->th_thread, NULL);

			uth = get_bsdthread_info(tl->th_thread);
			if (uth != (struct uthread *)0) {
				uth->uu_threadlist = NULL;
			}
			TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);

			/*
			 * drop our last ref on the thread
			 */
			thread_deallocate(tl->th_thread);

			kfree(tl, sizeof(struct threadlist));
		}
		TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) {
			workqueue_removethread(tl, 1);
		}
		thread_call_free(wq->wq_atimer_call);

		kfree(wq, wq_size);

		KERNEL_DEBUG(0x900808c | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}
}
static int
workqueue_additem(struct workqueue *wq, int prio, user_addr_t item, int affinity)
{
	struct workitem	*witem;
	struct workitemlist *wl;

	wl = (struct workitemlist *)&wq->wq_list[prio];

	if (TAILQ_EMPTY(&wl->wl_freelist))
		return (ENOMEM);

	witem = (struct workitem *)TAILQ_FIRST(&wl->wl_freelist);
	TAILQ_REMOVE(&wl->wl_freelist, witem, wi_entry);

	witem->wi_item = item;
	witem->wi_affinity = affinity;
	TAILQ_INSERT_TAIL(&wl->wl_itemlist, witem, wi_entry);

	wq->wq_list_bitmap |= (1 << prio);

	wq->wq_itemcount++;

	return (0);
}
static int workqueue_importance[WORKQUEUE_NUMPRIOS] =

#define WORKQ_POLICY_TIMESHARE 1

static int workqueue_policy[WORKQUEUE_NUMPRIOS] =
{
	WORKQ_POLICY_TIMESHARE, WORKQ_POLICY_TIMESHARE, WORKQ_POLICY_TIMESHARE, WORKQ_POLICY_TIMESHARE
};
/*
 * workqueue_run_nextitem:
 *   called with the workqueue lock held...
 *   responsible for dropping it in all cases
 */
static boolean_t
workqueue_run_nextitem(proc_t p, struct workqueue *wq, thread_t thread, user_addr_t oc_item, int oc_prio, int oc_affinity)
{
1444         struct workitem 
*witem 
= NULL
; 
1445         user_addr_t item 
= 0; 
1446         thread_t th_to_run 
= THREAD_NULL
; 
1447         thread_t th_to_park 
= THREAD_NULL
; 
1448         int wake_thread 
= 0; 
1449         int reuse_thread 
= 1; 
1450         uint32_t priority
, orig_priority
; 
1451         uint32_t affinity_tag
, orig_affinity_tag
; 
1453         uint32_t activecount
; 
1455         uint32_t us_to_wait
; 
1456         struct threadlist 
*tl 
= NULL
; 
1457         struct threadlist 
*ttl 
= NULL
; 
1458         struct uthread 
*uth 
= NULL
; 
1459         struct workitemlist 
*wl 
= NULL
; 
1460         boolean_t start_timer 
= FALSE
; 
1461         boolean_t adjust_counters 
= TRUE
; 
1465         KERNEL_DEBUG(0xefffd000 | DBG_FUNC_START
, wq
, thread
, wq
->wq_thidlecount
, wq
->wq_itemcount
, 0); 
1468          * from here until we drop the workq lock 
1469          * we can't be pre-empted since we hold  
1470          * the lock in spin mode... this is important 
1471          * since we have to independently update the priority 
1472          * and affinity that the thread is associated with 
1473          * and these values are used to index the multi-dimensional 
1474          * counter arrays in 'workqueue_callback' 
1477                 uint32_t min_scheduled 
= 0; 
1478                 uint32_t scheduled_count
; 
1479                 uint32_t active_count
; 
1480                 uint32_t t_affinity 
= 0; 
1485                 if ((affinity_tag 
= oc_affinity
) == (uint32_t)-1) { 
1486                         for (affinity_tag 
= 0; affinity_tag 
< wq
->wq_reqconc
[priority
]; affinity_tag
++) { 
1488                                  * look for the affinity group with the least number of threads 
1490                                 scheduled_count 
= 0; 
1493                                 for (i 
= 0; i 
<= priority
; i
++) { 
1494                                         scheduled_count 
+= wq
->wq_thscheduled_count
[i
][affinity_tag
]; 
1495                                         active_count 
+= wq
->wq_thactive_count
[i
][affinity_tag
]; 
1497                                 if (active_count 
== 0) { 
1498                                         t_affinity 
= affinity_tag
; 
1501                                 if (affinity_tag 
== 0 || scheduled_count 
< min_scheduled
) { 
1502                                         min_scheduled 
= scheduled_count
; 
1503                                         t_affinity 
= affinity_tag
; 
1506                         affinity_tag 
= t_affinity
; 
1508                 goto grab_idle_thread
; 
1511          * if we get here, the work should be handled by a constrained thread 
1513         if (wq
->wq_itemcount 
== 0 || wq
->wq_constrained_threads_scheduled 
>= wq_max_constrained_threads
) { 
1515                  * no work to do, or we're already at or over the scheduling limit for 
1516                  * constrained threads...  just return or park the thread... 
1517                  * do not start the timer for this condition... if we don't have any work, 
1518                  * we'll check again when new work arrives... if we're over the limit, we need 1 or more 
1519                  * constrained threads to return to the kernel before we can dispatch work from our queue 
1521                 if ((th_to_park 
= thread
) == THREAD_NULL
) 
1525         for (priority 
= 0; priority 
< WORKQUEUE_NUMPRIOS
; priority
++) { 
1526                 if (wq
->wq_list_bitmap 
& (1 << priority
)) { 
1527                         wl 
= (struct workitemlist 
*)&wq
->wq_list
[priority
]; 
1532         assert(!(TAILQ_EMPTY(&wl
->wl_itemlist
))); 
1534         curtime 
= mach_absolute_time(); 
1536         if (thread 
!= THREAD_NULL
) { 
1537                 uth 
= get_bsdthread_info(thread
); 
1538                 tl 
= uth
->uu_threadlist
; 
1539                 affinity_tag 
= tl
->th_affinity_tag
; 
1542                  * check to see if the affinity group this thread is 
1543                  * associated with is still within the bounds of the 
1544                  * specified concurrency for the priority level 
1545                  * we're considering running work for 
1547                 if (affinity_tag 
< wq
->wq_reqconc
[priority
]) { 
1549                          * we're a worker thread from the pool... currently we 
1550                          * are considered 'active' which means we're counted 
1551                          * in "wq_thactive_count" 
1552                          * add up the active counts of all the priority levels 
1553                          * up to and including the one we want to schedule 
1555                         for (activecount 
= 0, i 
= 0; i 
<= priority
; i
++) { 
1558                                 acount 
= wq
->wq_thactive_count
[i
][affinity_tag
]; 
1560                                 if (acount 
== 0 && wq
->wq_thscheduled_count
[i
][affinity_tag
]) { 
1561                                         if (wq_thread_is_busy(curtime
, &wq
->wq_lastblocked_ts
[i
][affinity_tag
])) 
1564                                 activecount 
+= acount
; 
1566                         if (activecount 
== 1) { 
1568                                  * we're the only active thread associated with our 
1569                                  * affinity group at this priority level and higher, 
1570                                  * so pick up some work and keep going 
1577                  * there's more than 1 thread running in this affinity group 
1578                  * or the concurrency level has been cut back for this priority... 
1579                  * lets continue on and look for an 'empty' group to run this 
	for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) {
		/*
		 * look for first affinity group that is currently not active
		 * i.e. no active threads at this priority level or higher
		 * and no threads that have run recently
		 */
		for (activecount = 0, i = 0; i <= priority; i++) {
			if ((activecount = wq->wq_thactive_count[i][affinity_tag]))
				break;

			if (wq->wq_thscheduled_count[i][affinity_tag]) {
				if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag])) {
					busycount++;
					break;
				}
			}
		}
		if (activecount == 0 && busycount == 0)
			break;
	}
	if (affinity_tag >= wq->wq_reqconc[priority]) {
		/*
		 * we've already got at least 1 thread per
		 * affinity group in the active state...
		 */
		if (busycount) {
			/*
			 * we found at least 1 thread in the
			 * 'busy' state... make sure we start
			 * the timer because if they are the only
			 * threads keeping us from scheduling
			 * this workitem, we won't get a callback
			 * to kick off the timer... we need to
			 * start it now...
			 */
			WQ_TIMER_NEEDED(wq, start_timer);
		}
		KERNEL_DEBUG(0xefffd000 | DBG_FUNC_NONE, wq, busycount, start_timer, 0, 0);

		if (thread != THREAD_NULL) {
			/*
			 * go park this one for later
			 */
			th_to_park = thread;
			goto parkit;
		}
		goto out_of_work;
	}
	if (thread != THREAD_NULL) {
		/*
		 * we're overbooked on the affinity group this thread is
		 * currently associated with, but we have work to do
		 * and at least 1 idle processor, so we'll just retarget
		 * this thread to a new affinity group
		 */
		th_to_run = thread;
		goto pick_up_work;
	}
	if (wq->wq_thidlecount == 0) {
		/*
		 * we don't have a thread to schedule, but we have
		 * work to do and at least 1 affinity group that
		 * doesn't currently have an active thread...
		 */
		WQ_TIMER_NEEDED(wq, start_timer);

		KERNEL_DEBUG(0xefffd118, wq, wq->wq_nthreads, start_timer, 0, 0);

		goto no_thread_to_run;
	}
grab_idle_thread:
	/*
	 * we've got a candidate (affinity group with no currently
	 * active threads) to start a new thread on...
	 * we already know there is both work available
	 * and an idle thread, so activate a thread and then
	 * fall into the code that pulls a new workitem...
	 */
	TAILQ_FOREACH(ttl, &wq->wq_thidlelist, th_entry) {
		if (ttl->th_affinity_tag == affinity_tag || ttl->th_affinity_tag == (uint16_t)-1) {

			TAILQ_REMOVE(&wq->wq_thidlelist, ttl, th_entry);
			tl = ttl;

			break;
		}
	}
	if (tl == NULL) {
		tl = TAILQ_FIRST(&wq->wq_thidlelist);
		TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
	}
	wq->wq_thidlecount--;

	TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);

	if ((tl->th_flags & TH_LIST_SUSPENDED) == TH_LIST_SUSPENDED) {
		tl->th_flags &= ~TH_LIST_SUSPENDED;
		reuse_thread = 0;

	} else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) {
		tl->th_flags &= ~TH_LIST_BLOCKED;
		wake_thread = 1;
	}
	tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY;

	wq->wq_threads_scheduled++;
	wq->wq_thscheduled_count[priority][affinity_tag]++;
	OSAddAtomic(1, &wq->wq_thactive_count[priority][affinity_tag]);

	adjust_counters = FALSE;
	th_to_run = tl->th_thread;
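	/*
	 * fall through to pull a workitem off of the selected priority
	 * list (or, on the overcommit path, run the item that was handed
	 * to us) and to update this thread's constrained accounting
	 */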
pick_up_work:
	if (item == 0) {
		witem = TAILQ_FIRST(&wl->wl_itemlist);
		TAILQ_REMOVE(&wl->wl_itemlist, witem, wi_entry);

		if (TAILQ_EMPTY(&wl->wl_itemlist))
			wq->wq_list_bitmap &= ~(1 << priority);
		wq->wq_itemcount--;

		item = witem->wi_item;
		witem->wi_item = (user_addr_t)0;
		witem->wi_affinity = 0;
		TAILQ_INSERT_HEAD(&wl->wl_freelist, witem, wi_entry);

		if ( !(tl->th_flags & TH_LIST_CONSTRAINED)) {
			wq->wq_constrained_threads_scheduled++;
			tl->th_flags |= TH_LIST_CONSTRAINED;
		}
	} else {
		if (tl->th_flags & TH_LIST_CONSTRAINED) {
			wq->wq_constrained_threads_scheduled--;
			tl->th_flags &= ~TH_LIST_CONSTRAINED;
		}
	}
	orig_priority = tl->th_priority;
	orig_affinity_tag = tl->th_affinity_tag;

	tl->th_priority = priority;
	tl->th_affinity_tag = affinity_tag;

	if (adjust_counters == TRUE && (orig_priority != priority || orig_affinity_tag != affinity_tag)) {
		/*
		 * we need to adjust these counters based on this
		 * thread's new disposition w/r to affinity and priority
		 */
		OSAddAtomic(-1, &wq->wq_thactive_count[orig_priority][orig_affinity_tag]);
		OSAddAtomic(1, &wq->wq_thactive_count[priority][affinity_tag]);

		wq->wq_thscheduled_count[orig_priority][orig_affinity_tag]--;
		wq->wq_thscheduled_count[priority][affinity_tag]++;
	}
	wq->wq_thread_yielded_count = 0;
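	/*
	 * the remaining setup (affinity retargeting and scheduling policy
	 * changes) operates only on the chosen thread, so the workqueue
	 * lock can be dropped before making those calls
	 */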
	workqueue_unlock(p);

	if (orig_affinity_tag != affinity_tag) {
		/*
		 * this thread's affinity does not match the affinity group
		 * it's being placed on (it's either a brand new thread or
		 * we're retargeting an existing thread to a new group)...
		 * affinity tag of 0 means no affinity...
		 * but we want our tags to be 0 based because they
		 * are used to index arrays, so...
		 * keep it 0 based internally and bump by 1 when
		 * calling out to set it
		 */
		KERNEL_DEBUG(0xefffd114 | DBG_FUNC_START, wq, orig_affinity_tag, 0, 0, 0);

		(void)thread_affinity_set(th_to_run, affinity_tag + 1);

		KERNEL_DEBUG(0xefffd114 | DBG_FUNC_END, wq, affinity_tag, 0, 0, 0);
	}
	if (orig_priority != priority) {
		thread_precedence_policy_data_t	precedinfo;
		thread_extended_policy_data_t	extinfo;
		uint32_t	policy;

		policy = workqueue_policy[priority];

		KERNEL_DEBUG(0xefffd120 | DBG_FUNC_START, wq, orig_priority, tl->th_policy, 0, 0);

		if ((orig_priority == WORKQUEUE_BG_PRIOQUEUE) || (priority == WORKQUEUE_BG_PRIOQUEUE)) {
			struct uthread *ut = NULL;

			ut = get_bsdthread_info(th_to_run);

			if (orig_priority == WORKQUEUE_BG_PRIOQUEUE) {
				/* remove the disk throttle, importance will be reset in any case */
#if !CONFIG_EMBEDDED
				proc_restore_workq_bgthreadpolicy(th_to_run);
#else /* !CONFIG_EMBEDDED */
				if ((ut->uu_flag & UT_BACKGROUND) != 0) {
					ut->uu_flag &= ~UT_BACKGROUND;
					ut->uu_iopol_disk = IOPOL_NORMAL;
				}
#endif /* !CONFIG_EMBEDDED */
			}

			if (priority == WORKQUEUE_BG_PRIOQUEUE) {
#if !CONFIG_EMBEDDED
				proc_apply_workq_bgthreadpolicy(th_to_run);
#else /* !CONFIG_EMBEDDED */
				if ((ut->uu_flag & UT_BACKGROUND) == 0) {
					/* set diskthrottling */
					ut->uu_flag |= UT_BACKGROUND;
					ut->uu_iopol_disk = IOPOL_THROTTLE;
				}
#endif /* !CONFIG_EMBEDDED */
			}
		}

		if (tl->th_policy != policy) {
			extinfo.timeshare = policy;
			(void)thread_policy_set_internal(th_to_run, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

			tl->th_policy = policy;
		}

		precedinfo.importance = workqueue_importance[priority];
		(void)thread_policy_set_internal(th_to_run, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);

		KERNEL_DEBUG(0xefffd120 | DBG_FUNC_END, wq, priority, policy, 0, 0);
	}
	if (kdebug_enable) {
		int	lpri = -1;
		int	laffinity = -1;
		int	first = -1;
		uint32_t  code = 0xefffd02c | DBG_FUNC_START;

		for (n = 0; n < WORKQUEUE_NUMPRIOS; n++) {
			for (i = 0; i < wq->wq_affinity_max; i++) {
				if (wq->wq_thactive_count[n][i]) {
					if (lpri != -1) {
						KERNEL_DEBUG(code, lpri, laffinity, wq->wq_thactive_count[lpri][laffinity], first, 0);
						code = 0xefffd02c;
						first = 0;
					}
					lpri = n;
					laffinity = i;
				}
			}
		}
		if (lpri != -1) {
			if (first == -1)
				first = 0xeeeeeeee;
			KERNEL_DEBUG(0xefffd02c | DBG_FUNC_END, lpri, laffinity, wq->wq_thactive_count[lpri][laffinity], first, 0);
		}
	}
	/*
	 * if the current thread is reused for the workitem, this does not return via unix_syscall
	 */
	wq_runitem(p, item, th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));

	KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(th_to_run), item, 1, 0);

	return (TRUE);
out_of_work:
	/*
	 * we have no work to do or we are fully booked
	 * w/r to running threads...
	 */
no_thread_to_run:
	workqueue_unlock(p);

	if (start_timer)
		workqueue_interval_timer_start(wq);

	KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(thread), 0, 2, 0);

	return (FALSE);
parkit:
	/*
	 * this is a workqueue thread with no more
	 * work to do... park it for now
	 */
	uth = get_bsdthread_info(th_to_park);
	tl = uth->uu_threadlist;
	if (tl == 0)
		panic("wq thread with no threadlist ");

	TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
	tl->th_flags &= ~TH_LIST_RUNNING;

	tl->th_flags |= TH_LIST_BLOCKED;
	TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry);

	thread_sched_call(th_to_park, NULL);

	OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]);
	wq->wq_thscheduled_count[tl->th_priority][tl->th_affinity_tag]--;
	wq->wq_threads_scheduled--;

	if (tl->th_flags & TH_LIST_CONSTRAINED) {
		wq->wq_constrained_threads_scheduled--;
		wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
		tl->th_flags &= ~TH_LIST_CONSTRAINED;
	}
	if (wq->wq_thidlecount < 100)
		us_to_wait = wq_reduce_pool_window_usecs - (wq->wq_thidlecount * (wq_reduce_pool_window_usecs / 100));
	else
		us_to_wait = wq_reduce_pool_window_usecs / 100;

	wq->wq_thidlecount++;
	wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;

	assert_wait_timeout((caddr_t)tl, (THREAD_INTERRUPTIBLE), us_to_wait, NSEC_PER_USEC);

	workqueue_unlock(p);

	if (start_timer)
		workqueue_interval_timer_start(wq);

	KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_START, wq, wq->wq_threads_scheduled, wq->wq_thidlecount, us_to_wait, thread_tid(th_to_park));
	KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(thread), 0, 3, 0);

	thread_block((thread_continue_t)wq_unpark_continue);
	/* NOT REACHED */

	return (FALSE);
}
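/*
 * continuation run when a newly created workqueue thread, left suspended
 * while it was being set up, is finally resumed... the normal case simply
 * re-arms the scheduler callback and returns to user space; the other paths
 * handle the thread having been aborted (the process is crashing) before or
 * during dispatch
 */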
static void
wq_unsuspend_continue(void)
{
	struct uthread *uth = NULL;
	thread_t th_to_unsuspend;
	struct threadlist *tl;
	proc_t	p;

	th_to_unsuspend = current_thread();
	uth = get_bsdthread_info(th_to_unsuspend);

	if (uth != NULL && (tl = uth->uu_threadlist) != NULL) {

		if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
			/*
			 * most likely a normal resume of this thread occurred...
			 * it's also possible that the thread was aborted after we
			 * finished setting it up so that it could be dispatched... if
			 * so, thread_bootstrap_return will notice the abort and put
			 * the thread on the path to self-destruction
			 */
normal_resume_to_user:
			thread_sched_call(th_to_unsuspend, workqueue_callback);

			thread_bootstrap_return();
		}
		/*
		 * if we get here, it's because we've been resumed due to
		 * an abort of this thread (process is crashing)
		 */
		p = current_proc();

		workqueue_lock_spin(p);

		if (tl->th_flags & TH_LIST_SUSPENDED) {
			/*
			 * thread has been aborted while still on our idle
			 * queue... remove it from our domain...
			 * workqueue_removethread consumes the lock
			 */
			workqueue_removethread(tl, 0);

			thread_bootstrap_return();
		}
		while ((tl->th_flags & TH_LIST_BUSY)) {
			/*
			 * this thread was aborted after we started making
			 * it runnable, but before we finished dispatching it...
			 * we need to wait for that process to finish,
			 * and we need to ask for a wakeup instead of a
			 * thread_resume since the abort has already resumed us
			 */
			tl->th_flags |= TH_LIST_NEED_WAKEUP;

			assert_wait((caddr_t)tl, (THREAD_UNINT));

			workqueue_unlock(p);

			thread_block(THREAD_CONTINUE_NULL);

			workqueue_lock_spin(p);
		}
		workqueue_unlock(p);
		/*
		 * we have finished setting up the thread's context...
		 * thread_bootstrap_return will take us through the abort path
		 * where the thread will self destruct
		 */
		goto normal_resume_to_user;
	}
	thread_bootstrap_return();
}
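/*
 * continuation run when a parked idle workqueue thread is woken, either
 * because wq_runitem handed it new work or because the pool-reduction
 * timeout armed in the parkit path fired... in the timeout case the thread
 * removes itself from the pool via workqueue_removethread
 */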
static void
wq_unpark_continue(void)
{
	struct uthread *uth = NULL;
	struct threadlist *tl;
	thread_t th_to_unpark;
	proc_t	p;

	th_to_unpark = current_thread();
	uth = get_bsdthread_info(th_to_unpark);

	if (uth != NULL) {
		if ((tl = uth->uu_threadlist) != NULL) {

			if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
				/*
				 * a normal wakeup of this thread occurred... no need
				 * for any synchronization with the timer and wq_runitem
				 */
normal_return_to_user:
				thread_sched_call(th_to_unpark, workqueue_callback);

				KERNEL_DEBUG(0xefffd018 | DBG_FUNC_END, tl->th_workq, 0, 0, 0, 0);

				thread_exception_return();
			}
			p = current_proc();

			workqueue_lock_spin(p);

			if ( !(tl->th_flags & TH_LIST_RUNNING)) {
				/*
				 * the timer popped us out and we've not
				 * been moved off of the idle list
				 * so we should now self-destruct
				 *
				 * workqueue_removethread consumes the lock
				 */
				workqueue_removethread(tl, 0);

				thread_exception_return();
			}
			/*
			 * the timer woke us up, but we have already
			 * started to make this a runnable thread,
			 * but have not yet finished that process...
			 * so wait for the normal wakeup
			 */
			while ((tl->th_flags & TH_LIST_BUSY)) {

				assert_wait((caddr_t)tl, (THREAD_UNINT));

				workqueue_unlock(p);

				thread_block(THREAD_CONTINUE_NULL);

				workqueue_lock_spin(p);
			}
			/*
			 * we have finished setting up the thread's context
			 * now we can return as if we got a normal wakeup
			 */
			workqueue_unlock(p);

			goto normal_return_to_user;
		}
	}
	thread_exception_return();
}
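/*
 * hand a work item to a specific thread... set up its user register state
 * via setup_wqthread and then either return directly to user space (when the
 * calling thread is the one being reused), wake a blocked thread, or resume
 * a suspended one
 */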
static void
wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
	   int reuse_thread, int wake_thread, int return_directly)
{
	int ret = 0;
	boolean_t need_resume = FALSE;

	KERNEL_DEBUG1(0xefffd004 | DBG_FUNC_START, tl->th_workq, tl->th_priority, tl->th_affinity_tag, thread_tid(current_thread()), thread_tid(th));

	ret = setup_wqthread(p, th, item, reuse_thread, tl);

	if (ret != 0)
		panic("setup_wqthread failed  %x\n", ret);

	if (return_directly) {
		KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, tl->th_workq, 0, 0, 4, 0);

		thread_exception_return();

		panic("wq_runitem: thread_exception_return returned ...\n");
	}
	if (wake_thread) {
		workqueue_lock_spin(p);

		tl->th_flags &= ~TH_LIST_BUSY;
		wakeup(tl);

		workqueue_unlock(p);

		KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, tl->th_workq, 0, 0, thread_tid(current_thread()), thread_tid(th));
	} else {
		workqueue_lock_spin(p);

		if (tl->th_flags & TH_LIST_NEED_WAKEUP)
			wakeup(tl);
		else
			need_resume = TRUE;

		tl->th_flags &= ~(TH_LIST_BUSY | TH_LIST_NEED_WAKEUP);

		workqueue_unlock(p);

		if (need_resume) {
			/*
			 * need to do this outside of the workqueue spin lock
			 * since thread_resume locks the thread via a full mutex
			 */
			thread_resume(th);
		}
	}
}
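/*
 * load the user register state for a workqueue thread so that, on its next
 * return to user space, it enters the process's registered start routine
 * (p->p_wqthread)... the values passed are, in order: the top of the stack
 * allocation (treated as the pthread pointer by the reuse check below), the
 * thread's kernel port, the base of the usable stack, the work item and the
 * reuse flag, carried in eax/ebx/ecx/edx/edi for 32-bit processes and in
 * rdi/rsi/rdx/rcx/r8 for 64-bit ones... the matching user-side entry point
 * is provided by the pthread library and is not spelled out here
 */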
int
setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl)
{
#if defined(__i386__) || defined(__x86_64__)
	int isLP64 = 0;

	isLP64 = IS_64BIT_PROCESS(p);
	/*
	 * Set up i386 registers & function call.
	 */
	if (isLP64 == 0) {
		x86_thread_state32_t state;
		x86_thread_state32_t *ts = &state;

		ts->eip = (int)p->p_wqthread;
		ts->eax = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
		ts->ebx = (unsigned int)tl->th_thport;
		ts->ecx = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
		ts->edx = (unsigned int)item;
		ts->edi = (unsigned int)reuse_thread;
		ts->esi = (unsigned int)0;
		/*
		 * set stack pointer
		 */
		ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_32_STK_ALIGN));

		if ((reuse_thread != 0) && (ts->eax == (unsigned int)0))
			panic("setup_wqthread: setting reuse thread with null pthread\n");
		thread_set_wq_state32(th, (thread_state_t)ts);

	} else {
		x86_thread_state64_t state64;
		x86_thread_state64_t *ts64 = &state64;

		ts64->rip = (uint64_t)p->p_wqthread;
		ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
		ts64->rsi = (uint64_t)(tl->th_thport);
		ts64->rdx = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
		ts64->rcx = (uint64_t)item;
		ts64->r8 = (uint64_t)reuse_thread;
		ts64->r9 = (uint64_t)0;

		/*
		 * set stack pointer aligned to 16 byte boundary
		 */
		ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_64_REDZONE_LEN);

		if ((reuse_thread != 0) && (ts64->rdi == (uint64_t)0))
			panic("setup_wqthread: setting reuse thread with null pthread\n");
		thread_set_wq_state64(th, (thread_state_t)ts64);
	}
#else
#error setup_wqthread  not defined for this architecture
#endif
	return(0);
}
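/*
 * report a snapshot of this process's workqueue state: total threads,
 * currently active (running) threads, blocked threads, and whether the
 * constrained or total thread limits have been exceeded
 */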
int
fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
{
	struct workqueue * wq;
	int error = 0;
	int	activecount;
	uint32_t pri, affinity;

	workqueue_lock_spin(p);
	if ((wq = p->p_wqptr) == NULL) {
		error = EINVAL;
		goto out;
	}
	activecount = 0;

	for (pri = 0; pri < WORKQUEUE_NUMPRIOS; pri++) {
		for (affinity = 0; affinity < wq->wq_affinity_max; affinity++)
			activecount += wq->wq_thactive_count[pri][affinity];
	}
	pwqinfo->pwq_nthreads = wq->wq_nthreads;
	pwqinfo->pwq_runthreads = activecount;
	pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
	pwqinfo->pwq_state = 0;

	if (wq->wq_lflags & WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT)
		pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;

	if (wq->wq_lflags & WQL_EXCEEDED_TOTAL_THREAD_LIMIT)
		pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;

out:
	workqueue_unlock(p);
	return(error);
}
/* Set target concurrency of one of the queues (0,1,2) to the specified value */
int
proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc)
{
	proc_t p, self;
	uint64_t addr;
	int32_t conc = targetconc;
	int error = 0;
	vm_map_t oldmap = VM_MAP_NULL;
	int gotref = 0;

	self = current_proc();
	if (self->p_pid != pid) {
		/* if not on self, hold a reference on the process */

		if (pid == 0)
			return(EINVAL);

		p = proc_find(pid);
		if (p == PROC_NULL)
			return(ESRCH);
		gotref = 1;

	} else
		p = self;

	if ((addr = p->p_targconc) == (uint64_t)0) {
		error = EINVAL;
		goto out;
	}

	if ((queuenum >= WQ_MAXPRI_MIN) && (queuenum <= WQ_MAXPRI_MAX)) {
		addr += (queuenum * sizeof(int32_t));
		if (gotref == 1)
			oldmap = vm_map_switch(get_task_map(p->task));
		error = copyout(&conc, addr, sizeof(int32_t));
		if (gotref == 1)
			(void)vm_map_switch(oldmap);
	} else {
		error = EINVAL;
	}
out:
	if (gotref != 0)
		proc_rele(p);
	return(error);
}
/* Set target concurrency on all the prio queues to the specified values */
int
proc_setalltargetconc(pid_t pid, int32_t * targetconcp)
{
	proc_t p, self;
	uint64_t addr;
	int error = 0;
	vm_map_t oldmap = VM_MAP_NULL;
	int gotref = 0;

	self = current_proc();
	if (self->p_pid != pid) {
		/* if not on self, hold a reference on the process */

		if (pid == 0)
			return(EINVAL);

		p = proc_find(pid);
		if (p == PROC_NULL)
			return(ESRCH);
		gotref = 1;

	} else
		p = self;

	if ((addr = (uint64_t)p->p_targconc) == (uint64_t)0) {
		error = EINVAL;
		goto out;
	}

	if (gotref == 1)
		oldmap = vm_map_switch(get_task_map(p->task));

	error = copyout(targetconcp, addr, WQ_PRI_NUM * sizeof(int32_t));
	if (gotref == 1)
		(void)vm_map_switch(oldmap);

out:
	if (gotref != 0)
		proc_rele(p);
	return(error);
}
int thread_selfid(__unused struct proc *p, __unused struct thread_selfid_args *uap, uint64_t *retval)
{
	thread_t thread = current_thread();
	*retval = thread_tid(thread);
	return KERN_SUCCESS;
}
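/*
 * one-time startup initialization for this subsystem: allocate the pthread
 * lock group and attributes, initialize the kernel task's workqueue lock,
 * and set up the global list lock, hash and cleanup thread call used by the
 * psynch synchronizers
 */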
void
pthread_init(void)
{
	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);

	/*
	 * allocate the lock attribute for pthread synchronizers
	 */
	pthread_lck_attr = lck_attr_alloc_init();

	workqueue_init_lock((proc_t) get_bsdtask_info(kernel_task));

	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);

	pth_global_hashinit();
	psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);