/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
#pragma mark - Front Matter

#define _PTHREAD_CONDATTR_T
#define _PTHREAD_COND_T
#define _PTHREAD_MUTEXATTR_T
#define _PTHREAD_MUTEX_T
#define _PTHREAD_RWLOCKATTR_T
#define _PTHREAD_RWLOCK_T

#undef pthread_mutexattr_t
#undef pthread_mutex_t
#undef pthread_condattr_t
#undef pthread_cond_t
#undef pthread_rwlockattr_t
#undef pthread_rwlock_t
#include <sys/cdefs.h>

// <rdar://problem/26158937> panic() should be marked noreturn
extern void panic(const char *string, ...) __printflike(1,2) __dead2;
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
//#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kdebug.h>
//#include <sys/sysproto.h>
#include <sys/user.h>           /* for coredump */
#include <sys/proc_info.h>      /* for fill_procworkqueue */
#include <mach/mach_port.h>
#include <mach/mach_types.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <mach/vm_prot.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>    /* for thread_exception_return */
#include <kern/processor.h>
#include <kern/assert.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_status.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
//#include <vm/vm_protos.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <mach/thread_act.h> /* for thread_resume */
#include <machine/machine_routines.h>
#include <mach/shared_region.h>

#include <libkern/OSAtomic.h>
#include <libkern/libkern.h>

#include <sys/pthread_shims.h>
#include "kern_internal.h"
// XXX: Dirty import for sys/signalvar.h that's wrapped in BSD_KERNEL_PRIVATE
#define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP))

// XXX: Ditto for thread tags from kern/thread.h
#define THREAD_TAG_MAINTHREAD 0x1
#define THREAD_TAG_PTHREAD 0x10
#define THREAD_TAG_WORKQUEUE 0x20
lck_grp_attr_t   *pthread_lck_grp_attr;
lck_grp_t    *pthread_lck_grp;
lck_attr_t   *pthread_lck_attr;

zone_t pthread_zone_workqueue;
zone_t pthread_zone_threadlist;
zone_t pthread_zone_threadreq;

extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
extern void workqueue_thread_yielded(void);
#define WQ_SETUP_FIRST_USE  1
#define WQ_SETUP_CLEAR_VOUCHER  2
static void _setup_wqthread(proc_t p, thread_t th, struct workqueue *wq,
        struct threadlist *tl, int flags);

static void reset_priority(struct threadlist *tl, pthread_priority_t pri);
static pthread_priority_t pthread_priority_from_wq_class_index(struct workqueue *wq, int index);

static void wq_unpark_continue(void* ptr, wait_result_t wait_result) __dead2;

static bool workqueue_addnewthread(proc_t p, struct workqueue *wq);
static void workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use);
static void workqueue_lock_spin(struct workqueue *);
static void workqueue_unlock(struct workqueue *);
#define WQ_RUN_TR_THROTTLED 0
#define WQ_RUN_TR_THREAD_NEEDED 1
#define WQ_RUN_TR_THREAD_STARTED 2
#define WQ_RUN_TR_EXITING 3
static int workqueue_run_threadreq_and_unlock(proc_t p, struct workqueue *wq,
        struct threadlist *tl, struct threadreq *req, bool may_add_new_thread);

static bool may_start_constrained_thread(struct workqueue *wq,
        uint32_t at_priclass, struct threadlist *tl, bool may_start_timer);

static mach_vm_offset_t stack_addr_hint(proc_t p, vm_map_t vmap);
static boolean_t wq_thread_is_busy(uint64_t cur_ts,
        _Atomic uint64_t *lastblocked_tsp);

int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc);
int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);

#define WQ_MAXPRI_MIN   0       /* low prio queue num */
#define WQ_MAXPRI_MAX   2       /* max  prio queuenum */
#define WQ_PRI_NUM      3       /* number of prio work queues */
#define C_32_STK_ALIGN          16
#define C_64_STK_ALIGN          16
#define C_64_REDZONE_LEN        128

#define PTHREAD_T_OFFSET 0
/*
 * Flags field passed to bsdthread_create and back in pthread_start
 * 31  <---------------------------------> 0
 * _________________________________________
 * | flags(8) | policy(8) | importance(16) |
 * -----------------------------------------
 */
#define PTHREAD_START_CUSTOM            0x01000000
#define PTHREAD_START_SETSCHED          0x02000000
#define PTHREAD_START_DETACHED          0x04000000
#define PTHREAD_START_QOSCLASS          0x08000000
#define PTHREAD_START_TSD_BASE_SET      0x10000000
#define PTHREAD_START_QOSCLASS_MASK     0x00ffffff
#define PTHREAD_START_POLICY_BITSHIFT 16
#define PTHREAD_START_POLICY_MASK 0xff
#define PTHREAD_START_IMPORTANCE_MASK 0xffff
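/*
 * Illustrative sketch (not part of the original file): how a flags word laid
 * out as described above could be packed by userspace and unpacked by the
 * kernel.  The helper names are hypothetical; the masks and shift are the
 * ones defined in this file, and the unpacking mirrors what
 * _bsdthread_create() does when PTHREAD_START_SETSCHED is set.
 */
#if 0
static inline uint32_t
example_pack_bsdthread_flags(uint32_t policy, uint32_t importance)
{
    // flags(8) | policy(8) | importance(16)
    return PTHREAD_START_SETSCHED |
            ((policy & PTHREAD_START_POLICY_MASK) << PTHREAD_START_POLICY_BITSHIFT) |
            (importance & PTHREAD_START_IMPORTANCE_MASK);
}

static inline void
example_unpack_bsdthread_flags(uint32_t flags, uint32_t *policy, uint32_t *importance)
{
    *policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
    *importance = flags & PTHREAD_START_IMPORTANCE_MASK;
}
#endif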
#define SCHED_OTHER      POLICY_TIMESHARE
#define SCHED_FIFO       POLICY_FIFO
#define SCHED_RR         POLICY_RR

#define BASEPRI_DEFAULT 31
static uint32_t wq_stalled_window_usecs         = WQ_STALLED_WINDOW_USECS;
static uint32_t wq_reduce_pool_window_usecs     = WQ_REDUCE_POOL_WINDOW_USECS;
static uint32_t wq_max_timer_interval_usecs     = WQ_MAX_TIMER_INTERVAL_USECS;
static uint32_t wq_max_threads                  = WORKQUEUE_MAXTHREADS;
static uint32_t wq_max_constrained_threads      = WORKQUEUE_MAXTHREADS / 8;
static uint32_t wq_max_concurrency[WORKQUEUE_NUM_BUCKETS + 1]; // set to ncpus on load
SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
           &wq_stalled_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
           &wq_reduce_pool_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
           &wq_max_timer_interval_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
           &wq_max_threads, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
           &wq_max_constrained_threads, 0, "");
static int wq_kevent_test SYSCTL_HANDLER_ARGS;
SYSCTL_PROC(_debug, OID_AUTO, wq_kevent_test, CTLFLAG_MASKED | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLTYPE_OPAQUE, NULL, 0, wq_kevent_test, 0, "-");

static uint32_t wq_init_constrained_limit = 1;

uint32_t pthread_debug_tracing = 1;

SYSCTL_INT(_kern, OID_AUTO, pthread_debug_tracing, CTLFLAG_RW | CTLFLAG_LOCKED,
           &pthread_debug_tracing, 0, "")
/*
 *       +-----+-----+-----+-----+-----+-----+-----+
 *       | MT  | BG  | UT  | DE  | IN  | UN  | mgr |
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * | pri |  5  |  4  |  3  |  2  |  1  |  0  |  6  |
 * | qos |  1  |  2  |  3  |  4  |  5  |  6  |  7  |
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 */
static inline uint32_t
_wq_bucket_to_thread_qos(int pri)
{
    if (pri == WORKQUEUE_EVENT_MANAGER_BUCKET) {
        return WORKQUEUE_EVENT_MANAGER_BUCKET + 1;
    }
    return WORKQUEUE_EVENT_MANAGER_BUCKET - pri;
}
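/*
 * Illustrative sketch (not part of the original file): spot checks of the
 * bucket/QoS table above using _wq_bucket_to_thread_qos().  The helper name
 * is hypothetical, and WORKQUEUE_EVENT_MANAGER_BUCKET is assumed to be 6,
 * which is what the "mgr" column of the table implies.
 */
#if 0
static void
example_bucket_qos_mapping(void)
{
    assert(_wq_bucket_to_thread_qos(0) == 6); // UN bucket -> highest regular QoS
    assert(_wq_bucket_to_thread_qos(5) == 1); // MT bucket -> lowest QoS
    assert(_wq_bucket_to_thread_qos(WORKQUEUE_EVENT_MANAGER_BUCKET) ==
            WORKQUEUE_EVENT_MANAGER_BUCKET + 1); // manager bucket -> 7
}
#endif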
#pragma mark wq_thactive

#if defined(__LP64__)
//   7 * 16 bits for each QoS bucket request count (including manager)
//   3 bits of best QoS among all pending constrained requests
#define WQ_THACTIVE_BUCKET_WIDTH 16
#define WQ_THACTIVE_QOS_SHIFT    (7 * WQ_THACTIVE_BUCKET_WIDTH)
#else
//   6 * 10 bits for each QoS bucket request count (except manager)
//   1 bit for the manager bucket
//   3 bits of best QoS among all pending constrained requests
#define WQ_THACTIVE_BUCKET_WIDTH 10
#define WQ_THACTIVE_QOS_SHIFT    (6 * WQ_THACTIVE_BUCKET_WIDTH + 1)
#endif
#define WQ_THACTIVE_BUCKET_MASK  ((1U << WQ_THACTIVE_BUCKET_WIDTH) - 1)
#define WQ_THACTIVE_BUCKET_HALF  (1U << (WQ_THACTIVE_BUCKET_WIDTH - 1))
#define WQ_THACTIVE_NO_PENDING_REQUEST 6

_Static_assert(sizeof(wq_thactive_t) * CHAR_BIT - WQ_THACTIVE_QOS_SHIFT >= 3,
        "Make sure we have space to encode a QoS");
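/*
 * Illustrative sketch (not part of the original file): with the layout
 * described above, the active count for a given QoS bucket can be read out
 * of a wq_thactive_t snapshot with a shift and a mask.  The helper name is
 * hypothetical; the macros are the ones defined above.
 */
#if 0
static inline uint32_t
example_wq_thactive_count_for_bucket(wq_thactive_t v, int qos)
{
    // Each bucket occupies WQ_THACTIVE_BUCKET_WIDTH bits, lowest bucket first.
    return (uint32_t)(v >> (qos * WQ_THACTIVE_BUCKET_WIDTH)) &
            WQ_THACTIVE_BUCKET_MASK;
}
#endif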
static inline wq_thactive_t
_wq_thactive_fetch_and_add(struct workqueue *wq, wq_thactive_t offset)
{
#if PTHREAD_INLINE_RMW_ATOMICS || !defined(__LP64__)
    return atomic_fetch_add_explicit(&wq->wq_thactive, offset,
            memory_order_relaxed);
#else
    return pthread_kern->atomic_fetch_add_128_relaxed(&wq->wq_thactive, offset);
#endif
}
static inline wq_thactive_t
_wq_thactive(struct workqueue *wq)
{
#if PTHREAD_INLINE_RMW_ATOMICS || !defined(__LP64__)
    return atomic_load_explicit(&wq->wq_thactive, memory_order_relaxed);
#else
    return pthread_kern->atomic_load_128_relaxed(&wq->wq_thactive);
#endif
}
#define WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(tha) \
        ((tha) >> WQ_THACTIVE_QOS_SHIFT)

static inline uint32_t
_wq_thactive_best_constrained_req_qos(struct workqueue *wq)
{
    // Avoid expensive atomic operations: the three bits we're loading are in
    // a single byte, and always updated under the workqueue lock
    wq_thactive_t v = *(wq_thactive_t *)&wq->wq_thactive;
    return WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(v);
}
static inline wq_thactive_t
_wq_thactive_set_best_constrained_req_qos(struct workqueue *wq,
        uint32_t orig_qos, uint32_t new_qos)
{
    wq_thactive_t v;
    v = (wq_thactive_t)(new_qos - orig_qos) << WQ_THACTIVE_QOS_SHIFT;
    /*
     * We can do an atomic add relative to the initial load because updates
     * to this qos are always serialized under the workqueue lock.
     */
    return _wq_thactive_fetch_and_add(wq, v) + v;
}
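/*
 * Illustrative worked example (not part of the original file): because the
 * best-constrained-request QoS field sits by itself above
 * WQ_THACTIVE_QOS_SHIFT and is only modified under the workqueue lock, moving
 * it from orig_qos to new_qos needs only a relative add rather than a
 * compare-and-swap of the whole word.  For instance, going from QoS 2 to
 * QoS 5 adds (5 - 2) << WQ_THACTIVE_QOS_SHIFT, which leaves every per-bucket
 * count below the shift untouched; a decrease works the same way through
 * modular arithmetic on the unsigned difference.
 */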
static inline wq_thactive_t
_wq_thactive_offset_for_qos(int qos)
{
    return (wq_thactive_t)1 << (qos * WQ_THACTIVE_BUCKET_WIDTH);
}

static inline wq_thactive_t
_wq_thactive_inc(struct workqueue *wq, int qos)
{
    return _wq_thactive_fetch_and_add(wq, _wq_thactive_offset_for_qos(qos));
}

static inline wq_thactive_t
_wq_thactive_dec(struct workqueue *wq, int qos)
{
    return _wq_thactive_fetch_and_add(wq, -_wq_thactive_offset_for_qos(qos));
}

static inline wq_thactive_t
_wq_thactive_move(struct workqueue *wq, int oldqos, int newqos)
{
    return _wq_thactive_fetch_and_add(wq, _wq_thactive_offset_for_qos(newqos) -
            _wq_thactive_offset_for_qos(oldqos));
}
static inline uint32_t
_wq_thactive_aggregate_downto_qos(struct workqueue *wq, wq_thactive_t v,
        int qos, uint32_t *busycount, uint32_t *max_busycount)
{
    uint32_t count = 0, active;
    uint64_t curtime;

#ifndef __LP64__
    /*
     * on 32bits the manager bucket is a single bit and the best constrained
     * request QoS 3 bits are where the 10 bits of a regular QoS bucket count
     * would be. Mask them out.
     */
    v &= ~(~0ull << WQ_THACTIVE_QOS_SHIFT);
#endif
    if (busycount) {
        curtime = mach_absolute_time();
        *busycount = 0;
    }
    if (max_busycount) {
        *max_busycount = qos + 1;
    }
    for (int i = 0; i <= qos; i++, v >>= WQ_THACTIVE_BUCKET_WIDTH) {
        active = v & WQ_THACTIVE_BUCKET_MASK;
        count += active;

        if (busycount && wq->wq_thscheduled_count[i] > active) {
            if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i])) {
                /*
                 * We only consider the last blocked thread for a given bucket
                 * as busy because we don't want to take the list lock in each
                 * sched callback. However this is an approximation that could
                 * contribute to thread creation storms.
                 */
                (*busycount)++;
            }
        }
    }
    return count;
}
#pragma mark - Process/Thread Setup/Teardown syscalls

static mach_vm_offset_t
stack_addr_hint(proc_t p, vm_map_t vmap)
{
    mach_vm_offset_t stackaddr;
    mach_vm_offset_t aslr_offset;
    bool proc64bit = proc_is64bit(p);

    // We can't safely take random values % something unless it's a power-of-two
    _Static_assert(powerof2(PTH_DEFAULT_STACKSIZE), "PTH_DEFAULT_STACKSIZE is a power-of-two");

#if defined(__i386__) || defined(__x86_64__)
    if (proc64bit) {
        // Matches vm_map_get_max_aslr_slide_pages's image shift in xnu
        aslr_offset = random() % (1 << 28); // about 512 stacks
    } else {
        // Actually bigger than the image shift, we've got ~256MB to work with
        aslr_offset = random() % (16 * PTH_DEFAULT_STACKSIZE);
    }
    aslr_offset = vm_map_trunc_page_mask(aslr_offset, vm_map_page_mask(vmap));
    if (proc64bit) {
        // Above nanomalloc range (see NANOZONE_SIGNATURE)
        stackaddr = 0x700000000000 + aslr_offset;
    } else {
        stackaddr = SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386 + aslr_offset;
    }
#elif defined(__arm__) || defined(__arm64__)
    user_addr_t main_thread_stack_top = 0;
    if (pthread_kern->proc_get_user_stack) {
        main_thread_stack_top = pthread_kern->proc_get_user_stack(p);
    }
    if (proc64bit && main_thread_stack_top) {
        // The main thread stack position is randomly slid by xnu (c.f.
        // load_main() in mach_loader.c), so basing pthread stack allocations
        // where the main thread stack ends is already ASLRd and doing so
        // avoids creating a gap in the process address space that may cause
        // extra PTE memory usage. rdar://problem/33328206
        stackaddr = vm_map_trunc_page_mask((vm_map_offset_t)main_thread_stack_top,
                vm_map_page_mask(vmap));
    } else {
        // vm_map_get_max_aslr_slide_pages ensures 1MB of slide, we do better
        aslr_offset = random() % ((proc64bit ? 4 : 2) * PTH_DEFAULT_STACKSIZE);
        aslr_offset = vm_map_trunc_page_mask((vm_map_offset_t)aslr_offset,
                vm_map_page_mask(vmap));
        if (proc64bit) {
            // 64 stacks below shared region
            stackaddr = SHARED_REGION_BASE_ARM64 - 64 * PTH_DEFAULT_STACKSIZE - aslr_offset;
        } else {
            // If you try to slide down from this point, you risk ending up in memory consumed by malloc
            stackaddr = SHARED_REGION_BASE_ARM - 32 * PTH_DEFAULT_STACKSIZE + aslr_offset;
        }
    }
#else
#error Need to define a stack address hint for this architecture
#endif
    return stackaddr;
}
/*
 * bsdthread_create system call.  Used by pthread_create.
 */
int
_bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcarg, user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, user_addr_t *retval)
{
    mach_vm_offset_t stackaddr;
    mach_vm_size_t th_allocsize = 0;
    mach_vm_size_t th_guardsize;
    mach_vm_offset_t th_stack;
    mach_vm_offset_t th_pthread;
    mach_vm_offset_t th_tsd_base;
    mach_port_name_t th_thport;

    vm_map_t vmap = pthread_kern->current_map();
    task_t ctask = current_task();
    unsigned int policy, importance;

    if (pthread_kern->proc_get_register(p) == 0) {

    PTHREAD_TRACE(TRACE_pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0, 0);

    isLP64 = proc_is64bit(p);
    th_guardsize = vm_map_page_size(vmap);

    stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
    kret = pthread_kern->thread_create(ctask, &th);
    if (kret != KERN_SUCCESS)

    thread_reference(th);

    pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD);

    sright = (void *)pthread_kern->convert_thread_to_port(th);
    th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask));
    if (!MACH_PORT_VALID(th_thport)) {
        error = EMFILE; // userland will convert this into a crash

    if ((flags & PTHREAD_START_CUSTOM) == 0) {
        mach_vm_size_t pthread_size =
            vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(vmap));
        th_allocsize = th_guardsize + user_stack + pthread_size;
        user_stack += PTHREAD_T_OFFSET;

        kret = mach_vm_map(vmap, &stackaddr,
                VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
                0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,

        if (kret != KERN_SUCCESS){
            kret = mach_vm_allocate(vmap,
                    &stackaddr, th_allocsize,
                    VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
        }
        if (kret != KERN_SUCCESS) {

        PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);

        /*
         * The guard page is at the lowest address
         * The stack base is the highest address
         */
        kret = mach_vm_protect(vmap, stackaddr, th_guardsize, FALSE, VM_PROT_NONE);

        if (kret != KERN_SUCCESS) {

        th_pthread = stackaddr + th_guardsize + user_stack;
        th_stack = th_pthread;

        /*
         * Pre-fault the first page of the new thread's stack and the page that will
         * contain the pthread_t structure.
         */
        if (vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) !=
                vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap))){
                    vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)),
                    VM_PROT_READ | VM_PROT_WRITE,
                    THREAD_UNINT, NULL, 0);
        }
                vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap)),
                VM_PROT_READ | VM_PROT_WRITE,
                THREAD_UNINT, NULL, 0);
    } else {
        th_stack = user_stack;
        th_pthread = user_pthread;

        PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3, 0);
    }

    tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
        th_tsd_base = th_pthread + tsd_offset;
        kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
        if (kret == KERN_SUCCESS) {
            flags |= PTHREAD_START_TSD_BASE_SET;
#if defined(__i386__) || defined(__x86_64__)
    /*
     * Set up i386 registers & function call.
     */
        x86_thread_state32_t state = {
            .eip = (unsigned int)pthread_kern->proc_get_threadstart(p),
            .eax = (unsigned int)th_pthread,
            .ebx = (unsigned int)th_thport,
            .ecx = (unsigned int)user_func,
            .edx = (unsigned int)user_funcarg,
            .edi = (unsigned int)user_stack,
            .esi = (unsigned int)flags,
            .esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN))
        };

        error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
        if (error != KERN_SUCCESS) {

        x86_thread_state64_t state64 = {
            .rip = (uint64_t)pthread_kern->proc_get_threadstart(p),
            .rdi = (uint64_t)th_pthread,
            .rsi = (uint64_t)(th_thport),
            .rdx = (uint64_t)user_func,
            .rcx = (uint64_t)user_funcarg,
            .r8 = (uint64_t)user_stack,
            .r9 = (uint64_t)flags,
            /*
             * set stack pointer aligned to 16 byte boundary
             */
            .rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN)
        };

        error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
        if (error != KERN_SUCCESS) {

#elif defined(__arm__)
    arm_thread_state_t state = {
        .pc = (int)pthread_kern->proc_get_threadstart(p),
        .r[0] = (unsigned int)th_pthread,
        .r[1] = (unsigned int)th_thport,
        .r[2] = (unsigned int)user_func,
        .r[3] = (unsigned int)user_funcarg,
        .r[4] = (unsigned int)user_stack,
        .r[5] = (unsigned int)flags,
        /* Set r7 & lr to 0 for better back tracing */
        .sp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN))
    };

    (void) pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
#else
#error bsdthread_create not defined for this architecture
#endif
    if ((flags & PTHREAD_START_SETSCHED) != 0) {
        /* Set scheduling parameters if needed */
        thread_extended_policy_data_t    extinfo;
        thread_precedence_policy_data_t   precedinfo;

        importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
        policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;

        if (policy == SCHED_OTHER) {
            extinfo.timeshare = 1;
        } else {
            extinfo.timeshare = 0;
        }

        thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

        precedinfo.importance = (importance - BASEPRI_DEFAULT);
        thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
    } else if ((flags & PTHREAD_START_QOSCLASS) != 0) {
        /* Set thread QoS class if requested. */
        pthread_priority_t priority = (pthread_priority_t)(flags & PTHREAD_START_QOSCLASS_MASK);

        thread_qos_policy_data_t qos;
        qos.qos_tier = pthread_priority_get_thread_qos(priority);
        qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 :
                _pthread_priority_get_relpri(priority);

        pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
    }

    if (pthread_kern->proc_get_mach_thread_self_tsd_offset) {
        uint64_t mach_thread_self_offset =
                pthread_kern->proc_get_mach_thread_self_tsd_offset(p);
        if (mach_thread_self_offset && tsd_offset) {
            bool proc64bit = proc_is64bit(p);
            if (proc64bit) {
                uint64_t th_thport_tsd = (uint64_t)th_thport;
                error = copyout(&th_thport_tsd, th_pthread + tsd_offset +
                        mach_thread_self_offset, sizeof(th_thport_tsd));
            } else {
                uint32_t th_thport_tsd = (uint32_t)th_thport;
                error = copyout(&th_thport_tsd, th_pthread + tsd_offset +
                        mach_thread_self_offset, sizeof(th_thport_tsd));
            }
        }
    }
    kret = pthread_kern->thread_resume(th);
    if (kret != KERN_SUCCESS) {

    thread_deallocate(th);  /* drop the creator reference */

    PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_END, error, th_pthread, 0, 0, 0);

    // cast required as mach_vm_offset_t is always 64 bits even on 32-bit platforms
    *retval = (user_addr_t)th_pthread;

    if (allocated != 0) {
        (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
    }

    (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
    if (pthread_kern->thread_will_park_or_terminate) {
        pthread_kern->thread_will_park_or_terminate(th);
    }
    (void)thread_terminate(th);
    (void)thread_deallocate(th);
/*
 * bsdthread_terminate system call.  Used by pthread_terminate
 */
int
_bsdthread_terminate(__unused struct proc *p,
                     user_addr_t stackaddr,
                     __unused int32_t *retval)
{
    mach_vm_offset_t freeaddr;
    mach_vm_size_t freesize;

    thread_t th = current_thread();

    freeaddr = (mach_vm_offset_t)stackaddr;

    PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0);

    if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
        if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD){
            vm_map_t user_map = pthread_kern->current_map();
            freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map));
            kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
            assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
            kret = kret ? kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE);
            assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
        } else {
            kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
            if (kret != KERN_SUCCESS) {
                PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
            }
        }
    }

    if (pthread_kern->thread_will_park_or_terminate) {
        pthread_kern->thread_will_park_or_terminate(th);
    }
    (void)thread_terminate(th);
    if (sem != MACH_PORT_NULL) {
        kret = pthread_kern->semaphore_signal_internal_trap(sem);
        if (kret != KERN_SUCCESS) {
            PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
        }
    }

    if (kthport != MACH_PORT_NULL) {
        pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);
    }

    PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0, 0);

    pthread_kern->thread_exception_return();
    panic("bsdthread_terminate: still running\n");

    PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0xff, 0, 0, 0);
/*
 * bsdthread_register system call.  Performs per-process setup.  Responsible for
 * returning capability bits to userspace and receiving userspace function addresses.
 */
_bsdthread_register(struct proc *p,
                    user_addr_t threadstart,
                    user_addr_t wqthread,
                    user_addr_t pthread_init_data,
                    user_addr_t pthread_init_data_size,
                    uint64_t dispatchqueue_offset,

    struct _pthread_registration_data data = {};
    uint32_t max_tsd_offset;
    size_t pthread_init_sz = 0;

    /* syscall randomizer test can pass bogus values */
    if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) {

    /*
     * if we have pthread_init_data, then we use that and target_concptr
     * (which is an offset) to get data.
     */
    if (pthread_init_data != 0) {
        if (pthread_init_data_size < sizeof(data.version)) {

        pthread_init_sz = MIN(sizeof(data), (size_t)pthread_init_data_size);
        int ret = copyin(pthread_init_data, &data, pthread_init_sz);

        if (data.version != (size_t)pthread_init_data_size) {

    } else {
        data.dispatch_queue_offset = dispatchqueue_offset;
    }

    /* We have to do this before proc_get_register so that it resets after fork */
    mach_vm_offset_t stackaddr = stack_addr_hint(p, pthread_kern->current_map());
    pthread_kern->proc_set_stack_addr_hint(p, (user_addr_t)stackaddr);

    /* prevent multiple registrations */
    if (pthread_kern->proc_get_register(p) != 0) {

    pthread_kern->proc_set_threadstart(p, threadstart);
    pthread_kern->proc_set_wqthread(p, wqthread);
    pthread_kern->proc_set_pthsize(p, pthsize);
    pthread_kern->proc_set_register(p);

    uint32_t tsd_slot_sz = proc_is64bit(p) ? sizeof(uint64_t) : sizeof(uint32_t);
    if ((uint32_t)pthsize >= tsd_slot_sz &&
            data.tsd_offset <= (uint32_t)(pthsize - tsd_slot_sz)) {
        max_tsd_offset = ((uint32_t)pthsize - data.tsd_offset - tsd_slot_sz);
    }
    pthread_kern->proc_set_pthread_tsd_offset(p, data.tsd_offset);

    if (data.dispatch_queue_offset > max_tsd_offset) {
        data.dispatch_queue_offset = 0;
    }
    pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);

    if (pthread_kern->proc_set_return_to_kernel_offset) {
        if (data.return_to_kernel_offset > max_tsd_offset) {
            data.return_to_kernel_offset = 0;
        }
        pthread_kern->proc_set_return_to_kernel_offset(p,
                data.return_to_kernel_offset);
    }

    if (pthread_kern->proc_set_mach_thread_self_tsd_offset) {
        if (data.mach_thread_self_offset > max_tsd_offset) {
            data.mach_thread_self_offset = 0;
        }
        pthread_kern->proc_set_mach_thread_self_tsd_offset(p,
                data.mach_thread_self_offset);
    }

    if (pthread_init_data != 0) {
        /* Outgoing data that userspace expects as a reply */
        data.version = sizeof(struct _pthread_registration_data);
        if (pthread_kern->qos_main_thread_active()) {
            mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
            thread_qos_policy_data_t qos;
            boolean_t gd = FALSE;

            kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
            if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
                /* Unspecified threads means the kernel wants us to impose legacy upon the thread. */
                qos.qos_tier = THREAD_QOS_LEGACY;
                qos.tier_importance = 0;

                kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
            }

            if (kr == KERN_SUCCESS) {
                data.main_qos = thread_qos_get_pthread_priority(qos.qos_tier);
            } else {
                data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
            }
        } else {
            data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
        }

        kr = copyout(&data, pthread_init_data, pthread_init_sz);
        if (kr != KERN_SUCCESS) {

    /* return the supported feature set as the return value. */
    *retval = PTHREAD_FEATURE_SUPPORTED;
#pragma mark - QoS Manipulation

int
_bsdthread_ctl_set_qos(struct proc *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t tsd_priority_addr, user_addr_t arg3, int *retval)
{
    pthread_priority_t priority;

    /* Unused parameters must be zero. */

    /* QoS is stored in a given slot in the pthread TSD. We need to copy that in and set our QoS based on it. */
    if (proc_is64bit(p)) {
        rv = copyin(tsd_priority_addr, &v, sizeof(v));
        priority = (int)(v & 0xffffffff);
    } else {
        rv = copyin(tsd_priority_addr, &v, sizeof(v));
    }

    if ((th = port_name_to_thread(kport)) == THREAD_NULL) {

    /* <rdar://problem/16211829> Disable pthread_set_qos_class_np() on threads other than pthread_self */
    if (th != current_thread()) {
        thread_deallocate(th);

    rv = _bsdthread_ctl_set_self(p, 0, priority, 0, _PTHREAD_SET_SELF_QOS_FLAG, retval);

    /* Static param the thread, we just set QoS on it, so it's stuck in QoS land now. */
    /* pthread_kern->thread_static_param(th, TRUE); */ // see <rdar://problem/16433744>, for details

    thread_deallocate(th);
static inline struct threadlist *
util_get_thread_threadlist_entry(thread_t th)
{
    struct uthread *uth = pthread_kern->get_bsdthread_info(th);
    if (uth) {
        struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
        return tl;
    }
    return NULL;
}

static bool
_workq_thread_has_been_unbound(thread_t th, int qos_class)
{
    struct threadlist *tl = util_get_thread_threadlist_entry(th);

    struct workqueue *wq = tl->th_workq;
    workqueue_lock_spin(wq);

    if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {

    } else if (qos_class != class_index_get_thread_qos(tl->th_priority)) {

    }

    if ((tl->th_flags & TH_LIST_KEVENT_BOUND)){

    }
    tl->th_flags &= ~TH_LIST_KEVENT_BOUND;

    workqueue_unlock(wq);

    workqueue_unlock(wq);
int
_bsdthread_ctl_set_self(struct proc *p, user_addr_t __unused cmd, pthread_priority_t priority, mach_port_name_t voucher, _pthread_set_flags_t flags, int __unused *retval)
{
    thread_qos_policy_data_t qos;
    mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
    boolean_t gd = FALSE;
    thread_t th = current_thread();
    struct workqueue *wq = NULL;
    struct threadlist *tl = NULL;

    int qos_rv = 0, voucher_rv = 0, fixedpri_rv = 0;

    if ((flags & _PTHREAD_SET_SELF_WQ_KEVENT_UNBIND) != 0) {
        tl = util_get_thread_threadlist_entry(th);

        workqueue_lock_spin(wq);
        if (tl->th_flags & TH_LIST_KEVENT_BOUND) {
            tl->th_flags &= ~TH_LIST_KEVENT_BOUND;
            unsigned int kevent_flags = KEVENT_FLAG_WORKQ | KEVENT_FLAG_UNBIND_CHECK_FLAGS;
            if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
                kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER;
            }

            workqueue_unlock(wq);
            __assert_only int ret = kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, kevent_flags);
        } else {
            workqueue_unlock(wq);
        }
    }
    if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) {
        kr = pthread_kern->thread_policy_get(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
        if (kr != KERN_SUCCESS) {

        /*
         * If we have main-thread QoS then we don't allow a thread to come out
         * of QOS_CLASS_UNSPECIFIED.
         */
        if (pthread_kern->qos_main_thread_active() && qos.qos_tier ==
                THREAD_QOS_UNSPECIFIED) {

        tl = util_get_thread_threadlist_entry(th);
        if (tl) wq = tl->th_workq;

        PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_START, wq, qos.qos_tier, qos.tier_importance, 0, 0);

        qos.qos_tier = pthread_priority_get_thread_qos(priority);
        qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 : _pthread_priority_get_relpri(priority);

        if (qos.qos_tier == QOS_CLASS_UNSPECIFIED ||
                qos.tier_importance > 0 || qos.tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {

        /*
         * If we're a workqueue, the threadlist item priority needs adjusting,
         * along with the bucket we were running in.
         */
            bool try_run_threadreq = false;

            workqueue_lock_spin(wq);
            kr = pthread_kern->thread_set_workq_qos(th, qos.qos_tier, qos.tier_importance);
            assert(kr == KERN_SUCCESS || kr == KERN_TERMINATED);

            /* Fix up counters. */
            uint8_t old_bucket = tl->th_priority;
            uint8_t new_bucket = pthread_priority_get_class_index(priority);

            if (old_bucket != new_bucket) {
                _wq_thactive_move(wq, old_bucket, new_bucket);
                wq->wq_thscheduled_count[old_bucket]--;
                wq->wq_thscheduled_count[new_bucket]++;
                if (old_bucket == WORKQUEUE_EVENT_MANAGER_BUCKET ||
                        old_bucket < new_bucket) {
                    /*
                     * if the QoS of the thread was lowered, then this could
                     * allow for a higher QoS thread request to run, so we need
                     * to try and run a thread request
                     */
                    try_run_threadreq = true;
                }
                tl->th_priority = new_bucket;
            }

            bool old_overcommit = !(tl->th_flags & TH_LIST_CONSTRAINED);
            bool new_overcommit = priority & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
            if (!old_overcommit && new_overcommit) {
                if (wq->wq_constrained_threads_scheduled-- ==
                        wq_max_constrained_threads) {
                    try_run_threadreq = true;
                }
                tl->th_flags &= ~TH_LIST_CONSTRAINED;
            } else if (old_overcommit && !new_overcommit) {
                wq->wq_constrained_threads_scheduled++;
                tl->th_flags |= TH_LIST_CONSTRAINED;
            }

            if (try_run_threadreq) {
                workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true);
            } else {
                workqueue_unlock(wq);
            }

            kr = pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
            if (kr != KERN_SUCCESS) {

        PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_END, wq, qos.qos_tier, qos.tier_importance, 0, 0);
    }
    if ((flags & _PTHREAD_SET_SELF_VOUCHER_FLAG) != 0) {
        kr = pthread_kern->thread_set_voucher_name(voucher);
        if (kr != KERN_SUCCESS) {
            voucher_rv = ENOENT;
        }
    }

    if (qos_rv) goto done;
    if ((flags & _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG) != 0) {
        thread_extended_policy_data_t extpol = {.timeshare = 0};

        if (!tl) tl = util_get_thread_threadlist_entry(th);
        if (tl) {
            /* Not allowed on workqueue threads */
            fixedpri_rv = ENOTSUP;
        }

        kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT);
        if (kr != KERN_SUCCESS) {
            fixedpri_rv = EINVAL;
        }
    } else if ((flags & _PTHREAD_SET_SELF_TIMESHARE_FLAG) != 0) {
        thread_extended_policy_data_t extpol = {.timeshare = 1};

        if (!tl) tl = util_get_thread_threadlist_entry(th);
        if (tl) {
            /* Not allowed on workqueue threads */
            fixedpri_rv = ENOTSUP;
        }

        kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT);
        if (kr != KERN_SUCCESS) {
            fixedpri_rv = EINVAL;
        }
    }

    if (qos_rv && voucher_rv) {
        /* Both failed, give that a unique error. */
int
_bsdthread_ctl_qos_override_start(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval)
{
    if ((th = port_name_to_thread(kport)) == THREAD_NULL) {

    int override_qos = pthread_priority_get_thread_qos(priority);

    struct threadlist *tl = util_get_thread_threadlist_entry(th);
        PTHREAD_TRACE_WQ(TRACE_wq_override_start | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0);

    /* The only failure case here is if we pass a tid and have it lookup the thread, we pass the uthread, so this all always succeeds. */
    pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE,
            resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE, USER_ADDR_NULL, MACH_PORT_NULL);
    thread_deallocate(th);
int
_bsdthread_ctl_qos_override_end(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t resource, user_addr_t arg3, int __unused *retval)
{
    if ((th = port_name_to_thread(kport)) == THREAD_NULL) {

    struct uthread *uth = pthread_kern->get_bsdthread_info(th);

    struct threadlist *tl = util_get_thread_threadlist_entry(th);
        PTHREAD_TRACE_WQ(TRACE_wq_override_end | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 0, 0, 0);

    pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE);

    thread_deallocate(th);
static int
_bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, user_addr_t ulock_addr)
{
    if ((th = port_name_to_thread(kport)) == THREAD_NULL) {

    int override_qos = pthread_priority_get_thread_qos(priority);

    struct threadlist *tl = util_get_thread_threadlist_entry(th);
        thread_deallocate(th);

    PTHREAD_TRACE_WQ(TRACE_wq_override_dispatch | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0);

    rv = pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE,
            resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE, ulock_addr, kport);

    thread_deallocate(th);
int _bsdthread_ctl_qos_dispatch_asynchronous_override_add(struct proc __unused *p, user_addr_t __unused cmd,
        mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval)
{
    return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, resource, USER_ADDR_NULL);
}

static int
_bsdthread_ctl_qos_override_dispatch(struct proc *p __unused, user_addr_t cmd __unused, mach_port_name_t kport, pthread_priority_t priority, user_addr_t ulock_addr, int __unused *retval)
{
    return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, USER_ADDR_NULL, ulock_addr);
}
static int
_bsdthread_ctl_qos_override_reset(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)
{
    if (arg1 != 0 || arg2 != 0 || arg3 != 0) {
        return EINVAL;
    }

    return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, 1 /* reset_all */, 0, 0, retval);
}
int
_bsdthread_ctl_qos_dispatch_asynchronous_override_reset(struct proc __unused *p, user_addr_t __unused cmd, int reset_all, user_addr_t resource, user_addr_t arg3, int __unused *retval)
{
    if ((reset_all && (resource != 0)) || arg3 != 0) {
        return EINVAL;
    }

    thread_t th = current_thread();
    struct uthread *uth = pthread_kern->get_bsdthread_info(th);
    struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);

    PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_NONE, tl->th_workq, 0, 0, 0, 0);

    resource = reset_all ? THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD : resource;
    pthread_kern->proc_usynch_thread_qos_reset_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE);

    return 0;
}
int
_bsdthread_ctl_max_parallelism(struct proc __unused *p, user_addr_t __unused cmd,
        int qos, unsigned long flags, int *retval)
{
    _Static_assert(QOS_PARALLELISM_COUNT_LOGICAL ==
            _PTHREAD_QOS_PARALLELISM_COUNT_LOGICAL, "logical");
    _Static_assert(QOS_PARALLELISM_REALTIME ==
            _PTHREAD_QOS_PARALLELISM_REALTIME, "realtime");

    if (flags & ~(QOS_PARALLELISM_REALTIME | QOS_PARALLELISM_COUNT_LOGICAL)) {
        return EINVAL;
    }

    if (flags & QOS_PARALLELISM_REALTIME) {

    } else if (qos == THREAD_QOS_UNSPECIFIED || qos >= THREAD_QOS_LAST) {
        return EINVAL;
    }

    *retval = pthread_kern->qos_max_parallelism(qos, flags);
    return 0;
}
int
_bsdthread_ctl(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)
{
    switch (cmd) {
    case BSDTHREAD_CTL_SET_QOS:
        return _bsdthread_ctl_set_qos(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
    case BSDTHREAD_CTL_QOS_OVERRIDE_START:
        return _bsdthread_ctl_qos_override_start(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
    case BSDTHREAD_CTL_QOS_OVERRIDE_END:
        return _bsdthread_ctl_qos_override_end(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
    case BSDTHREAD_CTL_QOS_OVERRIDE_RESET:
        return _bsdthread_ctl_qos_override_reset(p, cmd, arg1, arg2, arg3, retval);
    case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH:
        return _bsdthread_ctl_qos_override_dispatch(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
    case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD:
        return _bsdthread_ctl_qos_dispatch_asynchronous_override_add(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
    case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET:
        return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, (int)arg1, arg2, arg3, retval);
    case BSDTHREAD_CTL_SET_SELF:
        return _bsdthread_ctl_set_self(p, cmd, (pthread_priority_t)arg1, (mach_port_name_t)arg2, (_pthread_set_flags_t)arg3, retval);
    case BSDTHREAD_CTL_QOS_MAX_PARALLELISM:
        return _bsdthread_ctl_max_parallelism(p, cmd, (int)arg1, (unsigned long)arg2, retval);
    }
}
#pragma mark - Workqueue Implementation

#pragma mark wq_flags

static inline uint32_t
_wq_flags(struct workqueue *wq)
{
    return atomic_load_explicit(&wq->wq_flags, memory_order_relaxed);
}

static inline bool
_wq_exiting(struct workqueue *wq)
{
    return _wq_flags(wq) & WQ_EXITING;
}
static inline uint32_t
_wq_flags_or_orig(struct workqueue *wq, uint32_t v)
{
#if PTHREAD_INLINE_RMW_ATOMICS
    uint32_t state;
    do {
        state = _wq_flags(wq);
    } while (!OSCompareAndSwap(state, state | v, &wq->wq_flags));
    return state;
#else
    return atomic_fetch_or_explicit(&wq->wq_flags, v, memory_order_relaxed);
#endif
}

static inline uint32_t
_wq_flags_and_orig(struct workqueue *wq, uint32_t v)
{
#if PTHREAD_INLINE_RMW_ATOMICS
    uint32_t state;
    do {
        state = _wq_flags(wq);
    } while (!OSCompareAndSwap(state, state & v, &wq->wq_flags));
    return state;
#else
    return atomic_fetch_and_explicit(&wq->wq_flags, v, memory_order_relaxed);
#endif
}
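/*
 * Illustrative sketch (not part of the original file): the *_orig helpers
 * above return the flag word as it was before the update, so a caller can
 * publish a flag and learn whether it was the first to do so in one step.
 * The helper below is hypothetical and only demonstrates that pattern with
 * WQ_EXITING, the flag that _wq_exiting() tests.
 */
#if 0
static bool
example_mark_wq_exiting(struct workqueue *wq)
{
    // True only for the caller that actually transitioned the workqueue
    // into the exiting state; later callers see WQ_EXITING already set.
    return (_wq_flags_or_orig(wq, WQ_EXITING) & WQ_EXITING) == 0;
}
#endif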
static inline bool
WQ_TIMER_DELAYED_NEEDED(struct workqueue *wq)
{
    uint32_t oldflags, newflags;
    do {
        oldflags = _wq_flags(wq);
        if (oldflags & (WQ_EXITING | WQ_ATIMER_DELAYED_RUNNING)) {
            return false;
        }
        newflags = oldflags | WQ_ATIMER_DELAYED_RUNNING;
    } while (!OSCompareAndSwap(oldflags, newflags, &wq->wq_flags));
    return true;
}

static inline bool
WQ_TIMER_IMMEDIATE_NEEDED(struct workqueue *wq)
{
    uint32_t oldflags, newflags;
    do {
        oldflags = _wq_flags(wq);
        if (oldflags & (WQ_EXITING | WQ_ATIMER_IMMEDIATE_RUNNING)) {
            return false;
        }
        newflags = oldflags | WQ_ATIMER_IMMEDIATE_RUNNING;
    } while (!OSCompareAndSwap(oldflags, newflags, &wq->wq_flags));
    return true;
}
#pragma mark thread requests pacing

static inline uint32_t
_wq_pacing_shift_for_pri(int pri)
{
    return _wq_bucket_to_thread_qos(pri) - 1;
}

static inline int
_wq_highest_paced_priority(struct workqueue *wq)
{
    uint8_t paced = wq->wq_paced;
    int msb = paced ? 32 - __builtin_clz(paced) : 0; // fls(paced) == bit + 1
    return WORKQUEUE_EVENT_MANAGER_BUCKET - msb;
}

static inline uint8_t
_wq_pacing_bit_for_pri(int pri)
{
    return 1u << _wq_pacing_shift_for_pri(pri);
}

static inline bool
_wq_should_pace_priority(struct workqueue *wq, int pri)
{
    return wq->wq_paced >= _wq_pacing_bit_for_pri(pri);
}
static inline void
_wq_pacing_start(struct workqueue *wq, struct threadlist *tl)
{
    uint8_t bit = _wq_pacing_bit_for_pri(tl->th_priority);
    assert((tl->th_flags & TH_LIST_PACING) == 0);
    assert((wq->wq_paced & bit) == 0);
    wq->wq_paced |= bit;
    tl->th_flags |= TH_LIST_PACING;
}

static inline bool
_wq_pacing_end(struct workqueue *wq, struct threadlist *tl)
{
    if (tl->th_flags & TH_LIST_PACING) {
        uint8_t bit = _wq_pacing_bit_for_pri(tl->th_priority);
        assert((wq->wq_paced & bit) != 0);
        wq->wq_paced ^= bit;
        tl->th_flags &= ~TH_LIST_PACING;
        return wq->wq_paced < bit; // !_wq_should_pace_priority
    }
    return false;
}
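/*
 * Illustrative sketch (not part of the original file): wq_paced keeps one bit
 * per non-manager bucket, with higher-QoS buckets at higher bit positions.
 * Because _wq_should_pace_priority() compares wq_paced against the bit for
 * `pri` with >=, a pacing bit set for a high-QoS bucket also paces every
 * lower-QoS bucket.  The helper below is hypothetical and only demonstrates
 * that property with the functions defined above; bucket numbers follow the
 * pri/qos table earlier in this file.
 */
#if 0
static void
example_pacing_property(struct workqueue *wq)
{
    // Suppose only the UT bucket (pri 3, qos 3, pacing bit 1u << 2) is paced.
    wq->wq_paced = _wq_pacing_bit_for_pri(3);

    assert(_wq_should_pace_priority(wq, 3));  // the paced bucket itself
    assert(_wq_should_pace_priority(wq, 4));  // lower-QoS bucket (BG) paces too
    assert(!_wq_should_pace_priority(wq, 2)); // higher-QoS bucket (DE) does not
}
#endif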
#pragma mark thread requests

static void
_threadreq_init_alloced(struct threadreq *req, int priority, int flags)
{
    assert((flags & TR_FLAG_ONSTACK) == 0);
    req->tr_state = TR_STATE_NEW;
    req->tr_priority = priority;
    req->tr_flags = flags;
}

static void
_threadreq_init_stack(struct threadreq *req, int priority, int flags)
{
    req->tr_state = TR_STATE_NEW;
    req->tr_priority = priority;
    req->tr_flags = flags | TR_FLAG_ONSTACK;
}
static void
_threadreq_copy_prepare(struct workqueue *wq)
{
    if (wq->wq_cached_threadreq) {
        return;
    }

    workqueue_unlock(wq);
    struct threadreq *req = zalloc(pthread_zone_threadreq);
    workqueue_lock_spin(wq);

    if (wq->wq_cached_threadreq) {
        /*
         * We lost the race and someone left behind an extra threadreq for us
         * to use.  Throw away our request and retry.
         */
        workqueue_unlock(wq);
        zfree(pthread_zone_threadreq, req);
        workqueue_lock_spin(wq);
    } else {
        wq->wq_cached_threadreq = req;
    }

    assert(wq->wq_cached_threadreq);
}

static bool
_threadreq_copy_prepare_noblock(struct workqueue *wq)
{
    if (wq->wq_cached_threadreq) {
        return true;
    }

    wq->wq_cached_threadreq = zalloc_noblock(pthread_zone_threadreq);

    return wq->wq_cached_threadreq != NULL;
}
static inline struct threadreq_head *
_threadreq_list_for_req(struct workqueue *wq, const struct threadreq *req)
{
	if (req->tr_flags & TR_FLAG_OVERCOMMIT) {
		return &wq->wq_overcommit_reqlist[req->tr_priority];
	} else {
		return &wq->wq_reqlist[req->tr_priority];
	}
}

static void
_threadreq_enqueue(struct workqueue *wq, struct threadreq *req)
{
	assert(req && req->tr_state == TR_STATE_NEW);
	if (req->tr_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
		assert(wq->wq_event_manager_threadreq.tr_state != TR_STATE_WAITING);
		memcpy(&wq->wq_event_manager_threadreq, req, sizeof(struct threadreq));
		req = &wq->wq_event_manager_threadreq;
		req->tr_flags &= ~(TR_FLAG_ONSTACK | TR_FLAG_NO_PACING);
	} else {
		if (req->tr_flags & TR_FLAG_ONSTACK) {
			assert(wq->wq_cached_threadreq);
			struct threadreq *newreq = wq->wq_cached_threadreq;
			wq->wq_cached_threadreq = NULL;

			memcpy(newreq, req, sizeof(struct threadreq));
			newreq->tr_flags &= ~(TR_FLAG_ONSTACK | TR_FLAG_NO_PACING);
			req->tr_state = TR_STATE_DEAD;
			req = newreq;
		}
		TAILQ_INSERT_TAIL(_threadreq_list_for_req(wq, req), req, tr_entry);
	}
	req->tr_state = TR_STATE_WAITING;
	wq->wq_reqcount++;
}

static void
_threadreq_dequeue(struct workqueue *wq, struct threadreq *req)
{
	if (req->tr_priority != WORKQUEUE_EVENT_MANAGER_BUCKET) {
		struct threadreq_head *req_list = _threadreq_list_for_req(wq, req);

		struct threadreq *cursor = NULL;
		TAILQ_FOREACH(cursor, req_list, tr_entry) {
			if (cursor == req) break;
		}
		assert(cursor == req);

		TAILQ_REMOVE(req_list, req, tr_entry);
	}
	wq->wq_reqcount--;
}
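/*
 * Note on the cached request (a reading of the code above, not new behavior):
 * _threadreq_copy_prepare() is what lets _threadreq_enqueue() assert that
 * wq_cached_threadreq is non-NULL when it has to promote a TR_FLAG_ONSTACK
 * request into a heap copy -- callers that may enqueue an on-stack request
 * are expected to have primed the cache, with the lock held, before enqueuing.
 */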
/*
 * Mark a thread request as complete.  At this point, it is treated as owned by
 * the submitting subsystem and you should assume it could be freed.
 *
 * Called with the workqueue lock held.
 */
static int
_threadreq_complete_and_unlock(proc_t p, struct workqueue *wq,
		struct threadreq *req, struct threadlist *tl)
{
	struct threadreq *req_tofree = NULL;
	bool sync = (req->tr_state == TR_STATE_NEW);
	bool workloop = req->tr_flags & TR_FLAG_WORKLOOP;
	bool onstack = req->tr_flags & TR_FLAG_ONSTACK;
	bool kevent = req->tr_flags & TR_FLAG_KEVENT;
	bool unbinding = tl->th_flags & TH_LIST_UNBINDING;
	bool locked = true;
	bool waking_parked_thread = (tl->th_flags & TH_LIST_BUSY);
	int ret;

	req->tr_state = TR_STATE_COMPLETE;

	if (!workloop && !onstack && req != &wq->wq_event_manager_threadreq) {
		if (wq->wq_cached_threadreq) {
			req_tofree = req;
		} else {
			wq->wq_cached_threadreq = req;
		}
	}

	if (tl->th_flags & TH_LIST_UNBINDING) {
		tl->th_flags &= ~TH_LIST_UNBINDING;
		assert((tl->th_flags & TH_LIST_KEVENT_BOUND));
	} else if (workloop || kevent) {
		assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0);
		tl->th_flags |= TH_LIST_KEVENT_BOUND;
	}

	if (workloop) {
		workqueue_unlock(wq);
		ret = pthread_kern->workloop_fulfill_threadreq(wq->wq_proc, (void*)req,
				tl->th_thread, sync ? WORKLOOP_FULFILL_THREADREQ_SYNC : 0);
		assert(ret == 0);
		locked = false;
	} else if (kevent) {
		unsigned int kevent_flags = KEVENT_FLAG_WORKQ;
		if (sync) {
			kevent_flags |= KEVENT_FLAG_SYNCHRONOUS_BIND;
		}
		if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
			kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER;
		}
		workqueue_unlock(wq);
		ret = kevent_qos_internal_bind(wq->wq_proc,
				class_index_get_thread_qos(tl->th_priority), tl->th_thread,
				kevent_flags);
		if (ret != 0) {
			workqueue_lock_spin(wq);
			tl->th_flags &= ~TH_LIST_KEVENT_BOUND;
			locked = true;
		} else {
			locked = false;
		}
	}

	PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 0, 0, 0, 0);
	PTHREAD_TRACE_WQ_REQ(TRACE_wq_runitem | DBG_FUNC_START, wq, req, tl->th_priority,
			thread_tid(current_thread()), thread_tid(tl->th_thread));

	if (waking_parked_thread) {
		if (!locked) {
			workqueue_lock_spin(wq);
		}
		tl->th_flags &= ~(TH_LIST_BUSY);
		if ((tl->th_flags & TH_LIST_REMOVING_VOUCHER) == 0) {
			/*
			 * If the thread is in the process of removing its voucher, then it
			 * isn't actually in the wait event yet and we don't need to wake
			 * it up.  Save the trouble (and potential lock-ordering issues)
			 * of doing so.
			 */
			thread_wakeup_thread(tl, tl->th_thread);
		}
		workqueue_unlock(wq);

		if (req_tofree) zfree(pthread_zone_threadreq, req_tofree);
		return WQ_RUN_TR_THREAD_STARTED;
	}

	assert ((tl->th_flags & TH_LIST_PACING) == 0);
	if (locked) {
		workqueue_unlock(wq);
	}
	if (req_tofree) zfree(pthread_zone_threadreq, req_tofree);
	if (unbinding) {
		return WQ_RUN_TR_THREAD_STARTED;
	}
	_setup_wqthread(p, tl->th_thread, wq, tl, WQ_SETUP_CLEAR_VOUCHER);
	pthread_kern->unix_syscall_return(EJUSTRETURN);
	__builtin_unreachable();
}
/*
 * Mark a thread request as cancelled.  Has similar ownership semantics to the
 * complete call above.
 */
static void
_threadreq_cancel(struct workqueue *wq, struct threadreq *req)
{
	assert(req->tr_state == TR_STATE_WAITING);
	req->tr_state = TR_STATE_DEAD;

	assert((req->tr_flags & TR_FLAG_ONSTACK) == 0);
	if (req->tr_flags & TR_FLAG_WORKLOOP) {
		__assert_only int ret;
		ret = pthread_kern->workloop_fulfill_threadreq(wq->wq_proc, (void*)req,
				THREAD_NULL, WORKLOOP_FULFILL_THREADREQ_CANCEL);
		assert(ret == 0 || ret == ECANCELED);
	} else if (req != &wq->wq_event_manager_threadreq) {
		zfree(pthread_zone_threadreq, req);
	}
}
#pragma mark workqueue lock

static boolean_t workqueue_lock_spin_is_acquired_kdp(struct workqueue *wq) {
	return kdp_lck_spin_is_acquired(&wq->wq_lock);
}

static void
workqueue_lock_spin(struct workqueue *wq)
{
	assert(ml_get_interrupts_enabled() == TRUE);
	lck_spin_lock(&wq->wq_lock);
}

static bool
workqueue_lock_try(struct workqueue *wq)
{
	return lck_spin_try_lock(&wq->wq_lock);
}

static void
workqueue_unlock(struct workqueue *wq)
{
	lck_spin_unlock(&wq->wq_lock);
}
#pragma mark workqueue add timer

/**
 * Sets up the timer which will call out to workqueue_add_timer
 */
static void
workqueue_interval_timer_start(struct workqueue *wq)
{
	uint64_t deadline;

	/* n.b. wq_timer_interval is reset to 0 in workqueue_add_timer if the
	 ATIMER_RUNNING flag is not present.  The net effect here is that if a
	 sequence of threads is required, we'll double the time before we give out
	 the next one. */
	if (wq->wq_timer_interval == 0) {
		wq->wq_timer_interval = wq_stalled_window_usecs;
	} else {
		wq->wq_timer_interval = wq->wq_timer_interval * 2;

		if (wq->wq_timer_interval > wq_max_timer_interval_usecs) {
			wq->wq_timer_interval = wq_max_timer_interval_usecs;
		}
	}
	clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline);

	PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount,
			_wq_flags(wq), wq->wq_timer_interval, 0);

	thread_call_t call = wq->wq_atimer_delayed_call;
	if (thread_call_enter1_delayed(call, call, deadline)) {
		panic("delayed_call was already enqueued");
	}
}
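/*
 * Interval growth, concretely (just restating the arithmetic above): starting
 * from wq_timer_interval == 0 the successive arms use wq_stalled_window_usecs,
 * then 2x, 4x, ... capped at wq_max_timer_interval_usecs; workqueue_add_timer()
 * resets the interval back to 0 once the delayed RUNNING flag is observed clear.
 */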
/**
 * Immediately trigger the workqueue_add_timer
 */
static void
workqueue_interval_timer_trigger(struct workqueue *wq)
{
	PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount,
			_wq_flags(wq), 0, 0);

	thread_call_t call = wq->wq_atimer_immediate_call;
	if (thread_call_enter1(call, call)) {
		panic("immediate_call was already enqueued");
	}
}
/**
 * returns whether lastblocked_tsp is within wq_stalled_window_usecs of cur_ts
 */
static boolean_t
wq_thread_is_busy(uint64_t cur_ts, _Atomic uint64_t *lastblocked_tsp)
{
	clock_sec_t secs;
	clock_usec_t usecs;
	uint64_t lastblocked_ts;
	uint64_t elapsed;

	lastblocked_ts = atomic_load_explicit(lastblocked_tsp, memory_order_relaxed);
	if (lastblocked_ts >= cur_ts) {
		/*
		 * because the update of the timestamp when a thread blocks isn't
		 * serialized against us looking at it (i.e. we don't hold the workq lock)
		 * it's possible to have a timestamp that matches the current time or
		 * that even looks to be in the future relative to when we grabbed the current
		 * time... just treat this as a busy thread since it must have just blocked.
		 */
		return TRUE;
	}
	elapsed = cur_ts - lastblocked_ts;

	pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);

	return (secs == 0 && usecs < wq_stalled_window_usecs);
}
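/*
 * Usage sketch (illustrative only): callers snapshot the clock once and test
 * each bucket's last-blocked timestamp against it, e.g.
 *
 *	uint64_t cur_ts = mach_absolute_time();
 *	if (wq_thread_is_busy(cur_ts, &wq->wq_lastblocked_ts[qos])) {
 *		// treat that thread as still making progress
 *	}
 *
 * The relaxed atomic load here pairs with the relaxed store performed from the
 * scheduler callback below when a thread blocks.
 */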
/**
 * handler function for the timer
 */
static void
workqueue_add_timer(struct workqueue *wq, thread_call_t thread_call_self)
{
	proc_t p = wq->wq_proc;

	workqueue_lock_spin(wq);

	PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_START, wq,
			_wq_flags(wq), wq->wq_nthreads, wq->wq_thidlecount, 0);

	/*
	 * There are two tricky issues here.
	 *
	 * First issue: we start the thread_call's that invoke this routine without
	 * the workqueue lock held.  The scheduler callback needs to trigger
	 * reevaluation of the number of running threads but shouldn't take that
	 * lock, so we can't use it to synchronize state around the thread_call.
	 * As a result, it might re-enter the thread_call while this routine is
	 * already running.  This could cause it to fire a second time and we'll
	 * have two add_timers running at once.  Obviously, we don't want that to
	 * keep stacking, so we need to keep it at two timers.
	 *
	 * Solution: use wq_flags (accessed via atomic CAS) to synchronize the
	 * enqueue of the thread_call itself.  When a thread needs to trigger the
	 * add_timer, it checks for ATIMER_DELAYED_RUNNING and, when not set, sets
	 * the flag then does a thread_call_enter.  We'll then remove that flag
	 * only once we've got the lock and it's safe for the thread_call to be
	 * entered again.
	 *
	 * Second issue: we need to make sure that the two timers don't execute this
	 * routine concurrently.  We can't use the workqueue lock for this because
	 * we'll need to drop it during our execution.
	 *
	 * Solution: use WQL_ATIMER_BUSY as a condition variable to indicate that
	 * we are currently executing the routine and the next thread should wait.
	 *
	 * After all that, we arrive at the following four possible states:
	 * !WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY       no pending timer, no active timer
	 * !WQ_ATIMER_DELAYED_RUNNING &&  WQL_ATIMER_BUSY       no pending timer,  1 active timer
	 *  WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY        1 pending timer, no active timer
	 *  WQ_ATIMER_DELAYED_RUNNING &&  WQL_ATIMER_BUSY        1 pending timer,  1 active timer
	 *
	 * A further complication: sometimes we need to trigger this function to run
	 * without delay.  Because we aren't under a lock between setting
	 * WQ_ATIMER_DELAYED_RUNNING and calling thread_call_enter, we can't simply
	 * re-enter the thread call: if thread_call_enter() returned false, we
	 * wouldn't be able to distinguish the case where the thread_call had
	 * already fired from the case where it hadn't been entered yet from the
	 * other thread.  So, we use a separate thread_call for immediate
	 * invocations, and a separate RUNNING flag, WQ_ATIMER_IMMEDIATE_RUNNING.
	 */
	while (wq->wq_lflags & WQL_ATIMER_BUSY) {
		wq->wq_lflags |= WQL_ATIMER_WAITING;

		assert_wait((caddr_t)wq, (THREAD_UNINT));
		workqueue_unlock(wq);

		thread_block(THREAD_CONTINUE_NULL);

		workqueue_lock_spin(wq);
	}
	/*
	 * Prevent _workqueue_mark_exiting() from going away
	 */
	wq->wq_lflags |= WQL_ATIMER_BUSY;

	/*
	 * Decide which timer we are and remove the RUNNING flag.
	 */
	if (thread_call_self == wq->wq_atimer_delayed_call) {
		uint64_t wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_DELAYED_RUNNING);
		if ((wq_flags & WQ_ATIMER_DELAYED_RUNNING) == 0) {
			panic("workqueue_add_timer(delayed) w/o WQ_ATIMER_DELAYED_RUNNING");
		}
	} else if (thread_call_self == wq->wq_atimer_immediate_call) {
		uint64_t wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_IMMEDIATE_RUNNING);
		if ((wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) == 0) {
			panic("workqueue_add_timer(immediate) w/o WQ_ATIMER_IMMEDIATE_RUNNING");
		}
	} else {
		panic("workqueue_add_timer can't figure out which timer it is");
	}

	int ret = WQ_RUN_TR_THREAD_STARTED;
	while (ret == WQ_RUN_TR_THREAD_STARTED && wq->wq_reqcount) {
		ret = workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true);

		workqueue_lock_spin(wq);
	}
	_threadreq_copy_prepare(wq);

	/*
	 * If we called WQ_TIMER_NEEDED above, then this flag will be set if that
	 * call marked the timer running.  If so, we let the timer interval grow.
	 * Otherwise, we reset it back to 0.
	 */
	uint32_t wq_flags = _wq_flags(wq);
	if (!(wq_flags & WQ_ATIMER_DELAYED_RUNNING)) {
		wq->wq_timer_interval = 0;
	}

	wq->wq_lflags &= ~WQL_ATIMER_BUSY;

	if ((wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) {
		/*
		 * wakeup the thread hung up in _workqueue_mark_exiting or
		 * workqueue_add_timer waiting for this timer to finish getting out of
		 * the way
		 */
		wq->wq_lflags &= ~WQL_ATIMER_WAITING;
		wakeup(wq);
	}

	PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_END, wq, 0, wq->wq_nthreads, wq->wq_thidlecount, 0);

	workqueue_unlock(wq);
}
#pragma mark thread state tracking

// called by spinlock code when trying to yield to lock owner
void
_workqueue_thread_yielded(void)
{
}

static void
workqueue_callback(int type, thread_t thread)
{
	struct uthread *uth = pthread_kern->get_bsdthread_info(thread);
	struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
	struct workqueue *wq = tl->th_workq;
	uint32_t old_count, req_qos, qos = tl->th_priority;
	wq_thactive_t old_thactive;

	switch (type) {
	case SCHED_CALL_BLOCK: {
		bool start_timer = false;

		old_thactive = _wq_thactive_dec(wq, tl->th_priority);
		req_qos = WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive);
		old_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive,
				tl->th_priority, NULL, NULL);

		if (old_count == wq_max_concurrency[tl->th_priority]) {
			/*
			 * The number of active threads at this priority has fallen below
			 * the maximum number of concurrent threads that are allowed to run
			 *
			 * if we collide with another thread trying to update the
			 * last_blocked (really unlikely since another thread would have to
			 * get scheduled and then block after we start down this path), it's
			 * not a problem.  Either timestamp is adequate, so no need to retry
			 */
			atomic_store_explicit(&wq->wq_lastblocked_ts[qos],
					mach_absolute_time(), memory_order_relaxed);
		}

		if (req_qos == WORKQUEUE_EVENT_MANAGER_BUCKET || qos > req_qos) {
			/*
			 * The blocking thread is at a lower QoS than the highest currently
			 * pending constrained request, nothing has to be redriven
			 */
		} else {
			uint32_t max_busycount, old_req_count;
			old_req_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive,
					req_qos, NULL, &max_busycount);
			/*
			 * If it is possible that may_start_constrained_thread had refused
			 * admission due to being over the max concurrency, we may need to
			 * spin up a new thread.
			 *
			 * We take into account the maximum number of busy threads
			 * that can affect may_start_constrained_thread as looking at the
			 * actual number may_start_constrained_thread will see is racy.
			 *
			 * IOW at NCPU = 4, for IN (req_qos = 1), if the old req count is
			 * between NCPU (4) and NCPU - 2 (2) we need to redrive.
			 */
			if (wq_max_concurrency[req_qos] <= old_req_count + max_busycount &&
					old_req_count <= wq_max_concurrency[req_qos]) {
				if (WQ_TIMER_DELAYED_NEEDED(wq)) {
					start_timer = true;
					workqueue_interval_timer_start(wq);
				}
			}
		}

		PTHREAD_TRACE_WQ(TRACE_wq_thread_block | DBG_FUNC_START, wq,
				old_count - 1, qos | (req_qos << 8),
				wq->wq_reqcount << 1 | start_timer, 0);
		break;
	}
	case SCHED_CALL_UNBLOCK: {
		/*
		 * we cannot take the workqueue_lock here...
		 * an UNBLOCK can occur from a timer event which
		 * is run from an interrupt context... if the workqueue_lock
		 * is already held by this processor, we'll deadlock...
		 * the thread lock for the thread being UNBLOCKED
		 * is also held
		 */
		old_thactive = _wq_thactive_inc(wq, qos);
		if (pthread_debug_tracing) {
			req_qos = WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive);
			old_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive,
					qos, NULL, NULL);
			PTHREAD_TRACE_WQ(TRACE_wq_thread_block | DBG_FUNC_END, wq,
					old_count + 1, qos | (req_qos << 8),
					wq->wq_threads_scheduled, 0);
		}
		break;
	}
	}
}

sched_call_t
_workqueue_get_sched_callback(void)
{
	return workqueue_callback;
}
#pragma mark thread addition/removal

static mach_vm_size_t
_workqueue_allocsize(struct workqueue *wq)
{
	proc_t p = wq->wq_proc;
	mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map);
	mach_vm_size_t pthread_size =
		vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map));
	return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
}
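/*
 * Allocation layout sketch (consistent with the protect/round-up logic used
 * when the thread is created below): one _workqueue_allocsize() region is
 *
 *	[ guard page ][ PTH_DEFAULT_STACKSIZE stack ][ rounded pthread_t ]
 *	 lowest addr                                         highest addr
 *
 * so the guard page sits at th_stackaddr and the stack grows down toward it.
 */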
/**
 * pop goes the thread
 *
 * If fromexit is set, the call is from workqueue_exit(),
 * so some cleanups are to be avoided.
 */
static void
workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use)
{
	struct uthread * uth;
	struct workqueue * wq = tl->th_workq;

	if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){
		TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry);
	} else {
		TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
	}

	if (fromexit == 0) {
		assert(wq->wq_nthreads && wq->wq_thidlecount);
		wq->wq_nthreads--;
		wq->wq_thidlecount--;
	}

	/*
	 * Clear the threadlist pointer in uthread so
	 * blocked thread on wakeup for termination will
	 * not access the thread list as it is going to be
	 * freed.
	 */
	pthread_kern->thread_sched_call(tl->th_thread, NULL);

	uth = pthread_kern->get_bsdthread_info(tl->th_thread);
	if (uth != (struct uthread *)0) {
		pthread_kern->uthread_set_threadlist(uth, NULL);
	}
	if (fromexit == 0) {
		/* during exit the lock is not held */
		workqueue_unlock(wq);
	}

	if ( (tl->th_flags & TH_LIST_NEW) || first_use ) {
		/*
		 * thread was created, but never used...
		 * need to clean up the stack and port ourselves
		 * since we're not going to spin up through the
		 * normal exit path triggered from Libc
		 */
		if (fromexit == 0) {
			/* vm map is already deallocated when this is called from exit */
			(void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, _workqueue_allocsize(wq));
		}
		(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task), tl->th_thport);
	}
	/*
	 * drop our ref on the thread
	 */
	thread_deallocate(tl->th_thread);

	zfree(pthread_zone_threadlist, tl);
}
/**
 * Try to add a new workqueue thread.
 *
 * - called with workq lock held
 * - dropped and retaken around thread creation
 * - return with workq lock held
 */
static bool
workqueue_addnewthread(proc_t p, struct workqueue *wq)
{
	kern_return_t kret;

	wq->wq_nthreads++;

	workqueue_unlock(wq);

	struct threadlist *tl = zalloc(pthread_zone_threadlist);
	bzero(tl, sizeof(struct threadlist));

	thread_t th;
	kret = pthread_kern->thread_create_workq_waiting(wq->wq_task, wq_unpark_continue, tl, &th);
	if (kret != KERN_SUCCESS) {
		PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 0, 0, 0);
		goto fail_free;
	}

	mach_vm_offset_t stackaddr = pthread_kern->proc_get_stack_addr_hint(p);

	mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map);
	mach_vm_size_t pthread_size =
		vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map));
	mach_vm_size_t th_allocsize = guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;

	kret = mach_vm_map(wq->wq_map, &stackaddr,
			th_allocsize, page_size-1,
			VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, NULL,
			0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
			VM_INHERIT_DEFAULT);

	if (kret != KERN_SUCCESS) {
		kret = mach_vm_allocate(wq->wq_map,
				&stackaddr, th_allocsize,
				VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
	}

	if (kret != KERN_SUCCESS) {
		PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 1, 0, 0);
		goto fail_terminate;
	}

	/*
	 * The guard page is at the lowest address
	 * The stack base is the highest address
	 */
	kret = mach_vm_protect(wq->wq_map, stackaddr, guardsize, FALSE, VM_PROT_NONE);
	if (kret != KERN_SUCCESS) {
		PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 2, 0, 0);
		goto fail_vm_deallocate;
	}

	pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD | THREAD_TAG_WORKQUEUE);
	pthread_kern->thread_static_param(th, TRUE);

	/*
	 * convert_thread_to_port() consumes a reference
	 */
	thread_reference(th);
	void *sright = (void *)pthread_kern->convert_thread_to_port(th);
	tl->th_thport = pthread_kern->ipc_port_copyout_send(sright,
			pthread_kern->task_get_ipcspace(wq->wq_task));

	tl->th_flags = TH_LIST_INITED | TH_LIST_NEW;
	tl->th_thread = th;
	tl->th_workq = wq;
	tl->th_stackaddr = stackaddr;
	tl->th_priority = WORKQUEUE_NUM_BUCKETS;

	struct uthread *uth;
	uth = pthread_kern->get_bsdthread_info(tl->th_thread);

	workqueue_lock_spin(wq);

	void *current_tl = pthread_kern->uthread_get_threadlist(uth);
	if (current_tl == NULL) {
		pthread_kern->uthread_set_threadlist(uth, tl);
		TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);
		wq->wq_thidlecount++;
	} else if (current_tl == WQ_THREADLIST_EXITING_POISON) {
		/*
		 * Failed thread creation race: The thread already woke up and has exited.
		 */
		PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 3, 0, 0);
		goto fail_unlock;
	} else {
		panic("Unexpected initial threadlist value");
	}

	PTHREAD_TRACE_WQ(TRACE_wq_thread_create | DBG_FUNC_NONE, wq, 0, 0, 0, 0);

	return true;

fail_unlock:
	workqueue_unlock(wq);
	(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task),
			tl->th_thport);

fail_vm_deallocate:
	(void) mach_vm_deallocate(wq->wq_map, stackaddr, th_allocsize);

fail_terminate:
	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(th);
	}
	(void)thread_terminate(th);
	thread_deallocate(th);

fail_free:
	zfree(pthread_zone_threadlist, tl);

	workqueue_lock_spin(wq);
	wq->wq_nthreads--;

	return false;
}
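/*
 * Note on the failure paths above (a reading of the labels, not new behavior):
 * they unwind in reverse order of setup -- drop the send right if it was
 * created, deallocate the stack mapping if it was made, then terminate and
 * deallocate the thread, and finally free the threadlist entry before
 * retaking the workqueue lock.
 */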
/**
 * Setup per-process state for the workqueue.
 */
int
_workq_open(struct proc *p, __unused int32_t *retval)
{
	struct workqueue * wq;
	char * ptr;
	uint32_t num_cpus;
	int error = 0;

	if (pthread_kern->proc_get_register(p) == 0) {
		return EINVAL;
	}

	num_cpus = pthread_kern->ml_get_max_cpus();

	if (wq_init_constrained_limit) {
		uint32_t limit;
		/*
		 * set up the limit for the constrained pool
		 * this is a virtual pool in that we don't
		 * maintain it on a separate idle and run list
		 */
		limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR;

		if (limit > wq_max_constrained_threads)
			wq_max_constrained_threads = limit;

		wq_init_constrained_limit = 0;

		if (wq_max_threads > WQ_THACTIVE_BUCKET_HALF) {
			wq_max_threads = WQ_THACTIVE_BUCKET_HALF;
		}
		if (wq_max_threads > pthread_kern->config_thread_max - 20) {
			wq_max_threads = pthread_kern->config_thread_max - 20;
		}
	}

	if (pthread_kern->proc_get_wqptr(p) == NULL) {
		if (pthread_kern->proc_init_wqptr_or_wait(p) == FALSE) {
			assert(pthread_kern->proc_get_wqptr(p) != NULL);
			goto out;
		}

		ptr = (char *)zalloc(pthread_zone_workqueue);
		bzero(ptr, sizeof(struct workqueue));

		wq = (struct workqueue *)ptr;
		wq->wq_proc = p;
		wq->wq_task = current_task();
		wq->wq_map  = pthread_kern->current_map();

		// Start the event manager at the priority hinted at by the policy engine
		int mgr_priority_hint = pthread_kern->task_get_default_manager_qos(current_task());
		wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(mgr_priority_hint) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;

		TAILQ_INIT(&wq->wq_thrunlist);
		TAILQ_INIT(&wq->wq_thidlelist);
		for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) {
			TAILQ_INIT(&wq->wq_overcommit_reqlist[i]);
			TAILQ_INIT(&wq->wq_reqlist[i]);
		}

		wq->wq_atimer_delayed_call =
				thread_call_allocate_with_priority((thread_call_func_t)workqueue_add_timer,
						(thread_call_param_t)wq, THREAD_CALL_PRIORITY_KERNEL);
		wq->wq_atimer_immediate_call =
				thread_call_allocate_with_priority((thread_call_func_t)workqueue_add_timer,
						(thread_call_param_t)wq, THREAD_CALL_PRIORITY_KERNEL);

		lck_spin_init(&wq->wq_lock, pthread_lck_grp, pthread_lck_attr);

		wq->wq_cached_threadreq = zalloc(pthread_zone_threadreq);
		*(wq_thactive_t *)&wq->wq_thactive =
				(wq_thactive_t)WQ_THACTIVE_NO_PENDING_REQUEST <<
				WQ_THACTIVE_QOS_SHIFT;

		pthread_kern->proc_set_wqptr(p, wq);
	}
out:
	return(error);
}
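/*
 * Concretely (restating the one-time init above): with num_cpus == 8 the
 * constrained pool limit becomes 8 * WORKQUEUE_CONSTRAINED_FACTOR, and
 * wq_max_constrained_threads is only ever raised to that value, never lowered;
 * wq_max_threads is clamped to WQ_THACTIVE_BUCKET_HALF and to
 * config_thread_max - 20.
 */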
/*
 * Routine:	workqueue_mark_exiting
 *
 * Function:	Mark the work queue such that new threads will not be added to the
 *		work queue after we return.
 *
 * Conditions:	Called against the current process.
 */
void
_workqueue_mark_exiting(struct proc *p)
{
	struct workqueue *wq = pthread_kern->proc_get_wqptr(p);
	if (!wq) return;

	PTHREAD_TRACE_WQ(TRACE_wq_pthread_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);

	workqueue_lock_spin(wq);

	/*
	 * We arm the add timer without holding the workqueue lock so we need
	 * to synchronize with any running or soon to be running timers.
	 *
	 * Threads that intend to arm the timer atomically OR
	 * WQ_ATIMER_{DELAYED,IMMEDIATE}_RUNNING into the wq_flags, only if
	 * WQ_EXITING is not present.  So, once we have set WQ_EXITING, we can
	 * be sure that no new RUNNING flags will be set, but still need to
	 * wait for the already running timers to complete.
	 *
	 * We always hold the workq lock when dropping WQ_ATIMER_RUNNING, so
	 * the check for and sleep until clear is protected.
	 */
	uint64_t wq_flags = _wq_flags_or_orig(wq, WQ_EXITING);

	if (wq_flags & WQ_ATIMER_DELAYED_RUNNING) {
		if (thread_call_cancel(wq->wq_atimer_delayed_call) == TRUE) {
			wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_DELAYED_RUNNING);
		}
	}
	if (wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) {
		if (thread_call_cancel(wq->wq_atimer_immediate_call) == TRUE) {
			wq_flags = _wq_flags_and_orig(wq, ~WQ_ATIMER_IMMEDIATE_RUNNING);
		}
	}
	while ((_wq_flags(wq) & (WQ_ATIMER_DELAYED_RUNNING | WQ_ATIMER_IMMEDIATE_RUNNING)) ||
			(wq->wq_lflags & WQL_ATIMER_BUSY)) {
		assert_wait((caddr_t)wq, (THREAD_UNINT));
		workqueue_unlock(wq);

		thread_block(THREAD_CONTINUE_NULL);

		workqueue_lock_spin(wq);
	}

	/*
	 * Save off pending requests, will complete/free them below after unlocking
	 */
	TAILQ_HEAD(, threadreq) local_list = TAILQ_HEAD_INITIALIZER(local_list);

	for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) {
		TAILQ_CONCAT(&local_list, &wq->wq_overcommit_reqlist[i], tr_entry);
		TAILQ_CONCAT(&local_list, &wq->wq_reqlist[i], tr_entry);
	}

	/*
	 * XXX: Can't deferred cancel the event manager request, so just smash it.
	 */
	assert((wq->wq_event_manager_threadreq.tr_flags & TR_FLAG_WORKLOOP) == 0);
	wq->wq_event_manager_threadreq.tr_state = TR_STATE_DEAD;

	workqueue_unlock(wq);

	struct threadreq *tr, *tr_temp;
	TAILQ_FOREACH_SAFE(tr, &local_list, tr_entry, tr_temp) {
		_threadreq_cancel(wq, tr);
	}
	PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
}
/*
 * Routine:	workqueue_exit
 *
 * Function:	clean up the work queue structure(s) now that there are no threads
 *		left running inside the work queue (except possibly current_thread).
 *
 * Conditions:	Called by the last thread in the process.
 *		Called against current process.
 */
void
_workqueue_exit(struct proc *p)
{
	struct workqueue  * wq;
	struct threadlist  * tl, *tlist;
	struct uthread	*uth;

	wq = pthread_kern->proc_get_wqptr(p);
	if (wq != NULL) {

		PTHREAD_TRACE_WQ(TRACE_wq_workqueue_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);

		pthread_kern->proc_set_wqptr(p, NULL);

		/*
		 * Clean up workqueue data structures for threads that exited and
		 * didn't get a chance to clean up after themselves.
		 */
		TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
			assert((tl->th_flags & TH_LIST_RUNNING) != 0);

			pthread_kern->thread_sched_call(tl->th_thread, NULL);

			uth = pthread_kern->get_bsdthread_info(tl->th_thread);
			if (uth != (struct uthread *)0) {
				pthread_kern->uthread_set_threadlist(uth, NULL);
			}
			TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);

			/*
			 * drop our last ref on the thread
			 */
			thread_deallocate(tl->th_thread);

			zfree(pthread_zone_threadlist, tl);
		}
		TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) {
			assert((tl->th_flags & TH_LIST_RUNNING) == 0);
			assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET);
			workqueue_removethread(tl, true, false);
		}
		TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlemgrlist, th_entry, tlist) {
			assert((tl->th_flags & TH_LIST_RUNNING) == 0);
			assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET);
			workqueue_removethread(tl, true, false);
		}
		if (wq->wq_cached_threadreq) {
			zfree(pthread_zone_threadreq, wq->wq_cached_threadreq);
		}
		thread_call_free(wq->wq_atimer_delayed_call);
		thread_call_free(wq->wq_atimer_immediate_call);
		lck_spin_destroy(&wq->wq_lock, pthread_lck_grp);

		for (int i = 0; i < WORKQUEUE_EVENT_MANAGER_BUCKET; i++) {
			assert(TAILQ_EMPTY(&wq->wq_overcommit_reqlist[i]));
			assert(TAILQ_EMPTY(&wq->wq_reqlist[i]));
		}

		zfree(pthread_zone_workqueue, wq);

		PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
	}
}
#pragma mark workqueue thread manipulation

/**
 * Entry point for libdispatch to ask for threads
 */
static int
wqops_queue_reqthreads(struct proc *p, int reqcount,
		pthread_priority_t priority)
{
	bool overcommit = _pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
	bool event_manager = _pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
	int class = event_manager ? WORKQUEUE_EVENT_MANAGER_BUCKET :
			pthread_priority_get_class_index(priority);

	if ((reqcount <= 0) || (class < 0) || (class >= WORKQUEUE_NUM_BUCKETS) ||
			(overcommit && event_manager)) {
		return EINVAL;
	}

	struct workqueue *wq;
	if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
		return EINVAL;
	}

	workqueue_lock_spin(wq);
	_threadreq_copy_prepare(wq);

	PTHREAD_TRACE_WQ(TRACE_wq_wqops_reqthreads | DBG_FUNC_NONE, wq, reqcount, priority, 0, 0);

	int tr_flags = 0;
	if (overcommit) tr_flags |= TR_FLAG_OVERCOMMIT;
	if (reqcount > 1) {
		/*
		 * when libdispatch asks for more than one thread, it wants to achieve
		 * parallelism. Pacing would be detrimental to this ask, so treat
		 * these specially to not do the pacing admission check
		 */
		tr_flags |= TR_FLAG_NO_PACING;
	}

	while (reqcount-- && !_wq_exiting(wq)) {
		struct threadreq req;
		_threadreq_init_stack(&req, class, tr_flags);

		workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, true);

		workqueue_lock_spin(wq); /* reacquire */
		_threadreq_copy_prepare(wq);
	}

	workqueue_unlock(wq);

	return 0;
}
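/*
 * This is reached from the WQOPS_QUEUE_REQTHREADS case of _workq_kernreturn()
 * below, with arg2 carrying the thread count and arg3 the pthread_priority_t,
 * so each pass of the loop above builds a stack request and hands it straight
 * to workqueue_run_threadreq_and_unlock().
 */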
/*
 * Used by the kevent system to request threads.
 *
 * Currently count is ignored and we always return one thread per invocation.
 */
static thread_t
_workq_kevent_reqthreads(struct proc *p, pthread_priority_t priority,
		bool no_emergency)
{
	int wq_run_tr = WQ_RUN_TR_THROTTLED;
	bool emergency_thread = false;
	struct threadreq req;

	struct workqueue *wq;
	if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
		return THREAD_NULL;
	}

	int class = pthread_priority_get_class_index(priority);

	workqueue_lock_spin(wq);
	bool has_threadreq = _threadreq_copy_prepare_noblock(wq);

	PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, NULL, priority, 0, 0);

	/*
	 * Skip straight to event manager if that's what was requested
	 */
	if ((_pthread_priority_get_qos_newest(priority) == QOS_CLASS_UNSPECIFIED) ||
			(_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG)){
		goto event_manager;
	}

	bool will_pace = _wq_should_pace_priority(wq, class);
	if ((wq->wq_thidlecount == 0 || will_pace) && has_threadreq == false) {
		/*
		 * We'll need to persist the request and can't, so return the emergency
		 * thread instead, which has a persistent request object.
		 */
		emergency_thread = true;
		goto event_manager;
	}

	/*
	 * Handle overcommit requests
	 */
	if ((_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0){
		_threadreq_init_stack(&req, class, TR_FLAG_KEVENT | TR_FLAG_OVERCOMMIT);
		wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false);
		goto done;
	}

	/*
	 * Handle constrained requests
	 */
	boolean_t may_start = may_start_constrained_thread(wq, class, NULL, false);
	if (may_start || no_emergency) {
		_threadreq_init_stack(&req, class, TR_FLAG_KEVENT);
		wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false);
		goto done;
	} else {
		emergency_thread = true;
	}

event_manager:
	_threadreq_init_stack(&req, WORKQUEUE_EVENT_MANAGER_BUCKET, TR_FLAG_KEVENT);
	wq_run_tr = workqueue_run_threadreq_and_unlock(p, wq, NULL, &req, false);

done:
	if (wq_run_tr == WQ_RUN_TR_THREAD_NEEDED && WQ_TIMER_IMMEDIATE_NEEDED(wq)) {
		workqueue_interval_timer_trigger(wq);
	}
	return emergency_thread ? (void*)-1 : 0;
}

thread_t
_workq_reqthreads(struct proc *p, __assert_only int requests_count,
		workq_reqthreads_req_t request)
{
	assert(requests_count == 1);

	pthread_priority_t priority = request->priority;
	bool no_emergency = request->count & WORKQ_REQTHREADS_NOEMERGENCY;

	return _workq_kevent_reqthreads(p, priority, no_emergency);
}
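/*
 * Return-value convention (a reading of the code above): a regular request is
 * either started or left pending and the function returns 0; when the request
 * could not be persisted it falls back to the event manager and reports that
 * by returning the (void *)-1 "emergency thread" sentinel instead.
 */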
int
workq_kern_threadreq(struct proc *p, workq_threadreq_t _req,
		enum workq_threadreq_type type, unsigned long priority, int flags)
{
	struct workqueue *wq;
	int ret;

	if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
		return EINVAL;
	}

	switch (type) {
	case WORKQ_THREADREQ_KEVENT: {
		bool no_emergency = flags & WORKQ_THREADREQ_FLAG_NOEMERGENCY;
		(void)_workq_kevent_reqthreads(p, priority, no_emergency);
		return 0;
	}
	case WORKQ_THREADREQ_WORKLOOP:
	case WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL: {
		struct threadreq *req = (struct threadreq *)_req;
		int req_class = pthread_priority_get_class_index(priority);
		int req_flags = TR_FLAG_WORKLOOP;
		if ((_pthread_priority_get_flags(priority) &
				_PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0){
			req_flags |= TR_FLAG_OVERCOMMIT;
		}

		thread_t thread = current_thread();
		struct threadlist *tl = util_get_thread_threadlist_entry(thread);

		if (tl && tl != WQ_THREADLIST_EXITING_POISON &&
				(tl->th_flags & TH_LIST_UNBINDING)) {
			/*
			 * we're called back synchronously from the context of
			 * kevent_qos_internal_unbind from within wqops_thread_return()
			 * we can try to match up this thread with this request !
			 */
		} else {
			tl = NULL;
		}

		_threadreq_init_alloced(req, req_class, req_flags);
		workqueue_lock_spin(wq);
		PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, priority, 1, 0);
		ret = workqueue_run_threadreq_and_unlock(p, wq, tl, req, false);
		if (ret == WQ_RUN_TR_EXITING) {
			return ECANCELED;
		}
		if (ret == WQ_RUN_TR_THREAD_NEEDED) {
			if (type == WORKQ_THREADREQ_WORKLOOP_NO_THREAD_CALL) {
				return EAGAIN;
			}
			if (WQ_TIMER_IMMEDIATE_NEEDED(wq)) {
				workqueue_interval_timer_trigger(wq);
			}
		}
		return 0;
	}
	case WORKQ_THREADREQ_REDRIVE:
		PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, 0, 0, 4, 0);
		workqueue_lock_spin(wq);
		ret = workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true);
		if (ret == WQ_RUN_TR_EXITING) {
			return EINVAL;
		}
		return 0;
	default:
		return ENOTSUP;
	}
}
int
workq_kern_threadreq_modify(struct proc *p, workq_threadreq_t _req,
		enum workq_threadreq_op operation, unsigned long arg1,
		unsigned long __unused arg2)
{
	struct threadreq *req = (struct threadreq *)_req;
	struct workqueue *wq;
	int priclass, ret = 0, wq_tr_rc = WQ_RUN_TR_THROTTLED;

	if (req == NULL || (wq = pthread_kern->proc_get_wqptr(p)) == NULL) {
		return EINVAL;
	}

	workqueue_lock_spin(wq);

	if (_wq_exiting(wq)) {
		ret = ECANCELED;
		goto out_unlock;
	}

	/*
	 * Find/validate the referenced request structure
	 */
	if (req->tr_state != TR_STATE_WAITING) {
		ret = EINVAL;
		goto out_unlock;
	}
	assert(req->tr_priority < WORKQUEUE_EVENT_MANAGER_BUCKET);
	assert(req->tr_flags & TR_FLAG_WORKLOOP);

	switch (operation) {
	case WORKQ_THREADREQ_CHANGE_PRI:
	case WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL:
		priclass = pthread_priority_get_class_index(arg1);
		PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, arg1, 2, 0);
		if (req->tr_priority == priclass) {
			goto out_unlock;
		}
		_threadreq_dequeue(wq, req);
		req->tr_priority = priclass;
		req->tr_state = TR_STATE_NEW; // what was old is new again
		wq_tr_rc = workqueue_run_threadreq_and_unlock(p, wq, NULL, req, false);
		goto out;

	case WORKQ_THREADREQ_CANCEL:
		PTHREAD_TRACE_WQ_REQ(TRACE_wq_kevent_reqthreads | DBG_FUNC_NONE, wq, req, 0, 3, 0);
		_threadreq_dequeue(wq, req);
		req->tr_state = TR_STATE_DEAD;
		break;

	default:
		ret = ENOTSUP;
		break;
	}

out_unlock:
	workqueue_unlock(wq);
out:
	if (wq_tr_rc == WQ_RUN_TR_THREAD_NEEDED) {
		if (operation == WORKQ_THREADREQ_CHANGE_PRI_NO_THREAD_CALL) {
			ret = EAGAIN;
		} else if (WQ_TIMER_IMMEDIATE_NEEDED(wq)) {
			workqueue_interval_timer_trigger(wq);
		}
	}
	return ret;
}
static int
wqops_thread_return(struct proc *p, struct workqueue *wq)
{
	thread_t th = current_thread();
	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
	struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);

	/* reset signal mask on the workqueue thread to default state */
	if (pthread_kern->uthread_get_sigmask(uth) != (sigset_t)(~workq_threadmask)) {
		pthread_kern->proc_lock(p);
		pthread_kern->uthread_set_sigmask(uth, ~workq_threadmask);
		pthread_kern->proc_unlock(p);
	}

	if (wq == NULL || !tl) {
		return EINVAL;
	}

	PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_START, tl->th_workq, 0, 0, 0, 0);

	/*
	 * This squash call has neat semantics: it removes the specified overrides,
	 * replacing the current requested QoS with the previous effective QoS from
	 * those overrides.  This means we won't be preempted due to having our QoS
	 * lowered.  Of course, now our understanding of the thread's QoS is wrong,
	 * so we'll adjust below.
	 */
	bool was_manager = (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET);
	int new_qos;

	if (!was_manager) {
		new_qos = pthread_kern->proc_usynch_thread_qos_squash_override_for_resource(th,
				THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD,
				THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE);
	}

	PTHREAD_TRACE_WQ(TRACE_wq_runitem | DBG_FUNC_END, wq, tl->th_priority, 0, 0, 0);

	workqueue_lock_spin(wq);

	if (tl->th_flags & TH_LIST_KEVENT_BOUND) {
		unsigned int flags = KEVENT_FLAG_WORKQ;
		if (was_manager) {
			flags |= KEVENT_FLAG_WORKQ_MANAGER;
		}

		tl->th_flags |= TH_LIST_UNBINDING;
		workqueue_unlock(wq);
		kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, flags);
		if (!(tl->th_flags & TH_LIST_UNBINDING)) {
			_setup_wqthread(p, th, wq, tl, WQ_SETUP_CLEAR_VOUCHER);
			pthread_kern->unix_syscall_return(EJUSTRETURN);
			__builtin_unreachable();
		}
		workqueue_lock_spin(wq);
		tl->th_flags &= ~(TH_LIST_KEVENT_BOUND | TH_LIST_UNBINDING);
	}

	if (!was_manager) {
		/* Fix up counters from the squash operation. */
		uint8_t old_bucket = tl->th_priority;
		uint8_t new_bucket = thread_qos_get_class_index(new_qos);

		if (old_bucket != new_bucket) {
			_wq_thactive_move(wq, old_bucket, new_bucket);
			wq->wq_thscheduled_count[old_bucket]--;
			wq->wq_thscheduled_count[new_bucket]++;

			PTHREAD_TRACE_WQ(TRACE_wq_thread_squash | DBG_FUNC_NONE, wq, tl->th_priority, new_bucket, 0, 0);
			tl->th_priority = new_bucket;
			PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_END, tl->th_workq, new_qos, 0, 0, 0);
		}
	}

	workqueue_run_threadreq_and_unlock(p, wq, tl, NULL, false);
	return 0;
}
/*
 * Multiplexed call to interact with the workqueue mechanism
 */
int
_workq_kernreturn(struct proc *p,
		int options,
		user_addr_t item,
		int arg2,
		int arg3,
		int32_t *retval)
{
	struct workqueue *wq;
	int error = 0;

	if (pthread_kern->proc_get_register(p) == 0) {
		return EINVAL;
	}

	switch (options) {
	case WQOPS_QUEUE_NEWSPISUPP: {
		/*
		 * arg2 = offset of serialno into dispatch queue
		 * arg3 = kevent support
		 */
		int offset = arg2;
		if (arg3 & 0x01){
			// If we get here, then userspace has indicated support for kevent delivery.
		}

		pthread_kern->proc_set_dispatchqueue_serialno_offset(p, (uint64_t)offset);
		break;
	}
	case WQOPS_QUEUE_REQTHREADS: {
		/*
		 * arg2 = number of threads to start
		 * arg3 = priority
		 */
		error = wqops_queue_reqthreads(p, arg2, arg3);
		break;
	}
	case WQOPS_SET_EVENT_MANAGER_PRIORITY: {
		/*
		 * arg2 = priority for the manager thread
		 *
		 * if _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG is set, the
		 * ~_PTHREAD_PRIORITY_FLAGS_MASK contains a scheduling priority instead
		 * of a QoS value
		 */
		pthread_priority_t pri = arg2;

		wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p);
		if (wq == NULL) {
			error = EINVAL;
			break;
		}
		workqueue_lock_spin(wq);
		if (pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){
			/*
			 * If userspace passes a scheduling priority, that takes precedence
			 * over any QoS.  (So, userspace should take care not to accidentally
			 * lower the priority this way.)
			 */
			uint32_t sched_pri = pri & _PTHREAD_PRIORITY_SCHED_PRI_MASK;
			if (wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){
				wq->wq_event_manager_priority = MAX(sched_pri, wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_MASK)
						| _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
			} else {
				wq->wq_event_manager_priority = sched_pri
						| _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
			}
		} else if ((wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){
			int cur_qos = pthread_priority_get_thread_qos(wq->wq_event_manager_priority);
			int new_qos = pthread_priority_get_thread_qos(pri);
			wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(MAX(cur_qos, new_qos)) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
		}
		workqueue_unlock(wq);
		break;
	}
	case WQOPS_THREAD_KEVENT_RETURN:
	case WQOPS_THREAD_WORKLOOP_RETURN:
		wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p);
		PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_END, wq, options, 0, 0, 0);
		if (item != 0 && arg2 != 0) {
			int32_t kevent_retval;
			int ret;
			if (options == WQOPS_THREAD_KEVENT_RETURN) {
				ret = kevent_qos_internal(p, -1, item, arg2, item, arg2, NULL, NULL,
						KEVENT_FLAG_WORKQ | KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS,
						&kevent_retval);
			} else /* options == WQOPS_THREAD_WORKLOOP_RETURN */ {
				kqueue_id_t kevent_id = -1;
				ret = kevent_id_internal(p, &kevent_id, item, arg2, item, arg2,
						NULL, NULL,
						KEVENT_FLAG_WORKLOOP | KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS,
						&kevent_retval);
			}
			/*
			 * We shouldn't be getting more errors out than events we put in, so
			 * reusing the input buffer should always provide enough space.  But,
			 * the assert is commented out since we get errors in edge cases in the
			 * process lifecycle.
			 */
			//assert(ret == KERN_SUCCESS && kevent_retval >= 0);
			if (ret != KERN_SUCCESS){
				error = ret;
				break;
			} else if (kevent_retval > 0){
				assert(kevent_retval <= arg2);
				*retval = kevent_retval;
				error = 0;
				break;
			}
		}
		goto thread_return;

	case WQOPS_THREAD_RETURN:
		wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p);
		PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_END, wq, options, 0, 0, 0);
	thread_return:
		error = wqops_thread_return(p, wq);

		// NOT REACHED except in case of error
		assert(error);
		break;

	case WQOPS_SHOULD_NARROW: {
		/*
		 * arg2 = priority to test
		 */
		pthread_priority_t priority = arg2;
		thread_t th = current_thread();
		struct threadlist *tl = util_get_thread_threadlist_entry(th);

		if (tl == NULL || (tl->th_flags & TH_LIST_CONSTRAINED) == 0) {
			error = EINVAL;
			break;
		}

		int class = pthread_priority_get_class_index(priority);
		wq = tl->th_workq;
		workqueue_lock_spin(wq);
		bool should_narrow = !may_start_constrained_thread(wq, class, tl, false);
		workqueue_unlock(wq);

		*retval = should_narrow;
		break;
	}
	default:
		error = EINVAL;
		break;
	}

	switch (options) {
	case WQOPS_THREAD_KEVENT_RETURN:
	case WQOPS_THREAD_WORKLOOP_RETURN:
	case WQOPS_THREAD_RETURN:
		PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_START, wq, options, 0, 0, 0);
		break;
	}
	return (error);
}
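/*
 * Note on the WQOPS_THREAD_KEVENT_RETURN / WQOPS_THREAD_WORKLOOP_RETURN cases
 * above: the caller's input event buffer (item/arg2) doubles as the output
 * buffer for error events, which is why kevent_retval is asserted to be
 * <= arg2 before it is copied out through *retval.
 */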
/*
 * We have no work to do, park ourselves on the idle list.
 *
 * Consumes the workqueue lock and does not return.
 */
static void
parkit(struct workqueue *wq, struct threadlist *tl, thread_t thread)
{
	assert(thread == tl->th_thread);
	assert(thread == current_thread());

	PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_START, wq, 0, 0, 0, 0);

	uint32_t us_to_wait = 0;

	TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);

	tl->th_flags &= ~TH_LIST_RUNNING;
	tl->th_flags &= ~TH_LIST_KEVENT;
	assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0);

	if (tl->th_flags & TH_LIST_CONSTRAINED) {
		wq->wq_constrained_threads_scheduled--;
		tl->th_flags &= ~TH_LIST_CONSTRAINED;
	}

	_wq_thactive_dec(wq, tl->th_priority);
	wq->wq_thscheduled_count[tl->th_priority]--;
	wq->wq_threads_scheduled--;
	uint32_t thidlecount = ++wq->wq_thidlecount;

	pthread_kern->thread_sched_call(thread, NULL);

	/*
	 * We'd like to always have one manager thread parked so that we can have
	 * low latency when we need to bring a manager thread up.  If that idle
	 * thread list is empty, make this thread a manager thread.
	 *
	 * XXX: This doesn't check that there's not a manager thread outstanding,
	 * so it's based on the assumption that most manager callouts will change
	 * their QoS before parking.  If that stops being true, this may end up
	 * costing us more than we gain.
	 */
	if (TAILQ_EMPTY(&wq->wq_thidlemgrlist) &&
			tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET){
		PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE,
					wq, thread_tid(thread),
					(tl->th_priority << 16) | WORKQUEUE_EVENT_MANAGER_BUCKET, 2, 0);
		reset_priority(tl, pthread_priority_from_wq_class_index(wq, WORKQUEUE_EVENT_MANAGER_BUCKET));
		tl->th_priority = WORKQUEUE_EVENT_MANAGER_BUCKET;
	}

	if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){
		TAILQ_INSERT_HEAD(&wq->wq_thidlemgrlist, tl, th_entry);
	} else {
		TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry);
	}

	/*
	 * When we remove the voucher from the thread, we may lose our importance
	 * causing us to get preempted, so we do this after putting the thread on
	 * the idle list.  That way, when we get our importance back we'll be able
	 * to use this thread from e.g. the kevent call out to deliver a boosting
	 * message.
	 */
	tl->th_flags |= TH_LIST_REMOVING_VOUCHER;
	workqueue_unlock(wq);
	if (pthread_kern->thread_will_park_or_terminate) {
		pthread_kern->thread_will_park_or_terminate(tl->th_thread);
	}
	__assert_only kern_return_t kr;
	kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
	assert(kr == KERN_SUCCESS);
	workqueue_lock_spin(wq);
	tl->th_flags &= ~(TH_LIST_REMOVING_VOUCHER);

	if ((tl->th_flags & TH_LIST_RUNNING) == 0) {
		if (thidlecount < 101) {
			us_to_wait = wq_reduce_pool_window_usecs - ((thidlecount-2) * (wq_reduce_pool_window_usecs / 100));
		} else {
			us_to_wait = wq_reduce_pool_window_usecs / 100;
		}
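		/*
		 * With the default ~5s wq_reduce_pool_window_usecs this wait decays
		 * linearly with pool size: the 2nd idle thread waits the full window,
		 * the 52nd roughly half of it, and the 101st and beyond only 1/100th,
		 * so oversized pools drain quickly while small ones linger.
		 */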
		thread_set_pending_block_hint(thread, kThreadWaitParkedWorkQueue);
		assert_wait_timeout_with_leeway((caddr_t)tl, (THREAD_INTERRUPTIBLE),
				TIMEOUT_URGENCY_SYS_BACKGROUND|TIMEOUT_URGENCY_LEEWAY, us_to_wait,
				wq_reduce_pool_window_usecs/10, NSEC_PER_USEC);

		workqueue_unlock(wq);

		thread_block(wq_unpark_continue);
		panic("thread_block(wq_unpark_continue) returned!");
	} else {
		workqueue_unlock(wq);

		/*
		 * While we'd dropped the lock to unset our voucher, someone came
		 * around and made us runnable.  But because we weren't waiting on the
		 * event their wakeup() was ineffectual.  To correct for that, we just
		 * run the continuation ourselves.
		 */
		wq_unpark_continue(NULL, THREAD_AWAKENED);
	}
}
static bool
may_start_constrained_thread(struct workqueue *wq, uint32_t at_priclass,
		struct threadlist *tl, bool may_start_timer)
{
	uint32_t req_qos = _wq_thactive_best_constrained_req_qos(wq);
	wq_thactive_t thactive;

	if (may_start_timer && at_priclass < req_qos) {
		/*
		 * When called from workqueue_run_threadreq_and_unlock() pre-post newest
		 * higher priorities into the thactive state so that
		 * workqueue_callback() takes the right decision.
		 *
		 * If the admission check passes, workqueue_run_threadreq_and_unlock
		 * will reset this value before running the request.
		 */
		thactive = _wq_thactive_set_best_constrained_req_qos(wq, req_qos,
				at_priclass);
		PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 1, (uint64_t)thactive,
				(uint64_t)(thactive >> 64), 0, 0);
	} else {
		thactive = _wq_thactive(wq);
	}

	uint32_t constrained_threads = wq->wq_constrained_threads_scheduled;
	if (tl && (tl->th_flags & TH_LIST_CONSTRAINED)) {
		/*
		 * don't count the current thread as scheduled
		 */
		constrained_threads--;
	}
	if (constrained_threads >= wq_max_constrained_threads) {
		PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 1,
				wq->wq_constrained_threads_scheduled,
				wq_max_constrained_threads, 0);
		/*
		 * we need 1 or more constrained threads to return to the kernel before
		 * we can dispatch additional work
		 */
		return false;
	}

	/*
	 * Compute a metric for how many threads are active.  We find the
	 * highest priority request outstanding and then add up the number of
	 * active threads in that and all higher-priority buckets.  We'll also add
	 * any "busy" threads which are not active but blocked recently enough that
	 * we can't be sure they've gone idle yet.  We'll then compare this metric
	 * to our max concurrency to decide whether to add a new thread.
	 */

	uint32_t busycount, thactive_count;

	thactive_count = _wq_thactive_aggregate_downto_qos(wq, thactive,
			at_priclass, &busycount, NULL);

	if (tl && tl->th_priority <= at_priclass) {
		/*
		 * don't count this thread as currently active
		 */
		assert(thactive_count > 0);
		thactive_count--;
	}

	if (thactive_count + busycount < wq_max_concurrency[at_priclass]) {
		PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 2,
				thactive_count, busycount, 0);
		return true;
	} else {
		PTHREAD_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 3,
				thactive_count, busycount, 0);
	}

	if (busycount && may_start_timer) {
		/*
		 * If this is called from the add timer, we won't have another timer
		 * fire when the thread exits the "busy" state, so rearm the timer.
		 */
		if (WQ_TIMER_DELAYED_NEEDED(wq)) {
			workqueue_interval_timer_start(wq);
		}
	}

	return false;
}
static struct threadlist *
pop_from_thidlelist(struct workqueue *wq, uint32_t priclass)
{
	assert(wq->wq_thidlecount);

	struct threadlist *tl = NULL;

	if (!TAILQ_EMPTY(&wq->wq_thidlemgrlist) &&
			(priclass == WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlelist))){
		tl = TAILQ_FIRST(&wq->wq_thidlemgrlist);
		TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry);
		assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET);
	} else if (!TAILQ_EMPTY(&wq->wq_thidlelist) &&
			(priclass != WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlemgrlist))){
		tl = TAILQ_FIRST(&wq->wq_thidlelist);
		TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
		assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET);
	} else {
		panic("pop_from_thidlelist called with no threads available");
	}
	assert((tl->th_flags & TH_LIST_RUNNING) == 0);

	assert(wq->wq_thidlecount);
	wq->wq_thidlecount--;

	TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);

	tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY;

	wq->wq_threads_scheduled++;
	wq->wq_thscheduled_count[priclass]++;
	_wq_thactive_inc(wq, priclass);

	return tl;
}
static pthread_priority_t
pthread_priority_from_wq_class_index(struct workqueue *wq, int index)
{
	if (index == WORKQUEUE_EVENT_MANAGER_BUCKET){
		return wq->wq_event_manager_priority;
	} else {
		return class_index_get_pthread_priority(index);
	}
}
static void
reset_priority(struct threadlist *tl, pthread_priority_t pri)
{
	kern_return_t ret;
	thread_t th = tl->th_thread;

	if ((pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){
		ret = pthread_kern->thread_set_workq_qos(th, pthread_priority_get_thread_qos(pri), 0);
		assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);

		if (tl->th_flags & TH_LIST_EVENT_MGR_SCHED_PRI) {

			/* Reset priority to default (masked by QoS) */

			ret = pthread_kern->thread_set_workq_pri(th, 31, POLICY_TIMESHARE);
			assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);

			tl->th_flags &= ~TH_LIST_EVENT_MGR_SCHED_PRI;
		}
	} else {
		ret = pthread_kern->thread_set_workq_qos(th, THREAD_QOS_UNSPECIFIED, 0);
		assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
		ret = pthread_kern->thread_set_workq_pri(th, (pri & (~_PTHREAD_PRIORITY_FLAGS_MASK)), POLICY_TIMESHARE);
		assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);

		tl->th_flags |= TH_LIST_EVENT_MGR_SCHED_PRI;
	}
}
/*
 * Picks the best request to run, and returns the best overcommit fallback
 * if the best pick is non overcommit and risks failing its admission check.
 */
static struct threadreq *
workqueue_best_threadreqs(struct workqueue *wq, struct threadlist *tl,
		struct threadreq **fallback)
{
	struct threadreq *req, *best_req = NULL;
	int priclass, prilimit;

	if ((wq->wq_event_manager_threadreq.tr_state == TR_STATE_WAITING) &&
			((wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0) ||
			(tl && tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET))) {
		/*
		 * There's an event manager request and either:
		 *   - no event manager currently running
		 *   - we are re-using the event manager
		 */
		req = &wq->wq_event_manager_threadreq;
		PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, req, 1, 0, 0);
		return req;
	}

	if (tl) {
		prilimit = WORKQUEUE_EVENT_MANAGER_BUCKET;
	} else {
		prilimit = _wq_highest_paced_priority(wq);
	}
	for (priclass = 0; priclass < prilimit; priclass++) {
		req = TAILQ_FIRST(&wq->wq_overcommit_reqlist[priclass]);
		if (req) {
			PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, req, 2, 0, 0);
			if (best_req) {
				*fallback = req;
			} else {
				best_req = req;
			}
			break;
		}
		if (!best_req) {
			best_req = TAILQ_FIRST(&wq->wq_reqlist[priclass]);
			if (best_req) {
				PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_req_select | DBG_FUNC_NONE, wq, best_req, 3, 0, 0);
			}
		}
	}
	return best_req;
}
/*
 * Runs a thread request on a thread
 *
 * - if thread is THREAD_NULL, will find a thread and run the request there.
 *   Otherwise, the thread must be the current thread.
 *
 * - if req is NULL, will find the highest priority request and run that.  If
 *   it is not NULL, it must be a threadreq object in state NEW.  If it can not
 *   be run immediately, it will be enqueued and moved to state WAITING.
 *
 *   Either way, the thread request object serviced will be moved to state
 *   PENDING and attached to the threadlist.
 *
 *   Should be called with the workqueue lock held.  Will drop it.
 *
 *   WARNING: _workq_kevent_reqthreads needs to be able to preflight any
 *   admission checks in this function.  If you are changing this function,
 *   keep that one up-to-date.
 *
 * - if parking_tl is non NULL, then the current thread is parking. This will
 *   try to reuse this thread for a request. If no match is found, it will be
 *   parked.
 */
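/*
 * A typical redrive is workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL,
 * true): pick the best waiting request, find or create a thread for it, and
 * drop the workqueue lock on the way out; see wq_unpark_continue() below for
 * one such caller.
 */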
static int
workqueue_run_threadreq_and_unlock(proc_t p, struct workqueue *wq,
		struct threadlist *parking_tl, struct threadreq *req,
		bool may_add_new_thread)
{
	struct threadreq *incoming_req = req;

	struct threadlist *tl = parking_tl;
	int rc = WQ_RUN_TR_THROTTLED;

	assert(tl == NULL || tl->th_thread == current_thread());
	assert(req == NULL || req->tr_state == TR_STATE_NEW);
	assert(!may_add_new_thread || !tl);

	PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq | DBG_FUNC_START, wq, req,
			tl ? thread_tid(tl->th_thread) : 0,
			req ? (req->tr_priority << 16 | req->tr_flags) : 0, 0);

	/*
	 * Special cases when provided an event manager request
	 */
	if (req && req->tr_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
		// Clients must not rely on identity of event manager requests
		assert(req->tr_flags & TR_FLAG_ONSTACK);
		// You can't be both overcommit and event manager
		assert((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0);

		/*
		 * We can only ever have one event manager request, so coalesce them if
		 * there's already one outstanding.
		 */
		if (wq->wq_event_manager_threadreq.tr_state == TR_STATE_WAITING) {
			PTHREAD_TRACE_WQ_REQ(TRACE_wq_run_threadreq_mgr_merge | DBG_FUNC_NONE, wq, req, 0, 0, 0);

			struct threadreq *existing_req = &wq->wq_event_manager_threadreq;
			if (req->tr_flags & TR_FLAG_KEVENT) {
				existing_req->tr_flags |= TR_FLAG_KEVENT;
			}

			req = existing_req;
			incoming_req = NULL;
		}

		if (wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] &&
				(!tl || tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET)){
			/*
			 * There can only be one event manager running at a time.
			 */
			PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 1, 0, 0, 0);
			goto done;
		}
	}
again: // Start again after creating a thread

	if (_wq_exiting(wq)) {
		rc = WQ_RUN_TR_EXITING;
		goto exiting;
	}

	/*
	 * Thread request selection and admission control
	 */
	struct threadreq *fallback = NULL;
	if (req) {
		if ((req->tr_flags & TR_FLAG_NO_PACING) == 0 &&
				_wq_should_pace_priority(wq, req->tr_priority)) {
			/*
			 * If a request fails the pacing admission check, then thread
			 * requests are redriven when the pacing thread is finally scheduled
			 * when it calls _wq_pacing_end() in wq_unpark_continue().
			 */
			goto done;
		}
	} else if (wq->wq_reqcount == 0) {
		PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 2, 0, 0, 0);
		goto done;
	} else if ((req = workqueue_best_threadreqs(wq, tl, &fallback)) == NULL) {
		PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 3, 0, 0, 0);
		goto done;
	}
	if ((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0 &&
			(req->tr_priority < WORKQUEUE_EVENT_MANAGER_BUCKET)) {
		if (!may_start_constrained_thread(wq, req->tr_priority, parking_tl, true)) {
			if (!fallback) {
				PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 4, 0, 0, 0);
				goto done;
			}
			assert(req->tr_state == TR_STATE_WAITING);
			req = fallback;
		}
	}

	if (parking_tl) {
		if (tl->th_priority != req->tr_priority) {
			_wq_thactive_move(wq, tl->th_priority, req->tr_priority);
			wq->wq_thscheduled_count[tl->th_priority]--;
			wq->wq_thscheduled_count[req->tr_priority]++;
		}
		PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE,
				wq, 1, thread_tid(tl->th_thread), 0, 0);
	} else if (wq->wq_thidlecount) {
		tl = pop_from_thidlelist(wq, req->tr_priority);
		/*
		 * This call will update wq_thscheduled_count and wq_thactive_count for
		 * the provided priority.  It will not set the returned thread to that
		 * priority.  This matches the behavior of the parking_tl clause above.
		 */
		PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE,
				wq, 2, thread_tid(tl->th_thread), 0, 0);
	} else /* no idle threads */ {
		if (!may_add_new_thread || wq->wq_nthreads >= wq_max_threads) {
			PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 5,
					may_add_new_thread, wq->wq_nthreads, 0);
			if (wq->wq_nthreads < wq_max_threads) {
				rc = WQ_RUN_TR_THREAD_NEEDED;
			}
			goto done;
		}

		bool added_thread = workqueue_addnewthread(p, wq);
		/*
		 * workqueue_addnewthread will drop and re-take the lock, so we
		 * need to ensure we still have a cached request.
		 *
		 * It also means we have to pick a new request, since our old pick may
		 * not be valid anymore.
		 */
		req = incoming_req;
		if (req && (req->tr_flags & TR_FLAG_ONSTACK)) {
			_threadreq_copy_prepare(wq);
		}

		if (added_thread) {
			PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq_thread_select | DBG_FUNC_NONE,
					wq, 3, 0, 0, 0);
			goto again;
		} else if (_wq_exiting(wq)) {
			rc = WQ_RUN_TR_EXITING;
			goto exiting;
		} else {
			PTHREAD_TRACE_WQ(TRACE_wq_run_threadreq | DBG_FUNC_END, wq, 6, 0, 0, 0);
			/*
			 * Something caused thread creation to fail.  Kick off the timer in
			 * the hope that it'll succeed next time.
			 */
			if (WQ_TIMER_DELAYED_NEEDED(wq)) {
				workqueue_interval_timer_start(wq);
			}
			goto done;
		}
	}
	/*
	 * Setup thread, mark request as complete and run with it.
	 */
	if (req->tr_state == TR_STATE_WAITING) {
		_threadreq_dequeue(wq, req);
	}
	if (tl->th_priority != req->tr_priority) {
		PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE,
					wq, thread_tid(tl->th_thread),
					(tl->th_priority << 16) | req->tr_priority, 1, 0);
		reset_priority(tl, pthread_priority_from_wq_class_index(wq, req->tr_priority));
		tl->th_priority = (uint8_t)req->tr_priority;
	}
	if (req->tr_flags & TR_FLAG_OVERCOMMIT) {
		if ((tl->th_flags & TH_LIST_CONSTRAINED) != 0) {
			tl->th_flags &= ~TH_LIST_CONSTRAINED;
			wq->wq_constrained_threads_scheduled--;
		}
	} else {
		if ((tl->th_flags & TH_LIST_CONSTRAINED) == 0) {
			tl->th_flags |= TH_LIST_CONSTRAINED;
			wq->wq_constrained_threads_scheduled++;
		}
	}

	if (!parking_tl && !(req->tr_flags & TR_FLAG_NO_PACING)) {
		_wq_pacing_start(wq, tl);
	}
	if ((req->tr_flags & TR_FLAG_OVERCOMMIT) == 0) {
		uint32_t old_qos, new_qos;

		/*
		 * If we are scheduling a constrained thread request, we may need to
		 * update the best constrained qos in the thactive atomic state.
		 */
		for (new_qos = 0; new_qos < WQ_THACTIVE_NO_PENDING_REQUEST; new_qos++) {
			if (TAILQ_FIRST(&wq->wq_reqlist[new_qos]))
				break;
		}
		old_qos = _wq_thactive_best_constrained_req_qos(wq);
		if (old_qos != new_qos) {
			wq_thactive_t v = _wq_thactive_set_best_constrained_req_qos(wq,
					old_qos, new_qos);
#ifdef __LP64__
			PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 2, (uint64_t)v,
					(uint64_t)(v >> 64), 0, 0);
#else
			PTHREAD_TRACE_WQ(TRACE_wq_thactive_update, 2, v, 0, 0, 0);
#endif
		}
	}

	{
		uint32_t upcall_flags = WQ_FLAG_THREAD_NEWSPI;
		if (req->tr_flags & TR_FLAG_OVERCOMMIT)
			upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT;
		if (req->tr_flags & TR_FLAG_KEVENT)
			upcall_flags |= WQ_FLAG_THREAD_KEVENT;
		if (req->tr_flags & TR_FLAG_WORKLOOP)
			upcall_flags |= WQ_FLAG_THREAD_WORKLOOP | WQ_FLAG_THREAD_KEVENT;
		if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET)
			upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER;
		tl->th_upcall_flags = upcall_flags >> WQ_FLAG_THREAD_PRIOSHIFT;
	}

	if (req->tr_flags & TR_FLAG_KEVENT) {
		tl->th_flags |= TH_LIST_KEVENT;
	} else {
		tl->th_flags &= ~TH_LIST_KEVENT;
	}
	return _threadreq_complete_and_unlock(p, wq, req, tl);

done:
	if (incoming_req) {
		_threadreq_enqueue(wq, incoming_req);
	}

exiting:

	if (parking_tl && !(parking_tl->th_flags & TH_LIST_UNBINDING)) {
		parkit(wq, parking_tl, parking_tl->th_thread);
		__builtin_unreachable();
	}

	workqueue_unlock(wq);

	return rc;
}
/*
 * parked thread wakes up
 */
static void
wq_unpark_continue(void* __unused ptr, wait_result_t wait_result)
{
	boolean_t first_use = false;
	thread_t th = current_thread();
	proc_t p = current_proc();

	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
	if (uth == NULL) goto done;

	struct workqueue *wq = pthread_kern->proc_get_wqptr(p);
	if (wq == NULL) goto done;

	workqueue_lock_spin(wq);

	struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
	assert(tl != WQ_THREADLIST_EXITING_POISON);

	if (tl == NULL) {
		/*
		 * We woke up before addnewthread() was finished setting us up.  Go
		 * ahead and exit, but before we do poison the threadlist variable so
		 * that addnewthread() doesn't think we are valid still.
		 */
		pthread_kern->uthread_set_threadlist(uth, WQ_THREADLIST_EXITING_POISON);
		workqueue_unlock(wq);
		goto done;
	}

	assert(tl->th_flags & TH_LIST_INITED);

	if ((tl->th_flags & TH_LIST_NEW)){
		tl->th_flags &= ~(TH_LIST_NEW);
		first_use = true;
	}

	if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
		/*
		 * The normal wakeup path.
		 */
		goto return_to_user;
	}

	if ((tl->th_flags & TH_LIST_RUNNING) == 0 &&
			wait_result == THREAD_TIMED_OUT &&
			tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET &&
			TAILQ_FIRST(&wq->wq_thidlemgrlist) == tl &&
			TAILQ_NEXT(tl, th_entry) == NULL){
		/*
		 * If we are the only idle manager and we pop'ed for self-destruction,
		 * then don't actually exit.  Instead, free our stack to save some
		 * memory and re-park.
		 */

		workqueue_unlock(wq);

		vm_map_t vmap = wq->wq_map;

		// Keep this in sync with _setup_wqthread()
		const vm_size_t       guardsize = vm_map_page_size(vmap);
		const user_addr_t     freeaddr = (user_addr_t)tl->th_stackaddr + guardsize;
		const vm_map_offset_t freesize = vm_map_trunc_page_mask((PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1, vm_map_page_mask(vmap)) - guardsize;

		__assert_only int kr = mach_vm_behavior_set(vmap, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
		if (kr != KERN_SUCCESS && kr != KERN_INVALID_ADDRESS) {
			os_log_error(OS_LOG_DEFAULT, "unable to make thread stack reusable (kr: %d)", kr);
		}
		workqueue_lock_spin(wq);

		if ( !(tl->th_flags & TH_LIST_RUNNING)) {
			thread_set_pending_block_hint(th, kThreadWaitParkedWorkQueue);
			assert_wait((caddr_t)tl, (THREAD_INTERRUPTIBLE));

			workqueue_unlock(wq);

			thread_block(wq_unpark_continue);
			__builtin_unreachable();
		}
	}

	if ((tl->th_flags & TH_LIST_RUNNING) == 0) {
		assert((tl->th_flags & TH_LIST_BUSY) == 0);
		if (!first_use) {
			PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_END, wq, 0, 0, 0, 0);
		}
		/*
		 * We were set running, but not for the purposes of actually running.
		 * This could be because the timer elapsed.  Or it could be because the
		 * thread aborted.  Either way, we need to return to userspace to exit.
		 *
		 * The call to workqueue_removethread will consume the lock.
		 */

		if (!first_use &&
				(tl->th_priority < qos_class_get_class_index(WQ_THREAD_CLEANUP_QOS) ||
				(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET))) {
			// Reset the QoS to something low for the pthread cleanup
			PTHREAD_TRACE_WQ(TRACE_wq_thread_reset_priority | DBG_FUNC_NONE,
						wq, thread_tid(th),
						(tl->th_priority << 16) | qos_class_get_class_index(WQ_THREAD_CLEANUP_QOS), 3, 0);
			pthread_priority_t cleanup_pri = _pthread_priority_make_newest(WQ_THREAD_CLEANUP_QOS, 0, 0);
			reset_priority(tl, cleanup_pri);
		}

		workqueue_removethread(tl, 0, first_use);

		if (first_use){
			pthread_kern->thread_bootstrap_return();
		} else {
			pthread_kern->unix_syscall_return(0);
		}
		__builtin_unreachable();
	}

	/*
	 * The timer woke us up or the thread was aborted.  However, we have
	 * already started to make this a runnable thread.  Wait for that to
	 * finish, then continue to userspace.
	 */
	while ((tl->th_flags & TH_LIST_BUSY)) {
		assert_wait((caddr_t)tl, (THREAD_UNINT));

		workqueue_unlock(wq);

		thread_block(THREAD_CONTINUE_NULL);

		workqueue_lock_spin(wq);
	}

return_to_user:
	if (!first_use) {
		PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_END, wq, 0, 0, 0, 0);
	}
	if (_wq_pacing_end(wq, tl) && wq->wq_reqcount) {
		workqueue_run_threadreq_and_unlock(p, wq, NULL, NULL, true);
	} else {
		workqueue_unlock(wq);
	}
	_setup_wqthread(p, th, wq, tl, first_use ? WQ_SETUP_FIRST_USE : 0);
	pthread_kern->thread_sched_call(th, workqueue_callback);
done:
	if (first_use){
		pthread_kern->thread_bootstrap_return();
	} else {
		pthread_kern->unix_syscall_return(EJUSTRETURN);
	}
	panic("Our attempt to return to userspace failed...");
}
/*
 * configures initial thread stack/registers to jump into:
 * _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int upcall_flags, int nkevents);
 * to get there we jump through assembly stubs in pthread_asm.s.  Those
 * routines setup a stack frame, using the current stack pointer, and marshall
 * arguments from registers to the stack as required by the ABI.
 *
 * One odd thing we do here is to start the pthread_t 4k below what would be the
 * top of the stack otherwise.  This is because usually only the first 4k of the
 * pthread_t will be used and so we want to put it on the same 16k page as the
 * top of the stack to save memory.
 *
 * When we are done the stack will look like:
 * |-----------| th_stackaddr + th_allocsize
 * |pthread_t  | th_stackaddr + DEFAULT_STACKSIZE + guardsize + PTHREAD_STACK_OFFSET
 * |kevent list| optionally - at most WQ_KEVENT_LIST_LEN events
 * |kevent data| optionally - at most WQ_KEVENT_DATA_SIZE bytes
 * |stack gap  | bottom aligned to 16 bytes, and at least as big as stack_gap_min
 * |   STACK   |
 * |     |     |
 * |     v     |
 * |guard page | guardsize
 * |-----------| th_stackaddr
 */
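/*
 * For example, on a kevent-bound thread the kevent list (up to
 * WQ_KEVENT_LIST_LEN entries of struct kevent_qos_s), an optional kqueue_id_t
 * for workloops, and the WQ_KEVENT_DATA_SIZE data buffer are carved out just
 * below the pthread_t, and the usable stack top is re-aligned below whatever
 * was consumed.
 */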
static void
_setup_wqthread(proc_t p, thread_t th, struct workqueue *wq,
		struct threadlist *tl, int setup_flags)
{
	int error;

	if (setup_flags & WQ_SETUP_CLEAR_VOUCHER) {
		/*
		 * For preemption reasons, we want to reset the voucher as late as
		 * possible, so we do it in two places:
		 *   - Just before parking (i.e. in parkit())
		 *   - Prior to doing the setup for the next workitem (i.e. here)
		 *
		 * Those two places are sufficient to ensure we always reset it before
		 * it goes back out to user space, but be careful to not break that
		 * invariant.
		 */
		__assert_only kern_return_t kr;
		kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
		assert(kr == KERN_SUCCESS);
	}

	uint32_t upcall_flags = tl->th_upcall_flags << WQ_FLAG_THREAD_PRIOSHIFT;
	if (!(setup_flags & WQ_SETUP_FIRST_USE)) {
		upcall_flags |= WQ_FLAG_THREAD_REUSE;
	}

	/*
	 * Put the QoS class value into the lower bits of the reuse_thread register, this is where
	 * the thread priority used to be stored anyway.
	 */
	pthread_priority_t priority = pthread_priority_from_wq_class_index(wq, tl->th_priority);
	upcall_flags |= (_pthread_priority_get_qos_newest(priority) & WQ_FLAG_THREAD_PRIOMASK);

	const vm_size_t guardsize = vm_map_page_size(tl->th_workq->wq_map);
	const vm_size_t stack_gap_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_REDZONE_LEN;
	const vm_size_t stack_align_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_STK_ALIGN;

	user_addr_t pthread_self_addr = (user_addr_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET);
	user_addr_t stack_top_addr = (user_addr_t)((pthread_self_addr - stack_gap_min) & -stack_align_min);
	user_addr_t stack_bottom_addr = (user_addr_t)(tl->th_stackaddr + guardsize);

	user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p);
	if (!wqstart_fnptr) {
		panic("workqueue thread start function pointer is NULL");
	}

	if (setup_flags & WQ_SETUP_FIRST_USE) {
		uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
		if (tsd_offset) {
			mach_vm_offset_t th_tsd_base = (mach_vm_offset_t)pthread_self_addr + tsd_offset;
			kern_return_t kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
			if (kret == KERN_SUCCESS) {
				upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET;
			}
		}

		/*
		* Pre-fault the first page of the new thread's stack and the page that will
		* contain the pthread_t structure.
		*/
		vm_map_t vmap = pthread_kern->current_map();
		if (vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) !=
				vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap))){
			vm_fault( vmap,
					vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)),
					VM_PROT_READ | VM_PROT_WRITE,
					FALSE,
					THREAD_UNINT, NULL, 0);
		}
		vm_fault( vmap,
				vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap)),
				VM_PROT_READ | VM_PROT_WRITE,
				FALSE,
				THREAD_UNINT, NULL, 0);
	}
	user_addr_t kevent_list = NULL;
	int kevent_count = 0;
	if (upcall_flags & WQ_FLAG_THREAD_KEVENT){
		bool workloop = upcall_flags & WQ_FLAG_THREAD_WORKLOOP;

		kevent_list = pthread_self_addr - WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
		kevent_count = WQ_KEVENT_LIST_LEN;

		user_addr_t kevent_id_addr = kevent_list;
		if (workloop) {
			/*
			 * The kevent ID goes just below the kevent list.  Sufficiently new
			 * userspace will know to look there.  Old userspace will just
			 * ignore it.
			 */
			kevent_id_addr -= sizeof(kqueue_id_t);
		}

		user_addr_t kevent_data_buf = kevent_id_addr - WQ_KEVENT_DATA_SIZE;
		user_size_t kevent_data_available = WQ_KEVENT_DATA_SIZE;

		int32_t events_out = 0;

		assert(tl->th_flags | TH_LIST_KEVENT_BOUND);
		unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE;
		if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
			flags |= KEVENT_FLAG_WORKQ_MANAGER;
		}
		int ret;
		if (workloop) {
			flags |= KEVENT_FLAG_WORKLOOP;
			kqueue_id_t kevent_id = -1;
			ret = kevent_id_internal(p, &kevent_id,
					NULL, 0, kevent_list, kevent_count,
					kevent_data_buf, &kevent_data_available,
					flags, &events_out);
			copyout(&kevent_id, kevent_id_addr, sizeof(kevent_id));
		} else {
			flags |= KEVENT_FLAG_WORKQ;
			ret = kevent_qos_internal(p,
					class_index_get_thread_qos(tl->th_priority),
					NULL, 0, kevent_list, kevent_count,
					kevent_data_buf, &kevent_data_available,
					flags, &events_out);
		}

		// squash any errors into just empty output
		if (ret != KERN_SUCCESS || events_out == -1){
			events_out = 0;
			kevent_data_available = WQ_KEVENT_DATA_SIZE;
		}

		// We shouldn't get data out if there aren't events available
		assert(events_out != 0 || kevent_data_available == WQ_KEVENT_DATA_SIZE);

		if (events_out > 0){
			if (kevent_data_available == WQ_KEVENT_DATA_SIZE){
				stack_top_addr = (kevent_id_addr - stack_gap_min) & -stack_align_min;
			} else {
				stack_top_addr = (kevent_data_buf + kevent_data_available - stack_gap_min) & -stack_align_min;
			}

			kevent_count = events_out;
		} else {
			kevent_list = NULL;
			kevent_count = 0;
		}
	}

	PTHREAD_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_START, wq, 0, 0, 0, 0);
#if defined(__i386__) || defined(__x86_64__)
	if (proc_is64bit(p) == 0) {
		x86_thread_state32_t state = {
			.eip = (unsigned int)wqstart_fnptr,
			.eax = /* arg0 */ (unsigned int)pthread_self_addr,
			.ebx = /* arg1 */ (unsigned int)tl->th_thport,
			.ecx = /* arg2 */ (unsigned int)stack_bottom_addr,
			.edx = /* arg3 */ (unsigned int)kevent_list,
			.edi = /* arg4 */ (unsigned int)upcall_flags,
			.esi = /* arg5 */ (unsigned int)kevent_count,

			.esp = (int)((vm_offset_t)stack_top_addr),
		};

		error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
		if (error != KERN_SUCCESS) {
			panic(__func__ ": thread_set_wq_state failed: %d", error);
		}
	} else {
		x86_thread_state64_t state64 = {
			// x86-64 already passes all the arguments in registers, so we just put them in their final place here
			.rip = (uint64_t)wqstart_fnptr,
			.rdi = (uint64_t)pthread_self_addr,
			.rsi = (uint64_t)tl->th_thport,
			.rdx = (uint64_t)stack_bottom_addr,
			.rcx = (uint64_t)kevent_list,
			.r8  = (uint64_t)upcall_flags,
			.r9  = (uint64_t)kevent_count,

			.rsp = (uint64_t)(stack_top_addr)
		};

		error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
		if (error != KERN_SUCCESS) {
			panic(__func__ ": thread_set_wq_state failed: %d", error);
		}
	}
#else
#error setup_wqthread  not defined for this architecture
#endif
}
static int wq_kevent_test SYSCTL_HANDLER_ARGS {
	//(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req)
#pragma unused(oidp, arg1, arg2)
	int error;
	struct workq_reqthreads_req_s requests[64] = {};

	if (req->newlen > sizeof(requests) || req->newlen < sizeof(struct workq_reqthreads_req_s))
		return EINVAL;

	error = copyin(req->newptr, requests, req->newlen);
	if (error) return error;

	_workq_reqthreads(req->p, (int)(req->newlen / sizeof(struct workq_reqthreads_req_s)), requests);

	return 0;
}
int
_fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
{
	struct workqueue * wq;
	int error = 0;
	int activecount;

	if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL) {
		return EINVAL;
	}

	/*
	 * This is sometimes called from interrupt context by the kperf sampler.
	 * In that case, it's not safe to spin trying to take the lock since we
	 * might already hold it.  So, we just try-lock it and error out if it's
	 * already held.  Since this is just a debugging aid, and all our callers
	 * are able to handle an error, that's fine.
	 */
	bool locked = workqueue_lock_try(wq);
	if (!locked) {
		return EBUSY;
	}

	activecount = _wq_thactive_aggregate_downto_qos(wq, _wq_thactive(wq),
			WORKQUEUE_NUM_BUCKETS - 1, NULL, NULL);
	pwqinfo->pwq_nthreads = wq->wq_nthreads;
	pwqinfo->pwq_runthreads = activecount;
	pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
	pwqinfo->pwq_state = 0;

	if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
		pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
	}

	if (wq->wq_nthreads >= wq_max_threads) {
		pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
	}

	workqueue_unlock(wq);
	return error;
}
uint32_t
_get_pwq_state_kdp(proc_t p)
{
	struct workqueue *wq = pthread_kern->proc_get_wqptr(p);

	if (wq == NULL || workqueue_lock_spin_is_acquired_kdp(wq)) {
		return 0;
	}

	uint32_t pwq_state = WQ_FLAGS_AVAILABLE;

	if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
		pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
	}

	if (wq->wq_nthreads >= wq_max_threads) {
		pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
	}

	return pwq_state;
}
int
_thread_selfid(__unused struct proc *p, uint64_t *retval)
{
	thread_t thread = current_thread();
	*retval = thread_tid(thread);
	return KERN_SUCCESS;
}
	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);

	/*
	 * allocate the lock attribute for pthread synchronizers
	 */
	pthread_lck_attr = lck_attr_alloc_init();

	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);

	pth_global_hashinit();
	psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);

	pthread_zone_workqueue = zinit(sizeof(struct workqueue),
			1024 * sizeof(struct workqueue), 8192, "pthread.workqueue");
	pthread_zone_threadlist = zinit(sizeof(struct threadlist),
			1024 * sizeof(struct threadlist), 8192, "pthread.threadlist");
	pthread_zone_threadreq = zinit(sizeof(struct threadreq),
			1024 * sizeof(struct threadreq), 8192, "pthread.threadreq");

	sysctl_register_oid(&sysctl__kern_wq_stalled_window_usecs);
	sysctl_register_oid(&sysctl__kern_wq_reduce_pool_window_usecs);
	sysctl_register_oid(&sysctl__kern_wq_max_timer_interval_usecs);
	sysctl_register_oid(&sysctl__kern_wq_max_threads);
	sysctl_register_oid(&sysctl__kern_wq_max_constrained_threads);
	sysctl_register_oid(&sysctl__kern_pthread_debug_tracing);

	sysctl_register_oid(&sysctl__debug_wq_kevent_test);

	for (int i = 0; i < WORKQUEUE_NUM_BUCKETS; i++) {
		uint32_t thread_qos = _wq_bucket_to_thread_qos(i);
		wq_max_concurrency[i] = pthread_kern->qos_max_parallelism(thread_qos,
				QOS_PARALLELISM_COUNT_LOGICAL);
	}
	wq_max_concurrency[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1;
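	/*
	 * e.g. on a machine with 8 logical CPUs each QoS bucket typically reports a
	 * max concurrency of 8, while the event manager bucket is always capped at
	 * a single thread.
	 */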