[apple/libpthread.git] / kern / kern_support.c (libpthread-218.1.3)
f1a1da6c
A
1/*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
29/*
30 * pthread_synch.c
31 */
32
964d3577
A
33#pragma mark - Front Matter
34
f1a1da6c
A
35#define _PTHREAD_CONDATTR_T
36#define _PTHREAD_COND_T
37#define _PTHREAD_MUTEXATTR_T
38#define _PTHREAD_MUTEX_T
39#define _PTHREAD_RWLOCKATTR_T
40#define _PTHREAD_RWLOCK_T
41
42#undef pthread_mutexattr_t
43#undef pthread_mutex_t
44#undef pthread_condattr_t
45#undef pthread_cond_t
46#undef pthread_rwlockattr_t
47#undef pthread_rwlock_t
48
2546420a
A
49#include <sys/cdefs.h>
50
51// <rdar://problem/26158937> panic() should be marked noreturn
52extern void panic(const char *string, ...) __printflike(1,2) __dead2;
53
f1a1da6c
A
54#include <sys/param.h>
55#include <sys/queue.h>
56#include <sys/resourcevar.h>
57//#include <sys/proc_internal.h>
58#include <sys/kauth.h>
59#include <sys/systm.h>
60#include <sys/timeb.h>
61#include <sys/times.h>
62#include <sys/acct.h>
63#include <sys/kernel.h>
64#include <sys/wait.h>
65#include <sys/signalvar.h>
66#include <sys/sysctl.h>
67#include <sys/syslog.h>
68#include <sys/stat.h>
69#include <sys/lock.h>
70#include <sys/kdebug.h>
71//#include <sys/sysproto.h>
72#include <sys/vm.h>
73#include <sys/user.h> /* for coredump */
74#include <sys/proc_info.h> /* for fill_procworkqueue */
75
f1a1da6c
A
76#include <mach/mach_port.h>
77#include <mach/mach_types.h>
78#include <mach/semaphore.h>
79#include <mach/sync_policy.h>
80#include <mach/task.h>
81#include <mach/vm_prot.h>
82#include <kern/kern_types.h>
83#include <kern/task.h>
84#include <kern/clock.h>
85#include <mach/kern_return.h>
86#include <kern/thread.h>
87#include <kern/sched_prim.h>
88#include <kern/kalloc.h>
89#include <kern/sched_prim.h> /* for thread_exception_return */
90#include <kern/processor.h>
91#include <kern/assert.h>
92#include <mach/mach_vm.h>
93#include <mach/mach_param.h>
94#include <mach/thread_status.h>
95#include <mach/thread_policy.h>
96#include <mach/message.h>
97#include <mach/port.h>
98//#include <vm/vm_protos.h>
99#include <vm/vm_fault.h>
100#include <vm/vm_map.h>
101#include <mach/thread_act.h> /* for thread_resume */
102#include <machine/machine_routines.h>
964d3577 103#include <mach/shared_region.h>
f1a1da6c
A
104
105#include <libkern/OSAtomic.h>
2546420a 106#include <libkern/libkern.h>
f1a1da6c
A
107
108#include <sys/pthread_shims.h>
109#include "kern_internal.h"
110
f1a1da6c
A
111// XXX: Dirty import for sys/signalvar.h that's wrapped in BSD_KERNEL_PRIVATE
112#define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP))
113
2546420a
A
114// XXX: Ditto for thread tags from kern/thread.h
115#define THREAD_TAG_MAINTHREAD 0x1
116#define THREAD_TAG_PTHREAD 0x10
117#define THREAD_TAG_WORKQUEUE 0x20
118
f1a1da6c
A
119lck_grp_attr_t *pthread_lck_grp_attr;
120lck_grp_t *pthread_lck_grp;
121lck_attr_t *pthread_lck_attr;
122
123extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
124extern void workqueue_thread_yielded(void);
125
2546420a
A
126enum run_nextreq_mode {
127 RUN_NEXTREQ_DEFAULT,
128 RUN_NEXTREQ_DEFAULT_KEVENT,
129 RUN_NEXTREQ_OVERCOMMIT,
130 RUN_NEXTREQ_OVERCOMMIT_KEVENT,
131 RUN_NEXTREQ_DEFERRED_OVERCOMMIT,
132 RUN_NEXTREQ_UNCONSTRAINED,
133 RUN_NEXTREQ_EVENT_MANAGER,
134 RUN_NEXTREQ_ADD_TIMER
135};
136static thread_t workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t th,
137 enum run_nextreq_mode mode, pthread_priority_t prio,
138 bool kevent_bind_via_return);
f1a1da6c
A
139
140static boolean_t workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority);
141
2546420a
A
142static void wq_runreq(proc_t p, thread_t th, struct workqueue *wq,
143 struct threadlist *tl, boolean_t return_directly, boolean_t deferred_kevent);
f1a1da6c 144
2546420a 145static void _setup_wqthread(proc_t p, thread_t th, struct workqueue *wq, struct threadlist *tl, bool first_use);
f1a1da6c 146
2546420a
A
147static void reset_priority(struct threadlist *tl, pthread_priority_t pri);
148static pthread_priority_t pthread_priority_from_wq_class_index(struct workqueue *wq, int index);
149
150static void wq_unpark_continue(void* ptr, wait_result_t wait_result) __dead2;
f1a1da6c 151
964d3577 152static boolean_t workqueue_addnewthread(struct workqueue *wq, boolean_t ignore_constrained_thread_limit);
2546420a
A
153
154static void workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use);
155static void workqueue_lock_spin(struct workqueue *);
156static void workqueue_unlock(struct workqueue *);
f1a1da6c 157
964d3577
A
158static boolean_t may_start_constrained_thread(struct workqueue *wq, uint32_t at_priclass, uint32_t my_priclass, boolean_t *start_timer);
159
2546420a 160static mach_vm_offset_t stack_addr_hint(proc_t p, vm_map_t vmap);
964d3577 161
f1a1da6c
A
162int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc);
163int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);
164
165#define WQ_MAXPRI_MIN 0 /* low prio queue num */
166#define WQ_MAXPRI_MAX 2 /* max prio queuenum */
167#define WQ_PRI_NUM 3 /* number of prio work queues */
168
169#define C_32_STK_ALIGN 16
170#define C_64_STK_ALIGN 16
171#define C_64_REDZONE_LEN 128
964d3577
A
172
173#define PTHREAD_T_OFFSET 0
f1a1da6c
A
174
175/*
176 * Flags field passed to bsdthread_create and back in pthread_start
177 * 31 <---------------------------------> 0
178 * _________________________________________
179 * | flags(8) | policy(8) | importance(16) |
180 * -----------------------------------------
181 */
182
2546420a
A
183#define PTHREAD_START_CUSTOM 0x01000000
184#define PTHREAD_START_SETSCHED 0x02000000
185#define PTHREAD_START_DETACHED 0x04000000
186#define PTHREAD_START_QOSCLASS 0x08000000
187#define PTHREAD_START_TSD_BASE_SET 0x10000000
188#define PTHREAD_START_QOSCLASS_MASK 0x00ffffff
f1a1da6c
A
189#define PTHREAD_START_POLICY_BITSHIFT 16
190#define PTHREAD_START_POLICY_MASK 0xff
191#define PTHREAD_START_IMPORTANCE_MASK 0xffff
192
193#define SCHED_OTHER POLICY_TIMESHARE
194#define SCHED_FIFO POLICY_FIFO
195#define SCHED_RR POLICY_RR
196
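
/*
 * Editor's sketch (not part of the original file): tiny helpers showing how
 * the packed flags word above decomposes, mirroring the decode performed in
 * _bsdthread_create() below. For example, a caller requesting SCHED_RR at
 * importance 5 would pass
 *     PTHREAD_START_SETSCHED | (SCHED_RR << PTHREAD_START_POLICY_BITSHIFT) | 5
 */
static inline uint32_t
_pthread_start_flags_get_policy(uint32_t flags)
{
	// bits 16..23: scheduling policy
	return (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
}

static inline uint32_t
_pthread_start_flags_get_importance(uint32_t flags)
{
	// bits 0..15: importance (applied relative to BASEPRI_DEFAULT)
	return flags & PTHREAD_START_IMPORTANCE_MASK;
}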
964d3577
A
197#define BASEPRI_DEFAULT 31
198
2546420a
A
199#pragma mark sysctls
200
201uint32_t wq_yielded_threshold = WQ_YIELDED_THRESHOLD;
202uint32_t wq_yielded_window_usecs = WQ_YIELDED_WINDOW_USECS;
203uint32_t wq_stalled_window_usecs = WQ_STALLED_WINDOW_USECS;
204uint32_t wq_reduce_pool_window_usecs = WQ_REDUCE_POOL_WINDOW_USECS;
205uint32_t wq_max_timer_interval_usecs = WQ_MAX_TIMER_INTERVAL_USECS;
206uint32_t wq_max_threads = WORKQUEUE_MAXTHREADS;
207uint32_t wq_max_constrained_threads = WORKQUEUE_MAXTHREADS / 8;
208uint32_t wq_max_concurrency = 1; // set to ncpus on load
209
210SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
211 &wq_yielded_threshold, 0, "");
212
213SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
214 &wq_yielded_window_usecs, 0, "");
215
216SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
217 &wq_stalled_window_usecs, 0, "");
218
219SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
220 &wq_reduce_pool_window_usecs, 0, "");
221
222SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
223 &wq_max_timer_interval_usecs, 0, "");
224
225SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
226 &wq_max_threads, 0, "");
227
228SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
229 &wq_max_constrained_threads, 0, "");
230
231#ifdef DEBUG
232SYSCTL_INT(_kern, OID_AUTO, wq_max_concurrency, CTLFLAG_RW | CTLFLAG_LOCKED,
233 &wq_max_concurrency, 0, "");
234
235static int wq_kevent_test SYSCTL_HANDLER_ARGS;
236SYSCTL_PROC(_debug, OID_AUTO, wq_kevent_test, CTLFLAG_MASKED | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLTYPE_OPAQUE, NULL, 0, wq_kevent_test, 0, "-");
237#endif
238
239static uint32_t wq_init_constrained_limit = 1;
240
241uint32_t pthread_debug_tracing = 1;
242
243SYSCTL_INT(_kern, OID_AUTO, pthread_debug_tracing, CTLFLAG_RW | CTLFLAG_LOCKED,
244 &pthread_debug_tracing, 0, "")
245
246
964d3577
A
247#pragma mark - Process/Thread Setup/Teardown syscalls
248
2546420a
A
249static mach_vm_offset_t
250stack_addr_hint(proc_t p, vm_map_t vmap)
251{
964d3577 252 mach_vm_offset_t stackaddr;
2546420a
A
253 mach_vm_offset_t aslr_offset;
254 bool proc64bit = proc_is64bit(p);
255
256 // We can't safely take random values % something unless it's a power-of-two
257 _Static_assert(powerof2(PTH_DEFAULT_STACKSIZE), "PTH_DEFAULT_STACKSIZE is a power-of-two");
258
964d3577 259#if defined(__i386__) || defined(__x86_64__)
2546420a
A
260 if (proc64bit) {
261 // Matches vm_map_get_max_aslr_slide_pages's image shift in xnu
262 aslr_offset = random() % (1 << 28); // about 512 stacks
263 } else {
264 // Actually bigger than the image shift, we've got ~256MB to work with
265 aslr_offset = random() % (16 * PTH_DEFAULT_STACKSIZE);
266 }
267 aslr_offset = vm_map_trunc_page_mask(aslr_offset, vm_map_page_mask(vmap));
268 if (proc64bit) {
964d3577 269 // Above nanomalloc range (see NANOZONE_SIGNATURE)
2546420a 270 stackaddr = 0x700000000000 + aslr_offset;
964d3577 271 } else {
2546420a 272 stackaddr = SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386 + aslr_offset;
964d3577
A
273 }
274#elif defined(__arm__) || defined(__arm64__)
2546420a
A
275 // vm_map_get_max_aslr_slide_pages ensures 1MB of slide, we do better
276 aslr_offset = random() % ((proc64bit ? 4 : 2) * PTH_DEFAULT_STACKSIZE);
277 aslr_offset = vm_map_trunc_page_mask((vm_map_offset_t)aslr_offset, vm_map_page_mask(vmap));
278 if (proc64bit) {
964d3577 279 // 64 stacks below nanomalloc (see NANOZONE_SIGNATURE)
2546420a 280 stackaddr = 0x170000000 - 64 * PTH_DEFAULT_STACKSIZE - aslr_offset;
964d3577 281 } else {
2546420a
A
282 // If you try to slide down from this point, you risk ending up in memory consumed by malloc
283 stackaddr = SHARED_REGION_BASE_ARM - 32 * PTH_DEFAULT_STACKSIZE + aslr_offset;
964d3577
A
284 }
285#else
286#error Need to define a stack address hint for this architecture
287#endif
288 return stackaddr;
289}
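
/*
 * Editor's worked example (sketch; PTH_DEFAULT_STACKSIZE is assumed to be
 * 512KB, which is what the "about 512 stacks" note above implies): for a
 * 64-bit x86 process with 4KB pages,
 *
 *     aslr_offset = random() % (1 << 28);          // 0 .. 256MB-1
 *     aslr_offset = trunc to 4KB;                  // 0, 4KB, 8KB, ...
 *     stackaddr   = 0x700000000000 + aslr_offset;  // <= 0x70000FFFF000
 *
 * i.e. the hint always lands just above the nanomalloc region and can take
 * one of roughly 2^16 page-aligned values (about 512 default-stack-sized
 * slots of slide).
 */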
290
291/**
292 * bsdthread_create system call. Used by pthread_create.
293 */
f1a1da6c
A
294int
295_bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcarg, user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, user_addr_t *retval)
296{
297 kern_return_t kret;
298 void * sright;
299 int error = 0;
300 int allocated = 0;
301 mach_vm_offset_t stackaddr;
302 mach_vm_size_t th_allocsize = 0;
f1a1da6c 303 mach_vm_size_t th_guardsize;
f1a1da6c
A
304 mach_vm_offset_t th_stack;
305 mach_vm_offset_t th_pthread;
2546420a 306 mach_vm_offset_t th_tsd_base;
f1a1da6c
A
307 mach_port_name_t th_thport;
308 thread_t th;
309 vm_map_t vmap = pthread_kern->current_map();
310 task_t ctask = current_task();
311 unsigned int policy, importance;
2546420a
A
312 uint32_t tsd_offset;
313
f1a1da6c
A
314 int isLP64 = 0;
315
316 if (pthread_kern->proc_get_register(p) == 0) {
317 return EINVAL;
318 }
319
320 PTHREAD_TRACE(TRACE_pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0, 0);
321
322 isLP64 = proc_is64bit(p);
323 th_guardsize = vm_map_page_size(vmap);
324
2546420a 325 stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
f1a1da6c
A
326 kret = pthread_kern->thread_create(ctask, &th);
327 if (kret != KERN_SUCCESS)
328 return(ENOMEM);
329 thread_reference(th);
330
2546420a
A
331 pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD);
332
f1a1da6c
A
333 sright = (void *)pthread_kern->convert_thread_to_port(th);
334 th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask));
335
964d3577
A
336 if ((flags & PTHREAD_START_CUSTOM) == 0) {
337 mach_vm_size_t pthread_size =
338 vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(vmap));
339 th_allocsize = th_guardsize + user_stack + pthread_size;
340 user_stack += PTHREAD_T_OFFSET;
341
2546420a
A
342 kret = mach_vm_map(vmap, &stackaddr,
343 th_allocsize,
964d3577
A
344 page_size-1,
345 VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL,
346 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
347 VM_INHERIT_DEFAULT);
348 if (kret != KERN_SUCCESS){
2546420a 349 kret = mach_vm_allocate(vmap,
964d3577
A
350 &stackaddr, th_allocsize,
351 VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
352 }
353 if (kret != KERN_SUCCESS) {
f1a1da6c
A
354 error = ENOMEM;
355 goto out;
964d3577 356 }
f1a1da6c
A
357
358 PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);
359
f1a1da6c 360 allocated = 1;
964d3577 361 /*
f1a1da6c 362 * The guard page is at the lowest address
964d3577 363 * The stack base is the highest address
f1a1da6c
A
364 */
365 kret = mach_vm_protect(vmap, stackaddr, th_guardsize, FALSE, VM_PROT_NONE);
366
964d3577 367 if (kret != KERN_SUCCESS) {
f1a1da6c
A
368 error = ENOMEM;
369 goto out1;
964d3577
A
370 }
371
372 th_pthread = stackaddr + th_guardsize + user_stack;
373 th_stack = th_pthread;
f1a1da6c 374
964d3577 375 /*
f1a1da6c
A
376 * Pre-fault the first page of the new thread's stack and the page that will
377 * contain the pthread_t structure.
378 */
964d3577
A
379 if (vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) !=
380 vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap))){
381 vm_fault( vmap,
382 vm_map_trunc_page_mask((vm_map_offset_t)(th_stack - C_64_REDZONE_LEN), vm_map_page_mask(vmap)),
383 VM_PROT_READ | VM_PROT_WRITE,
384 FALSE,
385 THREAD_UNINT, NULL, 0);
386 }
f1a1da6c
A
387
388 vm_fault( vmap,
964d3577
A
389 vm_map_trunc_page_mask((vm_map_offset_t)th_pthread, vm_map_page_mask(vmap)),
390 VM_PROT_READ | VM_PROT_WRITE,
391 FALSE,
392 THREAD_UNINT, NULL, 0);
393
f1a1da6c
A
394 } else {
395 th_stack = user_stack;
f1a1da6c
A
396 th_pthread = user_pthread;
397
398 PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3, 0);
399 }
2546420a
A
400
401 tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
402 if (tsd_offset) {
403 th_tsd_base = th_pthread + tsd_offset;
404 kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
405 if (kret == KERN_SUCCESS) {
406 flags |= PTHREAD_START_TSD_BASE_SET;
407 }
408 }
409
f1a1da6c
A
410#if defined(__i386__) || defined(__x86_64__)
411 /*
412 * Set up i386 registers & function call.
413 */
414 if (isLP64 == 0) {
964d3577
A
415 x86_thread_state32_t state = {
416 .eip = (unsigned int)pthread_kern->proc_get_threadstart(p),
417 .eax = (unsigned int)th_pthread,
418 .ebx = (unsigned int)th_thport,
419 .ecx = (unsigned int)user_func,
420 .edx = (unsigned int)user_funcarg,
421 .edi = (unsigned int)user_stack,
422 .esi = (unsigned int)flags,
423 /*
424 * set stack pointer
425 */
426 .esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN))
427 };
f1a1da6c 428
964d3577 429 error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
f1a1da6c
A
430 if (error != KERN_SUCCESS) {
431 error = EINVAL;
432 goto out;
433 }
434 } else {
964d3577
A
435 x86_thread_state64_t state64 = {
436 .rip = (uint64_t)pthread_kern->proc_get_threadstart(p),
437 .rdi = (uint64_t)th_pthread,
438 .rsi = (uint64_t)(th_thport),
439 .rdx = (uint64_t)user_func,
440 .rcx = (uint64_t)user_funcarg,
441 .r8 = (uint64_t)user_stack,
442 .r9 = (uint64_t)flags,
443 /*
444 * set stack pointer aligned to 16 byte boundary
445 */
446 .rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN)
447 };
f1a1da6c 448
964d3577 449 error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
f1a1da6c
A
450 if (error != KERN_SUCCESS) {
451 error = EINVAL;
452 goto out;
453 }
454
455 }
456#elif defined(__arm__)
964d3577
A
457 arm_thread_state_t state = {
458 .pc = (int)pthread_kern->proc_get_threadstart(p),
459 .r[0] = (unsigned int)th_pthread,
460 .r[1] = (unsigned int)th_thport,
461 .r[2] = (unsigned int)user_func,
462 .r[3] = (unsigned int)user_funcarg,
463 .r[4] = (unsigned int)user_stack,
464 .r[5] = (unsigned int)flags,
465
466 /* Set r7 & lr to 0 for better back tracing */
467 .r[7] = 0,
468 .lr = 0,
469
470 /*
471 * set stack pointer
472 */
473 .sp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN))
474 };
f1a1da6c 475
964d3577 476 (void) pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
f1a1da6c
A
477
478#else
479#error bsdthread_create not defined for this architecture
480#endif
481
482 if ((flags & PTHREAD_START_SETSCHED) != 0) {
483 /* Set scheduling parameters if needed */
484 thread_extended_policy_data_t extinfo;
485 thread_precedence_policy_data_t precedinfo;
486
487 importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
488 policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
489
490 if (policy == SCHED_OTHER) {
491 extinfo.timeshare = 1;
492 } else {
493 extinfo.timeshare = 0;
494 }
495
496 thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);
497
f1a1da6c
A
498 precedinfo.importance = (importance - BASEPRI_DEFAULT);
499 thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
500 } else if ((flags & PTHREAD_START_QOSCLASS) != 0) {
501 /* Set thread QoS class if requested. */
502 pthread_priority_t priority = (pthread_priority_t)(flags & PTHREAD_START_QOSCLASS_MASK);
503
504 thread_qos_policy_data_t qos;
2546420a 505 qos.qos_tier = pthread_priority_get_thread_qos(priority);
f1a1da6c
A
506 qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 :
507 _pthread_priority_get_relpri(priority);
508
509 pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
510 }
511
512 kret = pthread_kern->thread_resume(th);
513 if (kret != KERN_SUCCESS) {
514 error = EINVAL;
515 goto out1;
516 }
517 thread_deallocate(th); /* drop the creator reference */
518
519 PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_END, error, th_pthread, 0, 0, 0);
520
964d3577
A
521 // cast required as mach_vm_offset_t is always 64 bits even on 32-bit platforms
522 *retval = (user_addr_t)th_pthread;
f1a1da6c
A
523
524 return(0);
525
526out1:
527 if (allocated != 0) {
964d3577 528 (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);
f1a1da6c
A
529 }
530out:
531 (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
532 (void)thread_terminate(th);
533 (void)thread_deallocate(th);
534 return(error);
535}
536
964d3577
A
537/**
538 * bsdthread_terminate system call. Used by pthread_terminate
539 */
f1a1da6c
A
540int
541_bsdthread_terminate(__unused struct proc *p,
542 user_addr_t stackaddr,
543 size_t size,
544 uint32_t kthport,
545 uint32_t sem,
546 __unused int32_t *retval)
547{
548 mach_vm_offset_t freeaddr;
549 mach_vm_size_t freesize;
550 kern_return_t kret;
2546420a 551 thread_t th = current_thread();
f1a1da6c
A
552
553 freeaddr = (mach_vm_offset_t)stackaddr;
554 freesize = size;
555
556 PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0);
557
558 if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
2546420a
A
559 if (pthread_kern->thread_get_tag(th) & THREAD_TAG_MAINTHREAD){
560 vm_map_t user_map = pthread_kern->current_map();
561 freesize = vm_map_trunc_page_mask((vm_map_offset_t)freesize - 1, vm_map_page_mask(user_map));
562 kret = mach_vm_behavior_set(user_map, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
563 assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
564 kret = kret ? kret : mach_vm_protect(user_map, freeaddr, freesize, FALSE, VM_PROT_NONE);
565 assert(kret == KERN_SUCCESS || kret == KERN_INVALID_ADDRESS);
566 } else {
567 kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
568 if (kret != KERN_SUCCESS) {
569 PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
570 return(EINVAL);
571 }
f1a1da6c
A
572 }
573 }
574
2546420a 575 (void) thread_terminate(th);
f1a1da6c
A
576 if (sem != MACH_PORT_NULL) {
577 kret = pthread_kern->semaphore_signal_internal_trap(sem);
578 if (kret != KERN_SUCCESS) {
579 PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);
580 return(EINVAL);
581 }
582 }
583
584 if (kthport != MACH_PORT_NULL) {
585 pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);
586 }
587
588 PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0, 0);
589
590 pthread_kern->thread_exception_return();
591 panic("bsdthread_terminate: still running\n");
592
593 PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0xff, 0, 0, 0);
594
595 return(0);
596}
597
964d3577
A
598/**
599 * bsdthread_register system call. Performs per-process setup. Responsible for
600 * returning capability bits to userspace and receiving userspace function addresses.
601 */
f1a1da6c
A
602int
603_bsdthread_register(struct proc *p,
604 user_addr_t threadstart,
605 user_addr_t wqthread,
606 int pthsize,
607 user_addr_t pthread_init_data,
2546420a 608 user_addr_t pthread_init_data_size,
f1a1da6c
A
609 uint64_t dispatchqueue_offset,
610 int32_t *retval)
611{
2546420a
A
612 /* We have to do this first so that it resets after fork */
613 pthread_kern->proc_set_stack_addr_hint(p, (user_addr_t)stack_addr_hint(p, pthread_kern->current_map()));
614
f1a1da6c
A
615 /* prevent multiple registrations */
616 if (pthread_kern->proc_get_register(p) != 0) {
617 return(EINVAL);
618 }
619 /* syscall randomizer test can pass bogus values */
620 if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) {
621 return(EINVAL);
622 }
623 pthread_kern->proc_set_threadstart(p, threadstart);
624 pthread_kern->proc_set_wqthread(p, wqthread);
625 pthread_kern->proc_set_pthsize(p, pthsize);
626 pthread_kern->proc_set_register(p);
627
628 /* if we have pthread_init_data, then we use that and target_concptr (which is an offset) to get data. */
629 if (pthread_init_data != 0) {
630 thread_qos_policy_data_t qos;
631
2546420a
A
632 struct _pthread_registration_data data = {};
633 size_t pthread_init_sz = MIN(sizeof(struct _pthread_registration_data), (size_t)pthread_init_data_size);
f1a1da6c
A
634
635 kern_return_t kr = copyin(pthread_init_data, &data, pthread_init_sz);
636 if (kr != KERN_SUCCESS) {
637 return EINVAL;
638 }
639
640 /* Incoming data from the data structure */
641 pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);
2546420a
A
642 if (data.version > offsetof(struct _pthread_registration_data, tsd_offset)
643 && data.tsd_offset < (uint32_t)pthsize) {
644 pthread_kern->proc_set_pthread_tsd_offset(p, data.tsd_offset);
645 }
f1a1da6c
A
646
647 /* Outgoing data that userspace expects as a reply */
2546420a 648 data.version = sizeof(struct _pthread_registration_data);
f1a1da6c
A
649 if (pthread_kern->qos_main_thread_active()) {
650 mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
651 boolean_t gd = FALSE;
652
653 kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
654 if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
655 /* An unspecified tier means the kernel wants us to impose legacy QoS upon the thread. */
656 qos.qos_tier = THREAD_QOS_LEGACY;
657 qos.tier_importance = 0;
658
659 kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
660 }
661
662 if (kr == KERN_SUCCESS) {
2546420a 663 data.main_qos = thread_qos_get_pthread_priority(qos.qos_tier);
f1a1da6c
A
664 } else {
665 data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
666 }
667 } else {
668 data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
669 }
670
671 kr = copyout(&data, pthread_init_data, pthread_init_sz);
672 if (kr != KERN_SUCCESS) {
673 return EINVAL;
674 }
675 } else {
676 pthread_kern->proc_set_dispatchqueue_offset(p, dispatchqueue_offset);
f1a1da6c
A
677 }
678
679 /* return the supported feature set as the return value. */
680 *retval = PTHREAD_FEATURE_SUPPORTED;
681
682 return(0);
683}
684
964d3577
A
685#pragma mark - QoS Manipulation
686
f1a1da6c
A
687int
688_bsdthread_ctl_set_qos(struct proc *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t tsd_priority_addr, user_addr_t arg3, int *retval)
689{
690 kern_return_t kr;
691 thread_t th;
692
693 pthread_priority_t priority;
694
695 /* Unused parameters must be zero. */
696 if (arg3 != 0) {
697 return EINVAL;
698 }
699
700 /* QoS is stored in a given slot in the pthread TSD. We need to copy that in and set our QoS based on it. */
701 if (proc_is64bit(p)) {
702 uint64_t v;
703 kr = copyin(tsd_priority_addr, &v, sizeof(v));
704 if (kr != KERN_SUCCESS) {
705 return kr;
706 }
707 priority = (int)(v & 0xffffffff);
708 } else {
709 uint32_t v;
710 kr = copyin(tsd_priority_addr, &v, sizeof(v));
711 if (kr != KERN_SUCCESS) {
712 return kr;
713 }
714 priority = v;
715 }
716
717 if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
718 return ESRCH;
719 }
720
721 /* <rdar://problem/16211829> Disable pthread_set_qos_class_np() on threads other than pthread_self */
722 if (th != current_thread()) {
723 thread_deallocate(th);
724 return EPERM;
725 }
726
727 int rv = _bsdthread_ctl_set_self(p, 0, priority, 0, _PTHREAD_SET_SELF_QOS_FLAG, retval);
728
729 /* Static param the thread: we just set QoS on it, so it's stuck in QoS land now. */
730 /* pthread_kern->thread_static_param(th, TRUE); */ // see <rdar://problem/16433744>, for details
731
732 thread_deallocate(th);
733
734 return rv;
735}
736
737static inline struct threadlist *
738util_get_thread_threadlist_entry(thread_t th)
739{
740 struct uthread *uth = pthread_kern->get_bsdthread_info(th);
741 if (uth) {
742 struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
743 return tl;
744 }
745 return NULL;
746}
747
f1a1da6c
A
748int
749_bsdthread_ctl_set_self(struct proc *p, user_addr_t __unused cmd, pthread_priority_t priority, mach_port_name_t voucher, _pthread_set_flags_t flags, int __unused *retval)
750{
751 thread_qos_policy_data_t qos;
752 mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
753 boolean_t gd = FALSE;
2546420a
A
754 bool was_manager_thread = false;
755 thread_t th = current_thread();
756 struct workqueue *wq = NULL;
757 struct threadlist *tl = NULL;
f1a1da6c
A
758
759 kern_return_t kr;
760 int qos_rv = 0, voucher_rv = 0, fixedpri_rv = 0;
761
2546420a
A
762 if ((flags & _PTHREAD_SET_SELF_WQ_KEVENT_UNBIND) != 0) {
763 tl = util_get_thread_threadlist_entry(th);
764 if (tl) {
765 wq = tl->th_workq;
766 } else {
767 goto qos;
768 }
769
770 workqueue_lock_spin(wq);
771 if (tl->th_flags & TH_LIST_KEVENT_BOUND) {
772 tl->th_flags &= ~TH_LIST_KEVENT_BOUND;
773 unsigned int kevent_flags = KEVENT_FLAG_WORKQ;
774 if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
775 kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER;
776 }
777
778 workqueue_unlock(wq);
779 kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, kevent_flags);
780 } else {
781 workqueue_unlock(wq);
782 }
783 }
784
785qos:
f1a1da6c 786 if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) {
2546420a 787 kr = pthread_kern->thread_policy_get(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
f1a1da6c
A
788 if (kr != KERN_SUCCESS) {
789 qos_rv = EINVAL;
790 goto voucher;
791 }
792
793 /* If we have main-thread QoS then we don't allow a thread to come out of QOS_CLASS_UNSPECIFIED. */
794 if (pthread_kern->qos_main_thread_active() && qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
795 qos_rv = EPERM;
796 goto voucher;
797 }
798
799 /* Get the work queue for tracing, also the threadlist for bucket manipulation. */
2546420a
A
800 if (!tl) {
801 tl = util_get_thread_threadlist_entry(th);
802 if (tl) wq = tl->th_workq;
f1a1da6c
A
803 }
804
2546420a 805 PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_START, wq, qos.qos_tier, qos.tier_importance, 0, 0);
f1a1da6c 806
2546420a 807 qos.qos_tier = pthread_priority_get_thread_qos(priority);
f1a1da6c
A
808 qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 : _pthread_priority_get_relpri(priority);
809
2546420a 810 if (qos.qos_tier == QOS_CLASS_UNSPECIFIED) {
f1a1da6c
A
811 qos_rv = EINVAL;
812 goto voucher;
813 }
814
815 /* If we're a workqueue thread, the threadlist item priority needs adjusting, along with the bucket we were running in. */
816 if (tl) {
2546420a
A
817 workqueue_lock_spin(wq);
818 bool now_under_constrained_limit = false;
819
820 assert(!(tl->th_flags & TH_LIST_KEVENT_BOUND));
821
822 kr = pthread_kern->thread_set_workq_qos(th, qos.qos_tier, qos.tier_importance);
823 assert(kr == KERN_SUCCESS || kr == KERN_TERMINATED);
f1a1da6c
A
824
825 /* Fix up counters. */
826 uint8_t old_bucket = tl->th_priority;
827 uint8_t new_bucket = pthread_priority_get_class_index(priority);
2546420a
A
828 if (old_bucket == WORKQUEUE_EVENT_MANAGER_BUCKET) {
829 was_manager_thread = true;
830 }
f1a1da6c
A
831
832 uint32_t old_active = OSAddAtomic(-1, &wq->wq_thactive_count[old_bucket]);
833 OSAddAtomic(1, &wq->wq_thactive_count[new_bucket]);
834
835 wq->wq_thscheduled_count[old_bucket]--;
836 wq->wq_thscheduled_count[new_bucket]++;
837
2546420a
A
838 bool old_overcommit = !(tl->th_flags & TH_LIST_CONSTRAINED);
839 bool new_overcommit = priority & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;
840 if (!old_overcommit && new_overcommit) {
841 wq->wq_constrained_threads_scheduled--;
842 tl->th_flags &= ~TH_LIST_CONSTRAINED;
843 if (wq->wq_constrained_threads_scheduled == wq_max_constrained_threads - 1) {
844 now_under_constrained_limit = true;
845 }
846 } else if (old_overcommit && !new_overcommit) {
847 wq->wq_constrained_threads_scheduled++;
848 tl->th_flags |= TH_LIST_CONSTRAINED;
849 }
850
f1a1da6c
A
851 tl->th_priority = new_bucket;
852
2546420a
A
853 /* If we were at the ceiling of threads for a given bucket, we have
854 * to reevaluate whether we should start more work.
f1a1da6c 855 */
2546420a 856 if (old_active == wq->wq_reqconc[old_bucket] || now_under_constrained_limit) {
f1a1da6c 857 /* workqueue_run_nextreq will drop the workqueue lock in all exit paths. */
2546420a 858 (void)workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_DEFAULT, 0, false);
f1a1da6c 859 } else {
2546420a
A
860 workqueue_unlock(wq);
861 }
862 } else {
863 kr = pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
864 if (kr != KERN_SUCCESS) {
865 qos_rv = EINVAL;
f1a1da6c
A
866 }
867 }
868
2546420a 869 PTHREAD_TRACE_WQ(TRACE_pthread_set_qos_self | DBG_FUNC_END, wq, qos.qos_tier, qos.tier_importance, 0, 0);
f1a1da6c
A
870 }
871
872voucher:
873 if ((flags & _PTHREAD_SET_SELF_VOUCHER_FLAG) != 0) {
874 kr = pthread_kern->thread_set_voucher_name(voucher);
875 if (kr != KERN_SUCCESS) {
876 voucher_rv = ENOENT;
877 goto fixedpri;
878 }
879 }
880
881fixedpri:
2546420a 882 if (qos_rv) goto done;
f1a1da6c 883 if ((flags & _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG) != 0) {
964d3577 884 thread_extended_policy_data_t extpol = {.timeshare = 0};
2546420a
A
885
886 if (!tl) tl = util_get_thread_threadlist_entry(th);
964d3577
A
887 if (tl) {
888 /* Not allowed on workqueue threads */
889 fixedpri_rv = ENOTSUP;
890 goto done;
891 }
892
2546420a 893 kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT);
964d3577
A
894 if (kr != KERN_SUCCESS) {
895 fixedpri_rv = EINVAL;
896 goto done;
897 }
898 } else if ((flags & _PTHREAD_SET_SELF_TIMESHARE_FLAG) != 0) {
899 thread_extended_policy_data_t extpol = {.timeshare = 1};
2546420a
A
900
901 if (!tl) tl = util_get_thread_threadlist_entry(th);
f1a1da6c 902 if (tl) {
964d3577 903 /* Not allowed on workqueue threads */
f1a1da6c
A
904 fixedpri_rv = ENOTSUP;
905 goto done;
906 }
907
2546420a 908 kr = pthread_kern->thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT);
f1a1da6c
A
909 if (kr != KERN_SUCCESS) {
910 fixedpri_rv = EINVAL;
911 goto done;
912 }
913 }
2546420a 914
f1a1da6c
A
915done:
916 if (qos_rv && voucher_rv) {
917 /* Both failed, give that a unique error. */
918 return EBADMSG;
919 }
920
921 if (qos_rv) {
922 return qos_rv;
923 }
924
925 if (voucher_rv) {
926 return voucher_rv;
927 }
928
929 if (fixedpri_rv) {
930 return fixedpri_rv;
931 }
932
933 return 0;
934}
935
936int
215aeb03 937_bsdthread_ctl_qos_override_start(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval)
f1a1da6c
A
938{
939 thread_t th;
940 int rv = 0;
941
f1a1da6c
A
942 if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
943 return ESRCH;
944 }
945
2546420a 946 int override_qos = pthread_priority_get_thread_qos(priority);
f1a1da6c
A
947
948 struct threadlist *tl = util_get_thread_threadlist_entry(th);
949 if (tl) {
2546420a 950 PTHREAD_TRACE_WQ(TRACE_wq_override_start | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0);
f1a1da6c
A
951 }
952
953 /* The only failure case here would be passing a tid and having it look up the thread; since we pass the uthread, this always succeeds. */
2546420a
A
954 pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE,
955 resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE, USER_ADDR_NULL, MACH_PORT_NULL);
f1a1da6c
A
956 thread_deallocate(th);
957 return rv;
958}
959
960int
215aeb03 961_bsdthread_ctl_qos_override_end(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t resource, user_addr_t arg3, int __unused *retval)
f1a1da6c
A
962{
963 thread_t th;
964 int rv = 0;
965
215aeb03 966 if (arg3 != 0) {
f1a1da6c
A
967 return EINVAL;
968 }
969
970 if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
971 return ESRCH;
972 }
973
974 struct uthread *uth = pthread_kern->get_bsdthread_info(th);
975
976 struct threadlist *tl = util_get_thread_threadlist_entry(th);
977 if (tl) {
2546420a 978 PTHREAD_TRACE_WQ(TRACE_wq_override_end | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 0, 0, 0);
f1a1da6c
A
979 }
980
215aeb03 981 pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE);
f1a1da6c
A
982
983 thread_deallocate(th);
984 return rv;
985}
986
2546420a
A
987static int
988_bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, user_addr_t ulock_addr)
215aeb03
A
989{
990 thread_t th;
991 int rv = 0;
992
f1a1da6c
A
993 if ((th = port_name_to_thread(kport)) == THREAD_NULL) {
994 return ESRCH;
995 }
996
2546420a 997 int override_qos = pthread_priority_get_thread_qos(priority);
f1a1da6c
A
998
999 struct threadlist *tl = util_get_thread_threadlist_entry(th);
1000 if (!tl) {
1001 thread_deallocate(th);
1002 return EPERM;
1003 }
1004
2546420a 1005 PTHREAD_TRACE_WQ(TRACE_wq_override_dispatch | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0);
f1a1da6c 1006
2546420a
A
1007 rv = pthread_kern->proc_usynch_thread_qos_add_override_for_resource_check_owner(th, override_qos, TRUE,
1008 resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE, ulock_addr, kport);
f1a1da6c
A
1009
1010 thread_deallocate(th);
1011 return rv;
1012}
1013
2546420a
A
1014int _bsdthread_ctl_qos_dispatch_asynchronous_override_add(struct proc __unused *p, user_addr_t __unused cmd,
1015 mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval)
1016{
1017 return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, resource, USER_ADDR_NULL);
1018}
1019
1020int
1021_bsdthread_ctl_qos_override_dispatch(struct proc *p __unused, user_addr_t cmd __unused, mach_port_name_t kport, pthread_priority_t priority, user_addr_t ulock_addr, int __unused *retval)
1022{
1023 return _bsdthread_ctl_qos_dispatch_asynchronous_override_add_internal(kport, priority, USER_ADDR_NULL, ulock_addr);
1024}
1025
f1a1da6c 1026int
215aeb03
A
1027_bsdthread_ctl_qos_override_reset(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)
1028{
1029 if (arg1 != 0 || arg2 != 0 || arg3 != 0) {
1030 return EINVAL;
1031 }
1032
1033 return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, 1 /* reset_all */, 0, 0, retval);
1034}
1035
1036int
1037_bsdthread_ctl_qos_dispatch_asynchronous_override_reset(struct proc __unused *p, user_addr_t __unused cmd, int reset_all, user_addr_t resource, user_addr_t arg3, int __unused *retval)
f1a1da6c 1038{
215aeb03 1039 if ((reset_all && (resource != 0)) || arg3 != 0) {
f1a1da6c
A
1040 return EINVAL;
1041 }
1042
2546420a
A
1043 thread_t th = current_thread();
1044 struct uthread *uth = pthread_kern->get_bsdthread_info(th);
1045 struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
f1a1da6c 1046
2546420a
A
1047 if (!tl) {
1048 return EPERM;
f1a1da6c
A
1049 }
1050
2546420a
A
1051 PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_NONE, tl->th_workq, 0, 0, 0, 0);
1052
1053 resource = reset_all ? THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD : resource;
1054 pthread_kern->proc_usynch_thread_qos_reset_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE);
1055
1056 return 0;
f1a1da6c
A
1057}
1058
1059int
1060_bsdthread_ctl(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)
1061{
1062 switch (cmd) {
2546420a
A
1063 case BSDTHREAD_CTL_SET_QOS:
1064 return _bsdthread_ctl_set_qos(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
1065 case BSDTHREAD_CTL_QOS_OVERRIDE_START:
1066 return _bsdthread_ctl_qos_override_start(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
1067 case BSDTHREAD_CTL_QOS_OVERRIDE_END:
1068 return _bsdthread_ctl_qos_override_end(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
1069 case BSDTHREAD_CTL_QOS_OVERRIDE_RESET:
1070 return _bsdthread_ctl_qos_override_reset(p, cmd, arg1, arg2, arg3, retval);
1071 case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH:
1072 return _bsdthread_ctl_qos_override_dispatch(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
1073 case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD:
1074 return _bsdthread_ctl_qos_dispatch_asynchronous_override_add(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
1075 case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET:
1076 return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, (int)arg1, arg2, arg3, retval);
1077 case BSDTHREAD_CTL_SET_SELF:
1078 return _bsdthread_ctl_set_self(p, cmd, (pthread_priority_t)arg1, (mach_port_name_t)arg2, (_pthread_set_flags_t)arg3, retval);
1079 default:
1080 return EINVAL;
f1a1da6c
A
1081 }
1082}
1083
964d3577 1084#pragma mark - Workqueue Implementation
964d3577 1085#pragma mark workqueue lock
f1a1da6c 1086
2546420a
A
1087static boolean_t workqueue_lock_spin_is_acquired_kdp(struct workqueue *wq) {
1088 return kdp_lck_spin_is_acquired(&wq->wq_lock);
f1a1da6c
A
1089}
1090
f1a1da6c 1091static void
2546420a 1092workqueue_lock_spin(struct workqueue *wq)
f1a1da6c 1093{
2546420a
A
1094 boolean_t interrupt_state = ml_set_interrupts_enabled(FALSE);
1095 lck_spin_lock(&wq->wq_lock);
1096 wq->wq_interrupt_state = interrupt_state;
f1a1da6c
A
1097}
1098
1099static void
2546420a 1100workqueue_unlock(struct workqueue *wq)
f1a1da6c 1101{
2546420a
A
1102 boolean_t interrupt_state = wq->wq_interrupt_state;
1103 lck_spin_unlock(&wq->wq_lock);
1104 ml_set_interrupts_enabled(interrupt_state);
f1a1da6c
A
1105}
1106
964d3577 1107#pragma mark workqueue add timer
f1a1da6c 1108
964d3577
A
1109/**
1110 * Sets up the timer which will call out to workqueue_add_timer
1111 */
f1a1da6c
A
1112static void
1113workqueue_interval_timer_start(struct workqueue *wq)
1114{
1115 uint64_t deadline;
1116
964d3577
A
1117 /* n.b. wq_timer_interval is reset to 0 in workqueue_add_timer if the
1118 ATIMER_RUNNING flag is not present. The net effect here is that if a
1119 sequence of threads is required, we'll double the time before we give out
1120 the next one. */
f1a1da6c
A
1121 if (wq->wq_timer_interval == 0) {
1122 wq->wq_timer_interval = wq_stalled_window_usecs;
1123
1124 } else {
1125 wq->wq_timer_interval = wq->wq_timer_interval * 2;
1126
1127 if (wq->wq_timer_interval > wq_max_timer_interval_usecs) {
1128 wq->wq_timer_interval = wq_max_timer_interval_usecs;
1129 }
1130 }
1131 clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline);
1132
2546420a
A
1133 PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, wq->wq_flags, wq->wq_timer_interval, 0);
1134
1135 boolean_t ret = thread_call_enter1_delayed(wq->wq_atimer_delayed_call, wq->wq_atimer_delayed_call, deadline);
1136 if (ret) {
1137 panic("delayed_call was already enqueued");
1138 }
1139}
1140
1141/**
1142 * Immediately trigger the workqueue_add_timer
1143 */
1144static void
1145workqueue_interval_timer_trigger(struct workqueue *wq)
1146{
1147 PTHREAD_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, wq->wq_flags, 0, 0);
f1a1da6c 1148
2546420a
A
1149 boolean_t ret = thread_call_enter1(wq->wq_atimer_immediate_call, wq->wq_atimer_immediate_call);
1150 if (ret) {
1151 panic("immediate_call was already enqueued");
1152 }
f1a1da6c
A
1153}
1154
964d3577
A
1155/**
1156 * returns whether lastblocked_tsp is within wq_stalled_window_usecs of cur_ts
1157 */
f1a1da6c
A
1158static boolean_t
1159wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp)
1160{
1161 clock_sec_t secs;
1162 clock_usec_t usecs;
1163 uint64_t lastblocked_ts;
1164 uint64_t elapsed;
1165
1166 /*
1167 * the timestamp is updated atomically w/o holding the workqueue lock
1168 * so we need to do an atomic read of the 64 bits so that we don't see
1169 * a mismatched pair of 32 bit reads... we accomplish this in an architecturally
1170 * independent fashion by using OSCompareAndSwap64 to write back the
1171 * value we grabbed... if it succeeds, then we have a good timestamp to
1172 * evaluate... if it fails, we straddled grabbing the timestamp while it
1173 * was being updated... treat a failed update as a busy thread since
1174 * it implies we are about to see a really fresh timestamp anyway
1175 */
1176 lastblocked_ts = *lastblocked_tsp;
1177
1178 if ( !OSCompareAndSwap64((UInt64)lastblocked_ts, (UInt64)lastblocked_ts, lastblocked_tsp))
1179 return (TRUE);
1180
1181 if (lastblocked_ts >= cur_ts) {
1182 /*
1183 * because the update of the timestamp when a thread blocks isn't
1184 * serialized against us looking at it (i.e. we don't hold the workq lock)
1185 * it's possible to have a timestamp that matches the current time or
1186 * that even looks to be in the future relative to when we grabbed the current
1187 * time... just treat this as a busy thread since it must have just blocked.
1188 */
1189 return (TRUE);
1190 }
1191 elapsed = cur_ts - lastblocked_ts;
1192
1193 pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);
1194
1195 if (secs == 0 && usecs < wq_stalled_window_usecs)
1196 return (TRUE);
1197 return (FALSE);
1198}
1199
2546420a
A
1200static inline bool
1201WQ_TIMER_DELAYED_NEEDED(struct workqueue *wq)
1202{
1203 int oldflags;
1204retry:
1205 oldflags = wq->wq_flags;
1206 if ( !(oldflags & (WQ_EXITING | WQ_ATIMER_DELAYED_RUNNING))) {
1207 if (OSCompareAndSwap(oldflags, oldflags | WQ_ATIMER_DELAYED_RUNNING, (UInt32 *)&wq->wq_flags)) {
1208 return true;
1209 } else {
1210 goto retry;
1211 }
1212 }
1213 return false;
1214}
1215
1216static inline bool
1217WQ_TIMER_IMMEDIATE_NEEDED(struct workqueue *wq)
1218{
1219 int oldflags;
1220retry:
1221 oldflags = wq->wq_flags;
1222 if ( !(oldflags & (WQ_EXITING | WQ_ATIMER_IMMEDIATE_RUNNING))) {
1223 if (OSCompareAndSwap(oldflags, oldflags | WQ_ATIMER_IMMEDIATE_RUNNING, (UInt32 *)&wq->wq_flags)) {
1224 return true;
1225 } else {
1226 goto retry;
1227 }
1228 }
1229 return false;
1230}
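
/*
 * Editor's note (sketch of the usage pattern these helpers enforce; see the
 * long comment in workqueue_add_timer() below): callers only enqueue a
 * thread_call after winning the CAS race to set the corresponding RUNNING
 * flag, e.g.
 *
 *     if (WQ_TIMER_DELAYED_NEEDED(wq)) {
 *         workqueue_interval_timer_start(wq);   // thread_call_enter1_delayed
 *     }
 *
 * so at most one delayed and one immediate add_timer invocation can be
 * pending at any time.
 */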
f1a1da6c 1231
964d3577
A
1232/**
1233 * handler function for the timer
1234 */
f1a1da6c 1235static void
2546420a 1236workqueue_add_timer(struct workqueue *wq, thread_call_t thread_call_self)
f1a1da6c
A
1237{
1238 proc_t p;
1239 boolean_t start_timer = FALSE;
1240 boolean_t retval;
964d3577 1241
2546420a 1242 PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_START, wq, wq->wq_flags, wq->wq_nthreads, wq->wq_thidlecount, 0);
f1a1da6c
A
1243
1244 p = wq->wq_proc;
1245
2546420a 1246 workqueue_lock_spin(wq);
f1a1da6c
A
1247
1248 /*
2546420a 1249 * There are two tricky issues here.
f1a1da6c 1250 *
2546420a
A
1251 * First issue: we start the thread_call's that invoke this routine without
1252 * the workqueue lock held. The scheduler callback needs to trigger
1253 * reevaluation of the number of running threads but shouldn't take that
1254 * lock, so we can't use it to synchronize state around the thread_call.
1255 * As a result, it might re-enter the thread_call while this routine is
1256 * already running. This could cause it to fire a second time and we'll
1257 * have two add_timers running at once. Obviously, we don't want that to
1258 * keep stacking, so we need to keep it at two timers.
1259 *
1260 * Solution: use wq_flags (accessed via atomic CAS) to synchronize the
1261 * enqueue of the thread_call itself. When a thread needs to trigger the
1262 * add_timer, it checks for ATIMER_DELAYED_RUNNING and, when not set, sets
1263 * the flag then does a thread_call_enter. We'll then remove that flag
1264 * only once we've got the lock and it's safe for the thread_call to be
1265 * entered again.
1266 *
1267 * Second issue: we need to make sure that the two timers don't execute this
1268 * routine concurrently. We can't use the workqueue lock for this because
1269 * we'll need to drop it during our execution.
1270 *
1271 * Solution: use WQL_ATIMER_BUSY as a condition variable to indicate that
1272 * we are currently executing the routine and the next thread should wait.
1273 *
1274 * After all that, we arrive at the following four possible states:
1275 * !WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY no pending timer, no active timer
1276 * !WQ_ATIMER_DELAYED_RUNNING && WQL_ATIMER_BUSY no pending timer, 1 active timer
1277 * WQ_ATIMER_DELAYED_RUNNING && !WQL_ATIMER_BUSY 1 pending timer, no active timer
1278 * WQ_ATIMER_DELAYED_RUNNING && WQL_ATIMER_BUSY 1 pending timer, 1 active timer
1279 *
1280 * A further complication: sometimes we need to trigger this function to run
1281 * without delay. Because we aren't under a lock between setting
1282 * WQ_ATIMER_DELAYED_RUNNING and calling thread_call_enter, we can't simply
1283 * re-enter the thread call: if thread_call_enter() returned false, we
1284 * wouldn't be able to distinguish the case where the thread_call had
1285 * already fired from the case where it hadn't been entered yet from the
1286 * other thread. So, we use a separate thread_call for immediate
1287 * invocations, and a separate RUNNING flag, WQ_ATIMER_IMMEDIATE_RUNNING.
f1a1da6c 1288 */
2546420a 1289
f1a1da6c
A
1290 while (wq->wq_lflags & WQL_ATIMER_BUSY) {
1291 wq->wq_lflags |= WQL_ATIMER_WAITING;
1292
1293 assert_wait((caddr_t)wq, (THREAD_UNINT));
2546420a 1294 workqueue_unlock(wq);
f1a1da6c
A
1295
1296 thread_block(THREAD_CONTINUE_NULL);
1297
2546420a 1298 workqueue_lock_spin(wq);
f1a1da6c
A
1299 }
1300 wq->wq_lflags |= WQL_ATIMER_BUSY;
1301
1302 /*
2546420a 1303 * Decide which timer we are and remove the RUNNING flag.
f1a1da6c 1304 */
2546420a
A
1305 if (thread_call_self == wq->wq_atimer_delayed_call) {
1306 if ((wq->wq_flags & WQ_ATIMER_DELAYED_RUNNING) == 0) {
1307 panic("workqueue_add_timer is the delayed timer but the delayed running flag isn't set");
1308 }
1309 WQ_UNSETFLAG(wq, WQ_ATIMER_DELAYED_RUNNING);
1310 } else if (thread_call_self == wq->wq_atimer_immediate_call) {
1311 if ((wq->wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) == 0) {
1312 panic("workqueue_add_timer is the immediate timer but the immediate running flag isn't set");
1313 }
1314 WQ_UNSETFLAG(wq, WQ_ATIMER_IMMEDIATE_RUNNING);
1315 } else {
1316 panic("workqueue_add_timer can't figure out which timer it is");
1317 }
f1a1da6c
A
1318
1319again:
1320 retval = TRUE;
f1a1da6c 1321 if ( !(wq->wq_flags & WQ_EXITING)) {
964d3577 1322 boolean_t add_thread = FALSE;
f1a1da6c
A
1323 /*
1324 * check to see if the stall frequency was beyond our tolerance
2546420a 1325 * or we have work on the queue, but haven't scheduled any
f1a1da6c
A
1326 * new work within our acceptable time interval because
1327 * there were no idle threads left to schedule
1328 */
1329 if (wq->wq_reqcount) {
964d3577
A
1330 uint32_t priclass = 0;
1331 uint32_t thactive_count = 0;
1332 uint64_t curtime = mach_absolute_time();
1333 uint64_t busycount = 0;
1334
1335 if (wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] &&
1336 wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){
1337 priclass = WORKQUEUE_EVENT_MANAGER_BUCKET;
1338 } else {
1339 for (priclass = 0; priclass < WORKQUEUE_NUM_BUCKETS; priclass++) {
1340 if (wq->wq_requests[priclass])
1341 break;
1342 }
f1a1da6c 1343 }
f1a1da6c 1344
964d3577
A
1345 if (priclass < WORKQUEUE_EVENT_MANAGER_BUCKET){
1346 /*
1347 * Compute a metric for how many threads are active. We
1348 * find the highest priority request outstanding and then add up
1349 * the number of active threads in that and all higher-priority
1350 * buckets. We'll also add any "busy" threads which are not
1351 * active but blocked recently enough that we can't be sure
1352 * they've gone idle yet. We'll then compare this metric to our
1353 * max concurrency to decide whether to add a new thread.
1354 */
1355 for (uint32_t i = 0; i <= priclass; i++) {
1356 thactive_count += wq->wq_thactive_count[i];
f1a1da6c 1357
2546420a 1358 if (wq->wq_thscheduled_count[i] < wq->wq_thactive_count[i]) {
964d3577
A
1359 if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i]))
1360 busycount++;
1361 }
f1a1da6c
A
1362 }
1363 }
964d3577
A
1364
1365 if (thactive_count + busycount < wq->wq_max_concurrency ||
1366 priclass == WORKQUEUE_EVENT_MANAGER_BUCKET) {
f1a1da6c
A
1367
1368 if (wq->wq_thidlecount == 0) {
1369 /*
1370 * if we have no idle threads, try to add one
1371 */
964d3577 1372 retval = workqueue_addnewthread(wq, priclass == WORKQUEUE_EVENT_MANAGER_BUCKET);
f1a1da6c
A
1373 }
1374 add_thread = TRUE;
1375 }
1376
1377 if (wq->wq_reqcount) {
1378 /*
1379 * as long as we have threads to schedule, and we successfully
1380 * scheduled new work, keep trying
1381 */
1382 while (wq->wq_thidlecount && !(wq->wq_flags & WQ_EXITING)) {
1383 /*
1384 * workqueue_run_nextreq is responsible for
1385 * dropping the workqueue lock in all cases
1386 */
2546420a
A
1387 retval = (workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_ADD_TIMER, 0, false) != THREAD_NULL);
1388 workqueue_lock_spin(wq);
f1a1da6c
A
1389
1390 if (retval == FALSE)
1391 break;
1392 }
1393 if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_reqcount) {
1394
1395 if (wq->wq_thidlecount == 0 && retval == TRUE && add_thread == TRUE)
1396 goto again;
1397
2546420a
A
1398 if (wq->wq_thidlecount == 0 || busycount) {
1399 start_timer = WQ_TIMER_DELAYED_NEEDED(wq);
1400 }
f1a1da6c 1401
2546420a 1402 PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_NONE, wq, wq->wq_reqcount, wq->wq_thidlecount, busycount, 0);
f1a1da6c
A
1403 }
1404 }
1405 }
1406 }
964d3577 1407
2546420a 1408 /*
964d3577
A
1409 * If we called WQ_TIMER_DELAYED_NEEDED above, then this flag will be set if that
1410 * call marked the timer running. If so, we let the timer interval grow.
1411 * Otherwise, we reset it back to 0.
1412 */
2546420a 1413 if (!(wq->wq_flags & WQ_ATIMER_DELAYED_RUNNING)) {
f1a1da6c 1414 wq->wq_timer_interval = 0;
2546420a 1415 }
f1a1da6c
A
1416
1417 wq->wq_lflags &= ~WQL_ATIMER_BUSY;
1418
1419 if ((wq->wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) {
1420 /*
2546420a 1421 * wakeup the thread hung up in _workqueue_mark_exiting or workqueue_add_timer waiting for this timer
f1a1da6c
A
1422 * to finish getting out of the way
1423 */
1424 wq->wq_lflags &= ~WQL_ATIMER_WAITING;
1425 wakeup(wq);
1426 }
1427
2546420a 1428 PTHREAD_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_END, wq, start_timer, wq->wq_nthreads, wq->wq_thidlecount, 0);
f1a1da6c 1429
2546420a 1430 workqueue_unlock(wq);
f1a1da6c 1431
964d3577
A
1432 if (start_timer == TRUE)
1433 workqueue_interval_timer_start(wq);
f1a1da6c
A
1434}
1435
964d3577 1436#pragma mark thread state tracking
f1a1da6c 1437
964d3577 1438// called by spinlock code when trying to yield to lock owner
f1a1da6c
A
1439void
1440_workqueue_thread_yielded(void)
1441{
1442 struct workqueue *wq;
1443 proc_t p;
1444
1445 p = current_proc();
1446
1447 if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL || wq->wq_reqcount == 0)
1448 return;
2546420a
A
1449
1450 workqueue_lock_spin(wq);
f1a1da6c
A
1451
1452 if (wq->wq_reqcount) {
1453 uint64_t curtime;
1454 uint64_t elapsed;
1455 clock_sec_t secs;
1456 clock_usec_t usecs;
1457
1458 if (wq->wq_thread_yielded_count++ == 0)
1459 wq->wq_thread_yielded_timestamp = mach_absolute_time();
1460
1461 if (wq->wq_thread_yielded_count < wq_yielded_threshold) {
2546420a 1462 workqueue_unlock(wq);
f1a1da6c
A
1463 return;
1464 }
1465
2546420a 1466 PTHREAD_TRACE_WQ(TRACE_wq_thread_yielded | DBG_FUNC_START, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 0, 0);
f1a1da6c
A
1467
1468 wq->wq_thread_yielded_count = 0;
1469
1470 curtime = mach_absolute_time();
1471 elapsed = curtime - wq->wq_thread_yielded_timestamp;
1472 pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);
1473
1474 if (secs == 0 && usecs < wq_yielded_window_usecs) {
1475
1476 if (wq->wq_thidlecount == 0) {
1477 workqueue_addnewthread(wq, TRUE);
1478 /*
1479 * 'workqueue_addnewthread' drops the workqueue lock
1480 * when creating the new thread and then retakes it before
1481 * returning... this window allows other threads to process
1482 * requests, so we need to recheck for available work;
1483 * if none is found, we just return... the newly created thread
1484 * will eventually get used (if it hasn't already)...
1485 */
1486 if (wq->wq_reqcount == 0) {
2546420a 1487 workqueue_unlock(wq);
f1a1da6c
A
1488 return;
1489 }
1490 }
1491 if (wq->wq_thidlecount) {
2546420a 1492 (void)workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_UNCONSTRAINED, 0, false);
f1a1da6c
A
1493 /*
1494 * workqueue_run_nextreq is responsible for
1495 * dropping the workqueue lock in all cases
1496 */
2546420a 1497 PTHREAD_TRACE_WQ(TRACE_wq_thread_yielded | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 1, 0);
f1a1da6c
A
1498
1499 return;
1500 }
1501 }
2546420a 1502 PTHREAD_TRACE_WQ(TRACE_wq_thread_yielded | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 2, 0);
f1a1da6c 1503 }
2546420a 1504 workqueue_unlock(wq);
f1a1da6c
A
1505}
1506
f1a1da6c
A
1507static void
1508workqueue_callback(int type, thread_t thread)
1509{
1510 struct uthread *uth;
1511 struct threadlist *tl;
1512 struct workqueue *wq;
1513
1514 uth = pthread_kern->get_bsdthread_info(thread);
1515 tl = pthread_kern->uthread_get_threadlist(uth);
1516 wq = tl->th_workq;
1517
1518 switch (type) {
1519 case SCHED_CALL_BLOCK: {
1520 uint32_t old_activecount;
1521 boolean_t start_timer = FALSE;
1522
1523 old_activecount = OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]);
1524
964d3577
A
1525 /*
1526 * If we blocked and were at the requested concurrency previously, we may
1527 * need to spin up a new thread. Of course, if it's the event manager
1528 * then that's moot, so ignore that case.
1529 */
1530 if (old_activecount == wq->wq_reqconc[tl->th_priority] &&
1531 tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET) {
f1a1da6c
A
1532 uint64_t curtime;
1533 UInt64 *lastblocked_ptr;
1534
1535 /*
1536 * the number of active threads at this priority
1537 * has fallen below the maximum number of concurrent
1538 * threads that we're allowed to run
1539 */
1540 lastblocked_ptr = (UInt64 *)&wq->wq_lastblocked_ts[tl->th_priority];
1541 curtime = mach_absolute_time();
1542
1543 /*
1544 * if we collide with another thread trying to update the last_blocked (really unlikely
1545 * since another thread would have to get scheduled and then block after we start down
1546 * this path), it's not a problem. Either timestamp is adequate, so no need to retry
1547 */
1548
1549 OSCompareAndSwap64(*lastblocked_ptr, (UInt64)curtime, lastblocked_ptr);
1550
1551 if (wq->wq_reqcount) {
1552 /*
2546420a
A
1553 * We have work to do so start up the timer if it's not
1554 * running; it'll sort out whether we need to start another
1555 * thread
f1a1da6c 1556 */
2546420a 1557 start_timer = WQ_TIMER_DELAYED_NEEDED(wq);
f1a1da6c
A
1558 }
1559
1560 if (start_timer == TRUE) {
1561 workqueue_interval_timer_start(wq);
1562 }
1563 }
2546420a 1564 PTHREAD_TRACE1_WQ(TRACE_wq_thread_block | DBG_FUNC_START, wq, old_activecount, tl->th_priority, start_timer, thread_tid(thread));
f1a1da6c
A
1565 break;
1566 }
1567 case SCHED_CALL_UNBLOCK:
1568 /*
1569 * we cannot take the workqueue_lock here...
1570 * an UNBLOCK can occur from a timer event which
1571 * is run from an interrupt context... if the workqueue_lock
1572 * is already held by this processor, we'll deadlock...
1573 * the thread lock for the thread being UNBLOCKED
1574 * is also held
1575 */
1576 OSAddAtomic(1, &wq->wq_thactive_count[tl->th_priority]);
2546420a
A
1577
1578 PTHREAD_TRACE1_WQ(TRACE_wq_thread_block | DBG_FUNC_END, wq, wq->wq_threads_scheduled, tl->th_priority, 0, thread_tid(thread));
1579
f1a1da6c
A
1580 break;
1581 }
1582}
1583
1584sched_call_t
1585_workqueue_get_sched_callback(void)
1586{
1587 return workqueue_callback;
1588}
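/*
 * Illustrative sketch (not part of this file): the SCHED_CALL_BLOCK / SCHED_CALL_UNBLOCK
 * callback above boils down to an atomic per-bucket active count plus a loosely
 * maintained "last blocked" timestamp, where losing the compare-and-swap race is
 * acceptable because either thread's timestamp is adequate.  A userspace model with
 * C11 atomics (ex_* names are hypothetical):
 */
#if 0 /* example only */
#include <stdatomic.h>
#include <stdint.h>

struct ex_bucket {
	_Atomic int32_t  active;        /* models wq_thactive_count[bucket] */
	_Atomic uint64_t last_blocked;  /* models wq_lastblocked_ts[bucket] */
};

static void
ex_on_block(struct ex_bucket *b, uint64_t now)
{
	atomic_fetch_sub_explicit(&b->active, 1, memory_order_relaxed);

	/* One attempt only: if another blocker raced us, its timestamp is fine. */
	uint64_t expected = atomic_load_explicit(&b->last_blocked, memory_order_relaxed);
	atomic_compare_exchange_strong_explicit(&b->last_blocked, &expected, now,
	    memory_order_relaxed, memory_order_relaxed);
}

static void
ex_on_unblock(struct ex_bucket *b)
{
	/* No lock taken here, mirroring the "can't take workqueue_lock" rule above. */
	atomic_fetch_add_explicit(&b->active, 1, memory_order_relaxed);
}
#endif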
1589
964d3577
A
1590#pragma mark thread addition/removal
1591
2546420a
A
1592static mach_vm_size_t
1593_workqueue_allocsize(struct workqueue *wq)
1594{
1595 proc_t p = wq->wq_proc;
1596 mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map);
1597 mach_vm_size_t pthread_size =
1598 vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map));
1599 return guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
1600}
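/*
 * Illustrative sketch (not part of this file): the allocation sized above is laid
 * out, lowest address first, as guard page | stack | pthread_t area, with the guard
 * at the bottom and the stack growing down toward it.  The sizes below are assumed
 * placeholders (4K page, 512K default stack, 8K rounded pthread area), not the real
 * constants; only the arithmetic mirrors _workqueue_allocsize().
 */
#if 0 /* example only */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	const uint64_t guardsize    = 4 * 1024;     /* one VM page (assumed) */
	const uint64_t stacksize    = 512 * 1024;   /* PTH_DEFAULT_STACKSIZE (assumed) */
	const uint64_t pthread_size = 8 * 1024;     /* rounded pthread_t + TSD (assumed) */
	const uint64_t total        = guardsize + stacksize + pthread_size;

	printf("guard     [0x%llx, 0x%llx)\n",
	    0ULL, (unsigned long long)guardsize);
	printf("stack     [0x%llx, 0x%llx)  (grows down toward the guard)\n",
	    (unsigned long long)guardsize, (unsigned long long)(guardsize + stacksize));
	printf("pthread_t [0x%llx, 0x%llx)\n",
	    (unsigned long long)(guardsize + stacksize), (unsigned long long)total);
	return 0;
}
#endif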
1601
964d3577
A
1602/**
1603 * pop goes the thread
2546420a
A
1604 *
1605	 * If fromexit is set, the call is from workqueue_exit(),
1606 * so some cleanups are to be avoided.
964d3577 1607 */
f1a1da6c 1608static void
2546420a 1609workqueue_removethread(struct threadlist *tl, bool fromexit, bool first_use)
f1a1da6c 1610{
f1a1da6c 1611 struct uthread * uth;
2546420a 1612 struct workqueue * wq = tl->th_workq;
f1a1da6c 1613
2546420a
A
1614 if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){
1615 TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry);
1616 } else {
1617 TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
1618 }
f1a1da6c
A
1619
1620 if (fromexit == 0) {
2546420a 1621 assert(wq->wq_nthreads && wq->wq_thidlecount);
f1a1da6c
A
1622 wq->wq_nthreads--;
1623 wq->wq_thidlecount--;
1624 }
1625
1626 /*
2546420a 1627 * Clear the threadlist pointer in uthread so
f1a1da6c
A
1628	 * a blocked thread woken up for termination will
1629 * not access the thread list as it is going to be
1630 * freed.
1631 */
1632 pthread_kern->thread_sched_call(tl->th_thread, NULL);
1633
1634 uth = pthread_kern->get_bsdthread_info(tl->th_thread);
1635 if (uth != (struct uthread *)0) {
1636 pthread_kern->uthread_set_threadlist(uth, NULL);
1637 }
1638 if (fromexit == 0) {
1639 /* during exit the lock is not held */
2546420a 1640 workqueue_unlock(wq);
f1a1da6c
A
1641 }
1642
2546420a 1643 if ( (tl->th_flags & TH_LIST_NEW) || first_use ) {
f1a1da6c 1644 /*
2546420a 1645 * thread was created, but never used...
f1a1da6c
A
1646 * need to clean up the stack and port ourselves
1647 * since we're not going to spin up through the
1648 * normal exit path triggered from Libc
1649 */
1650 if (fromexit == 0) {
1651 /* vm map is already deallocated when this is called from exit */
2546420a 1652 (void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, _workqueue_allocsize(wq));
f1a1da6c
A
1653 }
1654 (void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task), tl->th_thport);
1655
f1a1da6c
A
1656 } else {
1657
2546420a 1658 PTHREAD_TRACE1_WQ(TRACE_wq_thread_park | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));
f1a1da6c
A
1659 }
1660 /*
1661 * drop our ref on the thread
1662 */
1663 thread_deallocate(tl->th_thread);
1664
1665 kfree(tl, sizeof(struct threadlist));
1666}
1667
1668
964d3577
A
1669/**
1670 * Try to add a new workqueue thread.
1671 *
1672 * - called with workq lock held
1673 * - dropped and retaken around thread creation
1674 * - return with workq lock held
f1a1da6c
A
1675 */
1676static boolean_t
964d3577 1677workqueue_addnewthread(struct workqueue *wq, boolean_t ignore_constrained_thread_limit)
f1a1da6c
A
1678{
1679 struct threadlist *tl;
1680 struct uthread *uth;
1681 kern_return_t kret;
1682 thread_t th;
1683 proc_t p;
1684 void *sright;
1685 mach_vm_offset_t stackaddr;
f1a1da6c 1686
964d3577 1687 if ((wq->wq_flags & WQ_EXITING) == WQ_EXITING) {
2546420a 1688 PTHREAD_TRACE_WQ(TRACE_wq_thread_add_during_exit | DBG_FUNC_NONE, wq, 0, 0, 0, 0);
f1a1da6c 1689 return (FALSE);
964d3577 1690 }
f1a1da6c 1691
2546420a
A
1692 if (wq->wq_nthreads >= wq_max_threads) {
1693 PTHREAD_TRACE_WQ(TRACE_wq_thread_limit_exceeded | DBG_FUNC_NONE, wq, wq->wq_nthreads, wq_max_threads, 0, 0);
f1a1da6c
A
1694 return (FALSE);
1695 }
f1a1da6c 1696
964d3577
A
1697 if (ignore_constrained_thread_limit == FALSE &&
1698 wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
2546420a 1699 /*
964d3577
A
1700 * If we're not creating this thread to service an overcommit or
1701 * event manager request, then we check to see if we are over our
1702 * constrained thread limit, in which case we error out.
f1a1da6c 1703 */
2546420a 1704 PTHREAD_TRACE_WQ(TRACE_wq_thread_constrained_maxed | DBG_FUNC_NONE, wq, wq->wq_constrained_threads_scheduled,
964d3577 1705 wq_max_constrained_threads, 0, 0);
f1a1da6c
A
1706 return (FALSE);
1707 }
f1a1da6c
A
1708
1709 wq->wq_nthreads++;
1710
1711 p = wq->wq_proc;
2546420a
A
1712 workqueue_unlock(wq);
1713
1714 tl = kalloc(sizeof(struct threadlist));
1715 bzero(tl, sizeof(struct threadlist));
f1a1da6c 1716
2546420a 1717 kret = pthread_kern->thread_create_workq_waiting(wq->wq_task, wq_unpark_continue, tl, &th);
f1a1da6c 1718 if (kret != KERN_SUCCESS) {
2546420a
A
1719 PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 0, 0, 0);
1720 kfree(tl, sizeof(struct threadlist));
f1a1da6c
A
1721 goto failed;
1722 }
1723
2546420a 1724 stackaddr = pthread_kern->proc_get_stack_addr_hint(p);
964d3577
A
1725
1726 mach_vm_size_t guardsize = vm_map_page_size(wq->wq_map);
2546420a 1727 mach_vm_size_t pthread_size =
964d3577 1728 vm_map_round_page_mask(pthread_kern->proc_get_pthsize(p) + PTHREAD_T_OFFSET, vm_map_page_mask(wq->wq_map));
2546420a 1729 mach_vm_size_t th_allocsize = guardsize + PTH_DEFAULT_STACKSIZE + pthread_size;
f1a1da6c
A
1730
1731 kret = mach_vm_map(wq->wq_map, &stackaddr,
2546420a
A
1732 th_allocsize, page_size-1,
1733 VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, NULL,
1734 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
1735 VM_INHERIT_DEFAULT);
f1a1da6c
A
1736
1737 if (kret != KERN_SUCCESS) {
2546420a 1738 PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 1, 0, 0);
964d3577
A
1739
1740 kret = mach_vm_allocate(wq->wq_map,
2546420a 1741 &stackaddr, th_allocsize,
964d3577 1742 VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);
f1a1da6c
A
1743 }
1744 if (kret == KERN_SUCCESS) {
964d3577 1745 /*
f1a1da6c
A
1746 * The guard page is at the lowest address
1747 * The stack base is the highest address
1748 */
964d3577 1749 kret = mach_vm_protect(wq->wq_map, stackaddr, guardsize, FALSE, VM_PROT_NONE);
f1a1da6c 1750
964d3577 1751 if (kret != KERN_SUCCESS) {
2546420a
A
1752 (void) mach_vm_deallocate(wq->wq_map, stackaddr, th_allocsize);
1753 PTHREAD_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, kret, 2, 0, 0);
964d3577 1754 }
f1a1da6c
A
1755 }
1756 if (kret != KERN_SUCCESS) {
1757 (void) thread_terminate(th);
1758 thread_deallocate(th);
1759
1760 kfree(tl, sizeof(struct threadlist));
1761 goto failed;
1762 }
1763 thread_reference(th);
1764
2546420a
A
1765 pthread_kern->thread_set_tag(th, THREAD_TAG_PTHREAD | THREAD_TAG_WORKQUEUE);
1766
f1a1da6c
A
1767 sright = (void *)pthread_kern->convert_thread_to_port(th);
1768 tl->th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(wq->wq_task));
1769
1770 pthread_kern->thread_static_param(th, TRUE);
1771
2546420a 1772 tl->th_flags = TH_LIST_INITED | TH_LIST_NEW;
f1a1da6c
A
1773
1774 tl->th_thread = th;
1775 tl->th_workq = wq;
1776 tl->th_stackaddr = stackaddr;
1777 tl->th_priority = WORKQUEUE_NUM_BUCKETS;
f1a1da6c
A
1778
1779 uth = pthread_kern->get_bsdthread_info(tl->th_thread);
1780
2546420a 1781 workqueue_lock_spin(wq);
f1a1da6c
A
1782
1783 pthread_kern->uthread_set_threadlist(uth, tl);
1784 TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);
1785
1786 wq->wq_thidlecount++;
1787
2546420a 1788 PTHREAD_TRACE_WQ(TRACE_wq_thread_create | DBG_FUNC_NONE, wq, 0, 0, 0, 0);
f1a1da6c
A
1789
1790 return (TRUE);
1791
1792failed:
2546420a 1793 workqueue_lock_spin(wq);
f1a1da6c
A
1794 wq->wq_nthreads--;
1795
1796 return (FALSE);
1797}
1798
964d3577
A
1799/**
1800 * Setup per-process state for the workqueue.
1801 */
f1a1da6c
A
1802int
1803_workq_open(struct proc *p, __unused int32_t *retval)
1804{
1805 struct workqueue * wq;
1806 int wq_size;
1807 char * ptr;
1808 uint32_t i;
1809 uint32_t num_cpus;
1810 int error = 0;
f1a1da6c
A
1811
1812 if (pthread_kern->proc_get_register(p) == 0) {
1813 return EINVAL;
1814 }
1815
1816 num_cpus = pthread_kern->ml_get_max_cpus();
1817
1818 if (wq_init_constrained_limit) {
1819 uint32_t limit;
1820 /*
1821 * set up the limit for the constrained pool
1822 * this is a virtual pool in that we don't
1823 * maintain it on a separate idle and run list
1824 */
1825 limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR;
1826
1827 if (limit > wq_max_constrained_threads)
1828 wq_max_constrained_threads = limit;
1829
1830 wq_init_constrained_limit = 0;
2546420a
A
1831
1832 if (wq_max_threads > pthread_kern->config_thread_max - 20) {
1833 wq_max_threads = pthread_kern->config_thread_max - 20;
1834 }
f1a1da6c 1835 }
f1a1da6c
A
1836
1837 if (pthread_kern->proc_get_wqptr(p) == NULL) {
2546420a
A
1838 if (pthread_kern->proc_init_wqptr_or_wait(p) == FALSE) {
1839 assert(pthread_kern->proc_get_wqptr(p) != NULL);
f1a1da6c
A
1840 goto out;
1841 }
1842
f1a1da6c
A
1843 wq_size = sizeof(struct workqueue);
1844
1845 ptr = (char *)kalloc(wq_size);
1846 bzero(ptr, wq_size);
1847
1848 wq = (struct workqueue *)ptr;
1849 wq->wq_flags = WQ_LIST_INITED;
1850 wq->wq_proc = p;
964d3577 1851 wq->wq_max_concurrency = wq_max_concurrency;
f1a1da6c
A
1852 wq->wq_task = current_task();
1853 wq->wq_map = pthread_kern->current_map();
1854
1855 for (i = 0; i < WORKQUEUE_NUM_BUCKETS; i++)
1856 wq->wq_reqconc[i] = (uint16_t)wq->wq_max_concurrency;
1857
964d3577
A
1858		// The event manager bucket is special, so it gets a concurrency of 1
1859 // though we shouldn't ever read this value for that bucket
1860 wq->wq_reqconc[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1;
1861
2546420a
A
1862 // Start the event manager at the priority hinted at by the policy engine
1863 int mgr_priority_hint = pthread_kern->task_get_default_manager_qos(current_task());
1864 wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(mgr_priority_hint) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
964d3577 1865
f1a1da6c
A
1866 TAILQ_INIT(&wq->wq_thrunlist);
1867 TAILQ_INIT(&wq->wq_thidlelist);
1868
2546420a
A
1869 wq->wq_atimer_delayed_call = thread_call_allocate((thread_call_func_t)workqueue_add_timer, (thread_call_param_t)wq);
1870 wq->wq_atimer_immediate_call = thread_call_allocate((thread_call_func_t)workqueue_add_timer, (thread_call_param_t)wq);
f1a1da6c 1871
2546420a 1872 lck_spin_init(&wq->wq_lock, pthread_lck_grp, pthread_lck_attr);
f1a1da6c
A
1873
1874 pthread_kern->proc_set_wqptr(p, wq);
f1a1da6c 1875
f1a1da6c
A
1876 }
1877out:
f1a1da6c 1878
f1a1da6c
A
1879 return(error);
1880}
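/*
 * Illustrative sketch (not part of this file): the one-time limit setup in
 * _workq_open above, reduced to plain arithmetic.  WORKQUEUE_CONSTRAINED_FACTOR is
 * given an assumed placeholder value here; the reserve of 20 threads below
 * config_thread_max comes directly from the code above.
 */
#if 0 /* example only */
#include <stdint.h>

#define EX_CONSTRAINED_FACTOR 8   /* assumption, not the kernel constant */

/* limit = num_cpus * factor, but never lower the existing limit */
static uint32_t
ex_constrained_limit(uint32_t num_cpus, uint32_t current_limit)
{
	uint32_t limit = num_cpus * EX_CONSTRAINED_FACTOR;
	return (limit > current_limit) ? limit : current_limit;
}

/* always leave ~20 threads of headroom for the rest of the system */
static uint32_t
ex_cap_max_threads(uint32_t wq_max_threads, uint32_t config_thread_max)
{
	uint32_t cap = config_thread_max - 20;
	return (wq_max_threads > cap) ? cap : wq_max_threads;
}
#endif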
1881
f1a1da6c
A
1882/*
1883 * Routine: workqueue_mark_exiting
1884 *
1885 * Function: Mark the work queue such that new threads will not be added to the
964d3577 1886 * work queue after we return.
f1a1da6c
A
1887 *
1888 * Conditions: Called against the current process.
1889 */
1890void
1891_workqueue_mark_exiting(struct proc *p)
1892{
1893 struct workqueue *wq = pthread_kern->proc_get_wqptr(p);
1894
1895 if (wq != NULL) {
1896
2546420a 1897 PTHREAD_TRACE_WQ(TRACE_wq_pthread_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);
f1a1da6c 1898
2546420a 1899 workqueue_lock_spin(wq);
f1a1da6c
A
1900
1901 /*
2546420a
A
1902 * We arm the add timer without holding the workqueue lock so we need
1903 * to synchronize with any running or soon to be running timers.
f1a1da6c 1904 *
2546420a
A
1905 * Threads that intend to arm the timer atomically OR
1906 * WQ_ATIMER_{DELAYED,IMMEDIATE}_RUNNING into the wq_flags, only if
1907 * WQ_EXITING is not present. So, once we have set WQ_EXITING, we can
1908 * be sure that no new RUNNING flags will be set, but still need to
1909 * wait for the already running timers to complete.
f1a1da6c 1910 *
2546420a
A
1911 * We always hold the workq lock when dropping WQ_ATIMER_RUNNING, so
1912		 * the check for it and the sleep until it clears are both protected.
f1a1da6c 1913 */
2546420a 1914 WQ_SETFLAG(wq, WQ_EXITING);
f1a1da6c 1915
2546420a
A
1916 if (wq->wq_flags & WQ_ATIMER_DELAYED_RUNNING) {
1917 if (thread_call_cancel(wq->wq_atimer_delayed_call) == TRUE) {
1918 WQ_UNSETFLAG(wq, WQ_ATIMER_DELAYED_RUNNING);
1919 }
1920 }
1921 if (wq->wq_flags & WQ_ATIMER_IMMEDIATE_RUNNING) {
1922 if (thread_call_cancel(wq->wq_atimer_immediate_call) == TRUE) {
1923 WQ_UNSETFLAG(wq, WQ_ATIMER_IMMEDIATE_RUNNING);
f1a1da6c
A
1924 }
1925 }
2546420a
A
1926 while (wq->wq_flags & (WQ_ATIMER_DELAYED_RUNNING | WQ_ATIMER_IMMEDIATE_RUNNING) ||
1927 (wq->wq_lflags & WQL_ATIMER_BUSY)) {
f1a1da6c 1928 assert_wait((caddr_t)wq, (THREAD_UNINT));
2546420a 1929 workqueue_unlock(wq);
f1a1da6c
A
1930
1931 thread_block(THREAD_CONTINUE_NULL);
1932
2546420a 1933 workqueue_lock_spin(wq);
f1a1da6c 1934 }
2546420a 1935 workqueue_unlock(wq);
f1a1da6c
A
1936
1937 PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
1938 }
1939}
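/*
 * Illustrative sketch (not part of this file): the exit handshake above is a
 * "set a stop flag, cancel what you can, then wait for in-flight timers to drain"
 * pattern.  A userspace model with a pthread mutex and condvar (ex_* names are
 * hypothetical; the flag/count stand in for WQ_EXITING and the *_RUNNING/BUSY bits):
 */
#if 0 /* example only */
#include <pthread.h>
#include <stdbool.h>

struct ex_timer_state {
	pthread_mutex_t lock;
	pthread_cond_t  drained;
	bool            exiting;   /* models WQ_EXITING */
	int             running;   /* models WQ_ATIMER_*_RUNNING / WQL_ATIMER_BUSY */
};

/* Armers check the exiting flag before marking themselves running. */
static bool
ex_try_arm(struct ex_timer_state *s)
{
	pthread_mutex_lock(&s->lock);
	bool armed = !s->exiting;
	if (armed)
		s->running++;
	pthread_mutex_unlock(&s->lock);
	return armed;
}

/* A timer that finishes drops its running count and wakes any waiter. */
static void
ex_timer_done(struct ex_timer_state *s)
{
	pthread_mutex_lock(&s->lock);
	if (--s->running == 0)
		pthread_cond_broadcast(&s->drained);
	pthread_mutex_unlock(&s->lock);
}

/* Exit path: block new arms, then wait for already-running timers to finish. */
static void
ex_mark_exiting(struct ex_timer_state *s)
{
	pthread_mutex_lock(&s->lock);
	s->exiting = true;
	while (s->running > 0)
		pthread_cond_wait(&s->drained, &s->lock);
	pthread_mutex_unlock(&s->lock);
}
#endif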
1940
1941/*
1942 * Routine: workqueue_exit
1943 *
1944 * Function: clean up the work queue structure(s) now that there are no threads
1945 * left running inside the work queue (except possibly current_thread).
1946 *
1947 * Conditions: Called by the last thread in the process.
1948 * Called against current process.
1949 */
1950void
1951_workqueue_exit(struct proc *p)
1952{
1953 struct workqueue * wq;
1954 struct threadlist * tl, *tlist;
1955 struct uthread *uth;
2546420a 1956 size_t wq_size = sizeof(struct workqueue);
f1a1da6c
A
1957
1958 wq = pthread_kern->proc_get_wqptr(p);
1959 if (wq != NULL) {
1960
2546420a 1961 PTHREAD_TRACE_WQ(TRACE_wq_workqueue_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);
f1a1da6c 1962
f1a1da6c 1963 pthread_kern->proc_set_wqptr(p, NULL);
f1a1da6c
A
1964
1965 /*
1966 * Clean up workqueue data structures for threads that exited and
1967 * didn't get a chance to clean up after themselves.
1968 */
1969 TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
2546420a
A
1970 assert((tl->th_flags & TH_LIST_RUNNING) != 0);
1971
f1a1da6c
A
1972 pthread_kern->thread_sched_call(tl->th_thread, NULL);
1973
1974 uth = pthread_kern->get_bsdthread_info(tl->th_thread);
1975 if (uth != (struct uthread *)0) {
1976 pthread_kern->uthread_set_threadlist(uth, NULL);
1977 }
1978 TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
1979
1980 /*
1981 * drop our last ref on the thread
1982 */
1983 thread_deallocate(tl->th_thread);
1984
1985 kfree(tl, sizeof(struct threadlist));
1986 }
1987 TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) {
2546420a
A
1988 assert((tl->th_flags & TH_LIST_RUNNING) == 0);
1989 assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET);
1990 workqueue_removethread(tl, true, false);
1991 }
1992 TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlemgrlist, th_entry, tlist) {
1993 assert((tl->th_flags & TH_LIST_RUNNING) == 0);
1994 assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET);
1995 workqueue_removethread(tl, true, false);
f1a1da6c 1996 }
2546420a
A
1997 thread_call_free(wq->wq_atimer_delayed_call);
1998 thread_call_free(wq->wq_atimer_immediate_call);
1999 lck_spin_destroy(&wq->wq_lock, pthread_lck_grp);
f1a1da6c
A
2000
2001 kfree(wq, wq_size);
2002
2003 PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
2004 }
2005}
2006
2007
964d3577 2008#pragma mark workqueue thread manipulation
f1a1da6c 2009
964d3577
A
2010/**
2011 * Entry point for libdispatch to ask for threads
2012 */
2013static int wqops_queue_reqthreads(struct proc *p, int reqcount, pthread_priority_t priority){
2014 struct workqueue *wq;
2546420a 2015 boolean_t start_timer = FALSE;
f1a1da6c 2016
964d3577
A
2017 boolean_t overcommit = (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0;
2018 int class = pthread_priority_get_class_index(priority);
f1a1da6c 2019
964d3577
A
2020 boolean_t event_manager = (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG) != 0;
2021 if (event_manager){
2022 class = WORKQUEUE_EVENT_MANAGER_BUCKET;
2023 }
f1a1da6c 2024
964d3577
A
2025 if ((reqcount <= 0) || (class < 0) || (class >= WORKQUEUE_NUM_BUCKETS) || (overcommit && event_manager)) {
2026 return EINVAL;
2027 }
2028
2546420a 2029
964d3577 2030 if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
964d3577
A
2031 return EINVAL;
2032 }
2546420a
A
2033
2034 workqueue_lock_spin(wq);
964d3577
A
2035
2036 if (overcommit == 0 && event_manager == 0) {
2037 wq->wq_reqcount += reqcount;
2038 wq->wq_requests[class] += reqcount;
2039
2546420a 2040 PTHREAD_TRACE_WQ(TRACE_wq_req_threads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0);
964d3577
A
2041
2042 while (wq->wq_reqcount) {
2043 if (!workqueue_run_one(p, wq, overcommit, 0))
2044 break;
2045 }
2546420a
A
2046 } else if (overcommit) {
2047 PTHREAD_TRACE_WQ(TRACE_wq_req_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_ocrequests[class], reqcount, 0);
964d3577
A
2048
2049 while (reqcount) {
2050 if (!workqueue_run_one(p, wq, overcommit, priority))
2051 break;
2052 reqcount--;
2053 }
2054 if (reqcount) {
2055 /*
2546420a
A
2056 * We need to delay starting some of the overcommit requests.
2057 * We'll record the request here and as existing threads return to
2058 * the kernel, we'll notice the ocrequests and spin them back to
2059 * user space as the overcommit variety.
964d3577
A
2060 */
2061 wq->wq_reqcount += reqcount;
2062 wq->wq_requests[class] += reqcount;
2063 wq->wq_ocrequests[class] += reqcount;
2064
2546420a 2065 PTHREAD_TRACE_WQ(TRACE_wq_delay_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_ocrequests[class], reqcount, 0);
964d3577 2066
2546420a
A
2067 /*
2068 * If we delayed this thread coming up but we're not constrained
964d3577
A
2069 * or at max threads then we need to start the timer so we don't
2070 * risk dropping this request on the floor.
2071 */
2546420a
A
2072 if ((wq->wq_constrained_threads_scheduled < wq_max_constrained_threads) &&
2073 (wq->wq_nthreads < wq_max_threads)){
2074 start_timer = WQ_TIMER_DELAYED_NEEDED(wq);
964d3577
A
2075 }
2076 }
2077 } else if (event_manager) {
2546420a 2078 PTHREAD_TRACE_WQ(TRACE_wq_req_event_manager | DBG_FUNC_NONE, wq, wq->wq_event_manager_priority, wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET], wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET], 0);
964d3577
A
2079
2080 if (wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){
2081 wq->wq_reqcount += 1;
2082 wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1;
2083 }
2084
2085 // We've recorded the request for an event manager thread above. We'll
2086 // let the timer pick it up as we would for a kernel callout. We can
2087 // do a direct add/wakeup when that support is added for the kevent path.
2546420a
A
2088 if (wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){
2089 start_timer = WQ_TIMER_DELAYED_NEEDED(wq);
2090 }
964d3577 2091 }
2546420a
A
2092
2093 if (start_timer) {
2094 workqueue_interval_timer_start(wq);
2095 }
2096
2097 workqueue_unlock(wq);
964d3577
A
2098
2099 return 0;
2100}
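/*
 * Illustrative sketch (not part of this file): requests reach wqops_queue_reqthreads
 * above via workq_kernreturn with WQOPS_QUEUE_REQTHREADS, where arg2 is the thread
 * count and arg3 the pthread_priority_t (see _workq_kernreturn below).  Roughly, the
 * userspace caller's side looks like the following; the __workq_kernreturn wrapper
 * declaration and the exact priority encoding are assumptions here, not a stable
 * interface.
 */
#if 0 /* example only */
extern int __workq_kernreturn(int options, void *item, int arg2, int arg3);

static int
ex_request_two_overcommit_threads(int qos_encoded_priority)
{
	/* overcommit requests bypass the constrained-pool accounting above */
	int prio = qos_encoded_priority | _PTHREAD_PRIORITY_OVERCOMMIT_FLAG;

	/* WQOPS_QUEUE_REQTHREADS: arg2 = number of threads, arg3 = priority */
	return __workq_kernreturn(WQOPS_QUEUE_REQTHREADS, NULL, 2, prio);
}
#endif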
2101
2546420a
A
2102/*
2103 * Used by the kevent system to request threads.
2104 *
2105 * Currently count is ignored and we always return one thread per invocation.
964d3577
A
2106 */
2107thread_t _workq_reqthreads(struct proc *p, int requests_count, workq_reqthreads_req_t requests){
2546420a
A
2108 thread_t th = THREAD_NULL;
2109 boolean_t do_thread_call = FALSE;
2110 boolean_t emergency_thread = FALSE;
964d3577
A
2111 assert(requests_count > 0);
2112
2113#if DEBUG
2114 // Make sure that the requests array is sorted, highest priority first
2115 if (requests_count > 1){
2116 __assert_only qos_class_t priority = _pthread_priority_get_qos_newest(requests[0].priority);
2117 __assert_only unsigned long flags = ((_pthread_priority_get_flags(requests[0].priority) & (_PTHREAD_PRIORITY_OVERCOMMIT_FLAG|_PTHREAD_PRIORITY_EVENT_MANAGER_FLAG)) != 0);
2118 for (int i = 1; i < requests_count; i++){
2119 if (requests[i].count == 0) continue;
2120 __assert_only qos_class_t next_priority = _pthread_priority_get_qos_newest(requests[i].priority);
2121 __assert_only unsigned long next_flags = ((_pthread_priority_get_flags(requests[i].priority) & (_PTHREAD_PRIORITY_OVERCOMMIT_FLAG|_PTHREAD_PRIORITY_EVENT_MANAGER_FLAG)) != 0);
2122 if (next_flags != flags){
2123 flags = next_flags;
2124 priority = next_priority;
2125 } else {
2126 assert(next_priority <= priority);
2127 }
2128 }
2129 }
2130#endif // DEBUG
f1a1da6c 2131
964d3577 2132 struct workqueue *wq;
964d3577 2133 if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
2546420a 2134 return THREAD_NULL;
964d3577
A
2135 }
2136
2546420a
A
2137 workqueue_lock_spin(wq);
2138
2139 PTHREAD_TRACE_WQ(TRACE_wq_kevent_req_threads | DBG_FUNC_START, wq, requests_count, 0, 0, 0);
964d3577
A
2140
2141 // Look for overcommit or event-manager-only requests.
2142 boolean_t have_overcommit = FALSE;
2143 pthread_priority_t priority = 0;
2144 for (int i = 0; i < requests_count; i++){
2145 if (requests[i].count == 0)
2146 continue;
2147 priority = requests[i].priority;
2148 if (_pthread_priority_get_qos_newest(priority) == QOS_CLASS_UNSPECIFIED){
2149 priority |= _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
2150 }
2151 if ((_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG) != 0){
2152 goto event_manager;
2153 }
2154 if ((_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0){
2155 have_overcommit = TRUE;
2156 break;
f1a1da6c
A
2157 }
2158 }
2159
964d3577 2160 if (have_overcommit){
2546420a
A
2161 if (wq->wq_thidlecount){
2162 th = workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_OVERCOMMIT_KEVENT, priority, true);
2163 if (th != THREAD_NULL){
2164 goto out;
2165 } else {
2166 workqueue_lock_spin(wq); // reacquire lock
2167 }
964d3577 2168 }
2546420a
A
2169
2170 int class = pthread_priority_get_class_index(priority);
2171 wq->wq_reqcount += 1;
2172 wq->wq_requests[class] += 1;
2173 wq->wq_kevent_ocrequests[class] += 1;
2174
2175 do_thread_call = WQ_TIMER_IMMEDIATE_NEEDED(wq);
2176 goto deferred;
964d3577
A
2177 }
2178
2179 // Having no overcommit requests, try to find any request that can start
2180 // There's no TOCTTOU since we hold the workqueue lock
2181 for (int i = 0; i < requests_count; i++){
2182 workq_reqthreads_req_t req = requests + i;
2183 priority = req->priority;
2546420a 2184 int class = pthread_priority_get_class_index(priority);
964d3577
A
2185
2186 if (req->count == 0)
2187 continue;
2188
2546420a
A
2189 if (!may_start_constrained_thread(wq, class, WORKQUEUE_NUM_BUCKETS, NULL))
2190 continue;
964d3577 2191
2546420a
A
2192 wq->wq_reqcount += 1;
2193 wq->wq_requests[class] += 1;
2194 wq->wq_kevent_requests[class] += 1;
964d3577 2195
2546420a 2196 PTHREAD_TRACE_WQ(TRACE_wq_req_kevent_threads | DBG_FUNC_NONE, wq, priority, wq->wq_kevent_requests[class], 1, 0);
964d3577 2197
2546420a
A
2198 if (wq->wq_thidlecount){
2199 th = workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_DEFAULT_KEVENT, priority, true);
2200 goto out;
2201 } else {
2202 do_thread_call = WQ_TIMER_IMMEDIATE_NEEDED(wq);
2203 goto deferred;
964d3577
A
2204 }
2205 }
2206
2207 // Okay, here's the fun case: we can't spin up any of the non-overcommit threads
2208 // that we've seen a request for, so we kick this over to the event manager thread
2546420a 2209 emergency_thread = TRUE;
964d3577
A
2210
2211event_manager:
964d3577
A
2212 if (wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){
2213 wq->wq_reqcount += 1;
2214 wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1;
2546420a
A
2215 PTHREAD_TRACE_WQ(TRACE_wq_req_event_manager | DBG_FUNC_NONE, wq, 0, wq->wq_kevent_requests[WORKQUEUE_EVENT_MANAGER_BUCKET], 1, 0);
2216 } else {
2217 PTHREAD_TRACE_WQ(TRACE_wq_req_event_manager | DBG_FUNC_NONE, wq, 0, wq->wq_kevent_requests[WORKQUEUE_EVENT_MANAGER_BUCKET], 0, 0);
964d3577
A
2218 }
2219 wq->wq_kevent_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] = 1;
2220
2546420a
A
2221 if (wq->wq_thidlecount && wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0){
2222 th = workqueue_run_nextreq(p, wq, THREAD_NULL, RUN_NEXTREQ_EVENT_MANAGER, 0, true);
2223 assert(th != THREAD_NULL);
2224 goto out;
2225 }
2226 do_thread_call = WQ_TIMER_IMMEDIATE_NEEDED(wq);
964d3577 2227
2546420a
A
2228deferred:
2229 workqueue_unlock(wq);
964d3577 2230
2546420a
A
2231 if (do_thread_call == TRUE){
2232 workqueue_interval_timer_trigger(wq);
2233 }
964d3577 2234
2546420a
A
2235out:
2236 PTHREAD_TRACE_WQ(TRACE_wq_kevent_req_threads | DBG_FUNC_END, wq, do_thread_call, 0, 0, 0);
964d3577 2237
2546420a 2238 return emergency_thread ? (void*)-1 : th;
964d3577
A
2239}
2240
2241
2242static int wqops_thread_return(struct proc *p){
2243 thread_t th = current_thread();
2244 struct uthread *uth = pthread_kern->get_bsdthread_info(th);
2546420a
A
2245 struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
2246
964d3577
A
2247 /* reset signal mask on the workqueue thread to default state */
2248 if (pthread_kern->uthread_get_sigmask(uth) != (sigset_t)(~workq_threadmask)) {
2249 pthread_kern->proc_lock(p);
2250 pthread_kern->uthread_set_sigmask(uth, ~workq_threadmask);
2251 pthread_kern->proc_unlock(p);
2252 }
964d3577
A
2253
2254 struct workqueue *wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p);
2255 if (wq == NULL || !tl) {
964d3577
A
2256 return EINVAL;
2257 }
964d3577 2258
2546420a
A
2259 PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_START, tl->th_workq, 0, 0, 0, 0);
2260
2261 /*
2262 * This squash call has neat semantics: it removes the specified overrides,
2263 * replacing the current requested QoS with the previous effective QoS from
2264 * those overrides. This means we won't be preempted due to having our QoS
2265 * lowered. Of course, now our understanding of the thread's QoS is wrong,
2266 * so we'll adjust below.
2267 */
2268 int new_qos =
2269 pthread_kern->proc_usynch_thread_qos_squash_override_for_resource(th,
2270 THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD,
2271 THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE);
2272
2273 workqueue_lock_spin(wq);
2274
2275 if (tl->th_flags & TH_LIST_KEVENT_BOUND) {
2276 unsigned int flags = KEVENT_FLAG_WORKQ;
2277 if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
2278 flags |= KEVENT_FLAG_WORKQ_MANAGER;
2279 }
2280
2281 workqueue_unlock(wq);
2282 kevent_qos_internal_unbind(p, class_index_get_thread_qos(tl->th_priority), th, flags);
2283 workqueue_lock_spin(wq);
2284
2285 tl->th_flags &= ~TH_LIST_KEVENT_BOUND;
2286 }
2287
2288 /* Fix up counters from the squash operation. */
2289 uint8_t old_bucket = tl->th_priority;
2290 uint8_t new_bucket = thread_qos_get_class_index(new_qos);
2291
2292 if (old_bucket != new_bucket) {
2293 OSAddAtomic(-1, &wq->wq_thactive_count[old_bucket]);
2294 OSAddAtomic(1, &wq->wq_thactive_count[new_bucket]);
2295
2296 wq->wq_thscheduled_count[old_bucket]--;
2297 wq->wq_thscheduled_count[new_bucket]++;
2298
2299 tl->th_priority = new_bucket;
2300 }
2301
2302 PTHREAD_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_END, tl->th_workq, new_qos, 0, 0, 0);
2303
2304 PTHREAD_TRACE_WQ(TRACE_wq_runitem | DBG_FUNC_END, wq, 0, 0, 0, 0);
2305
2306 (void)workqueue_run_nextreq(p, wq, th, RUN_NEXTREQ_DEFAULT, 0, false);
f1a1da6c 2307 /*
964d3577
A
2308 * workqueue_run_nextreq is responsible for
2309 * dropping the workqueue lock in all cases
f1a1da6c 2310 */
964d3577
A
2311 return 0;
2312}
f1a1da6c 2313
964d3577
A
2314/**
2315 * Multiplexed call to interact with the workqueue mechanism
2316 */
2317int
2318_workq_kernreturn(struct proc *p,
2319 int options,
2546420a 2320 user_addr_t item,
964d3577
A
2321 int arg2,
2322 int arg3,
2323 int32_t *retval)
2324{
2325 int error = 0;
2326
2327 if (pthread_kern->proc_get_register(p) == 0) {
2328 return EINVAL;
2329 }
f1a1da6c 2330
964d3577
A
2331 switch (options) {
2332 case WQOPS_QUEUE_NEWSPISUPP: {
2333 /*
2334 * arg2 = offset of serialno into dispatch queue
2335 * arg3 = kevent support
2336 */
2337 int offset = arg2;
2338 if (arg3 & 0x01){
2339 // If we get here, then userspace has indicated support for kevent delivery.
f1a1da6c 2340 }
964d3577
A
2341
2342 pthread_kern->proc_set_dispatchqueue_serialno_offset(p, (uint64_t)offset);
2343 break;
f1a1da6c 2344 }
964d3577
A
2345 case WQOPS_QUEUE_REQTHREADS: {
2346 /*
2347 * arg2 = number of threads to start
2348 * arg3 = priority
2349 */
2350 error = wqops_queue_reqthreads(p, arg2, arg3);
2351 break;
2352 }
2353 case WQOPS_SET_EVENT_MANAGER_PRIORITY: {
2354 /*
2355 * arg2 = priority for the manager thread
2356 *
2357 * if _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG is set, the
2358 * ~_PTHREAD_PRIORITY_FLAGS_MASK contains a scheduling priority instead
2359 * of a QOS value
2360 */
2361 pthread_priority_t pri = arg2;
2362
964d3577 2363 struct workqueue *wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p);
2546420a 2364 if (wq == NULL) {
964d3577
A
2365 error = EINVAL;
2366 break;
2367 }
2546420a 2368 workqueue_lock_spin(wq);
964d3577
A
2369 if (pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){
2370			// If userspace passes a scheduling priority, that takes precedence
2371			// over any QoS. (So, userspace should take care not to accidentally
2372 // lower the priority this way.)
2373 uint32_t sched_pri = pri & (~_PTHREAD_PRIORITY_FLAGS_MASK);
2374 if (wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG){
2375 wq->wq_event_manager_priority = MAX(sched_pri, wq->wq_event_manager_priority & (~_PTHREAD_PRIORITY_FLAGS_MASK))
2376 | _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
2377 } else {
2378 wq->wq_event_manager_priority = sched_pri
2379 | _PTHREAD_PRIORITY_SCHED_PRI_FLAG | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
2380 }
2381 } else if ((wq->wq_event_manager_priority & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){
2546420a
A
2382 int cur_qos = pthread_priority_get_thread_qos(wq->wq_event_manager_priority);
2383 int new_qos = pthread_priority_get_thread_qos(pri);
2384 wq->wq_event_manager_priority = (uint32_t)thread_qos_get_pthread_priority(MAX(cur_qos, new_qos)) | _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
964d3577 2385 }
2546420a 2386 workqueue_unlock(wq);
964d3577
A
2387 break;
2388 }
2546420a
A
2389 case WQOPS_THREAD_KEVENT_RETURN:
2390 if (item != 0) {
2391 int32_t kevent_retval;
2392 int ret = kevent_qos_internal(p, -1, item, arg2, item, arg2, NULL, NULL, KEVENT_FLAG_WORKQ | KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS, &kevent_retval);
2393 // We shouldn't be getting more errors out than events we put in, so
2394 // reusing the input buffer should always provide enough space. But,
2395 // the assert is commented out since we get errors in edge cases in the
2396 // process lifecycle.
2397 //assert(ret == KERN_SUCCESS && kevent_retval >= 0);
2398 if (ret != KERN_SUCCESS){
2399 error = ret;
2400 break;
2401 } else if (kevent_retval > 0){
2402 assert(kevent_retval <= arg2);
2403 *retval = kevent_retval;
2404 error = 0;
2405 break;
2406 }
f1a1da6c 2407 }
2546420a
A
2408 // FALLTHRU
2409 case WQOPS_THREAD_RETURN:
964d3577
A
2410 error = wqops_thread_return(p);
2411 // NOT REACHED except in case of error
2412 assert(error);
2413 break;
964d3577
A
2414 default:
2415 error = EINVAL;
2416 break;
2417 }
2418 return (error);
2419}
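/*
 * Illustrative sketch (not part of this file): WQOPS_SET_EVENT_MANAGER_PRIORITY above
 * only ever raises the manager's priority.  When both the stored value and the new
 * value are QoS-based (no _PTHREAD_PRIORITY_SCHED_PRI_FLAG involved), the merge
 * reduces to taking the higher QoS and re-attaching the event-manager flag.  The
 * ex_* helper name is hypothetical; the two called macros/functions appear above.
 */
#if 0 /* example only */
static uint32_t
ex_merge_manager_qos(int cur_qos, int new_qos)
{
	int merged = (cur_qos > new_qos) ? cur_qos : new_qos;
	return (uint32_t)thread_qos_get_pthread_priority(merged) |
	    _PTHREAD_PRIORITY_EVENT_MANAGER_FLAG;
}
#endif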
f1a1da6c 2420
f1a1da6c 2421
964d3577
A
2422static boolean_t
2423workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority)
2424{
2425 boolean_t ran_one;
f1a1da6c 2426
964d3577
A
2427 if (wq->wq_thidlecount == 0) {
2428 if (overcommit == FALSE) {
2429 if (wq->wq_constrained_threads_scheduled < wq->wq_max_concurrency)
2430 workqueue_addnewthread(wq, overcommit);
2431 } else {
2432 workqueue_addnewthread(wq, overcommit);
2433
2434 if (wq->wq_thidlecount == 0)
2435 return (FALSE);
f1a1da6c
A
2436 }
2437 }
2546420a 2438 ran_one = (workqueue_run_nextreq(p, wq, THREAD_NULL, overcommit ? RUN_NEXTREQ_OVERCOMMIT : RUN_NEXTREQ_DEFAULT, priority, false) != THREAD_NULL);
f1a1da6c 2439 /*
964d3577
A
2440 * workqueue_run_nextreq is responsible for
2441 * dropping the workqueue lock in all cases
f1a1da6c 2442 */
2546420a 2443 workqueue_lock_spin(wq);
964d3577
A
2444
2445 return (ran_one);
2446}
2447
2448/*
2546420a
A
2449 * We have no work to do, park ourselves on the idle list.
2450 *
2451 * Consumes the workqueue lock and does not return.
964d3577 2452 */
2546420a 2453static void __dead2
964d3577
A
2454parkit(struct workqueue *wq, struct threadlist *tl, thread_t thread)
2455{
2546420a
A
2456 assert(thread == tl->th_thread);
2457 assert(thread == current_thread());
2458
2459 uint32_t us_to_wait = 0;
2460
964d3577 2461 TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
964d3577 2462
2546420a
A
2463 tl->th_flags &= ~TH_LIST_RUNNING;
2464 tl->th_flags &= ~TH_LIST_KEVENT;
2465 assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0);
964d3577 2466
2546420a
A
2467 if (tl->th_flags & TH_LIST_CONSTRAINED) {
2468 wq->wq_constrained_threads_scheduled--;
2469 tl->th_flags &= ~TH_LIST_CONSTRAINED;
2470 }
964d3577
A
2471
2472 OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]);
2473 wq->wq_thscheduled_count[tl->th_priority]--;
2474 wq->wq_threads_scheduled--;
2546420a 2475 uint32_t thidlecount = ++wq->wq_thidlecount;
964d3577 2476
2546420a
A
2477 pthread_kern->thread_sched_call(thread, NULL);
2478
2479 /*
2480 * We'd like to always have one manager thread parked so that we can have
2481 * low latency when we need to bring a manager thread up. If that idle
2482 * thread list is empty, make this thread a manager thread.
2483 *
2484 * XXX: This doesn't check that there's not a manager thread outstanding,
2485 * so it's based on the assumption that most manager callouts will change
2486 * their QoS before parking. If that stops being true, this may end up
2487 * costing us more than we gain.
2488 */
2489 if (TAILQ_EMPTY(&wq->wq_thidlemgrlist) &&
2490 tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET){
2491 reset_priority(tl, pthread_priority_from_wq_class_index(wq, WORKQUEUE_EVENT_MANAGER_BUCKET));
2492 tl->th_priority = WORKQUEUE_EVENT_MANAGER_BUCKET;
f1a1da6c
A
2493 }
2494
2546420a
A
2495 if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET){
2496 TAILQ_INSERT_HEAD(&wq->wq_thidlemgrlist, tl, th_entry);
2497 } else {
2498 TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry);
2499 }
964d3577 2500
2546420a
A
2501 PTHREAD_TRACE_WQ(TRACE_wq_thread_park | DBG_FUNC_START, wq,
2502 wq->wq_threads_scheduled, wq->wq_thidlecount, us_to_wait, 0);
964d3577 2503
2546420a
A
2504 /*
2505 * When we remove the voucher from the thread, we may lose our importance
2506 * causing us to get preempted, so we do this after putting the thread on
2507	 * the idle list. That way, when we get our importance back we'll be able
2508 * to use this thread from e.g. the kevent call out to deliver a boosting
2509 * message.
2510 */
2511 workqueue_unlock(wq);
2512 kern_return_t kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
2513 assert(kr == KERN_SUCCESS);
2514 workqueue_lock_spin(wq);
2515
2516 if ((tl->th_flags & TH_LIST_RUNNING) == 0) {
2517 if (thidlecount < 101) {
2518 us_to_wait = wq_reduce_pool_window_usecs - ((thidlecount-2) * (wq_reduce_pool_window_usecs / 100));
2519 } else {
2520 us_to_wait = wq_reduce_pool_window_usecs / 100;
2521 }
2522
2523 assert_wait_timeout_with_leeway((caddr_t)tl, (THREAD_INTERRUPTIBLE),
2524 TIMEOUT_URGENCY_SYS_BACKGROUND|TIMEOUT_URGENCY_LEEWAY, us_to_wait,
2525 wq_reduce_pool_window_usecs/10, NSEC_PER_USEC);
2526
2527 workqueue_unlock(wq);
2528
2529 thread_block(wq_unpark_continue);
2530 panic("thread_block(wq_unpark_continue) returned!");
2531 } else {
2532 workqueue_unlock(wq);
2533
2534 /*
2535 * While we'd dropped the lock to unset our voucher, someone came
2536 * around and made us runnable. But because we weren't waiting on the
2537		 * event, their wakeup() was ineffectual. To correct for that, we just
2538 * run the continuation ourselves.
2539 */
2540 wq_unpark_continue(NULL, THREAD_AWAKENED);
2541 }
964d3577 2542}
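/*
 * Illustrative sketch (not part of this file): the park timeout computed in parkit()
 * above shrinks linearly as more threads sit idle, until 100 idle threads, after
 * which every additional thread waits only 1% of the window.  The helper below
 * mirrors that formula for thidlecount >= 2; the 5-second window plugged into the
 * worked numbers is an assumption standing in for wq_reduce_pool_window_usecs.
 */
#if 0 /* example only */
#include <stdint.h>

static uint32_t
ex_park_timeout_usecs(uint32_t thidlecount, uint32_t window_usecs)
{
	/* valid for thidlecount >= 2, matching the increment done before parking */
	if (thidlecount < 101)
		return window_usecs - ((thidlecount - 2) * (window_usecs / 100));
	return window_usecs / 100;
}

/*
 * With window_usecs == 5,000,000 (assumed): 2 idle threads wait 5s,
 * 52 idle threads wait 2.5s, and 101 or more wait only 50ms before
 * waking up to self-destruct.
 */
#endif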
f1a1da6c 2543
964d3577
A
2544static boolean_t may_start_constrained_thread(struct workqueue *wq, uint32_t at_priclass, uint32_t my_priclass, boolean_t *start_timer){
2545 if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
2546 /*
2547 * we need 1 or more constrained threads to return to the kernel before
2548 * we can dispatch additional work
2549 */
2550 return FALSE;
2551 }
f1a1da6c 2552
964d3577
A
2553 uint32_t busycount = 0;
2554 uint32_t thactive_count = wq->wq_thactive_count[at_priclass];
f1a1da6c 2555
964d3577
A
2556 // Has our most recently blocked thread blocked recently enough that we
2557 // should still consider it busy?
2546420a 2558 if (wq->wq_thscheduled_count[at_priclass] > wq->wq_thactive_count[at_priclass]) {
964d3577 2559 if (wq_thread_is_busy(mach_absolute_time(), &wq->wq_lastblocked_ts[at_priclass])) {
f1a1da6c
A
2560 busycount++;
2561 }
2562 }
2563
964d3577
A
2564 if (my_priclass < WORKQUEUE_NUM_BUCKETS && my_priclass == at_priclass){
2565 /*
2546420a 2566 * don't count this thread as currently active
964d3577
A
2567 */
2568 thactive_count--;
f1a1da6c 2569 }
964d3577 2570
f1a1da6c 2571 if (thactive_count + busycount >= wq->wq_max_concurrency) {
964d3577 2572 if (busycount && start_timer) {
f1a1da6c
A
2573 /*
2574 * we found at least 1 thread in the
2575 * 'busy' state... make sure we start
2576 * the timer because if they are the only
2577 * threads keeping us from scheduling
2578 * this work request, we won't get a callback
2579 * to kick off the timer... we need to
2580 * start it now...
2581 */
2546420a 2582 *start_timer = WQ_TIMER_DELAYED_NEEDED(wq);
f1a1da6c
A
2583 }
2584
2546420a 2585 PTHREAD_TRACE_WQ(TRACE_wq_overcommitted|DBG_FUNC_NONE, wq, ((start_timer && *start_timer) ? 1 << _PTHREAD_PRIORITY_FLAGS_SHIFT : 0) | class_index_get_pthread_priority(at_priclass), thactive_count, busycount, 0);
f1a1da6c 2586
964d3577
A
2587 return FALSE;
2588 }
2589 return TRUE;
2590}
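/*
 * Illustrative sketch (not part of this file): stripped of the timer re-arm and
 * tracing, the admission check above admits a new constrained thread only while
 * (active + recently-blocked) stays below the allowed concurrency for that bucket
 * and the global constrained-pool cap has not been hit.  The ex_* name and flattened
 * parameters are hypothetical.
 */
#if 0 /* example only */
#include <stdbool.h>
#include <stdint.h>

static bool
ex_may_start_constrained(uint32_t constrained_scheduled, uint32_t max_constrained,
    uint32_t thactive, bool recently_blocked, bool caller_counts_as_active,
    uint32_t max_concurrency)
{
	if (constrained_scheduled >= max_constrained)
		return false;                 /* global constrained-pool cap */

	uint32_t busycount = recently_blocked ? 1 : 0;
	if (caller_counts_as_active)
		thactive--;                   /* don't count the requesting thread itself */

	return (thactive + busycount) < max_concurrency;
}
#endif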
2591
2546420a
A
2592static struct threadlist *
2593pop_from_thidlelist(struct workqueue *wq, uint32_t priclass)
2594{
2595 assert(wq->wq_thidlecount);
964d3577 2596
2546420a 2597 struct threadlist *tl = NULL;
964d3577 2598
2546420a
A
2599 if (!TAILQ_EMPTY(&wq->wq_thidlemgrlist) &&
2600 (priclass == WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlelist))){
2601 tl = TAILQ_FIRST(&wq->wq_thidlemgrlist);
2602 TAILQ_REMOVE(&wq->wq_thidlemgrlist, tl, th_entry);
2603 assert(tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET);
2604 } else if (!TAILQ_EMPTY(&wq->wq_thidlelist) &&
2605 (priclass != WORKQUEUE_EVENT_MANAGER_BUCKET || TAILQ_EMPTY(&wq->wq_thidlemgrlist))){
2606 tl = TAILQ_FIRST(&wq->wq_thidlelist);
2607 TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
2608 assert(tl->th_priority != WORKQUEUE_EVENT_MANAGER_BUCKET);
2609 } else {
2610 panic("pop_from_thidlelist called with no threads available");
964d3577 2611 }
2546420a
A
2612 assert((tl->th_flags & TH_LIST_RUNNING) == 0);
2613
2614 assert(wq->wq_thidlecount);
2615 wq->wq_thidlecount--;
2616
2617 TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);
2618
964d3577
A
2619 tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY;
2620
2621 wq->wq_threads_scheduled++;
2622 wq->wq_thscheduled_count[priclass]++;
2623 OSAddAtomic(1, &wq->wq_thactive_count[priclass]);
2624
2625 return tl;
2626}
2627
2546420a
A
2628static pthread_priority_t
2629pthread_priority_from_wq_class_index(struct workqueue *wq, int index){
2630 if (index == WORKQUEUE_EVENT_MANAGER_BUCKET){
2631 return wq->wq_event_manager_priority;
2632 } else {
2633 return class_index_get_pthread_priority(index);
2634 }
2635}
2636
964d3577 2637static void
2546420a 2638reset_priority(struct threadlist *tl, pthread_priority_t pri){
964d3577
A
2639 kern_return_t ret;
2640 thread_t th = tl->th_thread;
2641
2546420a
A
2642 if ((pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) == 0){
2643 ret = pthread_kern->thread_set_workq_qos(th, pthread_priority_get_thread_qos(pri), 0);
964d3577 2644 assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
964d3577 2645
2546420a 2646 if (tl->th_flags & TH_LIST_EVENT_MGR_SCHED_PRI) {
964d3577 2647
2546420a 2648 /* Reset priority to default (masked by QoS) */
964d3577 2649
2546420a
A
2650 ret = pthread_kern->thread_set_workq_pri(th, 31, POLICY_TIMESHARE);
2651 assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
964d3577 2652
2546420a
A
2653 tl->th_flags &= ~TH_LIST_EVENT_MGR_SCHED_PRI;
2654 }
2655 } else {
2656 ret = pthread_kern->thread_set_workq_qos(th, THREAD_QOS_UNSPECIFIED, 0);
2657 assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
2658 ret = pthread_kern->thread_set_workq_pri(th, (pri & (~_PTHREAD_PRIORITY_FLAGS_MASK)), POLICY_TIMESHARE);
2659 assert(ret == KERN_SUCCESS || ret == KERN_TERMINATED);
964d3577 2660
2546420a
A
2661 tl->th_flags |= TH_LIST_EVENT_MGR_SCHED_PRI;
2662 }
964d3577
A
2663}
2664
2665/**
2666 * grabs a thread for a request
2667 *
2668 * - called with the workqueue lock held...
2669 * - responsible for dropping it in all cases
2670 * - if provided mode is for overcommit, doesn't consume a reqcount
2671 *
2672 */
2546420a 2673static thread_t
964d3577 2674workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t thread,
2546420a
A
2675 enum run_nextreq_mode mode, pthread_priority_t prio,
2676 bool kevent_bind_via_return)
964d3577
A
2677{
2678 thread_t th_to_run = THREAD_NULL;
2546420a 2679 uint32_t upcall_flags = 0;
964d3577
A
2680 uint32_t priclass;
2681 struct threadlist *tl = NULL;
2682 struct uthread *uth = NULL;
2683 boolean_t start_timer = FALSE;
2684
2546420a
A
2685 if (mode == RUN_NEXTREQ_ADD_TIMER) {
2686 mode = RUN_NEXTREQ_DEFAULT;
2687 }
2688
964d3577 2689 // valid modes to call this function with
2546420a
A
2690 assert(mode == RUN_NEXTREQ_DEFAULT || mode == RUN_NEXTREQ_DEFAULT_KEVENT ||
2691 mode == RUN_NEXTREQ_OVERCOMMIT || mode == RUN_NEXTREQ_UNCONSTRAINED ||
2692 mode == RUN_NEXTREQ_EVENT_MANAGER || mode == RUN_NEXTREQ_OVERCOMMIT_KEVENT);
2693 // may only have a priority if in OVERCOMMIT or DEFAULT_KEVENT mode
2694 assert(mode == RUN_NEXTREQ_OVERCOMMIT || mode == RUN_NEXTREQ_OVERCOMMIT_KEVENT ||
2695 mode == RUN_NEXTREQ_DEFAULT_KEVENT || prio == 0);
964d3577
A
2696 // thread == thread_null means "please spin up a new workqueue thread, we can't reuse this"
2697 // thread != thread_null is thread reuse, and must be the current thread
2698 assert(thread == THREAD_NULL || thread == current_thread());
2699
2546420a 2700 PTHREAD_TRACE_WQ(TRACE_wq_run_nextitem|DBG_FUNC_START, wq, thread_tid(thread), wq->wq_thidlecount, wq->wq_reqcount, 0);
964d3577
A
2701
2702 if (thread != THREAD_NULL) {
2703 uth = pthread_kern->get_bsdthread_info(thread);
2704
2705 if ((tl = pthread_kern->uthread_get_threadlist(uth)) == NULL) {
2706 panic("wq thread with no threadlist");
f1a1da6c 2707 }
964d3577 2708 }
f1a1da6c 2709
964d3577 2710 /*
2546420a
A
2711 * from here until we drop the workq lock we can't be pre-empted since we
2712 * hold the lock in spin mode... this is important since we have to
2713 * independently update the priority that the thread is associated with and
2714	 * the priority-based counters that "workqueue_callback" also changes and
2715 * bases decisions on.
964d3577
A
2716 */
2717
2546420a
A
2718 /*
2719 * This giant monstrosity does three things:
2720 *
2721 * - adjusts the mode, if required
2722 * - selects the priclass that we'll be servicing
2723 * - sets any mode-specific upcall flags
2724 *
2725	 * When possible, special cases should be handled here and converted into
2726 * non-special cases.
2727 */
964d3577 2728 if (mode == RUN_NEXTREQ_OVERCOMMIT) {
2546420a 2729 priclass = pthread_priority_get_class_index(prio);
964d3577 2730 upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT;
2546420a
A
2731 } else if (mode == RUN_NEXTREQ_OVERCOMMIT_KEVENT){
2732 priclass = pthread_priority_get_class_index(prio);
2733 upcall_flags |= WQ_FLAG_THREAD_KEVENT;
2734 } else if (mode == RUN_NEXTREQ_EVENT_MANAGER){
2735 assert(wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0);
2736 priclass = WORKQUEUE_EVENT_MANAGER_BUCKET;
2737 upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER;
2738 if (wq->wq_kevent_requests[WORKQUEUE_EVENT_MANAGER_BUCKET]){
2739 upcall_flags |= WQ_FLAG_THREAD_KEVENT;
2740 }
964d3577
A
2741 } else if (wq->wq_reqcount == 0){
2742 // no work to do. we'll check again when new work arrives.
2743 goto done;
2546420a
A
2744 } else if (mode == RUN_NEXTREQ_DEFAULT_KEVENT) {
2745 assert(kevent_bind_via_return);
2746
2747 priclass = pthread_priority_get_class_index(prio);
2748 assert(priclass < WORKQUEUE_EVENT_MANAGER_BUCKET);
2749 assert(wq->wq_kevent_requests[priclass] > 0);
2750
2751 upcall_flags |= WQ_FLAG_THREAD_KEVENT;
2752 mode = RUN_NEXTREQ_DEFAULT;
964d3577
A
2753 } else if (wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] &&
2754 ((wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 0) ||
2755 (thread != THREAD_NULL && tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET))){
2756 // There's an event manager request and either:
2757 // - no event manager currently running
2758 // - we are re-using the event manager
2759 mode = RUN_NEXTREQ_EVENT_MANAGER;
2760 priclass = WORKQUEUE_EVENT_MANAGER_BUCKET;
2761 upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER;
2546420a 2762 if (wq->wq_kevent_requests[WORKQUEUE_EVENT_MANAGER_BUCKET]){
964d3577 2763 upcall_flags |= WQ_FLAG_THREAD_KEVENT;
2546420a 2764 }
964d3577
A
2765 } else {
2766 // Find highest priority and check for special request types
2767 for (priclass = 0; priclass < WORKQUEUE_EVENT_MANAGER_BUCKET; priclass++) {
2768 if (wq->wq_requests[priclass])
2769 break;
2770 }
2771 if (priclass == WORKQUEUE_EVENT_MANAGER_BUCKET){
2772			// the only request should have been for the event manager since it's not in a bucket,
2773 // but we weren't able to handle it since there's already an event manager running,
2774 // so we fell into this case
2775 assert(wq->wq_requests[WORKQUEUE_EVENT_MANAGER_BUCKET] == 1 &&
2776 wq->wq_thscheduled_count[WORKQUEUE_EVENT_MANAGER_BUCKET] == 1 &&
2777 wq->wq_reqcount == 1);
2778 goto done;
2779 }
2780
2781 if (wq->wq_kevent_ocrequests[priclass]){
2782 mode = RUN_NEXTREQ_DEFERRED_OVERCOMMIT;
2783 upcall_flags |= WQ_FLAG_THREAD_KEVENT;
2784 upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT;
2785 } else if (wq->wq_ocrequests[priclass]){
2786 mode = RUN_NEXTREQ_DEFERRED_OVERCOMMIT;
2787 upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT;
2788 } else if (wq->wq_kevent_requests[priclass]){
2789 upcall_flags |= WQ_FLAG_THREAD_KEVENT;
2790 }
2791 }
2792
2546420a
A
2793 assert(mode != RUN_NEXTREQ_EVENT_MANAGER || priclass == WORKQUEUE_EVENT_MANAGER_BUCKET);
2794 assert(mode == RUN_NEXTREQ_EVENT_MANAGER || priclass != WORKQUEUE_EVENT_MANAGER_BUCKET);
2795
964d3577
A
2796 if (mode == RUN_NEXTREQ_DEFAULT /* non-overcommit */){
2797 uint32_t my_priclass = (thread != THREAD_NULL) ? tl->th_priority : WORKQUEUE_NUM_BUCKETS;
2798 if (may_start_constrained_thread(wq, priclass, my_priclass, &start_timer) == FALSE){
2799 // per policy, we won't start another constrained thread
2800 goto done;
2801 }
f1a1da6c
A
2802 }
2803
2804 if (thread != THREAD_NULL) {
2805 /*
2806 * thread is non-NULL here when we return from userspace
2807		 * in workq_kernreturn; rather than trying to find a thread,
2808 * we pick up new work for this specific thread.
2809 */
2810 th_to_run = thread;
2546420a 2811 upcall_flags |= WQ_FLAG_THREAD_REUSE;
964d3577 2812 } else if (wq->wq_thidlecount == 0) {
f1a1da6c
A
2813 /*
2814 * we have no additional threads waiting to pick up
2815 * work, however, there is additional work to do.
2816 */
2546420a 2817 start_timer = WQ_TIMER_DELAYED_NEEDED(wq);
f1a1da6c 2818
2546420a 2819 PTHREAD_TRACE_WQ(TRACE_wq_stalled, wq, wq->wq_nthreads, start_timer, 0, 0);
f1a1da6c 2820
964d3577
A
2821 goto done;
2822 } else {
2546420a
A
2823 // there is both work available and an idle thread, so activate a thread
2824 tl = pop_from_thidlelist(wq, priclass);
2825 th_to_run = tl->th_thread;
f1a1da6c
A
2826 }
2827
964d3577
A
2828 // Adjust counters and thread flags AKA consume the request
2829 // TODO: It would be lovely if OVERCOMMIT consumed reqcount
2830 switch (mode) {
2831 case RUN_NEXTREQ_DEFAULT:
2546420a
A
2832 case RUN_NEXTREQ_DEFAULT_KEVENT: /* actually mapped to DEFAULT above */
2833 case RUN_NEXTREQ_ADD_TIMER: /* actually mapped to DEFAULT above */
964d3577
A
2834 case RUN_NEXTREQ_UNCONSTRAINED:
2835 wq->wq_reqcount--;
2836 wq->wq_requests[priclass]--;
f1a1da6c 2837
964d3577
A
2838 if (mode == RUN_NEXTREQ_DEFAULT){
2839 if (!(tl->th_flags & TH_LIST_CONSTRAINED)) {
2840 wq->wq_constrained_threads_scheduled++;
2841 tl->th_flags |= TH_LIST_CONSTRAINED;
2842 }
2843 } else if (mode == RUN_NEXTREQ_UNCONSTRAINED){
2844 if (tl->th_flags & TH_LIST_CONSTRAINED) {
964d3577
A
2845 wq->wq_constrained_threads_scheduled--;
2846 tl->th_flags &= ~TH_LIST_CONSTRAINED;
2847 }
2848 }
2849 if (upcall_flags & WQ_FLAG_THREAD_KEVENT){
2850 wq->wq_kevent_requests[priclass]--;
2851 }
2852 break;
f1a1da6c 2853
964d3577
A
2854 case RUN_NEXTREQ_EVENT_MANAGER:
2855 wq->wq_reqcount--;
2856 wq->wq_requests[priclass]--;
f1a1da6c 2857
964d3577
A
2858 if (tl->th_flags & TH_LIST_CONSTRAINED) {
2859 wq->wq_constrained_threads_scheduled--;
2860 tl->th_flags &= ~TH_LIST_CONSTRAINED;
2861 }
2862 if (upcall_flags & WQ_FLAG_THREAD_KEVENT){
2863 wq->wq_kevent_requests[priclass]--;
2864 }
2865 break;
f1a1da6c 2866
964d3577
A
2867 case RUN_NEXTREQ_DEFERRED_OVERCOMMIT:
2868 wq->wq_reqcount--;
2869 wq->wq_requests[priclass]--;
2870 if (upcall_flags & WQ_FLAG_THREAD_KEVENT){
2871 wq->wq_kevent_ocrequests[priclass]--;
2872 } else {
2873 wq->wq_ocrequests[priclass]--;
2874 }
2875 /* FALLTHROUGH */
2876 case RUN_NEXTREQ_OVERCOMMIT:
2546420a 2877 case RUN_NEXTREQ_OVERCOMMIT_KEVENT:
964d3577
A
2878 if (tl->th_flags & TH_LIST_CONSTRAINED) {
2879 wq->wq_constrained_threads_scheduled--;
2880 tl->th_flags &= ~TH_LIST_CONSTRAINED;
2881 }
2882 break;
f1a1da6c
A
2883 }
2884
964d3577
A
2885 // Confirm we've maintained our counter invariants
2886 assert(wq->wq_requests[priclass] < UINT16_MAX);
2887 assert(wq->wq_ocrequests[priclass] < UINT16_MAX);
2888 assert(wq->wq_kevent_requests[priclass] < UINT16_MAX);
2889 assert(wq->wq_kevent_ocrequests[priclass] < UINT16_MAX);
2890 assert(wq->wq_ocrequests[priclass] + wq->wq_kevent_requests[priclass] +
2891 wq->wq_kevent_ocrequests[priclass] <=
2892 wq->wq_requests[priclass]);
2893
2546420a
A
2894 assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0);
2895 if (upcall_flags & WQ_FLAG_THREAD_KEVENT) {
2896 tl->th_flags |= TH_LIST_KEVENT;
2897 } else {
2898 tl->th_flags &= ~TH_LIST_KEVENT;
2899 }
2900
964d3577 2901 uint32_t orig_class = tl->th_priority;
f1a1da6c
A
2902 tl->th_priority = (uint8_t)priclass;
2903
964d3577 2904 if ((thread != THREAD_NULL) && (orig_class != priclass)) {
f1a1da6c
A
2905 /*
2906 * we need to adjust these counters based on this
2907 * thread's new disposition w/r to priority
2908 */
2909 OSAddAtomic(-1, &wq->wq_thactive_count[orig_class]);
2910 OSAddAtomic(1, &wq->wq_thactive_count[priclass]);
2911
2912 wq->wq_thscheduled_count[orig_class]--;
2913 wq->wq_thscheduled_count[priclass]++;
2914 }
2915 wq->wq_thread_yielded_count = 0;
2916
2546420a
A
2917 pthread_priority_t outgoing_priority = pthread_priority_from_wq_class_index(wq, tl->th_priority);
2918 PTHREAD_TRACE_WQ(TRACE_wq_reset_priority | DBG_FUNC_START, wq, thread_tid(tl->th_thread), outgoing_priority, 0, 0);
2919 reset_priority(tl, outgoing_priority);
2920 PTHREAD_TRACE_WQ(TRACE_wq_reset_priority | DBG_FUNC_END, wq, thread_tid(tl->th_thread), outgoing_priority, 0, 0);
f1a1da6c 2921
2546420a
A
2922 /*
2923	 * persist upcall_flags so that it can be retrieved in setup_wqthread
2924 */
2925 tl->th_upcall_flags = upcall_flags >> WQ_FLAG_THREAD_PRIOSHIFT;
f1a1da6c
A
2926
2927 /*
2928	 * if the current thread is reused for the work request, it does not return via unix_syscall
2929 */
2546420a
A
2930 wq_runreq(p, th_to_run, wq, tl, (thread == th_to_run),
2931 (upcall_flags & WQ_FLAG_THREAD_KEVENT) && !kevent_bind_via_return);
f1a1da6c 2932
2546420a 2933 PTHREAD_TRACE_WQ(TRACE_wq_run_nextitem|DBG_FUNC_END, wq, thread_tid(th_to_run), mode == RUN_NEXTREQ_OVERCOMMIT, 1, 0);
f1a1da6c 2934
2546420a
A
2935 assert(!kevent_bind_via_return || (upcall_flags & WQ_FLAG_THREAD_KEVENT));
2936 if (kevent_bind_via_return && (upcall_flags & WQ_FLAG_THREAD_KEVENT)) {
2937 tl->th_flags |= TH_LIST_KEVENT_BOUND;
f1a1da6c 2938 }
f1a1da6c 2939
2546420a
A
2940 workqueue_unlock(wq);
2941
2942 return th_to_run;
f1a1da6c 2943
2546420a 2944done:
f1a1da6c
A
2945 if (start_timer)
2946 workqueue_interval_timer_start(wq);
2947
2546420a 2948 PTHREAD_TRACE_WQ(TRACE_wq_run_nextitem | DBG_FUNC_END, wq, thread_tid(thread), start_timer, 3, 0);
f1a1da6c 2949
964d3577 2950 if (thread != THREAD_NULL){
2546420a 2951 parkit(wq, tl, thread);
964d3577
A
2952 /* NOT REACHED */
2953 }
f1a1da6c 2954
2546420a
A
2955 workqueue_unlock(wq);
2956
2957 return THREAD_NULL;
f1a1da6c
A
2958}
2959
964d3577 2960/**
2546420a 2961 * parked thread wakes up
964d3577 2962 */
2546420a
A
2963static void __dead2
2964wq_unpark_continue(void* __unused ptr, wait_result_t wait_result)
f1a1da6c 2965{
2546420a
A
2966 boolean_t first_use = false;
2967 thread_t th = current_thread();
2968 proc_t p = current_proc();
f1a1da6c 2969
2546420a
A
2970 struct uthread *uth = pthread_kern->get_bsdthread_info(th);
2971 if (uth == NULL) goto done;
f1a1da6c 2972
2546420a
A
2973 struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
2974 if (tl == NULL) goto done;
f1a1da6c 2975
2546420a 2976 struct workqueue *wq = tl->th_workq;
f1a1da6c 2977
2546420a 2978 workqueue_lock_spin(wq);
f1a1da6c 2979
2546420a 2980 assert(tl->th_flags & TH_LIST_INITED);
f1a1da6c 2981
2546420a
A
2982 if ((tl->th_flags & TH_LIST_NEW)){
2983 tl->th_flags &= ~(TH_LIST_NEW);
2984 first_use = true;
2985 }
2986
2987 if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
f1a1da6c 2988 /*
2546420a 2989 * The normal wakeup path.
f1a1da6c 2990 */
2546420a 2991 goto return_to_user;
f1a1da6c 2992 }
f1a1da6c 2993
2546420a
A
2994 if ((tl->th_flags & TH_LIST_RUNNING) == 0 &&
2995 wait_result == THREAD_TIMED_OUT &&
2996 tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET &&
2997 TAILQ_FIRST(&wq->wq_thidlemgrlist) == tl &&
2998 TAILQ_NEXT(tl, th_entry) == NULL){
2999 /*
3000		 * If we are the only idle manager and we popped for self-destruction,
3001 * then don't actually exit. Instead, free our stack to save some
3002 * memory and re-park.
3003 */
f1a1da6c 3004
2546420a 3005 workqueue_unlock(wq);
f1a1da6c 3006
2546420a 3007 vm_map_t vmap = wq->wq_map;
f1a1da6c 3008
2546420a
A
3009 // Keep this in sync with _setup_wqthread()
3010 const vm_size_t guardsize = vm_map_page_size(vmap);
3011 const user_addr_t freeaddr = (user_addr_t)tl->th_stackaddr + guardsize;
3012 const vm_map_offset_t freesize = vm_map_trunc_page_mask((PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET) - 1, vm_map_page_mask(vmap)) - guardsize;
3013
3014 int kr;
3015 kr = mach_vm_behavior_set(vmap, freeaddr, freesize, VM_BEHAVIOR_REUSABLE);
3016 assert(kr == KERN_SUCCESS || kr == KERN_INVALID_ADDRESS);
f1a1da6c 3017
2546420a 3018 workqueue_lock_spin(wq);
f1a1da6c 3019
964d3577 3020 if ( !(tl->th_flags & TH_LIST_RUNNING)) {
2546420a
A
3021 assert_wait((caddr_t)tl, (THREAD_INTERRUPTIBLE));
3022
3023 workqueue_unlock(wq);
3024
3025 thread_block(wq_unpark_continue);
3026 /* NOT REACHED */
964d3577 3027 }
2546420a 3028 }
f1a1da6c 3029
2546420a
A
3030 if ((tl->th_flags & TH_LIST_RUNNING) == 0) {
3031 assert((tl->th_flags & TH_LIST_BUSY) == 0);
964d3577 3032 /*
2546420a
A
3033 * We were set running, but not for the purposes of actually running.
3034	 * This could be because the timer elapsed, or because the thread was
3035	 * aborted. Either way, we need to return to userspace to exit.
3036 *
3037 * The call to workqueue_removethread will consume the lock.
964d3577 3038 */
f1a1da6c 3039
2546420a
A
3040 if (!first_use &&
3041 tl->th_priority != qos_class_get_class_index(WQ_THREAD_CLEANUP_QOS)) {
3042 // Reset the QoS to something low for the pthread cleanup
3043 pthread_priority_t cleanup_pri = _pthread_priority_make_newest(WQ_THREAD_CLEANUP_QOS, 0, 0);
3044 reset_priority(tl, cleanup_pri);
3045 }
f1a1da6c 3046
2546420a 3047 workqueue_removethread(tl, 0, first_use);
964d3577 3048
2546420a
A
3049 if (first_use){
3050 pthread_kern->thread_bootstrap_return();
3051 } else {
3052 pthread_kern->unix_syscall_return(0);
f1a1da6c 3053 }
2546420a 3054 /* NOT REACHED */
f1a1da6c 3055 }
964d3577 3056
2546420a
A
3057 /*
3058 * The timer woke us up or the thread was aborted. However, we have
3059 * already started to make this a runnable thread. Wait for that to
3060 * finish, then continue to userspace.
3061 */
3062 while ((tl->th_flags & TH_LIST_BUSY)) {
3063 assert_wait((caddr_t)tl, (THREAD_UNINT));
964d3577 3064
2546420a 3065 workqueue_unlock(wq);
964d3577 3066
2546420a 3067 thread_block(THREAD_CONTINUE_NULL);
f1a1da6c 3068
2546420a
A
3069 workqueue_lock_spin(wq);
3070 }
f1a1da6c 3071
2546420a
A
3072return_to_user:
3073 workqueue_unlock(wq);
3074 _setup_wqthread(p, th, wq, tl, first_use);
3075 pthread_kern->thread_sched_call(th, workqueue_callback);
3076done:
3077 if (first_use){
3078 pthread_kern->thread_bootstrap_return();
3079 } else {
3080 pthread_kern->unix_syscall_return(EJUSTRETURN);
3081 }
3082 panic("Our attempt to return to userspace failed...");
3083}
f1a1da6c 3084
2546420a
A
3085/* called with workqueue lock held */
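/*
 * When return_directly is set, this function does not come back to the
 * caller: it sets up the thread state and goes straight to user space via
 * unix_syscall_return(). Otherwise the parked thread is woken up and the
 * workqueue lock is returned to the caller still held.
 */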
3086static void
3087wq_runreq(proc_t p, thread_t th, struct workqueue *wq, struct threadlist *tl,
3088 boolean_t return_directly, boolean_t needs_kevent_bind)
f1a1da6c 3089{
2546420a 3090 PTHREAD_TRACE1_WQ(TRACE_wq_runitem | DBG_FUNC_START, tl->th_workq, 0, 0, thread_tid(current_thread()), thread_tid(th));
f1a1da6c 3091
2546420a
A
3092 unsigned int kevent_flags = KEVENT_FLAG_WORKQ;
3093 if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
3094 kevent_flags |= KEVENT_FLAG_WORKQ_MANAGER;
3095 }
f1a1da6c
A
3096
3097 if (return_directly) {
2546420a
A
3098 if (needs_kevent_bind) {
3099 assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0);
3100 tl->th_flags |= TH_LIST_KEVENT_BOUND;
3101 }
f1a1da6c 3102
2546420a 3103 workqueue_unlock(wq);
f1a1da6c 3104
2546420a
A
3105 if (needs_kevent_bind) {
3106 kevent_qos_internal_bind(p, class_index_get_thread_qos(tl->th_priority), th, kevent_flags);
3107 }
3108
3109 /*
3110 * For preemption reasons, we want to reset the voucher as late as
3111 * possible, so we do it in two places:
3112 * - Just before parking (i.e. in parkit())
3113 * - Prior to doing the setup for the next workitem (i.e. here)
3114 *
3115 * Those two places are sufficient to ensure we always reset it before
3116 * it goes back out to user space, but be careful to not break that
3117 * guarantee.
3118 */
3119 kern_return_t kr = pthread_kern->thread_set_voucher_name(MACH_PORT_NULL);
3120 assert(kr == KERN_SUCCESS);
f1a1da6c 3121
2546420a 3122 _setup_wqthread(p, th, wq, tl, false);
f1a1da6c 3123
2546420a 3124 PTHREAD_TRACE_WQ(TRACE_wq_run_nextitem|DBG_FUNC_END, tl->th_workq, 0, 0, 4, 0);
f1a1da6c 3125
2546420a
A
3126 pthread_kern->unix_syscall_return(EJUSTRETURN);
3127 /* NOT REACHED */
3128 }
f1a1da6c 3129
2546420a
A
3130 if (needs_kevent_bind) {
3131 // Leave TH_LIST_BUSY set so that the thread can't beat us to calling kevent
3132 workqueue_unlock(wq);
3133 assert((tl->th_flags & TH_LIST_KEVENT_BOUND) == 0);
3134 kevent_qos_internal_bind(p, class_index_get_thread_qos(tl->th_priority), th, kevent_flags);
3135 tl->th_flags |= TH_LIST_KEVENT_BOUND;
3136 workqueue_lock_spin(wq);
f1a1da6c 3137 }
2546420a
A
3138 tl->th_flags &= ~(TH_LIST_BUSY);
3139 thread_wakeup_thread(tl,th);
f1a1da6c
A
3140}
3141
2546420a 3142#define KEVENT_LIST_LEN 16 // WORKQ_KEVENT_EVENT_BUFFER_LEN
964d3577 3143#define KEVENT_DATA_SIZE (32 * 1024)
f1a1da6c 3144
964d3577
A
3145/**
3146 * Configures the initial thread stack and registers to jump into:
2546420a 3147 * _pthread_wqthread(pthread_t self, mach_port_t kport, void *stackaddr, void *keventlist, int upcall_flags, int nkevents);
964d3577
A
3148 * To get there we jump through assembly stubs in pthread_asm.s. Those
3149 * routines set up a stack frame, using the current stack pointer, and marshal
3150 * arguments from registers to the stack as required by the ABI.
3151 *
3152 * One odd thing we do here is to start the pthread_t 4k below what would be the
3153 * top of the stack otherwise. This is because usually only the first 4k of the
3154 * pthread_t will be used and so we want to put it on the same 16k page as the
3155 * top of the stack to save memory.
3156 *
3157 * When we are done the stack will look like:
3158 * |-----------| th_stackaddr + th_allocsize
3159 * |pthread_t | th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET
3160 * |kevent list| optionally - at most KEVENT_LIST_LEN events
3161 * |kevent data| optionally - at most KEVENT_DATA_SIZE bytes
3162 * |stack gap | bottom aligned to 16 bytes, and at least as big as stack_gap_min
3163 * | STACK |
3164 * | ⇓ |
3165 * | |
3166 * |guard page | guardsize
3167 * |-----------| th_stackaddr
3168 */
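/*
 * A sketch of the address arithmetic below, under the assumption (for
 * illustration only) of a 16K kernel page (guardsize = 0x4000) and a 512K
 * PTH_DEFAULT_STACKSIZE:
 *   stack_bottom_addr = th_stackaddr + 0x4000
 *   pthread_self_addr = th_stackaddr + 0x4000 + 0x80000 + PTHREAD_T_OFFSET
 *   stack_top_addr    = (pthread_self_addr - stack_gap_min) & -stack_align_min
 * so the usable stack runs from stack_bottom_addr up to stack_top_addr, with
 * the pthread_t and (optionally) the kevent buffers packed above it.
 */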
2546420a
A
3169void
3170_setup_wqthread(proc_t p, thread_t th, struct workqueue *wq, struct threadlist *tl,
3171 bool first_use)
f1a1da6c 3172{
2546420a
A
3173 int error;
3174 uint32_t upcall_flags;
3175
3176 pthread_priority_t priority = pthread_priority_from_wq_class_index(wq, tl->th_priority);
f1a1da6c 3177
964d3577
A
3178 const vm_size_t guardsize = vm_map_page_size(tl->th_workq->wq_map);
3179 const vm_size_t stack_gap_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_REDZONE_LEN;
3180 const vm_size_t stack_align_min = (proc_is64bit(p) == 0) ? C_32_STK_ALIGN : C_64_STK_ALIGN;
3181
3182 user_addr_t pthread_self_addr = (user_addr_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET);
3183 user_addr_t stack_top_addr = (user_addr_t)((pthread_self_addr - stack_gap_min) & -stack_align_min);
3184 user_addr_t stack_bottom_addr = (user_addr_t)(tl->th_stackaddr + guardsize);
f1a1da6c 3185
2546420a
A
3186 user_addr_t wqstart_fnptr = pthread_kern->proc_get_wqthread(p);
3187 if (!wqstart_fnptr) {
3188 panic("workqueue thread start function pointer is NULL");
3189 }
3190
f1a1da6c
A
3191	/* Put the QoS class value into the lower bits of the reuse_thread register; this is where
3192 * the thread priority used to be stored anyway.
3193 */
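	/*
	 * The bits below WQ_FLAG_THREAD_PRIOSHIFT carry the priority/QoS value;
	 * the bits above it carry the per-request flags that were stashed in
	 * tl->th_upcall_flags when the work request was assigned.
	 */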
2546420a
A
3194 upcall_flags = tl->th_upcall_flags << WQ_FLAG_THREAD_PRIOSHIFT;
3195 upcall_flags |= (_pthread_priority_get_qos_newest(priority) & WQ_FLAG_THREAD_PRIOMASK);
f1a1da6c 3196
2546420a
A
3197 upcall_flags |= WQ_FLAG_THREAD_NEWSPI;
3198
3199 uint32_t tsd_offset = pthread_kern->proc_get_pthread_tsd_offset(p);
3200 if (tsd_offset) {
3201 mach_vm_offset_t th_tsd_base = (mach_vm_offset_t)pthread_self_addr + tsd_offset;
3202 kern_return_t kret = pthread_kern->thread_set_tsd_base(th, th_tsd_base);
3203 if (kret == KERN_SUCCESS) {
3204 upcall_flags |= WQ_FLAG_THREAD_TSD_BASE_SET;
3205 }
3206 }
3207
3208 if (first_use) {
3209 /*
3210 * Pre-fault the first page of the new thread's stack and the page that will
3211 * contain the pthread_t structure.
3212 */
3213 vm_map_t vmap = pthread_kern->current_map();
3214 if (vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)) !=
3215 vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap))){
3216 vm_fault( vmap,
3217 vm_map_trunc_page_mask((vm_map_offset_t)(stack_top_addr - C_64_REDZONE_LEN), vm_map_page_mask(vmap)),
3218 VM_PROT_READ | VM_PROT_WRITE,
3219 FALSE,
3220 THREAD_UNINT, NULL, 0);
3221 }
3222 vm_fault( vmap,
3223 vm_map_trunc_page_mask((vm_map_offset_t)pthread_self_addr, vm_map_page_mask(vmap)),
3224 VM_PROT_READ | VM_PROT_WRITE,
3225 FALSE,
3226 THREAD_UNINT, NULL, 0);
3227 } else {
3228 upcall_flags |= WQ_FLAG_THREAD_REUSE;
3229 }
f1a1da6c 3230
964d3577
A
3231 user_addr_t kevent_list = NULL;
3232 int kevent_count = 0;
2546420a 3233 if (upcall_flags & WQ_FLAG_THREAD_KEVENT){
964d3577
A
3234 kevent_list = pthread_self_addr - KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
3235 kevent_count = KEVENT_LIST_LEN;
f1a1da6c 3236
964d3577
A
3237 user_addr_t kevent_data_buf = kevent_list - KEVENT_DATA_SIZE;
3238 user_size_t kevent_data_available = KEVENT_DATA_SIZE;
f1a1da6c 3239
964d3577 3240 int32_t events_out = 0;
f1a1da6c 3241
2546420a
A
3242		assert(tl->th_flags & TH_LIST_KEVENT_BOUND);
3243 unsigned int flags = KEVENT_FLAG_WORKQ | KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE;
3244 if (tl->th_priority == WORKQUEUE_EVENT_MANAGER_BUCKET) {
3245 flags |= KEVENT_FLAG_WORKQ_MANAGER;
3246 }
3247 int ret = kevent_qos_internal(p, class_index_get_thread_qos(tl->th_priority), NULL, 0, kevent_list, kevent_count,
964d3577 3248 kevent_data_buf, &kevent_data_available,
2546420a 3249 flags, &events_out);
f1a1da6c 3250
2546420a
A
3251 // turns out there are a lot of edge cases where this will fail, so not enabled by default
3252 //assert((ret == KERN_SUCCESS && events_out != -1) || ret == KERN_ABORTED);
3253
3254		// squash any errors into just empty output
964d3577
A
3255 if (ret != KERN_SUCCESS || events_out == -1){
3256 events_out = 0;
3257 kevent_data_available = KEVENT_DATA_SIZE;
3258 }
3259
3260 // We shouldn't get data out if there aren't events available
3261 assert(events_out != 0 || kevent_data_available == KEVENT_DATA_SIZE);
3262
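		// If events came back, drop the user stack top to just below the
		// kevent list (and below any out-of-line kevent data that was
		// produced) so the returned events stay intact above the stack.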
2546420a 3263 if (events_out > 0){
964d3577
A
3264 if (kevent_data_available == KEVENT_DATA_SIZE){
3265 stack_top_addr = (kevent_list - stack_gap_min) & -stack_align_min;
3266 } else {
3267 stack_top_addr = (kevent_data_buf + kevent_data_available - stack_gap_min) & -stack_align_min;
3268 }
2546420a
A
3269
3270 kevent_count = events_out;
964d3577
A
3271 } else {
3272 kevent_list = NULL;
3273 kevent_count = 0;
3274 }
3275 }
3276
3277#if defined(__i386__) || defined(__x86_64__)
2546420a 3278 if (proc_is64bit(p) == 0) {
964d3577 3279 x86_thread_state32_t state = {
2546420a 3280 .eip = (unsigned int)wqstart_fnptr,
964d3577
A
3281 .eax = /* arg0 */ (unsigned int)pthread_self_addr,
3282 .ebx = /* arg1 */ (unsigned int)tl->th_thport,
3283 .ecx = /* arg2 */ (unsigned int)stack_bottom_addr,
3284 .edx = /* arg3 */ (unsigned int)kevent_list,
2546420a 3285 .edi = /* arg4 */ (unsigned int)upcall_flags,
964d3577
A
3286 .esi = /* arg5 */ (unsigned int)kevent_count,
3287
3288 .esp = (int)((vm_offset_t)stack_top_addr),
3289 };
3290
2546420a
A
3291 error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
3292 if (error != KERN_SUCCESS) {
3293			panic("%s: thread_set_wq_state failed: %d", __func__, error);
3294 }
964d3577
A
3295 } else {
3296 x86_thread_state64_t state64 = {
3297 // x86-64 already passes all the arguments in registers, so we just put them in their final place here
2546420a 3298 .rip = (uint64_t)wqstart_fnptr,
964d3577
A
3299 .rdi = (uint64_t)pthread_self_addr,
3300 .rsi = (uint64_t)tl->th_thport,
3301 .rdx = (uint64_t)stack_bottom_addr,
3302 .rcx = (uint64_t)kevent_list,
2546420a 3303 .r8 = (uint64_t)upcall_flags,
964d3577
A
3304 .r9 = (uint64_t)kevent_count,
3305
3306 .rsp = (uint64_t)(stack_top_addr)
3307 };
3308
3309 error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state64);
f1a1da6c 3310 if (error != KERN_SUCCESS) {
2546420a 3311			panic("%s: thread_set_wq_state failed: %d", __func__, error);
f1a1da6c
A
3312 }
3313 }
3314#else
3315#error setup_wqthread not defined for this architecture
3316#endif
f1a1da6c
A
3317}
3318
964d3577
A
3319#if DEBUG
3320static int wq_kevent_test SYSCTL_HANDLER_ARGS {
3321 //(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req)
3322#pragma unused(oidp, arg1, arg2)
3323 int error;
3324 struct workq_reqthreads_req_s requests[64] = {};
3325
3326 if (req->newlen > sizeof(requests) || req->newlen < sizeof(struct workq_reqthreads_req_s))
3327 return EINVAL;
3328
3329 error = copyin(req->newptr, requests, req->newlen);
3330 if (error) return error;
3331
3332 _workq_reqthreads(req->p, (int)(req->newlen / sizeof(struct workq_reqthreads_req_s)), requests);
3333
3334 return 0;
3335}
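#if 0
/*
 * Sketch only (not from the original source): one way a userspace test might
 * drive the DEBUG handler above, assuming the OID registered as
 * sysctl__debug_wq_kevent_test shows up as "debug.wq_kevent_test" and that
 * the test has the workq_reqthreads_req_s definition from kern_internal.h.
 * Requests go in via newp/newlen; at most 64 fit per call (see the bounds
 * check above) and nothing is read back.
 */
#include <sys/sysctl.h>

static int
drive_wq_kevent_test(struct workq_reqthreads_req_s *reqs, size_t nreqs)
{
	return sysctlbyname("debug.wq_kevent_test", NULL, NULL,
			reqs, nreqs * sizeof(*reqs));
}
#endif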
3336#endif // DEBUG
3337
3338#pragma mark - Misc
3339
f1a1da6c
A
3340int
3341_fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
3342{
3343 struct workqueue * wq;
3344 int error = 0;
3345 int activecount;
3346 uint32_t pri;
3347
f1a1da6c 3348 if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL) {
2546420a 3349 return EINVAL;
f1a1da6c 3350 }
2546420a
A
3351
3352 workqueue_lock_spin(wq);
f1a1da6c
A
3353 activecount = 0;
3354
3355 for (pri = 0; pri < WORKQUEUE_NUM_BUCKETS; pri++) {
3356 activecount += wq->wq_thactive_count[pri];
3357 }
3358 pwqinfo->pwq_nthreads = wq->wq_nthreads;
3359 pwqinfo->pwq_runthreads = activecount;
3360 pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
3361 pwqinfo->pwq_state = 0;
3362
2546420a 3363 if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
f1a1da6c
A
3364 pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
3365 }
3366
2546420a 3367 if (wq->wq_nthreads >= wq_max_threads) {
f1a1da6c
A
3368 pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
3369 }
3370
2546420a 3371 workqueue_unlock(wq);
f1a1da6c
A
3372 return(error);
3373}
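#if 0
/*
 * Sketch only (not from the original source): the data filled in above is
 * typically reached from userspace via proc_pidinfo(PROC_PIDWORKQUEUEINFO)
 * in libproc, e.g.:
 */
#include <libproc.h>
#include <stdio.h>
#include <sys/proc_info.h>

static void
print_wq_info(pid_t pid)
{
	struct proc_workqueueinfo pwqinfo;

	if (proc_pidinfo(pid, PROC_PIDWORKQUEUEINFO, 0, &pwqinfo,
			PROC_PIDWORKQUEUEINFO_SIZE) == PROC_PIDWORKQUEUEINFO_SIZE) {
		printf("threads=%u running=%u blocked=%u state=0x%x\n",
				pwqinfo.pwq_nthreads, pwqinfo.pwq_runthreads,
				pwqinfo.pwq_blockedthreads, pwqinfo.pwq_state);
	}
}
#endif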
3374
2546420a
A
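/*
 * Lock-free variant of the state computed above, for the kernel debugger /
 * stackshot (kdp) path: if the workqueue lock is already held the counters
 * cannot be trusted, so nothing is reported; otherwise they are read without
 * taking the lock.
 */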
3375uint32_t
3376_get_pwq_state_kdp(proc_t p)
3377{
3378 if (p == NULL) {
3379 return 0;
3380 }
3381
3382 struct workqueue *wq = pthread_kern->proc_get_wqptr(p);
3383
3384 if (wq == NULL || workqueue_lock_spin_is_acquired_kdp(wq)) {
3385 return 0;
3386 }
3387
3388 uint32_t pwq_state = WQ_FLAGS_AVAILABLE;
3389
3390 if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
3391 pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
3392 }
3393
3394 if (wq->wq_nthreads >= wq_max_threads) {
3395 pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
3396 }
3397
3398 return pwq_state;
3399}
3400
f1a1da6c
A
3401int
3402_thread_selfid(__unused struct proc *p, uint64_t *retval)
3403{
3404 thread_t thread = current_thread();
3405 *retval = thread_tid(thread);
3406 return KERN_SUCCESS;
3407}
3408
3409void
3410_pthread_init(void)
3411{
3412 pthread_lck_grp_attr = lck_grp_attr_alloc_init();
3413 pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);
3414
3415 /*
3416 * allocate the lock attribute for pthread synchronizers
3417 */
3418 pthread_lck_attr = lck_attr_alloc_init();
3419
f1a1da6c
A
3420 pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
3421
3422 pth_global_hashinit();
3423 psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
3424 psynch_zoneinit();
3425
3426 /*
3427 * register sysctls
3428 */
3429 sysctl_register_oid(&sysctl__kern_wq_yielded_threshold);
3430 sysctl_register_oid(&sysctl__kern_wq_yielded_window_usecs);
3431 sysctl_register_oid(&sysctl__kern_wq_stalled_window_usecs);
3432 sysctl_register_oid(&sysctl__kern_wq_reduce_pool_window_usecs);
3433 sysctl_register_oid(&sysctl__kern_wq_max_timer_interval_usecs);
3434 sysctl_register_oid(&sysctl__kern_wq_max_threads);
3435 sysctl_register_oid(&sysctl__kern_wq_max_constrained_threads);
3436 sysctl_register_oid(&sysctl__kern_pthread_debug_tracing);
964d3577
A
3437
3438#if DEBUG
3439 sysctl_register_oid(&sysctl__kern_wq_max_concurrency);
3440 sysctl_register_oid(&sysctl__debug_wq_kevent_test);
3441#endif
3442
3443 wq_max_concurrency = pthread_kern->ml_get_max_cpus();
3444
f1a1da6c 3445}