/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
#define _PTHREAD_CONDATTR_T
#define _PTHREAD_COND_T
#define _PTHREAD_MUTEXATTR_T
#define _PTHREAD_MUTEX_T
#define _PTHREAD_RWLOCKATTR_T
#define _PTHREAD_RWLOCK_T

#undef pthread_mutexattr_t
#undef pthread_mutex_t
#undef pthread_condattr_t
#undef pthread_cond_t
#undef pthread_rwlockattr_t
#undef pthread_rwlock_t
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
//#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kdebug.h>
//#include <sys/sysproto.h>
#include <sys/user.h>		/* for coredump */
#include <sys/proc_info.h>	/* for fill_procworkqueue */

#include <mach/mach_port.h>
#include <mach/mach_types.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <mach/vm_prot.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/kalloc.h>
#include <kern/sched_prim.h>	/* for thread_exception_return */
#include <kern/processor.h>
#include <kern/assert.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_status.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
//#include <vm/vm_protos.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <mach/thread_act.h>	/* for thread_resume */
#include <machine/machine_routines.h>

#include <libkern/OSAtomic.h>

#include <sys/pthread_shims.h>
#include "kern_internal.h"
uint32_t pthread_debug_tracing = 0;

SYSCTL_INT(_kern, OID_AUTO, pthread_debug_tracing, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &pthread_debug_tracing, 0, "")
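/*
 * Usage sketch (illustrative, exercised from user space rather than from this
 * file): a CTLFLAG_RW knob registered this way under _kern can be read or
 * toggled with the sysctl(8) utility, e.g.
 *
 *	sysctl kern.pthread_debug_tracing
 *	sudo sysctl -w kern.pthread_debug_tracing=1
 */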
// XXX: Dirty import for sys/signalvar.h that's wrapped in BSD_KERNEL_PRIVATE
#define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP))
lck_grp_attr_t	*pthread_lck_grp_attr;
lck_grp_t	*pthread_lck_grp;
lck_attr_t	*pthread_lck_attr;

extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
extern void workqueue_thread_yielded(void);
static boolean_t workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t th, boolean_t force_oc,
				       boolean_t overcommit, pthread_priority_t oc_prio);

static boolean_t workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority);

static void wq_runreq(proc_t p, boolean_t overcommit, pthread_priority_t priority, thread_t th, struct threadlist *tl,
		      int reuse_thread, int wake_thread, int return_directly);

static int _setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, pthread_priority_t priority, int reuse_thread, struct threadlist *tl);

static void wq_unpark_continue(void);
static void wq_unsuspend_continue(void);

static boolean_t workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread);
static void workqueue_removethread(struct threadlist *tl, int fromexit);
static void workqueue_lock_spin(proc_t);
static void workqueue_unlock(proc_t);

int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc);
int proc_setalltargetconc(pid_t pid, int32_t * targetconcp);
#define WQ_MAXPRI_MIN	0	/* low prio queue num */
#define WQ_MAXPRI_MAX	2	/* max prio queuenum */
#define WQ_PRI_NUM	3	/* number of prio work queues */

#define C_32_STK_ALIGN		16
#define C_64_STK_ALIGN		16
#define C_64_REDZONE_LEN	128
#define TRUNC_DOWN32(a,c)	((((uint32_t)a)-(c)) & ((uint32_t)(-(c))))
#define TRUNC_DOWN64(a,c)	((((uint64_t)a)-(c)) & ((uint64_t)(-(c))))
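/*
 * Worked example (illustrative only; the values below are not taken from this
 * file and assume 'c' is a power of two, as it is at the call sites): these
 * macros reserve at least 'c' bytes below 'a' and then round the result down
 * to a 'c'-byte boundary, e.g.
 *
 *	TRUNC_DOWN64(0x7fff5fc00008ULL, 16) == (0x7fff5fc00008 - 16) & ~15
 *	                                    == 0x7fff5fbffff0
 */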
/*
 * Flags field passed to bsdthread_create and back in pthread_start
 * 31  <---------------------------------> 0
 * _________________________________________
 * |  flags(8) | policy(8) | importance(16) |
 * -----------------------------------------
 */

#define PTHREAD_START_CUSTOM		0x01000000
#define PTHREAD_START_SETSCHED		0x02000000
#define PTHREAD_START_DETACHED		0x04000000
#define PTHREAD_START_QOSCLASS		0x08000000
#define PTHREAD_START_QOSCLASS_MASK	0xffffff
#define PTHREAD_START_POLICY_BITSHIFT	16
#define PTHREAD_START_POLICY_MASK	0xff
#define PTHREAD_START_IMPORTANCE_MASK	0xffff
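/*
 * Decoding example for the flags word laid out above (an illustrative sketch
 * using only the masks defined here; the local names are hypothetical and
 * mirror the decoding done in _bsdthread_create() below):
 *
 *	unsigned int policy     = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;
 *	unsigned int importance = flags & PTHREAD_START_IMPORTANCE_MASK;
 *	boolean_t    setsched   = (flags & PTHREAD_START_SETSCHED) != 0;
 */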
#define SCHED_OTHER	POLICY_TIMESHARE
#define SCHED_FIFO	POLICY_FIFO
#define SCHED_RR	POLICY_RR
_bsdthread_create(struct proc *p, user_addr_t user_func, user_addr_t user_funcarg, user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, user_addr_t *retval)

	mach_vm_offset_t stackaddr;
	mach_vm_size_t th_allocsize = 0;
	mach_vm_size_t user_stacksize;
	mach_vm_size_t th_stacksize;
	mach_vm_size_t th_guardsize;
	mach_vm_offset_t th_stackaddr;
	mach_vm_offset_t th_stack;
	mach_vm_offset_t th_pthread;
	mach_port_name_t th_thport;
	vm_map_t vmap = pthread_kern->current_map();
	task_t ctask = current_task();
	unsigned int policy, importance;

	if (pthread_kern->proc_get_register(p) == 0) {

	PTHREAD_TRACE(TRACE_pthread_thread_create | DBG_FUNC_START, flags, 0, 0, 0, 0);

	isLP64 = proc_is64bit(p);
	th_guardsize = vm_map_page_size(vmap);
#if defined(__i386__) || defined(__x86_64__)
	stackaddr = 0xB0000000;
#error Need to define a stack address hint for this architecture

	kret = pthread_kern->thread_create(ctask, &th);
	if (kret != KERN_SUCCESS)

	thread_reference(th);

	sright = (void *)pthread_kern->convert_thread_to_port(th);
	th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(ctask));

	if ((flags & PTHREAD_START_CUSTOM) == 0) {
		th_stacksize = (mach_vm_size_t)user_stack;		/* if it is custom then it is stacksize */
		th_allocsize = th_stacksize + th_guardsize + pthread_kern->proc_get_pthsize(p);

		kret = mach_vm_map(vmap, &stackaddr,
				VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, NULL,
				0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
				VM_INHERIT_DEFAULT);
		if (kret != KERN_SUCCESS)
			kret = mach_vm_allocate(vmap,
					&stackaddr, th_allocsize,
					VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE);
		if (kret != KERN_SUCCESS) {

		PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0);

		th_stackaddr = stackaddr;

		/*
		 * The guard page is at the lowest address
		 * The stack base is the highest address
		 */
		kret = mach_vm_protect(vmap, stackaddr, th_guardsize, FALSE, VM_PROT_NONE);

		if (kret != KERN_SUCCESS) {

		th_stack = (stackaddr + th_stacksize + th_guardsize);
		th_pthread = (stackaddr + th_stacksize + th_guardsize);
		user_stacksize = th_stacksize;

		/*
		 * Pre-fault the first page of the new thread's stack and the page that will
		 * contain the pthread_t structure.
		 */
			vm_map_trunc_page_mask(th_stack - PAGE_SIZE_64, vm_map_page_mask(vmap)),
			VM_PROT_READ | VM_PROT_WRITE,
			THREAD_UNINT, NULL, 0);

			vm_map_trunc_page_mask(th_pthread, vm_map_page_mask(vmap)),
			VM_PROT_READ | VM_PROT_WRITE,
			THREAD_UNINT, NULL, 0);

		th_stack = user_stack;
		user_stacksize = user_stack;
		th_pthread = user_pthread;

		PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_NONE, 0, 0, 0, 3, 0);
#if defined(__i386__) || defined(__x86_64__)
	/*
	 * Set up i386 registers & function call.
	 */
		x86_thread_state32_t state;
		x86_thread_state32_t *ts = &state;

		ts->eip = (unsigned int)pthread_kern->proc_get_threadstart(p);
		ts->eax = (unsigned int)th_pthread;
		ts->ebx = (unsigned int)th_thport;
		ts->ecx = (unsigned int)user_func;
		ts->edx = (unsigned int)user_funcarg;
		ts->edi = (unsigned int)user_stacksize;
		ts->esi = (unsigned int)flags;

		ts->esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN));

		error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts);
		if (error != KERN_SUCCESS) {

		x86_thread_state64_t state64;
		x86_thread_state64_t *ts64 = &state64;

		ts64->rip = (uint64_t)pthread_kern->proc_get_threadstart(p);
		ts64->rdi = (uint64_t)th_pthread;
		ts64->rsi = (uint64_t)(th_thport);
		ts64->rdx = (uint64_t)user_func;
		ts64->rcx = (uint64_t)user_funcarg;
		ts64->r8 = (uint64_t)user_stacksize;
		ts64->r9 = (uint64_t)flags;
		/*
		 * set stack pointer aligned to 16 byte boundary
		 */
		ts64->rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN);

		error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)ts64);
		if (error != KERN_SUCCESS) {

#elif defined(__arm__)
	arm_thread_state_t state;
	arm_thread_state_t *ts = &state;

	ts->pc = (int)pthread_kern->proc_get_threadstart(p);
	ts->r[0] = (unsigned int)th_pthread;
	ts->r[1] = (unsigned int)th_thport;
	ts->r[2] = (unsigned int)user_func;
	ts->r[3] = (unsigned int)user_funcarg;
	ts->r[4] = (unsigned int)user_stacksize;
	ts->r[5] = (unsigned int)flags;

	/* Set r7 & lr to 0 for better back tracing */

	ts->sp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN));

	(void) pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts);

#error bsdthread_create not defined for this architecture
	if ((flags & PTHREAD_START_SETSCHED) != 0) {
		/* Set scheduling parameters if needed */
		thread_extended_policy_data_t	extinfo;
		thread_precedence_policy_data_t	precedinfo;

		importance = (flags & PTHREAD_START_IMPORTANCE_MASK);
		policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK;

		if (policy == SCHED_OTHER) {
			extinfo.timeshare = 1;
		} else {
			extinfo.timeshare = 0;
		}

		thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);

#define BASEPRI_DEFAULT 31
		precedinfo.importance = (importance - BASEPRI_DEFAULT);
		thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
	} else if ((flags & PTHREAD_START_QOSCLASS) != 0) {
		/* Set thread QoS class if requested. */
		pthread_priority_t priority = (pthread_priority_t)(flags & PTHREAD_START_QOSCLASS_MASK);

		thread_qos_policy_data_t qos;
		qos.qos_tier = pthread_priority_get_qos_class(priority);
		qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 :
				_pthread_priority_get_relpri(priority);

		pthread_kern->thread_policy_set_internal(th, THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);

	kret = pthread_kern->thread_resume(th);
	if (kret != KERN_SUCCESS) {

	thread_deallocate(th);	/* drop the creator reference */

	PTHREAD_TRACE(TRACE_pthread_thread_create|DBG_FUNC_END, error, th_pthread, 0, 0, 0);

	*retval = th_pthread;

	if (allocated != 0) {
		(void)mach_vm_deallocate(vmap, stackaddr, th_allocsize);

	(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(ctask), th_thport);
	(void)thread_terminate(th);
	(void)thread_deallocate(th);
_bsdthread_terminate(__unused struct proc *p,
		     user_addr_t stackaddr,
		     __unused int32_t *retval)

	mach_vm_offset_t freeaddr;
	mach_vm_size_t freesize;

	freeaddr = (mach_vm_offset_t)stackaddr;

	PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0);

	if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
		kret = mach_vm_deallocate(pthread_kern->current_map(), freeaddr, freesize);
		if (kret != KERN_SUCCESS) {
			PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);

	(void) thread_terminate(current_thread());

	if (sem != MACH_PORT_NULL) {
		kret = pthread_kern->semaphore_signal_internal_trap(sem);
		if (kret != KERN_SUCCESS) {
			PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, kret, 0, 0, 0, 0);

	if (kthport != MACH_PORT_NULL) {
		pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(current_task()), kthport);

	PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0, 0, 0, 0);

	pthread_kern->thread_exception_return();
	panic("bsdthread_terminate: still running\n");

	PTHREAD_TRACE(TRACE_pthread_thread_terminate|DBG_FUNC_END, 0, 0xff, 0, 0, 0);
_bsdthread_register(struct proc *p,
		    user_addr_t threadstart,
		    user_addr_t wqthread,
		    user_addr_t pthread_init_data,
		    user_addr_t targetconc_ptr,
		    uint64_t dispatchqueue_offset,

	/* prevent multiple registrations */
	if (pthread_kern->proc_get_register(p) != 0) {

	/* syscall randomizer test can pass bogus values */
	if (pthsize < 0 || pthsize > MAX_PTHREAD_SIZE) {

	pthread_kern->proc_set_threadstart(p, threadstart);
	pthread_kern->proc_set_wqthread(p, wqthread);
	pthread_kern->proc_set_pthsize(p, pthsize);
	pthread_kern->proc_set_register(p);

	/* if we have pthread_init_data, then we use that and target_concptr (which is an offset) to get data. */
	if (pthread_init_data != 0) {
		thread_qos_policy_data_t qos;

		struct _pthread_registration_data data;
		size_t pthread_init_sz = MIN(sizeof(struct _pthread_registration_data), (size_t)targetconc_ptr);

		kern_return_t kr = copyin(pthread_init_data, &data, pthread_init_sz);
		if (kr != KERN_SUCCESS) {

		/* Incoming data from the data structure */
		pthread_kern->proc_set_dispatchqueue_offset(p, data.dispatch_queue_offset);

		/* Outgoing data that userspace expects as a reply */
		if (pthread_kern->qos_main_thread_active()) {
			mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
			boolean_t gd = FALSE;

			kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
			if (kr != KERN_SUCCESS || qos.qos_tier == THREAD_QOS_UNSPECIFIED) {
				/* An unspecified tier means the kernel wants us to impose legacy QoS on the thread. */
				qos.qos_tier = THREAD_QOS_LEGACY;
				qos.tier_importance = 0;

				kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);

			if (kr == KERN_SUCCESS) {
				data.main_qos = pthread_qos_class_get_priority(qos.qos_tier);
			} else {
				data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
			}
		} else {
			data.main_qos = _pthread_priority_make_newest(QOS_CLASS_UNSPECIFIED, 0, 0);
		}

		kr = copyout(&data, pthread_init_data, pthread_init_sz);
		if (kr != KERN_SUCCESS) {

		pthread_kern->proc_set_dispatchqueue_offset(p, dispatchqueue_offset);
		pthread_kern->proc_set_targconc(p, targetconc_ptr);

	/* return the supported feature set as the return value. */
	*retval = PTHREAD_FEATURE_SUPPORTED;
_bsdthread_ctl_set_qos(struct proc *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t tsd_priority_addr, user_addr_t arg3, int *retval)

	pthread_priority_t priority;

	/* Unused parameters must be zero. */

	/* QoS is stored in a given slot in the pthread TSD. We need to copy that in and set our QoS based on it. */
	if (proc_is64bit(p)) {
		kr = copyin(tsd_priority_addr, &v, sizeof(v));
		if (kr != KERN_SUCCESS) {

		priority = (int)(v & 0xffffffff);

		kr = copyin(tsd_priority_addr, &v, sizeof(v));
		if (kr != KERN_SUCCESS) {

	if ((th = port_name_to_thread(kport)) == THREAD_NULL) {

	/* <rdar://problem/16211829> Disable pthread_set_qos_class_np() on threads other than pthread_self */
	if (th != current_thread()) {
		thread_deallocate(th);

	int rv = _bsdthread_ctl_set_self(p, 0, priority, 0, _PTHREAD_SET_SELF_QOS_FLAG, retval);

	/* Static param the thread, we just set QoS on it, so it's stuck in QoS land now. */
	/* pthread_kern->thread_static_param(th, TRUE); */ // see <rdar://problem/16433744>, for details

	thread_deallocate(th);
static inline struct threadlist *
util_get_thread_threadlist_entry(thread_t th)

	struct uthread *uth = pthread_kern->get_bsdthread_info(th);

	struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);
wq_thread_override_reset(thread_t th, user_addr_t resource)

	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
	struct threadlist *tl = pthread_kern->uthread_get_threadlist(uth);

		/*
		 * Drop all outstanding overrides on this thread, done outside the wq lock
		 * because proc_usynch_thread_qos_remove_override_for_resource takes a spinlock that
		 * could cause us to panic.
		 */
		PTHREAD_TRACE(TRACE_wq_override_reset | DBG_FUNC_NONE, tl->th_workq, 0, 0, 0, 0);

		pthread_kern->proc_usynch_thread_qos_reset_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE);
_bsdthread_ctl_set_self(struct proc *p, user_addr_t __unused cmd, pthread_priority_t priority, mach_port_name_t voucher, _pthread_set_flags_t flags, int __unused *retval)

	thread_qos_policy_data_t qos;
	mach_msg_type_number_t nqos = THREAD_QOS_POLICY_COUNT;
	boolean_t gd = FALSE;

	int qos_rv = 0, voucher_rv = 0, fixedpri_rv = 0;

	if ((flags & _PTHREAD_SET_SELF_QOS_FLAG) != 0) {
		kr = pthread_kern->thread_policy_get(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, &nqos, &gd);
		if (kr != KERN_SUCCESS) {

		/* If we have main-thread QoS then we don't allow a thread to come out of QOS_CLASS_UNSPECIFIED. */
		if (pthread_kern->qos_main_thread_active() && qos.qos_tier == THREAD_QOS_UNSPECIFIED) {

		/* Get the work queue for tracing, also the threadlist for bucket manipulation. */
		struct workqueue *wq = NULL;
		struct threadlist *tl = util_get_thread_threadlist_entry(current_thread());

		PTHREAD_TRACE(TRACE_pthread_set_qos_self | DBG_FUNC_START, wq, qos.qos_tier, qos.tier_importance, 0, 0);

		qos.qos_tier = pthread_priority_get_qos_class(priority);
		qos.tier_importance = (qos.qos_tier == QOS_CLASS_UNSPECIFIED) ? 0 : _pthread_priority_get_relpri(priority);

		kr = pthread_kern->thread_policy_set_internal(current_thread(), THREAD_QOS_POLICY, (thread_policy_t)&qos, THREAD_QOS_POLICY_COUNT);
		if (kr != KERN_SUCCESS) {

		/* If we're a workqueue, the threadlist item priority needs adjusting, along with the bucket we were running in. */
			workqueue_lock_spin(p);

			/* Fix up counters. */
			uint8_t old_bucket = tl->th_priority;
			uint8_t new_bucket = pthread_priority_get_class_index(priority);

			uint32_t old_active = OSAddAtomic(-1, &wq->wq_thactive_count[old_bucket]);
			OSAddAtomic(1, &wq->wq_thactive_count[new_bucket]);

			wq->wq_thscheduled_count[old_bucket]--;
			wq->wq_thscheduled_count[new_bucket]++;

			tl->th_priority = new_bucket;

			/* If we were at the ceiling of non-overcommitted threads for a given bucket, we have to
			 * reevaluate whether we should start more work.
			 */
			if (old_active == wq->wq_reqconc[old_bucket]) {
				/* workqueue_run_nextreq will drop the workqueue lock in all exit paths. */
				(void)workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, FALSE, 0);

		PTHREAD_TRACE(TRACE_pthread_set_qos_self | DBG_FUNC_END, wq, qos.qos_tier, qos.tier_importance, 0, 0);

	if ((flags & _PTHREAD_SET_SELF_VOUCHER_FLAG) != 0) {
		kr = pthread_kern->thread_set_voucher_name(voucher);
		if (kr != KERN_SUCCESS) {

	if ((flags & _PTHREAD_SET_SELF_FIXEDPRIORITY_FLAG) != 0) {
		thread_extended_policy_data_t extpol;
		thread_t thread = current_thread();

		extpol.timeshare = 0;

		struct threadlist *tl = util_get_thread_threadlist_entry(thread);
			/* Not allowed on workqueue threads, since there is no symmetric clear function */
			fixedpri_rv = ENOTSUP;

		kr = pthread_kern->thread_policy_set_internal(thread, THREAD_EXTENDED_POLICY, (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT);
		if (kr != KERN_SUCCESS) {
			fixedpri_rv = EINVAL;

	if (qos_rv && voucher_rv) {
		/* Both failed, give that a unique error. */
_bsdthread_ctl_qos_override_start(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval)

	if ((th = port_name_to_thread(kport)) == THREAD_NULL) {

	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
	int override_qos = pthread_priority_get_qos_class(priority);

	struct threadlist *tl = util_get_thread_threadlist_entry(th);
		PTHREAD_TRACE(TRACE_wq_override_start | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0);

	/* The only failure case here is if we pass a tid and have it look up the thread; since we pass the uthread, this always succeeds. */
	pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), uth, 0, override_qos, TRUE, resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE);

	thread_deallocate(th);
_bsdthread_ctl_qos_override_end(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, user_addr_t resource, user_addr_t arg3, int __unused *retval)

	if ((th = port_name_to_thread(kport)) == THREAD_NULL) {

	struct uthread *uth = pthread_kern->get_bsdthread_info(th);

	struct threadlist *tl = util_get_thread_threadlist_entry(th);
		PTHREAD_TRACE(TRACE_wq_override_end | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 0, 0, 0);

	pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), uth, 0, resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE);

	thread_deallocate(th);
_bsdthread_ctl_qos_override_dispatch(struct proc *p, user_addr_t cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t arg3, int *retval)

	return _bsdthread_ctl_qos_dispatch_asynchronous_override_add(p, cmd, kport, priority, USER_ADDR_NULL, retval);

_bsdthread_ctl_qos_dispatch_asynchronous_override_add(struct proc __unused *p, user_addr_t __unused cmd, mach_port_name_t kport, pthread_priority_t priority, user_addr_t resource, int __unused *retval)

	if ((th = port_name_to_thread(kport)) == THREAD_NULL) {

	struct uthread *uth = pthread_kern->get_bsdthread_info(th);
	int override_qos = pthread_priority_get_qos_class(priority);

	struct threadlist *tl = util_get_thread_threadlist_entry(th);
		thread_deallocate(th);

	PTHREAD_TRACE(TRACE_wq_override_dispatch | DBG_FUNC_NONE, tl->th_workq, thread_tid(th), 1, priority, 0);

	/* The only failure case here is if we pass a tid and have it look up the thread; since we pass the uthread, this always succeeds. */
	pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), uth, 0, override_qos, TRUE, resource, THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE);

	thread_deallocate(th);
_bsdthread_ctl_qos_override_reset(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)

	if (arg1 != 0 || arg2 != 0 || arg3 != 0) {

	return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, 1 /* reset_all */, 0, 0, retval);

_bsdthread_ctl_qos_dispatch_asynchronous_override_reset(struct proc __unused *p, user_addr_t __unused cmd, int reset_all, user_addr_t resource, user_addr_t arg3, int __unused *retval)

	struct threadlist *tl;

	if ((reset_all && (resource != 0)) || arg3 != 0) {

	th = current_thread();
	tl = util_get_thread_threadlist_entry(th);

		wq_thread_override_reset(th, reset_all ? THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD : resource);
_bsdthread_ctl(struct proc *p, user_addr_t cmd, user_addr_t arg1, user_addr_t arg2, user_addr_t arg3, int *retval)

	case BSDTHREAD_CTL_SET_QOS:
		return _bsdthread_ctl_set_qos(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_OVERRIDE_START:
		return _bsdthread_ctl_qos_override_start(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_OVERRIDE_END:
		return _bsdthread_ctl_qos_override_end(p, cmd, (mach_port_name_t)arg1, arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_OVERRIDE_RESET:
		return _bsdthread_ctl_qos_override_reset(p, cmd, arg1, arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH:
		return _bsdthread_ctl_qos_override_dispatch(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD:
		return _bsdthread_ctl_qos_dispatch_asynchronous_override_add(p, cmd, (mach_port_name_t)arg1, (pthread_priority_t)arg2, arg3, retval);
	case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET:
		return _bsdthread_ctl_qos_dispatch_asynchronous_override_reset(p, cmd, (int)arg1, arg2, arg3, retval);
	case BSDTHREAD_CTL_SET_SELF:
		return _bsdthread_ctl_set_self(p, cmd, (pthread_priority_t)arg1, (mach_port_name_t)arg2, (_pthread_set_flags_t)arg3, retval);
uint32_t wq_yielded_threshold		= WQ_YIELDED_THRESHOLD;
uint32_t wq_yielded_window_usecs	= WQ_YIELDED_WINDOW_USECS;
uint32_t wq_stalled_window_usecs	= WQ_STALLED_WINDOW_USECS;
uint32_t wq_reduce_pool_window_usecs	= WQ_REDUCE_POOL_WINDOW_USECS;
uint32_t wq_max_timer_interval_usecs	= WQ_MAX_TIMER_INTERVAL_USECS;
uint32_t wq_max_threads			= WORKQUEUE_MAXTHREADS;
uint32_t wq_max_constrained_threads	= WORKQUEUE_MAXTHREADS / 8;
SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_yielded_threshold, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_yielded_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_stalled_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_reduce_pool_window_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_max_timer_interval_usecs, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_max_threads, 0, "");

SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &wq_max_constrained_threads, 0, "");
static uint32_t wq_init_constrained_limit = 1;
_workqueue_init_lock(proc_t p)

	lck_spin_init(pthread_kern->proc_get_wqlockptr(p), pthread_lck_grp, pthread_lck_attr);
	*(pthread_kern->proc_get_wqinitingptr(p)) = FALSE;

_workqueue_destroy_lock(proc_t p)

	lck_spin_destroy(pthread_kern->proc_get_wqlockptr(p), pthread_lck_grp);

workqueue_lock_spin(proc_t p)

	lck_spin_lock(pthread_kern->proc_get_wqlockptr(p));

workqueue_unlock(proc_t p)

	lck_spin_unlock(pthread_kern->proc_get_wqlockptr(p));
workqueue_interval_timer_start(struct workqueue *wq)

	if (wq->wq_timer_interval == 0) {
		wq->wq_timer_interval = wq_stalled_window_usecs;
	} else {
		wq->wq_timer_interval = wq->wq_timer_interval * 2;
	}
	if (wq->wq_timer_interval > wq_max_timer_interval_usecs) {
		wq->wq_timer_interval = wq_max_timer_interval_usecs;
	}
	clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline);

	thread_call_enter_delayed(wq->wq_atimer_call, deadline);

	PTHREAD_TRACE(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, wq->wq_flags, wq->wq_timer_interval, 0);
wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp)

	uint64_t lastblocked_ts;

	/*
	 * the timestamp is updated atomically w/o holding the workqueue lock
	 * so we need to do an atomic read of the 64 bits so that we don't see
	 * a mismatched pair of 32 bit reads... we accomplish this in an architecturally
	 * independent fashion by using OSCompareAndSwap64 to write back the
	 * value we grabbed... if it succeeds, then we have a good timestamp to
	 * evaluate... if it fails, we straddled grabbing the timestamp while it
	 * was being updated... treat a failed update as a busy thread since
	 * it implies we are about to see a really fresh timestamp anyway
	 */
	lastblocked_ts = *lastblocked_tsp;

	if ( !OSCompareAndSwap64((UInt64)lastblocked_ts, (UInt64)lastblocked_ts, lastblocked_tsp))

	if (lastblocked_ts >= cur_ts) {
		/*
		 * because the update of the timestamp when a thread blocks isn't
		 * serialized against us looking at it (i.e. we don't hold the workq lock)
		 * it's possible to have a timestamp that matches the current time or
		 * that even looks to be in the future relative to when we grabbed the current
		 * time... just treat this as a busy thread since it must have just blocked.
		 */

	elapsed = cur_ts - lastblocked_ts;

	pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);

	if (secs == 0 && usecs < wq_stalled_window_usecs)
#define WQ_TIMER_NEEDED(wq, start_timer) do {		\
	int oldflags = wq->wq_flags;			\
							\
	if ( !(oldflags & (WQ_EXITING | WQ_ATIMER_RUNNING))) {	\
		if (OSCompareAndSwap(oldflags, oldflags | WQ_ATIMER_RUNNING, (UInt32 *)&wq->wq_flags)) \
			start_timer = TRUE;		\
	}						\
} while (0)
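/*
 * Usage sketch (illustrative; it mirrors the call sites later in this file,
 * e.g. in workqueue_callback()):
 *
 *	boolean_t start_timer = FALSE;
 *
 *	WQ_TIMER_NEEDED(wq, start_timer);
 *	if (start_timer == TRUE)
 *		workqueue_interval_timer_start(wq);
 */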
workqueue_add_timer(struct workqueue *wq, __unused int param1)

	boolean_t start_timer = FALSE;
	boolean_t add_thread;

	PTHREAD_TRACE(TRACE_wq_add_timer | DBG_FUNC_START, wq, wq->wq_flags, wq->wq_nthreads, wq->wq_thidlecount, 0);

	workqueue_lock_spin(p);

	/*
	 * because workqueue_callback now runs w/o taking the workqueue lock
	 * we are unsynchronized w/r to a change in state of the running threads...
	 * to make sure we always evaluate that change, we allow it to start up
	 * a new timer if the current one is actively evaluating the state
	 * however, we do not need more than 2 timers fired up (1 active and 1 pending)
	 * and we certainly do not want 2 active timers evaluating the state
	 * simultaneously... so use WQL_ATIMER_BUSY to serialize the timers...
	 * note that WQL_ATIMER_BUSY is in a different flag word from WQ_ATIMER_RUNNING since
	 * it is always protected by the workq lock... WQ_ATIMER_RUNNING is evaluated
	 * and set atomically since the callback function needs to manipulate it
	 * w/o holding the workq lock...
	 *
	 * !WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY   ==   no pending timer,   no active timer
	 * !WQ_ATIMER_RUNNING &&  WQL_ATIMER_BUSY   ==   no pending timer,    1 active timer
	 *  WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY   ==    1 pending timer,   no active timer
	 *  WQ_ATIMER_RUNNING &&  WQL_ATIMER_BUSY   ==    1 pending timer,    1 active timer
	 */
	while (wq->wq_lflags & WQL_ATIMER_BUSY) {
		wq->wq_lflags |= WQL_ATIMER_WAITING;

		assert_wait((caddr_t)wq, (THREAD_UNINT));
		workqueue_unlock(p);

		thread_block(THREAD_CONTINUE_NULL);

		workqueue_lock_spin(p);

	wq->wq_lflags |= WQL_ATIMER_BUSY;

	/*
	 * the workq lock will protect us from seeing WQ_EXITING change state, but we
	 * still need to update this atomically in case someone else tries to start
	 * the timer just as we're releasing it
	 */
	while ( !(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags & ~WQ_ATIMER_RUNNING), (UInt32 *)&wq->wq_flags)));

	if ( !(wq->wq_flags & WQ_EXITING)) {
		/*
		 * check to see if the stall frequency was beyond our tolerance
		 * or we have work on the queue, but haven't scheduled any
		 * new work within our acceptable time interval because
		 * there were no idle threads left to schedule
		 */
		if (wq->wq_reqcount) {
			uint32_t  thactive_count;

			for (priclass = 0; priclass < WORKQUEUE_NUM_BUCKETS; priclass++) {
				if (wq->wq_requests[priclass])

			assert(priclass < WORKQUEUE_NUM_BUCKETS);

			curtime = mach_absolute_time();

			/*
			 * check for conditions under which we would not add a thread, either
			 *   a) we've got as many running threads as we want in this priority
			 *      band and the priority bands above it
			 *
			 *   b) check to see if the priority group has blocked threads, if the
			 *      last blocked timestamp is old enough, we will have already passed
			 *      (a) where we would have stopped if we had enough active threads.
			 */
			for (i = 0; i <= priclass; i++) {

				thactive_count += wq->wq_thactive_count[i];

				if (wq->wq_thscheduled_count[i]) {
					if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i]))

			if (thactive_count + busycount < wq->wq_max_concurrency) {

				if (wq->wq_thidlecount == 0) {
					/*
					 * if we have no idle threads, try to add one
					 */
					retval = workqueue_addnewthread(wq, FALSE);

			if (wq->wq_reqcount) {
				/*
				 * as long as we have threads to schedule, and we successfully
				 * scheduled new work, keep trying
				 */
				while (wq->wq_thidlecount && !(wq->wq_flags & WQ_EXITING)) {
					/*
					 * workqueue_run_nextreq is responsible for
					 * dropping the workqueue lock in all cases
					 */
					retval = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, FALSE, 0);
					workqueue_lock_spin(p);

					if (retval == FALSE)

				if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_reqcount) {

					if (wq->wq_thidlecount == 0 && retval == TRUE && add_thread == TRUE)

					if (wq->wq_thidlecount == 0 || busycount)
						WQ_TIMER_NEEDED(wq, start_timer);

					PTHREAD_TRACE(TRACE_wq_add_timer | DBG_FUNC_NONE, wq, wq->wq_reqcount, wq->wq_thidlecount, busycount, 0);

	if ( !(wq->wq_flags & WQ_ATIMER_RUNNING))
		wq->wq_timer_interval = 0;

	wq->wq_lflags &= ~WQL_ATIMER_BUSY;

	if ((wq->wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) {
		/*
		 * wakeup the thread hung up in workqueue_exit or workqueue_add_timer waiting for this timer
		 * to finish getting out of the way
		 */
		wq->wq_lflags &= ~WQL_ATIMER_WAITING;

	PTHREAD_TRACE(TRACE_wq_add_timer | DBG_FUNC_END, wq, start_timer, wq->wq_nthreads, wq->wq_thidlecount, 0);

	workqueue_unlock(p);

	if (start_timer == TRUE)
		workqueue_interval_timer_start(wq);
_workqueue_thread_yielded(void)

	struct workqueue *wq;

	if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL || wq->wq_reqcount == 0)

	workqueue_lock_spin(p);

	if (wq->wq_reqcount) {

		if (wq->wq_thread_yielded_count++ == 0)
			wq->wq_thread_yielded_timestamp = mach_absolute_time();

		if (wq->wq_thread_yielded_count < wq_yielded_threshold) {
			workqueue_unlock(p);

		PTHREAD_TRACE(TRACE_wq_thread_yielded | DBG_FUNC_START, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 0, 0);

		wq->wq_thread_yielded_count = 0;

		curtime = mach_absolute_time();
		elapsed = curtime - wq->wq_thread_yielded_timestamp;
		pthread_kern->absolutetime_to_microtime(elapsed, &secs, &usecs);

		if (secs == 0 && usecs < wq_yielded_window_usecs) {

			if (wq->wq_thidlecount == 0) {
				workqueue_addnewthread(wq, TRUE);
				/*
				 * 'workqueue_addnewthread' drops the workqueue lock
				 * when creating the new thread and then retakes it before
				 * returning... this window allows other threads to process
				 * requests, so we need to recheck for available work
				 * if none found, we just return... the newly created thread
				 * will eventually get used (if it hasn't already)...
				 */
				if (wq->wq_reqcount == 0) {
					workqueue_unlock(p);

			if (wq->wq_thidlecount) {

				boolean_t overcommit = FALSE;
				boolean_t force_oc = FALSE;

				for (priority = 0; priority < WORKQUEUE_NUM_BUCKETS; priority++) {
					if (wq->wq_requests[priority]) {

				assert(priority < WORKQUEUE_NUM_BUCKETS);

				wq->wq_requests[priority]--;

				if (wq->wq_ocrequests[priority]) {
					wq->wq_ocrequests[priority]--;

				(void)workqueue_run_nextreq(p, wq, THREAD_NULL, force_oc, overcommit, pthread_priority_from_class_index(priority));
				/*
				 * workqueue_run_nextreq is responsible for
				 * dropping the workqueue lock in all cases
				 */
				PTHREAD_TRACE(TRACE_wq_thread_yielded | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 1, 0);

		PTHREAD_TRACE(TRACE_wq_thread_yielded | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 2, 0);

	workqueue_unlock(p);
workqueue_callback(int type, thread_t thread)

	struct uthread    *uth;
	struct threadlist *tl;
	struct workqueue  *wq;

	uth = pthread_kern->get_bsdthread_info(thread);
	tl = pthread_kern->uthread_get_threadlist(uth);

	case SCHED_CALL_BLOCK: {
		uint32_t	old_activecount;
		boolean_t	start_timer = FALSE;

		old_activecount = OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]);

		if (old_activecount == wq->wq_reqconc[tl->th_priority]) {
			UInt64 *lastblocked_ptr;

			/*
			 * the number of active threads at this priority
			 * has fallen below the maximum number of concurrent
			 * threads that we're allowed to run
			 */
			lastblocked_ptr = (UInt64 *)&wq->wq_lastblocked_ts[tl->th_priority];
			curtime = mach_absolute_time();

			/*
			 * if we collide with another thread trying to update the last_blocked (really unlikely
			 * since another thread would have to get scheduled and then block after we start down
			 * this path), it's not a problem.  Either timestamp is adequate, so no need to retry
			 */
			OSCompareAndSwap64(*lastblocked_ptr, (UInt64)curtime, lastblocked_ptr);

			if (wq->wq_reqcount) {
				/*
				 * we have work to do so start up the timer
				 * if it's not running... we'll let it sort
				 * out whether we really need to start up
				 * another thread
				 */
				WQ_TIMER_NEEDED(wq, start_timer);

			if (start_timer == TRUE) {
				workqueue_interval_timer_start(wq);

		PTHREAD_TRACE1(TRACE_wq_thread_block | DBG_FUNC_START, wq, old_activecount, tl->th_priority, start_timer, thread_tid(thread));

	case SCHED_CALL_UNBLOCK:
		/*
		 * we cannot take the workqueue_lock here...
		 * an UNBLOCK can occur from a timer event which
		 * is run from an interrupt context... if the workqueue_lock
		 * is already held by this processor, we'll deadlock...
		 * the thread lock for the thread being UNBLOCKED
		 * is also held
		 */
		OSAddAtomic(1, &wq->wq_thactive_count[tl->th_priority]);

		PTHREAD_TRACE1(TRACE_wq_thread_block | DBG_FUNC_END, wq, wq->wq_threads_scheduled, tl->th_priority, 0, thread_tid(thread));

_workqueue_get_sched_callback(void)

	return workqueue_callback;
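/*
 * Attachment sketch (illustrative; not taken verbatim from this file): the
 * callback above is the scheduler call that gets attached to a workqueue
 * thread, mirroring the thread_sched_call() calls elsewhere in this file
 * that pass NULL to detach it, e.g.
 *
 *	pthread_kern->thread_sched_call(tl->th_thread, workqueue_callback);
 */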
workqueue_removethread(struct threadlist *tl, int fromexit)

	struct workqueue *wq;
	struct uthread * uth;

	/*
	 * If fromexit is set, the call is from workqueue_exit(),
	 * so some cleanups are to be avoided.
	 */
	TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);

	if (fromexit == 0) {
		wq->wq_thidlecount--;

	/*
	 * Clear the threadlist pointer in uthread so
	 * blocked thread on wakeup for termination will
	 * not access the thread list as it is going to be
	 * freed.
	 */
	pthread_kern->thread_sched_call(tl->th_thread, NULL);

	uth = pthread_kern->get_bsdthread_info(tl->th_thread);
	if (uth != (struct uthread *)0) {
		pthread_kern->uthread_set_threadlist(uth, NULL);

	if (fromexit == 0) {
		/* during exit the lock is not held */
		workqueue_unlock(wq->wq_proc);

	if ( (tl->th_flags & TH_LIST_SUSPENDED) ) {
		/*
		 * thread was created, but never used...
		 * need to clean up the stack and port ourselves
		 * since we're not going to spin up through the
		 * normal exit path triggered from Libc
		 */
		if (fromexit == 0) {
			/* vm map is already deallocated when this is called from exit */
			(void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize);

		(void)pthread_kern->mach_port_deallocate(pthread_kern->task_get_ipcspace(wq->wq_task), tl->th_thport);

		PTHREAD_TRACE1(TRACE_wq_thread_suspend | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));

		PTHREAD_TRACE1(TRACE_wq_thread_park | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread));

	/*
	 * drop our ref on the thread
	 */
	thread_deallocate(tl->th_thread);

	kfree(tl, sizeof(struct threadlist));
/*
 * called with workq lock held
 * dropped and retaken around thread creation
 * return with workq lock held
 */
workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread)

	struct threadlist *tl;
	struct uthread	*uth;
	mach_vm_offset_t stackaddr;
	mach_vm_size_t guardsize;

	if ((wq->wq_flags & WQ_EXITING) == WQ_EXITING)

	if (wq->wq_nthreads >= wq_max_threads || wq->wq_nthreads >= (pthread_kern->config_thread_max - 20)) {
		wq->wq_lflags |= WQL_EXCEEDED_TOTAL_THREAD_LIMIT;

	wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;

	if (oc_thread == FALSE && wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
		/*
		 * if we're not creating this thread to service an overcommit request,
		 * then check the size of the constrained thread pool...  if we've already
		 * reached our max for threads scheduled from this pool, don't create a new
		 * one... the callers of this function are prepared for failure.
		 */
		wq->wq_lflags |= WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;

	if (wq->wq_constrained_threads_scheduled < wq_max_constrained_threads)
		wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;

	workqueue_unlock(p);

	kret = pthread_kern->thread_create_workq(wq->wq_task, (thread_continue_t)wq_unsuspend_continue, &th);
	if (kret != KERN_SUCCESS) {

	tl = kalloc(sizeof(struct threadlist));
	bzero(tl, sizeof(struct threadlist));

#if defined(__i386__) || defined(__x86_64__)
	stackaddr = 0xB0000000;
#error Need to define a stack address hint for this architecture

	guardsize = vm_map_page_size(wq->wq_map);
	tl->th_allocsize = PTH_DEFAULT_STACKSIZE + guardsize + pthread_kern->proc_get_pthsize(p);

	kret = mach_vm_map(wq->wq_map, &stackaddr,
			VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE, NULL,
			0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
			VM_INHERIT_DEFAULT);

	if (kret != KERN_SUCCESS) {
		kret = mach_vm_allocate(wq->wq_map,
				&stackaddr, tl->th_allocsize,
				VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE);

	if (kret == KERN_SUCCESS) {
		/*
		 * The guard page is at the lowest address
		 * The stack base is the highest address
		 */
		kret = mach_vm_protect(wq->wq_map, stackaddr, guardsize, FALSE, VM_PROT_NONE);

		if (kret != KERN_SUCCESS)
			(void) mach_vm_deallocate(wq->wq_map, stackaddr, tl->th_allocsize);

	if (kret != KERN_SUCCESS) {
		(void) thread_terminate(th);
		thread_deallocate(th);

		kfree(tl, sizeof(struct threadlist));

	thread_reference(th);

	sright = (void *)pthread_kern->convert_thread_to_port(th);
	tl->th_thport = pthread_kern->ipc_port_copyout_send(sright, pthread_kern->task_get_ipcspace(wq->wq_task));

	pthread_kern->thread_static_param(th, TRUE);

	tl->th_flags = TH_LIST_INITED | TH_LIST_SUSPENDED;

	tl->th_stackaddr = stackaddr;
	tl->th_priority = WORKQUEUE_NUM_BUCKETS;

	uth = pthread_kern->get_bsdthread_info(tl->th_thread);

	workqueue_lock_spin(p);

	pthread_kern->uthread_set_threadlist(uth, tl);
	TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry);

	wq->wq_thidlecount++;

	PTHREAD_TRACE1(TRACE_wq_thread_suspend | DBG_FUNC_START, wq, wq->wq_nthreads, 0, thread_tid(current_thread()), thread_tid(tl->th_thread));

	workqueue_lock_spin(p);
_workq_open(struct proc *p, __unused int32_t *retval)

	struct workqueue * wq;
	boolean_t need_wakeup = FALSE;

	if (pthread_kern->proc_get_register(p) == 0) {

	num_cpus = pthread_kern->ml_get_max_cpus();

	if (wq_init_constrained_limit) {
		/*
		 * set up the limit for the constrained pool
		 * this is a virtual pool in that we don't
		 * maintain it on a separate idle and run list
		 */
		limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR;

		if (limit > wq_max_constrained_threads)
			wq_max_constrained_threads = limit;

		wq_init_constrained_limit = 0;

	workqueue_lock_spin(p);

	if (pthread_kern->proc_get_wqptr(p) == NULL) {

		while (*pthread_kern->proc_get_wqinitingptr(p) == TRUE) {

			assert_wait((caddr_t)pthread_kern->proc_get_wqinitingptr(p), THREAD_UNINT);
			workqueue_unlock(p);

			thread_block(THREAD_CONTINUE_NULL);

			workqueue_lock_spin(p);

		if (pthread_kern->proc_get_wqptr(p) != NULL) {

		*(pthread_kern->proc_get_wqinitingptr(p)) = TRUE;

		workqueue_unlock(p);

		wq_size = sizeof(struct workqueue);

		ptr = (char *)kalloc(wq_size);
		bzero(ptr, wq_size);

		wq = (struct workqueue *)ptr;
		wq->wq_flags = WQ_LIST_INITED;

		wq->wq_max_concurrency = num_cpus;
		wq->wq_task = current_task();
		wq->wq_map  = pthread_kern->current_map();

		for (i = 0; i < WORKQUEUE_NUM_BUCKETS; i++)
			wq->wq_reqconc[i] = (uint16_t)wq->wq_max_concurrency;

		TAILQ_INIT(&wq->wq_thrunlist);
		TAILQ_INIT(&wq->wq_thidlelist);

		wq->wq_atimer_call = thread_call_allocate((thread_call_func_t)workqueue_add_timer, (thread_call_param_t)wq);

		workqueue_lock_spin(p);

		pthread_kern->proc_set_wqptr(p, wq);
		pthread_kern->proc_set_wqsize(p, wq_size);

		*(pthread_kern->proc_get_wqinitingptr(p)) = FALSE;

	workqueue_unlock(p);

	if (need_wakeup == TRUE) {
		wakeup(pthread_kern->proc_get_wqinitingptr(p));
_workq_kernreturn(struct proc *p,
		  __unused user_addr_t item,
		  __unused int32_t *retval)

	struct workqueue *wq;

	if (pthread_kern->proc_get_register(p) == 0) {

	case WQOPS_QUEUE_NEWSPISUPP: {
		/*
		 * arg2 = offset of serialno into dispatch queue
		 */
		pthread_kern->proc_set_dispatchqueue_serialno_offset(p, (uint64_t)offset);

	case WQOPS_QUEUE_REQTHREADS: {
		/*
		 * arg2 = number of threads to start
		 */
		boolean_t overcommit = FALSE;
		int reqcount = arg2;
		pthread_priority_t priority = arg3;

		overcommit = (_pthread_priority_get_flags(priority) & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) != 0;
		class = pthread_priority_get_class_index(priority);

		if ((reqcount <= 0) || (class < 0) || (class >= WORKQUEUE_NUM_BUCKETS)) {

		workqueue_lock_spin(p);

		if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL) {
			workqueue_unlock(p);

		wq->wq_reqcount += reqcount;
		wq->wq_requests[class] += reqcount;

		PTHREAD_TRACE(TRACE_wq_req_threads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0);

		while (wq->wq_reqcount) {
			if (!workqueue_run_one(p, wq, overcommit, priority))

		PTHREAD_TRACE(TRACE_wq_req_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0);

			if (!workqueue_run_one(p, wq, overcommit, priority))

			/*
			 * we need to delay starting some of the overcommit requests...
			 * we should only fail to create the overcommit threads if
			 * we're at the max thread limit... as existing threads
			 * return to the kernel, we'll notice the ocrequests
			 * and spin them back to user space as the overcommit variety
			 */
			wq->wq_reqcount += reqcount;
			wq->wq_requests[class] += reqcount;
			wq->wq_ocrequests[class] += reqcount;

			PTHREAD_TRACE(TRACE_wq_delay_octhreads | DBG_FUNC_NONE, wq, priority, wq->wq_requests[class], reqcount, 0);

		workqueue_unlock(p);

	case WQOPS_THREAD_RETURN: {
		thread_t th = current_thread();
		struct uthread *uth = pthread_kern->get_bsdthread_info(th);
		struct threadlist *tl = util_get_thread_threadlist_entry(th);

		/* reset signal mask on the workqueue thread to default state */
		if (pthread_kern->uthread_get_sigmask(uth) != (sigset_t)(~workq_threadmask)) {
			pthread_kern->proc_lock(p);
			pthread_kern->uthread_set_sigmask(uth, ~workq_threadmask);
			pthread_kern->proc_unlock(p);

		/* dropping WQ override counts has to be done outside the wq lock. */
		wq_thread_override_reset(th, THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD);

		workqueue_lock_spin(p);

		if ((wq = (struct workqueue *)pthread_kern->proc_get_wqptr(p)) == NULL || !tl) {
			workqueue_unlock(p);

		PTHREAD_TRACE(TRACE_wq_runitem | DBG_FUNC_END, wq, 0, 0, 0, 0);

		(void)workqueue_run_nextreq(p, wq, th, FALSE, FALSE, 0);
		/*
		 * workqueue_run_nextreq is responsible for
		 * dropping the workqueue lock in all cases
		 */
/*
 * Routine:	workqueue_mark_exiting
 *
 * Function:	Mark the work queue such that new threads will not be added to the
 *		work queue after we return.
 *
 * Conditions:	Called against the current process.
 */
_workqueue_mark_exiting(struct proc *p)

	struct workqueue *wq = pthread_kern->proc_get_wqptr(p);

		PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);

		workqueue_lock_spin(p);

		/*
		 * we now arm the timer in the callback function w/o holding the workq lock...
		 * we do this by setting WQ_ATIMER_RUNNING via OSCompareAndSwap in order to
		 * ensure only a single timer is running and to notice that WQ_EXITING has
		 * been set (we don't want to start a timer once WQ_EXITING is posted)
		 *
		 * so once we have successfully set WQ_EXITING, we cannot fire up a new timer...
		 * therefore no need to clear the timer state atomically from the flags
		 *
		 * since we always hold the workq lock when dropping WQ_ATIMER_RUNNING
		 * the check for and sleep until clear is protected
		 */
		while (!(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags | WQ_EXITING), (UInt32 *)&wq->wq_flags)));

		if (wq->wq_flags & WQ_ATIMER_RUNNING) {
			if (thread_call_cancel(wq->wq_atimer_call) == TRUE) {
				wq->wq_flags &= ~WQ_ATIMER_RUNNING;

		while ((wq->wq_flags & WQ_ATIMER_RUNNING) || (wq->wq_lflags & WQL_ATIMER_BUSY)) {
			assert_wait((caddr_t)wq, (THREAD_UNINT));
			workqueue_unlock(p);

			thread_block(THREAD_CONTINUE_NULL);

			workqueue_lock_spin(p);

		workqueue_unlock(p);

		PTHREAD_TRACE(TRACE_wq_pthread_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
/*
 * Routine:	workqueue_exit
 *
 * Function:	clean up the work queue structure(s) now that there are no threads
 *		left running inside the work queue (except possibly current_thread).
 *
 * Conditions:	Called by the last thread in the process.
 *		Called against current process.
 */
_workqueue_exit(struct proc *p)

	struct workqueue  * wq;
	struct threadlist * tl, *tlist;
	struct uthread	  *uth;

	wq = pthread_kern->proc_get_wqptr(p);

		PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_START, wq, 0, 0, 0, 0);

		wq_size = pthread_kern->proc_get_wqsize(p);
		pthread_kern->proc_set_wqptr(p, NULL);
		pthread_kern->proc_set_wqsize(p, 0);

		/*
		 * Clean up workqueue data structures for threads that exited and
		 * didn't get a chance to clean up after themselves.
		 */
		TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) {
			pthread_kern->thread_sched_call(tl->th_thread, NULL);

			uth = pthread_kern->get_bsdthread_info(tl->th_thread);
			if (uth != (struct uthread *)0) {
				pthread_kern->uthread_set_threadlist(uth, NULL);

			TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);

			/*
			 * drop our last ref on the thread
			 */
			thread_deallocate(tl->th_thread);

			kfree(tl, sizeof(struct threadlist));

		TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) {
			workqueue_removethread(tl, 1);

		thread_call_free(wq->wq_atimer_call);

		PTHREAD_TRACE(TRACE_wq_workqueue_exit|DBG_FUNC_END, 0, 0, 0, 0, 0);
workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, pthread_priority_t priority)

	if (wq->wq_thidlecount == 0) {
		if (overcommit == FALSE) {
			if (wq->wq_constrained_threads_scheduled < wq->wq_max_concurrency)
				workqueue_addnewthread(wq, overcommit);
		} else {
			workqueue_addnewthread(wq, overcommit);
		}
		if (wq->wq_thidlecount == 0)

	ran_one = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, overcommit, priority);
	/*
	 * workqueue_run_nextreq is responsible for
	 * dropping the workqueue lock in all cases
	 */
	workqueue_lock_spin(p);
1892 * workqueue_run_nextreq:
1893 * called with the workqueue lock held...
1894 * responsible for dropping it in all cases
1897 workqueue_run_nextreq(proc_t p
, struct workqueue
*wq
, thread_t thread
,
1898 boolean_t force_oc
, boolean_t overcommit
, pthread_priority_t oc_prio
)
1900 thread_t th_to_run
= THREAD_NULL
;
1901 thread_t th_to_park
= THREAD_NULL
;
1902 int wake_thread
= 0;
1903 int reuse_thread
= WQ_FLAG_THREAD_REUSE
;
1904 uint32_t priclass
, orig_class
;
1905 uint32_t us_to_wait
;
1906 struct threadlist
*tl
= NULL
;
1907 struct uthread
*uth
= NULL
;
1908 boolean_t start_timer
= FALSE
;
1909 boolean_t adjust_counters
= TRUE
;
1911 uint32_t thactive_count
;
1914 PTHREAD_TRACE(TRACE_wq_run_nextitem
|DBG_FUNC_START
, wq
, thread
, wq
->wq_thidlecount
, wq
->wq_reqcount
, 0);
1916 if (thread
!= THREAD_NULL
) {
1917 uth
= pthread_kern
->get_bsdthread_info(thread
);
1919 if ((tl
= pthread_kern
->uthread_get_threadlist(uth
)) == NULL
) {
1920 panic("wq thread with no threadlist");
1925 * from here until we drop the workq lock
1926 * we can't be pre-empted since we hold
1927 * the lock in spin mode... this is important
1928 * since we have to independently update the priority that
1929 * the thread is associated with and the priorty based
1930 * counters that "workqueue_callback" also changes and bases
1933 dispatch_overcommit
:
1935 if (overcommit
|| force_oc
) {
1936 priclass
= pthread_priority_get_class_index(oc_prio
);
1938 if (thread
!= THREAD_NULL
) {
1942 goto grab_idle_thread
;
1944 if (wq
->wq_reqcount
) {
1945 for (priclass
= 0; priclass
< WORKQUEUE_NUM_BUCKETS
; priclass
++) {
1946 if (wq
->wq_requests
[priclass
])
1949 assert(priclass
< WORKQUEUE_NUM_BUCKETS
);
1951 if (wq
->wq_ocrequests
[priclass
] && (thread
!= THREAD_NULL
|| wq
->wq_thidlecount
)) {
1953 * handle delayed overcommit request...
1954 * they have priority over normal requests
1955 * within a given priority level
1958 wq
->wq_requests
[priclass
]--;
1959 wq
->wq_ocrequests
[priclass
]--;
1961 oc_prio
= pthread_priority_from_class_index(priclass
);
1964 goto dispatch_overcommit
;
	/*
	 * if we get here, the work should be handled by a constrained thread
	 */
	if (wq->wq_reqcount == 0 || wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) {
		/*
		 * no work to do, or we're already at or over the scheduling limit for
		 * constrained threads... just return or park the thread...
		 * do not start the timer for this condition... if we don't have any work,
		 * we'll check again when new work arrives... if we're over the limit, we need 1 or more
		 * constrained threads to return to the kernel before we can dispatch additional work
		 */
		if ((th_to_park = thread) == THREAD_NULL)
			goto out_of_work;
		goto parkit;
	}

	thactive_count = 0;
	busycount = 0;

	curtime = mach_absolute_time();

	thactive_count += wq->wq_thactive_count[priclass];

	if (wq->wq_thscheduled_count[priclass]) {
		if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[priclass])) {
			busycount++;
		}
	}
	if (thread != THREAD_NULL) {
		if (tl->th_priority == priclass) {
			/*
			 * don't count this thread as currently active
			 */
			thactive_count--;
		}
	}
	if (thactive_count + busycount >= wq->wq_max_concurrency) {
		if (busycount) {
			/*
			 * we found at least 1 thread in the
			 * 'busy' state... make sure we start
			 * the timer because if they are the only
			 * threads keeping us from scheduling
			 * this work request, we won't get a callback
			 * to kick off the timer... we need to
			 * start it now...
			 */
			WQ_TIMER_NEEDED(wq, start_timer);
		}

		PTHREAD_TRACE(TRACE_wq_overcommitted|DBG_FUNC_NONE, wq, (start_timer ? 1<<7 : 0) | pthread_priority_from_class_index(priclass), thactive_count, busycount, 0);

		if ((th_to_park = thread) == THREAD_NULL) {
			goto out_of_work;
		}
		goto parkit;
	}
	if (thread != THREAD_NULL) {
		/*
		 * thread is non-NULL here when we return from userspace
		 * in workq_kernreturn, rather than trying to find a thread
		 * we pick up new work for this specific thread.
		 */
		th_to_run = thread;
		goto pick_up_work;
	}

grab_idle_thread:
	if (wq->wq_thidlecount == 0) {
		/*
		 * we have no additional threads waiting to pick up
		 * work, however, there is additional work to do.
		 */
		WQ_TIMER_NEEDED(wq, start_timer);

		PTHREAD_TRACE(TRACE_wq_stalled, wq, wq->wq_nthreads, start_timer, 0, 0);

		goto no_thread_to_run;
	}

	/*
	 * we already know there is both work available
	 * and an idle thread, so activate a thread and then
	 * fall into the code that pulls a new work request...
	 */
	tl = TAILQ_FIRST(&wq->wq_thidlelist);
	TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
	wq->wq_thidlecount--;

	TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);

	if ((tl->th_flags & TH_LIST_SUSPENDED) == TH_LIST_SUSPENDED) {
		tl->th_flags &= ~TH_LIST_SUSPENDED;
		reuse_thread = 0;

	} else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) {
		tl->th_flags &= ~TH_LIST_BLOCKED;
		wake_thread = 1;
	}
	tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY;

	wq->wq_threads_scheduled++;
	wq->wq_thscheduled_count[priclass]++;
	OSAddAtomic(1, &wq->wq_thactive_count[priclass]);

	adjust_counters = FALSE;
	th_to_run = tl->th_thread;
pick_up_work:
	if (!overcommit && !force_oc) {
		wq->wq_reqcount--;
		wq->wq_requests[priclass]--;

		if ( !(tl->th_flags & TH_LIST_CONSTRAINED)) {
			wq->wq_constrained_threads_scheduled++;
			tl->th_flags |= TH_LIST_CONSTRAINED;
		}
	} else {
		if (tl->th_flags & TH_LIST_CONSTRAINED) {
			wq->wq_constrained_threads_scheduled--;
			tl->th_flags &= ~TH_LIST_CONSTRAINED;
		}
	}

	orig_class = tl->th_priority;
	tl->th_priority = (uint8_t)priclass;

	if (adjust_counters && (orig_class != priclass)) {
		/*
		 * we need to adjust these counters based on this
		 * thread's new disposition w/r to priority
		 */
		OSAddAtomic(-1, &wq->wq_thactive_count[orig_class]);
		OSAddAtomic(1, &wq->wq_thactive_count[priclass]);

		wq->wq_thscheduled_count[orig_class]--;
		wq->wq_thscheduled_count[priclass]++;
	}
	wq->wq_thread_yielded_count = 0;

	workqueue_unlock(p);

	if (orig_class != priclass) {
		pthread_priority_t pri = pthread_priority_from_class_index(priclass);

		thread_qos_policy_data_t qosinfo;

		/* Set the QoS tier on the thread, along with the ceiling of max importance for this class. */
		qosinfo.qos_tier = pthread_priority_get_qos_class(pri);
		qosinfo.tier_importance = 0;

		PTHREAD_TRACE(TRACE_wq_reset_priority | DBG_FUNC_START, wq, thread_tid(tl->th_thread), pthread_priority_from_class_index(orig_class), 0, 0);

		/* All the previous implementation here now boils down to setting the QoS policy on the thread. */
		pthread_kern->thread_policy_set_internal(th_to_run, THREAD_QOS_POLICY, (thread_policy_t)&qosinfo, THREAD_QOS_POLICY_COUNT);

		PTHREAD_TRACE(TRACE_wq_reset_priority | DBG_FUNC_END, wq, thread_tid(tl->th_thread), pthread_priority_from_class_index(priclass), qosinfo.qos_tier, 0);
	}

	/*
	 * if current thread is reused for work request, does not return via unix_syscall
	 */
	wq_runreq(p, overcommit, pthread_priority_from_class_index(priclass), th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));

	PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, wq, thread_tid(th_to_run), overcommit, 1, 0);

	return (TRUE);
out_of_work:
	/*
	 * we have no work to do or we are fully booked
	 * w/r to running threads...
	 */
no_thread_to_run:
	workqueue_unlock(p);

	if (start_timer)
		workqueue_interval_timer_start(wq);

	PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, wq, thread_tid(thread), start_timer, 2, 0);

	return (FALSE);
parkit:
	/*
	 * this is a workqueue thread with no more
	 * work to do... park it for now
	 */
	TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry);
	tl->th_flags &= ~TH_LIST_RUNNING;

	tl->th_flags |= TH_LIST_BLOCKED;
	TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry);

	pthread_kern->thread_sched_call(th_to_park, NULL);

	OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority]);
	wq->wq_thscheduled_count[tl->th_priority]--;
	wq->wq_threads_scheduled--;

	if (tl->th_flags & TH_LIST_CONSTRAINED) {
		wq->wq_constrained_threads_scheduled--;
		wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
		tl->th_flags &= ~TH_LIST_CONSTRAINED;
	}
	if (wq->wq_thidlecount < 100)
		us_to_wait = wq_reduce_pool_window_usecs - (wq->wq_thidlecount * (wq_reduce_pool_window_usecs / 100));
	else
		us_to_wait = wq_reduce_pool_window_usecs / 100;

	wq->wq_thidlecount++;
	wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT;

	assert_wait_timeout_with_leeway((caddr_t)tl, (THREAD_INTERRUPTIBLE),
			TIMEOUT_URGENCY_SYS_BACKGROUND|TIMEOUT_URGENCY_LEEWAY, us_to_wait,
			wq_reduce_pool_window_usecs, NSEC_PER_USEC);

	workqueue_unlock(p);

	if (start_timer)
		workqueue_interval_timer_start(wq);

	PTHREAD_TRACE1(TRACE_wq_thread_park | DBG_FUNC_START, wq, wq->wq_threads_scheduled, wq->wq_thidlecount, us_to_wait, thread_tid(th_to_park));
	PTHREAD_TRACE(TRACE_wq_run_nextitem | DBG_FUNC_END, wq, thread_tid(thread), 0, 3, 0);

	thread_block((thread_continue_t)wq_unpark_continue);
	/* NOT REACHED */

	return (FALSE);
}
static void
wq_unsuspend_continue(void)
{
	struct uthread *uth = NULL;
	thread_t th_to_unsuspend;
	struct threadlist *tl;
	proc_t p;

	th_to_unsuspend = current_thread();
	uth = pthread_kern->get_bsdthread_info(th_to_unsuspend);

	if (uth != NULL && (tl = pthread_kern->uthread_get_threadlist(uth)) != NULL) {

		if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
			/*
			 * most likely a normal resume of this thread occurred...
			 * it's also possible that the thread was aborted after we
			 * finished setting it up so that it could be dispatched... if
			 * so, thread_bootstrap_return will notice the abort and put
			 * the thread on the path to self-destruction
			 */
normal_resume_to_user:
			pthread_kern->thread_sched_call(th_to_unsuspend, workqueue_callback);
			pthread_kern->thread_bootstrap_return();
		}
		/*
		 * if we get here, it's because we've been resumed due to
		 * an abort of this thread (process is crashing)
		 */
		p = current_proc();

		workqueue_lock_spin(p);

		if (tl->th_flags & TH_LIST_SUSPENDED) {
			/*
			 * thread has been aborted while still on our idle
			 * queue... remove it from our domain...
			 * workqueue_removethread consumes the lock
			 */
			workqueue_removethread(tl, 0);
			pthread_kern->thread_bootstrap_return();
		}
		while ((tl->th_flags & TH_LIST_BUSY)) {
			/*
			 * this thread was aborted after we started making
			 * it runnable, but before we finished dispatching it...
			 * we need to wait for that process to finish,
			 * and we need to ask for a wakeup instead of a
			 * thread_resume since the abort has already resumed us
			 */
			tl->th_flags |= TH_LIST_NEED_WAKEUP;

			assert_wait((caddr_t)tl, (THREAD_UNINT));

			workqueue_unlock(p);
			thread_block(THREAD_CONTINUE_NULL);
			workqueue_lock_spin(p);
		}
		workqueue_unlock(p);
		/*
		 * we have finished setting up the thread's context...
		 * thread_bootstrap_return will take us through the abort path
		 * where the thread will self destruct
		 */
		goto normal_resume_to_user;
	}
	pthread_kern->thread_bootstrap_return();
}
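
/*
 * wq_unpark_continue:
 *   continuation for an idle (parked) workqueue thread.  A normal wakeup
 *   means new work was assigned, so the scheduler callback is restored and
 *   the thread returns to user space; if the park timeout fired and the
 *   thread is still on the idle list, it is removed and allowed to
 *   self-destruct.
 */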
static void
wq_unpark_continue(void)
{
	struct uthread *uth = NULL;
	struct threadlist *tl;
	thread_t th_to_unpark;
	proc_t p;

	th_to_unpark = current_thread();
	uth = pthread_kern->get_bsdthread_info(th_to_unpark);

	if ((tl = pthread_kern->uthread_get_threadlist(uth)) != NULL) {

		if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) {
			/*
			 * a normal wakeup of this thread occurred... no need
			 * for any synchronization with the timer and wq_runreq
			 */
normal_return_to_user:
			pthread_kern->thread_sched_call(th_to_unpark, workqueue_callback);

			PTHREAD_TRACE(0xefffd018 | DBG_FUNC_END, tl->th_workq, 0, 0, 0, 0);

			pthread_kern->thread_exception_return();
		}
		p = current_proc();

		workqueue_lock_spin(p);

		if ( !(tl->th_flags & TH_LIST_RUNNING)) {
			/*
			 * the timer popped us out and we've not
			 * been moved off of the idle list
			 * so we should now self-destruct
			 *
			 * workqueue_removethread consumes the lock
			 */
			workqueue_removethread(tl, 0);
			pthread_kern->thread_exception_return();
		}
		/*
		 * the timer woke us up, but we have already
		 * started to make this a runnable thread,
		 * but have not yet finished that process...
		 * so wait for the normal wakeup
		 */
		while ((tl->th_flags & TH_LIST_BUSY)) {

			assert_wait((caddr_t)tl, (THREAD_UNINT));

			workqueue_unlock(p);

			thread_block(THREAD_CONTINUE_NULL);

			workqueue_lock_spin(p);
		}
		/*
		 * we have finished setting up the thread's context
		 * now we can return as if we got a normal wakeup
		 */
		workqueue_unlock(p);

		goto normal_return_to_user;
	}
	pthread_kern->thread_exception_return();
}
static void
wq_runreq(proc_t p, boolean_t overcommit, pthread_priority_t priority, thread_t th, struct threadlist *tl,
	  int reuse_thread, int wake_thread, int return_directly)
{
	int ret = 0;
	boolean_t need_resume = FALSE;

	PTHREAD_TRACE1(TRACE_wq_runitem | DBG_FUNC_START, tl->th_workq, overcommit, priority, thread_tid(current_thread()), thread_tid(th));

	ret = _setup_wqthread(p, th, overcommit, priority, reuse_thread, tl);

	if (ret != 0)
		panic("setup_wqthread failed %x\n", ret);

	if (return_directly) {
		PTHREAD_TRACE(TRACE_wq_run_nextitem|DBG_FUNC_END, tl->th_workq, 0, 0, 4, 0);

		pthread_kern->thread_exception_return();
		panic("wq_runreq: thread_exception_return returned ...\n");
	}
	if (wake_thread) {
		workqueue_lock_spin(p);

		tl->th_flags &= ~TH_LIST_BUSY;
		wakeup(tl);

		workqueue_unlock(p);
	} else {
		PTHREAD_TRACE1(TRACE_wq_thread_suspend | DBG_FUNC_END, tl->th_workq, 0, 0, thread_tid(current_thread()), thread_tid(th));

		workqueue_lock_spin(p);

		if (tl->th_flags & TH_LIST_NEED_WAKEUP) {
			wakeup(tl);
		} else {
			need_resume = TRUE;
		}
		tl->th_flags &= ~(TH_LIST_BUSY | TH_LIST_NEED_WAKEUP);

		workqueue_unlock(p);

		if (need_resume) {
			/*
			 * need to do this outside of the workqueue spin lock
			 * since thread_resume locks the thread via a full mutex
			 */
			pthread_kern->thread_resume(th);
		}
	}
}
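
/*
 * _setup_wqthread:
 *   populate the initial user-space register state for a new or reused
 *   workqueue thread.  The entry point is the process's registered
 *   wqthread trampoline; the stack top, the thread's kernel port, the
 *   per-thread stack base and the flags word (reuse/overcommit/QoS bits)
 *   are passed in the argument registers, and the stack pointer is
 *   aligned per the 32-bit or 64-bit C ABI.
 */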
int
_setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, pthread_priority_t priority, int reuse_thread, struct threadlist *tl)
{
	uint32_t flags = reuse_thread | WQ_FLAG_THREAD_NEWSPI;
	mach_vm_size_t guardsize = vm_map_page_size(tl->th_workq->wq_map);
	int error = 0;

	if (overcommit) {
		flags |= WQ_FLAG_THREAD_OVERCOMMIT;
	}

	/* Put the QoS class value into the lower bits of the reuse_thread register, this is where
	 * the thread priority used to be stored anyway.
	 */
	flags |= (_pthread_priority_get_qos_newest(priority) & WQ_FLAG_THREAD_PRIOMASK);

#if defined(__i386__) || defined(__x86_64__)
	int isLP64 = proc_is64bit(p);

	/*
	 * Set up i386 registers & function call.
	 */
	if (isLP64 == 0) {
		x86_thread_state32_t state;
		x86_thread_state32_t *ts = &state;

		ts->eip = (unsigned int)pthread_kern->proc_get_wqthread(p);
		ts->eax = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize);
		ts->ebx = (unsigned int)tl->th_thport;
		ts->ecx = (unsigned int)(tl->th_stackaddr + guardsize);
		ts->edx = (unsigned int)0;
		ts->edi = (unsigned int)flags;
		ts->esi = (unsigned int)0;

		ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize) - C_32_STK_ALIGN));

		(void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)ts);

	} else {
		x86_thread_state64_t state64;
		x86_thread_state64_t *ts64 = &state64;

		ts64->rip = (uint64_t)pthread_kern->proc_get_wqthread(p);
		ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize);
		ts64->rsi = (uint64_t)(tl->th_thport);
		ts64->rdx = (uint64_t)(tl->th_stackaddr + guardsize);
		ts64->rcx = (uint64_t)0;
		ts64->r8 = (uint64_t)flags;
		ts64->r9 = (uint64_t)0;

		/*
		 * set stack pointer aligned to 16 byte boundary
		 */
		ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + guardsize) - C_64_REDZONE_LEN);

		error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)ts64);
		if (error != KERN_SUCCESS) {
			error = EINVAL;
		}
	}
#else
#error setup_wqthread not defined for this architecture
#endif

	return error;
}
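
/*
 * _fill_procworkqueue:
 *   snapshot per-process workqueue statistics (thread counts, active vs.
 *   blocked threads, and limit-exceeded state) under the workqueue lock,
 *   for reporting through proc_info.
 */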
int
_fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
{
	struct workqueue * wq;
	int error = 0;
	int activecount;
	uint32_t pri;

	workqueue_lock_spin(p);
	if ((wq = pthread_kern->proc_get_wqptr(p)) == NULL) {
		error = EINVAL;
		goto out;
	}
	activecount = 0;

	for (pri = 0; pri < WORKQUEUE_NUM_BUCKETS; pri++) {
		activecount += wq->wq_thactive_count[pri];
	}
	pwqinfo->pwq_nthreads = wq->wq_nthreads;
	pwqinfo->pwq_runthreads = activecount;
	pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
	pwqinfo->pwq_state = 0;

	if (wq->wq_lflags & WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT) {
		pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
	}

	if (wq->wq_lflags & WQL_EXCEEDED_TOTAL_THREAD_LIMIT) {
		pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
	}
out:
	workqueue_unlock(p);

	return (error);
}
int
_thread_selfid(__unused struct proc *p, uint64_t *retval)
{
	thread_t thread = current_thread();
	*retval = thread_tid(thread);
	return KERN_SUCCESS;
}
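
/*
 * One-time initialization for the pthread kext: allocate the lock group
 * and attributes, set up the kernel task's workqueue lock and the global
 * psynch hash and cleanup thread call, then register the workqueue and
 * pthread debug sysctls.
 */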
	pthread_lck_grp_attr = lck_grp_attr_alloc_init();
	pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);

	/*
	 * allocate the lock attribute for pthread synchronizers
	 */
	pthread_lck_attr = lck_attr_alloc_init();

	_workqueue_init_lock((proc_t)get_bsdtask_info(kernel_task));
	pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);

	pth_global_hashinit();
	psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);

	sysctl_register_oid(&sysctl__kern_wq_yielded_threshold);
	sysctl_register_oid(&sysctl__kern_wq_yielded_window_usecs);
	sysctl_register_oid(&sysctl__kern_wq_stalled_window_usecs);
	sysctl_register_oid(&sysctl__kern_wq_reduce_pool_window_usecs);
	sysctl_register_oid(&sysctl__kern_wq_max_timer_interval_usecs);
	sysctl_register_oid(&sysctl__kern_wq_max_threads);
	sysctl_register_oid(&sysctl__kern_wq_max_constrained_threads);
	sysctl_register_oid(&sysctl__kern_pthread_debug_tracing);